revision-id: 4720db9441fe9af7a69b59f2b097039ba256741e (mariadb-10.3.0-348-g4720db9441f) parent(s): c9ad134e56cc70119aaab8c8ac60c837fbb98dac author: Varun Gupta committer: Varun Gupta timestamp: 2018-01-05 04:53:54 +0530 message: MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on When we have the optimization orderby_uses_equalities ON then for semi-join materialised tables we get wrong results when these table is involved in filesort. This is due to the reason that the fields that are referenced are the ones from the semi-join materialised table and not from the original table The fix is to have the fields refer to the original table. So a copy_back technique is implemented for rr_sequential functions, which reads data from semi-join materialised tables and copies that back to the original table fields. So we need to extend this technique to other rr_* functions which are used to read the sorted data after filesort is performed --- mysql-test/r/order_by.result | 51 ++++++++++++++++++++++++++++++++++++- mysql-test/r/order_by_innodb.result | 16 ++++++------ mysql-test/t/order_by.test | 47 ++++++++++++++++++++++++++++++++++ sql/records.cc | 20 +++++++++++++++ sql/records.h | 5 ++-- sql/sql_select.cc | 46 ++++++++++++++++++--------------- 6 files changed, 154 insertions(+), 31 deletions(-) diff --git a/mysql-test/r/order_by.result b/mysql-test/r/order_by.result index 946ecb51426..b45eb6481a0 100644 --- a/mysql-test/r/order_by.result +++ b/mysql-test/r/order_by.result @@ -3200,10 +3200,59 @@ WHERE books.library_id = 8663 AND books.scheduled_for_removal=0 ) ORDER BY wings.id; id select_type table type possible_keys key key_len ref rows filtered Extra -1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 1 100.00 Using temporary; Using filesort +1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 1 100.00 Using filesort 1 PRIMARY wings eq_ref PRIMARY PRIMARY 4 test.books.wings_id 1 100.00 2 MATERIALIZED books ref library_idx library_idx 4 const 1 100.00 Using where Warnings: Note 1003 select `test`.`wings`.`id` AS `wing_id`,`test`.`wings`.`department_id` AS `department_id` from `test`.`wings` semi join (`test`.`books`) where `test`.`books`.`library_id` = 8663 and `test`.`books`.`scheduled_for_removal` = 0 and `test`.`wings`.`id` = `test`.`books`.`wings_id` order by `test`.`wings`.`id` set optimizer_switch= @save_optimizer_switch; DROP TABLE books, wings; +# +# Wrong result upon GROUP BY with orderby_uses_equalities=on +# +CREATE TABLE person ( +PersonID MEDIUMINT(8) UNSIGNED AUTO_INCREMENT, +PRIMARY KEY (PersonID) +) ; +CREATE TABLE percat ( +PersonID MEDIUMINT(8) DEFAULT 0, +CategoryID MEDIUMINT(8) DEFAULT 0, +PRIMARY KEY (PersonID, CategoryID), +INDEX (CategoryID) +) ; +CREATE TABLE action ( +PersonID MEDIUMINT(8) UNSIGNED DEFAULT 0, +ActionTypeID MEDIUMINT(8) UNSIGNED DEFAULT 0, +INDEX (PersonID), +INDEX (ActionTypeID) +) ; +INSERT INTO person (PersonID) VALUES +(58),(96),(273),(352); +INSERT INTO percat VALUES +(58,9),(273,1),(273,9),(273,14),(352,1),(352,13); +INSERT INTO action (PersonID, ActionTypeID) VALUES +(58,3),(96,3),(273,3),(352,3); +SELECT person.PersonID, +GROUP_CONCAT(CategoryID ORDER BY CategoryID SEPARATOR ',') AS categories +FROM person LEFT JOIN percat ON person.PersonID=percat.PersonID +WHERE person.PersonID IN (SELECT PersonID FROM action WHERE ActionTypeID=3) +GROUP BY person.PersonID; +PersonID categories +58 9 +96 NULL +273 1,9,14 +352 1,13 +SET @save_optimizer_switch=@@optimizer_switch; +SET optimizer_switch='orderby_uses_equalities=off'; +SELECT person.PersonID, +GROUP_CONCAT(CategoryID ORDER BY CategoryID SEPARATOR ',') AS categories +FROM person LEFT JOIN percat ON person.PersonID=percat.PersonID +WHERE person.PersonID IN (SELECT PersonID FROM action WHERE ActionTypeID=3) +GROUP BY person.PersonID; +PersonID categories +58 9 +96 NULL +273 1,9,14 +352 1,13 +set optimizer_switch= @save_optimizer_switch; +DROP TABLE action, percat, person; diff --git a/mysql-test/r/order_by_innodb.result b/mysql-test/r/order_by_innodb.result index 3ff1f92e94a..102323a9cbc 100644 --- a/mysql-test/r/order_by_innodb.result +++ b/mysql-test/r/order_by_innodb.result @@ -99,25 +99,25 @@ SELECT i,n FROM t1 INNER JOIN t2 USING (i,j) LEFT JOIN t3 USING (j) WHERE i IN (SELECT i FROM t1 WHERE z=1) AND z=0 ORDER BY i; i n -188 eight -218 eight -338 four -409 seven 466 eight 469 eight 498 eight 656 eight -SELECT i,n -FROM t1 x INNER JOIN t2 USING (i,j) LEFT JOIN t3 USING (j) -WHERE EXISTS (SELECT * FROM t1 WHERE i=x.i AND z=1) AND z=0 ORDER BY i; -i n 188 eight 218 eight 338 four 409 seven +SELECT i,n +FROM t1 x INNER JOIN t2 USING (i,j) LEFT JOIN t3 USING (j) +WHERE EXISTS (SELECT * FROM t1 WHERE i=x.i AND z=1) AND z=0 ORDER BY i; +i n 466 eight 469 eight 498 eight 656 eight +188 eight +218 eight +338 four +409 seven set optimizer_switch= @save_optimizer_switch; DROP TABLE t1,t2,t3; diff --git a/mysql-test/t/order_by.test b/mysql-test/t/order_by.test index 914911648b2..38537af835b 100644 --- a/mysql-test/t/order_by.test +++ b/mysql-test/t/order_by.test @@ -2149,3 +2149,50 @@ eval explain extended $q; set optimizer_switch= @save_optimizer_switch; DROP TABLE books, wings; + +--echo # +--echo # Wrong result upon GROUP BY with orderby_uses_equalities=on +--echo # + +CREATE TABLE person ( + PersonID MEDIUMINT(8) UNSIGNED AUTO_INCREMENT, + PRIMARY KEY (PersonID) +) ; + +CREATE TABLE percat ( + PersonID MEDIUMINT(8) DEFAULT 0, + CategoryID MEDIUMINT(8) DEFAULT 0, + PRIMARY KEY (PersonID, CategoryID), + INDEX (CategoryID) +) ; + +CREATE TABLE action ( + PersonID MEDIUMINT(8) UNSIGNED DEFAULT 0, + ActionTypeID MEDIUMINT(8) UNSIGNED DEFAULT 0, + INDEX (PersonID), + INDEX (ActionTypeID) +) ; + +INSERT INTO person (PersonID) VALUES (58),(96),(273),(352); + +INSERT INTO percat VALUES (58,9),(273,1),(273,9),(273,14),(352,1),(352,13); + +INSERT INTO action (PersonID, ActionTypeID) VALUES (58,3),(96,3),(273,3),(352,3); + +SELECT person.PersonID, +GROUP_CONCAT(CategoryID ORDER BY CategoryID SEPARATOR ',') AS categories +FROM person LEFT JOIN percat ON person.PersonID=percat.PersonID +WHERE person.PersonID IN (SELECT PersonID FROM action WHERE ActionTypeID=3) +GROUP BY person.PersonID; + +SET @save_optimizer_switch=@@optimizer_switch; +SET optimizer_switch='orderby_uses_equalities=off'; + +SELECT person.PersonID, +GROUP_CONCAT(CategoryID ORDER BY CategoryID SEPARATOR ',') AS categories +FROM person LEFT JOIN percat ON person.PersonID=percat.PersonID +WHERE person.PersonID IN (SELECT PersonID FROM action WHERE ActionTypeID=3) +GROUP BY person.PersonID; + +set optimizer_switch= @save_optimizer_switch; +DROP TABLE action, percat, person; diff --git a/sql/records.cc b/sql/records.cc index 650a51f7f37..f29554c59ef 100644 --- a/sql/records.cc +++ b/sql/records.cc @@ -49,6 +49,18 @@ static int rr_index_last(READ_RECORD *info); static int rr_index(READ_RECORD *info); static int rr_index_desc(READ_RECORD *info); +static int rr_read_record_and_unpack(READ_RECORD *info) +{ + int error; + if ((error= info->read_record_func_and_unpack_calls(info))) + return error; + + for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++) + (*cp->do_copy)(cp); + + return error; +} + /** Initialize READ_RECORD structure to perform full index scan in desired @@ -189,6 +201,7 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table, { IO_CACHE *tempfile; SORT_ADDON_FIELD *addon_field= filesort ? filesort->addon_field : 0; + bool need_unpacking= info->need_unpacking; DBUG_ENTER("init_read_record"); bzero((char*) info,sizeof(*info)); @@ -308,6 +321,13 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table, (void) table->file->extra_opt(HA_EXTRA_CACHE, thd->variables.read_buff_size); } + + if (need_unpacking) + { + info->read_record_func_and_unpack_calls= info->read_record_func; + info->read_record_func = rr_read_record_and_unpack; + } + /* Condition pushdown to storage engine */ if ((table->file->ha_table_flags() & HA_CAN_TABLE_CONDITION_PUSHDOWN) && select && select->cond && diff --git a/sql/records.h b/sql/records.h index 940c88ca0c7..ee923297ef9 100644 --- a/sql/records.h +++ b/sql/records.h @@ -56,6 +56,7 @@ struct READ_RECORD TABLE **forms; /* head and ref forms */ Unlock_row_func unlock_row; Read_func read_record_func; + Read_func read_record_func_and_unpack_calls; THD *thd; SQL_SELECT *select; uint cache_records; @@ -67,7 +68,7 @@ struct READ_RECORD uchar *cache,*cache_pos,*cache_end,*read_positions; struct st_sort_addon_field *addon_field; /* Pointer to the fields info */ struct st_io_cache *io_cache; - bool print_error, ignore_not_found_rows; + bool print_error, ignore_not_found_rows, need_unpacking; void (*unpack)(struct st_sort_addon_field *, uchar *, uchar *); int read_record() { return read_record_func(this); } @@ -79,7 +80,7 @@ struct READ_RECORD Copy_field *copy_field; Copy_field *copy_field_end; public: - READ_RECORD() : table(NULL), cache(NULL) {} + READ_RECORD() : table(NULL), cache(NULL), need_unpacking(false) {} ~READ_RECORD() { end_read_record(this); } }; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 8aed093af7f..98734715efe 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -12696,20 +12696,7 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond, can be used without tmp. table. */ bool can_subst_to_first_table= false; - bool first_is_in_sjm_nest= false; - if (first_is_base_table) - { - TABLE_LIST *tbl_for_first= - join->join_tab[join->const_tables].table->pos_in_table_list; - first_is_in_sjm_nest= tbl_for_first->sj_mat_info && - tbl_for_first->sj_mat_info->is_used; - } /* - Currently we do not employ the optimization that uses multiple - equalities for ORDER BY to remove tmp table in the case when - the first table happens to be the result of materialization of - a semi-join nest ( <=> first_is_in_sjm_nest == true). - When a semi-join nest is materialized and scanned to look for possible matches in the remaining tables for every its row the fields from the result of materialization are copied @@ -12717,16 +12704,12 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond, So these copies are used to access the remaining tables rather than the fields from the result of materialization. - Unfortunately now this so-called 'copy back' technique is - supported only if the rows are scanned with the rr_sequential - function, but not with other rr_* functions that are employed + Now this so-called 'copy back' technique is + supported for all rr_* functions that are employed when the result of materialization is required to be sorted. - - TODO: either to support 'copy back' technique for the above case, - or to get rid of this technique altogether. */ if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP) && - first_is_base_table && !first_is_in_sjm_nest && + first_is_base_table && order->item[0]->real_item()->type() == Item::FIELD_ITEM && join->cond_equal) { @@ -19693,6 +19676,8 @@ bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab) int join_init_read_record(JOIN_TAB *tab) { + Copy_field *save_copy, *save_copy_end; + TABLE_LIST *tbl_for_first; /* Note: the query plan tree for the below operations is constructed in save_agg_explain_data. @@ -19716,9 +19701,30 @@ int join_init_read_record(JOIN_TAB *tab) tab->join->thd->reset_killed();); if (!tab->preread_init_done && tab->preread_init()) return 1; + + /* + Allow unpacking of field for semi-join materialised table when + they are involved in filesort + */ + + tbl_for_first= tab->table->pos_in_table_list; + tab->read_record.need_unpacking= tbl_for_first ? (tab->filesort && + tbl_for_first->sj_mat_info && + tbl_for_first->sj_mat_info->is_used) : FALSE; + + /* + init_read_record resets all elements of tab->read_record(). + Remember things that we don't want to have reset. + */ + save_copy= tab->read_record.copy_field; + save_copy_end= tab->read_record.copy_field_end; + if (init_read_record(&tab->read_record, tab->join->thd, tab->table, tab->select, tab->filesort_result, 1,1, FALSE)) return 1; + tab->read_record.copy_field= save_copy; + tab->read_record.copy_field_end= save_copy_end; + return tab->read_record.read_record(); }