Hi Varun, Ok to push. On Thu, Oct 10, 2019 at 03:14:10PM +0530, Varun wrote:
revision-id: 8ccda715d0e657673445e150a9ed6ef784c68a61 (mariadb-10.4.4-341-g8ccda715d0e) parent(s): a340af922361e3958e5d6653c8b840771db282f2 author: Varun Gupta committer: Varun Gupta timestamp: 2019-10-10 12:58:57 +0530 message:
MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
When the SJM scan table is the first table in the join order and we want to sort on the SJM scan table, we need to make sure that the values are unpacked to the base table fields in two cases: 1) when reading the SJM table and writing the sort keys into the sort buffer; 2) when reading the sorted data from the sort file.
--- mysql-test/main/order_by.result | 138 +++++++++++++++++++++++++++++++++++++++- mysql-test/main/order_by.test | 34 ++++++++++ sql/filesort.cc | 10 +++ sql/filesort.h | 14 +++- sql/opt_subselect.cc | 10 ++- sql/records.cc | 13 ++++ sql/records.h | 1 + sql/sql_select.cc | 99 +++++++++++++--------------- sql/sql_select.h | 4 +- sql/sql_sort.h | 2 + sql/table.h | 1 + 11 files changed, 264 insertions(+), 62 deletions(-)
diff --git a/mysql-test/main/order_by.result b/mysql-test/main/order_by.result index b059cc686cd..e74583670fc 100644 --- a/mysql-test/main/order_by.result +++ b/mysql-test/main/order_by.result @@ -3322,7 +3322,7 @@ WHERE books.library_id = 8663 AND books.scheduled_for_removal=0 ) ORDER BY wings.id; id select_type table type possible_keys key key_len ref rows filtered Extra -1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 2 100.00 Using temporary; Using filesort +1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 2 100.00 Using filesort 1 PRIMARY wings eq_ref PRIMARY PRIMARY 4 test.books.wings_id 1 100.00 2 MATERIALIZED books ref library_idx library_idx 4 const 2 100.00 Using where Warnings: @@ -3436,3 +3436,139 @@ Note 1003 select `test`.`t4`.`a` AS `a`,`test`.`t4`.`b` AS `b`,`test`.`t4`.`c` A set histogram_size=@tmp_h, histogram_type=@tmp_ht, use_stat_tables=@tmp_u, optimizer_use_condition_selectivity=@tmp_o; drop table t1,t2,t3,t4; +# +# MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on +# +CREATE TABLE t1 (a INT, b int, primary key(a)); +CREATE TABLE t2 (a INT, b INT); +INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6), +(2354,7),(321421,3),(535,2),(4535,3); +INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3); +# Join order should have the SJM scan table as the first table for both +# the queries with GROUP BY and ORDER BY clause. 
+EXPLAIN SELECT t1.a +FROM t1 +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) +ORDER BY t1.a DESC; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 3 Using filesort +1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t2.a 1 Using index +2 MATERIALIZED t2 ALL NULL NULL NULL NULL 3 Using where +EXPLAIN FORMAT=JSON SELECT t1.a +FROM t1 +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) +ORDER BY t1.a DESC; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "read_sorted_file": { + "filesort": { + "sort_key": "t1.a desc", + "table": { + "table_name": "<subquery2>", + "access_type": "ALL", + "possible_keys": ["distinct_key"], + "rows": 3, + "filtered": 100, + "materialized": { + "unique": 1, + "query_block": { + "select_id": 2, + "table": { + "table_name": "t2", + "access_type": "ALL", + "rows": 3, + "filtered": 100, + "attached_condition": "t2.b = 3 and t2.a is not null" + } + } + } + } + } + }, + "table": { + "table_name": "t1", + "access_type": "eq_ref", + "possible_keys": ["PRIMARY"], + "key": "PRIMARY", + "key_length": "4", + "used_key_parts": ["a"], + "ref": ["test.t2.a"], + "rows": 1, + "filtered": 100, + "using_index": true + } + } +} +SELECT t1.a +FROM t1 +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) +ORDER BY t1.a DESC; +a +273 +96 +58 +EXPLAIN SELECT t1.a, group_concat(t1.b) +FROM t1 +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) +GROUP BY t1.a DESC; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 3 Using filesort +1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t2.a 1 +2 MATERIALIZED t2 ALL NULL NULL NULL NULL 3 Using where +EXPLAIN FORMAT=JSON SELECT t1.a, group_concat(t1.b) +FROM t1 +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) +GROUP BY t1.a DESC; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "read_sorted_file": { + "filesort": { + "sort_key": "t1.a desc", + "table": { + "table_name": "<subquery2>", + "access_type": "ALL", + 
"possible_keys": ["distinct_key"], + "rows": 3, + "filtered": 100, + "materialized": { + "unique": 1, + "query_block": { + "select_id": 2, + "table": { + "table_name": "t2", + "access_type": "ALL", + "rows": 3, + "filtered": 100, + "attached_condition": "t2.b = 3 and t2.a is not null" + } + } + } + } + } + }, + "table": { + "table_name": "t1", + "access_type": "eq_ref", + "possible_keys": ["PRIMARY"], + "key": "PRIMARY", + "key_length": "4", + "used_key_parts": ["a"], + "ref": ["test.t2.a"], + "rows": 1, + "filtered": 100 + } + } +} +SELECT t1.a, group_concat(t1.b) +FROM t1 +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) +GROUP BY t1.a DESC; +a group_concat(t1.b) +273 3 +96 2 +58 1 +DROP TABLE t1, t2; diff --git a/mysql-test/main/order_by.test b/mysql-test/main/order_by.test index 934c503302f..b3e43d27e2f 100644 --- a/mysql-test/main/order_by.test +++ b/mysql-test/main/order_by.test @@ -2276,3 +2276,37 @@ set histogram_size=@tmp_h, histogram_type=@tmp_ht, use_stat_tables=@tmp_u, optimizer_use_condition_selectivity=@tmp_o;
drop table t1,t2,t3,t4; + + +--echo # +--echo # MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on +--echo # + +CREATE TABLE t1 (a INT, b int, primary key(a)); +CREATE TABLE t2 (a INT, b INT); + +INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6), + (2354,7),(321421,3),(535,2),(4535,3); +INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3); + +--echo # Join order should have the SJM scan table as the first table for both +--echo # the queries with GROUP BY and ORDER BY clause. + +let $query= SELECT t1.a + FROM t1 + WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) + ORDER BY t1.a DESC; + +eval EXPLAIN $query; +eval EXPLAIN FORMAT=JSON $query; +eval $query; + +let $query= SELECT t1.a, group_concat(t1.b) + FROM t1 + WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) + GROUP BY t1.a DESC; + +eval EXPLAIN $query; +eval EXPLAIN FORMAT=JSON $query; +eval $query; +DROP TABLE t1, t2; diff --git a/sql/filesort.cc b/sql/filesort.cc index 3f4291cfb1f..0fc5724cf43 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -183,6 +183,9 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort, &multi_byte_charset), table, max_rows, filesort->sort_positions);
+ param.set_all_read_bits= filesort->set_all_read_bits; + param.unpack= filesort->unpack; + sort->addon_buf= param.addon_buf; sort->addon_field= param.addon_field; sort->unpack= unpack_addon_fields; @@ -756,13 +759,20 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, goto err; }
+ if (param->set_all_read_bits) + sort_form->column_bitmaps_set(save_read_set, save_write_set); + DEBUG_SYNC(thd, "after_index_merge_phase1"); for (;;) { if (quick_select) error= select->quick->get_next(); else /* Not quick-select */ + { error= file->ha_rnd_next(sort_form->record[0]); + if (param->unpack) + param->unpack(sort_form); + } if (unlikely(error)) break; file->position(sort_form->record[0]); diff --git a/sql/filesort.h b/sql/filesort.h index 5f79a5095cc..7e88982ab5f 100644 --- a/sql/filesort.h +++ b/sql/filesort.h @@ -58,6 +58,13 @@ class Filesort: public Sql_alloc
Filesort_tracker *tracker;
+ /* + TRUE means all the fields of table of whose bitmap read_set is set + need to be read while reading records in the sort buffer. + FALSE otherwise + */ + bool set_all_read_bits; + Filesort(ORDER *order_arg, ha_rows limit_arg, bool sort_positions_arg, SQL_SELECT *select_arg): order(order_arg), @@ -66,7 +73,9 @@ class Filesort: public Sql_alloc select(select_arg), own_select(false), using_pq(false), - sort_positions(sort_positions_arg) + sort_positions(sort_positions_arg), + set_all_read_bits(FALSE), + unpack(NULL) { DBUG_ASSERT(order); }; @@ -75,6 +84,9 @@ class Filesort: public Sql_alloc /* Prepare ORDER BY list for sorting. */ uint make_sortorder(THD *thd, JOIN *join, table_map first_table_bit);
+ /* Unpack temp table columns to base table columns*/ + void (*unpack)(TABLE *); + private: void cleanup(); }; diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 87458357865..f837a6394af 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -4252,11 +4252,11 @@ bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab) sjm_tab->type= JT_ALL;
/* Initialize full scan */ - sjm_tab->read_first_record= join_read_record_no_init; + sjm_tab->read_first_record= join_init_read_record; sjm_tab->read_record.copy_field= sjm->copy_field; sjm_tab->read_record.copy_field_end= sjm->copy_field + sjm->sjm_table_cols.elements; - sjm_tab->read_record.read_record_func= rr_sequential_and_unpack; + sjm_tab->read_record.read_record_func= read_record_func_for_rr_and_unpack; }
sjm_tab->bush_children->end[-1].next_select= end_sj_materialize; @@ -7105,3 +7105,9 @@ bool Item_in_subselect::pushdown_cond_for_in_subquery(THD *thd, Item *cond) thd->lex->current_select= save_curr_select; DBUG_RETURN(FALSE); } + + +bool TABLE_LIST::is_sjm_scan_table() +{ + return is_active_sjm() && sj_mat_info->is_sj_scan; +} diff --git a/sql/records.cc b/sql/records.cc index 3d709182a4e..f6885f773d5 100644 --- a/sql/records.cc +++ b/sql/records.cc @@ -709,3 +709,16 @@ static int rr_cmp(uchar *a,uchar *b) return (int) a[7] - (int) b[7]; #endif } + + +int read_record_func_for_rr_and_unpack(READ_RECORD *info) +{ + int error; + if ((error= info->read_record_func_and_unpack_calls(info))) + return error; + + for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++) + (*cp->do_copy)(cp); + + return error; +} diff --git a/sql/records.h b/sql/records.h index faf0d13c9a9..037a06b9d34 100644 --- a/sql/records.h +++ b/sql/records.h @@ -55,6 +55,7 @@ struct READ_RECORD TABLE *table; /* Head-form */ Unlock_row_func unlock_row; Read_func read_record_func; + Read_func read_record_func_and_unpack_calls; THD *thd; SQL_SELECT *select; uint ref_length, reclength, rec_cache_size, error_offset; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 36d9eda3383..0523cdef178 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -3799,6 +3799,15 @@ JOIN::add_sorting_to_table(JOIN_TAB *tab, ORDER *order) tab->select); if (!tab->filesort) return true; + + TABLE *table= tab->table; + if ((tab == join_tab + const_tables) && + table->pos_in_table_list && + table->pos_in_table_list->is_active_sjm()) + { + tab->filesort->set_all_read_bits= TRUE; + tab->filesort->unpack= unpack_to_base_table_fields; + } /* Select was moved to filesort->select to force join_init_read_record to use sorted result instead of reading table through select. @@ -14015,37 +14024,8 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond, can be used without tmp. table. 
*/ bool can_subst_to_first_table= false; - bool first_is_in_sjm_nest= false; - if (first_is_base_table) - { - TABLE_LIST *tbl_for_first= - join->join_tab[join->const_tables].table->pos_in_table_list; - first_is_in_sjm_nest= tbl_for_first->sj_mat_info && - tbl_for_first->sj_mat_info->is_used; - } - /* - Currently we do not employ the optimization that uses multiple - equalities for ORDER BY to remove tmp table in the case when - the first table happens to be the result of materialization of - a semi-join nest ( <=> first_is_in_sjm_nest == true). - - When a semi-join nest is materialized and scanned to look for - possible matches in the remaining tables for every its row - the fields from the result of materialization are copied - into the record buffers of tables from the semi-join nest. - So these copies are used to access the remaining tables rather - than the fields from the result of materialization. - - Unfortunately now this so-called 'copy back' technique is - supported only if the rows are scanned with the rr_sequential - function, but not with other rr_* functions that are employed - when the result of materialization is required to be sorted. - - TODO: either to support 'copy back' technique for the above case, - or to get rid of this technique altogether. - */ if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP) && - first_is_base_table && !first_is_in_sjm_nest && + first_is_base_table && order->item[0]->real_item()->type() == Item::FIELD_ITEM && join->cond_equal) { @@ -19922,19 +19902,6 @@ do_select(JOIN *join, Procedure *procedure) }
-int rr_sequential_and_unpack(READ_RECORD *info) -{ - int error; - if (unlikely((error= rr_sequential(info)))) - return error; - - for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++) - (*cp->do_copy)(cp); - - return error; -} - - /** @brief Instantiates temporary table @@ -21223,6 +21190,8 @@ bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab)
int join_init_read_record(JOIN_TAB *tab) { + bool need_unpacking= FALSE; + JOIN *join= tab->join; /* Note: the query plan tree for the below operations is constructed in save_agg_explain_data. @@ -21232,6 +21201,12 @@ int join_init_read_record(JOIN_TAB *tab) if (tab->filesort && tab->sort_table()) // Sort table. return 1;
+ if (join->top_join_tab_count != join->const_tables) + { + TABLE_LIST *tbl= tab->table->pos_in_table_list; + need_unpacking= tbl ? tbl->is_sjm_scan_table() : FALSE; + } + tab->build_range_rowid_filter_if_needed();
DBUG_EXECUTE_IF("kill_join_init_read_record", @@ -21249,16 +21224,6 @@ int join_init_read_record(JOIN_TAB *tab) if (!tab->preread_init_done && tab->preread_init()) return 1;
- - if (init_read_record(&tab->read_record, tab->join->thd, tab->table, - tab->select, tab->filesort_result, 1,1, FALSE)) - return 1; - return tab->read_record.read_record(); -} - -int -join_read_record_no_init(JOIN_TAB *tab) -{ Copy_field *save_copy, *save_copy_end;
/* @@ -21268,12 +21233,20 @@ join_read_record_no_init(JOIN_TAB *tab) save_copy= tab->read_record.copy_field; save_copy_end= tab->read_record.copy_field_end;
- init_read_record(&tab->read_record, tab->join->thd, tab->table, - tab->select, tab->filesort_result, 1, 1, FALSE); + + if (init_read_record(&tab->read_record, tab->join->thd, tab->table, + tab->select, tab->filesort_result, 1, 1, FALSE)) + return 1;
tab->read_record.copy_field= save_copy; tab->read_record.copy_field_end= save_copy_end; - tab->read_record.read_record_func= rr_sequential_and_unpack; + + if (need_unpacking) + { + tab->read_record.read_record_func_and_unpack_calls= + tab->read_record.read_record_func; + tab->read_record.read_record_func = read_record_func_for_rr_and_unpack; + }
return tab->read_record.read_record(); } @@ -28981,6 +28954,20 @@ void build_notnull_conds_for_inner_nest_of_outer_join(JOIN *join, }
+/* + @brief + Unpacking temp table fields to base table fields. +*/ + +void unpack_to_base_table_fields(TABLE *table) +{ + JOIN_TAB *tab= table->reginfo.join_tab; + for (Copy_field *cp= tab->read_record.copy_field; + cp != tab->read_record.copy_field_end; cp++) + (*cp->do_copy)(cp); +} + + /** @} (end of group Query_Optimizer) */ diff --git a/sql/sql_select.h b/sql/sql_select.h index 4f7bf49f635..21c07c9bacd 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -223,7 +223,7 @@ typedef enum_nested_loop_state (*Next_select_func)(JOIN *, struct st_join_table *, bool); Next_select_func setup_end_select_func(JOIN *join, JOIN_TAB *tab); int rr_sequential(READ_RECORD *info); -int rr_sequential_and_unpack(READ_RECORD *info); +int read_record_func_for_rr_and_unpack(READ_RECORD *info); Item *remove_pushed_top_conjuncts(THD *thd, Item *cond); Item *and_new_conditions_to_optimized_cond(THD *thd, Item *cond, COND_EQUAL **cond_eq, @@ -2352,7 +2352,6 @@ create_virtual_tmp_table(THD *thd, Field *field)
int test_if_item_cache_changed(List<Cached_item> &list); int join_init_read_record(JOIN_TAB *tab); -int join_read_record_no_init(JOIN_TAB *tab); void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key); inline Item * and_items(THD *thd, Item* cond, Item *item) { @@ -2410,6 +2409,7 @@ int print_explain_message_line(select_result_sink *result, void explain_append_mrr_info(QUICK_RANGE_SELECT *quick, String *res); int append_possible_keys(MEM_ROOT *alloc, String_list &list, TABLE *table, key_map possible_keys); +void unpack_to_base_table_fields(TABLE *table);
/**************************************************************************** Temporary table support for SQL Runtime diff --git a/sql/sql_sort.h b/sql/sql_sort.h index 7abbc808632..cd712cb50aa 100644 --- a/sql/sql_sort.h +++ b/sql/sql_sort.h @@ -79,6 +79,7 @@ class Sort_param { SORT_FIELD *end; SORT_ADDON_FIELD *addon_field; // Descriptors for companion fields. LEX_STRING addon_buf; // Buffer & length of added packed fields. + bool set_all_read_bits;
uchar *unique_buff; bool not_killable; @@ -93,6 +94,7 @@ class Sort_param { } void init_for_filesort(uint sortlen, TABLE *table, ha_rows maxrows, bool sort_positions); + void (*unpack)(TABLE *); };
diff --git a/sql/table.h b/sql/table.h index 1a7e5fbd4dc..35ba9bbb95d 100644 --- a/sql/table.h +++ b/sql/table.h @@ -2622,6 +2622,7 @@ struct TABLE_LIST */ const char *get_table_name() const { return view != NULL ? view_name.str : table_name.str; } bool is_active_sjm(); + bool is_sjm_scan_table(); bool is_jtbm() { return MY_TEST(jtbm_subselect != NULL); } st_select_lex_unit *get_unit(); st_select_lex *get_single_select(); _______________________________________________ commits mailing list commits@mariadb.org https://lists.askmonty.org/cgi-bin/mailman/listinfo/commits
-- BR Sergei -- Sergei Petrunia, Software Developer MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog