Hi Varun, This patch is a step in the right direction, but I think we're not quite there yet. The patch adds logic about semi-join materialization into the filesort code This makes things really messy. I think, some isolation would be beneficial. The first suggestion: In Sort_param (as this structure is visible in find_all_keys), add members that control the behavior inside find_all_keys: - unpack_function (default is NULL) - bool set_all_read_bits (default is FALSE) Now, Sort_param is allocated in filesort(), so we will have to add the same members in "class Filesort" and have filesort() copy them to Sort_param (necessary because find_all_keys() doesn't see the "Filesort" object). Then, these members can be set in the SQL layer e.g. in JOIN::add_sorting_to_table which creates the Filesort structure. (It would be ideal if semi-join code set them, but this is probably not possible to achieve as Filesort is set up after the semi-join code is set up). If after all these changes "JOIN_TAB::unpacking_to_base_table_fields()" is still there, please rename it to "unpack_..." so that it follows the established convention. That's all the input. On Sat, Sep 28, 2019 at 02:12:01PM +0530, Varun wrote:
revision-id: d48294d15c4895215d5facef97fc80c03cd6b4b0 (mariadb-10.4.4-341-gd48294d15c4) parent(s): a340af922361e3958e5d6653c8b840771db282f2 author: Varun Gupta committer: Varun Gupta timestamp: 2019-09-28 13:06:44 +0530 message:
MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
For the case when the SJM scan table is the first table in the join order, then if we want to do the sorting on the SJM scan table, then we need to make sure that we unpack the values to base table fields in two cases: 1) Reading the SJM table and writing the sort-keys inside the sort-buffer 2) Reading the sorted data from the sort file
--- mysql-test/main/order_by.result | 138 +++++++++++++++++++++++++++++++++++++++- mysql-test/main/order_by.test | 34 ++++++++++ sql/filesort.cc | 17 +++++ sql/opt_subselect.cc | 10 ++- sql/records.cc | 13 ++++ sql/records.h | 1 + sql/sql_select.cc | 89 ++++++++++---------------- sql/sql_select.h | 4 +- sql/table.h | 1 + 9 files changed, 246 insertions(+), 61 deletions(-)
diff --git a/mysql-test/main/order_by.result b/mysql-test/main/order_by.result index b059cc686cd..e74583670fc 100644 --- a/mysql-test/main/order_by.result +++ b/mysql-test/main/order_by.result @@ -3322,7 +3322,7 @@ WHERE books.library_id = 8663 AND books.scheduled_for_removal=0 ) ORDER BY wings.id; id select_type table type possible_keys key key_len ref rows filtered Extra -1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 2 100.00 Using temporary; Using filesort +1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 2 100.00 Using filesort 1 PRIMARY wings eq_ref PRIMARY PRIMARY 4 test.books.wings_id 1 100.00 2 MATERIALIZED books ref library_idx library_idx 4 const 2 100.00 Using where Warnings: @@ -3436,3 +3436,139 @@ Note 1003 select `test`.`t4`.`a` AS `a`,`test`.`t4`.`b` AS `b`,`test`.`t4`.`c` A set histogram_size=@tmp_h, histogram_type=@tmp_ht, use_stat_tables=@tmp_u, optimizer_use_condition_selectivity=@tmp_o; drop table t1,t2,t3,t4; +# +# MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on +# +CREATE TABLE t1 (a INT, b int, primary key(a)); +CREATE TABLE t2 (a INT, b INT); +INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6), +(2354,7),(321421,3),(535,2),(4535,3); +INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3); +# Join order should have the SJM scan table as the first table for both +# the queries with GROUP BY and ORDER BY clause. +EXPLAIN SELECT t1.a +FROM t1 +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) +ORDER BY t1.a DESC; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 3 Using filesort +1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t2.a 1 Using index +2 MATERIALIZED t2 ALL NULL NULL NULL NULL 3 Using where +EXPLAIN FORMAT=JSON SELECT t1.a +FROM t1 +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) +ORDER BY t1.a DESC; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "read_sorted_file": { + "filesort": { + "sort_key": "t1.a desc", + "table": { + "table_name": "<subquery2>", + "access_type": "ALL", + "possible_keys": ["distinct_key"], + "rows": 3, + "filtered": 100, + "materialized": { + "unique": 1, + "query_block": { + "select_id": 2, + "table": { + "table_name": "t2", + "access_type": "ALL", + "rows": 3, + "filtered": 100, + "attached_condition": "t2.b = 3 and t2.a is not null" + } + } + } + } + } + }, + "table": { + "table_name": "t1", + "access_type": "eq_ref", + "possible_keys": ["PRIMARY"], + "key": "PRIMARY", + "key_length": "4", + "used_key_parts": ["a"], + "ref": ["test.t2.a"], + "rows": 1, + "filtered": 100, + "using_index": true + } + } +} +SELECT t1.a +FROM t1 +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) +ORDER BY t1.a DESC; +a +273 +96 +58 +EXPLAIN SELECT t1.a, group_concat(t1.b) +FROM t1 +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) +GROUP BY t1.a DESC; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 3 Using filesort +1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t2.a 1 +2 MATERIALIZED t2 ALL NULL NULL NULL NULL 3 Using where +EXPLAIN FORMAT=JSON SELECT t1.a, group_concat(t1.b) +FROM t1 +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) +GROUP BY t1.a DESC; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "read_sorted_file": { + "filesort": { + "sort_key": "t1.a desc", + "table": { + "table_name": "<subquery2>", + "access_type": "ALL", + "possible_keys": ["distinct_key"], + "rows": 3, + "filtered": 100, + "materialized": { + "unique": 1, + "query_block": { + "select_id": 2, + "table": { + "table_name": "t2", + "access_type": "ALL", + "rows": 3, + "filtered": 100, + "attached_condition": "t2.b = 3 and t2.a is not null" + } + } + } + } + } + }, + "table": { + "table_name": "t1", + "access_type": "eq_ref", + "possible_keys": ["PRIMARY"], + "key": "PRIMARY", + "key_length": "4", + "used_key_parts": ["a"], + "ref": ["test.t2.a"], + "rows": 1, + "filtered": 100 + } + } +} +SELECT t1.a, group_concat(t1.b) +FROM t1 +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) +GROUP BY t1.a DESC; +a group_concat(t1.b) +273 3 +96 2 +58 1 +DROP TABLE t1, t2; diff --git a/mysql-test/main/order_by.test b/mysql-test/main/order_by.test index 934c503302f..b3e43d27e2f 100644 --- a/mysql-test/main/order_by.test +++ b/mysql-test/main/order_by.test @@ -2276,3 +2276,37 @@ set histogram_size=@tmp_h, histogram_type=@tmp_ht, use_stat_tables=@tmp_u, optimizer_use_condition_selectivity=@tmp_o;
drop table t1,t2,t3,t4; + + +--echo # +--echo # MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on +--echo # + +CREATE TABLE t1 (a INT, b int, primary key(a)); +CREATE TABLE t2 (a INT, b INT); + +INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6), + (2354,7),(321421,3),(535,2),(4535,3); +INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3); + +--echo # Join order should have the SJM scan table as the first table for both +--echo # the queries with GROUP BY and ORDER BY clause. + +let $query= SELECT t1.a + FROM t1 + WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) + ORDER BY t1.a DESC; + +eval EXPLAIN $query; +eval EXPLAIN FORMAT=JSON $query; +eval $query; + +let $query= SELECT t1.a, group_concat(t1.b) + FROM t1 + WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) + GROUP BY t1.a DESC; + +eval EXPLAIN $query; +eval EXPLAIN FORMAT=JSON $query; +eval $query; +DROP TABLE t1, t2; diff --git a/sql/filesort.cc b/sql/filesort.cc index 3f4291cfb1f..e5c83293e9f 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -716,11 +716,21 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, *found_rows= 0; ref_pos= &file->ref[0]; next_pos=ref_pos; + JOIN_TAB *tab= sort_form->reginfo.join_tab; + JOIN *join= tab ? tab->join : NULL; + bool first_is_in_sjm_nest= FALSE;
DBUG_EXECUTE_IF("show_explain_in_find_all_keys", dbug_serve_apcs(thd, 1); );
+ if (join && join->table_count != join->const_tables && + (join->join_tab + join->const_tables == tab)) + { + TABLE_LIST *tbl_for_first= sort_form->pos_in_table_list; + first_is_in_sjm_nest= tbl_for_first && tbl_for_first->is_sjm_scan_table(); + } + if (!quick_select) { next_pos=(uchar*) 0; /* Find records in sequence */ @@ -756,13 +766,20 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select, goto err; }
+ if (first_is_in_sjm_nest) + sort_form->column_bitmaps_set(save_read_set, save_write_set); + DEBUG_SYNC(thd, "after_index_merge_phase1"); for (;;) { if (quick_select) error= select->quick->get_next(); else /* Not quick-select */ + { error= file->ha_rnd_next(sort_form->record[0]); + if (first_is_in_sjm_nest) + tab->unpacking_to_base_table_fields(); + } if (unlikely(error)) break; file->position(sort_form->record[0]); diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 87458357865..f837a6394af 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -4252,11 +4252,11 @@ bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab) sjm_tab->type= JT_ALL;
/* Initialize full scan */ - sjm_tab->read_first_record= join_read_record_no_init; + sjm_tab->read_first_record= join_init_read_record; sjm_tab->read_record.copy_field= sjm->copy_field; sjm_tab->read_record.copy_field_end= sjm->copy_field + sjm->sjm_table_cols.elements; - sjm_tab->read_record.read_record_func= rr_sequential_and_unpack; + sjm_tab->read_record.read_record_func= read_record_func_for_rr_and_unpack; }
sjm_tab->bush_children->end[-1].next_select= end_sj_materialize; @@ -7105,3 +7105,9 @@ bool Item_in_subselect::pushdown_cond_for_in_subquery(THD *thd, Item *cond) thd->lex->current_select= save_curr_select; DBUG_RETURN(FALSE); } + + +bool TABLE_LIST::is_sjm_scan_table() +{ + return is_active_sjm() && sj_mat_info->is_sj_scan; +} diff --git a/sql/records.cc b/sql/records.cc index 3d709182a4e..f6885f773d5 100644 --- a/sql/records.cc +++ b/sql/records.cc @@ -709,3 +709,16 @@ static int rr_cmp(uchar *a,uchar *b) return (int) a[7] - (int) b[7]; #endif } + + +int read_record_func_for_rr_and_unpack(READ_RECORD *info) +{ + int error; + if ((error= info->read_record_func_and_unpack_calls(info))) + return error; + + for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++) + (*cp->do_copy)(cp); + + return error; +} diff --git a/sql/records.h b/sql/records.h index faf0d13c9a9..037a06b9d34 100644 --- a/sql/records.h +++ b/sql/records.h @@ -55,6 +55,7 @@ struct READ_RECORD TABLE *table; /* Head-form */ Unlock_row_func unlock_row; Read_func read_record_func; + Read_func read_record_func_and_unpack_calls; THD *thd; SQL_SELECT *select; uint ref_length, reclength, rec_cache_size, error_offset; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 36d9eda3383..28bc57c692f 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -14015,37 +14015,8 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond, can be used without tmp. table. */ bool can_subst_to_first_table= false; - bool first_is_in_sjm_nest= false; - if (first_is_base_table) - { - TABLE_LIST *tbl_for_first= - join->join_tab[join->const_tables].table->pos_in_table_list; - first_is_in_sjm_nest= tbl_for_first->sj_mat_info && - tbl_for_first->sj_mat_info->is_used; - } - /* - Currently we do not employ the optimization that uses multiple - equalities for ORDER BY to remove tmp table in the case when - the first table happens to be the result of materialization of - a semi-join nest ( <=> first_is_in_sjm_nest == true). - - When a semi-join nest is materialized and scanned to look for - possible matches in the remaining tables for every its row - the fields from the result of materialization are copied - into the record buffers of tables from the semi-join nest. - So these copies are used to access the remaining tables rather - than the fields from the result of materialization. - - Unfortunately now this so-called 'copy back' technique is - supported only if the rows are scanned with the rr_sequential - function, but not with other rr_* functions that are employed - when the result of materialization is required to be sorted. - - TODO: either to support 'copy back' technique for the above case, - or to get rid of this technique altogether. - */ if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP) && - first_is_base_table && !first_is_in_sjm_nest && + first_is_base_table && order->item[0]->real_item()->type() == Item::FIELD_ITEM && join->cond_equal) { @@ -19922,19 +19893,6 @@ do_select(JOIN *join, Procedure *procedure) }
-int rr_sequential_and_unpack(READ_RECORD *info) -{ - int error; - if (unlikely((error= rr_sequential(info)))) - return error; - - for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++) - (*cp->do_copy)(cp); - - return error; -} - - /** @brief Instantiates temporary table @@ -21223,6 +21181,8 @@ bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab)
int join_init_read_record(JOIN_TAB *tab) { + bool need_unpacking= FALSE; + JOIN *join= tab->join; /* Note: the query plan tree for the below operations is constructed in save_agg_explain_data. @@ -21232,6 +21192,12 @@ int join_init_read_record(JOIN_TAB *tab) if (tab->filesort && tab->sort_table()) // Sort table. return 1;
+ if (join->top_join_tab_count != join->const_tables) + { + TABLE_LIST *tbl= tab->table->pos_in_table_list; + need_unpacking= tbl ? tbl->is_sjm_scan_table() : FALSE; + } + tab->build_range_rowid_filter_if_needed();
DBUG_EXECUTE_IF("kill_join_init_read_record", @@ -21249,16 +21215,6 @@ int join_init_read_record(JOIN_TAB *tab) if (!tab->preread_init_done && tab->preread_init()) return 1;
- - if (init_read_record(&tab->read_record, tab->join->thd, tab->table, - tab->select, tab->filesort_result, 1,1, FALSE)) - return 1; - return tab->read_record.read_record(); -} - -int -join_read_record_no_init(JOIN_TAB *tab) -{ Copy_field *save_copy, *save_copy_end;
/* @@ -21268,12 +21224,20 @@ join_read_record_no_init(JOIN_TAB *tab) save_copy= tab->read_record.copy_field; save_copy_end= tab->read_record.copy_field_end;
- init_read_record(&tab->read_record, tab->join->thd, tab->table, - tab->select, tab->filesort_result, 1, 1, FALSE); + + if (init_read_record(&tab->read_record, tab->join->thd, tab->table, + tab->select, tab->filesort_result, 1, 1, FALSE)) + return 1;
tab->read_record.copy_field= save_copy; tab->read_record.copy_field_end= save_copy_end; - tab->read_record.read_record_func= rr_sequential_and_unpack; + + if (need_unpacking) + { + tab->read_record.read_record_func_and_unpack_calls= + tab->read_record.read_record_func; + tab->read_record.read_record_func = read_record_func_for_rr_and_unpack; + }
return tab->read_record.read_record(); } @@ -28981,6 +28945,19 @@ void build_notnull_conds_for_inner_nest_of_outer_join(JOIN *join, }
+/* + @brief + Unpacking temp table fields to base table fields. +*/ + +void JOIN_TAB::unpacking_to_base_table_fields() +{ + for (Copy_field *cp= read_record.copy_field; + cp != read_record.copy_field_end; cp++) + (*cp->do_copy)(cp); +} + + /** @} (end of group Query_Optimizer) */ diff --git a/sql/sql_select.h b/sql/sql_select.h index 4f7bf49f635..545d4a788cc 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -223,7 +223,7 @@ typedef enum_nested_loop_state (*Next_select_func)(JOIN *, struct st_join_table *, bool); Next_select_func setup_end_select_func(JOIN *join, JOIN_TAB *tab); int rr_sequential(READ_RECORD *info); -int rr_sequential_and_unpack(READ_RECORD *info); +int read_record_func_for_rr_and_unpack(READ_RECORD *info); Item *remove_pushed_top_conjuncts(THD *thd, Item *cond); Item *and_new_conditions_to_optimized_cond(THD *thd, Item *cond, COND_EQUAL **cond_eq, @@ -676,6 +676,7 @@ typedef struct st_join_table { table_map remaining_tables); bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables, bool is_const_table); + void unpacking_to_base_table_fields(); } JOIN_TAB;
@@ -2352,7 +2353,6 @@ create_virtual_tmp_table(THD *thd, Field *field)
int test_if_item_cache_changed(List<Cached_item> &list); int join_init_read_record(JOIN_TAB *tab); -int join_read_record_no_init(JOIN_TAB *tab); void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key); inline Item * and_items(THD *thd, Item* cond, Item *item) { diff --git a/sql/table.h b/sql/table.h index 1a7e5fbd4dc..35ba9bbb95d 100644 --- a/sql/table.h +++ b/sql/table.h @@ -2622,6 +2622,7 @@ struct TABLE_LIST */ const char *get_table_name() const { return view != NULL ? view_name.str : table_name.str; } bool is_active_sjm(); + bool is_sjm_scan_table(); bool is_jtbm() { return MY_TEST(jtbm_subselect != NULL); } st_select_lex_unit *get_unit(); st_select_lex *get_single_select(); _______________________________________________ commits mailing list commits@mariadb.org https://lists.askmonty.org/cgi-bin/mailman/listinfo/commits
-- BR Sergei -- Sergei Petrunia, Software Developer MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog