revision-id: 767ccf27c70b7fbc746dacfcba3ebe14c0539a86 (mariadb-10.5.11-56-g767ccf27c70) parent(s): 398387f0e638ee027406e72e3460fb1bf755b31d author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2021-08-03 16:54:05 +0300 message: MDEV-26301: LATERAL DERIVED refills the temp. table too many times Add support of the fix in the optimizer part. Also added @@optimizer_lateral_lazy_refill to control this. --- mysql-test/main/derived_split_innodb.result | 117 ++++++++++++++++++++++++++++ mysql-test/main/derived_split_innodb.test | 87 +++++++++++++++++++++ mysql-test/main/opt_trace.result | 2 +- sql/opt_split.cc | 116 ++++++++++++++++++++++++++- sql/sql_class.h | 1 + sql/sql_select.cc | 2 +- sql/sql_select.h | 4 +- sql/sys_vars.cc | 8 ++ 8 files changed, 331 insertions(+), 6 deletions(-) diff --git a/mysql-test/main/derived_split_innodb.result b/mysql-test/main/derived_split_innodb.result index 9edf9a1f2ae..063a29d2366 100644 --- a/mysql-test/main/derived_split_innodb.result +++ b/mysql-test/main/derived_split_innodb.result @@ -241,3 +241,120 @@ set optimizer_switch='split_materialized=default'; set use_stat_tables=default; set optimizer_use_condition_selectivity=default; # End of 10.3 tests +# +# MDEV-26301: LATERAL DERIVED refills the temp. table too many times +# +create table t1(a int, b int); +insert into t1 select seq,seq from seq_1_to_5; +create table t2(a int, b int, key(a)); +insert into t2 +select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B; +create table t3(a int, b int, key(a)); +insert into t3 +select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B; +analyze table t1,t2,t3 persistent for all; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status Table is already up to date +test.t3 analyze status Engine-independent statistics collected +test.t3 analyze status Table is already up to date +explain +select * from +(t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 +1 SIMPLE t2 ref a a 5 test.t1.b 2 Using where +1 SIMPLE t3 ref a a 5 test.t1.b 3 Using where +create table t10 ( +grp_id int, +col1 int, +key(grp_id) +); +insert into t10 +select +A.seq, +B.seq +from +seq_1_to_100 A, +seq_1_to_100 B; +create table t11 ( +col1 int, +col2 int +); +insert into t11 +select A.seq, A.seq from seq_1_to_10 A; +analyze table t10,t11 persistent for all; +Table Op Msg_type Msg_text +test.t10 analyze status Engine-independent statistics collected +test.t10 analyze status Table is already up to date +test.t11 analyze status Engine-independent statistics collected +test.t11 analyze status OK +set optimizer_trace=1; +explain +select * from +( +(t1 left join t2 on t2.a=t1.b) +left join t3 on t3.a=t1.b +) left join (select grp_id, count(*) +from t10 left join t11 on t11.col1=t10.col1 +group by grp_id) T on T.grp_id=t1.b; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 5 +1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where +1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where +2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100 +2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +select +json_detailed(json_extract(trace, '$**.split_plan_choice')) +from +information_schema.optimizer_trace; 
+json_detailed(json_extract(trace, '$**.split_plan_choice')) +[ + + { + "unsplit_cost": 253440.0075, + "split_cost": 2535.968504, + "record_count_for_split": 30, + "split_chosen": true + } +] +select @@optimizer_lateral_lazy_refill; +@@optimizer_lateral_lazy_refill +0 +set @@optimizer_lateral_lazy_refill=1; +explain +select * from +( +(t1 left join t2 on t2.a=t1.b) +left join t3 on t3.a=t1.b +) left join (select grp_id, count(*) +from t10 left join t11 on t11.col1=t10.col1 +group by grp_id) T on T.grp_id=t1.b; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 5 +1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where +1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where +2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100 +2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +select +json_detailed(json_extract(trace, '$**.split_plan_choice')) +from +information_schema.optimizer_trace; +json_detailed(json_extract(trace, '$**.split_plan_choice')) +[ + + { + "unsplit_cost": 253440.0075, + "split_cost": 2535.968504, + "record_count_for_split": 30, + "join_prefix_fanout_for_split": 5, + "split_chosen": true + } +] +set optimizer_trace=0; +set @@optimizer_lateral_lazy_refill=default; +drop table t1,t2,t3, t10, t11; diff --git a/mysql-test/main/derived_split_innodb.test b/mysql-test/main/derived_split_innodb.test index bee9ef497b6..71a734b09df 100644 --- a/mysql-test/main/derived_split_innodb.test +++ b/mysql-test/main/derived_split_innodb.test @@ -193,3 +193,90 @@ set use_stat_tables=default; set optimizer_use_condition_selectivity=default; --echo # End of 10.3 tests + +--echo # +--echo # MDEV-26301: LATERAL DERIVED refills the temp. table too many times +--echo # + +# 5 values +create table t1(a int, b int); +insert into t1 select seq,seq from seq_1_to_5; + +# 5 value groups of size 2 each +create table t2(a int, b int, key(a)); +insert into t2 +select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B; + +# 5 value groups of size 3 each +create table t3(a int, b int, key(a)); +insert into t3 +select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B; + +analyze table t1,t2,t3 persistent for all; + +explain +select * from + (t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b; + +# Now, create tables for Groups. 
+ +create table t10 ( + grp_id int, + col1 int, + key(grp_id) +); + +# 100 groups of 100 values each +insert into t10 +select + A.seq, + B.seq +from + seq_1_to_100 A, + seq_1_to_100 B; + +# and X10 multiplier + +create table t11 ( + col1 int, + col2 int +); +insert into t11 +select A.seq, A.seq from seq_1_to_10 A; + + +analyze table t10,t11 persistent for all; +set optimizer_trace=1; + +explain +select * from + ( + (t1 left join t2 on t2.a=t1.b) + left join t3 on t3.a=t1.b + ) left join (select grp_id, count(*) + from t10 left join t11 on t11.col1=t10.col1 + group by grp_id) T on T.grp_id=t1.b; +select + json_detailed(json_extract(trace, '$**.split_plan_choice')) +from + information_schema.optimizer_trace; + +select @@optimizer_lateral_lazy_refill; +set @@optimizer_lateral_lazy_refill=1; + +explain +select * from + ( + (t1 left join t2 on t2.a=t1.b) + left join t3 on t3.a=t1.b + ) left join (select grp_id, count(*) + from t10 left join t11 on t11.col1=t10.col1 + group by grp_id) T on T.grp_id=t1.b; +select + json_detailed(json_extract(trace, '$**.split_plan_choice')) +from + information_schema.optimizer_trace; + +set optimizer_trace=0; +set @@optimizer_lateral_lazy_refill=default; +drop table t1,t2,t3, t10, t11; diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result index d4ba8eccb91..97d3b48ee85 100644 --- a/mysql-test/main/opt_trace.result +++ b/mysql-test/main/opt_trace.result @@ -8954,9 +8954,9 @@ json_detailed(json_extract(trace, '$**.lateral_derived_choice')) ], "split_plan_choice": { + "unsplit_cost": 25.72361682, "split_cost": 2.488945919, "record_count_for_split": 4, - "unsplit_cost": 25.72361682, "split_chosen": true }, "chosen_lateral_derived": diff --git a/sql/opt_split.cc b/sql/opt_split.cc index d482c2de2a4..a569c323792 100644 --- a/sql/opt_split.cc +++ b/sql/opt_split.cc @@ -876,6 +876,70 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start, } + +/* + given a table bitmap, find the prefix of positions array that covers the + tables in the bitmap. +*/ + +uint get_prefix_size(const POSITION *positions, uint max_position, table_map map) +{ + map= map & ~PSEUDO_TABLE_BITS; // remove OUTER_REF_TABLE_BIT + table_map covered= 0; + uint i; + + for (i=0; i < max_position; i++) + { + covered |= positions[i].table->table->map; + if (!(map & ~covered)) + break; + } + return i; +} + + +/* + Walk through the KEYUSE_EXT objects for given {table,key,key_part} and + find the shortest prefix of positions array that we have a keyuse object for. 
+*/
+
+uint get_min_prefix_for_key_part(const POSITION *positions, uint max_prefix_size,
+                                 KEYUSE_EXT *keyuse)
+{
+  TABLE *table= keyuse->table;
+  uint key= keyuse->key;
+  uint keypart= keyuse->keypart;
+  uint prefix_size= max_prefix_size;
+
+  while (keyuse->table == table && keyuse->key == key &&
+         keyuse->keypart == keypart)
+  {
+    uint cur_size= get_prefix_size(positions, max_prefix_size,
+                                   keyuse->needed_in_prefix);
+    if (cur_size < prefix_size)
+      prefix_size= cur_size;
+    keyuse++;
+  }
+  return prefix_size;
+}
+
+
+/*
+  Get the fanout of a join prefix
+*/
+double get_join_prefix_fanout(const POSITION *positions, uint prefix_size)
+{
+  double fanout = 1.0;
+  for (uint i=0; i <= prefix_size; i++)
+  {
+    if (positions[i].records_read > 1e-30)
+      fanout *= positions[i].records_read;
+    if (positions[i].cond_selectivity > 1e-30)
+      fanout *= positions[i].cond_selectivity;
+  }
+  return fanout;
+}
+
 /**
   @brief
     Choose the best splitting to extend the evaluated partial join
@@ -906,7 +970,9 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
     if the plan has been chosen, NULL - otherwise.
 */
 
-SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
+SplM_plan_info * JOIN_TAB::choose_best_splitting(const POSITION *join_prefix,
+                                                 uint top_prefix_size,
+                                                 double record_count,
                                                  table_map remaining_tables)
 {
   SplM_opt_info *spl_opt_info= table->spl_opt_info;
@@ -922,6 +988,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
   SplM_plan_info *spl_plan= 0;
   uint best_key= 0;
   uint best_key_parts= 0;
+  uint best_min_prefix_size;
   Json_writer_object spl_trace(thd, "lateral_derived_choice");
   Json_writer_array trace_indexes(thd, "indexes_for_splitting");
 
@@ -942,6 +1009,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
       uint key= keyuse_ext->key;
       KEYUSE_EXT *key_keyuse_ext_start= keyuse_ext;
       key_part_map found_parts= 0;
+      uint prefix_len= 0;
       do
       {
         if (keyuse_ext->needed_in_prefix & remaining_tables)
@@ -951,11 +1019,31 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
         }
         if (!(keyuse_ext->keypart_map & found_parts))
         {
+          /*
+            1. found_parts is empty and this is the first key part we've found
+            2. or, we are looking at the first keyuse object for this keypart,
+               and found_parts has a bit set for the previous key part.
+          */
           if ((!found_parts && !keyuse_ext->keypart) ||
               (found_parts && ((keyuse_ext->keypart_map >> 1) & found_parts)))
+          {
+            if (thd->variables.optimizer_lateral_lazy_refill > 0)
+            {
+              uint min_for_kp= get_min_prefix_for_key_part(join_prefix,
+                                                           top_prefix_size,
+                                                           keyuse_ext);
+              // The prefix length needed for {kp1, kp2, ...} is the max of the
+              // prefix lengths needed for each key part.
+              if (min_for_kp > prefix_len)
+                prefix_len= min_for_kp;
+            }
+
             found_parts|= keyuse_ext->keypart_map;
+          }
           else
           {
+            // This is a new key part but it doesn't form a continuous
+            // index prefix. Skip all KEYUSEs for this index.
             do
             {
               keyuse_ext++;
@@ -981,6 +1069,11 @@
           best_key_parts= keyuse_ext->keypart + 1;
           best_rec_per_key= rec_per_key;
           best_key_keyuse_ext_start= key_keyuse_ext_start;
+          if (thd->variables.optimizer_lateral_lazy_refill > 0)
+          {
+            best_min_prefix_size= prefix_len;
+            trace.add("min_prefix_len", (longlong)prefix_len);
+          }
         }
         keyuse_ext++;
       }
@@ -1069,10 +1162,21 @@
   if (spl_plan)
   {
     Json_writer_object choice(thd, "split_plan_choice");
+    choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
+    choice.add("split_cost", spl_plan->cost);
     choice.add("record_count_for_split", record_count);
-    choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
-    if(record_count * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
+
+    // psergey-todo: best_min_prefix_size allows us to compute a tighter
+    // bound than record_count.
+    double fanout= record_count;
+    if (thd->variables.optimizer_lateral_lazy_refill > 0)
+    {
+      fanout= get_join_prefix_fanout(join_prefix, best_min_prefix_size);
+      choice.add("join_prefix_fanout_for_split", fanout);
+    }
+
+    if(fanout * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
     {
       /*
         The best plan that employs splitting is cheaper than
@@ -1134,12 +1238,14 @@ bool JOIN::inject_best_splitting_cond(table_map remaining_tables)
   List<Item> *inj_cond_list= &spl_opt_info->inj_cond_list;
   List_iterator<KEY_FIELD> li(spl_opt_info->added_key_fields);
   KEY_FIELD *added_key_field;
+  table_map needed_tables= 0;
   while ((added_key_field= li++))
   {
     if (remaining_tables & added_key_field->val->used_tables())
       continue;
     if (inj_cond_list->push_back(added_key_field->cond, thd->mem_root))
       return true;
+    needed_tables |= added_key_field->val->used_tables();
   }
   DBUG_ASSERT(inj_cond_list->elements);
   switch (inj_cond_list->elements) {
@@ -1156,6 +1262,10 @@
   if (inject_cond_into_where(inj_cond))
     return true;
 
+  // psergey-todo: walk the join tabs until we are satisfying needed_tables...
+  // and inject the check at that point.
+  // Another solution is to just add a cache...
+
   select_lex->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED;
   st_select_lex_unit *unit= select_lex->master_unit();
   unit->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED;
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 62ce5c8a2d2..8964a8aa4f7 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -689,6 +689,7 @@ typedef struct system_variables
   ulong optimizer_search_depth;
   ulong optimizer_selectivity_sampling_limit;
   ulong optimizer_use_condition_selectivity;
+  ulong optimizer_lateral_lazy_refill;
   ulong use_stat_tables;
   double sample_percentage;
   ulong histogram_size;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 7057ed1b5e1..5b901df2e54 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -7455,7 +7455,7 @@ best_access_path(JOIN *join,
   loose_scan_opt.init(join, s, remaining_tables);
 
   if (s->table->is_splittable())
-    spl_plan= s->choose_best_splitting(record_count, remaining_tables);
+    spl_plan= s->choose_best_splitting(join_positions, idx, record_count, remaining_tables);
 
   Json_writer_array trace_paths(thd, "considered_access_paths");
   if (s->keyuse)
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 29e42ff8ef8..7ffab774248 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -682,7 +682,9 @@ typedef struct st_join_table {
   void partial_cleanup();
   void add_keyuses_for_splitting();
-  SplM_plan_info *choose_best_splitting(double record_count,
+  SplM_plan_info *choose_best_splitting(const POSITION *join_prefix,
+                                        uint top_prefix_size,
+                                        double record_count,
                                         table_map remaining_tables);
   bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables,
                      bool is_const_table);
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index a8e4ff2dded..e87d371453d 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -2672,6 +2672,14 @@ static Sys_var_ulong Sys_optimizer_use_condition_selectivity(
        SESSION_VAR(optimizer_use_condition_selectivity), CMD_LINE(REQUIRED_ARG),
        VALID_RANGE(1, 5), DEFAULT(4), BLOCK_SIZE(1));
 
+static Sys_var_ulong Sys_optimizer_lateral_lazy_refill(
+       "optimizer_lateral_lazy_refill",
+       "Controls the Lazy Lateral Table Refill fix. 0 means disabled, "
+       "1 means enabled in the optimizer. Higher values are reserved "
+       "for future use.",
+       SESSION_VAR(optimizer_lateral_lazy_refill), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, 2), DEFAULT(0), BLOCK_SIZE(1));
+
 static Sys_var_ulong Sys_optimizer_search_depth(
        "optimizer_search_depth",
        "Maximum depth of search performed by the query optimizer. Values "
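
Aside for reviewers (not part of the patch): below is a small, self-contained C++ sketch of the split-vs-unsplit cost comparison made in choose_best_splitting(), with and without @@optimizer_lateral_lazy_refill. PositionSketch and prefix_fanout() are simplified, hypothetical stand-ins for the server's POSITION array and get_join_prefix_fanout(); the cost figures are rounded from the optimizer trace in the test case above.

// Illustration only: simplified stand-ins, not the real MariaDB structures.
#include <cstdio>

struct PositionSketch            // stand-in for one POSITION of the join prefix
{
  double records_read;           // rows expected from this table
  double cond_selectivity;       // selectivity of conditions checked here
};

// Mirrors what get_join_prefix_fanout() computes: multiply rows and
// selectivity over the first prefix_size+1 positions, skipping
// degenerate (near-zero) values.
static double prefix_fanout(const PositionSketch *pos, unsigned prefix_size)
{
  double fanout= 1.0;
  for (unsigned i= 0; i <= prefix_size; i++)
  {
    if (pos[i].records_read > 1e-30)
      fanout*= pos[i].records_read;
    if (pos[i].cond_selectivity > 1e-30)
      fanout*= pos[i].cond_selectivity;
  }
  return fanout;
}

int main()
{
  // Join prefix t1, t2, t3 from the test case: 5 rows from t1, fanout 2 from
  // t2 and 3 from t3.  Only t1.b feeds the derived table's key (T.grp_id=t1.b).
  PositionSketch prefix[]= { {5, 1.0}, {2, 1.0}, {3, 1.0} };

  double unsplit_cost= 253440.0;                  // rounded trace figures
  double split_cost= 2536.0;
  double record_count= prefix_fanout(prefix, 2);  // whole prefix: 30
  double lazy_fanout= prefix_fanout(prefix, 0);   // minimal prefix (t1): 5

  // Old bound: one refill of the lateral derived table per row of the prefix.
  printf("eager: split if %g < %g\n",
         record_count * split_cost, unsplit_cost - 0.01);
  // New bound: refills are driven only by the shortest prefix that covers the
  // tables needed by the derived table's key.
  printf("lazy:  split if %g < %g\n",
         lazy_fanout * split_cost, unsplit_cost - 0.01);
  return 0;
}

Both bounds pick the split plan for this query, but the lazy bound (5 estimated refills instead of 30) is tighter and can keep splitting attractive in cases where multiplying by the full prefix row count would reject it.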