[Commits] 44f4f3ebbd0: MDEV-26301: LATERAL DERIVED refills the temp. table too many times
revision-id: 44f4f3ebbd0ab0208ff592f941f329173465e08c (mariadb-10.5.4-837-g44f4f3ebbd0) parent(s): 4ba4c06038165726dad6a18d85c2b54446ea5ef8 author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2021-08-08 19:56:05 +0300 message: MDEV-26301: LATERAL DERIVED refills the temp. table too many times Do remember and check LATERAL DERIVED table's "parameters". If the values of parameters have not changed, do not refill the derived table. Take this into account at optimization phase: compute the shortest join prefix $SP that LATERAL DERIVED table depends on, then use $SP's output cardinality as an upper bound of #of times the LATERAL DERIVED table will be refilled. The optimization is controlled by @@optimizer_switch flag named split_materialized_is_lazy and is disabled by default. --- mysql-test/main/derived_split_innodb.result | 347 ++++++++++++++++++++++++++++ mysql-test/main/derived_split_innodb.test | 168 ++++++++++++++ mysql-test/main/mysqld--help.result | 3 +- mysql-test/main/opt_trace.result | 2 +- sql/opt_split.cc | 192 ++++++++++++++- sql/sql_derived.cc | 9 + sql/sql_lex.cc | 1 + sql/sql_lex.h | 3 + sql/sql_priv.h | 1 + sql/sql_select.cc | 5 +- sql/sql_select.h | 28 ++- sql/sql_union.cc | 1 + sql/sys_vars.cc | 1 + 13 files changed, 751 insertions(+), 10 deletions(-) diff --git a/mysql-test/main/derived_split_innodb.result b/mysql-test/main/derived_split_innodb.result index 9edf9a1f2ae..5e54c0151f0 100644 --- a/mysql-test/main/derived_split_innodb.result +++ b/mysql-test/main/derived_split_innodb.result @@ -241,3 +241,350 @@ set optimizer_switch='split_materialized=default'; set use_stat_tables=default; set optimizer_use_condition_selectivity=default; # End of 10.3 tests +# +# MDEV-26301: LATERAL DERIVED refills the temp. table too many times +# +create table t1(a int, b int); +insert into t1 select seq,seq from seq_1_to_5; +create table t2(a int, b int, key(a)); +insert into t2 +select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B; +create table t3(a int, b int, key(a)); +insert into t3 +select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B; +analyze table t1,t2,t3 persistent for all; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status Table is already up to date +test.t3 analyze status Engine-independent statistics collected +test.t3 analyze status Table is already up to date +explain +select * from +(t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 +1 SIMPLE t2 ref a a 5 test.t1.b 2 Using where +1 SIMPLE t3 ref a a 5 test.t1.b 3 Using where +create table t10 ( +grp_id int, +col1 int, +key(grp_id) +); +insert into t10 +select +A.seq, +B.seq +from +seq_1_to_100 A, +seq_1_to_100 B; +create table t11 ( +col1 int, +col2 int +); +insert into t11 +select A.seq, A.seq from seq_1_to_10 A; +analyze table t10,t11 persistent for all; +Table Op Msg_type Msg_text +test.t10 analyze status Engine-independent statistics collected +test.t10 analyze status Table is already up to date +test.t11 analyze status Engine-independent statistics collected +test.t11 analyze status OK +set optimizer_trace=1; +explain +select * from +( +(t1 left join t2 on t2.a=t1.b) +left join t3 on t3.a=t1.b +) left join (select grp_id, count(*) +from t10 left join t11 on t11.col1=t10.col1 +group by grp_id) T on T.grp_id=t1.b; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 5 +1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where +1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where +2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100 +2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +select +json_detailed(json_extract(trace, '$**.split_plan_choice')) +from +information_schema.optimizer_trace; +json_detailed(json_extract(trace, '$**.split_plan_choice')) +[ + + { + "unsplit_cost": 253440.0075, + "split_cost": 2535.968504, + "record_count_for_split": 30, + "split_chosen": true + } +] +set @tmp_26301=@@optimizer_switch; +set optimizer_switch='split_materialized_is_lazy=on'; +explain +select * from +( +(t1 left join t2 on t2.a=t1.b) +left join t3 on t3.a=t1.b +) left join (select grp_id, count(*) +from t10 left join t11 on t11.col1=t10.col1 +group by grp_id) T on T.grp_id=t1.b; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 5 +1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where +1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where +2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100 +2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +select +json_detailed(json_extract(trace, '$**.split_plan_choice')) +from +information_schema.optimizer_trace; +json_detailed(json_extract(trace, '$**.split_plan_choice')) +[ + + { + "unsplit_cost": 253440.0075, + "split_cost": 2535.968504, + "record_count_for_split": 30, + "join_prefix_fanout_for_split": 5, + "split_chosen": true + } +] +# The important part in the below output is: +# "lateral": 1, +# "query_block": { +# "select_id": 2, +# "r_loops": 5, <-- must be 5, not 30. +analyze format=json +select * from +( +(t1 left join t2 on t2.a=t1.b) +left join t3 on t3.a=t1.b +) left join (select grp_id, count(*) +from t10 left join t11 on t11.col1=t10.col1 +group by grp_id) T on T.grp_id=t1.b; +ANALYZE +{ + "query_block": { + "select_id": 1, + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "const_condition": "1", + "table": { + "table_name": "t1", + "access_type": "ALL", + "r_loops": 1, + "rows": 5, + "r_rows": 5, + "r_table_time_ms": "REPLACED", + "r_other_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100 + }, + "table": { + "table_name": "t2", + "access_type": "ref", + "possible_keys": ["a"], + "key": "a", + "key_length": "5", + "used_key_parts": ["a"], + "ref": ["test.t1.b"], + "r_loops": 5, + "rows": 2, + "r_rows": 2, + "r_table_time_ms": "REPLACED", + "r_other_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100, + "attached_condition": "trigcond(trigcond(t1.b is not null))" + }, + "table": { + "table_name": "t3", + "access_type": "ref", + "possible_keys": ["a"], + "key": "a", + "key_length": "5", + "used_key_parts": ["a"], + "ref": ["test.t1.b"], + "r_loops": 10, + "rows": 3, + "r_rows": 3, + "r_table_time_ms": "REPLACED", + "r_other_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100, + "attached_condition": "trigcond(trigcond(t1.b is not null))" + }, + "table": { + "table_name": "<derived2>", + "access_type": "ref", + "possible_keys": ["key0"], + "key": "key0", + "key_length": "5", + "used_key_parts": ["grp_id"], + "ref": ["test.t1.b"], + "r_loops": 30, + "rows": 10, + "r_rows": 1, + "r_table_time_ms": "REPLACED", + "r_other_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100, + "attached_condition": "trigcond(trigcond(t1.b is not null))", + "materialized": { + "lateral": 1, + "query_block": { + "select_id": 2, + "r_loops": 5, + "r_total_time_ms": "REPLACED", + "outer_ref_condition": "t1.b is not null", + "table": { + "table_name": "t10", + "access_type": "ref", + "possible_keys": ["grp_id"], + "key": "grp_id", + "key_length": "5", + "used_key_parts": ["grp_id"], + "ref": ["test.t1.b"], + "r_loops": 5, + "rows": 100, + "r_rows": 100, + "r_table_time_ms": "REPLACED", + "r_other_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100 + }, + "block-nl-join": { + "table": { + "table_name": "t11", + "access_type": "ALL", + "r_loops": 5, + "rows": 10, + "r_rows": 10, + "r_table_time_ms": "REPLACED", + "r_other_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100 + }, + "buffer_type": "flat", + "buffer_size": "1Kb", + "join_type": "BNL", + "attached_condition": "trigcond(t11.col1 = t10.col1)", + "r_filtered": 10 + } + } + } + } + } +} +create table t21 (pk int primary key); +insert into t21 values (1),(2),(3); +create table t22 (pk int primary key); +insert into t22 values (1),(2),(3); +explain +select * from +t21, t22, +( +(t1 left join t2 on t2.a=t1.b) +left join t3 on t3.a=t1.b +) left join (select grp_id, count(*) +from t10 left join t11 on t11.col1=t10.col1 +group by grp_id) T on T.grp_id=t1.b +where +t21.pk=1 and t22.pk=2; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t21 const PRIMARY PRIMARY 4 const 1 Using index +1 PRIMARY t22 const PRIMARY PRIMARY 4 const 1 Using index +1 PRIMARY t1 ALL NULL NULL NULL NULL 5 +1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where +1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where +2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100 +2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +select +json_detailed(json_extract(trace, '$**.split_plan_choice')) +from +information_schema.optimizer_trace; +json_detailed(json_extract(trace, '$**.split_plan_choice')) +[ + + { + "unsplit_cost": 253440.0075, + "split_cost": 2535.968504, + "record_count_for_split": 30, + "join_prefix_fanout_for_split": 5, + "split_chosen": true + } +] +explain +select * from +t21, +( +(t1 left join t2 on t2.a=t1.b) +left join t3 on t3.a=t1.b +) left join (select grp_id, count(*) +from +t22 join t10 left join t11 on t11.col1=t10.col1 +where +t22.pk=1 +group by grp_id) T on T.grp_id=t1.b +where +t21.pk=1; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t21 const PRIMARY PRIMARY 4 const 1 Using index +1 PRIMARY t1 ALL NULL NULL NULL NULL 5 +1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where +1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where +2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index +2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100 +2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +select +json_detailed(json_extract(trace, '$**.split_plan_choice')) +from +information_schema.optimizer_trace; +json_detailed(json_extract(trace, '$**.split_plan_choice')) +[ + + { + "unsplit_cost": 253440.0075, + "split_cost": 2535.968504, + "record_count_for_split": 30, + "join_prefix_fanout_for_split": 5, + "split_chosen": true + } +] +create table t5 ( +pk int primary key +); +insert into t5 select seq from seq_1_to_1000; +explain +select * from +t21, +( +(((t1 join t5 on t5.pk=t1.b)) left join t2 on t2.a=t1.b) +left join t3 on t3.a=t1.b +) left join (select grp_id, count(*) +from +t22 join t10 left join t11 on t11.col1=t10.col1 +where +t22.pk=1 +group by grp_id) T on T.grp_id=t1.b +where +t21.pk=1; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t21 const PRIMARY PRIMARY 4 const 1 Using index +1 PRIMARY t1 ALL NULL NULL NULL NULL 5 Using where +1 PRIMARY t5 eq_ref PRIMARY PRIMARY 4 test.t1.b 1 Using index +1 PRIMARY t2 ref a a 5 test.t1.b 2 +1 PRIMARY t3 ref a a 5 test.t1.b 3 +1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where +2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index +2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t5.pk 100 +2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +set optimizer_trace=0; +set optimizer_switch=@tmp_26301; +drop table t1,t2,t3,t5, t10, t11, t21, t22; diff --git a/mysql-test/main/derived_split_innodb.test b/mysql-test/main/derived_split_innodb.test index bee9ef497b6..1776a88ba05 100644 --- a/mysql-test/main/derived_split_innodb.test +++ b/mysql-test/main/derived_split_innodb.test @@ -193,3 +193,171 @@ set use_stat_tables=default; set optimizer_use_condition_selectivity=default; --echo # End of 10.3 tests + +--echo # +--echo # MDEV-26301: LATERAL DERIVED refills the temp. table too many times +--echo # + +# 5 values +create table t1(a int, b int); +insert into t1 select seq,seq from seq_1_to_5; + +# 5 value groups of size 2 each +create table t2(a int, b int, key(a)); +insert into t2 +select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B; + +# 5 value groups of size 3 each +create table t3(a int, b int, key(a)); +insert into t3 +select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B; + +analyze table t1,t2,t3 persistent for all; + +explain +select * from + (t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b; + +# Now, create tables for Groups. + +create table t10 ( + grp_id int, + col1 int, + key(grp_id) +); + +# 100 groups of 100 values each +insert into t10 +select + A.seq, + B.seq +from + seq_1_to_100 A, + seq_1_to_100 B; + +# and X10 multiplier + +create table t11 ( + col1 int, + col2 int +); +insert into t11 +select A.seq, A.seq from seq_1_to_10 A; + + +analyze table t10,t11 persistent for all; +set optimizer_trace=1; + +explain +select * from + ( + (t1 left join t2 on t2.a=t1.b) + left join t3 on t3.a=t1.b + ) left join (select grp_id, count(*) + from t10 left join t11 on t11.col1=t10.col1 + group by grp_id) T on T.grp_id=t1.b; +select + json_detailed(json_extract(trace, '$**.split_plan_choice')) +from + information_schema.optimizer_trace; + +set @tmp_26301=@@optimizer_switch; +set optimizer_switch='split_materialized_is_lazy=on'; + +explain +select * from + ( + (t1 left join t2 on t2.a=t1.b) + left join t3 on t3.a=t1.b + ) left join (select grp_id, count(*) + from t10 left join t11 on t11.col1=t10.col1 + group by grp_id) T on T.grp_id=t1.b; +select + json_detailed(json_extract(trace, '$**.split_plan_choice')) +from + information_schema.optimizer_trace; + +--echo # The important part in the below output is: +--echo # "lateral": 1, +--echo # "query_block": { +--echo # "select_id": 2, +--echo # "r_loops": 5, <-- must be 5, not 30. +--source include/analyze-format.inc +analyze format=json +select * from + ( + (t1 left join t2 on t2.a=t1.b) + left join t3 on t3.a=t1.b + ) left join (select grp_id, count(*) + from t10 left join t11 on t11.col1=t10.col1 + group by grp_id) T on T.grp_id=t1.b; + +create table t21 (pk int primary key); +insert into t21 values (1),(2),(3); + +create table t22 (pk int primary key); +insert into t22 values (1),(2),(3); + +# Same as above but throw in a couple of const tables. +explain +select * from + t21, t22, + ( + (t1 left join t2 on t2.a=t1.b) + left join t3 on t3.a=t1.b + ) left join (select grp_id, count(*) + from t10 left join t11 on t11.col1=t10.col1 + group by grp_id) T on T.grp_id=t1.b +where + t21.pk=1 and t22.pk=2; + +select + json_detailed(json_extract(trace, '$**.split_plan_choice')) +from + information_schema.optimizer_trace; + +explain +select * from + t21, + ( + (t1 left join t2 on t2.a=t1.b) + left join t3 on t3.a=t1.b + ) left join (select grp_id, count(*) + from + t22 join t10 left join t11 on t11.col1=t10.col1 + where + t22.pk=1 + group by grp_id) T on T.grp_id=t1.b +where + t21.pk=1; + +select + json_detailed(json_extract(trace, '$**.split_plan_choice')) +from + information_schema.optimizer_trace; + +# And also add a non-const table + +create table t5 ( + pk int primary key + ); +insert into t5 select seq from seq_1_to_1000; + +explain +select * from + t21, + ( + (((t1 join t5 on t5.pk=t1.b)) left join t2 on t2.a=t1.b) + left join t3 on t3.a=t1.b + ) left join (select grp_id, count(*) + from + t22 join t10 left join t11 on t11.col1=t10.col1 + where + t22.pk=1 + group by grp_id) T on T.grp_id=t1.b +where + t21.pk=1; + +set optimizer_trace=0; +set optimizer_switch=@tmp_26301; +drop table t1,t2,t3,t5, t10, t11, t21, t22; diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result index 78616ae7c9d..6a38d720493 100644 --- a/mysql-test/main/mysqld--help.result +++ b/mysql-test/main/mysqld--help.result @@ -718,7 +718,8 @@ The following specify which files/extra groups are read (specified before remain extended_keys, exists_to_in, orderby_uses_equalities, condition_pushdown_for_derived, split_materialized, condition_pushdown_for_subquery, rowid_filter, - condition_pushdown_from_having, not_null_range_scan + condition_pushdown_from_having, not_null_range_scan, + split_materialized_is_lazy --optimizer-trace=name Controls tracing of the Optimizer: optimizer_trace=option=val[,option=val...], where option diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result index d4ba8eccb91..97d3b48ee85 100644 --- a/mysql-test/main/opt_trace.result +++ b/mysql-test/main/opt_trace.result @@ -8954,9 +8954,9 @@ json_detailed(json_extract(trace, '$**.lateral_derived_choice')) ], "split_plan_choice": { + "unsplit_cost": 25.72361682, "split_cost": 2.488945919, "record_count_for_split": 4, - "unsplit_cost": 25.72361682, "split_chosen": true }, "chosen_lateral_derived": diff --git a/sql/opt_split.cc b/sql/opt_split.cc index d482c2de2a4..7157caa248e 100644 --- a/sql/opt_split.cc +++ b/sql/opt_split.cc @@ -876,11 +876,92 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start, } + +/* + @brief + Find shortest join order sub-prefix that covers the specified table_map. + + @detail + Given a join prefix and a bitmap of table(s) in table_map, find the + shortest prefix of the prefix that covers the tables in the table_map. +*/ + +uint get_prefix_size(const POSITION *positions, uint max_pos, table_map map) +{ + map= map & ~PSEUDO_TABLE_BITS; // remove OUTER_REF_TABLE_BIT + table_map covered= 0; + uint i; + + for (i=0; i < max_pos; i++) + { + covered |= positions[i].table->table->map; + if (!(map & ~covered)) + break; + } + return i; +} + + +/* + @brief + Find the shortest join prefix that has a KEYUSE_EXT for a given key_part. + + @detail + Given an array of KEYUSE objects for access to {table, key, key_part}, + and a join order prefix (specified by array of POSITION structs), + find the shortest prefix for which there is a KEYUSE_EXT object describin + the access to they table.key.key_part + + @note + KEYUSE_EXT objects are from inside the derived table, while the join + prefix is from the parent JOIN, so we need to use + KEYUSE_EXT::needed_in_prefix +*/ + +uint get_min_prefix_for_key_part(const POSITION *positions, uint max_prefix_size, + KEYUSE_EXT *keyuse) +{ + TABLE *table= keyuse->table; + uint key= keyuse->key; + uint keypart= keyuse->keypart; + uint prefix_size= max_prefix_size; + + while (keyuse->table == table && keyuse->key == key && + keyuse->keypart == keypart) + { + uint cur_size= get_prefix_size(positions, max_prefix_size, + keyuse->needed_in_prefix); + if (cur_size < prefix_size) + prefix_size= cur_size; + keyuse++; + } + return prefix_size; +} + + +/* + Get fanout of of a join prefix +*/ +double get_join_prefix_fanout(const POSITION *positions, uint prefix_size) +{ + double fanout = 1.0; + for (uint i=0; i <= prefix_size; i++) + { + if (positions[i].records_read > 1e-30) + fanout *= positions[i].records_read; + if (positions[i].cond_selectivity > 1e-30) + fanout *= positions[i].cond_selectivity; + } + return fanout; +} + /** @brief Choose the best splitting to extend the evaluated partial join @param + join_prefix Join order prefix built so far + prefix_size Length of the join prefix record_count estimated cardinality of the extended partial join remaining_tables tables not joined yet @@ -906,7 +987,9 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start, if the plan has been chosen, NULL - otherwise. */ -SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count, +SplM_plan_info * JOIN_TAB::choose_best_splitting(const POSITION *join_prefix, + uint prefix_size, + double record_count, table_map remaining_tables) { SplM_opt_info *spl_opt_info= table->spl_opt_info; @@ -922,6 +1005,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count, SplM_plan_info *spl_plan= 0; uint best_key= 0; uint best_key_parts= 0; + uint best_min_prefix_size; Json_writer_object spl_trace(thd, "lateral_derived_choice"); Json_writer_array trace_indexes(thd, "indexes_for_splitting"); @@ -942,6 +1026,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count, uint key= keyuse_ext->key; KEYUSE_EXT *key_keyuse_ext_start= keyuse_ext; key_part_map found_parts= 0; + uint prefix_len= 0; do { if (keyuse_ext->needed_in_prefix & remaining_tables) @@ -951,11 +1036,40 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count, } if (!(keyuse_ext->keypart_map & found_parts)) { + /* + Check for + 1. found_parts is empty and this is the first key part we've found + 2. or, we are looking at the first keyuse object for this keypart, + and found_parts has a bit set for the previous key part. + */ if ((!found_parts && !keyuse_ext->keypart) || (found_parts && ((keyuse_ext->keypart_map >> 1) & found_parts))) + { + if (optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY)) + { + /* + Find the shortest join prefix we'll need to get a lookup value + for this keypart read this key part + */ + uint min_for_kp= get_min_prefix_for_key_part(join_prefix, + prefix_size, + keyuse_ext); + /* + The join prefix needed to read all keyparts is the longest + prefix of all key parts. + */ + if (min_for_kp > prefix_len) + prefix_len= min_for_kp; + } + found_parts|= keyuse_ext->keypart_map; + } else { + /* + This is a new key part but it doesn't form a continuous index + prefix. Skip all KEYUSEs for this index. + */ do { keyuse_ext++; @@ -981,6 +1095,11 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count, best_key_parts= keyuse_ext->keypart + 1; best_rec_per_key= rec_per_key; best_key_keyuse_ext_start= key_keyuse_ext_start; + if (optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY)) + { + best_min_prefix_size= prefix_len; + trace.add("min_prefix_len", (longlong)prefix_len); + } } keyuse_ext++; } @@ -1069,10 +1188,29 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count, if (spl_plan) { Json_writer_object choice(thd, "split_plan_choice"); + choice.add("unsplit_cost", spl_opt_info->unsplit_cost); + choice.add("split_cost", spl_plan->cost); choice.add("record_count_for_split", record_count); - choice.add("unsplit_cost", spl_opt_info->unsplit_cost); - if(record_count * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01) + + double fanout= record_count; + if (optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY)) + { + /* + The LATERAL DERIVED table will only be refilled if the values it + refers to change. + The values refer only to the prefix of the join prefix so far. + Adjust the number of LATERAL DERIVED refills accordingly. + */ + double prefix_fanout= get_join_prefix_fanout(join_prefix, + best_min_prefix_size); + if (prefix_fanout < fanout) + fanout= prefix_fanout; + + choice.add("join_prefix_fanout_for_split", fanout); + } + + if (fanout * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01) { /* The best plan that employs splitting is cheaper than @@ -1112,6 +1250,10 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count, @param remaining_tables used to filter out the equalities that cannot be pushed. + lparams if non-NULL, call lparams->add() to collect a list + of the "parameter" items for the lateral subquery. + The list will be used to reduce the number of times + the lateral derived table is refilled. @details This function is called by JOIN_TAB::fix_splitting that is used @@ -1128,18 +1270,23 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count, true on failure */ -bool JOIN::inject_best_splitting_cond(table_map remaining_tables) +bool JOIN::inject_best_splitting_cond(table_map remaining_tables, + Lateral_derived_parameters *lparams) { Item *inj_cond= 0; List<Item> *inj_cond_list= &spl_opt_info->inj_cond_list; List_iterator<KEY_FIELD> li(spl_opt_info->added_key_fields); KEY_FIELD *added_key_field; + table_map needed_tables= 0; while ((added_key_field= li++)) { if (remaining_tables & added_key_field->val->used_tables()) continue; + if (lparams && lparams->add_param(added_key_field->val)) + return true; if (inj_cond_list->push_back(added_key_field->cond, thd->mem_root)) return true; + needed_tables |= added_key_field->val->used_tables(); } DBUG_ASSERT(inj_cond_list->elements); switch (inj_cond_list->elements) { @@ -1230,8 +1377,15 @@ bool JOIN_TAB::fix_splitting(SplM_plan_info *spl_plan, memcpy((char *) md_join->best_positions, (char *) spl_plan->best_positions, sizeof(POSITION) * md_join->table_count); - if (md_join->inject_best_splitting_cond(remaining_tables)) + + Lateral_derived_parameters *lparams= NULL; + if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY)) + lparams= new Lateral_derived_parameters; + + if (md_join->inject_best_splitting_cond(remaining_tables, lparams)) return true; + + table->pos_in_table_list->get_unit()->lateral_derived_params= lparams; /* This is called for a proper work of JOIN::get_best_combination() called for the join that materializes T @@ -1283,3 +1437,31 @@ bool JOIN::fix_all_splittings_in_plan() } return false; } + + +bool Lateral_derived_parameters::add_param(Item *item) +{ + Cached_item *tmp; + if (!(tmp= new_Cached_item(current_thd, item, FALSE)) || + list.push_back(tmp)) + return true; + + return false; +} + + +/* + @brief + Test if LATERAL DERIVED table's parameters changed since last invocation + + @return + false Parameter values have not changed since last invocation + true Parameter values have changed since last invocation. Current values + of the parameters are now saved. +*/ +bool Lateral_derived_parameters::need_refill() +{ + if (test_if_group_changed(list) == -1) + return false; // not changed + return true; +} diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc index f8ee3475af8..25524c90da5 100644 --- a/sql/sql_derived.cc +++ b/sql/sql_derived.cc @@ -1233,6 +1233,15 @@ bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived) DBUG_RETURN(res); } + /* + For LATERAL DERIVED tables, check if we actually need to refill it. If its + "parameters" (i.e. items it refers to) have the same values, we don't need + to refill. + */ + if (unit->lateral_derived_params && + !unit->lateral_derived_params->need_refill()) + DBUG_RETURN(false); + if (unit->executed && !derived_is_recursive && (unit->uncacheable & UNCACHEABLE_DEPENDENT)) { diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index b9ca67182cf..5b3897b28fa 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -2915,6 +2915,7 @@ void st_select_lex_unit::init_query() columns_are_renamed= false; with_wrapped_tvc= false; have_except_all_or_intersect_all= false; + lateral_derived_params= NULL; } void st_select_lex::init_query() diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 45a3bf72fa4..278ee7d5276 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -845,6 +845,7 @@ class JOIN; class select_unit; class Procedure; class Explain_query; +class Lateral_derived_parameters; void delete_explain_query(LEX *lex); void create_explain_query(LEX *lex, MEM_ROOT *mem_root); @@ -964,6 +965,8 @@ class st_select_lex_unit: public st_select_lex_node { bool columns_are_renamed; + Lateral_derived_parameters *lateral_derived_params; + void init_query(); st_select_lex* outer_select(); const st_select_lex* first_select() const diff --git a/sql/sql_priv.h b/sql/sql_priv.h index 07f07a7150f..7cdb6a62d08 100644 --- a/sql/sql_priv.h +++ b/sql/sql_priv.h @@ -234,6 +234,7 @@ #define OPTIMIZER_SWITCH_USE_ROWID_FILTER (1ULL << 33) #define OPTIMIZER_SWITCH_COND_PUSHDOWN_FROM_HAVING (1ULL << 34) #define OPTIMIZER_SWITCH_NOT_NULL_RANGE_SCAN (1ULL << 35) +#define OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY (1ULL << 36) #define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \ OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \ diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 39e224ab024..85c3355cf08 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -7460,7 +7460,10 @@ best_access_path(JOIN *join, loose_scan_opt.init(join, s, remaining_tables); if (s->table->is_splittable()) - spl_plan= s->choose_best_splitting(record_count, remaining_tables); + { + spl_plan= s->choose_best_splitting(join_positions, idx, record_count, + remaining_tables); + } Json_writer_array trace_paths(thd, "considered_access_paths"); if (s->keyuse) diff --git a/sql/sql_select.h b/sql/sql_select.h index 29e42ff8ef8..2a9cb5464e6 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -249,6 +249,7 @@ class AGGR_OP; class Filesort; struct SplM_plan_info; class SplM_opt_info; +class Lateral_derived_parameters; typedef struct st_join_table { TABLE *table; @@ -682,7 +683,9 @@ typedef struct st_join_table { void partial_cleanup(); void add_keyuses_for_splitting(); - SplM_plan_info *choose_best_splitting(double record_count, + SplM_plan_info *choose_best_splitting(const POSITION *join_prefix, + uint top_prefix_size, + double record_count, table_map remaining_tables); bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables, bool is_const_table); @@ -1795,7 +1798,8 @@ class JOIN :public Sql_alloc bool inject_cond_into_where(Item *injected_cond); bool check_for_splittable_materialized(); void add_keyuses_for_splitting(); - bool inject_best_splitting_cond(table_map remaining_tables); + bool inject_best_splitting_cond(table_map remaining_tables, + Lateral_derived_parameters *params); bool fix_all_splittings_in_plan(); void make_notnull_conds_for_range_scans(); @@ -2532,4 +2536,24 @@ void propagate_new_equalities(THD *thd, Item *cond, COND_EQUAL *inherited, bool *is_simplifiable_cond); +/* + A list of parameters for a derived table that uses LATERAL DERIVED + optimization. + + The last values of the parameters are saved, which one can use to check + whether + - they've changed and one needs to refill the LATERAL DERIVED table + - they are not changed and refill is not needed. +*/ +class Lateral_derived_parameters : public Sql_alloc +{ + List<Cached_item> list; +public: + // Construction interface + bool add_param(Item *item); + + // Execution interface + bool need_refill(); +}; + #endif /* SQL_SELECT_INCLUDED */ diff --git a/sql/sql_union.cc b/sql/sql_union.cc index b88d78c0db3..a0a89b1e401 100644 --- a/sql/sql_union.cc +++ b/sql/sql_union.cc @@ -2588,6 +2588,7 @@ bool st_select_lex_unit::cleanup() } columns_are_renamed= false; cleaned= 1; + lateral_derived_params= NULL; for (SELECT_LEX *sl= first_select(); sl; sl= sl->next_select()) error|= sl->cleanup(); diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 2f7cf290dcd..725f407fc3b 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -2717,6 +2717,7 @@ export const char *optimizer_switch_names[]= "rowid_filter", "condition_pushdown_from_having", "not_null_range_scan", + "split_materialized_is_lazy", "default", NullS };
participants (1)
-
psergey