revision-id: 58d6489885b9459f6c0d71c94ec9f60cac893f2e (mariadb-10.5.11-59-g58d6489885b)
parent(s): c03949f15325c331a943a15465adf74487cad91b
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-08-03 19:09:49 +0300
message:
Update test results 2
---
mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
index 3498d5de743..d0012bda4b3 100644
--- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
@@ -2392,6 +2392,16 @@ NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY YES
COMMAND_LINE_ARGUMENT REQUIRED
+VARIABLE_NAME OPTIMIZER_LATERAL_LAZY_REFILL
+VARIABLE_SCOPE SESSION
+VARIABLE_TYPE BIGINT UNSIGNED
+VARIABLE_COMMENT Controls Lazy Lateral Table Refill fix. 0 means disabled, 1 means enabled in the optimizer. Higher values are reserved for future use.
+NUMERIC_MIN_VALUE 0
+NUMERIC_MAX_VALUE 2
+NUMERIC_BLOCK_SIZE 1
+ENUM_VALUE_LIST NULL
+READ_ONLY NO
+COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_MAX_SEL_ARG_WEIGHT
VARIABLE_SCOPE SESSION
VARIABLE_TYPE BIGINT UNSIGNED
revision-id: c03949f15325c331a943a15465adf74487cad91b (mariadb-10.5.11-58-gc03949f1532)
parent(s): 3216601d4a56cd6db30cc1a1db629dd73086df4b
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-08-03 17:27:16 +0300
message:
Update test results
---
mysql-test/main/mysqld--help.result | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result
index 7b0ce27ead3..6872adbcfba 100644
--- a/mysql-test/main/mysqld--help.result
+++ b/mysql-test/main/mysqld--help.result
@@ -681,6 +681,10 @@ The following specify which files/extra groups are read (specified before remain
max_connections*5 or max_connections + table_cache*2
(whichever is larger) number of file descriptors
(Automatically configured unless set explicitly)
+ --optimizer-lateral-lazy-refill=#
+ Controls Lazy Lateral Table Refill fix. 0 means disabled,
+ 1 means enabled in the optimizer. Higher values are
+ reserved for future use.
--optimizer-max-sel-arg-weight=#
The maximum weight of the SEL_ARG graph. Set to 0 for no
limit
@@ -1640,6 +1644,7 @@ old-alter-table DEFAULT
old-mode
old-passwords FALSE
old-style-user-limits FALSE
+optimizer-lateral-lazy-refill 0
optimizer-max-sel-arg-weight 32000
optimizer-prune-level 1
optimizer-search-depth 62
revision-id: 3216601d4a56cd6db30cc1a1db629dd73086df4b (mariadb-10.5.11-57-g3216601d4a5)
parent(s): 767ccf27c70b7fbc746dacfcba3ebe14c0539a86
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-08-03 17:10:19 +0300
message:
MDEV-26301: more test coverage
---
mysql-test/main/derived_split_innodb.result | 79 ++++++++++++++++++++++++++++-
mysql-test/main/derived_split_innodb.test | 46 ++++++++++++++++-
2 files changed, 123 insertions(+), 2 deletions(-)
diff --git a/mysql-test/main/derived_split_innodb.result b/mysql-test/main/derived_split_innodb.result
index 063a29d2366..0392b806c7b 100644
--- a/mysql-test/main/derived_split_innodb.result
+++ b/mysql-test/main/derived_split_innodb.result
@@ -355,6 +355,83 @@ json_detailed(json_extract(trace, '$**.split_plan_choice'))
"split_chosen": true
}
]
+create table t21 (pk int primary key);
+insert into t21 values (1),(2),(3);
+create table t22 (pk int primary key);
+insert into t22 values (1),(2),(3);
+explain
+select * from
+t21, t22,
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1 and t22.pk=2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t21 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t22 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "join_prefix_fanout_for_split": 5,
+ "split_chosen": true
+ }
+]
+explain
+select * from
+t21,
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from
+t22 join t10 left join t11 on t11.col1=t10.col1
+where
+t22.pk=1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t21 const PRIMARY PRIMARY 4 const 1 Using index
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t22 const PRIMARY PRIMARY 4 const 1 Using index
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "join_prefix_fanout_for_split": 5,
+ "split_chosen": true
+ }
+]
set optimizer_trace=0;
set @@optimizer_lateral_lazy_refill=default;
-drop table t1,t2,t3, t10, t11;
+drop table t1,t2,t3, t10, t11, t21, t22;
diff --git a/mysql-test/main/derived_split_innodb.test b/mysql-test/main/derived_split_innodb.test
index 71a734b09df..98a5ca354ae 100644
--- a/mysql-test/main/derived_split_innodb.test
+++ b/mysql-test/main/derived_split_innodb.test
@@ -277,6 +277,50 @@ select
from
information_schema.optimizer_trace;
+create table t21 (pk int primary key);
+insert into t21 values (1),(2),(3);
+
+create table t22 (pk int primary key);
+insert into t22 values (1),(2),(3);
+
+# Same as above but throw in a couple of const tables.
+explain
+select * from
+ t21, t22,
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b
+where
+ t21.pk=1 and t22.pk=2;
+
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+explain
+select * from
+ t21,
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from
+ t22 join t10 left join t11 on t11.col1=t10.col1
+ where
+ t22.pk=1
+ group by grp_id) T on T.grp_id=t1.b
+where
+ t21.pk=1;
+
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
set optimizer_trace=0;
set @@optimizer_lateral_lazy_refill=default;
-drop table t1,t2,t3, t10, t11;
+drop table t1,t2,t3, t10, t11, t21, t22;
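
A quick sanity check on the numbers above (my reading of the trace, not text from the patch): for the first of the two new queries, record_count_for_split is the fanout of the whole join prefix in front of the derived table, 1 (t21, const) * 1 (t22, const) * 5 (t1) * 2 (t2) * 3 (t3) = 30, while join_prefix_fanout_for_split multiplies only the prefix up to t1, the table that supplies the ref value T.grp_id=t1.b, giving 5. The second query moves t22 inside the derived table, but the arithmetic works out the same (1 * 5 * 2 * 3 = 30 versus 5). Since @@optimizer_lateral_lazy_refill is still 1 at this point in the test file, the split/unsplit comparison uses 5 * split_cost rather than 30 * split_cost, and the extra const tables are exactly what these queries are meant to exercise.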

[Commits] 767ccf27c70: MDEV-26301: LATERAL DERIVED refills the temp. table too many times
by psergey 03 Aug '21
revision-id: 767ccf27c70b7fbc746dacfcba3ebe14c0539a86 (mariadb-10.5.11-56-g767ccf27c70)
parent(s): 398387f0e638ee027406e72e3460fb1bf755b31d
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-08-03 16:54:05 +0300
message:
MDEV-26301: LATERAL DERIVED refills the temp. table too many times
Add support for the fix in the optimizer part.
Also added @@optimizer_lateral_lazy_refill to control this.
---
mysql-test/main/derived_split_innodb.result | 117 ++++++++++++++++++++++++++++
mysql-test/main/derived_split_innodb.test | 87 +++++++++++++++++++++
mysql-test/main/opt_trace.result | 2 +-
sql/opt_split.cc | 116 ++++++++++++++++++++++++++-
sql/sql_class.h | 1 +
sql/sql_select.cc | 2 +-
sql/sql_select.h | 4 +-
sql/sys_vars.cc | 8 ++
8 files changed, 331 insertions(+), 6 deletions(-)
diff --git a/mysql-test/main/derived_split_innodb.result b/mysql-test/main/derived_split_innodb.result
index 9edf9a1f2ae..063a29d2366 100644
--- a/mysql-test/main/derived_split_innodb.result
+++ b/mysql-test/main/derived_split_innodb.result
@@ -241,3 +241,120 @@ set optimizer_switch='split_materialized=default';
set use_stat_tables=default;
set optimizer_use_condition_selectivity=default;
# End of 10.3 tests
+#
+# MDEV-26301: LATERAL DERIVED refills the temp. table too many times
+#
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+analyze table t1,t2,t3 persistent for all;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status Table is already up to date
+test.t3 analyze status Engine-independent statistics collected
+test.t3 analyze status Table is already up to date
+explain
+select * from
+(t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 5
+1 SIMPLE t2 ref a a 5 test.t1.b 2 Using where
+1 SIMPLE t3 ref a a 5 test.t1.b 3 Using where
+create table t10 (
+grp_id int,
+col1 int,
+key(grp_id)
+);
+insert into t10
+select
+A.seq,
+B.seq
+from
+seq_1_to_100 A,
+seq_1_to_100 B;
+create table t11 (
+col1 int,
+col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+analyze table t10,t11 persistent for all;
+Table Op Msg_type Msg_text
+test.t10 analyze status Engine-independent statistics collected
+test.t10 analyze status Table is already up to date
+test.t11 analyze status Engine-independent statistics collected
+test.t11 analyze status OK
+set optimizer_trace=1;
+explain
+select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "split_chosen": true
+ }
+]
+select @@optimizer_lateral_lazy_refill;
+@@optimizer_lateral_lazy_refill
+0
+set @@optimizer_lateral_lazy_refill=1;
+explain
+select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "join_prefix_fanout_for_split": 5,
+ "split_chosen": true
+ }
+]
+set optimizer_trace=0;
+set @@optimizer_lateral_lazy_refill=default;
+drop table t1,t2,t3, t10, t11;
diff --git a/mysql-test/main/derived_split_innodb.test b/mysql-test/main/derived_split_innodb.test
index bee9ef497b6..71a734b09df 100644
--- a/mysql-test/main/derived_split_innodb.test
+++ b/mysql-test/main/derived_split_innodb.test
@@ -193,3 +193,90 @@ set use_stat_tables=default;
set optimizer_use_condition_selectivity=default;
--echo # End of 10.3 tests
+
+--echo #
+--echo # MDEV-26301: LATERAL DERIVED refills the temp. table too many times
+--echo #
+
+# 5 values
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+
+# 5 value groups of size 2 each
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+
+# 5 value groups of size 3 each
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+
+analyze table t1,t2,t3 persistent for all;
+
+explain
+select * from
+ (t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
+
+# Now, create tables for Groups.
+
+create table t10 (
+ grp_id int,
+ col1 int,
+ key(grp_id)
+);
+
+# 100 groups of 100 values each
+insert into t10
+select
+ A.seq,
+ B.seq
+from
+ seq_1_to_100 A,
+ seq_1_to_100 B;
+
+# and X10 multiplier
+
+create table t11 (
+ col1 int,
+ col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+
+
+analyze table t10,t11 persistent for all;
+set optimizer_trace=1;
+
+explain
+select * from
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b;
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+select @@optimizer_lateral_lazy_refill;
+set @@optimizer_lateral_lazy_refill=1;
+
+explain
+select * from
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b;
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+set optimizer_trace=0;
+set @@optimizer_lateral_lazy_refill=default;
+drop table t1,t2,t3, t10, t11;
diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result
index d4ba8eccb91..97d3b48ee85 100644
--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
@@ -8954,9 +8954,9 @@ json_detailed(json_extract(trace, '$**.lateral_derived_choice'))
],
"split_plan_choice":
{
+ "unsplit_cost": 25.72361682,
"split_cost": 2.488945919,
"record_count_for_split": 4,
- "unsplit_cost": 25.72361682,
"split_chosen": true
},
"chosen_lateral_derived":
diff --git a/sql/opt_split.cc b/sql/opt_split.cc
index d482c2de2a4..a569c323792 100644
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -876,6 +876,70 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
}
+
+/*
+ given a table bitmap, find the prefix of positions array that covers the
+ tables in the bitmap.
+*/
+
+uint get_prefix_size(const POSITION *positions, uint max_position, table_map map)
+{
+ map= map & ~PSEUDO_TABLE_BITS; // remove OUTER_REF_TABLE_BIT
+ table_map covered= 0;
+ uint i;
+
+ for (i=0; i < max_position; i++)
+ {
+ covered |= positions[i].table->table->map;
+ if (!(map & ~covered))
+ break;
+ }
+ return i;
+}
+
+
+/*
+ Walk through the KEYUSE_EXT objects for given {table,key,key_part} and
+ find the shortest prefix of positions array that we have a keyuse object for.
+*/
+
+uint get_min_prefix_for_key_part(const POSITION *positions, uint max_prefix_size,
+ KEYUSE_EXT *keyuse)
+{
+ TABLE *table= keyuse->table;
+ uint key= keyuse->key;
+ uint keypart= keyuse->keypart;
+ uint prefix_size= max_prefix_size;
+
+ while (keyuse->table == table && keyuse->key == key &&
+ keyuse->keypart == keypart)
+ {
+ uint cur_size= get_prefix_size(positions, max_prefix_size,
+ keyuse->needed_in_prefix);
+ if (cur_size < prefix_size)
+ prefix_size= cur_size;
+ keyuse++;
+ }
+ return prefix_size;
+}
+
+
+/*
+ Get fanout of a join prefix
+*/
+double get_join_prefix_fanout(const POSITION *positions, uint prefix_size)
+{
+ double fanout = 1.0;
+ for (uint i=0; i <= prefix_size; i++)
+ {
+ if (positions[i].records_read > 1e-30)
+ fanout *= positions[i].records_read;
+ if (positions[i].cond_selectivity > 1e-30)
+ fanout *= positions[i].cond_selectivity;
+ }
+ return fanout;
+}
+
/**
@brief
Choose the best splitting to extend the evaluated partial join
@@ -906,7 +970,9 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
if the plan has been chosen, NULL - otherwise.
*/
-SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
+SplM_plan_info * JOIN_TAB::choose_best_splitting(const POSITION *join_prefix,
+ uint top_prefix_size,
+ double record_count,
table_map remaining_tables)
{
SplM_opt_info *spl_opt_info= table->spl_opt_info;
@@ -922,6 +988,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
SplM_plan_info *spl_plan= 0;
uint best_key= 0;
uint best_key_parts= 0;
+ uint best_min_prefix_size;
Json_writer_object spl_trace(thd, "lateral_derived_choice");
Json_writer_array trace_indexes(thd, "indexes_for_splitting");
@@ -942,6 +1009,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
uint key= keyuse_ext->key;
KEYUSE_EXT *key_keyuse_ext_start= keyuse_ext;
key_part_map found_parts= 0;
+ uint prefix_len= 0;
do
{
if (keyuse_ext->needed_in_prefix & remaining_tables)
@@ -951,11 +1019,31 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
}
if (!(keyuse_ext->keypart_map & found_parts))
{
+ /*
+ 1. found_parts is empty and this is the first key part we've found
+ 2. or, we are looking at the first keyuse object for this keypart,
+ and found_parts has a bit set for the previous key part.
+ */
if ((!found_parts && !keyuse_ext->keypart) ||
(found_parts && ((keyuse_ext->keypart_map >> 1) & found_parts)))
+ {
+ if (thd->variables.optimizer_lateral_lazy_refill > 0)
+ {
+ uint min_for_kp= get_min_prefix_for_key_part(join_prefix,
+ top_prefix_size,
+ keyuse_ext);
+ // The prefix length needed for {kp1, kp2, ...} is max of the
+ // needed prefix length for each key part.
+ if (min_for_kp > prefix_len)
+ prefix_len= min_for_kp;
+ }
+
found_parts|= keyuse_ext->keypart_map;
+ }
else
{
+ // This is a new key part but it doesn't form a continuous
+ // index prefix. Skip all KEYUSEs for this index.
do
{
keyuse_ext++;
@@ -981,6 +1069,11 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
best_key_parts= keyuse_ext->keypart + 1;
best_rec_per_key= rec_per_key;
best_key_keyuse_ext_start= key_keyuse_ext_start;
+ if (thd->variables.optimizer_lateral_lazy_refill > 0)
+ {
+ best_min_prefix_size= prefix_len;
+ trace.add("min_prefix_len", (longlong)prefix_len);
+ }
}
keyuse_ext++;
}
@@ -1069,10 +1162,21 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
if (spl_plan)
{
Json_writer_object choice(thd, "split_plan_choice");
+ choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
+
choice.add("split_cost", spl_plan->cost);
choice.add("record_count_for_split", record_count);
- choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
- if(record_count * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
+
+ // psergey-todo: best_min_prefix_size allows us to compute a more tight
+ // bound than record_count.
+ double fanout= record_count;
+ if (thd->variables.optimizer_lateral_lazy_refill > 0)
+ {
+ fanout= get_join_prefix_fanout(join_prefix, best_min_prefix_size);
+ choice.add("join_prefix_fanout_for_split", fanout);
+ }
+
+ if(fanout * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
{
/*
The best plan that employs splitting is cheaper than
@@ -1134,12 +1238,14 @@ bool JOIN::inject_best_splitting_cond(table_map remaining_tables)
List<Item> *inj_cond_list= &spl_opt_info->inj_cond_list;
List_iterator<KEY_FIELD> li(spl_opt_info->added_key_fields);
KEY_FIELD *added_key_field;
+ table_map needed_tables= 0;
while ((added_key_field= li++))
{
if (remaining_tables & added_key_field->val->used_tables())
continue;
if (inj_cond_list->push_back(added_key_field->cond, thd->mem_root))
return true;
+ needed_tables |= added_key_field->val->used_tables();
}
DBUG_ASSERT(inj_cond_list->elements);
switch (inj_cond_list->elements) {
@@ -1156,6 +1262,10 @@ bool JOIN::inject_best_splitting_cond(table_map remaining_tables)
if (inject_cond_into_where(inj_cond))
return true;
+ // psergey-todo: walk the join tabs until we are satisfying needed_tables...
+ // and inject the check at that point.
+ // Other solution is to just add a cache...
+
select_lex->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED;
st_select_lex_unit *unit= select_lex->master_unit();
unit->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED;
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 62ce5c8a2d2..8964a8aa4f7 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -689,6 +689,7 @@ typedef struct system_variables
ulong optimizer_search_depth;
ulong optimizer_selectivity_sampling_limit;
ulong optimizer_use_condition_selectivity;
+ ulong optimizer_lateral_lazy_refill;
ulong use_stat_tables;
double sample_percentage;
ulong histogram_size;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 7057ed1b5e1..5b901df2e54 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -7455,7 +7455,7 @@ best_access_path(JOIN *join,
loose_scan_opt.init(join, s, remaining_tables);
if (s->table->is_splittable())
- spl_plan= s->choose_best_splitting(record_count, remaining_tables);
+ spl_plan= s->choose_best_splitting(join_positions, idx, record_count, remaining_tables);
Json_writer_array trace_paths(thd, "considered_access_paths");
if (s->keyuse)
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 29e42ff8ef8..7ffab774248 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -682,7 +682,9 @@ typedef struct st_join_table {
void partial_cleanup();
void add_keyuses_for_splitting();
- SplM_plan_info *choose_best_splitting(double record_count,
+ SplM_plan_info *choose_best_splitting(const POSITION *join_prefix,
+ uint top_prefix_size,
+ double record_count,
table_map remaining_tables);
bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables,
bool is_const_table);
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index a8e4ff2dded..e87d371453d 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -2672,6 +2672,14 @@ static Sys_var_ulong Sys_optimizer_use_condition_selectivity(
SESSION_VAR(optimizer_use_condition_selectivity), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(1, 5), DEFAULT(4), BLOCK_SIZE(1));
+static Sys_var_ulong Sys_optimizer_lateral_lazy_refill(
+ "optimizer_lateral_lazy_refill",
+ "Controls Lazy Lateral Table Refill fix. 0 means disabled, "
+ "1 means enabled in the optimizer. Higher values are reserved "
+ "for future use.",
+ SESSION_VAR(optimizer_lateral_lazy_refill), CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(0, 2), DEFAULT(0), BLOCK_SIZE(1));
+
static Sys_var_ulong Sys_optimizer_search_depth(
"optimizer_search_depth",
"Maximum depth of search performed by the query optimizer. Values "
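
To make the costing idea in this patch easier to follow, here is a minimal standalone sketch of it. The struct and function names below are simplified stand-ins, not the real POSITION/KEYUSE_EXT machinery, and record_count is approximated by the fanout of the whole prefix. The point is that get_prefix_size() and get_join_prefix_fanout() let choose_best_splitting() multiply split_cost by the fanout of only the shortest prefix that covers the tables feeding the ref access (just t1 in the test above), instead of by the fanout of the whole join prefix.

// Sketch of the lazy-refill cost bound, with simplified stand-ins for POSITION
// and the table bitmaps; the numbers mirror the test above (t1=5, t2=2, t3=3 rows).
#include <cstdio>
#include <cstdint>
#include <vector>

typedef uint64_t table_map;                 // bit i set => table i is present

struct PrefixPos                            // stand-in for POSITION
{
  table_map map;                            // bitmap of this table
  double records_read;                      // rows read per row of the prior prefix
  double cond_selectivity;                  // selectivity of attached conditions
};

// Like get_prefix_size(): index of the first position whose accumulated table
// set covers `needed` (the tables whose fields appear in the ref access).
static size_t prefix_size_covering(const std::vector<PrefixPos> &pos, table_map needed)
{
  table_map covered= 0;
  size_t i;
  for (i= 0; i < pos.size(); i++)
  {
    covered|= pos[i].map;
    if (!(needed & ~covered))
      break;
  }
  return i;
}

// Like get_join_prefix_fanout(): multiply rows and selectivities of positions 0..prefix_size.
static double prefix_fanout(const std::vector<PrefixPos> &pos, size_t prefix_size)
{
  double fanout= 1.0;
  for (size_t i= 0; i <= prefix_size && i < pos.size(); i++)
  {
    if (pos[i].records_read > 1e-30)
      fanout*= pos[i].records_read;
    if (pos[i].cond_selectivity > 1e-30)
      fanout*= pos[i].cond_selectivity;
  }
  return fanout;
}

int main()
{
  // Join prefix t1, t2, t3 as in the EXPLAIN above; only t1.b is used for T.grp_id=t1.b.
  std::vector<PrefixPos> prefix= { {1<<0, 5, 1}, {1<<1, 2, 1}, {1<<2, 3, 1} };
  table_map needed_for_ref= 1<<0;           // t1

  double split_cost= 2535.968504, unsplit_cost= 253440.0075;
  double whole_prefix_fanout= prefix_fanout(prefix, prefix.size() - 1);   // 30, roughly record_count
  double lazy_fanout= prefix_fanout(prefix,
                                    prefix_size_covering(prefix, needed_for_ref)); // 5

  printf("old bound: %.0f * split_cost = %.1f, lazy bound: %.0f * split_cost = %.1f, unsplit = %.1f\n",
         whole_prefix_fanout, whole_prefix_fanout * split_cost,
         lazy_fanout, lazy_fanout * split_cost, unsplit_cost);
  printf("split chosen: %s\n",
         lazy_fanout * split_cost < unsplit_cost - 0.01 ? "yes" : "no");
  return 0;
}

With these inputs the lazy bound (5 * split_cost) matches the join_prefix_fanout_for_split: 5 entry in the trace output of the test, and the split plan is still chosen against unsplit_cost.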

[Commits] 34c7b851360: MDEV-26301: LATERAL DERIVED refills the temp. table too many times
by psergey 03 Aug '21
revision-id: 34c7b851360fe7f12ebadfd743d99738aad5c8f3 (mariadb-10.5.11-56-g34c7b851360)
parent(s): 398387f0e638ee027406e72e3460fb1bf755b31d
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-08-03 16:51:59 +0300
message:
MDEV-26301: LATERAL DERIVED refills the temp. table too many times
Add support for the fix in the optimizer part.
Also added @@optimizer_lateral_lazy_refill to control this.
---
mysql-test/main/derived_split_innodb.result | 116 ++++++++++++++++++++++++++++
mysql-test/main/derived_split_innodb.test | 86 +++++++++++++++++++++
mysql-test/main/opt_trace.result | 2 +-
sql/opt_split.cc | 116 +++++++++++++++++++++++++++-
sql/sql_class.h | 1 +
sql/sql_select.cc | 2 +-
sql/sql_select.h | 4 +-
sql/sys_vars.cc | 8 ++
8 files changed, 329 insertions(+), 6 deletions(-)
diff --git a/mysql-test/main/derived_split_innodb.result b/mysql-test/main/derived_split_innodb.result
index 9edf9a1f2ae..9847c159d3c 100644
--- a/mysql-test/main/derived_split_innodb.result
+++ b/mysql-test/main/derived_split_innodb.result
@@ -241,3 +241,119 @@ set optimizer_switch='split_materialized=default';
set use_stat_tables=default;
set optimizer_use_condition_selectivity=default;
# End of 10.3 tests
+#
+# MDEV-26301: LATERAL DERIVED refills the temp. table too many times
+#
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+analyze table t1,t2,t3 persistent for all;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status Table is already up to date
+test.t3 analyze status Engine-independent statistics collected
+test.t3 analyze status Table is already up to date
+explain
+select * from
+(t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 5
+1 SIMPLE t2 ref a a 5 test.t1.b 2 Using where
+1 SIMPLE t3 ref a a 5 test.t1.b 3 Using where
+create table t10 (
+grp_id int,
+col1 int,
+key(grp_id)
+);
+insert into t10
+select
+A.seq,
+B.seq
+from
+seq_1_to_100 A,
+seq_1_to_100 B;
+create table t11 (
+col1 int,
+col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+analyze table t10,t11 persistent for all;
+Table Op Msg_type Msg_text
+test.t10 analyze status Engine-independent statistics collected
+test.t10 analyze status Table is already up to date
+test.t11 analyze status Engine-independent statistics collected
+test.t11 analyze status OK
+set optimizer_trace=1;
+explain
+select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "split_chosen": true
+ }
+]
+select @@optimizer_lateral_lazy_refill;
+@@optimizer_lateral_lazy_refill
+0
+set @@optimizer_lateral_lazy_refill=1;
+explain
+select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 ALL NULL NULL NULL NULL 5
+1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where
+1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where
+2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100
+2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+
+ {
+ "unsplit_cost": 253440.0075,
+ "split_cost": 2535.968504,
+ "record_count_for_split": 30,
+ "join_prefix_fanout_for_split": 5,
+ "split_chosen": true
+ }
+]
+set @@optimizer_lateral_lazy_refill=default;
+drop table t1,t2,t3, t10, t11;
diff --git a/mysql-test/main/derived_split_innodb.test b/mysql-test/main/derived_split_innodb.test
index bee9ef497b6..9c425cc1885 100644
--- a/mysql-test/main/derived_split_innodb.test
+++ b/mysql-test/main/derived_split_innodb.test
@@ -193,3 +193,89 @@ set use_stat_tables=default;
set optimizer_use_condition_selectivity=default;
--echo # End of 10.3 tests
+
+--echo #
+--echo # MDEV-26301: LATERAL DERIVED refills the temp. table too many times
+--echo #
+
+# 5 values
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+
+# 5 value groups of size 2 each
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+
+# 5 value groups of size 3 each
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+
+analyze table t1,t2,t3 persistent for all;
+
+explain
+select * from
+ (t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
+
+# Now, create tables for Groups.
+
+create table t10 (
+ grp_id int,
+ col1 int,
+ key(grp_id)
+);
+
+# 100 groups of 100 values each
+insert into t10
+select
+ A.seq,
+ B.seq
+from
+ seq_1_to_100 A,
+ seq_1_to_100 B;
+
+# and X10 multiplier
+
+create table t11 (
+ col1 int,
+ col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+
+
+analyze table t10,t11 persistent for all;
+set optimizer_trace=1;
+
+explain
+select * from
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b;
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+select @@optimizer_lateral_lazy_refill;
+set @@optimizer_lateral_lazy_refill=1;
+
+explain
+select * from
+ (
+ (t1 left join t2 on t2.a=t1.b)
+ left join t3 on t3.a=t1.b
+ ) left join (select grp_id, count(*)
+ from t10 left join t11 on t11.col1=t10.col1
+ group by grp_id) T on T.grp_id=t1.b;
+select
+ json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+ information_schema.optimizer_trace;
+
+set @@optimizer_lateral_lazy_refill=default;
+drop table t1,t2,t3, t10, t11;
diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result
index d4ba8eccb91..97d3b48ee85 100644
--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
@@ -8954,9 +8954,9 @@ json_detailed(json_extract(trace, '$**.lateral_derived_choice'))
],
"split_plan_choice":
{
+ "unsplit_cost": 25.72361682,
"split_cost": 2.488945919,
"record_count_for_split": 4,
- "unsplit_cost": 25.72361682,
"split_chosen": true
},
"chosen_lateral_derived":
diff --git a/sql/opt_split.cc b/sql/opt_split.cc
index d482c2de2a4..a569c323792 100644
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -876,6 +876,70 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
}
+
+/*
+ given a table bitmap, find the prefix of positions array that covers the
+ tables in the bitmap.
+*/
+
+uint get_prefix_size(const POSITION *positions, uint max_position, table_map map)
+{
+ map= map & ~PSEUDO_TABLE_BITS; // remove OUTER_REF_TABLE_BIT
+ table_map covered= 0;
+ uint i;
+
+ for (i=0; i < max_position; i++)
+ {
+ covered |= positions[i].table->table->map;
+ if (!(map & ~covered))
+ break;
+ }
+ return i;
+}
+
+
+/*
+ Walk through the KEYUSE_EXT objects for given {table,key,key_part} and
+ find the shortest prefix of positions array that we have a keyuse object for.
+*/
+
+uint get_min_prefix_for_key_part(const POSITION *positions, uint max_prefix_size,
+ KEYUSE_EXT *keyuse)
+{
+ TABLE *table= keyuse->table;
+ uint key= keyuse->key;
+ uint keypart= keyuse->keypart;
+ uint prefix_size= max_prefix_size;
+
+ while (keyuse->table == table && keyuse->key == key &&
+ keyuse->keypart == keypart)
+ {
+ uint cur_size= get_prefix_size(positions, max_prefix_size,
+ keyuse->needed_in_prefix);
+ if (cur_size < prefix_size)
+ prefix_size= cur_size;
+ keyuse++;
+ }
+ return prefix_size;
+}
+
+
+/*
+ Get fanout of a join prefix
+*/
+double get_join_prefix_fanout(const POSITION *positions, uint prefix_size)
+{
+ double fanout = 1.0;
+ for (uint i=0; i <= prefix_size; i++)
+ {
+ if (positions[i].records_read > 1e-30)
+ fanout *= positions[i].records_read;
+ if (positions[i].cond_selectivity > 1e-30)
+ fanout *= positions[i].cond_selectivity;
+ }
+ return fanout;
+}
+
/**
@brief
Choose the best splitting to extend the evaluated partial join
@@ -906,7 +970,9 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
if the plan has been chosen, NULL - otherwise.
*/
-SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
+SplM_plan_info * JOIN_TAB::choose_best_splitting(const POSITION *join_prefix,
+ uint top_prefix_size,
+ double record_count,
table_map remaining_tables)
{
SplM_opt_info *spl_opt_info= table->spl_opt_info;
@@ -922,6 +988,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
SplM_plan_info *spl_plan= 0;
uint best_key= 0;
uint best_key_parts= 0;
+ uint best_min_prefix_size;
Json_writer_object spl_trace(thd, "lateral_derived_choice");
Json_writer_array trace_indexes(thd, "indexes_for_splitting");
@@ -942,6 +1009,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
uint key= keyuse_ext->key;
KEYUSE_EXT *key_keyuse_ext_start= keyuse_ext;
key_part_map found_parts= 0;
+ uint prefix_len= 0;
do
{
if (keyuse_ext->needed_in_prefix & remaining_tables)
@@ -951,11 +1019,31 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
}
if (!(keyuse_ext->keypart_map & found_parts))
{
+ /*
+ 1. found_parts is empty and this is the first key part we've found
+ 2. or, we are looking at the first keyuse object for this keypart,
+ and found_parts has a bit set for the previous key part.
+ */
if ((!found_parts && !keyuse_ext->keypart) ||
(found_parts && ((keyuse_ext->keypart_map >> 1) & found_parts)))
+ {
+ if (thd->variables.optimizer_lateral_lazy_refill > 0)
+ {
+ uint min_for_kp= get_min_prefix_for_key_part(join_prefix,
+ top_prefix_size,
+ keyuse_ext);
+ // The prefix length needed for {kp1, kp2, ...} is max of the
+ // needed prefix length for each key part.
+ if (min_for_kp > prefix_len)
+ prefix_len= min_for_kp;
+ }
+
found_parts|= keyuse_ext->keypart_map;
+ }
else
{
+ // This is a new key part but it doesn't form a continuous
+ // index prefix. Skip all KEYUSEs for this index.
do
{
keyuse_ext++;
@@ -981,6 +1069,11 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
best_key_parts= keyuse_ext->keypart + 1;
best_rec_per_key= rec_per_key;
best_key_keyuse_ext_start= key_keyuse_ext_start;
+ if (thd->variables.optimizer_lateral_lazy_refill > 0)
+ {
+ best_min_prefix_size= prefix_len;
+ trace.add("min_prefix_len", (longlong)prefix_len);
+ }
}
keyuse_ext++;
}
@@ -1069,10 +1162,21 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
if (spl_plan)
{
Json_writer_object choice(thd, "split_plan_choice");
+ choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
+
choice.add("split_cost", spl_plan->cost);
choice.add("record_count_for_split", record_count);
- choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
- if(record_count * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
+
+ // psergey-todo: best_min_prefix_size allows us to compute a more tight
+ // bound than record_count.
+ double fanout= record_count;
+ if (thd->variables.optimizer_lateral_lazy_refill > 0)
+ {
+ fanout= get_join_prefix_fanout(join_prefix, best_min_prefix_size);
+ choice.add("join_prefix_fanout_for_split", fanout);
+ }
+
+ if(fanout * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
{
/*
The best plan that employs splitting is cheaper than
@@ -1134,12 +1238,14 @@ bool JOIN::inject_best_splitting_cond(table_map remaining_tables)
List<Item> *inj_cond_list= &spl_opt_info->inj_cond_list;
List_iterator<KEY_FIELD> li(spl_opt_info->added_key_fields);
KEY_FIELD *added_key_field;
+ table_map needed_tables= 0;
while ((added_key_field= li++))
{
if (remaining_tables & added_key_field->val->used_tables())
continue;
if (inj_cond_list->push_back(added_key_field->cond, thd->mem_root))
return true;
+ needed_tables |= added_key_field->val->used_tables();
}
DBUG_ASSERT(inj_cond_list->elements);
switch (inj_cond_list->elements) {
@@ -1156,6 +1262,10 @@ bool JOIN::inject_best_splitting_cond(table_map remaining_tables)
if (inject_cond_into_where(inj_cond))
return true;
+ // psergey-todo: walk the join tabs until we are satisfying needed_tables...
+ // and inject the check at that point.
+ // Other solution is to just add a cache...
+
select_lex->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED;
st_select_lex_unit *unit= select_lex->master_unit();
unit->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED;
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 62ce5c8a2d2..8964a8aa4f7 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -689,6 +689,7 @@ typedef struct system_variables
ulong optimizer_search_depth;
ulong optimizer_selectivity_sampling_limit;
ulong optimizer_use_condition_selectivity;
+ ulong optimizer_lateral_lazy_refill;
ulong use_stat_tables;
double sample_percentage;
ulong histogram_size;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 7057ed1b5e1..5b901df2e54 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -7455,7 +7455,7 @@ best_access_path(JOIN *join,
loose_scan_opt.init(join, s, remaining_tables);
if (s->table->is_splittable())
- spl_plan= s->choose_best_splitting(record_count, remaining_tables);
+ spl_plan= s->choose_best_splitting(join_positions, idx, record_count, remaining_tables);
Json_writer_array trace_paths(thd, "considered_access_paths");
if (s->keyuse)
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 29e42ff8ef8..7ffab774248 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -682,7 +682,9 @@ typedef struct st_join_table {
void partial_cleanup();
void add_keyuses_for_splitting();
- SplM_plan_info *choose_best_splitting(double record_count,
+ SplM_plan_info *choose_best_splitting(const POSITION *join_prefix,
+ uint top_prefix_size,
+ double record_count,
table_map remaining_tables);
bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables,
bool is_const_table);
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index a8e4ff2dded..e87d371453d 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -2672,6 +2672,14 @@ static Sys_var_ulong Sys_optimizer_use_condition_selectivity(
SESSION_VAR(optimizer_use_condition_selectivity), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(1, 5), DEFAULT(4), BLOCK_SIZE(1));
+static Sys_var_ulong Sys_optimizer_lateral_lazy_refill(
+ "optimizer_lateral_lazy_refill",
+ "Controls Lazy Lateral Table Refill fix. 0 means disabled, "
+ "1 means enabled in the optimizer. Higher values are reserved "
+ "for future use.",
+ SESSION_VAR(optimizer_lateral_lazy_refill), CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(0, 2), DEFAULT(0), BLOCK_SIZE(1));
+
static Sys_var_ulong Sys_optimizer_search_depth(
"optimizer_search_depth",
"Maximum depth of search performed by the query optimizer. Values "

[Commits] 398387f0e63: MDEV-26300: Better Optimize Trace support for LATERAL DERIVED optimization
by psergey 03 Aug '21
revision-id: 398387f0e638ee027406e72e3460fb1bf755b31d (mariadb-10.5.11-55-g398387f0e63)
parent(s): 91e925e199ce61623f8413bfa789d0e7098c3d72
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-08-03 13:36:08 +0300
message:
MDEV-26300: Better Optimize Trace support for LATERAL DERIVED optimization
---
mysql-test/main/opt_trace.result | 186 ++++++++++++++++++++++++++-------------
mysql-test/main/opt_trace.test | 17 ++--
sql/opt_split.cc | 86 ++++++++++++++----
3 files changed, 202 insertions(+), 87 deletions(-)
diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result
index 9bf1cda18a3..d4ba8eccb91 100644
--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
@@ -449,6 +449,11 @@ select * from v2 {
}
]
},
+ {
+ "check_lateral_derived": {
+ "not_applicable": "no candidate field can be accessed through ref"
+ }
+ },
{
"best_join_order": ["t1"]
},
@@ -774,6 +779,11 @@ explain select * from v1 {
}
]
},
+ {
+ "check_lateral_derived": {
+ "not_applicable": "group list has no candidates"
+ }
+ },
{
"best_join_order": ["t1"]
},
@@ -8829,84 +8839,138 @@ id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 range idx_b idx_b 5 NULL 4 Using index condition; Using where
1 PRIMARY <derived2> ref key0 key0 5 test.t1.a 2
2 LATERAL DERIVED t2 ref idx_a idx_a 5 test.t1.a 1
+select json_valid(trace) from information_schema.optimizer_trace;
+json_valid(trace)
+1
select
-json_detailed(json_extract(trace, '$**.choose_best_splitting'))
+json_detailed(json_extract(trace, '$**.check_lateral_derived'))
from
information_schema.optimizer_trace;
-json_detailed(json_extract(trace, '$**.choose_best_splitting'))
+json_detailed(json_extract(trace, '$**.check_lateral_derived'))
[
- [
-
+ {
+ "split_variants":
+ [
+ "t2.a"
+ ]
+ }
+]
+select
+json_detailed(json_extract(trace, '$**.lateral_derived_info'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.lateral_derived_info'))
+[
+
+ {
+ "cost_breakdown":
{
- "considered_execution_plans":
- [
-
- {
- "plan_prefix":
- [
- ],
- "table": "t2",
- "best_access_path":
+ "oper_cost": 20.73533557,
+ "read_time": 4.98828125,
+ "join_read_time": 112.9872812
+ },
+ "unsplit_cost": 25.72361682,
+ "unsplit_card": 90,
+ "rec_len": 152
+ }
+]
+select
+json_detailed(json_extract(trace, '$**.lateral_derived_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.lateral_derived_choice'))
+[
+
+ {
+ "indexes_for_splitting":
+ [
+
+ {
+ "table": "t2",
+ "index": "idx_a",
+ "parts": 1,
+ "rec_per_key": 1.8367
+ }
+ ],
+ "build_split_plan":
+ [
+
+ {
+ "considered_execution_plans":
+ [
+
{
- "considered_access_paths":
+ "plan_prefix":
[
-
+ ],
+ "table": "t2",
+ "best_access_path":
+ {
+ "considered_access_paths":
+ [
+
+ {
+ "access_type": "ref",
+ "index": "idx_a",
+ "used_range_estimates": false,
+ "cause": "not available",
+ "rows": 1.8367,
+ "cost": 2.000585794,
+ "chosen": true
+ },
+
+ {
+ "type": "scan",
+ "chosen": false,
+ "cause": "cost"
+ }
+ ],
+ "chosen_access_method":
{
- "access_type": "ref",
- "index": "idx_a",
- "used_range_estimates": false,
- "cause": "not available",
- "rows": 1.8367,
+ "type": "ref",
+ "records": 1.8367,
"cost": 2.000585794,
- "chosen": true
- },
-
- {
- "type": "scan",
- "chosen": false,
- "cause": "cost"
+ "uses_join_buffering": false
}
- ],
- "chosen_access_method":
- {
- "type": "ref",
- "records": 1.8367,
- "cost": 2.000585794,
- "uses_join_buffering": false
- }
- },
- "rows_for_plan": 1.8367,
- "cost_for_plan": 2.367925794,
- "cost_for_sorting": 1.8367,
- "estimated_join_cardinality": 1.8367
+ },
+ "rows_for_plan": 1.8367,
+ "cost_for_plan": 2.367925794,
+ "cost_for_sorting": 1.8367,
+ "estimated_join_cardinality": 1.8367
+ }
+ ]
+ },
+
+ {
+ "found_split_plan":
+ {
+ "oper_cost": 0.488360125,
+ "join_best_read": 4.203625794,
+ "cost": 2.488945919,
+ "output_cardinality": 1.8367
}
- ]
+ }
+ ],
+ "split_plan_choice":
+ {
+ "split_cost": 2.488945919,
+ "record_count_for_split": 4,
+ "unsplit_cost": 25.72361682,
+ "split_chosen": true
},
-
+ "chosen_lateral_derived":
{
- "best_splitting":
- {
- "table": "t2",
- "key": "idx_a",
- "record_count": 4,
- "cost": 2.488945919,
- "unsplit_cost": 25.72361682
- }
+ "startup_cost": 9.955783677,
+ "lateral_cost": 2.488945919,
+ "records": 1
}
- ]
-]
-select
-json_detailed(json_extract(trace, '$**.lateral_derived'))
-from
-information_schema.optimizer_trace;
-json_detailed(json_extract(trace, '$**.lateral_derived'))
-[
+ },
{
- "startup_cost": 9.955783677,
- "splitting_cost": 2.488945919,
- "records": 1
+ "indexes_for_splitting":
+ [
+ ]
}
]
drop table t1,t2;
diff --git a/mysql-test/main/opt_trace.test b/mysql-test/main/opt_trace.test
index 66a333d7dc5..16788695e6a 100644
--- a/mysql-test/main/opt_trace.test
+++ b/mysql-test/main/opt_trace.test
@@ -729,19 +729,20 @@ from t1 join
on t1.a=t.a
where t1.b < 3;
-#
-# Just show that choose_best_splitting function has coverage in the
-# optimizer trace and re-optmization of child select inside it is distinct
-# from the rest of join optimization.
+select json_valid(trace) from information_schema.optimizer_trace;
+
+select
+ json_detailed(json_extract(trace, '$**.check_lateral_derived'))
+from
+ information_schema.optimizer_trace;
+
select
- json_detailed(json_extract(trace, '$**.choose_best_splitting'))
+ json_detailed(json_extract(trace, '$**.lateral_derived_info'))
from
information_schema.optimizer_trace;
-# Same as above. just to show that splitting plan has some coverage in the
-# trace.
select
- json_detailed(json_extract(trace, '$**.lateral_derived'))
+ json_detailed(json_extract(trace, '$**.lateral_derived_choice'))
from
information_schema.optimizer_trace;
diff --git a/sql/opt_split.cc b/sql/opt_split.cc
index 41b8acf5dcb..d482c2de2a4 100644
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -343,6 +343,9 @@ bool JOIN::check_for_splittable_materialized()
if (!partition_list)
return false;
+ Json_writer_object trace_wrapper(thd);
+ Json_writer_object trace_split(thd, "check_lateral_derived");
+
ORDER *ord;
Dynamic_array<SplM_field_ext_info> candidates(PSI_INSTRUMENT_MEM);
@@ -388,8 +391,10 @@ bool JOIN::check_for_splittable_materialized()
}
}
if (candidates.elements() == 0) // no candidates satisfying (8.1) && (8.2)
+ {
+ trace_split.add("not_applicable", "group list has no candidates");
return false;
-
+ }
/*
For each table from this join find the keys that can be used for ref access
of the fields mentioned in the 'array candidates'
@@ -447,7 +452,11 @@ bool JOIN::check_for_splittable_materialized()
}
if (!spl_field_cnt) // No candidate field can be accessed by ref => !(9)
+ {
+ trace_split.add("not_applicable",
+ "no candidate field can be accessed through ref");
return false;
+ }
/*
Create a structure of the type SplM_opt_info and fill it with
@@ -465,16 +474,20 @@ bool JOIN::check_for_splittable_materialized()
spl_opt_info->tables_usable_for_splitting= 0;
spl_opt_info->spl_field_cnt= spl_field_cnt;
spl_opt_info->spl_fields= spl_field;
- for (cand= cand_start; cand < cand_end; cand++)
{
- if (!cand->is_usable_for_ref_access)
- continue;
- spl_field->producing_item= cand->producing_item;
- spl_field->underlying_field= cand->underlying_field;
- spl_field->mat_field= cand->mat_field;
- spl_opt_info->tables_usable_for_splitting|=
- cand->underlying_field->table->map;
- spl_field++;
+ Json_writer_array trace_range(thd, "split_variants");
+ for (cand= cand_start; cand < cand_end; cand++)
+ {
+ if (!cand->is_usable_for_ref_access)
+ continue;
+ spl_field->producing_item= cand->producing_item;
+ trace_range.add(cand->producing_item);
+ spl_field->underlying_field= cand->underlying_field;
+ spl_field->mat_field= cand->mat_field;
+ spl_opt_info->tables_usable_for_splitting|=
+ cand->underlying_field->table->map;
+ spl_field++;
+ }
}
/* Attach this info to the table T */
@@ -738,7 +751,19 @@ void JOIN::add_keyuses_for_splitting()
spl_opt_info->unsplit_cost= best_positions[table_count-1].read_time +
oper_cost;
+ {
+ Json_writer_object trace(thd, "lateral_derived_info");
+ {
+ Json_writer_object cost_detail(thd, "cost_breakdown");
+ cost_detail.add("oper_cost", oper_cost);
+ cost_detail.add("read_time", best_positions[table_count-1].read_time);
+ cost_detail.add("join_read_time", best_read);
+ }
+ trace.add("unsplit_cost", spl_opt_info->unsplit_cost);
+ trace.add("unsplit_card", spl_opt_info->unsplit_card);
+ trace.add("rec_len", (ulonglong) rec_len);
+ }
if (!(save_qep= new Join_plan_state(table_count + 1)))
goto err;
@@ -793,6 +818,9 @@ void JOIN::add_keyuses_for_splitting()
void JOIN_TAB::add_keyuses_for_splitting()
{
+ Json_writer_object trace(join->thd);
+ trace.add_table_name(this);
+
DBUG_ASSERT(table->spl_opt_info != NULL);
SplM_opt_info *spl_opt_info= table->spl_opt_info;
spl_opt_info->join->add_keyuses_for_splitting();
@@ -895,6 +923,8 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
uint best_key= 0;
uint best_key_parts= 0;
+ Json_writer_object spl_trace(thd, "lateral_derived_choice");
+ Json_writer_array trace_indexes(thd, "indexes_for_splitting");
/*
Check whether there are keys that can be used to join T employing splitting
and if so, select the best out of such keys
@@ -939,6 +969,13 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
key_info->actual_rec_per_key(keyuse_ext->keypart);
if (rec_per_key < best_rec_per_key)
{
+ Json_writer_object trace(thd);
+ trace.add_table_name(keyuse_ext->table);
+ trace.add("index",
+ keyuse_ext->table->key_info[keyuse_ext->key].name.str);
+ trace.add("parts", (longlong)keyuse_ext->keypart + 1);
+ trace.add("rec_per_key", rec_per_key);
+
best_table= keyuse_ext->table;
best_key= keyuse_ext->key;
best_key_parts= keyuse_ext->keypart + 1;
@@ -951,17 +988,19 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
}
while (keyuse_ext->table == table);
}
+ trace_indexes.end();
spl_opt_info->last_plan= 0;
+
if (best_table)
{
/*
The key for splitting was chosen, look for the plan for this key
in the cache
*/
- Json_writer_array spl_trace(thd, "choose_best_splitting");
spl_plan= spl_opt_info->find_plan(best_table, best_key, best_key_parts);
if (!spl_plan)
{
+ Json_writer_array spl_trace(thd, "build_split_plan");
/*
The plan for the chosen key has not been found in the cache.
Build a new plan and save info on it in the cache
@@ -1010,12 +1049,11 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
if (unlikely(thd->trace_started()))
{
Json_writer_object wrapper(thd);
- Json_writer_object find_trace(thd, "best_splitting");
- find_trace.add("table", best_table->alias.c_ptr());
- find_trace.add("key", best_table->key_info[best_key].name);
- find_trace.add("record_count", record_count);
+ Json_writer_object find_trace(thd, "found_split_plan");
+ find_trace.add("oper_cost", oper_cost);
+ find_trace.add("join_best_read", join->best_read);
find_trace.add("cost", spl_plan->cost);
- find_trace.add("unsplit_cost", spl_opt_info->unsplit_cost);
+ find_trace.add("output_cardinality", split_card);
}
memcpy((char *) spl_plan->best_positions,
(char *) join->best_positions,
@@ -1023,8 +1061,17 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
best_key, remaining_tables, false);
}
+ else
+ {
+ Json_writer_object find_trace(thd, "cached_split_plan_found");
+ find_trace.add("cost", spl_plan->cost);
+ }
if (spl_plan)
{
+ Json_writer_object choice(thd, "split_plan_choice");
+ choice.add("split_cost", spl_plan->cost);
+ choice.add("record_count_for_split", record_count);
+ choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
if(record_count * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
{
/*
@@ -1032,7 +1079,10 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
the plan without splitting
*/
spl_opt_info->last_plan= spl_plan;
+ choice.add("split_chosen", true);
}
+ else
+ choice.add("split_chosen", false);
}
}
@@ -1044,9 +1094,9 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
startup_cost= record_count * spl_plan->cost;
records= (ha_rows) (records * spl_plan->split_sel);
- Json_writer_object trace(thd, "lateral_derived");
+ Json_writer_object trace(thd, "chosen_lateral_derived");
trace.add("startup_cost", startup_cost);
- trace.add("splitting_cost", spl_plan->cost);
+ trace.add("lateral_cost", spl_plan->cost);
trace.add("records", records);
}
else
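
For readers unfamiliar with the Json_writer classes used above: they are RAII wrappers that open a named node in the optimizer trace when constructed and close it when they go out of scope or when end() is called, which is what produces the nested lateral_derived_choice / indexes_for_splitting / split_plan_choice / chosen_lateral_derived structure shown in the opt_trace.result hunk. Below is a toy illustration of that pattern only; it is not the real Json_writer API (the real classes take a THD and write into the trace buffer), and the numbers are simply copied from the result above.

// Toy sketch of the RAII trace-writer pattern; output is illustrative, not valid JSON.
#include <cstdio>

class ToyTraceNode
{
  const char *close_str;
  bool ended= false;
public:
  ToyTraceNode(const char *name, const char *open, const char *close) : close_str(close)
  { printf("\"%s\": %s\n", name, open); }              // opening the node on construction
  void add(const char *key, double v)      { printf("  \"%s\": %g,\n", key, v); }
  void add(const char *key, const char *v) { printf("  \"%s\": \"%s\",\n", key, v); }
  void end() { if (!ended) { printf("%s,\n", close_str); ended= true; } }
  ~ToyTraceNode() { end(); }                           // closing the node at scope exit
};

int main()
{
  ToyTraceNode spl_trace("lateral_derived_choice", "{", "}");  // like Json_writer_object spl_trace(thd, ...)
  {
    ToyTraceNode indexes("indexes_for_splitting", "[", "]");
    indexes.add("index", "idx_a");
    indexes.end();                         // closed explicitly, mirroring trace_indexes.end()
    ToyTraceNode choice("split_plan_choice", "{", "}");
    choice.add("unsplit_cost", 25.72361682);
    choice.add("split_cost", 2.488945919);
    choice.add("record_count_for_split", 4.0);
  }                                        // split_plan_choice closes here, at end of scope
  ToyTraceNode chosen("chosen_lateral_derived", "{", "}");
  chosen.add("startup_cost", 9.955783677);
  chosen.add("lateral_cost", 2.488945919);
  return 0;                                // chosen, then spl_trace, close in their destructors
}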

[Commits] 4dac8c1cb79: Better Optimize Trace support for LATERAL DERIVED optimization
by Sergei Petrunia 01 Aug '21
revision-id: 4dac8c1cb79f9204a8eb566915f65cb01d66424f (mariadb-10.5.11-55-g4dac8c1cb79)
parent(s): 91e925e199ce61623f8413bfa789d0e7098c3d72
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-08-01 23:24:41 +0300
message:
Better Optimize Trace support for LATERAL DERIVED optimization
---
sql/opt_split.cc | 81 +++++++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 62 insertions(+), 19 deletions(-)
diff --git a/sql/opt_split.cc b/sql/opt_split.cc
index 41b8acf5dcb..c7a2726edc3 100644
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -343,6 +343,9 @@ bool JOIN::check_for_splittable_materialized()
if (!partition_list)
return false;
+ Json_writer_object trace_wrapper(thd);
+ Json_writer_object trace_split(thd, "check_lateral_derived");
+
ORDER *ord;
Dynamic_array<SplM_field_ext_info> candidates(PSI_INSTRUMENT_MEM);
@@ -388,8 +391,10 @@ bool JOIN::check_for_splittable_materialized()
}
}
if (candidates.elements() == 0) // no candidates satisfying (8.1) && (8.2)
+ {
+ trace_split.add("not_applicable", "group list has no candidates");
return false;
-
+ }
/*
For each table from this join find the keys that can be used for ref access
of the fields mentioned in the 'array candidates'
@@ -447,7 +452,11 @@ bool JOIN::check_for_splittable_materialized()
}
if (!spl_field_cnt) // No candidate field can be accessed by ref => !(9)
+ {
+ trace_split.add("not_applicable",
+ "no candidate field can be accessed through ref");
return false;
+ }
/*
Create a structure of the type SplM_opt_info and fill it with
@@ -465,16 +474,20 @@ bool JOIN::check_for_splittable_materialized()
spl_opt_info->tables_usable_for_splitting= 0;
spl_opt_info->spl_field_cnt= spl_field_cnt;
spl_opt_info->spl_fields= spl_field;
- for (cand= cand_start; cand < cand_end; cand++)
{
- if (!cand->is_usable_for_ref_access)
- continue;
- spl_field->producing_item= cand->producing_item;
- spl_field->underlying_field= cand->underlying_field;
- spl_field->mat_field= cand->mat_field;
- spl_opt_info->tables_usable_for_splitting|=
- cand->underlying_field->table->map;
- spl_field++;
+ Json_writer_array trace_range(thd, "split_variants");
+ for (cand= cand_start; cand < cand_end; cand++)
+ {
+ if (!cand->is_usable_for_ref_access)
+ continue;
+ spl_field->producing_item= cand->producing_item;
+ trace_range.add(cand->producing_item);
+ spl_field->underlying_field= cand->underlying_field;
+ spl_field->mat_field= cand->mat_field;
+ spl_opt_info->tables_usable_for_splitting|=
+ cand->underlying_field->table->map;
+ spl_field++;
+ }
}
/* Attach this info to the table T */
@@ -738,7 +751,12 @@ void JOIN::add_keyuses_for_splitting()
spl_opt_info->unsplit_cost= best_positions[table_count-1].read_time +
oper_cost;
-
+ {
+ Json_writer_object trace(thd, "lateral_derived");
+ trace.add("unsplit_cost", spl_opt_info->unsplit_cost);
+ trace.add("unsplit_card", spl_opt_info->unsplit_card);
+ trace.add("rec_len", (ulonglong) rec_len);
+ }
if (!(save_qep= new Join_plan_state(table_count + 1)))
goto err;
@@ -793,6 +811,9 @@ void JOIN::add_keyuses_for_splitting()
void JOIN_TAB::add_keyuses_for_splitting()
{
+ Json_writer_object trace(join->thd);
+ trace.add_table_name(this);
+
DBUG_ASSERT(table->spl_opt_info != NULL);
SplM_opt_info *spl_opt_info= table->spl_opt_info;
spl_opt_info->join->add_keyuses_for_splitting();
@@ -895,6 +916,8 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
uint best_key= 0;
uint best_key_parts= 0;
+ Json_writer_object spl_trace(thd, "lateral_plan_choice");
+ Json_writer_array trace_indexes(thd, "indexes_for_splitting");
/*
Check whether there are keys that can be used to join T employing splitting
and if so, select the best out of such keys
@@ -939,6 +962,13 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
key_info->actual_rec_per_key(keyuse_ext->keypart);
if (rec_per_key < best_rec_per_key)
{
+ Json_writer_object trace(thd);
+ trace.add_table_name(keyuse_ext->table);
+ trace.add("index",
+ keyuse_ext->table->key_info[keyuse_ext->key].name.str);
+ trace.add("parts", (longlong)keyuse_ext->keypart + 1);
+ trace.add("rec_per_key", rec_per_key);
+
best_table= keyuse_ext->table;
best_key= keyuse_ext->key;
best_key_parts= keyuse_ext->keypart + 1;
@@ -951,17 +981,19 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
}
while (keyuse_ext->table == table);
}
+ trace_indexes.end();
spl_opt_info->last_plan= 0;
+
if (best_table)
{
/*
The key for splitting was chosen, look for the plan for this key
in the cache
*/
- Json_writer_array spl_trace(thd, "choose_best_splitting");
spl_plan= spl_opt_info->find_plan(best_table, best_key, best_key_parts);
if (!spl_plan)
{
+ Json_writer_array spl_trace(thd, "build_split_plan");
/*
The plan for the chosen key has not been found in the cache.
Build a new plan and save info on it in the cache
@@ -1010,12 +1042,11 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
if (unlikely(thd->trace_started()))
{
Json_writer_object wrapper(thd);
- Json_writer_object find_trace(thd, "best_splitting");
- find_trace.add("table", best_table->alias.c_ptr());
- find_trace.add("key", best_table->key_info[best_key].name);
- find_trace.add("record_count", record_count);
+ Json_writer_object find_trace(thd, "found_split_plan");
+ find_trace.add("oper_cost", oper_cost);
+ find_trace.add("join_best_read", join->best_read);
find_trace.add("cost", spl_plan->cost);
- find_trace.add("unsplit_cost", spl_opt_info->unsplit_cost);
+ find_trace.add("output_cardinality", split_card);
}
memcpy((char *) spl_plan->best_positions,
(char *) join->best_positions,
@@ -1023,8 +1054,17 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
best_key, remaining_tables, false);
}
+ else
+ {
+ Json_writer_object find_trace(thd, "cached_split_plan_found");
+ find_trace.add("cost", spl_plan->cost);
+ }
if (spl_plan)
{
+ Json_writer_object choice(thd, "split_plan_choice");
+ choice.add("split_cost", spl_plan->cost);
+ choice.add("record_count_for_split", record_count);
+ choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
if(record_count * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
{
/*
@@ -1032,7 +1072,10 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
the plan without splitting
*/
spl_opt_info->last_plan= spl_plan;
+ choice.add("split_chosen", true);
}
+ else
+ choice.add("split_chosen", false);
}
}
@@ -1044,9 +1087,9 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
startup_cost= record_count * spl_plan->cost;
records= (ha_rows) (records * spl_plan->split_sel);
- Json_writer_object trace(thd, "lateral_derived");
+ Json_writer_object trace(thd, "chosen_lateral_derived");
trace.add("startup_cost", startup_cost);
- trace.add("splitting_cost", spl_plan->cost);
+ trace.add("lateral_cost", spl_plan->cost);
trace.add("records", records);
}
else
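
For reference, the decision recorded under the new split_plan_choice node reduces to a single comparison: splitting is picked only when refilling the lateral derived table record_count times is cheaper than the one-shot materialization, with a 0.01 margin. A minimal standalone sketch of that comparison (hypothetical helper, not the server function):

#include <cstdio>

// Hypothetical restatement of the comparison traced under "split_plan_choice".
// split_cost is the cost of one refill of the lateral derived table,
// record_count the number of lookups into it, and unsplit_cost the cost of
// materializing it once without splitting.
static bool split_chosen(double record_count, double split_cost,
                         double unsplit_cost)
{
  return record_count * split_cost < unsplit_cost - 0.01;
}

int main()
{
  // Example values only; the real figures come from the optimizer trace.
  printf("split_chosen: %s\n",
         split_chosen(4, 2.5, 25.7) ? "true" : "false");
  return 0;
}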

[Commits] 93e964a8f55: Better Optimize Trace support for LATERAL DERIVED optimization
by Sergei Petrunia 30 Jul '21
revision-id: 93e964a8f5545c1688fcf1101ea298403cc6dbe1 (mariadb-10.5.11-55-g93e964a8f55)
parent(s): 91e925e199ce61623f8413bfa789d0e7098c3d72
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-07-30 22:53:00 +0300
message:
Better Optimize Trace support for LATERAL DERIVED optimization
---
sql/opt_split.cc | 86 ++++++++++++++++++++++++++++++++++++++++++-------------
sql/sql_array.h | 2 ++
sql/sql_select.cc | 5 ++++
3 files changed, 73 insertions(+), 20 deletions(-)
diff --git a/sql/opt_split.cc b/sql/opt_split.cc
index 41b8acf5dcb..61e4eed1b6f 100644
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -188,6 +188,7 @@
#include "mariadb.h"
#include "sql_select.h"
#include "opt_trace.h"
+#include "sql_test.h" /* for print_keyuse_array_for_trace */
/* Info on a splitting field */
struct SplM_field_info
@@ -343,6 +344,9 @@ bool JOIN::check_for_splittable_materialized()
if (!partition_list)
return false;
+ Json_writer_object trace_wrapper(thd);
+ Json_writer_object trace_split(thd, "check_lateral_derived");
+
ORDER *ord;
Dynamic_array<SplM_field_ext_info> candidates(PSI_INSTRUMENT_MEM);
@@ -388,8 +392,10 @@ bool JOIN::check_for_splittable_materialized()
}
}
if (candidates.elements() == 0) // no candidates satisfying (8.1) && (8.2)
+ {
+ trace_split.add("not_applicable", "group list has no candidates");
return false;
-
+ }
/*
For each table from this join find the keys that can be used for ref access
of the fields mentioned in the 'array candidates'
@@ -447,7 +453,11 @@ bool JOIN::check_for_splittable_materialized()
}
if (!spl_field_cnt) // No candidate field can be accessed by ref => !(9)
+ {
+ trace_split.add("not_applicable",
+ "no candidate field can be accessed through ref");
return false;
+ }
/*
Create a structure of the type SplM_opt_info and fill it with
@@ -465,16 +475,20 @@ bool JOIN::check_for_splittable_materialized()
spl_opt_info->tables_usable_for_splitting= 0;
spl_opt_info->spl_field_cnt= spl_field_cnt;
spl_opt_info->spl_fields= spl_field;
- for (cand= cand_start; cand < cand_end; cand++)
{
- if (!cand->is_usable_for_ref_access)
- continue;
- spl_field->producing_item= cand->producing_item;
- spl_field->underlying_field= cand->underlying_field;
- spl_field->mat_field= cand->mat_field;
- spl_opt_info->tables_usable_for_splitting|=
- cand->underlying_field->table->map;
- spl_field++;
+ Json_writer_array trace_range(thd, "split_variants");
+ for (cand= cand_start; cand < cand_end; cand++)
+ {
+ if (!cand->is_usable_for_ref_access)
+ continue;
+ spl_field->producing_item= cand->producing_item;
+ trace_range.add(cand->producing_item);
+ spl_field->underlying_field= cand->underlying_field;
+ spl_field->mat_field= cand->mat_field;
+ spl_opt_info->tables_usable_for_splitting|=
+ cand->underlying_field->table->map;
+ spl_field++;
+ }
}
/* Attach this info to the table T */
@@ -738,7 +752,16 @@ void JOIN::add_keyuses_for_splitting()
spl_opt_info->unsplit_cost= best_positions[table_count-1].read_time +
oper_cost;
-
+ {
+ Json_writer_object obj(thd);
+ {
+ //Json_writer_object(thd, "added_keyuses");
+ //print_keyuse_array_for_trace(thd, &ext_keyuses_for_splitting);
+ }
+ obj.add("unsplit_cost", spl_opt_info->unsplit_cost);
+ obj.add("unsplit_card", spl_opt_info->unsplit_card);
+ obj.add("rec_len", (ulonglong) rec_len);
+ }
if (!(save_qep= new Join_plan_state(table_count + 1)))
goto err;
@@ -894,7 +917,10 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
SplM_plan_info *spl_plan= 0;
uint best_key= 0;
uint best_key_parts= 0;
-
+
+ Json_writer_array spl_trace(thd, "lateral_plan_choice");
+ Json_writer_array wrapper(thd);
+ Json_writer_array trace_indexes(thd, "indexes_for_splitting");
/*
Check whether there are keys that can be used to join T employing splitting
and if so, select the best out of such keys
@@ -939,6 +965,13 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
key_info->actual_rec_per_key(keyuse_ext->keypart);
if (rec_per_key < best_rec_per_key)
{
+ Json_writer_object trace(thd);
+ trace.add_table_name(keyuse_ext->table);
+ trace.add("index",
+ keyuse_ext->table->key_info[keyuse_ext->key].name.str);
+ trace.add("parts", (longlong)keyuse_ext->keypart + 1);
+ trace.add("rec_per_key", rec_per_key);
+
best_table= keyuse_ext->table;
best_key= keyuse_ext->key;
best_key_parts= keyuse_ext->keypart + 1;
@@ -951,17 +984,19 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
}
while (keyuse_ext->table == table);
}
+ trace_indexes.end();
spl_opt_info->last_plan= 0;
+
if (best_table)
{
/*
The key for splitting was chosen, look for the plan for this key
in the cache
*/
- Json_writer_array spl_trace(thd, "choose_best_splitting");
spl_plan= spl_opt_info->find_plan(best_table, best_key, best_key_parts);
if (!spl_plan)
{
+ Json_writer_array spl_trace(thd, "build_split_plan");
/*
The plan for the chosen key has not been found in the cache.
Build a new plan and save info on it in the cache
@@ -1010,12 +1045,9 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
if (unlikely(thd->trace_started()))
{
Json_writer_object wrapper(thd);
- Json_writer_object find_trace(thd, "best_splitting");
- find_trace.add("table", best_table->alias.c_ptr());
- find_trace.add("key", best_table->key_info[best_key].name);
- find_trace.add("record_count", record_count);
+ Json_writer_object find_trace(thd, "found_split_plan");
find_trace.add("cost", spl_plan->cost);
- find_trace.add("unsplit_cost", spl_opt_info->unsplit_cost);
+ find_trace.add("output_cardinality", split_card);
}
memcpy((char *) spl_plan->best_positions,
(char *) join->best_positions,
@@ -1023,8 +1055,19 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
best_key, remaining_tables, false);
}
+ else
+ {
+ Json_writer_object wrapper(thd);
+ Json_writer_object find_trace(thd, "cached_split_plan_found");
+ find_trace.add("cost", spl_plan->cost);
+ }
if (spl_plan)
{
+ Json_writer_object wrapper(thd);
+ Json_writer_object choice(thd, "split_plan_choice");
+ choice.add("split_cost", spl_plan->cost);
+ choice.add("record_count_for_split", record_count);
+ choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
if(record_count * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
{
/*
@@ -1032,7 +1075,10 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
the plan without splitting
*/
spl_opt_info->last_plan= spl_plan;
+ choice.add("split_chosen", true);
}
+ else
+ choice.add("split_chosen", false);
}
}
@@ -1044,9 +1090,9 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
startup_cost= record_count * spl_plan->cost;
records= (ha_rows) (records * spl_plan->split_sel);
- Json_writer_object trace(thd, "lateral_derived");
+ Json_writer_object trace(thd, "chosen_lateral_derived");
trace.add("startup_cost", startup_cost);
- trace.add("splitting_cost", spl_plan->cost);
+ trace.add("lateral_cost", spl_plan->cost);
trace.add("records", records);
}
else
diff --git a/sql/sql_array.h b/sql/sql_array.h
index b6de1b18d78..244d4a5be06 100644
--- a/sql/sql_array.h
+++ b/sql/sql_array.h
@@ -289,6 +289,8 @@ template <class Elem> class Dynamic_array
{
my_qsort2(array.buffer, array.elements, sizeof(Elem), (qsort2_cmp)cmp_func, data);
}
+
+ DYNAMIC_ARRAY *impl() { return &array; }
};
typedef Bounds_checked_array<Item*> Ref_ptr_array;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 7057ed1b5e1..538696b2c7a 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -5484,7 +5484,12 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
s->scan_time();
if (s->table->is_splittable())
+ {
+ Json_writer_object trace(thd);
+ trace.add_table_name(s);
+ Json_writer_object trace_lateral(thd, "lateral_derived");
s->add_keyuses_for_splitting();
+ }
/*
Set a max range of how many seeks we can expect when using keys

[Commits] 91e925e199c: MDEV-24325: Optimizer trace doesn't cover LATERAL DERIVED
by Sergei Petrunia 28 Jul '21
revision-id: 91e925e199ce61623f8413bfa789d0e7098c3d72 (mariadb-10.5.11-54-g91e925e199c)
parent(s): 0805cdebd33eb39515d05041a924df12396ebc69
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-07-28 19:56:51 +0300
message:
MDEV-24325: Optimizer trace doesn't cover LATERAL DERIVED
(cherry-pick into 10.5)
Provide basic coverage in the Optimizer Trace
---
mysql-test/main/opt_trace.result | 116 +++++++++++++++++++++++++++++++++++++++
mysql-test/main/opt_trace.test | 49 +++++++++++++++++
sql/opt_split.cc | 17 ++++++
sql/sql_select.cc | 2 +-
4 files changed, 183 insertions(+), 1 deletion(-)
diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result
index 8f912e0500d..9bf1cda18a3 100644
--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
@@ -8794,4 +8794,120 @@ json_detailed(json_extract(trace, '$**.in_to_subquery_conversion'))
set in_predicate_conversion_threshold=@tmp;
drop table t0;
# End of 10.5 tests
+#
+# MDEV-24325: Optimizer trace doesn't cover LATERAL DERIVED
+#
+create table t1 (a int, b int, index idx_b(b)) engine=myisam;
+insert into t1 values
+(8,3), (5,7), (1,2), (2,1), (9,7), (7,5), (2,2), (7,3),
+(9,3), (8,1), (4,5), (2,3);
+create table t2 (a int, b int, c char(127), index idx_a(a)) engine=myisam;
+insert into t2 values
+(7,10,'x'), (1,20,'a'), (2,23,'b'), (7,18,'z'), (1,30,'c'),
+(4,71,'d'), (3,15,'x'), (7,82,'y'), (8,12,'t'), (4,15,'b'),
+(11,33,'a'), (10,42,'u'), (4,53,'p'), (10,17,'r'), (2,90,'x'),
+(17,10,'s'), (11,20,'v'), (12,23,'y'), (17,18,'a'), (11,30,'d'),
+(24,71,'h'), (23,15,'i'), (27,82,'k'), (28,12,'p'), (24,15,'q'),
+(31,33,'f'), (30,42,'h'), (40,53,'m'), (30,17,'o'), (21,90,'b'),
+(37,10,'e'), (31,20,'g'), (32,23,'f'), (37,18,'n'), (41,30,'l'),
+(54,71,'j'), (53,15,'w'), (57,82,'z'), (58,12,'k'), (54,15,'p'),
+(61,33,'c'), (60,42,'a'), (62,53,'x'), (67,17,'g'), (64,90,'v');
+insert into t2 select a+10, b+10, concat(c,'f') from t2;
+analyze table t1,t2;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status OK
+explain
+select t1.a,t.s,t.m
+from t1 join
+(select a, sum(t2.b) as s, min(t2.c) as m from t2 group by t2.a) t
+on t1.a=t.a
+where t1.b < 3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 range idx_b idx_b 5 NULL 4 Using index condition; Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.a 2
+2 LATERAL DERIVED t2 ref idx_a idx_a 5 test.t1.a 1
+select
+json_detailed(json_extract(trace, '$**.choose_best_splitting'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.choose_best_splitting'))
+[
+
+ [
+
+ {
+ "considered_execution_plans":
+ [
+
+ {
+ "plan_prefix":
+ [
+ ],
+ "table": "t2",
+ "best_access_path":
+ {
+ "considered_access_paths":
+ [
+
+ {
+ "access_type": "ref",
+ "index": "idx_a",
+ "used_range_estimates": false,
+ "cause": "not available",
+ "rows": 1.8367,
+ "cost": 2.000585794,
+ "chosen": true
+ },
+
+ {
+ "type": "scan",
+ "chosen": false,
+ "cause": "cost"
+ }
+ ],
+ "chosen_access_method":
+ {
+ "type": "ref",
+ "records": 1.8367,
+ "cost": 2.000585794,
+ "uses_join_buffering": false
+ }
+ },
+ "rows_for_plan": 1.8367,
+ "cost_for_plan": 2.367925794,
+ "cost_for_sorting": 1.8367,
+ "estimated_join_cardinality": 1.8367
+ }
+ ]
+ },
+
+ {
+ "best_splitting":
+ {
+ "table": "t2",
+ "key": "idx_a",
+ "record_count": 4,
+ "cost": 2.488945919,
+ "unsplit_cost": 25.72361682
+ }
+ }
+ ]
+]
+select
+json_detailed(json_extract(trace, '$**.lateral_derived'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.lateral_derived'))
+[
+
+ {
+ "startup_cost": 9.955783677,
+ "splitting_cost": 2.488945919,
+ "records": 1
+ }
+]
+drop table t1,t2;
set optimizer_trace='enabled=off';
diff --git a/mysql-test/main/opt_trace.test b/mysql-test/main/opt_trace.test
index ecb6658e338..66a333d7dc5 100644
--- a/mysql-test/main/opt_trace.test
+++ b/mysql-test/main/opt_trace.test
@@ -697,4 +697,53 @@ set in_predicate_conversion_threshold=@tmp;
drop table t0;
--echo # End of 10.5 tests
+
+--echo #
+--echo # MDEV-24325: Optimizer trace doesn't cover LATERAL DERIVED
+--echo #
+create table t1 (a int, b int, index idx_b(b)) engine=myisam;
+insert into t1 values
+(8,3), (5,7), (1,2), (2,1), (9,7), (7,5), (2,2), (7,3),
+(9,3), (8,1), (4,5), (2,3);
+
+create table t2 (a int, b int, c char(127), index idx_a(a)) engine=myisam;
+insert into t2 values
+ (7,10,'x'), (1,20,'a'), (2,23,'b'), (7,18,'z'), (1,30,'c'),
+ (4,71,'d'), (3,15,'x'), (7,82,'y'), (8,12,'t'), (4,15,'b'),
+ (11,33,'a'), (10,42,'u'), (4,53,'p'), (10,17,'r'), (2,90,'x'),
+ (17,10,'s'), (11,20,'v'), (12,23,'y'), (17,18,'a'), (11,30,'d'),
+ (24,71,'h'), (23,15,'i'), (27,82,'k'), (28,12,'p'), (24,15,'q'),
+ (31,33,'f'), (30,42,'h'), (40,53,'m'), (30,17,'o'), (21,90,'b'),
+ (37,10,'e'), (31,20,'g'), (32,23,'f'), (37,18,'n'), (41,30,'l'),
+ (54,71,'j'), (53,15,'w'), (57,82,'z'), (58,12,'k'), (54,15,'p'),
+ (61,33,'c'), (60,42,'a'), (62,53,'x'), (67,17,'g'), (64,90,'v');
+
+insert into t2 select a+10, b+10, concat(c,'f') from t2;
+
+analyze table t1,t2;
+
+explain
+select t1.a,t.s,t.m
+from t1 join
+ (select a, sum(t2.b) as s, min(t2.c) as m from t2 group by t2.a) t
+ on t1.a=t.a
+where t1.b < 3;
+
+#
+# Just show that choose_best_splitting function has coverage in the
+# optimizer trace and re-optimization of the child select inside it is distinct
+# from the rest of join optimization.
+select
+ json_detailed(json_extract(trace, '$**.choose_best_splitting'))
+from
+ information_schema.optimizer_trace;
+
+# Same as above, just to show that the splitting plan has some coverage in the
+# trace.
+select
+ json_detailed(json_extract(trace, '$**.lateral_derived'))
+from
+ information_schema.optimizer_trace;
+
+drop table t1,t2;
set optimizer_trace='enabled=off';
diff --git a/sql/opt_split.cc b/sql/opt_split.cc
index 316919c4a81..41b8acf5dcb 100644
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -187,6 +187,7 @@
#include "mariadb.h"
#include "sql_select.h"
+#include "opt_trace.h"
/* Info on a splitting field */
struct SplM_field_info
@@ -957,6 +958,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
The key for splitting was chosen, look for the plan for this key
in the cache
*/
+ Json_writer_array spl_trace(thd, "choose_best_splitting");
spl_plan= spl_opt_info->find_plan(best_table, best_key, best_key_parts);
if (!spl_plan)
{
@@ -1005,6 +1007,16 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
spl_plan->cost= join->best_positions[join->table_count-1].read_time +
+ oper_cost;
+ if (unlikely(thd->trace_started()))
+ {
+ Json_writer_object wrapper(thd);
+ Json_writer_object find_trace(thd, "best_splitting");
+ find_trace.add("table", best_table->alias.c_ptr());
+ find_trace.add("key", best_table->key_info[best_key].name);
+ find_trace.add("record_count", record_count);
+ find_trace.add("cost", spl_plan->cost);
+ find_trace.add("unsplit_cost", spl_opt_info->unsplit_cost);
+ }
memcpy((char *) spl_plan->best_positions,
(char *) join->best_positions,
sizeof(POSITION) * join->table_count);
@@ -1031,6 +1043,11 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
{
startup_cost= record_count * spl_plan->cost;
records= (ha_rows) (records * spl_plan->split_sel);
+
+ Json_writer_object trace(thd, "lateral_derived");
+ trace.add("startup_cost", startup_cost);
+ trace.add("splitting_cost", spl_plan->cost);
+ trace.add("records", records);
}
else
startup_cost= spl_opt_info->unsplit_cost;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 80bc24cd3cd..7057ed1b5e1 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -7449,7 +7449,6 @@ best_access_path(JOIN *join,
DBUG_ENTER("best_access_path");
Json_writer_object trace_wrapper(thd, "best_access_path");
- Json_writer_array trace_paths(thd, "considered_access_paths");
bitmap_clear_all(eq_join_set);
@@ -7457,6 +7456,7 @@ best_access_path(JOIN *join,
if (s->table->is_splittable())
spl_plan= s->choose_best_splitting(record_count, remaining_tables);
+ Json_writer_array trace_paths(thd, "considered_access_paths");
if (s->keyuse)
{ /* Use key if possible */
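
The figures in the trace output above are consistent with the formulas in choose_best_splitting(): startup_cost = record_count * spl_plan->cost, and splitting wins because that product stays below unsplit_cost. A small check using the values copied from the test result (a sketch, not part of the test suite):

#include <cassert>
#include <cmath>
#include <cstdio>

int main()
{
  // Values copied from the trace printed above.
  double record_count= 4;
  double split_cost=   2.488945919;   // "cost" under best_splitting
  double unsplit_cost= 25.72361682;   // "unsplit_cost"

  double startup_cost= record_count * split_cost;
  printf("startup_cost = %.9f\n", startup_cost);
  // Agrees with "startup_cost": 9.955783677 under lateral_derived
  // (up to rounding of the printed trace).
  assert(std::fabs(startup_cost - 9.955783677) < 1e-6);

  // Splitting is chosen: 9.9558 < 25.7236 - 0.01.
  assert(record_count * split_cost < unsplit_cost - 0.01);
  return 0;
}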

[Commits] c276daf8d35: MDEV-24325: Optimizer trace doesn't cover LATERAL DERIVED
by Sergei Petrunia 28 Jul '21
revision-id: c276daf8d3528b05784bba9706fab4b307c18426 (mariadb-10.4.20-49-gc276daf8d35)
parent(s): 2173f382ca5e02b8c05ae2d75b040df701bf497e
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-07-28 19:54:11 +0300
message:
MDEV-24325: Optimizer trace doesn't cover LATERAL DERIVED
(cherry-pick into 10.5)
Provide basic coverage in the Optimizer Trace
---
mysql-test/main/opt_trace.result | 116 +++++++++++++++++++++++++++++++++++++++
mysql-test/main/opt_trace.test | 50 +++++++++++++++++
sql/opt_split.cc | 17 ++++++
sql/sql_select.cc | 2 +-
4 files changed, 184 insertions(+), 1 deletion(-)
diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result
index 0ced5f19e14..023f2a30745 100644
--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
@@ -8601,3 +8601,119 @@ set max_session_mem_used=default;
#
# End of 10.4 tests
#
+#
+# MDEV-24325: Optimizer trace doesn't cover LATERAL DERIVED
+#
+create table t1 (a int, b int, index idx_b(b)) engine=myisam;
+insert into t1 values
+(8,3), (5,7), (1,2), (2,1), (9,7), (7,5), (2,2), (7,3),
+(9,3), (8,1), (4,5), (2,3);
+create table t2 (a int, b int, c char(127), index idx_a(a)) engine=myisam;
+insert into t2 values
+(7,10,'x'), (1,20,'a'), (2,23,'b'), (7,18,'z'), (1,30,'c'),
+(4,71,'d'), (3,15,'x'), (7,82,'y'), (8,12,'t'), (4,15,'b'),
+(11,33,'a'), (10,42,'u'), (4,53,'p'), (10,17,'r'), (2,90,'x'),
+(17,10,'s'), (11,20,'v'), (12,23,'y'), (17,18,'a'), (11,30,'d'),
+(24,71,'h'), (23,15,'i'), (27,82,'k'), (28,12,'p'), (24,15,'q'),
+(31,33,'f'), (30,42,'h'), (40,53,'m'), (30,17,'o'), (21,90,'b'),
+(37,10,'e'), (31,20,'g'), (32,23,'f'), (37,18,'n'), (41,30,'l'),
+(54,71,'j'), (53,15,'w'), (57,82,'z'), (58,12,'k'), (54,15,'p'),
+(61,33,'c'), (60,42,'a'), (62,53,'x'), (67,17,'g'), (64,90,'v');
+insert into t2 select a+10, b+10, concat(c,'f') from t2;
+analyze table t1,t2;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status OK
+explain
+select t1.a,t.s,t.m
+from t1 join
+(select a, sum(t2.b) as s, min(t2.c) as m from t2 group by t2.a) t
+on t1.a=t.a
+where t1.b < 3;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 range idx_b idx_b 5 NULL 4 Using index condition; Using where
+1 PRIMARY <derived2> ref key0 key0 5 test.t1.a 2
+2 LATERAL DERIVED t2 ref idx_a idx_a 5 test.t1.a 1
+select
+json_detailed(json_extract(trace, '$**.choose_best_splitting'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.choose_best_splitting'))
+[
+
+ [
+
+ {
+ "considered_execution_plans":
+ [
+
+ {
+ "plan_prefix":
+ [
+ ],
+ "table": "t2",
+ "best_access_path":
+ {
+ "considered_access_paths":
+ [
+
+ {
+ "access_type": "ref",
+ "index": "idx_a",
+ "used_range_estimates": false,
+ "cause": "not available",
+ "rows": 1.8367,
+ "cost": 2.000585794,
+ "chosen": true
+ },
+
+ {
+ "type": "scan",
+ "chosen": false,
+ "cause": "cost"
+ }
+ ],
+ "chosen_access_method":
+ {
+ "type": "ref",
+ "records": 1.8367,
+ "cost": 2.000585794,
+ "uses_join_buffering": false
+ }
+ },
+ "rows_for_plan": 1.8367,
+ "cost_for_plan": 2.367925794,
+ "cost_for_sorting": 1.8367,
+ "estimated_join_cardinality": 1.8367
+ }
+ ]
+ },
+
+ {
+ "best_splitting":
+ {
+ "table": "t2",
+ "key": "idx_a",
+ "record_count": 4,
+ "cost": 2.488945919,
+ "unsplit_cost": 25.72361682
+ }
+ }
+ ]
+]
+select
+json_detailed(json_extract(trace, '$**.lateral_derived'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.lateral_derived'))
+[
+
+ {
+ "startup_cost": 9.955783677,
+ "splitting_cost": 2.488945919,
+ "records": 1
+ }
+]
+drop table t1,t2;
diff --git a/mysql-test/main/opt_trace.test b/mysql-test/main/opt_trace.test
index 3fae7f34750..b3ba937e958 100644
--- a/mysql-test/main/opt_trace.test
+++ b/mysql-test/main/opt_trace.test
@@ -637,3 +637,53 @@ set max_session_mem_used=default;
--echo #
--echo # End of 10.4 tests
--echo #
+
+
+--echo #
+--echo # MDEV-24325: Optimizer trace doesn't cover LATERAL DERIVED
+--echo #
+create table t1 (a int, b int, index idx_b(b)) engine=myisam;
+insert into t1 values
+(8,3), (5,7), (1,2), (2,1), (9,7), (7,5), (2,2), (7,3),
+(9,3), (8,1), (4,5), (2,3);
+
+create table t2 (a int, b int, c char(127), index idx_a(a)) engine=myisam;
+insert into t2 values
+ (7,10,'x'), (1,20,'a'), (2,23,'b'), (7,18,'z'), (1,30,'c'),
+ (4,71,'d'), (3,15,'x'), (7,82,'y'), (8,12,'t'), (4,15,'b'),
+ (11,33,'a'), (10,42,'u'), (4,53,'p'), (10,17,'r'), (2,90,'x'),
+ (17,10,'s'), (11,20,'v'), (12,23,'y'), (17,18,'a'), (11,30,'d'),
+ (24,71,'h'), (23,15,'i'), (27,82,'k'), (28,12,'p'), (24,15,'q'),
+ (31,33,'f'), (30,42,'h'), (40,53,'m'), (30,17,'o'), (21,90,'b'),
+ (37,10,'e'), (31,20,'g'), (32,23,'f'), (37,18,'n'), (41,30,'l'),
+ (54,71,'j'), (53,15,'w'), (57,82,'z'), (58,12,'k'), (54,15,'p'),
+ (61,33,'c'), (60,42,'a'), (62,53,'x'), (67,17,'g'), (64,90,'v');
+
+insert into t2 select a+10, b+10, concat(c,'f') from t2;
+
+analyze table t1,t2;
+
+explain
+select t1.a,t.s,t.m
+from t1 join
+ (select a, sum(t2.b) as s, min(t2.c) as m from t2 group by t2.a) t
+ on t1.a=t.a
+where t1.b < 3;
+
+#
+# Just show that choose_best_splitting function has coverage in the
+# optimizer trace and re-optimization of the child select inside it is distinct
+# from the rest of join optimization.
+select
+ json_detailed(json_extract(trace, '$**.choose_best_splitting'))
+from
+ information_schema.optimizer_trace;
+
+# Same as above, just to show that the splitting plan has some coverage in the
+# trace.
+select
+ json_detailed(json_extract(trace, '$**.lateral_derived'))
+from
+ information_schema.optimizer_trace;
+
+drop table t1,t2;
\ No newline at end of file
diff --git a/sql/opt_split.cc b/sql/opt_split.cc
index d272638f00c..fc584d94c45 100644
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -187,6 +187,7 @@
#include "mariadb.h"
#include "sql_select.h"
+#include "opt_trace.h"
/* Info on a splitting field */
struct SplM_field_info
@@ -959,6 +960,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
The key for splitting was chosen, look for the plan for this key
in the cache
*/
+ Json_writer_array spl_trace(thd, "choose_best_splitting");
spl_plan= spl_opt_info->find_plan(best_table, best_key, best_key_parts);
if (!spl_plan)
{
@@ -1007,6 +1009,16 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
spl_plan->cost= join->best_positions[join->table_count-1].read_time +
+ oper_cost;
+ if (unlikely(thd->trace_started()))
+ {
+ Json_writer_object wrapper(thd);
+ Json_writer_object find_trace(thd, "best_splitting");
+ find_trace.add("table", best_table->alias.c_ptr());
+ find_trace.add("key", best_table->key_info[best_key].name);
+ find_trace.add("record_count", record_count);
+ find_trace.add("cost", spl_plan->cost);
+ find_trace.add("unsplit_cost", spl_opt_info->unsplit_cost);
+ }
memcpy((char *) spl_plan->best_positions,
(char *) join->best_positions,
sizeof(POSITION) * join->table_count);
@@ -1033,6 +1045,11 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
{
startup_cost= record_count * spl_plan->cost;
records= (ha_rows) (records * spl_plan->split_sel);
+
+ Json_writer_object trace(thd, "lateral_derived");
+ trace.add("startup_cost", startup_cost);
+ trace.add("splitting_cost", spl_plan->cost);
+ trace.add("records", records);
}
else
startup_cost= spl_opt_info->unsplit_cost;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 8a251ae339e..dcbed6cba25 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -7375,7 +7375,6 @@ best_access_path(JOIN *join,
DBUG_ENTER("best_access_path");
Json_writer_object trace_wrapper(thd, "best_access_path");
- Json_writer_array trace_paths(thd, "considered_access_paths");
bitmap_clear_all(eq_join_set);
@@ -7383,6 +7382,7 @@ best_access_path(JOIN *join,
if (s->table->is_splittable())
spl_plan= s->choose_best_splitting(record_count, remaining_tables);
+ Json_writer_array trace_paths(thd, "considered_access_paths");
if (s->keyuse)
{ /* Use key if possible */