[Commits] 9f6066441c1: MDEV-26301: LATERAL DERIVED refills the temp. table too many times

8 Aug 2021

revision-id: 9f6066441c1f014c48770e52164cf2fab93ca078 (mariadb-10.5.4-837-g9f6066441c1)
parent(s): 4ba4c06038165726dad6a18d85c2b54446ea5ef8
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-08-08 19:15:45 +0300
message:

MDEV-26301: LATERAL DERIVED refills the temp. table too many times

---
 mysql-test/main/derived_split_innodb.result | 220 ++++++++++++++++++++++++++++
 mysql-test/main/derived_split_innodb.test   | 153 +++++++++++++++++++
 mysql-test/main/mysqld--help.result         |   3 +-
 mysql-test/main/opt_trace.result            |   2 +-
 sql/opt_split.cc                            | 192 +++++++++++++++++++++++-
 sql/sql_derived.cc                          |   9 ++
 sql/sql_lex.cc                              |   1 +
 sql/sql_lex.h                               |   3 +
 sql/sql_priv.h                              |   1 +
 sql/sql_select.cc                           |   5 +-
 sql/sql_select.h                            |  28 +++-
 sql/sql_union.cc                            |   1 +
 sql/sys_vars.cc                             |   1 +
 13 files changed, 609 insertions(+), 10 deletions(-)

diff --git a/mysql-test/main/derived_split_innodb.result b/mysql-test/main/derived_split_innodb.result
index 9edf9a1f2ae..d37339b8393 100644
--- a/mysql-test/main/derived_split_innodb.result
+++ b/mysql-test/main/derived_split_innodb.result
@@ -241,3 +241,223 @@ set optimizer_switch='split_materialized=default';
 set use_stat_tables=default;
 set optimizer_use_condition_selectivity=default;
 # End of 10.3 tests
+#
+# MDEV-26301: LATERAL DERIVED refills the temp. table too many times
+#
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+analyze table t1,t2,t3 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	Engine-independent statistics collected
+test.t1	analyze	status	OK
+test.t2	analyze	status	Engine-independent statistics collected
+test.t2	analyze	status	Table is already up to date
+test.t3	analyze	status	Engine-independent statistics collected
+test.t3	analyze	status	Table is already up to date
+explain
+select * from
+(t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	5	
+1	SIMPLE	t2	ref	a	a	5	test.t1.b	2	Using where
+1	SIMPLE	t3	ref	a	a	5	test.t1.b	3	Using where
+create table t10 (
+grp_id int,
+col1 int,
+key(grp_id)
+);
+insert into t10
+select
+A.seq,
+B.seq
+from
+seq_1_to_100 A,
+seq_1_to_100 B;
+create table t11 (
+col1 int,
+col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+analyze table t10,t11 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t10	analyze	status	Engine-independent statistics collected
+test.t10	analyze	status	Table is already up to date
+test.t11	analyze	status	Engine-independent statistics collected
+test.t11	analyze	status	OK
+set optimizer_trace=1;
+explain
+select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
+1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
+1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
+2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+    
+    {
+        "unsplit_cost": 253440.0075,
+        "split_cost": 2535.968504,
+        "record_count_for_split": 30,
+        "split_chosen": true
+    }
+]
+set @tmp_26301=@@optimizer_switch;
+set optimizer_switch='split_materialized_is_lazy=on';
+explain
+select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
+1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
+1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
+2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+    
+    {
+        "unsplit_cost": 253440.0075,
+        "split_cost": 2535.968504,
+        "record_count_for_split": 30,
+        "join_prefix_fanout_for_split": 5,
+        "split_chosen": true
+    }
+]
+create table t21 (pk int primary key);
+insert into t21 values (1),(2),(3);
+create table t22 (pk int primary key);
+insert into t22 values (1),(2),(3);
+explain
+select * from
+t21, t22,
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1 and t22.pk=2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t21	const	PRIMARY	PRIMARY	4	const	1	Using index
+1	PRIMARY	t22	const	PRIMARY	PRIMARY	4	const	1	Using index
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
+1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
+1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
+2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+    
+    {
+        "unsplit_cost": 253440.0075,
+        "split_cost": 2535.968504,
+        "record_count_for_split": 30,
+        "join_prefix_fanout_for_split": 5,
+        "split_chosen": true
+    }
+]
+explain
+select * from
+t21,
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from
+t22 join t10 left join t11 on t11.col1=t10.col1
+where
+t22.pk=1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t21	const	PRIMARY	PRIMARY	4	const	1	Using index
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
+1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
+1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+2	LATERAL DERIVED	t22	const	PRIMARY	PRIMARY	4	const	1	Using index
+2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
+2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
+select
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+information_schema.optimizer_trace;
+json_detailed(json_extract(trace, '$**.split_plan_choice'))
+[
+    
+    {
+        "unsplit_cost": 253440.0075,
+        "split_cost": 2535.968504,
+        "record_count_for_split": 30,
+        "join_prefix_fanout_for_split": 5,
+        "split_chosen": true
+    }
+]
+create table t5 (
+pk int primary key
+);
+insert into t5 select seq from seq_1_to_1000;
+explain
+select * from
+t21,
+(
+(((t1 join t5 on t5.pk=t1.b)) left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from
+t22 join t10 left join t11 on t11.col1=t10.col1
+where
+t22.pk=1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t21	const	PRIMARY	PRIMARY	4	const	1	Using index
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	Using where
+1	PRIMARY	t5	eq_ref	PRIMARY	PRIMARY	4	test.t1.b	1	Using index
+1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	
+1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+2	LATERAL DERIVED	t22	const	PRIMARY	PRIMARY	4	const	1	Using index
+2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t5.pk	100	
+2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
+set optimizer_trace=0;
+set optimizer_switch=@tmp_26301;
+drop table t1,t2,t3,t5, t10, t11, t21, t22;
diff --git a/mysql-test/main/derived_split_innodb.test b/mysql-test/main/derived_split_innodb.test
index bee9ef497b6..1369018f532 100644
--- a/mysql-test/main/derived_split_innodb.test
+++ b/mysql-test/main/derived_split_innodb.test
@@ -193,3 +193,156 @@ set use_stat_tables=default;
 set optimizer_use_condition_selectivity=default;
 
 --echo # End of 10.3 tests
+
+--echo #
+--echo # MDEV-26301: LATERAL DERIVED refills the temp. table too many times
+--echo #
+
+# 5 values
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+
+# 5 value groups of size 2 each
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+
+# 5 value groups of size 3 each
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+
+analyze table t1,t2,t3 persistent for all;
+
+explain
+select * from
+  (t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
+
+# Now, create tables for Groups.
+
+create table t10 (
+  grp_id int,
+  col1 int,
+  key(grp_id)
+);
+
+# 100 groups of 100 values each
+insert into t10
+select
+  A.seq,
+  B.seq
+from
+  seq_1_to_100 A,
+  seq_1_to_100 B;
+
+# and X10 multiplier
+
+create table t11 (
+  col1 int,
+  col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+
+
+analyze table t10,t11 persistent for all;
+set optimizer_trace=1;
+
+explain
+select * from
+  (
+    (t1 left join t2 on t2.a=t1.b)
+    left join t3 on t3.a=t1.b
+  ) left join (select grp_id, count(*)
+               from t10 left join t11 on t11.col1=t10.col1
+               group by grp_id) T on T.grp_id=t1.b;
+select
+  json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+  information_schema.optimizer_trace;
+
+set @tmp_26301=@@optimizer_switch;
+set optimizer_switch='split_materialized_is_lazy=on';
+
+explain
+select * from
+  (
+    (t1 left join t2 on t2.a=t1.b)
+    left join t3 on t3.a=t1.b
+  ) left join (select grp_id, count(*)
+               from t10 left join t11 on t11.col1=t10.col1
+               group by grp_id) T on T.grp_id=t1.b;
+select
+  json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+  information_schema.optimizer_trace;
+
+create table t21 (pk int primary key);
+insert into t21 values (1),(2),(3);
+
+create table t22 (pk int primary key);
+insert into t22 values (1),(2),(3);
+
+# Same as above but throw in a couple of const tables.
+explain
+select * from
+  t21, t22,
+  (
+    (t1 left join t2 on t2.a=t1.b)
+    left join t3 on t3.a=t1.b
+  ) left join (select grp_id, count(*)
+               from t10 left join t11 on t11.col1=t10.col1
+               group by grp_id) T on T.grp_id=t1.b
+where
+  t21.pk=1 and t22.pk=2;
+
+select
+  json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+  information_schema.optimizer_trace;
+
+explain
+select * from
+  t21,
+  (
+    (t1 left join t2 on t2.a=t1.b)
+    left join t3 on t3.a=t1.b
+  ) left join (select grp_id, count(*)
+               from
+                 t22 join t10 left join t11 on t11.col1=t10.col1
+               where
+                 t22.pk=1
+               group by grp_id) T on T.grp_id=t1.b
+where
+  t21.pk=1;
+
+select
+  json_detailed(json_extract(trace, '$**.split_plan_choice'))
+from
+  information_schema.optimizer_trace;
+
+# And also add a non-const table
+
+create table t5 (
+  pk int primary key
+  );
+insert into t5 select seq from seq_1_to_1000;
+
+explain
+select * from
+  t21,
+  (
+    (((t1 join t5 on t5.pk=t1.b)) left join t2 on t2.a=t1.b)
+    left join t3 on t3.a=t1.b
+  ) left join (select grp_id, count(*)
+               from
+                 t22 join t10 left join t11 on t11.col1=t10.col1
+               where
+                 t22.pk=1
+               group by grp_id) T on T.grp_id=t1.b
+where
+  t21.pk=1;
+
+set optimizer_trace=0;
+set optimizer_switch=@tmp_26301;
+drop table t1,t2,t3,t5, t10, t11, t21, t22;
diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result
index 78616ae7c9d..6a38d720493 100644
--- a/mysql-test/main/mysqld--help.result
+++ b/mysql-test/main/mysqld--help.result
@@ -718,7 +718,8 @@ The following specify which files/extra groups are read (specified before remain
  extended_keys, exists_to_in, orderby_uses_equalities, 
  condition_pushdown_for_derived, split_materialized, 
  condition_pushdown_for_subquery, rowid_filter, 
- condition_pushdown_from_having, not_null_range_scan
+ condition_pushdown_from_having, not_null_range_scan, 
+ split_materialized_is_lazy
  --optimizer-trace=name 
  Controls tracing of the Optimizer:
  optimizer_trace=option=val[,option=val...], where option
diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result
index d4ba8eccb91..97d3b48ee85 100644
--- a/mysql-test/main/opt_trace.result
+++ b/mysql-test/main/opt_trace.result
@@ -8954,9 +8954,9 @@ json_detailed(json_extract(trace, '$**.lateral_derived_choice'))
         ],
         "split_plan_choice": 
         {
+            "unsplit_cost": 25.72361682,
             "split_cost": 2.488945919,
             "record_count_for_split": 4,
-            "unsplit_cost": 25.72361682,
             "split_chosen": true
         },
         "chosen_lateral_derived": 
diff --git a/sql/opt_split.cc b/sql/opt_split.cc
index d482c2de2a4..2d124a7fe2e 100644
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -876,11 +876,92 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
 }
 
 
+
+/*
+  @brief
+    Find shortest join order sub-prefix that covers the specified table_map.
+
+  @detail
+    Given a join prefix and a bitmap of table(s) in table_map, find the
+    shortest prefix of the prefix that covers the tables in the table_map.
+*/
+
+uint get_prefix_size(const POSITION *positions, uint max_pos, table_map map)
+{
+  map= map & ~PSEUDO_TABLE_BITS; // remove OUTER_REF_TABLE_BIT
+  table_map covered= 0;
+  uint i;
+
+  for (i=0; i < max_pos; i++)
+  {
+    covered |= positions[i].table->table->map;
+    if (!(map & ~covered))
+      break;
+  }
+  return i;
+}
+
+
+/*
+  @brief
+    Find the shortest join prefix that has a KEYUSE_EXT for a given key_part.
+
+  @detail
+    Given an array of KEYUSE objects for access to {table, key, key_part},
+    and a join order prefix (specified by array of POSITION structs),
+    find the shortest prefix for which there is a KEYUSE_EXT object describin
+    the access to they table.key.key_part
+
+  @note
+     KEYUSE_EXT objects are from inside the derived table, while the join
+     prefix is from the parent JOIN, so we need to use
+     KEYUSE_EXT::needed_in_prefix
+*/
+
+uint get_min_prefix_for_key_part(const POSITION *positions, uint max_prefix_size,
+                                 KEYUSE_EXT *keyuse)
+{
+  TABLE *table= keyuse->table;
+  uint key= keyuse->key;
+  uint keypart= keyuse->keypart;
+  uint prefix_size= max_prefix_size;
+
+  while (keyuse->table == table && keyuse->key == key &&
+         keyuse->keypart == keypart)
+  {
+    uint cur_size= get_prefix_size(positions, max_prefix_size,
+                                   keyuse->needed_in_prefix);
+    if (cur_size < prefix_size)
+      prefix_size= cur_size;
+    keyuse++;
+  }
+  return prefix_size;
+}
+
+
+/*
+  Get fanout of of a join prefix
+*/
+double get_join_prefix_fanout(const POSITION *positions, uint prefix_size)
+{
+  double fanout = 1.0;
+  for (uint i=0; i <= prefix_size; i++)
+  {
+    if (positions[i].records_read > 1e-30)
+      fanout *= positions[i].records_read;
+    if (positions[i].cond_selectivity > 1e-30)
+      fanout *= positions[i].cond_selectivity;
+  }
+  return fanout;
+}
+
 /**
   @brief
     Choose the best splitting to extend the evaluated partial join
 
   @param
+    join_prefix       Join order prefix built so far
+    prefix_size       Length of the join prefix
     record_count      estimated cardinality of the extended partial join
     remaining_tables  tables not joined yet
 
@@ -906,7 +987,9 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
     if the plan has been chosen, NULL - otherwise.
 */
 
-SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
+SplM_plan_info * JOIN_TAB::choose_best_splitting(const POSITION *join_prefix,
+                                                 uint prefix_size,
+                                                 double record_count,
                                                  table_map remaining_tables)
 {
   SplM_opt_info *spl_opt_info= table->spl_opt_info;
@@ -922,6 +1005,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
   SplM_plan_info *spl_plan= 0;
   uint best_key= 0;
   uint best_key_parts= 0;
+  uint best_min_prefix_size;
 
   Json_writer_object spl_trace(thd, "lateral_derived_choice");
   Json_writer_array trace_indexes(thd, "indexes_for_splitting");
@@ -942,6 +1026,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
       uint key= keyuse_ext->key;
       KEYUSE_EXT *key_keyuse_ext_start= keyuse_ext;
       key_part_map found_parts= 0;
+      uint prefix_len= 0;
       do
       {
         if (keyuse_ext->needed_in_prefix & remaining_tables)
@@ -951,11 +1036,40 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
         }
         if (!(keyuse_ext->keypart_map & found_parts))
 	{
+          /*
+            Check for
+            1. found_parts is empty and this is the first key part we've found
+            2. or, we are looking at the first keyuse object for this keypart,
+               and found_parts has a bit set for the previous key part.
+          */
           if ((!found_parts && !keyuse_ext->keypart) ||
               (found_parts && ((keyuse_ext->keypart_map >> 1) & found_parts)))
+          {
+            if (optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY))
+            {
+              /*
+                Find the shortest join prefix we'll need to get a lookup value
+                for this keypart read this key part
+              */
+              uint min_for_kp= get_min_prefix_for_key_part(join_prefix,
+                                                           prefix_size,
+                                                           keyuse_ext);
+              /*
+                The join prefix needed to read all keyparts is the longest
+                prefix of all key parts.
+              */
+              if (min_for_kp > prefix_len)
+                prefix_len= min_for_kp;
+            }
+
             found_parts|= keyuse_ext->keypart_map;
+          }
           else
 	  {
+            /*
+              This is a new key part but it doesn't form a continuous index
+              prefix. Skip all KEYUSEs for this index.
+            */
             do
 	    {
               keyuse_ext++;
@@ -981,6 +1095,11 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
 	  best_key_parts= keyuse_ext->keypart + 1;
           best_rec_per_key= rec_per_key;
           best_key_keyuse_ext_start= key_keyuse_ext_start;
+          if (optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY))
+          {
+            best_min_prefix_size= prefix_len;
+            trace.add("min_prefix_len", (longlong)prefix_len);
+          }
         }
         keyuse_ext++;
       }
@@ -1069,10 +1188,29 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
     if (spl_plan)
     {
       Json_writer_object choice(thd, "split_plan_choice");
+      choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
+
       choice.add("split_cost", spl_plan->cost);
       choice.add("record_count_for_split", record_count);
-      choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
-      if(record_count * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
+
+      double fanout= record_count;
+      if (optimizer_flag(thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY))
+      {
+        /*
+          The LATERAL DERIVED table will only be refilled if the values it
+          refers to change.
+          The values refer only to the prefix of the join prefix so far.
+          Adjust the number of LATERAL DERIVED refills accordingly.
+        */
+        double prefix_fanout= get_join_prefix_fanout(join_prefix,
+                                                     best_min_prefix_size);
+        if (prefix_fanout < record_count)
+          prefix_fanout= record_count;
+
+        choice.add("join_prefix_fanout_for_split", fanout);
+      }
+
+      if (fanout * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
       {
         /*
           The best plan that employs splitting is cheaper than
@@ -1112,6 +1250,10 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
   @param
     remaining_tables  used to filter out the equalities that cannot
                       be pushed.
+    lparams           if non-NULL, call lparams->add() to collect a list
+                      of the "parameter" items for the lateral subquery.
+                      The list will be used to reduce the number of times
+                      the lateral derived table is refilled.
 
   @details
     This function is called by JOIN_TAB::fix_splitting that is used
@@ -1128,18 +1270,23 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
     true   on failure
 */
 
-bool JOIN::inject_best_splitting_cond(table_map remaining_tables)
+bool JOIN::inject_best_splitting_cond(table_map remaining_tables,
+                                      Lateral_derived_parameters *lparams)
 {
   Item *inj_cond= 0;
   List<Item> *inj_cond_list= &spl_opt_info->inj_cond_list;
   List_iterator<KEY_FIELD> li(spl_opt_info->added_key_fields);
   KEY_FIELD *added_key_field;
+  table_map needed_tables= 0;
   while ((added_key_field= li++))
   {
     if (remaining_tables & added_key_field->val->used_tables())
       continue;
+    if (lparams && lparams->add_param(added_key_field->val))
+      return true;
     if (inj_cond_list->push_back(added_key_field->cond, thd->mem_root))
       return true;
+    needed_tables |= added_key_field->val->used_tables();
   }
   DBUG_ASSERT(inj_cond_list->elements);
   switch (inj_cond_list->elements) {
@@ -1230,8 +1377,15 @@ bool JOIN_TAB::fix_splitting(SplM_plan_info *spl_plan,
     memcpy((char *) md_join->best_positions,
            (char *) spl_plan->best_positions,
            sizeof(POSITION) * md_join->table_count);
-    if (md_join->inject_best_splitting_cond(remaining_tables))
+
+    Lateral_derived_parameters *lparams= NULL;
+    if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY))
+      lparams= new Lateral_derived_parameters;
+
+    if (md_join->inject_best_splitting_cond(remaining_tables, lparams))
       return true;
+
+    table->pos_in_table_list->get_unit()->lateral_derived_params= lparams;
     /*
       This is called for a proper work of JOIN::get_best_combination()
       called for the join that materializes T
@@ -1283,3 +1437,31 @@ bool JOIN::fix_all_splittings_in_plan()
   }
   return false;
 }
+
+
+bool Lateral_derived_parameters::add_param(Item *item)
+{
+  Cached_item *tmp;
+  if (!(tmp= new_Cached_item(current_thd, item, FALSE)) ||
+      list.push_back(tmp))
+    return true;
+
+  return false;
+}
+
+
+/*
+  @brief
+    Test if LATERAL DERIVED table's parameters changed since last invocation
+
+  @return
+     false  Parameter values have not changed since last invocation
+     true   Parameter values have changed since last invocation. Current values
+            of the parameters are now saved.
+*/
+bool Lateral_derived_parameters::need_refill()
+{
+  if (test_if_group_changed(list) == -1)
+    return false; // not changed
+  return true;
+}
diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc
index f8ee3475af8..25524c90da5 100644
--- a/sql/sql_derived.cc
+++ b/sql/sql_derived.cc
@@ -1233,6 +1233,15 @@ bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived)
       DBUG_RETURN(res);
   }
 
+  /*
+    For LATERAL DERIVED tables, check if we actually need to refill it. If its
+    "parameters" (i.e. items it refers to) have the same values, we don't need
+    to refill.
+  */
+  if (unit->lateral_derived_params &&
+     !unit->lateral_derived_params->need_refill())
+    DBUG_RETURN(false);
+
   if (unit->executed && !derived_is_recursive &&
       (unit->uncacheable & UNCACHEABLE_DEPENDENT))
   {
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index b9ca67182cf..5b3897b28fa 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -2915,6 +2915,7 @@ void st_select_lex_unit::init_query()
   columns_are_renamed= false;
   with_wrapped_tvc= false;
   have_except_all_or_intersect_all= false;
+  lateral_derived_params= NULL;
 }
 
 void st_select_lex::init_query()
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index 45a3bf72fa4..278ee7d5276 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -845,6 +845,7 @@ class JOIN;
 class select_unit;
 class Procedure;
 class Explain_query;
+class Lateral_derived_parameters;
 
 void delete_explain_query(LEX *lex);
 void create_explain_query(LEX *lex, MEM_ROOT *mem_root);
@@ -964,6 +965,8 @@ class st_select_lex_unit: public st_select_lex_node {
 
   bool columns_are_renamed;
 
+  Lateral_derived_parameters *lateral_derived_params;
+
   void init_query();
   st_select_lex* outer_select();
   const st_select_lex* first_select() const
diff --git a/sql/sql_priv.h b/sql/sql_priv.h
index 07f07a7150f..7cdb6a62d08 100644
--- a/sql/sql_priv.h
+++ b/sql/sql_priv.h
@@ -234,6 +234,7 @@
 #define OPTIMIZER_SWITCH_USE_ROWID_FILTER          (1ULL << 33)
 #define OPTIMIZER_SWITCH_COND_PUSHDOWN_FROM_HAVING (1ULL << 34)
 #define OPTIMIZER_SWITCH_NOT_NULL_RANGE_SCAN       (1ULL << 35)
+#define OPTIMIZER_SWITCH_SPLIT_MATERIALIZED_IS_LAZY (1ULL << 36)
 
 #define OPTIMIZER_SWITCH_DEFAULT   (OPTIMIZER_SWITCH_INDEX_MERGE | \
                                     OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 39e224ab024..85c3355cf08 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -7460,7 +7460,10 @@ best_access_path(JOIN      *join,
   loose_scan_opt.init(join, s, remaining_tables);
 
   if (s->table->is_splittable())
-    spl_plan= s->choose_best_splitting(record_count, remaining_tables);
+  {
+    spl_plan= s->choose_best_splitting(join_positions, idx, record_count,
+                                       remaining_tables);
+  }
   Json_writer_array trace_paths(thd, "considered_access_paths");
 
   if (s->keyuse)
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 29e42ff8ef8..2a9cb5464e6 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -249,6 +249,7 @@ class AGGR_OP;
 class Filesort;
 struct SplM_plan_info;
 class SplM_opt_info;
+class Lateral_derived_parameters;
 
 typedef struct st_join_table {
   TABLE		*table;
@@ -682,7 +683,9 @@ typedef struct st_join_table {
 
   void partial_cleanup();
   void add_keyuses_for_splitting();
-  SplM_plan_info *choose_best_splitting(double record_count,
+  SplM_plan_info *choose_best_splitting(const POSITION *join_prefix,
+                                        uint top_prefix_size,
+                                        double record_count,
                                         table_map remaining_tables);
   bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables,
                      bool is_const_table);
@@ -1795,7 +1798,8 @@ class JOIN :public Sql_alloc
   bool inject_cond_into_where(Item *injected_cond);
   bool check_for_splittable_materialized();
   void add_keyuses_for_splitting();
-  bool inject_best_splitting_cond(table_map remaining_tables);
+  bool inject_best_splitting_cond(table_map remaining_tables,
+                                  Lateral_derived_parameters *params);
   bool fix_all_splittings_in_plan();
   void make_notnull_conds_for_range_scans();
 
@@ -2532,4 +2536,24 @@ void propagate_new_equalities(THD *thd, Item *cond,
                               COND_EQUAL *inherited,
                               bool *is_simplifiable_cond);
 
+/*
+  A list of parameters for a derived table that uses LATERAL DERIVED
+  optimization.
+
+  The last values of the parameters are saved, which one can use to check
+  whether
+   - they've changed and one needs to refill the LATERAL DERIVED table
+   - they are not changed and refill is not needed.
+*/
+class Lateral_derived_parameters : public Sql_alloc
+{
+  List<Cached_item> list;
+public:
+  // Construction interface
+  bool add_param(Item *item);
+
+  // Execution interface
+  bool need_refill();
+};
+
 #endif /* SQL_SELECT_INCLUDED */
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
index b88d78c0db3..a0a89b1e401 100644
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@@ -2588,6 +2588,7 @@ bool st_select_lex_unit::cleanup()
   }
   columns_are_renamed= false;
   cleaned= 1;
+  lateral_derived_params= NULL;
 
   for (SELECT_LEX *sl= first_select(); sl; sl= sl->next_select())
     error|= sl->cleanup();
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index 2f7cf290dcd..725f407fc3b 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -2717,6 +2717,7 @@ export const char *optimizer_switch_names[]=
   "rowid_filter",
   "condition_pushdown_from_having",
   "not_null_range_scan",
+  "split_materialized_is_lazy",
   "default", 
   NullS
 };

    

psergey

tags

participants (1)