revision-id: 767ccf27c70b7fbc746dacfcba3ebe14c0539a86 (mariadb-10.5.11-56-g767ccf27c70) parent(s): 398387f0e638ee027406e72e3460fb1bf755b31d author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2021-08-03 16:54:05 +0300 message: MDEV-26301: LATERAL DERIVED refills the temp. table too many times Add support of the fix in the optimizer part. Also added @@optimizer_lateral_lazy_refill to control this. --- mysql-test/main/derived_split_innodb.result | 117 ++++++++++++++++++++++++++++ mysql-test/main/derived_split_innodb.test | 87 +++++++++++++++++++++ mysql-test/main/opt_trace.result | 2 +- sql/opt_split.cc | 116 ++++++++++++++++++++++++++- sql/sql_class.h | 1 + sql/sql_select.cc | 2 +- sql/sql_select.h | 4 +- sql/sys_vars.cc | 8 ++ 8 files changed, 331 insertions(+), 6 deletions(-) diff --git a/mysql-test/main/derived_split_innodb.result b/mysql-test/main/derived_split_innodb.result index 9edf9a1f2ae..063a29d2366 100644 --- a/mysql-test/main/derived_split_innodb.result +++ b/mysql-test/main/derived_split_innodb.result @@ -241,3 +241,120 @@ set optimizer_switch='split_materialized=default'; set use_stat_tables=default; set optimizer_use_condition_selectivity=default; # End of 10.3 tests +# +# MDEV-26301: LATERAL DERIVED refills the temp. table too many times +# +create table t1(a int, b int); +insert into t1 select seq,seq from seq_1_to_5; +create table t2(a int, b int, key(a)); +insert into t2 +select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B; +create table t3(a int, b int, key(a)); +insert into t3 +select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B; +analyze table t1,t2,t3 persistent for all; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +test.t2 analyze status Engine-independent statistics collected +test.t2 analyze status Table is already up to date +test.t3 analyze status Engine-independent statistics collected +test.t3 analyze status Table is already up to date +explain +select * from +(t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 5 +1 SIMPLE t2 ref a a 5 test.t1.b 2 Using where +1 SIMPLE t3 ref a a 5 test.t1.b 3 Using where +create table t10 ( +grp_id int, +col1 int, +key(grp_id) +); +insert into t10 +select +A.seq, +B.seq +from +seq_1_to_100 A, +seq_1_to_100 B; +create table t11 ( +col1 int, +col2 int +); +insert into t11 +select A.seq, A.seq from seq_1_to_10 A; +analyze table t10,t11 persistent for all; +Table Op Msg_type Msg_text +test.t10 analyze status Engine-independent statistics collected +test.t10 analyze status Table is already up to date +test.t11 analyze status Engine-independent statistics collected +test.t11 analyze status OK +set optimizer_trace=1; +explain +select * from +( +(t1 left join t2 on t2.a=t1.b) +left join t3 on t3.a=t1.b +) left join (select grp_id, count(*) +from t10 left join t11 on t11.col1=t10.col1 +group by grp_id) T on T.grp_id=t1.b; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 5 +1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where +1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where +2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100 +2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +select +json_detailed(json_extract(trace, '$**.split_plan_choice')) +from +information_schema.optimizer_trace; 
+json_detailed(json_extract(trace, '$**.split_plan_choice')) +[ + + { + "unsplit_cost": 253440.0075, + "split_cost": 2535.968504, + "record_count_for_split": 30, + "split_chosen": true + } +] +select @@optimizer_lateral_lazy_refill; +@@optimizer_lateral_lazy_refill +0 +set @@optimizer_lateral_lazy_refill=1; +explain +select * from +( +(t1 left join t2 on t2.a=t1.b) +left join t3 on t3.a=t1.b +) left join (select grp_id, count(*) +from t10 left join t11 on t11.col1=t10.col1 +group by grp_id) T on T.grp_id=t1.b; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 5 +1 PRIMARY t2 ref a a 5 test.t1.b 2 Using where +1 PRIMARY t3 ref a a 5 test.t1.b 3 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t1.b 10 Using where +2 LATERAL DERIVED t10 ref grp_id grp_id 5 test.t1.b 100 +2 LATERAL DERIVED t11 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join) +select +json_detailed(json_extract(trace, '$**.split_plan_choice')) +from +information_schema.optimizer_trace; +json_detailed(json_extract(trace, '$**.split_plan_choice')) +[ + + { + "unsplit_cost": 253440.0075, + "split_cost": 2535.968504, + "record_count_for_split": 30, + "join_prefix_fanout_for_split": 5, + "split_chosen": true + } +] +set optimizer_trace=0; +set @@optimizer_lateral_lazy_refill=default; +drop table t1,t2,t3, t10, t11; diff --git a/mysql-test/main/derived_split_innodb.test b/mysql-test/main/derived_split_innodb.test index bee9ef497b6..71a734b09df 100644 --- a/mysql-test/main/derived_split_innodb.test +++ b/mysql-test/main/derived_split_innodb.test @@ -193,3 +193,90 @@ set use_stat_tables=default; set optimizer_use_condition_selectivity=default; --echo # End of 10.3 tests + +--echo # +--echo # MDEV-26301: LATERAL DERIVED refills the temp. table too many times +--echo # + +# 5 values +create table t1(a int, b int); +insert into t1 select seq,seq from seq_1_to_5; + +# 5 value groups of size 2 each +create table t2(a int, b int, key(a)); +insert into t2 +select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B; + +# 5 value groups of size 3 each +create table t3(a int, b int, key(a)); +insert into t3 +select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B; + +analyze table t1,t2,t3 persistent for all; + +explain +select * from + (t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b; + +# Now, create tables for Groups. 
+ +create table t10 ( + grp_id int, + col1 int, + key(grp_id) +); + +# 100 groups of 100 values each +insert into t10 +select + A.seq, + B.seq +from + seq_1_to_100 A, + seq_1_to_100 B; + +# and X10 multiplier + +create table t11 ( + col1 int, + col2 int +); +insert into t11 +select A.seq, A.seq from seq_1_to_10 A; + + +analyze table t10,t11 persistent for all; +set optimizer_trace=1; + +explain +select * from + ( + (t1 left join t2 on t2.a=t1.b) + left join t3 on t3.a=t1.b + ) left join (select grp_id, count(*) + from t10 left join t11 on t11.col1=t10.col1 + group by grp_id) T on T.grp_id=t1.b; +select + json_detailed(json_extract(trace, '$**.split_plan_choice')) +from + information_schema.optimizer_trace; + +select @@optimizer_lateral_lazy_refill; +set @@optimizer_lateral_lazy_refill=1; + +explain +select * from + ( + (t1 left join t2 on t2.a=t1.b) + left join t3 on t3.a=t1.b + ) left join (select grp_id, count(*) + from t10 left join t11 on t11.col1=t10.col1 + group by grp_id) T on T.grp_id=t1.b; +select + json_detailed(json_extract(trace, '$**.split_plan_choice')) +from + information_schema.optimizer_trace; + +set optimizer_trace=0; +set @@optimizer_lateral_lazy_refill=default; +drop table t1,t2,t3, t10, t11; diff --git a/mysql-test/main/opt_trace.result b/mysql-test/main/opt_trace.result index d4ba8eccb91..97d3b48ee85 100644 --- a/mysql-test/main/opt_trace.result +++ b/mysql-test/main/opt_trace.result @@ -8954,9 +8954,9 @@ json_detailed(json_extract(trace, '$**.lateral_derived_choice')) ], "split_plan_choice": { + "unsplit_cost": 25.72361682, "split_cost": 2.488945919, "record_count_for_split": 4, - "unsplit_cost": 25.72361682, "split_chosen": true }, "chosen_lateral_derived": diff --git a/sql/opt_split.cc b/sql/opt_split.cc index d482c2de2a4..a569c323792 100644 --- a/sql/opt_split.cc +++ b/sql/opt_split.cc @@ -876,6 +876,70 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start, } + +/* + given a table bitmap, find the prefix of positions array that covers the + tables in the bitmap. +*/ + +uint get_prefix_size(const POSITION *positions, uint max_position, table_map map) +{ + map= map & ~PSEUDO_TABLE_BITS; // remove OUTER_REF_TABLE_BIT + table_map covered= 0; + uint i; + + for (i=0; i < max_position; i++) + { + covered |= positions[i].table->table->map; + if (!(map & ~covered)) + break; + } + return i; +} + + +/* + Walk through the KEYUSE_EXT objects for given {table,key,key_part} and + find the shortest prefix of positions array that we have a keyuse object for. 
+*/
+
+uint get_min_prefix_for_key_part(const POSITION *positions, uint max_prefix_size,
+                                 KEYUSE_EXT *keyuse)
+{
+  TABLE *table= keyuse->table;
+  uint key= keyuse->key;
+  uint keypart= keyuse->keypart;
+  uint prefix_size= max_prefix_size;
+
+  while (keyuse->table == table && keyuse->key == key &&
+         keyuse->keypart == keypart)
+  {
+    uint cur_size= get_prefix_size(positions, max_prefix_size,
+                                   keyuse->needed_in_prefix);
+    if (cur_size < prefix_size)
+      prefix_size= cur_size;
+    keyuse++;
+  }
+  return prefix_size;
+}
+
+
+/*
+  Get the fanout of a join prefix
+*/
+double get_join_prefix_fanout(const POSITION *positions, uint prefix_size)
+{
+  double fanout = 1.0;
+  for (uint i=0; i <= prefix_size; i++)
+  {
+    if (positions[i].records_read > 1e-30)
+      fanout *= positions[i].records_read;
+    if (positions[i].cond_selectivity > 1e-30)
+      fanout *= positions[i].cond_selectivity;
+  }
+  return fanout;
+}
+
 /**
   @brief
     Choose the best splitting to extend the evaluated partial join
@@ -906,7 +970,9 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
     if the plan has been chosen, NULL - otherwise.
 */
 
-SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
+SplM_plan_info * JOIN_TAB::choose_best_splitting(const POSITION *join_prefix,
+                                                 uint top_prefix_size,
+                                                 double record_count,
                                                  table_map remaining_tables)
 {
   SplM_opt_info *spl_opt_info= table->spl_opt_info;
@@ -922,6 +988,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
   SplM_plan_info *spl_plan= 0;
   uint best_key= 0;
   uint best_key_parts= 0;
+  uint best_min_prefix_size;
   Json_writer_object spl_trace(thd, "lateral_derived_choice");
   Json_writer_array trace_indexes(thd, "indexes_for_splitting");
 
@@ -942,6 +1009,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
       uint key= keyuse_ext->key;
       KEYUSE_EXT *key_keyuse_ext_start= keyuse_ext;
       key_part_map found_parts= 0;
+      uint prefix_len= 0;
       do
       {
         if (keyuse_ext->needed_in_prefix & remaining_tables)
@@ -951,11 +1019,31 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
         }
         if (!(keyuse_ext->keypart_map & found_parts))
         {
+          /*
+            1. found_parts is empty and this is the first key part we've found
+            2. or, we are looking at the first keyuse object for this keypart,
+               and found_parts has a bit set for the previous key part.
+          */
           if ((!found_parts && !keyuse_ext->keypart) ||
               (found_parts && ((keyuse_ext->keypart_map >> 1) & found_parts)))
+          {
+            if (thd->variables.optimizer_lateral_lazy_refill > 0)
+            {
+              uint min_for_kp= get_min_prefix_for_key_part(join_prefix,
+                                                           top_prefix_size,
+                                                           keyuse_ext);
+              // The prefix length needed for {kp1, kp2, ...} is the max of the
+              // prefix lengths needed for each key part.
+              if (min_for_kp > prefix_len)
+                prefix_len= min_for_kp;
+            }
+
             found_parts|= keyuse_ext->keypart_map;
+          }
           else
           {
+            // This is a new key part but it doesn't form a continuous
+            // index prefix. Skip all KEYUSEs for this index.
             do
             {
               keyuse_ext++;
@@ -981,6 +1069,11 @@
           best_key_parts= keyuse_ext->keypart + 1;
           best_rec_per_key= rec_per_key;
           best_key_keyuse_ext_start= key_keyuse_ext_start;
+          if (thd->variables.optimizer_lateral_lazy_refill > 0)
+          {
+            best_min_prefix_size= prefix_len;
+            trace.add("min_prefix_len", (longlong)prefix_len);
+          }
         }
         keyuse_ext++;
       }
@@ -1069,10 +1162,21 @@
   if (spl_plan)
   {
     Json_writer_object choice(thd, "split_plan_choice");
+    choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
+    choice.add("split_cost", spl_plan->cost);
     choice.add("record_count_for_split", record_count);
-    choice.add("unsplit_cost", spl_opt_info->unsplit_cost);
-    if(record_count * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
+
+    // psergey-todo: best_min_prefix_size allows us to compute a tighter
+    // bound than record_count.
+    double fanout= record_count;
+    if (thd->variables.optimizer_lateral_lazy_refill > 0)
+    {
+      fanout= get_join_prefix_fanout(join_prefix, best_min_prefix_size);
+      choice.add("join_prefix_fanout_for_split", fanout);
+    }
+
+    if(fanout * spl_plan->cost < spl_opt_info->unsplit_cost - 0.01)
     {
       /*
         The best plan that employs splitting is cheaper than
@@ -1134,12 +1238,14 @@ bool JOIN::inject_best_splitting_cond(table_map remaining_tables)
   List<Item> *inj_cond_list= &spl_opt_info->inj_cond_list;
   List_iterator<KEY_FIELD> li(spl_opt_info->added_key_fields);
   KEY_FIELD *added_key_field;
+  table_map needed_tables= 0;
   while ((added_key_field= li++))
   {
     if (remaining_tables & added_key_field->val->used_tables())
       continue;
     if (inj_cond_list->push_back(added_key_field->cond, thd->mem_root))
       return true;
+    needed_tables |= added_key_field->val->used_tables();
   }
   DBUG_ASSERT(inj_cond_list->elements);
   switch (inj_cond_list->elements) {
@@ -1156,6 +1262,10 @@
   if (inject_cond_into_where(inj_cond))
     return true;
 
+  // psergey-todo: walk the join tabs until we are satisfying needed_tables...
+  // and inject the check at that point.
+  // Another solution is to just add a cache...
+
   select_lex->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED;
   st_select_lex_unit *unit= select_lex->master_unit();
   unit->uncacheable|= UNCACHEABLE_DEPENDENT_INJECTED;
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 62ce5c8a2d2..8964a8aa4f7 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -689,6 +689,7 @@ typedef struct system_variables
   ulong optimizer_search_depth;
   ulong optimizer_selectivity_sampling_limit;
   ulong optimizer_use_condition_selectivity;
+  ulong optimizer_lateral_lazy_refill;
   ulong use_stat_tables;
   double sample_percentage;
   ulong histogram_size;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 7057ed1b5e1..5b901df2e54 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -7455,7 +7455,7 @@ best_access_path(JOIN *join,
   loose_scan_opt.init(join, s, remaining_tables);
 
   if (s->table->is_splittable())
-    spl_plan= s->choose_best_splitting(record_count, remaining_tables);
+    spl_plan= s->choose_best_splitting(join_positions, idx, record_count, remaining_tables);
 
   Json_writer_array trace_paths(thd, "considered_access_paths");
   if (s->keyuse)
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 29e42ff8ef8..7ffab774248 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -682,7 +682,9 @@ typedef struct st_join_table {
   void partial_cleanup();
   void add_keyuses_for_splitting();
-  SplM_plan_info *choose_best_splitting(double record_count,
+  SplM_plan_info *choose_best_splitting(const POSITION *join_prefix,
+                                        uint top_prefix_size,
+                                        double record_count,
                                         table_map remaining_tables);
   bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables,
                      bool is_const_table);
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index a8e4ff2dded..e87d371453d 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -2672,6 +2672,14 @@ static Sys_var_ulong Sys_optimizer_use_condition_selectivity(
        SESSION_VAR(optimizer_use_condition_selectivity), CMD_LINE(REQUIRED_ARG),
        VALID_RANGE(1, 5), DEFAULT(4), BLOCK_SIZE(1));
 
+static Sys_var_ulong Sys_optimizer_lateral_lazy_refill(
+       "optimizer_lateral_lazy_refill",
+       "Controls the Lazy Lateral Table Refill fix. 0 means disabled, "
+       "1 means enabled in the optimizer. Higher values are reserved "
+       "for future use.",
+       SESSION_VAR(optimizer_lateral_lazy_refill), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, 2), DEFAULT(0), BLOCK_SIZE(1));
+
 static Sys_var_ulong Sys_optimizer_search_depth(
        "optimizer_search_depth",
        "Maximum depth of search performed by the query optimizer. Values "
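
Aside for reviewers (not part of the patch): below is a small, self-contained C++ sketch of the split-vs-unsplit cost comparison made in choose_best_splitting(), with and without @@optimizer_lateral_lazy_refill. PositionSketch and prefix_fanout() are simplified, hypothetical stand-ins for the server's POSITION array and get_join_prefix_fanout(); the cost figures are rounded from the optimizer trace in the test case above.

// Illustration only: simplified stand-ins, not the real MariaDB structures.
#include <cstdio>

struct PositionSketch            // stand-in for one POSITION of the join prefix
{
  double records_read;           // rows expected from this table
  double cond_selectivity;       // selectivity of conditions checked here
};

// Mirrors what get_join_prefix_fanout() computes: multiply rows and
// selectivity over the first prefix_size+1 positions, skipping
// degenerate (near-zero) values.
static double prefix_fanout(const PositionSketch *pos, unsigned prefix_size)
{
  double fanout= 1.0;
  for (unsigned i= 0; i <= prefix_size; i++)
  {
    if (pos[i].records_read > 1e-30)
      fanout*= pos[i].records_read;
    if (pos[i].cond_selectivity > 1e-30)
      fanout*= pos[i].cond_selectivity;
  }
  return fanout;
}

int main()
{
  // Join prefix t1, t2, t3 from the test case: 5 rows from t1, fanout 2 from
  // t2 and 3 from t3.  Only t1.b feeds the derived table's key (T.grp_id=t1.b).
  PositionSketch prefix[]= { {5, 1.0}, {2, 1.0}, {3, 1.0} };

  double unsplit_cost= 253440.0;                  // rounded trace figures
  double split_cost= 2536.0;
  double record_count= prefix_fanout(prefix, 2);  // whole prefix: 30
  double lazy_fanout= prefix_fanout(prefix, 0);   // minimal prefix (t1): 5

  // Old bound: one refill of the lateral derived table per row of the prefix.
  printf("eager: split if %g < %g\n",
         record_count * split_cost, unsplit_cost - 0.01);
  // New bound: refills are driven only by the shortest prefix that covers the
  // tables needed by the derived table's key.
  printf("lazy:  split if %g < %g\n",
         lazy_fanout * split_cost, unsplit_cost - 0.01);
  return 0;
}

Both bounds pick the split plan for this query, but the lazy bound (5 estimated refills instead of 30) is tighter and can keep splitting attractive in cases where multiplying by the full prefix row count would reject it.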