[Commits] f510aad: MDEV-26301 Split optimization refills temporary table too many times

6 Apr 2023

revision-id: f510aad1c7c649a50cd0ed435217e9f162c94bd8 (mariadb-10.4.28-75-gf510aad)
parent(s): a6780df49b443b172124e7e881ed0bea54d75907
author: Igor Babaev
committer: Igor Babaev
timestamp: 2023-04-06 10:48:23 -0700
message:

MDEV-26301 Split optimization refills temporary table too many times

This is an alternative patch that resolves the problem of unnecessary refills
of split derived tables. The patch also improves the estimate of the number
of such refills, lowering it in some cases, which allows wider usage of the
split optimization.
The patch is not intrusive and does not require any new optimizer switches.

---
 mysql-test/main/derived_split_innodb.result | 261 ++++++++++++++++++++++++++++
 mysql-test/main/derived_split_innodb.test   | 136 +++++++++++++++
 sql/opt_split.cc                            | 108 +++++++++---
 sql/sql_select.cc                           |  20 ++-
 sql/sql_select.h                            |  22 ++-
 5 files changed, 518 insertions(+), 29 deletions(-)

diff --git a/mysql-test/main/derived_split_innodb.result b/mysql-test/main/derived_split_innodb.result
index 04f79d3..8a6585d 100644
--- a/mysql-test/main/derived_split_innodb.result
+++ b/mysql-test/main/derived_split_innodb.result
@@ -284,3 +284,264 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 2	DERIVED	t4	ALL	NULL	NULL	NULL	NULL	40	Using filesort
 drop table t3, t4;
 # End of 10.3 tests
+#
+# MDEV-26301: Split optimization refills temporary table too many times
+#
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+analyze table t1,t2,t3 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	Engine-independent statistics collected
+test.t1	analyze	status	OK
+test.t2	analyze	status	Engine-independent statistics collected
+test.t2	analyze	status	Table is already up to date
+test.t3	analyze	status	Engine-independent statistics collected
+test.t3	analyze	status	Table is already up to date
+explain
+select * from
+(t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	5	
+1	SIMPLE	t2	ref	a	a	5	test.t1.b	2	Using where
+1	SIMPLE	t3	ref	a	a	5	test.t1.b	3	Using where
+create table t10 (
+grp_id int,
+col1 int,
+key(grp_id)
+);
+insert into t10
+select
+A.seq,
+B.seq
+from
+seq_1_to_100 A,
+seq_1_to_100 B;
+create table t11 (
+col1 int,
+col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+analyze table t10,t11 persistent for all;
+Table	Op	Msg_type	Msg_text
+test.t10	analyze	status	Engine-independent statistics collected
+test.t10	analyze	status	Table is already up to date
+test.t11	analyze	status	Engine-independent statistics collected
+test.t11	analyze	status	OK
+explain select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
+1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
+1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
+2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
+# The important part in the below output is:
+#        "lateral": 1,
+#        "query_block": {
+#          "select_id": 2,
+#          "r_loops": 5,  <-- must be 5, not 30.
+analyze format=json select * from
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b;
+ANALYZE
+{
+  "query_block": {
+    "select_id": 1,
+    "r_loops": 1,
+    "r_total_time_ms": "REPLACED",
+    "const_condition": "1",
+    "table": {
+      "table_name": "t1",
+      "access_type": "ALL",
+      "r_loops": 1,
+      "rows": 5,
+      "r_rows": 5,
+      "r_total_time_ms": "REPLACED",
+      "filtered": 100,
+      "r_filtered": 100
+    },
+    "table": {
+      "table_name": "t2",
+      "access_type": "ref",
+      "possible_keys": ["a"],
+      "key": "a",
+      "key_length": "5",
+      "used_key_parts": ["a"],
+      "ref": ["test.t1.b"],
+      "r_loops": 5,
+      "rows": 2,
+      "r_rows": 2,
+      "r_total_time_ms": "REPLACED",
+      "filtered": 100,
+      "r_filtered": 100,
+      "attached_condition": "trigcond(trigcond(t1.b is not null))"
+    },
+    "table": {
+      "table_name": "t3",
+      "access_type": "ref",
+      "possible_keys": ["a"],
+      "key": "a",
+      "key_length": "5",
+      "used_key_parts": ["a"],
+      "ref": ["test.t1.b"],
+      "r_loops": 10,
+      "rows": 3,
+      "r_rows": 3,
+      "r_total_time_ms": "REPLACED",
+      "filtered": 100,
+      "r_filtered": 100,
+      "attached_condition": "trigcond(trigcond(t1.b is not null))"
+    },
+    "table": {
+      "table_name": "<derived2>",
+      "access_type": "ref",
+      "possible_keys": ["key0"],
+      "key": "key0",
+      "key_length": "5",
+      "used_key_parts": ["grp_id"],
+      "ref": ["test.t1.b"],
+      "r_loops": 30,
+      "rows": 10,
+      "r_rows": 1,
+      "r_total_time_ms": "REPLACED",
+      "filtered": 100,
+      "r_filtered": 100,
+      "attached_condition": "trigcond(trigcond(t1.b is not null))",
+      "materialized": {
+        "lateral": 1,
+        "query_block": {
+          "select_id": 2,
+          "r_loops": 5,
+          "r_total_time_ms": "REPLACED",
+          "outer_ref_condition": "t1.b is not null",
+          "table": {
+            "table_name": "t10",
+            "access_type": "ref",
+            "possible_keys": ["grp_id"],
+            "key": "grp_id",
+            "key_length": "5",
+            "used_key_parts": ["grp_id"],
+            "ref": ["test.t1.b"],
+            "r_loops": 5,
+            "rows": 100,
+            "r_rows": 100,
+            "r_total_time_ms": "REPLACED",
+            "filtered": 100,
+            "r_filtered": 100
+          },
+          "block-nl-join": {
+            "table": {
+              "table_name": "t11",
+              "access_type": "ALL",
+              "r_loops": 5,
+              "rows": 10,
+              "r_rows": 10,
+              "r_total_time_ms": "REPLACED",
+              "filtered": 100,
+              "r_filtered": 100
+            },
+            "buffer_type": "flat",
+            "buffer_size": "1Kb",
+            "join_type": "BNL",
+            "attached_condition": "trigcond(t11.col1 = t10.col1)",
+            "r_filtered": 10
+          }
+        }
+      }
+    }
+  }
+}
+create table t21 (pk int primary key);
+insert into t21 values (1),(2),(3);
+create table t22 (pk int primary key);
+insert into t22 values (1),(2),(3);
+explain
+select * from
+t21, t22,
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from t10 left join t11 on t11.col1=t10.col1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1 and t22.pk=2;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t21	const	PRIMARY	PRIMARY	4	const	1	Using index
+1	PRIMARY	t22	const	PRIMARY	PRIMARY	4	const	1	Using index
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
+1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
+1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
+2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
+explain
+select * from
+t21,
+(
+(t1 left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from
+t22 join t10 left join t11 on t11.col1=t10.col1
+where
+t22.pk=1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t21	const	PRIMARY	PRIMARY	4	const	1	Using index
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	
+1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	Using where
+1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	Using where
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+2	LATERAL DERIVED	t22	const	PRIMARY	PRIMARY	4	const	1	Using index
+2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t1.b	100	
+2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
+create table t5 (
+pk int primary key
+);
+insert into t5 select seq from seq_1_to_1000;
+explain
+select * from
+t21,
+(
+(((t1 join t5 on t5.pk=t1.b)) left join t2 on t2.a=t1.b)
+left join t3 on t3.a=t1.b
+) left join (select grp_id, count(*)
+from
+t22 join t10 left join t11 on t11.col1=t10.col1
+where
+t22.pk=1
+group by grp_id) T on T.grp_id=t1.b
+where
+t21.pk=1;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t21	const	PRIMARY	PRIMARY	4	const	1	Using index
+1	PRIMARY	t1	ALL	NULL	NULL	NULL	NULL	5	Using where
+1	PRIMARY	t5	eq_ref	PRIMARY	PRIMARY	4	test.t1.b	1	Using index
+1	PRIMARY	t2	ref	a	a	5	test.t1.b	2	
+1	PRIMARY	t3	ref	a	a	5	test.t1.b	3	
+1	PRIMARY	<derived2>	ref	key0	key0	5	test.t1.b	10	Using where
+2	LATERAL DERIVED	t22	const	PRIMARY	PRIMARY	4	const	1	Using index
+2	LATERAL DERIVED	t10	ref	grp_id	grp_id	5	test.t5.pk	100	Using index condition
+2	LATERAL DERIVED	t11	ALL	NULL	NULL	NULL	NULL	10	Using where; Using join buffer (flat, BNL join)
+drop table t1,t2,t3,t5, t10, t11, t21, t22;
+# End of 10.4 tests
diff --git a/mysql-test/main/derived_split_innodb.test b/mysql-test/main/derived_split_innodb.test
index 2f74f5f..ab83850 100644
--- a/mysql-test/main/derived_split_innodb.test
+++ b/mysql-test/main/derived_split_innodb.test
@@ -227,3 +227,139 @@ where t3.b > 15;
 drop table t3, t4;
 
 --echo # End of 10.3 tests
+
+--source include/have_sequence.inc
+
+--echo #
+--echo # MDEV-26301: Split optimization refills temporary table too many times
+--echo #
+
+# 5 values
+create table t1(a int, b int);
+insert into t1 select seq,seq from seq_1_to_5;
+
+# 25 value groups of size 2 each
+create table t2(a int, b int, key(a));
+insert into t2
+select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
+
+# 5 value groups of size 3 each
+create table t3(a int, b int, key(a));
+insert into t3
+select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
+
+analyze table t1,t2,t3 persistent for all;
+
+explain
+select * from
+  (t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
+
+# Now, create tables for Groups.
+
+create table t10 (
+  grp_id int,
+  col1 int,
+  key(grp_id)
+);
+
+# 100 groups of 100 values each
+insert into t10
+select
+  A.seq,
+  B.seq
+from
+  seq_1_to_100 A,
+  seq_1_to_100 B;
+
+# and X10 multiplier
+
+create table t11 (
+  col1 int,
+  col2 int
+);
+insert into t11
+select A.seq, A.seq from seq_1_to_10 A;
+
+analyze table t10,t11 persistent for all;
+
+let $q1=
+select * from
+  (
+    (t1 left join t2 on t2.a=t1.b)
+    left join t3 on t3.a=t1.b
+  ) left join (select grp_id, count(*)
+               from t10 left join t11 on t11.col1=t10.col1
+               group by grp_id) T on T.grp_id=t1.b;
+
+eval
+explain $q1;
+
+--echo # The important part in the below output is:
+--echo #        "lateral": 1,
+--echo #        "query_block": {
+--echo #          "select_id": 2,
+--echo #          "r_loops": 5,  <-- must be 5, not 30.
+--source include/analyze-format.inc
+
+eval
+analyze format=json $q1;
+
+create table t21 (pk int primary key);
+insert into t21 values (1),(2),(3);
+
+create table t22 (pk int primary key);
+insert into t22 values (1),(2),(3);
+
+# Same as above but throw in a couple of const tables.
+explain
+select * from
+  t21, t22,
+  (
+    (t1 left join t2 on t2.a=t1.b)
+    left join t3 on t3.a=t1.b
+  ) left join (select grp_id, count(*)
+               from t10 left join t11 on t11.col1=t10.col1
+               group by grp_id) T on T.grp_id=t1.b
+where
+  t21.pk=1 and t22.pk=2;
+
+explain
+select * from
+  t21,
+  (
+    (t1 left join t2 on t2.a=t1.b)
+    left join t3 on t3.a=t1.b
+  ) left join (select grp_id, count(*)
+               from
+                 t22 join t10 left join t11 on t11.col1=t10.col1
+               where
+                 t22.pk=1
+               group by grp_id) T on T.grp_id=t1.b
+where
+  t21.pk=1;
+
+# And also add a non-const table
+
+create table t5 (
+  pk int primary key
+  );
+insert into t5 select seq from seq_1_to_1000;
+
+explain
+select * from
+  t21,
+  (
+    (((t1 join t5 on t5.pk=t1.b)) left join t2 on t2.a=t1.b)
+    left join t3 on t3.a=t1.b
+  ) left join (select grp_id, count(*)
+               from
+                 t22 join t10 left join t11 on t11.col1=t10.col1
+               where
+                 t22.pk=1
+               group by grp_id) T on T.grp_id=t1.b
+where
+  t21.pk=1;
+
+drop table t1,t2,t3,t5, t10, t11, t21, t22;
+
+--echo # End of 10.4 tests
diff --git a/sql/opt_split.cc b/sql/opt_split.cc
index a356335..30d9161 100644
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -248,6 +248,8 @@ class SplM_opt_info : public Sql_alloc
   double unsplit_card;
   /* Lastly evaluated execution plan for 'join' with pushed equalities */
   SplM_plan_info *last_plan;
+  table_map last_spl_param_tables;
+  double last_refills;
 
   SplM_plan_info *find_plan(TABLE *table, uint key, uint parts);
 };
@@ -831,13 +833,13 @@ SplM_plan_info *SplM_opt_info::find_plan(TABLE *table, uint key, uint parts)
 static
 void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
                                      TABLE *table, uint key,
-                                     table_map remaining_tables,
+                                     table_map excluded_tables,
                                      bool validity_val)
 {
   KEYUSE_EXT *keyuse_ext= key_keyuse_ext_start;
   do
   {
-    if (!(keyuse_ext->needed_in_prefix & remaining_tables))
+    if (!(keyuse_ext->needed_in_prefix & excluded_tables))
     {
       /*
         The enabling/disabling flags are set just in KEYUSE_EXT structures.
@@ -857,8 +859,9 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
     Choose the best splitting to extend the evaluated partial join
 
   @param
-    record_count      estimated cardinality of the extended partial join
+    join_tab_pos      position for joined table in current partial join
     remaining_tables  tables not joined yet
+    spl_param_tables  OUT tables used in key values to access split table
 
   @details
     This function is called during the search for the best execution
@@ -873,7 +876,7 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
     splitting the function set it as the true plan of materialization
     of the table T.
     The function caches the found plans for materialization of table T
-    together if the info what key was used for splitting. Next time when
+    together with the info what key was used for splitting. Next time when
     the optimizer prefers to use the same key the plan is taken from
     the cache of plans
 
@@ -882,8 +885,9 @@ void reset_validity_vars_for_keyuses(KEYUSE_EXT *key_keyuse_ext_start,
     if the plan has been chosen, NULL - otherwise.
 */
 
-SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
-                                                 table_map remaining_tables)
+SplM_plan_info * JOIN_TAB::choose_best_splitting(const POSITION *join_tab_pos,
+                                                 table_map remaining_tables,
+                                                 table_map &spl_param_tables)
 {
   SplM_opt_info *spl_opt_info= table->spl_opt_info;
   DBUG_ASSERT(spl_opt_info != NULL);
@@ -898,6 +902,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
   SplM_plan_info *spl_plan= 0;
   uint best_key= 0;
   uint best_key_parts= 0;
+  table_map best_param_tables;
 
   /*
     Check whether there are keys that can be used to join T employing splitting
@@ -916,6 +921,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
       uint key= keyuse_ext->key;
       KEYUSE_EXT *key_keyuse_ext_start= keyuse_ext;
       key_part_map found_parts= 0;
+      table_map needed_in_prefix= 0;
       do
       {
         if (keyuse_ext->needed_in_prefix & remaining_tables)
@@ -941,6 +947,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
         KEY *key_info= table->key_info + key;
         double rec_per_key=
                  key_info->actual_rec_per_key(keyuse_ext->keypart);
+        needed_in_prefix|= keyuse_ext->needed_in_prefix;
         if (rec_per_key < best_rec_per_key)
 	{
           best_table= keyuse_ext->table;
@@ -948,6 +955,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
 	  best_key_parts= keyuse_ext->keypart + 1;
           best_rec_per_key= rec_per_key;
           best_key_keyuse_ext_start= key_keyuse_ext_start;
+          best_param_tables= needed_in_prefix;
         }
         keyuse_ext++;
       }
@@ -956,8 +964,24 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
     while (keyuse_ext->table == table);
   }
   spl_opt_info->last_plan= 0;
+  double refills= DBL_MAX;
+  table_map excluded_tables= remaining_tables | this->join->sjm_lookup_tables;
   if (best_table)
   {
+      if (!best_param_tables)
+        refills= 1;
+      else
+      {
+        for (const POSITION *pos= join_tab_pos - 1; ; pos--)
+        {
+          if (pos->table->table->map & this->join->sjm_lookup_tables)
+            continue;
+          set_if_smaller(refills, pos->partial_join_cardinality);
+	  if (pos->table->table->map & best_param_tables)
+            break;
+          excluded_tables|= pos->table->table->map;
+        }
+      }
     /*
       The key for splitting was chosen, look for the plan for this key
       in the cache
@@ -971,7 +995,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
       */
       table_map all_table_map= (((table_map) 1) << join->table_count) - 1;
       reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
-                                      best_key, remaining_tables, true);
+                                      best_key, excluded_tables, true);
       choose_plan(join, all_table_map & ~join->const_table_map);
 
       /*
@@ -990,7 +1014,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
 	  spl_opt_info->plan_cache.push_back(spl_plan))
       {
         reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
-                                        best_key, remaining_tables, false);
+                                        best_key, excluded_tables, false);
         return 0;
       }
 
@@ -1014,17 +1038,20 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
              (char *) join->best_positions,
              sizeof(POSITION) * join->table_count);
       reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
-                                      best_key, remaining_tables, false);
+                                      best_key, excluded_tables, false);
     }
+
     if (spl_plan)
     {
-      if(record_count * spl_plan->cost < spl_opt_info->unsplit_cost)
+      if (refills * spl_plan->cost < spl_opt_info->unsplit_cost)
       {
         /*
           The best plan that employs splitting is cheaper than
           the plan without splitting
 	*/
         spl_opt_info->last_plan= spl_plan;
+        spl_opt_info->last_spl_param_tables= best_param_tables;
+        spl_opt_info->last_refills= refills;
       }
     }
   }
@@ -1034,7 +1061,8 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
   spl_plan= spl_opt_info->last_plan;
   if (spl_plan)
   {
-    startup_cost= record_count * spl_plan->cost;
+    spl_param_tables= spl_opt_info->last_spl_param_tables;
+    startup_cost= spl_opt_info->last_refills * spl_plan->cost;
     records= (ha_rows) (records * spl_plan->split_sel);
   }
   else
@@ -1048,8 +1076,8 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
     Inject equalities for splitting used by the materialization join
 
   @param
-    excluded_tables  used to filter out the equalities that cannot
-                      be pushed.
+    excluded_tables    used to filter out the equalities that are not
+                       to be pushed.
 
   @details
     This function injects equalities pushed into a derived table T for which
@@ -1142,7 +1170,7 @@ bool is_eq_cond_injected_for_split_opt(Item_func_eq *eq_item)
 
   @param
     spl_plan   info on the splitting plan chosen for the splittable table T
-    remaining_tables  the table T is joined just before these tables
+    excluded_tables  tables that cannot be used in equalities pushed into T
     is_const_table    the table T is a constant table
 
   @details
@@ -1157,7 +1185,7 @@ bool is_eq_cond_injected_for_split_opt(Item_func_eq *eq_item)
 */
 
 bool JOIN_TAB::fix_splitting(SplM_plan_info *spl_plan,
-                             table_map remaining_tables,
+                             table_map excluded_tables,
                              bool is_const_table)
 {
   SplM_opt_info *spl_opt_info= table->spl_opt_info;
@@ -1165,6 +1193,7 @@ bool JOIN_TAB::fix_splitting(SplM_plan_info *spl_plan,
   JOIN *md_join= spl_opt_info->join;
   if (spl_plan && !is_const_table)
   {
+    is_split_derived= true;
     memcpy((char *) md_join->best_positions,
            (char *) spl_plan->best_positions,
            sizeof(POSITION) * md_join->table_count);
@@ -1175,7 +1204,7 @@ bool JOIN_TAB::fix_splitting(SplM_plan_info *spl_plan,
     reset_validity_vars_for_keyuses(spl_plan->keyuse_ext_start,
                                     spl_plan->table,
                                     spl_plan->key,
-                                    remaining_tables,
+                                    excluded_tables,
                                     true);
   }
   else if (md_join->save_qep)
@@ -1211,8 +1240,27 @@ bool JOIN::fix_all_splittings_in_plan()
     if (tab->table->is_splittable())
     {
       SplM_plan_info *spl_plan= cur_pos->spl_plan;
+      table_map excluded_tables= (all_tables & ~prev_tables) |
+                                 sjm_lookup_tables;
+                                   ;
+      if (spl_plan)
+      {
+        table_map spl_param_tables= cur_pos->spl_param_tables;
+        excluded_tables|=  tab->table->map;
+        if (spl_param_tables)
+	{
+          for (POSITION *pos= cur_pos - 1; ; pos--)
+          {
+            if (pos->table->table->map & sjm_lookup_tables)
+              continue;
+            if (pos->table->table->map & spl_param_tables)
+              break;
+            excluded_tables|= pos->table->table->map;
+          }
+        }
+      }
       if (tab->fix_splitting(spl_plan,
-                             all_tables & ~prev_tables,
+                             excluded_tables,
                              tablenr < const_tables ))
           return true;
     }
@@ -1251,13 +1299,25 @@ bool JOIN::inject_splitting_cond_for_all_tables_with_split_opt()
       continue;
     SplM_opt_info *spl_opt_info= tab->table->spl_opt_info;
     JOIN *join= spl_opt_info->join;
-    /*
-      Currently the equalities referencing columns of SJM tables with
-      look-up access cannot be pushed into materialized derived.
-    */
-    if (join->inject_best_splitting_cond((all_tables & ~prev_tables) |
-				          sjm_lookup_tables))
-        return true;
+    table_map excluded_tables= (all_tables & ~prev_tables) | sjm_lookup_tables;
+    table_map param_tables= cur_pos->spl_param_tables;
+
+    POSITION *found_pos= cur_pos;
+    if (param_tables)
+    {
+      for (POSITION *pos= cur_pos - 1; ; pos--)
+      {
+        if (pos->table->table->map & excluded_tables)
+          continue;
+        if (pos->table->table->map & param_tables)
+          break;
+        excluded_tables|= pos->table->table->map;
+        found_pos= pos;
+      }
+    }
+    if (join->inject_best_splitting_cond(excluded_tables))
+      return true;
+    found_pos->table->split_derived_to_update|= tab->table->map;
   }
   return false;
 }
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index c4fc2d1..5d501de 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -7431,6 +7431,7 @@ best_access_path(JOIN      *join,
   MY_BITMAP *eq_join_set= &s->table->eq_join_set;
   KEYUSE *hj_start_key= 0;
   SplM_plan_info *spl_plan= 0;
+  table_map spl_param_tables= 0;
   Range_rowid_filter_cost_info *filter= 0;
   const char* cause= NULL;
   enum join_type best_type= JT_UNKNOWN, type= JT_UNKNOWN;
@@ -7448,7 +7449,8 @@ best_access_path(JOIN      *join,
   loose_scan_opt.init(join, s, remaining_tables);
 
   if (s->table->is_splittable())
-    spl_plan= s->choose_best_splitting(record_count, remaining_tables);
+    spl_plan= s->choose_best_splitting(join_positions + idx, remaining_tables,
+                                       spl_param_tables);
 
   if (s->keyuse)
   {                                            /* Use key if possible */
@@ -8266,6 +8268,7 @@ best_access_path(JOIN      *join,
   pos->loosescan_picker.loosescan_key= MAX_KEY;
   pos->use_join_buffer= best_uses_jbuf;
   pos->spl_plan= spl_plan;
+  pos->spl_param_tables= !spl_plan ? 0 : spl_param_tables;
   pos->range_rowid_filter_info= best_filter;
 
   loose_scan_opt.save_to_position(s, loose_scan_pos);
@@ -9833,6 +9836,8 @@ best_extension_by_limited_search(JOIN      *join,
 
       double partial_join_cardinality= current_record_count *
                                         pushdown_cond_selectivity;
+      join->positions[idx].partial_join_cardinality= partial_join_cardinality;
+
       if ( (search_depth > 1) && (remaining_tables & ~real_table_bit) & allowed_tables )
       { /* Recursively expand the current partial plan */
         swap_variables(JOIN_TAB*, join->best_ref[idx], *pos);
@@ -13825,7 +13830,8 @@ bool JOIN_TAB::preread_init()
     DBUG_RETURN(TRUE);
 
   if (!(derived->get_unit()->uncacheable & UNCACHEABLE_DEPENDENT) ||
-      derived->is_nonrecursive_derived_with_rec_ref())
+      derived->is_nonrecursive_derived_with_rec_ref() ||
+      is_split_derived)
     preread_init_done= TRUE;
   if (select && select->quick)
     select->quick->replace_handler(table->file);
@@ -20764,6 +20770,16 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
 {
   DBUG_ENTER("sub_select");
 
+  if (join_tab->split_derived_to_update && !end_of_records)
+  {
+    table_map tab_map= join_tab->split_derived_to_update;
+    for (uint i= 0; tab_map; i++, tab_map>>= 1)
+    {
+      if (tab_map & 1)
+        join->map2table[i]->preread_init_done= false;
+    }
+  }
+
   if (join_tab->last_inner)
   {
     JOIN_TAB *last_inner_tab= join_tab->last_inner;
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 0dfecc9..db9025a 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -520,6 +520,16 @@ typedef struct st_join_table {
 
   bool preread_init_done;
 
+  /* true <=> split optimization has been applied to this materialized table */
+  bool is_split_derived;
+
+  /*
+    Bitmap of split materialized derived tables that can be filled just before
+    this join table is to be joined. All parameters of the split derived tables
+    belong to tables preceding this join table.
+  */
+  table_map split_derived_to_update;
+
   /*
     Cost info to the range filter used when joining this join table
     (Defined when the best join order has been already chosen)
@@ -680,9 +690,10 @@ typedef struct st_join_table {
 
   void partial_cleanup();
   void add_keyuses_for_splitting();
-  SplM_plan_info *choose_best_splitting(double record_count,
-                                        table_map remaining_tables);
-  bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables,
+  SplM_plan_info *choose_best_splitting(const POSITION *join_tab_pos,
+                                        table_map remaining_tables,
+                                        table_map &spl_param_tables);
+  bool fix_splitting(SplM_plan_info *spl_plan, table_map excluded_tables,
                      bool is_const_table);
 } JOIN_TAB;
 
@@ -947,9 +958,14 @@ class POSITION
   */
   KEYUSE *key;
 
+  /* Cardinality of current partial join ending with this position */
+  double partial_join_cardinality;
+
   /* Info on splitting plan used at this position */
   SplM_plan_info *spl_plan;
 
+  table_map spl_param_tables;
+
   /* Cost info for the range filter used at this position */
   Range_rowid_filter_cost_info *range_rowid_filter_info;

    

[Commits] f510aad: MDEV-26301 Split optimization refills temporary table too many times

IgorBabaev