[Commits] b3dede9: MDEV-25714 Join using derived with aggregation returns incorrect results

8 Jun 2021

revision-id: b3dede93e6226c4680e6326dd47b523f4a8b9715 (mariadb-10.4.4-2310-gb3dede9)
parent(s): 72b33a046263dafa03c04d2ec59ed2ee2b430434
author: Igor Babaev
committer: Igor Babaev
timestamp: 2021-06-07 20:53:37 -0700
message:

MDEV-25714 Join using derived with aggregation returns incorrect results

If a join query uses a derived table (view / CTE) with GROUP BY clause then
the execution plan for such join may employ split optimization. When this
optimization is employed the derived table is not materialized. Rather only
some partitions of the derived table are subject to grouping. Split
optimization can be applied only if:
- there are some indexes over the tables used in the join specifying the
  derived table whose prefixes partially cover the field items used in the
  GROUP BY list (such indexes are called splitting indexes)
- the WHERE condition of the join query contains conjunctive equalities
  between columns of the derived table that comprise major parts of
  splitting indexes and columns of the other join tables.

When the optimizer evaluates extending of a partial join by the rows of the
derived table it always considers a possibility of using split optimization.
Different splitting indexes can be used depending on the extended partial
join. At some rare conditions, for example, when there is a non-splitting
covering index for a table joined in the join specifying the derived table
usage of a splitting index to produce rows needed for grouping may be still
less beneficial than usage of such covering index without any splitting
technique. The function JOIN_TAB::choose_best_splitting() must take this
into account.

Approved by Oleksandr Byelkin <sanja@mariadb.com>

---
 mysql-test/main/derived_cond_pushdown.result |  2 +-
 mysql-test/main/derived_split_innodb.result  | 65 ++++++++++++++++++++++++++++
 mysql-test/main/derived_split_innodb.test    | 41 ++++++++++++++++++
 sql/opt_split.cc                             | 27 +++++++++---
 4 files changed, 129 insertions(+), 6 deletions(-)

diff --git a/mysql-test/main/derived_cond_pushdown.result b/mysql-test/main/derived_cond_pushdown.result
index 33b625a..016ca22 100644
--- a/mysql-test/main/derived_cond_pushdown.result
+++ b/mysql-test/main/derived_cond_pushdown.result
@@ -16679,7 +16679,7 @@ EXPLAIN EXTENDED
 SELECT * FROM v1 JOIN v2 ON v1.f = v2.f;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
 1	PRIMARY	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	Impossible WHERE noticed after reading const tables
-3	LATERAL DERIVED	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	no matching row in const table
+3	DERIVED	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	no matching row in const table
 Warnings:
 Note	1003	/* select#1 */ select NULL AS `f`,`v2`.`f` AS `f` from `test`.`t1` `a` straight_join `test`.`t1` `b` join `test`.`v2` where 0
 DROP VIEW v1,v2;
diff --git a/mysql-test/main/derived_split_innodb.result b/mysql-test/main/derived_split_innodb.result
index 55ace91..9edf9a1 100644
--- a/mysql-test/main/derived_split_innodb.result
+++ b/mysql-test/main/derived_split_innodb.result
@@ -176,3 +176,68 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 3	LATERAL DERIVED	t1	ref	a,a_2	a	5	test.t1.a	1	Using where; Using temporary; Using filesort
 3	LATERAL DERIVED	t2	ref	c	c	5	test.t1.b	1	Using index
 DROP TABLE t1, t2;
+#
+# Bug mdev-25714: usage non-splitting covering index is cheaper than
+#                 usage of the best splitting index for one group
+#
+create table t1 (
+id int not null, itemid int not null, index idx (itemid)
+) engine=innodb;
+insert into t1 values (1, 2), (2,2), (4,2), (4,2), (0,3), (3,3);
+create table t2 (id int not null) engine=innodb;
+insert into t2 values (2);
+create table t3 (
+id int not null, itemid int not null, userid int not null, primary key (id),
+index idx1 (userid, itemid), index idx2 (itemid)
+) engine innodb;
+insert into t3 values (1,1,1), (2,1,1), (3,2,1), (4,2,1), (5,3,1);
+set use_stat_tables='never';
+set optimizer_use_condition_selectivity=1;
+analyze table t1,t2,t3;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	OK
+test.t2	analyze	status	OK
+test.t3	analyze	status	OK
+set optimizer_switch='split_materialized=on';
+explain select t1.id, t1.itemid, dt.id, t2.id
+from t1,
+(select itemid, max(id) as id from t3 where userid = 1 group by itemid) dt,
+t2
+where t1.id = dt.id and t1.itemid = dt.itemid and t2.id=t1.itemid;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	1	
+1	PRIMARY	<derived2>	ref	key1	key1	4	test.t2.id	2	
+1	PRIMARY	t1	ALL	idx	NULL	NULL	NULL	6	Using where; Using join buffer (flat, BNL join)
+2	DERIVED	t3	ref	idx1,idx2	idx1	4	const	5	Using where; Using index
+select t1.id, t1.itemid, dt.id, t2.id
+from t1,
+(select itemid, max(id) as id from t3 where userid = 1 group by itemid) dt,
+t2
+where t1.id = dt.id and t1.itemid = dt.itemid and t2.id=t1.itemid;
+id	itemid	id	id
+4	2	4	2
+4	2	4	2
+set optimizer_switch='split_materialized=off';
+explain select t1.id, t1.itemid, dt.id, t2.id
+from t1,
+(select itemid, max(id) as id from t3 where userid = 1 group by itemid) dt,
+t2
+where t1.id = dt.id and t1.itemid = dt.itemid and t2.id=t1.itemid;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	PRIMARY	t2	ALL	NULL	NULL	NULL	NULL	1	
+1	PRIMARY	<derived2>	ref	key1	key1	4	test.t2.id	2	
+1	PRIMARY	t1	ALL	idx	NULL	NULL	NULL	6	Using where; Using join buffer (flat, BNL join)
+2	DERIVED	t3	ref	idx1	idx1	4	const	5	Using where; Using index
+select t1.id, t1.itemid, dt.id, t2.id
+from t1,
+(select itemid, max(id) as id from t3 where userid = 1 group by itemid) dt,
+t2
+where t1.id = dt.id and t1.itemid = dt.itemid and t2.id=t1.itemid;
+id	itemid	id	id
+4	2	4	2
+4	2	4	2
+drop table t1,t2,t3;
+set optimizer_switch='split_materialized=default';
+set use_stat_tables=default;
+set optimizer_use_condition_selectivity=default;
+# End of 10.3 tests
diff --git a/mysql-test/main/derived_split_innodb.test b/mysql-test/main/derived_split_innodb.test
index 10fc3f9..bee9ef4 100644
--- a/mysql-test/main/derived_split_innodb.test
+++ b/mysql-test/main/derived_split_innodb.test
@@ -152,3 +152,44 @@ eval set statement optimizer_switch='split_materialized=on' for  $query;
 
 DROP TABLE t1, t2;
 
+--echo #
+--echo # Bug mdev-25714: usage non-splitting covering index is cheaper than
+--echo #                 usage of the best splitting index for one group
+--echo #
+
+create table t1 (
+  id int not null, itemid int not null, index idx (itemid)
+) engine=innodb;
+insert into t1 values (1, 2), (2,2), (4,2), (4,2), (0,3), (3,3);
+create table t2 (id int not null) engine=innodb;
+insert into t2 values (2);
+create table t3 (
+  id int not null, itemid int not null, userid int not null, primary key (id),
+  index idx1 (userid, itemid), index idx2 (itemid)
+) engine innodb;
+insert into t3 values (1,1,1), (2,1,1), (3,2,1), (4,2,1), (5,3,1);
+set use_stat_tables='never';
+set optimizer_use_condition_selectivity=1;
+analyze table t1,t2,t3;
+
+let $q=
+select t1.id, t1.itemid, dt.id, t2.id
+  from t1,
+  (select itemid, max(id) as id from t3 where userid = 1 group by itemid) dt,
+  t2
+    where t1.id = dt.id and t1.itemid = dt.itemid and t2.id=t1.itemid;
+
+set optimizer_switch='split_materialized=on';
+eval explain $q;
+eval $q;
+
+set optimizer_switch='split_materialized=off';
+eval explain $q;
+eval $q;
+
+drop table t1,t2,t3;
+set optimizer_switch='split_materialized=default';
+set use_stat_tables=default;
+set optimizer_use_condition_selectivity=default;
+
+--echo # End of 10.3 tests
diff --git a/sql/opt_split.cc b/sql/opt_split.cc
index 28a5483..316919c 100644
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -958,11 +958,7 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
       in the cache
     */
     spl_plan= spl_opt_info->find_plan(best_table, best_key, best_key_parts);
-    if (!spl_plan &&
-	(spl_plan= (SplM_plan_info *) thd->alloc(sizeof(SplM_plan_info))) &&
-	(spl_plan->best_positions=
-	   (POSITION *) thd->alloc(sizeof(POSITION) * join->table_count)) &&
-	!spl_opt_info->plan_cache.push_back(spl_plan))
+    if (!spl_plan)
     {
       /*
         The plan for the chosen key has not been found in the cache.
@@ -972,6 +968,27 @@ SplM_plan_info * JOIN_TAB::choose_best_splitting(double record_count,
       reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
                                       best_key, remaining_tables, true);
       choose_plan(join, all_table_map & ~join->const_table_map);
+
+      /*
+        Check that the chosen plan is really a splitting plan.
+        If not or if there is not enough memory to save the plan in the cache
+        then just return with no splitting plan.
+      */
+      POSITION *first_non_const_pos= join->best_positions + join->const_tables;
+      TABLE *table= first_non_const_pos->table->table;
+      key_map spl_keys= table->keys_usable_for_splitting;
+      if (!(first_non_const_pos->key &&
+            spl_keys.is_set(first_non_const_pos->key->key)) ||
+          !(spl_plan= (SplM_plan_info *) thd->alloc(sizeof(SplM_plan_info))) ||
+	  !(spl_plan->best_positions=
+	     (POSITION *) thd->alloc(sizeof(POSITION) * join->table_count)) ||
+	  spl_opt_info->plan_cache.push_back(spl_plan))
+      {
+        reset_validity_vars_for_keyuses(best_key_keyuse_ext_start, best_table,
+                                        best_key, remaining_tables, false);
+        return 0;
+      }
+
       spl_plan->keyuse_ext_start= best_key_keyuse_ext_start;
       spl_plan->table= best_table;
       spl_plan->key= best_key;

    

[Commits] b3dede9: MDEV-25714 Join using derived with aggregation returns incorrect results

IgorBabaev