revision-id: f7648d8ef74016a2d07433f38d12e3c22bdef4a9 (mariadb-10.3.31-70-gf7648d8ef74)
parent(s): 9962cda52722b77c2a7e0314bbaa2e4f963f55c1
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2022-01-03 23:53:45 +0300
message:
MDEV-26337: subquery with groupby and ROLLUP returns incorrect results on LEFT JOIN on INDEXED values
Disable LATERAL DERIVED optimization for subqueries that have WITH ROLLUP.
---
mysql-test/main/derived_split_innodb.result | 88 ++++++++++++++++
mysql-test/main/derived_split_innodb.test | 149 ++++++++++++++++++++++++++++
sql/opt_split.cc | 5 +-
sql/sql_select.cc | 34 +++++++
4 files changed, 275 insertions(+), 1 deletion(-)
diff --git a/mysql-test/main/derived_split_innodb.result b/mysql-test/main/derived_split_innodb.result
index 7ea3b689f23..fcdba0d7d07 100644
--- a/mysql-test/main/derived_split_innodb.result
+++ b/mysql-test/main/derived_split_innodb.result
@@ -234,4 +234,92 @@ id itemid id id
4 2 4 2
drop table t1,t2,t3;
set optimizer_switch='split_materialized=default';
+#
+# MDEV-26337: subquery with groupby and ROLLUP returns incorrect results on LEFT JOIN on INDEXED values
+#
+CREATE TABLE t1 (
+the_date date NOT NULL
+, PRIMARY KEY ( the_date )
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+INSERT INTO t1 VALUES ('2021-08-10'),('2021-08-11'),('2021-08-12'),('2021-08-13');
+CREATE TABLE t2 (
+the_date date NOT NULL,
+ptn_id char(5) CHARACTER SET utf8mb3 NOT NULL DEFAULT '',
+cco_stk_ttl int,
+PRIMARY KEY ( the_date , ptn_id )
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+INSERT INTO t2 VALUES
+('2021-08-11','10002',NULL),('2021-08-11','10741',128),
+('2021-08-11','11001',4),('2021-08-11','11003',2048),
+('2021-08-12','10001',4096),('2021-08-12','10002',1),
+('2021-08-12','10429',256),('2021-08-12','10499',16),
+('2021-08-12','10580',8),('2021-08-12','10740',32),
+('2021-08-12','10741',64),('2021-08-12','10771',512),
+('2021-08-12','11001',2),('2021-08-12','11003',1024);
+CREATE TABLE t3 (
+id int NOT NULL AUTO_INCREMENT,
+nsc_id char(5) NOT NULL,
+dept_id char(4) NOT NULL,
+district_id char(3) NOT NULL,
+region_id char(2) NOT NULL,
+PRIMARY KEY ( id ),
+UNIQUE KEY dept_district ( dept_id , district_id ),
+KEY region_id ( dept_id , region_id )
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+INSERT INTO t3 VALUES
+(1,'MMD','ADVB','10','1'), (2,'MMD','ADVB','11','1'),
+(3,'MMD','ADVB','21','2'),(4,'MMD','ADVB','22','2');
+CREATE TABLE t4 (
+dept_id char(4) CHARACTER SET utf8mb3 NOT NULL,
+ptn_id char(5) CHARACTER SET utf8mb3 NOT NULL,
+district_id char(3) CHARACTER SET utf8mb3 NOT NULL DEFAULT '0',
+nsc_id char(5) CHARACTER SET utf8mb3 NOT NULL
+, PRIMARY KEY (ptn_id , dept_id)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+INSERT INTO t4 VALUES
+('ADVB','10001','10','MMD'),('ADVB','10002','10','MMD'),
+('ADVB','10003','10','MMD'),('ADVB','10429','22','MMD'),
+('ADVB','10740','21','MMD'),('ADVB','10741','21','MMD'),
+('ADVB','10771','23','MMD'),('ADVB','11001','11','MMD'),
+('ADVB','11002','11','MMD');
+CREATE TABLE `t10` (
+`the_date` date NOT NULL,
+`dept_id` char(4) CHARACTER SET utf8mb4 ,
+`org_id` varchar(3) CHARACTER SET utf8mb4 ,
+`district_id` char(3) CHARACTER SET utf8mb4 ,
+`region_id` char(2) CHARACTER SET utf8mb4
+);
+insert into t10
+SELECT cal.the_date ,
+org.dept_id ,
+coalesce(org.district_id, org.region_id, 'MMD') AS org_id ,
+org.district_id ,
+org.region_id
+FROM t1 cal
+CROSS JOIN t3 org
+WHERE org.nsc_id = 'MMD'
+ AND org.dept_id IN ('ADVB')
+AND cal.the_date = '2021-08-12'
+GROUP BY cal.the_date,
+org.dept_id,
+org.region_id,
+org.district_id WITH ROLLUP HAVING NOT (cal.the_date IS NULL
+OR org.dept_id IS NULL);
+explain $q;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY org2 ALL NULL NULL NULL NULL 7
+1 PRIMARY <derived2> ref key0 key0 43 test.org2.the_date,test.org2.dept_id,test.org2.region_id,test.org2.district_id 2 Using where
+2 LATERAL DERIVED sub ref PRIMARY PRIMARY 3 test.org2.the_date 1 Using temporary; Using filesort
+2 LATERAL DERIVED org ref PRIMARY PRIMARY 15 test.sub.ptn_id 1 Using where
+2 LATERAL DERIVED dis eq_ref dept_district,region_id dept_district 28 const,func 1 Using index condition; Using where
+$q;
+the_date org_id dept_id cco_stk_ttl
+2021-08-12 10 ADVB 4097
+2021-08-12 11 ADVB 2
+2021-08-12 1 ADVB 4099
+2021-08-12 21 ADVB 96
+2021-08-12 22 ADVB 256
+2021-08-12 2 ADVB 352
+2021-08-12 MMD ADVB 4451
+drop table t1,t2,t3,t4,t10;
# End of 10.3 tests
diff --git a/mysql-test/main/derived_split_innodb.test b/mysql-test/main/derived_split_innodb.test
index 6f33c71eede..dc283792c13 100644
--- a/mysql-test/main/derived_split_innodb.test
+++ b/mysql-test/main/derived_split_innodb.test
@@ -186,4 +186,153 @@ eval $q;
drop table t1,t2,t3;
set optimizer_switch='split_materialized=default';
+--echo #
+--echo # MDEV-26337: subquery with groupby and ROLLUP returns incorrect results on LEFT JOIN on INDEXED values
+--echo #
+
+CREATE TABLE t1 (
+ the_date date NOT NULL
+ , PRIMARY KEY ( the_date )
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+INSERT INTO t1 VALUES ('2021-08-10'),('2021-08-11'),('2021-08-12'),('2021-08-13');
+
+CREATE TABLE t2 (
+ the_date date NOT NULL,
+ ptn_id char(5) CHARACTER SET utf8mb3 NOT NULL DEFAULT '',
+ cco_stk_ttl int,
+ PRIMARY KEY ( the_date , ptn_id )
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+
+INSERT INTO t2 VALUES
+('2021-08-11','10002',NULL),('2021-08-11','10741',128),
+('2021-08-11','11001',4),('2021-08-11','11003',2048),
+('2021-08-12','10001',4096),('2021-08-12','10002',1),
+('2021-08-12','10429',256),('2021-08-12','10499',16),
+('2021-08-12','10580',8),('2021-08-12','10740',32),
+('2021-08-12','10741',64),('2021-08-12','10771',512),
+('2021-08-12','11001',2),('2021-08-12','11003',1024);
+
+CREATE TABLE t3 (
+ id int NOT NULL AUTO_INCREMENT,
+ nsc_id char(5) NOT NULL,
+ dept_id char(4) NOT NULL,
+ district_id char(3) NOT NULL,
+ region_id char(2) NOT NULL,
+ PRIMARY KEY ( id ),
+ UNIQUE KEY dept_district ( dept_id , district_id ),
+ KEY region_id ( dept_id , region_id )
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+
+INSERT INTO t3 VALUES
+(1,'MMD','ADVB','10','1'), (2,'MMD','ADVB','11','1'),
+(3,'MMD','ADVB','21','2'),(4,'MMD','ADVB','22','2');
+
+CREATE TABLE t4 (
+ dept_id char(4) CHARACTER SET utf8mb3 NOT NULL,
+ ptn_id char(5) CHARACTER SET utf8mb3 NOT NULL,
+ district_id char(3) CHARACTER SET utf8mb3 NOT NULL DEFAULT '0',
+ nsc_id char(5) CHARACTER SET utf8mb3 NOT NULL
+ , PRIMARY KEY (ptn_id , dept_id)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
+
+INSERT INTO t4 VALUES
+('ADVB','10001','10','MMD'),('ADVB','10002','10','MMD'),
+('ADVB','10003','10','MMD'),('ADVB','10429','22','MMD'),
+('ADVB','10740','21','MMD'),('ADVB','10741','21','MMD'),
+('ADVB','10771','23','MMD'),('ADVB','11001','11','MMD'),
+('ADVB','11002','11','MMD');
+
+CREATE TABLE `t10` (
+ `the_date` date NOT NULL,
+ `dept_id` char(4) CHARACTER SET utf8mb4 ,
+ `org_id` varchar(3) CHARACTER SET utf8mb4 ,
+ `district_id` char(3) CHARACTER SET utf8mb4 ,
+ `region_id` char(2) CHARACTER SET utf8mb4
+);
+
+insert into t10
+SELECT cal.the_date ,
+ org.dept_id ,
+ coalesce(org.district_id, org.region_id, 'MMD') AS org_id ,
+ org.district_id ,
+ org.region_id
+FROM t1 cal
+CROSS JOIN t3 org
+WHERE org.nsc_id = 'MMD'
+ AND org.dept_id IN ('ADVB')
+ AND cal.the_date = '2021-08-12'
+GROUP BY cal.the_date,
+ org.dept_id,
+ org.region_id,
+ org.district_id WITH ROLLUP HAVING NOT (cal.the_date IS NULL
+ OR org.dept_id IS NULL);
+
+let $q=
+SELECT sql_no_cache org2.the_date ,
+ org2.org_id ,
+ org2.dept_id ,
+ msr. cco_stk_ttl
+FROM
+ t10 org2
+LEFT JOIN
+ ( SELECT sub.the_date ,
+ dis.dept_id ,
+ dis.region_id ,
+ dis.district_id ,
+ sum(sub.cco_stk_ttl) AS cco_stk_ttl
+ FROM t2 sub
+ JOIN t4 org ON org.ptn_id = sub.ptn_id
+ JOIN t3 dis ON dis.dept_id = org.dept_id
+ AND dis.district_id = org.district_id
+ WHERE dis.nsc_id = 'MMD'
+ AND dis.dept_id IN ('ADVB')
+ GROUP BY sub.the_date,
+ dis.dept_id,
+ dis.region_id,
+ dis.district_id WITH ROLLUP ) msr ON msr.the_date = org2.the_date
+AND msr.dept_id <=> org2.dept_id
+AND msr.region_id <=> org2.region_id
+AND msr.district_id <=> org2.district_id;
+
+evalp explain $q;
+evalp $q;
+
+drop table t1,t2,t3,t4,t10;
+
+--echo #
+--echo # MDEV-26337: subquery with groupby and ROLLUP returns incorrect results
+--echo # (The testcase is taken from testcase for MDEV-13389 due to it being
+--echo # much smaller)
+--echo #
+
+create table t3 (a int, b int, c char(127), index idx_b(b)) engine=myisam;
+insert into t3 values
+(8,11,'aa'), (5,15,'cc'), (1,14,'bb'), (2,12,'aa'), (7,17,'cc'),
+(7,18,'aa'), (2,11,'aa'), (7,10,'bb'), (3,11,'dd'), (4,12,'ee'),
+(5,14,'dd'), (9,12,'ee');
+create table t4 (a int, b int, c char(127), index idx(a,c)) engine=myisam;
+insert into t4 values
+(7,10,'cc'), (1,20,'aa'), (2,23,'bb'), (7,18,'cc'), (1,30,'bb'),
+(4,71,'xx'), (3,15,'aa'), (7,82,'aa'), (8,12,'dd'), (4,15,'aa'),
+(11,33,'yy'), (10,42,'zz'), (4,53,'xx'), (10,17,'yy'), (7,12,'cc'),
+(8,20,'dd'), (7,32,'bb'), (1,50,'aa'), (3,40,'bb'), (3,77,'aa');
+insert into t4 select a+10, b+10, concat(c,'f') from t4;
+analyze table t3,t4;
+
+--echo # This should use a plan with LATERAL DERIVED:
+explain select t3.a,t3.c,t.max,t.min
+from t3 join
+(select a, c, max(b) max, min(b) min from t4 group by a,c) t
+on t3.a=t.a and t3.c=t.c
+where t3.b > 15;
+
+--echo # ... and if one adds WITH ROLLUP, then LATERAL DERIVED is no longer used:
+explain select t3.a,t3.c,t.max,t.min
+from t3 join
+(select a, c, max(b) max, min(b) min from t4 group by a,c with rollup) t
+on t3.a=t.a and t3.c=t.c
+where t3.b > 15;
+
+drop table t3, t4;
+
--echo # End of 10.3 tests
diff --git a/sql/opt_split.cc b/sql/opt_split.cc
index edf9ae3deff..8a985095f3c 100644
--- a/sql/opt_split.cc
+++ b/sql/opt_split.cc
@@ -310,6 +310,8 @@ struct SplM_field_ext_info: public SplM_field_info
occurred also in the select list of this join
9. There are defined some keys usable for ref access of fields from C
with available statistics.
+ 10. The select doesn't use WITH ROLLUP (This limitation can probably be
+ lifted)
@retval
true if the answer is positive
@@ -326,7 +328,8 @@ bool JOIN::check_for_splittable_materialized()
(unit->first_select()->next_select()) || // !(3)
(derived->prohibit_cond_pushdown) || // !(4)
(derived->is_recursive_with_table()) || // !(5)
- (table_count == 0 || const_tables == top_join_tab_count)) // !(6)
+ (table_count == 0 || const_tables == top_join_tab_count) || // !(6)
+ rollup.state != ROLLUP::STATE_NONE) // (10)
return false;
if (group_list) // (7.1)
{
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index fe02e7b44e4..0ea861987bd 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -17047,6 +17047,26 @@ const_expression_in_where(COND *cond, Item *comp_item, Field *comp_field,
Item *right_item= ((Item_func*) cond)->arguments()[1];
if (equal(left_item, comp_item, comp_field))
{
+ /*
+ If this condition
+ 1. Was injected by LATERAL DERIVED optimization,
+ 2. But is not used for ref access
+ then we ingore it. This is the same as what mmake_cond_for_table() does
+ when it is invoked from make_join_select().
+ */
+ if (func->functype() == Item_func::EQ_FUNC)
+ {
+ if (is_eq_cond_injected_for_split_opt((Item_func_eq*)func))
+ {
+ bool used_for_ref= false;
+ if (left_item->type() == Item::FIELD_ITEM &&
+ test_if_ref(func, (Item_field*)left_item, right_item))
+ used_for_ref= true;
+
+ if (!used_for_ref)
+ return 0;
+ }
+ }
if (test_if_equality_guarantees_uniqueness (left_item, right_item))
{
if (*const_item)
@@ -17057,6 +17077,20 @@ const_expression_in_where(COND *cond, Item *comp_item, Field *comp_field,
}
else if (equal(right_item, comp_item, comp_field))
{
+ /* Do the same as above */
+ if (func->functype() == Item_func::EQ_FUNC)
+ {
+ if (is_eq_cond_injected_for_split_opt((Item_func_eq*)func))
+ {
+ bool used_for_ref= false;
+ if (right_item->type() == Item::FIELD_ITEM &&
+ test_if_ref(func, (Item_field*)right_item, left_item))
+ used_for_ref= true;
+
+ if (!used_for_ref)
+ return 0;
+ }
+ }
if (test_if_equality_guarantees_uniqueness (right_item, left_item))
{
if (*const_item)