revision-id: f7648d8ef74016a2d07433f38d12e3c22bdef4a9 (mariadb-10.3.31-70-gf7648d8ef74) parent(s): 9962cda52722b77c2a7e0314bbaa2e4f963f55c1 author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2022-01-03 23:53:45 +0300 message: MDEV-26337: subquery with groupby and ROLLUP returns incorrect results on LEFT JOIN on INDEXED values Disable LATERAL DERIVED optimization for subqueries that have WITH ROLLUP. --- mysql-test/main/derived_split_innodb.result | 88 ++++++++++++++++ mysql-test/main/derived_split_innodb.test | 149 ++++++++++++++++++++++++++++ sql/opt_split.cc | 5 +- sql/sql_select.cc | 34 +++++++ 4 files changed, 275 insertions(+), 1 deletion(-) diff --git a/mysql-test/main/derived_split_innodb.result b/mysql-test/main/derived_split_innodb.result index 7ea3b689f23..fcdba0d7d07 100644 --- a/mysql-test/main/derived_split_innodb.result +++ b/mysql-test/main/derived_split_innodb.result @@ -234,4 +234,92 @@ id itemid id id 4 2 4 2 drop table t1,t2,t3; set optimizer_switch='split_materialized=default'; +# +# MDEV-26337: subquery with groupby and ROLLUP returns incorrect results on LEFT JOIN on INDEXED values +# +CREATE TABLE t1 ( +the_date date NOT NULL +, PRIMARY KEY ( the_date ) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; +INSERT INTO t1 VALUES ('2021-08-10'),('2021-08-11'),('2021-08-12'),('2021-08-13'); +CREATE TABLE t2 ( +the_date date NOT NULL, +ptn_id char(5) CHARACTER SET utf8mb3 NOT NULL DEFAULT '', +cco_stk_ttl int, +PRIMARY KEY ( the_date , ptn_id ) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; +INSERT INTO t2 VALUES +('2021-08-11','10002',NULL),('2021-08-11','10741',128), +('2021-08-11','11001',4),('2021-08-11','11003',2048), +('2021-08-12','10001',4096),('2021-08-12','10002',1), +('2021-08-12','10429',256),('2021-08-12','10499',16), +('2021-08-12','10580',8),('2021-08-12','10740',32), +('2021-08-12','10741',64),('2021-08-12','10771',512), +('2021-08-12','11001',2),('2021-08-12','11003',1024); +CREATE TABLE t3 ( +id int NOT NULL AUTO_INCREMENT, +nsc_id char(5) NOT NULL, +dept_id char(4) NOT NULL, +district_id char(3) NOT NULL, +region_id char(2) NOT NULL, +PRIMARY KEY ( id ), +UNIQUE KEY dept_district ( dept_id , district_id ), +KEY region_id ( dept_id , region_id ) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; +INSERT INTO t3 VALUES +(1,'MMD','ADVB','10','1'), (2,'MMD','ADVB','11','1'), +(3,'MMD','ADVB','21','2'),(4,'MMD','ADVB','22','2'); +CREATE TABLE t4 ( +dept_id char(4) CHARACTER SET utf8mb3 NOT NULL, +ptn_id char(5) CHARACTER SET utf8mb3 NOT NULL, +district_id char(3) CHARACTER SET utf8mb3 NOT NULL DEFAULT '0', +nsc_id char(5) CHARACTER SET utf8mb3 NOT NULL +, PRIMARY KEY (ptn_id , dept_id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; +INSERT INTO t4 VALUES +('ADVB','10001','10','MMD'),('ADVB','10002','10','MMD'), +('ADVB','10003','10','MMD'),('ADVB','10429','22','MMD'), +('ADVB','10740','21','MMD'),('ADVB','10741','21','MMD'), +('ADVB','10771','23','MMD'),('ADVB','11001','11','MMD'), +('ADVB','11002','11','MMD'); +CREATE TABLE `t10` ( +`the_date` date NOT NULL, +`dept_id` char(4) CHARACTER SET utf8mb4 , +`org_id` varchar(3) CHARACTER SET utf8mb4 , +`district_id` char(3) CHARACTER SET utf8mb4 , +`region_id` char(2) CHARACTER SET utf8mb4 +); +insert into t10 +SELECT cal.the_date , +org.dept_id , +coalesce(org.district_id, org.region_id, 'MMD') AS org_id , +org.district_id , +org.region_id +FROM t1 cal +CROSS JOIN t3 org +WHERE org.nsc_id = 'MMD' + AND org.dept_id IN ('ADVB') +AND cal.the_date = '2021-08-12' +GROUP BY cal.the_date, +org.dept_id, +org.region_id, +org.district_id WITH ROLLUP HAVING NOT (cal.the_date IS NULL +OR org.dept_id IS NULL); +explain $q; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY org2 ALL NULL NULL NULL NULL 7 +1 PRIMARY <derived2> ref key0 key0 43 test.org2.the_date,test.org2.dept_id,test.org2.region_id,test.org2.district_id 2 Using where +2 LATERAL DERIVED sub ref PRIMARY PRIMARY 3 test.org2.the_date 1 Using temporary; Using filesort +2 LATERAL DERIVED org ref PRIMARY PRIMARY 15 test.sub.ptn_id 1 Using where +2 LATERAL DERIVED dis eq_ref dept_district,region_id dept_district 28 const,func 1 Using index condition; Using where +$q; +the_date org_id dept_id cco_stk_ttl +2021-08-12 10 ADVB 4097 +2021-08-12 11 ADVB 2 +2021-08-12 1 ADVB 4099 +2021-08-12 21 ADVB 96 +2021-08-12 22 ADVB 256 +2021-08-12 2 ADVB 352 +2021-08-12 MMD ADVB 4451 +drop table t1,t2,t3,t4,t10; # End of 10.3 tests diff --git a/mysql-test/main/derived_split_innodb.test b/mysql-test/main/derived_split_innodb.test index 6f33c71eede..dc283792c13 100644 --- a/mysql-test/main/derived_split_innodb.test +++ b/mysql-test/main/derived_split_innodb.test @@ -186,4 +186,153 @@ eval $q; drop table t1,t2,t3; set optimizer_switch='split_materialized=default'; +--echo # +--echo # MDEV-26337: subquery with groupby and ROLLUP returns incorrect results on LEFT JOIN on INDEXED values +--echo # + +CREATE TABLE t1 ( + the_date date NOT NULL + , PRIMARY KEY ( the_date ) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; +INSERT INTO t1 VALUES ('2021-08-10'),('2021-08-11'),('2021-08-12'),('2021-08-13'); + +CREATE TABLE t2 ( + the_date date NOT NULL, + ptn_id char(5) CHARACTER SET utf8mb3 NOT NULL DEFAULT '', + cco_stk_ttl int, + PRIMARY KEY ( the_date , ptn_id ) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; + +INSERT INTO t2 VALUES +('2021-08-11','10002',NULL),('2021-08-11','10741',128), +('2021-08-11','11001',4),('2021-08-11','11003',2048), +('2021-08-12','10001',4096),('2021-08-12','10002',1), +('2021-08-12','10429',256),('2021-08-12','10499',16), +('2021-08-12','10580',8),('2021-08-12','10740',32), +('2021-08-12','10741',64),('2021-08-12','10771',512), +('2021-08-12','11001',2),('2021-08-12','11003',1024); + +CREATE TABLE t3 ( + id int NOT NULL AUTO_INCREMENT, + nsc_id char(5) NOT NULL, + dept_id char(4) NOT NULL, + district_id char(3) NOT NULL, + region_id char(2) NOT NULL, + PRIMARY KEY ( id ), + UNIQUE KEY dept_district ( dept_id , district_id ), + KEY region_id ( dept_id , region_id ) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; + +INSERT INTO t3 VALUES +(1,'MMD','ADVB','10','1'), (2,'MMD','ADVB','11','1'), +(3,'MMD','ADVB','21','2'),(4,'MMD','ADVB','22','2'); + +CREATE TABLE t4 ( + dept_id char(4) CHARACTER SET utf8mb3 NOT NULL, + ptn_id char(5) CHARACTER SET utf8mb3 NOT NULL, + district_id char(3) CHARACTER SET utf8mb3 NOT NULL DEFAULT '0', + nsc_id char(5) CHARACTER SET utf8mb3 NOT NULL + , PRIMARY KEY (ptn_id , dept_id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; + +INSERT INTO t4 VALUES +('ADVB','10001','10','MMD'),('ADVB','10002','10','MMD'), +('ADVB','10003','10','MMD'),('ADVB','10429','22','MMD'), +('ADVB','10740','21','MMD'),('ADVB','10741','21','MMD'), +('ADVB','10771','23','MMD'),('ADVB','11001','11','MMD'), +('ADVB','11002','11','MMD'); + +CREATE TABLE `t10` ( + `the_date` date NOT NULL, + `dept_id` char(4) CHARACTER SET utf8mb4 , + `org_id` varchar(3) CHARACTER SET utf8mb4 , + `district_id` char(3) CHARACTER SET utf8mb4 , + `region_id` char(2) CHARACTER SET utf8mb4 +); + +insert into t10 +SELECT cal.the_date , + org.dept_id , + coalesce(org.district_id, org.region_id, 'MMD') AS org_id , + org.district_id , + org.region_id +FROM t1 cal +CROSS JOIN t3 org +WHERE org.nsc_id = 'MMD' + AND org.dept_id IN ('ADVB') + AND cal.the_date = '2021-08-12' +GROUP BY cal.the_date, + org.dept_id, + org.region_id, + org.district_id WITH ROLLUP HAVING NOT (cal.the_date IS NULL + OR org.dept_id IS NULL); + +let $q= +SELECT sql_no_cache org2.the_date , + org2.org_id , + org2.dept_id , + msr. cco_stk_ttl +FROM + t10 org2 +LEFT JOIN + ( SELECT sub.the_date , + dis.dept_id , + dis.region_id , + dis.district_id , + sum(sub.cco_stk_ttl) AS cco_stk_ttl + FROM t2 sub + JOIN t4 org ON org.ptn_id = sub.ptn_id + JOIN t3 dis ON dis.dept_id = org.dept_id + AND dis.district_id = org.district_id + WHERE dis.nsc_id = 'MMD' + AND dis.dept_id IN ('ADVB') + GROUP BY sub.the_date, + dis.dept_id, + dis.region_id, + dis.district_id WITH ROLLUP ) msr ON msr.the_date = org2.the_date +AND msr.dept_id <=> org2.dept_id +AND msr.region_id <=> org2.region_id +AND msr.district_id <=> org2.district_id; + +evalp explain $q; +evalp $q; + +drop table t1,t2,t3,t4,t10; + +--echo # +--echo # MDEV-26337: subquery with groupby and ROLLUP returns incorrect results +--echo # (The testcase is taken from testcase for MDEV-13389 due to it being +--echo # much smaller) +--echo # + +create table t3 (a int, b int, c char(127), index idx_b(b)) engine=myisam; +insert into t3 values +(8,11,'aa'), (5,15,'cc'), (1,14,'bb'), (2,12,'aa'), (7,17,'cc'), +(7,18,'aa'), (2,11,'aa'), (7,10,'bb'), (3,11,'dd'), (4,12,'ee'), +(5,14,'dd'), (9,12,'ee'); +create table t4 (a int, b int, c char(127), index idx(a,c)) engine=myisam; +insert into t4 values +(7,10,'cc'), (1,20,'aa'), (2,23,'bb'), (7,18,'cc'), (1,30,'bb'), +(4,71,'xx'), (3,15,'aa'), (7,82,'aa'), (8,12,'dd'), (4,15,'aa'), +(11,33,'yy'), (10,42,'zz'), (4,53,'xx'), (10,17,'yy'), (7,12,'cc'), +(8,20,'dd'), (7,32,'bb'), (1,50,'aa'), (3,40,'bb'), (3,77,'aa'); +insert into t4 select a+10, b+10, concat(c,'f') from t4; +analyze table t3,t4; + +--echo # This should use a plan with LATERAL DERIVED: +explain select t3.a,t3.c,t.max,t.min +from t3 join +(select a, c, max(b) max, min(b) min from t4 group by a,c) t +on t3.a=t.a and t3.c=t.c +where t3.b > 15; + +--echo # ... and if one adds WITH ROLLUP, then LATERAL DERIVED is no longer used: +explain select t3.a,t3.c,t.max,t.min +from t3 join +(select a, c, max(b) max, min(b) min from t4 group by a,c with rollup) t +on t3.a=t.a and t3.c=t.c +where t3.b > 15; + +drop table t3, t4; + --echo # End of 10.3 tests diff --git a/sql/opt_split.cc b/sql/opt_split.cc index edf9ae3deff..8a985095f3c 100644 --- a/sql/opt_split.cc +++ b/sql/opt_split.cc @@ -310,6 +310,8 @@ struct SplM_field_ext_info: public SplM_field_info occurred also in the select list of this join 9. There are defined some keys usable for ref access of fields from C with available statistics. + 10. The select doesn't use WITH ROLLUP (This limitation can probably be + lifted) @retval true if the answer is positive @@ -326,7 +328,8 @@ bool JOIN::check_for_splittable_materialized() (unit->first_select()->next_select()) || // !(3) (derived->prohibit_cond_pushdown) || // !(4) (derived->is_recursive_with_table()) || // !(5) - (table_count == 0 || const_tables == top_join_tab_count)) // !(6) + (table_count == 0 || const_tables == top_join_tab_count) || // !(6) + rollup.state != ROLLUP::STATE_NONE) // (10) return false; if (group_list) // (7.1) { diff --git a/sql/sql_select.cc b/sql/sql_select.cc index fe02e7b44e4..0ea861987bd 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -17047,6 +17047,26 @@ const_expression_in_where(COND *cond, Item *comp_item, Field *comp_field, Item *right_item= ((Item_func*) cond)->arguments()[1]; if (equal(left_item, comp_item, comp_field)) { + /* + If this condition + 1. Was injected by LATERAL DERIVED optimization, + 2. But is not used for ref access + then we ingore it. This is the same as what mmake_cond_for_table() does + when it is invoked from make_join_select(). + */ + if (func->functype() == Item_func::EQ_FUNC) + { + if (is_eq_cond_injected_for_split_opt((Item_func_eq*)func)) + { + bool used_for_ref= false; + if (left_item->type() == Item::FIELD_ITEM && + test_if_ref(func, (Item_field*)left_item, right_item)) + used_for_ref= true; + + if (!used_for_ref) + return 0; + } + } if (test_if_equality_guarantees_uniqueness (left_item, right_item)) { if (*const_item) @@ -17057,6 +17077,20 @@ const_expression_in_where(COND *cond, Item *comp_item, Field *comp_field, } else if (equal(right_item, comp_item, comp_field)) { + /* Do the same as above */ + if (func->functype() == Item_func::EQ_FUNC) + { + if (is_eq_cond_injected_for_split_opt((Item_func_eq*)func)) + { + bool used_for_ref= false; + if (right_item->type() == Item::FIELD_ITEM && + test_if_ref(func, (Item_field*)right_item, left_item)) + used_for_ref= true; + + if (!used_for_ref) + return 0; + } + } if (test_if_equality_guarantees_uniqueness (right_item, left_item)) { if (*const_item)