[Maria-developers] Igor please review: MDEV-22377: Subquery in an [UPDATE] query uses full scan instead of range
Hello Igor, Could you please review the below: commit 793b100cc01228f01ead841d6c45f9e2b07786f9 Author: Sergei Petrunia <psergey@askmonty.org> Date: Thu Apr 30 01:25:11 2020 +0300 MDEV-22377: Subquery in an [UPDATE] query uses full scan instead of range When doing IN->EXISTS rewrite, Item_in_subselect::inject_in_to_exists_cond injects equalities into subquery's WHERE condition. The problem is that build_equal_items() has already been called for the subquery's WHERE, and tampering with the WHERE condition can prevent equality propagation from working. If the subquery's WHERE is an Item_cond_and with multiple equalities: - Item_equal objects form a suffix sub-list of the list in WHERE's Item_cond_and::list. The suffix is stored in JOIN::cond_equal->current_level. - Item_cond_and::m_cond_equal must also be preserved. This patch makes inject_in_to_exists_cond() not to break these properties diff --git a/mysql-test/main/subselect2.result b/mysql-test/main/subselect2.result index a3d7fda7abc..c54d635230f 100644 --- a/mysql-test/main/subselect2.result +++ b/mysql-test/main/subselect2.result @@ -262,7 +262,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 2 DEPENDENT SUBQUERY t2c ALL NULL NULL NULL NULL 2 100.00 Using where; Using join buffer (incremental, BNL join) Warnings: Note 1276 Field or reference 'test.t1.a' of SELECT #2 was resolved in SELECT #1 -Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a`,`test`.`t1`.`c1` AS `c1` from `test`.`t1` where !<expr_cache><`test`.`t1`.`c1`,`test`.`t1`.`a`>(<in_optimizer>(`test`.`t1`.`c1`,<exists>(/* select#2 */ select `test`.`t2a`.`c2` from `test`.`t2` `t2a` join `test`.`t2` `t2b` join `test`.`t2` `t2c` where (`test`.`t2b`.`m` <> `test`.`t1`.`a` or `test`.`t2b`.`m` = `test`.`t2a`.`m`) and trigcond(<cache>(`test`.`t1`.`c1`) = `test`.`t2a`.`c2` or `test`.`t2a`.`c2` is null) and `test`.`t2c`.`c2` = `test`.`t2b`.`c2` and `test`.`t2b`.`n` = `test`.`t2a`.`m` having trigcond(`test`.`t2a`.`c2` is null)))) +Note 1003 /* select#1 */ select `test`.`t1`.`a` AS `a`,`test`.`t1`.`c1` AS `c1` from `test`.`t1` where !<expr_cache><`test`.`t1`.`c1`,`test`.`t1`.`a`>(<in_optimizer>(`test`.`t1`.`c1`,<exists>(/* select#2 */ select `test`.`t2a`.`c2` from `test`.`t2` `t2a` join `test`.`t2` `t2b` join `test`.`t2` `t2c` where `test`.`t2c`.`c2` = `test`.`t2b`.`c2` and `test`.`t2b`.`n` = `test`.`t2a`.`m` and (`test`.`t2b`.`m` <> `test`.`t1`.`a` or `test`.`t2b`.`m` = `test`.`t2a`.`m`) and trigcond(<cache>(`test`.`t1`.`c1`) = `test`.`t2a`.`c2` or `test`.`t2a`.`c2` is null) having trigcond(`test`.`t2a`.`c2` is null)))) DROP TABLE t1,t2; # # MDEV-614, also MDEV-536, also LP:1050806: diff --git a/mysql-test/main/subselect3.result b/mysql-test/main/subselect3.result index 5c4544a1b05..0034f61ac23 100644 --- a/mysql-test/main/subselect3.result +++ b/mysql-test/main/subselect3.result @@ -169,7 +169,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 2 DEPENDENT SUBQUERY t2 ref a a 5 test.t1.b 1 100.00 Using where Warnings: Note 1276 Field or reference 'test.t3.oref' of SELECT #2 was resolved in SELECT #1 -Note 1003 /* select#1 */ select `test`.`t3`.`a` AS `a`,`test`.`t3`.`oref` AS `oref`,<expr_cache><`test`.`t3`.`a`,`test`.`t3`.`oref`>(<in_optimizer>(`test`.`t3`.`a`,<exists>(/* select#2 */ select `test`.`t1`.`a` from `test`.`t1` join `test`.`t2` where `test`.`t2`.`b` = `test`.`t3`.`oref` and trigcond(<cache>(`test`.`t3`.`a`) = `test`.`t1`.`a` or `test`.`t1`.`a` is null) and `test`.`t2`.`a` = `test`.`t1`.`b` having trigcond(`test`.`t1`.`a` is null)))) AS `Z` from `test`.`t3` +Note 1003 /* select#1 */ select `test`.`t3`.`a` AS `a`,`test`.`t3`.`oref` AS `oref`,<expr_cache><`test`.`t3`.`a`,`test`.`t3`.`oref`>(<in_optimizer>(`test`.`t3`.`a`,<exists>(/* select#2 */ select `test`.`t1`.`a` from `test`.`t1` join `test`.`t2` where `test`.`t2`.`a` = `test`.`t1`.`b` and `test`.`t2`.`b` = `test`.`t3`.`oref` and trigcond(<cache>(`test`.`t3`.`a`) = `test`.`t1`.`a` or `test`.`t1`.`a` is null) having trigcond(`test`.`t1`.`a` is null)))) AS `Z` from `test`.`t3` drop table t1, t2, t3; create table t1 (a int NOT NULL, b int NOT NULL, key(a)); insert into t1 values @@ -197,7 +197,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 2 DEPENDENT SUBQUERY t2 ref a a 4 test.t1.b 1 100.00 Using where Warnings: Note 1276 Field or reference 'test.t3.oref' of SELECT #2 was resolved in SELECT #1 -Note 1003 /* select#1 */ select `test`.`t3`.`a` AS `a`,`test`.`t3`.`oref` AS `oref`,<expr_cache><`test`.`t3`.`a`,`test`.`t3`.`oref`>(<in_optimizer>(`test`.`t3`.`a`,<exists>(/* select#2 */ select `test`.`t1`.`a` from `test`.`t1` join `test`.`t2` where `test`.`t2`.`b` = `test`.`t3`.`oref` and trigcond(<cache>(`test`.`t3`.`a`) = `test`.`t1`.`a`) and `test`.`t2`.`a` = `test`.`t1`.`b`))) AS `Z` from `test`.`t3` +Note 1003 /* select#1 */ select `test`.`t3`.`a` AS `a`,`test`.`t3`.`oref` AS `oref`,<expr_cache><`test`.`t3`.`a`,`test`.`t3`.`oref`>(<in_optimizer>(`test`.`t3`.`a`,<exists>(/* select#2 */ select `test`.`t1`.`a` from `test`.`t1` join `test`.`t2` where `test`.`t2`.`a` = `test`.`t1`.`b` and `test`.`t2`.`b` = `test`.`t3`.`oref` and trigcond(<cache>(`test`.`t3`.`a`) = `test`.`t1`.`a`)))) AS `Z` from `test`.`t3` drop table t1,t2,t3; create table t1 (oref int, grp int); insert into t1 (oref, grp) values diff --git a/mysql-test/main/subselect3_jcl6.result b/mysql-test/main/subselect3_jcl6.result index 4260676cc37..b7b18bf80e0 100644 --- a/mysql-test/main/subselect3_jcl6.result +++ b/mysql-test/main/subselect3_jcl6.result @@ -172,7 +172,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 2 DEPENDENT SUBQUERY t2 ref a a 5 test.t1.b 1 100.00 Using where; Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan Warnings: Note 1276 Field or reference 'test.t3.oref' of SELECT #2 was resolved in SELECT #1 -Note 1003 /* select#1 */ select `test`.`t3`.`a` AS `a`,`test`.`t3`.`oref` AS `oref`,<expr_cache><`test`.`t3`.`a`,`test`.`t3`.`oref`>(<in_optimizer>(`test`.`t3`.`a`,<exists>(/* select#2 */ select `test`.`t1`.`a` from `test`.`t1` join `test`.`t2` where `test`.`t2`.`b` = `test`.`t3`.`oref` and trigcond(<cache>(`test`.`t3`.`a`) = `test`.`t1`.`a` or `test`.`t1`.`a` is null) and `test`.`t2`.`a` = `test`.`t1`.`b` having trigcond(`test`.`t1`.`a` is null)))) AS `Z` from `test`.`t3` +Note 1003 /* select#1 */ select `test`.`t3`.`a` AS `a`,`test`.`t3`.`oref` AS `oref`,<expr_cache><`test`.`t3`.`a`,`test`.`t3`.`oref`>(<in_optimizer>(`test`.`t3`.`a`,<exists>(/* select#2 */ select `test`.`t1`.`a` from `test`.`t1` join `test`.`t2` where `test`.`t2`.`a` = `test`.`t1`.`b` and `test`.`t2`.`b` = `test`.`t3`.`oref` and trigcond(<cache>(`test`.`t3`.`a`) = `test`.`t1`.`a` or `test`.`t1`.`a` is null) having trigcond(`test`.`t1`.`a` is null)))) AS `Z` from `test`.`t3` drop table t1, t2, t3; create table t1 (a int NOT NULL, b int NOT NULL, key(a)); insert into t1 values @@ -200,7 +200,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 2 DEPENDENT SUBQUERY t2 ref a a 4 test.t1.b 1 100.00 Using where; Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan Warnings: Note 1276 Field or reference 'test.t3.oref' of SELECT #2 was resolved in SELECT #1 -Note 1003 /* select#1 */ select `test`.`t3`.`a` AS `a`,`test`.`t3`.`oref` AS `oref`,<expr_cache><`test`.`t3`.`a`,`test`.`t3`.`oref`>(<in_optimizer>(`test`.`t3`.`a`,<exists>(/* select#2 */ select `test`.`t1`.`a` from `test`.`t1` join `test`.`t2` where `test`.`t2`.`b` = `test`.`t3`.`oref` and trigcond(<cache>(`test`.`t3`.`a`) = `test`.`t1`.`a`) and `test`.`t2`.`a` = `test`.`t1`.`b`))) AS `Z` from `test`.`t3` +Note 1003 /* select#1 */ select `test`.`t3`.`a` AS `a`,`test`.`t3`.`oref` AS `oref`,<expr_cache><`test`.`t3`.`a`,`test`.`t3`.`oref`>(<in_optimizer>(`test`.`t3`.`a`,<exists>(/* select#2 */ select `test`.`t1`.`a` from `test`.`t1` join `test`.`t2` where `test`.`t2`.`a` = `test`.`t1`.`b` and `test`.`t2`.`b` = `test`.`t3`.`oref` and trigcond(<cache>(`test`.`t3`.`a`) = `test`.`t1`.`a`)))) AS `Z` from `test`.`t3` drop table t1,t2,t3; create table t1 (oref int, grp int); insert into t1 (oref, grp) values diff --git a/mysql-test/main/subselect4.result b/mysql-test/main/subselect4.result index 3dc019cc6c1..e355cfc60f0 100644 --- a/mysql-test/main/subselect4.result +++ b/mysql-test/main/subselect4.result @@ -2349,7 +2349,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY t2 system NULL NULL NULL NULL 1 100.00 2 DEPENDENT SUBQUERY t1 ALL NULL NULL NULL NULL 2 100.00 Using where Warnings: -Note 1003 /* select#1 */ select 3 AS `f` from dual where !<expr_cache><3>(<in_optimizer>(3,<exists>(/* select#2 */ select `test`.`t1`.`b` from `test`.`t1` where (`test`.`t1`.`c` = 'USA' or `test`.`t1`.`c` <> 'USA') and trigcond(<cache>(3) = `test`.`t1`.`b` or `test`.`t1`.`b` is null) and `test`.`t1`.`b` = `test`.`t1`.`a` having trigcond(`test`.`t1`.`b` is null)))) +Note 1003 /* select#1 */ select 3 AS `f` from dual where !<expr_cache><3>(<in_optimizer>(3,<exists>(/* select#2 */ select `test`.`t1`.`b` from `test`.`t1` where `test`.`t1`.`b` = `test`.`t1`.`a` and (`test`.`t1`.`c` = 'USA' or `test`.`t1`.`c` <> 'USA') and trigcond(<cache>(3) = `test`.`t1`.`b` or `test`.`t1`.`b` is null) having trigcond(`test`.`t1`.`b` is null)))) SELECT * FROM t2 WHERE f NOT IN (SELECT b FROM t1 WHERE 0 OR (c IN ('USA') OR c NOT IN ('USA')) AND a = b); @@ -2582,3 +2582,29 @@ id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 func 1 2 MATERIALIZED t2 ALL NULL NULL NULL NULL 100 drop table t0, t1, t2; +# +# MDEV-22377: Subquery in an UPDATE query uses full scan instead of range +# +create table t0(a int); +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t1(nkey1 int, col1 int); +insert into t1 select A.a+10*B.a+100*C.a, A.a+10*B.a+100*C.a from t0 A, t0 B, t0 C; +create table t2(key1 int, col1 int, key(key1)); +insert into t2 select A.a+10*B.a, A.a+10*B.a from t0 A, t0 B; +alter table t0 add b int; +# This must use the join order of t0,t2,t1 +# And table t2 must use range access, not full scan: +explain +select * +from t0 +where +a not in (SELECT t1.col1 +FROM t1 JOIN t2 ON t2.key1 = t1.nkey1 +WHERE +t1.nkey1 IN (1,2,3,4,5) and t2.col1>t0.b +); +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t0 ALL NULL NULL NULL NULL 10 Using where +2 DEPENDENT SUBQUERY t2 range key1 key1 5 NULL 5 Using index condition; Using where +2 DEPENDENT SUBQUERY t1 ALL NULL NULL NULL NULL 1000 Using where; Using join buffer (flat, BNL join) +drop table t0, t1, t2; diff --git a/mysql-test/main/subselect4.test b/mysql-test/main/subselect4.test index bc8db41c505..e3d8b5be832 100644 --- a/mysql-test/main/subselect4.test +++ b/mysql-test/main/subselect4.test @@ -2115,6 +2115,32 @@ where exists (select * from t2 where t2.a=t1.a order by t2.b limit 2,3); explain select * from t1 where t1.a in (select t2.a from t2 order by t2.b); +drop table t0, t1, t2; + +--echo # +--echo # MDEV-22377: Subquery in an UPDATE query uses full scan instead of range +--echo # +create table t0(a int); +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t1(nkey1 int, col1 int); +insert into t1 select A.a+10*B.a+100*C.a, A.a+10*B.a+100*C.a from t0 A, t0 B, t0 C; + +create table t2(key1 int, col1 int, key(key1)); +insert into t2 select A.a+10*B.a, A.a+10*B.a from t0 A, t0 B; + +alter table t0 add b int; +--echo # This must use the join order of t0,t2,t1 +--echo # And table t2 must use range access, not full scan: +explain +select * +from t0 +where + a not in (SELECT t1.col1 + FROM t1 JOIN t2 ON t2.key1 = t1.nkey1 + WHERE + t1.nkey1 IN (1,2,3,4,5) and t2.col1>t0.b + ); drop table t0, t1, t2; + diff --git a/mysql-test/main/subselect_mat_cost_bugs.result b/mysql-test/main/subselect_mat_cost_bugs.result index a18c5e608f1..755f1e2ad13 100644 --- a/mysql-test/main/subselect_mat_cost_bugs.result +++ b/mysql-test/main/subselect_mat_cost_bugs.result @@ -100,7 +100,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra 2 DEPENDENT SUBQUERY t2 index c3 c3 9 NULL 2 100.00 Using where; Using index; Using join buffer (flat, BNL join) Warnings: Note 1276 Field or reference 'test.t1.pk' of SELECT #2 was resolved in SELECT #1 -Note 1003 /* select#1 */ select `test`.`t1`.`pk` AS `pk` from `test`.`t1` where <expr_cache><`test`.`t1`.`c1`,`test`.`t1`.`pk`>(<in_optimizer>(`test`.`t1`.`c1`,<exists>(/* select#2 */ select `test`.`t1a`.`c1` from `test`.`t1b` join `test`.`t2` left join `test`.`t1a` on(`test`.`t1a`.`c2` = `test`.`t1b`.`pk` and 2) where `test`.`t1`.`pk` <> 0 and <cache>(`test`.`t1`.`c1`) = `test`.`t1a`.`c1` and `test`.`t2`.`c3` = `test`.`t1b`.`c4`))) +Note 1003 /* select#1 */ select `test`.`t1`.`pk` AS `pk` from `test`.`t1` where <expr_cache><`test`.`t1`.`c1`,`test`.`t1`.`pk`>(<in_optimizer>(`test`.`t1`.`c1`,<exists>(/* select#2 */ select `test`.`t1a`.`c1` from `test`.`t1b` join `test`.`t2` left join `test`.`t1a` on(`test`.`t1a`.`c2` = `test`.`t1b`.`pk` and 2) where `test`.`t2`.`c3` = `test`.`t1b`.`c4` and `test`.`t1`.`pk` <> 0 and <cache>(`test`.`t1`.`c1`) = `test`.`t1a`.`c1`))) SELECT pk FROM t1 WHERE c1 IN @@ -364,7 +364,7 @@ AND a = SOME (SELECT b FROM t5)); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t3 ALL NULL NULL NULL NULL 2 Using where 2 DEPENDENT SUBQUERY t5 index c c 10 NULL 2 Using where; Using index; Start temporary -2 DEPENDENT SUBQUERY t4 eq_ref PRIMARY PRIMARY 4 test.t5.b 1 Using index condition; Using where; End temporary +2 DEPENDENT SUBQUERY t4 eq_ref PRIMARY PRIMARY 4 test.t5.b 1 Using where; End temporary SELECT * FROM t3 WHERE t3.b > ALL ( diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index 736dfbd33b4..726c58e49a2 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -2704,9 +2704,28 @@ bool Item_in_subselect::inject_in_to_exists_cond(JOIN *join_arg) and_args= ((Item_cond*) join_arg->conds)->argument_list(); if (join_arg->cond_equal) and_args->disjoin((List<Item> *) &join_arg->cond_equal->current_level); + + /* + If subquery's WHERE is an Item_cond_and, add the injected conditions + into it manually. If we just called and_items(...) that could + potentially replace the top-level Item_cond_and and current top-level + Item_cond_and::m_cond_equal will be lost. + */ + if (where_item->type() == Item::COND_ITEM && + ((Item_cond*)where_item)->functype() == Item_func::COND_AND_FUNC) + { + List_iterator<Item> it(*((Item_cond*)where_item)->argument_list()); + while (Item *item=it++) + and_args->push_back(item); + } + else + and_args->push_back(where_item); + + where_item= join_arg->conds; } + else + where_item= and_items(thd, join_arg->conds, where_item); - where_item= and_items(thd, join_arg->conds, where_item); if (where_item->fix_fields_if_needed(thd, 0)) DBUG_RETURN(true); // TIMOUR TODO: call optimize_cond() for the new where clause @@ -2719,12 +2738,15 @@ bool Item_in_subselect::inject_in_to_exists_cond(JOIN *join_arg) { /* The argument list of the top-level AND may change after fix fields. */ and_args= ((Item_cond*) join_arg->conds)->argument_list(); - List_iterator<Item_equal> li(join_arg->cond_equal->current_level); - Item_equal *elem; - while ((elem= li++)) - { - and_args->push_back(elem, thd->mem_root); - } + + /* + Note that Item_equal objects must be present in this form: + Item_cond::conds list must have the end of the list to be the + same linked list as join_arg->cond_equal->current_level. + The code in substitute_for_best_equal_field depends on it (see the + disjoin() call). + */ + and_args->append((List<Item> *)&join_arg->cond_equal->current_level); } } BR Sergei -- Sergei Petrunia, Software Developer MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog
participants (1)
-
Sergey Petrunia