[Commits] 108af298032: MDEV-15454: Nested SELECT IN returns wrong results
revision-id: 108af298032cb4cea316a08c304c306268a2badf (mariadb-5.5.56-213-g108af298032) parent(s): 318097bb8f6e12c546b5dcd287416158209dbb39 author: Varun Gupta committer: Varun Gupta timestamp: 2018-05-11 02:56:33 +0530 message: MDEV-15454: Nested SELECT IN returns wrong results In this case we are setting the field Item_func_eq::in_eqaulity_no for the semi-join equalities. This helps us to remove these equalites as the inner tables are not available during parent select execution while the outer tables are not available during materialization phase. We only have it set for the equalites for the fields involved with the IN subquery and reset it for the equalities which do not belong to the IN subquery. For example in case of nested IN subqueries: SELECT t1.a FROM t1 WHERE t1.a IN (SELECT t2.a FROM t2 where t2.b IN (select t3.b from t3 where t3.c=27 )) there are two equalites involving the fields of the IN subquery 1) t2.b = t3.b : the field Item_func_eq::in_eqaulity_no is set when we merge the grandchild select into the child select 2) t1.a = t2.a : the field Item_func_eq::in_eqaulity_no is set when we merge the child select into the parent select But when we perform case 2) we should ensure that we reset the equalities in the child's WHERE clause. --- mysql-test/r/subselect_sj_mat.result | 46 +++++++++++++++++++++++++ mysql-test/t/subselect_sj_mat.test | 49 ++++++++++++++++++++++++++ sql/opt_subselect.cc | 66 ++++++++++++++++++++++++++++++++++++ 3 files changed, 161 insertions(+) diff --git a/mysql-test/r/subselect_sj_mat.result b/mysql-test/r/subselect_sj_mat.result index cb5012a91c9..2af1ac1f7b3 100644 --- a/mysql-test/r/subselect_sj_mat.result +++ b/mysql-test/r/subselect_sj_mat.result @@ -2400,4 +2400,50 @@ ec70316637232000158bbfc8bcbe5d60 ebb4620037332000158bbfc8bcbe5d89 DROP TABLE t1,t2,t3; set optimizer_switch=@save_optimizer_switch; +# +# MDEV-15454: Nested SELECT IN returns wrong results +# +CREATE TABLE t1 ( a int NOT NULL PRIMARY KEY); +CREATE TABLE t2 ( a int, b int ); +INSERT INTO t2 VALUES (7878, 96),(3465, 96),(1403, 96),(4189, 96),(8732, 96), (5,96); +CREATE TABLE t3 (c int unsigned NOT NULL, b int unsigned NOT NULL, PRIMARY KEY (c,b)); +INSERT INTO t3 (c, b) VALUES (27, 96); +CREATE PROCEDURE prepare_data() +BEGIN +DECLARE i INT DEFAULT 1; +WHILE i < 1000 DO +INSERT INTO t1 (a) VALUES (i); +INSERT INTO t2 (a,b) VALUES (i,56); +INSERT INTO t3 (c,b) VALUES (i,i); +SET i = i + 1; +END WHILE; +END$$ +CALL prepare_data(); +SET optimizer_switch='materialization=off'; +explain +SELECT t1.a FROM t1 +WHERE t1.a IN (SELECT t2.a FROM t2 WHERE t2.b IN (SELECT t3.b FROM t3 WHERE t3.c= 27)) LIMIT 5; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t3 ref PRIMARY PRIMARY 4 const 2 Using index; Start temporary +1 PRIMARY t2 ALL NULL NULL NULL NULL 1005 Using where; Using join buffer (flat, BNL join) +1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t2.a 1 Using index; End temporary +SELECT t1.a FROM t1 +WHERE t1.a IN (SELECT t2.a FROM t2 WHERE t2.b IN (SELECT t3.b FROM t3 WHERE t3.c= 27)) LIMIT 5; +a +5 +SET optimizer_switch='materialization=on'; +explain +SELECT t1.a FROM t1 +WHERE t1.a IN (SELECT t2.a FROM t2 WHERE t2.b IN (SELECT t3.b FROM t3 WHERE t3.c= 27)) LIMIT 5; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t1 index PRIMARY PRIMARY 4 NULL 999 Using index +1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 func 1 +2 MATERIALIZED t3 ref PRIMARY PRIMARY 4 const 2 Using index +2 MATERIALIZED t2 ALL NULL NULL NULL NULL 1005 Using where; Using join buffer (flat, BNL join) +SELECT t1.a FROM t1 +WHERE t1.a IN (SELECT t2.a FROM t2 WHERE t2.b IN (SELECT t3.b FROM t3 WHERE t3.c= 27)) LIMIT 5; +a +5 +drop procedure prepare_data; +drop table t1,t2,t3; # End of 5.5 tests diff --git a/mysql-test/t/subselect_sj_mat.test b/mysql-test/t/subselect_sj_mat.test index 90f63bea561..67cba08b5df 100644 --- a/mysql-test/t/subselect_sj_mat.test +++ b/mysql-test/t/subselect_sj_mat.test @@ -2151,4 +2151,53 @@ eval $q; DROP TABLE t1,t2,t3; set optimizer_switch=@save_optimizer_switch; +--echo # +--echo # MDEV-15454: Nested SELECT IN returns wrong results +--echo # + +CREATE TABLE t1 ( a int NOT NULL PRIMARY KEY); + +CREATE TABLE t2 ( a int, b int ); +INSERT INTO t2 VALUES (7878, 96),(3465, 96),(1403, 96),(4189, 96),(8732, 96), (5,96); + +CREATE TABLE t3 (c int unsigned NOT NULL, b int unsigned NOT NULL, PRIMARY KEY (c,b)); +INSERT INTO t3 (c, b) VALUES (27, 96); + +DELIMITER $$; +CREATE PROCEDURE prepare_data() +BEGIN + DECLARE i INT DEFAULT 1; + + WHILE i < 1000 DO + INSERT INTO t1 (a) VALUES (i); + INSERT INTO t2 (a,b) VALUES (i,56); + INSERT INTO t3 (c,b) VALUES (i,i); + SET i = i + 1; + END WHILE; +END$$ +DELIMITER ;$$ + +CALL prepare_data(); + +SET optimizer_switch='materialization=off'; + +explain +SELECT t1.a FROM t1 +WHERE t1.a IN (SELECT t2.a FROM t2 WHERE t2.b IN (SELECT t3.b FROM t3 WHERE t3.c= 27)) LIMIT 5; + +SELECT t1.a FROM t1 +WHERE t1.a IN (SELECT t2.a FROM t2 WHERE t2.b IN (SELECT t3.b FROM t3 WHERE t3.c= 27)) LIMIT 5; + +SET optimizer_switch='materialization=on'; + +explain +SELECT t1.a FROM t1 +WHERE t1.a IN (SELECT t2.a FROM t2 WHERE t2.b IN (SELECT t3.b FROM t3 WHERE t3.c= 27)) LIMIT 5; + +SELECT t1.a FROM t1 +WHERE t1.a IN (SELECT t2.a FROM t2 WHERE t2.b IN (SELECT t3.b FROM t3 WHERE t3.c= 27)) LIMIT 5; + +drop procedure prepare_data; +drop table t1,t2,t3; + --echo # End of 5.5 tests diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index a7edd64e68b..31213a5cf1d 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -440,6 +440,7 @@ bool subquery_types_allow_materialization(Item_in_subselect *in_subs); static bool replace_where_subcondition(JOIN *, Item **, Item *, Item *, bool); static int subq_sj_candidate_cmp(Item_in_subselect* el1, Item_in_subselect* el2, void *arg); +static void reset_equality_number_for_subq_conds(Item * cond); static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred); static bool convert_subq_to_jtbm(JOIN *parent_join, Item_in_subselect *subq_pred, bool *remove); @@ -1453,6 +1454,67 @@ static int subq_sj_candidate_cmp(Item_in_subselect* el1, Item_in_subselect* el2, } +/** + @brief + reset the value of the field in_eqaulity_no for all Item_func_eq + items in the where clause of the subquery. + + Look for in_equality_no description in Item_func_eq class + + DESCRIPTION + Lets have an example: + SELECT t1.a FROM t1 WHERE t1.a IN + (SELECT t2.a FROM t2 where t2.b IN + (select t3.b from t3 where t3.c=27 )) + + So for such a query we have the parent, child and + grandchild select. + + So for the equality t2.b = t3.b we set the value for in_equality_no to + 0 according to its description. Wewe do the same for t1.a = t2.a. + But when we look at the child select (with the grandchild select merged), + the query would be + + SELECT t1.a FROM t1 WHERE t1.a IN + (SELECT t2.a FROM t2 where t2.b = t3.b and t3.c=27) + + and then when the child select is merged into the parent select the query + would look like + + SELECT t1.a FROM t1, semi-join-nest(t2,t3) + WHERE t1.a =t2.a and t2.b = t3.b and t3.c=27 + + Still we would have in_equality_no set for t2.b = t3.b + though it does not take part in the semi-join equality for the parent select, + so we should reset its value to UINT_MAX. + + @param cond WHERE clause of the subquery +*/ + +static void reset_equality_number_for_subq_conds(Item * cond) +{ + if (!cond) + return; + if (cond->type() == Item::COND_ITEM) + { + List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + if (item->type() == Item::FUNC_ITEM && + ((Item_func*)item)->functype()== Item_func::EQ_FUNC) + ((Item_func_eq*)item)->in_equality_no= UINT_MAX; + } + } + else + { + if (cond->type() == Item::FUNC_ITEM && + ((Item_func*)cond)->functype()== Item_func::EQ_FUNC) + ((Item_func_eq*)cond)->in_equality_no= UINT_MAX; + } + return; +} + /* Convert a subquery predicate into a TABLE_LIST semi-join nest @@ -1481,6 +1543,7 @@ static int subq_sj_candidate_cmp(Item_in_subselect* el1, Item_in_subselect* el2, TRUE Out of memory error */ + static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred) { SELECT_LEX *parent_lex= parent_join->select_lex; @@ -1713,6 +1776,7 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred) if (subq_pred->left_expr->cols() == 1) { + reset_equality_number_for_subq_conds(sj_nest->sj_on_expr); /* add left = select_list_element */ nested_join->sj_outer_expr_list.push_back(&subq_pred->left_expr); /* @@ -1735,6 +1799,7 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred) } else if (subq_pred->left_expr->type() == Item::ROW_ITEM) { + reset_equality_number_for_subq_conds(sj_nest->sj_on_expr); /* disassemple left expression and add left1 = select_list_element1 and left2 = select_list_element2 ... @@ -1759,6 +1824,7 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred) } else { + reset_equality_number_for_subq_conds(sj_nest->sj_on_expr); /* add row operation left = (select_list_element1, select_list_element2, ...)
On Fri, May 11, 2018 at 03:08:29AM +0530, Varun wrote:
revision-id: 108af298032cb4cea316a08c304c306268a2badf (mariadb-5.5.56-213-g108af298032) parent(s): 318097bb8f6e12c546b5dcd287416158209dbb39 author: Varun Gupta committer: Varun Gupta timestamp: 2018-05-11 02:56:33 +0530 message:
MDEV-15454: Nested SELECT IN returns wrong results
In this case we are setting the field Item_func_eq::in_eqaulity_no for the semi-join equalities. This helps us to remove these equalites as the inner tables are not available during parent select execution while the outer tables are not available during materialization phase. We only have it set for the equalites for the fields involved with the IN subquery and reset it for the equalities which do not belong to the IN subquery.
For example in case of nested IN subqueries:
SELECT t1.a FROM t1 WHERE t1.a IN (SELECT t2.a FROM t2 where t2.b IN (select t3.b from t3 where t3.c=27 ))
there are two equalites involving the fields of the IN subquery
1) t2.b = t3.b : the field Item_func_eq::in_eqaulity_no is set when we merge the grandchild select into the child select 2) t1.a = t2.a : the field Item_func_eq::in_eqaulity_no is set when we merge the child select into the parent select
But when we perform case 2) we should ensure that we reset the equalities in the child's WHERE clause.
--- mysql-test/r/subselect_sj_mat.result | 46 +++++++++++++++++++++++++ mysql-test/t/subselect_sj_mat.test | 49 ++++++++++++++++++++++++++ sql/opt_subselect.cc | 66 ++++++++++++++++++++++++++++++++++++ 3 files changed, 161 insertions(+)
...
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index a7edd64e68b..31213a5cf1d 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -1481,6 +1543,7 @@ static int subq_sj_candidate_cmp(Item_in_subselect* el1, Item_in_subselect* el2, TRUE Out of memory error */
+ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred) { SELECT_LEX *parent_lex= parent_join->select_lex; @@ -1713,6 +1776,7 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred)
if (subq_pred->left_expr->cols() == 1) { + reset_equality_number_for_subq_conds(sj_nest->sj_on_expr); /* add left = select_list_element */ nested_join->sj_outer_expr_list.push_back(&subq_pred->left_expr); /* @@ -1735,6 +1799,7 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred) } else if (subq_pred->left_expr->type() == Item::ROW_ITEM) { + reset_equality_number_for_subq_conds(sj_nest->sj_on_expr); /* disassemple left expression and add left1 = select_list_element1 and left2 = select_list_element2 ... @@ -1759,6 +1824,7 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred) } else { + reset_equality_number_for_subq_conds(sj_nest->sj_on_expr); /* add row operation left = (select_list_element1, select_list_element2, ...)
So, the code here is if (...) { variant1; } else if (...) { variant2; } else { variant3; } and there is the same call added to all three:
+ reset_equality_number_for_subq_conds(sj_nest->sj_on_expr);
Can you move it out of the "if" and make it one call instead of three? Otherwise, the patch is ok, ok to push after the above is addressed. BR Sergei -- Sergei Petrunia, Software Developer MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog
participants (2)
-
Sergey Petrunia
-
varunraiko1803@gmail.com