[Maria-developers] Igor please review: [Commits] Rev 3464: BUG#951937: Wrong result (missing rows) with semijoin+materialization, IN subquery, InnoDB, TEMPTABLE view in file:///home/psergey/dev2/5.3-look60/]
Hello Igor, Could you please review the below: ----- Forwarded message from Sergey Petrunya <psergey@askmonty.org> ----- From: Sergey Petrunya <psergey@askmonty.org> To: commits@mariadb.org X-Mailer: mail (GNU Mailutils 1.2) Date: Sat, 24 Mar 2012 01:40:02 +0400 (GST) Subject: [Commits] Rev 3464: BUG#951937: Wrong result (missing rows) with semijoin+materialization, IN subquery, InnoDB, TEMPTABLE view in file:///home/psergey/dev2/5.3-look60/ At file:///home/psergey/dev2/5.3-look60/ ------------------------------------------------------------ revno: 3464 revision-id: psergey@askmonty.org-20120323213956-ohxb9jitekne7yya parent: psergey@askmonty.org-20120321071820-t2azo8xxlm73mezd committer: Sergey Petrunya <psergey@askmonty.org> branch nick: 5.3-look60 timestamp: Sat 2012-03-24 01:39:56 +0400 message: BUG#951937: Wrong result (missing rows) with semijoin+materialization, IN subquery, InnoDB, TEMPTABLE view - Fix equality propagation to work with SJM nests and OR clauses (full descirption of problem and solution in the comment in the patch) === modified file 'mysql-test/r/subselect_sj2.result' --- a/mysql-test/r/subselect_sj2.result 2012-02-25 00:50:22 +0000 +++ b/mysql-test/r/subselect_sj2.result 2012-03-23 21:39:56 +0000 @@ -903,5 +903,48 @@ 3 1 1 4 1 1 DROP TABLE t1,t2; +# +# BUG#951937: Wrong result (missing rows) with semijoin+materialization, IN subquery, InnoDB, TEMPTABLE view +# +CREATE TABLE t1 ( +a VARCHAR(1), +b VARCHAR(1) NOT NULL, +KEY(a) +) ENGINE=InnoDB; +INSERT INTO t1 VALUES +('j','j'),('v','v'),('c','c'),('m','m'),('d','d'), +('y','y'),('t','t'),('d','d'),('s','s'),('r','r'), +('m','m'),('b','b'),('x','x'),('g','g'),('p','p'), +('q','q'),('w','w'),('d','d'),('e','e'); +CREATE ALGORITHM=TEMPTABLE VIEW v1 AS SELECT * FROM t1; +SELECT * FROM v1 +WHERE ( a, a ) IN ( +SELECT alias2.b, alias2.a +FROM t1 AS alias1, t1 AS alias2 +WHERE alias2.b = alias1.a +AND ( alias1.b >= alias1.a OR alias2.b = 'z' ) +); +a b +j j +v v +c c +m m +m m +d d +d d +d d +y y +t t +s s +r r +b b +x x +g g +p p +q q +w w +e e +DROP VIEW v1; +DROP TABLE t1; # This must be the last in the file: set optimizer_switch=@subselect_sj2_tmp; === modified file 'mysql-test/r/subselect_sj2_jcl6.result' --- a/mysql-test/r/subselect_sj2_jcl6.result 2012-02-25 00:50:22 +0000 +++ b/mysql-test/r/subselect_sj2_jcl6.result 2012-03-23 21:39:56 +0000 @@ -917,6 +917,49 @@ 3 1 1 4 1 1 DROP TABLE t1,t2; +# +# BUG#951937: Wrong result (missing rows) with semijoin+materialization, IN subquery, InnoDB, TEMPTABLE view +# +CREATE TABLE t1 ( +a VARCHAR(1), +b VARCHAR(1) NOT NULL, +KEY(a) +) ENGINE=InnoDB; +INSERT INTO t1 VALUES +('j','j'),('v','v'),('c','c'),('m','m'),('d','d'), +('y','y'),('t','t'),('d','d'),('s','s'),('r','r'), +('m','m'),('b','b'),('x','x'),('g','g'),('p','p'), +('q','q'),('w','w'),('d','d'),('e','e'); +CREATE ALGORITHM=TEMPTABLE VIEW v1 AS SELECT * FROM t1; +SELECT * FROM v1 +WHERE ( a, a ) IN ( +SELECT alias2.b, alias2.a +FROM t1 AS alias1, t1 AS alias2 +WHERE alias2.b = alias1.a +AND ( alias1.b >= alias1.a OR alias2.b = 'z' ) +); +a b +j j +v v +c c +m m +m m +d d +d d +d d +y y +t t +s s +r r +b b +x x +g g +p p +q q +w w +e e +DROP VIEW v1; +DROP TABLE t1; # This must be the last in the file: set optimizer_switch=@subselect_sj2_tmp; # === modified file 'mysql-test/r/subselect_sj2_mat.result' --- a/mysql-test/r/subselect_sj2_mat.result 2012-02-25 00:50:22 +0000 +++ b/mysql-test/r/subselect_sj2_mat.result 2012-03-23 21:39:56 +0000 @@ -905,6 +905,49 @@ 3 1 1 4 1 1 DROP TABLE t1,t2; +# +# BUG#951937: Wrong result (missing rows) with semijoin+materialization, IN subquery, InnoDB, TEMPTABLE view +# +CREATE TABLE t1 ( +a VARCHAR(1), +b VARCHAR(1) NOT NULL, +KEY(a) +) ENGINE=InnoDB; +INSERT INTO t1 VALUES +('j','j'),('v','v'),('c','c'),('m','m'),('d','d'), +('y','y'),('t','t'),('d','d'),('s','s'),('r','r'), +('m','m'),('b','b'),('x','x'),('g','g'),('p','p'), +('q','q'),('w','w'),('d','d'),('e','e'); +CREATE ALGORITHM=TEMPTABLE VIEW v1 AS SELECT * FROM t1; +SELECT * FROM v1 +WHERE ( a, a ) IN ( +SELECT alias2.b, alias2.a +FROM t1 AS alias1, t1 AS alias2 +WHERE alias2.b = alias1.a +AND ( alias1.b >= alias1.a OR alias2.b = 'z' ) +); +a b +j j +v v +c c +m m +m m +d d +d d +d d +y y +t t +s s +r r +b b +x x +g g +p p +q q +w w +e e +DROP VIEW v1; +DROP TABLE t1; # This must be the last in the file: set optimizer_switch=@subselect_sj2_tmp; set optimizer_switch=default; === modified file 'mysql-test/t/subselect_sj2.test' --- a/mysql-test/t/subselect_sj2.test 2012-01-19 19:44:43 +0000 +++ b/mysql-test/t/subselect_sj2.test 2012-03-23 21:39:56 +0000 @@ -1090,5 +1090,32 @@ DROP TABLE t1,t2; +--echo # +--echo # BUG#951937: Wrong result (missing rows) with semijoin+materialization, IN subquery, InnoDB, TEMPTABLE view +--echo # +CREATE TABLE t1 ( + a VARCHAR(1), + b VARCHAR(1) NOT NULL, + KEY(a) +) ENGINE=InnoDB; +INSERT INTO t1 VALUES +('j','j'),('v','v'),('c','c'),('m','m'),('d','d'), +('y','y'),('t','t'),('d','d'),('s','s'),('r','r'), +('m','m'),('b','b'),('x','x'),('g','g'),('p','p'), +('q','q'),('w','w'),('d','d'),('e','e'); + +CREATE ALGORITHM=TEMPTABLE VIEW v1 AS SELECT * FROM t1; + +# This query returns 6 rows instead of 19 + +SELECT * FROM v1 +WHERE ( a, a ) IN ( + SELECT alias2.b, alias2.a + FROM t1 AS alias1, t1 AS alias2 + WHERE alias2.b = alias1.a + AND ( alias1.b >= alias1.a OR alias2.b = 'z' ) +); +DROP VIEW v1; +DROP TABLE t1; --echo # This must be the last in the file: set optimizer_switch=@subselect_sj2_tmp; === modified file 'sql/item_cmpfunc.cc' --- a/sql/item_cmpfunc.cc 2012-02-28 14:41:55 +0000 +++ b/sql/item_cmpfunc.cc 2012-03-23 21:39:56 +0000 @@ -5388,7 +5388,7 @@ */ Item_equal::Item_equal(Item *f1, Item *f2, bool with_const_item) - : Item_bool_func(), eval_item(0), cond_false(0) + : Item_bool_func(), eval_item(0), cond_false(0), context_field(NULL) { const_item_cache= 0; with_const= with_const_item; @@ -5411,7 +5411,7 @@ */ Item_equal::Item_equal(Item_equal *item_equal) - : Item_bool_func(), eval_item(0), cond_false(0) + : Item_bool_func(), eval_item(0), cond_false(0), context_field(NULL) { const_item_cache= 0; List_iterator_fast<Item> li(item_equal->equal_items); === modified file 'sql/item_cmpfunc.h' --- a/sql/item_cmpfunc.h 2012-01-19 22:11:53 +0000 +++ b/sql/item_cmpfunc.h 2012-03-23 21:39:56 +0000 @@ -1700,9 +1700,16 @@ as datetimes. The comparator is used only if compare_as_dates=TRUE */ Arg_comparator cmp; + + /* + For Item_equal objects inside an OR clause: one of the fields that were + used in the original comparison. + */ + Item_field *context_field; public: inline Item_equal() - : Item_bool_func(), with_const(FALSE), eval_item(0), cond_false(0) + : Item_bool_func(), with_const(FALSE), eval_item(0), cond_false(0), + context_field(NULL) { const_item_cache=0 ;} Item_equal(Item *f1, Item *f2, bool with_const_item); Item_equal(Item_equal *item_equal); @@ -1729,6 +1736,8 @@ Item *transform(Item_transformer transformer, uchar *arg); virtual void print(String *str, enum_query_type query_type); CHARSET_INFO *compare_collation(); + + void set_context_field(Item_field *ctx_field) { context_field= ctx_field; } friend class Item_equal_fields_iterator; friend Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels, Item_equal *item_equal); === modified file 'sql/opt_subselect.cc' --- a/sql/opt_subselect.cc 2012-03-18 19:58:20 +0000 +++ b/sql/opt_subselect.cc 2012-03-23 21:39:56 +0000 @@ -171,6 +171,246 @@ */ +/* +EqualityPropagationAndSjmNests +****************************** + +The process of equality propagation has two parts +P1. Equality propagation +P2. Equality substitution [for a certain join order] + +The propagation part is not affected by SJM nests. In fact, it is done before +we determine the execution plan, i.e. before we even know we will use +SJM-nests for execution. + +The substitution part is affected. + +Substitution without SJMs +========================= +When one doesn't have SJM nests, tables have a strict join order: + + ---------------------------------> + t1 -- t2 -- t3 -- t4 --- t5 + + + ? ^ + \ + --(part-of-WHERE) + + +parts WHERE/ON and ref. expressions are attached at some point along the axis. +Expression is allowed to refer to a table column if the table is to the left of +the attachment point. For any given expression, we have a goal: + + "Move leftmost allowed attachment point as much as possible to the left" + +Substitution with SJMs - task setting +===================================== + +When SJM nests are present, there is no global strict table ordering anymore: + + + ---------------------------------> + + ot1 -- ot2 --- sjm -- ot4 --- ot5 + | + | Main execution + - - - - - - - - - - - - - - - - - - - - - - - - + | Materialization + it1 -- it2 --/ + + +Besides that, we must take into account that + - values for outer table columns, otN.col, are inaccessible at + materialization step (SJM-RULE) + - values for inner table columns, itN.col, are inaccessible at Main execution + step, except for SJ-Materialization-Scan and columns that are in the + subquery's select list. (SJM-RULE) + +Substitution with SJMs - solution +================================= + +First, we introduce global strict table ordering like this: + + ot1 - ot2 --\ /--- ot3 -- ot5 + \--- it1 --- it2 --/ + +Now, let's see how to meet (SJM-RULE). + +SJ-Materialization is only applicable for uncorrelated subqueries. From this, it +follows that any multiple equality will either +1. include only columns of outer tables, or +2. include only columns of inner tables, or +3. include columns of inner and outer tables, joined together through one + of IN-equalities. + +Cases #1 and #2 can be handled in the same way as with regular inner joins. + +Case #3 requires special handling, so that we don't construct violations of +(SJM-RULE). Let's consider possible ways to build violations. + +Equality propagation starts with the clause in this form + + top_query_where AND subquery_where AND in_equalities + +First, it builds multi-equalities. It can also build a mixed multi-equality + + multiple-equal(ot1.col, ot2.col, ... it1.col, itN.col) + +Multi-equalities are pushed down the OR-clauses in top_query_where and in +subquery_where, so it's possible that clauses like this one are built: + + subquery_cond OR (multiple-equal(it1.col, ot1.col,...) AND ...) + ^^^^^^^^^^^^^ \ + | this must be evaluated + \- can only be invaluated on the main phase. + on materialization phase + +Finally, equality substitution is started. It does two operations: + + +1. Field reference substitution +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +(In the code, this is Item_field::replace_equal_field) + +This is a process of replacing each reference to "tblX.col" +with the first element of the multi-equality. (REF-SUBST-ORIG) + +This behaviour can cause problems with Semi-join nests. Suppose, we have a +condition: + + func(it1.col, it2.col) + +and a multi-equality(ot1.col, it1.col). Then, reference to "it1.col" will be +replaced with "ot1.col", constructing a condition + + func(ot1.col, it2.col) + +which will be a vioation of (SJM-RULE). + +In order to avoid this, (REF-SUBST-ORIG) is amended as follows: + +- references to tables "itX.col" that are inner wrt some SJM nest, are + replaced with references to the first inner table from the same SJM nest. + +- references to top-level tables "otX.col" are replaced with references to + the first element of the multi-equality, no matter if that first element is + a column of a top-level table or of table from some SJM nest. + (REF-SUBST-SJM) + + The case where the first element is a table from an SJM nest $SJM is ok, + because it can be proven that $SJM uses SJ-Materialization-Scan, and + "unpacks" correct column values to the first element during the main + execution phase. + +2. Item_equal elimination +~~~~~~~~~~~~~~~~~~~~~~~~~ +(In the code: eliminate_item_equal) This is a process of taking + + multiple-equal(a,b,c,d,e) + +and replacing it with an equivalent expression which is an AND of pair-wise +equalities: + + a=b AND a=c AND ... + +The equalities are picked such that for any given join prefix (t1,t2...) the +subset of equalities that can be evaluated gives the most restrictive +filtering. + +Without SJM nests, it is sufficient to compare every multi-equality member +with the first one: + + elem1=elem2 AND elem1=elem3 AND elem1=elem4 ... + +When SJM nests are present, we should take care not to construct equalities +that violate the (SJM-RULE). This is achieved by generating separate sets of +equalites for top-level tables and for inner tables. That is, for the join +order + + ot1 - ot2 --\ /--- ot3 -- ot5 + \--- it1 --- it2 --/ + +we will generate + ot1.col=ot2.col + ot1.col=ot3.col + ot1.col=ot5.col + it2.col=it1.col + + +2.1 The problem with Item_equals and ORs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +As has been mentioned above, multiple equalities are pushed down into OR +clauses, possibly building clauses like this: + + func(it.col2) OR multiple-equal(it1.col1, it1.col2, ot1.col) (1) + +where the first part of the clause has references to inner tables, while the +second has references to the top-level tables, which is a violation of +(SJM-RULE). + +AND-clauses of this kind do not create problems, because make_cond_for_table() +will take them apart. OR-clauses will not be split. It is possible to +split-out the part that's dependent on the inner table: + + func(it.col2) OR it1.col1=it1.col2 + +but this is a less-restrictive condition than condition (1). Current execution +scheme will still try to generate the "remainder" condition: + + func(it.col2) OR it1.col1=ot1.col + +which is a violation of (SJM-RULE). + +QQ: "ot1.col=it1.col" is checked at the upper level. Why was it not removed +here? +AA: because has a proper subset of conditions that are found on this level. + consider a join order of ot, sjm(it) + and a condition + ot.col=it.col AND ( ot.col=it.col='foo' OR it.col2='bar') + + we will produce: + table ot: nothing + table it: ot.col=it.col AND (ot.col='foo' OR it.col2='bar') + ^^^^ ^^^^^^^^^^^^^^^^ + | \ the problem is that + | this part condition didnt + | receive a substitution + | + +--- it was correct to subst, 'ot' is + the left-most. + + +Is there any merit in pushing inner=outer down into ORs? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Yes, there is. Consider the query: + + select * from ot + where ot.col in (select it.col from it where (it.col='foo' OR it.col='bar')) + +here, it may be useful to infer that + + (ot.col='foo' OR ot.col='bar') (CASE-FOR-SUBST) + +and attach that condition to the table 'ot'. + +Solution #1 +~~~~~~~~~~~ +Let make_cond_for_table() chop off the bad parts of produced OR clause. If it +has produced an OR clause: +- check if all of its branches are evaluable for the context join_tab +- If some aren't, destroy the OR clause + +Solution #2 +~~~~~~~~~~~ +Before the equality propagation phase, none of the OR clauses violate the +(SJM-RULE). This way, if we remember which tables the original equality +referred to, we can only generate equalities that refer to outer or inner +tables. Note that this will disallow handling of cases like (CASE-FOR-SUBST). +*/ + static bool subquery_types_allow_materialization(Item_in_subselect *in_subs); === modified file 'sql/sql_select.cc' --- a/sql/sql_select.cc 2012-03-13 20:49:18 +0000 +++ b/sql/sql_select.cc 2012-03-23 21:39:56 +0000 @@ -10666,6 +10666,9 @@ acceptable, as this happens rarely. The implementation without copying would be much more complicated. + For description of how equality propagation works with SJM nests, grep + for EqualityPropagationAndSjmNests. + @param left_item left term of the quality to be checked @param right_item right term of the equality to be checked @param item equality item if the equality originates from a condition @@ -10739,12 +10742,14 @@ { /* left_item_equal of an upper level contains left_item */ left_item_equal= new Item_equal(left_item_equal); + left_item_equal->set_context_field(((Item_field*) left_item)); cond_equal->current_level.push_back(left_item_equal); } if (right_copyfl) { /* right_item_equal of an upper level contains right_item */ right_item_equal= new Item_equal(right_item_equal); + right_item_equal->set_context_field(((Item_field*) right_item)); cond_equal->current_level.push_back(right_item_equal); } @@ -10830,6 +10835,7 @@ { item_equal= new Item_equal(item_equal); cond_equal->current_level.push_back(item_equal); + item_equal->set_context_field(field_item); } if (item_equal) { @@ -11480,6 +11486,8 @@ Item_equal::get_first() also takes similar measures for dealing with equality substitution in presense of SJM nests. + Grep for EqualityPropagationAndSjmNests for a more verbose description. + @return - The condition with generated simple equalities or a pointer to the simple generated equality, if success. @@ -11543,9 +11551,13 @@ on upper AND-levels. */ if (upper) - { + { + TABLE_LIST *native_sjm= embedding_sjm(item_equal->context_field); if (item_const && upper->get_const()) + { + /* Upper item also has "field_item=const". Don't produce equality here */ item= 0; + } else { Item_equal_fields_iterator li(*item_equal); @@ -11556,6 +11568,8 @@ break; } } + if (embedding_sjm(field_item) != native_sjm) + item= NULL; /* Don't produce equality */ } bool produce_equality= test(item == field_item); _______________________________________________ commits mailing list commits@mariadb.org https://lists.askmonty.org/cgi-bin/mailman/listinfo/commits ----- End forwarded message ----- -- BR Sergei -- Sergei Petrunia, Software Developer Monty Program AB, http://askmonty.org Blog: http://s.petrunia.net/blog
On 03/23/2012 02:41 PM, Sergei Petrunia wrote:
Hello Igor,
Could you please review the below:
Hi Sergey, Here's my feedback:
----- Forwarded message from Sergey Petrunya <psergey@askmonty.org> -----
From: Sergey Petrunya <psergey@askmonty.org> To: commits@mariadb.org X-Mailer: mail (GNU Mailutils 1.2) Date: Sat, 24 Mar 2012 01:40:02 +0400 (GST) Subject: [Commits] Rev 3464: BUG#951937: Wrong result (missing rows) with semijoin+materialization, IN subquery, InnoDB, TEMPTABLE view in file:///home/psergey/dev2/5.3-look60/
At file:///home/psergey/dev2/5.3-look60/
------------------------------------------------------------ revno: 3464 revision-id: psergey@askmonty.org-20120323213956-ohxb9jitekne7yya parent: psergey@askmonty.org-20120321071820-t2azo8xxlm73mezd committer: Sergey Petrunya <psergey@askmonty.org> branch nick: 5.3-look60 timestamp: Sat 2012-03-24 01:39:56 +0400 message: BUG#951937: Wrong result (missing rows) with semijoin+materialization, IN subquery, InnoDB, TEMPTABLE view - Fix equality propagation to work with SJM nests and OR clauses (full descirption of problem and solution in the comment in the patch) === modified file 'mysql-test/r/subselect_sj2.result' --- a/mysql-test/r/subselect_sj2.result 2012-02-25 00:50:22 +0000 +++ b/mysql-test/r/subselect_sj2.result 2012-03-23 21:39:56 +0000 @@ -903,5 +903,48 @@ 3 1 1 4 1 1 DROP TABLE t1,t2; +# +# BUG#951937: Wrong result (missing rows) with semijoin+materialization, IN subquery, InnoDB, TEMPTABLE view +# +CREATE TABLE t1 ( +a VARCHAR(1), +b VARCHAR(1) NOT NULL, +KEY(a) +) ENGINE=InnoDB; +INSERT INTO t1 VALUES +('j','j'),('v','v'),('c','c'),('m','m'),('d','d'), +('y','y'),('t','t'),('d','d'),('s','s'),('r','r'), +('m','m'),('b','b'),('x','x'),('g','g'),('p','p'), +('q','q'),('w','w'),('d','d'),('e','e'); +CREATE ALGORITHM=TEMPTABLE VIEW v1 AS SELECT * FROM t1; +SELECT * FROM v1 +WHERE ( a, a ) IN ( +SELECT alias2.b, alias2.a +FROM t1 AS alias1, t1 AS alias2 +WHERE alias2.b = alias1.a +AND ( alias1.b >= alias1.a OR alias2.b = 'z' ) +); +a b +j j +v v +c c +m m +m m +d d +d d +d d +y y +t t +s s +r r +b b +x x +g g +p p +q q +w w +e e +DROP VIEW v1; +DROP TABLE t1; # This must be the last in the file: set optimizer_switch=@subselect_sj2_tmp;
=== modified file 'mysql-test/r/subselect_sj2_jcl6.result' --- a/mysql-test/r/subselect_sj2_jcl6.result 2012-02-25 00:50:22 +0000 +++ b/mysql-test/r/subselect_sj2_jcl6.result 2012-03-23 21:39:56 +0000 @@ -917,6 +917,49 @@ 3 1 1 4 1 1 DROP TABLE t1,t2; +# +# BUG#951937: Wrong result (missing rows) with semijoin+materialization, IN subquery, InnoDB, TEMPTABLE view +# +CREATE TABLE t1 ( +a VARCHAR(1), +b VARCHAR(1) NOT NULL, +KEY(a) +) ENGINE=InnoDB; +INSERT INTO t1 VALUES +('j','j'),('v','v'),('c','c'),('m','m'),('d','d'), +('y','y'),('t','t'),('d','d'),('s','s'),('r','r'), +('m','m'),('b','b'),('x','x'),('g','g'),('p','p'), +('q','q'),('w','w'),('d','d'),('e','e'); +CREATE ALGORITHM=TEMPTABLE VIEW v1 AS SELECT * FROM t1; +SELECT * FROM v1 +WHERE ( a, a ) IN ( +SELECT alias2.b, alias2.a +FROM t1 AS alias1, t1 AS alias2 +WHERE alias2.b = alias1.a +AND ( alias1.b >= alias1.a OR alias2.b = 'z' ) +); +a b +j j +v v +c c +m m +m m +d d +d d +d d +y y +t t +s s +r r +b b +x x +g g +p p +q q +w w +e e +DROP VIEW v1; +DROP TABLE t1; # This must be the last in the file: set optimizer_switch=@subselect_sj2_tmp; #
=== modified file 'mysql-test/r/subselect_sj2_mat.result' --- a/mysql-test/r/subselect_sj2_mat.result 2012-02-25 00:50:22 +0000 +++ b/mysql-test/r/subselect_sj2_mat.result 2012-03-23 21:39:56 +0000 @@ -905,6 +905,49 @@ 3 1 1 4 1 1 DROP TABLE t1,t2; +# +# BUG#951937: Wrong result (missing rows) with semijoin+materialization, IN subquery, InnoDB, TEMPTABLE view +# +CREATE TABLE t1 ( +a VARCHAR(1), +b VARCHAR(1) NOT NULL, +KEY(a) +) ENGINE=InnoDB; +INSERT INTO t1 VALUES +('j','j'),('v','v'),('c','c'),('m','m'),('d','d'), +('y','y'),('t','t'),('d','d'),('s','s'),('r','r'), +('m','m'),('b','b'),('x','x'),('g','g'),('p','p'), +('q','q'),('w','w'),('d','d'),('e','e'); +CREATE ALGORITHM=TEMPTABLE VIEW v1 AS SELECT * FROM t1; +SELECT * FROM v1 +WHERE ( a, a ) IN ( +SELECT alias2.b, alias2.a +FROM t1 AS alias1, t1 AS alias2 +WHERE alias2.b = alias1.a +AND ( alias1.b >= alias1.a OR alias2.b = 'z' ) +); +a b +j j +v v +c c +m m +m m +d d +d d +d d +y y +t t +s s +r r +b b +x x +g g +p p +q q +w w +e e +DROP VIEW v1; +DROP TABLE t1; # This must be the last in the file: set optimizer_switch=@subselect_sj2_tmp; set optimizer_switch=default;
=== modified file 'mysql-test/t/subselect_sj2.test' --- a/mysql-test/t/subselect_sj2.test 2012-01-19 19:44:43 +0000 +++ b/mysql-test/t/subselect_sj2.test 2012-03-23 21:39:56 +0000 @@ -1090,5 +1090,32 @@
DROP TABLE t1,t2;
+--echo # +--echo # BUG#951937: Wrong result (missing rows) with semijoin+materialization, IN subquery, InnoDB, TEMPTABLE view +--echo # +CREATE TABLE t1 ( + a VARCHAR(1), + b VARCHAR(1) NOT NULL, + KEY(a) +) ENGINE=InnoDB; +INSERT INTO t1 VALUES +('j','j'),('v','v'),('c','c'),('m','m'),('d','d'), +('y','y'),('t','t'),('d','d'),('s','s'),('r','r'), +('m','m'),('b','b'),('x','x'),('g','g'),('p','p'), +('q','q'),('w','w'),('d','d'),('e','e'); + +CREATE ALGORITHM=TEMPTABLE VIEW v1 AS SELECT * FROM t1; + +# This query returns 6 rows instead of 19 + +SELECT * FROM v1 +WHERE ( a, a ) IN ( + SELECT alias2.b, alias2.a + FROM t1 AS alias1, t1 AS alias2 + WHERE alias2.b = alias1.a + AND ( alias1.b >= alias1.a OR alias2.b = 'z' ) +); +DROP VIEW v1; +DROP TABLE t1; --echo # This must be the last in the file: set optimizer_switch=@subselect_sj2_tmp;
I tried a test case without any view: MariaDB [test]> CREATE TABLE t2 (a VARCHAR(1), b VARCHAR(1) NOT NULL, KEY(a)) ENGINE=InnoDB; Query OK, 0 rows affected (0.02 sec) MariaDB [test]> INSERT INTO t2 SELECT * FROM t1; Query OK, 19 rows affected (0.00 sec) Records: 19 Duplicates: 0 Warnings: 0 MariaDB [test]> INSERT INTO t2 SELECT * FROM t1; Query OK, 19 rows affected (0.00 sec) Records: 19 Duplicates: 0 Warnings: 0 MariaDB [test]> EXPLAIN EXTENDED SELECT * FROM t2 WHERE ( a, a ) IN ( SELECT alias2.b, alias2.a FROM t1 AS alias1, t1 AS alias2 WHERE alias2.b = alias1.a AND ( alias1.b >= alias1.a OR alias2.b = 'z' ) ); +----+--------------+-------------+------+---------------+------+---------+---------------+------+----------+------------------------------------+ | id | select_type | table | type | possible_keys | key | key_len | ref | rows | filtered | Extra | +----+--------------+-------------+------+---------------+------+---------+---------------+------+----------+------------------------------------+ | 1 | PRIMARY | <subquery2> | ALL | distinct_key | NULL | NULL | NULL | 19 | 100.00 | | | 1 | PRIMARY | t2 | ref | a | a | 4 | test.alias1.a | 1 | 100.00 | Using index condition; Using where | | 2 | MATERIALIZED | alias1 | ALL | a | NULL | NULL | NULL | 19 | 100.00 | Using where | | 2 | MATERIALIZED | alias2 | ref | a | a | 4 | test.alias1.a | 1 | 100.00 | Using where | +----+--------------+-------------+------+---------------+------+---------+---------------+------+----------+------------------------------------+ 4 rows in set, 1 warning (0.00 sec) MariaDB [test]> SHOW WARNINGS; +-------+------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Level | Code | Message | +-------+------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Note | 1003 | select `test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` semi join (`test`.`t1` `alias1` join `test`.`t1` `alias2`) where ((`test`.`alias2`.`a` = `test`.`alias1`.`a`) and (`test`.`alias2`.`b` = `test`.`alias1`.`a`) and (`test`.`t2`.`a` = `test`.`alias1`.`a`) and ((`test`.`alias1`.`b` >= `test`.`alias1`.`a`) or ((`test`.`alias1`.`a` = 'z') and (`test`.`t2`.`a` = 'z')))) | +-------+------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ 1 row in set (0.00 sec) MariaDB [test]> SELECT * FROM t2 WHERE ( a, a ) IN ( SELECT alias2.b, alias2.a FROM t1 AS alias1, t1 AS alias2 WHERE alias2.b = alias1.a AND ( alias1.b >= alias1.a OR alias2.b = 'z' ) ); +------+---+ | a | b | +------+---+ | c | c | | c | c | | d | d | | d | d | | d | d | | d | d | | d | d | | d | d | | b | b | | b | b | | e | e | | e | e | +------+---+ 12 rows in set (0.01 sec) As you can see here 5.3 also have similar problems. I would suggest this test case as the main one for this bug. I would preserve the old one as well.
=== modified file 'sql/item_cmpfunc.cc' --- a/sql/item_cmpfunc.cc 2012-02-28 14:41:55 +0000 +++ b/sql/item_cmpfunc.cc 2012-03-23 21:39:56 +0000 @@ -5388,7 +5388,7 @@ */
Item_equal::Item_equal(Item *f1, Item *f2, bool with_const_item) - : Item_bool_func(), eval_item(0), cond_false(0) + : Item_bool_func(), eval_item(0), cond_false(0), context_field(NULL) { const_item_cache= 0; with_const= with_const_item; @@ -5411,7 +5411,7 @@ */
Item_equal::Item_equal(Item_equal *item_equal) - : Item_bool_func(), eval_item(0), cond_false(0) + : Item_bool_func(), eval_item(0), cond_false(0), context_field(NULL) { const_item_cache= 0; List_iterator_fast<Item> li(item_equal->equal_items);
=== modified file 'sql/item_cmpfunc.h' --- a/sql/item_cmpfunc.h 2012-01-19 22:11:53 +0000 +++ b/sql/item_cmpfunc.h 2012-03-23 21:39:56 +0000 @@ -1700,9 +1700,16 @@ as datetimes. The comparator is used only if compare_as_dates=TRUE */ Arg_comparator cmp; + + /* + For Item_equal objects inside an OR clause: one of the fields that were + used in the original comparison.
^ an original equality (?)
+ */ + Item_field *context_field; public: inline Item_equal() - : Item_bool_func(), with_const(FALSE), eval_item(0), cond_false(0) + : Item_bool_func(), with_const(FALSE), eval_item(0), cond_false(0), + context_field(NULL) { const_item_cache=0 ;} Item_equal(Item *f1, Item *f2, bool with_const_item); Item_equal(Item_equal *item_equal); @@ -1729,6 +1736,8 @@ Item *transform(Item_transformer transformer, uchar *arg); virtual void print(String *str, enum_query_type query_type); CHARSET_INFO *compare_collation(); + + void set_context_field(Item_field *ctx_field) { context_field= ctx_field; } friend class Item_equal_fields_iterator; friend Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels, Item_equal *item_equal);
=== modified file 'sql/opt_subselect.cc' --- a/sql/opt_subselect.cc 2012-03-18 19:58:20 +0000 +++ b/sql/opt_subselect.cc 2012-03-23 21:39:56 +0000 @@ -171,6 +171,246 @@
*/
+/* +EqualityPropagationAndSjmNests +****************************** + +The process of equality propagation has two parts ^Equalities are used for:
+P1. Equality propagation +P2. Equality substitution [for a certain join order] + +The propagation part is not affected by SJM nests. In fact, it is done before ^The equality propagation is ... +we determine the execution plan, i.e. before we even know we will use +SJM-nests for execution. + +The substitution part is affected. The equality substitution is ... + +Substitution without SJMs +========================= +When one doesn't have SJM nests, tables have a strict join order: + + ---------------------------------> + t1 -- t2 -- t3 -- t4 --- t5 + + + ? ^ + \ + --(part-of-WHERE) + + +parts WHERE/ON and ref. expressions are attached at some point along the axis. +Expression is allowed to refer to a table column if the table is to the left of +the attachment point. For any given expression, we have a goal: + + "Move leftmost allowed attachment point as much as possible to the left" + +Substitution with SJMs - task setting +===================================== + +When SJM nests are present, there is no global strict table ordering anymore: + + + ---------------------------------> + + ot1 -- ot2 --- sjm -- ot4 --- ot5 + | + | Main execution + - - - - - - - - - - - - - - - - - - - - - - - - + | Materialization + it1 -- it2 --/ + + +Besides that, we must take into account that + - values for outer table columns, otN.col, are inaccessible at + materialization step (SJM-RULE) + - values for inner table columns, itN.col, are inaccessible at Main execution + step, except for SJ-Materialization-Scan and columns that are in the + subquery's select list. (SJM-RULE) + +Substitution with SJMs - solution +================================= + +First, we introduce global strict table ordering like this: + + ot1 - ot2 --\ /--- ot3 -- ot5 + \--- it1 --- it2 --/ + +Now, let's see how to meet (SJM-RULE). + +SJ-Materialization is only applicable for uncorrelated subqueries. From this, it +follows that any multiple equality will either +1. include only columns of outer tables, or +2. include only columns of inner tables, or +3. include columns of inner and outer tables, joined together through one + of IN-equalities. + +Cases #1 and #2 can be handled in the same way as with regular inner joins. + +Case #3 requires special handling, so that we don't construct violations of +(SJM-RULE). Let's consider possible ways to build violations. + +Equality propagation starts with the clause in this form + + top_query_where AND subquery_where AND in_equalities + +First, it builds multi-equalities. It can also build a mixed multi-equality + + multiple-equal(ot1.col, ot2.col, ... it1.col, itN.col) + +Multi-equalities are pushed down the OR-clauses in top_query_where and in +subquery_where, so it's possible that clauses like this one are built: + + subquery_cond OR (multiple-equal(it1.col, ot1.col,...) AND ...) + ^^^^^^^^^^^^^ \ + | this must be evaluated + \- can only be invaluated on the main phase ^evaluated ^at + on materialization phase ^at the + +Finally, equality substitution is started. It does two operations: + + +1. Field reference substitution +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +(In the code, this is Item_field::replace_equal_field) + +This is a process of replacing each reference to "tblX.col" +with the first element of the multi-equality. (REF-SUBST-ORIG) + +This behaviour can cause problems with Semi-join nests. Suppose, we have a +condition: + + func(it1.col, it2.col) + +and a multi-equality(ot1.col, it1.col). Then, reference to "it1.col" will be +replaced with "ot1.col", constructing a condition + + func(ot1.col, it2.col) + +which will be a vioation of (SJM-RULE). ^violation + +In order to avoid this, (REF-SUBST-ORIG) is amended as follows: + +- references to tables "itX.col" that are inner wrt some SJM nest, are + replaced with references to the first inner table from the same SJM nest. + +- references to top-level tables "otX.col" are replaced with references to + the first element of the multi-equality, no matter if that first element is + a column of a top-level table or of table from some SJM nest. + (REF-SUBST-SJM) + + The case where the first element is a table from an SJM nest $SJM is ok, + because it can be proven that $SJM uses SJ-Materialization-Scan, and + "unpacks" correct column values to the first element during the main + execution phase. + +2. Item_equal elimination +~~~~~~~~~~~~~~~~~~~~~~~~~ +(In the code: eliminate_item_equal) This is a process of taking + + multiple-equal(a,b,c,d,e) + +and replacing it with an equivalent expression which is an AND of pair-wise +equalities: + + a=b AND a=c AND ... + +The equalities are picked such that for any given join prefix (t1,t2...) the +subset of equalities that can be evaluated gives the most restrictive +filtering. + +Without SJM nests, it is sufficient to compare every multi-equality member +with the first one: + + elem1=elem2 AND elem1=elem3 AND elem1=elem4 ... + +When SJM nests are present, we should take care not to construct equalities +that violate the (SJM-RULE). This is achieved by generating separate sets of +equalites for top-level tables and for inner tables. That is, for the join +order + + ot1 - ot2 --\ /--- ot3 -- ot5 + \--- it1 --- it2 --/ + +we will generate + ot1.col=ot2.col + ot1.col=ot3.col + ot1.col=ot5.col + it2.col=it1.col + + +2.1 The problem with Item_equals and ORs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +As has been mentioned above, multiple equalities are pushed down into OR +clauses, possibly building clauses like this: + + func(it.col2) OR multiple-equal(it1.col1, it1.col2, ot1.col) (1) + +where the first part of the clause has references to inner tables, while the +second has references to the top-level tables, which is a violation of +(SJM-RULE). + +AND-clauses of this kind do not create problems, because make_cond_for_table() +will take them apart. OR-clauses will not be split. It is possible to +split-out the part that's dependent on the inner table: + + func(it.col2) OR it1.col1=it1.col2 + +but this is a less-restrictive condition than condition (1). Current execution +scheme will still try to generate the "remainder" condition: + + func(it.col2) OR it1.col1=ot1.col + +which is a violation of (SJM-RULE). + +QQ: "ot1.col=it1.col" is checked at the upper level. Why was it not removed +here? +AA: because has a proper subset of conditions that are found on this level. + consider a join order of ot, sjm(it) + and a condition + ot.col=it.col AND ( ot.col=it.col='foo' OR it.col2='bar') + + we will produce: + table ot: nothing + table it: ot.col=it.col AND (ot.col='foo' OR it.col2='bar') + ^^^^ ^^^^^^^^^^^^^^^^ + | \ the problem is that + | this part condition didnt + | receive a substitution + | + +--- it was correct to subst, 'ot' is + the left-most. + + +Is there any merit in pushing inner=outer down into ORs? ^sense? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Yes, there is. Consider the query: + + select * from ot + where ot.col in (select it.col from it where (it.col='foo' OR it.col='bar')) + +here, it may be useful to infer that + + (ot.col='foo' OR ot.col='bar') (CASE-FOR-SUBST) + +and attach that condition to the table 'ot'. + +Solution #1 +~~~~~~~~~~~ +Let make_cond_for_table() chop off the bad parts of produced OR clause. If it +has produced an OR clause: +- check if all of its branches are evaluable for the context join_tab +- If some aren't, destroy the OR clause + +Solution #2 +~~~~~~~~~~~ +Before the equality propagation phase, none of the OR clauses violate the +(SJM-RULE). This way, if we remember which tables the original equality +referred to, we can only generate equalities that refer to outer or inner +tables. Note that this will disallow handling of cases like (CASE-FOR-SUBST). +*/
You have to say somewhere that Solution #2 is currently chosen.
+
static bool subquery_types_allow_materialization(Item_in_subselect *in_subs);
=== modified file 'sql/sql_select.cc' --- a/sql/sql_select.cc 2012-03-13 20:49:18 +0000 +++ b/sql/sql_select.cc 2012-03-23 21:39:56 +0000 @@ -10666,6 +10666,9 @@ acceptable, as this happens rarely. The implementation without copying would be much more complicated.
+ For description of how equality propagation works with SJM nests, grep + for EqualityPropagationAndSjmNests. + @param left_item left term of the quality to be checked @param right_item right term of the equality to be checked @param item equality item if the equality originates from a condition @@ -10739,12 +10742,14 @@ { /* left_item_equal of an upper level contains left_item */ left_item_equal= new Item_equal(left_item_equal); + left_item_equal->set_context_field(((Item_field*) left_item)); cond_equal->current_level.push_back(left_item_equal); } if (right_copyfl) { /* right_item_equal of an upper level contains right_item */ right_item_equal= new Item_equal(right_item_equal); + right_item_equal->set_context_field(((Item_field*) right_item)); cond_equal->current_level.push_back(right_item_equal); }
@@ -10830,6 +10835,7 @@ { item_equal= new Item_equal(item_equal); cond_equal->current_level.push_back(item_equal); + item_equal->set_context_field(field_item); } if (item_equal) { @@ -11480,6 +11486,8 @@ Item_equal::get_first() also takes similar measures for dealing with equality substitution in presense of SJM nests.
+ Grep for EqualityPropagationAndSjmNests for a more verbose description. + @return - The condition with generated simple equalities or a pointer to the simple generated equality, if success. @@ -11543,9 +11551,13 @@ on upper AND-levels. */ if (upper) - { + { + TABLE_LIST *native_sjm= embedding_sjm(item_equal->context_field); if (item_const && upper->get_const()) + { + /* Upper item also has "field_item=const". Don't produce equality here */ item= 0; + } else { Item_equal_fields_iterator li(*item_equal); @@ -11556,6 +11568,8 @@ break; } } + if (embedding_sjm(field_item) != native_sjm) + item= NULL; /* Don't produce equality */ }
bool produce_equality= test(item == field_item);
_______________________________________________ commits mailing list commits@mariadb.org https://lists.askmonty.org/cgi-bin/mailman/listinfo/commits
----- End forwarded message -----
If you agree with what I said above and ready to correct the patch appropriately you can push it.
participants (2)
-
Igor Babaev
-
Sergei Petrunia