At file:///home/psergey/dev/maria-5.3-subqueries-r7/ ------------------------------------------------------------ revno: 2774 revision-id: psergey@askmonty.org-20100313200452-kq4dxayp7b45zum1 parent: psergey@askmonty.org-20100307154145-ksby2b1l0sqm1xne committer: Sergey Petrunya <psergey@askmonty.org> branch nick: maria-5.3-subqueries-r7 timestamp: Sat 2010-03-13 23:04:52 +0300 message: BUG#45174: XOR in subqueries produces differing results in 5.1 and 5.4 BUG#50019: Wrong result for IN-subquery with materialization - Fix equality substitution in presense of semi-join materialization, lookup and scan variants (started off from fix by Evgen Potemkin, then modified it to work in all cases) === modified file 'mysql-test/r/subselect_mat.result' --- a/mysql-test/r/subselect_mat.result 2010-01-17 14:51:10 +0000 +++ b/mysql-test/r/subselect_mat.result 2010-03-13 20:04:52 +0000 @@ -1219,3 +1219,28 @@ pk 2 DROP TABLE t1, t2; +# +# BUG#50019: Wrong result for IN-subquery with materialization +# +create table t1(i int); +insert into t1 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +create table t2(i int); +insert into t2 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +create table t3(i int); +insert into t3 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +select * from t1 where t1.i in (select t2.i from t2 join t3 where t2.i + t3.i = 5); +i +1 +2 +3 +4 +set @save_optimizer_switch=@@optimizer_switch; +set session optimizer_switch='materialization=off'; +select * from t1 where t1.i in (select t2.i from t2 join t3 where t2.i + t3.i = 5); +i +1 +2 +3 +4 +set session optimizer_switch=@save_optimizer_switch; +drop table t1, t2, t3; === modified file 'mysql-test/r/subselect_sj.result' --- a/mysql-test/r/subselect_sj.result 2010-02-24 11:33:42 +0000 +++ b/mysql-test/r/subselect_sj.result 2010-03-13 20:04:52 +0000 @@ -871,3 +871,39 @@ DROP TABLE t1, t2, t3; DROP VIEW v2, v3; # End of Bug#49198 +# +# Bug#45174: Incorrectly applied equality propagation caused wrong +# result on a query with a materialized semi-join. +# +CREATE TABLE `t1` ( +`pk` int(11) NOT NULL AUTO_INCREMENT, +`varchar_key` varchar(1) NOT NULL, +`varchar_nokey` varchar(1) NOT NULL, +PRIMARY KEY (`pk`), +KEY `varchar_key` (`varchar_key`) +); +INSERT INTO `t1` VALUES (11,'m','m'),(12,'j','j'),(13,'z','z'),(14,'a','a'),(15,'',''),(16,'e','e'),(17,'t','t'),(19,'b','b'),(20,'w','w'),(21,'m','m'),(23,'',''),(24,'w','w'),(26,'e','e'),(27,'e','e'),(28,'p','p'); +CREATE TABLE `t2` ( +`varchar_nokey` varchar(1) NOT NULL +); +INSERT INTO `t2` VALUES ('v'),('u'),('n'),('l'),('h'),('u'),('n'),('j'),('k'),('e'),('i'),('u'),('n'),('b'),('x'),(''),('q'),('u'); +EXPLAIN EXTENDED SELECT varchar_nokey +FROM t2 +WHERE ( `varchar_nokey` , `varchar_nokey` ) IN ( +SELECT `varchar_key` , `varchar_nokey` +FROM t1 +WHERE `varchar_nokey` < 'n' XOR `pk` ) ; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY t2 ALL NULL NULL NULL NULL 18 100.00 +1 PRIMARY t1 ALL varchar_key NULL NULL NULL 15 100.00 Using where; Materialize +Warnings: +Note 1003 select `test`.`t2`.`varchar_nokey` AS `varchar_nokey` from `test`.`t2` semi join (`test`.`t1`) where ((`test`.`t1`.`varchar_nokey` = `test`.`t1`.`varchar_key`) and ((`test`.`t1`.`varchar_nokey` < 'n') xor `test`.`t1`.`pk`)) +SELECT varchar_nokey +FROM t2 +WHERE ( `varchar_nokey` , `varchar_nokey` ) IN ( +SELECT `varchar_key` , `varchar_nokey` +FROM t1 +WHERE `varchar_nokey` < 'n' XOR `pk` ) ; +varchar_nokey +DROP TABLE t1, t2; +# End of the test for bug#45174. === modified file 'mysql-test/r/subselect_sj_jcl6.result' --- a/mysql-test/r/subselect_sj_jcl6.result 2010-03-07 15:41:45 +0000 +++ b/mysql-test/r/subselect_sj_jcl6.result 2010-03-13 20:04:52 +0000 @@ -876,6 +876,42 @@ DROP VIEW v2, v3; # End of Bug#49198 # +# Bug#45174: Incorrectly applied equality propagation caused wrong +# result on a query with a materialized semi-join. +# +CREATE TABLE `t1` ( +`pk` int(11) NOT NULL AUTO_INCREMENT, +`varchar_key` varchar(1) NOT NULL, +`varchar_nokey` varchar(1) NOT NULL, +PRIMARY KEY (`pk`), +KEY `varchar_key` (`varchar_key`) +); +INSERT INTO `t1` VALUES (11,'m','m'),(12,'j','j'),(13,'z','z'),(14,'a','a'),(15,'',''),(16,'e','e'),(17,'t','t'),(19,'b','b'),(20,'w','w'),(21,'m','m'),(23,'',''),(24,'w','w'),(26,'e','e'),(27,'e','e'),(28,'p','p'); +CREATE TABLE `t2` ( +`varchar_nokey` varchar(1) NOT NULL +); +INSERT INTO `t2` VALUES ('v'),('u'),('n'),('l'),('h'),('u'),('n'),('j'),('k'),('e'),('i'),('u'),('n'),('b'),('x'),(''),('q'),('u'); +EXPLAIN EXTENDED SELECT varchar_nokey +FROM t2 +WHERE ( `varchar_nokey` , `varchar_nokey` ) IN ( +SELECT `varchar_key` , `varchar_nokey` +FROM t1 +WHERE `varchar_nokey` < 'n' XOR `pk` ) ; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY t2 ALL NULL NULL NULL NULL 18 100.00 +1 PRIMARY t1 ALL varchar_key NULL NULL NULL 15 100.00 Using where; Materialize +Warnings: +Note 1003 select `test`.`t2`.`varchar_nokey` AS `varchar_nokey` from `test`.`t2` semi join (`test`.`t1`) where ((`test`.`t1`.`varchar_nokey` = `test`.`t1`.`varchar_key`) and ((`test`.`t1`.`varchar_nokey` < 'n') xor `test`.`t1`.`pk`)) +SELECT varchar_nokey +FROM t2 +WHERE ( `varchar_nokey` , `varchar_nokey` ) IN ( +SELECT `varchar_key` , `varchar_nokey` +FROM t1 +WHERE `varchar_nokey` < 'n' XOR `pk` ) ; +varchar_nokey +DROP TABLE t1, t2; +# End of the test for bug#45174. +# # BUG#49129: Wrong result with IN-subquery with join_cache_level=6 and firstmatch=off # CREATE TABLE t0 (a INT); === modified file 'mysql-test/t/subselect_mat.test' --- a/mysql-test/t/subselect_mat.test 2010-01-17 14:51:10 +0000 +++ b/mysql-test/t/subselect_mat.test 2010-03-13 20:04:52 +0000 @@ -889,3 +889,19 @@ SELECT pk FROM t1 WHERE (b,c,d) IN (SELECT b,c,d FROM t2 WHERE pk > 0); DROP TABLE t1, t2; +--echo # +--echo # BUG#50019: Wrong result for IN-subquery with materialization +--echo # +create table t1(i int); +insert into t1 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +create table t2(i int); +insert into t2 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +create table t3(i int); +insert into t3 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +select * from t1 where t1.i in (select t2.i from t2 join t3 where t2.i + t3.i = 5); +set @save_optimizer_switch=@@optimizer_switch; +set session optimizer_switch='materialization=off'; +select * from t1 where t1.i in (select t2.i from t2 join t3 where t2.i + t3.i = 5); +set session optimizer_switch=@save_optimizer_switch; +drop table t1, t2, t3; + === modified file 'mysql-test/t/subselect_sj.test' --- a/mysql-test/t/subselect_sj.test 2010-02-24 11:33:42 +0000 +++ b/mysql-test/t/subselect_sj.test 2010-03-13 20:04:52 +0000 @@ -770,3 +770,42 @@ DROP VIEW v2, v3; --echo # End of Bug#49198 + +--echo # +--echo # Bug#45174: Incorrectly applied equality propagation caused wrong +--echo # result on a query with a materialized semi-join. +--echo # + +CREATE TABLE `t1` ( + `pk` int(11) NOT NULL AUTO_INCREMENT, + `varchar_key` varchar(1) NOT NULL, + `varchar_nokey` varchar(1) NOT NULL, + PRIMARY KEY (`pk`), + KEY `varchar_key` (`varchar_key`) +); + +INSERT INTO `t1` VALUES (11,'m','m'),(12,'j','j'),(13,'z','z'),(14,'a','a'),(15,'',''),(16,'e','e'),(17,'t','t'),(19,'b','b'),(20,'w','w'),(21,'m','m'),(23,'',''),(24,'w','w'),(26,'e','e'),(27,'e','e'),(28,'p','p'); + +CREATE TABLE `t2` ( + `varchar_nokey` varchar(1) NOT NULL +); + +INSERT INTO `t2` VALUES ('v'),('u'),('n'),('l'),('h'),('u'),('n'),('j'),('k'),('e'),('i'),('u'),('n'),('b'),('x'),(''),('q'),('u'); + +EXPLAIN EXTENDED SELECT varchar_nokey +FROM t2 +WHERE ( `varchar_nokey` , `varchar_nokey` ) IN ( +SELECT `varchar_key` , `varchar_nokey` +FROM t1 +WHERE `varchar_nokey` < 'n' XOR `pk` ) ; + +SELECT varchar_nokey +FROM t2 +WHERE ( `varchar_nokey` , `varchar_nokey` ) IN ( +SELECT `varchar_key` , `varchar_nokey` +FROM t1 +WHERE `varchar_nokey` < 'n' XOR `pk` ) ; + +DROP TABLE t1, t2; + +--echo # End of the test for bug#45174. === modified file 'sql/item.cc' --- a/sql/item.cc 2010-02-24 11:33:42 +0000 +++ b/sql/item.cc 2010-03-13 20:04:52 +0000 @@ -4761,7 +4761,7 @@ return this; return const_item; } - Item_field *subst= item_equal->get_first(); + Item_field *subst= item_equal->get_first(this); if (subst && field->table != subst->field->table && !field->eq(subst->field)) return subst; } === modified file 'sql/item_cmpfunc.cc' --- a/sql/item_cmpfunc.cc 2010-02-17 10:05:27 +0000 +++ b/sql/item_cmpfunc.cc 2010-03-13 20:04:52 +0000 @@ -5369,7 +5369,7 @@ void Item_equal::fix_length_and_dec() { - Item *item= get_first(); + Item *item= get_first(NULL); eval_item= cmp_item::get_comparator(item->result_type(), item->collation.collation); } @@ -5432,3 +5432,128 @@ str->append(')'); } + +/* + @brief Get the first equal field of multiple equality. + @param[in] field the field to get equal field to + + @details Get the first field of multiple equality that is equal to the + given field. In order to make semi-join materialization strategy work + correctly we can't propagate equal fields from upper select to a + materialized semi-join. + Thus the fields is returned according to following rules: + + 1) If the given field belongs to a semi-join then the first field in + multiple equality which belong to the same semi-join is returned. + Otherwise NULL is returned. + 2) If the given field doesn't belong to a semi-join then + the first field in the multiple equality that doesn't belong to any + semi-join is returned. + If all fields in the equality are belong to semi-join(s) then NULL + is returned. + 3) If no field is given then the first field in the multiple equality + is returned without regarding whether it belongs to a semi-join or not. + + @retval Found first field in the multiple equality. + @retval 0 if no field found. +*/ + +Item_field* Item_equal::get_first(Item_field *field) +{ + List_iterator<Item_field> it(fields); + Item_field *item; + JOIN_TAB *field_tab; + + if (!field) + return fields.head(); + + /* + Of all equal fields, return the first one we can use. Normally, this is the + field which belongs to the table that is the first in the join order. + + There is one exception to this: When semi-join materialization strategy is + used, and the given field belongs to a table within the semi-join nest, we + must pick the first field in the semi-join nest. + + Example: suppose we have a join order: + + ot1 ot2 SJ-Mat(it1 it2 it3) ot3 + + and equality ot2.col = it1.col = it2.col + If we're looking for best substitute for 'it2.col', we should pick it1.col + and not ot2.col. + + eliminate_item_equal() also has code that deals with equality substitution + in presense of SJM nests. + */ + + field_tab= field->field->table->reginfo.join_tab; + + TABLE_LIST *emb_nest= field->field->table->pos_in_table_list->embedding; + + if (emb_nest && emb_nest->sj_mat_info && emb_nest->sj_mat_info->is_used) + { + /* + It's a field from an materialized semi-join. We can substitute it only + for a field from the same semi-join. + */ + JOIN_TAB *first; + JOIN *join= field_tab->join; + uint tab_idx= field_tab - field_tab->join->join_tab; + + /* Find the first table of this semi-join nest */ + for (uint i= tab_idx; i != join->const_tables; i--) + { + if (join->join_tab[i].table->map & emb_nest->sj_inner_tables) + first= join->join_tab + i; + else + // Found first tab that doesn't belong to current SJ. + break; + } + /* Find an item to substitute for. */ + while ((item= it++)) + { + if (item->field->table->reginfo.join_tab >= first) + { + /* + If we found given field then return NULL to avoid unnecessary + substitution. + */ + return (item != field) ? item : NULL; + } + } + } + else + { +#if 0 + /* + The field is not in SJ-Materialization nest. We must return the first + field that's not embedded in a SJ-Materialization nest. + Example: suppose we have a join order: + + SJ-Mat(it1 it2) ot1 ot2 + + and equality ot2.col = ot1.col = it2.col + If we're looking for best substitute for 'ot2.col', we should pick ot1.col + and not it2.col, because when we run a join between ot1 and ot2 + execution of SJ-Mat(...) has already finished and we can't rely on the + value of it*.*. + psergey-fix-fix: ^^ THAT IS INCORRECT ^^. Pick the first, whatever that + is. + */ + while ((item= it++)) + { + TABLE_LIST *emb_nest= item->field->table->pos_in_table_list->embedding; + if (!emb_nest || !emb_nest->sj_mat_info || + !emb_nest->sj_mat_info->is_used) + { + return item; + } + } +#endif + return fields.head(); + } + // Shouldn't get here. + DBUG_ASSERT(0); + return NULL; +} === modified file 'sql/item_cmpfunc.h' --- a/sql/item_cmpfunc.h 2010-02-17 10:05:27 +0000 +++ b/sql/item_cmpfunc.h 2010-03-13 20:04:52 +0000 @@ -1589,7 +1589,7 @@ void add(Item_field *f); uint members(); bool contains(Field *field); - Item_field* get_first() { return fields.head(); } + Item_field* get_first(Item_field *field); uint n_fields() { return fields.elements; } void merge(Item_equal *item); void update_const(); === modified file 'sql/opt_subselect.cc' --- a/sql/opt_subselect.cc 2010-03-07 15:41:45 +0000 +++ b/sql/opt_subselect.cc 2010-03-13 20:04:52 +0000 @@ -2159,6 +2159,8 @@ if (tablenr != first) pos->sj_strategy= SJ_OPT_NONE; remaining_tables |= s->table->map; + //s->sj_strategy= pos->sj_strategy; + join->join_tab[first].sj_strategy= join->best_positions[first].sj_strategy; } } === modified file 'sql/sql_select.cc' --- a/sql/sql_select.cc 2010-03-07 15:41:45 +0000 +++ b/sql/sql_select.cc 2010-03-13 20:04:52 +0000 @@ -8867,6 +8867,15 @@ } +static TABLE_LIST* embedding_sjm(Item_field *item_field) +{ + TABLE_LIST *nest= item_field->field->table->pos_in_table_list->embedding; + if (nest && nest->sj_mat_info && nest->sj_mat_info->is_used) + return nest; + else + return NULL; +} + /** Generate minimal set of simple equalities equivalent to a multiple equality. @@ -8900,6 +8909,23 @@ So only t1.a=t3.c should be left in the lower level. If cond is equal to 0, then not more then one equality is generated and a pointer to it is returned as the result of the function. + + Equality substutution and semi-join materialization nests: + + In case join order looks like this: + + outer_tbl1 outer_tbl2 SJM (inner_tbl1 inner_tbl2) outer_tbl3 + + We must not construct equalities like + + outer_tbl1.col = inner_tbl1.col + + because they would get attached to inner_tbl1 and will get evaluated + during materialization phase, when we don't have current value of + outer_tbl1.col. + + Item_equal::get_first() also takes similar measures for dealing with + equality substitution in presense of SJM nests. @return - The condition with generated simple equalities or @@ -8917,18 +8943,44 @@ Item *item_const= item_equal->get_const(); Item_equal_iterator it(*item_equal); Item *head; + TABLE_LIST *current_sjm= NULL; + Item *current_sjm_head= NULL; + + /* + Pick the "head" item: the constant one or the first in the join order + that's not inside some SJM nest. + */ if (item_const) head= item_const; else { - head= item_equal->get_first(); + TABLE_LIST *emb_nest; + Item_field *item_field; + head= item_field= item_equal->get_first(NULL); it++; + if ((emb_nest= embedding_sjm(item_field))) + { + current_sjm= emb_nest; + current_sjm_head= head; + } } + Item_field *item_field; + /* + For each other item, generate "item=head" equality (except the tables that + are within SJ-Materialization nests, for those "head" is defined + differently) + */ while ((item_field= it++)) { Item_equal *upper= item_field->find_item_equal(upper_levels); Item_field *item= item_field; + TABLE_LIST *field_sjm= embedding_sjm(item_field); + + /* + Check if "item_field=head" equality is already guaranteed to be true + on upper AND-levels. + */ if (upper) { if (item_const && upper->get_const()) @@ -8943,65 +8995,29 @@ } } } - if (item == item_field) + + bool produce_equality= test(item == item_field); + if (!item_const && field_sjm && field_sjm != current_sjm) + { + /* Entering an SJM nest */ + current_sjm_head= item_field; + if (!field_sjm->sj_mat_info->is_sj_scan) + produce_equality= FALSE; + } + + if (produce_equality) { if (eq_item) eq_list.push_back(eq_item); - /* - item_field might refer to a table that is within a semi-join - materialization nest. In that case, the join order looks like this: - - outer_tbl1 outer_tbl2 SJM (inner_tbl1 inner_tbl2) outer_tbl3 - - We must not construct equalities like - - outer_tbl1.col = inner_tbl1.col - - because they would get attached to inner_tbl1 and will get evaluated - during materialization phase, when we don't have current value of - outer_tbl1.col. - */ - TABLE_LIST *emb_nest= - item_field->field->table->pos_in_table_list->embedding; - if (!item_const && emb_nest && emb_nest->sj_mat_info && - emb_nest->sj_mat_info->is_used) - { - /* - Find the first equal expression that refers to a table that is - within the semijoin nest. If we can't find it, do nothing - */ - List_iterator<Item_field> fit(item_equal->fields); - Item_field *head_in_sjm; - bool found= FALSE; - while ((head_in_sjm= fit++)) - { - if (head_in_sjm->used_tables() & emb_nest->sj_inner_tables) - { - if (head_in_sjm == item_field) - { - /* This is the first table inside the semi-join*/ - eq_item= new Item_func_eq(item_field, head); - /* Tell make_cond_for_table don't use this. */ - eq_item->marker=3; - } - else - { - eq_item= new Item_func_eq(item_field, head_in_sjm); - found= TRUE; - } - break; - } - } - if (!found) - continue; - } - else - eq_item= new Item_func_eq(item_field, head); + + eq_item= new Item_func_eq(item_field, current_sjm? current_sjm_head: head); + if (!eq_item) return 0; eq_item->set_cmp_func(); eq_item->quick_fix_field(); } + current_sjm= field_sjm; } if (!cond && !eq_list.head()) === modified file 'sql/sql_select.h' --- a/sql/sql_select.h 2010-03-05 18:54:48 +0000 +++ b/sql/sql_select.h 2010-03-13 20:04:52 +0000 @@ -279,6 +279,13 @@ /* NestedOuterJoins: Bitmap of nested joins this table is part of */ nested_join_map embedding_map; + /* + Semi-join strategy to be used for this join table. This is a copy of + POSITION::sj_strategy field. This field is set up by the + fix_semijion_strategies_for_picked_join_order. + */ + uint sj_strategy; + void cleanup(); inline bool is_using_loose_index_scan() {