Hello Igor, I believe that the following patch has the property that item_subselect->update_used_tables() will handle table-bit re-assignments, i.e. it is what we have discussed earlier today. I've also pushed this into 5.3-sj-subqueries tree. ----- Forwarded message from Sergey Petrunya <psergey@askmonty.org> ----- From: Sergey Petrunya <psergey@askmonty.org> To: maria-developers@lists.launchpad.net X-Mailer: mail (GNU Mailutils 1.2) Date: Tue, 2 Feb 2010 23:00:49 +0300 (MSK) Subject: [Maria-developers] Rev 2750: BUG#31480: Incorrect result for nested subquery when executed via semi join: in file:///home/psergey/dev/maria-5.3-subqueries-r3/ At file:///home/psergey/dev/maria-5.3-subqueries-r3/ ------------------------------------------------------------ revno: 2750 revision-id: psergey@askmonty.org-20100202200045-13q0nb5dwzm739j6 parent: psergey@askmonty.org-20100128134833-9000udjp5wa3tsff committer: Sergey Petrunya <psergey@askmonty.org> branch nick: maria-5.3-subqueries-r3 timestamp: Tue 2010-02-02 23:00:45 +0300 message: BUG#31480: Incorrect result for nested subquery when executed via semi join: A mark-2 fix that can survive FROM subquery handling and has some code unification with table elimination: Each subquery predicate now stores a (flat) list of all references from inside to outside the subquery. We actually store (select, referred_item) pairs which allows Item_subselect::fix_after_pullout() to recalculate subquery predicate's attributes after a broad range of FROM- and IN-subselect flattening operations. === modified file 'mysql-test/r/subselect_sj.result' --- a/mysql-test/r/subselect_sj.result 2010-01-17 14:51:10 +0000 +++ b/mysql-test/r/subselect_sj.result 2010-02-02 20:00:45 +0000 @@ -779,3 +779,48 @@ 1 PRIMARY it2 ALL NULL NULL NULL NULL 20 Using where; End temporary DROP TABLE ot1, it1, it2; # End of BUG#38075 +# +# BUG#31480: Incorrect result for nested subquery when executed via semi join +# +create table t1 (a int not null, b int not null); +create table t2 (c int not null, d int not null); +create table t3 (e int not null); +insert into t1 values (1,10); +insert into t1 values (2,10); +insert into t1 values (1,20); +insert into t1 values (2,20); +insert into t1 values (3,20); +insert into t1 values (2,30); +insert into t1 values (4,40); +insert into t2 values (2,10); +insert into t2 values (2,20); +insert into t2 values (4,10); +insert into t2 values (5,10); +insert into t2 values (3,20); +insert into t2 values (2,40); +insert into t3 values (10); +insert into t3 values (30); +insert into t3 values (10); +insert into t3 values (20); +explain extended +select a from t1 +where a in (select c from t2 where d >= some(select e from t3 where b=e)); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY t2 ALL NULL NULL NULL NULL 6 100.00 Start temporary +1 PRIMARY t1 ALL NULL NULL NULL NULL 7 100.00 Using where; End temporary; Using join buffer +3 DEPENDENT SUBQUERY t3 ALL NULL NULL NULL NULL 4 100.00 Using where +Warnings: +Note 1276 Field or reference 'test.t1.b' of SELECT #3 was resolved in SELECT #1 +Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` semi join (`test`.`t2`) where ((`test`.`t1`.`a` = `test`.`t2`.`c`) and <nop>(<in_optimizer>(`test`.`t2`.`d`,<exists>(select 1 AS `Not_used` from `test`.`t3` where ((`test`.`t1`.`b` = `test`.`t3`.`e`) and (<cache>(`test`.`t2`.`d`) >= `test`.`t3`.`e`)))))) +show warnings; +Level Code Message +Note 1276 Field or reference 'test.t1.b' of SELECT #3 was resolved in SELECT #1 +Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` semi join (`test`.`t2`) where ((`test`.`t1`.`a` = `test`.`t2`.`c`) and <nop>(<in_optimizer>(`test`.`t2`.`d`,<exists>(select 1 AS `Not_used` from `test`.`t3` where ((`test`.`t1`.`b` = `test`.`t3`.`e`) and (<cache>(`test`.`t2`.`d`) >= `test`.`t3`.`e`)))))) +select a from t1 +where a in (select c from t2 where d >= some(select e from t3 where b=e)); +a +2 +2 +3 +2 +drop table t1, t2, t3; === modified file 'mysql-test/r/subselect_sj_jcl6.result' --- a/mysql-test/r/subselect_sj_jcl6.result 2010-01-17 14:51:10 +0000 +++ b/mysql-test/r/subselect_sj_jcl6.result 2010-02-02 20:00:45 +0000 @@ -783,6 +783,51 @@ 1 PRIMARY it2 ALL NULL NULL NULL NULL 20 Using where; End temporary; Using join buffer DROP TABLE ot1, it1, it2; # End of BUG#38075 +# +# BUG#31480: Incorrect result for nested subquery when executed via semi join +# +create table t1 (a int not null, b int not null); +create table t2 (c int not null, d int not null); +create table t3 (e int not null); +insert into t1 values (1,10); +insert into t1 values (2,10); +insert into t1 values (1,20); +insert into t1 values (2,20); +insert into t1 values (3,20); +insert into t1 values (2,30); +insert into t1 values (4,40); +insert into t2 values (2,10); +insert into t2 values (2,20); +insert into t2 values (4,10); +insert into t2 values (5,10); +insert into t2 values (3,20); +insert into t2 values (2,40); +insert into t3 values (10); +insert into t3 values (30); +insert into t3 values (10); +insert into t3 values (20); +explain extended +select a from t1 +where a in (select c from t2 where d >= some(select e from t3 where b=e)); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY t2 ALL NULL NULL NULL NULL 6 100.00 Start temporary +1 PRIMARY t1 ALL NULL NULL NULL NULL 7 100.00 Using where; End temporary; Using join buffer +3 DEPENDENT SUBQUERY t3 ALL NULL NULL NULL NULL 4 100.00 Using where +Warnings: +Note 1276 Field or reference 'test.t1.b' of SELECT #3 was resolved in SELECT #1 +Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` semi join (`test`.`t2`) where ((`test`.`t1`.`a` = `test`.`t2`.`c`) and <nop>(<in_optimizer>(`test`.`t2`.`d`,<exists>(select 1 AS `Not_used` from `test`.`t3` where ((`test`.`t1`.`b` = `test`.`t3`.`e`) and (<cache>(`test`.`t2`.`d`) >= `test`.`t3`.`e`)))))) +show warnings; +Level Code Message +Note 1276 Field or reference 'test.t1.b' of SELECT #3 was resolved in SELECT #1 +Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` semi join (`test`.`t2`) where ((`test`.`t1`.`a` = `test`.`t2`.`c`) and <nop>(<in_optimizer>(`test`.`t2`.`d`,<exists>(select 1 AS `Not_used` from `test`.`t3` where ((`test`.`t1`.`b` = `test`.`t3`.`e`) and (<cache>(`test`.`t2`.`d`) >= `test`.`t3`.`e`)))))) +select a from t1 +where a in (select c from t2 where d >= some(select e from t3 where b=e)); +a +2 +2 +3 +2 +drop table t1, t2, t3; set join_cache_level=default; show variables like 'join_cache_level'; Variable_name Value === modified file 'mysql-test/t/subselect_sj.test' --- a/mysql-test/t/subselect_sj.test 2010-01-17 14:51:10 +0000 +++ b/mysql-test/t/subselect_sj.test 2010-02-02 20:00:45 +0000 @@ -681,3 +681,41 @@ DROP TABLE ot1, it1, it2; --echo # End of BUG#38075 + +--echo # +--echo # BUG#31480: Incorrect result for nested subquery when executed via semi join +--echo # +create table t1 (a int not null, b int not null); +create table t2 (c int not null, d int not null); +create table t3 (e int not null); + +insert into t1 values (1,10); +insert into t1 values (2,10); +insert into t1 values (1,20); +insert into t1 values (2,20); +insert into t1 values (3,20); +insert into t1 values (2,30); +insert into t1 values (4,40); + +insert into t2 values (2,10); +insert into t2 values (2,20); +insert into t2 values (4,10); +insert into t2 values (5,10); +insert into t2 values (3,20); +insert into t2 values (2,40); + +insert into t3 values (10); +insert into t3 values (30); +insert into t3 values (10); +insert into t3 values (20); + +explain extended +select a from t1 +where a in (select c from t2 where d >= some(select e from t3 where b=e)); +show warnings; + +select a from t1 +where a in (select c from t2 where d >= some(select e from t3 where b=e)); + +drop table t1, t2, t3; + === modified file 'sql/item.cc' --- a/sql/item.cc 2010-01-17 14:55:08 +0000 +++ b/sql/item.cc 2010-02-02 20:00:45 +0000 @@ -3646,7 +3646,7 @@ substitution) */ -static void mark_as_dependent(THD *thd, SELECT_LEX *last, SELECT_LEX *current, +static bool mark_as_dependent(THD *thd, SELECT_LEX *last, SELECT_LEX *current, Item_ident *resolved_item, Item_ident *mark_item) { @@ -3657,7 +3657,8 @@ /* store pointer on SELECT_LEX from which item is dependent */ if (mark_item) mark_item->depended_from= last; - current->mark_as_dependent(last, resolved_item); + if (current->mark_as_dependent(thd, last, resolved_item)) + return TRUE; if (thd->lex->describe & DESCRIBE_EXTENDED) { push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, @@ -3667,6 +3668,7 @@ resolved_item->field_name, current->select_number, last->select_number); } + return FALSE; } @@ -4118,6 +4120,7 @@ ((ref_type == REF_ITEM || ref_type == FIELD_ITEM) ? (Item_ident*) (*reference) : 0)); + /* A reference to a view field had been found and we substituted it instead of this Item (find_field_in_tables @@ -6437,7 +6440,7 @@ if (depended_from == new_parent) { *ref= outer_ref; - outer_ref->fix_after_pullout(new_parent, ref); + (*ref)->fix_after_pullout(new_parent, ref); } } === modified file 'sql/item_subselect.cc' --- a/sql/item_subselect.cc 2010-01-28 13:48:33 +0000 +++ b/sql/item_subselect.cc 2010-02-02 20:00:45 +0000 @@ -39,8 +39,8 @@ Item_subselect::Item_subselect(): Item_result_field(), value_assigned(0), thd(0), substitution(0), engine(0), old_engine(0), used_tables_cache(0), have_to_be_excluded(0), - const_item_cache(1), in_fix_fields(0), engine_changed(0), changed(0), - is_correlated(FALSE) + const_item_cache(1), inside_first_fix_fields(0), done_first_fix_fields(FALSE), + engine_changed(0), changed(0), is_correlated(FALSE) { with_subselect= 1; reset(); @@ -167,18 +167,23 @@ DBUG_ASSERT(fixed == 0); engine->set_thd((thd= thd_param)); - if (!in_fix_fields) - refers_to.empty(); + if (!done_first_fix_fields) + { + done_first_fix_fields= TRUE; + inside_first_fix_fields= TRUE; + } + eliminated= FALSE; + parent_select= thd_param->lex->current_select; if (check_stack_overrun(thd, STACK_MIN_SIZE, (uchar*)&res)) return TRUE; - in_fix_fields++; res= engine->prepare(); // all transformation is done (used by prepared statements) changed= 1; + inside_first_fix_fields= FALSE; if (!res) { @@ -210,14 +215,12 @@ if (!(*ref)->fixed) ret= (*ref)->fix_fields(thd, ref); thd->where= save_where; - in_fix_fields--; return ret; } // Is it one field subselect? if (engine->cols() > max_columns) { my_error(ER_OPERAND_COLUMNS, MYF(0), 1); - in_fix_fields--; return TRUE; } fix_length_and_dec(); @@ -234,7 +237,6 @@ fixed= 1; err: - in_fix_fields--; thd->where= save_where; return res; } @@ -242,11 +244,12 @@ bool Item_subselect::enumerate_field_refs_processor(uchar *arg) { - List_iterator<Item> it(refers_to); - Item *item; - while ((item= it++)) + List_iterator<Ref_to_outside> it(upper_refs); + Ref_to_outside *upper; + + while ((upper= it++)) { - if (item->walk(&Item::enumerate_field_refs_processor, FALSE, arg)) + if (upper->item->walk(&Item::enumerate_field_refs_processor, FALSE, arg)) return TRUE; } return FALSE; @@ -258,6 +261,115 @@ return FALSE; } + +bool Item_subselect::mark_as_dependent(THD *thd, st_select_lex *select, + Item *item) +{ + if (inside_first_fix_fields) + { + is_correlated= TRUE; + Ref_to_outside *upper; + if (!(upper= new (thd->stmt_arena->mem_root) Ref_to_outside())) + return TRUE; + upper->select= select; + upper->item= item; + if (upper_refs.push_back(upper, thd->stmt_arena->mem_root)) + return TRUE; + } + return FALSE; +} + +/* + Adjust attributes after our parent select has been merged into grandparent + + DESCRIPTION + Subquery is a composite object which may be correlated, that is, it may + have + 1. references to tables of the parent select (i.e. one that has the clause + with the subquery predicate) + 2. references to tables of the grandparent select + 3. references to tables of further ancestors. + + Before the pullout, this item indicates: + - #1 with table bits in used_tables() + - #2 and #3 with OUTER_REF_TABLE_BIT. + + After parent has been merged with grandparent: + - references to parent and grandparent tables should be indicated with + table bits. + - references to greatgrandparent and further ancestors - with + OUTER_REF_TABLE_BIT. +*/ + +void Item_subselect::fix_after_pullout(st_select_lex *new_parent, Item **ref) +{ + recalc_used_tables(new_parent, TRUE); + parent_select= new_parent; +} + + +/* + Recalculate used_tables_cache +*/ + +void Item_subselect::recalc_used_tables(st_select_lex *new_parent, + bool after_pullout) +{ + List_iterator<Ref_to_outside> it(upper_refs); + Ref_to_outside *upper; + + used_tables_cache= 0; + while ((upper= it++)) + { + bool found= FALSE; + /* + Check if + 1. the upper reference refers to the new immediate parent select, or + 2. one of the further ancestors. + + We rely on the fact that the tree of selects is modified by some kind of + 'flattening', i.e. a process where child selects are merged into their + parents. + The merged selects are removed from the select tree but keep pointers to + their parents. + */ + for (st_select_lex *sel= upper->select; sel; sel= sel->outer_select()) + { + /* + If we've reached the new parent select by walking upwards from + reference's original select, this means that the reference is now + referring to the direct parent: + */ + if (sel == new_parent) + { + found= TRUE; + /* + upper->item may be NULL when we've referred to a grouping function, + in which case we don't care about what it's table_map really is, + because item->with_sum_func==1 will ensure correct placement of the + item. + */ + if (upper->item) + { + if (after_pullout) + upper->item->fix_after_pullout(new_parent, &(upper->item)); + upper->item->update_used_tables(); + used_tables_cache |= upper->item->used_tables(); + } + } + } + if (!found) + used_tables_cache|= OUTER_REF_TABLE_BIT; + } + /* + Don't update const_tables_cache yet as we don't yet know which of the + parent's tables are constant. Parent will call update_used_tables() after + he has done const table detection, and that will be our chance to update + const_tables_cache. + */ +} + + bool Item_subselect::walk(Item_processor processor, bool walk_subquery, uchar *argument) { @@ -397,6 +509,7 @@ void Item_subselect::update_used_tables() { + recalc_used_tables(parent_select, FALSE); if (!engine->uncacheable()) { // did all used tables become static? @@ -1843,6 +1956,18 @@ return result || Item_subselect::fix_fields(thd_arg, ref); } +void Item_in_subselect::fix_after_pullout(st_select_lex *new_parent, Item **ref) +{ + left_expr->fix_after_pullout(new_parent, &left_expr); + Item_subselect::fix_after_pullout(new_parent, ref); +} + +void Item_in_subselect::update_used_tables() +{ + Item_subselect::update_used_tables(); + left_expr->update_used_tables(); + used_tables_cache |= left_expr->used_tables(); +} /** Try to create an engine to compute the subselect via materialization, === modified file 'sql/item_subselect.h' --- a/sql/item_subselect.h 2010-01-28 13:48:33 +0000 +++ b/sql/item_subselect.h 2010-02-02 20:00:45 +0000 @@ -67,14 +67,32 @@ bool have_to_be_excluded; /* cache of constant state */ bool const_item_cache; - + + bool inside_first_fix_fields; + bool done_first_fix_fields; public: - /* - References from inside the subquery to the select that this predicate is - in. References to parent selects not included. + /* A reference from inside subquery predicate to somewhere outside of it */ + class Ref_to_outside : public Sql_alloc + { + public: + st_select_lex *select; /* Select where the reference is pointing to */ + /* + What is being referred. This may be NULL when we're referring to an + aggregate function. + */ + Item *item; + }; + /* + References from within this subquery to somewhere outside of it (i.e. to + parent select, grandparent select, etc) */ - List<Item> refers_to; - int in_fix_fields; + List<Ref_to_outside> upper_refs; + st_select_lex *parent_select; + + /* + TRUE<=>Table Elimination has made it redundant to evaluate this select + (and so it is not part of QEP, etc) + */ bool eliminated; /* changed engine indicator */ @@ -117,6 +135,9 @@ return null_value; } bool fix_fields(THD *thd, Item **ref); + bool mark_as_dependent(THD *thd, st_select_lex *select, Item *item); + void fix_after_pullout(st_select_lex *new_parent, Item **ref); + void recalc_used_tables(st_select_lex *new_parent, bool after_pullout); virtual bool exec(); virtual void fix_length_and_dec(); table_map used_tables() const; @@ -396,6 +417,8 @@ bool test_limit(st_select_lex_unit *unit); virtual void print(String *str, enum_query_type query_type); bool fix_fields(THD *thd, Item **ref); + void fix_after_pullout(st_select_lex *new_parent, Item **ref); + void update_used_tables(); bool setup_engine(); bool init_left_expr_cache(); bool is_expensive_processor(uchar *arg); === modified file 'sql/item_sum.cc' --- a/sql/item_sum.cc 2009-10-15 21:38:29 +0000 +++ b/sql/item_sum.cc 2010-02-02 20:00:45 +0000 @@ -350,7 +350,7 @@ sl= sl->master_unit()->outer_select() ) sl->master_unit()->item->with_sum_func= 1; } - thd->lex->current_select->mark_as_dependent(aggr_sel, NULL); + thd->lex->current_select->mark_as_dependent(thd, aggr_sel, NULL); return FALSE; } === modified file 'sql/sql_lex.cc' --- a/sql/sql_lex.cc 2010-01-28 13:48:33 +0000 +++ b/sql/sql_lex.cc 2010-02-02 20:00:45 +0000 @@ -1841,9 +1841,8 @@ 'last' should be reachable from this st_select_lex_node */ -void st_select_lex::mark_as_dependent(st_select_lex *last, Item *dependency) +bool st_select_lex::mark_as_dependent(THD *thd, st_select_lex *last, Item *dependency) { - SELECT_LEX *next_to_last; /* Mark all selects from resolved to 1 before select where was found table as depended (of select where was found table) @@ -1867,12 +1866,15 @@ sl->uncacheable|= UNCACHEABLE_UNITED; } } - next_to_last= s; + + Item_subselect *subquery_expr= s->master_unit()->item; + if (subquery_expr && subquery_expr->mark_as_dependent(thd, last, + dependency)) + return TRUE; } is_correlated= TRUE; this->master_unit()->item->is_correlated= TRUE; - if (dependency) - next_to_last->master_unit()->item->refers_to.push_back(dependency); + return FALSE; } bool st_select_lex_node::set_braces(bool value) { return 1; } === modified file 'sql/sql_lex.h' --- a/sql/sql_lex.h 2010-01-28 13:48:33 +0000 +++ b/sql/sql_lex.h 2010-02-02 20:00:45 +0000 @@ -747,7 +747,7 @@ return master_unit()->return_after_parsing(); } - void mark_as_dependent(st_select_lex *last, Item *dependency); + bool mark_as_dependent(THD *thd, st_select_lex *last, Item *dependency); bool set_braces(bool value); bool inc_in_sum_expr(); === modified file 'sql/sql_select.h' --- a/sql/sql_select.h 2010-01-28 13:48:33 +0000 +++ b/sql/sql_select.h 2010-02-02 20:00:45 +0000 @@ -282,13 +282,11 @@ } bool check_rowid_field() { -/* !!!NB igor: enable the code in this comment after backporting the SJ code if (keep_current_rowid && !used_rowid_fields) { used_rowid_fields= 1; used_fieldlength+= table->file->ref_length; } -*/ return test(used_rowid_fields); } bool is_inner_table_of_semi_join_with_first_match() _______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : maria-developers@lists.launchpad.net Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp ----- End forwarded message ----- -- BR Sergey -- Sergey Petrunia, Software Developer Monty Program AB, http://askmonty.org Blog: http://s.petrunia.net/blog