[Maria-developers] FW: Rev 3758: MDEV-5470: Cost-based subquery item pushdown must take into account semi-joins
Hi Timour, Please find below a patch that should implement execution-time protection against malfunction between condition moving and semi-joins. There needs also to be an optimization-time counterpart, which should have the same logic. Will you code it yourself or want me to? ----- Forwarded message from Sergei Petrunia <psergey@askmonty.org> ----- Date: Thu, 19 Dec 2013 02:49:22 +0400 From: Sergei Petrunia <psergey@askmonty.org> To: commits@mariadb.org Subject: Rev 3758: MDEV-5470: Cost-based subquery item pushdown must take into account semi-joins At http://bazaar.launchpad.net/~maria-captains/maria/10.0-mdev83 ------------------------------------------------------------ revno: 3758 committer: Sergey Petrunya <psergey@askmonty.org> branch nick: 10.0-mdev83 timestamp: Thu 2013-12-19 06:50:28 +0400 message: MDEV-5470: Cost-based subquery item pushdown must take into account semi-joins - When moving expensive conditions, take into account that semi-joins limit where they can be moved. - This is only execution part in JOIN::setup_dynamic_conditions(). Optimization part is not yet present. - No testcases yet. - Moving inside SJ-Materialization nests doesn't seem to be supported (either before this patch or after. This patch itself does take SJ-Materialization into account) === modified file 'sql/item_cmpfunc.cc' --- sql/item_cmpfunc.cc 2013-11-01 11:10:38 +0000 +++ sql/item_cmpfunc.cc 2013-12-19 02:50:28 +0000 @@ -6591,6 +6591,8 @@ void Item_func_collect_stats::dbug_print } #endif /*DBUG_OFF*/ +JOIN_TAB *find_last_tab_for_dynamic_cond(table_map cond_tables, + JOIN *join, JOIN_TAB *join_tab); Item_func_dynamic_cond::Item_func_dynamic_cond(Item *cond, JOIN_TAB *atab, JOIN_TAB **ctab) : Item_func_collect_stats(cond) @@ -6624,13 +6626,15 @@ Item_func_dynamic_cond::Item_func_dynami if (active_tab->bush_root_tab) { first_tab= active_tab->bush_root_tab->bush_children->start; - last_tab= active_tab->bush_root_tab->bush_children->end; + //last_tab= active_tab->bush_root_tab->bush_children->end; } else { first_tab= join->join_tab + join->const_tables; - last_tab= join->join_tab + join->top_join_tab_count; + //last_tab= join->join_tab + join->top_join_tab_count; } + last_tab= find_last_tab_for_dynamic_cond(cond->used_tables(), + active_tab->join, active_tab); DBUG_ASSERT(active_tab >= first_tab && active_tab < last_tab); === modified file 'sql/item_cmpfunc.h' --- sql/item_cmpfunc.h 2013-10-18 19:56:55 +0000 +++ sql/item_cmpfunc.h 2013-12-19 02:50:28 +0000 @@ -561,14 +561,20 @@ class Item_func_collect_stats : public I class Item_func_dynamic_cond : public Item_func_collect_stats { -protected: +protected: // as if there were any ancestor classes? List<Item_subselect> subqueries; /* The join_tab where the condition is currently 'pushed'.*/ st_join_table *active_tab; /* The join_tab currently being executed. */ st_join_table **exec_tab; /* The boundaries of the sub-plan where this predicate can be pushed. */ - st_join_table *first_tab, *last_tab; + st_join_table *first_tab; + st_join_table *last_tab; //psergey-todo: this should point to the last tab. + // conditions depending on semi-join tables have + // limits re. how far they can be moved. +public: + inline st_join_table * get_last_tab() { return last_tab; } +protected: /* The relative access cost based on optimizer estimates. This is the cost of all access methods per one record from the first table in the plan. === modified file 'sql/opt_subselect.cc' --- sql/opt_subselect.cc 2013-11-04 15:56:09 +0000 +++ sql/opt_subselect.cc 2013-12-19 02:50:28 +0000 @@ -1480,6 +1480,7 @@ static bool convert_subq_to_sj(JOIN *par sj_nest->sj_subq_pred= subq_pred; sj_nest->original_subq_pred_used_tables= subq_pred->used_tables() | subq_pred->left_expr->used_tables(); + sj_nest->sj_strategys_join_tab= uint(-1); /* Nests do not participate in those 'chains', so: */ /* sj_nest->next_leaf= sj_nest->next_local= sj_nest->next_global == NULL*/ emb_join_list->push_back(sj_nest); @@ -4321,6 +4322,21 @@ int init_dups_weedout(JOIN *join, uint f /* + For each SJ-inner table in [start_tab; end_tab], set emb_sj_nest to idx +*/ +static void update_sj_strategys_join_tab(JOIN_TAB *start_tab, + JOIN_TAB *end_tab, + uint idx) +{ + for (JOIN_TAB *j= start_tab; j < end_tab; j++) + { + TABLE_LIST *emb_nest; + if ((emb_nest= j->emb_sj_nest)) + emb_nest->sj_strategys_join_tab= idx; + } +} + +/* Setup the strategies to eliminate semi-join duplicates. SYNOPSIS @@ -4448,6 +4464,10 @@ int setup_semijoin_dups_elimination(JOIN for (uint j= i; j < i + pos->n_sj_tables; j++) join->join_tab[j].inside_loosescan_range= TRUE; + + update_sj_strategys_join_tab(join->join_tab + i, + join->join_tab + i + pos->n_sj_tables, + i + pos->n_sj_tables - 1); /* Calculate key length */ keylen= 0; @@ -4461,6 +4481,7 @@ int setup_semijoin_dups_elimination(JOIN tab[pos->n_sj_tables - 1].do_firstmatch= tab; i+= pos->n_sj_tables; pos+= pos->n_sj_tables; + break; } case SJ_OPT_DUPS_WEEDOUT: @@ -4503,6 +4524,9 @@ int setup_semijoin_dups_elimination(JOIN break; } } + update_sj_strategys_join_tab(join->join_tab + i, + join->join_tab + i + pos->n_sj_tables, + i + pos->n_sj_tables - 1); init_dups_weedout(join, first_table, i, i + pos->n_sj_tables - first_table); i+= pos->n_sj_tables; @@ -4558,6 +4582,9 @@ int setup_semijoin_dups_elimination(JOIN } } j[-1].do_firstmatch= jump_to; + update_sj_strategys_join_tab(join->join_tab + i, + join->join_tab + i + pos->n_sj_tables, + i + pos->n_sj_tables - 1); i+= pos->n_sj_tables; pos+= pos->n_sj_tables; === modified file 'sql/sql_select.cc' --- sql/sql_select.cc 2013-11-08 12:36:02 +0000 +++ sql/sql_select.cc 2013-12-19 02:50:28 +0000 @@ -24608,8 +24608,8 @@ double JOIN::static_pushdown_cost(uint i @param pred the predicate to be wrapped in Item_func_dynamic_cond @param init_tab the initial 'active' JOIN_TAB where the dynamic predicate is pushed to - @param cur_tab pointer to the JOIN_TAB* that stores the current JOIN_TAB - during execution + @param active_tab_ptr pointer to the JOIN_TAB* that stores the current JOIN_TAB + during execution */ static Item_func_collect_stats * wrap_with_dynamic_conds(Item *pred, JOIN_TAB *init_tab, JOIN_TAB **active_tab_ptr, @@ -24674,6 +24674,81 @@ wrap_with_dynamic_conds(Item *pred, JOIN } +/* + @brief Find the last JOIN_TAB that a condition can be attached. + + + @param cond_tables used_tables() of the condition that we intend to move. + @param join_tab The left-most join tab where the condition can be + attached (in other words, default attachment point) + + @detail + In a regular inner join, one can take a condition that is attached to a + table $T, and move it to any table that comes after $T. + + When semi-joins are present, this is no longer true. Detailed explanation + can be found in maria-developers@, email "Movable conditions and semi-joins". + In short: + + - Inside an SJ-Materialization nest, a condition can be moved to any table to + the right, but it must stay within the nest. (We don't support nested + semi-joins, so one will not find semi-joins inside an SJ-M nest) + + - All other Semi-Join strategies have a "range" where they apply. If a + condition depends on a table that is within a semi-join, it must not be + moved over the right edge of the range. +*/ + +JOIN_TAB *find_last_tab_for_dynamic_cond(table_map cond_tables, + JOIN *join, JOIN_TAB *join_tab) +{ + if (join_tab->bush_root_tab) + { + /* + We are inside SJ-Materialization nest. There are no semi-joins here, + return the last tab in the nest. + */ + return join_tab->bush_root_tab->bush_children->end; + } + + /* Start from the right-most bound */ + uint last_tab = join->top_join_tab_count; + + cond_tables= cond_tables & ~(PSEUDO_TABLE_BITS | join->const_table_map); + /* + Walk through semi-joins that this table depends on, and check their last + join tabs. The smallest last join tab sets the limit about how far right + the condition can be moved. + */ + Table_map_iterator tm_it(cond_tables); + int tableno; + while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END) + { + TABLE_LIST *sj_nest; + if ((sj_nest= join->map2table[tableno]->emb_sj_nest)) + { + if (sj_nest->is_active_sjm()) + { + /* + Whoa this condition also depends on a table from within + SJ-Materialization nest. (Can happen possible because of equality + propagation/substitution). If we didn't return earlier in this + function, we're not inside a semi-join nest. A value that depends + on a table within a SJ-Materialization nest means that it depends on + the materialized table columns. + */ + continue; + } + + if (sj_nest->sj_strategys_join_tab != uint(-1) && + sj_nest->sj_strategys_join_tab < last_tab) + last_tab= sj_nest->sj_strategys_join_tab; + } + } + return join->join_tab + last_tab; +} + + /** Make expensive subqueries dynamically pushdownable. */ @@ -24703,7 +24778,12 @@ bool JOIN::setup_dynamic_conditions() Update the least cardinality join_tab for each tab for the chosen join order. Check if there are any conditions at all that need to be made dynamic. */ - min_tab= NULL; + min_tab= NULL; + /* + psergey-todo: the following loop should walk inside SJ-Materialization + nests, too. Maybe, this whole function should be invoked for each + SJ-Materilization nest. + */ for (tab= last_tab - 1; tab >= first_tab; tab--) { if (optimizer_flag(thd, OPTIMIZER_SWITCH_STATIC_COND_PUSHDOWN)) @@ -24746,7 +24826,8 @@ bool JOIN::setup_dynamic_conditions() */ for (tab= first_tab; tab < last_tab; tab++) { - Item_func_collect_stats *wrapped_select_cond= NULL, *cur_wrapped_cond; + Item_func_collect_stats *wrapped_select_cond= NULL; + Item_func_dynamic_cond *cur_wrapped_cond; if (tab->select_cond && !(wrapped_select_cond= @@ -24800,14 +24881,29 @@ bool JOIN::setup_dynamic_conditions() Add all dynamic conditions pushed to previous JOIN_TABs also to the current JOIN_TAB. This allows to "move" a dynamic condition from one tab to another by enabling it for the corrseponding tab. + + psergey-todo: conditions have limits on how far to the right they can be + moved (Item_func_dynamic_cond::last_tab). Don't attach conditions to + where they can't be moved. */ if (!prev_dynamic_conds.is_empty()) { - List_iterator_fast<Item_func_dynamic_cond> dyn_cond_it(prev_dynamic_conds); - if (!wrapped_select_cond) - wrapped_select_cond= dyn_cond_it++; + List_iterator<Item_func_dynamic_cond> dyn_cond_it(prev_dynamic_conds); while ((cur_wrapped_cond= dyn_cond_it++)) - wrapped_select_cond->add_pred(cur_wrapped_cond); + { + if (!wrapped_select_cond) + wrapped_select_cond= cur_wrapped_cond; + else + wrapped_select_cond->add_pred(cur_wrapped_cond); + + /* + psergey-todo: conditions have limits on how far to the right they can be + moved (Item_func_dynamic_cond::last_tab). Don't attach conditions to + where they can't be moved. + */ + if (cur_wrapped_cond->get_last_tab() == tab) + dyn_cond_it.remove(); + } } prev_dynamic_conds.concat(&cur_dynamic_conds); cur_dynamic_conds.empty(); === modified file 'sql/table.h' --- sql/table.h 2013-09-25 18:07:06 +0000 +++ sql/table.h 2013-12-19 02:50:28 +0000 @@ -1721,6 +1721,9 @@ struct TABLE_LIST table_map sj_inner_tables; /* Number of IN-compared expressions */ uint sj_in_exprs; + + //psergey-todo: add the index of last table in the join order + uint sj_strategys_join_tab; /* If this is a non-jtbm semi-join nest: corresponding subselect predicate */ Item_in_subselect *sj_subq_pred; ----- End forwarded message ----- -- BR Sergei -- Sergei Petrunia, Software Developer MariaDB | Skype: sergefp | Blog: http://s.petrunia.net/blog
participants (1)
-
Sergei Petrunia