[Maria-developers] bzr commit into file:///home/tsk/mprog/src/5.3-mwl89/ branch (timour:2792)
#At file:///home/tsk/mprog/src/5.3-mwl89/ based on revid:psergey@askmonty.org-20100503154606-z7v6errebcv9gax1 2792 timour@askmonty.org 2010-05-27 MWL#89: Cost-based choice between Materialization and IN->EXISTS transformation Phase 1: Implement recursive bottom-up optimization of subqueires instead of lazy optimization. The patch implements a preparatory phase for MWL#89, which is a prerequisite to implement a cost-based choice between both strategies. The patch passes the complete regression test. The main change is implemented by the method: JOIN::optimize_materialized_in_subqueries(). All other changes were required to correct problems resulting from changing the order of optimization. Most of these problems followed the same pattern - there are some shared structures between a subquery and its parent query. Depending on which one is optimized first (parent or child query), these shared strucutres may get different values, thus resulting in an inconsistent query plan. modified: mysql-test/r/subselect_mat.result sql/item_subselect.cc sql/item_subselect.h sql/sql_class.cc sql/sql_class.h sql/sql_select.cc sql/sql_select.h === modified file 'mysql-test/r/subselect_mat.result' --- a/mysql-test/r/subselect_mat.result 2010-04-05 21:15:15 +0000 +++ b/mysql-test/r/subselect_mat.result 2010-05-27 13:13:47 +0000 @@ -1139,7 +1139,7 @@ insert into t1 values (5); explain select min(a1) from t1 where 7 in (select b1 from t2 group by b1); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away -2 SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found +2 SUBQUERY NULL NULL NULL NULL NULL NULL NULL no matching row in const table select min(a1) from t1 where 7 in (select b1 from t2 group by b1); min(a1) set @@optimizer_switch='default,materialization=off'; @@ -1153,7 +1153,7 @@ set @@optimizer_switch='default,semijoin explain select min(a1) from t1 where 7 in (select b1 from t2); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away -2 SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found +2 SUBQUERY NULL NULL NULL NULL NULL NULL NULL no matching row in const table select min(a1) from t1 where 7 in (select b1 from t2); min(a1) set @@optimizer_switch='default,materialization=off'; === modified file 'sql/item_subselect.cc' --- a/sql/item_subselect.cc 2010-04-02 14:27:06 +0000 +++ b/sql/item_subselect.cc 2010-05-27 13:13:47 +0000 @@ -2110,7 +2110,7 @@ bool Item_in_subselect::setup_engine() if (!(new_engine= new subselect_hash_sj_engine(thd, this, old_engine)) || - new_engine->init_permanent(unit->get_unit_column_types())) + new_engine->init_permanent(&old_engine->join->fields_list)) { Item_subselect::trans_res trans_res; /* @@ -2126,6 +2126,15 @@ bool Item_in_subselect::setup_engine() &eq_creator); else trans_res= row_value_in_to_exists_transformer(old_engine->join); + /* + The IN=>EXISTS transformation above injects new predicates into the + WHERE and HAVING clauses. Since the subquery was already optimized, + below we force its reoptimization with the new injected conditions + by the first call to subselect_single_select_engine::exec(). + This is the only case of lazy subquery optimization in the server. + */ + DBUG_ASSERT(old_engine->join->optimized); + old_engine->join->optimized= false; res= (trans_res != Item_subselect::RES_OK); } if (new_engine) @@ -3673,6 +3682,7 @@ bitmap_init_memroot(MY_BITMAP *map, uint bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns) { + select_union *result_sink; /* Options to create_tmp_table. */ ulonglong tmp_create_options= thd->options | TMP_TABLE_ALL_COLUMNS; /* | TMP_TABLE_FORCE_MYISAM; TIMOUR: force MYISAM */ @@ -3706,15 +3716,16 @@ bool subselect_hash_sj_engine::init_perm DBUG_RETURN(TRUE); } */ - if (!(result= new select_materialize_with_stats)) + if (!(result_sink= new select_materialize_with_stats)) DBUG_RETURN(TRUE); - if (((select_union*) result)->create_result_table( - thd, tmp_columns, TRUE, tmp_create_options, - "materialized subselect", TRUE)) + if (result_sink->create_result_table(thd, tmp_columns, TRUE, + tmp_create_options, + "materialized subselect", TRUE)) DBUG_RETURN(TRUE); - tmp_table= ((select_union*) result)->table; + tmp_table= result_sink->table; + result= result_sink; /* If the subquery has blobs, or the total key lenght is bigger than @@ -3882,7 +3893,6 @@ subselect_hash_sj_engine::make_unique_en cur_ref_buff + test(maybe_null), we could use that information instead. */ - cur_ref_buff + null_count, null_count ? cur_ref_buff : 0, cur_key_part->length, tab->ref.items[i]); @@ -3908,11 +3918,6 @@ subselect_hash_sj_engine::make_unique_en bool subselect_hash_sj_engine::init_runtime() { /* - Create and optimize the JOIN that will be used to materialize - the subquery if not yet created. - */ - materialize_engine->prepare(); - /* Repeat name resolution for 'cond' since cond is not part of any clause of the query, and it is not 'fixed' during JOIN::prepare. */ @@ -3935,6 +3940,16 @@ subselect_hash_sj_engine::~subselect_has } +int subselect_hash_sj_engine::prepare() +{ + /* + Create and optimize the JOIN that will be used to materialize + the subquery if not yet created. + */ + return materialize_engine->prepare(); +} + + /** Cleanup performed after each PS execution. @@ -3996,9 +4011,8 @@ int subselect_hash_sj_engine::exec() the subquery predicate. */ thd->lex->current_select= materialize_engine->select_lex; - if ((res= materialize_join->optimize())) - goto err; /* purecov: inspected */ - DBUG_ASSERT(!is_materialized); /* We should materialize only once. */ + /* The subquery should be optimized, and materialized only once. */ + DBUG_ASSERT(materialize_join->optimized && !is_materialized); materialize_join->exec(); if ((res= test(materialize_join->error || thd->is_fatal_error))) goto err; @@ -4909,7 +4923,7 @@ bool subselect_rowid_merge_engine::parti /* If there is a non-NULL key, it must be the first key in the keys array. */ DBUG_ASSERT(!non_null_key || (non_null_key && merge_keys[0] == non_null_key)); - /* The prioryty queue for keys must be empty. */ + /* The priority queue for keys must be empty. */ DBUG_ASSERT(!pq.elements); /* All data accesses during execution are via handler::ha_rnd_pos() */ === modified file 'sql/item_subselect.h' --- a/sql/item_subselect.h 2010-03-29 14:04:35 +0000 +++ b/sql/item_subselect.h 2010-05-27 13:13:47 +0000 @@ -805,7 +805,7 @@ public: bool init_permanent(List<Item> *tmp_columns); bool init_runtime(); void cleanup(); - int prepare() { return 0; } /* Override virtual function in base class. */ + int prepare(); int exec(); virtual void print(String *str, enum_query_type query_type); uint cols() === modified file 'sql/sql_class.cc' --- a/sql/sql_class.cc 2010-04-05 21:15:15 +0000 +++ b/sql/sql_class.cc 2010-05-27 13:13:47 +0000 @@ -2948,8 +2948,15 @@ create_result_table(THD *thd_arg, List<I const char *table_alias, bool bit_fields_as_long) { DBUG_ASSERT(table == 0); + tmp_table_param.init(); tmp_table_param.field_count= column_types->elements; tmp_table_param.bit_fields_as_long= bit_fields_as_long; + /* + TIMOUR: + Setting this parameter here limits the use of this class only for + materialized subqueries. + */ + tmp_table_param.materialized_subquery= true; if (! (table= create_tmp_table(thd_arg, &tmp_table_param, *column_types, (ORDER*) 0, is_union_distinct, 1, @@ -3034,6 +3041,7 @@ void TMP_TABLE_PARAM::init() table_charset= 0; precomputed_group_by= 0; bit_fields_as_long= 0; + materialized_subquery= 0; DBUG_VOID_RETURN; } === modified file 'sql/sql_class.h' --- a/sql/sql_class.h 2010-04-05 21:15:15 +0000 +++ b/sql/sql_class.h 2010-05-27 13:13:47 +0000 @@ -2772,6 +2772,7 @@ public: uint convert_blob_length; CHARSET_INFO *table_charset; bool schema_table; + bool materialized_subquery; /* True if GROUP BY and its aggregate functions are already computed by a table access method (e.g. by loose index scan). In this case @@ -2790,8 +2791,8 @@ public: TMP_TABLE_PARAM() :copy_field(0), group_parts(0), group_length(0), group_null_parts(0), convert_blob_length(0), - schema_table(0), precomputed_group_by(0), force_copy_fields(0), - bit_fields_as_long(0) + schema_table(0), materialized_subquery(0), precomputed_group_by(0), + force_copy_fields(0), bit_fields_as_long(0) {} ~TMP_TABLE_PARAM() { === modified file 'sql/sql_select.cc' --- a/sql/sql_select.cc 2010-05-03 15:46:06 +0000 +++ b/sql/sql_select.cc 2010-05-27 13:13:47 +0000 @@ -714,6 +714,7 @@ JOIN::optimize() { ulonglong select_opts_for_readinfo; uint no_jbuf_after; + int res; DBUG_ENTER("JOIN::optimize"); // to prevent double initialization on EXPLAIN @@ -723,6 +724,10 @@ JOIN::optimize() thd_proc_info(thd, "optimizing"); + /* Optimize recursively all IN subqueries of this query. */ + if ((res= optimize_materialized_in_subqueries())) + DBUG_RETURN(res); + /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */ if (convert_join_subqueries_to_semijoins(this)) DBUG_RETURN(1); /* purecov: inspected */ @@ -848,7 +853,6 @@ JOIN::optimize() */ if (tables_list && implicit_grouping) { - int res; /* opt_sum_query() returns HA_ERR_KEY_NOT_FOUND if no rows match to the WHERE conditions, @@ -1277,7 +1281,6 @@ JOIN::optimize() if (setup_subquery_materialization()) DBUG_RETURN(1); - int res; if ((res= rewrite_to_index_subquery_engine(this)) != -1) DBUG_RETURN(res); /* @@ -2413,8 +2416,9 @@ err: Setup for execution all subqueries of a query, for which the optimizer chose hash semi-join. - @details Iterate over all subqueries of the query, and if they are under an - IN predicate, and the optimizer chose to compute it via hash semi-join: + @details Iterate over all immediate child subqueries of the query, and if + they are under an IN predicate, and the optimizer chose to compute it via + hash semi-join: - try to initialize all data structures needed for the materialized execution of the IN predicate, - if this fails, then perform the IN=>EXISTS transformation which was @@ -2454,6 +2458,51 @@ bool JOIN::setup_subquery_materializatio } +/** + Optimize all immediate children IN subqueries of this join. + + @note + This method must be called in the very beginning of JOIN::optimize(). + As a result all children subqueries are optimized recursively before + their parent. +*/ + +int +JOIN::optimize_materialized_in_subqueries() +{ + int res; + for (SELECT_LEX_UNIT *un= select_lex->first_inner_unit(); un; + un= un->next_unit()) + { + for (SELECT_LEX *sl= un->first_select(); sl; sl= sl->next_select()) + { + Item_subselect *subquery_predicate= sl->master_unit()->item; + if (subquery_predicate && + subquery_predicate->substype() == Item_subselect::IN_SUBS && + ((Item_in_subselect*) subquery_predicate)->exec_method == + Item_in_subselect::MATERIALIZATION + // @todo TIMOUR: + // Think also how to pre-optimize for IN_TO_EXISTS because we still + // call the optimizer in subselect_single_select_engine::exec() + ) + { + JOIN *subquery_join= sl->join; + if (subquery_join) + { + SELECT_LEX *save_select= thd->lex->current_select; + thd->lex->current_select= subquery_predicate->get_select_lex(); + res= subquery_join->optimize(); + thd->lex->current_select= save_select; + if (res) + return res; + } + } + } + } + return 0; +} + + /***************************************************************************** Create JOIN_TABS, make a guess about the table types, Approximate how many records will be used in each table @@ -11142,7 +11191,27 @@ create_tmp_table(THD *thd,TMP_TABLE_PARA continue; // Some kindf of const item } if (type == Item::SUM_FUNC_ITEM) - ((Item_sum *) item)->result_field= new_field; + { + Item_sum *agg_item= (Item_sum *) item; + /* + Update the result field only if it has never been set, or if the + created temporary table is not to be used for subquery + materialization. + + The reason is that for subqueries that require materialization as part + of their plan, we create the 'external' temporary table needed for IN + execution, after the 'internal' temporary table needed for grouping. + Since both the external and the internal temporary tables are created + for the same list of SELECT fields of the subquery, setting + 'result_field' for each temp table creation overrides the previous + value of result field. + + The condition below prevents the creation of the external temp table + to override the 'result_field' that was set for the internal temp table. + */ + if (!agg_item->result_field || !param->materialized_subquery) + agg_item->result_field= new_field; + } tmp_from_field++; reclength+=new_field->pack_length(); if (!(new_field->flags & NOT_NULL_FLAG)) @@ -18881,6 +18950,8 @@ bool JOIN::change_result(select_result * { DBUG_ENTER("JOIN::change_result"); result= res; + if (tmp_join) + tmp_join->result= res; if (!procedure && (result->prepare(fields_list, select_lex->master_unit()) || result->prepare2())) { === modified file 'sql/sql_select.h' --- a/sql/sql_select.h 2010-03-20 12:01:47 +0000 +++ b/sql/sql_select.h 2010-05-27 13:13:47 +0000 @@ -1717,6 +1717,7 @@ private: */ bool implicit_grouping; bool make_simple_join(JOIN *join, TABLE *tmp_table); + int optimize_materialized_in_subqueries(); };
participants (1)
-
timour@askmonty.org