#At file:///home/tsk/mprog/src/5.3-mwl68/ based on revid:timour@askmonty.org-20100222151655-ltjv0rlv6z2sdiiu 2765 timour@askmonty.org 2010-03-09 MWL#68 Subquery optimization: Efficient NOT IN execution with NULLs * Implemented a second partial matching strategy via table scan. This strategy is a fallback when there is no memory for rowid merging. * Refactored the selection and creation of partial matching strategies, so that the choice of strategy is encapsulated in a separate method choose_partial_match_strategy(). * Refactored the representation of partial match strategies so that: - each strategy is represented by a polymorphic class, and - the base class for all partial match strategies contains common execution code. * Added an estimate of the memory needed for the rowid merge strategy, and the system variable "rowid_merge_buff_size" to control the maximum memory to be used by the rowid merge algorithm. * Added two optimizer_switch system variables to control the choice of partial match strategy: "partial_match_rowid_merge", "partial_match_table_scan". * Fixed multiple problems with deallocation of resources by the partial match strategies. @ sql/mysql_priv.h * Added two optimizer_switch system variables to control the choice of partial match strategy: "partial_match_rowid_merge", "partial_match_table_scan". @ sql/mysqld.cc * Added two optimizer_switch system variables to control the choice of partial match strategy: "partial_match_rowid_merge", "partial_match_table_scan". * Added a system variable "rowid_merge_buff_size" to control the maximum memory to be used by the rowid merge algorithm. @ sql/set_var.cc * Added a system variable "rowid_merge_buff_size" to control the maximum memory to be used by the rowid merge algorithm. @ sql/sql_class.h * Added a system variable "rowid_merge_buff_size" to control the maximum memory to be used by the rowid merge algorithm. @ support-files/build-tags Newer versions of BZR require the recursive flag in order to list all files. modified: sql/item_subselect.cc sql/item_subselect.h sql/mysql_priv.h sql/mysqld.cc sql/set_var.cc sql/sql_class.h support-files/build-tags === modified file 'sql/item_subselect.cc' --- a/sql/item_subselect.cc 2010-02-22 15:16:55 +0000 +++ b/sql/item_subselect.cc 2010-03-09 10:14:06 +0000 @@ -2910,13 +2910,7 @@ int subselect_uniquesubquery_engine::exe /* - TIMOUR: this needs more thinking, as exec() is a wrong IMO because: - - we don't need empty_result_set, as it is == 1 <=> when - item->value == 0 - - scan_table() returns >0 even when there was no actuall error, - but we only found EOF while scanning. - - scan_table should not check table->status, but it should check - HA_ERR_END_OF_FILE + TIMOUR: write comment */ int subselect_uniquesubquery_engine::index_lookup() @@ -2924,8 +2918,6 @@ int subselect_uniquesubquery_engine::ind DBUG_ENTER("subselect_uniquesubquery_engine::index_lookup"); int error; TABLE *table= tab->table; - empty_result_set= TRUE; - table->status= 0; if (!table->file->inited) table->file->ha_index_init(tab->ref.key, 0); @@ -2934,25 +2926,25 @@ int subselect_uniquesubquery_engine::ind make_prev_keypart_map(tab-> ref.key_parts), HA_READ_KEY_EXACT); - DBUG_PRINT("info", ("lookup result: %i", error)); - if (error && - error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) - error= report_error(table, error); - else + + if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) { - error= 0; - table->null_row= 0; - if (!table->status && (!cond || cond->val_int())) - { - ((Item_in_subselect *) item)->value= 1; - empty_result_set= FALSE; - } - else - ((Item_in_subselect *) item)->value= 0; + /* + TIMOUR: I don't understand at all when do we need to call report_error. + In most places where we access an index, we don't do this. Why here? + */ + error= report_error(table, error); + DBUG_RETURN(error); } - DBUG_RETURN(error); + table->null_row= 0; + if (!error && (!cond || cond->val_int())) + ((Item_in_subselect *) item)->value= 1; + else + ((Item_in_subselect *) item)->value= 0; + + DBUG_RETURN(0); } @@ -3415,19 +3407,24 @@ bool subselect_uniquesubquery_engine::no If max_keys > 1, then we need partial matching because there are more indexes than just the one we use during materialization to remove duplicates. + + @note + TIMOUR: The schema-based analysis for partial matching can be done once for + prepared statement and remembered. It is done here to remove the need to + save/restore all related variables between each re-execution, thus making + the code simpler. + + @retval PARTIAL_MATCH if a partial match should be used + @retval COMPLETE_MATCH if a complete match (index lookup) should be used */ -void subselect_hash_sj_engine::set_strategy_using_schema() +subselect_hash_sj_engine::exec_strategy +subselect_hash_sj_engine::get_strategy_using_schema() { Item_in_subselect *item_in= (Item_in_subselect *) item; - DBUG_ENTER("subselect_hash_sj_engine::set_strategy_using_schema"); - if (item_in->is_top_level_item()) - { - strategy= COMPLETE_MATCH; - DBUG_VOID_RETURN; - } + return COMPLETE_MATCH; else { List_iterator<Item> inner_col_it(*item_in->unit->get_unit_column_types()); @@ -3450,10 +3447,8 @@ void subselect_hash_sj_engine::set_strat /* If no column contains NULLs use regular hash index lookups. */ if (count_partial_match_columns) - strategy= PARTIAL_MATCH; - else - strategy= COMPLETE_MATCH; - DBUG_VOID_RETURN; + return PARTIAL_MATCH; + return COMPLETE_MATCH; } @@ -3465,19 +3460,25 @@ void subselect_hash_sj_engine::set_strat matching type of columns that cannot be NULL or that contain only NULLs. Based on this, the procedure determines the final execution strategy for the [NOT] IN predicate. + + @retval PARTIAL_MATCH if a partial match should be used + @retval COMPLETE_MATCH if a complete match (index lookup) should be used */ -void subselect_hash_sj_engine::set_strategy_using_data() +subselect_hash_sj_engine::exec_strategy +subselect_hash_sj_engine::get_strategy_using_data() { Item_in_subselect *item_in= (Item_in_subselect *) item; select_materialize_with_stats *result_sink= (select_materialize_with_stats *) result; Item *outer_col; - DBUG_ENTER("subselect_hash_sj_engine::set_strategy_using_data"); - - /* Call this procedure only if already selected partial matching. */ - DBUG_ASSERT(strategy == PARTIAL_MATCH); + /* + If we already determined that a complete match is enough based on schema + information, nothing can be better. + */ + if (strategy == COMPLETE_MATCH) + return COMPLETE_MATCH; for (uint i= 0; i < item_in->left_expr->cols(); i++) { @@ -3501,9 +3502,117 @@ void subselect_hash_sj_engine::set_strat /* If no column contains NULLs use regular hash index lookups. */ if (!count_partial_match_columns) - strategy= COMPLETE_MATCH; + return COMPLETE_MATCH; + return PARTIAL_MATCH; +} + + +void +subselect_hash_sj_engine::choose_partial_match_strategy( + bool has_non_null_key, bool has_covering_null_row, + MY_BITMAP *partial_match_key_parts) +{ + size_t pm_buff_size; - DBUG_VOID_RETURN; + DBUG_ASSERT(strategy == PARTIAL_MATCH); + /* + Choose according to global optimizer switch. If only one of the switches is + 'ON', then the remaining strategy is the only possible one. The only cases + when this will be overriden is when the total size of all buffers for the + merge strategy is bigger than the 'rowid_merge_buff_size' system variable, + or if there isn't enough physical memory to allocate the buffers. + */ + if (!optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) && + optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) + strategy= PARTIAL_MATCH_SCAN; + else if + ( optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) && + !optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) + strategy= PARTIAL_MATCH_MERGE; + + /* + If both switches are ON, or both are OFF, we interpret that as "let the + optimizer decide". Perform a cost based choice between the two partial + matching strategies. + */ + /* + TIMOUR: the above interpretation of the switch values could be changed to: + - if both are ON - let the optimizer decide, + - if both are OFF - do not use partial matching, therefore do not use + materialization in non-top-level predicates. + The problem with this is that we know for sure if we need partial matching + only after the subquery is materialized, and this is too late to revert to + the IN=>EXISTS strategy. + */ + if (strategy == PARTIAL_MATCH) + { + /* + TIMOUR: Currently we use a super simplistic measure. This will be + addressed in a separate task. + */ + if (tmp_table->file->stats.records < 100) + strategy= PARTIAL_MATCH_SCAN; + else + strategy= PARTIAL_MATCH_MERGE; + } + + /* Check if there is enough memory for the rowid merge strategy. */ + if (strategy == PARTIAL_MATCH_MERGE) + { + pm_buff_size= rowid_merge_buff_size(has_non_null_key, + has_covering_null_row, + partial_match_key_parts); + if (pm_buff_size > thd->variables.rowid_merge_buff_size) + strategy= PARTIAL_MATCH_SCAN; + } +} + + +/* + Compute the memory size of all buffers proportional to the number of rows + in tmp_table. + + @details + If the result is bigger than thd->variables.rowid_merge_buff_size, partial + matching via merging is not applicable. +*/ + +size_t subselect_hash_sj_engine::rowid_merge_buff_size( + bool has_non_null_key, bool has_covering_null_row, + MY_BITMAP *partial_match_key_parts) +{ + size_t buff_size; /* Total size of all buffers used by partial matching. */ + ha_rows row_count= tmp_table->file->stats.records; + uint rowid_length= tmp_table->file->ref_length; + select_materialize_with_stats *result_sink= + (select_materialize_with_stats *) result; + + /* Size of the subselect_rowid_merge_engine::row_num_to_rowid buffer. */ + buff_size= row_count * rowid_length * sizeof(uchar); + + if (has_non_null_key) + { + /* Add the size of Ordered_key::key_buff of the only non-NULL key. */ + buff_size+= row_count * sizeof(rownum_t); + } + + if (!has_covering_null_row) + { + for (uint i= 0; i < partial_match_key_parts->n_bits; i++) + { + if (!bitmap_is_set(partial_match_key_parts, i) || + result_sink->get_null_count_of_col(i) == row_count) + continue; /* In these cases we wouldn't construct Ordered keys. */ + + /* Add the size of Ordered_key::key_buff */ + buff_size+= (row_count - result_sink->get_null_count_of_col(i)) * + sizeof(rownum_t); + /* Add the size of Ordered_key::null_key */ + buff_size+= bitmap_buffer_size(result_sink->get_max_null_of_col(i)); + } + } + + return buff_size; } @@ -3561,7 +3670,6 @@ bool subselect_hash_sj_engine::init_perm thd->mem_root)) DBUG_RETURN(TRUE); - set_strategy_using_schema(); /* Create and initialize a select result interceptor that stores the result stream in a temporary table. The temporary table itself is @@ -3623,7 +3731,9 @@ bool subselect_hash_sj_engine::init_perm ((Item_in_subselect *) item)->left_expr->cols() == tmp_table->key_info->key_parts); - if (make_semi_join_conds()) + if (make_semi_join_conds() || + /* A unique_engine is used both for complete and partial matching. */ + !(lookup_engine= make_unique_engine())) DBUG_RETURN(TRUE); DBUG_RETURN(FALSE); @@ -3691,7 +3801,7 @@ bool subselect_hash_sj_engine::make_semi DBUG_RETURN(TRUE); } } - if (semi_join_conds->fix_fields(thd, &semi_join_conds)) + if (semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds)) DBUG_RETURN(TRUE); DBUG_RETURN(FALSE); @@ -3791,7 +3901,7 @@ bool subselect_hash_sj_engine::init_runt clause of the query, and it is not 'fixed' during JOIN::prepare. */ if (semi_join_conds && !semi_join_conds->fixed && - semi_join_conds->fix_fields(thd, &semi_join_conds)) + semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds)) return TRUE; /* Let our engine reuse this query plan for materialization. */ materialize_join= materialize_engine->join; @@ -3802,6 +3912,7 @@ bool subselect_hash_sj_engine::init_runt subselect_hash_sj_engine::~subselect_hash_sj_engine() { + delete lookup_engine; delete result; if (tmp_table) free_tmp_table(thd, tmp_table); @@ -3817,9 +3928,30 @@ subselect_hash_sj_engine::~subselect_has void subselect_hash_sj_engine::cleanup() { + enum_engine_type lookup_engine_type= lookup_engine->engine_type(); is_materialized= FALSE; - result->cleanup(); /* Resets the temp table as well. */ + bitmap_clear_all(&non_null_key_parts); + bitmap_clear_all(&partial_match_key_parts); + count_partial_match_columns= 0; + count_null_only_columns= 0; + strategy= UNDEFINED; materialize_engine->cleanup(); + if (lookup_engine_type == TABLE_SCAN_ENGINE || + lookup_engine_type == ROWID_MERGE_ENGINE) + { + subselect_engine *inner_lookup_engine; + inner_lookup_engine= + ((subselect_partial_match_engine*) lookup_engine)->lookup_engine; + /* + Partial match engines are recreated for each PS execution inside + subselect_hash_sj_engine::exec(). + */ + delete lookup_engine; + lookup_engine= inner_lookup_engine; + } + DBUG_ASSERT(lookup_engine->engine_type() == UNIQUESUBQUERY_ENGINE); + lookup_engine->cleanup(); + result->cleanup(); /* Resets the temp table as well. */ } @@ -3838,6 +3970,7 @@ int subselect_hash_sj_engine::exec() { Item_in_subselect *item_in= (Item_in_subselect *) item; SELECT_LEX *save_select= thd->lex->current_select; + subselect_partial_match_engine *pm_engine= NULL; int res= 0; DBUG_ENTER("subselect_hash_sj_engine::exec"); @@ -3881,59 +4014,86 @@ int subselect_hash_sj_engine::exec() DBUG_RETURN(FALSE); } - if (strategy == PARTIAL_MATCH) - set_strategy_using_data(); - - /* A unique_engine is used both for complete and partial matching. */ - if (!(lookup_engine= make_unique_engine())) - { - res= 1; - goto err; - } - + /* + TIMOUR: The schema-based analysis for partial matching can be done once for + prepared statement and remembered. It is done here to remove the need to + save/restore all related variables between each re-execution, thus making + the code simpler. + */ + strategy= get_strategy_using_schema(); + /* This call may discover that we don't need partial matching at all. */ + strategy= get_strategy_using_data(); if (strategy == PARTIAL_MATCH) { - subselect_rowid_merge_engine *rowid_merge_engine; - uint count_pm_keys; - MY_BITMAP *nn_key_parts; - bool has_covering_null_row; + uint count_pm_keys; /* Total number of keys needed for partial matching. */ + MY_BITMAP *nn_key_parts; /* The key parts of the only non-NULL index. */ + uint covering_null_row_width; select_materialize_with_stats *result_sink= (select_materialize_with_stats *) result; - /* Total number of keys needed for partial matching. */ nn_key_parts= (count_partial_match_columns < tmp_table->s->fields) ? &non_null_key_parts : NULL; - has_covering_null_row= (result_sink->get_max_nulls_in_row() == - tmp_table->s->fields - - (nn_key_parts ? bitmap_bits_set(nn_key_parts) : 0)); + if (result_sink->get_max_nulls_in_row() == + tmp_table->s->fields - + (nn_key_parts ? bitmap_bits_set(nn_key_parts) : 0)) + covering_null_row_width= result_sink->get_max_nulls_in_row(); + else + covering_null_row_width= 0; - if (has_covering_null_row) + if (covering_null_row_width) count_pm_keys= nn_key_parts ? 1 : 0; else count_pm_keys= count_partial_match_columns - count_null_only_columns + (nn_key_parts ? 1 : 0); - if (!(rowid_merge_engine= - new subselect_rowid_merge_engine((subselect_uniquesubquery_engine*) - lookup_engine, - tmp_table, - count_pm_keys, - has_covering_null_row, - item, result)) || - rowid_merge_engine->init(nn_key_parts, &partial_match_key_parts)) - { - strategy= PARTIAL_MATCH_SCAN; - delete rowid_merge_engine; - /* TIMOUR: setup execution structures for partial match via scanning. */ + choose_partial_match_strategy(test(nn_key_parts), + test(covering_null_row_width), + &partial_match_key_parts); + DBUG_ASSERT(strategy == PARTIAL_MATCH_MERGE || + strategy == PARTIAL_MATCH_SCAN); + if (strategy == PARTIAL_MATCH_MERGE) + { + pm_engine= + new subselect_rowid_merge_engine((subselect_uniquesubquery_engine*) + lookup_engine, tmp_table, + count_pm_keys, + covering_null_row_width, + item, result, + semi_join_conds->argument_list()); + if (!pm_engine || + ((subselect_rowid_merge_engine*) pm_engine)-> + init(nn_key_parts, &partial_match_key_parts)) + { + /* + The call to init() would fail if there was not enough memory to allocate + all buffers for the rowid merge strategy. In this case revert to table + scanning which doesn't need any big buffers. + */ + delete pm_engine; + pm_engine= NULL; + strategy= PARTIAL_MATCH_SCAN; + } } - else + + if (strategy == PARTIAL_MATCH_SCAN) { - strategy= PARTIAL_MATCH_INDEX; - lookup_engine= rowid_merge_engine; + if (!(pm_engine= + new subselect_table_scan_engine((subselect_uniquesubquery_engine*) + lookup_engine, tmp_table, + item, result, + semi_join_conds->argument_list(), + covering_null_row_width))) + { + /* This is an irrecoverable error. */ + res= 1; + goto err; + } } } + if (pm_engine) + lookup_engine= pm_engine; item_in->change_engine(lookup_engine); err: @@ -4009,10 +4169,8 @@ Ordered_key::Ordered_key(uint keyid_arg, Ordered_key::~Ordered_key() { - /* - All data structures are allocated on thd->mem_root, thus we don't - free them here. - */ + my_free((char*) key_buff, MYF(0)); + bitmap_free(&null_key); } @@ -4030,6 +4188,7 @@ void Ordered_key::cleanup() */ } + /* Initialize a multi-column index. */ @@ -4103,14 +4262,16 @@ bool Ordered_key::init(int col_idx) } +/* + Allocate the buffers for both the row number, and the NULL-bitmap indexes. +*/ + bool Ordered_key::alloc_keys_buffers() { - THD *thd= tbl->in_use; - DBUG_ASSERT(key_buff_elements > 0); - if (!(key_buff= (rownum_t*) thd->alloc(key_buff_elements * - sizeof(rownum_t)))) + if (!(key_buff= (rownum_t*) my_malloc(key_buff_elements * sizeof(rownum_t), + MYF(MY_WME)))) return TRUE; /* @@ -4118,10 +4279,8 @@ bool Ordered_key::alloc_keys_buffers() (max_null_row - min_null_row), and then use min_null_row as lookup offset. */ - if (bitmap_init_memroot(&null_key, - /* this is max array index, we need count, so +1. */ - max_null_row + 1, - thd->mem_root)) + /* Notice that max_null_row is max array index, we need count, so +1. */ + if (bitmap_init(&null_key, NULL, max_null_row + 1, FALSE)) return TRUE; cur_key_idx= HA_POS_ERROR; @@ -4193,8 +4352,9 @@ void Ordered_key::sort_keys() /* - The probability that a certain row does not contain a NULL in some row in - a NULL-indexed column. + The fraction of rows that do not contain NULL in the columns indexed by + this key. + @retval 1 if there are no NULLs @retval 0 if only NULLs */ @@ -4353,10 +4513,122 @@ void Ordered_key::print(String *str) } +subselect_partial_match_engine::subselect_partial_match_engine( + subselect_uniquesubquery_engine *engine_arg, + TABLE *tmp_table_arg, Item_subselect *item_arg, + select_result_interceptor *result_arg, + List<Item> *equi_join_conds_arg, + uint covering_null_row_width_arg) + :subselect_engine(item_arg, result_arg), + tmp_table(tmp_table_arg), lookup_engine(engine_arg), + equi_join_conds(equi_join_conds_arg), + covering_null_row_width(covering_null_row_width_arg) +{} + + +int subselect_partial_match_engine::exec() +{ + Item_in_subselect *item_in= (Item_in_subselect *) item; + int res; + + /* Try to find a matching row by index lookup. */ + res= lookup_engine->copy_ref_key_simple(); + if (res == -1) + { + /* The result is FALSE based on the outer reference. */ + item_in->value= 0; + item_in->null_value= 0; + return 0; + } + else if (res == 0) + { + /* Search for a complete match. */ + if ((res= lookup_engine->index_lookup())) + { + /* An error occured during lookup(). */ + item_in->value= 0; + item_in->null_value= 0; + return res; + } + else if (item_in->value) + { + /* + A complete match was found, the result of IN is TRUE. + Notice: (this->item == lookup_engine->item) + */ + return 0; + } + } + + if (covering_null_row_width == tmp_table->s->fields) + { + /* + If there is a NULL-only row that coveres all columns the result of IN + is UNKNOWN. + */ + item_in->value= 0; + /* + TIMOUR: which one is the right way to propagate an UNKNOWN result? + Should we also set empty_result_set= FALSE; ??? + */ + //item_in->was_null= 1; + item_in->null_value= 1; + return 0; + } + + /* + There is no complete match. Look for a partial match (UNKNOWN result), or + no match (FALSE). + */ + if (tmp_table->file->inited) + tmp_table->file->ha_index_end(); + + if (partial_match()) + { + /* The result of IN is UNKNOWN. */ + item_in->value= 0; + /* + TIMOUR: which one is the right way to propagate an UNKNOWN result? + Should we also set empty_result_set= FALSE; ??? + */ + //item_in->was_null= 1; + item_in->null_value= 1; + } + else + { + /* The result of IN is FALSE. */ + item_in->value= 0; + /* + TIMOUR: which one is the right way to propagate an UNKNOWN result? + Should we also set empty_result_set= FALSE; ??? + */ + //item_in->was_null= 0; + item_in->null_value= 0; + } + + return 0; +} + + +void subselect_partial_match_engine::print(String *str, + enum_query_type query_type) +{ + /* + Should never be called as the actual engine cannot be known at query + optimization time. + */ + DBUG_ASSERT(FALSE); +} + + /* @param non_null_key_parts @param partial_match_key_parts A union of all single-column NULL key parts. @param count_partial_match_columns Number of NULL keyparts (set bits above). + + @retval FALSE the engine was initialized successfully + @retval TRUE there was some (memory allocation) error during initialization, + such errors should be interpreted as revert to other strategy */ bool @@ -4379,14 +4651,17 @@ subselect_rowid_merge_engine::init(MY_BI return FALSE; } - DBUG_ASSERT(!has_covering_null_row || (has_covering_null_row && - keys_count == 1 && - non_null_key_parts)); - + DBUG_ASSERT(!covering_null_row_width || (covering_null_row_width && + keys_count == 1 && + non_null_key_parts)); + /* + Allocate buffers to hold the merged keys and the mapping between rowids and + row numbers. + */ if (!(merge_keys= (Ordered_key**) thd->alloc(keys_count * sizeof(Ordered_key*))) || - !(row_num_to_rowid= (uchar*) thd->alloc(row_count * rowid_length * - sizeof(uchar)))) + !(row_num_to_rowid= (uchar*) my_malloc(row_count * rowid_length * + sizeof(uchar), MYF(MY_WME)))) return TRUE; /* Create the only non-NULL key if there is any. */ @@ -4395,10 +4670,7 @@ subselect_rowid_merge_engine::init(MY_BI non_null_key= new Ordered_key(cur_keyid, tmp_table, item_in->left_expr, 0, 0, 0, row_num_to_rowid); if (non_null_key->init(non_null_key_parts)) - { - // TIMOUR: revert to partial matching via scanning return TRUE; - } merge_keys[cur_keyid]= non_null_key; merge_keys[cur_keyid]->first(); ++cur_keyid; @@ -4406,9 +4678,10 @@ subselect_rowid_merge_engine::init(MY_BI /* If there is a covering NULL row, the only key that is needed is the - only non-NULL key that is already created above. + only non-NULL key that is already created above. We create keys on + NULL-able columns only if there is no covering NULL row. */ - if (!has_covering_null_row) + if (!covering_null_row_width) { if (bitmap_init_memroot(&matching_keys, keys_count, thd->mem_root) || bitmap_init_memroot(&matching_outer_cols, keys_count, thd->mem_root) || @@ -4436,10 +4709,7 @@ subselect_rowid_merge_engine::init(MY_BI result_sink->get_max_null_of_col(i), row_num_to_rowid); if (merge_keys[cur_keyid]->init(i)) - { - // TIMOUR: revert to partial matching via scanning return TRUE; - } merge_keys[cur_keyid]->first(); } ++cur_keyid; @@ -4510,10 +4780,7 @@ subselect_rowid_merge_engine::init(MY_BI if (init_queue(&pq, keys_count, 0, FALSE, subselect_rowid_merge_engine::cmp_keys_by_cur_rownum, NULL)) - { - // TIMOUR: revert to partial matching via scanning return TRUE; - } return FALSE; } @@ -4521,26 +4788,21 @@ subselect_rowid_merge_engine::init(MY_BI subselect_rowid_merge_engine::~subselect_rowid_merge_engine() { - delete_queue(&pq); + /* None of the resources below is allocated if there are no ordered keys. */ + if (keys_count) + { + my_free((char*) row_num_to_rowid, MYF(0)); + for (uint i= 0; i < keys_count; i++) + delete merge_keys[i]; + delete_queue(&pq); + if (tmp_table->file->inited == handler::RND) + tmp_table->file->ha_rnd_end(); + } } void subselect_rowid_merge_engine::cleanup() { - lookup_engine->cleanup(); - /* Tell handler we don't need the index anymore */ - if (tmp_table->file->inited) - tmp_table->file->ha_rnd_end(); - queue_remove_all(&pq); -} - - -void subselect_rowid_merge_engine::print(String *str, enum_query_type query_type) -{ - str->append(STRING_WITH_LEN("<rowid_merge>(")); - for (uint i= 0; i < keys_count; i++) - merge_keys[i]->print(str); - str->append(')'); } @@ -4627,20 +4889,31 @@ bool subselect_rowid_merge_engine::parti Ordered_key *cur_key; rownum_t cur_row_num; uint count_nulls_in_search_key= 0; + bool res= FALSE; /* If there is a non-NULL key, it must be the first key in the keys array. */ DBUG_ASSERT(!non_null_key || (non_null_key && merge_keys[0] == non_null_key)); + + /* All data accesses during execution are via handler::ha_rnd_pos() */ + tmp_table->file->ha_rnd_init(0); + /* Check if there is a match for the columns of the only non-NULL key. */ if (non_null_key && !non_null_key->lookup()) - return FALSE; + { + res= FALSE; + goto end; + } /* If there is a NULL (sub)row that covers all NULL-able columns, then there is a guranteed partial match, and we don't need to search for the matching row. */ - if (has_covering_null_row) - return TRUE; + if (covering_null_row_width) + { + res= TRUE; + goto end; + } if (non_null_key) queue_insert(&pq, (uchar *) non_null_key); @@ -4667,14 +4940,20 @@ bool subselect_rowid_merge_engine::parti if (count_nulls_in_search_key == ((Item_in_subselect *) item)->left_expr->cols() - (non_null_key ? non_null_key->get_column_count() : 0)) - return TRUE; + { + res= TRUE; + goto end; + } /* If there is no NULL (sub)row that covers all NULL columns, and there is no single match for any of the NULL columns, the result is FALSE. */ if (pq.elements - test(non_null_key) == 0) - return FALSE; + { + res= FALSE; + goto end; + } DBUG_ASSERT(pq.elements); @@ -4692,10 +4971,8 @@ bool subselect_rowid_merge_engine::parti Check the only matching row of the only key min_key for NULL matches in the other columns. */ - if (test_null_row(min_row_num)) - return TRUE; - else - return FALSE; + res= test_null_row(min_row_num); + goto end; } while (TRUE) @@ -4710,7 +4987,10 @@ bool subselect_rowid_merge_engine::parti /* Follows from the correct use of priority queue. */ DBUG_ASSERT(cur_row_num > min_row_num); if (test_null_row(min_row_num)) - return TRUE; + { + res= TRUE; + goto end; + } else { min_key= cur_key; @@ -4727,99 +5007,112 @@ bool subselect_rowid_merge_engine::parti if (pq.elements == 0) { /* Check the last row of the last column in PQ for NULL matches. */ - if (test_null_row(min_row_num)) - return TRUE; - else - return FALSE; + res= test_null_row(min_row_num); + goto end; } } - /* We should never get here. */ + /* We should never get here - all branches must be handled explicitly above. */ DBUG_ASSERT(FALSE); - return FALSE; + +end: + tmp_table->file->ha_rnd_end(); + return res; } -int subselect_rowid_merge_engine::exec() -{ - Item_in_subselect *item_in= (Item_in_subselect *) item; - int res; +subselect_table_scan_engine::subselect_table_scan_engine( + subselect_uniquesubquery_engine *engine_arg, + TABLE *tmp_table_arg, + Item_subselect *item_arg, + select_result_interceptor *result_arg, + List<Item> *equi_join_conds_arg, + uint covering_null_row_width_arg) + :subselect_partial_match_engine(engine_arg, tmp_table_arg, item_arg, + result_arg, equi_join_conds_arg, + covering_null_row_width_arg) +{} - /* Try to find a matching row by index lookup. */ - res= lookup_engine->copy_ref_key_simple(); - if (res == -1) - { - /* The result is FALSE based on the outer reference. */ - item_in->value= 0; - item_in->null_value= 0; - return 0; - } - else if (res == 0) + +/* + TIMOUR: + This method is based on subselect_uniquesubquery_engine::scan_table(). + Consider refactoring somehow, 80% of the code is the same. + + for each row_i in tmp_table { - if ((res= lookup_engine->index_lookup())) - { - /* An error occured during lookup(). */ - item_in->value= 0; - item_in->null_value= 0; - return res; - } - else if (item_in->value) + count_matches= 0; + for each row element row_i[j] { - /* - A complete match was found, the result of IN is TRUE. - Notice: (this->item == lookup_engine->item) - */ - return 0; + if (outer_ref[j] is NULL || row_i[j] is NULL || outer_ref[j] == row_i[j]) + ++count_matches; } + if (count_matches == outer_ref.elements) + return TRUE } + return FALSE +*/ - if (has_covering_null_row && !keys_count) - { - /* - If there is a NULL-only row that coveres all columns the result of IN - is UNKNOWN. - */ - item_in->value= 0; - /* - TIMOUR: which one is the right way to propagate an UNKNOWN result? - Should we also set empty_result_set= FALSE; ??? - */ - //item_in->was_null= 1; - item_in->null_value= 1; - return 0; - } +bool subselect_table_scan_engine::partial_match() +{ + List_iterator_fast<Item> equality_it(*equi_join_conds); + Item *cur_eq; + uint count_matches; + int error; + bool res; - /* All data accesses during execution are via handler::ha_rnd_pos() */ - if (tmp_table->file->inited) - tmp_table->file->ha_index_end(); - tmp_table->file->ha_rnd_init(0); + tmp_table->file->ha_rnd_init(1); + tmp_table->file->extra_opt(HA_EXTRA_CACHE, + current_thd->variables.read_buff_size); /* - There is no complete match. Look for a partial match (UNKNOWN result), or - no match (FALSE). + TIMOUR: + scan_table() also calls "table->null_row= 0;", why, do we need it? */ - if (partial_match()) - { - /* The result of IN is UNKNOWN. */ - item_in->value= 0; - /* - TIMOUR: which one is the right way to propagate an UNKNOWN result? - Should we also set empty_result_set= FALSE; ??? - */ - //item_in->was_null= 1; - item_in->null_value= 1; - } - else + for (;;) { - /* The result of IN is FALSE. */ - item_in->value= 0; - /* - TIMOUR: which one is the right way to propagate an UNKNOWN result? - Should we also set empty_result_set= FALSE; ??? - */ - //item_in->was_null= 0; - item_in->null_value= 0; + error= tmp_table->file->ha_rnd_next(tmp_table->record[0]); + if (error) { + if (error == HA_ERR_RECORD_DELETED) + { + error= 0; + continue; + } + if (error == HA_ERR_END_OF_FILE) + { + error= 0; + break; + } + else + { + error= report_error(tmp_table, error); + break; + } + } + + equality_it.rewind(); + count_matches= 0; + while ((cur_eq= equality_it++)) + { + DBUG_ASSERT(cur_eq->type() == Item::FUNC_ITEM && + ((Item_func*)cur_eq)->functype() == Item_func::EQ_FUNC); + if (!cur_eq->val_int() && !cur_eq->null_value) + break; + ++count_matches; + } + if (count_matches == tmp_table->s->fields) + { + res= TRUE; /* Found a matching row. */ + goto end; + } } + + res= FALSE; +end: tmp_table->file->ha_rnd_end(); + return res; +} - return 0; + +void subselect_table_scan_engine::cleanup() +{ } === modified file 'sql/item_subselect.h' --- a/sql/item_subselect.h 2010-02-22 15:16:55 +0000 +++ b/sql/item_subselect.h 2010-03-09 10:14:06 +0000 @@ -436,7 +436,7 @@ public: friend class Item_in_optimizer; friend class subselect_indexsubquery_engine; friend class subselect_hash_sj_engine; - friend class subselect_rowid_merge_engine; + friend class subselect_partial_match_engine; }; @@ -472,7 +472,7 @@ public: enum enum_engine_type {ABSTRACT_ENGINE, SINGLE_SELECT_ENGINE, UNION_ENGINE, UNIQUESUBQUERY_ENGINE, INDEXSUBQUERY_ENGINE, HASH_SJ_ENGINE, - ROR_INTERSECT_ENGINE}; + ROWID_MERGE_ENGINE, TABLE_SCAN_ENGINE}; subselect_engine(Item_subselect *si, select_result_interceptor *res) :thd(0) @@ -716,6 +716,109 @@ inline bool Item_subselect::is_uncacheab } +/** + Compute an IN predicate via a hash semi-join. This class is responsible for + the materialization of the subquery, and the selection of the correct and + optimal execution method (e.g. direct index lookup, or partial matching) for + the IN predicate. +*/ + +class subselect_hash_sj_engine : public subselect_engine +{ +protected: + /* The table into which the subquery is materialized. */ + TABLE *tmp_table; + /* TRUE if the subquery was materialized into a temp table. */ + bool is_materialized; + /* + The old engine already chosen at parse time and stored in permanent memory. + Through this member we can re-create and re-prepare materialize_join for + each execution of a prepared statement. We also reuse the functionality + of subselect_single_select_engine::[prepare | cols]. + */ + subselect_single_select_engine *materialize_engine; + /* The engine used to compute the IN predicate. */ + subselect_engine *lookup_engine; + /* + QEP to execute the subquery and materialize its result into a + temporary table. Created during the first call to exec(). + */ + JOIN *materialize_join; + + /* Keyparts of the only non-NULL composite index in a rowid merge. */ + MY_BITMAP non_null_key_parts; + /* Keyparts of the single column indexes with NULL, one keypart per index. */ + MY_BITMAP partial_match_key_parts; + uint count_partial_match_columns; + uint count_null_only_columns; + /* + A conjunction of all the equality condtions between all pairs of expressions + that are arguments of an IN predicate. We need these to post-filter some + IN results because index lookups sometimes match values that are actually + not equal to the search key in SQL terms. + */ + Item_cond_and *semi_join_conds; + /* Possible execution strategies that can be used to compute hash semi-join.*/ + enum exec_strategy { + UNDEFINED, + COMPLETE_MATCH, /* Use regular index lookups. */ + PARTIAL_MATCH, /* Use some partial matching strategy. */ + PARTIAL_MATCH_MERGE, /* Use partial matching through index merging. */ + PARTIAL_MATCH_SCAN, /* Use partial matching through table scan. */ + IMPOSSIBLE /* Subquery materialization is not applicable. */ + }; + /* The chosen execution strategy. Computed after materialization. */ + exec_strategy strategy; +protected: + exec_strategy get_strategy_using_schema(); + exec_strategy get_strategy_using_data(); + size_t rowid_merge_buff_size(bool has_non_null_key, + bool has_covering_null_row, + MY_BITMAP *partial_match_key_parts); + void choose_partial_match_strategy(bool has_non_null_key, + bool has_covering_null_row, + MY_BITMAP *partial_match_key_parts); + bool make_semi_join_conds(); + subselect_uniquesubquery_engine* make_unique_engine(); + +public: + subselect_hash_sj_engine(THD *thd, Item_subselect *in_predicate, + subselect_single_select_engine *old_engine) + :subselect_engine(in_predicate, NULL), tmp_table(NULL), + is_materialized(FALSE), materialize_engine(old_engine), lookup_engine(NULL), + materialize_join(NULL), count_partial_match_columns(0), + count_null_only_columns(0), semi_join_conds(NULL), strategy(UNDEFINED) + { + set_thd(thd); + } + ~subselect_hash_sj_engine(); + + bool init_permanent(List<Item> *tmp_columns); + bool init_runtime(); + void cleanup(); + int prepare() { return 0; } /* Override virtual function in base class. */ + int exec(); + virtual void print(String *str, enum_query_type query_type); + uint cols() + { + return materialize_engine->cols(); + } + uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; } + table_map upper_select_const_tables() { return 0; } + bool no_rows() { return !tmp_table->file->stats.records; } + virtual enum_engine_type engine_type() { return HASH_SJ_ENGINE; } + /* + TODO: factor out all these methods in a base subselect_index_engine class + because all of them have dummy implementations and should never be called. + */ + void fix_length_and_dec(Item_cache** row);//=>base class + void exclude(); //=>base class + //=>base class + bool change_result(Item_subselect *si, select_result_interceptor *result); + bool no_tables();//=>base class +}; + + /* Distinguish the type od (0-based) row numbers from the type of the index into an array of row numbers. @@ -745,7 +848,7 @@ typedef ha_rows rownum_t; PS (re)execution, however most of the comprising objects can be reused. */ -class Ordered_key +class Ordered_key : public Sql_alloc { protected: /* @@ -761,6 +864,8 @@ protected: uint key_column_count; /* An expression, or sequence of expressions that forms the search key. + The search key is a sequence when it is Item_row. Each element of the + sequence is accessible via Item::element_index(int i). */ Item *search_key; @@ -808,8 +913,6 @@ protected: int cmp_key_with_search_key(rownum_t row_num); public: - static void *operator new(size_t size) throw () - { return sql_alloc(size); } Ordered_key(uint keyid_arg, TABLE *tbl_arg, Item *search_key_arg, ha_rows null_count_arg, ha_rows min_null_row_arg, ha_rows max_null_row_arg, @@ -828,6 +931,10 @@ public: DBUG_ASSERT(i < key_column_count); return key_columns[i]->field->field_index; } + /* + Get the search key element that corresponds to the i-th key part of this + index. + */ Item *get_search_key(uint i) { return search_key->element_index(key_columns[i]->field->field_index); @@ -899,7 +1006,7 @@ public: }; -class subselect_rowid_merge_engine: public subselect_engine +class subselect_partial_match_engine : public subselect_engine { protected: /* The temporary table that contains a materialized subquery. */ @@ -910,6 +1017,51 @@ protected: FALSE and UNKNOWN. */ subselect_uniquesubquery_engine *lookup_engine; + /* A list of equalities between each pair of IN operands. */ + List<Item> *equi_join_conds; + /* + If there is a row, such that all its NULL-able components are NULL, this + member is set to the number of covered columns. If there is no covering + row, then this is 0. + */ + uint covering_null_row_width; +protected: + virtual bool partial_match()= 0; +public: + subselect_partial_match_engine(subselect_uniquesubquery_engine *engine_arg, + TABLE *tmp_table_arg, Item_subselect *item_arg, + select_result_interceptor *result_arg, + List<Item> *equi_join_conds_arg, + uint covering_null_row_width_arg); + int prepare() { return 0; } + int exec(); + void fix_length_and_dec(Item_cache**) {} + uint cols() { /* TODO: what is the correct value? */ return 1; } + uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; } + void exclude() {} + table_map upper_select_const_tables() { return 0; } + bool change_result(Item_subselect*, select_result_interceptor*) + { DBUG_ASSERT(FALSE); return false; } + bool no_tables() { return false; } + bool no_rows() + { + /* + TODO: It is completely unclear what is the semantics of this + method. The current result is computed so that the call to no_rows() + from Item_in_optimizer::val_int() sets Item_in_optimizer::null_value + correctly. + */ + return !(((Item_in_subselect *) item)->null_value); + } + void print(String*, enum_query_type); + + friend void subselect_hash_sj_engine::cleanup(); +}; + + +class subselect_rowid_merge_engine: public subselect_partial_match_engine +{ +protected: /* Mapping from row numbers to row ids. The rowids are stored sequentially in the array - rowid[i] is located in row_num_to_rowid + i * rowid_length. @@ -953,8 +1105,6 @@ protected: This queue is used by the partial match algorithm in method exec(). */ QUEUE pq; - /* True if there is a NULL (sub)row that covers all NULLable columns. */ - bool has_covering_null_row; protected: /* Comparison function to compare keys in order of decreasing bitmap @@ -972,143 +1122,34 @@ protected: public: subselect_rowid_merge_engine(subselect_uniquesubquery_engine *engine_arg, TABLE *tmp_table_arg, uint keys_count_arg, - uint has_covering_null_row_arg, + uint covering_null_row_width_arg, Item_subselect *item_arg, - select_result_interceptor *result_arg) - :subselect_engine(item_arg, result_arg), - tmp_table(tmp_table_arg), lookup_engine(engine_arg), - keys_count(keys_count_arg), non_null_key(NULL), - has_covering_null_row(has_covering_null_row_arg) + select_result_interceptor *result_arg, + List<Item> *equi_join_conds_arg) + :subselect_partial_match_engine(engine_arg, tmp_table_arg, item_arg, + result_arg, equi_join_conds_arg, + covering_null_row_width_arg), + keys_count(keys_count_arg), non_null_key(NULL) { thd= lookup_engine->get_thd(); } ~subselect_rowid_merge_engine(); bool init(MY_BITMAP *non_null_key_parts, MY_BITMAP *partial_match_key_parts); void cleanup(); - int prepare() { return 0; } - void fix_length_and_dec(Item_cache**) {} - int exec(); - uint cols() { /* TODO: what is the correct value? */ return 1; } - uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; } - void exclude() {} - table_map upper_select_const_tables() { return 0; } - void print(String*, enum_query_type); - bool change_result(Item_subselect*, select_result_interceptor*) - { DBUG_ASSERT(FALSE); return false; } - bool no_tables() { return false; } - bool no_rows() - { - /* - TODO: It is completely unclear what is the semantics of this - method. The current result is computed so that the call to no_rows() - from Item_in_optimizer::val_int() sets Item_in_optimizer::null_value - correctly. - */ - return !(((Item_in_subselect *) item)->null_value); - } + virtual enum_engine_type engine_type() { return ROWID_MERGE_ENGINE; } }; -/** - Compute an IN predicate via a hash semi-join. This class is responsible for - the materialization of the subquery, and the selection of the correct and - optimal execution method (e.g. direct index lookup, or partial matching) for - the IN predicate. -*/ - -class subselect_hash_sj_engine : public subselect_engine +class subselect_table_scan_engine: public subselect_partial_match_engine { protected: - /* The table into which the subquery is materialized. */ - TABLE *tmp_table; - /* TRUE if the subquery was materialized into a temp table. */ - bool is_materialized; - /* - The old engine already chosen at parse time and stored in permanent memory. - Through this member we can re-create and re-prepare materialize_join for - each execution of a prepared statement. We also reuse the functionality - of subselect_single_select_engine::[prepare | cols]. - */ - subselect_single_select_engine *materialize_engine; - /* The engine used to compute the IN predicate. */ - subselect_engine *lookup_engine; - /* - QEP to execute the subquery and materialize its result into a - temporary table. Created during the first call to exec(). - */ - JOIN *materialize_join; - /* - TRUE if the subquery result has an all-NULL column, which means that - there at best can be a partial match for any IN execution. - */ - bool inner_partial_match; - /* - TRUE if the materialized subquery contains a whole row only of NULLs. - */ - bool has_null_row; - - /* Keyparts of the only non-NULL composite index in a rowid merge. */ - MY_BITMAP non_null_key_parts; - /* Keyparts of the single column indexes with NULL, one keypart per index. */ - MY_BITMAP partial_match_key_parts; - uint count_partial_match_columns; - uint count_null_only_columns; - /* - A conjunction of all the equality condtions between all pairs of expressions - that are arguments of an IN predicate. We need these to post-filter some - IN results because index lookups sometimes match values that are actually - not equal to the search key in SQL terms. - */ - Item *semi_join_conds; - /* Possible execution strategies that can be used to compute hash semi-join.*/ - enum exec_strategy { - COMPLETE_MATCH, /* Use regular index lookups. */ - PARTIAL_MATCH, /* Use some partial matching strategy. */ - PARTIAL_MATCH_INDEX, /* Use partial matching through index merging. */ - PARTIAL_MATCH_SCAN, /* Use partial matching through table scan. */ - IMPOSSIBLE /* Subquery materialization is not applicable. */ - }; - /* The chosen execution strategy. Computed after materialization. */ - exec_strategy strategy; -protected: - void set_strategy_using_schema(); - void set_strategy_using_data(); - bool make_semi_join_conds(); - subselect_uniquesubquery_engine* make_unique_engine(); - + bool partial_match(); public: - subselect_hash_sj_engine(THD *thd, Item_subselect *in_predicate, - subselect_single_select_engine *old_engine) - :subselect_engine(in_predicate, NULL), tmp_table(NULL), - is_materialized(FALSE), materialize_engine(old_engine), lookup_engine(NULL), - materialize_join(NULL), count_partial_match_columns(0), - count_null_only_columns(0), semi_join_conds(NULL) - { - set_thd(thd); - } - ~subselect_hash_sj_engine(); - - bool init_permanent(List<Item> *tmp_columns); - bool init_runtime(); + subselect_table_scan_engine(subselect_uniquesubquery_engine *engine_arg, + TABLE *tmp_table_arg, Item_subselect *item_arg, + select_result_interceptor *result_arg, + List<Item> *equi_join_conds_arg, + uint covering_null_row_width_arg); void cleanup(); - int prepare() { return 0; } /* Override virtual function in base class. */ - int exec(); - virtual void print (String *str, enum_query_type query_type); - uint cols() - { - return materialize_engine->cols(); - } - uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; } - table_map upper_select_const_tables() { return 0; } - bool no_rows() { return !tmp_table->file->stats.records; } - virtual enum_engine_type engine_type() { return HASH_SJ_ENGINE; } - /* - TODO: factor out all these methods in a base subselect_index_engine class - because all of them have dummy implementations and should never be called. - */ - void fix_length_and_dec(Item_cache** row);//=>base class - void exclude(); //=>base class - //=>base class - bool change_result(Item_subselect *si, select_result_interceptor *result); - bool no_tables();//=>base class + virtual enum_engine_type engine_type() { return TABLE_SCAN_ENGINE; } }; === modified file 'sql/mysql_priv.h' --- a/sql/mysql_priv.h 2010-01-17 14:55:08 +0000 +++ b/sql/mysql_priv.h 2010-03-09 10:14:06 +0000 @@ -552,12 +552,14 @@ protected: #define OPTIMIZER_SWITCH_LOOSE_SCAN 64 #define OPTIMIZER_SWITCH_MATERIALIZATION 128 #define OPTIMIZER_SWITCH_SEMIJOIN 256 +#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 512 +#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN 1024 #ifdef DBUG_OFF -# define OPTIMIZER_SWITCH_LAST 512 +# define OPTIMIZER_SWITCH_LAST 2048 #else -# define OPTIMIZER_SWITCH_TABLE_ELIMINATION 512 -# define OPTIMIZER_SWITCH_LAST 1024 +# define OPTIMIZER_SWITCH_TABLE_ELIMINATION 2048 +# define OPTIMIZER_SWITCH_LAST 4096 #endif #ifdef DBUG_OFF @@ -570,8 +572,10 @@ protected: OPTIMIZER_SWITCH_FIRSTMATCH | \ OPTIMIZER_SWITCH_LOOSE_SCAN | \ OPTIMIZER_SWITCH_MATERIALIZATION | \ - OPTIMIZER_SWITCH_SEMIJOIN) -#else + OPTIMIZER_SWITCH_SEMIJOIN | \ + OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\ + OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN) +#else # define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \ OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \ OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \ @@ -581,7 +585,9 @@ protected: OPTIMIZER_SWITCH_FIRSTMATCH | \ OPTIMIZER_SWITCH_LOOSE_SCAN | \ OPTIMIZER_SWITCH_MATERIALIZATION | \ - OPTIMIZER_SWITCH_SEMIJOIN) + OPTIMIZER_SWITCH_SEMIJOIN | \ + OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\ + OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN) #endif /* === modified file 'sql/mysqld.cc' --- a/sql/mysqld.cc 2010-01-17 14:55:08 +0000 +++ b/sql/mysqld.cc 2010-03-09 10:14:06 +0000 @@ -301,7 +301,9 @@ static const char *optimizer_switch_name "index_merge","index_merge_union","index_merge_sort_union", "index_merge_intersection", "index_condition_pushdown", - "firstmatch","loosescan","materialization", "semijoin", + "firstmatch","loosescan","materialization", "semijoin", + "partial_match_rowid_merge", + "partial_match_table_scan", #ifndef DBUG_OFF "table_elimination", #endif @@ -320,6 +322,8 @@ static const unsigned int optimizer_swit sizeof("loosescan") - 1, sizeof("materialization") - 1, sizeof("semijoin") - 1, + sizeof("partial_match_rowid_merge") - 1, + sizeof("partial_match_table_scan") - 1, #ifndef DBUG_OFF sizeof("table_elimination") - 1, #endif @@ -5794,7 +5798,8 @@ enum options_mysqld OPT_RECORD_RND_BUFFER, OPT_DIV_PRECINCREMENT, OPT_RELAY_LOG_SPACE_LIMIT, OPT_RELAY_LOG_PURGE, OPT_SLAVE_NET_TIMEOUT, OPT_SLAVE_COMPRESSED_PROTOCOL, OPT_SLOW_LAUNCH_TIME, - OPT_SLAVE_TRANS_RETRIES, OPT_READONLY, OPT_DEBUGGING, OPT_DEBUG_FLUSH, + OPT_SLAVE_TRANS_RETRIES, OPT_READONLY, OPT_ROWID_MERGE_BUFF_SIZE, + OPT_DEBUGGING, OPT_DEBUG_FLUSH, OPT_SORT_BUFFER, OPT_TABLE_OPEN_CACHE, OPT_TABLE_DEF_CACHE, OPT_THREAD_CONCURRENCY, OPT_THREAD_CACHE_SIZE, OPT_TMP_TABLE_SIZE, OPT_THREAD_STACK, @@ -7130,6 +7135,11 @@ The minimum value for this variable is 4 (uchar**) &max_system_variables.range_alloc_block_size, 0, GET_ULONG, REQUIRED_ARG, RANGE_ALLOC_BLOCK_SIZE, RANGE_ALLOC_BLOCK_SIZE, (longlong) ULONG_MAX, 0, 1024, 0}, + {"rowid_merge_buff_size", OPT_ROWID_MERGE_BUFF_SIZE, + "The size of the buffers used [NOT] IN evaluation via partial matching.", + (uchar**) &global_system_variables.rowid_merge_buff_size, + (uchar**) &max_system_variables.rowid_merge_buff_size, 0, GET_ULONG, + REQUIRED_ARG, 8*1024*1024L, 0, MAX_MEM_TABLE_SIZE/2, 0, 1, 0}, {"read_buffer_size", OPT_RECORD_BUFFER, "Each thread that does a sequential scan allocates a buffer of this size for each table it scans. If you do many sequential scans, you may want to increase this value.", (uchar**) &global_system_variables.read_buff_size, === modified file 'sql/set_var.cc' --- a/sql/set_var.cc 2009-12-22 12:49:15 +0000 +++ b/sql/set_var.cc 2010-03-09 10:14:06 +0000 @@ -540,6 +540,9 @@ static sys_var_long_ptr sys_query_cache_ static sys_var_thd_ulong sys_range_alloc_block_size(&vars, "range_alloc_block_size", &SV::range_alloc_block_size); +static sys_var_thd_ulong sys_rowid_merge_buff_size(&vars, "rowid_merge_buff_size", + &SV::rowid_merge_buff_size); + static sys_var_thd_ulong sys_query_alloc_block_size(&vars, "query_alloc_block_size", &SV::query_alloc_block_size, 0, fix_thd_mem_root); === modified file 'sql/sql_class.h' --- a/sql/sql_class.h 2010-02-19 21:55:57 +0000 +++ b/sql/sql_class.h 2010-03-09 10:14:06 +0000 @@ -343,6 +343,8 @@ struct system_variables ulong mrr_buff_size; ulong div_precincrement; ulong sortbuff_size; + /* Total size of all buffers used by the subselect_rowid_merge_engine. */ + ulong rowid_merge_buff_size; ulong thread_handling; ulong tx_isolation; ulong completion_type; === modified file 'support-files/build-tags' --- a/support-files/build-tags 2009-12-15 07:16:46 +0000 +++ b/support-files/build-tags 2010-03-09 10:14:06 +0000 @@ -4,7 +4,7 @@ rm -f TAGS filter='\.cc$\|\.c$\|\.h$\|\.yy$' list="find . -type f" -bzr root >/dev/null 2>/dev/null && list="bzr ls --from-root --kind=file --versioned" +bzr root >/dev/null 2>/dev/null && list="bzr ls --from-root -R --kind=file --versioned" $list |grep $filter |while read f; do