#At file:///home/tsk/mprog/src/5.3-mwl68/ based on revid:timour@askmonty.org-20100222151655-ltjv0rlv6z2sdiiu
2765 timour(a)askmonty.org 2010-03-09
MWL#68 Subquery optimization: Efficient NOT IN execution with NULLs
* Implemented a second partial matching strategy via table scan.
This strategy is a fallback when there is no memory for rowid merging.
* Refactored the selection and creation of partial matching strategies,
so that the choice of strategy is encapsulated in a separate method
choose_partial_match_strategy().
* Refactored the representation of partial match strategies so that:
- each strategy is represented by a polymorphic class, and
- the base class for all partial match strategies contains common
execution code.
* Added an estimate of the memory needed for the rowid merge strategy,
and the system variable "rowid_merge_buff_size" to control the maximum
memory to be used by the rowid merge algorithm.
* Added two optimizer_switch system variables to control the choice of
partial match strategy:
"partial_match_rowid_merge", "partial_match_table_scan".
* Fixed multiple problems with deallocation of resources by the partial
match strategies.
@ sql/mysql_priv.h
* Added two optimizer_switch system variables to control the choice of
partial match strategy:
"partial_match_rowid_merge", "partial_match_table_scan".
@ sql/mysqld.cc
* Added two optimizer_switch system variables to control the choice of
partial match strategy:
"partial_match_rowid_merge", "partial_match_table_scan".
* Added a system variable "rowid_merge_buff_size" to control the maximum
memory to be used by the rowid merge algorithm.
@ sql/set_var.cc
* Added a system variable "rowid_merge_buff_size" to control the maximum
memory to be used by the rowid merge algorithm.
@ sql/sql_class.h
* Added a system variable "rowid_merge_buff_size" to control the maximum
memory to be used by the rowid merge algorithm.
@ support-files/build-tags
Newer versions of BZR require the recursive flag in order to list all files.
modified:
sql/item_subselect.cc
sql/item_subselect.h
sql/mysql_priv.h
sql/mysqld.cc
sql/set_var.cc
sql/sql_class.h
support-files/build-tags
=== modified file 'sql/item_subselect.cc'
--- a/sql/item_subselect.cc 2010-02-22 15:16:55 +0000
+++ b/sql/item_subselect.cc 2010-03-09 10:14:06 +0000
@@ -2910,13 +2910,7 @@ int subselect_uniquesubquery_engine::exe
/*
- TIMOUR: this needs more thinking, as exec() is a wrong IMO because:
- - we don't need empty_result_set, as it is == 1 <=> when
- item->value == 0
- - scan_table() returns >0 even when there was no actuall error,
- but we only found EOF while scanning.
- - scan_table should not check table->status, but it should check
- HA_ERR_END_OF_FILE
+ TIMOUR: write comment
*/
int subselect_uniquesubquery_engine::index_lookup()
@@ -2924,8 +2918,6 @@ int subselect_uniquesubquery_engine::ind
DBUG_ENTER("subselect_uniquesubquery_engine::index_lookup");
int error;
TABLE *table= tab->table;
- empty_result_set= TRUE;
- table->status= 0;
if (!table->file->inited)
table->file->ha_index_init(tab->ref.key, 0);
@@ -2934,25 +2926,25 @@ int subselect_uniquesubquery_engine::ind
make_prev_keypart_map(tab->
ref.key_parts),
HA_READ_KEY_EXACT);
-
DBUG_PRINT("info", ("lookup result: %i", error));
- if (error &&
- error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
- error= report_error(table, error);
- else
+
+ if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
{
- error= 0;
- table->null_row= 0;
- if (!table->status && (!cond || cond->val_int()))
- {
- ((Item_in_subselect *) item)->value= 1;
- empty_result_set= FALSE;
- }
- else
- ((Item_in_subselect *) item)->value= 0;
+ /*
+ TIMOUR: I don't understand at all when do we need to call report_error.
+ In most places where we access an index, we don't do this. Why here?
+ */
+ error= report_error(table, error);
+ DBUG_RETURN(error);
}
- DBUG_RETURN(error);
+ table->null_row= 0;
+ if (!error && (!cond || cond->val_int()))
+ ((Item_in_subselect *) item)->value= 1;
+ else
+ ((Item_in_subselect *) item)->value= 0;
+
+ DBUG_RETURN(0);
}
@@ -3415,19 +3407,24 @@ bool subselect_uniquesubquery_engine::no
If max_keys > 1, then we need partial matching because there are
more indexes than just the one we use during materialization to
remove duplicates.
+
+ @note
+ TIMOUR: The schema-based analysis for partial matching can be done once for
+ prepared statement and remembered. It is done here to remove the need to
+ save/restore all related variables between each re-execution, thus making
+ the code simpler.
+
+ @retval PARTIAL_MATCH if a partial match should be used
+ @retval COMPLETE_MATCH if a complete match (index lookup) should be used
*/
-void subselect_hash_sj_engine::set_strategy_using_schema()
+subselect_hash_sj_engine::exec_strategy
+subselect_hash_sj_engine::get_strategy_using_schema()
{
Item_in_subselect *item_in= (Item_in_subselect *) item;
- DBUG_ENTER("subselect_hash_sj_engine::set_strategy_using_schema");
-
if (item_in->is_top_level_item())
- {
- strategy= COMPLETE_MATCH;
- DBUG_VOID_RETURN;
- }
+ return COMPLETE_MATCH;
else
{
List_iterator<Item> inner_col_it(*item_in->unit->get_unit_column_types());
@@ -3450,10 +3447,8 @@ void subselect_hash_sj_engine::set_strat
/* If no column contains NULLs use regular hash index lookups. */
if (count_partial_match_columns)
- strategy= PARTIAL_MATCH;
- else
- strategy= COMPLETE_MATCH;
- DBUG_VOID_RETURN;
+ return PARTIAL_MATCH;
+ return COMPLETE_MATCH;
}
@@ -3465,19 +3460,25 @@ void subselect_hash_sj_engine::set_strat
matching type of columns that cannot be NULL or that contain only NULLs.
Based on this, the procedure determines the final execution strategy for
the [NOT] IN predicate.
+
+ @retval PARTIAL_MATCH if a partial match should be used
+ @retval COMPLETE_MATCH if a complete match (index lookup) should be used
*/
-void subselect_hash_sj_engine::set_strategy_using_data()
+subselect_hash_sj_engine::exec_strategy
+subselect_hash_sj_engine::get_strategy_using_data()
{
Item_in_subselect *item_in= (Item_in_subselect *) item;
select_materialize_with_stats *result_sink=
(select_materialize_with_stats *) result;
Item *outer_col;
- DBUG_ENTER("subselect_hash_sj_engine::set_strategy_using_data");
-
- /* Call this procedure only if already selected partial matching. */
- DBUG_ASSERT(strategy == PARTIAL_MATCH);
+ /*
+ If we already determined that a complete match is enough based on schema
+ information, nothing can be better.
+ */
+ if (strategy == COMPLETE_MATCH)
+ return COMPLETE_MATCH;
for (uint i= 0; i < item_in->left_expr->cols(); i++)
{
@@ -3501,9 +3502,117 @@ void subselect_hash_sj_engine::set_strat
/* If no column contains NULLs use regular hash index lookups. */
if (!count_partial_match_columns)
- strategy= COMPLETE_MATCH;
+ return COMPLETE_MATCH;
+ return PARTIAL_MATCH;
+}
+
+
+void
+subselect_hash_sj_engine::choose_partial_match_strategy(
+ bool has_non_null_key, bool has_covering_null_row,
+ MY_BITMAP *partial_match_key_parts)
+{
+ size_t pm_buff_size;
- DBUG_VOID_RETURN;
+ DBUG_ASSERT(strategy == PARTIAL_MATCH);
+ /*
+ Choose according to global optimizer switch. If only one of the switches is
+ 'ON', then the remaining strategy is the only possible one. The only cases
+ when this will be overriden is when the total size of all buffers for the
+ merge strategy is bigger than the 'rowid_merge_buff_size' system variable,
+ or if there isn't enough physical memory to allocate the buffers.
+ */
+ if (!optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) &&
+ optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN))
+ strategy= PARTIAL_MATCH_SCAN;
+ else if
+ ( optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) &&
+ !optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN))
+ strategy= PARTIAL_MATCH_MERGE;
+
+ /*
+ If both switches are ON, or both are OFF, we interpret that as "let the
+ optimizer decide". Perform a cost based choice between the two partial
+ matching strategies.
+ */
+ /*
+ TIMOUR: the above interpretation of the switch values could be changed to:
+ - if both are ON - let the optimizer decide,
+ - if both are OFF - do not use partial matching, therefore do not use
+ materialization in non-top-level predicates.
+ The problem with this is that we know for sure if we need partial matching
+ only after the subquery is materialized, and this is too late to revert to
+ the IN=>EXISTS strategy.
+ */
+ if (strategy == PARTIAL_MATCH)
+ {
+ /*
+ TIMOUR: Currently we use a super simplistic measure. This will be
+ addressed in a separate task.
+ */
+ if (tmp_table->file->stats.records < 100)
+ strategy= PARTIAL_MATCH_SCAN;
+ else
+ strategy= PARTIAL_MATCH_MERGE;
+ }
+
+ /* Check if there is enough memory for the rowid merge strategy. */
+ if (strategy == PARTIAL_MATCH_MERGE)
+ {
+ pm_buff_size= rowid_merge_buff_size(has_non_null_key,
+ has_covering_null_row,
+ partial_match_key_parts);
+ if (pm_buff_size > thd->variables.rowid_merge_buff_size)
+ strategy= PARTIAL_MATCH_SCAN;
+ }
+}
+
+
+/*
+ Compute the memory size of all buffers proportional to the number of rows
+ in tmp_table.
+
+ @details
+ If the result is bigger than thd->variables.rowid_merge_buff_size, partial
+ matching via merging is not applicable.
+*/
+
+size_t subselect_hash_sj_engine::rowid_merge_buff_size(
+ bool has_non_null_key, bool has_covering_null_row,
+ MY_BITMAP *partial_match_key_parts)
+{
+ size_t buff_size; /* Total size of all buffers used by partial matching. */
+ ha_rows row_count= tmp_table->file->stats.records;
+ uint rowid_length= tmp_table->file->ref_length;
+ select_materialize_with_stats *result_sink=
+ (select_materialize_with_stats *) result;
+
+ /* Size of the subselect_rowid_merge_engine::row_num_to_rowid buffer. */
+ buff_size= row_count * rowid_length * sizeof(uchar);
+
+ if (has_non_null_key)
+ {
+ /* Add the size of Ordered_key::key_buff of the only non-NULL key. */
+ buff_size+= row_count * sizeof(rownum_t);
+ }
+
+ if (!has_covering_null_row)
+ {
+ for (uint i= 0; i < partial_match_key_parts->n_bits; i++)
+ {
+ if (!bitmap_is_set(partial_match_key_parts, i) ||
+ result_sink->get_null_count_of_col(i) == row_count)
+ continue; /* In these cases we wouldn't construct Ordered keys. */
+
+ /* Add the size of Ordered_key::key_buff */
+ buff_size+= (row_count - result_sink->get_null_count_of_col(i)) *
+ sizeof(rownum_t);
+ /* Add the size of Ordered_key::null_key */
+ buff_size+= bitmap_buffer_size(result_sink->get_max_null_of_col(i));
+ }
+ }
+
+ return buff_size;
}
@@ -3561,7 +3670,6 @@ bool subselect_hash_sj_engine::init_perm
thd->mem_root))
DBUG_RETURN(TRUE);
- set_strategy_using_schema();
/*
Create and initialize a select result interceptor that stores the
result stream in a temporary table. The temporary table itself is
@@ -3623,7 +3731,9 @@ bool subselect_hash_sj_engine::init_perm
((Item_in_subselect *) item)->left_expr->cols() ==
tmp_table->key_info->key_parts);
- if (make_semi_join_conds())
+ if (make_semi_join_conds() ||
+ /* A unique_engine is used both for complete and partial matching. */
+ !(lookup_engine= make_unique_engine()))
DBUG_RETURN(TRUE);
DBUG_RETURN(FALSE);
@@ -3691,7 +3801,7 @@ bool subselect_hash_sj_engine::make_semi
DBUG_RETURN(TRUE);
}
}
- if (semi_join_conds->fix_fields(thd, &semi_join_conds))
+ if (semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds))
DBUG_RETURN(TRUE);
DBUG_RETURN(FALSE);
@@ -3791,7 +3901,7 @@ bool subselect_hash_sj_engine::init_runt
clause of the query, and it is not 'fixed' during JOIN::prepare.
*/
if (semi_join_conds && !semi_join_conds->fixed &&
- semi_join_conds->fix_fields(thd, &semi_join_conds))
+ semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds))
return TRUE;
/* Let our engine reuse this query plan for materialization. */
materialize_join= materialize_engine->join;
@@ -3802,6 +3912,7 @@ bool subselect_hash_sj_engine::init_runt
subselect_hash_sj_engine::~subselect_hash_sj_engine()
{
+ delete lookup_engine;
delete result;
if (tmp_table)
free_tmp_table(thd, tmp_table);
@@ -3817,9 +3928,30 @@ subselect_hash_sj_engine::~subselect_has
void subselect_hash_sj_engine::cleanup()
{
+ enum_engine_type lookup_engine_type= lookup_engine->engine_type();
is_materialized= FALSE;
- result->cleanup(); /* Resets the temp table as well. */
+ bitmap_clear_all(&non_null_key_parts);
+ bitmap_clear_all(&partial_match_key_parts);
+ count_partial_match_columns= 0;
+ count_null_only_columns= 0;
+ strategy= UNDEFINED;
materialize_engine->cleanup();
+ if (lookup_engine_type == TABLE_SCAN_ENGINE ||
+ lookup_engine_type == ROWID_MERGE_ENGINE)
+ {
+ subselect_engine *inner_lookup_engine;
+ inner_lookup_engine=
+ ((subselect_partial_match_engine*) lookup_engine)->lookup_engine;
+ /*
+ Partial match engines are recreated for each PS execution inside
+ subselect_hash_sj_engine::exec().
+ */
+ delete lookup_engine;
+ lookup_engine= inner_lookup_engine;
+ }
+ DBUG_ASSERT(lookup_engine->engine_type() == UNIQUESUBQUERY_ENGINE);
+ lookup_engine->cleanup();
+ result->cleanup(); /* Resets the temp table as well. */
}
@@ -3838,6 +3970,7 @@ int subselect_hash_sj_engine::exec()
{
Item_in_subselect *item_in= (Item_in_subselect *) item;
SELECT_LEX *save_select= thd->lex->current_select;
+ subselect_partial_match_engine *pm_engine= NULL;
int res= 0;
DBUG_ENTER("subselect_hash_sj_engine::exec");
@@ -3881,59 +4014,86 @@ int subselect_hash_sj_engine::exec()
DBUG_RETURN(FALSE);
}
- if (strategy == PARTIAL_MATCH)
- set_strategy_using_data();
-
- /* A unique_engine is used both for complete and partial matching. */
- if (!(lookup_engine= make_unique_engine()))
- {
- res= 1;
- goto err;
- }
-
+ /*
+ TIMOUR: The schema-based analysis for partial matching can be done once for
+ prepared statement and remembered. It is done here to remove the need to
+ save/restore all related variables between each re-execution, thus making
+ the code simpler.
+ */
+ strategy= get_strategy_using_schema();
+ /* This call may discover that we don't need partial matching at all. */
+ strategy= get_strategy_using_data();
if (strategy == PARTIAL_MATCH)
{
- subselect_rowid_merge_engine *rowid_merge_engine;
- uint count_pm_keys;
- MY_BITMAP *nn_key_parts;
- bool has_covering_null_row;
+ uint count_pm_keys; /* Total number of keys needed for partial matching. */
+ MY_BITMAP *nn_key_parts; /* The key parts of the only non-NULL index. */
+ uint covering_null_row_width;
select_materialize_with_stats *result_sink=
(select_materialize_with_stats *) result;
- /* Total number of keys needed for partial matching. */
nn_key_parts= (count_partial_match_columns < tmp_table->s->fields) ?
&non_null_key_parts : NULL;
- has_covering_null_row= (result_sink->get_max_nulls_in_row() ==
- tmp_table->s->fields -
- (nn_key_parts ? bitmap_bits_set(nn_key_parts) : 0));
+ if (result_sink->get_max_nulls_in_row() ==
+ tmp_table->s->fields -
+ (nn_key_parts ? bitmap_bits_set(nn_key_parts) : 0))
+ covering_null_row_width= result_sink->get_max_nulls_in_row();
+ else
+ covering_null_row_width= 0;
- if (has_covering_null_row)
+ if (covering_null_row_width)
count_pm_keys= nn_key_parts ? 1 : 0;
else
count_pm_keys= count_partial_match_columns - count_null_only_columns +
(nn_key_parts ? 1 : 0);
- if (!(rowid_merge_engine=
- new subselect_rowid_merge_engine((subselect_uniquesubquery_engine*)
- lookup_engine,
- tmp_table,
- count_pm_keys,
- has_covering_null_row,
- item, result)) ||
- rowid_merge_engine->init(nn_key_parts, &partial_match_key_parts))
- {
- strategy= PARTIAL_MATCH_SCAN;
- delete rowid_merge_engine;
- /* TIMOUR: setup execution structures for partial match via scanning. */
+ choose_partial_match_strategy(test(nn_key_parts),
+ test(covering_null_row_width),
+ &partial_match_key_parts);
+ DBUG_ASSERT(strategy == PARTIAL_MATCH_MERGE ||
+ strategy == PARTIAL_MATCH_SCAN);
+ if (strategy == PARTIAL_MATCH_MERGE)
+ {
+ pm_engine=
+ new subselect_rowid_merge_engine((subselect_uniquesubquery_engine*)
+ lookup_engine, tmp_table,
+ count_pm_keys,
+ covering_null_row_width,
+ item, result,
+ semi_join_conds->argument_list());
+ if (!pm_engine ||
+ ((subselect_rowid_merge_engine*) pm_engine)->
+ init(nn_key_parts, &partial_match_key_parts))
+ {
+ /*
+ The call to init() would fail if there was not enough memory to allocate
+ all buffers for the rowid merge strategy. In this case revert to table
+ scanning which doesn't need any big buffers.
+ */
+ delete pm_engine;
+ pm_engine= NULL;
+ strategy= PARTIAL_MATCH_SCAN;
+ }
}
- else
+
+ if (strategy == PARTIAL_MATCH_SCAN)
{
- strategy= PARTIAL_MATCH_INDEX;
- lookup_engine= rowid_merge_engine;
+ if (!(pm_engine=
+ new subselect_table_scan_engine((subselect_uniquesubquery_engine*)
+ lookup_engine, tmp_table,
+ item, result,
+ semi_join_conds->argument_list(),
+ covering_null_row_width)))
+ {
+ /* This is an irrecoverable error. */
+ res= 1;
+ goto err;
+ }
}
}
+ if (pm_engine)
+ lookup_engine= pm_engine;
item_in->change_engine(lookup_engine);
err:
@@ -4009,10 +4169,8 @@ Ordered_key::Ordered_key(uint keyid_arg,
Ordered_key::~Ordered_key()
{
- /*
- All data structures are allocated on thd->mem_root, thus we don't
- free them here.
- */
+ my_free((char*) key_buff, MYF(0));
+ bitmap_free(&null_key);
}
@@ -4030,6 +4188,7 @@ void Ordered_key::cleanup()
*/
}
+
/*
Initialize a multi-column index.
*/
@@ -4103,14 +4262,16 @@ bool Ordered_key::init(int col_idx)
}
+/*
+ Allocate the buffers for both the row number, and the NULL-bitmap indexes.
+*/
+
bool Ordered_key::alloc_keys_buffers()
{
- THD *thd= tbl->in_use;
-
DBUG_ASSERT(key_buff_elements > 0);
- if (!(key_buff= (rownum_t*) thd->alloc(key_buff_elements *
- sizeof(rownum_t))))
+ if (!(key_buff= (rownum_t*) my_malloc(key_buff_elements * sizeof(rownum_t),
+ MYF(MY_WME))))
return TRUE;
/*
@@ -4118,10 +4279,8 @@ bool Ordered_key::alloc_keys_buffers()
(max_null_row - min_null_row), and then use min_null_row as
lookup offset.
*/
- if (bitmap_init_memroot(&null_key,
- /* this is max array index, we need count, so +1. */
- max_null_row + 1,
- thd->mem_root))
+ /* Notice that max_null_row is max array index, we need count, so +1. */
+ if (bitmap_init(&null_key, NULL, max_null_row + 1, FALSE))
return TRUE;
cur_key_idx= HA_POS_ERROR;
@@ -4193,8 +4352,9 @@ void Ordered_key::sort_keys()
/*
- The probability that a certain row does not contain a NULL in some row in
- a NULL-indexed column.
+ The fraction of rows that do not contain NULL in the columns indexed by
+ this key.
+
@retval 1 if there are no NULLs
@retval 0 if only NULLs
*/
@@ -4353,10 +4513,122 @@ void Ordered_key::print(String *str)
}
+subselect_partial_match_engine::subselect_partial_match_engine(
+ subselect_uniquesubquery_engine *engine_arg,
+ TABLE *tmp_table_arg, Item_subselect *item_arg,
+ select_result_interceptor *result_arg,
+ List<Item> *equi_join_conds_arg,
+ uint covering_null_row_width_arg)
+ :subselect_engine(item_arg, result_arg),
+ tmp_table(tmp_table_arg), lookup_engine(engine_arg),
+ equi_join_conds(equi_join_conds_arg),
+ covering_null_row_width(covering_null_row_width_arg)
+{}
+
+
+int subselect_partial_match_engine::exec()
+{
+ Item_in_subselect *item_in= (Item_in_subselect *) item;
+ int res;
+
+ /* Try to find a matching row by index lookup. */
+ res= lookup_engine->copy_ref_key_simple();
+ if (res == -1)
+ {
+ /* The result is FALSE based on the outer reference. */
+ item_in->value= 0;
+ item_in->null_value= 0;
+ return 0;
+ }
+ else if (res == 0)
+ {
+ /* Search for a complete match. */
+ if ((res= lookup_engine->index_lookup()))
+ {
+ /* An error occured during lookup(). */
+ item_in->value= 0;
+ item_in->null_value= 0;
+ return res;
+ }
+ else if (item_in->value)
+ {
+ /*
+ A complete match was found, the result of IN is TRUE.
+ Notice: (this->item == lookup_engine->item)
+ */
+ return 0;
+ }
+ }
+
+ if (covering_null_row_width == tmp_table->s->fields)
+ {
+ /*
+ If there is a NULL-only row that coveres all columns the result of IN
+ is UNKNOWN.
+ */
+ item_in->value= 0;
+ /*
+ TIMOUR: which one is the right way to propagate an UNKNOWN result?
+ Should we also set empty_result_set= FALSE; ???
+ */
+ //item_in->was_null= 1;
+ item_in->null_value= 1;
+ return 0;
+ }
+
+ /*
+ There is no complete match. Look for a partial match (UNKNOWN result), or
+ no match (FALSE).
+ */
+ if (tmp_table->file->inited)
+ tmp_table->file->ha_index_end();
+
+ if (partial_match())
+ {
+ /* The result of IN is UNKNOWN. */
+ item_in->value= 0;
+ /*
+ TIMOUR: which one is the right way to propagate an UNKNOWN result?
+ Should we also set empty_result_set= FALSE; ???
+ */
+ //item_in->was_null= 1;
+ item_in->null_value= 1;
+ }
+ else
+ {
+ /* The result of IN is FALSE. */
+ item_in->value= 0;
+ /*
+ TIMOUR: which one is the right way to propagate an UNKNOWN result?
+ Should we also set empty_result_set= FALSE; ???
+ */
+ //item_in->was_null= 0;
+ item_in->null_value= 0;
+ }
+
+ return 0;
+}
+
+
+void subselect_partial_match_engine::print(String *str,
+ enum_query_type query_type)
+{
+ /*
+ Should never be called as the actual engine cannot be known at query
+ optimization time.
+ */
+ DBUG_ASSERT(FALSE);
+}
+
+
/*
@param non_null_key_parts
@param partial_match_key_parts A union of all single-column NULL key parts.
@param count_partial_match_columns Number of NULL keyparts (set bits above).
+
+ @retval FALSE the engine was initialized successfully
+ @retval TRUE there was some (memory allocation) error during initialization,
+ such errors should be interpreted as revert to other strategy
*/
bool
@@ -4379,14 +4651,17 @@ subselect_rowid_merge_engine::init(MY_BI
return FALSE;
}
- DBUG_ASSERT(!has_covering_null_row || (has_covering_null_row &&
- keys_count == 1 &&
- non_null_key_parts));
-
+ DBUG_ASSERT(!covering_null_row_width || (covering_null_row_width &&
+ keys_count == 1 &&
+ non_null_key_parts));
+ /*
+ Allocate buffers to hold the merged keys and the mapping between rowids and
+ row numbers.
+ */
if (!(merge_keys= (Ordered_key**) thd->alloc(keys_count *
sizeof(Ordered_key*))) ||
- !(row_num_to_rowid= (uchar*) thd->alloc(row_count * rowid_length *
- sizeof(uchar))))
+ !(row_num_to_rowid= (uchar*) my_malloc(row_count * rowid_length *
+ sizeof(uchar), MYF(MY_WME))))
return TRUE;
/* Create the only non-NULL key if there is any. */
@@ -4395,10 +4670,7 @@ subselect_rowid_merge_engine::init(MY_BI
non_null_key= new Ordered_key(cur_keyid, tmp_table, item_in->left_expr,
0, 0, 0, row_num_to_rowid);
if (non_null_key->init(non_null_key_parts))
- {
- // TIMOUR: revert to partial matching via scanning
return TRUE;
- }
merge_keys[cur_keyid]= non_null_key;
merge_keys[cur_keyid]->first();
++cur_keyid;
@@ -4406,9 +4678,10 @@ subselect_rowid_merge_engine::init(MY_BI
/*
If there is a covering NULL row, the only key that is needed is the
- only non-NULL key that is already created above.
+ only non-NULL key that is already created above. We create keys on
+ NULL-able columns only if there is no covering NULL row.
*/
- if (!has_covering_null_row)
+ if (!covering_null_row_width)
{
if (bitmap_init_memroot(&matching_keys, keys_count, thd->mem_root) ||
bitmap_init_memroot(&matching_outer_cols, keys_count, thd->mem_root) ||
@@ -4436,10 +4709,7 @@ subselect_rowid_merge_engine::init(MY_BI
result_sink->get_max_null_of_col(i),
row_num_to_rowid);
if (merge_keys[cur_keyid]->init(i))
- {
- // TIMOUR: revert to partial matching via scanning
return TRUE;
- }
merge_keys[cur_keyid]->first();
}
++cur_keyid;
@@ -4510,10 +4780,7 @@ subselect_rowid_merge_engine::init(MY_BI
if (init_queue(&pq, keys_count, 0, FALSE,
subselect_rowid_merge_engine::cmp_keys_by_cur_rownum, NULL))
- {
- // TIMOUR: revert to partial matching via scanning
return TRUE;
- }
return FALSE;
}
@@ -4521,26 +4788,21 @@ subselect_rowid_merge_engine::init(MY_BI
subselect_rowid_merge_engine::~subselect_rowid_merge_engine()
{
- delete_queue(&pq);
+ /* None of the resources below is allocated if there are no ordered keys. */
+ if (keys_count)
+ {
+ my_free((char*) row_num_to_rowid, MYF(0));
+ for (uint i= 0; i < keys_count; i++)
+ delete merge_keys[i];
+ delete_queue(&pq);
+ if (tmp_table->file->inited == handler::RND)
+ tmp_table->file->ha_rnd_end();
+ }
}
void subselect_rowid_merge_engine::cleanup()
{
- lookup_engine->cleanup();
- /* Tell handler we don't need the index anymore */
- if (tmp_table->file->inited)
- tmp_table->file->ha_rnd_end();
- queue_remove_all(&pq);
-}
-
-
-void subselect_rowid_merge_engine::print(String *str, enum_query_type query_type)
-{
- str->append(STRING_WITH_LEN("<rowid_merge>("));
- for (uint i= 0; i < keys_count; i++)
- merge_keys[i]->print(str);
- str->append(')');
}
@@ -4627,20 +4889,31 @@ bool subselect_rowid_merge_engine::parti
Ordered_key *cur_key;
rownum_t cur_row_num;
uint count_nulls_in_search_key= 0;
+ bool res= FALSE;
/* If there is a non-NULL key, it must be the first key in the keys array. */
DBUG_ASSERT(!non_null_key || (non_null_key && merge_keys[0] == non_null_key));
+
+ /* All data accesses during execution are via handler::ha_rnd_pos() */
+ tmp_table->file->ha_rnd_init(0);
+
/* Check if there is a match for the columns of the only non-NULL key. */
if (non_null_key && !non_null_key->lookup())
- return FALSE;
+ {
+ res= FALSE;
+ goto end;
+ }
/*
If there is a NULL (sub)row that covers all NULL-able columns,
then there is a guranteed partial match, and we don't need to search
for the matching row.
*/
- if (has_covering_null_row)
- return TRUE;
+ if (covering_null_row_width)
+ {
+ res= TRUE;
+ goto end;
+ }
if (non_null_key)
queue_insert(&pq, (uchar *) non_null_key);
@@ -4667,14 +4940,20 @@ bool subselect_rowid_merge_engine::parti
if (count_nulls_in_search_key ==
((Item_in_subselect *) item)->left_expr->cols() -
(non_null_key ? non_null_key->get_column_count() : 0))
- return TRUE;
+ {
+ res= TRUE;
+ goto end;
+ }
/*
If there is no NULL (sub)row that covers all NULL columns, and there is no
single match for any of the NULL columns, the result is FALSE.
*/
if (pq.elements - test(non_null_key) == 0)
- return FALSE;
+ {
+ res= FALSE;
+ goto end;
+ }
DBUG_ASSERT(pq.elements);
@@ -4692,10 +4971,8 @@ bool subselect_rowid_merge_engine::parti
Check the only matching row of the only key min_key for NULL matches
in the other columns.
*/
- if (test_null_row(min_row_num))
- return TRUE;
- else
- return FALSE;
+ res= test_null_row(min_row_num);
+ goto end;
}
while (TRUE)
@@ -4710,7 +4987,10 @@ bool subselect_rowid_merge_engine::parti
/* Follows from the correct use of priority queue. */
DBUG_ASSERT(cur_row_num > min_row_num);
if (test_null_row(min_row_num))
- return TRUE;
+ {
+ res= TRUE;
+ goto end;
+ }
else
{
min_key= cur_key;
@@ -4727,99 +5007,112 @@ bool subselect_rowid_merge_engine::parti
if (pq.elements == 0)
{
/* Check the last row of the last column in PQ for NULL matches. */
- if (test_null_row(min_row_num))
- return TRUE;
- else
- return FALSE;
+ res= test_null_row(min_row_num);
+ goto end;
}
}
- /* We should never get here. */
+ /* We should never get here - all branches must be handled explicitly above. */
DBUG_ASSERT(FALSE);
- return FALSE;
+
+end:
+ tmp_table->file->ha_rnd_end();
+ return res;
}
-int subselect_rowid_merge_engine::exec()
-{
- Item_in_subselect *item_in= (Item_in_subselect *) item;
- int res;
+subselect_table_scan_engine::subselect_table_scan_engine(
+ subselect_uniquesubquery_engine *engine_arg,
+ TABLE *tmp_table_arg,
+ Item_subselect *item_arg,
+ select_result_interceptor *result_arg,
+ List<Item> *equi_join_conds_arg,
+ uint covering_null_row_width_arg)
+ :subselect_partial_match_engine(engine_arg, tmp_table_arg, item_arg,
+ result_arg, equi_join_conds_arg,
+ covering_null_row_width_arg)
+{}
- /* Try to find a matching row by index lookup. */
- res= lookup_engine->copy_ref_key_simple();
- if (res == -1)
- {
- /* The result is FALSE based on the outer reference. */
- item_in->value= 0;
- item_in->null_value= 0;
- return 0;
- }
- else if (res == 0)
+
+/*
+ TIMOUR:
+ This method is based on subselect_uniquesubquery_engine::scan_table().
+ Consider refactoring somehow, 80% of the code is the same.
+
+ for each row_i in tmp_table
{
- if ((res= lookup_engine->index_lookup()))
- {
- /* An error occured during lookup(). */
- item_in->value= 0;
- item_in->null_value= 0;
- return res;
- }
- else if (item_in->value)
+ count_matches= 0;
+ for each row element row_i[j]
{
- /*
- A complete match was found, the result of IN is TRUE.
- Notice: (this->item == lookup_engine->item)
- */
- return 0;
+ if (outer_ref[j] is NULL || row_i[j] is NULL || outer_ref[j] == row_i[j])
+ ++count_matches;
}
+ if (count_matches == outer_ref.elements)
+ return TRUE
}
+ return FALSE
+*/
- if (has_covering_null_row && !keys_count)
- {
- /*
- If there is a NULL-only row that coveres all columns the result of IN
- is UNKNOWN.
- */
- item_in->value= 0;
- /*
- TIMOUR: which one is the right way to propagate an UNKNOWN result?
- Should we also set empty_result_set= FALSE; ???
- */
- //item_in->was_null= 1;
- item_in->null_value= 1;
- return 0;
- }
+bool subselect_table_scan_engine::partial_match()
+{
+ List_iterator_fast<Item> equality_it(*equi_join_conds);
+ Item *cur_eq;
+ uint count_matches;
+ int error;
+ bool res;
- /* All data accesses during execution are via handler::ha_rnd_pos() */
- if (tmp_table->file->inited)
- tmp_table->file->ha_index_end();
- tmp_table->file->ha_rnd_init(0);
+ tmp_table->file->ha_rnd_init(1);
+ tmp_table->file->extra_opt(HA_EXTRA_CACHE,
+ current_thd->variables.read_buff_size);
/*
- There is no complete match. Look for a partial match (UNKNOWN result), or
- no match (FALSE).
+ TIMOUR:
+ scan_table() also calls "table->null_row= 0;", why, do we need it?
*/
- if (partial_match())
- {
- /* The result of IN is UNKNOWN. */
- item_in->value= 0;
- /*
- TIMOUR: which one is the right way to propagate an UNKNOWN result?
- Should we also set empty_result_set= FALSE; ???
- */
- //item_in->was_null= 1;
- item_in->null_value= 1;
- }
- else
+ for (;;)
{
- /* The result of IN is FALSE. */
- item_in->value= 0;
- /*
- TIMOUR: which one is the right way to propagate an UNKNOWN result?
- Should we also set empty_result_set= FALSE; ???
- */
- //item_in->was_null= 0;
- item_in->null_value= 0;
+ error= tmp_table->file->ha_rnd_next(tmp_table->record[0]);
+ if (error) {
+ if (error == HA_ERR_RECORD_DELETED)
+ {
+ error= 0;
+ continue;
+ }
+ if (error == HA_ERR_END_OF_FILE)
+ {
+ error= 0;
+ break;
+ }
+ else
+ {
+ error= report_error(tmp_table, error);
+ break;
+ }
+ }
+
+ equality_it.rewind();
+ count_matches= 0;
+ while ((cur_eq= equality_it++))
+ {
+ DBUG_ASSERT(cur_eq->type() == Item::FUNC_ITEM &&
+ ((Item_func*)cur_eq)->functype() == Item_func::EQ_FUNC);
+ if (!cur_eq->val_int() && !cur_eq->null_value)
+ break;
+ ++count_matches;
+ }
+ if (count_matches == tmp_table->s->fields)
+ {
+ res= TRUE; /* Found a matching row. */
+ goto end;
+ }
}
+
+ res= FALSE;
+end:
tmp_table->file->ha_rnd_end();
+ return res;
+}
- return 0;
+
+void subselect_table_scan_engine::cleanup()
+{
}
=== modified file 'sql/item_subselect.h'
--- a/sql/item_subselect.h 2010-02-22 15:16:55 +0000
+++ b/sql/item_subselect.h 2010-03-09 10:14:06 +0000
@@ -436,7 +436,7 @@ public:
friend class Item_in_optimizer;
friend class subselect_indexsubquery_engine;
friend class subselect_hash_sj_engine;
- friend class subselect_rowid_merge_engine;
+ friend class subselect_partial_match_engine;
};
@@ -472,7 +472,7 @@ public:
enum enum_engine_type {ABSTRACT_ENGINE, SINGLE_SELECT_ENGINE,
UNION_ENGINE, UNIQUESUBQUERY_ENGINE,
INDEXSUBQUERY_ENGINE, HASH_SJ_ENGINE,
- ROR_INTERSECT_ENGINE};
+ ROWID_MERGE_ENGINE, TABLE_SCAN_ENGINE};
subselect_engine(Item_subselect *si, select_result_interceptor *res)
:thd(0)
@@ -716,6 +716,109 @@ inline bool Item_subselect::is_uncacheab
}
+/**
+ Compute an IN predicate via a hash semi-join. This class is responsible for
+ the materialization of the subquery, and the selection of the correct and
+ optimal execution method (e.g. direct index lookup, or partial matching) for
+ the IN predicate.
+*/
+
+class subselect_hash_sj_engine : public subselect_engine
+{
+protected:
+ /* The table into which the subquery is materialized. */
+ TABLE *tmp_table;
+ /* TRUE if the subquery was materialized into a temp table. */
+ bool is_materialized;
+ /*
+ The old engine already chosen at parse time and stored in permanent memory.
+ Through this member we can re-create and re-prepare materialize_join for
+ each execution of a prepared statement. We also reuse the functionality
+ of subselect_single_select_engine::[prepare | cols].
+ */
+ subselect_single_select_engine *materialize_engine;
+ /* The engine used to compute the IN predicate. */
+ subselect_engine *lookup_engine;
+ /*
+ QEP to execute the subquery and materialize its result into a
+ temporary table. Created during the first call to exec().
+ */
+ JOIN *materialize_join;
+
+ /* Keyparts of the only non-NULL composite index in a rowid merge. */
+ MY_BITMAP non_null_key_parts;
+ /* Keyparts of the single column indexes with NULL, one keypart per index. */
+ MY_BITMAP partial_match_key_parts;
+ uint count_partial_match_columns;
+ uint count_null_only_columns;
+ /*
+ A conjunction of all the equality condtions between all pairs of expressions
+ that are arguments of an IN predicate. We need these to post-filter some
+ IN results because index lookups sometimes match values that are actually
+ not equal to the search key in SQL terms.
+ */
+ Item_cond_and *semi_join_conds;
+ /* Possible execution strategies that can be used to compute hash semi-join.*/
+ enum exec_strategy {
+ UNDEFINED,
+ COMPLETE_MATCH, /* Use regular index lookups. */
+ PARTIAL_MATCH, /* Use some partial matching strategy. */
+ PARTIAL_MATCH_MERGE, /* Use partial matching through index merging. */
+ PARTIAL_MATCH_SCAN, /* Use partial matching through table scan. */
+ IMPOSSIBLE /* Subquery materialization is not applicable. */
+ };
+ /* The chosen execution strategy. Computed after materialization. */
+ exec_strategy strategy;
+protected:
+ exec_strategy get_strategy_using_schema();
+ exec_strategy get_strategy_using_data();
+ size_t rowid_merge_buff_size(bool has_non_null_key,
+ bool has_covering_null_row,
+ MY_BITMAP *partial_match_key_parts);
+ void choose_partial_match_strategy(bool has_non_null_key,
+ bool has_covering_null_row,
+ MY_BITMAP *partial_match_key_parts);
+ bool make_semi_join_conds();
+ subselect_uniquesubquery_engine* make_unique_engine();
+
+public:
+ subselect_hash_sj_engine(THD *thd, Item_subselect *in_predicate,
+ subselect_single_select_engine *old_engine)
+ :subselect_engine(in_predicate, NULL), tmp_table(NULL),
+ is_materialized(FALSE), materialize_engine(old_engine), lookup_engine(NULL),
+ materialize_join(NULL), count_partial_match_columns(0),
+ count_null_only_columns(0), semi_join_conds(NULL), strategy(UNDEFINED)
+ {
+ set_thd(thd);
+ }
+ ~subselect_hash_sj_engine();
+
+ bool init_permanent(List<Item> *tmp_columns);
+ bool init_runtime();
+ void cleanup();
+ int prepare() { return 0; } /* Override virtual function in base class. */
+ int exec();
+ virtual void print(String *str, enum_query_type query_type);
+ uint cols()
+ {
+ return materialize_engine->cols();
+ }
+ uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; }
+ table_map upper_select_const_tables() { return 0; }
+ bool no_rows() { return !tmp_table->file->stats.records; }
+ virtual enum_engine_type engine_type() { return HASH_SJ_ENGINE; }
+ /*
+ TODO: factor out all these methods in a base subselect_index_engine class
+ because all of them have dummy implementations and should never be called.
+ */
+ void fix_length_and_dec(Item_cache** row);//=>base class
+ void exclude(); //=>base class
+ //=>base class
+ bool change_result(Item_subselect *si, select_result_interceptor *result);
+ bool no_tables();//=>base class
+};
+
+
/*
Distinguish the type od (0-based) row numbers from the type of the index into
an array of row numbers.
@@ -745,7 +848,7 @@ typedef ha_rows rownum_t;
PS (re)execution, however most of the comprising objects can be reused.
*/
-class Ordered_key
+class Ordered_key : public Sql_alloc
{
protected:
/*
@@ -761,6 +864,8 @@ protected:
uint key_column_count;
/*
An expression, or sequence of expressions that forms the search key.
+ The search key is a sequence when it is Item_row. Each element of the
+ sequence is accessible via Item::element_index(int i).
*/
Item *search_key;
@@ -808,8 +913,6 @@ protected:
int cmp_key_with_search_key(rownum_t row_num);
public:
- static void *operator new(size_t size) throw ()
- { return sql_alloc(size); }
Ordered_key(uint keyid_arg, TABLE *tbl_arg,
Item *search_key_arg, ha_rows null_count_arg,
ha_rows min_null_row_arg, ha_rows max_null_row_arg,
@@ -828,6 +931,10 @@ public:
DBUG_ASSERT(i < key_column_count);
return key_columns[i]->field->field_index;
}
+ /*
+ Get the search key element that corresponds to the i-th key part of this
+ index.
+ */
Item *get_search_key(uint i)
{
return search_key->element_index(key_columns[i]->field->field_index);
@@ -899,7 +1006,7 @@ public:
};
-class subselect_rowid_merge_engine: public subselect_engine
+class subselect_partial_match_engine : public subselect_engine
{
protected:
/* The temporary table that contains a materialized subquery. */
@@ -910,6 +1017,51 @@ protected:
FALSE and UNKNOWN.
*/
subselect_uniquesubquery_engine *lookup_engine;
+ /* A list of equalities between each pair of IN operands. */
+ List<Item> *equi_join_conds;
+ /*
+ If there is a row, such that all its NULL-able components are NULL, this
+ member is set to the number of covered columns. If there is no covering
+ row, then this is 0.
+ */
+ uint covering_null_row_width;
+protected:
+ virtual bool partial_match()= 0;
+public:
+ subselect_partial_match_engine(subselect_uniquesubquery_engine *engine_arg,
+ TABLE *tmp_table_arg, Item_subselect *item_arg,
+ select_result_interceptor *result_arg,
+ List<Item> *equi_join_conds_arg,
+ uint covering_null_row_width_arg);
+ int prepare() { return 0; }
+ int exec();
+ void fix_length_and_dec(Item_cache**) {}
+ uint cols() { /* TODO: what is the correct value? */ return 1; }
+ uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; }
+ void exclude() {}
+ table_map upper_select_const_tables() { return 0; }
+ bool change_result(Item_subselect*, select_result_interceptor*)
+ { DBUG_ASSERT(FALSE); return false; }
+ bool no_tables() { return false; }
+ bool no_rows()
+ {
+ /*
+ TODO: It is completely unclear what is the semantics of this
+ method. The current result is computed so that the call to no_rows()
+ from Item_in_optimizer::val_int() sets Item_in_optimizer::null_value
+ correctly.
+ */
+ return !(((Item_in_subselect *) item)->null_value);
+ }
+ void print(String*, enum_query_type);
+
+ friend void subselect_hash_sj_engine::cleanup();
+};
+
+
+class subselect_rowid_merge_engine: public subselect_partial_match_engine
+{
+protected:
/*
Mapping from row numbers to row ids. The rowids are stored sequentially
in the array - rowid[i] is located in row_num_to_rowid + i * rowid_length.
@@ -953,8 +1105,6 @@ protected:
This queue is used by the partial match algorithm in method exec().
*/
QUEUE pq;
- /* True if there is a NULL (sub)row that covers all NULLable columns. */
- bool has_covering_null_row;
protected:
/*
Comparison function to compare keys in order of decreasing bitmap
@@ -972,143 +1122,34 @@ protected:
public:
subselect_rowid_merge_engine(subselect_uniquesubquery_engine *engine_arg,
TABLE *tmp_table_arg, uint keys_count_arg,
- uint has_covering_null_row_arg,
+ uint covering_null_row_width_arg,
Item_subselect *item_arg,
- select_result_interceptor *result_arg)
- :subselect_engine(item_arg, result_arg),
- tmp_table(tmp_table_arg), lookup_engine(engine_arg),
- keys_count(keys_count_arg), non_null_key(NULL),
- has_covering_null_row(has_covering_null_row_arg)
+ select_result_interceptor *result_arg,
+ List<Item> *equi_join_conds_arg)
+ :subselect_partial_match_engine(engine_arg, tmp_table_arg, item_arg,
+ result_arg, equi_join_conds_arg,
+ covering_null_row_width_arg),
+ keys_count(keys_count_arg), non_null_key(NULL)
{
thd= lookup_engine->get_thd();
}
~subselect_rowid_merge_engine();
bool init(MY_BITMAP *non_null_key_parts, MY_BITMAP *partial_match_key_parts);
void cleanup();
- int prepare() { return 0; }
- void fix_length_and_dec(Item_cache**) {}
- int exec();
- uint cols() { /* TODO: what is the correct value? */ return 1; }
- uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; }
- void exclude() {}
- table_map upper_select_const_tables() { return 0; }
- void print(String*, enum_query_type);
- bool change_result(Item_subselect*, select_result_interceptor*)
- { DBUG_ASSERT(FALSE); return false; }
- bool no_tables() { return false; }
- bool no_rows()
- {
- /*
- TODO: It is completely unclear what is the semantics of this
- method. The current result is computed so that the call to no_rows()
- from Item_in_optimizer::val_int() sets Item_in_optimizer::null_value
- correctly.
- */
- return !(((Item_in_subselect *) item)->null_value);
- }
+ virtual enum_engine_type engine_type() { return ROWID_MERGE_ENGINE; }
};
-/**
- Compute an IN predicate via a hash semi-join. This class is responsible for
- the materialization of the subquery, and the selection of the correct and
- optimal execution method (e.g. direct index lookup, or partial matching) for
- the IN predicate.
-*/
-
-class subselect_hash_sj_engine : public subselect_engine
+class subselect_table_scan_engine: public subselect_partial_match_engine
{
protected:
- /* The table into which the subquery is materialized. */
- TABLE *tmp_table;
- /* TRUE if the subquery was materialized into a temp table. */
- bool is_materialized;
- /*
- The old engine already chosen at parse time and stored in permanent memory.
- Through this member we can re-create and re-prepare materialize_join for
- each execution of a prepared statement. We also reuse the functionality
- of subselect_single_select_engine::[prepare | cols].
- */
- subselect_single_select_engine *materialize_engine;
- /* The engine used to compute the IN predicate. */
- subselect_engine *lookup_engine;
- /*
- QEP to execute the subquery and materialize its result into a
- temporary table. Created during the first call to exec().
- */
- JOIN *materialize_join;
- /*
- TRUE if the subquery result has an all-NULL column, which means that
- there at best can be a partial match for any IN execution.
- */
- bool inner_partial_match;
- /*
- TRUE if the materialized subquery contains a whole row only of NULLs.
- */
- bool has_null_row;
-
- /* Keyparts of the only non-NULL composite index in a rowid merge. */
- MY_BITMAP non_null_key_parts;
- /* Keyparts of the single column indexes with NULL, one keypart per index. */
- MY_BITMAP partial_match_key_parts;
- uint count_partial_match_columns;
- uint count_null_only_columns;
- /*
- A conjunction of all the equality condtions between all pairs of expressions
- that are arguments of an IN predicate. We need these to post-filter some
- IN results because index lookups sometimes match values that are actually
- not equal to the search key in SQL terms.
- */
- Item *semi_join_conds;
- /* Possible execution strategies that can be used to compute hash semi-join.*/
- enum exec_strategy {
- COMPLETE_MATCH, /* Use regular index lookups. */
- PARTIAL_MATCH, /* Use some partial matching strategy. */
- PARTIAL_MATCH_INDEX, /* Use partial matching through index merging. */
- PARTIAL_MATCH_SCAN, /* Use partial matching through table scan. */
- IMPOSSIBLE /* Subquery materialization is not applicable. */
- };
- /* The chosen execution strategy. Computed after materialization. */
- exec_strategy strategy;
-protected:
- void set_strategy_using_schema();
- void set_strategy_using_data();
- bool make_semi_join_conds();
- subselect_uniquesubquery_engine* make_unique_engine();
-
+ bool partial_match();
public:
- subselect_hash_sj_engine(THD *thd, Item_subselect *in_predicate,
- subselect_single_select_engine *old_engine)
- :subselect_engine(in_predicate, NULL), tmp_table(NULL),
- is_materialized(FALSE), materialize_engine(old_engine), lookup_engine(NULL),
- materialize_join(NULL), count_partial_match_columns(0),
- count_null_only_columns(0), semi_join_conds(NULL)
- {
- set_thd(thd);
- }
- ~subselect_hash_sj_engine();
-
- bool init_permanent(List<Item> *tmp_columns);
- bool init_runtime();
+ subselect_table_scan_engine(subselect_uniquesubquery_engine *engine_arg,
+ TABLE *tmp_table_arg, Item_subselect *item_arg,
+ select_result_interceptor *result_arg,
+ List<Item> *equi_join_conds_arg,
+ uint covering_null_row_width_arg);
void cleanup();
- int prepare() { return 0; } /* Override virtual function in base class. */
- int exec();
- virtual void print (String *str, enum_query_type query_type);
- uint cols()
- {
- return materialize_engine->cols();
- }
- uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; }
- table_map upper_select_const_tables() { return 0; }
- bool no_rows() { return !tmp_table->file->stats.records; }
- virtual enum_engine_type engine_type() { return HASH_SJ_ENGINE; }
- /*
- TODO: factor out all these methods in a base subselect_index_engine class
- because all of them have dummy implementations and should never be called.
- */
- void fix_length_and_dec(Item_cache** row);//=>base class
- void exclude(); //=>base class
- //=>base class
- bool change_result(Item_subselect *si, select_result_interceptor *result);
- bool no_tables();//=>base class
+ virtual enum_engine_type engine_type() { return TABLE_SCAN_ENGINE; }
};
=== modified file 'sql/mysql_priv.h'
--- a/sql/mysql_priv.h 2010-01-17 14:55:08 +0000
+++ b/sql/mysql_priv.h 2010-03-09 10:14:06 +0000
@@ -552,12 +552,14 @@ protected:
#define OPTIMIZER_SWITCH_LOOSE_SCAN 64
#define OPTIMIZER_SWITCH_MATERIALIZATION 128
#define OPTIMIZER_SWITCH_SEMIJOIN 256
+#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 512
+#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN 1024
#ifdef DBUG_OFF
-# define OPTIMIZER_SWITCH_LAST 512
+# define OPTIMIZER_SWITCH_LAST 2048
#else
-# define OPTIMIZER_SWITCH_TABLE_ELIMINATION 512
-# define OPTIMIZER_SWITCH_LAST 1024
+# define OPTIMIZER_SWITCH_TABLE_ELIMINATION 2048
+# define OPTIMIZER_SWITCH_LAST 4096
#endif
#ifdef DBUG_OFF
@@ -570,8 +572,10 @@ protected:
OPTIMIZER_SWITCH_FIRSTMATCH | \
OPTIMIZER_SWITCH_LOOSE_SCAN | \
OPTIMIZER_SWITCH_MATERIALIZATION | \
- OPTIMIZER_SWITCH_SEMIJOIN)
-#else
+ OPTIMIZER_SWITCH_SEMIJOIN | \
+ OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
+ OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)
+#else
# define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \
@@ -581,7 +585,9 @@ protected:
OPTIMIZER_SWITCH_FIRSTMATCH | \
OPTIMIZER_SWITCH_LOOSE_SCAN | \
OPTIMIZER_SWITCH_MATERIALIZATION | \
- OPTIMIZER_SWITCH_SEMIJOIN)
+ OPTIMIZER_SWITCH_SEMIJOIN | \
+ OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\
+ OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)
#endif
/*
=== modified file 'sql/mysqld.cc'
--- a/sql/mysqld.cc 2010-01-17 14:55:08 +0000
+++ b/sql/mysqld.cc 2010-03-09 10:14:06 +0000
@@ -301,7 +301,9 @@ static const char *optimizer_switch_name
"index_merge","index_merge_union","index_merge_sort_union",
"index_merge_intersection",
"index_condition_pushdown",
- "firstmatch","loosescan","materialization", "semijoin",
+ "firstmatch","loosescan","materialization", "semijoin",
+ "partial_match_rowid_merge",
+ "partial_match_table_scan",
#ifndef DBUG_OFF
"table_elimination",
#endif
@@ -320,6 +322,8 @@ static const unsigned int optimizer_swit
sizeof("loosescan") - 1,
sizeof("materialization") - 1,
sizeof("semijoin") - 1,
+ sizeof("partial_match_rowid_merge") - 1,
+ sizeof("partial_match_table_scan") - 1,
#ifndef DBUG_OFF
sizeof("table_elimination") - 1,
#endif
@@ -5794,7 +5798,8 @@ enum options_mysqld
OPT_RECORD_RND_BUFFER, OPT_DIV_PRECINCREMENT, OPT_RELAY_LOG_SPACE_LIMIT,
OPT_RELAY_LOG_PURGE,
OPT_SLAVE_NET_TIMEOUT, OPT_SLAVE_COMPRESSED_PROTOCOL, OPT_SLOW_LAUNCH_TIME,
- OPT_SLAVE_TRANS_RETRIES, OPT_READONLY, OPT_DEBUGGING, OPT_DEBUG_FLUSH,
+ OPT_SLAVE_TRANS_RETRIES, OPT_READONLY, OPT_ROWID_MERGE_BUFF_SIZE,
+ OPT_DEBUGGING, OPT_DEBUG_FLUSH,
OPT_SORT_BUFFER, OPT_TABLE_OPEN_CACHE, OPT_TABLE_DEF_CACHE,
OPT_THREAD_CONCURRENCY, OPT_THREAD_CACHE_SIZE,
OPT_TMP_TABLE_SIZE, OPT_THREAD_STACK,
@@ -7130,6 +7135,11 @@ The minimum value for this variable is 4
(uchar**) &max_system_variables.range_alloc_block_size, 0, GET_ULONG,
REQUIRED_ARG, RANGE_ALLOC_BLOCK_SIZE, RANGE_ALLOC_BLOCK_SIZE,
(longlong) ULONG_MAX, 0, 1024, 0},
+ {"rowid_merge_buff_size", OPT_ROWID_MERGE_BUFF_SIZE,
+ "The size of the buffers used [NOT] IN evaluation via partial matching.",
+ (uchar**) &global_system_variables.rowid_merge_buff_size,
+ (uchar**) &max_system_variables.rowid_merge_buff_size, 0, GET_ULONG,
+ REQUIRED_ARG, 8*1024*1024L, 0, MAX_MEM_TABLE_SIZE/2, 0, 1, 0},
{"read_buffer_size", OPT_RECORD_BUFFER,
"Each thread that does a sequential scan allocates a buffer of this size for each table it scans. If you do many sequential scans, you may want to increase this value.",
(uchar**) &global_system_variables.read_buff_size,
=== modified file 'sql/set_var.cc'
--- a/sql/set_var.cc 2009-12-22 12:49:15 +0000
+++ b/sql/set_var.cc 2010-03-09 10:14:06 +0000
@@ -540,6 +540,9 @@ static sys_var_long_ptr sys_query_cache_
static sys_var_thd_ulong sys_range_alloc_block_size(&vars, "range_alloc_block_size",
&SV::range_alloc_block_size);
+static sys_var_thd_ulong sys_rowid_merge_buff_size(&vars, "rowid_merge_buff_size",
+ &SV::rowid_merge_buff_size);
+
static sys_var_thd_ulong sys_query_alloc_block_size(&vars, "query_alloc_block_size",
&SV::query_alloc_block_size,
0, fix_thd_mem_root);
=== modified file 'sql/sql_class.h'
--- a/sql/sql_class.h 2010-02-19 21:55:57 +0000
+++ b/sql/sql_class.h 2010-03-09 10:14:06 +0000
@@ -343,6 +343,8 @@ struct system_variables
ulong mrr_buff_size;
ulong div_precincrement;
ulong sortbuff_size;
+ /* Total size of all buffers used by the subselect_rowid_merge_engine. */
+ ulong rowid_merge_buff_size;
ulong thread_handling;
ulong tx_isolation;
ulong completion_type;
=== modified file 'support-files/build-tags'
--- a/support-files/build-tags 2009-12-15 07:16:46 +0000
+++ b/support-files/build-tags 2010-03-09 10:14:06 +0000
@@ -4,7 +4,7 @@ rm -f TAGS
filter='\.cc$\|\.c$\|\.h$\|\.yy$'
list="find . -type f"
-bzr root >/dev/null 2>/dev/null && list="bzr ls --from-root --kind=file --versioned"
+bzr root >/dev/null 2>/dev/null && list="bzr ls --from-root -R --kind=file --versioned"
$list |grep $filter |while read f;
do