#At file:///home/tsk/mprog/src/mysql-6.0-mwl68/ based on revid:timour@askmonty.org-20100201120948-mdt7gtwcz50q1dzp
3770 timour(a)askmonty.org 2010-02-12
MWL#68 Subquery optimization: Efficient NOT IN execution with NULLs
This patch implements working partial matching for materialized subqueries.
The code passes the full regression test, except differences in EXPLAIN.
There are no other known test failures.
modified:
sql/item_subselect.cc
sql/item_subselect.h
sql/sql_class.cc
sql/sql_class.h
sql/sql_select.cc
=== modified file 'sql/item_subselect.cc'
--- a/sql/item_subselect.cc 2010-02-01 12:09:48 +0000
+++ b/sql/item_subselect.cc 2010-02-12 14:33:43 +0000
@@ -2436,6 +2436,17 @@ int subselect_uniquesubquery_engine::sca
for (;;)
{
error=table->file->rnd_next(table->record[0]);
+ /*
+ TODO: The below tests are wrong, Monty's proposal:
+ if (error) {
+ if (error == HA_ERR_RECORD_DELETED)
+ continue;
+ if (error = HA_ERR_END_OF_FILE)
+ break;
+ else
+ report error;
+ break;
+ */
if (error && error != HA_ERR_END_OF_FILE)
{
error= report_error(table, error);
@@ -2453,6 +2464,11 @@ int subselect_uniquesubquery_engine::sca
}
table->file->ha_rnd_end();
+ /*
+ TODO: it seems to be an error to return TRUE when the error was
+ HA_ERR_END_OF_FILE which is perfectly fine. HA_ERR_END_OF_FILE
+ only means we didn't find a match.
+ */
DBUG_RETURN(error != 0);
}
@@ -2517,6 +2533,10 @@ bool subselect_uniquesubquery_engine::co
See also the comment for the subselect_uniquesubquery_engine::exec()
function.
*/
+ /*
+ TODO: If not all outer cols are NULL, how we know the result is NULL,
+ and not FALSE? Even on top-level.
+ */
null_keypart= (*copy)->null_key;
if (null_keypart)
{
@@ -2556,6 +2576,59 @@ bool subselect_uniquesubquery_engine::co
/*
+ @retval 1 A NULL was found in the outer reference, index lookup is
+ not applicable, the outer ref is unsusable as a lookup key,
+ use some other method to find a match.
+ @retval 0 The outer ref was copied into an index lookup key.
+ @retval -1 The outer ref cannot possibly match any row, IN is FALSE.
+*/
+
+int subselect_uniquesubquery_engine::copy_ref_key_simple()
+{
+ for (store_key **copy= tab->ref.key_copy ; *copy ; copy++)
+ {
+ enum store_key::store_key_result store_res;
+ store_res= (*copy)->copy();
+ tab->ref.key_err= store_res;
+
+ /*
+ When there is a NULL part in the key we don't need to make index
+ lookup for such key thus we don't need to copy whole key.
+ If we later should do a sequential scan return OK. Fail otherwise.
+
+ See also the comment for the subselect_uniquesubquery_engine::exec()
+ function.
+ */
+ /*
+ TODO: If not all outer cols are NULL, how we know the result is NULL,
+ and not FALSE? Even on top-level.
+ */
+ null_keypart= (*copy)->null_key;
+ if (null_keypart)
+ return 1;
+
+ /*
+ Check if the error is equal to STORE_KEY_FATAL. This is not expressed
+ using the store_key::store_key_result enum because ref.key_err is a
+ boolean and we want to detect both TRUE and STORE_KEY_FATAL from the
+ space of the union of the values of [TRUE, FALSE] and
+ store_key::store_key_result.
+ TODO: fix the variable an return types.
+ */
+ if (store_res == store_key::STORE_KEY_FATAL)
+ {
+ /*
+ Error converting the left IN operand to the column type of the right
+ IN operand.
+ */
+ return -1;
+ }
+ }
+ return 0;
+}
+
+
+/*
Execute subselect
SYNOPSIS
@@ -2595,7 +2668,10 @@ int subselect_uniquesubquery_engine::exe
/* TODO: change to use of 'full_scan' here? */
if (copy_ref_key())
+ {
+ /* TODO: copy_ref_key() == 1 means NULL result, not error, why return 1? */
DBUG_RETURN(1);
+ }
if (table->status)
{
/*
@@ -2637,6 +2713,52 @@ int subselect_uniquesubquery_engine::exe
/*
+ TODO: this needs more thinking, as exec() is a bit wrong IMO.
+ - we don't need empty_result_set, as it is == 1 <=> when
+ item->value == 0
+ - scan_table() returns >0 even when there was no actuall error,
+ but we only found EOF while scanning.
+ - scan_table should not check table->status, but it should check
+ HA_ERR_END_OF_FILE
+*/
+
+int subselect_uniquesubquery_engine::index_lookup()
+{
+ DBUG_ENTER("subselect_uniquesubquery_engine::index_lookup");
+ int error;
+ TABLE *table= tab->table;
+ empty_result_set= TRUE;
+ table->status= 0;
+
+ if (!table->file->inited)
+ table->file->ha_index_init(tab->ref.key, 0);
+ error= table->file->index_read_map(table->record[0],
+ tab->ref.key_buff,
+ make_prev_keypart_map(tab->ref.key_parts),
+ HA_READ_KEY_EXACT);
+ DBUG_PRINT("info", ("lookup result: %i", error));
+ if (error &&
+ error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
+ error= report_error(table, error);
+ else
+ {
+ error= 0;
+ table->null_row= 0;
+ if (!table->status && (!cond || cond->val_int()))
+ {
+ ((Item_in_subselect *) item)->value= 1;
+ empty_result_set= FALSE;
+ }
+ else
+ ((Item_in_subselect *) item)->value= 0;
+ }
+
+ DBUG_RETURN(error);
+}
+
+
+
+/*
Index-lookup subselect 'engine' - run the subquery
SYNOPSIS
@@ -3136,6 +3258,7 @@ void subselect_hash_sj_engine::set_strat
Item_in_subselect *item_in= (Item_in_subselect *) item;
select_materialize_with_stats *result_sink=
(select_materialize_with_stats *) result;
+ Item *outer_col;
DBUG_ENTER("subselect_hash_sj_engine::set_strategy_using_data");
@@ -3146,13 +3269,20 @@ void subselect_hash_sj_engine::set_strat
{
if (!bitmap_is_set(&partial_match_key_parts, i))
continue;
-
- if (result_sink->get_null_count_of_col(i) == 0)
+ outer_col= item_in->left_expr->element_index(i);
+ /*
+ If column 'i' doesn't contain NULLs, and the corresponding outer reference
+ cannot have a NULL value, then 'i' is a non-nullable column.
+ */
+ if (result_sink->get_null_count_of_col(i) == 0 && !outer_col->maybe_null)
{
bitmap_clear_bit(&partial_match_key_parts, i);
bitmap_set_bit(&non_null_key_parts, i);
--count_partial_match_columns;
}
+ if (result_sink->get_null_count_of_col(i) ==
+ tmp_table->file->stats.records)
+ ++count_null_only_columns;
}
/* If no column contains NULLs use regular hash index lookups. */
@@ -3177,6 +3307,7 @@ bitmap_init_memroot(MY_BITMAP *map, uint
bitmap_buffer_size(n_bits))) ||
bitmap_init(map, bitmap_buf, n_bits, FALSE))
return TRUE;
+ bitmap_clear_all(map);
return FALSE;
}
@@ -3209,10 +3340,10 @@ bool subselect_hash_sj_engine::init_perm
DBUG_ENTER("subselect_hash_sj_engine::init_permanent");
- if (!(bitmap_init_memroot(&non_null_key_parts, tmp_columns->elements,
- thd->mem_root)) ||
- !(bitmap_init_memroot(&partial_match_key_parts, tmp_columns->elements,
- thd->mem_root)))
+ if (bitmap_init_memroot(&non_null_key_parts, tmp_columns->elements,
+ thd->mem_root) ||
+ bitmap_init_memroot(&partial_match_key_parts, tmp_columns->elements,
+ thd->mem_root))
DBUG_RETURN(TRUE);
set_strategy_using_schema();
@@ -3548,33 +3679,45 @@ int subselect_hash_sj_engine::exec()
if (strategy == PARTIAL_MATCH)
{
- subselect_rowid_merge_engine *new_lookup_engine;
+ subselect_rowid_merge_engine *rowid_merge_engine;
uint count_pm_keys;
MY_BITMAP *nn_key_parts;
+ bool has_covering_null_row;
+ select_materialize_with_stats *result_sink=
+ (select_materialize_with_stats *) result;
+
/* Total number of keys needed for partial matching. */
- if (count_partial_match_columns < tmp_table->s->fields)
- {
- count_pm_keys= count_partial_match_columns + 1;
- nn_key_parts= &non_null_key_parts;
- }
+ nn_key_parts= (count_partial_match_columns < tmp_table->s->fields) ?
+ &non_null_key_parts : NULL;
+
+ has_covering_null_row= (result_sink->get_max_nulls_in_row() ==
+ tmp_table->s->fields -
+ (nn_key_parts ? bitmap_bits_set(nn_key_parts) : 0));
+
+ if (has_covering_null_row)
+ count_pm_keys= nn_key_parts ? 1 : 0;
else
- {
- count_pm_keys= count_partial_match_columns;
- nn_key_parts= NULL;
- }
+ count_pm_keys= count_partial_match_columns - count_null_only_columns +
+ (nn_key_parts ? 1 : 0);
- if (!(new_lookup_engine=
- new subselect_rowid_merge_engine(lookup_engine,
+ if (!(rowid_merge_engine=
+ new subselect_rowid_merge_engine((subselect_uniquesubquery_engine*)
+ lookup_engine,
tmp_table,
count_pm_keys,
+ has_covering_null_row,
item, result)) ||
- new_lookup_engine->init(nn_key_parts, &partial_match_key_parts))
+ rowid_merge_engine->init(nn_key_parts, &partial_match_key_parts))
{
- delete new_lookup_engine;
strategy= PARTIAL_MATCH_SCAN;
+ delete rowid_merge_engine;
/* TODO: setup execution structures for partial match via scanning. */
}
- strategy= PARTIAL_MATCH_INDEX;
+ else
+ {
+ strategy= PARTIAL_MATCH_INDEX;
+ lookup_engine= rowid_merge_engine;
+ }
}
item_in->change_engine(lookup_engine);
@@ -3632,15 +3775,49 @@ Ordered_key::Ordered_key(uint key_idx_ar
ha_rows min_null_row_arg, ha_rows max_null_row_arg,
uchar *row_num_to_rowid_arg)
: key_idx(key_idx_arg), tbl(tbl_arg), search_key(search_key_arg),
- row_num_to_rowid(row_num_to_rowid_arg), null_count(null_count_arg),
- min_null_row(min_null_row_arg), max_null_row(max_null_row_arg)
+ row_num_to_rowid(row_num_to_rowid_arg), null_count(null_count_arg)
+{
+ DBUG_ASSERT(tbl->file->stats.records > null_count);
+ key_buff_elements= tbl->file->stats.records - null_count;
+ cur_key_idx= HA_POS_ERROR;
+
+ DBUG_ASSERT((null_count && min_null_row_arg && max_null_row_arg) ||
+ (!null_count && !min_null_row_arg && !max_null_row_arg));
+ if (null_count)
+ {
+ /* The counters are 1-based, for key access we need 0-based indexes. */
+ min_null_row= min_null_row_arg - 1;
+ max_null_row= max_null_row_arg - 1;
+ }
+ else
+ min_null_row= max_null_row= 0;
+}
+
+
+Ordered_key::~Ordered_key()
{
- key_column_count= search_key->cols();
- cur_row= HA_POS_ERROR;
+ /*
+ All data structures are allocated on thd->mem_root, thus we don't
+ free them here.
+ */
}
/*
+ Cleanup that needs to be done for each PS (re)execution.
+*/
+
+void Ordered_key::cleanup()
+{
+ /*
+ Currently these keys are recreated for each PS re-execution, thus
+ there is nothing to cleanup, the whole object goes away after execution
+ is over. All handler related initialization/deinitialization is done by
+ the parent subselect_rowid_merge_engine object.
+ */
+}
+
+/*
Initialize a multi-column index.
*/
@@ -3648,10 +3825,10 @@ bool Ordered_key::init(MY_BITMAP *column
{
THD *thd= tbl->in_use;
uint cur_key_col= 0;
+ Item_field *cur_tmp_field;
+ Item_func_lt *fn_less_than;
- DBUG_ENTER("Ordered_key::init");
-
- DBUG_ASSERT(key_column_count == bitmap_bits_set(columns_to_index));
+ key_column_count= bitmap_bits_set(columns_to_index);
// TODO: check for mem allocation err, revert to scan
@@ -3660,22 +3837,26 @@ bool Ordered_key::init(MY_BITMAP *column
compare_pred= (Item_func_lt**) thd->alloc(key_column_count *
sizeof(Item_func_lt*));
- for (uint i= 0; i < columns_to_index->n_bits; i++, cur_key_col++)
+ for (uint i= 0; i < columns_to_index->n_bits; i++)
{
if (!bitmap_is_set(columns_to_index, i))
continue;
- key_columns[cur_key_col]= new Item_field(tbl->field[i]);
+ cur_tmp_field= new Item_field(tbl->field[i]);
/* Create the predicate (tmp_column[i] < outer_ref[i]). */
- compare_pred[cur_key_col]= new Item_func_lt(key_columns[cur_key_col],
- search_key->element_index(i));
+ fn_less_than= new Item_func_lt(cur_tmp_field,
+ search_key->element_index(i));
+ fn_less_than->fix_fields(thd, (Item**) &fn_less_than);
+ key_columns[cur_key_col]= cur_tmp_field;
+ compare_pred[cur_key_col]= fn_less_than;
+ ++cur_key_col;
}
if (alloc_keys_buffers())
{
/* TODO revert to partial match via table scan. */
- DBUG_RETURN(TRUE);
+ return TRUE;
}
- DBUG_RETURN(FALSE);
+ return FALSE;
}
@@ -3687,9 +3868,7 @@ bool Ordered_key::init(int col_idx)
{
THD *thd= tbl->in_use;
- DBUG_ENTER("Ordered_key::init");
-
- DBUG_ASSERT(key_column_count == 1);
+ key_column_count= 1;
// TODO: check for mem allocation err, revert to scan
@@ -3700,23 +3879,25 @@ bool Ordered_key::init(int col_idx)
/* Create the predicate (tmp_column[i] < outer_ref[i]). */
compare_pred[0]= new Item_func_lt(key_columns[0],
search_key->element_index(col_idx));
+ compare_pred[0]->fix_fields(thd, (Item**)&compare_pred[0]);
if (alloc_keys_buffers())
{
/* TODO revert to partial match via table scan. */
- DBUG_RETURN(TRUE);
+ return TRUE;
}
- DBUG_RETURN(FALSE);
+ return FALSE;
}
bool Ordered_key::alloc_keys_buffers()
{
THD *thd= tbl->in_use;
- ha_rows row_count= tbl->file->stats.records;
- if (!(row_index= (ha_rows*) thd->alloc((row_count - null_count) *
- sizeof(ha_rows))))
+ DBUG_ASSERT(key_buff_elements > 0);
+
+ if (!(key_buff= (rownum_t*) thd->alloc(key_buff_elements *
+ sizeof(rownum_t))))
return TRUE;
/*
@@ -3724,10 +3905,14 @@ bool Ordered_key::alloc_keys_buffers()
(max_null_row - min_null_row), and then use min_null_row as
lookup offset.
*/
- if (!(bitmap_init_memroot(&null_key, max_null_row,
- thd->mem_root)))
+ if (bitmap_init_memroot(&null_key,
+ /* this is max array index, we need count, so +1. */
+ max_null_row + 1,
+ thd->mem_root))
return TRUE;
+ cur_key_idx= HA_POS_ERROR;
+
return FALSE;
}
@@ -3735,66 +3920,88 @@ bool Ordered_key::alloc_keys_buffers()
/*
Quick sort comparison function that compares two rows of the same table
indentfied with their row numbers.
+
+ @retval -1
+ @retval 0
+ @retval +1
*/
-int Ordered_key::cmp_rows_by_rownum(Ordered_key *key, ha_rows *a, ha_rows *b)
+int
+Ordered_key::cmp_keys_by_row_data(ha_rows a, ha_rows b)
{
uchar *rowid_a, *rowid_b;
int error, cmp_res;
- TABLE *tbl= key->tbl;
/* The length in bytes of the rowids (positions) of tmp_table. */
uint rowid_length= tbl->file->ref_length;
- DBUG_ENTER("Ordered_key::cmp_rows_by_rownum");
if (a == b)
- DBUG_RETURN(0);
+ return 0;
/* Get the corresponding rowids. */
- rowid_a= key->row_num_to_rowid + (*a) * rowid_length;
- rowid_b= key->row_num_to_rowid + (*b) * rowid_length;
+ rowid_a= row_num_to_rowid + a * rowid_length;
+ rowid_b= row_num_to_rowid + b * rowid_length;
/* Fetch the rows for comparison. */
error= tbl->file->rnd_pos(tbl->record[0], rowid_a);
DBUG_ASSERT(!error);
error= tbl->file->rnd_pos(tbl->record[1], rowid_b);
DBUG_ASSERT(!error);
- /* Compare the two rows. */
- for (Field **f_ptr= tbl->field; *f_ptr; f_ptr++)
+ /*
+ Compare the two rows by the corresponding values of the indexed
+ columns.
+ */
+ for (uint i= 0; i < key_column_count; i++)
{
- if ((cmp_res= (*f_ptr)->cmp_offset(tbl->s->rec_buff_length)))
- DBUG_RETURN(cmp_res);
+ Field *cur_field= key_columns[i]->field;
+ if ((cmp_res= cur_field->cmp_offset(tbl->s->rec_buff_length)))
+ return (cmp_res > 0 ? 1 : -1);
}
- DBUG_RETURN(0);
+ return 0;
+}
+
+
+int
+Ordered_key::cmp_keys_by_row_data_and_rownum(Ordered_key *key,
+ rownum_t* a, rownum_t* b)
+{
+ /* The result of comparing the two keys according to their row data. */
+ int cmp_row_res= key->cmp_keys_by_row_data(*a, *b);
+ if (cmp_row_res)
+ return cmp_row_res;
+ return (*a < *b) ? -1 : (*a > *b) ? 1 : 0;
}
void Ordered_key::sort_keys()
{
- my_qsort(row_index, tbl->file->stats.records, sizeof(ha_rows),
- (qsort_cmp) &cmp_rows_by_rownum);
+ my_qsort2(key_buff, key_buff_elements, sizeof(rownum_t),
+ (qsort2_cmp) &cmp_keys_by_row_data_and_rownum, (void*) this);
+ /* Invalidate the current row position. */
+ cur_key_idx= HA_POS_ERROR;
}
/*
Compare the value(s) of the current key in 'search_key' with the
- data of the current table record accessible via 'key_columns'.
+ data of the current table record.
@notes The comparison result follows from the way compare_pred
is created in Ordered_key::init. Currently compare_pred compares
a field in of the current row with the corresponding Item that
contains the search key.
+ @param row_num Number of the row (not index in the key_buff array)
+
@retval -1 if (current row < search_key)
@retval 0 if (current row == search_key)
@retval +1 if (current row > search_key)
*/
-int Ordered_key::compare_row_with_key(ha_rows row_num)
+int Ordered_key::cmp_key_with_search_key(rownum_t row_num)
{
/* The length in bytes of the rowids (positions) of tmp_table. */
uint rowid_length= tbl->file->ref_length;
uchar *cur_rowid= row_num_to_rowid + row_num * rowid_length;
int error, cmp_res;
- DBUG_ENTER("Ordered_key::compare");
error= tbl->file->rnd_pos(tbl->record[0], cur_rowid);
DBUG_ASSERT(!error);
@@ -3804,9 +4011,9 @@ int Ordered_key::compare_row_with_key(ha
/* Unlike Arg_comparator::compare_row() here there should be no NULLs. */
DBUG_ASSERT(!compare_pred[i]->null_value);
if (cmp_res)
- DBUG_RETURN(cmp_res);
+ return (cmp_res > 0 ? 1 : -1);
}
- DBUG_RETURN(0);
+ return 0;
}
@@ -3818,17 +4025,24 @@ int Ordered_key::compare_row_with_key(ha
bool Ordered_key::lookup()
{
- DBUG_ENTER("Ordered_key::lookup");
+ DBUG_ASSERT(key_buff_elements);
ha_rows lo= 0;
- ha_rows hi= tbl->file->stats.records - 1;
+ ha_rows hi= key_buff_elements - 1;
ha_rows mid;
int cmp_res;
while (lo <= hi)
{
mid= lo + (hi - lo) / 2;
- cmp_res= compare_row_with_key(mid);
+ cmp_res= cmp_key_with_search_key(key_buff[mid]);
+ /*
+ In order to find the minimum match, check if the pevious element is
+ equal or smaller than the found one. If equal, we need to search further
+ to the left.
+ */
+ if (!cmp_res && mid > 0)
+ cmp_res= !cmp_key_with_search_key(key_buff[mid - 1]) ? 1 : 0;
if (cmp_res == -1)
{
@@ -3838,17 +4052,48 @@ bool Ordered_key::lookup()
else if (cmp_res == 1)
{
/* row[mid] > search_key */
+ if (!mid)
+ goto not_found;
hi= mid - 1;
}
else
{
/* row[mid] == search_key */
- cur_row= mid;
- DBUG_RETURN(TRUE);
+ cur_key_idx= mid;
+ return TRUE;
}
}
+not_found:
+ cur_key_idx= HA_POS_ERROR;
+ return FALSE;
+}
- DBUG_RETURN(FALSE);
+
+/*
+ Move the current index pointer to the next key with the same column
+ values as the current key. Since the index is sorted, all such keys
+ are contiguous.
+*/
+
+bool Ordered_key::next_same()
+{
+ DBUG_ASSERT(key_buff_elements);
+
+ if (cur_key_idx < key_buff_elements - 1)
+ {
+ /*
+ TODO:
+ The below is quite inefficient, since as a result we will fetch every
+ row (except the last one) twice. There must be a more efficient way,
+ e.g. swapping record[0] and record[1], and reading only the new record.
+ */
+ if (!cmp_keys_by_row_data(key_buff[cur_key_idx], key_buff[cur_key_idx + 1]))
+ {
+ ++cur_key_idx;
+ return TRUE;
+ }
+ }
+ return FALSE;
}
@@ -3865,56 +4110,147 @@ subselect_rowid_merge_engine::init(MY_BI
/* The length in bytes of the rowids (positions) of tmp_table. */
uint rowid_length= tmp_table->file->ref_length;
ha_rows row_count= tmp_table->file->stats.records;
+ rownum_t cur_rownum= 0;
select_materialize_with_stats *result_sink=
(select_materialize_with_stats *) result;
uint cur_key= 0;
+ Item_in_subselect *item_in= (Item_in_subselect*) item;
+ int error;
- if (!(row_num_to_rowid= (uchar*) thd->alloc(row_count * rowid_length *
- sizeof(uchar))))
- return TRUE;
+ if (keys_count == 0)
+ {
+ /* There is nothing to initialize, we will only do regular lookups. */
+ return FALSE;
+ }
- if (!(bitmap_init_memroot(&matching_keys, keys_count, thd->mem_root)))
+ DBUG_ASSERT(!has_covering_null_row || (has_covering_null_row &&
+ keys_count == 1 &&
+ non_null_key_parts));
+
+ if (!(merge_keys= (Ordered_key**) thd->alloc(keys_count *
+ sizeof(Ordered_key*))) ||
+ !(row_num_to_rowid= (uchar*) thd->alloc(row_count * rowid_length *
+ sizeof(uchar))))
return TRUE;
- merge_keys= (Ordered_key**) thd->alloc(keys_count * sizeof(Ordered_key*));
/* Create the only non-NULL key if there is any. */
if (non_null_key_parts)
{
- non_null_key= new Ordered_key(cur_key, tmp_table, item, 0, 0, 0,
- row_num_to_rowid);
+ non_null_key= new Ordered_key(cur_key, tmp_table, item_in->left_expr,
+ 0, 0, 0, row_num_to_rowid);
if (non_null_key->init(non_null_key_parts))
{
// TODO: revert to partial matching via scanning
return TRUE;
}
merge_keys[cur_key]= non_null_key;
- non_null_key->sort_keys();
+ merge_keys[cur_key]->first();
++cur_key;
}
+
/*
- Create one single-column NULL-key for each column in
- partial_match_key_parts.
+ If there is a covering NULL row, the only key that is needed is the
+ only non-NULL key that is already created above.
*/
- for (uint i= 0; i < partial_match_key_parts->n_bits; i++, cur_key++)
+ if (!has_covering_null_row)
+ {
+ if (bitmap_init_memroot(&matching_keys, keys_count, thd->mem_root) ||
+ bitmap_init_memroot(&matching_outer_cols, keys_count, thd->mem_root) ||
+ bitmap_init_memroot(&null_only_columns, keys_count, thd->mem_root))
+ return TRUE;
+
+ /*
+ Create one single-column NULL-key for each column in
+ partial_match_key_parts.
+ */
+ for (uint i= 0; i < partial_match_key_parts->n_bits; i++)
+ {
+ if (!bitmap_is_set(partial_match_key_parts, i))
+ continue;
+
+ if (result_sink->get_null_count_of_col(i) == row_count)
+ bitmap_set_bit(&null_only_columns, cur_key);
+ else
+ {
+ merge_keys[cur_key]= new Ordered_key(cur_key, tmp_table,
+ item_in->left_expr->element_index(i),
+ result_sink->get_null_count_of_col(i),
+ result_sink->get_min_null_of_col(i),
+ result_sink->get_max_null_of_col(i),
+ row_num_to_rowid);
+ if (merge_keys[cur_key]->init(i))
+ {
+ // TODO: revert to partial matching via scanning
+ return TRUE;
+ }
+ merge_keys[cur_key]->first();
+ }
+ ++cur_key;
+ }
+ }
+
+ /* Populate the indexes with data from the temporary table. */
+ tmp_table->file->ha_rnd_init(1);
+ tmp_table->file->extra_opt(HA_EXTRA_CACHE,
+ current_thd->variables.read_buff_size);
+ tmp_table->null_row= 0;
+ while (TRUE)
{
- if (!bitmap_is_set(partial_match_key_parts, i))
+ error= tmp_table->file->rnd_next(tmp_table->record[0]);
+ if (error == HA_ERR_RECORD_DELETED)
+ {
+ /* We get this for duplicate records that should not be in tmp_table. */
continue;
+ }
+ /*
+ This is a temp table that we fully own, there should be no other
+ cause to stop the iteration than EOF.
+ */
+ DBUG_ASSERT(!error || error == HA_ERR_END_OF_FILE);
+ if (error == HA_ERR_END_OF_FILE)
+ {
+ DBUG_ASSERT(cur_rownum == tmp_table->file->stats.records);
+ break;
+ }
- merge_keys[cur_key]= new Ordered_key(cur_key, tmp_table, item,
- result_sink->get_null_count_of_col(i),
- result_sink->get_min_null_of_col(i),
- result_sink->get_max_null_of_col(i),
- row_num_to_rowid);
- if (merge_keys[cur_key]->init(i))
+ /*
+ Save the position of this record in the row_num -> rowid mapping.
+ */
+ tmp_table->file->position(tmp_table->record[0]);
+ memcpy(row_num_to_rowid + cur_rownum * rowid_length,
+ tmp_table->file->ref, rowid_length);
+
+ /* Add the current row number to the corresponding keys. */
+ if (non_null_key)
{
- // TODO: revert to partial matching via scanning
- return TRUE;
+ /* By definition there are no NULLs in the non-NULL key. */
+ non_null_key->add_key(cur_rownum);
}
- merge_keys[cur_key]->sort_keys();
+
+ for (uint i= (non_null_key ? 1 : 0); i < keys_count; i++)
+ {
+ /*
+ Check if the first and only indexed column contains NULL in the curent
+ row, and add the row number to the corresponding key.
+ */
+ if (tmp_table->field[merge_keys[i]->get_field_idx(0)]->is_null())
+ merge_keys[i]->set_null(cur_rownum);
+ else
+ merge_keys[i]->add_key(cur_rownum);
+ }
+ ++cur_rownum;
}
+ tmp_table->file->ha_rnd_end();
+
+ /* Sort the keys in each of the indexes. */
+ for (uint i= 0; i < keys_count; i++)
+ merge_keys[i]->sort_keys();
+
+ // TODO: sort all the keys by NULL selectivity
+
if (init_queue(&pq, keys_count, 0, FALSE,
- subselect_rowid_merge_engine::cmp_key_by_cur_row, NULL))
+ subselect_rowid_merge_engine::cmp_keys_by_cur_rownum, NULL))
{
// TODO: revert to partial matching via scanning
return TRUE;
@@ -3924,9 +4260,19 @@ subselect_rowid_merge_engine::init(MY_BI
}
+subselect_rowid_merge_engine::~subselect_rowid_merge_engine()
+{
+ delete_queue(&pq);
+}
+
+
void subselect_rowid_merge_engine::cleanup()
{
- // TODO
+ lookup_engine->cleanup();
+ /* Tell handler we don't need the index anymore */
+ if (tmp_table->file->inited)
+ tmp_table->file->ha_rnd_end();
+ queue_remove_all(&pq);
}
@@ -3934,8 +4280,8 @@ void subselect_rowid_merge_engine::clean
*/
int
-subselect_rowid_merge_engine::cmp_key_by_null_selectivity(Ordered_key *a,
- Ordered_key *b)
+subselect_rowid_merge_engine::cmp_keys_by_null_selectivity(Ordered_key *a,
+ Ordered_key *b)
{
double a_sel= a->null_selectivity();
double b_sel= b->null_selectivity();
@@ -3951,37 +4297,26 @@ subselect_rowid_merge_engine::cmp_key_by
*/
int
-subselect_rowid_merge_engine::cmp_key_by_cur_row(void *arg,
- uchar *k1, uchar *k2)
+subselect_rowid_merge_engine::cmp_keys_by_cur_rownum(void *arg,
+ uchar *k1, uchar *k2)
{
- ha_rows row1= ((Ordered_key*) k1)->current();
- ha_rows row2= ((Ordered_key*) k2)->current();
+ rownum_t r1= ((Ordered_key*) k1)->current();
+ rownum_t r2= ((Ordered_key*) k2)->current();
- if (row1 > row2)
- return 1;
- if (row1 == row2)
- return 0;
- return -1;
+ return (r1 < r2) ? -1 : (r1 > r2) ? 1 : 0;
}
/*
- Check if certain table row contains a NULL in all columns in all columns for
- which there is no value match.
-
- @details Notice that if a column is not in the set 'keys', we assume that has
- been checked otherwise that there is a partial or complete match for this
- column. This allows to encode columns that consist of only NULLs as simply
- missing in the set 'keys', because such columns match any value in any row.
+ Check if certain table row contains a NULL in all columns for which there is
+ no match in the corresponding value index.
@retval TRUE if a NULL row exists
@retval FALSE otherwise
*/
-bool subselect_rowid_merge_engine::test_null_row(ha_rows row_num)
+bool subselect_rowid_merge_engine::test_null_row(rownum_t row_num)
{
- DBUG_ENTER("subselect_rowid_merge_engine::test_null_row");
-
for (uint i = 0; i < keys_count; i++)
{
if (bitmap_is_set(&matching_keys, i))
@@ -3993,9 +4328,9 @@ bool subselect_rowid_merge_engine::test_
continue;
}
if (!merge_keys[i]->is_null(row_num))
- DBUG_RETURN(FALSE);
+ return FALSE;
}
- DBUG_RETURN(TRUE);
+ return TRUE;
}
@@ -4007,88 +4342,120 @@ bool subselect_rowid_merge_engine::test_
bool subselect_rowid_merge_engine::partial_match()
{
Ordered_key *min_key; /* Key that contains the current minimum position. */
- ha_rows min_row; /* Current row number of min_key. */
+ rownum_t min_row_num; /* Current row number of min_key. */
Ordered_key *cur_key;
- ha_rows cur_row;
-
- DBUG_ENTER("subselect_rowid_merge_engine::partial_match");
+ rownum_t cur_row_num;
+ uint count_nulls_in_search_key= 0;
/* If there is a non-NULL key, it must be the first key in the keys array. */
- DBUG_ASSERT(non_null_key && merge_keys[0] == non_null_key);
+ DBUG_ASSERT(!non_null_key || (non_null_key && merge_keys[0] == non_null_key));
/* Check if there is a match for the columns of the only non-NULL key. */
if (non_null_key && !non_null_key->lookup())
- DBUG_RETURN(FALSE);
+ return FALSE;
+
+ /*
+ If there is a NULL (sub)row that covers all NULL-able columns,
+ then there is a guranteed partial match, and we don't need to search
+ for the matching row.
+ */
+ if (has_covering_null_row)
+ return TRUE;
+
if (non_null_key)
queue_insert(&pq, (uchar *) non_null_key);
-
/*
- Add all non-empty value keys to the priority queue. Do not process the
- non_null_key, since it was already processed above.
+ Do not add the non_null_key, since it was already processed above.
*/
- uint i= non_null_key ? 1 : 0; /* Skip the non-NULL key, already processed. */
- for (; i < keys_count; i++)
+ bitmap_clear_all(&matching_outer_cols);
+ for (uint i= test(non_null_key); i < keys_count; i++)
{
- if (merge_keys[i]->lookup())
+ DBUG_ASSERT(merge_keys[i]->get_column_count() == 1);
+ if (merge_keys[i]->get_search_key(0)->is_null())
+ {
+ ++count_nulls_in_search_key;
+ bitmap_set_bit(&matching_outer_cols, merge_keys[i]->get_key_idx());
+ }
+ else if (merge_keys[i]->lookup())
queue_insert(&pq, (uchar *) merge_keys[i]);
}
+
/*
- Not all value keys are empty, thus we don't have only NULL keys. If we had,
- the only possible match is a NULL row, and we cheked there is no such row,
- therefore the result is known to be FALSE. In fact this algorithm makes
- sense for at least two non-NULL columns.
+ If the outer reference consists of only NULLs, or if it has NULLs in all
+ nullable columns, the result is UNKNOWN.
*/
- DBUG_ASSERT(pq.elements > 1);
+ if (count_nulls_in_search_key ==
+ ((Item_in_subselect *) item)->left_expr->cols() -
+ (non_null_key ? non_null_key->get_column_count() : 0))
+ return TRUE;
+
+ /*
+ If there is no NULL (sub)row that covers all NULL columns, and there is no
+ single match for any of the NULL columns, the result is FALSE.
+ */
+ if (pq.elements - test(non_null_key) == 0)
+ return FALSE;
+
+ DBUG_ASSERT(pq.elements);
+
min_key= (Ordered_key*) queue_remove(&pq, 0);
- min_row= min_key->current();
- bitmap_clear_all(&matching_keys);
+ min_row_num= min_key->current();
+ bitmap_copy(&matching_keys, &null_only_columns);
bitmap_set_bit(&matching_keys, min_key->get_key_idx());
- min_key->next();
- if (!min_key->is_eof())
+ bitmap_union(&matching_keys, &matching_outer_cols);
+ if (min_key->next_same())
queue_insert(&pq, (uchar *) min_key);
+ if (pq.elements == 0)
+ {
+ /*
+ Check the only matching row of the only key min_key for NULL matches
+ in the other columns.
+ */
+ if (test_null_row(min_row_num))
+ return TRUE;
+ else
+ return FALSE;
+ }
+
while (TRUE)
{
cur_key= (Ordered_key*) queue_remove(&pq, 0);
- cur_row= min_key->current();
+ cur_row_num= cur_key->current();
- if (cur_row == min_row)
- {
+ if (cur_row_num == min_row_num)
bitmap_set_bit(&matching_keys, cur_key->get_key_idx());
- /* There cannot be a complete match, as we already checked for one. */
- DBUG_ASSERT(bitmap_bits_set(&matching_keys) < matching_keys.n_bits);
- }
else
{
/* Follows from the correct use of priority queue. */
- DBUG_ASSERT(cur_row > min_row);
- if (test_null_row(min_row))
- DBUG_RETURN(TRUE);
+ DBUG_ASSERT(cur_row_num > min_row_num);
+ if (test_null_row(min_row_num))
+ return TRUE;
else
{
min_key= cur_key;
- min_row= cur_row;
- bitmap_clear_all(&matching_keys);
+ min_row_num= cur_row_num;
+ bitmap_copy(&matching_keys, &null_only_columns);
bitmap_set_bit(&matching_keys, min_key->get_key_idx());
+ bitmap_union(&matching_keys, &matching_outer_cols);
}
}
- cur_key->next();
- if (!cur_key->is_eof())
+ if (cur_key->next_same())
queue_insert(&pq, (uchar *) cur_key);
if (pq.elements == 0)
{
/* Check the last row of the last column in PQ for NULL matches. */
- if (test_null_row(min_row))
- DBUG_RETURN(TRUE);
+ if (test_null_row(min_row_num))
+ return TRUE;
else
- DBUG_RETURN(FALSE);
+ return FALSE;
}
}
/* We should never get here. */
DBUG_ASSERT(FALSE);
- DBUG_RETURN(FALSE);
+ return FALSE;
}
@@ -4097,22 +4464,54 @@ int subselect_rowid_merge_engine::exec()
Item_in_subselect *item_in= (Item_in_subselect *) item;
int res;
- DBUG_ENTER("subselect_rowid_merge_engine::exec");
-
- if ((res= lookup_engine->exec()))
+ /* Try to find a matching row by index lookup. */
+ res= lookup_engine->copy_ref_key_simple();
+ if (res == -1)
+ {
+ /* The result is FALSE based on the outer reference. */
+ item_in->value= 0;
+ item_in->null_value= 0;
+ return 0;
+ }
+ else if (res == 0)
{
- /* An error occured during exec(). */
- DBUG_RETURN(res);
+ if ((res= lookup_engine->index_lookup()))
+ {
+ /* An error occured during lookup(). */
+ item_in->value= 0;
+ item_in->null_value= 0;
+ return res;
+ }
+ else if (item_in->value)
+ {
+ /*
+ A complete match was found, the result of IN is TRUE.
+ Notice: (this->item == lookup_engine->item)
+ */
+ return 0;
+ }
}
- else if (item_in->value == 1)
+
+ if (has_covering_null_row && !keys_count)
{
/*
- A complete match was found, the result of IN is TRUE.
- Notice: (this->item == lookup_engine->item)
+ If there is a NULL-only row that coveres all columns the result of IN
+ is UNKNOWN.
*/
- DBUG_RETURN(0);
+ item_in->value= 0;
+ /*
+ TODO: which one is the right way to propagate an UNKNOWN result?
+ Should we also set empty_result_set= FALSE; ???
+ */
+ //item_in->was_null= 1;
+ item_in->null_value= 1;
+ return 0;
}
+ /* All data accesses during execution are via handler::rnd_pos() */
+ if (tmp_table->file->inited)
+ tmp_table->file->ha_index_end();
+ tmp_table->file->ha_rnd_init(0);
/*
There is no complete match. Look for a partial match (UNKNOWN result), or
no match (FALSE).
@@ -4121,18 +4520,25 @@ int subselect_rowid_merge_engine::exec()
{
/* The result of IN is UNKNOWN. */
item_in->value= 0;
- /* TODO: which one is the right way to propagate an UNKNOWN result? */
- item_in->was_null= 1;
+ /*
+ TODO: which one is the right way to propagate an UNKNOWN result?
+ Should we also set empty_result_set= FALSE; ???
+ */
+ //item_in->was_null= 1;
item_in->null_value= 1;
}
else
{
/* The result of IN is FALSE. */
item_in->value= 0;
- /* TODO: which one is the right way to propagate an UNKNOWN result? */
- item_in->was_null= 0;
+ /*
+ TODO: which one is the right way to propagate an UNKNOWN result?
+ Should we also set empty_result_set= FALSE; ???
+ */
+ //item_in->was_null= 0;
item_in->null_value= 0;
}
+ tmp_table->file->ha_rnd_end();
- DBUG_RETURN(0);
+ return 0;
}
=== modified file 'sql/item_subselect.h'
--- a/sql/item_subselect.h 2010-02-01 12:09:48 +0000
+++ b/sql/item_subselect.h 2010-02-12 14:33:43 +0000
@@ -610,8 +610,10 @@ public:
virtual void print (String *str, enum_query_type query_type);
bool change_result(Item_subselect *si, select_result_interceptor *result);
bool no_tables();
+ int index_lookup();
int scan_table();
bool copy_ref_key();
+ int copy_ref_key_simple();
bool no_rows() { return empty_result_set; }
virtual enum_engine_type engine_type() { return UNIQUESUBQUERY_ENGINE; }
};
@@ -678,6 +680,34 @@ inline bool Item_subselect::is_uncacheab
return engine->uncacheable();
}
+/*
+ Distinguish the type od (0-based) row numbers from the type of the index into
+ an array of row numbers.
+*/
+typedef ha_rows rownum_t;
+
+
+/*
+ An Ordered_key is an in-memory table index that allows O(log(N)) time
+ lookups of a multi-part key.
+
+ If the index is over a single column, then this column may contain NULLs, and
+ the NULLs are stored and tested separately for NULL in O(1) via is_null().
+ Multi-part indexes assume that the indexed columns do not contain NULLs.
+
+ TODO:
+ = Due to the unnatural assymetry between single and multi-part indexes, it
+ makes sense to somehow refactor or extend the class.
+
+ = This class can be refactored into a base abstract interface, and two
+ subclasses:
+ - one to represent single-column indexes, and
+ - another to represent multi-column indexes.
+ Such separation would allow slightly more efficient implementation of
+ the single-column indexes.
+ = The current design requires such indexes to be fully recreated for each
+ PS (re)execution, however most of the comprising objects can be reused.
+*/
class Ordered_key
{
@@ -701,11 +731,12 @@ protected:
/* Value index related members. */
/*
The actual value index, consists of a sorted sequence of row numbers.
- There are tbl->file->stats.records elements in this array.
*/
- ha_rows *row_index;
- /* Current element in 'row_index'. */
- ha_rows cur_row;
+ rownum_t *key_buff;
+ /* Number of elements in key_buff. */
+ ha_rows key_buff_elements;
+ /* Current element in 'key_buff'. */
+ ha_rows cur_key_idx;
/*
Mapping from row numbers to row ids. The element row_num_to_rowid[i]
contains a buffer with the rowid for the row numbered 'i'.
@@ -734,15 +765,21 @@ protected:
Quick sort comparison function that compares two rows of the same table
indentfied with their row numbers.
*/
- static int cmp_rows_by_rownum(Ordered_key *key, ha_rows* a, ha_rows* b);
+ int cmp_keys_by_row_data(rownum_t a, rownum_t b);
+ static int cmp_keys_by_row_data_and_rownum(Ordered_key *key,
+ rownum_t* a, rownum_t* b);
- int compare_row_with_key(ha_rows row_num);
+ int cmp_key_with_search_key(rownum_t row_num);
public:
+ static void *operator new(size_t size) throw ()
+ { return sql_alloc(size); }
Ordered_key(uint key_idx_arg, TABLE *tbl_arg,
Item *search_key_arg, ha_rows null_count_arg,
ha_rows min_null_row_arg, ha_rows max_null_row_arg,
uchar *row_num_to_rowid_arg);
+ ~Ordered_key();
+ void cleanup();
/* Initialize a multi-column index. */
bool init(MY_BITMAP *columns_to_index);
/* Initialize a single-column index. */
@@ -750,10 +787,21 @@ public:
uint get_column_count() { return key_column_count; }
uint get_key_idx() { return key_idx; }
- void add_key(ha_rows row_num)
+ uint get_field_idx(uint i)
+ {
+ DBUG_ASSERT(i < key_column_count);
+ return key_columns[i]->field->field_index;
+ }
+ Item *get_search_key(uint i)
{
- row_index[cur_row]= row_num;
- ++cur_row;
+ return search_key->element_index(key_columns[i]->field->field_index);
+ }
+ void add_key(rownum_t row_num)
+ {
+ /* The caller must know how many elements to add. */
+ DBUG_ASSERT(key_buff_elements && cur_key_idx < key_buff_elements);
+ key_buff[cur_key_idx]= row_num;
+ ++cur_key_idx;
}
void sort_keys();
@@ -766,28 +814,38 @@ public:
this->search_key.
*/
bool lookup();
- /* Return the current index element. */
- ha_rows current() { return row_index[cur_row]; }
- /* Move the current index cursor at the next match. */
+ /* Move the current index cursor to the first key. */
+ void first()
+ {
+ DBUG_ASSERT(key_buff_elements);
+ cur_key_idx= 0;
+ }
+ /* TODO */
+ bool next_same();
+ /* Move the current index cursor to the next key. */
bool next()
{
- if (cur_row < tbl->file->stats.records)
+ DBUG_ASSERT(key_buff_elements);
+ if (cur_key_idx < key_buff_elements - 1)
{
- ++cur_row;
+ ++cur_key_idx;
return TRUE;
}
return FALSE;
};
- /* Return false if all matches are exhausted, true otherwise. */
- bool is_eof() { return cur_row == tbl->file->stats.records; }
+ /* Return the current index element. */
+ rownum_t current()
+ {
+ DBUG_ASSERT(key_buff_elements && cur_key_idx < key_buff_elements);
+ return key_buff[cur_key_idx];
+ }
- void set_null(ha_rows row_num)
+ void set_null(rownum_t row_num)
{
bitmap_set_bit(&null_key, row_num);
}
- bool is_null(ha_rows row_num)
+ bool is_null(rownum_t row_num)
{
- DBUG_ENTER("Ordered_key::is_null");
/*
Indexes consisting of only NULLs do not have a bitmap buffer at all.
Their only initialized member is 'n_bits', which is equal to the number
@@ -796,11 +854,11 @@ public:
if (null_count == tbl->file->stats.records)
{
DBUG_ASSERT(tbl->file->stats.records == null_key.n_bits);
- DBUG_RETURN(TRUE);
+ return TRUE;
}
if (row_num > max_null_row || row_num < min_null_row)
- DBUG_RETURN(FALSE);
- DBUG_RETURN(bitmap_is_set(&null_key, row_num));
+ return FALSE;
+ return bitmap_is_set(&null_key, row_num);
}
};
@@ -815,18 +873,28 @@ protected:
TRUE, then subselect_rowid_merge_engine further distinguishes between
FALSE and UNKNOWN.
*/
- subselect_engine *lookup_engine;
+ subselect_uniquesubquery_engine *lookup_engine;
/*
- Mapping from row numbers to row ids. The element row_num_to_rowid[i]
- contains a buffer with the rowid for the row numbered 'i'.
+ Mapping from row numbers to row ids. The rowids are stored sequentially
+ in the array - rowid[i] is located in row_num_to_rowid + i * rowid_length.
*/
uchar *row_num_to_rowid;
/*
A subset of all the keys for which there is a match for the same row.
- Used during execution. Computed for each call to exec().
+ Used during execution. Computed for each outer reference
*/
MY_BITMAP matching_keys;
/*
+ The columns of the outer reference that are NULL. Computed for each
+ outer reference.
+ */
+ MY_BITMAP matching_outer_cols;
+ /*
+ Columns that consist of only NULLs. Such columns match any value.
+ Computed once per query execution.
+ */
+ MY_BITMAP null_only_columns;
+ /*
Indexes of row numbers, sorted by <column_value, row_number>. If an
index may contain NULLs, the NULLs are stored efficiently in a bitmap.
@@ -849,44 +917,59 @@ protected:
This queue is used by the partial match algorithm in method exec().
*/
QUEUE pq;
+ /* True if there is a NULL (sub)row that covers all NULLable columns. */
+ bool has_covering_null_row;
protected:
/*
Comparison function to compare keys in order of increasing bitmap
selectivity.
*/
- static int cmp_key_by_null_selectivity(Ordered_key *a, Ordered_key *b);
+ static int cmp_keys_by_null_selectivity(Ordered_key *a, Ordered_key *b);
/*
Comparison function used by the priority queue pq, the 'smaller' key
is the one with the smaller current row number.
*/
- static int cmp_key_by_cur_row(void *arg, uchar *k1, uchar *k2);
+ static int cmp_keys_by_cur_rownum(void *arg, uchar *k1, uchar *k2);
- bool test_null_row(ha_rows row_num);
+ bool test_null_row(rownum_t row_num);
bool partial_match();
public:
- subselect_rowid_merge_engine(subselect_engine *lookup_engine_arg,
+ subselect_rowid_merge_engine(subselect_uniquesubquery_engine *engine_arg,
TABLE *tmp_table_arg, uint keys_count_arg,
+ uint has_covering_null_row_arg,
Item_subselect *item_arg,
select_result_interceptor *result_arg)
:subselect_engine(item_arg, result_arg),
- tmp_table(tmp_table_arg), lookup_engine(lookup_engine_arg),
- keys_count(keys_count_arg)
- {}
-
+ tmp_table(tmp_table_arg), lookup_engine(engine_arg),
+ keys_count(keys_count_arg), non_null_key(NULL),
+ has_covering_null_row(has_covering_null_row_arg)
+ {
+ thd= lookup_engine->get_thd();
+ }
+ ~subselect_rowid_merge_engine();
bool init(MY_BITMAP *non_null_key_parts, MY_BITMAP *partial_match_key_parts);
void cleanup();
int prepare() { return 0; }
void fix_length_and_dec(Item_cache**) {}
int exec();
- uint cols() { return 0; }
+ uint cols() { /* TODO: what is the correct value? */ return 1; }
uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; }
void exclude() {}
table_map upper_select_const_tables() { return 0; }
void print(String*, enum_query_type) {}
bool change_result(Item_subselect*, select_result_interceptor*)
- { return false; }
+ { DBUG_ASSERT(FALSE); return false; }
bool no_tables() { return false; }
- bool no_rows() {return false; }
+ bool no_rows()
+ {
+ /*
+ TODO: It is completely unclear what is the semantics of this
+ method. The current result is computed so that the call to no_rows()
+ from Item_in_optimizer::val_int() sets Item_in_optimizer::null_value
+ correctly.
+ */
+ return !(((Item_in_subselect *) item)->null_value);
+ }
};
@@ -933,6 +1016,7 @@ protected:
/* Keyparts of the single column indexes with NULL, one keypart per index. */
MY_BITMAP partial_match_key_parts;
uint count_partial_match_columns;
+ uint count_null_only_columns;
/*
A conjunction of all the equality condtions between all pairs of expressions
that are arguments of an IN predicate. We need these to post-filter some
@@ -962,7 +1046,7 @@ public:
:subselect_engine(in_predicate, NULL), tmp_table(NULL),
is_materialized(FALSE), materialize_engine(old_engine), lookup_engine(NULL),
materialize_join(NULL), count_partial_match_columns(0),
- semi_join_conds(NULL)
+ count_null_only_columns(0), semi_join_conds(NULL)
{
set_thd(thd);
}
=== modified file 'sql/sql_class.cc'
--- a/sql/sql_class.cc 2010-01-22 16:18:05 +0000
+++ b/sql/sql_class.cc 2010-02-12 14:33:43 +0000
@@ -2931,12 +2931,13 @@ create_result_table(THD *thd_arg, List<I
options, HA_POS_ERROR, (char*) table_alias)))
return TRUE;
- /* TODO: if/where/when to free this buffer? */
- col_stat= (Column_statistics*) table->in_use->calloc(table->s->fields *
- sizeof(Column_statistics));
+ col_stat= (Column_statistics*) table->in_use->alloc(table->s->fields *
+ sizeof(Column_statistics));
if (!stat)
return TRUE;
+ cleanup();
+
table->file->extra(HA_EXTRA_WRITE_CACHE);
table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
return FALSE;
@@ -2966,14 +2967,14 @@ bool select_materialize_with_stats::send
{
++cur_col_stat->null_count;
cur_col_stat->max_null_row= count_rows;
- if (cur_col_stat->min_null_row == 0)
+ if (!cur_col_stat->min_null_row)
cur_col_stat->min_null_row= count_rows;
++nulls_in_row;
}
++cur_col_stat;
}
- if (nulls_in_row == items.elements)
- ++null_record_count;
+ if (nulls_in_row > max_nulls_in_row)
+ max_nulls_in_row= nulls_in_row;
return select_union::send_data(items);
}
=== modified file 'sql/sql_class.h'
--- a/sql/sql_class.h 2010-02-01 12:09:48 +0000
+++ b/sql/sql_class.h 2010-02-12 14:33:43 +0000
@@ -3044,17 +3044,20 @@ protected:
public:
/* Count of NULLs per column. */
ha_rows null_count;
- /* The row number that contains the last NULL in a column. */
- ha_rows max_null_row;
/* The row number that contains the first NULL in a column. */
ha_rows min_null_row;
+ /* The row number that contains the last NULL in a column. */
+ ha_rows max_null_row;
};
/* Array of statistics data per column. */
Column_statistics* col_stat;
- /* The number of rows that consist only of NULL values. */
- ha_rows null_record_count;
+ /*
+ The number of columns in the biggest sub-row that consists of only
+ NULL values.
+ */
+ ha_rows max_nulls_in_row;
/*
Count of rows writtent to the temp table. This is redundant as it is
already stored in handler::stats.records, however that one is relatively
@@ -3063,11 +3066,7 @@ protected:
ha_rows count_rows;
public:
- select_materialize_with_stats()
- {
- null_record_count= 0;
- count_rows= 0;
- }
+ select_materialize_with_stats() {}
virtual bool create_result_table(THD *thd, List<Item> *column_types,
bool is_distinct, ulonglong options,
const char *alias, bool bit_fields_as_long);
@@ -3075,9 +3074,9 @@ public:
bool send_data(List<Item> &items);
void cleanup()
{
- null_record_count= 0;
- count_rows= 0;
memset(col_stat, 0, table->s->fields * sizeof(Column_statistics));
+ max_nulls_in_row= 0;
+ count_rows= 0;
}
ha_rows get_null_count_of_col(uint idx)
{
@@ -3094,7 +3093,7 @@ public:
DBUG_ASSERT(idx < table->s->fields);
return col_stat[idx].min_null_row;
}
- ha_rows get_null_record_count() { return null_record_count; }
+ ha_rows get_max_nulls_in_row() { return max_nulls_in_row; }
};
=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc 2010-01-22 16:18:05 +0000
+++ b/sql/sql_select.cc 2010-02-12 14:33:43 +0000
@@ -707,7 +707,7 @@ JOIN::prepare(Item ***rref_pointer_array
subquery_types_allow_materialization(in_subs))
{
// psergey-todo: duplicated_subselect_card_check: where it's done?
- if (in_subs->is_top_level_item() && // 4
+ if (//in_subs->is_top_level_item() && // 4
!in_subs->is_correlated && // 5
in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6
in_subs->exec_method= Item_in_subselect::MATERIALIZATION;