#At file:///home/tsk/mprog/src/mysql-6.0-mwl68/ based on revid:timour@sun.com-20100122161805-8lgrisqabrlvc3nc 3769 timour@askmonty.org 2010-02-01 MWL#68 Subquery optimization: Efficient NOT IN execution with NULLs Completed main coding of partial matching. The code compiles, but cannot run. Changes compared to the previous commit: - Completed creation and initialization of all objects needed for partial matching. - Adjusted the interfaces of multiple methods in order to pass the correct information needed for creation/initialization. - Added comparion functions needed for binary search and index sorting. - Fixed binary search in the value index. - Exposed the Arg_comparator of comparison predicates. @ sql/item_cmpfunc.h Expose the Arg_comparator of a comparison predicate so that it is possible to get the comparison result. @ sql/item_subselect.cc - Completed creation and initialization of all objects needed for partial matching. - Adjusted the interfaces of multiple methods in order to pass the correct information needed for creation/initialization. - Added comparion functions needed for binary search and index sorting. - Fixed binary search in the value index. @ sql/item_subselect.h Completed creation and initialization of all objects needed for partial matching. @ sql/sql_class.h - added accessors for NULL statistics modified: sql/item_cmpfunc.h sql/item_subselect.cc sql/item_subselect.h sql/sql_class.h === modified file 'sql/item_cmpfunc.h' --- a/sql/item_cmpfunc.h 2009-12-04 07:48:05 +0000 +++ b/sql/item_cmpfunc.h 2010-02-01 12:09:48 +0000 @@ -355,6 +355,7 @@ public: CHARSET_INFO *compare_collation() { return cmp.cmp_collation.collation; } uint decimal_precision() const { return 1; } void top_level_item() { abort_on_null= TRUE; } + Arg_comparator *get_comparator() { return &cmp; } friend class Arg_comparator; }; === modified file 'sql/item_subselect.cc' --- a/sql/item_subselect.cc 2010-01-22 16:18:05 +0000 +++ b/sql/item_subselect.cc 2010-02-01 12:09:48 +0000 @@ -3102,32 +3102,21 @@ void subselect_hash_sj_engine::set_strat outer_col= item_in->left_expr->element_index(i); inner_col= inner_col_it++; - if (!inner_col->maybe_null) - { - if (!outer_col->maybe_null) - { - non_null_outer_cols.push_back(outer_col); - non_null_key_parts |= 1 << i; - } - else - { - non_null_late_key_parts |= 1 << i; - ++count_non_null_late_cols; - } - } + if (!inner_col->maybe_null && !outer_col->maybe_null) + bitmap_set_bit(&non_null_key_parts, i); else { - partial_match_key_parts |= 1 << i; - ++count_partial_match_cols; + bitmap_set_bit(&partial_match_key_parts, i); + ++count_partial_match_columns; } } } /* If no column contains NULLs use regular hash index lookups. */ - if (!(non_null_late_key_parts || partial_match_key_parts)) - strategy= COMPLETE_MATCH; - else + if (count_partial_match_columns) strategy= PARTIAL_MATCH; + else + strategy= COMPLETE_MATCH; DBUG_VOID_RETURN; } @@ -3145,7 +3134,6 @@ void subselect_hash_sj_engine::set_strat void subselect_hash_sj_engine::set_strategy_using_data() { Item_in_subselect *item_in= (Item_in_subselect *) item; - key_part_map cur_col= 0; select_materialize_with_stats *result_sink= (select_materialize_with_stats *) result; @@ -3154,77 +3142,45 @@ void subselect_hash_sj_engine::set_strat /* Call this procedure only if already selected partial matching. */ DBUG_ASSERT(strategy == PARTIAL_MATCH); - /* - TODO: uncomment after enabling index creation after materialization. - List_iterator<Item> inner_col_it(*item_in->unit->get_unit_column_types()); - Item *inner_col, *outer_col; - */ - for (uint i= 0; i < item_in->left_expr->cols(); i++) { - /* - TODO: uncomment after enabling index creation after materialization. - outer_col= item_in->left_expr->element_index(i); - inner_col= inner_col_it++; - */ - cur_col= 1 << i; - - if (!(cur_col & partial_match_key_parts)) + if (!bitmap_is_set(&partial_match_key_parts, i)) continue; - if (result_sink->get_column_null_count(i) == - tmp_table->file->stats.records) + if (result_sink->get_null_count_of_col(i) == 0) { - /* Column i of the temp table consists only of NULLs. */ - --count_partial_match_cols; - inner_partial_match= TRUE; - partial_match_key_parts &= ~cur_col; /* unset bit 'i' */ - } - else if (result_sink->get_column_null_count(i) == 0) - { - --count_partial_match_cols; - partial_match_key_parts &= ~cur_col; - /* - TODO - Column i of the temp table doesn't contain any NULLs. Currently we - cannot create/alter an index on an already populated internal - temporary table. As a result even if we detect that a column should - belong to the NON_NULL index, it is too late to alter that index. The - only thing we can do is change it from PARTIAL_MATCH to NON_NULL_LATE, - thus removing the "OR NULL" predicate during lookup. Once this - limitation is removed, use the commented code below instead of the - following two lines. - */ - ++count_non_null_late_cols; - non_null_late_key_parts |= cur_col; - /* - if (!inner_col->maybe_null) - { - non_null_key_parts |= cur_col; - non_null_outer_cols.push_back(outer_col); - } - else - { - ++count_non_null_late_cols; - non_null_late_key_parts |= cur_col; - } - */ + bitmap_clear_bit(&partial_match_key_parts, i); + bitmap_set_bit(&non_null_key_parts, i); + --count_partial_match_columns; } } - /* - if (non_null_outer_cols.elements > max number of key parts) - DBUG_RETURN(TRUE); - */ - /* If no column contains NULLs use regular hash index lookups. */ - if (!(non_null_late_key_parts || partial_match_key_parts)) + if (!count_partial_match_columns) strategy= COMPLETE_MATCH; DBUG_VOID_RETURN; } +/* + Initialize a MY_BITMAP with a buffer allocated on the current + memory root. +*/ + +static my_bool +bitmap_init_memroot(MY_BITMAP *map, uint n_bits, MEM_ROOT *mem_root) +{ + my_bitmap_map *bitmap_buf; + + if (!(bitmap_buf= (my_bitmap_map*) alloc_root(mem_root, + bitmap_buffer_size(n_bits))) || + bitmap_init(map, bitmap_buf, n_bits, FALSE)) + return TRUE; + return FALSE; +} + + /** Create all structures needed for IN execution that can live between PS reexecution. @@ -3253,6 +3209,11 @@ bool subselect_hash_sj_engine::init_perm DBUG_ENTER("subselect_hash_sj_engine::init_permanent"); + if (!(bitmap_init_memroot(&non_null_key_parts, tmp_columns->elements, + thd->mem_root)) || + !(bitmap_init_memroot(&partial_match_key_parts, tmp_columns->elements, + thd->mem_root))) + DBUG_RETURN(TRUE); set_strategy_using_schema(); /* @@ -3261,6 +3222,10 @@ bool subselect_hash_sj_engine::init_perm managed (created/filled/etc) internally by the interceptor. */ /* + TODO: + Select a more efficient result sink when we know there is no need to collect + data statistics. + if (strategy == COMPLETE_MATCH) { if (!(result= new select_union)) @@ -3314,13 +3279,6 @@ bool subselect_hash_sj_engine::init_perm if (make_semi_join_conds()) DBUG_RETURN(TRUE); - /* - A complete match is the best we can get, so we can immediately - create the enginte to be used for lookup. - */ - if (strategy == COMPLETE_MATCH && - !(lookup_engine= make_unique_engine())) - DBUG_RETURN(TRUE); DBUG_RETURN(FALSE); } @@ -3582,7 +3540,7 @@ int subselect_hash_sj_engine::exec() set_strategy_using_data(); /* A unique_engine is used both for complete and partial matching. */ - if (!lookup_engine && !(lookup_engine= make_unique_engine())) + if (!(lookup_engine= make_unique_engine())) { res= 1; goto err; @@ -3590,12 +3548,33 @@ int subselect_hash_sj_engine::exec() if (strategy == PARTIAL_MATCH) { - if (!(lookup_engine= new subselect_rowid_merge_engine(lookup_engine, - tmp_table))) + subselect_rowid_merge_engine *new_lookup_engine; + uint count_pm_keys; + MY_BITMAP *nn_key_parts; + /* Total number of keys needed for partial matching. */ + if (count_partial_match_columns < tmp_table->s->fields) { - res= 1; - goto err; + count_pm_keys= count_partial_match_columns + 1; + nn_key_parts= &non_null_key_parts; } + else + { + count_pm_keys= count_partial_match_columns; + nn_key_parts= NULL; + } + + if (!(new_lookup_engine= + new subselect_rowid_merge_engine(lookup_engine, + tmp_table, + count_pm_keys, + item, result)) || + new_lookup_engine->init(nn_key_parts, &partial_match_key_parts)) + { + delete new_lookup_engine; + strategy= PARTIAL_MATCH_SCAN; + /* TODO: setup execution structures for partial match via scanning. */ + } + strategy= PARTIAL_MATCH_INDEX; } item_in->change_engine(lookup_engine); @@ -3648,37 +3627,299 @@ bool subselect_hash_sj_engine::change_re } -bool Ordered_key::sort_keys() +Ordered_key::Ordered_key(uint key_idx_arg, TABLE *tbl_arg, + Item *search_key_arg, ha_rows null_count_arg, + ha_rows min_null_row_arg, ha_rows max_null_row_arg, + uchar *row_num_to_rowid_arg) + : key_idx(key_idx_arg), tbl(tbl_arg), search_key(search_key_arg), + row_num_to_rowid(row_num_to_rowid_arg), null_count(null_count_arg), + min_null_row(min_null_row_arg), max_null_row(max_null_row_arg) { - return TRUE; + key_column_count= search_key->cols(); + cur_row= HA_POS_ERROR; +} + + +/* + Initialize a multi-column index. +*/ + +bool Ordered_key::init(MY_BITMAP *columns_to_index) +{ + THD *thd= tbl->in_use; + uint cur_key_col= 0; + + DBUG_ENTER("Ordered_key::init"); + + DBUG_ASSERT(key_column_count == bitmap_bits_set(columns_to_index)); + + // TODO: check for mem allocation err, revert to scan + + key_columns= (Item_field**) thd->alloc(key_column_count * + sizeof(Item_field*)); + compare_pred= (Item_func_lt**) thd->alloc(key_column_count * + sizeof(Item_func_lt*)); + + for (uint i= 0; i < columns_to_index->n_bits; i++, cur_key_col++) + { + if (!bitmap_is_set(columns_to_index, i)) + continue; + key_columns[cur_key_col]= new Item_field(tbl->field[i]); + /* Create the predicate (tmp_column[i] < outer_ref[i]). */ + compare_pred[cur_key_col]= new Item_func_lt(key_columns[cur_key_col], + search_key->element_index(i)); + } + + if (alloc_keys_buffers()) + { + /* TODO revert to partial match via table scan. */ + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + + +/* + Initialize a single-column index. +*/ + +bool Ordered_key::init(int col_idx) +{ + THD *thd= tbl->in_use; + + DBUG_ENTER("Ordered_key::init"); + + DBUG_ASSERT(key_column_count == 1); + + // TODO: check for mem allocation err, revert to scan + + key_columns= (Item_field**) thd->alloc(sizeof(Item_field*)); + compare_pred= (Item_func_lt**) thd->alloc(sizeof(Item_func_lt*)); + + key_columns[0]= new Item_field(tbl->field[col_idx]); + /* Create the predicate (tmp_column[i] < outer_ref[i]). */ + compare_pred[0]= new Item_func_lt(key_columns[0], + search_key->element_index(col_idx)); + + if (alloc_keys_buffers()) + { + /* TODO revert to partial match via table scan. */ + DBUG_RETURN(TRUE); + } + DBUG_RETURN(FALSE); +} + + +bool Ordered_key::alloc_keys_buffers() +{ + THD *thd= tbl->in_use; + ha_rows row_count= tbl->file->stats.records; + + if (!(row_index= (ha_rows*) thd->alloc((row_count - null_count) * + sizeof(ha_rows)))) + return TRUE; + + /* + TODO: it is enough to create bitmaps with size + (max_null_row - min_null_row), and then use min_null_row as + lookup offset. + */ + if (!(bitmap_init_memroot(&null_key, max_null_row, + thd->mem_root))) + return TRUE; + + return FALSE; +} + + +/* + Quick sort comparison function that compares two rows of the same table + indentfied with their row numbers. +*/ + +int Ordered_key::cmp_rows_by_rownum(Ordered_key *key, ha_rows *a, ha_rows *b) +{ + uchar *rowid_a, *rowid_b; + int error, cmp_res; + TABLE *tbl= key->tbl; + /* The length in bytes of the rowids (positions) of tmp_table. */ + uint rowid_length= tbl->file->ref_length; + + DBUG_ENTER("Ordered_key::cmp_rows_by_rownum"); + if (a == b) + DBUG_RETURN(0); + /* Get the corresponding rowids. */ + rowid_a= key->row_num_to_rowid + (*a) * rowid_length; + rowid_b= key->row_num_to_rowid + (*b) * rowid_length; + /* Fetch the rows for comparison. */ + error= tbl->file->rnd_pos(tbl->record[0], rowid_a); + DBUG_ASSERT(!error); + error= tbl->file->rnd_pos(tbl->record[1], rowid_b); + DBUG_ASSERT(!error); + /* Compare the two rows. */ + for (Field **f_ptr= tbl->field; *f_ptr; f_ptr++) + { + if ((cmp_res= (*f_ptr)->cmp_offset(tbl->s->rec_buff_length))) + DBUG_RETURN(cmp_res); + } + DBUG_RETURN(0); +} + + +void Ordered_key::sort_keys() +{ + my_qsort(row_index, tbl->file->stats.records, sizeof(ha_rows), + (qsort_cmp) &cmp_rows_by_rownum); +} + + +/* + Compare the value(s) of the current key in 'search_key' with the + data of the current table record accessible via 'key_columns'. + + @notes The comparison result follows from the way compare_pred + is created in Ordered_key::init. Currently compare_pred compares + a field in of the current row with the corresponding Item that + contains the search key. + + @retval -1 if (current row < search_key) + @retval 0 if (current row == search_key) + @retval +1 if (current row > search_key) +*/ + +int Ordered_key::compare_row_with_key(ha_rows row_num) +{ + /* The length in bytes of the rowids (positions) of tmp_table. */ + uint rowid_length= tbl->file->ref_length; + uchar *cur_rowid= row_num_to_rowid + row_num * rowid_length; + int error, cmp_res; + + DBUG_ENTER("Ordered_key::compare"); + error= tbl->file->rnd_pos(tbl->record[0], cur_rowid); + DBUG_ASSERT(!error); + + for (uint i= 0; i < key_column_count; i++) + { + cmp_res= compare_pred[i]->get_comparator()->compare(); + /* Unlike Arg_comparator::compare_row() here there should be no NULLs. */ + DBUG_ASSERT(!compare_pred[i]->null_value); + if (cmp_res) + DBUG_RETURN(cmp_res); + } + DBUG_RETURN(0); } /* + Find a key in a sorted array of keys via binary search. + see create_subq_in_equalities() */ -bool Ordered_key::lookup(Item *search_key) +bool Ordered_key::lookup() { DBUG_ENTER("Ordered_key::lookup"); - DBUG_ASSERT(search_key->cols() == key_column_count); - for (uint i= 0; i < key_column_count; i++) + ha_rows lo= 0; + ha_rows hi= tbl->file->stats.records - 1; + ha_rows mid; + int cmp_res; + + while (lo <= hi) { - // j = corresponding colum at pos i - // compare(search_key->element_index(i), key_columns(j)) - ; + mid= lo + (hi - lo) / 2; + cmp_res= compare_row_with_key(mid); + + if (cmp_res == -1) + { + /* row[mid] < search_key */ + lo= mid + 1; + } + else if (cmp_res == 1) + { + /* row[mid] > search_key */ + hi= mid - 1; + } + else + { + /* row[mid] == search_key */ + cur_row= mid; + DBUG_RETURN(TRUE); + } } - DBUG_RETURN(TRUE); + + DBUG_RETURN(FALSE); } /* + @param non_null_key_parts + @param partial_match_key_parts A union of all single-column NULL key parts. + @param count_partial_match_columns Number of NULL keyparts (set bits above). */ -bool subselect_rowid_merge_engine::init() +bool +subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts, + MY_BITMAP *partial_match_key_parts) { - // TODO + /* The length in bytes of the rowids (positions) of tmp_table. */ + uint rowid_length= tmp_table->file->ref_length; + ha_rows row_count= tmp_table->file->stats.records; + select_materialize_with_stats *result_sink= + (select_materialize_with_stats *) result; + uint cur_key= 0; + + if (!(row_num_to_rowid= (uchar*) thd->alloc(row_count * rowid_length * + sizeof(uchar)))) + return TRUE; + + if (!(bitmap_init_memroot(&matching_keys, keys_count, thd->mem_root))) + return TRUE; + + merge_keys= (Ordered_key**) thd->alloc(keys_count * sizeof(Ordered_key*)); + /* Create the only non-NULL key if there is any. */ + if (non_null_key_parts) + { + non_null_key= new Ordered_key(cur_key, tmp_table, item, 0, 0, 0, + row_num_to_rowid); + if (non_null_key->init(non_null_key_parts)) + { + // TODO: revert to partial matching via scanning + return TRUE; + } + merge_keys[cur_key]= non_null_key; + non_null_key->sort_keys(); + ++cur_key; + } + /* + Create one single-column NULL-key for each column in + partial_match_key_parts. + */ + for (uint i= 0; i < partial_match_key_parts->n_bits; i++, cur_key++) + { + if (!bitmap_is_set(partial_match_key_parts, i)) + continue; + + merge_keys[cur_key]= new Ordered_key(cur_key, tmp_table, item, + result_sink->get_null_count_of_col(i), + result_sink->get_min_null_of_col(i), + result_sink->get_max_null_of_col(i), + row_num_to_rowid); + if (merge_keys[cur_key]->init(i)) + { + // TODO: revert to partial matching via scanning + return TRUE; + } + merge_keys[cur_key]->sort_keys(); + } + + if (init_queue(&pq, keys_count, 0, FALSE, + subselect_rowid_merge_engine::cmp_key_by_cur_row, NULL)) + { + // TODO: revert to partial matching via scanning + return TRUE; + } + return FALSE; } @@ -3690,6 +3931,41 @@ void subselect_rowid_merge_engine::clean /* +*/ + +int +subselect_rowid_merge_engine::cmp_key_by_null_selectivity(Ordered_key *a, + Ordered_key *b) +{ + double a_sel= a->null_selectivity(); + double b_sel= b->null_selectivity(); + if (a_sel == b_sel) + return 0; + if (a_sel > b_sel) + return 1; + return -1; +} + + +/* +*/ + +int +subselect_rowid_merge_engine::cmp_key_by_cur_row(void *arg, + uchar *k1, uchar *k2) +{ + ha_rows row1= ((Ordered_key*) k1)->current(); + ha_rows row2= ((Ordered_key*) k2)->current(); + + if (row1 > row2) + return 1; + if (row1 == row2) + return 0; + return -1; +} + + +/* Check if certain table row contains a NULL in all columns in all columns for which there is no value match. @@ -3704,13 +3980,11 @@ void subselect_rowid_merge_engine::clean bool subselect_rowid_merge_engine::test_null_row(ha_rows row_num) { - Ordered_key *cur_key= keys; - DBUG_ENTER("subselect_rowid_merge_engine::test_null_row"); - for (uint i = 0; i < keys_count; i++, cur_key++) + for (uint i = 0; i < keys_count; i++) { - if (bitmap_is_set(matching_keys, i)) + if (bitmap_is_set(&matching_keys, i)) { /* The key 'i' already matches a value in row 'row_num', thus we @@ -3718,7 +3992,7 @@ bool subselect_rowid_merge_engine::test_ */ continue; } - if (!cur_key->is_null(row_num)) + if (!merge_keys[i]->is_null(row_num)) DBUG_RETURN(FALSE); } DBUG_RETURN(TRUE); @@ -3736,14 +4010,13 @@ bool subselect_rowid_merge_engine::parti ha_rows min_row; /* Current row number of min_key. */ Ordered_key *cur_key; ha_rows cur_row; - Item_in_subselect *item_in= (Item_in_subselect *) item; DBUG_ENTER("subselect_rowid_merge_engine::partial_match"); /* If there is a non-NULL key, it must be the first key in the keys array. */ - DBUG_ASSERT(non_null_key && keys == non_null_key); + DBUG_ASSERT(non_null_key && merge_keys[0] == non_null_key); /* Check if there is a match for the columns of the only non-NULL key. */ - if (non_null_key && !non_null_key->lookup(item_in->left_expr)) + if (non_null_key && !non_null_key->lookup()) DBUG_RETURN(FALSE); if (non_null_key) queue_insert(&pq, (uchar *) non_null_key); @@ -3753,10 +4026,10 @@ bool subselect_rowid_merge_engine::parti non_null_key, since it was already processed above. */ uint i= non_null_key ? 1 : 0; /* Skip the non-NULL key, already processed. */ - for (cur_key= keys; i < keys_count; i++, cur_key++) + for (; i < keys_count; i++) { - if (cur_key->lookup(item_in->left_expr)) - queue_insert(&pq, (uchar *) cur_key); + if (merge_keys[i]->lookup()) + queue_insert(&pq, (uchar *) merge_keys[i]); } /* Not all value keys are empty, thus we don't have only NULL keys. If we had, @@ -3767,8 +4040,8 @@ bool subselect_rowid_merge_engine::parti DBUG_ASSERT(pq.elements > 1); min_key= (Ordered_key*) queue_remove(&pq, 0); min_row= min_key->current(); - bitmap_clear_all(matching_keys); - bitmap_set_bit(matching_keys, min_key->get_key_idx()); + bitmap_clear_all(&matching_keys); + bitmap_set_bit(&matching_keys, min_key->get_key_idx()); min_key->next(); if (!min_key->is_eof()) queue_insert(&pq, (uchar *) min_key); @@ -3780,9 +4053,9 @@ bool subselect_rowid_merge_engine::parti if (cur_row == min_row) { - bitmap_set_bit(matching_keys, cur_key->get_key_idx()); + bitmap_set_bit(&matching_keys, cur_key->get_key_idx()); /* There cannot be a complete match, as we already checked for one. */ - DBUG_ASSERT(bitmap_bits_set(matching_keys) < matching_keys->n_bits); + DBUG_ASSERT(bitmap_bits_set(&matching_keys) < matching_keys.n_bits); } else { @@ -3794,8 +4067,8 @@ bool subselect_rowid_merge_engine::parti { min_key= cur_key; min_row= cur_row; - bitmap_clear_all(matching_keys); - bitmap_set_bit(matching_keys, min_key->get_key_idx()); + bitmap_clear_all(&matching_keys); + bitmap_set_bit(&matching_keys, min_key->get_key_idx()); } } === modified file 'sql/item_subselect.h' --- a/sql/item_subselect.h 2010-01-22 16:18:05 +0000 +++ b/sql/item_subselect.h 2010-02-01 12:09:48 +0000 @@ -683,68 +683,70 @@ class Ordered_key { protected: /* - Index of the key in some array of keys. This index allows to + Index of the key in an array of keys. This index allows to construct (sub)sets of keys represented by bitmaps. */ uint key_idx; + /* The table being indexed. */ + TABLE *tbl; /* The columns being indexed. */ Item_field **key_columns; /* Number of elements in 'key_columns' (number of key parts). */ uint key_column_count; + /* + An expression, or sequence of expressions that forms the search key. + */ + Item *search_key; /* Value index related members. */ - /* The actual value index, consists of a sorted sequence of row numbers. */ + /* + The actual value index, consists of a sorted sequence of row numbers. + There are tbl->file->stats.records elements in this array. + */ ha_rows *row_index; /* Current element in 'row_index'. */ ha_rows cur_row; /* - TODO: define a quick sort comparison function. + Mapping from row numbers to row ids. The element row_num_to_rowid[i] + contains a buffer with the rowid for the row numbered 'i'. + The memory for this member is not maintanined by this class because + all Ordered_key indexes of the same table share the same mapping. + */ + uchar *row_num_to_rowid; + /* + A sequence of predicates to compare the search key with the corresponding + columns of a table row from the index. */ + Item_func_lt **compare_pred; /* Null index related members. */ MY_BITMAP null_key; /* Count of NULLs per column. */ ha_rows null_count; - /* The row number that contains the last NULL in a column. */ - ha_rows max_null_row; /* The row number that contains the first NULL in a column. */ ha_rows min_null_row; - /* - TODO: define a qsort comparison function to compare keys in order of - increasing bitmap selectivity. - */ + /* The row number that contains the last NULL in a column. */ + ha_rows max_null_row; protected: - ha_rows get_row_count() - { - /* Assume that file->info(HA_STATUS_VARIABLE) has been called. */ - return key_columns[0]->field->table->file->stats.records; - } + bool alloc_keys_buffers(); /* - Compute the index (position) of an indexed column in the table definition. - - @param i index in the 'key_columns' array. - - @returns The index of the corresponding indexed column in the TABLE::field - array of all table fields. + Quick sort comparison function that compares two rows of the same table + indentfied with their row numbers. */ - uint get_column_idx(uint i) - { - DBUG_ENTER("get_column_idx"); - DBUG_ASSERT(i < key_column_count); - /* All key_columns must be from the same table, so any one is fine. */ - //TABLE *tab= key_columns[0]->field->table; - //Field *col_i= columns->field + i; - //DBUG_RETURN(col_i - tab->field); - DBUG_RETURN(0); - } + static int cmp_rows_by_rownum(Ordered_key *key, ha_rows* a, ha_rows* b); + + int compare_row_with_key(ha_rows row_num); public: - Ordered_key(TABLE *tab) - { - /* TODO: init all Item_fields from the table columns. */ - } - bool init(ha_rows row_count); + Ordered_key(uint key_idx_arg, TABLE *tbl_arg, + Item *search_key_arg, ha_rows null_count_arg, + ha_rows min_null_row_arg, ha_rows max_null_row_arg, + uchar *row_num_to_rowid_arg); + /* Initialize a multi-column index. */ + bool init(MY_BITMAP *columns_to_index); + /* Initialize a single-column index. */ + bool init(int col_idx); uint get_column_count() { return key_column_count; } uint get_key_idx() { return key_idx; } @@ -753,21 +755,23 @@ public: row_index[cur_row]= row_num; ++cur_row; } - bool sort_keys(); + + void sort_keys(); + + double null_selectivity() { return (1 - null_count / null_key.n_bits); } + /* Position the current element at the first row that matches the key. - TODO: the argument here is the left IN argument, which is a sequence - of Items. We have to compare these Items with the corresponding Fields - of the temp table. To do this wrap each field in an Item_field, then - compare. See how it is done in create_subq_in_equalities(). + The key itself is propagated by evaluating the current value(s) of + this->search_key. */ - bool lookup(Item *search_key); + bool lookup(); /* Return the current index element. */ ha_rows current() { return row_index[cur_row]; } /* Move the current index cursor at the next match. */ bool next() { - if (cur_row < get_row_count()) + if (cur_row < tbl->file->stats.records) { ++cur_row; return TRUE; @@ -775,7 +779,7 @@ public: return FALSE; }; /* Return false if all matches are exhausted, true otherwise. */ - bool is_eof() { return cur_row == get_row_count(); } + bool is_eof() { return cur_row == tbl->file->stats.records; } void set_null(ha_rows row_num) { @@ -789,9 +793,9 @@ public: Their only initialized member is 'n_bits', which is equal to the number of temp table rows. */ - if (null_count == get_row_count()) + if (null_count == tbl->file->stats.records) { - DBUG_ASSERT(get_row_count() == null_key.n_bits); + DBUG_ASSERT(tbl->file->stats.records == null_key.n_bits); DBUG_RETURN(TRUE); } if (row_num > max_null_row || row_num < min_null_row) @@ -812,20 +816,16 @@ protected: FALSE and UNKNOWN. */ subselect_engine *lookup_engine; - - /* The length in bytes of the rowids (positions) of tmp_table. */ - uint rowid_length; /* - Mapping from row numbers to row ids. The element 'i' with lenght - 'rowid_length' - (row_num_to_rowid + i*rowid_length) contains - the rowid of row numbered 'i'. + Mapping from row numbers to row ids. The element row_num_to_rowid[i] + contains a buffer with the rowid for the row numbered 'i'. */ uchar *row_num_to_rowid; /* A subset of all the keys for which there is a match for the same row. Used during execution. Computed for each call to exec(). */ - MY_BITMAP *matching_keys; + MY_BITMAP matching_keys; /* Indexes of row numbers, sorted by <column_value, row_number>. If an index may contain NULLs, the NULLs are stored efficiently in a bitmap. @@ -834,7 +834,7 @@ protected: one with the fewer NULLs is first. Thus, if there is any index on non-NULL columns, it is contained in keys[0]. */ - Ordered_key *keys; + Ordered_key **merge_keys; /* The number of elements in keys. */ uint keys_count; /* @@ -849,33 +849,31 @@ protected: This queue is used by the partial match algorithm in method exec(). */ QUEUE pq; - +protected: /* - True if some column in the temp table consist of only NULLs. Then - any match is a partial match. + Comparison function to compare keys in order of increasing bitmap + selectivity. */ - bool inner_partial_match; - bool null_keypart; /* TRUE <=> constructed search tuple has a NULL */ + static int cmp_key_by_null_selectivity(Ordered_key *a, Ordered_key *b); /* - A conjunction of all the equality condtions between all pairs of expressions - that are arguments of an IN predicate. We need these to post-filter some - IN results because index lookups sometimes match values that are actually - not equal to the search key in SQL terms. + Comparison function used by the priority queue pq, the 'smaller' key + is the one with the smaller current row number. */ - Item_cond_and *semi_join_conds; -protected: + static int cmp_key_by_cur_row(void *arg, uchar *k1, uchar *k2); + bool test_null_row(ha_rows row_num); bool partial_match(); public: subselect_rowid_merge_engine(subselect_engine *lookup_engine_arg, - TABLE *tmp_table_arg) - :subselect_engine(NULL, NULL) - { - lookup_engine= lookup_engine_arg; - tmp_table= tmp_table_arg; - rowid_length= tmp_table->file->ref_length; - } - bool init(); + TABLE *tmp_table_arg, uint keys_count_arg, + Item_subselect *item_arg, + select_result_interceptor *result_arg) + :subselect_engine(item_arg, result_arg), + tmp_table(tmp_table_arg), lookup_engine(lookup_engine_arg), + keys_count(keys_count_arg) + {} + + bool init(MY_BITMAP *non_null_key_parts, MY_BITMAP *partial_match_key_parts); void cleanup(); int prepare() { return 0; } void fix_length_and_dec(Item_cache**) {} @@ -930,15 +928,11 @@ protected: */ bool has_null_row; - /* Keyparts of the only non-NULL composite index in a ror_intersect. */ - key_part_map non_null_key_parts; - List<Item> non_null_outer_cols; /* Corresponding non-NULL outer columns. */ - /* keyparts of the non-NULL single column indexes, one keypart per index. */ - key_part_map non_null_late_key_parts; - /* keyparts of the single column indexes with NULL, one keypart per index. */ - key_part_map partial_match_key_parts; - uint count_non_null_late_cols, count_partial_match_cols; - + /* Keyparts of the only non-NULL composite index in a rowid merge. */ + MY_BITMAP non_null_key_parts; + /* Keyparts of the single column indexes with NULL, one keypart per index. */ + MY_BITMAP partial_match_key_parts; + uint count_partial_match_columns; /* A conjunction of all the equality condtions between all pairs of expressions that are arguments of an IN predicate. We need these to post-filter some @@ -948,8 +942,10 @@ protected: Item *semi_join_conds; /* Possible execution strategies that can be used to compute hash semi-join.*/ enum exec_strategy { - COMPLETE_MATCH, /* Use plain index lookups. */ - PARTIAL_MATCH, /* Use partial matching. */ + COMPLETE_MATCH, /* Use regular index lookups. */ + PARTIAL_MATCH, /* Use some partial matching strategy. */ + PARTIAL_MATCH_INDEX, /* Use partial matching through index merging. */ + PARTIAL_MATCH_SCAN, /* Use partial matching through table scan. */ IMPOSSIBLE /* Subquery materialization is not applicable. */ }; /* The chosen execution strategy. Computed after materialization. */ @@ -965,14 +961,10 @@ public: subselect_single_select_engine *old_engine) :subselect_engine(in_predicate, NULL), tmp_table(NULL), is_materialized(FALSE), materialize_engine(old_engine), lookup_engine(NULL), - materialize_join(NULL), inner_partial_match(FALSE), - count_non_null_late_cols(0), count_partial_match_cols(0), + materialize_join(NULL), count_partial_match_columns(0), semi_join_conds(NULL) { set_thd(thd); - non_null_key_parts= (key_part_map) 0; - non_null_late_key_parts= (key_part_map) 0; - partial_match_key_parts= (key_part_map) 0; } ~subselect_hash_sj_engine(); === modified file 'sql/sql_class.h' --- a/sql/sql_class.h 2010-01-22 16:18:05 +0000 +++ b/sql/sql_class.h 2010-02-01 12:09:48 +0000 @@ -3079,11 +3079,21 @@ public: count_rows= 0; memset(col_stat, 0, table->s->fields * sizeof(Column_statistics)); } - ha_rows get_column_null_count(uint idx) + ha_rows get_null_count_of_col(uint idx) { DBUG_ASSERT(idx < table->s->fields); return col_stat[idx].null_count; } + ha_rows get_max_null_of_col(uint idx) + { + DBUG_ASSERT(idx < table->s->fields); + return col_stat[idx].max_null_row; + } + ha_rows get_min_null_of_col(uint idx) + { + DBUG_ASSERT(idx < table->s->fields); + return col_stat[idx].min_null_row; + } ha_rows get_null_record_count() { return null_record_count; } };