[Maria-developers] Rev 2764: MWL#68 Subquery optimization: Efficient NOT IN execution with NULLs in file:///home/tsk/mprog/src/5.3-mwl68-unmerged/
At file:///home/tsk/mprog/src/5.3-mwl68-unmerged/ ------------------------------------------------------------ revno: 2764 revision-id: timour@askmonty.org-20100222151655-ltjv0rlv6z2sdiiu parent: timour@askmonty.org-20100222135709-3568ya6z76hkwfzs committer: timour@askmonty.org branch nick: 5.3-mwl68-unmerged timestamp: Mon 2010-02-22 17:16:55 +0200 message: MWL#68 Subquery optimization: Efficient NOT IN execution with NULLs This patch mainly adds sorting of all indexes for partial matching according to their NULL selectivity. The patch also fixes a related bug in subselect_rowid_merge_engine::test_null_row() where the wrong matched indexes were skipped. In addition the patch: - adds few ::print() methods, - renames few variables that had similar names but different purpose. === modified file 'sql/item_subselect.cc' --- a/sql/item_subselect.cc 2010-02-22 13:57:09 +0000 +++ b/sql/item_subselect.cc 2010-02-22 15:16:55 +0000 @@ -3424,7 +3424,10 @@ DBUG_ENTER("subselect_hash_sj_engine::set_strategy_using_schema"); if (item_in->is_top_level_item()) + { strategy= COMPLETE_MATCH; + DBUG_VOID_RETURN; + } else { List_iterator<Item> inner_col_it(*item_in->unit->get_unit_column_types()); @@ -3846,6 +3849,7 @@ thd->lex->current_select= materialize_engine->select_lex; if ((res= materialize_join->optimize())) goto err; /* purecov: inspected */ + DBUG_ASSERT(!is_materialized); /* We should materialize only once. */ materialize_join->exec(); if ((res= test(materialize_join->error || thd->is_fatal_error))) goto err; @@ -3952,7 +3956,7 @@ lookup_engine->print(str, query_type); else str->append(STRING_WITH_LEN( - "<the access method for lookups is not yet created>" + "<engine selected at execution time>" )); } @@ -3980,11 +3984,10 @@ } -Ordered_key::Ordered_key(uint key_idx_arg, TABLE *tbl_arg, - Item *search_key_arg, ha_rows null_count_arg, - ha_rows min_null_row_arg, ha_rows max_null_row_arg, - uchar *row_num_to_rowid_arg) - : key_idx(key_idx_arg), tbl(tbl_arg), search_key(search_key_arg), +Ordered_key::Ordered_key(uint keyid_arg, TABLE *tbl_arg, Item *search_key_arg, + ha_rows null_count_arg, ha_rows min_null_row_arg, + ha_rows max_null_row_arg, uchar *row_num_to_rowid_arg) + : keyid(keyid_arg), tbl(tbl_arg), search_key(search_key_arg), row_num_to_rowid(row_num_to_rowid_arg), null_count(null_count_arg) { DBUG_ASSERT(tbl->file->stats.records > null_count); @@ -4190,6 +4193,21 @@ /* + The probability that a certain row does not contain a NULL in some row in + a NULL-indexed column. + @retval 1 if there are no NULLs + @retval 0 if only NULLs +*/ + +double Ordered_key::null_selectivity() +{ + /* We should not be processing empty tables. */ + DBUG_ASSERT(tbl->file->stats.records); + return (1 - (double) null_count / (double) tbl->file->stats.records); +} + + +/* Compare the value(s) of the current key in 'search_key' with the data of the current table record. @@ -4307,6 +4325,34 @@ } +void Ordered_key::print(String *str) +{ + uint i; + str->append("{idx="); + str->qs_append(keyid); + str->append(", ("); + for (i= 0; i < key_column_count - 1; i++) + { + str->append(key_columns[i]->field->field_name); + str->append(", "); + } + str->append(key_columns[i]->field->field_name); + str->append("), "); + + str->append("null_bitmap: (bits="); + str->qs_append(null_key.n_bits); + str->append(", nulls= "); + str->qs_append((double)null_count); + str->append(", min_null= "); + str->qs_append((double)min_null_row); + str->append(", max_null= "); + str->qs_append((double)max_null_row); + str->append("), "); + + str->append('}'); +} + + /* @param non_null_key_parts @param partial_match_key_parts A union of all single-column NULL key parts. @@ -4323,7 +4369,7 @@ rownum_t cur_rownum= 0; select_materialize_with_stats *result_sink= (select_materialize_with_stats *) result; - uint cur_key= 0; + uint cur_keyid= 0; Item_in_subselect *item_in= (Item_in_subselect*) item; int error; @@ -4346,16 +4392,16 @@ /* Create the only non-NULL key if there is any. */ if (non_null_key_parts) { - non_null_key= new Ordered_key(cur_key, tmp_table, item_in->left_expr, + non_null_key= new Ordered_key(cur_keyid, tmp_table, item_in->left_expr, 0, 0, 0, row_num_to_rowid); if (non_null_key->init(non_null_key_parts)) { // TIMOUR: revert to partial matching via scanning return TRUE; } - merge_keys[cur_key]= non_null_key; - merge_keys[cur_key]->first(); - ++cur_key; + merge_keys[cur_keyid]= non_null_key; + merge_keys[cur_keyid]->first(); + ++cur_keyid; } /* @@ -4379,23 +4425,24 @@ continue; if (result_sink->get_null_count_of_col(i) == row_count) - bitmap_set_bit(&null_only_columns, cur_key); + bitmap_set_bit(&null_only_columns, cur_keyid); else { - merge_keys[cur_key]= new Ordered_key(cur_key, tmp_table, - item_in->left_expr->element_index(i), - result_sink->get_null_count_of_col(i), - result_sink->get_min_null_of_col(i), - result_sink->get_max_null_of_col(i), - row_num_to_rowid); - if (merge_keys[cur_key]->init(i)) + merge_keys[cur_keyid]= new Ordered_key( + cur_keyid, tmp_table, + item_in->left_expr->element_index(i), + result_sink->get_null_count_of_col(i), + result_sink->get_min_null_of_col(i), + result_sink->get_max_null_of_col(i), + row_num_to_rowid); + if (merge_keys[cur_keyid]->init(i)) { // TIMOUR: revert to partial matching via scanning return TRUE; } - merge_keys[cur_key]->first(); + merge_keys[cur_keyid]->first(); } - ++cur_key; + ++cur_keyid; } } @@ -4453,12 +4500,14 @@ tmp_table->file->ha_rnd_end(); + /* Sort all the keys by their NULL selectivity. */ + my_qsort(merge_keys, keys_count, sizeof(Ordered_key*), + (qsort_cmp) cmp_keys_by_null_selectivity); + /* Sort the keys in each of the indexes. */ for (uint i= 0; i < keys_count; i++) merge_keys[i]->sort_keys(); - // TIMOUR: sort all the keys by NULL selectivity - if (init_queue(&pq, keys_count, 0, FALSE, subselect_rowid_merge_engine::cmp_keys_by_cur_rownum, NULL)) { @@ -4486,20 +4535,38 @@ } +void subselect_rowid_merge_engine::print(String *str, enum_query_type query_type) +{ + str->append(STRING_WITH_LEN("<rowid_merge>(")); + for (uint i= 0; i < keys_count; i++) + merge_keys[i]->print(str); + str->append(')'); +} + + /* + Quick sort comparison function to compare keys in order of decreasing bitmap + selectivity, so that the most selective keys come first. + + @param k1 first key to compare + @param k2 second key to compare + + @retval 1 if k1 is less selective than k2 + @retval 0 if k1 is equally selective as k2 + @retval -1 if k1 is more selective than k2 */ int -subselect_rowid_merge_engine::cmp_keys_by_null_selectivity(Ordered_key *a, - Ordered_key *b) +subselect_rowid_merge_engine::cmp_keys_by_null_selectivity(Ordered_key **k1, + Ordered_key **k2) { - double a_sel= a->null_selectivity(); - double b_sel= b->null_selectivity(); - if (a_sel == b_sel) - return 0; - if (a_sel > b_sel) + double k1_sel= (*k1)->null_selectivity(); + double k2_sel= (*k2)->null_selectivity(); + if (k1_sel < k2_sel) return 1; - return -1; + if (k1_sel > k2_sel) + return -1; + return 0; } @@ -4527,17 +4594,21 @@ bool subselect_rowid_merge_engine::test_null_row(rownum_t row_num) { + Ordered_key *cur_key; + uint cur_id; for (uint i = 0; i < keys_count; i++) { - if (bitmap_is_set(&matching_keys, i)) + cur_key= merge_keys[i]; + cur_id= cur_key->get_keyid(); + if (bitmap_is_set(&matching_keys, cur_id)) { /* - The key 'i' already matches a value in row 'row_num', thus we - skip it as it can't possibly match a NULL. + The key 'i' (with id 'cur_keyid') already matches a value in row 'row_num', + thus we skip it as it can't possibly match a NULL. */ continue; } - if (!merge_keys[i]->is_null(row_num)) + if (!cur_key->is_null(row_num)) return FALSE; } return TRUE; @@ -4583,7 +4654,7 @@ if (merge_keys[i]->get_search_key(0)->is_null()) { ++count_nulls_in_search_key; - bitmap_set_bit(&matching_outer_cols, merge_keys[i]->get_key_idx()); + bitmap_set_bit(&matching_outer_cols, merge_keys[i]->get_keyid()); } else if (merge_keys[i]->lookup()) queue_insert(&pq, (uchar *) merge_keys[i]); @@ -4610,7 +4681,7 @@ min_key= (Ordered_key*) queue_remove(&pq, 0); min_row_num= min_key->current(); bitmap_copy(&matching_keys, &null_only_columns); - bitmap_set_bit(&matching_keys, min_key->get_key_idx()); + bitmap_set_bit(&matching_keys, min_key->get_keyid()); bitmap_union(&matching_keys, &matching_outer_cols); if (min_key->next_same()) queue_insert(&pq, (uchar *) min_key); @@ -4633,7 +4704,7 @@ cur_row_num= cur_key->current(); if (cur_row_num == min_row_num) - bitmap_set_bit(&matching_keys, cur_key->get_key_idx()); + bitmap_set_bit(&matching_keys, cur_key->get_keyid()); else { /* Follows from the correct use of priority queue. */ @@ -4645,7 +4716,7 @@ min_key= cur_key; min_row_num= cur_row_num; bitmap_copy(&matching_keys, &null_only_columns); - bitmap_set_bit(&matching_keys, min_key->get_key_idx()); + bitmap_set_bit(&matching_keys, min_key->get_keyid()); bitmap_union(&matching_keys, &matching_outer_cols); } } === modified file 'sql/item_subselect.h' --- a/sql/item_subselect.h 2010-02-19 21:55:57 +0000 +++ b/sql/item_subselect.h 2010-02-22 15:16:55 +0000 @@ -752,7 +752,7 @@ Index of the key in an array of keys. This index allows to construct (sub)sets of keys represented by bitmaps. */ - uint key_idx; + uint keyid; /* The table being indexed. */ TABLE *tbl; /* The columns being indexed. */ @@ -810,7 +810,7 @@ public: static void *operator new(size_t size) throw () { return sql_alloc(size); } - Ordered_key(uint key_idx_arg, TABLE *tbl_arg, + Ordered_key(uint keyid_arg, TABLE *tbl_arg, Item *search_key_arg, ha_rows null_count_arg, ha_rows min_null_row_arg, ha_rows max_null_row_arg, uchar *row_num_to_rowid_arg); @@ -822,7 +822,7 @@ bool init(int col_idx); uint get_column_count() { return key_column_count; } - uint get_key_idx() { return key_idx; } + uint get_keyid() { return keyid; } uint get_field_idx(uint i) { DBUG_ASSERT(i < key_column_count); @@ -841,8 +841,7 @@ } void sort_keys(); - - double null_selectivity() { return (1 - null_count / null_key.n_bits); } + double null_selectivity(); /* Position the current element at the first row that matches the key. @@ -896,6 +895,7 @@ return FALSE; return bitmap_is_set(&null_key, row_num); } + void print(String *str); }; @@ -957,10 +957,10 @@ bool has_covering_null_row; protected: /* - Comparison function to compare keys in order of increasing bitmap + Comparison function to compare keys in order of decreasing bitmap selectivity. */ - static int cmp_keys_by_null_selectivity(Ordered_key *a, Ordered_key *b); + static int cmp_keys_by_null_selectivity(Ordered_key **k1, Ordered_key **k2); /* Comparison function used by the priority queue pq, the 'smaller' key is the one with the smaller current row number. @@ -992,7 +992,7 @@ uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; } void exclude() {} table_map upper_select_const_tables() { return 0; } - void print(String*, enum_query_type) {} + void print(String*, enum_query_type); bool change_result(Item_subselect*, select_result_interceptor*) { DBUG_ASSERT(FALSE); return false; } bool no_tables() { return false; }
participants (1)
-
timour@askmonty.org