Re: [Maria-developers] [Commits] 0ff8b73b60f: MDEV-15777:Support Early NULLs filtering-like restrictions in the range optimizer
Hi Varun, Please finally find the review below. The patch is almost there I think, but there are some minor adjustments that still need to be made.
revision-id: 0ff8b73b60fae40556f91d3959f20bf3cf182b28 (mariadb-10.3.0-947-g0ff8b73b60f) parent(s): 15419a558370aeed9521b498c34d50f20a8d47a5 author: Varun Gupta committer: Varun Gupta timestamp: 2018-05-15 13:53:11 +0530 message:
MDEV-15777:Support Early NULLs filtering-like restrictions in the range optimizer
--- mysql-test/main/mdev15777.result | 455 +++++++++++++++++++++++++++++++++++++++ mysql-test/main/mdev15777.test | 138 ++++++++++++ sql/opt_range.cc | 125 ++++++++++- sql/opt_range.h | 2 + sql/sql_select.cc | 22 +- sql/table.cc | 1 + sql/table.h | 6 + 7 files changed, 745 insertions(+), 4 deletions(-)
diff --git a/mysql-test/main/mdev15777.result b/mysql-test/main/mdev15777.result new file mode 100644 index 00000000000..549ac08b91f --- /dev/null +++ b/mysql-test/main/mdev15777.result @@ -0,0 +1,455 @@ +# Test added to check that null filtering is used by range optimizer +create table ten(a int); +insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table one_k(a int); +insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C; +create table one_m(a int); +insert into one_m select A.a + B.a* 1000 from one_k A, one_k B;
The testcases are extremely confusing. * Do you really need a table with one millon rows to demonstrate the issue? * Why use examples with subqueries (which are converted into join anyway)? - if we want to verify that this optimization works together with subquery-to-semi-join conversion, let's start with a join example, and then construct a subquery which is converted to the query. * The testcases produce a lot of EXPLAIN outputs, and there is no explanation provided what one should expect. Please create a testcase: - of reasonable size (I think tables of 10-20K rows should suffice) - start from a join query - Like in other testcases, tables should be named, t1,t2, ... - I assume the query plan should be: range access on the first table (constructed from a NOT NULL predicate), ref access for the second one. - The testcase should have '--echo #' comments explaining what is relevant in the query plan
+delete from one_m where a=0 limit 1; +create table t1 ( +id int(10) unsigned NOT NULL AUTO_INCREMENT, +filler varchar(100), +subset_id int(11) DEFAULT NULL, +PRIMARY KEY (id), +KEY t1_subset_id (subset_id) +); +create table t1_subsets ( ...
diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 8422917065f..ae06b899ac9 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -155,6 +155,7 @@ class SEL_IMERGE; #define CLONE_KEY1_MAYBE 1 #define CLONE_KEY2_MAYBE 2 #define swap_clone_flag(A) ((A & 1) << 1) | ((A & 2) >> 1) +#define FT_KEYPART (MAX_FIELDS+10)
/* @@ -2400,6 +2401,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, { uint idx; double scan_time; + Item *null_rejecting_conds= NULL; DBUG_ENTER("SQL_SELECT::test_quick_select"); DBUG_PRINT("enter",("keys_to_use: %lu prev_tables: %lu const_tables: %lu", (ulong) keys_to_use.to_ulonglong(), (ulong) prev_tables, @@ -2423,6 +2425,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, read_time= (double) records + scan_time + 1; // Force to use index
possible_keys.clear_all(); + null_rejecting_conds= head->null_rejecting_conds;
DBUG_PRINT("info",("Time to scan table: %g", read_time));
@@ -2431,7 +2434,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, { uchar buff[STACK_BUFF_ALLOC]; MEM_ROOT alloc; - SEL_TREE *tree= NULL; + SEL_TREE *tree= NULL, *not_null_cond_tree= NULL; KEY_PART *key_parts; KEY *key_info; PARAM param; @@ -2540,6 +2543,12 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, TRP_GROUP_MIN_MAX *group_trp; double best_read_time= read_time;
Please add a comment saying that as far as range optimizer is concerned, null_rejecting_conds is just an extra condition on this table. (That is, we don't care if it has NOT NULL conditions or something else).
+ if (null_rejecting_conds) + { + not_null_cond_tree= null_rejecting_conds->get_mm_tree(¶m, + &null_rejecting_conds); + } + if (cond) { if ((tree= cond->get_mm_tree(¶m, &cond))) @@ -2558,6 +2567,13 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, tree= NULL; } } + if (not_null_cond_tree) + { + if (!tree) + tree= not_null_cond_tree; + else + tree= tree_and(¶m, tree, not_null_cond_tree);
This if-else logic is redundant, as it's already present in tree_and(). Can just have this: tree= tree_and(tree, not_null_cond_tree);
+ }
/* Try to construct a QUICK_GROUP_MIN_MAX_SELECT. @@ -14643,6 +14659,113 @@ void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names, add_key_and_length(key_names, used_lengths, &first); }
+inline void add_cond(THD *thd, Item **e1, Item *e2) +{ + if (*e1) + { + if (!e2) + return; + Item *res; + if ((res= new (thd->mem_root) Item_cond_and(thd, *e1, e2))) + { + res->fix_fields(thd, 0); + res->update_used_tables(); + *e1= res; + } + } + else + *e1= e2; +} + +/* + Create null rejecting conditions for a table, for all the equalites + present in the WHERE clause of a query. + + SYNOPSIS + make_null_rejecting_conds() + @param TABLE - Keys of this table will participate in null + rejecting conditions + @param keyuse_array - array that has all the equalites of the + WHERE clasuse + + DESCRIPTION + This function creates null rejecting conditions for a table. These + conditions are created to do range analysis on them , the conditions + are of the form tbl.key.keypart IS NOT NULL. + + IMPLEMENTATION + Lookup in the keyuse array to check if it has equalites that belong + to the given table. If yes then find out if the conditions are null + rejecting and accordingly create all the condition for the keys of a + given table and AND them. + + + RETURN + NOT NULL - Found null rejecting conditions for the given table + NULL - No null rejecting conditions for the given table +*/ + +void make_null_rejecting_conds(THD *thd, TABLE *table, + DYNAMIC_ARRAY *keyuse_array, key_map *const_keys) ^ Please fix identation.
+{ + KEY *keyinfo; + Item *cond= NULL; + KEYUSE* keyuse; + + /* + The null rejecting conds added will be on the keypart of a key, so for + that we need the table to atleast have a key. + */ please fix typos/wording in the above comment.
+ if (!table->s->keys) + return ; + if (table->null_rejecting_conds) + return; + + for(uint i=0; i < keyuse_array->elements; i++) + { This loop does excessive work. Check out how best_access_path() examines the KEYUSE array.
+ keyuse= (KEYUSE*)dynamic_array_ptr(keyuse_array, i); + if (keyuse->table == table) + { + /* + No null rejecting conds for a hash key orr full-text keys + */
Key points - the array is sorted by table, index_nr, key_part_nr. - JOIN_TAB::keyuse points to the first element that refers to the table described by the JOIN_TAB - The array may have "duplicates", especially when multiple equalities are present: KEYUSE(t.keyXpartY = other_table.colX) KEYUSE(t.keyXpartY = other_table.colY) KEYUSE(t.keyXpartY = some_expr) ... Since the elements are sorted by keyXpartY, it is trival to avoid generating multiple "t.keyXpartY IS NOT NULL" for consecutive KEYUSEs. I would go even further: the same "t.keyXpartY" can be part of multiple indexes (and so its KEYUSE objects will not be adjacent). Can we use a table's column bitmap to avoid generating duplicate IS NOT NULL objects? please fix typos/wording in the above comment.
+ if (keyuse->key == MAX_KEY || keyuse->keypart == FT_KEYPART) + continue; + keyinfo= keyuse->table->key_info+keyuse->key; + Field *field= keyinfo->key_part[keyuse->keypart].field; + + /* + No need to add null-rejecting condition if we have a + keyuse element as + - table.key.keypart= const + - (table.key.keypart= tbl.otherfield or table.key.keypart IS NULL) + - table.key.keypart IS NOT NULLABLE + */ + + if (keyuse->val->const_item() + || !(keyuse->null_rejecting && field->maybe_null()) Please remove the brackets: !(A && B) -> !A || !B + || keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) Please also put the '||' at the end of the lines, like it is done in the rest of the code. + continue; + + Item_field *field_item= new (thd->mem_root)Item_field(thd, field); + Item* not_null_item= new (thd->mem_root)Item_func_isnotnull(thd, + field_item); Please add error handling (just return from the function if we failed to allocate any of the above)
+ + /* + adding the key to const keys as we have the condition + as key.keypart IS NOT NULL + */ + + const_keys->set_bit(keyuse->key); + not_null_item->fix_fields(thd, 0); + not_null_item->update_used_tables(); + add_cond(thd, &cond, not_null_item); + } + } + table->null_rejecting_conds= cond; + return;
Please remove the above 'return;' as it is redundant and makes one suspect a merge error
+} +
#ifndef DBUG_OFF
diff --git a/sql/opt_range.h b/sql/opt_range.h index bd85a12d4a1..894d46a892c 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -1728,6 +1728,8 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond); bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond); #endif void store_key_image_to_rec(Field *field, uchar *ptr, uint len); +void make_null_rejecting_conds(THD *thd, TABLE *table, + DYNAMIC_ARRAY *keyuse_array, key_map *const_keys); Please fix identation.
extern String null_string;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc index f658b78c33c..4aceb333340 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -4805,6 +4805,9 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, add_group_and_distinct_keys(join, s);
s->table->cond_selectivity= 1.0; + + make_null_rejecting_conds(join->thd, s->table, + keyuse_array, &s->const_keys);
/* Perform range analysis if there are keys it could use (1). @@ -5352,15 +5356,24 @@ add_key_field(JOIN *join, If the condition has form "tbl.keypart = othertbl.field" and othertbl.field can be NULL, there will be no matches if othertbl.field has NULL value. + + The field KEY_FIELD::null_rejecting is set to TRUE if we have both + the left and right hand side of the equality are NULLABLE + This is NOT what is happening. null_rejecting is set to true, if *either* left or right-hand side of the equality is nullable. Please fix the comment.
Please also fix the comment in KEY_FIELD::null_rejecting to reflect that.
We use null_rejecting in add_not_null_conds() to add 'othertbl.field IS NOT NULL' to tab->select_cond. + + We use null_rejecting in make_null_rejecting_conds() to add + tbl.keypart IS NOT NULL so we can do range analysis on this condition +
Please join the above two sentences together: null_rejecting is used - by add_not_null_conds(), to add "othertbl.field IS NOT NULL" to othertbl's tab->select_cond. (This is called "Early NULLs filtering") - by make_null_rejecting_conds(), to provide range optimizer with additional "tbl.keypart IS NOT NULL" condition.
*/ { Item *real= (*value)->real_item(); if (((cond->functype() == Item_func::EQ_FUNC) || (cond->functype() == Item_func::MULT_EQUAL_FUNC)) && - (real->type() == Item::FIELD_ITEM) && + (((real->type() == Item::FIELD_ITEM) && ((Item_field*)real)->field->maybe_null()) + ||(field->maybe_null()))) ^Please fix the identation.
(*key_fields)->null_rejecting= true; else (*key_fields)->null_rejecting= false; @@ -9813,7 +9826,10 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, uint maybe_null= MY_TEST(keyinfo->key_part[i].null_bit); j->ref.items[i]=keyuse->val; // Save for cond removal j->ref.cond_guards[i]= keyuse->cond_guard; - if (keyuse->null_rejecting) + Item *real= (keyuse->val)->real_item(); + if (keyuse->null_rejecting && + (real->type() == Item::FIELD_ITEM) && + ((Item_field*)real)->field->maybe_null())
^ Please fix identation
j->ref.null_rejecting|= (key_part_map)1 << i; keyuse_uses_no_tables= keyuse_uses_no_tables && !keyuse->used_tables; /* @@ -18538,7 +18554,7 @@ free_tmp_table(THD *thd, TABLE *entry) DBUG_ASSERT(entry->pos_in_table_list->table == entry); entry->pos_in_table_list->table= NULL; } - + entry->null_rejecting_conds= NULL;
What is this for? As far as I understand we are freeing the TABLE object
free_root(&own_root, MYF(0)); /* the table is allocated in its own root */ thd_proc_info(thd, save_proc_info);
diff --git a/sql/table.cc b/sql/table.cc index a6e445d0d2e..097fee465de 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -4597,6 +4597,7 @@ void TABLE::init(THD *thd, TABLE_LIST *tl) created= TRUE; cond_selectivity= 1.0; cond_selectivity_sampling_explain= NULL; + null_rejecting_conds= NULL; #ifdef HAVE_REPLICATION /* used in RBR Triggers */ master_had_triggers= 0; diff --git a/sql/table.h b/sql/table.h index 6fd3f219914..50d47899b49 100644 --- a/sql/table.h +++ b/sql/table.h @@ -1356,6 +1356,12 @@ struct TABLE SplM_opt_info *spl_opt_info; key_map keys_usable_for_splitting;
+ /* + Null rejecting conds added for all tables so we can do range analysis + on these conditions + */ "for all tables"? Please change the comment to something like:
"NULL-rejecting conditions for this table. They are created from eq_ref access, and used by the range optimizer".
+ Item* null_rejecting_conds; + void init(THD *thd, TABLE_LIST *tl); bool fill_item_list(List<Item> *item_list) const; void reset_item_list(List<Item> *item_list, uint skip) const;
BR Sergei -- Sergei Petrunia, Software Developer MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog
participants (1)
-
Sergey Petrunia