revision-id: f503b2a8e55623726648bbb69bda92fbcebe75dd (mariadb-10.3.0-765-gf503b2a8e55) parent(s): 91245909a2f0c89444ecb5af587284f53b7196ee author: Varun Gupta committer: Varun Gupta timestamp: 2018-04-19 13:33:35 +0530 message: MDEV-15777: Support Early NULLs filtering-like restrictions in the range optimizer Improved patch with comments and test case added. --- mysql-test/main/mdev15777.result | 65 ++++++++++++++++++++++++ mysql-test/main/mdev15777.test | 34 +++++++++++++ sql/opt_range.cc | 107 ++++++++++++++++++++++++++++++++++++++- sql/opt_range.h | 3 ++ sql/sql_select.cc | 29 +++++++++-- 5 files changed, 234 insertions(+), 4 deletions(-) diff --git a/mysql-test/main/mdev15777.result b/mysql-test/main/mdev15777.result new file mode 100644 index 00000000000..87167c16f6d --- /dev/null +++ b/mysql-test/main/mdev15777.result @@ -0,0 +1,65 @@ +create table ten(a int); +insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table one_k(a int); +insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C; +create table one_m(a int); +insert into one_m select A.a + B.a* 1000 from one_k A, one_k B; +delete from one_m where a=0 limit 1; +create table t1 ( +id int(10) unsigned NOT NULL AUTO_INCREMENT, +filler varchar(100), +subset_id int(11) DEFAULT NULL, +PRIMARY KEY (id), +KEY t1_subset_id (subset_id) +); +create table t1_subsets ( +id int(10) unsigned NOT NULL AUTO_INCREMENT, +filler1 varchar(100), +filler2 varchar(100), +filler3 varchar(100), +PRIMARY KEY (id) +); +insert into t1 select a,a, NULL from one_m where a < 50*1000; +insert into t1_subsets select a,a,a,a from one_m where a < 500*1000 limit 499000; +analyze format=json +SELECT * FROM t1 WHERE t1.subset_id IN (SELECT t1_subsets.id FROM t1_subsets); +ANALYZE +{ + "query_block": { + "select_id": 1, + "r_loops": 1, + "r_total_time_ms": 0.0444, + "table": { + "table_name": "t1", + "access_type": "range", + "possible_keys": ["t1_subset_id"], + "key": "t1_subset_id", + "key_length": "5", + "used_key_parts": ["subset_id"], + "r_loops": 1, + "rows": 3, + "r_rows": 0, + "r_total_time_ms": 0.0146, + "filtered": 100, + "r_filtered": 100, + "index_condition": "t1.subset_id is not null" + }, + "table": { + "table_name": "t1_subsets", + "access_type": "eq_ref", + "possible_keys": ["PRIMARY"], + "key": "PRIMARY", + "key_length": "4", + "used_key_parts": ["id"], + "ref": ["test.t1.subset_id"], + "r_loops": 0, + "rows": 1, + "r_rows": null, + "filtered": 100, + "r_filtered": null, + "attached_condition": "t1.subset_id = t1_subsets.`id`", + "using_index": true + } + } +} +drop table t1,t1_subsets,ten,one_k,one_m; diff --git a/mysql-test/main/mdev15777.test b/mysql-test/main/mdev15777.test new file mode 100644 index 00000000000..ad5777aeecf --- /dev/null +++ b/mysql-test/main/mdev15777.test @@ -0,0 +1,34 @@ +create table ten(a int); +insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); + +create table one_k(a int); +insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C; + +create table one_m(a int); +insert into one_m select A.a + B.a* 1000 from one_k A, one_k B; +delete from one_m where a=0 limit 1; + +create table t1 ( + id int(10) unsigned NOT NULL AUTO_INCREMENT, + filler varchar(100), + subset_id int(11) DEFAULT NULL, + PRIMARY KEY (id), + KEY t1_subset_id (subset_id) +); + +create table t1_subsets ( + id int(10) unsigned NOT NULL AUTO_INCREMENT, + filler1 varchar(100), + filler2 varchar(100), + filler3 varchar(100), + PRIMARY KEY (id) +); + +insert into t1 select a,a, NULL from one_m where a < 50*1000; +insert into t1_subsets select a,a,a,a from one_m where a < 500*1000 limit 499000; + +analyze format=json +SELECT * FROM t1 WHERE t1.subset_id IN (SELECT t1_subsets.id FROM t1_subsets); +drop table t1,t1_subsets,ten,one_k,one_m; + + diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 38dbed92a22..650b04166e7 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -1184,6 +1184,7 @@ SQL_SELECT *make_select(TABLE *head, table_map const_tables, select->const_tables=const_tables; select->head=head; select->cond= conds; + select->null_rejecting_conds= NULL; if (filesort && my_b_inited(&filesort->io_cache)) { @@ -2430,7 +2431,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, { uchar buff[STACK_BUFF_ALLOC]; MEM_ROOT alloc; - SEL_TREE *tree= NULL; + SEL_TREE *tree= NULL, *not_null_cond_tree= NULL; KEY_PART *key_parts; KEY *key_info; PARAM param; @@ -2539,6 +2540,12 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, TRP_GROUP_MIN_MAX *group_trp; double best_read_time= read_time; + if (null_rejecting_conds) + { + not_null_cond_tree= null_rejecting_conds->get_mm_tree(¶m, + &null_rejecting_conds); + } + if (cond) { if ((tree= cond->get_mm_tree(¶m, &cond))) @@ -2557,6 +2564,13 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, tree= NULL; } } + if (not_null_cond_tree) + { + if (!tree) + tree= not_null_cond_tree; + else + tree= tree_and(¶m, tree, not_null_cond_tree); + } /* Try to construct a QUICK_GROUP_MIN_MAX_SELECT. @@ -14647,6 +14661,97 @@ void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names, add_key_and_length(key_names, used_lengths, &first); } +inline void add_cond(THD *thd, Item **e1, Item *e2) +{ + if (*e1) + { + if (!e2) + return; + Item *res; + if ((res= new (thd->mem_root) Item_cond_and(thd, *e1, e2))) + { + res->fix_fields(thd, 0); + res->update_used_tables(); + *e1= res; + } + } + else + *e1= e2; +} + +/* + Create null rejecting conditions for a table, for all the equalites + present in the WHERE clause of a query. + + SYNOPSIS + make_null_rejecting_conds() + @param TABLE - Keys of this table will participate in null + rejecting conditions + @param keyuse_array - array that has all the equalites of the + WHERE clasuse + + DESCRIPTION + This function creates null rejecting conditions for a table. These + conditions are created to do range analysis on them , the conditions + are of the form tbl.key.keypart IS NOT NULL. + + IMPLEMENTATION + Lookup in the keyuse array to check if it has equalites that belong + to the given table. If yes then find out if the conditions are null + rejecting and accordingly create all the condition for the keys of a + given table and AND them. + + + RETURN + NOT NULL - Found null rejecting conditions for the given table + NULL - No null rejecting conditions for the given table +*/ + +Item* make_null_rejecting_conds(THD *thd, TABLE *table, + DYNAMIC_ARRAY *keyuse_array, key_map *const_keys) +{ + KEY *keyinfo; + Item *cond= NULL; + KEYUSE* keyuse; + for(uint i=0; i < keyuse_array->elements; i++) + { + KEYUSE* keyuse= (KEYUSE*)dynamic_array_ptr(keyuse_array, i); + if (keyuse->table == table) + { + keyinfo= keyuse->table->key_info+keyuse->key; + Field *field= keyinfo->key_part[keyuse->keypart].field; + + /* + No need to add null-rejecting condition if we have a + keyuse element as + - table.key.keypart= const + - (table.key.keypart= tbl.otherfield or table.key.keypart IS NULL) + - table.key.keypart IS NOT NULLABLE + */ + + if (keyuse->val->const_item() + || !(keyuse->null_rejecting && field->maybe_null()) + || keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) + continue; + + Item_field *field_item= new (thd->mem_root)Item_field(thd, field); + Item* not_null_item= new (thd->mem_root)Item_func_isnotnull(thd, + field_item); + + /* + adding the key to const keys as we have the condition + as key.keypart IS NOT NULL + */ + + const_keys->set_bit(keyuse->key); + not_null_item->fix_fields(thd, 0); + not_null_item->update_used_tables(); + add_cond(thd, &cond, not_null_item); + } + } + return cond; +} + #ifndef DBUG_OFF diff --git a/sql/opt_range.h b/sql/opt_range.h index bd85a12d4a1..8ef0993e0af 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -1636,6 +1636,7 @@ class SQL_SELECT :public Sql_alloc { /* See PARAM::possible_keys */ key_map possible_keys; bool free_cond; /* Currently not used and always FALSE */ + Item *null_rejecting_conds; SQL_SELECT(); ~SQL_SELECT(); @@ -1728,6 +1729,8 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond); bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond); #endif void store_key_image_to_rec(Field *field, uchar *ptr, uint len); +Item* make_null_rejecting_conds(THD *thd, TABLE *table, + DYNAMIC_ARRAY *keyuse_array, key_map *const_keys); extern String null_string; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 796ea569e64..77a9e28501b 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -4242,6 +4242,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, SARGABLE_PARAM *sargables= 0; List_iterator<TABLE_LIST> ti(tables_list); TABLE_LIST *tables; + Item* null_rejecting_conds= NULL; DBUG_ENTER("make_join_statistics"); table_count=join->table_count; @@ -4783,6 +4784,9 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, add_group_and_distinct_keys(join, s); s->table->cond_selectivity= 1.0; + + null_rejecting_conds= make_null_rejecting_conds(join->thd, s->table, + keyuse_array, &s->const_keys); /* Perform range analysis if there are keys it could use (1). @@ -4812,6 +4816,8 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, 1, &error); if (!select) goto error; + + select->null_rejecting_conds= null_rejecting_conds; records= get_quick_record_count(join->thd, select, s->table, &s->const_keys, join->row_limit); /* Range analyzer could modify the condition. */ @@ -4824,6 +4830,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, s->needed_reg=select->needed_reg; select->quick=0; impossible_range= records == 0 && s->table->reginfo.impossible_range; + select->null_rejecting_conds= NULL; } if (!impossible_range) { @@ -4867,7 +4874,11 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, } } - + if (null_rejecting_conds) + { + delete null_rejecting_conds; + null_rejecting_conds= NULL; + } if (pull_out_semijoin_tables(join)) DBUG_RETURN(TRUE); @@ -5330,15 +5341,24 @@ add_key_field(JOIN *join, If the condition has form "tbl.keypart = othertbl.field" and othertbl.field can be NULL, there will be no matches if othertbl.field has NULL value. + + The field KEY_FIELD::null_rejecting is set to TRUE if we have both + the left and right hand side of the equality are NULLABLE + We use null_rejecting in add_not_null_conds() to add 'othertbl.field IS NOT NULL' to tab->select_cond. + + We use null_rejecting in make_null_rejecting_conds() to add + tbl.keypart IS NOT NULL so we can do range analysis on this condition + */ { Item *real= (*value)->real_item(); if (((cond->functype() == Item_func::EQ_FUNC) || (cond->functype() == Item_func::MULT_EQUAL_FUNC)) && - (real->type() == Item::FIELD_ITEM) && + (((real->type() == Item::FIELD_ITEM) && ((Item_field*)real)->field->maybe_null()) + ||(field->maybe_null()))) (*key_fields)->null_rejecting= true; else (*key_fields)->null_rejecting= false; @@ -9794,7 +9814,10 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, uint maybe_null= MY_TEST(keyinfo->key_part[i].null_bit); j->ref.items[i]=keyuse->val; // Save for cond removal j->ref.cond_guards[i]= keyuse->cond_guard; - if (keyuse->null_rejecting) + Item *real= (keyuse->val)->real_item(); + if (keyuse->null_rejecting && + (real->type() == Item::FIELD_ITEM) && + ((Item_field*)real)->field->maybe_null()) j->ref.null_rejecting|= (key_part_map)1 << i; keyuse_uses_no_tables= keyuse_uses_no_tables && !keyuse->used_tables; /*