Hi Varun,
Please find review input below. Only cosmetic changes are requested.
I also assume that after this is pushed
- we add a optimizer trace printout into 10.4
- the feature is enabled by default in 10.5
? (do we need separate MDEVs for this?)
One final question is the choice of name for @@optimizer_switch flag. Let me get back to
you on this.
commit b4696ee97e6846ba49ef203cfc7189f50c1e53a7
Author: Varun Gupta <varun.gupta@mariadb.com>
Date: Mon May 20 15:14:30 2019 +0530
MDEV-15777: Support Early NULLs filtering-like restrictions in the range optimizer
For eqjoin conditions, we add a NOT NULL predicate so as to allow the range optimizer
to use the predicate and possibly create a range access on the given table.
Example:
select * from t1,t2 where t1.a=t2.a; we have KEY(a) on t1
we would inject a NOT NULL predicate t1.a IS NOT NULL for table t1
this would allow the range optimizer to create ranges and we get a
range access on table t1, then we will be able
to do an early NULL filtering for ranges too.
diff --git a/mysql-test/main/range.result b/mysql-test/main/range.result
index 32e0cf2868c..d75d6734277 100644
--- a/mysql-test/main/range.result
+++ b/mysql-test/main/range.result
@@ -3024,3 +3024,151 @@ drop table t1;
#
# End of 10.2 tests
#
+#
+# MDEV-15777: Support Early NULLs filtering-like restrictions in the range optimizer
+#
+set @save_optimizer_switch= @@optimizer_switch;
+set @@optimizer_switch='null_rejecting_for_ranges=on';
+create table ten(a int);
+insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table one_k(a int);
+insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C;
Did we abandon the convention of naming the tables t1,t2,t3,... ?
If not, please rename.
+create table t1 (
+id int(10) unsigned NOT NULL,
+subset_id int(11) DEFAULT NULL,
+PRIMARY KEY (id),
+KEY t1_subset_id (subset_id));
...
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index ec7b3dbbd7a..cb7800d1ba6 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -2539,6 +2546,42 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
TRP_GROUP_MIN_MAX *group_trp;
double best_read_time= read_time;
+ /*
+ For the range optimizer, null_rejecting_conds are just an extra condition
+ on which one can use index to create ranges.
+
+ Also here we try to allow consider range access for a column involed in a
+ null rejecting predicate only if it is the first keypart of an index.
+ Let try to make it clear with examples:
+
+ Query 1
+ select * from t1,t2 where t1.a=2 and t1.b=t2.b;
+ and we have a key(a,b)
+ So lets consider range access for table t1:
+ Null rejecting predicate : t1.b IS NOT NULL
+ but the column t1.b is NOT the first keypart in the index,
+ so we don't consider it while building ranges
+
+ Query 2
+ select * from t1,t2 where t1.a=t2.a and t1.b=t2.b;
+ and we have a key (a,b)
+
+ So lets consider range access for table t1:
+ Null rejecting predicate : t1.a IS NOT NULL AND t1.b IS NOT NULL
+ so here t1.a is the first keypart in the index, so we consider
+ the predicate for building the ranges.
+
+ The first keypart prerequisite is made as we try to reuse range
+ estimates (because they are more accurate) at different places
+ and we needed to make sure that correct/accurate estimates are used there.
+ */
+
+ if (null_rejecting_conds)
+ not_null_cond_tree= null_rejecting_conds->get_mm_tree(¶m,
+ &null_rejecting_conds);
Please fix indentation and fit within 80 chars. ^
And also use curly braces as the code inside if doesn't fit on one line.
+ if (not_null_cond_tree)
+ remove_nonrange_trees(¶m, not_null_cond_tree);
+
if (cond)
{
if ((tree= cond->get_mm_tree(¶m, &cond)))
@@ -2557,6 +2600,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
tree= NULL;
}
}
+ tree= tree_and(¶m, tree, not_null_cond_tree);
/*
Try to construct a QUICK_GROUP_MIN_MAX_SELECT.
@@ -4422,6 +4466,128 @@ static bool create_partition_index_description(PART_PRUNE_PARAM *ppar)
return FALSE;
}
+inline void add_cond(THD *thd, Item **e1, Item *e2)
+{
+ if (*e1)
+ {
+ if (!e2)
+ return;
+ Item *res;
+ if ((res= new (thd->mem_root) Item_cond_and(thd, *e1, e2)))
+ {
+ res->fix_fields(thd, 0);
+ res->update_used_tables();
+ *e1= res;
+ }
+ }
+ else
+ *e1= e2;
+}
+
+/*
+ Create null rejecting conditions for a table, for all the equalites
+ present in the WHERE clause of a query.
+
+ SYNOPSIS
+ make_null_rejecting_conds()
+ @param TABLE - Keys of this table will participate in null
+ rejecting conditions
+ @param keyuse_array - array that has all the equalites of the
+ WHERE clasuse
The above doesn't match the function's definition. Out of date comment?
+
+ DESCRIPTION
+ This function creates null rejecting conditions for a table. These
+ conditions are created to do range analysis on them , the conditions
+ are of the form tbl.key.keypart IS NOT NULL.
+
+ IMPLEMENTATION
+ Lookup in the keyuse array to check if it has equalites that belong
+ to the given table. If yes then find out if the conditions are null
+ rejecting and accordingly create all the condition for the keys of a
+ given table and AND them.
+
+
+ RETURN
+ NOT NULL - Found null rejecting conditions for the given table
+ NULL - No null rejecting conditions for the given table
The above doesn't match the function's definition. Out of date comment?
+*/
+
+void make_null_rejecting_conds(THD *thd, JOIN_TAB *tab)
+{
+ KEY *keyinfo;
+ Item *cond= NULL;
+ KEYUSE* keyuse;
+ TABLE *table= tab->table;
+ key_map *const_keys= &tab->const_keys;
^ Does it make sense to introduce a variable if it is only used once?
+
+ /*
+ No need to add NOT NULL condition for materialized derived tables
+ or materialized subqueries as we do not run the range optimizer
+ on their conditions
+ */
+
+ if (!optimizer_flag(thd, OPTIMIZER_SWITCH_NULL_REJECTING_FOR_RANGES))
+ return;
+
+ if (tab->table->is_filled_at_execution() ||
+ (tab->table->pos_in_table_list->derived &&
+ tab->table->pos_in_table_list->is_materialized_derived()))
+ return;
+
+ Field_map not_null_keypart_map;
+
+ /*
+ The null rejecting conds added will be on the keypart of a key, so for
+ that we need the table to atleast have a key.
+ */
+ if (!table->s->keys || table->null_rejecting_conds || !tab->keyuse)
+ return;
+
+ for(keyuse= tab->keyuse; keyuse->table == table; keyuse++)
+ {
Please add space, "for ".
+ /*
+ No null rejecting conds for a hash key or full-text keys
+ */
+ if (keyuse->key == MAX_KEY || keyuse->keypart == FT_KEYPART)
+ continue;
+ keyinfo= keyuse->table->key_info + keyuse->key;
+ Field *field= keyinfo->key_part[keyuse->keypart].field;
+
+ if (not_null_keypart_map.is_set(field->field_index))
+ continue;
+
+ /*
+ No need to add null-rejecting condition if we have a
+ keyuse element as
+ - table.key.keypart= const
+ - (table.key.keypart= tbl.otherfield or table.key.keypart IS NULL)
+ - table.key.keypart IS NOT NULLABLE
+ */
+
+ if (keyuse->val->const_item() ||
+ !(keyuse->null_rejecting && field->maybe_null()) ||
+ keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
+ continue;
+
+ Item_field *field_item= new (thd->mem_root)Item_field(thd, field);
+ Item* not_null_item= new (thd->mem_root)Item_func_isnotnull(thd, field_item);
+
+ /*
+ adding the key to const keys as we have the condition
+ as key.keypart IS NOT NULL
+ */
+
+ const_keys->set_bit(keyuse->key);
+ not_null_item->fix_fields(thd, 0);
^^ please change 0 to ¬_null_item.
+ not_null_item->update_used_tables();
+ add_cond(thd, &cond, not_null_item);
+ not_null_keypart_map.set_bit(field->field_index);
+ }
+ DBUG_EXECUTE("where", print_where(cond, "Early Null Filtering",
+ QT_ORDINARY););
+ table->null_rejecting_conds= cond;
+}
+
#ifndef DBUG_OFF
...
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 8cdcf3afc8b..140ceed5c2d 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -5510,15 +5512,30 @@ add_key_field(JOIN *join,
If the condition has form "tbl.keypart = othertbl.field" and
othertbl.field can be NULL, there will be no matches if othertbl.field
has NULL value.
- We use null_rejecting in add_not_null_conds() to add
- 'othertbl.field IS NOT NULL' to tab->select_cond.
+
+ The field KEY_FIELD::null_rejecting is set to TRUE if either
+ the left or the right hand side of the equality is NULLABLE
+
+ null_rejecting is used
+ - by add_not_null_conds(), to add "othertbl.field IS NOT NULL" to
+ othertbl's tab->select_cond. (This is called "Early NULLs filtering")
+
+ - by make_null_rejecting_conds(), to provide range optimizer with
+ additional "tbl.keypart IS NOT NULL" condition.
*/
{
Item *real= (*value)->real_item();
- if (((cond->functype() == Item_func::EQ_FUNC) ||
- (cond->functype() == Item_func::MULT_EQUAL_FUNC)) &&
- (real->type() == Item::FIELD_ITEM) &&
- ((Item_field*)real)->field->maybe_null())
+ if ((
+ (cond->functype() == Item_func::EQ_FUNC) ||
+ (cond->functype() == Item_func::MULT_EQUAL_FUNC)
+ ) &&
+ (
+ (
+ (real->type() == Item::FIELD_ITEM) &&
+ ((Item_field*)real)->field->maybe_null()
+ ) ||
+ (field->maybe_null())
+ ))
(*key_fields)->null_rejecting= true;
else
(*key_fields)->null_rejecting= false;
@@ -9982,8 +9999,12 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j,
uint maybe_null= MY_TEST(keyinfo->key_part[i].null_bit);
j->ref.items[i]=keyuse->val; // Save for cond removal
j->ref.cond_guards[i]= keyuse->cond_guard;
- if (keyuse->null_rejecting)
+ Item *real= (keyuse->val)->real_item();
+ if (keyuse->null_rejecting &&
+ (real->type() == Item::FIELD_ITEM) &&
+ ((Item_field*)real)->field->maybe_null())
This code looks as if
if keyuse->val->maybe_null() != keyuse->val->real_item()->maybe_null()
do you have any specific case in mind or this is just to match the code in
add_key_field?
j->ref.null_rejecting|= (key_part_map)1 << i;
+
keyuse_uses_no_tables= keyuse_uses_no_tables && !keyuse->used_tables;
/*
We don't want to compute heavy expressions in EXPLAIN, an example would
BR
Sergei
--
Sergei Petrunia, Software Developer
MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog