revision-id: ed0b27a1577b61fee7031a6c3e33b8d658c0f45b (mariadb-10.2.16-50-ged0b27a) parent(s): 3c141e319ab29afdfe843553da385fe5d981906e author: Igor Babaev committer: Igor Babaev timestamp: 2018-08-17 14:27:42 -0700 message: MDEV-16934 Query with very large IN clause lists runs slowly This patch introduces support for the system variable eq_range_index_dive_limit that existed in MySQL starting from 5.6. The variable sets a limit for index dives into equality ranges. Index dives are performed by optimizer to estimate the number of rows in range scans. Index dives usually provide good estimate but they are pretty expensive. To estimate the number of rows in equality ranges statistical data on indexes can be employed. Its usage gives not so good estimates but it's cheap. So if the number of equality dives required by an index scan exceeds the set limit no dives for equality ranges are performed by the optimizer for this index. As the new system variable is introduced in a stable version the default value for it is set to a special value meaning there is no limit for the number of index dives performed by the optimizer. The patch partially uses the MySQL code for WL 5957 'Statistics-based Range optimization for many ranges'. --- mysql-test/r/mysqld--help.result | 6 +++ mysql-test/r/range.result | 41 ++++++++++++++++ mysql-test/r/range_mrr_icp.result | 41 ++++++++++++++++ .../sys_vars/r/sysvars_server_embedded.result | 14 ++++++ .../sys_vars/r/sysvars_server_notembedded.result | 14 ++++++ mysql-test/t/range.test | 33 +++++++++++++ sql/multi_range_read.cc | 11 +++++ sql/opt_range.cc | 27 +++++++++++ sql/opt_range.h | 4 +- sql/opt_range_mrr.cc | 54 +++++++++++++++------- sql/sql_class.h | 1 + sql/sql_statistics.h | 2 +- sql/sys_vars.cc | 10 ++++ 13 files changed, 239 insertions(+), 19 deletions(-) diff --git a/mysql-test/r/mysqld--help.result b/mysql-test/r/mysqld--help.result index 3b7d5e2..51ece33 100644 --- a/mysql-test/r/mysqld--help.result +++ b/mysql-test/r/mysqld--help.result @@ -197,6 +197,11 @@ The following specify which files/extra groups are read (specified before remain cache, etc) --enforce-storage-engine=name Force the use of a storage engine for new tables + --eq-range-index-dive-limit=# + The optimizer will use existing index statistics instead + of doing index dives for equality ranges if the number of + equality ranges for the index is larger than or equal to + this number. If set to 0, index dives are always used. --event-scheduler[=name] Enable the event scheduler. Possible values are ON, OFF, and DISABLED (keep the event scheduler completely @@ -1259,6 +1264,7 @@ encrypt-binlog FALSE encrypt-tmp-disk-tables FALSE encrypt-tmp-files FALSE enforce-storage-engine (No default value) +eq-range-index-dive-limit 0 event-scheduler OFF expensive-subquery-limit 100 expire-logs-days 0 diff --git a/mysql-test/r/range.result b/mysql-test/r/range.result index 3a71d08..0c6be5e 100644 --- a/mysql-test/r/range.result +++ b/mysql-test/r/range.result @@ -3002,5 +3002,46 @@ deallocate prepare stmt; set optimizer_switch=@save_optimizer_switch; drop table t1,t2,t3; # +# MDEV-16934: using system variable eq_range_index_dive_limit +# to reduce the number of index dives +# +create table t1 (a int, b varchar(31), index idx(a)); +insert into t1 values +(7,'xxxx'), (1,'yy'), (3,'aaa'), (1,'bbb'), (2,'zz'), +(4,'vvvvv'), (7,'ddd'), (9,'zzzzz'), (1,'cc'), (5,'ffff'); +insert into t1 select a+10, concat(b,'zz') from t1; +insert into t1 select a+15, concat(b,'yy') from t1; +insert into t1 select a+100, concat(b,'xx') from t1; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select cast(count(a)/count(distinct a) as unsigned) as rec_per_key from t1; +rec_per_key +2 +set eq_range_index_dive_limit=0; +explain select * from t1 where a in (8, 15, 31, 1, 9); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx idx 5 NULL 7 Using index condition +select * from t1 where a in (8, 15, 31, 1, 9); +a b +1 yy +1 bbb +1 cc +9 zzzzz +15 ffffzz +set eq_range_index_dive_limit=2; +explain select * from t1 where a in (8, 15, 31, 1, 9); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx idx 5 NULL 10 Using index condition +select * from t1 where a in (8, 15, 31, 1, 9); +a b +1 yy +1 bbb +1 cc +9 zzzzz +15 ffffzz +set eq_range_index_dive_limit=default; +drop table t1; +# # End of 10.2 tests # diff --git a/mysql-test/r/range_mrr_icp.result b/mysql-test/r/range_mrr_icp.result index 799a299..93f414f 100644 --- a/mysql-test/r/range_mrr_icp.result +++ b/mysql-test/r/range_mrr_icp.result @@ -3014,6 +3014,47 @@ deallocate prepare stmt; set optimizer_switch=@save_optimizer_switch; drop table t1,t2,t3; # +# MDEV-16934: using system variable eq_range_index_dive_limit +# to reduce the number of index dives +# +create table t1 (a int, b varchar(31), index idx(a)); +insert into t1 values +(7,'xxxx'), (1,'yy'), (3,'aaa'), (1,'bbb'), (2,'zz'), +(4,'vvvvv'), (7,'ddd'), (9,'zzzzz'), (1,'cc'), (5,'ffff'); +insert into t1 select a+10, concat(b,'zz') from t1; +insert into t1 select a+15, concat(b,'yy') from t1; +insert into t1 select a+100, concat(b,'xx') from t1; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select cast(count(a)/count(distinct a) as unsigned) as rec_per_key from t1; +rec_per_key +2 +set eq_range_index_dive_limit=0; +explain select * from t1 where a in (8, 15, 31, 1, 9); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx idx 5 NULL 7 Using index condition; Rowid-ordered scan +select * from t1 where a in (8, 15, 31, 1, 9); +a b +1 yy +1 bbb +9 zzzzz +1 cc +15 ffffzz +set eq_range_index_dive_limit=2; +explain select * from t1 where a in (8, 15, 31, 1, 9); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx idx 5 NULL 10 Using index condition; Rowid-ordered scan +select * from t1 where a in (8, 15, 31, 1, 9); +a b +1 yy +1 bbb +9 zzzzz +1 cc +15 ffffzz +set eq_range_index_dive_limit=default; +drop table t1; +# # End of 10.2 tests # set optimizer_switch=@mrr_icp_extra_tmp; diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result index f7a5cf5..9d7fa10 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result @@ -765,6 +765,20 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT NULL +VARIABLE_NAME EQ_RANGE_INDEX_DIVE_LIMIT +SESSION_VALUE 0 +GLOBAL_VALUE 0 +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE 0 +VARIABLE_SCOPE SESSION +VARIABLE_TYPE INT UNSIGNED +VARIABLE_COMMENT The optimizer will use existing index statistics instead of doing index dives for equality ranges if the number of equality ranges for the index is larger than or equal to this number. If set to 0, index dives are always used. +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 4294967295 +NUMERIC_BLOCK_SIZE 1 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME ERROR_COUNT SESSION_VALUE 0 GLOBAL_VALUE NULL diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result index 034195e..cada139 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result @@ -779,6 +779,20 @@ NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT NULL +VARIABLE_NAME EQ_RANGE_INDEX_DIVE_LIMIT +SESSION_VALUE 0 +GLOBAL_VALUE 0 +GLOBAL_VALUE_ORIGIN COMPILE-TIME +DEFAULT_VALUE 0 +VARIABLE_SCOPE SESSION +VARIABLE_TYPE INT UNSIGNED +VARIABLE_COMMENT The optimizer will use existing index statistics instead of doing index dives for equality ranges if the number of equality ranges for the index is larger than or equal to this number. If set to 0, index dives are always used. +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 4294967295 +NUMERIC_BLOCK_SIZE 1 +ENUM_VALUE_LIST NULL +READ_ONLY NO +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME ERROR_COUNT SESSION_VALUE 0 GLOBAL_VALUE NULL diff --git a/mysql-test/t/range.test b/mysql-test/t/range.test index ab95180..ed68cfb 100644 --- a/mysql-test/t/range.test +++ b/mysql-test/t/range.test @@ -2046,6 +2046,39 @@ set optimizer_switch=@save_optimizer_switch; drop table t1,t2,t3; --echo # +--echo # MDEV-16934: using system variable eq_range_index_dive_limit +--echo # to reduce the number of index dives +--echo # + +create table t1 (a int, b varchar(31), index idx(a)); + +insert into t1 values + (7,'xxxx'), (1,'yy'), (3,'aaa'), (1,'bbb'), (2,'zz'), + (4,'vvvvv'), (7,'ddd'), (9,'zzzzz'), (1,'cc'), (5,'ffff'); +insert into t1 select a+10, concat(b,'zz') from t1; +insert into t1 select a+15, concat(b,'yy') from t1; +insert into t1 select a+100, concat(b,'xx') from t1; + +analyze table t1; + +select cast(count(a)/count(distinct a) as unsigned) as rec_per_key from t1; + +let $q= +select * from t1 where a in (8, 15, 31, 1, 9); + +set eq_range_index_dive_limit=0; +eval explain $q; +eval $q; + +set eq_range_index_dive_limit=2; +eval explain $q; +eval $q; + +set eq_range_index_dive_limit=default; + +drop table t1; + +--echo # --echo # End of 10.2 tests --echo # diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc index 50918d8..94a96c2 100644 --- a/sql/multi_range_read.cc +++ b/sql/multi_range_read.cc @@ -17,6 +17,7 @@ #include <my_bit.h> #include "sql_select.h" #include "key.h" +#include "sql_statistics.h" /**************************************************************************** * Default MRR implementation (MRR to non-MRR converter) @@ -67,6 +68,9 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, /* Default MRR implementation doesn't need buffer */ *bufsz= 0; + bool use_statistics_for_eq_range= eq_ranges_exceeds_limit(seq, + seq_init_param); + seq_it= seq->init(seq_init_param, n_ranges, *flags); while (!seq->next(seq_it, &range)) { @@ -87,8 +91,15 @@ handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, min_endp= range.start_key.length? &range.start_key : NULL; max_endp= range.end_key.length? &range.end_key : NULL; } + int keyparts_used= my_count_bits(range.start_key.keypart_map); if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE)) rows= 1; /* there can be at most one row */ + else if (use_statistics_for_eq_range && + !(range.range_flag & NULL_RANGE) && + (range.range_flag & EQ_RANGE) && + table->key_info[keyno].actual_rec_per_key(keyparts_used - 1) > 0.5) + rows= + (ha_rows) table->key_info[keyno].actual_rec_per_key(keyparts_used - 1); else { if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp, diff --git a/sql/opt_range.cc b/sql/opt_range.cc index b011c01..c9c37a2 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -14616,6 +14616,33 @@ void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names, } +/* Check whether the number for equality ranges exceeds the set threshold */ + +bool eq_ranges_exceeds_limit(RANGE_SEQ_IF *seq, void *seq_init_param) +{ + KEY_MULTI_RANGE range; + range_seq_t seq_it; + uint count = 0; + PARAM *param= ((SEL_ARG_RANGE_SEQ*) seq_init_param)->param; + uint limit= param->thd->variables.eq_range_index_dive_limit; + + if (limit == 0) + { + /* 'Statistics instead of index dives' feature is turned off */ + return false; + } + seq_it= seq->init(seq_init_param, 0, 0); + while (!seq->next(seq_it, &range)) + { + if ((range.range_flag & EQ_RANGE) && !(range.range_flag & NULL_RANGE)) + { + if (++count >= limit) + return true; + } + } + return false; +} + #ifndef DBUG_OFF static void print_sel_tree(PARAM *param, SEL_TREE *tree, key_map *tree_map, diff --git a/sql/opt_range.h b/sql/opt_range.h index c1f7079..6698c98 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -242,7 +242,7 @@ class SEL_ARG :public Sql_alloc Number of children of this element in the RB-tree, plus 1 for this element itself. */ - uint16 elements; + uint32 elements; /* Valid only for elements which are RB-tree roots: Number of times this RB-tree is referred to (it is referred by SEL_ARG::next_key_part or by @@ -1664,6 +1664,8 @@ SQL_SELECT *make_select(TABLE *head, table_map const_tables, bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond); +bool eq_ranges_exceeds_limit(RANGE_SEQ_IF *seq, void *seq_init_param); + #ifdef WITH_PARTITION_STORAGE_ENGINE bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond); #endif diff --git a/sql/opt_range_mrr.cc b/sql/opt_range_mrr.cc index ace6208..c7e4496 100644 --- a/sql/opt_range_mrr.cc +++ b/sql/opt_range_mrr.cc @@ -72,6 +72,7 @@ typedef struct st_sel_arg_range_seq range_seq_t sel_arg_range_seq_init(void *init_param, uint n_ranges, uint flags) { SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)init_param; + seq->param->range_count=0; seq->at_start= TRUE; seq->stack[0].key_tree= NULL; seq->stack[0].min_key= seq->param->min_key; @@ -272,25 +273,44 @@ bool sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) key_info= NULL; else key_info= &seq->param->table->key_info[seq->real_keyno]; - + /* - Conditions below: - (1) - range analysis is used for estimating condition selectivity - (2) - This is a unique key, and we have conditions for all its - user-defined key parts. - (3) - The table uses extended keys, this key covers all components, - and we have conditions for all key parts. + This is an equality range (keypart_0=X and ... and keypart_n=Z) if + (1) - There are no flags indicating open range (e.g., + "keypart_x > y") or GIS. + (2) - The lower bound and the upper bound of the range has the + same value (min_key == max_key). */ - if (!(cur->min_key_flag & ~NULL_RANGE) && !cur->max_key_flag && - (!key_info || // (1) - ((uint)key_tree->part+1 == key_info->user_defined_key_parts && // (2) - key_info->flags & HA_NOSAME) || // (2) - ((key_info->flags & HA_EXT_NOSAME) && // (3) - (uint)key_tree->part+1 == key_info->ext_key_parts) // (3) - ) && - range->start_key.length == range->end_key.length && - !memcmp(seq->param->min_key,seq->param->max_key,range->start_key.length)) - range->range_flag= UNIQUE_RANGE | (cur->min_key_flag & NULL_RANGE); + const uint is_open_range = + (NO_MIN_RANGE | NO_MAX_RANGE | NEAR_MIN | NEAR_MAX | GEOM_FLAG); + const bool is_eq_range_pred = + !(cur->min_key_flag & is_open_range) && // (1) + !(cur->max_key_flag & is_open_range) && // (1) + range->start_key.length == range->end_key.length && // (2) + !memcmp(seq->param->min_key, seq->param->max_key, // (2) + range->start_key.length); + + if (is_eq_range_pred) + { + range->range_flag = EQ_RANGE; + + /* + Conditions below: + (1) - Range analysis is used for estimating condition selectivity + (2) - This is a unique key, and we have conditions for all its + user-defined key parts. + (3) - The table uses extended keys, this key covers all components, + and we have conditions for all key parts. + */ + if ( + !key_info || // (1) + ((uint)key_tree->part+1 == key_info->user_defined_key_parts && // (2) + key_info->flags & HA_NOSAME) || // (2) + ((key_info->flags & HA_EXT_NOSAME) && // (3) + (uint)key_tree->part+1 == key_info->ext_key_parts) // (3) + ) + range->range_flag |= UNIQUE_RANGE | (cur->min_key_flag & NULL_RANGE); + } if (seq->param->is_ror_scan) { diff --git a/sql/sql_class.h b/sql/sql_class.h index 6663d9e..d2f00b2d 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -569,6 +569,7 @@ typedef struct system_variables ha_rows max_join_size; ha_rows expensive_subquery_limit; ulong auto_increment_increment, auto_increment_offset; + uint eq_range_index_dive_limit; ulong lock_wait_timeout; ulong join_cache_level; ulong max_allowed_packet; diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index f465838..4ff85cd 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -21,7 +21,7 @@ enum enum_use_stat_tables_mode { NEVER, COMPLEMENTARY, - PEFERABLY, + PREFERABLY, } Use_stat_tables_mode; typedef diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index e6aec47..650b17a 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -2578,6 +2578,16 @@ static Sys_var_ulong Sys_div_precincrement( SESSION_VAR(div_precincrement), CMD_LINE(REQUIRED_ARG), VALID_RANGE(0, DECIMAL_MAX_SCALE), DEFAULT(4), BLOCK_SIZE(1)); +static Sys_var_uint Sys_eq_range_index_dive_limit( + "eq_range_index_dive_limit", + "The optimizer will use existing index statistics instead of " + "doing index dives for equality ranges if the number of equality " + "ranges for the index is larger than or equal to this number. " + "If set to 0, index dives are always used.", + SESSION_VAR(eq_range_index_dive_limit), CMD_LINE(REQUIRED_ARG), + VALID_RANGE(0, UINT_MAX32), DEFAULT(0), + BLOCK_SIZE(1)); + static Sys_var_ulong Sys_range_alloc_block_size( "range_alloc_block_size", "Allocation block size for storing ranges during optimization",