revision-id: 4addd31531f722438b8b702c9cd00c28b61efce3 (mariadb-10.4.11-495-g4addd31531f) parent(s): 0adbf27f00e19801a8e244292000e85e6889b9aa author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2020-12-11 18:54:21 +0300 message: MDEV-21958: Query having many NOT-IN clauses running forever Basic variant of the fix: do not consider conditions in form unique_key NOT IN (c1,c2...) to be sargable. If there are only a few constants, the condition is not selective. If there are a lot constants, the overhead of processing such a huge range list is not worth it. --- mysql-test/main/range.result | 30 ++++++++++++++++++++++++++++++ mysql-test/main/range.test | 24 ++++++++++++++++++++++++ mysql-test/main/range_mrr_icp.result | 30 ++++++++++++++++++++++++++++++ sql/opt_range.cc | 24 ++++++++++++++++++++++++ 4 files changed, 108 insertions(+) diff --git a/mysql-test/main/range.result b/mysql-test/main/range.result index c10ddf9d9fd..132ca019a61 100644 --- a/mysql-test/main/range.result +++ b/mysql-test/main/range.result @@ -3241,6 +3241,36 @@ analyze SELECT * FROM t1 where a in (0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17 id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra 1 SIMPLE t1 index a a 5 NULL 2000 2000.00 10.05 60.05 Using where; Using index drop table t1,ten,t2; +# +# MDEV-21958: Query having many NOT-IN clauses running forever +# +create table t2 ( +pk int primary key, +key1 int, +col1 int, +key (key1, pk) +); +insert into t2 (pk, key1) values (1,1),(2,2),(3,3),(4,4),(5,5); +set @tmp_21958=@@optimizer_trace; +set optimizer_trace=1; +explain select * from t2 where key1 in (1,2,3) and pk not in (1,2,3); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL PRIMARY,key1 NULL NULL NULL 5 Using where +# This should show only ranges in form "(1) <= (key1) <= (1)" +# ranges over "pk" should not be constructed. +select json_detailed(JSON_EXTRACT(trace, '$**.ranges')) +from information_schema.optimizer_trace; +json_detailed(JSON_EXTRACT(trace, '$**.ranges')) +[ + + [ + "(1) <= (key1) <= (1)", + "(2) <= (key1) <= (2)", + "(3) <= (key1) <= (3)" + ] +] +set optimizer_trace=@tmp_21958; +drop table t2; # End of 10.4 tests set global innodb_stats_persistent= @innodb_stats_persistent_save; set global innodb_stats_persistent_sample_pages= diff --git a/mysql-test/main/range.test b/mysql-test/main/range.test index 65f580698c5..fab6c8530c2 100644 --- a/mysql-test/main/range.test +++ b/mysql-test/main/range.test @@ -2207,6 +2207,30 @@ let $a= `select group_concat(a) from t2`; eval analyze SELECT * FROM t1 where a in ($a); drop table t1,ten,t2; +--echo # +--echo # MDEV-21958: Query having many NOT-IN clauses running forever +--echo # +create table t2 ( + pk int primary key, + key1 int, + col1 int, + key (key1, pk) +); + +insert into t2 (pk, key1) values (1,1),(2,2),(3,3),(4,4),(5,5); + +set @tmp_21958=@@optimizer_trace; +set optimizer_trace=1; +explain select * from t2 where key1 in (1,2,3) and pk not in (1,2,3); + +--echo # This should show only ranges in form "(1) <= (key1) <= (1)" +--echo # ranges over "pk" should not be constructed. +select json_detailed(JSON_EXTRACT(trace, '$**.ranges')) +from information_schema.optimizer_trace; +set optimizer_trace=@tmp_21958; + +drop table t2; + --echo # End of 10.4 tests set global innodb_stats_persistent= @innodb_stats_persistent_save; diff --git a/mysql-test/main/range_mrr_icp.result b/mysql-test/main/range_mrr_icp.result index 826ac621064..ebefc21a58d 100644 --- a/mysql-test/main/range_mrr_icp.result +++ b/mysql-test/main/range_mrr_icp.result @@ -3238,6 +3238,36 @@ analyze SELECT * FROM t1 where a in (0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17 id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra 1 SIMPLE t1 index a a 5 NULL 2000 2000.00 10.05 60.05 Using where; Using index drop table t1,ten,t2; +# +# MDEV-21958: Query having many NOT-IN clauses running forever +# +create table t2 ( +pk int primary key, +key1 int, +col1 int, +key (key1, pk) +); +insert into t2 (pk, key1) values (1,1),(2,2),(3,3),(4,4),(5,5); +set @tmp_21958=@@optimizer_trace; +set optimizer_trace=1; +explain select * from t2 where key1 in (1,2,3) and pk not in (1,2,3); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL PRIMARY,key1 NULL NULL NULL 5 Using where +# This should show only ranges in form "(1) <= (key1) <= (1)" +# ranges over "pk" should not be constructed. +select json_detailed(JSON_EXTRACT(trace, '$**.ranges')) +from information_schema.optimizer_trace; +json_detailed(JSON_EXTRACT(trace, '$**.ranges')) +[ + + [ + "(1) <= (key1) <= (1)", + "(2) <= (key1) <= (2)", + "(3) <= (key1) <= (3)" + ] +] +set optimizer_trace=@tmp_21958; +drop table t2; # End of 10.4 tests set global innodb_stats_persistent= @innodb_stats_persistent_save; set global innodb_stats_persistent_sample_pages= diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 043a1e70f61..1a7ac1044c3 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -7787,6 +7787,30 @@ SEL_TREE *Item_func_in::get_func_mm_tree(RANGE_OPT_PARAM *param, if (array->count > NOT_IN_IGNORE_THRESHOLD || !value_item) DBUG_RETURN(0); + /* + If this is "unique_key NOT IN (...)", do not consider it sargable (for + any index, not just the unique one). The logic is as follows: + - if there are only a few constants, this condition is not selective + (unless the table is also very small in which case we won't gain + anything) + - If there are a lot of constants, the overhead of building and + processing enormous range list is not worth it. + */ + if (param->using_real_indexes) + { + key_map::Iterator it(field->key_start); + uint key_no; + while ((key_no= it.next_bit()) != key_map::Iterator::BITMAP_END) + { + KEY *key_info= ¶m->table->key_info[key_no]; + if (key_info->user_defined_key_parts == 1 && + (key_info->flags & HA_NOSAME)) + { + DBUG_RETURN(0); + } + } + } + /* Get a SEL_TREE for "(-inf|NULL) < X < c_0" interval. */ uint i=0; do