[Commits] 0ff8b73b60f: MDEV-15777:Support Early NULLs filtering-like restrictions in the range optimizer
revision-id: 0ff8b73b60fae40556f91d3959f20bf3cf182b28 (mariadb-10.3.0-947-g0ff8b73b60f) parent(s): 15419a558370aeed9521b498c34d50f20a8d47a5 author: Varun Gupta committer: Varun Gupta timestamp: 2018-05-15 13:53:11 +0530 message: MDEV-15777:Support Early NULLs filtering-like restrictions in the range optimizer --- mysql-test/main/mdev15777.result | 455 +++++++++++++++++++++++++++++++++++++++ mysql-test/main/mdev15777.test | 138 ++++++++++++ sql/opt_range.cc | 125 ++++++++++- sql/opt_range.h | 2 + sql/sql_select.cc | 22 +- sql/table.cc | 1 + sql/table.h | 6 + 7 files changed, 745 insertions(+), 4 deletions(-) diff --git a/mysql-test/main/mdev15777.result b/mysql-test/main/mdev15777.result new file mode 100644 index 00000000000..549ac08b91f --- /dev/null +++ b/mysql-test/main/mdev15777.result @@ -0,0 +1,455 @@ +# Test added to check that null filtering is used by range optimizer +create table ten(a int); +insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table one_k(a int); +insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C; +create table one_m(a int); +insert into one_m select A.a + B.a* 1000 from one_k A, one_k B; +delete from one_m where a=0 limit 1; +create table t1 ( +id int(10) unsigned NOT NULL AUTO_INCREMENT, +filler varchar(100), +subset_id int(11) DEFAULT NULL, +PRIMARY KEY (id), +KEY t1_subset_id (subset_id) +); +create table t1_subsets ( +id int(10) unsigned NOT NULL AUTO_INCREMENT, +filler1 varchar(100), +filler2 varchar(100), +filler3 varchar(100), +PRIMARY KEY (id) +); +insert into t1 select a,a, NULL from one_m where a < 50*1000; +insert into t1_subsets select a,a,a,a from one_m where a < 500*1000 limit 499000; +analyze format=json +SELECT * FROM t1 WHERE t1.subset_id IN (SELECT t1_subsets.id FROM t1_subsets); +ANALYZE +{ + "query_block": { + "select_id": 1, + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "table": { + "table_name": "t1", + "access_type": "range", + "possible_keys": ["t1_subset_id"], + "key": "t1_subset_id", + "key_length": "5", + "used_key_parts": ["subset_id"], + "r_loops": 1, + "rows": 3, + "r_rows": 0, + "r_total_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100, + "index_condition": "t1.subset_id is not null" + }, + "table": { + "table_name": "t1_subsets", + "access_type": "eq_ref", + "possible_keys": ["PRIMARY"], + "key": "PRIMARY", + "key_length": "4", + "used_key_parts": ["id"], + "ref": ["test.t1.subset_id"], + "r_loops": 0, + "rows": 1, + "r_rows": null, + "filtered": 100, + "r_filtered": null, + "attached_condition": "t1.subset_id = t1_subsets.`id`", + "using_index": true + } + } +} +drop table t1,t1_subsets,ten,one_k,one_m; +create table t0(a int); +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t1(a int); +insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C; +create table t2 ( +pk int primary key, +a int, b int, +filler char(200), +key(a) +); +insert into t2 select a, 1000-a, 1000-a, repeat('abc-',50) from t1 where a<200 limit 200; +create table t3 ( +pk int primary key, +a int, b int, +filler char(200), +key(a) +); +insert into t3 select a, 1000-a, 1000-a, repeat('abc-',50) from t1; +insert into t3 select a+1000, 1000+a, 1000+a, repeat('abc-',50) from t1; +analyze format=json +select t2.pk,t2.a,t2.b,t3.pk,t3.a,t3.b +from t2, t3 where t2.a=t3.a order by t2.a limit 5; +ANALYZE +{ + "query_block": { + "select_id": 1, + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "table": { + "table_name": "t2", + "access_type": "range", + 
"possible_keys": ["a"], + "key": "a", + "key_length": "5", + "used_key_parts": ["a"], + "r_loops": 1, + "rows": 200, + "r_rows": 5, + "r_total_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100, + "index_condition": "t2.a is not null" + }, + "table": { + "table_name": "t3", + "access_type": "ref", + "possible_keys": ["a"], + "key": "a", + "key_length": "5", + "used_key_parts": ["a"], + "ref": ["test.t2.a"], + "r_loops": 5, + "rows": 1, + "r_rows": 1, + "r_total_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100 + } + } +} +drop table t1,t2,t3,t0; +CREATE TABLE t1 ( a varchar(1)) ; +INSERT INTO t1 VALUES ('c'),('b'); +CREATE TABLE t3 ( a int NOT NULL , b varchar(1)) ; +INSERT INTO t3 VALUES (29,'c'); +INSERT INTO t3 VALUES (2,'c'); +alter table t1 add index aa (a); +alter table t3 add index bb (b); +SELECT * FROM t1 WHERE EXISTS ( SELECT a FROM t3 WHERE t3.b = t1.a); +a +c +explain extended +SELECT * FROM t1 WHERE EXISTS ( SELECT a FROM t3 WHERE t3.b = t1.a); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY t3 range bb bb 4 NULL 2 100.00 Using index condition; LooseScan +1 PRIMARY t1 ref aa aa 4 test.t3.b 2 100.00 Using index +Warnings: +Note 1276 Field or reference 'test.t1.a' of SELECT #2 was resolved in SELECT #1 +Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` semi join (`test`.`t3`) where `test`.`t1`.`a` = `test`.`t3`.`b` +analyze format=json +SELECT * FROM t1 WHERE EXISTS ( SELECT a FROM t3 WHERE t3.b = t1.a); +ANALYZE +{ + "query_block": { + "select_id": 1, + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "table": { + "table_name": "t3", + "access_type": "range", + "possible_keys": ["bb"], + "key": "bb", + "key_length": "4", + "used_key_parts": ["b"], + "r_loops": 1, + "rows": 2, + "r_rows": 1, + "r_total_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100, + "index_condition": "t3.b is not null", + "loose_scan": true + }, + "table": { + "table_name": "t1", + "access_type": "ref", + "possible_keys": ["aa"], + "key": "aa", + "key_length": "4", + "used_key_parts": ["a"], + "ref": ["test.t3.b"], + "r_loops": 1, + "rows": 2, + "r_rows": 1, + "r_total_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100, + "using_index": true + } + } +} +drop table t1,t3; +create table t1 (d int, id1 int, index idx1 (d, id1)); +insert into t1 values +(3, 20), (2, 40), (3, 10), (1, 10), (3, 20), (1, 40), (2, 30), (3, 30); +create table t2 (id1 int, id2 int, index idx2 (id1)); +insert into t2 values +(20, 100), (30, 400), (20, 400), (30, 200), (10, 300), (10, 200), (40, 100), +(40, 200), (30, 300), (10, 400), (20, 200), (20, 300); +insert into t2 values +(21, 10), (31, 400), (21, 400), (31, 200), (11, 300), (11, 200), (41, 100), +(41, 200), (31, 300), (11, 400), (21, 200), (21, 300); +set join_cache_level=6; +explain +select t1.id1, sum(t2.id2) from t1 join t2 on t1.id1=t2.id1 +where t1.d=3 group by t1.id1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range idx1 idx1 10 NULL 4 Using where; Using index +1 SIMPLE t2 ref idx2 idx2 5 test.t1.id1 2 +analyze format=json +select t1.id1, sum(t2.id2) from t1 join t2 on t1.id1=t2.id1 +where t1.d=3 group by t1.id1; +ANALYZE +{ + "query_block": { + "select_id": 1, + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "table": { + "table_name": "t1", + "access_type": "range", + "possible_keys": ["idx1"], + "key": "idx1", + "key_length": "10", + "used_key_parts": ["d", "id1"], + "r_loops": 1, + "rows": 4, + "r_rows": 4, + "r_total_time_ms": "REPLACED", 
+ "filtered": 25, + "r_filtered": 100, + "attached_condition": "t1.d = 3 and t1.id1 is not null", + "using_index": true + }, + "table": { + "table_name": "t2", + "access_type": "ref", + "possible_keys": ["idx2"], + "key": "idx2", + "key_length": "5", + "used_key_parts": ["id1"], + "ref": ["test.t1.id1"], + "r_loops": 4, + "rows": 2, + "r_rows": 3.5, + "r_total_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100 + } + } +} +drop table t1,t2; +set @save_optimizer_switch= @@optimizer_switch; +set @@optimizer_switch= 'materialization=off'; +CREATE TABLE t1 (a CHAR(1), b VARCHAR(10), key(a) , key(b)); +INSERT INTO t1 VALUES ('a', 'aa'); +INSERT INTO t1 VALUES ('a', 'aaa'); +analyze format=json SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1); +ANALYZE +{ + "query_block": { + "select_id": 1, + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "table": { + "table_name": "t1", + "access_type": "range", + "possible_keys": ["b"], + "key": "b", + "key_length": "13", + "used_key_parts": ["b"], + "r_loops": 1, + "rows": 2, + "r_rows": 2, + "r_total_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100, + "index_condition": "t1.b is not null" + }, + "table": { + "table_name": "t1", + "access_type": "ref", + "possible_keys": ["a"], + "key": "a", + "key_length": "2", + "used_key_parts": ["a"], + "ref": ["test.t1.b"], + "r_loops": 2, + "rows": 2, + "r_rows": 2, + "r_total_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 0, + "attached_condition": "t1.b = t1.a", + "using_index": true, + "first_match": "t1" + } + } +} +SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1); +a b +CREATE TABLE t2 (a VARCHAR(1), b VARCHAR(10), key(a), key(b)); +INSERT INTO t2 SELECT * FROM t1; +analyze format=json SELECT a,b FROM t2 WHERE b IN (SELECT a FROM t2); +ANALYZE +{ + "query_block": { + "select_id": 1, + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "table": { + "table_name": "t2", + "access_type": "index", + "possible_keys": ["a"], + "key": "a", + "key_length": "4", + "used_key_parts": ["a"], + "r_loops": 1, + "rows": 2, + "r_rows": 1, + "r_total_time_ms": "REPLACED", + "filtered": 50, + "r_filtered": 100, + "attached_condition": "t2.a is not null", + "using_index": true, + "loose_scan": true + }, + "table": { + "table_name": "t2", + "access_type": "ref", + "possible_keys": ["b"], + "key": "b", + "key_length": "13", + "used_key_parts": ["b"], + "ref": ["test.t2.a"], + "r_loops": 1, + "rows": 1, + "r_rows": 0, + "r_total_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100, + "index_condition": "t2.b = t2.a" + } + } +} +SELECT a,b FROM t2 WHERE b IN (SELECT a FROM t2); +a b +analyze format=json SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1 WHERE LENGTH(a)<500); +ANALYZE +{ + "query_block": { + "select_id": 1, + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "table": { + "table_name": "t1", + "access_type": "range", + "possible_keys": ["b"], + "key": "b", + "key_length": "13", + "used_key_parts": ["b"], + "r_loops": 1, + "rows": 2, + "r_rows": 2, + "r_total_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100, + "index_condition": "t1.b is not null" + }, + "table": { + "table_name": "t1", + "access_type": "ref", + "possible_keys": ["a"], + "key": "a", + "key_length": "2", + "used_key_parts": ["a"], + "ref": ["test.t1.b"], + "r_loops": 2, + "rows": 2, + "r_rows": 2, + "r_total_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 0, + "attached_condition": "octet_length(t1.a) < 500 and t1.b = t1.a", + "using_index": true, + "first_match": "t1" + } + } +} +SELECT a,b FROM t1 
WHERE b IN (SELECT a FROM t1 WHERE LENGTH(a)<500); +a b +DROP TABLE t1,t2; +set @@optimizer_switch= @save_optimizer_switch; +set @save_optimizer_switch= @@optimizer_switch; +set optimizer_switch='exists_to_in=on'; +set optimizer_switch='exists_to_in=on,in_to_exists=on,semijoin=on,materialization=off,subquery_cache=off'; +CREATE TABLE t1 ( a varchar(1)) ; +INSERT INTO t1 VALUES ('c'),('b'); +CREATE TABLE t2 ( b varchar(1)) ; +INSERT INTO t2 VALUES ('v'),('v'),('c'),(NULL),('x'),('i'),('e'),('p'),('s'),('j'),('z'),('c'),('a'),('q'),('y'),(NULL),('r'),('v'),(NULL),('r'); +CREATE TABLE t3 ( a int NOT NULL , b varchar(1)) ; +INSERT INTO t3 VALUES (29,'c'); +INSERT INTO t3 VALUES (2,'c'); +alter table t1 add index aa (a); +alter table t3 add index bb (b); +SELECT * FROM t1 WHERE EXISTS ( SELECT a FROM t3 WHERE t3.b = t1.a); +a +c +explain extended +SELECT * FROM t1 WHERE EXISTS ( SELECT a FROM t3 WHERE t3.b = t1.a); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY t3 range bb bb 4 NULL 2 100.00 Using index condition; LooseScan +1 PRIMARY t1 ref aa aa 4 test.t3.b 2 100.00 Using index +Warnings: +Note 1276 Field or reference 'test.t1.a' of SELECT #2 was resolved in SELECT #1 +Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` semi join (`test`.`t3`) where `test`.`t1`.`a` = `test`.`t3`.`b` +analyze format=json SELECT * FROM t1 WHERE EXISTS ( SELECT a FROM t3 WHERE t3.b = t1.a); +ANALYZE +{ + "query_block": { + "select_id": 1, + "r_loops": 1, + "r_total_time_ms": "REPLACED", + "table": { + "table_name": "t3", + "access_type": "range", + "possible_keys": ["bb"], + "key": "bb", + "key_length": "4", + "used_key_parts": ["b"], + "r_loops": 1, + "rows": 2, + "r_rows": 1, + "r_total_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100, + "index_condition": "t3.b is not null", + "loose_scan": true + }, + "table": { + "table_name": "t1", + "access_type": "ref", + "possible_keys": ["aa"], + "key": "aa", + "key_length": "4", + "used_key_parts": ["a"], + "ref": ["test.t3.b"], + "r_loops": 1, + "rows": 2, + "r_rows": 1, + "r_total_time_ms": "REPLACED", + "filtered": 100, + "r_filtered": 100, + "using_index": true + } + } +} +drop table t1,t2,t3; +set @@optimizer_switch= @save_optimizer_switch; diff --git a/mysql-test/main/mdev15777.test b/mysql-test/main/mdev15777.test new file mode 100644 index 00000000000..a7f21f6b6a2 --- /dev/null +++ b/mysql-test/main/mdev15777.test @@ -0,0 +1,138 @@ +--echo # Test added to check that null filtering is used by range optimizer + +create table ten(a int); +insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); + +create table one_k(a int); +insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C; + +create table one_m(a int); +insert into one_m select A.a + B.a* 1000 from one_k A, one_k B; +delete from one_m where a=0 limit 1; + +create table t1 ( + id int(10) unsigned NOT NULL AUTO_INCREMENT, + filler varchar(100), + subset_id int(11) DEFAULT NULL, + PRIMARY KEY (id), + KEY t1_subset_id (subset_id) +); + +create table t1_subsets ( + id int(10) unsigned NOT NULL AUTO_INCREMENT, + filler1 varchar(100), + filler2 varchar(100), + filler3 varchar(100), + PRIMARY KEY (id) +); + +insert into t1 select a,a, NULL from one_m where a < 50*1000; +insert into t1_subsets select a,a,a,a from one_m where a < 500*1000 limit 499000; + +--source include/analyze-format.inc +analyze format=json +SELECT * FROM t1 WHERE t1.subset_id IN (SELECT t1_subsets.id FROM t1_subsets); +drop table t1,t1_subsets,ten,one_k,one_m; 
+ +create table t0(a int); +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t1(a int); +insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C; +create table t2 ( + pk int primary key, + a int, b int, + filler char(200), + key(a) +); +insert into t2 select a, 1000-a, 1000-a, repeat('abc-',50) from t1 where a<200 limit 200; + +create table t3 ( + pk int primary key, + a int, b int, + filler char(200), + key(a) +); +insert into t3 select a, 1000-a, 1000-a, repeat('abc-',50) from t1; +insert into t3 select a+1000, 1000+a, 1000+a, repeat('abc-',50) from t1; + +--source include/analyze-format.inc +analyze format=json +select t2.pk,t2.a,t2.b,t3.pk,t3.a,t3.b +from t2, t3 where t2.a=t3.a order by t2.a limit 5; +drop table t1,t2,t3,t0; + +CREATE TABLE t1 ( a varchar(1)) ; +INSERT INTO t1 VALUES ('c'),('b'); +CREATE TABLE t3 ( a int NOT NULL , b varchar(1)) ; +INSERT INTO t3 VALUES (29,'c'); +INSERT INTO t3 VALUES (2,'c'); +alter table t1 add index aa (a); +alter table t3 add index bb (b); + +SELECT * FROM t1 WHERE EXISTS ( SELECT a FROM t3 WHERE t3.b = t1.a); +explain extended +SELECT * FROM t1 WHERE EXISTS ( SELECT a FROM t3 WHERE t3.b = t1.a); +--source include/analyze-format.inc +analyze format=json +SELECT * FROM t1 WHERE EXISTS ( SELECT a FROM t3 WHERE t3.b = t1.a); +drop table t1,t3; + +create table t1 (d int, id1 int, index idx1 (d, id1)); +insert into t1 values +(3, 20), (2, 40), (3, 10), (1, 10), (3, 20), (1, 40), (2, 30), (3, 30); +create table t2 (id1 int, id2 int, index idx2 (id1)); +insert into t2 values +(20, 100), (30, 400), (20, 400), (30, 200), (10, 300), (10, 200), (40, 100), +(40, 200), (30, 300), (10, 400), (20, 200), (20, 300); +insert into t2 values +(21, 10), (31, 400), (21, 400), (31, 200), (11, 300), (11, 200), (41, 100), +(41, 200), (31, 300), (11, 400), (21, 200), (21, 300); +set join_cache_level=6; +explain +select t1.id1, sum(t2.id2) from t1 join t2 on t1.id1=t2.id1 +where t1.d=3 group by t1.id1; +--source include/analyze-format.inc +analyze format=json +select t1.id1, sum(t2.id2) from t1 join t2 on t1.id1=t2.id1 +where t1.d=3 group by t1.id1; +drop table t1,t2; + +#### Test 4 ###### +set @save_optimizer_switch= @@optimizer_switch; +set @@optimizer_switch= 'materialization=off'; +CREATE TABLE t1 (a CHAR(1), b VARCHAR(10), key(a) , key(b)); +INSERT INTO t1 VALUES ('a', 'aa'); +INSERT INTO t1 VALUES ('a', 'aaa'); +--source include/analyze-format.inc +analyze format=json SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1); +SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1); +CREATE TABLE t2 (a VARCHAR(1), b VARCHAR(10), key(a), key(b)); +INSERT INTO t2 SELECT * FROM t1; +--source include/analyze-format.inc +analyze format=json SELECT a,b FROM t2 WHERE b IN (SELECT a FROM t2); +SELECT a,b FROM t2 WHERE b IN (SELECT a FROM t2); +--source include/analyze-format.inc +analyze format=json SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1 WHERE LENGTH(a)<500); +SELECT a,b FROM t1 WHERE b IN (SELECT a FROM t1 WHERE LENGTH(a)<500); +DROP TABLE t1,t2; +set @@optimizer_switch= @save_optimizer_switch; + +set @save_optimizer_switch= @@optimizer_switch; +set optimizer_switch='exists_to_in=on'; +set optimizer_switch='exists_to_in=on,in_to_exists=on,semijoin=on,materialization=off,subquery_cache=off'; +CREATE TABLE t1 ( a varchar(1)) ; +INSERT INTO t1 VALUES ('c'),('b'); +CREATE TABLE t2 ( b varchar(1)) ; +INSERT INTO t2 VALUES ('v'),('v'),('c'),(NULL),('x'),('i'),('e'),('p'),('s'),('j'),('z'),('c'),('a'),('q'),('y'),(NULL),('r'),('v'),(NULL),('r'); 
+CREATE TABLE t3 ( a int NOT NULL , b varchar(1)) ; +INSERT INTO t3 VALUES (29,'c'); +INSERT INTO t3 VALUES (2,'c'); +alter table t1 add index aa (a); +alter table t3 add index bb (b); +SELECT * FROM t1 WHERE EXISTS ( SELECT a FROM t3 WHERE t3.b = t1.a); +explain extended +SELECT * FROM t1 WHERE EXISTS ( SELECT a FROM t3 WHERE t3.b = t1.a); +--source include/analyze-format.inc +analyze format=json SELECT * FROM t1 WHERE EXISTS ( SELECT a FROM t3 WHERE t3.b = t1.a); +drop table t1,t2,t3; +set @@optimizer_switch= @save_optimizer_switch; \ No newline at end of file diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 8422917065f..ae06b899ac9 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -155,6 +155,7 @@ class SEL_IMERGE; #define CLONE_KEY1_MAYBE 1 #define CLONE_KEY2_MAYBE 2 #define swap_clone_flag(A) ((A & 1) << 1) | ((A & 2) >> 1) +#define FT_KEYPART (MAX_FIELDS+10) /* @@ -2400,6 +2401,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, { uint idx; double scan_time; + Item *null_rejecting_conds= NULL; DBUG_ENTER("SQL_SELECT::test_quick_select"); DBUG_PRINT("enter",("keys_to_use: %lu prev_tables: %lu const_tables: %lu", (ulong) keys_to_use.to_ulonglong(), (ulong) prev_tables, @@ -2423,6 +2425,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, read_time= (double) records + scan_time + 1; // Force to use index possible_keys.clear_all(); + null_rejecting_conds= head->null_rejecting_conds; DBUG_PRINT("info",("Time to scan table: %g", read_time)); @@ -2431,7 +2434,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, { uchar buff[STACK_BUFF_ALLOC]; MEM_ROOT alloc; - SEL_TREE *tree= NULL; + SEL_TREE *tree= NULL, *not_null_cond_tree= NULL; KEY_PART *key_parts; KEY *key_info; PARAM param; @@ -2540,6 +2543,12 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, TRP_GROUP_MIN_MAX *group_trp; double best_read_time= read_time; + if (null_rejecting_conds) + { + not_null_cond_tree= null_rejecting_conds->get_mm_tree(¶m, + &null_rejecting_conds); + } + if (cond) { if ((tree= cond->get_mm_tree(¶m, &cond))) @@ -2558,6 +2567,13 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, tree= NULL; } } + if (not_null_cond_tree) + { + if (!tree) + tree= not_null_cond_tree; + else + tree= tree_and(¶m, tree, not_null_cond_tree); + } /* Try to construct a QUICK_GROUP_MIN_MAX_SELECT. @@ -14643,6 +14659,113 @@ void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names, add_key_and_length(key_names, used_lengths, &first); } +inline void add_cond(THD *thd, Item **e1, Item *e2) +{ + if (*e1) + { + if (!e2) + return; + Item *res; + if ((res= new (thd->mem_root) Item_cond_and(thd, *e1, e2))) + { + res->fix_fields(thd, 0); + res->update_used_tables(); + *e1= res; + } + } + else + *e1= e2; +} + +/* + Create null rejecting conditions for a table, for all the equalites + present in the WHERE clause of a query. + + SYNOPSIS + make_null_rejecting_conds() + @param TABLE - Keys of this table will participate in null + rejecting conditions + @param keyuse_array - array that has all the equalites of the + WHERE clasuse + + DESCRIPTION + This function creates null rejecting conditions for a table. These + conditions are created to do range analysis on them , the conditions + are of the form tbl.key.keypart IS NOT NULL. + + IMPLEMENTATION + Lookup in the keyuse array to check if it has equalites that belong + to the given table. 
If yes then find out if the conditions are null + rejecting and accordingly create all the condition for the keys of a + given table and AND them. + + + RETURN + NOT NULL - Found null rejecting conditions for the given table + NULL - No null rejecting conditions for the given table +*/ + +void make_null_rejecting_conds(THD *thd, TABLE *table, + DYNAMIC_ARRAY *keyuse_array, key_map *const_keys) +{ + KEY *keyinfo; + Item *cond= NULL; + KEYUSE* keyuse; + + /* + The null rejecting conds added will be on the keypart of a key, so for + that we need the table to atleast have a key. + */ + if (!table->s->keys) + return ; + if (table->null_rejecting_conds) + return; + + for(uint i=0; i < keyuse_array->elements; i++) + { + keyuse= (KEYUSE*)dynamic_array_ptr(keyuse_array, i); + if (keyuse->table == table) + { + /* + No null rejecting conds for a hash key orr full-text keys + */ + if (keyuse->key == MAX_KEY || keyuse->keypart == FT_KEYPART) + continue; + keyinfo= keyuse->table->key_info+keyuse->key; + Field *field= keyinfo->key_part[keyuse->keypart].field; + + /* + No need to add null-rejecting condition if we have a + keyuse element as + - table.key.keypart= const + - (table.key.keypart= tbl.otherfield or table.key.keypart IS NULL) + - table.key.keypart IS NOT NULLABLE + */ + + if (keyuse->val->const_item() + || !(keyuse->null_rejecting && field->maybe_null()) + || keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) + continue; + + Item_field *field_item= new (thd->mem_root)Item_field(thd, field); + Item* not_null_item= new (thd->mem_root)Item_func_isnotnull(thd, + field_item); + + /* + adding the key to const keys as we have the condition + as key.keypart IS NOT NULL + */ + + const_keys->set_bit(keyuse->key); + not_null_item->fix_fields(thd, 0); + not_null_item->update_used_tables(); + add_cond(thd, &cond, not_null_item); + } + } + table->null_rejecting_conds= cond; + return; +} + #ifndef DBUG_OFF diff --git a/sql/opt_range.h b/sql/opt_range.h index bd85a12d4a1..894d46a892c 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -1728,6 +1728,8 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond); bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond); #endif void store_key_image_to_rec(Field *field, uchar *ptr, uint len); +void make_null_rejecting_conds(THD *thd, TABLE *table, + DYNAMIC_ARRAY *keyuse_array, key_map *const_keys); extern String null_string; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index f658b78c33c..4aceb333340 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -4805,6 +4805,9 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, add_group_and_distinct_keys(join, s); s->table->cond_selectivity= 1.0; + + make_null_rejecting_conds(join->thd, s->table, + keyuse_array, &s->const_keys); /* Perform range analysis if there are keys it could use (1). @@ -4834,6 +4837,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, 1, &error); if (!select) goto error; + records= get_quick_record_count(join->thd, select, s->table, &s->const_keys, join->row_limit); /* Range analyzer could modify the condition. */ @@ -5352,15 +5356,24 @@ add_key_field(JOIN *join, If the condition has form "tbl.keypart = othertbl.field" and othertbl.field can be NULL, there will be no matches if othertbl.field has NULL value. 
+ + The field KEY_FIELD::null_rejecting is set to TRUE if we have both + the left and right hand side of the equality are NULLABLE + We use null_rejecting in add_not_null_conds() to add 'othertbl.field IS NOT NULL' to tab->select_cond. + + We use null_rejecting in make_null_rejecting_conds() to add + tbl.keypart IS NOT NULL so we can do range analysis on this condition + */ { Item *real= (*value)->real_item(); if (((cond->functype() == Item_func::EQ_FUNC) || (cond->functype() == Item_func::MULT_EQUAL_FUNC)) && - (real->type() == Item::FIELD_ITEM) && + (((real->type() == Item::FIELD_ITEM) && ((Item_field*)real)->field->maybe_null()) + ||(field->maybe_null()))) (*key_fields)->null_rejecting= true; else (*key_fields)->null_rejecting= false; @@ -9813,7 +9826,10 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, uint maybe_null= MY_TEST(keyinfo->key_part[i].null_bit); j->ref.items[i]=keyuse->val; // Save for cond removal j->ref.cond_guards[i]= keyuse->cond_guard; - if (keyuse->null_rejecting) + Item *real= (keyuse->val)->real_item(); + if (keyuse->null_rejecting && + (real->type() == Item::FIELD_ITEM) && + ((Item_field*)real)->field->maybe_null()) j->ref.null_rejecting|= (key_part_map)1 << i; keyuse_uses_no_tables= keyuse_uses_no_tables && !keyuse->used_tables; /* @@ -18538,7 +18554,7 @@ free_tmp_table(THD *thd, TABLE *entry) DBUG_ASSERT(entry->pos_in_table_list->table == entry); entry->pos_in_table_list->table= NULL; } - + entry->null_rejecting_conds= NULL; free_root(&own_root, MYF(0)); /* the table is allocated in its own root */ thd_proc_info(thd, save_proc_info); diff --git a/sql/table.cc b/sql/table.cc index a6e445d0d2e..097fee465de 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -4597,6 +4597,7 @@ void TABLE::init(THD *thd, TABLE_LIST *tl) created= TRUE; cond_selectivity= 1.0; cond_selectivity_sampling_explain= NULL; + null_rejecting_conds= NULL; #ifdef HAVE_REPLICATION /* used in RBR Triggers */ master_had_triggers= 0; diff --git a/sql/table.h b/sql/table.h index 6fd3f219914..50d47899b49 100644 --- a/sql/table.h +++ b/sql/table.h @@ -1356,6 +1356,12 @@ struct TABLE SplM_opt_info *spl_opt_info; key_map keys_usable_for_splitting; + /* + Null rejecting conds added for all tables so we can do range analysis + on these conditions + */ + Item* null_rejecting_conds; + void init(THD *thd, TABLE_LIST *tl); bool fill_item_list(List<Item> *item_list) const; void reset_item_list(List<Item> *item_list, uint skip) const;
Hi Varun,

Please find the review below, finally. The patch is almost there, I think, but there are still some minor adjustments to be made.
revision-id: 0ff8b73b60fae40556f91d3959f20bf3cf182b28 (mariadb-10.3.0-947-g0ff8b73b60f) parent(s): 15419a558370aeed9521b498c34d50f20a8d47a5 author: Varun Gupta committer: Varun Gupta timestamp: 2018-05-15 13:53:11 +0530 message:
MDEV-15777:Support Early NULLs filtering-like restrictions in the range optimizer
--- mysql-test/main/mdev15777.result | 455 +++++++++++++++++++++++++++++++++++++++ mysql-test/main/mdev15777.test | 138 ++++++++++++ sql/opt_range.cc | 125 ++++++++++- sql/opt_range.h | 2 + sql/sql_select.cc | 22 +- sql/table.cc | 1 + sql/table.h | 6 + 7 files changed, 745 insertions(+), 4 deletions(-)
diff --git a/mysql-test/main/mdev15777.result b/mysql-test/main/mdev15777.result new file mode 100644 index 00000000000..549ac08b91f --- /dev/null +++ b/mysql-test/main/mdev15777.result @@ -0,0 +1,455 @@ +# Test added to check that null filtering is used by range optimizer +create table ten(a int); +insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table one_k(a int); +insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C; +create table one_m(a int); +insert into one_m select A.a + B.a* 1000 from one_k A, one_k B;
The testcases are extremely confusing.

* Do you really need a table with one million rows to demonstrate the issue?
* Why use examples with subqueries (which are converted into joins anyway)? If we want to verify that this optimization works together with the subquery-to-semi-join conversion, let's start with a join example, and then construct a subquery which is converted into such a join.
* The testcases produce a lot of EXPLAIN output, and there is no explanation of what one should expect.

Please create a testcase:
- of reasonable size (I think tables of 10-20K rows should suffice);
- that starts from a join query;
- that names the tables t1, t2, ..., like the other testcases do;
- where, I assume, the query plan should be: range access on the first table (constructed from a NOT NULL predicate), ref access for the second one;
- with '--echo #' comments explaining what is relevant in the query plan.
+delete from one_m where a=0 limit 1; +create table t1 ( +id int(10) unsigned NOT NULL AUTO_INCREMENT, +filler varchar(100), +subset_id int(11) DEFAULT NULL, +PRIMARY KEY (id), +KEY t1_subset_id (subset_id) +); +create table t1_subsets ( ...
diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 8422917065f..ae06b899ac9 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -155,6 +155,7 @@ class SEL_IMERGE; #define CLONE_KEY1_MAYBE 1 #define CLONE_KEY2_MAYBE 2 #define swap_clone_flag(A) ((A & 1) << 1) | ((A & 2) >> 1) +#define FT_KEYPART (MAX_FIELDS+10)
/* @@ -2400,6 +2401,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, { uint idx; double scan_time; + Item *null_rejecting_conds= NULL; DBUG_ENTER("SQL_SELECT::test_quick_select"); DBUG_PRINT("enter",("keys_to_use: %lu prev_tables: %lu const_tables: %lu", (ulong) keys_to_use.to_ulonglong(), (ulong) prev_tables, @@ -2423,6 +2425,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, read_time= (double) records + scan_time + 1; // Force to use index
possible_keys.clear_all(); + null_rejecting_conds= head->null_rejecting_conds;
DBUG_PRINT("info",("Time to scan table: %g", read_time));
@@ -2431,7 +2434,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, { uchar buff[STACK_BUFF_ALLOC]; MEM_ROOT alloc; - SEL_TREE *tree= NULL; + SEL_TREE *tree= NULL, *not_null_cond_tree= NULL; KEY_PART *key_parts; KEY *key_info; PARAM param; @@ -2540,6 +2543,12 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, TRP_GROUP_MIN_MAX *group_trp; double best_read_time= read_time;
Please add a comment saying that, as far as the range optimizer is concerned, null_rejecting_conds is just an extra condition on this table. (That is, we don't care whether it has NOT NULL conditions or something else.)
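Possible wording (just a suggestion):

  /*
    From the range optimizer's point of view, null_rejecting_conds is
    just an extra condition on this table: we don't care whether it is
    made up of NOT NULL conditions or of something else.
  */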
+ if (null_rejecting_conds) + { + not_null_cond_tree= null_rejecting_conds->get_mm_tree(&param, + &null_rejecting_conds); + } + if (cond) { if ((tree= cond->get_mm_tree(&param, &cond))) @@ -2558,6 +2567,13 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, tree= NULL; } } + if (not_null_cond_tree) + { + if (!tree) + tree= not_null_cond_tree; + else + tree= tree_and(&param, tree, not_null_cond_tree);
This if-else logic is redundant, as it's already present in tree_and(). You can just have this: tree= tree_and(&param, tree, not_null_cond_tree);
+ }
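That is, the whole added hunk can shrink to something like this (untested sketch; tree_and() returns the other tree when one of its arguments is NULL):

  /* AND the IS NOT NULL conditions into the range tree, if we built any */
  tree= tree_and(&param, tree, not_null_cond_tree);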
/* Try to construct a QUICK_GROUP_MIN_MAX_SELECT. @@ -14643,6 +14659,113 @@ void QUICK_GROUP_MIN_MAX_SELECT::add_keys_and_lengths(String *key_names, add_key_and_length(key_names, used_lengths, &first); }
+inline void add_cond(THD *thd, Item **e1, Item *e2) +{ + if (*e1) + { + if (!e2) + return; + Item *res; + if ((res= new (thd->mem_root) Item_cond_and(thd, *e1, e2))) + { + res->fix_fields(thd, 0); + res->update_used_tables(); + *e1= res; + } + } + else + *e1= e2; +} + +/* + Create null rejecting conditions for a table, for all the equalites + present in the WHERE clause of a query. + + SYNOPSIS + make_null_rejecting_conds() + @param TABLE - Keys of this table will participate in null + rejecting conditions + @param keyuse_array - array that has all the equalites of the + WHERE clasuse + + DESCRIPTION + This function creates null rejecting conditions for a table. These + conditions are created to do range analysis on them , the conditions + are of the form tbl.key.keypart IS NOT NULL. + + IMPLEMENTATION + Lookup in the keyuse array to check if it has equalites that belong + to the given table. If yes then find out if the conditions are null + rejecting and accordingly create all the condition for the keys of a + given table and AND them. + + + RETURN + NOT NULL - Found null rejecting conditions for the given table + NULL - No null rejecting conditions for the given table +*/ + +void make_null_rejecting_conds(THD *thd, TABLE *table, + DYNAMIC_ARRAY *keyuse_array, key_map *const_keys) ^ Please fix the indentation.
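i.e. with the continuation lines aligned under the first parameter:

  void make_null_rejecting_conds(THD *thd, TABLE *table,
                                 DYNAMIC_ARRAY *keyuse_array,
                                 key_map *const_keys)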
+{ + KEY *keyinfo; + Item *cond= NULL; + KEYUSE* keyuse; + + /* + The null rejecting conds added will be on the keypart of a key, so for + that we need the table to atleast have a key. + */ Please fix the typos/wording in the above comment.
+ if (!table->s->keys) + return ; + if (table->null_rejecting_conds) + return; + + for(uint i=0; i < keyuse_array->elements; i++) + { This loop does excessive work. Check out how best_access_path() examines the KEYUSE array.
+ keyuse= (KEYUSE*)dynamic_array_ptr(keyuse_array, i); + if (keyuse->table == table) + { + /* + No null rejecting conds for a hash key orr full-text keys + */
Please fix the typos/wording in the quoted comment above.

Key points:
- the array is sorted by (table, index_nr, key_part_nr);
- JOIN_TAB::keyuse points to the first element that refers to the table described by the JOIN_TAB;
- the array may have "duplicates", especially when multiple equalities are present:
    KEYUSE(t.keyXpartY = other_table.colX)
    KEYUSE(t.keyXpartY = other_table.colY)
    KEYUSE(t.keyXpartY = some_expr)
    ...
Since the elements are sorted by keyXpartY, it is trivial to avoid generating multiple "t.keyXpartY IS NOT NULL" items for consecutive KEYUSEs. I would go even further: the same "t.keyXpartY" can be part of multiple indexes (and so its KEYUSE objects will not be adjacent). Can we use a table's column bitmap to avoid generating duplicate IS NOT NULL objects?
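To illustrate, a rough, untested sketch of the scan I have in mind. It assumes the caller can hand us the JOIN_TAB (for JOIN_TAB::keyuse), and uses a hypothetical MY_BITMAP fields_seen, sized to the table's field count, to suppress duplicates across indexes:

  KEYUSE *keyuse= tab->keyuse;      /* first element for this table */
  while (keyuse->table == table)    /* the array has a terminator element */
  {
    if (keyuse->key != MAX_KEY && keyuse->keypart != FT_KEYPART)
    {
      Field *field=
        table->key_info[keyuse->key].key_part[keyuse->keypart].field;
      if (!bitmap_is_set(&fields_seen, field->field_index))
      {
        bitmap_set_bit(&fields_seen, field->field_index);
        /* build "field IS NOT NULL" once per column here */
      }
    }
    keyuse++;
  }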
+ if (keyuse->key == MAX_KEY || keyuse->keypart == FT_KEYPART) + continue; + keyinfo= keyuse->table->key_info+keyuse->key; + Field *field= keyinfo->key_part[keyuse->keypart].field; + + /* + No need to add null-rejecting condition if we have a + keyuse element as + - table.key.keypart= const + - (table.key.keypart= tbl.otherfield or table.key.keypart IS NULL) + - table.key.keypart IS NOT NULLABLE + */ + + if (keyuse->val->const_item() + || !(keyuse->null_rejecting && field->maybe_null()) Please remove the brackets: !(A && B) -> !A || !B + || keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) Please also put the '||' at the end of the lines, like it is done in the rest of the code. + continue; + + Item_field *field_item= new (thd->mem_root)Item_field(thd, field); + Item* not_null_item= new (thd->mem_root)Item_func_isnotnull(thd, + field_item); Please add error handling (just return from the function if we failed to allocate any of the above)
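Putting the three fixes together, that part of the loop body would read roughly like this (untested sketch):

  if (keyuse->val->const_item() ||
      !keyuse->null_rejecting || !field->maybe_null() ||
      keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
    continue;

  Item_field *field_item= new (thd->mem_root) Item_field(thd, field);
  if (!field_item)
    return;                          /* allocation failed, give up */
  Item *not_null_item=
    new (thd->mem_root) Item_func_isnotnull(thd, field_item);
  if (!not_null_item)
    return;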
+ + /* + adding the key to const keys as we have the condition + as key.keypart IS NOT NULL + */ + + const_keys->set_bit(keyuse->key); + not_null_item->fix_fields(thd, 0); + not_null_item->update_used_tables(); + add_cond(thd, &cond, not_null_item); + } + } + table->null_rejecting_conds= cond; + return;
Please remove the above 'return;' as it is redundant and makes one suspect a merge error.
+} +
#ifndef DBUG_OFF
diff --git a/sql/opt_range.h b/sql/opt_range.h index bd85a12d4a1..894d46a892c 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -1728,6 +1728,8 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond); bool prune_partitions(THD *thd, TABLE *table, Item *pprune_cond); #endif void store_key_image_to_rec(Field *field, uchar *ptr, uint len); +void make_null_rejecting_conds(THD *thd, TABLE *table, + DYNAMIC_ARRAY *keyuse_array, key_map *const_keys); Please fix the indentation.
extern String null_string;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc index f658b78c33c..4aceb333340 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -4805,6 +4805,9 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, add_group_and_distinct_keys(join, s);
s->table->cond_selectivity= 1.0; + + make_null_rejecting_conds(join->thd, s->table, + keyuse_array, &s->const_keys);
/* Perform range analysis if there are keys it could use (1). @@ -5352,15 +5356,24 @@ add_key_field(JOIN *join, If the condition has form "tbl.keypart = othertbl.field" and othertbl.field can be NULL, there will be no matches if othertbl.field has NULL value. + + The field KEY_FIELD::null_rejecting is set to TRUE if we have both + the left and right hand side of the equality are NULLABLE + This is NOT what is happening. null_rejecting is set to true if *either* the left or the right-hand side of the equality is nullable. Please fix the comment.
Please also fix the comment in KEY_FIELD::null_rejecting to reflect that.
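Suggested wording for that comment (adjust as needed):

  /*
    The condition has the form "tbl.keypart = othertbl.field" (or is a
    multiple equality), and at least one of the two sides is nullable;
    the equality can then be used to filter out rows where that side
    IS NULL.
  */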
We use null_rejecting in add_not_null_conds() to add 'othertbl.field IS NOT NULL' to tab->select_cond. + + We use null_rejecting in make_null_rejecting_conds() to add + tbl.keypart IS NOT NULL so we can do range analysis on this condition +
Please join the above two sentences together:

  null_rejecting is used
  - by add_not_null_conds(), to add "othertbl.field IS NOT NULL" to
    othertbl's tab->select_cond (this is called "Early NULLs filtering");
  - by make_null_rejecting_conds(), to provide the range optimizer with
    an additional "tbl.keypart IS NOT NULL" condition.
*/ { Item *real= (*value)->real_item(); if (((cond->functype() == Item_func::EQ_FUNC) || (cond->functype() == Item_func::MULT_EQUAL_FUNC)) && - (real->type() == Item::FIELD_ITEM) && + (((real->type() == Item::FIELD_ITEM) && ((Item_field*)real)->field->maybe_null()) + ||(field->maybe_null()))) ^ Please fix the indentation.
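One possible layout (same logic, re-indented):

  if ((cond->functype() == Item_func::EQ_FUNC ||
       cond->functype() == Item_func::MULT_EQUAL_FUNC) &&
      ((real->type() == Item::FIELD_ITEM &&
        ((Item_field*)real)->field->maybe_null()) ||
       field->maybe_null()))
    (*key_fields)->null_rejecting= true;
  else
    (*key_fields)->null_rejecting= false;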
(*key_fields)->null_rejecting= true; else (*key_fields)->null_rejecting= false; @@ -9813,7 +9826,10 @@ static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, uint maybe_null= MY_TEST(keyinfo->key_part[i].null_bit); j->ref.items[i]=keyuse->val; // Save for cond removal j->ref.cond_guards[i]= keyuse->cond_guard; - if (keyuse->null_rejecting) + Item *real= (keyuse->val)->real_item(); + if (keyuse->null_rejecting && + (real->type() == Item::FIELD_ITEM) && + ((Item_field*)real)->field->maybe_null())
^ Please fix the indentation.
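Same logic, re-indented. As I read it, the extra real->type() check is needed because null_rejecting may now be set even when the lookup value is not a nullable field, in which case there is nothing to filter on the ref side:

  Item *real= keyuse->val->real_item();
  if (keyuse->null_rejecting &&
      real->type() == Item::FIELD_ITEM &&
      ((Item_field*)real)->field->maybe_null())
    j->ref.null_rejecting|= (key_part_map)1 << i;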
j->ref.null_rejecting|= (key_part_map)1 << i; keyuse_uses_no_tables= keyuse_uses_no_tables && !keyuse->used_tables; /* @@ -18538,7 +18554,7 @@ free_tmp_table(THD *thd, TABLE *entry) DBUG_ASSERT(entry->pos_in_table_list->table == entry); entry->pos_in_table_list->table= NULL; } - + entry->null_rejecting_conds= NULL;
What is this for? As far as I understand, we are freeing the TABLE object here, so clearing the field should not be needed.
free_root(&own_root, MYF(0)); /* the table is allocated in its own root */ thd_proc_info(thd, save_proc_info);
diff --git a/sql/table.cc b/sql/table.cc index a6e445d0d2e..097fee465de 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -4597,6 +4597,7 @@ void TABLE::init(THD *thd, TABLE_LIST *tl) created= TRUE; cond_selectivity= 1.0; cond_selectivity_sampling_explain= NULL; + null_rejecting_conds= NULL; #ifdef HAVE_REPLICATION /* used in RBR Triggers */ master_had_triggers= 0; diff --git a/sql/table.h b/sql/table.h index 6fd3f219914..50d47899b49 100644 --- a/sql/table.h +++ b/sql/table.h @@ -1356,6 +1356,12 @@ struct TABLE SplM_opt_info *spl_opt_info; key_map keys_usable_for_splitting;
+ /* + Null rejecting conds added for all tables so we can do range analysis + on these conditions + */ "for all tables"? Please change the comment to something like:
"NULL-rejecting conditions for this table. They are created from eq_ref access, and used by the range optimizer".
+ Item* null_rejecting_conds; + void init(THD *thd, TABLE_LIST *tl); bool fill_item_list(List<Item> *item_list) const; void reset_item_list(List<Item> *item_list, uint skip) const;
BR
 Sergei
--
Sergei Petrunia, Software Developer
MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog