revision-id: 2a3559a84a12a1864e8987e5396fcef86768ce42 (mariadb-10.1.34-36-g2a3559a84a1) parent(s): 2fb68244b4246221e4f605749913bfe582a4d040 author: Varun Gupta committer: Varun Gupta timestamp: 2018-08-02 16:55:39 +0530 message: MDEV-15433: Optimizer does not use group by optimization with distinct The group-by optimization does not work with DISTINCT because we are not doing range analysis on the items in the select list. The solution is to do range analyis on the items in the select list when we have DISTINCT Also we should allow this group-by optimization with DISTINCT only if the quert does not have GROUP BY. --- mysql-test/r/bench_count_distinct.result | 2 +- mysql-test/r/distinct.result | 2 +- mysql-test/r/explain_json.result | 18 +++---- mysql-test/r/group_min_max.result | 87 +++++++++++++++++++++++--------- mysql-test/t/group_min_max.test | 27 ++++++++++ sql/opt_range.cc | 10 ++++ sql/sql_select.cc | 2 +- 7 files changed, 113 insertions(+), 35 deletions(-) diff --git a/mysql-test/r/bench_count_distinct.result b/mysql-test/r/bench_count_distinct.result index 79e12afd237..8b67e4be38a 100644 --- a/mysql-test/r/bench_count_distinct.result +++ b/mysql-test/r/bench_count_distinct.result @@ -5,7 +5,7 @@ count(distinct n) 100 explain extended select count(distinct n) from t1; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 index NULL n 4 NULL 200 100.00 Using index +1 SIMPLE t1 range NULL n 4 NULL 10 100.00 Using index for group-by Warnings: Note 1003 select count(distinct `test`.`t1`.`n`) AS `count(distinct n)` from `test`.`t1` drop table t1; diff --git a/mysql-test/r/distinct.result b/mysql-test/r/distinct.result index b5e8cefca69..d6e5a69e217 100644 --- a/mysql-test/r/distinct.result +++ b/mysql-test/r/distinct.result @@ -212,7 +212,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 index NULL PRIMARY 4 NULL 1 Using index explain SELECT distinct a from t3 order by a desc limit 2; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t3 index NULL a 5 NULL 2 Using index +1 SIMPLE t3 index NULL a 5 NULL 40 Using index explain SELECT distinct a,b from t3 order by a+1; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t3 ALL NULL NULL NULL NULL 204 Using temporary; Using filesort diff --git a/mysql-test/r/explain_json.result b/mysql-test/r/explain_json.result index 9e5515f3cfa..0abbda352b7 100644 --- a/mysql-test/r/explain_json.result +++ b/mysql-test/r/explain_json.result @@ -1030,10 +1030,10 @@ Table Op Msg_type Msg_text test.t1 analyze status Table is already up to date explain select count(distinct a1,a2,b) from t1 where (a2 >= 'b') and (b = 'a'); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL idx_t1_2 147 NULL 128 Using where; Using index +1 SIMPLE t1 range NULL idx_t1_1 147 NULL 17 Using where; Using index for group-by explain select count(distinct a1,a2,b,c) from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL idx_t1_1 163 NULL 128 Using where; Using index +1 SIMPLE t1 range NULL idx_t1_1 163 NULL 65 Using where; Using index for group-by (scanning) explain format=json select count(distinct a1,a2,b) from t1 where (a2 >= 'b') and (b = 'a'); EXPLAIN { @@ -1041,14 +1041,14 @@ EXPLAIN "select_id": 1, "table": { "table_name": "t1", - "access_type": "index", - "key": "idx_t1_2", + "access_type": "range", + "key": "idx_t1_1", "key_length": "147", "used_key_parts": ["a1", "a2", "b"], - "rows": 128, + "rows": 17, "filtered": 100, "attached_condition": "((t1.b = 'a') and (t1.a2 >= 'b'))", - "using_index": true + "using_index_for_group_by": true } } } @@ -1059,14 +1059,14 @@ EXPLAIN "select_id": 1, "table": { "table_name": "t1", - "access_type": "index", + "access_type": "range", "key": "idx_t1_1", "key_length": "163", "used_key_parts": ["a1", "a2", "b", "c"], - "rows": 128, + "rows": 65, "filtered": 100, "attached_condition": "((t1.b = 'a') and (t1.c = 'i121') and (t1.a2 >= 'b'))", - "using_index": true + "using_index_for_group_by": "scanning" } } } diff --git a/mysql-test/r/group_min_max.result b/mysql-test/r/group_min_max.result index cd7f1014ec0..fe351cef39d 100644 --- a/mysql-test/r/group_min_max.result +++ b/mysql-test/r/group_min_max.result @@ -1707,13 +1707,13 @@ select a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c') group by a1 a1 a2 b explain select distinct a1,a2,b from t1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL idx_t1_2 147 NULL 128 Using index +1 SIMPLE t1 range NULL idx_t1_1 147 NULL 17 Using index for group-by explain select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a'); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL idx_t1_2 147 NULL 128 Using where; Using index +1 SIMPLE t1 range NULL idx_t1_1 147 NULL 17 Using where; Using index for group-by explain extended select distinct a1,a2,b,c from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t1 index NULL idx_t1_1 163 NULL 128 100.00 Using where; Using index +1 SIMPLE t1 index NULL idx_t1_1 163 NULL 128 50.78 Using where; Using index Warnings: Note 1003 select distinct `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2`,`test`.`t1`.`b` AS `b`,`test`.`t1`.`c` AS `c` from `test`.`t1` where ((`test`.`t1`.`b` = 'a') and (`test`.`t1`.`c` = 'i121') and (`test`.`t1`.`a2` >= 'b')) explain select distinct a1,a2,b from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); @@ -1724,13 +1724,13 @@ id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 index NULL idx_t1_2 147 NULL 128 Using where; Using index explain select distinct a1,a2,b from t2; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 index NULL idx_t2_2 146 NULL # Using index +1 SIMPLE t2 range NULL idx_t2_1 146 NULL # Using index for group-by explain select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a'); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 index NULL idx_t2_2 146 NULL # Using where; Using index +1 SIMPLE t2 range NULL idx_t2_1 146 NULL # Using where; Using index for group-by explain extended select distinct a1,a2,b,c from t2 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t2 index NULL idx_t2_1 163 NULL 164 100.00 Using where; Using index +1 SIMPLE t2 index NULL idx_t2_1 163 NULL 164 50.61 Using where; Using index Warnings: Note 1003 select distinct `test`.`t2`.`a1` AS `a1`,`test`.`t2`.`a2` AS `a2`,`test`.`t2`.`b` AS `b`,`test`.`t2`.`c` AS `c` from `test`.`t2` where ((`test`.`t2`.`b` = 'a') and (`test`.`t2`.`c` = 'i121') and (`test`.`t2`.`a2` >= 'b')) explain select distinct a1,a2,b from t2 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); @@ -1855,7 +1855,7 @@ c e d e explain select distinct a1,a2,b from t1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL idx_t1_2 147 NULL 128 Using index +1 SIMPLE t1 range NULL idx_t1_1 147 NULL 17 Using index for group-by explain select distinct a1,a2,b from t1 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range NULL idx_t1_1 147 NULL 17 Using where; Using index for group-by @@ -1870,7 +1870,7 @@ id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range NULL idx_t1_1 147 NULL 17 Using where; Using index for group-by; Using temporary; Using filesort explain select distinct a1,a2,b from t2; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 index NULL idx_t2_2 146 NULL # Using index +1 SIMPLE t2 range NULL idx_t2_1 146 NULL # Using index for group-by explain select distinct a1,a2,b from t2 where (a2 >= 'b') and (b = 'a') group by a1,a2,b; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t2 range NULL idx_t2_1 146 NULL # Using where; Using index for group-by @@ -1953,10 +1953,10 @@ b a explain select count(distinct a1,a2,b) from t1 where (a2 >= 'b') and (b = 'a'); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL idx_t1_2 147 NULL 128 Using where; Using index +1 SIMPLE t1 range NULL idx_t1_1 147 NULL 17 Using where; Using index for group-by explain select count(distinct a1,a2,b,c) from t1 where (a2 >= 'b') and (b = 'a') and (c = 'i121'); id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL idx_t1_1 163 NULL 128 Using where; Using index +1 SIMPLE t1 range NULL idx_t1_1 163 NULL 65 Using where; Using index for group-by (scanning) explain extended select count(distinct a1,a2,b) from t1 where (a1 > 'a') and (a2 > 'a') and (b = 'c'); id select_type table type possible_keys key key_len ref rows filtered Extra 1 SIMPLE t1 range idx_t1_0,idx_t1_1,idx_t1_2 idx_t1_1 147 NULL 14 100.00 Using where; Using index for group-by @@ -2173,7 +2173,7 @@ c d explain select distinct a1 from t1 where a2 = 'b'; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL idx_t1_2 147 NULL 128 Using where; Using index +1 SIMPLE t1 range NULL idx_t1_1 130 NULL 5 Using where; Using index for group-by select distinct a1 from t1 where a2 = 'b'; a1 a @@ -2283,7 +2283,7 @@ INSERT INTO t1 (a) VALUES ('SOUTH EAST'), ('SOUTH WEST'), ('WESTERN'); EXPLAIN SELECT DISTINCT a,a FROM t1 ORDER BY a; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL a 66 NULL 11 Using index +1 SIMPLE t1 range NULL a 66 NULL 6 Using index for group-by SELECT DISTINCT a,a FROM t1 ORDER BY a; a a @@ -2499,7 +2499,7 @@ INSERT INTO t1 VALUES (4), (2), (1), (2), (2), (4), (1), (4); EXPLAIN SELECT DISTINCT(a) FROM t1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL idx 5 NULL 16 Using index +1 SIMPLE t1 range NULL idx 5 NULL 9 Using index for group-by SELECT DISTINCT(a) FROM t1; a 1 @@ -2507,7 +2507,7 @@ a 4 EXPLAIN SELECT SQL_BIG_RESULT DISTINCT(a) FROM t1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL idx 5 NULL 16 Using index +1 SIMPLE t1 range NULL idx 5 NULL 9 Using index for group-by SELECT SQL_BIG_RESULT DISTINCT(a) FROM t1; a 1 @@ -2646,7 +2646,7 @@ INSERT INTO t1 SELECT * FROM t1; INSERT INTO t1 SELECT a,b,c+1,d FROM t1; EXPLAIN SELECT DISTINCT c FROM t1 WHERE d=4; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL foo 20 NULL 32 Using where; Using index +1 SIMPLE t1 range NULL foo 10 NULL 9 Using where; Using index for group-by SELECT DISTINCT c FROM t1 WHERE d=4; c 1 @@ -3339,19 +3339,19 @@ INSERT INTO t2 SELECT a, b + 4, c,d,e,f FROM t2; INSERT INTO t2 SELECT a + 1, b, c,d,e,f FROM t2; EXPLAIN SELECT COUNT(DISTINCT a) FROM t1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL a 10 NULL 16 Using index +1 SIMPLE t1 range NULL a 5 NULL 9 Using index for group-by SELECT COUNT(DISTINCT a) FROM t1; COUNT(DISTINCT a) 2 EXPLAIN SELECT COUNT(DISTINCT a,b) FROM t1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL a 10 NULL 16 Using index +1 SIMPLE t1 range NULL a 10 NULL 9 Using index for group-by SELECT COUNT(DISTINCT a,b) FROM t1; COUNT(DISTINCT a,b) 16 EXPLAIN SELECT COUNT(DISTINCT b,a) FROM t1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL a 10 NULL 16 Using index +1 SIMPLE t1 range NULL a 10 NULL 9 Using index for group-by SELECT COUNT(DISTINCT b,a) FROM t1; COUNT(DISTINCT b,a) 16 @@ -3414,7 +3414,7 @@ COUNT(DISTINCT a) 2 EXPLAIN SELECT 1 FROM t1 HAVING COUNT(DISTINCT a) < 10; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL a 10 NULL 16 Using index +1 SIMPLE t1 range NULL a 5 NULL 9 Using index for group-by SELECT 1 FROM t1 HAVING COUNT(DISTINCT a) < 10; 1 1 @@ -3435,19 +3435,19 @@ COUNT(DISTINCT t1_1.a) 1 EXPLAIN SELECT COUNT(DISTINCT a), 12 FROM t1; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t1 index NULL a 10 NULL 16 Using index +1 SIMPLE t1 range NULL a 5 NULL 9 Using index for group-by SELECT COUNT(DISTINCT a), 12 FROM t1; COUNT(DISTINCT a) 12 2 12 EXPLAIN SELECT COUNT(DISTINCT a, b, c) FROM t2; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 index NULL a 15 NULL 16 Using index +1 SIMPLE t2 range NULL a 15 NULL 9 Using index for group-by SELECT COUNT(DISTINCT a, b, c) FROM t2; COUNT(DISTINCT a, b, c) 16 EXPLAIN SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT a) FROM t2; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 index NULL a 15 NULL 16 Using index +1 SIMPLE t2 range NULL a 5 NULL 9 Using index for group-by SELECT COUNT(DISTINCT a), SUM(DISTINCT a), AVG(DISTINCT a) FROM t2; COUNT(DISTINCT a) SUM(DISTINCT a) AVG(DISTINCT a) 2 3 1.5000 @@ -3459,7 +3459,7 @@ COUNT(DISTINCT a) SUM(DISTINCT a) AVG(DISTINCT f) 2 3 1.0000 EXPLAIN SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, a) FROM t2; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 index NULL a 15 NULL 16 Using index +1 SIMPLE t2 range NULL a 10 NULL 9 Using index for group-by SELECT COUNT(DISTINCT a, b), COUNT(DISTINCT b, a) FROM t2; COUNT(DISTINCT a, b) COUNT(DISTINCT b, a) 16 16 @@ -3893,5 +3893,46 @@ id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 index a a 13 NULL 2 Using where; Using index drop table t1; # +# MDEV-15433: Optimizer does not use group by optimization with distinct +# +CREATE TABLE t1 (id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, a INT NOT NULL, KEY(a)); +INSERT INTO t1(a) VALUES (1), (2), (3), (4); +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +OPTIMIZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 optimize status OK +EXPLAIN SELECT DISTINCT a FROM t1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range NULL a 4 NULL 5 Using index for group-by +SELECT DISTINCT a FROM t1; +a +1 +2 +3 +4 +EXPLAIN SELECT a FROM t1 GROUP BY a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range NULL a 4 NULL 5 Using index for group-by +SELECT a FROM t1 GROUP BY a; +a +1 +2 +3 +4 +DROP TABLE t1; +# # End of 10.1 tests # diff --git a/mysql-test/t/group_min_max.test b/mysql-test/t/group_min_max.test index b0bc42d7f8c..ebf005d32e1 100644 --- a/mysql-test/t/group_min_max.test +++ b/mysql-test/t/group_min_max.test @@ -1608,6 +1608,33 @@ explain select min(a) from t1 where a between "a" and "Cafeeeeeeeeeeeeeeeeeeeeee explain select min(a) from t1 where a between "abbbbbbbbbbbbbbbbbbbb" and "Cafe2"; drop table t1; +--echo # +--echo # MDEV-15433: Optimizer does not use group by optimization with distinct +--echo # + +CREATE TABLE t1 (id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, a INT NOT NULL, KEY(a)); +INSERT INTO t1(a) VALUES (1), (2), (3), (4); +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +INSERT INTO t1(a) SELECT a FROM t1; +OPTIMIZE TABLE t1; +EXPLAIN SELECT DISTINCT a FROM t1; +SELECT DISTINCT a FROM t1; +EXPLAIN SELECT a FROM t1 GROUP BY a; +SELECT a FROM t1 GROUP BY a; +DROP TABLE t1; + --echo # --echo # End of 10.1 tests --echo # diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 0315b91b8be..9421319be99 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -12147,6 +12147,16 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time) if (join->make_sum_func_list(join->all_fields, join->fields_list, 1)) DBUG_RETURN(NULL); + /* + If the query has GROUP BY but due to some case it gets optimized away(an example would + be constants in GROUP BY clause) then JOIN::group_list would be NULL. For such a case + we know there would be only one row in the result and we cannot change DISTINCT to GROUP BY + as this can give wrong results. So group-by optimization is used for DISTINCT only if we + don't have a GROUP BY clause in the query. + */ + if (!join->group_list && join->group && join->select_distinct) + DBUG_RETURN(NULL); + List_iterator<Item> select_items_it(join->fields_list); is_agg_distinct = is_indexed_agg_distinct(join, &agg_distinct_flds); diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 64d1565dec7..e5430d730bb 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -5754,7 +5754,7 @@ add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab) Item_field *cur_item; key_map possible_keys(0); - if (join->group_list || join->simple_group) + if (join->group_list) { /* Collect all query fields referenced in the GROUP clause. */ for (cur_group= join->group_list; cur_group; cur_group= cur_group->next) (*cur_group->item)->walk(&Item::collect_item_field_processor, 0,