On Tue, Mar 27, 2018 at 04:48:10PM +0530, Varun wrote:
revision-id: 167a96b5157049408a6ad4bca7abcd376af93fb5 (mariadb-10.3.0-644-g167a96b5157) parent(s): 4359c6b4806605c78987e50cab3a6b42016b7603 author: Varun Gupta committer: Varun Gupta timestamp: 2018-03-27 16:45:46 +0530 message:
MDEV-9959: A serious MariaDB server performance bug
step#1: if a derived table has SELECT DISTINCT, provide index statistics for it so that the join optimizer in the upper select knows that ref access to the table will produce one row.
Added handling for multiple selects in the derived table
--- mysql-test/r/cte_nonrecursive.result | 8 +-- mysql-test/r/cte_recursive.result | 6 +- mysql-test/r/derived_cond_pushdown.result | 82 ++++++++++++------------- mysql-test/r/derived_view.result | 2 +- mysql-test/r/join_cache.result | 6 +- mysql-test/r/mdev9959.result | 46 ++++++++++++++ mysql-test/r/subselect_extra.result | 2 +- mysql-test/r/subselect_extra_no_semijoin.result | 2 +- mysql-test/t/mdev9959.test | 25 ++++++++ sql/sql_lex.h | 2 + sql/sql_union.cc | 60 ++++++++++++++++++ sql/sql_yacc.yy | 3 + sql/table.cc | 19 ++++++ 13 files changed, 209 insertions(+), 54 deletions(-)
diff --git a/mysql-test/r/derived_cond_pushdown.result b/mysql-test/r/derived_cond_pushdown.result index 32d3c88cc8d..3723e25a494 100644 --- a/mysql-test/r/derived_cond_pushdown.result +++ b/mysql-test/r/derived_cond_pushdown.result @@ -5187,7 +5187,7 @@ explain select * from v2_union as v,t2 where ((v.a=6) or (v.a=8)) and (v.c>200) and (v.a=t2.a); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 6 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1 Using where
As agreed on the call: need to check what is the cause of this. Here, the temp table has distinct rows, that is, {a,b,c} are distinct. But ref access only uses the first component, where does rows=1 come from? (if this is how best_access_path computes an estimate for prefix when it only has the estimate for the full key ... fine)
2 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort 3 UNION t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort 4 UNION t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort @@ -5213,7 +5213,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["a"], "ref": ["test.t2.a"], - "rows": 6, + "rows": 1, "filtered": 100, "attached_condition": "v.c > 200", "materialized": { @@ -5358,7 +5358,7 @@ a b c a b c d explain select * from v3_union as v,t2 where (v.a=t2.a) and (v.c>6); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 4 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1 Using where 2 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where 3 UNION t1 ALL NULL NULL NULL NULL 20 Using where NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL @@ -5382,7 +5382,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["a"], "ref": ["test.t2.a"], - "rows": 4, + "rows": 1, "filtered": 100, "attached_condition": "v.c > 6", "materialized": { @@ -5476,7 +5476,7 @@ a b c a b c d explain select * from v3_union as v,t2 where (v.a=t2.a) and ((t2.a>1) or (v.b<20)); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 4 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1 Using where 2 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where 3 UNION t1 ALL NULL NULL NULL NULL 20 Using where NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL @@ -5500,7 +5500,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["a"], "ref": ["test.t2.a"], - "rows": 4, + "rows": 1, "filtered": 100, "attached_condition": "t2.a > 1 or v.b < 20", "materialized": { @@ -5561,7 +5561,7 @@ explain select * from v3_union as v,t2 where (v.a=t2.a) and ((v.b=19) or (v.b=21)) and ((v.c<3) or (v.c>600)); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 4 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1 Using where 2 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where 3 UNION t1 ALL NULL NULL NULL NULL 20 Using where NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL @@ -5586,7 +5586,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["a"], "ref": ["test.t2.a"], - "rows": 4, + "rows": 1, "filtered": 100, "attached_condition": "(v.b = 19 or v.b = 21) and (v.c < 3 or v.c > 600)", "materialized": { @@ -5645,7 +5645,7 @@ a b c a b c d explain select * from v4_union as v,t2 where (v.a=t2.a) and (v.b<20); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 4 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1 Using where 2 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort 3 UNION t1 ALL NULL NULL NULL NULL 20 Using where NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL @@ -5669,7 +5669,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["a"], "ref": ["test.t2.a"], - "rows": 4, + "rows": 1, "filtered": 100, "attached_condition": "v.b < 20", "materialized": { @@ -5752,7 +5752,7 @@ explain select * from v4_union as v,t2 where (v.a=t2.a) and ((t2.a<3) or (v.b<40)) and (v.c>500); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 4 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1 Using where 2 DERIVED t1 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort 3 UNION t1 ALL NULL NULL NULL NULL 20 Using where NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL @@ -5777,7 +5777,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["a"], "ref": ["test.t2.a"], - "rows": 4, + "rows": 1, "filtered": 100, "attached_condition": "(t2.a < 3 or v.b < 40) and v.c > 500", "materialized": { @@ -7952,7 +7952,7 @@ a b c a b c explain select * from v1,t2 where (v1.a=t2.a) and (v1.a<5); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 2 +1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1 2 DERIVED t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort 3 EXCEPT t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort NULL EXCEPT RESULT <except2,3> ALL NULL NULL NULL NULL NULL @@ -7976,7 +7976,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["a"], "ref": ["test.t2.a"], - "rows": 2, + "rows": 1, "filtered": 100, "materialized": { "query_block": { @@ -8226,7 +8226,7 @@ a b c a b c explain select * from v1,t2 where (v1.a=t2.a) and (v1.c>500); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 2 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1 Using where 2 DERIVED t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort 3 EXCEPT t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort NULL EXCEPT RESULT <except2,3> ALL NULL NULL NULL NULL NULL @@ -8250,7 +8250,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["a"], "ref": ["test.t2.a"], - "rows": 2, + "rows": 1, "filtered": 100, "attached_condition": "v1.c > 500", "materialized": { @@ -8317,7 +8317,7 @@ a b c a b c explain select * from v1,t2 where (v1.a=t2.a) and (v1.a<5) and (v1.c>500); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 2 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1 Using where 2 DERIVED t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort 3 EXCEPT t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort NULL EXCEPT RESULT <except2,3> ALL NULL NULL NULL NULL NULL @@ -8341,7 +8341,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["a"], "ref": ["test.t2.a"], - "rows": 2, + "rows": 1, "filtered": 100, "attached_condition": "v1.c > 500", "materialized": { @@ -8411,7 +8411,7 @@ a b c a b c explain select * from v1,t2 where (v1.a=t2.a) and ((v1.b>27) or (v1.b<19)); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 2 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1 Using where 2 DERIVED t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort 3 EXCEPT t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort NULL EXCEPT RESULT <except2,3> ALL NULL NULL NULL NULL NULL @@ -8435,7 +8435,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["a"], "ref": ["test.t2.a"], - "rows": 2, + "rows": 1, "filtered": 100, "attached_condition": "v1.b > 27 or v1.b < 19", "materialized": { @@ -8508,7 +8508,7 @@ explain select * from v1,t2 where (v1.a=t2.a) and ((v1.c<400) or (v1.c>800)); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 2 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1 Using where 2 DERIVED t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort 3 EXCEPT t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort NULL EXCEPT RESULT <except2,3> ALL NULL NULL NULL NULL NULL @@ -8533,7 +8533,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["a"], "ref": ["test.t2.a"], - "rows": 2, + "rows": 1, "filtered": 100, "attached_condition": "v1.c < 400 or v1.c > 800", "materialized": { @@ -8762,7 +8762,7 @@ where ((d1.a>4) and (d1.c>500))); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY <derived2> ref key0 key0 5 test.t2.b 2 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t2.b 1 Using where 2 DERIVED t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort 3 EXCEPT t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort NULL EXCEPT RESULT <except2,3> ALL NULL NULL NULL NULL NULL @@ -8796,7 +8796,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["b"], "ref": ["test.t2.b"], - "rows": 2, + "rows": 1, "filtered": 100, "attached_condition": "t2.c = 988 and t2.b > 13 or d1.a > 4 and d1.c > 500", "materialized": { @@ -8872,7 +8872,7 @@ a b c a b c explain select * from v1,t2 where (v1.a=t2.a) and (v1.a>5) and (v1.c>200); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 3 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1 Using where 2 DERIVED t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort 3 UNION <derived4> ALL NULL NULL NULL NULL 18 Using where 4 DERIVED t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort @@ -8899,7 +8899,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["a"], "ref": ["test.t2.a"], - "rows": 3, + "rows": 1, "filtered": 100, "attached_condition": "v1.c > 200", "materialized": { @@ -9138,7 +9138,7 @@ a b c a b c explain select * from v1,t2 where (v1.a=t2.a) and (v1.a>5) and (v1.c>200); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 3 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1 Using where 2 DERIVED t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort 3 UNION t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort 4 EXCEPT t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort @@ -9163,7 +9163,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["a"], "ref": ["test.t2.a"], - "rows": 3, + "rows": 1, "filtered": 100, "attached_condition": "v1.c > 200", "materialized": { @@ -9260,7 +9260,7 @@ a b c a b c explain select * from v1,t2 where (v1.a=t2.a) and (v1.a>4) and (v1.c<200); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 3 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1 Using where 2 DERIVED t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort 3 EXCEPT t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort 4 UNION t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort @@ -9285,7 +9285,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["a"], "ref": ["test.t2.a"], - "rows": 3, + "rows": 1, "filtered": 100, "attached_condition": "v1.c < 200", "materialized": { @@ -9498,7 +9498,7 @@ a b c a b c explain select * from v1,t2 where (v1.a=t2.a) and (v1.a>4) and (v1.c<130); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 2 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1 Using where 2 DERIVED t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort 3 EXCEPT <derived4> ALL NULL NULL NULL NULL 18 Using where 4 DERIVED t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort @@ -9525,7 +9525,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["a"], "ref": ["test.t2.a"], - "rows": 2, + "rows": 1, "filtered": 100, "attached_condition": "v1.c < 130", "materialized": { @@ -9643,7 +9643,7 @@ a b c a b c explain select * from v1,t2 where (v1.a=t2.a) and (v1.a>4) and (v1.c<130); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 3 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1 Using where 2 DERIVED t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort 3 EXCEPT <derived4> ALL NULL NULL NULL NULL 18 Using where 4 DERIVED t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort @@ -9671,7 +9671,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["a"], "ref": ["test.t2.a"], - "rows": 3, + "rows": 1, "filtered": 100, "attached_condition": "v1.c < 130", "materialized": { @@ -10143,7 +10143,7 @@ a b c a b c explain select * from v1,t2 where (v1.a=t2.a) and (v1.a<2) and (v1.b<30) and (v1.c>450); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 2 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1 Using where 2 DERIVED t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort 3 EXCEPT t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort NULL EXCEPT RESULT <except2,3> ALL NULL NULL NULL NULL NULL @@ -10167,7 +10167,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["a"], "ref": ["test.t2.a"], - "rows": 2, + "rows": 1, "filtered": 100, "attached_condition": "v1.b < 30 and v1.c > 450", "materialized": { @@ -10243,7 +10243,7 @@ a b c a b c explain select * from v1,t2 where (v1.a=t2.a) and ((v1.a<2) or (v1.a<5)) and (v1.c>450); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 2 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1 Using where 2 DERIVED t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort 3 EXCEPT t1 ALL NULL NULL NULL NULL 18 Using where; Using temporary; Using filesort NULL EXCEPT RESULT <except2,3> ALL NULL NULL NULL NULL NULL @@ -10267,7 +10267,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["a"], "ref": ["test.t2.a"], - "rows": 2, + "rows": 1, "filtered": 100, "attached_condition": "v1.c > 450", "materialized": { @@ -10636,7 +10636,7 @@ a b c a b c explain select * from v1,t2 where (v1.b=t2.b) and (v1.a<3); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 9 Using where -1 PRIMARY <derived2> ref key0 key0 5 test.t2.b 2 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t2.b 1 Using where 2 DERIVED t3 range i1 i1 5 NULL 1 Using index condition 3 UNION t3 ALL NULL NULL NULL NULL 20 Using where; Using temporary; Using filesort NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL @@ -10660,7 +10660,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["b"], "ref": ["test.t2.b"], - "rows": 2, + "rows": 1, "filtered": 100, "attached_condition": "v1.a < 3", "materialized": { @@ -12333,7 +12333,7 @@ EXPLAIN "key_length": "5", "used_key_parts": ["b"], "ref": ["test.t1.a"], - "rows": 2, + "rows": 1, "filtered": 100, "materialized": { "query_block": { diff --git a/mysql-test/r/derived_view.result b/mysql-test/r/derived_view.result index 85e56ff176e..3a0fde7b053 100644 --- a/mysql-test/r/derived_view.result +++ b/mysql-test/r/derived_view.result @@ -1525,7 +1525,7 @@ EXPLAIN SELECT a FROM t1 WHERE (a,b) IN (SELECT * FROM v2); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 2 Using where -1 PRIMARY <derived3> ref key0 key0 10 test.t1.a,test.t1.b 2 FirstMatch(t1) +1 PRIMARY <derived3> ref key0 key0 10 test.t1.a,test.t1.b 1 FirstMatch(t1)
Interesting.. here, we potentially could infer that this semi-join can be converted to an inner join (as the subquery may only have one match). But I think this is outside of the scope of this MDEV.
3 DERIVED t2 ALL NULL NULL NULL NULL 6 4 UNION t3 ALL NULL NULL NULL NULL 4 NULL UNION RESULT <union3,4> ALL NULL NULL NULL NULL NULL diff --git a/mysql-test/r/join_cache.result b/mysql-test/r/join_cache.result index eea397402ad..cc185b640bc 100644 --- a/mysql-test/r/join_cache.result +++ b/mysql-test/r/join_cache.result @@ -5197,7 +5197,7 @@ SELECT * FROM (SELECT DISTINCT * FROM t1) t WHERE t.a IN (SELECT t2.a FROM t2); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t2 ALL NULL NULL NULL NULL 4 Using where; Start temporary -1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 2 End temporary +1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1 End temporary 2 DERIVED t1 ALL NULL NULL NULL NULL 3 Using temporary SELECT * FROM (SELECT DISTINCT * FROM t1) t WHERE t.a IN (SELECT t2.a FROM t2); @@ -5208,8 +5208,8 @@ EXPLAIN SELECT * FROM (SELECT DISTINCT * FROM t1) t WHERE t.a IN (SELECT t2.a FROM t2); id select_type table type possible_keys key key_len ref rows Extra -1 PRIMARY <derived2> ALL NULL NULL NULL NULL 3 -1 PRIMARY t2 ALL NULL NULL NULL NULL 4 Using where; Start temporary; End temporary; Using join buffer (flat, BNL join) +1 PRIMARY t2 ALL NULL NULL NULL NULL 4 Using where; Start temporary +1 PRIMARY <derived2> ref key0 key0 5 test.t2.a 1 End temporary 2 DERIVED t1 ALL NULL NULL NULL NULL 3 Using temporary SELECT * FROM (SELECT DISTINCT * FROM t1) t WHERE t.a IN (SELECT t2.a FROM t2); diff --git a/mysql-test/r/mdev9959.result b/mysql-test/r/mdev9959.result new file mode 100644 index 00000000000..049e0350cca --- /dev/null +++ b/mysql-test/r/mdev9959.result @@ -0,0 +1,46 @@ +create table t1(a int); +insert into t1 values (1),(2),(3),(4),(5),(6); +create table t2(a int, b int,c int); +insert into t2(a,b,c) values (1,1,2),(2,2,3),(3,1,4),(4,2,2),(5,1,1),(6,2,5); +create table t3(a int, b int); +insert into t3(a,b) values (1,1),(2,2),(2,1),(1,2),(5,1),(9,2); +table "<derived2>" should have type=ref and rows=1 +one select in derived table +with distinct +analyze select * from t1 , ((select distinct t2.a from t2 order by c))q where t1.a=q.a; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 6 6.00 100.00 100.00 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t1.a 1 1.00 100.00 100.00 +2 DERIVED t2 ALL NULL NULL NULL NULL 6 6.00 100.00 100.00 Using temporary; Using filesort +# multiple selects in derived table +# NO UNION ALL +analyze select * from t1 , ( (select t2.a,t2.b from t2 order by c) union (select t2.a,t2.b from t2 order by c))q where t1.a=q.a; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 6 6.00 100.00 100.00 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t1.a 1 1.00 100.00 100.00 +2 DERIVED t2 ALL NULL NULL NULL NULL 6 6.00 100.00 100.00 +3 UNION t2 ALL NULL NULL NULL NULL 6 6.00 100.00 100.00 +NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL 6.00 NULL NULL +select * from t1 , ( (select t2.a from t2 order by c) union (select t2.a from t2 order by c))q where t1.a=q.a; +a a +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +# UNION ALL and EXCEPT +analyze select * from t1 , ( (select t2.a,t2.b from t2 order by c) union all (select t2.a,t2.b from t2 order by c) except(select t3.a, t3.b from t3 order by b))q where t1.a=q.a; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 PRIMARY t1 ALL NULL NULL NULL NULL 6 6.00 100.00 100.00 Using where +1 PRIMARY <derived2> ref key0 key0 5 test.t1.a 1 0.50 100.00 100.00 +2 DERIVED t2 ALL NULL NULL NULL NULL 6 6.00 100.00 100.00 +3 UNION t2 ALL NULL NULL NULL NULL 6 6.00 100.00 100.00 +4 EXCEPT t3 ALL NULL NULL NULL NULL 6 6.00 100.00 100.00 +NULL UNIT RESULT <unit2,3,4> ALL NULL NULL NULL NULL NULL 3.00 NULL NULL +select * from t1 , ( (select t2.a,t2.b from t2 order by c) union all (select t2.a,t2.b from t2 order by c) except(select t3.a, t3.b from t3 order by b))q where t1.a=q.a; +a a b +3 3 1 +4 4 2 +6 6 2 +drop table t1,t2,t3; diff --git a/mysql-test/t/mdev9959.test b/mysql-test/t/mdev9959.test new file mode 100644 index 00000000000..36f364261c9 --- /dev/null +++ b/mysql-test/t/mdev9959.test @@ -0,0 +1,25 @@ +create table t1(a int); +insert into t1 values (1),(2),(3),(4),(5),(6); +create table t2(a int, b int,c int); +insert into t2(a,b,c) values (1,1,2),(2,2,3),(3,1,4),(4,2,2),(5,1,1),(6,2,5); +create table t3(a int, b int); +insert into t3(a,b) values (1,1),(2,2),(2,1),(1,2),(5,1),(9,2); + +--echo table "<derived2>" should have type=ref and rows=1 +--echo one select in derived table + +--echo with distinct +analyze select * from t1 , ((select distinct t2.a from t2 order by c))q where t1.a=q.a; + +--echo # multiple selects in derived table +--echo # NO UNION ALL +analyze select * from t1 , ( (select t2.a,t2.b from t2 order by c) union (select t2.a,t2.b from t2 order by c))q where t1.a=q.a; +select * from t1 , ( (select t2.a from t2 order by c) union (select t2.a from t2 order by c))q where t1.a=q.a; + +--echo # UNION ALL and EXCEPT +analyze select * from t1 , ( (select t2.a,t2.b from t2 order by c) union all (select t2.a,t2.b from t2 order by c) except(select t3.a, t3.b from t3 order by b))q where t1.a=q.a; + +select * from t1 , ( (select t2.a,t2.b from t2 order by c) union all (select t2.a,t2.b from t2 order by c) except(select t3.a, t3.b from t3 order by b))q where t1.a=q.a; + +drop table t1,t2,t3; + diff --git a/sql/sql_lex.h b/sql/sql_lex.h index f0241a32acf..16d5e55d251 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -761,6 +761,7 @@ class st_select_lex_unit: public st_select_lex_node { Procedure *last_procedure; /* Pointer to procedure, if such exists */
bool columns_are_renamed; + bool union_all; /* TRUE if there is a UNION ALL operation */
This is not initialized properly. The testcase: create table t10 (a int , b int); insert into t10 values (1,1),(1,1); create table t11 (a int , b int); create table t12 (a int , b int); insert into t11 values (2,2),(2,2); create table t13 (a int , b int); insert into t12 values (3,3),(3,3); create table ten(a int); insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); explain select * from ten A, (select * from t10 union select * from t11 union select * from t12) T where T.a=A.a; +------+--------------+--------------+------+---------------+------+---------+----------+------+-------------+ | id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra | +------+--------------+--------------+------+---------------+------+---------+----------+------+-------------+ | 1 | PRIMARY | A | ALL | NULL | NULL | NULL | NULL | 10 | Using where | | 1 | PRIMARY | <derived2> | ref | key0 | key0 | 5 | test.A.a | 1 | | | 2 | DERIVED | t10 | ALL | NULL | NULL | NULL | NULL | 2 | | | 3 | UNION | t11 | ALL | NULL | NULL | NULL | NULL | 2 | | | 4 | UNION | t12 | ALL | NULL | NULL | NULL | NULL | 2 | | | NULL | UNION RESULT | <union2,3,4> | ALL | NULL | NULL | NULL | NULL | NULL | | +------+--------------+--------------+------+---------------+------+---------+----------+------+-------------+ (gdb) wher #0 st_select_lex_unit::check_distinct_in_union (this=0x7fff700149e0) at /home/psergey/dev-git/10.3-cp/sql/sql_union.cc:2100 #1 0x0000555555d2a693 in TABLE::add_tmp_key (this=0x7fff70062908, key=0, key_parts=1, next_field_no=0x555555c6bf47 <get_next_field_for_derived_key(uchar*)>, arg=0x7fffd98e9230 "X\207\006p\377\177", unique=false) at /home/psergey/dev-git/10.3-cp/sql/table.cc:7270 #2 0x0000555555c6c214 in generate_derived_keys_for_table (keyuse=0x7fff700687b0, count=1, keys=1) at /home/psergey/dev-git/10.3-cp/sql/sql_select.cc:11091 #3 0x0000555555c6c42a in generate_derived_keys (keyuse_array=0x7fff70018968) at /home/psergey/dev-git/10.3-cp/sql/sql_select.cc:11166 #4 0x0000555555c60267 in sort_and_filter_keyuse (thd=0x7fff70000d60, keyuse=0x7fff70018968, skip_unprefixed_keyparts=true) at /home/psergey/dev-git/10.3-cp/sql/sql_select.cc:6403 #5 0x0000555555c5a910 in make_join_statistics (join=0x7fff70018678, tables_list=..., keyuse_array=0x7fff70018968) at /home/psergey/dev-git/10.3-cp/sql/sql_select.cc:4622 #6 0x0000555555c513d4 in JOIN::optimize_inner (this=0x7fff70018678) at /home/psergey/dev-git/10.3-cp/sql/sql_select.cc:1888 #7 0x0000555555c4fa73 in JOIN::optimize (this=0x7fff70018678) at /home/psergey/dev-git/10.3-cp/sql/sql_select.cc:1451 #8 0x0000555555c59638 in mysql_select (thd=0x7fff70000d60, tables=0x7fff70013f68, wild_num=1, fields=..., conds=0x7fff70017d98, og_num=0, order=0x0, group=0x0, having=0x0, proc_param=0x0, select_options=2147748612, result=0x7fff70017ff0, unit=0x7fff70004c28, select_lex=0x7fff70005398) at /home/psergey/dev-git/10.3-cp/sql/sql_select.cc:4225 #9 0x0000555555c91d6f in mysql_explain_union (thd=0x7fff70000d60, unit=0x7fff70004c28, result=0x7fff70017ff0) at /home/psergey/dev-git/10.3-cp/sql/sql_select.cc:25866 #10 0x0000555555c15da6 in execute_sqlcom_select (thd=0x7fff70000d60, all_tables=0x7fff70013f68) at /home/psergey/dev-git/10.3-cp/sql/sql_parse.cc:6487 #11 0x0000555555c0c7a5 in mysql_execute_command (thd=0x7fff70000d60) at /home/psergey/dev-git/10.3-cp/sql/sql_parse.cc:3821 #12 0x0000555555c1a0fd in mysql_parse (thd=0x7fff70000d60, rawbuf=0x7fff70013ca8 "explain select * from ten A, (select * from t10 union select * from t11 union select * from t12) T where T.a=A.a", length=112, parser_state=0x7fffd98ea5d0, is_com_multi=false, is_next_command=false) at /home/psergey/dev-git/10.3-cp/sql/sql_parse.cc:8091 (gdb) p union_all $36 = 165
void init_query(); st_select_lex* outer_select(); @@ -800,6 +801,7 @@ class st_select_lex_unit: public st_select_lex_node { bool union_needs_tmp_table();
void set_unique_exclude(); + bool check_distinct_in_union();
can this be made 'const'? if yes, please make it
friend struct LEX; friend int subselect_union_engine::exec(); diff --git a/sql/sql_union.cc b/sql/sql_union.cc index 857c9a117f5..6dac0cb0458 100644 --- a/sql/sql_union.cc +++ b/sql/sql_union.cc @@ -1960,3 +1960,63 @@ void st_select_lex_unit::set_unique_exclude() } } } + +/* + Check if the selects in the derived table can give distinct rows irrespective + of the data given for the tables. + + for example: + select * from + ((select t1.a from t1) op (select t2.a from t2) op (select t3.a from t3)); + the op here being UNION/INTERSECT/EXCEPT + + so this function would check if the derived table like the case above + can give distinct rows or not irrespective of the data in the tables.
I think the above is difficult to read. Readability is a matter of opinion, but I would suggest another variant: /* @brief Check if the derived table is guaranteed to have distinct rows because of UNION operations used to populate it. @detail UNION operation removes duplicate rows from its output. That is, a query like select * from t1 UNION select * from t2 will not produce duplicate rows in its output, even if table t1 (and/or t2) contain duplicate rows. EXCEPT and INTERSECT operations also have this property. On the other hand, UNION ALL operation doesn't remove duplicates. (The SQL standard also defines EXCEPT ALL and INTERSECT ALL, but we don't support them). st_select_lex_unit computes its value left to right. That is, if there is a st_select_lex_unit object describing (select #1) OP1 (select #2) OP2 (select #3) then ((select #1) OP1 (select #2)) is computed first, and OP2 is computed second. How can one tell if st_select_lex_unit is guaranteed to have distinct output rows? This depends on whether the last operation was duplicate- removing or not: - UNION ALL is not duplicate-removing - all other operations are duplicate-removing */ Is the above correct?
+ + So what the function is handling: + - If there is no UNION ALL, we are guarenteed distinct rows. + + Example + ((select t1.a from t1) UNION (select t2.a from t2); + + - If there is UNION ALL, we can still guarantee distinct rows if + the last operation of the selects does not involve UNION ALL or + INTERSECT is not there .
+ + Example: + ((select t1.a from t1) UNION ALL (select t2.a from t2) UNION/EXCEPT + (select t3.a from t3)): guarantees distinct rows, UNION and EXCEPT + have the same precedence. + + Example: + ((select t1.a from t1) UNION ALL (select t2.a from t2) INTERSECT + (select t3.a from t3)): does not guarantee distinct rows because + INTERSECT has higher precedence than UNION so we would evaluate + the INTERSECT part first and then do UNION ALL, so there we can + end up with duplicates, so distinct rows are not guaranteed. + + @retval false Distinct rows are not guaranteed + @retval true Distinct rows are guanranteed irrespective of the data + in the tables + +*/ + +bool st_select_lex_unit::check_distinct_in_union() +{ + bool is_intersect_present=FALSE; + st_select_lex* first= first_select(); + for(st_select_lex *sl=first; sl ; sl=sl->next_select()) + is_intersect_present|= (sl->linkage == INTERSECT_TYPE); + + if (!union_all) + return true; + else + { + if (union_distinct) + { + if (!is_intersect_present && !union_distinct->next_select()) + return true; + } I'm unable to undestand what kind of check we are making here. According to my variant of the comment above, we should just check whether the last operation (operation made with the output of the last select) was duplicate-removing or not.
We seem to be doing something more complex?
+ } + return false; +} diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 35ec2d29d21..112873e8362 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -571,7 +571,10 @@ bool LEX::add_select_to_union_list(bool is_union_distinct, current_select; } else + { DBUG_ASSERT(type == UNION_TYPE); + current_select->master_unit()->union_all= true; + } return FALSE; }
diff --git a/sql/table.cc b/sql/table.cc index 4f90d429ce5..d2938a29ea6 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -7113,6 +7113,25 @@ bool TABLE::add_tmp_key(uint key, uint key_parts, if (!keyinfo->rec_per_key) return TRUE; bzero(keyinfo->rec_per_key, sizeof(ulong)*key_parts); + /* + For the case when there is a derived table that would give distinct rows, + the index statistics are passed to the join optimizer to tell that + a ref access to the derived table will produce only one row. + */ + + st_select_lex_unit* derived= pos_in_table_list ? pos_in_table_list->derived: NULL; + if (derived) + { + /* + This handles the case when we have a single select in the derived table + */
+ st_select_lex* first= derived->first_select(); + if ((first && !first->is_part_of_union() && + first->options & SELECT_DISTINCT) ||
Does it anymore? Now it is handling the UNION case, too. please put brackets around the condition on the line above as some too-smart compilers now complain about a possible typo.
+ derived->check_distinct_in_union()) + keyinfo->rec_per_key[key_parts-1]=1; + } + keyinfo->read_stats= NULL; keyinfo->collected_stats= NULL;
BR Sergei -- Sergei Petrunia, Software Developer MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog