revision-id: 2de94d8de7a48a4a169dae8cd2a915dcd73de22f (mariadb-10.4.4-154-g2de94d8de7a) parent(s): c51f615bf575480b13ee542fdb3b7fe644e21098 author: Varun Gupta committer: Varun Gupta timestamp: 2019-07-26 23:43:04 +0530 message: Poor optimization of JOIN and ORDER BY LIMIT Iteration one --- mysql-test/main/order_by_limit.result | 200 +++++++++++ mysql-test/main/order_by_limit.test | 23 ++ mysql-test/main/sort_nest.result | 524 +++++++++++++++++++++++++++ mysql-test/main/sort_nest.test | 145 ++++++++ sql/item.cc | 41 ++- sql/item.h | 40 +++ sql/item_cmpfunc.cc | 16 + sql/item_cmpfunc.h | 1 + sql/item_func.h | 8 + sql/item_row.h | 5 + sql/opt_split.cc | 1 - sql/opt_subselect.cc | 30 ++ sql/opt_subselect.h | 1 + sql/opt_trace.cc | 20 ++ sql/opt_trace.h | 1 + sql/sql_class.h | 14 + sql/sql_const.h | 8 + sql/sql_select.cc | 660 +++++++++++++++++++++++++++++++--- sql/sql_select.h | 40 ++- 19 files changed, 1729 insertions(+), 49 deletions(-) diff --git a/mysql-test/main/order_by_limit.result b/mysql-test/main/order_by_limit.result new file mode 100644 index 00000000000..5199cc88519 --- /dev/null +++ b/mysql-test/main/order_by_limit.result @@ -0,0 +1,200 @@ +create table ten(a int); +insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table one_k(a int); +insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C; +create table t1(a int, b int); +insert into t1 select A.a + B.a* 10, A.a + B.a* 10 from ten A, ten B,ten C; +create table t2(a int, b int); +insert into t2(a,b) values (1,1), (2,1); +insert into t2 select A.a + B.a* 10, -1 from ten A, ten B; +create table t3(a int, b int); +insert into t3 select A.a + B.a* 10 + C.a * 100, A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C; +set optimizer_trace=1; +the best plan would be having an order nest on (t2,t1) +explain +select * from t1,t2,t3 where t1.a=t2.b order by t1.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 102 Using temporary; Using filesort +1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 Using where; Using join buffer (flat, BNL join) +1 SIMPLE t3 ALL NULL NULL NULL NULL 1000 Using join buffer (incremental, BNL join) +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans')) +[ + + [ + + { + "plan_prefix": + [ + ], + "table": "t2", + "best_access_path": + { + "considered_access_paths": + [ + + { + "access_type": "scan", + "resulting_rows": 102, + "cost": 2.2241, + "chosen": true + } + ] + }, + "rest_of_plan": + [ + + { + "plan_prefix": + [ + "t2" + ], + "table": "t1", + "best_access_path": + { + "considered_access_paths": + [ + + { + "access_type": "scan", + "resulting_rows": 1000, + "cost": 4.1973, + "chosen": true + } + ] + }, + "rest_of_plan": + [ + + { + "plan_prefix": + [ + "t2", + "t1" + ], + "table": "t3", + "best_access_path": + { + "considered_access_paths": + [ + + { + "access_type": "scan", + "resulting_rows": 1000, + "cost": 33.578, + "chosen": true + } + ] + }, + "cardinality": 1.02e8, + "cost_of_plan": 1.22e8 + }, + + { + "plan_prefix": + [ + "t2", + "t1" + ], + "table": "t3", + "order_by_nest": + [ + "t2", + "t1" + ], + "best_access_path": + { + "considered_access_paths": + [ + + { + "access_type": "scan", + "resulting_rows": 1000, + "cost": 428121, + "chosen": true + } + ] + }, + "cost_of_plan": 2.1e7 + } + ] + }, + + { + "plan_prefix": + [ + "t2" + ], + "table": "t3", + "best_access_path": + { + "considered_access_paths": + [ + + { + "access_type": "scan", + "resulting_rows": 1000, + "cost": 4.1973, + "chosen": true + } + ] + }, + "pruned_by_heuristic": true + } + ] + }, + + { + "plan_prefix": + [ + ], + "table": "t1", + "best_access_path": + { + "considered_access_paths": + [ + + { + "access_type": "scan", + "resulting_rows": 1000, + "cost": 4.1973, + "chosen": true, + "use_tmp_table": true + } + ] + }, + "pruned_by_heuristic": true + }, + + { + "plan_prefix": + [ + ], + "table": "t3", + "best_access_path": + { + "considered_access_paths": + [ + + { + "access_type": "scan", + "resulting_rows": 1000, + "cost": 4.1973, + "chosen": true + } + ] + }, + "pruned_by_heuristic": true + } + ] +] +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.order_nest')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +JSON_DETAILED(JSON_EXTRACT(trace, '$**.order_nest')) +[ + + [ + "t2", + "t1" + ] +] +drop table t1,t2,t3,ten,one_k; diff --git a/mysql-test/main/order_by_limit.test b/mysql-test/main/order_by_limit.test new file mode 100644 index 00000000000..b717e84a1a8 --- /dev/null +++ b/mysql-test/main/order_by_limit.test @@ -0,0 +1,23 @@ +create table ten(a int); +insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); + +create table one_k(a int); +insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C; + +create table t1(a int, b int); +insert into t1 select A.a + B.a* 10, A.a + B.a* 10 from ten A, ten B,ten C; +create table t2(a int, b int); +insert into t2(a,b) values (1,1), (2,1); +insert into t2 select A.a + B.a* 10, -1 from ten A, ten B; +create table t3(a int, b int); +insert into t3 select A.a + B.a* 10 + C.a * 100, A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C; + +set optimizer_trace=1; + +--echo the best plan would be having an order nest on (t2,t1) + +explain +select * from t1,t2,t3 where t1.a=t2.b order by t1.b; +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.considered_execution_plans')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +select JSON_DETAILED(JSON_EXTRACT(trace, '$**.order_nest')) from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +drop table t1,t2,t3,ten,one_k; diff --git a/mysql-test/main/sort_nest.result b/mysql-test/main/sort_nest.result new file mode 100644 index 00000000000..f3e10597536 --- /dev/null +++ b/mysql-test/main/sort_nest.result @@ -0,0 +1,524 @@ +create table ten(a int); +insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table one_k(a int); +insert into one_k select A.a + B.a* 10 from ten A, ten B; +create table t1 (a int, b int); +insert into t1 select a,a from one_k; +create table t2 (a int, b int); +insert into t2 select a,a from one_k; +explain +select t1.b from t1,t2 where t1.a=t2.a order by t2.b limit 2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using filesort +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +select t1.b from t1,t2 where t1.a=t2.a order by t2.b limit 2; +b +0 +1 +analyze +select t1.a,t2.a from t1,t2 where t1.a=t2.a order by t2.b limit 10; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 100 10.00 100.00 100.00 Using filesort +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 91.00 100.00 1.10 Using where +select t1.a,t2.a from t1,t2 where t1.a=t2.a order by t2.b limit 10; +a a +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +alter table t1 add key (a); +ref should be func +explain +select t1.b,t2.b from t1,t2 where t1.a=t2.a+1 order by t2.b limit 2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using filesort +1 SIMPLE t1 ref a a 5 func 1 Using index condition +select t1.b,t2.b from t1,t2 where t1.a=t2.a+1 order by t2.b limit 2; +b b +1 0 +2 1 +ref should be order-nest.a +explain +select t1.b,t2.b from t1,t2 where t1.a=t2.a order by t2.b limit 2; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t2 ALL NULL NULL NULL NULL 100 Using where; Using filesort +1 SIMPLE t1 ref a a 5 test.t2.a 1 +select t1.b,t2.b from t1,t2 where t1.a=t2.a order by t2.b limit 2; +b b +0 0 +1 1 +drop table t1,t2,ten,one_k; +create table ten(a int); +insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table one_k(a int); +insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C; +create table t1(a int, b int); +insert into t1 select A.a + B.a* 10, A.a + B.a* 10 from ten A, ten B; +create table t2(a int, b int); +insert into t2(a,b) values (1,1), (2,2); +insert into t2 select A.a + B.a* 10, A.a+B.a*10 from ten A, ten B; +create table t3(a int, b int); +insert into t3 select A.a + B.a* 10 + C.a * 100, A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C; +create function f1(a int) returns int +begin +declare b int default 0; +return a+b; +end| +Covering 3 table joins +# {t1,t2} part of the nest +# t1.a > 95 would be attached to table t1 +# t1.b=t2.a would be attached to table t2; +explain select * from t1,t2,t3 where t1.a > 95 and t1.a=t2.a and t1.b = t3.a order by t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 102 Using where; Using join buffer (flat, BNL join) +1 SIMPLE <order-nest> ALL NULL NULL NULL NULL 10200 Using filesort +1 SIMPLE t3 ALL NULL NULL NULL NULL 1000 Using where +explain format=json select * from t1,t2,t3 where t1.a > 95 and t1.a=t2.a and t1.b = t3.a order by t2.b; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "table": { + "table_name": "t1", + "access_type": "ALL", + "rows": 100, + "filtered": 100, + "attached_condition": "t1.a > 95" + }, + "block-nl-join": { + "table": { + "table_name": "t2", + "access_type": "ALL", + "rows": 102, + "filtered": 100 + }, + "buffer_type": "flat", + "buffer_size": "1Kb", + "join_type": "BNL", + "attached_condition": "t2.a = t1.a" + }, + "read_sorted_file": { + "filesort": { + "sort_key": "`order-nest`.b", + "table": { + "table_name": "<order-nest>", + "access_type": "ALL", + "rows": 10200, + "filtered": 100 + } + } + }, + "table": { + "table_name": "t3", + "access_type": "ALL", + "rows": 1000, + "filtered": 100, + "attached_condition": "t3.a = `order-nest`.b" + } + } +} +select * from t1,t2,t3 where t1.a > 95 and t1.a=t2.a and t1.b = t3.a order by t2.b; +a b a b a b +96 96 96 96 96 96 +97 97 97 97 97 97 +98 98 98 98 98 98 +99 99 99 99 99 99 +# {t1,t2} part of the sort nest +# (t2.a < 2 or t1.b > 98) would be attached to table t2 +explain select * from t1,t2,t3 where (t3.a<2 and t2.a <2) or (t1.b > 98 and t3.b > 98) +order by t1.a, t2.b limit 5; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 +1 SIMPLE t2 ALL NULL NULL NULL NULL 102 Using where; Using join buffer (flat, BNL join) +1 SIMPLE <order-nest> ALL NULL NULL NULL NULL 10200 Using filesort +1 SIMPLE t3 ALL NULL NULL NULL NULL 1000 Using where +explain format=json select * from t1,t2,t3 where (t3.a<2 and t2.a <2) or (t1.b > 98 and t3.b > 98) +order by t1.a, t2.b limit 5; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "table": { + "table_name": "t1", + "access_type": "ALL", + "rows": 100, + "filtered": 100 + }, + "block-nl-join": { + "table": { + "table_name": "t2", + "access_type": "ALL", + "rows": 102, + "filtered": 100 + }, + "buffer_type": "flat", + "buffer_size": "1Kb", + "join_type": "BNL", + "attached_condition": "t2.a < 2 or t1.b > 98" + }, + "read_sorted_file": { + "filesort": { + "sort_key": "`order-nest`.a, `order-nest`.b", + "table": { + "table_name": "<order-nest>", + "access_type": "ALL", + "rows": 10200, + "filtered": 100 + } + } + }, + "table": { + "table_name": "t3", + "access_type": "ALL", + "rows": 1000, + "filtered": 100, + "attached_condition": "t3.a < 2 and `order-nest`.a < 2 or `order-nest`.b > 98 and t3.b > 98" + } + } +} +select * from t1,t2,t3 where (t3.a<2 and t2.a <2) or (t1.b > 98 and t3.b > 98) +order by t1.a, t2.b limit 5; +a b a b a b +0 0 1 1 0 0 +0 0 1 1 1 1 +1 1 1 1 0 0 +1 1 1 1 1 1 +2 2 1 1 0 0 +# {t1,t2} part of the nest +# t2.a < 2 or f1(t1.b) attached to table t2 +# t1.b=t2.a would be attached to table t2; +explain select * from t1,t2,t3 where (t3.a<2 and t2.a <2) or (f1(t1.b) > 98 and t3.b > 98) +order by t1.a,t2.b limit 5; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 +1 SIMPLE t2 ALL NULL NULL NULL NULL 102 Using join buffer (flat, BNL join) +1 SIMPLE <order-nest> ALL NULL NULL NULL NULL 10200 Using filesort +1 SIMPLE t3 ALL NULL NULL NULL NULL 1000 Using where +explain format=json select * from t1,t2,t3 where (t3.a<2 and t2.a <2) or (f1(t1.b) > 98 and t3.b > 98) +order by t1.a,t2.b limit 5; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "table": { + "table_name": "t1", + "access_type": "ALL", + "rows": 100, + "filtered": 100 + }, + "block-nl-join": { + "table": { + "table_name": "t2", + "access_type": "ALL", + "rows": 102, + "filtered": 100 + }, + "buffer_type": "flat", + "buffer_size": "1Kb", + "join_type": "BNL" + }, + "read_sorted_file": { + "filesort": { + "sort_key": "`order-nest`.a, `order-nest`.b", + "table": { + "table_name": "<order-nest>", + "access_type": "ALL", + "rows": 10200, + "filtered": 100 + } + } + }, + "table": { + "table_name": "t3", + "access_type": "ALL", + "rows": 1000, + "filtered": 100, + "attached_condition": "t3.a < 2 and `order-nest`.a < 2 or f1(`order-nest`.b) > 98 and t3.b > 98" + } + } +} +select * from t1,t2,t3 where (t3.a<2 and t2.a <2) or (f1(t1.b) > 98 and t3.b > 98) +order by t1.a,t2.b limit 5; +a b a b a b +0 0 0 0 0 0 +0 0 0 0 1 1 +0 0 1 1 0 0 +0 0 1 1 1 1 +0 0 1 1 0 0 +# +# Removing constant from the order by clause +# +explain select * from t1,t2 where t1.a > 95 and t1.a=t2.a order by t2.a limit 4; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where; Using filesort +1 SIMPLE t2 ALL NULL NULL NULL NULL 102 Using where +explain format=json select * from t1,t2 where t1.a > 95 and t1.a=t2.a order by t2.a limit 4; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "read_sorted_file": { + "filesort": { + "sort_key": "t2.a", + "table": { + "table_name": "t1", + "access_type": "ALL", + "rows": 100, + "filtered": 100, + "attached_condition": "t1.a > 95" + } + } + }, + "table": { + "table_name": "t2", + "access_type": "ALL", + "rows": 102, + "filtered": 100, + "attached_condition": "t2.a = t1.a" + } + } +} +select * from t1,t2 where t1.a > 95 and t1.a=t2.a order by t2.a limit 4; +a b a b +96 96 96 96 +97 97 97 97 +98 98 98 98 +99 99 99 99 +explain select * from t1,t2 where t1.a > 95 and t1.a=t2.a order by 1+2,t2.a limit 4; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where; Using filesort +1 SIMPLE t2 ALL NULL NULL NULL NULL 102 Using where +explain format=json select * from t1,t2 where t1.a > 95 and t1.a=t2.a order by 1+2,t2.a limit 4; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "read_sorted_file": { + "filesort": { + "sort_key": "t2.a", + "table": { + "table_name": "t1", + "access_type": "ALL", + "rows": 100, + "filtered": 100, + "attached_condition": "t1.a > 95" + } + } + }, + "table": { + "table_name": "t2", + "access_type": "ALL", + "rows": 102, + "filtered": 100, + "attached_condition": "t2.a = t1.a" + } + } +} +select * from t1,t2 where t1.a > 95 and t1.a=t2.a order by 1+2,t2.a limit 4; +a b a b +96 96 96 96 +97 97 97 97 +98 98 98 98 +99 99 99 99 +# +# Equality propagation, both the queries should use a sort nest on {t1,t2} +# +explain select t3.b, t2.a , t1.b , t1.a from t1,t2,t3 where t1.b=t3.b +order by t1.b desc, t2.a desc limit 3; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 +1 SIMPLE t2 ALL NULL NULL NULL NULL 102 Using join buffer (flat, BNL join) +1 SIMPLE <order-nest> ALL NULL NULL NULL NULL 10200 Using filesort +1 SIMPLE t3 ALL NULL NULL NULL NULL 1000 Using where +explain format=json select t3.b, t2.a , t1.b , t1.a from t1,t2,t3 where t1.b=t3.b +order by t1.b desc, t2.a desc limit 3; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "table": { + "table_name": "t1", + "access_type": "ALL", + "rows": 100, + "filtered": 100 + }, + "block-nl-join": { + "table": { + "table_name": "t2", + "access_type": "ALL", + "rows": 102, + "filtered": 100 + }, + "buffer_type": "flat", + "buffer_size": "1Kb", + "join_type": "BNL" + }, + "read_sorted_file": { + "filesort": { + "sort_key": "`order-nest`.b desc, `order-nest`.a desc", + "table": { + "table_name": "<order-nest>", + "access_type": "ALL", + "rows": 10200, + "filtered": 100 + } + } + }, + "table": { + "table_name": "t3", + "access_type": "ALL", + "rows": 1000, + "filtered": 100, + "attached_condition": "t3.b = `order-nest`.b" + } + } +} +select t3.b, t2.a , t1.b , t1.a from t1,t2,t3 where t1.b=t3.b +order by t1.b desc, t2.a desc limit 3; +b a b a +99 99 99 99 +99 98 99 99 +99 97 99 99 +explain select t3.b, t2.a , t1.b , t1.a from t1,t2,t3 where t1.b=t3.b +order by t3.b desc, t2.a desc limit 3; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 +1 SIMPLE t2 ALL NULL NULL NULL NULL 102 Using join buffer (flat, BNL join) +1 SIMPLE <order-nest> ALL NULL NULL NULL NULL 10200 Using filesort +1 SIMPLE t3 ALL NULL NULL NULL NULL 1000 Using where +select * from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +QUERY TRACE MISSING_BYTES_BEYOND_MAX_MEM_SIZE INSUFFICIENT_PRIVILEGES +explain format=json select t3.b, t2.a , t1.b , t1.a from t1,t2,t3 where t1.b=t3.b +order by t3.b desc, t2.a desc limit 3; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "table": { + "table_name": "t1", + "access_type": "ALL", + "rows": 100, + "filtered": 100 + }, + "block-nl-join": { + "table": { + "table_name": "t2", + "access_type": "ALL", + "rows": 102, + "filtered": 100 + }, + "buffer_type": "flat", + "buffer_size": "1Kb", + "join_type": "BNL" + }, + "read_sorted_file": { + "filesort": { + "sort_key": "`order-nest`.b desc, `order-nest`.a desc", + "table": { + "table_name": "<order-nest>", + "access_type": "ALL", + "rows": 10200, + "filtered": 100 + } + } + }, + "table": { + "table_name": "t3", + "access_type": "ALL", + "rows": 1000, + "filtered": 100, + "attached_condition": "t3.b = `order-nest`.b" + } + } +} +select t3.b, t2.a , t1.b , t1.a from t1,t2,t3 where t1.b=t3.b +order by t3.b desc, t2.a desc limit 3; +b a b a +99 99 99 99 +99 98 99 99 +99 97 99 99 +# +# Equality propagation also for arguments of expressions, +# the plan should use a sort nest on {t1,t2} +# +explain select t3.b, t2.a , t1.b , t1.a from t1,t2,t3 where t1.b=t3.b +order by t3.b+1 desc, t2.a desc limit 3; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 +1 SIMPLE t2 ALL NULL NULL NULL NULL 102 Using join buffer (flat, BNL join) +1 SIMPLE <order-nest> ALL NULL NULL NULL NULL 10200 Using filesort +1 SIMPLE t3 ALL NULL NULL NULL NULL 1000 Using where +explain format=json select t3.b, t2.a , t1.b , t1.a from t1,t2,t3 where t1.b=t3.b +order by t3.b+1 desc, t2.a desc limit 3; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "table": { + "table_name": "t1", + "access_type": "ALL", + "rows": 100, + "filtered": 100 + }, + "block-nl-join": { + "table": { + "table_name": "t2", + "access_type": "ALL", + "rows": 102, + "filtered": 100 + }, + "buffer_type": "flat", + "buffer_size": "1Kb", + "join_type": "BNL" + }, + "read_sorted_file": { + "filesort": { + "sort_key": "tmp_field desc, `order-nest`.a desc", + "table": { + "table_name": "<order-nest>", + "access_type": "ALL", + "rows": 10200, + "filtered": 100 + } + } + }, + "table": { + "table_name": "t3", + "access_type": "ALL", + "rows": 1000, + "filtered": 100, + "attached_condition": "t3.b = `order-nest`.b" + } + } +} +select t3.b, t2.a , t1.b , t1.a from t1,t2,t3 where t1.b=t3.b +order by t3.b+1 desc, t2.a desc limit 3; +b a b a +99 99 99 99 +99 98 99 99 +99 97 99 99 +# +# Rows for the sort-nest should be the cardinality of the join of inner tables +# of the sort-nest +# +# Rows for sort nest would be 9894 here +alter table t1 add key(a); +explain extended select t3.b, t2.a , t1.b , t1.a from t1,t2,t3 where t1.a > 5 and t1.b=t3.b +order by t1.b desc, t2.a desc limit 3; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL a NULL NULL NULL 100 97.00 Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 102 100.00 Using join buffer (flat, BNL join) +1 SIMPLE <order-nest> ALL NULL NULL NULL NULL 9894 100.00 Using filesort +1 SIMPLE t3 ALL NULL NULL NULL NULL 1000 100.00 Using where +Warnings: +Note 1003 select `test`.`t3`.`b` AS `b`,`order-nest`.`a` AS `a`,`order-nest`.`b` AS `b`,`order-nest`.`a` AS `a` from `test`.`t1` join `test`.`t2` join `test`.`t3` where `test`.`t3`.`b` = `order-nest`.`b` order by `order-nest`.`b` desc,`order-nest`.`a` desc limit 3 +alter table t1 drop key a; +drop table t1,t2,t3,ten,one_k; +drop function f1; diff --git a/mysql-test/main/sort_nest.test b/mysql-test/main/sort_nest.test new file mode 100644 index 00000000000..729a6cd9eaa --- /dev/null +++ b/mysql-test/main/sort_nest.test @@ -0,0 +1,145 @@ +create table ten(a int); +insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table one_k(a int); +insert into one_k select A.a + B.a* 10 from ten A, ten B; +create table t1 (a int, b int); +insert into t1 select a,a from one_k; + +create table t2 (a int, b int); +insert into t2 select a,a from one_k; +explain +select t1.b from t1,t2 where t1.a=t2.a order by t2.b limit 2; +select t1.b from t1,t2 where t1.a=t2.a order by t2.b limit 2; + +analyze +select t1.a,t2.a from t1,t2 where t1.a=t2.a order by t2.b limit 10; +select t1.a,t2.a from t1,t2 where t1.a=t2.a order by t2.b limit 10; + +alter table t1 add key (a); + +--echo ref should be func +explain +select t1.b,t2.b from t1,t2 where t1.a=t2.a+1 order by t2.b limit 2; +select t1.b,t2.b from t1,t2 where t1.a=t2.a+1 order by t2.b limit 2; + +--echo ref should be order-nest.a +explain +select t1.b,t2.b from t1,t2 where t1.a=t2.a order by t2.b limit 2; +select t1.b,t2.b from t1,t2 where t1.a=t2.a order by t2.b limit 2; + +drop table t1,t2,ten,one_k; + +create table ten(a int); +insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); + +create table one_k(a int); +insert into one_k select A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C; + +create table t1(a int, b int); +insert into t1 select A.a + B.a* 10, A.a + B.a* 10 from ten A, ten B; +create table t2(a int, b int); +insert into t2(a,b) values (1,1), (2,2); +insert into t2 select A.a + B.a* 10, A.a+B.a*10 from ten A, ten B; +create table t3(a int, b int); +insert into t3 select A.a + B.a* 10 + C.a * 100, A.a + B.a* 10 + C.a * 100 from ten A, ten B, ten C; + +delimiter |; +create function f1(a int) returns int +begin + declare b int default 0; + return a+b; +end| +delimiter ;| + +--echo Covering 3 table joins + +--echo # {t1,t2} part of the nest +--echo # t1.a > 95 would be attached to table t1 +--echo # t1.b=t2.a would be attached to table t2; + +let $query= +select * from t1,t2,t3 where t1.a > 95 and t1.a=t2.a and t1.b = t3.a order by t2.b; +eval explain $query; +eval explain format=json $query; +eval $query; + +--echo # {t1,t2} part of the sort nest +--echo # (t2.a < 2 or t1.b > 98) would be attached to table t2 + +let $query= +select * from t1,t2,t3 where (t3.a<2 and t2.a <2) or (t1.b > 98 and t3.b > 98) +order by t1.a, t2.b limit 5; +eval explain $query; +eval explain format=json $query; +eval $query; + +--echo # {t1,t2} part of the nest +--echo # t2.a < 2 or f1(t1.b) attached to table t2 +--echo # t1.b=t2.a would be attached to table t2; + +let $query= +select * from t1,t2,t3 where (t3.a<2 and t2.a <2) or (f1(t1.b) > 98 and t3.b > 98) +order by t1.a,t2.b limit 5; +eval explain $query; +eval explain format=json $query; +eval $query; + +--echo # +--echo # Removing constant from the order by clause +--echo # + +let $query= +select * from t1,t2 where t1.a > 95 and t1.a=t2.a order by t2.a limit 4; +eval explain $query; +eval explain format=json $query; +eval $query; + +let $query= +select * from t1,t2 where t1.a > 95 and t1.a=t2.a order by 1+2,t2.a limit 4; +eval explain $query; +eval explain format=json $query; +eval $query; + + +--echo # +--echo # Equality propagation, both the queries should use a sort nest on {t1,t2} +--echo # + +let $query=select t3.b, t2.a , t1.b , t1.a from t1,t2,t3 where t1.b=t3.b +order by t1.b desc, t2.a desc limit 3; +eval explain $query; +eval explain format=json $query; +eval $query; + +let $query=select t3.b, t2.a , t1.b , t1.a from t1,t2,t3 where t1.b=t3.b +order by t3.b desc, t2.a desc limit 3; + +eval explain $query; +select * from INFORMATION_SCHEMA.OPTIMIZER_TRACE; +eval explain format=json $query; +eval $query; + +--echo # +--echo # Equality propagation also for arguments of expressions, +--echo # the plan should use a sort nest on {t1,t2} +--echo # + +let $query=select t3.b, t2.a , t1.b , t1.a from t1,t2,t3 where t1.b=t3.b +order by t3.b+1 desc, t2.a desc limit 3; +eval explain $query; +eval explain format=json $query; +eval $query; + +--echo # +--echo # Rows for the sort-nest should be the cardinality of the join of inner tables +--echo # of the sort-nest +--echo # + +--echo # Rows for sort nest would be 9894 here +alter table t1 add key(a); +let $query=select t3.b, t2.a , t1.b , t1.a from t1,t2,t3 where t1.a > 5 and t1.b=t3.b +order by t1.b desc, t2.a desc limit 3; +eval explain extended $query; +alter table t1 drop key a; +drop table t1,t2,t3,ten,one_k; +drop function f1; diff --git a/sql/item.cc b/sql/item.cc index 84fd4dc6fb7..cf71a31be10 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -6158,6 +6158,28 @@ Item *Item_field::replace_equal_field(THD *thd, uchar *arg) } +Item *Item_field::replace_with_nest_items(THD *thd, uchar *arg) +{ + REPLACE_NEST_FIELD_ARG* param= (REPLACE_NEST_FIELD_ARG*)arg; + JOIN *join= param->join; + SORT_NEST_INFO *sort_nest_info= join->sort_nest_info; + if (!(used_tables() & sort_nest_info->nest_tables_map)) + return this; + + List_iterator_fast<Item> li(sort_nest_info->nest_base_table_cols); + uint index= 0; + Item *item; + while((item= li++)) + { + Item *field_item= item->real_item(); + if (field->eq(((Item_field*)field_item)->field)) + return sort_nest_info->nest_temp_table_cols.elem(index); + index++; + } + return this; +} + + void Item::init_make_send_field(Send_field *tmp_field, const Type_handler *h) { @@ -9061,11 +9083,17 @@ Item *Item_direct_view_ref::replace_equal_field(THD *thd, uchar *arg) bool Item_field::excl_dep_on_table(table_map tab_map) { - return used_tables() == tab_map || + return !(used_tables() & ~tab_map) || (item_equal && (item_equal->used_tables() & tab_map)); } +bool Item_field::excl_dep_on_nest(table_map tab_map) +{ + return !(used_tables() & ~tab_map); +} + + bool Item_field::excl_dep_on_grouping_fields(st_select_lex *sel) { @@ -9089,6 +9117,17 @@ bool Item_direct_view_ref::excl_dep_on_table(table_map tab_map) } +bool Item_direct_view_ref::excl_dep_on_nest(table_map tab_map) +{ + table_map used= used_tables(); + if (used & OUTER_REF_TABLE_BIT) + return false; + if (!(used & ~tab_map)) + return true; + return (*ref)->excl_dep_on_nest(tab_map); +} + + bool Item_direct_view_ref::excl_dep_on_grouping_fields(st_select_lex *sel) { if (item_equal) diff --git a/sql/item.h b/sql/item.h index 3b889b08e7c..8c4d07ca5b8 100644 --- a/sql/item.h +++ b/sql/item.h @@ -445,6 +445,11 @@ typedef struct replace_equal_field_arg struct st_join_table *context_tab; } REPLACE_EQUAL_FIELD_ARG; +typedef struct replace_nest_field_arg +{ + JOIN *join; +} REPLACE_NEST_FIELD_ARG; + class Settable_routine_parameter { public: @@ -1887,6 +1892,12 @@ class Item: public Value_source, Not to be used for AND/OR formulas. */ virtual bool excl_dep_on_table(table_map tab_map) { return false; } + + /* + TRUE if the expression depends only on the table indicated by tab_map + Not to be used for AND/OR formulas. + */ + virtual bool excl_dep_on_nest(table_map tab_map) { return false; } /* TRUE if the expression depends only on grouping fields of sel or can be converted to such an expression using equalities. @@ -2109,6 +2120,8 @@ class Item: public Value_source, { return this; } virtual Item *multiple_equality_transformer(THD *thd, uchar *arg) { return this; } + virtual Item *replace_with_nest_items(THD *thd, uchar *arg) + { return this; } virtual bool expr_cache_is_needed(THD *) { return FALSE; } virtual Item *safe_charset_converter(THD *thd, CHARSET_INFO *tocs); bool needs_charset_converter(uint32 length, CHARSET_INFO *tocs) const @@ -2316,6 +2329,10 @@ class Item: public Value_source, { return excl_dep_on_table(*((table_map *)arg)); } + bool pushable_cond_checker_for_nest(uchar *arg) + { + return excl_dep_on_nest(*((table_map *)arg)); + } bool pushable_cond_checker_for_subquery(uchar *arg) { return excl_dep_on_in_subq_left_part((Item_in_subselect *)arg); @@ -2511,6 +2528,17 @@ class Item_args } return true; } + bool excl_dep_on_nest(table_map tab_map) + { + for (uint i= 0; i < arg_count; i++) + { + if (args[i]->const_item()) + continue; + if (!args[i]->excl_dep_on_nest(tab_map)) + return false; + } + return true; + } bool excl_dep_on_grouping_fields(st_select_lex *sel); bool eq(const Item_args *other, bool binary_cmp) const { @@ -3451,6 +3479,7 @@ class Item_field :public Item_ident, Item *in_subq_field_transformer_for_having(THD *thd, uchar *arg); virtual void print(String *str, enum_query_type query_type); bool excl_dep_on_table(table_map tab_map); + bool excl_dep_on_nest(table_map tab_map); bool excl_dep_on_grouping_fields(st_select_lex *sel); bool excl_dep_on_in_subq_left_part(Item_in_subselect *subq_pred); bool cleanup_excluding_fields_processor(void *arg) @@ -3471,6 +3500,7 @@ class Item_field :public Item_ident, return field->get_geometry_type(); } bool check_index_dependence(void *arg); + Item *replace_with_nest_items(THD *thd, uchar *arg); friend class Item_default_value; friend class Item_insert_value; friend class st_select_lex_unit; @@ -5306,6 +5336,15 @@ class Item_ref :public Item_ident, return false; return (used == tab_map) || (*ref)->excl_dep_on_table(tab_map); } + + bool excl_dep_on_nest(table_map tab_map) + { + table_map used= used_tables(); + if (used & OUTER_REF_TABLE_BIT) + return false; + return (!(used & ~tab_map) || (*ref)->excl_dep_on_nest(tab_map)); + } + bool excl_dep_on_grouping_fields(st_select_lex *sel) { return (*ref)->excl_dep_on_grouping_fields(sel); } bool excl_dep_on_in_subq_left_part(Item_in_subselect *subq_pred) @@ -5631,6 +5670,7 @@ class Item_direct_view_ref :public Item_direct_ref return 0; } bool excl_dep_on_table(table_map tab_map); + bool excl_dep_on_nest(table_map tab_map); bool excl_dep_on_grouping_fields(st_select_lex *sel); bool excl_dep_on_in_subq_left_part(Item_in_subselect *subq_pred); Item *derived_field_transformer_for_having(THD *thd, uchar *arg); diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 02fc7719fbc..48434f66f64 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -5225,6 +5225,22 @@ bool Item_cond::excl_dep_on_table(table_map tab_map) return true; } +bool Item_cond::excl_dep_on_nest(table_map tab_map) +{ + if (used_tables() & OUTER_REF_TABLE_BIT) + return false; + if (!(used_tables() & ~tab_map)) + return true; + List_iterator_fast<Item> li(list); + Item *item; + while ((item= li++)) + { + if (!item->excl_dep_on_nest(tab_map)) + return false; + } + return true; +} + bool Item_cond::excl_dep_on_grouping_fields(st_select_lex *sel) { diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index 2af7ebdf231..b992ee1b676 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -3011,6 +3011,7 @@ class Item_cond :public Item_bool_func bool eval_not_null_tables(void *opt_arg); Item *build_clone(THD *thd); bool excl_dep_on_table(table_map tab_map); + bool excl_dep_on_nest(table_map tab_map); bool excl_dep_on_grouping_fields(st_select_lex *sel); }; diff --git a/sql/item_func.h b/sql/item_func.h index 610adb4bb46..350fb937957 100644 --- a/sql/item_func.h +++ b/sql/item_func.h @@ -340,6 +340,14 @@ class Item_func :public Item_func_or_sum, Item_args::excl_dep_on_table(tab_map); } + bool excl_dep_on_nest(table_map tab_map) + { + if (used_tables() & OUTER_REF_TABLE_BIT) + return false; + return !(used_tables() & ~tab_map) || + Item_args::excl_dep_on_nest(tab_map); + } + bool excl_dep_on_grouping_fields(st_select_lex *sel) { if (has_rand_bit() || with_subquery()) diff --git a/sql/item_row.h b/sql/item_row.h index ea5a0f21d8b..4698dcd8150 100644 --- a/sql/item_row.h +++ b/sql/item_row.h @@ -140,6 +140,11 @@ class Item_row: public Item_fixed_hybrid, return Item_args::excl_dep_on_table(tab_map); } + bool excl_dep_on_nest(table_map tab_map) + { + return Item_args::excl_dep_on_nest(tab_map); + } + bool excl_dep_on_grouping_fields(st_select_lex *sel) { return Item_args::excl_dep_on_grouping_fields(sel); diff --git a/sql/opt_split.cc b/sql/opt_split.cc index cfac0c93544..0dc8006029b 100644 --- a/sql/opt_split.cc +++ b/sql/opt_split.cc @@ -651,7 +651,6 @@ add_ext_keyuses_for_splitting_field(Dynamic_array<KEYUSE_EXT> *ext_keyuses, @brief Cost of the post join operation used in specification of splittable table */ - static double spl_postjoin_oper_cost(THD *thd, double join_record_count, uint rec_len) { diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 02d221d13b0..ef1f6f2c6ff 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -4777,6 +4777,11 @@ int setup_semijoin_loosescan(JOIN *join) for (i= join->const_tables ; i < join->top_join_tab_count; ) { JOIN_TAB *tab=join->join_tab + i; + if (tab->is_sort_nest) + { + i++; + continue; + } switch (pos->sj_strategy) { case SJ_OPT_MATERIALIZE: case SJ_OPT_MATERIALIZE_SCAN: @@ -5517,6 +5522,31 @@ enum_nested_loop_state join_tab_execution_startup(JOIN_TAB *tab) sjm->materialized= TRUE; } } + /* + This should be the place to add the sub_select call for the + prefix of the join order + */ + + else if (tab->is_sort_nest) + { + enum_nested_loop_state rc; + JOIN *join= tab->join; + SORT_NEST_INFO *nest_info= join->sort_nest_info; + + if (!nest_info->materialized) + { + JOIN_TAB *join_tab= join->join_tab + join->const_tables; + JOIN_TAB *save_return_tab= join->return_tab; + if ((rc= sub_select(join, join_tab, FALSE)) < 0 || + (rc= sub_select(join, join_tab, TRUE)) < 0) + { + join->return_tab= save_return_tab; + DBUG_RETURN(rc); + } + join->return_tab= save_return_tab; + nest_info->materialized= TRUE; + } + } DBUG_RETURN(NESTED_LOOP_OK); } diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h index 65131f6bc89..dad63c5d1db 100644 --- a/sql/opt_subselect.h +++ b/sql/opt_subselect.h @@ -298,6 +298,7 @@ class Loose_scan_opt pos->loosescan_picker.loosescan_key= best_loose_scan_key; pos->loosescan_picker.loosescan_parts= best_max_loose_keypart + 1; pos->use_join_buffer= FALSE; + pos->sort_nest_operation_here= FALSE; pos->table= tab; pos->range_rowid_filter_info= tab->range_rowid_filter_info; // todo need ref_depend_map ? diff --git a/sql/opt_trace.cc b/sql/opt_trace.cc index befc7934a3a..ca75b697ec3 100644 --- a/sql/opt_trace.cc +++ b/sql/opt_trace.cc @@ -606,6 +606,13 @@ void Json_writer::add_table_name(const JOIN_TAB *tab) ctab->emb_sj_nest->sj_subq_pred->get_identifier()); add_str(table_name_buffer, len); } + else if (tab->is_sort_nest) + { + size_t len= my_snprintf(table_name_buffer, + sizeof(table_name_buffer)-1, + "<order-nest>"); + add_str(table_name_buffer, len); + } else { TABLE_LIST *real_table= tab->table->pos_in_table_list; @@ -630,6 +637,19 @@ void add_table_scan_values_to_trace(THD *thd, JOIN_TAB *tab) table_rec.add("rows", tab->found_records) .add("cost", tab->read_time); } + +void add_sort_nest_tables_to_trace(JOIN *join) +{ + JOIN_TAB *end_tab, *tab; + THD *thd= join->thd; + SORT_NEST_INFO *sort_nest_info= join->sort_nest_info; + end_tab= sort_nest_info->nest_tab; + Json_writer_object trace_wrapper(thd); + Json_writer_array sort_nest(thd, "sort_nest"); + for (tab= join->join_tab + join->const_tables; tab < end_tab; tab++) + sort_nest.add_table_name(tab); +} + /* Introduce enum_query_type flags parameter, maybe also allow EXPLAIN also use this function. diff --git a/sql/opt_trace.h b/sql/opt_trace.h index 52318bc6b7f..b2dd3dff924 100644 --- a/sql/opt_trace.h +++ b/sql/opt_trace.h @@ -105,6 +105,7 @@ void opt_trace_print_expanded_query(THD *thd, SELECT_LEX *select_lex, Json_writer_object *trace_object); void add_table_scan_values_to_trace(THD *thd, JOIN_TAB *tab); +void add_sort_nest_tables_to_trace(JOIN *join); /* Security related (need to add a proper comment here) diff --git a/sql/sql_class.h b/sql/sql_class.h index 63f964e96ce..73f97ba727b 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -6037,6 +6037,20 @@ class SJ_MATERIALIZATION_INFO : public Sql_alloc Copy_field *copy_field; /* Needed for SJ_Materialization scan */ }; +class SORT_NEST_INFO : public Sql_alloc +{ +public: + TMP_TABLE_PARAM tmp_table_param; + List<Item> nest_base_table_cols; + List<Item> nest_temp_table_cols; + TABLE *table; + st_join_table *nest_tab; + uint n_tables; + bool materialized; /* TRUE <=> materialization already performed */ + table_map nest_tables_map; + Item *nest_cond; +}; + /* Structs used when sorting */ struct SORT_FIELD_ATTR diff --git a/sql/sql_const.h b/sql/sql_const.h index 7aa4249f5ad..96779e4dbeb 100644 --- a/sql/sql_const.h +++ b/sql/sql_const.h @@ -296,6 +296,14 @@ */ #define MAX_TIME_ZONE_NAME_LENGTH (NAME_LEN + 1) + +/** + Average record length is the number of bytes for the record, it is just a rough guess, needs + this to calculate cost of filling and reading the temp table + +*/ +#define AVG_REC_LEN 50 + #if defined(__WIN__) #define INTERRUPT_PRIOR -2 diff --git a/sql/sql_select.cc b/sql/sql_select.cc index e97abf3d981..2346a600c6f 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -120,8 +120,12 @@ static bool best_extension_by_limited_search(JOIN *join, uint idx, double record_count, double read_time, uint depth, uint prune_level, - uint use_cond_selectivity); + uint use_cond_selectivity, + table_map previous_tables, + bool nest_created, + double *cardinality); void trace_plan_prefix(JOIN *join, uint idx, table_map remaining_tables); +void trace_order_by_nest(JOIN *join, uint idx, table_map remaining_tables); static uint determine_search_depth(JOIN* join); C_MODE_START static int join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2); @@ -189,6 +193,8 @@ static enum_nested_loop_state end_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); static enum_nested_loop_state end_unique_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); +enum_nested_loop_state +end_nest_materialization(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); static int join_read_const_table(THD *thd, JOIN_TAB *tab, POSITION *pos); static int join_read_system(JOIN_TAB *tab); @@ -304,6 +310,11 @@ void set_postjoin_aggr_write_func(JOIN_TAB *tab); static Item **get_sargable_cond(JOIN *join, TABLE *table); +void substitute_base_to_nest_items(JOIN *join); +void substitute_base_to_nest_items2(JOIN *join, Item **cond); +void check_cond_extraction_for_nest(THD *thd, Item *cond, + Pushdown_checker checker, uchar* arg); + #ifndef DBUG_OFF /* @@ -2463,6 +2474,10 @@ int JOIN::optimize_stage2() if (setup_semijoin_loosescan(this)) DBUG_RETURN(1); + if (setup_sort_nest(this)) + DBUG_RETURN(1); + substitute_base_to_nest_items(this); + if (make_join_select(this, select, conds)) { zero_result_cause= @@ -2474,22 +2489,9 @@ int JOIN::optimize_stage2() error= -1; /* if goto err */ /* Optimize distinct away if possible */ - { - ORDER *org_order= order; - order=remove_const(this, order,conds,1, &simple_order); - if (unlikely(thd->is_error())) - { - error= 1; - DBUG_RETURN(1); - } + if (remove_const_from_order_by()) + DBUG_RETURN(TRUE); - /* - If we are using ORDER BY NULL or ORDER BY const_expression, - return result in any order (even if we are using a GROUP BY) - */ - if (!order && org_order) - skip_sort_order= 1; - } /* Check if we can optimize away GROUP BY/DISTINCT. We can do that if there are no aggregate functions, the @@ -2710,7 +2712,9 @@ int JOIN::optimize_stage2() Yet the current implementation of FORCE INDEX hints does not allow us to do it in a clean manner. */ - no_jbuf_after= 1 ? table_count : make_join_orderinfo(this); + no_jbuf_after= 1 ? (sort_nest_info ? const_tables + sort_nest_info->n_tables + : table_count) + : make_join_orderinfo(this); // Don't use join buffering when we use MATCH select_opts_for_readinfo= @@ -3583,7 +3587,9 @@ bool JOIN::make_aggr_tables_info() curr_tab->type != JT_EQ_REF) // Don't sort 1 row { // Sort either first non-const table or the last tmp table - JOIN_TAB *sort_tab= curr_tab; + JOIN_TAB *sort_tab= sort_nest_info ? + sort_nest_info->nest_tab : + curr_tab; if (add_sorting_to_table(sort_tab, order_arg)) DBUG_RETURN(true); @@ -4421,7 +4427,7 @@ JOIN::destroy() WITH_CONST_TABLES); tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) { - if (tab->aggr) + if (tab->aggr || tab->is_sort_nest) { free_tmp_table(thd, tab->table); delete tab->tmp_table_param; @@ -4755,6 +4761,156 @@ static Item **get_sargable_cond(JOIN *join, TABLE *table) return retval; } +void substitute_base_to_nest_items(JOIN *join) +{ + if (!join->sort_nest_needed()) + return; + SORT_NEST_INFO *sort_nest_info= join->sort_nest_info; + REPLACE_NEST_FIELD_ARG arg= {join}; + + List_iterator<Item> it(join->fields_list); + Item *item, *new_item; + while ((item= it++)) + { + if ((new_item= item->transform(join->thd, + &Item::replace_with_nest_items, + (uchar *) &arg)) != item) + it.replace(new_item); + } + JOIN_TAB *end_tab= sort_nest_info->nest_tab; + uint i, j; + for (i= join->const_tables + sort_nest_info->n_tables, j=0; + i < join->top_join_tab_count; i++, j++) + { + JOIN_TAB *tab= end_tab + j; + if (tab->type == JT_REF || tab->type == JT_EQ_REF || + tab->type == JT_REF_OR_NULL) + { + for (uint keypart= 0; keypart < tab->ref.key_parts; keypart++) + { + item= tab->ref.items[keypart]->transform(join->thd, + &Item::replace_with_nest_items, + (uchar *) &arg); + if (item != tab->ref.items[keypart]) + { + tab->ref.items[keypart]= item; + Item *real_item= item->real_item(); + store_key *key_copy= tab->ref.key_copy[keypart]; + if (key_copy->type() == store_key::FIELD_STORE_KEY) + { + store_key_field *field_copy= ((store_key_field *)key_copy); + DBUG_ASSERT(real_item->type() == Item::FIELD_ITEM); + field_copy->change_source_field((Item_field *) real_item); + } + } + } + } + } + substitute_base_to_nest_items2(join, &join->conds); +} + +void substitute_base_to_nest_items2(JOIN *join, Item **cond) +{ + SORT_NEST_INFO *sort_nest_info= join->sort_nest_info; + Item *orig_cond= *cond; + if (!sort_nest_info) + return; + THD *thd= join->thd; + Item *extracted_cond; + SELECT_LEX* sl= join->select_lex; + + /* + check_cond_extraction_for_nest would set NO_EXTRACTION_FL for + all the items that cannot be added to the inner tables of the nest + */ + check_cond_extraction_for_nest(thd, orig_cond, + &Item::pushable_cond_checker_for_nest, + (uchar *)(&sort_nest_info->nest_tables_map)); + /* + build_cond_for_grouping_fields would create the entire + condition that would be added to the tables inside the nest. + This may clone some items too. + */ + extracted_cond= sl->build_cond_for_grouping_fields(thd, orig_cond, TRUE); + + if (extracted_cond) + { + if (extracted_cond->fix_fields_if_needed(thd, 0)) + return; + /* + Remove from the WHERE clause all the conditions that were added + to the inner tables of the sort nest + */ + orig_cond= remove_pushed_top_conjuncts(thd, orig_cond); + sort_nest_info->nest_cond= extracted_cond; + } + + REPLACE_NEST_FIELD_ARG arg= {join}; + if (orig_cond) + { + orig_cond= orig_cond->transform(join->thd, &Item::replace_with_nest_items, + (uchar *) &arg); + orig_cond->update_used_tables(); + } + *cond= orig_cond; +} + +/* + Add a transformer to this call so that we dont have both + check_cond_extraction_for_nest and check_cond_extraction_for_grouping_fields +*/ + +void +check_cond_extraction_for_nest(THD *thd, Item *cond, + Pushdown_checker checker, uchar* arg) +{ + if (cond->get_extraction_flag() == NO_EXTRACTION_FL) + return; + cond->clear_extraction_flag(); + if (cond->type() == Item::COND_ITEM) + { + Item_cond_and *and_cond= + (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) ? + ((Item_cond_and*) cond) : 0; + + List<Item> *arg_list= ((Item_cond*) cond)->argument_list(); + List_iterator<Item> li(*arg_list); + uint count= 0; // to count items not containing NO_EXTRACTION_FL + uint count_full= 0; // to count items with FULL_EXTRACTION_FL + Item *item; + while ((item=li++)) + { + check_cond_extraction_for_nest(thd, item, checker, arg); + if (item->get_extraction_flag() != NO_EXTRACTION_FL) + { + count++; + if (item->get_extraction_flag() == FULL_EXTRACTION_FL) + count_full++; + } + else if (!and_cond) + break; + } + if ((and_cond && count == 0) || item) + cond->set_extraction_flag(NO_EXTRACTION_FL); + if (count_full == arg_list->elements) + { + cond->set_extraction_flag(FULL_EXTRACTION_FL); + } + if (cond->get_extraction_flag() != 0) + { + li.rewind(); + while ((item=li++)) + item->clear_extraction_flag(); + } + } + else + { + int fl= (cond->*checker)(arg) ? + FULL_EXTRACTION_FL : NO_EXTRACTION_FL; + cond->set_extraction_flag(fl); + } +} + /** Calculate the best possible join and initialize the join structure. @@ -5210,6 +5366,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, join->sort_by_table= get_sort_by_table(join->order, join->group_list, join->select_lex->leaf_tables, join->const_table_map); + /* Update info on indexes that can be used for search lookups as reading const tables may has added new sargable predicates. @@ -5444,12 +5601,22 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list, if (pull_out_semijoin_tables(join)) DBUG_RETURN(TRUE); - join->join_tab=stat; join->top_join_tab_count= table_count; - join->map2table=stat_ref; - join->table= table_vector; join->const_tables=const_count; join->found_const_table_map=found_const_table_map; + /* + Here a call is made to remove the constant from the order by clause, + this call would only remove the basic constants. This is done for + the ORDER BY LIMIT optimization. + */ + if (join->remove_const_from_order_by()) + DBUG_RETURN(TRUE); + + (void)propagate_equal_field_for_orderby(join, join->order); + + join->join_tab=stat; + join->map2table=stat_ref; + join->table= table_vector; if (join->const_tables != join->table_count) optimize_keyuse(join, keyuse_array); @@ -7075,6 +7242,7 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key) join->positions[idx].sj_strategy= SJ_OPT_NONE; join->positions[idx].use_join_buffer= FALSE; join->positions[idx].range_rowid_filter_info= 0; + join->positions[idx].sort_nest_operation_here= FALSE; /* Move the const table as down as possible in best_ref */ JOIN_TAB **pos=join->best_ref+idx+1; @@ -7922,6 +8090,7 @@ best_access_path(JOIN *join, pos->use_join_buffer= best_uses_jbuf; pos->spl_plan= spl_plan; pos->range_rowid_filter_info= best_filter; + pos->sort_nest_operation_here= FALSE; loose_scan_opt.save_to_position(s, loose_scan_pos); @@ -8135,6 +8304,7 @@ choose_plan(JOIN *join, table_map join_tables) use_cond_selectivity)) DBUG_RETURN(TRUE); } + trace_plan.end(); /* Store the cost of this query into a user variable @@ -8440,6 +8610,7 @@ optimize_straight_join(JOIN *join, table_map join_tables) pushdown_cond_selectivity= table_cond_selectivity(join, idx, s, join_tables); position->cond_selectivity= pushdown_cond_selectivity; + record_count= record_count*pushdown_cond_selectivity; ++idx; } @@ -8552,6 +8723,7 @@ greedy_search(JOIN *join, JOIN_TAB *best_table; // the next plan node to be added to the curr QEP // ==join->tables or # tables in the sj-mat nest we're optimizing uint n_tables __attribute__((unused)); + double cardinality= DBL_MAX; DBUG_ENTER("greedy_search"); /* number of tables that remain to be optimized */ @@ -8565,9 +8737,11 @@ greedy_search(JOIN *join, do { /* Find the extension of the current QEP with the lowest cost */ join->best_read= DBL_MAX; + table_map previous_tables= 0; if (best_extension_by_limited_search(join, remaining_tables, idx, record_count, read_time, search_depth, prune_level, - use_cond_selectivity)) + use_cond_selectivity, + previous_tables, FALSE, &cardinality)) DBUG_RETURN(TRUE); /* 'best_read < DBL_MAX' means that optimizer managed to find @@ -9166,6 +9340,19 @@ void trace_plan_prefix(JOIN *join, uint idx, table_map remaining_tables) } } +void trace_order_by_nest(JOIN *join, uint idx, table_map remaining_tables) +{ + THD *const thd= join->thd; + Json_writer_array plan_prefix(thd, "order_by_nest"); + for (uint i= 0; i < idx; i++) + { + TABLE *tr= join->positions[i].table->table; + if (tr->map & remaining_tables) + plan_prefix.add_table_name(join->positions[i].table); + } + +} + /** Find a good, possibly optimal, query execution plan (QEP) by a possibly exhaustive search. @@ -9293,7 +9480,10 @@ best_extension_by_limited_search(JOIN *join, double read_time, uint search_depth, uint prune_level, - uint use_cond_selectivity) + uint use_cond_selectivity, + table_map previous_tables, + bool nest_created, + double *cardinality) { DBUG_ENTER("best_extension_by_limited_search"); @@ -9319,7 +9509,16 @@ best_extension_by_limited_search(JOIN *join, JOIN_TAB *s; double best_record_count= DBL_MAX; double best_read_time= DBL_MAX; - bool disable_jbuf= join->thd->variables.join_cache_level == 0; + bool disable_jbuf= (join->thd->variables.join_cache_level == 0) || nest_created; + double fraction_output; + + if (nest_created) + { + fraction_output= join->select_limit < (*cardinality) ? + (join->select_limit/(*cardinality)) : 1.0; + } + else + fraction_output= 1.0; DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time, "part_plan");); @@ -9348,6 +9547,8 @@ best_extension_by_limited_search(JOIN *join, { trace_plan_prefix(join, idx, remaining_tables); trace_one_table.add_table_name(s); + if (nest_created) + trace_order_by_nest(join, idx, previous_tables); } /* Find the best access method from 's' to the current partial plan */ @@ -9361,10 +9562,12 @@ best_extension_by_limited_search(JOIN *join, ? position->range_rowid_filter_info->get_cmp_gain(current_record_count) : 0; current_read_time=COST_ADD(read_time, - COST_ADD(position->read_time - - filter_cmp_gain, - current_record_count / - (double) TIME_FOR_COMPARE)); + COST_MULT( + COST_ADD(position->read_time - + filter_cmp_gain, + current_record_count / + (double) TIME_FOR_COMPARE), fraction_output)); + current_record_count= COST_MULT(current_record_count, fraction_output); advance_sj_state(join, remaining_tables, idx, ¤t_record_count, ¤t_read_time, &loose_scan_pos); @@ -9427,9 +9630,12 @@ best_extension_by_limited_search(JOIN *join, double partial_join_cardinality= current_record_count * pushdown_cond_selectivity; if ( (search_depth > 1) && (remaining_tables & ~real_table_bit) & allowed_tables ) - { /* Recursively expand the current partial plan */ + { + /* Recursively expand the current partial plan */ swap_variables(JOIN_TAB*, join->best_ref[idx], *pos); Json_writer_array trace_rest(thd, "rest_of_plan"); + bool nest_allow= (join->cur_sj_inner_tables == 0 && + join->cur_embedding_map == 0); if (best_extension_by_limited_search(join, remaining_tables & ~real_table_bit, idx + 1, @@ -9437,24 +9643,58 @@ best_extension_by_limited_search(JOIN *join, current_read_time, search_depth - 1, prune_level, - use_cond_selectivity)) + use_cond_selectivity, + nest_created ? previous_tables : + previous_tables | real_table_bit, + nest_created, cardinality)) DBUG_RETURN(TRUE); + + if (!nest_created && !join->emb_sjm_nest && nest_allow && !join->need_order_nest() && + check_join_prefix_contains_ordering(join, s, previous_tables)) + { + join->positions[idx].sort_nest_operation_here= TRUE; + double cost= postjoin_oper_cost(thd, partial_join_cardinality, AVG_REC_LEN, idx); + current_read_time= COST_ADD(current_read_time, cost); + if (best_extension_by_limited_search(join, + remaining_tables & ~real_table_bit, + idx + 1, + partial_join_cardinality, + current_read_time, + search_depth - 1, + 0, + use_cond_selectivity, + previous_tables | real_table_bit, + TRUE, cardinality)) + DBUG_RETURN(TRUE); + join->positions[idx].sort_nest_operation_here= FALSE; + } swap_variables(JOIN_TAB*, join->best_ref[idx], *pos); } else - { /* + { + /* 'join' is either the best partial QEP with 'search_depth' relations, or the best complete QEP so far, whichever is smaller. */ if (join->sort_by_table && join->sort_by_table != - join->positions[join->const_tables].table->table) + join->positions[join->const_tables].table->table + && !nest_created) + { /* We may have to make a temp table, note that this is only a heuristic since we cannot know for sure at this point. Hence it may be wrong. */ - current_read_time= COST_ADD(current_read_time, current_record_count); + double cost= postjoin_oper_cost(thd, partial_join_cardinality, AVG_REC_LEN, idx); + current_read_time= COST_ADD(current_read_time, cost); + } + if (!nest_created) + { + *cardinality= partial_join_cardinality; + trace_one_table.add("cardinality", partial_join_cardinality); + } + trace_one_table.add("cost_of_plan", current_read_time); if (current_read_time < join->best_read) { memcpy((uchar*) join->best_positions, (uchar*) join->positions, @@ -10145,7 +10385,7 @@ bool JOIN::get_best_combination() JOIN_TAB_RANGE *root_range; if (!(root_range= new (thd->mem_root) JOIN_TAB_RANGE)) DBUG_RETURN(TRUE); - root_range->start= join_tab; + root_range->start= join_tab; /* root_range->end will be set later */ join_tab_ranges.empty(); @@ -10220,7 +10460,7 @@ bool JOIN::get_best_combination() j->type=JT_ALL; if (best_positions[tablenr].use_join_buffer && tablenr != const_tables) - full_join= 1; + full_join= 1; } /*if (best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN) @@ -10253,12 +10493,45 @@ bool JOIN::get_best_combination() sjm_nest_root= NULL; sjm_nest_end= NULL; } + if (cur_pos->sort_nest_operation_here) + { + /* + Ok, we've entered an ORDERING nest + 1. Put into main join order a JOIN_TAB that represents a scan + in the temptable. + */ + JOIN_TAB *prev= j; + if ((prev - (join_tab + const_tables) > 0)) + { + j= j+1; + bzero((void*)j, sizeof(JOIN_TAB)); + + j->join= this; + j->table= NULL; //temporary way to tell SJM tables from others. + j->ref.key = -1; + j->on_expr_ref= (Item**) &null_ptr; + j->is_sort_nest= TRUE; + uint tables= prev - (join_tab + const_tables)+1; + j->records_read= calculate_record_count_for_sort_nest(this, tables); + j->records= (ha_rows) j->records_read; + j->cond_selectivity= 1.0; + } + SORT_NEST_INFO *sort_nest_info; + if (!(sort_nest_info= new SORT_NEST_INFO())) + return TRUE; + sort_nest_info->n_tables= prev - (join_tab + const_tables)+1; + sort_nest_info->nest_tab= j; + this->sort_nest_info= sort_nest_info; + DBUG_ASSERT(sort_nest_info->n_tables != 0); + } } root_range->end= j; used_tables= OUTER_REF_TABLE_BIT; // Outer row is already read for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++) { + if (j->is_sort_nest) + j++; if (j->bush_children) j= j->bush_children->start; @@ -10952,6 +11225,8 @@ make_outerjoin_info(JOIN *join) tab; tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS)) { + if (tab->is_sort_nest) + continue; TABLE *table= tab->table; TABLE_LIST *tbl= table->pos_in_table_list; TABLE_LIST *embedding= tbl->embedding; @@ -11127,10 +11402,20 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond) JOIN_TAB *tab; table_map current_map; i= join->const_tables; + Item *saved_cond= cond; + SORT_NEST_INFO *sort_nest_info= join->sort_nest_info; + if (join->sort_nest_needed()) + cond= sort_nest_info->nest_cond; + for (tab= first_depth_first_tab(join); tab; tab= next_depth_first_tab(join, tab)) { bool is_hj; + if (tab->is_sort_nest) + { + cond= saved_cond; + continue; + } /* first_inner is the X in queries like: @@ -12136,6 +12421,35 @@ end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) } +enum_nested_loop_state +end_nest_materialization(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) +{ + int error; + THD *thd= join->thd; + SORT_NEST_INFO *nest_info= join->sort_nest_info; + DBUG_ENTER("end_sj_materialize"); + if (!end_of_records) + { + TABLE *table= nest_info->table; + fill_record(thd, table, table->field, + nest_info->nest_base_table_cols, TRUE, FALSE); + + if (unlikely(thd->is_error())) + DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + if (unlikely((error= table->file->ha_write_tmp_row(table->record[0])))) + { + /* create_myisam_from_heap will generate error if needed */ + if (table->file->is_fatal_error(error, HA_CHECK_DUP) && + create_internal_tmp_table_from_heap(thd, table, + nest_info->tmp_table_param.start_recinfo, + &nest_info->tmp_table_param.recinfo, + error, 1, NULL)) + DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */ + } + } + DBUG_RETURN(NESTED_LOOP_OK); +} + /* Check whether a join buffer can be used to join the specified table @@ -12367,7 +12681,7 @@ uint check_join_cache_usage(JOIN_TAB *tab, Don't use join buffering if we're dictated not to by no_jbuf_after (This is not meaningfully used currently) */ - if (table_index > no_jbuf_after) + if (table_index+1 > no_jbuf_after) goto no_join_cache; /* @@ -12559,7 +12873,7 @@ void check_join_cache_usage_for_tables(JOIN *join, ulonglong options, */ prev_tab= tab - 1; if (tab == join->join_tab + join->const_tables || - (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab)) + (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab) || tab->is_sort_nest) prev_tab= NULL; switch (tab->type) { @@ -12588,7 +12902,7 @@ void check_join_cache_usage_for_tables(JOIN *join, ulonglong options, default: tab->used_join_cache_level= 0; } - if (!tab->bush_children) + if (!tab->bush_children && !tab->is_sort_nest) idx++; } } @@ -12731,17 +13045,20 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) Later it should be improved. */ - if (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab) + if ((tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab) || + tab->is_sort_nest) prev_tab= NULL; - DBUG_ASSERT(tab->bush_children || tab->table == join->best_positions[i].table->table); + DBUG_ASSERT(tab->bush_children || tab->table == join->best_positions[i].table->table + || tab->is_sort_nest); tab->partial_join_cardinality= join->best_positions[i].records_read * (prev_tab? prev_tab->partial_join_cardinality : 1); - if (!tab->bush_children) + if (!tab->bush_children && !tab->is_sort_nest) i++; } check_join_cache_usage_for_tables(join, options, no_jbuf_after); + SORT_NEST_INFO *sort_nest_info= join->sort_nest_info; JOIN_TAB *first_tab; for (tab= first_tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); @@ -12768,7 +13085,8 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) end_sj_materialize. */ if (!(tab->bush_root_tab && - tab->bush_root_tab->bush_children->end == tab + 1)) + tab->bush_root_tab->bush_children->end == tab + 1) && + !(sort_nest_info && tab+1 == sort_nest_info->nest_tab)) { tab->next_select=sub_select; /* normal select */ } @@ -12958,7 +13276,7 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after) It could be that sort_by_tab==NULL, and the plan is to use filesort() on the first table. */ - if (join->order) + if (join->order && !join->sort_nest_info) { join->simple_order= 0; join->need_tmp= 1; @@ -13711,6 +14029,8 @@ static void update_depend_map(JOIN *join) join_tab; join_tab= next_linear_tab(join, join_tab, WITH_BUSH_ROOTS)) { + if (join_tab->is_sort_nest) + continue; TABLE_REF *ref= &join_tab->ref; table_map depend_map=0; Item **item=ref->items; @@ -14001,6 +14321,22 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond, } +bool JOIN::remove_const_from_order_by() +{ + ORDER *org_order= order; + order=remove_const(this, order,conds,1, &simple_order); + if (unlikely(thd->is_error())) + return TRUE; + /* + If we are using ORDER BY NULL or ORDER BY const_expression, + return result in any order (even if we are using a GROUP BY) + */ + if (!order && org_order) + skip_sort_order= 1; + return FALSE; +} + + /** Filter out ORDER items those are equal to constants in WHERE @@ -14041,6 +14377,198 @@ ORDER *simple_remove_const(ORDER *order, COND *where) } +/* + This function basically tries to propgate all the multiple equalites + for the order by items, so that one can use them to generate QEP that would + also take into consideration equality propagation. + Example + select * from t1,t2 where t1.a=t2.a order by t1.a + + So the possible join orders would be: + + t1 join t2 then sort + t2 join t1 then sort + t1 sort(t1) join t2 + t2 sort(t2) join t1 => this is only possible when equality propagation is + performed +*/ +void propagate_equal_field_for_orderby(JOIN *join, ORDER *first_order) +{ + ORDER *order; + for (order= first_order; order; order= order->next) + { + if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP) && + join->cond_equal) + { + Item *item= order->item[0]; + /* + TODO: equality substitution in the context of ORDER BY is + sometimes allowed when it is not allowed in the general case. + We make the below call for its side effect: it will locate the + multiple equality the item belongs to and set item->item_equal + accordingly. + */ + (void)item->propagate_equal_fields(join->thd, + Value_source:: + Context_identity(), + join->cond_equal); + } + } +} + +/* + This function checks if by considering the current join_tab + would we be able to achieve the ordering +*/ + +bool check_join_prefix_contains_ordering(JOIN *join, JOIN_TAB *tab, + table_map previous_tables) +{ + ORDER *order; + for (order= join->order; order; order= order->next) + { + Item *order_item= order->item[0]; + table_map order_tables=order_item->used_tables(); + if (!(order_tables & ~previous_tables) || + (order_item->excl_dep_on_table(previous_tables | tab->table->map))) + continue; + else + return FALSE; + } + return TRUE; +} + + +bool setup_sort_nest(JOIN *join) +{ + if (!join->sort_nest_needed()) + return FALSE; + + /* + The sort nest is only needed when there are more than one table + in the sort nest, else we can just sort with the first table if the + sort nest has only one table + */ + SORT_NEST_INFO* sort_nest_info= join->sort_nest_info; + THD *thd= join->thd; + Field_iterator_table field_iterator; + + JOIN_TAB *start_tab= join->join_tab+join->const_tables, *j, *tab; + tab= sort_nest_info->nest_tab; + sort_nest_info->nest_tables_map= 0; + + if (unlikely(thd->trace_started())) + add_sort_nest_tables_to_trace(join); + + /* This needs to be added to JOIN structure, looks the best option or we + can have a seperate struture NEST_INFO to hold it. + Final Implementation here should just walk over the where clause and collect + the field for which we should have a temp table field + */ + + for (j= start_tab; j < tab; j++) + { + TABLE *table= j->table; + field_iterator.set_table(table); + sort_nest_info->nest_tables_map|= table->map; + for (; !field_iterator.end_of_fields(); field_iterator.next()) + { + Field *field= field_iterator.field(); + if (!bitmap_is_set(table->read_set, field->field_index)) + continue; + Item *item; + if (!(item= field_iterator.create_item(thd))) + return TRUE; + sort_nest_info->nest_base_table_cols.push_back(item, thd->mem_root); + } + } + + uint non_order_fields= sort_nest_info->nest_base_table_cols.elements; + ORDER *order= join->order; + + /* + Order by items need to be in the temp table ,we can avoid the Field items in + the order by list but we need to fields inside the temp table for expressions + */ + for (order= join->order; order; order=order->next) + { + Item *item= order->item[0]; + Item *res= substitute_for_best_equal_field(thd, NO_PARTICULAR_TAB, item, + join->cond_equal, + join->map2table, true); + res->update_used_tables(); + sort_nest_info->nest_base_table_cols.push_back(res, thd->mem_root); + } + + DBUG_ASSERT(!tab->table); + + sort_nest_info->tmp_table_param.init(); + sort_nest_info->tmp_table_param.bit_fields_as_long= TRUE; + sort_nest_info->tmp_table_param.field_count= sort_nest_info->nest_base_table_cols.elements; + sort_nest_info->tmp_table_param.force_not_null_cols= FALSE; + + const LEX_CSTRING order_nest_name= { STRING_WITH_LEN("order-nest") }; + if (!(tab->table= create_tmp_table(thd, &sort_nest_info->tmp_table_param, + sort_nest_info->nest_base_table_cols, (ORDER*) 0, + FALSE /* distinct */, + 0, /*save_sum_fields*/ + thd->variables.option_bits | TMP_TABLE_ALL_COLUMNS, + HA_POS_ERROR /*rows_limit */, + &order_nest_name))) + return TRUE; /* purecov: inspected */ + + tab->table->map= sort_nest_info->nest_tables_map; + sort_nest_info->table= tab->table; + tab->type= JT_ALL; + + /* + The list of temp table items created here, these are needed for the substitution + for items that would be evaluated in POST SORT NEST context + */ + field_iterator.set_table(tab->table); + for (; !field_iterator.end_of_fields(); field_iterator.next()) + { + Field *field= field_iterator.field(); + Item *item; + if (!(item= new (thd->mem_root)Item_temptable_field(thd, field))) + return TRUE; + sort_nest_info->nest_temp_table_cols.push_back(item, thd->mem_root); + } + + /* + Here we substitute order by items with the items of the temp table + */ + List_iterator_fast<Item> it(sort_nest_info->nest_temp_table_cols); + Item *item; + order= join->order; + uint i=0; + while ((item= it++)) + { + if (i++ < non_order_fields) + continue; + order->item[0]= item; + order= order->next; + } + tab->table->reginfo.join_tab= tab; + + /* + Create mapping between base table to temp table + Need a key-value structure + would like to have base_table_field ----> temp_table_item mapping + We can use a hash-set that we already have in the file sql-hset.h + */ + + /* + Setting up the scan on the temp table + */ + tab->read_first_record= join_init_read_record; + tab->read_record.read_record_func= rr_sequential; + tab[-1].next_select= end_nest_materialization; + sort_nest_info->materialized= FALSE; + + return FALSE; +} + static int return_zero_rows(JOIN *join, select_result *result, List<TABLE_LIST> &tables, List<Item> &fields, bool send_row, ulonglong select_options, @@ -19721,6 +20249,10 @@ do_select(JOIN *join, Procedure *procedure) JOIN_TAB *join_tab= join->join_tab + (join->tables_list ? join->const_tables : 0); + SORT_NEST_INFO *sort_nest_info= join->sort_nest_info; + join_tab= sort_nest_info ? sort_nest_info->nest_tab + : join_tab; + if (join->outer_ref_cond && !join->outer_ref_cond->val_int()) error= NESTED_LOOP_NO_MORE_ROWS; else @@ -25951,6 +26483,13 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta, ctab->emb_sj_nest->sj_subq_pred->get_identifier()); eta->table_name.copy(table_name_buffer, len, cs); } + else if (is_sort_nest) + { + size_t len= my_snprintf(table_name_buffer, + sizeof(table_name_buffer)-1, + "<order-nest>"); + eta->table_name.copy(table_name_buffer, len, cs); + } else { TABLE_LIST *real_table= table->pos_in_table_list; @@ -28550,6 +29089,39 @@ select_handler *SELECT_LEX::find_select_handler(THD *thd) return 0; } +double postjoin_oper_cost(THD *thd, double join_record_count, uint rec_len, uint idx) +{ + double cost= 0; + /* + For only one table in the order_nest, we don't need a fill the temp table, we can + just read the data into the filesort buffer and read the sorted data from the buffers. + */ + if (idx) + cost= get_tmp_table_write_cost(thd, join_record_count,rec_len) * + join_record_count; // cost to fill tmp table + + cost+= get_tmp_table_lookup_cost(thd, join_record_count,rec_len) * + join_record_count; // cost to perform post join operation used here + cost+= get_tmp_table_lookup_cost(thd, join_record_count, rec_len) + + (join_record_count == 0 ? 0 : + join_record_count * log2 (join_record_count)) * + SORT_INDEX_CMP_COST; // cost to perform sorting + return cost; +} + + +double calculate_record_count_for_sort_nest(JOIN *join, uint n_tables) +{ + double sort_nest_records=1, record_count; + JOIN_TAB *tab= join->join_tab + join->const_tables; + for (uint j= 0; j < n_tables ;j++, tab++) + { + record_count= tab->records_read * tab->cond_selectivity; + sort_nest_records= COST_MULT(sort_nest_records, record_count); + } + return sort_nest_records; +} + /** @} (end of group Query_Optimizer) diff --git a/sql/sql_select.h b/sql/sql_select.h index 40a9ed303f7..e0a9886b984 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -289,13 +289,14 @@ typedef struct st_join_table { /* TRUE <=> This join_tab is inside an SJM bush and is the last leaf tab here */ bool last_leaf_in_bush; - + /* ptr - this is a bush, and ptr points to description of child join_tab range NULL - this join tab has no bush children */ JOIN_TAB_RANGE *bush_children; + JOIN_TAB_RANGE *order_nest_children; /* Special content for EXPLAIN 'Extra' column or NULL if none */ enum explain_extra_tag info; @@ -524,6 +525,12 @@ typedef struct st_join_table { /* Becomes true just after the used range filter has been built / filled */ bool is_rowid_filter_built; + /* + Set to true if we consider creating a nest for a prefix of the JOIN order + that satisfies the ordering + */ + bool is_sort_nest; + void build_range_rowid_filter_if_needed(); void cleanup(); @@ -991,6 +998,9 @@ typedef struct st_position /* Cost info for the range filter used at this position */ Range_rowid_filter_cost_info *range_rowid_filter_info; + /* Flag to be set to TRUE if the join prefix satisfies the ORDER BY CLAUSE */ + bool sort_nest_operation_here; + } POSITION; typedef Bounds_checked_array<Item_null_result*> Item_null_array; @@ -1506,6 +1516,7 @@ class JOIN :public Sql_alloc the optimize_cond() call in JOIN::optimize_inner() method. */ bool is_orig_degenerated; + SORT_NEST_INFO *sort_nest_info; JOIN(THD *thd_arg, List<Item> &fields_arg, ulonglong select_options_arg, select_result *result_arg) @@ -1602,6 +1613,7 @@ class JOIN :public Sql_alloc sjm_lookup_tables= 0; sjm_scan_tables= 0; is_orig_degenerated= false; + sort_nest_info= NULL; } /* True if the plan guarantees that it will be returned zero or one row */ @@ -1609,6 +1621,11 @@ class JOIN :public Sql_alloc /* Number of tables actually joined at the top level */ uint exec_join_tab_cnt() { return tables_list ? top_join_tab_count : 0; } + /* TRUE if the sort-nest contains more than one table else FALSE */ + bool sort_nest_needed() { return sort_nest_info ? + (sort_nest_info->n_tables == 1 ? FALSE : TRUE): + FALSE; } + /* Number of tables in the join which also includes the temporary tables created for GROUP BY, DISTINCT , WINDOW FUNCTION etc. @@ -1721,7 +1738,8 @@ class JOIN :public Sql_alloc JOIN_TAB *get_sort_by_join_tab() { return (need_tmp || !sort_by_table || skip_sort_order || - ((group || tmp_table_param.sum_func_count) && !group_list)) ? + ((group || tmp_table_param.sum_func_count) && !group_list) || + sort_nest_info) ? NULL : join_tab+const_tables; } bool setup_subquery_caches(); @@ -1748,11 +1766,20 @@ class JOIN :public Sql_alloc bool test_if_need_tmp_table() { return ((const_tables != table_count && - ((select_distinct || !simple_order || !simple_group) || + ((select_distinct || (!simple_order && !sort_nest_info) || !simple_group) || (group_list && order) || MY_TEST(select_options & OPTION_BUFFER_RESULT))) || (rollup.state != ROLLUP::STATE_NONE && select_distinct)); } + + bool need_order_nest() + { + return ((const_tables != table_count && + ((select_distinct || group_list) || + MY_TEST(select_options & OPTION_BUFFER_RESULT))) || + (rollup.state != ROLLUP::STATE_NONE && select_distinct) || + select_lex->window_specs.elements > 0 || select_lex->agg_func_used()); + } bool choose_subquery_plan(table_map join_tables); void get_partial_cost_and_fanout(int end_tab_idx, table_map filter_map, @@ -1783,6 +1810,7 @@ class JOIN :public Sql_alloc bool fix_all_splittings_in_plan(); bool transform_in_predicates_into_in_subq(THD *thd); + bool remove_const_from_order_by(); private: /** Create a temporary table to be used for processing DISTINCT/ORDER @@ -1853,6 +1881,7 @@ bool is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args); bool simple_pred(Item_func *func_item, Item **args, bool *inv_order); int opt_sum_query(THD* thd, List<TABLE_LIST> &tables, List<Item> &all_fields, COND *conds); +double calculate_record_count_for_sort_nest(JOIN *join, uint n_tables); /* from sql_delete.cc, used by opt_range.cc */ extern "C" int refpos_order_cmp(void* arg, const void *a,const void *b); @@ -2098,6 +2127,10 @@ bool mysql_select(THD *thd, void free_underlaid_joins(THD *thd, SELECT_LEX *select); bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result); +void propagate_equal_field_for_orderby(JOIN *join, ORDER *first_order); +bool check_join_prefix_contains_ordering(JOIN *join, JOIN_TAB *tab, + table_map previous_tables); +bool setup_sort_nest(JOIN *join); /* General routine to change field->ptr of a NULL-terminated array of Field @@ -2449,6 +2482,7 @@ double get_tmp_table_write_cost(THD *thd, double row_count, uint row_size); void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array); bool sort_and_filter_keyuse(THD *thd, DYNAMIC_ARRAY *keyuse, bool skip_unprefixed_keyparts); +double postjoin_oper_cost(THD *thd, double join_record_count, uint rec_len, uint idx); struct st_cond_statistic {