#At lp:maria based on revid:knielsen@knielsen-hq.org-20090602110359-n4q9gof38buucrny 2708 Sergey Petrunia 2009-06-30 MWL#17: Table elimination - RC0 code added: mysql-test/r/table_elim.result mysql-test/t/table_elim.test sql-bench/test-table-elimination.sh sql/opt_table_elimination.cc modified: libmysqld/Makefile.am mysql-test/r/ps_11bugs.result mysql-test/r/select.result mysql-test/r/subselect.result mysql-test/r/union.result sql/CMakeLists.txt sql/Makefile.am sql/item.cc sql/item.h sql/item_subselect.cc sql/item_subselect.h sql/item_sum.cc sql/item_sum.h sql/sql_lex.cc sql/sql_lex.h sql/sql_select.cc sql/sql_select.h sql/table.h per-file messages: libmysqld/Makefile.am MWL#17: Table elimination - add opt_table_elimination.cc mysql-test/r/ps_11bugs.result MWL#17: Table elimination - Update test results (the difference is because we now recoginze Item_ref(const_item) as const mysql-test/r/select.result MWL#17: Table elimination - Update test results mysql-test/r/subselect.result MWL#17: Table elimination - Update test results (the difference is because we now recoginze Item_ref(const_item) as const mysql-test/r/table_elim.result MWL#17: Table elimination - Testcases mysql-test/r/union.result MWL#17: Table elimination - Update test results (the difference is because we now recoginze Item_ref(const_item) as const mysql-test/t/table_elim.test MWL#17: Table elimination - Testcases sql-bench/test-table-elimination.sh MWL#17: Table elimination - Benchmark which compares table elimination queries with no-table-elimination queries sql/CMakeLists.txt MWL#17: Table elimination - add opt_table_elimination.cc sql/Makefile.am MWL#17: Table elimination - add opt_table_elimination.cc sql/item.cc MWL#17: Table elimination - Add Item_field::check_column_usage_processor sql/item.h MWL#17: Table elimination - Add check_column_usage_processor() sql/item_subselect.cc MWL#17: Table elimination - Make Item_subselect to = be able to tell which particular items are referred from inside the select = to tell whether it was eliminated sql/item_subselect.h MWL#17: Table elimination - Make Item_subselect to = be able to tell which particular items are referred from inside the select = to tell whether it was eliminated sql/item_sum.cc MWL#17: Table elimination - Fix Item_sum_sum::used_tables() to report tables whose columns it really needs sql/item_sum.h MWL#17: Table elimination - Fix Item_sum_sum::used_tables() to report tables whose columns it really needs sql/opt_table_elimination.cc MWL#17: Table elimination - Table elimination Module sql/sql_lex.cc MWL#17: Table elimination - Collect Item_subselect::refers_to attribute sql/sql_lex.h MWL#17: Table elimination - Collect Item_subselect::refers_to attribute sql/sql_select.cc MWL#17: Table elimination - Make KEYUSE array code to also collect/process "binding" equalities in form t.keyXpartY= func(t.keyXpartZ,...) - Call table elimination function - Make EXPLAIN not to show eliminated tables/selects - Added more comments - Move definitions of FT_KEYPART, KEY_OPTIMIZE_* into sql_select.h as they are now used in opt_table_elimination.cc sql/sql_select.h MWL#17: Table elimination - Make KEYUSE array code to also collect/process "binding" equalities in form t.keyXpartY= func(t.keyXpartZ,...) - Call table elimination function - Make EXPLAIN not to show eliminated tables/selects - Added more comments - Move definitions of FT_KEYPART, KEY_OPTIMIZE_* into sql_select.h as they are now used in opt_table_elimination.cc sql/table.h MWL#17: Table elimination - More comments - Add NESTED_JOIN::n_tables === modified file 'libmysqld/Makefile.am' --- a/libmysqld/Makefile.am 2009-03-12 22:27:35 +0000 +++ b/libmysqld/Makefile.am 2009-06-30 15:09:36 +0000 @@ -76,7 +76,7 @@ sqlsources = derror.cc field.cc field_co rpl_filter.cc sql_partition.cc sql_builtin.cc sql_plugin.cc \ sql_tablespace.cc \ rpl_injector.cc my_user.c partition_info.cc \ - sql_servers.cc event_parse_data.cc + sql_servers.cc event_parse_data.cc opt_table_elimination.cc libmysqld_int_a_SOURCES= $(libmysqld_sources) nodist_libmysqld_int_a_SOURCES= $(libmysqlsources) $(sqlsources) === modified file 'mysql-test/r/ps_11bugs.result' --- a/mysql-test/r/ps_11bugs.result 2008-10-08 11:23:53 +0000 +++ b/mysql-test/r/ps_11bugs.result 2009-06-30 15:09:36 +0000 @@ -121,8 +121,8 @@ insert into t1 values (1); explain select * from t1 where 3 in (select (1+1) union select 1); id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables -2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used -3 DEPENDENT UNION NULL NULL NULL NULL NULL NULL NULL No tables used +2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL Impossible HAVING +3 DEPENDENT UNION NULL NULL NULL NULL NULL NULL NULL Impossible HAVING NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL select * from t1 where 3 in (select (1+1) union select 1); a === modified file 'mysql-test/r/select.result' --- a/mysql-test/r/select.result 2009-03-16 05:02:10 +0000 +++ b/mysql-test/r/select.result 2009-06-30 15:09:36 +0000 @@ -3585,7 +3585,6 @@ INSERT INTO t2 VALUES (1,'a'),(2,'b'),(3 EXPLAIN SELECT t1.a FROM t1 LEFT JOIN t2 ON t2.b=t1.b WHERE t1.a=3; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 const PRIMARY PRIMARY 4 const 1 -1 SIMPLE t2 const b b 22 const 1 Using index DROP TABLE t1,t2; CREATE TABLE t1(id int PRIMARY KEY, b int, e int); CREATE TABLE t2(i int, a int, INDEX si(i), INDEX ai(a)); === modified file 'mysql-test/r/subselect.result' --- a/mysql-test/r/subselect.result 2009-04-25 09:04:38 +0000 +++ b/mysql-test/r/subselect.result 2009-06-30 15:09:36 +0000 @@ -4353,13 +4353,13 @@ id select_type table type possible_keys 1 PRIMARY t1 ALL NULL NULL NULL NULL 2 100.00 2 DEPENDENT SUBQUERY t1 ALL NULL NULL NULL NULL 2 100.00 Using temporary; Using filesort Warnings: -Note 1003 select 1 AS `1` from `test`.`t1` where <in_optimizer>(1,<exists>(select 1 AS `1` from `test`.`t1` group by `test`.`t1`.`a` having (<cache>(1) = <ref_null_helper>(1)))) +Note 1003 select 1 AS `1` from `test`.`t1` where <in_optimizer>(1,<exists>(select 1 AS `1` from `test`.`t1` group by `test`.`t1`.`a` having 1)) EXPLAIN EXTENDED SELECT 1 FROM t1 WHERE 1 IN (SELECT 1 FROM t1 WHERE a > 3 GROUP BY a); id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY NULL NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables 2 DEPENDENT SUBQUERY t1 ALL NULL NULL NULL NULL 2 100.00 Using where; Using temporary; Using filesort Warnings: -Note 1003 select 1 AS `1` from `test`.`t1` where <in_optimizer>(1,<exists>(select 1 AS `1` from `test`.`t1` where (`test`.`t1`.`a` > 3) group by `test`.`t1`.`a` having (<cache>(1) = <ref_null_helper>(1)))) +Note 1003 select 1 AS `1` from `test`.`t1` where <in_optimizer>(1,<exists>(select 1 AS `1` from `test`.`t1` where (`test`.`t1`.`a` > 3) group by `test`.`t1`.`a` having 1)) DROP TABLE t1; End of 5.0 tests. CREATE TABLE t1 (a INT, b INT); === added file 'mysql-test/r/table_elim.result' --- a/mysql-test/r/table_elim.result 1970-01-01 00:00:00 +0000 +++ b/mysql-test/r/table_elim.result 2009-06-30 15:09:36 +0000 @@ -0,0 +1,204 @@ +drop table if exists t0, t1, t2, t3; +drop view if exists v1, v2; +create table t1 (a int); +insert into t1 values (0),(1),(2),(3); +create table t0 as select * from t1; +create table t2 (a int primary key, b int) +as select a, a as b from t1 where a in (1,2); +create table t3 (a int primary key, b int) +as select a, a as b from t1 where a in (1,3); +# This will be eliminated: +explain select t1.a from t1 left join t2 on t2.a=t1.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +explain extended select t1.a from t1 left join t2 on t2.a=t1.a; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 100.00 +Warnings: +Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where 1 +select t1.a from t1 left join t2 on t2.a=t1.a; +a +0 +1 +2 +3 +# This will not be eliminated as t2.b is in in select list: +explain select * from t1 left join t2 on t2.a=t1.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +1 SIMPLE t2 eq_ref PRIMARY PRIMARY 4 test.t1.a 1 +# This will not be eliminated as t2.b is in in order list: +explain select t1.a from t1 left join t2 on t2.a=t1.a order by t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using temporary; Using filesort +1 SIMPLE t2 eq_ref PRIMARY PRIMARY 4 test.t1.a 1 +# This will not be eliminated as t2.b is in group list: +explain select t1.a from t1 left join t2 on t2.a=t1.a group by t2.b; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using temporary; Using filesort +1 SIMPLE t2 eq_ref PRIMARY PRIMARY 4 test.t1.a 1 +# This will not be eliminated as t2.b is in the WHERE +explain select t1.a from t1 left join t2 on t2.a=t1.a where t2.b < 3 or t2.b is null; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +1 SIMPLE t2 eq_ref PRIMARY PRIMARY 4 test.t1.a 1 Using where +# Elimination of multiple tables: +explain select t1.a from t1 left join (t2 join t3) on t2.a=t1.a and t3.a=t1.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +# Elimination of multiple tables (2): +explain select t1.a from t1 left join (t2 join t3 on t2.b=t3.b) on t2.a=t1.a and t3.a=t1.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +# Elimination when done within an outer join nest: +explain extended +select t0.* +from +t0 left join (t1 left join (t2 join t3 on t2.b=t3.b) on t2.a=t1.a and +t3.a=t1.a) on t0.a=t1.a; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t0 ALL NULL NULL NULL NULL 4 100.00 +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 100.00 +Warnings: +Note 1003 select `test`.`t0`.`a` AS `a` from `test`.`t0` left join (`test`.`t1`) on((`test`.`t0`.`a` = `test`.`t1`.`a`)) where 1 +# Elimination with aggregate functions +explain select count(*) from t1 left join t2 on t2.a=t1.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +explain select count(1) from t1 left join t2 on t2.a=t1.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +explain select count(1) from t1 left join t2 on t2.a=t1.a group by t1.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using temporary; Using filesort +This must not use elimination: +explain select count(1) from t1 left join t2 on t2.a=t1.a group by t2.a; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using temporary; Using filesort +1 SIMPLE t2 eq_ref PRIMARY PRIMARY 4 test.t1.a 1 Using index +drop table t0, t1, t2, t3; +create table t0 ( id integer, primary key (id)); +create table t1 ( +id integer, +attr1 integer, +primary key (id), +key (attr1) +); +create table t2 ( +id integer, +attr2 integer, +fromdate date, +primary key (id, fromdate), +key (attr2,fromdate) +); +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +insert into t0 select A.id + 10*B.id from t0 A, t0 B where B.id > 0; +insert into t1 select id, id from t0; +insert into t2 select id, id, date_add('2009-06-22', interval id day) from t0; +insert into t2 select id, id+1, date_add('2008-06-22', interval id day) from t0; +create view v1 as +select +F.id, A1.attr1, A2.attr2 +from +t0 F +left join t1 A1 on A1.id=F.id +left join t2 A2 on A2.id=F.id and +A2.fromdate=(select MAX(fromdate) from +t2 where id=A2.id); +create view v2 as +select +F.id, A1.attr1, A2.attr2 +from +t0 F +left join t1 A1 on A1.id=F.id +left join t2 A2 on A2.id=F.id and +A2.fromdate=(select MAX(fromdate) from +t2 where id=F.id); +This should use one table: +explain select id from v1 where id=2; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY F const PRIMARY PRIMARY 4 const 1 Using index +This should use one table: +explain extended select id from v1 where id in (1,2,3,4); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY F range PRIMARY PRIMARY 4 NULL 4 100.00 Using where; Using index +Warnings: +Note 1276 Field or reference 'test.A2.id' of SELECT #3 was resolved in SELECT #1 +Note 1003 select `F`.`id` AS `id` from `test`.`t0` `F` where (`F`.`id` in (1,2,3,4)) +This should use facts and A1 tables: +explain extended select id from v1 where attr1 between 12 and 14; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY A1 range PRIMARY,attr1 attr1 5 NULL 2 100.00 Using where +1 PRIMARY F eq_ref PRIMARY PRIMARY 4 test.A1.id 1 100.00 Using index +Warnings: +Note 1276 Field or reference 'test.A2.id' of SELECT #3 was resolved in SELECT #1 +Note 1003 select `F`.`id` AS `id` from `test`.`t0` `F` join `test`.`t1` `A1` where ((`F`.`id` = `A1`.`id`) and (`A1`.`attr1` between 12 and 14)) +This should use facts, A2 and its subquery: +explain extended select id from v1 where attr2 between 12 and 14; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY A2 range PRIMARY,attr2 attr2 5 NULL 5 100.00 Using where +1 PRIMARY F eq_ref PRIMARY PRIMARY 4 test.A2.id 1 100.00 Using index +3 DEPENDENT SUBQUERY t2 ref PRIMARY PRIMARY 4 test.A2.id 2 100.00 Using index +Warnings: +Note 1276 Field or reference 'test.A2.id' of SELECT #3 was resolved in SELECT #1 +Note 1003 select `F`.`id` AS `id` from `test`.`t0` `F` join `test`.`t2` `A2` where ((`F`.`id` = `A2`.`id`) and (`A2`.`attr2` between 12 and 14) and (`A2`.`fromdate` = (select max(`test`.`t2`.`fromdate`) AS `MAX(fromdate)` from `test`.`t2` where (`test`.`t2`.`id` = `A2`.`id`)))) +This should use one table: +explain select id from v2 where id=2; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY F const PRIMARY PRIMARY 4 const 1 Using index +This should use one table: +explain extended select id from v2 where id in (1,2,3,4); +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY F range PRIMARY PRIMARY 4 NULL 4 100.00 Using where; Using index +Warnings: +Note 1276 Field or reference 'test.F.id' of SELECT #3 was resolved in SELECT #1 +Note 1003 select `F`.`id` AS `id` from `test`.`t0` `F` where (`F`.`id` in (1,2,3,4)) +This should use facts and A1 tables: +explain extended select id from v2 where attr1 between 12 and 14; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY A1 range PRIMARY,attr1 attr1 5 NULL 2 100.00 Using where +1 PRIMARY F eq_ref PRIMARY PRIMARY 4 test.A1.id 1 100.00 Using index +Warnings: +Note 1276 Field or reference 'test.F.id' of SELECT #3 was resolved in SELECT #1 +Note 1003 select `F`.`id` AS `id` from `test`.`t0` `F` join `test`.`t1` `A1` where ((`F`.`id` = `A1`.`id`) and (`A1`.`attr1` between 12 and 14)) +This should use facts, A2 and its subquery: +explain extended select id from v2 where attr2 between 12 and 14; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 PRIMARY A2 range PRIMARY,attr2 attr2 5 NULL 5 100.00 Using where +1 PRIMARY F eq_ref PRIMARY PRIMARY 4 test.A2.id 1 100.00 Using where; Using index +3 DEPENDENT SUBQUERY t2 ref PRIMARY PRIMARY 4 test.F.id 2 100.00 Using index +Warnings: +Note 1276 Field or reference 'test.F.id' of SELECT #3 was resolved in SELECT #1 +Note 1003 select `F`.`id` AS `id` from `test`.`t0` `F` join `test`.`t2` `A2` where ((`F`.`id` = `A2`.`id`) and (`A2`.`attr2` between 12 and 14) and (`A2`.`fromdate` = (select max(`test`.`t2`.`fromdate`) AS `MAX(fromdate)` from `test`.`t2` where (`test`.`t2`.`id` = `F`.`id`)))) +drop view v1, v2; +drop table t0, t1, t2; +create table t1 (a int); +insert into t1 values (0),(1),(2),(3); +create table t2 (pk1 int, pk2 int, pk3 int, col int, primary key(pk1, pk2, pk3)); +insert into t2 select a,a,a,a from t1; +This must use only t1: +explain select t1.* from t1 left join t2 on t2.pk1=t1.a and +t2.pk2=t2.pk1+1 and +t2.pk3=t2.pk2+1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +This must use only t1: +explain select t1.* from t1 left join t2 on t2.pk1=t1.a and +t2.pk3=t2.pk1+1 and +t2.pk2=t2.pk3+1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +This must use both: +explain select t1.* from t1 left join t2 on t2.pk1=t1.a and +t2.pk3=t2.pk1+1 and +t2.pk2=t2.pk3+t2.col; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +1 SIMPLE t2 ref PRIMARY PRIMARY 4 test.t1.a 1 +This must use only t1: +explain select t1.* from t1 left join t2 on t2.pk2=t1.a and +t2.pk1=t2.pk2+1 and +t2.pk3=t2.pk1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 +drop table t1, t2; === modified file 'mysql-test/r/union.result' --- a/mysql-test/r/union.result 2009-03-19 10:18:52 +0000 +++ b/mysql-test/r/union.result 2009-06-30 15:09:36 +0000 @@ -522,7 +522,7 @@ id select_type table type possible_keys 2 UNION t2 const PRIMARY PRIMARY 4 const 1 100.00 NULL UNION RESULT <union1,2> ALL NULL NULL NULL NULL NULL NULL Warnings: -Note 1003 (select '1' AS `a`,'1' AS `b` from `test`.`t1` where ('1' = 1)) union (select '1' AS `a`,'10' AS `b` from `test`.`t2` where ('1' = 1)) +Note 1003 (select '1' AS `a`,'1' AS `b` from `test`.`t1` where 1) union (select '1' AS `a`,'10' AS `b` from `test`.`t2` where 1) (select * from t1 where a=5) union (select * from t2 where a=1); a b 1 10 === added file 'mysql-test/t/table_elim.test' --- a/mysql-test/t/table_elim.test 1970-01-01 00:00:00 +0000 +++ b/mysql-test/t/table_elim.test 2009-06-30 15:09:36 +0000 @@ -0,0 +1,160 @@ +# +# Table elimination (MWL#17) tests +# +--disable_warnings +drop table if exists t0, t1, t2, t3; +drop view if exists v1, v2; +--enable_warnings + +create table t1 (a int); +insert into t1 values (0),(1),(2),(3); +create table t0 as select * from t1; + +create table t2 (a int primary key, b int) + as select a, a as b from t1 where a in (1,2); + +create table t3 (a int primary key, b int) + as select a, a as b from t1 where a in (1,3); + +--echo # This will be eliminated: +explain select t1.a from t1 left join t2 on t2.a=t1.a; +explain extended select t1.a from t1 left join t2 on t2.a=t1.a; + +select t1.a from t1 left join t2 on t2.a=t1.a; + +--echo # This will not be eliminated as t2.b is in in select list: +explain select * from t1 left join t2 on t2.a=t1.a; + +--echo # This will not be eliminated as t2.b is in in order list: +explain select t1.a from t1 left join t2 on t2.a=t1.a order by t2.b; + +--echo # This will not be eliminated as t2.b is in group list: +explain select t1.a from t1 left join t2 on t2.a=t1.a group by t2.b; + +--echo # This will not be eliminated as t2.b is in the WHERE +explain select t1.a from t1 left join t2 on t2.a=t1.a where t2.b < 3 or t2.b is null; + +--echo # Elimination of multiple tables: +explain select t1.a from t1 left join (t2 join t3) on t2.a=t1.a and t3.a=t1.a; + +--echo # Elimination of multiple tables (2): +explain select t1.a from t1 left join (t2 join t3 on t2.b=t3.b) on t2.a=t1.a and t3.a=t1.a; + +--echo # Elimination when done within an outer join nest: +explain extended +select t0.* +from + t0 left join (t1 left join (t2 join t3 on t2.b=t3.b) on t2.a=t1.a and + t3.a=t1.a) on t0.a=t1.a; + +--echo # Elimination with aggregate functions +explain select count(*) from t1 left join t2 on t2.a=t1.a; +explain select count(1) from t1 left join t2 on t2.a=t1.a; +explain select count(1) from t1 left join t2 on t2.a=t1.a group by t1.a; + +--echo This must not use elimination: +explain select count(1) from t1 left join t2 on t2.a=t1.a group by t2.a; + +drop table t0, t1, t2, t3; + +# This will stand for elim_facts +create table t0 ( id integer, primary key (id)); + +# Attribute1, non-versioned +create table t1 ( + id integer, + attr1 integer, + primary key (id), + key (attr1) +); + +# Attribute2, time-versioned +create table t2 ( + id integer, + attr2 integer, + fromdate date, + primary key (id, fromdate), + key (attr2,fromdate) +); + +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +insert into t0 select A.id + 10*B.id from t0 A, t0 B where B.id > 0; + +insert into t1 select id, id from t0; +insert into t2 select id, id, date_add('2009-06-22', interval id day) from t0; +insert into t2 select id, id+1, date_add('2008-06-22', interval id day) from t0; + +create view v1 as +select + F.id, A1.attr1, A2.attr2 +from + t0 F + left join t1 A1 on A1.id=F.id + left join t2 A2 on A2.id=F.id and + A2.fromdate=(select MAX(fromdate) from + t2 where id=A2.id); +create view v2 as +select + F.id, A1.attr1, A2.attr2 +from + t0 F + left join t1 A1 on A1.id=F.id + left join t2 A2 on A2.id=F.id and + A2.fromdate=(select MAX(fromdate) from + t2 where id=F.id); + +--echo This should use one table: +explain select id from v1 where id=2; +--echo This should use one table: +explain extended select id from v1 where id in (1,2,3,4); +--echo This should use facts and A1 tables: +explain extended select id from v1 where attr1 between 12 and 14; +--echo This should use facts, A2 and its subquery: +explain extended select id from v1 where attr2 between 12 and 14; + +# Repeat for v2: + +--echo This should use one table: +explain select id from v2 where id=2; +--echo This should use one table: +explain extended select id from v2 where id in (1,2,3,4); +--echo This should use facts and A1 tables: +explain extended select id from v2 where attr1 between 12 and 14; +--echo This should use facts, A2 and its subquery: +explain extended select id from v2 where attr2 between 12 and 14; + +drop view v1, v2; +drop table t0, t1, t2; + +# +# Tests for the code that uses t.keypartX=func(t.keypartY) equalities to +# make table elimination inferences +# +create table t1 (a int); +insert into t1 values (0),(1),(2),(3); + +create table t2 (pk1 int, pk2 int, pk3 int, col int, primary key(pk1, pk2, pk3)); +insert into t2 select a,a,a,a from t1; + +--echo This must use only t1: +explain select t1.* from t1 left join t2 on t2.pk1=t1.a and + t2.pk2=t2.pk1+1 and + t2.pk3=t2.pk2+1; + +--echo This must use only t1: +explain select t1.* from t1 left join t2 on t2.pk1=t1.a and + t2.pk3=t2.pk1+1 and + t2.pk2=t2.pk3+1; + +--echo This must use both: +explain select t1.* from t1 left join t2 on t2.pk1=t1.a and + t2.pk3=t2.pk1+1 and + t2.pk2=t2.pk3+t2.col; + +--echo This must use only t1: +explain select t1.* from t1 left join t2 on t2.pk2=t1.a and + t2.pk1=t2.pk2+1 and + t2.pk3=t2.pk1; + +drop table t1, t2; + === added file 'sql-bench/test-table-elimination.sh' --- a/sql-bench/test-table-elimination.sh 1970-01-01 00:00:00 +0000 +++ b/sql-bench/test-table-elimination.sh 2009-06-30 15:09:36 +0000 @@ -0,0 +1,320 @@ +#!@PERL@ +# Test of table elimination feature + +use Cwd; +use DBI; +use Getopt::Long; +use Benchmark; + +$opt_loop_count=100000; +$opt_medium_loop_count=10000; +$opt_small_loop_count=100; + +$pwd = cwd(); $pwd = "." if ($pwd eq ''); +require "$pwd/bench-init.pl" || die "Can't read Configuration file: $!\n"; + +if ($opt_small_test) +{ + $opt_loop_count/=10; + $opt_medium_loop_count/=10; + $opt_small_loop_count/=10; +} + +print "Testing table elimination feature\n"; +print "The test table has $opt_loop_count rows.\n\n"; + +# A query to get the recent versions of all attributes: +$select_current_full_facts=" + select + F.id, A1.attr1, A2.attr2 + from + elim_facts F + left join elim_attr1 A1 on A1.id=F.id + left join elim_attr2 A2 on A2.id=F.id and + A2.fromdate=(select MAX(fromdate) from + elim_attr2 where id=A2.id); +"; +$select_current_full_facts=" + select + F.id, A1.attr1, A2.attr2 + from + elim_facts F + left join elim_attr1 A1 on A1.id=F.id + left join elim_attr2 A2 on A2.id=F.id and + A2.fromdate=(select MAX(fromdate) from + elim_attr2 where id=F.id); +"; +# TODO: same as above but for some given date also? +# TODO: + + +#### +#### Connect and start timeing +#### + +$dbh = $server->connect(); +$start_time=new Benchmark; + +#### +#### Create needed tables +#### + +goto select_test if ($opt_skip_create); + +print "Creating tables\n"; +$dbh->do("drop table elim_facts" . $server->{'drop_attr'}); +$dbh->do("drop table elim_attr1" . $server->{'drop_attr'}); +$dbh->do("drop table elim_attr2" . $server->{'drop_attr'}); + +# The facts table +do_many($dbh,$server->create("elim_facts", + ["id integer"], + ["primary key (id)"])); + +# Attribute1, non-versioned +do_many($dbh,$server->create("elim_attr1", + ["id integer", + "attr1 integer"], + ["primary key (id)", + "key (attr1)"])); + +# Attribute2, time-versioned +do_many($dbh,$server->create("elim_attr2", + ["id integer", + "attr2 integer", + "fromdate date"], + ["primary key (id, fromdate)", + "key (attr2,fromdate)"])); + +#NOTE: ignoring: if ($limits->{'views'}) +$dbh->do("drop view elim_current_facts"); +$dbh->do("create view elim_current_facts as $select_current_full_facts"); + +if ($opt_lock_tables) +{ + do_query($dbh,"LOCK TABLES elim_facts, elim_attr1, elim_attr2 WRITE"); +} + +if ($opt_fast && defined($server->{vacuum})) +{ + $server->vacuum(1,\$dbh); +} + +#### +#### Fill the facts table +#### +$n_facts= $opt_loop_count; + +if ($opt_fast && $server->{transactions}) +{ + $dbh->{AutoCommit} = 0; +} + +print "Inserting $n_facts rows into facts table\n"; +$loop_time=new Benchmark; + +$query="insert into elim_facts values ("; +for ($id=0; $id < $n_facts ; $id++) +{ + do_query($dbh,"$query $id)"); +} + +if ($opt_fast && $server->{transactions}) +{ + $dbh->commit; + $dbh->{AutoCommit} = 1; +} + +$end_time=new Benchmark; +print "Time to insert ($n_facts): " . + timestr(timediff($end_time, $loop_time),"all") . "\n\n"; + +#### +#### Fill attr1 table +#### +if ($opt_fast && $server->{transactions}) +{ + $dbh->{AutoCommit} = 0; +} + +print "Inserting $n_facts rows into attr1 table\n"; +$loop_time=new Benchmark; + +$query="insert into elim_attr1 values ("; +for ($id=0; $id < $n_facts ; $id++) +{ + $attr1= ceil(rand($n_facts)); + do_query($dbh,"$query $id, $attr1)"); +} + +if ($opt_fast && $server->{transactions}) +{ + $dbh->commit; + $dbh->{AutoCommit} = 1; +} + +$end_time=new Benchmark; +print "Time to insert ($n_facts): " . + timestr(timediff($end_time, $loop_time),"all") . "\n\n"; + +#### +#### Fill attr2 table +#### +if ($opt_fast && $server->{transactions}) +{ + $dbh->{AutoCommit} = 0; +} + +print "Inserting $n_facts rows into attr2 table\n"; +$loop_time=new Benchmark; + +for ($id=0; $id < $n_facts ; $id++) +{ + # Two values for each $id - current one and obsolete one. + $attr1= ceil(rand($n_facts)); + $query="insert into elim_attr2 values ($id, $attr1, now())"; + do_query($dbh,$query); + $query="insert into elim_attr2 values ($id, $attr1, '2009-01-01')"; + do_query($dbh,$query); +} + +if ($opt_fast && $server->{transactions}) +{ + $dbh->commit; + $dbh->{AutoCommit} = 1; +} + +$end_time=new Benchmark; +print "Time to insert ($n_facts): " . + timestr(timediff($end_time, $loop_time),"all") . "\n\n"; + +#### +#### Finalize the database population +#### + +if ($opt_lock_tables) +{ + do_query($dbh,"UNLOCK TABLES"); +} + +if ($opt_fast && defined($server->{vacuum})) +{ + $server->vacuum(0,\$dbh,["elim_facts", "elim_attr1", "elim_attr2"]); +} + +if ($opt_lock_tables) +{ + do_query($dbh,"LOCK TABLES elim_facts, elim_attr1, elim_attr2 WRITE"); +} + +#### +#### Do some selects on the table +#### + +select_test: + +# +# The selects will be: +# - N pk-lookups with all attributes +# - pk-attribute-based lookup +# - latest-attribute value based lookup. + + +### +### Bare facts select: +### +print "testing bare facts facts table\n"; +$loop_time=new Benchmark; +$rows=0; +for ($i=0 ; $i < $opt_medium_loop_count ; $i++) +{ + $val= ceil(rand($n_facts)); + $rows+=fetch_all_rows($dbh,"select * from elim_facts where id=$val"); +} +$count=$i; + +$end_time=new Benchmark; +print "time for select_bare_facts ($count:$rows): " . + timestr(timediff($end_time, $loop_time),"all") . "\n"; + + +### +### Full facts select, no elimination: +### +print "testing full facts facts table\n"; +$loop_time=new Benchmark; +$rows=0; +for ($i=0 ; $i < $opt_medium_loop_count ; $i++) +{ + $val= rand($n_facts); + $rows+=fetch_all_rows($dbh,"select * from elim_current_facts where id=$val"); +} +$count=$i; + +$end_time=new Benchmark; +print "time for select_two_attributes ($count:$rows): " . + timestr(timediff($end_time, $loop_time),"all") . "\n"; + +### +### Now with elimination: select only only one fact +### +print "testing selection of one attribute\n"; +$loop_time=new Benchmark; +$rows=0; +for ($i=0 ; $i < $opt_medium_loop_count ; $i++) +{ + $val= rand($n_facts); + $rows+=fetch_all_rows($dbh,"select id, attr1 from elim_current_facts where id=$val"); +} +$count=$i; + +$end_time=new Benchmark; +print "time for select_one_attribute ($count:$rows): " . + timestr(timediff($end_time, $loop_time),"all") . "\n"; + +### +### Now with elimination: select only only one fact +### +print "testing selection of one attribute\n"; +$loop_time=new Benchmark; +$rows=0; +for ($i=0 ; $i < $opt_medium_loop_count ; $i++) +{ + $val= rand($n_facts); + $rows+=fetch_all_rows($dbh,"select id, attr2 from elim_current_facts where id=$val"); +} +$count=$i; + +$end_time=new Benchmark; +print "time for select_one_attribute ($count:$rows): " . + timestr(timediff($end_time, $loop_time),"all") . "\n"; + + +### +### TODO... +### + +; + +#### +#### End of benchmark +#### + +if ($opt_lock_tables) +{ + do_query($dbh,"UNLOCK TABLES"); +} +if (!$opt_skip_delete) +{ + do_query($dbh,"drop table elim_facts, elim_attr1, elim_attr2" . $server->{'drop_attr'}); +} + +if ($opt_fast && defined($server->{vacuum})) +{ + $server->vacuum(0,\$dbh); +} + +$dbh->disconnect; # close connection + +end_benchmark($start_time); + === modified file 'sql/CMakeLists.txt' --- a/sql/CMakeLists.txt 2008-11-21 14:21:50 +0000 +++ b/sql/CMakeLists.txt 2009-06-30 15:09:36 +0000 @@ -73,7 +73,7 @@ ADD_EXECUTABLE(mysqld partition_info.cc rpl_utility.cc rpl_injector.cc sql_locale.cc rpl_rli.cc rpl_mi.cc sql_servers.cc sql_connect.cc scheduler.cc - sql_profile.cc event_parse_data.cc + sql_profile.cc event_parse_data.cc opt_table_elimination.cc ${PROJECT_SOURCE_DIR}/sql/sql_yacc.cc ${PROJECT_SOURCE_DIR}/sql/sql_yacc.h ${PROJECT_SOURCE_DIR}/include/mysqld_error.h === modified file 'sql/Makefile.am' --- a/sql/Makefile.am 2009-03-12 22:27:35 +0000 +++ b/sql/Makefile.am 2009-06-30 15:09:36 +0000 @@ -121,7 +121,8 @@ mysqld_SOURCES = sql_lex.cc sql_handler. event_queue.cc event_db_repository.cc events.cc \ sql_plugin.cc sql_binlog.cc \ sql_builtin.cc sql_tablespace.cc partition_info.cc \ - sql_servers.cc event_parse_data.cc + sql_servers.cc event_parse_data.cc \ + opt_table_elimination.cc nodist_mysqld_SOURCES = mini_client_errors.c pack.c client.c my_time.c my_user.c === modified file 'sql/item.cc' --- a/sql/item.cc 2009-04-25 10:05:32 +0000 +++ b/sql/item.cc 2009-06-30 15:09:36 +0000 @@ -1915,6 +1915,37 @@ void Item_field::reset_field(Field *f) name= (char*) f->field_name; } + +bool Item_field::check_column_usage_processor(uchar *arg) +{ + Field_processor_info* info=(Field_processor_info*)arg; + + if (field->table == info->table) + { + /* It is not ok to use columns that are not part of the key of interest: */ + if (!(field->part_of_key.is_set(info->keyno))) + return TRUE; + + /* Find which key part we're using and mark it in needed_key_parts */ + KEY *key= &field->table->key_info[info->keyno]; + for (uint part= 0; part < key->key_parts; part++) + { + if (field->field_index == key->key_part[part].field->field_index) + { + if (part == info->forbidden_part) + return TRUE; + info->needed_key_parts |= key_part_map(1) << part; + break; + } + } + return FALSE; + } + else + info->used_tables |= this->used_tables(); + return FALSE; +} + + const char *Item_ident::full_name() const { char *tmp; @@ -3380,7 +3411,7 @@ static void mark_as_dependent(THD *thd, /* store pointer on SELECT_LEX from which item is dependent */ if (mark_item) mark_item->depended_from= last; - current->mark_as_dependent(last); + current->mark_as_dependent(last, resolved_item); if (thd->lex->describe & DESCRIBE_EXTENDED) { char warn_buff[MYSQL_ERRMSG_SIZE]; === modified file 'sql/item.h' --- a/sql/item.h 2009-04-25 10:05:32 +0000 +++ b/sql/item.h 2009-06-30 15:09:36 +0000 @@ -731,7 +731,11 @@ public: virtual bool val_bool_result() { return val_bool(); } virtual bool is_null_result() { return is_null(); } - /* bit map of tables used by item */ + /* + Bitmap of tables used by item + (note: if you need to check dependencies on individual columns, check out + check_column_usage_processor) + */ virtual table_map used_tables() const { return (table_map) 0L; } /* Return table map of tables that can't be NULL tables (tables that are @@ -888,6 +892,8 @@ public: virtual bool reset_query_id_processor(uchar *query_id_arg) { return 0; } virtual bool is_expensive_processor(uchar *arg) { return 0; } virtual bool register_field_in_read_map(uchar *arg) { return 0; } + virtual bool check_column_usage_processor(uchar *arg) { return 0; } + virtual bool mark_as_eliminated_processor(uchar *arg) { return 0; } /* Check if a partition function is allowed SYNOPSIS @@ -1011,6 +1017,18 @@ public: bool eq_by_collation(Item *item, bool binary_cmp, CHARSET_INFO *cs); }; +/* Data for Item::check_column_usage_processor */ +typedef struct +{ + TABLE *table; /* Table of interest */ + uint keyno; /* Index of interest */ + uint forbidden_part; /* key part which one is not allowed to refer to */ + /* [Set by processor] used tables, besides the table of interest */ + table_map used_tables; + /* [Set by processor] Parts of index of interest that expression refers to */ + uint needed_key_parts; +} Field_processor_info; + class sp_head; @@ -1477,6 +1495,7 @@ public: bool find_item_in_field_list_processor(uchar *arg); bool register_field_in_read_map(uchar *arg); bool check_partition_func_processor(uchar *int_arg) {return FALSE;} + bool check_column_usage_processor(uchar *arg); void cleanup(); bool result_as_longlong() { @@ -2203,6 +2222,10 @@ public: if (!depended_from) (*ref)->update_used_tables(); } + bool const_item() const + { + return (*ref)->const_item(); + } table_map not_null_tables() const { return (*ref)->not_null_tables(); } void set_result_field(Field *field) { result_field= field; } bool is_result_field() { return 1; } === modified file 'sql/item_subselect.cc' --- a/sql/item_subselect.cc 2009-01-31 21:22:44 +0000 +++ b/sql/item_subselect.cc 2009-06-30 15:09:36 +0000 @@ -39,7 +39,7 @@ inline Item * and_items(Item* cond, Item Item_subselect::Item_subselect(): Item_result_field(), value_assigned(0), thd(0), substitution(0), engine(0), old_engine(0), used_tables_cache(0), have_to_be_excluded(0), - const_item_cache(1), engine_changed(0), changed(0), is_correlated(FALSE) + const_item_cache(1), in_fix_fields(0), engine_changed(0), changed(0), is_correlated(FALSE) { with_subselect= 1; reset(); @@ -151,10 +151,14 @@ bool Item_subselect::fix_fields(THD *thd DBUG_ASSERT(fixed == 0); engine->set_thd((thd= thd_param)); + if (!in_fix_fields) + refers_to.empty(); + eliminated= FALSE; if (check_stack_overrun(thd, STACK_MIN_SIZE, (uchar*)&res)) return TRUE; - + + in_fix_fields++; res= engine->prepare(); // all transformation is done (used by prepared statements) @@ -181,12 +185,14 @@ bool Item_subselect::fix_fields(THD *thd if (!(*ref)->fixed) ret= (*ref)->fix_fields(thd, ref); thd->where= save_where; + in_fix_fields--; return ret; } // Is it one field subselect? if (engine->cols() > max_columns) { my_error(ER_OPERAND_COLUMNS, MYF(0), 1); + in_fix_fields--; return TRUE; } fix_length_and_dec(); @@ -203,11 +209,30 @@ bool Item_subselect::fix_fields(THD *thd fixed= 1; err: + in_fix_fields--; thd->where= save_where; return res; } +bool Item_subselect::check_column_usage_processor(uchar *arg) +{ + List_iterator<Item> it(refers_to); + Item *item; + while ((item= it++)) + { + if (item->walk(&Item::check_column_usage_processor,FALSE, arg)) + return TRUE; + } + return FALSE; +} + +bool Item_subselect::mark_as_eliminated_processor(uchar *arg) +{ + eliminated= TRUE; + return FALSE; +} + bool Item_subselect::walk(Item_processor processor, bool walk_subquery, uchar *argument) { @@ -225,6 +250,7 @@ bool Item_subselect::walk(Item_processor if (lex->having && (lex->having)->walk(processor, walk_subquery, argument)) return 1; + /* TODO: why does this walk WHERE/HAVING but not ON expressions of outer joins? */ while ((item=li++)) { === modified file 'sql/item_subselect.h' --- a/sql/item_subselect.h 2008-02-22 10:30:33 +0000 +++ b/sql/item_subselect.h 2009-06-30 15:09:36 +0000 @@ -52,8 +52,16 @@ protected: bool have_to_be_excluded; /* cache of constant state */ bool const_item_cache; - + public: + /* + References from inside the subquery to the select that this predicate is + in. References to parent selects not included. + */ + List<Item> refers_to; + int in_fix_fields; + bool eliminated; + /* changed engine indicator */ bool engine_changed; /* subquery is transformed */ @@ -126,6 +134,8 @@ public: virtual void reset_value_registration() {} enum_parsing_place place() { return parsing_place; } bool walk(Item_processor processor, bool walk_subquery, uchar *arg); + bool mark_as_eliminated_processor(uchar *arg); + bool check_column_usage_processor(uchar *arg); /** Get the SELECT_LEX structure associated with this Item. === modified file 'sql/item_sum.cc' --- a/sql/item_sum.cc 2009-04-25 09:04:38 +0000 +++ b/sql/item_sum.cc 2009-06-30 15:09:36 +0000 @@ -350,7 +350,7 @@ bool Item_sum::register_sum_func(THD *th sl= sl->master_unit()->outer_select() ) sl->master_unit()->item->with_sum_func= 1; } - thd->lex->current_select->mark_as_dependent(aggr_sel); + thd->lex->current_select->mark_as_dependent(aggr_sel, NULL); return FALSE; } @@ -542,11 +542,6 @@ void Item_sum::update_used_tables () args[i]->update_used_tables(); used_tables_cache|= args[i]->used_tables(); } - - used_tables_cache&= PSEUDO_TABLE_BITS; - - /* the aggregate function is aggregated into its local context */ - used_tables_cache |= (1 << aggr_sel->join->tables) - 1; } } === modified file 'sql/item_sum.h' --- a/sql/item_sum.h 2008-12-09 19:43:10 +0000 +++ b/sql/item_sum.h 2009-06-30 15:09:36 +0000 @@ -255,6 +255,12 @@ protected: */ Item **orig_args, *tmp_orig_args[2]; table_map used_tables_cache; + + /* + TRUE <=> We've managed to calculate the value of this Item in + opt_sum_query(), hence it can be considered constant at all subsequent + steps. + */ bool forced_const; public: @@ -341,6 +347,15 @@ public: virtual const char *func_name() const= 0; virtual Item *result_item(Field *field) { return new Item_field(field); } + /* + Return bitmap of tables that are needed to evaluate the item. + + The implementation takes into account the used strategy: items resolved + at optimization phase will report 0. + Items that depend on the number of join output records, but not columns + of any particular table (like COUNT(*)) will report 0 from used_tables(), + but will still return false from const_item(). + */ table_map used_tables() const { return used_tables_cache; } void update_used_tables (); void cleanup() === added file 'sql/opt_table_elimination.cc' --- a/sql/opt_table_elimination.cc 1970-01-01 00:00:00 +0000 +++ b/sql/opt_table_elimination.cc 2009-06-30 15:09:36 +0000 @@ -0,0 +1,494 @@ +/** + @file + + @brief + Table Elimination Module + + @defgroup Table_Elimination Table Elimination Module + @{ +*/ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mysql_priv.h" +#include "sql_select.h" + +/* + OVERVIEW + + The module has one entry point - eliminate_tables() function, which one + needs to call (once) sometime after update_ref_and_keys() but before the + join optimization. + eliminate_tables() operates over the JOIN structures. Logically, it + removes the right sides of outer join nests. Physically, it changes the + following members: + + * Eliminated tables are marked as constant and moved to the front of the + join order. + * In addition to this, they are recorded in JOIN::eliminated_tables bitmap. + + * All join nests have their NESTED_JOIN::n_tables updated to discount + the eliminated tables + + * Items that became disused because they were in the ON expression of an + eliminated outer join are notified by means of the Item tree walk which + calls Item::mark_as_eliminated_processor for every item + - At the moment the only Item that cares is Item_subselect with its + Item_subselect::eliminated flag which is used by EXPLAIN code to + check if the subquery should be shown in EXPLAIN. + + Table elimination is redone on every PS re-execution. +*/ + +static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl); +static bool table_has_one_match(TABLE *table, table_map bound_tables, + bool *multiple_matches); +static uint +eliminate_tables_for_list(JOIN *join, TABLE **leaves_arr, + List<TABLE_LIST> *join_list, + bool its_outer_join, + table_map tables_in_list, + table_map tables_used_elsewhere, + bool *multiple_matches); +static bool +extra_keyuses_bind_all_keyparts(table_map bound_tables, TABLE *table, + KEYUSE *key_start, KEYUSE *key_end, + uint n_keyuses, table_map bound_parts); + +/* + Perform table elimination + + SYNOPSIS + eliminate_tables() + join Join to work on + const_tbl_count INOUT Number of constant tables (this includes + eliminated tables) + const_tables INOUT Bitmap of constant tables + + DESCRIPTION + This function is the entry point for table elimination. + The idea behind table elimination is that if we have an outer join: + + SELECT * FROM t1 LEFT JOIN + (t2 JOIN t3) ON t3.primary_key=t1.col AND + t4.primary_key=t2.col + such that + + 1. columns of the inner tables are not used anywhere ouside the outer + join (not in WHERE, not in GROUP/ORDER BY clause, not in select list + etc etc), and + 2. inner side of the outer join is guaranteed to produce at most one + record combination for each record combination of outer tables. + + then the inner side of the outer join can be removed from the query. + This is because it will always produce one matching record (either a + real match or a NULL-complemented record combination), and since there + are no references to columns of the inner tables anywhere, it doesn't + matter which record combination it was. + + This function primary handles checking #1. It collects a bitmap of + tables that are not used in select list/GROUP BY/ORDER BY/HAVING/etc and + thus can possibly be eliminated. + + SIDE EFFECTS + See the OVERVIEW section at the top of this file. + +*/ + +void eliminate_tables(JOIN *join) +{ + Item *item; + table_map used_tables; + DBUG_ENTER("eliminate_tables"); + + DBUG_ASSERT(join->eliminated_tables == 0); + + /* If there are no outer joins, we have nothing to eliminate: */ + if (!join->outer_join) + DBUG_VOID_RETURN; + + /* Find the tables that are referred to from WHERE/HAVING */ + used_tables= (join->conds? join->conds->used_tables() : 0) | + (join->having? join->having->used_tables() : 0); + + /* Add tables referred to from the select list */ + List_iterator<Item> it(join->fields_list); + while ((item= it++)) + used_tables |= item->used_tables(); + + /* Add tables referred to from ORDER BY and GROUP BY lists */ + ORDER *all_lists[]= { join->order, join->group_list}; + for (int i=0; i < 2; i++) + { + for (ORDER *cur_list= all_lists[i]; cur_list; cur_list= cur_list->next) + used_tables |= (*(cur_list->item))->used_tables(); + } + + THD* thd= join->thd; + if (join->select_lex == &thd->lex->select_lex) + { + /* Multi-table UPDATE and DELETE: don't eliminate the tables we modify: */ + used_tables |= thd->table_map_for_update; + + /* Multi-table UPDATE: don't eliminate tables referred from SET statement */ + if (thd->lex->sql_command == SQLCOM_UPDATE_MULTI) + { + List_iterator<Item> it2(thd->lex->value_list); + while ((item= it2++)) + used_tables |= item->used_tables(); + } + } + + table_map all_tables= join->all_tables_map(); + if (all_tables & ~used_tables) + { + /* There are some tables that we probably could eliminate. Try it. */ + TABLE *leaves_array[MAX_TABLES]; + bool multiple_matches= FALSE; + eliminate_tables_for_list(join, leaves_array, join->join_list, FALSE, + all_tables, used_tables, &multiple_matches); + } + DBUG_VOID_RETURN; +} + +/* + Perform table elimination in a given join list + + SYNOPSIS + eliminate_tables_for_list() + join The join + leaves_arr OUT Store here an array of leaf (base) tables that + are descendants of the join_list, and increment + the pointer to point right above the array. + join_list Join list to work on + its_outer_join TRUE <=> join_list is an inner side of an outer + join + FALSE <=> otherwise (this is top-level join list) + tables_in_list Bitmap of tables embedded in the join_list. + tables_used_elsewhere Bitmap of tables that are referred to from + somewhere outside of the join list (e.g. + select list, HAVING, etc). + + DESCRIPTION + Perform table elimination for a join list. + Try eliminating children nests first. + The "all tables in join nest can produce only one matching record + combination" property checking is modeled after constant table detection, + plus we reuse info attempts to eliminate child join nests. + + RETURN + Number of children left after elimination. 0 means everything was + eliminated. +*/ +static uint +eliminate_tables_for_list(JOIN *join, TABLE **leaves_arr, + List<TABLE_LIST> *join_list, + bool its_outer_join, + table_map tables_in_list, + table_map tables_used_elsewhere, + bool *multiple_matches) +{ + TABLE_LIST *tbl; + List_iterator<TABLE_LIST> it(*join_list); + table_map tables_used_on_left= 0; + TABLE **cur_table= leaves_arr; + bool children_have_multiple_matches= FALSE; + uint remaining_children= 0; + + while ((tbl= it++)) + { + if (tbl->on_expr) + { + table_map outside_used_tables= tables_used_elsewhere | + tables_used_on_left; + bool multiple_matches= FALSE; + if (tbl->nested_join) + { + /* This is "... LEFT JOIN (join_nest) ON cond" */ + uint n; + if (!(n= eliminate_tables_for_list(join, cur_table, + &tbl->nested_join->join_list, TRUE, + tbl->nested_join->used_tables, + outside_used_tables, + &multiple_matches))) + { + mark_as_eliminated(join, tbl); + } + else + remaining_children++; + tbl->nested_join->n_tables= n; + } + else + { + /* This is "... LEFT JOIN tbl ON cond" */ + if (!(tbl->table->map & outside_used_tables) && + table_has_one_match(tbl->table, join->all_tables_map(), + &multiple_matches)) + { + mark_as_eliminated(join, tbl); + } + else + remaining_children++; + } + tables_used_on_left |= tbl->on_expr->used_tables(); + children_have_multiple_matches= children_have_multiple_matches || + multiple_matches; + } + else + { + DBUG_ASSERT(!tbl->nested_join); + remaining_children++; + } + + if (tbl->table) + *(cur_table++)= tbl->table; + } + + *multiple_matches |= children_have_multiple_matches; + + /* Try eliminating the nest we're called for */ + if (its_outer_join && !children_have_multiple_matches && + !(tables_in_list & tables_used_elsewhere)) + { + table_map bound_tables= join->const_table_map | (join->all_tables_map() & + ~tables_in_list); + table_map old_bound_tables; + TABLE **leaves_end= cur_table; + /* + Do the same as const table search table: try to expand the set of bound + tables until it covers all tables in the join_list + */ + do + { + old_bound_tables= bound_tables; + for (cur_table= leaves_arr; cur_table != leaves_end; cur_table++) + { + if (!((*cur_table)->map & join->eliminated_tables) && + table_has_one_match(*cur_table, bound_tables, multiple_matches)) + { + bound_tables |= (*cur_table)->map; + } + } + } while (old_bound_tables != bound_tables); + + if (!(tables_in_list & ~bound_tables)) + { + /* + This join_list can be eliminated. Signal about this to the caller by + returning number of tables. + */ + remaining_children= 0; + } + } + return remaining_children; +} + + +/* + Check if the table will produce at most one matching record + + SYNOPSIS + table_has_one_match() + table The [base] table being checked + bound_tables Tables that should be considered bound. + multiple_matches OUT Set to TRUE when there is no way we could + find find a limitation that would give us one-match + property. + + DESCRIPTION + Check if table will produce at most one matching record for each record + combination of tables in bound_tables bitmap. + + The check is based on ref analysis data, KEYUSE structures. We're + handling two cases: + + 1. Table has a UNIQUE KEY(uk_col_1, ... uk_col_N), and for each uk_col_i + there is a KEYUSE that represents a limitation in form + + table.uk_col_i = func(bound_tables) (X) + + 2. Same as above but we also handle limitations in form + + table.uk_col_i = func(bound_tables, uk_col_j1, ... uk_col_j2) (XX) + + where values of uk_col_jN are known to be bound because for them we + have an equality of form (X) or (XX). + + RETURN + TRUE Yes, at most one match + FALSE No +*/ + +static bool table_has_one_match(TABLE *table, table_map bound_tables, + bool *multiple_matches) +{ + KEYUSE *keyuse= table->reginfo.join_tab->keyuse; + if (keyuse) + { + while (keyuse->table == table) + { + uint key= keyuse->key; + key_part_map bound_parts=0; + uint n_unusable=0; + bool ft_key= test(keyuse->keypart == FT_KEYPART); + KEY *keyinfo= table->key_info + key; + KEYUSE *key_start = keyuse; + + do /* For each keypart and each way to read it */ + { + if (keyuse->type == KEYUSE_USABLE) + { + if(!(keyuse->used_tables & ~bound_tables) && + !(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)) + { + bound_parts |= keyuse->keypart_map; + } + } + else + n_unusable++; + keyuse++; + } while (keyuse->table == table && keyuse->key == key); + + if (ft_key || ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) + != HA_NOSAME)) + { + continue; + } + + if (bound_parts == PREV_BITS(key_part_map, keyinfo->key_parts) || + extra_keyuses_bind_all_keyparts(bound_tables, table, key_start, + keyuse, n_unusable, bound_parts)) + { + return TRUE; + } + } + } + return FALSE; +} + + +/* + Check if KEYUSE elemements with unusable==TRUE bind all parts of the key + + SYNOPSIS + + extra_keyuses_bind_all_keyparts() + bound_tables Tables which can be considered constants + table Table we're examining + key_start Start of KEYUSE array with elements describing the key + of interest + key_end End of the array + 1 + n_keyuses Number of elements in the array that have unusable==TRUE + bound_parts Key parts whose values are known to be bound. + + DESCRIPTION + Check if unusable KEYUSE elements cause all parts of key to be bound. An + unusable keyuse element makes a keypart bound when it + represents the following: + + keyXpartY=func(bound_columns, preceding_tables) + + RETURN + TRUE Yes, at most one match + FALSE No +*/ + +static bool +extra_keyuses_bind_all_keyparts(table_map bound_tables, TABLE *table, + KEYUSE *key_start, KEYUSE *key_end, + uint n_keyuses, table_map bound_parts) +{ + /* + We need + - some 'unusable' KEYUSE elements to work on + - some keyparts to be already bound to start inferences: + */ + if (n_keyuses && bound_parts) + { + KEY *keyinfo= table->key_info + key_start->key; + bool bound_more_parts; + do + { + bound_more_parts= FALSE; + for (KEYUSE *k= key_start; k!=key_end; k++) + { + if (k->type == KEYUSE_UNKNOWN) + { + Field_processor_info fp= {table, k->key, k->keypart, 0, 0}; + if (k->val->walk(&Item::check_column_usage_processor, FALSE, + (uchar*)&fp)) + k->type= KEYUSE_NO_BIND; + else + { + k->used_tables= fp.used_tables; + k->keypart_map= fp.needed_key_parts; + k->type= KEYUSE_BIND; + } + } + + if (k->type == KEYUSE_BIND) + { + /* + If this is a binding keyuse, such that + - all tables it refers to are bound, + - all parts it refers to are bound + - but the key part it binds is not itself bound + */ + if (!(k->used_tables & ~bound_tables) && + !(k->keypart_map & ~bound_parts) && + !(bound_parts & key_part_map(1) << k->keypart)) + { + bound_parts|= key_part_map(1) << k->keypart; + if (bound_parts == PREV_BITS(key_part_map, keyinfo->key_parts)) + return TRUE; + bound_more_parts= TRUE; + } + } + } + } while (bound_more_parts); + } + return FALSE; +} + + +/* + Mark one table or the whole join nest as eliminated. +*/ +static void mark_as_eliminated(JOIN *join, TABLE_LIST *tbl) +{ + TABLE *table; + /* + NOTE: there are TABLE_LIST object that have + tbl->table!= NULL && tbl->nested_join!=NULL and + tbl->table == tbl->nested_join->join_list->element(..)->table + */ + if (tbl->nested_join) + { + TABLE_LIST *child; + List_iterator<TABLE_LIST> it(tbl->nested_join->join_list); + while ((child= it++)) + mark_as_eliminated(join, child); + } + else if ((table= tbl->table)) + { + JOIN_TAB *tab= tbl->table->reginfo.join_tab; + if (!(join->const_table_map & tab->table->map)) + { + DBUG_PRINT("info", ("Eliminated table %s", table->alias)); + tab->type= JT_CONST; + join->eliminated_tables |= table->map; + join->const_table_map|= table->map; + set_position(join, join->const_tables++, tab, (KEYUSE*)0); + } + } + + if (tbl->on_expr) + tbl->on_expr->walk(&Item::mark_as_eliminated_processor, FALSE, NULL); +} + +/** + @} (end of group Table_Elimination) +*/ + === modified file 'sql/sql_lex.cc' --- a/sql/sql_lex.cc 2009-04-25 10:05:32 +0000 +++ b/sql/sql_lex.cc 2009-06-30 15:09:36 +0000 @@ -1778,7 +1778,7 @@ void st_select_lex_unit::exclude_tree() 'last' should be reachable from this st_select_lex_node */ -void st_select_lex::mark_as_dependent(st_select_lex *last) +void st_select_lex::mark_as_dependent(st_select_lex *last, Item *dependency) { /* Mark all selects from resolved to 1 before select where was @@ -1804,6 +1804,8 @@ void st_select_lex::mark_as_dependent(st } is_correlated= TRUE; this->master_unit()->item->is_correlated= TRUE; + if (dependency) + this->master_unit()->item->refers_to.push_back(dependency); } bool st_select_lex_node::set_braces(bool value) { return 1; } === modified file 'sql/sql_lex.h' --- a/sql/sql_lex.h 2009-03-17 20:29:24 +0000 +++ b/sql/sql_lex.h 2009-06-30 15:09:36 +0000 @@ -743,7 +743,7 @@ public: return master_unit()->return_after_parsing(); } - void mark_as_dependent(st_select_lex *last); + void mark_as_dependent(st_select_lex *last, Item *dependency); bool set_braces(bool value); bool inc_in_sum_expr(); === modified file 'sql/sql_select.cc' --- a/sql/sql_select.cc 2009-05-19 09:28:05 +0000 +++ b/sql/sql_select.cc 2009-06-30 15:09:36 +0000 @@ -60,7 +60,6 @@ static bool update_ref_and_keys(THD *thd table_map table_map, SELECT_LEX *select_lex, st_sargable_param **sargables); static int sort_keyuse(KEYUSE *a,KEYUSE *b); -static void set_position(JOIN *join,uint index,JOIN_TAB *table,KEYUSE *key); static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse, table_map used_tables); static bool choose_plan(JOIN *join,table_map join_tables); @@ -2381,6 +2380,13 @@ mysql_select(THD *thd, Item ***rref_poin } else { + /* + When in EXPLAIN, delay deleting the joins so that they are still + available when we're producing EXPLAIN EXTENDED warning text. + */ + if (select_options & SELECT_DESCRIBE) + free_join= 0; + if (!(join= new JOIN(thd, fields, select_options, result))) DBUG_RETURN(TRUE); thd_proc_info(thd, "init"); @@ -2468,6 +2474,7 @@ static ha_rows get_quick_record_count(TH DBUG_RETURN(HA_POS_ERROR); /* This shouldn't happend */ } + /* This structure is used to collect info on potentially sargable predicates in order to check whether they become sargable after @@ -2646,24 +2653,31 @@ make_join_statistics(JOIN *join, TABLE_L ~outer_join, join->select_lex, &sargables)) goto error; - /* Read tables with 0 or 1 rows (system tables) */ join->const_table_map= 0; + join->const_tables= const_count; + eliminate_tables(join); + const_count= join->const_tables; + found_const_table_map= join->const_table_map; + /* Read tables with 0 or 1 rows (system tables) */ for (POSITION *p_pos=join->positions, *p_end=p_pos+const_count; p_pos < p_end ; p_pos++) { - int tmp; s= p_pos->table; - s->type=JT_SYSTEM; - join->const_table_map|=s->table->map; - if ((tmp=join_read_const_table(s, p_pos))) + if (! (s->table->map & join->eliminated_tables)) { - if (tmp > 0) - goto error; // Fatal error + int tmp; + s->type=JT_SYSTEM; + join->const_table_map|=s->table->map; + if ((tmp=join_read_const_table(s, p_pos))) + { + if (tmp > 0) + goto error; // Fatal error + } + else + found_const_table_map|= s->table->map; } - else - found_const_table_map|= s->table->map; } /* loop until no more const tables are found */ @@ -2688,7 +2702,8 @@ make_join_statistics(JOIN *join, TABLE_L substitution of a const table the key value happens to be null then we can state that there are no matches for this equi-join. */ - if ((keyuse= s->keyuse) && *s->on_expr_ref && !s->embedding_map) + if ((keyuse= s->keyuse) && *s->on_expr_ref && !s->embedding_map && + !(table->map & join->eliminated_tables)) { /* When performing an outer join operation if there are no matching rows @@ -2747,14 +2762,16 @@ make_join_statistics(JOIN *join, TABLE_L { start_keyuse=keyuse; key=keyuse->key; - s->keys.set_bit(key); // QQ: remove this ? + if (keyuse->type == KEYUSE_USABLE) + s->keys.set_bit(key); // QQ: remove this ? refs=0; const_ref.clear_all(); eq_part.clear_all(); do { - if (keyuse->val->type() != Item::NULL_ITEM && !keyuse->optimize) + if (keyuse->type == KEYUSE_USABLE && + keyuse->val->type() != Item::NULL_ITEM && !keyuse->optimize) { if (!((~found_const_table_map) & keyuse->used_tables)) const_ref.set_bit(keyuse->keypart); @@ -2954,17 +2971,35 @@ typedef struct key_field_t { */ bool null_rejecting; bool *cond_guard; /* See KEYUSE::cond_guard */ + enum keyuse_type type; /* See KEYUSE::type */ } KEY_FIELD; -/* Values in optimize */ -#define KEY_OPTIMIZE_EXISTS 1 -#define KEY_OPTIMIZE_REF_OR_NULL 2 /** Merge new key definitions to old ones, remove those not used in both. This is called for OR between different levels. + That is, the function operates on an array of KEY_FIELD elements which has + two parts: + + $LEFT_PART $RIGHT_PART + +-----------------------+-----------------------+ + start new_fields end + + $LEFT_PART and $RIGHT_PART are arrays that have KEY_FIELD elements for two + parts of the OR condition. Our task is to produce an array of KEY_FIELD + elements that would correspond to "$LEFT_PART OR $RIGHT_PART". + + The rules for combining elements are as follows: + (keyfieldA1 AND keyfieldA2 AND ...) OR (keyfieldB1 AND keyfieldB2 AND ...)= + AND_ij (keyfieldA_i OR keyfieldB_j) + + We discard all (keyfieldA_i OR keyfieldB_j) that refer to different + fields. For those referring to the same field, the logic is as follows: + + t.keycol= + To be able to do 'ref_or_null' we merge a comparison of a column and 'column IS NULL' to one test. This is useful for sub select queries that are internally transformed to something like:. @@ -3029,13 +3064,18 @@ merge_key_fields(KEY_FIELD *start,KEY_FI KEY_OPTIMIZE_REF_OR_NULL)); old->null_rejecting= (old->null_rejecting && new_fields->null_rejecting); + /* + The conditions are the same, hence their usabilities should + be, too (TODO: shouldn't that apply to the above + null_rejecting and optimize attributes?) + */ + DBUG_ASSERT(old->type == new_fields->type); } } else if (old->eq_func && new_fields->eq_func && old->val->eq_by_collation(new_fields->val, old->field->binary(), old->field->charset())) - { old->level= and_level; old->optimize= ((old->optimize & new_fields->optimize & @@ -3044,10 +3084,15 @@ merge_key_fields(KEY_FIELD *start,KEY_FI KEY_OPTIMIZE_REF_OR_NULL)); old->null_rejecting= (old->null_rejecting && new_fields->null_rejecting); + // "t.key_col=const" predicates are always usable + DBUG_ASSERT(old->type == KEYUSE_USABLE && + new_fields->type == KEYUSE_USABLE); } else if (old->eq_func && new_fields->eq_func && - ((old->val->const_item() && old->val->is_null()) || - new_fields->val->is_null())) + ((new_fields->type == KEYUSE_USABLE && + old->val->const_item() && old->val->is_null()) || + ((old->type == KEYUSE_USABLE && new_fields->val->is_null())))) + /* TODO ^ why is the above asymmetric, why const_item()? */ { /* field = expression OR field IS NULL */ old->level= and_level; @@ -3118,6 +3163,7 @@ add_key_field(KEY_FIELD **key_fields,uin table_map usable_tables, SARGABLE_PARAM **sargables) { uint exists_optimize= 0; + bool optimizable=0; if (!(field->flags & PART_KEY_FLAG)) { // Don't remove column IS NULL on a LEFT JOIN table @@ -3130,15 +3176,12 @@ add_key_field(KEY_FIELD **key_fields,uin else { table_map used_tables=0; - bool optimizable=0; for (uint i=0; i<num_values; i++) { used_tables|=(value[i])->used_tables(); if (!((value[i])->used_tables() & (field->table->map | RAND_TABLE_BIT))) optimizable=1; } - if (!optimizable) - return; if (!(usable_tables & field->table->map)) { if (!eq_func || (*value)->type() != Item::NULL_ITEM || @@ -3151,7 +3194,8 @@ add_key_field(KEY_FIELD **key_fields,uin JOIN_TAB *stat=field->table->reginfo.join_tab; key_map possible_keys=field->key_start; possible_keys.intersect(field->table->keys_in_use_for_query); - stat[0].keys.merge(possible_keys); // Add possible keys + if (optimizable) + stat[0].keys.merge(possible_keys); // Add possible keys /* Save the following cases: @@ -3244,6 +3288,7 @@ add_key_field(KEY_FIELD **key_fields,uin (*key_fields)->val= *value; (*key_fields)->level= and_level; (*key_fields)->optimize= exists_optimize; + (*key_fields)->type= optimizable? KEYUSE_USABLE : KEYUSE_UNKNOWN; /* If the condition has form "tbl.keypart = othertbl.field" and othertbl.field can be NULL, there will be no matches if othertbl.field @@ -3555,6 +3600,7 @@ add_key_part(DYNAMIC_ARRAY *keyuse_array keyuse.optimize= key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL; keyuse.null_rejecting= key_field->null_rejecting; keyuse.cond_guard= key_field->cond_guard; + keyuse.type= key_field->type; VOID(insert_dynamic(keyuse_array,(uchar*) &keyuse)); } } @@ -3563,7 +3609,6 @@ add_key_part(DYNAMIC_ARRAY *keyuse_array } -#define FT_KEYPART (MAX_REF_PARTS+10) static void add_ft_keys(DYNAMIC_ARRAY *keyuse_array, @@ -3622,6 +3667,7 @@ add_ft_keys(DYNAMIC_ARRAY *keyuse_array, keyuse.used_tables=cond_func->key_item()->used_tables(); keyuse.optimize= 0; keyuse.keypart_map= 0; + keyuse.type= KEYUSE_USABLE; VOID(insert_dynamic(keyuse_array,(uchar*) &keyuse)); } @@ -3636,6 +3682,13 @@ sort_keyuse(KEYUSE *a,KEYUSE *b) return (int) (a->key - b->key); if (a->keypart != b->keypart) return (int) (a->keypart - b->keypart); + + // Usable ones go before the unusable + int a_ok= test(a->type == KEYUSE_USABLE); + int b_ok= test(b->type == KEYUSE_USABLE); + if (a_ok != b_ok) + return a_ok? -1 : 1; + // Place const values before other ones if ((res= test((a->used_tables & ~OUTER_REF_TABLE_BIT)) - test((b->used_tables & ~OUTER_REF_TABLE_BIT)))) @@ -3846,7 +3899,8 @@ update_ref_and_keys(THD *thd, DYNAMIC_AR found_eq_constant=0; for (i=0 ; i < keyuse->elements-1 ; i++,use++) { - if (!use->used_tables && use->optimize != KEY_OPTIMIZE_REF_OR_NULL) + if (use->type == KEYUSE_USABLE && !use->used_tables && + use->optimize != KEY_OPTIMIZE_REF_OR_NULL) use->table->const_key_parts[use->key]|= use->keypart_map; if (use->keypart != FT_KEYPART) { @@ -3870,7 +3924,8 @@ update_ref_and_keys(THD *thd, DYNAMIC_AR /* Save ptr to first use */ if (!use->table->reginfo.join_tab->keyuse) use->table->reginfo.join_tab->keyuse=save_pos; - use->table->reginfo.join_tab->checked_keys.set_bit(use->key); + if (use->type == KEYUSE_USABLE) + use->table->reginfo.join_tab->checked_keys.set_bit(use->key); save_pos++; } i=(uint) (save_pos-(KEYUSE*) keyuse->buffer); @@ -3900,7 +3955,7 @@ static void optimize_keyuse(JOIN *join, To avoid bad matches, we don't make ref_table_rows less than 100. */ keyuse->ref_table_rows= ~(ha_rows) 0; // If no ref - if (keyuse->used_tables & + if (keyuse->type == KEYUSE_USABLE && keyuse->used_tables & (map= (keyuse->used_tables & ~join->const_table_map & ~OUTER_REF_TABLE_BIT))) { @@ -3990,8 +4045,7 @@ add_group_and_distinct_keys(JOIN *join, /** Save const tables first as used tables. */ -static void -set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key) +void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key) { join->positions[idx].table= table; join->positions[idx].key=key; @@ -4093,7 +4147,8 @@ best_access_path(JOIN *join, if 1. expression doesn't refer to forward tables 2. we won't get two ref-or-null's */ - if (!(remaining_tables & keyuse->used_tables) && + if (keyuse->type == KEYUSE_USABLE && + !(remaining_tables & keyuse->used_tables) && !(ref_or_null_part && (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL))) { @@ -5547,7 +5602,8 @@ static bool create_ref_for_key(JOIN *joi */ do { - if (!(~used_tables & keyuse->used_tables)) + if (!(~used_tables & keyuse->used_tables) && + keyuse->type == KEYUSE_USABLE) { if (keyparts == keyuse->keypart && !(found_part_ref_or_null & keyuse->optimize)) @@ -5597,9 +5653,11 @@ static bool create_ref_for_key(JOIN *joi uint i; for (i=0 ; i < keyparts ; keyuse++,i++) { - while (keyuse->keypart != i || - ((~used_tables) & keyuse->used_tables)) + while (keyuse->keypart != i || ((~used_tables) & keyuse->used_tables) || + !(keyuse->type == KEYUSE_USABLE)) + { keyuse++; /* Skip other parts */ + } uint maybe_null= test(keyinfo->key_part[i].null_bit); j->ref.items[i]=keyuse->val; // Save for cond removal @@ -5757,6 +5815,7 @@ JOIN::make_simple_join(JOIN *parent, TAB tables= 1; const_tables= 0; const_table_map= 0; + eliminated_tables= 0; tmp_table_param.field_count= tmp_table_param.sum_func_count= tmp_table_param.func_count= 0; tmp_table_param.copy_field= tmp_table_param.copy_field_end=0; @@ -6021,7 +6080,7 @@ make_outerjoin_info(JOIN *join) } if (!tab->first_inner) tab->first_inner= nested_join->first_nested; - if (++nested_join->counter < nested_join->join_list.elements) + if (++nested_join->counter < nested_join->n_tables) break; /* Table tab is the last inner table for nested join. */ nested_join->first_nested->last_inner= tab; @@ -8575,6 +8634,8 @@ simplify_joins(JOIN *join, List<TABLE_LI conds= simplify_joins(join, &nested_join->join_list, conds, top); used_tables= nested_join->used_tables; not_null_tables= nested_join->not_null_tables; + /* The following two might become unequal after table elimination: */ + nested_join->n_tables= nested_join->join_list.elements; } else { @@ -8733,7 +8794,7 @@ static uint build_bitmap_for_nested_join with anything) 2. we could run out bits in nested_join_map otherwise. */ - if (nested_join->join_list.elements != 1) + if (nested_join->n_tables != 1) { nested_join->nj_map= (nested_join_map) 1 << first_unused++; first_unused= build_bitmap_for_nested_joins(&nested_join->join_list, @@ -8894,7 +8955,7 @@ static bool check_interleaving_with_nj(J join->cur_embedding_map |= next_emb->nested_join->nj_map; } - if (next_emb->nested_join->join_list.elements != + if (next_emb->nested_join->n_tables != next_emb->nested_join->counter) break; @@ -8926,9 +8987,23 @@ static void restore_prev_nj_state(JOIN_T JOIN *join= last->join; while (last_emb) { + /* + psergey-elim: (nevermind) + new_prefix= cur_prefix & ~last; + if (!(new_prefix & cur_table_map)) // removed last inner table + { + join->cur_embedding_map&= ~last_emb->nested_join->nj_map; + } + else (current) + { + // Won't hurt doing it all the time: + join->cur_embedding_map |= ...; + } + else + */ if (!(--last_emb->nested_join->counter)) join->cur_embedding_map&= ~last_emb->nested_join->nj_map; - else if (last_emb->nested_join->join_list.elements-1 == + else if (last_emb->nested_join->n_tables-1 == last_emb->nested_join->counter) join->cur_embedding_map|= last_emb->nested_join->nj_map; else @@ -16202,6 +16277,14 @@ static void select_describe(JOIN *join, tmp3.length(0); quick_type= -1; + + /* Don't show eliminated tables */ + if (table->map & join->eliminated_tables) + { + used_tables|=table->map; + continue; + } + item_list.empty(); /* id */ item_list.push_back(new Item_uint((uint32) @@ -16524,8 +16607,11 @@ static void select_describe(JOIN *join, unit; unit= unit->next_unit()) { - if (mysql_explain_union(thd, unit, result)) - DBUG_VOID_RETURN; + if (!(unit->item && unit->item->eliminated)) + { + if (mysql_explain_union(thd, unit, result)) + DBUG_VOID_RETURN; + } } DBUG_VOID_RETURN; } @@ -16566,7 +16652,6 @@ bool mysql_explain_union(THD *thd, SELEC unit->fake_select_lex->options|= SELECT_DESCRIBE; if (!(res= unit->prepare(thd, result, SELECT_NO_UNLOCK | SELECT_DESCRIBE))) res= unit->exec(); - res|= unit->cleanup(); } else { @@ -16599,6 +16684,7 @@ bool mysql_explain_union(THD *thd, SELEC */ static void print_join(THD *thd, + table_map eliminated_tables, String *str, List<TABLE_LIST> *tables, enum_query_type query_type) @@ -16614,12 +16700,33 @@ static void print_join(THD *thd, *t= ti++; DBUG_ASSERT(tables->elements >= 1); - (*table)->print(thd, str, query_type); + /* + Assert that the first table in the list isn't eliminated. This comes from + the fact that the first table can't be inner table of an outer join. + */ + DBUG_ASSERT(!eliminated_tables || + !(((*table)->table && ((*table)->table->map & eliminated_tables)) || + ((*table)->nested_join && !((*table)->nested_join->used_tables & + ~eliminated_tables)))); + (*table)->print(thd, eliminated_tables, str, query_type); TABLE_LIST **end= table + tables->elements; for (TABLE_LIST **tbl= table + 1; tbl < end; tbl++) { TABLE_LIST *curr= *tbl; + /* + The "eliminated_tables &&" check guards againist the case of + printing the query for CREATE VIEW. We do that without having run + JOIN::optimize() and so will have nested_join->used_tables==0. + */ + if (eliminated_tables && + ((curr->table && (curr->table->map & eliminated_tables)) || + (curr->nested_join && !(curr->nested_join->used_tables & + ~eliminated_tables)))) + { + continue; + } + if (curr->outer_join) { /* MySQL converts right to left joins */ @@ -16629,7 +16736,7 @@ static void print_join(THD *thd, str->append(STRING_WITH_LEN(" straight_join ")); else str->append(STRING_WITH_LEN(" join ")); - curr->print(thd, str, query_type); + curr->print(thd, eliminated_tables, str, query_type); if (curr->on_expr) { str->append(STRING_WITH_LEN(" on(")); @@ -16683,12 +16790,13 @@ Index_hint::print(THD *thd, String *str) @param str string where table should be printed */ -void TABLE_LIST::print(THD *thd, String *str, enum_query_type query_type) +void TABLE_LIST::print(THD *thd, table_map eliminated_tables, String *str, + enum_query_type query_type) { if (nested_join) { str->append('('); - print_join(thd, str, &nested_join->join_list, query_type); + print_join(thd, eliminated_tables, str, &nested_join->join_list, query_type); str->append(')'); } else @@ -16830,7 +16938,7 @@ void st_select_lex::print(THD *thd, Stri { str->append(STRING_WITH_LEN(" from ")); /* go through join tree */ - print_join(thd, str, &top_join_list, query_type); + print_join(thd, join? join->eliminated_tables: 0, str, &top_join_list, query_type); } else if (where) { === modified file 'sql/sql_select.h' --- a/sql/sql_select.h 2009-04-25 10:05:32 +0000 +++ b/sql/sql_select.h 2009-06-30 15:09:36 +0000 @@ -28,6 +28,45 @@ #include "procedure.h" #include <myisam.h> +#define FT_KEYPART (MAX_REF_PARTS+10) +/* Values in optimize */ +#define KEY_OPTIMIZE_EXISTS 1 +#define KEY_OPTIMIZE_REF_OR_NULL 2 + +/* KEYUSE element types */ +enum keyuse_type +{ + /* + val refers to the same table, this is either KEYUSE_BIND or KEYUSE_NO_BIND + type, we didn't determine which one yet. + */ + KEYUSE_UNKNOWN= 0, + /* + 'regular' keyuse, i.e. it represents one of the following + * t.keyXpartY = func(constants, other-tables) + * t.keyXpartY IS NULL + * t.keyXpartY = func(constants, other-tables) OR t.keyXpartY IS NULL + and can be used to construct ref acces + */ + KEYUSE_USABLE, + /* + The keyuse represents a condition in form: + + t.uniq_keyXpartY = func(other parts of uniq_keyX) + + This can't be used to construct uniq_keyX but we could use it to determine + that the table will produce at most one match. + */ + KEYUSE_BIND, + /* + Keyuse that's not usable for ref access and doesn't meet the criteria of + KEYUSE_BIND. Examples: + t.keyXpartY = func(t.keyXpartY) + t.keyXpartY = func(column of t that's not covered by keyX) + */ + KEYUSE_NO_BIND +}; + typedef struct keyuse_t { TABLE *table; Item *val; /**< or value if no field */ @@ -51,6 +90,15 @@ typedef struct keyuse_t { NULL - Otherwise (the source equality can't be turned off) */ bool *cond_guard; + /* + 1 <=> This keyuse can be used to construct key access. + 0 <=> Otherwise. Currently unusable KEYUSEs represent equalities + where one table column refers to another one, like this: + t.keyXpartA=func(t.keyXpartB) + This equality cannot be used for index access but is useful + for table elimination. + */ + enum keyuse_type type; } KEYUSE; class store_key; @@ -210,7 +258,7 @@ typedef struct st_join_table { JOIN *join; /** Bitmap of nested joins this table is part of */ nested_join_map embedding_map; - + void cleanup(); inline bool is_using_loose_index_scan() { @@ -285,7 +333,15 @@ public: fetching data from a cursor */ bool resume_nested_loop; - table_map const_table_map,found_const_table_map; + table_map const_table_map; + /* + Constant tables for which we have found a row (as opposed to those for + which we didn't). + */ + table_map found_const_table_map; + + /* Tables removed by table elimination. Set to 0 before the elimination. */ + table_map eliminated_tables; /* Bitmap of all inner tables from outer joins */ @@ -425,6 +481,7 @@ public: table= 0; tables= 0; const_tables= 0; + eliminated_tables= 0; join_list= 0; sort_and_group= 0; first_record= 0; @@ -530,6 +587,10 @@ public: return (unit == &thd->lex->unit && (unit->fake_select_lex == 0 || select_lex == unit->fake_select_lex)); } + inline table_map all_tables_map() + { + return (table_map(1) << tables) - 1; + } private: bool make_simple_join(JOIN *join, TABLE *tmp_table); }; @@ -730,9 +791,12 @@ bool error_if_full_join(JOIN *join); int report_error(TABLE *table, int error); int safe_index_read(JOIN_TAB *tab); COND *remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value); +void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key); inline bool optimizer_flag(THD *thd, uint flag) { return (thd->variables.optimizer_switch & flag); } +void eliminate_tables(JOIN *join); + === modified file 'sql/table.h' --- a/sql/table.h 2009-02-19 09:01:25 +0000 +++ b/sql/table.h 2009-06-30 15:09:36 +0000 @@ -1366,7 +1366,8 @@ struct TABLE_LIST return (derived || view || schema_table || (create && !table->db_stat) || !table); } - void print(THD *thd, String *str, enum_query_type query_type); + void print(THD *thd, table_map eliminated_tables, String *str, + enum_query_type query_type); bool check_single_table(TABLE_LIST **table, table_map map, TABLE_LIST *view); bool set_insert_values(MEM_ROOT *mem_root); @@ -1615,7 +1616,11 @@ public: typedef struct st_nested_join { List<TABLE_LIST> join_list; /* list of elements in the nested join */ - table_map used_tables; /* bitmap of tables in the nested join */ + /* + Bitmap of tables within this nested join (including those embedded within + its children), including tables removed by table elimination. + */ + table_map used_tables; table_map not_null_tables; /* tables that rejects nulls */ struct st_join_table *first_nested;/* the first nested table in the plan */ /* @@ -1626,6 +1631,11 @@ typedef struct st_nested_join Before each use the counters are zeroed by reset_nj_counters. */ uint counter; + /* + Number of elements in join_list that were not (or contain table(s) that + weren't) removed by table elimination. + */ + uint n_tables; nested_join_map nj_map; /* Bit used to identify this nested join*/ } NESTED_JOIN;