#At file:///home/tsk/mprog/src/5.3-subqueries/ based on revid:psergey@askmonty.org-20100315063535-jsp4jgya6lfqt8e6 2779 timour@askmonty.org 2010-03-15 [merge] Merge in MWL#68: Subquery optimization: Efficient NOT IN execution with NULLs modified: mysql-test/include/mix1.inc mysql-test/r/index_merge_myisam.result mysql-test/r/innodb_mysql.result mysql-test/r/myisam_mrr.result mysql-test/r/ps.result mysql-test/r/subselect.result mysql-test/r/subselect3.result mysql-test/r/subselect3_jcl6.result mysql-test/r/subselect_no_mat.result mysql-test/r/subselect_no_opts.result mysql-test/r/subselect_no_semijoin.result mysql-test/r/subselect_sj.result mysql-test/r/subselect_sj_jcl6.result mysql-test/t/ps.test mysql-test/t/subselect.test mysql-test/t/subselect3.test sql/item_cmpfunc.h sql/item_subselect.cc sql/item_subselect.h sql/mysql_priv.h sql/mysqld.cc sql/opt_subselect.cc sql/set_var.cc sql/sql_class.cc sql/sql_class.h sql/sql_select.cc === modified file 'mysql-test/include/mix1.inc' --- a/mysql-test/include/mix1.inc 2009-09-15 06:08:54 +0000 +++ b/mysql-test/include/mix1.inc 2010-03-11 21:43:31 +0000 @@ -1177,8 +1177,11 @@ DROP TABLE t1; create table t1 (a bit(1) not null,b int) engine=myisam; create table t2 (c int) engine=innodb; +set @save_optimizer_switch=@@optimizer_switch; +set @@optimizer_switch='partial_match_rowid_merge=off,partial_match_table_scan=off'; explain select b from t1 where a not in (select b from t1,t2 group by a) group by a; +set optimizer_switch=@save_optimizer_switch; DROP TABLE t1,t2; --echo End of 5.0 tests === modified file 'mysql-test/r/index_merge_myisam.result' --- a/mysql-test/r/index_merge_myisam.result 2010-01-17 14:51:10 +0000 +++ b/mysql-test/r/index_merge_myisam.result 2010-03-11 21:43:31 +0000 @@ -1419,19 +1419,19 @@ drop table t1; # select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='index_merge=off,index_merge_union=off'; select @@optimizer_switch; @@optimizer_switch -index_merge=off,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on +index_merge=off,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='index_merge_union=on'; select @@optimizer_switch; @@optimizer_switch -index_merge=off,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on +index_merge=off,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='default,index_merge_sort_union=off'; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on +index_merge=on,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch=4; ERROR 42000: Variable 'optimizer_switch' can't be set to the value of '4' set optimizer_switch=NULL; @@ -1458,21 +1458,21 @@ set optimizer_switch=default; set optimizer_switch='index_merge=off,index_merge_union=off,default'; select @@optimizer_switch; @@optimizer_switch -index_merge=off,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on +index_merge=off,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch=default; select @@global.optimizer_switch; @@global.optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on set @@global.optimizer_switch=default; select @@global.optimizer_switch; @@global.optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on # # Check index_merge's @@optimizer_switch flags # select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on create table t0 (a int); insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); create table t1 (a int, b int, c int, filler char(100), @@ -1582,5 +1582,5 @@ id select_type table type possible_keys set optimizer_switch=default; show variables like 'optimizer_switch'; Variable_name Value -optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on +optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on drop table t0, t1; === modified file 'mysql-test/r/innodb_mysql.result' --- a/mysql-test/r/innodb_mysql.result 2009-12-15 07:16:46 +0000 +++ b/mysql-test/r/innodb_mysql.result 2010-03-11 21:43:31 +0000 @@ -1425,12 +1425,15 @@ DROP TABLE t1; # create table t1 (a bit(1) not null,b int) engine=myisam; create table t2 (c int) engine=innodb; +set @save_optimizer_switch=@@optimizer_switch; +set @@optimizer_switch='partial_match_rowid_merge=off,partial_match_table_scan=off'; explain select b from t1 where a not in (select b from t1,t2 group by a) group by a; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables 2 DEPENDENT SUBQUERY t1 system NULL NULL NULL NULL 0 const row not found 2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL 1 +set optimizer_switch=@save_optimizer_switch; DROP TABLE t1,t2; End of 5.0 tests CREATE TABLE `t2` ( === modified file 'mysql-test/r/myisam_mrr.result' --- a/mysql-test/r/myisam_mrr.result 2010-01-17 14:51:10 +0000 +++ b/mysql-test/r/myisam_mrr.result 2010-03-11 21:43:31 +0000 @@ -394,7 +394,7 @@ drop table t0, t1; # - engine_condition_pushdown does not affect ICP select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on create table t0 (a int); insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); create table t1 (a int, b int, key(a)); === modified file 'mysql-test/r/ps.result' --- a/mysql-test/r/ps.result 2009-05-27 15:19:44 +0000 +++ b/mysql-test/r/ps.result 2010-03-11 21:43:31 +0000 @@ -149,6 +149,8 @@ c29 longblob, c30 longtext, c31 enum('on c32 set('monday', 'tuesday', 'wednesday') ) engine = MYISAM ; create table t2 like t1; +set @save_optimizer_switch=@@optimizer_switch; +set @@optimizer_switch="partial_match_rowid_merge=off,partial_match_table_scan=off"; set @stmt= ' explain SELECT (SELECT SUM(c1 + c12 + 0.0) FROM t2 where (t1.c2 - 0e-3) = t2.c2 GROUP BY t1.c15 LIMIT 1) as scalar_s, exists (select 1.0e+0 from t2 where t2.c3 * 9.0000000000 = t1.c4) as exists_s, c5 * 4 in (select c6 + 0.3e+1 from t2) as in_s, (c7 - 4, c8 - 4) in (select c9 + 4.0, c10 + 40e-1 from t2) as in_row_s FROM t1, (select c25 x, c32 y from t2) tt WHERE x * 1 = c25 ' ; prepare stmt1 from @stmt ; execute stmt1 ; @@ -177,6 +179,7 @@ id select_type table type possible_keys 2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables deallocate prepare stmt1; drop tables t1,t2; +set @@optimizer_switch=@save_optimizer_switch; set @arg00=1; prepare stmt1 from ' create table t1 (m int) as select 1 as m ' ; execute stmt1 ; === modified file 'mysql-test/r/subselect.result' --- a/mysql-test/r/subselect.result 2010-02-17 21:59:41 +0000 +++ b/mysql-test/r/subselect.result 2010-03-11 21:43:31 +0000 @@ -1,4 +1,6 @@ drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t11,t12; +set @save_optimizer_switch=@@optimizer_switch; +set @@optimizer_switch="partial_match_rowid_merge=off,partial_match_table_scan=off"; select (select 2); (select 2) 2 @@ -4803,4 +4805,5 @@ SELECT 1 FROM t1 GROUP BY 1 1 DROP TABLE t1; +set @@optimizer_switch=@save_optimizer_switch; End of 5.1 tests. === modified file 'mysql-test/r/subselect3.result' --- a/mysql-test/r/subselect3.result 2010-02-17 10:05:27 +0000 +++ b/mysql-test/r/subselect3.result 2010-03-11 21:43:31 +0000 @@ -63,12 +63,15 @@ Handler_read_rnd_next 11 select ' ^ This must show 11' Z; Z ^ This must show 11 +set @save_optimizer_switch=@@optimizer_switch; +set @@optimizer_switch="partial_match_rowid_merge=off,partial_match_table_scan=off"; explain extended select a in (select max(ie) from t1 where oref=4 group by grp) from t3; id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY t3 ALL NULL NULL NULL NULL 2 100.00 2 DEPENDENT SUBQUERY t1 ALL NULL NULL NULL NULL 6 100.00 Using where; Using temporary; Using filesort Warnings: Note 1003 select <in_optimizer>(`test`.`t3`.`a`,<exists>(select max(`test`.`t1`.`ie`) AS `max(ie)` from `test`.`t1` where (`test`.`t1`.`oref` = 4) group by `test`.`t1`.`grp` having trigcond((<cache>(`test`.`t3`.`a`) = <ref_null_helper>(max(`test`.`t1`.`ie`)))))) AS `a in (select max(ie) from t1 where oref=4 group by grp)` from `test`.`t3` +set @@optimizer_switch=@save_optimizer_switch; drop table t1, t2, t3; create table t1 (a int, oref int, key(a)); insert into t1 values @@ -692,6 +695,8 @@ a MAX(b) test 2 3 h 3 4 i DROP TABLE t1, t2; +set @save_optimizer_switch=@@optimizer_switch; +set @@optimizer_switch="partial_match_rowid_merge=off,partial_match_table_scan=off"; CREATE TABLE t1 (a int); CREATE TABLE t2 (b int, PRIMARY KEY(b)); INSERT INTO t1 VALUES (1), (NULL), (4); @@ -759,6 +764,7 @@ id select_type table type possible_keys 1 PRIMARY t1 ALL NULL NULL NULL NULL 4 Using where 2 DEPENDENT SUBQUERY t2 unique_subquery PRIMARY PRIMARY 4 func 1 Using index; Using where DROP TABLE t1, t2; +set @@optimizer_switch=@save_optimizer_switch; CREATE TABLE t1 (a INT); INSERT INTO t1 VALUES(1); CREATE TABLE t2 (placeholder CHAR(11)); @@ -960,7 +966,7 @@ i1 i2 # Baseline: SHOW STATUS LIKE '%Handler_read_rnd_next'; Variable_name Value -Handler_read_rnd_next 17 +Handler_read_rnd_next 18 INSERT INTO t1 VALUES (NULL, NULL); FLUSH STATUS; @@ -977,7 +983,7 @@ i1 i2 # (read record from t1, but do not read from t2) SHOW STATUS LIKE '%Handler_read_rnd_next'; Variable_name Value -Handler_read_rnd_next 18 +Handler_read_rnd_next 19 DROP TABLE t1,t2; End of 5.1 tests CREATE TABLE t1 ( === modified file 'mysql-test/r/subselect3_jcl6.result' --- a/mysql-test/r/subselect3_jcl6.result 2010-02-17 10:47:55 +0000 +++ b/mysql-test/r/subselect3_jcl6.result 2010-03-11 21:43:31 +0000 @@ -67,12 +67,15 @@ Handler_read_rnd_next 11 select ' ^ This must show 11' Z; Z ^ This must show 11 +set @save_optimizer_switch=@@optimizer_switch; +set @@optimizer_switch="partial_match_rowid_merge=off,partial_match_table_scan=off"; explain extended select a in (select max(ie) from t1 where oref=4 group by grp) from t3; id select_type table type possible_keys key key_len ref rows filtered Extra 1 PRIMARY t3 ALL NULL NULL NULL NULL 2 100.00 2 DEPENDENT SUBQUERY t1 ALL NULL NULL NULL NULL 6 100.00 Using where; Using temporary; Using filesort Warnings: Note 1003 select <in_optimizer>(`test`.`t3`.`a`,<exists>(select max(`test`.`t1`.`ie`) AS `max(ie)` from `test`.`t1` where (`test`.`t1`.`oref` = 4) group by `test`.`t1`.`grp` having trigcond((<cache>(`test`.`t3`.`a`) = <ref_null_helper>(max(`test`.`t1`.`ie`)))))) AS `a in (select max(ie) from t1 where oref=4 group by grp)` from `test`.`t3` +set @@optimizer_switch=@save_optimizer_switch; drop table t1, t2, t3; create table t1 (a int, oref int, key(a)); insert into t1 values @@ -696,6 +699,8 @@ a MAX(b) test 2 3 h 3 4 i DROP TABLE t1, t2; +set @save_optimizer_switch=@@optimizer_switch; +set @@optimizer_switch="partial_match_rowid_merge=off,partial_match_table_scan=off"; CREATE TABLE t1 (a int); CREATE TABLE t2 (b int, PRIMARY KEY(b)); INSERT INTO t1 VALUES (1), (NULL), (4); @@ -763,6 +768,7 @@ id select_type table type possible_keys 1 PRIMARY t1 ALL NULL NULL NULL NULL 4 Using where 2 DEPENDENT SUBQUERY t2 unique_subquery PRIMARY PRIMARY 4 func 1 Using index; Using where DROP TABLE t1, t2; +set @@optimizer_switch=@save_optimizer_switch; CREATE TABLE t1 (a INT); INSERT INTO t1 VALUES(1); CREATE TABLE t2 (placeholder CHAR(11)); @@ -964,7 +970,7 @@ i1 i2 # Baseline: SHOW STATUS LIKE '%Handler_read_rnd_next'; Variable_name Value -Handler_read_rnd_next 17 +Handler_read_rnd_next 18 INSERT INTO t1 VALUES (NULL, NULL); FLUSH STATUS; @@ -981,7 +987,7 @@ i1 i2 # (read record from t1, but do not read from t2) SHOW STATUS LIKE '%Handler_read_rnd_next'; Variable_name Value -Handler_read_rnd_next 18 +Handler_read_rnd_next 19 DROP TABLE t1,t2; End of 5.1 tests CREATE TABLE t1 ( === modified file 'mysql-test/r/subselect_no_mat.result' --- a/mysql-test/r/subselect_no_mat.result 2010-02-21 07:33:54 +0000 +++ b/mysql-test/r/subselect_no_mat.result 2010-03-11 21:43:31 +0000 @@ -1,8 +1,10 @@ show variables like 'optimizer_switch'; Variable_name Value -optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on +optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='materialization=off'; drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t11,t12; +set @save_optimizer_switch=@@optimizer_switch; +set @@optimizer_switch="partial_match_rowid_merge=off,partial_match_table_scan=off"; select (select 2); (select 2) 2 @@ -4807,8 +4809,9 @@ SELECT 1 FROM t1 GROUP BY 1 1 DROP TABLE t1; +set @@optimizer_switch=@save_optimizer_switch; End of 5.1 tests. set optimizer_switch=default; show variables like 'optimizer_switch'; Variable_name Value -optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on +optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on === modified file 'mysql-test/r/subselect_no_opts.result' --- a/mysql-test/r/subselect_no_opts.result 2010-02-21 07:33:54 +0000 +++ b/mysql-test/r/subselect_no_opts.result 2010-03-11 21:43:31 +0000 @@ -1,8 +1,10 @@ show variables like 'optimizer_switch'; Variable_name Value -optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on +optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='materialization=off,semijoin=off'; drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t11,t12; +set @save_optimizer_switch=@@optimizer_switch; +set @@optimizer_switch="partial_match_rowid_merge=off,partial_match_table_scan=off"; select (select 2); (select 2) 2 @@ -4807,8 +4809,9 @@ SELECT 1 FROM t1 GROUP BY 1 1 DROP TABLE t1; +set @@optimizer_switch=@save_optimizer_switch; End of 5.1 tests. set optimizer_switch=default; show variables like 'optimizer_switch'; Variable_name Value -optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on +optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on === modified file 'mysql-test/r/subselect_no_semijoin.result' --- a/mysql-test/r/subselect_no_semijoin.result 2010-02-21 07:33:54 +0000 +++ b/mysql-test/r/subselect_no_semijoin.result 2010-03-11 21:43:31 +0000 @@ -1,8 +1,10 @@ show variables like 'optimizer_switch'; Variable_name Value -optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on +optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='semijoin=off'; drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t11,t12; +set @save_optimizer_switch=@@optimizer_switch; +set @@optimizer_switch="partial_match_rowid_merge=off,partial_match_table_scan=off"; select (select 2); (select 2) 2 @@ -4807,8 +4809,9 @@ SELECT 1 FROM t1 GROUP BY 1 1 DROP TABLE t1; +set @@optimizer_switch=@save_optimizer_switch; End of 5.1 tests. set optimizer_switch=default; show variables like 'optimizer_switch'; Variable_name Value -optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on +optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on === modified file 'mysql-test/r/subselect_sj.result' --- a/mysql-test/r/subselect_sj.result 2010-03-15 06:32:54 +0000 +++ b/mysql-test/r/subselect_sj.result 2010-03-15 19:52:58 +0000 @@ -202,39 +202,39 @@ BUG#37120 optimizer_switch allowable val select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='default,materialization=off'; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='default,semijoin=off'; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=off +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='default,loosescan=off'; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=on,semijoin=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='default,semijoin=off,materialization=off'; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=off +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='default,materialization=off,semijoin=off'; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=off +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='default,semijoin=off,materialization=off,loosescan=off'; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=off,semijoin=off +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='default,semijoin=off,loosescan=off'; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=on,semijoin=off +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=on,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='default,materialization=off,loosescan=off'; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=off,semijoin=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=off,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch=default; drop table t0, t1, t2; drop table t10, t11, t12; === modified file 'mysql-test/r/subselect_sj_jcl6.result' --- a/mysql-test/r/subselect_sj_jcl6.result 2010-03-15 06:32:54 +0000 +++ b/mysql-test/r/subselect_sj_jcl6.result 2010-03-15 19:52:58 +0000 @@ -206,39 +206,39 @@ BUG#37120 optimizer_switch allowable val select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='default,materialization=off'; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='default,semijoin=off'; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=off +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=on,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='default,loosescan=off'; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=on,semijoin=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=on,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='default,semijoin=off,materialization=off'; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=off +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='default,materialization=off,semijoin=off'; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=off +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=on,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='default,semijoin=off,materialization=off,loosescan=off'; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=off,semijoin=off +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=off,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='default,semijoin=off,loosescan=off'; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=on,semijoin=off +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=on,semijoin=off,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch='default,materialization=off,loosescan=off'; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=off,semijoin=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on,firstmatch=on,loosescan=off,materialization=off,semijoin=on,partial_match_rowid_merge=on,partial_match_table_scan=on set optimizer_switch=default; drop table t0, t1, t2; drop table t10, t11, t12; === modified file 'mysql-test/t/ps.test' --- a/mysql-test/t/ps.test 2009-05-27 15:19:44 +0000 +++ b/mysql-test/t/ps.test 2010-03-11 21:43:31 +0000 @@ -163,6 +163,9 @@ create table t1 ) engine = MYISAM ; create table t2 like t1; +set @save_optimizer_switch=@@optimizer_switch; +set @@optimizer_switch="partial_match_rowid_merge=off,partial_match_table_scan=off"; + set @stmt= ' explain SELECT (SELECT SUM(c1 + c12 + 0.0) FROM t2 where (t1.c2 - 0e-3) = t2.c2 GROUP BY t1.c15 LIMIT 1) as scalar_s, exists (select 1.0e+0 from t2 where t2.c3 * 9.0000000000 = t1.c4) as exists_s, c5 * 4 in (select c6 + 0.3e+1 from t2) as in_s, (c7 - 4, c8 - 4) in (select c9 + 4.0, c10 + 40e-1 from t2) as in_row_s FROM t1, (select c25 x, c32 y from t2) tt WHERE x * 1 = c25 ' ; prepare stmt1 from @stmt ; execute stmt1 ; @@ -171,6 +174,8 @@ explain SELECT (SELECT SUM(c1 + c12 + 0. deallocate prepare stmt1; drop tables t1,t2; +set @@optimizer_switch=@save_optimizer_switch; + # # parameters from variables (for field creation) # === modified file 'mysql-test/t/subselect.test' --- a/mysql-test/t/subselect.test 2010-01-17 20:52:20 +0000 +++ b/mysql-test/t/subselect.test 2010-03-11 21:43:31 +0000 @@ -11,6 +11,9 @@ drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t11,t12; --enable_warnings +set @save_optimizer_switch=@@optimizer_switch; +set @@optimizer_switch="partial_match_rowid_merge=off,partial_match_table_scan=off"; + select (select 2); explain extended select (select 2); SELECT (SELECT 1) UNION SELECT (SELECT 2); @@ -4061,4 +4064,6 @@ SELECT 1 FROM t1 GROUP BY (SELECT LAST_INSERT_ID() FROM t1 ORDER BY MIN(a) ASC LIMIT 1); DROP TABLE t1; +set @@optimizer_switch=@save_optimizer_switch; + --echo End of 5.1 tests. === modified file 'mysql-test/t/subselect3.test' --- a/mysql-test/t/subselect3.test 2010-01-17 14:51:10 +0000 +++ b/mysql-test/t/subselect3.test 2010-03-11 21:43:31 +0000 @@ -59,9 +59,13 @@ select a in (select max(ie) from t1 wher show status like 'Handler_read_rnd_next'; select ' ^ This must show 11' Z; +set @save_optimizer_switch=@@optimizer_switch; +set @@optimizer_switch="partial_match_rowid_merge=off,partial_match_table_scan=off"; + # This must show trigcond: explain extended select a in (select max(ie) from t1 where oref=4 group by grp) from t3; +set @@optimizer_switch=@save_optimizer_switch; drop table t1, t2, t3; # @@ -529,6 +533,9 @@ SELECT a, MAX(b), DROP TABLE t1, t2; +# The next three test cases must be executed with the IN=>EXISTS strategy +set @save_optimizer_switch=@@optimizer_switch; +set @@optimizer_switch="partial_match_rowid_merge=off,partial_match_table_scan=off"; # # Bug #27870: crash of an equijoin query with WHERE condition containing @@ -588,6 +595,8 @@ EXPLAIN SELECT a FROM t1 WHERE a NOT IN DROP TABLE t1, t2; +set @@optimizer_switch=@save_optimizer_switch; + # # Bug #34763: item_subselect.cc:1235:Item_in_subselect::row_value_transformer: # Assertion failed, unexpected error message: === modified file 'sql/item_cmpfunc.h' --- a/sql/item_cmpfunc.h 2010-03-13 20:04:52 +0000 +++ b/sql/item_cmpfunc.h 2010-03-15 19:52:58 +0000 @@ -350,6 +350,7 @@ public: CHARSET_INFO *compare_collation() { return cmp.cmp_collation.collation; } uint decimal_precision() const { return 1; } void top_level_item() { abort_on_null= TRUE; } + Arg_comparator *get_comparator() { return &cmp; } friend class Arg_comparator; }; === modified file 'sql/item_subselect.cc' --- a/sql/item_subselect.cc 2010-02-21 06:32:23 +0000 +++ b/sql/item_subselect.cc 2010-03-09 10:14:06 +0000 @@ -138,6 +138,7 @@ void Item_in_subselect::cleanup() left_expr_cache= NULL; } first_execution= TRUE; + is_constant= FALSE; Item_subselect::cleanup(); DBUG_VOID_RETURN; } @@ -449,8 +450,10 @@ bool Item_subselect::exec() int res; if (thd->is_error()) - /* Do not execute subselect in case of a fatal error */ + { + /* Do not execute subselect in case of a fatal error */ return 1; + } /* Simulate a failure in sub-query execution. Used to test e.g. out of memory or query being killed conditions. @@ -475,9 +478,6 @@ bool Item_subselect::exec() bool Item_in_subselect::exec() { DBUG_ENTER("Item_in_subselect::exec"); - DBUG_ASSERT(exec_method != MATERIALIZATION || - (exec_method == MATERIALIZATION && - engine->engine_type() == subselect_engine::HASH_SJ_ENGINE)); /* Initialize the cache of the left predicate operand. This has to be done as late as now, because Cached_item directly contains a resolved field (not @@ -493,14 +493,14 @@ bool Item_in_subselect::exec() if (!left_expr_cache && exec_method == MATERIALIZATION) init_left_expr_cache(); - /* If the new left operand is already in the cache, reuse the old result. */ - if (left_expr_cache && test_if_item_cache_changed(*left_expr_cache) < 0) - { - /* Always compute IN for the first row as the cache is not valid for it. */ - if (!first_execution) - DBUG_RETURN(FALSE); - first_execution= FALSE; - } + /* + If the new left operand is already in the cache, reuse the old result. + Use the cached result only if this is not the first execution of IN + because the cache is not valid for the first execution. + */ + if (!first_execution && left_expr_cache && + test_if_item_cache_changed(*left_expr_cache) < 0) + DBUG_RETURN(FALSE); /* The exec() method below updates item::value, and item::null_value, thus if @@ -910,8 +910,8 @@ bool Item_in_subselect::test_limit(st_se Item_in_subselect::Item_in_subselect(Item * left_exp, st_select_lex *select_lex): Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE), - optimizer(0), pushed_cond_guards(NULL), exec_method(NOT_TRANSFORMED), - upper_item(0) + is_constant(FALSE), optimizer(0), pushed_cond_guards(NULL), + exec_method(NOT_TRANSFORMED), upper_item(0) { DBUG_ENTER("Item_in_subselect::Item_in_subselect"); left_expr= left_exp; @@ -1105,6 +1105,8 @@ bool Item_in_subselect::val_bool() { DBUG_ASSERT(fixed == 1); null_value= 0; + if (is_constant) + return value; if (exec()) { reset(); @@ -1571,9 +1573,9 @@ Item_in_subselect::row_value_transformer DBUG_ENTER("Item_in_subselect::row_value_transformer"); // psergey: duplicated_subselect_card_check - if (select_lex->item_list.elements != left_expr->cols()) + if (select_lex->item_list.elements != cols_num) { - my_error(ER_OPERAND_COLUMNS, MYF(0), left_expr->cols()); + my_error(ER_OPERAND_COLUMNS, MYF(0), cols_num); DBUG_RETURN(RES_ERROR); } @@ -1980,17 +1982,69 @@ void Item_in_subselect::print(String *st bool Item_in_subselect::fix_fields(THD *thd_arg, Item **ref) { - bool result = 0; + uint outer_cols_num; + List<Item> *inner_cols; if (exec_method == SEMI_JOIN) return !( (*ref)= new Item_int(1)); - if (thd_arg->lex->view_prepare_mode && left_expr && !left_expr->fixed) - result = left_expr->fix_fields(thd_arg, &left_expr); + /* + Check if the outer and inner IN operands match in those cases when we + will not perform IN=>EXISTS transformation. Currently this is when we + use subquery materialization. + + The condition below is true when this method was called recursively from + inside JOIN::prepare for the JOIN object created by the call chain + Item_subselect::fix_fields -> subselect_single_select_engine::prepare, + which creates a JOIN object for the subquery and calls JOIN::prepare for + the JOIN of the subquery. + Notice that in some cases, this doesn't happen, and the check_cols() + test for each Item happens later in + Item_in_subselect::row_value_in_to_exists_transformer. + The reason for this mess is that our JOIN::prepare phase works top-down + instead of bottom-up, so we first do name resoluton and semantic checks + for the outer selects, then for the inner. + */ + if (engine && + engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE && + ((subselect_single_select_engine*)engine)->join) + { + outer_cols_num= left_expr->cols(); + + if (unit->is_union()) + inner_cols= &(unit->types); + else + inner_cols= &(unit->first_select()->item_list); + if (outer_cols_num != inner_cols->elements) + { + my_error(ER_OPERAND_COLUMNS, MYF(0), outer_cols_num); + return TRUE; + } + if (outer_cols_num > 1) + { + List_iterator<Item> inner_col_it(*inner_cols); + Item *inner_col; + for (uint i= 0; i < outer_cols_num; i++) + { + inner_col= inner_col_it++; + if (inner_col->check_cols(left_expr->element_index(i)->cols())) + return TRUE; + } + } + } + + if (thd_arg->lex->view_prepare_mode && left_expr && !left_expr->fixed && + left_expr->fix_fields(thd_arg, &left_expr)) + return TRUE; + if (Item_subselect::fix_fields(thd_arg, ref)) + return TRUE; - return result || Item_subselect::fix_fields(thd_arg, ref); + fixed= TRUE; + + return FALSE; } + void Item_in_subselect::fix_after_pullout(st_select_lex *new_parent, Item **ref) { left_expr->fix_after_pullout(new_parent, &left_expr); @@ -2267,10 +2321,9 @@ bool subselect_union_engine::no_rows() void subselect_uniquesubquery_engine::cleanup() { DBUG_ENTER("subselect_uniquesubquery_engine::cleanup"); - /* - subselect_uniquesubquery_engine have not 'result' assigbed, so we do not - cleanup() it - */ + /* Tell handler we don't need the index anymore */ + if (tab->table->file->inited) + tab->table->file->ha_index_end(); DBUG_VOID_RETURN; } @@ -2291,7 +2344,7 @@ subselect_union_engine::subselect_union_ Create and prepare the JOIN object that represents the query execution plan for the subquery. - @detail + @details This method is called from Item_subselect::fix_fields. For prepared statements it is called both during the PREPARE and EXECUTE phases in the following ways: @@ -2593,14 +2646,23 @@ int subselect_uniquesubquery_engine::sca for (;;) { error=table->file->ha_rnd_next(table->record[0]); - if (error && error != HA_ERR_END_OF_FILE) - { - error= report_error(table, error); - break; + if (error) { + if (error == HA_ERR_RECORD_DELETED) + { + error= 0; + continue; + } + if (error == HA_ERR_END_OF_FILE) + { + error= 0; + break; + } + else + { + error= report_error(table, error); + break; + } } - /* No more rows */ - if (table->status) - break; if (!cond || cond->val_int()) { @@ -2711,6 +2773,56 @@ bool subselect_uniquesubquery_engine::co /* + @retval 1 A NULL was found in the outer reference, index lookup is + not applicable, the outer ref is unsusable as a lookup key, + use some other method to find a match. + @retval 0 The outer ref was copied into an index lookup key. + @retval -1 The outer ref cannot possibly match any row, IN is FALSE. +*/ +/* TIMOUR: this method is a variant of copy_ref_key(), needs refactoring. */ + +int subselect_uniquesubquery_engine::copy_ref_key_simple() +{ + for (store_key **copy= tab->ref.key_copy ; *copy ; copy++) + { + enum store_key::store_key_result store_res; + store_res= (*copy)->copy(); + tab->ref.key_err= store_res; + + /* + When there is a NULL part in the key we don't need to make index + lookup for such key thus we don't need to copy whole key. + If we later should do a sequential scan return OK. Fail otherwise. + + See also the comment for the subselect_uniquesubquery_engine::exec() + function. + */ + null_keypart= (*copy)->null_key; + if (null_keypart) + return 1; + + /* + Check if the error is equal to STORE_KEY_FATAL. This is not expressed + using the store_key::store_key_result enum because ref.key_err is a + boolean and we want to detect both TRUE and STORE_KEY_FATAL from the + space of the union of the values of [TRUE, FALSE] and + store_key::store_key_result. + TODO: fix the variable an return types. + */ + if (store_res == store_key::STORE_KEY_FATAL) + { + /* + Error converting the left IN operand to the column type of the right + IN operand. + */ + return -1; + } + } + return 0; +} + + +/* Execute subselect SYNOPSIS @@ -2750,7 +2862,13 @@ int subselect_uniquesubquery_engine::exe /* TODO: change to use of 'full_scan' here? */ if (copy_ref_key()) + { + /* + TIMOUR: copy_ref_key() == 1 means NULL result, not error, why return 1? + Check who reiles on this result. + */ DBUG_RETURN(1); + } if (table->status) { /* @@ -2791,6 +2909,46 @@ int subselect_uniquesubquery_engine::exe } +/* + TIMOUR: write comment +*/ + +int subselect_uniquesubquery_engine::index_lookup() +{ + DBUG_ENTER("subselect_uniquesubquery_engine::index_lookup"); + int error; + TABLE *table= tab->table; + + if (!table->file->inited) + table->file->ha_index_init(tab->ref.key, 0); + error= table->file->ha_index_read_map(table->record[0], + tab->ref.key_buff, + make_prev_keypart_map(tab-> + ref.key_parts), + HA_READ_KEY_EXACT); + DBUG_PRINT("info", ("lookup result: %i", error)); + + if (error && error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE) + { + /* + TIMOUR: I don't understand at all when do we need to call report_error. + In most places where we access an index, we don't do this. Why here? + */ + error= report_error(table, error); + DBUG_RETURN(error); + } + + table->null_row= 0; + if (!error && (!cond || cond->val_int())) + ((Item_in_subselect *) item)->value= 1; + else + ((Item_in_subselect *) item)->value= 0; + + DBUG_RETURN(0); +} + + + subselect_uniquesubquery_engine::~subselect_uniquesubquery_engine() { /* Tell handler we don't need the index anymore */ @@ -3225,6 +3383,7 @@ bool subselect_union_engine::no_tables() bool subselect_uniquesubquery_engine::no_tables() { /* returning value is correct, but this method should never be called */ + DBUG_ASSERT(FALSE); return 0; } @@ -3235,16 +3394,259 @@ bool subselect_uniquesubquery_engine::no /** + Check if an IN predicate should be executed via partial matching using + only schema information. + + @details + This test essentially has three results: + - partial matching is applicable, but cannot be executed due to a + limitation in the total number of indexes, as a result we can't + use subquery materialization at all. + - partial matching is either applicable or not, and this can be + determined by looking at 'this->max_keys'. + If max_keys > 1, then we need partial matching because there are + more indexes than just the one we use during materialization to + remove duplicates. + + @note + TIMOUR: The schema-based analysis for partial matching can be done once for + prepared statement and remembered. It is done here to remove the need to + save/restore all related variables between each re-execution, thus making + the code simpler. + + @retval PARTIAL_MATCH if a partial match should be used + @retval COMPLETE_MATCH if a complete match (index lookup) should be used +*/ + +subselect_hash_sj_engine::exec_strategy +subselect_hash_sj_engine::get_strategy_using_schema() +{ + Item_in_subselect *item_in= (Item_in_subselect *) item; + + if (item_in->is_top_level_item()) + return COMPLETE_MATCH; + else + { + List_iterator<Item> inner_col_it(*item_in->unit->get_unit_column_types()); + Item *outer_col, *inner_col; + + for (uint i= 0; i < item_in->left_expr->cols(); i++) + { + outer_col= item_in->left_expr->element_index(i); + inner_col= inner_col_it++; + + if (!inner_col->maybe_null && !outer_col->maybe_null) + bitmap_set_bit(&non_null_key_parts, i); + else + { + bitmap_set_bit(&partial_match_key_parts, i); + ++count_partial_match_columns; + } + } + } + + /* If no column contains NULLs use regular hash index lookups. */ + if (count_partial_match_columns) + return PARTIAL_MATCH; + return COMPLETE_MATCH; +} + + +/** + Test whether an IN predicate must be computed via partial matching + based on the NULL statistics for each column of a materialized subquery. + + @details The procedure analyzes column NULL statistics, updates the + matching type of columns that cannot be NULL or that contain only NULLs. + Based on this, the procedure determines the final execution strategy for + the [NOT] IN predicate. + + @retval PARTIAL_MATCH if a partial match should be used + @retval COMPLETE_MATCH if a complete match (index lookup) should be used +*/ + +subselect_hash_sj_engine::exec_strategy +subselect_hash_sj_engine::get_strategy_using_data() +{ + Item_in_subselect *item_in= (Item_in_subselect *) item; + select_materialize_with_stats *result_sink= + (select_materialize_with_stats *) result; + Item *outer_col; + + /* + If we already determined that a complete match is enough based on schema + information, nothing can be better. + */ + if (strategy == COMPLETE_MATCH) + return COMPLETE_MATCH; + + for (uint i= 0; i < item_in->left_expr->cols(); i++) + { + if (!bitmap_is_set(&partial_match_key_parts, i)) + continue; + outer_col= item_in->left_expr->element_index(i); + /* + If column 'i' doesn't contain NULLs, and the corresponding outer reference + cannot have a NULL value, then 'i' is a non-nullable column. + */ + if (result_sink->get_null_count_of_col(i) == 0 && !outer_col->maybe_null) + { + bitmap_clear_bit(&partial_match_key_parts, i); + bitmap_set_bit(&non_null_key_parts, i); + --count_partial_match_columns; + } + if (result_sink->get_null_count_of_col(i) == + tmp_table->file->stats.records) + ++count_null_only_columns; + } + + /* If no column contains NULLs use regular hash index lookups. */ + if (!count_partial_match_columns) + return COMPLETE_MATCH; + return PARTIAL_MATCH; +} + + +void +subselect_hash_sj_engine::choose_partial_match_strategy( + bool has_non_null_key, bool has_covering_null_row, + MY_BITMAP *partial_match_key_parts) +{ + size_t pm_buff_size; + + DBUG_ASSERT(strategy == PARTIAL_MATCH); + /* + Choose according to global optimizer switch. If only one of the switches is + 'ON', then the remaining strategy is the only possible one. The only cases + when this will be overriden is when the total size of all buffers for the + merge strategy is bigger than the 'rowid_merge_buff_size' system variable, + or if there isn't enough physical memory to allocate the buffers. + */ + if (!optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) && + optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) + strategy= PARTIAL_MATCH_SCAN; + else if + ( optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) && + !optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) + strategy= PARTIAL_MATCH_MERGE; + + /* + If both switches are ON, or both are OFF, we interpret that as "let the + optimizer decide". Perform a cost based choice between the two partial + matching strategies. + */ + /* + TIMOUR: the above interpretation of the switch values could be changed to: + - if both are ON - let the optimizer decide, + - if both are OFF - do not use partial matching, therefore do not use + materialization in non-top-level predicates. + The problem with this is that we know for sure if we need partial matching + only after the subquery is materialized, and this is too late to revert to + the IN=>EXISTS strategy. + */ + if (strategy == PARTIAL_MATCH) + { + /* + TIMOUR: Currently we use a super simplistic measure. This will be + addressed in a separate task. + */ + if (tmp_table->file->stats.records < 100) + strategy= PARTIAL_MATCH_SCAN; + else + strategy= PARTIAL_MATCH_MERGE; + } + + /* Check if there is enough memory for the rowid merge strategy. */ + if (strategy == PARTIAL_MATCH_MERGE) + { + pm_buff_size= rowid_merge_buff_size(has_non_null_key, + has_covering_null_row, + partial_match_key_parts); + if (pm_buff_size > thd->variables.rowid_merge_buff_size) + strategy= PARTIAL_MATCH_SCAN; + } +} + + +/* + Compute the memory size of all buffers proportional to the number of rows + in tmp_table. + + @details + If the result is bigger than thd->variables.rowid_merge_buff_size, partial + matching via merging is not applicable. +*/ + +size_t subselect_hash_sj_engine::rowid_merge_buff_size( + bool has_non_null_key, bool has_covering_null_row, + MY_BITMAP *partial_match_key_parts) +{ + size_t buff_size; /* Total size of all buffers used by partial matching. */ + ha_rows row_count= tmp_table->file->stats.records; + uint rowid_length= tmp_table->file->ref_length; + select_materialize_with_stats *result_sink= + (select_materialize_with_stats *) result; + + /* Size of the subselect_rowid_merge_engine::row_num_to_rowid buffer. */ + buff_size= row_count * rowid_length * sizeof(uchar); + + if (has_non_null_key) + { + /* Add the size of Ordered_key::key_buff of the only non-NULL key. */ + buff_size+= row_count * sizeof(rownum_t); + } + + if (!has_covering_null_row) + { + for (uint i= 0; i < partial_match_key_parts->n_bits; i++) + { + if (!bitmap_is_set(partial_match_key_parts, i) || + result_sink->get_null_count_of_col(i) == row_count) + continue; /* In these cases we wouldn't construct Ordered keys. */ + + /* Add the size of Ordered_key::key_buff */ + buff_size+= (row_count - result_sink->get_null_count_of_col(i)) * + sizeof(rownum_t); + /* Add the size of Ordered_key::null_key */ + buff_size+= bitmap_buffer_size(result_sink->get_max_null_of_col(i)); + } + } + + return buff_size; +} + + +/* + Initialize a MY_BITMAP with a buffer allocated on the current + memory root. + TIMOUR: move to bitmap C file? +*/ + +static my_bool +bitmap_init_memroot(MY_BITMAP *map, uint n_bits, MEM_ROOT *mem_root) +{ + my_bitmap_map *bitmap_buf; + + if (!(bitmap_buf= (my_bitmap_map*) alloc_root(mem_root, + bitmap_buffer_size(n_bits))) || + bitmap_init(map, bitmap_buf, n_bits, FALSE)) + return TRUE; + bitmap_clear_all(map); + return FALSE; +} + + +/** Create all structures needed for IN execution that can live between PS reexecution. - @detail + @param tmp_columns the items that produce the data for the temp table + + @details - Create a temporary table to store the result of the IN subquery. The temporary table has one hash index on all its columns. - Create a new result sink that sends the result stream of the subquery to the temporary table, - - Create and initialize a new JOIN_TAB, and TABLE_REF objects to perform - lookups into the indexed temporary table. @notice: Currently Item_subselect::init() already chooses and creates at parse @@ -3256,145 +3658,210 @@ bool subselect_uniquesubquery_engine::no bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns) { - /* The result sink where we will materialize the subquery result. */ - select_union *tmp_result_sink; - /* The table into which the subquery is materialized. */ - TABLE *tmp_table; - KEY *tmp_key; /* The only index on the temporary table. */ - uint tmp_key_parts; /* Number of keyparts in tmp_key. */ - Item_in_subselect *item_in= (Item_in_subselect *) item; + /* Options to create_tmp_table. */ + ulonglong tmp_create_options= thd->options | TMP_TABLE_ALL_COLUMNS; + /* | TMP_TABLE_FORCE_MYISAM; TIMOUR: force MYISAM */ DBUG_ENTER("subselect_hash_sj_engine::init_permanent"); - /* 1. Create/initialize materialization related objects. */ + if (bitmap_init_memroot(&non_null_key_parts, tmp_columns->elements, + thd->mem_root) || + bitmap_init_memroot(&partial_match_key_parts, tmp_columns->elements, + thd->mem_root)) + DBUG_RETURN(TRUE); /* Create and initialize a select result interceptor that stores the result stream in a temporary table. The temporary table itself is managed (created/filled/etc) internally by the interceptor. */ - if (!(tmp_result_sink= new select_union)) +/* + TIMOUR: + Select a more efficient result sink when we know there is no need to collect + data statistics. + + if (strategy == COMPLETE_MATCH) + { + if (!(result= new select_union)) + DBUG_RETURN(TRUE); + } + else if (strategy == PARTIAL_MATCH) + { + if (!(result= new select_materialize_with_stats)) + DBUG_RETURN(TRUE); + } +*/ + if (!(result= new select_materialize_with_stats)) DBUG_RETURN(TRUE); - if (tmp_result_sink->create_result_table( - thd, tmp_columns, TRUE, - thd->options | TMP_TABLE_ALL_COLUMNS, + + if (((select_union*) result)->create_result_table( + thd, tmp_columns, TRUE, tmp_create_options, "materialized subselect", TRUE)) DBUG_RETURN(TRUE); - tmp_table= tmp_result_sink->table; - tmp_key= tmp_table->key_info; - tmp_key_parts= tmp_key->key_parts; + tmp_table= ((select_union*) result)->table; /* - If the subquery has blobs, or the total key lenght is bigger than some - length, then the created index cannot be used for lookups and we - can't use hash semi join. If this is the case, delete the temporary - table since it will not be used, and tell the caller we failed to - initialize the engine. + If the subquery has blobs, or the total key lenght is bigger than + some length, or the total number of key parts is more than the + allowed maximum (currently MAX_REF_PARTS == 16), then the created + index cannot be used for lookups and we can't use hash semi + join. If this is the case, delete the temporary table since it + will not be used, and tell the caller we failed to initialize the + engine. */ if (tmp_table->s->keys == 0) { -#ifndef DBUG_OFF - handlerton *tmp_table_hton= tmp_table->s->db_type(); -#ifdef USE_MARIA_FOR_TMP_TABLES - DBUG_ASSERT(tmp_table_hton == maria_hton); -#else - DBUG_ASSERT(tmp_table_hton == myisam_hton); -#endif -#endif DBUG_ASSERT( tmp_table->s->uniques || tmp_table->key_info->key_length >= tmp_table->file->max_key_length() || tmp_table->key_info->key_parts > tmp_table->file->max_key_parts()); free_tmp_table(thd, tmp_table); + tmp_table= NULL; delete result; result= NULL; DBUG_RETURN(TRUE); } - result= tmp_result_sink; /* Make sure there is only one index on the temp table, and it doesn't have the extra key part created when s->uniques > 0. */ - DBUG_ASSERT(tmp_table->s->keys == 1 && tmp_columns->elements == tmp_key_parts); + DBUG_ASSERT(tmp_table->s->keys == 1 && + ((Item_in_subselect *) item)->left_expr->cols() == + tmp_table->key_info->key_parts); + + if (make_semi_join_conds() || + /* A unique_engine is used both for complete and partial matching. */ + !(lookup_engine= make_unique_engine())) + DBUG_RETURN(TRUE); + + DBUG_RETURN(FALSE); +} - /* 2. Create/initialize execution related objects. */ +/* + Create an artificial condition to post-filter those rows matched by index + lookups that cannot be distinguished by the index lookup procedure. - /* - Create and initialize the JOIN_TAB that represents an index lookup - plan operator into the materialized subquery result. Notice that: - - this JOIN_TAB has no corresponding JOIN (and doesn't need one), and - - here we initialize only those members that are used by - subselect_uniquesubquery_engine, so these objects are incomplete. - */ - if (!(tab= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)))) - DBUG_RETURN(TRUE); - tab->table= tmp_table; - tab->ref.key= 0; /* The only temp table index. */ - tab->ref.key_length= tmp_key->key_length; - if (!(tab->ref.key_buff= - (uchar*) thd->calloc(ALIGN_SIZE(tmp_key->key_length) * 2)) || - !(tab->ref.key_copy= - (store_key**) thd->alloc((sizeof(store_key*) * - (tmp_key_parts + 1)))) || - !(tab->ref.items= - (Item**) thd->alloc(sizeof(Item*) * tmp_key_parts))) - DBUG_RETURN(TRUE); + @notes + The need for post-filtering may occur e.g. because of + truncation. Prepared statements execution requires that fix_fields is + called for every execution. In order to call fix_fields we need to + create a Name_resolution_context and a corresponding TABLE_LIST for + the temporary table for the subquery, so that all column references + to the materialized subquery table can be resolved correctly. - KEY_PART_INFO *cur_key_part= tmp_key->key_part; - store_key **ref_key= tab->ref.key_copy; - uchar *cur_ref_buff= tab->ref.key_buff; + @returns + @retval TRUE memory allocation error occurred + @retval FALSE the conditions were created and resolved (fixed) +*/ - /* - Create an artificial condition to post-filter those rows matched by index - lookups that cannot be distinguished by the index lookup procedure, e.g. - because of truncation. Prepared statements execution requires that - fix_fields is called for every execution. In order to call fix_fields we - need to create a Name_resolution_context and a corresponding TABLE_LIST - for the temporary table for the subquery, so that all column references - to the materialized subquery table can be resolved correctly. - */ - DBUG_ASSERT(cond == NULL); - if (!(cond= new Item_cond_and)) - DBUG_RETURN(TRUE); +bool subselect_hash_sj_engine::make_semi_join_conds() +{ /* Table reference for tmp_table that is used to resolve column references (Item_fields) to columns in tmp_table. */ TABLE_LIST *tmp_table_ref; + /* Name resolution context for all tmp_table columns created below. */ + Name_resolution_context *context; + Item_in_subselect *item_in= (Item_in_subselect *) item; + + DBUG_ENTER("subselect_hash_sj_engine::make_semi_join_conds"); + DBUG_ASSERT(semi_join_conds == NULL); + + if (!(semi_join_conds= new Item_cond_and)) + DBUG_RETURN(TRUE); + if (!(tmp_table_ref= (TABLE_LIST*) thd->alloc(sizeof(TABLE_LIST)))) DBUG_RETURN(TRUE); tmp_table_ref->init_one_table("", "materialized subselect", TL_READ); tmp_table_ref->table= tmp_table; - /* Name resolution context for all tmp_table columns created below. */ - Name_resolution_context *context= new Name_resolution_context; + context= new Name_resolution_context; context->init(); context->first_name_resolution_table= context->last_name_resolution_table= tmp_table_ref; - for (uint i= 0; i < tmp_key_parts; i++, cur_key_part++, ref_key++) + for (uint i= 0; i < item_in->left_expr->cols(); i++) { Item_func_eq *eq_cond; /* New equi-join condition for the current column. */ /* Item for the corresponding field from the materialized temp table. */ Item_field *right_col_item; - int null_count= test(cur_key_part->field->real_maybe_null()); - tab->ref.items[i]= item_in->left_expr->element_index(i); - if (!(right_col_item= new Item_field(thd, context, cur_key_part->field)) || - !(eq_cond= new Item_func_eq(tab->ref.items[i], right_col_item)) || - ((Item_cond_and*)cond)->add(eq_cond)) + if (!(right_col_item= new Item_field(thd, context, tmp_table->field[i])) || + !(eq_cond= new Item_func_eq(item_in->left_expr->element_index(i), + right_col_item)) || + (((Item_cond_and*)semi_join_conds)->add(eq_cond))) { - delete cond; - cond= NULL; + delete semi_join_conds; + semi_join_conds= NULL; DBUG_RETURN(TRUE); } + } + if (semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds)) + DBUG_RETURN(TRUE); + + DBUG_RETURN(FALSE); +} + + +/** + Create a new uniquesubquery engine for the execution of an IN predicate. + + @details + Create and initialize a new JOIN_TAB, and Table_ref objects to perform + lookups into the indexed temporary table. + + @retval A new subselect_hash_sj_engine object + @retval NULL if a memory allocation error occurs +*/ + +subselect_uniquesubquery_engine* +subselect_hash_sj_engine::make_unique_engine() +{ + Item_in_subselect *item_in= (Item_in_subselect *) item; + /* The only index on the temporary table. */ + KEY *tmp_key= tmp_table->key_info; + /* Number of keyparts in tmp_key. */ + uint tmp_key_parts= tmp_key->key_parts; + JOIN_TAB *tab; + + DBUG_ENTER("subselect_hash_sj_engine::make_unique_engine"); + + /* + Create and initialize the JOIN_TAB that represents an index lookup + plan operator into the materialized subquery result. Notice that: + - this JOIN_TAB has no corresponding JOIN (and doesn't need one), and + - here we initialize only those members that are used by + subselect_uniquesubquery_engine, so these objects are incomplete. + */ + if (!(tab= (JOIN_TAB*) thd->alloc(sizeof(JOIN_TAB)))) + DBUG_RETURN(NULL); + tab->table= tmp_table; + tab->ref.key= 0; /* The only temp table index. */ + tab->ref.key_length= tmp_key->key_length; + if (!(tab->ref.key_buff= + (uchar*) thd->calloc(ALIGN_SIZE(tmp_key->key_length) * 2)) || + !(tab->ref.key_copy= + (store_key**) thd->alloc((sizeof(store_key*) * + (tmp_key_parts + 1)))) || + !(tab->ref.items= + (Item**) thd->alloc(sizeof(Item*) * tmp_key_parts))) + DBUG_RETURN(NULL); + KEY_PART_INFO *cur_key_part= tmp_key->key_part; + store_key **ref_key= tab->ref.key_copy; + uchar *cur_ref_buff= tab->ref.key_buff; + + for (uint i= 0; i < tmp_key_parts; i++, cur_key_part++, ref_key++) + { + tab->ref.items[i]= item_in->left_expr->element_index(i); + int null_count= test(cur_key_part->field->real_maybe_null()); *ref_key= new store_key_item(thd, cur_key_part->field, - /* TODO: + /* TIMOUR: the NULL byte is taken into account in cur_key_part->store_length, so instead of cur_ref_buff + test(maybe_null), we could @@ -3409,10 +3876,8 @@ bool subselect_hash_sj_engine::init_perm tab->ref.key_err= 1; tab->ref.key_parts= tmp_key_parts; - if (cond->fix_fields(thd, &cond)) - DBUG_RETURN(TRUE); - - DBUG_RETURN(FALSE); + DBUG_RETURN(new subselect_uniquesubquery_engine(thd, tab, item, + semi_join_conds)); } @@ -3435,7 +3900,8 @@ bool subselect_hash_sj_engine::init_runt Repeat name resolution for 'cond' since cond is not part of any clause of the query, and it is not 'fixed' during JOIN::prepare. */ - if (cond && !cond->fixed && cond->fix_fields(thd, &cond)) + if (semi_join_conds && !semi_join_conds->fixed && + semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds)) return TRUE; /* Let our engine reuse this query plan for materialization. */ materialize_join= materialize_engine->join; @@ -3446,32 +3912,53 @@ bool subselect_hash_sj_engine::init_runt subselect_hash_sj_engine::~subselect_hash_sj_engine() { + delete lookup_engine; delete result; - if (tab) - free_tmp_table(thd, tab->table); + if (tmp_table) + free_tmp_table(thd, tmp_table); } /** Cleanup performed after each PS execution. - @detail + @details Called in the end of JOIN::prepare for PS from Item_subselect::cleanup. */ void subselect_hash_sj_engine::cleanup() { + enum_engine_type lookup_engine_type= lookup_engine->engine_type(); is_materialized= FALSE; - result->cleanup(); /* Resets the temp table as well. */ + bitmap_clear_all(&non_null_key_parts); + bitmap_clear_all(&partial_match_key_parts); + count_partial_match_columns= 0; + count_null_only_columns= 0; + strategy= UNDEFINED; materialize_engine->cleanup(); - subselect_uniquesubquery_engine::cleanup(); + if (lookup_engine_type == TABLE_SCAN_ENGINE || + lookup_engine_type == ROWID_MERGE_ENGINE) + { + subselect_engine *inner_lookup_engine; + inner_lookup_engine= + ((subselect_partial_match_engine*) lookup_engine)->lookup_engine; + /* + Partial match engines are recreated for each PS execution inside + subselect_hash_sj_engine::exec(). + */ + delete lookup_engine; + lookup_engine= inner_lookup_engine; + } + DBUG_ASSERT(lookup_engine->engine_type() == UNIQUESUBQUERY_ENGINE); + lookup_engine->cleanup(); + result->cleanup(); /* Resets the temp table as well. */ } /** Execute a subquery IN predicate via materialization. - @detail + @details If needed materialize the subquery into a temporary table, then copmpute the predicate via a lookup into this table. @@ -3482,6 +3969,9 @@ void subselect_hash_sj_engine::cleanup() int subselect_hash_sj_engine::exec() { Item_in_subselect *item_in= (Item_in_subselect *) item; + SELECT_LEX *save_select= thd->lex->current_select; + subselect_partial_match_engine *pm_engine= NULL; + int res= 0; DBUG_ENTER("subselect_hash_sj_engine::exec"); @@ -3489,56 +3979,126 @@ int subselect_hash_sj_engine::exec() Optimize and materialize the subquery during the first execution of the subquery predicate. */ - if (!is_materialized) - { - int res= 0; - SELECT_LEX *save_select= thd->lex->current_select; - thd->lex->current_select= materialize_engine->select_lex; - if ((res= materialize_join->optimize())) - goto err; /* purecov: inspected */ - materialize_join->exec(); - if ((res= test(materialize_join->error || thd->is_fatal_error))) - goto err; - - /* - TODO: - - Unlock all subquery tables as we don't need them. To implement this - we need to add new functionality to JOIN::join_free that can unlock - all tables in a subquery (and all its subqueries). - - The temp table used for grouping in the subquery can be freed - immediately after materialization (yet it's done together with - unlocking). - */ - is_materialized= TRUE; - /* - If the subquery returned no rows, the temporary table is empty, so we know - directly that the result of IN is FALSE. We first update the table - statistics, then we test if the temporary table for the query result is - empty. - */ - tab->table->file->info(HA_STATUS_VARIABLE); - if (!tab->table->file->stats.records) - { - empty_result_set= TRUE; - item_in->value= FALSE; - /* TODO: check we need this: item_in->null_value= FALSE; */ - DBUG_RETURN(FALSE); - } - /* Set tmp_param only if its usable, i.e. tmp_param->copy_field != NULL. */ - tmp_param= &(item_in->unit->outer_select()->join->tmp_table_param); - if (tmp_param && !tmp_param->copy_field) - tmp_param= NULL; + thd->lex->current_select= materialize_engine->select_lex; + if ((res= materialize_join->optimize())) + goto err; /* purecov: inspected */ + DBUG_ASSERT(!is_materialized); /* We should materialize only once. */ + materialize_join->exec(); + if ((res= test(materialize_join->error || thd->is_fatal_error))) + goto err; -err: - thd->lex->current_select= save_select; - if (res) - DBUG_RETURN(res); + /* + TODO: + - Unlock all subquery tables as we don't need them. To implement this + we need to add new functionality to JOIN::join_free that can unlock + all tables in a subquery (and all its subqueries). + - The temp table used for grouping in the subquery can be freed + immediately after materialization (yet it's done together with + unlocking). + */ + is_materialized= TRUE; + /* + If the subquery returned no rows, the temporary table is empty, so we know + directly that the result of IN is FALSE. We first update the table + statistics, then we test if the temporary table for the query result is + empty. + */ + tmp_table->file->info(HA_STATUS_VARIABLE); + if (!tmp_table->file->stats.records) + { + item_in->value= FALSE; + /* The value of IN will not change during this execution. */ + item_in->is_constant= TRUE; + item_in->set_first_execution(); + /* TIMOUR: check if we need this: item_in->null_value= FALSE; */ + DBUG_RETURN(FALSE); } /* - Lookup the left IN operand in the hash index of the materialized subquery. + TIMOUR: The schema-based analysis for partial matching can be done once for + prepared statement and remembered. It is done here to remove the need to + save/restore all related variables between each re-execution, thus making + the code simpler. */ - DBUG_RETURN(subselect_uniquesubquery_engine::exec()); + strategy= get_strategy_using_schema(); + /* This call may discover that we don't need partial matching at all. */ + strategy= get_strategy_using_data(); + if (strategy == PARTIAL_MATCH) + { + uint count_pm_keys; /* Total number of keys needed for partial matching. */ + MY_BITMAP *nn_key_parts; /* The key parts of the only non-NULL index. */ + uint covering_null_row_width; + select_materialize_with_stats *result_sink= + (select_materialize_with_stats *) result; + + nn_key_parts= (count_partial_match_columns < tmp_table->s->fields) ? + &non_null_key_parts : NULL; + + if (result_sink->get_max_nulls_in_row() == + tmp_table->s->fields - + (nn_key_parts ? bitmap_bits_set(nn_key_parts) : 0)) + covering_null_row_width= result_sink->get_max_nulls_in_row(); + else + covering_null_row_width= 0; + + if (covering_null_row_width) + count_pm_keys= nn_key_parts ? 1 : 0; + else + count_pm_keys= count_partial_match_columns - count_null_only_columns + + (nn_key_parts ? 1 : 0); + + choose_partial_match_strategy(test(nn_key_parts), + test(covering_null_row_width), + &partial_match_key_parts); + DBUG_ASSERT(strategy == PARTIAL_MATCH_MERGE || + strategy == PARTIAL_MATCH_SCAN); + if (strategy == PARTIAL_MATCH_MERGE) + { + pm_engine= + new subselect_rowid_merge_engine((subselect_uniquesubquery_engine*) + lookup_engine, tmp_table, + count_pm_keys, + covering_null_row_width, + item, result, + semi_join_conds->argument_list()); + if (!pm_engine || + ((subselect_rowid_merge_engine*) pm_engine)-> + init(nn_key_parts, &partial_match_key_parts)) + { + /* + The call to init() would fail if there was not enough memory to allocate + all buffers for the rowid merge strategy. In this case revert to table + scanning which doesn't need any big buffers. + */ + delete pm_engine; + pm_engine= NULL; + strategy= PARTIAL_MATCH_SCAN; + } + } + + if (strategy == PARTIAL_MATCH_SCAN) + { + if (!(pm_engine= + new subselect_table_scan_engine((subselect_uniquesubquery_engine*) + lookup_engine, tmp_table, + item, result, + semi_join_conds->argument_list(), + covering_null_row_width))) + { + /* This is an irrecoverable error. */ + res= 1; + goto err; + } + } + } + + if (pm_engine) + lookup_engine= pm_engine; + item_in->change_engine(lookup_engine); + +err: + thd->lex->current_select= save_select; + DBUG_RETURN(res); } @@ -3551,10 +4111,1008 @@ void subselect_hash_sj_engine::print(Str str->append(STRING_WITH_LEN(" <materialize> (")); materialize_engine->print(str, query_type); str->append(STRING_WITH_LEN(" ), ")); - if (tab) - subselect_uniquesubquery_engine::print(str, query_type); + + if (lookup_engine) + lookup_engine->print(str, query_type); else str->append(STRING_WITH_LEN( - "<the access method for lookups is not yet created>" + "<engine selected at execution time>" )); } + +void subselect_hash_sj_engine::fix_length_and_dec(Item_cache** row) +{ + DBUG_ASSERT(FALSE); +} + +void subselect_hash_sj_engine::exclude() +{ + DBUG_ASSERT(FALSE); +} + +bool subselect_hash_sj_engine::no_tables() +{ + DBUG_ASSERT(FALSE); + return FALSE; +} + +bool subselect_hash_sj_engine::change_result(Item_subselect *si, + select_result_interceptor *res) +{ + DBUG_ASSERT(FALSE); + return TRUE; +} + + +Ordered_key::Ordered_key(uint keyid_arg, TABLE *tbl_arg, Item *search_key_arg, + ha_rows null_count_arg, ha_rows min_null_row_arg, + ha_rows max_null_row_arg, uchar *row_num_to_rowid_arg) + : keyid(keyid_arg), tbl(tbl_arg), search_key(search_key_arg), + row_num_to_rowid(row_num_to_rowid_arg), null_count(null_count_arg) +{ + DBUG_ASSERT(tbl->file->stats.records > null_count); + key_buff_elements= tbl->file->stats.records - null_count; + cur_key_idx= HA_POS_ERROR; + + DBUG_ASSERT((null_count && min_null_row_arg && max_null_row_arg) || + (!null_count && !min_null_row_arg && !max_null_row_arg)); + if (null_count) + { + /* The counters are 1-based, for key access we need 0-based indexes. */ + min_null_row= min_null_row_arg - 1; + max_null_row= max_null_row_arg - 1; + } + else + min_null_row= max_null_row= 0; +} + + +Ordered_key::~Ordered_key() +{ + my_free((char*) key_buff, MYF(0)); + bitmap_free(&null_key); +} + + +/* + Cleanup that needs to be done for each PS (re)execution. +*/ + +void Ordered_key::cleanup() +{ + /* + Currently these keys are recreated for each PS re-execution, thus + there is nothing to cleanup, the whole object goes away after execution + is over. All handler related initialization/deinitialization is done by + the parent subselect_rowid_merge_engine object. + */ +} + + +/* + Initialize a multi-column index. +*/ + +bool Ordered_key::init(MY_BITMAP *columns_to_index) +{ + THD *thd= tbl->in_use; + uint cur_key_col= 0; + Item_field *cur_tmp_field; + Item_func_lt *fn_less_than; + + key_column_count= bitmap_bits_set(columns_to_index); + + // TIMOUR: check for mem allocation err, revert to scan + + key_columns= (Item_field**) thd->alloc(key_column_count * + sizeof(Item_field*)); + compare_pred= (Item_func_lt**) thd->alloc(key_column_count * + sizeof(Item_func_lt*)); + + for (uint i= 0; i < columns_to_index->n_bits; i++) + { + if (!bitmap_is_set(columns_to_index, i)) + continue; + cur_tmp_field= new Item_field(tbl->field[i]); + /* Create the predicate (tmp_column[i] < outer_ref[i]). */ + fn_less_than= new Item_func_lt(cur_tmp_field, + search_key->element_index(i)); + fn_less_than->fix_fields(thd, (Item**) &fn_less_than); + key_columns[cur_key_col]= cur_tmp_field; + compare_pred[cur_key_col]= fn_less_than; + ++cur_key_col; + } + + if (alloc_keys_buffers()) + { + /* TIMOUR revert to partial match via table scan. */ + return TRUE; + } + return FALSE; +} + + +/* + Initialize a single-column index. +*/ + +bool Ordered_key::init(int col_idx) +{ + THD *thd= tbl->in_use; + + key_column_count= 1; + + // TIMOUR: check for mem allocation err, revert to scan + + key_columns= (Item_field**) thd->alloc(sizeof(Item_field*)); + compare_pred= (Item_func_lt**) thd->alloc(sizeof(Item_func_lt*)); + + key_columns[0]= new Item_field(tbl->field[col_idx]); + /* Create the predicate (tmp_column[i] < outer_ref[i]). */ + compare_pred[0]= new Item_func_lt(key_columns[0], + search_key->element_index(col_idx)); + compare_pred[0]->fix_fields(thd, (Item**)&compare_pred[0]); + + if (alloc_keys_buffers()) + { + /* TIMOUR revert to partial match via table scan. */ + return TRUE; + } + return FALSE; +} + + +/* + Allocate the buffers for both the row number, and the NULL-bitmap indexes. +*/ + +bool Ordered_key::alloc_keys_buffers() +{ + DBUG_ASSERT(key_buff_elements > 0); + + if (!(key_buff= (rownum_t*) my_malloc(key_buff_elements * sizeof(rownum_t), + MYF(MY_WME)))) + return TRUE; + + /* + TIMOUR: it is enough to create bitmaps with size + (max_null_row - min_null_row), and then use min_null_row as + lookup offset. + */ + /* Notice that max_null_row is max array index, we need count, so +1. */ + if (bitmap_init(&null_key, NULL, max_null_row + 1, FALSE)) + return TRUE; + + cur_key_idx= HA_POS_ERROR; + + return FALSE; +} + + +/* + Quick sort comparison function that compares two rows of the same table + indentfied with their row numbers. + + @retval -1 + @retval 0 + @retval +1 +*/ + +int +Ordered_key::cmp_keys_by_row_data(ha_rows a, ha_rows b) +{ + uchar *rowid_a, *rowid_b; + int error, cmp_res; + /* The length in bytes of the rowids (positions) of tmp_table. */ + uint rowid_length= tbl->file->ref_length; + + if (a == b) + return 0; + /* Get the corresponding rowids. */ + rowid_a= row_num_to_rowid + a * rowid_length; + rowid_b= row_num_to_rowid + b * rowid_length; + /* Fetch the rows for comparison. */ + error= tbl->file->ha_rnd_pos(tbl->record[0], rowid_a); + DBUG_ASSERT(!error); + error= tbl->file->ha_rnd_pos(tbl->record[1], rowid_b); + DBUG_ASSERT(!error); + /* + Compare the two rows by the corresponding values of the indexed + columns. + */ + for (uint i= 0; i < key_column_count; i++) + { + Field *cur_field= key_columns[i]->field; + if ((cmp_res= cur_field->cmp_offset(tbl->s->rec_buff_length))) + return (cmp_res > 0 ? 1 : -1); + } + return 0; +} + + +int +Ordered_key::cmp_keys_by_row_data_and_rownum(Ordered_key *key, + rownum_t* a, rownum_t* b) +{ + /* The result of comparing the two keys according to their row data. */ + int cmp_row_res= key->cmp_keys_by_row_data(*a, *b); + if (cmp_row_res) + return cmp_row_res; + return (*a < *b) ? -1 : (*a > *b) ? 1 : 0; +} + + +void Ordered_key::sort_keys() +{ + my_qsort2(key_buff, key_buff_elements, sizeof(rownum_t), + (qsort2_cmp) &cmp_keys_by_row_data_and_rownum, (void*) this); + /* Invalidate the current row position. */ + cur_key_idx= HA_POS_ERROR; +} + + +/* + The fraction of rows that do not contain NULL in the columns indexed by + this key. + + @retval 1 if there are no NULLs + @retval 0 if only NULLs +*/ + +double Ordered_key::null_selectivity() +{ + /* We should not be processing empty tables. */ + DBUG_ASSERT(tbl->file->stats.records); + return (1 - (double) null_count / (double) tbl->file->stats.records); +} + + +/* + Compare the value(s) of the current key in 'search_key' with the + data of the current table record. + + @notes The comparison result follows from the way compare_pred + is created in Ordered_key::init. Currently compare_pred compares + a field in of the current row with the corresponding Item that + contains the search key. + + @param row_num Number of the row (not index in the key_buff array) + + @retval -1 if (current row < search_key) + @retval 0 if (current row == search_key) + @retval +1 if (current row > search_key) +*/ + +int Ordered_key::cmp_key_with_search_key(rownum_t row_num) +{ + /* The length in bytes of the rowids (positions) of tmp_table. */ + uint rowid_length= tbl->file->ref_length; + uchar *cur_rowid= row_num_to_rowid + row_num * rowid_length; + int error, cmp_res; + + error= tbl->file->ha_rnd_pos(tbl->record[0], cur_rowid); + DBUG_ASSERT(!error); + + for (uint i= 0; i < key_column_count; i++) + { + cmp_res= compare_pred[i]->get_comparator()->compare(); + /* Unlike Arg_comparator::compare_row() here there should be no NULLs. */ + DBUG_ASSERT(!compare_pred[i]->null_value); + if (cmp_res) + return (cmp_res > 0 ? 1 : -1); + } + return 0; +} + + +/* + Find a key in a sorted array of keys via binary search. + + see create_subq_in_equalities() +*/ + +bool Ordered_key::lookup() +{ + DBUG_ASSERT(key_buff_elements); + + ha_rows lo= 0; + ha_rows hi= key_buff_elements - 1; + ha_rows mid; + int cmp_res; + + while (lo <= hi) + { + mid= lo + (hi - lo) / 2; + cmp_res= cmp_key_with_search_key(key_buff[mid]); + /* + In order to find the minimum match, check if the pevious element is + equal or smaller than the found one. If equal, we need to search further + to the left. + */ + if (!cmp_res && mid > 0) + cmp_res= !cmp_key_with_search_key(key_buff[mid - 1]) ? 1 : 0; + + if (cmp_res == -1) + { + /* row[mid] < search_key */ + lo= mid + 1; + } + else if (cmp_res == 1) + { + /* row[mid] > search_key */ + if (!mid) + goto not_found; + hi= mid - 1; + } + else + { + /* row[mid] == search_key */ + cur_key_idx= mid; + return TRUE; + } + } +not_found: + cur_key_idx= HA_POS_ERROR; + return FALSE; +} + + +/* + Move the current index pointer to the next key with the same column + values as the current key. Since the index is sorted, all such keys + are contiguous. +*/ + +bool Ordered_key::next_same() +{ + DBUG_ASSERT(key_buff_elements); + + if (cur_key_idx < key_buff_elements - 1) + { + /* + TIMOUR: + The below is quite inefficient, since as a result we will fetch every + row (except the last one) twice. There must be a more efficient way, + e.g. swapping record[0] and record[1], and reading only the new record. + */ + if (!cmp_keys_by_row_data(key_buff[cur_key_idx], key_buff[cur_key_idx + 1])) + { + ++cur_key_idx; + return TRUE; + } + } + return FALSE; +} + + +void Ordered_key::print(String *str) +{ + uint i; + str->append("{idx="); + str->qs_append(keyid); + str->append(", ("); + for (i= 0; i < key_column_count - 1; i++) + { + str->append(key_columns[i]->field->field_name); + str->append(", "); + } + str->append(key_columns[i]->field->field_name); + str->append("), "); + + str->append("null_bitmap: (bits="); + str->qs_append(null_key.n_bits); + str->append(", nulls= "); + str->qs_append((double)null_count); + str->append(", min_null= "); + str->qs_append((double)min_null_row); + str->append(", max_null= "); + str->qs_append((double)max_null_row); + str->append("), "); + + str->append('}'); +} + + +subselect_partial_match_engine::subselect_partial_match_engine( + subselect_uniquesubquery_engine *engine_arg, + TABLE *tmp_table_arg, Item_subselect *item_arg, + select_result_interceptor *result_arg, + List<Item> *equi_join_conds_arg, + uint covering_null_row_width_arg) + :subselect_engine(item_arg, result_arg), + tmp_table(tmp_table_arg), lookup_engine(engine_arg), + equi_join_conds(equi_join_conds_arg), + covering_null_row_width(covering_null_row_width_arg) +{} + + +int subselect_partial_match_engine::exec() +{ + Item_in_subselect *item_in= (Item_in_subselect *) item; + int res; + + /* Try to find a matching row by index lookup. */ + res= lookup_engine->copy_ref_key_simple(); + if (res == -1) + { + /* The result is FALSE based on the outer reference. */ + item_in->value= 0; + item_in->null_value= 0; + return 0; + } + else if (res == 0) + { + /* Search for a complete match. */ + if ((res= lookup_engine->index_lookup())) + { + /* An error occured during lookup(). */ + item_in->value= 0; + item_in->null_value= 0; + return res; + } + else if (item_in->value) + { + /* + A complete match was found, the result of IN is TRUE. + Notice: (this->item == lookup_engine->item) + */ + return 0; + } + } + + if (covering_null_row_width == tmp_table->s->fields) + { + /* + If there is a NULL-only row that coveres all columns the result of IN + is UNKNOWN. + */ + item_in->value= 0; + /* + TIMOUR: which one is the right way to propagate an UNKNOWN result? + Should we also set empty_result_set= FALSE; ??? + */ + //item_in->was_null= 1; + item_in->null_value= 1; + return 0; + } + + /* + There is no complete match. Look for a partial match (UNKNOWN result), or + no match (FALSE). + */ + if (tmp_table->file->inited) + tmp_table->file->ha_index_end(); + + if (partial_match()) + { + /* The result of IN is UNKNOWN. */ + item_in->value= 0; + /* + TIMOUR: which one is the right way to propagate an UNKNOWN result? + Should we also set empty_result_set= FALSE; ??? + */ + //item_in->was_null= 1; + item_in->null_value= 1; + } + else + { + /* The result of IN is FALSE. */ + item_in->value= 0; + /* + TIMOUR: which one is the right way to propagate an UNKNOWN result? + Should we also set empty_result_set= FALSE; ??? + */ + //item_in->was_null= 0; + item_in->null_value= 0; + } + + return 0; +} + + +void subselect_partial_match_engine::print(String *str, + enum_query_type query_type) +{ + /* + Should never be called as the actual engine cannot be known at query + optimization time. + */ + DBUG_ASSERT(FALSE); +} + + +/* + @param non_null_key_parts + @param partial_match_key_parts A union of all single-column NULL key parts. + @param count_partial_match_columns Number of NULL keyparts (set bits above). + + @retval FALSE the engine was initialized successfully + @retval TRUE there was some (memory allocation) error during initialization, + such errors should be interpreted as revert to other strategy +*/ + +bool +subselect_rowid_merge_engine::init(MY_BITMAP *non_null_key_parts, + MY_BITMAP *partial_match_key_parts) +{ + /* The length in bytes of the rowids (positions) of tmp_table. */ + uint rowid_length= tmp_table->file->ref_length; + ha_rows row_count= tmp_table->file->stats.records; + rownum_t cur_rownum= 0; + select_materialize_with_stats *result_sink= + (select_materialize_with_stats *) result; + uint cur_keyid= 0; + Item_in_subselect *item_in= (Item_in_subselect*) item; + int error; + + if (keys_count == 0) + { + /* There is nothing to initialize, we will only do regular lookups. */ + return FALSE; + } + + DBUG_ASSERT(!covering_null_row_width || (covering_null_row_width && + keys_count == 1 && + non_null_key_parts)); + /* + Allocate buffers to hold the merged keys and the mapping between rowids and + row numbers. + */ + if (!(merge_keys= (Ordered_key**) thd->alloc(keys_count * + sizeof(Ordered_key*))) || + !(row_num_to_rowid= (uchar*) my_malloc(row_count * rowid_length * + sizeof(uchar), MYF(MY_WME)))) + return TRUE; + + /* Create the only non-NULL key if there is any. */ + if (non_null_key_parts) + { + non_null_key= new Ordered_key(cur_keyid, tmp_table, item_in->left_expr, + 0, 0, 0, row_num_to_rowid); + if (non_null_key->init(non_null_key_parts)) + return TRUE; + merge_keys[cur_keyid]= non_null_key; + merge_keys[cur_keyid]->first(); + ++cur_keyid; + } + + /* + If there is a covering NULL row, the only key that is needed is the + only non-NULL key that is already created above. We create keys on + NULL-able columns only if there is no covering NULL row. + */ + if (!covering_null_row_width) + { + if (bitmap_init_memroot(&matching_keys, keys_count, thd->mem_root) || + bitmap_init_memroot(&matching_outer_cols, keys_count, thd->mem_root) || + bitmap_init_memroot(&null_only_columns, keys_count, thd->mem_root)) + return TRUE; + + /* + Create one single-column NULL-key for each column in + partial_match_key_parts. + */ + for (uint i= 0; i < partial_match_key_parts->n_bits; i++) + { + if (!bitmap_is_set(partial_match_key_parts, i)) + continue; + + if (result_sink->get_null_count_of_col(i) == row_count) + bitmap_set_bit(&null_only_columns, cur_keyid); + else + { + merge_keys[cur_keyid]= new Ordered_key( + cur_keyid, tmp_table, + item_in->left_expr->element_index(i), + result_sink->get_null_count_of_col(i), + result_sink->get_min_null_of_col(i), + result_sink->get_max_null_of_col(i), + row_num_to_rowid); + if (merge_keys[cur_keyid]->init(i)) + return TRUE; + merge_keys[cur_keyid]->first(); + } + ++cur_keyid; + } + } + + /* Populate the indexes with data from the temporary table. */ + tmp_table->file->ha_rnd_init(1); + tmp_table->file->extra_opt(HA_EXTRA_CACHE, + current_thd->variables.read_buff_size); + tmp_table->null_row= 0; + while (TRUE) + { + error= tmp_table->file->ha_rnd_next(tmp_table->record[0]); + if (error == HA_ERR_RECORD_DELETED) + { + /* We get this for duplicate records that should not be in tmp_table. */ + continue; + } + /* + This is a temp table that we fully own, there should be no other + cause to stop the iteration than EOF. + */ + DBUG_ASSERT(!error || error == HA_ERR_END_OF_FILE); + if (error == HA_ERR_END_OF_FILE) + { + DBUG_ASSERT(cur_rownum == tmp_table->file->stats.records); + break; + } + + /* + Save the position of this record in the row_num -> rowid mapping. + */ + tmp_table->file->position(tmp_table->record[0]); + memcpy(row_num_to_rowid + cur_rownum * rowid_length, + tmp_table->file->ref, rowid_length); + + /* Add the current row number to the corresponding keys. */ + if (non_null_key) + { + /* By definition there are no NULLs in the non-NULL key. */ + non_null_key->add_key(cur_rownum); + } + + for (uint i= (non_null_key ? 1 : 0); i < keys_count; i++) + { + /* + Check if the first and only indexed column contains NULL in the curent + row, and add the row number to the corresponding key. + */ + if (tmp_table->field[merge_keys[i]->get_field_idx(0)]->is_null()) + merge_keys[i]->set_null(cur_rownum); + else + merge_keys[i]->add_key(cur_rownum); + } + ++cur_rownum; + } + + tmp_table->file->ha_rnd_end(); + + /* Sort all the keys by their NULL selectivity. */ + my_qsort(merge_keys, keys_count, sizeof(Ordered_key*), + (qsort_cmp) cmp_keys_by_null_selectivity); + + /* Sort the keys in each of the indexes. */ + for (uint i= 0; i < keys_count; i++) + merge_keys[i]->sort_keys(); + + if (init_queue(&pq, keys_count, 0, FALSE, + subselect_rowid_merge_engine::cmp_keys_by_cur_rownum, NULL)) + return TRUE; + + return FALSE; +} + + +subselect_rowid_merge_engine::~subselect_rowid_merge_engine() +{ + /* None of the resources below is allocated if there are no ordered keys. */ + if (keys_count) + { + my_free((char*) row_num_to_rowid, MYF(0)); + for (uint i= 0; i < keys_count; i++) + delete merge_keys[i]; + delete_queue(&pq); + if (tmp_table->file->inited == handler::RND) + tmp_table->file->ha_rnd_end(); + } +} + + +void subselect_rowid_merge_engine::cleanup() +{ +} + + +/* + Quick sort comparison function to compare keys in order of decreasing bitmap + selectivity, so that the most selective keys come first. + + @param k1 first key to compare + @param k2 second key to compare + + @retval 1 if k1 is less selective than k2 + @retval 0 if k1 is equally selective as k2 + @retval -1 if k1 is more selective than k2 +*/ + +int +subselect_rowid_merge_engine::cmp_keys_by_null_selectivity(Ordered_key **k1, + Ordered_key **k2) +{ + double k1_sel= (*k1)->null_selectivity(); + double k2_sel= (*k2)->null_selectivity(); + if (k1_sel < k2_sel) + return 1; + if (k1_sel > k2_sel) + return -1; + return 0; +} + + +/* +*/ + +int +subselect_rowid_merge_engine::cmp_keys_by_cur_rownum(void *arg, + uchar *k1, uchar *k2) +{ + rownum_t r1= ((Ordered_key*) k1)->current(); + rownum_t r2= ((Ordered_key*) k2)->current(); + + return (r1 < r2) ? -1 : (r1 > r2) ? 1 : 0; +} + + +/* + Check if certain table row contains a NULL in all columns for which there is + no match in the corresponding value index. + + @retval TRUE if a NULL row exists + @retval FALSE otherwise +*/ + +bool subselect_rowid_merge_engine::test_null_row(rownum_t row_num) +{ + Ordered_key *cur_key; + uint cur_id; + for (uint i = 0; i < keys_count; i++) + { + cur_key= merge_keys[i]; + cur_id= cur_key->get_keyid(); + if (bitmap_is_set(&matching_keys, cur_id)) + { + /* + The key 'i' (with id 'cur_keyid') already matches a value in row 'row_num', + thus we skip it as it can't possibly match a NULL. + */ + continue; + } + if (!cur_key->is_null(row_num)) + return FALSE; + } + return TRUE; +} + + +/* + @retval TRUE there is a partial match (UNKNOWN) + @retval FALSE there is no match at all (FALSE) +*/ + +bool subselect_rowid_merge_engine::partial_match() +{ + Ordered_key *min_key; /* Key that contains the current minimum position. */ + rownum_t min_row_num; /* Current row number of min_key. */ + Ordered_key *cur_key; + rownum_t cur_row_num; + uint count_nulls_in_search_key= 0; + bool res= FALSE; + + /* If there is a non-NULL key, it must be the first key in the keys array. */ + DBUG_ASSERT(!non_null_key || (non_null_key && merge_keys[0] == non_null_key)); + + /* All data accesses during execution are via handler::ha_rnd_pos() */ + tmp_table->file->ha_rnd_init(0); + + /* Check if there is a match for the columns of the only non-NULL key. */ + if (non_null_key && !non_null_key->lookup()) + { + res= FALSE; + goto end; + } + + /* + If there is a NULL (sub)row that covers all NULL-able columns, + then there is a guranteed partial match, and we don't need to search + for the matching row. + */ + if (covering_null_row_width) + { + res= TRUE; + goto end; + } + + if (non_null_key) + queue_insert(&pq, (uchar *) non_null_key); + /* + Do not add the non_null_key, since it was already processed above. + */ + bitmap_clear_all(&matching_outer_cols); + for (uint i= test(non_null_key); i < keys_count; i++) + { + DBUG_ASSERT(merge_keys[i]->get_column_count() == 1); + if (merge_keys[i]->get_search_key(0)->is_null()) + { + ++count_nulls_in_search_key; + bitmap_set_bit(&matching_outer_cols, merge_keys[i]->get_keyid()); + } + else if (merge_keys[i]->lookup()) + queue_insert(&pq, (uchar *) merge_keys[i]); + } + + /* + If the outer reference consists of only NULLs, or if it has NULLs in all + nullable columns, the result is UNKNOWN. + */ + if (count_nulls_in_search_key == + ((Item_in_subselect *) item)->left_expr->cols() - + (non_null_key ? non_null_key->get_column_count() : 0)) + { + res= TRUE; + goto end; + } + + /* + If there is no NULL (sub)row that covers all NULL columns, and there is no + single match for any of the NULL columns, the result is FALSE. + */ + if (pq.elements - test(non_null_key) == 0) + { + res= FALSE; + goto end; + } + + DBUG_ASSERT(pq.elements); + + min_key= (Ordered_key*) queue_remove(&pq, 0); + min_row_num= min_key->current(); + bitmap_copy(&matching_keys, &null_only_columns); + bitmap_set_bit(&matching_keys, min_key->get_keyid()); + bitmap_union(&matching_keys, &matching_outer_cols); + if (min_key->next_same()) + queue_insert(&pq, (uchar *) min_key); + + if (pq.elements == 0) + { + /* + Check the only matching row of the only key min_key for NULL matches + in the other columns. + */ + res= test_null_row(min_row_num); + goto end; + } + + while (TRUE) + { + cur_key= (Ordered_key*) queue_remove(&pq, 0); + cur_row_num= cur_key->current(); + + if (cur_row_num == min_row_num) + bitmap_set_bit(&matching_keys, cur_key->get_keyid()); + else + { + /* Follows from the correct use of priority queue. */ + DBUG_ASSERT(cur_row_num > min_row_num); + if (test_null_row(min_row_num)) + { + res= TRUE; + goto end; + } + else + { + min_key= cur_key; + min_row_num= cur_row_num; + bitmap_copy(&matching_keys, &null_only_columns); + bitmap_set_bit(&matching_keys, min_key->get_keyid()); + bitmap_union(&matching_keys, &matching_outer_cols); + } + } + + if (cur_key->next_same()) + queue_insert(&pq, (uchar *) cur_key); + + if (pq.elements == 0) + { + /* Check the last row of the last column in PQ for NULL matches. */ + res= test_null_row(min_row_num); + goto end; + } + } + + /* We should never get here - all branches must be handled explicitly above. */ + DBUG_ASSERT(FALSE); + +end: + tmp_table->file->ha_rnd_end(); + return res; +} + + +subselect_table_scan_engine::subselect_table_scan_engine( + subselect_uniquesubquery_engine *engine_arg, + TABLE *tmp_table_arg, + Item_subselect *item_arg, + select_result_interceptor *result_arg, + List<Item> *equi_join_conds_arg, + uint covering_null_row_width_arg) + :subselect_partial_match_engine(engine_arg, tmp_table_arg, item_arg, + result_arg, equi_join_conds_arg, + covering_null_row_width_arg) +{} + + +/* + TIMOUR: + This method is based on subselect_uniquesubquery_engine::scan_table(). + Consider refactoring somehow, 80% of the code is the same. + + for each row_i in tmp_table + { + count_matches= 0; + for each row element row_i[j] + { + if (outer_ref[j] is NULL || row_i[j] is NULL || outer_ref[j] == row_i[j]) + ++count_matches; + } + if (count_matches == outer_ref.elements) + return TRUE + } + return FALSE +*/ + +bool subselect_table_scan_engine::partial_match() +{ + List_iterator_fast<Item> equality_it(*equi_join_conds); + Item *cur_eq; + uint count_matches; + int error; + bool res; + + tmp_table->file->ha_rnd_init(1); + tmp_table->file->extra_opt(HA_EXTRA_CACHE, + current_thd->variables.read_buff_size); + /* + TIMOUR: + scan_table() also calls "table->null_row= 0;", why, do we need it? + */ + for (;;) + { + error= tmp_table->file->ha_rnd_next(tmp_table->record[0]); + if (error) { + if (error == HA_ERR_RECORD_DELETED) + { + error= 0; + continue; + } + if (error == HA_ERR_END_OF_FILE) + { + error= 0; + break; + } + else + { + error= report_error(tmp_table, error); + break; + } + } + + equality_it.rewind(); + count_matches= 0; + while ((cur_eq= equality_it++)) + { + DBUG_ASSERT(cur_eq->type() == Item::FUNC_ITEM && + ((Item_func*)cur_eq)->functype() == Item_func::EQ_FUNC); + if (!cur_eq->val_int() && !cur_eq->null_value) + break; + ++count_matches; + } + if (count_matches == tmp_table->s->fields) + { + res= TRUE; /* Found a matching row. */ + goto end; + } + } + + res= FALSE; +end: + tmp_table->file->ha_rnd_end(); + return res; +} + + +void subselect_table_scan_engine::cleanup() +{ +} === modified file 'sql/item_subselect.h' --- a/sql/item_subselect.h 2010-02-11 23:59:58 +0000 +++ b/sql/item_subselect.h 2010-03-09 10:14:06 +0000 @@ -297,7 +297,7 @@ public: Representation of IN subquery predicates of the form "left_expr IN (SELECT ...)". - @detail + @details This class has: - A "subquery execution engine" (as a subclass of Item_subselect) that allows it to evaluate subqueries. (and this class participates in execution by @@ -319,6 +319,12 @@ protected: */ List<Cached_item> *left_expr_cache; bool first_execution; + /* + Set to TRUE if at query execution time we determine that this item's + value is a constant during this execution. We need this member because + it is not possible to substitute 'this' with a constant item. + */ + bool is_constant; /* expr & optimizer used in subselect rewriting to store Item for @@ -387,8 +393,8 @@ public: Item_in_subselect(Item * left_expr, st_select_lex *select_lex); Item_in_subselect() :Item_exists_subselect(), left_expr_cache(0), first_execution(TRUE), - optimizer(0), abort_on_null(0), pushed_cond_guards(NULL), - exec_method(NOT_TRANSFORMED), upper_item(0) + is_constant(FALSE), optimizer(0), abort_on_null(0), + pushed_cond_guards(NULL), exec_method(NOT_TRANSFORMED), upper_item(0) {} void cleanup(); subs_type substype() { return IN_SUBS; } @@ -421,6 +427,8 @@ public: void update_used_tables(); bool setup_engine(); bool init_left_expr_cache(); + /* Inform 'this' that it was computed, and contains a valid result. */ + void set_first_execution() { if (first_execution) first_execution= FALSE; } bool is_expensive_processor(uchar *arg); friend class Item_ref_null_helper; @@ -428,6 +436,7 @@ public: friend class Item_in_optimizer; friend class subselect_indexsubquery_engine; friend class subselect_hash_sj_engine; + friend class subselect_partial_match_engine; }; @@ -462,7 +471,8 @@ public: enum enum_engine_type {ABSTRACT_ENGINE, SINGLE_SELECT_ENGINE, UNION_ENGINE, UNIQUESUBQUERY_ENGINE, - INDEXSUBQUERY_ENGINE, HASH_SJ_ENGINE}; + INDEXSUBQUERY_ENGINE, HASH_SJ_ENGINE, + ROWID_MERGE_ENGINE, TABLE_SCAN_ENGINE}; subselect_engine(Item_subselect *si, select_result_interceptor *res) :thd(0) @@ -635,8 +645,10 @@ public: virtual void print (String *str, enum_query_type query_type); bool change_result(Item_subselect *si, select_result_interceptor *result); bool no_tables(); + int index_lookup(); /* TIMOUR: this method needs refactoring. */ int scan_table(); bool copy_ref_key(); + int copy_ref_key_simple(); /* TIMOUR: this method needs refactoring. */ bool no_rows() { return empty_result_set; } virtual enum_engine_type engine_type() { return UNIQUESUBQUERY_ENGINE; } }; @@ -705,50 +717,439 @@ inline bool Item_subselect::is_uncacheab /** - Compute an IN predicate via a hash semi-join. The subquery is materialized - during the first evaluation of the IN predicate. The IN predicate is executed - via the functionality inherited from subselect_uniquesubquery_engine. + Compute an IN predicate via a hash semi-join. This class is responsible for + the materialization of the subquery, and the selection of the correct and + optimal execution method (e.g. direct index lookup, or partial matching) for + the IN predicate. */ -class subselect_hash_sj_engine: public subselect_uniquesubquery_engine +class subselect_hash_sj_engine : public subselect_engine { protected: + /* The table into which the subquery is materialized. */ + TABLE *tmp_table; /* TRUE if the subquery was materialized into a temp table. */ bool is_materialized; /* The old engine already chosen at parse time and stored in permanent memory. Through this member we can re-create and re-prepare materialize_join for - each execution of a prepared statement. We akso resuse the functionality + each execution of a prepared statement. We also reuse the functionality of subselect_single_select_engine::[prepare | cols]. */ subselect_single_select_engine *materialize_engine; + /* The engine used to compute the IN predicate. */ + subselect_engine *lookup_engine; /* QEP to execute the subquery and materialize its result into a temporary table. Created during the first call to exec(). */ JOIN *materialize_join; - /* Temp table context of the outer select's JOIN. */ - TMP_TABLE_PARAM *tmp_param; + + /* Keyparts of the only non-NULL composite index in a rowid merge. */ + MY_BITMAP non_null_key_parts; + /* Keyparts of the single column indexes with NULL, one keypart per index. */ + MY_BITMAP partial_match_key_parts; + uint count_partial_match_columns; + uint count_null_only_columns; + /* + A conjunction of all the equality condtions between all pairs of expressions + that are arguments of an IN predicate. We need these to post-filter some + IN results because index lookups sometimes match values that are actually + not equal to the search key in SQL terms. + */ + Item_cond_and *semi_join_conds; + /* Possible execution strategies that can be used to compute hash semi-join.*/ + enum exec_strategy { + UNDEFINED, + COMPLETE_MATCH, /* Use regular index lookups. */ + PARTIAL_MATCH, /* Use some partial matching strategy. */ + PARTIAL_MATCH_MERGE, /* Use partial matching through index merging. */ + PARTIAL_MATCH_SCAN, /* Use partial matching through table scan. */ + IMPOSSIBLE /* Subquery materialization is not applicable. */ + }; + /* The chosen execution strategy. Computed after materialization. */ + exec_strategy strategy; +protected: + exec_strategy get_strategy_using_schema(); + exec_strategy get_strategy_using_data(); + size_t rowid_merge_buff_size(bool has_non_null_key, + bool has_covering_null_row, + MY_BITMAP *partial_match_key_parts); + void choose_partial_match_strategy(bool has_non_null_key, + bool has_covering_null_row, + MY_BITMAP *partial_match_key_parts); + bool make_semi_join_conds(); + subselect_uniquesubquery_engine* make_unique_engine(); public: subselect_hash_sj_engine(THD *thd, Item_subselect *in_predicate, - subselect_single_select_engine *old_engine) - :subselect_uniquesubquery_engine(thd, NULL, in_predicate, NULL), - is_materialized(FALSE), materialize_engine(old_engine), - materialize_join(NULL), tmp_param(NULL) - {} + subselect_single_select_engine *old_engine) + :subselect_engine(in_predicate, NULL), tmp_table(NULL), + is_materialized(FALSE), materialize_engine(old_engine), lookup_engine(NULL), + materialize_join(NULL), count_partial_match_columns(0), + count_null_only_columns(0), semi_join_conds(NULL), strategy(UNDEFINED) + { + set_thd(thd); + } ~subselect_hash_sj_engine(); bool init_permanent(List<Item> *tmp_columns); bool init_runtime(); void cleanup(); - int prepare() { return 0; } + int prepare() { return 0; } /* Override virtual function in base class. */ int exec(); - virtual void print (String *str, enum_query_type query_type); + virtual void print(String *str, enum_query_type query_type); uint cols() { return materialize_engine->cols(); } + uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; } + table_map upper_select_const_tables() { return 0; } + bool no_rows() { return !tmp_table->file->stats.records; } virtual enum_engine_type engine_type() { return HASH_SJ_ENGINE; } + /* + TODO: factor out all these methods in a base subselect_index_engine class + because all of them have dummy implementations and should never be called. + */ + void fix_length_and_dec(Item_cache** row);//=>base class + void exclude(); //=>base class + //=>base class + bool change_result(Item_subselect *si, select_result_interceptor *result); + bool no_tables();//=>base class +}; + + +/* + Distinguish the type od (0-based) row numbers from the type of the index into + an array of row numbers. +*/ +typedef ha_rows rownum_t; + + +/* + An Ordered_key is an in-memory table index that allows O(log(N)) time + lookups of a multi-part key. + + If the index is over a single column, then this column may contain NULLs, and + the NULLs are stored and tested separately for NULL in O(1) via is_null(). + Multi-part indexes assume that the indexed columns do not contain NULLs. + + TODO: + = Due to the unnatural assymetry between single and multi-part indexes, it + makes sense to somehow refactor or extend the class. + + = This class can be refactored into a base abstract interface, and two + subclasses: + - one to represent single-column indexes, and + - another to represent multi-column indexes. + Such separation would allow slightly more efficient implementation of + the single-column indexes. + = The current design requires such indexes to be fully recreated for each + PS (re)execution, however most of the comprising objects can be reused. +*/ + +class Ordered_key : public Sql_alloc +{ +protected: + /* + Index of the key in an array of keys. This index allows to + construct (sub)sets of keys represented by bitmaps. + */ + uint keyid; + /* The table being indexed. */ + TABLE *tbl; + /* The columns being indexed. */ + Item_field **key_columns; + /* Number of elements in 'key_columns' (number of key parts). */ + uint key_column_count; + /* + An expression, or sequence of expressions that forms the search key. + The search key is a sequence when it is Item_row. Each element of the + sequence is accessible via Item::element_index(int i). + */ + Item *search_key; + +/* Value index related members. */ + /* + The actual value index, consists of a sorted sequence of row numbers. + */ + rownum_t *key_buff; + /* Number of elements in key_buff. */ + ha_rows key_buff_elements; + /* Current element in 'key_buff'. */ + ha_rows cur_key_idx; + /* + Mapping from row numbers to row ids. The element row_num_to_rowid[i] + contains a buffer with the rowid for the row numbered 'i'. + The memory for this member is not maintanined by this class because + all Ordered_key indexes of the same table share the same mapping. + */ + uchar *row_num_to_rowid; + /* + A sequence of predicates to compare the search key with the corresponding + columns of a table row from the index. + */ + Item_func_lt **compare_pred; + +/* Null index related members. */ + MY_BITMAP null_key; + /* Count of NULLs per column. */ + ha_rows null_count; + /* The row number that contains the first NULL in a column. */ + ha_rows min_null_row; + /* The row number that contains the last NULL in a column. */ + ha_rows max_null_row; + +protected: + bool alloc_keys_buffers(); + /* + Quick sort comparison function that compares two rows of the same table + indentfied with their row numbers. + */ + int cmp_keys_by_row_data(rownum_t a, rownum_t b); + static int cmp_keys_by_row_data_and_rownum(Ordered_key *key, + rownum_t* a, rownum_t* b); + + int cmp_key_with_search_key(rownum_t row_num); + +public: + Ordered_key(uint keyid_arg, TABLE *tbl_arg, + Item *search_key_arg, ha_rows null_count_arg, + ha_rows min_null_row_arg, ha_rows max_null_row_arg, + uchar *row_num_to_rowid_arg); + ~Ordered_key(); + void cleanup(); + /* Initialize a multi-column index. */ + bool init(MY_BITMAP *columns_to_index); + /* Initialize a single-column index. */ + bool init(int col_idx); + + uint get_column_count() { return key_column_count; } + uint get_keyid() { return keyid; } + uint get_field_idx(uint i) + { + DBUG_ASSERT(i < key_column_count); + return key_columns[i]->field->field_index; + } + /* + Get the search key element that corresponds to the i-th key part of this + index. + */ + Item *get_search_key(uint i) + { + return search_key->element_index(key_columns[i]->field->field_index); + } + void add_key(rownum_t row_num) + { + /* The caller must know how many elements to add. */ + DBUG_ASSERT(key_buff_elements && cur_key_idx < key_buff_elements); + key_buff[cur_key_idx]= row_num; + ++cur_key_idx; + } + + void sort_keys(); + double null_selectivity(); + + /* + Position the current element at the first row that matches the key. + The key itself is propagated by evaluating the current value(s) of + this->search_key. + */ + bool lookup(); + /* Move the current index cursor to the first key. */ + void first() + { + DBUG_ASSERT(key_buff_elements); + cur_key_idx= 0; + } + /* TODO */ + bool next_same(); + /* Move the current index cursor to the next key. */ + bool next() + { + DBUG_ASSERT(key_buff_elements); + if (cur_key_idx < key_buff_elements - 1) + { + ++cur_key_idx; + return TRUE; + } + return FALSE; + }; + /* Return the current index element. */ + rownum_t current() + { + DBUG_ASSERT(key_buff_elements && cur_key_idx < key_buff_elements); + return key_buff[cur_key_idx]; + } + + void set_null(rownum_t row_num) + { + bitmap_set_bit(&null_key, row_num); + } + bool is_null(rownum_t row_num) + { + /* + Indexes consisting of only NULLs do not have a bitmap buffer at all. + Their only initialized member is 'n_bits', which is equal to the number + of temp table rows. + */ + if (null_count == tbl->file->stats.records) + { + DBUG_ASSERT(tbl->file->stats.records == null_key.n_bits); + return TRUE; + } + if (row_num > max_null_row || row_num < min_null_row) + return FALSE; + return bitmap_is_set(&null_key, row_num); + } + void print(String *str); +}; + + +class subselect_partial_match_engine : public subselect_engine +{ +protected: + /* The temporary table that contains a materialized subquery. */ + TABLE *tmp_table; + /* + The engine used to check whether an IN predicate is TRUE or not. If not + TRUE, then subselect_rowid_merge_engine further distinguishes between + FALSE and UNKNOWN. + */ + subselect_uniquesubquery_engine *lookup_engine; + /* A list of equalities between each pair of IN operands. */ + List<Item> *equi_join_conds; + /* + If there is a row, such that all its NULL-able components are NULL, this + member is set to the number of covered columns. If there is no covering + row, then this is 0. + */ + uint covering_null_row_width; +protected: + virtual bool partial_match()= 0; +public: + subselect_partial_match_engine(subselect_uniquesubquery_engine *engine_arg, + TABLE *tmp_table_arg, Item_subselect *item_arg, + select_result_interceptor *result_arg, + List<Item> *equi_join_conds_arg, + uint covering_null_row_width_arg); + int prepare() { return 0; } + int exec(); + void fix_length_and_dec(Item_cache**) {} + uint cols() { /* TODO: what is the correct value? */ return 1; } + uint8 uncacheable() { return UNCACHEABLE_DEPENDENT; } + void exclude() {} + table_map upper_select_const_tables() { return 0; } + bool change_result(Item_subselect*, select_result_interceptor*) + { DBUG_ASSERT(FALSE); return false; } + bool no_tables() { return false; } + bool no_rows() + { + /* + TODO: It is completely unclear what is the semantics of this + method. The current result is computed so that the call to no_rows() + from Item_in_optimizer::val_int() sets Item_in_optimizer::null_value + correctly. + */ + return !(((Item_in_subselect *) item)->null_value); + } + void print(String*, enum_query_type); + + friend void subselect_hash_sj_engine::cleanup(); +}; + + +class subselect_rowid_merge_engine: public subselect_partial_match_engine +{ +protected: + /* + Mapping from row numbers to row ids. The rowids are stored sequentially + in the array - rowid[i] is located in row_num_to_rowid + i * rowid_length. + */ + uchar *row_num_to_rowid; + /* + A subset of all the keys for which there is a match for the same row. + Used during execution. Computed for each outer reference + */ + MY_BITMAP matching_keys; + /* + The columns of the outer reference that are NULL. Computed for each + outer reference. + */ + MY_BITMAP matching_outer_cols; + /* + Columns that consist of only NULLs. Such columns match any value. + Computed once per query execution. + */ + MY_BITMAP null_only_columns; + /* + Indexes of row numbers, sorted by <column_value, row_number>. If an + index may contain NULLs, the NULLs are stored efficiently in a bitmap. + + The indexes are sorted by the selectivity of their NULL sub-indexes, the + one with the fewer NULLs is first. Thus, if there is any index on + non-NULL columns, it is contained in keys[0]. + */ + Ordered_key **merge_keys; + /* The number of elements in keys. */ + uint keys_count; + /* + An index on all non-NULL columns of 'tmp_table'. The index has the + logical form: <[v_i1 | ... | v_ik], rownum>. It allows to find the row + number where the columns c_i1,...,c1_k contain the values v_i1,...,v_ik. + If such an index exists, it is always the first element of 'keys'. + */ + Ordered_key *non_null_key; + /* + Priority queue of Ordered_key indexes, one per NULLable column. + This queue is used by the partial match algorithm in method exec(). + */ + QUEUE pq; +protected: + /* + Comparison function to compare keys in order of decreasing bitmap + selectivity. + */ + static int cmp_keys_by_null_selectivity(Ordered_key **k1, Ordered_key **k2); + /* + Comparison function used by the priority queue pq, the 'smaller' key + is the one with the smaller current row number. + */ + static int cmp_keys_by_cur_rownum(void *arg, uchar *k1, uchar *k2); + + bool test_null_row(rownum_t row_num); + bool partial_match(); +public: + subselect_rowid_merge_engine(subselect_uniquesubquery_engine *engine_arg, + TABLE *tmp_table_arg, uint keys_count_arg, + uint covering_null_row_width_arg, + Item_subselect *item_arg, + select_result_interceptor *result_arg, + List<Item> *equi_join_conds_arg) + :subselect_partial_match_engine(engine_arg, tmp_table_arg, item_arg, + result_arg, equi_join_conds_arg, + covering_null_row_width_arg), + keys_count(keys_count_arg), non_null_key(NULL) + { + thd= lookup_engine->get_thd(); + } + ~subselect_rowid_merge_engine(); + bool init(MY_BITMAP *non_null_key_parts, MY_BITMAP *partial_match_key_parts); + void cleanup(); + virtual enum_engine_type engine_type() { return ROWID_MERGE_ENGINE; } }; + +class subselect_table_scan_engine: public subselect_partial_match_engine +{ +protected: + bool partial_match(); +public: + subselect_table_scan_engine(subselect_uniquesubquery_engine *engine_arg, + TABLE *tmp_table_arg, Item_subselect *item_arg, + select_result_interceptor *result_arg, + List<Item> *equi_join_conds_arg, + uint covering_null_row_width_arg); + void cleanup(); + virtual enum_engine_type engine_type() { return TABLE_SCAN_ENGINE; } +}; === modified file 'sql/mysql_priv.h' --- a/sql/mysql_priv.h 2010-01-17 14:55:08 +0000 +++ b/sql/mysql_priv.h 2010-03-09 10:14:06 +0000 @@ -552,12 +552,14 @@ protected: #define OPTIMIZER_SWITCH_LOOSE_SCAN 64 #define OPTIMIZER_SWITCH_MATERIALIZATION 128 #define OPTIMIZER_SWITCH_SEMIJOIN 256 +#define OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE 512 +#define OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN 1024 #ifdef DBUG_OFF -# define OPTIMIZER_SWITCH_LAST 512 +# define OPTIMIZER_SWITCH_LAST 2048 #else -# define OPTIMIZER_SWITCH_TABLE_ELIMINATION 512 -# define OPTIMIZER_SWITCH_LAST 1024 +# define OPTIMIZER_SWITCH_TABLE_ELIMINATION 2048 +# define OPTIMIZER_SWITCH_LAST 4096 #endif #ifdef DBUG_OFF @@ -570,8 +572,10 @@ protected: OPTIMIZER_SWITCH_FIRSTMATCH | \ OPTIMIZER_SWITCH_LOOSE_SCAN | \ OPTIMIZER_SWITCH_MATERIALIZATION | \ - OPTIMIZER_SWITCH_SEMIJOIN) -#else + OPTIMIZER_SWITCH_SEMIJOIN | \ + OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\ + OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN) +#else # define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \ OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \ OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \ @@ -581,7 +585,9 @@ protected: OPTIMIZER_SWITCH_FIRSTMATCH | \ OPTIMIZER_SWITCH_LOOSE_SCAN | \ OPTIMIZER_SWITCH_MATERIALIZATION | \ - OPTIMIZER_SWITCH_SEMIJOIN) + OPTIMIZER_SWITCH_SEMIJOIN | \ + OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE|\ + OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN) #endif /* === modified file 'sql/mysqld.cc' --- a/sql/mysqld.cc 2010-01-17 14:55:08 +0000 +++ b/sql/mysqld.cc 2010-03-09 10:14:06 +0000 @@ -301,7 +301,9 @@ static const char *optimizer_switch_name "index_merge","index_merge_union","index_merge_sort_union", "index_merge_intersection", "index_condition_pushdown", - "firstmatch","loosescan","materialization", "semijoin", + "firstmatch","loosescan","materialization", "semijoin", + "partial_match_rowid_merge", + "partial_match_table_scan", #ifndef DBUG_OFF "table_elimination", #endif @@ -320,6 +322,8 @@ static const unsigned int optimizer_swit sizeof("loosescan") - 1, sizeof("materialization") - 1, sizeof("semijoin") - 1, + sizeof("partial_match_rowid_merge") - 1, + sizeof("partial_match_table_scan") - 1, #ifndef DBUG_OFF sizeof("table_elimination") - 1, #endif @@ -5794,7 +5798,8 @@ enum options_mysqld OPT_RECORD_RND_BUFFER, OPT_DIV_PRECINCREMENT, OPT_RELAY_LOG_SPACE_LIMIT, OPT_RELAY_LOG_PURGE, OPT_SLAVE_NET_TIMEOUT, OPT_SLAVE_COMPRESSED_PROTOCOL, OPT_SLOW_LAUNCH_TIME, - OPT_SLAVE_TRANS_RETRIES, OPT_READONLY, OPT_DEBUGGING, OPT_DEBUG_FLUSH, + OPT_SLAVE_TRANS_RETRIES, OPT_READONLY, OPT_ROWID_MERGE_BUFF_SIZE, + OPT_DEBUGGING, OPT_DEBUG_FLUSH, OPT_SORT_BUFFER, OPT_TABLE_OPEN_CACHE, OPT_TABLE_DEF_CACHE, OPT_THREAD_CONCURRENCY, OPT_THREAD_CACHE_SIZE, OPT_TMP_TABLE_SIZE, OPT_THREAD_STACK, @@ -7130,6 +7135,11 @@ The minimum value for this variable is 4 (uchar**) &max_system_variables.range_alloc_block_size, 0, GET_ULONG, REQUIRED_ARG, RANGE_ALLOC_BLOCK_SIZE, RANGE_ALLOC_BLOCK_SIZE, (longlong) ULONG_MAX, 0, 1024, 0}, + {"rowid_merge_buff_size", OPT_ROWID_MERGE_BUFF_SIZE, + "The size of the buffers used [NOT] IN evaluation via partial matching.", + (uchar**) &global_system_variables.rowid_merge_buff_size, + (uchar**) &max_system_variables.rowid_merge_buff_size, 0, GET_ULONG, + REQUIRED_ARG, 8*1024*1024L, 0, MAX_MEM_TABLE_SIZE/2, 0, 1, 0}, {"read_buffer_size", OPT_RECORD_BUFFER, "Each thread that does a sequential scan allocates a buffer of this size for each table it scans. If you do many sequential scans, you may want to increase this value.", (uchar**) &global_system_variables.read_buff_size, === modified file 'sql/opt_subselect.cc' --- a/sql/opt_subselect.cc 2010-03-15 06:32:54 +0000 +++ b/sql/opt_subselect.cc 2010-03-15 19:52:58 +0000 @@ -187,10 +187,10 @@ int check_and_do_in_subquery_rewrites(JO does not call setup_subquery_materialization(). We could make SELECT ... FROM DUAL call that function but that doesn't seem to be the case that is worth handling. - 4. Subquery predicate is a top-level predicate - (this implies it is not negated) - TODO: this is a limitation that should be lifted once we - implement correct NULL semantics (WL#3830) + 4. Either the subquery predicate is a top-level predicate, or at + least one partial match strategy is enabled. If no partial match + strategy is enabled, then materialization cannot be used for + non-top-level queries because it cannot handle NULLs correctly. 5. Subquery is non-correlated TODO: This is an overly restrictive condition. It can be extended to: @@ -204,8 +204,8 @@ int check_and_do_in_subquery_rewrites(JO (*) The subquery must be part of a SELECT statement. The current condition also excludes multi-table update statements. - We have to determine whether we will perform subquery materialization - before calling the IN=>EXISTS transformation, so that we know whether to + Determine whether we will perform subquery materialization before + calling the IN=>EXISTS transformation, so that we know whether to perform the whole transformation or only that part of it which wraps Item_in_subselect in an Item_in_optimizer. */ @@ -215,12 +215,14 @@ int check_and_do_in_subquery_rewrites(JO select_lex->master_unit()->first_select()->leaf_tables && // 3 thd->lex->sql_command == SQLCOM_SELECT && // * select_lex->outer_select()->leaf_tables && // 3A - subquery_types_allow_materialization(in_subs)) + subquery_types_allow_materialization(in_subs) && + // psergey-todo: duplicated_subselect_card_check: where it's done? + (in_subs->is_top_level_item() || + optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_ROWID_MERGE) || + optimizer_flag(thd, OPTIMIZER_SWITCH_PARTIAL_MATCH_TABLE_SCAN)) &&//4 + !in_subs->is_correlated && // 5 + in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6 { - // psergey-todo: duplicated_subselect_card_check: where it's done? - if (in_subs->is_top_level_item() && // 4 - !in_subs->is_correlated && // 5 - in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6 in_subs->exec_method= Item_in_subselect::MATERIALIZATION; } === modified file 'sql/set_var.cc' --- a/sql/set_var.cc 2009-12-22 12:49:15 +0000 +++ b/sql/set_var.cc 2010-03-09 10:14:06 +0000 @@ -540,6 +540,9 @@ static sys_var_long_ptr sys_query_cache_ static sys_var_thd_ulong sys_range_alloc_block_size(&vars, "range_alloc_block_size", &SV::range_alloc_block_size); +static sys_var_thd_ulong sys_rowid_merge_buff_size(&vars, "rowid_merge_buff_size", + &SV::rowid_merge_buff_size); + static sys_var_thd_ulong sys_query_alloc_block_size(&vars, "query_alloc_block_size", &SV::query_alloc_block_size, 0, fix_thd_mem_root); === modified file 'sql/sql_class.cc' --- a/sql/sql_class.cc 2010-02-17 21:59:41 +0000 +++ b/sql/sql_class.cc 2010-02-19 21:55:57 +0000 @@ -42,6 +42,7 @@ #include "sp_rcontext.h" #include "sp_cache.h" +#include "sql_select.h" /* declares create_tmp_table() */ /* The following is used to initialise Table_ident with a internal @@ -2877,6 +2878,71 @@ bool select_dumpvar::send_eof() return 0; } + +bool +select_materialize_with_stats:: +create_result_table(THD *thd_arg, List<Item> *column_types, + bool is_union_distinct, ulonglong options, + const char *table_alias, bool bit_fields_as_long) +{ + DBUG_ASSERT(table == 0); + tmp_table_param.field_count= column_types->elements; + tmp_table_param.bit_fields_as_long= bit_fields_as_long; + + if (! (table= create_tmp_table(thd_arg, &tmp_table_param, *column_types, + (ORDER*) 0, is_union_distinct, 1, + options, HA_POS_ERROR, (char*) table_alias))) + return TRUE; + + col_stat= (Column_statistics*) table->in_use->alloc(table->s->fields * + sizeof(Column_statistics)); + if (!stat) + return TRUE; + + cleanup(); + + table->file->extra(HA_EXTRA_WRITE_CACHE); + table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + return FALSE; +} + + +/** + Override select_union::send_data to analyze each row for NULLs and to + update null_statistics before sending data to the client. + + @return TRUE if fatal error when sending data to the client + @return FALSE on success +*/ + +bool select_materialize_with_stats::send_data(List<Item> &items) +{ + List_iterator_fast<Item> item_it(items); + Item *cur_item; + Column_statistics *cur_col_stat= col_stat; + uint nulls_in_row= 0; + + ++count_rows; + + while ((cur_item= item_it++)) + { + if (cur_item->is_null()) + { + ++cur_col_stat->null_count; + cur_col_stat->max_null_row= count_rows; + if (!cur_col_stat->min_null_row) + cur_col_stat->min_null_row= count_rows; + ++nulls_in_row; + } + ++cur_col_stat; + } + if (nulls_in_row > max_nulls_in_row) + max_nulls_in_row= nulls_in_row; + + return select_union::send_data(items); +} + + /**************************************************************************** TMP_TABLE_PARAM ****************************************************************************/ === modified file 'sql/sql_class.h' --- a/sql/sql_class.h 2010-02-17 21:59:41 +0000 +++ b/sql/sql_class.h 2010-03-09 10:14:06 +0000 @@ -343,6 +343,8 @@ struct system_variables ulong mrr_buff_size; ulong div_precincrement; ulong sortbuff_size; + /* Total size of all buffers used by the subselect_rowid_merge_engine. */ + ulong rowid_merge_buff_size; ulong thread_handling; ulong tx_isolation; ulong completion_type; @@ -2740,19 +2742,20 @@ public: class select_union :public select_result_interceptor { +protected: TMP_TABLE_PARAM tmp_table_param; public: TABLE *table; - select_union() :table(0) {} + select_union() :table(0) { tmp_table_param.init(); } int prepare(List<Item> &list, SELECT_LEX_UNIT *u); bool send_data(List<Item> &items); bool send_eof(); bool flush(); - bool create_result_table(THD *thd, List<Item> *column_types, - bool is_distinct, ulonglong options, - const char *alias, bool bit_fields_as_long); + virtual bool create_result_table(THD *thd, List<Item> *column_types, + bool is_distinct, ulonglong options, + const char *alias, bool bit_fields_as_long); }; /* Base subselect interface class */ @@ -2776,6 +2779,74 @@ public: bool send_data(List<Item> &items); }; + +/* + This class specializes select_union to collect statistics about the + data stored in the temp table. Currently the class collects statistcs + about NULLs. +*/ + +class select_materialize_with_stats : public select_union +{ +protected: + class Column_statistics + { + public: + /* Count of NULLs per column. */ + ha_rows null_count; + /* The row number that contains the first NULL in a column. */ + ha_rows min_null_row; + /* The row number that contains the last NULL in a column. */ + ha_rows max_null_row; + }; + + /* Array of statistics data per column. */ + Column_statistics* col_stat; + + /* + The number of columns in the biggest sub-row that consists of only + NULL values. + */ + ha_rows max_nulls_in_row; + /* + Count of rows writtent to the temp table. This is redundant as it is + already stored in handler::stats.records, however that one is relatively + expensive to compute (given we need that for evry row). + */ + ha_rows count_rows; + +public: + select_materialize_with_stats() {} + virtual bool create_result_table(THD *thd, List<Item> *column_types, + bool is_distinct, ulonglong options, + const char *alias, bool bit_fields_as_long); + bool init_result_table(ulonglong select_options); + bool send_data(List<Item> &items); + void cleanup() + { + memset(col_stat, 0, table->s->fields * sizeof(Column_statistics)); + max_nulls_in_row= 0; + count_rows= 0; + } + ha_rows get_null_count_of_col(uint idx) + { + DBUG_ASSERT(idx < table->s->fields); + return col_stat[idx].null_count; + } + ha_rows get_max_null_of_col(uint idx) + { + DBUG_ASSERT(idx < table->s->fields); + return col_stat[idx].max_null_row; + } + ha_rows get_min_null_of_col(uint idx) + { + DBUG_ASSERT(idx < table->s->fields); + return col_stat[idx].min_null_row; + } + ha_rows get_max_nulls_in_row() { return max_nulls_in_row; } +}; + + /* used in independent ALL/ANY optimisation */ class select_max_min_finder_subselect :public select_subselect { === modified file 'sql/sql_select.cc' --- a/sql/sql_select.cc 2010-03-14 18:25:43 +0000 +++ b/sql/sql_select.cc 2010-03-15 19:52:58 +0000 @@ -874,6 +874,9 @@ JOIN::optimize() { DBUG_PRINT("info",("No tables")); error= 0; + /* Create all structures needed for materialized subquery execution. */ + if (setup_subquery_materialization()) + DBUG_RETURN(1); DBUG_RETURN(0); } error= -1; // Error is sent to client @@ -11258,7 +11261,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARA param->group_buff=group_buff; share->keys=1; share->uniques= test(using_unique_constraint); - table->key_info=keyinfo; + table->key_info= table->s->key_info= keyinfo; keyinfo->key_part=key_part_info; keyinfo->flags=HA_NOSAME; keyinfo->usable_key_parts=keyinfo->key_parts= param->group_parts; @@ -11344,7 +11347,7 @@ create_tmp_table(THD *thd,TMP_TABLE_PARA keyinfo->key_parts * sizeof(KEY_PART_INFO)))) goto err; bzero((void*) key_part_info, keyinfo->key_parts * sizeof(KEY_PART_INFO)); - table->key_info=keyinfo; + table->key_info= table->s->key_info= keyinfo; keyinfo->key_part=key_part_info; keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL; keyinfo->key_length= 0; // Will compute the sum of the parts below.