[Maria-developers] bzr commit into Mariadb 5.2, with Maria 2.0:maria/5.2 branch (igor:2742)
#At lp:maria/5.2 based on revid:igor@askmonty.org-20091222151209-8ijjadlltdvmr1dy 2742 Igor Babaev 2009-12-22 [merge] Merge removed: sql/ds_mrr.cc sql/ds_mrr.h added: sql/multi_range_read.cc sql/multi_range_read.h sql/opt_index_cond_pushdown.cc sql/opt_range_mrr.cc modified: include/my_handler.h libmysqld/Makefile.am mysql-test/include/common-tests.inc mysql-test/r/ctype_cp1251.result mysql-test/r/index_merge_myisam.result mysql-test/r/innodb_mrr.result mysql-test/r/myisam_mrr.result mysql-test/t/ctype_cp1251.test mysql-test/t/innodb_mrr.test mysql-test/t/myisam_mrr.test sql/Makefile.am sql/handler.h sql/mysql_priv.h sql/mysqld.cc sql/opt_range.cc sql/opt_range.h sql/set_var.cc sql/sql_class.h storage/maria/ha_maria.cc storage/maria/ha_maria.h storage/maria/ma_key.c storage/maria/maria_def.h storage/myisam/mi_key.c storage/myisam/mi_rkey.c storage/myisam/mi_rnext.c storage/myisam/mi_rnext_same.c storage/xtradb/handler/ha_innodb.cc storage/xtradb/include/row0mysql.h storage/xtradb/row/row0sel.c === modified file 'include/my_handler.h' --- a/include/my_handler.h 2009-12-15 07:16:46 +0000 +++ b/include/my_handler.h 2009-12-22 12:33:21 +0000 @@ -138,11 +138,13 @@ extern void my_handler_error_unregister( */ typedef enum icp_result { - ICP_NO_MATCH, - ICP_MATCH, - ICP_OUT_OF_RANGE + ICP_ERROR=-1, + ICP_NO_MATCH=0, + ICP_MATCH=1, + ICP_OUT_OF_RANGE=2 } ICP_RESULT; + #ifdef __cplusplus } #endif === modified file 'libmysqld/Makefile.am' --- a/libmysqld/Makefile.am 2009-12-21 02:26:15 +0000 +++ b/libmysqld/Makefile.am 2009-12-22 12:49:15 +0000 @@ -45,7 +45,7 @@ libmysqlsources = errmsg.c get_password. 
noinst_HEADERS = embedded_priv.h emb_qcache.h -sqlsources = ds_mrr.cc derror.cc field.cc field_conv.cc strfunc.cc filesort.cc \ +sqlsources = derror.cc field.cc field_conv.cc strfunc.cc filesort.cc \ ha_ndbcluster.cc ha_ndbcluster_cond.cc \ ha_ndbcluster_binlog.cc ha_partition.cc \ handler.cc sql_handler.cc \ @@ -77,7 +77,8 @@ sqlsources = ds_mrr.cc derror.cc field.c rpl_filter.cc sql_partition.cc sql_builtin.cc sql_plugin.cc \ sql_tablespace.cc \ rpl_injector.cc my_user.c partition_info.cc \ - sql_servers.cc event_parse_data.cc opt_table_elimination.cc + sql_servers.cc event_parse_data.cc opt_table_elimination.cc \ + multi_range_read.cc opt_index_cond_pushdown.cc libmysqld_int_a_SOURCES= $(libmysqld_sources) nodist_libmysqld_int_a_SOURCES= $(libmysqlsources) $(sqlsources) === modified file 'mysql-test/include/common-tests.inc' --- a/mysql-test/include/common-tests.inc 2006-06-09 07:23:59 +0000 +++ b/mysql-test/include/common-tests.inc 2009-12-22 12:33:21 +0000 @@ -1332,7 +1332,7 @@ explain select fld1 from t2 where fld1=2 # Search with a key with LIKE constant # If the like starts with a certain letter key will be used. 
# - +--sorted_result select fld1,fld3 from t2 where companynr = 37 and fld3 like 'f%'; select fld3 from t2 where fld3 like "L%" and fld3 = "ok"; select fld3 from t2 where (fld3 like "C%" and fld3 = "Chantilly"); === modified file 'mysql-test/r/ctype_cp1251.result' --- a/mysql-test/r/ctype_cp1251.result 2009-12-15 07:16:46 +0000 +++ b/mysql-test/r/ctype_cp1251.result 2009-12-22 12:33:21 +0000 @@ -65,8 +65,8 @@ insert into t1 (a) values ('air'), ('tn_fakira'),('vw_silvia'),('vw_starshi'),('vw_geo'),('vw_b0x1'); select * from t1 where a like 'we_%'; a b -we_toshko NULL -we_ivo NULL we_iliyan NULL +we_ivo NULL we_martin NULL +we_toshko NULL drop table t1; === modified file 'mysql-test/r/index_merge_myisam.result' --- a/mysql-test/r/index_merge_myisam.result 2009-12-21 02:26:15 +0000 +++ b/mysql-test/r/index_merge_myisam.result 2009-12-22 12:49:15 +0000 @@ -1419,19 +1419,19 @@ drop table t1; # select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on set optimizer_switch='index_merge=off,index_merge_union=off'; select @@optimizer_switch; @@optimizer_switch -index_merge=off,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=on +index_merge=off,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on set optimizer_switch='index_merge_union=on'; select @@optimizer_switch; @@optimizer_switch -index_merge=off,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on +index_merge=off,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on set optimizer_switch='default,index_merge_sort_union=off'; select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on 
+index_merge=on,index_merge_union=on,index_merge_sort_union=off,index_merge_intersection=on,index_condition_pushdown=on set optimizer_switch=4; ERROR 42000: Variable 'optimizer_switch' can't be set to the value of '4' set optimizer_switch=NULL; @@ -1458,21 +1458,21 @@ set optimizer_switch=default; set optimizer_switch='index_merge=off,index_merge_union=off,default'; select @@optimizer_switch; @@optimizer_switch -index_merge=off,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=on +index_merge=off,index_merge_union=off,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on set optimizer_switch=default; select @@global.optimizer_switch; @@global.optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on set @@global.optimizer_switch=default; select @@global.optimizer_switch; @@global.optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on # # Check index_merge's @@optimizer_switch flags # select @@optimizer_switch; @@optimizer_switch -index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on create table t0 (a int); insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); create table t1 (a int, b int, c int, filler char(100), @@ -1582,5 +1582,5 @@ id select_type table type possible_keys set optimizer_switch=default; show variables like 'optimizer_switch'; Variable_name Value -optimizer_switch index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on +optimizer_switch 
index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on drop table t0, t1; === modified file 'mysql-test/r/innodb_mrr.result' --- a/mysql-test/r/innodb_mrr.result 2009-12-19 19:54:54 +0000 +++ b/mysql-test/r/innodb_mrr.result 2009-12-22 12:33:21 +0000 @@ -292,10 +292,10 @@ NULL 9 0 NULL 9 0 drop table t1, t2; set storage_engine= @save_storage_engine; -set @read_rnd_buffer_size_save= @@read_rnd_buffer_size; -set read_rnd_buffer_size=64; +set @mrr_buffer_size_save= @@mrr_buffer_size; +set mrr_buffer_size=64; Warnings: -Warning 1292 Truncated incorrect read_rnd_buffer_size value: '64' +Warning 1292 Truncated incorrect mrr_buffer_size value: '64' create table t1(a int); insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); create table t2(a char(8), b char(8), c char(8), filler char(100), key(a,b,c) ) engine=InnoDB; @@ -318,10 +318,10 @@ filler char(10), key(d), primary key (a, insert into t2 select A.a, B.a, B.a, A.a, 'filler' from t1 A, t1 B; explain select * from t2 force index (d) where d < 10; id select_type table type possible_keys key key_len ref rows Extra -1 SIMPLE t2 range d d 5 NULL 53 Using index condition; Using MRR +1 SIMPLE t2 range d d 5 NULL # Using index condition; Using MRR drop table t2; drop table t1; -set @@read_rnd_buffer_size= @read_rnd_buffer_size_save; +set @@mrr_buffer_size= @mrr_buffer_size_save; create table t1 (f1 int not null, f2 int not null,f3 int not null, f4 char(1), primary key (f1,f2), key ix(f3))Engine=InnoDB; select * from t1 where (f3>=5 and f3<=10) or (f3>=1 and f3<=4); f1 f2 f3 f4 === modified file 'mysql-test/r/myisam_mrr.result' --- a/mysql-test/r/myisam_mrr.result 2009-12-15 17:23:55 +0000 +++ b/mysql-test/r/myisam_mrr.result 2009-12-22 14:43:00 +0000 @@ -1,8 +1,8 @@ drop table if exists t1, t2, t3; -set @read_rnd_buffer_size_save= @@read_rnd_buffer_size; -set read_rnd_buffer_size=79; +set @mrr_buffer_size_save= @@mrr_buffer_size; +set mrr_buffer_size=79; 
Warnings: -Warning 1292 Truncated incorrect read_rnd_buffer_size value: '79' +Warning 1292 Truncated incorrect mrr_buffer_size value: '79' create table t1(a int); show create table t1; Table Create Table @@ -293,7 +293,7 @@ NULL 7 0 NULL 9 0 NULL 9 0 drop table t1, t2; -set @@read_rnd_buffer_size= @read_rnd_buffer_size_save; +set @@mrr_buffer_size= @mrr_buffer_size_save; CREATE TABLE t1 ( ID int(10) unsigned NOT NULL AUTO_INCREMENT, col1 int(10) unsigned DEFAULT NULL, @@ -388,3 +388,29 @@ explain select * from t1 where a < 20 o id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range a a 5 NULL 20 Using index condition drop table t0, t1; +# +# Part of MWL#67: DS-MRR backport: add an @@optimizer_switch flag for +# index_condition pushdown: +# - engine_condition_pushdown does not affect ICP +select @@optimizer_switch; +@@optimizer_switch +index_merge=on,index_merge_union=on,index_merge_sort_union=on,index_merge_intersection=on,index_condition_pushdown=on +create table t0 (a int); +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t1 (a int, b int, key(a)); +insert into t1 select A.a + 10 *(B.a + 10*C.a), A.a + 10 *(B.a + 10*C.a) from t0 A, t0 B, t0 C; +A query that will use ICP: +explain select * from t1 where a < 20; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 20 Using index condition; Using MRR +set @save_optimizer_switch=@@optimizer_switch; +set optimizer_switch='index_condition_pushdown=off'; +explain select * from t1 where a < 20; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 20 Using where; Using MRR +set optimizer_switch='index_condition_pushdown=on'; +explain select * from t1 where a < 20; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 5 NULL 20 Using index condition; Using MRR +set optimizer_switch=@save_optimizer_switch; +drop table t0, t1; === modified 
file 'mysql-test/t/ctype_cp1251.test' --- a/mysql-test/t/ctype_cp1251.test 2005-10-13 14:16:19 +0000 +++ b/mysql-test/t/ctype_cp1251.test 2009-12-22 12:33:21 +0000 @@ -44,6 +44,7 @@ insert into t1 (a) values ('air'), ('we_martin'),('vw_grado'),('vw_vasko'),('tn_vili'),('tn_kalina'), ('tn_fakira'),('vw_silvia'),('vw_starshi'),('vw_geo'),('vw_b0x1'); +--sorted_result select * from t1 where a like 'we_%'; drop table t1; === modified file 'mysql-test/t/innodb_mrr.test' --- a/mysql-test/t/innodb_mrr.test 2009-12-15 07:16:46 +0000 +++ b/mysql-test/t/innodb_mrr.test 2009-12-22 12:33:21 +0000 @@ -12,8 +12,8 @@ set storage_engine=InnoDB; set storage_engine= @save_storage_engine; # Try big rowid sizes -set @read_rnd_buffer_size_save= @@read_rnd_buffer_size; -set read_rnd_buffer_size=64; +set @mrr_buffer_size_save= @@mrr_buffer_size; +set mrr_buffer_size=64; # By default InnoDB will fill values only for key parts used by the query, # which will cause DS-MRR to supply an invalid tuple on scan restoration. 
@@ -38,11 +38,12 @@ drop table t2; create table t2 (a char(100), b char(100), c char(100), d int, filler char(10), key(d), primary key (a,b,c)) engine= innodb; insert into t2 select A.a, B.a, B.a, A.a, 'filler' from t1 A, t1 B; +--replace_column 9 # explain select * from t2 force index (d) where d < 10; drop table t2; drop table t1; -set @@read_rnd_buffer_size= @read_rnd_buffer_size_save; +set @@mrr_buffer_size= @mrr_buffer_size_save; # # BUG#33033 "MySQL/InnoDB crashes with simple select range query" === modified file 'mysql-test/t/myisam_mrr.test' --- a/mysql-test/t/myisam_mrr.test 2009-12-15 17:23:55 +0000 +++ b/mysql-test/t/myisam_mrr.test 2009-12-22 14:43:00 +0000 @@ -6,12 +6,12 @@ drop table if exists t1, t2, t3; --enable_warnings -set @read_rnd_buffer_size_save= @@read_rnd_buffer_size; -set read_rnd_buffer_size=79; +set @mrr_buffer_size_save= @@mrr_buffer_size; +set mrr_buffer_size=79; -- source include/mrr_tests.inc -set @@read_rnd_buffer_size= @read_rnd_buffer_size_save; +set @@mrr_buffer_size= @mrr_buffer_size_save; # # BUG#30622: Incorrect query results for MRR + filesort @@ -96,3 +96,32 @@ insert into t1 select A.a + 10 *(B.a + 1 explain select * from t1 where a < 20 order by a; drop table t0, t1; +-- echo # +-- echo # Part of MWL#67: DS-MRR backport: add an @@optimizer_switch flag for +-- echo # index_condition pushdown: +-- echo # - engine_condition_pushdown does not affect ICP + + +# Check that optimizer_switch is present +--replace_regex /,table_elimination=o[nf]*// +select @@optimizer_switch; + +# Check if it affects ICP +create table t0 (a int); +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t1 (a int, b int, key(a)); +insert into t1 select A.a + 10 *(B.a + 10*C.a), A.a + 10 *(B.a + 10*C.a) from t0 A, t0 B, t0 C; + +-- echo A query that will use ICP: +explain select * from t1 where a < 20; + +set @save_optimizer_switch=@@optimizer_switch; +set optimizer_switch='index_condition_pushdown=off'; +explain select * from t1 
where a < 20; + +set optimizer_switch='index_condition_pushdown=on'; +explain select * from t1 where a < 20; + +set optimizer_switch=@save_optimizer_switch; + +drop table t0, t1; === modified file 'sql/Makefile.am' --- a/sql/Makefile.am 2009-12-21 02:26:15 +0000 +++ b/sql/Makefile.am 2009-12-22 14:43:00 +0000 @@ -47,7 +47,7 @@ mysqld_LDADD = libndb.la \ $(LDADD) $(CXXLDFLAGS) $(WRAPLIBS) @LIBDL@ \ $(yassl_libs) $(openssl_libs) @MYSQLD_EXTRA_LIBS@ -noinst_HEADERS = ds_mrr.h item.h item_func.h item_sum.h item_cmpfunc.h \ +noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \ item_strfunc.h item_timefunc.h \ item_xmlfunc.h \ item_create.h item_subselect.h item_row.h \ @@ -77,9 +77,10 @@ noinst_HEADERS = ds_mrr.h item.h item_fu sql_plugin.h authors.h event_parse_data.h \ event_data_objects.h event_scheduler.h \ sql_partition.h partition_info.h partition_element.h \ - contributors.h sql_servers.h + contributors.h sql_servers.h \ + multi_range_read.h -mysqld_SOURCES = ds_mrr.cc sql_lex.cc sql_handler.cc sql_partition.cc \ +mysqld_SOURCES = sql_lex.cc sql_handler.cc sql_partition.cc \ item.cc item_sum.cc item_buff.cc item_func.cc \ item_cmpfunc.cc item_strfunc.cc item_timefunc.cc \ thr_malloc.cc item_create.cc item_subselect.cc \ @@ -123,7 +124,9 @@ mysqld_SOURCES = ds_mrr.cc sql_lex.cc sq sql_plugin.cc sql_binlog.cc \ sql_builtin.cc sql_tablespace.cc partition_info.cc \ sql_servers.cc event_parse_data.cc \ - opt_table_elimination.cc + opt_table_elimination.cc \ + multi_range_read.cc \ + opt_index_cond_pushdown.cc nodist_mysqld_SOURCES = mini_client_errors.c pack.c client.c my_time.c my_user.c @@ -151,7 +154,7 @@ BUILT_SOURCES = $(BUILT_MAINT_SRC) lex_ EXTRA_DIST = udf_example.c udf_example.def $(BUILT_MAINT_SRC) \ nt_servc.cc nt_servc.h \ message.mc message.h message.rc MSG00001.bin \ - CMakeLists.txt + CMakeLists.txt opt_range_mrr.cc CLEANFILES = lex_hash.h sql_yacc.output link_sources DISTCLEANFILES = $(EXTRA_PROGRAMS) === removed file 'sql/ds_mrr.cc' --- 
a/sql/ds_mrr.cc 2009-12-21 02:26:15 +0000 +++ b/sql/ds_mrr.cc 1970-01-01 00:00:00 +0000 @@ -1,1337 +0,0 @@ -#include "mysql_priv.h" -#include "sql_select.h" - -/* ************************************************************************** - * DS-MRR implementation - ***************************************************************************/ - -/** - DS-MRR: Initialize and start MRR scan - - Initialize and start the MRR scan. Depending on the mode parameter, this - may use default or DS-MRR implementation. - - @param h Table handler to be used - @param key Index to be used - @param seq_funcs Interval sequence enumeration functions - @param seq_init_param Interval sequence enumeration parameter - @param n_ranges Number of ranges in the sequence. - @param mode HA_MRR_* modes to use - @param buf INOUT Buffer to use - - @retval 0 Ok, Scan started. - @retval other Error -*/ - -int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, - void *seq_init_param, uint n_ranges, uint mode, - HANDLER_BUFFER *buf) -{ - uint elem_size; - Item *pushed_cond= NULL; - handler *new_h2= 0; - DBUG_ENTER("DsMrr_impl::dsmrr_init"); - - /* - index_merge may invoke a scan on an object for which dsmrr_info[_const] - has not been called, so set the owner handler here as well. 
- */ - h= h_arg; - if (mode & HA_MRR_USE_DEFAULT_IMPL || mode & HA_MRR_SORTED) - { - use_default_impl= TRUE; - const int retval= - h->handler::multi_range_read_init(seq_funcs, seq_init_param, - n_ranges, mode, buf); - DBUG_RETURN(retval); - } - rowids_buf= buf->buffer; - - is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION); - - if (is_mrr_assoc) - status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count); - - rowids_buf_end= buf->buffer_end; - elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*); - rowids_buf_last= rowids_buf + - ((rowids_buf_end - rowids_buf)/ elem_size)* - elem_size; - rowids_buf_end= rowids_buf_last; - - /* - There can be two cases: - - This is the first call since index_init(), h2==NULL - Need to setup h2 then. - - This is not the first call, h2 is initalized and set up appropriately. - The caller might have called h->index_init(), need to switch h to - rnd_pos calls. - */ - if (!h2) - { - /* Create a separate handler object to do rndpos() calls. */ - THD *thd= current_thd; - /* - ::clone() takes up a lot of stack, especially on 64 bit platforms. - The constant 5 is an empiric result. - */ - if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2)) - DBUG_RETURN(1); - DBUG_ASSERT(h->active_index != MAX_KEY); - uint mrr_keyno= h->active_index; - - /* Create a separate handler object to do rndpos() calls. */ - if (!(new_h2= h->clone(thd->mem_root)) || - new_h2->ha_external_lock(thd, F_RDLCK)) - { - delete new_h2; - DBUG_RETURN(1); - } - - if (mrr_keyno == h->pushed_idx_cond_keyno) - pushed_cond= h->pushed_idx_cond; - - /* - Caution: this call will invoke this->dsmrr_close(). Do not put the - created secondary table handler into this->h2 or it will delete it. 
- */ - if (h->ha_index_end()) - { - h2=new_h2; - goto error; - } - - h2= new_h2; /* Ok, now can put it into h2 */ - table->prepare_for_position(); - h2->extra(HA_EXTRA_KEYREAD); - - if (h2->ha_index_init(mrr_keyno, FALSE)) - goto error; - - use_default_impl= FALSE; - if (pushed_cond) - h2->idx_cond_push(mrr_keyno, pushed_cond); - } - else - { - /* - We get here when the access alternates betwen MRR scan(s) and non-MRR - scans. - - Calling h->index_end() will invoke dsmrr_close() for this object, - which will delete h2. We need to keep it, so save put it away and dont - let it be deleted: - */ - handler *save_h2= h2; - h2= NULL; - int res= (h->inited == handler::INDEX && h->ha_index_end()); - h2= save_h2; - use_default_impl= FALSE; - if (res) - goto error; - } - - if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges, - mode, buf) || - dsmrr_fill_buffer()) - { - goto error; - } - /* - If the above call has scanned through all intervals in *seq, then - adjust *buf to indicate that the remaining buffer space will not be used. - */ - if (dsmrr_eof) - buf->end_of_used_area= rowids_buf_last; - - /* - h->inited == INDEX may occur when 'range checked for each record' is - used. - */ - if ((h->inited != handler::RND) && - ((h->inited==handler::INDEX? 
h->ha_index_end(): FALSE) || - (h->ha_rnd_init(FALSE)))) - goto error; - - use_default_impl= FALSE; - h->mrr_funcs= *seq_funcs; - - DBUG_RETURN(0); -error: - h2->ha_index_or_rnd_end(); - h2->ha_external_lock(current_thd, F_UNLCK); - h2->close(); - delete h2; - h2= NULL; - DBUG_RETURN(1); -} - - -void DsMrr_impl::dsmrr_close() -{ - DBUG_ENTER("DsMrr_impl::dsmrr_close"); - if (h2) - { - h2->ha_index_or_rnd_end(); - h2->ha_external_lock(current_thd, F_UNLCK); - h2->close(); - delete h2; - h2= NULL; - } - use_default_impl= TRUE; - DBUG_VOID_RETURN; -} - - -static int rowid_cmp(void *h, uchar *a, uchar *b) -{ - return ((handler*)h)->cmp_ref(a, b); -} - - -/** - DS-MRR: Fill the buffer with rowids and sort it by rowid - - {This is an internal function of DiskSweep MRR implementation} - Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into - buffer. When the buffer is full or scan is completed, sort the buffer by - rowid and return. - - The function assumes that rowids buffer is empty when it is invoked. 
- - @param h Table handler - - @retval 0 OK, the next portion of rowids is in the buffer, - properly ordered - @retval other Error -*/ - -int DsMrr_impl::dsmrr_fill_buffer() -{ - char *range_info; - int res; - DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer"); - - rowids_buf_cur= rowids_buf; - while ((rowids_buf_cur < rowids_buf_end) && - !(res= h2->handler::multi_range_read_next(&range_info))) - { - KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range; - if (h2->mrr_funcs.skip_index_tuple && - h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr)) - continue; - - /* Put rowid, or {rowid, range_id} pair into the buffer */ - h2->position(table->record[0]); - memcpy(rowids_buf_cur, h2->ref, h2->ref_length); - rowids_buf_cur += h2->ref_length; - - if (is_mrr_assoc) - { - memcpy(rowids_buf_cur, &range_info, sizeof(void*)); - rowids_buf_cur += sizeof(void*); - } - } - - if (res && res != HA_ERR_END_OF_FILE) - DBUG_RETURN(res); - dsmrr_eof= test(res == HA_ERR_END_OF_FILE); - - /* Sort the buffer contents by rowid */ - uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*); - uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size; - - my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp, - (void*)h); - rowids_buf_last= rowids_buf_cur; - rowids_buf_cur= rowids_buf; - DBUG_RETURN(0); -} - - -/** - DS-MRR implementation: multi_range_read_next() function -*/ - -int DsMrr_impl::dsmrr_next(char **range_info) -{ - int res; - uchar *cur_range_info= 0; - uchar *rowid; - - if (use_default_impl) - return h->handler::multi_range_read_next(range_info); - - do - { - if (rowids_buf_cur == rowids_buf_last) - { - if (dsmrr_eof) - { - res= HA_ERR_END_OF_FILE; - goto end; - } - res= dsmrr_fill_buffer(); - if (res) - goto end; - } - - /* return eof if there are no rowids in the buffer after re-fill attempt */ - if (rowids_buf_cur == rowids_buf_last) - { - res= HA_ERR_END_OF_FILE; - goto end; - } - rowid= rowids_buf_cur; - - if (is_mrr_assoc) - 
memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar**)); - - rowids_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc); - if (h2->mrr_funcs.skip_record && - h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid)) - continue; - res= h->rnd_pos(table->record[0], rowid); - break; - } while (true); - - if (is_mrr_assoc) - { - memcpy(range_info, rowid + h->ref_length, sizeof(void*)); - } -end: - return res; -} - - -/** - DS-MRR implementation: multi_range_read_info() function -*/ -ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows, - uint *bufsz, uint *flags, COST_VECT *cost) -{ - ha_rows res; - uint def_flags= *flags; - uint def_bufsz= *bufsz; - - /* Get cost/flags/mem_usage of default MRR implementation */ - res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz, - &def_flags, cost); - DBUG_ASSERT(!res); - - if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || - choose_mrr_impl(keyno, rows, &def_flags, &def_bufsz, cost)) - { - /* Default implementation is choosen */ - DBUG_PRINT("info", ("Default MRR implementation choosen")); - *flags= def_flags; - *bufsz= def_bufsz; - } - else - { - /* *flags and *bufsz were set by choose_mrr_impl */ - DBUG_PRINT("info", ("DS-MRR implementation choosen")); - } - return 0; -} - - -/** - DS-MRR Implementation: multi_range_read_info_const() function -*/ - -ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq, - void *seq_init_param, uint n_ranges, - uint *bufsz, uint *flags, COST_VECT *cost) -{ - ha_rows rows; - uint def_flags= *flags; - uint def_bufsz= *bufsz; - /* Get cost/flags/mem_usage of default MRR implementation */ - rows= h->handler::multi_range_read_info_const(keyno, seq, seq_init_param, - n_ranges, &def_bufsz, - &def_flags, cost); - if (rows == HA_POS_ERROR) - { - /* Default implementation can't perform MRR scan => we can't either */ - return rows; - } - - /* - If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to - use the 
default MRR implementation (we need it for UPDATE/DELETE). - Otherwise, make a choice based on cost and @@optimizer_use_mrr. - */ - if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || - choose_mrr_impl(keyno, rows, flags, bufsz, cost)) - { - DBUG_PRINT("info", ("Default MRR implementation choosen")); - *flags= def_flags; - *bufsz= def_bufsz; - } - else - { - /* *flags and *bufsz were set by choose_mrr_impl */ - DBUG_PRINT("info", ("DS-MRR implementation choosen")); - } - return rows; -} - - -/** - Check if key has partially-covered columns - - We can't use DS-MRR to perform range scans when the ranges are over - partially-covered keys, because we'll not have full key part values - (we'll have their prefixes from the index) and will not be able to check - if we've reached the end the range. - - @param keyno Key to check - - @todo - Allow use of DS-MRR in cases where the index has partially-covered - components but they are not used for scanning. - - @retval TRUE Yes - @retval FALSE No -*/ - -bool key_uses_partial_cols(TABLE *table, uint keyno) -{ - KEY_PART_INFO *kp= table->key_info[keyno].key_part; - KEY_PART_INFO *kp_end= kp + table->key_info[keyno].key_parts; - for (; kp != kp_end; kp++) - { - if (!kp->field->part_of_key.is_set(keyno)) - return TRUE; - } - return FALSE; -} - -/** - DS-MRR Internals: Choose between Default MRR implementation and DS-MRR - - Make the choice between using Default MRR implementation and DS-MRR. - This function contains common functionality factored out of dsmrr_info() - and dsmrr_info_const(). The function assumes that the default MRR - implementation's applicability requirements are satisfied. 
- - @param keyno Index number - @param rows E(full rows to be retrieved) - @param flags IN MRR flags provided by the MRR user - OUT If DS-MRR is choosen, flags of DS-MRR implementation - else the value is not modified - @param bufsz IN If DS-MRR is choosen, buffer use of DS-MRR implementation - else the value is not modified - @param cost IN Cost of default MRR implementation - OUT If DS-MRR is choosen, cost of DS-MRR scan - else the value is not modified - - @retval TRUE Default MRR implementation should be used - @retval FALSE DS-MRR implementation should be used -*/ - -bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, - uint *bufsz, COST_VECT *cost) -{ - COST_VECT dsmrr_cost; - bool res; - THD *thd= current_thd; - if (thd->variables.optimizer_use_mrr == 2 || *flags & HA_MRR_INDEX_ONLY || - (keyno == table->s->primary_key && h->primary_key_is_clustered()) || - key_uses_partial_cols(table, keyno)) - { - /* Use the default implementation */ - *flags |= HA_MRR_USE_DEFAULT_IMPL; - return TRUE; - } - - uint add_len= table->key_info[keyno].key_length + h->ref_length; - *bufsz -= add_len; - if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost)) - return TRUE; - *bufsz += add_len; - - bool force_dsmrr; - /* - If @@optimizer_use_mrr==force, then set cost of DS-MRR to be minimum of - DS-MRR and Default implementations cost. This allows one to force use of - DS-MRR whenever it is applicable without affecting other cost-based - choices. 
- */ - if ((force_dsmrr= (thd->variables.optimizer_use_mrr == 1)) && - dsmrr_cost.total_cost() > cost->total_cost()) - dsmrr_cost= *cost; - - if (force_dsmrr || dsmrr_cost.total_cost() <= cost->total_cost()) - { - *flags &= ~HA_MRR_USE_DEFAULT_IMPL; /* Use the DS-MRR implementation */ - *flags &= ~HA_MRR_SORTED; /* We will return unordered output */ - *cost= dsmrr_cost; - res= FALSE; - } - else - { - /* Use the default MRR implementation */ - res= TRUE; - } - return res; -} - - -static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost); - - -/** - Get cost of DS-MRR scan - - @param keynr Index to be used - @param rows E(Number of rows to be scanned) - @param flags Scan parameters (HA_MRR_* flags) - @param buffer_size INOUT Buffer size - @param cost OUT The cost - - @retval FALSE OK - @retval TRUE Error, DS-MRR cannot be used (the buffer is too small - for even 1 rowid) -*/ - -bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, - uint *buffer_size, COST_VECT *cost) -{ - ulong max_buff_entries, elem_size; - ha_rows rows_in_full_step, rows_in_last_step; - uint n_full_steps; - double index_read_cost; - - elem_size= h->ref_length + sizeof(void*) * (!test(flags & HA_MRR_NO_ASSOCIATION)); - max_buff_entries = *buffer_size / elem_size; - - if (!max_buff_entries) - return TRUE; /* Buffer has not enough space for even 1 rowid */ - - /* Number of iterations we'll make with full buffer */ - n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries); - - /* - Get numbers of rows we'll be processing in - - non-last sweep, with full buffer - - last iteration, with non-full buffer - */ - rows_in_full_step= max_buff_entries; - rows_in_last_step= rows % max_buff_entries; - - /* Adjust buffer size if we expect to use only part of the buffer */ - if (n_full_steps) - { - get_sort_and_sweep_cost(table, rows, cost); - cost->multiply(n_full_steps); - } - else - { - cost->zero(); - *buffer_size= max(*buffer_size, - 
(size_t)(1.2*rows_in_last_step) * elem_size + - h->ref_length + table->key_info[keynr].key_length); - } - - COST_VECT last_step_cost; - get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost); - cost->add(&last_step_cost); - - if (n_full_steps != 0) - cost->mem_cost= *buffer_size; - else - cost->mem_cost= (double)rows_in_last_step * elem_size; - - /* Total cost of all index accesses */ - index_read_cost= h->index_only_read_time(keynr, (double)rows); - cost->add_io(index_read_cost, 1 /* Random seeks */); - return FALSE; -} - - -/* - Get cost of one sort-and-sweep step - - SYNOPSIS - get_sort_and_sweep_cost() - table Table being accessed - nrows Number of rows to be sorted and retrieved - cost OUT The cost - - DESCRIPTION - Get cost of these operations: - - sort an array of #nrows ROWIDs using qsort - - read #nrows records from table in a sweep. -*/ - -static -void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost) -{ - if (nrows) - { - get_sweep_read_cost(table, nrows, FALSE, cost); - /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */ - double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID); - if (cmp_op < 3) - cmp_op= 3; - cost->cpu_cost += cmp_op * log2(cmp_op); - } - else - cost->zero(); -} - - -/** - Get cost of reading nrows table records in a "disk sweep" - - A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made - for an ordered sequence of rowids. - - We assume hard disk IO. The read is performed as follows: - - 1. The disk head is moved to the needed cylinder - 2. The controller waits for the plate to rotate - 3. The data is transferred - - Time to do #3 is insignificant compared to #2+#1. - - Time to move the disk head is proportional to head travel distance. - - Time to wait for the plate to rotate depends on whether the disk head - was moved or not. - - If disk head wasn't moved, the wait time is proportional to distance - between the previous block and the block we're reading. 
- - If the head was moved, we don't know how much we'll need to wait for the - plate to rotate. We assume the wait time to be a variate with a mean of - 0.5 of full rotation time. - - Our cost units are "random disk seeks". The cost of random disk seek is - actually not a constant, it depends one range of cylinders we're going - to access. We make it constant by introducing a fuzzy concept of "typical - datafile length" (it's fuzzy as it's hard to tell whether it should - include index file, temp.tables etc). Then random seek cost is: - - 1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length - - We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9. - - @param table Table to be accessed - @param nrows Number of rows to retrieve - @param interrupted TRUE <=> Assume that the disk sweep will be - interrupted by other disk IO. FALSE - otherwise. - @param cost OUT The cost. -*/ - -void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, - COST_VECT *cost) -{ - DBUG_ENTER("get_sweep_read_cost"); - - cost->zero(); - if (table->file->primary_key_is_clustered()) - { - cost->io_count= table->file->read_time(table->s->primary_key, - (uint) nrows, nrows); - } - else - { - double n_blocks= - ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE); - double busy_blocks= - n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows))); - if (busy_blocks < 1.0) - busy_blocks= 1.0; - - DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks, - busy_blocks)); - cost->io_count= busy_blocks; - - if (!interrupted) - { - /* Assume reading is done in one 'sweep' */ - cost->avg_io_cost= (DISK_SEEK_BASE_COST + - DISK_SEEK_PROP_COST*n_blocks/busy_blocks); - } - } - DBUG_PRINT("info",("returning cost=%g", cost->total_cost())); - DBUG_VOID_RETURN; -} - - -/* ************************************************************************** - * DS-MRR implementation ends - ***************************************************************************/ 
- -/* ************************************************************************** - * Index Condition Pushdown code starts - ***************************************************************************/ -/* - Check if given expression uses only table fields covered by the given index - - SYNOPSIS - uses_index_fields_only() - item Expression to check - tbl The table having the index - keyno The index number - other_tbls_ok TRUE <=> Fields of other non-const tables are allowed - - DESCRIPTION - Check if given expression only uses fields covered by index #keyno in the - table tbl. The expression can use any fields in any other tables. - - The expression is guaranteed not to be AND or OR - those constructs are - handled outside of this function. - - RETURN - TRUE Yes - FALSE No -*/ - -bool uses_index_fields_only(Item *item, TABLE *tbl, uint keyno, - bool other_tbls_ok) -{ - if (item->const_item()) - return TRUE; - - /* - Don't push down the triggered conditions. Nested outer joins execution - code may need to evaluate a condition several times (both triggered and - untriggered), and there is no way to put thi - TODO: Consider cloning the triggered condition and using the copies for: - 1. push the first copy down, to have most restrictive index condition - possible - 2. Put the second copy into tab->select_cond. 
- */ - if (item->type() == Item::FUNC_ITEM && - ((Item_func*)item)->functype() == Item_func::TRIG_COND_FUNC) - return FALSE; - - if (!(item->used_tables() & tbl->map)) - return other_tbls_ok; - - Item::Type item_type= item->type(); - switch (item_type) { - case Item::FUNC_ITEM: - { - /* This is a function, apply condition recursively to arguments */ - Item_func *item_func= (Item_func*)item; - Item **child; - Item **item_end= (item_func->arguments()) + item_func->argument_count(); - for (child= item_func->arguments(); child != item_end; child++) - { - if (!uses_index_fields_only(*child, tbl, keyno, other_tbls_ok)) - return FALSE; - } - return TRUE; - } - case Item::COND_ITEM: - { - /* - This is a AND/OR condition. Regular AND/OR clauses are handled by - make_cond_for_index() which will chop off the part that can be - checked with index. This code is for handling non-top-level AND/ORs, - e.g. func(x AND y). - */ - List_iterator<Item> li(*((Item_cond*)item)->argument_list()); - Item *item; - while ((item=li++)) - { - if (!uses_index_fields_only(item, tbl, keyno, other_tbls_ok)) - return FALSE; - } - return TRUE; - } - case Item::FIELD_ITEM: - { - Item_field *item_field= (Item_field*)item; - if (item_field->field->table != tbl) - return TRUE; - /* - The below is probably a repetition - the first part checks the - other two, but let's play it safe: - */ - return item_field->field->part_of_key.is_set(keyno) && - item_field->field->type() != MYSQL_TYPE_GEOMETRY && - item_field->field->type() != MYSQL_TYPE_BLOB; - } - case Item::REF_ITEM: - return uses_index_fields_only(item->real_item(), tbl, keyno, - other_tbls_ok); - default: - return FALSE; /* Play it safe, don't push unknown non-const items */ - } -} - -#define ICP_COND_USES_INDEX_ONLY 10 - -/* - Get a part of the condition that can be checked using only index fields - - SYNOPSIS - make_cond_for_index() - cond The source condition - table The table that is partially available - keyno The index in the above table. 
Only fields covered by the index - are available - other_tbls_ok TRUE <=> Fields of other non-const tables are allowed - - DESCRIPTION - Get a part of the condition that can be checked when for the given table - we have values only of fields covered by some index. The condition may - refer to other tables, it is assumed that we have values of all of their - fields. - - Example: - make_cond_for_index( - "cond(t1.field) AND cond(t2.key1) AND cond(t2.non_key) AND cond(t2.key2)", - t2, keyno(t2.key1)) - will return - "cond(t1.field) AND cond(t2.key2)" - - RETURN - Index condition, or NULL if no condition could be inferred. -*/ - -Item *make_cond_for_index(Item *cond, TABLE *table, uint keyno, - bool other_tbls_ok) -{ - if (!cond) - return NULL; - if (cond->type() == Item::COND_ITEM) - { - uint n_marked= 0; - if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) - { - table_map used_tables= 0; - Item_cond_and *new_cond=new Item_cond_and; - if (!new_cond) - return (COND*) 0; - List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); - Item *item; - while ((item=li++)) - { - Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok); - if (fix) - { - new_cond->argument_list()->push_back(fix); - used_tables|= fix->used_tables(); - } - n_marked += test(item->marker == ICP_COND_USES_INDEX_ONLY); - } - if (n_marked ==((Item_cond*)cond)->argument_list()->elements) - cond->marker= ICP_COND_USES_INDEX_ONLY; - switch (new_cond->argument_list()->elements) { - case 0: - return (COND*) 0; - case 1: - new_cond->used_tables_cache= used_tables; - return new_cond->argument_list()->head(); - default: - new_cond->quick_fix_field(); - new_cond->used_tables_cache= used_tables; - return new_cond; - } - } - else /* It's OR */ - { - Item_cond_or *new_cond=new Item_cond_or; - if (!new_cond) - return (COND*) 0; - List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); - Item *item; - while ((item=li++)) - { - Item *fix= make_cond_for_index(item, table, keyno, 
other_tbls_ok); - if (!fix) - return (COND*) 0; - new_cond->argument_list()->push_back(fix); - n_marked += test(item->marker == ICP_COND_USES_INDEX_ONLY); - } - if (n_marked ==((Item_cond*)cond)->argument_list()->elements) - cond->marker= ICP_COND_USES_INDEX_ONLY; - new_cond->quick_fix_field(); - new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache; - new_cond->top_level_item(); - return new_cond; - } - } - - if (!uses_index_fields_only(cond, table, keyno, other_tbls_ok)) - return (COND*) 0; - cond->marker= ICP_COND_USES_INDEX_ONLY; - return cond; -} - - -Item *make_cond_remainder(Item *cond, bool exclude_index) -{ - if (exclude_index && cond->marker == ICP_COND_USES_INDEX_ONLY) - return 0; /* Already checked */ - - if (cond->type() == Item::COND_ITEM) - { - table_map tbl_map= 0; - if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) - { - /* Create new top level AND item */ - Item_cond_and *new_cond=new Item_cond_and; - if (!new_cond) - return (COND*) 0; - List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); - Item *item; - while ((item=li++)) - { - Item *fix= make_cond_remainder(item, exclude_index); - if (fix) - { - new_cond->argument_list()->push_back(fix); - tbl_map |= fix->used_tables(); - } - } - switch (new_cond->argument_list()->elements) { - case 0: - return (COND*) 0; - case 1: - return new_cond->argument_list()->head(); - default: - new_cond->quick_fix_field(); - ((Item_cond*)new_cond)->used_tables_cache= tbl_map; - return new_cond; - } - } - else /* It's OR */ - { - Item_cond_or *new_cond=new Item_cond_or; - if (!new_cond) - return (COND*) 0; - List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); - Item *item; - while ((item=li++)) - { - Item *fix= make_cond_remainder(item, FALSE); - if (!fix) - return (COND*) 0; - new_cond->argument_list()->push_back(fix); - tbl_map |= fix->used_tables(); - } - new_cond->quick_fix_field(); - ((Item_cond*)new_cond)->used_tables_cache= tbl_map; - 
new_cond->top_level_item(); - return new_cond; - } - } - return cond; -} - - -/* - Try to extract and push the index condition - - SYNOPSIS - push_index_cond() - tab A join tab that has tab->table->file and its condition - in tab->select_cond - keyno Index for which extract and push the condition - other_tbls_ok TRUE <=> Fields of other non-const tables are allowed - - DESCRIPTION - Try to extract and push the index condition down to table handler -*/ - -void push_index_cond(JOIN_TAB *tab, uint keyno, bool other_tbls_ok) -{ - DBUG_ENTER("push_index_cond"); - Item *idx_cond; - bool do_index_cond_pushdown= - ((tab->table->file->index_flags(keyno, 0, 1) & - HA_DO_INDEX_COND_PUSHDOWN) && - tab->join->thd->variables.engine_condition_pushdown); - - /* - Do not try index condition pushdown on indexes which have partially-covered - columns. Unpacking from a column prefix into index tuple is not a supported - operation in some engines, see e.g. MySQL BUG#42991. - TODO: a better solution would be not to consider partially-covered columns - as parts of the index and still produce/check index condition for - fully-covered index columns. - */ - KEY *key_info= tab->table->key_info + keyno; - for (uint kp= 0; kp < key_info->key_parts; kp++) - { - if ((key_info->key_part[kp].key_part_flag & HA_PART_KEY_SEG)) - { - do_index_cond_pushdown= FALSE; - break; - } - } - - /* - When WL#5116 is done this DBUG statement must be removed. It's just a - temporary hack to allow us to discriminate whether a test failure relates - to *Engine* or *Index* Condition Pushdown. 
- */ - DBUG_EXECUTE_IF("optimizer_no_icp", do_index_cond_pushdown= false;); - if (do_index_cond_pushdown) - { - DBUG_EXECUTE("where", - print_where(tab->select_cond, "full cond", QT_ORDINARY);); - - idx_cond= make_cond_for_index(tab->select_cond, tab->table, keyno, - other_tbls_ok); - - DBUG_EXECUTE("where", - print_where(idx_cond, "idx cond", QT_ORDINARY);); - - if (idx_cond) - { - Item *idx_remainder_cond= 0; - tab->pre_idx_push_select_cond= tab->select_cond; - /* - For BKA cache we store condition to special BKA cache field - because evaluation of the condition requires additional operations - before the evaluation. This condition is used in - JOIN_CACHE_BKA[_UNIQUE]::skip_index_tuple() functions. - */ - if (tab->use_join_cache && - /* - if cache is used then the value is TRUE only - for BKA[_UNIQUE] cache (see check_join_cache_usage func). - In this case other_tbls_ok is an equivalent of - cache->is_key_access(). - */ - other_tbls_ok && - (idx_cond->used_tables() & - ~(tab->table->map | tab->join->const_table_map))) - tab->cache_idx_cond= idx_cond; - else - idx_remainder_cond= tab->table->file->idx_cond_push(keyno, idx_cond); - - /* - Disable eq_ref's "lookup cache" if we've pushed down an index - condition. - TODO: This check happens to work on current ICP implementations, but - there may exist a compliant implementation that will not work - correctly with it. Sort this out when we stabilize the condition - pushdown APIs. 
- */ - if (idx_remainder_cond != idx_cond) - tab->ref.disable_cache= TRUE; - - Item *row_cond= make_cond_remainder(tab->select_cond, TRUE); - - DBUG_EXECUTE("where", - print_where(row_cond, "remainder cond", QT_ORDINARY);); - - if (row_cond) - { - if (!idx_remainder_cond) - tab->select_cond= row_cond; - else - { - COND *new_cond= new Item_cond_and(row_cond, idx_remainder_cond); - tab->select_cond= new_cond; - tab->select_cond->quick_fix_field(); - ((Item_cond_and*)tab->select_cond)->used_tables_cache= - row_cond->used_tables() | idx_remainder_cond->used_tables(); - } - } - else - tab->select_cond= idx_remainder_cond; - if (tab->select) - { - DBUG_EXECUTE("where", - print_where(tab->select->cond, - "select_cond", - QT_ORDINARY);); - - tab->select->cond= tab->select_cond; - } - } - } - DBUG_VOID_RETURN; -} - -/* ************************************************************************** - * Default MRR implementation starts - ***************************************************************************/ - - -/**************************************************************************** - * Default MRR implementation (MRR to non-MRR converter) - ***************************************************************************/ - -/** - Get cost and other information about MRR scan over a known list of ranges - - Calculate estimated cost and other information about an MRR scan for given - sequence of ranges. - - @param keyno Index number - @param seq Range sequence to be traversed - @param seq_init_param First parameter for seq->init() - @param n_ranges_arg Number of ranges in the sequence, or 0 if the caller - can't efficiently determine it - @param bufsz INOUT IN: Size of the buffer available for use - OUT: Size of the buffer that is expected to be actually - used, or 0 if buffer is not needed. 
- @param flags INOUT A combination of HA_MRR_* flags - @param cost OUT Estimated cost of MRR access - - @note - This method (or an overriding one in a derived class) must check for - thd->killed and return HA_POS_ERROR if it is not zero. This is required - for a user to be able to interrupt the calculation by killing the - connection/query. - - @retval - HA_POS_ERROR Error or the engine is unable to perform the requested - scan. Values of OUT parameters are undefined. - @retval - other OK, *cost contains cost of the scan, *bufsz and *flags - contain scan parameters. -*/ - -ha_rows -handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, - void *seq_init_param, uint n_ranges_arg, - uint *bufsz, uint *flags, COST_VECT *cost) -{ - KEY_MULTI_RANGE range; - range_seq_t seq_it; - ha_rows rows, total_rows= 0; - uint n_ranges=0; - THD *thd= current_thd; - - /* Default MRR implementation doesn't need buffer */ - *bufsz= 0; - - seq_it= seq->init(seq_init_param, n_ranges, *flags); - while (!seq->next(seq_it, &range)) - { - if (unlikely(thd->killed != 0)) - return HA_POS_ERROR; - - n_ranges++; - key_range *min_endp, *max_endp; - if (range.range_flag & GEOM_FLAG) - { - /* In this case tmp_min_flag contains the handler-read-function */ - range.start_key.flag= (ha_rkey_function) (range.range_flag ^ GEOM_FLAG); - min_endp= &range.start_key; - max_endp= NULL; - } - else - { - min_endp= range.start_key.length? &range.start_key : NULL; - max_endp= range.end_key.length? 
&range.end_key : NULL; - } - if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE)) - rows= 1; /* there can be at most one row */ - else - { - if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp, - max_endp))) - { - /* Can't scan one range => can't do MRR scan at all */ - total_rows= HA_POS_ERROR; - break; - } - } - total_rows += rows; - } - - if (total_rows != HA_POS_ERROR) - { - /* The following calculation is the same as in multi_range_read_info(): */ - *flags |= HA_MRR_USE_DEFAULT_IMPL; - cost->zero(); - cost->avg_io_cost= 1; /* assume random seeks */ - if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2) - cost->io_count= index_only_read_time(keyno, (uint)total_rows); - else - cost->io_count= read_time(keyno, n_ranges, total_rows); - cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01; - } - return total_rows; -} - - -/** - Get cost and other information about MRR scan over some sequence of ranges - - Calculate estimated cost and other information about an MRR scan for some - sequence of ranges. - - The ranges themselves will be known only at execution phase. When this - function is called we only know number of ranges and a (rough) E(#records) - within those ranges. - - Currently this function is only called for "n-keypart singlepoint" ranges, - i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN" - - The flags parameter is a combination of those flags: HA_MRR_SORTED, - HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS. - - @param keyno Index number - @param n_ranges Estimated number of ranges (i.e. intervals) in the - range sequence. - @param n_rows Estimated total number of records contained within all - of the ranges - @param bufsz INOUT IN: Size of the buffer available for use - OUT: Size of the buffer that will be actually used, or - 0 if buffer is not needed. 
- @param flags INOUT A combination of HA_MRR_* flags - @param cost OUT Estimated cost of MRR access - - @retval - 0 OK, *cost contains cost of the scan, *bufsz and *flags contain scan - parameters. - @retval - other Error or can't perform the requested scan -*/ - -ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows, - uint *bufsz, uint *flags, COST_VECT *cost) -{ - *bufsz= 0; /* Default implementation doesn't need a buffer */ - - *flags |= HA_MRR_USE_DEFAULT_IMPL; - - cost->zero(); - cost->avg_io_cost= 1; /* assume random seeks */ - - /* Produce the same cost as non-MRR code does */ - if (*flags & HA_MRR_INDEX_ONLY) - cost->io_count= index_only_read_time(keyno, n_rows); - else - cost->io_count= read_time(keyno, n_ranges, n_rows); - return 0; -} - - -/** - Initialize the MRR scan - - Initialize the MRR scan. This function may do heavyweight scan - initialization like row prefetching/sorting/etc (NOTE: but better not do - it here as we may not need it, e.g. if we never satisfy WHERE clause on - previous tables. For many implementations it would be natural to do such - initializations in the first multi_read_range_next() call) - - mode is a combination of the following flags: HA_MRR_SORTED, - HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION - - @param seq Range sequence to be traversed - @param seq_init_param First parameter for seq->init() - @param n_ranges Number of ranges in the sequence - @param mode Flags, see the description section for the details - @param buf INOUT: memory buffer to be used - - @note - One must have called index_init() before calling this function. Several - multi_range_read_init() calls may be made in course of one query. - - Until WL#2623 is done (see its text, section 3.2), the following will - also hold: - The caller will guarantee that if "seq->init == mrr_ranges_array_init" - then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures. - This property will only be used by NDB handler until WL#2623 is done. 
- - Buffer memory management is done according to the following scenario: - The caller allocates the buffer and provides it to the callee by filling - the members of HANDLER_BUFFER structure. - The callee consumes all or some fraction of the provided buffer space, and - sets the HANDLER_BUFFER members accordingly. - The callee may use the buffer memory until the next multi_range_read_init() - call is made, all records have been read, or until index_end() call is - made, whichever comes first. - - @retval 0 OK - @retval 1 Error -*/ - -int -handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param, - uint n_ranges, uint mode, HANDLER_BUFFER *buf) -{ - DBUG_ENTER("handler::multi_range_read_init"); - mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode); - mrr_funcs= *seq_funcs; - mrr_is_output_sorted= test(mode & HA_MRR_SORTED); - mrr_have_range= FALSE; - DBUG_RETURN(0); -} - - -/** - Get next record in MRR scan - - Default MRR implementation: read the next record - - @param range_info OUT Undefined if HA_MRR_NO_ASSOCIATION flag is in effect - Otherwise, the opaque value associated with the range - that contains the returned record. - - @retval 0 OK - @retval other Error code -*/ - -int handler::multi_range_read_next(char **range_info) -{ - int UNINIT_VAR(result); - int range_res; - DBUG_ENTER("handler::multi_range_read_next"); - - if (!mrr_have_range) - { - mrr_have_range= TRUE; - goto start; - } - - do - { - /* Save a call if there can be only one row in range. */ - if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE)) - { - result= read_range_next(); - /* On success or non-EOF errors jump to the end. */ - if (result != HA_ERR_END_OF_FILE) - break; - } - else - { - if (was_semi_consistent_read()) - goto scan_it_again; - /* - We need to set this for the last range only, but checking this - condition is more expensive than just setting the result code. 
- */ - result= HA_ERR_END_OF_FILE; - } - -start: - /* Try the next range(s) until one matches a record. */ - while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range))) - { -scan_it_again: - result= read_range_first(mrr_cur_range.start_key.keypart_map ? - &mrr_cur_range.start_key : 0, - mrr_cur_range.end_key.keypart_map ? - &mrr_cur_range.end_key : 0, - test(mrr_cur_range.range_flag & EQ_RANGE), - mrr_is_output_sorted); - if (result != HA_ERR_END_OF_FILE) - break; - } - } - while ((result == HA_ERR_END_OF_FILE) && !range_res); - - *range_info= mrr_cur_range.ptr; - DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result)); - DBUG_RETURN(result); -} - === removed file 'sql/ds_mrr.h' --- a/sql/ds_mrr.h 2009-12-15 21:35:55 +0000 +++ b/sql/ds_mrr.h 1970-01-01 00:00:00 +0000 @@ -1,71 +0,0 @@ -/* - This file contains declarations for - - Disk-Sweep MultiRangeRead (DS-MRR) implementation - - Index Condition Pushdown helper functions -*/ - -/** - A Disk-Sweep MRR interface implementation - - This implementation makes range (and, in the future, 'ref') scans to read - table rows in disk sweeps. - - Currently it is used by MyISAM and InnoDB. Potentially it can be used with - any table handler that has non-clustered indexes and on-disk rows. -*/ - -class DsMrr_impl -{ -public: - typedef void (handler::*range_check_toggle_func_t)(bool on); - - DsMrr_impl() - : h2(NULL) {}; - - /* - The "owner" handler object (the one that calls dsmrr_XXX functions. - It is used to retrieve full table rows by calling rnd_pos(). - */ - handler *h; - TABLE *table; /* Always equal to h->table */ -private: - /* Secondary handler object. 
It is used for scanning the index */ - handler *h2; - - /* Buffer to store rowids, or (rowid, range_id) pairs */ - uchar *rowids_buf; - uchar *rowids_buf_cur; /* Current position when reading/writing */ - uchar *rowids_buf_last; /* When reading: end of used buffer space */ - uchar *rowids_buf_end; /* End of the buffer */ - - bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */ - - /* TRUE <=> need range association, buffer holds {rowid, range_id} pairs */ - bool is_mrr_assoc; - - bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */ -public: - void init(handler *h_arg, TABLE *table_arg) - { - h= h_arg; - table= table_arg; - } - int dsmrr_init(handler *h, RANGE_SEQ_IF *seq_funcs, void *seq_init_param, - uint n_ranges, uint mode, HANDLER_BUFFER *buf); - void dsmrr_close(); - int dsmrr_fill_buffer(); - int dsmrr_next(char **range_info); - - ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint *bufsz, - uint *flags, COST_VECT *cost); - - ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq, - void *seq_init_param, uint n_ranges, uint *bufsz, - uint *flags, COST_VECT *cost); -private: - bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz, - COST_VECT *cost); - bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, - uint *buffer_size, COST_VECT *cost); -}; - === modified file 'sql/handler.h' --- a/sql/handler.h 2009-12-15 17:23:55 +0000 +++ b/sql/handler.h 2009-12-22 12:33:21 +0000 @@ -2314,7 +2314,7 @@ private: friend class DsMrr_impl; }; -#include "ds_mrr.h" +#include "multi_range_read.h" /* Some extern variables used with handlers */ === added file 'sql/multi_range_read.cc' --- a/sql/multi_range_read.cc 1970-01-01 00:00:00 +0000 +++ b/sql/multi_range_read.cc 2009-12-22 12:33:21 +0000 @@ -0,0 +1,944 @@ +#include "mysql_priv.h" +#include "sql_select.h" + +/**************************************************************************** + * Default MRR implementation (MRR to non-MRR 
converter) + ***************************************************************************/ + +/** + Get cost and other information about MRR scan over a known list of ranges + + Calculate estimated cost and other information about an MRR scan for given + sequence of ranges. + + @param keyno Index number + @param seq Range sequence to be traversed + @param seq_init_param First parameter for seq->init() + @param n_ranges_arg Number of ranges in the sequence, or 0 if the caller + can't efficiently determine it + @param bufsz INOUT IN: Size of the buffer available for use + OUT: Size of the buffer that is expected to be actually + used, or 0 if buffer is not needed. + @param flags INOUT A combination of HA_MRR_* flags + @param cost OUT Estimated cost of MRR access + + @note + This method (or an overriding one in a derived class) must check for + thd->killed and return HA_POS_ERROR if it is not zero. This is required + for a user to be able to interrupt the calculation by killing the + connection/query. + + @retval + HA_POS_ERROR Error or the engine is unable to perform the requested + scan. Values of OUT parameters are undefined. + @retval + other OK, *cost contains cost of the scan, *bufsz and *flags + contain scan parameters. 
+*/ + +ha_rows +handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, + void *seq_init_param, uint n_ranges_arg, + uint *bufsz, uint *flags, COST_VECT *cost) +{ + KEY_MULTI_RANGE range; + range_seq_t seq_it; + ha_rows rows, total_rows= 0; + uint n_ranges=0; + THD *thd= current_thd; + + /* Default MRR implementation doesn't need buffer */ + *bufsz= 0; + + seq_it= seq->init(seq_init_param, n_ranges, *flags); + while (!seq->next(seq_it, &range)) + { + if (unlikely(thd->killed != 0)) + return HA_POS_ERROR; + + n_ranges++; + key_range *min_endp, *max_endp; + if (range.range_flag & GEOM_FLAG) + { + /* In this case tmp_min_flag contains the handler-read-function */ + range.start_key.flag= (ha_rkey_function) (range.range_flag ^ GEOM_FLAG); + min_endp= &range.start_key; + max_endp= NULL; + } + else + { + min_endp= range.start_key.length? &range.start_key : NULL; + max_endp= range.end_key.length? &range.end_key : NULL; + } + if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE)) + rows= 1; /* there can be at most one row */ + else + { + if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp, + max_endp))) + { + /* Can't scan one range => can't do MRR scan at all */ + total_rows= HA_POS_ERROR; + break; + } + } + total_rows += rows; + } + + if (total_rows != HA_POS_ERROR) + { + /* The following calculation is the same as in multi_range_read_info(): */ + *flags |= HA_MRR_USE_DEFAULT_IMPL; + cost->zero(); + cost->avg_io_cost= 1; /* assume random seeks */ + if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2) + cost->io_count= index_only_read_time(keyno, (uint)total_rows); + else + cost->io_count= read_time(keyno, n_ranges, total_rows); + cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01; + } + return total_rows; +} + + +/** + Get cost and other information about MRR scan over some sequence of ranges + + Calculate estimated cost and other information about an MRR scan for some + sequence of ranges. 
+ + The ranges themselves will be known only at execution phase. When this + function is called we only know number of ranges and a (rough) E(#records) + within those ranges. + + Currently this function is only called for "n-keypart singlepoint" ranges, + i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN" + + The flags parameter is a combination of those flags: HA_MRR_SORTED, + HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS. + + @param keyno Index number + @param n_ranges Estimated number of ranges (i.e. intervals) in the + range sequence. + @param n_rows Estimated total number of records contained within all + of the ranges + @param bufsz INOUT IN: Size of the buffer available for use + OUT: Size of the buffer that will be actually used, or + 0 if buffer is not needed. + @param flags INOUT A combination of HA_MRR_* flags + @param cost OUT Estimated cost of MRR access + + @retval + 0 OK, *cost contains cost of the scan, *bufsz and *flags contain scan + parameters. + @retval + other Error or can't perform the requested scan +*/ + +ha_rows handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows, + uint *bufsz, uint *flags, COST_VECT *cost) +{ + *bufsz= 0; /* Default implementation doesn't need a buffer */ + + *flags |= HA_MRR_USE_DEFAULT_IMPL; + + cost->zero(); + cost->avg_io_cost= 1; /* assume random seeks */ + + /* Produce the same cost as non-MRR code does */ + if (*flags & HA_MRR_INDEX_ONLY) + cost->io_count= index_only_read_time(keyno, n_rows); + else + cost->io_count= read_time(keyno, n_ranges, n_rows); + return 0; +} + + +/** + Initialize the MRR scan + + Initialize the MRR scan. This function may do heavyweight scan + initialization like row prefetching/sorting/etc (NOTE: but better not do + it here as we may not need it, e.g. if we never satisfy WHERE clause on + previous tables. 
For many implementations it would be natural to do such + initializations in the first multi_range_read_next() call) + + mode is a combination of the following flags: HA_MRR_SORTED, + HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION + + @param seq_funcs Range sequence to be traversed + @param seq_init_param First parameter for seq_funcs->init() + @param n_ranges Number of ranges in the sequence + @param mode Flags, see the description section for the details + @param buf INOUT: memory buffer to be used + + @note + One must have called index_init() before calling this function. Several + multi_range_read_init() calls may be made in the course of one query. + + Until WL#2623 is done (see its text, section 3.2), the following will + also hold: + The caller will guarantee that if "seq_funcs->init == mrr_ranges_array_init" + then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures. + This property will only be used by NDB handler until WL#2623 is done. + + Buffer memory management is done according to the following scenario: + The caller allocates the buffer and provides it to the callee by filling + the members of HANDLER_BUFFER structure. + The callee consumes all or some fraction of the provided buffer space, and + sets the HANDLER_BUFFER members accordingly. + The callee may use the buffer memory until the next multi_range_read_init() + call is made, all records have been read, or until index_end() call is + made, whichever comes first.
+ + @retval 0 OK + @retval 1 Error +*/ + +int +handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param, + uint n_ranges, uint mode, HANDLER_BUFFER *buf) +{ + DBUG_ENTER("handler::multi_range_read_init"); + mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode); + mrr_funcs= *seq_funcs; + mrr_is_output_sorted= test(mode & HA_MRR_SORTED); + mrr_have_range= FALSE; + DBUG_RETURN(0); +} + + +/** + Get next record in MRR scan + + Default MRR implementation: read the next record + + @param range_info OUT Undefined if HA_MRR_NO_ASSOCIATION flag is in effect + Otherwise, the opaque value associated with the range + that contains the returned record. + + @retval 0 OK + @retval other Error code +*/ + +int handler::multi_range_read_next(char **range_info) +{ + int UNINIT_VAR(result); + int range_res; + DBUG_ENTER("handler::multi_range_read_next"); + + if (!mrr_have_range) + { + mrr_have_range= TRUE; + goto start; + } + + do + { + /* Save a call if there can be only one row in range. */ + if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE)) + { + result= read_range_next(); + /* On success or non-EOF errors jump to the end. */ + if (result != HA_ERR_END_OF_FILE) + break; + } + else + { + if (was_semi_consistent_read()) + goto scan_it_again; + /* + We need to set this for the last range only, but checking this + condition is more expensive than just setting the result code. + */ + result= HA_ERR_END_OF_FILE; + } + +start: + /* Try the next range(s) until one matches a record. */ + while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range))) + { +scan_it_again: + result= read_range_first(mrr_cur_range.start_key.keypart_map ? + &mrr_cur_range.start_key : 0, + mrr_cur_range.end_key.keypart_map ? 
+ &mrr_cur_range.end_key : 0, + test(mrr_cur_range.range_flag & EQ_RANGE), + mrr_is_output_sorted); + if (result != HA_ERR_END_OF_FILE) + break; + } + } + while ((result == HA_ERR_END_OF_FILE) && !range_res); + + *range_info= mrr_cur_range.ptr; + DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result)); + DBUG_RETURN(result); +} + +/**************************************************************************** + * DS-MRR implementation + ***************************************************************************/ + +/** + DS-MRR: Initialize and start MRR scan + + Initialize and start the MRR scan. Depending on the mode parameter, this + may use default or DS-MRR implementation. + + @param h Table handler to be used + @param key Index to be used + @param seq_funcs Interval sequence enumeration functions + @param seq_init_param Interval sequence enumeration parameter + @param n_ranges Number of ranges in the sequence. + @param mode HA_MRR_* modes to use + @param buf INOUT Buffer to use + + @retval 0 Ok, Scan started. + @retval other Error +*/ + +int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, + void *seq_init_param, uint n_ranges, uint mode, + HANDLER_BUFFER *buf) +{ + uint elem_size; + Item *pushed_cond= NULL; + handler *new_h2= 0; + DBUG_ENTER("DsMrr_impl::dsmrr_init"); + + /* + index_merge may invoke a scan on an object for which dsmrr_info[_const] + has not been called, so set the owner handler here as well. 
+ */ + h= h_arg; + if (mode & HA_MRR_USE_DEFAULT_IMPL || mode & HA_MRR_SORTED) + { + use_default_impl= TRUE; + const int retval= + h->handler::multi_range_read_init(seq_funcs, seq_init_param, + n_ranges, mode, buf); + DBUG_RETURN(retval); + } + rowids_buf= buf->buffer; + + is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION); + + if (is_mrr_assoc) + status_var_increment(table->in_use->status_var.ha_multi_range_read_init_count); + + rowids_buf_end= buf->buffer_end; + elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*); + rowids_buf_last= rowids_buf + + ((rowids_buf_end - rowids_buf)/ elem_size)* + elem_size; + rowids_buf_end= rowids_buf_last; + + /* + There can be two cases: + - This is the first call since index_init(), h2==NULL + Need to setup h2 then. + - This is not the first call, h2 is initialized and set up appropriately. + The caller might have called h->index_init(), need to switch h to + rnd_pos calls. + */ + if (!h2) + { + /* Create a separate handler object to do rndpos() calls. */ + THD *thd= current_thd; + /* + ::clone() takes up a lot of stack, especially on 64 bit platforms. + The constant 5 is an empiric result. + */ + if (check_stack_overrun(thd, 5*STACK_MIN_SIZE, (uchar*) &new_h2)) + DBUG_RETURN(1); + DBUG_ASSERT(h->active_index != MAX_KEY); + uint mrr_keyno= h->active_index; + + /* Create a separate handler object to do rndpos() calls. */ + if (!(new_h2= h->clone(thd->mem_root)) || + new_h2->ha_external_lock(thd, F_RDLCK)) + { + delete new_h2; + DBUG_RETURN(1); + } + + if (mrr_keyno == h->pushed_idx_cond_keyno) + pushed_cond= h->pushed_idx_cond; + + /* + Caution: this call will invoke this->dsmrr_close(). Do not put the + created secondary table handler into this->h2 or it will delete it. 
+ */ + if (h->ha_index_end()) + { + h2=new_h2; + goto error; + } + + h2= new_h2; /* Ok, now can put it into h2 */ + table->prepare_for_position(); + h2->extra(HA_EXTRA_KEYREAD); + + if (h2->ha_index_init(mrr_keyno, FALSE)) + goto error; + + use_default_impl= FALSE; + if (pushed_cond) + h2->idx_cond_push(mrr_keyno, pushed_cond); + } + else + { + /* + We get here when the access alternates between MRR scan(s) and non-MRR + scans. + + Calling h->index_end() will invoke dsmrr_close() for this object, + which will delete h2. We need to keep it, so put it away and don't + let it be deleted: + */ + handler *save_h2= h2; + h2= NULL; + int res= (h->inited == handler::INDEX && h->ha_index_end()); + h2= save_h2; + use_default_impl= FALSE; + if (res) + goto error; + } + + if (h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges, + mode, buf) || + dsmrr_fill_buffer()) + { + goto error; + } + /* + If the above call has scanned through all intervals in *seq, then + adjust *buf to indicate that the remaining buffer space will not be used. + */ + if (dsmrr_eof) + buf->end_of_used_area= rowids_buf_last; + + /* + h->inited == INDEX may occur when 'range checked for each record' is + used. + */ + if ((h->inited != handler::RND) && + ((h->inited==handler::INDEX? 
h->ha_index_end(): FALSE) || + (h->ha_rnd_init(FALSE)))) + goto error; + + use_default_impl= FALSE; + h->mrr_funcs= *seq_funcs; + + DBUG_RETURN(0); +error: + h2->ha_index_or_rnd_end(); + h2->ha_external_lock(current_thd, F_UNLCK); + h2->close(); + delete h2; + h2= NULL; + DBUG_RETURN(1); +} + + +void DsMrr_impl::dsmrr_close() +{ + DBUG_ENTER("DsMrr_impl::dsmrr_close"); + if (h2) + { + h2->ha_index_or_rnd_end(); + h2->ha_external_lock(current_thd, F_UNLCK); + h2->close(); + delete h2; + h2= NULL; + } + use_default_impl= TRUE; + DBUG_VOID_RETURN; +} + + +static int rowid_cmp(void *h, uchar *a, uchar *b) +{ + return ((handler*)h)->cmp_ref(a, b); +} + + +/** + DS-MRR: Fill the buffer with rowids and sort it by rowid + + {This is an internal function of DiskSweep MRR implementation} + Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into + buffer. When the buffer is full or scan is completed, sort the buffer by + rowid and return. + + The function assumes that rowids buffer is empty when it is invoked. 
+ + @param h Table handler + + @retval 0 OK, the next portion of rowids is in the buffer, + properly ordered + @retval other Error +*/ + +int DsMrr_impl::dsmrr_fill_buffer() +{ + char *range_info; + int res; + DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer"); + + rowids_buf_cur= rowids_buf; + while ((rowids_buf_cur < rowids_buf_end) && + !(res= h2->handler::multi_range_read_next(&range_info))) + { + KEY_MULTI_RANGE *curr_range= &h2->handler::mrr_cur_range; + if (h2->mrr_funcs.skip_index_tuple && + h2->mrr_funcs.skip_index_tuple(h2->mrr_iter, curr_range->ptr)) + continue; + + /* Put rowid, or {rowid, range_id} pair into the buffer */ + h2->position(table->record[0]); + memcpy(rowids_buf_cur, h2->ref, h2->ref_length); + rowids_buf_cur += h2->ref_length; + + if (is_mrr_assoc) + { + memcpy(rowids_buf_cur, &range_info, sizeof(void*)); + rowids_buf_cur += sizeof(void*); + } + } + + if (res && res != HA_ERR_END_OF_FILE) + DBUG_RETURN(res); + dsmrr_eof= test(res == HA_ERR_END_OF_FILE); + + /* Sort the buffer contents by rowid */ + uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*); + uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size; + + my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp, + (void*)h); + rowids_buf_last= rowids_buf_cur; + rowids_buf_cur= rowids_buf; + DBUG_RETURN(0); +} + + +/** + DS-MRR implementation: multi_range_read_next() function +*/ + +int DsMrr_impl::dsmrr_next(char **range_info) +{ + int res; + uchar *cur_range_info= 0; + uchar *rowid; + + if (use_default_impl) + return h->handler::multi_range_read_next(range_info); + + do + { + if (rowids_buf_cur == rowids_buf_last) + { + if (dsmrr_eof) + { + res= HA_ERR_END_OF_FILE; + goto end; + } + res= dsmrr_fill_buffer(); + if (res) + goto end; + } + + /* return eof if there are no rowids in the buffer after re-fill attempt */ + if (rowids_buf_cur == rowids_buf_last) + { + res= HA_ERR_END_OF_FILE; + goto end; + } + rowid= rowids_buf_cur; + + if (is_mrr_assoc) + 
memcpy(&cur_range_info, rowids_buf_cur + h->ref_length, sizeof(uchar**)); + + rowids_buf_cur += h->ref_length + sizeof(void*) * test(is_mrr_assoc); + if (h2->mrr_funcs.skip_record && + h2->mrr_funcs.skip_record(h2->mrr_iter, (char *) cur_range_info, rowid)) + continue; + res= h->rnd_pos(table->record[0], rowid); + break; + } while (true); + + if (is_mrr_assoc) + { + memcpy(range_info, rowid + h->ref_length, sizeof(void*)); + } +end: + return res; +} + + +/** + DS-MRR implementation: multi_range_read_info() function +*/ +ha_rows DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows, + uint *bufsz, uint *flags, COST_VECT *cost) +{ + ha_rows res; + uint def_flags= *flags; + uint def_bufsz= *bufsz; + + /* Get cost/flags/mem_usage of default MRR implementation */ + res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz, + &def_flags, cost); + DBUG_ASSERT(!res); + + if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || + choose_mrr_impl(keyno, rows, &def_flags, &def_bufsz, cost)) + { + /* Default implementation is choosen */ + DBUG_PRINT("info", ("Default MRR implementation choosen")); + *flags= def_flags; + *bufsz= def_bufsz; + } + else + { + /* *flags and *bufsz were set by choose_mrr_impl */ + DBUG_PRINT("info", ("DS-MRR implementation choosen")); + } + return 0; +} + + +/** + DS-MRR Implementation: multi_range_read_info_const() function +*/ + +ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq, + void *seq_init_param, uint n_ranges, + uint *bufsz, uint *flags, COST_VECT *cost) +{ + ha_rows rows; + uint def_flags= *flags; + uint def_bufsz= *bufsz; + /* Get cost/flags/mem_usage of default MRR implementation */ + rows= h->handler::multi_range_read_info_const(keyno, seq, seq_init_param, + n_ranges, &def_bufsz, + &def_flags, cost); + if (rows == HA_POS_ERROR) + { + /* Default implementation can't perform MRR scan => we can't either */ + return rows; + } + + /* + If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to + use the 
default MRR implementation (we need it for UPDATE/DELETE). + Otherwise, make a choice based on cost and @@optimizer_use_mrr. + */ + if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || + choose_mrr_impl(keyno, rows, flags, bufsz, cost)) + { + DBUG_PRINT("info", ("Default MRR implementation choosen")); + *flags= def_flags; + *bufsz= def_bufsz; + } + else + { + /* *flags and *bufsz were set by choose_mrr_impl */ + DBUG_PRINT("info", ("DS-MRR implementation choosen")); + } + return rows; +} + + +/** + Check if key has partially-covered columns + + We can't use DS-MRR to perform range scans when the ranges are over + partially-covered keys, because we'll not have full key part values + (we'll have their prefixes from the index) and will not be able to check + if we've reached the end of the range. + + @param keyno Key to check + + @todo + Allow use of DS-MRR in cases where the index has partially-covered + components but they are not used for scanning. + + @retval TRUE Yes + @retval FALSE No +*/ + +bool key_uses_partial_cols(TABLE *table, uint keyno) +{ + KEY_PART_INFO *kp= table->key_info[keyno].key_part; + KEY_PART_INFO *kp_end= kp + table->key_info[keyno].key_parts; + for (; kp != kp_end; kp++) + { + if (!kp->field->part_of_key.is_set(keyno)) + return TRUE; + } + return FALSE; +} + +/** + DS-MRR Internals: Choose between Default MRR implementation and DS-MRR + + Make the choice between using Default MRR implementation and DS-MRR. + This function contains common functionality factored out of dsmrr_info() + and dsmrr_info_const(). The function assumes that the default MRR + implementation's applicability requirements are satisfied. 
+ + @param keyno Index number + @param rows E(full rows to be retrieved) + @param flags IN MRR flags provided by the MRR user + OUT If DS-MRR is chosen, flags of DS-MRR implementation + else the value is not modified + @param bufsz IN If DS-MRR is chosen, buffer use of DS-MRR implementation + else the value is not modified + @param cost IN Cost of default MRR implementation + OUT If DS-MRR is chosen, cost of DS-MRR scan + else the value is not modified + + @retval TRUE Default MRR implementation should be used + @retval FALSE DS-MRR implementation should be used +*/ + +bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, + uint *bufsz, COST_VECT *cost) +{ + COST_VECT dsmrr_cost; + bool res; + THD *thd= current_thd; + if (thd->variables.optimizer_use_mrr == 2 || *flags & HA_MRR_INDEX_ONLY || + (keyno == table->s->primary_key && h->primary_key_is_clustered()) || + key_uses_partial_cols(table, keyno)) + { + /* Use the default implementation */ + *flags |= HA_MRR_USE_DEFAULT_IMPL; + return TRUE; + } + + uint add_len= table->key_info[keyno].key_length + h->ref_length; + *bufsz -= add_len; + if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost)) + return TRUE; + *bufsz += add_len; + + bool force_dsmrr; + /* + If @@optimizer_use_mrr==force, then set cost of DS-MRR to be minimum of + DS-MRR and Default implementations cost. This allows one to force use of + DS-MRR whenever it is applicable without affecting other cost-based + choices. 
+ */ + if ((force_dsmrr= (thd->variables.optimizer_use_mrr == 1)) && + dsmrr_cost.total_cost() > cost->total_cost()) + dsmrr_cost= *cost; + + if (force_dsmrr || dsmrr_cost.total_cost() <= cost->total_cost()) + { + *flags &= ~HA_MRR_USE_DEFAULT_IMPL; /* Use the DS-MRR implementation */ + *flags &= ~HA_MRR_SORTED; /* We will return unordered output */ + *cost= dsmrr_cost; + res= FALSE; + } + else + { + /* Use the default MRR implementation */ + res= TRUE; + } + return res; +} + + +static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost); + + +/** + Get cost of DS-MRR scan + + @param keynr Index to be used + @param rows E(Number of rows to be scanned) + @param flags Scan parameters (HA_MRR_* flags) + @param buffer_size INOUT Buffer size + @param cost OUT The cost + + @retval FALSE OK + @retval TRUE Error, DS-MRR cannot be used (the buffer is too small + for even 1 rowid) +*/ + +bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, + uint *buffer_size, COST_VECT *cost) +{ + ulong max_buff_entries, elem_size; + ha_rows rows_in_full_step, rows_in_last_step; + uint n_full_steps; + double index_read_cost; + + elem_size= h->ref_length + sizeof(void*) * (!test(flags & HA_MRR_NO_ASSOCIATION)); + max_buff_entries = *buffer_size / elem_size; + + if (!max_buff_entries) + return TRUE; /* Buffer has not enough space for even 1 rowid */ + + /* Number of iterations we'll make with full buffer */ + n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries); + + /* + Get numbers of rows we'll be processing in + - non-last sweep, with full buffer + - last iteration, with non-full buffer + */ + rows_in_full_step= max_buff_entries; + rows_in_last_step= rows % max_buff_entries; + + /* Adjust buffer size if we expect to use only part of the buffer */ + if (n_full_steps) + { + get_sort_and_sweep_cost(table, rows, cost); + cost->multiply(n_full_steps); + } + else + { + cost->zero(); + *buffer_size= max(*buffer_size, + 
(size_t)(1.2*rows_in_last_step) * elem_size + + h->ref_length + table->key_info[keynr].key_length); + } + + COST_VECT last_step_cost; + get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost); + cost->add(&last_step_cost); + + if (n_full_steps != 0) + cost->mem_cost= *buffer_size; + else + cost->mem_cost= (double)rows_in_last_step * elem_size; + + /* Total cost of all index accesses */ + index_read_cost= h->index_only_read_time(keynr, (double)rows); + cost->add_io(index_read_cost, 1 /* Random seeks */); + return FALSE; +} + + +/* + Get cost of one sort-and-sweep step + + SYNOPSIS + get_sort_and_sweep_cost() + table Table being accessed + nrows Number of rows to be sorted and retrieved + cost OUT The cost + + DESCRIPTION + Get cost of these operations: + - sort an array of #nrows ROWIDs using qsort + - read #nrows records from table in a sweep. +*/ + +static +void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost) +{ + if (nrows) + { + get_sweep_read_cost(table, nrows, FALSE, cost); + /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */ + double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID); + if (cmp_op < 3) + cmp_op= 3; + cost->cpu_cost += cmp_op * log2(cmp_op); + } + else + cost->zero(); +} + + +/** + Get cost of reading nrows table records in a "disk sweep" + + A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made + for an ordered sequence of rowids. + + We assume hard disk IO. The read is performed as follows: + + 1. The disk head is moved to the needed cylinder + 2. The controller waits for the plate to rotate + 3. The data is transferred + + Time to do #3 is insignificant compared to #2+#1. + + Time to move the disk head is proportional to head travel distance. + + Time to wait for the plate to rotate depends on whether the disk head + was moved or not. + + If disk head wasn't moved, the wait time is proportional to distance + between the previous block and the block we're reading. 
+ + If the head was moved, we don't know how much we'll need to wait for the + plate to rotate. We assume the wait time to be a variate with a mean of + 0.5 of full rotation time. + + Our cost units are "random disk seeks". The cost of random disk seek is + actually not a constant, it depends on the range of cylinders we're going + to access. We make it constant by introducing a fuzzy concept of "typical + datafile length" (it's fuzzy as it's hard to tell whether it should + include index file, temp.tables etc). Then random seek cost is: + + 1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length + + We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9. + + @param table Table to be accessed + @param nrows Number of rows to retrieve + @param interrupted TRUE <=> Assume that the disk sweep will be + interrupted by other disk IO. FALSE - otherwise. + @param cost OUT The cost. +*/ + +void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, + COST_VECT *cost) +{ + DBUG_ENTER("get_sweep_read_cost"); + + cost->zero(); + if (table->file->primary_key_is_clustered()) + { + cost->io_count= table->file->read_time(table->s->primary_key, + (uint) nrows, nrows); + } + else + { + double n_blocks= + ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE); + double busy_blocks= + n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows))); + if (busy_blocks < 1.0) + busy_blocks= 1.0; + + DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks, + busy_blocks)); + cost->io_count= busy_blocks; + + if (!interrupted) + { + /* Assume reading is done in one 'sweep' */ + cost->avg_io_cost= (DISK_SEEK_BASE_COST + + DISK_SEEK_PROP_COST*n_blocks/busy_blocks); + } + } + DBUG_PRINT("info",("returning cost=%g", cost->total_cost())); + DBUG_VOID_RETURN; +} + + +/* ************************************************************************** + * DS-MRR implementation ends + ***************************************************************************/ 
+ + === added file 'sql/multi_range_read.h' --- a/sql/multi_range_read.h 1970-01-01 00:00:00 +0000 +++ b/sql/multi_range_read.h 2009-12-22 12:33:21 +0000 @@ -0,0 +1,70 @@ +/* + This file contains declarations for + - Disk-Sweep MultiRangeRead (DS-MRR) implementation +*/ + +/** + A Disk-Sweep MRR interface implementation + + This implementation makes range (and, in the future, 'ref') scans to read + table rows in disk sweeps. + + Currently it is used by MyISAM and InnoDB. Potentially it can be used with + any table handler that has non-clustered indexes and on-disk rows. +*/ + +class DsMrr_impl +{ +public: + typedef void (handler::*range_check_toggle_func_t)(bool on); + + DsMrr_impl() + : h2(NULL) {}; + + /* + The "owner" handler object (the one that calls dsmrr_XXX functions. + It is used to retrieve full table rows by calling rnd_pos(). + */ + handler *h; + TABLE *table; /* Always equal to h->table */ +private: + /* Secondary handler object. It is used for scanning the index */ + handler *h2; + + /* Buffer to store rowids, or (rowid, range_id) pairs */ + uchar *rowids_buf; + uchar *rowids_buf_cur; /* Current position when reading/writing */ + uchar *rowids_buf_last; /* When reading: end of used buffer space */ + uchar *rowids_buf_end; /* End of the buffer */ + + bool dsmrr_eof; /* TRUE <=> We have reached EOF when reading index tuples */ + + /* TRUE <=> need range association, buffer holds {rowid, range_id} pairs */ + bool is_mrr_assoc; + + bool use_default_impl; /* TRUE <=> shortcut all calls to default MRR impl */ +public: + void init(handler *h_arg, TABLE *table_arg) + { + h= h_arg; + table= table_arg; + } + int dsmrr_init(handler *h, RANGE_SEQ_IF *seq_funcs, void *seq_init_param, + uint n_ranges, uint mode, HANDLER_BUFFER *buf); + void dsmrr_close(); + int dsmrr_fill_buffer(); + int dsmrr_next(char **range_info); + + ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint *bufsz, + uint *flags, COST_VECT *cost); + + ha_rows dsmrr_info_const(uint keyno, 
RANGE_SEQ_IF *seq, + void *seq_init_param, uint n_ranges, uint *bufsz, + uint *flags, COST_VECT *cost); +private: + bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz, + COST_VECT *cost); + bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, + uint *buffer_size, COST_VECT *cost); +}; + === modified file 'sql/mysql_priv.h' --- a/sql/mysql_priv.h 2009-12-15 07:16:46 +0000 +++ b/sql/mysql_priv.h 2009-12-22 12:33:21 +0000 @@ -540,12 +540,13 @@ protected: #define OPTIMIZER_SWITCH_INDEX_MERGE_UNION 2 #define OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION 4 #define OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT 8 +#define OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN 16 #ifdef DBUG_OFF -# define OPTIMIZER_SWITCH_LAST 16 -#else -# define OPTIMIZER_SWITCH_TABLE_ELIMINATION 16 # define OPTIMIZER_SWITCH_LAST 32 +#else +# define OPTIMIZER_SWITCH_TABLE_ELIMINATION 32 +# define OPTIMIZER_SWITCH_LAST 64 #endif #ifdef DBUG_OFF @@ -553,12 +554,14 @@ protected: # define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \ OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \ OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \ - OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT) + OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT | \ + OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN) #else # define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \ OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \ OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION | \ OPTIMIZER_SWITCH_INDEX_MERGE_INTERSECT | \ + OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN | \ OPTIMIZER_SWITCH_TABLE_ELIMINATION) #endif === modified file 'sql/mysqld.cc' --- a/sql/mysqld.cc 2009-12-21 02:26:15 +0000 +++ b/sql/mysqld.cc 2009-12-22 12:49:15 +0000 @@ -300,6 +300,7 @@ static const char *optimizer_switch_name { "index_merge","index_merge_union","index_merge_sort_union", "index_merge_intersection", + "index_condition_pushdown", #ifndef DBUG_OFF "table_elimination", #endif @@ -313,6 +314,7 @@ static const unsigned int optimizer_swit sizeof("index_merge_union") - 1, 
sizeof("index_merge_sort_union") - 1, sizeof("index_merge_intersection") - 1, + sizeof("index_condition_pushdown") - 1, #ifndef DBUG_OFF sizeof("table_elimination") - 1, #endif @@ -391,7 +393,8 @@ static const char *sql_mode_str= "OFF"; /* Text representation for OPTIMIZER_SWITCH_DEFAULT */ static const char *optimizer_switch_str="index_merge=on,index_merge_union=on," "index_merge_sort_union=on," - "index_merge_intersection=on" + "index_merge_intersection=on," + "index_condition_pushdown=on" #ifndef DBUG_OFF ",table_elimination=on"; #else @@ -5767,7 +5770,7 @@ enum options_mysqld OPT_MAX_SEEKS_FOR_KEY, OPT_MAX_TMP_TABLES, OPT_MAX_USER_CONNECTIONS, OPT_MAX_LENGTH_FOR_SORT_DATA, OPT_MAX_WRITE_LOCK_COUNT, OPT_BULK_INSERT_BUFFER_SIZE, - OPT_MAX_ERROR_COUNT, OPT_MULTI_RANGE_COUNT, OPT_MYISAM_DATA_POINTER_SIZE, + OPT_MAX_ERROR_COUNT, OPT_MRR_BUFFER_SIZE, OPT_MYISAM_DATA_POINTER_SIZE, OPT_MYISAM_BLOCK_SIZE, OPT_MYISAM_MAX_EXTRA_SORT_FILE_SIZE, OPT_MYISAM_MAX_SORT_FILE_SIZE, OPT_MYISAM_SORT_BUFFER_SIZE, @@ -6968,6 +6971,12 @@ The minimum value for this variable is 4 (uchar**) &global_system_variables.min_examined_row_limit, (uchar**) &max_system_variables.min_examined_row_limit, 0, GET_ULONG, REQUIRED_ARG, 0, 0, (longlong) ULONG_MAX, 0, 1L, 0}, + {"mrr_buffer_size", OPT_MRR_BUFFER_SIZE, + "Size of buffer to use when using MRR with range access", + (uchar**) &global_system_variables.mrr_buff_size, + (uchar**) &max_system_variables.mrr_buff_size, 0, + GET_ULONG, REQUIRED_ARG, 256*1024L, IO_SIZE*2+MALLOC_OVERHEAD, + INT_MAX32, MALLOC_OVERHEAD, 1 /* Small to be able to do tests */ , 0}, {"myisam_block_size", OPT_MYISAM_BLOCK_SIZE, "Block size to be used for MyISAM index pages.", (uchar**) &opt_myisam_block_size, @@ -7047,7 +7056,8 @@ The minimum value for this variable is 4 0, GET_ULONG, OPT_ARG, MAX_TABLES+1, 0, MAX_TABLES+2, 0, 1, 0}, {"optimizer_switch", OPT_OPTIMIZER_SWITCH, "optimizer_switch=option=val[,option=val...], where option={index_merge, " - "index_merge_union, 
index_merge_sort_union, index_merge_intersection" + "index_merge_union, index_merge_sort_union, index_merge_intersection, " + "index_condition_pushdown" #ifndef DBUG_OFF ", table_elimination" #endif @@ -7131,7 +7141,7 @@ The minimum value for this variable is 4 (uchar**) &global_system_variables.read_rnd_buff_size, (uchar**) &max_system_variables.read_rnd_buff_size, 0, GET_ULONG, REQUIRED_ARG, 256*1024L, IO_SIZE*2+MALLOC_OVERHEAD, - INT_MAX32, MALLOC_OVERHEAD, 1 /* Small overhead to be able to test MRR, was: IO_SIZE*/ , 0}, + INT_MAX32, MALLOC_OVERHEAD, IO_SIZE, 0}, {"record_buffer", OPT_RECORD_BUFFER, "Alias for read_buffer_size", (uchar**) &global_system_variables.read_buff_size, === added file 'sql/opt_index_cond_pushdown.cc' --- a/sql/opt_index_cond_pushdown.cc 1970-01-01 00:00:00 +0000 +++ b/sql/opt_index_cond_pushdown.cc 2009-12-22 12:49:15 +0000 @@ -0,0 +1,387 @@ +#include "mysql_priv.h" +#include "sql_select.h" + +/**************************************************************************** + * Index Condition Pushdown code starts + ***************************************************************************/ +/* + Check if given expression uses only table fields covered by the given index + + SYNOPSIS + uses_index_fields_only() + item Expression to check + tbl The table having the index + keyno The index number + other_tbls_ok TRUE <=> Fields of other non-const tables are allowed + + DESCRIPTION + Check if given expression only uses fields covered by index #keyno in the + table tbl. The expression can use any fields in any other tables. + + The expression is guaranteed not to be AND or OR - those constructs are + handled outside of this function. + + RETURN + TRUE Yes + FALSE No +*/ + +bool uses_index_fields_only(Item *item, TABLE *tbl, uint keyno, + bool other_tbls_ok) +{ + if (item->const_item()) + return TRUE; + + /* + Don't push down the triggered conditions. 
Nested outer joins execution + code may need to evaluate a condition several times (both triggered and + untriggered), and there is no way to put thi + TODO: Consider cloning the triggered condition and using the copies for: + 1. push the first copy down, to have most restrictive index condition + possible + 2. Put the second copy into tab->select_cond. + */ + if (item->type() == Item::FUNC_ITEM && + ((Item_func*)item)->functype() == Item_func::TRIG_COND_FUNC) + return FALSE; + + if (!(item->used_tables() & tbl->map)) + return other_tbls_ok; + + Item::Type item_type= item->type(); + switch (item_type) { + case Item::FUNC_ITEM: + { + /* This is a function, apply condition recursively to arguments */ + Item_func *item_func= (Item_func*)item; + Item **child; + Item **item_end= (item_func->arguments()) + item_func->argument_count(); + for (child= item_func->arguments(); child != item_end; child++) + { + if (!uses_index_fields_only(*child, tbl, keyno, other_tbls_ok)) + return FALSE; + } + return TRUE; + } + case Item::COND_ITEM: + { + /* + This is a AND/OR condition. Regular AND/OR clauses are handled by + make_cond_for_index() which will chop off the part that can be + checked with index. This code is for handling non-top-level AND/ORs, + e.g. func(x AND y). 
+ */ + List_iterator<Item> li(*((Item_cond*)item)->argument_list()); + Item *item; + while ((item=li++)) + { + if (!uses_index_fields_only(item, tbl, keyno, other_tbls_ok)) + return FALSE; + } + return TRUE; + } + case Item::FIELD_ITEM: + { + Item_field *item_field= (Item_field*)item; + if (item_field->field->table != tbl) + return TRUE; + /* + The below is probably a repetition - the first part checks the + other two, but let's play it safe: + */ + return item_field->field->part_of_key.is_set(keyno) && + item_field->field->type() != MYSQL_TYPE_GEOMETRY && + item_field->field->type() != MYSQL_TYPE_BLOB; + } + case Item::REF_ITEM: + return uses_index_fields_only(item->real_item(), tbl, keyno, + other_tbls_ok); + default: + return FALSE; /* Play it safe, don't push unknown non-const items */ + } +} + +#define ICP_COND_USES_INDEX_ONLY 10 + +/* + Get a part of the condition that can be checked using only index fields + + SYNOPSIS + make_cond_for_index() + cond The source condition + table The table that is partially available + keyno The index in the above table. Only fields covered by the index + are available + other_tbls_ok TRUE <=> Fields of other non-const tables are allowed + + DESCRIPTION + Get a part of the condition that can be checked when for the given table + we have values only of fields covered by some index. The condition may + refer to other tables, it is assumed that we have values of all of their + fields. + + Example: + make_cond_for_index( + "cond(t1.field) AND cond(t2.key1) AND cond(t2.non_key) AND cond(t2.key2)", + t2, keyno(t2.key1)) + will return + "cond(t1.field) AND cond(t2.key2)" + + RETURN + Index condition, or NULL if no condition could be inferred. 
+*/ + +Item *make_cond_for_index(Item *cond, TABLE *table, uint keyno, + bool other_tbls_ok) +{ + if (!cond) + return NULL; + if (cond->type() == Item::COND_ITEM) + { + uint n_marked= 0; + if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) + { + table_map used_tables= 0; + Item_cond_and *new_cond=new Item_cond_and; + if (!new_cond) + return (COND*) 0; + List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok); + if (fix) + { + new_cond->argument_list()->push_back(fix); + used_tables|= fix->used_tables(); + } + n_marked += test(item->marker == ICP_COND_USES_INDEX_ONLY); + } + if (n_marked ==((Item_cond*)cond)->argument_list()->elements) + cond->marker= ICP_COND_USES_INDEX_ONLY; + switch (new_cond->argument_list()->elements) { + case 0: + return (COND*) 0; + case 1: + new_cond->used_tables_cache= used_tables; + return new_cond->argument_list()->head(); + default: + new_cond->quick_fix_field(); + new_cond->used_tables_cache= used_tables; + return new_cond; + } + } + else /* It's OR */ + { + Item_cond_or *new_cond=new Item_cond_or; + if (!new_cond) + return (COND*) 0; + List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + Item *fix= make_cond_for_index(item, table, keyno, other_tbls_ok); + if (!fix) + return (COND*) 0; + new_cond->argument_list()->push_back(fix); + n_marked += test(item->marker == ICP_COND_USES_INDEX_ONLY); + } + if (n_marked ==((Item_cond*)cond)->argument_list()->elements) + cond->marker= ICP_COND_USES_INDEX_ONLY; + new_cond->quick_fix_field(); + new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache; + new_cond->top_level_item(); + return new_cond; + } + } + + if (!uses_index_fields_only(cond, table, keyno, other_tbls_ok)) + return (COND*) 0; + cond->marker= ICP_COND_USES_INDEX_ONLY; + return cond; +} + + +Item *make_cond_remainder(Item *cond, bool 
exclude_index) +{ + if (exclude_index && cond->marker == ICP_COND_USES_INDEX_ONLY) + return 0; /* Already checked */ + + if (cond->type() == Item::COND_ITEM) + { + table_map tbl_map= 0; + if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC) + { + /* Create new top level AND item */ + Item_cond_and *new_cond=new Item_cond_and; + if (!new_cond) + return (COND*) 0; + List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + Item *fix= make_cond_remainder(item, exclude_index); + if (fix) + { + new_cond->argument_list()->push_back(fix); + tbl_map |= fix->used_tables(); + } + } + switch (new_cond->argument_list()->elements) { + case 0: + return (COND*) 0; + case 1: + return new_cond->argument_list()->head(); + default: + new_cond->quick_fix_field(); + ((Item_cond*)new_cond)->used_tables_cache= tbl_map; + return new_cond; + } + } + else /* It's OR */ + { + Item_cond_or *new_cond=new Item_cond_or; + if (!new_cond) + return (COND*) 0; + List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item=li++)) + { + Item *fix= make_cond_remainder(item, FALSE); + if (!fix) + return (COND*) 0; + new_cond->argument_list()->push_back(fix); + tbl_map |= fix->used_tables(); + } + new_cond->quick_fix_field(); + ((Item_cond*)new_cond)->used_tables_cache= tbl_map; + new_cond->top_level_item(); + return new_cond; + } + } + return cond; +} + + +/* + Try to extract and push the index condition + + SYNOPSIS + push_index_cond() + tab A join tab that has tab->table->file and its condition + in tab->select_cond + keyno Index for which extract and push the condition + other_tbls_ok TRUE <=> Fields of other non-const tables are allowed + + DESCRIPTION + Try to extract and push the index condition down to table handler +*/ + +void push_index_cond(JOIN_TAB *tab, uint keyno, bool other_tbls_ok) +{ + DBUG_ENTER("push_index_cond"); + Item *idx_cond; + bool do_index_cond_pushdown= + 
((tab->table->file->index_flags(keyno, 0, 1) & + HA_DO_INDEX_COND_PUSHDOWN) && + optimizer_flag(tab->join->thd, OPTIMIZER_SWITCH_INDEX_COND_PUSHDOWN)); + + /* + Do not try index condition pushdown on indexes which have partially-covered + columns. Unpacking from a column prefix into index tuple is not a supported + operation in some engines, see e.g. MySQL BUG#42991. + TODO: a better solution would be not to consider partially-covered columns + as parts of the index and still produce/check index condition for + fully-covered index columns. + */ + KEY *key_info= tab->table->key_info + keyno; + for (uint kp= 0; kp < key_info->key_parts; kp++) + { + if ((key_info->key_part[kp].key_part_flag & HA_PART_KEY_SEG)) + { + do_index_cond_pushdown= FALSE; + break; + } + } + + if (do_index_cond_pushdown) + { + DBUG_EXECUTE("where", + print_where(tab->select_cond, "full cond", QT_ORDINARY);); + + idx_cond= make_cond_for_index(tab->select_cond, tab->table, keyno, + other_tbls_ok); + + DBUG_EXECUTE("where", + print_where(idx_cond, "idx cond", QT_ORDINARY);); + + if (idx_cond) + { + Item *idx_remainder_cond= 0; + tab->pre_idx_push_select_cond= tab->select_cond; + /* + For BKA cache we store condition to special BKA cache field + because evaluation of the condition requires additional operations + before the evaluation. This condition is used in + JOIN_CACHE_BKA[_UNIQUE]::skip_index_tuple() functions. + */ + if (tab->use_join_cache && + /* + if cache is used then the value is TRUE only + for BKA[_UNIQUE] cache (see check_join_cache_usage func). + In this case other_tbls_ok is an equivalent of + cache->is_key_access(). + */ + other_tbls_ok && + (idx_cond->used_tables() & + ~(tab->table->map | tab->join->const_table_map))) + tab->cache_idx_cond= idx_cond; + else + idx_remainder_cond= tab->table->file->idx_cond_push(keyno, idx_cond); + + /* + Disable eq_ref's "lookup cache" if we've pushed down an index + condition. 
+ TODO: This check happens to work on current ICP implementations, but + there may exist a compliant implementation that will not work + correctly with it. Sort this out when we stabilize the condition + pushdown APIs. + */ + if (idx_remainder_cond != idx_cond) + tab->ref.disable_cache= TRUE; + + Item *row_cond= make_cond_remainder(tab->select_cond, TRUE); + + DBUG_EXECUTE("where", + print_where(row_cond, "remainder cond", QT_ORDINARY);); + + if (row_cond) + { + if (!idx_remainder_cond) + tab->select_cond= row_cond; + else + { + COND *new_cond= new Item_cond_and(row_cond, idx_remainder_cond); + tab->select_cond= new_cond; + tab->select_cond->quick_fix_field(); + ((Item_cond_and*)tab->select_cond)->used_tables_cache= + row_cond->used_tables() | idx_remainder_cond->used_tables(); + } + } + else + tab->select_cond= idx_remainder_cond; + if (tab->select) + { + DBUG_EXECUTE("where", + print_where(tab->select->cond, + "select_cond", + QT_ORDINARY);); + + tab->select->cond= tab->select_cond; + } + } + } + DBUG_VOID_RETURN; +} + + === modified file 'sql/opt_range.cc' --- a/sql/opt_range.cc 2009-12-15 07:16:46 +0000 +++ b/sql/opt_range.cc 2009-12-22 12:33:21 +0000 @@ -720,6 +720,7 @@ public: uint8 first_null_comp; /* first null component if any, 0 - otherwise */ }; + class TABLE_READ_PLAN; class TRP_RANGE; class TRP_ROR_INTERSECT; @@ -789,7 +790,9 @@ static SEL_ARG null_element(SEL_ARG::IMP static bool null_part_in_key(KEY_PART *key_part, const uchar *key, uint length); bool sel_trees_can_be_ored(SEL_TREE *tree1, SEL_TREE *tree2, RANGE_OPT_PARAM* param); +static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts); +#include "opt_range_mrr.cc" /* SEL_IMERGE is a list of possible ways to do index merge, i.e. it is @@ -1165,7 +1168,7 @@ QUICK_RANGE_SELECT::QUICK_RANGE_SELECT(T my_init_dynamic_array(&ranges, sizeof(QUICK_RANGE*), 16, 16); /* 'thd' is not accessible in QUICK_RANGE_SELECT::reset(). 
*/ - mrr_buf_size= thd->variables.read_rnd_buff_size; + mrr_buf_size= thd->variables.mrr_buff_size; mrr_buf_desc= NULL; if (!no_alloc && !parent_alloc) @@ -4875,7 +4878,6 @@ static TRP_RANGE *get_key_scans_params(P uint UNINIT_VAR(best_mrr_flags), /* protected by key_to_read */ UNINIT_VAR(best_buf_size); /* protected by key_to_read */ TRP_RANGE* read_plan= NULL; - bool pk_is_clustered= param->table->file->primary_key_is_clustered(); DBUG_ENTER("get_key_scans_params"); /* Note that there may be trees that have type SEL_TREE::KEY but contain no @@ -7281,284 +7283,6 @@ void SEL_ARG::test_use_count(SEL_ARG *ro } #endif - -/**************************************************************************** - MRR Range Sequence Interface implementation that walks a SEL_ARG* tree. - ****************************************************************************/ - -/* MRR range sequence, SEL_ARG* implementation: stack entry */ -typedef struct st_range_seq_entry -{ - /* - Pointers in min and max keys. They point to right-after-end of key - images. The 0-th entry has these pointing to key tuple start. - */ - uchar *min_key, *max_key; - - /* - Flags, for {keypart0, keypart1, ... this_keypart} subtuple. - min_key_flag may have NULL_RANGE set. 
- */ - uint min_key_flag, max_key_flag; - - /* Number of key parts */ - uint min_key_parts, max_key_parts; - SEL_ARG *key_tree; -} RANGE_SEQ_ENTRY; - - -/* - MRR range sequence, SEL_ARG* implementation: SEL_ARG graph traversal context -*/ -typedef struct st_sel_arg_range_seq -{ - uint keyno; /* index of used tree in SEL_TREE structure */ - uint real_keyno; /* Number of the index in tables */ - PARAM *param; - SEL_ARG *start; /* Root node of the traversed SEL_ARG* graph */ - - RANGE_SEQ_ENTRY stack[MAX_REF_PARTS]; - int i; /* Index of last used element in the above array */ - - bool at_start; /* TRUE <=> The traversal has just started */ -} SEL_ARG_RANGE_SEQ; - - -/* - Range sequence interface, SEL_ARG* implementation: Initialize the traversal - - SYNOPSIS - init() - init_params SEL_ARG tree traversal context - n_ranges [ignored] The number of ranges obtained - flags [ignored] HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY - - RETURN - Value of init_param -*/ - -range_seq_t sel_arg_range_seq_init(void *init_param, uint n_ranges, uint flags) -{ - SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)init_param; - seq->at_start= TRUE; - seq->stack[0].key_tree= NULL; - seq->stack[0].min_key= seq->param->min_key; - seq->stack[0].min_key_flag= 0; - seq->stack[0].min_key_parts= 0; - - seq->stack[0].max_key= seq->param->max_key; - seq->stack[0].max_key_flag= 0; - seq->stack[0].max_key_parts= 0; - seq->i= 0; - return init_param; -} - - -static void step_down_to(SEL_ARG_RANGE_SEQ *arg, SEL_ARG *key_tree) -{ - RANGE_SEQ_ENTRY *cur= &arg->stack[arg->i+1]; - RANGE_SEQ_ENTRY *prev= &arg->stack[arg->i]; - - cur->key_tree= key_tree; - cur->min_key= prev->min_key; - cur->max_key= prev->max_key; - cur->min_key_parts= prev->min_key_parts; - cur->max_key_parts= prev->max_key_parts; - - uint16 stor_length= arg->param->key[arg->keyno][key_tree->part].store_length; - cur->min_key_parts += key_tree->store_min(stor_length, &cur->min_key, - prev->min_key_flag); - cur->max_key_parts += 
key_tree->store_max(stor_length, &cur->max_key, - prev->max_key_flag); - - cur->min_key_flag= prev->min_key_flag | key_tree->min_flag; - cur->max_key_flag= prev->max_key_flag | key_tree->max_flag; - - if (key_tree->is_null_interval()) - cur->min_key_flag |= NULL_RANGE; - (arg->i)++; -} - - -/* - Range sequence interface, SEL_ARG* implementation: get the next interval - - SYNOPSIS - sel_arg_range_seq_next() - rseq Value returned from sel_arg_range_seq_init - range OUT Store information about the range here - - DESCRIPTION - This is "get_next" function for Range sequence interface implementation - for SEL_ARG* tree. - - IMPLEMENTATION - The traversal also updates those param members: - - is_ror_scan - - range_count - - max_key_part - - RETURN - 0 Ok - 1 No more ranges in the sequence -*/ - -//psergey-merge-todo: support check_quick_keys:max_keypart -uint sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) -{ - SEL_ARG *key_tree; - SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)rseq; - if (seq->at_start) - { - key_tree= seq->start; - seq->at_start= FALSE; - goto walk_up_n_right; - } - - key_tree= seq->stack[seq->i].key_tree; - /* Ok, we're at some "full tuple" position in the tree */ - - /* Step down if we can */ - if (key_tree->next && key_tree->next != &null_element) - { - //step down; (update the tuple, we'll step right and stay there) - seq->i--; - step_down_to(seq, key_tree->next); - key_tree= key_tree->next; - seq->param->is_ror_scan= FALSE; - goto walk_right_n_up; - } - - /* Ok, can't step down, walk left until we can step down */ - while (1) - { - if (seq->i == 1) // can't step left - return 1; - /* Step left */ - seq->i--; - key_tree= seq->stack[seq->i].key_tree; - - /* Step down if we can */ - if (key_tree->next && key_tree->next != &null_element) - { - // Step down; update the tuple - seq->i--; - step_down_to(seq, key_tree->next); - key_tree= key_tree->next; - break; - } - } - - /* - Ok, we've stepped down from the path to previous tuple. 
- Walk right-up while we can - */ -walk_right_n_up: - while (key_tree->next_key_part && key_tree->next_key_part != &null_element && - key_tree->next_key_part->part == key_tree->part + 1 && - key_tree->next_key_part->type == SEL_ARG::KEY_RANGE) - { - { - RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i]; - uint min_key_length= cur->min_key - seq->param->min_key; - uint max_key_length= cur->max_key - seq->param->max_key; - uint len= cur->min_key - cur[-1].min_key; - if (!(min_key_length == max_key_length && - !memcmp(cur[-1].min_key, cur[-1].max_key, len) && - !key_tree->min_flag && !key_tree->max_flag)) - { - seq->param->is_ror_scan= FALSE; - if (!key_tree->min_flag) - cur->min_key_parts += - key_tree->next_key_part->store_min_key(seq->param->key[seq->keyno], - &cur->min_key, - &cur->min_key_flag); - if (!key_tree->max_flag) - cur->max_key_parts += - key_tree->next_key_part->store_max_key(seq->param->key[seq->keyno], - &cur->max_key, - &cur->max_key_flag); - break; - } - } - - /* - Ok, current atomic interval is in form "t.field=const" and there is - next_key_part interval. Step right, and walk up from there. 
- */ - key_tree= key_tree->next_key_part; - -walk_up_n_right: - while (key_tree->prev && key_tree->prev != &null_element) - { - /* Step up */ - key_tree= key_tree->prev; - } - step_down_to(seq, key_tree); - } - - /* Ok got a tuple */ - RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i]; - uint min_key_length= cur->min_key - seq->param->min_key; - - range->ptr= (char*)(int)(key_tree->part); - if (cur->min_key_flag & GEOM_FLAG) - { - range->range_flag= cur->min_key_flag; - - /* Here minimum contains also function code bits, and maximum is +inf */ - range->start_key.key= seq->param->min_key; - range->start_key.length= min_key_length; - range->start_key.flag= (ha_rkey_function) (cur->min_key_flag ^ GEOM_FLAG); - } - else - { - range->range_flag= cur->min_key_flag | cur->max_key_flag; - - range->start_key.key= seq->param->min_key; - range->start_key.length= cur->min_key - seq->param->min_key; - range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts); - range->start_key.flag= (cur->min_key_flag & NEAR_MIN ? HA_READ_AFTER_KEY : - HA_READ_KEY_EXACT); - - range->end_key.key= seq->param->max_key; - range->end_key.length= cur->max_key - seq->param->max_key; - range->end_key.flag= (cur->max_key_flag & NEAR_MAX ? HA_READ_BEFORE_KEY : - HA_READ_AFTER_KEY); - range->end_key.keypart_map= make_prev_keypart_map(cur->max_key_parts); - - if (!(cur->min_key_flag & ~NULL_RANGE) && !cur->max_key_flag && - (uint)key_tree->part+1 == seq->param->table->key_info[seq->real_keyno].key_parts && - (seq->param->table->key_info[seq->real_keyno].flags & (HA_NOSAME | HA_END_SPACE_KEY)) == - HA_NOSAME && - range->start_key.length == range->end_key.length && - !memcmp(seq->param->min_key,seq->param->max_key,range->start_key.length)) - range->range_flag= UNIQUE_RANGE | (cur->min_key_flag & NULL_RANGE); - - if (seq->param->is_ror_scan) - { - /* - If we get here, the condition on the key was converted to form - "(keyXpart1 = c1) AND ... 
AND (keyXpart{key_tree->part - 1} = cN) AND - somecond(keyXpart{key_tree->part})" - Check if - somecond is "keyXpart{key_tree->part} = const" and - uncovered "tail" of KeyX parts is either empty or is identical to - first members of clustered primary key. - */ - if (!(!(cur->min_key_flag & ~NULL_RANGE) && !cur->max_key_flag && - (range->start_key.length == range->end_key.length) && - !memcmp(range->start_key.key, range->end_key.key, range->start_key.length) && - is_key_scan_ror(seq->param, seq->real_keyno, key_tree->part + 1))) - seq->param->is_ror_scan= FALSE; - } - } - seq->param->range_count++; - seq->param->max_key_part=max(seq->param->max_key_part,key_tree->part); - return 0; -} - - /* Calculate cost and E(#rows) for a given index and intervals tree @@ -7633,7 +7357,7 @@ ha_rows check_quick_select(PARAM *param, if (current_thd->lex->sql_command != SQLCOM_SELECT) *mrr_flags |= HA_MRR_USE_DEFAULT_IMPL; - *bufsize= param->thd->variables.read_rnd_buff_size; + *bufsize= param->thd->variables.mrr_buff_size; rows= file->multi_range_read_info_const(keynr, &seq_if, (void*)&seq, 0, bufsize, mrr_flags, cost); if (rows != HA_POS_ERROR) @@ -8148,7 +7872,7 @@ QUICK_RANGE_SELECT *get_quick_select_for quick->mrr_flags |= HA_MRR_NO_NULL_ENDPOINTS; #endif - quick->mrr_buf_size= thd->variables.read_rnd_buff_size; + quick->mrr_buf_size= thd->variables.mrr_buff_size; if (table->file->multi_range_read_info(quick->index, 1, (uint)records, &quick->mrr_buf_size, &quick->mrr_flags, &cost)) @@ -8518,75 +8242,6 @@ int QUICK_RANGE_SELECT::reset() /* - Range sequence interface implementation for array<QUICK_RANGE>: initialize - - SYNOPSIS - quick_range_seq_init() - init_param Caller-opaque paramenter: QUICK_RANGE_SELECT* pointer - n_ranges Number of ranges in the sequence (ignored) - flags MRR flags (currently not used) - - RETURN - Opaque value to be passed to quick_range_seq_next -*/ - -range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags) -{ - QUICK_RANGE_SELECT 
*quick= (QUICK_RANGE_SELECT*)init_param; - quick->qr_traversal_ctx.first= (QUICK_RANGE**)quick->ranges.buffer; - quick->qr_traversal_ctx.cur= (QUICK_RANGE**)quick->ranges.buffer; - quick->qr_traversal_ctx.last= quick->qr_traversal_ctx.cur + - quick->ranges.elements; - return &quick->qr_traversal_ctx; -} - - -/* - Range sequence interface implementation for array<QUICK_RANGE>: get next - - SYNOPSIS - quick_range_seq_next() - rseq Value returned from quick_range_seq_init - range OUT Store information about the range here - - RETURN - 0 Ok - 1 No more ranges in the sequence -*/ - -uint quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) -{ - QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)rseq; - - if (ctx->cur == ctx->last) - return 1; /* no more ranges */ - - QUICK_RANGE *cur= *(ctx->cur); - key_range *start_key= &range->start_key; - key_range *end_key= &range->end_key; - - start_key->key= cur->min_key; - start_key->length= cur->min_length; - start_key->keypart_map= cur->min_keypart_map; - start_key->flag= ((cur->flag & NEAR_MIN) ? HA_READ_AFTER_KEY : - (cur->flag & EQ_RANGE) ? - HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT); - end_key->key= cur->max_key; - end_key->length= cur->max_length; - end_key->keypart_map= cur->max_keypart_map; - /* - We use HA_READ_AFTER_KEY here because if we are reading on a key - prefix. We want to find all keys with this prefix. - */ - end_key->flag= (cur->flag & NEAR_MAX ? HA_READ_BEFORE_KEY : - HA_READ_AFTER_KEY); - range->range_flag= cur->flag; - ctx->cur++; - return 0; -} - - -/* Get next possible record using quick-struct. 
SYNOPSIS @@ -9658,7 +9313,7 @@ get_best_group_min_max(PARAM *param, SEL uint mrr_flags= HA_MRR_USE_DEFAULT_IMPL; uint mrr_bufsize=0; cur_quick_prefix_records= check_quick_select(param, cur_param_idx, - FALSE /*don't care(*/, + FALSE /*don't care*/, cur_index_tree, TRUE, &mrr_flags, &mrr_bufsize, &dummy_cost); === modified file 'sql/opt_range.h' --- a/sql/opt_range.h 2009-12-15 07:16:46 +0000 +++ b/sql/opt_range.h 2009-12-22 12:33:21 +0000 @@ -317,7 +317,7 @@ protected: public: uint mrr_flags; /* Flags to be used with MRR interface */ protected: - uint mrr_buf_size; /* copy from thd->variables.read_rnd_buff_size */ + uint mrr_buf_size; /* copy from thd->variables.mrr_buff_size */ HANDLER_BUFFER *mrr_buf_desc; /* the handler buffer */ /* Info about index we're scanning */ === added file 'sql/opt_range_mrr.cc' --- a/sql/opt_range_mrr.cc 1970-01-01 00:00:00 +0000 +++ b/sql/opt_range_mrr.cc 2009-12-22 12:33:21 +0000 @@ -0,0 +1,349 @@ + +/**************************************************************************** + MRR Range Sequence Interface implementation that walks a SEL_ARG* tree. + ****************************************************************************/ + +/* MRR range sequence, SEL_ARG* implementation: stack entry */ +typedef struct st_range_seq_entry +{ + /* + Pointers in min and max keys. They point to right-after-end of key + images. The 0-th entry has these pointing to key tuple start. + */ + uchar *min_key, *max_key; + + /* + Flags, for {keypart0, keypart1, ... this_keypart} subtuple. + min_key_flag may have NULL_RANGE set. 
+ */ + uint min_key_flag, max_key_flag; + + /* Number of key parts */ + uint min_key_parts, max_key_parts; + SEL_ARG *key_tree; +} RANGE_SEQ_ENTRY; + + +/* + MRR range sequence, SEL_ARG* implementation: SEL_ARG graph traversal context +*/ +typedef struct st_sel_arg_range_seq +{ + uint keyno; /* index of used tree in SEL_TREE structure */ + uint real_keyno; /* Number of the index in tables */ + PARAM *param; + SEL_ARG *start; /* Root node of the traversed SEL_ARG* graph */ + + RANGE_SEQ_ENTRY stack[MAX_REF_PARTS]; + int i; /* Index of last used element in the above array */ + + bool at_start; /* TRUE <=> The traversal has just started */ +} SEL_ARG_RANGE_SEQ; + + +/* + Range sequence interface, SEL_ARG* implementation: Initialize the traversal + + SYNOPSIS + init() + init_params SEL_ARG tree traversal context + n_ranges [ignored] The number of ranges obtained + flags [ignored] HA_MRR_SINGLE_POINT, HA_MRR_FIXED_KEY + + RETURN + Value of init_param +*/ + +range_seq_t sel_arg_range_seq_init(void *init_param, uint n_ranges, uint flags) +{ + SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)init_param; + seq->at_start= TRUE; + seq->stack[0].key_tree= NULL; + seq->stack[0].min_key= seq->param->min_key; + seq->stack[0].min_key_flag= 0; + seq->stack[0].min_key_parts= 0; + + seq->stack[0].max_key= seq->param->max_key; + seq->stack[0].max_key_flag= 0; + seq->stack[0].max_key_parts= 0; + seq->i= 0; + return init_param; +} + + +static void step_down_to(SEL_ARG_RANGE_SEQ *arg, SEL_ARG *key_tree) +{ + RANGE_SEQ_ENTRY *cur= &arg->stack[arg->i+1]; + RANGE_SEQ_ENTRY *prev= &arg->stack[arg->i]; + + cur->key_tree= key_tree; + cur->min_key= prev->min_key; + cur->max_key= prev->max_key; + cur->min_key_parts= prev->min_key_parts; + cur->max_key_parts= prev->max_key_parts; + + uint16 stor_length= arg->param->key[arg->keyno][key_tree->part].store_length; + cur->min_key_parts += key_tree->store_min(stor_length, &cur->min_key, + prev->min_key_flag); + cur->max_key_parts += 
key_tree->store_max(stor_length, &cur->max_key, + prev->max_key_flag); + + cur->min_key_flag= prev->min_key_flag | key_tree->min_flag; + cur->max_key_flag= prev->max_key_flag | key_tree->max_flag; + + if (key_tree->is_null_interval()) + cur->min_key_flag |= NULL_RANGE; + (arg->i)++; +} + + +/* + Range sequence interface, SEL_ARG* implementation: get the next interval + + SYNOPSIS + sel_arg_range_seq_next() + rseq Value returned from sel_arg_range_seq_init + range OUT Store information about the range here + + DESCRIPTION + This is "get_next" function for Range sequence interface implementation + for SEL_ARG* tree. + + IMPLEMENTATION + The traversal also updates those param members: + - is_ror_scan + - range_count + - max_key_part + + RETURN + 0 Ok + 1 No more ranges in the sequence +*/ + +uint sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) +{ + SEL_ARG *key_tree; + SEL_ARG_RANGE_SEQ *seq= (SEL_ARG_RANGE_SEQ*)rseq; + if (seq->at_start) + { + key_tree= seq->start; + seq->at_start= FALSE; + goto walk_up_n_right; + } + + key_tree= seq->stack[seq->i].key_tree; + /* Ok, we're at some "full tuple" position in the tree */ + + /* Step down if we can */ + if (key_tree->next && key_tree->next != &null_element) + { + //step down; (update the tuple, we'll step right and stay there) + seq->i--; + step_down_to(seq, key_tree->next); + key_tree= key_tree->next; + seq->param->is_ror_scan= FALSE; + goto walk_right_n_up; + } + + /* Ok, can't step down, walk left until we can step down */ + while (1) + { + if (seq->i == 1) // can't step left + return 1; + /* Step left */ + seq->i--; + key_tree= seq->stack[seq->i].key_tree; + + /* Step down if we can */ + if (key_tree->next && key_tree->next != &null_element) + { + // Step down; update the tuple + seq->i--; + step_down_to(seq, key_tree->next); + key_tree= key_tree->next; + break; + } + } + + /* + Ok, we've stepped down from the path to previous tuple. 
+ Walk right-up while we can + */ +walk_right_n_up: + while (key_tree->next_key_part && key_tree->next_key_part != &null_element && + key_tree->next_key_part->part == key_tree->part + 1 && + key_tree->next_key_part->type == SEL_ARG::KEY_RANGE) + { + { + RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i]; + uint min_key_length= cur->min_key - seq->param->min_key; + uint max_key_length= cur->max_key - seq->param->max_key; + uint len= cur->min_key - cur[-1].min_key; + if (!(min_key_length == max_key_length && + !memcmp(cur[-1].min_key, cur[-1].max_key, len) && + !key_tree->min_flag && !key_tree->max_flag)) + { + seq->param->is_ror_scan= FALSE; + if (!key_tree->min_flag) + cur->min_key_parts += + key_tree->next_key_part->store_min_key(seq->param->key[seq->keyno], + &cur->min_key, + &cur->min_key_flag); + if (!key_tree->max_flag) + cur->max_key_parts += + key_tree->next_key_part->store_max_key(seq->param->key[seq->keyno], + &cur->max_key, + &cur->max_key_flag); + break; + } + } + + /* + Ok, current atomic interval is in form "t.field=const" and there is + next_key_part interval. Step right, and walk up from there. 
+ */ + key_tree= key_tree->next_key_part; + +walk_up_n_right: + while (key_tree->prev && key_tree->prev != &null_element) + { + /* Step up */ + key_tree= key_tree->prev; + } + step_down_to(seq, key_tree); + } + + /* Ok got a tuple */ + RANGE_SEQ_ENTRY *cur= &seq->stack[seq->i]; + uint min_key_length= cur->min_key - seq->param->min_key; + + range->ptr= (char*)(int)(key_tree->part); + if (cur->min_key_flag & GEOM_FLAG) + { + range->range_flag= cur->min_key_flag; + + /* Here minimum contains also function code bits, and maximum is +inf */ + range->start_key.key= seq->param->min_key; + range->start_key.length= min_key_length; + range->start_key.flag= (ha_rkey_function) (cur->min_key_flag ^ GEOM_FLAG); + } + else + { + range->range_flag= cur->min_key_flag | cur->max_key_flag; + + range->start_key.key= seq->param->min_key; + range->start_key.length= cur->min_key - seq->param->min_key; + range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts); + range->start_key.flag= (cur->min_key_flag & NEAR_MIN ? HA_READ_AFTER_KEY : + HA_READ_KEY_EXACT); + + range->end_key.key= seq->param->max_key; + range->end_key.length= cur->max_key - seq->param->max_key; + range->end_key.flag= (cur->max_key_flag & NEAR_MAX ? HA_READ_BEFORE_KEY : + HA_READ_AFTER_KEY); + range->end_key.keypart_map= make_prev_keypart_map(cur->max_key_parts); + + if (!(cur->min_key_flag & ~NULL_RANGE) && !cur->max_key_flag && + (uint)key_tree->part+1 == seq->param->table->key_info[seq->real_keyno].key_parts && + (seq->param->table->key_info[seq->real_keyno].flags & (HA_NOSAME | HA_END_SPACE_KEY)) == + HA_NOSAME && + range->start_key.length == range->end_key.length && + !memcmp(seq->param->min_key,seq->param->max_key,range->start_key.length)) + range->range_flag= UNIQUE_RANGE | (cur->min_key_flag & NULL_RANGE); + + if (seq->param->is_ror_scan) + { + /* + If we get here, the condition on the key was converted to form + "(keyXpart1 = c1) AND ... 
AND (keyXpart{key_tree->part - 1} = cN) AND + somecond(keyXpart{key_tree->part})" + Check if + somecond is "keyXpart{key_tree->part} = const" and + uncovered "tail" of KeyX parts is either empty or is identical to + first members of clustered primary key. + */ + if (!(!(cur->min_key_flag & ~NULL_RANGE) && !cur->max_key_flag && + (range->start_key.length == range->end_key.length) && + !memcmp(range->start_key.key, range->end_key.key, range->start_key.length) && + is_key_scan_ror(seq->param, seq->real_keyno, key_tree->part + 1))) + seq->param->is_ror_scan= FALSE; + } + } + seq->param->range_count++; + seq->param->max_key_part=max(seq->param->max_key_part,key_tree->part); + return 0; +} + +/**************************************************************************** + MRR Range Sequence Interface implementation that walks array<QUICK_RANGE> + ****************************************************************************/ + +/* + Range sequence interface implementation for array<QUICK_RANGE>: initialize + + SYNOPSIS + quick_range_seq_init() + init_param Caller-opaque parameter: QUICK_RANGE_SELECT* pointer + n_ranges Number of ranges in the sequence (ignored) + flags MRR flags (currently not used) + + RETURN + Opaque value to be passed to quick_range_seq_next +*/ + +range_seq_t quick_range_seq_init(void *init_param, uint n_ranges, uint flags) +{ + QUICK_RANGE_SELECT *quick= (QUICK_RANGE_SELECT*)init_param; + quick->qr_traversal_ctx.first= (QUICK_RANGE**)quick->ranges.buffer; + quick->qr_traversal_ctx.cur= (QUICK_RANGE**)quick->ranges.buffer; + quick->qr_traversal_ctx.last= quick->qr_traversal_ctx.cur + + quick->ranges.elements; + return &quick->qr_traversal_ctx; +} + + +/* + Range sequence interface implementation for array<QUICK_RANGE>: get next + + SYNOPSIS + quick_range_seq_next() + rseq Value returned from quick_range_seq_init + range OUT Store information about the range here + + RETURN + 0 Ok + 1 No more ranges in the sequence +*/ + +uint 
quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range) +{ + QUICK_RANGE_SEQ_CTX *ctx= (QUICK_RANGE_SEQ_CTX*)rseq; + + if (ctx->cur == ctx->last) + return 1; /* no more ranges */ + + QUICK_RANGE *cur= *(ctx->cur); + key_range *start_key= &range->start_key; + key_range *end_key= &range->end_key; + + start_key->key= cur->min_key; + start_key->length= cur->min_length; + start_key->keypart_map= cur->min_keypart_map; + start_key->flag= ((cur->flag & NEAR_MIN) ? HA_READ_AFTER_KEY : + (cur->flag & EQ_RANGE) ? + HA_READ_KEY_EXACT : HA_READ_KEY_OR_NEXT); + end_key->key= cur->max_key; + end_key->length= cur->max_length; + end_key->keypart_map= cur->max_keypart_map; + /* + We use HA_READ_AFTER_KEY here because, when reading on a key + prefix, we want to find all keys with this prefix. + */ + end_key->flag= (cur->flag & NEAR_MAX ? HA_READ_BEFORE_KEY : + HA_READ_AFTER_KEY); + range->range_flag= cur->flag; + ctx->cur++; + return 0; +} + + === modified file 'sql/set_var.cc' --- a/sql/set_var.cc 2009-12-21 02:26:15 +0000 +++ b/sql/set_var.cc 2009-12-22 12:49:15 +0000 @@ -528,6 +528,8 @@ static sys_var_bool_ptr sys_user static sys_var_thd_ulong sys_read_rnd_buff_size(&vars, "read_rnd_buffer_size", &SV::read_rnd_buff_size); +static sys_var_thd_ulong sys_mrr_buff_size(&vars, "mrr_buffer_size", + &SV::mrr_buff_size); static sys_var_thd_ulong sys_div_precincrement(&vars, "div_precision_increment", &SV::div_precincrement); static sys_var_long_ptr sys_rpl_recovery_rank(&vars, "rpl_recovery_rank", === modified file 'sql/sql_class.h' --- a/sql/sql_class.h 2009-12-21 02:26:15 +0000 +++ b/sql/sql_class.h 2009-12-22 12:49:15 +0000 @@ -340,6 +340,7 @@ struct system_variables ulong query_cache_type; ulong read_buff_size; ulong read_rnd_buff_size; + ulong mrr_buff_size; ulong div_precincrement; ulong sortbuff_size; ulong thread_handling; === modified file 'storage/maria/ha_maria.cc' --- a/storage/maria/ha_maria.cc 2009-12-15 07:16:46 +0000 +++ b/storage/maria/ha_maria.cc 2009-12-22 
12:33:21 +0000 @@ -2022,16 +2022,15 @@ int ha_maria::delete_row(const uchar * b C_MODE_START -my_bool index_cond_func_maria(void *arg) +ICP_RESULT index_cond_func_maria(void *arg) { ha_maria *h= (ha_maria*)arg; - /*if (h->in_range_read)*/ if (h->end_range) { if (h->compare_key2(h->end_range) > 0) - return 2; /* caller should return HA_ERR_END_OF_FILE already */ + return ICP_OUT_OF_RANGE; /* caller should return HA_ERR_END_OF_FILE already */ } - return (my_bool)h->pushed_idx_cond->val_int(); + return h->pushed_idx_cond->val_int() ? ICP_MATCH : ICP_NO_MATCH; } C_MODE_END === modified file 'storage/maria/ha_maria.h' --- a/storage/maria/ha_maria.h 2009-12-15 07:16:46 +0000 +++ b/storage/maria/ha_maria.h 2009-12-22 12:33:21 +0000 @@ -29,7 +29,7 @@ #define HA_RECOVER_QUICK 8 /* Don't check rows in data file */ C_MODE_START -my_bool index_cond_func_maria(void *arg); +ICP_RESULT index_cond_func_maria(void *arg); C_MODE_END extern ulong maria_sort_buffer_size; @@ -187,5 +187,5 @@ public: Item *idx_cond_push(uint keyno, Item* idx_cond); private: DsMrr_impl ds_mrr; - friend my_bool index_cond_func_maria(void *arg); + friend ICP_RESULT index_cond_func_maria(void *arg); }; === modified file 'storage/maria/ma_key.c' --- a/storage/maria/ma_key.c 2009-12-15 07:16:46 +0000 +++ b/storage/maria/ma_key.c 2009-12-22 12:33:21 +0000 @@ -669,10 +669,10 @@ int _ma_read_key_record(MARIA_HA *info, will look for column values there) RETURN - -1 Error - 0 Index condition is not satisfied, continue scanning - 1 Index condition is satisfied - 2 Index condition is not satisfied, end the scan. + ICP_ERROR Error + ICP_NO_MATCH Index condition is not satisfied, continue scanning + ICP_MATCH Index condition is satisfied + ICP_OUT_OF_RANGE Index condition is not satisfied, end the scan. 
*/ int ma_check_index_cond(register MARIA_HA *info, uint keynr, uchar *record) === modified file 'storage/maria/maria_def.h' --- a/storage/maria/maria_def.h 2009-12-15 07:16:46 +0000 +++ b/storage/maria/maria_def.h 2009-12-22 12:33:21 +0000 @@ -477,8 +477,7 @@ typedef struct st_maria_block_scan MARIA_RECORD_POS row_base_page; } MARIA_BLOCK_SCAN; -/*psergey-todo: do really need to have copies of this all over the place?*/ -typedef my_bool (*index_cond_func_t)(void *param); +typedef ICP_RESULT (*index_cond_func_t)(void *param); struct st_maria_handler { === modified file 'storage/myisam/mi_key.c' --- a/storage/myisam/mi_key.c 2009-12-15 07:16:46 +0000 +++ b/storage/myisam/mi_key.c 2009-12-22 12:33:21 +0000 @@ -504,10 +504,10 @@ int _mi_read_key_record(MI_INFO *info, m will look for column values there) RETURN - -1 Error - 0 Index condition is not satisfied, continue scanning - 1 Index condition is satisfied - 2 Index condition is not satisfied, end the scan. + ICP_ERROR Error + ICP_NO_MATCH Index condition is not satisfied, continue scanning + ICP_MATCH Index condition is satisfied + ICP_OUT_OF_RANGE Index condition is not satisfied, end the scan. 
*/ int mi_check_index_cond(register MI_INFO *info, uint keynr, uchar *record) @@ -516,7 +516,7 @@ int mi_check_index_cond(register MI_INFO { mi_print_error(info->s, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; - return -1; + return ICP_ERROR; } return info->index_cond_func(info->index_cond_func_arg); } === modified file 'storage/myisam/mi_rkey.c' --- a/storage/myisam/mi_rkey.c 2009-12-15 07:16:46 +0000 +++ b/storage/myisam/mi_rkey.c 2009-12-22 12:33:21 +0000 @@ -29,7 +29,7 @@ int mi_rkey(MI_INFO *info, uchar *buf, i MI_KEYDEF *keyinfo; HA_KEYSEG *last_used_keyseg; uint pack_key_length, use_key_length, nextflag; - int res= 0; + ICP_RESULT res= ICP_NO_MATCH; DBUG_ENTER("mi_rkey"); DBUG_PRINT("enter", ("base: 0x%lx buf: 0x%lx inx: %d search_flag: %d", (long) info, (long) buf, inx, search_flag)); @@ -118,7 +118,7 @@ int mi_rkey(MI_INFO *info, uchar *buf, i (search_flag != HA_READ_KEY_EXACT || last_used_keyseg != keyinfo->seg + keyinfo->keysegs)) || (info->index_cond_func && - !(res= mi_check_index_cond(info, inx, buf)))) + (res= mi_check_index_cond(info, inx, buf)) == ICP_NO_MATCH)) { uint not_used[2]; /* @@ -146,7 +146,7 @@ int mi_rkey(MI_INFO *info, uchar *buf, i break; } } - if (res == 2) + if (res == ICP_OUT_OF_RANGE) { info->lastpos= HA_OFFSET_ERROR; if (share->concurrent_insert) === modified file 'storage/myisam/mi_rnext.c' --- a/storage/myisam/mi_rnext.c 2009-12-15 07:16:46 +0000 +++ b/storage/myisam/mi_rnext.c 2009-12-22 12:33:21 +0000 @@ -28,7 +28,7 @@ int mi_rnext(MI_INFO *info, uchar *buf, { int error,changed; uint flag; - int res= 0; + ICP_RESULT res= 0; DBUG_ENTER("mi_rnext"); if ((inx = _mi_check_index(info,inx)) < 0) @@ -87,7 +87,7 @@ int mi_rnext(MI_INFO *info, uchar *buf, while ((info->s->concurrent_insert && info->lastpos >= info->state->data_file_length) || (info->index_cond_func && - !(res= mi_check_index_cond(info, inx, buf)))) + (res= mi_check_index_cond(info, inx, buf)) == ICP_NO_MATCH)) { /* Skip rows that are either inserted by other threads since 
@@ -100,7 +100,7 @@ int mi_rnext(MI_INFO *info, uchar *buf, info->s->state.key_root[inx]))) break; } - if (!error && res == 2) + if (!error && res == ICP_OUT_OF_RANGE) { if (info->s->concurrent_insert) rw_unlock(&info->s->key_root_lock[inx]); === modified file 'storage/myisam/mi_rnext_same.c' --- a/storage/myisam/mi_rnext_same.c 2009-12-15 07:16:46 +0000 +++ b/storage/myisam/mi_rnext_same.c 2009-12-22 12:33:21 +0000 @@ -75,9 +75,13 @@ int mi_rnext_same(MI_INFO *info, uchar * info->lastpos= HA_OFFSET_ERROR; break; } - /* Skip rows that are inserted by other threads since we got a lock */ + /* + Skip + - rows that are inserted by other threads since we got a lock + - rows that don't match index condition */ if (info->lastpos < info->state->data_file_length && - (!info->index_cond_func || mi_check_index_cond(info, inx, buf))) + (!info->index_cond_func || + mi_check_index_cond(info, inx, buf) != ICP_NO_MATCH)) break; } } === modified file 'storage/xtradb/handler/ha_innodb.cc' --- a/storage/xtradb/handler/ha_innodb.cc 2009-12-15 07:16:46 +0000 +++ b/storage/xtradb/handler/ha_innodb.cc 2009-12-22 12:33:21 +0000 @@ -114,7 +114,7 @@ static pthread_mutex_t commit_cond_m; static bool innodb_inited = 0; C_MODE_START -static uint index_cond_func_innodb(void *arg); +static int index_cond_func_innodb(void *arg); C_MODE_END @@ -10765,24 +10765,12 @@ ha_rows ha_innobase::multi_range_read_in { /* See comments in ha_myisam::multi_range_read_info_const */ ds_mrr.init(this, table); - //psergey-mrr-fix: + if (prebuilt->select_lock_type != LOCK_NONE) *flags |= HA_MRR_USE_DEFAULT_IMPL; - uint orig_flags= *flags; - ha_rows res= ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges, bufsz, flags, cost); - - bool disable_ds_mrr= true; - disable_ds_mrr= false; -// DBUG_EXECUTE_IF("optimizer_innodb_ds_mrr", disable_ds_mrr= false;); - if (!disable_ds_mrr) - return res; - - /* Disable DS-MRR: enable MS-MRR only after critical bugs are fixed */ - *bufsz= 0; - *flags = orig_flags | 
HA_MRR_USE_DEFAULT_IMPL; return res; } @@ -10791,17 +10779,7 @@ ha_rows ha_innobase::multi_range_read_in uint *flags, COST_VECT *cost) { ds_mrr.init(this, table); - uint orig_flags= *flags; - ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, bufsz, flags, cost); - bool disable_ds_mrr= false; - // DBUG_EXECUTE_IF("optimizer_innodb_ds_mrr", disable_ds_mrr= false;); - if (!disable_ds_mrr) - return res; - - /* Disable DS-MRR: enable MS-MRR only after critical bugs are fixed */ - *bufsz= 0; - *flags = orig_flags | HA_MRR_USE_DEFAULT_IMPL; return res; } @@ -10818,15 +10796,15 @@ C_MODE_START See note on ICP_RESULT for return values description. */ -static uint index_cond_func_innodb(void *arg) +static int index_cond_func_innodb(void *arg) { ha_innobase *h= (ha_innobase*)arg; if (h->end_range) { if (h->compare_key2(h->end_range) > 0) - return 2; /* caller should return HA_ERR_END_OF_FILE already */ + return ICP_OUT_OF_RANGE; /* caller should return HA_ERR_END_OF_FILE already */ } - return test(h->pushed_idx_cond->val_int()); + return h->pushed_idx_cond->val_int()? 
ICP_MATCH : ICP_NO_MATCH; } C_MODE_END @@ -10834,8 +10812,7 @@ C_MODE_END Item *ha_innobase::idx_cond_push(uint keyno_arg, Item* idx_cond_arg) { - // V :psergey-mrrr-merge: V - if (keyno_arg != primary_key && (prebuilt->select_lock_type == LOCK_NONE)) + if ((keyno_arg != primary_key) && (prebuilt->select_lock_type == LOCK_NONE)) { pushed_idx_cond_keyno= keyno_arg; pushed_idx_cond= idx_cond_arg; === modified file 'storage/xtradb/include/row0mysql.h' --- a/storage/xtradb/include/row0mysql.h 2009-12-15 07:16:46 +0000 +++ b/storage/xtradb/include/row0mysql.h 2009-12-22 12:33:21 +0000 @@ -564,7 +564,7 @@ struct mysql_row_templ_struct { #define ROW_PREBUILT_ALLOCATED 78540783 #define ROW_PREBUILT_FREED 26423527 -typedef uint (*index_cond_func_t)(void *param); +typedef int (*index_cond_func_t)(void *param); /* A struct for (sometimes lazily) prebuilt structures in an Innobase table handle used within MySQL; these are used to save CPU time. */ === modified file 'storage/xtradb/row/row0sel.c' --- a/storage/xtradb/row/row0sel.c 2009-12-15 07:16:46 +0000 +++ b/storage/xtradb/row/row0sel.c 2009-12-22 12:33:21 +0000 @@ -3116,10 +3116,14 @@ row_sel_pop_cached_row_for_mysql( /* Copy NULL bit of the current field from cached_rec to buf */ if (templ->mysql_null_bit_mask) { - buf[templ->mysql_null_byte_offset] + /*buf[templ->mysql_null_byte_offset] ^= (buf[templ->mysql_null_byte_offset] ^ cached_rec[templ->mysql_null_byte_offset]) - & (byte)templ->mysql_null_bit_mask; + & (byte)templ->mysql_null_bit_mask;*/ + byte *null_byte= buf + templ->mysql_null_byte_offset; + (*null_byte)&= ~templ->mysql_null_bit_mask; + (*null_byte)|= cached_rec[templ->mysql_null_byte_offset] & + templ->mysql_null_bit_mask; } } } @@ -3354,10 +3358,8 @@ row_search_for_mysql( mem_heap_t* heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; - /*psergey-mrr:*/ ibool some_fields_in_buffer; ibool get_clust_rec = 0; - /*:psergey-mrr*/ rec_offs_init(offsets_); @@ -4210,11 +4212,8 @@ 
no_gap_lock: information via the clustered index record. */ ut_ad(index != clust_index); - /*psergey-mrr:*/ get_clust_rec = TRUE; goto idx_cond_check; - /**goto requires_clust_rec;**/ - /*:psergey-mrr*/ } } @@ -4260,22 +4259,20 @@ no_gap_lock: idx_cond_check: - if (prebuilt->idx_cond_func) - { - int res; - ut_ad(prebuilt->template_type != ROW_MYSQL_DUMMY_TEMPLATE); - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - row_sel_store_mysql_rec(buf, prebuilt, rec, - offsets, 0, prebuilt->n_index_fields); - res= prebuilt->idx_cond_func(prebuilt->idx_cond_func_arg); - if (res == 0) - goto next_rec; - if (res == 2) - { - err = DB_RECORD_NOT_FOUND; - goto idx_cond_failed; - } - } + if (prebuilt->idx_cond_func) { + int res; + ut_ad(prebuilt->template_type != ROW_MYSQL_DUMMY_TEMPLATE); + offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); + row_sel_store_mysql_rec(buf, prebuilt, rec, + offsets, 0, prebuilt->n_index_fields); + res= prebuilt->idx_cond_func(prebuilt->idx_cond_func_arg); + if (res == 0) + goto next_rec; + if (res == 2) { + err = DB_RECORD_NOT_FOUND; + goto idx_cond_failed; + } + } /* Get the clustered index record if needed, if we did not do the search using the clustered index. */
participants (1)
-
Igor Babaev