#At lp:maria based on revid:sergii@pisem.net-20100308170509-gsqfnt3a9rdaxj32 2825 Michael Widenius 2010-03-09 Added count of my_sync calls (to SHOW STATUS) tmp_table_size can now be set to 0 (to disable in-memory internal temp tables) Improved speed for internal Maria temp tables: - Don't use packed keys, except with long text fields. - Don't copy all accessed key pages during key search. Added some new benchmark tests to sql-bench (for GROUP BY) modified: BUILD/compile-pentium64-gcov BUILD/compile-pentium64-gprof include/my_sys.h mysql-test/r/variables.result mysys/my_sync.c sql-bench/test-select.sh sql/mysqld.cc sql/sql_select.cc storage/maria/ma_key_recover.h storage/maria/ma_page.c storage/maria/ma_rkey.c storage/maria/ma_search.c storage/maria/ma_write.c storage/maria/maria_def.h per-file messages: BUILD/compile-pentium64-gcov Update script to use the same pentium_configs flags as other tests BUILD/compile-pentium64-gprof Update script to use the same pentium_configs flags as other tests include/my_sys.h Added count of my_sync calls mysql-test/r/variables.result tmp_table_size can now be set to 0 sql-bench/test-select.sh Added some new tests for GROUP BY on a non-key field and GROUP BY with a different ORDER BY sql/mysqld.cc Added count of my_sync calls tmp_table_size can now be set to 0 (to disable in-memory internal temp tables) sql/sql_select.cc If tmp_table_size is 0, don't use in-memory temp tables (good for benchmarking MyISAM/Maria temp tables) Don't pack keys for Maria tables; the 8K page size makes packed keys too slow for temp tables. storage/maria/ma_key_recover.h Moved declaration of _ma_unpin_all_pages() to maria_def.h storage/maria/ma_page.c Moved code used to simplify comparing of identical Maria tables to its own function (page_cleanup()) Fixed so that one can read a page with a read lock. 
storage/maria/ma_rkey.c For non-exact key reads, cache the page where we found the key (to speed up future read-next/read-prev calls) storage/maria/ma_search.c Moved code to cache last key page to a separate function. Instead of copying pages, only get a link to the page. This notably speeds up key searches on bigger tables. storage/maria/ma_write.c Added comment storage/maria/maria_def.h Moved page_cleanup() to separate function. === modified file 'BUILD/compile-pentium64-gcov' --- a/BUILD/compile-pentium64-gcov 2007-08-16 00:10:16 +0000 +++ b/BUILD/compile-pentium64-gcov 2010-03-09 19:22:24 +0000 @@ -9,9 +9,9 @@ export CCACHE_DISABLE export LDFLAGS="$gcov_link_flags" -extra_flags="$pentium64_cflags $debug_cflags $max_cflags $gcov_compile_flags" +extra_flags="$pentium64_cflags $max_cflags $gcov_compile_flags" c_warnings="$c_warnings $debug_extra_warnings" cxx_warnings="$cxx_warnings $debug_extra_warnings" -extra_configs="$pentium64_configs $debug_configs $gcov_configs $max_configs" +extra_configs="$pentium_configs $debug_configs $gcov_configs $max_configs --with-zlib-dir=bundled" . "$path/FINISH.sh" === modified file 'BUILD/compile-pentium64-gprof' --- a/BUILD/compile-pentium64-gprof 2007-08-16 00:10:16 +0000 +++ b/BUILD/compile-pentium64-gprof 2010-03-09 19:22:24 +0000 @@ -4,6 +4,6 @@ path=`dirname $0` . "$path/SETUP.sh" extra_flags="$pentium64_cflags $gprof_compile_flags" -extra_configs="$pentium64_configs $debug_configs $gprof_link_flags" +extra_configs="$pentium_configs $max_configs $gprof_link_flags --with-zlib-dir=bundled" . 
"$path/FINISH.sh" === modified file 'include/my_sys.h' --- a/include/my_sys.h 2009-12-03 11:19:05 +0000 +++ b/include/my_sys.h 2010-03-09 19:22:24 +0000 @@ -247,6 +247,7 @@ extern CHARSET_INFO compiled_charsets[]; /* statistics */ extern ulong my_file_opened,my_stream_opened, my_tmp_file_created; extern ulong my_file_total_opened; +extern ulong my_sync_count; extern uint mysys_usage_id; extern my_bool my_init_done; === modified file 'mysql-test/r/variables.result' --- a/mysql-test/r/variables.result 2010-02-10 19:06:24 +0000 +++ b/mysql-test/r/variables.result 2010-03-09 19:22:24 +0000 @@ -575,8 +575,6 @@ set storage_engine=myisam; set global thread_cache_size=100; set timestamp=1, timestamp=default; set tmp_table_size=100; -Warnings: -Warning 1292 Truncated incorrect tmp_table_size value: '100' set tx_isolation="READ-COMMITTED"; set wait_timeout=100; set log_warnings=1; === modified file 'mysys/my_sync.c' --- a/mysys/my_sync.c 2010-01-15 15:27:55 +0000 +++ b/mysys/my_sync.c 2010-03-09 19:22:24 +0000 @@ -17,6 +17,8 @@ #include "mysys_err.h" #include <errno.h> +ulong my_sync_count; /* Count number of sync calls */ + /* Sync data in file to disk @@ -46,6 +48,7 @@ int my_sync(File fd, myf my_flags) DBUG_ENTER("my_sync"); DBUG_PRINT("my",("fd: %d my_flags: %d", fd, my_flags)); + statistic_increment(my_sync_count,&THR_LOCK_open); do { #if defined(F_FULLFSYNC) === modified file 'sql-bench/test-select.sh' --- a/sql-bench/test-select.sh 2010-02-17 20:10:02 +0000 +++ b/sql-bench/test-select.sh 2010-03-09 19:22:24 +0000 @@ -68,7 +68,8 @@ do_many($dbh,$server->create("bench1", ["region char(1) NOT NULL", "idn integer(6) NOT NULL", "rev_idn integer(6) NOT NULL", - "grp integer(6) NOT NULL"], + "grp integer(6) NOT NULL", + "grp_no_key integer(6) NOT NULL"], ["primary key (region,idn)", "unique (region,rev_idn)", "unique (region,grp,idn)"])); @@ -105,10 +106,10 @@ for ($id=0,$rev_id=$opt_loop_count-1 ; $ { $grp=$id*3 % $opt_groups; $region=chr(65+$id%$opt_regions); - 
do_query($dbh,"$query'$region',$id,$rev_id,$grp)"); + do_query($dbh,"$query'$region',$id,$rev_id,$grp,$grp)"); if ($id == $half_done) { # Test with different insert - $query="insert into bench1 (region,idn,rev_idn,grp) values ("; + $query="insert into bench1 (region,idn,rev_idn,grp,grp_no_key) values ("; } } @@ -323,6 +324,26 @@ if ($limits->{'group_functions'}) $end_time=new Benchmark; print "Time for count_group_on_key_parts ($i:$rows): " . timestr(timediff($end_time, $loop_time),"all") . "\n"; + + $loop_time=new Benchmark; + $rows=0; + for ($i=0 ; $i < $opt_medium_loop_count ; $i++) + { + $rows+=fetch_all_rows($dbh,"select grp_no_key,count(*) from bench1 group by grp_no_key"); + } + $end_time=new Benchmark; + print "Time for count_group ($i:$rows): " . + timestr(timediff($end_time, $loop_time),"all") . "\n"; + + $loop_time=new Benchmark; + $rows=0; + for ($i=0 ; $i < $opt_medium_loop_count ; $i++) + { + $rows+=fetch_all_rows($dbh,"select grp_no_key,count(*) as cnt from bench1 group by grp_no_key order by cnt"); + } + $end_time=new Benchmark; + print "Time for count_group_with_order ($i:$rows): " . + timestr(timediff($end_time, $loop_time),"all") . 
"\n"; } if ($limits->{'group_distinct_functions'}) === modified file 'sql/mysqld.cc' --- a/sql/mysqld.cc 2010-02-11 19:15:24 +0000 +++ b/sql/mysqld.cc 2010-03-09 19:22:24 +0000 @@ -7273,10 +7273,10 @@ The minimum value for this variable is 4 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"tmp_table_size", OPT_TMP_TABLE_SIZE, "If an internal in-memory temporary table exceeds this size, MySQL will" - " automatically convert it to an on-disk MyISAM table.", + " automatically convert it to an on-disk MyISAM/Maria table.", (uchar**) &global_system_variables.tmp_table_size, (uchar**) &max_system_variables.tmp_table_size, 0, GET_ULL, - REQUIRED_ARG, 16*1024*1024L, 1024, MAX_MEM_TABLE_SIZE, 0, 1, 0}, + REQUIRED_ARG, 16*1024*1024L, 0, MAX_MEM_TABLE_SIZE, 0, 1, 0}, {"transaction_alloc_block_size", OPT_TRANS_ALLOC_BLOCK_SIZE, "Allocation block size for transactions to be stored in binary log", (uchar**) &global_system_variables.trans_alloc_block_size, @@ -7778,6 +7778,7 @@ SHOW_VAR status_vars[]= { {"Ssl_verify_mode", (char*) &show_ssl_get_verify_mode, SHOW_FUNC}, {"Ssl_version", (char*) &show_ssl_get_version, SHOW_FUNC}, #endif /* HAVE_OPENSSL */ + {"Syncs", (char*) &my_sync_count, SHOW_LONG_NOFLUSH}, {"Table_locks_immediate", (char*) &locks_immediate, SHOW_LONG}, {"Table_locks_waited", (char*) &locks_waited, SHOW_LONG}, #ifdef HAVE_MMAP === modified file 'sql/sql_select.cc' --- a/sql/sql_select.cc 2010-03-08 13:57:32 +0000 +++ b/sql/sql_select.cc 2010-03-09 19:22:24 +0000 @@ -10168,7 +10168,8 @@ create_tmp_table(THD *thd,TMP_TABLE_PARA /* future: storage engine selection can be made dynamic? 
*/ if (blob_count || using_unique_constraint || (select_options & (OPTION_BIG_TABLES | SELECT_SMALL_RESULT)) == - OPTION_BIG_TABLES || (select_options & TMP_TABLE_FORCE_MYISAM)) + OPTION_BIG_TABLES || (select_options & TMP_TABLE_FORCE_MYISAM) || + !thd->variables.tmp_table_size) { share->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON); table->file= get_new_handler(share, &table->mem_root, @@ -10707,7 +10708,7 @@ static bool create_internal_tmp_table(TA { /* Create an unique key */ bzero((char*) &keydef,sizeof(keydef)); - keydef.flag=HA_NOSAME | HA_BINARY_PACK_KEY | HA_PACK_KEY; + keydef.flag=HA_NOSAME; keydef.keysegs= keyinfo->key_parts; keydef.seg= seg; } @@ -10732,7 +10733,7 @@ static bool create_internal_tmp_table(TA seg->type= keyinfo->key_part[i].type; /* Tell handler if it can do suffic space compression */ if (field->real_type() == MYSQL_TYPE_STRING && - keyinfo->key_part[i].length > 4) + keyinfo->key_part[i].length > 32) seg->flag|= HA_SPACE_PACK; } if (!(field->flags & NOT_NULL_FLAG)) === modified file 'storage/maria/ma_key_recover.h' --- a/storage/maria/ma_key_recover.h 2008-09-01 17:31:40 +0000 +++ b/storage/maria/ma_key_recover.h 2010-03-09 19:22:24 +0000 @@ -63,7 +63,6 @@ extern my_bool write_hook_for_undo_key_i extern my_bool write_hook_for_undo_key_delete(enum translog_record_type type, TRN *trn, MARIA_HA *tbl_info, LSN *lsn, void *hook_arg); -void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn); my_bool _ma_log_prefix(MARIA_PAGE *page, uint changed_length, int move_length); my_bool _ma_log_suffix(MARIA_PAGE *page, uint org_length, === modified file 'storage/maria/ma_page.c' --- a/storage/maria/ma_page.c 2009-05-06 12:03:24 +0000 +++ b/storage/maria/ma_page.c 2010-03-09 19:22:24 +0000 @@ -64,6 +64,15 @@ void _ma_page_setup(MARIA_PAGE *page, MA share->base.key_reflength : 0); } +#ifdef IDENTICAL_PAGES_AFTER_RECOVERY +void page_cleanup(MARIA_SHARE *share, MARIA_PAGE *page) +{ + uint length= page->size; + DBUG_ASSERT(length <= share->block_size - 
KEYPAGE_CHECKSUM_SIZE); + bzero(page->buff + length, share->block_size - length); +} +#endif + /** Fetch a key-page in memory @@ -102,8 +111,10 @@ my_bool _ma_fetch_keypage(MARIA_PAGE *pa if (lock != PAGECACHE_LOCK_LEFT_UNLOCKED) { - DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE); - page_link.unlock= PAGECACHE_LOCK_WRITE_UNLOCK; + DBUG_ASSERT(lock == PAGECACHE_LOCK_WRITE || lock == PAGECACHE_LOCK_READ); + page_link.unlock= (lock == PAGECACHE_LOCK_WRITE ? + PAGECACHE_LOCK_WRITE_UNLOCK : + PAGECACHE_LOCK_READ_UNLOCK); page_link.changed= 0; push_dynamic(&info->pinned_pages, (void*) &page_link); page->link_offset= info->pinned_pages.elements-1; @@ -209,14 +220,7 @@ my_bool _ma_write_keypage(MARIA_PAGE *pa } #endif -#ifdef IDENTICAL_PAGES_AFTER_RECOVERY - { - uint length= page->size; - DBUG_ASSERT(length <= block_size - KEYPAGE_CHECKSUM_SIZE); - bzero(buff + length, block_size - length); - } -#endif - + page_cleanup(share, page); res= pagecache_write(share->pagecache, &share->kfile, (pgcache_page_no_t) (page->pos / block_size), === modified file 'storage/maria/ma_rkey.c' --- a/storage/maria/ma_rkey.c 2008-06-26 05:18:28 +0000 +++ b/storage/maria/ma_rkey.c 2010-03-09 19:22:24 +0000 @@ -82,6 +82,9 @@ int maria_rkey(MARIA_HA *info, uchar *bu rw_rdlock(&keyinfo->root_lock); nextflag= maria_read_vec[search_flag] | key.flag; + if (search_flag != HA_READ_KEY_EXACT || + ((keyinfo->flag & (HA_NOSAME | HA_NULL_PART)) != HA_NOSAME)) + nextflag|= SEARCH_SAVE_BUFF; switch (keyinfo->key_alg) { #ifdef HAVE_RTREE_KEYS === modified file 'storage/maria/ma_search.c' --- a/storage/maria/ma_search.c 2009-05-06 12:03:24 +0000 +++ b/storage/maria/ma_search.c 2010-03-09 19:22:24 +0000 @@ -18,6 +18,10 @@ #include "ma_fulltext.h" #include "m_ctype.h" +static int _ma_search_no_save(register MARIA_HA *info, MARIA_KEY *key, + uint32 nextflag, register my_off_t pos, + MARIA_PINNED_PAGE **res_page_link, + uchar **res_page_buff); static my_bool _ma_get_prev_key(MARIA_KEY *key, MARIA_PAGE *ma_page, uchar *keypos); 
@@ -57,7 +61,51 @@ int _ma_check_index(MARIA_HA *info, int */ int _ma_search(register MARIA_HA *info, MARIA_KEY *key, uint32 nextflag, - register my_off_t pos) + my_off_t pos) +{ + int error; + MARIA_PINNED_PAGE *page_link; + uchar *page_buff; + + info->page_changed= 1; /* If page not saved */ + if (!(error= _ma_search_no_save(info, key, nextflag, pos, &page_link, + &page_buff))) + { + if (nextflag & SEARCH_SAVE_BUFF) + { + bmove512(info->keyread_buff, page_buff, info->s->block_size); + + /* Save position for a possible read next / previous */ + info->int_keypos= info->keyread_buff + (ulonglong) info->int_keypos; + info->int_maxpos= info->keyread_buff + (ulonglong) info->int_maxpos; + info->int_keytree_version= key->keyinfo->version; + info->last_search_keypage= info->last_keypage; + info->page_changed= 0; + info->keyread_buff_used= 0; + } + } + _ma_unpin_all_pages(info, LSN_IMPOSSIBLE); + return (error); +} + +/** + @brief Search after row by a key + + res_page_link Will contain pointer to page where we found key + + @note + Position to row is stored in info->cur_row.lastpos + + @return + @retval 0 ok (key found) + @retval -1 Not found + @retval 1 If one should continue search on higher level +*/ + +static int _ma_search_no_save(register MARIA_HA *info, MARIA_KEY *key, + uint32 nextflag, register my_off_t pos, + MARIA_PINNED_PAGE **res_page_link, + uchar **res_page_buff) { my_bool last_key_not_used; int error,flag; @@ -66,6 +114,7 @@ int _ma_search(register MARIA_HA *info, uchar lastkey[MARIA_MAX_KEY_BUFF]; MARIA_KEYDEF *keyinfo= key->keyinfo; MARIA_PAGE page; + MARIA_PINNED_PAGE *page_link; DBUG_ENTER("_ma_search"); DBUG_PRINT("enter",("pos: %lu nextflag: %u lastpos: %lu", (ulong) pos, nextflag, (ulong) info->cur_row.lastpos)); @@ -81,10 +130,11 @@ int _ma_search(register MARIA_HA *info, } if (_ma_fetch_keypage(&page, info, keyinfo, pos, - PAGECACHE_LOCK_LEFT_UNLOCKED, - DFLT_INIT_HITS, info->keyread_buff, - test(!(nextflag & SEARCH_SAVE_BUFF)))) + PAGECACHE_LOCK_READ, 
DFLT_INIT_HITS, 0, 0)) goto err; + page_link= dynamic_element(&info->pinned_pages, + info->pinned_pages.elements-1, + MARIA_PINNED_PAGE*); DBUG_DUMP("page", page.buff, page.size); flag= (*keyinfo->bin_search)(key, &page, nextflag, &keypos, lastkey, @@ -98,8 +148,9 @@ int _ma_search(register MARIA_HA *info, if (flag) { - if ((error= _ma_search(info, key, nextflag, - _ma_kpos(nod_flag,keypos))) <= 0) + if ((error= _ma_search_no_save(info, key, nextflag, + _ma_kpos(nod_flag,keypos), + res_page_link, res_page_buff)) <= 0) DBUG_RETURN(error); if (flag >0) @@ -118,26 +169,15 @@ int _ma_search(register MARIA_HA *info, ((keyinfo->flag & (HA_NOSAME | HA_NULL_PART)) != HA_NOSAME || (key->flag & SEARCH_PART_KEY) || info->s->base.born_transactional)) { - if ((error= _ma_search(info, key, (nextflag | SEARCH_FIND) & - ~(SEARCH_BIGGER | SEARCH_SMALLER | SEARCH_LAST), - _ma_kpos(nod_flag,keypos))) >= 0 || + if ((error= _ma_search_no_save(info, key, (nextflag | SEARCH_FIND) & + ~(SEARCH_BIGGER | SEARCH_SMALLER | + SEARCH_LAST), + _ma_kpos(nod_flag,keypos), + res_page_link, res_page_buff)) >= 0 || my_errno != HA_ERR_KEY_NOT_FOUND) DBUG_RETURN(error); - info->last_keypage= HA_OFFSET_ERROR; /* Buffer not in mem */ } } - if (pos != info->last_keypage) - { - uchar *old_buff= page.buff; - if (_ma_fetch_keypage(&page, info, keyinfo, pos, - PAGECACHE_LOCK_LEFT_UNLOCKED,DFLT_INIT_HITS, - info->keyread_buff, - test(!(nextflag & SEARCH_SAVE_BUFF)))) - goto err; - /* Restore position if page buffer moved */ - keypos= page.buff + (keypos - old_buff); - maxpos= page.buff + (maxpos - old_buff); - } info->last_key.keyinfo= keyinfo; if ((nextflag & (SEARCH_SMALLER | SEARCH_LAST)) && flag != 0) @@ -172,16 +212,15 @@ int _ma_search(register MARIA_HA *info, } info->cur_row.lastpos= _ma_row_pos_from_key(&info->last_key); info->cur_row.trid= _ma_trid_from_key(&info->last_key); - /* Save position for a possible read next / previous */ - info->int_keypos= info->keyread_buff + (keypos - page.buff); - 
info->int_maxpos= info->keyread_buff + (maxpos - page.buff); - info->int_nod_flag=nod_flag; - info->int_keytree_version=keyinfo->version; - info->last_search_keypage=info->last_keypage; - info->page_changed=0; - /* Set marker that buffer was used (Marker for mi_search_next()) */ - info->keyread_buff_used= (info->keyread_buff != page.buff); + /* Store offset to key */ + info->int_keypos= (uchar*) (keypos - page.buff); + info->int_maxpos= (uchar*) (maxpos - page.buff); + info->int_nod_flag= nod_flag; + info->last_keypage= pos; + *res_page_link= page_link; + *res_page_buff= page.buff; + DBUG_PRINT("exit",("found key at %lu",(ulong) info->cur_row.lastpos)); DBUG_RETURN(0); @@ -190,7 +229,7 @@ err: info->cur_row.lastpos= HA_OFFSET_ERROR; info->page_changed=1; DBUG_RETURN (-1); -} /* _ma_search */ +} /* === modified file 'storage/maria/ma_write.c' --- a/storage/maria/ma_write.c 2009-02-19 09:01:25 +0000 +++ b/storage/maria/ma_write.c 2010-03-09 19:22:24 +0000 @@ -587,6 +587,12 @@ my_bool _ma_enlarge_root(MARIA_HA *info, /* Search after a position for a key and store it there + TODO: + Change this to use pagecache directly instead of creating a copy + of the page. To do this, we must however change write-key-on-page + algorithm to not overwrite the buffer but instead store any overflow + key in a separate buffer. 
+ @return @retval -1 error @retval 0 ok === modified file 'storage/maria/maria_def.h' --- a/storage/maria/maria_def.h 2010-02-10 19:06:24 +0000 +++ b/storage/maria/maria_def.h 2010-03-09 19:22:24 +0000 @@ -979,6 +979,11 @@ extern ulonglong transid_get_packed(MARI #define page_store_info(share, page) \ _ma_store_keypage_flag((share), (page)->buff, (page)->flag); \ _ma_store_page_used((share), (page)->buff, (page)->size); +#ifdef IDENTICAL_PAGES_AFTER_RECOVERY +void page_cleanup(MARIA_SHARE *share, MARIA_PAGE *page); +#else +#define page_cleanup(A,B) do { } while (0) +#endif extern MARIA_KEY *_ma_make_key(MARIA_HA *info, MARIA_KEY *int_key, uint keynr, uchar *key, const uchar *record, @@ -1197,7 +1202,7 @@ void _ma_tmp_disable_logging_for_table(M my_bool log_incomplete); my_bool _ma_reenable_logging_for_table(MARIA_HA *info, my_bool flush_pages); my_bool write_log_record_for_bulk_insert(MARIA_HA *info); - +void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn); #define MARIA_NO_CRC_NORMAL_PAGE 0xffffffff #define MARIA_NO_CRC_BITMAP_PAGE 0xfffffffe