Hi, Igor! Sorry for a delay :( Please, find my review attached. On Sep 27, Igor Babaev wrote:
=== modified file 'mysql-test/r/alter_table.result' --- a/mysql-test/r/alter_table.result 2012-05-21 13:30:25 +0000 +++ b/mysql-test/r/alter_table.result 2012-09-27 21:29:26 +0000 @@ -1340,3 +1340,203 @@ execute stmt1; deallocate prepare stmt1; drop table t2; +# +# mdev-539: fast build of unique/primary indexes for MyISAM +#
I'd suggest to have these test cases in a separate file, say, alter_table_539.test But it's just a siggestion, do as you prefer.
+DROP DATABASE IF EXISTS dbt3_s001; +CREATE DATABASE dbt3_s001; +use dbt3_s001; +drop index `primary` on customer; +show create table customer; +Table Create Table +customer CREATE TABLE `customer` ( + `c_custkey` int(11) NOT NULL, + `c_name` varchar(25) DEFAULT NULL, + `c_address` varchar(40) DEFAULT NULL, + `c_nationkey` int(11) DEFAULT NULL, + `c_phone` char(15) DEFAULT NULL, + `c_acctbal` double DEFAULT NULL, + `c_mktsegment` char(10) DEFAULT NULL, + `c_comment` varchar(117) DEFAULT NULL, + KEY `i_c_nationkey` (`c_nationkey`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +alter table customer add primary key (c_custkey); +show create table customer; +Table Create Table +customer CREATE TABLE `customer` ( + `c_custkey` int(11) NOT NULL, + `c_name` varchar(25) DEFAULT NULL, + `c_address` varchar(40) DEFAULT NULL, + `c_nationkey` int(11) DEFAULT NULL, + `c_phone` char(15) DEFAULT NULL, + `c_acctbal` double DEFAULT NULL, + `c_mktsegment` char(10) DEFAULT NULL, + `c_comment` varchar(117) DEFAULT NULL, + PRIMARY KEY (`c_custkey`), + KEY `i_c_nationkey` (`c_nationkey`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +drop index `primary` on customer; +insert into customer values +(3,'Customer#00000000303','MG9kdTD2WBHm',1,'11-719-748-3364',7498.12,'AUTOMOBILE','special packages wake. slyly reg'); +alter table customer add primary key (c_custkey);
please add a test case for alter ignore table
+ERROR 23000: Duplicate entry '3' for key 'PRIMARY' +show create table customer; +Table Create Table +customer CREATE TABLE `customer` ( ... === modified file 'sql/ha_partition.cc' --- a/sql/ha_partition.cc 2012-08-31 21:54:54 +0000 +++ b/sql/ha_partition.cc 2012-09-27 21:29:26 +0000 @@ -6862,6 +6862,8 @@
flags_to_return= ht->alter_table_flags(flags); flags_to_return|= m_file[0]->alter_table_flags(flags); + flags_to_return&= + ~(HA_FAST_ADD_INDEX | HA_FAST_ADD_UNIQUE_INDEX | HA_FAST_ADD_PK_INDEX);
You don't need these flags - see below
/* If one partition fails we must be able to revert the change for the other,
=== modified file 'sql/sql_table.cc' --- a/sql/sql_table.cc 2012-09-02 02:41:38 +0000 +++ b/sql/sql_table.cc 2012-09-27 21:29:26 +0000 @@ -6743,6 +6745,11 @@ my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0)); goto err_new_table_cleanup; }); + + if (!ignore && + test(alter_flags & (HA_FAST_ADD_UNIQUE_INDEX | HA_FAST_ADD_PK_INDEX))) + new_table->file->extra(HA_EXTRA_CREATE_UNIQUE_INDEX_BY_SORT);
What's the point? extra() functions were always *optional* in the storage engine API, any storage engine is allowed to ignore any HA_EXTRA_* requests, or all of them. It means, you don't need to introduce HA_FAST_whatever flags, and don't need to check them. Just write if (!ignore) new_table->file->extra(HA_EXTRA_CREATE_UNIQUE_INDEX_BY_SORT); and let the engine ignore it, if it wishes to do so. as a small bonus, you won't need to move alter_flags variable around, it can stay where it was, inside the if() block.
+ error= copy_data_between_tables(thd, table, new_table, alter_info->create_list, ignore, order_num, order, &copied, &deleted,
=== modified file 'storage/maria/ha_maria.cc' --- a/storage/maria/ha_maria.cc 2012-08-14 16:59:28 +0000 +++ b/storage/maria/ha_maria.cc 2012-09-27 21:29:26 +0000 @@ -1633,6 +1633,9 @@
please, use the recipe from https://kb.askmonty.org/en/how-to-get-more-out-of-bzr-when-working-on-mariad... it's much more difficult to review patches without sufficient context (that is, without function names on every chunk)
param->testflag|= T_REP_BY_SORT; error= maria_repair_by_sort(param, file, fixed_name, test(param->testflag & T_QUICK)); + if (error && share->state.dupp_key != MARIA_MAX_KEY) + print_keydup_error(share->state.dupp_key, + ER(ER_DUP_ENTRY_WITH_KEY_NAME)); } } else
=== modified file 'storage/myisam/ha_myisam.cc' --- a/storage/myisam/ha_myisam.cc 2012-07-18 18:40:15 +0000 +++ b/storage/myisam/ha_myisam.cc 2012-09-27 21:29:26 +0000 @@ -1130,6 +1130,9 @@ thd_proc_info(thd, "Repair by sorting"); error = mi_repair_by_sort(¶m, file, fixed_name, test(param.testflag & T_QUICK)); + if (error && share->state.dupp_key != MAX_KEY) + print_keydup_error(share->state.dupp_key, + ER(ER_DUP_ENTRY_WITH_KEY_NAME));
I don't understand it. print_keydup_error() takes the value to print from table->record[[0]. But - in the case of mi_repair_by_sort - how does the value end up here? What code puts the conflicting key value into table->record[0] ?
} } else
=== modified file 'storage/myisam/mi_check.c' --- a/storage/myisam/mi_check.c 2012-04-07 13:58:46 +0000 +++ b/storage/myisam/mi_check.c 2012-09-27 21:29:26 +0000 @@ -2408,12 +2411,22 @@ (my_bool) (!(param->testflag & T_VERBOSE)), param->sort_buffer_length)) { + + if (sort_param.sort_info->dupp) + { + share->state.dupp_key= sort_param.key; + keydup_error= 1; + } + if (!(keydup_error && param->suppress_keydup_error_handling))
1. please use param->testflag for flags, instead of a new special field 2. I don't see why are you doing it here, normally the error is issued below, as "Duplicate key for record at %10s against...." I'd expect to see any new code that deals with duplicate key errors at the same place where the old code is
+ { param->retry_repair= 1; if (! param->error_printed) mi_check_print_error(param, "Couldn't fix table with create_index_by_sort(). Error: %d", my_errno); + } goto err; } + /* No need to calculate checksum again. */ sort_param.calc_checksum= 0; free_root(&sort_param.wordroot, MYF(0)); @@ -2533,18 +2546,27 @@ } if (got_error) { + if (!(keydup_error && param->suppress_keydup_error_handling)) + { if (! param->error_printed) mi_check_print_error(param,"%d when fixing table",my_errno);
You could simply set param->error_printed when you get dplicate key error - it'll avoid all further errors, you won't need to wrap them all in if()'s
+ } if (new_file >= 0) { (void) mysql_file_close(new_file, MYF(0)); (void) mysql_file_delete(mi_key_file_datatmp, param->temp_filename, MYF(MY_WME)); + if (!(keydup_error && param->suppress_keydup_error_handling)) + {
Why do you need this if() ?
if (info->dfile == new_file) /* Retry with key cache */ if (unlikely(mi_open_datafile(info, share, name, -1))) param->retry_repair= 0; /* Safety */ + } } + if (!(keydup_error && param->suppress_keydup_error_handling)) + { mi_mark_crashed_on_repair(info); + }
Why do you need to avoid mi_mark_crashed_on_repair() ?
if (killed_ptr(param)) param->retry_repair= 0; /* No use to retry repair */ } @@ -4682,8 +4708,9 @@ (!rows || rows >= MI_MIN_ROWS_TO_DISABLE_INDEXES)); for (i=0 ; i < share->base.keys ; i++,key++) { - if (!(key->flag & (HA_NOSAME | HA_SPATIAL | HA_AUTO_KEY)) && - ! mi_too_big_key_for_sort(key,rows) && info->s->base.auto_key != i+1) + if (!(key->flag & (HA_SPATIAL | HA_AUTO_KEY)) && + ! mi_too_big_key_for_sort(key,rows) && info->s->base.auto_key != i+1 && + (!(key->flag & HA_NOSAME) || info->create_unique_index_by_sort))
Now, this directly contradicts the function name and the function comment. Could you please rename the function and update the comment? And instead of checking info->create_unique_index_by_sort I'd rather pass it down as an argument: void mi_disable_indexes(MI_INFO *info, ha_rows rows, my_bool only_non_unique) {
{ mi_clear_key_active(share->state.key_map, i); info->update|= HA_STATE_CHANGED;
=== modified file 'storage/myisam/mi_extra.c' --- a/storage/myisam/mi_extra.c 2012-07-31 17:29:07 +0000 +++ b/storage/myisam/mi_extra.c 2012-09-27 21:29:26 +0000 @@ -365,6 +365,9 @@ case HA_EXTRA_CHANGE_KEY_TO_DUP: mi_extra_keyflag(info, function); break; + case HA_EXTRA_CREATE_UNIQUE_INDEX_BY_SORT: + info->create_unique_index_by_sort= 1; + break;
Two comments here: 1. I don't like the name "create unique index by sort", because ALTER TABLE itself does not create any indexes by sort or anything it simply tells the engine to optimize a bulk insertion. and it's myisam that does repair-by-sort. I mean, a better name would be a high-level name that does not imply myisam-specific details. something like HA_EXTRA_BULK_INSERT_DUPLICATES_UNLIKELY which tells the engine to optimize the following bulk insert for the case when unique key conflicts are unlikely to happen. (in myisam case it means to build unique indexes by sort, right) 2. I've discussed this with Monty and we both didn't like using extra() here, even to preserve the API compatibility. Monty thinks there's no need to bother, and we can change start_bulk_insert() any way we want. But to make it better for third-party engines, I'd suggest something like class handler { ... virtual void start_bulk_insert(ha_rows rows) {} virtual void start_bulk_insert(ha_rows rows, ulonglong flags) { start_bulk_insert(rows); } ... } that is, you introduce the new function, and keep the old one as a fallback. This way it stays compatible, and old engines won't need any changes. But myisam/maria will implement the new start_bulk_insert variant, with two arguments, and will recognize the flag HA_BULK_INSERT_DUPLICATES_UNLIKELY And no extra() is needed.
case HA_EXTRA_MMAP: #ifdef HAVE_MMAP mysql_mutex_lock(&share->intern_lock);
Regards, Sergei