Hi, Nikita! Despite what the subject says, it's the review for eea71e8b05a..7a5d3316805 That is everything in bb-10.5-MDEV-16978-without-overlaps minus the last commit (that came too late and I'll review it separately) In general it looks pretty good, just few minor comments below: On Mar 10, Nikita Malyavin wrote:
revision-id: eea71e8b05a (mariadb-10.5.0-246-geea71e8b05a) parent(s): 6618fc29749 author: Nikita Malyavin <nikitamalyavin@gmail.com> committer: Nikita Malyavin <nikitamalyavin@gmail.com> timestamp: 2020-02-21 02:38:57 +1000 message:
MDEV-16978 Application-time periods: WITHOUT OVERLAPS
* The overlaps check is implemented on a handler level per row command. It creates a separate cursor (actually, another handler instance) and caches it inside the original handler, when ha_update_row or ha_insert_row is issued. Cursor closes on unlocking the handler.
* Containing the same key in index means unique constraint violation even in usual terms. So we fetch left and right neighbours and check that they have same key prefix, excluding from the key only the period part. If it doesnt match, then there's no such neighbour, and the check passes. Otherwise, we check if this neighbour intersects with the considered key.
* The check does introduce new error and fails with ER_DUPP_KEY error.
"does not introduce new error" you mean?
This might break REPLACE workflow and should be fixed separately
diff --git a/sql/field.h b/sql/field.h index 4a8eec35b05..e187ffeb331 100644 --- a/sql/field.h +++ b/sql/field.h @@ -1444,8 +1444,9 @@ class Field: public Value_source if (null_ptr) null_ptr=ADD_TO_PTR(null_ptr,ptr_diff,uchar*); } - virtual void get_image(uchar *buff, uint length, CHARSET_INFO *cs) - { memcpy(buff,ptr,length); } + virtual void get_image(uchar *buff, uint length, + const uchar *ptr_arg, CHARSET_INFO *cs) const + { memcpy(buff,ptr_arg,length); }
please, add a convenience method. void get_image(uchar *buff, uint length, CHARSET_INFO *cs) { get_image(buff, length, ptr, cs); } and the same below, where you add ptr_arg
virtual void set_image(const uchar *buff,uint length, CHARSET_INFO *cs) { memcpy(ptr,buff,length); }
@@ -4056,7 +4066,8 @@ class Field_varstring :public Field_longstr { using Field_str::store; double val_real() override; longlong val_int() override; - String *val_str(String *, String *) override; + String *val_str(String *, String *) final; + virtual String *val_str(String*,String *, const uchar*) const;
This means that for the sake of indexes WITHOUT OVERLAPS (that very few people will use) and compressed blobs (that are used even less) you've added a new virtual call to Field_varstring::val_str (that is used awfully a lot) Try to avoid it, please
my_decimal *val_decimal(my_decimal *) override; int cmp_max(const uchar *, const uchar *, uint max_length) const override; int cmp(const uchar *a,const uchar *b) const override diff --git a/sql/field.cc b/sql/field.cc index 1ce49b0bdfa..82df2784057 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -9638,11 +9645,12 @@ int Field_bit::cmp_offset(my_ptrdiff_t row_offset) }
-uint Field_bit::get_key_image(uchar *buff, uint length, imagetype type_arg) +uint Field_bit::get_key_image(uchar *buff, uint length, const uchar *ptr_arg, imagetype type_arg) const { if (bit_len) { - uchar bits= get_rec_bits(bit_ptr, bit_ofs, bit_len); + auto *bit_ptr_for_arg= ptr_arg + (bit_ptr - ptr);
please, don't use auto in trivial cases like this one. it might be easier to type, but then the reviewer and whoever will in the future will edit this code will have to do type derivation in the head.
+ uchar bits= get_rec_bits(bit_ptr_for_arg, bit_ofs, bit_len); *buff++= bits; length--; } diff --git a/sql/item_buff.cc b/sql/item_buff.cc index 81949bcdae0..514ac740697 100644 --- a/sql/item_buff.cc +++ b/sql/item_buff.cc @@ -195,7 +195,7 @@ bool Cached_item_field::cmp(void) becasue of value change), then copy the new value to buffer. */ if (! null_value && (tmp || (tmp= (field->cmp(buff) != 0)))) - field->get_image(buff,length,field->charset()); + field->get_image(buff,length,field->ptr,field->charset());
not needed if you add convenience methods
return tmp; }
diff --git a/sql/sql_class.h b/sql/sql_class.h index 13b2659789d..2f1b1431dc0 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -357,13 +357,15 @@ class Key :public Sql_alloc, public DDL_options { engine_option_value *option_list; bool generated; bool invisible; + bool without_overlaps; + Lex_ident period;
strictly speaking, you don't need without_overlaps property, if the period name is set it *has to be* without overlaps. but ok, whatever you prefer
Key(enum Keytype type_par, const LEX_CSTRING *name_arg, ha_key_alg algorithm_arg, bool generated_arg, DDL_options_st ddl_options) :DDL_options(ddl_options), type(type_par), key_create_info(default_key_create_info), name(*name_arg), option_list(NULL), generated(generated_arg), - invisible(false) + invisible(false), without_overlaps(false) { key_create_info.algorithm= algorithm_arg; } diff --git a/sql/table.h b/sql/table.h index 6ce92ee048e..09f03690a8c 100644 --- a/sql/table.h +++ b/sql/table.h @@ -1635,13 +1635,12 @@ struct TABLE int insert_portion_of_time(THD *thd, const vers_select_conds_t &period_conds, ha_rows *rows_inserted); bool vers_check_update(List<Item> &items); - + static int check_period_overlaps(const KEY &lhs_key, const KEY &rhs_key, + const uchar *lhs, const uchar *rhs);
why did you make it a static method of TABLE?
int delete_row(); void vers_update_fields(); void vers_update_end(); void find_constraint_correlated_indexes(); - void clone_handler_for_update(); - void delete_update_handler();
/** Number of additional fields used in versioned tables */ #define VERSIONING_FIELDS 2 diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 240f001f7de..74d28ede25f 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -3959,6 +3959,28 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info, DBUG_RETURN(TRUE); }
+ switch (key->type) { + case Key::UNIQUE: + if (!key->period) + break; + /* Fall through: + WITHOUT OVERLAPS forces fields to be NOT NULL + */
Why is that?
+ case Key::PRIMARY: + /* Implicitly set primary key fields to NOT NULL for ISO conf. */ + if (!(sql_field->flags & NOT_NULL_FLAG)) + { + /* Implicitly set primary key fields to NOT NULL for ISO conf. */
duplicated comment
+ sql_field->flags|= NOT_NULL_FLAG; + sql_field->pack_flag&= ~FIELDFLAG_MAYBE_NULL; + null_fields--; + } + break; + default: + // Fall through + break; + } + cols2.rewind(); switch(key->type) {
@@ -4536,15 +4556,13 @@ bool Column_definition::sp_prepare_create_field(THD *thd, MEM_ROOT *mem_root) }
-static bool vers_prepare_keys(THD *thd, HA_CREATE_INFO *create_info, - Alter_info *alter_info, KEY **key_info, uint key_count) +static bool append_system_key_parts(THD *thd, HA_CREATE_INFO *create_info, + Alter_info *alter_info, KEY **key_info, + uint key_count) { - DBUG_ASSERT(create_info->versioned()); - - const char *row_start_field= create_info->vers_info.as_row.start; - DBUG_ASSERT(row_start_field); - const char *row_end_field= create_info->vers_info.as_row.end; - DBUG_ASSERT(row_end_field); + const auto &row_start_field= create_info->vers_info.as_row.start; + const auto &row_end_field= create_info->vers_info.as_row.end;
please, don't use auto in trivial cases like this one. it might be easier to type, but then the reviewer and whoever will in the future will edit this code will have to do type derivation in the head.
+ DBUG_ASSERT(!create_info->versioned() || (row_start_field && row_end_field));
List_iterator<Key> key_it(alter_info->key_list); Key *key= NULL; @@ -4553,25 +4571,61 @@ static bool vers_prepare_keys(THD *thd, HA_CREATE_INFO *create_info, if (key->type != Key::PRIMARY && key->type != Key::UNIQUE) continue;
+ if (create_info->versioned()) + { Key_part_spec *key_part=NULL; List_iterator<Key_part_spec> part_it(key->columns); while ((key_part=part_it++)) { - if (!my_strcasecmp(system_charset_info, - row_start_field, - key_part->field_name.str) || - - !my_strcasecmp(system_charset_info, - row_end_field, - key_part->field_name.str)) + if (row_start_field.streq(key_part->field_name) || + row_end_field.streq(key_part->field_name)) break; } - if (key_part) - continue; // Key already contains Sys_start or Sys_end + if (!key_part) + key->columns.push_back(new Key_part_spec(&row_end_field, 0)); + } + }
- Key_part_spec *key_part_sys_end_col= - new (thd->mem_root) Key_part_spec(&create_info->vers_info.as_row.end, 0); - key->columns.push_back(key_part_sys_end_col); + key_it.rewind(); + while ((key=key_it++))
please skip this loop if there's no PERIOD and skip the loop above if there's no system versioning.
+ { + if (key->without_overlaps) + { + if (key->type != Key::PRIMARY && key->type != Key::UNIQUE) + { + my_error(ER_PERIOD_WITHOUT_OVERLAPS_NON_UNIQUE, MYF(0), key->period.str); + return true;
I think this can even be a syntax error in the parser. there's no need to postpone it till here, is there?
+ } + if (!create_info->period_info.is_set() + || !key->period.streq(create_info->period_info.name)) + { + my_error(ER_PERIOD_NOT_FOUND, MYF(0), key->period.str); + return true; + } + if (thd->work_part_info) + { + // Unfortunately partitions do not support searching upper/lower bounds + // (i.e. ha_index_read_map with KEY_OR_PREV, KEY_OR_NEXT) + my_error(ER_FEATURE_NOT_SUPPORTED_WITH_PARTITIONING, MYF(0), + "WITHOUT OVERLAPS"); + return true; + } + const auto &period_start= create_info->period_info.period.start; + const auto &period_end= create_info->period_info.period.end; + List_iterator<Key_part_spec> part_it(key->columns); + while (Key_part_spec *key_part= part_it++) + { + if (period_start.streq(key_part->field_name) + || period_end.streq(key_part->field_name)) + { + my_error(ER_KEY_CONTAINS_PERIOD_FIELDS, MYF(0), key->name.str, + key_part->field_name); + return true; + } + } + key->columns.push_back(new Key_part_spec(&period_end, 0)); + key->columns.push_back(new Key_part_spec(&period_start, 0)); + } }
return false; diff --git a/sql/handler.cc b/sql/handler.cc index 7d61252eea6..917386f4392 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -4173,6 +4173,11 @@ uint handler::get_dup_key(int error) if (table->s->long_unique_table && table->file->errkey < table->s->keys) DBUG_RETURN(table->file->errkey); table->file->errkey = (uint) -1; + if (overlaps_error_key != -1) + { + table->file->errkey= (uint)overlaps_error_key; + DBUG_RETURN(table->file->errkey); + }
Why do you need overlaps_error_key? it looks like you can store the conflicting key number directly in errkey and it's somewhat confusing that this method uses table->file->errkey instead of just errkey. It's totally not clear why it does that. Looks like some historical thing from 2000.
if (error == HA_ERR_FOUND_DUPP_KEY || error == HA_ERR_FOREIGN_DUPLICATE_KEY || error == HA_ERR_FOUND_DUPP_UNIQUE || error == HA_ERR_NULL_IN_SPATIAL || @@ -6563,10 +6576,12 @@ static int check_duplicate_long_entry_key(TABLE *table, handler *h, unique constraint on long columns. @returns 0 if no duplicate else returns error */ -static int check_duplicate_long_entries(TABLE *table, handler *h, - const uchar *new_rec) +int handler::check_duplicate_long_entries(const uchar *new_rec) { - table->file->errkey= -1; + if (this->inited == RND) + create_lookup_handler();
1. and if inited==INDEX ? 2. why 'this->' ? 3. generally it's not a good idea to check inited==RND and lookup_handler==NULL inside a loop per row, because these values cannot change in the middle of a scan. Better to check them once, when inited is being set to RND.
+ handler *h= lookup_handler ? lookup_handler : table->file;
why table->file and not this?
+ errkey= -1; int result; for (uint i= 0; i < table->s->keys; i++) { @@ -6935,6 +6956,142 @@ void handler::set_lock_type(enum thr_lock_type lock) table->reginfo.lock_type= lock; }
+/** + @brief clone of current handler. + Creates a clone of handler used for unique hash key and WITHOUT OVERLAPS. + @return error code +*/ +int handler::create_lookup_handler() +{ + if (lookup_handler) + return 0; + lookup_handler= clone(table_share->normalized_path.str, + table->in_use->mem_root); + int error= lookup_handler->ha_external_lock(table->in_use, F_RDLCK); + return error; +} + +int handler::ha_check_overlaps(const uchar *old_data, const uchar* new_data) +{ + DBUG_ASSERT(new_data); + if (!table_share->period.unique_keys) + return 0; + if (table->versioned() && !table->vers_end_field()->is_max()) + return 0; + + bool is_update= old_data != NULL; + if (!check_overlaps_buffer) + check_overlaps_buffer= (uchar*)alloc_root(&table_share->mem_root, + table_share->max_unique_length + + table_share->reclength);
check_overlaps_buffer is per handler. It should be allocated in the TABLE::mem_root, not TABLE_SHARE::mem_root Also, it should probably be called lookup_buffer and check_duplicate_long_entry_key() should use it too.
+ auto *record_buffer= check_overlaps_buffer + table_share->max_unique_length; + auto *handler= this; + // handler->inited can be NONE on INSERT + if (handler->inited != NONE) + { + create_lookup_handler(); + handler= lookup_handler; + + // Needs to compare record refs later is old_row_found() + if (is_update) + position(old_data); + } + + // Save and later restore this handler's keyread + int old_this_keyread= this->keyread;
Again, why do you save/restore this->keyread? If you're using lookup_handler below, then this->keyread doesn't matter. And if handler==this below, then this->inited==NONE, which implies no keyread. Either way, handler->keyread_enabled() should always be false here. What about DBUG_ASSERT(!handler->keyread_enabled()) ?
+ DBUG_ASSERT(this->ha_end_keyread() == 0);
please fix all cases where you used a function with side effects inside an assert. not just the one I've commented about
+ + int error= 0; + + for (uint key_nr= 0; key_nr < table_share->keys && !error; key_nr++) + { + const KEY &key_info= table->key_info[key_nr]; + const uint key_parts= key_info.user_defined_key_parts; + if (!key_info.without_overlaps) + continue; + + if (is_update) + { + bool key_used= false; + for (uint k= 0; k < key_parts && !key_used; k++) + key_used= bitmap_is_set(table->write_set, + key_info.key_part[k].fieldnr - 1); + if (!key_used) + continue; + } + + error= handler->ha_index_init(key_nr, 0); + if (error) + return error;
we should try to minimize number of index_init/index_end, it doesn't look like a cheap operation, at least in InnoDB. but in a separate commit, because it should cover long uniques too.
+ + error= handler->ha_start_keyread(key_nr); + DBUG_ASSERT(!error); + + const uint period_field_length= key_info.key_part[key_parts - 1].length; + const uint key_base_length= key_info.key_length - 2 * period_field_length; + + key_copy(check_overlaps_buffer, new_data, &key_info, 0); + + /* Copy period_start to period_end. + the value in period_start field is not significant, but anyway let's leave + it defined to avoid uninitialized memory access + */ + memcpy(check_overlaps_buffer + key_base_length, + check_overlaps_buffer + key_base_length + period_field_length, + period_field_length); + + /* Find row with period_end > (period_start of new_data) */ + error = handler->ha_index_read_map(record_buffer, + check_overlaps_buffer, + key_part_map((1 << (key_parts - 1)) - 1), + HA_READ_AFTER_KEY); + + if (!error && is_update) + { + /* In case of update it could happen that the nearest neighbour is + a record we are updating. It means, that there are no overlaps + from this side. + + An assumption is made that during update we always have the last + fetched row in old_data. Therefore, comparing ref's is enough + */ + DBUG_ASSERT(handler != this); + DBUG_ASSERT(inited != NONE); + DBUG_ASSERT(ref_length == handler->ref_length); + + handler->position(record_buffer); + if (memcmp(ref, handler->ref, ref_length) == 0) + error= handler->ha_index_next(record_buffer); + } + + if (!error && table->check_period_overlaps(key_info, key_info, + new_data, record_buffer) == 0) + error= HA_ERR_FOUND_DUPP_KEY; + + if (error == HA_ERR_KEY_NOT_FOUND || error == HA_ERR_END_OF_FILE) + error= 0; + + if (error == HA_ERR_FOUND_DUPP_KEY) + overlaps_error_key= key_nr; + + int end_error= handler->ha_end_keyread(); + DBUG_ASSERT(!end_error); + + end_error= handler->ha_index_end(); + if (!error && end_error) + error= end_error; + } + + // Restore keyread of this handler, if it was enabled + if (old_this_keyread < MAX_KEY) + { + error= this->ha_start_keyread(old_this_keyread); + DBUG_ASSERT(error == 0); + } + + return error; +} + #ifdef WITH_WSREP /** @details diff --git a/sql/table.cc b/sql/table.cc index 718efa5767c..65fc44458f4 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -1499,6 +1500,14 @@ static size_t extra2_read_len(const uchar **extra2, const uchar *extra2_end) return length; }
+static +bool fill_unique_extra2(const uchar *extra2, size_t len, LEX_CUSTRING *section)
thanks, good point. a bit confusing name, I thought it's something about UNIQUE particularly as this is what the whole patch is about :) What about read_extra2_section_once() or something like that? Or get_ or consume_ or store_ ?
+{ + if (section->str) + return true; + *section= {extra2, len}; + return false; +}
static bool read_extra2(const uchar *frm_image, size_t len, extra2_fields *fields) @@ -1725,11 +1725,26 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, keyinfo= &first_keyinfo; thd->mem_root= &share->mem_root;
+ auto err= [thd, share, &handler_file, &se_plugin, old_root](){ + share->db_plugin= NULL; + share->error= OPEN_FRM_CORRUPTED; + share->open_errno= my_errno; + delete handler_file; + plugin_unlock(0, se_plugin); + my_hash_free(&share->name_hash); + + if (!thd->is_error()) + open_table_error(share, OPEN_FRM_CORRUPTED, share->open_errno); + + thd->mem_root= old_root; + return HA_ERR_NOT_A_TABLE; + }; +
Okay. I kind of see your point, but let's postpone this refactoring until after 10.5.2. I still need some time to think about it. You'll still be able to push it later.
if (write && write_frm_image(frm_image, frm_length)) - goto err; + DBUG_RETURN(err());
if (frm_length < FRM_HEADER_SIZE + FRM_FORMINFO_SIZE) - goto err; + DBUG_RETURN(err());
share->frm_version= frm_image[2]; /* @@ -8603,6 +8626,21 @@ void TABLE::evaluate_update_default_function() DBUG_VOID_RETURN; }
+/** + Compare two keys with periods + @return -1, lhs precedes rhs + 0, lhs overlaps rhs + 1, lhs succeeds rhs + */ +int TABLE::check_period_overlaps(const KEY &lhs_key, const KEY &rhs_key, + const uchar *lhs, const uchar *rhs) +{ + int cmp_res= key_period_compare_bases(lhs_key, rhs_key, lhs, rhs);
you only use key_period_compare_bases here. I just wouldn't create a separate small function for that, but rather inlined it here.
+ if (cmp_res) + return cmp_res; + + return key_period_compare_periods(lhs_key, rhs_key, lhs, rhs);
same for key_period_compare_periods
+}
void TABLE::vers_update_fields() { diff --git a/sql/key.cc b/sql/key.cc index 9dbb7a15726..49e97faea22 100644 --- a/sql/key.cc +++ b/sql/key.cc @@ -896,3 +897,51 @@ bool key_buf_cmp(KEY *key_info, uint used_key_parts, } return FALSE; } + + +/** + Compare base parts (not including the period) of keys with period + @return -1, lhs less than rhs + 0, lhs equals rhs + 1, lhs more than rhs + */ +int key_period_compare_bases(const KEY &lhs_key, const KEY &rhs_key, + const uchar *lhs, const uchar *rhs) +{ + uint base_part_nr= lhs_key.user_defined_key_parts - 2;
may be, give this '2' a name, like 'period_key_parts' ? and use them everywhere, of course, not just here
+ int cmp_res= 0; + for (uint part_nr= 0; !cmp_res && part_nr < base_part_nr; part_nr++) + { + Field *f= lhs_key.key_part[part_nr].field; + cmp_res= f->cmp(f->ptr_in_record(lhs), + rhs_key.key_part[part_nr].field->ptr_in_record(rhs));
This doesn't seem to be handling NULLs (see the next review)
+ } + + return cmp_res; +} + +/** + Compare periods of two keys + @return -1, lhs preceeds rhs + 0, lhs overlaps rhs + 1, lhs succeeds rhs + */ +int key_period_compare_periods(const KEY &lhs_key, const KEY &rhs_key, + const uchar *lhs, const uchar *rhs) +{ + uint period_start= lhs_key.user_defined_key_parts - 1; + uint period_end= lhs_key.user_defined_key_parts - 2; + + const auto *f= lhs_key.key_part[period_start].field; + const uchar *l[]= {lhs_key.key_part[period_start].field->ptr_in_record(lhs), + rhs_key.key_part[period_start].field->ptr_in_record(rhs)}; + + const uchar *r[]= {lhs_key.key_part[period_end].field->ptr_in_record(lhs), + rhs_key.key_part[period_end].field->ptr_in_record(rhs)};
I'd still prefer names like 'ls', 'le', 'rs', 're'. Now I need to look up and keep in mind that 'r[0] is left key end period' etc
+ + if (f->cmp(r[0], l[1]) <= 0) + return -1; + if (f->cmp(l[0], r[1]) >= 0) + return 1; + return 0; +}
Regards, Sergei VP of MariaDB Server Engineering and security@mariadb.org