Hi, Nikita,
This review applies to the combined diff e2f8dff^..52f489e
On Oct 04, Nikita Malyavin wrote:
> diff --git a/sql/log_event.h b/sql/log_event.h
> index 91a9eeee10f..8452e665c98 100644
> --- a/sql/log_event.h
> +++ b/sql/log_event.h
> @@ -5275,8 +5278,11 @@ class Rows_log_event : public Log_event
> uchar *m_key; /* Buffer to keep key value during searches */
> KEY *m_key_info; /* Pointer to KEY info for m_key_nr */
> uint m_key_nr; /* Key number */
> + uint m_key_parts_suit; /* A number of key_parts suited to lookup */
that's very confusing name. Took me a while to understand what you meant.
Better call it m_usable_key_parts
> bool master_had_triggers; /* set after tables opening */
>
> + uint key_parts_suit_event(const KEY *key) const;
and this could be "get_usable_key_parts()"
> +
> int find_key(); // Find a best key to use in find_row()
> int find_row(rpl_group_info *);
> int write_row(rpl_group_info *, const bool);
> diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc
> index 422d496d5c3..25705d13641 100644
> --- a/sql/log_event_server.cc
> +++ b/sql/log_event_server.cc
> @@ -6062,7 +6065,7 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi)
> */
> sql_mode_t saved_sql_mode= thd->variables.sql_mode;
> if (!is_auto_inc_in_extra_columns())
> - thd->variables.sql_mode= (rpl_data.is_online_alter() ? saved_sql_mode :0)
> + thd->variables.sql_mode= (m_online_alter ? saved_sql_mode :0)
wouldn't it be more correct to use saved_sql_mode also in normal replication?
> | MODE_NO_AUTO_VALUE_ON_ZERO;
>
> // row processing loop
> @@ -7949,14 +7952,35 @@ uint8 Write_rows_log_event::get_trg_event_map()
> **************************************************************************/
>
> #if defined(HAVE_REPLICATION)
> -/*
> - Compares table->record[0] and table->record[1]
> +/**
> + @brief Compares table->record[0] and table->record[1]
> +
> + @param masrter_columns a number of columns on the source replica,
> + 0 if ONLINE ALTER TABLE
>
> - Returns TRUE if different.
> + @returns true if different.
> */
> -static bool record_compare(TABLE *table)
> +static bool record_compare(TABLE *table, uint master_columns)
> {
> - bool result= FALSE;
> + bool result= false;
> +
> + /*
> + Determine if the optimized check is possible (and we can
> + goto record_compare_exit).
> + We should ensure that all extra columns (i.e. fieldnr > master_columns)
> + have explicit values. If not, we will exclude them from comparison,
> + as they can contain non-deterministic values.
> +
> + master_columns == 0 case is optimization for ONLINE ALTER to check
> + all columns fast.
> + */
> + bool all_values_set= master_columns == 0
> + && bitmap_is_set_all(&table->has_value_set);
> + for (uint i = master_columns; all_values_set && i < table->s->fields; i++)
> + {
> + all_values_set= bitmap_is_set(&table->has_value_set, i);
> + }
wait, this doesn't make any sense. You only do the loop if
all_values_set is true.
But it is true only if bitmap_is_set_all(&table->has_value_set).
There is no point in doing bitmap_is_set() for individual bits,
when you already know that all bits are set :)
shouldn't it simply be
bool all_values_set= bitmap_is_set_all(&table->has_value_set);
?
but see below, perhaps you don't need all_values_set at all
> +
> /**
> Compare full record only if:
> - there are no blob fields (otherwise we would also need
> @@ -7969,40 +7993,43 @@ static bool record_compare(TABLE *table)
> */
> if ((table->s->blob_fields +
> table->s->varchar_fields +
> - table->s->null_fields) == 0)
> + table->s->null_fields) == 0
> + && all_values_set)
> {
> result= cmp_record(table,record[1]);
> goto record_compare_exit;
> }
>
> /* Compare null bits */
> - if (memcmp(table->null_flags,
> - table->null_flags+table->s->rec_buff_length,
> - table->s->null_bytes))
> + if (all_values_set && memcmp(table->null_flags,
> + table->null_flags+table->s->rec_buff_length,
> + table->s->null_bytes))
> {
> - result= TRUE; // Diff in NULL value
> + result= true; // Diff in NULL value
> goto record_compare_exit;
> }
>
> /* Compare fields */
> for (Field **ptr=table->field ; *ptr ; ptr++)
> {
> + Field *f= *ptr;
> if (table->versioned() && (*ptr)->vers_sys_field())
> {
> continue;
> }
> - /**
> - We only compare field contents that are not null.
> - NULL fields (i.e., their null bits) were compared
> - earlier.
> - */
> - if (!(*(ptr))->is_null())
> + /*
> + We only compare fields that exist on the source (or in ONLINE
> + ALTER case, that were in the original table). For reference,
> + replica tables can also contain extra fields.
> + */
> + if (f->field_index > master_columns && !f->has_explicit_value())
> + continue;
same question as in a previous review. Do you mean that when
f->field_index < master_columns, it's ok to compare values below
even when !f->has_explicit_value() ?
> +
> + if (f->is_null() != f->is_null(table->s->rec_buff_length)
> + || f->cmp_binary_offset(table->s->rec_buff_length))
may be it's better to create a fast copy of this function, like
if (bitmap_is_set_all(&table->has_value_set))
{
... old record_compare() code starting from
if ((table->s->blob_fields +
table->s->varchar_fields +
table->s->null_fields) == 0)
}
else
{
... your code, only the loop over fields.
}
in this case you won't even need all_values_set or master_columns
> {
> - if ((*ptr)->cmp_binary_offset(table->s->rec_buff_length))
> - {
> - result= TRUE;
> - goto record_compare_exit;
> - }
> + result= true;
> + goto record_compare_exit;
> }
> }
>
> @@ -8010,6 +8037,32 @@ static bool record_compare(TABLE *table)
> return result;
> }
>
> +/**
> + Newly added fields with non-deterministic defaults (i.e. DEFAULT(RANDOM()),
> + CURRENT_TIMESTAMP, AUTO_INCREMENT) should be excluded from key search.
> + Basically we exclude all the default-filled fields based on
> + has_explicit_value bitmap.
> + */
> +uint Rows_log_event::key_parts_suit_event(const KEY *key) const
> +{
> + uint master_columns= m_online_alter ? 0 : m_cols.n_bits;
get_master_cols()
> + uint p= 0;
> + for (;p < key->ext_key_parts; p++)
> + {
> + Field *f= key->key_part[p].field;
> + uint non_deterministic_default= f->default_value &&
> + f->default_value->flags | VCOL_NOT_STRICTLY_DETERMINISTIC;
> +
> + int next_number_field= f->table->next_number_field ?
> + f->table->next_number_field->field_index : -1;
eh?
1. why it's *in the loop* ?
2. why not simply
|| f == f->table->next_number_field
or, to be extra safe
|| f->table->field[f->field_index] == f->table->next_number_field
> +
> + if (f->field_index >= master_columns
I don't think this condition ^^^ is needed. Same as elsewhere,
if f->field_index < master_columns but !bitmap_is_set,
does it mean the keyseg is ok? I don't think so.
> + && !bitmap_is_set(&m_table->has_value_set, f->field_index)
> + && (non_deterministic_default || next_number_field == f->field_index))
> + break;
> + }
> + return p;
> +}
>
> /**
> Find the best key to use when locating the row in @c find_row().
> @@ -8024,13 +8077,26 @@ static bool record_compare(TABLE *table)
> */
> int Rows_log_event::find_key()
> {
> - uint i, best_key_nr, last_part;
> + uint i, best_key_nr;
> KEY *key, *UNINIT_VAR(best_key);
> ulong UNINIT_VAR(best_rec_per_key), tmp;
> DBUG_ENTER("Rows_log_event::find_key");
> DBUG_ASSERT(m_table);
>
> + #ifdef DBUG_TRACE
#ifndef DBUG_OFF
> + auto _ = make_scope_exit([this]() {
> + DBUG_EXECUTE_IF("rpl_report_chosen_key",
> + push_warning_printf(m_table->in_use,
> + Sql_condition::WARN_LEVEL_NOTE,
> + ER_UNKNOWN_ERROR, "Key chosen: %d",
> + m_key_nr == MAX_KEY ?
> + -1 : m_key_nr););
> + });
> + #endif
> +
> + m_key_nr= MAX_KEY;
> best_key_nr= MAX_KEY;
> + uint parts_suit= 0;
>
> /*
> Keys are sorted so that any primary key is first, followed by unique keys,
> @@ -8041,33 +8107,38 @@ int Rows_log_event::find_key()
> {
> if (!m_table->s->keys_in_use.is_set(i))
> continue;
> + parts_suit= key_parts_suit_event(key);
> + if (!parts_suit)
> + continue;
> /*
> We cannot use a unique key with NULL-able columns to uniquely identify
> a row (but we can still select it for range scan below if nothing better
> is available).
> */
> - if ((key->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME)
> + if ((key->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME &&
> + parts_suit == key->user_defined_key_parts)
this doesn't make use of ext key parts. Should be something like
if (parts_suit == key->ext_key_parts ||
(parts_suit >= key->user_defined_key_parts && (key->flags & ....)))
> {
> best_key_nr= i;
> best_key= key;
> + m_key_parts_suit= parts_suit;
> break;
> }
> /*
> We can only use a non-unique key if it allows range scans (ie. skip
> FULLTEXT indexes and such).
> */
> - last_part= key->user_defined_key_parts - 1;
> DBUG_PRINT("info", ("Index %s rec_per_key[%u]= %lu",
> - key->name.str, last_part, key->rec_per_key[last_part]));
> - if (!(m_table->file->index_flags(i, last_part, 1) & HA_READ_NEXT))
> + key->name.str, parts_suit, key->rec_per_key[parts_suit]));
> + if (!(m_table->file->index_flags(i, parts_suit, 1) & HA_READ_NEXT))
> continue;
>
> - tmp= key->rec_per_key[last_part];
> + tmp= key->rec_per_key[parts_suit - 1];
> if (best_key_nr == MAX_KEY || (tmp > 0 && tmp < best_rec_per_key))
> {
> best_key_nr= i;
> best_key= key;
> best_rec_per_key= tmp;
> + m_key_parts_suit= parts_suit;
> }
> }
>
> @@ -8494,12 +8568,20 @@ Delete_rows_log_event::do_before_row_operations(const Slave_reporting_capability
> if (get_flags(STMT_END_F))
> status_var_increment(thd->status_var.com_stat[SQLCOM_DELETE]);
>
> + const uchar *curr_row_end= m_curr_row_end;
> + unpack_row(rgi, m_table, m_width, m_curr_row, &m_cols,
> + &curr_row_end, &m_master_reclength, m_rows_end);
> +
> + KEY *pk_info= m_table->key_info + m_table->s->primary_key;
> if ((m_table->file->ha_table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION) &&
> - m_table->s->primary_key < MAX_KEY)
> + m_table->s->primary_key < MAX_KEY &&
> + key_parts_suit_event(pk_info) == pk_info->ext_key_parts)
> {
> /*
> We don't need to allocate any memory for m_key since it is not used.
> */
> + m_key_nr= m_table->s->primary_key;
> + m_key_parts_suit= pk_info->ext_key_parts;
> return 0;
> }
> if (do_invoke_trigger())
already commented in the previous review
that if "We don't need to allocate any memory" than you `return 0` early
wthout doing do_invoke_trigger().
Regards,
Sergei
VP of MariaDB Server Engineering
and security(a)mariadb.org