Hello Sergei!

See my answers below.
I have made a few fixes not regarding to the review as well, please
see commits 7a41f82...7296fa0, branch bb-10.10-ddl-nikita [github]
 
> diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc
> index 422d496d5c3..ca17c5c07a8 100644
> --- a/sql/log_event_server.cc
> +++ b/sql/log_event_server.cc
> @@ -7949,14 +7952,35 @@ uint8 Write_rows_log_event::get_trg_event_map()
>  **************************************************************************/

>  #if defined(HAVE_REPLICATION)
> -/*
> -  Compares table->record[0] and table->record[1]
> +/**
> +  @brief Compares table->record[0] and table->record[1]
> +
> +  @param masrter_columns  a number of columns on the source replica,
> +                          0 if ONLINE ALTER TABLE

> -  Returns TRUE if different.
> +  @returns true if different.
>  */
> -static bool record_compare(TABLE *table)
> +static bool record_compare(TABLE *table, uint master_columns)
>  {
> -  bool result= FALSE;
> +  bool result= false;
> +
> +  /*
> +    Determine if the optimized check is possible (and we can
> +    goto record_compare_exit).
> +    We should ensure that all extra columns (i.e. fieldnr > master_columns)
> +    have explicit values. If not, we will exclude them from comparison,
> +    as they can contain non-deterministic values.
> +
> +    master_columns == 0 case is optimization for ONLINE ALTER to check
> +    all columns fast.
> +   */
> +  bool all_values_set= master_columns == 0
> +                       && bitmap_is_set_all(&table->has_value_set);
> +  for (uint i = master_columns; all_values_set && i < table->s->fields; i++)
> +  {
> +    all_values_set= bitmap_is_set(&table->has_value_set, i);
> +  }

it'd be safer and faster to do

 all_values_set= bitmap_is_set_all(&table->has_value_set);

 
Depends on what is master_columns value, relative to total number of columns. 
But agree about safety. On the level of this function, we'd probably better not know
about extra columns and check each one.

and, please, add replication (not online alter) tests:
1. slave has more columns
I don't know if you know about rpl_alter_extra_persistent.test, it has such tests there
2. binlog row format is "minimal", this might trigger it too
Nice idea, but I guess we'll not see the effect: having PK should end up in choosing it for
search, and since we only have (or test) engines that 
support HA_PRIMARY_KEY_REQUIRED_FOR_POSITION, other fields will be defined
after ha_rnd_pos() call, and even record_compare() will not be called. I'll add the test,
however, it may be good for future.

 
> +
>    /**
>      Compare full record only if:
>      - there are no blob fields (otherwise we would also need
> @@ -7969,47 +7993,68 @@ static bool record_compare(TABLE *table)
>      */
>    if ((table->s->blob_fields +
>         table->s->varchar_fields +
> -       table->s->null_fields) == 0)
> +       table->s->null_fields) == 0
> +      && all_values_set)
>    {
>      result= cmp_record(table,record[1]);
>      goto record_compare_exit;
>    }

>    /* Compare null bits */
> -  if (memcmp(table->null_flags,
> +  if (all_values_set && memcmp(table->null_flags,
>                                 table->null_flags+table->s->rec_buff_length,
>                                 table->s->null_bytes))
>    {
> -    result= TRUE;                            // Diff in NULL value
> +    result= true;                            // Diff in NULL value
>      goto record_compare_exit;
>    }

>    /* Compare fields */
>    for (Field **ptr=table->field ; *ptr ; ptr++)
>    {
> +    Field *f= *ptr;
>      if (table->versioned() && (*ptr)->vers_sys_field())
>      {
>        continue;
>      }
> -    /**
> -      We only compare field contents that are not null.
> -      NULL fields (i.e., their null bits) were compared
> -      earlier.
> +    /*
> +      We only compare fields that exist on the source (or in ONLINE
> +      ALTER case, that were in the original table). For reference,
> +      replica tables can also contain extra fields.
>       */
> -    if (!(*(ptr))->is_null())
> -    {
> -      if ((*ptr)->cmp_binary_offset(table->s->rec_buff_length))
> +    if (f->field_index > master_columns && !f->has_explicit_value())
> +      continue;

why do you check for f->field_index > master_columns?
isn't !f->has_explicit_value() the one that decided?
in what case will be ok to compare values when
f->field_index < master_columns but no explicit value was set?

 
My wrong, I admit it. Better to check all the fields!
 
> @@ -8030,6 +8075,18 @@ int Rows_log_event::find_key()
>    DBUG_ENTER("Rows_log_event::find_key");
>    DBUG_ASSERT(m_table);

> +  #ifdef DBUG_TRACE
> +  auto _ = make_scope_exit([this]() {
> +    DBUG_EXECUTE_IF("rpl_report_chosen_key",
> +                    push_warning_printf(m_table->in_use,
> +                                        Sql_condition::WARN_LEVEL_NOTE,
> +                                        ER_UNKNOWN_ERROR, "Key chosen: %d",
> +                                        m_key_nr == MAX_KEY ?
> +                                        -1 : m_key_nr););
> +  });
> +  #endif
> +
> +  m_key_nr= MAX_KEY;

it would've been simpler and shorter to do it the old style, with a goto

 
The disadvantage is that you can't declare the variables between a goto and a label, so I
prefer to avoid it, if it doesn't affect critical performance (like in storage engine)

>    best_key_nr= MAX_KEY;

>    /*
> @@ -8041,6 +8098,8 @@ int Rows_log_event::find_key()
>    {
>      if (!m_table->s->keys_in_use.is_set(i))
>        continue;
> +    if (!key_suits_event(key))
> +      continue;

1. again, add a replication test, please. when, e.g., slave has more
columns and a unique index over new columns.

 
2. going over all keyparts for every event is rather redundant.
you can calculate the set of usable keys once and put it, for example,
in m_table->keys_in_use_for_query.
 
Did you mind commits 94378dc "MDEV-29069 follow-up: allow deterministic DEFAULTs" 
and 52f489e "MDEV-29069 follow-up: support partially suitable keys"?

I think with regard to then it becomes not so redundant, at least there'll be much less keys completely
unsuitable.Maybe i could make bitmap of keys that don't need any keypart check. What do you think?


>      /*
>        We cannot use a unique key with NULL-able columns to uniquely identify
>        a row (but we can still select it for range scan below if nothing better
> @@ -8221,7 +8280,8 @@ int Rows_log_event::find_row(rpl_group_info *rgi)
>    DBUG_DUMP("record[0]", table->record[0], table->s->reclength);

>    if ((table->file->ha_table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION) &&
> -      table->s->primary_key < MAX_KEY)
> +      table->s->primary_key < MAX_KEY &&
> +      m_key_nr == table->s->primary_key)

add a replication test for that: slave has an extra column
which is a PK, and HA_PRIMARY_KEY_REQUIRED_FOR_POSITION is set (InnoDB)

I've translated all the online alter table tests into extra column tests.
 
>    {
>      /*
>        Use a more efficient method to fetch the record given by
> @@ -8494,12 +8554,18 @@ Delete_rows_log_event::do_before_row_operations(const Slave_reporting_capability
>    if (get_flags(STMT_END_F))
>      status_var_increment(thd->status_var.com_stat[SQLCOM_DELETE]);

> +  const uchar *curr_row_end= m_curr_row_end;
> +  unpack_row(rgi, m_table, m_width, m_curr_row, &m_cols,
> +             &curr_row_end, &m_master_reclength, m_rows_end);
> +
>    if ((m_table->file->ha_table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION) &&
> -      m_table->s->primary_key < MAX_KEY)
> +      m_table->s->primary_key < MAX_KEY
> +      && key_suits_event(&m_table->key_info[m_table->s->primary_key]))
>    {
>      /*
>        We don't need to allocate any memory for m_key since it is not used.
>      */
> +    m_key_nr= m_table->s->primary_key;
>      return 0;
>    }

This is very strange if().
1. it's only for delete, not for update
 
agree 

2. it's before invoking triggers (bug)
 
Why? I see no dependency on triggers here 

3. it's wrong if the master table has a different primary key (bug)
 
key_suits_event() prevents it. We can't allow a different primary key with auto_increment, 
but we can allow the one with deterministic default, or from existing column. Then
minimal row format will work

I'd suggest to remove it completely. An optimization of avoiding
a malloc can be done inside find_key() after the best key is found.

>    if (do_invoke_trigger())

Regards,
Sergei
VP of MariaDB Server Engineering
and security@mariadb.org


--
Yours truly,
Nikita Malyavin