Looks good to me, thanks Kristian!

As a bonus, I wanted to see if each of the fixes standalone would resolve the
underlying issue, and they do. That is, the test passes with any one (and just
that one) of the three separate fixes alone.

Brandon

On Tue, Jul 8, 2025 at 5:19 AM Kristian Nielsen <knielsen@knielsen-hq.org> wrote:
Hi Brandon,

Kristian Nielsen via commits <commits@lists.mariadb.org> writes:

> Ensure that a pending semi-sync transaction can only be signalled on its THD
> while it is waiting in commit_trx(); not if the wait in commit_trx() is
> skipped for some reason.

Can you review this patch for MDEV-36934 so it can get into the upcoming
release? You've already seen and agreed to the first version of the patch.

I've included your test case for the bug, thanks a lot for writing that! I
only modified it slightly, to increase the timeout for potentially slow
Buildbot hosts, and to avoid an unnecessary use of DBUG injection.

I also added your suggestions to (in addition to my original
defensive-coding patch to only signal the THD while it is marked as waiting)
also avoid adding a transaction to the list if it will never be ack'ed due
to no waiting slaves; and to unlink_thd_as_waiter() if we skip waiting for
ack in commit_trx().

 - Kristian.

>
> There was a bug that if no semi-sync slaves were connected and
> rpl_semi_sync_master_wait_no_slave was off, the THD pointer could be left in
> the list of pending transactions after THD was deleted, and an invalid
> THD/condition variable could be signalled, causing hang or other corruption.
>
> Testcase based on work by Brandon Nesterenko <brandon.nesterenko@mariadb.com>.
>
> Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
> ---
>  .../r/rpl_semi_sync_cond_var_per_thd.result   |  63 ++++++++++-
>  .../rpl/t/rpl_semi_sync_cond_var_per_thd.test | 104 +++++++++++++++++-
>  sql/semisync_master.cc                        |  50 +++++++--
>  sql/semisync_master.h                         |  10 +-
>  4 files changed, 210 insertions(+), 17 deletions(-)
>
> diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result b/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result
> index 18ad5d3d2cc..96e7aa43fbf 100644
> --- a/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result
> +++ b/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result
> @@ -7,6 +7,8 @@ call mtr.add_suppression("Could not read packet");
>  call mtr.add_suppression("Could not write packet");
>  set @save_bgc_count= @@global.binlog_commit_wait_count;
>  set @save_bgc_usec= @@global.binlog_commit_wait_usec;
> +set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point;
> +set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave;
>  set @save_debug_dbug= @@global.debug_dbug;
>  set @@global.binlog_commit_wait_count=3;
>  set @@global.binlog_commit_wait_usec=10000000;
> @@ -46,8 +48,6 @@ drop table t1, t2, t3;
>  # the binlogging to semi-sync, and starting the wait for ACK; and during
>  # this pause, semi-sync is manually switched off and on.
>  connection master;
> -set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point;
> -set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave;
>  set @@global.rpl_semi_sync_master_wait_point= AFTER_SYNC;
>  set @@global.rpl_semi_sync_master_wait_no_slave= 1;
>  set @@global.debug_dbug="+d,semisync_log_skip_trx_wait";
> @@ -100,7 +100,66 @@ commit;
>  # Cleanup
>  connection master;
>  drop table tn;
> +set @@global.debug_dbug=@save_debug_dbug;
> +#
> +# MDEV-36934
> +# The server could indefinitely hang due to a memory leak which tried to
> +# pthread signal on a destroyed condition variable. In effect, no
> +# connections could commit transactions because there would be a thread
> +# stuck on a never-returning call to pthread_cond_signal() while
> +# holding Repl_semi_sync_master::LOCK_log.
> +connection master;
> +set @@global.rpl_semi_sync_master_wait_point= AFTER_COMMIT;
> +set @@global.rpl_semi_sync_master_wait_no_slave= 0;
> +# Ensure servers are in proper state
> +connection master;
> +connection slave;
> +# Test case initial set-up
> +connection master;
> +create table t_36934 (a int) engine=innodb;
> +include/save_master_gtid.inc
> +connection slave;
> +include/sync_with_master_gtid.inc
> +# Pause the user transaction before inserting into Active_tranx
> +connect user_con,localhost,root,,;
> +SET debug_sync= 'semisync_at_write_tranx_in_binlog SIGNAL at_write_tranx_in_binlog WAIT_FOR resume_write_tranx_in_binlog';
> +insert into t_36934 values (1);
> +connection server_1;
> +set debug_sync="now wait_for at_write_tranx_in_binlog";
> +# Disconnect the slave (note that the binlog dump thread won't yet be
> +# notified of a binlog update from the last transaction, so the slave
> +# should neither receiver nor ACK the transaction).
> +connection slave;
> +include/stop_slave.inc
> +# Waiting for master to realize the slave has disconnected..
> +connection server_1;
> +# ..done
> +# Resuming transaction (it will exit commit_trx early without waiting)
> +set debug_sync="now signal resume_write_tranx_in_binlog";
> +connection user_con;
> +disconnect user_con;
> +# Force delete the user thread (FLUSH THREADS ensures the thread won't
> +# stay in the thread cache)
> +connection master;
> +FLUSH THREADS;
> +# BUG: Re-connect slave. MDEV-36934 reports that the master would hang
> +# when the slave would re-connect and try to ACK the last transaction
> +# who's thread has been deleted
> +connection slave;
> +include/start_slave.inc
> +# Try to commit another transaction (prior to MDEV-36934 fixes, this
> +# would hang indefinitely)
> +connection master;
> +set debug_sync="RESET";
> +insert into t_36934 values (2);
> +connection server_1;
> +# Waiting 30s for last query to complete..
> +connection master;
> +# ..done
> +# Cleanup
> +connection master;
>  set @@global.rpl_semi_sync_master_wait_point= @old_master_wait_point;
>  set @@global.rpl_semi_sync_master_wait_no_slave= @old_master_wait_no_slave;
>  set @@global.debug_dbug=@save_debug_dbug;
> +drop table t_36934;
>  include/rpl_end.inc
> diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test b/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test
> index 537a4bf7fa8..5d6702f8c42 100644
> --- a/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test
> +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test
> @@ -34,6 +34,8 @@ call mtr.add_suppression("Could not read packet");
>  call mtr.add_suppression("Could not write packet");
>  set @save_bgc_count= @@global.binlog_commit_wait_count;
>  set @save_bgc_usec= @@global.binlog_commit_wait_usec;
> +set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point;
> +set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave;
>  set @save_debug_dbug= @@global.debug_dbug;
>  set @@global.binlog_commit_wait_count=3;
>  set @@global.binlog_commit_wait_usec=10000000;
> @@ -98,8 +100,6 @@ drop table t1, t2, t3;


>  --connection master
> -set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point;
> -set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave;
>  set @@global.rpl_semi_sync_master_wait_point= AFTER_SYNC;
>  set @@global.rpl_semi_sync_master_wait_no_slave= 1;
>  --eval set @@global.debug_dbug="+d,semisync_log_skip_trx_wait"
> @@ -190,8 +190,108 @@ commit;
>  --echo # Cleanup
>  --connection master
>  drop table tn;
> +set @@global.debug_dbug=@save_debug_dbug;
> +
> +
> +--echo #
> +--echo # MDEV-36934
> +--echo # The server could indefinitely hang due to a memory leak which tried to
> +--echo # pthread signal on a destroyed condition variable. In effect, no
> +--echo # connections could commit transactions because there would be a thread
> +--echo # stuck on a never-returning call to pthread_cond_signal() while
> +--echo # holding Repl_semi_sync_master::LOCK_log.
> +
> +--connection master
> +set @@global.rpl_semi_sync_master_wait_point= AFTER_COMMIT;
> +set @@global.rpl_semi_sync_master_wait_no_slave= 0;
> +
> +--echo # Ensure servers are in proper state
> +--connection master
> +let $status_var= rpl_semi_sync_master_status;
> +let $status_var_value= ON;
> +source include/wait_for_status_var.inc;
> +--connection slave
> +let $status_var= rpl_semi_sync_slave_status;
> +let $status_var_value= ON;
> +source include/wait_for_status_var.inc;
> +
> +--echo # Test case initial set-up
> +--connection master
> +create table t_36934 (a int) engine=innodb;
> +--source include/save_master_gtid.inc
> +--connection slave
> +--source include/sync_with_master_gtid.inc
> +
> +--echo # Pause the user transaction before inserting into Active_tranx
> +--connect(user_con,localhost,root,,)
> +SET debug_sync= 'semisync_at_write_tranx_in_binlog SIGNAL at_write_tranx_in_binlog WAIT_FOR resume_write_tranx_in_binlog';
> +--send insert into t_36934 values (1)
> +
> +--connection server_1
> +set debug_sync="now wait_for at_write_tranx_in_binlog";
> +
> +--echo # Disconnect the slave (note that the binlog dump thread won't yet be
> +--echo # notified of a binlog update from the last transaction, so the slave
> +--echo # should neither receiver nor ACK the transaction).
> +--connection slave
> +--source include/stop_slave.inc
> +
> +--echo # Waiting for master to realize the slave has disconnected..
> +--connection server_1
> +let $status_var= rpl_semi_sync_master_clients;
> +let $status_var_value= 0;
> +source include/wait_for_status_var.inc;
> +--echo # ..done
> +
> +--echo # Resuming transaction (it will exit commit_trx early without waiting)
> +set debug_sync="now signal resume_write_tranx_in_binlog";
> +
> +--connection user_con
> +--reap
> +--let $user_con_tid= `SELECT connection_id()`
> +--disconnect user_con
> +--source include/wait_until_disconnected.inc
> +
> +--echo # Force delete the user thread (FLUSH THREADS ensures the thread won't
> +--echo # stay in the thread cache)
> +--connection master
> +FLUSH THREADS;
> +
> +--echo # BUG: Re-connect slave. MDEV-36934 reports that the master would hang
> +--echo # when the slave would re-connect and try to ACK the last transaction
> +--echo # who's thread has been deleted
> +--connection slave
> +--source include/start_slave.inc
> +
> +--echo # Try to commit another transaction (prior to MDEV-36934 fixes, this
> +--echo # would hang indefinitely)
> +--connection master
> +set debug_sync="RESET";
> +--send insert into t_36934 values (2)
> +
> +--connection server_1
> +--echo # Waiting 30s for last query to complete..
> +--let $wait_timeout= 30
> +--let $wait_condition= SELECT count(*)=0 FROM information_schema.processlist WHERE info LIKE 'insert into t_36934%';
> +--source include/wait_condition.inc
> +
> +# Variable `success` is set by wait_condition.inc
> +if (!$success)
> +{
> +  --echo # ..error
> +  --die Query is hung
> +}
> +
> +--connection master
> +--reap
> +--echo # ..done
> +
> +
> +--echo # Cleanup
> +--connection master
>  set @@global.rpl_semi_sync_master_wait_point= @old_master_wait_point;
>  set @@global.rpl_semi_sync_master_wait_no_slave= @old_master_wait_no_slave;
>  set @@global.debug_dbug=@save_debug_dbug;
> +drop table t_36934;

>  --source include/rpl_end.inc
> diff --git a/sql/semisync_master.cc b/sql/semisync_master.cc
> index 43afb8f996e..854b6a61eeb 100644
> --- a/sql/semisync_master.cc
> +++ b/sql/semisync_master.cc
> @@ -68,15 +68,20 @@ static ulonglong timespec_to_usec(const struct timespec *ts)
>    return (ulonglong) ts->tv_sec * TIME_MILLION + ts->tv_nsec / TIME_THOUSAND;
>  }

> -int signal_waiting_transaction(THD *waiting_thd, const char *binlog_file,
> -                                my_off_t binlog_pos)
> +static int
> +signal_waiting_transaction(THD *waiting_thd, bool thd_valid,
> +                           const char *binlog_file, my_off_t binlog_pos)
>  {
>    /*
>      It is possible that the connection thd waiting for an ACK was killed. In
>      such circumstance, the connection thread will nullify the thd member of its
>      Active_tranx node. So before we try to signal, ensure the THD exists.
> +
> +    The thd_valid is only set while the THD is waiting in commit_trx(); this
> +    is defensive coding to not signal an invalid THD if we somewhere
> +    accidentally did not remove the transaction from the list.
>    */
> -  if (waiting_thd)
> +  if (waiting_thd && thd_valid)
>      mysql_cond_signal(&waiting_thd->COND_wakeup_ready);
>    return 0;
>  }
> @@ -182,6 +187,7 @@ int Active_tranx::insert_tranx_node(THD *thd_to_wait,
>    ins_node->log_name[FN_REFLEN-1] = 0; /* make sure it ends properly */
>    ins_node->log_pos = log_file_pos;
>    ins_node->thd= thd_to_wait;
> +  ins_node->thd_valid= false;

>    if (!m_trx_front)
>    {
> @@ -263,7 +269,8 @@ void Active_tranx::clear_active_tranx_nodes(
>      if ((log_file_name != NULL) &&
>          compare(new_front, log_file_name, log_file_pos) > 0)
>        break;
> -    pre_delete_hook(new_front->thd, new_front->log_name, new_front->log_pos);
> +    pre_delete_hook(new_front->thd, new_front->thd_valid,
> +                    new_front->log_name, new_front->log_pos);
>      new_front = new_front->next;
>    }

> @@ -355,13 +362,17 @@ void Active_tranx::unlink_thd_as_waiter(const char *log_file_name,
>    }

>    if (entry)
> +  {
>      entry->thd= NULL;
> +    entry->thd_valid= false;
> +  }

>    DBUG_VOID_RETURN;
>  }

> -bool Active_tranx::is_thd_waiter(THD *thd_to_check, const char *log_file_name,
> -                                 my_off_t log_file_pos)
> +Tranx_node *
> +Active_tranx::is_thd_waiter(THD *thd_to_check, const char *log_file_name,
> +                            my_off_t log_file_pos)
>  {
>    DBUG_ENTER("Active_tranx::assert_thd_is_waiter");
>    mysql_mutex_assert_owner(m_lock);
> @@ -377,7 +388,7 @@ bool Active_tranx::is_thd_waiter(THD *thd_to_check, const char *log_file_name,
>      entry = entry->hash_next;
>    }

> -  DBUG_RETURN(static_cast<bool>(entry));
> +  DBUG_RETURN(entry);
>  }

>  /*******************************************************************************
> @@ -863,6 +874,10 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,

>    if (!rpl_semi_sync_master_clients && !rpl_semi_sync_master_wait_no_slave)
>    {
> +    lock();
> +    m_active_tranxs->unlink_thd_as_waiter(trx_wait_binlog_name,
> +                                          trx_wait_binlog_pos);
> +    unlock();
>      rpl_semi_sync_master_no_transactions++;
>      DBUG_RETURN(0);
>    }
> @@ -922,6 +937,9 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,
>          }
>        }

> +      Tranx_node *tranx_entry=
> +        m_active_tranxs->is_thd_waiter(thd, trx_wait_binlog_name,
> +                                       trx_wait_binlog_pos);
>        /* In between the binlogging of this transaction and this wait, it is
>         * possible that our entry in Active_tranx was removed (i.e. if
>         * semi-sync was switched off and on). It is also possible that the
> @@ -932,8 +950,7 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,
>         * rpl_semi_sync_master_yes/no_tx consistent with it, we check for a
>         * semi-sync restart _after_ checking the reply state.
>         */
> -      if (unlikely(!m_active_tranxs->is_thd_waiter(thd, trx_wait_binlog_name,
> -                                                   trx_wait_binlog_pos)))
> +      if (unlikely(!tranx_entry))
>        {
>          DBUG_EXECUTE_IF(
>              "semisync_log_skip_trx_wait",
> @@ -952,6 +969,16 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,
>          break;
>        }

> +      /*
> +        Mark that our THD is now valid for signalling to by the ack thread.
> +        It is important to ensure that we can never leave a no longer valid
> +        THD in the transaction list and signal it, eg. MDEV-36934. This way,
> +        we ensure the THD will only be signalled while this function is
> +        running, even in case of some incorrect error handling or similar
> +        that might leave a dangling THD in the list.
> +      */
> +      tranx_entry->thd_valid= true;
> +
>        /* Let us update the info about the minimum binlog position of waiting
>         * threads.
>         */
> @@ -1284,6 +1311,8 @@ int Repl_semi_sync_master::write_tranx_in_binlog(THD *thd,

>    DBUG_ENTER("Repl_semi_sync_master::write_tranx_in_binlog");

> +  DEBUG_SYNC(current_thd, "semisync_at_write_tranx_in_binlog");
> +
>    lock();

>    /* This is the real check inside the mutex. */
> @@ -1317,7 +1346,8 @@ int Repl_semi_sync_master::write_tranx_in_binlog(THD *thd,
>      m_commit_file_name_inited = true;
>    }

> -  if (is_on())
> +  if (is_on() &&
> +      (rpl_semi_sync_master_clients || rpl_semi_sync_master_wait_no_slave))
>    {
>      DBUG_ASSERT(m_active_tranxs != NULL);
>      if(m_active_tranxs->insert_tranx_node(thd, log_file_name, log_file_pos))
> diff --git a/sql/semisync_master.h b/sql/semisync_master.h
> index a1c57959165..28de3ecf480 100644
> --- a/sql/semisync_master.h
> +++ b/sql/semisync_master.h
> @@ -30,6 +30,7 @@ extern PSI_cond_key key_COND_binlog_send;

>  struct Tranx_node {
>    char              log_name[FN_REFLEN];
> +  bool              thd_valid;             /* thd is valid for signalling */
>    my_off_t          log_pos;
>    THD               *thd;                   /* The thread awaiting an ACK */
>    struct Tranx_node *next;            /* the next node in the sorted list */
> @@ -126,7 +127,9 @@ class Tranx_node_allocator

>      trx_node= &(current_block->nodes[++last_node]);
>      trx_node->log_name[0] = '\0';
> +    trx_node->thd_valid= false;
>      trx_node->log_pos= 0;
> +    trx_node->thd= nullptr;
>      trx_node->next= 0;
>      trx_node->hash_next= 0;
>      return trx_node;
> @@ -298,7 +301,8 @@ class Tranx_node_allocator
>    its invocation. See the context in which it is called to know.
>  */

> -typedef int (*active_tranx_action)(THD *trx_thd, const char *log_file_name,
> +typedef int (*active_tranx_action)(THD *trx_thd, bool thd_valid,
> +                                   const char *log_file_name,
>                                     my_off_t trx_log_file_pos);

>  /**
> @@ -381,8 +385,8 @@ class Active_tranx
>     * matches the thread of the respective Tranx_node::thd of the passed in
>     * log_file_name and log_file_pos.
>     */
> -  bool is_thd_waiter(THD *thd_to_check, const char *log_file_name,
> -                     my_off_t log_file_pos);
> +  Tranx_node * is_thd_waiter(THD *thd_to_check, const char *log_file_name,
> +                             my_off_t log_file_pos);

>    /* Given a position, check to see whether the position is an active
>     * transaction's ending position by probing the hash table.