New subject: [MariaDB commits] [PATCH] MDEV-36934: semi sync makes the master unresponsive when a replica is stopped

8 Jul 2025

Hi Brandon,

Kristian Nielsen via commits <commits@lists.mariadb.org> writes:
...
Ensure that a pending semi-sync transaction can only be signalled on its THD
while it is waiting in commit_trx(); not if the wait in commit_trx() is
skipped for some reason.
Can you review this patch for MDEV-36934 so it can get into the upcoming
release? You've already seen and agreed to the first version of the patch.

I've included your test case for the bug, thanks a lot for writing that! I
only modified it slightly, to increase the timeout for potentially slow
Buildbot hosts, and to avoid an unnecessary use of DBUG injection.

I also added your suggestions to (in addition to my original
defensive-coding patch to only signal the THD while it is marked as waiting)
also avoid adding a transaction to the list if it will never be ack'ed due
to no waiting slaves; and to unlink_thd_as_waiter() if we skip waiting for
ack in commit_trx().

 - Kristian.
...
There was a bug that if no semi-sync slaves were connected and
rpl_semi_sync_master_wait_no_slave was off, the THD pointer could be left in
the list of pending transactions after THD was deleted, and an invalid
THD/condition variable could be signalled, causing hang or other corruption.
Testcase based on work by Brandon Nesterenko <brandon.nesterenko@mariadb.com>.
Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
---
 .../r/rpl_semi_sync_cond_var_per_thd.result   |  63 ++++++++++-
 .../rpl/t/rpl_semi_sync_cond_var_per_thd.test | 104 +++++++++++++++++-
 sql/semisync_master.cc                        |  50 +++++++--
 sql/semisync_master.h                         |  10 +-
 4 files changed, 210 insertions(+), 17 deletions(-)

diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result b/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result
index 18ad5d3d2cc..96e7aa43fbf 100644
--- a/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result
+++ b/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result
@@ -7,6 +7,8 @@ call mtr.add_suppression("Could not read packet");
 call mtr.add_suppression("Could not write packet");
 set @save_bgc_count= @@global.binlog_commit_wait_count;
 set @save_bgc_usec= @@global.binlog_commit_wait_usec;
+set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point;
+set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave;
 set @save_debug_dbug= @@global.debug_dbug;
 set @@global.binlog_commit_wait_count=3;
 set @@global.binlog_commit_wait_usec=10000000;
@@ -46,8 +48,6 @@ drop table t1, t2, t3;
 # the binlogging to semi-sync, and starting the wait for ACK; and during
 # this pause, semi-sync is manually switched off and on.
 connection master;
-set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point;
-set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave;
 set @@global.rpl_semi_sync_master_wait_point= AFTER_SYNC;
 set @@global.rpl_semi_sync_master_wait_no_slave= 1;
 set @@global.debug_dbug="+d,semisync_log_skip_trx_wait";
@@ -100,7 +100,66 @@ commit;
 # Cleanup
 connection master;
 drop table tn;
+set @@global.debug_dbug=@save_debug_dbug;
+#
+# MDEV-36934
+# The server could indefinitely hang due to a memory leak which tried to
+# pthread signal on a destroyed condition variable. In effect, no
+# connections could commit transactions because there would be a thread
+# stuck on a never-returning call to pthread_cond_signal() while
+# holding Repl_semi_sync_master::LOCK_log.
+connection master;
+set @@global.rpl_semi_sync_master_wait_point= AFTER_COMMIT;
+set @@global.rpl_semi_sync_master_wait_no_slave= 0;
+# Ensure servers are in proper state
+connection master;
+connection slave;
+# Test case initial set-up
+connection master;
+create table t_36934 (a int) engine=innodb;
+include/save_master_gtid.inc
+connection slave;
+include/sync_with_master_gtid.inc
+# Pause the user transaction before inserting into Active_tranx
+connect user_con,localhost,root,,;
+SET debug_sync= 'semisync_at_write_tranx_in_binlog SIGNAL at_write_tranx_in_binlog WAIT_FOR resume_write_tranx_in_binlog';
+insert into t_36934 values (1);
+connection server_1;
+set debug_sync="now wait_for at_write_tranx_in_binlog";
+# Disconnect the slave (note that the binlog dump thread won't yet be
+# notified of a binlog update from the last transaction, so the slave
+# should neither receiver nor ACK the transaction).
+connection slave;
+include/stop_slave.inc
+# Waiting for master to realize the slave has disconnected..
+connection server_1;
+# ..done
+# Resuming transaction (it will exit commit_trx early without waiting)
+set debug_sync="now signal resume_write_tranx_in_binlog";
+connection user_con;
+disconnect user_con;
+# Force delete the user thread (FLUSH THREADS ensures the thread won't
+# stay in the thread cache)
+connection master;
+FLUSH THREADS;
+# BUG: Re-connect slave. MDEV-36934 reports that the master would hang
+# when the slave would re-connect and try to ACK the last transaction
+# who's thread has been deleted
+connection slave;
+include/start_slave.inc
+# Try to commit another transaction (prior to MDEV-36934 fixes, this
+# would hang indefinitely)
+connection master;
+set debug_sync="RESET";
+insert into t_36934 values (2);
+connection server_1;
+# Waiting 30s for last query to complete..
+connection master;
+# ..done
+# Cleanup
+connection master;
 set @@global.rpl_semi_sync_master_wait_point= @old_master_wait_point;
 set @@global.rpl_semi_sync_master_wait_no_slave= @old_master_wait_no_slave;
 set @@global.debug_dbug=@save_debug_dbug;
+drop table t_36934;
 include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test b/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test
index 537a4bf7fa8..5d6702f8c42 100644
--- a/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test
+++ b/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test
@@ -34,6 +34,8 @@ call mtr.add_suppression("Could not read packet");
 call mtr.add_suppression("Could not write packet");
 set @save_bgc_count= @@global.binlog_commit_wait_count;
 set @save_bgc_usec= @@global.binlog_commit_wait_usec;
+set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point;
+set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave;
 set @save_debug_dbug= @@global.debug_dbug;
 set @@global.binlog_commit_wait_count=3;
 set @@global.binlog_commit_wait_usec=10000000;
@@ -98,8 +100,6 @@ drop table t1, t2, t3;
--connection master
-set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point;
-set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave;
 set @@global.rpl_semi_sync_master_wait_point= AFTER_SYNC;
 set @@global.rpl_semi_sync_master_wait_no_slave= 1;
 --eval set @@global.debug_dbug="+d,semisync_log_skip_trx_wait"
@@ -190,8 +190,108 @@ commit;
 --echo # Cleanup
 --connection master
 drop table tn;
+set @@global.debug_dbug=@save_debug_dbug;
+
+
+--echo #
+--echo # MDEV-36934
+--echo # The server could indefinitely hang due to a memory leak which tried to
+--echo # pthread signal on a destroyed condition variable. In effect, no
+--echo # connections could commit transactions because there would be a thread
+--echo # stuck on a never-returning call to pthread_cond_signal() while
+--echo # holding Repl_semi_sync_master::LOCK_log.
+
+--connection master
+set @@global.rpl_semi_sync_master_wait_point= AFTER_COMMIT;
+set @@global.rpl_semi_sync_master_wait_no_slave= 0;
+
+--echo # Ensure servers are in proper state
+--connection master
+let $status_var= rpl_semi_sync_master_status;
+let $status_var_value= ON;
+source include/wait_for_status_var.inc;
+--connection slave
+let $status_var= rpl_semi_sync_slave_status;
+let $status_var_value= ON;
+source include/wait_for_status_var.inc;
+
+--echo # Test case initial set-up
+--connection master
+create table t_36934 (a int) engine=innodb;
+--source include/save_master_gtid.inc
+--connection slave
+--source include/sync_with_master_gtid.inc
+
+--echo # Pause the user transaction before inserting into Active_tranx
+--connect(user_con,localhost,root,,)
+SET debug_sync= 'semisync_at_write_tranx_in_binlog SIGNAL at_write_tranx_in_binlog WAIT_FOR resume_write_tranx_in_binlog';
+--send insert into t_36934 values (1)
+
+--connection server_1
+set debug_sync="now wait_for at_write_tranx_in_binlog";
+
+--echo # Disconnect the slave (note that the binlog dump thread won't yet be
+--echo # notified of a binlog update from the last transaction, so the slave
+--echo # should neither receiver nor ACK the transaction).
+--connection slave
+--source include/stop_slave.inc
+
+--echo # Waiting for master to realize the slave has disconnected..
+--connection server_1
+let $status_var= rpl_semi_sync_master_clients;
+let $status_var_value= 0;
+source include/wait_for_status_var.inc;
+--echo # ..done
+
+--echo # Resuming transaction (it will exit commit_trx early without waiting)
+set debug_sync="now signal resume_write_tranx_in_binlog";
+
+--connection user_con
+--reap
+--let $user_con_tid= `SELECT connection_id()`
+--disconnect user_con
+--source include/wait_until_disconnected.inc
+
+--echo # Force delete the user thread (FLUSH THREADS ensures the thread won't
+--echo # stay in the thread cache)
+--connection master
+FLUSH THREADS;
+
+--echo # BUG: Re-connect slave. MDEV-36934 reports that the master would hang
+--echo # when the slave would re-connect and try to ACK the last transaction
+--echo # who's thread has been deleted
+--connection slave
+--source include/start_slave.inc
+
+--echo # Try to commit another transaction (prior to MDEV-36934 fixes, this
+--echo # would hang indefinitely)
+--connection master
+set debug_sync="RESET";
+--send insert into t_36934 values (2)
+
+--connection server_1
+--echo # Waiting 30s for last query to complete..
+--let $wait_timeout= 30
+--let $wait_condition= SELECT count(*)=0 FROM information_schema.processlist WHERE info LIKE 'insert into t_36934%';
+--source include/wait_condition.inc
+
+# Variable `success` is set by wait_condition.inc
+if (!$success)
+{
+  --echo # ..error
+  --die Query is hung
+}
+
+--connection master
+--reap
+--echo # ..done
+
+
+--echo # Cleanup
+--connection master
 set @@global.rpl_semi_sync_master_wait_point= @old_master_wait_point;
 set @@global.rpl_semi_sync_master_wait_no_slave= @old_master_wait_no_slave;
 set @@global.debug_dbug=@save_debug_dbug;
+drop table t_36934;
--source include/rpl_end.inc
diff --git a/sql/semisync_master.cc b/sql/semisync_master.cc
index 43afb8f996e..854b6a61eeb 100644
--- a/sql/semisync_master.cc
+++ b/sql/semisync_master.cc
@@ -68,15 +68,20 @@ static ulonglong timespec_to_usec(const struct timespec *ts)
   return (ulonglong) ts->tv_sec * TIME_MILLION + ts->tv_nsec / TIME_THOUSAND;
 }
-int signal_waiting_transaction(THD *waiting_thd, const char *binlog_file,
-                                my_off_t binlog_pos)
+static int
+signal_waiting_transaction(THD *waiting_thd, bool thd_valid,
+                           const char *binlog_file, my_off_t binlog_pos)
 {
   /*
     It is possible that the connection thd waiting for an ACK was killed. In
     such circumstance, the connection thread will nullify the thd member of its
     Active_tranx node. So before we try to signal, ensure the THD exists.
+
+    The thd_valid is only set while the THD is waiting in commit_trx(); this
+    is defensive coding to not signal an invalid THD if we somewhere
+    accidentally did not remove the transaction from the list.
   */
-  if (waiting_thd)
+  if (waiting_thd && thd_valid)
     mysql_cond_signal(&waiting_thd->COND_wakeup_ready);
   return 0;
 }
@@ -182,6 +187,7 @@ int Active_tranx::insert_tranx_node(THD *thd_to_wait,
   ins_node->log_name[FN_REFLEN-1] = 0; /* make sure it ends properly */
   ins_node->log_pos = log_file_pos;
   ins_node->thd= thd_to_wait;
+  ins_node->thd_valid= false;
if (!m_trx_front)
   {
@@ -263,7 +269,8 @@ void Active_tranx::clear_active_tranx_nodes(
     if ((log_file_name != NULL) &&
         compare(new_front, log_file_name, log_file_pos) > 0)
       break;
-    pre_delete_hook(new_front->thd, new_front->log_name, new_front->log_pos);
+    pre_delete_hook(new_front->thd, new_front->thd_valid,
+                    new_front->log_name, new_front->log_pos);
     new_front = new_front->next;
   }
@@ -355,13 +362,17 @@ void Active_tranx::unlink_thd_as_waiter(const char *log_file_name,
   }
if (entry)
+  {
     entry->thd= NULL;
+    entry->thd_valid= false;
+  }
DBUG_VOID_RETURN;
 }
-bool Active_tranx::is_thd_waiter(THD *thd_to_check, const char *log_file_name,
-                                 my_off_t log_file_pos)
+Tranx_node *
+Active_tranx::is_thd_waiter(THD *thd_to_check, const char *log_file_name,
+                            my_off_t log_file_pos)
 {
   DBUG_ENTER("Active_tranx::assert_thd_is_waiter");
   mysql_mutex_assert_owner(m_lock);
@@ -377,7 +388,7 @@ bool Active_tranx::is_thd_waiter(THD *thd_to_check, const char *log_file_name,
     entry = entry->hash_next;
   }
-  DBUG_RETURN(static_cast<bool>(entry));
+  DBUG_RETURN(entry);
 }
/*******************************************************************************
@@ -863,6 +874,10 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,
if (!rpl_semi_sync_master_clients && !rpl_semi_sync_master_wait_no_slave)
   {
+    lock();
+    m_active_tranxs->unlink_thd_as_waiter(trx_wait_binlog_name,
+                                          trx_wait_binlog_pos);
+    unlock();
     rpl_semi_sync_master_no_transactions++;
     DBUG_RETURN(0);
   }
@@ -922,6 +937,9 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,
         }
       }
+      Tranx_node *tranx_entry=
+        m_active_tranxs->is_thd_waiter(thd, trx_wait_binlog_name,
+                                       trx_wait_binlog_pos);
       /* In between the binlogging of this transaction and this wait, it is
        * possible that our entry in Active_tranx was removed (i.e. if
        * semi-sync was switched off and on). It is also possible that the
@@ -932,8 +950,7 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,
        * rpl_semi_sync_master_yes/no_tx consistent with it, we check for a
        * semi-sync restart _after_ checking the reply state.
        */
-      if (unlikely(!m_active_tranxs->is_thd_waiter(thd, trx_wait_binlog_name,
-                                                   trx_wait_binlog_pos)))
+      if (unlikely(!tranx_entry))
       {
         DBUG_EXECUTE_IF(
             "semisync_log_skip_trx_wait",
@@ -952,6 +969,16 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,
         break;
       }
+      /*
+        Mark that our THD is now valid for signalling to by the ack thread.
+        It is important to ensure that we can never leave a no longer valid
+        THD in the transaction list and signal it, eg. MDEV-36934. This way,
+        we ensure the THD will only be signalled while this function is
+        running, even in case of some incorrect error handling or similar
+        that might leave a dangling THD in the list.
+      */
+      tranx_entry->thd_valid= true;
+
       /* Let us update the info about the minimum binlog position of waiting
        * threads.
        */
@@ -1284,6 +1311,8 @@ int Repl_semi_sync_master::write_tranx_in_binlog(THD *thd,
DBUG_ENTER("Repl_semi_sync_master::write_tranx_in_binlog");
+  DEBUG_SYNC(current_thd, "semisync_at_write_tranx_in_binlog");
+
   lock();
/* This is the real check inside the mutex. */
@@ -1317,7 +1346,8 @@ int Repl_semi_sync_master::write_tranx_in_binlog(THD *thd,
     m_commit_file_name_inited = true;
   }
-  if (is_on())
+  if (is_on() &&
+      (rpl_semi_sync_master_clients || rpl_semi_sync_master_wait_no_slave))
   {
     DBUG_ASSERT(m_active_tranxs != NULL);
     if(m_active_tranxs->insert_tranx_node(thd, log_file_name, log_file_pos))
diff --git a/sql/semisync_master.h b/sql/semisync_master.h
index a1c57959165..28de3ecf480 100644
--- a/sql/semisync_master.h
+++ b/sql/semisync_master.h
@@ -30,6 +30,7 @@ extern PSI_cond_key key_COND_binlog_send;
struct Tranx_node {
   char              log_name[FN_REFLEN];
+  bool              thd_valid;             /* thd is valid for signalling */
   my_off_t          log_pos;
   THD               *thd;                   /* The thread awaiting an ACK */
   struct Tranx_node *next;            /* the next node in the sorted list */
@@ -126,7 +127,9 @@ class Tranx_node_allocator
trx_node= &(current_block->nodes[++last_node]);
     trx_node->log_name[0] = '\0';
+    trx_node->thd_valid= false;
     trx_node->log_pos= 0;
+    trx_node->thd= nullptr;
     trx_node->next= 0;
     trx_node->hash_next= 0;
     return trx_node;
@@ -298,7 +301,8 @@ class Tranx_node_allocator
   its invocation. See the context in which it is called to know.
 */
-typedef int (*active_tranx_action)(THD *trx_thd, const char *log_file_name,
+typedef int (*active_tranx_action)(THD *trx_thd, bool thd_valid,
+                                   const char *log_file_name,
                                    my_off_t trx_log_file_pos);
/**
@@ -381,8 +385,8 @@ class Active_tranx
    * matches the thread of the respective Tranx_node::thd of the passed in
    * log_file_name and log_file_pos.
    */
-  bool is_thd_waiter(THD *thd_to_check, const char *log_file_name,
-                     my_off_t log_file_pos);
+  Tranx_node * is_thd_waiter(THD *thd_to_check, const char *log_file_name,
+                             my_off_t log_file_pos);
/* Given a position, check to see whether the position is an active
    * transaction's ending position by probing the hash table.

    

Re: [MariaDB commits] [PATCH] MDEV-36934: semi sync makes the master unresponsive when a replica is stopped

Kristian Nielsen

Brandon Nesterenko

Kristian Nielsen

tags

participants (2)