
Hi Brandon, Kristian Nielsen via commits <commits@lists.mariadb.org> writes:
Ensure that a pending semi-sync transaction can only be signalled on its THD while it is waiting in commit_trx(); not if the wait in commit_trx() is skipped for some reason.
Can you review this patch for MDEV-36934 so it can get into the upcoming release? You've already seen and agreed to the first version of the patch. I've included your test case for the bug, thanks a lot for writing that! I only modified it slightly, to increase the timeout for potentially slow Buildbot hosts, and to avoid an unnecessary use of DBUG injection. I also added your suggestions to (in addition to my original defensive-coding patch to only signal the THD while it is marked as waiting) also avoid adding a transaction to the list if it will never be ack'ed due to no waiting slaves; and to unlink_thd_as_waiter() if we skip waiting for ack in commit_trx(). - Kristian.
There was a bug that if no semi-sync slaves were connected and rpl_semi_sync_master_wait_no_slave was off, the THD pointer could be left in the list of pending transactions after THD was deleted, and an invalid THD/condition variable could be signalled, causing hang or other corruption.
Testcase based on work by Brandon Nesterenko <brandon.nesterenko@mariadb.com>.
Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org> --- .../r/rpl_semi_sync_cond_var_per_thd.result | 63 ++++++++++- .../rpl/t/rpl_semi_sync_cond_var_per_thd.test | 104 +++++++++++++++++- sql/semisync_master.cc | 50 +++++++-- sql/semisync_master.h | 10 +- 4 files changed, 210 insertions(+), 17 deletions(-)
diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result b/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result index 18ad5d3d2cc..96e7aa43fbf 100644 --- a/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result +++ b/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result @@ -7,6 +7,8 @@ call mtr.add_suppression("Could not read packet"); call mtr.add_suppression("Could not write packet"); set @save_bgc_count= @@global.binlog_commit_wait_count; set @save_bgc_usec= @@global.binlog_commit_wait_usec; +set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point; +set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave; set @save_debug_dbug= @@global.debug_dbug; set @@global.binlog_commit_wait_count=3; set @@global.binlog_commit_wait_usec=10000000; @@ -46,8 +48,6 @@ drop table t1, t2, t3; # the binlogging to semi-sync, and starting the wait for ACK; and during # this pause, semi-sync is manually switched off and on. connection master; -set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point; -set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave; set @@global.rpl_semi_sync_master_wait_point= AFTER_SYNC; set @@global.rpl_semi_sync_master_wait_no_slave= 1; set @@global.debug_dbug="+d,semisync_log_skip_trx_wait"; @@ -100,7 +100,66 @@ commit; # Cleanup connection master; drop table tn; +set @@global.debug_dbug=@save_debug_dbug; +# +# MDEV-36934 +# The server could indefinitely hang due to a memory leak which tried to +# pthread signal on a destroyed condition variable. In effect, no +# connections could commit transactions because there would be a thread +# stuck on a never-returning call to pthread_cond_signal() while +# holding Repl_semi_sync_master::LOCK_log. +connection master; +set @@global.rpl_semi_sync_master_wait_point= AFTER_COMMIT; +set @@global.rpl_semi_sync_master_wait_no_slave= 0; +# Ensure servers are in proper state +connection master; +connection slave; +# Test case initial set-up +connection master; +create table t_36934 (a int) engine=innodb; +include/save_master_gtid.inc +connection slave; +include/sync_with_master_gtid.inc +# Pause the user transaction before inserting into Active_tranx +connect user_con,localhost,root,,; +SET debug_sync= 'semisync_at_write_tranx_in_binlog SIGNAL at_write_tranx_in_binlog WAIT_FOR resume_write_tranx_in_binlog'; +insert into t_36934 values (1); +connection server_1; +set debug_sync="now wait_for at_write_tranx_in_binlog"; +# Disconnect the slave (note that the binlog dump thread won't yet be +# notified of a binlog update from the last transaction, so the slave +# should neither receiver nor ACK the transaction). +connection slave; +include/stop_slave.inc +# Waiting for master to realize the slave has disconnected.. +connection server_1; +# ..done +# Resuming transaction (it will exit commit_trx early without waiting) +set debug_sync="now signal resume_write_tranx_in_binlog"; +connection user_con; +disconnect user_con; +# Force delete the user thread (FLUSH THREADS ensures the thread won't +# stay in the thread cache) +connection master; +FLUSH THREADS; +# BUG: Re-connect slave. MDEV-36934 reports that the master would hang +# when the slave would re-connect and try to ACK the last transaction +# who's thread has been deleted +connection slave; +include/start_slave.inc +# Try to commit another transaction (prior to MDEV-36934 fixes, this +# would hang indefinitely) +connection master; +set debug_sync="RESET"; +insert into t_36934 values (2); +connection server_1; +# Waiting 30s for last query to complete.. +connection master; +# ..done +# Cleanup +connection master; set @@global.rpl_semi_sync_master_wait_point= @old_master_wait_point; set @@global.rpl_semi_sync_master_wait_no_slave= @old_master_wait_no_slave; set @@global.debug_dbug=@save_debug_dbug; +drop table t_36934; include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test b/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test index 537a4bf7fa8..5d6702f8c42 100644 --- a/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test @@ -34,6 +34,8 @@ call mtr.add_suppression("Could not read packet"); call mtr.add_suppression("Could not write packet"); set @save_bgc_count= @@global.binlog_commit_wait_count; set @save_bgc_usec= @@global.binlog_commit_wait_usec; +set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point; +set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave; set @save_debug_dbug= @@global.debug_dbug; set @@global.binlog_commit_wait_count=3; set @@global.binlog_commit_wait_usec=10000000; @@ -98,8 +100,6 @@ drop table t1, t2, t3;
--connection master -set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point; -set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave; set @@global.rpl_semi_sync_master_wait_point= AFTER_SYNC; set @@global.rpl_semi_sync_master_wait_no_slave= 1; --eval set @@global.debug_dbug="+d,semisync_log_skip_trx_wait" @@ -190,8 +190,108 @@ commit; --echo # Cleanup --connection master drop table tn; +set @@global.debug_dbug=@save_debug_dbug; + + +--echo # +--echo # MDEV-36934 +--echo # The server could indefinitely hang due to a memory leak which tried to +--echo # pthread signal on a destroyed condition variable. In effect, no +--echo # connections could commit transactions because there would be a thread +--echo # stuck on a never-returning call to pthread_cond_signal() while +--echo # holding Repl_semi_sync_master::LOCK_log. + +--connection master +set @@global.rpl_semi_sync_master_wait_point= AFTER_COMMIT; +set @@global.rpl_semi_sync_master_wait_no_slave= 0; + +--echo # Ensure servers are in proper state +--connection master +let $status_var= rpl_semi_sync_master_status; +let $status_var_value= ON; +source include/wait_for_status_var.inc; +--connection slave +let $status_var= rpl_semi_sync_slave_status; +let $status_var_value= ON; +source include/wait_for_status_var.inc; + +--echo # Test case initial set-up +--connection master +create table t_36934 (a int) engine=innodb; +--source include/save_master_gtid.inc +--connection slave +--source include/sync_with_master_gtid.inc + +--echo # Pause the user transaction before inserting into Active_tranx +--connect(user_con,localhost,root,,) +SET debug_sync= 'semisync_at_write_tranx_in_binlog SIGNAL at_write_tranx_in_binlog WAIT_FOR resume_write_tranx_in_binlog'; +--send insert into t_36934 values (1) + +--connection server_1 +set debug_sync="now wait_for at_write_tranx_in_binlog"; + +--echo # Disconnect the slave (note that the binlog dump thread won't yet be +--echo # notified of a binlog update from the last transaction, so the slave +--echo # should neither receiver nor ACK the transaction). +--connection slave +--source include/stop_slave.inc + +--echo # Waiting for master to realize the slave has disconnected.. +--connection server_1 +let $status_var= rpl_semi_sync_master_clients; +let $status_var_value= 0; +source include/wait_for_status_var.inc; +--echo # ..done + +--echo # Resuming transaction (it will exit commit_trx early without waiting) +set debug_sync="now signal resume_write_tranx_in_binlog"; + +--connection user_con +--reap +--let $user_con_tid= `SELECT connection_id()` +--disconnect user_con +--source include/wait_until_disconnected.inc + +--echo # Force delete the user thread (FLUSH THREADS ensures the thread won't +--echo # stay in the thread cache) +--connection master +FLUSH THREADS; + +--echo # BUG: Re-connect slave. MDEV-36934 reports that the master would hang +--echo # when the slave would re-connect and try to ACK the last transaction +--echo # who's thread has been deleted +--connection slave +--source include/start_slave.inc + +--echo # Try to commit another transaction (prior to MDEV-36934 fixes, this +--echo # would hang indefinitely) +--connection master +set debug_sync="RESET"; +--send insert into t_36934 values (2) + +--connection server_1 +--echo # Waiting 30s for last query to complete.. +--let $wait_timeout= 30 +--let $wait_condition= SELECT count(*)=0 FROM information_schema.processlist WHERE info LIKE 'insert into t_36934%'; +--source include/wait_condition.inc + +# Variable `success` is set by wait_condition.inc +if (!$success) +{ + --echo # ..error + --die Query is hung +} + +--connection master +--reap +--echo # ..done + + +--echo # Cleanup +--connection master set @@global.rpl_semi_sync_master_wait_point= @old_master_wait_point; set @@global.rpl_semi_sync_master_wait_no_slave= @old_master_wait_no_slave; set @@global.debug_dbug=@save_debug_dbug; +drop table t_36934;
--source include/rpl_end.inc diff --git a/sql/semisync_master.cc b/sql/semisync_master.cc index 43afb8f996e..854b6a61eeb 100644 --- a/sql/semisync_master.cc +++ b/sql/semisync_master.cc @@ -68,15 +68,20 @@ static ulonglong timespec_to_usec(const struct timespec *ts) return (ulonglong) ts->tv_sec * TIME_MILLION + ts->tv_nsec / TIME_THOUSAND; }
-int signal_waiting_transaction(THD *waiting_thd, const char *binlog_file, - my_off_t binlog_pos) +static int +signal_waiting_transaction(THD *waiting_thd, bool thd_valid, + const char *binlog_file, my_off_t binlog_pos) { /* It is possible that the connection thd waiting for an ACK was killed. In such circumstance, the connection thread will nullify the thd member of its Active_tranx node. So before we try to signal, ensure the THD exists. + + The thd_valid is only set while the THD is waiting in commit_trx(); this + is defensive coding to not signal an invalid THD if we somewhere + accidentally did not remove the transaction from the list. */ - if (waiting_thd) + if (waiting_thd && thd_valid) mysql_cond_signal(&waiting_thd->COND_wakeup_ready); return 0; } @@ -182,6 +187,7 @@ int Active_tranx::insert_tranx_node(THD *thd_to_wait, ins_node->log_name[FN_REFLEN-1] = 0; /* make sure it ends properly */ ins_node->log_pos = log_file_pos; ins_node->thd= thd_to_wait; + ins_node->thd_valid= false;
if (!m_trx_front) { @@ -263,7 +269,8 @@ void Active_tranx::clear_active_tranx_nodes( if ((log_file_name != NULL) && compare(new_front, log_file_name, log_file_pos) > 0) break; - pre_delete_hook(new_front->thd, new_front->log_name, new_front->log_pos); + pre_delete_hook(new_front->thd, new_front->thd_valid, + new_front->log_name, new_front->log_pos); new_front = new_front->next; }
@@ -355,13 +362,17 @@ void Active_tranx::unlink_thd_as_waiter(const char *log_file_name, }
if (entry) + { entry->thd= NULL; + entry->thd_valid= false; + }
DBUG_VOID_RETURN; }
-bool Active_tranx::is_thd_waiter(THD *thd_to_check, const char *log_file_name, - my_off_t log_file_pos) +Tranx_node * +Active_tranx::is_thd_waiter(THD *thd_to_check, const char *log_file_name, + my_off_t log_file_pos) { DBUG_ENTER("Active_tranx::assert_thd_is_waiter"); mysql_mutex_assert_owner(m_lock); @@ -377,7 +388,7 @@ bool Active_tranx::is_thd_waiter(THD *thd_to_check, const char *log_file_name, entry = entry->hash_next; }
- DBUG_RETURN(static_cast<bool>(entry)); + DBUG_RETURN(entry); }
/******************************************************************************* @@ -863,6 +874,10 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,
if (!rpl_semi_sync_master_clients && !rpl_semi_sync_master_wait_no_slave) { + lock(); + m_active_tranxs->unlink_thd_as_waiter(trx_wait_binlog_name, + trx_wait_binlog_pos); + unlock(); rpl_semi_sync_master_no_transactions++; DBUG_RETURN(0); } @@ -922,6 +937,9 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name, } }
+ Tranx_node *tranx_entry= + m_active_tranxs->is_thd_waiter(thd, trx_wait_binlog_name, + trx_wait_binlog_pos); /* In between the binlogging of this transaction and this wait, it is * possible that our entry in Active_tranx was removed (i.e. if * semi-sync was switched off and on). It is also possible that the @@ -932,8 +950,7 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name, * rpl_semi_sync_master_yes/no_tx consistent with it, we check for a * semi-sync restart _after_ checking the reply state. */ - if (unlikely(!m_active_tranxs->is_thd_waiter(thd, trx_wait_binlog_name, - trx_wait_binlog_pos))) + if (unlikely(!tranx_entry)) { DBUG_EXECUTE_IF( "semisync_log_skip_trx_wait", @@ -952,6 +969,16 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name, break; }
+ /* + Mark that our THD is now valid for signalling to by the ack thread. + It is important to ensure that we can never leave a no longer valid + THD in the transaction list and signal it, eg. MDEV-36934. This way, + we ensure the THD will only be signalled while this function is + running, even in case of some incorrect error handling or similar + that might leave a dangling THD in the list. + */ + tranx_entry->thd_valid= true; + /* Let us update the info about the minimum binlog position of waiting * threads. */ @@ -1284,6 +1311,8 @@ int Repl_semi_sync_master::write_tranx_in_binlog(THD *thd,
DBUG_ENTER("Repl_semi_sync_master::write_tranx_in_binlog");
+ DEBUG_SYNC(current_thd, "semisync_at_write_tranx_in_binlog"); + lock();
/* This is the real check inside the mutex. */ @@ -1317,7 +1346,8 @@ int Repl_semi_sync_master::write_tranx_in_binlog(THD *thd, m_commit_file_name_inited = true; }
- if (is_on()) + if (is_on() && + (rpl_semi_sync_master_clients || rpl_semi_sync_master_wait_no_slave)) { DBUG_ASSERT(m_active_tranxs != NULL); if(m_active_tranxs->insert_tranx_node(thd, log_file_name, log_file_pos)) diff --git a/sql/semisync_master.h b/sql/semisync_master.h index a1c57959165..28de3ecf480 100644 --- a/sql/semisync_master.h +++ b/sql/semisync_master.h @@ -30,6 +30,7 @@ extern PSI_cond_key key_COND_binlog_send;
struct Tranx_node { char log_name[FN_REFLEN]; + bool thd_valid; /* thd is valid for signalling */ my_off_t log_pos; THD *thd; /* The thread awaiting an ACK */ struct Tranx_node *next; /* the next node in the sorted list */ @@ -126,7 +127,9 @@ class Tranx_node_allocator
trx_node= &(current_block->nodes[++last_node]); trx_node->log_name[0] = '\0'; + trx_node->thd_valid= false; trx_node->log_pos= 0; + trx_node->thd= nullptr; trx_node->next= 0; trx_node->hash_next= 0; return trx_node; @@ -298,7 +301,8 @@ class Tranx_node_allocator its invocation. See the context in which it is called to know. */
-typedef int (*active_tranx_action)(THD *trx_thd, const char *log_file_name, +typedef int (*active_tranx_action)(THD *trx_thd, bool thd_valid, + const char *log_file_name, my_off_t trx_log_file_pos);
/** @@ -381,8 +385,8 @@ class Active_tranx * matches the thread of the respective Tranx_node::thd of the passed in * log_file_name and log_file_pos. */ - bool is_thd_waiter(THD *thd_to_check, const char *log_file_name, - my_off_t log_file_pos); + Tranx_node * is_thd_waiter(THD *thd_to_check, const char *log_file_name, + my_off_t log_file_pos);
/* Given a position, check to see whether the position is an active * transaction's ending position by probing the hash table.