Hi Brandon,
Kristian Nielsen via commits <commits@lists.mariadb.org> writes:
> Ensure that a pending semi-sync transaction can only be signalled on its THD
> while it is waiting in commit_trx(); not if the wait in commit_trx() is
> skipped for some reason.
Can you review this patch for MDEV-36934 so it can get into the upcoming
release? You've already seen and agreed to the first version of the patch.
I've included your test case for the bug, thanks a lot for writing that! I
only modified it slightly, to increase the timeout for potentially slow
Buildbot hosts, and to avoid an unnecessary use of DBUG injection.
I also added your suggestions to (in addition to my original
defensive-coding patch to only signal the THD while it is marked as waiting)
also avoid adding a transaction to the list if it will never be ack'ed due
to no waiting slaves; and to unlink_thd_as_waiter() if we skip waiting for
ack in commit_trx().
- Kristian.
>
> There was a bug that if no semi-sync slaves were connected and
> rpl_semi_sync_master_wait_no_slave was off, the THD pointer could be left in
> the list of pending transactions after THD was deleted, and an invalid
> THD/condition variable could be signalled, causing hang or other corruption.
>
> Testcase based on work by Brandon Nesterenko <brandon.nesterenko@mariadb.com>.
>
> Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
> ---
> .../r/rpl_semi_sync_cond_var_per_thd.result | 63 ++++++++++-
> .../rpl/t/rpl_semi_sync_cond_var_per_thd.test | 104 +++++++++++++++++-
> sql/semisync_master.cc | 50 +++++++--
> sql/semisync_master.h | 10 +-
> 4 files changed, 210 insertions(+), 17 deletions(-)
>
> diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result b/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result
> index 18ad5d3d2cc..96e7aa43fbf 100644
> --- a/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result
> +++ b/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result
> @@ -7,6 +7,8 @@ call mtr.add_suppression("Could not read packet");
> call mtr.add_suppression("Could not write packet");
> set @save_bgc_count= @@global.binlog_commit_wait_count;
> set @save_bgc_usec= @@global.binlog_commit_wait_usec;
> +set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point;
> +set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave;
> set @save_debug_dbug= @@global.debug_dbug;
> set @@global.binlog_commit_wait_count=3;
> set @@global.binlog_commit_wait_usec=10000000;
> @@ -46,8 +48,6 @@ drop table t1, t2, t3;
> # the binlogging to semi-sync, and starting the wait for ACK; and during
> # this pause, semi-sync is manually switched off and on.
> connection master;
> -set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point;
> -set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave;
> set @@global.rpl_semi_sync_master_wait_point= AFTER_SYNC;
> set @@global.rpl_semi_sync_master_wait_no_slave= 1;
> set @@global.debug_dbug="+d,semisync_log_skip_trx_wait";
> @@ -100,7 +100,66 @@ commit;
> # Cleanup
> connection master;
> drop table tn;
> +set @@global.debug_dbug=@save_debug_dbug;
> +#
> +# MDEV-36934
> +# The server could indefinitely hang due to a memory leak which tried to
> +# pthread signal on a destroyed condition variable. In effect, no
> +# connections could commit transactions because there would be a thread
> +# stuck on a never-returning call to pthread_cond_signal() while
> +# holding Repl_semi_sync_master::LOCK_log.
> +connection master;
> +set @@global.rpl_semi_sync_master_wait_point= AFTER_COMMIT;
> +set @@global.rpl_semi_sync_master_wait_no_slave= 0;
> +# Ensure servers are in proper state
> +connection master;
> +connection slave;
> +# Test case initial set-up
> +connection master;
> +create table t_36934 (a int) engine=innodb;
> +include/save_master_gtid.inc
> +connection slave;
> +include/sync_with_master_gtid.inc
> +# Pause the user transaction before inserting into Active_tranx
> +connect user_con,localhost,root,,;
> +SET debug_sync= 'semisync_at_write_tranx_in_binlog SIGNAL at_write_tranx_in_binlog WAIT_FOR resume_write_tranx_in_binlog';
> +insert into t_36934 values (1);
> +connection server_1;
> +set debug_sync="now wait_for at_write_tranx_in_binlog";
> +# Disconnect the slave (note that the binlog dump thread won't yet be
> +# notified of a binlog update from the last transaction, so the slave
> +# should neither receiver nor ACK the transaction).
> +connection slave;
> +include/stop_slave.inc
> +# Waiting for master to realize the slave has disconnected..
> +connection server_1;
> +# ..done
> +# Resuming transaction (it will exit commit_trx early without waiting)
> +set debug_sync="now signal resume_write_tranx_in_binlog";
> +connection user_con;
> +disconnect user_con;
> +# Force delete the user thread (FLUSH THREADS ensures the thread won't
> +# stay in the thread cache)
> +connection master;
> +FLUSH THREADS;
> +# BUG: Re-connect slave. MDEV-36934 reports that the master would hang
> +# when the slave would re-connect and try to ACK the last transaction
> +# who's thread has been deleted
> +connection slave;
> +include/start_slave.inc
> +# Try to commit another transaction (prior to MDEV-36934 fixes, this
> +# would hang indefinitely)
> +connection master;
> +set debug_sync="RESET";
> +insert into t_36934 values (2);
> +connection server_1;
> +# Waiting 30s for last query to complete..
> +connection master;
> +# ..done
> +# Cleanup
> +connection master;
> set @@global.rpl_semi_sync_master_wait_point= @old_master_wait_point;
> set @@global.rpl_semi_sync_master_wait_no_slave= @old_master_wait_no_slave;
> set @@global.debug_dbug=@save_debug_dbug;
> +drop table t_36934;
> include/rpl_end.inc
> diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test b/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test
> index 537a4bf7fa8..5d6702f8c42 100644
> --- a/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test
> +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test
> @@ -34,6 +34,8 @@ call mtr.add_suppression("Could not read packet");
> call mtr.add_suppression("Could not write packet");
> set @save_bgc_count= @@global.binlog_commit_wait_count;
> set @save_bgc_usec= @@global.binlog_commit_wait_usec;
> +set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point;
> +set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave;
> set @save_debug_dbug= @@global.debug_dbug;
> set @@global.binlog_commit_wait_count=3;
> set @@global.binlog_commit_wait_usec=10000000;
> @@ -98,8 +100,6 @@ drop table t1, t2, t3;
>
>
> --connection master
> -set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point;
> -set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave;
> set @@global.rpl_semi_sync_master_wait_point= AFTER_SYNC;
> set @@global.rpl_semi_sync_master_wait_no_slave= 1;
> --eval set @@global.debug_dbug="+d,semisync_log_skip_trx_wait"
> @@ -190,8 +190,108 @@ commit;
> --echo # Cleanup
> --connection master
> drop table tn;
> +set @@global.debug_dbug=@save_debug_dbug;
> +
> +
> +--echo #
> +--echo # MDEV-36934
> +--echo # The server could indefinitely hang due to a memory leak which tried to
> +--echo # pthread signal on a destroyed condition variable. In effect, no
> +--echo # connections could commit transactions because there would be a thread
> +--echo # stuck on a never-returning call to pthread_cond_signal() while
> +--echo # holding Repl_semi_sync_master::LOCK_log.
> +
> +--connection master
> +set @@global.rpl_semi_sync_master_wait_point= AFTER_COMMIT;
> +set @@global.rpl_semi_sync_master_wait_no_slave= 0;
> +
> +--echo # Ensure servers are in proper state
> +--connection master
> +let $status_var= rpl_semi_sync_master_status;
> +let $status_var_value= ON;
> +source include/wait_for_status_var.inc;
> +--connection slave
> +let $status_var= rpl_semi_sync_slave_status;
> +let $status_var_value= ON;
> +source include/wait_for_status_var.inc;
> +
> +--echo # Test case initial set-up
> +--connection master
> +create table t_36934 (a int) engine=innodb;
> +--source include/save_master_gtid.inc
> +--connection slave
> +--source include/sync_with_master_gtid.inc
> +
> +--echo # Pause the user transaction before inserting into Active_tranx
> +--connect(user_con,localhost,root,,)
> +SET debug_sync= 'semisync_at_write_tranx_in_binlog SIGNAL at_write_tranx_in_binlog WAIT_FOR resume_write_tranx_in_binlog';
> +--send insert into t_36934 values (1)
> +
> +--connection server_1
> +set debug_sync="now wait_for at_write_tranx_in_binlog";
> +
> +--echo # Disconnect the slave (note that the binlog dump thread won't yet be
> +--echo # notified of a binlog update from the last transaction, so the slave
> +--echo # should neither receiver nor ACK the transaction).
> +--connection slave
> +--source include/stop_slave.inc
> +
> +--echo # Waiting for master to realize the slave has disconnected..
> +--connection server_1
> +let $status_var= rpl_semi_sync_master_clients;
> +let $status_var_value= 0;
> +source include/wait_for_status_var.inc;
> +--echo # ..done
> +
> +--echo # Resuming transaction (it will exit commit_trx early without waiting)
> +set debug_sync="now signal resume_write_tranx_in_binlog";
> +
> +--connection user_con
> +--reap
> +--let $user_con_tid= `SELECT connection_id()`
> +--disconnect user_con
> +--source include/wait_until_disconnected.inc
> +
> +--echo # Force delete the user thread (FLUSH THREADS ensures the thread won't
> +--echo # stay in the thread cache)
> +--connection master
> +FLUSH THREADS;
> +
> +--echo # BUG: Re-connect slave. MDEV-36934 reports that the master would hang
> +--echo # when the slave would re-connect and try to ACK the last transaction
> +--echo # who's thread has been deleted
> +--connection slave
> +--source include/start_slave.inc
> +
> +--echo # Try to commit another transaction (prior to MDEV-36934 fixes, this
> +--echo # would hang indefinitely)
> +--connection master
> +set debug_sync="RESET";
> +--send insert into t_36934 values (2)
> +
> +--connection server_1
> +--echo # Waiting 30s for last query to complete..
> +--let $wait_timeout= 30
> +--let $wait_condition= SELECT count(*)=0 FROM information_schema.processlist WHERE info LIKE 'insert into t_36934%';
> +--source include/wait_condition.inc
> +
> +# Variable `success` is set by wait_condition.inc
> +if (!$success)
> +{
> + --echo # ..error
> + --die Query is hung
> +}
> +
> +--connection master
> +--reap
> +--echo # ..done
> +
> +
> +--echo # Cleanup
> +--connection master
> set @@global.rpl_semi_sync_master_wait_point= @old_master_wait_point;
> set @@global.rpl_semi_sync_master_wait_no_slave= @old_master_wait_no_slave;
> set @@global.debug_dbug=@save_debug_dbug;
> +drop table t_36934;
>
> --source include/rpl_end.inc
> diff --git a/sql/semisync_master.cc b/sql/semisync_master.cc
> index 43afb8f996e..854b6a61eeb 100644
> --- a/sql/semisync_master.cc
> +++ b/sql/semisync_master.cc
> @@ -68,15 +68,20 @@ static ulonglong timespec_to_usec(const struct timespec *ts)
> return (ulonglong) ts->tv_sec * TIME_MILLION + ts->tv_nsec / TIME_THOUSAND;
> }
>
> -int signal_waiting_transaction(THD *waiting_thd, const char *binlog_file,
> - my_off_t binlog_pos)
> +static int
> +signal_waiting_transaction(THD *waiting_thd, bool thd_valid,
> + const char *binlog_file, my_off_t binlog_pos)
> {
> /*
> It is possible that the connection thd waiting for an ACK was killed. In
> such circumstance, the connection thread will nullify the thd member of its
> Active_tranx node. So before we try to signal, ensure the THD exists.
> +
> + The thd_valid is only set while the THD is waiting in commit_trx(); this
> + is defensive coding to not signal an invalid THD if we somewhere
> + accidentally did not remove the transaction from the list.
> */
> - if (waiting_thd)
> + if (waiting_thd && thd_valid)
> mysql_cond_signal(&waiting_thd->COND_wakeup_ready);
> return 0;
> }
> @@ -182,6 +187,7 @@ int Active_tranx::insert_tranx_node(THD *thd_to_wait,
> ins_node->log_name[FN_REFLEN-1] = 0; /* make sure it ends properly */
> ins_node->log_pos = log_file_pos;
> ins_node->thd= thd_to_wait;
> + ins_node->thd_valid= false;
>
> if (!m_trx_front)
> {
> @@ -263,7 +269,8 @@ void Active_tranx::clear_active_tranx_nodes(
> if ((log_file_name != NULL) &&
> compare(new_front, log_file_name, log_file_pos) > 0)
> break;
> - pre_delete_hook(new_front->thd, new_front->log_name, new_front->log_pos);
> + pre_delete_hook(new_front->thd, new_front->thd_valid,
> + new_front->log_name, new_front->log_pos);
> new_front = new_front->next;
> }
>
> @@ -355,13 +362,17 @@ void Active_tranx::unlink_thd_as_waiter(const char *log_file_name,
> }
>
> if (entry)
> + {
> entry->thd= NULL;
> + entry->thd_valid= false;
> + }
>
> DBUG_VOID_RETURN;
> }
>
> -bool Active_tranx::is_thd_waiter(THD *thd_to_check, const char *log_file_name,
> - my_off_t log_file_pos)
> +Tranx_node *
> +Active_tranx::is_thd_waiter(THD *thd_to_check, const char *log_file_name,
> + my_off_t log_file_pos)
> {
> DBUG_ENTER("Active_tranx::assert_thd_is_waiter");
> mysql_mutex_assert_owner(m_lock);
> @@ -377,7 +388,7 @@ bool Active_tranx::is_thd_waiter(THD *thd_to_check, const char *log_file_name,
> entry = entry->hash_next;
> }
>
> - DBUG_RETURN(static_cast<bool>(entry));
> + DBUG_RETURN(entry);
> }
>
> /*******************************************************************************
> @@ -863,6 +874,10 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,
>
> if (!rpl_semi_sync_master_clients && !rpl_semi_sync_master_wait_no_slave)
> {
> + lock();
> + m_active_tranxs->unlink_thd_as_waiter(trx_wait_binlog_name,
> + trx_wait_binlog_pos);
> + unlock();
> rpl_semi_sync_master_no_transactions++;
> DBUG_RETURN(0);
> }
> @@ -922,6 +937,9 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,
> }
> }
>
> + Tranx_node *tranx_entry=
> + m_active_tranxs->is_thd_waiter(thd, trx_wait_binlog_name,
> + trx_wait_binlog_pos);
> /* In between the binlogging of this transaction and this wait, it is
> * possible that our entry in Active_tranx was removed (i.e. if
> * semi-sync was switched off and on). It is also possible that the
> @@ -932,8 +950,7 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,
> * rpl_semi_sync_master_yes/no_tx consistent with it, we check for a
> * semi-sync restart _after_ checking the reply state.
> */
> - if (unlikely(!m_active_tranxs->is_thd_waiter(thd, trx_wait_binlog_name,
> - trx_wait_binlog_pos)))
> + if (unlikely(!tranx_entry))
> {
> DBUG_EXECUTE_IF(
> "semisync_log_skip_trx_wait",
> @@ -952,6 +969,16 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,
> break;
> }
>
> + /*
> + Mark that our THD is now valid for signalling to by the ack thread.
> + It is important to ensure that we can never leave a no longer valid
> + THD in the transaction list and signal it, eg. MDEV-36934. This way,
> + we ensure the THD will only be signalled while this function is
> + running, even in case of some incorrect error handling or similar
> + that might leave a dangling THD in the list.
> + */
> + tranx_entry->thd_valid= true;
> +
> /* Let us update the info about the minimum binlog position of waiting
> * threads.
> */
> @@ -1284,6 +1311,8 @@ int Repl_semi_sync_master::write_tranx_in_binlog(THD *thd,
>
> DBUG_ENTER("Repl_semi_sync_master::write_tranx_in_binlog");
>
> + DEBUG_SYNC(current_thd, "semisync_at_write_tranx_in_binlog");
> +
> lock();
>
> /* This is the real check inside the mutex. */
> @@ -1317,7 +1346,8 @@ int Repl_semi_sync_master::write_tranx_in_binlog(THD *thd,
> m_commit_file_name_inited = true;
> }
>
> - if (is_on())
> + if (is_on() &&
> + (rpl_semi_sync_master_clients || rpl_semi_sync_master_wait_no_slave))
> {
> DBUG_ASSERT(m_active_tranxs != NULL);
> if(m_active_tranxs->insert_tranx_node(thd, log_file_name, log_file_pos))
> diff --git a/sql/semisync_master.h b/sql/semisync_master.h
> index a1c57959165..28de3ecf480 100644
> --- a/sql/semisync_master.h
> +++ b/sql/semisync_master.h
> @@ -30,6 +30,7 @@ extern PSI_cond_key key_COND_binlog_send;
>
> struct Tranx_node {
> char log_name[FN_REFLEN];
> + bool thd_valid; /* thd is valid for signalling */
> my_off_t log_pos;
> THD *thd; /* The thread awaiting an ACK */
> struct Tranx_node *next; /* the next node in the sorted list */
> @@ -126,7 +127,9 @@ class Tranx_node_allocator
>
> trx_node= &(current_block->nodes[++last_node]);
> trx_node->log_name[0] = '\0';
> + trx_node->thd_valid= false;
> trx_node->log_pos= 0;
> + trx_node->thd= nullptr;
> trx_node->next= 0;
> trx_node->hash_next= 0;
> return trx_node;
> @@ -298,7 +301,8 @@ class Tranx_node_allocator
> its invocation. See the context in which it is called to know.
> */
>
> -typedef int (*active_tranx_action)(THD *trx_thd, const char *log_file_name,
> +typedef int (*active_tranx_action)(THD *trx_thd, bool thd_valid,
> + const char *log_file_name,
> my_off_t trx_log_file_pos);
>
> /**
> @@ -381,8 +385,8 @@ class Active_tranx
> * matches the thread of the respective Tranx_node::thd of the passed in
> * log_file_name and log_file_pos.
> */
> - bool is_thd_waiter(THD *thd_to_check, const char *log_file_name,
> - my_off_t log_file_pos);
> + Tranx_node * is_thd_waiter(THD *thd_to_check, const char *log_file_name,
> + my_off_t log_file_pos);
>
> /* Given a position, check to see whether the position is an active
> * transaction's ending position by probing the hash table.