[Commits] 48e3b4ca5dd: MDEV-15607: mysqld crashed few after node is being joined with sst
revision-id: 48e3b4ca5dd6a6cffbee64381dc301d43c66e036 (mariadb-10.1.32-67-g48e3b4ca5dd) parent(s): 9c34a4124d67d9e3f70837eaeb11290f35e8f8d0 author: Jan Lindström committer: Jan Lindström timestamp: 2018-04-24 14:43:41 +0300 message: MDEV-15607: mysqld crashed few after node is being joined with sst This is a typical systemd response where it tries to shut down the joiner (due to "timeout") before the joiner manages to complete SST. In wsrep_sst_wait and wsrep_SE_init_wait: while waiting for the operation to finish, use mysql_cond_timedwait instead of mysql_cond_wait, and if the operation is not finished, extend the systemd timeout (if needed). --- sql/wsrep_sst.cc | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc index 260755d08a8..c1f980bd595 100644 --- a/sql/wsrep_sst.cc +++ b/sql/wsrep_sst.cc @@ -30,6 +30,10 @@ #include <cstdio> #include <cstdlib> +#if MYSQL_VERSION_ID < 100200 +# include <my_service_manager.h> +#endif + static char wsrep_defaults_file[FN_REFLEN * 2 + 10 + 30 + sizeof(WSREP_SST_OPT_CONF) + sizeof(WSREP_SST_OPT_CONF_SUFFIX) + @@ -186,6 +190,9 @@ bool wsrep_before_SE() static bool sst_complete = false; static bool sst_needed = false; +#define WSREP_EXTEND_TIMEOUT_INTERVAL 30 +#define WSREP_TIMEDWAIT_SECONDS 10 + void wsrep_sst_grab () { WSREP_INFO("wsrep_sst_grab()"); @@ -197,11 +204,16 @@ void wsrep_sst_grab () // Wait for end of SST bool wsrep_sst_wait () { + struct timespec wtime = {WSREP_TIMEDWAIT_SECONDS, 0}; if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort(); while (!sst_complete) { WSREP_INFO("Waiting for SST to complete."); - mysql_cond_wait (&COND_wsrep_sst, &LOCK_wsrep_sst); + mysql_cond_timedwait (&COND_wsrep_sst, &LOCK_wsrep_sst, &wtime); + if (!sst_complete) + service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL, + "WSREP state transfer ongoing, current seqno: %ld", local_seqno); + } if (local_seqno >= 0) @@ -1298,9 +1310,14 @@ void wsrep_SE_init_grab() void wsrep_SE_init_wait() 
{ + struct timespec wtime = {WSREP_TIMEDWAIT_SECONDS, 0}; while (SE_initialized == false) { - mysql_cond_wait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init); + mysql_cond_timedwait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init, &wtime); + + if (!SE_initialized) + service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL, + "WSREP SE initialization ongoing."); } mysql_mutex_unlock (&LOCK_wsrep_sst_init); }
participants (1)
-
jan