revision-id: 5d9386892b58477c4e28de3e40d930b44f5b42d4 (mariadb-10.1.43-274-g5d9386892b5) parent(s): 3ee2422624ffb3d7ffefff8db7ef9398816299bc author: Sujatha committer: Sujatha timestamp: 2020-09-08 13:10:36 +0530 message: MDEV-4633: multi_source.simple test fails sporadically Analysis: ======== Writes to 'rli->log_space_total' need to be synchronized; otherwise both SQL_THREAD and IO_THREAD can try to modify the variable simultaneously, resulting in an incorrect 'rli->log_space_total' value. In the current test scenario, SQL_THREAD is trying to decrement 'rli->log_space_total' in 'purge_first_log' and IO_THREAD is trying to increment 'rli->log_space_total' in 'queue_event' simultaneously. Hence the test occasionally fails with a result mismatch. Fix: === Convert the 'rli->log_space_total' variable to an atomic type. --- sql/log.cc | 4 +++- sql/log.h | 4 +++- sql/rpl_rli.cc | 14 ++++++++------ sql/slave.cc | 19 +++++++++++++++---- 4 files changed, 29 insertions(+), 12 deletions(-) diff --git a/sql/log.cc b/sql/log.cc index 8049b94bab1..90d5c46a15f 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -4400,7 +4400,9 @@ int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included) 0, 0, &log_space_reclaimed); mysql_mutex_lock(&rli->log_space_lock); - rli->log_space_total-= log_space_reclaimed; + my_atomic_add64_explicit((volatile int64*)(&rli->log_space_total), + (-(ulonglong)log_space_reclaimed), + MY_MEMORY_ORDER_RELAXED); mysql_cond_broadcast(&rli->log_space_cond); mysql_mutex_unlock(&rli->log_space_lock); diff --git a/sql/log.h b/sql/log.h index 277e5c6f69c..9e3807e9939 100644 --- a/sql/log.h +++ b/sql/log.h @@ -712,7 +712,9 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG char buf1[22],buf2[22]; #endif DBUG_ENTER("harvest_bytes_written"); - (*counter)+=bytes_written; + + my_atomic_add64_explicit((volatile int64*)(counter), bytes_written, + MY_MEMORY_ORDER_RELAXED); DBUG_PRINT("info",("counter: %s bytes_written: %s", llstr(*counter,buf1), llstr(bytes_written,buf2))); 
bytes_written=0; diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index c196a65809a..8815af4ea4a 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -140,8 +140,8 @@ int init_relay_log_info(Relay_log_info* rli, rli->slave_skip_counter=0; rli->abort_pos_wait=0; rli->log_space_limit= relay_log_space_limit; - rli->log_space_total= 0; - + my_atomic_store64_explicit((volatile int64*)(&rli->log_space_total), 0, + MY_MEMORY_ORDER_RELAXED); char pattern[FN_REFLEN]; (void) my_realpath(pattern, slave_load_tmpdir, 0); if (fn_format(pattern, PREFIX_SQL_LOAD, pattern, "", @@ -467,7 +467,8 @@ static inline int add_relay_log(Relay_log_info* rli,LOG_INFO* linfo) linfo->log_file_name); DBUG_RETURN(1); } - rli->log_space_total += s.st_size; + my_atomic_add64_explicit((volatile int64*)(&rli->log_space_total), + s.st_size, MY_MEMORY_ORDER_RELAXED); DBUG_PRINT("info",("log_space_total: %llu", rli->log_space_total)); DBUG_RETURN(0); } @@ -477,7 +478,8 @@ static int count_relay_log_space(Relay_log_info* rli) { LOG_INFO linfo; DBUG_ENTER("count_relay_log_space"); - rli->log_space_total= 0; + my_atomic_store64_explicit((volatile int64*)(&rli->log_space_total), 0, + MY_MEMORY_ORDER_RELAXED); if (rli->relay_log.find_log_pos(&linfo, NullS, 1)) { sql_print_error("Could not find first log while counting relay log space"); @@ -1202,8 +1204,8 @@ int purge_relay_logs(Relay_log_info* rli, THD *thd, bool just_reset, strmake_buf(rli->group_relay_log_name, rli->relay_log.get_log_fname()); strmake_buf(rli->event_relay_log_name, rli->relay_log.get_log_fname()); rli->group_relay_log_pos= rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE; - rli->log_space_total= 0; - + my_atomic_store64_explicit((volatile int64*)(&rli->log_space_total), 0, + MY_MEMORY_ORDER_RELAXED); if (count_relay_log_space(rli)) { *errmsg= "Error counting relay log space"; diff --git a/sql/slave.cc b/sql/slave.cc index 06f2b0d955a..9f4b0f53fb4 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -2371,7 +2371,10 @@ static bool 
wait_for_relay_log_space(Relay_log_info* rli) &rli->log_space_lock, &stage_waiting_for_relay_log_space, &old_stage); - while (rli->log_space_limit < rli->log_space_total && + while (rli->log_space_limit < + (ulonglong)my_atomic_load64_explicit((volatile int64*) + (&rli->log_space_total), + MY_MEMORY_ORDER_RELAXED) && !(slave_killed=io_slave_killed(mi)) && !rli->ignore_log_space_limit) mysql_cond_wait(&rli->log_space_cond, &rli->log_space_lock); @@ -2912,7 +2915,10 @@ static bool send_show_master_info_data(THD *thd, Master_info *mi, bool full, protocol->store(mi->rli.last_error().message, &my_charset_bin); protocol->store((uint32) mi->rli.slave_skip_counter); protocol->store((ulonglong) mi->rli.group_master_log_pos); - protocol->store((ulonglong) mi->rli.log_space_total); + protocol->store((ulonglong) + my_atomic_load64_explicit((volatile int64*) + (&mi->rli.log_space_total), + MY_MEMORY_ORDER_RELAXED)); protocol->store( mi->rli.until_condition==Relay_log_info::UNTIL_NONE ? "None": @@ -4428,7 +4434,9 @@ Stopping slave I/O thread due to out-of-memory error from master"); #endif if (rli->log_space_limit && rli->log_space_limit < - rli->log_space_total && + (ulonglong) my_atomic_load64_explicit((volatile int64*) + (&rli->log_space_total), + MY_MEMORY_ORDER_RELAXED) && !rli->ignore_log_space_limit) if (wait_for_relay_log_space(rli)) { @@ -7031,7 +7039,10 @@ static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size) is are able to rotate and purge sometime soon. */ if (rli->log_space_limit && - rli->log_space_limit < rli->log_space_total) + rli->log_space_limit < + (ulonglong) my_atomic_load64_explicit((volatile int64*) + (&rli->log_space_total), + MY_MEMORY_ORDER_RELAXED)) { /* force rotation if not in an unfinished group */ rli->sql_force_rotate_relay= !rli->is_in_group();