[Maria-developers] e4fa0c805ad: MDEV-4633: multi_source.simple test fails sporadically
revision-id: e4fa0c805adf06c637cc3867427ca529b40e19ce (mariadb-10.1.43-267-ge4fa0c805ad) parent(s): de49e603770463e64cf4a79a7414bac93c02254f author: Sujatha committer: Sujatha timestamp: 2020-08-28 12:03:09 +0530 message: MDEV-4633: multi_source.simple test fails sporadically Analysis: ======== Writes to 'rli->log_space_total' needs to be synchronized. All modifications to 'rli->log_space_total' should be performed by holding 'rli->log_space_lock'. But IO thread doesn't acquire this mutex while writing events to relay log. Hence there is a race condition between SQL THREAD and IO THREAD, when both try to modify the same variable. SQL_THREAD is trying to decrement 'rli->log_space_total' in 'purge_first_log' and IO_THREAD is trying to increment the 'rli->log_space_total' in 'queue_event' simultaneously . Hence test occasionally fails with result mismatch. Fix: === Acquire 'rli->log_space_lock' while modifying 'rli->log_space_total'. --- sql/log.cc | 15 +++++++++++++++ sql/log.h | 13 +------------ sql/rpl_rli.cc | 4 ++++ sql/slave.cc | 28 ++++++++++++++-------------- 4 files changed, 34 insertions(+), 26 deletions(-) diff --git a/sql/log.cc b/sql/log.cc index 8049b94bab1..28a3d58d9e4 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -10265,6 +10265,21 @@ MYSQL_BIN_LOG::do_binlog_recovery(const char *opt_name, bool do_xa_recovery) return error; } +void MYSQL_BIN_LOG::harvest_bytes_written(Relay_log_info *rli) +{ +#ifndef DBUG_OFF + char buf1[22],buf2[22]; +#endif + DBUG_ENTER("harvest_bytes_written"); + mysql_mutex_lock(&rli->log_space_lock); + rli->log_space_total+=bytes_written; + DBUG_PRINT("info",("relay_log_space: %s bytes_written: %s", + llstr(rli->log_space_total,buf1), + llstr(bytes_written,buf2))); + bytes_written=0; + mysql_mutex_unlock(&rli->log_space_lock); + DBUG_VOID_RETURN; +} #ifdef INNODB_COMPATIBILITY_HOOKS /** diff --git a/sql/log.h b/sql/log.h index 277e5c6f69c..3b136c47bcf 100644 --- a/sql/log.h +++ b/sql/log.h @@ -706,18 +706,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG { bytes_written = 0; } - void harvest_bytes_written(ulonglong* counter) - { -#ifndef DBUG_OFF - char buf1[22],buf2[22]; -#endif - DBUG_ENTER("harvest_bytes_written"); - (*counter)+=bytes_written; - DBUG_PRINT("info",("counter: %s bytes_written: %s", llstr(*counter,buf1), - llstr(bytes_written,buf2))); - bytes_written=0; - DBUG_VOID_RETURN; - } + void harvest_bytes_written(Relay_log_info *rli); void set_max_size(ulong max_size_arg); void signal_update(); void wait_for_sufficient_commits(); diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index c196a65809a..18d7db234fe 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -467,7 +467,9 @@ static inline int add_relay_log(Relay_log_info* rli,LOG_INFO* linfo) linfo->log_file_name); DBUG_RETURN(1); } + mysql_mutex_lock(&rli->log_space_lock); rli->log_space_total += s.st_size; + mysql_mutex_unlock(&rli->log_space_lock); DBUG_PRINT("info",("log_space_total: %llu", rli->log_space_total)); DBUG_RETURN(0); } @@ -477,7 +479,9 @@ static int count_relay_log_space(Relay_log_info* rli) { LOG_INFO linfo; DBUG_ENTER("count_relay_log_space"); + mysql_mutex_lock(&rli->log_space_lock); rli->log_space_total= 0; + mysql_mutex_unlock(&rli->log_space_lock); if (rli->relay_log.find_log_pos(&linfo, NullS, 1)) { sql_print_error("Could not find first log while counting relay log space"); diff --git a/sql/slave.cc b/sql/slave.cc index 06f2b0d955a..3d9e23441bc 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -2508,7 +2508,7 @@ static void write_ignored_events_info_to_relay_log(THD *thd, Master_info *mi) } if (likely (rev || glev)) { - rli->relay_log.harvest_bytes_written(&rli->log_space_total); + rli->relay_log.harvest_bytes_written(rli); if (flush_master_info(mi, TRUE, TRUE)) sql_print_error("Failed to flush master info file"); } @@ -5270,7 +5270,7 @@ static int process_io_create_file(Master_info* mi, Create_file_log_event* cev) "error writing Exec_load event to relay log"); goto err; } - mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total); + mi->rli.relay_log.harvest_bytes_written(&mi->rli); break; } if (unlikely(cev_not_written)) @@ -5285,7 +5285,7 @@ static int process_io_create_file(Master_info* mi, Create_file_log_event* cev) goto err; } cev_not_written=0; - mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total); + mi->rli.relay_log.harvest_bytes_written(&mi->rli); } else { @@ -5299,7 +5299,7 @@ static int process_io_create_file(Master_info* mi, Create_file_log_event* cev) "error writing Append_block event to relay log"); goto err; } - mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total) ; + mi->rli.relay_log.harvest_bytes_written(&mi->rli) ; } } } @@ -5489,7 +5489,7 @@ static int queue_binlog_ver_1_event(Master_info *mi, const char *buf, mysql_mutex_unlock(&mi->data_lock); DBUG_RETURN(1); } - rli->relay_log.harvest_bytes_written(&rli->log_space_total); + rli->relay_log.harvest_bytes_written(rli); } delete ev; mi->master_log_pos+= inc_pos; @@ -5547,7 +5547,7 @@ static int queue_binlog_ver_3_event(Master_info *mi, const char *buf, mysql_mutex_unlock(&mi->data_lock); DBUG_RETURN(1); } - rli->relay_log.harvest_bytes_written(&rli->log_space_total); + rli->relay_log.harvest_bytes_written(rli); delete ev; mi->master_log_pos+= inc_pos; err: @@ -5784,9 +5784,7 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len) mysql_mutex_lock(log_lock); if (likely(!rli->relay_log.write_event(&fdle) && !rli->relay_log.flush_and_sync(NULL))) - { - rli->relay_log.harvest_bytes_written(&rli->log_space_total); - } + rli->relay_log.harvest_bytes_written(rli); else { error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE; @@ -6221,7 +6219,7 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len) (rli->relay_log.description_event_for_queue)) error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE; else - rli->relay_log.harvest_bytes_written(&rli->log_space_total); + rli->relay_log.harvest_bytes_written(rli); } else if (mi->gtid_reconnect_event_skip_count == 0) { @@ -6234,7 +6232,7 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len) if (rli->relay_log.append_no_lock(&fake_rev)) error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE; else - rli->relay_log.harvest_bytes_written(&rli->log_space_total); + rli->relay_log.harvest_bytes_written(rli); } } else @@ -6302,7 +6300,7 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len) { mi->master_log_pos+= inc_pos; DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos)); - rli->relay_log.harvest_bytes_written(&rli->log_space_total); + rli->relay_log.harvest_bytes_written(rli); } else { @@ -6382,7 +6380,7 @@ void end_relay_log_info(Relay_log_info* rli) log_lock= rli->relay_log.get_log_lock(); mysql_mutex_lock(log_lock); rli->relay_log.close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT); - rli->relay_log.harvest_bytes_written(&rli->log_space_total); + rli->relay_log.harvest_bytes_written(rli); mysql_mutex_unlock(log_lock); /* Delete the slave's temporary tables from memory. @@ -7318,7 +7316,9 @@ int rotate_relay_log(Master_info* mi) Note that it needs to be protected by mi->data_lock. */ mysql_mutex_assert_owner(&mi->data_lock); - rli->relay_log.harvest_bytes_written(&rli->log_space_total); + + rli->relay_log.harvest_bytes_written(rli); + end: DBUG_RETURN(error); }
participants (1)
-
sujatha