revision-id: eb75e8705d9a444e10057967eaebf947b1115ff8 (mariadb-10.2.31-695-geb75e8705d9) parent(s): 53acd1c1d88be82190c56af3e4cc11fb2770a169 author: Sujatha committer: Sujatha timestamp: 2021-01-21 13:00:02 +0530 message: MDEV-8134: The relay-log is not flushed after the slave-relay-log.999999 showed Problem: ======== Auto purge of relay logs stops when relay-log-file is 'slave-relay-log.999999' and slave_parallel_threads is enabled. Analysis: ========= The problem is in the Relay_log_info::inc_group_relay_log_pos() function, where two log names are compared via strcmp(). This gives the correct result as long as the log name sequence numbers have the same number of digits (6 digits), but once the number grows to 7 digits, 999999 compares greater than 1000000, which is wrong; hence the bug. Fix: ==== Extract the numeric extension part of the file name, convert it into unsigned long and compare. Thanks to David Zhao for the contribution. --- .../suite/rpl/r/rpl_relay_max_extension.result | 37 +++++++ .../suite/rpl/t/rpl_relay_max_extension.test | 109 +++++++++++++++++++++ sql/rpl_parallel.cc | 5 +- sql/rpl_rli.cc | 4 +- sql/sql_repl.cc | 17 ++++ sql/sql_repl.h | 1 + 6 files changed, 169 insertions(+), 4 deletions(-) diff --git a/mysql-test/suite/rpl/r/rpl_relay_max_extension.result b/mysql-test/suite/rpl/r/rpl_relay_max_extension.result new file mode 100644 index 00000000000..4444398203e --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_relay_max_extension.result @@ -0,0 +1,37 @@ +include/rpl_init.inc [topology=1->2] +connection server_2; +include/stop_slave.inc +RESET SLAVE; +include/start_slave.inc +include/stop_slave.inc +# +# Stop slave server +# +# +# Simulate file number get close to 999997 +# by renaming relay logs and modifying index/info files +# +# Restart slave server +# +SET @save_slave_parallel_threads= @@GLOBAL.slave_parallel_threads; +SET @save_max_relay_log_size= @@GLOBAL.max_relay_log_size; +SET GLOBAL slave_parallel_threads=1; +SET GLOBAL max_relay_log_size=100 * 1024; 
+include/start_slave.inc +connection server_1; +create table t1 (i int, c varchar(1024)); +# +# Insert some data to generate enough amount of binary logs +# +connection server_2; +# +# Assert that 'slave-relay-bin.999999' is purged. +# +NOT FOUND /slave-relay-bin.999999/ in slave-relay-bin.index +include/stop_slave.inc +SET GLOBAL slave_parallel_threads= @save_slave_parallel_threads; +SET GLOBAL max_relay_log_size= @save_max_relay_log_size; +include/start_slave.inc +connection server_1; +DROP TABLE t1; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_relay_max_extension.test b/mysql-test/suite/rpl/t/rpl_relay_max_extension.test new file mode 100644 index 00000000000..e1e087f2e0e --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_relay_max_extension.test @@ -0,0 +1,109 @@ +# ==== Purpose ==== +# +# Test verifies that auto purging mechanism of relay logs works fine when the +# file extension grows beyond 999999. +# +# ==== Implementation ==== +# +# Steps: +# 0 - In master-slave setup clear all the relay logs on the slave server. +# 1 - Start the slave so that new relay logs starting from +# 'slave-relay-bin.000001' are created. +# 2 - Get the active relay-log file name by using SHOW SLAVE STATUS. +# Shutdown the slave server. +# 3 - Rename active relay log to '999997' in both 'relay-log.info' and +# 'slave-relay-bin.index' files. +# 4 - Restart the slave server by configuring 'slave_parallel_threads=1' +# and 'max_relay_log_size=100K'. +# 5 - Generate load on master such that few relay logs are generated on +# slave. The relay log sequence number will change to 7 digits. +# 6 - Sync slave with master to ensure that relay logs are applied on +# slave. They should have been automatically purged. +# 7 - Assert that there is no 'slave-relay-bin.999999' entry in +# 'slave-relay-bin.index'. 
+# +# ==== References ==== +# +# MDEV-8134: The relay-log is not flushed after the slave-relay-log.999999 +# showed +# + +--source include/have_innodb.inc +--source include/have_binlog_format_row.inc +--let $rpl_topology=1->2 +--source include/rpl_init.inc + +--connection server_2 +--source include/stop_slave.inc +RESET SLAVE; +--source include/start_slave.inc +--source include/stop_slave.inc +--let $relay_log=query_get_value(SHOW SLAVE STATUS, Relay_Log_File, 1) + +--echo # +--echo # Stop slave server +--echo # + +--let $datadir = `select @@datadir` +--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +--shutdown_server 10 +--source include/wait_until_disconnected.inc + +--exec sed -i "s/$relay_log/slave-relay-bin.999997/g" $datadir/relay-log.info +--exec sed -i "s/$relay_log/slave-relay-bin.999997/g" $datadir/slave-relay-bin.index + +--echo # +--echo # Simulate file number get close to 999997 +--echo # by renaming relay logs and modifying index/info files + +--move_file $datadir/$relay_log $datadir/slave-relay-bin.999997 + +--echo # +--echo # Restart slave server +--echo # + +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect +--enable_reconnect +--source include/wait_until_connected_again.inc +SET @save_slave_parallel_threads= @@GLOBAL.slave_parallel_threads; +SET @save_max_relay_log_size= @@GLOBAL.max_relay_log_size; + +SET GLOBAL slave_parallel_threads=1; +SET GLOBAL max_relay_log_size=100 * 1024; +--source include/start_slave.inc + +--connection server_1 +create table t1 (i int, c varchar(1024)); +--echo # +--echo # Insert some data to generate enough amount of binary logs +--echo # +--let $count = 1000 +--disable_query_log +while ($count) +{ + eval insert into t1 values (1001 - $count, repeat('a',1000)); + dec $count; +} +--enable_query_log +--save_master_pos + +--connection server_2 +--sync_with_master + +--let $relay_log=query_get_value(SHOW SLAVE STATUS, Relay_Log_File, 1) + +--echo # +--echo # Assert that 'slave-relay-bin.999999' is 
purged. +--echo # +let SEARCH_FILE=$datadir/slave-relay-bin.index; +let SEARCH_PATTERN=slave-relay-bin.999999; +source include/search_pattern_in_file.inc; + +--source include/stop_slave.inc +SET GLOBAL slave_parallel_threads= @save_slave_parallel_threads; +SET GLOBAL max_relay_log_size= @save_max_relay_log_size; +--source include/start_slave.inc + +--connection server_1 +DROP TABLE t1; +--source include/rpl_end.inc diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc index 4cf87ba73b7..869640fd46f 100644 --- a/sql/rpl_parallel.cc +++ b/sql/rpl_parallel.cc @@ -4,6 +4,7 @@ #include "rpl_mi.h" #include "sql_parse.h" #include "debug_sync.h" +#include "sql_repl.h" /* Code for optional parallel execution of replicated events on the slave. @@ -82,7 +83,7 @@ handle_queued_pos_update(THD *thd, rpl_parallel_thread::queued_event *qev) return; mysql_mutex_lock(&rli->data_lock); - cmp= strcmp(rli->group_relay_log_name, qev->event_relay_log_name); + cmp= compare_log_name(rli->group_relay_log_name, qev->event_relay_log_name); if (cmp < 0) { rli->group_relay_log_pos= qev->future_event_relay_log_pos; @@ -91,7 +92,7 @@ handle_queued_pos_update(THD *thd, rpl_parallel_thread::queued_event *qev) rli->group_relay_log_pos < qev->future_event_relay_log_pos) rli->group_relay_log_pos= qev->future_event_relay_log_pos; - cmp= strcmp(rli->group_master_log_name, qev->future_event_master_log_name); + cmp= compare_log_name(rli->group_master_log_name, qev->future_event_master_log_name); if (cmp < 0) { strcpy(rli->group_master_log_name, qev->future_event_master_log_name); diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 40ab375571a..5273b33c728 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -989,7 +989,7 @@ void Relay_log_info::inc_group_relay_log_pos(ulonglong log_pos, if (rgi->is_parallel_exec) { /* In case of parallel replication, do not update the position backwards. 
*/ - int cmp= strcmp(group_relay_log_name, rgi->event_relay_log_name); + int cmp= compare_log_name(group_relay_log_name, rgi->event_relay_log_name); if (cmp < 0) { group_relay_log_pos= rgi->future_event_relay_log_pos; @@ -1001,7 +1001,7 @@ void Relay_log_info::inc_group_relay_log_pos(ulonglong log_pos, In the parallel case we need to update the master_log_name here, rather than in Rotate_log_event::do_update_pos(). */ - cmp= strcmp(group_master_log_name, rgi->future_event_master_log_name); + cmp= compare_log_name(group_master_log_name, rgi->future_event_master_log_name); if (cmp <= 0) { if (cmp < 0) diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index 4af8ebc2dd8..59a3f686e45 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -4541,5 +4541,22 @@ rpl_gtid_pos_update(THD *thd, char *str, size_t len) return false; } +int compare_log_name(const char *log_1, const char *log_2) { + int res= 1; + const char *ext1_str= strrchr(log_1, '.'); + const char *ext2_str= strrchr(log_2, '.'); + char file_name_1[255], file_name_2[255]; + strmake(file_name_1, log_1, (ext1_str - log_1)); + strmake(file_name_2, log_2, (ext2_str - log_2)); + char *endptr = NULL; + res= strcmp(file_name_1, file_name_2); + if (!res) + { + ulong ext1= strtoul(++ext1_str, &endptr, 10); + ulong ext2= strtoul(++ext2_str, &endptr, 10); + res= (ext1 > ext2 ? 1 : ((ext1 == ext2) ? 0 : -1)); + } + return res; +} #endif /* HAVE_REPLICATION */ diff --git a/sql/sql_repl.h b/sql/sql_repl.h index 8ddfa9239f6..9129aaeed5e 100644 --- a/sql/sql_repl.h +++ b/sql/sql_repl.h @@ -56,6 +56,7 @@ bool show_binlogs(THD* thd); extern int init_master_info(Master_info* mi); void kill_zombie_dump_threads(uint32 slave_server_id); int check_binlog_magic(IO_CACHE* log, const char** errmsg); +int compare_log_name(const char *log_1, const char *log_2); struct LOAD_FILE_IO_CACHE : public IO_CACHE {