Hi, Andrei, I'm sorry, but I don't understand this fix at all. Why did you move do_accept_own_server_id into Master_info if it is only used in one static function, queue_event()? And why is it now set under a complex condition, moved further up in queue_event(), when you could have simply added && opt_gtid_strict_mode? On Feb 17, Andrei wrote:
revision-id: 592f57e25a5 (mariadb-10.6.5-82-g592f57e25a5) parent(s): 8d742fe4acb author: Andrei committer: Andrei timestamp: 2022-02-14 16:31:25 +0200 message:
MDEV-27760 event may non stop replicate in circular semisync setup
diff --git a/sql/slave.cc b/sql/slave.cc index 5ff40d8fb25..d5275d17460 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -5037,6 +5037,7 @@ log space"); mi->abort_slave= 0; mi->slave_running= MYSQL_SLAVE_NOT_RUN; mi->io_thd= 0; + mi->do_accept_own_server_id= false; /* Note: the order of the two following calls (first broadcast, then unlock) is important. Otherwise a killer_thread can execute between the calls and @@ -6175,15 +6176,6 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) uchar new_buf_arr[4096]; bool is_malloc = false; bool is_rows_event= false; - /* - The flag has replicate_same_server_id semantics and is raised to accept - a same-server-id event on the semisync slave, for both the gtid and legacy - connection modes. - Such events can appear as result of this server recovery so the event - was created there and replicated elsewhere right before the crash. At recovery - it could be evicted from the server's binlog. - */ - bool do_accept_own_server_id= false; /* FD_q must have been prepared for the first R_a event inside get_master_version_and_clock() @@ -6272,6 +6264,8 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) dbug_rows_event_count = 0; };); #endif + s_id= uint4korr(buf + SERVER_ID_OFFSET); + mysql_mutex_lock(&mi->data_lock);
switch (buf[EVENT_TYPE_OFFSET]) { @@ -6713,6 +6707,20 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len)
++mi->events_queued_since_last_gtid; inc_pos= event_len; + + if (s_id == global_system_variables.server_id && + rpl_semi_sync_slave_enabled && + mi->using_gtid != Master_info::USE_GTID_NO && + opt_gtid_strict_mode) + { + rpl_gtid *state_gtid= mi->gtid_current_pos.find(event_gtid.domain_id); + mi->do_accept_own_server_id= + !state_gtid || state_gtid->seq_no < event_gtid.seq_no; + } + else + { + mi->do_accept_own_server_id= false; + } } break; /* @@ -6900,7 +6908,6 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) */
mysql_mutex_lock(log_lock); - s_id= uint4korr(buf + SERVER_ID_OFFSET); /* Write the event to the relay log, unless we reconnected in the middle of an event group and now need to skip the initial part of the group that @@ -6946,7 +6953,7 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) else if ((s_id == global_system_variables.server_id && !(mi->rli.replicate_same_server_id || - (do_accept_own_server_id= rpl_semi_sync_slave_enabled))) || + mi->do_accept_own_server_id)) || event_that_should_be_ignored(buf) || /* the following conjunction deals with IGNORE_SERVER_IDS, if set @@ -7006,7 +7013,7 @@ static int queue_event(Master_info* mi, const uchar *buf, ulong event_len) } else { - if (do_accept_own_server_id) + if (mi->do_accept_own_server_id) { int2store(const_cast<uchar*>(buf + FLAGS_OFFSET), uint2korr(buf + FLAGS_OFFSET) | LOG_EVENT_ACCEPT_OWN_F);
Regards, Sergei VP of MariaDB Server Engineering and security@mariadb.org