Re: [Maria-developers] d25439f1cbb: MDEV-31140: FLUSH BINARY LOGS DELETE_DOMAIN_ID=(D) can errorneously delete active domains
Hi Andrei, Want to review a small patch for https://jira.mariadb.org/browse/MDEV-31140 ? I found this problem while trying to find the root cause of https://jira.mariadb.org/browse/MDEV-30386 . However this bug seems to be unrelated to MDEV-30386. Basically the logic for comparing the gtid_binlog_state with the GTID_LIST event was flawed, making it possible to delete a domain_id from the binlog state when this should not be allowed. - Kristian. Kristian Nielsen <knielsen@knielsen-hq.org> writes:
revision-id: d25439f1cbb79e5467b4249792130bfe524e3f10 (mariadb-10.11.2-21-gd25439f1cbb) parent(s): 656c2e18b1e9ea5d0314745f3988d126eedbc22a author: Kristian Nielsen committer: Kristian Nielsen timestamp: 2023-04-27 12:16:07 +0200 message:
MDEV-31140: FLUSH BINARY LOGS DELETE_DOMAIN_ID=(D) can errorneously delete active domains
Fix the code in rpl_binlog_state::drop_domain(), so that _all_ entries for the domain in the binlog state must match an entry in the initial GTID_LIST, not just one entry match.
Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
--- .../r/binlog_flush_binlogs_delete_domain.result | 14 ++++++++++--- .../t/binlog_flush_binlogs_delete_domain.test | 13 +++++++++++- sql/rpl_gtid.cc | 24 ++++++++++++---------- 3 files changed, 36 insertions(+), 15 deletions(-)
diff --git a/mysql-test/suite/binlog/r/binlog_flush_binlogs_delete_domain.result b/mysql-test/suite/binlog/r/binlog_flush_binlogs_delete_domain.result index fdcfb4bfa01..1c11191802f 100644 --- a/mysql-test/suite/binlog/r/binlog_flush_binlogs_delete_domain.result +++ b/mysql-test/suite/binlog/r/binlog_flush_binlogs_delete_domain.result @@ -46,15 +46,23 @@ Warning 1076 The current gtid binlog state is incompatible with a former one mis Warning 1076 The gtid domain being deleted ('1') is not in the current binlog state FLUSH BINARY LOGS DELETE_DOMAIN_ID = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 0); ERROR HY000: Could not delete gtid domain. Reason: binlog files may contain gtids from the domain ('1') being deleted. Make sure to first purge those files. +MDEV-31140: Missing error from DELETE_DOMAIN_ID when gtid_binlog_state partially matches GTID_LIST. FLUSH BINARY LOGS; PURGE BINARY LOGS TO 'master-bin.000005'; +SET @@SESSION.gtid_domain_id=8; +SET @@SESSION.server_id=10*8 + 1; +INSERT INTO t SELECT 1+MAX(a) FROM t; +FLUSH BINARY LOGS DELETE_DOMAIN_ID = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 0); +ERROR HY000: Could not delete gtid domain. Reason: binlog files may contain gtids from the domain ('8') being deleted. Make sure to first purge those files. +FLUSH BINARY LOGS; +PURGE BINARY LOGS TO 'master-bin.000006'; FLUSH BINARY LOGS DELETE_DOMAIN_ID = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 0); Warnings: Warning 1076 The gtid domain being deleted ('0') is not in the current binlog state Gtid_list of the current binlog does not contain 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 0: -show binlog events in 'master-bin.000006' limit 1,1; +show binlog events in 'master-bin.000007' limit 1,1; Log_name Pos Event_type Server_id End_log_pos Info -master-bin.000006 # Gtid_list 1 # [] +master-bin.000007 # Gtid_list 1 # [] SET @@SESSION.gtid_domain_id=1;; SET @@SESSION.server_id=1; SET @@SESSION.gtid_seq_no=1; @@ -75,7 +83,7 @@ INSERT INTO t SET a=1; SELECT @gtid_binlog_state_saved "as original state", @@GLOBAL.gtid_binlog_state as "out of order for 11 domain state"; as original state out of order for 11 domain state 1-1-1,1-2-2,11-11-11 1-1-1,1-2-2,11-11-1 -PURGE BINARY LOGS TO 'master-bin.000007'; +PURGE BINARY LOGS TO 'master-bin.000008'; the following command succeeds with warnings FLUSH BINARY LOGS DELETE_DOMAIN_ID = (1); Warnings: diff --git a/mysql-test/suite/binlog/t/binlog_flush_binlogs_delete_domain.test b/mysql-test/suite/binlog/t/binlog_flush_binlogs_delete_domain.test index 8311f4bd800..1643ecff72d 100644 --- a/mysql-test/suite/binlog/t/binlog_flush_binlogs_delete_domain.test +++ b/mysql-test/suite/binlog/t/binlog_flush_binlogs_delete_domain.test @@ -21,7 +21,6 @@ FLUSH BINARY LOGS DELETE_DOMAIN_ID = (); --echo but with a warning --let $binlog_pre_flush=query_get_value(SHOW MASTER STATUS, Position, 1) FLUSH BINARY LOGS DELETE_DOMAIN_ID = (99); ---let $binlog_start=$binlog_pre_flush --source include/show_binary_logs.inc
# Log one event in a specified domain and try to delete the domain @@ -62,6 +61,8 @@ FLUSH BINARY LOGS DELETE_DOMAIN_ID = (1); # expected overrun of the static buffers of underlying dynamic arrays is doing. --let $domain_cnt=17 --let $server_in_domain_cnt=3 +--let $err_domain_id=`SELECT FLOOR($domain_cnt/2)` +--let $err_server_id=`SELECT FLOOR($server_in_domain_cnt/2)` --let $domain_list= --disable_query_log while ($domain_cnt) @@ -86,6 +87,16 @@ while ($domain_cnt) --error ER_BINLOG_CANT_DELETE_GTID_DOMAIN --eval FLUSH BINARY LOGS DELETE_DOMAIN_ID = ($domain_list)
+--echo MDEV-31140: Missing error from DELETE_DOMAIN_ID when gtid_binlog_state partially matches GTID_LIST. +FLUSH BINARY LOGS; +--let $purge_to_binlog= query_get_value(SHOW MASTER STATUS, File, 1) +--eval PURGE BINARY LOGS TO '$purge_to_binlog' +--eval SET @@SESSION.gtid_domain_id=$err_domain_id +--eval SET @@SESSION.server_id=10*$err_domain_id + $err_server_id +eval INSERT INTO t SELECT 1+MAX(a) FROM t; +--error ER_BINLOG_CANT_DELETE_GTID_DOMAIN +--eval FLUSH BINARY LOGS DELETE_DOMAIN_ID = ($domain_list) + # Now satisfy the safety condtion to purge log files containing $domain list FLUSH BINARY LOGS; --let $purge_to_binlog= query_get_value(SHOW MASTER STATUS, File, 1) diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc index c4e5c75b10a..7b67a83b3dd 100644 --- a/sql/rpl_gtid.cc +++ b/sql/rpl_gtid.cc @@ -2209,18 +2209,16 @@ rpl_binlog_state::drop_domain(DYNAMIC_ARRAY *ids,
/* For each domain_id from ids - when no such domain in binlog state - warn && continue - For each domain.server's last gtid - when not locate the last gtid in glev.list - error out binlog state can't change - otherwise continue + If the domain is already absent from the binlog state + Warn && continue + If any GTID with that domain in binlog state is missing from glev.list + Error out binlog state can't change */ for (ulong i= 0; i < ids->elements; i++) { rpl_binlog_state::element *elem= NULL; uint32 *ptr_domain_id; - bool not_match; + bool all_found;
ptr_domain_id= (uint32*) dynamic_array_ptr(ids, i); elem= (rpl_binlog_state::element *) @@ -2235,14 +2233,18 @@ rpl_binlog_state::drop_domain(DYNAMIC_ARRAY *ids, continue; }
- for (not_match= true, k= 0; k < elem->hash.records; k++) + all_found= true; + for (k= 0; k < elem->hash.records && all_found; k++) { rpl_gtid *d_gtid= (rpl_gtid *)my_hash_element(&elem->hash, k); - for (ulong l= 0; l < glev->count && not_match; l++) - not_match= !(*d_gtid == glev->list[l]); + bool match_found= false; + for (ulong l= 0; l < glev->count && !match_found; l++) + match_found= match_found || (*d_gtid == glev->list[l]); + if (!match_found) + all_found= false; }
- if (not_match) + if (!all_found) { sprintf(errbuf, "binlog files may contain gtids from the domain ('%u') " "being deleted. Make sure to first purge those files",
participants (1)
-
Kristian Nielsen