Re: [Maria-developers] d25439f1cbb: MDEV-31140: FLUSH BINARY LOGS DELETE_DOMAIN_ID=(D) can errorneously delete active domains
Andrei Elkin <andrei.elkin@mariadb.com> writes:
Want to review a small patch for https://jira.mariadb.org/browse/MDEV-31140 ?
Thanks for a very good piece of analysis and the refinement! I agree with both.
Great, thanks for review! Do you have an opinion into which branch I should push it? The fix should be low-risk. I'm thinking maybe 11.0 or 10.11 is fine, since the bug does not affect normal operation. But I'm open to other suggestions, whatever is the current policy for this kind of bug?
The existed test was more than a bit unlucky, to confess, to let the problem stay unnoticed.
Yes, there is a good test coverage, so a bit unlucky, agree. - Kristian.
On the occasion of the International workers' day, let me wish you more contribution, and inspiration for how to make not just this server better alone :-)!!!
Thanks, and all the best to you too :-) - Kristian.
Kristian Nielsen <knielsen@knielsen-hq.org> writes:
revision-id: d25439f1cbb79e5467b4249792130bfe524e3f10 (mariadb-10.11.2-21-gd25439f1cbb) parent(s): 656c2e18b1e9ea5d0314745f3988d126eedbc22a author: Kristian Nielsen committer: Kristian Nielsen timestamp: 2023-04-27 12:16:07 +0200 message:
MDEV-31140: FLUSH BINARY LOGS DELETE_DOMAIN_ID=(D) can errorneously delete active domains
Fix the code in rpl_binlog_state::drop_domain(), so that _all_ entries for the domain in the binlog state must match an entry in the initial GTID_LIST, not just one entry match.
Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
--- .../r/binlog_flush_binlogs_delete_domain.result | 14 ++++++++++--- .../t/binlog_flush_binlogs_delete_domain.test | 13 +++++++++++- sql/rpl_gtid.cc | 24 ++++++++++++---------- 3 files changed, 36 insertions(+), 15 deletions(-)
diff --git a/mysql-test/suite/binlog/r/binlog_flush_binlogs_delete_domain.result b/mysql-test/suite/binlog/r/binlog_flush_binlogs_delete_domain.result index fdcfb4bfa01..1c11191802f 100644 --- a/mysql-test/suite/binlog/r/binlog_flush_binlogs_delete_domain.result +++ b/mysql-test/suite/binlog/r/binlog_flush_binlogs_delete_domain.result @@ -46,15 +46,23 @@ Warning 1076 The current gtid binlog state is incompatible with a former one mis Warning 1076 The gtid domain being deleted ('1') is not in the current binlog state FLUSH BINARY LOGS DELETE_DOMAIN_ID = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 0); ERROR HY000: Could not delete gtid domain. Reason: binlog files may contain gtids from the domain ('1') being deleted. Make sure to first purge those files. +MDEV-31140: Missing error from DELETE_DOMAIN_ID when gtid_binlog_state partially matches GTID_LIST. FLUSH BINARY LOGS; PURGE BINARY LOGS TO 'master-bin.000005'; +SET @@SESSION.gtid_domain_id=8; +SET @@SESSION.server_id=10*8 + 1; +INSERT INTO t SELECT 1+MAX(a) FROM t; +FLUSH BINARY LOGS DELETE_DOMAIN_ID = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 0); +ERROR HY000: Could not delete gtid domain. Reason: binlog files may contain gtids from the domain ('8') being deleted. Make sure to first purge those files. +FLUSH BINARY LOGS; +PURGE BINARY LOGS TO 'master-bin.000006'; FLUSH BINARY LOGS DELETE_DOMAIN_ID = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 0); Warnings: Warning 1076 The gtid domain being deleted ('0') is not in the current binlog state Gtid_list of the current binlog does not contain 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 0: -show binlog events in 'master-bin.000006' limit 1,1; +show binlog events in 'master-bin.000007' limit 1,1; Log_name Pos Event_type Server_id End_log_pos Info -master-bin.000006 # Gtid_list 1 # [] +master-bin.000007 # Gtid_list 1 # [] SET @@SESSION.gtid_domain_id=1;; SET @@SESSION.server_id=1; SET @@SESSION.gtid_seq_no=1; @@ -75,7 +83,7 @@ INSERT INTO t SET a=1; SELECT @gtid_binlog_state_saved "as original state", @@GLOBAL.gtid_binlog_state as "out of order for 11 domain state"; as original state out of order for 11 domain state 1-1-1,1-2-2,11-11-11 1-1-1,1-2-2,11-11-1 -PURGE BINARY LOGS TO 'master-bin.000007'; +PURGE BINARY LOGS TO 'master-bin.000008'; the following command succeeds with warnings FLUSH BINARY LOGS DELETE_DOMAIN_ID = (1); Warnings: diff --git a/mysql-test/suite/binlog/t/binlog_flush_binlogs_delete_domain.test b/mysql-test/suite/binlog/t/binlog_flush_binlogs_delete_domain.test index 8311f4bd800..1643ecff72d 100644 --- a/mysql-test/suite/binlog/t/binlog_flush_binlogs_delete_domain.test +++ b/mysql-test/suite/binlog/t/binlog_flush_binlogs_delete_domain.test @@ -21,7 +21,6 @@ FLUSH BINARY LOGS DELETE_DOMAIN_ID = (); --echo but with a warning --let $binlog_pre_flush=query_get_value(SHOW MASTER STATUS, Position, 1) FLUSH BINARY LOGS DELETE_DOMAIN_ID = (99); ---let $binlog_start=$binlog_pre_flush --source include/show_binary_logs.inc
# Log one event in a specified domain and try to delete the domain @@ -62,6 +61,8 @@ FLUSH BINARY LOGS DELETE_DOMAIN_ID = (1); # expected overrun of the static buffers of underlying dynamic arrays is doing. --let $domain_cnt=17 --let $server_in_domain_cnt=3 +--let $err_domain_id=`SELECT FLOOR($domain_cnt/2)` +--let $err_server_id=`SELECT FLOOR($server_in_domain_cnt/2)` --let $domain_list= --disable_query_log while ($domain_cnt) @@ -86,6 +87,16 @@ while ($domain_cnt) --error ER_BINLOG_CANT_DELETE_GTID_DOMAIN --eval FLUSH BINARY LOGS DELETE_DOMAIN_ID = ($domain_list)
+--echo MDEV-31140: Missing error from DELETE_DOMAIN_ID when gtid_binlog_state partially matches GTID_LIST. +FLUSH BINARY LOGS; +--let $purge_to_binlog= query_get_value(SHOW MASTER STATUS, File, 1) +--eval PURGE BINARY LOGS TO '$purge_to_binlog' +--eval SET @@SESSION.gtid_domain_id=$err_domain_id +--eval SET @@SESSION.server_id=10*$err_domain_id + $err_server_id +eval INSERT INTO t SELECT 1+MAX(a) FROM t; +--error ER_BINLOG_CANT_DELETE_GTID_DOMAIN +--eval FLUSH BINARY LOGS DELETE_DOMAIN_ID = ($domain_list) + # Now satisfy the safety condtion to purge log files containing $domain list FLUSH BINARY LOGS; --let $purge_to_binlog= query_get_value(SHOW MASTER STATUS, File, 1) diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc index c4e5c75b10a..7b67a83b3dd 100644 --- a/sql/rpl_gtid.cc +++ b/sql/rpl_gtid.cc @@ -2209,18 +2209,16 @@ rpl_binlog_state::drop_domain(DYNAMIC_ARRAY *ids,
/* For each domain_id from ids - when no such domain in binlog state - warn && continue - For each domain.server's last gtid - when not locate the last gtid in glev.list - error out binlog state can't change - otherwise continue + If the domain is already absent from the binlog state + Warn && continue + If any GTID with that domain in binlog state is missing from glev.list + Error out binlog state can't change */ for (ulong i= 0; i < ids->elements; i++) { rpl_binlog_state::element *elem= NULL; uint32 *ptr_domain_id; - bool not_match; + bool all_found;
ptr_domain_id= (uint32*) dynamic_array_ptr(ids, i); elem= (rpl_binlog_state::element *) @@ -2235,14 +2233,18 @@ rpl_binlog_state::drop_domain(DYNAMIC_ARRAY *ids, continue; }
- for (not_match= true, k= 0; k < elem->hash.records; k++) + all_found= true; + for (k= 0; k < elem->hash.records && all_found; k++) { rpl_gtid *d_gtid= (rpl_gtid *)my_hash_element(&elem->hash, k); - for (ulong l= 0; l < glev->count && not_match; l++) - not_match= !(*d_gtid == glev->list[l]); + bool match_found= false; + for (ulong l= 0; l < glev->count && !match_found; l++) + match_found= match_found || (*d_gtid == glev->list[l]); + if (!match_found) + all_found= false; }
- if (not_match) + if (!all_found) { sprintf(errbuf, "binlog files may contain gtids from the domain ('%u') " "being deleted. Make sure to first purge those files",
modified sql/rpl_gtid.cc @@ -2220,7 +2220,7 @@ rpl_binlog_state::drop_domain(DYNAMIC_ARRAY *ids, { rpl_binlog_state::element *elem= NULL; uint32 *ptr_domain_id; - bool not_match; + ulong match; // count-down for matches found
ptr_domain_id= (uint32*) dynamic_array_ptr(ids, i); elem= (rpl_binlog_state::element *) @@ -2235,14 +2235,15 @@ rpl_binlog_state::drop_domain(DYNAMIC_ARRAY *ids, continue; }
- for (not_match= true, k= 0; k < elem->hash.records; k++) + for (match= elem->hash.records, k= 0; + k < elem->hash.records && match + k == elem->hash.records; k++) { rpl_gtid *d_gtid= (rpl_gtid *)my_hash_element(&elem->hash, k); - for (ulong l= 0; l < glev->count && not_match; l++) - not_match= !(*d_gtid == glev->list[l]); + for (ulong l= 0, same= 0; l < glev->count && !same; l++) + match-= (same= (*d_gtid == glev->list[l])); }
- if (not_match) + if (match) { sprintf(errbuf, "binlog files may contain gtids from the domain ('%u') " "being deleted. Make sure to first purge those files",
participants (1)
-
Kristian Nielsen