Hi, Andrei!
Again, this is a review of the git diff f09d33f521b 557ab2992dd,
not only 557ab2992dd.
On Jun 02, Andrei Elkin wrote:
> revision-id: 557ab2992dd (mariadb-10.6.0-67-g557ab2992dd)
> parent(s): 4cc5a3074da
> author: Andrei Elkin <andrei.elkin(a)mariadb.com>
> committer: Andrei Elkin <andrei.elkin(a)mariadb.com>
> timestamp: 2021-05-27 17:37:22 +0300
> message:
>
> MDEV-21117 Incremental patch for a few review notes addressed.
> diff --git a/mysql-test/suite/binlog/t/binlog_truncate_active_log.inc b/mysql-test/suite/binlog/t/binlog_truncate_active_log.inc
> new file mode 100644
> --- /dev/null
> +++ b/mysql-test/suite/binlog/t/binlog_truncate_active_log.inc
> @@ -0,0 +1,65 @@
> +connect(master1,localhost,root,,);
> +connect(master2,localhost,root,,);
> +connect(master3,localhost,root,,);
> +
> +--connection default
> +
> +# First to commit few transactions
> +INSERT INTO t VALUES (10);
> +INSERT INTO tm VALUES (10);
> +
> +--connection master1
> +# Hold insert after write to binlog and before "run_commit_ordered" in engine
> +SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master1_ready WAIT_FOR signal_never_arrives";
> +--send_eval $query1
> +
> +--connection master2
> +SET DEBUG_SYNC= "now WAIT_FOR master1_ready";
> +SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready";
> +if ($pre_q2)
> +{
> + --send_eval $pre_q2;
> +}
> +# To binlog non-xid transactional group which will be truncated all right
> +if (!$pre_q2)
> +{
> + --send_eval $query2
> +}
eh, this makes no sense. In the previous version of the patch
you've run pre_q2 before sending query2.
now you send either pre_q2 or query2.
so you don't need pre_q2 anymore now, just set query2 to the
what used to be pre_q2.
> +
> +--connection master3
> +SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
> +SELECT @@global.gtid_binlog_pos as 'Before the crash';
> +
> +--connection default
> +--source include/kill_mysqld.inc
> +--disconnect master1
> +--disconnect master2
> +--disconnect master3
> +
> +#
> +# Server restart
> +#
> +--let $restart_parameters= --rpl-semi-sync-slave-enabled=1
> +--source include/start_mysqld.inc
> +
> +# Check error log for a successful truncate message.
> +--let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.1.err
> +
> +--let SEARCH_FILE=$log_error_
> +--let SEARCH_PATTERN=Successfully truncated.*to remove transactions starting from GTID $truncate_gtid_pos
> +--replace_regex /FOUND [0-9]+/FOUND #/
can it be found an unpredictable number of times?
Why would binlog be truncated more than once?
> +--source include/search_pattern_in_file.inc
> +
> +--echo Pre-crash binlog file content:
> +--let $binlog_file= query_get_value(show binary logs, Log_name, $binlog_file_index)
> +--source include/show_binlog_events.inc
> +
> +SELECT @@global.gtid_binlog_pos as 'After the crash';
> +--echo "One row should be present in table 't'"
> +SELECT * FROM t;
> +
> +# prepare binlog file index for the next test
> +--inc $binlog_file_index
> +
> +# Local cleanup
> +DELETE FROM t;
> diff --git a/mysql-test/suite/binlog/t/binlog_truncate_active_log.test b/mysql-test/suite/binlog/t/binlog_truncate_active_log.test
> new file mode 100644
> --- /dev/null
> +++ b/mysql-test/suite/binlog/t/binlog_truncate_active_log.test
> @@ -0,0 +1,104 @@
> +# ==== Purpose ====
> +#
> +# Test verifies the truncation of single binary log file.
> +#
> +# ==== References ====
> +#
> +# MDEV-21117: recovery for --rpl-semi-sync-slave-enabled server
> +
> +--source include/have_innodb.inc
> +--source include/have_aria.inc
> +# File: binlog_truncate_active_log.inc included in test makes use of
> +# 'debug_sync' facility.
> +--source include/have_debug_sync.inc
> +--source include/have_binlog_format_statement.inc
> +
> +call mtr.add_suppression("Can.t init tc log");
> +call mtr.add_suppression("Aborting");
> +
> +# The following cases are tested:
> +# A. 2pc transaction is followed by a blank "zero-engines" one
> +# B. 2pc transaction follows the blank one
> +# C. Similarly to A, with the XA blank transaction
> +
> +RESET MASTER;
> +CREATE TABLE t (f INT) ENGINE=INNODB;
> +CREATE TABLE t2 (f INT) ENGINE=INNODB;
> +CREATE TABLE tm (f INT) ENGINE=Aria;
> +
> +# Old (pre-crash) binlog file index initial value.
> +# It keeps incremented at the end of each case.
> +--let $binlog_file_index=1
> +
> +--echo # Case A.
> +# Using 'debug_sync' hold 'query1' execution after 'query1' is flushed and
> +# synced to binary log but not yet committed. In an another connection hold
> +# 'query2' execution after 'query2' is flushed and synced to binlog.
> +# Crash and restart server with --rpl-semi-sync-slave-enabled=1
> +#
> +# During recovery of binary log 'query1' status is checked with InnoDB engine,
> +# it will be in prepared but not yet commited. All transactions starting from
> +# 'query1' onwards will be removed from the binary log.
> +# Show-binlog-events is to prove that.
> +
> +--let $truncate_gtid_pos = 0-1-6
> +--let $query1 = INSERT INTO t VALUES (20)
> +--let $query2 = DELETE FROM t2 WHERE f = 0 /* no such record */
> +--source binlog_truncate_active_log.inc
> +
> +--echo # Case B.
> +# The inverted sequence ends up to truncate starting from $query2
> +--let $truncate_gtid_pos = 0-1-10
> +--let $query1 = DELETE FROM t2 WHERE f = 0
> +--let $query2 = INSERT INTO t VALUES (20)
> +--source binlog_truncate_active_log.inc
> +
> +
> +--echo # Case C.
> +delimiter |;
> +CREATE PROCEDURE sp_blank_xa()
> +BEGIN
> + XA START 'blank';
> + DELETE FROM t2 WHERE f = 0 /* no such record */;
> + XA END 'blank';
> + XA PREPARE 'blank';
> +END|
> +delimiter ;|
> +
> +# The same as in A with $query2 being the zero-engine XA transaction.
> +# Both $query1 and $query2 are going to be truncated.
> +--let $truncate_gtid_pos = 0-1-14
> +--let $query1 = INSERT INTO t VALUES (20)
> +--let $pre_q2 = CALL sp_blank_xa
> +--let $query2 = XA COMMIT 'blank'
hm.. now your binlog_truncate_active_log.inc
only runs query2 is pre_q2 is empty. So this XA COMMIT isn't done at all.
Very confusing.
> +--source binlog_truncate_active_log.inc
> +
> +DROP PROCEDURE sp_blank_xa;
> +
> +
> +--echo # Case D.
> +delimiter |;
> +CREATE PROCEDURE sp_xa()
> +BEGIN
> + XA START 'xid';
> + DELETE FROM t WHERE f = 10;
> + XA END 'xid';
> + XA PREPARE 'xid';
> +END|
> +delimiter ;|
> +
> +# The same as in B with $query1 being the prepared XA transaction.
> +# Truncation must occurs at $query2.
> +--let $truncate_gtid_pos = 0-1-20
> +--let $query1 = CALL sp_xa
> +--let $pre_q2 =
> +--let $query2 = INSERT INTO t2 VALUES (20)
> +--source binlog_truncate_active_log.inc
> +
> +DROP PROCEDURE sp_xa;
> +
> +
> +--echo # Cleanup
> +DROP TABLE t,t2,tm;
> +
> +--echo # End of the tests
> diff --git a/sql/handler.h b/sql/handler.h
> --- a/sql/handler.h
> +++ b/sql/handler.h
> @@ -931,6 +932,32 @@ struct xid_t {
> };
> typedef struct xid_t XID;
>
> +/*
> + Enumerates a sequence in the order of
> + their creation that is in the top-down order of the index file.
> + Ranges from zero through MAX_binlog_id.
> + Not confuse the value with the binlog file numerical suffix,
> + neither with the binlog file line in the binlog index file.
Ah! Good, thanks. I thought it was a binlog file numerical suffix, indeed.
> +*/
> +typedef uint Binlog_file_id;
> +const Binlog_file_id MAX_binlog_id= UINT_MAX;
> +/*
> + Compound binlog-id and byte offset of transaction's first event
> + in a sequence (e.g the recovery sequence) of binlog files.
> + Binlog_offset(0,0) is the minimum value to mean
> + the first byte of the first binlog file.
> +*/
> +typedef std::pair<Binlog_file_id, my_off_t> Binlog_offset;
> +
> +/* binlog-based recovery transaction descriptor */
> +struct xid_recovery_member
> +{
> + my_xid xid;
> + uint in_engine_prepare; // number of engines that have xid prepared
> + bool decided_to_commit;
> + Binlog_offset binlog_coord; // semisync recovery binlog offset
> +};
> +
> /* for recover() handlerton call */
> #define MIN_XID_LIST_SIZE 128
> #define MAX_XID_LIST_SIZE (1024*128)
> diff --git a/sql/log.cc b/sql/log.cc
> --- a/sql/log.cc
> +++ b/sql/log.cc
> @@ -1692,17 +1698,13 @@ int binlog_init(void *p)
> binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
> binlog_hton->savepoint_rollback_can_release_mdl=
> binlog_savepoint_rollback_can_release_mdl;
> - binlog_hton->commit= binlog_commit;
> + binlog_hton->commit= binlog_commit_dummy;
you could use a lambda for that too, [](handlerton*,THD*,bool) { return 0; }
> binlog_hton->rollback= binlog_rollback;
> binlog_hton->drop_table= [](handlerton *, const char*) { return -1; };
Oh did you actually need it? Or it's for completeness only?
I mean, was there some code that tried to drop a "binlog" table?
> if (WSREP_ON || opt_bin_log)
> {
> binlog_hton->prepare= binlog_prepare;
> binlog_hton->start_consistent_snapshot= binlog_start_consistent_snapshot;
> - binlog_hton->commit_by_xid= binlog_commit_by_xid;
> - binlog_hton->rollback_by_xid= binlog_rollback_by_xid;
> - // recover needs to be set to make xa{commit,rollback}_handlerton effective
> - binlog_hton->recover= binlog_xa_recover_dummy;
> }
> binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN | HTON_NO_ROLLBACK;
> return 0;
> @@ -9975,6 +9977,154 @@ int TC_LOG::using_heuristic_recover()
> /****** transaction coordinator log for 2pc - binlog() based solution ******/
> #define TC_LOG_BINLOG MYSQL_BIN_LOG
>
> +/**
> + Truncates the current binlog to specified position. Removes the rest of binlogs
> + which are present after this binlog file.
> +
> + @param truncate_file Holds the binlog name to be truncated
> + @param truncate_pos Position within binlog from where it needs to
> + truncated.
> +
> + @retval true ok
> + @retval false error
> +
> +*/
> +bool MYSQL_BIN_LOG::truncate_and_remove_binlogs(const char *file_name,
> + my_off_t pos,
> + rpl_gtid *ptr_gtid)
> +{
> + int error= 0;
> +#ifdef HAVE_REPLICATION
> + LOG_INFO log_info;
> + THD *thd= current_thd;
> + my_off_t index_file_offset= 0;
> + File file= -1;
> + MY_STAT s;
> + my_off_t old_size;
> +
> + if ((error= find_log_pos(&log_info, file_name, 1)))
> + {
> + sql_print_error("Failed to locate binary log file:%s."
> + "Error:%d", file_name, error);
> + goto end;
> + }
> +
> + while (!(error= find_next_log(&log_info, 1)))
> + {
> + if (!index_file_offset)
> + {
> + index_file_offset= log_info.index_file_start_offset;
> + if ((error= open_purge_index_file(TRUE)))
> + {
> + sql_print_error("Failed to open purge index "
> + "file:%s. Error:%d", purge_index_file_name, error);
> + goto end;
> + }
> + }
> + if ((error= register_purge_index_entry(log_info.log_file_name)))
> + {
> + sql_print_error("Failed to copy %s to purge index"
> + " file. Error:%d", log_info.log_file_name, error);
> + goto end;
> + }
> + }
> +
> + if (error != LOG_INFO_EOF)
> + {
> + sql_print_error("Failed to find the next binlog to "
> + "add to purge index register. Error:%d", error);
> + goto end;
> + }
> +
> + if (is_inited_purge_index_file())
> + {
> + if (!index_file_offset)
> + index_file_offset= log_info.index_file_start_offset;
> +
> + if ((error= sync_purge_index_file()))
> + {
> + sql_print_error("Failed to flush purge index "
> + "file. Error:%d", error);
> + goto end;
> + }
> +
> + // Trim index file
> + if ((error=
> + mysql_file_chsize(index_file.file, index_file_offset, '\n',
> + MYF(MY_WME))) ||
> + (error=
> + mysql_file_sync(index_file.file, MYF(MY_WME|MY_SYNC_FILESIZE))))
> + {
> + sql_print_error("Failed to trim binlog index "
> + "file:%s to offset:%llu. Error:%d", index_file_name,
> + index_file_offset, error);
> + goto end;
> + }
> +
> + /* Reset data in old index cache */
> + if ((error= reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 1)))
> + {
> + sql_print_error("Failed to reinit binlog index "
> + "file. Error:%d", error);
> + goto end;
> + }
> +
> + /* Read each entry from purge_index_file and delete the file. */
> + if ((error= purge_index_entry(thd, NULL, TRUE)))
> + {
> + sql_print_error("Failed to process registered "
> + "files that would be purged.");
> + goto end;
> + }
> + }
> +
> + DBUG_ASSERT(pos);
> +
> + if ((file= mysql_file_open(key_file_binlog, file_name,
> + O_RDWR | O_BINARY, MYF(MY_WME))) < 0)
> + {
> + error= 1;
> + sql_print_error("Failed to open binlog file:%s for "
> + "truncation.", file_name);
> + goto end;
> + }
> + my_stat(file_name, &s, MYF(0));
> + old_size= s.st_size;
> + clear_inuse_flag_when_closing(file);
> + /* Change binlog file size to truncate_pos */
> + if ((error=
> + mysql_file_chsize(file, pos, 0, MYF(MY_WME))) ||
> + (error= mysql_file_sync(file, MYF(MY_WME|MY_SYNC_FILESIZE))))
I'd rather not to overload if() conditions with function calls
like this, they're rather annoying to single-step through
and impossible to put a breakpoint on a specific function call.
> + {
> + sql_print_error("Failed to trim the "
> + "binlog file:%s to size:%llu. Error:%d",
> + file_name, pos, error);
> + goto end;
> + }
> + else
> + {
> + char buf[21];
> + my_stat(file_name, &s, MYF(0));
> + my_off_t new_size= s.st_size;
do you need it? new_size == pos, isn't it? You yourself use the word
"size" to describe pos in sql_print_error just few lines above :)
> +
> + longlong10_to_str(ptr_gtid->seq_no, buf, 10);
> + sql_print_information("Successfully truncated binlog file:%s "
> + "to pos:%llu to remove transactions starting from "
> + "GTID %u-%u-%s; "
> + "former file size shrunk from %llu to %llu.",
This isn't quite English. Better just "previous (or old) file
size: %llu", that is not "former" and no need to specify the new size twice.
Also, you say that you "truncated binlog file" here, but on failure
you fail to "trim the binlog file". I suggest to use consistent
wording, either always "trim" or always "truncate" (and always with
or without "the") - both on success and on failure. Personally, I favor
"truncate the binlog file"
> + file_name, pos,
> + ptr_gtid->domain_id, ptr_gtid->server_id, buf,
> + old_size, new_size);
> + }
> +
> +end:
> + if (file >= 0)
> + mysql_file_close(file, MYF(MY_WME));
> +
> + error= error || close_purge_index_file();
> +#endif
> + return error > 0;
> +}
> int TC_LOG_BINLOG::open(const char *opt_name)
> {
> int error= 1;
> @@ -10449,7 +10599,528 @@ start_binlog_background_thread()
...
> +
> +Recovery_context::Recovery_context() :
> + prev_event_pos(0),
> + last_gtid_standalone(false), last_gtid_valid(false), last_gtid_no2pc(false),
> + last_gtid_engines(0),
> + do_truncate(rpl_semi_sync_slave_enabled),
> + truncate_validated(false), truncate_reset_done(false),
> + truncate_set_in_1st(false), id_binlog(MAX_binlog_id),
> + checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF), gtid_maybe_to_truncate(NULL)
> +{
> + last_gtid_coord= Binlog_offset(0,0);
> + binlog_truncate_coord= binlog_truncate_coord_1st_round= Binlog_offset(0,0);
> + binlog_unsafe_coord= Binlog_offset(0,0);
Okay, I admit, just typedef'ing std::pair is a huge improvement already
> + binlog_truncate_file_name[0]= 0;
> + binlog_unsafe_file_name [0]= 0;
> + binlog_unsafe_gtid= truncate_gtid= truncate_gtid_1st_round= rpl_gtid();
> + if (do_truncate)
> + gtid_maybe_to_truncate= new Dynamic_array<rpl_gtid>(16, 16);
> +}
> +
Regards,
Sergei
VP of MariaDB Server Engineering
and security(a)mariadb.org