revision-id: b62432fc2459a312aaa0e27c0950de3420a34d53 (mariadb-10.1.43-96-gb62432fc245) parent(s): f9639c2d1a5e24f1a1533b6277fe7eca3aa3c3c0 author: Sujatha committer: Sujatha timestamp: 2020-04-01 23:17:10 +0530 message: MDEV-21117: --tc-heuristic-recover=rollback is not replication safe Problem: ======= When run after master server crash --tc-heuristic-recover=rollback produces inconsistent server state with binlog still containing transactions that were rolled back by the option. Such way recovered server may not be used for replication. Fix: === During --tc-heuristic-recover query the storage engine to get binlog file name and position corrensponding to the last committed transaction. If last_commit_pos_file < checkpoint_file then we need to start looking from checkpoint file. Look for first transactional event as this is the candidate for heuristic rollback. If a transactional event is found then this is used as reference for binlog truncation. Otherwise no truncation is needed. If last_commit_pos_file is equal to checkpoint_file then truncate binlog as per the last_commit_pos. If last_commit_file is not set then checkpoint_file is what we have. Look for first transactional event and consider this as truncate position. Once the binlog file name and position for truncation are identified start reading all the events beyond this position. Check if they are all transactional. If all are transactional truncate the binary log. If any non transactional events are found then truncation will fail. Adjust the global gtid state as per the truncated binlog. --- .../r/binlog_heuristic_rollback_active_log.result | 22 + .../binlog/r/binlog_truncate_multi_log.result | 26 + .../suite/binlog/r/binlog_truncate_none.result | 24 + .../t/binlog_heuristic_rollback_active_log.test | 76 +++ .../suite/binlog/t/binlog_truncate_multi_log.test | 88 ++++ .../suite/binlog/t/binlog_truncate_none.test | 65 +++ sql/log.cc | 565 ++++++++++++++++++++- sql/log.h | 7 +- storage/innobase/handler/ha_innodb.cc | 3 + storage/xtradb/handler/ha_innodb.cc | 3 + 10 files changed, 875 insertions(+), 4 deletions(-) diff --git a/mysql-test/suite/binlog/r/binlog_heuristic_rollback_active_log.result b/mysql-test/suite/binlog/r/binlog_heuristic_rollback_active_log.result new file mode 100644 index 00000000000..512644459f0 --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_heuristic_rollback_active_log.result @@ -0,0 +1,22 @@ +call mtr.add_suppression("Can't init tc log"); +call mtr.add_suppression("Aborting"); +RESET MASTER; +CREATE TABLE t ( f INT ) ENGINE=INNODB; +INSERT INTO t VALUES (10); +SET GLOBAL DEBUG_DBUG="d,simulate_crash_after_binlog_write"; +INSERT INTO t VALUES (20); +ERROR HY000: Lost connection to MySQL server during query +SELECT * FROM t; +f +10 +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Gtid # # GTID #-#-# +master-bin.000001 # Query # # use `test`; CREATE TABLE t ( f INT ) ENGINE=INNODB +master-bin.000001 # Gtid # # BEGIN GTID #-#-# +master-bin.000001 # Query # # use `test`; INSERT INTO t VALUES (10) +master-bin.000001 # Xid # # COMMIT /* XID */ +DROP TABLE t; +show binlog events in 'master-bin.000003' limit 1,1; +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000003 # Gtid_list 1 # [0-1-2] diff --git a/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result b/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result new file mode 100644 index 00000000000..b6a3580f916 --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result @@ -0,0 +1,26 @@ +SET @old_max_binlog_size= @@global.max_binlog_size; +SET GLOBAL max_binlog_size= 4096; +call mtr.add_suppression("Can't init tc log"); +call mtr.add_suppression("Aborting"); +RESET MASTER; +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +SET DEBUG_SYNC= "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR con1_go"; +INSERT INTO t1 VALUES (1, REPEAT("x", 4100)); +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +SET GLOBAL DEBUG_DBUG="d,simulate_crash_after_binlog_write"; +SET DEBUG_SYNC= "now SIGNAL con1_go"; +ERROR HY000: Lost connection to MySQL server during query +"Only CREATE TABLE t1 should be present in the binary log" +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000001 # Gtid # # GTID #-#-# +master-bin.000001 # Query # # use `test`; CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb +"Zero records should be there." +SELECT COUNT(*) FROM t1; +COUNT(*) +0 +DROP TABLE t1; diff --git a/mysql-test/suite/binlog/r/binlog_truncate_none.result b/mysql-test/suite/binlog/r/binlog_truncate_none.result new file mode 100644 index 00000000000..1eee3fb1cd1 --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_truncate_none.result @@ -0,0 +1,24 @@ +SET @old_max_binlog_size= @@global.max_binlog_size; +SET GLOBAL max_binlog_size= 4096; +call mtr.add_suppression("Can't init tc log"); +call mtr.add_suppression("Aborting"); +RESET MASTER; +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +INSERT INTO t1 VALUES (1, REPEAT("x", 4100)); +CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +# Kill the server +"Zero records should be there." +SELECT COUNT(*) FROM t1; +COUNT(*) +1 +show binary logs; +Log_name File_size +master-bin.000001 # +master-bin.000002 # +master-bin.000003 # +master-bin.000004 # +DROP TABLE t1,t2; diff --git a/mysql-test/suite/binlog/t/binlog_heuristic_rollback_active_log.test b/mysql-test/suite/binlog/t/binlog_heuristic_rollback_active_log.test new file mode 100644 index 00000000000..1abb3b6495a --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_heuristic_rollback_active_log.test @@ -0,0 +1,76 @@ +# ==== Purpose ==== +# +# Test verifies the truncation of single binary log file. +# +# ==== Implementation ==== +# +# Steps: +# 0 - Create table t1 and insert a row. +# 1 - Insert an another row such that it gets written to binlog but commit +# in engine fails as server crashed at this point. +# 2 - Restart server with --tc-heuristic-recover=ROLLBACK +# 3 - Upon server start 'master-bin.000001' will be truncated to contain +# only the first insert +# +# ==== References ==== +# +# MDEV-21117: --tc-heuristic-recover=rollback is not replication safe + + +--source include/have_innodb.inc +--source include/have_log_bin.inc +--source include/have_debug.inc +--source include/have_binlog_format_statement.inc + +call mtr.add_suppression("Can't init tc log"); +call mtr.add_suppression("Aborting"); + +connect(master,localhost,root,,); +connect(master1,localhost,root,,); + +--connection master +RESET MASTER; +CREATE TABLE t ( f INT ) ENGINE=INNODB; +INSERT INTO t VALUES (10); + +--connection master1 +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +wait +EOF + +SET GLOBAL DEBUG_DBUG="d,simulate_crash_after_binlog_write"; +--error 2013 # CR_SERVER_LOST +INSERT INTO t VALUES (20); +--source include/wait_until_disconnected.inc + +--connection master +--source include/wait_until_disconnected.inc + +# +# Server restart +# +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart: --tc-heuristic-recover=ROLLBACK +EOF +--source include/wait_until_disconnected.inc + +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart: +EOF + +connection default; +--enable_reconnect +--source include/wait_until_connected_again.inc + +--connection master +--enable_reconnect +--source include/wait_until_connected_again.inc + +--connection master +SELECT * FROM t; +--source include/show_binlog_events.inc + +--connection master +DROP TABLE t; +let $binlog_file= query_get_value(SHOW MASTER STATUS, File, 1); +--source include/show_gtid_list.inc diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test b/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test new file mode 100644 index 00000000000..5afa3b782cc --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test @@ -0,0 +1,88 @@ +# ==== Purpose ==== +# +# Test verifies truncation of multiple binary logs. +# +# ==== Implementation ==== +# +# Steps: +# 0 - Set max_binlog_size= 4096. Create a table and do an insert such that +# the max_binlog_size is reached and binary log gets rotated. +# 1 - Using debug simulation make the server crash at a point where the DML +# transaction is written to binary log but not committed in engine. +# 2 - At the time of crash two binary logs will be there master-bin.0000001 +# and master-bin.000002. +# 3 - Restart server with --tc-heuristic-recover=ROLLBACK +# 4 - Since the prepared DML in master-bin.000001 is rolled back the first +# binlog will be truncated prior to the DML and master-bin.000002 will be +# removed. +# +# ==== References ==== +# +# MDEV-21117: --tc-heuristic-recover=rollback is not replication safe + + +--source include/have_innodb.inc +--source include/have_log_bin.inc +--source include/have_debug.inc +--source include/have_binlog_format_row.inc + +SET @old_max_binlog_size= @@global.max_binlog_size; +SET GLOBAL max_binlog_size= 4096; + +call mtr.add_suppression("Can't init tc log"); +call mtr.add_suppression("Aborting"); + +RESET MASTER; + +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; + +# Hold insert after write to binlog and before "run_commit_ordered" in engine +connect(master1,localhost,root,,); +connect(master2,localhost,root,,); + +--connection master1 +SET DEBUG_SYNC= "commit_after_release_LOCK_log SIGNAL con1_ready WAIT_FOR con1_go"; +send INSERT INTO t1 VALUES (1, REPEAT("x", 4100)); + +--connection default +SET DEBUG_SYNC= "now WAIT_FOR con1_ready"; +--source include/show_binary_logs.inc +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +wait +EOF + +--connection master2 +SET GLOBAL DEBUG_DBUG="d,simulate_crash_after_binlog_write"; +SET DEBUG_SYNC= "now SIGNAL con1_go"; + +--connection master1 +--error 2013 # CR_SERVER_LOST +--reap +--source include/wait_until_disconnected.inc + +--connection master2 +--source include/wait_until_disconnected.inc + +# +# Server restart +# +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart: --tc-heuristic-recover=ROLLBACK +EOF +--source include/wait_until_disconnected.inc + +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart: +EOF + +connection default; +--enable_reconnect +--source include/wait_until_connected_again.inc +--echo "Only CREATE TABLE t1 should be present in the binary log" +--source include/show_binlog_events.inc + +--echo "Zero records should be there." +SELECT COUNT(*) FROM t1; + +DROP TABLE t1; + diff --git a/mysql-test/suite/binlog/t/binlog_truncate_none.test b/mysql-test/suite/binlog/t/binlog_truncate_none.test new file mode 100644 index 00000000000..9254cfd4d72 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_truncate_none.test @@ -0,0 +1,65 @@ +# ==== Purpose ==== +# +# Test case verifies no binlog truncation happens when non transactional +# events are found in binlog after the last committed transaction. +# +# ==== Implementation ==== +# +# Steps: +# 0 - Set max_binlog_size= 4096. Create a table and do an insert such that +# the max_binlog_size is reached and binary log gets rotated. +# 1 - Create a table in newly created binary log and crash the server +# 2 - Restart server with --tc-heuristic-recover=ROLLBACK +# 3 - Recovery code will get the last committed DML specific postion and +# will try to check if binlog can be truncated upto this position. +# Since a DDL is present beyond this no truncation will happen. +# ==== References ==== +# +# MDEV-21117: --tc-heuristic-recover=rollback is not replication safe + + +--source include/have_innodb.inc +--source include/have_log_bin.inc +--source include/have_debug.inc +--source include/have_binlog_format_row.inc + +SET @old_max_binlog_size= @@global.max_binlog_size; +SET GLOBAL max_binlog_size= 4096; + +call mtr.add_suppression("Can't init tc log"); +call mtr.add_suppression("Aborting"); + +RESET MASTER; + +CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +INSERT INTO t1 VALUES (1, REPEAT("x", 4100)); +CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb; +--source include/show_binary_logs.inc + +--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +wait +EOF + +--source include/kill_mysqld.inc +--source include/wait_until_disconnected.inc + +# +# Server restart +# +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart: --tc-heuristic-recover=ROLLBACK +EOF +--source include/wait_until_disconnected.inc + +--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +restart: +EOF + +connection default; +--enable_reconnect +--source include/wait_until_connected_again.inc + +--echo "Zero records should be there." +SELECT COUNT(*) FROM t1; +--source include/show_binary_logs.inc +DROP TABLE t1,t2; diff --git a/sql/log.cc b/sql/log.cc index 0efef6d1e29..8c997d547da 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -3164,6 +3164,7 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period) checksum_alg_reset(BINLOG_CHECKSUM_ALG_UNDEF), relay_log_checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF), description_event_for_exec(0), description_event_for_queue(0), + last_commit_pos_offset(0), current_binlog_id(0) { /* @@ -3173,6 +3174,7 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period) before main(). */ index_file_name[0] = 0; + last_commit_pos_file[0]= 0; bzero((char*) &index_file, sizeof(index_file)); bzero((char*) &purge_index_file, sizeof(purge_index_file)); } @@ -7936,6 +7938,8 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) mysql_mutex_unlock(&LOCK_log); DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_log"); + DBUG_EXECUTE_IF("simulate_crash_after_binlog_write", + DBUG_SUICIDE();); /* Loop through threads and run the binlog_sync hook @@ -8964,7 +8968,7 @@ int TC_LOG_MMAP::open(const char *opt_name) { if (my_errno != ENOENT) goto err; - if (using_heuristic_recover()) + if (using_heuristic_recover(opt_name)) return 1; if ((fd= mysql_file_create(key_file_tclog, logname, CREATE_MODE, O_RDWR | O_CLOEXEC, MYF(MY_WME))) < 0) @@ -9497,14 +9501,40 @@ TC_LOG_MMAP tc_log_mmap; 1 heuristic recovery was performed */ -int TC_LOG::using_heuristic_recover() +int TC_LOG::using_heuristic_recover(const char* opt_name) { + LOG_INFO log_info; + int error; + if (!tc_heuristic_recover) return 0; sql_print_information("Heuristic crash recovery mode"); + if (ha_recover(0)) + { sql_print_error("Heuristic crash recovery failed"); + } + + /* + Check if TC log is referring to binary log not memory map. Ensure that + tc_heuristic_recover being ROLLBACK". If both match initiate binlog + truncation mechanism. + */ + if (!strcmp(opt_name,opt_bin_logname) && + tc_heuristic_recover == TC_HEURISTIC_RECOVER_ROLLBACK) + { + if ((error= mysql_bin_log.find_log_pos(&log_info, NullS, 1))) + { + if (error != LOG_INFO_EOF) + sql_print_error("find_log_pos() failed (error: %d)", error); + else + return 0; + } + if ((error= mysql_bin_log.heuristic_binlog_rollback())) + return error; + } + sql_print_information("Please restart mysqld without --tc-heuristic-recover"); return 1; } @@ -9512,6 +9542,535 @@ int TC_LOG::using_heuristic_recover() /****** transaction coordinator log for 2pc - binlog() based solution ******/ #define TC_LOG_BINLOG MYSQL_BIN_LOG +/** + Truncates the current binlog to specified position. Removes the rest of binlogs + which are present after this binlog file. + + @param truncate_file Holds the binlog name to be truncated + @param truncate_pos Position within binlog from where it needs to + truncated. + + @retval true ok + @retval false error + +*/ +bool MYSQL_BIN_LOG::truncate_and_remove_binlogs(const char *truncate_file, + my_off_t truncate_pos) +{ + int error= 0; +#ifdef HAVE_REPLICATION + LOG_INFO log_info; + THD *thd= current_thd; + my_off_t index_file_offset=0; + File file= -1; + bool included= (!(truncate_pos) ? 1 : 0); + MY_STAT s; + my_off_t binlog_size; + + if ((error= open_purge_index_file(TRUE))) + { + sql_print_error("MYSQL_BIN_LOG::purge_logs failed to sync the index file."); + goto err; + } + + if ((error=find_log_pos(&log_info, truncate_file, 1))) + goto err; + + if (included) + { + index_file_offset= log_info.index_file_start_offset; + if((error= register_purge_index_entry(log_info.log_file_name))) + { + sql_print_error("MYSQL_BIN_LOG::purge_logs failed to copy %s to register file.", + log_info.log_file_name); + goto err; + } + } + while (!(error= find_next_log(&log_info, 1))) + { + if (!index_file_offset) + index_file_offset= log_info.index_file_start_offset; + if((error= register_purge_index_entry(log_info.log_file_name))) + { + sql_print_error("MYSQL_BIN_LOG::purge_logs failed to copy %s to register file.", + log_info.log_file_name); + goto err; + } + } + + if (error != LOG_INFO_EOF) + { + sql_print_error("Error while looking for next binlog file to be truncated. " + "Error:%d", error); + goto err; + } + + if (!index_file_offset) + index_file_offset= log_info.index_file_start_offset; + + if ((error= sync_purge_index_file())) + { + sql_print_error("MYSQL_BIN_LOG::purge_logs failed to flush register file."); + goto err; + } + + DBUG_ASSERT(index_file_offset != 0); + // Trim index file + if (mysql_file_chsize(index_file.file, index_file_offset, '\n', MYF(MY_WME)) || + mysql_file_sync(index_file.file, MYF(MY_WME|MY_SYNC_FILESIZE))) + { + sql_print_error("Failed to trim binlog index file " + "when master server is recovering it."); + mysql_file_close(index_file.file, MYF(MY_WME)); + goto err; + } + + /* Reset data in old index cache */ + reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 1); + + /* Read each entry from purge_index_file and delete the file. */ + if (is_inited_purge_index_file() && + (error= purge_index_entry(thd, NULL, TRUE))) + { + sql_print_error("MYSQL_BIN_LOG::purge_logs failed to process registered files" + " that would be purged."); + goto err; + } + + if (truncate_pos) + { + if ((file= mysql_file_open(key_file_binlog, truncate_file, + O_RDWR | O_BINARY, MYF(MY_WME))) < 0) + { + sql_print_error("Failed to open binlog file:%s for truncation.", + truncate_file); + error= 1; + goto err; + } + my_stat(truncate_file, &s, MYF(0)); + binlog_size= s.st_size; + + /* Change binlog file size to truncate_pos */ + if (mysql_file_chsize(file, truncate_pos, 0, MYF(MY_WME)) || + mysql_file_sync(file, MYF(MY_WME|MY_SYNC_FILESIZE))) + { + sql_print_error("Failed to trim the crashed binlog file:%s to size:%llu", + truncate_file, truncate_pos); + mysql_file_close(file, MYF(MY_WME)); + error= 1; + goto err; + } + else + { + sql_print_information("Crashed binlog file %s size is %llu, " + "but truncated to %llu.", + truncate_file, binlog_size, truncate_pos); + } + clear_inuse_flag_when_closing(file); + mysql_file_close(file, MYF(MY_WME)); + } + +err: + close_purge_index_file(); +#endif + return error; +} + +/** + Returns the checkpoint binlog file name. Iterate though the list of binary + logs. Move to the last one. Read the binlog checkpoint event within the last + log and return it. + + @param checkpoint_file Holds the binlog checkpoint file name. + + @retval 0 ok + @retval 1 error + +*/ +int TC_LOG_BINLOG::get_binlog_checkpoint_file(char* checkpoint_file) +{ + Log_event *ev= NULL; + bool binlog_checkpoint_found= false; + LOG_INFO log_info; + const char *errmsg; + IO_CACHE log; + File file; + Format_description_log_event fdle(BINLOG_VERSION); + char log_name[FN_REFLEN]; + int error=1; + + if (!fdle.is_valid()) + return 1; + + if ((error= find_log_pos(&log_info, NullS, 1))) + { + sql_print_error("find_log_pos() failed (error: %d)", error); + return error; + } + + // Move to the last binary log. + do + { + strmake_buf(log_name, log_info.log_file_name); + } while (!(error= find_next_log(&log_info, 1))); + + if (error != LOG_INFO_EOF) + { + sql_print_error("find_log_pos() failed (error: %d)", error); + return error; + } + if ((file= open_binlog(&log, log_name, &errmsg)) < 0) + { + sql_print_error("%s", errmsg); + return error; + } + while (!binlog_checkpoint_found && + (ev= Log_event::read_log_event(&log, 0, &fdle, + opt_master_verify_checksum)) && + ev->is_valid()) + { + enum Log_event_type typ= ev->get_type_code(); + if (typ == BINLOG_CHECKPOINT_EVENT) + { + uint dir_len; + Binlog_checkpoint_log_event *cev= (Binlog_checkpoint_log_event *)ev; + if (cev->binlog_file_len >= FN_REFLEN) + { + sql_print_error("Incorrect binlog checkpoint event with too " + "long file name found."); + delete ev; + ev= NULL; + end_io_cache(&log); + mysql_file_close(file, MYF(MY_WME)); + return 1; + } + else + { + dir_len= dirname_length(log_name); + strmake(strnmov(checkpoint_file, log_name, dir_len), + cev->binlog_file_name, FN_REFLEN - 1 - dir_len); + binlog_checkpoint_found= true; + } + } + delete ev; + ev= NULL; + } // End of while + end_io_cache(&log); + mysql_file_close(file, MYF(MY_WME)); + file= -1; + /* + Old binary log without checkpoint found, binlog truncation is not + possible. Hence return error. + */ + if (!binlog_checkpoint_found) + return 1; + else + { + // Look for binlog checkpoint file in binlog index file. + if (find_log_pos(&log_info, checkpoint_file, 1)) + { + sql_print_error("Binlog file '%s' not found in binlog index, needed " + "for recovery. Aborting.", checkpoint_file); + return 1; + } + } + return 0; +} + +/** + Given a binlog file name and position the following function will parse the + binlog for the presense of only transactional event groups beyond the + position to be truncated. If all are transactional then truncation is + considered to be safe. If any non transactional or DDL groups are found then + truncation is not safe. + + @param truncate_file Holds the name of binlog to be truncated + + @retval true Safe to truncate + @retval false Truncation is not safe + +*/ +bool MYSQL_BIN_LOG::is_binlog_truncate_safe(const char* truncate_file, my_off_t pos) +{ + Log_event *ev= NULL; + LOG_INFO log_info; + const char *errmsg; + IO_CACHE log; + File file; + Format_description_log_event fdle(BINLOG_VERSION); + int error; + + if (!fdle.is_valid()) + goto err; + + if ((error= find_log_pos(&log_info, truncate_file, 1))) + { + sql_print_error("find_log_pos() failed (error: %d)", error); + goto err; + } + + if ((file= open_binlog(&log, log_info.log_file_name, &errmsg)) < 0) + { + sql_print_error("Failed to open the binlog checkpoint file:%s for recovery." + "Error: %s", truncate_file, errmsg); + goto err; + } + if (pos) + my_b_seek(&log, pos); + + for (;;) + { + while ((ev= Log_event::read_log_event(&log, 0, &fdle, + opt_master_verify_checksum)) && + ev->is_valid()) + { +#ifdef HAVE_REPLICATION + enum Log_event_type typ= ev->get_type_code(); + if (typ == GTID_EVENT) + { + Gtid_log_event *gev= (Gtid_log_event *)ev; + if (!(gev->flags2 & Gtid_log_event::FL_TRANSACTIONAL)) + { + delete ev; + ev= NULL; + goto err; + } + } +#endif + delete ev; + ev= NULL; + }// End While + end_io_cache(&log); + mysql_file_close(file, MYF(MY_WME)); + file= -1; + + if ((error=find_next_log(&log_info, 1))) + { + if (error != LOG_INFO_EOF) + { + sql_print_error("Error reading binlog files during recovery. Aborting."); + goto err; + } + else + break; + } + + if ((file= open_binlog(&log, log_info.log_file_name, &errmsg)) < 0) + { + sql_print_error("%s", errmsg); + goto err; + } + } // End of for(;;) + return true; + +err: + if (file >= 0) + { + end_io_cache(&log); + mysql_file_close(file, MYF(MY_WME)); + file= -1; + } + return false; +} + + +/** + Truncates the binary log, according to the transactions that got rolled + back from engine, during heuristic-recover=ROLLBACK. Global GTID state is + adjusted as per the truncated binlog. + + Called from @c TC_LOG::using_heuristic_recover(const char* opt_name) + + @param opt_name The base name of binary log. + + @return indicates success or failure of binlog rollback + @retval 0 success + @retval 1 failure + +*/ +int TC_LOG_BINLOG::heuristic_binlog_rollback() +{ + int error=0; +#ifdef HAVE_REPLICATION + Log_event *ev= NULL; + char binlog_truncate_file_name[FN_REFLEN]; + char checkpoint_file[FN_REFLEN]; + my_off_t binlog_truncate_pos=0; + LOG_INFO log_info; + const char *errmsg; + IO_CACHE log; + File file; + Format_description_log_event fdle(BINLOG_VERSION); + bool found_truncate_pos= false; + bool is_safe= false; + my_off_t tmp_truncate_pos=0; + rpl_gtid last_gtid; + bool last_gtid_standalone= false; + bool last_gtid_valid= false; + + if (!fdle.is_valid()) + return 1; + + if ((error= get_binlog_checkpoint_file(checkpoint_file))) + return error; + + sql_print_information("Binlog checkpoint_file name:%s", checkpoint_file); + + if (last_commit_pos_file[0] != 0) + { + if (strcmp(last_commit_pos_file, checkpoint_file) > 0) + { + sql_print_error("Engine specific binlog_file:%s cannot be advanced than " + "checkpoint_file:%s", last_commit_pos_file, checkpoint_file); + return 1; + } + else + binlog_truncate_pos= last_commit_pos_offset; + } + /* + If last_commit_pos_file < checkpoint_file then we need to start looking + from checkpoint file. Look for first transactional event as this is the + candidate for heuristic rollback. If a transactional event is found then + this is used as reference for binlog truncation. Otherwise no truncation + is needed. + + If last_commit_pos_file is equal to checkpoint_file then truncate binlog as + per the last_commit_pos. + + If last_commit_file is not set then checkpoint_file is what we have. + Look for first trans event and consider this as truncate position. + */ + strmake_buf(binlog_truncate_file_name, checkpoint_file); + + error= read_state_from_file(); + if (error && error != 2) + { + sql_print_error("Failed to load global gtid binlog state from file"); + return error; + } + + if ((error= find_log_pos(&log_info, checkpoint_file, 1))) + { + sql_print_error("find_log_pos() failed (error: %d)", error); + return error; + } + + if ((file= open_binlog(&log, log_info.log_file_name, &errmsg)) < 0) + { + sql_print_error("Failed to open the binlog checkpoint file:%s for recovery." + "Error: %s", checkpoint_file, errmsg); + return 1; + } + + while (!found_truncate_pos && + ((ev= Log_event::read_log_event(&log, 0, &fdle, + opt_master_verify_checksum)) && + ev->is_valid())) + { + enum Log_event_type typ= ev->get_type_code(); + switch (typ) + { + case XID_EVENT: + if (ev->log_pos == binlog_truncate_pos) + { + found_truncate_pos= true; + } + break; + case GTID_LIST_EVENT: + { + Gtid_list_log_event *glev= (Gtid_list_log_event *)ev; + /* Initialise the binlog state from the Gtid_list event. */ + if (glev->count > 0 && + rpl_global_gtid_binlog_state.load(glev->list, glev->count)) + goto err; + } + break; + case GTID_EVENT: + if (!found_truncate_pos) + { + Gtid_log_event *gev= (Gtid_log_event *)ev; + + /* Update the binlog state with any GTID logged after Gtid_list. */ + last_gtid.domain_id= gev->domain_id; + last_gtid.server_id= gev->server_id; + last_gtid.seq_no= gev->seq_no; + last_gtid_standalone= + ((gev->flags2 & Gtid_log_event::FL_STANDALONE) ? true : false); + last_gtid_valid= true; + if ((binlog_truncate_pos == 0) && gev->flags2 & Gtid_log_event::FL_TRANSACTIONAL) + { + found_truncate_pos=true; + binlog_truncate_pos= tmp_truncate_pos; + } + } + break; + default: + /* Nothing. */ + break; + }// End switch + if (last_gtid_valid && + ((last_gtid_standalone && !ev->is_part_of_group(typ)) || + (!last_gtid_standalone && + (typ == XID_EVENT || + (typ == QUERY_EVENT && + (((Query_log_event *)ev)->is_commit() || + ((Query_log_event *)ev)->is_rollback())))))) + { + if (rpl_global_gtid_binlog_state.update_nolock(&last_gtid, false)) + goto err; + last_gtid_valid= false; + } + // Used to identify the last group specific end position. + tmp_truncate_pos= ev->log_pos; + delete ev; + ev= NULL; + }// End While + end_io_cache(&log); + mysql_file_close(file, MYF(MY_WME)); + file= -1; + + if (!found_truncate_pos) + { + if (binlog_truncate_pos != 0) + { + sql_print_error("Failed to locate event with offset:%llu in binlog:%s" + "Unable to truncate the binary log", binlog_truncate_pos, + binlog_truncate_file_name); + goto err; + } + else + return 0; // Nothing to truncate + } + + is_safe= is_binlog_truncate_safe(binlog_truncate_file_name, + binlog_truncate_pos); + + if (is_safe) + { + if (truncate_and_remove_binlogs(binlog_truncate_file_name, + binlog_truncate_pos)) + goto err; + } + if (write_state_to_file()) + { + sql_print_error("Failed to save binlog GTID state during heuristic " + "binlog rollback. "); + goto err; + } + return 0; + +err: + error= 1; + if (file >= 0) + { + end_io_cache(&log); + mysql_file_close(file, MYF(MY_WME)); + } + sql_print_error("Binlog truncation failed"); +#endif + return error; +} + int TC_LOG_BINLOG::open(const char *opt_name) { int error= 1; @@ -9526,7 +10085,7 @@ int TC_LOG_BINLOG::open(const char *opt_name) return 1; } - if (using_heuristic_recover()) + if (using_heuristic_recover(opt_name)) { mysql_mutex_lock(&LOCK_log); /* generate a new binlog to mask a corrupted one */ diff --git a/sql/log.h b/sql/log.h index 277e5c6f69c..c6179c27d8e 100644 --- a/sql/log.h +++ b/sql/log.h @@ -41,7 +41,7 @@ bool stmt_has_updated_non_trans_table(const THD* thd); class TC_LOG { public: - int using_heuristic_recover(); + int using_heuristic_recover(const char* opt_name); TC_LOG() {} virtual ~TC_LOG() {} @@ -692,6 +692,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG bool need_prepare_ordered, bool need_commit_ordered); int unlog(ulong cookie, my_xid xid); void commit_checkpoint_notify(void *cookie); + int heuristic_binlog_rollback(); int recover(LOG_INFO *linfo, const char *last_log_name, IO_CACHE *first_log, Format_description_log_event *fdle, bool do_xa); int do_binlog_recovery(const char *opt_name, bool do_xa_recovery); @@ -794,6 +795,10 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG int purge_first_log(Relay_log_info* rli, bool included); int set_purge_index_file_name(const char *base_file_name); int open_purge_index_file(bool destroy); + bool truncate_and_remove_binlogs(const char *truncate_file, + my_off_t truncate_pos); + int get_binlog_checkpoint_file(char* checkpoint_file); + bool is_binlog_truncate_safe(const char* checkpoint_file, my_off_t pos); bool is_inited_purge_index_file(); int close_purge_index_file(); int clean_purge_index_file(); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 4de2cdbeaec..42d655ee0e8 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -16493,6 +16493,9 @@ innobase_xa_recover( { DBUG_ASSERT(hton == innodb_hton_ptr); + mysql_bin_log.last_commit_pos_offset= trx_sys_mysql_bin_log_pos; + strmake_buf(mysql_bin_log.last_commit_pos_file, trx_sys_mysql_bin_log_name); + if (len == 0 || xid_list == NULL) { return(0); diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 2aafb1a44ee..dec106cac54 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -17165,6 +17165,9 @@ innobase_xa_recover( { DBUG_ASSERT(hton == innodb_hton_ptr); + mysql_bin_log.last_commit_pos_offset= trx_sys_mysql_bin_log_pos; + strmake_buf(mysql_bin_log.last_commit_pos_file, trx_sys_mysql_bin_log_name); + if (len == 0 || xid_list == NULL) { return(0);