The assertion occurred in the SQL thread if an event group was incompletely written, missing the terminating XID or COMMIT event, and was immediately followed by a new event group. This could also lead to the incomplete event group being committed, and with the wrong GTID. Fix by rolling back any active transaction from a prior event group when applying the following GTID event. An incomplete event group like this should be rare in practice. If the server crashes in the middle of writing an event group, the server restart will write a new format description event, which makes the slave roll back the partial event group. But it could presumably happen if the master experiences temporary write errors in the binlog, for example an intermittent disk-full condition. Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org> --- mysql-test/suite/rpl/r/rpl_mdev13831.result | 29 +++++++++++++++ mysql-test/suite/rpl/t/rpl_mdev13831.test | 41 +++++++++++++++++++++ sql/log_event_server.cc | 15 +++++++- 3 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 mysql-test/suite/rpl/r/rpl_mdev13831.result create mode 100644 mysql-test/suite/rpl/t/rpl_mdev13831.test diff --git a/mysql-test/suite/rpl/r/rpl_mdev13831.result b/mysql-test/suite/rpl/r/rpl_mdev13831.result new file mode 100644 index 00000000000..0f0f25e6def --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_mdev13831.result @@ -0,0 +1,29 @@ +include/master-slave.inc +[connection master] +*** MDEV-13831: Assertion on event group missing XID/COMMIT event +connection slave; +include/stop_slave.inc +CHANGE MASTER TO Master_use_gtid= No; +include/start_slave.inc +connection master; +SET @old_legacy= @@GLOBAL.binlog_legacy_event_pos; +SET GLOBAL binlog_legacy_event_pos= 1; +CREATE TABLE t1 (a INT) ENGINE=InnoDB; +INSERT INTO t1 VALUES (1); +connection master1; +SET @old_dbug= @@SESSION.debug_dbug; +SET SESSION debug_dbug = '+d,fail_binlog_write_1'; +UPDATE t1 SET a = 2; +ERROR HY000: Error writing file 'master-bin' (errno: 28 "No space 
left on device") +SET debug_dbug= @old_dbug; +DROP TEMPORARY TABLE t1; +ERROR 42S02: Unknown table 'test.t1' +connection master; +CREATE TEMPORARY TABLE t1 (i INT) ENGINE=InnoDB; +connection slave; +connection master; +SET GLOBAL binlog_legacy_event_pos= @old_legacy; +CALL mtr.add_suppression("Error writing file.*No space left on device"); +DROP TEMPORARY TABLE t1; +DROP TABLE t1; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_mdev13831.test b/mysql-test/suite/rpl/t/rpl_mdev13831.test new file mode 100644 index 00000000000..5b86e21cda0 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_mdev13831.test @@ -0,0 +1,41 @@ +--source include/have_debug.inc +--source include/have_innodb.inc +--source include/have_binlog_format_mixed.inc +--source include/master-slave.inc + +--echo *** MDEV-13831: Assertion on event group missing XID/COMMIT event + +--connection slave +--source include/stop_slave.inc +# Use non-GTID mode. In GTID mode, the IO thread will fail if it sees an +# incomplete event group after MDEV-27697 patch. +CHANGE MASTER TO Master_use_gtid= No; +--source include/start_slave.inc + +--connection master +# The dbug injection below is only active in legacy mode. 
+SET @old_legacy= @@GLOBAL.binlog_legacy_event_pos; +SET GLOBAL binlog_legacy_event_pos= 1; +CREATE TABLE t1 (a INT) ENGINE=InnoDB; +INSERT INTO t1 VALUES (1); + +--connection master1 +SET @old_dbug= @@SESSION.debug_dbug; +SET SESSION debug_dbug = '+d,fail_binlog_write_1'; +--error ER_ERROR_ON_WRITE +UPDATE t1 SET a = 2; +SET debug_dbug= @old_dbug; +--error ER_BAD_TABLE_ERROR +DROP TEMPORARY TABLE t1; + +--connection master +CREATE TEMPORARY TABLE t1 (i INT) ENGINE=InnoDB; + +--sync_slave_with_master + +--connection master +SET GLOBAL binlog_legacy_event_pos= @old_legacy; +CALL mtr.add_suppression("Error writing file.*No space left on device"); +DROP TEMPORARY TABLE t1; +DROP TABLE t1; +--source include/rpl_end.inc diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc index 84da9ab17aa..10cd47bbd62 100644 --- a/sql/log_event_server.cc +++ b/sql/log_event_server.cc @@ -3094,7 +3094,20 @@ static char gtid_begin_string[] = "BEGIN"; int Gtid_log_event::do_apply_event(rpl_group_info *rgi) { + Relay_log_info *rli= rgi->rli; ulonglong bits= thd->variables.option_bits; + + if (unlikely(thd->transaction->all.ha_list || (bits & OPTION_GTID_BEGIN))) + { + rli->report(WARNING_LEVEL, 0, NULL, + "Rolling back unfinished transaction (no COMMIT " + "or ROLLBACK in relay log). This indicates a corrupt binlog " + "on the master, possibly caused by disk full or other write " + "error."); + rgi->cleanup_context(thd, 1); + bits= thd->variables.option_bits; + } + thd->variables.server_id= this->server_id; thd->variables.gtid_domain_id= this->domain_id; thd->variables.gtid_seq_no= this->seq_no; @@ -3113,7 +3126,7 @@ Gtid_log_event::do_apply_event(rpl_group_info *rgi) DBUG_ASSERT((bits & OPTION_GTID_BEGIN) == 0); - Master_info *mi=rgi->rli->mi; + Master_info *mi= rli->mi; switch (flags2 & (FL_DDL | FL_TRANSACTIONAL)) { case FL_TRANSACTIONAL: -- 2.39.5