Hi, Kristian! On Apr 27, knielsen@knielsen-hq.org wrote:
revno: 3368 revision-id: knielsen@knielsen-hq.org-20120427082038-frqt8ykqy77x3r8e parent: sergii@pisem.net-20120410063020-fm6a5ds0iggihq02 committer: knielsen@knielsen-hq.org branch nick: work-5.5-mdev225 timestamp: Fri 2012-04-27 10:20:38 +0200 message: MDEV-225: Replace with dummy events an event that is not understood by a slave to which it should be sent
Add function to replace arbitrary event with dummy event.
Add code which uses this to fix the bug that enabling row_annotate events on the master breaks slaves which do not request such events.
Add that slaves set a variable @mariadb_slave_capability to inform the master in a robust way about which events it can, and cannot, handle.
I'm not sure I like the idea of magic user variables. Why wouldn't you create a session server variable @@mariadb_slave_capability? Also: Why would "a later release of a slave change what an earlier release of a master believes about its capabilities"? See comments to the code below.
=== added file 'mysql-test/suite/rpl/t/rpl_mariadb_slave_capability.test' --- a/mysql-test/suite/rpl/t/rpl_mariadb_slave_capability.test 1970-01-01 00:00:00 +0000 +++ b/mysql-test/suite/rpl/t/rpl_mariadb_slave_capability.test 2012-04-27 08:20:38 +0000 @@ -0,0 +1,96 @@ +--source include/master-slave.inc +--source include/have_debug.inc +--source include/have_binlog_format_row.inc + +connection master; + +set @old_master_binlog_checksum= @@global.binlog_checksum; +CREATE TABLE t1 (a INT PRIMARY KEY); +INSERT INTO t1 VALUES (0); + +sync_slave_with_master; +connection slave; + +--echo # Test slave with no capability gets dummy event, which is ignored. +--source include/stop_slave.inc +SET @@global.debug_dbug='d,simulate_slave_capability_none';
Don't do that please, it'll stop any debug trace for ./mtr --debug. Instead, write SET @@global.debug_dbug='+d,simulate_slave_capability_old_53'; ... your tests SET @@global.debug_dbug='-d,simulate_slave_capability_old_53'; or SET @debug_old=@@global.debug_dbug; SET @@global.debug_dbug='+d,simulate_slave_capability_old_53'; ... your tests SET @@global.debug_dbug=@debug_old;
+--source include/start_slave.inc +connection master; +sync_slave_with_master; +connection slave; +let $relaylog_start= query_get_value(SHOW SLAVE STATUS, Relay_Log_Pos, 1); + +connection master; +SET SESSION binlog_annotate_row_events = ON; +let $binlog_file= query_get_value(SHOW MASTER STATUS, File, 1); +let $binlog_start= query_get_value(SHOW MASTER STATUS, Position, 1); +# A short event, to test when we need to use user_var_event for dummy event. ... === modified file 'sql/log_event.cc' --- a/sql/log_event.cc 2012-04-10 06:28:13 +0000 +++ b/sql/log_event.cc 2012-04-27 08:20:38 +0000 @@ -3277,6 +3277,114 @@ Query_log_event::Query_log_event(const c }
+/* + Replace a binlog event read into a packet with a dummy event. Either a + Query_log_event that has just a comment, or if that will not fit in the + space used for the event to be replaced, then a NULL user_var event. + + This is used when sending binlog data to a slave which does not understand + this particular event and which is too old to support informational events + or holes in the event stream. + + This allows to write such events into the binlog on the master and still be + able to replicate against old slaves without them breaking. + + Clears the flag LOG_EVENT_THREAD_SPECIFIC_F and set LOG_EVENT_SUPPRESS_USE_F. + Overwrites the type with QUERY_EVENT (or USER_VAR_EVENT), and replaces the + body with a minimal query / NULL user var. + + Returns zero on success, -1 if error due to too little space in original + event. A minimum of 25 bytes (19 bytes fixed header + 6 bytes in the body) + is needed in any event to be replaced with a dummy event. +*/ +int +Query_log_event::dummy_event(String *packet, ulong ev_offset, + uint8 checksum_alg) +{ + uchar *p= (uchar *)packet->ptr() + ev_offset; + size_t data_len= packet->length() - ev_offset; + uint16 flags; + static const size_t min_user_var_event_len= + LOG_EVENT_HEADER_LEN + UV_NAME_LEN_SIZE + 1 + UV_VAL_IS_NULL; // 25 + static const size_t min_query_event_len= + LOG_EVENT_HEADER_LEN + QUERY_HEADER_LEN + 1 + 1; // 34 + + if (checksum_alg == BINLOG_CHECKSUM_ALG_CRC32) + data_len-= BINLOG_CHECKSUM_LEN; + else + DBUG_ASSERT(checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF || + checksum_alg == BINLOG_CHECKSUM_ALG_OFF); + + if (data_len < min_user_var_event_len) + /* Cannot replace with dummy, event too short. */ + return -1; + + flags= uint2korr(p + FLAGS_OFFSET); + flags&= ~LOG_EVENT_THREAD_SPECIFIC_F; + flags|= LOG_EVENT_SUPPRESS_USE_F; + int2store(p + FLAGS_OFFSET, flags); + + if (data_len < min_query_event_len) + { + /* + Have to use dummy user_var event for such a short packet. + + This works, but the event will be considered part of an event group with + the following event, so things like START SLAVE UNTIL may behave slightly + unexpected.
Why?
+ + We write a NULL user var with the name @`!dummyvar` (or as much + as that as will fit within the size of the original event - so + possibly just @`!`). + */ + static const char var_name[]= "!dummyvar"; + uint name_len= data_len - (min_user_var_event_len - 1); + + p[EVENT_TYPE_OFFSET]= USER_VAR_EVENT; + int4store(p + LOG_EVENT_HEADER_LEN, name_len); + memcpy(p + LOG_EVENT_HEADER_LEN + UV_NAME_LEN_SIZE, var_name, name_len); + p[LOG_EVENT_HEADER_LEN + UV_NAME_LEN_SIZE + name_len]= 1; // indicates NULL + } + else + { + /* + Use a dummy query event, just a comment. + */ + static const char message[]= + "# Dummy event replacing event type %u that slave cannot handle."; + char buf[sizeof(message)+1]; /* +1, as %u can expand to 3 digits. */ + uchar old_type= p[EVENT_TYPE_OFFSET]; + uchar *q= p + LOG_EVENT_HEADER_LEN; + size_t comment_len, len; + + p[EVENT_TYPE_OFFSET]= QUERY_EVENT; + int4store(q + Q_THREAD_ID_OFFSET, 0); + int4store(q + Q_EXEC_TIME_OFFSET, 0); + q[Q_DB_LEN_OFFSET]= 0; + int2store(q + Q_ERR_CODE_OFFSET, 0); + int2store(q + Q_STATUS_VARS_LEN_OFFSET, 0); + q[Q_DATA_OFFSET]= 0; /* Zero terminator for empty db */ + q+= Q_DATA_OFFSET + 1; + len= my_snprintf(buf, sizeof(buf), message, old_type); + comment_len= data_len - (min_query_event_len - 1); + if (comment_len <= len) + memcpy(q, buf, comment_len); + else + { + memcpy(q, buf, len); + memset(q+len, ' ', comment_len - len); + } + } + + if (checksum_alg == BINLOG_CHECKSUM_ALG_CRC32) + { + ha_checksum crc= my_checksum(0L, p, data_len); + int4store(p + data_len, crc); + } + return 0; +} + + #ifdef MYSQL_CLIENT /** Query_log_event::print(). === modified file 'sql/log_event.h' --- a/sql/log_event.h 2012-04-10 06:28:13 +0000 +++ b/sql/log_event.h 2012-04-27 08:20:38 +0000 @@ -566,6 +566,43 @@ enum enum_binlog_checksum_alg { #define BINLOG_CHECKSUM_LEN CHECKSUM_CRC32_SIGNATURE_LEN #define BINLOG_CHECKSUM_ALG_DESC_LEN 1 /* 1 byte checksum alg descriptor */
+/* + These are cabability numbers for MariaDB slave servers.
Typo. capability.
+ + Newer MariaDB slaves set this to inform the master about their capabilities. + This allows the master to decide which events it can send to the slave + without breaking replication on old slaves that maybe do not understand + all events from newer masters. + + As new releases are backwards compatible, a given capability implies also + all capabilities with smaller number. + + Older MariaDB slaves and other MySQL slave servers do not set this, so they + are recorded with capability 0. +*/ + +/* MySQL or old MariaDB slave with no announced capability. */ +#define MARIA_SLAVE_CAPABILITY_UNKNOWN 0 +/* MariaDB >= 5.3, which understands ANNOTATE_ROWS_EVENT. */ +#define MARIA_SLAVE_CAPABILITY_ANNOTATE 1 +/* + MariaDB >= 5.5. This version has the capability to tolerate events omitted + from the binlog stream without breaking replication (MySQL slaves fail + because they mis-compute the offsets into the master's binlog). +*/ +#define MARIA_SLAVE_CAPABILITY_TOLERATE_HOLES 2 +/* MariaDB > 5.5, which knows about binlog_checkpoint_log_event. */ +#define MARIA_SLAVE_CAPABILITY_BINLOG_CHECKPOINT 3 +/* + MariaDB server which understands MySQL 5.6 ignorable events. This server + can tolerate receiving any event with the LOG_EVENT_IGNORABLE_F flag set. +*/ +#define MARIA_SLAVE_CAPABILITY_IGNORABLE 4 + +/* Our capability. */ +#define MARIA_SLAVE_CAPABILITY_MINE MARIA_SLAVE_CAPABILITY_BINLOG_CHECKPOINT + + /** @enum Log_event_type
=== modified file 'sql/slave.cc' --- a/sql/slave.cc 2012-04-10 06:28:13 +0000 +++ b/sql/slave.cc 2012-04-27 08:20:38 +0000 @@ -1747,6 +1747,37 @@ past_checksum: } } } + + /* Announce MariaDB slave capabilities. */ + DBUG_EXECUTE_IF("simulate_slave_capability_none", goto after_set_cabability;); + { + int rc= DBUG_EVALUATE_IF("simulate_slave_capability_old_53", + mysql_real_query(mysql, STRING_WITH_LEN("SET @mariadb_slave_capability=" + STRINGIFY_ARG(MARIA_SLAVE_CAPABILITY_ANNOTATE))), + mysql_real_query(mysql, STRING_WITH_LEN("SET @mariadb_slave_capability=" + STRINGIFY_ARG(MARIA_SLAVE_CAPABILITY_MINE))));
Better put DBUG_EVALUATE_IF only around the STRINGIFY_ARG values: DBUG_EVALUATE_IF("...", STRINGIFY_ARG(...), STRINGIFY_ARG(...)) so that next time I (or somebody) wouldn't need to vdiff two long function calls, trying to count parentheses and commans :)
+ if (rc) + { + err_code= mysql_errno(mysql); + if (is_network_error(err_code)) + { + mi->report(ERROR_LEVEL, err_code, + "Setting @mariadb_slave_capability failed with error: %s", + mysql_error(mysql)); + goto network_err; + } + else + { + /* Fatal error */ + errmsg= "The slave I/O thread stops because a fatal error is " + "encountered when it tries to set @mariadb_slave_capability."; + sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql)); + goto err; + } + } + } +after_set_cabability: + err: if (errmsg) { === modified file 'sql/sql_repl.cc' --- a/sql/sql_repl.cc 2012-03-28 17:26:00 +0000 +++ b/sql/sql_repl.cc 2012-04-27 08:20:38 +0000 @@ -563,14 +584,44 @@ static int send_heartbeat_event(NET* net static const char * send_event_to_slave(THD *thd, NET *net, String* const packet, ushort flags, Log_event_type event_type, char *log_file_name, - IO_CACHE *log) + IO_CACHE *log, int mariadb_slave_capability, + ulong ev_offset, uint8 current_checksum_alg)
current_checksum_alg is part of THD, why do you bother to pass is separately?
{ my_off_t pos;
/* Do not send annotate_rows events unless slave requested it. */ - if (event_type == ANNOTATE_ROWS_EVENT && - !(flags & BINLOG_SEND_ANNOTATE_ROWS_EVENT)) - return NULL; + if (event_type == ANNOTATE_ROWS_EVENT && !(flags & BINLOG_SEND_ANNOTATE_ROWS_EVENT)) + { + if (mariadb_slave_capability >= MARIA_SLAVE_CAPABILITY_TOLERATE_HOLES) + { + /* This slave can tolerate events omitted from the binlog stream. */ + return NULL; + } + else if (mariadb_slave_capability >= MARIA_SLAVE_CAPABILITY_ANNOTATE) + { + /* + The slave did not request ANNOTATE_ROWS_EVENT (it does not need them as + it will not log them in its own binary log). However, it understands the + event and will just ignore it, and it would break if we omitted it, + leaving a hole in the binlog stream. So just send the event as-is. + */ + } + else + { + /* + The slave does not understand ANNOTATE_ROWS_EVENT. + + Older MariaDB slaves (and MySQL slaves) will break replication if there + are holes in the binlog stream (they will miscompute the binlog offset + and request the wrong position when reconnecting). + + So replace the event with a dummy event of the same size that will be + a no-operation on the slave. + */ + if (Query_log_event::dummy_event(packet, ev_offset, current_checksum_alg)) + return "Failed to replace row annotate event with dummy: too small event.";
No ER_xxx error message for that?
+ } + }
/* Skip events with the @@skip_replication flag set, if slave requested
Regards, Sergei