revision-id: 71cb28e39ad4f670b4cf067f8b877be352c5cc4c (mariadb-10.1.33-31-g71cb28e39ad) parent(s): 3b7da8a44c8a0ff4b40b37e4db01f7e397aefab5 author: Andrei Elkin committer: Andrei Elkin timestamp: 2018-06-09 11:16:37 +0300 message: MDEV-13577 slave_parallel_mode=optimistic should not report the mode's specific temporary errors The optimistic parallel slave's worker thread could face a run-time error due to the algorithm's specifics which allows for conflicts like the reported "Can't find record in 'table'". A typical stack is like {noformat} #0 handler::print_error (this=0x61c00008f8a0, error=149, errflag=0) at handler.cc:3650 #1 0x0000555555e95361 in write_record (thd=thd@entry=0x62a0000a2208, table=table@entry=0x61f00008ce88, info=info@entry=0x7fffdee356d0) at sql_insert.cc:1944 #2 0x0000555555ea7767 in mysql_insert (thd=thd@entry=0x62a0000a2208, table_list=0x61b00012ada0, fields=..., values_list=..., update_fields=..., update_values=..., duplic=<optimized out>, ignore=<optimized out>) at sql_insert.cc:1039 #3 0x0000555555efda90 in mysql_execute_command (thd=thd@entry=0x62a0000a2208) at sql_parse.cc:3927 #4 0x0000555555f0cc50 in mysql_parse (thd=0x62a0000a2208, rawbuf=<optimized out>, length=<optimized out>, parser_state=<optimized out>) at sql_parse.cc:7449 #5 0x00005555566d4444 in Query_log_event::do_apply_event (this=0x61200005b9c8, rgi=<optimized out>, query_arg=<optimized out>, q_len_arg=<optimized out>) at log_event.cc:4508 #6 0x00005555566d639e in Query_log_event::do_apply_event (this=<optimized out>, rgi=<optimized out>) at log_event.cc:4185 #7 0x0000555555d738cf in Log_event::apply_event (rgi=0x61d0001ea080, this=0x61200005b9c8) at log_event.h:1343 #8 apply_event_and_update_pos_apply (ev=ev@entry=0x61200005b9c8, thd=thd@entry=0x62a0000a2208, rgi=rgi@entry=0x61d0001ea080, reason=<optimized out>) at slave.cc:3479 #9 0x0000555555d8596b in apply_event_and_update_pos_for_parallel (ev=ev@entry=0x61200005b9c8, thd=thd@entry=0x62a0000a2208, rgi=rgi@entry=0x61d0001ea080) at slave.cc:3623 #10 0x00005555562aca83 in rpt_handle_event (qev=qev@entry=0x6190000fa088, rpt=rpt@entry=0x62200002bd68) at rpl_parallel.cc:50 #11 0x00005555562bd04e in handle_rpl_parallel_thread (arg=arg@entry=0x62200002bd68) at rpl_parallel.cc:1258 {noformat} Here {{handler::print_error}} computes whether to error log the current error when --log-warnings > 1. The decision flag is consulted bu {{my_message_sql()}} which can be eventually called. In the bug case the decision is to log. However in the optimistic mode slave applier case any conflict is attempted to resolve with rollback and retry to success. Hence the logging is at least extraneous. The case is fixed with adding a new flag {{ME_LOG_AS_WARN}} which {{handler::print_error}} may propagate further on through {{my_error}} when the error comes from an optimistically running slave worker thread. The new flag effectively requests the warning level for the errlog record, while the thread's DA records the actual error (which is regarded as temporary one by the parallel slave error handler). --- include/my_sys.h | 1 + .../suite/rpl/r/rpl_parallel_optimistic.result | 23 ++++++++ .../suite/rpl/t/rpl_parallel_optimistic.test | 64 +++++++++++++++++++++- sql/handler.cc | 9 ++- sql/mysqld.cc | 10 ++++ sql/sql_class.cc | 5 ++ sql/sql_class.h | 6 ++ 7 files changed, 116 insertions(+), 2 deletions(-) diff --git a/include/my_sys.h b/include/my_sys.h index 110a2ee9af3..1c5649812d1 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -112,6 +112,7 @@ typedef struct my_aio_result { #define ME_JUST_INFO 1024 /**< not error but just info */ #define ME_JUST_WARNING 2048 /**< not error but just warning */ #define ME_FATALERROR 4096 /* Fatal statement error */ +#define ME_LOG_AS_WARN 8192 /* is error but error-logged as warning */ /* Bits in last argument to fn_format */ #define MY_REPLACE_DIR 1 /* replace dir in name with 'dir' */ diff --git a/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result b/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result index 0177e65b10f..a6da3399fab 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result +++ b/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result @@ -1,4 +1,6 @@ include/rpl_init.inc [topology=1->2] +call mtr.add_suppression("Warning.*Deadlock found when trying to get lock; try restarting transaction"); +call mtr.add_suppression("Warning.*mysqld: Can't find record in 't2'"); ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; CREATE TABLE t1 (a int PRIMARY KEY, b INT) ENGINE=InnoDB; SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; @@ -543,6 +545,27 @@ a b 57 7 58 8 59 9 +DELETE FROM t1; +DELETE FROM t2; +include/save_master_gtid.inc +include/sync_with_master_gtid.inc +BEGIN; +INSERT INTO t1 SET a=1; +SET @save.binlog_format=@@session.binlog_format; +SET @@SESSION.binlog_format=row; +BEGIN; +INSERT INTO t1 SET a=1; +INSERT INTO t2 SET a=1; +COMMIT; +BEGIN; +DELETE FROM t2; +COMMIT; +ROLLBACK; +SET @@SESSION.binlog_format= @save.binlog_format; +DELETE FROM t1; +DELETE FROM t2; +include/save_master_gtid.inc +include/sync_with_master_gtid.inc include/stop_slave.inc SET GLOBAL slave_parallel_mode=@old_parallel_mode; SET GLOBAL slave_parallel_threads=@old_parallel_threads; diff --git a/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test b/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test index 41fb6ebb72e..d0164c1cefa 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test @@ -4,6 +4,11 @@ --let $rpl_topology=1->2 --source include/rpl_init.inc +--connection server_2 +call mtr.add_suppression("Warning.*Deadlock found when trying to get lock; try restarting transaction"); +# The following instruction is a part of the proof of MDEV-13577 fixes, below. +call mtr.add_suppression("Warning.*mysqld: Can't find record in 't2'"); + --connection server_1 ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; CREATE TABLE t1 (a int PRIMARY KEY, b INT) ENGINE=InnoDB; @@ -480,8 +485,65 @@ SELECT * FROM t2 WHERE a >= 40 ORDER BY a; SELECT * FROM t1 WHERE a >= 40 ORDER BY a; SELECT * FROM t2 WHERE a >= 40 ORDER BY a; -# Clean up. +# partial cleanup to reuse the tables by following tests +--connection server_1 +DELETE FROM t1; +DELETE FROM t2; +--source include/save_master_gtid.inc + +--connection server_2 +--source include/sync_with_master_gtid.inc +# +# MDEV-13577 optimistic parallel slave errors out to error log unnecessary +# + +# The 1st of the following two trx blocker on slave +--connection server_2 +BEGIN; +INSERT INTO t1 SET a=1; + +--connection server_1 +SET @save.binlog_format=@@session.binlog_format; +SET @@SESSION.binlog_format=row; + +BEGIN; + INSERT INTO t1 SET a=1; + INSERT INTO t2 SET a=1; +COMMIT; + +# This transaction is going to win optimistical race with above INSERT +# on slave while being depend on it. That means it will face a kind of temporary error +# and then will retry to succeed. +BEGIN; + DELETE FROM t2; +COMMIT; + +# First make sure DELETE raced indeed to get stuck at retrying stage +# where it runs "realistically" now. There is nomore optimistic error +# in the errorlog, which is downgraded to the warning level (when +# --log-warnings > 1), see above suppression. +--connection server_2 +--let $wait_condition= SELECT COUNT(*) = 1 FROM information_schema.processlist WHERE state = "Waiting for prior transaction to commit" +--source include/wait_condition.inc + +# Next release the 1st trx to commit. +--connection server_2 +ROLLBACK; + +# MDEV-13577 local cleanup: +--connection server_1 +SET @@SESSION.binlog_format= @save.binlog_format; +DELETE FROM t1; +DELETE FROM t2; +--source include/save_master_gtid.inc + +--connection server_2 +--source include/sync_with_master_gtid.inc + +# +# Clean up. +# --connection server_2 --source include/stop_slave.inc SET GLOBAL slave_parallel_mode=@old_parallel_mode; diff --git a/sql/handler.cc b/sql/handler.cc index 397891ceec5..1778b099a4a 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -3639,12 +3639,19 @@ void handler::print_error(int error, myf errflag) if ((debug_assert_if_crashed_table || global_system_variables.log_warnings > 1)) { + THD *thd= ha_thd(); /* Log error to log before we crash or if extended warnings are requested */ + DBUG_ASSERT(!(thd && thd->system_thread == SYSTEM_THREAD_SLAVE_SQL) || + (thd == current_thd && + thd->rgi_slave)); + errflag|= ME_NOREFRESH; + if (thd->is_optimistic_slave_worker()) + errflag|= ME_LOG_AS_WARN; } - } + } /* if we got an OS error from a file-based engine, specify a path of error */ if (error < HA_ERR_FIRST && bas_ext()[0]) diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 5f954f7576d..5bf5e3f73fc 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -3517,6 +3517,16 @@ void my_message_sql(uint error, const char *str, myf MyFlags) level= Sql_condition::WARN_LEVEL_WARN; func= sql_print_warning; } + else if (MyFlags & ME_LOG_AS_WARN) + { + /* + Typical use case is optimistic parallel slave where DA needs to hold + an error condition caused by the current error, but the error-log + level is relaxed to the warning one. + */ + level= Sql_condition::WARN_LEVEL_ERROR; + func= sql_print_warning; + } else { level= Sql_condition::WARN_LEVEL_ERROR; diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 24140246b96..cc1d1974a60 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -7100,6 +7100,11 @@ bool THD::rgi_have_temporary_tables() return rgi_slave->rli->save_temporary_tables != 0; } +bool THD::is_optimistic_slave_worker() +{ + return system_thread == SYSTEM_THREAD_SLAVE_SQL && rgi_slave && + rgi_slave->speculation == rpl_group_info::SPECULATE_OPTIMISTIC; +} void wait_for_commit::reinit() diff --git a/sql/sql_class.h b/sql/sql_class.h index ca6155ec93f..0ced84791a0 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -4167,6 +4167,12 @@ class THD :public Statement, (THD_TRANS::DID_WAIT | THD_TRANS::CREATED_TEMP_TABLE | THD_TRANS::DROPPED_TEMP_TABLE | THD_TRANS::DID_DDL)); } + + /* + Returns true when the thread handle belongs to a slave worker thread + running in the optimistic execution mode. + */ + bool is_optimistic_slave_worker(); };