Re: [Commits] cb06612a9da: MDEV-31655: Parallel replication deadlock victim preference code erroneously removed
Hi Marko,

I'd like to ask you to review the below patch for MDEV-31655.

https://jira.mariadb.org/browse/MDEV-31655
https://github.com/MariaDB/server/commit/cb06612a9da09a7981ada84768793f2ff3e...

This patch restores code in InnoDB that was originally part of the parallel
replication implementation. It makes InnoDB choose the right victim when two
transactions in parallel replication deadlock with each other. The code was
erroneously removed as part of the VATS work in InnoDB; I saw that you later
deprecated and eventually removed the VATS code.

The InnoDB deadlock code is substantially changed in 10.6, so I will need to
rework the patch for that version. But I wanted to ask your opinion about
this 10.4 version first.

In the patch, note in particular the modification to has_higher_priority(),
which I think has the effect of choosing which of two waiting transactions to
grant a lock to. I don't remember adding this myself, so it might have been
added as part of the VATS work. But it seems appropriate: if T1 and T2 are
both waiting for a lock, there is no point in granting it to T2 over T1, as
T2 would then just be immediately killed and rolled back by replication.

 - Kristian.

Kristian Nielsen via commits <commits@lists.mariadb.org> writes:
revision-id: cb06612a9da09a7981ada84768793f2ff3ef842c (mariadb-10.4.30-3-gcb06612a9da)
parent(s): dbe5c20b755b87f67d87990c3baabc866667e41b
author: Kristian Nielsen
committer: Kristian Nielsen
timestamp: 2023-07-11 00:31:29 +0200
message:
MDEV-31655: Parallel replication deadlock victim preference code errorneously removed
Restore code to make InnoDB choose the second transaction as a deadlock victim if two transactions deadlock that need to commit in-order for parallel replication. This code was erroneously removed when VATS was implemented in InnoDB.
Also add a test case for InnoDB choosing the right deadlock victim.
Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
---
 .../suite/binlog_encryption/rpl_parallel.result | 42 ++++++++++++-
 mysql-test/suite/rpl/r/rpl_parallel.result      | 42 ++++++++++++-
 mysql-test/suite/rpl/t/rpl_parallel.test        | 71 +++++++++++++++++++++-
 sql/rpl_parallel.cc                             |  7 ++-
 sql/sql_class.cc                                | 43 +++++++++++++
 storage/innobase/lock/lock0lock.cc              | 12 ++++
 storage/innobase/trx/trx0trx.cc                 | 12 ++++
 7 files changed, 225 insertions(+), 4 deletions(-)
diff --git a/mysql-test/suite/binlog_encryption/rpl_parallel.result b/mysql-test/suite/binlog_encryption/rpl_parallel.result
index b75a66a634a..b24ff7ba53d 100644
--- a/mysql-test/suite/binlog_encryption/rpl_parallel.result
+++ b/mysql-test/suite/binlog_encryption/rpl_parallel.result
@@ -2,6 +2,7 @@ include/master-slave.inc
 [connection master]
 connection server_2;
 SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
+SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
 SET GLOBAL slave_parallel_threads=10;
 ERROR HY000: This operation cannot be performed as you have a running slave ''; run STOP SLAVE '' first
 include/stop_slave.inc
@@ -1680,13 +1681,52 @@ a
 2000
 SELECT * FROM t2 WHERE a>=2000 ORDER BY a;
 a
+MDEV-31655: Parallel replication deadlock victim preference code erroneously removed
+connection server_1;
+CREATE TABLE t7 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
+BEGIN;
+COMMIT;
+include/save_master_gtid.inc
+connection server_2;
+include/sync_with_master_gtid.inc
+include/stop_slave.inc
+set @@global.slave_parallel_threads= 5;
+set @@global.slave_parallel_mode= conservative;
+SET @old_dbug= @@GLOBAL.debug_dbug;
+SET GLOBAL debug_dbug= "+d,rpl_mdev31655_zero_retries";
+connection master;
+SET @old_dbug= @@SESSION.debug_dbug;
+SET SESSION debug_dbug="+d,binlog_force_commit_id";
+SET @commit_id= 1+1000;
+SET @commit_id= 2+1000;
+SET @commit_id= 3+1000;
+SET @commit_id= 4+1000;
+SET @commit_id= 5+1000;
+SET @commit_id= 6+1000;
+SET @commit_id= 7+1000;
+SET @commit_id= 8+1000;
+SET @commit_id= 9+1000;
+SET @commit_id= 10+1000;
+SET SESSION debug_dbug= @old_dbug;
+SELECT COUNT(*), SUM(a*100*b) FROM t7;
+COUNT(*)	SUM(a*100*b)
+10	225000
+include/save_master_gtid.inc
+connection server_2;
+include/start_slave.inc
+include/sync_with_master_gtid.inc
+SET GLOBAL debug_dbug= @old_dbug;
+SELECT COUNT(*), SUM(a*100*b) FROM t7;
+COUNT(*)	SUM(a*100*b)
+10	225000
 connection server_2;
 include/stop_slave.inc
 SET GLOBAL slave_parallel_threads=@old_parallel_threads;
+SET GLOBAL slave_parallel_mode=@old_parallel_mode;
 include/start_slave.inc
 SET DEBUG_SYNC= 'RESET';
 connection server_1;
 DROP function foo;
-DROP TABLE t1,t2,t3,t4,t5,t6;
+DROP TABLE t1,t2,t3,t4,t5,t6,t7;
 SET DEBUG_SYNC= 'RESET';
 include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/r/rpl_parallel.result b/mysql-test/suite/rpl/r/rpl_parallel.result
index 9b2e68d366e..ef89d954faa 100644
--- a/mysql-test/suite/rpl/r/rpl_parallel.result
+++ b/mysql-test/suite/rpl/r/rpl_parallel.result
@@ -2,6 +2,7 @@ include/master-slave.inc
 [connection master]
 connection server_2;
 SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
+SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
 SET GLOBAL slave_parallel_threads=10;
 ERROR HY000: This operation cannot be performed as you have a running slave ''; run STOP SLAVE '' first
 include/stop_slave.inc
@@ -1679,13 +1680,52 @@ a
 2000
 SELECT * FROM t2 WHERE a>=2000 ORDER BY a;
 a
+MDEV-31655: Parallel replication deadlock victim preference code erroneously removed
+connection server_1;
+CREATE TABLE t7 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
+BEGIN;
+COMMIT;
+include/save_master_gtid.inc
+connection server_2;
+include/sync_with_master_gtid.inc
+include/stop_slave.inc
+set @@global.slave_parallel_threads= 5;
+set @@global.slave_parallel_mode= conservative;
+SET @old_dbug= @@GLOBAL.debug_dbug;
+SET GLOBAL debug_dbug= "+d,rpl_mdev31655_zero_retries";
+connection master;
+SET @old_dbug= @@SESSION.debug_dbug;
+SET SESSION debug_dbug="+d,binlog_force_commit_id";
+SET @commit_id= 1+1000;
+SET @commit_id= 2+1000;
+SET @commit_id= 3+1000;
+SET @commit_id= 4+1000;
+SET @commit_id= 5+1000;
+SET @commit_id= 6+1000;
+SET @commit_id= 7+1000;
+SET @commit_id= 8+1000;
+SET @commit_id= 9+1000;
+SET @commit_id= 10+1000;
+SET SESSION debug_dbug= @old_dbug;
+SELECT COUNT(*), SUM(a*100*b) FROM t7;
+COUNT(*)	SUM(a*100*b)
+10	225000
+include/save_master_gtid.inc
+connection server_2;
+include/start_slave.inc
+include/sync_with_master_gtid.inc
+SET GLOBAL debug_dbug= @old_dbug;
+SELECT COUNT(*), SUM(a*100*b) FROM t7;
+COUNT(*)	SUM(a*100*b)
+10	225000
 connection server_2;
 include/stop_slave.inc
 SET GLOBAL slave_parallel_threads=@old_parallel_threads;
+SET GLOBAL slave_parallel_mode=@old_parallel_mode;
 include/start_slave.inc
 SET DEBUG_SYNC= 'RESET';
 connection server_1;
 DROP function foo;
-DROP TABLE t1,t2,t3,t4,t5,t6;
+DROP TABLE t1,t2,t3,t4,t5,t6,t7;
 SET DEBUG_SYNC= 'RESET';
 include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_parallel.test b/mysql-test/suite/rpl/t/rpl_parallel.test
index 9ba7a30f2eb..d43cec4df34 100644
--- a/mysql-test/suite/rpl/t/rpl_parallel.test
+++ b/mysql-test/suite/rpl/t/rpl_parallel.test
@@ -13,6 +13,7 @@
 --connection server_2
 SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
+SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
 --error ER_SLAVE_MUST_STOP
 SET GLOBAL slave_parallel_threads=10;
 --source include/stop_slave.inc
@@ -2203,16 +2204,84 @@ SELECT * FROM t1 WHERE a>=2000 ORDER BY a;
 SELECT * FROM t2 WHERE a>=2000 ORDER BY a;
+--echo MDEV-31655: Parallel replication deadlock victim preference code erroneously removed
+# The problem was that InnoDB would choose the wrong deadlock victim.
+# Create a lot of transactions that can cause deadlocks, and use error
+# injection to check that the first transactions in each group is never
+# selected as deadlock victim.
+--let $rows= 10
+--let $transactions= 5
+--let $gcos= 10
+
+--connection server_1
+CREATE TABLE t7 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
+BEGIN;
+--disable_query_log
+--let $i= 0
+while ($i < 10) {
+  eval INSERT INTO t7 VALUES ($i, 0);
+  inc $i;
+}
+--enable_query_log
+COMMIT;
+--source include/save_master_gtid.inc
+
+--connection server_2
+--source include/sync_with_master_gtid.inc
+--source include/stop_slave.inc
+eval set @@global.slave_parallel_threads= $transactions;
+set @@global.slave_parallel_mode= conservative;
+SET @old_dbug= @@GLOBAL.debug_dbug;
+# This error injection will allow no retries for GTIDs divisible by 1000.
+SET GLOBAL debug_dbug= "+d,rpl_mdev31655_zero_retries";
+
+--connection master
+SET @old_dbug= @@SESSION.debug_dbug;
+SET SESSION debug_dbug="+d,binlog_force_commit_id";
+
+--let $j= 1
+while ($j <= $gcos) {
+  eval SET @commit_id= $j+1000;
+  --let $i= 0
+  while ($i < $transactions) {
+    --disable_query_log
+    eval SET SESSION gtid_seq_no= 1000 + 1000*$j + $i;
+    BEGIN;
+    --let $k= 0
+    while ($k < $rows) {
+      eval UPDATE t7 SET b=b+1 WHERE a=(($i+$k) MOD $rows);
+      inc $k;
+    }
+    COMMIT;
+    --enable_query_log
+    inc $i;
+  }
+  inc $j;
+}
+
+SET SESSION debug_dbug= @old_dbug;
+SELECT COUNT(*), SUM(a*100*b) FROM t7;
+
+--source include/save_master_gtid.inc
+
+--connection server_2
+--source include/start_slave.inc
+--source include/sync_with_master_gtid.inc
+SET GLOBAL debug_dbug= @old_dbug;
+SELECT COUNT(*), SUM(a*100*b) FROM t7;
+
+
 # Clean up.
 --connection server_2
 --source include/stop_slave.inc
 SET GLOBAL slave_parallel_threads=@old_parallel_threads;
+SET GLOBAL slave_parallel_mode=@old_parallel_mode;
 --source include/start_slave.inc
 SET DEBUG_SYNC= 'RESET';
 --connection server_1
 DROP function foo;
-DROP TABLE t1,t2,t3,t4,t5,t6;
+DROP TABLE t1,t2,t3,t4,t5,t6,t7;
 SET DEBUG_SYNC= 'RESET';
 --source include/rpl_end.inc
diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc
index b550315d69f..1aeb1257c4a 100644
--- a/sql/rpl_parallel.cc
+++ b/sql/rpl_parallel.cc
@@ -1385,8 +1385,13 @@ handle_rpl_parallel_thread(void *arg)
               err= dbug_simulate_tmp_error(rgi, thd););
       if (unlikely(err))
       {
+        ulong max_retries= slave_trans_retries;
         convert_kill_to_deadlock_error(rgi);
-        if (has_temporary_error(thd) && slave_trans_retries > 0)
+        DBUG_EXECUTE_IF("rpl_mdev31655_zero_retries",
+                        if ((rgi->current_gtid.seq_no % 1000) == 0)
+                          max_retries= 0;
+                        );
+        if (has_temporary_error(thd) && max_retries > 0)
           err= retry_event_group(rgi, rpt, qev);
       }
     }
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 8ed3f8a9c5e..e6ed7ca1cc4 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -5247,6 +5247,49 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd)
   return 0;
 }
+
+/*
+  If the storage engine detects a deadlock, and needs to choose a victim
+  transaction to roll back, it can call this function to ask the upper
+  server layer for which of two possible transactions is prefered to be
+  aborted and rolled back.
+
+  In parallel replication, if two transactions are running in parallel and
+  one is fixed to commit before the other, then the one that commits later
+  will be prefered as the victim - chosing the early transaction as a victim
+  will not resolve the deadlock anyway, as the later transaction still needs
+  to wait for the earlier to commit.
+
+  The return value is -1 if the first transaction is prefered as a deadlock
+  victim, 1 if the second transaction is prefered, or 0 for no preference (in
+  which case the storage engine can make the choice as it prefers).
+*/
+extern "C" int
+thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2)
+{
+  rpl_group_info *rgi1, *rgi2;
+
+  if (!thd1 || !thd2)
+    return 0;
+
+  /*
+    If the transactions are participating in the same replication domain in
+    parallel replication, then request to select the one that will commit
+    later (in the fixed commit order from the master) as the deadlock victim.
+  */
+  rgi1= thd1->rgi_slave;
+  rgi2= thd2->rgi_slave;
+  if (rgi1 && rgi2 &&
+      rgi1->is_parallel_exec &&
+      rgi1->rli == rgi2->rli &&
+      rgi1->current_gtid.domain_id == rgi2->current_gtid.domain_id)
+    return rgi1->gtid_sub_id < rgi2->gtid_sub_id ? 1 : -1;
+
+  /* No preferences, let the storage engine decide. */
+  return 0;
+}
+
+
 extern "C" int thd_non_transactional_update(const MYSQL_THD thd)
 {
   return(thd->transaction.all.modified_non_trans_table);
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index b1cf2152cd6..469d03eaa06 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -49,6 +49,8 @@ Created 5/7/1996 Heikki Tuuri
 #include <mysql/service_wsrep.h>
 #endif /* WITH_WSREP */
+extern "C" int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2);
+
 /** Lock scheduling algorithm */
 ulong innodb_lock_schedule_algorithm;
@@ -1538,6 +1540,16 @@ static bool has_higher_priority(lock_t *lock1, lock_t *lock2)
 	} else if (!lock_get_wait(lock2)) {
 		return false;
 	}
+	// Ask the upper server layer if any of the two trx should be prefered.
+	int preference = thd_deadlock_victim_preference(lock1->trx->mysql_thd,
+							lock2->trx->mysql_thd);
+	if (preference == -1) {
+		// lock1 is preferred as a victim, so lock2 has higher priority
+		return false;
+	} else if (preference == 1) {
+		// lock2 is preferred as a victim, so lock1 has higher priority
+		return true;
+	}
 	return lock1->trx->start_time_micro <= lock2->trx->start_time_micro;
 }
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index 7cd95878b0c..0771d764fb6 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -52,6 +52,9 @@ Created 3/26/1996 Heikki Tuuri
 #include <set>
 #include <new>
+extern "C"
+int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2);
+
 /** The bit pattern corresponding to TRX_ID_MAX */
 const byte trx_id_max_bytes[8] = {
 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
@@ -1906,6 +1909,15 @@ trx_weight_ge(
 {
 	ibool	a_notrans_edit;
 	ibool	b_notrans_edit;
+	int	pref;
+
+	/* First ask the upper server layer if it has any preference for which
+	to prefer as a deadlock victim. */
+	pref= thd_deadlock_victim_preference(a->mysql_thd, b->mysql_thd);
+	if (pref < 0)
+		return FALSE;
+	else if (pref > 0)
+		return TRUE;

 	/* If mysql_thd is NULL for a transaction we assume that it has not
 	edited non-transactional tables. */

_______________________________________________
commits mailing list -- commits@lists.mariadb.org
To unsubscribe send an email to commits-leave@lists.mariadb.org
Hi Kristian,

On Tue, Jul 11, 2023 at 1:35 AM Kristian Nielsen <knielsen@knielsen-hq.org> wrote:
Hi Marko,
I'd like to ask you to review the below patch for MDEV-31655.
https://jira.mariadb.org/browse/MDEV-31655 https://github.com/MariaDB/server/commit/cb06612a9da09a7981ada84768793f2ff3e...
I am sorry for the delay; last month I was mostly on vacation. I sent some minor comments that apply to the InnoDB code changes. I think that this needs to be reviewed by the replication team as well. Whoever reviews that should pay attention to the stability of the tests. We have a few replication tests that fail rather often.
This patch restores code in InnoDB that was originally part of the parallel replication implementation. It makes InnoDB choose the right victim when two transactions in parallel replication deadlock with each other. The code was erroneously removed as part of the VATS work in InnoDB. I saw that you later deprecated and eventually removed the VATS code.
As far as I can tell, innodb_lock_schedule_algorithm=VATS consistently reproduced debug assertion failures during stress testing. It was finally removed from 10.6. We might introduce something similar later, provided that it passes tests. In MySQL 8.0.20, https://dev.mysql.com/worklog/task/?id=13468 reimplemented this as CATS (Contention-Aware Transaction Scheduling).
I saw that the InnoDB deadlock code is substantially changed in 10.6, so I will need to rework the patch in that version. But I wanted to ask your opinion about this 10.4 version first.
I must trust your judgement here; I have no fundamental objection. For better testing of this, I think that we really need a 10.6 based branch that contains both this and MDEV-31482.
In the patch, note in particular the modification to has_higher_priority(), which I think has the effect of choosing which of two waiting transactions to grant a lock to. I don't remember adding this myself, so it might have been added as part of VATS work. But it sounds appropriate; if T1 and T2 are both waiting for a lock, there is no point in granting it to T2 over T1, as then T2 will just be immediately killed and rolled back by replication.
Because innodb_lock_schedule_algorithm=VATS is known to be broken even without any involvement of replication, I do not think that we have to care too much about it.

Marko

-- 
Marko Mäkelä, Lead Developer InnoDB
MariaDB plc
Marko Mäkelä via developers <developers@lists.mariadb.org> writes:
For better testing of this, I think that we really need a 10.6 based branch that contains both this and MDEV-31482.
I did an initial merge to 10.6. The victim selection code is in
Deadlock::report() in lock0lock.cc. I noticed one thing in the current 10.6
code:

    if (next == victim)
      trx_pos= l;
    ...
    if (trx_pos && trx_weight == victim_weight)
    {
      victim= trx;
      victim_pos= trx_pos;

IIUC, the intention with this code is that if the current transaction and
another transaction are both candidates for being the deadlock victim, we
select the current transaction as the preferred candidate?

If so, it seems there's a small mistake in the code. The current condition
will always be true for each new value of victim, so trx_pos will always be
set != 0. The first condition should probably have been "if (trx ==
victim)"?

In theory I think this means we could wrongly select the current trx as the
deadlock victim in the case of a "P-shaped" path, and thus the deadlock
would not be broken. I thought about coming up with a test case to show
this, but it looks like it's not easy to get the same TRX_WEIGHT for trx
and victim. If so, maybe this part of the code can just be removed, or what
do you think?

Another important issue I noticed, though not directly related to
MDEV-31655, is this:

    /* Even though lock_wait_rpl_report() has nothing to do with
    deadlock detection, it was always disabled by innodb_deadlock_detect=OFF.
    We will keep it in that way, because unfortunately
    thd_need_wait_reports() will hold even if parallel (or any) replication
    is not being used. We want to be allow the user to skip
    lock_wait_rpl_report(). */
    const bool rpl= !(type_mode & LOCK_AUTO_INC) && trx->mysql_thd &&
      innodb_deadlock_detect && thd_need_wait_reports(trx->mysql_thd);

So this is a bug I introduced in my original implementation. The reporting
of lock waits is essential for optimistic parallel replication to work at
all. Without it, the following will happen:

 - Transactions T1 and T2 replicate in parallel, run out-of-order.
 - T2 updates some row, goes to commit and wait_for_prior_commit(T1).
 - T1 goes to update the same row, gets a lock wait on T2.
 - T1 eventually gets a lock wait timeout and is retried.
 - The retry of T1 will get the same problem, and replication eventually
   breaks when the --slave-transactions-retries value is reached.

I think for parallel replication, we have to do the lock wait reporting
always to avoid this. Since we are anyway going to do a lock wait, an
expensive operation, I think it is reasonable to accept the small overhead
of a lock wait report to the server layer.

However, the reason thd_need_wait_reports() also returns true for
non-replication threads (when the binlog is enabled) is to write a hint in
the GTID event that a transaction had a wait on the master. This is
non-critical and could be avoided if we want. Maybe something we should
look into/discuss.
I sent some minor comments that apply to the InnoDB code changes.
Thanks! Fixed, except for this one:
+ extern "C" int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2);
Usually, server function prototypes for InnoDB are declared in storage/innobase/include/ha_prototypes.h. Please move this there
The other similar functions (thd_rpl_deadlock_check()) are declared in a section in lock0lock.cc, so I moved this declaration there as well for consistency (but let me know if you still prefer ha_prototypes.h for this, or maybe for all of them).
I think that this needs to be reviewed by the replication team as well.
Yes, Andrei said he would review also.
Whoever reviews that should pay attention to the stability of the tests. We have a few replication tests that fail rather often.
Tell me about it ... replication tests, and in general any test where many parallel threads of operation are involved, are notoriously hard to get stable. I'll make sure to check for any failures in Buildbot. I 100% agree with the need to be careful with this. In fact, this problem was found because I investigated a sporadic failure in the test case rpl_mark_optimize_tbl_ddl, which was caused by the erroneous removal of this code.
In the patch, note in particular the modification to has_higher_priority(), which I think has the effect of choosing which of two waiting transactions to grant a lock to. I don't remember adding this myself, so it might have
Because innodb_lock_schedule_algorithm=VATS is known to be broken even without any involvement of replication, I do not think that we have to care too much about it.
Ok.
I must trust your judgement here; I have no fundamental objection.
Thanks, Marko.

I pushed an initial merge to 10.6 of this, but I'll refine it once I fully
understand the code around trx_pos:

https://github.com/MariaDB/server/tree/knielsen_bugfix_10.6
https://github.com/MariaDB/server/commit/b475257dc2297a8a7271367ad4a8d659297...

The code with review comments fixed for 10.4 is here:

https://github.com/MariaDB/server/tree/knielsen_bugfix
https://github.com/MariaDB/server/commit/24679f3daf57c8c62faf019f732ef263472...

 - Kristian.
Hi Kristian,

On Sun, Aug 6, 2023 at 6:59 PM Kristian Nielsen <knielsen@knielsen-hq.org> wrote:
I noticed one thing in the current 10.6 code:
    if (next == victim)
      trx_pos= l;
    ...
    if (trx_pos && trx_weight == victim_weight)
    {
      victim= trx;
      victim_pos= trx_pos;
IIUC, the intention with this code is that if the current transaction and another transaction are both candidates for being the deadlock victim, select the current transaction as the preferred candidate?
If so, seems there's a small mistake in the code. The current condition will always be true for each new value of victim, so trx_pos will always be set != 0. The first condition should probably have been "if (trx == victim)" ?
As far as I remember, the original reason for the weight calculation was that deadlock resolution would roll back the transaction that has done the least amount of work, expressed as the "weight" of the transaction. The weight is computed based on whether thd_has_edited_nontrans_tables() holds (the transaction has modified any non-transactional tables, such as MyISAM tables), on the number of undo log records written so far (trx_t::undo_no), and on the number of InnoDB locks held by the transaction.

I rewrote the deadlock detector in https://jira.mariadb.org/browse/MDEV-24738 (MariaDB Server 10.6). The old one seemed to assume that all deadlocks are simple cycles (P-shaped with an empty "foot", that is, O-shaped), and the reporting assumed that deadlocks involve only 2 transactions. The variable "victim_pos" identifies the "victim" transaction in the deadlock report.

If we have a P-shaped path from the current transaction to a deadlock of transactions, then Deadlock::find_cycle() should have returned the O-shaped portion of it, which the current transaction would not be part of. According to a comment in find_cycle(), this probably means that the cycle had been formed while innodb_deadlock_detect=OFF had been in effect in the recent past (before innodb_lock_wait_timeout has broken the cycle) and the current transaction is waiting for locks held by one participant of the cycle.
In theory I think this means we could wrongly select the current trx as the deadlock victim in the case of a "P-shaped" path, and thus the deadlock would not be broken. I thought about coming up with a test case to show this, but it looks like it's not easy to get the same TRX_WEIGHT for trx and victim. If so, maybe this part of the code can just be removed, or what do you think?
Can you check it once again?
Another important issue I noticed, though not directly related to MDEV-31655, is this:
    /* Even though lock_wait_rpl_report() has nothing to do with
    deadlock detection, it was always disabled by innodb_deadlock_detect=OFF.
    We will keep it in that way, because unfortunately
    thd_need_wait_reports() will hold even if parallel (or any) replication
    is not being used. We want to be allow the user to skip
    lock_wait_rpl_report(). */
    const bool rpl= !(type_mode & LOCK_AUTO_INC) && trx->mysql_thd &&
      innodb_deadlock_detect && thd_need_wait_reports(trx->mysql_thd);
So this is a bug I introduced in my original implementation. The reporting of lock waits is essential for optimistic parallel replication to work at all. Without it, the following will happen:
 - Transactions T1 and T2 replicate in parallel, run out-of-order.
 - T2 updates some row, goes to commit and wait_for_prior_commit(T1).
 - T1 goes to update the same row, gets a lock wait on T2.
 - T1 eventually gets a lock wait timeout and is retried.
 - The retry of T1 will get the same problem, and replication eventually
   breaks when the --slave-transactions-retries value is reached.
I think for parallel replication, we have to do the lock wait reporting always to avoid this. Since we are anyway going to do a lock wait, an expensive operation, I think it is reasonable to accept the small overhead of a lock wait report to the server layer.
However, the reason thd_need_wait_reports() also returns true for non-replication threads (when the binlog is enabled) is to write a hint in the GTID event that a transaction had a wait on the master. This is non-critical and could be avoided if we want. Maybe something we should look into/discuss.
Thank you for the explanation. Can you please also post it to
https://jira.mariadb.org/browse/MDEV-24948 for future reference?

It would also be useful if you could check the following changes in MySQL
8.0 that are linked from MDEV-24948:

https://github.com/mysql/mysql-server/commit/30ead1f6966128cbcd32c7b6029ea21...
https://github.com/mysql/mysql-server/commit/3859219875b62154b921e8c6078c751...
The other similar functions (thd_rpl_deadlock_check()) are declared in a section in lock0lock.cc, so I moved this declaration there as well for consistency (but let me know if I you still prefer ha_prototypes.h for this or maybe all of them).
That makes sense too. The main thing for me would be that the function declaration is not duplicated in multiple *.cc files.
I 100% agree with the need to be careful with this. In fact, this problem was found because I investigated a sporadic failure in the test case rpl_mark_optimize_tbl_ddl, which was caused by the erroneous removal of this code.
I really appreciate your recent efforts of investigating the root causes of sporadic test failures. In InnoDB, apart from locking, a prominent source of sporadic test failures has been statistics: https://jira.mariadb.org/browse/MDEV-10682 https://jira.mariadb.org/browse/MDEV-21136
In the patch, note in particular the modification to has_higher_priority(), which I think has the effect of choosing which of two waiting transactions to grant a lock to. I don't remember adding this myself, so it might have
Because innodb_lock_schedule_algorithm=VATS is known to be broken even without any involvement of replication, I do not think that we have to care too much about it.
Ok.
I must trust your judgement here; I have no fundamental objection.
Thanks, Marko.
I pushed an initial merge to 10.6 of this, but I'll refine it once I fully understand the code around trx_pos:
https://github.com/MariaDB/server/tree/knielsen_bugfix_10.6 https://github.com/MariaDB/server/commit/b475257dc2297a8a7271367ad4a8d659297...
I think that thd_deadlock_victim_preference() needs to be invoked on an array of THD* and return a pointer to the victim THD, or nullptr if there is no preference. In your current 10.6 patch, we may have victim==nullptr for a large portion of the loop, and therefore we might choose something else than the actual preferred victim.

Do you have an idea how to stress test this at a larger scale than is feasible within the regression test suite?

Marko

-- 
Marko Mäkelä, Lead Developer InnoDB
MariaDB plc
Marko Mäkelä <marko.makela@mariadb.com> writes:
On Sun, Aug 6, 2023 at 6:59 PM Kristian Nielsen <knielsen@knielsen-hq.org> wrote:
If so, seems there's a small mistake in the code. The current condition will always be true for each new value of victim, so trx_pos will always be set != 0. The first condition should probably have been "if (trx == victim)" ?
As far as I remember, the original reason for the weight calculation was that the deadlock resolution would roll back the transaction that has done the least amount of work, expressed as the "weight" of the transaction. The weight is computed based on whether thd_has_edited_nontrans_tables() holds (the transaction has modified any non-transactional tables, such as MyISAM tables), on the number of undo log records written so far (trx_t::undo_no), and the number of InnoDB locks held by the transaction.
Yes. Consider this P-shaped path, with trx=T1 and cycle=T3:

    T1 -> T2 -> T3 -> T2

    T1  undo_no=100  lock.trx_locks length= 4  TRX_WEIGHT(T1)=104
    T2  undo_no=101  lock.trx_locks length=10  TRX_WEIGHT(T2)=115
    T3  undo_no=102  lock.trx_locks length= 2  TRX_WEIGHT(T3)=104

In the first loop iteration:

 - next=T2
 - 115 < ~0ULL, so we assign victim=T2 and victim_weight=115
 - next==victim, so we assign trx_pos=1 (this is wrong, I think).

In the second iteration:

 - next=T3
 - 104 < 115, so we assign victim=T3 and victim_weight=104
 - next==victim, so we assign trx_pos=2 (wrong)
 - next==cycle, so we end the loop.

At the end of the loop, trx_pos != 0 and trx_weight==victim_weight (==104),
so we assign victim=T1. The result is that we select T1 as the deadlock
victim, even though this does not break the deadlock.

I think it's just a small mistake in this code:

    if (next == victim)
      trx_pos= l;

which should have been this:

    if (next == trx)
      trx_pos= l;

My question is, if instead of fixing it we should just remove it along with
this code?

    if (trx_pos && trx_weight == victim_weight)
    {
      victim= trx;
      victim_pos= trx_pos;

I'm thinking it will be rare to have the same value of
(undo_no + LENGTH(lock.trx_locks)) for two transactions, which is the only
way this code can trigger. But I'm not familiar with the original reason
for this piece of the code, that's why I asked.
would not be broken. I thought about coming up with a test case to show this, but it looks like it's not easy to get the same TRX_WEIGHT for trx and victim. If so, maybe this part of the code can just be removed, or what do you think?
Can you check it once again?
My problem is, I'm not sufficiently familiar with InnoDB internals to know how to arrange the values of lock.trx_locks and undo_no in particular to make the condition (trx_weight == victim_weight) be true in a situation where we have a P-shaped path. It would be fun to try, I might try it, but it will take some time.
In your current 10.6 patch, we may have victim==nullptr for a large portion of the loop, and therefore, we might choose something else than the actual preferred victim.
No, victim_weight is initialized to ~0ULL, so the condition (next_weight < victim_weight) will be true and victim will be initialized =next (and =cycle) on the first iteration. Thus victim!=nullptr on every iteration except the first.
I think that thd_deadlock_victim_preference() needs to be invoked on an array of THD* and return a pointer to the victim THD, or nullptr if there is no preference.
I do not think that is necessary. The fundamental issue is in-order parallel replication, where we have a series of transactions T1, T2, T3, ... that we know a priori _must_ commit in this order. In case of a deadlock between them, we must avoid T1 being chosen as a deadlock victim and immediately causing the same deadlock again, repeatedly. This is the bug seen here. But as long as we prevent T1 being chosen as the victim, replication will be able to proceed. Choosing T3 is better, but T2 also works. Thus, we do not need InnoDB to choose the _best_ deadlock victim. We just need to avoid it choosing the _worst_ victim, and the thd_deadlock_victim_preference() facility should ensure that.

That said, the current thd_deadlock_victim_preference() mechanism is not ideal. The dependencies involved are split between InnoDB and the replication layer, and neither can solve it fully on its own. Similar problems must exist e.g. for deadlocks involving cross-engine transactions. Maybe a full solution requires server-wide deadlock detection.

(For in-order parallel replication itself, the deadlock detection problem is much simpler, because we _know_ that any T_(i+1) will eventually have to wait for T_(i). So a deadlock will occur if-and-only-if there is some T_j that waits for T_k, with j < k. But this is not true when considering transactions other than those in the in-order sequence within one replication domain_id.)

The thd_deadlock_victim_preference() is really just a comparison of thd->rgi_slave->gtid_sub_id. Maybe it would be simpler and more efficient to expose that to InnoDB, and simply use the gtid_sub_id as the TRX_WEIGHT, if present (with a few bit operations to make the comparison work out, similar to the (1ULL << 63) in the current code)? It exposes internal replication detail to InnoDB, but sometimes that is necessary to get the simplest and most efficient code...
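As a thought experiment only (not the actual patch), folding gtid_sub_id into the weight could look something like the following. It assumes the detector victimizes the transaction with the smallest weight; `encode_weight` and its bit layout are entirely hypothetical:

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical: fold the replication commit order into the weight so a
// plain unsigned comparison picks the right victim.  sub_id == 0 means
// "not an ordered slave transaction".
static uint64_t encode_weight(uint64_t trx_weight, uint64_t gtid_sub_id)
{
  if (gtid_sub_id == 0)
    return (1ULL << 63) | trx_weight;   // heavier than any ordered trx
  // Invert the sub_id so that a transaction later in commit order
  // compares as lighter, i.e. is preferred as the victim.
  return ~gtid_sub_id & ~(1ULL << 63);
}
```

With this encoding, any non-replication transaction compares heavier than any ordered slave transaction, and among ordered ones the later sub_id always loses, which is exactly the preference replication needs.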
I think that for parallel replication we always have to do the lock wait reporting to avoid this. Since we are going to do a lock wait anyway, which is an expensive operation, I think it is reasonable to accept the small overhead of reporting the lock wait to the server layer.
Thank you for the explanation. Can you please also post it to https://jira.mariadb.org/browse/MDEV-24948 for future reference?
Ok, will do.
It would also be useful if you could check the following changes in MySQL 8.0 that are linked from MDEV-24948: https://github.com/mysql/mysql-server/commit/30ead1f6966128cbcd32c7b6029ea21... https://github.com/mysql/mysql-server/commit/3859219875b62154b921e8c6078c751...
Ok, I will check it.
I pushed an initial merge to 10.6 of this, but I'll refine it once I fully understand the code around trx_pos.
Do you have an idea how to stress test this at a larger scale than is feasible within the regression test suite?
We can set --slave-transaction-retries=1 and run optimistic parallel replication on a workload with many row lock conflicts. If a transaction gets a temporary error (like a deadlock), it will roll back and wait for all prior transactions to commit before retrying. Thus, on retry, all other transactions will be preferred as deadlock victims. If more than one retry is required, it indicates a bug in the deadlock victim selection. The test rpl_parallel_deadlock_victim.test is kind of a stress test, but it uses error injection to check that the wrong victim is not selected. - Kristian.
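A minimal replica configuration for such a stress run might look like this (these are the standard MariaDB replica option names; the thread count is illustrative):

```ini
# Replica-side settings for the stress scenario described above
[mysqld]
slave-parallel-mode       = optimistic
slave-parallel-threads    = 8
# With only one retry allowed, any second retry of the same
# transaction indicates a victim-selection bug.
slave-transaction-retries = 1
```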
Kristian Nielsen via developers <developers@lists.mariadb.org> writes:
(For in-order parallel replication itself, the deadlock detection problem is much simpler, because we _know_ that any T_(i+1) will eventually have to wait for T_(i). So a deadlock will occur if-and-only-if there is some T_j that waits for T_k, with j < k. But this is not true when considering transactions other than those in the in-order sequence within one replication domain_id).
It's good to consider these issues again. It's a pity we didn't have this discussion in 10.0/10.1 when I implemented parallel replication, or later in 10.6 when you improved the InnoDB locking. Maybe we could do things much more simply?

If we expose the gtid_sub_id directly, the InnoDB deadlock detector can just check whether the sub_id increases while traversing the wait-for path. If it does, it can stop there, declare a deadlock, and choose the transaction with the higher sub_id as the victim. We will have to roll back that transaction anyway at some point to preserve commit order, and doing so now will break any cycle if one is there. Then we get rid of trying to hint InnoDB which victim to prefer with thd_deadlock_victim_preference(). And we don't need the thd_rpl_deadlock_check() for every wait, which currently has to queue a background task to later kill the waited-for transaction.

This needs some more thought to be sure all the details are right. I think I should still fix just the regression with the missing thd_deadlock_victim_preference() for now, but it is something to think about to make the code both simpler and more efficient.
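The "stop at the first sub_id increase" idea can be sketched as a toy model (not InnoDB code): represent each wait along the traversed path as a pair of gtid_sub_id values, and declare the later transaction the victim as soon as an earlier one is seen waiting on it:

```cpp
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

// Each element is (waiter_sub_id, waited_for_sub_id) along the
// wait-for path being traversed.  Returns the sub_id of the
// transaction to roll back, or 0 if commit order is not violated.
static uint64_t first_order_violation(
    const std::vector<std::pair<uint64_t, uint64_t>> &waits)
{
  for (const auto &w : waits)
    if (w.second > w.first)   // some T_j waits for a later T_k (j < k)
      return w.second;        // rolling back T_k breaks any cycle
  return 0;
}
```

Since T_(i+1) must eventually wait for T_(i) anyway, rolling back the first later-in-order transaction found waiting "backwards" both breaks any cycle and costs nothing extra: that transaction would have to be rolled back to preserve commit order regardless.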
Thank you for the explanation. Can you please also post it to https://jira.mariadb.org/browse/MDEV-24948 for future reference?
I added some more explanation there, and ideas for how to mostly eliminate the overhead.
It would also be useful if you could check the following changes in MySQL 8.0 that are linked from MDEV-24948:
https://github.com/mysql/mysql-server/commit/30ead1f6966128cbcd32c7b6029ea21...
Interesting. This seems similar to https://jira.mariadb.org/browse/MDEV-31840, which I discovered recently. The MySQL code uses a function thd_report_lock_wait(), which looks very similar to the MariaDB thd_rpl_deadlock_check().
https://github.com/mysql/mysql-server/commit/3859219875b62154b921e8c6078c751...
This is a large patch, I did not yet manage to read through it all. But from reading some of the comments, it sounds like they move the deadlock detection to be done asynchronously as a background task, rather than synchronously whenever a wait is needed. This is quite interesting, and something I've earlier idly speculated could reduce the cost of deadlock detection. - Kristian.
Kristian Nielsen via developers <developers@lists.mariadb.org> writes:
I'm thinking it will be rare to have the same value of (undo_no + LENGTH(lock.trx_locks)) for two transactions, which is the only way this code can trigger. But I'm not familiar with the original reason for this piece of the code, that's why I asked.
Ok, now I understand that the TRX_WEIGHT is approximately the number of rows modified plus the number of locks held, so it will not be _that_ rare for two weights to be equal. So I have kept the functionality that, all else being equal, the current trx is preferred over another transaction in the cycle. I assume this is because it's cheaper to roll back the current transaction than to mark another transaction as the victim, go to sleep, and then wake back up once the other transaction(s) have released the lock waited for?

I refined my patch with bitwise operations to keep using a single comparison between weights in the cycle loop. I use the top bit for the thd_deadlock_victim_preference() and the lowest bit to prefer the current transaction over another with the same weight. The result is here. If you agree and Andrei also reviews ok, I will use this as the 10.6 version of the 10.4 patch (which won't merge to 10.6). This should fix several sporadic Buildbot failures as well.

https://github.com/MariaDB/server/tree/knielsen_bugfix_10.6
https://github.com/MariaDB/server/commit/5216296b2f85290ee5ed760c62e18b075e3...

- Kristian.
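For reference, the packing described can be sketched like this. The bit positions and the assumption that the cycle loop victimizes the smallest weight are mine, not necessarily what the actual 10.6 commit does:

```cpp
#include <cassert>
#include <cstdint>

// Assumed sketch: the deadlock loop victimizes the smallest weight.
//  - bit 63 set   => thd_deadlock_victim_preference() says this trx
//                    must not be the victim (it must commit earlier)
//  - bit 0 clear  => this is the current trx, so on an otherwise equal
//                    weight it compares smaller and gets rolled back,
//                    which is the cheaper choice
static uint64_t pack_weight(uint64_t trx_weight, bool rpl_must_survive,
                            bool is_current_trx)
{
  uint64_t w= (trx_weight << 1) | (is_current_trx ? 0 : 1);
  if (rpl_must_survive)
    w|= 1ULL << 63;
  return w;
}
```

This keeps the loop body a single unsigned comparison: the replication preference dominates via the top bit, and ties on the real TRX_WEIGHT fall to the current transaction via the low bit.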
Hi Kristian, Sorry for the late reply. On Wed, Aug 9, 2023 at 4:25 PM Kristian Nielsen <knielsen@knielsen-hq.org> wrote:
I assume this is because it's cheaper to roll back the current transaction, than to mark another transaction as the victim, go to sleep, and then wake back up once the other transaction(s) has released the lock waited for?
Yes, definitely, rolling back the current transaction is easier and should be preferred. Actually, some time ago we tried to simplify the asynchronous rollback in https://jira.mariadb.org/browse/MDEV-29860 but in the end, decided against it.
I refined my patch with bitwise operations to keep using a single comparison between weights in the cycle loop. I use the top bit for the thd_deadlock_victim_preference() and the lowest bit to prefer the current transaction over another with same weight.
This makes sense, thank you.

For the non-HAVE_REPLICATION code path, I might have defined the dummy variables victim_not_pref, next_not_pref as constexpr. But the oldest compiler that I found on godbolt.org (GCC 4.1.2) is able to optimize away the bitwise OR with a non-const variable that is 0, starting with -O1. The oldest compiler that we aim to support is GCC 4.8.5.

Marko

--
Marko Mäkelä, Lead Developer InnoDB
MariaDB plc
participants (2)
- Kristian Nielsen
- Marko Mäkelä