Hi Jan, On Fri, Mar 22, 2019 at 1:59 PM jan <jan.lindstrom@mariadb.com> wrote:
revision-id: 842ef80b029c975b7f7f7ec25fe55817ec626f02 (mariadb-10.1.38-68-g842ef80b029)
Was this pushed to buildbot as well? I did not find the commit ID. First, some general comments: * The functions trx_start_if_not_started_low() and trx_start_for_ddl_low() differ between XtraDB and InnoDB with regard to #ifdef WITH_WSREP. Why? Are the debug assignments truly necessary? I see that the difference was introduced 5 years ago in MDEV-6247. * The files storage/{innobase,xtradb}/trx/trx0roll.cc differ from each other after the change. It looks like the innobase version is better. Please remove the unnecessary lines from xtradb: --- storage/xtradb/trx/trx0roll.cc 2019-03-26 10:20:31.767971537 +0200 +++ storage/innobase/trx/trx0roll.cc 2019-03-26 10:20:31.759971465 +0200 @@ -33,8 +33,6 @@ #include "trx0roll.ic" #endif -#include <mysql/service_wsrep.h> - #include "fsp0fsp.h" #include "mach0data.h" #include "trx0rseg.h" @@ -51,9 +49,6 @@ #include "pars0pars.h" #include "srv0mon.h" #include "trx0sys.h" -#ifdef WITH_WSREP -#include "ha_prototypes.h" -#endif /* WITH_WSREP */ /** This many pages must be undone before a truncate is tried within rollback */ @@ -1075,12 +1070,6 @@ if (trx->update_undo) { trx_undo_truncate_end(trx, trx->update_undo, limit); } - -#ifdef WITH_WSREP_OUT - if (wsrep_on(trx->mysql_thd)) { - trx->lock.was_chosen_as_deadlock_victim = FALSE; - } -#endif /* WITH_WSREP */ } /***********************************************************************//** On to the detailed review:
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index fe16b8272b8..b83734b154a 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -1,7 +1,7 @@ /*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2018, MariaDB Corporation. +Copyright (c) 2015, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -613,6 +613,9 @@ struct trx_lock_t { transaction as a victim in deadlock resolution, it sets this to TRUE. Protected by trx->mutex. */ + bool was_chosen_as_replication_victim; + /* replication has marked this + trx to abort */ time_t wait_started; /*!< lock wait started at this time, protected only by lock_sys->mutex */
@@ -624,6 +627,12 @@ struct trx_lock_t { only be modified by the thread that is serving the running transaction. */
+#ifdef WITH_WSREP + bool was_chosen_as_wsrep_victim; + /*!< high priority wsrep thread has + marked this trx to abort */ +#endif /* WITH_WSREP */ + mem_heap_t* lock_heap; /*!< memory heap for trx_locks; protected by lock_sys->mutex */
@@ -695,14 +704,6 @@ lock_rec_convert_impl_to_expl()) will access transactions associated to other connections. The locks of transactions are protected by lock_sys->mutex and sometimes by trx->mutex. */
-enum trx_abort_t { - TRX_SERVER_ABORT = 0, -#ifdef WITH_WSREP - TRX_WSREP_ABORT, -#endif - TRX_REPLICATION_ABORT -}; - struct trx_t{ ulint magic_n;
@@ -880,8 +881,6 @@ struct trx_t{ /*------------------------------*/ THD* mysql_thd; /*!< MySQL thread handle corresponding to this trx, or NULL */ - trx_abort_t abort_type; /*!< Transaction abort type*/ - const char* mysql_log_file_name; /*!< if MySQL binlog is used, this field contains a pointer to the latest file
Why are you replacing trx_t::abort_type with two fields in trx_lock_t that are located far apart from each other? To avoid changing the performance too much due to caching, I would suggest to introduce the two fields in trx_t, replacing the former location of abort_type. Do we really need such long names? Would trx_t::replication_victim and trx_t::wsrep_victim suffice? Please document how concurrent access to the fields is being protected. Apparently for the WSREP field it is both trx->mutex and lock_sys->mutex.
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 3970a559a4c..984ac6cacbc 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -4785,9 +4783,8 @@ lock_report_waiters_to_mysql( innobase_kill_query. We mark this by setting current_lock_mutex_owner, so we can avoid trying to recursively take lock_sys->mutex. */ - w_trx->abort_type = TRX_REPLICATION_ABORT; + w_trx->lock.was_chosen_as_replication_victim = true; thd_report_wait_for(mysql_thd, w_trx->mysql_thd); - w_trx->abort_type = TRX_SERVER_ABORT; } ++i; }
Here, we seem to be only holding lock_sys->mutex, not w_trx->mutex.
@@ -7993,6 +7981,37 @@ lock_trx_handle_wait( return DB_LOCK_WAIT; }
+/*********************************************************************//** +Check whether the transaction has already been rolled back because it +was selected as a deadlock victim, or if it has to wait then cancel +the wait lock. +@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */ +UNIV_INTERN +dberr_t +lock_trx_handle_wait( +/*=================*/ + trx_t* trx) /*!< in/out: trx lock state */ +{ + bool lock_mutex_taken = false;
Do we really need this variable?
+#ifdef WITH_WSREP + /* We already own mutexes */ + if (trx->lock.was_chosen_as_wsrep_victim) { + return lock_trx_handle_wait_low(trx); + } +#endif /* WITH_WSREP */ + if (!trx->lock.was_chosen_as_replication_victim) { + lock_mutex_enter(); + lock_mutex_taken = true; + } + trx_mutex_enter(trx); + dberr_t err = lock_trx_handle_wait_low(trx); + if (lock_mutex_taken) { + lock_mutex_exit(); + } + trx_mutex_exit(trx); + return err; +}
I would expand the second part of the code and remove the condition before releasing the 2 mutexes: if (!trx->lock.was_chosen_as_replication_victim) { lock_mutex_enter(); trx_mutex_enter(trx); dberr_t err = lock_trx_handle_wait_low(trx); lock_mutex_exit(); trx_mutex_exit(trx); return err; } else { trx_mutex_enter(trx); dberr_t err = lock_trx_handle_wait_low(trx); trx_mutex_exit(trx); return err; } Also, I think that it would be better to declare lock_trx_handle_wait_low() only static but not inline, to reduce unnecessary code expansion. Best regards, Marko