revision-id: 8409f451737516e653b078cc529cbb8439a9e3d1 (mariadb-10.1.38-92-g8409f451737) parent(s): d0116e10a5da52503a89a413e481996ce3f65e63 author: Jan Lindström committer: Jan Lindström timestamp: 2019-03-29 13:20:53 +0200 message: MDEV-18464: Port kill_one_trx fixes from 10.4 to 10.1 Pushed the decision for InnoDB transaction and system locking down to lock_trx_handle_wait() level. With this, we can avoid releasing these mutexes for executions where these mutexes were acquired upfront. To know who initiated an abort, a new abort_initiator field is added to THD. This field is protected by the LOCK_thd_data mutex, and we add a setter and a getter for this field. Both a wsrep thread and a replication thread starting a thread abort will mark InnoDB as the abort initiator. If a user kills a query, no abort initiator is set. Thus, InnoDB can safely identify whether this thread has already acquired the lock sys and trx mutexes or not. For wsrep and replication aborts we have acquired them, and for an external user kill we have not. This patch will also fix BF aborting of native threads, e.g. threads which have declared wsrep_on=OFF. Earlier, we used the was_chosen_as_deadlock_victim flag of InnoDB trx locks for marking InnoDB transactions which are victims of a wsrep BF abort. With native threads (wsrep_on==OFF), re-using the was_chosen_as_deadlock_victim flag may lead to interference with real deadlocks. 
--- sql/sql_class.cc | 32 ++++++++++++- sql/sql_class.h | 5 ++ storage/innobase/handler/ha_innodb.cc | 61 ++++++++++++------------ storage/innobase/include/ha_prototypes.h | 11 +++++ storage/innobase/include/lock0lock.h | 19 ++++++-- storage/innobase/include/trx0trx.h | 13 +----- storage/innobase/lock/lock0lock.cc | 80 +++++++++++++++++++++++++------- storage/innobase/row/row0sel.cc | 24 +++++++++- storage/innobase/trx/trx0roll.cc | 2 +- storage/innobase/trx/trx0trx.cc | 7 +-- storage/xtradb/handler/ha_innodb.cc | 59 +++++++++++------------ storage/xtradb/include/ha_prototypes.h | 13 +++++- storage/xtradb/include/lock0lock.h | 19 ++++++-- storage/xtradb/include/trx0trx.h | 13 +----- storage/xtradb/lock/lock0lock.cc | 80 +++++++++++++++++++++++++------- storage/xtradb/row/row0sel.cc | 26 ++++++++++- storage/xtradb/trx/trx0roll.cc | 2 +- storage/xtradb/trx/trx0trx.cc | 7 +-- 18 files changed, 325 insertions(+), 148 deletions(-) diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 639c7c1784a..288c380a323 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -622,6 +622,35 @@ void thd_set_ha_data(THD *thd, const struct handlerton *hton, } +/** + Provides setter for thread abort initiator. Caller should + already own LOCK_thd_data mutex. + + @param thd THD object + @param hton handlerton or NULL +*/ +extern "C" +void thd_set_abort_initiator(THD *thd, const struct handlerton *hton) +{ + mysql_mutex_assert_owner(&thd->LOCK_thd_data); + thd->abort_initiator= hton; +} + + +/** + Provides getter for thread abort initiator. Caller should + already own LOCK_thd_data mutex. + + @param thd THD object + @retun handlerton object or NULL +*/ +extern "C" +const struct handlerton* thd_get_abort_initiator(THD* thd) +{ + mysql_mutex_assert_owner(&thd->LOCK_thd_data); + return thd->abort_initiator; +} + /** Allow storage engine to wakeup commits waiting in THD::wait_for_prior_commit. 
@see thd_wakeup_subsequent_commits() definition in plugin.h @@ -4526,7 +4555,7 @@ thd_need_wait_for(const MYSQL_THD thd) not harmful, but could lead to unnecessary kill and retry, so best avoided). */ extern "C" void -thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd) +thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd, const handlerton* abort_initiator=NULL) { rpl_group_info *rgi; rpl_group_info *other_rgi; @@ -4562,6 +4591,7 @@ thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd) */ other_rgi->killed_for_retry= rpl_group_info::RETRY_KILL_KILLED; mysql_mutex_lock(&other_thd->LOCK_thd_data); + other_thd->abort_initiator= abort_initiator; other_thd->awake(KILL_CONNECTION); mysql_mutex_unlock(&other_thd->LOCK_thd_data); } diff --git a/sql/sql_class.h b/sql/sql_class.h index de52cec0f38..c94c9465ac5 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -4141,6 +4141,11 @@ class THD :public Statement, /* Handling of timeouts for commands */ thr_timer_t query_timer; + + /* Thread abort initiator. Used on thd::awake() to identify the + storage engine initing the thread abort or NULL. */ + const struct handlerton* abort_initiator; + public: void set_query_timer() { diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index cd605b6b791..a06dfec8868 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1862,6 +1862,29 @@ thd_to_trx( { return(*(trx_t**) thd_ha_data(thd, innodb_hton_ptr)); } + +/* Setter and getter for thread abort initiator. */ +extern "C" void thd_set_abort_initiator(MYSQL_THD thd, const struct handlerton *hton); +extern "C" const struct handlerton* thd_get_abort_initiator(MYSQL_THD thd); + +extern "C" void thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd, const struct handlerton* hton); + +/* Check if thread abort initiator is InnoDB. 
+@param thd MYSQL thread +@return true if abort initiator is InnoDB */ +bool innodb_is_abort_initiator(THD* thd) +{ + return (thd_get_abort_initiator(thd) == innodb_hton_ptr); +} + +/* Helper to report replication waits. +@param thd MYSQL thread +@param thd MYSQL victim thread */ +void innodb_report_wait_for(THD * thd, THD* other_thd) +{ + thd_report_wait_for(thd, other_thd, innodb_hton_ptr); +} + #ifdef WITH_WSREP ulonglong thd_to_trx_id( @@ -4929,8 +4952,6 @@ static void innobase_kill_query(handlerton*, THD* thd, enum thd_kill_levels) /* if victim has been signaled by BF thread and/or aborting is already progressing, following query aborting is not necessary any more. - Also, BF thread should own trx mutex for the victim, which would - conflict with trx_mutex_enter() below */ DBUG_VOID_RETURN; } @@ -4939,34 +4960,8 @@ static void innobase_kill_query(handlerton*, THD* thd, enum thd_kill_levels) if (trx_t* trx = thd_to_trx(thd)) { ut_ad(trx->mysql_thd == thd); - switch (trx->abort_type) { -#ifdef WITH_WSREP - case TRX_WSREP_ABORT: - break; -#endif - case TRX_SERVER_ABORT: - if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE)) { - lock_mutex_enter(); - } - /* fall through */ - case TRX_REPLICATION_ABORT: - trx_mutex_enter(trx); - } /* Cancel a pending lock request if there are any */ - lock_trx_handle_wait(trx); - switch (trx->abort_type) { -#ifdef WITH_WSREP - case TRX_WSREP_ABORT: - break; -#endif - case TRX_SERVER_ABORT: - if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE)) { - lock_mutex_exit(); - } - /* fall through */ - case TRX_REPLICATION_ABORT: - trx_mutex_exit(trx); - } + lock_trx_handle_wait_for_mysql(trx); } DBUG_VOID_RETURN; @@ -18683,6 +18678,11 @@ wsrep_innobase_kill_one_trx( wsrep_thd_ws_handle(thd)->trx_id); wsrep_thd_LOCK(thd); + + /* We mark thd as a victim and InnoDB as abort initiator, this thread + is already marked as BF victim. 
*/ + thd_set_abort_initiator(thd, innodb_hton_ptr); + DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock", { const char act[]= @@ -18866,7 +18866,6 @@ wsrep_abort_transaction( my_bool signal) { DBUG_ENTER("wsrep_innobase_abort_thd"); - trx_t* victim_trx = thd_to_trx(victim_thd); trx_t* bf_trx = (bf_thd) ? thd_to_trx(bf_thd) : NULL; @@ -18878,12 +18877,10 @@ wsrep_abort_transaction( if (victim_trx) { lock_mutex_enter(); trx_mutex_enter(victim_trx); - victim_trx->abort_type = TRX_WSREP_ABORT; int rcode = wsrep_innobase_kill_one_trx(bf_thd, bf_trx, victim_trx, signal); trx_mutex_exit(victim_trx); lock_mutex_exit(); - victim_trx->abort_type = TRX_SERVER_ABORT; wsrep_srv_conc_cancel_wait(victim_trx); DBUG_RETURN(rcode); } else { diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h index be423c8bd01..dd7dd2bc8a2 100644 --- a/storage/innobase/include/ha_prototypes.h +++ b/storage/innobase/include/ha_prototypes.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -665,4 +666,14 @@ normalize_table_name_low( ibool set_lower_case); /*!< in: TRUE if we want to set name to lower case */ +/* Check if thread abort initiator is InnoDB. +@param thd MYSQL thread +@return true if abort initiator is InnoDB */ +bool innodb_is_abort_initiator(THD* thd); + +/* Helper to report replication waits. 
+@param thd MYSQL thread +@param thd MYSQL victim thread */ +void innodb_report_wait_for(THD * thd, THD* other_thd); + #endif /* HA_INNODB_PROTOTYPES_H */ diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index b4259cd4851..e3883888ab0 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -821,13 +821,22 @@ lock_unlock_table_autoinc( /*********************************************************************//** Check whether the transaction has already been rolled back because it was selected as a deadlock victim, or if it has to wait then cancel -the wait lock. +the wait lock. This function should be called only when holding +lock sys mutex and trx mutex. + +@param trx transaction object +@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */ +dberr_t lock_trx_handle_wait(trx_t* trx) + MY_ATTRIBUTE((nonnull)); + +/*********************************************************************//** +Handle lock waits for MySQL interface. +See detailed description on lock0lock.cc + +@param trx transaction object @return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */ UNIV_INTERN -dberr_t -lock_trx_handle_wait( -/*=================*/ - trx_t* trx) /*!< in/out: trx lock state */ +dberr_t lock_trx_handle_wait_for_mysql(trx_t* trx) MY_ATTRIBUTE((nonnull)); /*********************************************************************//** Get the number of locks on a table. diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index fe16b8272b8..2a71e92fc58 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2018, MariaDB Corporation. +Copyright (c) 2015, 2019, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -623,7 +623,6 @@ struct trx_lock_t { lock_sys->mutex. Otherwise, this may only be modified by the thread that is serving the running transaction. */ - mem_heap_t* lock_heap; /*!< memory heap for trx_locks; protected by lock_sys->mutex */ @@ -695,14 +694,6 @@ lock_rec_convert_impl_to_expl()) will access transactions associated to other connections. The locks of transactions are protected by lock_sys->mutex and sometimes by trx->mutex. */ -enum trx_abort_t { - TRX_SERVER_ABORT = 0, -#ifdef WITH_WSREP - TRX_WSREP_ABORT, -#endif - TRX_REPLICATION_ABORT -}; - struct trx_t{ ulint magic_n; @@ -880,8 +871,6 @@ struct trx_t{ /*------------------------------*/ THD* mysql_thd; /*!< MySQL thread handle corresponding to this trx, or NULL */ - trx_abort_t abort_type; /*!< Transaction abort type*/ - const char* mysql_log_file_name; /*!< if MySQL binlog is used, this field contains a pointer to the latest file diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index f06fcd6c4d8..00580a80b62 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2018, MariaDB Corporation. +Copyright (c) 2014, 2019, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -402,7 +402,6 @@ lock_grant( lock_t* lock, /*!< in/out: waiting lock request */ bool owns_trx_mutex); /*!< in: whether lock->trx->mutex is owned */ -extern "C" void thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd); extern "C" int thd_need_wait_for(const MYSQL_THD thd); extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd); @@ -1793,10 +1792,8 @@ wsrep_kill_victim( } } - lock->trx->abort_type = TRX_WSREP_ABORT; wsrep_innobase_kill_one_trx(trx->mysql_thd, (const trx_t*) trx, lock->trx, TRUE); - lock->trx->abort_type = TRX_SERVER_ABORT; } } } @@ -4782,12 +4779,10 @@ lock_report_waiters_to_mysql( if (w_trx->id != victim_trx_id) { /* If thd_report_wait_for() decides to kill the transaction, then we will get a call back into - innobase_kill_query. We mark this by setting - current_lock_mutex_owner, so we can avoid trying - to recursively take lock_sys->mutex. */ - w_trx->abort_type = TRX_REPLICATION_ABORT; - thd_report_wait_for(mysql_thd, w_trx->mysql_thd); - w_trx->abort_type = TRX_SERVER_ABORT; + innobase_kill_query.*/ + trx_mutex_enter(w_trx); + innodb_report_wait_for(mysql_thd, w_trx->mysql_thd); + trx_mutex_exit(w_trx); } ++i; } @@ -7970,13 +7965,12 @@ lock_trx_release_locks( /*********************************************************************//** Check whether the transaction has already been rolled back because it was selected as a deadlock victim, or if it has to wait then cancel -the wait lock. +the wait lock. This function should be called only when holding +lock sys mutex and trx mutex. 
+ +@param trx transaction object @return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */ -UNIV_INTERN -dberr_t -lock_trx_handle_wait( -/*=================*/ - trx_t* trx) /*!< in/out: trx lock state */ +dberr_t lock_trx_handle_wait(trx_t* trx) { ut_ad(lock_mutex_own()); ut_ad(trx_mutex_own(trx)); @@ -7993,6 +7987,60 @@ lock_trx_handle_wait( return DB_LOCK_WAIT; } +/*********************************************************************//** +Handle lock waits for MySQL interface. + +This function should called only from handler API i.e. ha_innodb.cc. +Call trace: THD::awake() (we hold LOCK_thd_data) -> ha_kill_query() +-> hton->kill_query() -> innobase_kill_query() -> + +There is 3 possible cases: + +(1) wsrep high priority thread aborting lock holder i.e. victim thread +wsrep_abort_transaction() (takes lock sys mutex and trx mutex for +victim) -> marks abort initiator as InnoDB -> wsrep_thd_awake() +-> thd->awake(KILL_QUERY) (while holding LOCK_thd_data). +thd->abort_initiator is protected by LOCK_thd_data and we are +holding it so it can't change during this function. +Thus, we hold lock sys, trx and LOCK_thd_data mutexes. + +(2) replication thread aborting lock holder +lock_report_waiters_to_mysql() (we hold lock sys mutes and take trx +mutex to victim) -> innodb_report_wait_for() -> thd_report_wait_for() +we mark abort initiator as provided hton-> thd->awake(KILL_CONNECTION) +(while holding LOCK_thd_data). +thd->abort_initiator is protected by LOCK_thd_data and we are +holding it so it can't change during this function. +Thus, we hold lock sys, trx and LOCK_thd_data mutexes. + +(3) User action KILL [HARD | SOFT] [CONNECTION | QUERY [ID] ] +[thread_id | USER user_name | query_id] +thd->awake() (while holding LOCK_thd_data) no other mutexes. +Thus, thd->abort_initiator is protected by LOCK_thd_data and +we are holding it so it can't change during this function. +We need to take lock sys mutex and trx mutex. 
+ +@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */ +UNIV_INTERN +dberr_t lock_trx_handle_wait_for_mysql(trx_t* trx) +{ + dberr_t err= DB_SUCCESS; + + if (trx->mysql_thd && innodb_is_abort_initiator(trx->mysql_thd)) { + err = lock_trx_handle_wait(trx); + } else { + ut_ad(!lock_mutex_own()); + ut_ad(!trx_mutex_own(trx)); + lock_mutex_enter(); + trx_mutex_enter(trx); + err = lock_trx_handle_wait(trx); + lock_mutex_exit(); + trx_mutex_exit(trx); + } + + return err; +} + /*********************************************************************//** Get the number of locks on a table. @return number of locks */ diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index 06bf4cc30c0..223aa5b3c39 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -4744,7 +4744,29 @@ row_search_for_mysql( /* Check whether it was a deadlock or not, if not a deadlock and the transaction had to wait then - release the lock it is waiting on. */ + release the lock it is waiting on. + + There is two possible cases here: + + (1) wsrep high priority thread or replication + thread selects this thread as a victim. Both + will take lock sys mutex and trx mutex before + entering thread abort. Thus, either this + thread will check deadlock below or this + thread will be aborted and then we find it + out below. Thus concurrent access in + this case is protected by lock sys and trx + mutex. + + (2) User action kill. Here killing thread either + takes lock sys mutex and trx mutex before + aborting this thread and we will find it out + when they are released or this thread + acquires them fist and we check deadlock + and then killing thread can proceed. Thus + concurrent access in this case is also + protected by lock sys and trx mutex. 
+ */ lock_mutex_enter(); trx_mutex_enter(trx); diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc index 3fd71aff23a..fa2bced5ce0 100644 --- a/storage/innobase/trx/trx0roll.cc +++ b/storage/innobase/trx/trx0roll.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2018, MariaDB Corporation. +Copyright (c) 2016, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index f36aabba8b4..5359e51c17c 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2018, MariaDB Corporation. +Copyright (c) 2015, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1339,11 +1339,6 @@ trx_commit_in_memory( ut_ad(!trx->in_ro_trx_list); ut_ad(!trx->in_rw_trx_list); -#ifdef WITH_WSREP - if (trx->mysql_thd && wsrep_on(trx->mysql_thd)) { - trx->lock.was_chosen_as_deadlock_victim = FALSE; - } -#endif trx->dict_operation = TRX_DICT_OP_NONE; trx->error_state = DB_SUCCESS; diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index a0df23b60d4..0d072bcb6ad 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -2171,6 +2171,28 @@ thd_to_trx( return(*(trx_t**) thd_ha_data(thd, innodb_hton_ptr)); } +/* Setter and getter for thread abort initiator. 
*/ +extern "C" void thd_set_abort_initiator(MYSQL_THD thd, const struct handlerton *hton); +extern "C" const struct handlerton* thd_get_abort_initiator(MYSQL_THD thd); + +extern "C" void thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd, const struct handlerton* hton); + +/* Check if thread abort initiator is InnoDB. +@param thd MYSQL thread +@return true if abort initiator is InnoDB */ +bool innodb_is_abort_initiator(THD* thd) +{ + return (thd_get_abort_initiator(thd) == innodb_hton_ptr); +} + +/* Helper to report replication waits. +@param thd MYSQL thread +@param thd MYSQL victim thread */ +void innodb_report_wait_for(THD * thd, THD* other_thd) +{ + thd_report_wait_for(thd, other_thd, innodb_hton_ptr); +} + #ifdef WITH_WSREP ulonglong thd_to_trx_id( @@ -5534,8 +5556,6 @@ static void innobase_kill_query(handlerton*, THD* thd, enum thd_kill_levels) /* if victim has been signaled by BF thread and/or aborting is already progressing, following query aborting is not necessary any more. 
- Also, BF thread should own trx mutex for the victim, which would - conflict with trx_mutex_enter() below */ DBUG_VOID_RETURN; } @@ -5543,34 +5563,8 @@ static void innobase_kill_query(handlerton*, THD* thd, enum thd_kill_levels) if (trx_t* trx = thd_to_trx(thd)) { ut_ad(trx->mysql_thd == thd); - switch (trx->abort_type) { -#ifdef WITH_WSREP - case TRX_WSREP_ABORT: - break; -#endif - case TRX_SERVER_ABORT: - if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE)) { - lock_mutex_enter(); - } - /* fall through */ - case TRX_REPLICATION_ABORT: - trx_mutex_enter(trx); - } /* Cancel a pending lock request if there are any */ - lock_trx_handle_wait(trx); - switch (trx->abort_type) { -#ifdef WITH_WSREP - case TRX_WSREP_ABORT: - break; -#endif - case TRX_SERVER_ABORT: - if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE)) { - lock_mutex_exit(); - } - /* fall through */ - case TRX_REPLICATION_ABORT: - trx_mutex_exit(trx); - } + lock_trx_handle_wait_for_mysql(trx); } DBUG_VOID_RETURN; @@ -19723,6 +19717,11 @@ wsrep_innobase_kill_one_trx( (thd && wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void"); wsrep_thd_LOCK(thd); + + /* We mark thd as a victim and InnoDB as abort initiator, this thread + is already marked as BF victim. 
*/ + thd_set_abort_initiator(thd, innodb_hton_ptr); + DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock", { const char act[]= @@ -19911,12 +19910,10 @@ wsrep_abort_transaction(handlerton* hton, THD *bf_thd, THD *victim_thd, if (victim_trx) { lock_mutex_enter(); trx_mutex_enter(victim_trx); - victim_trx->abort_type = TRX_WSREP_ABORT; int rcode = wsrep_innobase_kill_one_trx(bf_thd, bf_trx, victim_trx, signal); trx_mutex_exit(victim_trx); lock_mutex_exit(); - victim_trx->abort_type = TRX_SERVER_ABORT; wsrep_srv_conc_cancel_wait(victim_trx); DBUG_RETURN(rcode); } else { diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h index 3f3c4f28ced..1b5cad16f34 100644 --- a/storage/xtradb/include/ha_prototypes.h +++ b/storage/xtradb/include/ha_prototypes.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. +Copyright (c) 2017, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -688,4 +688,15 @@ normalize_table_name_low( const char* name, /*!< in: table name string */ ibool set_lower_case); /*!< in: TRUE if we want to set name to lower case */ + +/* Check if thread abort initiator is InnoDB. +@param thd MYSQL thread +@return true if abort initiator is InnoDB */ +bool innodb_is_abort_initiator(THD* thd); + +/* Helper to report replication waits. 
+@param thd MYSQL thread +@param thd MYSQL victim thread */ +void innodb_report_wait_for(THD * thd, THD* other_thd); + #endif /* HA_INNODB_PROTOTYPES_H */ diff --git a/storage/xtradb/include/lock0lock.h b/storage/xtradb/include/lock0lock.h index 5aff67db0ee..9a68e1ff614 100644 --- a/storage/xtradb/include/lock0lock.h +++ b/storage/xtradb/include/lock0lock.h @@ -835,13 +835,22 @@ lock_unlock_table_autoinc( /*********************************************************************//** Check whether the transaction has already been rolled back because it was selected as a deadlock victim, or if it has to wait then cancel -the wait lock. +the wait lock. This function should be called only when holding +lock sys mutex and trx mutex. + +@param trx transaction object +@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */ +dberr_t lock_trx_handle_wait(trx_t* trx) + MY_ATTRIBUTE((nonnull)); + +/*********************************************************************//** +Handle lock waits for MySQL interface. +See detailed description on lock0lock.cc + +@param trx transaction object @return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */ UNIV_INTERN -dberr_t -lock_trx_handle_wait( -/*=================*/ - trx_t* trx) /*!< in/out: trx lock state */ +dberr_t lock_trx_handle_wait_for_mysql(trx_t* trx) MY_ATTRIBUTE((nonnull)); /*********************************************************************//** Get the number of locks on a table. diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h index 77afde4c35c..e8b2b31a169 100644 --- a/storage/xtradb/include/trx0trx.h +++ b/storage/xtradb/include/trx0trx.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2018, MariaDB Corporation. +Copyright (c) 2015, 2019, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -672,7 +672,6 @@ struct trx_lock_t { lock_sys->mutex. Otherwise, this may only be modified by the thread that is serving the running transaction. */ - mem_heap_t* lock_heap; /*!< memory heap for trx_locks; protected by lock_sys->mutex */ @@ -744,14 +743,6 @@ lock_rec_convert_impl_to_expl()) will access transactions associated to other connections. The locks of transactions are protected by lock_sys->mutex and sometimes by trx->mutex. */ -enum trx_abort_t { - TRX_SERVER_ABORT = 0, -#ifdef WITH_WSREP - TRX_WSREP_ABORT, -#endif - TRX_REPLICATION_ABORT -}; - struct trx_t{ ulint magic_n; @@ -930,8 +921,6 @@ struct trx_t{ /*------------------------------*/ THD* mysql_thd; /*!< MySQL thread handle corresponding to this trx, or NULL */ - trx_abort_t abort_type; /*!< Transaction abort type */ - const char* mysql_log_file_name; /*!< if MySQL binlog is used, this field contains a pointer to the latest file diff --git a/storage/xtradb/lock/lock0lock.cc b/storage/xtradb/lock/lock0lock.cc index 9daa2cc906f..183305a49d0 100644 --- a/storage/xtradb/lock/lock0lock.cc +++ b/storage/xtradb/lock/lock0lock.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2018, MariaDB Corporation. +Copyright (c) 2014, 2019, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -402,7 +402,6 @@ lock_grant( lock_t* lock, /*!< in/out: waiting lock request */ bool owns_trx_mutex); /*!< in: whether lock->trx->mutex is owned */ -extern "C" void thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd); extern "C" int thd_need_wait_for(const MYSQL_THD thd); extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd); @@ -1804,10 +1803,8 @@ wsrep_kill_victim( } } - lock->trx->abort_type = TRX_WSREP_ABORT; wsrep_innobase_kill_one_trx(trx->mysql_thd, (const trx_t*) trx, lock->trx, TRUE); - lock->trx->abort_type = TRX_SERVER_ABORT; } } } @@ -4821,12 +4818,10 @@ lock_report_waiters_to_mysql( if (w_trx->id != victim_trx_id) { /* If thd_report_wait_for() decides to kill the transaction, then we will get a call back into - innobase_kill_query. We mark this by setting - current_lock_mutex_owner, so we can avoid trying - to recursively take lock_sys->mutex. */ - w_trx->abort_type = TRX_REPLICATION_ABORT; - thd_report_wait_for(mysql_thd, w_trx->mysql_thd); - w_trx->abort_type = TRX_SERVER_ABORT; + innobase_kill_query.*/ + trx_mutex_enter(w_trx); + innodb_report_wait_for(mysql_thd, w_trx->mysql_thd); + trx_mutex_exit(w_trx); } ++i; } @@ -8080,13 +8075,12 @@ lock_trx_release_locks( /*********************************************************************//** Check whether the transaction has already been rolled back because it was selected as a deadlock victim, or if it has to wait then cancel -the wait lock. +the wait lock. This function should be called only when holding +lock sys mutex and trx mutex. 
+ +@param trx transaction object @return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */ -UNIV_INTERN -dberr_t -lock_trx_handle_wait( -/*=================*/ - trx_t* trx) /*!< in/out: trx lock state */ +dberr_t lock_trx_handle_wait(trx_t* trx) { ut_ad(lock_mutex_own()); ut_ad(trx_mutex_own(trx)); @@ -8103,6 +8097,60 @@ lock_trx_handle_wait( return DB_LOCK_WAIT; } +/*********************************************************************//** +Handle lock waits for MySQL interface. + +This function should called only from handler API i.e. ha_innodb.cc. +Call trace: THD::awake() (we hold LOCK_thd_data) -> ha_kill_query() +-> hton->kill_query() -> innobase_kill_query() -> + +There is 3 possible cases: + +(1) wsrep high priority thread aborting lock holder i.e. victim thread +wsrep_abort_transaction() (takes lock sys mutex and trx mutex for +victim) -> marks abort initiator as InnoDB -> wsrep_thd_awake() +-> thd->awake(KILL_QUERY) (while holding LOCK_thd_data). +thd->abort_initiator is protected by LOCK_thd_data and we are +holding it so it can't change during this function. +Thus, we hold lock sys, trx and LOCK_thd_data mutexes. + +(2) replication thread aborting lock holder +lock_report_waiters_to_mysql() (we hold lock sys mutes and take trx +mutex to victim) -> innodb_report_wait_for() -> thd_report_wait_for() +we mark abort initiator as provided hton-> thd->awake(KILL_CONNECTION) +(while holding LOCK_thd_data). +thd->abort_initiator is protected by LOCK_thd_data and we are +holding it so it can't change during this function. +Thus, we hold lock sys, trx and LOCK_thd_data mutexes. + +(3) User action KILL [HARD | SOFT] [CONNECTION | QUERY [ID] ] +[thread_id | USER user_name | query_id] +thd->awake() (while holding LOCK_thd_data) no other mutexes. +Thus, thd->abort_initiator is protected by LOCK_thd_data and +we are holding it so it can't change during this function. +We need to take lock sys mutex and trx mutex. 
+ +@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */ +UNIV_INTERN +dberr_t lock_trx_handle_wait_for_mysql(trx_t* trx) +{ + dberr_t err= DB_SUCCESS; + + if (trx->mysql_thd && innodb_is_abort_initiator(trx->mysql_thd)) { + err = lock_trx_handle_wait(trx); + } else { + ut_ad(!lock_mutex_own()); + ut_ad(!trx_mutex_own(trx)); + lock_mutex_enter(); + trx_mutex_enter(trx); + err = lock_trx_handle_wait(trx); + lock_mutex_exit(); + trx_mutex_exit(trx); + } + + return err; +} + /*********************************************************************//** Get the number of locks on a table. @return number of locks */ diff --git a/storage/xtradb/row/row0sel.cc b/storage/xtradb/row/row0sel.cc index b6b5d107885..6bc9098491d 100644 --- a/storage/xtradb/row/row0sel.cc +++ b/storage/xtradb/row/row0sel.cc @@ -2,7 +2,7 @@ Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. -Copyright (c) 2015, 2018, MariaDB Corporation. +Copyright (c) 2015, 2019, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -4753,7 +4753,29 @@ row_search_for_mysql( /* Check whether it was a deadlock or not, if not a deadlock and the transaction had to wait then - release the lock it is waiting on. */ + release the lock it is waiting on. + + There is two possible cases here: + + (1) wsrep high priority thread or replication + thread selects this thread as a victim. Both + will take lock sys mutex and trx mutex before + entering thread abort. Thus, either this + thread will check deadlock below or this + thread will be aborted and then we find it + out below. Thus concurrent access in + this case is protected by lock sys and trx + mutex. + + (2) User action kill. 
Here killing thread either + takes lock sys mutex and trx mutex before + aborting this thread and we will find it out + when they are released or this thread + acquires them fist and we check deadlock + and then killing thread can proceed. Thus + concurrent access in this case is also + protected by lock sys and trx mutex. + */ lock_mutex_enter(); trx_mutex_enter(trx); diff --git a/storage/xtradb/trx/trx0roll.cc b/storage/xtradb/trx/trx0roll.cc index 56b7120fa34..66a16ec9541 100644 --- a/storage/xtradb/trx/trx0roll.cc +++ b/storage/xtradb/trx/trx0roll.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2018, MariaDB Corporation. +Copyright (c) 2016, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software diff --git a/storage/xtradb/trx/trx0trx.cc b/storage/xtradb/trx/trx0trx.cc index 17cba81daf3..9fe947c8318 100644 --- a/storage/xtradb/trx/trx0trx.cc +++ b/storage/xtradb/trx/trx0trx.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2018, MariaDB Corporation. +Copyright (c) 2015, 2019, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1563,11 +1563,6 @@ trx_commit_in_memory( ut_ad(!trx->in_ro_trx_list); ut_ad(!trx->in_rw_trx_list); -#ifdef WITH_WSREP - if (trx->mysql_thd && wsrep_on(trx->mysql_thd)) { - trx->lock.was_chosen_as_deadlock_victim = FALSE; - } -#endif trx->dict_operation = TRX_DICT_OP_NONE; trx->error_state = DB_SUCCESS;