Re: [Maria-developers] 20e5c37: MDEV-7728 - Spiral patch 032_mariadb-10.0.15.scale_registering_xid.diff
Hi, Sergey! On Mar 12, svoj@mariadb.org wrote:
revision-id: 20e5c37ccfac9d95a2db1f295f211656df1ce160 parent(s): 190858d996e7dc90e01fe8a5e7daec6af0012b23 committer: Sergey Vojtovich branch nick: 10.1 timestamp: 2015-03-12 18:15:08 +0400 message:
MDEV-7728 - Spiral patch 032_mariadb-10.0.15.scale_registering_xid.diff
XID cache is now based on lock-free hash. Also fixed lf_hash_destroy() to call alloc destructor.
Note that previous implementation had race condition when thread was accessing XA owned by different thread. This new implementation doesn't fix it either.
diff --git a/include/lf.h b/include/lf.h index 88024bc..19bdafc 100644 --- a/include/lf.h +++ b/include/lf.h @@ -117,7 +117,12 @@ void lf_pinbox_init(LF_PINBOX *pinbox, uint free_ptr_offset, #define lf_alloc_free(PINS, PTR) lf_pinbox_free((PINS), (PTR)) #define lf_alloc_get_pins(A) lf_pinbox_get_pins(&(A)->pinbox) #define lf_alloc_put_pins(PINS) lf_pinbox_put_pins(PINS) -#define lf_alloc_direct_free(ALLOC, ADDR) my_free((ADDR)) +#define lf_alloc_direct_free(ALLOC, ADDR) \ + do { \ + if ((ALLOC)->destructor) \ + (ALLOC)->destructor((uchar*) ADDR); \ + my_free(ADDR); \ + } while(0)
void *lf_alloc_new(LF_PINS *pins);
diff --git a/mysql-test/suite/innodb/r/innodb_bug59641.result b/mysql-test/suite/innodb/r/innodb_bug59641.result index 5062c69..476385f 100644 --- a/mysql-test/suite/innodb/r/innodb_bug59641.result +++ b/mysql-test/suite/innodb/r/innodb_bug59641.result @@ -43,8 +43,8 @@ COMMIT; XA RECOVER; formatID gtrid_length bqual_length data 1 3 0 789 -1 3 0 456 1 3 0 123 +1 3 0 456 XA ROLLBACK '123'; XA ROLLBACK '456'; XA COMMIT '789'; diff --git a/mysql-test/suite/perfschema/r/server_init.result b/mysql-test/suite/perfschema/r/server_init.result index 20dc130..1bdb988 100644 --- a/mysql-test/suite/perfschema/r/server_init.result +++ b/mysql-test/suite/perfschema/r/server_init.result @@ -120,10 +120,6 @@ where name like "wait/synch/mutex/sql/LOCK_audit_mask"; count(name) 1 select count(name) from mutex_instances -where name like "wait/synch/mutex/sql/LOCK_xid_cache"; -count(name) -1 -select count(name) from mutex_instances where name like "wait/synch/mutex/sql/LOCK_plugin"; count(name) 1 diff --git a/mysql-test/suite/perfschema/r/setup_instruments_defaults.result b/mysql-test/suite/perfschema/r/setup_instruments_defaults.result index e502376..b0570d7 100644 --- a/mysql-test/suite/perfschema/r/setup_instruments_defaults.result +++ b/mysql-test/suite/perfschema/r/setup_instruments_defaults.result @@ -5,14 +5,14 @@ SELECT * FROM performance_schema.setup_instruments WHERE name IN ( 'wait/synch/mutex/sql/LOCK_user_conn', 'wait/synch/mutex/sql/LOCK_uuid_generator', -'wait/synch/mutex/sql/LOCK_xid_cache', +'wait/synch/mutex/sql/LOCK_plugin', 'stage/sql/creating table') AND enabled = 'yes' AND timed = 'no' ORDER BY name; NAME ENABLED TIMED stage/sql/creating table YES NO +wait/synch/mutex/sql/LOCK_plugin YES NO wait/synch/mutex/sql/LOCK_user_conn YES NO -wait/synch/mutex/sql/LOCK_xid_cache YES NO SELECT * FROM performance_schema.setup_instruments WHERE name = 'wait/synch/mutex/sql/LOCK_thread_count' AND enabled = 'no' AND timed = 'no'; diff --git a/mysql-test/suite/perfschema/t/server_init.test b/mysql-test/suite/perfschema/t/server_init.test index d5a7e18..c6d25f1 100644 --- a/mysql-test/suite/perfschema/t/server_init.test +++ b/mysql-test/suite/perfschema/t/server_init.test @@ -118,9 +118,6 @@ select count(name) from mutex_instances where name like "wait/synch/mutex/sql/LOCK_audit_mask";
select count(name) from mutex_instances - where name like "wait/synch/mutex/sql/LOCK_xid_cache"; - -select count(name) from mutex_instances where name like "wait/synch/mutex/sql/LOCK_plugin";
# Not a global variable, may be destroyed already. diff --git a/mysql-test/suite/perfschema/t/setup_instruments_defaults-master.opt b/mysql-test/suite/perfschema/t/setup_instruments_defaults-master.opt index 408eb5c..ed6702e 100644 --- a/mysql-test/suite/perfschema/t/setup_instruments_defaults-master.opt +++ b/mysql-test/suite/perfschema/t/setup_instruments_defaults-master.opt @@ -12,7 +12,7 @@ --loose-performance-schema-instrument='wait/synch/mutex/sql/LOCK_thread_count=OFF' --loose-performance-schema-instrument=' wait/synch/mutex/sql/LOCK_user_conn = COUNTED' --loose-performance-schema-instrument='wait%/synch/mutex/sql/LOCK_uu%_genera%/= COUNTED' ---loose-performance-schema-instrument='%%wait/synch/mutex/sql/LOCK_xid_cache=COUNTED' +--loose-performance-schema-instrument='%%wait/synch/mutex/sql/LOCK_plugin=COUNTED' --loose-performance-schema-instrument='%=FOO' --loose-performance-schema-instrument='%=%' --loose-performance-schema-instrument='%' diff --git a/mysql-test/suite/perfschema/t/setup_instruments_defaults.test b/mysql-test/suite/perfschema/t/setup_instruments_defaults.test index e1f6140..5e0a3a5 100644 --- a/mysql-test/suite/perfschema/t/setup_instruments_defaults.test +++ b/mysql-test/suite/perfschema/t/setup_instruments_defaults.test @@ -15,7 +15,7 @@ SELECT * FROM performance_schema.setup_instruments WHERE name IN ( 'wait/synch/mutex/sql/LOCK_user_conn', 'wait/synch/mutex/sql/LOCK_uuid_generator', - 'wait/synch/mutex/sql/LOCK_xid_cache', + 'wait/synch/mutex/sql/LOCK_plugin', 'stage/sql/creating table') AND enabled = 'yes' AND timed = 'no' ORDER BY name; diff --git a/sql/handler.cc b/sql/handler.cc index 12d7ffb..a45419f 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -1931,12 +1931,28 @@ int ha_recover(HASH *commit_list) so mysql_xa_recover does not filter XID's to ensure uniqueness. It can be easily fixed later, if necessary. */ + +static my_bool xa_recover_callback(XID_STATE *xs, Protocol *protocol) +{ + if (xs->xa_state == XA_PREPARED) + { + protocol->prepare_for_resend(); + protocol->store_longlong((longlong) xs->xid.formatID, FALSE); + protocol->store_longlong((longlong) xs->xid.gtrid_length, FALSE); + protocol->store_longlong((longlong) xs->xid.bqual_length, FALSE); + protocol->store(xs->xid.data, xs->xid.gtrid_length + xs->xid.bqual_length, + &my_charset_bin); + if (protocol->write()) + return TRUE; + } + return FALSE; +} + + bool mysql_xa_recover(THD *thd) { List<Item> field_list; Protocol *protocol= thd->protocol; - int i=0; - XID_STATE *xs; DBUG_ENTER("mysql_xa_recover");
field_list.push_back(new Item_int("formatID", 0, MY_INT32_NUM_DECIMAL_DIGITS)); @@ -1948,26 +1964,9 @@ bool mysql_xa_recover(THD *thd) Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) DBUG_RETURN(1);
- mysql_mutex_lock(&LOCK_xid_cache); - while ((xs= (XID_STATE*) my_hash_element(&xid_cache, i++))) - { - if (xs->xa_state==XA_PREPARED) - { - protocol->prepare_for_resend(); - protocol->store_longlong((longlong)xs->xid.formatID, FALSE); - protocol->store_longlong((longlong)xs->xid.gtrid_length, FALSE); - protocol->store_longlong((longlong)xs->xid.bqual_length, FALSE); - protocol->store(xs->xid.data, xs->xid.gtrid_length+xs->xid.bqual_length, - &my_charset_bin); - if (protocol->write()) - { - mysql_mutex_unlock(&LOCK_xid_cache); - DBUG_RETURN(1); - } - } - } - - mysql_mutex_unlock(&LOCK_xid_cache); + if (xid_cache_iterate(thd, (my_hash_walk_action) xa_recover_callback, + protocol)) + DBUG_RETURN(1); my_eof(thd); DBUG_RETURN(0); } diff --git a/sql/handler.h b/sql/handler.h index 5ef9208..5ab67c0 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -612,11 +612,11 @@ struct xid_t { return sizeof(formatID)+sizeof(gtrid_length)+sizeof(bqual_length)+ gtrid_length+bqual_length; } - uchar *key() + uchar *key() const { return (uchar *)>rid_length; } - uint key_length() + uint key_length() const { return sizeof(gtrid_length)+sizeof(bqual_length)+gtrid_length+bqual_length; } diff --git a/sql/mysqld.cc b/sql/mysqld.cc index b97742d..6c06240 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -4889,11 +4889,7 @@ static int init_server_components() my_charset_error_reporter= charset_error_reporter; #endif
- if (xid_cache_init()) - { - sql_print_error("Out of memory"); - unireg_abort(1); - } + xid_cache_init();
/* initialize delegates for extension observers, errors have already diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 5b0d04d..d59ef52 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -914,7 +914,8 @@ bool Drop_table_error_handler::handle_condition(THD *thd, wait_for_commit_ptr(0), main_da(0, false, false), m_stmt_da(&main_da), - tdc_hash_pins(0) + tdc_hash_pins(0), + xid_hash_pins(0) #ifdef WITH_WSREP , wsrep_applier(is_wsrep_applier), @@ -1593,7 +1594,7 @@ void THD::cleanup(void)
transaction.xid_state.xa_state= XA_NOTR; trans_rollback(this); - xid_cache_delete(&transaction.xid_state); + xid_cache_delete(this, &transaction.xid_state);
DBUG_ASSERT(open_tables == NULL); /* @@ -1704,6 +1705,8 @@ void THD::cleanup(void) main_da.free_memory(); if (tdc_hash_pins) lf_hash_put_pins(tdc_hash_pins); + if (xid_hash_pins) + lf_hash_put_pins(xid_hash_pins); /* Ensure everything is freed */ if (status_var.local_memory_used != 0) { @@ -5106,120 +5109,205 @@ void mark_transaction_to_rollback(THD *thd, bool all) /*************************************************************************** Handling of XA id cacheing ***************************************************************************/ +class XID_cache_element +{ + uint m_state;
shouldn't it be uint32 ? That's the type that my_atomic_add32/cas32 work with.
+ static const uint DELETED= 1 << 31; +public: + XID_STATE *m_xid_state;
O-okay. I *think* your hand-written locking logic works. But please add a comment, explaining how it works, all four methods, the meaning of DELETED, etc. Perhaps DELETED should rather be NOT_INITIALIZED?
+ bool lock() + { + if (my_atomic_add32_explicit(&m_state, 1, MY_MEMORY_ORDER_ACQUIRE) & DELETED) + { + unlock(); + return false; + } + return true; + } + void unlock() + { + my_atomic_add32_explicit(&m_state, -1, MY_MEMORY_ORDER_RELEASE); + } + void mark_deleted() + { + uint old= 0; + while (!my_atomic_cas32_weak_explicit(&m_state, &old, DELETED, + MY_MEMORY_ORDER_RELAXED, + MY_MEMORY_ORDER_RELAXED))
usually one should use LF_BACKOFF inside spin-loops.
+ old= 0; + } + void mark_not_deleted() + { + DBUG_ASSERT(m_state & DELETED); + my_atomic_add32_explicit(&m_state, -DELETED, MY_MEMORY_ORDER_RELAXED); + } + static void lf_hash_initializer(LF_HASH *hash __attribute__((unused)), + XID_cache_element *element, + XID_STATE *xid_state) + { + element->m_xid_state= xid_state; + xid_state->xid_cache_element= element; + } + static void lf_alloc_constructor(uchar *ptr) + { + XID_cache_element *element= (XID_cache_element*) (ptr + LF_HASH_OVERHEAD); + element->m_state= DELETED; + } + static void lf_alloc_destructor(uchar *ptr) + { + XID_cache_element *element= (XID_cache_element*) (ptr + LF_HASH_OVERHEAD); + if (element->m_state != DELETED)
How can this happen? You mark an element DELETED before lf_hash_delete()
+ { + DBUG_ASSERT(!element->m_xid_state->in_thd); + my_free(element->m_xid_state); + } + } + static uchar *key(const XID_cache_element *element, size_t *length, + my_bool not_used __attribute__((unused))) + { + *length= element->m_xid_state->xid.key_length(); + return element->m_xid_state->xid.key(); + } +};
-mysql_mutex_t LOCK_xid_cache; -HASH xid_cache;
-extern "C" uchar *xid_get_hash_key(const uchar *, size_t *, my_bool); -extern "C" void xid_free_hash(void *); +static LF_HASH xid_cache; +static bool xid_cache_inited;
-uchar *xid_get_hash_key(const uchar *ptr, size_t *length, - my_bool not_used __attribute__((unused))) -{ - *length=((XID_STATE*)ptr)->xid.key_length(); - return ((XID_STATE*)ptr)->xid.key(); -}
-void xid_free_hash(void *ptr) +bool THD::fix_xid_hash_pins() { - if (!((XID_STATE*)ptr)->in_thd) - my_free(ptr); + if (!xid_hash_pins) + xid_hash_pins= lf_hash_get_pins(&xid_cache); + return !xid_hash_pins; }
-#ifdef HAVE_PSI_INTERFACE -static PSI_mutex_key key_LOCK_xid_cache;
-static PSI_mutex_info all_xid_mutexes[]= +void xid_cache_init() { - { &key_LOCK_xid_cache, "LOCK_xid_cache", PSI_FLAG_GLOBAL} -}; - -static void init_xid_psi_keys(void) -{ - const char* category= "sql"; - int count; - - if (PSI_server == NULL) - return; - - count= array_elements(all_xid_mutexes); - PSI_server->register_mutex(category, all_xid_mutexes, count); + xid_cache_inited= true; + lf_hash_init(&xid_cache, sizeof(XID_cache_element), LF_HASH_UNIQUE, 0, 0, + (my_hash_get_key) XID_cache_element::key, &my_charset_bin); + xid_cache.alloc.constructor= XID_cache_element::lf_alloc_constructor; + xid_cache.alloc.destructor= XID_cache_element::lf_alloc_destructor; + xid_cache.initializer= + (lf_hash_initializer) XID_cache_element::lf_hash_initializer; } -#endif /* HAVE_PSI_INTERFACE */ - -bool xid_cache_init() -{ -#ifdef HAVE_PSI_INTERFACE - init_xid_psi_keys(); -#endif
- mysql_mutex_init(key_LOCK_xid_cache, &LOCK_xid_cache, MY_MUTEX_INIT_FAST); - return my_hash_init(&xid_cache, &my_charset_bin, 100, 0, 0, - xid_get_hash_key, xid_free_hash, 0) != 0; -}
void xid_cache_free() { - if (my_hash_inited(&xid_cache)) + if (xid_cache_inited) { - my_hash_free(&xid_cache); - mysql_mutex_destroy(&LOCK_xid_cache); + lf_hash_destroy(&xid_cache); + xid_cache_inited= false; } }
-XID_STATE *xid_cache_search(XID *xid) + +XID_STATE *xid_cache_search(THD *thd, XID *xid) { - mysql_mutex_lock(&LOCK_xid_cache); - XID_STATE *res=(XID_STATE *)my_hash_search(&xid_cache, xid->key(), - xid->key_length()); - mysql_mutex_unlock(&LOCK_xid_cache); - return res; + DBUG_ASSERT(thd->xid_hash_pins); + XID_cache_element *element= + (XID_cache_element*) lf_hash_search(&xid_cache, thd->xid_hash_pins, + xid->key(), xid->key_length()); + if (element) + { + lf_hash_search_unpin(thd->xid_hash_pins); + return element->m_xid_state;
Normally, one should not access the element after it's unpinned, so the protocol is XID_STATE *state= element->m_xid_state; lf_hash_search_unpin(thd->xid_hash_pins); return state; Perhaps your locking/deleted m_state guarantees that element is safe to access? But I failed to see that :(
+ } + return 0; }
bool xid_cache_insert(XID *xid, enum xa_states xa_state) { XID_STATE *xs; - my_bool res; - mysql_mutex_lock(&LOCK_xid_cache); - if (my_hash_search(&xid_cache, xid->key(), xid->key_length())) - res=0; - else if (!(xs=(XID_STATE *)my_malloc(sizeof(*xs), MYF(MY_WME)))) - res=1; - else + LF_PINS *pins; + int res= 1; + + if (!(pins= lf_hash_get_pins(&xid_cache))) + return true; + + if ((xs= (XID_STATE*) my_malloc(sizeof(*xs), MYF(MY_WME)))) { xs->xa_state=xa_state; xs->xid.set(xid); xs->in_thd=0; xs->rm_error=0; - res=my_hash_insert(&xid_cache, (uchar*)xs); + + if ((res= lf_hash_insert(&xid_cache, pins, xs))) + my_free(xs); + else + xs->xid_cache_element->mark_not_deleted(); + if (res == 1) + res= 0; } - mysql_mutex_unlock(&LOCK_xid_cache); + lf_hash_put_pins(pins); return res; }
-bool xid_cache_insert(XID_STATE *xid_state) +bool xid_cache_insert(THD *thd, XID_STATE *xid_state) { - mysql_mutex_lock(&LOCK_xid_cache); - if (my_hash_search(&xid_cache, xid_state->xid.key(), - xid_state->xid.key_length())) + if (thd->fix_xid_hash_pins()) + return true; + + int res= lf_hash_insert(&xid_cache, thd->xid_hash_pins, xid_state); + switch (res) { - mysql_mutex_unlock(&LOCK_xid_cache); + case 0: + xid_state->xid_cache_element->mark_not_deleted(); + break; + case 1: my_error(ER_XAER_DUPID, MYF(0)); - return true; + default: + xid_state->xid_cache_element= 0; } - bool res= my_hash_insert(&xid_cache, (uchar*)xid_state); - mysql_mutex_unlock(&LOCK_xid_cache); return res; }
-void xid_cache_delete(XID_STATE *xid_state) +void xid_cache_delete(THD *thd, XID_STATE *xid_state) +{ + if (xid_state->xid_cache_element) + { + DBUG_ASSERT(thd->xid_hash_pins); + xid_state->xid_cache_element->mark_deleted(); + lf_hash_delete(&xid_cache, thd->xid_hash_pins, + xid_state->xid.key(), xid_state->xid.key_length()); + xid_state->xid_cache_element= 0; + if (!xid_state->in_thd) + my_free(xid_state); + } +} + + +struct xid_cache_iterate_arg +{ + my_hash_walk_action action; + void *argument; +}; + +static my_bool xid_cache_iterate_callback(XID_cache_element *element, + xid_cache_iterate_arg *arg) +{ + my_bool res= FALSE; + if (element->lock()) + { + res= arg->action(element->m_xid_state, arg->argument); + element->unlock(); + } + return res; +} + +int xid_cache_iterate(THD *thd, my_hash_walk_action action, void *arg) { - mysql_mutex_lock(&LOCK_xid_cache); - my_hash_delete(&xid_cache, (uchar *)xid_state); - mysql_mutex_unlock(&LOCK_xid_cache); + xid_cache_iterate_arg argument= { action, arg }; + return thd->fix_xid_hash_pins() ? -1 : + lf_hash_iterate(&xid_cache, thd->xid_hash_pins, + (my_hash_walk_action) xid_cache_iterate_callback, + &argument); }
diff --git a/sql/sql_class.h b/sql/sql_class.h index bd145bd..200f541 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -1105,6 +1105,7 @@ struct st_savepoint {
enum xa_states {XA_NOTR=0, XA_ACTIVE, XA_IDLE, XA_PREPARED, XA_ROLLBACK_ONLY}; extern const char *xa_state_names[]; +class XID_cache_element;
typedef struct st_xid_state { /* For now, this is only used to catch duplicated external xids */ @@ -1113,16 +1114,16 @@ struct st_savepoint { bool in_thd; /* Error reported by the Resource Manager (RM) to the Transaction Manager. */ uint rm_error; + XID_cache_element *xid_cache_element; } XID_STATE;
-extern mysql_mutex_t LOCK_xid_cache; -extern HASH xid_cache; -bool xid_cache_init(void); +void xid_cache_init(void); void xid_cache_free(void); -XID_STATE *xid_cache_search(XID *xid); +XID_STATE *xid_cache_search(THD *thd, XID *xid); bool xid_cache_insert(XID *xid, enum xa_states xa_state); -bool xid_cache_insert(XID_STATE *xid_state); -void xid_cache_delete(XID_STATE *xid_state); +bool xid_cache_insert(THD *thd, XID_STATE *xid_state); +void xid_cache_delete(THD *thd, XID_STATE *xid_state); +int xid_cache_iterate(THD *thd, my_hash_walk_action action, void *argument);
/** @class Security_context @@ -3785,6 +3786,8 @@ class THD :public Statement, }
LF_PINS *tdc_hash_pins; + LF_PINS *xid_hash_pins; + bool fix_xid_hash_pins();
inline ulong wsrep_binlog_format() const { diff --git a/sql/transaction.cc b/sql/transaction.cc index 22e3ad7..d6ef160 100644 --- a/sql/transaction.cc +++ b/sql/transaction.cc @@ -738,7 +738,7 @@ bool trans_xa_start(THD *thd) thd->transaction.xid_state.xa_state= XA_ACTIVE; thd->transaction.xid_state.rm_error= 0; thd->transaction.xid_state.xid.set(thd->lex->xid); - if (xid_cache_insert(&thd->transaction.xid_state)) + if (xid_cache_insert(thd, &thd->transaction.xid_state)) { thd->transaction.xid_state.xa_state= XA_NOTR; thd->transaction.xid_state.xid.null(); @@ -801,7 +801,7 @@ bool trans_xa_prepare(THD *thd) my_error(ER_XAER_NOTA, MYF(0)); else if (ha_prepare(thd)) { - xid_cache_delete(&thd->transaction.xid_state); + xid_cache_delete(thd, &thd->transaction.xid_state); thd->transaction.xid_state.xa_state= XA_NOTR; my_error(ER_XA_RBROLLBACK, MYF(0)); } @@ -830,6 +830,11 @@ bool trans_xa_commit(THD *thd)
if (!thd->transaction.xid_state.xid.eq(thd->lex->xid)) { + if (thd->fix_xid_hash_pins()) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + DBUG_RETURN(TRUE); + } /* xid_state.in_thd is always true beside of xa recovery procedure. Note, that there is no race condition here between xid_cache_search @@ -840,7 +845,7 @@ bool trans_xa_commit(THD *thd) xa_cache_insert(XID, xa_states), which is called before starting client connections, and thus is always single-threaded. */ - XID_STATE *xs= xid_cache_search(thd->lex->xid); + XID_STATE *xs= xid_cache_search(thd, thd->lex->xid); res= !xs || xs->in_thd; if (res) my_error(ER_XAER_NOTA, MYF(0)); @@ -848,7 +853,7 @@ bool trans_xa_commit(THD *thd) { res= xa_trans_rolled_back(xs); ha_commit_or_rollback_by_xid(thd->lex->xid, !res); - xid_cache_delete(xs); + xid_cache_delete(thd, xs); } DBUG_RETURN(res); } @@ -911,7 +916,7 @@ bool trans_xa_commit(THD *thd) thd->server_status&= ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); - xid_cache_delete(&thd->transaction.xid_state); + xid_cache_delete(thd, &thd->transaction.xid_state); thd->transaction.xid_state.xa_state= XA_NOTR;
DBUG_RETURN(res); @@ -935,14 +940,20 @@ bool trans_xa_rollback(THD *thd)
if (!thd->transaction.xid_state.xid.eq(thd->lex->xid)) { - XID_STATE *xs= xid_cache_search(thd->lex->xid); + if (thd->fix_xid_hash_pins()) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + DBUG_RETURN(TRUE); + } + + XID_STATE *xs= xid_cache_search(thd, thd->lex->xid); if (!xs || xs->in_thd) my_error(ER_XAER_NOTA, MYF(0)); else { xa_trans_rolled_back(xs); ha_commit_or_rollback_by_xid(thd->lex->xid, 0); - xid_cache_delete(xs); + xid_cache_delete(thd, xs); } DBUG_RETURN(thd->get_stmt_da()->is_error()); } @@ -961,7 +972,7 @@ bool trans_xa_rollback(THD *thd) thd->server_status&= ~(SERVER_STATUS_IN_TRANS | SERVER_STATUS_IN_TRANS_READONLY); DBUG_PRINT("info", ("clearing SERVER_STATUS_IN_TRANS")); - xid_cache_delete(&thd->transaction.xid_state); + xid_cache_delete(thd, &thd->transaction.xid_state); thd->transaction.xid_state.xa_state= XA_NOTR;
DBUG_RETURN(res); diff --git a/storage/spider/spd_table.cc b/storage/spider/spd_table.cc index 36768ae..411c7ae 100644 --- a/storage/spider/spd_table.cc +++ b/storage/spider/spd_table.cc @@ -41,11 +41,13 @@
I'd prefer to have spider changes in a separate commit
#include "spd_malloc.h"
ulong *spd_db_att_thread_id; +#if MYSQL_VERSION_ID < 100103 #ifdef XID_CACHE_IS_SPLITTED uint *spd_db_att_xid_cache_split_num; #endif pthread_mutex_t *spd_db_att_LOCK_xid_cache; HASH *spd_db_att_xid_cache; +#endif struct charset_info_st *spd_charset_utf8_bin; const char **spd_defaults_extra_file; const char **spd_defaults_file; @@ -6263,7 +6265,7 @@ int spider_db_init( "?LOCK_xid_cache@@3PAUst_mysql_mutex@@A")); spd_db_att_xid_cache = *((HASH **) GetProcAddress(current_module, "?xid_cache@@3PAUst_hash@@A")); -#else +#elif MYSQL_VERSION_ID < 100103 spd_db_att_LOCK_xid_cache = (pthread_mutex_t *) #if MYSQL_VERSION_ID < 50500 GetProcAddress(current_module, @@ -6289,7 +6291,7 @@ int spider_db_init( spd_db_att_xid_cache_split_num = &opt_xid_cache_split_num; spd_db_att_LOCK_xid_cache = LOCK_xid_cache; spd_db_att_xid_cache = xid_cache; -#else +#elif MYSQL_VERSION_ID < 100103 spd_db_att_LOCK_xid_cache = &LOCK_xid_cache; spd_db_att_xid_cache = &xid_cache; #endif diff --git a/storage/spider/spd_trx.cc b/storage/spider/spd_trx.cc index a66fa5a..1b02bb8 100644 --- a/storage/spider/spd_trx.cc +++ b/storage/spider/spd_trx.cc @@ -38,11 +38,13 @@ #include "spd_ping_table.h" #include "spd_malloc.h"
+#if MYSQL_VERSION_ID < 100103 #ifdef XID_CACHE_IS_SPLITTED extern uint *spd_db_att_xid_cache_split_num; #endif extern pthread_mutex_t *spd_db_att_LOCK_xid_cache; extern HASH *spd_db_att_xid_cache; +#endif extern struct charset_info_st *spd_charset_utf8_bin;
extern handlerton *spider_hton_ptr; @@ -1641,6 +1643,13 @@ int spider_xa_lock( int error_num; const char *old_proc_info; DBUG_ENTER("spider_xa_lock"); +#if MYSQL_VERSION_ID >= 100103 + old_proc_info = thd_proc_info(thd, "Locking xid by Spider"); + error_num= 0; + if (xid_cache_insert(thd, xid_state)) + error_num= thd->get_stmt_da()->sql_errno() == ER_XAER_DUPID ? + ER_SPIDER_XA_LOCKED_NUM : HA_ERR_OUT_OF_MEM; +#else #ifdef SPIDER_HAS_HASH_VALUE_TYPE my_hash_value_type hash_value = my_calc_hash(spd_db_att_xid_cache, (uchar*) xid_state->xid.key(), xid_state->xid.key_length()); @@ -1699,6 +1708,7 @@ int spider_xa_lock( #else pthread_mutex_unlock(spd_db_att_LOCK_xid_cache); #endif +#endif thd_proc_info(thd, old_proc_info); DBUG_RETURN(error_num); } @@ -1709,6 +1719,10 @@ int spider_xa_unlock( THD *thd = current_thd; const char *old_proc_info; DBUG_ENTER("spider_xa_unlock"); +#if MYSQL_VERSION_ID >= 100103 + old_proc_info = thd_proc_info(thd, "Unlocking xid by Spider"); + xid_cache_delete(thd, xid_state); +#else #if defined(SPIDER_HAS_HASH_VALUE_TYPE) && defined(HASH_UPDATE_WITH_HASH_VALUE) my_hash_value_type hash_value = my_calc_hash(spd_db_att_xid_cache, (uchar*) xid_state->xid.key(), xid_state->xid.key_length()); @@ -1738,6 +1752,7 @@ int spider_xa_unlock( #else pthread_mutex_unlock(spd_db_att_LOCK_xid_cache); #endif +#endif thd_proc_info(thd, old_proc_info); DBUG_RETURN(0); } _______________________________________________ commits mailing list commits@mariadb.org https://lists.askmonty.org/cgi-bin/mailman/listinfo/commits Regards, Sergei
Hi Sergei, thanks for you comments. Answers inline. On Fri, Mar 13, 2015 at 08:58:38PM +0100, Sergei Golubchik wrote:
Hi, Sergey!
On Mar 12, svoj@mariadb.org wrote:
revision-id: 20e5c37ccfac9d95a2db1f295f211656df1ce160 parent(s): 190858d996e7dc90e01fe8a5e7daec6af0012b23 committer: Sergey Vojtovich branch nick: 10.1 timestamp: 2015-03-12 18:15:08 +0400 message:
MDEV-7728 - Spiral patch 032_mariadb-10.0.15.scale_registering_xid.diff
XID cache is now based on lock-free hash. Also fixed lf_hash_destroy() to call alloc destructor.
Note that previous implementation had race condition when thread was accessing XA owned by different thread. This new implementation doesn't fix it either. ...skip...
@@ -5106,120 +5109,205 @@ void mark_transaction_to_rollback(THD *thd, bool all) /*************************************************************************** Handling of XA id cacheing ***************************************************************************/ +class XID_cache_element +{ + uint m_state;
shouldn't it be uint32 ? That's the type that my_atomic_add32/cas32 work with. Yes, it should. Thanks!
+ static const uint DELETED= 1 << 31; +public: + XID_STATE *m_xid_state;
O-okay. I *think* your hand-written locking logic works. But please add a comment, explaining how it works, all four methods, the meaning of DELETED, etc. Perhaps DELETED should rather be NOT_INITIALIZED?
A very valid request. IMHO mutex is overkill here, that's why I implemented this trivial locking. I'll add comments and think about better names.
+ bool lock() + { + if (my_atomic_add32_explicit(&m_state, 1, MY_MEMORY_ORDER_ACQUIRE) & DELETED) + { + unlock(); + return false; + } + return true; + } + void unlock() + { + my_atomic_add32_explicit(&m_state, -1, MY_MEMORY_ORDER_RELEASE); + } + void mark_deleted() + { + uint old= 0; + while (!my_atomic_cas32_weak_explicit(&m_state, &old, DELETED, + MY_MEMORY_ORDER_RELAXED, + MY_MEMORY_ORDER_RELAXED))
usually one should use LF_BACKOFF inside spin-loops.
Good point. Though LF_BACKOFF doesn't seem to do anything useful.
+ old= 0; + } + void mark_not_deleted() + { + DBUG_ASSERT(m_state & DELETED); + my_atomic_add32_explicit(&m_state, -DELETED, MY_MEMORY_ORDER_RELAXED); + } + static void lf_hash_initializer(LF_HASH *hash __attribute__((unused)), + XID_cache_element *element, + XID_STATE *xid_state) + { + element->m_xid_state= xid_state; + xid_state->xid_cache_element= element; + } + static void lf_alloc_constructor(uchar *ptr) + { + XID_cache_element *element= (XID_cache_element*) (ptr + LF_HASH_OVERHEAD); + element->m_state= DELETED; + } + static void lf_alloc_destructor(uchar *ptr) + { + XID_cache_element *element= (XID_cache_element*) (ptr + LF_HASH_OVERHEAD); + if (element->m_state != DELETED)
How can this happen? You mark an element DELETED before lf_hash_delete()
That's for elements inserted by ha_recover(). There's no guarantee that they will be deleted. ...skip...
-XID_STATE *xid_cache_search(XID *xid) + +XID_STATE *xid_cache_search(THD *thd, XID *xid) { - mysql_mutex_lock(&LOCK_xid_cache); - XID_STATE *res=(XID_STATE *)my_hash_search(&xid_cache, xid->key(), - xid->key_length()); - mysql_mutex_unlock(&LOCK_xid_cache); - return res; + DBUG_ASSERT(thd->xid_hash_pins); + XID_cache_element *element= + (XID_cache_element*) lf_hash_search(&xid_cache, thd->xid_hash_pins, + xid->key(), xid->key_length()); + if (element) + { + lf_hash_search_unpin(thd->xid_hash_pins); + return element->m_xid_state;
Normally, one should not access the element after it's unpinned, so the protocol is
XID_STATE *state= element->m_xid_state; lf_hash_search_unpin(thd->xid_hash_pins); return state;
Perhaps your locking/deleted m_state guarantees that element is safe to access? But I failed to see that :( This is noted in cset comment: Note that previous implementation had race condition when thread was accessing XA owned by different thread. This new implementation doesn't fix it either.
I can fix it if you like.
diff --git a/storage/spider/spd_table.cc b/storage/spider/spd_table.cc index 36768ae..411c7ae 100644 --- a/storage/spider/spd_table.cc +++ b/storage/spider/spd_table.cc @@ -41,11 +41,13 @@
I'd prefer to have spider changes in a separate commit Ok, I'll split them.
...skip... Thanks, Sergey
participants (2)
-
Sergei Golubchik
-
Sergey Vojtovich