Hi, Sergey! On Dec 22, svoj@mariadb.org wrote:
revision-id: b2f45ba8280c97f8dc8b1adb3234cf2e28e48982 parent(s): 4a469b0ed68bb341e1a1e1ba540a45a3b6ff3ef9 committer: Sergey Vojtovich branch nick: 10.1 timestamp: 2014-12-22 19:56:48 +0400 message:
MDEV-7324 - Lock-free hash for table definition cache
That looked good to me. I didn't find anything obviously wrong. There are mostly questions below. Some are style suggestions (like, renaming of methods, etc). The only place I'm not sure about is early lf_hash_search_unpin() in tdc_acquire_share().
diff --git a/mysys/lf_hash.c b/mysys/lf_hash.c --- a/mysys/lf_hash.c +++ b/mysys/lf_hash.c @@ -122,7 +122,7 @@ static int lfind(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr, { if (unlikely(callback)) { - if (callback(cursor->curr + 1, (void*)key)) + if (cur_hashnr & 1 && callback(cursor->curr + 1, (void*)key))
Sorry for this.
return 1; } else if (cur_hashnr >= hashnr) diff --git a/sql/sql_class.cc b/sql/sql_class.cc --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -912,17 +912,17 @@ bool Drop_table_error_handler::handle_condition(THD *thd, #endif /* defined(ENABLED_DEBUG_SYNC) */ wait_for_commit_ptr(0), main_da(0, false, false), - m_stmt_da(&main_da) + m_stmt_da(&main_da), #ifdef WITH_WSREP - , wsrep_applier(is_wsrep_applier), wsrep_applier_closing(false), wsrep_client_thread(false), wsrep_apply_toi(false), wsrep_po_handle(WSREP_PO_INITIALIZER), wsrep_po_cnt(0), - wsrep_apply_format(0) + wsrep_apply_format(0), #endif + tdc_hash_pins(0)
Eh... I actually wanted wsrep related data to the at the end of the THD. Just to keep the rest of the THD layout identical for wsrep and non-wsrep builds. In case a wsrep-enabled plugin is loaded into non-wsrep server or visa versa. That was an early change, though, I've done more cleanups after that and moved wsrep into a service, so perhaps (even, most probably) this is not needed anymore. But I'd better stay on the safe side and keep wsrep at the end of THD.
{ ulong tmp;
@@ -1698,6 +1698,8 @@ void THD::cleanup(void)
free_root(&main_mem_root, MYF(0)); main_da.free_memory(); + if (tdc_hash_pins) + lf_hash_put_pins(tdc_hash_pins);
no tdc_hash_pins=0 here?
if (status_var.memory_used != 0) { DBUG_PRINT("error", ("memory_used: %lld", status_var.memory_used)); diff --git a/sql/table.h b/sql/table.h --- a/sql/table.h +++ b/sql/table.h @@ -611,32 +612,7 @@ struct TABLE_SHARE mysql_mutex_t LOCK_ha_data; /* To protect access to ha_data */ mysql_mutex_t LOCK_share; /* To protect TABLE_SHARE */
- typedef I_P_List <TABLE, TABLE_share> TABLE_list; - typedef I_P_List <TABLE, All_share_tables> All_share_tables_list; - struct - { - /** - Protects ref_count, m_flush_tickets, all_tables, free_tables, flushed, - all_tables_refs. - */ - mysql_mutex_t LOCK_table_share; - mysql_cond_t COND_release; - TABLE_SHARE *next, **prev; /* Link to unused shares */ - uint ref_count; /* How many TABLE objects uses this */ - uint all_tables_refs; /* Number of refs to all_tables */ - /** - List of tickets representing threads waiting for the share to be flushed. - */ - Wait_for_flush_list m_flush_tickets; - /* - Doubly-linked (back-linked) lists of used and unused TABLE objects - for this share. - */ - All_share_tables_list all_tables; - TABLE_list free_tables; - ulong version; - bool flushed; - } tdc; + TDC_element *tdc;
Hmm. Ok. I think I like this change - TABLE_SHARE now contains table metadata and everything related to tdc is moved to TDC_element. Right?
LEX_CUSTRING tabledef_version;
diff --git a/sql/table.cc b/sql/table.cc --- a/sql/table.cc +++ b/sql/table.cc @@ -3845,11 +3844,11 @@ bool TABLE_SHARE::visit_subgraph(Wait_for_flush *wait_for_flush, because we won't try to acquire tdc.LOCK_table_share while holding a write-lock on MDL_lock::m_rwlock. */ - mysql_mutex_lock(&tdc.LOCK_table_share); - tdc.all_tables_refs++; - mysql_mutex_unlock(&tdc.LOCK_table_share); + mysql_mutex_lock(&tdc->LOCK_table_share); + tdc->all_tables_refs++; + mysql_mutex_unlock(&tdc->LOCK_table_share);
Do I understand it correctly that a TDC_element can never be deleted from LF_HASH as long as at least one TABLE_SHARE references it?
- All_share_tables_list::Iterator tables_it(tdc.all_tables); + TDC_element::All_share_tables_list::Iterator tables_it(tdc->all_tables);
/* In case of multiple searches running in parallel, avoid going @@ -3946,21 +3945,10 @@ bool TABLE_SHARE::wait_for_old_version(THD *thd, struct timespec *abstime,
mdl_context->done_waiting_for();
- mysql_mutex_lock(&tdc.LOCK_table_share); - - tdc.m_flush_tickets.remove(&ticket); - - if (tdc.m_flush_tickets.is_empty() && tdc.ref_count == 0) - { - /* - If our thread was the last one using the share, - we must destroy it here. - */ - mysql_mutex_unlock(&tdc.LOCK_table_share); - destroy(); - } - else - mysql_mutex_unlock(&tdc.LOCK_table_share); + mysql_mutex_lock(&tdc->LOCK_table_share); + tdc->m_flush_tickets.remove(&ticket); + mysql_cond_broadcast(&tdc->COND_release); + mysql_mutex_unlock(&tdc->LOCK_table_share);
Why is the logic here different?
/* diff --git a/sql/table_cache.h b/sql/table_cache.h --- a/sql/table_cache.h +++ b/sql/table_cache.h @@ -16,6 +16,171 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+extern PSI_mutex_key key_TABLE_SHARE_LOCK_table_share; +extern PSI_cond_key key_TABLE_SHARE_COND_release; + +class TDC_element +{ +public: + uchar m_key[NAME_LEN + 1 + NAME_LEN + 1]; + uint m_key_length; + ulong version; + bool flushed; + TABLE_SHARE *share; + + typedef I_P_List <TABLE, TABLE_share> TABLE_list; + typedef I_P_List <TABLE, All_share_tables> All_share_tables_list; + /** + Protects ref_count, m_flush_tickets, all_tables, free_tables, flushed, + all_tables_refs. + */ + mysql_mutex_t LOCK_table_share; + mysql_cond_t COND_release; + TDC_element *next, **prev; /* Link to unused shares */ + uint ref_count; /* How many TABLE objects uses this */ + uint all_tables_refs; /* Number of refs to all_tables */ + /** + List of tickets representing threads waiting for the share to be flushed. + */ + Wait_for_flush_list m_flush_tickets; + /* + Doubly-linked (back-linked) lists of used and unused TABLE objects + for this share. + */ + All_share_tables_list all_tables; + TABLE_list free_tables; + + TDC_element() {} + + TDC_element(const char *key, uint key_length) : m_key_length(key_length) + { + memcpy(m_key, key, key_length); + } + + + void dbug_release_assertions()
Somewhat confusing name. If I hadn't see the code below I'd thought this function releases some assertions (how? why?) may be assert_correctness_at_release() would be less ambiguous?
+ { + DBUG_ASSERT(share == 0); + DBUG_ASSERT(ref_count == 0); + DBUG_ASSERT(m_flush_tickets.is_empty()); + DBUG_ASSERT(all_tables.is_empty()); + DBUG_ASSERT(free_tables.is_empty()); + DBUG_ASSERT(all_tables_refs == 0); + DBUG_ASSERT(next == 0); + DBUG_ASSERT(prev == 0); + } + + + /** + Acquire TABLE object from table cache. + + @pre share must be protected against removal. + + Acquired object cannot be evicted or acquired again. + + While locked: + - pop object from TABLE_SHARE::tdc.free_tables + + While unlocked: + - mark object used by thd
"While locked" / "While unlocked" - what does that mean?
+ + @return TABLE object, or NULL if no unused objects. + */ + + TABLE *acquire_table(THD *thd) + { + TABLE *table; + + mysql_mutex_lock(&LOCK_table_share); + table= free_tables.pop_front(); + if (table) + { + DBUG_ASSERT(!table->in_use);
lots of asserts - good!
+ table->in_use= thd; + /* The ex-unused table must be fully functional. */ + DBUG_ASSERT(table->db_stat && table->file); + /* The children must be detached from the table. */ + DBUG_ASSERT(!table->file->extra(HA_EXTRA_IS_ATTACHED_CHILDREN));
I don't even think of asking why it's HA_EXTRA_IS_ATTACHED_CHILDREN and not HA_EXTRA_HAS_ATTACHED_CHILDREN or, at least, HA_EXTRA_CHILDREN_ARE_ATTACHED. Ah, whatever... Horrors of the English language hidden in old MySQL code :)
+ } + mysql_mutex_unlock(&LOCK_table_share); + return table; + } + + + /** + Get last element of free_tables. + */ + + TABLE *free_tables_back()
Eh, I've thought for a few seconds what could it mean to "free tables back" until I read the function comment. You see, "free" is so often used as a verb that one absolutely doesn't see it that here it means "free_tables' back" even though it's also a common naming pattern (xxx_back for the list xxx). Rename the method, may be?
+ { + TABLE_list::Iterator it(share->tdc->free_tables); + TABLE *entry, *last= 0; + while ((entry= it++)) + last= entry; + return last; + } + + + /** + Wait for MDL deadlock detector to complete traversing tdc.all_tables. + + Must be called before updating TABLE_SHARE::tdc.all_tables. + */ + + void wait_for_mdl_deadlock_detector() + { + while (all_tables_refs) + mysql_cond_wait(&COND_release, &LOCK_table_share); + } + + + /** + Prepeare table share for use with table definition cache. + */ + + static void lf_alloc_constructor(uchar *arg) + { + TDC_element *element= (TDC_element*) (arg + LF_HASH_OVERHEAD); + DBUG_ENTER("tdc_init_share");
intentionally? or forgot to update DBUG_ENTER?
+ mysql_mutex_init(key_TABLE_SHARE_LOCK_table_share, + &element->LOCK_table_share, MY_MUTEX_INIT_FAST); + mysql_cond_init(key_TABLE_SHARE_COND_release, &element->COND_release, 0); + element->m_flush_tickets.empty(); + element->all_tables.empty(); + element->free_tables.empty(); + element->all_tables_refs= 0; + element->share= 0; + element->ref_count= 0; + element->next= 0; + element->prev= 0; + DBUG_VOID_RETURN; + } + + + /** + Release table definition cache specific resources of table share. + */ + + static void lf_alloc_destructor(uchar *arg) + { + TDC_element *element= (TDC_element*) (arg + LF_HASH_OVERHEAD); + DBUG_ENTER("tdc_deinit_share");
intentionally? or forgot to update DBUG_ENTER?
+ element->dbug_release_assertions(); + mysql_cond_destroy(&element->COND_release); + mysql_mutex_destroy(&element->LOCK_table_share); + DBUG_VOID_RETURN; + } + + + static uchar *key(const TDC_element *element, size_t *length, + my_bool not_used __attribute__((unused))) + { + *length= element->m_key_length; + return (uchar*) element->m_key; + } +}; + + enum enum_tdc_remove_table_type { TDC_RT_REMOVE_ALL, diff --git a/sql/sql_base.cc b/sql/sql_base.cc --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -270,58 +270,75 @@ uint get_table_def_key(const TABLE_LIST *table_list, const char **key) # Pointer to list of names of open tables. */
-OPEN_TABLE_LIST *list_open_tables(THD *thd, const char *db, const char *wild) +struct list_open_tables_arg { + THD *thd; + const char *db; + const char *wild; TABLE_LIST table_list; OPEN_TABLE_LIST **start_list, *open_list; - TABLE_SHARE *share; - TDC_iterator tdc_it; - DBUG_ENTER("list_open_tables"); +};
- bzero((char*) &table_list,sizeof(table_list)); - start_list= &open_list; - open_list=0;
- tdc_it.init(); - while ((share= tdc_it.next())) +static my_bool list_open_tables_callback(TDC_element *element, + list_open_tables_arg *arg) { - if (db && my_strcasecmp(system_charset_info, db, share->db.str)) - continue; - if (wild && wild_compare(share->table_name.str, wild, 0)) - continue; + char *db= (char*) element->m_key; + char *table_name= (char*) element->m_key + strlen((char*) element->m_key) + 1;
+ if (arg->db && my_strcasecmp(system_charset_info, arg->db, db)) + return FALSE; + if (arg->wild && wild_compare(table_name, arg->wild, 0)) + return FALSE; + /* Check if user has SELECT privilege for any column in the table */ - table_list.db= share->db.str; - table_list.table_name= share->table_name.str; - table_list.grant.privilege=0; + arg->table_list.db= db; + arg->table_list.table_name= table_name; + arg->table_list.grant.privilege= 0;
- if (check_table_access(thd,SELECT_ACL,&table_list, TRUE, 1, TRUE)) - continue; + if (check_table_access(arg->thd, SELECT_ACL, &arg->table_list, TRUE, 1, TRUE)) + return FALSE;
- if (!(*start_list = (OPEN_TABLE_LIST *) - sql_alloc(sizeof(**start_list)+share->table_cache_key.length))) - { - open_list=0; // Out of memory - break; - } + if (!(*arg->start_list= (OPEN_TABLE_LIST *) sql_alloc(
when you know the thd, prefer thd->alloc() over sql_alloc() (which is, basically, current_thd->alloc())
+ sizeof(**arg->start_list) + element->m_key_length))) + return TRUE; + - strmov((*start_list)->table= - strmov(((*start_list)->db= (char*) ((*start_list)+1)), - share->db.str)+1, - share->table_name.str); + strmov((*arg->start_list)->table= + strmov(((*arg->start_list)->db= (char*) ((*arg->start_list) + 1)), + db) + 1, table_name); - (*start_list)->in_use= 0; + (*arg->start_list)->in_use= 0; - mysql_mutex_lock(&share->tdc.LOCK_table_share); - TABLE_SHARE::All_share_tables_list::Iterator it(share->tdc.all_tables); + + mysql_mutex_lock(&element->LOCK_table_share); + TDC_element::All_share_tables_list::Iterator it(element->all_tables); TABLE *table; while ((table= it++)) if (table->in_use) - ++(*start_list)->in_use; + ++(*arg->start_list)->in_use; - mysql_mutex_unlock(&share->tdc.LOCK_table_share); + mysql_mutex_unlock(&element->LOCK_table_share); - (*start_list)->locked= 0; /* Obsolete. */ + (*arg->start_list)->locked= 0; /* Obsolete. */ - start_list= &(*start_list)->next; + arg->start_list= &(*arg->start_list)->next; - *start_list=0; + *arg->start_list= 0; + return FALSE; } - tdc_it.deinit(); - DBUG_RETURN(open_list); + + +OPEN_TABLE_LIST *list_open_tables(THD *thd, const char *db, const char *wild) +{ + list_open_tables_arg argument; + DBUG_ENTER("list_open_tables"); + + argument.thd= thd; + argument.db= db; + argument.wild= wild; + bzero((char*) &argument.table_list, sizeof(argument.table_list)); + argument.start_list= &argument.open_list; + argument.open_list= 0; + + if (tdc_iterate(thd, (my_hash_walk_action) list_open_tables_callback, + &argument, true)) + DBUG_RETURN(0); + + DBUG_RETURN(argument.open_list); }
/***************************************************************************** diff --git a/sql/table_cache.cc b/sql/table_cache.cc --- a/sql/table_cache.cc +++ b/sql/table_cache.cc @@ -72,32 +76,23 @@ /** Protects unused shares list.
- TABLE_SHARE::tdc.prev - TABLE_SHARE::tdc.next - oldest_unused_share - end_of_unused_share + TDC_element::prev + TDC_element::next + unused_shares */
static mysql_mutex_t LOCK_unused_shares; -static mysql_rwlock_t LOCK_tdc; /**< Protects tdc_hash. */ my_atomic_rwlock_t LOCK_tdc_atomics; /**< Protects tdc_version. */
#ifdef HAVE_PSI_INTERFACE -static PSI_mutex_key key_LOCK_unused_shares, key_TABLE_SHARE_LOCK_table_share; +PSI_mutex_key key_LOCK_unused_shares, key_TABLE_SHARE_LOCK_table_share;
really? not static?
static PSI_mutex_info all_tc_mutexes[]= { { &key_LOCK_unused_shares, "LOCK_unused_shares", PSI_FLAG_GLOBAL }, { &key_TABLE_SHARE_LOCK_table_share, "TABLE_SHARE::tdc.LOCK_table_share", 0 } };
-static PSI_rwlock_key key_rwlock_LOCK_tdc; -static PSI_rwlock_info all_tc_rwlocks[]= -{ - { &key_rwlock_LOCK_tdc, "LOCK_tdc", PSI_FLAG_GLOBAL } -}; - - -static PSI_cond_key key_TABLE_SHARE_COND_release; +PSI_cond_key key_TABLE_SHARE_COND_release;
and here
static PSI_cond_info all_tc_conds[]= { { &key_TABLE_SHARE_COND_release, "TABLE_SHARE::tdc.COND_release", 0 } @@ -265,85 +269,41 @@ void tc_add_table(THD *thd, TABLE *table)
if (need_purge) { - TABLE_SHARE *purge_share= 0; - TABLE_SHARE *share; - TABLE *entry; - ulonglong UNINIT_VAR(purge_time); - TDC_iterator tdc_it; + tc_add_table_arg argument; + argument.purge_time= ULONGLONG_MAX; + tdc_iterate(thd, (my_hash_walk_action) tc_add_table_callback, &argument);
- tdc_it.init(); - while ((share= tdc_it.next())) + if (argument.purge_time != ULONGLONG_MAX) { - mysql_mutex_lock(&share->tdc.LOCK_table_share); - if ((entry= tc_free_tables_back(share)) && - (!purge_share || entry->tc_time < purge_time)) + TDC_element *element= (TDC_element*) lf_hash_search(&tdc_hash, + thd->tdc_hash_pins, + argument.key, + argument.key_length); + if (element) { - purge_share= share; - purge_time= entry->tc_time; - } - mysql_mutex_unlock(&share->tdc.LOCK_table_share); - } + TABLE *entry; + mysql_mutex_lock(&element->LOCK_table_share); + lf_hash_search_unpin(thd->tdc_hash_pins); + element->wait_for_mdl_deadlock_detector();
you wait under a mutex? hmm...
- if (purge_share) - { - mysql_mutex_lock(&purge_share->tdc.LOCK_table_share); - tc_wait_for_mdl_deadlock_detector(purge_share); - tdc_it.deinit(); /* It may happen that oldest table was acquired meanwhile. In this case just go ahead, number of objects in table cache will normalize eventually. */ - if ((entry= tc_free_tables_back(purge_share)) && - entry->tc_time == purge_time) + if ((entry= element->free_tables_back()) && + entry->tc_time == argument.purge_time) { - entry->s->tdc.free_tables.remove(entry); + element->free_tables.remove(entry); tc_remove_table(entry); - mysql_mutex_unlock(&purge_share->tdc.LOCK_table_share); + mysql_mutex_unlock(&element->LOCK_table_share); intern_close_table(entry); } else - mysql_mutex_unlock(&purge_share->tdc.LOCK_table_share); + mysql_mutex_unlock(&element->LOCK_table_share); } - else - tdc_it.deinit(); } } - - -/** - Acquire TABLE object from table cache. - - @pre share must be protected against removal. - - Acquired object cannot be evicted or acquired again. - - While locked: - - pop object from TABLE_SHARE::tdc.free_tables - - While unlocked: - - mark object used by thd - - @return TABLE object, or NULL if no unused objects. -*/ - -static TABLE *tc_acquire_table(THD *thd, TABLE_SHARE *share) -{ - TABLE *table; - - mysql_mutex_lock(&share->tdc.LOCK_table_share); - table= share->tdc.free_tables.pop_front(); - if (table) - { - DBUG_ASSERT(!table->in_use); - table->in_use= thd; - /* The ex-unused table must be fully functional. */ - DBUG_ASSERT(table->db_stat && table->file); - /* The children must be detached from the table. */ - DBUG_ASSERT(!table->file->extra(HA_EXTRA_IS_ATTACHED_CHILDREN)); - }
ok, you've moved it to table_cache.h...
- mysql_mutex_unlock(&share->tdc.LOCK_table_share); - return table; }
@@ -397,88 +357,76 @@ bool tc_release_table(TABLE *table) */ table->in_use= 0; /* Add table to the list of unused TABLE objects for this share. */ - table->s->tdc.free_tables.push_front(table); - mysql_mutex_unlock(&table->s->tdc.LOCK_table_share); + table->s->tdc->free_tables.push_front(table); + mysql_mutex_unlock(&table->s->tdc->LOCK_table_share); return false;
purge: - tc_wait_for_mdl_deadlock_detector(table->s); + table->s->tdc->wait_for_mdl_deadlock_detector(); tc_remove_table(table); - mysql_mutex_unlock(&table->s->tdc.LOCK_table_share); + mysql_mutex_unlock(&table->s->tdc->LOCK_table_share); table->in_use= 0; intern_close_table(table); return true; }
-extern "C" uchar *tdc_key(const uchar *record, size_t *length, - my_bool not_used __attribute__((unused))) -{ - TABLE_SHARE *entry= (TABLE_SHARE*) record; - *length= entry->table_cache_key.length; - return (uchar*) entry->table_cache_key.str; -} - - /** Delete share from hash and free share object. - - @return - @retval 0 Success - @retval 1 Share is referenced */
-static int tdc_delete_share_from_hash(TABLE_SHARE *share) +static void tdc_delete_share_from_hash(TDC_element *element) { + THD *thd= current_thd; + LF_PINS *pins; + TABLE_SHARE *share; DBUG_ENTER("tdc_delete_share_from_hash"); - mysql_rwlock_wrlock(&LOCK_tdc); - mysql_mutex_lock(&share->tdc.LOCK_table_share); - if (--share->tdc.ref_count) - { - mysql_cond_broadcast(&share->tdc.COND_release); - mysql_mutex_unlock(&share->tdc.LOCK_table_share); - mysql_rwlock_unlock(&LOCK_tdc); - DBUG_RETURN(1); - } - my_hash_delete(&tdc_hash, (uchar*) share); + + share= element->share; + DBUG_ASSERT(share); + element->share= 0; /* Notify PFS early, while still locked. */
"while still locked" what?
PSI_CALL_release_table_share(share->m_psi); share->m_psi= 0; - mysql_rwlock_unlock(&LOCK_tdc);
- if (share->tdc.m_flush_tickets.is_empty()) + if (!element->m_flush_tickets.is_empty()) { - /* No threads are waiting for this share to be flushed, destroy it. */ - mysql_mutex_unlock(&share->tdc.LOCK_table_share); - free_table_share(share); - } - else - { - Wait_for_flush_list::Iterator it(share->tdc.m_flush_tickets); + Wait_for_flush_list::Iterator it(element->m_flush_tickets); Wait_for_flush *ticket; while ((ticket= it++)) (void) ticket->get_ctx()->m_wait.set_status(MDL_wait::GRANTED); - /* - If there are threads waiting for this share to be flushed, - the last one to receive the notification will destroy the - share. At this point the share is removed from the table - definition cache, so is OK to proceed here without waiting - for this thread to do the work. - */ - mysql_mutex_unlock(&share->tdc.LOCK_table_share); + + do + { + mysql_cond_wait(&element->COND_release, &element->LOCK_table_share); + } while (!element->m_flush_tickets.is_empty()); } - DBUG_RETURN(0); + + mysql_mutex_unlock(&element->LOCK_table_share);
Ok, so the caller must lock LOCK_table_share? please put mysql_mutex_assert_owner() in the beginning of the function
+ + if (thd)
When thd is NULL here? On shutdown?
+ { + fix_thd_pins(thd); + pins= thd->tdc_hash_pins; } + else + pins= lf_hash_get_pins(&tdc_hash);
+ DBUG_ASSERT(pins); // What can we do about it? + element->dbug_release_assertions(); + lf_hash_delete(&tdc_hash, pins, element->m_key, element->m_key_length); + if (!thd) + lf_hash_put_pins(pins); + free_table_share(share); + DBUG_VOID_RETURN; +}
+ /** Initialize table definition cache. - - @retval 0 Success - @retval !0 Error */
-int tdc_init(void) +void tdc_init(void) { DBUG_ENTER("tdc_init"); #ifdef HAVE_PSI_INTERFACE @@ -642,26 +548,34 @@ void tdc_deinit_share(TABLE_SHARE *share) Caller is expected to unlock table share with tdc_unlock_share().
@retval 0 Share not found - @retval !0 Pointer to locked table share + @retval MY_ERRPTR OOM + @retval ptr Pointer to locked table share */
-TABLE_SHARE *tdc_lock_share(const char *db, const char *table_name) +TDC_element *tdc_lock_share(THD *thd, const char *db, const char *table_name) { + TDC_element *element; char key[MAX_DBKEY_LENGTH]; - uint key_length;
DBUG_ENTER("tdc_lock_share"); - key_length= tdc_create_key(key, db, table_name); + if (fix_thd_pins(thd)) + DBUG_RETURN((TDC_element*) MY_ERRPTR);
I suppose you can do it in THD:init, if you'd like. A THD almost always will need to access table definition cache, so it'll need these pins. Then there will be no need to return MY_ERRPTR here and handle it in the caller
- mysql_rwlock_rdlock(&LOCK_tdc); - TABLE_SHARE* share= (TABLE_SHARE*) my_hash_search(&tdc_hash, - (uchar*) key, key_length); - if (share && !share->error) - mysql_mutex_lock(&share->tdc.LOCK_table_share); - else - share= 0; - mysql_rwlock_unlock(&LOCK_tdc); - DBUG_RETURN(share); + element= (TDC_element *) lf_hash_search(&tdc_hash, thd->tdc_hash_pins, + (uchar*) key, + tdc_create_key(key, db, table_name)); + if (element) + { + mysql_mutex_lock(&element->LOCK_table_share); + if (!element->share || element->share->error) + { + mysql_mutex_unlock(&element->LOCK_table_share); + element= 0; + } + lf_hash_search_unpin(thd->tdc_hash_pins); + } + + DBUG_RETURN(element);
Looks good to me
}
@@ -702,60 +616,60 @@ TABLE_SHARE *tdc_acquire_share(THD *thd, const char *db, const char *table_name, TABLE **out_table) { TABLE_SHARE *share; + TDC_element *element; bool was_unused; DBUG_ENTER("tdc_acquire_share");
- mysql_rwlock_rdlock(&LOCK_tdc); - share= (TABLE_SHARE*) my_hash_search_using_hash_value(&tdc_hash, hash_value, - (uchar*) key, - key_length); - if (!share) + if (fix_thd_pins(thd)) + DBUG_RETURN(0); + +retry: + while (!(element= (TDC_element*) lf_hash_search_using_hash_value(&tdc_hash,
Was lf_hash_search_using_hash_value added in a separate patch?
+ thd->tdc_hash_pins, hash_value, (uchar*) key, key_length))) { - TABLE_SHARE *new_share; - mysql_rwlock_unlock(&LOCK_tdc); + TDC_element tmp(key, key_length); + int res= lf_hash_insert(&tdc_hash, thd->tdc_hash_pins, (uchar*) &tmp);
- if (!(new_share= alloc_table_share(db, table_name, key, key_length))) + if (res == -1) DBUG_RETURN(0); - new_share->error= OPEN_FRM_OPEN_ERROR; + else if (res == 1) + continue;
- mysql_rwlock_wrlock(&LOCK_tdc); - share= (TABLE_SHARE*) my_hash_search_using_hash_value(&tdc_hash, hash_value, - (uchar*) key, - key_length); - if (!share) - { - bool need_purge; + element= (TDC_element*) lf_hash_search_using_hash_value(&tdc_hash, + thd->tdc_hash_pins, hash_value, (uchar*) key, key_length); + lf_hash_search_unpin(thd->tdc_hash_pins);
How can you be sure that nobody frees your element after you unpin it?
+ DBUG_ASSERT(element); + element->dbug_release_assertions();
- share= new_share; - mysql_mutex_lock(&share->tdc.LOCK_table_share); - if (my_hash_insert(&tdc_hash, (uchar*) share)) + if (!(share= alloc_table_share(db, table_name, key, key_length))) { - mysql_mutex_unlock(&share->tdc.LOCK_table_share); - mysql_rwlock_unlock(&LOCK_tdc); - free_table_share(share); + lf_hash_delete(&tdc_hash, thd->tdc_hash_pins, key, key_length); DBUG_RETURN(0); } - need_purge= tdc_hash.records > tdc_size; - mysql_rwlock_unlock(&LOCK_tdc);
/* note that tdc_acquire_share() *always* uses discovery */ open_table_def(thd, share, flags | GTS_USE_DISCOVERY); - share->tdc.ref_count++; - mysql_mutex_unlock(&share->tdc.LOCK_table_share);
if (share->error) { - tdc_delete_share_from_hash(share); + free_table_share(share); + lf_hash_delete(&tdc_hash, thd->tdc_hash_pins, key, key_length); DBUG_RETURN(0); } - else if (need_purge) + + mysql_mutex_lock(&element->LOCK_table_share); + element->share= share; + share->tdc= element; + element->ref_count++; + element->version= tdc_refresh_version(); + element->flushed= false; + mysql_mutex_unlock(&element->LOCK_table_share); + tdc_purge(false); if (out_table) *out_table= 0; share->m_psi= PSI_CALL_get_table_share(false, share); goto end; - } - free_table_share(new_share); }
/* cannot force discovery of a cached share */ @@ -763,18 +677,25 @@ TABLE_SHARE *tdc_acquire_share(THD *thd, const char *db, const char *table_name,
if (out_table && (flags & GTS_TABLE)) { - if ((*out_table= tc_acquire_table(thd, share))) + if ((*out_table= element->acquire_table(thd))) { - mysql_rwlock_unlock(&LOCK_tdc); + lf_hash_search_unpin(thd->tdc_hash_pins);
what this lf_hash_search_unpin corresponds to?
DBUG_ASSERT(!(flags & GTS_NOLOCK)); - DBUG_ASSERT(!share->error); - DBUG_ASSERT(!share->is_view); - DBUG_RETURN(share); + DBUG_ASSERT(element->share); + DBUG_ASSERT(!element->share->error); + DBUG_ASSERT(!element->share->is_view); + DBUG_RETURN(element->share); } }
- mysql_mutex_lock(&share->tdc.LOCK_table_share); - mysql_rwlock_unlock(&LOCK_tdc); + mysql_mutex_lock(&element->LOCK_table_share); + if (!(share= element->share)) + { + mysql_mutex_unlock(&element->LOCK_table_share); + lf_hash_search_unpin(thd->tdc_hash_pins);
and here
+ goto retry; + } + lf_hash_search_unpin(thd->tdc_hash_pins);
may be that first lf_hash_search_unpin was wrong? the one that goes directly after lf_hash_search.
/* We found an existing table definition. Return it if we didn't get @@ -983,13 +864,36 @@ bool tdc_remove_table(THD *thd, enum_tdc_remove_table_type remove_type, thd->mdl_context.is_lock_owner(MDL_key::TABLE, db, table_name, MDL_EXCLUSIVE));
- if ((share= tdc_delete_share(db, table_name))) + + mysql_mutex_lock(&LOCK_unused_shares); + if (!(element= tdc_lock_share(thd, db, table_name))) { - I_P_List <TABLE, TABLE_share> purge_tables; - uint my_refs= 1; + mysql_mutex_unlock(&LOCK_unused_shares); + DBUG_ASSERT(remove_type != TDC_RT_REMOVE_NOT_OWN_KEEP_SHARE); + DBUG_RETURN(false); + }
- mysql_mutex_lock(&share->tdc.LOCK_table_share); - tc_wait_for_mdl_deadlock_detector(share); + DBUG_ASSERT(element != MY_ERRPTR); // What can we do about it?
allocate thd->tdc_hash_pins in THD::init
+ + if (!element->ref_count) + { + if (element->prev) + { + unused_shares.remove(element); + /* Concurrent thread may start using share again, reset prev and next. */
What concurrent thread? element->ref_count is 0, you removed the the share from the unused_shares list. There can be no concurrent threads, as far as I understand. And indeed, you do remove the share from the hash (which does free_table_share) immediately below, which also proves that there can be no concurrent threads.
+ element->prev= 0; + element->next= 0; + } + mysql_mutex_unlock(&LOCK_unused_shares); + + tdc_delete_share_from_hash(element); + DBUG_RETURN(true); + } + mysql_mutex_unlock(&LOCK_unused_shares); + + element->ref_count++; + + element->wait_for_mdl_deadlock_detector(); /* Mark share flushed in order to ensure that it gets automatically deleted once it is no longer referenced.
Regards, Sergei