New subject: [Maria-developers] 48d1ba6: MDEV-10296 - Multi-instance table cache

2 Aug 2016

Hi, Sergey!

Ok, see comments below.

But why is this supposed to improve scalability?
Are you talking about the use case of one table being access by multiple
threads? Or this helps in other cases too?
And what is the effect of the more expensive tc_remove_all_unused_tables() ?

On Jun 29, Sergey Vojtovich wrote:
...
revision-id: 48d1ba6097939efe5efd28dd3ed9d281cc2bc2f4 (mariadb-10.2.0-93-g48d1ba6)
parent(s): 8bec9746f0d5b363f385713035ca3f2daff34e1c
committer: Sergey Vojtovich
timestamp: 2016-06-29 16:33:08 +0400
message:
MDEV-10296 - Multi-instance table cache
Improve scalability by implementing multi-instance table cache.

diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
index e075c64..22b12a9 100644
--- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
@@ -4559,6 +4559,20 @@ NUMERIC_BLOCK_SIZE	1
 ENUM_VALUE_LIST	NULL
 READ_ONLY	NO
 COMMAND_LINE_ARGUMENT	REQUIRED
+VARIABLE_NAME	TABLE_OPEN_CACHE_INSTANCES
+SESSION_VALUE	NULL
+GLOBAL_VALUE	1
+GLOBAL_VALUE_ORIGIN	COMPILE-TIME
Default value 1 and it's not auto-tuned on startup?
You think multi-instance table cache is better stay unused? :)
If not, either use a reasonable default or, may be, choose it on
startup based on my_gencpus()
...
+DEFAULT_VALUE	1
+VARIABLE_SCOPE	GLOBAL
+VARIABLE_TYPE	BIGINT UNSIGNED
+VARIABLE_COMMENT	The number of table cache instances
+NUMERIC_MIN_VALUE	1
+NUMERIC_MAX_VALUE	64
+NUMERIC_BLOCK_SIZE	1
+ENUM_VALUE_LIST	NULL
+READ_ONLY	YES
+COMMAND_LINE_ARGUMENT	REQUIRED
 VARIABLE_NAME	THREAD_CACHE_SIZE
 SESSION_VALUE	NULL
 GLOBAL_VALUE	151
diff --git a/sql/table_cache.cc b/sql/table_cache.cc
index bfe51df..f1eb006 100644
--- a/sql/table_cache.cc
+++ b/sql/table_cache.cc
@@ -125,67 +114,101 @@ static int fix_thd_pins(THD *thd)
   part of table definition cache.
 */
+struct Share_free_tables
 {
+  typedef I_P_List <TABLE, TABLE_share> List;
+  List list;
+  char pad[CPU_LEVEL1_DCACHE_LINESIZE];
a comment, please, explaining the padding.
Or see a suggestion at the end.
...
+};
+
+struct Table_cache_instance
+{
+  /**
+    Protects free_tables (TABLE::global_free_next and TABLE::global_free_prev),
+    records, Share_free_tables::List (TABLE::prev and TABLE::next),
+    TABLE::in_use.
+  */
+  mysql_mutex_t LOCK_table_cache;
+  I_P_List <TABLE, I_P_List_adapter<TABLE, &TABLE::global_free_next,
+                                    &TABLE::global_free_prev>,
+            I_P_List_null_counter, I_P_List_fast_push_back<TABLE> >
+    free_tables;
+  ulong records;
+  char pad[CPU_LEVEL1_DCACHE_LINESIZE];
why is it padded?
...
+
+  Table_cache_instance(): records(0)
   {
+    mysql_mutex_init(key_LOCK_table_cache, &LOCK_table_cache,
+                     MY_MUTEX_INIT_FAST);
   }
+
+  ~Table_cache_instance()
+  {
+    mysql_mutex_destroy(&LOCK_table_cache);
+    DBUG_ASSERT(free_tables.is_empty());
+    DBUG_ASSERT(records == 0);
+  }
+};
+static Table_cache_instance *tc;
+
+static inline Share_free_tables *SHARE_FREE_TABLES(TDC_element *element)
fairly unconventional letter case choice :)
Besides, instead of pointer arithmetics, you could've simply put


   Share_free_tables free_tables[1];

at the end of TDC_element.
...
{
+  return (Share_free_tables*) (element + 1);
 }
+static void intern_close_table(TABLE *table)
+{
+  delete table->triggers;
+  DBUG_ASSERT(table->file);
old intern_close_table had

  if (table->file) // Not true if placeholder

do you mean, these placeholder TABLEs can no longer be in the table
cache?
...
+  closefrm(table);
+  tdc_release_share(table->s);
old intern_close_table() also did table->alias.free();
...
+  my_free(table);
+}
+
+/**
+  Get number of TABLE objects (used and unused) in table cache.
 */
+uint tc_records(void)
 {
+  ulong total= 0;
+  for (ulong i= 0; i < tc_instances; i++)
+  {
+    mysql_mutex_lock(&tc[i].LOCK_table_cache);
+    total+= tc[i].records;
+    mysql_mutex_unlock(&tc[i].LOCK_table_cache);
+  }
+  return total;
 }
/**
   Remove TABLE object from table cache.
 */
static void tc_remove_table(TABLE *table)
 {
+  TDC_element *element= table->s->tdc;
+
+  mysql_mutex_lock(&element->LOCK_table_share);
+  /* Wait for MDL deadlock detector to complete traversing tdc.all_tables. */
+  while (element->all_tables_refs)
+    mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
+  element->all_tables.remove(table);
+  mysql_mutex_unlock(&element->LOCK_table_share);
+
+  intern_close_table(table);
 }
static void tc_remove_all_unused_tables(TDC_element *element,
+                                        Share_free_tables::List *purge_tables,
                                         bool mark_flushed)
 {
   TABLE *table;
@@ -200,10 +223,18 @@ static void tc_remove_all_unused_tables(TDC_element *element,
   */
   if (mark_flushed)
     element->flushed= true;
+  for (ulong i= 0; i < tc_instances; i++)
   {
+    mysql_mutex_lock(&tc[i].LOCK_table_cache);
+    while ((table= SHARE_FREE_TABLES(element)[i].list.pop_front()))
+    {
+      tc[i].records--;
+      tc[i].free_tables.remove(table);
+      DBUG_ASSERT(element->all_tables_refs == 0);
+      element->all_tables.remove(table);
+      purge_tables->push_front(table);
+    }
+    mysql_mutex_unlock(&tc[i].LOCK_table_cache);
   }
 }
@@ -225,7 +256,7 @@ static void tc_remove_all_unused_tables(TDC_element *element,
struct tc_purge_arg
 {
+  Share_free_tables::List purge_tables;
   bool mark_flushed;
 };
@@ -281,79 +298,33 @@ static TABLE *tc_free_tables_back(TDC_element *element)
   - free evicted object
 */
+void tc_add_table(THD *thd, TABLE *table)
 {
+  ulong i= thd->thread_id % tc_instances;
+  TABLE *LRU_table= 0;
+  TDC_element *element= table->s->tdc;
+  DBUG_ASSERT(table->in_use == thd);
   mysql_mutex_lock(&element->LOCK_table_share);
+  /* Wait for MDL deadlock detector to complete traversing tdc.all_tables. */
+  while (element->all_tables_refs)
+    mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
+  element->all_tables.push_front(table);
   mysql_mutex_unlock(&element->LOCK_table_share);
+  mysql_mutex_lock(&tc[i].LOCK_table_cache);
+  if (tc[i].records == tc_size && (LRU_table= tc[i].free_tables.pop_front()))
   {
+    SHARE_FREE_TABLES(LRU_table->s->tdc)[i].list.remove(LRU_table);
+    /* Needed if MDL deadlock detector chimes in before tc_remove_table() */
+    LRU_table->in_use= thd;
How can MDL deadlock detector get to the unused table?  It traverses the
lock dependency graph, and an unused table cannot be locked, can it?
...
}
+  else
+    tc[i].records++;
+  mysql_mutex_unlock(&tc[i].LOCK_table_cache);
+
+  if (LRU_table)
+    tc_remove_table(LRU_table);
 }
@@ -369,10 +340,11 @@ void tc_add_table(THD *thd, TABLE *table)
static TABLE *tc_acquire_table(THD *thd, TDC_element *element)
 {
+  ulong i= thd->thread_id % tc_instances;
   TABLE *table;
-  mysql_mutex_lock(&element->LOCK_table_share);
-  table= element->free_tables.pop_front();
+  mysql_mutex_lock(&tc[i].LOCK_table_cache);
+  table= SHARE_FREE_TABLES(element)[i].list.pop_front();
   if (table)
   {
     DBUG_ASSERT(!table->in_use);
@@ -381,8 +353,9 @@ static TABLE *tc_acquire_table(THD *thd, TDC_element *element)
     DBUG_ASSERT(table->db_stat && table->file);
     /* The children must be detached from the table. */
     DBUG_ASSERT(!table->file->extra(HA_EXTRA_IS_ATTACHED_CHILDREN));
+    tc[i].free_tables.remove(table);
   }
-  mysql_mutex_unlock(&element->LOCK_table_share);
+  mysql_mutex_unlock(&tc[i].LOCK_table_cache);
   return table;
 }
@@ -413,40 +386,27 @@ static TABLE *tc_acquire_table(THD *thd, TDC_element *element)
     @retval false object released
 */
-bool tc_release_table(TABLE *table)
+void tc_release_table(TABLE *table)
 {
+  ulong i= table->in_use->thread_id % tc_instances;
   DBUG_ASSERT(table->in_use);
   DBUG_ASSERT(table->file);
-  if (table->needs_reopen() || tc_records() > tc_size)
+  mysql_mutex_lock(&tc[i].LOCK_table_cache);
+  if (table->needs_reopen() || table->s->tdc->flushed ||
+      tc[i].records > tc_size)
   {
-    mysql_mutex_lock(&table->s->tdc->LOCK_table_share);
-    goto purge;
+    tc[i].records--;
+    mysql_mutex_unlock(&tc[i].LOCK_table_cache);
+    tc_remove_table(table);
Does it mean that if the table cache is full, you free the table,
instead of adding it to the cache? That's not very LRU-ish
...
+  }
+  else
+  {
+    table->in_use= 0;
+    SHARE_FREE_TABLES(table->s->tdc)[i].list.push_front(table);
+    tc[i].free_tables.push_back(table);
+    mysql_mutex_unlock(&tc[i].LOCK_table_cache);
   }
 }
@@ -573,23 +535,29 @@ static uchar *tdc_hash_key(const TDC_element *element, size_t *length,
   Initialize table definition cache.
 */
-void tdc_init(void)
+bool tdc_init(void)
 {
   DBUG_ENTER("tdc_init");
 #ifdef HAVE_PSI_INTERFACE
-  init_tc_psi_keys();
+  mysql_mutex_register("sql", all_tc_mutexes, array_elements(all_tc_mutexes));
+  mysql_cond_register("sql", all_tc_conds, array_elements(all_tc_conds));
 #endif
+  /* Extra instance is allocated to avoid false sharing */
+  if (!(tc= new Table_cache_instance[tc_instances + 1]))
+    DBUG_RETURN(true);
   tdc_inited= true;
   mysql_mutex_init(key_LOCK_unused_shares, &LOCK_unused_shares,
                    MY_MUTEX_INIT_FAST);
   tdc_version= 1L;  /* Increments on each reload */
-  lf_hash_init(&tdc_hash, sizeof(TDC_element), LF_HASH_UNIQUE, 0, 0,
+  lf_hash_init(&tdc_hash, sizeof(TDC_element) +
+                          sizeof(Share_free_tables) * tc_instances,
+               LF_HASH_UNIQUE, 0, 0,
                (my_hash_get_key) tdc_hash_key,
                &my_charset_bin);
   tdc_hash.alloc.constructor= lf_alloc_constructor;
   tdc_hash.alloc.destructor= lf_alloc_destructor;
   tdc_hash.initializer= (lf_hash_initializer) tdc_hash_initializer;
-  DBUG_VOID_RETURN;
+  DBUG_RETURN(false);
 }
@@ -631,6 +599,7 @@ void tdc_deinit(void)
     tdc_inited= false;
     lf_hash_destroy(&tdc_hash);
     mysql_mutex_destroy(&LOCK_unused_shares);
+    delete [] tc;
   }
   DBUG_VOID_RETURN;
 }
@@ -1038,7 +1007,7 @@ bool tdc_remove_table(THD *thd, enum_tdc_remove_table_type remove_type,
                       const char *db, const char *table_name,
                       bool kill_delayed_threads)
 {
-  TDC_element::TABLE_list purge_tables;
+  Share_free_tables::List purge_tables;
   TABLE *table;
   TDC_element *element;
   uint my_refs= 1;
diff --git a/sql/table_cache.h b/sql/table_cache.h
index c05baae..4f449cb 100644
--- a/sql/table_cache.h
+++ b/sql/table_cache.h
@@ -45,7 +43,7 @@ struct TDC_element
     for this share.
   */
   All_share_tables_list all_tables;
-  TABLE_list free_tables;
+  char pad[CPU_LEVEL1_DCACHE_LINESIZE]; // free_tables follows this immediately
hmm. may be you'd rather put padding in one place - at the beginning of
Share_free_tables?
...
};
Regards,
Sergei
Chief Architect MariaDB
and security@mariadb.org

    

Re: [Maria-developers] 48d1ba6: MDEV-10296 - Multi-instance table cache

Sergei Golubchik

Sergey Vojtovich

Sergei Golubchik

Sergey Vojtovich

tags

participants (2)