Hi Sergei!

Can you review that you are happy with the storage engine API changes? I've structured the commit to be as small as possible to achieve the desired outcome. In my tests, we are now twice as fast as MySQL for a 10 million row table with 13 columns.

Vicențiu

-------- Forwarded Message --------
From: vicentiu@mariadb.org
To: commits@mariadb.org
Subject: 53730224efd: Improve histogram collection performance by sampling
Date: Sun, 10 Feb 2019 20:09:49 +0200 (EET)

revision-id: 53730224efd987f97a6cc968ff5214ee499d84e0 (mariadb-10.4.1-163-g53730224efd)
parent(s): 3c305d3f1951f1667f84e48ddd98674c6318c39d
author: Vicențiu Ciorbaru
committer: Vicențiu Ciorbaru
timestamp: 2019-02-10 19:54:50 +0200
message:

Improve histogram collection performance by sampling

Histogram collection is done by sampling a percentage of rows from the table,
not looking at all individual ones.

The default implementation, to keep the server's Engine Independent
Statistics component working, uses Bernoulli sampling. It does a simple
table scan, but only accepts rows that pass a dice roll. This
implementation is done as a storage engine interface method, so as to
allow faster and/or more efficient implementations for storage engines
internally.

The number of rows collected has a lower bound of 50000 and
increases logarithmically with a coefficient of 4096. The coefficient is
chosen so that we expect an error of less than 3% in our estimations
according to the paper:
"Random Sampling for Histogram Construction: How much is enough?"
– Surajit Chaudhuri, Rajeev Motwani, Vivek Narasayya, ACM SIGMOD, 1998.

This interface is also a precursor to allowing SELECT ... FROM table
with sampling to work.

Performance wise, for a table of 10 million rows and 13 columns, 6 int,
6 doubles, one string, the previous analyze table statistics took
1 minute 20 seconds to collect all data. Post implementation, the
time is less than 6 seconds. This was run on an InnoDB table, NVME SSD with
approximately 2GB/s read speed and 500MB/s write speed.

---
 mysql-test/main/selectivity.result        |  8 +++----
 mysql-test/main/selectivity_innodb.result |  8 +++----
 sql/handler.cc                            | 14 +++++++++++
 sql/handler.h                             | 40 ++++++++++++++++++++++++++++++-
 sql/sql_statistics.cc                     | 32 +++++++++++++++++++------
 5 files changed, 86 insertions(+), 16 deletions(-)

diff --git a/mysql-test/main/selectivity.result b/mysql-test/main/selectivity.result
index 00907235ecc..6d09c1c9b62 100644
--- a/mysql-test/main/selectivity.result
+++ b/mysql-test/main/selectivity.result
@@ -1290,9 +1290,9 @@ explain extended
 select * from t1, t2, t1 as t3
 where t1.b=t2.c and t2.d=t3.a and t3.b<5 and t1.a < 2000;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	262144	100.00	Using where
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	262117	100.00	Using where
 1	SIMPLE	t2	ref	c,d	c	5	test.t1.b	5	100.00	
-1	SIMPLE	t3	ALL	NULL	NULL	NULL	NULL	262144	100.00	Using where; Using join buffer (flat, BNL join)
+1	SIMPLE	t3	ALL	NULL	NULL	NULL	NULL	262117	100.00	Using where; Using join buffer (flat, BNL join)
 Warnings:
 Note	1003	select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t2`.`c` AS `c`,`test`.`t2`.`d` AS `d`,`test`.`t3`.`a` AS `a`,`test`.`t3`.`b` AS `b` from `test`.`t1` join `test`.`t2` join `test`.`t1` `t3` where `test`.`t2`.`c` = `test`.`t1`.`b` and `test`.`t3`.`a` = `test`.`t2`.`d` and `test`.`t3`.`b` < 5 and `test`.`t1`.`a` < 2000
 select * from t1, t2, t1 as t3
@@ -1307,9 +1307,9 @@ explain extended
 select * from t1, t2, t1 as t3
 where t1.b=t2.c and t2.d=t3.a and t3.b<5 and t1.a < 2000;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t3	ALL	NULL	NULL	NULL	NULL	262144	0.00	Using where
+1	SIMPLE	t3	ALL	NULL	NULL	NULL	NULL	262117	0.00	Using where
 1	SIMPLE	t2	ref	c,d	d	5	test.t3.a	7	100.00	
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	262144	2.00	Using where; Using join buffer (flat, BNL join)
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	262117	2.00	Using where; Using join buffer (flat, BNL join)
 Warnings:
 Note	1003	select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t2`.`c` AS `c`,`test`.`t2`.`d` AS `d`,`test`.`t3`.`a` AS `a`,`test`.`t3`.`b` AS `b` from `test`.`t1` join `test`.`t2` join `test`.`t1` `t3` where `test`.`t1`.`b` = `test`.`t2`.`c` and `test`.`t2`.`d` = `test`.`t3`.`a` and `test`.`t3`.`b` < 5 and `test`.`t1`.`a` < 2000
 select * from t1, t2, t1 as t3
diff --git a/mysql-test/main/selectivity_innodb.result b/mysql-test/main/selectivity_innodb.result
index 93917065722..0b20a40f69f 100644
--- a/mysql-test/main/selectivity_innodb.result
+++ b/mysql-test/main/selectivity_innodb.result
@@ -1300,9 +1300,9 @@ explain extended
 select * from t1, t2, t1 as t3
 where t1.b=t2.c and t2.d=t3.a and t3.b<5 and t1.a < 2000;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	262144	100.00	Using where
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	262117	100.00	Using where
 1	SIMPLE	t2	ref	c,d	c	5	test.t1.b	5	100.00	
-1	SIMPLE	t3	ALL	NULL	NULL	NULL	NULL	262144	100.00	Using where; Using join buffer (flat, BNL join)
+1	SIMPLE	t3	ALL	NULL	NULL	NULL	NULL	262117	100.00	Using where; Using join buffer (flat, BNL join)
 Warnings:
 Note	1003	select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t2`.`c` AS `c`,`test`.`t2`.`d` AS `d`,`test`.`t3`.`a` AS `a`,`test`.`t3`.`b` AS `b` from `test`.`t1` join `test`.`t2` join `test`.`t1` `t3` where `test`.`t2`.`c` = `test`.`t1`.`b` and `test`.`t3`.`a` = `test`.`t2`.`d` and `test`.`t3`.`b` < 5 and `test`.`t1`.`a` < 2000
 select * from t1, t2, t1 as t3
@@ -1317,9 +1317,9 @@ explain extended
 select * from t1, t2, t1 as t3
 where t1.b=t2.c and t2.d=t3.a and t3.b<5 and t1.a < 2000;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
-1	SIMPLE	t3	ALL	NULL	NULL	NULL	NULL	262144	0.00	Using where
+1	SIMPLE	t3	ALL	NULL	NULL	NULL	NULL	262117	0.00	Using where
 1	SIMPLE	t2	ref	c,d	d	5	test.t3.a	7	100.00	
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	262144	2.00	Using where; Using join buffer (flat, BNL join)
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	262117	2.00	Using where; Using join buffer (flat, BNL join)
 Warnings:
 Note	1003	select `test`.`t1`.`a` AS `a`,`test`.`t1`.`b` AS `b`,`test`.`t2`.`c` AS `c`,`test`.`t2`.`d` AS `d`,`test`.`t3`.`a` AS `a`,`test`.`t3`.`b` AS `b` from `test`.`t1` join `test`.`t2` join `test`.`t1` `t3` where `test`.`t1`.`b` = `test`.`t2`.`c` and `test`.`t2`.`d` = `test`.`t3`.`a` and `test`.`t3`.`b` < 5 and `test`.`t1`.`a` < 2000
 select * from t1, t2, t1 as t3
diff --git a/sql/handler.cc b/sql/handler.cc
index 2e45b5883ea..16fb126918e 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -7547,3 +7547,17 @@ bool Vers_parse_info::check_sys_fields(const Lex_table_name &table_name,
            "ROW END" : found_flag ? "ROW START" : "ROW START/END");
   return true;
 }
+
+int handler::random_sample(uchar *buf)
+{
+  int rc;
+  THD *thd= ha_thd();
+  do
+  {
+    if (table->in_use->check_killed(1))
+      return HA_ERR_ABORTED_BY_USER;
+    rc= rnd_next(buf);
+  } while (rc == HA_ERR_RECORD_DELETED || thd_rnd(thd) > sample_fraction);
+
+  return rc;
+}
diff --git a/sql/handler.h b/sql/handler.h
index dfb2333b24e..fcdadbeb42b 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -1913,6 +1913,11 @@ enum enum_stats_auto_recalc { HA_STATS_AUTO_RECALC_DEFAULT= 0,
                               HA_STATS_AUTO_RECALC_ON,
                               HA_STATS_AUTO_RECALC_OFF };
 
+enum sample_mode {
+  HA_SAMPLE_BERNOULLI= 0,
+  HA_SAMPLE_SYSTEM
+};
+
 /**
   A helper struct for schema DDL statements:
     CREATE SCHEMA [IF NOT EXISTS] name [ schema_specification... ]
@@ -2947,9 +2952,11 @@ class handler :public Sql_alloc
   /** Length of ref (1-8 or the clustered key length) */
   uint ref_length;
   FT_INFO *ft_handler;
-  enum init_stat { NONE=0, INDEX, RND };
+  enum init_stat { NONE=0, INDEX, RND, SAMPLE };
   init_stat inited, pre_inited;
 
+  double sample_fraction= 0;
+  enum sample_mode sample_mode;
   const COND *pushed_cond;
   /**
     next_insert_id is the next value which should be inserted into the
@@ -3112,6 +3119,31 @@ class handler :public Sql_alloc
   virtual int prepare_range_scan(const key_range *start_key, const key_range *end_key)
   { return 0; }
 
+  int ha_random_sample_init(THD *thd, enum sample_mode mode, double fraction)
+    __attribute__((warn_unused_result))
+  {
+    DBUG_ENTER("ha_random_sample_init");
+    DBUG_ASSERT(inited==NONE);
+    int result;
+    sample_mode= mode;
+    sample_fraction= fraction;
+    inited= (result= random_sample_init(mode, fraction)) ? NONE : SAMPLE;
+    DBUG_RETURN(result);
+  }
+  int ha_random_sample(uchar *buf)
+    __attribute__((warn_unused_result))
+  {
+    DBUG_ENTER("ha_random_sample");
+    DBUG_ASSERT(inited == SAMPLE);
+    DBUG_RETURN(random_sample(buf));
+  }
+  int ha_random_sample_end()
+  {
+    DBUG_ENTER("ha_random_sample_end");
+    inited= NONE;
+    DBUG_RETURN(random_sample_end());
+  }
+
   int ha_rnd_init(bool scan) __attribute__ ((warn_unused_result))
   {
     DBUG_EXECUTE_IF("ha_rnd_init_fail", return HA_ERR_TABLE_DEF_CHANGED;);
@@ -4425,6 +4457,12 @@ class handler :public Sql_alloc
   /* Note: ha_index_read_idx_map() may bypass index_init() */
   virtual int index_init(uint idx, bool sorted) { return 0; }
   virtual int index_end() { return 0; }
+  virtual int random_sample_init(enum sample_mode mode, double fraction)
+  {
+    return rnd_init(TRUE);
+  }
+  virtual int random_sample(uchar *buf);
+  virtual int random_sample_end() { return rnd_end(); }
   /**
     rnd_init() can be called two times without rnd_end() in between
     (it only makes sense if scan=1).
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index db214a1fe28..22a015821de 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -2727,12 +2727,16 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
   Field *table_field;
   ha_rows rows= 0;
   handler *file=table->file;
+  double sample_fraction;
+  const ha_rows MIN_THRESHOLD_FOR_SAMPLING= 50000;
 
   DBUG_ENTER("collect_statistics_for_table");
 
   table->collected_stats->cardinality_is_null= TRUE;
   table->collected_stats->cardinality= 0;
 
+  table->file->info(HA_STATUS_VARIABLE);
+
   for (field_ptr= table->field; *field_ptr; field_ptr++)
   {
     table_field= *field_ptr;   
@@ -2743,12 +2747,27 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
 
   restore_record(table, s->default_values);
 
-  /* Perform a full table scan to collect statistics on 'table's columns */
-  if (!(rc= file->ha_rnd_init(TRUE)))
-  {  
+
+  if (file->records() < MIN_THRESHOLD_FOR_SAMPLING)
+  {
+    sample_fraction= 1;
+  }
+  else
+  {
+    sample_fraction= std::fmin(
+                (MIN_THRESHOLD_FOR_SAMPLING + 4096 *
+                 log(200 * file->records())) / file->records(), 1);
+  }
+
+
+  /* Fetch samples from the table to collect statistics on table's columns */
+
+  if (!(rc= file->ha_random_sample_init(thd, HA_SAMPLE_BERNOULLI,
+                                        sample_fraction)))
+  {
     DEBUG_SYNC(table->in_use, "statistics_collection_start");
 
-    while ((rc= file->ha_rnd_next(table->record[0])) != HA_ERR_END_OF_FILE)
+    while ((rc= file->ha_random_sample(table->record[0])) != HA_ERR_END_OF_FILE)
     {
       if (thd->killed)
         break;
@@ -2768,10 +2787,9 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
         break;
       rows++;
     }
-    file->ha_rnd_end();
+    file->ha_random_sample_end();
   }
   rc= (rc == HA_ERR_END_OF_FILE && !thd->killed) ? 0 : 1;
-
   /* 
     Calculate values for all statistical characteristics on columns and
     and for each field f of 'table' save them in the write_stat structure
@@ -2780,7 +2798,7 @@ int collect_statistics_for_table(THD *thd, TABLE *table)
   if (!rc)
   {
     table->collected_stats->cardinality_is_null= FALSE;
-    table->collected_stats->cardinality= rows;
+    table->collected_stats->cardinality= rows / sample_fraction;
   }
 
   bitmap_clear_all(table->write_set);