Hi Varun, Nice to see that most of the input has been addressed. However, the issue of missing test coverage is still not resolved: On Sat, Nov 24, 2018 at 09:47:57PM +0300, Sergey Petrunia wrote:
Please also add test coverage for this particular MDEV. This should be a testcase that shows that
- with PREFERABLY, ANALYZE will collect EITS stats, SELECT will use them.
- with PREFERABLY_FOR_READS : -- ANALYZE will not collect EITS stats -- ANALYZE .. PERSISTENT FOR ... will update the EITS stats. -- SELECT will use them.
and now it is still not addressed. I made this change: @@ -104,6 +104,8 @@ Use_stat_tables_mode get_use_stat_tables_mode(THD *thd) inline bool check_eits_collection_allowed(THD *thd) { + if (get_use_stat_tables_mode(thd) == PREFERABLY_FOR_QUERIES) + DBUG_ASSERT(0); return (get_use_stat_tables_mode(thd) == COMPLEMENTARY || get_use_stat_tables_mode(thd) == PREFERABLY); } and the testsuite still passes! On Wed, Dec 05, 2018 at 07:57:06PM +0530, Varun wrote:
revision-id: 2da93752430d0dd07344c9d62f2468bba8962b77 (mariadb-10.3.6-266-g2da93752430) parent(s): e0739064450f2c2be6d5de1d799582121747dd39 author: Varun Gupta committer: Varun Gupta timestamp: 2018-12-05 19:56:23 +0530 message:
MDEV-17255: New optimizer defaults and ANALYZE TABLE
Added to new values to the server variable use_stat_tables. The values are COMPLEMENTARY_FOR_QUERIES and PREFERABLY_FOR_QUERIES. Both these values don't allow to collect EITS for queries like analyze table t1; To collect EITS we would need to use the syntax with persistent like analyze table t1 persistent for columns (col1,col2...) index (idx1, idx2...) / ALL
Changing the default value from NEVER to PREFERABLY_FOR_QUERIES.
--- mysql-test/include/default_mysqld.cnf | 1 + mysql-test/main/stat_tables.result | 59 +++++++++++++++++++++++++++++++ mysql-test/main/stat_tables.test | 43 ++++++++++++++++++++++ mysql-test/main/stat_tables_innodb.result | 59 +++++++++++++++++++++++++++++++ sql/sql_admin.cc | 2 +- sql/sql_statistics.cc | 5 ++- sql/sql_statistics.h | 27 ++++++++++++++ sql/sys_vars.cc | 5 +-- 8 files changed, 195 insertions(+), 6 deletions(-)
diff --git a/mysql-test/include/default_mysqld.cnf b/mysql-test/include/default_mysqld.cnf index 69a2b58288b..5ba3bdeb92c 100644 --- a/mysql-test/include/default_mysqld.cnf +++ b/mysql-test/include/default_mysqld.cnf @@ -107,6 +107,7 @@ loose-performance-schema-consumer-thread-instrumentation=ON binlog-direct-non-transactional-updates
default-storage-engine=myisam +use-stat-tables=preferably_for_queries
loose-ssl-ca=@ENV.MYSQL_TEST_DIR/std_data/cacert.pem loose-ssl-cert=@ENV.MYSQL_TEST_DIR/std_data/server-cert.pem diff --git a/mysql-test/main/stat_tables.result b/mysql-test/main/stat_tables.result index 308529ece47..67bf3f9237d 100644 --- a/mysql-test/main/stat_tables.result +++ b/mysql-test/main/stat_tables.result @@ -605,4 +605,63 @@ SELECT MAX(pk) FROM t1; MAX(pk) NULL DROP TABLE t1; +# +# MDEV-17255: New optimizer defaults and ANALYZE TABLE +# +set @save_use_stat_tables= @@use_stat_tables; +create table t1 (a int, b int); +insert into t1(a,b) values (1,2),(1,3),(1,4),(1,5),(2,6),(2,7),(3,8),(3,9),(3,9),(4,10); +# +# with use_stat_tables= PREFERABLY_FOR_QUERIES +# analyze table t1 will not collect statistics +# +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +select * from mysql.column_stats; +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram +test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL +test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL +analyze +select * from t1 where a = 1 and b=3; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where +# +# with use_stat_tables= PREFERABLY_FOR_QUERIES +# analyze table t1 will collect statistics if we use PERSISTENT +# for columns, indexes or everything +# +analyze table t1 persistent for columns (a) indexes (); +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status Table is already up to date +select * from mysql.column_stats; +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram +test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL +test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL +# filtered shows that we used the data from stat tables +analyze +select * from t1 where a = 1 and b=3; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where +# +# with use_stat_tables= PREFERABLY +# analyze table t1 will collect statistics +# +set @@use_stat_tables=PREFERABLY; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status Table is already up to date +select * from mysql.column_stats; +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram +test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL +test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL +# filtered shows that we used the data from stat tables +analyze +select * from t1 where a=1 and b=3; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where +drop table t1; set use_stat_tables=@save_use_stat_tables; diff --git a/mysql-test/main/stat_tables.test b/mysql-test/main/stat_tables.test index 19bc0fa2f46..8616f6386e1 100644 --- a/mysql-test/main/stat_tables.test +++ b/mysql-test/main/stat_tables.test @@ -384,4 +384,47 @@ SELECT MAX(pk) FROM t1;
DROP TABLE t1;
+--echo # +--echo # MDEV-17255: New optimizer defaults and ANALYZE TABLE +--echo # + +set @save_use_stat_tables= @@use_stat_tables; +create table t1 (a int, b int); +insert into t1(a,b) values (1,2),(1,3),(1,4),(1,5),(2,6),(2,7),(3,8),(3,9),(3,9),(4,10); + +--echo # +--echo # with use_stat_tables= PREFERABLY_FOR_QUERIES +--echo # analyze table t1 will not collect statistics +--echo # + +analyze table t1; +select * from mysql.column_stats; +analyze +select * from t1 where a = 1 and b=3; + +--echo # +--echo # with use_stat_tables= PREFERABLY_FOR_QUERIES +--echo # analyze table t1 will collect statistics if we use PERSISTENT +--echo # for columns, indexes or everything +--echo # + +analyze table t1 persistent for columns (a) indexes (); +select * from mysql.column_stats; +--echo # filtered shows that we used the data from stat tables +analyze +select * from t1 where a = 1 and b=3; + +--echo # +--echo # with use_stat_tables= PREFERABLY +--echo # analyze table t1 will collect statistics +--echo # + +set @@use_stat_tables=PREFERABLY; +analyze table t1; +select * from mysql.column_stats; +--echo # filtered shows that we used the data from stat tables +analyze +select * from t1 where a=1 and b=3; +drop table t1; + set use_stat_tables=@save_use_stat_tables; diff --git a/mysql-test/main/stat_tables_innodb.result b/mysql-test/main/stat_tables_innodb.result index 8198e94dc10..4bb8f34712c 100644 --- a/mysql-test/main/stat_tables_innodb.result +++ b/mysql-test/main/stat_tables_innodb.result @@ -632,6 +632,65 @@ SELECT MAX(pk) FROM t1; MAX(pk) NULL DROP TABLE t1; +# +# MDEV-17255: New optimizer defaults and ANALYZE TABLE +# +set @save_use_stat_tables= @@use_stat_tables; +create table t1 (a int, b int); +insert into t1(a,b) values (1,2),(1,3),(1,4),(1,5),(2,6),(2,7),(3,8),(3,9),(3,9),(4,10); +# +# with use_stat_tables= PREFERABLY_FOR_QUERIES +# analyze table t1 will not collect statistics +# +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +select * from mysql.column_stats; +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram +test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL +test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL +analyze +select * from t1 where a = 1 and b=3; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where +# +# with use_stat_tables= PREFERABLY_FOR_QUERIES +# analyze table t1 will collect statistics if we use PERSISTENT +# for columns, indexes or everything +# +analyze table t1 persistent for columns (a) indexes (); +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +select * from mysql.column_stats; +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram +test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL +test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL +# filtered shows that we used the data from stat tables +analyze +select * from t1 where a = 1 and b=3; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where +# +# with use_stat_tables= PREFERABLY +# analyze table t1 will collect statistics +# +set @@use_stat_tables=PREFERABLY; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +select * from mysql.column_stats; +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram +test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL +test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL +# filtered shows that we used the data from stat tables +analyze +select * from t1 where a=1 and b=3; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where +drop table t1; set use_stat_tables=@save_use_stat_tables; set optimizer_switch=@save_optimizer_switch_for_stat_tables_test; SET SESSION STORAGE_ENGINE=DEFAULT; diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc index d0d959de8f9..b39103e382a 100644 --- a/sql/sql_admin.cc +++ b/sql/sql_admin.cc @@ -767,7 +767,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, } collect_eis= (table->table->s->table_category == TABLE_CATEGORY_USER && - (get_use_stat_tables_mode(thd) > NEVER || + (check_eits_collection_allowed(thd) || lex->with_persistent_for_clause));
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 04806f07b3b..8c88f7f927f 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -3720,9 +3720,8 @@ void set_statistics_for_table(THD *thd, TABLE *table) { TABLE_STATISTICS_CB *stats_cb= &table->s->stats_cb; Table_statistics *read_stats= stats_cb->table_stats; - Use_stat_tables_mode use_stat_table_mode= get_use_stat_tables_mode(thd); table->used_stat_records= - (use_stat_table_mode <= COMPLEMENTARY || + (!check_eits_preferred(thd) || !table->stats_is_read || read_stats->cardinality_is_null) ? table->file->stats.records : read_stats->cardinality; KEY *key_info, *key_info_end; @@ -3730,7 +3729,7 @@ void set_statistics_for_table(THD *thd, TABLE *table) key_info < key_info_end; key_info++) { key_info->is_statistics_from_stat_tables= - (use_stat_table_mode > COMPLEMENTARY && + (check_eits_preferred(thd) && table->stats_is_read && key_info->read_stats->avg_frequency_is_inited() && key_info->read_stats->get_avg_frequency(0) > 0.5); diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index 39cddf95188..8439ac8db53 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -16,12 +16,26 @@ #ifndef SQL_STATISTICS_H #define SQL_STATISTICS_H
+/* + For COMPLEMENTARY_FOR_QUERIES and PREFERABLY_FOR_QUERIES they are + similar to the COMPLEMENTARY and PREFERABLY respectively except that + with these values we would not be collecting EITS for queries like + ANALYZE TABLE t1; + To collect EITS with these values, we have to use PERSISITENT FOR + analyze table t1 persistent for + columns (col1,col2...) index (idx1, idx2...) + or + analyze table t1 persistent for all +*/ + typedef enum enum_use_stat_tables_mode { NEVER, COMPLEMENTARY, PREFERABLY, + COMPLEMENTARY_FOR_QUERIES, + PREFERABLY_FOR_QUERIES } Use_stat_tables_mode;
typedef @@ -87,6 +101,19 @@ Use_stat_tables_mode get_use_stat_tables_mode(THD *thd) { return (Use_stat_tables_mode) (thd->variables.use_stat_tables); } +inline +bool check_eits_collection_allowed(THD *thd) +{ + return (get_use_stat_tables_mode(thd) == COMPLEMENTARY || + get_use_stat_tables_mode(thd) == PREFERABLY); +} + +inline +bool check_eits_preferred(THD *thd) +{ + return (get_use_stat_tables_mode(thd) == PREFERABLY || + get_use_stat_tables_mode(thd) == PREFERABLY_FOR_QUERIES); +}
int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables); int collect_statistics_for_table(THD *thd, TABLE *table); diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 420e7feabde..b925de34db4 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -5842,12 +5842,13 @@ static Sys_var_ulong Sys_progress_report_time( VALID_RANGE(0, UINT_MAX), DEFAULT(5), BLOCK_SIZE(1));
const char *use_stat_tables_modes[] = - {"NEVER", "COMPLEMENTARY", "PREFERABLY", 0}; + {"NEVER", "COMPLEMENTARY", "PREFERABLY", + "COMPLEMENTARY_FOR_QUERIES", "PREFERABLY_FOR_QUERIES", 0}; static Sys_var_enum Sys_optimizer_use_stat_tables( "use_stat_tables", "Specifies how to use system statistics tables", SESSION_VAR(use_stat_tables), CMD_LINE(REQUIRED_ARG), - use_stat_tables_modes, DEFAULT(2)); + use_stat_tables_modes, DEFAULT(4));
static Sys_var_ulong Sys_histogram_size( "histogram_size", _______________________________________________ commits mailing list commits@mariadb.org https://lists.askmonty.org/cgi-bin/mailman/listinfo/commits
-- BR Sergei -- Sergei Petrunia, Software Developer MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog