revision-id: 35cd9eb6c5d8a0922b1544736320f013a485395d (mariadb-10.3.6-266-g35cd9eb6c5d) parent(s): e0739064450f2c2be6d5de1d799582121747dd39 author: Varun Gupta committer: Varun Gupta timestamp: 2018-12-07 02:56:18 +0530 message: MDEV-17255: New optimizer defaults and ANALYZE TABLE Added to new values to the server variable use_stat_tables. The values are COMPLEMENTARY_FOR_QUERIES and PREFERABLY_FOR_QUERIES. Both these values don't allow to collect EITS for queries like analyze table t1; To collect EITS we would need to use the syntax with persistent like analyze table t1 persistent for columns (col1,col2...) index (idx1, idx2...) / ALL Changing the default value from NEVER to PREFERABLY_FOR_QUERIES. --- mysql-test/main/stat_tables.result | 56 +++++++++++++++++++++++++++++++ mysql-test/main/stat_tables.test | 42 +++++++++++++++++++++++ mysql-test/main/stat_tables_innodb.result | 56 +++++++++++++++++++++++++++++++ sql/sql_admin.cc | 2 +- sql/sql_statistics.cc | 5 ++- sql/sql_statistics.h | 27 +++++++++++++++ sql/sys_vars.cc | 5 +-- 7 files changed, 187 insertions(+), 6 deletions(-) diff --git a/mysql-test/main/stat_tables.result b/mysql-test/main/stat_tables.result index 308529ece47..e7ace3dd85a 100644 --- a/mysql-test/main/stat_tables.result +++ b/mysql-test/main/stat_tables.result @@ -606,3 +606,59 @@ MAX(pk) NULL DROP TABLE t1; set use_stat_tables=@save_use_stat_tables; +# +# MDEV-17255: New optimizer defaults and ANALYZE TABLE +# +create table t1 (a int, b int); +insert into t1(a,b) values (1,2),(1,3),(1,4),(1,5),(2,6),(2,7),(3,8),(3,9),(3,9),(4,10); +set use_stat_tables= preferably_for_queries; +# +# with use_stat_tables= PREFERABLY_FOR_QUERIES +# analyze table t1 will not collect statistics +# +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select * from mysql.column_stats; +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram +analyze +select * from t1 where a = 1 and b=3; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 100.00 10.00 Using where +# +# with use_stat_tables= PREFERABLY_FOR_QUERIES +# analyze table t1 will collect statistics if we use PERSISTENT +# for columns, indexes or everything +# +analyze table t1 persistent for columns (a) indexes (); +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status Table is already up to date +select * from mysql.column_stats; +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram +test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL +# filtered shows that we used the data from stat tables +analyze +select * from t1 where a = 1 and b=3; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 25.00 10.00 Using where +# +# with use_stat_tables= PREFERABLY +# analyze table t1 will collect statistics +# +set use_stat_tables=PREFERABLY; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status Table is already up to date +select * from mysql.column_stats; +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram +test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL +test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL +# filtered shows that we used the data from stat tables +analyze +select * from t1 where a=1 and b=3; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where +drop table t1; +set use_stat_tables=@save_use_stat_tables; diff --git a/mysql-test/main/stat_tables.test b/mysql-test/main/stat_tables.test index 19bc0fa2f46..3e6e2cf3093 100644 --- a/mysql-test/main/stat_tables.test +++ b/mysql-test/main/stat_tables.test @@ -383,5 +383,47 @@ CREATE OR REPLACE TABLE t1 (pk INT PRIMARY KEY, t TEXT); SELECT MAX(pk) FROM t1; DROP TABLE t1; +set use_stat_tables=@save_use_stat_tables; + +--echo # +--echo # MDEV-17255: New optimizer defaults and ANALYZE TABLE +--echo # + +create table t1 (a int, b int); +insert into t1(a,b) values (1,2),(1,3),(1,4),(1,5),(2,6),(2,7),(3,8),(3,9),(3,9),(4,10); +set use_stat_tables= preferably_for_queries; +--echo # +--echo # with use_stat_tables= PREFERABLY_FOR_QUERIES +--echo # analyze table t1 will not collect statistics +--echo # + +analyze table t1; +select * from mysql.column_stats; +analyze +select * from t1 where a = 1 and b=3; +--echo # +--echo # with use_stat_tables= PREFERABLY_FOR_QUERIES +--echo # analyze table t1 will collect statistics if we use PERSISTENT +--echo # for columns, indexes or everything +--echo # + +analyze table t1 persistent for columns (a) indexes (); +select * from mysql.column_stats; +--echo # filtered shows that we used the data from stat tables +analyze +select * from t1 where a = 1 and b=3; + +--echo # +--echo # with use_stat_tables= PREFERABLY +--echo # analyze table t1 will collect statistics +--echo # + +set use_stat_tables=PREFERABLY; +analyze table t1; +select * from mysql.column_stats; +--echo # filtered shows that we used the data from stat tables +analyze +select * from t1 where a=1 and b=3; +drop table t1; set use_stat_tables=@save_use_stat_tables; diff --git a/mysql-test/main/stat_tables_innodb.result b/mysql-test/main/stat_tables_innodb.result index 8198e94dc10..369d85da985 100644 --- a/mysql-test/main/stat_tables_innodb.result +++ b/mysql-test/main/stat_tables_innodb.result @@ -633,5 +633,61 @@ MAX(pk) NULL DROP TABLE t1; set use_stat_tables=@save_use_stat_tables; +# +# MDEV-17255: New optimizer defaults and ANALYZE TABLE +# +create table t1 (a int, b int); +insert into t1(a,b) values (1,2),(1,3),(1,4),(1,5),(2,6),(2,7),(3,8),(3,9),(3,9),(4,10); +set use_stat_tables= preferably_for_queries; +# +# with use_stat_tables= PREFERABLY_FOR_QUERIES +# analyze table t1 will not collect statistics +# +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select * from mysql.column_stats; +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram +analyze +select * from t1 where a = 1 and b=3; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 100.00 10.00 Using where +# +# with use_stat_tables= PREFERABLY_FOR_QUERIES +# analyze table t1 will collect statistics if we use PERSISTENT +# for columns, indexes or everything +# +analyze table t1 persistent for columns (a) indexes (); +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +select * from mysql.column_stats; +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram +test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL +# filtered shows that we used the data from stat tables +analyze +select * from t1 where a = 1 and b=3; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 25.00 10.00 Using where +# +# with use_stat_tables= PREFERABLY +# analyze table t1 will collect statistics +# +set use_stat_tables=PREFERABLY; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status Engine-independent statistics collected +test.t1 analyze status OK +select * from mysql.column_stats; +db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram +test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL +test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL +# filtered shows that we used the data from stat tables +analyze +select * from t1 where a=1 and b=3; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where +drop table t1; +set use_stat_tables=@save_use_stat_tables; set optimizer_switch=@save_optimizer_switch_for_stat_tables_test; SET SESSION STORAGE_ENGINE=DEFAULT; diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc index d0d959de8f9..b39103e382a 100644 --- a/sql/sql_admin.cc +++ b/sql/sql_admin.cc @@ -767,7 +767,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables, } collect_eis= (table->table->s->table_category == TABLE_CATEGORY_USER && - (get_use_stat_tables_mode(thd) > NEVER || + (check_eits_collection_allowed(thd) || lex->with_persistent_for_clause)); diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 04806f07b3b..8c88f7f927f 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -3720,9 +3720,8 @@ void set_statistics_for_table(THD *thd, TABLE *table) { TABLE_STATISTICS_CB *stats_cb= &table->s->stats_cb; Table_statistics *read_stats= stats_cb->table_stats; - Use_stat_tables_mode use_stat_table_mode= get_use_stat_tables_mode(thd); table->used_stat_records= - (use_stat_table_mode <= COMPLEMENTARY || + (!check_eits_preferred(thd) || !table->stats_is_read || read_stats->cardinality_is_null) ? table->file->stats.records : read_stats->cardinality; KEY *key_info, *key_info_end; @@ -3730,7 +3729,7 @@ void set_statistics_for_table(THD *thd, TABLE *table) key_info < key_info_end; key_info++) { key_info->is_statistics_from_stat_tables= - (use_stat_table_mode > COMPLEMENTARY && + (check_eits_preferred(thd) && table->stats_is_read && key_info->read_stats->avg_frequency_is_inited() && key_info->read_stats->get_avg_frequency(0) > 0.5); diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index 39cddf95188..8439ac8db53 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -16,12 +16,26 @@ #ifndef SQL_STATISTICS_H #define SQL_STATISTICS_H +/* + For COMPLEMENTARY_FOR_QUERIES and PREFERABLY_FOR_QUERIES they are + similar to the COMPLEMENTARY and PREFERABLY respectively except that + with these values we would not be collecting EITS for queries like + ANALYZE TABLE t1; + To collect EITS with these values, we have to use PERSISITENT FOR + analyze table t1 persistent for + columns (col1,col2...) index (idx1, idx2...) + or + analyze table t1 persistent for all +*/ + typedef enum enum_use_stat_tables_mode { NEVER, COMPLEMENTARY, PREFERABLY, + COMPLEMENTARY_FOR_QUERIES, + PREFERABLY_FOR_QUERIES } Use_stat_tables_mode; typedef @@ -87,6 +101,19 @@ Use_stat_tables_mode get_use_stat_tables_mode(THD *thd) { return (Use_stat_tables_mode) (thd->variables.use_stat_tables); } +inline +bool check_eits_collection_allowed(THD *thd) +{ + return (get_use_stat_tables_mode(thd) == COMPLEMENTARY || + get_use_stat_tables_mode(thd) == PREFERABLY); +} + +inline +bool check_eits_preferred(THD *thd) +{ + return (get_use_stat_tables_mode(thd) == PREFERABLY || + get_use_stat_tables_mode(thd) == PREFERABLY_FOR_QUERIES); +} int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables); int collect_statistics_for_table(THD *thd, TABLE *table); diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 420e7feabde..b925de34db4 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -5842,12 +5842,13 @@ static Sys_var_ulong Sys_progress_report_time( VALID_RANGE(0, UINT_MAX), DEFAULT(5), BLOCK_SIZE(1)); const char *use_stat_tables_modes[] = - {"NEVER", "COMPLEMENTARY", "PREFERABLY", 0}; + {"NEVER", "COMPLEMENTARY", "PREFERABLY", + "COMPLEMENTARY_FOR_QUERIES", "PREFERABLY_FOR_QUERIES", 0}; static Sys_var_enum Sys_optimizer_use_stat_tables( "use_stat_tables", "Specifies how to use system statistics tables", SESSION_VAR(use_stat_tables), CMD_LINE(REQUIRED_ARG), - use_stat_tables_modes, DEFAULT(2)); + use_stat_tables_modes, DEFAULT(4)); static Sys_var_ulong Sys_histogram_size( "histogram_size",