revision-id: 1ed2d8b98ade099fe23b7d5c00d23364388e15aa (mariadb-10.0.36-80-g1ed2d8b98ad)
parent(s): a84d87fde8c0bc325c8e00f06ea02bcd84a75d55
author: Varun Gupta
committer: Varun Gupta
timestamp: 2018-11-16 19:40:47 +0530
message:
MDEV-17032: Estimates are higher for partitions of a table with @@use_stat_tables= PREFERABLY
The problem here is EITS statistics does not calculate statistics for the partitions of the table.
So a temporary solution would be to not read EITS statistics for partitioned tables.
Also disabling reading of EITS for columns that participate in the partition list of a table.
---
mysql-test/r/partition.result | 88 +++++++++++++++++++++++++++++++++++++++++++
mysql-test/t/partition.test | 58 ++++++++++++++++++++++++++++
sql/opt_range.cc | 12 +++++-
sql/partition_info.cc | 26 +++++++++++++
sql/partition_info.h | 1 +
sql/sql_statistics.cc | 16 ++++++++
6 files changed, 199 insertions(+), 2 deletions(-)
diff --git a/mysql-test/r/partition.result b/mysql-test/r/partition.result
index c6669176b3d..aedf9f89f0e 100644
--- a/mysql-test/r/partition.result
+++ b/mysql-test/r/partition.result
@@ -2645,3 +2645,91 @@ Warnings:
Note 1517 Duplicate partition name p2
DEALLOCATE PREPARE stmt;
DROP TABLE t1;
+#
+# MDEV-17032: Estimates are higher for partitions of a table with @@use_stat_tables= PREFERABLY
+#
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int);
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+create table t2 (
+part_key int,
+a int,
+b int
+) partition by list(part_key) (
+partition p0 values in (0),
+partition p1 values in (1),
+partition p2 values in (2),
+partition p3 values in (3),
+partition p4 values in (4)
+);
+insert into t2
+select mod(a,5), a/100, a from t1;
+set @save_use_stat_tables= @@use_stat_tables;
+set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
+#
+# Tests using stats provided by the storage engine
+#
+explain extended select * from t2 where part_key=1;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 200 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` = 1)
+explain partitions select * from t2 where part_key=1;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 p1 ALL NULL NULL NULL NULL 200 Using where
+explain extended select * from t2 where part_key in (1,2);
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 400 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` in (1,2))
+explain partitions select * from t2 where part_key=1;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 p1 ALL NULL NULL NULL NULL 200 Using where
+explain extended select * from t2 where part_key in (1,2);
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 400 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` in (1,2))
+explain partitions select * from t2 where part_key in (1,2);
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 p1,p2 ALL NULL NULL NULL NULL 400 Using where
+set @save_histogram_size=@@histogram_size;
+set @@histogram_size=100;
+set @@use_stat_tables= PREFERABLY;
+set @@optimizer_use_condition_selectivity=4;
+analyze table t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status OK
+#
+# Tests using EITS
+#
+explain extended select * from t2 where part_key=1;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 200 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` = 1)
+explain partitions select * from t2 where part_key=1;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 p1 ALL NULL NULL NULL NULL 200 Using where
+explain extended select * from t2 where part_key in (1,2);
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 400 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` in (1,2))
+explain partitions select * from t2 where part_key=1;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 p1 ALL NULL NULL NULL NULL 200 Using where
+explain extended select * from t2 where part_key in (1,2);
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 400 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` in (1,2))
+explain partitions select * from t2 where part_key in (1,2);
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 p1,p2 ALL NULL NULL NULL NULL 400 Using where
+set @@use_stat_tables= @save_use_stat_tables;
+set @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
+set @@histogram_size= @save_histogram_size;
+drop table t0,t1,t2;
diff --git a/mysql-test/t/partition.test b/mysql-test/t/partition.test
index 1c8cd0375d6..00c6f1ce77c 100644
--- a/mysql-test/t/partition.test
+++ b/mysql-test/t/partition.test
@@ -2897,3 +2897,61 @@ EXECUTE stmt;
DEALLOCATE PREPARE stmt;
DROP TABLE t1;
+--echo #
+--echo # MDEV-17032: Estimates are higher for partitions of a table with @@use_stat_tables= PREFERABLY
+--echo #
+
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t1(a int);
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+
+
+create table t2 (
+ part_key int,
+ a int,
+ b int
+) partition by list(part_key) (
+ partition p0 values in (0),
+ partition p1 values in (1),
+ partition p2 values in (2),
+ partition p3 values in (3),
+ partition p4 values in (4)
+);
+insert into t2
+select mod(a,5), a/100, a from t1;
+
+set @save_use_stat_tables= @@use_stat_tables;
+set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
+--echo #
+--echo # Tests using stats provided by the storage engine
+--echo #
+explain extended select * from t2 where part_key=1;
+explain partitions select * from t2 where part_key=1;
+explain extended select * from t2 where part_key in (1,2);
+explain partitions select * from t2 where part_key=1;
+explain extended select * from t2 where part_key in (1,2);
+explain partitions select * from t2 where part_key in (1,2);
+
+set @save_histogram_size=@@histogram_size;
+set @@histogram_size=100;
+set @@use_stat_tables= PREFERABLY;
+set @@optimizer_use_condition_selectivity=4;
+analyze table t2;
+--echo #
+--echo # Tests using EITS
+--echo #
+# filtered should be 100
+explain extended select * from t2 where part_key=1;
+explain partitions select * from t2 where part_key=1;
+# filtered should be 100
+explain extended select * from t2 where part_key in (1,2);
+explain partitions select * from t2 where part_key=1;
+# filtered should be 100
+explain extended select * from t2 where part_key in (1,2);
+explain partitions select * from t2 where part_key in (1,2);
+set @@use_stat_tables= @save_use_stat_tables;
+set @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
+set @@histogram_size= @save_histogram_size;
+drop table t0,t1,t2;
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 3bcaa72e32f..59aa2f3f280 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -3322,6 +3322,10 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
{
Field **field_ptr;
TABLE *table= param->table;
+ partition_info *part_info= NULL;
+ #ifdef WITH_PARTITION_STORAGE_ENGINE
+ part_info= table->part_info;
+ #endif
uint parts= 0;
for (field_ptr= table->field; *field_ptr; field_ptr++)
@@ -3329,7 +3333,9 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
Column_statistics* col_stats= (*field_ptr)->read_stats;
if (bitmap_is_set(used_fields, (*field_ptr)->field_index)
&& col_stats && !col_stats->no_stat_values_provided()
- && !((*field_ptr)->type() == MYSQL_TYPE_GEOMETRY))
+ && !((*field_ptr)->type() == MYSQL_TYPE_GEOMETRY)
+ && (!part_info ||
+ part_info->field_in_partition_expr(*field_ptr)))
parts++;
}
@@ -3350,7 +3356,9 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
if (bitmap_is_set(used_fields, (*field_ptr)->field_index))
{
Field *field= *field_ptr;
- if (field->type() == MYSQL_TYPE_GEOMETRY)
+ if (field->type() == MYSQL_TYPE_GEOMETRY
+ || (!part_info ||
+ part_info->field_in_partition_expr(field)))
continue;
uint16 store_length;
diff --git a/sql/partition_info.cc b/sql/partition_info.cc
index 52bda560c1c..0111fc1451d 100644
--- a/sql/partition_info.cc
+++ b/sql/partition_info.cc
@@ -3164,6 +3164,32 @@ void partition_info::print_debug(const char *str, uint *value)
DBUG_PRINT("info", ("parser: %s", str));
DBUG_VOID_RETURN;
}
+
+/*
+
+ Disabling reading EITS statistics for columns involved in the
+ partition list of a table.
+ We assume the selecticivity for such columns would be handled
+ during partition pruning.
+
+*/
+
+bool partition_info::field_in_partition_expr(Field *field) const
+{
+ uint i;
+ for (i= 0; i < num_part_fields; i++)
+ {
+ if (field->eq(part_field_array[i]))
+ return FALSE;
+ }
+ for (i= 0; i < num_subpart_fields; i++)
+ {
+ if (field->eq(subpart_field_array[i]))
+ return FALSE;
+ }
+ return TRUE;
+}
+
#else /* WITH_PARTITION_STORAGE_ENGINE */
/*
For builds without partitioning we need to define these functions
diff --git a/sql/partition_info.h b/sql/partition_info.h
index f250c5496bf..10b8954ace7 100644
--- a/sql/partition_info.h
+++ b/sql/partition_info.h
@@ -384,6 +384,7 @@ class partition_info : public Sql_alloc
bool is_full_part_expr_in_fields(List<Item> &fields);
public:
bool has_unique_name(partition_element *element);
+ bool field_in_partition_expr(Field *field) const;
};
uint32 get_next_partition_id_range(struct st_partition_iter* part_iter);
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index cb75a5c2176..02dd4970c99 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -3589,6 +3589,22 @@ void set_statistics_for_table(THD *thd, TABLE *table)
(use_stat_table_mode <= COMPLEMENTARY ||
!table->stats_is_read || read_stats->cardinality_is_null) ?
table->file->stats.records : read_stats->cardinality;
+
+ /*
+ For partitioned table, EITS statistics is based on data from all partitions.
+
+ On the other hand, Partition Pruning figures which partitions will be
+ accessed and then computes the estimate of rows in used_partitions.
+
+ Use the estimate from Partition Pruning as it is typically more precise.
+ Ideally, EITS should provide per-partition statistics but this is not
+ implemented currently.
+ */
+ #ifdef WITH_PARTITION_STORAGE_ENGINE
+ if (table->part_info)
+ table->used_stat_records= table->file->stats.records;
+ #endif
+
KEY *key_info, *key_info_end;
for (key_info= table->key_info, key_info_end= key_info+table->s->keys;
key_info < key_info_end; key_info++)