[Commits] bdb90055c02: MDEV-27229: Estimation for filtered rows less precise ... #5
revision-id: bdb90055c02a348e224574bab8113e76c5105a65 (mariadb-10.6.1-348-gbdb90055c02) parent(s): bd3930075b34d576dcebe14037d1d654c9fa8232 author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2022-01-11 16:58:51 +0300 message: MDEV-27229: Estimation for filtered rows less precise ... #5 Followup: remove this line from get_column_range_cardinality() set_if_bigger(res, col_stats->get_avg_frequency()); and make sure it is only used with the binary histograms. For JSON histograms, it makes the estimates unnecessarily imprecise. --- mysql-test/main/statistics_json.result | 17 +++++++++++++---- mysql-test/main/statistics_json.test | 6 +++--- sql/opt_histogram_json.cc | 2 +- sql/opt_histogram_json.h | 2 +- sql/opt_range.cc | 5 ++++- sql/sql_statistics.cc | 13 +++++++++---- sql/sql_statistics.h | 4 ++-- 7 files changed, 33 insertions(+), 16 deletions(-) diff --git a/mysql-test/main/statistics_json.result b/mysql-test/main/statistics_json.result index 5de350fb21d..600b2ddd4ee 100644 --- a/mysql-test/main/statistics_json.result +++ b/mysql-test/main/statistics_json.result @@ -4644,7 +4644,7 @@ Warnings: Note 1003 select `test`.`t1_json`.`a` AS `a` from `test`.`t1_json` where `test`.`t1_json`.`a` < 'b-1a' analyze select * from t1_json where a > 'zzzzzzzzz'; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 10.00 0.00 Using where +1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 0.00 0.00 Using where drop table ten; UPDATE mysql.column_stats SET histogram='["not-what-you-expect"]' WHERE table_name='t1_json'; @@ -4752,7 +4752,7 @@ id select_type table type possible_keys key key_len ref rows r_rows filtered r_f 1 SIMPLE t2 ALL NULL NULL NULL NULL 101 101.00 1.98 1.98 Using where explain extended select * from t2 where city < 'Lagos'; id select_type table type possible_keys key key_len ref rows filtered Extra -1 SIMPLE t2 ALL NULL NULL NULL NULL 101 50.00 Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 101 1.98 Using where Warnings: Note 1003 select `test`.`t2`.`city` AS `city` from `test`.`t2` where `test`.`t2`.`city` < 'Lagos' drop table t1_bin; @@ -8164,7 +8164,7 @@ id select_type table type possible_keys key key_len ref rows r_rows filtered r_f 1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 33.00 33.00 Using where analyze select * from t1 where a < 0; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 1.47 0.00 Using where +1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 1.00 0.00 Using where analyze select * from t1 where a > 0; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 67.00 67.00 Using where @@ -8189,7 +8189,7 @@ test.t2 analyze status Engine-independent statistics collected test.t2 analyze status OK analyze select * from t2 where a < 1; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra -1 SIMPLE t2 ALL NULL NULL NULL NULL 1011 1011.00 8.33 0.10 Using where +1 SIMPLE t2 ALL NULL NULL NULL NULL 1011 1011.00 0.10 0.10 Using where analyze select * from t2 where a =100; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra 1 SIMPLE t2 ALL NULL NULL NULL NULL 1011 1011.00 0.10 0.10 Using where @@ -8237,6 +8237,15 @@ id select_type table type possible_keys key key_len ref rows r_rows filtered r_f analyze select COUNT(*) FROM t1 WHERE a >='bar'; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 100.00 100.00 Using where +analyze select COUNT(*) FROM t1 WHERE a < 'aaa'; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 0.50 0.00 Using where +analyze select COUNT(*) FROM t1 WHERE a <='aaa'; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 0.50 0.00 Using where +analyze select COUNT(*) FROM t1 WHERE a < 'bar'; +id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 0.50 0.00 Using where analyze select COUNT(*) FROM t1 WHERE a <='bar'; id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra 1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 50.00 50.00 Using where diff --git a/mysql-test/main/statistics_json.test b/mysql-test/main/statistics_json.test index 7ec6c691a99..93a526dbfb0 100644 --- a/mysql-test/main/statistics_json.test +++ b/mysql-test/main/statistics_json.test @@ -409,9 +409,9 @@ analyze select COUNT(*) FROM t1 WHERE a > 'bar'; analyze select COUNT(*) FROM t1 WHERE a >='bar'; # Can enable these after get_avg_frequency issue is resolved: -# analyze select COUNT(*) FROM t1 WHERE a < 'aaa'; -# analyze select COUNT(*) FROM t1 WHERE a <='aaa'; -# analyze select COUNT(*) FROM t1 WHERE a < 'bar'; +analyze select COUNT(*) FROM t1 WHERE a < 'aaa'; +analyze select COUNT(*) FROM t1 WHERE a <='aaa'; +analyze select COUNT(*) FROM t1 WHERE a < 'bar'; analyze select COUNT(*) FROM t1 WHERE a <='bar'; diff --git a/sql/opt_histogram_json.cc b/sql/opt_histogram_json.cc index 7c037183f41..4305737df1c 100644 --- a/sql/opt_histogram_json.cc +++ b/sql/opt_histogram_json.cc @@ -962,7 +962,7 @@ std::string& Histogram_json_hb::get_end_value(int idx) */ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp, - key_range *max_endp) + key_range *max_endp, double avg_sel) { double min, max; diff --git a/sql/opt_histogram_json.h b/sql/opt_histogram_json.h index 9e32e70f7fc..48e9a29c8a5 100644 --- a/sql/opt_histogram_json.h +++ b/sql/opt_histogram_json.h @@ -129,7 +129,7 @@ class Histogram_json_hb : public Histogram_base double avg_selection, double total_rows) override; double range_selectivity(Field *field, key_range *min_endp, - key_range *max_endp) override; + key_range *max_endp, double avg_sel) override; void set_json_text(ulonglong sz, const char *json_text_arg, size_t json_text_len) diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 06063cb9ae1..bb5043eecd7 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -3277,7 +3277,10 @@ double records_in_column_ranges(PARAM *param, uint idx, break; } total_rows += rows; - } + } + if (total_rows == 0) + total_rows= MY_MIN(1, param->table->stat_records()); + return total_rows; } diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 6b3ea834628..fc8e38173c7 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -3910,10 +3910,13 @@ double get_column_range_cardinality(Field *field, if (col_stats->min_max_values_are_provided()) { Histogram_base *hist= col_stats->histogram; + double avg_frequency= col_stats->get_avg_frequency(); double sel; if (hist && hist->is_usable(thd)) { - sel= hist->range_selectivity(field, min_endp, max_endp); + sel= hist->range_selectivity(field, min_endp, max_endp, + avg_frequency / col_non_nulls); + res= col_non_nulls * sel; } else { @@ -3938,9 +3941,9 @@ double get_column_range_cardinality(Field *field, max_mp_pos= 1.0; sel = (max_mp_pos - min_mp_pos); + res= col_non_nulls * sel; + set_if_bigger(res, avg_frequency); } - res= col_non_nulls * sel; - set_if_bigger(res, col_stats->get_avg_frequency()); } else res= col_non_nulls; @@ -4076,7 +4079,8 @@ double Histogram_binary::point_selectivity(Field *field, key_range *endpoint, double Histogram_binary::range_selectivity(Field *field, key_range *min_endp, - key_range *max_endp) + key_range *max_endp, + double avg_sel) { double sel, min_mp_pos, max_mp_pos; Column_statistics *col_stats= field->read_stats; @@ -4105,6 +4109,7 @@ double Histogram_binary::range_selectivity(Field *field, uint max= find_bucket(max_mp_pos, FALSE); sel= bucket_sel * (max - min + 1); + set_if_bigger(sel, avg_sel); return sel; } diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index 14877417072..1950f3268a7 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -191,7 +191,7 @@ class Histogram_base double avg_selectivity, double total_rows)=0; virtual double range_selectivity(Field *field, key_range *min_endp, - key_range *max_endp)=0; + key_range *max_endp, double avg_sel)=0; /* Legacy: return the size of the histogram on disk. @@ -353,7 +353,7 @@ class Histogram_binary : public Histogram_base } double range_selectivity(Field *field, key_range *min_endp, - key_range *max_endp) override; + key_range *max_endp, double avg_sel) override; /* Estimate selectivity of "col=const" using a histogram
participants (1)
-
psergey