revision-id: 45ee42cf6445a3670624de0d2072970b4f6075a6 (mariadb-10.6.1-107-g45ee42cf644) parent(s): daac419b35a057211bf8f1545569fa12598e203c author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2021-08-31 13:39:39 +0300 message: Cleanup histogram collection code --- sql/sql_statistics.cc | 82 ++++++++++++++++++++++++++++----------------------- sql/sql_statistics.h | 10 +++++++ 2 files changed, 55 insertions(+), 37 deletions(-) diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index a19a179b0e4..756287b2567 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -1959,7 +1959,9 @@ class Stat_table_write_iter /* Histogram_builder is a helper class that is used to build histograms - for columns + for columns. + + Do not create directly, call Histogram->create_builder(...); */ class Histogram_builder @@ -2027,8 +2029,17 @@ class Histogram_builder } return 0; } + virtual void finalize(){} }; + +Histogram_builder *Histogram_binary::create_builder(Field *col, uint col_len, + ha_rows rows) +{ + return new Histogram_builder(col, col_len, rows); +} + + class Histogram_builder_json : public Histogram_builder { std::vector<std::string> bucket_bounds; @@ -2036,13 +2047,14 @@ class Histogram_builder_json : public Histogram_builder public: Histogram_builder_json(Field *col, uint col_len, ha_rows rows) - : Histogram_builder(col, col_len, rows) - { - bucket_bounds = {}; - } + : Histogram_builder(col, col_len, rows) {} ~Histogram_builder_json() override = default; + /* + Add data to the histogram. Adding Element elem which was encountered elem_cnt + times. 
+ */ int next(void *elem, element_count elem_cnt) override { count_distinct++; @@ -2053,24 +2065,26 @@ class Histogram_builder_json : public Histogram_builder return 0; if (count > bucket_capacity * (curr_bucket + 1)) { - column->store_field_value((uchar *) elem, col_length); + column->store_field_value((uchar*) elem, col_length); StringBuffer<MAX_FIELD_WIDTH> val; column->val_str(&val); - auto it = bucket_bounds.begin(); - bucket_bounds.insert(it+curr_bucket, std::string(val.ptr(), val.length())); + bucket_bounds.push_back(std::string(val.ptr(), val.length())); curr_bucket++; while (curr_bucket != hist_width && count > bucket_capacity * (curr_bucket + 1)) { - it = bucket_bounds.begin(); - bucket_bounds.insert(it+curr_bucket, bucket_bounds[curr_bucket-1]); + bucket_bounds.push_back(std::string(val.ptr(), val.length())); curr_bucket++; } } return 0; } - void build_json_from_histogram() { + /* + Finalize the creation of histogram + */ + void finalize() override + { Json_writer writer; writer.start_object(); writer.add_member(Histogram_json::JSON_NAME).start_array(); @@ -2087,6 +2101,15 @@ class Histogram_builder_json : public Histogram_builder }; +Histogram_builder *Histogram_json::create_builder(Field *col, uint col_len, + ha_rows rows) +{ + return new Histogram_builder_json(col, col_len, rows); +} + + + + Histogram_base *create_histogram(MEM_ROOT *mem_root, Histogram_type hist_type, THD *owner) { @@ -2111,19 +2134,12 @@ Histogram_base *create_histogram(MEM_ROOT *mem_root, Histogram_type hist_type, C_MODE_START -int histogram_build_walk(void *elem, element_count elem_cnt, void *arg) +static int histogram_build_walk(void *elem, element_count elem_cnt, void *arg) { Histogram_builder *hist_builder= (Histogram_builder *) arg; return hist_builder->next(elem, elem_cnt); } -int json_histogram_build_walk(void *elem, element_count elem_cnt, void *arg) -{ - Histogram_builder_json *hist_builder= (Histogram_builder_json *) arg; - return hist_builder->next(elem, elem_cnt); -} - 
- static int count_distinct_single_occurence_walk(void *elem, element_count count, void *arg) @@ -2228,24 +2244,16 @@ class Count_distinct_field: public Sql_alloc */ void walk_tree_with_histogram(ha_rows rows) { - // GSOC-TODO: is below a meaningful difference: - if (table_field->collected_stats->histogram_->get_type() == JSON_HB) - { - Histogram_builder_json hist_builder(table_field, tree_key_length, rows); - tree->walk(table_field->table, json_histogram_build_walk, - (void *) &hist_builder); - hist_builder.build_json_from_histogram(); - distincts= hist_builder.get_count_distinct(); - distincts_single_occurence= hist_builder.get_count_single_occurence(); - } - else - { - Histogram_builder hist_builder(table_field, tree_key_length, rows); - tree->walk(table_field->table, histogram_build_walk, - (void *) &hist_builder); - distincts= hist_builder.get_count_distinct(); - distincts_single_occurence= hist_builder.get_count_single_occurence(); - } + Histogram_base *hist= table_field->collected_stats->histogram_; + Histogram_builder *hist_builder= + hist->create_builder(table_field, tree_key_length, rows); + + tree->walk(table_field->table, histogram_build_walk, + (void *) hist_builder); + hist_builder->finalize(); + distincts= hist_builder->get_count_distinct(); + distincts_single_occurence= hist_builder->get_count_single_occurence(); + delete hist_builder; } ulonglong get_count_distinct() diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index 065c0d0c840..c0bbb58a52f 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -146,6 +146,8 @@ double get_column_range_cardinality(Field *field, bool is_stat_table(const LEX_CSTRING *db, LEX_CSTRING *table); bool is_eits_usable(Field* field); +class Histogram_builder; + /* Common base for all histograms */ @@ -160,6 +162,9 @@ class Histogram_base : public Sql_alloc virtual uint get_width()=0; + virtual Histogram_builder *create_builder(Field *col, uint col_len, + ha_rows rows)=0; + virtual void 
init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size)=0; @@ -290,6 +295,8 @@ class Histogram_binary : public Histogram_base void serialize(Field *to_field) override; void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size) override; + Histogram_builder *create_builder(Field *col, uint col_len, + ha_rows rows) override; bool is_available() override { return (values!=NULL); } @@ -373,6 +380,9 @@ class Histogram_json : public Histogram_base void serialize(Field *field) override; + Histogram_builder *create_builder(Field *col, uint col_len, + ha_rows rows) override; + // returns number of buckets in the histogram uint get_width() override {