[Commits] 23ae4232b69: Code cleanup part #1
revision-id: 23ae4232b691c2eb5157396747ea8d20115ab22c (mariadb-10.6.1-96-g23ae4232b69) parent(s): f7efa5e713ddef1ac9e34ff5032e17d67e5074a1 author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2021-08-27 22:28:59 +0300 message: Code cleanup part #1 --- sql/sql_statistics.cc | 77 +++++++++++++++++++++++++++-------------------- sql/sql_statistics.h | 83 ++++++++++++++++++++++----------------------------- strings/json_lib.c | 5 ++-- 3 files changed, 83 insertions(+), 82 deletions(-) diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index fb89bf513c1..fefe0709672 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -1179,14 +1179,17 @@ class Column_stat: public Stat_table table_field->read_stats->set_avg_frequency(stat_field->val_real()); break; case COLUMN_STAT_HIST_SIZE: - //TODO: ignore this. The size is a part of histogram! - //table_field->read_stats->histogram.set_size(stat_field->val_int()); + /* + Ignore the contents of mysql.column_stats.hist_size. We take the + size from the mysql.column_stats.histogram column, itself. + */ break; case COLUMN_STAT_HIST_TYPE: - // TODO: save this next to histogram. - // For some reason, the histogram itself is read in - // read_histograms_for_table { + /* + Save the histogram type. The histogram itself will be read in + read_histograms_for_table(). + */ Histogram_type hist_type= (Histogram_type) (stat_field->val_int() - 1); table_field->read_stats->histogram_type_on_disk= hist_type; @@ -1247,21 +1250,24 @@ class Column_stat: public Stat_table table_field->read_stats->histogram_= hist; return hist; } - //memcpy(table_field->read_stats->histogram_.get_values(), - // val.ptr(), table_field->read_stats->histogram.get_size()); } return NULL; } }; -bool Histogram_binary::parse(MEM_ROOT *mem_root, Field *, Histogram_type type_arg, const uchar *ptr_arg, uint size_arg) +bool Histogram_binary::parse(MEM_ROOT *mem_root, Field *, + Histogram_type type_arg, + const uchar *ptr_arg, uint size_arg) { // Just copy the data size = (uint8) size_arg; type = type_arg; - values = (uchar*)alloc_root(mem_root, size_arg); - memcpy(values, ptr_arg, size_arg); - return false; + if ((values = (uchar*)alloc_root(mem_root, size_arg))) + { + memcpy(values, ptr_arg, size_arg); + return false; + } + return true; } /* @@ -1269,7 +1275,7 @@ bool Histogram_binary::parse(MEM_ROOT *mem_root, Field *, Histogram_type type_ar */ void Histogram_binary::serialize(Field *field) { - field->store((char*)get_values(), get_size(), &my_charset_bin); + field->store((char*)values, size, &my_charset_bin); } void Histogram_binary::init_for_collection(MEM_ROOT *mem_root, @@ -1282,20 +1288,32 @@ void Histogram_binary::init_for_collection(MEM_ROOT *mem_root, } -void Histogram_json::init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size_arg) +void Histogram_json::init_for_collection(MEM_ROOT *mem_root, + Histogram_type htype_arg, + ulonglong size_arg) { type= htype_arg; - values = (uchar*)alloc_root(mem_root, size_arg); - size = (uint8) size_arg; + //values_ = (uchar*)alloc_root(mem_root, size_arg); + size= (uint8) size_arg; } -bool Histogram_json::parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg, const uchar *ptr, uint size_arg) + +/* + @brief + Parse the histogram from its on-disk representation + +*/ + +bool Histogram_json::parse(MEM_ROOT *mem_root, Field *field, + Histogram_type type_arg, const uchar *ptr, + uint size_arg) { DBUG_ENTER("Histogram_json::parse"); size = (uint8) size_arg; type = type_arg; const char *json = (char *)ptr; int vt; + std::vector<std::string> hist_buckets_text; bool result = json_get_array_items(json, json + strlen(json), &vt, hist_buckets_text); if (!result) { @@ -1482,6 +1500,8 @@ double Histogram_json::point_selectivity(Field *field, key_range *endpoint, doub } return sel; } + + /* @param field The table field histogram is for. We don't care about the field's current value, we only need its virtual functions to @@ -1492,14 +1512,13 @@ double Histogram_json::point_selectivity(Field *field, key_range *endpoint, doub double Histogram_json::range_selectivity(Field *field, key_range *min_endp, key_range *max_endp) { - //fprintf(stderr, "Histogram_json::range_selectivity\n"); double min = 0.0, max = 1.0; double width = 1.0/(int)histogram_bounds.size(); if (min_endp) { double min_sel = 0.0; const uchar *min_key= min_endp->key; - // TODO: also, properly handle SQL NULLs. + // GSOC-TODO: properly handle SQL NULLs. // in this test patch, we just assume the values are not SQL NULLs. if (field->real_maybe_null()) min_key++; @@ -1573,8 +1592,7 @@ double Histogram_json::range_selectivity(Field *field, key_range *min_endp, void Histogram_json::serialize(Field *field) { - field->store((char*)get_values(), strlen((char*)get_values()), - &my_charset_bin); + field->store((char*)json_text, strlen((char*)json_text), &my_charset_bin); } int Histogram_json::find_bucket(Field *field, const uchar *endpoint) @@ -1583,7 +1601,7 @@ int Histogram_json::find_bucket(Field *field, const uchar *endpoint) int high = (int)histogram_bounds.size()-1; int mid; int min_bucket_index = -1; - std::string mid_val; + std::string mid_val; // GSOC-todo: don't copy strings while(low <= high) { // c++ gives us the floor of integer divisions by default, below we get the ceiling (round-up). @@ -2037,9 +2055,9 @@ class Histogram_builder_json : public Histogram_builder writer->add_str(value.c_str()); } writer->end_array(); - histogram->set_size(bucket_bounds.size()); Binary_string *json_string = (Binary_string *) writer->output.get_string(); - ((Histogram_json *)histogram)->set_values((uchar *) json_string->c_ptr()); + Histogram_json *hist= (Histogram_json*)histogram; + hist->set_json_text(bucket_bounds.size(), (uchar *) json_string->c_ptr()); } }; @@ -2207,6 +2225,7 @@ class Count_distinct_field: public Sql_alloc */ void walk_tree_with_histogram(ha_rows rows) { + // GSOC-TODO: is below a meaningful difference: if (table_field->collected_stats->histogram_->get_type() == JSON_HB) { Histogram_builder_json hist_builder(table_field, tree_key_length, rows); @@ -2680,11 +2699,6 @@ int alloc_statistics_for_table(THD* thd, TABLE *table) if (bitmap_is_set(table->read_set, (*field_ptr)->field_index)) { column_stats->histogram_ = NULL; - /* - column_stats->histogram.set_size(hist_size); - column_stats->histogram.set_type(hist_type); - column_stats->histogram.set_values(histogram); - histogram+= hist_size;*/ (*field_ptr)->collected_stats= column_stats++; } } @@ -2950,9 +2964,9 @@ void Column_statistics_collected::finish(MEM_ROOT *mem_root, ha_rows rows, doubl } if (count_distinct) { - //uint hist_size= count_distinct->get_hist_size(); uint hist_size= current_thd->variables.histogram_size; - Histogram_type hist_type= (Histogram_type) (current_thd->variables.histogram_type); + Histogram_type hist_type= + (Histogram_type) (current_thd->variables.histogram_type); bool have_histogram= false; if (hist_size != 0 && hist_type != INVALID_HISTOGRAM) { @@ -3001,12 +3015,11 @@ void Column_statistics_collected::finish(MEM_ROOT *mem_root, ha_rows rows, doubl } else have_histogram= false ; // TODO: need this? - //histogram.set_size(hist_size); + set_not_null(COLUMN_STAT_HIST_SIZE); if (have_histogram && distincts) { set_not_null(COLUMN_STAT_HIST_TYPE); - //histogram.set_values(count_distinct->get_histogram()); histogram_= count_distinct->get_histogram(); set_not_null(COLUMN_STAT_HISTOGRAM); } diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h index f9fdd0a63bc..f9031257728 100644 --- a/sql/sql_statistics.h +++ b/sql/sql_statistics.h @@ -157,22 +157,17 @@ class Histogram_base : public Sql_alloc virtual uint get_width()=0; - virtual void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size)=0; + virtual void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, + ulonglong size)=0; virtual bool is_available()=0; virtual bool is_usable(THD *thd)=0; - virtual void set_values(uchar * values)=0; - - virtual uchar *get_values()=0; - - virtual void set_size(ulonglong sz)=0; - - virtual double point_selectivity(Field *field, key_range *endpoint, double avg_selection)=0; - + virtual double point_selectivity(Field *field, key_range *endpoint, + double avg_selection)=0; virtual double range_selectivity(Field *field, key_range *min_endp, - key_range *max_endp)=0; + key_range *max_endp)=0; // Legacy: return the size of the histogram on disk. // This will be stored in mysql.column_stats.hist_size column. @@ -181,6 +176,11 @@ class Histogram_base : public Sql_alloc virtual ~Histogram_base()= default; }; + +/* + A Height-balanced histogram that stores numeric fractions +*/ + class Histogram_binary : public Histogram_base { public: @@ -274,17 +274,12 @@ class Histogram_binary : public Histogram_base return i; } - uchar *get_values() override { return (uchar *) values; } public: void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size) override; - // Note: these two are used only for saving the JSON text: - void set_values (uchar *vals) override { values= (uchar *) vals; } - void set_size (ulonglong sz) override { size= (uint8) sz; } - uint get_size() override {return (uint)size;} - bool is_available() override { return get_size() > 0 && get_values(); } + bool is_available() override { return get_size() > 0 && (values!=NULL); } /* This function checks that histograms should be usable only when @@ -328,58 +323,57 @@ class Histogram_binary : public Histogram_base } double range_selectivity(Field *field, key_range *min_endp, - key_range *max_endp) override; - + key_range *max_endp) override; + /* Estimate selectivity of "col=const" using a histogram */ - double point_selectivity(Field *field, key_range *endpoint, double avg_sel) override; + double point_selectivity(Field *field, key_range *endpoint, + double avg_sel) override; }; + +/* + An equi-height histogram which stores real values for bucket bounds. +*/ + class Histogram_json : public Histogram_base { private: Histogram_type type; uint8 size; /* Number of elements in the histogram*/ - - /* - GSOC-TODO: This is used for storing collected JSON text. Rename it - accordingly. - */ - uchar *values; - - // List of values in string form. - /* - GSOC-TODO: We don't need to save this. It can be a local variable in - parse(). - Eventually we should get rid of this at all, as we can convert the - endpoints and add them to histogram_bounds as soon as we've read them. - */ - std::vector<std::string> hist_buckets_text; + /* Collection-time only: collected histogram in the JSON form. */ + uchar *json_text; + // Array of histogram bucket endpoints in KeyTupleFormat. std::vector<std::string> histogram_bounds; public: bool parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg, - const uchar *ptr, uint size) override; + const uchar *ptr, uint size) override; void serialize(Field *field) override; // returns number of buckets in the histogram uint get_width() override { - return size; - }; + return size; + } Histogram_type get_type() override { return JSON_HB; } - void set_size (ulonglong sz) override {size = (uint8) sz; } + void set_json_text(ulonglong sz, uchar *json_text_arg) + { + size = (uint8) sz; + json_text= json_text_arg; + } - uint get_size() override { + uint get_size() override + { return size; } @@ -393,15 +387,10 @@ class Histogram_json : public Histogram_base is_available(); } - void set_values (uchar *vals) override { values= (uchar *) vals; } - - uchar *get_values() override { return (uchar *) values; } - - double point_selectivity(Field *field, key_range *endpoint, double avg_selection) override; - + double point_selectivity(Field *field, key_range *endpoint, + double avg_selection) override; double range_selectivity(Field *field, key_range *min_endp, - key_range *max_endp) override; - + key_range *max_endp) override; /* * Returns the index of the biggest histogram value that is smaller than endpoint */ diff --git a/strings/json_lib.c b/strings/json_lib.c index 02e09acb8b1..b61eb634a20 100644 --- a/strings/json_lib.c +++ b/strings/json_lib.c @@ -1869,7 +1869,7 @@ int json_path_compare(const json_path_t *a, const json_path_t *b, enum json_types json_smart_read_value(json_engine_t *je, - const char **value, int *value_len) + const char **value, int *value_len) { if (json_read_value(je)) goto err_return; @@ -1952,6 +1952,7 @@ enum json_types json_get_array_item(const char *js, const char *js_end, return JSV_BAD_JSON; } + /** Simple json lookup for a value by the key. Expects JSON object. @@ -2027,8 +2028,6 @@ enum json_types json_get_object_nkey(const char *js __attribute__((unused)), return JSV_NOTHING; } - - /** Check if json is valid (well-formed) @retval 0 - success, json is well-formed
participants (1)
-
psergey