revision-id: 1bd845eb1e54635f203c1e9f0638650b07a7771a (mariadb-10.6.1-110-g1bd845eb1e5)
parent(s): 917afb5731adea15ecac9e29cdda75a72d7b5378
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2021-08-31 16:17:06 +0300
message:
Code cleanup: don't duplicate the position-in-interval code
---
sql/field.cc | 44 +++++++----
sql/field.h | 7 ++
sql/sql_statistics.cc | 208 +++++++++++++-------------------------------------
3 files changed, 89 insertions(+), 170 deletions(-)
diff --git a/sql/field.cc b/sql/field.cc
index 46a3a1deea3..cf42dcfab18 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -1108,19 +1108,27 @@ Field_longstr::pack_sort_string(uchar *to, const SORT_FIELD_ATTR *sort_field)
relative position of the field value in the numeric interval [min,max]
*/
-double Field::pos_in_interval_val_real(Field *min, Field *max)
+double pos_in_interval_for_double(double midp_val, double min_val,
+ double max_val)
{
double n, d;
-n= val_real() - min->val_real();
+// Distance of the probed value from the lower bound; a value below the
+// lower bound is reported as position 0.0.
+n= midp_val - min_val;
if (n < 0)
return 0.0;
-d= max->val_real() - min->val_real();
+// Width of the interval; an empty or inverted interval (max_val <= min_val)
+// is reported as position 1.0. Otherwise the ratio n/d is capped at 1.0.
+d= max_val - min_val;
if (d <= 0)
return 1.0;
return MY_MIN(n/d, 1.0);
}
+/*
+  Field-based wrapper around pos_in_interval_for_double(): reads this field's
+  value and the values of the min and max fields with val_real() and passes
+  the three doubles on.
+*/
+double Field::pos_in_interval_val_real(Field *min, Field *max)
+{
+ return pos_in_interval_for_double(val_real(), min->val_real(),
+ max->val_real());
+}
+
+
static
inline ulonglong char_prefix_to_ulonglong(uchar *src)
{
@@ -1177,23 +1185,33 @@ static inline double safe_substract(ulonglong a, ulonglong b)
*/
double Field::pos_in_interval_val_str(Field *min, Field *max, uint data_offset)
+{
+ // Delegate to the Field-independent helper. Each value is passed as raw
+ // bytes (ptr + data_offset) together with its data_length(); this field's
+ // charset() is used for all three values.
+ return pos_in_interval_for_string(charset(),
+ ptr + data_offset, data_length(),
+ min->ptr + data_offset, min->data_length(),
+ max->ptr + data_offset, max->data_length()
+ );
+}
+
+
+double pos_in_interval_for_string(CHARSET_INFO *cset,
+ const uchar *midp_val, uint32 midp_len,
+ const uchar *min_val, uint32 min_len,
+ const uchar *max_val, uint32 max_len)
{
uchar mp_prefix[sizeof(ulonglong)];
uchar minp_prefix[sizeof(ulonglong)];
uchar maxp_prefix[sizeof(ulonglong)];
ulonglong mp, minp, maxp;
- charset()->strnxfrm(mp_prefix, sizeof(mp),
- ptr + data_offset,
- data_length());
- charset()->strnxfrm(minp_prefix, sizeof(minp),
- min->ptr + data_offset,
- min->data_length());
- charset()->strnxfrm(maxp_prefix, sizeof(maxp),
- max->ptr + data_offset,
- max->data_length());
- mp= char_prefix_to_ulonglong(mp_prefix);
+
+ cset->strnxfrm(mp_prefix, sizeof(mp), midp_val, midp_len);
+ cset->strnxfrm(minp_prefix, sizeof(minp), min_val, min_len);
+ cset->strnxfrm(maxp_prefix, sizeof(maxp), max_val, max_len);
+
+ mp= char_prefix_to_ulonglong(mp_prefix);
minp= char_prefix_to_ulonglong(minp_prefix);
maxp= char_prefix_to_ulonglong(maxp_prefix);
+
double n, d;
n= safe_substract(mp, minp);
if (n < 0)
diff --git a/sql/field.h b/sql/field.h
index 8d0890c6c06..e4224fc1e51 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -5893,5 +5893,12 @@ ulonglong TABLE::vers_start_id() const
return static_cast<ulonglong>(vers_start_field()->val_int());
}
+/*
+  Relative position of the string midp_val inside the interval
+  [min_val, max_val]. Defined in field.cc.
+
+  Each value is given as a pointer plus a length and is converted with
+  cset->strnxfrm() into a buffer of sizeof(ulonglong) bytes, so only that
+  many bytes of the transformed image take part in the comparison.
+
+  NOTE(review): the result is presumably clamped to [0.0, 1.0] in the same
+  way as pos_in_interval_for_double() - confirm against the definition.
+*/
+double pos_in_interval_for_string(CHARSET_INFO *cset,
+ const uchar *midp_val, uint32 midp_len,
+ const uchar *min_val, uint32 min_len,
+ const uchar *max_val, uint32 max_len);
+
+/*
+  Relative position of midp_val inside the numeric interval
+  [min_val, max_val]. Defined in field.cc.
+
+  Returns 0.0 when midp_val < min_val, 1.0 when max_val <= min_val, and
+  otherwise (midp_val - min_val) / (max_val - min_val) capped at 1.0.
+*/
+double pos_in_interval_for_double(double midp_val,
+ double min_val, double max_val);
#endif /* FIELD_INCLUDED */
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index 976646fd78e..37cb18d10e7 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -1373,134 +1373,6 @@ bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
}
-static
-void store_key_image_to_rec_no_null(Field *field, uchar *ptr) {
- MY_BITMAP *old_map= dbug_tmp_use_all_columns(field->table,
- &field->table->write_set);
- field->set_key_image(ptr, field->key_length());
- dbug_tmp_restore_column_map(&field->table->write_set, old_map);
-}
-
-/*
- GSOC-TODO:
- This is our replacement for Field::pos_in_interval_val_real
-
- We take midpoint_val and an interval [min_val, max_val], and return
- a number between 0.0 and 1.0 which specifies how close midpoint_val is
- to one of the bounds.
-
- @param field Field object. We don't care about the field's current value
- (actually, we overwrite it). We need it for its virtual
- functions.
-
-*/
-double pos_in_interval_through_val_real(Field *field,
- uchar* min_val,
- uchar *max_val,
- uchar *midpoint_val)
-{
- // For each passed value: unpack it into Field's current value. Then, we can
- // get the value as double.
-
- store_key_image_to_rec_no_null(field, min_val);
- double min_val_real= field->val_real();
-
- store_key_image_to_rec_no_null(field, max_val);
- double max_val_real= field->val_real();
-
- store_key_image_to_rec_no_null(field, midpoint_val);
- double midpoint_val_real= field->val_real();
-
- // The code below is a copy of logic from Field::pos_in_interval_val_real:
- double n, d;
- n= midpoint_val_real - min_val_real;
- if (n < 0)
- return 0.0;
- d= max_val_real - min_val_real;
- if (d <= 0)
- return 1.0;
- return MY_MIN(n/d, 1.0);
-}
-
-// Copy-paste:
-static
-inline ulonglong char_prefix_to_ulonglong(uchar *src)
-{
- uint sz= sizeof(ulonglong);
- for (uint i= 0; i < sz/2; i++)
- {
- uchar tmp= src[i];
- src[i]= src[sz-1-i];
- src[sz-1-i]= tmp;
- }
- return uint8korr(src);
-}
-
-// copy-paste:
-static inline double safe_substract(ulonglong a, ulonglong b)
-{
- return (a > b)? double(a - b) : -double(b - a);
-}
-
-/*
- GSOC-TODO:
- This is our replacement for Field::pos_in_interval_val_str
-
- We take midpoint_val and an interval [min_val, max_val], and return
- a number between 0.0 and 1.0 which specifies how close midpoint_val is
- to one of the bounds.
-
- @param field Field object. We don't care about the field's current value
- (actually, we overwrite it). We need it for its virtual
- functions.
-
- @TODO
- Instead of copying the pos_in_interval_val_str(), we should do better:
- if all three passed values have a common prefix, skip it.
- This will make the returned value more precise.
-
-*/
-
-double pos_in_interval_through_strxfrm(Field *field,
- uchar *min_val,
- uchar *max_val,
- uchar *midpoint_val)
-{
- // The code below is a copy of logic from Field::pos_in_interval_val_str
- uchar mp_prefix[sizeof(ulonglong)];
- uchar minp_prefix[sizeof(ulonglong)];
- uchar maxp_prefix[sizeof(ulonglong)];
- ulonglong mp, minp, maxp;
-
- uint min_len= uint2korr(min_val);
- uint max_len= uint2korr(max_val);
- uint midpoint_len= uint2korr(midpoint_val);
-
- auto cset= field->charset();
-
- cset->strnxfrm(mp_prefix, sizeof(mp),
- midpoint_val + HA_KEY_BLOB_LENGTH,
- midpoint_len);
- cset->strnxfrm(minp_prefix, sizeof(minp),
- min_val + HA_KEY_BLOB_LENGTH,
- min_len);
- cset->strnxfrm(maxp_prefix, sizeof(maxp),
- max_val + HA_KEY_BLOB_LENGTH,
- max_len);
- mp= char_prefix_to_ulonglong(mp_prefix);
- minp= char_prefix_to_ulonglong(minp_prefix);
- maxp= char_prefix_to_ulonglong(maxp_prefix);
- double n, d;
- n= safe_substract(mp, minp);
- if (n < 0)
- return 0.0;
- d= safe_substract(maxp, minp);
- if (d <= 0)
- return 1.0;
- return MY_MIN(n/d, 1.0);
-}
-
-
double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
double avg_sel)
{
@@ -1535,6 +1407,51 @@ double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
}
+/*
+  Load a value given as a key image into the field, overwriting the field's
+  current value: calls field->set_key_image() with ptr and
+  field->key_length().
+
+  The table's write_set is switched with dbug_tmp_use_all_columns() for the
+  duration of the call and restored afterwards.
+
+  NOTE(review): the "no_null" suffix presumably means that ptr must not start
+  with a NULL-indicator byte - confirm against the callers.
+*/
+static
+void store_key_image_to_rec_no_null(Field *field, const uchar *ptr)
+{
+ MY_BITMAP *old_map= dbug_tmp_use_all_columns(field->table,
+ &field->table->write_set);
+ field->set_key_image(ptr, field->key_length());
+ dbug_tmp_restore_column_map(&field->table->write_set, old_map);
+}
+
+
+/*
+  Compute the position of a lookup value between two bounds.
+
+  @param field  Field the values belong to. Its charset() is used in the
+                string case; in the numeric case its current value is
+                overwritten, because the values are unpacked into it.
+  @param key    The lookup value, as a key image.
+  @param left   Lower bound, as a key image held in a std::string.
+  @param right  Upper bound, as a key image held in a std::string.
+
+  @return  The result of pos_in_interval_for_string() when
+           field->pos_through_val_str() is true, otherwise the result of
+           pos_in_interval_for_double().
+*/
+static
+double position_in_interval(Field *field, const uchar *key,
+ const std::string& left, const std::string& right)
+{
+ double res;
+ if (field->pos_through_val_str())
+ {
+ // String case: the length of each value is read from its first two bytes
+ // with uint2korr(); the character data is taken at offset
+ // HA_KEY_BLOB_LENGTH.
+ uint32 min_len= uint2korr(left.data());
+ uint32 max_len= uint2korr(right.data());
+ uint32 midp_len= uint2korr(key);
+
+ res= pos_in_interval_for_string(field->charset(),
+ key + HA_KEY_BLOB_LENGTH,
+ midp_len,
+ (const uchar*)left.data() + HA_KEY_BLOB_LENGTH,
+ min_len,
+ (const uchar*)right.data() + HA_KEY_BLOB_LENGTH,
+ max_len);
+ }
+ else
+ {
+ // Numeric case: unpack each key image into the field and read it back as
+ // a double. Every store overwrites the previous value, so val_real() has
+ // to be called before the next image is stored.
+ store_key_image_to_rec_no_null(field, (const uchar*)left.data());
+ double min_val_real= field->val_real();
+
+ store_key_image_to_rec_no_null(field, (const uchar*)right.data());
+ double max_val_real= field->val_real();
+
+ store_key_image_to_rec_no_null(field, key);
+ double midp_val_real= field->val_real();
+
+ res= pos_in_interval_for_double(midp_val_real, min_val_real, max_val_real);
+ }
+ return res;
+}
+
/*
@param field The table field histogram is for. We don't care about the
field's current value, we only need its virtual functions to
@@ -1559,20 +1476,9 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
// Find the leftmost bucket that contains the lookup value.
// (If the lookup value is to the left of all buckets, find bucket #0)
int idx= find_bucket(field, min_key, exclusive_endp);
- double min_sel;
- {
- std::string &left= histogram_bounds[idx];
- std::string &right= histogram_bounds[idx+1];
- if (field->pos_through_val_str())
- min_sel= pos_in_interval_through_strxfrm(
- field, (uchar*) left.data(), (uchar*) right.data(),
- (uchar*) min_key);
- else
- min_sel= pos_in_interval_through_val_real(
- field, (uchar *) left.data(), (uchar*) right.data(),
- (uchar*) min_key);
- }
-
+ double min_sel= position_in_interval(field, (const uchar*)min_key,
+ histogram_bounds[idx],
+ histogram_bounds[idx+1]);
min= idx*width + min_sel*width;
}
else
@@ -1588,21 +1494,9 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
max_key++;
int idx= find_bucket(field, max_key, inclusive_endp);
- double max_sel;
- {
- std::string &left= histogram_bounds[idx];
- std::string &right= histogram_bounds[idx+1];
-
- if (field->pos_through_val_str())
- max_sel= pos_in_interval_through_strxfrm(
- field, (uchar *) left.data(), (uchar *) right.data(),
- (uchar *) max_key);
- else
- max_sel= pos_in_interval_through_val_real(
- field, (uchar *) left.data(), (uchar *) right.data(),
- (uchar *) max_key);
- }
-
+ double max_sel= position_in_interval(field, (const uchar*)max_key,
+ histogram_bounds[idx],
+ histogram_bounds[idx+1]);
max= idx*width + max_sel*width;
}
else