revision-id: fbfc1470ee49bdccad51f0f3a5a0b1fcc88bcef8 (mariadb-10.5.2-494-gfbfc1470ee4) parent(s): af34fcbe4f5a1e20fef1aef4b404678a52e0157f author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2020-09-29 15:33:02 +0300 message: MDEV-21829: Packed Sort Keys in Unique: Address review input Make the Unique class an interface, and move the implementation into class Unique_impl. The idea is to allow seamless change of Unique class's implementation/ to something else. The construction of Unique object still exposes the implementation: the code uses "new Unique_impl(...)". It will be hidden in a factory function. --- sql/item_sum.cc | 4 ++-- sql/opt_range.cc | 20 +++++++++---------- sql/sql_delete.cc | 8 ++++---- sql/sql_statistics.cc | 4 ++-- sql/uniques.cc | 32 +++++++++++++++---------------- sql/uniques.h | 53 +++++++++++++++++++++++++++++++++++++++------------ 6 files changed, 75 insertions(+), 46 deletions(-) diff --git a/sql/item_sum.cc b/sql/item_sum.cc index d1023ba65db..0f544142bff 100644 --- a/sql/item_sum.cc +++ b/sql/item_sum.cc @@ -1178,7 +1178,7 @@ void Aggregator_distinct::endup() { DBUG_ASSERT(item_sum->fixed == 1); Item_sum_count *sum= (Item_sum_count *)item_sum; - if (tree && tree->elements == 0) + if (tree && tree->get_n_elements() == 0) { /* everything fits in memory */ sum->count= (longlong) tree->elements_in_tree(); @@ -4948,7 +4948,7 @@ Unique* Item_sum::get_unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg, if (allow_packing) return new Unique_packed(comp_func, comp_func_fixed_arg, size_arg, max_in_memory_size_arg, min_dupl_count_arg); - return new Unique(comp_func, comp_func_fixed_arg, size_arg, + return new Unique_impl(comp_func, comp_func_fixed_arg, size_arg, max_in_memory_size_arg, min_dupl_count_arg); } diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 7d65c85129b..8720e0fa7c4 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -5209,7 +5209,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, /* Add Unique operations cost */ unique_calc_buff_size= - Unique::get_cost_calc_buff_size((ulong)non_cpk_scan_records, + Unique_impl::get_cost_calc_buff_size((ulong)non_cpk_scan_records, param->table->file->ref_length, (size_t)param->thd->variables.sortbuff_size); if (param->imerge_cost_buff_size < unique_calc_buff_size) @@ -5221,7 +5221,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge, } { - const double dup_removal_cost= Unique::get_use_cost( + const double dup_removal_cost= Unique_impl::get_use_cost( param->imerge_cost_buff, (uint)non_cpk_scan_records, param->table->file->ref_length, (size_t)param->thd->variables.sortbuff_size, @@ -5831,7 +5831,7 @@ bool prepare_search_best_index_intersect(PARAM *param, return TRUE; size_t calc_cost_buff_size= - Unique::get_cost_calc_buff_size((size_t)records_in_scans, + Unique_impl::get_cost_calc_buff_size((size_t)records_in_scans, common->key_size, common->max_memory_size); if (!(common->buff_elems= (uint *) alloc_root(param->mem_root, @@ -6180,7 +6180,7 @@ bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr, */ ha_rows elems_in_tree= common_info->search_scans[0]->records- common_info->search_scans[0]->filtered_out ; - next->in_memory_cost+= Unique::get_search_cost(elems_in_tree, + next->in_memory_cost+= Unique_impl::get_search_cost(elems_in_tree, common_info->compare_factor)* ext_index_scan_records; cost= next->in_memory_cost; @@ -6193,7 +6193,7 @@ bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr, size_t max_memory_size= common_info->max_memory_size; records_sent_to_unique+= ext_index_scan_records; - cost= Unique::get_use_cost(buff_elems, (size_t) records_sent_to_unique, key_size, + cost= Unique_impl::get_use_cost(buff_elems, (size_t) records_sent_to_unique, key_size, max_memory_size, compare_factor, TRUE, &next->in_memory); if (records_filtered_out_by_cpk) @@ -6203,7 +6203,7 @@ bool check_index_intersect_extension(PARTIAL_INDEX_INTERSECT_INFO *curr, double cost2; bool in_memory2; ha_rows records2= records_sent_to_unique-records_filtered_out_by_cpk; - cost2= Unique::get_use_cost(buff_elems, (size_t) records2, key_size, + cost2= Unique_impl::get_use_cost(buff_elems, (size_t) records2, key_size, max_memory_size, compare_factor, TRUE, &in_memory2); cost2+= get_cpk_filter_cost(ext_index_scan_records, common_info->cpk_scan, @@ -11758,7 +11758,7 @@ int read_keys_and_merge_scans(THD *thd, DBUG_EXECUTE_IF("only_one_Unique_may_be_created", DBUG_SET("+d,index_merge_may_not_create_a_Unique"); ); - unique= new Unique(refpos_order_cmp, (void *)file, + unique= new Unique_impl(refpos_order_cmp, (void *)file, file->ref_length, (size_t)thd->variables.sortbuff_size, intersection ? quick_selects.elements : 0); @@ -11830,7 +11830,7 @@ int read_keys_and_merge_scans(THD *thd, */ head->file->ha_end_keyread(); if (init_read_record(read_record, thd, head, (SQL_SELECT*) 0, - &unique->sort, 1 , 1, TRUE)) + unique->get_sort(), 1 , 1, TRUE)) result= 1; DBUG_RETURN(result); @@ -11873,7 +11873,7 @@ int QUICK_INDEX_MERGE_SELECT::get_next() result= HA_ERR_END_OF_FILE; end_read_record(&read_record); // Free things used by sort early. Shouldn't be strictly necessary - unique->sort.reset(); + unique->get_sort()->reset(); /* All rows from Unique have been retrieved, do a clustered PK scan */ if (pk_quick_select) { @@ -11908,7 +11908,7 @@ int QUICK_INDEX_INTERSECT_SELECT::get_next() { result= HA_ERR_END_OF_FILE; end_read_record(&read_record); - unique->sort.reset(); // Free things early + unique->get_sort()->reset(); // Free things early } DBUG_RETURN(result); diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index db5b5b777a8..64cb025cce0 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -702,7 +702,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, clause. Instead of deleting the rows, first mark them deleted. */ ha_rows tmplimit=limit; - deltempfile= new (thd->mem_root) Unique (refpos_order_cmp, table->file, + deltempfile= new (thd->mem_root) Unique_impl (refpos_order_cmp, table->file, table->file->ref_length, MEM_STRIP_BUF_SIZE); @@ -727,7 +727,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, end_read_record(&info); if (unlikely(deltempfile->get(table)) || unlikely(table->file->ha_index_or_rnd_end()) || - unlikely(init_read_record(&info, thd, table, 0, &deltempfile->sort, 0, + unlikely(init_read_record(&info, thd, table, 0, deltempfile->get_sort(), 0, 1, false))) { error= 1; @@ -1269,7 +1269,7 @@ multi_delete::initialize_tables(JOIN *join) for (;walk ;walk= walk->next_local) { TABLE *table=walk->table; - *tempfiles_ptr++= new (thd->mem_root) Unique (refpos_order_cmp, table->file, + *tempfiles_ptr++= new (thd->mem_root) Unique_impl (refpos_order_cmp, table->file, table->file->ref_length, MEM_STRIP_BUF_SIZE); } @@ -1452,7 +1452,7 @@ int multi_delete::do_deletes() if (unlikely(tempfiles[counter]->get(table))) DBUG_RETURN(1); - local_error= do_table_deletes(table, &tempfiles[counter]->sort, + local_error= do_table_deletes(table, tempfiles[counter]->get_sort(), thd->lex->ignore); if (unlikely(thd->killed) && likely(!local_error)) diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc index 60e5b17fa81..2cc132dfe2b 100644 --- a/sql/sql_statistics.cc +++ b/sql/sql_statistics.cc @@ -1732,7 +1732,7 @@ class Count_distinct_field: public Sql_alloc } tree_key_length= table_field->pack_length(); - tree= new Unique((qsort_cmp2) simple_str_key_cmp, (void*) table_field, + tree= new Unique_impl((qsort_cmp2) simple_str_key_cmp, (void*) table_field, tree_key_length, max_heap_table_size, 1); return tree == NULL; @@ -1858,7 +1858,7 @@ class Count_distinct_field_bit: public Count_distinct_field bool setup(THD *thd, size_t max_heap_table_size) { tree_key_length= sizeof(ulonglong); - tree= new Unique((qsort_cmp2) simple_ulonglong_key_cmp, + tree= new Unique_impl((qsort_cmp2) simple_ulonglong_key_cmp, (void*) &tree_key_length, tree_key_length, max_heap_table_size, 1); return tree == NULL; diff --git a/sql/uniques.cc b/sql/uniques.cc index 4ec032276fe..615387ddfce 100644 --- a/sql/uniques.cc +++ b/sql/uniques.cc @@ -40,25 +40,25 @@ #include "uniques.h" // Unique #include "sql_sort.h" -int unique_write_to_file(uchar* key, element_count count, Unique *unique) +int unique_write_to_file(uchar* key, element_count count, Unique_impl *unique) { return unique->write_record_to_file(key) ? 1 : 0; } -int unique_write_to_file_with_count(uchar* key, element_count count, Unique *unique) +int unique_write_to_file_with_count(uchar* key, element_count count, Unique_impl *unique) { return unique_write_to_file(key, count, unique) || my_b_write(&unique->file, (uchar*)&count, sizeof(element_count)) ? 1 : 0; } -int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique) +int unique_write_to_ptrs(uchar* key, element_count count, Unique_impl *unique) { memcpy(unique->sort.record_pointers, key, unique->size); unique->sort.record_pointers+=unique->size; return 0; } -int unique_intersect_write_to_ptrs(uchar* key, element_count count, Unique *unique) +int unique_intersect_write_to_ptrs(uchar* key, element_count count, Unique_impl *unique) { if (count >= unique->min_dupl_count) { @@ -71,7 +71,7 @@ int unique_intersect_write_to_ptrs(uchar* key, element_count count, Unique *uniq } -Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg, +Unique_impl::Unique_impl(qsort_cmp2 comp_func, void * comp_func_fixed_arg, uint size_arg, size_t max_in_memory_size_arg, uint min_dupl_count_arg) :max_in_memory_size(max_in_memory_size_arg), @@ -306,7 +306,7 @@ static double get_merge_many_buffs_cost(uint *buffer, these will be random seeks. */ -double Unique::get_use_cost(uint *buffer, size_t nkeys, uint key_size, +double Unique_impl::get_use_cost(uint *buffer, size_t nkeys, uint key_size, size_t max_in_memory_size, double compare_factor, bool intersect_fl, bool *in_memory) @@ -368,7 +368,7 @@ double Unique::get_use_cost(uint *buffer, size_t nkeys, uint key_size, return result; } -Unique::~Unique() +Unique_impl::~Unique_impl() { close_cached_file(&file); delete_tree(&tree, 0); @@ -377,7 +377,7 @@ Unique::~Unique() /* Write tree to disk; clear tree */ -bool Unique::flush() +bool Unique_impl::flush() { Merge_chunk file_ptr; elements+= tree.elements_in_tree; @@ -406,7 +406,7 @@ bool Unique::flush() */ void -Unique::reset() +Unique_impl::reset() { reset_tree(&tree); /* @@ -663,7 +663,7 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size, <> 0 error */ -bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg) +bool Unique_impl::walk(TABLE *table, tree_walk_action action, void *walk_action_arg) { int res= 0; uchar *merge_buffer; @@ -712,7 +712,7 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg) TRUE. SYNOPSIS - Unique:merge() + Unique_impl::merge() All params are 'IN': table the parameter to access sort context buff merge buffer @@ -723,7 +723,7 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg) <> 0 error */ -bool Unique::merge(TABLE *table, uchar *buff, size_t buff_size, +bool Unique_impl::merge(TABLE *table, uchar *buff, size_t buff_size, bool without_last_merge) { IO_CACHE *outfile= &sort.io_cache; @@ -804,12 +804,12 @@ bool Unique::merge(TABLE *table, uchar *buff, size_t buff_size, rows will be read in priority order. */ -bool Unique::get(TABLE *table) +bool Unique_impl::get(TABLE *table) { bool rc= 1; uchar *sort_buffer= NULL; sort.return_rows= elements+tree.elements_in_tree; - DBUG_ENTER("Unique::get"); + DBUG_ENTER("Unique_impl::get"); DBUG_ASSERT(is_packed() == FALSE); @@ -860,7 +860,7 @@ bool Unique::get(TABLE *table) Unique_packed::Unique_packed(qsort_cmp2 comp_func, void *comp_func_fixed_arg, uint size_arg, size_t max_in_memory_size_arg, uint min_dupl_count_arg): - Unique(comp_func, comp_func_fixed_arg, size_arg, + Unique_impl(comp_func, comp_func_fixed_arg, size_arg, max_in_memory_size_arg, min_dupl_count_arg) { } @@ -877,7 +877,7 @@ Unique_packed::Unique_packed(qsort_cmp2 comp_func, void *comp_func_fixed_arg, =0 Record successfully written */ -int Unique::write_record_to_file(uchar *key) +int Unique_impl::write_record_to_file(uchar *key) { return my_b_write(get_file(), key, size); } diff --git a/sql/uniques.h b/sql/uniques.h index 3d90039b597..64d22920adc 100644 --- a/sql/uniques.h +++ b/sql/uniques.h @@ -18,6 +18,33 @@ #include "filesort.h" +class Unique : public Sql_alloc { +public: + + virtual void reset() = 0; + virtual bool unique_add(void *ptr, uint size_arg) = 0; + virtual ~Unique() {}; + + virtual void close_for_expansion() = 0; + + virtual bool get(TABLE *table) = 0; + virtual bool walk(TABLE *table, tree_walk_action action, void *walk_action_arg)= 0; + + virtual SORT_INFO *get_sort() = 0; + + virtual ulong get_n_elements() = 0; + virtual size_t get_max_in_memory_size() const = 0; + virtual bool is_in_memory() = 0; + + // This will be renamed: + virtual ulong elements_in_tree() = 0; + + // These will be removed: + virtual uint get_size() const = 0; + virtual bool is_packed() = 0; + virtual uint get_full_size() const = 0; +}; + /* Unique -- class for unique (removing of duplicates). Puts all values to the TREE. If the tree becomes too big, @@ -26,8 +53,7 @@ memory simultaneously with iteration, so it should be ~2-3x faster. */ -class Unique :public Sql_alloc -{ +class Unique_impl : public Unique { DYNAMIC_ARRAY file_ptrs; ulong max_elements; /* Total number of elements that will be stored in-memory */ size_t max_in_memory_size; @@ -69,11 +95,14 @@ class Unique :public Sql_alloc public: ulong elements; + ulong get_n_elements() override { return elements; } SORT_INFO sort; - Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg, + SORT_INFO *get_sort() override { return &sort; } + + Unique_impl(qsort_cmp2 comp_func, void *comp_func_fixed_arg, uint size_arg, size_t max_in_memory_size_arg, uint min_dupl_count_arg= 0); - virtual ~Unique(); + virtual ~Unique_impl(); ulong elements_in_tree() { return tree.elements_in_tree; } /* @@ -84,7 +113,7 @@ class Unique :public Sql_alloc size length of the key */ - inline bool unique_add(void *ptr, uint size_arg) + bool unique_add(void *ptr, uint size_arg) override { DBUG_ENTER("unique_add"); DBUG_PRINT("info", ("tree %u - %lu", tree.elements_in_tree, max_elements)); @@ -132,10 +161,10 @@ class Unique :public Sql_alloc return (int) (sizeof(uint)*(1 + nkeys/max_elems_in_tree)); } - void reset(); + void reset() override; bool walk(TABLE *table, tree_walk_action action, void *walk_action_arg); - uint get_size() const { return size; } + uint get_size() const override { return size; } uint get_full_size() const { return full_size; } size_t get_max_in_memory_size() const { return max_in_memory_size; } bool is_count_stored() { return with_counters; } @@ -145,13 +174,13 @@ class Unique :public Sql_alloc // returns TRUE if the unique tree stores packed values virtual bool is_packed() { return false; } - friend int unique_write_to_file(uchar* key, element_count count, Unique *unique); - friend int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique); + friend int unique_write_to_file(uchar* key, element_count count, Unique_impl *unique); + friend int unique_write_to_ptrs(uchar* key, element_count count, Unique_impl *unique); friend int unique_write_to_file_with_count(uchar* key, element_count count, - Unique *unique); + Unique_impl *unique); friend int unique_intersect_write_to_ptrs(uchar* key, element_count count, - Unique *unique); + Unique_impl *unique); }; @@ -162,7 +191,7 @@ class Unique :public Sql_alloc inside the tree. */ -class Unique_packed : public Unique +class Unique_packed : public Unique_impl { protected: public: