[Commits] 46dc54e: MDEV-16188: Added many comments
revision-id: 46dc54ef035dec65ac3e996e039d622aa451013a (mariadb-10.3.6-104-g46dc54e) parent(s): 322f91664bac5692e86e986778bcd7adfb743ede author: Igor Babaev committer: Igor Babaev timestamp: 2019-02-02 00:47:22 -0800 message: MDEV-16188: Added many comments Performed some cleanup as well. --- mysql-test/main/func_group.test | 2 +- mysql-test/main/range.result | 16 +-- mysql-test/main/range.test | 16 +-- mysql-test/main/range_mrr_icp.result | 16 +-- sql/handler.cc | 10 +- sql/handler.h | 2 + sql/multi_range_read.cc | 18 +++ sql/rowid_filter.cc | 216 ++++++++++++++++++++++++++++++++-- sql/rowid_filter.h | 190 +++++++++++++++++++++++++----- sql/sql_explain.cc | 12 +- sql/sql_explain.h | 8 +- sql/sql_select.cc | 100 ++++++++++------ sql/sql_select.h | 12 +- sql/structs.h | 4 + sql/table.cc | 1 + storage/innobase/handler/ha_innodb.cc | 8 +- storage/myisam/mi_range.c | 16 ++- storage/myisam/mi_rprev.c | 2 +- storage/myisam/myisamdef.h | 2 +- 19 files changed, 535 insertions(+), 116 deletions(-) diff --git a/mysql-test/main/func_group.test b/mysql-test/main/func_group.test index aa02f9f..bc2d6e9 100644 --- a/mysql-test/main/func_group.test +++ b/mysql-test/main/func_group.test @@ -850,7 +850,7 @@ SELECT MIN(a), MIN(b) FROM t1; CREATE TABLE t2( a INT, b INT, c INT, KEY(a, b) ); INSERT INTO t2 ( a, b, c ) VALUES - ( 1, NULL, 2 ), ( 1, 3, 4 ), ( 1, 4, 4 ), ( 2, NULL, 2 ), ( 2, 3, 4 ), ( 2, 4, 4 ); + ( 1, NULL, 2 ), ( 1, 3, 4 ), ( 1, 4, 4 ), ( 2, NULL, 2 ), ( 2, 3, 4 ), ( 2, 4, 4 ); EXPLAIN SELECT MIN(b), MIN(c) FROM t2 WHERE a = 1; SELECT MIN(b), MIN(c) FROM t2 WHERE a = 1; diff --git a/mysql-test/main/range.result b/mysql-test/main/range.result index 4d0997d..83237a9 100644 --- a/mysql-test/main/range.result +++ b/mysql-test/main/range.result @@ -1266,7 +1266,7 @@ INSERT INTO t3 VALUES INSERT INTO t3 SELECT * FROM t3 WHERE a = 10; INSERT INTO t3 SELECT * FROM t3 WHERE a = 10; SELECT * FROM t1 WHERE -23 <= a AND a < 25 OR +23 <= a AND a < 25 OR 25 < a AND b = 23 OR 23 <= a; a b @@ -1274,13 +1274,13 @@ a b 29 27 EXPLAIN SELECT * FROM t1 WHERE -23 <= a AND a < 25 OR +23 <= a AND a < 25 OR 25 < a AND b = 23 OR 23 <= a; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range a a 5 NULL 2 Using where; Using index SELECT * FROM t1 WHERE -23 <= a AND a < 25 OR +23 <= a AND a < 25 OR 25 <= a AND b = 23 OR 23 <= a; a b @@ -1288,13 +1288,13 @@ a b 29 27 EXPLAIN SELECT * FROM t1 WHERE -23 <= a AND a < 25 OR +23 <= a AND a < 25 OR 25 <= a AND b = 23 OR 23 <= a; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range a a 5 NULL 3 Using where; Using index SELECT * FROM t1 WHERE -23 <= a AND a <= 25 OR +23 <= a AND a <= 25 OR 25 <= a AND b = 23 OR 23 <= a; a b @@ -1302,20 +1302,20 @@ a b 29 27 EXPLAIN SELECT * FROM t1 WHERE -23 <= a AND a <= 25 OR +23 <= a AND a <= 25 OR 25 <= a AND b = 23 OR 23 <= a; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range a a 5 NULL 2 Using where; Using index SELECT * FROM t1 WHERE -23 <= a AND a <= 25 OR +23 <= a AND a <= 25 OR 23 <= a; a b 25 20 29 27 EXPLAIN SELECT * FROM t1 WHERE -23 <= a AND a <= 25 OR +23 <= a AND a <= 25 OR 23 <= a; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range a a 5 NULL 2 Using where; Using index diff --git a/mysql-test/main/range.test b/mysql-test/main/range.test index c185e7c..9edb3f3 100644 --- a/mysql-test/main/range.test +++ b/mysql-test/main/range.test @@ -1083,46 +1083,46 @@ INSERT INTO t3 SELECT * FROM t3 WHERE a = 10; # With one exception, they are independent of Problem#2. # SELECT * FROM t1 WHERE -23 <= a AND a < 25 OR +23 <= a AND a < 25 OR 25 < a AND b = 23 OR 23 <= a; EXPLAIN SELECT * FROM t1 WHERE -23 <= a AND a < 25 OR +23 <= a AND a < 25 OR 25 < a AND b = 23 OR 23 <= a; # Query below: Tests both Problem#1 and Problem#2 (EXPLAIN differs as well) SELECT * FROM t1 WHERE -23 <= a AND a < 25 OR +23 <= a AND a < 25 OR 25 <= a AND b = 23 OR 23 <= a; EXPLAIN SELECT * FROM t1 WHERE -23 <= a AND a < 25 OR +23 <= a AND a < 25 OR 25 <= a AND b = 23 OR 23 <= a; SELECT * FROM t1 WHERE -23 <= a AND a <= 25 OR +23 <= a AND a <= 25 OR 25 <= a AND b = 23 OR 23 <= a; EXPLAIN SELECT * FROM t1 WHERE -23 <= a AND a <= 25 OR +23 <= a AND a <= 25 OR 25 <= a AND b = 23 OR 23 <= a; SELECT * FROM t1 WHERE -23 <= a AND a <= 25 OR +23 <= a AND a <= 25 OR 23 <= a; EXPLAIN SELECT * FROM t1 WHERE -23 <= a AND a <= 25 OR +23 <= a AND a <= 25 OR 23 <= a; # diff --git a/mysql-test/main/range_mrr_icp.result b/mysql-test/main/range_mrr_icp.result index 91fd84a..a4a64dd 100644 --- a/mysql-test/main/range_mrr_icp.result +++ b/mysql-test/main/range_mrr_icp.result @@ -1269,7 +1269,7 @@ INSERT INTO t3 VALUES INSERT INTO t3 SELECT * FROM t3 WHERE a = 10; INSERT INTO t3 SELECT * FROM t3 WHERE a = 10; SELECT * FROM t1 WHERE -23 <= a AND a < 25 OR +23 <= a AND a < 25 OR 25 < a AND b = 23 OR 23 <= a; a b @@ -1277,13 +1277,13 @@ a b 29 27 EXPLAIN SELECT * FROM t1 WHERE -23 <= a AND a < 25 OR +23 <= a AND a < 25 OR 25 < a AND b = 23 OR 23 <= a; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range a a 5 NULL 2 Using where; Using index SELECT * FROM t1 WHERE -23 <= a AND a < 25 OR +23 <= a AND a < 25 OR 25 <= a AND b = 23 OR 23 <= a; a b @@ -1291,13 +1291,13 @@ a b 29 27 EXPLAIN SELECT * FROM t1 WHERE -23 <= a AND a < 25 OR +23 <= a AND a < 25 OR 25 <= a AND b = 23 OR 23 <= a; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range a a 5 NULL 3 Using where; Using index SELECT * FROM t1 WHERE -23 <= a AND a <= 25 OR +23 <= a AND a <= 25 OR 25 <= a AND b = 23 OR 23 <= a; a b @@ -1305,20 +1305,20 @@ a b 29 27 EXPLAIN SELECT * FROM t1 WHERE -23 <= a AND a <= 25 OR +23 <= a AND a <= 25 OR 25 <= a AND b = 23 OR 23 <= a; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range a a 5 NULL 2 Using where; Using index SELECT * FROM t1 WHERE -23 <= a AND a <= 25 OR +23 <= a AND a <= 25 OR 23 <= a; a b 25 20 29 27 EXPLAIN SELECT * FROM t1 WHERE -23 <= a AND a <= 25 OR +23 <= a AND a <= 25 OR 23 <= a; id select_type table type possible_keys key key_len ref rows Extra 1 SIMPLE t1 range a a 5 NULL 2 Using where; Using index diff --git a/sql/handler.cc b/sql/handler.cc index bf0afec..30ce3f6 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -5760,8 +5760,9 @@ extern "C" enum icp_result handler_index_cond_check(void* h_arg) /** Rowid filter callback - to be called by an engine to check rowid / primary - keys of the rows whose data is to be fetched against the set rowid filter + keys of the rows whose data is to be fetched against the used rowid filter */ + extern "C" int handler_rowid_filter_check(void *h_arg) { handler *h= (handler*) h_arg; @@ -5770,6 +5771,12 @@ extern "C" int handler_rowid_filter_check(void *h_arg) return h->pushed_rowid_filter->check((char *) h->ref); } + +/** + Callback function for an engine to check whether the used rowid filter + has been already built +*/ + extern "C" int handler_rowid_filter_is_active(void *h_arg) { if (!h_arg) @@ -5778,6 +5785,7 @@ extern "C" int handler_rowid_filter_is_active(void *h_arg) return h->rowid_filter_is_active; } + int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key, key_part_map keypart_map, enum ha_rkey_function find_flag) diff --git a/sql/handler.h b/sql/handler.h index 8869d3d..ee730e8 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -2991,7 +2991,9 @@ class handler :public Sql_alloc Item *pushed_idx_cond; uint pushed_idx_cond_keyno; /* The index which the above condition is for */ + /* Rowid filter pushed into the engine */ Rowid_filter *pushed_rowid_filter; + /* true when the pushed rowid filter has been already filled */ bool rowid_filter_is_active; Discrete_interval auto_inc_interval_for_cur_row; diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc index 8221f5c..cab278f 100644 --- a/sql/multi_range_read.cc +++ b/sql/multi_range_read.cc @@ -53,6 +53,24 @@ static ulonglong key_block_no(handler *h, uint keyno, uint keyentry_pos) for a user to be able to interrupt the calculation by killing the connection/query. + @note + Starting from 10.4 the implementation of this method tries to take into + account gaps between range intervals. Before this we had such paradoxical + cases when, for example, the cost of the index scan by range [1..3] was + almost twice as less than the cost of of the index scan by two intervals + [1..1] and [3..3]. + + @note + The current implementation of the method is not efficient for it + requires extra dives for gaps. Although these dives are not expensive + as they touch the index nodes that with very high probability are in + cache this is still not good. We could avoid it if records in range + also returned positions of the ends of range intervals. It's not + hard to implement it now for MyISAM as this engine provides a function + returning an approximation of the relative position of a key tuple + among other index key tuples. Unfortunately InnoDB now does not provide + anything like this function. + @retval HA_POS_ERROR Error or the engine is unable to perform the requested scan. Values of OUT parameters are undefined. diff --git a/sql/rowid_filter.cc b/sql/rowid_filter.cc index 6b357e1..4348eea 100644 --- a/sql/rowid_filter.cc +++ b/sql/rowid_filter.cc @@ -1,3 +1,19 @@ +/* + Copyright (c) 2018, 2019 MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + #include "mariadb.h" #include "table.h" #include "sql_class.h" @@ -5,6 +21,7 @@ #include "rowid_filter.h" #include "sql_select.h" + inline double Range_rowid_filter_cost_info::lookup_cost( Rowid_filter_container_type cont_type) @@ -19,6 +36,11 @@ double Range_rowid_filter_cost_info::lookup_cost( } +/** + @brief + The average gain in cost per row to use the range filter with this cost info +*/ + inline double Range_rowid_filter_cost_info::avg_access_and_eval_gain_per_row( Rowid_filter_container_type cont_type) @@ -28,9 +50,12 @@ double Range_rowid_filter_cost_info::avg_access_and_eval_gain_per_row( } /** - Sets information about filter with key_numb index. - It sets a cardinality of filter, calculates its selectivity - and gets slope and interscept values. + @brief + Initialize the cost info structure for a range filter + + @param cont_type The type of the container of the range filter + @param tab The table for which the range filter is evaluated + @param idx The index used to create this range filter */ void Range_rowid_filter_cost_info::init(Rowid_filter_container_type cont_type, @@ -48,6 +73,12 @@ void Range_rowid_filter_cost_info::init(Rowid_filter_container_type cont_type, abs_independent.clear_all(); } + +/** + @brief + Return the cost of building a range filter of a certain type +*/ + double Range_rowid_filter_cost_info::build_cost(Rowid_filter_container_type cont_type) { @@ -68,7 +99,7 @@ Range_rowid_filter_cost_info::build_cost(Rowid_filter_container_type cont_type) return cost; } - + Rowid_filter_container *Range_rowid_filter_cost_info::create_container() { THD *thd= table->in_use; @@ -85,25 +116,41 @@ Rowid_filter_container *Range_rowid_filter_cost_info::create_container() return res; } + static int compare_range_rowid_filter_cost_info_by_a( Range_rowid_filter_cost_info **filter_ptr_1, Range_rowid_filter_cost_info **filter_ptr_2) { - double diff= (*filter_ptr_2)->a - (*filter_ptr_1)->a; + double diff= (*filter_ptr_2)->get_a() - (*filter_ptr_1)->get_a(); return (diff < 0 ? -1 : (diff > 0 ? 1 : 0)); } + /** @brief + Prepare the array with cost info on range filters to be used by optimizer @details + The function removes the array of cost info on range filters the elements + for those range filters that won't be ever chosen as the best filter, no + matter what index will be used to access the table and at what step the + table will be joined. */ void TABLE::prune_range_rowid_filters() { uint i, j; + /* + For the elements of the array with cost info on range filters + build a bit matrix of absolutely independent elements. + Two elements are absolutely independent if they such indexes that + there is no other index that overlaps both of them or is constraint + correlated with both of them. Use abs_independent key maps to store + the elements if this bit matrix. + */ + Range_rowid_filter_cost_info **filter_ptr_1= range_rowid_filter_cost_info_ptr; for (i= 0; i < range_rowid_filter_cost_info_elems; i++, filter_ptr_1++) { @@ -121,12 +168,20 @@ void TABLE::prune_range_rowid_filters() } } - /* Sort the array range_filter_cost_info by 'a' */ + /* Sort the array range_filter_cost_info by 'a' in descending order */ my_qsort(range_rowid_filter_cost_info_ptr, range_rowid_filter_cost_info_elems, sizeof(Range_rowid_filter_cost_info *), (qsort_cmp) compare_range_rowid_filter_cost_info_by_a); + /* + For each element check whether it is created for the filter that + can be ever chosen as the best one. If it's not the case remove + from the array. Otherwise put it in the array in such a place + that all already checked elements left the array are ordered by + cross_x. + */ + Range_rowid_filter_cost_info **cand_filter_ptr= range_rowid_filter_cost_info_ptr; for (i= 0; i < range_rowid_filter_cost_info_elems; i++, cand_filter_ptr++) @@ -142,6 +197,20 @@ void TABLE::prune_range_rowid_filters() { if (abs_indep.is_set((*usable_filter_ptr)->key_no)) { + /* + The following is true here for the element e being checked: + There are at 2 elements e1 and e2 among already selected such that + e1.cross_x < e.cross_x and e1.a > e.a + and + e2.cross_x < e_cross_x and e2.a > e.a, + i.e. the range filters f1, f2 of both e1 and e2 always promise + better gains then the range filter of e. + As e1 and e2 are absolutely independent one of the range filters + f1, f2 will be always a better choice than f no matter what index + is chosen to access the table. Because of this the element e + can be safely removed from the array. + */ + is_pruned= true; break; } @@ -149,6 +218,10 @@ void TABLE::prune_range_rowid_filters() } else { + /* + Move the element being checked to the proper position to have all + elements that have been already checked to be sorted by cross_x + */ Range_rowid_filter_cost_info *moved= *cand_filter_ptr; memmove(usable_filter_ptr+1, usable_filter_ptr, sizeof(Range_rowid_filter_cost_info *) * (i-j-1)); @@ -157,6 +230,7 @@ void TABLE::prune_range_rowid_filters() } if (is_pruned) { + /* Remove the checked element from the array */ memmove(cand_filter_ptr, cand_filter_ptr+1, sizeof(Range_rowid_filter_cost_info *) * (range_rowid_filter_cost_info_elems - 1 - i)); @@ -166,6 +240,11 @@ void TABLE::prune_range_rowid_filters() } +/** + @brief + Return maximum number of elements that a container allowed to have + */ + static uint get_max_range_rowid_filter_elems_for_table( THD *thd, TABLE *tab, @@ -180,25 +259,59 @@ get_max_range_rowid_filter_elems_for_table( } } + +/** + @brief + Prepare info on possible range filters used by optimizer + + @param table The thread handler + + @details + The function first selects the indexes of the table that potentially + can be used for range filters and allocates an array of the objects + of the Range_rowid_filter_cost_info type to store cost info on + possible range filters and an array of pointers to these objects. + The latter is created for easy sorting of the objects with cost info + by different sort criteria. Then the function initializes the allocated + array with cost info for each possible range filter. After this + the function calls the method TABLE::prune_range_rowid_filters(). + The method removes the elements of the array for the filters that + promise less gain then others remaining in the array in any situation + and optimizes the order of the elements for faster choice of the best + range filter. +*/ + void TABLE::init_cost_info_for_usable_range_rowid_filters(THD *thd) { uint key_no; key_map usable_range_filter_keys; usable_range_filter_keys.clear_all(); key_map::Iterator it(quick_keys); + + /* + From all indexes that can be used for range accesses select only such that + - range filter pushdown is supported by the engine for them (1) + - they are not clustered primary (2) + - the range filter containers for them are not too large (3) + */ while ((key_no= it++) != key_map::Iterator::BITMAP_END) { - if (!(file->index_flags(key_no, 0, 1) & HA_DO_RANGE_FILTER_PUSHDOWN)) + if (!(file->index_flags(key_no, 0, 1) & HA_DO_RANGE_FILTER_PUSHDOWN)) // !1 continue; - if (key_no == s->primary_key && file->primary_key_is_clustered()) + if (key_no == s->primary_key && file->primary_key_is_clustered()) // !2 continue; if (quick_rows[key_no] > get_max_range_rowid_filter_elems_for_table(thd, this, - SORTED_ARRAY_CONTAINER)) + SORTED_ARRAY_CONTAINER)) // !3 continue; usable_range_filter_keys.set_bit(key_no); } + /* + Allocate an array of objects to store cost info for the selected filters + and allocate an array of pointers to these objects + */ + range_rowid_filter_cost_info_elems= usable_range_filter_keys.bits_set(); if (!range_rowid_filter_cost_info_elems) return; @@ -216,6 +329,8 @@ void TABLE::init_cost_info_for_usable_range_rowid_filters(THD *thd) return; } + /* Fill the allocated array with cost info on the selected range filters */ + Range_rowid_filter_cost_info **curr_ptr= range_rowid_filter_cost_info_ptr; Range_rowid_filter_cost_info *curr_filter_cost_info= range_rowid_filter_cost_info; @@ -228,10 +343,31 @@ void TABLE::init_cost_info_for_usable_range_rowid_filters(THD *thd) curr_ptr++; curr_filter_cost_info++; } + prune_range_rowid_filters(); } +/** + @brief + Choose the best range filter for the given access of the table + + @param access_key_no The index by which the table is accessed + @param records The estimated total number of key tuples with this access + + @details + The function looks through the array of cost info for range filters + and chooses the element for the range filter that promise the greatest + gain with the the ref or range access of the table by access_key_no. + As the array is sorted by cross_x in ascending order the function stops + the look through as soon as it reaches the first element with + cross_x > records because the range filter for this element and the + range filters for all remaining elements do not promise positive gains + + @retval Pointer to the cost info for the range filter that promises + the greatest gain, NULL if there is no such range filter +*/ + Range_rowid_filter_cost_info * TABLE::best_range_rowid_filter_for_partial_join(uint access_key_no, double records) @@ -240,6 +376,13 @@ TABLE::best_range_rowid_filter_for_partial_join(uint access_key_no, covering_keys.is_set(access_key_no)) return 0; + /* + Currently we do not support usage of range filters if the table + is accessed by the clustered primary key. It does not make sense + if a full key is used. If the table is accessed by a partial + clustered primary key it would, but the current InnoDB code does not + allow it. Later this limitation will be lifted + */ if (access_key_no == s->primary_key && file->primary_key_is_clustered()) return 0; @@ -251,11 +394,21 @@ TABLE::best_range_rowid_filter_for_partial_join(uint access_key_no, { double curr_gain = 0; Range_rowid_filter_cost_info *filter= range_rowid_filter_cost_info_ptr[i]; + + /* + Do not use a range filter that uses an in index correlated with + the index by which the table is accessed + */ if ((filter->key_no == access_key_no) || overlapped->is_set(filter->key_no)) continue; + if (records < filter->cross_x) + { + /* Does not make sense to look through the remaining filters */ break; + } + curr_gain= filter->get_gain(records); if (best_filter_gain < curr_gain) { @@ -267,12 +420,36 @@ TABLE::best_range_rowid_filter_for_partial_join(uint access_key_no, } +/** + @brief + Fill the range rowid filter performing the associated range index scan + + @details + This function performs the range index scan associated with this + range filter and place into the filter the rowids / primary keys + read from key tuples when doing this scan. + @retval + false on success + true otherwise + + @note + The function assumes that the quick select object to perform + the index range scan has been already created. + + @note + Currently the same table handler is used to access the joined table + and to perform range index scan filling the filter. + In the future two different handlers will be used for this + purposes to facilitate a lazy building of the filter. +*/ + bool Range_rowid_filter::fill() { int rc= 0; handler *file= table->file; THD *thd= table->in_use; QUICK_RANGE_SELECT* quick= (QUICK_RANGE_SELECT*) select->quick; + uint table_status_save= table->status; Item *pushed_idx_cond_save= file->pushed_idx_cond; uint pushed_idx_cond_keyno_save= file->pushed_idx_cond_keyno; @@ -283,7 +460,7 @@ bool Range_rowid_filter::fill() file->pushed_idx_cond_keyno= MAX_KEY; file->in_range_check_pushed_down= false; - /* We're going to just read rowids. */ + /* We're going to just read rowids / primary keys */ table->prepare_for_position(); table->file->ha_start_keyread(quick->index); @@ -306,10 +483,12 @@ bool Range_rowid_filter::fill() quick->range_end(); table->file->ha_end_keyread(); + table->status= table_status_save; file->pushed_idx_cond= pushed_idx_cond_save; file->pushed_idx_cond_keyno= pushed_idx_cond_keyno_save; file->in_range_check_pushed_down= in_range_check_pushed_down_save; + if (rc != HA_ERR_END_OF_FILE) return 1; table->file->rowid_filter_is_active= true; @@ -317,6 +496,23 @@ bool Range_rowid_filter::fill() } +/** + @brief + Binary search in the sorted array of a rowid filter + + @param ctxt context of the search + @parab elem rowid / primary key to look for + + @details + The function looks for the rowid / primary key ' elem' in this container + assuming that ctxt contains a pointer to the TABLE structure created + for the table to whose row elem refers to. + + @retval + true elem is found in the container + false otherwise +*/ + bool Rowid_filter_sorted_array::check(void *ctxt, char *elem) { TABLE *table= (TABLE *) ctxt; diff --git a/sql/rowid_filter.h b/sql/rowid_filter.h index 3578866..a90201d 100644 --- a/sql/rowid_filter.h +++ b/sql/rowid_filter.h @@ -1,3 +1,19 @@ +/* + Copyright (c) 2018, 2019 MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + #ifndef ROWID_FILTER_INCLUDED #define ROWID_FILTER_INCLUDED @@ -5,12 +21,11 @@ #include "mariadb.h" #include "sql_array.h" -/** - @class Rowid_filter +/* What rowid / primary filters are -------------------------------- - + Consider a join query Q of the form SELECT * FROM T1, ... , Tk WHERE P. @@ -22,23 +37,23 @@ When pk-filters are useful -------------------------- - + If building a pk-filter F for Ti(Q )is not too costly and its cardinality #F is much less than the cardinality of T - #T then using the pk-filter when executing Q might be quite beneficial. Let r be a random row from Ti. Let s(F) be the probability that pk(r) - belongs to F. Let BC(F) be the cost of building F. + belongs to F. Let BC(F) be the cost of building F. Suppose that the optimizer has chosen for Q a plan with this join order T1 => ... Tk and that the table Ti is accessed by a ref access using index I. Let K = {k1,...,kM} be the set of all rowid/primary keys values used to access - rows of Ti when looking for matches in this table.to join Ti by index I. - + rows of Ti when looking for matches in this table.to join Ti by index I. + Let's assume that two set sets K and F are uncorrelated. With this assumption if before accessing data from Ti by the rowid / primary key k we first check whether k is in F then we can expect saving on M*(1-s(S)) accesses of - data rows from Ti. If we can guarantee that test whether k is in F is + data rows from Ti. If we can guarantee that test whether k is in F is relatively cheap then we can gain a lot assuming that BC(F) is much less then the cost of fetching M*(1-s(S)) records from Ti and following evaluation of conditions pushed into Ti. @@ -54,7 +69,8 @@ then at least space for each primary key from the filter must be allocated in the search structure. On a the opposite a bloom filter requires a fixed number of bits and this number does not depend on the cardinality - of the pk-filter (10 bits per element will serve pk-filter of any size). + of the pk-filter (10 bits per element will serve pk-filter of any size). + */ class TABLE; @@ -75,26 +91,75 @@ typedef enum BLOOM_FILTER_CONTAINER } Rowid_filter_container_type; +/** + @class Rowid_filter_container + + The interface for different types of containers to store info on the set + of rowids / primary keys that defines a pk-filter. + + There will be two implementations of this abstract class. + - sorted array + - bloom filter +*/ + class Rowid_filter_container : public Sql_alloc { public: + virtual Rowid_filter_container_type get_type() = 0; + + /* Allocate memory for the container */ virtual bool alloc() = 0; + + /* + @brief Add info on a rowid / primary to the container + @param ctxt The context info (opaque) + @param elem The rowid / primary key to be added to the container + @retval true if elem is successfully added + */ virtual bool add(void *ctxt, char *elem) = 0; + + /* + @brief Check whether a rowid / primary key is in container + @param ctxt The context info (opaque) + @param elem The rowid / primary key to be checked against the container + @retval False if elem is definitely not in the container + */ virtual bool check(void *ctxt, char *elem) = 0; + virtual ~Rowid_filter_container() {} -}; +}; + + +/** + @class Rowid_filter + The interface for different types of pk-filters + + Currently we support only range pk filters. +*/ class Rowid_filter : public Sql_alloc { protected: + + /* The container to store info the set of elements in the filter */ Rowid_filter_container *container; + public: Rowid_filter(Rowid_filter_container *container_arg) : container(container_arg) {} - + + /* + Build the filter : + fill it with info on the set of elements placed there + */ virtual bool build() = 0; + + /* + Check whether an element is in the filter. + Returns false is the elements is definitely not in the filter. + */ virtual bool check(char *elem) = 0; virtual ~Rowid_filter() {} @@ -103,10 +168,20 @@ class Rowid_filter : public Sql_alloc }; +/** + @class Rowid_filter_container + + The implementation of the Rowid_interface used for pk-filters + that are filled when performing range index scans. +*/ + class Range_rowid_filter: public Rowid_filter { + /* The table for which the rowid filter is built */ TABLE *table; + /* The select to perform the range scan to fill the filter */ SQL_SELECT *select; + /* The cost info on the filter (used for EXPLAIN/ANALYZE) */ Range_rowid_filter_cost_info *cost_info; public: @@ -123,21 +198,34 @@ class Range_rowid_filter: public Rowid_filter bool check(char *elem) { return container->check(table, elem); } - bool fill(); + bool fill(); SQL_SELECT *get_select() { return select; } }; +/** + @class Refpos_container_sorted_array + + The wrapper class over Dynamic_array<char> to facilitate operations over + array of elements of the type char[N] where N is the same for all elements +*/ + class Refpos_container_sorted_array : public Sql_alloc { + /* + Maximum number of elements in the array + (Now is used only at the initialization of the dynamic array) + */ uint max_elements; + /* Number of bytes allocated for an element */ uint elem_size; + /* The dynamic array over which the wrapper is built */ Dynamic_array<char> *array; public: - Refpos_container_sorted_array(uint max_elems, uint elem_sz) + Refpos_container_sorted_array(uint max_elems, uint elem_sz) : max_elements(max_elems), elem_size(elem_sz), array(0) {} ~Refpos_container_sorted_array() @@ -149,7 +237,7 @@ class Refpos_container_sorted_array : public Sql_alloc bool alloc() { array= new Dynamic_array<char> (elem_size * max_elements, - elem_size * max_elements/8 + 1); + elem_size * max_elements/sizeof(char) + 1); return array == NULL; } @@ -178,11 +266,21 @@ class Refpos_container_sorted_array : public Sql_alloc } }; + +/** + @class Rowid_filter_sorted_array + + The implementation of the Rowid_filter_container interface as + a sorted array container of rowids / primary keys +*/ + class Rowid_filter_sorted_array: public Rowid_filter_container { + /* The dynamic array to store rowids / primary keys */ Refpos_container_sorted_array refpos_container; + /* Initially false, becomes true after the first call of (check() */ bool is_checked; - + public: Rowid_filter_sorted_array(uint elems, uint elem_size) : refpos_container(elems, elem_size), is_checked(false) {} @@ -197,28 +295,44 @@ class Rowid_filter_sorted_array: public Rowid_filter_container bool check(void *ctxt, char *elem); }; +/** + @class Range_rowid_filter_cost_info + + An objects of this class is created for each potentially usable + range filter. It contains the info that allows to figure out + whether usage of the range filter promises some gain. +*/ class Range_rowid_filter_cost_info : public Sql_alloc { -public: - Rowid_filter_container_type container_type; + /* The table for which the range filter is to be built (if needed) */ TABLE *table; - uint key_no; + /* Estimated number of elements in the filter */ double est_elements; - double b; // intercept of the linear function - double a; // slope of the linear function - double selectivity; + /* The cost of building the range filter */ + double b; + /* + a*N-b yields the gain of the filter + for N key tuples of the index key_no + */ + double a; + /* The value of N where the gain is 0 */ double cross_x; + /* Used for pruning of the potential range filters */ key_map abs_independent; - /** - Filter cost functions - */ +public: + /* The type of the container of the range filter */ + Rowid_filter_container_type container_type; + /* The index whose range scan would be used to build the range filter */ + uint key_no; + /* The selectivity of the range filter */ + double selectivity; Range_rowid_filter_cost_info() : table(0), key_no(0) {} void init(Rowid_filter_container_type cont_type, - TABLE *tab, uint key_numb); + TABLE *tab, uint key_no); double build_cost(Rowid_filter_container_type container_type); @@ -227,20 +341,27 @@ class Range_rowid_filter_cost_info : public Sql_alloc inline double avg_access_and_eval_gain_per_row(Rowid_filter_container_type cont_type); - /** - Get the gain that usage of filter promises for 'rows' key entries - */ + /* Get the gain that usage of filter promises for 'rows' key tuples */ inline double get_gain(double rows) { return rows * a - b; } + /* + The gain promised by usage of the filter at the assumption that + when the table is accessed without the filter at most worst_seeks + pages are fetched from disk to access the data rows of the table + */ inline double get_adjusted_gain(double rows, double worst_seeks) { return get_gain(rows) - (1 - selectivity) * (rows - MY_MIN(rows, worst_seeks)); } + /* + The gain promised by usage of the filter + due to less condition evaluations + */ inline double get_cmp_gain(double rows) { return rows * (1 - selectivity) / TIME_FOR_COMPARE; @@ -248,7 +369,18 @@ class Range_rowid_filter_cost_info : public Sql_alloc Rowid_filter_container *create_container(); -}; + double get_a() { return a; } + + friend + void TABLE::prune_range_rowid_filters(); + friend + void TABLE::init_cost_info_for_usable_range_rowid_filters(THD *thd); + + friend + Range_rowid_filter_cost_info * + TABLE::best_range_rowid_filter_for_partial_join(uint access_key_no, + double records); +}; #endif /* ROWID_FILTER_INCLUDED */ diff --git a/sql/sql_explain.cc b/sql/sql_explain.cc index f7262e2..c7c6f15 100644 --- a/sql/sql_explain.cc +++ b/sql/sql_explain.cc @@ -383,7 +383,7 @@ int print_explain_row(select_result_sink *result, StringBuffer<64> rows_str; if (rows) { - rows_str.append_ulonglong((ulonglong)(*rows)); + rows_str.append_ulonglong((ulonglong)(*rows)); item_list.push_back(new (mem_root) Item_string_sys(thd, rows_str.ptr(), rows_str.length()), mem_root); @@ -1117,7 +1117,7 @@ void Explain_table_access::fill_key_str(String *key_str, bool is_json) const - for hash join, it is key_len:pseudo_key_len - [tabular form only] rowid filter length is added after "|". - In JSON, we consider this column to be legacy, it is superceded by + In JSON, we consider this column to be legacy, it is superceded by used_key_parts. */ @@ -1251,9 +1251,9 @@ int Explain_table_access::print_explain(select_result_sink *output, uint8 explai join_type_buf.append(join_type_str[type]); join_type_buf.append("|filter"); item_list.push_back(new (mem_root) - Item_string_sys(thd, join_type_buf.ptr(), - join_type_buf.length()), - mem_root); + Item_string_sys(thd, join_type_buf.ptr(), + join_type_buf.length()), + mem_root); } /* `possible_keys` column */ @@ -1595,7 +1595,7 @@ void add_json_keyset(Json_writer *writer, const char *elem_name, } -void Explain_rowid_filter::print_explain_json(Explain_query *query, +void Explain_rowid_filter::print_explain_json(Explain_query *query, Json_writer *writer, bool is_analyze) { diff --git a/sql/sql_explain.h b/sql/sql_explain.h index 89fbd0f..a161f6c 100644 --- a/sql/sql_explain.h +++ b/sql/sql_explain.h @@ -612,12 +612,12 @@ class Explain_index_use : public Sql_alloc /* Query Plan data structure for Rowid filter. */ -class Explain_rowid_filter : public Sql_alloc +class Explain_rowid_filter : public Sql_alloc { public: /* Quick select used to collect the rowids into filter */ Explain_quick_select *quick; - + /* How many rows the above quick select is expected to return */ ha_rows rows; @@ -626,12 +626,12 @@ class Explain_rowid_filter : public Sql_alloc void print_explain_json(Explain_query *query, Json_writer *writer, bool is_analyze); - + /* TODO: Here should be ANALYZE members: - r_rows for the quick select - - An object that tracked the table access time + - An object that tracked the table access time - real selectivity of the filter. */ }; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 531fe5f..b5b77c2 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -1465,6 +1465,22 @@ int JOIN::optimize() } +/** + @brief + Create range filters objects needed in execution for all join tables + + @details + For each join table from the chosen execution plan such that a range filter + is used when joining this table the function creates a Rowid_filter object + for this range filter. In order to do this the function first constructs + a quick select to scan the range for this range filter. Then it creates + a container for the range filter and finally constructs a Range_rowid_filter + object a pointer to which is set in the field JOIN_TAB::rowid_filter of + the joined table. + + @retval false always +*/ + bool JOIN::make_range_rowid_filters() { DBUG_ENTER("make_range_rowid_filters"); @@ -1475,46 +1491,64 @@ bool JOIN::make_range_rowid_filters() tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS)) { - if (tab->range_rowid_filter_info) - { - int err; - SQL_SELECT *sel; - Rowid_filter_container *filter_container= NULL; - Item **sargable_cond= get_sargable_cond(this, tab->table); - sel= make_select(tab->table, const_table_map, const_table_map, - *sargable_cond, (SORT_INFO*) 0, 1, &err); - if (!sel) - DBUG_RETURN(1); + if (!tab->range_rowid_filter_info) + continue; + int err; + SQL_SELECT *sel= NULL; + Rowid_filter_container *filter_container= NULL; + Item **sargable_cond= get_sargable_cond(this, tab->table); + sel= make_select(tab->table, const_table_map, const_table_map, + *sargable_cond, (SORT_INFO*) 0, 1, &err); + if (!sel) + continue; - key_map filter_map; - filter_map.clear_all(); - filter_map.set_bit(tab->range_rowid_filter_info->key_no); - filter_map.merge(tab->table->with_impossible_ranges); - bool force_index_save= tab->table->force_index; - tab->table->force_index= true; - (void) sel->test_quick_select(thd, filter_map, (table_map) 0, - (ha_rows) HA_POS_ERROR, - true, false, true); - tab->table->force_index= force_index_save; - if (thd->is_error()) - DBUG_RETURN(1); - DBUG_ASSERT(sel->quick); - filter_container= - tab->range_rowid_filter_info->create_container(); - if (filter_container) - { - tab->rowid_filter= - new (thd->mem_root) Range_rowid_filter( - tab->table, - tab->range_rowid_filter_info, - filter_container, sel); - } + key_map filter_map; + filter_map.clear_all(); + filter_map.set_bit(tab->range_rowid_filter_info->key_no); + filter_map.merge(tab->table->with_impossible_ranges); + bool force_index_save= tab->table->force_index; + tab->table->force_index= true; + (void) sel->test_quick_select(thd, filter_map, (table_map) 0, + (ha_rows) HA_POS_ERROR, + true, false, true); + tab->table->force_index= force_index_save; + if (thd->is_error()) + goto no_filter; + DBUG_ASSERT(sel->quick); + filter_container= + tab->range_rowid_filter_info->create_container(); + if (filter_container) + { + tab->rowid_filter= + new (thd->mem_root) Range_rowid_filter(tab->table, + tab->range_rowid_filter_info, + filter_container, sel); + if (tab->rowid_filter) + continue; } + no_filter: + if (sel->quick) + delete sel->quick; + delete sel; } + DBUG_RETURN(0); } +/** + @brief + Allocate memory the rowid containers of the used the range filters + + @details + For each join table from the chosen execution plan such that a range filter + is used when joining this table the function allocate memory for the + rowid container employed by the filter. On success it lets the table engine + know that what rowid filter will be used when accessing the table rows. + + @retval false always +*/ + bool JOIN::init_range_rowid_filters() { diff --git a/sql/sql_select.h b/sql/sql_select.h index bfa4274..3de0e89 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -510,11 +510,19 @@ typedef struct st_join_table { uint n_sj_tables; bool preread_init_done; + + /* + Cost info to the range filter used when joining this join table + (Defined when the best join order has been already chosen) + */ Range_rowid_filter_cost_info *range_rowid_filter_info; + /* Rowid filter to be used when joining this join table */ Rowid_filter *rowid_filter; + /* Becomes true just after the used range filter has been built / filled */ bool is_rowid_filter_built; void build_range_rowid_filter_if_needed(); + void cleanup(); inline bool is_using_loose_index_scan() { @@ -975,8 +983,10 @@ typedef struct st_position /* Info on splitting plan used at this position */ SplM_plan_info *spl_plan; - /* The index for which filter can be built */ + + /* Cost info for the range filter used at this position */ Range_rowid_filter_cost_info *range_rowid_filter_info; + } POSITION; typedef Bounds_checked_array<Item_null_result*> Item_null_array; diff --git a/sql/structs.h b/sql/structs.h index a45cc34..12b5dee 100644 --- a/sql/structs.h +++ b/sql/structs.h @@ -120,6 +120,10 @@ typedef struct st_key { */ LEX_CSTRING name; key_part_map ext_key_part_map; + /* + Bitmap of indexes having common parts with this index + (only key parts from key definitions are taken into account) + */ key_map overlapped; uint block_size; enum ha_key_alg algorithm; diff --git a/sql/table.cc b/sql/table.cc index bb18940..3df4c9f 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -1227,6 +1227,7 @@ static const Type_handler *old_frm_type_handler(uint pack_flag, return Type_handler::get_handler_by_real_type(field_type); } +/* Set overlapped bitmaps for each index */ void TABLE_SHARE::set_overlapped_keys() { diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 8a092d6..0b4880b 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -7564,7 +7564,7 @@ ha_innobase::build_template( } else { m_prebuilt->pk_filter = NULL; } - + n_fields = (ulint) mysql_fields(table); if (!m_prebuilt->mysql_template) { @@ -20345,7 +20345,7 @@ innobase_pk_filter_is_active( /*==========================*/ void* file) /*!< in/out: pointer to ha_innobase */ { - return handler_rowid_filter_is_active(file); + return handler_rowid_filter_is_active(file); } /** Parse the table file name into table name and database name. @@ -20888,12 +20888,12 @@ ha_innobase::idx_cond_push( @param[in] pk_filter PK filter against which primary keys are to be checked */ -bool +bool ha_innobase::rowid_filter_push( class Rowid_filter* pk_filter) { DBUG_ENTER("ha_innobase::rowid_filter_push"); - DBUG_ASSERT(pk_filter != NULL); + DBUG_ASSERT(pk_filter != NULL); pushed_rowid_filter= pk_filter; DBUG_RETURN(FALSE); } diff --git a/storage/myisam/mi_range.c b/storage/myisam/mi_range.c index 33ac2b6..185f2ca 100644 --- a/storage/myisam/mi_range.c +++ b/storage/myisam/mi_range.c @@ -128,7 +128,21 @@ ha_rows mi_records_in_range(MI_INFO *info, int inx, } - /* Find relative position (in records) for key in index-tree */ +/* + To find an approximate relative position of a key tuple among all index + key tuples would not be hard if we considered B-trees where all key + tuples were contained only in leaf nodes. If we consider a B-tree where + key tuples are stored also in non-leaf nodes we have to convert such + tree into the tree of the first type. The transformation procedure is + simple: the key tuple k goes alter the last key tuple in the most right + sub-tree pointer to which is coupled with k. As a result of this + transformation each leaf node except the most right one in the tree will + contain one extra key tuple following those originally belonging to + the leaf. +*/ + + +/* Find relative position (in records) for key in index-tree */ static double _mi_record_pos(MI_INFO *info, const uchar *key, key_part_map keypart_map, diff --git a/storage/myisam/mi_rprev.c b/storage/myisam/mi_rprev.c index 2103003..a78bab6 100644 --- a/storage/myisam/mi_rprev.c +++ b/storage/myisam/mi_rprev.c @@ -59,7 +59,7 @@ int mi_rprev(MI_INFO *info, uchar *buf, int inx) while ((share->concurrent_insert && info->lastpos >= info->state->data_file_length) || (info->index_cond_func && - (icp_res= mi_check_index_cond(info, inx, buf)) == ICP_NO_MATCH) || + (icp_res= mi_check_index_cond(info, inx, buf)) == ICP_NO_MATCH) || (mi_check_rowid_filter_is_active(info) && !mi_check_rowid_filter(info))) { diff --git a/storage/myisam/myisamdef.h b/storage/myisam/myisamdef.h index 8eca504..d51e0f9 100644 --- a/storage/myisam/myisamdef.h +++ b/storage/myisam/myisamdef.h @@ -737,7 +737,7 @@ void mi_check_print_info(HA_CHECK *param, const char *fmt, ...); pthread_handler_t thr_find_all_keys(void *arg); extern void mi_set_index_cond_func(MI_INFO *info, index_cond_func_t check_func, void *func_arg); -extern void mi_set_rowid_filter_func(MI_INFO *info, +extern void mi_set_rowid_filter_func(MI_INFO *info, rowid_filter_func_t check_func, rowid_filter_func_t is_active_func, void *func_arg);
participants (1)
-
IgorBabaev