revision-id: f6806d9e4acbe260cd4ffc4996a058288e711d6a (mariadb-10.4.4-379-gf6806d9e4ac) parent(s): 276c6d571252f2d23c8a449391eca3a3bf4f350d author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2020-02-16 20:46:38 +0300 message: MDEV-21713: LIMIT optimization and selectivity: pessimistic estimates cause optimistic plans Don't use LIMIT-based query plans if we don't have an accurate estimate of join output cardinality (the primary reason for that is lack of data about condition selectivity). --- sql/item.cc | 2 + sql/item.h | 2 + sql/item_cmpfunc.h | 4 ++ sql/opt_range.cc | 11 +++++ sql/opt_range.h | 2 + sql/sql_select.cc | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++-- sql/sql_select.h | 3 ++ 7 files changed, 155 insertions(+), 3 deletions(-) diff --git a/sql/item.cc b/sql/item.cc index f92390105c1..f97ed3fa45a 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -402,6 +402,7 @@ int Item::save_str_value_in_field(Field *field, String *result) Item::Item(THD *thd): + n_selectivity_estimates(0), is_expensive_cache(-1), rsize(0), name(null_clex_str), orig_name(0), is_autogenerated_name(TRUE) { @@ -451,6 +452,7 @@ const TABLE_SHARE *Item::field_table_or_null() Item::Item(THD *thd, Item *item): Type_all_attributes(*item), join_tab_idx(item->join_tab_idx), + n_selectivity_estimates(0), is_expensive_cache(-1), rsize(0), str_value(item->str_value), diff --git a/sql/item.h b/sql/item.h index 7fd32b134b4..0b5c961d8d5 100644 --- a/sql/item.h +++ b/sql/item.h @@ -734,6 +734,8 @@ class Item: public Value_source, static void *operator new(size_t size); public: + int n_selectivity_estimates; + static void *operator new(size_t size, MEM_ROOT *mem_root) throw () { return alloc_root(mem_root, size); } static void operator delete(void *ptr,size_t size) { TRASH_FREE(ptr, size); } diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index dda4c29dba3..c91673d9da3 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -444,6 +444,7 @@ class Item_bool_func2 :public Item_bool_func } }; +bool sel_tree_non_empty(SEL_TREE *tree); /** A class for functions and operators that can use the range optimizer and @@ -502,6 +503,8 @@ class Item_bool_func2_with_rev :public Item_bool_func2 if (!(ftree= get_full_func_mm_tree_for_args(param, args[0], args[1])) && !(ftree= get_full_func_mm_tree_for_args(param, args[1], args[0]))) ftree= Item_func::get_mm_tree(param, cond_ptr); + if (sel_tree_non_empty(ftree)) + n_selectivity_estimates++; DBUG_RETURN(ftree); } }; @@ -3158,6 +3161,7 @@ class Item_equal: public Item_bool_func const Type_handler *m_compare_handler; CHARSET_INFO *m_compare_collation; + public: COND_EQUAL *upper_levels; /* multiple equalities of upper and levels */ diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 8dae20fc30f..baba33ca60d 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -8371,6 +8371,10 @@ Item_func_between::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) } ftree= tree_and(param, ftree, tree); + + if (sel_tree_non_empty(ftree)) + n_selectivity_estimates++; + DBUG_RETURN(ftree); } @@ -8425,10 +8429,17 @@ SEL_TREE *Item_equal::get_mm_tree(RANGE_OPT_PARAM *param, Item **cond_ptr) ftree= !ftree ? tree : tree_and(param, ftree, tree); } } + // psergey + if (sel_tree_non_empty(ftree)) + n_selectivity_estimates++; DBUG_RETURN(ftree); } +bool sel_tree_non_empty(SEL_TREE *tree) +{ + return (tree && !tree->keys_map.is_clear_all()); +} SEL_TREE * Item_bool_func::get_mm_parts(RANGE_OPT_PARAM *param, Field *field, diff --git a/sql/opt_range.h b/sql/opt_range.h index 73def7bde92..9dbeb66afe2 100644 --- a/sql/opt_range.h +++ b/sql/opt_range.h @@ -1741,6 +1741,8 @@ void store_key_image_to_rec(Field *field, uchar *ptr, uint len); extern String null_string; + +bool sel_tree_non_empty(SEL_TREE *tree); /* check this number of rows (default value) */ #define SELECTIVITY_SAMPLING_LIMIT 100 /* but no more then this part of table (10%) */ diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 886f33b1662..45af16bbe95 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -7247,7 +7247,8 @@ best_access_path(JOIN *join, THD *thd= join->thd; uint use_cond_selectivity= thd->variables.optimizer_use_condition_selectivity; KEYUSE *best_key= 0; - uint best_max_key_part= 0; + uint best_max_key_part= 0; // this has ~0 for fully-used unique keys + uint best_n_key_parts= 0; // this is actual best # key parts used. my_bool found_constraint= 0; double best= DBL_MAX; double best_time= DBL_MAX; @@ -7819,6 +7820,7 @@ best_access_path(JOIN *join, best_records= records; best_key= start_key; best_max_key_part= max_key_part; + best_n_key_parts= max_part_bit(found_part); best_ref_depends_map= found_ref; best_filter= filter; best_type= type; @@ -8162,6 +8164,9 @@ best_access_path(JOIN *join, pos->sort_nest_operation_here= FALSE; pos->index_no= index_picked; + if (best_key) + pos->key_parts = best_n_key_parts; + loose_scan_opt.save_to_position(s, loose_scan_pos); trace_paths.end(); @@ -8282,6 +8287,114 @@ static void choose_initial_table_order(JOIN *join) } +/* See comments to MDEV-21713 for explanation of why this works */ +bool +check_if_refs_cover_item_equal(JOIN *join, bool use_best_pos, uint prefix_size, + Item_equal *item_eq) +{ + uint n_covered= 0; + // walk through the join prefix and check if used ref access guarantees + // selectivity of the condition. + + for (uint i= 0; i < prefix_size; i++) + { + POSITION *pos= use_best_pos? &join->best_positions[i] : &join->positions[i]; + KEYUSE *keyuse= pos->key; + if (keyuse && keyuse->keypart != FT_KEYPART && + !is_hash_join_key_no(keyuse->key)) + { + uint key= keyuse->key; + // For each key part + for (uint kp= 0; kp < pos->key_parts; kp++) { + Field *field= pos->table->table->key_info[key].key_part[kp].field; + // Find the field in the Item_equal + if (item_eq->contains(field)) + n_covered++; + } + } + } + + return n_covered >= item_eq->elements_count() - 1; +} + + +bool is_item_selectivity_covered(JOIN *join, bool use_best_pos, uint prefix_size, Item *item) +{ + if (item->type() == Item::FUNC_ITEM && + ((Item_func*)item)->functype() == Item_func::MULT_EQUAL_FUNC) + { + // multiple equality... it must be "covered" by estimates for all + // pair-wise equalities. + if (check_if_refs_cover_item_equal(join, use_best_pos, prefix_size, (Item_equal*)item)) + { + return true; + } + } + else + { + // not a multiple equality. + if (item->n_selectivity_estimates) + return true; // EITS or range analyzer have provided the estimates. + } + + // This is something that we don't have selectivity for + return false; +} + + +/* + @brief Check if a Join plan accounts for all parts of the WHERE condition + + @param use_best_pos TRUE - The plan is in join->best_positions, + FALSE - The plan is in join->positions + @param prefix_size Size of join prefix to examine (TODO: currently we + examime only full join orders, examining a prefix will + typically produce "false" (as selectivities on tables + not in the prefix will not be accounted for) + @detail + The analysis is done as follows: we require that the WHERE clause has form + + simple_pred1 AND ... AND simple_predN + + Where each simple_pred{i} is either + - a comparison e.g. "col < const" for which range and/or EITS analyzer + produced a range. + - A multiple equality: col1=col2=col3= ... + Multiple equality must be "covered" by equalities that are implied by the + used ref accesses. +*/ + +bool all_selectivity_accounted_for(JOIN *join, bool use_best_pos, uint prefix_size) +{ + // TODO: also check for outer joins and their ON expressions. + Item *cond= join->conds; + + // Walk the WHERE clause: + // Items that are not multiple equalities must have '1' + // Multiple equalities must have more. + + if (!cond) + return true; + else if (cond->type() == Item::COND_ITEM && + (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)) + { + List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); + Item *item; + while ((item= li++)) + { + if (!is_item_selectivity_covered(join, use_best_pos, prefix_size, item)) + return false; + } + } + else + { + if (!is_item_selectivity_covered(join, use_best_pos, prefix_size, cond)) + return false; + } + + return true; +} + /** Selects and invokes a search strategy for an optimal query plan. @@ -8366,7 +8479,7 @@ choose_plan(JOIN *join, table_map join_tables) /* Automatically determine a reasonable value for 'search_depth' */ search_depth= determine_search_depth(join); - if (join->sort_nest_possible && + if (join->sort_nest_possible && !join->emb_sjm_nest && join->estimate_cardinality_for_join(join_tables)) DBUG_RETURN(TRUE); @@ -9861,8 +9974,16 @@ best_extension_by_limited_search(JOIN *join, } trace_one_table.add("estimated_join_cardinality", partial_join_cardinality); + /* + SelectivityAccuracyCheck-2: Check if our estimate of join output size + is is accurate. If it's not, we are likely to be overly optimistic + about this LIMIT-based query plan, so we discard it. + */ + bool disallow_plan= false; + if (nest_created && !all_selectivity_accounted_for(join, false, idx+1)) + disallow_plan= true; - if (current_read_time < join->best_read) + if (current_read_time < join->best_read && !disallow_plan) { memcpy((uchar*) join->best_positions, (uchar*) join->positions, sizeof(POSITION) * (idx + 1)); @@ -29268,6 +29389,13 @@ bool JOIN::estimate_cardinality_for_join(table_map joined_tables) use_cond_selectivity)) return TRUE; + /* + SelectivityAccuracyCheck-1: Check if the obtained estimate of total join + cardinality is accurate (or at least is not obviously inaccurate) + */ + if (!all_selectivity_accounted_for(this, true, table_count)) + sort_nest_possible= false; + set_if_bigger(join_record_count, 1); cardinality_estimate= join_record_count; diff --git a/sql/sql_select.h b/sql/sql_select.h index 7f598ca116a..2f26759516a 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -965,6 +965,9 @@ typedef struct st_position */ KEYUSE *key; + // Number of key parts used + uint key_parts; + /* If ref-based access is used: bitmap of tables this table depends on */ table_map ref_depend_map;