[Commits] Review draft for: Extend the join prefix to ensure all tables in the duplicate range ...
Hi Varun, Please check out the draft notes below. Any comments/objections? I'm having difficulty reviewing the patch as the naming is counter-intuitive (either that, or I miss something).
commit dcc092af9b8b73587788dcd3e664bb496378ec76 Author: Varun Gupta <varun.gupta@mariadb.com> Date: Wed Nov 6 13:24:29 2019 +0530
Extend the join prefix to ensure all tables in the duplicate range either are inside or outside the sort nest
...
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc index 822240dcfac..543378a85f4 100644 --- a/sql/opt_subselect.cc +++ b/sql/opt_subselect.cc @@ -3513,6 +3513,13 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join, }
It is not clear what this function means.
+bool Duplicate_weedout_picker::sort_nest_allowed_for_sj(table_map prefix_tables) +{
dupsweedout_tables is defined as: /* Tables that we will need to have in the prefix to do the weedout step (all inner and all outer that the involved semi-joins are correlated with) */ table_map dupsweedout_tables; I don't follow what the condition means?
+ if (!dupsweedout_tables || !(~prefix_tables & dupsweedout_tables)) + return FALSE; + return TRUE; +} + /* Remove the last join tab from from join->cur_sj_inner_tables bitmap we assume remaining_tables doesnt contain @tab. diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 25cf486f27d..e3056be7cc5 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -9594,8 +9594,6 @@ best_extension_by_limited_search(JOIN *join, JOIN_TAB *s; double best_record_count= DBL_MAX; double best_read_time= DBL_MAX; - bool disable_jbuf= (join->thd->variables.join_cache_level == 0) || - nest_created; bool save_prefix_resolves_ordering= join->prefix_resolves_ordering;
if (nest_created && !limit_applied_to_nest) @@ -9610,9 +9608,17 @@ best_extension_by_limited_search(JOIN *join, apply_limit.add("original_record_count", original_record_count); apply_limit.add("record_count_after_limit_applied", record_count); } - }
+ /* + Join buffering should be not considered in 2 cases s/should be not/should not be/?
+ 1) if join_cache_level = 0 that is join buffering is disabled + 2) if the limit was pushed to a prefix of the join that can resolve the + ORDER BY clause + */ + bool disable_jbuf= (join->thd->variables.join_cache_level == 0) || + limit_applied_to_nest; + DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time, "part_plan"););
@@ -9755,15 +9761,10 @@ best_extension_by_limited_search(JOIN *join, s->check_if_index_satisfies_ordering(index_used)) limit_applied_to_nest= TRUE;
- /* - TODO varun: - 1) get an optimizer switch to enable or disable the sort - nest instead of a system variable - */ if (!nest_created && (join->prefix_resolves_ordering || join->consider_adding_sort_nest(sort_nest_tables | - real_table_bit))) + real_table_bit, idx))) { // SORT_NEST branch join->positions[idx].sort_nest_operation_here= TRUE; @@ -9789,6 +9790,8 @@ best_extension_by_limited_search(JOIN *join, sort_nest_tables | real_table_bit, TRUE, limit_applied_to_nest)) DBUG_RETURN(TRUE); + if (!(join->is_index_with_ordering_allowed(idx) && + s->check_if_index_satisfies_ordering(index_used))) join->positions[idx].sort_nest_operation_here= FALSE; trace_rest.end(); } @@ -9808,8 +9811,11 @@ best_extension_by_limited_search(JOIN *join, DBUG_RETURN(TRUE); trace_rest.end();
- if (!idx) + if (idx == join->const_tables) + { limit_applied_to_nest= FALSE; + join->positions[idx].sort_nest_operation_here= FALSE; + } swap_variables(JOIN_TAB*, join->best_ref[idx], *pos); } else diff --git a/sql/sql_select.h b/sql/sql_select.h index ec86ae346ed..0e0d1de8f15 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -738,6 +738,7 @@ class Semi_join_strategy_picker struct st_position *loose_scan_pos) = 0;
virtual void mark_used() = 0; + virtual bool sort_nest_allowed_for_sj(table_map prefix_tables) = 0;
virtual ~Semi_join_strategy_picker() {} }; @@ -779,6 +780,7 @@ class Duplicate_weedout_picker : public Semi_join_strategy_picker struct st_position *loose_scan_pos);
void mark_used() { is_used= TRUE; } + bool sort_nest_allowed_for_sj(table_map remaining_tables); friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join); };
@@ -824,6 +826,7 @@ class Firstmatch_picker : public Semi_join_strategy_picker struct st_position *loose_scan_pos);
void mark_used() { is_used= TRUE; } + bool sort_nest_allowed_for_sj(table_map remaining_tables) { return FALSE; }
Please clarify why this is so.
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join); };
@@ -866,7 +869,7 @@ class LooseScan_picker : public Semi_join_strategy_picker sj_strategy_enum *strategy, struct st_position *loose_scan_pos); void mark_used() { is_used= TRUE; } - + bool sort_nest_allowed_for_sj(table_map remaining_tables) { return FALSE; }
I think this is ok to disable.
friend class Loose_scan_opt; friend void best_access_path(JOIN *join, JOIN_TAB *s, @@ -916,6 +919,7 @@ class Sj_materialization_picker : public Semi_join_strategy_picker sj_strategy_enum *strategy, struct st_position *loose_scan_pos); void mark_used() { is_used= TRUE; } + bool sort_nest_allowed_for_sj(table_map remaining_tables) { return FALSE; }
Seems to be ok.
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join); }; @@ -1956,7 +1960,8 @@ class JOIN :public Sql_alloc void setup_range_scan(JOIN_TAB *tab, uint idx, double records); bool is_join_buffering_allowed(JOIN_TAB *tab); bool check_join_prefix_resolves_ordering(table_map previous_tables); - bool consider_adding_sort_nest(table_map previous_tables); + bool consider_adding_sort_nest(table_map previous_tables, uint idx); + bool extend_prefix_to_ensure_duplicate_removal(table_map prefix_tables, uint idx); void set_fraction_output_for_nest(); double sort_nest_oper_cost(double join_record_count, uint idx, ulong rec_len); diff --git a/sql/sql_sort_nest.cc b/sql/sql_sort_nest.cc index 18d2a596587..5725c1ef0db 100644 --- a/sql/sql_sort_nest.cc +++ b/sql/sql_sort_nest.cc @@ -1446,18 +1446,44 @@ bool JOIN::sort_nest_allowed() FALSE otherwise */
(note to self: this returns FALSE when one CANNOT add a sort nest)
-bool JOIN::consider_adding_sort_nest(table_map prefix_tables) +bool JOIN::consider_adding_sort_nest(table_map prefix_tables, uint idx) { if (!sort_nest_possible || // (1) get_cardinality_estimate || // (2) cur_embedding_map || // (3) - cur_sj_inner_tables) // (4) + cur_sj_inner_tables || // (4) + extend_prefix_to_ensure_duplicate_removal(prefix_tables, idx))
(note to self: extend_prefix_to_ensure_duplicate_removal()= TRUE means we CANNOT add a sort nest)
return FALSE;
return check_join_prefix_resolves_ordering(prefix_tables); // (5) }
+bool +JOIN::extend_prefix_to_ensure_duplicate_removal(table_map prefix_tables, uint idx)
The name is poor. The function is called "extend_..." but it doesn't perform any extension. What it actually does is to check "whether sort nest is *NOT allowed* at this point. @return TRUE - CANNOT add a sort nest FALSE - CAN add a sort nest
+{
+ if (!select_lex->have_merged_subqueries) + return FALSE; + + POSITION *pos= positions + idx; + Semi_join_strategy_picker *pickers[]= + { + &pos->firstmatch_picker, + &pos->loosescan_picker, + &pos->sjmat_picker, + &pos->dups_weedout_picker, + NULL, + }; + Semi_join_strategy_picker **strategy; + for (strategy= pickers; *strategy != NULL; strategy++) + {
So, the logic is: if any of the semi-join strategies returns true ("disallow") that means the sort nest cannot be placed here and must be moved.
+ if ((*strategy)->sort_nest_allowed_for_sj(prefix_tables)) + return TRUE; + }
FirstMatch and LooseScan pickers always return FALSE. This means they always allow adding a sort nest... Is this correct?
+ return FALSE; +} + + /* @brief Check if indexes on a table are allowed to resolve the ORDER BY clause
BR Sergei -- Sergei Petrunia, Software Developer MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog
participants (1)
-
Sergey Petrunia