[Commits] d7d7ee859c4: MDEV-19901: Wrong window function calculation
revision-id: d7d7ee859c4fc35139034ac19adc8c9c18bf197f (mariadb-10.2.30-26-gd7d7ee859c4) parent(s): c3824766c5788bf3aa0d1131e149dff187d41f2c author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2019-12-21 19:41:23 +0300 message: MDEV-19901: Wrong window function calculation Use a different approach to splitting window functions into groups which can re-use the same data sorting. Instead of sorting the window functions and then marking the bounds in the list, partition window functions into groups directly. --- mysql-test/r/win.result | 67 +++++++++-- mysql-test/t/win.test | 24 ++++ sql/sql_window.cc | 303 ++++++++++-------------------------------------- sql/sql_window.h | 7 +- 4 files changed, 148 insertions(+), 253 deletions(-) diff --git a/mysql-test/r/win.result b/mysql-test/r/win.result index 2fe511d0e8b..7f08797c54c 100644 --- a/mysql-test/r/win.result +++ b/mysql-test/r/win.result @@ -2038,17 +2038,17 @@ row_number() over (order by pk desc) as r_desc, row_number() over (order by pk asc) as r_asc from t1; pk r_desc r_asc -11 1 11 -10 2 10 -9 3 9 -8 4 8 -7 5 7 -6 6 6 -5 7 5 -4 8 4 -3 9 3 -2 10 2 1 11 1 +2 10 2 +3 9 3 +4 8 4 +5 7 5 +6 6 6 +7 5 7 +8 4 8 +9 3 9 +10 2 10 +11 1 11 drop table t1; # # MDEV-10874: two window functions with compatible sorting @@ -3653,5 +3653,52 @@ COUNT(*) OVER () MOD(MIN(i),2) 3 1 drop table t1; # +# MDEV-19901: Wrong window function calculation +# +create table t1 (a int, b int); +insert into t1 values (1, 1), (2, 1), (3, 2), (4, 2); +select +sum(a) over (), +sum(a) over (partition by b) c, +sum(a) over (order by a) d +from t1 +order by a; +sum(a) over () c d +10 3 1 +10 3 3 +10 7 6 +10 7 10 +explain format=json +select +sum(a) over (), +sum(a) over (partition by b) c, +sum(a) over (order by a) d +from t1; +EXPLAIN +{ + "query_block": { + "select_id": 1, + "window_functions_computation": { + "sorts": { + "filesort": { + "sort_key": "t1.b" + }, + "filesort": { + "sort_key": "t1.a" + } + }, + "temporary_table": { + "table": { + "table_name": "t1", + "access_type": "ALL", + "rows": 4, + "filtered": 100 + } + } + } + } +} +drop table t1; +# # End of 10.2 tests # diff --git a/mysql-test/t/win.test b/mysql-test/t/win.test index 1e302722b99..cb07d16e197 100644 --- a/mysql-test/t/win.test +++ b/mysql-test/t/win.test @@ -2361,6 +2361,30 @@ INSERT INTO t1 VALUES (1),(0),(1),(2),(0),(1),(2),(1),(2); SELECT DISTINCT COUNT(*) OVER (), MOD(MIN(i),2) FROM t1 GROUP BY i ; drop table t1; +--echo # +--echo # MDEV-19901: Wrong window function calculation +--echo # + +create table t1 (a int, b int); +insert into t1 values (1, 1), (2, 1), (3, 2), (4, 2); + +select + sum(a) over (), + sum(a) over (partition by b) c, + sum(a) over (order by a) d +from t1 + order by a; + +explain format=json +select + sum(a) over (), + sum(a) over (partition by b) c, + sum(a) over (order by a) d +from t1; + + +drop table t1; + --echo # --echo # End of 10.2 tests --echo # diff --git a/sql/sql_window.cc b/sql/sql_window.cc index b258b8f56c9..a9a979bb664 100644 --- a/sql/sql_window.cc +++ b/sql/sql_window.cc @@ -6,6 +6,10 @@ #include "sql_window.h" #include "my_dbug.h" +typedef List<Item_window_func> Window_func_list; + +static List<Window_func_list> +split_window_funcs_into_sorts(List<Item_window_func> *win_func_list); bool Window_spec::check_window_names(List_iterator_fast<Window_spec> &it) @@ -368,84 +372,6 @@ int compare_order_lists(SQL_I_List<ORDER> *part_list1, return CMP_EQ; } - -static -int compare_window_frame_bounds(Window_frame_bound *win_frame_bound1, - Window_frame_bound *win_frame_bound2, - bool is_bottom_bound) -{ - int res; - if (win_frame_bound1->precedence_type != win_frame_bound2->precedence_type) - { - res= win_frame_bound1->precedence_type > win_frame_bound2->precedence_type ? - CMP_GT : CMP_LT; - if (is_bottom_bound) - res= -res; - return res; - } - - if (win_frame_bound1->is_unbounded() && win_frame_bound2->is_unbounded()) - return CMP_EQ; - - if (!win_frame_bound1->is_unbounded() && !win_frame_bound2->is_unbounded()) - { - if (win_frame_bound1->offset->eq(win_frame_bound2->offset, true)) - return CMP_EQ; - else - { - res= strcmp(win_frame_bound1->offset->name, - win_frame_bound2->offset->name); - res= res > 0 ? CMP_GT : CMP_LT; - if (is_bottom_bound) - res= -res; - return res; - } - } - - /* - Here we have: - win_frame_bound1->is_unbounded() != win_frame_bound1->is_unbounded() - */ - return is_bottom_bound != win_frame_bound1->is_unbounded() ? CMP_LT : CMP_GT; -} - - -static -int compare_window_frames(Window_frame *win_frame1, - Window_frame *win_frame2) -{ - int cmp; - - if (win_frame1 == win_frame2) - return CMP_EQ; - - if (!win_frame1) - return CMP_LT; - - if (!win_frame2) - return CMP_GT; - - if (win_frame1->units != win_frame2->units) - return win_frame1->units > win_frame2->units ? CMP_GT : CMP_LT; - - cmp= compare_window_frame_bounds(win_frame1->top_bound, - win_frame2->top_bound, - false); - if (cmp) - return cmp; - - cmp= compare_window_frame_bounds(win_frame1->bottom_bound, - win_frame2->bottom_bound, - true); - if (cmp) - return cmp; - - if (win_frame1->exclusion != win_frame2->exclusion) - return win_frame1->exclusion > win_frame2->exclusion ? CMP_GT_C : CMP_LT_C; - - return CMP_EQ; -} - static int compare_window_spec_joined_lists(Window_spec *win_spec1, Window_spec *win_spec2) @@ -459,147 +385,68 @@ int compare_window_spec_joined_lists(Window_spec *win_spec1, return cmp; } - -static -int compare_window_funcs_by_window_specs(Item_window_func *win_func1, - Item_window_func *win_func2, - void *arg) -{ - int cmp; - Window_spec *win_spec1= win_func1->window_spec; - Window_spec *win_spec2= win_func2->window_spec; - if (win_spec1 == win_spec2) - return CMP_EQ; - cmp= compare_order_lists(win_spec1->partition_list, - win_spec2->partition_list); - if (cmp == CMP_EQ) - { - /* - Partition lists contain the same elements. - Let's use only one of the lists. - */ - if (!win_spec1->name() && win_spec2->name()) - win_spec1->partition_list= win_spec2->partition_list; - else - win_spec2->partition_list= win_spec1->partition_list; - - cmp= compare_order_lists(win_spec1->order_list, - win_spec2->order_list); - - if (cmp != CMP_EQ) - return cmp; - - /* - Order lists contain the same elements. - Let's use only one of the lists. - */ - if (!win_spec1->name() && win_spec2->name()) - win_spec1->order_list= win_spec2->order_list; - else - win_spec2->order_list= win_spec1->order_list; - - cmp= compare_window_frames(win_spec1->window_frame, - win_spec2->window_frame); - - if (cmp != CMP_EQ) - return cmp; - - /* Window frames are equal. Let's use only one of them. */ - if (!win_spec1->name() && win_spec2->name()) - win_spec1->window_frame= win_spec2->window_frame; - else - win_spec2->window_frame= win_spec1->window_frame; - - return CMP_EQ; - } - - if (cmp == CMP_GT || cmp == CMP_LT) - return cmp; - - /* one of the partitions lists is the proper beginning of the another */ - cmp= compare_window_spec_joined_lists(win_spec1, win_spec2); - - if (CMP_LT_C <= cmp && cmp <= CMP_GT_C) - cmp= win_spec1->partition_list->elements < - win_spec2->partition_list->elements ? CMP_GT_C : CMP_LT_C; - - return cmp; -} - - -#define SORTORDER_CHANGE_FLAG 1 -#define PARTITION_CHANGE_FLAG 2 -#define FRAME_CHANGE_FLAG 4 - -typedef int (*Item_window_func_cmp)(Item_window_func *f1, - Item_window_func *f2, - void *arg); /* @brief - Sort window functions so that those that can be computed together are - adjacent. + Split window functions into lists that can share the sorting. - @detail - Sort window functions by their - - required sorting order, - - partition list, - - window frame compatibility. + @param win_func_list List of all window functions in the select. - The changes between the groups are marked by setting item_window_func->marker. + @detail + Split the list of window functions into multiple lists. Each of the + new lists only includes functions that can be computed using + the sorting from the first element of the list. + + @note + Within one sort, it's also possible to re-use + - partition bound detector (for equivalent PARTITION BY clauses) + - window frame cursors + This is not implemented currently, but could use the same approach. */ -static -void order_window_funcs_by_window_specs(List<Item_window_func> *win_func_list) +static List<Window_func_list> +split_window_funcs_into_sorts(List<Item_window_func> *win_func_list) { - if (win_func_list->elements == 0) - return; + List<Window_func_list> sorts; - bubble_sort<Item_window_func>(win_func_list, - compare_window_funcs_by_window_specs, - NULL); - - List_iterator_fast<Item_window_func> it(*win_func_list); - Item_window_func *prev= it++; - prev->marker= SORTORDER_CHANGE_FLAG | - PARTITION_CHANGE_FLAG | - FRAME_CHANGE_FLAG; - Item_window_func *curr; - while ((curr= it++)) - { - Window_spec *win_spec_prev= prev->window_spec; - Window_spec *win_spec_curr= curr->window_spec; - curr->marker= 0; - if (!(win_spec_prev->partition_list == win_spec_curr->partition_list && - win_spec_prev->order_list == win_spec_curr->order_list)) + List_iterator<Item_window_func> win_func_it(*win_func_list); + Item_window_func *new_win_func; + while ((new_win_func= win_func_it++)) + { + List_iterator<Window_func_list> sorts_it(sorts); + Window_func_list *wfunc_list; + bool added= false; + while ((wfunc_list= sorts_it++)) { - int cmp; - if (win_spec_prev->partition_list == win_spec_curr->partition_list) - cmp= compare_order_lists(win_spec_prev->order_list, - win_spec_curr->order_list); + Item_window_func *win_func= wfunc_list->head(); + int r= compare_window_spec_joined_lists(win_func->window_spec, + new_win_func->window_spec); + + // maintain the invariant; the first element in the list uses the most + // restrictive sorting + + if (r == CMP_EQ || r == CMP_GT_C) + wfunc_list->push_back(new_win_func); + else if (r == CMP_LT_C) + wfunc_list->push_front(new_win_func); else - cmp= compare_window_spec_joined_lists(win_spec_prev, win_spec_curr); - if (!(CMP_LT_C <= cmp && cmp <= CMP_GT_C)) - { - curr->marker= SORTORDER_CHANGE_FLAG | - PARTITION_CHANGE_FLAG | - FRAME_CHANGE_FLAG; - } - else if (win_spec_prev->partition_list != win_spec_curr->partition_list) - { - curr->marker|= PARTITION_CHANGE_FLAG | FRAME_CHANGE_FLAG; - } + continue; // This element of sorts is not a match + + added= true; + break; } - else if (win_spec_prev->window_frame != win_spec_curr->window_frame) - curr->marker|= FRAME_CHANGE_FLAG; - prev= curr; + if (!added) + { + // We didn't find a suitable sort to add window function to. + // Create a new sort and put it there. + Window_func_list *newlist = new Window_func_list; + newlist->push_back(new_win_func); + sorts.push_back(newlist); + } } + return sorts; } - -///////////////////////////////////////////////////////////////////////////// - - ///////////////////////////////////////////////////////////////////////////// // Window Frames support ///////////////////////////////////////////////////////////////////////////// @@ -2819,42 +2666,18 @@ bool Window_funcs_sort::exec(JOIN *join, bool keep_filesort_result) bool Window_funcs_sort::setup(THD *thd, SQL_SELECT *sel, - List_iterator<Item_window_func> &it, + List<Item_window_func> *window_funcs, JOIN_TAB *join_tab) { - Window_spec *spec; - Item_window_func *win_func= it.peek(); - Item_window_func *win_func_with_longest_order= NULL; - int longest_order_elements= -1; - - /* The iterator should point to a valid function at the start of execution. */ - DBUG_ASSERT(win_func); - do - { - spec= win_func->window_spec; - int win_func_order_elements= spec->partition_list->elements + - spec->order_list->elements; - if (win_func_order_elements > longest_order_elements) - { - win_func_with_longest_order= win_func; - longest_order_elements= win_func_order_elements; - } + List_iterator<Item_window_func> it(*window_funcs); + Item_window_func *win_func; + while ((win_func = it++)) + { if (runner.add_function_to_run(win_func)) return true; - it++; - win_func= it.peek(); - } while (win_func && !(win_func->marker & SORTORDER_CHANGE_FLAG)); - - /* - The sort criteria must be taken from the last win_func in the group of - adjacent win_funcs that do not have SORTORDER_CHANGE_FLAG. This is - because the sort order must be the most specific sorting criteria defined - within the window function group. This ensures that we sort the table - in a way that the result is valid for all window functions belonging to - this Window_funcs_sort. - */ - spec= win_func_with_longest_order->window_spec; + } + Window_spec *spec= window_funcs->head()->window_spec; ORDER* sort_order= concat_order_lists(thd->mem_root, spec->partition_list->first, spec->order_list->first); @@ -2892,7 +2715,8 @@ bool Window_funcs_computation::setup(THD *thd, List<Item_window_func> *window_funcs, JOIN_TAB *tab) { - order_window_funcs_by_window_specs(window_funcs); + List<Window_func_list> wfunc_sorts= + split_window_funcs_into_sorts(window_funcs); SQL_SELECT *sel= NULL; /* @@ -2908,11 +2732,12 @@ bool Window_funcs_computation::setup(THD *thd, } Window_funcs_sort *srt; - List_iterator<Item_window_func> iter(*window_funcs); - while (iter.peek()) + List_iterator<Window_func_list> it(wfunc_sorts); + Window_func_list *wfunc_list; + while ((wfunc_list= it++)) { if (!(srt= new Window_funcs_sort()) || - srt->setup(thd, sel, iter, tab)) + srt->setup(thd, sel, wfunc_list, tab)) { return true; } diff --git a/sql/sql_window.h b/sql/sql_window.h index e0c1563e5bb..628ab908f14 100644 --- a/sql/sql_window.h +++ b/sql/sql_window.h @@ -163,9 +163,8 @@ int setup_windows(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables, class Frame_cursor; /* - This handles computation of one window function. - - Currently, we make a spearate filesort() call for each window function. + This handles computation of several window functions which require the same + sorting of the dataset. */ class Window_func_runner : public Sql_alloc @@ -193,7 +192,7 @@ class Window_func_runner : public Sql_alloc class Window_funcs_sort : public Sql_alloc { public: - bool setup(THD *thd, SQL_SELECT *sel, List_iterator<Item_window_func> &it, + bool setup(THD *thd, SQL_SELECT *sel, List<Item_window_func> *window_funcs, st_join_table *join_tab); bool exec(JOIN *join, bool keep_filesort_result); void cleanup() { delete filesort; }
participants (1)
-
Sergei Petrunia