
22 Dec '19
revision-id: d7d7ee859c4fc35139034ac19adc8c9c18bf197f (mariadb-10.2.30-26-gd7d7ee859c4)
parent(s): c3824766c5788bf3aa0d1131e149dff187d41f2c
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-12-21 19:41:23 +0300
message:
MDEV-19901: Wrong window function calculation
Use a different approach to splitting window functions into groups which
can re-use the same data sorting.
Instead of sorting the window functions and then marking the bounds in
the list, partition window functions into groups directly.
---
mysql-test/r/win.result | 67 +++++++++--
mysql-test/t/win.test | 24 ++++
sql/sql_window.cc | 303 ++++++++++--------------------------------------
sql/sql_window.h | 7 +-
4 files changed, 148 insertions(+), 253 deletions(-)
diff --git a/mysql-test/r/win.result b/mysql-test/r/win.result
index 2fe511d0e8b..7f08797c54c 100644
--- a/mysql-test/r/win.result
+++ b/mysql-test/r/win.result
@@ -2038,17 +2038,17 @@ row_number() over (order by pk desc) as r_desc,
row_number() over (order by pk asc) as r_asc
from t1;
pk r_desc r_asc
-11 1 11
-10 2 10
-9 3 9
-8 4 8
-7 5 7
-6 6 6
-5 7 5
-4 8 4
-3 9 3
-2 10 2
1 11 1
+2 10 2
+3 9 3
+4 8 4
+5 7 5
+6 6 6
+7 5 7
+8 4 8
+9 3 9
+10 2 10
+11 1 11
drop table t1;
#
# MDEV-10874: two window functions with compatible sorting
@@ -3653,5 +3653,52 @@ COUNT(*) OVER () MOD(MIN(i),2)
3 1
drop table t1;
#
+# MDEV-19901: Wrong window function calculation
+#
+create table t1 (a int, b int);
+insert into t1 values (1, 1), (2, 1), (3, 2), (4, 2);
+select
+sum(a) over (),
+sum(a) over (partition by b) c,
+sum(a) over (order by a) d
+from t1
+order by a;
+sum(a) over () c d
+10 3 1
+10 3 3
+10 7 6
+10 7 10
+explain format=json
+select
+sum(a) over (),
+sum(a) over (partition by b) c,
+sum(a) over (order by a) d
+from t1;
+EXPLAIN
+{
+ "query_block": {
+ "select_id": 1,
+ "window_functions_computation": {
+ "sorts": {
+ "filesort": {
+ "sort_key": "t1.b"
+ },
+ "filesort": {
+ "sort_key": "t1.a"
+ }
+ },
+ "temporary_table": {
+ "table": {
+ "table_name": "t1",
+ "access_type": "ALL",
+ "rows": 4,
+ "filtered": 100
+ }
+ }
+ }
+ }
+}
+drop table t1;
+#
# End of 10.2 tests
#
diff --git a/mysql-test/t/win.test b/mysql-test/t/win.test
index 1e302722b99..cb07d16e197 100644
--- a/mysql-test/t/win.test
+++ b/mysql-test/t/win.test
@@ -2361,6 +2361,30 @@ INSERT INTO t1 VALUES (1),(0),(1),(2),(0),(1),(2),(1),(2);
SELECT DISTINCT COUNT(*) OVER (), MOD(MIN(i),2) FROM t1 GROUP BY i ;
drop table t1;
+--echo #
+--echo # MDEV-19901: Wrong window function calculation
+--echo #
+
+create table t1 (a int, b int);
+insert into t1 values (1, 1), (2, 1), (3, 2), (4, 2);
+
+select
+ sum(a) over (),
+ sum(a) over (partition by b) c,
+ sum(a) over (order by a) d
+from t1
+ order by a;
+
+explain format=json
+select
+ sum(a) over (),
+ sum(a) over (partition by b) c,
+ sum(a) over (order by a) d
+from t1;
+
+
+drop table t1;
+
--echo #
--echo # End of 10.2 tests
--echo #
diff --git a/sql/sql_window.cc b/sql/sql_window.cc
index b258b8f56c9..a9a979bb664 100644
--- a/sql/sql_window.cc
+++ b/sql/sql_window.cc
@@ -6,6 +6,10 @@
#include "sql_window.h"
#include "my_dbug.h"
+typedef List<Item_window_func> Window_func_list;
+
+static List<Window_func_list>
+split_window_funcs_into_sorts(List<Item_window_func> *win_func_list);
bool
Window_spec::check_window_names(List_iterator_fast<Window_spec> &it)
@@ -368,84 +372,6 @@ int compare_order_lists(SQL_I_List<ORDER> *part_list1,
return CMP_EQ;
}
-
-static
-int compare_window_frame_bounds(Window_frame_bound *win_frame_bound1,
- Window_frame_bound *win_frame_bound2,
- bool is_bottom_bound)
-{
- int res;
- if (win_frame_bound1->precedence_type != win_frame_bound2->precedence_type)
- {
- res= win_frame_bound1->precedence_type > win_frame_bound2->precedence_type ?
- CMP_GT : CMP_LT;
- if (is_bottom_bound)
- res= -res;
- return res;
- }
-
- if (win_frame_bound1->is_unbounded() && win_frame_bound2->is_unbounded())
- return CMP_EQ;
-
- if (!win_frame_bound1->is_unbounded() && !win_frame_bound2->is_unbounded())
- {
- if (win_frame_bound1->offset->eq(win_frame_bound2->offset, true))
- return CMP_EQ;
- else
- {
- res= strcmp(win_frame_bound1->offset->name,
- win_frame_bound2->offset->name);
- res= res > 0 ? CMP_GT : CMP_LT;
- if (is_bottom_bound)
- res= -res;
- return res;
- }
- }
-
- /*
- Here we have:
- win_frame_bound1->is_unbounded() != win_frame_bound1->is_unbounded()
- */
- return is_bottom_bound != win_frame_bound1->is_unbounded() ? CMP_LT : CMP_GT;
-}
-
-
-static
-int compare_window_frames(Window_frame *win_frame1,
- Window_frame *win_frame2)
-{
- int cmp;
-
- if (win_frame1 == win_frame2)
- return CMP_EQ;
-
- if (!win_frame1)
- return CMP_LT;
-
- if (!win_frame2)
- return CMP_GT;
-
- if (win_frame1->units != win_frame2->units)
- return win_frame1->units > win_frame2->units ? CMP_GT : CMP_LT;
-
- cmp= compare_window_frame_bounds(win_frame1->top_bound,
- win_frame2->top_bound,
- false);
- if (cmp)
- return cmp;
-
- cmp= compare_window_frame_bounds(win_frame1->bottom_bound,
- win_frame2->bottom_bound,
- true);
- if (cmp)
- return cmp;
-
- if (win_frame1->exclusion != win_frame2->exclusion)
- return win_frame1->exclusion > win_frame2->exclusion ? CMP_GT_C : CMP_LT_C;
-
- return CMP_EQ;
-}
-
static
int compare_window_spec_joined_lists(Window_spec *win_spec1,
Window_spec *win_spec2)
@@ -459,147 +385,68 @@ int compare_window_spec_joined_lists(Window_spec *win_spec1,
return cmp;
}
-
-static
-int compare_window_funcs_by_window_specs(Item_window_func *win_func1,
- Item_window_func *win_func2,
- void *arg)
-{
- int cmp;
- Window_spec *win_spec1= win_func1->window_spec;
- Window_spec *win_spec2= win_func2->window_spec;
- if (win_spec1 == win_spec2)
- return CMP_EQ;
- cmp= compare_order_lists(win_spec1->partition_list,
- win_spec2->partition_list);
- if (cmp == CMP_EQ)
- {
- /*
- Partition lists contain the same elements.
- Let's use only one of the lists.
- */
- if (!win_spec1->name() && win_spec2->name())
- win_spec1->partition_list= win_spec2->partition_list;
- else
- win_spec2->partition_list= win_spec1->partition_list;
-
- cmp= compare_order_lists(win_spec1->order_list,
- win_spec2->order_list);
-
- if (cmp != CMP_EQ)
- return cmp;
-
- /*
- Order lists contain the same elements.
- Let's use only one of the lists.
- */
- if (!win_spec1->name() && win_spec2->name())
- win_spec1->order_list= win_spec2->order_list;
- else
- win_spec2->order_list= win_spec1->order_list;
-
- cmp= compare_window_frames(win_spec1->window_frame,
- win_spec2->window_frame);
-
- if (cmp != CMP_EQ)
- return cmp;
-
- /* Window frames are equal. Let's use only one of them. */
- if (!win_spec1->name() && win_spec2->name())
- win_spec1->window_frame= win_spec2->window_frame;
- else
- win_spec2->window_frame= win_spec1->window_frame;
-
- return CMP_EQ;
- }
-
- if (cmp == CMP_GT || cmp == CMP_LT)
- return cmp;
-
- /* one of the partitions lists is the proper beginning of the another */
- cmp= compare_window_spec_joined_lists(win_spec1, win_spec2);
-
- if (CMP_LT_C <= cmp && cmp <= CMP_GT_C)
- cmp= win_spec1->partition_list->elements <
- win_spec2->partition_list->elements ? CMP_GT_C : CMP_LT_C;
-
- return cmp;
-}
-
-
-#define SORTORDER_CHANGE_FLAG 1
-#define PARTITION_CHANGE_FLAG 2
-#define FRAME_CHANGE_FLAG 4
-
-typedef int (*Item_window_func_cmp)(Item_window_func *f1,
- Item_window_func *f2,
- void *arg);
/*
@brief
- Sort window functions so that those that can be computed together are
- adjacent.
+ Split window functions into lists that can share the sorting.
- @detail
- Sort window functions by their
- - required sorting order,
- - partition list,
- - window frame compatibility.
+ @param win_func_list List of all window functions in the select.
- The changes between the groups are marked by setting item_window_func->marker.
+ @detail
+ Split the list of window functions into multiple lists. Each of the
+ new lists only includes functions that can be computed using
+ the sorting from the first element of the list.
+
+ @note
+ Within one sort, it's also possible to re-use
+ - partition bound detector (for equivalent PARTITION BY clauses)
+ - window frame cursors
+ This is not implemented currently, but could use the same approach.
*/
-static
-void order_window_funcs_by_window_specs(List<Item_window_func> *win_func_list)
+static List<Window_func_list>
+split_window_funcs_into_sorts(List<Item_window_func> *win_func_list)
{
- if (win_func_list->elements == 0)
- return;
+ List<Window_func_list> sorts;
- bubble_sort<Item_window_func>(win_func_list,
- compare_window_funcs_by_window_specs,
- NULL);
-
- List_iterator_fast<Item_window_func> it(*win_func_list);
- Item_window_func *prev= it++;
- prev->marker= SORTORDER_CHANGE_FLAG |
- PARTITION_CHANGE_FLAG |
- FRAME_CHANGE_FLAG;
- Item_window_func *curr;
- while ((curr= it++))
- {
- Window_spec *win_spec_prev= prev->window_spec;
- Window_spec *win_spec_curr= curr->window_spec;
- curr->marker= 0;
- if (!(win_spec_prev->partition_list == win_spec_curr->partition_list &&
- win_spec_prev->order_list == win_spec_curr->order_list))
+ List_iterator<Item_window_func> win_func_it(*win_func_list);
+ Item_window_func *new_win_func;
+ while ((new_win_func= win_func_it++))
+ {
+ List_iterator<Window_func_list> sorts_it(sorts);
+ Window_func_list *wfunc_list;
+ bool added= false;
+ while ((wfunc_list= sorts_it++))
{
- int cmp;
- if (win_spec_prev->partition_list == win_spec_curr->partition_list)
- cmp= compare_order_lists(win_spec_prev->order_list,
- win_spec_curr->order_list);
+ Item_window_func *win_func= wfunc_list->head();
+ int r= compare_window_spec_joined_lists(win_func->window_spec,
+ new_win_func->window_spec);
+
+ // maintain the invariant; the first element in the list uses the most
+ // restrictive sorting
+
+ if (r == CMP_EQ || r == CMP_GT_C)
+ wfunc_list->push_back(new_win_func);
+ else if (r == CMP_LT_C)
+ wfunc_list->push_front(new_win_func);
else
- cmp= compare_window_spec_joined_lists(win_spec_prev, win_spec_curr);
- if (!(CMP_LT_C <= cmp && cmp <= CMP_GT_C))
- {
- curr->marker= SORTORDER_CHANGE_FLAG |
- PARTITION_CHANGE_FLAG |
- FRAME_CHANGE_FLAG;
- }
- else if (win_spec_prev->partition_list != win_spec_curr->partition_list)
- {
- curr->marker|= PARTITION_CHANGE_FLAG | FRAME_CHANGE_FLAG;
- }
+ continue; // This element of sorts is not a match
+
+ added= true;
+ break;
}
- else if (win_spec_prev->window_frame != win_spec_curr->window_frame)
- curr->marker|= FRAME_CHANGE_FLAG;
- prev= curr;
+ if (!added)
+ {
+ // We didn't find a suitable sort to add window function to.
+ // Create a new sort and put it there.
+ Window_func_list *newlist = new Window_func_list;
+ newlist->push_back(new_win_func);
+ sorts.push_back(newlist);
+ }
}
+ return sorts;
}
-
-/////////////////////////////////////////////////////////////////////////////
-
-
/////////////////////////////////////////////////////////////////////////////
// Window Frames support
/////////////////////////////////////////////////////////////////////////////
@@ -2819,42 +2666,18 @@ bool Window_funcs_sort::exec(JOIN *join, bool keep_filesort_result)
bool Window_funcs_sort::setup(THD *thd, SQL_SELECT *sel,
- List_iterator<Item_window_func> &it,
+ List<Item_window_func> *window_funcs,
JOIN_TAB *join_tab)
{
- Window_spec *spec;
- Item_window_func *win_func= it.peek();
- Item_window_func *win_func_with_longest_order= NULL;
- int longest_order_elements= -1;
-
- /* The iterator should point to a valid function at the start of execution. */
- DBUG_ASSERT(win_func);
- do
- {
- spec= win_func->window_spec;
- int win_func_order_elements= spec->partition_list->elements +
- spec->order_list->elements;
- if (win_func_order_elements > longest_order_elements)
- {
- win_func_with_longest_order= win_func;
- longest_order_elements= win_func_order_elements;
- }
+ List_iterator<Item_window_func> it(*window_funcs);
+ Item_window_func *win_func;
+ while ((win_func = it++))
+ {
if (runner.add_function_to_run(win_func))
return true;
- it++;
- win_func= it.peek();
- } while (win_func && !(win_func->marker & SORTORDER_CHANGE_FLAG));
-
- /*
- The sort criteria must be taken from the last win_func in the group of
- adjacent win_funcs that do not have SORTORDER_CHANGE_FLAG. This is
- because the sort order must be the most specific sorting criteria defined
- within the window function group. This ensures that we sort the table
- in a way that the result is valid for all window functions belonging to
- this Window_funcs_sort.
- */
- spec= win_func_with_longest_order->window_spec;
+ }
+ Window_spec *spec= window_funcs->head()->window_spec;
ORDER* sort_order= concat_order_lists(thd->mem_root,
spec->partition_list->first,
spec->order_list->first);
@@ -2892,7 +2715,8 @@ bool Window_funcs_computation::setup(THD *thd,
List<Item_window_func> *window_funcs,
JOIN_TAB *tab)
{
- order_window_funcs_by_window_specs(window_funcs);
+ List<Window_func_list> wfunc_sorts=
+ split_window_funcs_into_sorts(window_funcs);
SQL_SELECT *sel= NULL;
/*
@@ -2908,11 +2732,12 @@ bool Window_funcs_computation::setup(THD *thd,
}
Window_funcs_sort *srt;
- List_iterator<Item_window_func> iter(*window_funcs);
- while (iter.peek())
+ List_iterator<Window_func_list> it(wfunc_sorts);
+ Window_func_list *wfunc_list;
+ while ((wfunc_list= it++))
{
if (!(srt= new Window_funcs_sort()) ||
- srt->setup(thd, sel, iter, tab))
+ srt->setup(thd, sel, wfunc_list, tab))
{
return true;
}
diff --git a/sql/sql_window.h b/sql/sql_window.h
index e0c1563e5bb..628ab908f14 100644
--- a/sql/sql_window.h
+++ b/sql/sql_window.h
@@ -163,9 +163,8 @@ int setup_windows(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
class Frame_cursor;
/*
- This handles computation of one window function.
-
- Currently, we make a spearate filesort() call for each window function.
+ This handles computation of several window functions which require the same
+ sorting of the dataset.
*/
class Window_func_runner : public Sql_alloc
@@ -193,7 +192,7 @@ class Window_func_runner : public Sql_alloc
class Window_funcs_sort : public Sql_alloc
{
public:
- bool setup(THD *thd, SQL_SELECT *sel, List_iterator<Item_window_func> &it,
+ bool setup(THD *thd, SQL_SELECT *sel, List<Item_window_func> *window_funcs,
st_join_table *join_tab);
bool exec(JOIN *join, bool keep_filesort_result);
void cleanup() { delete filesort; }
1
0

19 Dec '19
revision-id: 409cc5bda27a518c244e82e9e24c61fee9f1f8a3 (mariadb-10.1.41-106-g409cc5bda27)
parent(s): d930422e9e84aa1bc265963225dd29295203e9d3
author: Varun Gupta
committer: Varun Gupta
timestamp: 2019-12-18 23:41:32 +0530
message:
MDEV-20922: Adding an order by changes the query results
For Item_direct_ref and its subclasses, get value from val_* methods
instead of result* family
The val_* methods gets value of item on which it is referred.
---
mysql-test/r/group_by.result | 16 ++++++++++++++++
mysql-test/t/group_by.test | 14 ++++++++++++++
sql/item.h | 6 ++++++
3 files changed, 36 insertions(+)
diff --git a/mysql-test/r/group_by.result b/mysql-test/r/group_by.result
index eb730a8c954..bde24afa6e3 100644
--- a/mysql-test/r/group_by.result
+++ b/mysql-test/r/group_by.result
@@ -2792,3 +2792,19 @@ SELECT 1 IN ( SELECT COUNT( DISTINCT f2 ) FROM t1 WHERE f1 <= 4 );
1 IN ( SELECT COUNT( DISTINCT f2 ) FROM t1 WHERE f1 <= 4 )
0
drop table t1;
+#
+# MDEV-20922: Adding an order by changes the query results
+#
+CREATE TABLE t1(a int, b int);
+INSERT INTO t1 values (1, 100), (2, 200), (3, 100), (4, 200);
+create view v1 as select a, b+1 as x from t1;
+select x, count(distinct a) as y from v1 group by x order by y;
+x y
+101 2
+201 2
+select b+1 as x, count(distinct a) as y from t1 group by x order by y;
+x y
+101 2
+201 2
+drop view v1;
+drop table t1;
diff --git a/mysql-test/t/group_by.test b/mysql-test/t/group_by.test
index 0401ad9780c..b3e12a93147 100644
--- a/mysql-test/t/group_by.test
+++ b/mysql-test/t/group_by.test
@@ -1915,3 +1915,17 @@ INSERT INTO t1 VALUES (0,'foo'),(1,'bar');
SELECT 1 IN ( SELECT COUNT( DISTINCT f2 ) FROM t1 WHERE f1 <= 4 );
drop table t1;
+--echo #
+--echo # MDEV-20922: Adding an order by changes the query results
+--echo #
+
+CREATE TABLE t1(a int, b int);
+INSERT INTO t1 values (1, 100), (2, 200), (3, 100), (4, 200);
+
+create view v1 as select a, b+1 as x from t1;
+
+select x, count(distinct a) as y from v1 group by x order by y;
+select b+1 as x, count(distinct a) as y from t1 group by x order by y;
+
+drop view v1;
+drop table t1;
diff --git a/sql/item.h b/sql/item.h
index fb11064f122..bb8dc788f81 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -4146,6 +4146,12 @@ class Item_direct_ref :public Item_ref
bool val_bool();
bool is_null();
bool get_date(MYSQL_TIME *ltime, ulonglong fuzzydate);
+ double val_result() { return val_real(); }
+ longlong val_int_result() { return val_int(); }
+ String *str_result(String* tmp) { return val_str(tmp); }
+ my_decimal *val_decimal_result(my_decimal *val)
+ { return val_decimal(val); }
+ bool val_bool_result() { return val_bool(); }
virtual Ref_Type ref_type() { return DIRECT_REF; }
};
1
0

[Commits] 8681b0e191d: MDEV-21341: Fix UBSAN failures, part 8: fix error in compare_fields_by_table_order
by psergey 19 Dec '19
by psergey 19 Dec '19
19 Dec '19
revision-id: 8681b0e191ded6e2061cd2b65b18d8d6d9a85491 (mariadb-10.1.43-38-g8681b0e191d)
parent(s): 6b24843647540d40d6115dd6b0e2a5a746ea03ca
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-12-18 21:53:19 +0300
message:
MDEV-21341: Fix UBSAN failures, part 8: fix error in compare_fields_by_table_order
Dont assign Item_field variables to point to Item_string objects (even if we
don't make any dangerous calls for them).
---
sql/sql_select.cc | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 40941294013..2358615affc 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -13669,12 +13669,16 @@ static int compare_fields_by_table_order(Item *field1,
{
int cmp= 0;
bool outer_ref= 0;
- Item_field *f1= (Item_field *) (field1->real_item());
- Item_field *f2= (Item_field *) (field2->real_item());
- if (field1->const_item() || f1->const_item())
+ Item *field1_real= field1->real_item();
+ Item *field2_real= field2->real_item();
+
+ if (field1->const_item() || field1_real->const_item())
return 1;
- if (field2->const_item() || f2->const_item())
+ if (field2->const_item() || field2_real->const_item())
return -1;
+
+ Item_field *f1= (Item_field *) field1_real;
+ Item_field *f2= (Item_field *) field2_real;
if (f2->used_tables() & OUTER_REF_TABLE_BIT)
{
outer_ref= 1;
1
0

Re: [Commits] f72427f463d: MDEV-20923:UBSAN: member access within address … which does not point to an object of type 'xid_count_per_binlog'
by andrei.elkin@pp.inet.fi 18 Dec '19
by andrei.elkin@pp.inet.fi 18 Dec '19
18 Dec '19
Sujatha, Kristian, howdy.
Sorry, I managed to send my view without actually cc-ing Kristian, as claimed.
X-From-Line: nobody Wed Nov 27 19:27:51 2019
From: Andrei Elkin <andrei.elkin(a)mariadb.com>
To: sujatha <sujatha.sivakumar(a)mariadb.com>
Cc: <commits(a)mariadb.org>, <maria-developers(a)lists.launchpad.net>
Subject: Re: f72427f463d: MDEV-20923:UBSAN: member access within address
Let me try that again now.
Kristian, the question was about `xid_count_per_binlog' struct
comment -- ..
struct xid_count_per_binlog : public ilink {
char *binlog_name;
uint binlog_name_len;
ulong binlog_id;
/* Total prepared XIDs and pending checkpoint requests in this binlog. */
long xid_count;
/* For linking in requests to the binlog background thread. */
xid_count_per_binlog *next_in_queue;
xid_count_per_binlog(); /* Give link error if constructor used. */
.. -----------------------------^
};
Now I am guessing a constructor was attempted to fail out.. Was that
related to de-POD-ing of the object ('cos of the constructor)? It's
pretty obscure.
We would appreciate your explanation.
The former review mail is quoted (except one more [@]-marked view note, Sujatha) further.
Cheers,
Andrei
-------------------------------------------------------------------------------
Sujatha, hello.
The patch looks quite good. I have two suggestions though.
The first is to match the destructor's free() with actual allocation (of what is to be
freed) in the new parameterized constructor.
And 2nd-ly, I suggest to leave the explicit zero argument constructor
intact. Kristian is cc-d to help us to decide.
My comments are below.
Cheers,
Andrei
sujatha <sujatha.sivakumar(a)mariadb.com> writes:
> revision-id: f72427f463d316a54ebf87c2e84c73947e3c5fe4 (mariadb-10.1.43-5-gf72427f463d)
> parent(s): 13db50fc03e7312e6c01b06c7e4af69f69ba5382
> author: Sujatha
> committer: Sujatha
> timestamp: 2019-11-12 16:11:16 +0530
> message:
>
> MDEV-20923:UBSAN: member access within address … which does not point to an object of type 'xid_count_per_binlog'
>
> Problem:
> -------
> Accessing a member within 'xid_count_per_binlog' structure results in
> following error when 'UBSAN' is enabled.
>
> member access within address 0xXXX which does not point to an object of type
> 'xid_count_per_binlog'
>
> Analysis:
> ---------
> The problem appears to be that no constructor for 'xid_count_per_binlog' is
> being called, and thus the vtable will not be initialized.
>
> Fix:
> ---
> Defined a parameterized constructor for 'xid_count_per_binlog' class.
>
> ---
> sql/log.cc | 28 ++++++++++++++--------------
> sql/log.h | 9 ++++++++-
> 2 files changed, 22 insertions(+), 15 deletions(-)
>
> diff --git a/sql/log.cc b/sql/log.cc
> index acf1f8f8a9c..2b8b67febef 100644
> --- a/sql/log.cc
> +++ b/sql/log.cc
> @@ -3216,7 +3216,7 @@ void MYSQL_BIN_LOG::cleanup()
> DBUG_ASSERT(!binlog_xid_count_list.head());
> WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::cleanup(): Removing xid_list_entry "
> "for %s (%lu)", b);
> - my_free(b);
> + delete b;
> }
>
> mysql_mutex_destroy(&LOCK_log);
> @@ -3580,17 +3580,17 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
> */
> uint off= dirname_length(log_file_name);
> uint len= strlen(log_file_name) - off;
> - char *entry_mem, *name_mem;
> - if (!(new_xid_list_entry = (xid_count_per_binlog *)
> - my_multi_malloc(MYF(MY_WME),
> - &entry_mem, sizeof(xid_count_per_binlog),
> - &name_mem, len,
> - NULL)))
> + char *name_mem;
> + name_mem= (char *) my_malloc(len, MYF(MY_ZEROFILL));
> + if (!name_mem)
> goto err;
> memcpy(name_mem, log_file_name+off, len);
That is both my_malloc and memcpy to go into the new constructor to
match [*] (find the label below).
> - new_xid_list_entry->binlog_name= name_mem;
> - new_xid_list_entry->binlog_name_len= len;
> - new_xid_list_entry->xid_count= 0;
> + new_xid_list_entry= new xid_count_per_binlog(name_mem, (int)len);
> + if (!new_xid_list_entry)
> + {
> + my_free(name_mem);
> + goto err;
> + }
>
> /*
> Find the name for the Initial binlog checkpoint.
> @@ -3711,7 +3711,7 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
> {
> WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::open(): Removing xid_list_entry for "
> "%s (%lu)", b);
> - my_free(binlog_xid_count_list.get());
> + delete binlog_xid_count_list.get();
> }
> mysql_cond_broadcast(&COND_xid_list);
> WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::open(): Adding new xid_list_entry for "
> @@ -3758,7 +3758,7 @@ Turning logging off for the whole duration of the MySQL server process. \
> To turn it on again: fix the cause, \
> shutdown the MySQL server and restart it.", name, errno);
> if (new_xid_list_entry)
> - my_free(new_xid_list_entry);
> + delete new_xid_list_entry;
[@] With `delete` we don't need the non-null check: `if()` to remove.
> if (file >= 0)
> mysql_file_close(file, MYF(0));
> close(LOG_CLOSE_INDEX);
> @@ -4252,7 +4252,7 @@ bool MYSQL_BIN_LOG::reset_logs(THD *thd, bool create_new_log,
> DBUG_ASSERT(b->xid_count == 0);
> WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::reset_logs(): Removing "
> "xid_list_entry for %s (%lu)", b);
> - my_free(binlog_xid_count_list.get());
> + delete binlog_xid_count_list.get();
> }
> mysql_cond_broadcast(&COND_xid_list);
> reset_master_pending--;
> @@ -9736,7 +9736,7 @@ TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint)
> break;
> WSREP_XID_LIST_ENTRY("TC_LOG_BINLOG::mark_xid_done(): Removing "
> "xid_list_entry for %s (%lu)", b);
> - my_free(binlog_xid_count_list.get());
> + delete binlog_xid_count_list.get();
> }
>
> mysql_mutex_unlock(&LOCK_xid_list);
> diff --git a/sql/log.h b/sql/log.h
> index b4c9b24a3a9..30a55e577a4 100644
> --- a/sql/log.h
> +++ b/sql/log.h
> @@ -587,7 +587,14 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
> long xid_count;
> /* For linking in requests to the binlog background thread. */
> xid_count_per_binlog *next_in_queue;
> - xid_count_per_binlog(); /* Give link error if constructor used. */
Maybe we should leave it as it seems to be for catching inadvertent
object constructions.
I'm cc-ing Kristian to clear out.
> + xid_count_per_binlog(char *log_file_name, uint log_file_name_len)
> + :binlog_name(log_file_name), binlog_name_len(log_file_name_len),
> + binlog_id(0), xid_count(0)
> + {}
> + ~xid_count_per_binlog()
> + {
> + my_free(binlog_name);
[*]
> + }
> };
> I_List<xid_count_per_binlog> binlog_xid_count_list;
> mysql_mutex_t LOCK_binlog_background_thread;
> _______________________________________________
> commits mailing list
> commits(a)mariadb.org
> https://lists.askmonty.org/cgi-bin/mailman/listinfo/commits
1
0

[Commits] 176f446338f: MDEV-21087: ER_SLAVE_INCIDENT arrives at slave without failure specifics
by sujatha 17 Dec '19
by sujatha 17 Dec '19
17 Dec '19
revision-id: 176f446338f5374233522b2e185e1d0acc422ffd (mariadb-10.2.30-21-g176f446338f)
parent(s): 8129ff14407826d54745346c552fadf3d292a0d8
author: Sujatha
committer: Sujatha
timestamp: 2019-12-17 17:54:27 +0530
message:
MDEV-21087: ER_SLAVE_INCIDENT arrives at slave without failure specifics
Problem:
=======
Issue 1:
Incident event is reported for transactions which are rolled back successfully.
Issue 2:
When the mariadb slave (error) stops at receiving the incident event there's
no description of what led to it. Neither in the event nor in the master's
error log.
Analysis:
=========
Incident events are logged when a multi statement transaction specific changes
cannot be safely rolled back. In reported scenario a big multi statement
transaction is in progress and it makes use of innodb transactional engine.
Row based replication is being used. At the time of writing the annotate event
for the multi statement transaction the binary log cache size exceeds the
configured max_binlog_cache_size. Hence write to binary log fails. The write
error handling code generates an incident event and writes to binary log. This
stops the slave. The error code is propagated to the callers and transaction
is rolled back. Ideally binlog write error handling code should restrict
incident event reporting only for cases where clean rollback is not possible.
At the time of reporting incident event the actual error message which lead to
incident should also be written to server error log for debugging purpose.
Fix:
===
Report incident event in cases where transactions cannot be safely rolled back.
Write the error message into the error log.
@sql/handler.cc
Added new variable "stmt_modified_non_trans_table" to identify modification to
non transactional tables as "thd->transaction.stmt.modified_non_trans_table"
and "thd->transaction.all.modified_non_trans_table" will be set at the later
stage of execution. Annotate/Table_map events are written at the starting
stage of transaction execution. i.e In a multi statement transaction when the
first statement execution is complete its changes are written to binary log.
At this stage the Annotate/Table_map events are written.
To set "stmt_modified_non_trans_table" iterate through the list of tables
modified by the transaction and see if they are transactional or not. If table
is non transactional and it has write mode lock then set this variable to
true. Upon event write error check for the above flag and report incident
event.
@sql/log.cc
Report incident event if "stmt_modified_non_trans_table" variable is set.
In "write_incident" code print thd->error_message() to server error log.
---
.../extra/binlog_tests/binlog_write_error.inc | 2 +
.../extra/rpl_tests/rpl_binlog_max_cache_size.test | 3 +
mysql-test/suite/binlog/r/binlog_bug23533.result | 1 +
.../suite/binlog/r/binlog_write_error.result | 1 +
mysql-test/suite/binlog/t/binlog_bug23533.test | 2 +-
.../binlog_encryption/binlog_write_error.result | 1 +
.../rpl_mixed_binlog_max_cache_size.result | 2 +
mysql-test/suite/rpl/r/rpl_mdev-11092.result | 70 +++++++++++-
.../rpl/r/rpl_mixed_binlog_max_cache_size.result | 2 +
.../rpl/r/rpl_stm_binlog_max_cache_size.result | 2 +
mysql-test/suite/rpl/t/rpl_mdev-11092.test | 119 ++++++++++++++++++++-
sql/handler.cc | 26 ++++-
sql/log.cc | 54 +++++++---
sql/sql_class.h | 3 +-
14 files changed, 267 insertions(+), 21 deletions(-)
diff --git a/mysql-test/extra/binlog_tests/binlog_write_error.inc b/mysql-test/extra/binlog_tests/binlog_write_error.inc
index fa3ba087a7e..be5e9e0cfea 100644
--- a/mysql-test/extra/binlog_tests/binlog_write_error.inc
+++ b/mysql-test/extra/binlog_tests/binlog_write_error.inc
@@ -38,6 +38,8 @@ DROP TRIGGER IF EXISTS tr2;
DROP VIEW IF EXISTS v1, v2;
enable_warnings;
+call mtr.add_suppression("Write to binary log failed: Error writing file*");
+
--echo #
--echo # Test injecting binlog write error when executing queries
--echo #
diff --git a/mysql-test/extra/rpl_tests/rpl_binlog_max_cache_size.test b/mysql-test/extra/rpl_tests/rpl_binlog_max_cache_size.test
index 0f46b00f683..351b464de57 100644
--- a/mysql-test/extra/rpl_tests/rpl_binlog_max_cache_size.test
+++ b/mysql-test/extra/rpl_tests/rpl_binlog_max_cache_size.test
@@ -22,6 +22,9 @@
#
########################################################################################
call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
+call mtr.add_suppression("Write to binary log failed: Multi-row statements required more than .max_binlog_stmt_cache_size.* ");
+call mtr.add_suppression("Write to binary log failed: Multi-statement transaction required more than .max_binlog_cache_size.* ");
+
let $old_max_binlog_cache_size= query_get_value(SHOW VARIABLES LIKE "max_binlog_cache_size", Value, 1);
let $old_binlog_cache_size= query_get_value(SHOW VARIABLES LIKE "binlog_cache_size", Value, 1);
diff --git a/mysql-test/suite/binlog/r/binlog_bug23533.result b/mysql-test/suite/binlog/r/binlog_bug23533.result
index cc9799506c3..aa357b58f2c 100644
--- a/mysql-test/suite/binlog/r/binlog_bug23533.result
+++ b/mysql-test/suite/binlog/r/binlog_bug23533.result
@@ -1,3 +1,4 @@
+call mtr.add_suppression("Write to binary log failed: Multi-statement transaction required more than .max_binlog_cache_size.*");
SET AUTOCOMMIT=0;
CREATE TABLE t1 (a INT NOT NULL AUTO_INCREMENT, b TEXT, PRIMARY KEY(a)) ENGINE=InnoDB;
SELECT COUNT(*) FROM t1;
diff --git a/mysql-test/suite/binlog/r/binlog_write_error.result b/mysql-test/suite/binlog/r/binlog_write_error.result
index 2606a9f40b3..ec1a701bd0e 100644
--- a/mysql-test/suite/binlog/r/binlog_write_error.result
+++ b/mysql-test/suite/binlog/r/binlog_write_error.result
@@ -9,6 +9,7 @@ DROP PROCEDURE IF EXISTS p2;
DROP TRIGGER IF EXISTS tr1;
DROP TRIGGER IF EXISTS tr2;
DROP VIEW IF EXISTS v1, v2;
+call mtr.add_suppression("Write to binary log failed: Error writing file*");
#
# Test injecting binlog write error when executing queries
#
diff --git a/mysql-test/suite/binlog/t/binlog_bug23533.test b/mysql-test/suite/binlog/t/binlog_bug23533.test
index ca610e399e4..a77497115e5 100644
--- a/mysql-test/suite/binlog/t/binlog_bug23533.test
+++ b/mysql-test/suite/binlog/t/binlog_bug23533.test
@@ -6,7 +6,7 @@
--source include/have_innodb.inc
--source include/have_log_bin.inc
--source include/have_binlog_format_row.inc
-
+call mtr.add_suppression("Write to binary log failed: Multi-statement transaction required more than .max_binlog_cache_size.*");
SET AUTOCOMMIT=0;
# Create 1st table
diff --git a/mysql-test/suite/binlog_encryption/binlog_write_error.result b/mysql-test/suite/binlog_encryption/binlog_write_error.result
index 2606a9f40b3..ec1a701bd0e 100644
--- a/mysql-test/suite/binlog_encryption/binlog_write_error.result
+++ b/mysql-test/suite/binlog_encryption/binlog_write_error.result
@@ -9,6 +9,7 @@ DROP PROCEDURE IF EXISTS p2;
DROP TRIGGER IF EXISTS tr1;
DROP TRIGGER IF EXISTS tr2;
DROP VIEW IF EXISTS v1, v2;
+call mtr.add_suppression("Write to binary log failed: Error writing file*");
#
# Test injecting binlog write error when executing queries
#
diff --git a/mysql-test/suite/binlog_encryption/rpl_mixed_binlog_max_cache_size.result b/mysql-test/suite/binlog_encryption/rpl_mixed_binlog_max_cache_size.result
index 388c8e67b68..d302e73f15c 100644
--- a/mysql-test/suite/binlog_encryption/rpl_mixed_binlog_max_cache_size.result
+++ b/mysql-test/suite/binlog_encryption/rpl_mixed_binlog_max_cache_size.result
@@ -1,6 +1,8 @@
include/master-slave.inc
[connection master]
call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
+call mtr.add_suppression("Write to binary log failed: Multi-row statements required more than .max_binlog_stmt_cache_size.* ");
+call mtr.add_suppression("Write to binary log failed: Multi-statement transaction required more than .max_binlog_cache_size.* ");
SET GLOBAL max_binlog_cache_size = 4096;
SET GLOBAL binlog_cache_size = 4096;
SET GLOBAL max_binlog_stmt_cache_size = 4096;
diff --git a/mysql-test/suite/rpl/r/rpl_mdev-11092.result b/mysql-test/suite/rpl/r/rpl_mdev-11092.result
index 90b809477b2..ef392762eef 100644
--- a/mysql-test/suite/rpl/r/rpl_mdev-11092.result
+++ b/mysql-test/suite/rpl/r/rpl_mdev-11092.result
@@ -2,6 +2,9 @@ include/master-slave.inc
[connection master]
call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
call mtr.add_suppression("Slave SQL: The incident LOST_EVENTS occured on the master. .*");
+call mtr.add_suppression("Write to binary log failed: Multi-row statements required more than .max_binlog_stmt_cache_size.* ");
+call mtr.add_suppression("Write to binary log failed: Multi-statement transaction required more than .max_binlog_cache_size.* ");
+"*********** Annotate Event write failure **************"
SET GLOBAL max_binlog_cache_size = 4096;
SET GLOBAL binlog_cache_size = 4096;
SET GLOBAL max_binlog_stmt_cache_size = 4096;
@@ -10,12 +13,75 @@ disconnect master;
connect master,127.0.0.1,root,,test,$MASTER_MYPORT,;
CREATE TABLE t1(a INT PRIMARY KEY, data VARCHAR(30000)) ENGINE=MYISAM;
connection master;
-ERROR HY000: Writing one row to the row-based binary log failed
+"#######################################################################"
+"# Test Case1: Annotate event write failure for MyISAM #"
+"#######################################################################"
+ERROR HY000: Multi-row statements required more than 'max_binlog_stmt_cache_size' bytes of storage; increase this mysqld variable and try again
include/wait_for_slave_sql_error_and_skip.inc [errno=1590]
+"#######################################################################"
+"# Test Case2: Annotate event write failure for INNODB #"
+"#######################################################################"
connection master;
+CREATE TABLE t2(a INT PRIMARY KEY, data VARCHAR(30000)) ENGINE=INNODB;
+ERROR HY000: Multi-statement transaction required more than 'max_binlog_cache_size' bytes of storage; increase this mysqld variable and try again
+"*** One row will be present as second row will be rolledback ***"
+SELECT COUNT(*) FROM t2;
+COUNT(*)
+1
+connection slave;
+SELECT COUNT(*) FROM t2;
+COUNT(*)
+1
+"#######################################################################"
+"# Test Case3: Annotate event write failure for mixed engine UPDATE #"
+"#######################################################################"
+connection master;
+ERROR HY000: Multi-row statements required more than 'max_binlog_stmt_cache_size' bytes of storage; increase this mysqld variable and try again
+include/wait_for_slave_sql_error_and_skip.inc [errno=1590]
+connection master;
+"****** Clean up *******"
SET GLOBAL max_binlog_cache_size= ORIGINAL_VALUE;
SET GLOBAL binlog_cache_size= ORIGINAL_VALUE;
SET GLOBAL max_binlog_stmt_cache_size= ORIGINAL_VALUE;
SET GLOBAL binlog_stmt_cache_size= ORIGINAL_VALUE;
-DROP TABLE t1;
+DROP TABLE t1,t2;
+"*********** TABLE MAP Event write failure **************"
+CREATE TABLE tm (f INT) ENGINE=MYISAM;
+CREATE TABLE ti (f INT) ENGINE=INNODB;
+INSERT INTO tm VALUES (10);
+INSERT INTO ti VALUES (20);
+connection slave;
+"#######################################################################"
+"# Test Case4: Table_map event write failure for mixed engine UPDATE #"
+"#######################################################################"
+connection master;
+SET debug_dbug="+d,table_map_write_error";
+"In case of mixed engines if non trans table is updated write INCIDENT event"
+UPDATE ti,tm SET tm.f=88, ti.f=120;
+ERROR HY000: Multi-statement transaction required more than 'max_binlog_cache_size' bytes of storage; increase this mysqld variable and try again
+include/wait_for_slave_sql_error_and_skip.inc [errno=1590]
+"On Slave non trans table should be updated one row with f=88 should be found"
+SELECT * FROM tm WHERE f=88;
+f
+88
+"On Innodb table 'ti' no update should be done no row exists with f=120"
+SELECT COUNT(*) FROM ti WHERE f=120;
+COUNT(*)
+0
+"#######################################################################"
+"# Test Case5: Table_map event write failure for trans engine UPDATE #"
+"#######################################################################"
+"Transaction will be rolled back. No incident event is written."
+connection master;
+UPDATE ti, tm set ti.f=30;
+ERROR HY000: Multi-statement transaction required more than 'max_binlog_cache_size' bytes of storage; increase this mysqld variable and try again
+"Verify on master that no rows exists with f=30"
+SELECT COUNT(*) FROM ti WHERE f=30;
+COUNT(*)
+0
+connection slave;
+connection master;
+"******** Clean Up **********"
+SET GLOBAL debug_dbug = '';
+DROP TABLE tm,ti;
include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/r/rpl_mixed_binlog_max_cache_size.result b/mysql-test/suite/rpl/r/rpl_mixed_binlog_max_cache_size.result
index 388c8e67b68..d302e73f15c 100644
--- a/mysql-test/suite/rpl/r/rpl_mixed_binlog_max_cache_size.result
+++ b/mysql-test/suite/rpl/r/rpl_mixed_binlog_max_cache_size.result
@@ -1,6 +1,8 @@
include/master-slave.inc
[connection master]
call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
+call mtr.add_suppression("Write to binary log failed: Multi-row statements required more than .max_binlog_stmt_cache_size.* ");
+call mtr.add_suppression("Write to binary log failed: Multi-statement transaction required more than .max_binlog_cache_size.* ");
SET GLOBAL max_binlog_cache_size = 4096;
SET GLOBAL binlog_cache_size = 4096;
SET GLOBAL max_binlog_stmt_cache_size = 4096;
diff --git a/mysql-test/suite/rpl/r/rpl_stm_binlog_max_cache_size.result b/mysql-test/suite/rpl/r/rpl_stm_binlog_max_cache_size.result
index 388c8e67b68..d302e73f15c 100644
--- a/mysql-test/suite/rpl/r/rpl_stm_binlog_max_cache_size.result
+++ b/mysql-test/suite/rpl/r/rpl_stm_binlog_max_cache_size.result
@@ -1,6 +1,8 @@
include/master-slave.inc
[connection master]
call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
+call mtr.add_suppression("Write to binary log failed: Multi-row statements required more than .max_binlog_stmt_cache_size.* ");
+call mtr.add_suppression("Write to binary log failed: Multi-statement transaction required more than .max_binlog_cache_size.* ");
SET GLOBAL max_binlog_cache_size = 4096;
SET GLOBAL binlog_cache_size = 4096;
SET GLOBAL max_binlog_stmt_cache_size = 4096;
diff --git a/mysql-test/suite/rpl/t/rpl_mdev-11092.test b/mysql-test/suite/rpl/t/rpl_mdev-11092.test
index 31a385b40e6..6e130db37d8 100644
--- a/mysql-test/suite/rpl/t/rpl_mdev-11092.test
+++ b/mysql-test/suite/rpl/t/rpl_mdev-11092.test
@@ -1,3 +1,4 @@
+--source include/have_debug.inc
--source include/have_innodb.inc
--source include/not_embedded.inc
--source include/not_windows.inc
@@ -7,12 +8,16 @@
########################################################################################
call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
call mtr.add_suppression("Slave SQL: The incident LOST_EVENTS occured on the master. .*");
+call mtr.add_suppression("Write to binary log failed: Multi-row statements required more than .max_binlog_stmt_cache_size.* ");
+call mtr.add_suppression("Write to binary log failed: Multi-statement transaction required more than .max_binlog_cache_size.* ");
let $old_max_binlog_cache_size= query_get_value(SHOW VARIABLES LIKE "max_binlog_cache_size", Value, 1);
let $old_binlog_cache_size= query_get_value(SHOW VARIABLES LIKE "binlog_cache_size", Value, 1);
let $old_max_binlog_stmt_cache_size= query_get_value(SHOW VARIABLES LIKE "max_binlog_stmt_cache_size", Value, 1);
let $old_binlog_stmt_cache_size= query_get_value(SHOW VARIABLES LIKE "binlog_stmt_cache_size", Value, 1);
+--echo "*********** Annotate Event write failure **************"
+
SET GLOBAL max_binlog_cache_size = 4096;
SET GLOBAL binlog_cache_size = 4096;
SET GLOBAL max_binlog_stmt_cache_size = 4096;
@@ -26,8 +31,19 @@ let $data = `select concat('"', repeat('a',2000), '"')`;
connection master;
+# Insert a huge row into MyISAM table. The row will be inserted in engine and a
+# request to write to binary log will be initiated. Since row annotations are
+# enabled the size of the annotate event itself will exceed the
+# "max_binlog_stmt_cache_size". This will result in ER_STMT_CACHE_FULL error
+# and an incident event will be written to the binary log as row update in
+# engine cannot be undone.
+
+--echo "#######################################################################"
+--echo "# Test Case1: Annotate event write failure for MyISAM #"
+--echo "#######################################################################"
+
--disable_query_log
---error ER_BINLOG_ROW_LOGGING_FAILED
+--error ER_STMT_CACHE_FULL
eval INSERT INTO t1 (a, data) VALUES (2,
CONCAT($data, $data, $data, $data, $data, $data));
--enable_query_log
@@ -37,8 +53,59 @@ eval INSERT INTO t1 (a, data) VALUES (2,
--let $slave_sql_errno= 1590
--source include/wait_for_slave_sql_error_and_skip.inc
-connection master;
+# MDEV-21087
+# Insert two huge rows in to transaction cache. Have data such that first row
+# fits inside the binary log cache. While writing the annotate event for the
+# second row the binary log cache size will exceed "max_binlog_cache_size".
+# Hence this statement cannot be written to binary log. As DMLs in Innodb can
+# be safely rolled back only an error will be reported. Slave will continue to
+# work.
+
+--echo "#######################################################################"
+--echo "# Test Case2: Annotate event write failure for INNODB #"
+--echo "#######################################################################"
+
+--connection master
+CREATE TABLE t2(a INT PRIMARY KEY, data VARCHAR(30000)) ENGINE=INNODB;
+--disable_query_log
+BEGIN;
+eval INSERT INTO t2 (a, data) VALUES (1, CONCAT($data, $data));
+--error ER_TRANS_CACHE_FULL
+eval INSERT INTO t2 (a, data) VALUES (2, CONCAT($data, $data));
+COMMIT;
+--enable_query_log
+
+--echo "*** One row will be present as second row will be rolledback ***"
+SELECT COUNT(*) FROM t2;
+--sync_slave_with_master
+SELECT COUNT(*) FROM t2;
+# Testing mixed engine UPDATE statement scenario. In the following multi
+# update query 'ha_update_row' will be invoked for t1 (myisam) table. This
+# intern invokes binlog_write_table_map() function call. While writing a huge
+# annotate event binary log cache size will exceed max_binlog_cache_size.
+# Writing to binary log fails. Since non transactional changes cannot be
+# rolled back incident event will be written to binary log.
+
+--echo "#######################################################################"
+--echo "# Test Case3: Annotate event write failure for mixed engine UPDATE #"
+--echo "#######################################################################"
+
+--connection master
+let $new_data = `select concat('"', repeat('b',2000), '"')`;
+--disable_query_log
+--error ER_STMT_CACHE_FULL
+eval UPDATE t1,t2 SET t1.data="Hello", t2.data=CONCAT($new_data,$new_data,$new_data,$new_data,$new_data);
+--enable_query_log
+
+# Incident event
+# 1590=ER_SLAVE_INCIDENT
+--let $slave_sql_errno= 1590
+--source include/wait_for_slave_sql_error_and_skip.inc
+
+--connection master
+
+--echo "****** Clean up *******"
--replace_result $old_max_binlog_cache_size ORIGINAL_VALUE
--eval SET GLOBAL max_binlog_cache_size= $old_max_binlog_cache_size
--replace_result $old_binlog_cache_size ORIGINAL_VALUE
@@ -48,6 +115,52 @@ connection master;
--replace_result $old_binlog_stmt_cache_size ORIGINAL_VALUE
--eval SET GLOBAL binlog_stmt_cache_size= $old_binlog_stmt_cache_size
-DROP TABLE t1;
+DROP TABLE t1,t2;
+
+--echo "*********** TABLE MAP Event write failure **************"
+
+--let $debug_save= `SELECT @@GLOBAL.debug_dbug`
+CREATE TABLE tm (f INT) ENGINE=MYISAM;
+CREATE TABLE ti (f INT) ENGINE=INNODB;
+INSERT INTO tm VALUES (10);
+INSERT INTO ti VALUES (20);
+--sync_slave_with_master
+
+--echo "#######################################################################"
+--echo "# Test Case4: Table_map event write failure for mixed engine UPDATE #"
+--echo "#######################################################################"
+--connection master
+SET debug_dbug="+d,table_map_write_error";
+--echo "In case of mixed engines if non trans table is updated write INCIDENT event"
+--error ER_TRANS_CACHE_FULL
+UPDATE ti,tm SET tm.f=88, ti.f=120;
+
+# Incident event
+# 1590=ER_SLAVE_INCIDENT
+--let $slave_sql_errno= 1590
+--source include/wait_for_slave_sql_error_and_skip.inc
+
+--echo "On Slave non trans table should be updated one row with f=88 should be found"
+SELECT * FROM tm WHERE f=88;
+
+--echo "On Innodb table 'ti' no update should be done no row exists with f=120"
+SELECT COUNT(*) FROM ti WHERE f=120;
+
+--echo "#######################################################################"
+--echo "# Test Case5: Table_map event write failure for trans engine UPDATE #"
+--echo "#######################################################################"
+--echo "Transaction will be rolled back. No incident event is written."
+--connection master
+--error ER_TRANS_CACHE_FULL
+UPDATE ti, tm set ti.f=30;
+--echo "Verify on master that no rows exists with f=30"
+SELECT COUNT(*) FROM ti WHERE f=30;
+
+--sync_slave_with_master
+
+--connection master
+--echo "******** Clean Up **********"
+--eval SET GLOBAL debug_dbug = '$debug_save'
+DROP TABLE tm,ti;
--source include/rpl_end.inc
diff --git a/sql/handler.cc b/sql/handler.cc
index 914a4dc07b1..6cd5ab15b36 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -5863,7 +5863,27 @@ static int write_locked_table_maps(THD *thd)
continue;
TABLE **const end_ptr= lock->table + lock->table_count;
- for (TABLE **table_ptr= lock->table ;
+ bool stmt_modified_non_trans_table= false;
+ /*
+ Iterate through list of tables and identify if multi statement
+ transaction modifies any non transactional table. Set the
+ 'stmt_modified_non_trans_table' flag to 'true'. If there is a
+ failure at the time of writing annotate/table_map event into the binary
+ log this flag will enable server to write an incident event into the
+ binary log. This additional flag is required as
+ 'thd->transaction.stmt.modified_non_trans_table' will not be available at
+ the time of writing table_map/annotate event.
+ */
+ for (TABLE **table_ptr= lock->table; table_ptr != end_ptr ; ++table_ptr)
+ {
+ TABLE *const table= *table_ptr;
+ if (!table->file->has_transactions() && table->current_lock == F_WRLCK)
+ {
+ stmt_modified_non_trans_table=true;
+ break;
+ }
+ }
+ for (TABLE **table_ptr= lock->table ;
table_ptr != end_ptr ;
++table_ptr)
{
@@ -5887,8 +5907,10 @@ static int write_locked_table_maps(THD *thd)
*/
bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE ||
table->file->has_transactions();
+
int const error= thd->binlog_write_table_map(table, has_trans,
- &with_annotate);
+ &with_annotate,
+ stmt_modified_non_trans_table);
/*
If an error occurs, it is the responsibility of the caller to
roll back the transaction.
diff --git a/sql/log.cc b/sql/log.cc
index 5fd384d55a0..a82808bc7b7 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -5673,14 +5673,14 @@ binlog_start_consistent_snapshot(handlerton *hton, THD *thd)
}
/**
- This function writes a table map to the binary log.
+ This function writes a table map to the binary log.
Note that in order to keep the signature uniform with related methods,
we use a redundant parameter to indicate whether a transactional table
was changed or not.
If with_annotate != NULL and
*with_annotate = TRUE write also Annotate_rows before the table map.
-
+
@param table a pointer to the table.
@param is_transactional @c true indicates a transactional table,
otherwise @c false a non-transactional.
@@ -5688,7 +5688,8 @@ binlog_start_consistent_snapshot(handlerton *hton, THD *thd)
nonzero if an error pops up when writing the table map event.
*/
int THD::binlog_write_table_map(TABLE *table, bool is_transactional,
- my_bool *with_annotate)
+ my_bool *with_annotate,
+ bool stmt_modified_non_trans_table)
{
int error;
DBUG_ENTER("THD::binlog_write_table_map");
@@ -5699,7 +5700,7 @@ int THD::binlog_write_table_map(TABLE *table, bool is_transactional,
/* Ensure that all events in a GTID group are in the same cache */
if (variables.option_bits & OPTION_GTID_BEGIN)
is_transactional= 1;
-
+
/* Pre-conditions */
DBUG_ASSERT(is_current_stmt_binlog_format_row());
DBUG_ASSERT(WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open());
@@ -5726,17 +5727,34 @@ int THD::binlog_write_table_map(TABLE *table, bool is_transactional,
/* Annotate event should be written not more than once */
*with_annotate= 0;
if ((error= writer.write(&anno)))
- {
- if (my_errno == EFBIG)
- cache_data->set_incident();
- DBUG_RETURN(error);
- }
+ goto write_err;
}
+ DBUG_EXECUTE_IF("table_map_write_error",
+ {
+ if (is_transactional)
+ {
+ my_errno= EFBIG;
+ error= 1;
+ goto write_err;
+ }
+ });
if ((error= writer.write(&the_event)))
- DBUG_RETURN(error);
+ goto write_err;
- binlog_table_maps++;
- DBUG_RETURN(0);
+ binlog_table_maps++;
+ DBUG_RETURN(0);
+
+write_err:
+ mysql_bin_log.set_write_error(this, is_transactional);
+ /*
+ For non-transactional engine or multi statement transaction with mixed
+ engines, data is written to table but writing to binary log failed. In
+ these scenarios rollback is not possible. Hence report an incident.
+ */
+ if (mysql_bin_log.check_write_error(this) && cache_data &&
+ stmt_modified_non_trans_table)
+ cache_data->set_incident();
+ DBUG_RETURN(error);
}
/**
@@ -7198,6 +7216,18 @@ bool MYSQL_BIN_LOG::write_incident(THD *thd)
mysql_mutex_unlock(&LOCK_log);
}
+ /*
+ Upon writing incident event, check for thd->error() and print the
+ relevant error message in the error log.
+ */
+ if (!error && thd->is_error())
+ {
+ sql_print_error("Write to binary log failed: "
+ "%s. An incident event is written to binary log "
+ "and slave will be stopped.\n",
+ thd->get_stmt_da()->message());
+ }
+
DBUG_RETURN(error);
}
diff --git a/sql/sql_class.h b/sql/sql_class.h
index b35f9a93238..133bc9de607 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -2423,7 +2423,8 @@ class THD :public Statement,
void binlog_start_trans_and_stmt();
void binlog_set_stmt_begin();
int binlog_write_table_map(TABLE *table, bool is_transactional,
- my_bool *with_annotate= 0);
+ my_bool *with_annotate= 0,
+ bool stmt_modified_non_trans_table= false);
int binlog_write_row(TABLE* table, bool is_transactional,
const uchar *buf);
int binlog_delete_row(TABLE* table, bool is_transactional,
1
0

[Commits] fc34657511a: MDEV-21318: Wrong results with window functions and implicit grouping
by Varun 17 Dec '19
by Varun 17 Dec '19
17 Dec '19
revision-id: fc34657511a9aa08dd92f7363dc53f58934f9673 (mariadb-10.2.29-62-gfc34657511a)
parent(s): f0aa073f2bf3d8d85b3d028df89cdb4cdfc4002d
author: Varun Gupta
committer: Varun Gupta
timestamp: 2019-12-17 16:40:06 +0530
message:
MDEV-21318: Wrong results with window functions and implicit grouping
The issue here is for degenerate joins we should execute the window
function but it is not getting executed in all the cases.
To get the window function values, the window function always needs to
be executed. This currently does not happen in a few cases
where the join would return 0 or 1 row, like
1) IMPOSSIBLE WHERE
2) IMPOSSIBLE HAVING
3) MIN/MAX optimization
4) EMPTY CONST TABLE
5) ZERO LIMIT
The fix is to make sure that window functions get executed
and the temporary table is setup for the execution of window functions
---
mysql-test/r/win.result | 51 ++++++++++++++++++++++++++++++++++++++++++++
mysql-test/t/win.test | 56 +++++++++++++++++++++++++++++++++++++++++++++++++
sql/sql_select.cc | 33 ++++++++++++++++++++++++++++-
sql/sql_select.h | 1 +
4 files changed, 140 insertions(+), 1 deletion(-)
diff --git a/mysql-test/r/win.result b/mysql-test/r/win.result
index 805fd2ed3d7..de6e0e5afbb 100644
--- a/mysql-test/r/win.result
+++ b/mysql-test/r/win.result
@@ -3643,5 +3643,56 @@ x
foo
drop table t1;
#
+# MDEV-21318: Wrong results with window functions and implicit grouping
+#
+CREATE TABLE t1 (a INT);
+#
+# With empty table
+# The expected result here is 1, NULL
+#
+SELECT row_number() over(), sum(1) FROM t1 where a=1;
+row_number() over() sum(1)
+1 NULL
+insert into t1 values (2);
+#
+# Const table has 1 row, but still impossible where
+# The expected result here is 1, NULL
+#
+SELECT row_number() over(), sum(1) FROM t1 where a=1;
+row_number() over() sum(1)
+1 NULL
+#
+# Impossible HAVING
+# Empty result is expected
+#
+SELECT row_number() over(), sum(1) FROM t1 where a=1 having 1=0;
+row_number() over() sum(1)
+#
+# const table has 1 row, no impossible where
+# The expected result here is 1, 2
+#
+SELECT row_number() over(), sum(a) FROM t1 where a=2;
+row_number() over() sum(a)
+1 2
+drop table t1;
+#
+# Impossible Where
+#
+create table t1(a int);
+insert into t1 values (1);
+#
+# Expected result is NULL, 0, NULL
+#
+SELECT MAX(a) OVER (), COUNT(a), abs(a) FROM t1 WHERE FALSE;
+MAX(a) OVER () COUNT(a) abs(a)
+NULL 0 NULL
+#
+# Expected result is 1, 0, NULL
+#
+SELECT MAX(1) OVER (), COUNT(a), abs(a) FROM t1 WHERE FALSE;
+MAX(1) OVER () COUNT(a) abs(a)
+1 0 NULL
+drop table t1;
+#
# End of 10.2 tests
#
diff --git a/mysql-test/t/win.test b/mysql-test/t/win.test
index 0f79834567b..9bc867cea7f 100644
--- a/mysql-test/t/win.test
+++ b/mysql-test/t/win.test
@@ -2351,6 +2351,62 @@ INSERT INTO t1 VALUES (1),(2),(3);
SELECT (SELECT MIN('foo') OVER() FROM t1 LIMIT 1) as x;
drop table t1;
+--echo #
+--echo # MDEV-21318: Wrong results with window functions and implicit grouping
+--echo #
+
+CREATE TABLE t1 (a INT);
+
+--echo #
+--echo # With empty table
+--echo # The expected result here is 1, NULL
+--echo #
+
+SELECT row_number() over(), sum(1) FROM t1 where a=1;
+insert into t1 values (2);
+
+--echo #
+--echo # Const table has 1 row, but still impossible where
+--echo # The expected result here is 1, NULL
+--echo #
+
+SELECT row_number() over(), sum(1) FROM t1 where a=1;
+
+--echo #
+--echo # Impossible HAVING
+--echo # Empty result is expected
+--echo #
+
+SELECT row_number() over(), sum(1) FROM t1 where a=1 having 1=0;
+
+--echo #
+--echo # const table has 1 row, no impossible where
+--echo # The expected result here is 1, 2
+--echo #
+
+SELECT row_number() over(), sum(a) FROM t1 where a=2;
+drop table t1;
+
+--echo #
+--echo # Impossible Where
+--echo #
+
+create table t1(a int);
+insert into t1 values (1);
+
+--echo #
+--echo # Expected result is NULL, 0, NULL
+--echo #
+SELECT MAX(a) OVER (), COUNT(a), abs(a) FROM t1 WHERE FALSE;
+
+--echo #
+--echo # Expected result is 1, 0, NULL
+--echo #
+
+SELECT MAX(1) OVER (), COUNT(a), abs(a) FROM t1 WHERE FALSE;
+
+drop table t1;
+
--echo #
--echo # End of 10.2 tests
--echo #
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index c9cb533aa33..f6943b18cee 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -1447,6 +1447,7 @@ JOIN::optimize_inner()
zero_result_cause= "Zero limit";
}
table_count= top_join_tab_count= 0;
+ implicit_grouping_with_window_funcs();
error= 0;
goto setup_subq_exit;
}
@@ -1502,6 +1503,7 @@ JOIN::optimize_inner()
zero_result_cause= "No matching min/max row";
table_count= top_join_tab_count= 0;
error=0;
+ implicit_grouping_with_window_funcs();
goto setup_subq_exit;
}
if (res > 1)
@@ -1517,6 +1519,7 @@ JOIN::optimize_inner()
tables_list= 0; // All tables resolved
select_lex->min_max_opt_list.empty();
const_tables= top_join_tab_count= table_count;
+ implicit_grouping_with_window_funcs();
/*
Extract all table-independent conditions and replace the WHERE
clause with them. All other conditions were computed by opt_sum_query
@@ -1615,6 +1618,7 @@ JOIN::optimize_inner()
zero_result_cause= "no matching row in const table";
DBUG_PRINT("error",("Error: %s", zero_result_cause));
error= 0;
+ implicit_grouping_with_window_funcs();
goto setup_subq_exit;
}
if (!(thd->variables.option_bits & OPTION_BIG_SELECTS) &&
@@ -1639,6 +1643,7 @@ JOIN::optimize_inner()
zero_result_cause=
"Impossible WHERE noticed after reading const tables";
select_lex->mark_const_derived(zero_result_cause);
+ implicit_grouping_with_window_funcs();
goto setup_subq_exit;
}
@@ -1781,6 +1786,7 @@ JOIN::optimize_inner()
zero_result_cause=
"Impossible WHERE noticed after reading const tables";
select_lex->mark_const_derived(zero_result_cause);
+ implicit_grouping_with_window_funcs();
goto setup_subq_exit;
}
@@ -18225,7 +18231,8 @@ void set_postjoin_aggr_write_func(JOIN_TAB *tab)
}
}
else if (join->sort_and_group && !tmp_tbl->precomputed_group_by &&
- !join->sort_and_group_aggr_tab && join->tables_list)
+ !join->sort_and_group_aggr_tab && join->tables_list &&
+ join->top_join_tab_count)
{
DBUG_PRINT("info",("Using end_write_group"));
aggr->set_write_func(end_write_group);
@@ -26924,6 +26931,30 @@ Item *remove_pushed_top_conjuncts(THD *thd, Item *cond)
return cond;
}
+/*
+ There are 5 cases in which we shortcut the join optimization process as we
+ conclude that the join would be a degenerate one
+ 1) IMPOSSIBLE WHERE
+ 2) IMPOSSIBLE HAVING
+ 3) MIN/MAX optimization (@see opt_sum_query)
+ 4) EMPTY CONST TABLE
+ 5) ZERO LIMIT
+ If a window function is present in any of the above cases then to get the
+ result of the window function, we need to execute it. So we need to
+ create a temporary table for its execution. Here we need to take in mind
+ that aggregate functions and non-aggregate functions need not be executed.
+
+*/
+
+
+void JOIN::implicit_grouping_with_window_funcs()
+{
+ if (select_lex->have_window_funcs() && send_row_on_empty_set())
+ {
+ const_tables= top_join_tab_count= table_count= 0;
+ }
+}
+
/**
@} (end of group Query_Optimizer)
*/
diff --git a/sql/sql_select.h b/sql/sql_select.h
index fe44f448446..74e8ef4698b 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -1057,6 +1057,7 @@ class JOIN :public Sql_alloc
void restore_query_plan(Join_plan_state *restore_from);
/* Choose a subquery plan for a table-less subquery. */
bool choose_tableless_subquery_plan();
+ void implicit_grouping_with_window_funcs();
public:
JOIN_TAB *join_tab, **best_ref;
1
0

[Commits] 0e04bccc19a: MDEV-21318: Wrong results with window functions and implicit grouping
by Varun 17 Dec '19
by Varun 17 Dec '19
17 Dec '19
revision-id: 0e04bccc19adf9b9cdc2ae6461e3e707d26cd62d (mariadb-10.2.29-62-g0e04bccc19a)
parent(s): f0aa073f2bf3d8d85b3d028df89cdb4cdfc4002d
author: Varun Gupta
committer: Varun Gupta
timestamp: 2019-12-17 15:53:09 +0530
message:
MDEV-21318: Wrong results with window functions and implicit grouping
The issue here is for degenerate joins we should execute the window
function but it is not getting executed in all the cases.
To get the window function values, the window function always needs to
be executed. This currently does not happen in a few cases
where the join would return 0 or 1 row, like
1) IMPOSSIBLE WHERE
2) IMPOSSIBLE HAVING
3) MIN/MAX optimization
4) EMPTY CONST TABLE
5) ZERO LIMIT
The fix is to make sure that window functions get executed
and the temporary table is setup for the execution of window functions
---
mysql-test/r/win.result | 51 ++++++++++++++++++++++++++++++++++++++++++++
mysql-test/t/win.test | 56 +++++++++++++++++++++++++++++++++++++++++++++++++
sql/sql_select.cc | 32 +++++++++++++++++++++++++++-
sql/sql_select.h | 1 +
4 files changed, 139 insertions(+), 1 deletion(-)
diff --git a/mysql-test/r/win.result b/mysql-test/r/win.result
index 805fd2ed3d7..de6e0e5afbb 100644
--- a/mysql-test/r/win.result
+++ b/mysql-test/r/win.result
@@ -3643,5 +3643,56 @@ x
foo
drop table t1;
#
+# MDEV-21318: Wrong results with window functions and implicit grouping
+#
+CREATE TABLE t1 (a INT);
+#
+# With empty table
+# The expected result here is 1, NULL
+#
+SELECT row_number() over(), sum(1) FROM t1 where a=1;
+row_number() over() sum(1)
+1 NULL
+insert into t1 values (2);
+#
+# Const table has 1 row, but still impossible where
+# The expected result here is 1, NULL
+#
+SELECT row_number() over(), sum(1) FROM t1 where a=1;
+row_number() over() sum(1)
+1 NULL
+#
+# Impossible HAVING
+# Empty result is expected
+#
+SELECT row_number() over(), sum(1) FROM t1 where a=1 having 1=0;
+row_number() over() sum(1)
+#
+# const table has 1 row, no impossible where
+# The expected result here is 1, 2
+#
+SELECT row_number() over(), sum(a) FROM t1 where a=2;
+row_number() over() sum(a)
+1 2
+drop table t1;
+#
+# Impossible Where
+#
+create table t1(a int);
+insert into t1 values (1);
+#
+# Expected result is NULL, 0, NULL
+#
+SELECT MAX(a) OVER (), COUNT(a), abs(a) FROM t1 WHERE FALSE;
+MAX(a) OVER () COUNT(a) abs(a)
+NULL 0 NULL
+#
+# Expected result is 1, 0, NULL
+#
+SELECT MAX(1) OVER (), COUNT(a), abs(a) FROM t1 WHERE FALSE;
+MAX(1) OVER () COUNT(a) abs(a)
+1 0 NULL
+drop table t1;
+#
# End of 10.2 tests
#
diff --git a/mysql-test/t/win.test b/mysql-test/t/win.test
index 0f79834567b..9bc867cea7f 100644
--- a/mysql-test/t/win.test
+++ b/mysql-test/t/win.test
@@ -2351,6 +2351,62 @@ INSERT INTO t1 VALUES (1),(2),(3);
SELECT (SELECT MIN('foo') OVER() FROM t1 LIMIT 1) as x;
drop table t1;
+--echo #
+--echo # MDEV-21318: Wrong results with window functions and implicit grouping
+--echo #
+
+CREATE TABLE t1 (a INT);
+
+--echo #
+--echo # With empty table
+--echo # The expected result here is 1, NULL
+--echo #
+
+SELECT row_number() over(), sum(1) FROM t1 where a=1;
+insert into t1 values (2);
+
+--echo #
+--echo # Const table has 1 row, but still impossible where
+--echo # The expected result here is 1, NULL
+--echo #
+
+SELECT row_number() over(), sum(1) FROM t1 where a=1;
+
+--echo #
+--echo # Impossible HAVING
+--echo # Empty result is expected
+--echo #
+
+SELECT row_number() over(), sum(1) FROM t1 where a=1 having 1=0;
+
+--echo #
+--echo # const table has 1 row, no impossible where
+--echo # The expected result here is 1, 2
+--echo #
+
+SELECT row_number() over(), sum(a) FROM t1 where a=2;
+drop table t1;
+
+--echo #
+--echo # Impossible Where
+--echo #
+
+create table t1(a int);
+insert into t1 values (1);
+
+--echo #
+--echo # Expected result is NULL, 0, NULL
+--echo #
+SELECT MAX(a) OVER (), COUNT(a), abs(a) FROM t1 WHERE FALSE;
+
+--echo #
+--echo # Expected result is 1, 0, NULL
+--echo #
+
+SELECT MAX(1) OVER (), COUNT(a), abs(a) FROM t1 WHERE FALSE;
+
+drop table t1;
+
--echo #
--echo # End of 10.2 tests
--echo #
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index c9cb533aa33..56865c51906 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -1447,6 +1447,7 @@ JOIN::optimize_inner()
zero_result_cause= "Zero limit";
}
table_count= top_join_tab_count= 0;
+ implicit_grouping_with_window_funcs();
error= 0;
goto setup_subq_exit;
}
@@ -1502,6 +1503,7 @@ JOIN::optimize_inner()
zero_result_cause= "No matching min/max row";
table_count= top_join_tab_count= 0;
error=0;
+ implicit_grouping_with_window_funcs();
goto setup_subq_exit;
}
if (res > 1)
@@ -1615,6 +1617,7 @@ JOIN::optimize_inner()
zero_result_cause= "no matching row in const table";
DBUG_PRINT("error",("Error: %s", zero_result_cause));
error= 0;
+ implicit_grouping_with_window_funcs();
goto setup_subq_exit;
}
if (!(thd->variables.option_bits & OPTION_BIG_SELECTS) &&
@@ -1639,6 +1642,7 @@ JOIN::optimize_inner()
zero_result_cause=
"Impossible WHERE noticed after reading const tables";
select_lex->mark_const_derived(zero_result_cause);
+ implicit_grouping_with_window_funcs();
goto setup_subq_exit;
}
@@ -1781,6 +1785,7 @@ JOIN::optimize_inner()
zero_result_cause=
"Impossible WHERE noticed after reading const tables";
select_lex->mark_const_derived(zero_result_cause);
+ implicit_grouping_with_window_funcs();
goto setup_subq_exit;
}
@@ -18225,7 +18230,8 @@ void set_postjoin_aggr_write_func(JOIN_TAB *tab)
}
}
else if (join->sort_and_group && !tmp_tbl->precomputed_group_by &&
- !join->sort_and_group_aggr_tab && join->tables_list)
+ !join->sort_and_group_aggr_tab && join->tables_list &&
+ join->top_join_tab_count)
{
DBUG_PRINT("info",("Using end_write_group"));
aggr->set_write_func(end_write_group);
@@ -26924,6 +26930,30 @@ Item *remove_pushed_top_conjuncts(THD *thd, Item *cond)
return cond;
}
+/*
+ There are 5 cases in which we shortcut the join optimization process as we
+ conclude that the join would be a degenerate one:
+ 1) IMPOSSIBLE WHERE
+ 2) IMPOSSIBLE HAVING
+ 3) MIN/MAX optimization (@see opt_sum_query)
+ 4) EMPTY CONST TABLE
+ 5) ZERO LIMIT
+ If a window function is present in any of the above cases, then to get the
+ result of the window function we still need to execute it. So we need to
+ create a temporary table for its execution. Here we need to keep in mind
+ that aggregate functions and non-aggregate functions need not be executed.
+
+*/
+
+
+void JOIN::implicit_grouping_with_window_funcs()
+{
+ if (select_lex->have_window_funcs() && send_row_on_empty_set())
+ {
+ const_tables= top_join_tab_count= table_count= 0;
+ }
+}
+
/**
@} (end of group Query_Optimizer)
*/
diff --git a/sql/sql_select.h b/sql/sql_select.h
index fe44f448446..74e8ef4698b 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -1057,6 +1057,7 @@ class JOIN :public Sql_alloc
void restore_query_plan(Join_plan_state *restore_from);
/* Choose a subquery plan for a table-less subquery. */
bool choose_tableless_subquery_plan();
+ void implicit_grouping_with_window_funcs();
public:
JOIN_TAB *join_tab, **best_ref;
1
0

[Commits] 6c9919baaf1: Extend the join prefix to ensure all tables in the duplicate range either are inside or outside the sort nest
by Varun 17 Dec '19
by Varun 17 Dec '19
17 Dec '19
revision-id: 6c9919baaf1e45830cd10f7c4d139c0746c2fc30 (mariadb-10.4.4-377-g6c9919baaf1)
parent(s): 4512fe80b9b0cdc6d540f85b3d37934b0edd4bba
author: Varun Gupta
committer: Varun Gupta
timestamp: 2019-12-17 03:09:48 +0530
message:
Extend the join prefix to ensure all tables in the duplicate range either are inside or outside the sort nest
---
mysql-test/main/sort_nest.result | 73 ++++++++++++++++++++++++++++++++++++++++
mysql-test/main/sort_nest.test | 46 +++++++++++++++++++++++++
sql/opt_subselect.cc | 13 +++++++
sql/sql_select.cc | 9 ++---
sql/sql_select.h | 14 ++++++--
sql/sql_sort_nest.cc | 44 ++++++++++++++++++++++--
6 files changed, 188 insertions(+), 11 deletions(-)
diff --git a/mysql-test/main/sort_nest.result b/mysql-test/main/sort_nest.result
index 08e5b557a2f..a9de5b8b156 100644
--- a/mysql-test/main/sort_nest.result
+++ b/mysql-test/main/sort_nest.result
@@ -2919,3 +2919,76 @@ a b a b
1 1 1 1
0 0 0 0
DROP TABLE t0,t1,t2,t3;
+#
+# Ensure all tables in Duplicate weedout range are either inside
+# or outside the sort nest
+#
+CREATE TABLE t0(a int);
+CREATE TABLE t1 (a int, b int, c int, key(b));
+CREATE TABLE t2 (a int, b int, c int);
+CREATE TABLE t3 (a int, b int, c int, key(a));
+CREATE TABLE t4 (a int, b int, c int, key(a));
+INSERT INTO t0 SELECT seq-1 FROM seq_1_to_10;
+INSERT INTO t1 SELECT seq-1, seq-1, seq-1 FROM seq_1_to_100;
+INSERT INTO t2 SELECT a,a,a FROM t0;
+INSERT INTO t2 (a,b,c) values (9,9,9);
+INSERT INTO t3 SELECT a,a,a FROM t0;
+INSERT INTO t4 SELECT a,a,a FROM t0;
+ANALYZE TABLE t0 PERSISTENT FOR ALL;
+ANALYZE TABLE t1 PERSISTENT FOR ALL;
+ANALYZE TABLE t2 PERSISTENT FOR ALL;
+ANALYZE TABLE t3 PERSISTENT FOR ALL;
+ANALYZE TABLE t4 PERSISTENT FOR ALL;
+set optimizer_switch='cost_based_order_by_limit=on';
+EXPLAIN SELECT t1.a, t2.a, t1.b,t2.b
+FROM t1, t2
+WHERE t1.a=t2.a AND
+t1.b IN (SELECT t3.b FROM t3,t4
+WHERE t3.c < 10 AND t4.a=t2.b)
+ORDER BY t1.b DESC
+LIMIT 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t3 ALL NULL NULL NULL NULL 10 Using where; Start temporary; Using temporary; Using filesort
+1 PRIMARY t1 ref b b 5 test.t3.b 1
+1 PRIMARY t2 ALL NULL NULL NULL NULL 11 Using where; Using join buffer (flat, BNL join)
+1 PRIMARY t4 ref a a 5 test.t2.b 1 Using index; End temporary
+SELECT t1.a, t2.a, t1.b,t2.b
+FROM t1, t2
+WHERE t1.a=t2.a AND
+t1.b IN (SELECT t3.b FROM t3,t4
+WHERE t3.c < 10 AND t4.a=t2.b)
+ORDER BY t1.b DESC
+LIMIT 5;
+a a b b
+9 9 9 9
+9 9 9 9
+8 8 8 8
+7 7 7 7
+6 6 6 6
+set optimizer_switch='cost_based_order_by_limit=off';
+EXPLAIN SELECT t1.a, t2.a, t1.b,t2.b
+FROM t1, t2
+WHERE t1.a=t2.a AND
+t1.b IN (SELECT t3.b FROM t3,t4
+WHERE t3.c < 10 AND t4.a=t2.b)
+ORDER BY t1.b DESC
+LIMIT 5;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t3 ALL NULL NULL NULL NULL 10 Using where; Start temporary; Using temporary; Using filesort
+1 PRIMARY t1 ref b b 5 test.t3.b 1
+1 PRIMARY t2 ALL NULL NULL NULL NULL 11 Using where; Using join buffer (flat, BNL join)
+1 PRIMARY t4 ref a a 5 test.t2.b 1 Using index; End temporary
+SELECT t1.a, t2.a, t1.b,t2.b
+FROM t1, t2
+WHERE t1.a=t2.a AND
+t1.b IN (SELECT t3.b FROM t3,t4
+WHERE t3.c < 10 AND t4.a=t2.b)
+ORDER BY t1.b DESC
+LIMIT 5;
+a a b b
+9 9 9 9
+9 9 9 9
+8 8 8 8
+7 7 7 7
+6 6 6 6
+drop table t0,t1,t2,t3,t4;
diff --git a/mysql-test/main/sort_nest.test b/mysql-test/main/sort_nest.test
index ef50167d412..952d0bc8c5a 100644
--- a/mysql-test/main/sort_nest.test
+++ b/mysql-test/main/sort_nest.test
@@ -1177,3 +1177,49 @@ eval EXPLAIN FORMAT=JSON $query;
eval $query;
DROP TABLE t0,t1,t2,t3;
+
+--echo #
+--echo # Ensure all tables in Duplicate weedout range are either inside
+--echo # or outside the sort nest
+--echo #
+
+CREATE TABLE t0(a int);
+CREATE TABLE t1 (a int, b int, c int, key(b));
+CREATE TABLE t2 (a int, b int, c int);
+CREATE TABLE t3 (a int, b int, c int, key(a));
+CREATE TABLE t4 (a int, b int, c int, key(a));
+
+INSERT INTO t0 SELECT seq-1 FROM seq_1_to_10;
+INSERT INTO t1 SELECT seq-1, seq-1, seq-1 FROM seq_1_to_100;
+INSERT INTO t2 SELECT a,a,a FROM t0;
+INSERT INTO t2 (a,b,c) values (9,9,9);
+INSERT INTO t3 SELECT a,a,a FROM t0;
+INSERT INTO t4 SELECT a,a,a FROM t0;
+
+--disable_result_log
+ANALYZE TABLE t0 PERSISTENT FOR ALL;
+ANALYZE TABLE t1 PERSISTENT FOR ALL;
+ANALYZE TABLE t2 PERSISTENT FOR ALL;
+ANALYZE TABLE t3 PERSISTENT FOR ALL;
+ANALYZE TABLE t4 PERSISTENT FOR ALL;
+--enable_result_log
+
+let $query= SELECT t1.a, t2.a, t1.b,t2.b
+ FROM t1, t2
+ WHERE t1.a=t2.a AND
+ t1.b IN (SELECT t3.b FROM t3,t4
+ WHERE t3.c < 10 AND t4.a=t2.b)
+ ORDER BY t1.b DESC
+ LIMIT 5;
+
+
+set optimizer_switch='cost_based_order_by_limit=on';
+eval EXPLAIN $query;
+eval $query;
+
+set optimizer_switch='cost_based_order_by_limit=off';
+eval EXPLAIN $query;
+eval $query;
+
+drop table t0,t1,t2,t3,t4;
+
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index 822240dcfac..d7e2760394d 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -3513,6 +3513,19 @@ bool Duplicate_weedout_picker::check_qep(JOIN *join,
}
+bool
+Duplicate_weedout_picker::sort_nest_allowed_for_sj(table_map prefix_tables)
+{
+  /*
+    The check here is to ensure that either all the tables involved in
+    the duplicate weedout strategy are COMPLETELY outside the join prefix
+    or are COMPLETELY inside the join prefix
+  */
+ if (!dupsweedout_tables || !(~prefix_tables & dupsweedout_tables))
+ return TRUE;
+ return FALSE;
+}
+
/*
Remove the last join tab from the join->cur_sj_inner_tables bitmap;
we assume remaining_tables doesn't contain @tab.
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index b87dc39fc06..2e120067229 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -9611,7 +9611,7 @@ best_extension_by_limited_search(JOIN *join,
}
/*
- Join buffering should be not considered in 2 cases
+ Join buffering should not be considered in 2 cases
1) if join_cache_level = 0 that is join buffering is disabled
2) if the limit was pushed to a prefix of the join that can resolve the
ORDER BY clause
@@ -9761,15 +9761,10 @@ best_extension_by_limited_search(JOIN *join,
s->check_if_index_satisfies_ordering(index_used))
limit_applied_to_nest= TRUE;
- /*
- TODO varun:
- 1) get an optimizer switch to enable or disable the sort
- nest instead of a system variable
- */
if (!nest_created &&
(join->prefix_resolves_ordering ||
join->consider_adding_sort_nest(sort_nest_tables |
- real_table_bit)))
+ real_table_bit, idx)))
{
// SORT_NEST branch
join->positions[idx].sort_nest_operation_here= TRUE;
diff --git a/sql/sql_select.h b/sql/sql_select.h
index ec86ae346ed..686cf237a40 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -739,6 +739,12 @@ class Semi_join_strategy_picker
virtual void mark_used() = 0;
+  /*
+    Check if a sort nest can be added to a prefix of the join (which may
+    include the inner tables of a semi join).
+  */
+ virtual bool sort_nest_allowed_for_sj(table_map prefix_tables) = 0;
+
virtual ~Semi_join_strategy_picker() {}
};
@@ -779,6 +785,7 @@ class Duplicate_weedout_picker : public Semi_join_strategy_picker
struct st_position *loose_scan_pos);
void mark_used() { is_used= TRUE; }
+ bool sort_nest_allowed_for_sj(table_map remaining_tables);
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
};
@@ -824,6 +831,7 @@ class Firstmatch_picker : public Semi_join_strategy_picker
struct st_position *loose_scan_pos);
void mark_used() { is_used= TRUE; }
+ bool sort_nest_allowed_for_sj(table_map remaining_tables) { return TRUE; }
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
};
@@ -866,7 +874,7 @@ class LooseScan_picker : public Semi_join_strategy_picker
sj_strategy_enum *strategy,
struct st_position *loose_scan_pos);
void mark_used() { is_used= TRUE; }
-
+ bool sort_nest_allowed_for_sj(table_map remaining_tables) { return TRUE; }
friend class Loose_scan_opt;
friend void best_access_path(JOIN *join,
JOIN_TAB *s,
@@ -916,6 +924,7 @@ class Sj_materialization_picker : public Semi_join_strategy_picker
sj_strategy_enum *strategy,
struct st_position *loose_scan_pos);
void mark_used() { is_used= TRUE; }
+ bool sort_nest_allowed_for_sj(table_map remaining_tables) { return TRUE; }
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
};
@@ -1956,7 +1965,8 @@ class JOIN :public Sql_alloc
void setup_range_scan(JOIN_TAB *tab, uint idx, double records);
bool is_join_buffering_allowed(JOIN_TAB *tab);
bool check_join_prefix_resolves_ordering(table_map previous_tables);
- bool consider_adding_sort_nest(table_map previous_tables);
+ bool consider_adding_sort_nest(table_map previous_tables, uint idx);
+ bool is_duplicate_removal_ensured(table_map prefix_tables, uint idx);
void set_fraction_output_for_nest();
double sort_nest_oper_cost(double join_record_count, uint idx,
ulong rec_len);
diff --git a/sql/sql_sort_nest.cc b/sql/sql_sort_nest.cc
index 18d2a596587..847d047126c 100644
--- a/sql/sql_sort_nest.cc
+++ b/sql/sql_sort_nest.cc
@@ -1446,18 +1446,58 @@ bool JOIN::sort_nest_allowed()
FALSE otherwise
*/
-bool JOIN::consider_adding_sort_nest(table_map prefix_tables)
+bool JOIN::consider_adding_sort_nest(table_map prefix_tables, uint idx)
{
if (!sort_nest_possible || // (1)
get_cardinality_estimate || // (2)
cur_embedding_map || // (3)
- cur_sj_inner_tables) // (4)
+ cur_sj_inner_tables || // (4)
+ !is_duplicate_removal_ensured(prefix_tables, idx))
return FALSE;
return check_join_prefix_resolves_ordering(prefix_tables); // (5)
}
+/*
+ @brief
+ Check if the join prefix ensures duplicate removal for semi joins
+
+ @details
+  This function checks, for each of the semi-join strategies, whether the
+  join prefix can ensure duplicate removal if a sort nest is considered
+  on the join prefix.
+
+ @retval
+ TRUE Duplicate removal is ensured
+ FALSE Otherwise
+*/
+
+bool
+JOIN::is_duplicate_removal_ensured(table_map prefix_tables, uint idx)
+{
+ if (!select_lex->have_merged_subqueries)
+ return TRUE;
+
+ POSITION *pos= positions + idx;
+ Semi_join_strategy_picker *pickers[]=
+ {
+ &pos->firstmatch_picker,
+ &pos->loosescan_picker,
+ &pos->sjmat_picker,
+ &pos->dups_weedout_picker,
+ NULL,
+ };
+ Semi_join_strategy_picker **strategy;
+ for (strategy= pickers; *strategy != NULL; strategy++)
+ {
+ if (!(*strategy)->sort_nest_allowed_for_sj(prefix_tables))
+ return FALSE;
+ }
+ return TRUE;
+}
+
+
/*
@brief
Check if indexes on a table are allowed to resolve the ORDER BY clause
1
0

[Commits] f9cc0d345ae: Range Locking: acquire correct range locks at start/end of index
by psergey 17 Dec '19
by psergey 17 Dec '19
17 Dec '19
revision-id: f9cc0d345aeccabcb84ac98ce5d1130b637e024d (fb-prod201903-272-gf9cc0d345ae)
parent(s): fbbc63bd33d1bd1ff75bec0f7abf093e2e2de070
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-12-16 19:06:39 +0300
message:
Range Locking: acquire correct range locks at start/end of index
Make ha_rocksdb::index_{first,last}_intern() acquire appropriate locks.
---
mysql-test/suite/rocksdb/r/range_locking.result | 48 +++++++++++++++++++++-
.../suite/rocksdb/r/range_locking_rev_cf.result | 48 +++++++++++++++++++++-
mysql-test/suite/rocksdb/t/range_locking.inc | 44 +++++++++++++++++++-
storage/rocksdb/ha_rocksdb.cc | 16 ++++++--
4 files changed, 148 insertions(+), 8 deletions(-)
diff --git a/mysql-test/suite/rocksdb/r/range_locking.result b/mysql-test/suite/rocksdb/r/range_locking.result
index 5a58ddf148e..85264fbf2fa 100644
--- a/mysql-test/suite/rocksdb/r/range_locking.result
+++ b/mysql-test/suite/rocksdb/r/range_locking.result
@@ -468,6 +468,52 @@ $cf_id $trx_id 00${indexnr}8000000480000005 - 01${indexnr}8000000480000008 X
rollback;
connection con1;
rollback;
-disconnect con1;
connection default;
drop table t0, t1;
+#
+# A bug: range locking was not used when scan started at table start or end
+#
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t10(a int);
+insert into t10 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+create table t1 (
+pk int not null,
+a int,
+primary key(pk)
+) engine=rocksdb;
+insert into t1 select a*2,a*2 from t10;
+connection con1;
+begin;
+select * from t1 where pk=500 for update;
+pk a
+500 500
+connection default;
+begin;
+select * from t1 where pk<10 order by pk limit 10 for update;
+pk a
+0 0
+2 2
+4 4
+6 6
+8 8
+# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
+COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
+$cf_id $trx_id 00${indexnr} - 00${indexnr}8000000a X
+rollback;
+begin;
+select * from t1 where pk>1990 order by pk desc limit 10 for update;
+pk a
+1998 1998
+1996 1996
+1994 1994
+1992 1992
+# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
+COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
+$cf_id $trx_id 00${indexnr}800007c6 - 01${indexnr+1} X
+rollback;
+connection con1;
+rollback;
+disconnect con1;
+connection default;
+drop table t0,t10,t1;
diff --git a/mysql-test/suite/rocksdb/r/range_locking_rev_cf.result b/mysql-test/suite/rocksdb/r/range_locking_rev_cf.result
index 811ad88e91b..87522d5ae37 100644
--- a/mysql-test/suite/rocksdb/r/range_locking_rev_cf.result
+++ b/mysql-test/suite/rocksdb/r/range_locking_rev_cf.result
@@ -428,6 +428,52 @@ $cf_id $trx_id 00${indexnr}8000000480000008 - 01${indexnr}8000000480000005 X
rollback;
connection con1;
rollback;
-disconnect con1;
connection default;
drop table t0, t1;
+#
+# A bug: range locking was not used when scan started at table start or end
+#
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t10(a int);
+insert into t10 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+create table t1 (
+pk int not null,
+a int,
+primary key(pk)
+) engine=rocksdb;
+insert into t1 select a*2,a*2 from t10;
+connection con1;
+begin;
+select * from t1 where pk=500 for update;
+pk a
+500 500
+connection default;
+begin;
+select * from t1 where pk<10 order by pk limit 10 for update;
+pk a
+0 0
+2 2
+4 4
+6 6
+8 8
+# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
+COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
+$cf_id $trx_id 00${indexnr} - 00${indexnr}8000000a X
+rollback;
+begin;
+select * from t1 where pk>1990 order by pk desc limit 10 for update;
+pk a
+1998 1998
+1996 1996
+1994 1994
+1992 1992
+# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
+COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
+$cf_id $trx_id 00${indexnr}800007c6 - 01${indexnr+1} X
+rollback;
+connection con1;
+rollback;
+disconnect con1;
+connection default;
+drop table t0,t10,t1;
diff --git a/mysql-test/suite/rocksdb/t/range_locking.inc b/mysql-test/suite/rocksdb/t/range_locking.inc
index ecc8e4432bb..fd0f3f4e6c6 100644
--- a/mysql-test/suite/rocksdb/t/range_locking.inc
+++ b/mysql-test/suite/rocksdb/t/range_locking.inc
@@ -498,7 +498,47 @@ rollback;
connection con1;
rollback;
-disconnect con1;
-connection default;
+connection default;
drop table t0, t1;
+
+--echo #
+--echo # A bug: range locking was not used when scan started at table start or end
+--echo #
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t10(a int);
+insert into t10 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+
+create table t1 (
+ pk int not null,
+ a int,
+ primary key(pk)
+) engine=rocksdb;
+
+insert into t1 select a*2,a*2 from t10;
+
+connection con1;
+begin;
+select * from t1 where pk=500 for update;
+connection default;
+
+begin;
+select * from t1 where pk<10 order by pk limit 10 for update;
+
+let $select_from_is_rowlocks_current_trx_only=1;
+--source suite/rocksdb/include/select_from_is_rowlocks.inc
+rollback;
+
+begin;
+select * from t1 where pk>1990 order by pk desc limit 10 for update;
+let $select_from_is_rowlocks_current_trx_only=1;
+--source suite/rocksdb/include/select_from_is_rowlocks.inc
+rollback;
+
+connection con1;
+rollback;
+disconnect con1;
+
+connection default;
+drop table t0,t10,t1;
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index cca9122f9d0..f77da1bc8da 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -9430,13 +9430,17 @@ int ha_rocksdb::index_first_intern(uchar *const buf) {
Rdb_transaction *const tx = get_or_create_tx(table->in_use);
DBUG_ASSERT(tx != nullptr);
+ bool use_locking_iter;
+ if ((rc = set_range_lock(tx, kd, HA_READ_KEY_OR_NEXT, index_key,
+ end_range, &use_locking_iter)))
+ DBUG_RETURN(rc);
+
const bool is_new_snapshot = !tx->has_snapshot();
// Loop as long as we get a deadlock error AND we end up creating the
// snapshot here (i.e. it did not exist prior to this)
for (;;) {
setup_scan_iterator(kd, &index_key, false, key_start_matching_bytes,
- (rocksdb_use_range_locking &&
- m_lock_rows != RDB_LOCK_NONE && !end_range));
+ use_locking_iter);
m_scan_it->Seek(index_key);
m_skip_scan_it_next_call = true;
@@ -9522,13 +9526,17 @@ int ha_rocksdb::index_last_intern(uchar *const buf) {
Rdb_transaction *const tx = get_or_create_tx(table->in_use);
DBUG_ASSERT(tx != nullptr);
+ bool use_locking_iter;
+ if ((rc = set_range_lock(tx, kd, HA_READ_PREFIX_LAST_OR_PREV, index_key,
+ end_range, &use_locking_iter)))
+ DBUG_RETURN(rc);
+
bool is_new_snapshot = !tx->has_snapshot();
// Loop as long as we get a deadlock error AND we end up creating the
// snapshot here (i.e. it did not exist prior to this)
for (;;) {
setup_scan_iterator(kd, &index_key, false, key_end_matching_bytes,
- (rocksdb_use_range_locking &&
- m_lock_rows != RDB_LOCK_NONE && !end_range));
+ use_locking_iter);
m_scan_it->SeekForPrev(index_key);
m_skip_scan_it_next_call = false;
1
0

[Commits] fbbc63bd33d: Range Locking, SeekForUpdate: use LockingIter with locking full table scan
by psergey 16 Dec '19
by psergey 16 Dec '19
16 Dec '19
revision-id: fbbc63bd33d1bd1ff75bec0f7abf093e2e2de070 (fb-prod201903-271-gfbbc63bd33d)
parent(s): 1f738064ee9d3e3d04491ab01b6a6afcd6085136
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-12-15 23:39:36 +0300
message:
Range Locking, SeekForUpdate: use LockingIter with locking full table scan
- Make ha_rocksdb::setup_iterator_for_rnd_scan() use LockingIterator (that
is, use SeekForUpdate) when doing a locking full table scan.
- Adjust testcases that are affected by the change
- Correctly handle transaction isolation errors (kLockTimeout, kDeadlock,
etc). (Without SeekForUpdate, it was not possible to get such errors
from an iterator. Now it's possible, and before this commit, we didn't
handle e.g. kDeadlock)
values ).
---
.../rocksdb/r/range_locking_seek_for_update.result | 32 ++++++++++++++++++++--
mysql-test/suite/rocksdb/r/unique_sec.result | 4 +++
.../suite/rocksdb/r/unique_sec_rev_cf.result | 4 +++
.../rocksdb/t/range_locking_seek_for_update.test | 32 ++++++++++++++++++++++
mysql-test/suite/rocksdb/t/unique_sec.inc | 10 ++++++-
storage/rocksdb/ha_rocksdb.cc | 25 ++++++++++-------
storage/rocksdb/ha_rocksdb.h | 4 ++-
7 files changed, 96 insertions(+), 15 deletions(-)
diff --git a/mysql-test/suite/rocksdb/r/range_locking_seek_for_update.result b/mysql-test/suite/rocksdb/r/range_locking_seek_for_update.result
index 2614e8ce66e..cddf039bc4f 100644
--- a/mysql-test/suite/rocksdb/r/range_locking_seek_for_update.result
+++ b/mysql-test/suite/rocksdb/r/range_locking_seek_for_update.result
@@ -173,7 +173,7 @@ begin;
delete from t1 where pk=7;
set debug_sync='now SIGNAL spoiler_inserted';
connection default;
-ERROR HY000: Lock wait timeout exceeded; try restarting transaction:
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t1.PRIMARY
rollback;
connection con1;
rollback;
@@ -225,7 +225,7 @@ pk a
connection default;
begin;
select * from t1 order by pk desc limit 2 for update;
-ERROR HY000: Lock wait timeout exceeded; try restarting transaction:
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t1.PRIMARY
rollback;
connection con1;
rollback;
@@ -236,10 +236,36 @@ pk a
connection default;
begin;
select * from t1 order by pk desc limit 2 for update;
-ERROR HY000: Lock wait timeout exceeded; try restarting transaction:
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t1.PRIMARY
rollback;
connection con1;
rollback;
disconnect con1;
connection default;
drop table t0,t1;
+#
+# A test: full table scan doesn't lock gaps
+#
+create table t1 (
+pk int primary key,
+a int
+) engine=rocksdb;
+insert into t1 values (10,10),(20,20),(30,30);
+connect con1,localhost,root,,;
+connect con2,localhost,root,,;
+connection con1;
+begin;
+select * from t1 for update;
+pk a
+10 10
+20 20
+30 30
+connection con2;
+insert into t1 values (5,5);
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t1.PRIMARY
+connection con1;
+rollback;
+disconnect con1;
+disconnect con2;
+connection default;
+drop table t1;
diff --git a/mysql-test/suite/rocksdb/r/unique_sec.result b/mysql-test/suite/rocksdb/r/unique_sec.result
index 64db56ca78e..a9550164e30 100644
--- a/mysql-test/suite/rocksdb/r/unique_sec.result
+++ b/mysql-test/suite/rocksdb/r/unique_sec.result
@@ -115,6 +115,10 @@ ERROR 23000: Duplicate entry '37' for key 'id5'
UPDATE t1 SET id5=34 WHERE id1=38;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t1.id5
# NULL values are unique
+# (Note: the following UPDATE reads through the whole table without
+# finding anything to update. With point locking, this is fine,
+# but with range locking it will time out while waiting on a row lock
+# that the other transaction is holding)
UPDATE t1 SET id5=NULL WHERE value1 > 37;
COMMIT;
COMMIT;
diff --git a/mysql-test/suite/rocksdb/r/unique_sec_rev_cf.result b/mysql-test/suite/rocksdb/r/unique_sec_rev_cf.result
index 0ff55ac8d10..e3f592f6284 100644
--- a/mysql-test/suite/rocksdb/r/unique_sec_rev_cf.result
+++ b/mysql-test/suite/rocksdb/r/unique_sec_rev_cf.result
@@ -115,6 +115,10 @@ ERROR 23000: Duplicate entry '37' for key 'id5'
UPDATE t1 SET id5=34 WHERE id1=38;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t1.id5
# NULL values are unique
+# (Note: the following UPDATE reads through the whole table without
+# finding anything to update. With point locking, this is fine,
+# but with range locking it will time out while waiting on a row lock
+# that the other transaction is holding)
UPDATE t1 SET id5=NULL WHERE value1 > 37;
COMMIT;
COMMIT;
diff --git a/mysql-test/suite/rocksdb/t/range_locking_seek_for_update.test b/mysql-test/suite/rocksdb/t/range_locking_seek_for_update.test
index 0230113eeb1..32590af1799 100644
--- a/mysql-test/suite/rocksdb/t/range_locking_seek_for_update.test
+++ b/mysql-test/suite/rocksdb/t/range_locking_seek_for_update.test
@@ -254,3 +254,35 @@ rollback;
disconnect con1;
connection default;
drop table t0,t1;
+
+--echo #
+--echo # A test: full table scan doesn't lock gaps
+--echo #
+
+create table t1 (
+ pk int primary key,
+ a int
+) engine=rocksdb;
+
+insert into t1 values (10,10),(20,20),(30,30);
+
+connect (con1,localhost,root,,);
+connect (con2,localhost,root,,);
+
+connection con1;
+begin;
+
+select * from t1 for update;
+
+connection con2;
+
+--error ER_LOCK_WAIT_TIMEOUT
+insert into t1 values (5,5);
+
+connection con1;
+rollback;
+
+disconnect con1;
+disconnect con2;
+connection default;
+drop table t1;
diff --git a/mysql-test/suite/rocksdb/t/unique_sec.inc b/mysql-test/suite/rocksdb/t/unique_sec.inc
index 2f11cd3b65a..a78a1e1218d 100644
--- a/mysql-test/suite/rocksdb/t/unique_sec.inc
+++ b/mysql-test/suite/rocksdb/t/unique_sec.inc
@@ -148,8 +148,16 @@ UPDATE t1 SET id5=37 WHERE id1=38;
UPDATE t1 SET id5=34 WHERE id1=38;
--echo # NULL values are unique
+--echo # (Note: the following UPDATE reads through the whole table without
+--echo # finding anything to update. With point locking, this is fine,
+--echo # but with range locking it will time out while waiting on a row lock
+--echo # that the other transaction is holding)
+if (`select @@rocksdb_use_range_locking=0`) {
UPDATE t1 SET id5=NULL WHERE value1 > 37;
-
+}
+if (`select @@rocksdb_use_range_locking=1`) {
+-- echo UPDATE t1 SET id5=NULL WHERE value1 > 37;
+}
connection con1;
COMMIT;
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 2e161dbfb4b..cca9122f9d0 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -8012,14 +8012,18 @@ int ha_rocksdb::read_before_key(const Rdb_key_def &kd,
}
-inline int iter_status_to_retval(rocksdb::Iterator *it, int not_found_code) {
+int ha_rocksdb::iter_status_to_retval(rocksdb::Iterator *it,
+ const Rdb_key_def &kd,
+ int not_found_code) {
if (it->Valid())
return HA_EXIT_SUCCESS;
+
rocksdb::Status s= it->status();
- if (s.IsTimedOut())
- return HA_ERR_LOCK_WAIT_TIMEOUT;
- //TODO: should we handle other kinds of errors?
- return not_found_code;
+ if (s.ok() || s.IsNotFound())
+ return not_found_code;
+
+ Rdb_transaction *&tx = get_tx_from_thd(table->in_use);
+ return tx->set_status_error(table->in_use, s, kd, m_tbl_def, m_table_handler);
}
int ha_rocksdb::read_after_key(const Rdb_key_def &kd,
@@ -8051,7 +8055,7 @@ int ha_rocksdb::read_after_key(const Rdb_key_def &kd,
return is_valid_iterator(m_scan_it) ?
HA_EXIT_SUCCESS :
- iter_status_to_retval(m_scan_it, HA_ERR_KEY_NOT_FOUND);
+ iter_status_to_retval(m_scan_it, kd, HA_ERR_KEY_NOT_FOUND);
}
int ha_rocksdb::position_to_correct_key(
@@ -10774,7 +10778,8 @@ void ha_rocksdb::setup_iterator_for_rnd_scan() {
rocksdb::Slice table_key((const char *)m_pk_packed_tuple, key_size);
- setup_scan_iterator(*m_pk_descr, &table_key, false, key_start_matching_bytes);
+ setup_scan_iterator(*m_pk_descr, &table_key, false, key_start_matching_bytes,
+ (m_lock_rows != RDB_LOCK_NONE) && rocksdb_use_range_locking);
m_scan_it->Seek(table_key);
m_skip_scan_it_next_call = true;
}
@@ -10864,7 +10869,7 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) {
In this case, we should return EOF.
*/
rc = HA_ERR_END_OF_FILE;
- DBUG_RETURN(m_scan_it ? iter_status_to_retval(m_scan_it, rc) : rc);
+ DBUG_RETURN(m_scan_it ? iter_status_to_retval(m_scan_it, *m_pk_descr, rc) : rc);
}
for (;;) {
@@ -10885,7 +10890,7 @@ int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) {
}
if (!is_valid_iterator(m_scan_it)) {
- rc = iter_status_to_retval(m_scan_it, HA_ERR_END_OF_FILE);
+ rc = iter_status_to_retval(m_scan_it, *m_pk_descr, HA_ERR_END_OF_FILE);
break;
}
@@ -14051,7 +14056,7 @@ static void show_rocksdb_stall_vars(THD *thd, SHOW_VAR *var, char *buff) {
}
//
-// psergey: lock tree escalation count status variable.
+// Lock Tree Status variables
//
static longlong rocksdb_locktree_escalation_count=1234;
static longlong rocksdb_locktree_current_lock_memory=0;
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index a285e74e728..272dc9973d4 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -321,7 +321,7 @@ class ha_rocksdb : public my_core::handler {
rocksdb::Slice *upper_bound_slice);
void setup_scan_iterator(const Rdb_key_def &kd, rocksdb::Slice *slice,
const bool use_all_keys, const uint eq_cond_len,
- bool use_locking_iterator=false)
+ bool use_locking_iterator)
MY_ATTRIBUTE((__nonnull__));
int set_range_lock(Rdb_transaction *tx,
@@ -1031,6 +1031,8 @@ class ha_rocksdb : public my_core::handler {
bool m_in_rpl_update_rows;
bool m_force_skip_unique_check;
+
+ int iter_status_to_retval(rocksdb::Iterator *it, const Rdb_key_def &kd, int not_found_code);
};
/*
1
0