
[Commits] 38c89a95752: MDEV-20740: Odd computations in calculate_cond_selectivity_for_table
by Sergei Petrunia 04 Oct '19
by Sergei Petrunia 04 Oct '19
04 Oct '19
revision-id: 38c89a95752ee7a59f9366af0cefacf3a71a6fd0 (mariadb-10.4.7-102-g38c89a95752)
parent(s): eb0a10b07222b01d40dee24e61aac9502256481f
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-10-04 20:16:39 +0300
message:
MDEV-20740: Odd computations in calculate_cond_selectivity_for_table
Make SEL_ARG graph traversal code in sel_arg_range_seq_next() set
max_key_parts first.
(Pushing to 10.4 first)
---
mysql-test/main/index_intersect_innodb.result | 8 ++++----
sql/opt_range.cc | 16 ++++++++++------
sql/opt_range_mrr.cc | 5 ++++-
3 files changed, 18 insertions(+), 11 deletions(-)
diff --git a/mysql-test/main/index_intersect_innodb.result b/mysql-test/main/index_intersect_innodb.result
index 854bcd75e5c..fd07f6b41c6 100644
--- a/mysql-test/main/index_intersect_innodb.result
+++ b/mysql-test/main/index_intersect_innodb.result
@@ -470,17 +470,17 @@ EXPLAIN
SELECT * FROM City
WHERE ID BETWEEN 501 AND 1000 AND Population > 700000 AND Country LIKE 'C%';
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Country,Population 4,7,4 NULL # Using sort_intersect(PRIMARY,Country,Population); Using where
+1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Population,Country 4,4,7 NULL # Using sort_intersect(PRIMARY,Population,Country); Using where
EXPLAIN
SELECT * FROM City
WHERE ID BETWEEN 1 AND 500 AND Population > 700000 AND Country LIKE 'C%';
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Country,Population 4,7,4 NULL # Using sort_intersect(PRIMARY,Country,Population); Using where
+1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Population,Country 4,4,7 NULL # Using sort_intersect(PRIMARY,Population,Country); Using where
EXPLAIN
SELECT * FROM City
WHERE ID BETWEEN 2001 AND 2500 AND Population > 300000 AND Country LIKE 'H%';
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Country 4,7 NULL # Using sort_intersect(PRIMARY,Country); Using where
+1 SIMPLE City range PRIMARY,Population,Country Country 7 NULL # Using index condition; Using where
EXPLAIN
SELECT * FROM City
WHERE ID BETWEEN 3701 AND 4000 AND Population > 1000000
@@ -724,7 +724,7 @@ EXPLAIN
SELECT * FROM City
WHERE ID BETWEEN 1 AND 500 AND Population > 700000 AND Country LIKE 'C%';
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Country,Population 4,7,4 NULL # Using sort_intersect(PRIMARY,Country,Population); Using where
+1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Population,Country 4,4,7 NULL # Using sort_intersect(PRIMARY,Population,Country); Using where
EXPLAIN
SELECT * FROM City
WHERE ID BETWEEN 3001 AND 4000 AND Population > 600000
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 654fab7810a..c47da283e9c 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -315,7 +315,7 @@ class PARAM : public RANGE_OPT_PARAM
*/
key_map possible_keys;
longlong baseflag;
- uint max_key_part, range_count;
+ uint max_key_parts, range_count;
bool quick; // Don't calulate possible keys
@@ -7387,7 +7387,7 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
index_scan->idx= idx;
index_scan->keynr= keynr;
index_scan->key_info= ¶m->table->key_info[keynr];
- index_scan->used_key_parts= param->max_key_part+1;
+ index_scan->used_key_parts= param->max_key_parts;
index_scan->range_count= param->range_count;
index_scan->records= found_records;
index_scan->sel_arg= key;
@@ -11042,7 +11042,7 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
seq.start= tree;
param->range_count=0;
- param->max_key_part=0;
+ param->max_key_parts=0;
seq.is_ror_scan= TRUE;
if (file->index_flags(keynr, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
@@ -11055,9 +11055,13 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
*mrr_flags|= HA_MRR_NO_ASSOCIATION | HA_MRR_SORTED;
bool pk_is_clustered= file->primary_key_is_clustered();
+ // TODO: param->max_key_parts holds 0 now, and not the #keyparts used.
+ // Passing wrong second argument to index_flags() makes no difference for
+ // most storage engines but might be an issue for MyRocks with certain
+ // datatypes.
if (index_only &&
- (file->index_flags(keynr, param->max_key_part, 1) & HA_KEYREAD_ONLY) &&
- !(file->index_flags(keynr, param->max_key_part, 1) & HA_CLUSTERED_INDEX))
+ (file->index_flags(keynr, param->max_key_parts, 1) & HA_KEYREAD_ONLY) &&
+ !(file->index_flags(keynr, param->max_key_parts, 1) & HA_CLUSTERED_INDEX))
*mrr_flags |= HA_MRR_INDEX_ONLY;
if (param->thd->lex->sql_command != SQLCOM_SELECT)
@@ -11088,7 +11092,7 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
if (update_tbl_stats)
{
param->table->quick_keys.set_bit(keynr);
- param->table->quick_key_parts[keynr]= param->max_key_part+1;
+ param->table->quick_key_parts[keynr]= param->max_key_parts;
param->table->quick_n_ranges[keynr]= param->range_count;
param->table->quick_condition_rows=
MY_MIN(param->table->quick_condition_rows, rows);
diff --git a/sql/opt_range_mrr.cc b/sql/opt_range_mrr.cc
index d3a1e155fb7..4afa06a7ca0 100644
--- a/sql/opt_range_mrr.cc
+++ b/sql/opt_range_mrr.cc
@@ -247,6 +247,7 @@ bool sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
uint min_key_length= (uint)(cur->min_key - seq->param->min_key);
range->ptr= (char*)(intptr)(key_tree->part);
+ uint max_key_parts;
if (cur->min_key_flag & GEOM_FLAG)
{
range->range_flag= cur->min_key_flag;
@@ -256,9 +257,11 @@ bool sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
range->start_key.length= min_key_length;
range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts);
range->start_key.flag= (ha_rkey_function) (cur->min_key_flag ^ GEOM_FLAG);
+ max_key_parts= cur->min_key_parts;
}
else
{
+ max_key_parts= MY_MAX(cur->min_key_parts, cur->max_key_parts);
range->range_flag= cur->min_key_flag | cur->max_key_flag;
range->start_key.key= seq->param->min_key;
@@ -336,7 +339,7 @@ bool sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
}
}
seq->param->range_count++;
- seq->param->max_key_part=MY_MAX(seq->param->max_key_part,key_tree->part);
+ seq->param->max_key_parts= MY_MAX(seq->param->max_key_parts, max_key_parts);
return 0;
}
1
0

[Commits] d34839cacbe: MDEV-18046: Assortment of crashes, assertion failures and ASAN errors in mysql_show_binlog_events
by sujatha 04 Oct '19
by sujatha 04 Oct '19
04 Oct '19
revision-id: d34839cacbef0f4c76b7aa7ae6165b37b293685c (mariadb-10.1.41-51-gd34839cacbe)
parent(s): 4bcf524482b3a4260347b0fe912fa2660af97c43
author: Sujatha
committer: Sujatha
timestamp: 2019-10-04 13:45:33 +0530
message:
MDEV-18046: Assortment of crashes, assertion failures and ASAN errors in mysql_show_binlog_events
Problem:
========
SHOW BINLOG EVENTS FROM <pos> causes a variety of failures, some of which are
listed below. It is not a race condition issue, but there is some
non-determinism in it.
Fix:
====
Assert 1: With ASAN
========
Rows_log_event::Rows_log_event(const char*, uint,
const Format_description_log_event*):
Assertion `var_header_len >= 2'
Implemented upstream patch.
commit a3a497ccf7ecacc900551fb1e47ea4078b45c351
Fix: Added checks to avoid reading out of buffer limits.
Assert 2:
=========
Table_map_log_event::Table_map_log_event(const char*, uint,
const Format_description_log_event*)
Assertion `m_field_metadata_size <= (m_colcnt * 2)' failed.
Fix: Converted debug assert to error handler code.
ASAN Error: head-buffer-overflow within "my_strndup" in Rotate_log_event
===========
my_strndup /home/sujatha/bug_repo/tmp-10.1/mysys/my_malloc.c:254
Rotate_log_event::Rotate_log_event(char const*, unsigned int,
Format_description_log_event const*)
Fix: Improved the check for event_len validation.
ASAN Error: AddressSanitizer: SEGV on unknown address
==========
in inline_mysql_mutex_destroy
in my_bitmap_free
in Update_rows_log_event::~Update_rows_log_event()
Fix: Intialize m_cols_ai.bitmap to NULL.
ASAN Error: AddressSanitizer: heap-buffer-overflow on address 0x60400002acb8
==========
Load_log_event::copy_log_event(char const*, unsigned long, int,
Format_description_log_event const*)
Fix: Moved the event_len validation to the begin of copy_log_event function.
ASAN Error: AddressSanitizer: SEGV on unknown address
==========
The signal is caused by a READ memory access.
User_var_log_event::User_var_log_event(char const*, unsigned int,
Format_description_log_event const*)
Implemented upstream patch.
commit a3a497ccf7ecacc900551fb1e47ea4078b45c351
User_var_log_event::User_var_log_event: added checks to avoid reading out of
buffer limits.
ASAN Error: AddressSanitizer: heap-buffer-overflow on address
==========
String::append(char const*, unsigned int)
Query_log_event::pack_info(Protocol*)
Fix: Added check to ensure that query_len is within event_length
---
.../binlog/r/binlog_invalid_read_in_rotate.result | 17 +++++
.../r/binlog_show_binlog_event_random_pos.result | 13 ++++
.../binlog/t/binlog_invalid_read_in_rotate.test | 47 ++++++++++++++
.../t/binlog_show_binlog_event_random_pos.test | 37 +++++++++++
sql/log_event.cc | 75 ++++++++++++++++------
5 files changed, 168 insertions(+), 21 deletions(-)
diff --git a/mysql-test/suite/binlog/r/binlog_invalid_read_in_rotate.result b/mysql-test/suite/binlog/r/binlog_invalid_read_in_rotate.result
new file mode 100644
index 00000000000..28131d2645c
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_invalid_read_in_rotate.result
@@ -0,0 +1,17 @@
+RESET MASTER;
+call mtr.add_suppression("Error in Log_event::read_log_event*");
+DROP TABLE /*! IF EXISTS*/ t1;
+Warnings:
+Note 1051 Unknown table 'test.t1'
+CREATE TABLE `t1` (
+`col_int` int,
+pk integer auto_increment,
+`col_char_12_key` char(12),
+`col_int_key` int,
+`col_char_12` char(12),
+primary key (pk),
+key (`col_char_12_key` ),
+key (`col_int_key` )) ENGINE=InnoDB;
+INSERT /*! IGNORE */ INTO t1 VALUES (183173120, NULL, 'abcd', 1, 'efghijk'), (1, NULL, 'lmno', 2, 'p');
+ALTER TABLE `t1` ENABLE KEYS;
+DROP TABLE t1;
diff --git a/mysql-test/suite/binlog/r/binlog_show_binlog_event_random_pos.result b/mysql-test/suite/binlog/r/binlog_show_binlog_event_random_pos.result
new file mode 100644
index 00000000000..98385a7ac5e
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_show_binlog_event_random_pos.result
@@ -0,0 +1,13 @@
+RESET MASTER;
+call mtr.add_suppression("Error in Log_event::read_log_event*");
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note 1051 Unknown table 'test.t1'
+CREATE TABLE t1 (c1 CHAR(255), c2 CHAR(255), c3 CHAR(255), c4 CHAR(255), c5 CHAR(255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+UPDATE t1 SET c1=repeat('b',255);
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+DROP TABLE t1;
diff --git a/mysql-test/suite/binlog/t/binlog_invalid_read_in_rotate.test b/mysql-test/suite/binlog/t/binlog_invalid_read_in_rotate.test
new file mode 100644
index 00000000000..8360f2dcac4
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_invalid_read_in_rotate.test
@@ -0,0 +1,47 @@
+# ==== Purpose ====
+#
+# Test verifies that reading binary log by using "SHOW BINLOG EVENTS" command
+# from various random positions doesn't lead to crash. It should exit
+# gracefully with appropriate error.
+#
+# ==== References ====
+#
+# MDEV-18046:Assortment of crashes, assertion failures and ASAN errors in mysql_show_binlog_events
+
+--source include/have_log_bin.inc
+--source include/have_innodb.inc
+
+RESET MASTER;
+
+call mtr.add_suppression("Error in Log_event::read_log_event*");
+
+DROP TABLE /*! IF EXISTS*/ t1;
+CREATE TABLE `t1` (
+`col_int` int,
+pk integer auto_increment,
+`col_char_12_key` char(12),
+`col_int_key` int,
+`col_char_12` char(12),
+primary key (pk),
+key (`col_char_12_key` ),
+key (`col_int_key` )) ENGINE=InnoDB;
+
+INSERT /*! IGNORE */ INTO t1 VALUES (183173120, NULL, 'abcd', 1, 'efghijk'), (1, NULL, 'lmno', 2, 'p');
+
+--disable_warnings
+ALTER TABLE `t1` ENABLE KEYS;
+--enable_warnings
+
+--let $max_pos= query_get_value(SHOW MASTER STATUS,Position,1)
+--let $pos= 1
+while ($pos <= $max_pos)
+{
+ --disable_query_log
+ --disable_result_log
+ --error 0,1220
+ eval SHOW BINLOG EVENTS FROM $pos;
+ --inc $pos
+ --enable_result_log
+ --enable_query_log
+}
+DROP TABLE t1;
diff --git a/mysql-test/suite/binlog/t/binlog_show_binlog_event_random_pos.test b/mysql-test/suite/binlog/t/binlog_show_binlog_event_random_pos.test
new file mode 100644
index 00000000000..aaf56faa57e
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_show_binlog_event_random_pos.test
@@ -0,0 +1,37 @@
+# ==== Purpose ====
+#
+# Test verifies that reading binary log by using "SHOW BINLOG EVENTS" command
+# from various random positions doesn't lead to crash. It should exit
+# gracefully with appropriate error.
+#
+# ==== References ====
+#
+# MDEV-18046:Assortment of crashes, assertion failures and ASAN errors in mysql_show_binlog_events
+
+--source include/have_log_bin.inc
+--source include/have_innodb.inc
+
+RESET MASTER;
+call mtr.add_suppression("Error in Log_event::read_log_event*");
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (c1 CHAR(255), c2 CHAR(255), c3 CHAR(255), c4 CHAR(255), c5 CHAR(255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+UPDATE t1 SET c1=repeat('b',255);
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+--let $max_pos= query_get_value(SHOW MASTER STATUS,Position,1)
+--let $pos= 1
+while ($pos <= $max_pos)
+{
+ --disable_query_log
+ --disable_result_log
+ --error 0,1220
+ eval SHOW BINLOG EVENTS FROM $pos LIMIT 100;
+ --inc $pos
+ --enable_result_log
+ --enable_query_log
+}
+
+DROP TABLE t1;
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 65f29441e1a..293bfa6680e 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -3502,7 +3502,6 @@ code_name(int code)
#define CHECK_SPACE(PTR,END,CNT) \
do { \
DBUG_PRINT("info", ("Read %s", code_name(pos[-1]))); \
- DBUG_ASSERT((PTR) + (CNT) <= (END)); \
if ((PTR) + (CNT) > (END)) { \
DBUG_PRINT("info", ("query= 0")); \
query= 0; \
@@ -3815,7 +3814,7 @@ Query_log_event::Query_log_event(const char* buf, uint event_len,
uint32 max_length= uint32(event_len - ((const char*)(end + db_len + 1) -
(buf - common_header_len)));
- if (q_len != max_length)
+ if (q_len != max_length || q_len > event_len)
{
q_len= 0;
query= NULL;
@@ -5889,6 +5888,9 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len,
{
DBUG_ENTER("Load_log_event::copy_log_event");
uint data_len;
+
+ if ((int) event_len < body_offset)
+ DBUG_RETURN(1);
char* buf_end = (char*)buf + event_len;
/* this is the beginning of the post-header */
const char* data_head = buf + description_event->common_header_len;
@@ -5898,18 +5900,17 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len,
table_name_len = (uint)data_head[L_TBL_LEN_OFFSET];
db_len = (uint)data_head[L_DB_LEN_OFFSET];
num_fields = uint4korr(data_head + L_NUM_FIELDS_OFFSET);
-
- if ((int) event_len < body_offset)
- DBUG_RETURN(1);
+
/*
Sql_ex.init() on success returns the pointer to the first byte after
the sql_ex structure, which is the start of field lengths array.
*/
+
if (!(field_lens= (uchar*)sql_ex.init((char*)buf + body_offset,
buf_end,
(uchar)buf[EVENT_TYPE_OFFSET] != LOAD_EVENT)))
DBUG_RETURN(1);
-
+
data_len = event_len - body_offset;
if (num_fields > data_len) // simple sanity check against corruption
DBUG_RETURN(1);
@@ -6449,7 +6450,7 @@ Rotate_log_event::Rotate_log_event(const char* buf, uint event_len,
// The caller will ensure that event_len is what we have at EVENT_LEN_OFFSET
uint8 post_header_len= description_event->post_header_len[ROTATE_EVENT-1];
uint ident_offset;
- if (event_len < LOG_EVENT_MINIMAL_HEADER_LEN)
+ if (event_len < (uint)(LOG_EVENT_MINIMAL_HEADER_LEN + post_header_len))
DBUG_VOID_RETURN;
buf+= LOG_EVENT_MINIMAL_HEADER_LEN;
pos= post_header_len ? uint8korr(buf + R_POS_OFFSET) : 4;
@@ -7955,6 +7956,12 @@ User_var_log_event(const char* buf, uint event_len,
we keep the flags set to UNDEF_F.
*/
uint bytes_read= ((val + val_len) - buf_start);
+ if (bytes_read > event_len)
+ {
+ error= true;
+ goto err;
+ }
+
if ((data_written - bytes_read) > 0)
{
flags= (uint) *(buf + UV_VAL_IS_NULL + UV_VAL_TYPE_SIZE +
@@ -9513,7 +9520,8 @@ Rows_log_event::Rows_log_event(const char *buf, uint event_len,
uint8 const common_header_len= description_event->common_header_len;
Log_event_type event_type= (Log_event_type) buf[EVENT_TYPE_OFFSET];
m_type= event_type;
-
+ m_cols_ai.bitmap= 0;
+
uint8 const post_header_len= description_event->post_header_len[event_type-1];
DBUG_PRINT("enter",("event_len: %u common_header_len: %d "
@@ -9546,7 +9554,16 @@ Rows_log_event::Rows_log_event(const char *buf, uint event_len,
which includes length bytes
*/
var_header_len= uint2korr(post_start);
- assert(var_header_len >= 2);
+
+ /* Check length and also avoid out of buffer read */
+ if (var_header_len < 2 ||
+ event_len < static_cast<unsigned int>(var_header_len +
+ (post_start - buf)))
+ {
+ m_cols.bitmap= 0;
+ DBUG_VOID_RETURN;
+ }
+
var_header_len-= 2;
/* Iterate over var-len header, extracting 'chunks' */
@@ -11047,6 +11064,10 @@ Table_map_log_event::Table_map_log_event(const char *buf, uint event_len,
/* Length of table name + counter + terminating null */
uchar const *const ptr_colcnt= ptr_tbllen + m_tbllen + 2;
uchar *ptr_after_colcnt= (uchar*) ptr_colcnt;
+ bytes_read= (unsigned int) (ptr_after_colcnt - (unsigned char *)buf);
+ /* Avoid reading out of buffer */
+ if (event_len <= bytes_read)
+ DBUG_VOID_RETURN;
m_colcnt= net_field_length(&ptr_after_colcnt);
DBUG_PRINT("info",("m_dblen: %lu off: %ld m_tbllen: %lu off: %ld m_colcnt: %lu off: %ld",
@@ -11074,15 +11095,24 @@ Table_map_log_event::Table_map_log_event(const char *buf, uint event_len,
if (bytes_read < event_len)
{
m_field_metadata_size= net_field_length(&ptr_after_colcnt);
- DBUG_ASSERT(m_field_metadata_size <= (m_colcnt * 2));
- uint num_null_bytes= (m_colcnt + 7) / 8;
- m_meta_memory= (uchar *)my_multi_malloc(MYF(MY_WME),
- &m_null_bits, num_null_bytes,
- &m_field_metadata, m_field_metadata_size,
- NULL);
- memcpy(m_field_metadata, ptr_after_colcnt, m_field_metadata_size);
- ptr_after_colcnt= (uchar*)ptr_after_colcnt + m_field_metadata_size;
- memcpy(m_null_bits, ptr_after_colcnt, num_null_bytes);
+ if (m_field_metadata_size <= (m_colcnt * 2))
+ {
+ uint num_null_bytes= (m_colcnt + 7) / 8;
+ m_meta_memory= (uchar *)my_multi_malloc(MYF(MY_WME),
+ &m_null_bits, num_null_bytes,
+ &m_field_metadata, m_field_metadata_size,
+ NULL);
+ memcpy(m_field_metadata, ptr_after_colcnt, m_field_metadata_size);
+ ptr_after_colcnt= (uchar*)ptr_after_colcnt + m_field_metadata_size;
+ memcpy(m_null_bits, ptr_after_colcnt, num_null_bytes);
+ }
+ else
+ {
+ m_coltype= NULL;
+ my_free(m_memory);
+ m_memory= NULL;
+ DBUG_VOID_RETURN;
+ }
}
}
@@ -12739,9 +12769,12 @@ void Update_rows_log_event::init(MY_BITMAP const *cols)
Update_rows_log_event::~Update_rows_log_event()
{
- if (m_cols_ai.bitmap == m_bitbuf_ai) // no my_malloc happened
- m_cols_ai.bitmap= 0; // so no my_free in my_bitmap_free
- my_bitmap_free(&m_cols_ai); // To pair with my_bitmap_init().
+ if (m_cols_ai.bitmap)
+ {
+ if (m_cols_ai.bitmap == m_bitbuf_ai) // no my_malloc happened
+ m_cols_ai.bitmap= 0; // so no my_free in my_bitmap_free
+ my_bitmap_free(&m_cols_ai); // To pair with my_bitmap_init().
+ }
}
1
0

[Commits] 16b25039c5e: MDEV-20574 Position of events reported by mysqlbinlog is wrong with encrypted binlogs, SHOW BINLOG EVENTS reports the correct one.
by sachin 01 Oct '19
by sachin 01 Oct '19
01 Oct '19
revision-id: 16b25039c5ebb53893904dd52ae3e6b3b6daec89 (mariadb-10.2.27-49-g16b25039c5e)
parent(s): 102bc7beb080e936ba70c0524a8759799f42458e
author: Sachin Setiya
committer: Sachin Setiya
timestamp: 2019-10-01 14:59:29 +0530
message:
MDEV-20574 Position of events reported by mysqlbinlog is wrong with encrypted binlogs, SHOW BINLOG EVENTS reports the correct one.
Analysis
Mysqlbinlog output for encrypted binary log
#Q> insert into tab1 values (3,'row 003')
#190912 17:36:35 server id 10221 end_log_pos 980 CRC32 0x53bcb3d3 Table_map: `test`.`tab1` mapped to number 19
# at 940
#190912 17:36:35 server id 10221 end_log_pos 1026 CRC32 0xf2ae5136 Write_rows: table id 19 flags: STMT_END_F
Here we can see Table_map_log_event ends at 980 but Next event starts at 940.
And the reason for that is we do not send START_ENCRYPTION_EVENT to the slave
Solution:-
Send Start_encryption_log_event as Ignorable_log_event to slave(mysqlbinlog),
So that mysqlbinlog can update its log_pos.
Since Slave can request multiple FORMAT_DESCRIPTION_EVENT while master does not
have so We only update slave master pos when master actually have the
FORMAT_DESCRIPTION_EVENT. Similar logic should be applied for START_ENCRYPTION_EVENT.
Also added the test case when new server reads the data from old server which
does not send START_ENCRYPTION_EVENT to slave.
---
mysql-test/std_data/binlog_before_20574.bin | Bin 0 -> 1022 bytes
.../binlog_encryption/binlog_row_annotate.result | 9 +++
.../binlog_encryption/mdev_20574_old_binlog.result | 27 +++++++++
.../binlog_encryption/mdev_20574_old_binlog.test | 40 +++++++++++++
.../suite/binlog_encryption/mysqlbinlog.result | 1 +
.../suite/binlog_encryption/mysqlbinlog.test | 3 +
sql/log_event.cc | 34 +++++------
sql/slave.cc | 11 ++++
sql/sql_repl.cc | 63 ++++++++++++++-------
9 files changed, 153 insertions(+), 35 deletions(-)
diff --git a/mysql-test/std_data/binlog_before_20574.bin b/mysql-test/std_data/binlog_before_20574.bin
new file mode 100644
index 00000000000..596a883dc71
Binary files /dev/null and b/mysql-test/std_data/binlog_before_20574.bin differ
diff --git a/mysql-test/suite/binlog_encryption/binlog_row_annotate.result b/mysql-test/suite/binlog_encryption/binlog_row_annotate.result
index 88c690a8bb7..9b843dc8a6b 100644
--- a/mysql-test/suite/binlog_encryption/binlog_row_annotate.result
+++ b/mysql-test/suite/binlog_encryption/binlog_row_annotate.result
@@ -104,6 +104,9 @@ DELIMITER /*!*/;
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
# at #
+#010909 4:46:40 server id # end_log_pos # Ignorable
+# Ignorable event type 164 (Start_encryption)
+# at #
#010909 4:46:40 server id # end_log_pos # Gtid list []
# at #
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
@@ -336,6 +339,9 @@ DELIMITER /*!*/;
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
# at #
+#010909 4:46:40 server id # end_log_pos # Ignorable
+# Ignorable event type 164 (Start_encryption)
+# at #
#010909 4:46:40 server id # end_log_pos # Gtid list []
# at #
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
@@ -495,6 +501,9 @@ DELIMITER /*!*/;
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
# at #
+#010909 4:46:40 server id # end_log_pos # Ignorable
+# Ignorable event type 164 (Start_encryption)
+# at #
#010909 4:46:40 server id # end_log_pos # Gtid list []
# at #
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
diff --git a/mysql-test/suite/binlog_encryption/mdev_20574_old_binlog.result b/mysql-test/suite/binlog_encryption/mdev_20574_old_binlog.result
new file mode 100644
index 00000000000..52029b6342e
--- /dev/null
+++ b/mysql-test/suite/binlog_encryption/mdev_20574_old_binlog.result
@@ -0,0 +1,27 @@
+include/master-slave.inc
+[connection master]
+connection slave;
+include/stop_slave.inc
+connection master;
+include/rpl_stop_server.inc [server_number=1]
+# Data in binlog
+# CREATE TABLE t1 (a INT);
+# INSERT INTO t1 VALUES (1),(2),(3);
+# REPLACE INTO t1 VALUES (4);
+include/rpl_start_server.inc [server_number=1]
+connection slave;
+reset slave;
+reset master;
+CHANGE MASTER TO master_host='127.0.0.1', master_port=16000, master_user='root', master_log_file='master-bin.000001', master_log_pos=4;
+include/start_slave.inc
+desc t1;
+Field Type Null Key Default Extra
+a int(11) YES NULL
+select * from t1 order by a;
+a
+1
+2
+3
+4
+drop table t1;
+include/rpl_end.inc
diff --git a/mysql-test/suite/binlog_encryption/mdev_20574_old_binlog.test b/mysql-test/suite/binlog_encryption/mdev_20574_old_binlog.test
new file mode 100644
index 00000000000..183697ab9c3
--- /dev/null
+++ b/mysql-test/suite/binlog_encryption/mdev_20574_old_binlog.test
@@ -0,0 +1,40 @@
+# Test replicating off old master.
+# We simulate old master by copying in pre-generated binlog files from earlier
+# server versions.
+source include/have_binlog_format_row.inc;
+source include/master-slave.inc;
+--source include/have_innodb.inc
+
+--connection slave
+--source include/stop_slave.inc
+
+--connection master
+--let $datadir= `SELECT @@datadir`
+
+--let $rpl_server_number= 1
+--source include/rpl_stop_server.inc
+
+--remove_file $datadir/master-bin.000001
+--remove_file $datadir/master-bin.state
+--echo # Data in binlog
+--echo # CREATE TABLE t1 (a INT);
+--echo # INSERT INTO t1 VALUES (1),(2),(3);
+--echo # REPLACE INTO t1 VALUES (4);
+
+--copy_file $MYSQL_TEST_DIR/std_data/binlog_before_20574.bin $datadir/master-bin.000001
+
+--let $rpl_server_number= 1
+--source include/rpl_start_server.inc
+
+--source include/wait_until_connected_again.inc
+
+--connection slave
+#--replace_result $SERVER_MYPORT_1 SERVER_MYPORT_1
+reset slave;
+reset master;
+eval CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_1, master_user='root', master_log_file='master-bin.000001', master_log_pos=4;
+--source include/start_slave.inc
+desc t1;
+select * from t1 order by a;
+drop table t1;
+source include/rpl_end.inc;
diff --git a/mysql-test/suite/binlog_encryption/mysqlbinlog.result b/mysql-test/suite/binlog_encryption/mysqlbinlog.result
index 71758f7d6e7..e97e0569571 100644
--- a/mysql-test/suite/binlog_encryption/mysqlbinlog.result
+++ b/mysql-test/suite/binlog_encryption/mysqlbinlog.result
@@ -4,3 +4,4 @@ INSERT INTO t1 VALUES (1),(2),(3);
REPLACE INTO t1 VALUES (4);
DROP TABLE t1;
FLUSH LOGS;
+FOUND 1 /Ignorable event type 164.*/ in binlog_enc.sql
diff --git a/mysql-test/suite/binlog_encryption/mysqlbinlog.test b/mysql-test/suite/binlog_encryption/mysqlbinlog.test
index b80388aaa45..108dbd8782f 100644
--- a/mysql-test/suite/binlog_encryption/mysqlbinlog.test
+++ b/mysql-test/suite/binlog_encryption/mysqlbinlog.test
@@ -17,5 +17,8 @@ let outfile=$MYSQLTEST_VARDIR/tmp/binlog_enc.sql;
exec $MYSQL_BINLOG $local > $outfile;
exec $MYSQL_BINLOG $local --force-read >> $outfile;
exec $MYSQL_BINLOG $remote >> $outfile;
+--let SEARCH_FILE= $outfile
+--let SEARCH_PATTERN= Ignorable event type 164.*
+--source include/search_pattern_in_file.inc
remove_file $outfile;
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 0e0d69b515c..66f1d6bc790 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -2077,6 +2077,19 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
alg != BINLOG_CHECKSUM_ALG_OFF))
event_len= event_len - BINLOG_CHECKSUM_LEN;
+ /*
+ Create an object of Ignorable_log_event for unrecognized sub-class.
+ So that SLAVE SQL THREAD will only update the position and continue.
+ We should look for this flag first instead of judging by event_type
+ Any event can be Ignorable_log_event if it has this flag on.
+ look into @note of Ignorable_log_event
+ */
+ if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)
+ {
+ ev= new Ignorable_log_event(buf, fdle,
+ get_type_str((Log_event_type) event_type));
+ goto exit;
+ }
switch(event_type) {
case QUERY_EVENT:
ev = new Query_log_event(buf, event_len, fdle, QUERY_EVENT);
@@ -2203,24 +2216,13 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
ev = new Start_encryption_log_event(buf, event_len, fdle);
break;
default:
- /*
- Create an object of Ignorable_log_event for unrecognized sub-class.
- So that SLAVE SQL THREAD will only update the position and continue.
- */
- if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)
- {
- ev= new Ignorable_log_event(buf, fdle,
- get_type_str((Log_event_type) event_type));
- }
- else
- {
- DBUG_PRINT("error",("Unknown event code: %d",
- (uchar) buf[EVENT_TYPE_OFFSET]));
- ev= NULL;
- break;
- }
+ DBUG_PRINT("error",("Unknown event code: %d",
+ (uchar) buf[EVENT_TYPE_OFFSET]));
+ ev= NULL;
+ break;
}
}
+exit:
if (ev)
{
diff --git a/sql/slave.cc b/sql/slave.cc
index 88a80029bba..1bc21f8895b 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -6317,7 +6317,18 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
mi->last_queued_gtid.seq_no == 1000)
goto skip_relay_logging;
});
+ goto default_action;
#endif
+ case START_ENCRYPTION_EVENT:
+ if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)
+ {
+ /*
+ If the event was not requested by the slave (the slave did not ask for
+ it), i.e. has end_log_pos=0, we do not increment mi->master_log_pos
+ */
+ inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0;
+ break;
+ }
/* fall through */
default:
default_action:
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index edff750a9a3..8a6bea34fb4 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -399,16 +399,27 @@ static int send_file(THD *thd)
/**
Internal to mysql_binlog_send() routine that recalculates checksum for
- a FD event (asserted) that needs additional arranment prior sending to slave.
+ 1. FD event (asserted) that needs additional arranment prior sending to slave.
+ 2. Start_encryption_log_event whose Ignored flag is set
+TODO DBUG_ASSERT can be removed if this function is used for more general cases
*/
-inline void fix_checksum(String *packet, ulong ev_offset)
+
+inline void fix_checksum(enum_binlog_checksum_alg checksum_alg, String *packet,
+ ulong ev_offset)
{
+ if (checksum_alg == BINLOG_CHECKSUM_ALG_OFF ||
+ checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
+ return;
/* recalculate the crc for this event */
uint data_len = uint4korr(packet->ptr() + ev_offset + EVENT_LEN_OFFSET);
ha_checksum crc;
- DBUG_ASSERT(data_len ==
+ DBUG_ASSERT((data_len ==
LOG_EVENT_MINIMAL_HEADER_LEN + FORMAT_DESCRIPTION_HEADER_LEN +
- BINLOG_CHECKSUM_ALG_DESC_LEN + BINLOG_CHECKSUM_LEN);
+ BINLOG_CHECKSUM_ALG_DESC_LEN + BINLOG_CHECKSUM_LEN) ||
+ (data_len ==
+ LOG_EVENT_MINIMAL_HEADER_LEN + BINLOG_CRYPTO_SCHEME_LENGTH +
+ BINLOG_KEY_VERSION_LENGTH + BINLOG_NONCE_LENGTH +
+ BINLOG_CHECKSUM_LEN));
crc= my_checksum(0, (uchar *)packet->ptr() + ev_offset, data_len -
BINLOG_CHECKSUM_LEN);
int4store(packet->ptr() + ev_offset + data_len - BINLOG_CHECKSUM_LEN, crc);
@@ -2135,6 +2146,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
THD *thd= info->thd;
String *packet= info->packet;
Log_event_type event_type;
+ bool initial_log_pos= info->clear_initial_log_pos;
DBUG_ENTER("send_format_descriptor_event");
/**
@@ -2233,7 +2245,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
(*packet)[FLAGS_OFFSET+ev_offset] &= ~LOG_EVENT_BINLOG_IN_USE_F;
- if (info->clear_initial_log_pos)
+ if (initial_log_pos)
{
info->clear_initial_log_pos= false;
/*
@@ -2251,9 +2263,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
ST_CREATED_OFFSET+ev_offset, (ulong) 0);
/* fix the checksum due to latest changes in header */
- if (info->current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
- info->current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
- fix_checksum(packet, ev_offset);
+ fix_checksum(info->current_checksum_alg, packet, ev_offset);
}
else if (info->using_gtid_state)
{
@@ -2274,9 +2284,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
{
int4store((char*) packet->ptr()+LOG_EVENT_MINIMAL_HEADER_LEN+
ST_CREATED_OFFSET+ev_offset, (ulong) 0);
- if (info->current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
- info->current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
- fix_checksum(packet, ev_offset);
+ fix_checksum(info->current_checksum_alg, packet, ev_offset);
}
}
@@ -2289,12 +2297,16 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
}
/*
- Read the following Start_encryption_log_event but don't send it to slave.
- Slave doesn't need to know whether master's binlog is encrypted,
- and if it'll want to encrypt its logs, it should generate its own
- random nonce, not use the one from the master.
+ Read the following Start_encryption_log_event and send it to slave as
+ Ignorable_log_event. Although Slave doesn't need to know whether master's
+ binlog is encrypted but it needs to update slave log pos (for mysqlbinlog).
+
+ If slave want to encrypt its logs, it should generate its own
+ random nonce, it should not use the one from the master.
*/
- packet->length(0);
+ /* reset transmit packet for the event read from binary log file */
+ if (reset_transmit_packet(info, info->flags, &ev_offset, &info->errmsg))
+ DBUG_RETURN(1);
info->last_pos= linfo->pos;
error= Log_event::read_log_event(log, packet, info->fdev,
opt_master_verify_checksum
@@ -2308,12 +2320,13 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
DBUG_RETURN(1);
}
- event_type= (Log_event_type)((uchar)(*packet)[LOG_EVENT_OFFSET]);
+ event_type= (Log_event_type)((uchar)(*packet)[LOG_EVENT_OFFSET + ev_offset]);
if (event_type == START_ENCRYPTION_EVENT)
{
Start_encryption_log_event *sele= (Start_encryption_log_event *)
- Log_event::read_log_event(packet->ptr(), packet->length(), &info->errmsg,
- info->fdev, BINLOG_CHECKSUM_ALG_OFF);
+ Log_event::read_log_event(packet->ptr() + ev_offset, packet->length()
+ - ev_offset, &info->errmsg, info->fdev,
+ BINLOG_CHECKSUM_ALG_OFF);
if (!sele)
{
info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
@@ -2327,6 +2340,18 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
delete sele;
DBUG_RETURN(1);
}
+ /* Make it Ignorable_log_event and send it */
+ (*packet)[FLAGS_OFFSET+ev_offset] |= LOG_EVENT_IGNORABLE_F;
+ if (initial_log_pos)
+ int4store((char*) packet->ptr()+LOG_POS_OFFSET+ev_offset, (ulong) 0);
+ /* fix the checksum due to latest changes in header */
+ fix_checksum(info->current_checksum_alg, packet, ev_offset);
+ if (my_net_write(info->net, (uchar*) packet->ptr(), packet->length()))
+ {
+ info->errmsg= "Failed on my_net_write()";
+ info->error= ER_UNKNOWN_ERROR;
+ DBUG_RETURN(1);
+ }
delete sele;
}
else if (start_pos == BIN_LOG_HEADER_SIZE)
1
0

[Commits] ad8b8223326: MDEV-20574 Position of events reported by mysqlbinlog is wrong with encrypted binlogs, SHOW BINLOG EVENTS reports the correct one.
by sachin 30 Sep '19
by sachin 30 Sep '19
30 Sep '19
revision-id: ad8b8223326a43ad4de49b96dcc4b3f66400f9f3 (mariadb-10.2.27-49-gad8b8223326)
parent(s): 102bc7beb080e936ba70c0524a8759799f42458e
author: Sachin Setiya
committer: Sachin Setiya
timestamp: 2019-10-01 01:00:29 +0530
message:
MDEV-20574 Position of events reported by mysqlbinlog is wrong with encrypted binlogs, SHOW BINLOG EVENTS reports the correct one.
Analysis
Mysqlbinlog output for encrypted binary log
#Q> insert into tab1 values (3,'row 003')
#190912 17:36:35 server id 10221 end_log_pos 980 CRC32 0x53bcb3d3 Table_map: `test`.`tab1` mapped to number 19
# at 940
#190912 17:36:35 server id 10221 end_log_pos 1026 CRC32 0xf2ae5136 Write_rows: table id 19 flags: STMT_END_F
Here we can see Table_map_log_event ends at 980 but Next event starts at 940.
And the reason for that is we do not send START_ENCRYPTION_EVENT to the slave
Solution:-
Send Start_encryption_log_event as Ignorable_log_event to slave(mysqlbinlog),
So that mysqlbinlog can update its log_pos.
Since Slave can request multiple FORMAT_DESCRIPTION_EVENT while master does not
have so We only update slave master pos when master actually have the
FORMAT_DESCRIPTION_EVENT. Similar logic should be applied for START_ENCRYPTION_EVENT.
---
.../binlog_encryption/binlog_row_annotate.result | 9 ++++
.../suite/binlog_encryption/mysqlbinlog.result | 1 +
.../suite/binlog_encryption/mysqlbinlog.test | 3 ++
sql/log_event.cc | 34 ++++++------
sql/slave.cc | 11 ++++
sql/sql_repl.cc | 63 +++++++++++++++-------
6 files changed, 86 insertions(+), 35 deletions(-)
diff --git a/mysql-test/suite/binlog_encryption/binlog_row_annotate.result b/mysql-test/suite/binlog_encryption/binlog_row_annotate.result
index 88c690a8bb7..9b843dc8a6b 100644
--- a/mysql-test/suite/binlog_encryption/binlog_row_annotate.result
+++ b/mysql-test/suite/binlog_encryption/binlog_row_annotate.result
@@ -104,6 +104,9 @@ DELIMITER /*!*/;
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
# at #
+#010909 4:46:40 server id # end_log_pos # Ignorable
+# Ignorable event type 164 (Start_encryption)
+# at #
#010909 4:46:40 server id # end_log_pos # Gtid list []
# at #
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
@@ -336,6 +339,9 @@ DELIMITER /*!*/;
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
# at #
+#010909 4:46:40 server id # end_log_pos # Ignorable
+# Ignorable event type 164 (Start_encryption)
+# at #
#010909 4:46:40 server id # end_log_pos # Gtid list []
# at #
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
@@ -495,6 +501,9 @@ DELIMITER /*!*/;
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
# at #
+#010909 4:46:40 server id # end_log_pos # Ignorable
+# Ignorable event type 164 (Start_encryption)
+# at #
#010909 4:46:40 server id # end_log_pos # Gtid list []
# at #
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
diff --git a/mysql-test/suite/binlog_encryption/mysqlbinlog.result b/mysql-test/suite/binlog_encryption/mysqlbinlog.result
index 71758f7d6e7..e97e0569571 100644
--- a/mysql-test/suite/binlog_encryption/mysqlbinlog.result
+++ b/mysql-test/suite/binlog_encryption/mysqlbinlog.result
@@ -4,3 +4,4 @@ INSERT INTO t1 VALUES (1),(2),(3);
REPLACE INTO t1 VALUES (4);
DROP TABLE t1;
FLUSH LOGS;
+FOUND 1 /Ignorable event type 164.*/ in binlog_enc.sql
diff --git a/mysql-test/suite/binlog_encryption/mysqlbinlog.test b/mysql-test/suite/binlog_encryption/mysqlbinlog.test
index b80388aaa45..108dbd8782f 100644
--- a/mysql-test/suite/binlog_encryption/mysqlbinlog.test
+++ b/mysql-test/suite/binlog_encryption/mysqlbinlog.test
@@ -17,5 +17,8 @@ let outfile=$MYSQLTEST_VARDIR/tmp/binlog_enc.sql;
exec $MYSQL_BINLOG $local > $outfile;
exec $MYSQL_BINLOG $local --force-read >> $outfile;
exec $MYSQL_BINLOG $remote >> $outfile;
+--let SEARCH_FILE= $outfile
+--let SEARCH_PATTERN= Ignorable event type 164.*
+--source include/search_pattern_in_file.inc
remove_file $outfile;
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 0e0d69b515c..66f1d6bc790 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -2077,6 +2077,19 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
alg != BINLOG_CHECKSUM_ALG_OFF))
event_len= event_len - BINLOG_CHECKSUM_LEN;
+ /*
+ Create an object of Ignorable_log_event for unrecognized sub-class.
+ So that SLAVE SQL THREAD will only update the position and continue.
+ We should look for this flag first instead of judging by event_type
+ Any event can be Ignorable_log_event if it has this flag on.
+ look into @note of Ignorable_log_event
+ */
+ if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)
+ {
+ ev= new Ignorable_log_event(buf, fdle,
+ get_type_str((Log_event_type) event_type));
+ goto exit;
+ }
switch(event_type) {
case QUERY_EVENT:
ev = new Query_log_event(buf, event_len, fdle, QUERY_EVENT);
@@ -2203,24 +2216,13 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
ev = new Start_encryption_log_event(buf, event_len, fdle);
break;
default:
- /*
- Create an object of Ignorable_log_event for unrecognized sub-class.
- So that SLAVE SQL THREAD will only update the position and continue.
- */
- if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)
- {
- ev= new Ignorable_log_event(buf, fdle,
- get_type_str((Log_event_type) event_type));
- }
- else
- {
- DBUG_PRINT("error",("Unknown event code: %d",
- (uchar) buf[EVENT_TYPE_OFFSET]));
- ev= NULL;
- break;
- }
+ DBUG_PRINT("error",("Unknown event code: %d",
+ (uchar) buf[EVENT_TYPE_OFFSET]));
+ ev= NULL;
+ break;
}
}
+exit:
if (ev)
{
diff --git a/sql/slave.cc b/sql/slave.cc
index 88a80029bba..1bc21f8895b 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -6317,7 +6317,18 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
mi->last_queued_gtid.seq_no == 1000)
goto skip_relay_logging;
});
+ goto default_action;
#endif
+ case START_ENCRYPTION_EVENT:
+ if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)
+ {
+ /*
+ If the event was not requested by the slave (the slave did not ask for
+ it), i.e. has end_log_pos=0, we do not increment mi->master_log_pos
+ */
+ inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0;
+ break;
+ }
/* fall through */
default:
default_action:
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index edff750a9a3..8a6bea34fb4 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -399,16 +399,27 @@ static int send_file(THD *thd)
/**
Internal to mysql_binlog_send() routine that recalculates checksum for
- a FD event (asserted) that needs additional arranment prior sending to slave.
+ 1. FD event (asserted) that needs additional arranment prior sending to slave.
+ 2. Start_encryption_log_event whose Ignored flag is set
+TODO DBUG_ASSERT can be removed if this function is used for more general cases
*/
-inline void fix_checksum(String *packet, ulong ev_offset)
+
+inline void fix_checksum(enum_binlog_checksum_alg checksum_alg, String *packet,
+ ulong ev_offset)
{
+ if (checksum_alg == BINLOG_CHECKSUM_ALG_OFF ||
+ checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
+ return;
/* recalculate the crc for this event */
uint data_len = uint4korr(packet->ptr() + ev_offset + EVENT_LEN_OFFSET);
ha_checksum crc;
- DBUG_ASSERT(data_len ==
+ DBUG_ASSERT((data_len ==
LOG_EVENT_MINIMAL_HEADER_LEN + FORMAT_DESCRIPTION_HEADER_LEN +
- BINLOG_CHECKSUM_ALG_DESC_LEN + BINLOG_CHECKSUM_LEN);
+ BINLOG_CHECKSUM_ALG_DESC_LEN + BINLOG_CHECKSUM_LEN) ||
+ (data_len ==
+ LOG_EVENT_MINIMAL_HEADER_LEN + BINLOG_CRYPTO_SCHEME_LENGTH +
+ BINLOG_KEY_VERSION_LENGTH + BINLOG_NONCE_LENGTH +
+ BINLOG_CHECKSUM_LEN));
crc= my_checksum(0, (uchar *)packet->ptr() + ev_offset, data_len -
BINLOG_CHECKSUM_LEN);
int4store(packet->ptr() + ev_offset + data_len - BINLOG_CHECKSUM_LEN, crc);
@@ -2135,6 +2146,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
THD *thd= info->thd;
String *packet= info->packet;
Log_event_type event_type;
+ bool initial_log_pos= info->clear_initial_log_pos;
DBUG_ENTER("send_format_descriptor_event");
/**
@@ -2233,7 +2245,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
(*packet)[FLAGS_OFFSET+ev_offset] &= ~LOG_EVENT_BINLOG_IN_USE_F;
- if (info->clear_initial_log_pos)
+ if (initial_log_pos)
{
info->clear_initial_log_pos= false;
/*
@@ -2251,9 +2263,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
ST_CREATED_OFFSET+ev_offset, (ulong) 0);
/* fix the checksum due to latest changes in header */
- if (info->current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
- info->current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
- fix_checksum(packet, ev_offset);
+ fix_checksum(info->current_checksum_alg, packet, ev_offset);
}
else if (info->using_gtid_state)
{
@@ -2274,9 +2284,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
{
int4store((char*) packet->ptr()+LOG_EVENT_MINIMAL_HEADER_LEN+
ST_CREATED_OFFSET+ev_offset, (ulong) 0);
- if (info->current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
- info->current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
- fix_checksum(packet, ev_offset);
+ fix_checksum(info->current_checksum_alg, packet, ev_offset);
}
}
@@ -2289,12 +2297,16 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
}
/*
- Read the following Start_encryption_log_event but don't send it to slave.
- Slave doesn't need to know whether master's binlog is encrypted,
- and if it'll want to encrypt its logs, it should generate its own
- random nonce, not use the one from the master.
+ Read the following Start_encryption_log_event and send it to slave as
+ Ignorable_log_event. Although Slave doesn't need to know whether master's
+ binlog is encrypted but it needs to update slave log pos (for mysqlbinlog).
+
+ If slave want to encrypt its logs, it should generate its own
+ random nonce, it should not use the one from the master.
*/
- packet->length(0);
+ /* reset transmit packet for the event read from binary log file */
+ if (reset_transmit_packet(info, info->flags, &ev_offset, &info->errmsg))
+ DBUG_RETURN(1);
info->last_pos= linfo->pos;
error= Log_event::read_log_event(log, packet, info->fdev,
opt_master_verify_checksum
@@ -2308,12 +2320,13 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
DBUG_RETURN(1);
}
- event_type= (Log_event_type)((uchar)(*packet)[LOG_EVENT_OFFSET]);
+ event_type= (Log_event_type)((uchar)(*packet)[LOG_EVENT_OFFSET + ev_offset]);
if (event_type == START_ENCRYPTION_EVENT)
{
Start_encryption_log_event *sele= (Start_encryption_log_event *)
- Log_event::read_log_event(packet->ptr(), packet->length(), &info->errmsg,
- info->fdev, BINLOG_CHECKSUM_ALG_OFF);
+ Log_event::read_log_event(packet->ptr() + ev_offset, packet->length()
+ - ev_offset, &info->errmsg, info->fdev,
+ BINLOG_CHECKSUM_ALG_OFF);
if (!sele)
{
info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
@@ -2327,6 +2340,18 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
delete sele;
DBUG_RETURN(1);
}
+ /* Make it Ignorable_log_event and send it */
+ (*packet)[FLAGS_OFFSET+ev_offset] |= LOG_EVENT_IGNORABLE_F;
+ if (initial_log_pos)
+ int4store((char*) packet->ptr()+LOG_POS_OFFSET+ev_offset, (ulong) 0);
+ /* fix the checksum due to latest changes in header */
+ fix_checksum(info->current_checksum_alg, packet, ev_offset);
+ if (my_net_write(info->net, (uchar*) packet->ptr(), packet->length()))
+ {
+ info->errmsg= "Failed on my_net_write()";
+ info->error= ER_UNKNOWN_ERROR;
+ DBUG_RETURN(1);
+ }
delete sele;
}
else if (start_pos == BIN_LOG_HEADER_SIZE)
1
0

[Commits] c648a7a4539: MDEV-20574 Position of events reported by mysqlbinlog is wrong with encrypted binlogs, SHOW BINLOG EVENTS reports the correct one.
by sachin 30 Sep '19
by sachin 30 Sep '19
30 Sep '19
revision-id: c648a7a453963e2c2b2910c7d4b8e211c6f30915 (mariadb-10.2.27-49-gc648a7a4539)
parent(s): 102bc7beb080e936ba70c0524a8759799f42458e
author: Sachin Setiya
committer: Sachin Setiya
timestamp: 2019-10-01 00:56:51 +0530
message:
MDEV-20574 Position of events reported by mysqlbinlog is wrong with encrypted binlogs, SHOW BINLOG EVENTS reports the correct one.
Analysis
Mysqlbinlog output for encrypted binary log
#Q> insert into tab1 values (3,'row 003')
#190912 17:36:35 server id 10221 end_log_pos 980 CRC32 0x53bcb3d3 Table_map: `test`.`tab1` mapped to number 19
# at 940
#190912 17:36:35 server id 10221 end_log_pos 1026 CRC32 0xf2ae5136 Write_rows: table id 19 flags: STMT_END_F
Here we can see Table_map_log_event ends at 980 but Next event starts at 940.
And the reason for that is we do not send START_ENCRYPTION_EVENT to the server
Solution:-
Send Start_encryption_log_event as Ignorable_log_event to slave(mysqlbinlog),
So that mysqlbinlog can update its log_pos.
Since Slave can request multiple FORMAT_DESCRIPTION_EVENT while master does not
have so We only update slave master pos when master actually have the
FORMAT_DESCRIPTION_EVENT. Similar logic should be applied for START_ENCRYPTION_EVENT.
---
.../binlog_encryption/binlog_row_annotate.result | 9 ++++
.../suite/binlog_encryption/mysqlbinlog.result | 1 +
.../suite/binlog_encryption/mysqlbinlog.test | 3 ++
sql/log_event.cc | 34 ++++++------
sql/slave.cc | 11 ++++
sql/sql_repl.cc | 63 +++++++++++++++-------
6 files changed, 86 insertions(+), 35 deletions(-)
diff --git a/mysql-test/suite/binlog_encryption/binlog_row_annotate.result b/mysql-test/suite/binlog_encryption/binlog_row_annotate.result
index 88c690a8bb7..9b843dc8a6b 100644
--- a/mysql-test/suite/binlog_encryption/binlog_row_annotate.result
+++ b/mysql-test/suite/binlog_encryption/binlog_row_annotate.result
@@ -104,6 +104,9 @@ DELIMITER /*!*/;
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
# at #
+#010909 4:46:40 server id # end_log_pos # Ignorable
+# Ignorable event type 164 (Start_encryption)
+# at #
#010909 4:46:40 server id # end_log_pos # Gtid list []
# at #
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
@@ -336,6 +339,9 @@ DELIMITER /*!*/;
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
# at #
+#010909 4:46:40 server id # end_log_pos # Ignorable
+# Ignorable event type 164 (Start_encryption)
+# at #
#010909 4:46:40 server id # end_log_pos # Gtid list []
# at #
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
@@ -495,6 +501,9 @@ DELIMITER /*!*/;
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
# at #
+#010909 4:46:40 server id # end_log_pos # Ignorable
+# Ignorable event type 164 (Start_encryption)
+# at #
#010909 4:46:40 server id # end_log_pos # Gtid list []
# at #
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
diff --git a/mysql-test/suite/binlog_encryption/mysqlbinlog.result b/mysql-test/suite/binlog_encryption/mysqlbinlog.result
index 71758f7d6e7..e97e0569571 100644
--- a/mysql-test/suite/binlog_encryption/mysqlbinlog.result
+++ b/mysql-test/suite/binlog_encryption/mysqlbinlog.result
@@ -4,3 +4,4 @@ INSERT INTO t1 VALUES (1),(2),(3);
REPLACE INTO t1 VALUES (4);
DROP TABLE t1;
FLUSH LOGS;
+FOUND 1 /Ignorable event type 164.*/ in binlog_enc.sql
diff --git a/mysql-test/suite/binlog_encryption/mysqlbinlog.test b/mysql-test/suite/binlog_encryption/mysqlbinlog.test
index b80388aaa45..108dbd8782f 100644
--- a/mysql-test/suite/binlog_encryption/mysqlbinlog.test
+++ b/mysql-test/suite/binlog_encryption/mysqlbinlog.test
@@ -17,5 +17,8 @@ let outfile=$MYSQLTEST_VARDIR/tmp/binlog_enc.sql;
exec $MYSQL_BINLOG $local > $outfile;
exec $MYSQL_BINLOG $local --force-read >> $outfile;
exec $MYSQL_BINLOG $remote >> $outfile;
+--let SEARCH_FILE= $outfile
+--let SEARCH_PATTERN= Ignorable event type 164.*
+--source include/search_pattern_in_file.inc
remove_file $outfile;
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 0e0d69b515c..66f1d6bc790 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -2077,6 +2077,19 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
alg != BINLOG_CHECKSUM_ALG_OFF))
event_len= event_len - BINLOG_CHECKSUM_LEN;
+ /*
+ Create an object of Ignorable_log_event for unrecognized sub-class.
+ So that SLAVE SQL THREAD will only update the position and continue.
+ We should look for this flag first instead of judging by event_type
+ Any event can be Ignorable_log_event if it has this flag on.
+ look into @note of Ignorable_log_event
+ */
+ if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)
+ {
+ ev= new Ignorable_log_event(buf, fdle,
+ get_type_str((Log_event_type) event_type));
+ goto exit;
+ }
switch(event_type) {
case QUERY_EVENT:
ev = new Query_log_event(buf, event_len, fdle, QUERY_EVENT);
@@ -2203,24 +2216,13 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
ev = new Start_encryption_log_event(buf, event_len, fdle);
break;
default:
- /*
- Create an object of Ignorable_log_event for unrecognized sub-class.
- So that SLAVE SQL THREAD will only update the position and continue.
- */
- if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)
- {
- ev= new Ignorable_log_event(buf, fdle,
- get_type_str((Log_event_type) event_type));
- }
- else
- {
- DBUG_PRINT("error",("Unknown event code: %d",
- (uchar) buf[EVENT_TYPE_OFFSET]));
- ev= NULL;
- break;
- }
+ DBUG_PRINT("error",("Unknown event code: %d",
+ (uchar) buf[EVENT_TYPE_OFFSET]));
+ ev= NULL;
+ break;
}
}
+exit:
if (ev)
{
diff --git a/sql/slave.cc b/sql/slave.cc
index 88a80029bba..1bc21f8895b 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -6317,7 +6317,18 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
mi->last_queued_gtid.seq_no == 1000)
goto skip_relay_logging;
});
+ goto default_action;
#endif
+ case START_ENCRYPTION_EVENT:
+ if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)
+ {
+ /*
+ If the event was not requested by the slave (the slave did not ask for
+ it), i.e. has end_log_pos=0, we do not increment mi->master_log_pos
+ */
+ inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0;
+ break;
+ }
/* fall through */
default:
default_action:
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index edff750a9a3..8a6bea34fb4 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -399,16 +399,27 @@ static int send_file(THD *thd)
/**
Internal to mysql_binlog_send() routine that recalculates checksum for
- a FD event (asserted) that needs additional arranment prior sending to slave.
+ 1. FD event (asserted) that needs additional arranment prior sending to slave.
+ 2. Start_encryption_log_event whose Ignored flag is set
+TODO DBUG_ASSERT can be removed if this function is used for more general cases
*/
-inline void fix_checksum(String *packet, ulong ev_offset)
+
+inline void fix_checksum(enum_binlog_checksum_alg checksum_alg, String *packet,
+ ulong ev_offset)
{
+ if (checksum_alg == BINLOG_CHECKSUM_ALG_OFF ||
+ checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
+ return;
/* recalculate the crc for this event */
uint data_len = uint4korr(packet->ptr() + ev_offset + EVENT_LEN_OFFSET);
ha_checksum crc;
- DBUG_ASSERT(data_len ==
+ DBUG_ASSERT((data_len ==
LOG_EVENT_MINIMAL_HEADER_LEN + FORMAT_DESCRIPTION_HEADER_LEN +
- BINLOG_CHECKSUM_ALG_DESC_LEN + BINLOG_CHECKSUM_LEN);
+ BINLOG_CHECKSUM_ALG_DESC_LEN + BINLOG_CHECKSUM_LEN) ||
+ (data_len ==
+ LOG_EVENT_MINIMAL_HEADER_LEN + BINLOG_CRYPTO_SCHEME_LENGTH +
+ BINLOG_KEY_VERSION_LENGTH + BINLOG_NONCE_LENGTH +
+ BINLOG_CHECKSUM_LEN));
crc= my_checksum(0, (uchar *)packet->ptr() + ev_offset, data_len -
BINLOG_CHECKSUM_LEN);
int4store(packet->ptr() + ev_offset + data_len - BINLOG_CHECKSUM_LEN, crc);
@@ -2135,6 +2146,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
THD *thd= info->thd;
String *packet= info->packet;
Log_event_type event_type;
+ bool initial_log_pos= info->clear_initial_log_pos;
DBUG_ENTER("send_format_descriptor_event");
/**
@@ -2233,7 +2245,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
(*packet)[FLAGS_OFFSET+ev_offset] &= ~LOG_EVENT_BINLOG_IN_USE_F;
- if (info->clear_initial_log_pos)
+ if (initial_log_pos)
{
info->clear_initial_log_pos= false;
/*
@@ -2251,9 +2263,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
ST_CREATED_OFFSET+ev_offset, (ulong) 0);
/* fix the checksum due to latest changes in header */
- if (info->current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
- info->current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
- fix_checksum(packet, ev_offset);
+ fix_checksum(info->current_checksum_alg, packet, ev_offset);
}
else if (info->using_gtid_state)
{
@@ -2274,9 +2284,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
{
int4store((char*) packet->ptr()+LOG_EVENT_MINIMAL_HEADER_LEN+
ST_CREATED_OFFSET+ev_offset, (ulong) 0);
- if (info->current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
- info->current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
- fix_checksum(packet, ev_offset);
+ fix_checksum(info->current_checksum_alg, packet, ev_offset);
}
}
@@ -2289,12 +2297,16 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
}
/*
- Read the following Start_encryption_log_event but don't send it to slave.
- Slave doesn't need to know whether master's binlog is encrypted,
- and if it'll want to encrypt its logs, it should generate its own
- random nonce, not use the one from the master.
+ Read the following Start_encryption_log_event and send it to slave as
+ Ignorable_log_event. Although Slave doesn't need to know whether master's
+ binlog is encrypted but it needs to update slave log pos (for mysqlbinlog).
+
+ If slave want to encrypt its logs, it should generate its own
+ random nonce, it should not use the one from the master.
*/
- packet->length(0);
+ /* reset transmit packet for the event read from binary log file */
+ if (reset_transmit_packet(info, info->flags, &ev_offset, &info->errmsg))
+ DBUG_RETURN(1);
info->last_pos= linfo->pos;
error= Log_event::read_log_event(log, packet, info->fdev,
opt_master_verify_checksum
@@ -2308,12 +2320,13 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
DBUG_RETURN(1);
}
- event_type= (Log_event_type)((uchar)(*packet)[LOG_EVENT_OFFSET]);
+ event_type= (Log_event_type)((uchar)(*packet)[LOG_EVENT_OFFSET + ev_offset]);
if (event_type == START_ENCRYPTION_EVENT)
{
Start_encryption_log_event *sele= (Start_encryption_log_event *)
- Log_event::read_log_event(packet->ptr(), packet->length(), &info->errmsg,
- info->fdev, BINLOG_CHECKSUM_ALG_OFF);
+ Log_event::read_log_event(packet->ptr() + ev_offset, packet->length()
+ - ev_offset, &info->errmsg, info->fdev,
+ BINLOG_CHECKSUM_ALG_OFF);
if (!sele)
{
info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
@@ -2327,6 +2340,18 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
delete sele;
DBUG_RETURN(1);
}
+ /* Make it Ignorable_log_event and send it */
+ (*packet)[FLAGS_OFFSET+ev_offset] |= LOG_EVENT_IGNORABLE_F;
+ if (initial_log_pos)
+ int4store((char*) packet->ptr()+LOG_POS_OFFSET+ev_offset, (ulong) 0);
+ /* fix the checksum due to latest changes in header */
+ fix_checksum(info->current_checksum_alg, packet, ev_offset);
+ if (my_net_write(info->net, (uchar*) packet->ptr(), packet->length()))
+ {
+ info->errmsg= "Failed on my_net_write()";
+ info->error= ER_UNKNOWN_ERROR;
+ DBUG_RETURN(1);
+ }
delete sele;
}
else if (start_pos == BIN_LOG_HEADER_SIZE)
1
0

[Commits] 83fedc99799: Support Create_time and Update_time in MyRocks table status
by psergey 27 Sep '19
by psergey 27 Sep '19
27 Sep '19
revision-id: 83fedc99799692902995f223ffd0784195d26c3c (fb-prod201903-144-g83fedc99799)
parent(s): d97c0c628e5dc60abd725f6a7120a8d87b09321e
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-09-27 15:40:10 +0300
message:
Support Create_time and Update_time in MyRocks table status
- Create_time is stored in the MyRocks' internal data dictionary.
- Update_time is in-memory only (like in InnoDB).
(variant #3, review input addressed, on-disk data format is backward-
and forward-compatible)
---
mysql-test/suite/rocksdb/include/bulk_load.inc | 4 +-
.../suite/rocksdb/include/bulk_load_unsorted.inc | 4 +-
mysql-test/suite/rocksdb/r/bulk_load.result | 12 +--
mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result | 12 +--
.../rocksdb/r/bulk_load_rev_cf_and_data.result | 12 +--
.../suite/rocksdb/r/bulk_load_rev_data.result | 12 +--
.../suite/rocksdb/r/bulk_load_unsorted.result | 12 +--
.../suite/rocksdb/r/bulk_load_unsorted_rev.result | 12 +--
mysql-test/suite/rocksdb/r/issue255.result | 16 ++--
mysql-test/suite/rocksdb/r/rocksdb.result | 6 +-
.../suite/rocksdb/r/show_table_status.result | 85 +++++++++++++++++++++-
.../suite/rocksdb/r/show_table_status_debug.result | 13 ++++
mysql-test/suite/rocksdb/r/truncate_table.result | 8 +-
mysql-test/suite/rocksdb/t/issue255.test | 17 +++--
mysql-test/suite/rocksdb/t/rocksdb.test | 4 +-
mysql-test/suite/rocksdb/t/show_table_status.test | 81 ++++++++++++++++++++-
.../suite/rocksdb/t/show_table_status_debug.test | 21 ++++++
mysql-test/suite/rocksdb/t/truncate_table.test | 8 +-
storage/rocksdb/ha_rocksdb.cc | 38 ++++++++++
storage/rocksdb/rdb_datadic.cc | 71 ++++++++++++++++++
storage/rocksdb/rdb_datadic.h | 18 +++++
21 files changed, 392 insertions(+), 74 deletions(-)
diff --git a/mysql-test/suite/rocksdb/include/bulk_load.inc b/mysql-test/suite/rocksdb/include/bulk_load.inc
index 1b79825e507..7e163602202 100644
--- a/mysql-test/suite/rocksdb/include/bulk_load.inc
+++ b/mysql-test/suite/rocksdb/include/bulk_load.inc
@@ -121,12 +121,12 @@ set rocksdb_bulk_load=0;
--remove_file $file
# Make sure row count index stats are correct
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
ANALYZE TABLE t1, t2, t3;
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
# Make sure all the data is there.
diff --git a/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc b/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
index 5cdc76a32d4..812af0401aa 100644
--- a/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
+++ b/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
@@ -119,12 +119,12 @@ set rocksdb_bulk_load=0;
--remove_file $file
# Make sure row count index stats are correct
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
ANALYZE TABLE t1, t2, t3;
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
# Make sure all the data is there.
diff --git a/mysql-test/suite/rocksdb/r/bulk_load.result b/mysql-test/suite/rocksdb/r/bulk_load.result
index a36f99a7619..76db28e66bd 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
index b5d3e252c5d..ae363f7ec0c 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
index f46acd41080..dd8dd7e60a8 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
index 3389968ef37..96738ae62e2 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result b/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
index 924032549ac..87fc63af2da 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
@@ -70,9 +70,9 @@ LOAD DATA INFILE <input_file> INTO TABLE t3;
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -80,9 +80,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
select count(a) from t1;
count(a)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result b/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
index 3cc9fb8e459..8e0914f0159 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
@@ -70,9 +70,9 @@ LOAD DATA INFILE <input_file> INTO TABLE t3;
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -80,9 +80,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
select count(a) from t1;
count(a)
5000000
diff --git a/mysql-test/suite/rocksdb/r/issue255.result b/mysql-test/suite/rocksdb/r/issue255.result
index c1ce3be2276..b45b3b5afc7 100644
--- a/mysql-test/suite/rocksdb/r/issue255.result
+++ b/mysql-test/suite/rocksdb/r/issue255.result
@@ -2,7 +2,7 @@ CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 6 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ('538647864786478647864');
Warnings:
Warning 1264 Out of range value for column 'pk' at row 1
@@ -12,7 +12,7 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
SELECT * FROM t1;
@@ -21,7 +21,7 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
SELECT * FROM t1;
@@ -30,13 +30,13 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 # # NULL latin1_swedish_ci NULL
DROP TABLE t1;
CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 6 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES (1000);
Warnings:
Warning 1264 Out of range value for column 'pk' at row 1
@@ -46,7 +46,7 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
SELECT * FROM t1;
@@ -55,7 +55,7 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
SELECT * FROM t1;
@@ -64,5 +64,5 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/r/rocksdb.result b/mysql-test/suite/rocksdb/r/rocksdb.result
index 088eb050f6f..a631d58ac69 100644
--- a/mysql-test/suite/rocksdb/r/rocksdb.result
+++ b/mysql-test/suite/rocksdb/r/rocksdb.result
@@ -1417,7 +1417,7 @@ create table t1 (i int primary key auto_increment) engine=RocksDB;
insert into t1 values (null),(null);
show table status like 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 # 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 # 0 0 0 3 # # NULL latin1_swedish_ci NULL
drop table t1;
#
# Fix Issue #4: Crash when using pseudo-unique keys
@@ -2612,7 +2612,7 @@ CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(-1),(0);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 3 # # NULL latin1_swedish_ci NULL
SELECT * FROM t1;
a
-1
@@ -2623,7 +2623,7 @@ CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(10),(0);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 12 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 12 # # NULL latin1_swedish_ci NULL
SELECT * FROM t1;
a
1
diff --git a/mysql-test/suite/rocksdb/r/show_table_status.result b/mysql-test/suite/rocksdb/r/show_table_status.result
index 29140f045e4..345882040ef 100644
--- a/mysql-test/suite/rocksdb/r/show_table_status.result
+++ b/mysql-test/suite/rocksdb/r/show_table_status.result
@@ -7,12 +7,12 @@ set global rocksdb_force_flush_memtable_now = true;
CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' );
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL utf8_general_ci NULL
+t1 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL utf8_general_ci NULL
SHOW TABLE STATUS WHERE name LIKE 't2';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
DROP TABLE t1, t2, t3;
CREATE DATABASE `db_new..............................................end`;
USE `db_new..............................................end`;
@@ -22,3 +22,80 @@ SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.table_statistics WHERE T
TABLE_SCHEMA db_new..............................................end
TABLE_NAME t1_new..............................................end
DROP DATABASE `db_new..............................................end`;
+#
+# MDEV-17171: Bug: RocksDB Tables do not have "Creation Date"
+#
+use test;
+create table t1 (a int) engine=rocksdb;
+select create_time is not null, update_time, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time check_time
+1 NULL NULL
+insert into t1 values (1);
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time is not null check_time
+1 1 NULL
+flush tables;
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time is not null check_time
+1 1 NULL
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+select sleep(3);
+sleep(3)
+0
+insert into t1 values (2);
+select
+create_time=@create_tm /* should not change */ ,
+timestampdiff(second, @update_tm, update_time) > 2,
+check_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=@create_tm 1
+timestampdiff(second, @update_tm, update_time) > 2 1
+check_time NULL
+#
+# Check how create_time survives ALTER TABLE.
+# First, an ALTER TABLE that re-creates the table:
+alter table t1 add b int;
+select
+create_time<>@create_tm /* should change */,
+create_time IS NOT NULL,
+update_time IS NULL
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time<>@create_tm 1
+create_time IS NOT NULL 1
+update_time IS NULL 1
+insert into t1 values (5,5);
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+# Then, an in-place ALTER TABLE:
+alter table t1 add key (a);
+select
+create_time=@create_tm /* should not change */,
+update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=@create_tm 1
+update_time NULL
+#
+# Check what is left after server restart
+#
+# Save t1's creation time
+create table t2 as
+select create_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+select
+create_time=(select create_time from t2) /* should change */,
+update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=(select create_time from t2) 1
+update_time NULL
+drop table t1, t2;
diff --git a/mysql-test/suite/rocksdb/r/show_table_status_debug.result b/mysql-test/suite/rocksdb/r/show_table_status_debug.result
new file mode 100644
index 00000000000..25b36954ef6
--- /dev/null
+++ b/mysql-test/suite/rocksdb/r/show_table_status_debug.result
@@ -0,0 +1,13 @@
+set session debug='+d,myrocks_produce_ddl_entry_v1';
+create table t1 (a int) engine=rocksdb;
+insert into t1 values (123);
+set session debug='-d,myrocks_produce_ddl_entry_v1';
+select * from t1;
+a
+123
+select create_time, update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time update_time
+NULL NULL
+drop table t1;
diff --git a/mysql-test/suite/rocksdb/r/truncate_table.result b/mysql-test/suite/rocksdb/r/truncate_table.result
index 1544256f194..79b266a2453 100644
--- a/mysql-test/suite/rocksdb/r/truncate_table.result
+++ b/mysql-test/suite/rocksdb/r/truncate_table.result
@@ -9,19 +9,19 @@ DROP TABLE t1;
CREATE TABLE t1 (a INT KEY AUTO_INCREMENT, c CHAR(8)) ENGINE=rocksdb;
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 1 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 (c) VALUES ('a'),('b'),('c');
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 4 # # NULL latin1_swedish_ci NULL
TRUNCATE TABLE t1;
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 1 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 (c) VALUES ('d');
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 2 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 2 # # NULL latin1_swedish_ci NULL
SELECT a,c FROM t1;
a c
1 d
diff --git a/mysql-test/suite/rocksdb/t/issue255.test b/mysql-test/suite/rocksdb/t/issue255.test
index 370dece0c6c..686f45b4056 100644
--- a/mysql-test/suite/rocksdb/t/issue255.test
+++ b/mysql-test/suite/rocksdb/t/issue255.test
@@ -3,24 +3,25 @@
CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 VALUES ('538647864786478647864');
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SELECT * FROM t1;
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
DROP TABLE t1;
@@ -28,24 +29,24 @@ DROP TABLE t1;
CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 VALUES (1000);
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/t/rocksdb.test b/mysql-test/suite/rocksdb/t/rocksdb.test
index 5eff0fbf38f..7dcae569c92 100644
--- a/mysql-test/suite/rocksdb/t/rocksdb.test
+++ b/mysql-test/suite/rocksdb/t/rocksdb.test
@@ -1198,7 +1198,7 @@ drop table t1;
create table t1 (i int primary key auto_increment) engine=RocksDB;
insert into t1 values (null),(null);
---replace_column 7 #
+--replace_column 7 # 12 # 13 #
show table status like 't1';
drop table t1;
@@ -1903,11 +1903,13 @@ DROP TABLE t1;
# value is 4 while MyRocks will show it as 3.
CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(-1),(0);
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
SELECT * FROM t1;
DROP TABLE t1;
CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(10),(0);
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
SELECT * FROM t1;
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/t/show_table_status.test b/mysql-test/suite/rocksdb/t/show_table_status.test
index 29cc2ccfb5e..59effcc788c 100644
--- a/mysql-test/suite/rocksdb/t/show_table_status.test
+++ b/mysql-test/suite/rocksdb/t/show_table_status.test
@@ -24,7 +24,7 @@ set global rocksdb_force_flush_memtable_now = true;
CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
---replace_column 6 # 7 #
+--replace_column 6 # 7 # 12 # 13 #
SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' );
# Some statistics don't get updated as quickly. The Data_length and
@@ -48,7 +48,7 @@ set global rocksdb_force_flush_memtable_now = true;
# We expect the number of rows to be 10000. Data_len and Avg_row_len
# may vary, depending on built-in compression library.
---replace_column 6 # 7 #
+--replace_column 6 # 7 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't2';
DROP TABLE t1, t2, t3;
@@ -62,3 +62,80 @@ CREATE TABLE `t1_new..............................................end`(a int) en
INSERT INTO `t1_new..............................................end` VALUES (1);
--query_vertical SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.table_statistics WHERE TABLE_NAME = 't1_new..............................................end'
DROP DATABASE `db_new..............................................end`;
+--echo #
+--echo # MDEV-17171: Bug: RocksDB Tables do not have "Creation Date"
+--echo #
+use test;
+create table t1 (a int) engine=rocksdb;
+
+select create_time is not null, update_time, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+insert into t1 values (1);
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+flush tables;
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+select sleep(3);
+insert into t1 values (2);
+
+--vertical_results
+select
+ create_time=@create_tm /* should not change */ ,
+ timestampdiff(second, @update_tm, update_time) > 2,
+ check_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo #
+--echo # Check how create_time survives ALTER TABLE.
+--echo # First, an ALTER TABLE that re-creates the table:
+alter table t1 add b int;
+select
+ create_time<>@create_tm /* should change */,
+ create_time IS NOT NULL,
+ update_time IS NULL
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+insert into t1 values (5,5);
+
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo # Then, an in-place ALTER TABLE:
+alter table t1 add key (a);
+
+select
+ create_time=@create_tm /* should not change */,
+ update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo #
+--echo # Check what is left after server restart
+--echo #
+
+--echo # Save t1's creation time
+create table t2 as
+select create_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--source include/restart_mysqld.inc
+
+select
+ create_time=(select create_time from t2) /* should change */,
+ update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+drop table t1, t2;
diff --git a/mysql-test/suite/rocksdb/t/show_table_status_debug.test b/mysql-test/suite/rocksdb/t/show_table_status_debug.test
new file mode 100644
index 00000000000..7dcc023cd68
--- /dev/null
+++ b/mysql-test/suite/rocksdb/t/show_table_status_debug.test
@@ -0,0 +1,21 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+#
+# Check the upgrade from Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1 to _2
+#
+
+set session debug='+d,myrocks_produce_ddl_entry_v1';
+
+create table t1 (a int) engine=rocksdb;
+insert into t1 values (123);
+set session debug='-d,myrocks_produce_ddl_entry_v1';
+
+--source include/restart_mysqld.inc
+
+select * from t1;
+select create_time, update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+drop table t1;
diff --git a/mysql-test/suite/rocksdb/t/truncate_table.test b/mysql-test/suite/rocksdb/t/truncate_table.test
index a61488654a3..1001eeb6cde 100644
--- a/mysql-test/suite/rocksdb/t/truncate_table.test
+++ b/mysql-test/suite/rocksdb/t/truncate_table.test
@@ -29,22 +29,22 @@ DROP TABLE t1;
CREATE TABLE t1 (a INT KEY AUTO_INCREMENT, c CHAR(8)) ENGINE=rocksdb;
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 (c) VALUES ('a'),('b'),('c');
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
TRUNCATE TABLE t1;
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 (c) VALUES ('d');
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--sorted_result
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index d6c89087bea..446e2d64c1c 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -2456,6 +2456,8 @@ class Rdb_transaction {
bool m_is_delayed_snapshot = false;
bool m_is_two_phase = false;
+ std::unordered_set<Rdb_tbl_def*> modified_tables;
+
private:
/*
Number of write operations this transaction had when we took the last
@@ -3139,6 +3141,25 @@ class Rdb_transaction {
virtual void start_tx() = 0;
virtual void start_stmt() = 0;
+ protected:
+ // Non-virtual functions with actions to be done on transaction start and
+ // commit.
+ void on_commit() {
+ time_t tm;
+ tm = time(nullptr);
+ for (auto &it : modified_tables) {
+ it->update_time = tm;
+ }
+ modified_tables.clear();
+ }
+ void on_rollback() {
+ modified_tables.clear();
+ }
+ public:
+ void log_table_write_op(Rdb_tbl_def *tbl) {
+ modified_tables.insert(tbl);
+ }
+
void set_initial_savepoint() {
/*
Set the initial savepoint. If the first statement in the transaction
@@ -3322,7 +3343,9 @@ class Rdb_transaction_impl : public Rdb_transaction {
goto error;
}
+ on_commit();
error:
+ on_rollback();
/* Save the transaction object to be reused */
release_tx();
@@ -3338,6 +3361,7 @@ class Rdb_transaction_impl : public Rdb_transaction {
public:
void rollback() override {
+ on_rollback();
m_write_count = 0;
m_insert_count = 0;
m_update_count = 0;
@@ -3659,7 +3683,9 @@ class Rdb_writebatch_impl : public Rdb_transaction {
res = true;
goto error;
}
+ on_commit();
error:
+ on_rollback();
reset();
m_write_count = 0;
@@ -3691,6 +3717,7 @@ class Rdb_writebatch_impl : public Rdb_transaction {
}
void rollback() override {
+ on_rollback();
m_write_count = 0;
m_insert_count = 0;
m_update_count = 0;
@@ -7549,6 +7576,7 @@ int ha_rocksdb::create_table(const std::string &table_name,
/* Create table/key descriptions and put them into the data dictionary */
m_tbl_def = new Rdb_tbl_def(table_name);
+ m_tbl_def->set_create_time(time(nullptr));
uint n_keys = table_arg->s->keys;
@@ -10177,6 +10205,8 @@ int ha_rocksdb::update_write_row(const uchar *const old_data,
row_info.tx->incr_insert_count();
}
+ row_info.tx->log_table_write_op(m_tbl_def);
+
if (do_bulk_commit(row_info.tx)) {
DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD);
}
@@ -10648,6 +10678,7 @@ int ha_rocksdb::delete_row(const uchar *const buf) {
}
tx->incr_delete_count();
+ tx->log_table_write_op(m_tbl_def);
if (do_bulk_commit(tx)) {
DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD);
@@ -10802,6 +10833,12 @@ int ha_rocksdb::info(uint flag) {
k->rec_per_key[j] = x;
}
}
+
+ stats.create_time = m_tbl_def->get_create_time();
+ }
+
+ if (flag & HA_STATUS_TIME) {
+ stats.update_time = m_tbl_def->update_time;
}
if (flag & HA_STATUS_ERRKEY) {
@@ -12603,6 +12640,7 @@ bool ha_rocksdb::prepare_inplace_alter_table(
m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed);
new_tdef->m_hidden_pk_val =
m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed);
+ new_tdef->set_create_time(m_tbl_def->get_create_time());
if (create_key_defs(altered_table, new_tdef, table, m_tbl_def)) {
/* Delete the new key descriptors */
diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc
index c0741a1ce9b..826a50ca726 100644
--- a/storage/rocksdb/rdb_datadic.cc
+++ b/storage/rocksdb/rdb_datadic.cc
@@ -3560,6 +3560,25 @@ bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict,
const rocksdb::Slice svalue(indexes.c_ptr(), indexes.length());
dict->put_key(batch, key, svalue);
+
+ // Now, save the creation timestamp
+ bool simulate_old_version = false;
+ DBUG_EXECUTE_IF("myrocks_produce_ddl_entry_v1",
+ simulate_old_version = true; );
+ if (!simulate_old_version) {
+ Rdb_buf_writer<FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE> ts_key;
+ ts_key.write_index(Rdb_key_def::TABLE_CREATE_TIMESTAMP);
+ ts_key.write(m_dbname_tablename.c_str(), m_dbname_tablename.size());
+
+ const int ts_and_vers_size = Rdb_key_def::VERSION_SIZE +
+ Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE;
+ Rdb_buf_writer<ts_and_vers_size> ts_val;
+ ts_val.write_uint16(Rdb_key_def::TABLE_CREATE_TIMESTAMP_VERSION);
+ ts_val.write_uint64(create_time);
+
+ dict->put_key(batch, ts_key.to_slice(), ts_val.to_slice());
+ }
+
return false;
}
@@ -3974,6 +3993,50 @@ bool Rdb_ddl_manager::validate_schemas(void) {
return !has_errors;
}
+
+void Rdb_ddl_manager::load_create_timestamp(Rdb_tbl_def *tdef) {
+ Rdb_buf_writer<FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE> lookup_key;
+
+ lookup_key.write_index(Rdb_key_def::TABLE_CREATE_TIMESTAMP);
+ const std::string &dbname_tablename = tdef->full_tablename();
+ lookup_key.write(dbname_tablename.c_str(), dbname_tablename.size());
+
+ tdef->set_create_time(0); // The default is SQL NULL.
+
+ std::string ts_value;
+ auto s = m_dict->get_value(lookup_key.to_slice(), &ts_value);
+ // Create timestamp may or may not be present. If it is not present, it's not
+ // an error.
+ if (s.ok()) {
+ if (ts_value.size() < Rdb_key_def::VERSION_SIZE) {
+ // NO_LINT_DEBUG
+ sql_print_warning(
+ "RocksDB: Invalid TABLE_CREATE_TIMESTAMP record for table %s",
+ dbname_tablename.c_str());
+ }
+
+ const uchar *ptr = reinterpret_cast<const uchar *>(ts_value.data());
+ const int version = rdb_netbuf_read_uint16(&ptr);
+
+ if (version != Rdb_key_def::TABLE_CREATE_TIMESTAMP_VERSION) {
+ // NO_LINT_DEBUG
+ sql_print_warning(
+ "RocksDB: TABLE_CREATE_TIMESTAMP Version was not expected."
+ "Expected: %d, Actual: %d",
+ Rdb_key_def::TABLE_CREATE_TIMESTAMP_VERSION, version);
+ return;
+ }
+ if (ts_value.size() != 2 + Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE) {
+ // NO_LINT_DEBUG
+ sql_print_warning(
+ "RocksDB: Unexpected size of TABLE_CREATE_TIMESTAMP record: %ld",
+ ts_value.size());
+ return;
+ }
+ tdef->set_create_time(rdb_netbuf_read_uint64(&ptr));
+ }
+}
+
bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
Rdb_cf_manager *const cf_manager,
const uint32_t validate_tables) {
@@ -4109,6 +4172,7 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
tdef->m_key_count > 0 ? tdef->m_key_descr_arr[0]->m_stats.m_rows : 0, 0,
0);
+ load_create_timestamp(tdef);
put(tdef);
i++;
}
@@ -4436,6 +4500,12 @@ void Rdb_ddl_manager::remove(Rdb_tbl_def *const tbl,
m_dict->delete_key(batch, key_writer.to_slice());
+ // Also delete the table creation timestamp record
+ Rdb_buf_writer<FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE> ts_key;
+ ts_key.write_index(Rdb_key_def::TABLE_CREATE_TIMESTAMP);
+ ts_key.write(dbname_tablename.c_str(), dbname_tablename.size());
+ m_dict->delete_key(batch, ts_key.to_slice());
+
const auto it = m_ddl_map.find(dbname_tablename);
if (it != m_ddl_map.end()) {
// Free Rdb_tbl_def
@@ -4471,6 +4541,7 @@ bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to,
rec->m_hidden_pk_val.load(std::memory_order_relaxed);
new_rec->m_tbl_stats = rec->m_tbl_stats;
+ new_rec->set_create_time(rec->get_create_time());
// so that it's not free'd when deleting the old rec
rec->m_key_descr_arr = nullptr;
diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h
index 416857cad38..d2ff6d95625 100644
--- a/storage/rocksdb/rdb_datadic.h
+++ b/storage/rocksdb/rdb_datadic.h
@@ -465,6 +465,7 @@ class Rdb_key_def {
CF_NUMBER_SIZE = 4,
CF_FLAG_SIZE = 4,
PACKED_SIZE = 4, // one int
+ TABLE_CREATE_TIMESTAMP_SIZE = 8,
};
// bit flags for combining bools when writing to disk
@@ -500,6 +501,7 @@ class Rdb_key_def {
DDL_CREATE_INDEX_ONGOING = 8,
AUTO_INC = 9,
DROPPED_CF = 10,
+ TABLE_CREATE_TIMESTAMP = 11,
END_DICT_INDEX_ID = 255
};
@@ -514,6 +516,7 @@ class Rdb_key_def {
DDL_CREATE_INDEX_ONGOING_VERSION = 1,
AUTO_INCREMENT_VERSION = 1,
DROPPED_CF_VERSION = 1,
+ TABLE_CREATE_TIMESTAMP_VERSION = 1,
// Version for index stats is stored in IndexStats struct
};
@@ -1116,6 +1119,12 @@ class Rdb_tbl_def {
~Rdb_tbl_def();
+ // time values are shown in SHOW TABLE STATUS
+ void set_create_time(time_t val) { create_time = val; }
+ time_t get_create_time() { return create_time; }
+
+ time_t update_time = 0; // in-memory only value, maintained right here
+
void check_and_set_read_free_rpl_table();
/* Number of indexes */
@@ -1161,6 +1170,9 @@ class Rdb_tbl_def {
const std::string &base_tablename() const { return m_tablename; }
const std::string &base_partition() const { return m_partition; }
GL_INDEX_ID get_autoincr_gl_index_id();
+
+ private:
+ time_t create_time = 0;
};
/*
@@ -1248,6 +1260,7 @@ class Rdb_ddl_manager {
void persist_stats(const bool sync = false);
void set_table_stats(const std::string &tbl_name);
+ void load_create_timestamp(Rdb_tbl_def *tdef);
/* Modify the mapping and write it to on-disk storage */
int put_and_write(Rdb_tbl_def *const key_descr,
@@ -1386,6 +1399,11 @@ class Rdb_binlog_manager {
key: Rdb_key_def::DROPPED_CF(0xa) + cf_id
value: version
+ 11.Table Name => crete_timestamp.
+ key: Rdb_key_def::TABLE_CREATE_TIMESTAMP(0xB) + dbname.tablename
+ value: version, create_timestamp.
+ create_timestamp is 8 bytes.
+
Data dictionary operations are atomic inside RocksDB. For example,
when creating a table with two indexes, it is necessary to call Put
three times. They have to be atomic. Rdb_dict_manager has a wrapper function
1
0

26 Sep '19
revision-id: 673e253724979fd9fe43a4a22bd7e1b2c3a5269e (mariadb-10.4.4-333-g673e2537249)
parent(s): 8887effe13ad87ba0460d4d3068fb5696f089bb0
author: Kristian Nielsen
committer: Kristian Nielsen
timestamp: 2019-09-26 17:43:26 +0200
message:
Fix missing memory barrier in wait_for_commit
The function wait_for_commit::wait_for_prior_commit() has a fast path
where it checks without locks if wakeup_subsequent_commits() has
already been called. This check was missing a memory barrier. The
waitee thread does two writes to variables `waitee' and
`wakeup_error', and if the waiting thread sees the first write it
_must_ also see the second or incorrect behaviour will occur. This
requires memory barriers between both the writes (release semantics)
and the reads (acquire semantics) of those two variables.
Other accesses to these variables are done under lock or where only
one thread will be accessing them, and can be done without barriers
(relaxed sematics).
---
sql/log.cc | 19 +++++++++++++------
sql/sql_class.cc | 25 ++++++++++++++-----------
sql/sql_class.h | 17 ++++++++++++-----
3 files changed, 39 insertions(+), 22 deletions(-)
diff --git a/sql/log.cc b/sql/log.cc
index 4f51a9a9c17..a88d5147898 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -7467,8 +7467,10 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry)
*/
wfc= orig_entry->thd->wait_for_commit_ptr;
orig_entry->queued_by_other= false;
- if (wfc && wfc->waitee)
+ if (wfc && wfc->waitee.load(std::memory_order_acquire))
{
+ wait_for_commit *loc_waitee;
+
mysql_mutex_lock(&wfc->LOCK_wait_commit);
/*
Do an extra check here, this time safely under lock.
@@ -7480,10 +7482,10 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry)
before setting the flag, so there is no risk that we can queue ahead of
it.
*/
- if (wfc->waitee && !wfc->waitee->commit_started)
+ if ((loc_waitee= wfc->waitee.load(std::memory_order_relaxed)) &&
+ !loc_waitee->commit_started)
{
PSI_stage_info old_stage;
- wait_for_commit *loc_waitee;
/*
By setting wfc->opaque_pointer to our own entry, we mark that we are
@@ -7505,7 +7507,8 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry)
&wfc->LOCK_wait_commit,
&stage_waiting_for_prior_transaction_to_commit,
&old_stage);
- while ((loc_waitee= wfc->waitee) && !orig_entry->thd->check_killed(1))
+ while ((loc_waitee= wfc->waitee.load(std::memory_order_relaxed)) &&
+ !orig_entry->thd->check_killed(1))
mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit);
wfc->opaque_pointer= NULL;
DBUG_PRINT("info", ("After waiting for prior commit, queued_by_other=%d",
@@ -7523,14 +7526,18 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry)
do
{
mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit);
- } while (wfc->waitee);
+ } while (wfc->waitee.load(std::memory_order_relaxed));
}
else
{
/* We were killed, so remove us from the list of waitee. */
wfc->remove_from_list(&loc_waitee->subsequent_commits_list);
mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
- wfc->waitee= NULL;
+ /*
+ This is the thread clearing its own status, it is no longer on
+ the list of waiters. So no memory barriers are needed here.
+ */
+ wfc->waitee.store(NULL, std::memory_order_relaxed);
orig_entry->thd->EXIT_COND(&old_stage);
/* Interrupted by kill. */
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 4eab241232b..ca179a39dc1 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -7230,7 +7230,7 @@ wait_for_commit::reinit()
{
subsequent_commits_list= NULL;
next_subsequent_commit= NULL;
- waitee= NULL;
+ waitee.store(NULL, std::memory_order_relaxed);
opaque_pointer= NULL;
wakeup_error= 0;
wakeup_subsequent_commits_running= false;
@@ -7308,8 +7308,9 @@ wait_for_commit::wakeup(int wakeup_error)
*/
mysql_mutex_lock(&LOCK_wait_commit);
- waitee= NULL;
this->wakeup_error= wakeup_error;
+ /* Memory barrier to make wakeup_error visible to the waiter thread. */
+ waitee.store(NULL, std::memory_order_release);
/*
Note that it is critical that the mysql_cond_signal() here is done while
still holding the mutex. As soon as we release the mutex, the waiter might
@@ -7340,9 +7341,10 @@ wait_for_commit::wakeup(int wakeup_error)
void
wait_for_commit::register_wait_for_prior_commit(wait_for_commit *waitee)
{
- DBUG_ASSERT(!this->waitee /* No prior registration allowed */);
+ DBUG_ASSERT(!this->waitee.load(std::memory_order_relaxed)
+ /* No prior registration allowed */);
wakeup_error= 0;
- this->waitee= waitee;
+ this->waitee.store(waitee, std::memory_order_relaxed);
mysql_mutex_lock(&waitee->LOCK_wait_commit);
/*
@@ -7351,7 +7353,7 @@ wait_for_commit::register_wait_for_prior_commit(wait_for_commit *waitee)
see comments on wakeup_subsequent_commits2() for details.
*/
if (waitee->wakeup_subsequent_commits_running)
- this->waitee= NULL;
+ this->waitee.store(NULL, std::memory_order_relaxed);
else
{
/*
@@ -7381,7 +7383,8 @@ wait_for_commit::wait_for_prior_commit2(THD *thd)
thd->ENTER_COND(&COND_wait_commit, &LOCK_wait_commit,
&stage_waiting_for_prior_transaction_to_commit,
&old_stage);
- while ((loc_waitee= this->waitee) && likely(!thd->check_killed(1)))
+ while ((loc_waitee= this->waitee.load(std::memory_order_relaxed)) &&
+ likely(!thd->check_killed(1)))
mysql_cond_wait(&COND_wait_commit, &LOCK_wait_commit);
if (!loc_waitee)
{
@@ -7404,14 +7407,14 @@ wait_for_commit::wait_for_prior_commit2(THD *thd)
do
{
mysql_cond_wait(&COND_wait_commit, &LOCK_wait_commit);
- } while (this->waitee);
+ } while (this->waitee.load(std::memory_order_relaxed));
if (wakeup_error)
my_error(ER_PRIOR_COMMIT_FAILED, MYF(0));
goto end;
}
remove_from_list(&loc_waitee->subsequent_commits_list);
mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
- this->waitee= NULL;
+ this->waitee.store(NULL, std::memory_order_relaxed);
wakeup_error= thd->killed_errno();
if (!wakeup_error)
@@ -7513,7 +7516,7 @@ wait_for_commit::unregister_wait_for_prior_commit2()
wait_for_commit *loc_waitee;
mysql_mutex_lock(&LOCK_wait_commit);
- if ((loc_waitee= this->waitee))
+ if ((loc_waitee= this->waitee.load(std::memory_order_relaxed)))
{
mysql_mutex_lock(&loc_waitee->LOCK_wait_commit);
if (loc_waitee->wakeup_subsequent_commits_running)
@@ -7526,7 +7529,7 @@ wait_for_commit::unregister_wait_for_prior_commit2()
See comments on wakeup_subsequent_commits2() for more details.
*/
mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
- while (this->waitee)
+ while (this->waitee.load(std::memory_order_relaxed))
mysql_cond_wait(&COND_wait_commit, &LOCK_wait_commit);
}
else
@@ -7534,7 +7537,7 @@ wait_for_commit::unregister_wait_for_prior_commit2()
/* Remove ourselves from the list in the waitee. */
remove_from_list(&loc_waitee->subsequent_commits_list);
mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
- this->waitee= NULL;
+ this->waitee.store(NULL, std::memory_order_relaxed);
}
}
wakeup_error= 0;
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 72cb8148895..1c81739865b 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -20,6 +20,7 @@
/* Classes in mysql */
+#include <atomic>
#include "dur_prop.h"
#include <waiting_threads.h>
#include "sql_const.h"
@@ -2018,7 +2019,7 @@ struct wait_for_commit
/*
The LOCK_wait_commit protects the fields subsequent_commits_list and
wakeup_subsequent_commits_running (for a waitee), and the pointer
- waiterr and associated COND_wait_commit (for a waiter).
+ waitee and associated COND_wait_commit (for a waiter).
*/
mysql_mutex_t LOCK_wait_commit;
mysql_cond_t COND_wait_commit;
@@ -2032,8 +2033,14 @@ struct wait_for_commit
When this is cleared for wakeup, the COND_wait_commit condition is
signalled.
+
+ This pointer is protected by LOCK_wait_commit. But there is also a "fast
+ path" where the waiter compares this to NULL without holding the lock.
+ Such read must be done with acquire semantics (and all corresponding
+ writes done with release semantics). This ensures that a wakeup with error
+ is reliably detected as (waitee==NULL && wakeup_error != 0).
*/
- wait_for_commit *waitee;
+ std::atomic<wait_for_commit *> waitee;
/*
Generic pointer for use by the transaction coordinator to optimise the
waiting for improved group commit.
@@ -2068,7 +2075,7 @@ struct wait_for_commit
Quick inline check, to avoid function call and locking in the common case
where no wakeup is registered, or a registered wait was already signalled.
*/
- if (waitee)
+ if (waitee.load(std::memory_order_acquire))
return wait_for_prior_commit2(thd);
else
{
@@ -2096,7 +2103,7 @@ struct wait_for_commit
}
void unregister_wait_for_prior_commit()
{
- if (waitee)
+ if (waitee.load(std::memory_order_relaxed))
unregister_wait_for_prior_commit2();
else
wakeup_error= 0;
@@ -2118,7 +2125,7 @@ struct wait_for_commit
}
next_ptr_ptr= &cur->next_subsequent_commit;
}
- waitee= NULL;
+ waitee.store(NULL, std::memory_order_relaxed);
}
void wakeup(int wakeup_error);
1
0

[Commits] 4720db9441f: MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
by varunraiko1803ï¼ gmail.com 24 Sep '19
by varunraiko1803ï¼ gmail.com 24 Sep '19
24 Sep '19
revision-id: 4720db9441fe9af7a69b59f2b097039ba256741e (mariadb-10.3.0-348-g4720db9441f)
parent(s): c9ad134e56cc70119aaab8c8ac60c837fbb98dac
author: Varun Gupta
committer: Varun Gupta
timestamp: 2018-01-05 04:53:54 +0530
message:
MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
When we have the optimization orderby_uses_equalities ON then for semi-join materialised tables
we get wrong results when these table is involved in filesort. This is due to the reason that
the fields that are referenced are the ones from the semi-join materialised table and not from the
original table
The fix is to have the fields refer to the original table. So a copy_back technique is implemented
for rr_sequential functions, which reads data from semi-join materialised tables and copies that back
to the original table fields.
So we need to extend this technique to other rr_* functions which are used to read the sorted data
after filesort is performed
---
mysql-test/r/order_by.result | 51 ++++++++++++++++++++++++++++++++++++-
mysql-test/r/order_by_innodb.result | 16 ++++++------
mysql-test/t/order_by.test | 47 ++++++++++++++++++++++++++++++++++
sql/records.cc | 20 +++++++++++++++
sql/records.h | 5 ++--
sql/sql_select.cc | 46 ++++++++++++++++++---------------
6 files changed, 154 insertions(+), 31 deletions(-)
diff --git a/mysql-test/r/order_by.result b/mysql-test/r/order_by.result
index 946ecb51426..b45eb6481a0 100644
--- a/mysql-test/r/order_by.result
+++ b/mysql-test/r/order_by.result
@@ -3200,10 +3200,59 @@ WHERE books.library_id = 8663 AND
books.scheduled_for_removal=0 )
ORDER BY wings.id;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 1 100.00 Using temporary; Using filesort
+1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 1 100.00 Using filesort
1 PRIMARY wings eq_ref PRIMARY PRIMARY 4 test.books.wings_id 1 100.00
2 MATERIALIZED books ref library_idx library_idx 4 const 1 100.00 Using where
Warnings:
Note 1003 select `test`.`wings`.`id` AS `wing_id`,`test`.`wings`.`department_id` AS `department_id` from `test`.`wings` semi join (`test`.`books`) where `test`.`books`.`library_id` = 8663 and `test`.`books`.`scheduled_for_removal` = 0 and `test`.`wings`.`id` = `test`.`books`.`wings_id` order by `test`.`wings`.`id`
set optimizer_switch= @save_optimizer_switch;
DROP TABLE books, wings;
+#
+# Wrong result upon GROUP BY with orderby_uses_equalities=on
+#
+CREATE TABLE person (
+PersonID MEDIUMINT(8) UNSIGNED AUTO_INCREMENT,
+PRIMARY KEY (PersonID)
+) ;
+CREATE TABLE percat (
+PersonID MEDIUMINT(8) DEFAULT 0,
+CategoryID MEDIUMINT(8) DEFAULT 0,
+PRIMARY KEY (PersonID, CategoryID),
+INDEX (CategoryID)
+) ;
+CREATE TABLE action (
+PersonID MEDIUMINT(8) UNSIGNED DEFAULT 0,
+ActionTypeID MEDIUMINT(8) UNSIGNED DEFAULT 0,
+INDEX (PersonID),
+INDEX (ActionTypeID)
+) ;
+INSERT INTO person (PersonID) VALUES
+(58),(96),(273),(352);
+INSERT INTO percat VALUES
+(58,9),(273,1),(273,9),(273,14),(352,1),(352,13);
+INSERT INTO action (PersonID, ActionTypeID) VALUES
+(58,3),(96,3),(273,3),(352,3);
+SELECT person.PersonID,
+GROUP_CONCAT(CategoryID ORDER BY CategoryID SEPARATOR ',') AS categories
+FROM person LEFT JOIN percat ON person.PersonID=percat.PersonID
+WHERE person.PersonID IN (SELECT PersonID FROM action WHERE ActionTypeID=3)
+GROUP BY person.PersonID;
+PersonID categories
+58 9
+96 NULL
+273 1,9,14
+352 1,13
+SET @save_optimizer_switch=@@optimizer_switch;
+SET optimizer_switch='orderby_uses_equalities=off';
+SELECT person.PersonID,
+GROUP_CONCAT(CategoryID ORDER BY CategoryID SEPARATOR ',') AS categories
+FROM person LEFT JOIN percat ON person.PersonID=percat.PersonID
+WHERE person.PersonID IN (SELECT PersonID FROM action WHERE ActionTypeID=3)
+GROUP BY person.PersonID;
+PersonID categories
+58 9
+96 NULL
+273 1,9,14
+352 1,13
+set optimizer_switch= @save_optimizer_switch;
+DROP TABLE action, percat, person;
diff --git a/mysql-test/r/order_by_innodb.result b/mysql-test/r/order_by_innodb.result
index 3ff1f92e94a..102323a9cbc 100644
--- a/mysql-test/r/order_by_innodb.result
+++ b/mysql-test/r/order_by_innodb.result
@@ -99,25 +99,25 @@ SELECT i,n
FROM t1 INNER JOIN t2 USING (i,j) LEFT JOIN t3 USING (j)
WHERE i IN (SELECT i FROM t1 WHERE z=1) AND z=0 ORDER BY i;
i n
-188 eight
-218 eight
-338 four
-409 seven
466 eight
469 eight
498 eight
656 eight
-SELECT i,n
-FROM t1 x INNER JOIN t2 USING (i,j) LEFT JOIN t3 USING (j)
-WHERE EXISTS (SELECT * FROM t1 WHERE i=x.i AND z=1) AND z=0 ORDER BY i;
-i n
188 eight
218 eight
338 four
409 seven
+SELECT i,n
+FROM t1 x INNER JOIN t2 USING (i,j) LEFT JOIN t3 USING (j)
+WHERE EXISTS (SELECT * FROM t1 WHERE i=x.i AND z=1) AND z=0 ORDER BY i;
+i n
466 eight
469 eight
498 eight
656 eight
+188 eight
+218 eight
+338 four
+409 seven
set optimizer_switch= @save_optimizer_switch;
DROP TABLE t1,t2,t3;
diff --git a/mysql-test/t/order_by.test b/mysql-test/t/order_by.test
index 914911648b2..38537af835b 100644
--- a/mysql-test/t/order_by.test
+++ b/mysql-test/t/order_by.test
@@ -2149,3 +2149,50 @@ eval explain extended $q;
set optimizer_switch= @save_optimizer_switch;
DROP TABLE books, wings;
+
+--echo #
+--echo # Wrong result upon GROUP BY with orderby_uses_equalities=on
+--echo #
+
+CREATE TABLE person (
+ PersonID MEDIUMINT(8) UNSIGNED AUTO_INCREMENT,
+ PRIMARY KEY (PersonID)
+) ;
+
+CREATE TABLE percat (
+ PersonID MEDIUMINT(8) DEFAULT 0,
+ CategoryID MEDIUMINT(8) DEFAULT 0,
+ PRIMARY KEY (PersonID, CategoryID),
+ INDEX (CategoryID)
+) ;
+
+CREATE TABLE action (
+ PersonID MEDIUMINT(8) UNSIGNED DEFAULT 0,
+ ActionTypeID MEDIUMINT(8) UNSIGNED DEFAULT 0,
+ INDEX (PersonID),
+ INDEX (ActionTypeID)
+) ;
+
+INSERT INTO person (PersonID) VALUES (58),(96),(273),(352);
+
+INSERT INTO percat VALUES (58,9),(273,1),(273,9),(273,14),(352,1),(352,13);
+
+INSERT INTO action (PersonID, ActionTypeID) VALUES (58,3),(96,3),(273,3),(352,3);
+
+SELECT person.PersonID,
+GROUP_CONCAT(CategoryID ORDER BY CategoryID SEPARATOR ',') AS categories
+FROM person LEFT JOIN percat ON person.PersonID=percat.PersonID
+WHERE person.PersonID IN (SELECT PersonID FROM action WHERE ActionTypeID=3)
+GROUP BY person.PersonID;
+
+SET @save_optimizer_switch=@@optimizer_switch;
+SET optimizer_switch='orderby_uses_equalities=off';
+
+SELECT person.PersonID,
+GROUP_CONCAT(CategoryID ORDER BY CategoryID SEPARATOR ',') AS categories
+FROM person LEFT JOIN percat ON person.PersonID=percat.PersonID
+WHERE person.PersonID IN (SELECT PersonID FROM action WHERE ActionTypeID=3)
+GROUP BY person.PersonID;
+
+set optimizer_switch= @save_optimizer_switch;
+DROP TABLE action, percat, person;
diff --git a/sql/records.cc b/sql/records.cc
index 650a51f7f37..f29554c59ef 100644
--- a/sql/records.cc
+++ b/sql/records.cc
@@ -49,6 +49,18 @@ static int rr_index_last(READ_RECORD *info);
static int rr_index(READ_RECORD *info);
static int rr_index_desc(READ_RECORD *info);
+static int rr_read_record_and_unpack(READ_RECORD *info)
+{
+ int error;
+ if ((error= info->read_record_func_and_unpack_calls(info)))
+ return error;
+
+ for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
+ (*cp->do_copy)(cp);
+
+ return error;
+}
+
/**
Initialize READ_RECORD structure to perform full index scan in desired
@@ -189,6 +201,7 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
{
IO_CACHE *tempfile;
SORT_ADDON_FIELD *addon_field= filesort ? filesort->addon_field : 0;
+ bool need_unpacking= info->need_unpacking;
DBUG_ENTER("init_read_record");
bzero((char*) info,sizeof(*info));
@@ -308,6 +321,13 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
(void) table->file->extra_opt(HA_EXTRA_CACHE,
thd->variables.read_buff_size);
}
+
+ if (need_unpacking)
+ {
+ info->read_record_func_and_unpack_calls= info->read_record_func;
+ info->read_record_func = rr_read_record_and_unpack;
+ }
+
/* Condition pushdown to storage engine */
if ((table->file->ha_table_flags() & HA_CAN_TABLE_CONDITION_PUSHDOWN) &&
select && select->cond &&
diff --git a/sql/records.h b/sql/records.h
index 940c88ca0c7..ee923297ef9 100644
--- a/sql/records.h
+++ b/sql/records.h
@@ -56,6 +56,7 @@ struct READ_RECORD
TABLE **forms; /* head and ref forms */
Unlock_row_func unlock_row;
Read_func read_record_func;
+ Read_func read_record_func_and_unpack_calls;
THD *thd;
SQL_SELECT *select;
uint cache_records;
@@ -67,7 +68,7 @@ struct READ_RECORD
uchar *cache,*cache_pos,*cache_end,*read_positions;
struct st_sort_addon_field *addon_field; /* Pointer to the fields info */
struct st_io_cache *io_cache;
- bool print_error, ignore_not_found_rows;
+ bool print_error, ignore_not_found_rows, need_unpacking;
void (*unpack)(struct st_sort_addon_field *, uchar *, uchar *);
int read_record() { return read_record_func(this); }
@@ -79,7 +80,7 @@ struct READ_RECORD
Copy_field *copy_field;
Copy_field *copy_field_end;
public:
- READ_RECORD() : table(NULL), cache(NULL) {}
+ READ_RECORD() : table(NULL), cache(NULL), need_unpacking(false) {}
~READ_RECORD() { end_read_record(this); }
};
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 8aed093af7f..98734715efe 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -12696,20 +12696,7 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
can be used without tmp. table.
*/
bool can_subst_to_first_table= false;
- bool first_is_in_sjm_nest= false;
- if (first_is_base_table)
- {
- TABLE_LIST *tbl_for_first=
- join->join_tab[join->const_tables].table->pos_in_table_list;
- first_is_in_sjm_nest= tbl_for_first->sj_mat_info &&
- tbl_for_first->sj_mat_info->is_used;
- }
/*
- Currently we do not employ the optimization that uses multiple
- equalities for ORDER BY to remove tmp table in the case when
- the first table happens to be the result of materialization of
- a semi-join nest ( <=> first_is_in_sjm_nest == true).
-
When a semi-join nest is materialized and scanned to look for
possible matches in the remaining tables for every its row
the fields from the result of materialization are copied
@@ -12717,16 +12704,12 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
So these copies are used to access the remaining tables rather
than the fields from the result of materialization.
- Unfortunately now this so-called 'copy back' technique is
- supported only if the rows are scanned with the rr_sequential
- function, but not with other rr_* functions that are employed
+ Now this so-called 'copy back' technique is
+ supported for all rr_* functions that are employed
when the result of materialization is required to be sorted.
-
- TODO: either to support 'copy back' technique for the above case,
- or to get rid of this technique altogether.
*/
if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP) &&
- first_is_base_table && !first_is_in_sjm_nest &&
+ first_is_base_table &&
order->item[0]->real_item()->type() == Item::FIELD_ITEM &&
join->cond_equal)
{
@@ -19693,6 +19676,8 @@ bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab)
int join_init_read_record(JOIN_TAB *tab)
{
+ Copy_field *save_copy, *save_copy_end;
+ TABLE_LIST *tbl_for_first;
/*
Note: the query plan tree for the below operations is constructed in
save_agg_explain_data.
@@ -19716,9 +19701,30 @@ int join_init_read_record(JOIN_TAB *tab)
tab->join->thd->reset_killed(););
if (!tab->preread_init_done && tab->preread_init())
return 1;
+
+ /*
+ Allow unpacking of field for semi-join materialised table when
+ they are involved in filesort
+ */
+
+ tbl_for_first= tab->table->pos_in_table_list;
+ tab->read_record.need_unpacking= tbl_for_first ? (tab->filesort &&
+ tbl_for_first->sj_mat_info &&
+ tbl_for_first->sj_mat_info->is_used) : FALSE;
+
+ /*
+ init_read_record resets all elements of tab->read_record().
+ Remember things that we don't want to have reset.
+ */
+ save_copy= tab->read_record.copy_field;
+ save_copy_end= tab->read_record.copy_field_end;
+
if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
tab->select, tab->filesort_result, 1,1, FALSE))
return 1;
+ tab->read_record.copy_field= save_copy;
+ tab->read_record.copy_field_end= save_copy_end;
+
return tab->read_record.read_record();
}
2
1

[Commits] e07caf401c2: MDEV-20645: Replication consistency is broken as workers miss the error notification from an earlier failed group.
by sujatha 23 Sep '19
by sujatha 23 Sep '19
23 Sep '19
revision-id: e07caf401c26cf8144899336d103e4c7aafd3d7a (mariadb-10.1.41-45-ge07caf401c2)
parent(s): 896974fc3d721aabe1afbf637a566cab856a731d
author: Sujatha
committer: Sujatha
timestamp: 2019-09-23 13:56:13 +0530
message:
MDEV-20645: Replication consistency is broken as workers miss the error notification from an earlier failed group.
Analysis:
========
In general if there are three groups.
1 - Inserts 32 which fails due to local entry '32' on slave.
2 - Inserts 33
3 - Inserts 34
Each group considers itself as a waiter and it waits for prior group 'waitee'.
This is done in 'register_wait_for_prior_event_group_commit'. If there is no
other parallel group being scheduled then no waitee will be there.
Let us assume 3 groups are being scheduled in parallel.
3-> waits for 2-> waits for->1
'1' upon completion it checks is there any registered subsequent waiter. If
so it wakes up the subsequent waiter with its execution status. This execution
status is stored in wakeup_error.
If '1' failed then it sends corresponding wakeup_error to 2. Then '2' aborts
and it propagates error to '3'. So all further commits are aborted. This
mechanism works only when all transactions reach a stage where they are
waiting for their prior commit to complete.
In case of optimistic following scenario occurs.
1,2,3 are scheduled in parallel.
3 - Reaches group_commit_code waits for 2 to complete.
1 - errors out sets stop_on_error_sub_id=1.
When a group execution results in error its corresponding sub_id is set to
'stop_on_error_sub_id'. Any new groups queued for execution will check if
their sub_id is > stop_on_error_sub_id. If it is true their execution will be
skipped as prior group execution failed. 'skip_event_group=1' will be set.
Since the execution of SQL thread is about to stop we just skip execution of
all the following event groups. We still do all the normal waiting and wakeup
processing between the event groups as a simple way to ensure that everything
is stopped and cleaned up correctly.
Upon error '1' transaction checks for registered waiters. Since no one is
there it simply goes away.
2 - Starts the execution. It checks do I have a waitee.
Since wait_commit_sub_id == entry->last_committed_sub_id no waitee is set.
Secondly: 'entry->stop_on_error_sub_id' is set by '1'st execution. Now
'handle_parallel_thread' code checks if the current group 'sub_id' is greater
than the 'sub_id' set within 'stop_on_error_sub_id'.
Since the above is true 'skip_event_group=true' is set. Simply call
'wait_for_prior_commit' to wakeup all waiters. Group '2' didn't had any
waitee and its execution is skipped. Hence its wakeup_error=0.It sends a
positive wakeup signal to '3'. Which commits. This results in a missed
transaction. i.e 33 is missed and 34 is committed.
Fix:
===
When a worker learns that an earlier transaction execution has failed, and it
should not proceed for further execution, it should mark its own execution
status as failed so that it alerts its followers to abort as well.
---
.../rpl_parallel_ignored_errors.result | 56 +++++++++++
.../rpl_parallel_ignored_errors.test | 1 +
.../rpl/include/rpl_parallel_ignored_errors.inc | 111 +++++++++++++++++++++
.../suite/rpl/r/rpl_parallel_ignored_errors.result | 56 +++++++++++
.../suite/rpl/t/rpl_parallel_ignored_errors.test | 1 +
sql/rpl_parallel.cc | 16 +++
6 files changed, 241 insertions(+)
diff --git a/mysql-test/suite/binlog_encryption/rpl_parallel_ignored_errors.result b/mysql-test/suite/binlog_encryption/rpl_parallel_ignored_errors.result
new file mode 100644
index 00000000000..13af480adf7
--- /dev/null
+++ b/mysql-test/suite/binlog_encryption/rpl_parallel_ignored_errors.result
@@ -0,0 +1,56 @@
+include/master-slave.inc
+[connection master]
+connection server_2;
+include/stop_slave.inc
+connection server_2;
+SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
+SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
+SET @old_dbug= @@GLOBAL.debug_dbug;
+SET GLOBAL slave_parallel_mode='optimistic';
+SET GLOBAL slave_parallel_threads= 3;
+CHANGE MASTER TO master_use_gtid=slave_pos;
+CALL mtr.add_suppression("Commit failed due to failure of an earlier commit on which this one depends");
+include/start_slave.inc
+connection server_2;
+connection server_1;
+ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
+CREATE TABLE t1 (a int PRIMARY KEY) ENGINE=InnoDB;
+include/save_master_gtid.inc
+connection server_1;
+connection server_2;
+include/sync_with_master_gtid.inc
+connection server_2;
+connect con_temp2,127.0.0.1,root,,test,$SERVER_MYPORT_2,;
+BEGIN;
+INSERT INTO t1 VALUES (32);
+connection server_1;
+INSERT INTO t1 VALUES (32);
+connection server_2;
+SET GLOBAL debug_dbug="+d,hold_worker_on_schedule";
+connection server_1;
+SET gtid_seq_no=100;
+INSERT INTO t1 VALUES (33);
+connection server_2;
+SET debug_sync='now WAIT_FOR reached_pause';
+connection server_1;
+INSERT INTO t1 VALUES (34);
+connection server_2;
+connection con_temp2;
+COMMIT;
+connection server_2;
+include/stop_slave.inc
+connection server_2;
+include/assert.inc [table t1 should have zero rows where a>32]
+connection server_2;
+SELECT * FROM t1 WHERE a>32;
+a
+DELETE FROM t1 WHERE a=32;
+SET GLOBAL slave_parallel_threads=@old_parallel_threads;
+SET GLOBAL slave_parallel_mode=@old_parallel_mode;
+SET GLOBAL debug_dbug=@old_debug;
+SET DEBUG_SYNC= 'RESET';
+include/start_slave.inc
+connection server_2;
+connection server_1;
+DROP TABLE t1;
+include/rpl_end.inc
diff --git a/mysql-test/suite/binlog_encryption/rpl_parallel_ignored_errors.test b/mysql-test/suite/binlog_encryption/rpl_parallel_ignored_errors.test
new file mode 100644
index 00000000000..8a26778c8f2
--- /dev/null
+++ b/mysql-test/suite/binlog_encryption/rpl_parallel_ignored_errors.test
@@ -0,0 +1 @@
+--source suite/rpl/include/rpl_parallel_ignored_errors.inc
diff --git a/mysql-test/suite/rpl/include/rpl_parallel_ignored_errors.inc b/mysql-test/suite/rpl/include/rpl_parallel_ignored_errors.inc
new file mode 100644
index 00000000000..8f9a964f95e
--- /dev/null
+++ b/mysql-test/suite/rpl/include/rpl_parallel_ignored_errors.inc
@@ -0,0 +1,111 @@
+# ==== Purpose ====
+#
+# Test verifies that, in parallel replication, transaction failure notification
+# is propagated to all the workers. Workers should abort the execution of
+# transaction event groups, whose event positions are higher than the failing
+# transaction group.
+#
+# ==== Implementation ====
+#
+# Steps:
+# 0 - Create a table t1 on master which has a primary key. Enable parallel
+# replication on slave with slave_parallel_mode='optimistic' and
+# slave_parallel_threads=3.
+# 1 - On slave start a transaction and execute a local INSERT statement
+# which will insert value 32. This is done to block the INSERT coming
+# from master.
+# 2 - On master execute an INSERT statement with value 32, so that it is
+# blocked on slave.
+# 3 - On slave enable a debug sync point such that it holds the worker thread
+# execution as soon as work is scheduled to it.
+# 4 - INSERT value 33 on master. It will be held on slave by other worker
+# thread due to debug simulation.
+# 5 - INSERT value 34 on master.
+# 6 - On slave, enusre that INSERT 34 has reached a state where it waits for
+# its prior transactions to commit.
+# 7 - Commit the local INSERT 32 on slave server so that first worker will
+# error out.
+# 8 - Now send a continue signal to second worker processing 33. It should
+# wakeup and propagate the error to INSERT 34.
+# 9 - Upon slave stop due to error, check that no rows are found after the
+# failed INSERT 32.
+#
+# ==== References ====
+#
+# MDEV-20645: Replication consistency is broken as workers miss the error
+# notification from an earlier failed group.
+#
+
+--source include/have_innodb.inc
+--source include/have_debug.inc
+--source include/have_debug_sync.inc
+--source include/have_binlog_format_statement.inc
+--source include/master-slave.inc
+
+--enable_connect_log
+--connection server_2
+--source include/stop_slave.inc
+SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
+SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
+SET @old_dbug= @@GLOBAL.debug_dbug;
+SET GLOBAL slave_parallel_mode='optimistic';
+SET GLOBAL slave_parallel_threads= 3;
+CHANGE MASTER TO master_use_gtid=slave_pos;
+CALL mtr.add_suppression("Commit failed due to failure of an earlier commit on which this one depends");
+--source include/start_slave.inc
+
+--connection server_1
+ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
+CREATE TABLE t1 (a int PRIMARY KEY) ENGINE=InnoDB;
+--source include/save_master_gtid.inc
+
+--connection server_2
+--source include/sync_with_master_gtid.inc
+
+--connect (con_temp2,127.0.0.1,root,,test,$SERVER_MYPORT_2,)
+BEGIN;
+INSERT INTO t1 VALUES (32);
+
+--connection server_1
+INSERT INTO t1 VALUES (32);
+
+--connection server_2
+--let $wait_condition= SELECT COUNT(*) = 1 FROM information_schema.processlist WHERE info like "INSERT INTO t1 VALUES (32)"
+--source include/wait_condition.inc
+SET GLOBAL debug_dbug="+d,hold_worker_on_schedule";
+
+--connection server_1
+SET gtid_seq_no=100;
+INSERT INTO t1 VALUES (33);
+
+--connection server_2
+SET debug_sync='now WAIT_FOR reached_pause';
+
+--connection server_1
+INSERT INTO t1 VALUES (34);
+
+--connection server_2
+--let $wait_condition= SELECT COUNT(*) = 1 FROM information_schema.processlist WHERE state like "Waiting for prior transaction to commit"
+--source include/wait_condition.inc
+--connection con_temp2
+COMMIT;
+
+# Clean up.
+--connection server_2
+--source include/stop_slave.inc
+--let $assert_cond= COUNT(*) = 0 FROM t1 WHERE a>32
+--let $assert_text= table t1 should have zero rows where a>32
+--source include/assert.inc
+SELECT * FROM t1 WHERE a>32;
+DELETE FROM t1 WHERE a=32;
+
+SET GLOBAL slave_parallel_threads=@old_parallel_threads;
+SET GLOBAL slave_parallel_mode=@old_parallel_mode;
+SET GLOBAL debug_dbug=@old_debug;
+SET DEBUG_SYNC= 'RESET';
+--source include/start_slave.inc
+
+--connection server_1
+DROP TABLE t1;
+--disable_connect_log
+--source include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/r/rpl_parallel_ignored_errors.result b/mysql-test/suite/rpl/r/rpl_parallel_ignored_errors.result
new file mode 100644
index 00000000000..13af480adf7
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_parallel_ignored_errors.result
@@ -0,0 +1,56 @@
+include/master-slave.inc
+[connection master]
+connection server_2;
+include/stop_slave.inc
+connection server_2;
+SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
+SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
+SET @old_dbug= @@GLOBAL.debug_dbug;
+SET GLOBAL slave_parallel_mode='optimistic';
+SET GLOBAL slave_parallel_threads= 3;
+CHANGE MASTER TO master_use_gtid=slave_pos;
+CALL mtr.add_suppression("Commit failed due to failure of an earlier commit on which this one depends");
+include/start_slave.inc
+connection server_2;
+connection server_1;
+ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
+CREATE TABLE t1 (a int PRIMARY KEY) ENGINE=InnoDB;
+include/save_master_gtid.inc
+connection server_1;
+connection server_2;
+include/sync_with_master_gtid.inc
+connection server_2;
+connect con_temp2,127.0.0.1,root,,test,$SERVER_MYPORT_2,;
+BEGIN;
+INSERT INTO t1 VALUES (32);
+connection server_1;
+INSERT INTO t1 VALUES (32);
+connection server_2;
+SET GLOBAL debug_dbug="+d,hold_worker_on_schedule";
+connection server_1;
+SET gtid_seq_no=100;
+INSERT INTO t1 VALUES (33);
+connection server_2;
+SET debug_sync='now WAIT_FOR reached_pause';
+connection server_1;
+INSERT INTO t1 VALUES (34);
+connection server_2;
+connection con_temp2;
+COMMIT;
+connection server_2;
+include/stop_slave.inc
+connection server_2;
+include/assert.inc [table t1 should have zero rows where a>32]
+connection server_2;
+SELECT * FROM t1 WHERE a>32;
+a
+DELETE FROM t1 WHERE a=32;
+SET GLOBAL slave_parallel_threads=@old_parallel_threads;
+SET GLOBAL slave_parallel_mode=@old_parallel_mode;
+SET GLOBAL debug_dbug=@old_debug;
+SET DEBUG_SYNC= 'RESET';
+include/start_slave.inc
+connection server_2;
+connection server_1;
+DROP TABLE t1;
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_parallel_ignored_errors.test b/mysql-test/suite/rpl/t/rpl_parallel_ignored_errors.test
new file mode 100644
index 00000000000..90f09a76546
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_parallel_ignored_errors.test
@@ -0,0 +1 @@
+--source include/rpl_parallel_ignored_errors.inc
diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc
index 8fef2d66635..73e52b19e94 100644
--- a/sql/rpl_parallel.cc
+++ b/sql/rpl_parallel.cc
@@ -228,6 +228,12 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id,
entry->stop_on_error_sub_id == (uint64)ULONGLONG_MAX)
entry->stop_on_error_sub_id= sub_id;
mysql_mutex_unlock(&entry->LOCK_parallel_entry);
+ DBUG_EXECUTE_IF("hold_worker_on_schedule", {
+ if (entry->stop_on_error_sub_id < (uint64)ULONGLONG_MAX)
+ {
+ debug_sync_set_action(thd, STRING_WITH_LEN("now SIGNAL continue_worker"));
+ }
+ });
if (rgi->killed_for_retry == rpl_group_info::RETRY_KILL_PENDING)
wait_for_pending_deadlock_kill(thd, rgi);
@@ -1096,6 +1102,13 @@ handle_rpl_parallel_thread(void *arg)
bool did_enter_cond= false;
PSI_stage_info old_stage;
+ DBUG_EXECUTE_IF("hold_worker_on_schedule", {
+ if (rgi->current_gtid.domain_id == 0 &&
+ rgi->current_gtid.seq_no == 100) {
+ debug_sync_set_action(thd,
+ STRING_WITH_LEN("now SIGNAL reached_pause WAIT_FOR continue_worker"));
+ }
+ });
DBUG_EXECUTE_IF("rpl_parallel_scheduled_gtid_0_x_100", {
if (rgi->current_gtid.domain_id == 0 &&
rgi->current_gtid.seq_no == 100) {
@@ -1137,7 +1150,10 @@ handle_rpl_parallel_thread(void *arg)
skip_event_group= do_gco_wait(rgi, gco, &did_enter_cond, &old_stage);
if (unlikely(entry->stop_on_error_sub_id <= rgi->wait_commit_sub_id))
+ {
skip_event_group= true;
+ rgi->worker_error= 1;
+ }
if (likely(!skip_event_group))
do_ftwrl_wait(rgi, &did_enter_cond, &old_stage);
1
0

[Commits] ba7725d: MDEV-20229 CTE defined with table value constructor cannot be used in views
by IgorBabaev 20 Sep '19
by IgorBabaev 20 Sep '19
20 Sep '19
revision-id: ba7725dace48d403187eb2a418a2081703fe5c9d (mariadb-10.3.18-16-gba7725d)
parent(s): 90a9c4cae74d2ef1008e3f216026b7fd2697e46b
author: Igor Babaev
committer: Igor Babaev
timestamp: 2019-09-20 15:59:54 -0700
message:
MDEV-20229 CTE defined with table value constructor cannot be used in views
A CTE can be defined as a table values constructor. In this case the CTE is
always materialized in a temporary table.
If the definition of the CTE contains a list of the names of the CTE
columns then the query expression that uses this CTE can refer to the CTE
columns by these names. Otherwise the names of the columns are taken from
the names of the columns in the result set of the query that specifies the
CTE.
Thus if the column names of a CTE are provided in the definition the
columns of result set should be renamed. In a general case renaming of
the columns is done in the select lists of the query specifying the CTE.
If a CTE is specified by a table value constructor then there are no such
select lists and renaming is actually done for the columns of the result
of materialization.
Now if a view is specified by a query expression that uses a CTE specified
by a table value constructor saving the column names of the CTE in the
stored view definition becomes critical: without these names the query
expression is not able to refer to the columns of the CTE.
This patch saves the given column names of CTEs in stored view definitions
that use them.
---
mysql-test/main/cte_nonrecursive.result | 4 ++--
mysql-test/main/cte_recursive.result | 6 +++---
mysql-test/main/table_value_constr.result | 13 +++++++++++++
mysql-test/main/table_value_constr.test | 10 ++++++++++
sql/sql_cte.cc | 16 ++++++++++++++++
sql/sql_union.cc | 1 +
6 files changed, 45 insertions(+), 5 deletions(-)
diff --git a/mysql-test/main/cte_nonrecursive.result b/mysql-test/main/cte_nonrecursive.result
index 69494d0..d6904b8 100644
--- a/mysql-test/main/cte_nonrecursive.result
+++ b/mysql-test/main/cte_nonrecursive.result
@@ -606,7 +606,7 @@ with t(c) as (select a from t1 where b >= 'c')
select * from t r1 where r1.c=4;
show create view v3;
View Create View character_set_client collation_connection
-v3 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v3` AS with t as (select `t1`.`a` AS `c` from `t1` where `t1`.`b` >= 'c')select `r1`.`c` AS `c` from `t` `r1` where `r1`.`c` = 4 latin1 latin1_swedish_ci
+v3 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v3` AS with t(c) as (select `t1`.`a` AS `c` from `t1` where `t1`.`b` >= 'c')select `r1`.`c` AS `c` from `t` `r1` where `r1`.`c` = 4 latin1 latin1_swedish_ci
select * from v3;
c
4
@@ -618,7 +618,7 @@ with t(c) as (select a from t1 where b >= 'c')
select * from t r1, t r2 where r1.c=r2.c and r2.c=4;
show create view v4;
View Create View character_set_client collation_connection
-v4 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v4` AS with t as (select `test`.`t1`.`a` AS `c` from `test`.`t1` where `test`.`t1`.`b` >= 'c')select `r1`.`c` AS `c`,`r2`.`c` AS `d` from (`t` `r1` join (select `test`.`t1`.`a` AS `c` from `test`.`t1` where `test`.`t1`.`b` >= 'c') `r2`) where `r1`.`c` = `r2`.`c` and `r2`.`c` = 4 latin1 latin1_swedish_ci
+v4 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v4` AS with t(c) as (select `test`.`t1`.`a` AS `c` from `test`.`t1` where `test`.`t1`.`b` >= 'c')select `r1`.`c` AS `c`,`r2`.`c` AS `d` from (`t` `r1` join (select `test`.`t1`.`a` AS `c` from `test`.`t1` where `test`.`t1`.`b` >= 'c') `r2`) where `r1`.`c` = `r2`.`c` and `r2`.`c` = 4 latin1 latin1_swedish_ci
select * from v4;
c d
4 4
diff --git a/mysql-test/main/cte_recursive.result b/mysql-test/main/cte_recursive.result
index f2ae992..9b2aa2b 100644
--- a/mysql-test/main/cte_recursive.result
+++ b/mysql-test/main/cte_recursive.result
@@ -699,7 +699,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra
5 RECURSIVE UNION p ALL NULL NULL NULL NULL 12 100.00 Using where; Using join buffer (flat, BNL join)
NULL UNION RESULT <union3,4,5> ALL NULL NULL NULL NULL NULL NULL
Warnings:
-Note 1003 with recursive ancestor_couple_ids as (/* select#2 */ select `a`.`father` AS `h_id`,`a`.`mother` AS `w_id` from `coupled_ancestors` `a` where `a`.`father` is not null and `a`.`mother` is not null), coupled_ancestors as (/* select#3 */ select `test`.`folks`.`id` AS `id`,`test`.`folks`.`name` AS `name`,`test`.`folks`.`dob` AS `dob`,`test`.`folks`.`father` AS `father`,`test`.`folks`.`mother` AS `mother` from `test`.`folks` where `test`.`folks`.`name` = 'Me' union all /* select#4 */ select `test`.`p`.`id` AS `id`,`test`.`p`.`name` AS `name`,`test`.`p`.`dob` AS `dob`,`test`.`p`.`father` AS `father`,`test`.`p`.`mother` AS `mother` from `test`.`folks` `p` join `ancestor_couple_ids` `fa` where `test`.`p`.`id` = `fa`.`h_id` union all /* select#5 */ select `test`.`p`.`id` AS `id`,`test`.`p`.`name` AS `name`,`test`.`p`.`dob` AS `dob`,`test`.`p`.`father` AS `father`,`test`.`p`.`mother` AS `mother` from `test`.`folks` `p` join `ancestor_couple_ids` `ma` where `test`.`p`.`id` =
`ma`.`w_
id`)/* select#1 */ select `h`.`name` AS `name`,`h`.`dob` AS `dob`,`w`.`name` AS `name`,`w`.`dob` AS `dob` from `ancestor_couple_ids` `c` join `coupled_ancestors` `h` join `coupled_ancestors` `w` where `h`.`id` = `c`.`h_id` and `w`.`id` = `c`.`w_id`
+Note 1003 with recursive ancestor_couple_ids(h_id,w_id) as (/* select#2 */ select `a`.`father` AS `h_id`,`a`.`mother` AS `w_id` from `coupled_ancestors` `a` where `a`.`father` is not null and `a`.`mother` is not null), coupled_ancestors(id,name,dob,father,mother) as (/* select#3 */ select `test`.`folks`.`id` AS `id`,`test`.`folks`.`name` AS `name`,`test`.`folks`.`dob` AS `dob`,`test`.`folks`.`father` AS `father`,`test`.`folks`.`mother` AS `mother` from `test`.`folks` where `test`.`folks`.`name` = 'Me' union all /* select#4 */ select `test`.`p`.`id` AS `id`,`test`.`p`.`name` AS `name`,`test`.`p`.`dob` AS `dob`,`test`.`p`.`father` AS `father`,`test`.`p`.`mother` AS `mother` from `test`.`folks` `p` join `ancestor_couple_ids` `fa` where `test`.`p`.`id` = `fa`.`h_id` union all /* select#5 */ select `test`.`p`.`id` AS `id`,`test`.`p`.`name` AS `name`,`test`.`p`.`dob` AS `dob`,`test`.`p`.`father` AS `father`,`test`.`p`.`mother` AS `mother` from `test`.`folks` `p` join `ancestor_cou
ple_ids`
`ma` where `test`.`p`.`id` = `ma`.`w_id`)/* select#1 */ select `h`.`name` AS `name`,`h`.`dob` AS `dob`,`w`.`name` AS `name`,`w`.`dob` AS `dob` from `ancestor_couple_ids` `c` join `coupled_ancestors` `h` join `coupled_ancestors` `w` where `h`.`id` = `c`.`h_id` and `w`.`id` = `c`.`w_id`
# simple mutual recursion
with recursive
ancestor_couple_ids(h_id, w_id)
@@ -3091,7 +3091,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra
4 DEPENDENT SUBQUERY <derived2> ALL NULL NULL NULL NULL 16 100.00 Using where
NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL NULL
Warnings:
-Note 1003 with recursive destinations as (/* select#2 */ select `test`.`a`.`arrival` AS `city`,1 AS `legs` from `test`.`flights` `a` where `test`.`a`.`departure` = 'Cairo' union /* select#3 */ select `test`.`b`.`arrival` AS `arrival`,`r`.`legs` + 1 AS `r.legs + 1` from `destinations` `r` join `test`.`flights` `b` where `r`.`city` = `test`.`b`.`departure` and !<in_optimizer>(`test`.`b`.`arrival`,<exists>(/* select#4 */ select `destinations`.`city` from `destinations` where trigcond(`test`.`b`.`arrival` = `destinations`.`city` or `destinations`.`city` is null) having trigcond(`destinations`.`city` is null))))/* select#1 */ select `destinations`.`city` AS `city`,`destinations`.`legs` AS `legs` from `destinations`
+Note 1003 with recursive destinations(city,legs) as (/* select#2 */ select `test`.`a`.`arrival` AS `city`,1 AS `legs` from `test`.`flights` `a` where `test`.`a`.`departure` = 'Cairo' union /* select#3 */ select `test`.`b`.`arrival` AS `arrival`,`r`.`legs` + 1 AS `r.legs + 1` from `destinations` `r` join `test`.`flights` `b` where `r`.`city` = `test`.`b`.`departure` and !<in_optimizer>(`test`.`b`.`arrival`,<exists>(/* select#4 */ select `destinations`.`city` from `destinations` where trigcond(`test`.`b`.`arrival` = `destinations`.`city` or `destinations`.`city` is null) having trigcond(`destinations`.`city` is null))))/* select#1 */ select `destinations`.`city` AS `city`,`destinations`.`legs` AS `legs` from `destinations`
set standard_compliant_cte=default;
drop table flights;
#
@@ -3378,7 +3378,7 @@ id select_type table type possible_keys key key_len ref rows filtered Extra
3 RECURSIVE UNION <derived2> ALL NULL NULL NULL NULL 2 100.00 Using where
NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL NULL
Warnings:
-Note 1003 with recursive rcte as (/* select#2 */ select 1 AS `a` union /* select#3 */ select cast(`rcte`.`a` + 1 as unsigned) AS `cast(a+1 as unsigned)` from `rcte` where `rcte`.`a` < 10), cte1 as (/* select#4 */ select count(0) AS `c1` from `rcte` join `test`.`t1` where `rcte`.`a` between 3 and 5 and `test`.`t1`.`id` = `rcte`.`a` - 3), cte2 as (/* select#5 */ select count(0) AS `c2` from `rcte` join `test`.`t1` where `rcte`.`a` between 7 and 8 and `test`.`t1`.`id` = `rcte`.`a` - 7)/* select#1 */ select `cte1`.`c1` AS `c1`,`cte2`.`c2` AS `c2` from `cte1` join `cte2`
+Note 1003 with recursive rcte(a) as (/* select#2 */ select 1 AS `a` union /* select#3 */ select cast(`rcte`.`a` + 1 as unsigned) AS `cast(a+1 as unsigned)` from `rcte` where `rcte`.`a` < 10), cte1 as (/* select#4 */ select count(0) AS `c1` from `rcte` join `test`.`t1` where `rcte`.`a` between 3 and 5 and `test`.`t1`.`id` = `rcte`.`a` - 3), cte2 as (/* select#5 */ select count(0) AS `c2` from `rcte` join `test`.`t1` where `rcte`.`a` between 7 and 8 and `test`.`t1`.`id` = `rcte`.`a` - 7)/* select#1 */ select `cte1`.`c1` AS `c1`,`cte2`.`c2` AS `c2` from `cte1` join `cte2`
prepare stmt from "with recursive
rcte(a) as
(select 1 union select cast(a+1 as unsigned) from rcte where a < 10),
diff --git a/mysql-test/main/table_value_constr.result b/mysql-test/main/table_value_constr.result
index 6356d4e..03e378e 100644
--- a/mysql-test/main/table_value_constr.result
+++ b/mysql-test/main/table_value_constr.result
@@ -2586,3 +2586,16 @@ create view v1 as
union
( values (5), (7), (1), (3), (4) order by 2 limit 2 );
ERROR 42S22: Unknown column '2' in 'order clause'
+#
+# MDEV-20229: view defined as select using
+# CTE with named columns defined as TVC
+#
+create view v1 as with t(a) as (values (2), (1)) select a from t;
+show create view v1;
+View Create View character_set_client collation_connection
+v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS with t(a) as (values (2),(1))select `t`.`a` AS `a` from `t` latin1 latin1_swedish_ci
+select * from v1;
+a
+2
+1
+drop view v1;
diff --git a/mysql-test/main/table_value_constr.test b/mysql-test/main/table_value_constr.test
index 6b89816..4464eb7 100644
--- a/mysql-test/main/table_value_constr.test
+++ b/mysql-test/main/table_value_constr.test
@@ -1316,3 +1316,13 @@ create view v1 as
( values (5), (7), (1), (3), (4) limit 2 offset 1 )
union
( values (5), (7), (1), (3), (4) order by 2 limit 2 );
+
+--echo #
+--echo # MDEV-20229: view defined as select using
+--echo # CTE with named columns defined as TVC
+--echo #
+
+create view v1 as with t(a) as (values (2), (1)) select a from t;
+show create view v1;
+select * from v1;
+drop view v1;
diff --git a/sql/sql_cte.cc b/sql/sql_cte.cc
index 247d7e5..08d5e20 100644
--- a/sql/sql_cte.cc
+++ b/sql/sql_cte.cc
@@ -1430,6 +1430,22 @@ void With_clause::print(String *str, enum_query_type query_type)
void With_element::print(String *str, enum_query_type query_type)
{
str->append(query_name);
+ if (column_list.elements)
+ {
+ List_iterator_fast<LEX_CSTRING> li(column_list);
+ str->append('(');
+ for (LEX_CSTRING *col_name= li++; ; )
+ {
+ str->append(col_name);
+ col_name= li++;
+ if (!col_name)
+ {
+ str->append(')');
+ break;
+ }
+ str->append(',');
+ }
+ }
str->append(STRING_WITH_LEN(" as "));
str->append('(');
spec->print(str, query_type);
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
index 6ab2619..c119f1e 100644
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@@ -1858,6 +1858,7 @@ bool st_select_lex_unit::cleanup()
DBUG_RETURN(FALSE);
}
}
+ columns_are_renamed= false;
cleaned= 1;
for (SELECT_LEX *sl= first_select(); sl; sl= sl->next_select())
1
0