[Commits] d48294d15c4: MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
by Varun 09 Oct '19
revision-id: d48294d15c4895215d5facef97fc80c03cd6b4b0 (mariadb-10.4.4-341-gd48294d15c4)
parent(s): a340af922361e3958e5d6653c8b840771db282f2
author: Varun Gupta
committer: Varun Gupta
timestamp: 2019-09-28 13:06:44 +0530
message:
MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
When the SJM scan table is the first table in the join order and the sorting
is done on that table, we need to make sure that values are unpacked to the
base table fields in two cases:
1) when reading the SJM table and writing the sort keys into the sort buffer
2) when reading the sorted data back from the sort file
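For reference, a minimal SQL sketch of the affected query shape (it mirrors the
test case added below; the table names and data come from that test, and the
optimizer switch is on by default):

  SET optimizer_switch='orderby_uses_equalities=on';
  CREATE TABLE t1 (a INT, b INT, PRIMARY KEY(a));
  CREATE TABLE t2 (a INT, b INT);
  INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6),
                              (2354,7),(321421,3),(535,2),(4535,3);
  INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3);
  -- t1.a is equal (through the IN-equality) to the materialized <subquery2>
  -- column, so the sort can be done on the SJM scan table; the fix unpacks
  -- its rows into t1's fields both when building the sort keys and when
  -- reading back the sorted rows.
  SELECT t1.a FROM t1 WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
  ORDER BY t1.a DESC;
  DROP TABLE t1, t2;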
---
mysql-test/main/order_by.result | 138 +++++++++++++++++++++++++++++++++++++++-
mysql-test/main/order_by.test | 34 ++++++++++
sql/filesort.cc | 17 +++++
sql/opt_subselect.cc | 10 ++-
sql/records.cc | 13 ++++
sql/records.h | 1 +
sql/sql_select.cc | 89 ++++++++++----------------
sql/sql_select.h | 4 +-
sql/table.h | 1 +
9 files changed, 246 insertions(+), 61 deletions(-)
diff --git a/mysql-test/main/order_by.result b/mysql-test/main/order_by.result
index b059cc686cd..e74583670fc 100644
--- a/mysql-test/main/order_by.result
+++ b/mysql-test/main/order_by.result
@@ -3322,7 +3322,7 @@ WHERE books.library_id = 8663 AND
books.scheduled_for_removal=0 )
ORDER BY wings.id;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 2 100.00 Using temporary; Using filesort
+1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 2 100.00 Using filesort
1 PRIMARY wings eq_ref PRIMARY PRIMARY 4 test.books.wings_id 1 100.00
2 MATERIALIZED books ref library_idx library_idx 4 const 2 100.00 Using where
Warnings:
@@ -3436,3 +3436,139 @@ Note 1003 select `test`.`t4`.`a` AS `a`,`test`.`t4`.`b` AS `b`,`test`.`t4`.`c` A
set histogram_size=@tmp_h, histogram_type=@tmp_ht, use_stat_tables=@tmp_u,
optimizer_use_condition_selectivity=@tmp_o;
drop table t1,t2,t3,t4;
+#
+# MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
+#
+CREATE TABLE t1 (a INT, b int, primary key(a));
+CREATE TABLE t2 (a INT, b INT);
+INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6),
+(2354,7),(321421,3),(535,2),(4535,3);
+INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3);
+# Join order should have the SJM scan table as the first table for both
+# the queries with GROUP BY and ORDER BY clause.
+EXPLAIN SELECT t1.a
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ORDER BY t1.a DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 3 Using filesort
+1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t2.a 1 Using index
+2 MATERIALIZED t2 ALL NULL NULL NULL NULL 3 Using where
+EXPLAIN FORMAT=JSON SELECT t1.a
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ORDER BY t1.a DESC;
+EXPLAIN
+{
+ "query_block": {
+ "select_id": 1,
+ "read_sorted_file": {
+ "filesort": {
+ "sort_key": "t1.a desc",
+ "table": {
+ "table_name": "<subquery2>",
+ "access_type": "ALL",
+ "possible_keys": ["distinct_key"],
+ "rows": 3,
+ "filtered": 100,
+ "materialized": {
+ "unique": 1,
+ "query_block": {
+ "select_id": 2,
+ "table": {
+ "table_name": "t2",
+ "access_type": "ALL",
+ "rows": 3,
+ "filtered": 100,
+ "attached_condition": "t2.b = 3 and t2.a is not null"
+ }
+ }
+ }
+ }
+ }
+ },
+ "table": {
+ "table_name": "t1",
+ "access_type": "eq_ref",
+ "possible_keys": ["PRIMARY"],
+ "key": "PRIMARY",
+ "key_length": "4",
+ "used_key_parts": ["a"],
+ "ref": ["test.t2.a"],
+ "rows": 1,
+ "filtered": 100,
+ "using_index": true
+ }
+ }
+}
+SELECT t1.a
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ORDER BY t1.a DESC;
+a
+273
+96
+58
+EXPLAIN SELECT t1.a, group_concat(t1.b)
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+GROUP BY t1.a DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 3 Using filesort
+1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t2.a 1
+2 MATERIALIZED t2 ALL NULL NULL NULL NULL 3 Using where
+EXPLAIN FORMAT=JSON SELECT t1.a, group_concat(t1.b)
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+GROUP BY t1.a DESC;
+EXPLAIN
+{
+ "query_block": {
+ "select_id": 1,
+ "read_sorted_file": {
+ "filesort": {
+ "sort_key": "t1.a desc",
+ "table": {
+ "table_name": "<subquery2>",
+ "access_type": "ALL",
+ "possible_keys": ["distinct_key"],
+ "rows": 3,
+ "filtered": 100,
+ "materialized": {
+ "unique": 1,
+ "query_block": {
+ "select_id": 2,
+ "table": {
+ "table_name": "t2",
+ "access_type": "ALL",
+ "rows": 3,
+ "filtered": 100,
+ "attached_condition": "t2.b = 3 and t2.a is not null"
+ }
+ }
+ }
+ }
+ }
+ },
+ "table": {
+ "table_name": "t1",
+ "access_type": "eq_ref",
+ "possible_keys": ["PRIMARY"],
+ "key": "PRIMARY",
+ "key_length": "4",
+ "used_key_parts": ["a"],
+ "ref": ["test.t2.a"],
+ "rows": 1,
+ "filtered": 100
+ }
+ }
+}
+SELECT t1.a, group_concat(t1.b)
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+GROUP BY t1.a DESC;
+a group_concat(t1.b)
+273 3
+96 2
+58 1
+DROP TABLE t1, t2;
diff --git a/mysql-test/main/order_by.test b/mysql-test/main/order_by.test
index 934c503302f..b3e43d27e2f 100644
--- a/mysql-test/main/order_by.test
+++ b/mysql-test/main/order_by.test
@@ -2276,3 +2276,37 @@ set histogram_size=@tmp_h, histogram_type=@tmp_ht, use_stat_tables=@tmp_u,
optimizer_use_condition_selectivity=@tmp_o;
drop table t1,t2,t3,t4;
+
+
+--echo #
+--echo # MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
+--echo #
+
+CREATE TABLE t1 (a INT, b int, primary key(a));
+CREATE TABLE t2 (a INT, b INT);
+
+INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6),
+ (2354,7),(321421,3),(535,2),(4535,3);
+INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3);
+
+--echo # Join order should have the SJM scan table as the first table for both
+--echo # the queries with GROUP BY and ORDER BY clause.
+
+let $query= SELECT t1.a
+ FROM t1
+ WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ ORDER BY t1.a DESC;
+
+eval EXPLAIN $query;
+eval EXPLAIN FORMAT=JSON $query;
+eval $query;
+
+let $query= SELECT t1.a, group_concat(t1.b)
+ FROM t1
+ WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ GROUP BY t1.a DESC;
+
+eval EXPLAIN $query;
+eval EXPLAIN FORMAT=JSON $query;
+eval $query;
+DROP TABLE t1, t2;
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 3f4291cfb1f..e5c83293e9f 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -716,11 +716,21 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
*found_rows= 0;
ref_pos= &file->ref[0];
next_pos=ref_pos;
+ JOIN_TAB *tab= sort_form->reginfo.join_tab;
+ JOIN *join= tab ? tab->join : NULL;
+ bool first_is_in_sjm_nest= FALSE;
DBUG_EXECUTE_IF("show_explain_in_find_all_keys",
dbug_serve_apcs(thd, 1);
);
+ if (join && join->table_count != join->const_tables &&
+ (join->join_tab + join->const_tables == tab))
+ {
+ TABLE_LIST *tbl_for_first= sort_form->pos_in_table_list;
+ first_is_in_sjm_nest= tbl_for_first && tbl_for_first->is_sjm_scan_table();
+ }
+
if (!quick_select)
{
next_pos=(uchar*) 0; /* Find records in sequence */
@@ -756,13 +766,20 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
goto err;
}
+ if (first_is_in_sjm_nest)
+ sort_form->column_bitmaps_set(save_read_set, save_write_set);
+
DEBUG_SYNC(thd, "after_index_merge_phase1");
for (;;)
{
if (quick_select)
error= select->quick->get_next();
else /* Not quick-select */
+ {
error= file->ha_rnd_next(sort_form->record[0]);
+ if (first_is_in_sjm_nest)
+ tab->unpacking_to_base_table_fields();
+ }
if (unlikely(error))
break;
file->position(sort_form->record[0]);
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index 87458357865..f837a6394af 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -4252,11 +4252,11 @@ bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab)
sjm_tab->type= JT_ALL;
/* Initialize full scan */
- sjm_tab->read_first_record= join_read_record_no_init;
+ sjm_tab->read_first_record= join_init_read_record;
sjm_tab->read_record.copy_field= sjm->copy_field;
sjm_tab->read_record.copy_field_end= sjm->copy_field +
sjm->sjm_table_cols.elements;
- sjm_tab->read_record.read_record_func= rr_sequential_and_unpack;
+ sjm_tab->read_record.read_record_func= read_record_func_for_rr_and_unpack;
}
sjm_tab->bush_children->end[-1].next_select= end_sj_materialize;
@@ -7105,3 +7105,9 @@ bool Item_in_subselect::pushdown_cond_for_in_subquery(THD *thd, Item *cond)
thd->lex->current_select= save_curr_select;
DBUG_RETURN(FALSE);
}
+
+
+bool TABLE_LIST::is_sjm_scan_table()
+{
+ return is_active_sjm() && sj_mat_info->is_sj_scan;
+}
diff --git a/sql/records.cc b/sql/records.cc
index 3d709182a4e..f6885f773d5 100644
--- a/sql/records.cc
+++ b/sql/records.cc
@@ -709,3 +709,16 @@ static int rr_cmp(uchar *a,uchar *b)
return (int) a[7] - (int) b[7];
#endif
}
+
+
+int read_record_func_for_rr_and_unpack(READ_RECORD *info)
+{
+ int error;
+ if ((error= info->read_record_func_and_unpack_calls(info)))
+ return error;
+
+ for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
+ (*cp->do_copy)(cp);
+
+ return error;
+}
diff --git a/sql/records.h b/sql/records.h
index faf0d13c9a9..037a06b9d34 100644
--- a/sql/records.h
+++ b/sql/records.h
@@ -55,6 +55,7 @@ struct READ_RECORD
TABLE *table; /* Head-form */
Unlock_row_func unlock_row;
Read_func read_record_func;
+ Read_func read_record_func_and_unpack_calls;
THD *thd;
SQL_SELECT *select;
uint ref_length, reclength, rec_cache_size, error_offset;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 36d9eda3383..28bc57c692f 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -14015,37 +14015,8 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
can be used without tmp. table.
*/
bool can_subst_to_first_table= false;
- bool first_is_in_sjm_nest= false;
- if (first_is_base_table)
- {
- TABLE_LIST *tbl_for_first=
- join->join_tab[join->const_tables].table->pos_in_table_list;
- first_is_in_sjm_nest= tbl_for_first->sj_mat_info &&
- tbl_for_first->sj_mat_info->is_used;
- }
- /*
- Currently we do not employ the optimization that uses multiple
- equalities for ORDER BY to remove tmp table in the case when
- the first table happens to be the result of materialization of
- a semi-join nest ( <=> first_is_in_sjm_nest == true).
-
- When a semi-join nest is materialized and scanned to look for
- possible matches in the remaining tables for every its row
- the fields from the result of materialization are copied
- into the record buffers of tables from the semi-join nest.
- So these copies are used to access the remaining tables rather
- than the fields from the result of materialization.
-
- Unfortunately now this so-called 'copy back' technique is
- supported only if the rows are scanned with the rr_sequential
- function, but not with other rr_* functions that are employed
- when the result of materialization is required to be sorted.
-
- TODO: either to support 'copy back' technique for the above case,
- or to get rid of this technique altogether.
- */
if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP) &&
- first_is_base_table && !first_is_in_sjm_nest &&
+ first_is_base_table &&
order->item[0]->real_item()->type() == Item::FIELD_ITEM &&
join->cond_equal)
{
@@ -19922,19 +19893,6 @@ do_select(JOIN *join, Procedure *procedure)
}
-int rr_sequential_and_unpack(READ_RECORD *info)
-{
- int error;
- if (unlikely((error= rr_sequential(info))))
- return error;
-
- for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
- (*cp->do_copy)(cp);
-
- return error;
-}
-
-
/**
@brief
Instantiates temporary table
@@ -21223,6 +21181,8 @@ bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab)
int join_init_read_record(JOIN_TAB *tab)
{
+ bool need_unpacking= FALSE;
+ JOIN *join= tab->join;
/*
Note: the query plan tree for the below operations is constructed in
save_agg_explain_data.
@@ -21232,6 +21192,12 @@ int join_init_read_record(JOIN_TAB *tab)
if (tab->filesort && tab->sort_table()) // Sort table.
return 1;
+ if (join->top_join_tab_count != join->const_tables)
+ {
+ TABLE_LIST *tbl= tab->table->pos_in_table_list;
+ need_unpacking= tbl ? tbl->is_sjm_scan_table() : FALSE;
+ }
+
tab->build_range_rowid_filter_if_needed();
DBUG_EXECUTE_IF("kill_join_init_read_record",
@@ -21249,16 +21215,6 @@ int join_init_read_record(JOIN_TAB *tab)
if (!tab->preread_init_done && tab->preread_init())
return 1;
-
- if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
- tab->select, tab->filesort_result, 1,1, FALSE))
- return 1;
- return tab->read_record.read_record();
-}
-
-int
-join_read_record_no_init(JOIN_TAB *tab)
-{
Copy_field *save_copy, *save_copy_end;
/*
@@ -21268,12 +21224,20 @@ join_read_record_no_init(JOIN_TAB *tab)
save_copy= tab->read_record.copy_field;
save_copy_end= tab->read_record.copy_field_end;
- init_read_record(&tab->read_record, tab->join->thd, tab->table,
- tab->select, tab->filesort_result, 1, 1, FALSE);
+
+ if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
+ tab->select, tab->filesort_result, 1, 1, FALSE))
+ return 1;
tab->read_record.copy_field= save_copy;
tab->read_record.copy_field_end= save_copy_end;
- tab->read_record.read_record_func= rr_sequential_and_unpack;
+
+ if (need_unpacking)
+ {
+ tab->read_record.read_record_func_and_unpack_calls=
+ tab->read_record.read_record_func;
+ tab->read_record.read_record_func = read_record_func_for_rr_and_unpack;
+ }
return tab->read_record.read_record();
}
@@ -28981,6 +28945,19 @@ void build_notnull_conds_for_inner_nest_of_outer_join(JOIN *join,
}
+/*
+ @brief
+ Unpacking temp table fields to base table fields.
+*/
+
+void JOIN_TAB::unpacking_to_base_table_fields()
+{
+ for (Copy_field *cp= read_record.copy_field;
+ cp != read_record.copy_field_end; cp++)
+ (*cp->do_copy)(cp);
+}
+
+
/**
@} (end of group Query_Optimizer)
*/
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 4f7bf49f635..545d4a788cc 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -223,7 +223,7 @@ typedef enum_nested_loop_state
(*Next_select_func)(JOIN *, struct st_join_table *, bool);
Next_select_func setup_end_select_func(JOIN *join, JOIN_TAB *tab);
int rr_sequential(READ_RECORD *info);
-int rr_sequential_and_unpack(READ_RECORD *info);
+int read_record_func_for_rr_and_unpack(READ_RECORD *info);
Item *remove_pushed_top_conjuncts(THD *thd, Item *cond);
Item *and_new_conditions_to_optimized_cond(THD *thd, Item *cond,
COND_EQUAL **cond_eq,
@@ -676,6 +676,7 @@ typedef struct st_join_table {
table_map remaining_tables);
bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables,
bool is_const_table);
+ void unpacking_to_base_table_fields();
} JOIN_TAB;
@@ -2352,7 +2353,6 @@ create_virtual_tmp_table(THD *thd, Field *field)
int test_if_item_cache_changed(List<Cached_item> &list);
int join_init_read_record(JOIN_TAB *tab);
-int join_read_record_no_init(JOIN_TAB *tab);
void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key);
inline Item * and_items(THD *thd, Item* cond, Item *item)
{
diff --git a/sql/table.h b/sql/table.h
index 1a7e5fbd4dc..35ba9bbb95d 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -2622,6 +2622,7 @@ struct TABLE_LIST
*/
const char *get_table_name() const { return view != NULL ? view_name.str : table_name.str; }
bool is_active_sjm();
+ bool is_sjm_scan_table();
bool is_jtbm() { return MY_TEST(jtbm_subselect != NULL); }
st_select_lex_unit *get_unit();
st_select_lex *get_single_select();
[Commits] 6dc3fa2e054: Support Create_time and Update_time in MyRocks table status
by psergey 06 Oct '19
revision-id: 6dc3fa2e054d999a12cd5d7aaf637e4ac7c3f149 (fb-prod201903-144-g6dc3fa2e054)
parent(s): d97c0c628e5dc60abd725f6a7120a8d87b09321e
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-10-06 23:48:49 +0300
message:
Support Create_time and Update_time in MyRocks table status
(variant #4, with @@rocksdb_table_dictionary_format)
- Create_time is stored in MyRocks' internal data dictionary.
- Update_time is in-memory only (like in InnoDB).
@@rocksdb_table_dictionary_format is a global read-only variable (set it
from my.cnf) which controls the on-disk data format.
rocksdb_table_dictionary_format=1 means use the same data format as
before; Create_time will always be NULL for all tables that are created.
rocksdb_table_dictionary_format=2 (the default) means use the newer data
format; all newly-created tables will have a proper Create_time attribute.
Downgrades are only possible if one hasn't run any DDL that re-creates a
table.
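For reference, a usage sketch (an assumed flow, not taken verbatim from the
patch; the variable is read-only at runtime, so it is set from my.cnf, and the
exact checks are in the tests added below):

  -- In my.cnf:
  --   [mysqld]
  --   rocksdb_table_dictionary_format=2
  SELECT @@rocksdb_table_dictionary_format;   -- 2 is the default
  CREATE TABLE t1 (a INT) ENGINE=ROCKSDB;
  INSERT INTO t1 VALUES (1);
  -- With format 2, Create_time is persisted in the data dictionary;
  -- Update_time is in-memory only and is reset by a server restart.
  SELECT create_time IS NOT NULL, update_time IS NOT NULL
    FROM information_schema.tables
   WHERE table_schema = DATABASE() AND table_name = 't1';
  DROP TABLE t1;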
---
mysql-test/suite/rocksdb/include/bulk_load.inc | 4 +-
.../suite/rocksdb/include/bulk_load_unsorted.inc | 4 +-
mysql-test/suite/rocksdb/r/bulk_load.result | 12 +--
mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result | 12 +--
.../rocksdb/r/bulk_load_rev_cf_and_data.result | 12 +--
.../suite/rocksdb/r/bulk_load_rev_data.result | 12 +--
.../suite/rocksdb/r/bulk_load_unsorted.result | 12 +--
.../suite/rocksdb/r/bulk_load_unsorted_rev.result | 12 +--
mysql-test/suite/rocksdb/r/issue255.result | 16 ++--
mysql-test/suite/rocksdb/r/rocksdb.result | 7 +-
.../r/rocksdb_table_dictionary_format.result | 37 ++++++++++
.../suite/rocksdb/r/show_table_status.result | 85 +++++++++++++++++++++-
mysql-test/suite/rocksdb/r/truncate_table.result | 8 +-
mysql-test/suite/rocksdb/t/issue255.test | 17 +++--
mysql-test/suite/rocksdb/t/rocksdb.test | 4 +-
.../rocksdb/t/rocksdb_table_dictionary_format.test | 67 +++++++++++++++++
mysql-test/suite/rocksdb/t/show_table_status.test | 81 ++++++++++++++++++++-
mysql-test/suite/rocksdb/t/truncate_table.test | 8 +-
.../r/rocksdb_table_dictionary_format_basic.result | 14 ++++
.../t/rocksdb_table_dictionary_format_basic.test | 16 ++++
storage/rocksdb/ha_rocksdb.cc | 53 ++++++++++++++
storage/rocksdb/ha_rocksdb.h | 1 +
storage/rocksdb/rdb_datadic.cc | 57 ++++++++++++---
storage/rocksdb/rdb_datadic.h | 36 ++++++++-
24 files changed, 502 insertions(+), 85 deletions(-)
diff --git a/mysql-test/suite/rocksdb/include/bulk_load.inc b/mysql-test/suite/rocksdb/include/bulk_load.inc
index 1b79825e507..7e163602202 100644
--- a/mysql-test/suite/rocksdb/include/bulk_load.inc
+++ b/mysql-test/suite/rocksdb/include/bulk_load.inc
@@ -121,12 +121,12 @@ set rocksdb_bulk_load=0;
--remove_file $file
# Make sure row count index stats are correct
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
ANALYZE TABLE t1, t2, t3;
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
# Make sure all the data is there.
diff --git a/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc b/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
index 5cdc76a32d4..812af0401aa 100644
--- a/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
+++ b/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
@@ -119,12 +119,12 @@ set rocksdb_bulk_load=0;
--remove_file $file
# Make sure row count index stats are correct
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
ANALYZE TABLE t1, t2, t3;
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
# Make sure all the data is there.
diff --git a/mysql-test/suite/rocksdb/r/bulk_load.result b/mysql-test/suite/rocksdb/r/bulk_load.result
index a36f99a7619..76db28e66bd 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
index b5d3e252c5d..ae363f7ec0c 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
index f46acd41080..dd8dd7e60a8 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
index 3389968ef37..96738ae62e2 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result b/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
index 924032549ac..87fc63af2da 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
@@ -70,9 +70,9 @@ LOAD DATA INFILE <input_file> INTO TABLE t3;
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -80,9 +80,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
select count(a) from t1;
count(a)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result b/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
index 3cc9fb8e459..8e0914f0159 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
@@ -70,9 +70,9 @@ LOAD DATA INFILE <input_file> INTO TABLE t3;
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -80,9 +80,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
select count(a) from t1;
count(a)
5000000
diff --git a/mysql-test/suite/rocksdb/r/issue255.result b/mysql-test/suite/rocksdb/r/issue255.result
index c1ce3be2276..b45b3b5afc7 100644
--- a/mysql-test/suite/rocksdb/r/issue255.result
+++ b/mysql-test/suite/rocksdb/r/issue255.result
@@ -2,7 +2,7 @@ CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 6 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ('538647864786478647864');
Warnings:
Warning 1264 Out of range value for column 'pk' at row 1
@@ -12,7 +12,7 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
SELECT * FROM t1;
@@ -21,7 +21,7 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
SELECT * FROM t1;
@@ -30,13 +30,13 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 # # NULL latin1_swedish_ci NULL
DROP TABLE t1;
CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 6 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES (1000);
Warnings:
Warning 1264 Out of range value for column 'pk' at row 1
@@ -46,7 +46,7 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
SELECT * FROM t1;
@@ -55,7 +55,7 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
SELECT * FROM t1;
@@ -64,5 +64,5 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/r/rocksdb.result b/mysql-test/suite/rocksdb/r/rocksdb.result
index 088eb050f6f..c76f31afaab 100644
--- a/mysql-test/suite/rocksdb/r/rocksdb.result
+++ b/mysql-test/suite/rocksdb/r/rocksdb.result
@@ -979,6 +979,7 @@ rocksdb_store_row_debug_checksums OFF
rocksdb_strict_collation_check OFF
rocksdb_strict_collation_exceptions
rocksdb_table_cache_numshardbits 6
+rocksdb_table_dictionary_format 2
rocksdb_table_stats_background_thread_nice_value 19
rocksdb_table_stats_max_num_rows_scanned 0
rocksdb_table_stats_recalc_threshold_count 100
@@ -1417,7 +1418,7 @@ create table t1 (i int primary key auto_increment) engine=RocksDB;
insert into t1 values (null),(null);
show table status like 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 # 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 # 0 0 0 3 # # NULL latin1_swedish_ci NULL
drop table t1;
#
# Fix Issue #4: Crash when using pseudo-unique keys
@@ -2612,7 +2613,7 @@ CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(-1),(0);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 3 # # NULL latin1_swedish_ci NULL
SELECT * FROM t1;
a
-1
@@ -2623,7 +2624,7 @@ CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(10),(0);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 12 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 12 # # NULL latin1_swedish_ci NULL
SELECT * FROM t1;
a
1
diff --git a/mysql-test/suite/rocksdb/r/rocksdb_table_dictionary_format.result b/mysql-test/suite/rocksdb/r/rocksdb_table_dictionary_format.result
new file mode 100644
index 00000000000..8f2fa96a04a
--- /dev/null
+++ b/mysql-test/suite/rocksdb/r/rocksdb_table_dictionary_format.result
@@ -0,0 +1,37 @@
+select @@rocksdb_table_dictionary_format;
+@@rocksdb_table_dictionary_format
+2
+#
+# Server restarted
+#
+select @@rocksdb_table_dictionary_format;
+@@rocksdb_table_dictionary_format
+1
+create table t1 (a int) engine=rocksdb;
+insert into t1 values (1);
+# Create_time will be NULL as the table doesn't support it
+# Update_time will be non-null
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time update_time is not null
+NULL 1
+#
+# Server restarted
+#
+select @@rocksdb_table_dictionary_format;
+@@rocksdb_table_dictionary_format
+2
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time update_time is not null
+NULL 0
+set global rocksdb_compact_cf='default';
+select concat('00', '00', '00', '01', hex('test.t1'));
+concat('00', '00', '00', '01', hex('test.t1'))
+00000001746573742E7431
+Found the datadic entry
+Value has format version 1, followed by 8 bytes describing one index
+Done
+drop table t1;
diff --git a/mysql-test/suite/rocksdb/r/show_table_status.result b/mysql-test/suite/rocksdb/r/show_table_status.result
index 29140f045e4..345882040ef 100644
--- a/mysql-test/suite/rocksdb/r/show_table_status.result
+++ b/mysql-test/suite/rocksdb/r/show_table_status.result
@@ -7,12 +7,12 @@ set global rocksdb_force_flush_memtable_now = true;
CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' );
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL utf8_general_ci NULL
+t1 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL utf8_general_ci NULL
SHOW TABLE STATUS WHERE name LIKE 't2';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
DROP TABLE t1, t2, t3;
CREATE DATABASE `db_new..............................................end`;
USE `db_new..............................................end`;
@@ -22,3 +22,80 @@ SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.table_statistics WHERE T
TABLE_SCHEMA db_new..............................................end
TABLE_NAME t1_new..............................................end
DROP DATABASE `db_new..............................................end`;
+#
+# MDEV-17171: Bug: RocksDB Tables do not have "Creation Date"
+#
+use test;
+create table t1 (a int) engine=rocksdb;
+select create_time is not null, update_time, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time check_time
+1 NULL NULL
+insert into t1 values (1);
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time is not null check_time
+1 1 NULL
+flush tables;
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time is not null check_time
+1 1 NULL
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+select sleep(3);
+sleep(3)
+0
+insert into t1 values (2);
+select
+create_time=@create_tm /* should not change */ ,
+timestampdiff(second, @update_tm, update_time) > 2,
+check_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=@create_tm 1
+timestampdiff(second, @update_tm, update_time) > 2 1
+check_time NULL
+#
+# Check how create_time survives ALTER TABLE.
+# First, an ALTER TABLE that re-creates the table:
+alter table t1 add b int;
+select
+create_time<>@create_tm /* should change */,
+create_time IS NOT NULL,
+update_time IS NULL
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time<>@create_tm 1
+create_time IS NOT NULL 1
+update_time IS NULL 1
+insert into t1 values (5,5);
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+# Then, an in-place ALTER TABLE:
+alter table t1 add key (a);
+select
+create_time=@create_tm /* should not change */,
+update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=@create_tm 1
+update_time NULL
+#
+# Check what is left after server restart
+#
+# Save t1's creation time
+create table t2 as
+select create_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+select
+create_time=(select create_time from t2) /* should change */,
+update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=(select create_time from t2) 1
+update_time NULL
+drop table t1, t2;
diff --git a/mysql-test/suite/rocksdb/r/truncate_table.result b/mysql-test/suite/rocksdb/r/truncate_table.result
index 1544256f194..79b266a2453 100644
--- a/mysql-test/suite/rocksdb/r/truncate_table.result
+++ b/mysql-test/suite/rocksdb/r/truncate_table.result
@@ -9,19 +9,19 @@ DROP TABLE t1;
CREATE TABLE t1 (a INT KEY AUTO_INCREMENT, c CHAR(8)) ENGINE=rocksdb;
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 1 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 (c) VALUES ('a'),('b'),('c');
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 4 # # NULL latin1_swedish_ci NULL
TRUNCATE TABLE t1;
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 1 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 (c) VALUES ('d');
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 2 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 2 # # NULL latin1_swedish_ci NULL
SELECT a,c FROM t1;
a c
1 d
diff --git a/mysql-test/suite/rocksdb/t/issue255.test b/mysql-test/suite/rocksdb/t/issue255.test
index 370dece0c6c..686f45b4056 100644
--- a/mysql-test/suite/rocksdb/t/issue255.test
+++ b/mysql-test/suite/rocksdb/t/issue255.test
@@ -3,24 +3,25 @@
CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 VALUES ('538647864786478647864');
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SELECT * FROM t1;
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
DROP TABLE t1;
@@ -28,24 +29,24 @@ DROP TABLE t1;
CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 VALUES (1000);
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/t/rocksdb.test b/mysql-test/suite/rocksdb/t/rocksdb.test
index 5eff0fbf38f..7dcae569c92 100644
--- a/mysql-test/suite/rocksdb/t/rocksdb.test
+++ b/mysql-test/suite/rocksdb/t/rocksdb.test
@@ -1198,7 +1198,7 @@ drop table t1;
create table t1 (i int primary key auto_increment) engine=RocksDB;
insert into t1 values (null),(null);
---replace_column 7 #
+--replace_column 7 # 12 # 13 #
show table status like 't1';
drop table t1;
@@ -1903,11 +1903,13 @@ DROP TABLE t1;
# value is 4 while MyRocks will show it as 3.
CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(-1),(0);
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
SELECT * FROM t1;
DROP TABLE t1;
CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(10),(0);
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
SELECT * FROM t1;
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/t/rocksdb_table_dictionary_format.test b/mysql-test/suite/rocksdb/t/rocksdb_table_dictionary_format.test
new file mode 100644
index 00000000000..3dedf6502ab
--- /dev/null
+++ b/mysql-test/suite/rocksdb/t/rocksdb_table_dictionary_format.test
@@ -0,0 +1,67 @@
+--source include/have_rocksdb.inc
+
+select @@rocksdb_table_dictionary_format;
+
+#
+# Check the upgrade from Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1 to _2
+#
+
+--let $_mysqld_option=--rocksdb_table_dictionary_format=1
+--source include/restart_mysqld_with_option.inc
+
+--echo #
+--echo # Server restarted
+--echo #
+select @@rocksdb_table_dictionary_format;
+
+create table t1 (a int) engine=rocksdb;
+insert into t1 values (1);
+
+--echo # Create_time will be NULL as the table doesn't support it
+--echo # Update_time will be non-null
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--let $_mysqld_option=--rocksdb_table_dictionary_format=2
+--source include/restart_mysqld_with_option.inc
+--echo #
+--echo # Server restarted
+--echo #
+select @@rocksdb_table_dictionary_format;
+
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+set global rocksdb_compact_cf='default';
+# use: $MYSQLTEST_VARDIR $MYSQL_SST_DUMP
+let MYSQL_DATADIR = `select @@datadir`;
+let MYSQL_SST_DUMP = $MYSQL_SST_DUMP;
+
+select concat('00', '00', '00', '01', hex('test.t1'));
+
+# We are looking to find a record like this:
+# '00000001746573742E7431' seq:0, type:1 => 00010000000000000104
+# that is:
+# '00000001:746573742E7431' seq:0, type:1 => 0001: 0000000000000104
+# the key is: DDL_ENTRY_INDEX_START_NUMBER, "test.t1"
+# the value is: DDL_ENTRY_INDEX_VERSION_1, {cf_id, index_id}
+perl;
+ my $datadir = $ENV{'MYSQL_DATADIR'};
+ my $sst_dump = $ENV{'MYSQL_SST_DUMP'};
+ open(IN, "$sst_dump --command=scan --output_hex --file=$datadir/.rocksdb |");
+ while(<IN>) {
+ if ($_ =~ /^\'00000001746573742E7431\'/) {
+ print "Found the datadic entry\n";
+ if ($_ =~ / 0001[0-9]{16}$/) {
+ print "Value has format version 1, followed by 8 bytes describing one index\n"
+ } else {
+ print "Unexpected value. $_\n";
+ }
+ }
+ }
+ print "Done\n";
+ close(IN);
+EOF
+drop table t1;
+
diff --git a/mysql-test/suite/rocksdb/t/show_table_status.test b/mysql-test/suite/rocksdb/t/show_table_status.test
index 29cc2ccfb5e..59effcc788c 100644
--- a/mysql-test/suite/rocksdb/t/show_table_status.test
+++ b/mysql-test/suite/rocksdb/t/show_table_status.test
@@ -24,7 +24,7 @@ set global rocksdb_force_flush_memtable_now = true;
CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
---replace_column 6 # 7 #
+--replace_column 6 # 7 # 12 # 13 #
SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' );
# Some statistics don't get updated as quickly. The Data_length and
@@ -48,7 +48,7 @@ set global rocksdb_force_flush_memtable_now = true;
# We expect the number of rows to be 10000. Data_len and Avg_row_len
# may vary, depending on built-in compression library.
---replace_column 6 # 7 #
+--replace_column 6 # 7 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't2';
DROP TABLE t1, t2, t3;
@@ -62,3 +62,80 @@ CREATE TABLE `t1_new..............................................end`(a int) en
INSERT INTO `t1_new..............................................end` VALUES (1);
--query_vertical SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.table_statistics WHERE TABLE_NAME = 't1_new..............................................end'
DROP DATABASE `db_new..............................................end`;
+--echo #
+--echo # MDEV-17171: Bug: RocksDB Tables do not have "Creation Date"
+--echo #
+use test;
+create table t1 (a int) engine=rocksdb;
+
+select create_time is not null, update_time, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+insert into t1 values (1);
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+flush tables;
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+select sleep(3);
+insert into t1 values (2);
+
+--vertical_results
+select
+ create_time=@create_tm /* should not change */ ,
+ timestampdiff(second, @update_tm, update_time) > 2,
+ check_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo #
+--echo # Check how create_time survives ALTER TABLE.
+--echo # First, an ALTER TABLE that re-creates the table:
+alter table t1 add b int;
+select
+ create_time<>@create_tm /* should change */,
+ create_time IS NOT NULL,
+ update_time IS NULL
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+insert into t1 values (5,5);
+
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo # Then, an in-place ALTER TABLE:
+alter table t1 add key (a);
+
+select
+ create_time=@create_tm /* should not change */,
+ update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo #
+--echo # Check what is left after server restart
+--echo #
+
+--echo # Save t1's creation time
+create table t2 as
+select create_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--source include/restart_mysqld.inc
+
+select
+ create_time=(select create_time from t2) /* should change */,
+ update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+drop table t1, t2;
diff --git a/mysql-test/suite/rocksdb/t/truncate_table.test b/mysql-test/suite/rocksdb/t/truncate_table.test
index a61488654a3..1001eeb6cde 100644
--- a/mysql-test/suite/rocksdb/t/truncate_table.test
+++ b/mysql-test/suite/rocksdb/t/truncate_table.test
@@ -29,22 +29,22 @@ DROP TABLE t1;
CREATE TABLE t1 (a INT KEY AUTO_INCREMENT, c CHAR(8)) ENGINE=rocksdb;
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 (c) VALUES ('a'),('b'),('c');
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
TRUNCATE TABLE t1;
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 (c) VALUES ('d');
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--sorted_result
diff --git a/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_table_dictionary_format_basic.result b/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_table_dictionary_format_basic.result
new file mode 100644
index 00000000000..825d7a7113a
--- /dev/null
+++ b/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_table_dictionary_format_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(2);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_TABLE_DICTIONARY_FORMAT;
+SELECT @start_global_value;
+@start_global_value
+2
+"Trying to set variable @@global.ROCKSDB_TABLE_DICTIONARY_FORMAT to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_TABLE_DICTIONARY_FORMAT = 444;
+ERROR HY000: Variable 'rocksdb_table_dictionary_format' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_table_dictionary_format_basic.test b/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_table_dictionary_format_basic.test
new file mode 100644
index 00000000000..2b8c6e81bf7
--- /dev/null
+++ b/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_table_dictionary_format_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(2);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_TABLE_DICTIONARY_FORMAT
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index d6c89087bea..a0a344187cf 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -122,6 +122,8 @@ const std::string PER_INDEX_CF_NAME("$per_index_cf");
static std::vector<std::string> rdb_tables_to_recalc;
+uint rocksdb_table_dictionary_format;
+
class Rdb_explicit_snapshot : public explicit_snapshot {
public:
static std::shared_ptr<Rdb_explicit_snapshot> create(
@@ -2136,6 +2138,17 @@ static MYSQL_SYSVAR_ULONGLONG(
"MultiGet",
nullptr, nullptr, SIZE_T_MAX, /* min */ 0, /* max */ SIZE_T_MAX, 0);
+static MYSQL_SYSVAR_UINT(table_dictionary_format,
+ rocksdb_table_dictionary_format,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Max Table Dictionary format version that the server "
+ "may create (use lower values for backward "
+ " compatibility, higher values for new features)",
+ nullptr, nullptr,
+ ROCKSDB_DATADIC_FORMAT_DEFAULT,
+ ROCKSDB_DATADIC_FORMAT_INITIAL,
+ ROCKSDB_DATADIC_FORMAT_MAX, 0);
+
static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100;
static struct st_mysql_sys_var *rocksdb_system_variables[] = {
@@ -2309,6 +2322,8 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(select_bypass_log_rejected),
MYSQL_SYSVAR(select_bypass_debug_row_delay),
MYSQL_SYSVAR(select_bypass_multiget_min),
+
+ MYSQL_SYSVAR(table_dictionary_format),
nullptr};
static rocksdb::WriteOptions rdb_get_rocksdb_write_options(
@@ -2456,6 +2471,8 @@ class Rdb_transaction {
bool m_is_delayed_snapshot = false;
bool m_is_two_phase = false;
+ std::unordered_set<Rdb_tbl_def*> modified_tables;
+
private:
/*
Number of write operations this transaction had when we took the last
@@ -3139,6 +3156,25 @@ class Rdb_transaction {
virtual void start_tx() = 0;
virtual void start_stmt() = 0;
+ protected:
+ // Non-virtual functions with actions to be done on transaction commit and
+ // rollback.
+ void on_commit() {
+ time_t tm;
+ tm = time(nullptr);
+ for (auto &it : modified_tables) {
+ it->update_time = tm;
+ }
+ modified_tables.clear();
+ }
+ void on_rollback() {
+ modified_tables.clear();
+ }
+ public:
+ void log_table_write_op(Rdb_tbl_def *tbl) {
+ modified_tables.insert(tbl);
+ }
+
void set_initial_savepoint() {
/*
Set the initial savepoint. If the first statement in the transaction
@@ -3322,7 +3358,9 @@ class Rdb_transaction_impl : public Rdb_transaction {
goto error;
}
+ on_commit();
error:
+ on_rollback();
/* Save the transaction object to be reused */
release_tx();
@@ -3338,6 +3376,7 @@ class Rdb_transaction_impl : public Rdb_transaction {
public:
void rollback() override {
+ on_rollback();
m_write_count = 0;
m_insert_count = 0;
m_update_count = 0;
@@ -3659,7 +3698,9 @@ class Rdb_writebatch_impl : public Rdb_transaction {
res = true;
goto error;
}
+ on_commit();
error:
+ on_rollback();
reset();
m_write_count = 0;
@@ -3691,6 +3732,7 @@ class Rdb_writebatch_impl : public Rdb_transaction {
}
void rollback() override {
+ on_rollback();
m_write_count = 0;
m_insert_count = 0;
m_update_count = 0;
@@ -7549,6 +7591,7 @@ int ha_rocksdb::create_table(const std::string &table_name,
/* Create table/key descriptions and put them into the data dictionary */
m_tbl_def = new Rdb_tbl_def(table_name);
+ m_tbl_def->set_create_time(time(nullptr));
uint n_keys = table_arg->s->keys;
@@ -10177,6 +10220,8 @@ int ha_rocksdb::update_write_row(const uchar *const old_data,
row_info.tx->incr_insert_count();
}
+ row_info.tx->log_table_write_op(m_tbl_def);
+
if (do_bulk_commit(row_info.tx)) {
DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD);
}
@@ -10648,6 +10693,7 @@ int ha_rocksdb::delete_row(const uchar *const buf) {
}
tx->incr_delete_count();
+ tx->log_table_write_op(m_tbl_def);
if (do_bulk_commit(tx)) {
DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD);
@@ -10802,6 +10848,12 @@ int ha_rocksdb::info(uint flag) {
k->rec_per_key[j] = x;
}
}
+
+ stats.create_time = m_tbl_def->get_create_time();
+ }
+
+ if (flag & HA_STATUS_TIME) {
+ stats.update_time = m_tbl_def->update_time;
}
if (flag & HA_STATUS_ERRKEY) {
@@ -12603,6 +12655,7 @@ bool ha_rocksdb::prepare_inplace_alter_table(
m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed);
new_tdef->m_hidden_pk_val =
m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed);
+ new_tdef->set_create_time(m_tbl_def->get_create_time());
if (create_key_defs(altered_table, new_tdef, table, m_tbl_def)) {
/* Delete the new key descriptors */
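
The Rdb_transaction changes above reduce to a small bookkeeping pattern: every write path registers the table definition it touched via log_table_write_op(), commit stamps all registered definitions with the current time, and rollback just forgets them. Below is a minimal standalone sketch of that pattern in plain C++; Txn and TableDef are illustrative stand-ins for Rdb_transaction and Rdb_tbl_def, not the real classes.

#include <ctime>
#include <unordered_set>

// Illustrative stand-in for Rdb_tbl_def: only the field this sketch needs.
struct TableDef {
  std::time_t update_time = 0;  // in-memory only, as in the patch
};

// Minimal stand-in for Rdb_transaction's modified_tables bookkeeping.
class Txn {
 public:
  // Called from every write path (insert/update/delete) against a table.
  void log_table_write_op(TableDef *tbl) { modified_tables_.insert(tbl); }

  // Commit: stamp every touched table with the commit wall-clock time.
  void on_commit() {
    const std::time_t now = std::time(nullptr);
    for (TableDef *t : modified_tables_) t->update_time = now;
    modified_tables_.clear();
  }

  // Rollback: forget the touched tables; update_time stays as it was.
  void on_rollback() { modified_tables_.clear(); }

 private:
  std::unordered_set<TableDef *> modified_tables_;
};

int main() {
  TableDef t1;
  Txn txn;
  txn.log_table_write_op(&t1);
  txn.on_commit();  // t1.update_time now holds the commit time
  return t1.update_time != 0 ? 0 : 1;
}
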
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index 9a250af40c7..c36c5c98e19 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -82,6 +82,7 @@ extern PSI_rwlock_key key_rwlock_read_free_rpl_tables;
#endif
extern Regex_list_handler rdb_read_free_regex_handler;
+extern uint rocksdb_table_dictionary_format;
/**
@brief
Rdb_table_handler is a reference-counted structure storing information for
diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc
index c0741a1ce9b..234f7a789ff 100644
--- a/storage/rocksdb/rdb_datadic.cc
+++ b/storage/rocksdb/rdb_datadic.cc
@@ -3514,8 +3514,21 @@ bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict,
const rocksdb::Slice &key) {
StringBuffer<8 * Rdb_key_def::PACKED_SIZE> indexes;
indexes.alloc(Rdb_key_def::VERSION_SIZE +
+ Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE +
m_key_count * Rdb_key_def::PACKED_SIZE * 2);
- rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION);
+
+ if (rocksdb_table_dictionary_format <
+ ROCKSDB_DATADIC_FORMAT_CREATE_TIMESTAMP) {
+ rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1);
+ // We are using old data format, which means we cannot save Create_time.
+ // Set it to be shown as unknown right away, so that the behavior before
+ // server restart and after is the same.
+ set_create_time(0);
+ }
+ else {
+ rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2);
+ rdb_netstr_append_uint64(&indexes, create_time);
+ }
for (uint i = 0; i < m_key_count; i++) {
const Rdb_key_def &kd = *m_key_descr_arr[i];
@@ -4015,27 +4028,52 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
Rdb_tbl_def *const tdef =
new Rdb_tbl_def(key, Rdb_key_def::INDEX_NUMBER_SIZE);
- // Now, read the DDLs.
- const int real_val_size = val.size() - Rdb_key_def::VERSION_SIZE;
- if (real_val_size % Rdb_key_def::PACKED_SIZE * 2 > 0) {
+ if (val.size() < Rdb_key_def::VERSION_SIZE) {
// NO_LINT_DEBUG
sql_print_error("RocksDB: Table_store: invalid keylist for table %s",
tdef->full_tablename().c_str());
return true;
}
- tdef->m_key_count = real_val_size / (Rdb_key_def::PACKED_SIZE * 2);
- tdef->m_key_descr_arr = new std::shared_ptr<Rdb_key_def>[tdef->m_key_count];
ptr = reinterpret_cast<const uchar *>(val.data());
const int version = rdb_netbuf_read_uint16(&ptr);
- if (version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION) {
+
+ if (version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1 &&
+ version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2) {
// NO_LINT_DEBUG
sql_print_error(
"RocksDB: DDL ENTRY Version was not expected."
- "Expected: %d, Actual: %d",
- Rdb_key_def::DDL_ENTRY_INDEX_VERSION, version);
+ "Expected: %d..%d, Actual: %d",
+ Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1,
+ Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2, version);
return true;
}
+ int real_val_size = val.size() - Rdb_key_def::VERSION_SIZE;
+
+ if (version == Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2) {
+ if (real_val_size < Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE) {
+ // NO_LINT_DEBUG
+ sql_print_error( "RocksDB: DDL ENTRY V2 doesn't have timestamp");
+ delete tdef;
+ return true;
+ }
+ tdef->set_create_time(rdb_netbuf_read_uint64(&ptr));
+ real_val_size -= Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE;
+ }
+ else
+ tdef->set_create_time(0); // shown as SQL NULL.
+
+ // Now, read the DDLs.
+ if (real_val_size % Rdb_key_def::PACKED_SIZE * 2 > 0) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Table_store: invalid keylist for table %s",
+ tdef->full_tablename().c_str());
+ return true;
+ }
+ tdef->m_key_count = real_val_size / (Rdb_key_def::PACKED_SIZE * 2);
+ tdef->m_key_descr_arr = new std::shared_ptr<Rdb_key_def>[tdef->m_key_count];
+
+
ptr_end = ptr + real_val_size;
for (uint keyno = 0; ptr < ptr_end; keyno++) {
GL_INDEX_ID gl_index_id;
@@ -4471,6 +4509,7 @@ bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to,
rec->m_hidden_pk_val.load(std::memory_order_relaxed);
new_rec->m_tbl_stats = rec->m_tbl_stats;
+ new_rec->set_create_time(rec->get_create_time());
// so that it's not free'd when deleting the old rec
rec->m_key_descr_arr = nullptr;
diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h
index 416857cad38..9c2a96e77b7 100644
--- a/storage/rocksdb/rdb_datadic.h
+++ b/storage/rocksdb/rdb_datadic.h
@@ -203,6 +203,22 @@ enum {
UNPACK_FAILURE = 1,
};
+
+/*
+ Global user-visible data dictionary format version.
+ The server will read the data of any version, but will not write data
+ structures that were introduced after the version in
+ rocksdb_table_dictionary_format.
+ This way, one can keep the on-disk data backward-compatible.
+*/
+const uint ROCKSDB_DATADIC_FORMAT_INITIAL = 1;
+const uint ROCKSDB_DATADIC_FORMAT_CREATE_TIMESTAMP = 2;
+
+// Maximum possible value:
+const uint ROCKSDB_DATADIC_FORMAT_MAX = 2;
+const uint ROCKSDB_DATADIC_FORMAT_DEFAULT = ROCKSDB_DATADIC_FORMAT_MAX;
+
+
/*
An object of this class represents information about an index in an SQL
table. It provides services to encode and decode index tuples.
@@ -465,6 +481,7 @@ class Rdb_key_def {
CF_NUMBER_SIZE = 4,
CF_FLAG_SIZE = 4,
PACKED_SIZE = 4, // one int
+ TABLE_CREATE_TIMESTAMP_SIZE = 8,
};
// bit flags for combining bools when writing to disk
@@ -506,7 +523,10 @@ class Rdb_key_def {
// Data dictionary schema version. Introduce newer versions
// if changing schema layout
enum {
- DDL_ENTRY_INDEX_VERSION = 1,
+ DDL_ENTRY_INDEX_VERSION_1 = 1,
+ // this includes a 64-bit table_creation_time at the end.
+ // Allowed since ROCKSDB_DATADIC_FORMAT_CREATE_TIMESTAMP.
+ DDL_ENTRY_INDEX_VERSION_2 = 2,
CF_DEFINITION_VERSION = 1,
BINLOG_INFO_INDEX_NUMBER_VERSION = 1,
DDL_DROP_INDEX_ONGOING_VERSION = 1,
@@ -1116,6 +1136,12 @@ class Rdb_tbl_def {
~Rdb_tbl_def();
+ // time values are shown in SHOW TABLE STATUS
+ void set_create_time(time_t val) { create_time = val; }
+ time_t get_create_time() { return create_time; }
+
+ time_t update_time = 0; // in-memory only value, maintained right here
+
void check_and_set_read_free_rpl_table();
/* Number of indexes */
@@ -1161,6 +1187,9 @@ class Rdb_tbl_def {
const std::string &base_tablename() const { return m_tablename; }
const std::string &base_partition() const { return m_partition; }
GL_INDEX_ID get_autoincr_gl_index_id();
+
+ private:
+ time_t create_time = 0;
};
/*
@@ -1341,8 +1370,11 @@ class Rdb_binlog_manager {
1. Table Name => internal index id mappings
key: Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER(0x1) + dbname.tablename
- value: version, {cf_id, index_id}*n_indexes_of_the_table
+ value: DDL_ENTRY_INDEX_VERSION_1, {cf_id, index_id}*n_indexes_of_the_table
+ or value: DDL_ENTRY_INDEX_VERSION_2, create_timestamp, {cf_id, index_id}*
+ n_indexes_of_the_table
version is 2 bytes. cf_id and index_id are 4 bytes.
+ create_timestamp is 8 bytes.
2. internal cf_id, index id => index information
key: Rdb_key_def::INDEX_INFO(0x2) + cf_id + index_id
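
Since the comment block above is the only prose description of the dictionary record layout, a decode-side sketch may help; it mirrors what Rdb_ddl_manager::init() now has to do for both entry versions. The big-endian readers are stand-ins for rdb_netbuf_read_uint16/uint64, the size checks are simplified, and the sample bytes in main() are the version-1 entry for test.t1 that the new mysqltest later greps out of sst_dump.

#include <cstdint>
#include <cstdio>
#include <vector>

// Big-endian readers standing in for rdb_netbuf_read_uint16/uint64.
static uint16_t read_be16(const uint8_t *&p) {
  uint16_t v = (uint16_t)((p[0] << 8) | p[1]);
  p += 2;
  return v;
}
static uint64_t read_be64(const uint8_t *&p) {
  uint64_t v = 0;
  for (int i = 0; i < 8; i++) v = (v << 8) | p[i];
  p += 8;
  return v;
}

// Simplified reading of a DDL-entry value: accept version 1 or 2, take the
// creation timestamp only for version 2, and derive the index count from
// whatever bytes remain.
bool decode_ddl_entry(const std::vector<uint8_t> &val) {
  const size_t kVersionSize = 2;    // Rdb_key_def::VERSION_SIZE
  const size_t kTimestampSize = 8;  // TABLE_CREATE_TIMESTAMP_SIZE
  const size_t kPackedSize = 4;     // PACKED_SIZE (cf_id or index_id)

  if (val.size() < kVersionSize) return false;  // corrupt entry
  const uint8_t *p = val.data();
  const uint16_t version = read_be16(p);
  size_t rest = val.size() - kVersionSize;

  uint64_t create_time = 0;  // 0 is later shown as SQL NULL
  if (version == 2) {
    if (rest < kTimestampSize) return false;  // V2 must carry a timestamp
    create_time = read_be64(p);
    rest -= kTimestampSize;
  } else if (version != 1) {
    return false;  // unknown version
  }

  if (rest % (kPackedSize * 2) != 0) return false;  // whole pairs only
  std::printf("version=%u create_time=%llu indexes=%zu\n", (unsigned)version,
              (unsigned long long)create_time, rest / (kPackedSize * 2));
  return true;
}

int main() {
  // Version-1 entry for one index (cf_id 0, index_id 0x104), matching the
  // record the rocksdb_table_dictionary_format test greps out of sst_dump.
  std::vector<uint8_t> v1 = {0x00, 0x01, 0x00, 0x00, 0x00,
                             0x00, 0x00, 0x00, 0x01, 0x04};
  return decode_ddl_entry(v1) ? 0 : 1;
}
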
[Commits] 8dc011b9965: Support Create_time and Update_time in MyRocks table status
by psergey 06 Oct '19
revision-id: 8dc011b99657ae3d25e44debdc4bbeaebbaecb52 (fb-prod201903-144-g8dc011b9965)
parent(s): d97c0c628e5dc60abd725f6a7120a8d87b09321e
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-10-06 23:47:41 +0300
message:
Support Create_time and Update_time in MyRocks table status
(variant #3, with @@rocksdb_table_dictionary_format)
- Create_time is stored in the MyRocks' internal data dictionary.
- Update_time is in-memory only (like in InnoDB).
@@rocksdb_table_dictionary_format is a global read-only variable (set it
from my.cnf) which controls the on-disk data format.
rocksdb_table_dictionary_format=1 means use the same data format is
before. Create_time will always be NULL for all tables that are created.
rocksdb_table_dictionary_format=2 (the default) means use the newer data
format. All newly-created tables will have proper Create_time attribute.
Downgrades are only possible if one hasn't run any DDL that re-creates a
table.
---
mysql-test/suite/rocksdb/include/bulk_load.inc | 4 +-
.../suite/rocksdb/include/bulk_load_unsorted.inc | 4 +-
mysql-test/suite/rocksdb/r/bulk_load.result | 12 +--
mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result | 12 +--
.../rocksdb/r/bulk_load_rev_cf_and_data.result | 12 +--
.../suite/rocksdb/r/bulk_load_rev_data.result | 12 +--
.../suite/rocksdb/r/bulk_load_unsorted.result | 12 +--
.../suite/rocksdb/r/bulk_load_unsorted_rev.result | 12 +--
mysql-test/suite/rocksdb/r/issue255.result | 16 ++--
mysql-test/suite/rocksdb/r/rocksdb.result | 7 +-
.../r/rocksdb_table_dictionary_format.result | 37 ++++++++++
.../suite/rocksdb/r/show_table_status.result | 85 +++++++++++++++++++++-
mysql-test/suite/rocksdb/r/truncate_table.result | 8 +-
mysql-test/suite/rocksdb/t/issue255.test | 17 +++--
mysql-test/suite/rocksdb/t/rocksdb.test | 4 +-
.../rocksdb/t/rocksdb_table_dictionary_format.test | 67 +++++++++++++++++
mysql-test/suite/rocksdb/t/show_table_status.test | 81 ++++++++++++++++++++-
mysql-test/suite/rocksdb/t/truncate_table.test | 8 +-
.../r/rocksdb_table_dictionary_format_basic.result | 14 ++++
.../t/rocksdb_table_dictionary_format_basic.test | 16 ++++
storage/rocksdb/ha_rocksdb.cc | 53 ++++++++++++++
storage/rocksdb/ha_rocksdb.h | 1 +
storage/rocksdb/rdb_datadic.cc | 57 ++++++++++++---
storage/rocksdb/rdb_datadic.h | 36 ++++++++-
24 files changed, 502 insertions(+), 85 deletions(-)
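
Condensing the commit message into code: the read-only variable only decides which dictionary record version put_dict() writes the next time a table is created or rebuilt. Below is a hedged sketch of that decision; TableDefSketch is illustrative and not the real Rdb_tbl_def, while the constant names are the ones this patch introduces in rdb_datadic.h.

#include <ctime>

// Constants as introduced by this patch in rdb_datadic.h.
const unsigned ROCKSDB_DATADIC_FORMAT_INITIAL = 1;
const unsigned ROCKSDB_DATADIC_FORMAT_CREATE_TIMESTAMP = 2;
const unsigned ROCKSDB_DATADIC_FORMAT_MAX = 2;
const unsigned ROCKSDB_DATADIC_FORMAT_DEFAULT = ROCKSDB_DATADIC_FORMAT_MAX;

// Read-only global, set from my.cnf (--rocksdb_table_dictionary_format=N).
unsigned rocksdb_table_dictionary_format = ROCKSDB_DATADIC_FORMAT_DEFAULT;

// Illustrative stand-in for Rdb_tbl_def, showing the decision made when a
// table is created or rebuilt: format=1 keeps the old on-disk layout and
// drops Create_time, format>=2 writes the newer layout that carries an
// 8-byte creation timestamp.
struct TableDefSketch {
  std::time_t create_time = 0;  // 0 is reported as NULL in SHOW TABLE STATUS

  unsigned pick_ddl_entry_version() {
    if (rocksdb_table_dictionary_format <
        ROCKSDB_DATADIC_FORMAT_CREATE_TIMESTAMP) {
      create_time = 0;  // cannot be stored, so report NULL consistently
      return 1;         // DDL_ENTRY_INDEX_VERSION_1
    }
    return 2;           // DDL_ENTRY_INDEX_VERSION_2, timestamp follows
  }
};

int main() {
  TableDefSketch t;
  t.create_time = std::time(nullptr);
  return t.pick_ddl_entry_version() == 2 ? 0 : 1;
}
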
diff --git a/mysql-test/suite/rocksdb/include/bulk_load.inc b/mysql-test/suite/rocksdb/include/bulk_load.inc
index 1b79825e507..7e163602202 100644
--- a/mysql-test/suite/rocksdb/include/bulk_load.inc
+++ b/mysql-test/suite/rocksdb/include/bulk_load.inc
@@ -121,12 +121,12 @@ set rocksdb_bulk_load=0;
--remove_file $file
# Make sure row count index stats are correct
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
ANALYZE TABLE t1, t2, t3;
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
# Make sure all the data is there.
diff --git a/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc b/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
index 5cdc76a32d4..812af0401aa 100644
--- a/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
+++ b/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
@@ -119,12 +119,12 @@ set rocksdb_bulk_load=0;
--remove_file $file
# Make sure row count index stats are correct
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
ANALYZE TABLE t1, t2, t3;
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
# Make sure all the data is there.
diff --git a/mysql-test/suite/rocksdb/r/bulk_load.result b/mysql-test/suite/rocksdb/r/bulk_load.result
index a36f99a7619..76db28e66bd 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
index b5d3e252c5d..ae363f7ec0c 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
index f46acd41080..dd8dd7e60a8 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
index 3389968ef37..96738ae62e2 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result b/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
index 924032549ac..87fc63af2da 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
@@ -70,9 +70,9 @@ LOAD DATA INFILE <input_file> INTO TABLE t3;
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -80,9 +80,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
select count(a) from t1;
count(a)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result b/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
index 3cc9fb8e459..8e0914f0159 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
@@ -70,9 +70,9 @@ LOAD DATA INFILE <input_file> INTO TABLE t3;
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -80,9 +80,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
select count(a) from t1;
count(a)
5000000
diff --git a/mysql-test/suite/rocksdb/r/issue255.result b/mysql-test/suite/rocksdb/r/issue255.result
index c1ce3be2276..b45b3b5afc7 100644
--- a/mysql-test/suite/rocksdb/r/issue255.result
+++ b/mysql-test/suite/rocksdb/r/issue255.result
@@ -2,7 +2,7 @@ CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 6 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ('538647864786478647864');
Warnings:
Warning 1264 Out of range value for column 'pk' at row 1
@@ -12,7 +12,7 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
SELECT * FROM t1;
@@ -21,7 +21,7 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
SELECT * FROM t1;
@@ -30,13 +30,13 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 # # NULL latin1_swedish_ci NULL
DROP TABLE t1;
CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 6 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES (1000);
Warnings:
Warning 1264 Out of range value for column 'pk' at row 1
@@ -46,7 +46,7 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
SELECT * FROM t1;
@@ -55,7 +55,7 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
SELECT * FROM t1;
@@ -64,5 +64,5 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/r/rocksdb.result b/mysql-test/suite/rocksdb/r/rocksdb.result
index 088eb050f6f..c76f31afaab 100644
--- a/mysql-test/suite/rocksdb/r/rocksdb.result
+++ b/mysql-test/suite/rocksdb/r/rocksdb.result
@@ -979,6 +979,7 @@ rocksdb_store_row_debug_checksums OFF
rocksdb_strict_collation_check OFF
rocksdb_strict_collation_exceptions
rocksdb_table_cache_numshardbits 6
+rocksdb_table_dictionary_format 2
rocksdb_table_stats_background_thread_nice_value 19
rocksdb_table_stats_max_num_rows_scanned 0
rocksdb_table_stats_recalc_threshold_count 100
@@ -1417,7 +1418,7 @@ create table t1 (i int primary key auto_increment) engine=RocksDB;
insert into t1 values (null),(null);
show table status like 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 # 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 # 0 0 0 3 # # NULL latin1_swedish_ci NULL
drop table t1;
#
# Fix Issue #4: Crash when using pseudo-unique keys
@@ -2612,7 +2613,7 @@ CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(-1),(0);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 3 # # NULL latin1_swedish_ci NULL
SELECT * FROM t1;
a
-1
@@ -2623,7 +2624,7 @@ CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(10),(0);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 12 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 12 # # NULL latin1_swedish_ci NULL
SELECT * FROM t1;
a
1
diff --git a/mysql-test/suite/rocksdb/r/rocksdb_table_dictionary_format.result b/mysql-test/suite/rocksdb/r/rocksdb_table_dictionary_format.result
new file mode 100644
index 00000000000..8f2fa96a04a
--- /dev/null
+++ b/mysql-test/suite/rocksdb/r/rocksdb_table_dictionary_format.result
@@ -0,0 +1,37 @@
+select @@rocksdb_table_dictionary_format;
+@@rocksdb_table_dictionary_format
+2
+#
+# Server restarted
+#
+select @@rocksdb_table_dictionary_format;
+@@rocksdb_table_dictionary_format
+1
+create table t1 (a int) engine=rocksdb;
+insert into t1 values (1);
+# Create_time will be NULL as the table doesn't support it
+# Update_time will be non-null
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time update_time is not null
+NULL 1
+#
+# Server restarted
+#
+select @@rocksdb_table_dictionary_format;
+@@rocksdb_table_dictionary_format
+2
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time update_time is not null
+NULL 0
+set global rocksdb_compact_cf='default';
+select concat('00', '00', '00', '01', hex('test.t1'));
+concat('00', '00', '00', '01', hex('test.t1'))
+00000001746573742E7431
+Found the datadic entry
+Value has format version 1, followed by 8 bytes describing one index
+Done
+drop table t1;
diff --git a/mysql-test/suite/rocksdb/r/show_table_status.result b/mysql-test/suite/rocksdb/r/show_table_status.result
index 29140f045e4..345882040ef 100644
--- a/mysql-test/suite/rocksdb/r/show_table_status.result
+++ b/mysql-test/suite/rocksdb/r/show_table_status.result
@@ -7,12 +7,12 @@ set global rocksdb_force_flush_memtable_now = true;
CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' );
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL utf8_general_ci NULL
+t1 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL utf8_general_ci NULL
SHOW TABLE STATUS WHERE name LIKE 't2';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
DROP TABLE t1, t2, t3;
CREATE DATABASE `db_new..............................................end`;
USE `db_new..............................................end`;
@@ -22,3 +22,80 @@ SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.table_statistics WHERE T
TABLE_SCHEMA db_new..............................................end
TABLE_NAME t1_new..............................................end
DROP DATABASE `db_new..............................................end`;
+#
+# MDEV-17171: Bug: RocksDB Tables do not have "Creation Date"
+#
+use test;
+create table t1 (a int) engine=rocksdb;
+select create_time is not null, update_time, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time check_time
+1 NULL NULL
+insert into t1 values (1);
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time is not null check_time
+1 1 NULL
+flush tables;
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time is not null check_time
+1 1 NULL
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+select sleep(3);
+sleep(3)
+0
+insert into t1 values (2);
+select
+create_time=@create_tm /* should not change */ ,
+timestampdiff(second, @update_tm, update_time) > 2,
+check_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=@create_tm 1
+timestampdiff(second, @update_tm, update_time) > 2 1
+check_time NULL
+#
+# Check how create_time survives ALTER TABLE.
+# First, an ALTER TABLE that re-creates the table:
+alter table t1 add b int;
+select
+create_time<>@create_tm /* should change */,
+create_time IS NOT NULL,
+update_time IS NULL
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time<>@create_tm 1
+create_time IS NOT NULL 1
+update_time IS NULL 1
+insert into t1 values (5,5);
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+# Then, an in-place ALTER TABLE:
+alter table t1 add key (a);
+select
+create_time=@create_tm /* should not change */,
+update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=@create_tm 1
+update_time NULL
+#
+# Check what is left after server restart
+#
+# Save t1's creation time
+create table t2 as
+select create_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+select
+create_time=(select create_time from t2) /* should not change */,
+update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=(select create_time from t2) 1
+update_time NULL
+drop table t1, t2;
diff --git a/mysql-test/suite/rocksdb/r/truncate_table.result b/mysql-test/suite/rocksdb/r/truncate_table.result
index 1544256f194..79b266a2453 100644
--- a/mysql-test/suite/rocksdb/r/truncate_table.result
+++ b/mysql-test/suite/rocksdb/r/truncate_table.result
@@ -9,19 +9,19 @@ DROP TABLE t1;
CREATE TABLE t1 (a INT KEY AUTO_INCREMENT, c CHAR(8)) ENGINE=rocksdb;
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 1 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 (c) VALUES ('a'),('b'),('c');
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 4 # # NULL latin1_swedish_ci NULL
TRUNCATE TABLE t1;
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 1 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 (c) VALUES ('d');
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 2 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 2 # # NULL latin1_swedish_ci NULL
SELECT a,c FROM t1;
a c
1 d
diff --git a/mysql-test/suite/rocksdb/t/issue255.test b/mysql-test/suite/rocksdb/t/issue255.test
index 370dece0c6c..686f45b4056 100644
--- a/mysql-test/suite/rocksdb/t/issue255.test
+++ b/mysql-test/suite/rocksdb/t/issue255.test
@@ -3,24 +3,25 @@
CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 VALUES ('538647864786478647864');
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SELECT * FROM t1;
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
DROP TABLE t1;
@@ -28,24 +29,24 @@ DROP TABLE t1;
CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 VALUES (1000);
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/t/rocksdb.test b/mysql-test/suite/rocksdb/t/rocksdb.test
index 5eff0fbf38f..7dcae569c92 100644
--- a/mysql-test/suite/rocksdb/t/rocksdb.test
+++ b/mysql-test/suite/rocksdb/t/rocksdb.test
@@ -1198,7 +1198,7 @@ drop table t1;
create table t1 (i int primary key auto_increment) engine=RocksDB;
insert into t1 values (null),(null);
---replace_column 7 #
+--replace_column 7 # 12 # 13 #
show table status like 't1';
drop table t1;
@@ -1903,11 +1903,13 @@ DROP TABLE t1;
# value is 4 while MyRocks will show it as 3.
CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(-1),(0);
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
SELECT * FROM t1;
DROP TABLE t1;
CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(10),(0);
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
SELECT * FROM t1;
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/t/rocksdb_table_dictionary_format.test b/mysql-test/suite/rocksdb/t/rocksdb_table_dictionary_format.test
new file mode 100644
index 00000000000..3dedf6502ab
--- /dev/null
+++ b/mysql-test/suite/rocksdb/t/rocksdb_table_dictionary_format.test
@@ -0,0 +1,67 @@
+--source include/have_rocksdb.inc
+
+select @@rocksdb_table_dictionary_format;
+
+#
+# Check the upgrade from Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1 to _2
+#
+
+--let $_mysqld_option=--rocksdb_table_dictionary_format=1
+--source include/restart_mysqld_with_option.inc
+
+--echo #
+--echo # Server restarted
+--echo #
+select @@rocksdb_table_dictionary_format;
+
+create table t1 (a int) engine=rocksdb;
+insert into t1 values (1);
+
+--echo # Create_time will be NULL as the table doesn't support it
+--echo # Update_time will be non-null
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--let $_mysqld_option=--rocksdb_table_dictionary_format=2
+--source include/restart_mysqld_with_option.inc
+--echo #
+--echo # Server restarted
+--echo #
+select @@rocksdb_table_dictionary_format;
+
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+set global rocksdb_compact_cf='default';
+# use: $MYSQLTEST_VARDIR $MYSQL_SST_DUMP
+let MYSQL_DATADIR = `select @@datadir`;
+let MYSQL_SST_DUMP = $MYSQL_SST_DUMP;
+
+select concat('00', '00', '00', '01', hex('test.t1'));
+
+# We are looking to find a record like this:
+# '00000001746573742E7431' seq:0, type:1 => 00010000000000000104
+# that is:
+# '00000001:746573742E7431' seq:0, type:1 => 0001: 0000000000000104
+# the key is: DDL_ENTRY_INDEX_START_NUMBER, "test.t1"
+# the value is: DDL_ENTRY_INDEX_VERSION_1, {cf_id, index_id}
+perl;
+ my $datadir = $ENV{'MYSQL_DATADIR'};
+ my $sst_dump = $ENV{'MYSQL_SST_DUMP'};
+ open(IN, "$sst_dump --command=scan --output_hex --file=$datadir/.rocksdb |");
+ while(<IN>) {
+ if ($_ =~ /^\'00000001746573742E7431\'/) {
+ print "Found the datadic entry\n";
+ if ($_ =~ / 0001[0-9]{16}$/) {
+ print "Value has format version 1, followed by 8 bytes describing one index\n"
+ } else {
+ print "Unexpected value. $_\n";
+ }
+ }
+ }
+ print "Done\n";
+ close(IN);
+EOF
+drop table t1;
+
diff --git a/mysql-test/suite/rocksdb/t/show_table_status.test b/mysql-test/suite/rocksdb/t/show_table_status.test
index 29cc2ccfb5e..59effcc788c 100644
--- a/mysql-test/suite/rocksdb/t/show_table_status.test
+++ b/mysql-test/suite/rocksdb/t/show_table_status.test
@@ -24,7 +24,7 @@ set global rocksdb_force_flush_memtable_now = true;
CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
---replace_column 6 # 7 #
+--replace_column 6 # 7 # 12 # 13 #
SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' );
# Some statistics don't get updated as quickly. The Data_length and
@@ -48,7 +48,7 @@ set global rocksdb_force_flush_memtable_now = true;
# We expect the number of rows to be 10000. Data_len and Avg_row_len
# may vary, depending on built-in compression library.
---replace_column 6 # 7 #
+--replace_column 6 # 7 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't2';
DROP TABLE t1, t2, t3;
@@ -62,3 +62,80 @@ CREATE TABLE `t1_new..............................................end`(a int) en
INSERT INTO `t1_new..............................................end` VALUES (1);
--query_vertical SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.table_statistics WHERE TABLE_NAME = 't1_new..............................................end'
DROP DATABASE `db_new..............................................end`;
+--echo #
+--echo # MDEV-17171: Bug: RocksDB Tables do not have "Creation Date"
+--echo #
+use test;
+create table t1 (a int) engine=rocksdb;
+
+select create_time is not null, update_time, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+insert into t1 values (1);
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+flush tables;
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+select sleep(3);
+insert into t1 values (2);
+
+--vertical_results
+select
+ create_time=@create_tm /* should not change */ ,
+ timestampdiff(second, @update_tm, update_time) > 2,
+ check_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo #
+--echo # Check how create_time survives ALTER TABLE.
+--echo # First, an ALTER TABLE that re-creates the table:
+alter table t1 add b int;
+select
+ create_time<>@create_tm /* should change */,
+ create_time IS NOT NULL,
+ update_time IS NULL
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+insert into t1 values (5,5);
+
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo # Then, an in-place ALTER TABLE:
+alter table t1 add key (a);
+
+select
+ create_time=@create_tm /* should not change */,
+ update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo #
+--echo # Check what is left after server restart
+--echo #
+
+--echo # Save t1's creation time
+create table t2 as
+select create_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--source include/restart_mysqld.inc
+
+select
+ create_time=(select create_time from t2) /* should not change */,
+ update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+drop table t1, t2;
diff --git a/mysql-test/suite/rocksdb/t/truncate_table.test b/mysql-test/suite/rocksdb/t/truncate_table.test
index a61488654a3..1001eeb6cde 100644
--- a/mysql-test/suite/rocksdb/t/truncate_table.test
+++ b/mysql-test/suite/rocksdb/t/truncate_table.test
@@ -29,22 +29,22 @@ DROP TABLE t1;
CREATE TABLE t1 (a INT KEY AUTO_INCREMENT, c CHAR(8)) ENGINE=rocksdb;
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 (c) VALUES ('a'),('b'),('c');
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
TRUNCATE TABLE t1;
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 (c) VALUES ('d');
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--sorted_result
diff --git a/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_table_dictionary_format_basic.result b/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_table_dictionary_format_basic.result
new file mode 100644
index 00000000000..825d7a7113a
--- /dev/null
+++ b/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_table_dictionary_format_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(2);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_TABLE_DICTIONARY_FORMAT;
+SELECT @start_global_value;
+@start_global_value
+2
+"Trying to set variable @@global.ROCKSDB_TABLE_DICTIONARY_FORMAT to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_TABLE_DICTIONARY_FORMAT = 444;
+ERROR HY000: Variable 'rocksdb_table_dictionary_format' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_table_dictionary_format_basic.test b/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_table_dictionary_format_basic.test
new file mode 100644
index 00000000000..2b8c6e81bf7
--- /dev/null
+++ b/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_table_dictionary_format_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(2);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_TABLE_DICTIONARY_FORMAT
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index d6c89087bea..a0a344187cf 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -122,6 +122,8 @@ const std::string PER_INDEX_CF_NAME("$per_index_cf");
static std::vector<std::string> rdb_tables_to_recalc;
+uint rocksdb_table_dictionary_format;
+
class Rdb_explicit_snapshot : public explicit_snapshot {
public:
static std::shared_ptr<Rdb_explicit_snapshot> create(
@@ -2136,6 +2138,17 @@ static MYSQL_SYSVAR_ULONGLONG(
"MultiGet",
nullptr, nullptr, SIZE_T_MAX, /* min */ 0, /* max */ SIZE_T_MAX, 0);
+static MYSQL_SYSVAR_UINT(table_dictionary_format,
+ rocksdb_table_dictionary_format,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Max Table Dictionary format version that the server "
+ "may create (use lower values for backward "
+ " compatibility, higher values for new features)",
+ nullptr, nullptr,
+ ROCKSDB_DATADIC_FORMAT_DEFAULT,
+ ROCKSDB_DATADIC_FORMAT_INITIAL,
+ ROCKSDB_DATADIC_FORMAT_MAX, 0);
+
static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100;
static struct st_mysql_sys_var *rocksdb_system_variables[] = {
@@ -2309,6 +2322,8 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(select_bypass_log_rejected),
MYSQL_SYSVAR(select_bypass_debug_row_delay),
MYSQL_SYSVAR(select_bypass_multiget_min),
+
+ MYSQL_SYSVAR(table_dictionary_format),
nullptr};
static rocksdb::WriteOptions rdb_get_rocksdb_write_options(
@@ -2456,6 +2471,8 @@ class Rdb_transaction {
bool m_is_delayed_snapshot = false;
bool m_is_two_phase = false;
+ std::unordered_set<Rdb_tbl_def*> modified_tables;
+
private:
/*
Number of write operations this transaction had when we took the last
@@ -3139,6 +3156,25 @@ class Rdb_transaction {
virtual void start_tx() = 0;
virtual void start_stmt() = 0;
+ protected:
+ // Non-virtual functions with actions to be done on transaction commit and
+ // rollback.
+ void on_commit() {
+ time_t tm;
+ tm = time(nullptr);
+ for (auto &it : modified_tables) {
+ it->update_time = tm;
+ }
+ modified_tables.clear();
+ }
+ void on_rollback() {
+ modified_tables.clear();
+ }
+ public:
+ void log_table_write_op(Rdb_tbl_def *tbl) {
+ modified_tables.insert(tbl);
+ }
+
void set_initial_savepoint() {
/*
Set the initial savepoint. If the first statement in the transaction
@@ -3322,7 +3358,9 @@ class Rdb_transaction_impl : public Rdb_transaction {
goto error;
}
+ on_commit();
error:
+ on_rollback();
/* Save the transaction object to be reused */
release_tx();
@@ -3338,6 +3376,7 @@ class Rdb_transaction_impl : public Rdb_transaction {
public:
void rollback() override {
+ on_rollback();
m_write_count = 0;
m_insert_count = 0;
m_update_count = 0;
@@ -3659,7 +3698,9 @@ class Rdb_writebatch_impl : public Rdb_transaction {
res = true;
goto error;
}
+ on_commit();
error:
+ on_rollback();
reset();
m_write_count = 0;
@@ -3691,6 +3732,7 @@ class Rdb_writebatch_impl : public Rdb_transaction {
}
void rollback() override {
+ on_rollback();
m_write_count = 0;
m_insert_count = 0;
m_update_count = 0;
@@ -7549,6 +7591,7 @@ int ha_rocksdb::create_table(const std::string &table_name,
/* Create table/key descriptions and put them into the data dictionary */
m_tbl_def = new Rdb_tbl_def(table_name);
+ m_tbl_def->set_create_time(time(nullptr));
uint n_keys = table_arg->s->keys;
@@ -10177,6 +10220,8 @@ int ha_rocksdb::update_write_row(const uchar *const old_data,
row_info.tx->incr_insert_count();
}
+ row_info.tx->log_table_write_op(m_tbl_def);
+
if (do_bulk_commit(row_info.tx)) {
DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD);
}
@@ -10648,6 +10693,7 @@ int ha_rocksdb::delete_row(const uchar *const buf) {
}
tx->incr_delete_count();
+ tx->log_table_write_op(m_tbl_def);
if (do_bulk_commit(tx)) {
DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD);
@@ -10802,6 +10848,12 @@ int ha_rocksdb::info(uint flag) {
k->rec_per_key[j] = x;
}
}
+
+ stats.create_time = m_tbl_def->get_create_time();
+ }
+
+ if (flag & HA_STATUS_TIME) {
+ stats.update_time = m_tbl_def->update_time;
}
if (flag & HA_STATUS_ERRKEY) {
@@ -12603,6 +12655,7 @@ bool ha_rocksdb::prepare_inplace_alter_table(
m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed);
new_tdef->m_hidden_pk_val =
m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed);
+ new_tdef->set_create_time(m_tbl_def->get_create_time());
if (create_key_defs(altered_table, new_tdef, table, m_tbl_def)) {
/* Delete the new key descriptors */
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index 9a250af40c7..c36c5c98e19 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -82,6 +82,7 @@ extern PSI_rwlock_key key_rwlock_read_free_rpl_tables;
#endif
extern Regex_list_handler rdb_read_free_regex_handler;
+extern uint rocksdb_table_dictionary_format;
/**
@brief
Rdb_table_handler is a reference-counted structure storing information for
diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc
index c0741a1ce9b..234f7a789ff 100644
--- a/storage/rocksdb/rdb_datadic.cc
+++ b/storage/rocksdb/rdb_datadic.cc
@@ -3514,8 +3514,21 @@ bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict,
const rocksdb::Slice &key) {
StringBuffer<8 * Rdb_key_def::PACKED_SIZE> indexes;
indexes.alloc(Rdb_key_def::VERSION_SIZE +
+ Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE +
m_key_count * Rdb_key_def::PACKED_SIZE * 2);
- rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION);
+
+ if (rocksdb_table_dictionary_format <
+ ROCKSDB_DATADIC_FORMAT_CREATE_TIMESTAMP) {
+ rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1);
+ // We are using old data format, which means we cannot save Create_time
+ // Set it to be shown as unknown right away, so that the behavior before
+ // server restart and after is the same.
+ set_create_time(0);
+ }
+ else {
+ rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2);
+ rdb_netstr_append_uint64(&indexes, create_time);
+ }
for (uint i = 0; i < m_key_count; i++) {
const Rdb_key_def &kd = *m_key_descr_arr[i];
@@ -4015,27 +4028,52 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
Rdb_tbl_def *const tdef =
new Rdb_tbl_def(key, Rdb_key_def::INDEX_NUMBER_SIZE);
- // Now, read the DDLs.
- const int real_val_size = val.size() - Rdb_key_def::VERSION_SIZE;
- if (real_val_size % Rdb_key_def::PACKED_SIZE * 2 > 0) {
+ if (val.size() < Rdb_key_def::VERSION_SIZE) {
// NO_LINT_DEBUG
sql_print_error("RocksDB: Table_store: invalid keylist for table %s",
tdef->full_tablename().c_str());
return true;
}
- tdef->m_key_count = real_val_size / (Rdb_key_def::PACKED_SIZE * 2);
- tdef->m_key_descr_arr = new std::shared_ptr<Rdb_key_def>[tdef->m_key_count];
ptr = reinterpret_cast<const uchar *>(val.data());
const int version = rdb_netbuf_read_uint16(&ptr);
- if (version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION) {
+
+ if (version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1 &&
+ version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2) {
// NO_LINT_DEBUG
sql_print_error(
"RocksDB: DDL ENTRY Version was not expected."
- "Expected: %d, Actual: %d",
- Rdb_key_def::DDL_ENTRY_INDEX_VERSION, version);
+ "Expected: %d..%d, Actual: %d",
+ Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1,
+ Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2, version);
return true;
}
+ int real_val_size = val.size() - Rdb_key_def::VERSION_SIZE;
+
+ if (version == Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2) {
+ if (real_val_size < Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE) {
+ // NO_LINT_DEBUG
+ sql_print_error( "RocksDB: DDL ENTRY V2 doesn't have timestamp");
+ delete tdef;
+ return true;
+ }
+ tdef->set_create_time(rdb_netbuf_read_uint64(&ptr));
+ real_val_size -= Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE;
+ }
+ else
+ tdef->set_create_time(0); // shown as SQL NULL.
+
+ // Now, read the DDLs.
+ if (real_val_size % Rdb_key_def::PACKED_SIZE * 2 > 0) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Table_store: invalid keylist for table %s",
+ tdef->full_tablename().c_str());
+ return true;
+ }
+ tdef->m_key_count = real_val_size / (Rdb_key_def::PACKED_SIZE * 2);
+ tdef->m_key_descr_arr = new std::shared_ptr<Rdb_key_def>[tdef->m_key_count];
+
+
ptr_end = ptr + real_val_size;
for (uint keyno = 0; ptr < ptr_end; keyno++) {
GL_INDEX_ID gl_index_id;
@@ -4471,6 +4509,7 @@ bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to,
rec->m_hidden_pk_val.load(std::memory_order_relaxed);
new_rec->m_tbl_stats = rec->m_tbl_stats;
+ new_rec->set_create_time(rec->get_create_time());
// so that it's not free'd when deleting the old rec
rec->m_key_descr_arr = nullptr;
diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h
index 416857cad38..9c2a96e77b7 100644
--- a/storage/rocksdb/rdb_datadic.h
+++ b/storage/rocksdb/rdb_datadic.h
@@ -203,6 +203,22 @@ enum {
UNPACK_FAILURE = 1,
};
+
+/*
+ Global user-visible data dictionary format version.
+ The server will read the data of any version, but will not write data
+ structures that were introduced after the version in
+ rocksdb_table_dictionary_format.
+ This way, one can keep the on-disk data backward-compatible.
+*/
+const uint ROCKSDB_DATADIC_FORMAT_INITIAL = 1;
+const uint ROCKSDB_DATADIC_FORMAT_CREATE_TIMESTAMP = 2;
+
+// Maximum possible value:
+const uint ROCKSDB_DATADIC_FORMAT_MAX = 2;
+const uint ROCKSDB_DATADIC_FORMAT_DEFAULT = ROCKSDB_DATADIC_FORMAT_MAX;
+
+
/*
An object of this class represents information about an index in an SQL
table. It provides services to encode and decode index tuples.
@@ -465,6 +481,7 @@ class Rdb_key_def {
CF_NUMBER_SIZE = 4,
CF_FLAG_SIZE = 4,
PACKED_SIZE = 4, // one int
+ TABLE_CREATE_TIMESTAMP_SIZE = 8,
};
// bit flags for combining bools when writing to disk
@@ -506,7 +523,10 @@ class Rdb_key_def {
// Data dictionary schema version. Introduce newer versions
// if changing schema layout
enum {
- DDL_ENTRY_INDEX_VERSION = 1,
+ DDL_ENTRY_INDEX_VERSION_1 = 1,
+ // this includes a 64-bit table_creation_time at the end.
+ // Allowed since ROCKSDB_DATADIC_FORMAT_CREATE_TIMESTAMP.
+ DDL_ENTRY_INDEX_VERSION_2 = 2,
CF_DEFINITION_VERSION = 1,
BINLOG_INFO_INDEX_NUMBER_VERSION = 1,
DDL_DROP_INDEX_ONGOING_VERSION = 1,
@@ -1116,6 +1136,12 @@ class Rdb_tbl_def {
~Rdb_tbl_def();
+ // time values are shown in SHOW TABLE STATUS
+ void set_create_time(time_t val) { create_time = val; }
+ time_t get_create_time() { return create_time; }
+
+ time_t update_time = 0; // in-memory only value, maintained right here
+
void check_and_set_read_free_rpl_table();
/* Number of indexes */
@@ -1161,6 +1187,9 @@ class Rdb_tbl_def {
const std::string &base_tablename() const { return m_tablename; }
const std::string &base_partition() const { return m_partition; }
GL_INDEX_ID get_autoincr_gl_index_id();
+
+ private:
+ time_t create_time = 0;
};
/*
@@ -1341,8 +1370,11 @@ class Rdb_binlog_manager {
1. Table Name => internal index id mappings
key: Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER(0x1) + dbname.tablename
- value: version, {cf_id, index_id}*n_indexes_of_the_table
+ value: DDL_ENTRY_INDEX_VERSION_1, {cf_id, index_id}*n_indexes_of_the_table
+ or value: DDL_ENTRY_INDEX_VERSION_2, create_timestamp, {cf_id, index_id}*
+ n_indexes_of_the_table
version is 2 bytes. cf_id and index_id are 4 bytes.
+ create_timestamp is 8 bytes.
2. internal cf_id, index id => index information
key: Rdb_key_def::INDEX_INFO(0x2) + cf_id + index_id
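For reference, the value written by Rdb_tbl_def::put_dict() for the new format boils down
to the following layout (a sketch based on the comment above; the append helpers are the
ones used in the rdb_datadic.cc hunk, and the per-index loop is the existing one):
    // DDL_ENTRY_INDEX_VERSION_2 value layout:
    //   [ 2 bytes version | 8 bytes create_time | {4 bytes cf_id, 4 bytes index_id} * n_indexes ]
    rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2);
    rdb_netstr_append_uint64(&indexes, create_time);
    // ... the existing loop then appends cf_id and index_id for each of the m_key_count indexes
Readers accept both versions: a V1 entry simply carries no timestamp, and create_time is
then reported as NULL in SHOW TABLE STATUS.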
[Commits] 38c89a95752: MDEV-20740: Odd computations in calculate_cond_selectivity_for_table
by Sergei Petrunia 04 Oct '19
revision-id: 38c89a95752ee7a59f9366af0cefacf3a71a6fd0 (mariadb-10.4.7-102-g38c89a95752)
parent(s): eb0a10b07222b01d40dee24e61aac9502256481f
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-10-04 20:16:39 +0300
message:
MDEV-20740: Odd computations in calculate_cond_selectivity_for_table
Make SEL_ARG graph traversal code in sel_arg_range_seq_next() set
max_key_parts first.
(Pushing to 10.4 first)
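In short, the traversal now computes the number of key parts covered by the current range
and folds it into the PARAM counter. A condensed sketch of the logic from the
opt_range_mrr.cc hunk below (not the full function):
    // Condensed from sel_arg_range_seq_next(); cur and seq as in the diff below.
    uint max_key_parts;
    if (cur->min_key_flag & GEOM_FLAG)
      max_key_parts= cur->min_key_parts;        // GEOM ranges carry only a min key
    else
      max_key_parts= MY_MAX(cur->min_key_parts, cur->max_key_parts);
    /* ... start_key/end_key are filled in as before ... */
    seq->param->range_count++;
    seq->param->max_key_parts= MY_MAX(seq->param->max_key_parts, max_key_parts);
Callers then read param->max_key_parts directly (it is already a count) instead of the
old param->max_key_part+1.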
---
mysql-test/main/index_intersect_innodb.result | 8 ++++----
sql/opt_range.cc | 16 ++++++++++------
sql/opt_range_mrr.cc | 5 ++++-
3 files changed, 18 insertions(+), 11 deletions(-)
diff --git a/mysql-test/main/index_intersect_innodb.result b/mysql-test/main/index_intersect_innodb.result
index 854bcd75e5c..fd07f6b41c6 100644
--- a/mysql-test/main/index_intersect_innodb.result
+++ b/mysql-test/main/index_intersect_innodb.result
@@ -470,17 +470,17 @@ EXPLAIN
SELECT * FROM City
WHERE ID BETWEEN 501 AND 1000 AND Population > 700000 AND Country LIKE 'C%';
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Country,Population 4,7,4 NULL # Using sort_intersect(PRIMARY,Country,Population); Using where
+1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Population,Country 4,4,7 NULL # Using sort_intersect(PRIMARY,Population,Country); Using where
EXPLAIN
SELECT * FROM City
WHERE ID BETWEEN 1 AND 500 AND Population > 700000 AND Country LIKE 'C%';
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Country,Population 4,7,4 NULL # Using sort_intersect(PRIMARY,Country,Population); Using where
+1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Population,Country 4,4,7 NULL # Using sort_intersect(PRIMARY,Population,Country); Using where
EXPLAIN
SELECT * FROM City
WHERE ID BETWEEN 2001 AND 2500 AND Population > 300000 AND Country LIKE 'H%';
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Country 4,7 NULL # Using sort_intersect(PRIMARY,Country); Using where
+1 SIMPLE City range PRIMARY,Population,Country Country 7 NULL # Using index condition; Using where
EXPLAIN
SELECT * FROM City
WHERE ID BETWEEN 3701 AND 4000 AND Population > 1000000
@@ -724,7 +724,7 @@ EXPLAIN
SELECT * FROM City
WHERE ID BETWEEN 1 AND 500 AND Population > 700000 AND Country LIKE 'C%';
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Country,Population 4,7,4 NULL # Using sort_intersect(PRIMARY,Country,Population); Using where
+1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Population,Country 4,4,7 NULL # Using sort_intersect(PRIMARY,Population,Country); Using where
EXPLAIN
SELECT * FROM City
WHERE ID BETWEEN 3001 AND 4000 AND Population > 600000
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 654fab7810a..c47da283e9c 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -315,7 +315,7 @@ class PARAM : public RANGE_OPT_PARAM
*/
key_map possible_keys;
longlong baseflag;
- uint max_key_part, range_count;
+ uint max_key_parts, range_count;
bool quick; // Don't calulate possible keys
@@ -7387,7 +7387,7 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
index_scan->idx= idx;
index_scan->keynr= keynr;
index_scan->key_info= ¶m->table->key_info[keynr];
- index_scan->used_key_parts= param->max_key_part+1;
+ index_scan->used_key_parts= param->max_key_parts;
index_scan->range_count= param->range_count;
index_scan->records= found_records;
index_scan->sel_arg= key;
@@ -11042,7 +11042,7 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
seq.start= tree;
param->range_count=0;
- param->max_key_part=0;
+ param->max_key_parts=0;
seq.is_ror_scan= TRUE;
if (file->index_flags(keynr, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
@@ -11055,9 +11055,13 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
*mrr_flags|= HA_MRR_NO_ASSOCIATION | HA_MRR_SORTED;
bool pk_is_clustered= file->primary_key_is_clustered();
+ // TODO: param->max_key_parts holds 0 now, and not the #keyparts used.
+ // Passing wrong second argument to index_flags() makes no difference for
+ // most storage engines but might be an issue for MyRocks with certain
+ // datatypes.
if (index_only &&
- (file->index_flags(keynr, param->max_key_part, 1) & HA_KEYREAD_ONLY) &&
- !(file->index_flags(keynr, param->max_key_part, 1) & HA_CLUSTERED_INDEX))
+ (file->index_flags(keynr, param->max_key_parts, 1) & HA_KEYREAD_ONLY) &&
+ !(file->index_flags(keynr, param->max_key_parts, 1) & HA_CLUSTERED_INDEX))
*mrr_flags |= HA_MRR_INDEX_ONLY;
if (param->thd->lex->sql_command != SQLCOM_SELECT)
@@ -11088,7 +11092,7 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
if (update_tbl_stats)
{
param->table->quick_keys.set_bit(keynr);
- param->table->quick_key_parts[keynr]= param->max_key_part+1;
+ param->table->quick_key_parts[keynr]= param->max_key_parts;
param->table->quick_n_ranges[keynr]= param->range_count;
param->table->quick_condition_rows=
MY_MIN(param->table->quick_condition_rows, rows);
diff --git a/sql/opt_range_mrr.cc b/sql/opt_range_mrr.cc
index d3a1e155fb7..4afa06a7ca0 100644
--- a/sql/opt_range_mrr.cc
+++ b/sql/opt_range_mrr.cc
@@ -247,6 +247,7 @@ bool sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
uint min_key_length= (uint)(cur->min_key - seq->param->min_key);
range->ptr= (char*)(intptr)(key_tree->part);
+ uint max_key_parts;
if (cur->min_key_flag & GEOM_FLAG)
{
range->range_flag= cur->min_key_flag;
@@ -256,9 +257,11 @@ bool sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
range->start_key.length= min_key_length;
range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts);
range->start_key.flag= (ha_rkey_function) (cur->min_key_flag ^ GEOM_FLAG);
+ max_key_parts= cur->min_key_parts;
}
else
{
+ max_key_parts= MY_MAX(cur->min_key_parts, cur->max_key_parts);
range->range_flag= cur->min_key_flag | cur->max_key_flag;
range->start_key.key= seq->param->min_key;
@@ -336,7 +339,7 @@ bool sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
}
}
seq->param->range_count++;
- seq->param->max_key_part=MY_MAX(seq->param->max_key_part,key_tree->part);
+ seq->param->max_key_parts= MY_MAX(seq->param->max_key_parts, max_key_parts);
return 0;
}
[Commits] d34839cacbe: MDEV-18046: Assortment of crashes, assertion failures and ASAN errors in mysql_show_binlog_events
by sujatha 04 Oct '19
revision-id: d34839cacbef0f4c76b7aa7ae6165b37b293685c (mariadb-10.1.41-51-gd34839cacbe)
parent(s): 4bcf524482b3a4260347b0fe912fa2660af97c43
author: Sujatha
committer: Sujatha
timestamp: 2019-10-04 13:45:33 +0530
message:
MDEV-18046: Assortment of crashes, assertion failures and ASAN errors in mysql_show_binlog_events
Problem:
========
SHOW BINLOG EVENTS FROM <pos> causes a variety of failures, some of which are
listed below. It is not a race condition issue, but there is some
non-determinism in it.
Fix:
====
Assert 1: With ASAN
========
Rows_log_event::Rows_log_event(const char*, uint,
const Format_description_log_event*):
Assertion `var_header_len >= 2'
Implemented upstream patch.
commit a3a497ccf7ecacc900551fb1e47ea4078b45c351
Fix: Added checks to avoid reading out of buffer limits.
Assert 2:
=========
Table_map_log_event::Table_map_log_event(const char*, uint,
const Format_description_log_event*)
Assertion `m_field_metadata_size <= (m_colcnt * 2)' failed.
Fix: Converted debug assert to error handler code.
ASAN Error: heap-buffer-overflow within "my_strndup" in Rotate_log_event
===========
my_strndup /home/sujatha/bug_repo/tmp-10.1/mysys/my_malloc.c:254
Rotate_log_event::Rotate_log_event(char const*, unsigned int,
Format_description_log_event const*)
Fix: Improved the event_len validation check.
ASAN Error: AddressSanitizer: SEGV on unknown address
==========
in inline_mysql_mutex_destroy
in my_bitmap_free
in Update_rows_log_event::~Update_rows_log_event()
Fix: Initialize m_cols_ai.bitmap to NULL.
ASAN Error: AddressSanitizer: heap-buffer-overflow on address 0x60400002acb8
==========
Load_log_event::copy_log_event(char const*, unsigned long, int,
Format_description_log_event const*)
Fix: Moved the event_len validation to the beginning of the copy_log_event function.
ASAN Error: AddressSanitizer: SEGV on unknown address
==========
The signal is caused by a READ memory access.
User_var_log_event::User_var_log_event(char const*, unsigned int,
Format_description_log_event const*)
Implemented upstream patch.
commit a3a497ccf7ecacc900551fb1e47ea4078b45c351
User_var_log_event::User_var_log_event: added checks to avoid reading out of
buffer limits.
ASAN Error: AddressSanitizer: heap-buffer-overflow on address
==========
String::append(char const*, unsigned int)
Query_log_event::pack_info(Protocol*)
Fix: Added check to ensure that query_len is within event_length
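All of the fixes above follow the same defensive pattern: check the length claimed inside
the event against the size of the received buffer before reading, and mark the event
invalid instead of asserting. A minimal, purely illustrative sketch of that pattern
(names here are placeholders, not the exact code from the diff):
    /* Illustrative only: reject a field whose claimed length would read
       past the end of the event buffer received from the binlog. */
    if (claimed_len > event_len || ptr + claimed_len > buf + event_len)
    {
      query= 0;            /* leave the event in an "invalid" state */
      DBUG_VOID_RETURN;    /* caller reports a read/corruption error */
    }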
---
.../binlog/r/binlog_invalid_read_in_rotate.result | 17 +++++
.../r/binlog_show_binlog_event_random_pos.result | 13 ++++
.../binlog/t/binlog_invalid_read_in_rotate.test | 47 ++++++++++++++
.../t/binlog_show_binlog_event_random_pos.test | 37 +++++++++++
sql/log_event.cc | 75 ++++++++++++++++------
5 files changed, 168 insertions(+), 21 deletions(-)
diff --git a/mysql-test/suite/binlog/r/binlog_invalid_read_in_rotate.result b/mysql-test/suite/binlog/r/binlog_invalid_read_in_rotate.result
new file mode 100644
index 00000000000..28131d2645c
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_invalid_read_in_rotate.result
@@ -0,0 +1,17 @@
+RESET MASTER;
+call mtr.add_suppression("Error in Log_event::read_log_event*");
+DROP TABLE /*! IF EXISTS*/ t1;
+Warnings:
+Note 1051 Unknown table 'test.t1'
+CREATE TABLE `t1` (
+`col_int` int,
+pk integer auto_increment,
+`col_char_12_key` char(12),
+`col_int_key` int,
+`col_char_12` char(12),
+primary key (pk),
+key (`col_char_12_key` ),
+key (`col_int_key` )) ENGINE=InnoDB;
+INSERT /*! IGNORE */ INTO t1 VALUES (183173120, NULL, 'abcd', 1, 'efghijk'), (1, NULL, 'lmno', 2, 'p');
+ALTER TABLE `t1` ENABLE KEYS;
+DROP TABLE t1;
diff --git a/mysql-test/suite/binlog/r/binlog_show_binlog_event_random_pos.result b/mysql-test/suite/binlog/r/binlog_show_binlog_event_random_pos.result
new file mode 100644
index 00000000000..98385a7ac5e
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_show_binlog_event_random_pos.result
@@ -0,0 +1,13 @@
+RESET MASTER;
+call mtr.add_suppression("Error in Log_event::read_log_event*");
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note 1051 Unknown table 'test.t1'
+CREATE TABLE t1 (c1 CHAR(255), c2 CHAR(255), c3 CHAR(255), c4 CHAR(255), c5 CHAR(255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+UPDATE t1 SET c1=repeat('b',255);
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+DROP TABLE t1;
diff --git a/mysql-test/suite/binlog/t/binlog_invalid_read_in_rotate.test b/mysql-test/suite/binlog/t/binlog_invalid_read_in_rotate.test
new file mode 100644
index 00000000000..8360f2dcac4
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_invalid_read_in_rotate.test
@@ -0,0 +1,47 @@
+# ==== Purpose ====
+#
+# Test verifies that reading binary log by using "SHOW BINLOG EVENTS" command
+# from various random positions doesn't lead to crash. It should exit
+# gracefully with appropriate error.
+#
+# ==== References ====
+#
+# MDEV-18046:Assortment of crashes, assertion failures and ASAN errors in mysql_show_binlog_events
+
+--source include/have_log_bin.inc
+--source include/have_innodb.inc
+
+RESET MASTER;
+
+call mtr.add_suppression("Error in Log_event::read_log_event*");
+
+DROP TABLE /*! IF EXISTS*/ t1;
+CREATE TABLE `t1` (
+`col_int` int,
+pk integer auto_increment,
+`col_char_12_key` char(12),
+`col_int_key` int,
+`col_char_12` char(12),
+primary key (pk),
+key (`col_char_12_key` ),
+key (`col_int_key` )) ENGINE=InnoDB;
+
+INSERT /*! IGNORE */ INTO t1 VALUES (183173120, NULL, 'abcd', 1, 'efghijk'), (1, NULL, 'lmno', 2, 'p');
+
+--disable_warnings
+ALTER TABLE `t1` ENABLE KEYS;
+--enable_warnings
+
+--let $max_pos= query_get_value(SHOW MASTER STATUS,Position,1)
+--let $pos= 1
+while ($pos <= $max_pos)
+{
+ --disable_query_log
+ --disable_result_log
+ --error 0,1220
+ eval SHOW BINLOG EVENTS FROM $pos;
+ --inc $pos
+ --enable_result_log
+ --enable_query_log
+}
+DROP TABLE t1;
diff --git a/mysql-test/suite/binlog/t/binlog_show_binlog_event_random_pos.test b/mysql-test/suite/binlog/t/binlog_show_binlog_event_random_pos.test
new file mode 100644
index 00000000000..aaf56faa57e
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_show_binlog_event_random_pos.test
@@ -0,0 +1,37 @@
+# ==== Purpose ====
+#
+# Test verifies that reading binary log by using "SHOW BINLOG EVENTS" command
+# from various random positions doesn't lead to crash. It should exit
+# gracefully with appropriate error.
+#
+# ==== References ====
+#
+# MDEV-18046:Assortment of crashes, assertion failures and ASAN errors in mysql_show_binlog_events
+
+--source include/have_log_bin.inc
+--source include/have_innodb.inc
+
+RESET MASTER;
+call mtr.add_suppression("Error in Log_event::read_log_event*");
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (c1 CHAR(255), c2 CHAR(255), c3 CHAR(255), c4 CHAR(255), c5 CHAR(255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+UPDATE t1 SET c1=repeat('b',255);
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+--let $max_pos= query_get_value(SHOW MASTER STATUS,Position,1)
+--let $pos= 1
+while ($pos <= $max_pos)
+{
+ --disable_query_log
+ --disable_result_log
+ --error 0,1220
+ eval SHOW BINLOG EVENTS FROM $pos LIMIT 100;
+ --inc $pos
+ --enable_result_log
+ --enable_query_log
+}
+
+DROP TABLE t1;
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 65f29441e1a..293bfa6680e 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -3502,7 +3502,6 @@ code_name(int code)
#define CHECK_SPACE(PTR,END,CNT) \
do { \
DBUG_PRINT("info", ("Read %s", code_name(pos[-1]))); \
- DBUG_ASSERT((PTR) + (CNT) <= (END)); \
if ((PTR) + (CNT) > (END)) { \
DBUG_PRINT("info", ("query= 0")); \
query= 0; \
@@ -3815,7 +3814,7 @@ Query_log_event::Query_log_event(const char* buf, uint event_len,
uint32 max_length= uint32(event_len - ((const char*)(end + db_len + 1) -
(buf - common_header_len)));
- if (q_len != max_length)
+ if (q_len != max_length || q_len > event_len)
{
q_len= 0;
query= NULL;
@@ -5889,6 +5888,9 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len,
{
DBUG_ENTER("Load_log_event::copy_log_event");
uint data_len;
+
+ if ((int) event_len < body_offset)
+ DBUG_RETURN(1);
char* buf_end = (char*)buf + event_len;
/* this is the beginning of the post-header */
const char* data_head = buf + description_event->common_header_len;
@@ -5898,18 +5900,17 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len,
table_name_len = (uint)data_head[L_TBL_LEN_OFFSET];
db_len = (uint)data_head[L_DB_LEN_OFFSET];
num_fields = uint4korr(data_head + L_NUM_FIELDS_OFFSET);
-
- if ((int) event_len < body_offset)
- DBUG_RETURN(1);
+
/*
Sql_ex.init() on success returns the pointer to the first byte after
the sql_ex structure, which is the start of field lengths array.
*/
+
if (!(field_lens= (uchar*)sql_ex.init((char*)buf + body_offset,
buf_end,
(uchar)buf[EVENT_TYPE_OFFSET] != LOAD_EVENT)))
DBUG_RETURN(1);
-
+
data_len = event_len - body_offset;
if (num_fields > data_len) // simple sanity check against corruption
DBUG_RETURN(1);
@@ -6449,7 +6450,7 @@ Rotate_log_event::Rotate_log_event(const char* buf, uint event_len,
// The caller will ensure that event_len is what we have at EVENT_LEN_OFFSET
uint8 post_header_len= description_event->post_header_len[ROTATE_EVENT-1];
uint ident_offset;
- if (event_len < LOG_EVENT_MINIMAL_HEADER_LEN)
+ if (event_len < (uint)(LOG_EVENT_MINIMAL_HEADER_LEN + post_header_len))
DBUG_VOID_RETURN;
buf+= LOG_EVENT_MINIMAL_HEADER_LEN;
pos= post_header_len ? uint8korr(buf + R_POS_OFFSET) : 4;
@@ -7955,6 +7956,12 @@ User_var_log_event(const char* buf, uint event_len,
we keep the flags set to UNDEF_F.
*/
uint bytes_read= ((val + val_len) - buf_start);
+ if (bytes_read > event_len)
+ {
+ error= true;
+ goto err;
+ }
+
if ((data_written - bytes_read) > 0)
{
flags= (uint) *(buf + UV_VAL_IS_NULL + UV_VAL_TYPE_SIZE +
@@ -9513,7 +9520,8 @@ Rows_log_event::Rows_log_event(const char *buf, uint event_len,
uint8 const common_header_len= description_event->common_header_len;
Log_event_type event_type= (Log_event_type) buf[EVENT_TYPE_OFFSET];
m_type= event_type;
-
+ m_cols_ai.bitmap= 0;
+
uint8 const post_header_len= description_event->post_header_len[event_type-1];
DBUG_PRINT("enter",("event_len: %u common_header_len: %d "
@@ -9546,7 +9554,16 @@ Rows_log_event::Rows_log_event(const char *buf, uint event_len,
which includes length bytes
*/
var_header_len= uint2korr(post_start);
- assert(var_header_len >= 2);
+
+ /* Check length and also avoid out of buffer read */
+ if (var_header_len < 2 ||
+ event_len < static_cast<unsigned int>(var_header_len +
+ (post_start - buf)))
+ {
+ m_cols.bitmap= 0;
+ DBUG_VOID_RETURN;
+ }
+
var_header_len-= 2;
/* Iterate over var-len header, extracting 'chunks' */
@@ -11047,6 +11064,10 @@ Table_map_log_event::Table_map_log_event(const char *buf, uint event_len,
/* Length of table name + counter + terminating null */
uchar const *const ptr_colcnt= ptr_tbllen + m_tbllen + 2;
uchar *ptr_after_colcnt= (uchar*) ptr_colcnt;
+ bytes_read= (unsigned int) (ptr_after_colcnt - (unsigned char *)buf);
+ /* Avoid reading out of buffer */
+ if (event_len <= bytes_read)
+ DBUG_VOID_RETURN;
m_colcnt= net_field_length(&ptr_after_colcnt);
DBUG_PRINT("info",("m_dblen: %lu off: %ld m_tbllen: %lu off: %ld m_colcnt: %lu off: %ld",
@@ -11074,15 +11095,24 @@ Table_map_log_event::Table_map_log_event(const char *buf, uint event_len,
if (bytes_read < event_len)
{
m_field_metadata_size= net_field_length(&ptr_after_colcnt);
- DBUG_ASSERT(m_field_metadata_size <= (m_colcnt * 2));
- uint num_null_bytes= (m_colcnt + 7) / 8;
- m_meta_memory= (uchar *)my_multi_malloc(MYF(MY_WME),
- &m_null_bits, num_null_bytes,
- &m_field_metadata, m_field_metadata_size,
- NULL);
- memcpy(m_field_metadata, ptr_after_colcnt, m_field_metadata_size);
- ptr_after_colcnt= (uchar*)ptr_after_colcnt + m_field_metadata_size;
- memcpy(m_null_bits, ptr_after_colcnt, num_null_bytes);
+ if (m_field_metadata_size <= (m_colcnt * 2))
+ {
+ uint num_null_bytes= (m_colcnt + 7) / 8;
+ m_meta_memory= (uchar *)my_multi_malloc(MYF(MY_WME),
+ &m_null_bits, num_null_bytes,
+ &m_field_metadata, m_field_metadata_size,
+ NULL);
+ memcpy(m_field_metadata, ptr_after_colcnt, m_field_metadata_size);
+ ptr_after_colcnt= (uchar*)ptr_after_colcnt + m_field_metadata_size;
+ memcpy(m_null_bits, ptr_after_colcnt, num_null_bytes);
+ }
+ else
+ {
+ m_coltype= NULL;
+ my_free(m_memory);
+ m_memory= NULL;
+ DBUG_VOID_RETURN;
+ }
}
}
@@ -12739,9 +12769,12 @@ void Update_rows_log_event::init(MY_BITMAP const *cols)
Update_rows_log_event::~Update_rows_log_event()
{
- if (m_cols_ai.bitmap == m_bitbuf_ai) // no my_malloc happened
- m_cols_ai.bitmap= 0; // so no my_free in my_bitmap_free
- my_bitmap_free(&m_cols_ai); // To pair with my_bitmap_init().
+ if (m_cols_ai.bitmap)
+ {
+ if (m_cols_ai.bitmap == m_bitbuf_ai) // no my_malloc happened
+ m_cols_ai.bitmap= 0; // so no my_free in my_bitmap_free
+ my_bitmap_free(&m_cols_ai); // To pair with my_bitmap_init().
+ }
}
[Commits] 16b25039c5e: MDEV-20574 Position of events reported by mysqlbinlog is wrong with encrypted binlogs, SHOW BINLOG EVENTS reports the correct one.
by sachin 01 Oct '19
revision-id: 16b25039c5ebb53893904dd52ae3e6b3b6daec89 (mariadb-10.2.27-49-g16b25039c5e)
parent(s): 102bc7beb080e936ba70c0524a8759799f42458e
author: Sachin Setiya
committer: Sachin Setiya
timestamp: 2019-10-01 14:59:29 +0530
message:
MDEV-20574 Position of events reported by mysqlbinlog is wrong with encrypted binlogs, SHOW BINLOG EVENTS reports the correct one.
Analysis
Mysqlbinlog output for encrypted binary log
#Q> insert into tab1 values (3,'row 003')
#190912 17:36:35 server id 10221 end_log_pos 980 CRC32 0x53bcb3d3 Table_map: `test`.`tab1` mapped to number 19
# at 940
#190912 17:36:35 server id 10221 end_log_pos 1026 CRC32 0xf2ae5136 Write_rows: table id 19 flags: STMT_END_F
Here we can see that the Table_map_log_event ends at 980, but the next event starts at 940.
The reason is that we do not send the START_ENCRYPTION_EVENT to the slave.
Solution:-
Send the Start_encryption_log_event as an Ignorable_log_event to the slave (and to
mysqlbinlog), so that mysqlbinlog can update its log_pos.
Since the slave can request more FORMAT_DESCRIPTION_EVENTs than the master actually
has, we only update the slave's master position when the master really has the
FORMAT_DESCRIPTION_EVENT. The same logic is applied to the START_ENCRYPTION_EVENT.
Also added a test case where a new server reads data from an old server that does not
send the START_ENCRYPTION_EVENT to the slave.
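On the master side the change amounts to shipping the Start_encryption event with the
ignorable flag set instead of swallowing it; condensed from the sql_repl.cc hunk below:
    /* Make it an Ignorable_log_event and send it to the slave */
    (*packet)[FLAGS_OFFSET + ev_offset] |= LOG_EVENT_IGNORABLE_F;
    if (initial_log_pos)
      int4store((char*) packet->ptr() + LOG_POS_OFFSET + ev_offset, (ulong) 0);
    fix_checksum(info->current_checksum_alg, packet, ev_offset);  /* header changed */
    if (my_net_write(info->net, (uchar*) packet->ptr(), packet->length()))
      DBUG_RETURN(1);   /* reported as "Failed on my_net_write()" in the real code */
On the slave side, queue_event() only advances mi->master_log_pos for this event when
its end_log_pos is non-zero, i.e. when the master actually sent it.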
---
mysql-test/std_data/binlog_before_20574.bin | Bin 0 -> 1022 bytes
.../binlog_encryption/binlog_row_annotate.result | 9 +++
.../binlog_encryption/mdev_20574_old_binlog.result | 27 +++++++++
.../binlog_encryption/mdev_20574_old_binlog.test | 40 +++++++++++++
.../suite/binlog_encryption/mysqlbinlog.result | 1 +
.../suite/binlog_encryption/mysqlbinlog.test | 3 +
sql/log_event.cc | 34 +++++------
sql/slave.cc | 11 ++++
sql/sql_repl.cc | 63 ++++++++++++++-------
9 files changed, 153 insertions(+), 35 deletions(-)
diff --git a/mysql-test/std_data/binlog_before_20574.bin b/mysql-test/std_data/binlog_before_20574.bin
new file mode 100644
index 00000000000..596a883dc71
Binary files /dev/null and b/mysql-test/std_data/binlog_before_20574.bin differ
diff --git a/mysql-test/suite/binlog_encryption/binlog_row_annotate.result b/mysql-test/suite/binlog_encryption/binlog_row_annotate.result
index 88c690a8bb7..9b843dc8a6b 100644
--- a/mysql-test/suite/binlog_encryption/binlog_row_annotate.result
+++ b/mysql-test/suite/binlog_encryption/binlog_row_annotate.result
@@ -104,6 +104,9 @@ DELIMITER /*!*/;
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
# at #
+#010909 4:46:40 server id # end_log_pos # Ignorable
+# Ignorable event type 164 (Start_encryption)
+# at #
#010909 4:46:40 server id # end_log_pos # Gtid list []
# at #
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
@@ -336,6 +339,9 @@ DELIMITER /*!*/;
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
# at #
+#010909 4:46:40 server id # end_log_pos # Ignorable
+# Ignorable event type 164 (Start_encryption)
+# at #
#010909 4:46:40 server id # end_log_pos # Gtid list []
# at #
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
@@ -495,6 +501,9 @@ DELIMITER /*!*/;
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
# at #
+#010909 4:46:40 server id # end_log_pos # Ignorable
+# Ignorable event type 164 (Start_encryption)
+# at #
#010909 4:46:40 server id # end_log_pos # Gtid list []
# at #
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
diff --git a/mysql-test/suite/binlog_encryption/mdev_20574_old_binlog.result b/mysql-test/suite/binlog_encryption/mdev_20574_old_binlog.result
new file mode 100644
index 00000000000..52029b6342e
--- /dev/null
+++ b/mysql-test/suite/binlog_encryption/mdev_20574_old_binlog.result
@@ -0,0 +1,27 @@
+include/master-slave.inc
+[connection master]
+connection slave;
+include/stop_slave.inc
+connection master;
+include/rpl_stop_server.inc [server_number=1]
+# Data in binlog
+# CREATE TABLE t1 (a INT);
+# INSERT INTO t1 VALUES (1),(2),(3);
+# REPLACE INTO t1 VALUES (4);
+include/rpl_start_server.inc [server_number=1]
+connection slave;
+reset slave;
+reset master;
+CHANGE MASTER TO master_host='127.0.0.1', master_port=16000, master_user='root', master_log_file='master-bin.000001', master_log_pos=4;
+include/start_slave.inc
+desc t1;
+Field Type Null Key Default Extra
+a int(11) YES NULL
+select * from t1 order by a;
+a
+1
+2
+3
+4
+drop table t1;
+include/rpl_end.inc
diff --git a/mysql-test/suite/binlog_encryption/mdev_20574_old_binlog.test b/mysql-test/suite/binlog_encryption/mdev_20574_old_binlog.test
new file mode 100644
index 00000000000..183697ab9c3
--- /dev/null
+++ b/mysql-test/suite/binlog_encryption/mdev_20574_old_binlog.test
@@ -0,0 +1,40 @@
+# Test replicating off old master.
+# We simulate old master by copying in pre-generated binlog files from earlier
+# server versions.
+source include/have_binlog_format_row.inc;
+source include/master-slave.inc;
+--source include/have_innodb.inc
+
+--connection slave
+--source include/stop_slave.inc
+
+--connection master
+--let $datadir= `SELECT @@datadir`
+
+--let $rpl_server_number= 1
+--source include/rpl_stop_server.inc
+
+--remove_file $datadir/master-bin.000001
+--remove_file $datadir/master-bin.state
+--echo # Data in binlog
+--echo # CREATE TABLE t1 (a INT);
+--echo # INSERT INTO t1 VALUES (1),(2),(3);
+--echo # REPLACE INTO t1 VALUES (4);
+
+--copy_file $MYSQL_TEST_DIR/std_data/binlog_before_20574.bin $datadir/master-bin.000001
+
+--let $rpl_server_number= 1
+--source include/rpl_start_server.inc
+
+--source include/wait_until_connected_again.inc
+
+--connection slave
+#--replace_result $SERVER_MYPORT_1 SERVER_MYPORT_1
+reset slave;
+reset master;
+eval CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_1, master_user='root', master_log_file='master-bin.000001', master_log_pos=4;
+--source include/start_slave.inc
+desc t1;
+select * from t1 order by a;
+drop table t1;
+source include/rpl_end.inc;
diff --git a/mysql-test/suite/binlog_encryption/mysqlbinlog.result b/mysql-test/suite/binlog_encryption/mysqlbinlog.result
index 71758f7d6e7..e97e0569571 100644
--- a/mysql-test/suite/binlog_encryption/mysqlbinlog.result
+++ b/mysql-test/suite/binlog_encryption/mysqlbinlog.result
@@ -4,3 +4,4 @@ INSERT INTO t1 VALUES (1),(2),(3);
REPLACE INTO t1 VALUES (4);
DROP TABLE t1;
FLUSH LOGS;
+FOUND 1 /Ignorable event type 164.*/ in binlog_enc.sql
diff --git a/mysql-test/suite/binlog_encryption/mysqlbinlog.test b/mysql-test/suite/binlog_encryption/mysqlbinlog.test
index b80388aaa45..108dbd8782f 100644
--- a/mysql-test/suite/binlog_encryption/mysqlbinlog.test
+++ b/mysql-test/suite/binlog_encryption/mysqlbinlog.test
@@ -17,5 +17,8 @@ let outfile=$MYSQLTEST_VARDIR/tmp/binlog_enc.sql;
exec $MYSQL_BINLOG $local > $outfile;
exec $MYSQL_BINLOG $local --force-read >> $outfile;
exec $MYSQL_BINLOG $remote >> $outfile;
+--let SEARCH_FILE= $outfile
+--let SEARCH_PATTERN= Ignorable event type 164.*
+--source include/search_pattern_in_file.inc
remove_file $outfile;
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 0e0d69b515c..66f1d6bc790 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -2077,6 +2077,19 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
alg != BINLOG_CHECKSUM_ALG_OFF))
event_len= event_len - BINLOG_CHECKSUM_LEN;
+ /*
+ Create an object of Ignorable_log_event for unrecognized sub-class.
+ So that SLAVE SQL THREAD will only update the position and continue.
+ We should look for this flag first instead of judging by event_type
+ Any event can be Ignorable_log_event if it has this flag on.
+ look into @note of Ignorable_log_event
+ */
+ if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)
+ {
+ ev= new Ignorable_log_event(buf, fdle,
+ get_type_str((Log_event_type) event_type));
+ goto exit;
+ }
switch(event_type) {
case QUERY_EVENT:
ev = new Query_log_event(buf, event_len, fdle, QUERY_EVENT);
@@ -2203,24 +2216,13 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
ev = new Start_encryption_log_event(buf, event_len, fdle);
break;
default:
- /*
- Create an object of Ignorable_log_event for unrecognized sub-class.
- So that SLAVE SQL THREAD will only update the position and continue.
- */
- if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)
- {
- ev= new Ignorable_log_event(buf, fdle,
- get_type_str((Log_event_type) event_type));
- }
- else
- {
- DBUG_PRINT("error",("Unknown event code: %d",
- (uchar) buf[EVENT_TYPE_OFFSET]));
- ev= NULL;
- break;
- }
+ DBUG_PRINT("error",("Unknown event code: %d",
+ (uchar) buf[EVENT_TYPE_OFFSET]));
+ ev= NULL;
+ break;
}
}
+exit:
if (ev)
{
diff --git a/sql/slave.cc b/sql/slave.cc
index 88a80029bba..1bc21f8895b 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -6317,7 +6317,18 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
mi->last_queued_gtid.seq_no == 1000)
goto skip_relay_logging;
});
+ goto default_action;
#endif
+ case START_ENCRYPTION_EVENT:
+ if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)
+ {
+ /*
+ If the event was not requested by the slave (the slave did not ask for
+ it), i.e. has end_log_pos=0, we do not increment mi->master_log_pos
+ */
+ inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0;
+ break;
+ }
/* fall through */
default:
default_action:
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index edff750a9a3..8a6bea34fb4 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -399,16 +399,27 @@ static int send_file(THD *thd)
/**
Internal to mysql_binlog_send() routine that recalculates checksum for
- a FD event (asserted) that needs additional arranment prior sending to slave.
+ 1. FD event (asserted) that needs additional arranment prior sending to slave.
+ 2. Start_encryption_log_event whose Ignored flag is set
+TODO DBUG_ASSERT can be removed if this function is used for more general cases
*/
-inline void fix_checksum(String *packet, ulong ev_offset)
+
+inline void fix_checksum(enum_binlog_checksum_alg checksum_alg, String *packet,
+ ulong ev_offset)
{
+ if (checksum_alg == BINLOG_CHECKSUM_ALG_OFF ||
+ checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
+ return;
/* recalculate the crc for this event */
uint data_len = uint4korr(packet->ptr() + ev_offset + EVENT_LEN_OFFSET);
ha_checksum crc;
- DBUG_ASSERT(data_len ==
+ DBUG_ASSERT((data_len ==
LOG_EVENT_MINIMAL_HEADER_LEN + FORMAT_DESCRIPTION_HEADER_LEN +
- BINLOG_CHECKSUM_ALG_DESC_LEN + BINLOG_CHECKSUM_LEN);
+ BINLOG_CHECKSUM_ALG_DESC_LEN + BINLOG_CHECKSUM_LEN) ||
+ (data_len ==
+ LOG_EVENT_MINIMAL_HEADER_LEN + BINLOG_CRYPTO_SCHEME_LENGTH +
+ BINLOG_KEY_VERSION_LENGTH + BINLOG_NONCE_LENGTH +
+ BINLOG_CHECKSUM_LEN));
crc= my_checksum(0, (uchar *)packet->ptr() + ev_offset, data_len -
BINLOG_CHECKSUM_LEN);
int4store(packet->ptr() + ev_offset + data_len - BINLOG_CHECKSUM_LEN, crc);
@@ -2135,6 +2146,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
THD *thd= info->thd;
String *packet= info->packet;
Log_event_type event_type;
+ bool initial_log_pos= info->clear_initial_log_pos;
DBUG_ENTER("send_format_descriptor_event");
/**
@@ -2233,7 +2245,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
(*packet)[FLAGS_OFFSET+ev_offset] &= ~LOG_EVENT_BINLOG_IN_USE_F;
- if (info->clear_initial_log_pos)
+ if (initial_log_pos)
{
info->clear_initial_log_pos= false;
/*
@@ -2251,9 +2263,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
ST_CREATED_OFFSET+ev_offset, (ulong) 0);
/* fix the checksum due to latest changes in header */
- if (info->current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
- info->current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
- fix_checksum(packet, ev_offset);
+ fix_checksum(info->current_checksum_alg, packet, ev_offset);
}
else if (info->using_gtid_state)
{
@@ -2274,9 +2284,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
{
int4store((char*) packet->ptr()+LOG_EVENT_MINIMAL_HEADER_LEN+
ST_CREATED_OFFSET+ev_offset, (ulong) 0);
- if (info->current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
- info->current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
- fix_checksum(packet, ev_offset);
+ fix_checksum(info->current_checksum_alg, packet, ev_offset);
}
}
@@ -2289,12 +2297,16 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
}
/*
- Read the following Start_encryption_log_event but don't send it to slave.
- Slave doesn't need to know whether master's binlog is encrypted,
- and if it'll want to encrypt its logs, it should generate its own
- random nonce, not use the one from the master.
+ Read the following Start_encryption_log_event and send it to slave as
+ Ignorable_log_event. Although Slave doesn't need to know whether master's
+ binlog is encrypted but it needs to update slave log pos (for mysqlbinlog).
+
+ If slave want to encrypt its logs, it should generate its own
+ random nonce, it should not use the one from the master.
*/
- packet->length(0);
+ /* reset transmit packet for the event read from binary log file */
+ if (reset_transmit_packet(info, info->flags, &ev_offset, &info->errmsg))
+ DBUG_RETURN(1);
info->last_pos= linfo->pos;
error= Log_event::read_log_event(log, packet, info->fdev,
opt_master_verify_checksum
@@ -2308,12 +2320,13 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
DBUG_RETURN(1);
}
- event_type= (Log_event_type)((uchar)(*packet)[LOG_EVENT_OFFSET]);
+ event_type= (Log_event_type)((uchar)(*packet)[LOG_EVENT_OFFSET + ev_offset]);
if (event_type == START_ENCRYPTION_EVENT)
{
Start_encryption_log_event *sele= (Start_encryption_log_event *)
- Log_event::read_log_event(packet->ptr(), packet->length(), &info->errmsg,
- info->fdev, BINLOG_CHECKSUM_ALG_OFF);
+ Log_event::read_log_event(packet->ptr() + ev_offset, packet->length()
+ - ev_offset, &info->errmsg, info->fdev,
+ BINLOG_CHECKSUM_ALG_OFF);
if (!sele)
{
info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
@@ -2327,6 +2340,18 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
delete sele;
DBUG_RETURN(1);
}
+ /* Make it Ignorable_log_event and send it */
+ (*packet)[FLAGS_OFFSET+ev_offset] |= LOG_EVENT_IGNORABLE_F;
+ if (initial_log_pos)
+ int4store((char*) packet->ptr()+LOG_POS_OFFSET+ev_offset, (ulong) 0);
+ /* fix the checksum due to latest changes in header */
+ fix_checksum(info->current_checksum_alg, packet, ev_offset);
+ if (my_net_write(info->net, (uchar*) packet->ptr(), packet->length()))
+ {
+ info->errmsg= "Failed on my_net_write()";
+ info->error= ER_UNKNOWN_ERROR;
+ DBUG_RETURN(1);
+ }
delete sele;
}
else if (start_pos == BIN_LOG_HEADER_SIZE)
[Commits] ad8b8223326: MDEV-20574 Position of events reported by mysqlbinlog is wrong with encrypted binlogs, SHOW BINLOG EVENTS reports the correct one.
by sachin 30 Sep '19
revision-id: ad8b8223326a43ad4de49b96dcc4b3f66400f9f3 (mariadb-10.2.27-49-gad8b8223326)
parent(s): 102bc7beb080e936ba70c0524a8759799f42458e
author: Sachin Setiya
committer: Sachin Setiya
timestamp: 2019-10-01 01:00:29 +0530
message:
MDEV-20574 Position of events reported by mysqlbinlog is wrong with encrypted binlogs, SHOW BINLOG EVENTS reports the correct one.
Analysis
Mysqlbinlog output for encrypted binary log
#Q> insert into tab1 values (3,'row 003')
#190912 17:36:35 server id 10221 end_log_pos 980 CRC32 0x53bcb3d3 Table_map: `test`.`tab1` mapped to number 19
# at 940
#190912 17:36:35 server id 10221 end_log_pos 1026 CRC32 0xf2ae5136 Write_rows: table id 19 flags: STMT_END_F
Here we can see that the Table_map_log_event ends at 980, but the next event starts at 940.
The reason is that we do not send the START_ENCRYPTION_EVENT to the slave.
Solution:-
Send the Start_encryption_log_event as an Ignorable_log_event to the slave (and to
mysqlbinlog), so that mysqlbinlog can update its log_pos.
Since the slave can request more FORMAT_DESCRIPTION_EVENTs than the master actually
has, we only update the slave's master position when the master really has the
FORMAT_DESCRIPTION_EVENT. The same logic is applied to the START_ENCRYPTION_EVENT.
---
.../binlog_encryption/binlog_row_annotate.result | 9 ++++
.../suite/binlog_encryption/mysqlbinlog.result | 1 +
.../suite/binlog_encryption/mysqlbinlog.test | 3 ++
sql/log_event.cc | 34 ++++++------
sql/slave.cc | 11 ++++
sql/sql_repl.cc | 63 +++++++++++++++-------
6 files changed, 86 insertions(+), 35 deletions(-)
diff --git a/mysql-test/suite/binlog_encryption/binlog_row_annotate.result b/mysql-test/suite/binlog_encryption/binlog_row_annotate.result
index 88c690a8bb7..9b843dc8a6b 100644
--- a/mysql-test/suite/binlog_encryption/binlog_row_annotate.result
+++ b/mysql-test/suite/binlog_encryption/binlog_row_annotate.result
@@ -104,6 +104,9 @@ DELIMITER /*!*/;
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
# at #
+#010909 4:46:40 server id # end_log_pos # Ignorable
+# Ignorable event type 164 (Start_encryption)
+# at #
#010909 4:46:40 server id # end_log_pos # Gtid list []
# at #
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
@@ -336,6 +339,9 @@ DELIMITER /*!*/;
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
# at #
+#010909 4:46:40 server id # end_log_pos # Ignorable
+# Ignorable event type 164 (Start_encryption)
+# at #
#010909 4:46:40 server id # end_log_pos # Gtid list []
# at #
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
@@ -495,6 +501,9 @@ DELIMITER /*!*/;
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
# at #
+#010909 4:46:40 server id # end_log_pos # Ignorable
+# Ignorable event type 164 (Start_encryption)
+# at #
#010909 4:46:40 server id # end_log_pos # Gtid list []
# at #
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
diff --git a/mysql-test/suite/binlog_encryption/mysqlbinlog.result b/mysql-test/suite/binlog_encryption/mysqlbinlog.result
index 71758f7d6e7..e97e0569571 100644
--- a/mysql-test/suite/binlog_encryption/mysqlbinlog.result
+++ b/mysql-test/suite/binlog_encryption/mysqlbinlog.result
@@ -4,3 +4,4 @@ INSERT INTO t1 VALUES (1),(2),(3);
REPLACE INTO t1 VALUES (4);
DROP TABLE t1;
FLUSH LOGS;
+FOUND 1 /Ignorable event type 164.*/ in binlog_enc.sql
diff --git a/mysql-test/suite/binlog_encryption/mysqlbinlog.test b/mysql-test/suite/binlog_encryption/mysqlbinlog.test
index b80388aaa45..108dbd8782f 100644
--- a/mysql-test/suite/binlog_encryption/mysqlbinlog.test
+++ b/mysql-test/suite/binlog_encryption/mysqlbinlog.test
@@ -17,5 +17,8 @@ let outfile=$MYSQLTEST_VARDIR/tmp/binlog_enc.sql;
exec $MYSQL_BINLOG $local > $outfile;
exec $MYSQL_BINLOG $local --force-read >> $outfile;
exec $MYSQL_BINLOG $remote >> $outfile;
+--let SEARCH_FILE= $outfile
+--let SEARCH_PATTERN= Ignorable event type 164.*
+--source include/search_pattern_in_file.inc
remove_file $outfile;
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 0e0d69b515c..66f1d6bc790 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -2077,6 +2077,19 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
alg != BINLOG_CHECKSUM_ALG_OFF))
event_len= event_len - BINLOG_CHECKSUM_LEN;
+ /*
+ Create an object of Ignorable_log_event for unrecognized sub-class.
+ So that SLAVE SQL THREAD will only update the position and continue.
+ We should look for this flag first instead of judging by event_type
+ Any event can be Ignorable_log_event if it has this flag on.
+ look into @note of Ignorable_log_event
+ */
+ if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)
+ {
+ ev= new Ignorable_log_event(buf, fdle,
+ get_type_str((Log_event_type) event_type));
+ goto exit;
+ }
switch(event_type) {
case QUERY_EVENT:
ev = new Query_log_event(buf, event_len, fdle, QUERY_EVENT);
@@ -2203,24 +2216,13 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
ev = new Start_encryption_log_event(buf, event_len, fdle);
break;
default:
- /*
- Create an object of Ignorable_log_event for unrecognized sub-class.
- So that SLAVE SQL THREAD will only update the position and continue.
- */
- if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)
- {
- ev= new Ignorable_log_event(buf, fdle,
- get_type_str((Log_event_type) event_type));
- }
- else
- {
- DBUG_PRINT("error",("Unknown event code: %d",
- (uchar) buf[EVENT_TYPE_OFFSET]));
- ev= NULL;
- break;
- }
+ DBUG_PRINT("error",("Unknown event code: %d",
+ (uchar) buf[EVENT_TYPE_OFFSET]));
+ ev= NULL;
+ break;
}
}
+exit:
if (ev)
{
diff --git a/sql/slave.cc b/sql/slave.cc
index 88a80029bba..1bc21f8895b 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -6317,7 +6317,18 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
mi->last_queued_gtid.seq_no == 1000)
goto skip_relay_logging;
});
+ goto default_action;
#endif
+ case START_ENCRYPTION_EVENT:
+ if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)
+ {
+ /*
+ If the event was not requested by the slave (the slave did not ask for
+ it), i.e. has end_log_pos=0, we do not increment mi->master_log_pos
+ */
+ inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0;
+ break;
+ }
/* fall through */
default:
default_action:
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index edff750a9a3..8a6bea34fb4 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -399,16 +399,27 @@ static int send_file(THD *thd)
/**
Internal to mysql_binlog_send() routine that recalculates checksum for
- a FD event (asserted) that needs additional arranment prior sending to slave.
+ 1. FD event (asserted) that needs additional arranment prior sending to slave.
+ 2. Start_encryption_log_event whose Ignored flag is set
+TODO DBUG_ASSERT can be removed if this function is used for more general cases
*/
-inline void fix_checksum(String *packet, ulong ev_offset)
+
+inline void fix_checksum(enum_binlog_checksum_alg checksum_alg, String *packet,
+ ulong ev_offset)
{
+ if (checksum_alg == BINLOG_CHECKSUM_ALG_OFF ||
+ checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
+ return;
/* recalculate the crc for this event */
uint data_len = uint4korr(packet->ptr() + ev_offset + EVENT_LEN_OFFSET);
ha_checksum crc;
- DBUG_ASSERT(data_len ==
+ DBUG_ASSERT((data_len ==
LOG_EVENT_MINIMAL_HEADER_LEN + FORMAT_DESCRIPTION_HEADER_LEN +
- BINLOG_CHECKSUM_ALG_DESC_LEN + BINLOG_CHECKSUM_LEN);
+ BINLOG_CHECKSUM_ALG_DESC_LEN + BINLOG_CHECKSUM_LEN) ||
+ (data_len ==
+ LOG_EVENT_MINIMAL_HEADER_LEN + BINLOG_CRYPTO_SCHEME_LENGTH +
+ BINLOG_KEY_VERSION_LENGTH + BINLOG_NONCE_LENGTH +
+ BINLOG_CHECKSUM_LEN));
crc= my_checksum(0, (uchar *)packet->ptr() + ev_offset, data_len -
BINLOG_CHECKSUM_LEN);
int4store(packet->ptr() + ev_offset + data_len - BINLOG_CHECKSUM_LEN, crc);
@@ -2135,6 +2146,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
THD *thd= info->thd;
String *packet= info->packet;
Log_event_type event_type;
+ bool initial_log_pos= info->clear_initial_log_pos;
DBUG_ENTER("send_format_descriptor_event");
/**
@@ -2233,7 +2245,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
(*packet)[FLAGS_OFFSET+ev_offset] &= ~LOG_EVENT_BINLOG_IN_USE_F;
- if (info->clear_initial_log_pos)
+ if (initial_log_pos)
{
info->clear_initial_log_pos= false;
/*
@@ -2251,9 +2263,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
ST_CREATED_OFFSET+ev_offset, (ulong) 0);
/* fix the checksum due to latest changes in header */
- if (info->current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
- info->current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
- fix_checksum(packet, ev_offset);
+ fix_checksum(info->current_checksum_alg, packet, ev_offset);
}
else if (info->using_gtid_state)
{
@@ -2274,9 +2284,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
{
int4store((char*) packet->ptr()+LOG_EVENT_MINIMAL_HEADER_LEN+
ST_CREATED_OFFSET+ev_offset, (ulong) 0);
- if (info->current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
- info->current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
- fix_checksum(packet, ev_offset);
+ fix_checksum(info->current_checksum_alg, packet, ev_offset);
}
}
@@ -2289,12 +2297,16 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
}
/*
- Read the following Start_encryption_log_event but don't send it to slave.
- Slave doesn't need to know whether master's binlog is encrypted,
- and if it'll want to encrypt its logs, it should generate its own
- random nonce, not use the one from the master.
+ Read the following Start_encryption_log_event and send it to slave as
+ Ignorable_log_event. Although Slave doesn't need to know whether master's
+ binlog is encrypted but it needs to update slave log pos (for mysqlbinlog).
+
+ If slave want to encrypt its logs, it should generate its own
+ random nonce, it should not use the one from the master.
*/
- packet->length(0);
+ /* reset transmit packet for the event read from binary log file */
+ if (reset_transmit_packet(info, info->flags, &ev_offset, &info->errmsg))
+ DBUG_RETURN(1);
info->last_pos= linfo->pos;
error= Log_event::read_log_event(log, packet, info->fdev,
opt_master_verify_checksum
@@ -2308,12 +2320,13 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
DBUG_RETURN(1);
}
- event_type= (Log_event_type)((uchar)(*packet)[LOG_EVENT_OFFSET]);
+ event_type= (Log_event_type)((uchar)(*packet)[LOG_EVENT_OFFSET + ev_offset]);
if (event_type == START_ENCRYPTION_EVENT)
{
Start_encryption_log_event *sele= (Start_encryption_log_event *)
- Log_event::read_log_event(packet->ptr(), packet->length(), &info->errmsg,
- info->fdev, BINLOG_CHECKSUM_ALG_OFF);
+ Log_event::read_log_event(packet->ptr() + ev_offset, packet->length()
+ - ev_offset, &info->errmsg, info->fdev,
+ BINLOG_CHECKSUM_ALG_OFF);
if (!sele)
{
info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
@@ -2327,6 +2340,18 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
delete sele;
DBUG_RETURN(1);
}
+ /* Make it Ignorable_log_event and send it */
+ (*packet)[FLAGS_OFFSET+ev_offset] |= LOG_EVENT_IGNORABLE_F;
+ if (initial_log_pos)
+ int4store((char*) packet->ptr()+LOG_POS_OFFSET+ev_offset, (ulong) 0);
+ /* fix the checksum due to latest changes in header */
+ fix_checksum(info->current_checksum_alg, packet, ev_offset);
+ if (my_net_write(info->net, (uchar*) packet->ptr(), packet->length()))
+ {
+ info->errmsg= "Failed on my_net_write()";
+ info->error= ER_UNKNOWN_ERROR;
+ DBUG_RETURN(1);
+ }
delete sele;
}
else if (start_pos == BIN_LOG_HEADER_SIZE)
[Commits] c648a7a4539: MDEV-20574 Position of events reported by mysqlbinlog is wrong with encrypted binlogs, SHOW BINLOG EVENTS reports the correct one.
by sachin 30 Sep '19
revision-id: c648a7a453963e2c2b2910c7d4b8e211c6f30915 (mariadb-10.2.27-49-gc648a7a4539)
parent(s): 102bc7beb080e936ba70c0524a8759799f42458e
author: Sachin Setiya
committer: Sachin Setiya
timestamp: 2019-10-01 00:56:51 +0530
message:
MDEV-20574 Position of events reported by mysqlbinlog is wrong with encrypted binlogs, SHOW BINLOG EVENTS reports the correct one.
Analysis
Mysqlbinlog output for encrypted binary log
#Q> insert into tab1 values (3,'row 003')
#190912 17:36:35 server id 10221 end_log_pos 980 CRC32 0x53bcb3d3 Table_map: `test`.`tab1` mapped to number 19
# at 940
#190912 17:36:35 server id 10221 end_log_pos 1026 CRC32 0xf2ae5136 Write_rows: table id 19 flags: STMT_END_F
Here we can see that the Table_map_log_event ends at 980, but the next event starts at 940.
The reason is that we do not send the START_ENCRYPTION_EVENT to the slave.
Solution:-
Send the Start_encryption_log_event as an Ignorable_log_event to the slave (and to
mysqlbinlog), so that mysqlbinlog can update its log_pos.
Since the slave can request more FORMAT_DESCRIPTION_EVENTs than the master actually
has, we only update the slave's master position when the master really has the
FORMAT_DESCRIPTION_EVENT. The same logic is applied to the START_ENCRYPTION_EVENT.
---
.../binlog_encryption/binlog_row_annotate.result | 9 ++++
.../suite/binlog_encryption/mysqlbinlog.result | 1 +
.../suite/binlog_encryption/mysqlbinlog.test | 3 ++
sql/log_event.cc | 34 ++++++------
sql/slave.cc | 11 ++++
sql/sql_repl.cc | 63 +++++++++++++++-------
6 files changed, 86 insertions(+), 35 deletions(-)
diff --git a/mysql-test/suite/binlog_encryption/binlog_row_annotate.result b/mysql-test/suite/binlog_encryption/binlog_row_annotate.result
index 88c690a8bb7..9b843dc8a6b 100644
--- a/mysql-test/suite/binlog_encryption/binlog_row_annotate.result
+++ b/mysql-test/suite/binlog_encryption/binlog_row_annotate.result
@@ -104,6 +104,9 @@ DELIMITER /*!*/;
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
# at #
+#010909 4:46:40 server id # end_log_pos # Ignorable
+# Ignorable event type 164 (Start_encryption)
+# at #
#010909 4:46:40 server id # end_log_pos # Gtid list []
# at #
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
@@ -336,6 +339,9 @@ DELIMITER /*!*/;
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
# at #
+#010909 4:46:40 server id # end_log_pos # Ignorable
+# Ignorable event type 164 (Start_encryption)
+# at #
#010909 4:46:40 server id # end_log_pos # Gtid list []
# at #
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
@@ -495,6 +501,9 @@ DELIMITER /*!*/;
#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
ROLLBACK/*!*/;
# at #
+#010909 4:46:40 server id # end_log_pos # Ignorable
+# Ignorable event type 164 (Start_encryption)
+# at #
#010909 4:46:40 server id # end_log_pos # Gtid list []
# at #
#010909 4:46:40 server id # end_log_pos # Binlog checkpoint master-bin.000001
diff --git a/mysql-test/suite/binlog_encryption/mysqlbinlog.result b/mysql-test/suite/binlog_encryption/mysqlbinlog.result
index 71758f7d6e7..e97e0569571 100644
--- a/mysql-test/suite/binlog_encryption/mysqlbinlog.result
+++ b/mysql-test/suite/binlog_encryption/mysqlbinlog.result
@@ -4,3 +4,4 @@ INSERT INTO t1 VALUES (1),(2),(3);
REPLACE INTO t1 VALUES (4);
DROP TABLE t1;
FLUSH LOGS;
+FOUND 1 /Ignorable event type 164.*/ in binlog_enc.sql
diff --git a/mysql-test/suite/binlog_encryption/mysqlbinlog.test b/mysql-test/suite/binlog_encryption/mysqlbinlog.test
index b80388aaa45..108dbd8782f 100644
--- a/mysql-test/suite/binlog_encryption/mysqlbinlog.test
+++ b/mysql-test/suite/binlog_encryption/mysqlbinlog.test
@@ -17,5 +17,8 @@ let outfile=$MYSQLTEST_VARDIR/tmp/binlog_enc.sql;
exec $MYSQL_BINLOG $local > $outfile;
exec $MYSQL_BINLOG $local --force-read >> $outfile;
exec $MYSQL_BINLOG $remote >> $outfile;
+--let SEARCH_FILE= $outfile
+--let SEARCH_PATTERN= Ignorable event type 164.*
+--source include/search_pattern_in_file.inc
remove_file $outfile;
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 0e0d69b515c..66f1d6bc790 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -2077,6 +2077,19 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
alg != BINLOG_CHECKSUM_ALG_OFF))
event_len= event_len - BINLOG_CHECKSUM_LEN;
+ /*
+ Create an object of Ignorable_log_event for an unrecognized sub-class,
+ so that the slave SQL thread will only update the position and continue.
+ We check this flag first instead of judging by event_type, because any
+ event can be an Ignorable_log_event if it has this flag set.
+ See the @note of Ignorable_log_event.
+ */
+ if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)
+ {
+ ev= new Ignorable_log_event(buf, fdle,
+ get_type_str((Log_event_type) event_type));
+ goto exit;
+ }
switch(event_type) {
case QUERY_EVENT:
ev = new Query_log_event(buf, event_len, fdle, QUERY_EVENT);
@@ -2203,24 +2216,13 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
ev = new Start_encryption_log_event(buf, event_len, fdle);
break;
default:
- /*
- Create an object of Ignorable_log_event for unrecognized sub-class.
- So that SLAVE SQL THREAD will only update the position and continue.
- */
- if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)
- {
- ev= new Ignorable_log_event(buf, fdle,
- get_type_str((Log_event_type) event_type));
- }
- else
- {
- DBUG_PRINT("error",("Unknown event code: %d",
- (uchar) buf[EVENT_TYPE_OFFSET]));
- ev= NULL;
- break;
- }
+ DBUG_PRINT("error",("Unknown event code: %d",
+ (uchar) buf[EVENT_TYPE_OFFSET]));
+ ev= NULL;
+ break;
}
}
+exit:
if (ev)
{
diff --git a/sql/slave.cc b/sql/slave.cc
index 88a80029bba..1bc21f8895b 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -6317,7 +6317,18 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
mi->last_queued_gtid.seq_no == 1000)
goto skip_relay_logging;
});
+ goto default_action;
#endif
+ case START_ENCRYPTION_EVENT:
+ if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)
+ {
+ /*
+ If the event was not requested by the slave, i.e. it has
+ end_log_pos=0, we do not increment mi->master_log_pos.
+ */
+ inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0;
+ break;
+ }
/* fall through */
default:
default_action:
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index edff750a9a3..8a6bea34fb4 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -399,16 +399,27 @@ static int send_file(THD *thd)
/**
Internal to mysql_binlog_send() routine that recalculates checksum for
- a FD event (asserted) that needs additional arranment prior sending to slave.
+ 1. an FD event (asserted) that needs additional arrangement prior to sending it to the slave,
+ 2. a Start_encryption_log_event whose ignorable flag is set.
+ TODO: the DBUG_ASSERT can be removed if this function is used for more general cases.
*/
-inline void fix_checksum(String *packet, ulong ev_offset)
+
+inline void fix_checksum(enum_binlog_checksum_alg checksum_alg, String *packet,
+ ulong ev_offset)
{
+ if (checksum_alg == BINLOG_CHECKSUM_ALG_OFF ||
+ checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
+ return;
/* recalculate the crc for this event */
uint data_len = uint4korr(packet->ptr() + ev_offset + EVENT_LEN_OFFSET);
ha_checksum crc;
- DBUG_ASSERT(data_len ==
+ DBUG_ASSERT((data_len ==
LOG_EVENT_MINIMAL_HEADER_LEN + FORMAT_DESCRIPTION_HEADER_LEN +
- BINLOG_CHECKSUM_ALG_DESC_LEN + BINLOG_CHECKSUM_LEN);
+ BINLOG_CHECKSUM_ALG_DESC_LEN + BINLOG_CHECKSUM_LEN) ||
+ (data_len ==
+ LOG_EVENT_MINIMAL_HEADER_LEN + BINLOG_CRYPTO_SCHEME_LENGTH +
+ BINLOG_KEY_VERSION_LENGTH + BINLOG_NONCE_LENGTH +
+ BINLOG_CHECKSUM_LEN));
crc= my_checksum(0, (uchar *)packet->ptr() + ev_offset, data_len -
BINLOG_CHECKSUM_LEN);
int4store(packet->ptr() + ev_offset + data_len - BINLOG_CHECKSUM_LEN, crc);
@@ -2135,6 +2146,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
THD *thd= info->thd;
String *packet= info->packet;
Log_event_type event_type;
+ bool initial_log_pos= info->clear_initial_log_pos;
DBUG_ENTER("send_format_descriptor_event");
/**
@@ -2233,7 +2245,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
(*packet)[FLAGS_OFFSET+ev_offset] &= ~LOG_EVENT_BINLOG_IN_USE_F;
- if (info->clear_initial_log_pos)
+ if (initial_log_pos)
{
info->clear_initial_log_pos= false;
/*
@@ -2251,9 +2263,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
ST_CREATED_OFFSET+ev_offset, (ulong) 0);
/* fix the checksum due to latest changes in header */
- if (info->current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
- info->current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
- fix_checksum(packet, ev_offset);
+ fix_checksum(info->current_checksum_alg, packet, ev_offset);
}
else if (info->using_gtid_state)
{
@@ -2274,9 +2284,7 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
{
int4store((char*) packet->ptr()+LOG_EVENT_MINIMAL_HEADER_LEN+
ST_CREATED_OFFSET+ev_offset, (ulong) 0);
- if (info->current_checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
- info->current_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF)
- fix_checksum(packet, ev_offset);
+ fix_checksum(info->current_checksum_alg, packet, ev_offset);
}
}
@@ -2289,12 +2297,16 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
}
/*
- Read the following Start_encryption_log_event but don't send it to slave.
- Slave doesn't need to know whether master's binlog is encrypted,
- and if it'll want to encrypt its logs, it should generate its own
- random nonce, not use the one from the master.
+ Read the following Start_encryption_log_event and send it to the slave as
+ an Ignorable_log_event. The slave doesn't need to know whether the master's
+ binlog is encrypted, but it needs to update the slave log pos (for mysqlbinlog).
+
+ If the slave wants to encrypt its logs, it should generate its own
+ random nonce and not use the one from the master.
*/
- packet->length(0);
+ /* reset transmit packet for the event read from binary log file */
+ if (reset_transmit_packet(info, info->flags, &ev_offset, &info->errmsg))
+ DBUG_RETURN(1);
info->last_pos= linfo->pos;
error= Log_event::read_log_event(log, packet, info->fdev,
opt_master_verify_checksum
@@ -2308,12 +2320,13 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
DBUG_RETURN(1);
}
- event_type= (Log_event_type)((uchar)(*packet)[LOG_EVENT_OFFSET]);
+ event_type= (Log_event_type)((uchar)(*packet)[LOG_EVENT_OFFSET + ev_offset]);
if (event_type == START_ENCRYPTION_EVENT)
{
Start_encryption_log_event *sele= (Start_encryption_log_event *)
- Log_event::read_log_event(packet->ptr(), packet->length(), &info->errmsg,
- info->fdev, BINLOG_CHECKSUM_ALG_OFF);
+ Log_event::read_log_event(packet->ptr() + ev_offset, packet->length()
+ - ev_offset, &info->errmsg, info->fdev,
+ BINLOG_CHECKSUM_ALG_OFF);
if (!sele)
{
info->error= ER_MASTER_FATAL_ERROR_READING_BINLOG;
@@ -2327,6 +2340,18 @@ static int send_format_descriptor_event(binlog_send_info *info, IO_CACHE *log,
delete sele;
DBUG_RETURN(1);
}
+ /* Make it Ignorable_log_event and send it */
+ (*packet)[FLAGS_OFFSET+ev_offset] |= LOG_EVENT_IGNORABLE_F;
+ if (initial_log_pos)
+ int4store((char*) packet->ptr()+LOG_POS_OFFSET+ev_offset, (ulong) 0);
+ /* fix the checksum due to latest changes in header */
+ fix_checksum(info->current_checksum_alg, packet, ev_offset);
+ if (my_net_write(info->net, (uchar*) packet->ptr(), packet->length()))
+ {
+ info->errmsg= "Failed on my_net_write()";
+ info->error= ER_UNKNOWN_ERROR;
+ DBUG_RETURN(1);
+ }
delete sele;
}
else if (start_pos == BIN_LOG_HEADER_SIZE)
[Commits] 83fedc99799: Support Create_time and Update_time in MyRocks table status
by psergey 27 Sep '19
by psergey 27 Sep '19
27 Sep '19
revision-id: 83fedc99799692902995f223ffd0784195d26c3c (fb-prod201903-144-g83fedc99799)
parent(s): d97c0c628e5dc60abd725f6a7120a8d87b09321e
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-09-27 15:40:10 +0300
message:
Support Create_time and Update_time in MyRocks table status
- Create_time is stored in MyRocks' internal data dictionary.
- Update_time is in-memory only (like in InnoDB).
(variant #3, review input addressed, on-disk data format is backward-
and forward-compatible)
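For reference, the creation timestamp is persisted as its own data-dictionary record.
Condensed from the Rdb_tbl_def::put_dict() hunk below (the key layout is also documented
in the rdb_datadic.h comment at the end of the patch), the write looks roughly like this:

  /* key  : Rdb_key_def::TABLE_CREATE_TIMESTAMP (0xB) + "dbname.tablename"
     value: 2-byte TABLE_CREATE_TIMESTAMP_VERSION + 8-byte create_time      */
  Rdb_buf_writer<FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE> ts_key;
  ts_key.write_index(Rdb_key_def::TABLE_CREATE_TIMESTAMP);
  ts_key.write(m_dbname_tablename.c_str(), m_dbname_tablename.size());

  Rdb_buf_writer<Rdb_key_def::VERSION_SIZE +
                 Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE> ts_val;
  ts_val.write_uint16(Rdb_key_def::TABLE_CREATE_TIMESTAMP_VERSION);
  ts_val.write_uint64(create_time);

  dict->put_key(batch, ts_key.to_slice(), ts_val.to_slice());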
---
mysql-test/suite/rocksdb/include/bulk_load.inc | 4 +-
.../suite/rocksdb/include/bulk_load_unsorted.inc | 4 +-
mysql-test/suite/rocksdb/r/bulk_load.result | 12 +--
mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result | 12 +--
.../rocksdb/r/bulk_load_rev_cf_and_data.result | 12 +--
.../suite/rocksdb/r/bulk_load_rev_data.result | 12 +--
.../suite/rocksdb/r/bulk_load_unsorted.result | 12 +--
.../suite/rocksdb/r/bulk_load_unsorted_rev.result | 12 +--
mysql-test/suite/rocksdb/r/issue255.result | 16 ++--
mysql-test/suite/rocksdb/r/rocksdb.result | 6 +-
.../suite/rocksdb/r/show_table_status.result | 85 +++++++++++++++++++++-
.../suite/rocksdb/r/show_table_status_debug.result | 13 ++++
mysql-test/suite/rocksdb/r/truncate_table.result | 8 +-
mysql-test/suite/rocksdb/t/issue255.test | 17 +++--
mysql-test/suite/rocksdb/t/rocksdb.test | 4 +-
mysql-test/suite/rocksdb/t/show_table_status.test | 81 ++++++++++++++++++++-
.../suite/rocksdb/t/show_table_status_debug.test | 21 ++++++
mysql-test/suite/rocksdb/t/truncate_table.test | 8 +-
storage/rocksdb/ha_rocksdb.cc | 38 ++++++++++
storage/rocksdb/rdb_datadic.cc | 71 ++++++++++++++++++
storage/rocksdb/rdb_datadic.h | 18 +++++
21 files changed, 392 insertions(+), 74 deletions(-)
diff --git a/mysql-test/suite/rocksdb/include/bulk_load.inc b/mysql-test/suite/rocksdb/include/bulk_load.inc
index 1b79825e507..7e163602202 100644
--- a/mysql-test/suite/rocksdb/include/bulk_load.inc
+++ b/mysql-test/suite/rocksdb/include/bulk_load.inc
@@ -121,12 +121,12 @@ set rocksdb_bulk_load=0;
--remove_file $file
# Make sure row count index stats are correct
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
ANALYZE TABLE t1, t2, t3;
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
# Make sure all the data is there.
diff --git a/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc b/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
index 5cdc76a32d4..812af0401aa 100644
--- a/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
+++ b/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
@@ -119,12 +119,12 @@ set rocksdb_bulk_load=0;
--remove_file $file
# Make sure row count index stats are correct
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
ANALYZE TABLE t1, t2, t3;
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
# Make sure all the data is there.
diff --git a/mysql-test/suite/rocksdb/r/bulk_load.result b/mysql-test/suite/rocksdb/r/bulk_load.result
index a36f99a7619..76db28e66bd 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
index b5d3e252c5d..ae363f7ec0c 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
index f46acd41080..dd8dd7e60a8 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
index 3389968ef37..96738ae62e2 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result b/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
index 924032549ac..87fc63af2da 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
@@ -70,9 +70,9 @@ LOAD DATA INFILE <input_file> INTO TABLE t3;
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -80,9 +80,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
select count(a) from t1;
count(a)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result b/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
index 3cc9fb8e459..8e0914f0159 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
@@ -70,9 +70,9 @@ LOAD DATA INFILE <input_file> INTO TABLE t3;
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -80,9 +80,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
select count(a) from t1;
count(a)
5000000
diff --git a/mysql-test/suite/rocksdb/r/issue255.result b/mysql-test/suite/rocksdb/r/issue255.result
index c1ce3be2276..b45b3b5afc7 100644
--- a/mysql-test/suite/rocksdb/r/issue255.result
+++ b/mysql-test/suite/rocksdb/r/issue255.result
@@ -2,7 +2,7 @@ CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 6 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ('538647864786478647864');
Warnings:
Warning 1264 Out of range value for column 'pk' at row 1
@@ -12,7 +12,7 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
SELECT * FROM t1;
@@ -21,7 +21,7 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
SELECT * FROM t1;
@@ -30,13 +30,13 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 # # NULL latin1_swedish_ci NULL
DROP TABLE t1;
CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 6 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES (1000);
Warnings:
Warning 1264 Out of range value for column 'pk' at row 1
@@ -46,7 +46,7 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
SELECT * FROM t1;
@@ -55,7 +55,7 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
SELECT * FROM t1;
@@ -64,5 +64,5 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/r/rocksdb.result b/mysql-test/suite/rocksdb/r/rocksdb.result
index 088eb050f6f..a631d58ac69 100644
--- a/mysql-test/suite/rocksdb/r/rocksdb.result
+++ b/mysql-test/suite/rocksdb/r/rocksdb.result
@@ -1417,7 +1417,7 @@ create table t1 (i int primary key auto_increment) engine=RocksDB;
insert into t1 values (null),(null);
show table status like 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 # 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 # 0 0 0 3 # # NULL latin1_swedish_ci NULL
drop table t1;
#
# Fix Issue #4: Crash when using pseudo-unique keys
@@ -2612,7 +2612,7 @@ CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(-1),(0);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 3 # # NULL latin1_swedish_ci NULL
SELECT * FROM t1;
a
-1
@@ -2623,7 +2623,7 @@ CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(10),(0);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 12 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 12 # # NULL latin1_swedish_ci NULL
SELECT * FROM t1;
a
1
diff --git a/mysql-test/suite/rocksdb/r/show_table_status.result b/mysql-test/suite/rocksdb/r/show_table_status.result
index 29140f045e4..345882040ef 100644
--- a/mysql-test/suite/rocksdb/r/show_table_status.result
+++ b/mysql-test/suite/rocksdb/r/show_table_status.result
@@ -7,12 +7,12 @@ set global rocksdb_force_flush_memtable_now = true;
CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' );
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL utf8_general_ci NULL
+t1 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL utf8_general_ci NULL
SHOW TABLE STATUS WHERE name LIKE 't2';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
DROP TABLE t1, t2, t3;
CREATE DATABASE `db_new..............................................end`;
USE `db_new..............................................end`;
@@ -22,3 +22,80 @@ SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.table_statistics WHERE T
TABLE_SCHEMA db_new..............................................end
TABLE_NAME t1_new..............................................end
DROP DATABASE `db_new..............................................end`;
+#
+# MDEV-17171: Bug: RocksDB Tables do not have "Creation Date"
+#
+use test;
+create table t1 (a int) engine=rocksdb;
+select create_time is not null, update_time, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time check_time
+1 NULL NULL
+insert into t1 values (1);
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time is not null check_time
+1 1 NULL
+flush tables;
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time is not null check_time
+1 1 NULL
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+select sleep(3);
+sleep(3)
+0
+insert into t1 values (2);
+select
+create_time=@create_tm /* should not change */ ,
+timestampdiff(second, @update_tm, update_time) > 2,
+check_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=@create_tm 1
+timestampdiff(second, @update_tm, update_time) > 2 1
+check_time NULL
+#
+# Check how create_time survives ALTER TABLE.
+# First, an ALTER TABLE that re-creates the table:
+alter table t1 add b int;
+select
+create_time<>@create_tm /* should change */,
+create_time IS NOT NULL,
+update_time IS NULL
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time<>@create_tm 1
+create_time IS NOT NULL 1
+update_time IS NULL 1
+insert into t1 values (5,5);
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+# Then, an in-place ALTER TABLE:
+alter table t1 add key (a);
+select
+create_time=@create_tm /* should not change */,
+update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=@create_tm 1
+update_time NULL
+#
+# Check what is left after server restart
+#
+# Save t1's creation time
+create table t2 as
+select create_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+select
+create_time=(select create_time from t2) /* should not change */,
+update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=(select create_time from t2) 1
+update_time NULL
+drop table t1, t2;
diff --git a/mysql-test/suite/rocksdb/r/show_table_status_debug.result b/mysql-test/suite/rocksdb/r/show_table_status_debug.result
new file mode 100644
index 00000000000..25b36954ef6
--- /dev/null
+++ b/mysql-test/suite/rocksdb/r/show_table_status_debug.result
@@ -0,0 +1,13 @@
+set session debug='+d,myrocks_produce_ddl_entry_v1';
+create table t1 (a int) engine=rocksdb;
+insert into t1 values (123);
+set session debug='-d,myrocks_produce_ddl_entry_v1';
+select * from t1;
+a
+123
+select create_time, update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time update_time
+NULL NULL
+drop table t1;
diff --git a/mysql-test/suite/rocksdb/r/truncate_table.result b/mysql-test/suite/rocksdb/r/truncate_table.result
index 1544256f194..79b266a2453 100644
--- a/mysql-test/suite/rocksdb/r/truncate_table.result
+++ b/mysql-test/suite/rocksdb/r/truncate_table.result
@@ -9,19 +9,19 @@ DROP TABLE t1;
CREATE TABLE t1 (a INT KEY AUTO_INCREMENT, c CHAR(8)) ENGINE=rocksdb;
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 1 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 (c) VALUES ('a'),('b'),('c');
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 4 # # NULL latin1_swedish_ci NULL
TRUNCATE TABLE t1;
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 1 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 (c) VALUES ('d');
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 2 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 2 # # NULL latin1_swedish_ci NULL
SELECT a,c FROM t1;
a c
1 d
diff --git a/mysql-test/suite/rocksdb/t/issue255.test b/mysql-test/suite/rocksdb/t/issue255.test
index 370dece0c6c..686f45b4056 100644
--- a/mysql-test/suite/rocksdb/t/issue255.test
+++ b/mysql-test/suite/rocksdb/t/issue255.test
@@ -3,24 +3,25 @@
CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 VALUES ('538647864786478647864');
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SELECT * FROM t1;
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
DROP TABLE t1;
@@ -28,24 +29,24 @@ DROP TABLE t1;
CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 VALUES (1000);
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/t/rocksdb.test b/mysql-test/suite/rocksdb/t/rocksdb.test
index 5eff0fbf38f..7dcae569c92 100644
--- a/mysql-test/suite/rocksdb/t/rocksdb.test
+++ b/mysql-test/suite/rocksdb/t/rocksdb.test
@@ -1198,7 +1198,7 @@ drop table t1;
create table t1 (i int primary key auto_increment) engine=RocksDB;
insert into t1 values (null),(null);
---replace_column 7 #
+--replace_column 7 # 12 # 13 #
show table status like 't1';
drop table t1;
@@ -1903,11 +1903,13 @@ DROP TABLE t1;
# value is 4 while MyRocks will show it as 3.
CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(-1),(0);
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
SELECT * FROM t1;
DROP TABLE t1;
CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(10),(0);
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
SELECT * FROM t1;
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/t/show_table_status.test b/mysql-test/suite/rocksdb/t/show_table_status.test
index 29cc2ccfb5e..59effcc788c 100644
--- a/mysql-test/suite/rocksdb/t/show_table_status.test
+++ b/mysql-test/suite/rocksdb/t/show_table_status.test
@@ -24,7 +24,7 @@ set global rocksdb_force_flush_memtable_now = true;
CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
---replace_column 6 # 7 #
+--replace_column 6 # 7 # 12 # 13 #
SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' );
# Some statistics don't get updated as quickly. The Data_length and
@@ -48,7 +48,7 @@ set global rocksdb_force_flush_memtable_now = true;
# We expect the number of rows to be 10000. Data_len and Avg_row_len
# may vary, depending on built-in compression library.
---replace_column 6 # 7 #
+--replace_column 6 # 7 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't2';
DROP TABLE t1, t2, t3;
@@ -62,3 +62,80 @@ CREATE TABLE `t1_new..............................................end`(a int) en
INSERT INTO `t1_new..............................................end` VALUES (1);
--query_vertical SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.table_statistics WHERE TABLE_NAME = 't1_new..............................................end'
DROP DATABASE `db_new..............................................end`;
+--echo #
+--echo # MDEV-17171: Bug: RocksDB Tables do not have "Creation Date"
+--echo #
+use test;
+create table t1 (a int) engine=rocksdb;
+
+select create_time is not null, update_time, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+insert into t1 values (1);
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+flush tables;
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+select sleep(3);
+insert into t1 values (2);
+
+--vertical_results
+select
+ create_time=@create_tm /* should not change */ ,
+ timestampdiff(second, @update_tm, update_time) > 2,
+ check_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo #
+--echo # Check how create_time survives ALTER TABLE.
+--echo # First, an ALTER TABLE that re-creates the table:
+alter table t1 add b int;
+select
+ create_time<>@create_tm /* should change */,
+ create_time IS NOT NULL,
+ update_time IS NULL
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+insert into t1 values (5,5);
+
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo # Then, an in-place ALTER TABLE:
+alter table t1 add key (a);
+
+select
+ create_time=@create_tm /* should not change */,
+ update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo #
+--echo # Check what is left after server restart
+--echo #
+
+--echo # Save t1's creation time
+create table t2 as
+select create_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--source include/restart_mysqld.inc
+
+select
+ create_time=(select create_time from t2) /* should not change */,
+ update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+drop table t1, t2;
diff --git a/mysql-test/suite/rocksdb/t/show_table_status_debug.test b/mysql-test/suite/rocksdb/t/show_table_status_debug.test
new file mode 100644
index 00000000000..7dcc023cd68
--- /dev/null
+++ b/mysql-test/suite/rocksdb/t/show_table_status_debug.test
@@ -0,0 +1,21 @@
+--source include/have_rocksdb.inc
+--source include/have_debug.inc
+
+#
+# Check the upgrade from Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1 to _2
+#
+
+set session debug='+d,myrocks_produce_ddl_entry_v1';
+
+create table t1 (a int) engine=rocksdb;
+insert into t1 values (123);
+set session debug='-d,myrocks_produce_ddl_entry_v1';
+
+--source include/restart_mysqld.inc
+
+select * from t1;
+select create_time, update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+drop table t1;
diff --git a/mysql-test/suite/rocksdb/t/truncate_table.test b/mysql-test/suite/rocksdb/t/truncate_table.test
index a61488654a3..1001eeb6cde 100644
--- a/mysql-test/suite/rocksdb/t/truncate_table.test
+++ b/mysql-test/suite/rocksdb/t/truncate_table.test
@@ -29,22 +29,22 @@ DROP TABLE t1;
CREATE TABLE t1 (a INT KEY AUTO_INCREMENT, c CHAR(8)) ENGINE=rocksdb;
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 (c) VALUES ('a'),('b'),('c');
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
TRUNCATE TABLE t1;
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 (c) VALUES ('d');
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--sorted_result
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index d6c89087bea..446e2d64c1c 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -2456,6 +2456,8 @@ class Rdb_transaction {
bool m_is_delayed_snapshot = false;
bool m_is_two_phase = false;
+ std::unordered_set<Rdb_tbl_def*> modified_tables;
+
private:
/*
Number of write operations this transaction had when we took the last
@@ -3139,6 +3141,25 @@ class Rdb_transaction {
virtual void start_tx() = 0;
virtual void start_stmt() = 0;
+ protected:
+ // Non-virtual functions with actions to be done on transaction commit
+ // and rollback.
+ void on_commit() {
+ time_t tm;
+ tm = time(nullptr);
+ for (auto &it : modified_tables) {
+ it->update_time = tm;
+ }
+ modified_tables.clear();
+ }
+ void on_rollback() {
+ modified_tables.clear();
+ }
+ public:
+ void log_table_write_op(Rdb_tbl_def *tbl) {
+ modified_tables.insert(tbl);
+ }
+
void set_initial_savepoint() {
/*
Set the initial savepoint. If the first statement in the transaction
@@ -3322,7 +3343,9 @@ class Rdb_transaction_impl : public Rdb_transaction {
goto error;
}
+ on_commit();
error:
+ on_rollback();
/* Save the transaction object to be reused */
release_tx();
@@ -3338,6 +3361,7 @@ class Rdb_transaction_impl : public Rdb_transaction {
public:
void rollback() override {
+ on_rollback();
m_write_count = 0;
m_insert_count = 0;
m_update_count = 0;
@@ -3659,7 +3683,9 @@ class Rdb_writebatch_impl : public Rdb_transaction {
res = true;
goto error;
}
+ on_commit();
error:
+ on_rollback();
reset();
m_write_count = 0;
@@ -3691,6 +3717,7 @@ class Rdb_writebatch_impl : public Rdb_transaction {
}
void rollback() override {
+ on_rollback();
m_write_count = 0;
m_insert_count = 0;
m_update_count = 0;
@@ -7549,6 +7576,7 @@ int ha_rocksdb::create_table(const std::string &table_name,
/* Create table/key descriptions and put them into the data dictionary */
m_tbl_def = new Rdb_tbl_def(table_name);
+ m_tbl_def->set_create_time(time(nullptr));
uint n_keys = table_arg->s->keys;
@@ -10177,6 +10205,8 @@ int ha_rocksdb::update_write_row(const uchar *const old_data,
row_info.tx->incr_insert_count();
}
+ row_info.tx->log_table_write_op(m_tbl_def);
+
if (do_bulk_commit(row_info.tx)) {
DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD);
}
@@ -10648,6 +10678,7 @@ int ha_rocksdb::delete_row(const uchar *const buf) {
}
tx->incr_delete_count();
+ tx->log_table_write_op(m_tbl_def);
if (do_bulk_commit(tx)) {
DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD);
@@ -10802,6 +10833,12 @@ int ha_rocksdb::info(uint flag) {
k->rec_per_key[j] = x;
}
}
+
+ stats.create_time = m_tbl_def->get_create_time();
+ }
+
+ if (flag & HA_STATUS_TIME) {
+ stats.update_time = m_tbl_def->update_time;
}
if (flag & HA_STATUS_ERRKEY) {
@@ -12603,6 +12640,7 @@ bool ha_rocksdb::prepare_inplace_alter_table(
m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed);
new_tdef->m_hidden_pk_val =
m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed);
+ new_tdef->set_create_time(m_tbl_def->get_create_time());
if (create_key_defs(altered_table, new_tdef, table, m_tbl_def)) {
/* Delete the new key descriptors */
diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc
index c0741a1ce9b..826a50ca726 100644
--- a/storage/rocksdb/rdb_datadic.cc
+++ b/storage/rocksdb/rdb_datadic.cc
@@ -3560,6 +3560,25 @@ bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict,
const rocksdb::Slice svalue(indexes.c_ptr(), indexes.length());
dict->put_key(batch, key, svalue);
+
+ // Now, save the creation timestamp
+ bool simulate_old_version = false;
+ DBUG_EXECUTE_IF("myrocks_produce_ddl_entry_v1",
+ simulate_old_version = true; );
+ if (!simulate_old_version) {
+ Rdb_buf_writer<FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE> ts_key;
+ ts_key.write_index(Rdb_key_def::TABLE_CREATE_TIMESTAMP);
+ ts_key.write(m_dbname_tablename.c_str(), m_dbname_tablename.size());
+
+ const int ts_and_vers_size = Rdb_key_def::VERSION_SIZE +
+ Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE;
+ Rdb_buf_writer<ts_and_vers_size> ts_val;
+ ts_val.write_uint16(Rdb_key_def::TABLE_CREATE_TIMESTAMP_VERSION);
+ ts_val.write_uint64(create_time);
+
+ dict->put_key(batch, ts_key.to_slice(), ts_val.to_slice());
+ }
+
return false;
}
@@ -3974,6 +3993,50 @@ bool Rdb_ddl_manager::validate_schemas(void) {
return !has_errors;
}
+
+void Rdb_ddl_manager::load_create_timestamp(Rdb_tbl_def *tdef) {
+ Rdb_buf_writer<FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE> lookup_key;
+
+ lookup_key.write_index(Rdb_key_def::TABLE_CREATE_TIMESTAMP);
+ const std::string &dbname_tablename = tdef->full_tablename();
+ lookup_key.write(dbname_tablename.c_str(), dbname_tablename.size());
+
+ tdef->set_create_time(0); // The default is SQL NULL.
+
+ std::string ts_value;
+ auto s = m_dict->get_value(lookup_key.to_slice(), &ts_value);
+ // Create timestamp may or may not be present. If it is not present, it's not
+ // an error.
+ if (s.ok()) {
+ if (ts_value.size() < Rdb_key_def::VERSION_SIZE) {
+ // NO_LINT_DEBUG
+ sql_print_warning(
+ "RocksDB: Invalid TABLE_CREATE_TIMESTAMP record for table %s",
+ dbname_tablename.c_str());
+ }
+
+ const uchar *ptr = reinterpret_cast<const uchar *>(ts_value.data());
+ const int version = rdb_netbuf_read_uint16(&ptr);
+
+ if (version != Rdb_key_def::TABLE_CREATE_TIMESTAMP_VERSION) {
+ // NO_LINT_DEBUG
+ sql_print_warning(
+ "RocksDB: TABLE_CREATE_TIMESTAMP Version was not expected."
+ "Expected: %d, Actual: %d",
+ Rdb_key_def::TABLE_CREATE_TIMESTAMP_VERSION, version);
+ return;
+ }
+ if (ts_value.size() != 2 + Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE) {
+ // NO_LINT_DEBUG
+ sql_print_warning(
+ "RocksDB: Unexpected size of TABLE_CREATE_TIMESTAMP record: %ld",
+ ts_value.size());
+ return;
+ }
+ tdef->set_create_time(rdb_netbuf_read_uint64(&ptr));
+ }
+}
+
bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
Rdb_cf_manager *const cf_manager,
const uint32_t validate_tables) {
@@ -4109,6 +4172,7 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
tdef->m_key_count > 0 ? tdef->m_key_descr_arr[0]->m_stats.m_rows : 0, 0,
0);
+ load_create_timestamp(tdef);
put(tdef);
i++;
}
@@ -4436,6 +4500,12 @@ void Rdb_ddl_manager::remove(Rdb_tbl_def *const tbl,
m_dict->delete_key(batch, key_writer.to_slice());
+ // Also delete the table creation timestamp record
+ Rdb_buf_writer<FN_LEN * 2 + Rdb_key_def::INDEX_NUMBER_SIZE> ts_key;
+ ts_key.write_index(Rdb_key_def::TABLE_CREATE_TIMESTAMP);
+ ts_key.write(dbname_tablename.c_str(), dbname_tablename.size());
+ m_dict->delete_key(batch, ts_key.to_slice());
+
const auto it = m_ddl_map.find(dbname_tablename);
if (it != m_ddl_map.end()) {
// Free Rdb_tbl_def
@@ -4471,6 +4541,7 @@ bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to,
rec->m_hidden_pk_val.load(std::memory_order_relaxed);
new_rec->m_tbl_stats = rec->m_tbl_stats;
+ new_rec->set_create_time(rec->get_create_time());
// so that it's not free'd when deleting the old rec
rec->m_key_descr_arr = nullptr;
diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h
index 416857cad38..d2ff6d95625 100644
--- a/storage/rocksdb/rdb_datadic.h
+++ b/storage/rocksdb/rdb_datadic.h
@@ -465,6 +465,7 @@ class Rdb_key_def {
CF_NUMBER_SIZE = 4,
CF_FLAG_SIZE = 4,
PACKED_SIZE = 4, // one int
+ TABLE_CREATE_TIMESTAMP_SIZE = 8,
};
// bit flags for combining bools when writing to disk
@@ -500,6 +501,7 @@ class Rdb_key_def {
DDL_CREATE_INDEX_ONGOING = 8,
AUTO_INC = 9,
DROPPED_CF = 10,
+ TABLE_CREATE_TIMESTAMP = 11,
END_DICT_INDEX_ID = 255
};
@@ -514,6 +516,7 @@ class Rdb_key_def {
DDL_CREATE_INDEX_ONGOING_VERSION = 1,
AUTO_INCREMENT_VERSION = 1,
DROPPED_CF_VERSION = 1,
+ TABLE_CREATE_TIMESTAMP_VERSION = 1,
// Version for index stats is stored in IndexStats struct
};
@@ -1116,6 +1119,12 @@ class Rdb_tbl_def {
~Rdb_tbl_def();
+ // time values are shown in SHOW TABLE STATUS
+ void set_create_time(time_t val) { create_time = val; }
+ time_t get_create_time() { return create_time; }
+
+ time_t update_time = 0; // in-memory only value, maintained right here
+
void check_and_set_read_free_rpl_table();
/* Number of indexes */
@@ -1161,6 +1170,9 @@ class Rdb_tbl_def {
const std::string &base_tablename() const { return m_tablename; }
const std::string &base_partition() const { return m_partition; }
GL_INDEX_ID get_autoincr_gl_index_id();
+
+ private:
+ time_t create_time = 0;
};
/*
@@ -1248,6 +1260,7 @@ class Rdb_ddl_manager {
void persist_stats(const bool sync = false);
void set_table_stats(const std::string &tbl_name);
+ void load_create_timestamp(Rdb_tbl_def *tdef);
/* Modify the mapping and write it to on-disk storage */
int put_and_write(Rdb_tbl_def *const key_descr,
@@ -1386,6 +1399,11 @@ class Rdb_binlog_manager {
key: Rdb_key_def::DROPPED_CF(0xa) + cf_id
value: version
+ 11. Table Name => create_timestamp.
+ key: Rdb_key_def::TABLE_CREATE_TIMESTAMP(0xB) + dbname.tablename
+ value: version, create_timestamp.
+ create_timestamp is 8 bytes.
+
Data dictionary operations are atomic inside RocksDB. For example,
when creating a table with two indexes, it is necessary to call Put
three times. They have to be atomic. Rdb_dict_manager has a wrapper function
26 Sep '19
revision-id: 673e253724979fd9fe43a4a22bd7e1b2c3a5269e (mariadb-10.4.4-333-g673e2537249)
parent(s): 8887effe13ad87ba0460d4d3068fb5696f089bb0
author: Kristian Nielsen
committer: Kristian Nielsen
timestamp: 2019-09-26 17:43:26 +0200
message:
Fix missing memory barrier in wait_for_commit
The function wait_for_commit::wait_for_prior_commit() has a fast path
where it checks without locks if wakeup_subsequent_commits() has
already been called. This check was missing a memory barrier. The
waitee thread does two writes to variables `waitee' and
`wakeup_error', and if the waiting thread sees the first write it
_must_ also see the second or incorrect behaviour will occur. This
requires memory barriers between both the writes (release semantics)
and the reads (acquire semantics) of those two variables.
Other accesses to these variables are done under lock or where only
one thread will be accessing them, and can be done without barriers
(relaxed semantics).
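Below is a minimal standalone illustration of the release/acquire pairing this
fix relies on. The names wakeup() and wait_for_prior_commit_fast_path() are
simplified stand-ins for this sketch, not the server's actual functions.
#include <atomic>
#include <cassert>
#include <thread>
static int prior_commit;                          // stands in for the prior transaction
static std::atomic<int *> waitee{&prior_commit};  // non-NULL means "still waiting"
static int wakeup_error= 0;
// Waitee side: write wakeup_error first, then clear waitee with release
// semantics so that both writes become visible to the waiter together.
static void wakeup()
{
  wakeup_error= 1;
  waitee.store(nullptr, std::memory_order_release);
}
// Waiter side (lock-free fast path): if the acquire load observes NULL, the
// release store above happens-before it, so the earlier write to
// wakeup_error is guaranteed to be visible as well.
static void wait_for_prior_commit_fast_path()
{
  if (waitee.load(std::memory_order_acquire) == nullptr)
    assert(wakeup_error == 1);
}
int main()
{
  std::thread t1(wakeup), t2(wait_for_prior_commit_fast_path);
  t1.join();
  t2.join();
  return 0;
}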
---
sql/log.cc | 19 +++++++++++++------
sql/sql_class.cc | 25 ++++++++++++++-----------
sql/sql_class.h | 17 ++++++++++++-----
3 files changed, 39 insertions(+), 22 deletions(-)
diff --git a/sql/log.cc b/sql/log.cc
index 4f51a9a9c17..a88d5147898 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -7467,8 +7467,10 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry)
*/
wfc= orig_entry->thd->wait_for_commit_ptr;
orig_entry->queued_by_other= false;
- if (wfc && wfc->waitee)
+ if (wfc && wfc->waitee.load(std::memory_order_acquire))
{
+ wait_for_commit *loc_waitee;
+
mysql_mutex_lock(&wfc->LOCK_wait_commit);
/*
Do an extra check here, this time safely under lock.
@@ -7480,10 +7482,10 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry)
before setting the flag, so there is no risk that we can queue ahead of
it.
*/
- if (wfc->waitee && !wfc->waitee->commit_started)
+ if ((loc_waitee= wfc->waitee.load(std::memory_order_relaxed)) &&
+ !loc_waitee->commit_started)
{
PSI_stage_info old_stage;
- wait_for_commit *loc_waitee;
/*
By setting wfc->opaque_pointer to our own entry, we mark that we are
@@ -7505,7 +7507,8 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry)
&wfc->LOCK_wait_commit,
&stage_waiting_for_prior_transaction_to_commit,
&old_stage);
- while ((loc_waitee= wfc->waitee) && !orig_entry->thd->check_killed(1))
+ while ((loc_waitee= wfc->waitee.load(std::memory_order_relaxed)) &&
+ !orig_entry->thd->check_killed(1))
mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit);
wfc->opaque_pointer= NULL;
DBUG_PRINT("info", ("After waiting for prior commit, queued_by_other=%d",
@@ -7523,14 +7526,18 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry)
do
{
mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit);
- } while (wfc->waitee);
+ } while (wfc->waitee.load(std::memory_order_relaxed));
}
else
{
/* We were killed, so remove us from the list of waitee. */
wfc->remove_from_list(&loc_waitee->subsequent_commits_list);
mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
- wfc->waitee= NULL;
+ /*
+ This is the thread clearing its own status, it is no longer on
+ the list of waiters. So no memory barriers are needed here.
+ */
+ wfc->waitee.store(NULL, std::memory_order_relaxed);
orig_entry->thd->EXIT_COND(&old_stage);
/* Interrupted by kill. */
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 4eab241232b..ca179a39dc1 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -7230,7 +7230,7 @@ wait_for_commit::reinit()
{
subsequent_commits_list= NULL;
next_subsequent_commit= NULL;
- waitee= NULL;
+ waitee.store(NULL, std::memory_order_relaxed);
opaque_pointer= NULL;
wakeup_error= 0;
wakeup_subsequent_commits_running= false;
@@ -7308,8 +7308,9 @@ wait_for_commit::wakeup(int wakeup_error)
*/
mysql_mutex_lock(&LOCK_wait_commit);
- waitee= NULL;
this->wakeup_error= wakeup_error;
+ /* Memory barrier to make wakeup_error visible to the waiter thread. */
+ waitee.store(NULL, std::memory_order_release);
/*
Note that it is critical that the mysql_cond_signal() here is done while
still holding the mutex. As soon as we release the mutex, the waiter might
@@ -7340,9 +7341,10 @@ wait_for_commit::wakeup(int wakeup_error)
void
wait_for_commit::register_wait_for_prior_commit(wait_for_commit *waitee)
{
- DBUG_ASSERT(!this->waitee /* No prior registration allowed */);
+ DBUG_ASSERT(!this->waitee.load(std::memory_order_relaxed)
+ /* No prior registration allowed */);
wakeup_error= 0;
- this->waitee= waitee;
+ this->waitee.store(waitee, std::memory_order_relaxed);
mysql_mutex_lock(&waitee->LOCK_wait_commit);
/*
@@ -7351,7 +7353,7 @@ wait_for_commit::register_wait_for_prior_commit(wait_for_commit *waitee)
see comments on wakeup_subsequent_commits2() for details.
*/
if (waitee->wakeup_subsequent_commits_running)
- this->waitee= NULL;
+ this->waitee.store(NULL, std::memory_order_relaxed);
else
{
/*
@@ -7381,7 +7383,8 @@ wait_for_commit::wait_for_prior_commit2(THD *thd)
thd->ENTER_COND(&COND_wait_commit, &LOCK_wait_commit,
&stage_waiting_for_prior_transaction_to_commit,
&old_stage);
- while ((loc_waitee= this->waitee) && likely(!thd->check_killed(1)))
+ while ((loc_waitee= this->waitee.load(std::memory_order_relaxed)) &&
+ likely(!thd->check_killed(1)))
mysql_cond_wait(&COND_wait_commit, &LOCK_wait_commit);
if (!loc_waitee)
{
@@ -7404,14 +7407,14 @@ wait_for_commit::wait_for_prior_commit2(THD *thd)
do
{
mysql_cond_wait(&COND_wait_commit, &LOCK_wait_commit);
- } while (this->waitee);
+ } while (this->waitee.load(std::memory_order_relaxed));
if (wakeup_error)
my_error(ER_PRIOR_COMMIT_FAILED, MYF(0));
goto end;
}
remove_from_list(&loc_waitee->subsequent_commits_list);
mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
- this->waitee= NULL;
+ this->waitee.store(NULL, std::memory_order_relaxed);
wakeup_error= thd->killed_errno();
if (!wakeup_error)
@@ -7513,7 +7516,7 @@ wait_for_commit::unregister_wait_for_prior_commit2()
wait_for_commit *loc_waitee;
mysql_mutex_lock(&LOCK_wait_commit);
- if ((loc_waitee= this->waitee))
+ if ((loc_waitee= this->waitee.load(std::memory_order_relaxed)))
{
mysql_mutex_lock(&loc_waitee->LOCK_wait_commit);
if (loc_waitee->wakeup_subsequent_commits_running)
@@ -7526,7 +7529,7 @@ wait_for_commit::unregister_wait_for_prior_commit2()
See comments on wakeup_subsequent_commits2() for more details.
*/
mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
- while (this->waitee)
+ while (this->waitee.load(std::memory_order_relaxed))
mysql_cond_wait(&COND_wait_commit, &LOCK_wait_commit);
}
else
@@ -7534,7 +7537,7 @@ wait_for_commit::unregister_wait_for_prior_commit2()
/* Remove ourselves from the list in the waitee. */
remove_from_list(&loc_waitee->subsequent_commits_list);
mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
- this->waitee= NULL;
+ this->waitee.store(NULL, std::memory_order_relaxed);
}
}
wakeup_error= 0;
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 72cb8148895..1c81739865b 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -20,6 +20,7 @@
/* Classes in mysql */
+#include <atomic>
#include "dur_prop.h"
#include <waiting_threads.h>
#include "sql_const.h"
@@ -2018,7 +2019,7 @@ struct wait_for_commit
/*
The LOCK_wait_commit protects the fields subsequent_commits_list and
wakeup_subsequent_commits_running (for a waitee), and the pointer
- waiterr and associated COND_wait_commit (for a waiter).
+ waitee and associated COND_wait_commit (for a waiter).
*/
mysql_mutex_t LOCK_wait_commit;
mysql_cond_t COND_wait_commit;
@@ -2032,8 +2033,14 @@ struct wait_for_commit
When this is cleared for wakeup, the COND_wait_commit condition is
signalled.
+
+ This pointer is protected by LOCK_wait_commit. But there is also a "fast
+ path" where the waiter compares this to NULL without holding the lock.
+ Such read must be done with acquire semantics (and all corresponding
+ writes done with release semantics). This ensures that a wakeup with error
+ is reliably detected as (waitee==NULL && wakeup_error != 0).
*/
- wait_for_commit *waitee;
+ std::atomic<wait_for_commit *> waitee;
/*
Generic pointer for use by the transaction coordinator to optimise the
waiting for improved group commit.
@@ -2068,7 +2075,7 @@ struct wait_for_commit
Quick inline check, to avoid function call and locking in the common case
where no wakeup is registered, or a registered wait was already signalled.
*/
- if (waitee)
+ if (waitee.load(std::memory_order_acquire))
return wait_for_prior_commit2(thd);
else
{
@@ -2096,7 +2103,7 @@ struct wait_for_commit
}
void unregister_wait_for_prior_commit()
{
- if (waitee)
+ if (waitee.load(std::memory_order_relaxed))
unregister_wait_for_prior_commit2();
else
wakeup_error= 0;
@@ -2118,7 +2125,7 @@ struct wait_for_commit
}
next_ptr_ptr= &cur->next_subsequent_commit;
}
- waitee= NULL;
+ waitee.store(NULL, std::memory_order_relaxed);
}
void wakeup(int wakeup_error);