
12 Oct '19
revision-id: a4d7b682f90eaffb6c1de0e5ca8e75bb0167be17 (fb-prod201903-171-ga4d7b682f90)
parent(s): 71f5fa77faeae64d653012db88f0a9e224227d5f
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-10-12 22:41:44 +0300
message:
Issue #790, MyRocks/MRR: address review input part 3
- in ha_rocksdb::index_end(), call mrr_free() only if we are in an MRR scan.
- added a comment.
---
storage/rocksdb/ha_rocksdb.cc | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 2735d9d6adb..4da9293a75f 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -8013,6 +8013,9 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf,
#ifndef DBUG_OFF
m_keyread_only = save_keyread_only;
#endif
+ // Due to MRR, now an index-only scan have pushed index condition.
+ // (If it does, we follow non-index only code path here, except that
+ // we don't fetch the row).
bool have_icp = (pushed_idx_cond && pushed_idx_cond_keyno == active_index);
if (covered_lookup && m_lock_rows == RDB_LOCK_NONE && !have_icp) {
@@ -10569,7 +10572,8 @@ int ha_rocksdb::index_end() {
active_index = MAX_KEY;
in_range_check_pushed_down = FALSE;
- mrr_free();
+ if (mrr_rowid_reader)
+ mrr_free();
DBUG_RETURN(HA_EXIT_SUCCESS);
}
1
0

11 Oct '19
revision-id: 71f5fa77faeae64d653012db88f0a9e224227d5f (fb-prod201903-170-g71f5fa77fae)
parent(s): 40182db11b506f97183a8c41af747fb34eb239ec
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-10-12 00:09:11 +0300
message:
Issue #790, MyRocks/MRR: address more review input
Fix the code in ha_rocksdb::read_row_from_secondary_key(). The idea is
that index-only scans can now have Pushed Index Conditions (ICP):
one can run MRR scan with ICP, and MyRocks/MRR will use an index-only
scan under the hood to collect the rowids.
Previous variant of the code broke the "covered lookups" feature.
---
storage/rocksdb/ha_rocksdb.cc | 29 ++++++++++++++---------------
1 file changed, 14 insertions(+), 15 deletions(-)
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 8019be03401..2735d9d6adb 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -7998,6 +7998,8 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf,
int rc = 0;
uint pk_size;
+ /* Get the key columns and primary key value */
+ const rocksdb::Slice &rkey = m_scan_it->key();
const rocksdb::Slice &value = m_scan_it->value();
#ifndef DBUG_OFF
@@ -8011,21 +8013,17 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf,
#ifndef DBUG_OFF
m_keyread_only = save_keyread_only;
#endif
+ bool have_icp = (pushed_idx_cond && pushed_idx_cond_keyno == active_index);
- if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) {
- // Due to MRR, we can have ICP enabled with covered_lookup == true
- if (!(rc = find_icp_matching_index_rec(move_forward, buf))) {
- const rocksdb::Slice &rkey = m_scan_it->key();
- const rocksdb::Slice &rval = m_scan_it->value();
- pk_size =
- kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, m_pk_packed_tuple);
- if (pk_size == RDB_INVALID_KEY_LEN) {
- rc = HA_ERR_ROCKSDB_CORRUPT_DATA;
- } else {
- rc = kd.unpack_record(table, buf, &rkey, &rval,
- m_converter->get_verify_row_debug_checksums());
- global_stats.covered_secondary_key_lookups.inc();
- }
+ if (covered_lookup && m_lock_rows == RDB_LOCK_NONE && !have_icp) {
+ pk_size =
+ kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, m_pk_packed_tuple);
+ if (pk_size == RDB_INVALID_KEY_LEN) {
+ rc = HA_ERR_ROCKSDB_CORRUPT_DATA;
+ } else {
+ rc = kd.unpack_record(table, buf, &rkey, &value,
+ m_converter->get_verify_row_debug_checksums());
+ global_stats.covered_secondary_key_lookups.inc();
}
} else {
if (kd.m_is_reverse_cf) move_forward = !move_forward;
@@ -8038,7 +8036,8 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf,
if (pk_size == RDB_INVALID_KEY_LEN) {
rc = HA_ERR_ROCKSDB_CORRUPT_DATA;
} else {
- rc = get_row_by_rowid(buf, m_pk_packed_tuple, pk_size);
+ if (!covered_lookup || m_lock_rows != RDB_LOCK_NONE)
+ rc = get_row_by_rowid(buf, m_pk_packed_tuple, pk_size);
}
}
}
1
0

[Commits] 40182db11b5: Issue #790, MyRocks/MRR: address review input and cleanup
by psergey 11 Oct '19
by psergey 11 Oct '19
11 Oct '19
revision-id: 40182db11b506f97183a8c41af747fb34eb239ec (fb-prod201903-169-g40182db11b5)
parent(s): 8e230333ad43098b8460240386b82e5d67a974c0
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-10-11 22:25:13 +0300
message:
Issue #790, MyRocks/MRR: address review input and cleanup
- Join two multi_get() functions together (the other implementation
that is used by bypass code now uses RocksDB's MultiGet with the same
signature, so there's no difference)
- Add more DBUG_ASSERTs as requested
- Call RANGE_SEQ_IF::skip_index_tuple when appropriate (+testcase)
- Call RANGE_SEQ_IF::skip_record when appropriate (+testcase)
---
mysql-test/suite/rocksdb/r/rocksdb_mrr.result | 59 ++++++++++++++++++++
mysql-test/suite/rocksdb/t/rocksdb_mrr.test | 35 ++++++++++++
storage/rocksdb/ha_rocksdb.cc | 78 ++++++++++++++-------------
3 files changed, 136 insertions(+), 36 deletions(-)
diff --git a/mysql-test/suite/rocksdb/r/rocksdb_mrr.result b/mysql-test/suite/rocksdb/r/rocksdb_mrr.result
index 7ece91c1fa4..6aa2ff4dc23 100644
--- a/mysql-test/suite/rocksdb/r/rocksdb_mrr.result
+++ b/mysql-test/suite/rocksdb/r/rocksdb_mrr.result
@@ -284,6 +284,65 @@ pk1 pk2 col1 filler mod(t3.col1,2)
20 20 20 20 0
26 26 26 26 0
28 28 28 28 0
+#
+# Test for BKA's variant of Index Condition Pushdown. With BKA,
+# pushed index conditions that refer to preceding tables are
+# handled in a special way because there's no clear concept of
+# "current row" for the preceding table(s)
+#
+explain
+select * from t0,t3 where t3.col1=t0.a and mod(t3.pk2,2)=t0.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ALL NULL NULL NULL NULL 10 Using where
+1 SIMPLE t3 ref col1 col1 5 test.t0.a 4 Using index condition; Using join buffer (Batched Key Access)
+select * from t0,t3 where t3.col1=t0.a and mod(t3.pk2,2)=t0.a;
+a pk1 pk2 col1 filler
+0 0 0 0 0
+1 1 1 1 1
+set optimizer_switch='mrr=off';
+select * from t0,t3 where t3.col1=t0.a and mod(t3.pk2,2)=t0.a;
+a pk1 pk2 col1 filler
+0 0 0 0 0
+1 1 1 1 1
+set optimizer_switch='mrr=on';
+#
+# A query which has RANGE_SEQ_IF::skip_record != nullptr.
+#
+# MultiGet/MRR does not invoke skip_record() as it would not produce
+# much speedup.
+#
+insert into t3 select 10000+a, 10000+a, a, 'duplicate-match' from t1;
+delete from t3 where col1 in (3,5);
+explain
+select * from t0 left join t3 on t3.col1=t0.a where t3.pk1 is null;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ALL NULL NULL NULL NULL 10 NULL
+1 SIMPLE t3 ref col1 col1 5 test.t0.a 4 Using where; Not exists; Using join buffer (Batched Key Access)
+select * from t0 left join t3 on t3.col1=t0.a where t3.pk1 is null;
+a pk1 pk2 col1 filler
+3 NULL NULL NULL NULL
+5 NULL NULL NULL NULL
+set optimizer_switch='mrr=off';
+select * from t0 left join t3 on t3.col1=t0.a where t3.pk1 is null;
+a pk1 pk2 col1 filler
+3 NULL NULL NULL NULL
+5 NULL NULL NULL NULL
+set optimizer_switch='mrr=on';
+explain
+select * from t0 where t0.a in (select t3.col1 from t3 where char_length(t3.filler)<30);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ALL NULL NULL NULL NULL 10 Using where
+1 SIMPLE t3 ref col1 col1 5 test.t0.a 4 Using where; FirstMatch(t0); Using join buffer (Batched Key Access)
+select * from t0 where t0.a in (select t3.col1 from t3 where char_length(t3.filler)<30);
+a
+0
+1
+2
+4
+6
+7
+8
+9
drop table t0,t1,t2,t3,t4;
#
# Multi-keypart testcase
diff --git a/mysql-test/suite/rocksdb/t/rocksdb_mrr.test b/mysql-test/suite/rocksdb/t/rocksdb_mrr.test
index dc8c260f754..74675b1cb5b 100644
--- a/mysql-test/suite/rocksdb/t/rocksdb_mrr.test
+++ b/mysql-test/suite/rocksdb/t/rocksdb_mrr.test
@@ -195,6 +195,41 @@ where (t3.col1=20 or t3.col1 between 25 and 28) and mod(t3.col1,2)=0;
select pk1,pk2,col1, filler,mod(t3.col1,2) from t3
where (t3.col1=20 or t3.col1 between 25 and 28) and mod(t3.col1,2)=0;
+--echo #
+--echo # Test for BKA's variant of Index Condition Pushdown. With BKA,
+--echo # pushed index conditions that refer to preceding tables are
+--echo # handled in a special way because there's no clear concept of
+--echo # "current row" for the preceding table(s)
+--echo #
+
+explain
+select * from t0,t3 where t3.col1=t0.a and mod(t3.pk2,2)=t0.a;
+select * from t0,t3 where t3.col1=t0.a and mod(t3.pk2,2)=t0.a;
+
+set optimizer_switch='mrr=off';
+select * from t0,t3 where t3.col1=t0.a and mod(t3.pk2,2)=t0.a;
+set optimizer_switch='mrr=on';
+
+--echo #
+--echo # A query which has RANGE_SEQ_IF::skip_record != nullptr.
+--echo #
+--echo # MultiGet/MRR does not invoke skip_record() as it would not produce
+--echo # much speedup.
+--echo #
+insert into t3 select 10000+a, 10000+a, a, 'duplicate-match' from t1;
+delete from t3 where col1 in (3,5);
+
+explain
+select * from t0 left join t3 on t3.col1=t0.a where t3.pk1 is null;
+select * from t0 left join t3 on t3.col1=t0.a where t3.pk1 is null;
+set optimizer_switch='mrr=off';
+select * from t0 left join t3 on t3.col1=t0.a where t3.pk1 is null;
+set optimizer_switch='mrr=on';
+
+explain
+select * from t0 where t0.a in (select t3.col1 from t3 where char_length(t3.filler)<30);
+select * from t0 where t0.a in (select t3.col1 from t3 where char_length(t3.filler)<30);
+
drop table t0,t1,t2,t3,t4;
--echo #
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 15601fd3f82..8019be03401 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -3095,11 +3095,6 @@ class Rdb_transaction {
const rocksdb::Slice &key, rocksdb::PinnableSlice *const value,
bool exclusive, const bool do_validate) = 0;
- virtual void multi_get(const rocksdb::ReadOptions &read_options,
- rocksdb::ColumnFamilyHandle *column_family,
- const size_t num_keys, const rocksdb::Slice *keys,
- rocksdb::PinnableSlice *values,
- rocksdb::Status *statuses, bool sorted_input) = 0;
virtual rocksdb::Iterator *get_iterator(
const rocksdb::ReadOptions &options,
rocksdb::ColumnFamilyHandle *column_family) = 0;
@@ -3476,15 +3471,6 @@ class Rdb_transaction_impl : public Rdb_transaction {
return m_rocksdb_tx->Get(m_read_opts, column_family, key, value);
}
- void multi_get(const rocksdb::ReadOptions &read_options,
- rocksdb::ColumnFamilyHandle *column_family,
- const size_t num_keys, const rocksdb::Slice *keys,
- rocksdb::PinnableSlice *values, rocksdb::Status *statuses,
- bool sorted_input) override {
- m_rocksdb_tx->MultiGet(read_options, column_family, num_keys, keys, values,
- statuses, sorted_input);
- }
-
void multi_get(rocksdb::ColumnFamilyHandle *const column_family,
const size_t num_keys, const rocksdb::Slice *keys,
rocksdb::PinnableSlice *values, rocksdb::Status *statuses,
@@ -3791,16 +3777,6 @@ class Rdb_writebatch_impl : public Rdb_transaction {
return get(column_family, key, value);
}
- void multi_get(const rocksdb::ReadOptions &read_options,
- rocksdb::ColumnFamilyHandle *column_family,
- const size_t num_keys, const rocksdb::Slice *keys,
- rocksdb::PinnableSlice *values, rocksdb::Status *statuses,
- bool sorted_input) override {
- // todo: could we just read the committed content from the DB here?
- //psergey-todo:!
- DBUG_ASSERT(0);
- }
-
void multi_get(rocksdb::ColumnFamilyHandle *const column_family,
const size_t num_keys, const rocksdb::Slice *keys,
rocksdb::PinnableSlice *values, rocksdb::Status *statuses,
@@ -15173,9 +15149,8 @@ ha_rows ha_rocksdb::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
}
if (all_eq_ranges) {
- // Indicate that we will use Mutlit-Get MRR
+ // Indicate that we will use MultiGet MRR
*flags &= ~HA_MRR_USE_DEFAULT_IMPL;
- *flags |= HA_MRR_CONVERT_REF_TO_RANGE;
*flags |= HA_MRR_SUPPORT_SORTED;
*bufsz = mrr_get_length_per_rec() * res * 1.1 + 1;
}
@@ -15237,7 +15212,7 @@ class Mrr_rowid_source {
//
-// Rowid source that produces rowids by enumerating a seqence of ranges
+// Rowid source that produces rowids by enumerating a sequence of ranges
//
class Mrr_pk_scan_rowid_source : public Mrr_rowid_source {
range_seq_t mrr_seq_it;
@@ -15263,6 +15238,8 @@ class Mrr_pk_scan_rowid_source : public Mrr_rowid_source {
(key_part_map(1) << self->m_pk_descr->get_key_parts()) - 1;
DBUG_ASSERT(range.start_key.keypart_map == all_parts_map);
DBUG_ASSERT(range.end_key.keypart_map == all_parts_map);
+ DBUG_ASSERT(range.start_key.flag == HA_READ_KEY_EXACT);
+ DBUG_ASSERT(range.end_key.flag == HA_READ_AFTER_KEY);
*range_ptr = range.ptr;
*size = self->m_pk_descr->pack_index_tuple(self->table,
@@ -15299,10 +15276,23 @@ class Mrr_sec_key_rowid_source : public Mrr_rowid_source {
if (err)
return err;
- err = self->handler::multi_range_read_next(range_ptr);
- if (!err) {
+ while (!(err = self->handler::multi_range_read_next(range_ptr))) {
+
+ if (self->mrr_funcs.skip_index_tuple &&
+ self->mrr_funcs.skip_index_tuple(self->mrr_iter, *range_ptr)) {
+ // BKA's variant of "Index Condition Pushdown" check failed
+ continue;
+ }
+
+ if (self->mrr_funcs.skip_record &&
+ self->mrr_funcs.skip_record(self->mrr_iter, *range_ptr,
+ (uchar*)self->m_last_rowkey.ptr())) {
+ continue;
+ }
+
memcpy(buf, self->m_last_rowkey.ptr(), self->m_last_rowkey.length());
*size = self->m_last_rowkey.length();
+ break;
}
return err;
}
@@ -15344,6 +15334,9 @@ int ha_rocksdb::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
mrr_sorted_mode = (mode & HA_MRR_SORTED) ? true : false;
if (active_index == table->s->primary_key) {
+ // ICP is not supported for PK, so we don't expect that BKA's variant
+ // of ICP would be used:
+ DBUG_ASSERT(!mrr_funcs.skip_index_tuple);
mrr_rowid_reader =
new Mrr_pk_scan_rowid_source(this, seq_init_param, n_ranges, mode);
} else {
@@ -15489,9 +15482,8 @@ int ha_rocksdb::mrr_fill_buffer() {
Rdb_transaction *const tx = get_or_create_tx(table->in_use);
- tx->multi_get(tx->m_read_opts, m_pk_descr->get_cf(),
- mrr_n_elements, // actual number of elements we've got
- mrr_keys, mrr_values, mrr_statuses, mrr_sorted_mode);
+ tx->multi_get(m_pk_descr->get_cf(), mrr_n_elements, mrr_keys, mrr_values,
+ mrr_statuses, mrr_sorted_mode);
return 0;
}
@@ -15527,7 +15519,7 @@ int ha_rocksdb::multi_range_read_next(char **range_info) {
Rdb_transaction *&tx = get_tx_from_thd(table->in_use);
int rc;
- do {
+ while (1) {
while (1) {
if (mrr_read_index >= mrr_n_elements) {
if (mrr_rowid_reader->eof() || !mrr_n_elements) {
@@ -15543,13 +15535,26 @@ int ha_rocksdb::multi_range_read_next(char **range_info) {
return rc;
}
}
- // Skip the "is not found" errors
+ // If we found a status that has a row, leave the loop
if (mrr_statuses[mrr_read_index].ok()) break;
+
+ // Skip the NotFound errors, return any other error to the SQL layer
+ if (!mrr_statuses[mrr_read_index].IsNotFound())
+ return rdb_error_to_mysql(mrr_statuses[mrr_read_index]);
+
mrr_read_index++;
}
size_t cur_key = mrr_read_index++;
const rocksdb::Slice &rowkey = mrr_keys[cur_key];
+
+ if (mrr_funcs.skip_record &&
+ mrr_funcs.skip_record(mrr_iter, mrr_range_ptrs[cur_key],
+ (uchar*)rowkey.data())) {
+ rc = HA_ERR_END_OF_FILE;
+ continue;
+ }
+
m_last_rowkey.copy((const char *)rowkey.data(), rowkey.size(),
&my_charset_bin);
@@ -15570,8 +15575,9 @@ int ha_rocksdb::multi_range_read_next(char **range_info) {
rc = convert_record_from_storage_format(&rowkey, table->record[0]);
m_retrieved_record.Reset();
mrr_values[cur_key].Reset();
- table->status = rc ? STATUS_NOT_FOUND : 0;
- } while (0);
+ break;
+ }
+ table->status = rc ? STATUS_NOT_FOUND : 0;
return rc;
}
1
0

[Commits] 8ccda715d0e: MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
by Varun 10 Oct '19
by Varun 10 Oct '19
10 Oct '19
revision-id: 8ccda715d0e657673445e150a9ed6ef784c68a61 (mariadb-10.4.4-341-g8ccda715d0e)
parent(s): a340af922361e3958e5d6653c8b840771db282f2
author: Varun Gupta
committer: Varun Gupta
timestamp: 2019-10-10 12:58:57 +0530
message:
MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
For the case when the SJM scan table is the first table in the join order,
if we want to do the sorting on the SJM scan table, then we need to
make sure that we unpack the values to base table fields in two cases:
1) Reading the SJM table and writing the sort-keys inside the sort-buffer
2) Reading the sorted data from the sort file
---
mysql-test/main/order_by.result | 138 +++++++++++++++++++++++++++++++++++++++-
mysql-test/main/order_by.test | 34 ++++++++++
sql/filesort.cc | 10 +++
sql/filesort.h | 14 +++-
sql/opt_subselect.cc | 10 ++-
sql/records.cc | 13 ++++
sql/records.h | 1 +
sql/sql_select.cc | 99 +++++++++++++---------------
sql/sql_select.h | 4 +-
sql/sql_sort.h | 2 +
sql/table.h | 1 +
11 files changed, 264 insertions(+), 62 deletions(-)
diff --git a/mysql-test/main/order_by.result b/mysql-test/main/order_by.result
index b059cc686cd..e74583670fc 100644
--- a/mysql-test/main/order_by.result
+++ b/mysql-test/main/order_by.result
@@ -3322,7 +3322,7 @@ WHERE books.library_id = 8663 AND
books.scheduled_for_removal=0 )
ORDER BY wings.id;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 2 100.00 Using temporary; Using filesort
+1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 2 100.00 Using filesort
1 PRIMARY wings eq_ref PRIMARY PRIMARY 4 test.books.wings_id 1 100.00
2 MATERIALIZED books ref library_idx library_idx 4 const 2 100.00 Using where
Warnings:
@@ -3436,3 +3436,139 @@ Note 1003 select `test`.`t4`.`a` AS `a`,`test`.`t4`.`b` AS `b`,`test`.`t4`.`c` A
set histogram_size=@tmp_h, histogram_type=@tmp_ht, use_stat_tables=@tmp_u,
optimizer_use_condition_selectivity=@tmp_o;
drop table t1,t2,t3,t4;
+#
+# MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
+#
+CREATE TABLE t1 (a INT, b int, primary key(a));
+CREATE TABLE t2 (a INT, b INT);
+INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6),
+(2354,7),(321421,3),(535,2),(4535,3);
+INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3);
+# Join order should have the SJM scan table as the first table for both
+# the queries with GROUP BY and ORDER BY clause.
+EXPLAIN SELECT t1.a
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ORDER BY t1.a DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 3 Using filesort
+1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t2.a 1 Using index
+2 MATERIALIZED t2 ALL NULL NULL NULL NULL 3 Using where
+EXPLAIN FORMAT=JSON SELECT t1.a
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ORDER BY t1.a DESC;
+EXPLAIN
+{
+ "query_block": {
+ "select_id": 1,
+ "read_sorted_file": {
+ "filesort": {
+ "sort_key": "t1.a desc",
+ "table": {
+ "table_name": "<subquery2>",
+ "access_type": "ALL",
+ "possible_keys": ["distinct_key"],
+ "rows": 3,
+ "filtered": 100,
+ "materialized": {
+ "unique": 1,
+ "query_block": {
+ "select_id": 2,
+ "table": {
+ "table_name": "t2",
+ "access_type": "ALL",
+ "rows": 3,
+ "filtered": 100,
+ "attached_condition": "t2.b = 3 and t2.a is not null"
+ }
+ }
+ }
+ }
+ }
+ },
+ "table": {
+ "table_name": "t1",
+ "access_type": "eq_ref",
+ "possible_keys": ["PRIMARY"],
+ "key": "PRIMARY",
+ "key_length": "4",
+ "used_key_parts": ["a"],
+ "ref": ["test.t2.a"],
+ "rows": 1,
+ "filtered": 100,
+ "using_index": true
+ }
+ }
+}
+SELECT t1.a
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ORDER BY t1.a DESC;
+a
+273
+96
+58
+EXPLAIN SELECT t1.a, group_concat(t1.b)
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+GROUP BY t1.a DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 3 Using filesort
+1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t2.a 1
+2 MATERIALIZED t2 ALL NULL NULL NULL NULL 3 Using where
+EXPLAIN FORMAT=JSON SELECT t1.a, group_concat(t1.b)
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+GROUP BY t1.a DESC;
+EXPLAIN
+{
+ "query_block": {
+ "select_id": 1,
+ "read_sorted_file": {
+ "filesort": {
+ "sort_key": "t1.a desc",
+ "table": {
+ "table_name": "<subquery2>",
+ "access_type": "ALL",
+ "possible_keys": ["distinct_key"],
+ "rows": 3,
+ "filtered": 100,
+ "materialized": {
+ "unique": 1,
+ "query_block": {
+ "select_id": 2,
+ "table": {
+ "table_name": "t2",
+ "access_type": "ALL",
+ "rows": 3,
+ "filtered": 100,
+ "attached_condition": "t2.b = 3 and t2.a is not null"
+ }
+ }
+ }
+ }
+ }
+ },
+ "table": {
+ "table_name": "t1",
+ "access_type": "eq_ref",
+ "possible_keys": ["PRIMARY"],
+ "key": "PRIMARY",
+ "key_length": "4",
+ "used_key_parts": ["a"],
+ "ref": ["test.t2.a"],
+ "rows": 1,
+ "filtered": 100
+ }
+ }
+}
+SELECT t1.a, group_concat(t1.b)
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+GROUP BY t1.a DESC;
+a group_concat(t1.b)
+273 3
+96 2
+58 1
+DROP TABLE t1, t2;
diff --git a/mysql-test/main/order_by.test b/mysql-test/main/order_by.test
index 934c503302f..b3e43d27e2f 100644
--- a/mysql-test/main/order_by.test
+++ b/mysql-test/main/order_by.test
@@ -2276,3 +2276,37 @@ set histogram_size=@tmp_h, histogram_type=@tmp_ht, use_stat_tables=@tmp_u,
optimizer_use_condition_selectivity=@tmp_o;
drop table t1,t2,t3,t4;
+
+
+--echo #
+--echo # MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
+--echo #
+
+CREATE TABLE t1 (a INT, b int, primary key(a));
+CREATE TABLE t2 (a INT, b INT);
+
+INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6),
+ (2354,7),(321421,3),(535,2),(4535,3);
+INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3);
+
+--echo # Join order should have the SJM scan table as the first table for both
+--echo # the queries with GROUP BY and ORDER BY clause.
+
+let $query= SELECT t1.a
+ FROM t1
+ WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ ORDER BY t1.a DESC;
+
+eval EXPLAIN $query;
+eval EXPLAIN FORMAT=JSON $query;
+eval $query;
+
+let $query= SELECT t1.a, group_concat(t1.b)
+ FROM t1
+ WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ GROUP BY t1.a DESC;
+
+eval EXPLAIN $query;
+eval EXPLAIN FORMAT=JSON $query;
+eval $query;
+DROP TABLE t1, t2;
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 3f4291cfb1f..0fc5724cf43 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -183,6 +183,9 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
&multi_byte_charset),
table, max_rows, filesort->sort_positions);
+ param.set_all_read_bits= filesort->set_all_read_bits;
+ param.unpack= filesort->unpack;
+
sort->addon_buf= param.addon_buf;
sort->addon_field= param.addon_field;
sort->unpack= unpack_addon_fields;
@@ -756,13 +759,20 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
goto err;
}
+ if (param->set_all_read_bits)
+ sort_form->column_bitmaps_set(save_read_set, save_write_set);
+
DEBUG_SYNC(thd, "after_index_merge_phase1");
for (;;)
{
if (quick_select)
error= select->quick->get_next();
else /* Not quick-select */
+ {
error= file->ha_rnd_next(sort_form->record[0]);
+ if (param->unpack)
+ param->unpack(sort_form);
+ }
if (unlikely(error))
break;
file->position(sort_form->record[0]);
diff --git a/sql/filesort.h b/sql/filesort.h
index 5f79a5095cc..7e88982ab5f 100644
--- a/sql/filesort.h
+++ b/sql/filesort.h
@@ -58,6 +58,13 @@ class Filesort: public Sql_alloc
Filesort_tracker *tracker;
+ /*
+ TRUE means all the fields of table of whose bitmap read_set is set
+ need to be read while reading records in the sort buffer.
+ FALSE otherwise
+ */
+ bool set_all_read_bits;
+
Filesort(ORDER *order_arg, ha_rows limit_arg, bool sort_positions_arg,
SQL_SELECT *select_arg):
order(order_arg),
@@ -66,7 +73,9 @@ class Filesort: public Sql_alloc
select(select_arg),
own_select(false),
using_pq(false),
- sort_positions(sort_positions_arg)
+ sort_positions(sort_positions_arg),
+ set_all_read_bits(FALSE),
+ unpack(NULL)
{
DBUG_ASSERT(order);
};
@@ -75,6 +84,9 @@ class Filesort: public Sql_alloc
/* Prepare ORDER BY list for sorting. */
uint make_sortorder(THD *thd, JOIN *join, table_map first_table_bit);
+ /* Unpack temp table columns to base table columns*/
+ void (*unpack)(TABLE *);
+
private:
void cleanup();
};
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index 87458357865..f837a6394af 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -4252,11 +4252,11 @@ bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab)
sjm_tab->type= JT_ALL;
/* Initialize full scan */
- sjm_tab->read_first_record= join_read_record_no_init;
+ sjm_tab->read_first_record= join_init_read_record;
sjm_tab->read_record.copy_field= sjm->copy_field;
sjm_tab->read_record.copy_field_end= sjm->copy_field +
sjm->sjm_table_cols.elements;
- sjm_tab->read_record.read_record_func= rr_sequential_and_unpack;
+ sjm_tab->read_record.read_record_func= read_record_func_for_rr_and_unpack;
}
sjm_tab->bush_children->end[-1].next_select= end_sj_materialize;
@@ -7105,3 +7105,9 @@ bool Item_in_subselect::pushdown_cond_for_in_subquery(THD *thd, Item *cond)
thd->lex->current_select= save_curr_select;
DBUG_RETURN(FALSE);
}
+
+
+bool TABLE_LIST::is_sjm_scan_table()
+{
+ return is_active_sjm() && sj_mat_info->is_sj_scan;
+}
diff --git a/sql/records.cc b/sql/records.cc
index 3d709182a4e..f6885f773d5 100644
--- a/sql/records.cc
+++ b/sql/records.cc
@@ -709,3 +709,16 @@ static int rr_cmp(uchar *a,uchar *b)
return (int) a[7] - (int) b[7];
#endif
}
+
+
+int read_record_func_for_rr_and_unpack(READ_RECORD *info)
+{
+ int error;
+ if ((error= info->read_record_func_and_unpack_calls(info)))
+ return error;
+
+ for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
+ (*cp->do_copy)(cp);
+
+ return error;
+}
diff --git a/sql/records.h b/sql/records.h
index faf0d13c9a9..037a06b9d34 100644
--- a/sql/records.h
+++ b/sql/records.h
@@ -55,6 +55,7 @@ struct READ_RECORD
TABLE *table; /* Head-form */
Unlock_row_func unlock_row;
Read_func read_record_func;
+ Read_func read_record_func_and_unpack_calls;
THD *thd;
SQL_SELECT *select;
uint ref_length, reclength, rec_cache_size, error_offset;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 36d9eda3383..0523cdef178 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -3799,6 +3799,15 @@ JOIN::add_sorting_to_table(JOIN_TAB *tab, ORDER *order)
tab->select);
if (!tab->filesort)
return true;
+
+ TABLE *table= tab->table;
+ if ((tab == join_tab + const_tables) &&
+ table->pos_in_table_list &&
+ table->pos_in_table_list->is_active_sjm())
+ {
+ tab->filesort->set_all_read_bits= TRUE;
+ tab->filesort->unpack= unpack_to_base_table_fields;
+ }
/*
Select was moved to filesort->select to force join_init_read_record to use
sorted result instead of reading table through select.
@@ -14015,37 +14024,8 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
can be used without tmp. table.
*/
bool can_subst_to_first_table= false;
- bool first_is_in_sjm_nest= false;
- if (first_is_base_table)
- {
- TABLE_LIST *tbl_for_first=
- join->join_tab[join->const_tables].table->pos_in_table_list;
- first_is_in_sjm_nest= tbl_for_first->sj_mat_info &&
- tbl_for_first->sj_mat_info->is_used;
- }
- /*
- Currently we do not employ the optimization that uses multiple
- equalities for ORDER BY to remove tmp table in the case when
- the first table happens to be the result of materialization of
- a semi-join nest ( <=> first_is_in_sjm_nest == true).
-
- When a semi-join nest is materialized and scanned to look for
- possible matches in the remaining tables for every its row
- the fields from the result of materialization are copied
- into the record buffers of tables from the semi-join nest.
- So these copies are used to access the remaining tables rather
- than the fields from the result of materialization.
-
- Unfortunately now this so-called 'copy back' technique is
- supported only if the rows are scanned with the rr_sequential
- function, but not with other rr_* functions that are employed
- when the result of materialization is required to be sorted.
-
- TODO: either to support 'copy back' technique for the above case,
- or to get rid of this technique altogether.
- */
if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP) &&
- first_is_base_table && !first_is_in_sjm_nest &&
+ first_is_base_table &&
order->item[0]->real_item()->type() == Item::FIELD_ITEM &&
join->cond_equal)
{
@@ -19922,19 +19902,6 @@ do_select(JOIN *join, Procedure *procedure)
}
-int rr_sequential_and_unpack(READ_RECORD *info)
-{
- int error;
- if (unlikely((error= rr_sequential(info))))
- return error;
-
- for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
- (*cp->do_copy)(cp);
-
- return error;
-}
-
-
/**
@brief
Instantiates temporary table
@@ -21223,6 +21190,8 @@ bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab)
int join_init_read_record(JOIN_TAB *tab)
{
+ bool need_unpacking= FALSE;
+ JOIN *join= tab->join;
/*
Note: the query plan tree for the below operations is constructed in
save_agg_explain_data.
@@ -21232,6 +21201,12 @@ int join_init_read_record(JOIN_TAB *tab)
if (tab->filesort && tab->sort_table()) // Sort table.
return 1;
+ if (join->top_join_tab_count != join->const_tables)
+ {
+ TABLE_LIST *tbl= tab->table->pos_in_table_list;
+ need_unpacking= tbl ? tbl->is_sjm_scan_table() : FALSE;
+ }
+
tab->build_range_rowid_filter_if_needed();
DBUG_EXECUTE_IF("kill_join_init_read_record",
@@ -21249,16 +21224,6 @@ int join_init_read_record(JOIN_TAB *tab)
if (!tab->preread_init_done && tab->preread_init())
return 1;
-
- if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
- tab->select, tab->filesort_result, 1,1, FALSE))
- return 1;
- return tab->read_record.read_record();
-}
-
-int
-join_read_record_no_init(JOIN_TAB *tab)
-{
Copy_field *save_copy, *save_copy_end;
/*
@@ -21268,12 +21233,20 @@ join_read_record_no_init(JOIN_TAB *tab)
save_copy= tab->read_record.copy_field;
save_copy_end= tab->read_record.copy_field_end;
- init_read_record(&tab->read_record, tab->join->thd, tab->table,
- tab->select, tab->filesort_result, 1, 1, FALSE);
+
+ if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
+ tab->select, tab->filesort_result, 1, 1, FALSE))
+ return 1;
tab->read_record.copy_field= save_copy;
tab->read_record.copy_field_end= save_copy_end;
- tab->read_record.read_record_func= rr_sequential_and_unpack;
+
+ if (need_unpacking)
+ {
+ tab->read_record.read_record_func_and_unpack_calls=
+ tab->read_record.read_record_func;
+ tab->read_record.read_record_func = read_record_func_for_rr_and_unpack;
+ }
return tab->read_record.read_record();
}
@@ -28981,6 +28954,20 @@ void build_notnull_conds_for_inner_nest_of_outer_join(JOIN *join,
}
+/*
+ @brief
+ Unpacking temp table fields to base table fields.
+*/
+
+void unpack_to_base_table_fields(TABLE *table)
+{
+ JOIN_TAB *tab= table->reginfo.join_tab;
+ for (Copy_field *cp= tab->read_record.copy_field;
+ cp != tab->read_record.copy_field_end; cp++)
+ (*cp->do_copy)(cp);
+}
+
+
/**
@} (end of group Query_Optimizer)
*/
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 4f7bf49f635..21c07c9bacd 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -223,7 +223,7 @@ typedef enum_nested_loop_state
(*Next_select_func)(JOIN *, struct st_join_table *, bool);
Next_select_func setup_end_select_func(JOIN *join, JOIN_TAB *tab);
int rr_sequential(READ_RECORD *info);
-int rr_sequential_and_unpack(READ_RECORD *info);
+int read_record_func_for_rr_and_unpack(READ_RECORD *info);
Item *remove_pushed_top_conjuncts(THD *thd, Item *cond);
Item *and_new_conditions_to_optimized_cond(THD *thd, Item *cond,
COND_EQUAL **cond_eq,
@@ -2352,7 +2352,6 @@ create_virtual_tmp_table(THD *thd, Field *field)
int test_if_item_cache_changed(List<Cached_item> &list);
int join_init_read_record(JOIN_TAB *tab);
-int join_read_record_no_init(JOIN_TAB *tab);
void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key);
inline Item * and_items(THD *thd, Item* cond, Item *item)
{
@@ -2410,6 +2409,7 @@ int print_explain_message_line(select_result_sink *result,
void explain_append_mrr_info(QUICK_RANGE_SELECT *quick, String *res);
int append_possible_keys(MEM_ROOT *alloc, String_list &list, TABLE *table,
key_map possible_keys);
+void unpack_to_base_table_fields(TABLE *table);
/****************************************************************************
Temporary table support for SQL Runtime
diff --git a/sql/sql_sort.h b/sql/sql_sort.h
index 7abbc808632..cd712cb50aa 100644
--- a/sql/sql_sort.h
+++ b/sql/sql_sort.h
@@ -79,6 +79,7 @@ class Sort_param {
SORT_FIELD *end;
SORT_ADDON_FIELD *addon_field; // Descriptors for companion fields.
LEX_STRING addon_buf; // Buffer & length of added packed fields.
+ bool set_all_read_bits;
uchar *unique_buff;
bool not_killable;
@@ -93,6 +94,7 @@ class Sort_param {
}
void init_for_filesort(uint sortlen, TABLE *table,
ha_rows maxrows, bool sort_positions);
+ void (*unpack)(TABLE *);
};
diff --git a/sql/table.h b/sql/table.h
index 1a7e5fbd4dc..35ba9bbb95d 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -2622,6 +2622,7 @@ struct TABLE_LIST
*/
const char *get_table_name() const { return view != NULL ? view_name.str : table_name.str; }
bool is_active_sjm();
+ bool is_sjm_scan_table();
bool is_jtbm() { return MY_TEST(jtbm_subselect != NULL); }
st_select_lex_unit *get_unit();
st_select_lex *get_single_select();
2
1

[Commits] MDEV-20001 Potential dangerous regression: INSERT INTO >=100 rows fail for myisam table with HASH indexes
by Sachin Setiya 09 Oct '19
by Sachin Setiya 09 Oct '19
09 Oct '19
commit 52f3829b95cea6e97a1eedfd8e5a73ddaf611809
Author: Sachin <sachin.setiya(a)mariadb.com>
Date: Wed Oct 9 21:16:31 2019 +0530
MDEV-20001 Potential dangerous regression: INSERT INTO >=100 rows fail
for myisam table with HASH indexes
Problem:-
The issue is that when we do a bulk insert with more than
MI_MIN_ROWS_TO_DISABLE_INDEXES (100) rows, we try to disable the indexes to
speed up the insert. But the current logic also disables the long unique indexes.
Solution:- In ha_myisam::start_bulk_insert if we find long hash index
(HA_KEY_ALG_LONG_HASH) we will not disable the index.
This commit also refactors the mi_disable_indexes_for_rebuild function.
Since this function is called in only one place, it is inlined into
start_bulk_insert.
mi_clear_key_active is added into myisamdef.h because it is now also used
in the ha_myisam.cc file.
diff --git a/include/myisam.h b/include/myisam.h
index 216f041c8a9..f2e31bb9f60 100644
--- a/include/myisam.h
+++ b/include/myisam.h
@@ -430,6 +430,7 @@ int sort_ft_buf_flush(MI_SORT_PARAM *sort_param);
int thr_write_keys(MI_SORT_PARAM *sort_param);
int sort_write_record(MI_SORT_PARAM *sort_param);
int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages,
ulonglong);
+my_bool mi_too_big_key_for_sort(MI_KEYDEF *key, ha_rows rows);
#ifdef __cplusplus
}
diff --git a/mysql-test/main/long_unique_bugs.result
b/mysql-test/main/long_unique_bugs.result
index d4b71e2bc46..c0ba4d0b87d 100644
--- a/mysql-test/main/long_unique_bugs.result
+++ b/mysql-test/main/long_unique_bugs.result
@@ -267,3 +267,6 @@ connection default;
DROP TABLE t1, t2;
CREATE TABLE t1 (a TEXT, UNIQUE(a)) ENGINE=Aria;
ERROR 42000: Specified key was too long; max key length is 1000 bytes
+create table t1(a int, unique(a) using hash);
+#BULK insert > 100 rows (MI_MIN_ROWS_TO_DISABLE_INDEXES)
+drop table t1;
diff --git a/mysql-test/main/long_unique_bugs.test
b/mysql-test/main/long_unique_bugs.test
index 62c4076cee0..13a4e1367a0 100644
--- a/mysql-test/main/long_unique_bugs.test
+++ b/mysql-test/main/long_unique_bugs.test
@@ -323,3 +323,20 @@ DROP TABLE t1, t2;
#
--error ER_TOO_LONG_KEY
CREATE TABLE t1 (a TEXT, UNIQUE(a)) ENGINE=Aria;
+
+#
+# MDEV-20001 Potential dangerous regression: INSERT INTO >=100 rows fail
for myisam table with HASH indexes
+#
+create table t1(a int, unique(a) using hash);
+--let $count=150
+--let insert_stmt= insert into t1 values(200)
+while ($count)
+{
+ --let $insert_stmt=$insert_stmt,($count)
+ --dec $count
+}
+--disable_query_log
+--echo #BULK insert > 100 rows (MI_MIN_ROWS_TO_DISABLE_INDEXES)
+--eval $insert_stmt
+--enable_query_log
+drop table t1;
diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
index bde9c99288f..54c38ae69b4 100644
--- a/storage/myisam/ha_myisam.cc
+++ b/storage/myisam/ha_myisam.cc
@@ -1749,7 +1749,35 @@ void ha_myisam::start_bulk_insert(ha_rows rows, uint
flags)
else
{
my_bool all_keys= MY_TEST(flags & HA_CREATE_UNIQUE_INDEX_BY_SORT);
- mi_disable_indexes_for_rebuild(file, rows, all_keys);
+ MYISAM_SHARE *share=file->s;
+ MI_KEYDEF *key=share->keyinfo;
+ uint i;
+ /*
+ Deactivate all indexes that can be recreated fast.
+ These include packed keys on which sorting will use more temporary
+ space than the max allowed file length or for which the unpacked keys
+ will take much more space than packed keys.
+ Note that 'rows' may be zero for the case when we don't know how many
+ rows we will put into the file.
+ Long Unique Index (HA_KEY_ALG_LONG_HASH) will not be disabled because
+ there unique property is enforced at the time of ha_write_row
+ (check_duplicate_long_entries). So we need active index at the time
of
+ insert.
+ */
+ DBUG_ASSERT(file->state->records == 0 &&
+ (!rows || rows >= MI_MIN_ROWS_TO_DISABLE_INDEXES));
+ for (i=0 ; i < share->base.keys ; i++,key++)
+ {
+ if (!(key->flag & (HA_SPATIAL | HA_AUTO_KEY)) &&
+ ! mi_too_big_key_for_sort(key,rows) && file->s->base.auto_key
!= i+1 &&
+ (all_keys || !(key->flag & HA_NOSAME)) &&
+ table->key_info[i].algorithm != HA_KEY_ALG_LONG_HASH)
+ {
+ mi_clear_key_active(share->state.key_map, i);
+ file->update|= HA_STATE_CHANGED;
+ file->create_unique_index_by_sort= all_keys;
+ }
+ }
}
}
else
diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c
index 22eb97b24f3..2a4ef8851c9 100644
--- a/storage/myisam/mi_check.c
+++ b/storage/myisam/mi_check.c
@@ -4670,7 +4670,7 @@ static ha_checksum mi_byte_checksum(const uchar *buf,
uint length)
return crc;
}
-static my_bool mi_too_big_key_for_sort(MI_KEYDEF *key, ha_rows rows)
+my_bool mi_too_big_key_for_sort(MI_KEYDEF *key, ha_rows rows)
{
uint key_maxlength=key->maxlength;
if (key->flag & HA_FULLTEXT)
@@ -4684,38 +4684,6 @@ static my_bool mi_too_big_key_for_sort(MI_KEYDEF
*key, ha_rows rows)
((ulonglong) rows * key_maxlength > myisam_max_temp_length));
}
-/*
- Deactivate all indexes that can be recreated fast.
- These include packed keys on which sorting will use more temporary
- space than the max allowed file length or for which the unpacked keys
- will take much more space than packed keys.
- Note that 'rows' may be zero for the case when we don't know how many
- rows we will put into the file.
- */
-
-void mi_disable_indexes_for_rebuild(MI_INFO *info, ha_rows rows,
- my_bool all_keys)
-{
- MYISAM_SHARE *share=info->s;
- MI_KEYDEF *key=share->keyinfo;
- uint i;
-
- DBUG_ASSERT(info->state->records == 0 &&
- (!rows || rows >= MI_MIN_ROWS_TO_DISABLE_INDEXES));
- for (i=0 ; i < share->base.keys ; i++,key++)
- {
- if (!(key->flag & (HA_SPATIAL | HA_AUTO_KEY)) &&
- ! mi_too_big_key_for_sort(key,rows) && info->s->base.auto_key !=
i+1 &&
- (all_keys || !(key->flag & HA_NOSAME)))
- {
- mi_clear_key_active(share->state.key_map, i);
- info->update|= HA_STATE_CHANGED;
- info->create_unique_index_by_sort= all_keys;
- }
- }
-}
-
-
/*
Return TRUE if we can use repair by sorting
One can set the force argument to force to use sorting
diff --git a/storage/myisam/myisamdef.h b/storage/myisam/myisamdef.h
index c6fa777774a..f7b61ae638c 100644
--- a/storage/myisam/myisamdef.h
+++ b/storage/myisam/myisamdef.h
@@ -715,8 +715,6 @@ void mi_restore_status(void *param);
void mi_copy_status(void *to, void *from);
my_bool mi_check_status(void *param);
void mi_fix_status(MI_INFO *org_table, MI_INFO *new_table);
-void mi_disable_indexes_for_rebuild(MI_INFO *info, ha_rows rows,
- my_bool all_keys);
extern MI_INFO *test_if_reopen(char *filename);
my_bool check_table_is_closed(const char *name, const char *where);
int mi_open_datafile(MI_INFO *info, MYISAM_SHARE *share);
--
Regards
Sachin Setiya
Software Engineer at MariaDB
1
0

[Commits] d48294d15c4: MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
by Varun 09 Oct '19
by Varun 09 Oct '19
09 Oct '19
revision-id: d48294d15c4895215d5facef97fc80c03cd6b4b0 (mariadb-10.4.4-341-gd48294d15c4)
parent(s): a340af922361e3958e5d6653c8b840771db282f2
author: Varun Gupta
committer: Varun Gupta
timestamp: 2019-09-28 13:06:44 +0530
message:
MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
For the case when the SJM scan table is the first table in the join order,
if we want to do the sorting on the SJM scan table, then we need to
make sure that we unpack the values to base table fields in two cases:
1) Reading the SJM table and writing the sort-keys inside the sort-buffer
2) Reading the sorted data from the sort file
---
mysql-test/main/order_by.result | 138 +++++++++++++++++++++++++++++++++++++++-
mysql-test/main/order_by.test | 34 ++++++++++
sql/filesort.cc | 17 +++++
sql/opt_subselect.cc | 10 ++-
sql/records.cc | 13 ++++
sql/records.h | 1 +
sql/sql_select.cc | 89 ++++++++++----------------
sql/sql_select.h | 4 +-
sql/table.h | 1 +
9 files changed, 246 insertions(+), 61 deletions(-)
diff --git a/mysql-test/main/order_by.result b/mysql-test/main/order_by.result
index b059cc686cd..e74583670fc 100644
--- a/mysql-test/main/order_by.result
+++ b/mysql-test/main/order_by.result
@@ -3322,7 +3322,7 @@ WHERE books.library_id = 8663 AND
books.scheduled_for_removal=0 )
ORDER BY wings.id;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 2 100.00 Using temporary; Using filesort
+1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 2 100.00 Using filesort
1 PRIMARY wings eq_ref PRIMARY PRIMARY 4 test.books.wings_id 1 100.00
2 MATERIALIZED books ref library_idx library_idx 4 const 2 100.00 Using where
Warnings:
@@ -3436,3 +3436,139 @@ Note 1003 select `test`.`t4`.`a` AS `a`,`test`.`t4`.`b` AS `b`,`test`.`t4`.`c` A
set histogram_size=@tmp_h, histogram_type=@tmp_ht, use_stat_tables=@tmp_u,
optimizer_use_condition_selectivity=@tmp_o;
drop table t1,t2,t3,t4;
+#
+# MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
+#
+CREATE TABLE t1 (a INT, b int, primary key(a));
+CREATE TABLE t2 (a INT, b INT);
+INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6),
+(2354,7),(321421,3),(535,2),(4535,3);
+INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3);
+# Join order should have the SJM scan table as the first table for both
+# the queries with GROUP BY and ORDER BY clause.
+EXPLAIN SELECT t1.a
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ORDER BY t1.a DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 3 Using filesort
+1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t2.a 1 Using index
+2 MATERIALIZED t2 ALL NULL NULL NULL NULL 3 Using where
+EXPLAIN FORMAT=JSON SELECT t1.a
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ORDER BY t1.a DESC;
+EXPLAIN
+{
+ "query_block": {
+ "select_id": 1,
+ "read_sorted_file": {
+ "filesort": {
+ "sort_key": "t1.a desc",
+ "table": {
+ "table_name": "<subquery2>",
+ "access_type": "ALL",
+ "possible_keys": ["distinct_key"],
+ "rows": 3,
+ "filtered": 100,
+ "materialized": {
+ "unique": 1,
+ "query_block": {
+ "select_id": 2,
+ "table": {
+ "table_name": "t2",
+ "access_type": "ALL",
+ "rows": 3,
+ "filtered": 100,
+ "attached_condition": "t2.b = 3 and t2.a is not null"
+ }
+ }
+ }
+ }
+ }
+ },
+ "table": {
+ "table_name": "t1",
+ "access_type": "eq_ref",
+ "possible_keys": ["PRIMARY"],
+ "key": "PRIMARY",
+ "key_length": "4",
+ "used_key_parts": ["a"],
+ "ref": ["test.t2.a"],
+ "rows": 1,
+ "filtered": 100,
+ "using_index": true
+ }
+ }
+}
+SELECT t1.a
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ORDER BY t1.a DESC;
+a
+273
+96
+58
+EXPLAIN SELECT t1.a, group_concat(t1.b)
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+GROUP BY t1.a DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 3 Using filesort
+1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t2.a 1
+2 MATERIALIZED t2 ALL NULL NULL NULL NULL 3 Using where
+EXPLAIN FORMAT=JSON SELECT t1.a, group_concat(t1.b)
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+GROUP BY t1.a DESC;
+EXPLAIN
+{
+ "query_block": {
+ "select_id": 1,
+ "read_sorted_file": {
+ "filesort": {
+ "sort_key": "t1.a desc",
+ "table": {
+ "table_name": "<subquery2>",
+ "access_type": "ALL",
+ "possible_keys": ["distinct_key"],
+ "rows": 3,
+ "filtered": 100,
+ "materialized": {
+ "unique": 1,
+ "query_block": {
+ "select_id": 2,
+ "table": {
+ "table_name": "t2",
+ "access_type": "ALL",
+ "rows": 3,
+ "filtered": 100,
+ "attached_condition": "t2.b = 3 and t2.a is not null"
+ }
+ }
+ }
+ }
+ }
+ },
+ "table": {
+ "table_name": "t1",
+ "access_type": "eq_ref",
+ "possible_keys": ["PRIMARY"],
+ "key": "PRIMARY",
+ "key_length": "4",
+ "used_key_parts": ["a"],
+ "ref": ["test.t2.a"],
+ "rows": 1,
+ "filtered": 100
+ }
+ }
+}
+SELECT t1.a, group_concat(t1.b)
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+GROUP BY t1.a DESC;
+a group_concat(t1.b)
+273 3
+96 2
+58 1
+DROP TABLE t1, t2;
diff --git a/mysql-test/main/order_by.test b/mysql-test/main/order_by.test
index 934c503302f..b3e43d27e2f 100644
--- a/mysql-test/main/order_by.test
+++ b/mysql-test/main/order_by.test
@@ -2276,3 +2276,37 @@ set histogram_size=@tmp_h, histogram_type=@tmp_ht, use_stat_tables=@tmp_u,
optimizer_use_condition_selectivity=@tmp_o;
drop table t1,t2,t3,t4;
+
+
+--echo #
+--echo # MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
+--echo #
+
+CREATE TABLE t1 (a INT, b int, primary key(a));
+CREATE TABLE t2 (a INT, b INT);
+
+INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6),
+ (2354,7),(321421,3),(535,2),(4535,3);
+INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3);
+
+--echo # Join order should have the SJM scan table as the first table for both
+--echo # the queries with GROUP BY and ORDER BY clause.
+
+let $query= SELECT t1.a
+ FROM t1
+ WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ ORDER BY t1.a DESC;
+
+eval EXPLAIN $query;
+eval EXPLAIN FORMAT=JSON $query;
+eval $query;
+
+let $query= SELECT t1.a, group_concat(t1.b)
+ FROM t1
+ WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ GROUP BY t1.a DESC;
+
+eval EXPLAIN $query;
+eval EXPLAIN FORMAT=JSON $query;
+eval $query;
+DROP TABLE t1, t2;
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 3f4291cfb1f..e5c83293e9f 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -716,11 +716,21 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
*found_rows= 0;
ref_pos= &file->ref[0];
next_pos=ref_pos;
+ JOIN_TAB *tab= sort_form->reginfo.join_tab;
+ JOIN *join= tab ? tab->join : NULL;
+ bool first_is_in_sjm_nest= FALSE;
DBUG_EXECUTE_IF("show_explain_in_find_all_keys",
dbug_serve_apcs(thd, 1);
);
+ if (join && join->table_count != join->const_tables &&
+ (join->join_tab + join->const_tables == tab))
+ {
+ TABLE_LIST *tbl_for_first= sort_form->pos_in_table_list;
+ first_is_in_sjm_nest= tbl_for_first && tbl_for_first->is_sjm_scan_table();
+ }
+
if (!quick_select)
{
next_pos=(uchar*) 0; /* Find records in sequence */
@@ -756,13 +766,20 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
goto err;
}
+ if (first_is_in_sjm_nest)
+ sort_form->column_bitmaps_set(save_read_set, save_write_set);
+
DEBUG_SYNC(thd, "after_index_merge_phase1");
for (;;)
{
if (quick_select)
error= select->quick->get_next();
else /* Not quick-select */
+ {
error= file->ha_rnd_next(sort_form->record[0]);
+ if (first_is_in_sjm_nest)
+ tab->unpacking_to_base_table_fields();
+ }
if (unlikely(error))
break;
file->position(sort_form->record[0]);
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index 87458357865..f837a6394af 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -4252,11 +4252,11 @@ bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab)
sjm_tab->type= JT_ALL;
/* Initialize full scan */
- sjm_tab->read_first_record= join_read_record_no_init;
+ sjm_tab->read_first_record= join_init_read_record;
sjm_tab->read_record.copy_field= sjm->copy_field;
sjm_tab->read_record.copy_field_end= sjm->copy_field +
sjm->sjm_table_cols.elements;
- sjm_tab->read_record.read_record_func= rr_sequential_and_unpack;
+ sjm_tab->read_record.read_record_func= read_record_func_for_rr_and_unpack;
}
sjm_tab->bush_children->end[-1].next_select= end_sj_materialize;
@@ -7105,3 +7105,9 @@ bool Item_in_subselect::pushdown_cond_for_in_subquery(THD *thd, Item *cond)
thd->lex->current_select= save_curr_select;
DBUG_RETURN(FALSE);
}
+
+
+bool TABLE_LIST::is_sjm_scan_table()
+{
+ return is_active_sjm() && sj_mat_info->is_sj_scan;
+}
diff --git a/sql/records.cc b/sql/records.cc
index 3d709182a4e..f6885f773d5 100644
--- a/sql/records.cc
+++ b/sql/records.cc
@@ -709,3 +709,16 @@ static int rr_cmp(uchar *a,uchar *b)
return (int) a[7] - (int) b[7];
#endif
}
+
+
+int read_record_func_for_rr_and_unpack(READ_RECORD *info)
+{
+ int error;
+ if ((error= info->read_record_func_and_unpack_calls(info)))
+ return error;
+
+ for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
+ (*cp->do_copy)(cp);
+
+ return error;
+}
diff --git a/sql/records.h b/sql/records.h
index faf0d13c9a9..037a06b9d34 100644
--- a/sql/records.h
+++ b/sql/records.h
@@ -55,6 +55,7 @@ struct READ_RECORD
TABLE *table; /* Head-form */
Unlock_row_func unlock_row;
Read_func read_record_func;
+ Read_func read_record_func_and_unpack_calls;
THD *thd;
SQL_SELECT *select;
uint ref_length, reclength, rec_cache_size, error_offset;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 36d9eda3383..28bc57c692f 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -14015,37 +14015,8 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
can be used without tmp. table.
*/
bool can_subst_to_first_table= false;
- bool first_is_in_sjm_nest= false;
- if (first_is_base_table)
- {
- TABLE_LIST *tbl_for_first=
- join->join_tab[join->const_tables].table->pos_in_table_list;
- first_is_in_sjm_nest= tbl_for_first->sj_mat_info &&
- tbl_for_first->sj_mat_info->is_used;
- }
- /*
- Currently we do not employ the optimization that uses multiple
- equalities for ORDER BY to remove tmp table in the case when
- the first table happens to be the result of materialization of
- a semi-join nest ( <=> first_is_in_sjm_nest == true).
-
- When a semi-join nest is materialized and scanned to look for
- possible matches in the remaining tables for every its row
- the fields from the result of materialization are copied
- into the record buffers of tables from the semi-join nest.
- So these copies are used to access the remaining tables rather
- than the fields from the result of materialization.
-
- Unfortunately now this so-called 'copy back' technique is
- supported only if the rows are scanned with the rr_sequential
- function, but not with other rr_* functions that are employed
- when the result of materialization is required to be sorted.
-
- TODO: either to support 'copy back' technique for the above case,
- or to get rid of this technique altogether.
- */
if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP) &&
- first_is_base_table && !first_is_in_sjm_nest &&
+ first_is_base_table &&
order->item[0]->real_item()->type() == Item::FIELD_ITEM &&
join->cond_equal)
{
@@ -19922,19 +19893,6 @@ do_select(JOIN *join, Procedure *procedure)
}
-int rr_sequential_and_unpack(READ_RECORD *info)
-{
- int error;
- if (unlikely((error= rr_sequential(info))))
- return error;
-
- for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
- (*cp->do_copy)(cp);
-
- return error;
-}
-
-
/**
@brief
Instantiates temporary table
@@ -21223,6 +21181,8 @@ bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab)
int join_init_read_record(JOIN_TAB *tab)
{
+ bool need_unpacking= FALSE;
+ JOIN *join= tab->join;
/*
Note: the query plan tree for the below operations is constructed in
save_agg_explain_data.
@@ -21232,6 +21192,12 @@ int join_init_read_record(JOIN_TAB *tab)
if (tab->filesort && tab->sort_table()) // Sort table.
return 1;
+ if (join->top_join_tab_count != join->const_tables)
+ {
+ TABLE_LIST *tbl= tab->table->pos_in_table_list;
+ need_unpacking= tbl ? tbl->is_sjm_scan_table() : FALSE;
+ }
+
tab->build_range_rowid_filter_if_needed();
DBUG_EXECUTE_IF("kill_join_init_read_record",
@@ -21249,16 +21215,6 @@ int join_init_read_record(JOIN_TAB *tab)
if (!tab->preread_init_done && tab->preread_init())
return 1;
-
- if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
- tab->select, tab->filesort_result, 1,1, FALSE))
- return 1;
- return tab->read_record.read_record();
-}
-
-int
-join_read_record_no_init(JOIN_TAB *tab)
-{
Copy_field *save_copy, *save_copy_end;
/*
@@ -21268,12 +21224,20 @@ join_read_record_no_init(JOIN_TAB *tab)
save_copy= tab->read_record.copy_field;
save_copy_end= tab->read_record.copy_field_end;
- init_read_record(&tab->read_record, tab->join->thd, tab->table,
- tab->select, tab->filesort_result, 1, 1, FALSE);
+
+ if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
+ tab->select, tab->filesort_result, 1, 1, FALSE))
+ return 1;
tab->read_record.copy_field= save_copy;
tab->read_record.copy_field_end= save_copy_end;
- tab->read_record.read_record_func= rr_sequential_and_unpack;
+
+ if (need_unpacking)
+ {
+ tab->read_record.read_record_func_and_unpack_calls=
+ tab->read_record.read_record_func;
+ tab->read_record.read_record_func = read_record_func_for_rr_and_unpack;
+ }
return tab->read_record.read_record();
}
@@ -28981,6 +28945,19 @@ void build_notnull_conds_for_inner_nest_of_outer_join(JOIN *join,
}
+/*
+ @brief
+ Unpacking temp table fields to base table fields.
+*/
+
+void JOIN_TAB::unpacking_to_base_table_fields()
+{
+ for (Copy_field *cp= read_record.copy_field;
+ cp != read_record.copy_field_end; cp++)
+ (*cp->do_copy)(cp);
+}
+
+
/**
@} (end of group Query_Optimizer)
*/
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 4f7bf49f635..545d4a788cc 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -223,7 +223,7 @@ typedef enum_nested_loop_state
(*Next_select_func)(JOIN *, struct st_join_table *, bool);
Next_select_func setup_end_select_func(JOIN *join, JOIN_TAB *tab);
int rr_sequential(READ_RECORD *info);
-int rr_sequential_and_unpack(READ_RECORD *info);
+int read_record_func_for_rr_and_unpack(READ_RECORD *info);
Item *remove_pushed_top_conjuncts(THD *thd, Item *cond);
Item *and_new_conditions_to_optimized_cond(THD *thd, Item *cond,
COND_EQUAL **cond_eq,
@@ -676,6 +676,7 @@ typedef struct st_join_table {
table_map remaining_tables);
bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables,
bool is_const_table);
+ void unpacking_to_base_table_fields();
} JOIN_TAB;
@@ -2352,7 +2353,6 @@ create_virtual_tmp_table(THD *thd, Field *field)
int test_if_item_cache_changed(List<Cached_item> &list);
int join_init_read_record(JOIN_TAB *tab);
-int join_read_record_no_init(JOIN_TAB *tab);
void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key);
inline Item * and_items(THD *thd, Item* cond, Item *item)
{
diff --git a/sql/table.h b/sql/table.h
index 1a7e5fbd4dc..35ba9bbb95d 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -2622,6 +2622,7 @@ struct TABLE_LIST
*/
const char *get_table_name() const { return view != NULL ? view_name.str : table_name.str; }
bool is_active_sjm();
+ bool is_sjm_scan_table();
bool is_jtbm() { return MY_TEST(jtbm_subselect != NULL); }
st_select_lex_unit *get_unit();
st_select_lex *get_single_select();
2
1

[Commits] 6dc3fa2e054: Support Create_time and Update_time in MyRocks table status
by psergey 06 Oct '19
by psergey 06 Oct '19
06 Oct '19
revision-id: 6dc3fa2e054d999a12cd5d7aaf637e4ac7c3f149 (fb-prod201903-144-g6dc3fa2e054)
parent(s): d97c0c628e5dc60abd725f6a7120a8d87b09321e
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-10-06 23:48:49 +0300
message:
Support Create_time and Update_time in MyRocks table status
(variant #4, with @@rocksdb_table_dictionary_format)
- Create_time is stored in the MyRocks' internal data dictionary.
- Update_time is in-memory only (like in InnoDB).
@@rocksdb_table_dictionary_format is a global read-only variable (set it
from my.cnf) which controls the on-disk data format.
rocksdb_table_dictionary_format=1 means use the same data format as
before. Create_time will always be NULL for all tables that are created.
rocksdb_table_dictionary_format=2 (the default) means use the newer data
format. All newly-created tables will have proper Create_time attribute.
Downgrades are only possible if one hasn't run any DDL that re-creates a
table.
---
mysql-test/suite/rocksdb/include/bulk_load.inc | 4 +-
.../suite/rocksdb/include/bulk_load_unsorted.inc | 4 +-
mysql-test/suite/rocksdb/r/bulk_load.result | 12 +--
mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result | 12 +--
.../rocksdb/r/bulk_load_rev_cf_and_data.result | 12 +--
.../suite/rocksdb/r/bulk_load_rev_data.result | 12 +--
.../suite/rocksdb/r/bulk_load_unsorted.result | 12 +--
.../suite/rocksdb/r/bulk_load_unsorted_rev.result | 12 +--
mysql-test/suite/rocksdb/r/issue255.result | 16 ++--
mysql-test/suite/rocksdb/r/rocksdb.result | 7 +-
.../r/rocksdb_table_dictionary_format.result | 37 ++++++++++
.../suite/rocksdb/r/show_table_status.result | 85 +++++++++++++++++++++-
mysql-test/suite/rocksdb/r/truncate_table.result | 8 +-
mysql-test/suite/rocksdb/t/issue255.test | 17 +++--
mysql-test/suite/rocksdb/t/rocksdb.test | 4 +-
.../rocksdb/t/rocksdb_table_dictionary_format.test | 67 +++++++++++++++++
mysql-test/suite/rocksdb/t/show_table_status.test | 81 ++++++++++++++++++++-
mysql-test/suite/rocksdb/t/truncate_table.test | 8 +-
.../r/rocksdb_table_dictionary_format_basic.result | 14 ++++
.../t/rocksdb_table_dictionary_format_basic.test | 16 ++++
storage/rocksdb/ha_rocksdb.cc | 53 ++++++++++++++
storage/rocksdb/ha_rocksdb.h | 1 +
storage/rocksdb/rdb_datadic.cc | 57 ++++++++++++---
storage/rocksdb/rdb_datadic.h | 36 ++++++++-
24 files changed, 502 insertions(+), 85 deletions(-)
diff --git a/mysql-test/suite/rocksdb/include/bulk_load.inc b/mysql-test/suite/rocksdb/include/bulk_load.inc
index 1b79825e507..7e163602202 100644
--- a/mysql-test/suite/rocksdb/include/bulk_load.inc
+++ b/mysql-test/suite/rocksdb/include/bulk_load.inc
@@ -121,12 +121,12 @@ set rocksdb_bulk_load=0;
--remove_file $file
# Make sure row count index stats are correct
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
ANALYZE TABLE t1, t2, t3;
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
# Make sure all the data is there.
diff --git a/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc b/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
index 5cdc76a32d4..812af0401aa 100644
--- a/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
+++ b/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
@@ -119,12 +119,12 @@ set rocksdb_bulk_load=0;
--remove_file $file
# Make sure row count index stats are correct
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
ANALYZE TABLE t1, t2, t3;
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
# Make sure all the data is there.
diff --git a/mysql-test/suite/rocksdb/r/bulk_load.result b/mysql-test/suite/rocksdb/r/bulk_load.result
index a36f99a7619..76db28e66bd 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
index b5d3e252c5d..ae363f7ec0c 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
index f46acd41080..dd8dd7e60a8 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
index 3389968ef37..96738ae62e2 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result b/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
index 924032549ac..87fc63af2da 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
@@ -70,9 +70,9 @@ LOAD DATA INFILE <input_file> INTO TABLE t3;
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -80,9 +80,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
select count(a) from t1;
count(a)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result b/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
index 3cc9fb8e459..8e0914f0159 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
@@ -70,9 +70,9 @@ LOAD DATA INFILE <input_file> INTO TABLE t3;
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -80,9 +80,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
select count(a) from t1;
count(a)
5000000
diff --git a/mysql-test/suite/rocksdb/r/issue255.result b/mysql-test/suite/rocksdb/r/issue255.result
index c1ce3be2276..b45b3b5afc7 100644
--- a/mysql-test/suite/rocksdb/r/issue255.result
+++ b/mysql-test/suite/rocksdb/r/issue255.result
@@ -2,7 +2,7 @@ CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 6 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ('538647864786478647864');
Warnings:
Warning 1264 Out of range value for column 'pk' at row 1
@@ -12,7 +12,7 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
SELECT * FROM t1;
@@ -21,7 +21,7 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
SELECT * FROM t1;
@@ -30,13 +30,13 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 # # NULL latin1_swedish_ci NULL
DROP TABLE t1;
CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 6 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES (1000);
Warnings:
Warning 1264 Out of range value for column 'pk' at row 1
@@ -46,7 +46,7 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
SELECT * FROM t1;
@@ -55,7 +55,7 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
SELECT * FROM t1;
@@ -64,5 +64,5 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/r/rocksdb.result b/mysql-test/suite/rocksdb/r/rocksdb.result
index 088eb050f6f..c76f31afaab 100644
--- a/mysql-test/suite/rocksdb/r/rocksdb.result
+++ b/mysql-test/suite/rocksdb/r/rocksdb.result
@@ -979,6 +979,7 @@ rocksdb_store_row_debug_checksums OFF
rocksdb_strict_collation_check OFF
rocksdb_strict_collation_exceptions
rocksdb_table_cache_numshardbits 6
+rocksdb_table_dictionary_format 2
rocksdb_table_stats_background_thread_nice_value 19
rocksdb_table_stats_max_num_rows_scanned 0
rocksdb_table_stats_recalc_threshold_count 100
@@ -1417,7 +1418,7 @@ create table t1 (i int primary key auto_increment) engine=RocksDB;
insert into t1 values (null),(null);
show table status like 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 # 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 # 0 0 0 3 # # NULL latin1_swedish_ci NULL
drop table t1;
#
# Fix Issue #4: Crash when using pseudo-unique keys
@@ -2612,7 +2613,7 @@ CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(-1),(0);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 3 # # NULL latin1_swedish_ci NULL
SELECT * FROM t1;
a
-1
@@ -2623,7 +2624,7 @@ CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(10),(0);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 12 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 12 # # NULL latin1_swedish_ci NULL
SELECT * FROM t1;
a
1
diff --git a/mysql-test/suite/rocksdb/r/rocksdb_table_dictionary_format.result b/mysql-test/suite/rocksdb/r/rocksdb_table_dictionary_format.result
new file mode 100644
index 00000000000..8f2fa96a04a
--- /dev/null
+++ b/mysql-test/suite/rocksdb/r/rocksdb_table_dictionary_format.result
@@ -0,0 +1,37 @@
+select @@rocksdb_table_dictionary_format;
+@@rocksdb_table_dictionary_format
+2
+#
+# Server restarted
+#
+select @@rocksdb_table_dictionary_format;
+@@rocksdb_table_dictionary_format
+1
+create table t1 (a int) engine=rocksdb;
+insert into t1 values (1);
+# Create_time will be NULL as the table doesn't support it
+# Update_time will be non-null
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time update_time is not null
+NULL 1
+#
+# Server restarted
+#
+select @@rocksdb_table_dictionary_format;
+@@rocksdb_table_dictionary_format
+2
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time update_time is not null
+NULL 0
+set global rocksdb_compact_cf='default';
+select concat('00', '00', '00', '01', hex('test.t1'));
+concat('00', '00', '00', '01', hex('test.t1'))
+00000001746573742E7431
+Found the datadic entry
+Value has format version 1, followed by 8 bytes describing one index
+Done
+drop table t1;
diff --git a/mysql-test/suite/rocksdb/r/show_table_status.result b/mysql-test/suite/rocksdb/r/show_table_status.result
index 29140f045e4..345882040ef 100644
--- a/mysql-test/suite/rocksdb/r/show_table_status.result
+++ b/mysql-test/suite/rocksdb/r/show_table_status.result
@@ -7,12 +7,12 @@ set global rocksdb_force_flush_memtable_now = true;
CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' );
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL utf8_general_ci NULL
+t1 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL utf8_general_ci NULL
SHOW TABLE STATUS WHERE name LIKE 't2';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
DROP TABLE t1, t2, t3;
CREATE DATABASE `db_new..............................................end`;
USE `db_new..............................................end`;
@@ -22,3 +22,80 @@ SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.table_statistics WHERE T
TABLE_SCHEMA db_new..............................................end
TABLE_NAME t1_new..............................................end
DROP DATABASE `db_new..............................................end`;
+#
+# MDEV-17171: Bug: RocksDB Tables do not have "Creation Date"
+#
+use test;
+create table t1 (a int) engine=rocksdb;
+select create_time is not null, update_time, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time check_time
+1 NULL NULL
+insert into t1 values (1);
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time is not null check_time
+1 1 NULL
+flush tables;
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time is not null check_time
+1 1 NULL
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+select sleep(3);
+sleep(3)
+0
+insert into t1 values (2);
+select
+create_time=@create_tm /* should not change */ ,
+timestampdiff(second, @update_tm, update_time) > 2,
+check_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=@create_tm 1
+timestampdiff(second, @update_tm, update_time) > 2 1
+check_time NULL
+#
+# Check how create_time survives ALTER TABLE.
+# First, an ALTER TABLE that re-creates the table:
+alter table t1 add b int;
+select
+create_time<>@create_tm /* should change */,
+create_time IS NOT NULL,
+update_time IS NULL
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time<>@create_tm 1
+create_time IS NOT NULL 1
+update_time IS NULL 1
+insert into t1 values (5,5);
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+# Then, an in-place ALTER TABLE:
+alter table t1 add key (a);
+select
+create_time=@create_tm /* should not change */,
+update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=@create_tm 1
+update_time NULL
+#
+# Check what is left after server restart
+#
+# Save t1's creation time
+create table t2 as
+select create_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+select
+create_time=(select create_time from t2) /* should change */,
+update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=(select create_time from t2) 1
+update_time NULL
+drop table t1, t2;
diff --git a/mysql-test/suite/rocksdb/r/truncate_table.result b/mysql-test/suite/rocksdb/r/truncate_table.result
index 1544256f194..79b266a2453 100644
--- a/mysql-test/suite/rocksdb/r/truncate_table.result
+++ b/mysql-test/suite/rocksdb/r/truncate_table.result
@@ -9,19 +9,19 @@ DROP TABLE t1;
CREATE TABLE t1 (a INT KEY AUTO_INCREMENT, c CHAR(8)) ENGINE=rocksdb;
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 1 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 (c) VALUES ('a'),('b'),('c');
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 4 # # NULL latin1_swedish_ci NULL
TRUNCATE TABLE t1;
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 1 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 (c) VALUES ('d');
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 2 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 2 # # NULL latin1_swedish_ci NULL
SELECT a,c FROM t1;
a c
1 d
diff --git a/mysql-test/suite/rocksdb/t/issue255.test b/mysql-test/suite/rocksdb/t/issue255.test
index 370dece0c6c..686f45b4056 100644
--- a/mysql-test/suite/rocksdb/t/issue255.test
+++ b/mysql-test/suite/rocksdb/t/issue255.test
@@ -3,24 +3,25 @@
CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 VALUES ('538647864786478647864');
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SELECT * FROM t1;
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
DROP TABLE t1;
@@ -28,24 +29,24 @@ DROP TABLE t1;
CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 VALUES (1000);
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/t/rocksdb.test b/mysql-test/suite/rocksdb/t/rocksdb.test
index 5eff0fbf38f..7dcae569c92 100644
--- a/mysql-test/suite/rocksdb/t/rocksdb.test
+++ b/mysql-test/suite/rocksdb/t/rocksdb.test
@@ -1198,7 +1198,7 @@ drop table t1;
create table t1 (i int primary key auto_increment) engine=RocksDB;
insert into t1 values (null),(null);
---replace_column 7 #
+--replace_column 7 # 12 # 13 #
show table status like 't1';
drop table t1;
@@ -1903,11 +1903,13 @@ DROP TABLE t1;
# value is 4 while MyRocks will show it as 3.
CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(-1),(0);
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
SELECT * FROM t1;
DROP TABLE t1;
CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(10),(0);
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
SELECT * FROM t1;
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/t/rocksdb_table_dictionary_format.test b/mysql-test/suite/rocksdb/t/rocksdb_table_dictionary_format.test
new file mode 100644
index 00000000000..3dedf6502ab
--- /dev/null
+++ b/mysql-test/suite/rocksdb/t/rocksdb_table_dictionary_format.test
@@ -0,0 +1,67 @@
+--source include/have_rocksdb.inc
+
+select @@rocksdb_table_dictionary_format;
+
+#
+# Check the upgrade from Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1 to _2
+#
+
+--let $_mysqld_option=--rocksdb_table_dictionary_format=1
+--source include/restart_mysqld_with_option.inc
+
+--echo #
+--echo # Server restarted
+--echo #
+select @@rocksdb_table_dictionary_format;
+
+create table t1 (a int) engine=rocksdb;
+insert into t1 values (1);
+
+--echo # Create_time will be NULL as the table doesn't support it
+--echo # Update_time will be non-null
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--let $_mysqld_option=--rocksdb_table_dictionary_format=2
+--source include/restart_mysqld_with_option.inc
+--echo #
+--echo # Server restarted
+--echo #
+select @@rocksdb_table_dictionary_format;
+
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+set global rocksdb_compact_cf='default';
+# use: $MYSQLTEST_VARDIR $MYSQL_SST_DUMP
+let MYSQL_DATADIR = `select @@datadir`;
+let MYSQL_SST_DUMP = $MYSQL_SST_DUMP;
+
+select concat('00', '00', '00', '01', hex('test.t1'));
+
+# We are looking to find a record like this:
+# '00000001746573742E7431' seq:0, type:1 => 00010000000000000104
+# that is:
+# '00000001:746573742E7431' seq:0, type:1 => 0001: 0000000000000104
+# the key is: DDL_ENTRY_INDEX_START_NUMBER, "test.t1"
+# the value is: DDL_ENTRY_INDEX_VERSION_1, {cf_id, index_id}
+perl;
+ my $datadir = $ENV{'MYSQL_DATADIR'};
+ my $sst_dump = $ENV{'MYSQL_SST_DUMP'};
+ open(IN, "$sst_dump --command=scan --output_hex --file=$datadir/.rocksdb |");
+ while(<IN>) {
+ if ($_ =~ /^\'00000001746573742E7431\'/) {
+ print "Found the datadic entry\n";
+    if ($_ =~ / 0001[0-9A-F]{16}$/) {
+ print "Value has format version 1, followed by 8 bytes describing one index\n"
+ } else {
+ print "Unexpected value. $_\n";
+ }
+ }
+ }
+ print "Done\n";
+ close(IN);
+EOF
+drop table t1;
+
diff --git a/mysql-test/suite/rocksdb/t/show_table_status.test b/mysql-test/suite/rocksdb/t/show_table_status.test
index 29cc2ccfb5e..59effcc788c 100644
--- a/mysql-test/suite/rocksdb/t/show_table_status.test
+++ b/mysql-test/suite/rocksdb/t/show_table_status.test
@@ -24,7 +24,7 @@ set global rocksdb_force_flush_memtable_now = true;
CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
---replace_column 6 # 7 #
+--replace_column 6 # 7 # 12 # 13 #
SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' );
# Some statistics don't get updated as quickly. The Data_length and
@@ -48,7 +48,7 @@ set global rocksdb_force_flush_memtable_now = true;
# We expect the number of rows to be 10000. Data_len and Avg_row_len
# may vary, depending on built-in compression library.
---replace_column 6 # 7 #
+--replace_column 6 # 7 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't2';
DROP TABLE t1, t2, t3;
@@ -62,3 +62,80 @@ CREATE TABLE `t1_new..............................................end`(a int) en
INSERT INTO `t1_new..............................................end` VALUES (1);
--query_vertical SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.table_statistics WHERE TABLE_NAME = 't1_new..............................................end'
DROP DATABASE `db_new..............................................end`;
+--echo #
+--echo # MDEV-17171: Bug: RocksDB Tables do not have "Creation Date"
+--echo #
+use test;
+create table t1 (a int) engine=rocksdb;
+
+select create_time is not null, update_time, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+insert into t1 values (1);
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+flush tables;
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+select sleep(3);
+insert into t1 values (2);
+
+--vertical_results
+select
+ create_time=@create_tm /* should not change */ ,
+ timestampdiff(second, @update_tm, update_time) > 2,
+ check_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo #
+--echo # Check how create_time survives ALTER TABLE.
+--echo # First, an ALTER TABLE that re-creates the table:
+alter table t1 add b int;
+select
+ create_time<>@create_tm /* should change */,
+ create_time IS NOT NULL,
+ update_time IS NULL
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+insert into t1 values (5,5);
+
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo # Then, an in-place ALTER TABLE:
+alter table t1 add key (a);
+
+select
+ create_time=@create_tm /* should not change */,
+ update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo #
+--echo # Check what is left after server restart
+--echo #
+
+--echo # Save t1's creation time
+create table t2 as
+select create_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--source include/restart_mysqld.inc
+
+select
+ create_time=(select create_time from t2) /* should change */,
+ update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+drop table t1, t2;
diff --git a/mysql-test/suite/rocksdb/t/truncate_table.test b/mysql-test/suite/rocksdb/t/truncate_table.test
index a61488654a3..1001eeb6cde 100644
--- a/mysql-test/suite/rocksdb/t/truncate_table.test
+++ b/mysql-test/suite/rocksdb/t/truncate_table.test
@@ -29,22 +29,22 @@ DROP TABLE t1;
CREATE TABLE t1 (a INT KEY AUTO_INCREMENT, c CHAR(8)) ENGINE=rocksdb;
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 (c) VALUES ('a'),('b'),('c');
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
TRUNCATE TABLE t1;
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 (c) VALUES ('d');
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--sorted_result
diff --git a/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_table_dictionary_format_basic.result b/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_table_dictionary_format_basic.result
new file mode 100644
index 00000000000..825d7a7113a
--- /dev/null
+++ b/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_table_dictionary_format_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(2);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_TABLE_DICTIONARY_FORMAT;
+SELECT @start_global_value;
+@start_global_value
+2
+"Trying to set variable @@global.ROCKSDB_TABLE_DICTIONARY_FORMAT to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_TABLE_DICTIONARY_FORMAT = 444;
+ERROR HY000: Variable 'rocksdb_table_dictionary_format' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_table_dictionary_format_basic.test b/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_table_dictionary_format_basic.test
new file mode 100644
index 00000000000..2b8c6e81bf7
--- /dev/null
+++ b/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_table_dictionary_format_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(2);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_TABLE_DICTIONARY_FORMAT
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index d6c89087bea..a0a344187cf 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -122,6 +122,8 @@ const std::string PER_INDEX_CF_NAME("$per_index_cf");
static std::vector<std::string> rdb_tables_to_recalc;
+uint rocksdb_table_dictionary_format;
+
class Rdb_explicit_snapshot : public explicit_snapshot {
public:
static std::shared_ptr<Rdb_explicit_snapshot> create(
@@ -2136,6 +2138,17 @@ static MYSQL_SYSVAR_ULONGLONG(
"MultiGet",
nullptr, nullptr, SIZE_T_MAX, /* min */ 0, /* max */ SIZE_T_MAX, 0);
+static MYSQL_SYSVAR_UINT(table_dictionary_format,
+ rocksdb_table_dictionary_format,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Max Table Dictionary format version that the server "
+ "may create (use lower values for backward "
+ " compatibility, higher values for new features)",
+ nullptr, nullptr,
+ ROCKSDB_DATADIC_FORMAT_DEFAULT,
+ ROCKSDB_DATADIC_FORMAT_INITIAL,
+ ROCKSDB_DATADIC_FORMAT_MAX, 0);
+
static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100;
static struct st_mysql_sys_var *rocksdb_system_variables[] = {
@@ -2309,6 +2322,8 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(select_bypass_log_rejected),
MYSQL_SYSVAR(select_bypass_debug_row_delay),
MYSQL_SYSVAR(select_bypass_multiget_min),
+
+ MYSQL_SYSVAR(table_dictionary_format),
nullptr};
static rocksdb::WriteOptions rdb_get_rocksdb_write_options(
@@ -2456,6 +2471,8 @@ class Rdb_transaction {
bool m_is_delayed_snapshot = false;
bool m_is_two_phase = false;
+ std::unordered_set<Rdb_tbl_def*> modified_tables;
+
private:
/*
Number of write operations this transaction had when we took the last
@@ -3139,6 +3156,25 @@ class Rdb_transaction {
virtual void start_tx() = 0;
virtual void start_stmt() = 0;
+ protected:
+ // Non-virtual functions with actions to be done on transaction start and
+ // commit.
+ void on_commit() {
+ time_t tm;
+ tm = time(nullptr);
+ for (auto &it : modified_tables) {
+ it->update_time = tm;
+ }
+ modified_tables.clear();
+ }
+ void on_rollback() {
+ modified_tables.clear();
+ }
+ public:
+ void log_table_write_op(Rdb_tbl_def *tbl) {
+ modified_tables.insert(tbl);
+ }
+
void set_initial_savepoint() {
/*
Set the initial savepoint. If the first statement in the transaction
@@ -3322,7 +3358,9 @@ class Rdb_transaction_impl : public Rdb_transaction {
goto error;
}
+ on_commit();
error:
+ on_rollback();
/* Save the transaction object to be reused */
release_tx();
@@ -3338,6 +3376,7 @@ class Rdb_transaction_impl : public Rdb_transaction {
public:
void rollback() override {
+ on_rollback();
m_write_count = 0;
m_insert_count = 0;
m_update_count = 0;
@@ -3659,7 +3698,9 @@ class Rdb_writebatch_impl : public Rdb_transaction {
res = true;
goto error;
}
+ on_commit();
error:
+ on_rollback();
reset();
m_write_count = 0;
@@ -3691,6 +3732,7 @@ class Rdb_writebatch_impl : public Rdb_transaction {
}
void rollback() override {
+ on_rollback();
m_write_count = 0;
m_insert_count = 0;
m_update_count = 0;
@@ -7549,6 +7591,7 @@ int ha_rocksdb::create_table(const std::string &table_name,
/* Create table/key descriptions and put them into the data dictionary */
m_tbl_def = new Rdb_tbl_def(table_name);
+ m_tbl_def->set_create_time(time(nullptr));
uint n_keys = table_arg->s->keys;
@@ -10177,6 +10220,8 @@ int ha_rocksdb::update_write_row(const uchar *const old_data,
row_info.tx->incr_insert_count();
}
+ row_info.tx->log_table_write_op(m_tbl_def);
+
if (do_bulk_commit(row_info.tx)) {
DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD);
}
@@ -10648,6 +10693,7 @@ int ha_rocksdb::delete_row(const uchar *const buf) {
}
tx->incr_delete_count();
+ tx->log_table_write_op(m_tbl_def);
if (do_bulk_commit(tx)) {
DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD);
@@ -10802,6 +10848,12 @@ int ha_rocksdb::info(uint flag) {
k->rec_per_key[j] = x;
}
}
+
+ stats.create_time = m_tbl_def->get_create_time();
+ }
+
+ if (flag & HA_STATUS_TIME) {
+ stats.update_time = m_tbl_def->update_time;
}
if (flag & HA_STATUS_ERRKEY) {
@@ -12603,6 +12655,7 @@ bool ha_rocksdb::prepare_inplace_alter_table(
m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed);
new_tdef->m_hidden_pk_val =
m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed);
+ new_tdef->set_create_time(m_tbl_def->get_create_time());
if (create_key_defs(altered_table, new_tdef, table, m_tbl_def)) {
/* Delete the new key descriptors */
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index 9a250af40c7..c36c5c98e19 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -82,6 +82,7 @@ extern PSI_rwlock_key key_rwlock_read_free_rpl_tables;
#endif
extern Regex_list_handler rdb_read_free_regex_handler;
+extern uint rocksdb_table_dictionary_format;
/**
@brief
Rdb_table_handler is a reference-counted structure storing information for
diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc
index c0741a1ce9b..234f7a789ff 100644
--- a/storage/rocksdb/rdb_datadic.cc
+++ b/storage/rocksdb/rdb_datadic.cc
@@ -3514,8 +3514,21 @@ bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict,
const rocksdb::Slice &key) {
StringBuffer<8 * Rdb_key_def::PACKED_SIZE> indexes;
indexes.alloc(Rdb_key_def::VERSION_SIZE +
+ Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE +
m_key_count * Rdb_key_def::PACKED_SIZE * 2);
- rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION);
+
+ if (rocksdb_table_dictionary_format <
+ ROCKSDB_DATADIC_FORMAT_CREATE_TIMESTAMP) {
+ rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1);
+ // We are using old data format, which means we cannot save Create_time
+ // Set it to be shown as unknown right away, so that the behavior before
+ // server restart and after is the same.
+ set_create_time(0);
+ }
+ else {
+ rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2);
+ rdb_netstr_append_uint64(&indexes, create_time);
+ }
for (uint i = 0; i < m_key_count; i++) {
const Rdb_key_def &kd = *m_key_descr_arr[i];
@@ -4015,27 +4028,52 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
Rdb_tbl_def *const tdef =
new Rdb_tbl_def(key, Rdb_key_def::INDEX_NUMBER_SIZE);
- // Now, read the DDLs.
- const int real_val_size = val.size() - Rdb_key_def::VERSION_SIZE;
- if (real_val_size % Rdb_key_def::PACKED_SIZE * 2 > 0) {
+ if (val.size() < Rdb_key_def::VERSION_SIZE) {
// NO_LINT_DEBUG
sql_print_error("RocksDB: Table_store: invalid keylist for table %s",
tdef->full_tablename().c_str());
return true;
}
- tdef->m_key_count = real_val_size / (Rdb_key_def::PACKED_SIZE * 2);
- tdef->m_key_descr_arr = new std::shared_ptr<Rdb_key_def>[tdef->m_key_count];
ptr = reinterpret_cast<const uchar *>(val.data());
const int version = rdb_netbuf_read_uint16(&ptr);
- if (version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION) {
+
+ if (version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1 &&
+ version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2) {
// NO_LINT_DEBUG
sql_print_error(
"RocksDB: DDL ENTRY Version was not expected."
- "Expected: %d, Actual: %d",
- Rdb_key_def::DDL_ENTRY_INDEX_VERSION, version);
+ "Expected: %d..%d, Actual: %d",
+ Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1,
+ Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2, version);
return true;
}
+ int real_val_size = val.size() - Rdb_key_def::VERSION_SIZE;
+
+ if (version == Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2) {
+ if (real_val_size < Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE) {
+ // NO_LINT_DEBUG
+ sql_print_error( "RocksDB: DDL ENTRY V2 doesn't have timestamp");
+ delete tdef;
+ return true;
+ }
+ tdef->set_create_time(rdb_netbuf_read_uint64(&ptr));
+ real_val_size -= Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE;
+ }
+ else
+ tdef->set_create_time(0); // shown as SQL NULL.
+
+ // Now, read the DDLs.
+ if (real_val_size % Rdb_key_def::PACKED_SIZE * 2 > 0) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Table_store: invalid keylist for table %s",
+ tdef->full_tablename().c_str());
+ return true;
+ }
+ tdef->m_key_count = real_val_size / (Rdb_key_def::PACKED_SIZE * 2);
+ tdef->m_key_descr_arr = new std::shared_ptr<Rdb_key_def>[tdef->m_key_count];
+
+
ptr_end = ptr + real_val_size;
for (uint keyno = 0; ptr < ptr_end; keyno++) {
GL_INDEX_ID gl_index_id;
@@ -4471,6 +4509,7 @@ bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to,
rec->m_hidden_pk_val.load(std::memory_order_relaxed);
new_rec->m_tbl_stats = rec->m_tbl_stats;
+ new_rec->set_create_time(rec->get_create_time());
// so that it's not free'd when deleting the old rec
rec->m_key_descr_arr = nullptr;
diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h
index 416857cad38..9c2a96e77b7 100644
--- a/storage/rocksdb/rdb_datadic.h
+++ b/storage/rocksdb/rdb_datadic.h
@@ -203,6 +203,22 @@ enum {
UNPACK_FAILURE = 1,
};
+
+/*
+ Global user-visible data dictionary format version.
+ The server will read the data of any version, but will not write data
+ structures that were introduced after the version in
+ rocksdb_table_dictionary_format.
+ This way, one can keep the on-disk data backward-compatible.
+*/
+const uint ROCKSDB_DATADIC_FORMAT_INITIAL = 1;
+const uint ROCKSDB_DATADIC_FORMAT_CREATE_TIMESTAMP = 2;
+
+// Maximum possible value:
+const uint ROCKSDB_DATADIC_FORMAT_MAX = 2;
+const uint ROCKSDB_DATADIC_FORMAT_DEFAULT = ROCKSDB_DATADIC_FORMAT_MAX;
+
+
/*
An object of this class represents information about an index in an SQL
table. It provides services to encode and decode index tuples.
@@ -465,6 +481,7 @@ class Rdb_key_def {
CF_NUMBER_SIZE = 4,
CF_FLAG_SIZE = 4,
PACKED_SIZE = 4, // one int
+ TABLE_CREATE_TIMESTAMP_SIZE = 8,
};
// bit flags for combining bools when writing to disk
@@ -506,7 +523,10 @@ class Rdb_key_def {
// Data dictionary schema version. Introduce newer versions
// if changing schema layout
enum {
- DDL_ENTRY_INDEX_VERSION = 1,
+ DDL_ENTRY_INDEX_VERSION_1 = 1,
+ // this includes a 64-bit table_creation_time at the end.
+ // Allowed since ROCKSDB_DATADIC_FORMAT_CREATE_TIMESTAMP.
+ DDL_ENTRY_INDEX_VERSION_2 = 2,
CF_DEFINITION_VERSION = 1,
BINLOG_INFO_INDEX_NUMBER_VERSION = 1,
DDL_DROP_INDEX_ONGOING_VERSION = 1,
@@ -1116,6 +1136,12 @@ class Rdb_tbl_def {
~Rdb_tbl_def();
+ // time values are shown in SHOW TABLE STATUS
+ void set_create_time(time_t val) { create_time = val; }
+ time_t get_create_time() { return create_time; }
+
+ time_t update_time = 0; // in-memory only value, maintained right here
+
void check_and_set_read_free_rpl_table();
/* Number of indexes */
@@ -1161,6 +1187,9 @@ class Rdb_tbl_def {
const std::string &base_tablename() const { return m_tablename; }
const std::string &base_partition() const { return m_partition; }
GL_INDEX_ID get_autoincr_gl_index_id();
+
+ private:
+ time_t create_time = 0;
};
/*
@@ -1341,8 +1370,11 @@ class Rdb_binlog_manager {
1. Table Name => internal index id mappings
key: Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER(0x1) + dbname.tablename
- value: version, {cf_id, index_id}*n_indexes_of_the_table
+ value: DDL_ENTRY_INDEX_VERSION_1, {cf_id, index_id}*n_indexes_of_the_table
+ or value: DDL_ENTRY_INDEX_VERSION_2, create_timestamp, {cf_id, index_id}*
+ n_indexes_of_the_table
version is 2 bytes. cf_id and index_id are 4 bytes.
+ create_timestamp is 8 bytes.
2. internal cf_id, index id => index information
key: Rdb_key_def::INDEX_INFO(0x2) + cf_id + index_id
1
0

[Commits] 8dc011b9965: Support Create_time and Update_time in MyRocks table status
by psergey 06 Oct '19
by psergey 06 Oct '19
06 Oct '19
revision-id: 8dc011b99657ae3d25e44debdc4bbeaebbaecb52 (fb-prod201903-144-g8dc011b9965)
parent(s): d97c0c628e5dc60abd725f6a7120a8d87b09321e
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-10-06 23:47:41 +0300
message:
Support Create_time and Update_time in MyRocks table status
(variant #3, with @@rocksdb_table_dictionary_format)
- Create_time is stored in the MyRocks' internal data dictionary.
- Update_time is in-memory only (like in InnoDB).
@@rocksdb_table_dictionary_format is a global read-only variable (set it
from my.cnf) which controls the on-disk data format.
rocksdb_table_dictionary_format=1 means use the same data format as
before. Create_time will always be NULL for all tables that are created.
rocksdb_table_dictionary_format=2 (the default) means use the newer data
format. All newly-created tables will have proper Create_time attribute.
Downgrades are only possible if one hasn't run any DDL that re-creates a
table.
---
mysql-test/suite/rocksdb/include/bulk_load.inc | 4 +-
.../suite/rocksdb/include/bulk_load_unsorted.inc | 4 +-
mysql-test/suite/rocksdb/r/bulk_load.result | 12 +--
mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result | 12 +--
.../rocksdb/r/bulk_load_rev_cf_and_data.result | 12 +--
.../suite/rocksdb/r/bulk_load_rev_data.result | 12 +--
.../suite/rocksdb/r/bulk_load_unsorted.result | 12 +--
.../suite/rocksdb/r/bulk_load_unsorted_rev.result | 12 +--
mysql-test/suite/rocksdb/r/issue255.result | 16 ++--
mysql-test/suite/rocksdb/r/rocksdb.result | 7 +-
.../r/rocksdb_table_dictionary_format.result | 37 ++++++++++
.../suite/rocksdb/r/show_table_status.result | 85 +++++++++++++++++++++-
mysql-test/suite/rocksdb/r/truncate_table.result | 8 +-
mysql-test/suite/rocksdb/t/issue255.test | 17 +++--
mysql-test/suite/rocksdb/t/rocksdb.test | 4 +-
.../rocksdb/t/rocksdb_table_dictionary_format.test | 67 +++++++++++++++++
mysql-test/suite/rocksdb/t/show_table_status.test | 81 ++++++++++++++++++++-
mysql-test/suite/rocksdb/t/truncate_table.test | 8 +-
.../r/rocksdb_table_dictionary_format_basic.result | 14 ++++
.../t/rocksdb_table_dictionary_format_basic.test | 16 ++++
storage/rocksdb/ha_rocksdb.cc | 53 ++++++++++++++
storage/rocksdb/ha_rocksdb.h | 1 +
storage/rocksdb/rdb_datadic.cc | 57 ++++++++++++---
storage/rocksdb/rdb_datadic.h | 36 ++++++++-
24 files changed, 502 insertions(+), 85 deletions(-)
diff --git a/mysql-test/suite/rocksdb/include/bulk_load.inc b/mysql-test/suite/rocksdb/include/bulk_load.inc
index 1b79825e507..7e163602202 100644
--- a/mysql-test/suite/rocksdb/include/bulk_load.inc
+++ b/mysql-test/suite/rocksdb/include/bulk_load.inc
@@ -121,12 +121,12 @@ set rocksdb_bulk_load=0;
--remove_file $file
# Make sure row count index stats are correct
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
ANALYZE TABLE t1, t2, t3;
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
# Make sure all the data is there.
diff --git a/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc b/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
index 5cdc76a32d4..812af0401aa 100644
--- a/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
+++ b/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
@@ -119,12 +119,12 @@ set rocksdb_bulk_load=0;
--remove_file $file
# Make sure row count index stats are correct
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
ANALYZE TABLE t1, t2, t3;
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
# Make sure all the data is there.
diff --git a/mysql-test/suite/rocksdb/r/bulk_load.result b/mysql-test/suite/rocksdb/r/bulk_load.result
index a36f99a7619..76db28e66bd 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
index b5d3e252c5d..ae363f7ec0c 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
index f46acd41080..dd8dd7e60a8 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
index 3389968ef37..96738ae62e2 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result b/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
index 924032549ac..87fc63af2da 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
@@ -70,9 +70,9 @@ LOAD DATA INFILE <input_file> INTO TABLE t3;
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -80,9 +80,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
select count(a) from t1;
count(a)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result b/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
index 3cc9fb8e459..8e0914f0159 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
@@ -70,9 +70,9 @@ LOAD DATA INFILE <input_file> INTO TABLE t3;
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -80,9 +80,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
select count(a) from t1;
count(a)
5000000
diff --git a/mysql-test/suite/rocksdb/r/issue255.result b/mysql-test/suite/rocksdb/r/issue255.result
index c1ce3be2276..b45b3b5afc7 100644
--- a/mysql-test/suite/rocksdb/r/issue255.result
+++ b/mysql-test/suite/rocksdb/r/issue255.result
@@ -2,7 +2,7 @@ CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 6 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ('538647864786478647864');
Warnings:
Warning 1264 Out of range value for column 'pk' at row 1
@@ -12,7 +12,7 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
SELECT * FROM t1;
@@ -21,7 +21,7 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
SELECT * FROM t1;
@@ -30,13 +30,13 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 # # NULL latin1_swedish_ci NULL
DROP TABLE t1;
CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 6 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES (1000);
Warnings:
Warning 1264 Out of range value for column 'pk' at row 1
@@ -46,7 +46,7 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
SELECT * FROM t1;
@@ -55,7 +55,7 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
SELECT * FROM t1;
@@ -64,5 +64,5 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/r/rocksdb.result b/mysql-test/suite/rocksdb/r/rocksdb.result
index 088eb050f6f..c76f31afaab 100644
--- a/mysql-test/suite/rocksdb/r/rocksdb.result
+++ b/mysql-test/suite/rocksdb/r/rocksdb.result
@@ -979,6 +979,7 @@ rocksdb_store_row_debug_checksums OFF
rocksdb_strict_collation_check OFF
rocksdb_strict_collation_exceptions
rocksdb_table_cache_numshardbits 6
+rocksdb_table_dictionary_format 2
rocksdb_table_stats_background_thread_nice_value 19
rocksdb_table_stats_max_num_rows_scanned 0
rocksdb_table_stats_recalc_threshold_count 100
@@ -1417,7 +1418,7 @@ create table t1 (i int primary key auto_increment) engine=RocksDB;
insert into t1 values (null),(null);
show table status like 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 # 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 # 0 0 0 3 # # NULL latin1_swedish_ci NULL
drop table t1;
#
# Fix Issue #4: Crash when using pseudo-unique keys
@@ -2612,7 +2613,7 @@ CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(-1),(0);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 3 # # NULL latin1_swedish_ci NULL
SELECT * FROM t1;
a
-1
@@ -2623,7 +2624,7 @@ CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(10),(0);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 12 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 12 # # NULL latin1_swedish_ci NULL
SELECT * FROM t1;
a
1
diff --git a/mysql-test/suite/rocksdb/r/rocksdb_table_dictionary_format.result b/mysql-test/suite/rocksdb/r/rocksdb_table_dictionary_format.result
new file mode 100644
index 00000000000..8f2fa96a04a
--- /dev/null
+++ b/mysql-test/suite/rocksdb/r/rocksdb_table_dictionary_format.result
@@ -0,0 +1,37 @@
+select @@rocksdb_table_dictionary_format;
+@@rocksdb_table_dictionary_format
+2
+#
+# Server restarted
+#
+select @@rocksdb_table_dictionary_format;
+@@rocksdb_table_dictionary_format
+1
+create table t1 (a int) engine=rocksdb;
+insert into t1 values (1);
+# Create_time will be NULL as the table doesn't support it
+# Update_time will be non-null
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time update_time is not null
+NULL 1
+#
+# Server restarted
+#
+select @@rocksdb_table_dictionary_format;
+@@rocksdb_table_dictionary_format
+2
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time update_time is not null
+NULL 0
+set global rocksdb_compact_cf='default';
+select concat('00', '00', '00', '01', hex('test.t1'));
+concat('00', '00', '00', '01', hex('test.t1'))
+00000001746573742E7431
+Found the datadic entry
+Value has format version 1, followed by 8 bytes describing one index
+Done
+drop table t1;
diff --git a/mysql-test/suite/rocksdb/r/show_table_status.result b/mysql-test/suite/rocksdb/r/show_table_status.result
index 29140f045e4..345882040ef 100644
--- a/mysql-test/suite/rocksdb/r/show_table_status.result
+++ b/mysql-test/suite/rocksdb/r/show_table_status.result
@@ -7,12 +7,12 @@ set global rocksdb_force_flush_memtable_now = true;
CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' );
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL utf8_general_ci NULL
+t1 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL utf8_general_ci NULL
SHOW TABLE STATUS WHERE name LIKE 't2';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
DROP TABLE t1, t2, t3;
CREATE DATABASE `db_new..............................................end`;
USE `db_new..............................................end`;
@@ -22,3 +22,80 @@ SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.table_statistics WHERE T
TABLE_SCHEMA db_new..............................................end
TABLE_NAME t1_new..............................................end
DROP DATABASE `db_new..............................................end`;
+#
+# MDEV-17171: Bug: RocksDB Tables do not have "Creation Date"
+#
+use test;
+create table t1 (a int) engine=rocksdb;
+select create_time is not null, update_time, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time check_time
+1 NULL NULL
+insert into t1 values (1);
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time is not null check_time
+1 1 NULL
+flush tables;
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time is not null check_time
+1 1 NULL
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+select sleep(3);
+sleep(3)
+0
+insert into t1 values (2);
+select
+create_time=@create_tm /* should not change */ ,
+timestampdiff(second, @update_tm, update_time) > 2,
+check_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=@create_tm 1
+timestampdiff(second, @update_tm, update_time) > 2 1
+check_time NULL
+#
+# Check how create_time survives ALTER TABLE.
+# First, an ALTER TABLE that re-creates the table:
+alter table t1 add b int;
+select
+create_time<>@create_tm /* should change */,
+create_time IS NOT NULL,
+update_time IS NULL
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time<>@create_tm 1
+create_time IS NOT NULL 1
+update_time IS NULL 1
+insert into t1 values (5,5);
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+# Then, an in-place ALTER TABLE:
+alter table t1 add key (a);
+select
+create_time=@create_tm /* should not change */,
+update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=@create_tm 1
+update_time NULL
+#
+# Check what is left after server restart
+#
+# Save t1's creation time
+create table t2 as
+select create_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+select
+create_time=(select create_time from t2) /* should not change */,
+update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=(select create_time from t2) 1
+update_time NULL
+drop table t1, t2;
diff --git a/mysql-test/suite/rocksdb/r/truncate_table.result b/mysql-test/suite/rocksdb/r/truncate_table.result
index 1544256f194..79b266a2453 100644
--- a/mysql-test/suite/rocksdb/r/truncate_table.result
+++ b/mysql-test/suite/rocksdb/r/truncate_table.result
@@ -9,19 +9,19 @@ DROP TABLE t1;
CREATE TABLE t1 (a INT KEY AUTO_INCREMENT, c CHAR(8)) ENGINE=rocksdb;
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 1 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 (c) VALUES ('a'),('b'),('c');
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 4 # # NULL latin1_swedish_ci NULL
TRUNCATE TABLE t1;
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 1 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 (c) VALUES ('d');
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 2 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 2 # # NULL latin1_swedish_ci NULL
SELECT a,c FROM t1;
a c
1 d
diff --git a/mysql-test/suite/rocksdb/t/issue255.test b/mysql-test/suite/rocksdb/t/issue255.test
index 370dece0c6c..686f45b4056 100644
--- a/mysql-test/suite/rocksdb/t/issue255.test
+++ b/mysql-test/suite/rocksdb/t/issue255.test
@@ -3,24 +3,25 @@
CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 VALUES ('538647864786478647864');
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SELECT * FROM t1;
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
DROP TABLE t1;
@@ -28,24 +29,24 @@ DROP TABLE t1;
CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 VALUES (1000);
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/t/rocksdb.test b/mysql-test/suite/rocksdb/t/rocksdb.test
index 5eff0fbf38f..7dcae569c92 100644
--- a/mysql-test/suite/rocksdb/t/rocksdb.test
+++ b/mysql-test/suite/rocksdb/t/rocksdb.test
@@ -1198,7 +1198,7 @@ drop table t1;
create table t1 (i int primary key auto_increment) engine=RocksDB;
insert into t1 values (null),(null);
---replace_column 7 #
+--replace_column 7 # 12 # 13 #
show table status like 't1';
drop table t1;
@@ -1903,11 +1903,13 @@ DROP TABLE t1;
# value is 4 while MyRocks will show it as 3.
CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(-1),(0);
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
SELECT * FROM t1;
DROP TABLE t1;
CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(10),(0);
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
SELECT * FROM t1;
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/t/rocksdb_table_dictionary_format.test b/mysql-test/suite/rocksdb/t/rocksdb_table_dictionary_format.test
new file mode 100644
index 00000000000..3dedf6502ab
--- /dev/null
+++ b/mysql-test/suite/rocksdb/t/rocksdb_table_dictionary_format.test
@@ -0,0 +1,67 @@
+--source include/have_rocksdb.inc
+
+select @@rocksdb_table_dictionary_format;
+
+#
+# Check the upgrade from Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1 to _2
+#
+
+--let $_mysqld_option=--rocksdb_table_dictionary_format=1
+--source include/restart_mysqld_with_option.inc
+
+--echo #
+--echo # Server restarted
+--echo #
+select @@rocksdb_table_dictionary_format;
+
+create table t1 (a int) engine=rocksdb;
+insert into t1 values (1);
+
+--echo # Create_time will be NULL as the table doesn't support it
+--echo # Update_time will be non-null
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--let $_mysqld_option=--rocksdb_table_dictionary_format=2
+--source include/restart_mysqld_with_option.inc
+--echo #
+--echo # Server restarted
+--echo #
+select @@rocksdb_table_dictionary_format;
+
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+set global rocksdb_compact_cf='default';
+# use: $MYSQLTEST_VARDIR $MYSQL_SST_DUMP
+let MYSQL_DATADIR = `select @@datadir`;
+let MYSQL_SST_DUMP = $MYSQL_SST_DUMP;
+
+select concat('00', '00', '00', '01', hex('test.t1'));
+
+# We are looking to find a record like this:
+# '00000001746573742E7431' seq:0, type:1 => 00010000000000000104
+# that is:
+# '00000001:746573742E7431' seq:0, type:1 => 0001: 0000000000000104
+# the key is: DDL_ENTRY_INDEX_START_NUMBER, "test.t1"
+# the value is: DDL_ENTRY_INDEX_VERSION_1, {cf_id, index_id}
+perl;
+ my $datadir = $ENV{'MYSQL_DATADIR'};
+ my $sst_dump = $ENV{'MYSQL_SST_DUMP'};
+ open(IN, "$sst_dump --command=scan --output_hex --file=$datadir/.rocksdb |");
+ while(<IN>) {
+ if ($_ =~ /^\'00000001746573742E7431\'/) {
+ print "Found the datadic entry\n";
+ if ($_ =~ / 0001[0-9A-Fa-f]{16}$/) {
+ print "Value has format version 1, followed by 8 bytes describing one index\n"
+ } else {
+ print "Unexpected value. $_\n";
+ }
+ }
+ }
+ print "Done\n";
+ close(IN);
+EOF
+drop table t1;
+
diff --git a/mysql-test/suite/rocksdb/t/show_table_status.test b/mysql-test/suite/rocksdb/t/show_table_status.test
index 29cc2ccfb5e..59effcc788c 100644
--- a/mysql-test/suite/rocksdb/t/show_table_status.test
+++ b/mysql-test/suite/rocksdb/t/show_table_status.test
@@ -24,7 +24,7 @@ set global rocksdb_force_flush_memtable_now = true;
CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
---replace_column 6 # 7 #
+--replace_column 6 # 7 # 12 # 13 #
SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' );
# Some statistics don't get updated as quickly. The Data_length and
@@ -48,7 +48,7 @@ set global rocksdb_force_flush_memtable_now = true;
# We expect the number of rows to be 10000. Data_len and Avg_row_len
# may vary, depending on built-in compression library.
---replace_column 6 # 7 #
+--replace_column 6 # 7 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't2';
DROP TABLE t1, t2, t3;
@@ -62,3 +62,80 @@ CREATE TABLE `t1_new..............................................end`(a int) en
INSERT INTO `t1_new..............................................end` VALUES (1);
--query_vertical SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.table_statistics WHERE TABLE_NAME = 't1_new..............................................end'
DROP DATABASE `db_new..............................................end`;
+--echo #
+--echo # MDEV-17171: Bug: RocksDB Tables do not have "Creation Date"
+--echo #
+use test;
+create table t1 (a int) engine=rocksdb;
+
+select create_time is not null, update_time, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+insert into t1 values (1);
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+flush tables;
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+select sleep(3);
+insert into t1 values (2);
+
+--vertical_results
+select
+ create_time=@create_tm /* should not change */ ,
+ timestampdiff(second, @update_tm, update_time) > 2,
+ check_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo #
+--echo # Check how create_time survives ALTER TABLE.
+--echo # First, an ALTER TABLE that re-creates the table:
+alter table t1 add b int;
+select
+ create_time<>@create_tm /* should change */,
+ create_time IS NOT NULL,
+ update_time IS NULL
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+insert into t1 values (5,5);
+
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo # Then, an in-place ALTER TABLE:
+alter table t1 add key (a);
+
+select
+ create_time=@create_tm /* should not change */,
+ update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo #
+--echo # Check what is left after server restart
+--echo #
+
+--echo # Save t1's creation time
+create table t2 as
+select create_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--source include/restart_mysqld.inc
+
+select
+ create_time=(select create_time from t2) /* should not change */,
+ update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+drop table t1, t2;
diff --git a/mysql-test/suite/rocksdb/t/truncate_table.test b/mysql-test/suite/rocksdb/t/truncate_table.test
index a61488654a3..1001eeb6cde 100644
--- a/mysql-test/suite/rocksdb/t/truncate_table.test
+++ b/mysql-test/suite/rocksdb/t/truncate_table.test
@@ -29,22 +29,22 @@ DROP TABLE t1;
CREATE TABLE t1 (a INT KEY AUTO_INCREMENT, c CHAR(8)) ENGINE=rocksdb;
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 (c) VALUES ('a'),('b'),('c');
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
TRUNCATE TABLE t1;
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 (c) VALUES ('d');
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--sorted_result
diff --git a/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_table_dictionary_format_basic.result b/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_table_dictionary_format_basic.result
new file mode 100644
index 00000000000..825d7a7113a
--- /dev/null
+++ b/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_table_dictionary_format_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(2);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_TABLE_DICTIONARY_FORMAT;
+SELECT @start_global_value;
+@start_global_value
+2
+"Trying to set variable @@global.ROCKSDB_TABLE_DICTIONARY_FORMAT to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_TABLE_DICTIONARY_FORMAT = 444;
+ERROR HY000: Variable 'rocksdb_table_dictionary_format' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_table_dictionary_format_basic.test b/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_table_dictionary_format_basic.test
new file mode 100644
index 00000000000..2b8c6e81bf7
--- /dev/null
+++ b/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_table_dictionary_format_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(2);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_TABLE_DICTIONARY_FORMAT
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index d6c89087bea..a0a344187cf 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -122,6 +122,8 @@ const std::string PER_INDEX_CF_NAME("$per_index_cf");
static std::vector<std::string> rdb_tables_to_recalc;
+uint rocksdb_table_dictionary_format;
+
class Rdb_explicit_snapshot : public explicit_snapshot {
public:
static std::shared_ptr<Rdb_explicit_snapshot> create(
@@ -2136,6 +2138,17 @@ static MYSQL_SYSVAR_ULONGLONG(
"MultiGet",
nullptr, nullptr, SIZE_T_MAX, /* min */ 0, /* max */ SIZE_T_MAX, 0);
+static MYSQL_SYSVAR_UINT(table_dictionary_format,
+ rocksdb_table_dictionary_format,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Max Table Dictionary format version that the server "
+ "may create (use lower values for backward "
+ "compatibility, higher values for new features)",
+ nullptr, nullptr,
+ ROCKSDB_DATADIC_FORMAT_DEFAULT,
+ ROCKSDB_DATADIC_FORMAT_INITIAL,
+ ROCKSDB_DATADIC_FORMAT_MAX, 0);
+
static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100;
static struct st_mysql_sys_var *rocksdb_system_variables[] = {
@@ -2309,6 +2322,8 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(select_bypass_log_rejected),
MYSQL_SYSVAR(select_bypass_debug_row_delay),
MYSQL_SYSVAR(select_bypass_multiget_min),
+
+ MYSQL_SYSVAR(table_dictionary_format),
nullptr};
static rocksdb::WriteOptions rdb_get_rocksdb_write_options(
@@ -2456,6 +2471,8 @@ class Rdb_transaction {
bool m_is_delayed_snapshot = false;
bool m_is_two_phase = false;
+ std::unordered_set<Rdb_tbl_def*> modified_tables;
+
private:
/*
Number of write operations this transaction had when we took the last
@@ -3139,6 +3156,25 @@ class Rdb_transaction {
virtual void start_tx() = 0;
virtual void start_stmt() = 0;
+ protected:
+ // Non-virtual functions with actions to be done on transaction start and
+ // commit.
+ void on_commit() {
+ time_t tm;
+ tm = time(nullptr);
+ for (auto &it : modified_tables) {
+ it->update_time = tm;
+ }
+ modified_tables.clear();
+ }
+ void on_rollback() {
+ modified_tables.clear();
+ }
+ public:
+ void log_table_write_op(Rdb_tbl_def *tbl) {
+ modified_tables.insert(tbl);
+ }
+
void set_initial_savepoint() {
/*
Set the initial savepoint. If the first statement in the transaction
@@ -3322,7 +3358,9 @@ class Rdb_transaction_impl : public Rdb_transaction {
goto error;
}
+ on_commit();
error:
+ on_rollback();
/* Save the transaction object to be reused */
release_tx();
@@ -3338,6 +3376,7 @@ class Rdb_transaction_impl : public Rdb_transaction {
public:
void rollback() override {
+ on_rollback();
m_write_count = 0;
m_insert_count = 0;
m_update_count = 0;
@@ -3659,7 +3698,9 @@ class Rdb_writebatch_impl : public Rdb_transaction {
res = true;
goto error;
}
+ on_commit();
error:
+ on_rollback();
reset();
m_write_count = 0;
@@ -3691,6 +3732,7 @@ class Rdb_writebatch_impl : public Rdb_transaction {
}
void rollback() override {
+ on_rollback();
m_write_count = 0;
m_insert_count = 0;
m_update_count = 0;
@@ -7549,6 +7591,7 @@ int ha_rocksdb::create_table(const std::string &table_name,
/* Create table/key descriptions and put them into the data dictionary */
m_tbl_def = new Rdb_tbl_def(table_name);
+ m_tbl_def->set_create_time(time(nullptr));
uint n_keys = table_arg->s->keys;
@@ -10177,6 +10220,8 @@ int ha_rocksdb::update_write_row(const uchar *const old_data,
row_info.tx->incr_insert_count();
}
+ row_info.tx->log_table_write_op(m_tbl_def);
+
if (do_bulk_commit(row_info.tx)) {
DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD);
}
@@ -10648,6 +10693,7 @@ int ha_rocksdb::delete_row(const uchar *const buf) {
}
tx->incr_delete_count();
+ tx->log_table_write_op(m_tbl_def);
if (do_bulk_commit(tx)) {
DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD);
@@ -10802,6 +10848,12 @@ int ha_rocksdb::info(uint flag) {
k->rec_per_key[j] = x;
}
}
+
+ stats.create_time = m_tbl_def->get_create_time();
+ }
+
+ if (flag & HA_STATUS_TIME) {
+ stats.update_time = m_tbl_def->update_time;
}
if (flag & HA_STATUS_ERRKEY) {
@@ -12603,6 +12655,7 @@ bool ha_rocksdb::prepare_inplace_alter_table(
m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed);
new_tdef->m_hidden_pk_val =
m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed);
+ new_tdef->set_create_time(m_tbl_def->get_create_time());
if (create_key_defs(altered_table, new_tdef, table, m_tbl_def)) {
/* Delete the new key descriptors */
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index 9a250af40c7..c36c5c98e19 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -82,6 +82,7 @@ extern PSI_rwlock_key key_rwlock_read_free_rpl_tables;
#endif
extern Regex_list_handler rdb_read_free_regex_handler;
+extern uint rocksdb_table_dictionary_format;
/**
@brief
Rdb_table_handler is a reference-counted structure storing information for
diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc
index c0741a1ce9b..234f7a789ff 100644
--- a/storage/rocksdb/rdb_datadic.cc
+++ b/storage/rocksdb/rdb_datadic.cc
@@ -3514,8 +3514,21 @@ bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict,
const rocksdb::Slice &key) {
StringBuffer<8 * Rdb_key_def::PACKED_SIZE> indexes;
indexes.alloc(Rdb_key_def::VERSION_SIZE +
+ Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE +
m_key_count * Rdb_key_def::PACKED_SIZE * 2);
- rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION);
+
+ if (rocksdb_table_dictionary_format <
+ ROCKSDB_DATADIC_FORMAT_CREATE_TIMESTAMP) {
+ rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1);
+ // We are using old data format, which means we cannot save Create_time
+ // Set it to be shown as unknown right away, so that the behavior before
+ // server restart and after is the same.
+ set_create_time(0);
+ }
+ else {
+ rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2);
+ rdb_netstr_append_uint64(&indexes, create_time);
+ }
for (uint i = 0; i < m_key_count; i++) {
const Rdb_key_def &kd = *m_key_descr_arr[i];
@@ -4015,27 +4028,52 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
Rdb_tbl_def *const tdef =
new Rdb_tbl_def(key, Rdb_key_def::INDEX_NUMBER_SIZE);
- // Now, read the DDLs.
- const int real_val_size = val.size() - Rdb_key_def::VERSION_SIZE;
- if (real_val_size % Rdb_key_def::PACKED_SIZE * 2 > 0) {
+ if (val.size() < Rdb_key_def::VERSION_SIZE) {
// NO_LINT_DEBUG
sql_print_error("RocksDB: Table_store: invalid keylist for table %s",
tdef->full_tablename().c_str());
return true;
}
- tdef->m_key_count = real_val_size / (Rdb_key_def::PACKED_SIZE * 2);
- tdef->m_key_descr_arr = new std::shared_ptr<Rdb_key_def>[tdef->m_key_count];
ptr = reinterpret_cast<const uchar *>(val.data());
const int version = rdb_netbuf_read_uint16(&ptr);
- if (version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION) {
+
+ if (version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1 &&
+ version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2) {
// NO_LINT_DEBUG
sql_print_error(
"RocksDB: DDL ENTRY Version was not expected."
- "Expected: %d, Actual: %d",
- Rdb_key_def::DDL_ENTRY_INDEX_VERSION, version);
+ "Expected: %d..%d, Actual: %d",
+ Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1,
+ Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2, version);
return true;
}
+ int real_val_size = val.size() - Rdb_key_def::VERSION_SIZE;
+
+ if (version == Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2) {
+ if (real_val_size < Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE) {
+ // NO_LINT_DEBUG
+ sql_print_error( "RocksDB: DDL ENTRY V2 doesn't have timestamp");
+ delete tdef;
+ return true;
+ }
+ tdef->set_create_time(rdb_netbuf_read_uint64(&ptr));
+ real_val_size -= Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE;
+ }
+ else
+ tdef->set_create_time(0); // shown as SQL NULL.
+
+ // Now, read the DDLs.
+ if (real_val_size % (Rdb_key_def::PACKED_SIZE * 2) > 0) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Table_store: invalid keylist for table %s",
+ tdef->full_tablename().c_str());
+ return true;
+ }
+ tdef->m_key_count = real_val_size / (Rdb_key_def::PACKED_SIZE * 2);
+ tdef->m_key_descr_arr = new std::shared_ptr<Rdb_key_def>[tdef->m_key_count];
+
+
ptr_end = ptr + real_val_size;
for (uint keyno = 0; ptr < ptr_end; keyno++) {
GL_INDEX_ID gl_index_id;
@@ -4471,6 +4509,7 @@ bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to,
rec->m_hidden_pk_val.load(std::memory_order_relaxed);
new_rec->m_tbl_stats = rec->m_tbl_stats;
+ new_rec->set_create_time(rec->get_create_time());
// so that it's not free'd when deleting the old rec
rec->m_key_descr_arr = nullptr;
diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h
index 416857cad38..9c2a96e77b7 100644
--- a/storage/rocksdb/rdb_datadic.h
+++ b/storage/rocksdb/rdb_datadic.h
@@ -203,6 +203,22 @@ enum {
UNPACK_FAILURE = 1,
};
+
+/*
+ Global user-visible data dictionary format version.
+ The server will read the data of any version, but will not write data
+ structures that were introduced after the version in
+ rocksdb_table_dictionary_format.
+ This way, one can keep the on-disk data backward-compatible.
+*/
+const uint ROCKSDB_DATADIC_FORMAT_INITIAL = 1;
+const uint ROCKSDB_DATADIC_FORMAT_CREATE_TIMESTAMP = 2;
+
+// Maximum possible value:
+const uint ROCKSDB_DATADIC_FORMAT_MAX = 2;
+const uint ROCKSDB_DATADIC_FORMAT_DEFAULT = ROCKSDB_DATADIC_FORMAT_MAX;
+
+
/*
An object of this class represents information about an index in an SQL
table. It provides services to encode and decode index tuples.
@@ -465,6 +481,7 @@ class Rdb_key_def {
CF_NUMBER_SIZE = 4,
CF_FLAG_SIZE = 4,
PACKED_SIZE = 4, // one int
+ TABLE_CREATE_TIMESTAMP_SIZE = 8,
};
// bit flags for combining bools when writing to disk
@@ -506,7 +523,10 @@ class Rdb_key_def {
// Data dictionary schema version. Introduce newer versions
// if changing schema layout
enum {
- DDL_ENTRY_INDEX_VERSION = 1,
+ DDL_ENTRY_INDEX_VERSION_1 = 1,
+ // this includes a 64-bit table_creation_time at the end.
+ // Allowed since ROCKSDB_DATADIC_FORMAT_CREATE_TIMESTAMP.
+ DDL_ENTRY_INDEX_VERSION_2 = 2,
CF_DEFINITION_VERSION = 1,
BINLOG_INFO_INDEX_NUMBER_VERSION = 1,
DDL_DROP_INDEX_ONGOING_VERSION = 1,
@@ -1116,6 +1136,12 @@ class Rdb_tbl_def {
~Rdb_tbl_def();
+ // time values are shown in SHOW TABLE STATUS
+ void set_create_time(time_t val) { create_time = val; }
+ time_t get_create_time() { return create_time; }
+
+ time_t update_time = 0; // in-memory only value, maintained right here
+
void check_and_set_read_free_rpl_table();
/* Number of indexes */
@@ -1161,6 +1187,9 @@ class Rdb_tbl_def {
const std::string &base_tablename() const { return m_tablename; }
const std::string &base_partition() const { return m_partition; }
GL_INDEX_ID get_autoincr_gl_index_id();
+
+ private:
+ time_t create_time = 0;
};
/*
@@ -1341,8 +1370,11 @@ class Rdb_binlog_manager {
1. Table Name => internal index id mappings
key: Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER(0x1) + dbname.tablename
- value: version, {cf_id, index_id}*n_indexes_of_the_table
+ value: DDL_ENTRY_INDEX_VERSION_1, {cf_id, index_id}*n_indexes_of_the_table
+ or value: DDL_ENTRY_INDEX_VERSION_2, create_timestamp, {cf_id, index_id}*
+ n_indexes_of_the_table
version is 2 bytes. cf_id and index_id are 4 bytes.
+ create_timestamp is 8 bytes.
2. internal cf_id, index id => index information
key: Rdb_key_def::INDEX_INFO(0x2) + cf_id + index_id
1
0

[Commits] 38c89a95752: MDEV-20740: Odd computations in calculate_cond_selectivity_for_table
by Sergei Petrunia 04 Oct '19
by Sergei Petrunia 04 Oct '19
04 Oct '19
revision-id: 38c89a95752ee7a59f9366af0cefacf3a71a6fd0 (mariadb-10.4.7-102-g38c89a95752)
parent(s): eb0a10b07222b01d40dee24e61aac9502256481f
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-10-04 20:16:39 +0300
message:
MDEV-20740: Odd computations in calculate_cond_selectivity_for_table
Make SEL_ARG graph traversal code in sel_arg_range_seq_next() set
max_key_parts first.
(Pushing to 10.4 first)
---
mysql-test/main/index_intersect_innodb.result | 8 ++++----
sql/opt_range.cc | 16 ++++++++++------
sql/opt_range_mrr.cc | 5 ++++-
3 files changed, 18 insertions(+), 11 deletions(-)
diff --git a/mysql-test/main/index_intersect_innodb.result b/mysql-test/main/index_intersect_innodb.result
index 854bcd75e5c..fd07f6b41c6 100644
--- a/mysql-test/main/index_intersect_innodb.result
+++ b/mysql-test/main/index_intersect_innodb.result
@@ -470,17 +470,17 @@ EXPLAIN
SELECT * FROM City
WHERE ID BETWEEN 501 AND 1000 AND Population > 700000 AND Country LIKE 'C%';
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Country,Population 4,7,4 NULL # Using sort_intersect(PRIMARY,Country,Population); Using where
+1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Population,Country 4,4,7 NULL # Using sort_intersect(PRIMARY,Population,Country); Using where
EXPLAIN
SELECT * FROM City
WHERE ID BETWEEN 1 AND 500 AND Population > 700000 AND Country LIKE 'C%';
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Country,Population 4,7,4 NULL # Using sort_intersect(PRIMARY,Country,Population); Using where
+1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Population,Country 4,4,7 NULL # Using sort_intersect(PRIMARY,Population,Country); Using where
EXPLAIN
SELECT * FROM City
WHERE ID BETWEEN 2001 AND 2500 AND Population > 300000 AND Country LIKE 'H%';
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Country 4,7 NULL # Using sort_intersect(PRIMARY,Country); Using where
+1 SIMPLE City range PRIMARY,Population,Country Country 7 NULL # Using index condition; Using where
EXPLAIN
SELECT * FROM City
WHERE ID BETWEEN 3701 AND 4000 AND Population > 1000000
@@ -724,7 +724,7 @@ EXPLAIN
SELECT * FROM City
WHERE ID BETWEEN 1 AND 500 AND Population > 700000 AND Country LIKE 'C%';
id select_type table type possible_keys key key_len ref rows Extra
-1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Country,Population 4,7,4 NULL # Using sort_intersect(PRIMARY,Country,Population); Using where
+1 SIMPLE City index_merge PRIMARY,Population,Country PRIMARY,Population,Country 4,4,7 NULL # Using sort_intersect(PRIMARY,Population,Country); Using where
EXPLAIN
SELECT * FROM City
WHERE ID BETWEEN 3001 AND 4000 AND Population > 600000
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 654fab7810a..c47da283e9c 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -315,7 +315,7 @@ class PARAM : public RANGE_OPT_PARAM
*/
key_map possible_keys;
longlong baseflag;
- uint max_key_part, range_count;
+ uint max_key_parts, range_count;
bool quick; // Don't calulate possible keys
@@ -7387,7 +7387,7 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
index_scan->idx= idx;
index_scan->keynr= keynr;
index_scan->key_info= ¶m->table->key_info[keynr];
- index_scan->used_key_parts= param->max_key_part+1;
+ index_scan->used_key_parts= param->max_key_parts;
index_scan->range_count= param->range_count;
index_scan->records= found_records;
index_scan->sel_arg= key;
@@ -11042,7 +11042,7 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
seq.start= tree;
param->range_count=0;
- param->max_key_part=0;
+ param->max_key_parts=0;
seq.is_ror_scan= TRUE;
if (file->index_flags(keynr, 0, TRUE) & HA_KEY_SCAN_NOT_ROR)
@@ -11055,9 +11055,13 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
*mrr_flags|= HA_MRR_NO_ASSOCIATION | HA_MRR_SORTED;
bool pk_is_clustered= file->primary_key_is_clustered();
+ // TODO: param->max_key_parts holds 0 now, and not the #keyparts used.
+ // Passing wrong second argument to index_flags() makes no difference for
+ // most storage engines but might be an issue for MyRocks with certain
+ // datatypes.
if (index_only &&
- (file->index_flags(keynr, param->max_key_part, 1) & HA_KEYREAD_ONLY) &&
- !(file->index_flags(keynr, param->max_key_part, 1) & HA_CLUSTERED_INDEX))
+ (file->index_flags(keynr, param->max_key_parts, 1) & HA_KEYREAD_ONLY) &&
+ !(file->index_flags(keynr, param->max_key_parts, 1) & HA_CLUSTERED_INDEX))
*mrr_flags |= HA_MRR_INDEX_ONLY;
if (param->thd->lex->sql_command != SQLCOM_SELECT)
@@ -11088,7 +11092,7 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
if (update_tbl_stats)
{
param->table->quick_keys.set_bit(keynr);
- param->table->quick_key_parts[keynr]= param->max_key_part+1;
+ param->table->quick_key_parts[keynr]= param->max_key_parts;
param->table->quick_n_ranges[keynr]= param->range_count;
param->table->quick_condition_rows=
MY_MIN(param->table->quick_condition_rows, rows);
diff --git a/sql/opt_range_mrr.cc b/sql/opt_range_mrr.cc
index d3a1e155fb7..4afa06a7ca0 100644
--- a/sql/opt_range_mrr.cc
+++ b/sql/opt_range_mrr.cc
@@ -247,6 +247,7 @@ bool sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
uint min_key_length= (uint)(cur->min_key - seq->param->min_key);
range->ptr= (char*)(intptr)(key_tree->part);
+ uint max_key_parts;
if (cur->min_key_flag & GEOM_FLAG)
{
range->range_flag= cur->min_key_flag;
@@ -256,9 +257,11 @@ bool sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
range->start_key.length= min_key_length;
range->start_key.keypart_map= make_prev_keypart_map(cur->min_key_parts);
range->start_key.flag= (ha_rkey_function) (cur->min_key_flag ^ GEOM_FLAG);
+ max_key_parts= cur->min_key_parts;
}
else
{
+ max_key_parts= MY_MAX(cur->min_key_parts, cur->max_key_parts);
range->range_flag= cur->min_key_flag | cur->max_key_flag;
range->start_key.key= seq->param->min_key;
@@ -336,7 +339,7 @@ bool sel_arg_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range)
}
}
seq->param->range_count++;
- seq->param->max_key_part=MY_MAX(seq->param->max_key_part,key_tree->part);
+ seq->param->max_key_parts= MY_MAX(seq->param->max_key_parts, max_key_parts);
return 0;
}
1
0

[Commits] d34839cacbe: MDEV-18046: Assortment of crashes, assertion failures and ASAN errors in mysql_show_binlog_events
by sujatha 04 Oct '19
by sujatha 04 Oct '19
04 Oct '19
revision-id: d34839cacbef0f4c76b7aa7ae6165b37b293685c (mariadb-10.1.41-51-gd34839cacbe)
parent(s): 4bcf524482b3a4260347b0fe912fa2660af97c43
author: Sujatha
committer: Sujatha
timestamp: 2019-10-04 13:45:33 +0530
message:
MDEV-18046: Assortment of crashes, assertion failures and ASAN errors in mysql_show_binlog_events
Problem:
========
SHOW BINLOG EVENTS FROM <pos> causes a variety of failures, some of which are
listed below. It is not a race condition issue, but there is some
non-determinism in it.
Fix:
====
Assert 1: With ASAN
========
Rows_log_event::Rows_log_event(const char*, uint,
const Format_description_log_event*):
Assertion `var_header_len >= 2'
Implemented upstream patch.
commit a3a497ccf7ecacc900551fb1e47ea4078b45c351
Fix: Added checks to avoid reading out of buffer limits.
Assert 2:
=========
Table_map_log_event::Table_map_log_event(const char*, uint,
const Format_description_log_event*)
Assertion `m_field_metadata_size <= (m_colcnt * 2)' failed.
Fix: Converted debug assert to error handler code.
ASAN Error: heap-buffer-overflow within "my_strndup" in Rotate_log_event
===========
my_strndup /home/sujatha/bug_repo/tmp-10.1/mysys/my_malloc.c:254
Rotate_log_event::Rotate_log_event(char const*, unsigned int,
Format_description_log_event const*)
Fix: Improved the check for event_len validation.
ASAN Error: AddressSanitizer: SEGV on unknown address
==========
in inline_mysql_mutex_destroy
in my_bitmap_free
in Update_rows_log_event::~Update_rows_log_event()
Fix: Initialize m_cols_ai.bitmap to NULL.
ASAN Error: AddressSanitizer: heap-buffer-overflow on address 0x60400002acb8
==========
Load_log_event::copy_log_event(char const*, unsigned long, int,
Format_description_log_event const*)
Fix: Moved the event_len validation to the beginning of the copy_log_event function.
ASAN Error: AddressSanitizer: SEGV on unknown address
==========
The signal is caused by a READ memory access.
User_var_log_event::User_var_log_event(char const*, unsigned int,
Format_description_log_event const*)
Implemented upstream patch.
commit a3a497ccf7ecacc900551fb1e47ea4078b45c351
User_var_log_event::User_var_log_event: added checks to avoid reading out of
buffer limits.
ASAN Error: AddressSanitizer: heap-buffer-overflow on address
==========
String::append(char const*, unsigned int)
Query_log_event::pack_info(Protocol*)
Fix: Added check to ensure that query_len is within event_length
---
.../binlog/r/binlog_invalid_read_in_rotate.result | 17 +++++
.../r/binlog_show_binlog_event_random_pos.result | 13 ++++
.../binlog/t/binlog_invalid_read_in_rotate.test | 47 ++++++++++++++
.../t/binlog_show_binlog_event_random_pos.test | 37 +++++++++++
sql/log_event.cc | 75 ++++++++++++++++------
5 files changed, 168 insertions(+), 21 deletions(-)
diff --git a/mysql-test/suite/binlog/r/binlog_invalid_read_in_rotate.result b/mysql-test/suite/binlog/r/binlog_invalid_read_in_rotate.result
new file mode 100644
index 00000000000..28131d2645c
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_invalid_read_in_rotate.result
@@ -0,0 +1,17 @@
+RESET MASTER;
+call mtr.add_suppression("Error in Log_event::read_log_event*");
+DROP TABLE /*! IF EXISTS*/ t1;
+Warnings:
+Note 1051 Unknown table 'test.t1'
+CREATE TABLE `t1` (
+`col_int` int,
+pk integer auto_increment,
+`col_char_12_key` char(12),
+`col_int_key` int,
+`col_char_12` char(12),
+primary key (pk),
+key (`col_char_12_key` ),
+key (`col_int_key` )) ENGINE=InnoDB;
+INSERT /*! IGNORE */ INTO t1 VALUES (183173120, NULL, 'abcd', 1, 'efghijk'), (1, NULL, 'lmno', 2, 'p');
+ALTER TABLE `t1` ENABLE KEYS;
+DROP TABLE t1;
diff --git a/mysql-test/suite/binlog/r/binlog_show_binlog_event_random_pos.result b/mysql-test/suite/binlog/r/binlog_show_binlog_event_random_pos.result
new file mode 100644
index 00000000000..98385a7ac5e
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_show_binlog_event_random_pos.result
@@ -0,0 +1,13 @@
+RESET MASTER;
+call mtr.add_suppression("Error in Log_event::read_log_event*");
+DROP TABLE IF EXISTS t1;
+Warnings:
+Note 1051 Unknown table 'test.t1'
+CREATE TABLE t1 (c1 CHAR(255), c2 CHAR(255), c3 CHAR(255), c4 CHAR(255), c5 CHAR(255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+UPDATE t1 SET c1=repeat('b',255);
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+DROP TABLE t1;
diff --git a/mysql-test/suite/binlog/t/binlog_invalid_read_in_rotate.test b/mysql-test/suite/binlog/t/binlog_invalid_read_in_rotate.test
new file mode 100644
index 00000000000..8360f2dcac4
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_invalid_read_in_rotate.test
@@ -0,0 +1,47 @@
+# ==== Purpose ====
+#
+# Test verifies that reading binary log by using "SHOW BINLOG EVENTS" command
+# from various random positions doesn't lead to crash. It should exit
+# gracefully with appropriate error.
+#
+# ==== References ====
+#
+# MDEV-18046:Assortment of crashes, assertion failures and ASAN errors in mysql_show_binlog_events
+
+--source include/have_log_bin.inc
+--source include/have_innodb.inc
+
+RESET MASTER;
+
+call mtr.add_suppression("Error in Log_event::read_log_event*");
+
+DROP TABLE /*! IF EXISTS*/ t1;
+CREATE TABLE `t1` (
+`col_int` int,
+pk integer auto_increment,
+`col_char_12_key` char(12),
+`col_int_key` int,
+`col_char_12` char(12),
+primary key (pk),
+key (`col_char_12_key` ),
+key (`col_int_key` )) ENGINE=InnoDB;
+
+INSERT /*! IGNORE */ INTO t1 VALUES (183173120, NULL, 'abcd', 1, 'efghijk'), (1, NULL, 'lmno', 2, 'p');
+
+--disable_warnings
+ALTER TABLE `t1` ENABLE KEYS;
+--enable_warnings
+
+--let $max_pos= query_get_value(SHOW MASTER STATUS,Position,1)
+--let $pos= 1
+while ($pos <= $max_pos)
+{
+ --disable_query_log
+ --disable_result_log
+ --error 0,1220
+ eval SHOW BINLOG EVENTS FROM $pos;
+ --inc $pos
+ --enable_result_log
+ --enable_query_log
+}
+DROP TABLE t1;
diff --git a/mysql-test/suite/binlog/t/binlog_show_binlog_event_random_pos.test b/mysql-test/suite/binlog/t/binlog_show_binlog_event_random_pos.test
new file mode 100644
index 00000000000..aaf56faa57e
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_show_binlog_event_random_pos.test
@@ -0,0 +1,37 @@
+# ==== Purpose ====
+#
+# Test verifies that reading binary log by using "SHOW BINLOG EVENTS" command
+# from various random positions doesn't lead to crash. It should exit
+# gracefully with appropriate error.
+#
+# ==== References ====
+#
+# MDEV-18046:Assortment of crashes, assertion failures and ASAN errors in mysql_show_binlog_events
+
+--source include/have_log_bin.inc
+--source include/have_innodb.inc
+
+RESET MASTER;
+call mtr.add_suppression("Error in Log_event::read_log_event*");
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (c1 CHAR(255), c2 CHAR(255), c3 CHAR(255), c4 CHAR(255), c5 CHAR(255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+UPDATE t1 SET c1=repeat('b',255);
+INSERT INTO t1 VALUES (repeat('a', 255), repeat('a', 255),repeat('a', 255),repeat('a', 255),repeat('a', 255));
+--let $max_pos= query_get_value(SHOW MASTER STATUS,Position,1)
+--let $pos= 1
+while ($pos <= $max_pos)
+{
+ --disable_query_log
+ --disable_result_log
+ --error 0,1220
+ eval SHOW BINLOG EVENTS FROM $pos LIMIT 100;
+ --inc $pos
+ --enable_result_log
+ --enable_query_log
+}
+
+DROP TABLE t1;
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 65f29441e1a..293bfa6680e 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -3502,7 +3502,6 @@ code_name(int code)
#define CHECK_SPACE(PTR,END,CNT) \
do { \
DBUG_PRINT("info", ("Read %s", code_name(pos[-1]))); \
- DBUG_ASSERT((PTR) + (CNT) <= (END)); \
if ((PTR) + (CNT) > (END)) { \
DBUG_PRINT("info", ("query= 0")); \
query= 0; \
@@ -3815,7 +3814,7 @@ Query_log_event::Query_log_event(const char* buf, uint event_len,
uint32 max_length= uint32(event_len - ((const char*)(end + db_len + 1) -
(buf - common_header_len)));
- if (q_len != max_length)
+ if (q_len != max_length || q_len > event_len)
{
q_len= 0;
query= NULL;
@@ -5889,6 +5888,9 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len,
{
DBUG_ENTER("Load_log_event::copy_log_event");
uint data_len;
+
+ if ((int) event_len < body_offset)
+ DBUG_RETURN(1);
char* buf_end = (char*)buf + event_len;
/* this is the beginning of the post-header */
const char* data_head = buf + description_event->common_header_len;
@@ -5898,18 +5900,17 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len,
table_name_len = (uint)data_head[L_TBL_LEN_OFFSET];
db_len = (uint)data_head[L_DB_LEN_OFFSET];
num_fields = uint4korr(data_head + L_NUM_FIELDS_OFFSET);
-
- if ((int) event_len < body_offset)
- DBUG_RETURN(1);
+
/*
Sql_ex.init() on success returns the pointer to the first byte after
the sql_ex structure, which is the start of field lengths array.
*/
+
if (!(field_lens= (uchar*)sql_ex.init((char*)buf + body_offset,
buf_end,
(uchar)buf[EVENT_TYPE_OFFSET] != LOAD_EVENT)))
DBUG_RETURN(1);
-
+
data_len = event_len - body_offset;
if (num_fields > data_len) // simple sanity check against corruption
DBUG_RETURN(1);
@@ -6449,7 +6450,7 @@ Rotate_log_event::Rotate_log_event(const char* buf, uint event_len,
// The caller will ensure that event_len is what we have at EVENT_LEN_OFFSET
uint8 post_header_len= description_event->post_header_len[ROTATE_EVENT-1];
uint ident_offset;
- if (event_len < LOG_EVENT_MINIMAL_HEADER_LEN)
+ if (event_len < (uint)(LOG_EVENT_MINIMAL_HEADER_LEN + post_header_len))
DBUG_VOID_RETURN;
buf+= LOG_EVENT_MINIMAL_HEADER_LEN;
pos= post_header_len ? uint8korr(buf + R_POS_OFFSET) : 4;
@@ -7955,6 +7956,12 @@ User_var_log_event(const char* buf, uint event_len,
we keep the flags set to UNDEF_F.
*/
uint bytes_read= ((val + val_len) - buf_start);
+ if (bytes_read > event_len)
+ {
+ error= true;
+ goto err;
+ }
+
if ((data_written - bytes_read) > 0)
{
flags= (uint) *(buf + UV_VAL_IS_NULL + UV_VAL_TYPE_SIZE +
@@ -9513,7 +9520,8 @@ Rows_log_event::Rows_log_event(const char *buf, uint event_len,
uint8 const common_header_len= description_event->common_header_len;
Log_event_type event_type= (Log_event_type) buf[EVENT_TYPE_OFFSET];
m_type= event_type;
-
+ m_cols_ai.bitmap= 0;
+
uint8 const post_header_len= description_event->post_header_len[event_type-1];
DBUG_PRINT("enter",("event_len: %u common_header_len: %d "
@@ -9546,7 +9554,16 @@ Rows_log_event::Rows_log_event(const char *buf, uint event_len,
which includes length bytes
*/
var_header_len= uint2korr(post_start);
- assert(var_header_len >= 2);
+
+ /* Check length and also avoid out of buffer read */
+ if (var_header_len < 2 ||
+ event_len < static_cast<unsigned int>(var_header_len +
+ (post_start - buf)))
+ {
+ m_cols.bitmap= 0;
+ DBUG_VOID_RETURN;
+ }
+
var_header_len-= 2;
/* Iterate over var-len header, extracting 'chunks' */
@@ -11047,6 +11064,10 @@ Table_map_log_event::Table_map_log_event(const char *buf, uint event_len,
/* Length of table name + counter + terminating null */
uchar const *const ptr_colcnt= ptr_tbllen + m_tbllen + 2;
uchar *ptr_after_colcnt= (uchar*) ptr_colcnt;
+ bytes_read= (unsigned int) (ptr_after_colcnt - (unsigned char *)buf);
+ /* Avoid reading out of buffer */
+ if (event_len <= bytes_read)
+ DBUG_VOID_RETURN;
m_colcnt= net_field_length(&ptr_after_colcnt);
DBUG_PRINT("info",("m_dblen: %lu off: %ld m_tbllen: %lu off: %ld m_colcnt: %lu off: %ld",
@@ -11074,15 +11095,24 @@ Table_map_log_event::Table_map_log_event(const char *buf, uint event_len,
if (bytes_read < event_len)
{
m_field_metadata_size= net_field_length(&ptr_after_colcnt);
- DBUG_ASSERT(m_field_metadata_size <= (m_colcnt * 2));
- uint num_null_bytes= (m_colcnt + 7) / 8;
- m_meta_memory= (uchar *)my_multi_malloc(MYF(MY_WME),
- &m_null_bits, num_null_bytes,
- &m_field_metadata, m_field_metadata_size,
- NULL);
- memcpy(m_field_metadata, ptr_after_colcnt, m_field_metadata_size);
- ptr_after_colcnt= (uchar*)ptr_after_colcnt + m_field_metadata_size;
- memcpy(m_null_bits, ptr_after_colcnt, num_null_bytes);
+ if (m_field_metadata_size <= (m_colcnt * 2))
+ {
+ uint num_null_bytes= (m_colcnt + 7) / 8;
+ m_meta_memory= (uchar *)my_multi_malloc(MYF(MY_WME),
+ &m_null_bits, num_null_bytes,
+ &m_field_metadata, m_field_metadata_size,
+ NULL);
+ memcpy(m_field_metadata, ptr_after_colcnt, m_field_metadata_size);
+ ptr_after_colcnt= (uchar*)ptr_after_colcnt + m_field_metadata_size;
+ memcpy(m_null_bits, ptr_after_colcnt, num_null_bytes);
+ }
+ else
+ {
+ m_coltype= NULL;
+ my_free(m_memory);
+ m_memory= NULL;
+ DBUG_VOID_RETURN;
+ }
}
}
@@ -12739,9 +12769,12 @@ void Update_rows_log_event::init(MY_BITMAP const *cols)
Update_rows_log_event::~Update_rows_log_event()
{
- if (m_cols_ai.bitmap == m_bitbuf_ai) // no my_malloc happened
- m_cols_ai.bitmap= 0; // so no my_free in my_bitmap_free
- my_bitmap_free(&m_cols_ai); // To pair with my_bitmap_init().
+ if (m_cols_ai.bitmap)
+ {
+ if (m_cols_ai.bitmap == m_bitbuf_ai) // no my_malloc happened
+ m_cols_ai.bitmap= 0; // so no my_free in my_bitmap_free
+ my_bitmap_free(&m_cols_ai); // To pair with my_bitmap_init().
+ }
}
1
0