
15 Oct '19
revision-id: a1c29cecdb17578ef6364ec9021fd9222097923c (fb-prod201903-171-ga1c29cecdb1)
parent(s): 71f5fa77faeae64d653012db88f0a9e224227d5f
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-10-15 20:48:46 +0300
message:
Issue #790, MyRocks/MRR: address review input part 3
- in ha_rocksdb::index_end(), call mrr_free() only if we are in an MRR scan.
- added a comment.
---
storage/rocksdb/ha_rocksdb.cc | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 2735d9d6adb..1ae5c1d6989 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -8013,6 +8013,9 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf,
#ifndef DBUG_OFF
m_keyread_only = save_keyread_only;
#endif
+ // Due to MRR, an index-only scan may now have a pushed index condition.
+ // (If it does, we follow the non-index-only code path here, except that
+ // we don't fetch the row).
bool have_icp = (pushed_idx_cond && pushed_idx_cond_keyno == active_index);
if (covered_lookup && m_lock_rows == RDB_LOCK_NONE && !have_icp) {
@@ -10569,7 +10572,8 @@ int ha_rocksdb::index_end() {
active_index = MAX_KEY;
in_range_check_pushed_down = FALSE;
- mrr_free();
+ if (mrr_rowid_reader)
+ mrr_free();
DBUG_RETURN(HA_EXIT_SUCCESS);
}
@@ -15520,6 +15524,8 @@ int ha_rocksdb::multi_range_read_next(char **range_info) {
while (1) {
while (1) {
+ if (table->in_use->killed) return HA_ERR_QUERY_INTERRUPTED;
+
if (mrr_read_index >= mrr_n_elements) {
if (mrr_rowid_reader->eof() || !mrr_n_elements) {
table->status = STATUS_NOT_FOUND; // not sure if this is necessary?
@@ -15527,8 +15533,6 @@ int ha_rocksdb::multi_range_read_next(char **range_info) {
return HA_ERR_END_OF_FILE;
}
- if (table->in_use->killed) return HA_ERR_QUERY_INTERRUPTED;
-
if ((rc = mrr_fill_buffer())) {
if (rc == HA_ERR_END_OF_FILE) table->status = STATUS_NOT_FOUND;
return rc;
1
0

[Commits] MDEV-19848 Server crashes in check_vcol_forward_refs upon INSERT DELAYED into table with long blob key
by Sachin Setiya 14 Oct '19
by Sachin Setiya 14 Oct '19
14 Oct '19
commit 02362a77d0d458f8bbdca50f660e7bfca1470a35
Author: Sachin <sachin.setiya(a)mariadb.com>
Date: Mon Oct 14 15:29:14 2019 +0530
MDEV-19848 Server crashes in check_vcol_forward_refs upon INSERT
DELAYED into table with long blob key
Problem:- Insert delayed is not working with Long Unique Index.
It is failing with
1. INSERT DELAYED INTO t1 VALUES();
2. INSERT DELAYED INTO t1 VALUES(1);
3. Potential Race condition When Insert DELAYED gets dup key
error(After fix),
And it will change original table key_info by calling
re_setup_keyinfo_hash, And second thread is in
check_duplicate_long_entries
4. Insert delayed into INVISIBLE COLUMN will also not work.
There are 4 main issues
1. While calling make_new_field we forgot to AND the LONG_UNIQUE_HASH_FIELD
flag into the new field's flags.
2. The new field created in get_local_table by make_new_field does
not respect the old field's visibility. Assigning the old field's
visibility will solve Problem 4 and part of Problem 2.
3. As we know Problem 3 race condition is caused because table and
delayed table share same key_info, So we will make a copy of original
table
key_info in get_local_table.
4. In parse_vcol_defs we have this code block
keypart->field->vcol_info=
table->field[keypart->field->field_index]->vcol_info;
Which is wrong because we should not change original
table->field->vcol_info with vcol_info which is create on delayed
thread.
diff --git a/mysql-test/main/long_unique_bugs.result
b/mysql-test/main/long_unique_bugs.result
index c0ba4d0b87d..16e825f0905 100644
--- a/mysql-test/main/long_unique_bugs.result
+++ b/mysql-test/main/long_unique_bugs.result
@@ -270,3 +270,32 @@ ERROR 42000: Specified key was too long; max key
length is 1000 bytes
create table t1(a int, unique(a) using hash);
#BULK insert > 100 rows (MI_MIN_ROWS_TO_DISABLE_INDEXES)
drop table t1;
+CREATE TABLE t1 (a blob, UNIQUE(a)) ENGINE=MyISAM;
+INSERT DELAYED t1 () VALUES (1);
+INSERT t1 () VALUES (2);
+DROP TABLE t1;
+CREATE TABLE t1 (a char(50), UNIQUE(a(10)) USING HASH);
+INSERT DELAYED t1 () VALUES (1);
+INSERT t1 () VALUES (2);
+DROP TABLE t1;
+CREATE TABLE t1 (
+a CHAR(128),
+b CHAR(128) AS (a),
+c varchar(5000),
+UNIQUE(c,b(64))
+) ENGINE=myisam;
+INSERT DELAYED t1 (a,c) VALUES (1,1);
+INSERT t1 (a,c) VALUES (2,2);
+INSERT t1 (a,c) VALUES (3,3);
+drop table t1;
+create table t1(a int , b int invisible);
+insert into t1 values(1);
+insert delayed into t1(a,b) values(2,2);
+#Should not fails
+insert delayed into t1 values(2);
+select a,b from t1 order by a;
+a b
+1 NULL
+2 2
+2 NULL
+DROP TABLE t1;
diff --git a/mysql-test/main/long_unique_bugs.test
b/mysql-test/main/long_unique_bugs.test
index 13a4e1367a0..365393aa2ea 100644
--- a/mysql-test/main/long_unique_bugs.test
+++ b/mysql-test/main/long_unique_bugs.test
@@ -340,3 +340,35 @@ while ($count)
--eval $insert_stmt
--enable_query_log
drop table t1;
+#
+# MDEV-19848 Server crashes in check_vcol_forward_refs upon INSERT DELAYED
into table with long blob key
+#
+CREATE TABLE t1 (a blob, UNIQUE(a)) ENGINE=MyISAM;
+INSERT DELAYED t1 () VALUES (1);
+INSERT t1 () VALUES (2);
+# Cleanup
+DROP TABLE t1;
+CREATE TABLE t1 (a char(50), UNIQUE(a(10)) USING HASH);
+INSERT DELAYED t1 () VALUES (1);
+INSERT t1 () VALUES (2);
+# Cleanup
+DROP TABLE t1;
+CREATE TABLE t1 (
+ a CHAR(128),
+ b CHAR(128) AS (a),
+ c varchar(5000),
+ UNIQUE(c,b(64))
+) ENGINE=myisam;
+INSERT DELAYED t1 (a,c) VALUES (1,1);
+--sleep 1
+INSERT t1 (a,c) VALUES (2,2);
+INSERT t1 (a,c) VALUES (3,3);
+drop table t1;
+create table t1(a int , b int invisible);
+insert into t1 values(1);
+insert delayed into t1(a,b) values(2,2);
+--echo #Should not fails
+insert delayed into t1 values(2);
+select a,b from t1 order by a;
+# Cleanup
+DROP TABLE t1;
diff --git a/sql/field.cc b/sql/field.cc
index 0eb53f40a54..d1187237f23 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -2373,8 +2373,13 @@ Field *Field::make_new_field(MEM_ROOT *root, TABLE
*new_table,
tmp->flags&= (NOT_NULL_FLAG | BLOB_FLAG | UNSIGNED_FLAG |
ZEROFILL_FLAG | BINARY_FLAG | ENUM_FLAG | SET_FLAG |
VERS_SYS_START_FLAG | VERS_SYS_END_FLAG |
- VERS_UPDATE_UNVERSIONED_FLAG);
+ VERS_UPDATE_UNVERSIONED_FLAG | LONG_UNIQUE_HASH_FIELD);
tmp->reset_fields();
+ /*
+ Calling make_new_field will return a VISIBLE field. If the caller
+ wants the original visibility, it should change it later.
+ This is done because views created on invisible fields are visible.
+ */
tmp->invisible= VISIBLE;
return tmp;
}
diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc
index fe6c5fa8ec4..d80044ec37d 100644
--- a/sql/sql_insert.cc
+++ b/sql/sql_insert.cc
@@ -2607,6 +2607,11 @@ TABLE *Delayed_insert::get_local_table(THD*
client_thd)
{
if (!(*field= (*org_field)->make_new_field(client_thd->mem_root, copy,
1)))
goto error;
+ /*
+ We want the same visibility as in the original table because we are
+ just creating a clone for delayed insert.
+ */
+ (*field)->invisible= (*org_field)->invisible;
(*field)->unireg_check= (*org_field)->unireg_check;
(*field)->orig_table= copy; // Remove connection
(*field)->move_field_offset(adjust_ptrs); // Point at copy->record[0]
@@ -2621,7 +2626,33 @@ TABLE *Delayed_insert::get_local_table(THD*
client_thd)
if (share->virtual_fields || share->default_expressions ||
share->default_fields)
{
+ /*
+ If we have a long unique table then delayed insert can modify the key
+ structure (re/setup_keyinfo_hash_all) of the original table when it
+ gets an insert error, and parse_vcol_defs will also modify the
+ key_info structure. So it is better to clone table->key_info for the
+ copy table.
+ We will not be cloning key_part_info or changing any field pointer,
+ because re/setup_keyinfo_hash_all only modifies the key_info array.
+ So the copy table ends up with a new key_info array that still points
+ at the old key_part_info.
+ */
+ if (share->long_unique_table)
+ {
+ KEY *key_info;
+ if (!(key_info= (KEY*) client_thd->alloc(share->keys*sizeof(KEY))))
+ goto error;
+ copy->key_info= key_info;
+ memcpy(key_info, table->key_info, sizeof(KEY)*share->keys);
+ }
+ /*
+ parse_vcol_defs expects key_infos to be in user defined format.
+ */
+ copy->setup_keyinfo_hash_all();
bool error_reported= FALSE;
+ /*
+ We won't be calling re_setup_keyinfo_hash because parse_vcol_defs
+ changes the key_infos to storage engine format.
+ */
if (unlikely(parse_vcol_defs(client_thd, client_thd->mem_root, copy,
&error_reported,
VCOL_INIT_DEPENDENCY_FAILURE_IS_WARNING)))
diff --git a/sql/table.cc b/sql/table.cc
index 718d0dce072..d0fcd449098 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -1223,7 +1223,16 @@ bool parse_vcol_defs(THD *thd, MEM_ROOT *mem_root,
TABLE *table,
new (mem_root) Item_field(thd, keypart->field),
new (mem_root) Item_int(thd, length));
list_item->fix_fields(thd, NULL);
- keypart->field->vcol_info=
+ /*
+ Do not change the vcol_info when vcol_info->expr is not NULL.
+ This will happen in the case of Delayed_insert::get_local_table().
+ If we changed the vcol_info in delayed insert, then the original
+ table's field->vcol_info would be created on the delayed insert
+ thread's mem_root.
+ */
+ if (!keypart->field->vcol_info ||
+ !keypart->field->vcol_info->expr)
+ keypart->field->vcol_info=
table->field[keypart->field->field_index]->vcol_info;
}
else
@@ -9084,6 +9093,26 @@ void re_setup_keyinfo_hash(KEY *key_info)
key_info->ext_key_parts= 1;
key_info->flags&= ~HA_NOSAME;
}
+
+/*
+ call setup_keyinfo_hash for all keys in table
+ */
+void TABLE::setup_keyinfo_hash_all()
+{
+ for (uint i= 0; i < s->keys; i++)
+ if (key_info[i].algorithm == HA_KEY_ALG_LONG_HASH)
+ setup_keyinfo_hash(&key_info[i]);
+}
+
+/*
+ call re_setup_keyinfo_hash for all keys in table
+ */
+void TABLE::re_setup_keyinfo_hash_all()
+{
+ for (uint i= 0; i < s->keys; i++)
+ if (key_info[i].algorithm == HA_KEY_ALG_LONG_HASH)
+ re_setup_keyinfo_hash(&key_info[i]);
+}
/**
@brief clone of current handler.
Creates a clone of handler used in update for
diff --git a/sql/table.h b/sql/table.h
index 1c721624d5d..76d11bbd604 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -1612,6 +1612,8 @@ struct TABLE
void vers_update_fields();
void vers_update_end();
void find_constraint_correlated_indexes();
+ void setup_keyinfo_hash_all();
+ void re_setup_keyinfo_hash_all();
void clone_handler_for_update();
void delete_update_handler();
--
Regards
Sachin Setiya
Software Engineer at MariaDB
1
0

12 Oct '19
revision-id: a4d7b682f90eaffb6c1de0e5ca8e75bb0167be17 (fb-prod201903-171-ga4d7b682f90)
parent(s): 71f5fa77faeae64d653012db88f0a9e224227d5f
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-10-12 22:41:44 +0300
message:
Issue #790, MyRocks/MRR: address review input part 3
- in ha_rocksdb::index_end(), call mrr_free() only if we are in an MRR scan.
- added a comment.
---
storage/rocksdb/ha_rocksdb.cc | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 2735d9d6adb..4da9293a75f 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -8013,6 +8013,9 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf,
#ifndef DBUG_OFF
m_keyread_only = save_keyread_only;
#endif
+ // Due to MRR, an index-only scan may now have a pushed index condition.
+ // (If it does, we follow the non-index-only code path here, except that
+ // we don't fetch the row).
bool have_icp = (pushed_idx_cond && pushed_idx_cond_keyno == active_index);
if (covered_lookup && m_lock_rows == RDB_LOCK_NONE && !have_icp) {
@@ -10569,7 +10572,8 @@ int ha_rocksdb::index_end() {
active_index = MAX_KEY;
in_range_check_pushed_down = FALSE;
- mrr_free();
+ if (mrr_rowid_reader)
+ mrr_free();
DBUG_RETURN(HA_EXIT_SUCCESS);
}
1
0

11 Oct '19
revision-id: 71f5fa77faeae64d653012db88f0a9e224227d5f (fb-prod201903-170-g71f5fa77fae)
parent(s): 40182db11b506f97183a8c41af747fb34eb239ec
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-10-12 00:09:11 +0300
message:
Issue #790, MyRocks/MRR: address more review input
Fix the code in ha_rocksdb::read_row_from_secondary_key(). The idea is
that index-only scans can now have Pushed Index Conditions (ICP):
one can run an MRR scan with ICP, and MyRocks/MRR will use an index-only
scan under the hood to collect the rowids.
The previous variant of the code broke the "covered lookups" feature.
---
storage/rocksdb/ha_rocksdb.cc | 29 ++++++++++++++---------------
1 file changed, 14 insertions(+), 15 deletions(-)
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 8019be03401..2735d9d6adb 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -7998,6 +7998,8 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf,
int rc = 0;
uint pk_size;
+ /* Get the key columns and primary key value */
+ const rocksdb::Slice &rkey = m_scan_it->key();
const rocksdb::Slice &value = m_scan_it->value();
#ifndef DBUG_OFF
@@ -8011,21 +8013,17 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf,
#ifndef DBUG_OFF
m_keyread_only = save_keyread_only;
#endif
+ bool have_icp = (pushed_idx_cond && pushed_idx_cond_keyno == active_index);
- if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) {
- // Due to MRR, we can have ICP enabled with covered_lookup == true
- if (!(rc = find_icp_matching_index_rec(move_forward, buf))) {
- const rocksdb::Slice &rkey = m_scan_it->key();
- const rocksdb::Slice &rval = m_scan_it->value();
- pk_size =
- kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, m_pk_packed_tuple);
- if (pk_size == RDB_INVALID_KEY_LEN) {
- rc = HA_ERR_ROCKSDB_CORRUPT_DATA;
- } else {
- rc = kd.unpack_record(table, buf, &rkey, &rval,
- m_converter->get_verify_row_debug_checksums());
- global_stats.covered_secondary_key_lookups.inc();
- }
+ if (covered_lookup && m_lock_rows == RDB_LOCK_NONE && !have_icp) {
+ pk_size =
+ kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, m_pk_packed_tuple);
+ if (pk_size == RDB_INVALID_KEY_LEN) {
+ rc = HA_ERR_ROCKSDB_CORRUPT_DATA;
+ } else {
+ rc = kd.unpack_record(table, buf, &rkey, &value,
+ m_converter->get_verify_row_debug_checksums());
+ global_stats.covered_secondary_key_lookups.inc();
}
} else {
if (kd.m_is_reverse_cf) move_forward = !move_forward;
@@ -8038,7 +8036,8 @@ int ha_rocksdb::read_row_from_secondary_key(uchar *const buf,
if (pk_size == RDB_INVALID_KEY_LEN) {
rc = HA_ERR_ROCKSDB_CORRUPT_DATA;
} else {
- rc = get_row_by_rowid(buf, m_pk_packed_tuple, pk_size);
+ if (!covered_lookup || m_lock_rows != RDB_LOCK_NONE)
+ rc = get_row_by_rowid(buf, m_pk_packed_tuple, pk_size);
}
}
}
1
0

[Commits] 40182db11b5: Issue #790, MyRocks/MRR: address review input and cleanup
by psergey 11 Oct '19
by psergey 11 Oct '19
11 Oct '19
revision-id: 40182db11b506f97183a8c41af747fb34eb239ec (fb-prod201903-169-g40182db11b5)
parent(s): 8e230333ad43098b8460240386b82e5d67a974c0
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-10-11 22:25:13 +0300
message:
Issue #790, MyRocks/MRR: address review input and cleanup
- Join two multi_get() functions together (the other implementation
that is used by bypass code now uses RocksDB's MultiGet with the same
signature, so there's no difference)
- Add more DBUG_ASSERTs as requested
- Call RANGE_SEQ_IF::skip_index_tuple when appropriate (+testcase)
- Call RANGE_SEQ_IF::skip_record when appropriate (+testcase)
---
mysql-test/suite/rocksdb/r/rocksdb_mrr.result | 59 ++++++++++++++++++++
mysql-test/suite/rocksdb/t/rocksdb_mrr.test | 35 ++++++++++++
storage/rocksdb/ha_rocksdb.cc | 78 ++++++++++++++-------------
3 files changed, 136 insertions(+), 36 deletions(-)
diff --git a/mysql-test/suite/rocksdb/r/rocksdb_mrr.result b/mysql-test/suite/rocksdb/r/rocksdb_mrr.result
index 7ece91c1fa4..6aa2ff4dc23 100644
--- a/mysql-test/suite/rocksdb/r/rocksdb_mrr.result
+++ b/mysql-test/suite/rocksdb/r/rocksdb_mrr.result
@@ -284,6 +284,65 @@ pk1 pk2 col1 filler mod(t3.col1,2)
20 20 20 20 0
26 26 26 26 0
28 28 28 28 0
+#
+# Test for BKA's variant of Index Condition Pushdown. With BKA,
+# pushed index conditions that refer to preceding tables are
+# handled in a special way because there's no clear concept of
+# "current row" for the preceding table(s)
+#
+explain
+select * from t0,t3 where t3.col1=t0.a and mod(t3.pk2,2)=t0.a;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ALL NULL NULL NULL NULL 10 Using where
+1 SIMPLE t3 ref col1 col1 5 test.t0.a 4 Using index condition; Using join buffer (Batched Key Access)
+select * from t0,t3 where t3.col1=t0.a and mod(t3.pk2,2)=t0.a;
+a pk1 pk2 col1 filler
+0 0 0 0 0
+1 1 1 1 1
+set optimizer_switch='mrr=off';
+select * from t0,t3 where t3.col1=t0.a and mod(t3.pk2,2)=t0.a;
+a pk1 pk2 col1 filler
+0 0 0 0 0
+1 1 1 1 1
+set optimizer_switch='mrr=on';
+#
+# A query which has RANGE_SEQ_IF::skip_record != nullptr.
+#
+# MultiGet/MRR does not invoke skip_record() as it would not produce
+# much speedup.
+#
+insert into t3 select 10000+a, 10000+a, a, 'duplicate-match' from t1;
+delete from t3 where col1 in (3,5);
+explain
+select * from t0 left join t3 on t3.col1=t0.a where t3.pk1 is null;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ALL NULL NULL NULL NULL 10 NULL
+1 SIMPLE t3 ref col1 col1 5 test.t0.a 4 Using where; Not exists; Using join buffer (Batched Key Access)
+select * from t0 left join t3 on t3.col1=t0.a where t3.pk1 is null;
+a pk1 pk2 col1 filler
+3 NULL NULL NULL NULL
+5 NULL NULL NULL NULL
+set optimizer_switch='mrr=off';
+select * from t0 left join t3 on t3.col1=t0.a where t3.pk1 is null;
+a pk1 pk2 col1 filler
+3 NULL NULL NULL NULL
+5 NULL NULL NULL NULL
+set optimizer_switch='mrr=on';
+explain
+select * from t0 where t0.a in (select t3.col1 from t3 where char_length(t3.filler)<30);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t0 ALL NULL NULL NULL NULL 10 Using where
+1 SIMPLE t3 ref col1 col1 5 test.t0.a 4 Using where; FirstMatch(t0); Using join buffer (Batched Key Access)
+select * from t0 where t0.a in (select t3.col1 from t3 where char_length(t3.filler)<30);
+a
+0
+1
+2
+4
+6
+7
+8
+9
drop table t0,t1,t2,t3,t4;
#
# Multi-keypart testcase
diff --git a/mysql-test/suite/rocksdb/t/rocksdb_mrr.test b/mysql-test/suite/rocksdb/t/rocksdb_mrr.test
index dc8c260f754..74675b1cb5b 100644
--- a/mysql-test/suite/rocksdb/t/rocksdb_mrr.test
+++ b/mysql-test/suite/rocksdb/t/rocksdb_mrr.test
@@ -195,6 +195,41 @@ where (t3.col1=20 or t3.col1 between 25 and 28) and mod(t3.col1,2)=0;
select pk1,pk2,col1, filler,mod(t3.col1,2) from t3
where (t3.col1=20 or t3.col1 between 25 and 28) and mod(t3.col1,2)=0;
+--echo #
+--echo # Test for BKA's variant of Index Condition Pushdown. With BKA,
+--echo # pushed index conditions that refer to preceding tables are
+--echo # handled in a special way because there's no clear concept of
+--echo # "current row" for the preceding table(s)
+--echo #
+
+explain
+select * from t0,t3 where t3.col1=t0.a and mod(t3.pk2,2)=t0.a;
+select * from t0,t3 where t3.col1=t0.a and mod(t3.pk2,2)=t0.a;
+
+set optimizer_switch='mrr=off';
+select * from t0,t3 where t3.col1=t0.a and mod(t3.pk2,2)=t0.a;
+set optimizer_switch='mrr=on';
+
+--echo #
+--echo # A query which has RANGE_SEQ_IF::skip_record != nullptr.
+--echo #
+--echo # MultiGet/MRR does not invoke skip_record() as it would not produce
+--echo # much speedup.
+--echo #
+insert into t3 select 10000+a, 10000+a, a, 'duplicate-match' from t1;
+delete from t3 where col1 in (3,5);
+
+explain
+select * from t0 left join t3 on t3.col1=t0.a where t3.pk1 is null;
+select * from t0 left join t3 on t3.col1=t0.a where t3.pk1 is null;
+set optimizer_switch='mrr=off';
+select * from t0 left join t3 on t3.col1=t0.a where t3.pk1 is null;
+set optimizer_switch='mrr=on';
+
+explain
+select * from t0 where t0.a in (select t3.col1 from t3 where char_length(t3.filler)<30);
+select * from t0 where t0.a in (select t3.col1 from t3 where char_length(t3.filler)<30);
+
drop table t0,t1,t2,t3,t4;
--echo #
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 15601fd3f82..8019be03401 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -3095,11 +3095,6 @@ class Rdb_transaction {
const rocksdb::Slice &key, rocksdb::PinnableSlice *const value,
bool exclusive, const bool do_validate) = 0;
- virtual void multi_get(const rocksdb::ReadOptions &read_options,
- rocksdb::ColumnFamilyHandle *column_family,
- const size_t num_keys, const rocksdb::Slice *keys,
- rocksdb::PinnableSlice *values,
- rocksdb::Status *statuses, bool sorted_input) = 0;
virtual rocksdb::Iterator *get_iterator(
const rocksdb::ReadOptions &options,
rocksdb::ColumnFamilyHandle *column_family) = 0;
@@ -3476,15 +3471,6 @@ class Rdb_transaction_impl : public Rdb_transaction {
return m_rocksdb_tx->Get(m_read_opts, column_family, key, value);
}
- void multi_get(const rocksdb::ReadOptions &read_options,
- rocksdb::ColumnFamilyHandle *column_family,
- const size_t num_keys, const rocksdb::Slice *keys,
- rocksdb::PinnableSlice *values, rocksdb::Status *statuses,
- bool sorted_input) override {
- m_rocksdb_tx->MultiGet(read_options, column_family, num_keys, keys, values,
- statuses, sorted_input);
- }
-
void multi_get(rocksdb::ColumnFamilyHandle *const column_family,
const size_t num_keys, const rocksdb::Slice *keys,
rocksdb::PinnableSlice *values, rocksdb::Status *statuses,
@@ -3791,16 +3777,6 @@ class Rdb_writebatch_impl : public Rdb_transaction {
return get(column_family, key, value);
}
- void multi_get(const rocksdb::ReadOptions &read_options,
- rocksdb::ColumnFamilyHandle *column_family,
- const size_t num_keys, const rocksdb::Slice *keys,
- rocksdb::PinnableSlice *values, rocksdb::Status *statuses,
- bool sorted_input) override {
- // todo: could we just read the committed content from the DB here?
- //psergey-todo:!
- DBUG_ASSERT(0);
- }
-
void multi_get(rocksdb::ColumnFamilyHandle *const column_family,
const size_t num_keys, const rocksdb::Slice *keys,
rocksdb::PinnableSlice *values, rocksdb::Status *statuses,
@@ -15173,9 +15149,8 @@ ha_rows ha_rocksdb::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
}
if (all_eq_ranges) {
- // Indicate that we will use Mutlit-Get MRR
+ // Indicate that we will use MultiGet MRR
*flags &= ~HA_MRR_USE_DEFAULT_IMPL;
- *flags |= HA_MRR_CONVERT_REF_TO_RANGE;
*flags |= HA_MRR_SUPPORT_SORTED;
*bufsz = mrr_get_length_per_rec() * res * 1.1 + 1;
}
@@ -15237,7 +15212,7 @@ class Mrr_rowid_source {
//
-// Rowid source that produces rowids by enumerating a seqence of ranges
+// Rowid source that produces rowids by enumerating a sequence of ranges
//
class Mrr_pk_scan_rowid_source : public Mrr_rowid_source {
range_seq_t mrr_seq_it;
@@ -15263,6 +15238,8 @@ class Mrr_pk_scan_rowid_source : public Mrr_rowid_source {
(key_part_map(1) << self->m_pk_descr->get_key_parts()) - 1;
DBUG_ASSERT(range.start_key.keypart_map == all_parts_map);
DBUG_ASSERT(range.end_key.keypart_map == all_parts_map);
+ DBUG_ASSERT(range.start_key.flag == HA_READ_KEY_EXACT);
+ DBUG_ASSERT(range.end_key.flag == HA_READ_AFTER_KEY);
*range_ptr = range.ptr;
*size = self->m_pk_descr->pack_index_tuple(self->table,
@@ -15299,10 +15276,23 @@ class Mrr_sec_key_rowid_source : public Mrr_rowid_source {
if (err)
return err;
- err = self->handler::multi_range_read_next(range_ptr);
- if (!err) {
+ while (!(err = self->handler::multi_range_read_next(range_ptr))) {
+
+ if (self->mrr_funcs.skip_index_tuple &&
+ self->mrr_funcs.skip_index_tuple(self->mrr_iter, *range_ptr)) {
+ // BKA's variant of "Index Condition Pushdown" check failed
+ continue;
+ }
+
+ if (self->mrr_funcs.skip_record &&
+ self->mrr_funcs.skip_record(self->mrr_iter, *range_ptr,
+ (uchar*)self->m_last_rowkey.ptr())) {
+ continue;
+ }
+
memcpy(buf, self->m_last_rowkey.ptr(), self->m_last_rowkey.length());
*size = self->m_last_rowkey.length();
+ break;
}
return err;
}
@@ -15344,6 +15334,9 @@ int ha_rocksdb::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
mrr_sorted_mode = (mode & HA_MRR_SORTED) ? true : false;
if (active_index == table->s->primary_key) {
+ // ICP is not supported for PK, so we don't expect that BKA's variant
+ // of ICP would be used:
+ DBUG_ASSERT(!mrr_funcs.skip_index_tuple);
mrr_rowid_reader =
new Mrr_pk_scan_rowid_source(this, seq_init_param, n_ranges, mode);
} else {
@@ -15489,9 +15482,8 @@ int ha_rocksdb::mrr_fill_buffer() {
Rdb_transaction *const tx = get_or_create_tx(table->in_use);
- tx->multi_get(tx->m_read_opts, m_pk_descr->get_cf(),
- mrr_n_elements, // actual number of elements we've got
- mrr_keys, mrr_values, mrr_statuses, mrr_sorted_mode);
+ tx->multi_get(m_pk_descr->get_cf(), mrr_n_elements, mrr_keys, mrr_values,
+ mrr_statuses, mrr_sorted_mode);
return 0;
}
@@ -15527,7 +15519,7 @@ int ha_rocksdb::multi_range_read_next(char **range_info) {
Rdb_transaction *&tx = get_tx_from_thd(table->in_use);
int rc;
- do {
+ while (1) {
while (1) {
if (mrr_read_index >= mrr_n_elements) {
if (mrr_rowid_reader->eof() || !mrr_n_elements) {
@@ -15543,13 +15535,26 @@ int ha_rocksdb::multi_range_read_next(char **range_info) {
return rc;
}
}
- // Skip the "is not found" errors
+ // If we found a status that has a row, leave the loop
if (mrr_statuses[mrr_read_index].ok()) break;
+
+ // Skip the NotFound errors, return any other error to the SQL layer
+ if (!mrr_statuses[mrr_read_index].IsNotFound())
+ return rdb_error_to_mysql(mrr_statuses[mrr_read_index]);
+
mrr_read_index++;
}
size_t cur_key = mrr_read_index++;
const rocksdb::Slice &rowkey = mrr_keys[cur_key];
+
+ if (mrr_funcs.skip_record &&
+ mrr_funcs.skip_record(mrr_iter, mrr_range_ptrs[cur_key],
+ (uchar*)rowkey.data())) {
+ rc = HA_ERR_END_OF_FILE;
+ continue;
+ }
+
m_last_rowkey.copy((const char *)rowkey.data(), rowkey.size(),
&my_charset_bin);
@@ -15570,8 +15575,9 @@ int ha_rocksdb::multi_range_read_next(char **range_info) {
rc = convert_record_from_storage_format(&rowkey, table->record[0]);
m_retrieved_record.Reset();
mrr_values[cur_key].Reset();
- table->status = rc ? STATUS_NOT_FOUND : 0;
- } while (0);
+ break;
+ }
+ table->status = rc ? STATUS_NOT_FOUND : 0;
return rc;
}
1
0

[Commits] 8ccda715d0e: MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
by Varun 10 Oct '19
by Varun 10 Oct '19
10 Oct '19
revision-id: 8ccda715d0e657673445e150a9ed6ef784c68a61 (mariadb-10.4.4-341-g8ccda715d0e)
parent(s): a340af922361e3958e5d6653c8b840771db282f2
author: Varun Gupta
committer: Varun Gupta
timestamp: 2019-10-10 12:58:57 +0530
message:
MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
For the case when the SJM scan table is the first table in the join order,
then if we want to do the sorting on the SJM scan table, then we need to
make sure that we unpack the values to base table fields in two cases:
1) Reading the SJM table and writing the sort-keys inside the sort-buffer
2) Reading the sorted data from the sort file
---
mysql-test/main/order_by.result | 138 +++++++++++++++++++++++++++++++++++++++-
mysql-test/main/order_by.test | 34 ++++++++++
sql/filesort.cc | 10 +++
sql/filesort.h | 14 +++-
sql/opt_subselect.cc | 10 ++-
sql/records.cc | 13 ++++
sql/records.h | 1 +
sql/sql_select.cc | 99 +++++++++++++---------------
sql/sql_select.h | 4 +-
sql/sql_sort.h | 2 +
sql/table.h | 1 +
11 files changed, 264 insertions(+), 62 deletions(-)
diff --git a/mysql-test/main/order_by.result b/mysql-test/main/order_by.result
index b059cc686cd..e74583670fc 100644
--- a/mysql-test/main/order_by.result
+++ b/mysql-test/main/order_by.result
@@ -3322,7 +3322,7 @@ WHERE books.library_id = 8663 AND
books.scheduled_for_removal=0 )
ORDER BY wings.id;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 2 100.00 Using temporary; Using filesort
+1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 2 100.00 Using filesort
1 PRIMARY wings eq_ref PRIMARY PRIMARY 4 test.books.wings_id 1 100.00
2 MATERIALIZED books ref library_idx library_idx 4 const 2 100.00 Using where
Warnings:
@@ -3436,3 +3436,139 @@ Note 1003 select `test`.`t4`.`a` AS `a`,`test`.`t4`.`b` AS `b`,`test`.`t4`.`c` A
set histogram_size=@tmp_h, histogram_type=@tmp_ht, use_stat_tables=@tmp_u,
optimizer_use_condition_selectivity=@tmp_o;
drop table t1,t2,t3,t4;
+#
+# MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
+#
+CREATE TABLE t1 (a INT, b int, primary key(a));
+CREATE TABLE t2 (a INT, b INT);
+INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6),
+(2354,7),(321421,3),(535,2),(4535,3);
+INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3);
+# Join order should have the SJM scan table as the first table for both
+# the queries with GROUP BY and ORDER BY clause.
+EXPLAIN SELECT t1.a
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ORDER BY t1.a DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 3 Using filesort
+1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t2.a 1 Using index
+2 MATERIALIZED t2 ALL NULL NULL NULL NULL 3 Using where
+EXPLAIN FORMAT=JSON SELECT t1.a
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ORDER BY t1.a DESC;
+EXPLAIN
+{
+ "query_block": {
+ "select_id": 1,
+ "read_sorted_file": {
+ "filesort": {
+ "sort_key": "t1.a desc",
+ "table": {
+ "table_name": "<subquery2>",
+ "access_type": "ALL",
+ "possible_keys": ["distinct_key"],
+ "rows": 3,
+ "filtered": 100,
+ "materialized": {
+ "unique": 1,
+ "query_block": {
+ "select_id": 2,
+ "table": {
+ "table_name": "t2",
+ "access_type": "ALL",
+ "rows": 3,
+ "filtered": 100,
+ "attached_condition": "t2.b = 3 and t2.a is not null"
+ }
+ }
+ }
+ }
+ }
+ },
+ "table": {
+ "table_name": "t1",
+ "access_type": "eq_ref",
+ "possible_keys": ["PRIMARY"],
+ "key": "PRIMARY",
+ "key_length": "4",
+ "used_key_parts": ["a"],
+ "ref": ["test.t2.a"],
+ "rows": 1,
+ "filtered": 100,
+ "using_index": true
+ }
+ }
+}
+SELECT t1.a
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ORDER BY t1.a DESC;
+a
+273
+96
+58
+EXPLAIN SELECT t1.a, group_concat(t1.b)
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+GROUP BY t1.a DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 3 Using filesort
+1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t2.a 1
+2 MATERIALIZED t2 ALL NULL NULL NULL NULL 3 Using where
+EXPLAIN FORMAT=JSON SELECT t1.a, group_concat(t1.b)
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+GROUP BY t1.a DESC;
+EXPLAIN
+{
+ "query_block": {
+ "select_id": 1,
+ "read_sorted_file": {
+ "filesort": {
+ "sort_key": "t1.a desc",
+ "table": {
+ "table_name": "<subquery2>",
+ "access_type": "ALL",
+ "possible_keys": ["distinct_key"],
+ "rows": 3,
+ "filtered": 100,
+ "materialized": {
+ "unique": 1,
+ "query_block": {
+ "select_id": 2,
+ "table": {
+ "table_name": "t2",
+ "access_type": "ALL",
+ "rows": 3,
+ "filtered": 100,
+ "attached_condition": "t2.b = 3 and t2.a is not null"
+ }
+ }
+ }
+ }
+ }
+ },
+ "table": {
+ "table_name": "t1",
+ "access_type": "eq_ref",
+ "possible_keys": ["PRIMARY"],
+ "key": "PRIMARY",
+ "key_length": "4",
+ "used_key_parts": ["a"],
+ "ref": ["test.t2.a"],
+ "rows": 1,
+ "filtered": 100
+ }
+ }
+}
+SELECT t1.a, group_concat(t1.b)
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+GROUP BY t1.a DESC;
+a group_concat(t1.b)
+273 3
+96 2
+58 1
+DROP TABLE t1, t2;
diff --git a/mysql-test/main/order_by.test b/mysql-test/main/order_by.test
index 934c503302f..b3e43d27e2f 100644
--- a/mysql-test/main/order_by.test
+++ b/mysql-test/main/order_by.test
@@ -2276,3 +2276,37 @@ set histogram_size=@tmp_h, histogram_type=@tmp_ht, use_stat_tables=@tmp_u,
optimizer_use_condition_selectivity=@tmp_o;
drop table t1,t2,t3,t4;
+
+
+--echo #
+--echo # MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
+--echo #
+
+CREATE TABLE t1 (a INT, b int, primary key(a));
+CREATE TABLE t2 (a INT, b INT);
+
+INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6),
+ (2354,7),(321421,3),(535,2),(4535,3);
+INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3);
+
+--echo # Join order should have the SJM scan table as the first table for both
+--echo # the queries with GROUP BY and ORDER BY clause.
+
+let $query= SELECT t1.a
+ FROM t1
+ WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ ORDER BY t1.a DESC;
+
+eval EXPLAIN $query;
+eval EXPLAIN FORMAT=JSON $query;
+eval $query;
+
+let $query= SELECT t1.a, group_concat(t1.b)
+ FROM t1
+ WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ GROUP BY t1.a DESC;
+
+eval EXPLAIN $query;
+eval EXPLAIN FORMAT=JSON $query;
+eval $query;
+DROP TABLE t1, t2;
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 3f4291cfb1f..0fc5724cf43 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -183,6 +183,9 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
&multi_byte_charset),
table, max_rows, filesort->sort_positions);
+ param.set_all_read_bits= filesort->set_all_read_bits;
+ param.unpack= filesort->unpack;
+
sort->addon_buf= param.addon_buf;
sort->addon_field= param.addon_field;
sort->unpack= unpack_addon_fields;
@@ -756,13 +759,20 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
goto err;
}
+ if (param->set_all_read_bits)
+ sort_form->column_bitmaps_set(save_read_set, save_write_set);
+
DEBUG_SYNC(thd, "after_index_merge_phase1");
for (;;)
{
if (quick_select)
error= select->quick->get_next();
else /* Not quick-select */
+ {
error= file->ha_rnd_next(sort_form->record[0]);
+ if (param->unpack)
+ param->unpack(sort_form);
+ }
if (unlikely(error))
break;
file->position(sort_form->record[0]);
diff --git a/sql/filesort.h b/sql/filesort.h
index 5f79a5095cc..7e88982ab5f 100644
--- a/sql/filesort.h
+++ b/sql/filesort.h
@@ -58,6 +58,13 @@ class Filesort: public Sql_alloc
Filesort_tracker *tracker;
+ /*
+ TRUE means all the fields of table of whose bitmap read_set is set
+ need to be read while reading records in the sort buffer.
+ FALSE otherwise
+ */
+ bool set_all_read_bits;
+
Filesort(ORDER *order_arg, ha_rows limit_arg, bool sort_positions_arg,
SQL_SELECT *select_arg):
order(order_arg),
@@ -66,7 +73,9 @@ class Filesort: public Sql_alloc
select(select_arg),
own_select(false),
using_pq(false),
- sort_positions(sort_positions_arg)
+ sort_positions(sort_positions_arg),
+ set_all_read_bits(FALSE),
+ unpack(NULL)
{
DBUG_ASSERT(order);
};
@@ -75,6 +84,9 @@ class Filesort: public Sql_alloc
/* Prepare ORDER BY list for sorting. */
uint make_sortorder(THD *thd, JOIN *join, table_map first_table_bit);
+ /* Unpack temp table columns to base table columns*/
+ void (*unpack)(TABLE *);
+
private:
void cleanup();
};
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index 87458357865..f837a6394af 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -4252,11 +4252,11 @@ bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab)
sjm_tab->type= JT_ALL;
/* Initialize full scan */
- sjm_tab->read_first_record= join_read_record_no_init;
+ sjm_tab->read_first_record= join_init_read_record;
sjm_tab->read_record.copy_field= sjm->copy_field;
sjm_tab->read_record.copy_field_end= sjm->copy_field +
sjm->sjm_table_cols.elements;
- sjm_tab->read_record.read_record_func= rr_sequential_and_unpack;
+ sjm_tab->read_record.read_record_func= read_record_func_for_rr_and_unpack;
}
sjm_tab->bush_children->end[-1].next_select= end_sj_materialize;
@@ -7105,3 +7105,9 @@ bool Item_in_subselect::pushdown_cond_for_in_subquery(THD *thd, Item *cond)
thd->lex->current_select= save_curr_select;
DBUG_RETURN(FALSE);
}
+
+
+bool TABLE_LIST::is_sjm_scan_table()
+{
+ return is_active_sjm() && sj_mat_info->is_sj_scan;
+}
diff --git a/sql/records.cc b/sql/records.cc
index 3d709182a4e..f6885f773d5 100644
--- a/sql/records.cc
+++ b/sql/records.cc
@@ -709,3 +709,16 @@ static int rr_cmp(uchar *a,uchar *b)
return (int) a[7] - (int) b[7];
#endif
}
+
+
+int read_record_func_for_rr_and_unpack(READ_RECORD *info)
+{
+ int error;
+ if ((error= info->read_record_func_and_unpack_calls(info)))
+ return error;
+
+ for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
+ (*cp->do_copy)(cp);
+
+ return error;
+}
diff --git a/sql/records.h b/sql/records.h
index faf0d13c9a9..037a06b9d34 100644
--- a/sql/records.h
+++ b/sql/records.h
@@ -55,6 +55,7 @@ struct READ_RECORD
TABLE *table; /* Head-form */
Unlock_row_func unlock_row;
Read_func read_record_func;
+ Read_func read_record_func_and_unpack_calls;
THD *thd;
SQL_SELECT *select;
uint ref_length, reclength, rec_cache_size, error_offset;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 36d9eda3383..0523cdef178 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -3799,6 +3799,15 @@ JOIN::add_sorting_to_table(JOIN_TAB *tab, ORDER *order)
tab->select);
if (!tab->filesort)
return true;
+
+ TABLE *table= tab->table;
+ if ((tab == join_tab + const_tables) &&
+ table->pos_in_table_list &&
+ table->pos_in_table_list->is_active_sjm())
+ {
+ tab->filesort->set_all_read_bits= TRUE;
+ tab->filesort->unpack= unpack_to_base_table_fields;
+ }
/*
Select was moved to filesort->select to force join_init_read_record to use
sorted result instead of reading table through select.
@@ -14015,37 +14024,8 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
can be used without tmp. table.
*/
bool can_subst_to_first_table= false;
- bool first_is_in_sjm_nest= false;
- if (first_is_base_table)
- {
- TABLE_LIST *tbl_for_first=
- join->join_tab[join->const_tables].table->pos_in_table_list;
- first_is_in_sjm_nest= tbl_for_first->sj_mat_info &&
- tbl_for_first->sj_mat_info->is_used;
- }
- /*
- Currently we do not employ the optimization that uses multiple
- equalities for ORDER BY to remove tmp table in the case when
- the first table happens to be the result of materialization of
- a semi-join nest ( <=> first_is_in_sjm_nest == true).
-
- When a semi-join nest is materialized and scanned to look for
- possible matches in the remaining tables for every its row
- the fields from the result of materialization are copied
- into the record buffers of tables from the semi-join nest.
- So these copies are used to access the remaining tables rather
- than the fields from the result of materialization.
-
- Unfortunately now this so-called 'copy back' technique is
- supported only if the rows are scanned with the rr_sequential
- function, but not with other rr_* functions that are employed
- when the result of materialization is required to be sorted.
-
- TODO: either to support 'copy back' technique for the above case,
- or to get rid of this technique altogether.
- */
if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP) &&
- first_is_base_table && !first_is_in_sjm_nest &&
+ first_is_base_table &&
order->item[0]->real_item()->type() == Item::FIELD_ITEM &&
join->cond_equal)
{
@@ -19922,19 +19902,6 @@ do_select(JOIN *join, Procedure *procedure)
}
-int rr_sequential_and_unpack(READ_RECORD *info)
-{
- int error;
- if (unlikely((error= rr_sequential(info))))
- return error;
-
- for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
- (*cp->do_copy)(cp);
-
- return error;
-}
-
-
/**
@brief
Instantiates temporary table
@@ -21223,6 +21190,8 @@ bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab)
int join_init_read_record(JOIN_TAB *tab)
{
+ bool need_unpacking= FALSE;
+ JOIN *join= tab->join;
/*
Note: the query plan tree for the below operations is constructed in
save_agg_explain_data.
@@ -21232,6 +21201,12 @@ int join_init_read_record(JOIN_TAB *tab)
if (tab->filesort && tab->sort_table()) // Sort table.
return 1;
+ if (join->top_join_tab_count != join->const_tables)
+ {
+ TABLE_LIST *tbl= tab->table->pos_in_table_list;
+ need_unpacking= tbl ? tbl->is_sjm_scan_table() : FALSE;
+ }
+
tab->build_range_rowid_filter_if_needed();
DBUG_EXECUTE_IF("kill_join_init_read_record",
@@ -21249,16 +21224,6 @@ int join_init_read_record(JOIN_TAB *tab)
if (!tab->preread_init_done && tab->preread_init())
return 1;
-
- if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
- tab->select, tab->filesort_result, 1,1, FALSE))
- return 1;
- return tab->read_record.read_record();
-}
-
-int
-join_read_record_no_init(JOIN_TAB *tab)
-{
Copy_field *save_copy, *save_copy_end;
/*
@@ -21268,12 +21233,20 @@ join_read_record_no_init(JOIN_TAB *tab)
save_copy= tab->read_record.copy_field;
save_copy_end= tab->read_record.copy_field_end;
- init_read_record(&tab->read_record, tab->join->thd, tab->table,
- tab->select, tab->filesort_result, 1, 1, FALSE);
+
+ if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
+ tab->select, tab->filesort_result, 1, 1, FALSE))
+ return 1;
tab->read_record.copy_field= save_copy;
tab->read_record.copy_field_end= save_copy_end;
- tab->read_record.read_record_func= rr_sequential_and_unpack;
+
+ if (need_unpacking)
+ {
+ tab->read_record.read_record_func_and_unpack_calls=
+ tab->read_record.read_record_func;
+ tab->read_record.read_record_func = read_record_func_for_rr_and_unpack;
+ }
return tab->read_record.read_record();
}
@@ -28981,6 +28954,20 @@ void build_notnull_conds_for_inner_nest_of_outer_join(JOIN *join,
}
+/*
+ @brief
+ Unpacking temp table fields to base table fields.
+*/
+
+void unpack_to_base_table_fields(TABLE *table)
+{
+ JOIN_TAB *tab= table->reginfo.join_tab;
+ for (Copy_field *cp= tab->read_record.copy_field;
+ cp != tab->read_record.copy_field_end; cp++)
+ (*cp->do_copy)(cp);
+}
+
+
/**
@} (end of group Query_Optimizer)
*/
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 4f7bf49f635..21c07c9bacd 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -223,7 +223,7 @@ typedef enum_nested_loop_state
(*Next_select_func)(JOIN *, struct st_join_table *, bool);
Next_select_func setup_end_select_func(JOIN *join, JOIN_TAB *tab);
int rr_sequential(READ_RECORD *info);
-int rr_sequential_and_unpack(READ_RECORD *info);
+int read_record_func_for_rr_and_unpack(READ_RECORD *info);
Item *remove_pushed_top_conjuncts(THD *thd, Item *cond);
Item *and_new_conditions_to_optimized_cond(THD *thd, Item *cond,
COND_EQUAL **cond_eq,
@@ -2352,7 +2352,6 @@ create_virtual_tmp_table(THD *thd, Field *field)
int test_if_item_cache_changed(List<Cached_item> &list);
int join_init_read_record(JOIN_TAB *tab);
-int join_read_record_no_init(JOIN_TAB *tab);
void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key);
inline Item * and_items(THD *thd, Item* cond, Item *item)
{
@@ -2410,6 +2409,7 @@ int print_explain_message_line(select_result_sink *result,
void explain_append_mrr_info(QUICK_RANGE_SELECT *quick, String *res);
int append_possible_keys(MEM_ROOT *alloc, String_list &list, TABLE *table,
key_map possible_keys);
+void unpack_to_base_table_fields(TABLE *table);
/****************************************************************************
Temporary table support for SQL Runtime
diff --git a/sql/sql_sort.h b/sql/sql_sort.h
index 7abbc808632..cd712cb50aa 100644
--- a/sql/sql_sort.h
+++ b/sql/sql_sort.h
@@ -79,6 +79,7 @@ class Sort_param {
SORT_FIELD *end;
SORT_ADDON_FIELD *addon_field; // Descriptors for companion fields.
LEX_STRING addon_buf; // Buffer & length of added packed fields.
+ bool set_all_read_bits;
uchar *unique_buff;
bool not_killable;
@@ -93,6 +94,7 @@ class Sort_param {
}
void init_for_filesort(uint sortlen, TABLE *table,
ha_rows maxrows, bool sort_positions);
+ void (*unpack)(TABLE *);
};
diff --git a/sql/table.h b/sql/table.h
index 1a7e5fbd4dc..35ba9bbb95d 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -2622,6 +2622,7 @@ struct TABLE_LIST
*/
const char *get_table_name() const { return view != NULL ? view_name.str : table_name.str; }
bool is_active_sjm();
+ bool is_sjm_scan_table();
bool is_jtbm() { return MY_TEST(jtbm_subselect != NULL); }
st_select_lex_unit *get_unit();
st_select_lex *get_single_select();
2
1

[Commits] MDEV-20001 Potential dangerous regression: INSERT INTO >=100 rows fail for myisam table with HASH indexes
by Sachin Setiya 09 Oct '19
by Sachin Setiya 09 Oct '19
09 Oct '19
commit 52f3829b95cea6e97a1eedfd8e5a73ddaf611809
Author: Sachin <sachin.setiya(a)mariadb.com>
Date: Wed Oct 9 21:16:31 2019 +0530
MDEV-20001 Potential dangerous regression: INSERT INTO >=100 rows fail
for myisam table with HASH indexes
Problem:-
The issue is that when we do a bulk insert with more than
MI_MIN_ROWS_TO_DISABLE_INDEXES (100) rows, we try to disable the indexes
to speed up the insert. But the current logic also disables the long
unique indexes.
Solution:- In ha_myisam::start_bulk_insert if we find long hash index
(HA_KEY_ALG_LONG_HASH) we will not disable the index.
This commit also refactors the mi_disable_indexes_for_rebuild function:
since this function is called from only one place, it is inlined into
start_bulk_insert.
mi_clear_key_active is added into myisamdef.h because now it is also
used in the ha_myisam.cc file.
diff --git a/include/myisam.h b/include/myisam.h
index 216f041c8a9..f2e31bb9f60 100644
--- a/include/myisam.h
+++ b/include/myisam.h
@@ -430,6 +430,7 @@ int sort_ft_buf_flush(MI_SORT_PARAM *sort_param);
int thr_write_keys(MI_SORT_PARAM *sort_param);
int sort_write_record(MI_SORT_PARAM *sort_param);
int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages,
ulonglong);
+my_bool mi_too_big_key_for_sort(MI_KEYDEF *key, ha_rows rows);
#ifdef __cplusplus
}
diff --git a/mysql-test/main/long_unique_bugs.result
b/mysql-test/main/long_unique_bugs.result
index d4b71e2bc46..c0ba4d0b87d 100644
--- a/mysql-test/main/long_unique_bugs.result
+++ b/mysql-test/main/long_unique_bugs.result
@@ -267,3 +267,6 @@ connection default;
DROP TABLE t1, t2;
CREATE TABLE t1 (a TEXT, UNIQUE(a)) ENGINE=Aria;
ERROR 42000: Specified key was too long; max key length is 1000 bytes
+create table t1(a int, unique(a) using hash);
+#BULK insert > 100 rows (MI_MIN_ROWS_TO_DISABLE_INDEXES)
+drop table t1;
diff --git a/mysql-test/main/long_unique_bugs.test
b/mysql-test/main/long_unique_bugs.test
index 62c4076cee0..13a4e1367a0 100644
--- a/mysql-test/main/long_unique_bugs.test
+++ b/mysql-test/main/long_unique_bugs.test
@@ -323,3 +323,20 @@ DROP TABLE t1, t2;
#
--error ER_TOO_LONG_KEY
CREATE TABLE t1 (a TEXT, UNIQUE(a)) ENGINE=Aria;
+
+#
+# MDEV-20001 Potential dangerous regression: INSERT INTO >=100 rows fail
for myisam table with HASH indexes
+#
+create table t1(a int, unique(a) using hash);
+--let $count=150
+--let insert_stmt= insert into t1 values(200)
+while ($count)
+{
+ --let $insert_stmt=$insert_stmt,($count)
+ --dec $count
+}
+--disable_query_log
+--echo #BULK insert > 100 rows (MI_MIN_ROWS_TO_DISABLE_INDEXES)
+--eval $insert_stmt
+--enable_query_log
+drop table t1;
diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
index bde9c99288f..54c38ae69b4 100644
--- a/storage/myisam/ha_myisam.cc
+++ b/storage/myisam/ha_myisam.cc
@@ -1749,7 +1749,35 @@ void ha_myisam::start_bulk_insert(ha_rows rows, uint
flags)
else
{
my_bool all_keys= MY_TEST(flags & HA_CREATE_UNIQUE_INDEX_BY_SORT);
- mi_disable_indexes_for_rebuild(file, rows, all_keys);
+ MYISAM_SHARE *share=file->s;
+ MI_KEYDEF *key=share->keyinfo;
+ uint i;
+ /*
+ Deactivate all indexes that can be recreated fast.
+ These include packed keys on which sorting will use more temporary
+ space than the max allowed file length or for which the unpacked keys
+ will take much more space than packed keys.
+ Note that 'rows' may be zero for the case when we don't know how many
+ rows we will put into the file.
+ Long Unique Index (HA_KEY_ALG_LONG_HASH) will not be disabled because
+ there unique property is enforced at the time of ha_write_row
+ (check_duplicate_long_entries). So we need active index at the time
of
+ insert.
+ */
+ DBUG_ASSERT(file->state->records == 0 &&
+ (!rows || rows >= MI_MIN_ROWS_TO_DISABLE_INDEXES));
+ for (i=0 ; i < share->base.keys ; i++,key++)
+ {
+ if (!(key->flag & (HA_SPATIAL | HA_AUTO_KEY)) &&
+ ! mi_too_big_key_for_sort(key,rows) && file->s->base.auto_key
!= i+1 &&
+ (all_keys || !(key->flag & HA_NOSAME)) &&
+ table->key_info[i].algorithm != HA_KEY_ALG_LONG_HASH)
+ {
+ mi_clear_key_active(share->state.key_map, i);
+ file->update|= HA_STATE_CHANGED;
+ file->create_unique_index_by_sort= all_keys;
+ }
+ }
}
}
else
diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c
index 22eb97b24f3..2a4ef8851c9 100644
--- a/storage/myisam/mi_check.c
+++ b/storage/myisam/mi_check.c
@@ -4670,7 +4670,7 @@ static ha_checksum mi_byte_checksum(const uchar *buf,
uint length)
return crc;
}
-static my_bool mi_too_big_key_for_sort(MI_KEYDEF *key, ha_rows rows)
+my_bool mi_too_big_key_for_sort(MI_KEYDEF *key, ha_rows rows)
{
uint key_maxlength=key->maxlength;
if (key->flag & HA_FULLTEXT)
@@ -4684,38 +4684,6 @@ static my_bool mi_too_big_key_for_sort(MI_KEYDEF
*key, ha_rows rows)
((ulonglong) rows * key_maxlength > myisam_max_temp_length));
}
-/*
- Deactivate all indexes that can be recreated fast.
- These include packed keys on which sorting will use more temporary
- space than the max allowed file length or for which the unpacked keys
- will take much more space than packed keys.
- Note that 'rows' may be zero for the case when we don't know how many
- rows we will put into the file.
- */
-
-void mi_disable_indexes_for_rebuild(MI_INFO *info, ha_rows rows,
- my_bool all_keys)
-{
- MYISAM_SHARE *share=info->s;
- MI_KEYDEF *key=share->keyinfo;
- uint i;
-
- DBUG_ASSERT(info->state->records == 0 &&
- (!rows || rows >= MI_MIN_ROWS_TO_DISABLE_INDEXES));
- for (i=0 ; i < share->base.keys ; i++,key++)
- {
- if (!(key->flag & (HA_SPATIAL | HA_AUTO_KEY)) &&
- ! mi_too_big_key_for_sort(key,rows) && info->s->base.auto_key !=
i+1 &&
- (all_keys || !(key->flag & HA_NOSAME)))
- {
- mi_clear_key_active(share->state.key_map, i);
- info->update|= HA_STATE_CHANGED;
- info->create_unique_index_by_sort= all_keys;
- }
- }
-}
-
-
/*
Return TRUE if we can use repair by sorting
One can set the force argument to force to use sorting
diff --git a/storage/myisam/myisamdef.h b/storage/myisam/myisamdef.h
index c6fa777774a..f7b61ae638c 100644
--- a/storage/myisam/myisamdef.h
+++ b/storage/myisam/myisamdef.h
@@ -715,8 +715,6 @@ void mi_restore_status(void *param);
void mi_copy_status(void *to, void *from);
my_bool mi_check_status(void *param);
void mi_fix_status(MI_INFO *org_table, MI_INFO *new_table);
-void mi_disable_indexes_for_rebuild(MI_INFO *info, ha_rows rows,
- my_bool all_keys);
extern MI_INFO *test_if_reopen(char *filename);
my_bool check_table_is_closed(const char *name, const char *where);
int mi_open_datafile(MI_INFO *info, MYISAM_SHARE *share);
--
Regards
Sachin Setiya
Software Engineer at MariaDB
1
0

[Commits] d48294d15c4: MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
by Varun 09 Oct '19
by Varun 09 Oct '19
09 Oct '19
revision-id: d48294d15c4895215d5facef97fc80c03cd6b4b0 (mariadb-10.4.4-341-gd48294d15c4)
parent(s): a340af922361e3958e5d6653c8b840771db282f2
author: Varun Gupta
committer: Varun Gupta
timestamp: 2019-09-28 13:06:44 +0530
message:
MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
For the case when the SJM scan table is the first table in the join order,
then if we want to do the sorting on the SJM scan table, then we need to
make sure that we unpack the values to base table fields in two cases:
1) Reading the SJM table and writing the sort-keys inside the sort-buffer
2) Reading the sorted data from the sort file
---
mysql-test/main/order_by.result | 138 +++++++++++++++++++++++++++++++++++++++-
mysql-test/main/order_by.test | 34 ++++++++++
sql/filesort.cc | 17 +++++
sql/opt_subselect.cc | 10 ++-
sql/records.cc | 13 ++++
sql/records.h | 1 +
sql/sql_select.cc | 89 ++++++++++----------------
sql/sql_select.h | 4 +-
sql/table.h | 1 +
9 files changed, 246 insertions(+), 61 deletions(-)
diff --git a/mysql-test/main/order_by.result b/mysql-test/main/order_by.result
index b059cc686cd..e74583670fc 100644
--- a/mysql-test/main/order_by.result
+++ b/mysql-test/main/order_by.result
@@ -3322,7 +3322,7 @@ WHERE books.library_id = 8663 AND
books.scheduled_for_removal=0 )
ORDER BY wings.id;
id select_type table type possible_keys key key_len ref rows filtered Extra
-1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 2 100.00 Using temporary; Using filesort
+1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 2 100.00 Using filesort
1 PRIMARY wings eq_ref PRIMARY PRIMARY 4 test.books.wings_id 1 100.00
2 MATERIALIZED books ref library_idx library_idx 4 const 2 100.00 Using where
Warnings:
@@ -3436,3 +3436,139 @@ Note 1003 select `test`.`t4`.`a` AS `a`,`test`.`t4`.`b` AS `b`,`test`.`t4`.`c` A
set histogram_size=@tmp_h, histogram_type=@tmp_ht, use_stat_tables=@tmp_u,
optimizer_use_condition_selectivity=@tmp_o;
drop table t1,t2,t3,t4;
+#
+# MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
+#
+CREATE TABLE t1 (a INT, b int, primary key(a));
+CREATE TABLE t2 (a INT, b INT);
+INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6),
+(2354,7),(321421,3),(535,2),(4535,3);
+INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3);
+# Join order should have the SJM scan table as the first table for both
+# the queries with GROUP BY and ORDER BY clause.
+EXPLAIN SELECT t1.a
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ORDER BY t1.a DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 3 Using filesort
+1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t2.a 1 Using index
+2 MATERIALIZED t2 ALL NULL NULL NULL NULL 3 Using where
+EXPLAIN FORMAT=JSON SELECT t1.a
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ORDER BY t1.a DESC;
+EXPLAIN
+{
+ "query_block": {
+ "select_id": 1,
+ "read_sorted_file": {
+ "filesort": {
+ "sort_key": "t1.a desc",
+ "table": {
+ "table_name": "<subquery2>",
+ "access_type": "ALL",
+ "possible_keys": ["distinct_key"],
+ "rows": 3,
+ "filtered": 100,
+ "materialized": {
+ "unique": 1,
+ "query_block": {
+ "select_id": 2,
+ "table": {
+ "table_name": "t2",
+ "access_type": "ALL",
+ "rows": 3,
+ "filtered": 100,
+ "attached_condition": "t2.b = 3 and t2.a is not null"
+ }
+ }
+ }
+ }
+ }
+ },
+ "table": {
+ "table_name": "t1",
+ "access_type": "eq_ref",
+ "possible_keys": ["PRIMARY"],
+ "key": "PRIMARY",
+ "key_length": "4",
+ "used_key_parts": ["a"],
+ "ref": ["test.t2.a"],
+ "rows": 1,
+ "filtered": 100,
+ "using_index": true
+ }
+ }
+}
+SELECT t1.a
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ORDER BY t1.a DESC;
+a
+273
+96
+58
+EXPLAIN SELECT t1.a, group_concat(t1.b)
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+GROUP BY t1.a DESC;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 3 Using filesort
+1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t2.a 1
+2 MATERIALIZED t2 ALL NULL NULL NULL NULL 3 Using where
+EXPLAIN FORMAT=JSON SELECT t1.a, group_concat(t1.b)
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+GROUP BY t1.a DESC;
+EXPLAIN
+{
+ "query_block": {
+ "select_id": 1,
+ "read_sorted_file": {
+ "filesort": {
+ "sort_key": "t1.a desc",
+ "table": {
+ "table_name": "<subquery2>",
+ "access_type": "ALL",
+ "possible_keys": ["distinct_key"],
+ "rows": 3,
+ "filtered": 100,
+ "materialized": {
+ "unique": 1,
+ "query_block": {
+ "select_id": 2,
+ "table": {
+ "table_name": "t2",
+ "access_type": "ALL",
+ "rows": 3,
+ "filtered": 100,
+ "attached_condition": "t2.b = 3 and t2.a is not null"
+ }
+ }
+ }
+ }
+ }
+ },
+ "table": {
+ "table_name": "t1",
+ "access_type": "eq_ref",
+ "possible_keys": ["PRIMARY"],
+ "key": "PRIMARY",
+ "key_length": "4",
+ "used_key_parts": ["a"],
+ "ref": ["test.t2.a"],
+ "rows": 1,
+ "filtered": 100
+ }
+ }
+}
+SELECT t1.a, group_concat(t1.b)
+FROM t1
+WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+GROUP BY t1.a DESC;
+a group_concat(t1.b)
+273 3
+96 2
+58 1
+DROP TABLE t1, t2;
diff --git a/mysql-test/main/order_by.test b/mysql-test/main/order_by.test
index 934c503302f..b3e43d27e2f 100644
--- a/mysql-test/main/order_by.test
+++ b/mysql-test/main/order_by.test
@@ -2276,3 +2276,37 @@ set histogram_size=@tmp_h, histogram_type=@tmp_ht, use_stat_tables=@tmp_u,
optimizer_use_condition_selectivity=@tmp_o;
drop table t1,t2,t3,t4;
+
+
+--echo #
+--echo # MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on
+--echo #
+
+CREATE TABLE t1 (a INT, b int, primary key(a));
+CREATE TABLE t2 (a INT, b INT);
+
+INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6),
+ (2354,7),(321421,3),(535,2),(4535,3);
+INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3);
+
+--echo # Join order should have the SJM scan table as the first table for both
+--echo # the queries with GROUP BY and ORDER BY clause.
+
+let $query= SELECT t1.a
+ FROM t1
+ WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ ORDER BY t1.a DESC;
+
+eval EXPLAIN $query;
+eval EXPLAIN FORMAT=JSON $query;
+eval $query;
+
+let $query= SELECT t1.a, group_concat(t1.b)
+ FROM t1
+ WHERE t1.a IN (SELECT a FROM t2 WHERE b=3)
+ GROUP BY t1.a DESC;
+
+eval EXPLAIN $query;
+eval EXPLAIN FORMAT=JSON $query;
+eval $query;
+DROP TABLE t1, t2;
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 3f4291cfb1f..e5c83293e9f 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -716,11 +716,21 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
*found_rows= 0;
ref_pos= &file->ref[0];
next_pos=ref_pos;
+ JOIN_TAB *tab= sort_form->reginfo.join_tab;
+ JOIN *join= tab ? tab->join : NULL;
+ bool first_is_in_sjm_nest= FALSE;
DBUG_EXECUTE_IF("show_explain_in_find_all_keys",
dbug_serve_apcs(thd, 1);
);
+ if (join && join->table_count != join->const_tables &&
+ (join->join_tab + join->const_tables == tab))
+ {
+ TABLE_LIST *tbl_for_first= sort_form->pos_in_table_list;
+ first_is_in_sjm_nest= tbl_for_first && tbl_for_first->is_sjm_scan_table();
+ }
+
if (!quick_select)
{
next_pos=(uchar*) 0; /* Find records in sequence */
@@ -756,13 +766,20 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
goto err;
}
+ if (first_is_in_sjm_nest)
+ sort_form->column_bitmaps_set(save_read_set, save_write_set);
+
DEBUG_SYNC(thd, "after_index_merge_phase1");
for (;;)
{
if (quick_select)
error= select->quick->get_next();
else /* Not quick-select */
+ {
error= file->ha_rnd_next(sort_form->record[0]);
+ if (first_is_in_sjm_nest)
+ tab->unpacking_to_base_table_fields();
+ }
if (unlikely(error))
break;
file->position(sort_form->record[0]);
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index 87458357865..f837a6394af 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -4252,11 +4252,11 @@ bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab)
sjm_tab->type= JT_ALL;
/* Initialize full scan */
- sjm_tab->read_first_record= join_read_record_no_init;
+ sjm_tab->read_first_record= join_init_read_record;
sjm_tab->read_record.copy_field= sjm->copy_field;
sjm_tab->read_record.copy_field_end= sjm->copy_field +
sjm->sjm_table_cols.elements;
- sjm_tab->read_record.read_record_func= rr_sequential_and_unpack;
+ sjm_tab->read_record.read_record_func= read_record_func_for_rr_and_unpack;
}
sjm_tab->bush_children->end[-1].next_select= end_sj_materialize;
@@ -7105,3 +7105,9 @@ bool Item_in_subselect::pushdown_cond_for_in_subquery(THD *thd, Item *cond)
thd->lex->current_select= save_curr_select;
DBUG_RETURN(FALSE);
}
+
+
+bool TABLE_LIST::is_sjm_scan_table()
+{
+ return is_active_sjm() && sj_mat_info->is_sj_scan;
+}
diff --git a/sql/records.cc b/sql/records.cc
index 3d709182a4e..f6885f773d5 100644
--- a/sql/records.cc
+++ b/sql/records.cc
@@ -709,3 +709,16 @@ static int rr_cmp(uchar *a,uchar *b)
return (int) a[7] - (int) b[7];
#endif
}
+
+
+int read_record_func_for_rr_and_unpack(READ_RECORD *info)
+{
+ int error;
+ if ((error= info->read_record_func_and_unpack_calls(info)))
+ return error;
+
+ for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
+ (*cp->do_copy)(cp);
+
+ return error;
+}
diff --git a/sql/records.h b/sql/records.h
index faf0d13c9a9..037a06b9d34 100644
--- a/sql/records.h
+++ b/sql/records.h
@@ -55,6 +55,7 @@ struct READ_RECORD
TABLE *table; /* Head-form */
Unlock_row_func unlock_row;
Read_func read_record_func;
+ Read_func read_record_func_and_unpack_calls;
THD *thd;
SQL_SELECT *select;
uint ref_length, reclength, rec_cache_size, error_offset;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 36d9eda3383..28bc57c692f 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -14015,37 +14015,8 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
can be used without tmp. table.
*/
bool can_subst_to_first_table= false;
- bool first_is_in_sjm_nest= false;
- if (first_is_base_table)
- {
- TABLE_LIST *tbl_for_first=
- join->join_tab[join->const_tables].table->pos_in_table_list;
- first_is_in_sjm_nest= tbl_for_first->sj_mat_info &&
- tbl_for_first->sj_mat_info->is_used;
- }
- /*
- Currently we do not employ the optimization that uses multiple
- equalities for ORDER BY to remove tmp table in the case when
- the first table happens to be the result of materialization of
- a semi-join nest ( <=> first_is_in_sjm_nest == true).
-
- When a semi-join nest is materialized and scanned to look for
- possible matches in the remaining tables for every its row
- the fields from the result of materialization are copied
- into the record buffers of tables from the semi-join nest.
- So these copies are used to access the remaining tables rather
- than the fields from the result of materialization.
-
- Unfortunately now this so-called 'copy back' technique is
- supported only if the rows are scanned with the rr_sequential
- function, but not with other rr_* functions that are employed
- when the result of materialization is required to be sorted.
-
- TODO: either to support 'copy back' technique for the above case,
- or to get rid of this technique altogether.
- */
if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP) &&
- first_is_base_table && !first_is_in_sjm_nest &&
+ first_is_base_table &&
order->item[0]->real_item()->type() == Item::FIELD_ITEM &&
join->cond_equal)
{
@@ -19922,19 +19893,6 @@ do_select(JOIN *join, Procedure *procedure)
}
-int rr_sequential_and_unpack(READ_RECORD *info)
-{
- int error;
- if (unlikely((error= rr_sequential(info))))
- return error;
-
- for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++)
- (*cp->do_copy)(cp);
-
- return error;
-}
-
-
/**
@brief
Instantiates temporary table
@@ -21223,6 +21181,8 @@ bool test_if_use_dynamic_range_scan(JOIN_TAB *join_tab)
int join_init_read_record(JOIN_TAB *tab)
{
+ bool need_unpacking= FALSE;
+ JOIN *join= tab->join;
/*
Note: the query plan tree for the below operations is constructed in
save_agg_explain_data.
@@ -21232,6 +21192,12 @@ int join_init_read_record(JOIN_TAB *tab)
if (tab->filesort && tab->sort_table()) // Sort table.
return 1;
+ if (join->top_join_tab_count != join->const_tables)
+ {
+ TABLE_LIST *tbl= tab->table->pos_in_table_list;
+ need_unpacking= tbl ? tbl->is_sjm_scan_table() : FALSE;
+ }
+
tab->build_range_rowid_filter_if_needed();
DBUG_EXECUTE_IF("kill_join_init_read_record",
@@ -21249,16 +21215,6 @@ int join_init_read_record(JOIN_TAB *tab)
if (!tab->preread_init_done && tab->preread_init())
return 1;
-
- if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
- tab->select, tab->filesort_result, 1,1, FALSE))
- return 1;
- return tab->read_record.read_record();
-}
-
-int
-join_read_record_no_init(JOIN_TAB *tab)
-{
Copy_field *save_copy, *save_copy_end;
/*
@@ -21268,12 +21224,20 @@ join_read_record_no_init(JOIN_TAB *tab)
save_copy= tab->read_record.copy_field;
save_copy_end= tab->read_record.copy_field_end;
- init_read_record(&tab->read_record, tab->join->thd, tab->table,
- tab->select, tab->filesort_result, 1, 1, FALSE);
+
+ if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
+ tab->select, tab->filesort_result, 1, 1, FALSE))
+ return 1;
tab->read_record.copy_field= save_copy;
tab->read_record.copy_field_end= save_copy_end;
- tab->read_record.read_record_func= rr_sequential_and_unpack;
+
+ if (need_unpacking)
+ {
+ tab->read_record.read_record_func_and_unpack_calls=
+ tab->read_record.read_record_func;
+ tab->read_record.read_record_func = read_record_func_for_rr_and_unpack;
+ }
return tab->read_record.read_record();
}
@@ -28981,6 +28945,19 @@ void build_notnull_conds_for_inner_nest_of_outer_join(JOIN *join,
}
+/*
+ @brief
+ Unpacking temp table fields to base table fields.
+*/
+
+void JOIN_TAB::unpacking_to_base_table_fields()
+{
+ for (Copy_field *cp= read_record.copy_field;
+ cp != read_record.copy_field_end; cp++)
+ (*cp->do_copy)(cp);
+}
+
+
/**
@} (end of group Query_Optimizer)
*/
diff --git a/sql/sql_select.h b/sql/sql_select.h
index 4f7bf49f635..545d4a788cc 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -223,7 +223,7 @@ typedef enum_nested_loop_state
(*Next_select_func)(JOIN *, struct st_join_table *, bool);
Next_select_func setup_end_select_func(JOIN *join, JOIN_TAB *tab);
int rr_sequential(READ_RECORD *info);
-int rr_sequential_and_unpack(READ_RECORD *info);
+int read_record_func_for_rr_and_unpack(READ_RECORD *info);
Item *remove_pushed_top_conjuncts(THD *thd, Item *cond);
Item *and_new_conditions_to_optimized_cond(THD *thd, Item *cond,
COND_EQUAL **cond_eq,
@@ -676,6 +676,7 @@ typedef struct st_join_table {
table_map remaining_tables);
bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables,
bool is_const_table);
+ void unpacking_to_base_table_fields();
} JOIN_TAB;
@@ -2352,7 +2353,6 @@ create_virtual_tmp_table(THD *thd, Field *field)
int test_if_item_cache_changed(List<Cached_item> &list);
int join_init_read_record(JOIN_TAB *tab);
-int join_read_record_no_init(JOIN_TAB *tab);
void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key);
inline Item * and_items(THD *thd, Item* cond, Item *item)
{
diff --git a/sql/table.h b/sql/table.h
index 1a7e5fbd4dc..35ba9bbb95d 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -2622,6 +2622,7 @@ struct TABLE_LIST
*/
const char *get_table_name() const { return view != NULL ? view_name.str : table_name.str; }
bool is_active_sjm();
+ bool is_sjm_scan_table();
bool is_jtbm() { return MY_TEST(jtbm_subselect != NULL); }
st_select_lex_unit *get_unit();
st_select_lex *get_single_select();
2
1

[Commits] 6dc3fa2e054: Support Create_time and Update_time in MyRocks table status
by psergey 06 Oct '19
by psergey 06 Oct '19
06 Oct '19
revision-id: 6dc3fa2e054d999a12cd5d7aaf637e4ac7c3f149 (fb-prod201903-144-g6dc3fa2e054)
parent(s): d97c0c628e5dc60abd725f6a7120a8d87b09321e
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-10-06 23:48:49 +0300
message:
Support Create_time and Update_time in MyRocks table status
(variant #4, with @@rocksdb_table_dictionary_format)
- Create_time is stored in the MyRocks' internal data dictionary.
- Update_time is in-memory only (like in InnoDB).
@@rocksdb_table_dictionary_format is a global read-only variable (set it
from my.cnf) which controls the on-disk data format.
rocksdb_table_dictionary_format=1 means use the same data format as
before. Create_time will always be NULL for all tables that are created.
rocksdb_table_dictionary_format=2 (the default) means use the newer data
format. All newly-created tables will have proper Create_time attribute.
Downgrades are only possible if one hasn't run any DDL that re-creates a
table.
---
mysql-test/suite/rocksdb/include/bulk_load.inc | 4 +-
.../suite/rocksdb/include/bulk_load_unsorted.inc | 4 +-
mysql-test/suite/rocksdb/r/bulk_load.result | 12 +--
mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result | 12 +--
.../rocksdb/r/bulk_load_rev_cf_and_data.result | 12 +--
.../suite/rocksdb/r/bulk_load_rev_data.result | 12 +--
.../suite/rocksdb/r/bulk_load_unsorted.result | 12 +--
.../suite/rocksdb/r/bulk_load_unsorted_rev.result | 12 +--
mysql-test/suite/rocksdb/r/issue255.result | 16 ++--
mysql-test/suite/rocksdb/r/rocksdb.result | 7 +-
.../r/rocksdb_table_dictionary_format.result | 37 ++++++++++
.../suite/rocksdb/r/show_table_status.result | 85 +++++++++++++++++++++-
mysql-test/suite/rocksdb/r/truncate_table.result | 8 +-
mysql-test/suite/rocksdb/t/issue255.test | 17 +++--
mysql-test/suite/rocksdb/t/rocksdb.test | 4 +-
.../rocksdb/t/rocksdb_table_dictionary_format.test | 67 +++++++++++++++++
mysql-test/suite/rocksdb/t/show_table_status.test | 81 ++++++++++++++++++++-
mysql-test/suite/rocksdb/t/truncate_table.test | 8 +-
.../r/rocksdb_table_dictionary_format_basic.result | 14 ++++
.../t/rocksdb_table_dictionary_format_basic.test | 16 ++++
storage/rocksdb/ha_rocksdb.cc | 53 ++++++++++++++
storage/rocksdb/ha_rocksdb.h | 1 +
storage/rocksdb/rdb_datadic.cc | 57 ++++++++++++---
storage/rocksdb/rdb_datadic.h | 36 ++++++++-
24 files changed, 502 insertions(+), 85 deletions(-)
diff --git a/mysql-test/suite/rocksdb/include/bulk_load.inc b/mysql-test/suite/rocksdb/include/bulk_load.inc
index 1b79825e507..7e163602202 100644
--- a/mysql-test/suite/rocksdb/include/bulk_load.inc
+++ b/mysql-test/suite/rocksdb/include/bulk_load.inc
@@ -121,12 +121,12 @@ set rocksdb_bulk_load=0;
--remove_file $file
# Make sure row count index stats are correct
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
ANALYZE TABLE t1, t2, t3;
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
# Make sure all the data is there.
diff --git a/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc b/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
index 5cdc76a32d4..812af0401aa 100644
--- a/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
+++ b/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
@@ -119,12 +119,12 @@ set rocksdb_bulk_load=0;
--remove_file $file
# Make sure row count index stats are correct
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
ANALYZE TABLE t1, t2, t3;
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
# Make sure all the data is there.
diff --git a/mysql-test/suite/rocksdb/r/bulk_load.result b/mysql-test/suite/rocksdb/r/bulk_load.result
index a36f99a7619..76db28e66bd 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
index b5d3e252c5d..ae363f7ec0c 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
index f46acd41080..dd8dd7e60a8 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
index 3389968ef37..96738ae62e2 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result b/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
index 924032549ac..87fc63af2da 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
@@ -70,9 +70,9 @@ LOAD DATA INFILE <input_file> INTO TABLE t3;
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -80,9 +80,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
select count(a) from t1;
count(a)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result b/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
index 3cc9fb8e459..8e0914f0159 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
@@ -70,9 +70,9 @@ LOAD DATA INFILE <input_file> INTO TABLE t3;
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -80,9 +80,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
select count(a) from t1;
count(a)
5000000
diff --git a/mysql-test/suite/rocksdb/r/issue255.result b/mysql-test/suite/rocksdb/r/issue255.result
index c1ce3be2276..b45b3b5afc7 100644
--- a/mysql-test/suite/rocksdb/r/issue255.result
+++ b/mysql-test/suite/rocksdb/r/issue255.result
@@ -2,7 +2,7 @@ CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 6 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ('538647864786478647864');
Warnings:
Warning 1264 Out of range value for column 'pk' at row 1
@@ -12,7 +12,7 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
SELECT * FROM t1;
@@ -21,7 +21,7 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
SELECT * FROM t1;
@@ -30,13 +30,13 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 # # NULL latin1_swedish_ci NULL
DROP TABLE t1;
CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 6 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES (1000);
Warnings:
Warning 1264 Out of range value for column 'pk' at row 1
@@ -46,7 +46,7 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
SELECT * FROM t1;
@@ -55,7 +55,7 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
SELECT * FROM t1;
@@ -64,5 +64,5 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/r/rocksdb.result b/mysql-test/suite/rocksdb/r/rocksdb.result
index 088eb050f6f..c76f31afaab 100644
--- a/mysql-test/suite/rocksdb/r/rocksdb.result
+++ b/mysql-test/suite/rocksdb/r/rocksdb.result
@@ -979,6 +979,7 @@ rocksdb_store_row_debug_checksums OFF
rocksdb_strict_collation_check OFF
rocksdb_strict_collation_exceptions
rocksdb_table_cache_numshardbits 6
+rocksdb_table_dictionary_format 2
rocksdb_table_stats_background_thread_nice_value 19
rocksdb_table_stats_max_num_rows_scanned 0
rocksdb_table_stats_recalc_threshold_count 100
@@ -1417,7 +1418,7 @@ create table t1 (i int primary key auto_increment) engine=RocksDB;
insert into t1 values (null),(null);
show table status like 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 # 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 # 0 0 0 3 # # NULL latin1_swedish_ci NULL
drop table t1;
#
# Fix Issue #4: Crash when using pseudo-unique keys
@@ -2612,7 +2613,7 @@ CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(-1),(0);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 3 # # NULL latin1_swedish_ci NULL
SELECT * FROM t1;
a
-1
@@ -2623,7 +2624,7 @@ CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(10),(0);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 12 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 12 # # NULL latin1_swedish_ci NULL
SELECT * FROM t1;
a
1
diff --git a/mysql-test/suite/rocksdb/r/rocksdb_table_dictionary_format.result b/mysql-test/suite/rocksdb/r/rocksdb_table_dictionary_format.result
new file mode 100644
index 00000000000..8f2fa96a04a
--- /dev/null
+++ b/mysql-test/suite/rocksdb/r/rocksdb_table_dictionary_format.result
@@ -0,0 +1,37 @@
+select @@rocksdb_table_dictionary_format;
+@@rocksdb_table_dictionary_format
+2
+#
+# Server restarted
+#
+select @@rocksdb_table_dictionary_format;
+@@rocksdb_table_dictionary_format
+1
+create table t1 (a int) engine=rocksdb;
+insert into t1 values (1);
+# Create_time will be NULL as the table doesn't support it
+# Update_time will be non-null
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time update_time is not null
+NULL 1
+#
+# Server restarted
+#
+select @@rocksdb_table_dictionary_format;
+@@rocksdb_table_dictionary_format
+2
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time update_time is not null
+NULL 0
+set global rocksdb_compact_cf='default';
+select concat('00', '00', '00', '01', hex('test.t1'));
+concat('00', '00', '00', '01', hex('test.t1'))
+00000001746573742E7431
+Found the datadic entry
+Value has format version 1, followed by 8 bytes describing one index
+Done
+drop table t1;
diff --git a/mysql-test/suite/rocksdb/r/show_table_status.result b/mysql-test/suite/rocksdb/r/show_table_status.result
index 29140f045e4..345882040ef 100644
--- a/mysql-test/suite/rocksdb/r/show_table_status.result
+++ b/mysql-test/suite/rocksdb/r/show_table_status.result
@@ -7,12 +7,12 @@ set global rocksdb_force_flush_memtable_now = true;
CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' );
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL utf8_general_ci NULL
+t1 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL utf8_general_ci NULL
SHOW TABLE STATUS WHERE name LIKE 't2';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
DROP TABLE t1, t2, t3;
CREATE DATABASE `db_new..............................................end`;
USE `db_new..............................................end`;
@@ -22,3 +22,80 @@ SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.table_statistics WHERE T
TABLE_SCHEMA db_new..............................................end
TABLE_NAME t1_new..............................................end
DROP DATABASE `db_new..............................................end`;
+#
+# MDEV-17171: Bug: RocksDB Tables do not have "Creation Date"
+#
+use test;
+create table t1 (a int) engine=rocksdb;
+select create_time is not null, update_time, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time check_time
+1 NULL NULL
+insert into t1 values (1);
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time is not null check_time
+1 1 NULL
+flush tables;
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time is not null check_time
+1 1 NULL
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+select sleep(3);
+sleep(3)
+0
+insert into t1 values (2);
+select
+create_time=@create_tm /* should not change */ ,
+timestampdiff(second, @update_tm, update_time) > 2,
+check_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=@create_tm 1
+timestampdiff(second, @update_tm, update_time) > 2 1
+check_time NULL
+#
+# Check how create_time survives ALTER TABLE.
+# First, an ALTER TABLE that re-creates the table:
+alter table t1 add b int;
+select
+create_time<>@create_tm /* should change */,
+create_time IS NOT NULL,
+update_time IS NULL
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time<>@create_tm 1
+create_time IS NOT NULL 1
+update_time IS NULL 1
+insert into t1 values (5,5);
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+# Then, an in-place ALTER TABLE:
+alter table t1 add key (a);
+select
+create_time=@create_tm /* should not change */,
+update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=@create_tm 1
+update_time NULL
+#
+# Check what is left after server restart
+#
+# Save t1's creation time
+create table t2 as
+select create_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+select
+create_time=(select create_time from t2) /* should change */,
+update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=(select create_time from t2) 1
+update_time NULL
+drop table t1, t2;
diff --git a/mysql-test/suite/rocksdb/r/truncate_table.result b/mysql-test/suite/rocksdb/r/truncate_table.result
index 1544256f194..79b266a2453 100644
--- a/mysql-test/suite/rocksdb/r/truncate_table.result
+++ b/mysql-test/suite/rocksdb/r/truncate_table.result
@@ -9,19 +9,19 @@ DROP TABLE t1;
CREATE TABLE t1 (a INT KEY AUTO_INCREMENT, c CHAR(8)) ENGINE=rocksdb;
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 1 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 (c) VALUES ('a'),('b'),('c');
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 4 # # NULL latin1_swedish_ci NULL
TRUNCATE TABLE t1;
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 1 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 (c) VALUES ('d');
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 2 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 2 # # NULL latin1_swedish_ci NULL
SELECT a,c FROM t1;
a c
1 d
diff --git a/mysql-test/suite/rocksdb/t/issue255.test b/mysql-test/suite/rocksdb/t/issue255.test
index 370dece0c6c..686f45b4056 100644
--- a/mysql-test/suite/rocksdb/t/issue255.test
+++ b/mysql-test/suite/rocksdb/t/issue255.test
@@ -3,24 +3,25 @@
CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 VALUES ('538647864786478647864');
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SELECT * FROM t1;
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
DROP TABLE t1;
@@ -28,24 +29,24 @@ DROP TABLE t1;
CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 VALUES (1000);
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/t/rocksdb.test b/mysql-test/suite/rocksdb/t/rocksdb.test
index 5eff0fbf38f..7dcae569c92 100644
--- a/mysql-test/suite/rocksdb/t/rocksdb.test
+++ b/mysql-test/suite/rocksdb/t/rocksdb.test
@@ -1198,7 +1198,7 @@ drop table t1;
create table t1 (i int primary key auto_increment) engine=RocksDB;
insert into t1 values (null),(null);
---replace_column 7 #
+--replace_column 7 # 12 # 13 #
show table status like 't1';
drop table t1;
@@ -1903,11 +1903,13 @@ DROP TABLE t1;
# value is 4 while MyRocks will show it as 3.
CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(-1),(0);
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
SELECT * FROM t1;
DROP TABLE t1;
CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(10),(0);
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
SELECT * FROM t1;
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/t/rocksdb_table_dictionary_format.test b/mysql-test/suite/rocksdb/t/rocksdb_table_dictionary_format.test
new file mode 100644
index 00000000000..3dedf6502ab
--- /dev/null
+++ b/mysql-test/suite/rocksdb/t/rocksdb_table_dictionary_format.test
@@ -0,0 +1,67 @@
+--source include/have_rocksdb.inc
+
+select @@rocksdb_table_dictionary_format;
+
+#
+# Check the upgrade from Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1 to _2
+#
+
+--let $_mysqld_option=--rocksdb_table_dictionary_format=1
+--source include/restart_mysqld_with_option.inc
+
+--echo #
+--echo # Server restarted
+--echo #
+select @@rocksdb_table_dictionary_format;
+
+create table t1 (a int) engine=rocksdb;
+insert into t1 values (1);
+
+--echo # Create_time will be NULL as the table doesn't support it
+--echo # Update_time will be non-null
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--let $_mysqld_option=--rocksdb_table_dictionary_format=2
+--source include/restart_mysqld_with_option.inc
+--echo #
+--echo # Server restarted
+--echo #
+select @@rocksdb_table_dictionary_format;
+
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+set global rocksdb_compact_cf='default';
+# use: $MYSQLTEST_VARDIR $MYSQL_SST_DUMP
+let MYSQL_DATADIR = `select @@datadir`;
+let MYSQL_SST_DUMP = $MYSQL_SST_DUMP;
+
+select concat('00', '00', '00', '01', hex('test.t1'));
+
+# We are looking to find a record like this:
+# '00000001746573742E7431' seq:0, type:1 => 00010000000000000104
+# that is:
+# '00000001:746573742E7431' seq:0, type:1 => 0001: 0000000000000104
+# the key is: DDL_ENTRY_INDEX_START_NUMBER, "test.t1"
+# the value is: DDL_ENTRY_INDEX_VERSION_1, {cf_id, index_id}
+perl;
+ my $datadir = $ENV{'MYSQL_DATADIR'};
+ my $sst_dump = $ENV{'MYSQL_SST_DUMP'};
+ open(IN, "$sst_dump --command=scan --output_hex --file=$datadir/.rocksdb |");
+ while(<IN>) {
+ if ($_ =~ /^\'00000001746573742E7431\'/) {
+ print "Found the datadic entry\n";
+ if ($_ =~ / 0001[0-9]{16}$/) {
+ print "Value has format version 1, followed by 8 bytes describing one index\n"
+ } else {
+ print "Unexpected value. $_\n";
+ }
+ }
+ }
+ print "Done\n";
+ close(IN);
+EOF
+drop table t1;
+
diff --git a/mysql-test/suite/rocksdb/t/show_table_status.test b/mysql-test/suite/rocksdb/t/show_table_status.test
index 29cc2ccfb5e..59effcc788c 100644
--- a/mysql-test/suite/rocksdb/t/show_table_status.test
+++ b/mysql-test/suite/rocksdb/t/show_table_status.test
@@ -24,7 +24,7 @@ set global rocksdb_force_flush_memtable_now = true;
CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
---replace_column 6 # 7 #
+--replace_column 6 # 7 # 12 # 13 #
SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' );
# Some statistics don't get updated as quickly. The Data_length and
@@ -48,7 +48,7 @@ set global rocksdb_force_flush_memtable_now = true;
# We expect the number of rows to be 10000. Data_len and Avg_row_len
# may vary, depending on built-in compression library.
---replace_column 6 # 7 #
+--replace_column 6 # 7 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't2';
DROP TABLE t1, t2, t3;
@@ -62,3 +62,80 @@ CREATE TABLE `t1_new..............................................end`(a int) en
INSERT INTO `t1_new..............................................end` VALUES (1);
--query_vertical SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.table_statistics WHERE TABLE_NAME = 't1_new..............................................end'
DROP DATABASE `db_new..............................................end`;
+--echo #
+--echo # MDEV-17171: Bug: RocksDB Tables do not have "Creation Date"
+--echo #
+use test;
+create table t1 (a int) engine=rocksdb;
+
+select create_time is not null, update_time, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+insert into t1 values (1);
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+flush tables;
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+select sleep(3);
+insert into t1 values (2);
+
+--vertical_results
+select
+ create_time=@create_tm /* should not change */ ,
+ timestampdiff(second, @update_tm, update_time) > 2,
+ check_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo #
+--echo # Check how create_time survives ALTER TABLE.
+--echo # First, an ALTER TABLE that re-creates the table:
+alter table t1 add b int;
+select
+ create_time<>@create_tm /* should change */,
+ create_time IS NOT NULL,
+ update_time IS NULL
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+insert into t1 values (5,5);
+
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo # Then, an in-place ALTER TABLE:
+alter table t1 add key (a);
+
+select
+ create_time=@create_tm /* should not change */,
+ update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo #
+--echo # Check what is left after server restart
+--echo #
+
+--echo # Save t1's creation time
+create table t2 as
+select create_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--source include/restart_mysqld.inc
+
+select
+ create_time=(select create_time from t2) /* should change */,
+ update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+drop table t1, t2;
diff --git a/mysql-test/suite/rocksdb/t/truncate_table.test b/mysql-test/suite/rocksdb/t/truncate_table.test
index a61488654a3..1001eeb6cde 100644
--- a/mysql-test/suite/rocksdb/t/truncate_table.test
+++ b/mysql-test/suite/rocksdb/t/truncate_table.test
@@ -29,22 +29,22 @@ DROP TABLE t1;
CREATE TABLE t1 (a INT KEY AUTO_INCREMENT, c CHAR(8)) ENGINE=rocksdb;
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 (c) VALUES ('a'),('b'),('c');
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
TRUNCATE TABLE t1;
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 (c) VALUES ('d');
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--sorted_result
diff --git a/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_table_dictionary_format_basic.result b/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_table_dictionary_format_basic.result
new file mode 100644
index 00000000000..825d7a7113a
--- /dev/null
+++ b/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_table_dictionary_format_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(2);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_TABLE_DICTIONARY_FORMAT;
+SELECT @start_global_value;
+@start_global_value
+2
+"Trying to set variable @@global.ROCKSDB_TABLE_DICTIONARY_FORMAT to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_TABLE_DICTIONARY_FORMAT = 444;
+ERROR HY000: Variable 'rocksdb_table_dictionary_format' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_table_dictionary_format_basic.test b/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_table_dictionary_format_basic.test
new file mode 100644
index 00000000000..2b8c6e81bf7
--- /dev/null
+++ b/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_table_dictionary_format_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(2);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_TABLE_DICTIONARY_FORMAT
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index d6c89087bea..a0a344187cf 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -122,6 +122,8 @@ const std::string PER_INDEX_CF_NAME("$per_index_cf");
static std::vector<std::string> rdb_tables_to_recalc;
+uint rocksdb_table_dictionary_format;
+
class Rdb_explicit_snapshot : public explicit_snapshot {
public:
static std::shared_ptr<Rdb_explicit_snapshot> create(
@@ -2136,6 +2138,17 @@ static MYSQL_SYSVAR_ULONGLONG(
"MultiGet",
nullptr, nullptr, SIZE_T_MAX, /* min */ 0, /* max */ SIZE_T_MAX, 0);
+static MYSQL_SYSVAR_UINT(table_dictionary_format,
+ rocksdb_table_dictionary_format,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Max Table Dictionary format version that the server "
+ "may create (use lower values for backward "
+ " compatibility, higher values for new features)",
+ nullptr, nullptr,
+ ROCKSDB_DATADIC_FORMAT_DEFAULT,
+ ROCKSDB_DATADIC_FORMAT_INITIAL,
+ ROCKSDB_DATADIC_FORMAT_MAX, 0);
+
static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100;
static struct st_mysql_sys_var *rocksdb_system_variables[] = {
@@ -2309,6 +2322,8 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(select_bypass_log_rejected),
MYSQL_SYSVAR(select_bypass_debug_row_delay),
MYSQL_SYSVAR(select_bypass_multiget_min),
+
+ MYSQL_SYSVAR(table_dictionary_format),
nullptr};
static rocksdb::WriteOptions rdb_get_rocksdb_write_options(
@@ -2456,6 +2471,8 @@ class Rdb_transaction {
bool m_is_delayed_snapshot = false;
bool m_is_two_phase = false;
+ std::unordered_set<Rdb_tbl_def*> modified_tables;
+
private:
/*
Number of write operations this transaction had when we took the last
@@ -3139,6 +3156,25 @@ class Rdb_transaction {
virtual void start_tx() = 0;
virtual void start_stmt() = 0;
+ protected:
+ // Non-virtual functions with actions to be done on transaction start and
+ // commit.
+ void on_commit() {
+ time_t tm;
+ tm = time(nullptr);
+ for (auto &it : modified_tables) {
+ it->update_time = tm;
+ }
+ modified_tables.clear();
+ }
+ void on_rollback() {
+ modified_tables.clear();
+ }
+ public:
+ void log_table_write_op(Rdb_tbl_def *tbl) {
+ modified_tables.insert(tbl);
+ }
+
void set_initial_savepoint() {
/*
Set the initial savepoint. If the first statement in the transaction
@@ -3322,7 +3358,9 @@ class Rdb_transaction_impl : public Rdb_transaction {
goto error;
}
+ on_commit();
error:
+ on_rollback();
/* Save the transaction object to be reused */
release_tx();
@@ -3338,6 +3376,7 @@ class Rdb_transaction_impl : public Rdb_transaction {
public:
void rollback() override {
+ on_rollback();
m_write_count = 0;
m_insert_count = 0;
m_update_count = 0;
@@ -3659,7 +3698,9 @@ class Rdb_writebatch_impl : public Rdb_transaction {
res = true;
goto error;
}
+ on_commit();
error:
+ on_rollback();
reset();
m_write_count = 0;
@@ -3691,6 +3732,7 @@ class Rdb_writebatch_impl : public Rdb_transaction {
}
void rollback() override {
+ on_rollback();
m_write_count = 0;
m_insert_count = 0;
m_update_count = 0;
@@ -7549,6 +7591,7 @@ int ha_rocksdb::create_table(const std::string &table_name,
/* Create table/key descriptions and put them into the data dictionary */
m_tbl_def = new Rdb_tbl_def(table_name);
+ m_tbl_def->set_create_time(time(nullptr));
uint n_keys = table_arg->s->keys;
@@ -10177,6 +10220,8 @@ int ha_rocksdb::update_write_row(const uchar *const old_data,
row_info.tx->incr_insert_count();
}
+ row_info.tx->log_table_write_op(m_tbl_def);
+
if (do_bulk_commit(row_info.tx)) {
DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD);
}
@@ -10648,6 +10693,7 @@ int ha_rocksdb::delete_row(const uchar *const buf) {
}
tx->incr_delete_count();
+ tx->log_table_write_op(m_tbl_def);
if (do_bulk_commit(tx)) {
DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD);
@@ -10802,6 +10848,12 @@ int ha_rocksdb::info(uint flag) {
k->rec_per_key[j] = x;
}
}
+
+ stats.create_time = m_tbl_def->get_create_time();
+ }
+
+ if (flag & HA_STATUS_TIME) {
+ stats.update_time = m_tbl_def->update_time;
}
if (flag & HA_STATUS_ERRKEY) {
@@ -12603,6 +12655,7 @@ bool ha_rocksdb::prepare_inplace_alter_table(
m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed);
new_tdef->m_hidden_pk_val =
m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed);
+ new_tdef->set_create_time(m_tbl_def->get_create_time());
if (create_key_defs(altered_table, new_tdef, table, m_tbl_def)) {
/* Delete the new key descriptors */
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index 9a250af40c7..c36c5c98e19 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -82,6 +82,7 @@ extern PSI_rwlock_key key_rwlock_read_free_rpl_tables;
#endif
extern Regex_list_handler rdb_read_free_regex_handler;
+extern uint rocksdb_table_dictionary_format;
/**
@brief
Rdb_table_handler is a reference-counted structure storing information for
diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc
index c0741a1ce9b..234f7a789ff 100644
--- a/storage/rocksdb/rdb_datadic.cc
+++ b/storage/rocksdb/rdb_datadic.cc
@@ -3514,8 +3514,21 @@ bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict,
const rocksdb::Slice &key) {
StringBuffer<8 * Rdb_key_def::PACKED_SIZE> indexes;
indexes.alloc(Rdb_key_def::VERSION_SIZE +
+ Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE +
m_key_count * Rdb_key_def::PACKED_SIZE * 2);
- rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION);
+
+ if (rocksdb_table_dictionary_format <
+ ROCKSDB_DATADIC_FORMAT_CREATE_TIMESTAMP) {
+ rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1);
+ // We are using old data format, which means we cannot save Create_time
+ // Set it to be shown as unknown right away, so that the behavior before
+ // server restart and after is the same.
+ set_create_time(0);
+ }
+ else {
+ rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2);
+ rdb_netstr_append_uint64(&indexes, create_time);
+ }
for (uint i = 0; i < m_key_count; i++) {
const Rdb_key_def &kd = *m_key_descr_arr[i];
@@ -4015,27 +4028,52 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
Rdb_tbl_def *const tdef =
new Rdb_tbl_def(key, Rdb_key_def::INDEX_NUMBER_SIZE);
- // Now, read the DDLs.
- const int real_val_size = val.size() - Rdb_key_def::VERSION_SIZE;
- if (real_val_size % Rdb_key_def::PACKED_SIZE * 2 > 0) {
+ if (val.size() < Rdb_key_def::VERSION_SIZE) {
// NO_LINT_DEBUG
sql_print_error("RocksDB: Table_store: invalid keylist for table %s",
tdef->full_tablename().c_str());
return true;
}
- tdef->m_key_count = real_val_size / (Rdb_key_def::PACKED_SIZE * 2);
- tdef->m_key_descr_arr = new std::shared_ptr<Rdb_key_def>[tdef->m_key_count];
ptr = reinterpret_cast<const uchar *>(val.data());
const int version = rdb_netbuf_read_uint16(&ptr);
- if (version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION) {
+
+ if (version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1 &&
+ version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2) {
// NO_LINT_DEBUG
sql_print_error(
"RocksDB: DDL ENTRY Version was not expected."
- "Expected: %d, Actual: %d",
- Rdb_key_def::DDL_ENTRY_INDEX_VERSION, version);
+ "Expected: %d..%d, Actual: %d",
+ Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1,
+ Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2, version);
return true;
}
+ int real_val_size = val.size() - Rdb_key_def::VERSION_SIZE;
+
+ if (version == Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2) {
+ if (real_val_size < Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE) {
+ // NO_LINT_DEBUG
+ sql_print_error( "RocksDB: DDL ENTRY V2 doesn't have timestamp");
+ delete tdef;
+ return true;
+ }
+ tdef->set_create_time(rdb_netbuf_read_uint64(&ptr));
+ real_val_size -= Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE;
+ }
+ else
+ tdef->set_create_time(0); // shown as SQL NULL.
+
+ // Now, read the DDLs.
+ if (real_val_size % Rdb_key_def::PACKED_SIZE * 2 > 0) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Table_store: invalid keylist for table %s",
+ tdef->full_tablename().c_str());
+ return true;
+ }
+ tdef->m_key_count = real_val_size / (Rdb_key_def::PACKED_SIZE * 2);
+ tdef->m_key_descr_arr = new std::shared_ptr<Rdb_key_def>[tdef->m_key_count];
+
+
ptr_end = ptr + real_val_size;
for (uint keyno = 0; ptr < ptr_end; keyno++) {
GL_INDEX_ID gl_index_id;
@@ -4471,6 +4509,7 @@ bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to,
rec->m_hidden_pk_val.load(std::memory_order_relaxed);
new_rec->m_tbl_stats = rec->m_tbl_stats;
+ new_rec->set_create_time(rec->get_create_time());
// so that it's not free'd when deleting the old rec
rec->m_key_descr_arr = nullptr;
diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h
index 416857cad38..9c2a96e77b7 100644
--- a/storage/rocksdb/rdb_datadic.h
+++ b/storage/rocksdb/rdb_datadic.h
@@ -203,6 +203,22 @@ enum {
UNPACK_FAILURE = 1,
};
+
+/*
+ Global user-visible data dictionary format version.
+ The server will read the data of any version, but will not write data
+ structures that were introduced after the version in
+ rocksdb_table_dictionary_format.
+ This way, one can keep the on-disk data backward-compatible.
+*/
+const uint ROCKSDB_DATADIC_FORMAT_INITIAL = 1;
+const uint ROCKSDB_DATADIC_FORMAT_CREATE_TIMESTAMP = 2;
+
+// Maximum possible value:
+const uint ROCKSDB_DATADIC_FORMAT_MAX = 2;
+const uint ROCKSDB_DATADIC_FORMAT_DEFAULT = ROCKSDB_DATADIC_FORMAT_MAX;
+
+
/*
An object of this class represents information about an index in an SQL
table. It provides services to encode and decode index tuples.
@@ -465,6 +481,7 @@ class Rdb_key_def {
CF_NUMBER_SIZE = 4,
CF_FLAG_SIZE = 4,
PACKED_SIZE = 4, // one int
+ TABLE_CREATE_TIMESTAMP_SIZE = 8,
};
// bit flags for combining bools when writing to disk
@@ -506,7 +523,10 @@ class Rdb_key_def {
// Data dictionary schema version. Introduce newer versions
// if changing schema layout
enum {
- DDL_ENTRY_INDEX_VERSION = 1,
+ DDL_ENTRY_INDEX_VERSION_1 = 1,
+ // this includes a 64-bit table_creation_time at the end.
+ // Allowed since ROCKSDB_DATADIC_FORMAT_CREATE_TIMESTAMP.
+ DDL_ENTRY_INDEX_VERSION_2 = 2,
CF_DEFINITION_VERSION = 1,
BINLOG_INFO_INDEX_NUMBER_VERSION = 1,
DDL_DROP_INDEX_ONGOING_VERSION = 1,
@@ -1116,6 +1136,12 @@ class Rdb_tbl_def {
~Rdb_tbl_def();
+ // time values are shown in SHOW TABLE STATUS
+ void set_create_time(time_t val) { create_time = val; }
+ time_t get_create_time() { return create_time; }
+
+ time_t update_time = 0; // in-memory only value, maintained right here
+
void check_and_set_read_free_rpl_table();
/* Number of indexes */
@@ -1161,6 +1187,9 @@ class Rdb_tbl_def {
const std::string &base_tablename() const { return m_tablename; }
const std::string &base_partition() const { return m_partition; }
GL_INDEX_ID get_autoincr_gl_index_id();
+
+ private:
+ time_t create_time = 0;
};
/*
@@ -1341,8 +1370,11 @@ class Rdb_binlog_manager {
1. Table Name => internal index id mappings
key: Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER(0x1) + dbname.tablename
- value: version, {cf_id, index_id}*n_indexes_of_the_table
+ value: DDL_ENTRY_INDEX_VERSION_1, {cf_id, index_id}*n_indexes_of_the_table
+ or value: DDL_ENTRY_INDEX_VERSION_2, create_timestamp, {cf_id, index_id}*
+ n_indexes_of_the_table
version is 2 bytes. cf_id and index_id are 4 bytes.
+ create_timestamp is 8 bytes.
2. internal cf_id, index id => index information
key: Rdb_key_def::INDEX_INFO(0x2) + cf_id + index_id
1
0

[Commits] 8dc011b9965: Support Create_time and Update_time in MyRocks table status
by psergey 06 Oct '19
by psergey 06 Oct '19
06 Oct '19
revision-id: 8dc011b99657ae3d25e44debdc4bbeaebbaecb52 (fb-prod201903-144-g8dc011b9965)
parent(s): d97c0c628e5dc60abd725f6a7120a8d87b09321e
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-10-06 23:47:41 +0300
message:
Support Create_time and Update_time in MyRocks table status
(variant #3, with @@rocksdb_table_dictionary_format)
- Create_time is stored in the MyRocks' internal data dictionary.
- Update_time is in-memory only (like in InnoDB).
@@rocksdb_table_dictionary_format is a global read-only variable (set it
from my.cnf) which controls the on-disk data format.
rocksdb_table_dictionary_format=1 means use the same data format as
before. Create_time will always be NULL for all tables that are created.
rocksdb_table_dictionary_format=2 (the default) means use the newer data
format. All newly-created tables will have proper Create_time attribute.
Downgrades are only possible if one hasn't run any DDL that re-creates a
table.
---
mysql-test/suite/rocksdb/include/bulk_load.inc | 4 +-
.../suite/rocksdb/include/bulk_load_unsorted.inc | 4 +-
mysql-test/suite/rocksdb/r/bulk_load.result | 12 +--
mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result | 12 +--
.../rocksdb/r/bulk_load_rev_cf_and_data.result | 12 +--
.../suite/rocksdb/r/bulk_load_rev_data.result | 12 +--
.../suite/rocksdb/r/bulk_load_unsorted.result | 12 +--
.../suite/rocksdb/r/bulk_load_unsorted_rev.result | 12 +--
mysql-test/suite/rocksdb/r/issue255.result | 16 ++--
mysql-test/suite/rocksdb/r/rocksdb.result | 7 +-
.../r/rocksdb_table_dictionary_format.result | 37 ++++++++++
.../suite/rocksdb/r/show_table_status.result | 85 +++++++++++++++++++++-
mysql-test/suite/rocksdb/r/truncate_table.result | 8 +-
mysql-test/suite/rocksdb/t/issue255.test | 17 +++--
mysql-test/suite/rocksdb/t/rocksdb.test | 4 +-
.../rocksdb/t/rocksdb_table_dictionary_format.test | 67 +++++++++++++++++
mysql-test/suite/rocksdb/t/show_table_status.test | 81 ++++++++++++++++++++-
mysql-test/suite/rocksdb/t/truncate_table.test | 8 +-
.../r/rocksdb_table_dictionary_format_basic.result | 14 ++++
.../t/rocksdb_table_dictionary_format_basic.test | 16 ++++
storage/rocksdb/ha_rocksdb.cc | 53 ++++++++++++++
storage/rocksdb/ha_rocksdb.h | 1 +
storage/rocksdb/rdb_datadic.cc | 57 ++++++++++++---
storage/rocksdb/rdb_datadic.h | 36 ++++++++-
24 files changed, 502 insertions(+), 85 deletions(-)
diff --git a/mysql-test/suite/rocksdb/include/bulk_load.inc b/mysql-test/suite/rocksdb/include/bulk_load.inc
index 1b79825e507..7e163602202 100644
--- a/mysql-test/suite/rocksdb/include/bulk_load.inc
+++ b/mysql-test/suite/rocksdb/include/bulk_load.inc
@@ -121,12 +121,12 @@ set rocksdb_bulk_load=0;
--remove_file $file
# Make sure row count index stats are correct
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
ANALYZE TABLE t1, t2, t3;
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
# Make sure all the data is there.
diff --git a/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc b/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
index 5cdc76a32d4..812af0401aa 100644
--- a/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
+++ b/mysql-test/suite/rocksdb/include/bulk_load_unsorted.inc
@@ -119,12 +119,12 @@ set rocksdb_bulk_load=0;
--remove_file $file
# Make sure row count index stats are correct
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
ANALYZE TABLE t1, t2, t3;
---replace_column 6 # 7 # 8 # 9 #
+--replace_column 6 # 7 # 8 # 9 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't%';
# Make sure all the data is there.
diff --git a/mysql-test/suite/rocksdb/r/bulk_load.result b/mysql-test/suite/rocksdb/r/bulk_load.result
index a36f99a7619..76db28e66bd 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
index b5d3e252c5d..ae363f7ec0c 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
index f46acd41080..dd8dd7e60a8 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_cf_and_data.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result b/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
index 3389968ef37..96738ae62e2 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_rev_data.result
@@ -38,9 +38,9 @@ pk a b
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -48,9 +48,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_bin NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_bin NULL partitioned
select count(pk) from t1;
count(pk)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result b/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
index 924032549ac..87fc63af2da 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_unsorted.result
@@ -70,9 +70,9 @@ LOAD DATA INFILE <input_file> INTO TABLE t3;
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -80,9 +80,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
select count(a) from t1;
count(a)
5000000
diff --git a/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result b/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
index 3cc9fb8e459..8e0914f0159 100644
--- a/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
+++ b/mysql-test/suite/rocksdb/r/bulk_load_unsorted_rev.result
@@ -70,9 +70,9 @@ LOAD DATA INFILE <input_file> INTO TABLE t3;
set rocksdb_bulk_load=0;
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
ANALYZE TABLE t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status OK
@@ -80,9 +80,9 @@ test.t2 analyze status OK
test.t3 analyze status OK
SHOW TABLE STATUS WHERE name LIKE 't%';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL NULL NULL NULL latin1_swedish_ci NULL partitioned
+t1 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 5000000 # # # # 0 NULL # # NULL latin1_swedish_ci NULL partitioned
select count(a) from t1;
count(a)
5000000
diff --git a/mysql-test/suite/rocksdb/r/issue255.result b/mysql-test/suite/rocksdb/r/issue255.result
index c1ce3be2276..b45b3b5afc7 100644
--- a/mysql-test/suite/rocksdb/r/issue255.result
+++ b/mysql-test/suite/rocksdb/r/issue255.result
@@ -2,7 +2,7 @@ CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 6 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ('538647864786478647864');
Warnings:
Warning 1264 Out of range value for column 'pk' at row 1
@@ -12,7 +12,7 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 2 22 44 0 0 0 9223372036854775807 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
SELECT * FROM t1;
@@ -21,7 +21,7 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '9223372036854775807' for key 'PRIMARY'
SELECT * FROM t1;
@@ -30,13 +30,13 @@ pk
9223372036854775807
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 9223372036854775807 # # NULL latin1_swedish_ci NULL
DROP TABLE t1;
CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 6 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 6 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES (1000);
Warnings:
Warning 1264 Out of range value for column 'pk' at row 1
@@ -46,7 +46,7 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
SELECT * FROM t1;
@@ -55,7 +55,7 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 VALUES ();
ERROR 23000: Duplicate entry '127' for key 'PRIMARY'
SELECT * FROM t1;
@@ -64,5 +64,5 @@ pk
127
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB # Fixed # # # # # # 127 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB # Fixed # # # # # # 127 # # NULL latin1_swedish_ci NULL
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/r/rocksdb.result b/mysql-test/suite/rocksdb/r/rocksdb.result
index 088eb050f6f..c76f31afaab 100644
--- a/mysql-test/suite/rocksdb/r/rocksdb.result
+++ b/mysql-test/suite/rocksdb/r/rocksdb.result
@@ -979,6 +979,7 @@ rocksdb_store_row_debug_checksums OFF
rocksdb_strict_collation_check OFF
rocksdb_strict_collation_exceptions
rocksdb_table_cache_numshardbits 6
+rocksdb_table_dictionary_format 2
rocksdb_table_stats_background_thread_nice_value 19
rocksdb_table_stats_max_num_rows_scanned 0
rocksdb_table_stats_recalc_threshold_count 100
@@ -1417,7 +1418,7 @@ create table t1 (i int primary key auto_increment) engine=RocksDB;
insert into t1 values (null),(null);
show table status like 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 # 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 # 0 0 0 3 # # NULL latin1_swedish_ci NULL
drop table t1;
#
# Fix Issue #4: Crash when using pseudo-unique keys
@@ -2612,7 +2613,7 @@ CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(-1),(0);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 3 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 3 # # NULL latin1_swedish_ci NULL
SELECT * FROM t1;
a
-1
@@ -2623,7 +2624,7 @@ CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(10),(0);
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 12 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed 1000 0 0 0 0 0 12 # # NULL latin1_swedish_ci NULL
SELECT * FROM t1;
a
1
diff --git a/mysql-test/suite/rocksdb/r/rocksdb_table_dictionary_format.result b/mysql-test/suite/rocksdb/r/rocksdb_table_dictionary_format.result
new file mode 100644
index 00000000000..8f2fa96a04a
--- /dev/null
+++ b/mysql-test/suite/rocksdb/r/rocksdb_table_dictionary_format.result
@@ -0,0 +1,37 @@
+select @@rocksdb_table_dictionary_format;
+@@rocksdb_table_dictionary_format
+2
+#
+# Server restarted
+#
+select @@rocksdb_table_dictionary_format;
+@@rocksdb_table_dictionary_format
+1
+create table t1 (a int) engine=rocksdb;
+insert into t1 values (1);
+# Create_time will be NULL as the table doesn't support it
+# Update_time will be non-null
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time update_time is not null
+NULL 1
+#
+# Server restarted
+#
+select @@rocksdb_table_dictionary_format;
+@@rocksdb_table_dictionary_format
+2
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time update_time is not null
+NULL 0
+set global rocksdb_compact_cf='default';
+select concat('00', '00', '00', '01', hex('test.t1'));
+concat('00', '00', '00', '01', hex('test.t1'))
+00000001746573742E7431
+Found the datadic entry
+Value has format version 1, followed by 8 bytes describing one index
+Done
+drop table t1;
diff --git a/mysql-test/suite/rocksdb/r/show_table_status.result b/mysql-test/suite/rocksdb/r/show_table_status.result
index 29140f045e4..345882040ef 100644
--- a/mysql-test/suite/rocksdb/r/show_table_status.result
+++ b/mysql-test/suite/rocksdb/r/show_table_status.result
@@ -7,12 +7,12 @@ set global rocksdb_force_flush_memtable_now = true;
CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' );
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
-t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL utf8_general_ci NULL
+t1 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
+t3 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL utf8_general_ci NULL
SHOW TABLE STATUS WHERE name LIKE 't2';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL NULL NULL NULL latin1_swedish_ci NULL
+t2 ROCKSDB 10 Fixed 1000 # # 0 0 0 NULL # # NULL latin1_swedish_ci NULL
DROP TABLE t1, t2, t3;
CREATE DATABASE `db_new..............................................end`;
USE `db_new..............................................end`;
@@ -22,3 +22,80 @@ SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.table_statistics WHERE T
TABLE_SCHEMA db_new..............................................end
TABLE_NAME t1_new..............................................end
DROP DATABASE `db_new..............................................end`;
+#
+# MDEV-17171: Bug: RocksDB Tables do not have "Creation Date"
+#
+use test;
+create table t1 (a int) engine=rocksdb;
+select create_time is not null, update_time, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time check_time
+1 NULL NULL
+insert into t1 values (1);
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time is not null check_time
+1 1 NULL
+flush tables;
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+create_time is not null update_time is not null check_time
+1 1 NULL
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+select sleep(3);
+sleep(3)
+0
+insert into t1 values (2);
+select
+create_time=@create_tm /* should not change */ ,
+timestampdiff(second, @update_tm, update_time) > 2,
+check_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=@create_tm 1
+timestampdiff(second, @update_tm, update_time) > 2 1
+check_time NULL
+#
+# Check how create_time survives ALTER TABLE.
+# First, an ALTER TABLE that re-creates the table:
+alter table t1 add b int;
+select
+create_time<>@create_tm /* should change */,
+create_time IS NOT NULL,
+update_time IS NULL
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time<>@create_tm 1
+create_time IS NOT NULL 1
+update_time IS NULL 1
+insert into t1 values (5,5);
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+# Then, an in-place ALTER TABLE:
+alter table t1 add key (a);
+select
+create_time=@create_tm /* should not change */,
+update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=@create_tm 1
+update_time NULL
+#
+# Check what is left after server restart
+#
+# Save t1's creation time
+create table t2 as
+select create_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+select
+create_time=(select create_time from t2) /* should not change */,
+update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+create_time=(select create_time from t2) 1
+update_time NULL
+drop table t1, t2;
diff --git a/mysql-test/suite/rocksdb/r/truncate_table.result b/mysql-test/suite/rocksdb/r/truncate_table.result
index 1544256f194..79b266a2453 100644
--- a/mysql-test/suite/rocksdb/r/truncate_table.result
+++ b/mysql-test/suite/rocksdb/r/truncate_table.result
@@ -9,19 +9,19 @@ DROP TABLE t1;
CREATE TABLE t1 (a INT KEY AUTO_INCREMENT, c CHAR(8)) ENGINE=rocksdb;
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 1 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 (c) VALUES ('a'),('b'),('c');
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 4 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 4 # # NULL latin1_swedish_ci NULL
TRUNCATE TABLE t1;
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 1 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 1 # # NULL latin1_swedish_ci NULL
INSERT INTO t1 (c) VALUES ('d');
SHOW TABLE STATUS LIKE 't1';
Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment
-t1 ROCKSDB 10 Fixed # # # 0 0 0 2 NULL NULL NULL latin1_swedish_ci NULL
+t1 ROCKSDB 10 Fixed # # # 0 0 0 2 # # NULL latin1_swedish_ci NULL
SELECT a,c FROM t1;
a c
1 d
diff --git a/mysql-test/suite/rocksdb/t/issue255.test b/mysql-test/suite/rocksdb/t/issue255.test
index 370dece0c6c..686f45b4056 100644
--- a/mysql-test/suite/rocksdb/t/issue255.test
+++ b/mysql-test/suite/rocksdb/t/issue255.test
@@ -3,24 +3,25 @@
CREATE TABLE t1 (pk BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 VALUES ('538647864786478647864');
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SELECT * FROM t1;
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
DROP TABLE t1;
@@ -28,24 +29,24 @@ DROP TABLE t1;
CREATE TABLE t1 (pk TINYINT NOT NULL PRIMARY KEY AUTO_INCREMENT);
INSERT INTO t1 VALUES (5);
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 VALUES (1000);
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES ();
SELECT * FROM t1;
---replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 #
+--replace_column 3 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/t/rocksdb.test b/mysql-test/suite/rocksdb/t/rocksdb.test
index 5eff0fbf38f..7dcae569c92 100644
--- a/mysql-test/suite/rocksdb/t/rocksdb.test
+++ b/mysql-test/suite/rocksdb/t/rocksdb.test
@@ -1198,7 +1198,7 @@ drop table t1;
create table t1 (i int primary key auto_increment) engine=RocksDB;
insert into t1 values (null),(null);
---replace_column 7 #
+--replace_column 7 # 12 # 13 #
show table status like 't1';
drop table t1;
@@ -1903,11 +1903,13 @@ DROP TABLE t1;
# value is 4 while MyRocks will show it as 3.
CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(-1),(0);
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
SELECT * FROM t1;
DROP TABLE t1;
CREATE TABLE t1(a INT AUTO_INCREMENT KEY);
INSERT INTO t1 VALUES(0),(10),(0);
+--replace_column 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
SELECT * FROM t1;
DROP TABLE t1;
diff --git a/mysql-test/suite/rocksdb/t/rocksdb_table_dictionary_format.test b/mysql-test/suite/rocksdb/t/rocksdb_table_dictionary_format.test
new file mode 100644
index 00000000000..3dedf6502ab
--- /dev/null
+++ b/mysql-test/suite/rocksdb/t/rocksdb_table_dictionary_format.test
@@ -0,0 +1,67 @@
+--source include/have_rocksdb.inc
+
+select @@rocksdb_table_dictionary_format;
+
+#
+# Check the upgrade from Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1 to _2
+#
+
+--let $_mysqld_option=--rocksdb_table_dictionary_format=1
+--source include/restart_mysqld_with_option.inc
+
+--echo #
+--echo # Server restarted
+--echo #
+select @@rocksdb_table_dictionary_format;
+
+create table t1 (a int) engine=rocksdb;
+insert into t1 values (1);
+
+--echo # Create_time will be NULL as the table doesn't support it
+--echo # Update_time will be non-null
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--let $_mysqld_option=--rocksdb_table_dictionary_format=2
+--source include/restart_mysqld_with_option.inc
+--echo #
+--echo # Server restarted
+--echo #
+select @@rocksdb_table_dictionary_format;
+
+select create_time, update_time is not null
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+set global rocksdb_compact_cf='default';
+# use: $MYSQLTEST_VARDIR $MYSQL_SST_DUMP
+let MYSQL_DATADIR = `select @@datadir`;
+let MYSQL_SST_DUMP = $MYSQL_SST_DUMP;
+
+select concat('00', '00', '00', '01', hex('test.t1'));
+
+# We are looking to find a record like this:
+# '00000001746573742E7431' seq:0, type:1 => 00010000000000000104
+# that is:
+# '00000001:746573742E7431' seq:0, type:1 => 0001: 0000000000000104
+# the key is: DDL_ENTRY_INDEX_START_NUMBER, "test.t1"
+# the value is: DDL_ENTRY_INDEX_VERSION_1, {cf_id, index_id}
+perl;
+ my $datadir = $ENV{'MYSQL_DATADIR'};
+ my $sst_dump = $ENV{'MYSQL_SST_DUMP'};
+ open(IN, "$sst_dump --command=scan --output_hex --file=$datadir/.rocksdb |");
+ while(<IN>) {
+ if ($_ =~ /^\'00000001746573742E7431\'/) {
+ print "Found the datadic entry\n";
+ if ($_ =~ / 0001[0-9]{16}$/) {
+ print "Value has format version 1, followed by 8 bytes describing one index\n"
+ } else {
+ print "Unexpected value. $_\n";
+ }
+ }
+ }
+ print "Done\n";
+ close(IN);
+EOF
+drop table t1;
+
diff --git a/mysql-test/suite/rocksdb/t/show_table_status.test b/mysql-test/suite/rocksdb/t/show_table_status.test
index 29cc2ccfb5e..59effcc788c 100644
--- a/mysql-test/suite/rocksdb/t/show_table_status.test
+++ b/mysql-test/suite/rocksdb/t/show_table_status.test
@@ -24,7 +24,7 @@ set global rocksdb_force_flush_memtable_now = true;
CREATE TABLE t3 (a INT, b CHAR(8), pk INT PRIMARY KEY) ENGINE=rocksdb CHARACTER SET utf8;
---replace_column 6 # 7 #
+--replace_column 6 # 7 # 12 # 13 #
SHOW TABLE STATUS WHERE name IN ( 't1', 't2', 't3' );
# Some statistics don't get updated as quickly. The Data_length and
@@ -48,7 +48,7 @@ set global rocksdb_force_flush_memtable_now = true;
# We expect the number of rows to be 10000. Data_len and Avg_row_len
# may vary, depending on built-in compression library.
---replace_column 6 # 7 #
+--replace_column 6 # 7 # 12 # 13 #
SHOW TABLE STATUS WHERE name LIKE 't2';
DROP TABLE t1, t2, t3;
@@ -62,3 +62,80 @@ CREATE TABLE `t1_new..............................................end`(a int) en
INSERT INTO `t1_new..............................................end` VALUES (1);
--query_vertical SELECT TABLE_SCHEMA, TABLE_NAME FROM information_schema.table_statistics WHERE TABLE_NAME = 't1_new..............................................end'
DROP DATABASE `db_new..............................................end`;
+--echo #
+--echo # MDEV-17171: Bug: RocksDB Tables do not have "Creation Date"
+--echo #
+use test;
+create table t1 (a int) engine=rocksdb;
+
+select create_time is not null, update_time, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+insert into t1 values (1);
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+flush tables;
+select create_time is not null, update_time is not null, check_time
+from information_schema.tables where table_schema=database() and table_name='t1';
+
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+select sleep(3);
+insert into t1 values (2);
+
+--vertical_results
+select
+ create_time=@create_tm /* should not change */ ,
+ timestampdiff(second, @update_tm, update_time) > 2,
+ check_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo #
+--echo # Check how create_time survives ALTER TABLE.
+--echo # First, an ALTER TABLE that re-creates the table:
+alter table t1 add b int;
+select
+ create_time<>@create_tm /* should change */,
+ create_time IS NOT NULL,
+ update_time IS NULL
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+insert into t1 values (5,5);
+
+select create_time, update_time into @create_tm, @update_tm
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo # Then, an in-place ALTER TABLE:
+alter table t1 add key (a);
+
+select
+ create_time=@create_tm /* should not change */,
+ update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--echo #
+--echo # Check what is left after server restart
+--echo #
+
+--echo # Save t1's creation time
+create table t2 as
+select create_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+--source include/restart_mysqld.inc
+
+select
+  create_time=(select create_time from t2) /* should not change */,
+ update_time
+from information_schema.tables
+where table_schema=database() and table_name='t1';
+
+drop table t1, t2;
diff --git a/mysql-test/suite/rocksdb/t/truncate_table.test b/mysql-test/suite/rocksdb/t/truncate_table.test
index a61488654a3..1001eeb6cde 100644
--- a/mysql-test/suite/rocksdb/t/truncate_table.test
+++ b/mysql-test/suite/rocksdb/t/truncate_table.test
@@ -29,22 +29,22 @@ DROP TABLE t1;
CREATE TABLE t1 (a INT KEY AUTO_INCREMENT, c CHAR(8)) ENGINE=rocksdb;
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 (c) VALUES ('a'),('b'),('c');
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
TRUNCATE TABLE t1;
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
INSERT INTO t1 (c) VALUES ('d');
#--replace_column 2 # 3 # 4 # 5 # 6 # 7 # 8 # 9 # 10 # 12 # 13 # 14 # 15 # 16 # 17 # 18 #
---replace_column 5 # 6 # 7 #
+--replace_column 5 # 6 # 7 # 12 # 13 #
SHOW TABLE STATUS LIKE 't1';
--sorted_result
diff --git a/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_table_dictionary_format_basic.result b/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_table_dictionary_format_basic.result
new file mode 100644
index 00000000000..825d7a7113a
--- /dev/null
+++ b/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_table_dictionary_format_basic.result
@@ -0,0 +1,14 @@
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(2);
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+SET @start_global_value = @@global.ROCKSDB_TABLE_DICTIONARY_FORMAT;
+SELECT @start_global_value;
+@start_global_value
+2
+"Trying to set variable @@global.ROCKSDB_TABLE_DICTIONARY_FORMAT to 444. It should fail because it is readonly."
+SET @@global.ROCKSDB_TABLE_DICTIONARY_FORMAT = 444;
+ERROR HY000: Variable 'rocksdb_table_dictionary_format' is a read only variable
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_table_dictionary_format_basic.test b/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_table_dictionary_format_basic.test
new file mode 100644
index 00000000000..2b8c6e81bf7
--- /dev/null
+++ b/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_table_dictionary_format_basic.test
@@ -0,0 +1,16 @@
+--source include/have_rocksdb.inc
+
+CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO valid_values VALUES(1);
+INSERT INTO valid_values VALUES(2);
+
+CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam;
+INSERT INTO invalid_values VALUES('\'aaa\'');
+
+--let $sys_var=ROCKSDB_TABLE_DICTIONARY_FORMAT
+--let $read_only=1
+--let $session=0
+--source ../include/rocksdb_sys_var.inc
+
+DROP TABLE valid_values;
+DROP TABLE invalid_values;
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index d6c89087bea..a0a344187cf 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -122,6 +122,8 @@ const std::string PER_INDEX_CF_NAME("$per_index_cf");
static std::vector<std::string> rdb_tables_to_recalc;
+uint rocksdb_table_dictionary_format;
+
class Rdb_explicit_snapshot : public explicit_snapshot {
public:
static std::shared_ptr<Rdb_explicit_snapshot> create(
@@ -2136,6 +2138,17 @@ static MYSQL_SYSVAR_ULONGLONG(
"MultiGet",
nullptr, nullptr, SIZE_T_MAX, /* min */ 0, /* max */ SIZE_T_MAX, 0);
+static MYSQL_SYSVAR_UINT(table_dictionary_format,
+ rocksdb_table_dictionary_format,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+                         "Max Table Dictionary format version that the server "
+                         "may create (use lower values for backward "
+                         "compatibility, higher values for new features)",
+ nullptr, nullptr,
+ ROCKSDB_DATADIC_FORMAT_DEFAULT,
+ ROCKSDB_DATADIC_FORMAT_INITIAL,
+ ROCKSDB_DATADIC_FORMAT_MAX, 0);
+
static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100;
static struct st_mysql_sys_var *rocksdb_system_variables[] = {
@@ -2309,6 +2322,8 @@ static struct st_mysql_sys_var *rocksdb_system_variables[] = {
MYSQL_SYSVAR(select_bypass_log_rejected),
MYSQL_SYSVAR(select_bypass_debug_row_delay),
MYSQL_SYSVAR(select_bypass_multiget_min),
+
+ MYSQL_SYSVAR(table_dictionary_format),
nullptr};
static rocksdb::WriteOptions rdb_get_rocksdb_write_options(
@@ -2456,6 +2471,8 @@ class Rdb_transaction {
bool m_is_delayed_snapshot = false;
bool m_is_two_phase = false;
+ std::unordered_set<Rdb_tbl_def*> modified_tables;
+
private:
/*
Number of write operations this transaction had when we took the last
@@ -3139,6 +3156,25 @@ class Rdb_transaction {
virtual void start_tx() = 0;
virtual void start_stmt() = 0;
+ protected:
+ // Non-virtual functions with actions to be done on transaction start and
+ // commit.
+ void on_commit() {
+ time_t tm;
+ tm = time(nullptr);
+ for (auto &it : modified_tables) {
+ it->update_time = tm;
+ }
+ modified_tables.clear();
+ }
+ void on_rollback() {
+ modified_tables.clear();
+ }
+ public:
+ void log_table_write_op(Rdb_tbl_def *tbl) {
+ modified_tables.insert(tbl);
+ }
+
void set_initial_savepoint() {
/*
Set the initial savepoint. If the first statement in the transaction
@@ -3322,7 +3358,9 @@ class Rdb_transaction_impl : public Rdb_transaction {
goto error;
}
+ on_commit();
error:
+ on_rollback();
/* Save the transaction object to be reused */
release_tx();
@@ -3338,6 +3376,7 @@ class Rdb_transaction_impl : public Rdb_transaction {
public:
void rollback() override {
+ on_rollback();
m_write_count = 0;
m_insert_count = 0;
m_update_count = 0;
@@ -3659,7 +3698,9 @@ class Rdb_writebatch_impl : public Rdb_transaction {
res = true;
goto error;
}
+ on_commit();
error:
+ on_rollback();
reset();
m_write_count = 0;
@@ -3691,6 +3732,7 @@ class Rdb_writebatch_impl : public Rdb_transaction {
}
void rollback() override {
+ on_rollback();
m_write_count = 0;
m_insert_count = 0;
m_update_count = 0;
@@ -7549,6 +7591,7 @@ int ha_rocksdb::create_table(const std::string &table_name,
/* Create table/key descriptions and put them into the data dictionary */
m_tbl_def = new Rdb_tbl_def(table_name);
+ m_tbl_def->set_create_time(time(nullptr));
uint n_keys = table_arg->s->keys;
@@ -10177,6 +10220,8 @@ int ha_rocksdb::update_write_row(const uchar *const old_data,
row_info.tx->incr_insert_count();
}
+ row_info.tx->log_table_write_op(m_tbl_def);
+
if (do_bulk_commit(row_info.tx)) {
DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD);
}
@@ -10648,6 +10693,7 @@ int ha_rocksdb::delete_row(const uchar *const buf) {
}
tx->incr_delete_count();
+ tx->log_table_write_op(m_tbl_def);
if (do_bulk_commit(tx)) {
DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD);
@@ -10802,6 +10848,12 @@ int ha_rocksdb::info(uint flag) {
k->rec_per_key[j] = x;
}
}
+
+ stats.create_time = m_tbl_def->get_create_time();
+ }
+
+ if (flag & HA_STATUS_TIME) {
+ stats.update_time = m_tbl_def->update_time;
}
if (flag & HA_STATUS_ERRKEY) {
@@ -12603,6 +12655,7 @@ bool ha_rocksdb::prepare_inplace_alter_table(
m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed);
new_tdef->m_hidden_pk_val =
m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed);
+ new_tdef->set_create_time(m_tbl_def->get_create_time());
if (create_key_defs(altered_table, new_tdef, table, m_tbl_def)) {
/* Delete the new key descriptors */
diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h
index 9a250af40c7..c36c5c98e19 100644
--- a/storage/rocksdb/ha_rocksdb.h
+++ b/storage/rocksdb/ha_rocksdb.h
@@ -82,6 +82,7 @@ extern PSI_rwlock_key key_rwlock_read_free_rpl_tables;
#endif
extern Regex_list_handler rdb_read_free_regex_handler;
+extern uint rocksdb_table_dictionary_format;
/**
@brief
Rdb_table_handler is a reference-counted structure storing information for
diff --git a/storage/rocksdb/rdb_datadic.cc b/storage/rocksdb/rdb_datadic.cc
index c0741a1ce9b..234f7a789ff 100644
--- a/storage/rocksdb/rdb_datadic.cc
+++ b/storage/rocksdb/rdb_datadic.cc
@@ -3514,8 +3514,21 @@ bool Rdb_tbl_def::put_dict(Rdb_dict_manager *const dict,
const rocksdb::Slice &key) {
StringBuffer<8 * Rdb_key_def::PACKED_SIZE> indexes;
indexes.alloc(Rdb_key_def::VERSION_SIZE +
+ Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE +
m_key_count * Rdb_key_def::PACKED_SIZE * 2);
- rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION);
+
+ if (rocksdb_table_dictionary_format <
+ ROCKSDB_DATADIC_FORMAT_CREATE_TIMESTAMP) {
+ rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1);
+ // We are using old data format, which means we cannot save Create_time
+ // Set it to be shown as unknown right away, so that the behavior before
+ // server restart and after is the same.
+ set_create_time(0);
+ }
+ else {
+ rdb_netstr_append_uint16(&indexes, Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2);
+ rdb_netstr_append_uint64(&indexes, create_time);
+ }
for (uint i = 0; i < m_key_count; i++) {
const Rdb_key_def &kd = *m_key_descr_arr[i];
@@ -4015,27 +4028,52 @@ bool Rdb_ddl_manager::init(Rdb_dict_manager *const dict_arg,
Rdb_tbl_def *const tdef =
new Rdb_tbl_def(key, Rdb_key_def::INDEX_NUMBER_SIZE);
- // Now, read the DDLs.
- const int real_val_size = val.size() - Rdb_key_def::VERSION_SIZE;
- if (real_val_size % Rdb_key_def::PACKED_SIZE * 2 > 0) {
+ if (val.size() < Rdb_key_def::VERSION_SIZE) {
// NO_LINT_DEBUG
sql_print_error("RocksDB: Table_store: invalid keylist for table %s",
tdef->full_tablename().c_str());
return true;
}
- tdef->m_key_count = real_val_size / (Rdb_key_def::PACKED_SIZE * 2);
- tdef->m_key_descr_arr = new std::shared_ptr<Rdb_key_def>[tdef->m_key_count];
ptr = reinterpret_cast<const uchar *>(val.data());
const int version = rdb_netbuf_read_uint16(&ptr);
- if (version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION) {
+
+ if (version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1 &&
+ version != Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2) {
// NO_LINT_DEBUG
sql_print_error(
"RocksDB: DDL ENTRY Version was not expected."
- "Expected: %d, Actual: %d",
- Rdb_key_def::DDL_ENTRY_INDEX_VERSION, version);
+ "Expected: %d..%d, Actual: %d",
+ Rdb_key_def::DDL_ENTRY_INDEX_VERSION_1,
+ Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2, version);
return true;
}
+ int real_val_size = val.size() - Rdb_key_def::VERSION_SIZE;
+
+ if (version == Rdb_key_def::DDL_ENTRY_INDEX_VERSION_2) {
+ if (real_val_size < Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE) {
+ // NO_LINT_DEBUG
+ sql_print_error( "RocksDB: DDL ENTRY V2 doesn't have timestamp");
+ delete tdef;
+ return true;
+ }
+ tdef->set_create_time(rdb_netbuf_read_uint64(&ptr));
+ real_val_size -= Rdb_key_def::TABLE_CREATE_TIMESTAMP_SIZE;
+ }
+ else
+ tdef->set_create_time(0); // shown as SQL NULL.
+
+ // Now, read the DDLs.
+ if (real_val_size % Rdb_key_def::PACKED_SIZE * 2 > 0) {
+ // NO_LINT_DEBUG
+ sql_print_error("RocksDB: Table_store: invalid keylist for table %s",
+ tdef->full_tablename().c_str());
+ return true;
+ }
+ tdef->m_key_count = real_val_size / (Rdb_key_def::PACKED_SIZE * 2);
+ tdef->m_key_descr_arr = new std::shared_ptr<Rdb_key_def>[tdef->m_key_count];
+
+
ptr_end = ptr + real_val_size;
for (uint keyno = 0; ptr < ptr_end; keyno++) {
GL_INDEX_ID gl_index_id;
@@ -4471,6 +4509,7 @@ bool Rdb_ddl_manager::rename(const std::string &from, const std::string &to,
rec->m_hidden_pk_val.load(std::memory_order_relaxed);
new_rec->m_tbl_stats = rec->m_tbl_stats;
+ new_rec->set_create_time(rec->get_create_time());
// so that it's not free'd when deleting the old rec
rec->m_key_descr_arr = nullptr;
diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h
index 416857cad38..9c2a96e77b7 100644
--- a/storage/rocksdb/rdb_datadic.h
+++ b/storage/rocksdb/rdb_datadic.h
@@ -203,6 +203,22 @@ enum {
UNPACK_FAILURE = 1,
};
+
+/*
+ Global user-visible data dictionary format version.
+ The server will read the data of any version, but will not write data
+ structures that were introduced after the version in
+ rocksdb_table_dictionary_format.
+ This way, one can keep the on-disk data backward-compatible.
+*/
+const uint ROCKSDB_DATADIC_FORMAT_INITIAL = 1;
+const uint ROCKSDB_DATADIC_FORMAT_CREATE_TIMESTAMP = 2;
+
+// Maximum possible value:
+const uint ROCKSDB_DATADIC_FORMAT_MAX = 2;
+const uint ROCKSDB_DATADIC_FORMAT_DEFAULT = ROCKSDB_DATADIC_FORMAT_MAX;
+
+
/*
An object of this class represents information about an index in an SQL
table. It provides services to encode and decode index tuples.
@@ -465,6 +481,7 @@ class Rdb_key_def {
CF_NUMBER_SIZE = 4,
CF_FLAG_SIZE = 4,
PACKED_SIZE = 4, // one int
+ TABLE_CREATE_TIMESTAMP_SIZE = 8,
};
// bit flags for combining bools when writing to disk
@@ -506,7 +523,10 @@ class Rdb_key_def {
// Data dictionary schema version. Introduce newer versions
// if changing schema layout
enum {
- DDL_ENTRY_INDEX_VERSION = 1,
+ DDL_ENTRY_INDEX_VERSION_1 = 1,
+ // this includes a 64-bit table_creation_time at the end.
+ // Allowed since ROCKSDB_DATADIC_FORMAT_CREATE_TIMESTAMP.
+ DDL_ENTRY_INDEX_VERSION_2 = 2,
CF_DEFINITION_VERSION = 1,
BINLOG_INFO_INDEX_NUMBER_VERSION = 1,
DDL_DROP_INDEX_ONGOING_VERSION = 1,
@@ -1116,6 +1136,12 @@ class Rdb_tbl_def {
~Rdb_tbl_def();
+ // time values are shown in SHOW TABLE STATUS
+ void set_create_time(time_t val) { create_time = val; }
+ time_t get_create_time() { return create_time; }
+
+ time_t update_time = 0; // in-memory only value, maintained right here
+
void check_and_set_read_free_rpl_table();
/* Number of indexes */
@@ -1161,6 +1187,9 @@ class Rdb_tbl_def {
const std::string &base_tablename() const { return m_tablename; }
const std::string &base_partition() const { return m_partition; }
GL_INDEX_ID get_autoincr_gl_index_id();
+
+ private:
+ time_t create_time = 0;
};
/*
@@ -1341,8 +1370,11 @@ class Rdb_binlog_manager {
1. Table Name => internal index id mappings
key: Rdb_key_def::DDL_ENTRY_INDEX_START_NUMBER(0x1) + dbname.tablename
- value: version, {cf_id, index_id}*n_indexes_of_the_table
+ value: DDL_ENTRY_INDEX_VERSION_1, {cf_id, index_id}*n_indexes_of_the_table
+ or value: DDL_ENTRY_INDEX_VERSION_2, create_timestamp, {cf_id, index_id}*
+ n_indexes_of_the_table
version is 2 bytes. cf_id and index_id are 4 bytes.
+ create_timestamp is 8 bytes.
2. internal cf_id, index id => index information
key: Rdb_key_def::INDEX_INFO(0x2) + cf_id + index_id
1
0