[Commits] 756b3e44a: Added a comment in PessimisticTransactionDB::GetDeadlockInfoBuffer
by Sergei Petrunia 10 Dec '18
by Sergei Petrunia 10 Dec '18
10 Dec '18
revision-id: 756b3e44a8b7173993c0076a76c56c6a31d52a8b (v5.8-1022-g756b3e44a)
parent(s): 51606993a6e04edfa32ba9ed445e47330a2b2647
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2018-12-10 22:54:44 +0300
message:
Added a comment in PessimisticTransactionDB::GetDeadlockInfoBuffer
---
utilities/transactions/pessimistic_transaction_db.cc | 2 ++
1 file changed, 2 insertions(+)
diff --git a/utilities/transactions/pessimistic_transaction_db.cc b/utilities/transactions/pessimistic_transaction_db.cc
index c5635cf05..425b3ba73 100644
--- a/utilities/transactions/pessimistic_transaction_db.cc
+++ b/utilities/transactions/pessimistic_transaction_db.cc
@@ -628,6 +628,8 @@ PessimisticTransactionDB::GetLockStatusData() {
}
std::vector<DeadlockPath> PessimisticTransactionDB::GetDeadlockInfoBuffer() {
+ // TODO: Here, we should get deadlock info from RangeLockMgr if we are using
+ // it. At the moment, it doesn't provide any deadlock information.
return lock_mgr_.GetDeadlockInfoBuffer();
}
1
0
revision-id: b6ee32afb6eb812130939d21652d198a15f91f4c (fb-prod201801-177-gb6ee32afb6e)
parent(s): 071b9dbb36d3c5f55afe721c7d5bbcb1d79584ac
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2018-12-10 18:39:57 +0300
message:
Remove erroneously added file
---
mysql-test/suite/rocksdb/t/deadlock-master.opt | 1 -
1 file changed, 1 deletion(-)
diff --git a/mysql-test/suite/rocksdb/t/deadlock-master.opt b/mysql-test/suite/rocksdb/t/deadlock-master.opt
deleted file mode 100644
index cce6bc574ab..00000000000
--- a/mysql-test/suite/rocksdb/t/deadlock-master.opt
+++ /dev/null
@@ -1 +0,0 @@
---rocksdb_use_range_locking=1
1
0
[Commits] 071b9dbb36d: Fix rocksdb.deadlock_tracking test with Range Locking
by Sergei Petrunia 10 Dec '18
by Sergei Petrunia 10 Dec '18
10 Dec '18
revision-id: 071b9dbb36d3c5f55afe721c7d5bbcb1d79584ac (fb-prod201801-176-g071b9dbb36d)
parent(s): 3e477e9e81c4a651e646be47ad7a31e1a5204019
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2018-12-10 18:29:36 +0300
message:
Fix rocksdb.deadlock_tracking test with Range Locking
- Disable the test when Range Locking is used:
-- It uses SELECT ... LOCK IN SHARE MODE
-- It produces deadlock information which Range Locking
currently doesn't support.
- Add rocksdb.range_locking_deadlock_tracking which tests a subset
of functionality.
---
.../suite/rocksdb/include/have_range_locking.inc | 3 +
.../r/range_locking_deadlock_tracking.result | 235 +++++++++++++++++++++
mysql-test/suite/rocksdb/t/deadlock-master.opt | 1 +
mysql-test/suite/rocksdb/t/deadlock_tracking.test | 7 +-
.../rocksdb/t/range_locking_deadlock_tracking.test | 194 +++++++++++++++++
5 files changed, 439 insertions(+), 1 deletion(-)
diff --git a/mysql-test/suite/rocksdb/include/have_range_locking.inc b/mysql-test/suite/rocksdb/include/have_range_locking.inc
new file mode 100644
index 00000000000..bb502d623ac
--- /dev/null
+++ b/mysql-test/suite/rocksdb/include/have_range_locking.inc
@@ -0,0 +1,3 @@
+if (`select count(*) = 0 from information_schema.session_variables where variable_name = 'rocksdb_use_range_locking' and variable_value = 'ON';`) {
+ --skip Test requires range locking
+}
diff --git a/mysql-test/suite/rocksdb/r/range_locking_deadlock_tracking.result b/mysql-test/suite/rocksdb/r/range_locking_deadlock_tracking.result
new file mode 100644
index 00000000000..1cfbfeb8f13
--- /dev/null
+++ b/mysql-test/suite/rocksdb/r/range_locking_deadlock_tracking.result
@@ -0,0 +1,235 @@
+set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_deadlock_detect = @@rocksdb_deadlock_detect;
+set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks;
+set global rocksdb_deadlock_detect = on;
+set global rocksdb_lock_wait_timeout = 10000;
+# Clears deadlock buffer of any prior deadlocks.
+set global rocksdb_max_latest_deadlocks = 0;
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+create table t (i int primary key) engine=rocksdb;
+insert into t values (1), (2), (3);
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+Deadlock #1
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+select * from t where i=2 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+Deadlock #2
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+select * from t where i=2 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set global rocksdb_max_latest_deadlocks = 10;
+Deadlock #3
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+select * from t where i=2 for update;
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+rollback;
+i
+2
+rollback;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set global rocksdb_max_latest_deadlocks = 1;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set rocksdb_deadlock_detect_depth = 2;
+Deadlock #4
+begin;
+select * from t where i=1 for update;
+i
+1
+begin;
+select * from t where i=2 for update;
+i
+2
+begin;
+select * from t where i=3 for update;
+i
+3
+select * from t where i=2 for update;
+select * from t where i=3 for update;
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks';
+select * from t where i=1 for update;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+select case when variable_value-@a = 1 then 'true' else 'false' end as deadlocks from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks';
+deadlocks
+true
+rollback;
+i
+3
+rollback;
+i
+2
+rollback;
+set global rocksdb_max_latest_deadlocks = 5;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+Deadlock #6
+create table t1 (id int primary key, value int) engine=rocksdb;
+insert into t1 values (1,1),(2,2),(3,3),(4,4),(5,5);
+begin;
+update t1 set value=value+100 where id=1;
+update t1 set value=value+100 where id=2;
+begin;
+update t1 set value=value+200 where id=3;
+update t1 set value=value+100 where id=3;
+update t1 set value=value+200 where id=1;
+ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
+select * from t1;
+id value
+1 101
+2 102
+3 103
+4 4
+5 5
+drop table t1;
+set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_deadlock_detect;
+drop table t;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
+set global rocksdb_max_latest_deadlocks = 0;
+# Clears deadlock buffer of any existent deadlocks.
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+show engine rocksdb transaction status;
+Type Name Status
+rocksdb
+============================================================
+TIMESTAMP ROCKSDB TRANSACTION MONITOR OUTPUT
+============================================================
+---------
+SNAPSHOTS
+---------
+LIST OF SNAPSHOTS FOR EACH SESSION:
+----------LATEST DETECTED DEADLOCKS----------
+-----------------------------------------
+END OF ROCKSDB TRANSACTION MONITOR OUTPUT
+=========================================
+
diff --git a/mysql-test/suite/rocksdb/t/deadlock-master.opt b/mysql-test/suite/rocksdb/t/deadlock-master.opt
new file mode 100644
index 00000000000..cce6bc574ab
--- /dev/null
+++ b/mysql-test/suite/rocksdb/t/deadlock-master.opt
@@ -0,0 +1 @@
+--rocksdb_use_range_locking=1
diff --git a/mysql-test/suite/rocksdb/t/deadlock_tracking.test b/mysql-test/suite/rocksdb/t/deadlock_tracking.test
index 8ceebef8f72..d531290d4fa 100644
--- a/mysql-test/suite/rocksdb/t/deadlock_tracking.test
+++ b/mysql-test/suite/rocksdb/t/deadlock_tracking.test
@@ -1,3 +1,9 @@
+# Deadlock #5 uses SELECT ... LOCK IN SHARE MODE;
+# SHOW ENGINE ROCKSDB TRANSACTION status prints information about deadlocks.
+# A part of this test that works with range locking is in
+# range_locking_deadlock_tracking.test
+--source suite/rocksdb/include/not_range_locking.inc
+
set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
set @prior_deadlock_detect = @@rocksdb_deadlock_detect;
set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks;
@@ -137,7 +143,6 @@ rollback;
connection default;
--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /TIMESTAMP: [0-9]*/TSTAMP/
show engine rocksdb transaction status;
-
echo Deadlock #6;
connection con1;
create table t1 (id int primary key, value int) engine=rocksdb;
diff --git a/mysql-test/suite/rocksdb/t/range_locking_deadlock_tracking.test b/mysql-test/suite/rocksdb/t/range_locking_deadlock_tracking.test
new file mode 100644
index 00000000000..a1fdafa4613
--- /dev/null
+++ b/mysql-test/suite/rocksdb/t/range_locking_deadlock_tracking.test
@@ -0,0 +1,194 @@
+--source suite/rocksdb/include/have_range_locking.inc
+
+#
+# This is deadlock_tracking.test, variant for running with Range Locking:
+# - Deadlock #5 is disabled, it requires LOCK IN SHARE MODE tests
+# - In the result file, SHOW ENGINE ROCKSDB TRANSACTION STATUS does not print
+# deadlock information.
+#
+set @prior_lock_wait_timeout = @@rocksdb_lock_wait_timeout;
+set @prior_deadlock_detect = @@rocksdb_deadlock_detect;
+set @prior_max_latest_deadlocks = @@rocksdb_max_latest_deadlocks;
+set global rocksdb_deadlock_detect = on;
+set global rocksdb_lock_wait_timeout = 10000;
+--echo # Clears deadlock buffer of any prior deadlocks.
+set global rocksdb_max_latest_deadlocks = 0;
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+let $engine = rocksdb;
+
+--source include/count_sessions.inc
+connect (con1,localhost,root,,);
+let $con1= `SELECT CONNECTION_ID()`;
+
+connect (con2,localhost,root,,);
+let $con2= `SELECT CONNECTION_ID()`;
+
+connect (con3,localhost,root,,);
+let $con3= `SELECT CONNECTION_ID()`;
+
+connection default;
+eval create table t (i int primary key) engine=$engine;
+insert into t values (1), (2), (3);
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /TIMESTAMP: [0-9]*/TSTAMP/
+show engine rocksdb transaction status;
+
+echo Deadlock #1;
+--source include/simple_deadlock.inc
+connection default;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /TIMESTAMP: [0-9]*/TSTAMP/
+show engine rocksdb transaction status;
+
+echo Deadlock #2;
+--source include/simple_deadlock.inc
+connection default;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /TIMESTAMP: [0-9]*/TSTAMP/
+show engine rocksdb transaction status;
+set global rocksdb_max_latest_deadlocks = 10;
+
+echo Deadlock #3;
+--source include/simple_deadlock.inc
+connection default;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /TIMESTAMP: [0-9]*/TSTAMP/
+show engine rocksdb transaction status;
+set global rocksdb_max_latest_deadlocks = 1;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /TIMESTAMP: [0-9]*/TSTAMP/
+show engine rocksdb transaction status;
+
+connection con3;
+set rocksdb_deadlock_detect_depth = 2;
+
+echo Deadlock #4;
+connection con1;
+begin;
+select * from t where i=1 for update;
+
+connection con2;
+begin;
+select * from t where i=2 for update;
+
+connection con3;
+begin;
+select * from t where i=3 for update;
+
+connection con1;
+send select * from t where i=2 for update;
+
+connection con2;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con1 and waiting_key != "";
+--source include/wait_condition.inc
+
+send select * from t where i=3 for update;
+
+connection con3;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con2 and waiting_key != "";
+--source include/wait_condition.inc
+
+select variable_value into @a from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks';
+--error ER_LOCK_DEADLOCK
+select * from t where i=1 for update;
+select case when variable_value-@a = 1 then 'true' else 'false' end as deadlocks from information_schema.global_status where variable_name='rocksdb_row_lock_deadlocks';
+rollback;
+
+connection con2;
+reap;
+rollback;
+
+connection con1;
+reap;
+rollback;
+
+connection default;
+set global rocksdb_max_latest_deadlocks = 5;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /TIMESTAMP: [0-9]*/TSTAMP/
+show engine rocksdb transaction status;
+
+--disable_parsing
+echo Deadlock #5;
+connection con1;
+begin;
+select * from t where i=1 for update;
+
+connection con2;
+begin;
+select * from t where i=2 for update;
+
+connection con3;
+begin;
+select * from t where i=3 lock in share mode;
+
+connection con1;
+select * from t where i=100 for update;
+select * from t where i=101 for update;
+send select * from t where i=2 for update;
+
+connection con2;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con1 and waiting_key != "";
+--source include/wait_condition.inc
+
+select * from t where i=3 lock in share mode;
+select * from t where i=200 for update;
+select * from t where i=201 for update;
+
+--error ER_LOCK_DEADLOCK
+select * from t where i=1 lock in share mode;
+rollback;
+
+connection con1;
+reap;
+rollback;
+
+connection con3;
+rollback;
+
+connection default;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /TIMESTAMP: [0-9]*/TSTAMP/
+show engine rocksdb transaction status;
+--enable_parsing
+echo Deadlock #6;
+connection con1;
+create table t1 (id int primary key, value int) engine=rocksdb;
+insert into t1 values (1,1),(2,2),(3,3),(4,4),(5,5);
+begin;
+update t1 set value=value+100 where id=1;
+update t1 set value=value+100 where id=2;
+
+connection con2;
+begin;
+update t1 set value=value+200 where id=3;
+
+connection con1;
+send update t1 set value=value+100 where id=3;
+
+connection con2;
+let $wait_condition = select count(*) = 1 from information_schema.rocksdb_trx
+where thread_id = $con1 and waiting_key != "";
+--source include/wait_condition.inc
+--error ER_LOCK_DEADLOCK
+update t1 set value=value+200 where id=1;
+
+# con2 tx is automatically rolled back
+connection con1;
+reap;
+select * from t1;
+drop table t1;
+
+connection default;
+
+disconnect con1;
+disconnect con2;
+disconnect con3;
+
+set global rocksdb_lock_wait_timeout = @prior_lock_wait_timeout;
+set global rocksdb_deadlock_detect = @prior_deadlock_detect;
+drop table t;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/ /TIMESTAMP: [0-9]*/TSTAMP/
+show engine rocksdb transaction status;
+set global rocksdb_max_latest_deadlocks = 0;
+--echo # Clears deadlock buffer of any existent deadlocks.
+set global rocksdb_max_latest_deadlocks = @prior_max_latest_deadlocks;
+--replace_regex /[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/TIMESTAMP/ /WAITING KEY: [0-9a-f]{16}/KEY/ /TRANSACTION ID: [0-9]*/TXN_ID/ /INDEX_ID: [0-9a-f]*/IDX_ID/ /TIMESTAMP: [0-9]*/TSTAMP/
+show engine rocksdb transaction status;
+--source include/wait_until_count_sessions.inc
1
0
10 Dec '18
revision-id: 3e477e9e81c4a651e646be47ad7a31e1a5204019 (fb-prod201801-175-g3e477e9e81c)
parent(s): 8400b2c356d07d91ec49c559d859ab6f4798c789
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2018-12-10 15:21:32 +0300
message:
Update to newer Range-locking RocksDB
(Get the "Make Range Locking use its own storage for acquired locks" patch)
---
rocksdb | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/rocksdb b/rocksdb
index 7f29e9512f6..51606993a6e 160000
--- a/rocksdb
+++ b/rocksdb
@@ -1 +1 @@
-Subproject commit 7f29e9512f6bd7a12d8a8d3a493eaff586c5f28b
+Subproject commit 51606993a6e04edfa32ba9ed445e47330a2b2647
1
0
[Commits] 51606993a: Make Range Locking use its own storage for acquired locks
by Sergei Petrunia 10 Dec '18
by Sergei Petrunia 10 Dec '18
10 Dec '18
revision-id: 51606993a6e04edfa32ba9ed445e47330a2b2647 (v5.8-1021-g51606993a)
parent(s): 7f29e9512f6bd7a12d8a8d3a493eaff586c5f28b
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2018-12-10 15:20:20 +0300
message:
Make Range Locking use its own storage for acquired locks
(Because it needs to store locked ranges, not just locked points,
and it doesn't need the key tracking provided by TrackKey(), so that
one will be disabled.)
---
utilities/transactions/pessimistic_transaction.h | 12 ++++++
utilities/transactions/transaction_lock_mgr.cc | 52 +++++++++++++++++++++++-
2 files changed, 63 insertions(+), 1 deletion(-)
diff --git a/utilities/transactions/pessimistic_transaction.h b/utilities/transactions/pessimistic_transaction.h
index 340ea929e..50030700d 100644
--- a/utilities/transactions/pessimistic_transaction.h
+++ b/utilities/transactions/pessimistic_transaction.h
@@ -150,6 +150,18 @@ class PessimisticTransaction : public TransactionBaseImpl {
// microseconds according to Env->NowMicros())
uint64_t expiration_time_;
+ /*
+ psergey: Range Locking: A list of locks the transaction is holding.
+
+ (for point locking, the locks are kept in TransactionBaseImpl::tracked_keys_)
+ */
+ public:
+ class LockStorage {
+ public:
+ virtual ~LockStorage(){}
+ };
+ std::unique_ptr<LockStorage> owned_locks;
+
private:
friend class TransactionTest_ValidateSnapshotTest_Test;
// Used to create unique ids for transactions.
diff --git a/utilities/transactions/transaction_lock_mgr.cc b/utilities/transactions/transaction_lock_mgr.cc
index a7eb7e30b..561531f7e 100644
--- a/utilities/transactions/transaction_lock_mgr.cc
+++ b/utilities/transactions/transaction_lock_mgr.cc
@@ -321,6 +321,33 @@ void RangeLockMgr::KillLockWait(void *cdata)
}
+/*
+ Storage for locks that are held by this transaction.
+
+ We store them in toku::range_buffer because toku::locktree::release_locks()
+ accepts that as an argument.
+
+ TODO: lock escalation in the lock table should affect this structure, too?
+*/
+class RangeLockList: public PessimisticTransaction::LockStorage
+{
+public:
+ virtual ~RangeLockList()
+ {
+ buffer.destroy();
+ }
+
+ RangeLockList()
+ {
+ buffer.create();
+ }
+
+ void append(const DBT *left_key, const DBT *right_key) {
+ buffer.append(left_key, right_key);
+ }
+
+ toku::range_buffer buffer;
+};
// Get a range lock on [start_key; end_key] range
// (TODO: check if we do what is inteded at the endpoints)
@@ -391,7 +418,7 @@ Status RangeLockMgr::TryRangeLock(PessimisticTransaction* txn,
request.destroy();
switch (r) {
case 0:
- return Status::OK();
+ break; /* fall through */
case DB_LOCK_NOTGRANTED:
return Status::TimedOut(Status::SubCode::kLockTimeout);
case TOKUDB_OUT_OF_LOCKS:
@@ -402,6 +429,17 @@ Status RangeLockMgr::TryRangeLock(PessimisticTransaction* txn,
assert(0);
return Status::Busy(Status::SubCode::kLockLimit);
}
+
+ /* Save the acquired lock in txn->owned_locks */
+ if (!txn->owned_locks)
+ {
+ //create the object
+ txn->owned_locks= std::unique_ptr<RangeLockList>(new RangeLockList);
+ }
+ RangeLockList* range_list= (RangeLockList*)txn->owned_locks.get();
+ range_list->append(&start_key_dbt, &end_key_dbt);
+
+ return Status::OK();
}
@@ -795,6 +833,7 @@ void RangeLockMgr::UnLock(const PessimisticTransaction* txn,
void RangeLockMgr::UnLockAll(const PessimisticTransaction* txn,
const TransactionKeyMap* key_map, Env* env) {
+#if 0
//TODO: collecting multiple locks into a buffer and then making one call
// to lock_tree::release_locks() will be faster.
for (auto& key_map_iter : *key_map) {
@@ -808,6 +847,17 @@ void RangeLockMgr::UnLockAll(const PessimisticTransaction* txn,
}
toku::lock_request::retry_all_lock_requests(lt, nullptr /* lock_wait_needed_callback */);
+#endif
+
+ // owned_locks may hold nullptr if the transaction has never acquired any
+ // locks.
+ if (txn->owned_locks)
+ {
+ RangeLockList* range_list= (RangeLockList*)txn->owned_locks.get();
+ lt->release_locks(txn->GetID(), &range_list->buffer);
+ range_list->buffer.destroy();
+ toku::lock_request::retry_all_lock_requests(lt, nullptr /* lock_wait_needed_callback */);
+ }
#if 0
Original usage:
1
0
[Commits] 6784d1c: MDEV-16188: Fixed problems of the implementation for rowid filter pushdown
by IgorBabaev 07 Dec '18
by IgorBabaev 07 Dec '18
07 Dec '18
revision-id: 6784d1ce238094f3cb16ad18a455810277320536 (mariadb-10.3.6-99-g6784d1c)
parent(s): 90708fc15cf835d03b511a953ae6939081a0f9e1
author: Igor Babaev
committer: Igor Babaev
timestamp: 2018-12-07 12:13:26 -0800
message:
MDEV-16188: Fixed problems of the implementation for rowid filter pushdown
in InnoDB engine.
Fixed some other problems at the SQL layer related to rowid filters.
After this patch, all tests in the main test suite pass, including
runs with --ps-protocol, in both Debug and Release builds.
---
mysql-test/main/join_cache.result | 2 +-
sql/rowid_filter.cc | 7 +++++--
sql/rowid_filter.h | 10 +++++-----
sql/sql_select.cc | 14 ++++++++++----
storage/innobase/handler/ha_innodb.cc | 14 ++++++++------
storage/innobase/row/row0sel.cc | 12 ++++++------
6 files changed, 35 insertions(+), 24 deletions(-)
diff --git a/mysql-test/main/join_cache.result b/mysql-test/main/join_cache.result
index 46891e9..fde6e0f 100644
--- a/mysql-test/main/join_cache.result
+++ b/mysql-test/main/join_cache.result
@@ -1213,7 +1213,7 @@ ON City.Country=Country.Code AND City.Population > 5000000
WHERE Country.Name LIKE 'C%' AND Country.Population > 10000000;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE Country range Name Name 52 NULL # Using index condition; Using where; Rowid-ordered scan
-1 SIMPLE City hash_range|filter Population,Country #hash#Country:Population|Population 3:4|4 world.Country.Code # Using where; Rowid-ordered scan; Using join buffer (flat, BNLH join); Using filter
+1 SIMPLE City hash_range Population,Country #hash#Country:Population 3:4 world.Country.Code # Using where; Rowid-ordered scan; Using join buffer (flat, BNLH join)
SELECT Country.Name, Country.Population, City.Name, City.Population
FROM Country LEFT JOIN City
ON City.Country=Country.Code AND City.Population > 5000000
diff --git a/sql/rowid_filter.cc b/sql/rowid_filter.cc
index 5144319..f307fe5 100644
--- a/sql/rowid_filter.cc
+++ b/sql/rowid_filter.cc
@@ -213,7 +213,7 @@ Range_filter_cost_info
for (uint i= best_filter_count; i < range_filter_cost_info_elements; i++)
{
Range_filter_cost_info *filter= &range_filter_cost_info[i];
- if (intersected_with->is_set(filter->key_no))
+ if ((filter->key_no == ref_key_no) || intersected_with->is_set(filter->key_no))
continue;
if (card < filter->intersect_x_axis_abcissa)
break;
@@ -233,10 +233,12 @@ bool Range_filter_ordered_array::fill()
handler *file= table->file;
THD *thd= table->in_use;
QUICK_RANGE_SELECT* quick= (QUICK_RANGE_SELECT*) select->quick;
- Item *pushed_idx_cond_save = file->pushed_idx_cond;
+ uint table_status_save= table->status;
+ Item *pushed_idx_cond_save= file->pushed_idx_cond;
uint pushed_idx_cond_keyno_save= file->pushed_idx_cond_keyno;
bool in_range_check_pushed_down_save= file->in_range_check_pushed_down;
+ table->status= 0;
file->pushed_idx_cond= 0;
file->pushed_idx_cond_keyno= MAX_KEY;
file->in_range_check_pushed_down= false;
@@ -264,6 +266,7 @@ bool Range_filter_ordered_array::fill()
quick->range_end();
table->file->ha_end_keyread();
+ table->status= table_status_save;
file->pushed_idx_cond= pushed_idx_cond_save;
file->pushed_idx_cond_keyno= pushed_idx_cond_keyno_save;
file->in_range_check_pushed_down= in_range_check_pushed_down_save;
diff --git a/sql/rowid_filter.h b/sql/rowid_filter.h
index 4e3d7de..99fb75f 100644
--- a/sql/rowid_filter.h
+++ b/sql/rowid_filter.h
@@ -190,7 +190,7 @@ class Refpos_container_ordered_array : public Sql_alloc
public:
Refpos_container_ordered_array(uint elem_sz, uint max_elems)
- : elem_size(elem_sz), max_elements(max_elems) {}
+ : elem_size(elem_sz), max_elements(max_elems), array(0) {}
~Refpos_container_ordered_array()
{
@@ -204,7 +204,7 @@ class Refpos_container_ordered_array : public Sql_alloc
elem_size * max_elements/8 + 1);
return array == NULL;
}
-
+
bool add(char *elem)
{
for (uint i= 0; i < elem_size; i++)
@@ -227,7 +227,7 @@ class Refpos_container_ordered_array : public Sql_alloc
{
my_qsort2(array->front(), array->elements()/elem_size,
elem_size, (qsort2_cmp) cmp, cmp_arg);
- }
+ }
};
class Range_filter_ordered_array : public Sql_alloc
@@ -252,7 +252,7 @@ class Range_filter_ordered_array : public Sql_alloc
bool is_filled() { return container_is_filled; }
bool fill();
-
+
bool sort();
bool check(char *elem);
@@ -276,7 +276,7 @@ class Rowid_filter : public Sql_alloc
}
bool is_active()
- {
+ {
return get_container()->is_filled();
}
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index c194fdb..bca13e6 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -10873,6 +10873,11 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
sel->quick=tab->quick; // Use value from get_quick_...
sel->quick_keys.clear_all();
sel->needed_reg.clear_all();
+ if (is_hj && tab->rowid_filter)
+ {
+ delete tab->rowid_filter;
+ tab->rowid_filter= 0;
+ }
}
else
{
@@ -20421,6 +20426,8 @@ int join_init_read_record(JOIN_TAB *tab)
if (tab->filesort && tab->sort_table()) // Sort table.
return 1;
+ tab->fill_range_filter_if_needed();
+
DBUG_EXECUTE_IF("kill_join_init_read_record",
tab->join->thd->set_killed(KILL_QUERY););
if (tab->select && tab->select->quick && tab->select->quick->reset())
@@ -20436,8 +20443,7 @@ int join_init_read_record(JOIN_TAB *tab)
if (!tab->preread_init_done && tab->preread_init())
return 1;
- tab->fill_range_filter_if_needed();
-
+
if (init_read_record(&tab->read_record, tab->join->thd, tab->table,
tab->select, tab->filesort_result, 1,1, FALSE))
return 1;
@@ -25174,7 +25180,7 @@ int append_possible_keys(MEM_ROOT *alloc, String_list &list, TABLE *table,
bool JOIN_TAB::save_filter_explain_data(Explain_table_access *eta)
{
- if (!filter)
+ if (!rowid_filter)
return 0;
(filter->selectivity*100 >= 1) ? eta->filter_perc= round(filter->selectivity*100) :
eta->filter_perc= 1;
@@ -25319,7 +25325,7 @@ bool JOIN_TAB::save_explain_data(Explain_table_access *eta,
/* Build "key", "key_len", and "ref" */
- if (filter)
+ if (rowid_filter)
{
eta->key.set_filter(thd->mem_root,
&filter->table->key_info[filter->key_no],
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index c521f9c..e43019b 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -7557,8 +7557,8 @@ ha_innobase::build_template(
/* Below we check column by column if we need to access
the clustered index. */
- if (pushed_rowid_filter) {
- fetch_primary_key_cols = TRUE;
+ if (pushed_rowid_filter && rowid_filter_is_active) {
+ fetch_primary_key_cols = TRUE;
m_prebuilt->pk_filter = this;
} else {
m_prebuilt->pk_filter = NULL;
@@ -7584,8 +7584,9 @@ ha_innobase::build_template(
/* Note that in InnoDB, i is the column number in the table.
MySQL calls columns 'fields'. */
- if (active_index != MAX_KEY
- && active_index == pushed_idx_cond_keyno) {
+ if ((active_index != MAX_KEY
+ && active_index == pushed_idx_cond_keyno) ||
+ (pushed_rowid_filter && rowid_filter_is_active)) {
ulint num_v = 0;
/* Push down an index condition or an end_range check. */
@@ -7779,8 +7780,9 @@ ha_innobase::build_template(
}
}
}
-
- m_prebuilt->idx_cond = this;
+ if (active_index == pushed_idx_cond_keyno) {
+ m_prebuilt->idx_cond = this;
+ }
} else {
no_icp:
mysql_row_templ_t* templ;
diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc
index 2f2e74c..73c21d9 100644
--- a/storage/innobase/row/row0sel.cc
+++ b/storage/innobase/row/row0sel.cc
@@ -3816,7 +3816,7 @@ row_sel_enqueue_cache_row_for_mysql(
/* For non ICP code path the row should already exist in the
next fetch cache slot. */
- if (prebuilt->idx_cond != NULL) {
+ if (prebuilt->idx_cond != NULL || prebuilt->pk_filter != NULL ) {
byte* dest = row_sel_fetch_last_buf(prebuilt);
ut_memcpy(dest, mysql_rec, prebuilt->mysql_row_len);
@@ -4427,7 +4427,7 @@ row_search_mvcc(
mtr.commit(). */
ut_ad(!rec_get_deleted_flag(rec, comp));
- if (prebuilt->idx_cond) {
+ if (prebuilt->idx_cond || prebuilt->pk_filter) {
switch (row_search_idx_cond_check(
buf, prebuilt,
rec, offsets)) {
@@ -5359,7 +5359,7 @@ row_search_mvcc(
result_rec = clust_rec;
ut_ad(rec_offs_validate(result_rec, clust_index, offsets));
- if (prebuilt->idx_cond) {
+ if (prebuilt->idx_cond || prebuilt->pk_filter) {
/* Convert the record to MySQL format. We were
unable to do this in row_search_idx_cond_check(),
because the condition is on the secondary index
@@ -5420,7 +5420,7 @@ row_search_mvcc(
/* We only convert from InnoDB row format to MySQL row
format when ICP is disabled. */
- if (!prebuilt->idx_cond) {
+ if (!(prebuilt->idx_cond || prebuilt->pk_filter)) {
/* We use next_buf to track the allocation of buffers
where we store and enqueue the buffers for our
@@ -5493,7 +5493,7 @@ row_search_mvcc(
rec_offs_size(offsets));
mach_write_to_4(buf,
rec_offs_extra_size(offsets) + 4);
- } else if (!prebuilt->idx_cond) {
+ } else if (!(prebuilt->idx_cond || prebuilt->pk_filter)) {
/* The record was not yet converted to MySQL format. */
if (!row_sel_store_mysql_rec(
buf, prebuilt, result_rec, vrow,
@@ -5727,7 +5727,7 @@ row_search_mvcc(
DEBUG_SYNC_C("row_search_for_mysql_before_return");
- if (prebuilt->idx_cond != 0) {
+ if (prebuilt->idx_cond != 0 || prebuilt->pk_filter != 0) {
/* When ICP is active we don't write to the MySQL buffer
directly, only to buffers that are enqueued in the pre-fetch
1
0
[Commits] 8bfb140d5dc: Move deletion of old GTID rows to slave background thread
by Kristian Nielsen 07 Dec '18
by Kristian Nielsen 07 Dec '18
07 Dec '18
revision-id: 8bfb140d5dc247c183787b8a0a1799cf375845bd (mariadb-10.3.10-25-g8bfb140d5dc)
parent(s): 74387028a06c557f36a0fd1bbde347f1551c8fb7
author: Kristian Nielsen
committer: Kristian Nielsen
timestamp: 2018-11-25 19:38:33 +0100
message:
Move deletion of old GTID rows to slave background thread
This patch changes how old rows in mysql.gtid_slave_pos* tables are deleted.
Instead of doing it as part of every replicated transaction in
record_gtid(), it is done periodically (every @@gtid_cleanup_batch_size
transactions) in the slave background thread.
This removes the deletion step from the replication process in SQL or worker
threads, which could speed up replication with many small transactions. It
also decreases contention on the global mutex LOCK_slave_state. And it
simplifies the logic, eg. when a replicated transaction fails after having
deleted old rows.
With this patch, the deletion of old GTID rows happens asynchronously and
slightly non-deterministically. Thus the number of old rows in
mysql.gtid_slave_pos can temporarily exceed @@gtid_cleanup_batch_size. But
all old rows will be deleted eventually after sufficiently many new GTIDs
have been replicated.
---
mysql-test/main/mysqld--help.result | 10 +
mysql-test/suite/rpl/r/rpl_gtid_mdev4484.result | 40 +-
mysql-test/suite/rpl/r/rpl_gtid_stop_start.result | 8 +-
.../suite/rpl/r/rpl_parallel_optimistic.result | 14 +-
mysql-test/suite/rpl/t/rpl_gtid_mdev4484.test | 68 +++-
.../suite/rpl/t/rpl_parallel_optimistic.test | 42 ++-
.../sys_vars/r/sysvars_server_notembedded.result | 14 +
sql/log_event.cc | 6 +-
sql/mysqld.cc | 1 +
sql/mysqld.h | 1 +
sql/rpl_gtid.cc | 413 +++++++++++++--------
sql/rpl_gtid.h | 12 +-
sql/rpl_rli.cc | 87 +----
sql/rpl_rli.h | 11 -
sql/slave.cc | 35 +-
sql/slave.h | 1 +
sql/sys_vars.cc | 13 +
.../mysql-test/rocksdb_rpl/r/mdev12179.result | 18 +
.../mysql-test/rocksdb_rpl/t/mdev12179.test | 85 +++++
.../mysql-test/tokudb_rpl/r/mdev12179.result | 18 +
.../tokudb/mysql-test/tokudb_rpl/t/mdev12179.test | 85 +++++
21 files changed, 675 insertions(+), 307 deletions(-)
diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result
index 5a7153f32d3..4f801ec5275 100644
--- a/mysql-test/main/mysqld--help.result
+++ b/mysql-test/main/mysqld--help.result
@@ -294,6 +294,15 @@ The following specify which files/extra groups are read (specified before remain
--group-concat-max-len=#
The maximum length of the result of function
GROUP_CONCAT()
+ --gtid-cleanup-batch-size=#
+ Normally does not need tuning. How many old rows must
+ accumulate in the mysql.gtid_slave_pos table before a
+ background job will be run to delete them. Can be
+ increased to reduce number of commits if using many
+ different engines with --gtid_pos_auto_engines, or to
+ reduce CPU overhead if using a huge number of different
+ gtid_domain_ids. Can be decreased to reduce number of old
+ rows in the table.
--gtid-domain-id=# Used with global transaction ID to identify logically
independent replication streams. When events can
propagate through multiple parallel paths (for example
@@ -1425,6 +1434,7 @@ gdb FALSE
general-log FALSE
getopt-prefix-matching FALSE
group-concat-max-len 1048576
+gtid-cleanup-batch-size 64
gtid-domain-id 0
gtid-ignore-duplicates FALSE
gtid-pos-auto-engines
diff --git a/mysql-test/suite/rpl/r/rpl_gtid_mdev4484.result b/mysql-test/suite/rpl/r/rpl_gtid_mdev4484.result
index aaeb0c8f119..55d2831dcf4 100644
--- a/mysql-test/suite/rpl/r/rpl_gtid_mdev4484.result
+++ b/mysql-test/suite/rpl/r/rpl_gtid_mdev4484.result
@@ -16,36 +16,32 @@ INSERT INTO t1 VALUES (1);
connection slave;
connection slave;
include/stop_slave.inc
+SET @old_gtid_cleanup_batch_size= @@GLOBAL.gtid_cleanup_batch_size;
+SET GLOBAL gtid_cleanup_batch_size= 2;
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,gtid_slave_pos_simulate_failed_delete";
SET sql_log_bin= 0;
-CALL mtr.add_suppression("Can't find file");
+CALL mtr.add_suppression("<DEBUG> Error deleting old GTID row");
SET sql_log_bin= 1;
include/start_slave.inc
connection master;
-INSERT INTO t1 VALUES (2);
-connection slave;
-include/wait_for_slave_sql_error.inc [errno=1942]
-STOP SLAVE IO_THREAD;
-SELECT domain_id, server_id, seq_no FROM mysql.gtid_slave_pos
-ORDER BY domain_id, sub_id DESC LIMIT 1;
-domain_id server_id seq_no
-0 1 3
+connection slave;
+SELECT COUNT(*), MAX(seq_no) INTO @pre_count, @pre_max_seq_no
+FROM mysql.gtid_slave_pos;
+SELECT IF(@pre_count >= 20, "OK", CONCAT("Error: too few rows seen while errors injected: ", @pre_count));
+IF(@pre_count >= 20, "OK", CONCAT("Error: too few rows seen while errors injected: ", @pre_count))
+OK
SET GLOBAL debug_dbug= @old_dbug;
-include/start_slave.inc
connection master;
-INSERT INTO t1 VALUES (3);
-connection slave;
-connection slave;
-SELECT domain_id, server_id, seq_no FROM mysql.gtid_slave_pos
-ORDER BY domain_id, sub_id DESC LIMIT 1;
-domain_id server_id seq_no
-0 1 4
-SELECT * FROM t1 ORDER BY i;
-i
-1
-2
-3
+connection slave;
+connection slave;
+SELECT IF(COUNT(*) >= 1, "OK", CONCAT("Error: too few rows seen after errors no longer injected: ", COUNT(*)))
+FROM mysql.gtid_slave_pos
+WHERE seq_no <= @pre_max_seq_no;
+IF(COUNT(*) >= 1, "OK", CONCAT("Error: too few rows seen after errors no longer injected: ", COUNT(*)))
+OK
connection master;
DROP TABLE t1;
+connection slave;
+SET GLOBAL gtid_cleanup_batch_size= @old_gtid_cleanup_batch_size;
include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/r/rpl_gtid_stop_start.result b/mysql-test/suite/rpl/r/rpl_gtid_stop_start.result
index ff845794c22..b27ffed9f94 100644
--- a/mysql-test/suite/rpl/r/rpl_gtid_stop_start.result
+++ b/mysql-test/suite/rpl/r/rpl_gtid_stop_start.result
@@ -171,7 +171,7 @@ include/start_slave.inc
*** MDEV-4692: mysql.gtid_slave_pos accumulates values for a domain ***
SELECT domain_id, COUNT(*) FROM mysql.gtid_slave_pos GROUP BY domain_id;
domain_id COUNT(*)
-0 2
+0 3
1 2
connection server_1;
INSERT INTO t1 VALUES (11);
@@ -179,7 +179,7 @@ connection server_2;
FLUSH NO_WRITE_TO_BINLOG TABLES;
SELECT domain_id, COUNT(*) FROM mysql.gtid_slave_pos GROUP BY domain_id;
domain_id COUNT(*)
-0 2
+0 4
1 2
include/start_slave.inc
connection server_1;
@@ -189,8 +189,8 @@ connection server_2;
FLUSH NO_WRITE_TO_BINLOG TABLES;
SELECT domain_id, COUNT(*) FROM mysql.gtid_slave_pos GROUP BY domain_id;
domain_id COUNT(*)
-0 2
-1 2
+0 3
+1 1
*** MDEV-4650: show variables; ERROR 1946 (HY000): Failed to load replication slave GTID position ***
connection server_2;
SET sql_log_bin=0;
diff --git a/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result b/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result
index ca202a66b0e..83343e52cab 100644
--- a/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result
+++ b/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result
@@ -12,6 +12,8 @@ SET GLOBAL slave_parallel_threads=10;
CHANGE MASTER TO master_use_gtid=slave_pos;
SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
SET GLOBAL slave_parallel_mode='optimistic';
+SET @old_gtid_cleanup_batch_size= @@GLOBAL.gtid_cleanup_batch_size;
+SET GLOBAL gtid_cleanup_batch_size= 1000000;
connection server_1;
INSERT INTO t1 VALUES(1,1);
BEGIN;
@@ -131,6 +133,11 @@ c
204
205
206
+SELECT IF(COUNT(*) >= 30, "OK", CONCAT("Error: too few old rows found: ", COUNT(*)))
+FROM mysql.gtid_slave_pos;
+IF(COUNT(*) >= 30, "OK", CONCAT("Error: too few old rows found: ", COUNT(*)))
+OK
+SET GLOBAL gtid_cleanup_batch_size=1;
*** Test @@skip_parallel_replication. ***
connection server_2;
include/stop_slave.inc
@@ -651,9 +658,10 @@ DROP TABLE t1, t2, t3;
include/save_master_gtid.inc
connection server_2;
include/sync_with_master_gtid.inc
-Check that no more than the expected last four GTIDs are in mysql.gtid_slave_pos
-select count(4) <= 4 from mysql.gtid_slave_pos order by domain_id, sub_id;
-count(4) <= 4
+SELECT COUNT(*) <= 5*@@GLOBAL.gtid_cleanup_batch_size
+FROM mysql.gtid_slave_pos;
+COUNT(*) <= 5*@@GLOBAL.gtid_cleanup_batch_size
1
+SET GLOBAL gtid_cleanup_batch_size= @old_gtid_cleanup_batch_size;
connection server_1;
include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_gtid_mdev4484.test b/mysql-test/suite/rpl/t/rpl_gtid_mdev4484.test
index e1f5696f5a1..a28bff3d27a 100644
--- a/mysql-test/suite/rpl/t/rpl_gtid_mdev4484.test
+++ b/mysql-test/suite/rpl/t/rpl_gtid_mdev4484.test
@@ -28,37 +28,79 @@ INSERT INTO t1 VALUES (1);
# Inject an artificial error deleting entries, and check that the error handling code works.
--connection slave
--source include/stop_slave.inc
+SET @old_gtid_cleanup_batch_size= @@GLOBAL.gtid_cleanup_batch_size;
+SET GLOBAL gtid_cleanup_batch_size= 2;
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,gtid_slave_pos_simulate_failed_delete";
SET sql_log_bin= 0;
-CALL mtr.add_suppression("Can't find file");
+CALL mtr.add_suppression("<DEBUG> Error deleting old GTID row");
SET sql_log_bin= 1;
--source include/start_slave.inc
--connection master
-INSERT INTO t1 VALUES (2);
+--disable_query_log
+let $i = 20;
+while ($i) {
+ eval INSERT INTO t1 VALUES ($i+10);
+ dec $i;
+}
+--enable_query_log
+--save_master_pos
--connection slave
---let $slave_sql_errno= 1942
---source include/wait_for_slave_sql_error.inc
-STOP SLAVE IO_THREAD;
-SELECT domain_id, server_id, seq_no FROM mysql.gtid_slave_pos
- ORDER BY domain_id, sub_id DESC LIMIT 1;
+--sync_with_master
+
+# Now wait for the slave background thread to try to delete old rows and
+# hit the error injection.
+--let _TEST_MYSQLD_ERROR_LOG=$MYSQLTEST_VARDIR/log/mysqld.2.err
+--perl
+ open F, '<', $ENV{'_TEST_MYSQLD_ERROR_LOG'} or die;
+ outer: while (1) {
+ inner: while (<F>) {
+ last outer if /<DEBUG> Error deleting old GTID row/;
+ }
+ # Easy way to do sub-second sleep without extra modules.
+ select(undef, undef, undef, 0.1);
+ }
+EOF
+
+# Since we injected error in the cleanup code, the rows should remain in
+# mysql.gtid_slave_pos. Check that we have at least 20 (more robust against
+# non-deterministic cleanup and future changes than checking for exact number).
+SELECT COUNT(*), MAX(seq_no) INTO @pre_count, @pre_max_seq_no
+ FROM mysql.gtid_slave_pos;
+SELECT IF(@pre_count >= 20, "OK", CONCAT("Error: too few rows seen while errors injected: ", @pre_count));
SET GLOBAL debug_dbug= @old_dbug;
---source include/start_slave.inc
--connection master
-INSERT INTO t1 VALUES (3);
+--disable_query_log
+let $i = 20;
+while ($i) {
+ eval INSERT INTO t1 VALUES ($i+40);
+ dec $i;
+}
+--enable_query_log
--sync_slave_with_master
--connection slave
-SELECT domain_id, server_id, seq_no FROM mysql.gtid_slave_pos
- ORDER BY domain_id, sub_id DESC LIMIT 1;
-SELECT * FROM t1 ORDER BY i;
-
+# Now check that 1) rows are being deleted again after removing error
+# injection, and 2) old rows are left that failed their delete while errors
+# were injected (again compensating for non-deterministic deletion).
+# Deletion is async and slightly non-deterministic, so we wait for at
+# least 10 of the 20 new rows to be deleted.
+let $wait_condition=
+ SELECT COUNT(*) <= 20-10
+ FROM mysql.gtid_slave_pos
+ WHERE seq_no > @pre_max_seq_no;
+--source include/wait_condition.inc
+SELECT IF(COUNT(*) >= 1, "OK", CONCAT("Error: too few rows seen after errors no longer injected: ", COUNT(*)))
+ FROM mysql.gtid_slave_pos
+ WHERE seq_no <= @pre_max_seq_no;
# Clean up
--connection master
DROP TABLE t1;
+--connection slave
+SET GLOBAL gtid_cleanup_batch_size= @old_gtid_cleanup_batch_size;
--source include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test b/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test
index e08472d5f51..0060cf4416c 100644
--- a/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test
+++ b/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test
@@ -21,6 +21,10 @@ SET GLOBAL slave_parallel_threads=10;
CHANGE MASTER TO master_use_gtid=slave_pos;
SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
SET GLOBAL slave_parallel_mode='optimistic';
+# Run the first part of the test with high batch size and see that
+# old rows remain in the table.
+SET @old_gtid_cleanup_batch_size= @@GLOBAL.gtid_cleanup_batch_size;
+SET GLOBAL gtid_cleanup_batch_size= 1000000;
--connection server_1
@@ -108,7 +112,12 @@ SELECT * FROM t3 ORDER BY c;
SELECT * FROM t1 ORDER BY a;
SELECT * FROM t2 ORDER BY a;
SELECT * FROM t3 ORDER BY c;
-#SHOW STATUS LIKE 'Slave_retried_transactions';
+# Check that we have a bunch of old rows left-over - they were not deleted
+# due to high @@gtid_cleanup_batch_size. Then set a low
+# @@gtid_cleanup_batch_size so we can test that rows start being deleted.
+SELECT IF(COUNT(*) >= 30, "OK", CONCAT("Error: too few old rows found: ", COUNT(*)))
+ FROM mysql.gtid_slave_pos;
+SET GLOBAL gtid_cleanup_batch_size=1;
--echo *** Test @@skip_parallel_replication. ***
@@ -557,25 +566,18 @@ DROP TABLE t1, t2, t3;
--connection server_2
--source include/sync_with_master_gtid.inc
-# Check for left-over rows in table mysql.gtid_slave_pos (MDEV-12147).
-#
-# There was a bug when a transaction got a conflict and was rolled back. It
-# might have also handled deletion of some old rows, and these deletions would
-# then also be rolled back. And since the deletes were never re-tried, old no
-# longer needed rows would accumulate in the table without limit.
-#
-# The earlier part of this test file have plenty of transactions being rolled
-# back. But the last DROP TABLE statement runs on its own and should never
-# conflict, thus at this point the mysql.gtid_slave_pos table should be clean.
-#
-# To support @@gtid_pos_auto_engines, when a row is inserted in the table, it
-# is associated with the engine of the table at insertion time, and it will
-# only be deleted during record_gtid from a table of the same engine. Since we
-# alter the table from MyISAM to InnoDB at the start of this test, we should
-# end up with 4 rows: two left-over from when the table was MyISAM, and two
-# left-over from the InnoDB part.
---echo Check that no more than the expected last four GTIDs are in mysql.gtid_slave_pos
-select count(4) <= 4 from mysql.gtid_slave_pos order by domain_id, sub_id;
+# Check that old rows are deleted from mysql.gtid_slave_pos.
+# Deletion is asynchronous, so use wait_condition.inc.
+# Also, there is a small amount of non-determinism in the deletion of old
+# rows, so it is not guaranteed that there can never be more than
+# @@gtid_cleanup_batch_size rows in the table; so allow a bit of slack
+# here.
+let $wait_condition=
+ SELECT COUNT(*) <= 5*@@GLOBAL.gtid_cleanup_batch_size
+ FROM mysql.gtid_slave_pos;
+--source include/wait_condition.inc
+eval $wait_condition;
+SET GLOBAL gtid_cleanup_batch_size= @old_gtid_cleanup_batch_size;
--connection server_1
--source include/rpl_end.inc
diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
index e8e4d671eb9..5c5ca8b66b2 100644
--- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
@@ -1202,6 +1202,20 @@ NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT NULL
+VARIABLE_NAME GTID_CLEANUP_BATCH_SIZE
+SESSION_VALUE NULL
+GLOBAL_VALUE 64
+GLOBAL_VALUE_ORIGIN COMPILE-TIME
+DEFAULT_VALUE 64
+VARIABLE_SCOPE GLOBAL
+VARIABLE_TYPE INT UNSIGNED
+VARIABLE_COMMENT Normally does not need tuning. How many old rows must accumulate in the mysql.gtid_slave_pos table before a background job will be run to delete them. Can be increased to reduce number of commits if using many different engines with --gtid_pos_auto_engines, or to reduce CPU overhead if using a huge number of different gtid_domain_ids. Can be decreased to reduce number of old rows in the table.
+NUMERIC_MIN_VALUE 0
+NUMERIC_MAX_VALUE 2147483647
+NUMERIC_BLOCK_SIZE 1
+ENUM_VALUE_LIST NULL
+READ_ONLY NO
+COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME GTID_CURRENT_POS
SESSION_VALUE NULL
GLOBAL_VALUE
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 8813d20578e..e10480fb015 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -5565,7 +5565,7 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi,
gtid= rgi->current_gtid;
if (unlikely(rpl_global_gtid_slave_state->record_gtid(thd, &gtid,
sub_id,
- rgi, false,
+ true, false,
&hton)))
{
int errcode= thd->get_stmt_da()->sql_errno();
@@ -8362,7 +8362,7 @@ Gtid_list_log_event::do_apply_event(rpl_group_info *rgi)
{
if ((ret= rpl_global_gtid_slave_state->record_gtid(thd, &list[i],
sub_id_list[i],
- NULL, false, &hton)))
+ false, false, &hton)))
return ret;
rpl_global_gtid_slave_state->update_state_hash(sub_id_list[i], &list[i],
hton, NULL);
@@ -8899,7 +8899,7 @@ int Xid_log_event::do_apply_event(rpl_group_info *rgi)
rgi->gtid_pending= false;
gtid= rgi->current_gtid;
- err= rpl_global_gtid_slave_state->record_gtid(thd, &gtid, sub_id, rgi,
+ err= rpl_global_gtid_slave_state->record_gtid(thd, &gtid, sub_id, true,
false, &hton);
if (unlikely(err))
{
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index afef4a5f52c..07bdd66f74c 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -580,6 +580,7 @@ ulong opt_binlog_commit_wait_count= 0;
ulong opt_binlog_commit_wait_usec= 0;
ulong opt_slave_parallel_max_queued= 131072;
my_bool opt_gtid_ignore_duplicates= FALSE;
+uint opt_gtid_cleanup_batch_size= 64;
const double log_10[] = {
1e000, 1e001, 1e002, 1e003, 1e004, 1e005, 1e006, 1e007, 1e008, 1e009,
diff --git a/sql/mysqld.h b/sql/mysqld.h
index d5cabd790b2..261748372f9 100644
--- a/sql/mysqld.h
+++ b/sql/mysqld.h
@@ -258,6 +258,7 @@ extern ulong opt_slave_parallel_mode;
extern ulong opt_binlog_commit_wait_count;
extern ulong opt_binlog_commit_wait_usec;
extern my_bool opt_gtid_ignore_duplicates;
+extern uint opt_gtid_cleanup_batch_size;
extern ulong back_log;
extern ulong executed_events;
extern char language[FN_REFLEN];
diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc
index fabd09adaa7..196c2fe3d16 100644
--- a/sql/rpl_gtid.cc
+++ b/sql/rpl_gtid.cc
@@ -79,7 +79,7 @@ rpl_slave_state::record_and_update_gtid(THD *thd, rpl_group_info *rgi)
rgi->gtid_pending= false;
if (rgi->gtid_ignore_duplicate_state!=rpl_group_info::GTID_DUPLICATE_IGNORE)
{
- if (record_gtid(thd, &rgi->current_gtid, sub_id, NULL, false, &hton))
+ if (record_gtid(thd, &rgi->current_gtid, sub_id, false, false, &hton))
DBUG_RETURN(1);
update_state_hash(sub_id, &rgi->current_gtid, hton, rgi);
}
@@ -244,7 +244,7 @@ rpl_slave_state_free_element(void *arg)
rpl_slave_state::rpl_slave_state()
- : last_sub_id(0), gtid_pos_tables(0), loaded(false)
+ : pending_gtid_count(0), last_sub_id(0), gtid_pos_tables(0), loaded(false)
{
mysql_mutex_init(key_LOCK_slave_state, &LOCK_slave_state,
MY_MUTEX_INIT_SLOW);
@@ -331,14 +331,11 @@ rpl_slave_state::update(uint32 domain_id, uint32 server_id, uint64 sub_id,
}
}
rgi->gtid_ignore_duplicate_state= rpl_group_info::GTID_DUPLICATE_NULL;
-
-#ifdef HAVE_REPLICATION
- rgi->pending_gtid_deletes_clear();
-#endif
}
if (!(list_elem= (list_element *)my_malloc(sizeof(*list_elem), MYF(MY_WME))))
return 1;
+ list_elem->domain_id= domain_id;
list_elem->server_id= server_id;
list_elem->sub_id= sub_id;
list_elem->seq_no= seq_no;
@@ -348,6 +345,15 @@ rpl_slave_state::update(uint32 domain_id, uint32 server_id, uint64 sub_id,
if (last_sub_id < sub_id)
last_sub_id= sub_id;
+#ifdef HAVE_REPLICATION
+ ++pending_gtid_count;
+ if (pending_gtid_count >= opt_gtid_cleanup_batch_size)
+ {
+ pending_gtid_count = 0;
+ slave_background_gtid_pending_delete_request();
+ }
+#endif
+
return 0;
}
@@ -382,20 +388,22 @@ rpl_slave_state::get_element(uint32 domain_id)
int
-rpl_slave_state::put_back_list(uint32 domain_id, list_element *list)
+rpl_slave_state::put_back_list(list_element *list)
{
- element *e;
+ element *e= NULL;
int err= 0;
mysql_mutex_lock(&LOCK_slave_state);
- if (!(e= (element *)my_hash_search(&hash, (const uchar *)&domain_id, 0)))
- {
- err= 1;
- goto end;
- }
while (list)
{
list_element *next= list->next;
+
+ if ((!e || e->domain_id != list->domain_id) &&
+ !(e= (element *)my_hash_search(&hash, (const uchar *)&list->domain_id, 0)))
+ {
+ err= 1;
+ goto end;
+ }
e->add(list);
list= next;
}
@@ -572,12 +580,12 @@ rpl_slave_state::select_gtid_pos_table(THD *thd, LEX_CSTRING *out_tablename)
/*
Write a gtid to the replication slave state table.
+ Do it as part of the transaction, to get slave crash safety, or as a separate
+ transaction if !in_transaction (eg. MyISAM or DDL).
+
gtid The global transaction id for this event group.
sub_id Value allocated within the sub_id when the event group was
read (sub_id must be consistent with commit order in master binlog).
- rgi rpl_group_info context, if we are recording the gtid transactionally
- as part of replicating a transactional event. NULL if called from
- outside of a replicated transaction.
Note that caller must later ensure that the new gtid and sub_id is inserted
into the appropriate HASH element with rpl_slave_state.add(), so that it can
@@ -585,16 +593,13 @@ rpl_slave_state::select_gtid_pos_table(THD *thd, LEX_CSTRING *out_tablename)
*/
int
rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
- rpl_group_info *rgi, bool in_statement,
+ bool in_transaction, bool in_statement,
void **out_hton)
{
TABLE_LIST tlist;
int err= 0, not_sql_thread;
bool table_opened= false;
TABLE *table;
- list_element *delete_list= 0, *next, *cur, **next_ptr_ptr, **best_ptr_ptr;
- uint64 best_sub_id;
- element *elem;
ulonglong thd_saved_option= thd->variables.option_bits;
Query_tables_list lex_backup;
wait_for_commit* suspended_wfc;
@@ -684,7 +689,7 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
thd->wsrep_ignore_table= true;
#endif
- if (!rgi)
+ if (!in_transaction)
{
DBUG_PRINT("info", ("resetting OPTION_BEGIN"));
thd->variables.option_bits&=
@@ -716,168 +721,280 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
my_error(ER_OUT_OF_RESOURCES, MYF(0));
goto end;
}
+end:
- mysql_mutex_lock(&LOCK_slave_state);
- if ((elem= get_element(gtid->domain_id)) == NULL)
+#ifdef WITH_WSREP
+ thd->wsrep_ignore_table= false;
+#endif
+
+ if (table_opened)
{
- mysql_mutex_unlock(&LOCK_slave_state);
- my_error(ER_OUT_OF_RESOURCES, MYF(0));
- err= 1;
- goto end;
+ if (err || (err= ha_commit_trans(thd, FALSE)))
+ ha_rollback_trans(thd, FALSE);
+
+ close_thread_tables(thd);
+ if (in_transaction)
+ thd->mdl_context.release_statement_locks();
+ else
+ thd->mdl_context.release_transactional_locks();
}
+ thd->lex->restore_backup_query_tables_list(&lex_backup);
+ thd->variables.option_bits= thd_saved_option;
+ thd->resume_subsequent_commits(suspended_wfc);
+ DBUG_EXECUTE_IF("inject_record_gtid_serverid_100_sleep",
+ {
+ if (gtid->server_id == 100)
+ my_sleep(500000);
+ });
+ DBUG_RETURN(err);
+}
- /* Now pull out all GTIDs that were recorded in this engine. */
- delete_list = NULL;
- next_ptr_ptr= &elem->list;
- cur= elem->list;
- best_sub_id= 0;
- best_ptr_ptr= NULL;
- while (cur)
+
+/*
+ Return a list of all old GTIDs in any mysql.gtid_slave_pos* table that are
+ no longer needed and can be deleted from the table.
+
+ Within each domain, we need to keep around the latest GTID (the one with the
+ highest sub_id), but any others in that domain can be deleted.
+*/
+rpl_slave_state::list_element *
+rpl_slave_state::gtid_grab_pending_delete_list()
+{
+ uint32 i;
+ list_element *full_list;
+
+ mysql_mutex_lock(&LOCK_slave_state);
+ full_list= NULL;
+ for (i= 0; i < hash.records; ++i)
{
- list_element *next= cur->next;
- if (cur->hton == hton)
- {
- /* Belongs to same engine, so move it to the delete list. */
- cur->next= delete_list;
- delete_list= cur;
- if (cur->sub_id > best_sub_id)
+ element *elem= (element *)my_hash_element(&hash, i);
+ list_element *elist= elem->list;
+ list_element *last_elem, **best_ptr_ptr, *cur, *next;
+ uint64 best_sub_id;
+
+ if (!elist)
+ continue; /* Nothing here */
+
+ /* Delete any old stuff, but keep around the most recent one. */
+ cur= elist;
+ best_sub_id= cur->sub_id;
+ best_ptr_ptr= &elist;
+ last_elem= cur;
+ while ((next= cur->next)) {
+ last_elem= next;
+ if (next->sub_id > best_sub_id)
{
- best_sub_id= cur->sub_id;
- best_ptr_ptr= &delete_list;
- }
- else if (best_ptr_ptr == &delete_list)
+ best_sub_id= next->sub_id;
best_ptr_ptr= &cur->next;
- }
- else
- {
- /* Another engine, leave it in the list. */
- if (cur->sub_id > best_sub_id)
- {
- best_sub_id= cur->sub_id;
- /* Current best is not on the delete list. */
- best_ptr_ptr= NULL;
}
- *next_ptr_ptr= cur;
- next_ptr_ptr= &cur->next;
+ cur= next;
}
- cur= next;
- }
- *next_ptr_ptr= NULL;
- /*
- If the highest sub_id element is on the delete list, put it back on the
- original list, to preserve the highest sub_id element in the table for
- GTID position recovery.
- */
- if (best_ptr_ptr)
- {
+ /*
+ Append the new elements to the full list. Note the order is important;
+ we do it here so that we do not break the list if best_sub_id is the
+ last of the new elements.
+ */
+ last_elem->next= full_list;
+ /*
+ Delete the highest sub_id element from the old list, and put it back as
+ the single-element new list.
+ */
cur= *best_ptr_ptr;
*best_ptr_ptr= cur->next;
- cur->next= elem->list;
+ cur->next= NULL;
elem->list= cur;
+
+ /*
+ Collect the full list so far here. Note that elist may have moved if we
+ deleted the first element, so order is again important.
+ */
+ full_list= elist;
}
mysql_mutex_unlock(&LOCK_slave_state);
- if (!delete_list)
- goto end;
+ return full_list;
+}
+
- /* Now delete any already committed GTIDs. */
- bitmap_set_bit(table->read_set, table->field[0]->field_index);
- bitmap_set_bit(table->read_set, table->field[1]->field_index);
+/* Find the mysql.gtid_slave_posXXX table associated with a given hton. */
+LEX_CSTRING *
+rpl_slave_state::select_gtid_pos_table(void *hton)
+{
+ struct gtid_pos_table *table_entry;
- if ((err= table->file->ha_index_init(0, 0)))
+ /*
+ See comments on rpl_slave_state::gtid_pos_tables for rules around proper
+ access to the list.
+ */
+ table_entry= (struct gtid_pos_table *)
+ my_atomic_loadptr_explicit(&gtid_pos_tables, MY_MEMORY_ORDER_ACQUIRE);
+
+ while (table_entry)
{
- table->file->print_error(err, MYF(0));
- goto end;
+ if (table_entry->table_hton == hton)
+ {
+ if (likely(table_entry->state == GTID_POS_AVAILABLE))
+ return &table_entry->table_name;
+ }
+ table_entry= table_entry->next;
}
- cur = delete_list;
- while (cur)
- {
- uchar key_buffer[4+8];
- DBUG_EXECUTE_IF("gtid_slave_pos_simulate_failed_delete",
- { err= ENOENT;
- table->file->print_error(err, MYF(0));
- /* `break' does not work inside DBUG_EXECUTE_IF */
- goto dbug_break; });
+ table_entry= (struct gtid_pos_table *)
+ my_atomic_loadptr_explicit(&default_gtid_pos_table, MY_MEMORY_ORDER_ACQUIRE);
+ return &table_entry->table_name;
+}
- next= cur->next;
- table->field[1]->store(cur->sub_id, true);
- /* domain_id is already set in table->record[0] from write_row() above. */
- key_copy(key_buffer, table->record[0], &table->key_info[0], 0, false);
- if (table->file->ha_index_read_map(table->record[1], key_buffer,
- HA_WHOLE_KEY, HA_READ_KEY_EXACT))
- /* We cannot find the row, assume it is already deleted. */
- ;
- else if ((err= table->file->ha_delete_row(table->record[1])))
- table->file->print_error(err, MYF(0));
- /*
- In case of error, we still discard the element from the list. We do
- not want to endlessly error on the same element in case of table
- corruption or such.
- */
- cur= next;
- if (err)
- break;
- }
-IF_DBUG(dbug_break:, )
- table->file->ha_index_end();
+void
+rpl_slave_state::gtid_delete_pending(THD *thd,
+ rpl_slave_state::list_element **list_ptr)
+{
+ int err= 0;
+ ulonglong thd_saved_option;
-end:
+ if (unlikely(!loaded))
+ return;
#ifdef WITH_WSREP
- thd->wsrep_ignore_table= false;
+ /*
+ Updates in slave state table should not be appended to galera transaction
+ writeset.
+ */
+ thd->wsrep_ignore_table= true;
#endif
- if (table_opened)
+ thd_saved_option= thd->variables.option_bits;
+ thd->variables.option_bits&=
+ ~(ulonglong)(OPTION_NOT_AUTOCOMMIT |OPTION_BEGIN |OPTION_BIN_LOG |
+ OPTION_GTID_BEGIN);
+
+ while (*list_ptr)
{
- if (err || (err= ha_commit_trans(thd, FALSE)))
- {
- /*
- If error, we need to put any remaining delete_list back into the HASH
- so we can do another delete attempt later.
- */
- if (delete_list)
- {
- put_back_list(gtid->domain_id, delete_list);
- delete_list = 0;
- }
+ LEX_CSTRING *gtid_pos_table_name, *tmp_table_name;
+ Query_tables_list lex_backup;
+ TABLE_LIST tlist;
+ TABLE *table;
+ handler::Table_flags direct_pos;
+ list_element *cur, **cur_ptr_ptr;
+ bool table_opened= false;
+ void *hton= (*list_ptr)->hton;
- ha_rollback_trans(thd, FALSE);
+ thd->reset_for_next_command();
+
+ /*
+ Only the SQL thread can call select_gtid_pos_table without a mutex.
+ Other threads need to use a mutex and take into account that the
+ result may change during execution, so we have to make a copy.
+ */
+ mysql_mutex_lock(&LOCK_slave_state);
+ tmp_table_name= select_gtid_pos_table(hton);
+ gtid_pos_table_name= thd->make_clex_string(tmp_table_name->str,
+ tmp_table_name->length);
+ mysql_mutex_unlock(&LOCK_slave_state);
+ if (!gtid_pos_table_name)
+ {
+ /* Out of memory - we can try again later. */
+ break;
}
- close_thread_tables(thd);
- if (rgi)
+
+ thd->lex->reset_n_backup_query_tables_list(&lex_backup);
+ tlist.init_one_table(&MYSQL_SCHEMA_NAME, gtid_pos_table_name, NULL, TL_WRITE);
+ if ((err= open_and_lock_tables(thd, &tlist, FALSE, 0)))
+ goto end;
+ table_opened= true;
+ table= tlist.table;
+
+ if ((err= gtid_check_rpl_slave_state_table(table)))
+ goto end;
+
+ direct_pos= table->file->ha_table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION;
+ bitmap_set_all(table->write_set);
+ table->rpl_write_set= table->write_set;
+
+ /* Now delete any already committed GTIDs. */
+ bitmap_set_bit(table->read_set, table->field[0]->field_index);
+ bitmap_set_bit(table->read_set, table->field[1]->field_index);
+
+ if (!direct_pos && (err= table->file->ha_index_init(0, 0)))
{
- thd->mdl_context.release_statement_locks();
- /*
- Save the list of old gtid entries we deleted. If this transaction
- fails later for some reason and is rolled back, the deletion of those
- entries will be rolled back as well, and we will need to put them back
- on the to-be-deleted list so we can re-do the deletion. Otherwise
- redundant rows in mysql.gtid_slave_pos may accumulate if transactions
- are rolled back and retried after record_gtid().
- */
-#ifdef HAVE_REPLICATION
- rgi->pending_gtid_deletes_save(gtid->domain_id, delete_list);
-#endif
+ table->file->print_error(err, MYF(0));
+ goto end;
}
- else
+
+ cur = *list_ptr;
+ cur_ptr_ptr = list_ptr;
+ do
{
- thd->mdl_context.release_transactional_locks();
-#ifdef HAVE_REPLICATION
- rpl_group_info::pending_gtid_deletes_free(delete_list);
-#endif
+ uchar key_buffer[4+8];
+ list_element *next= cur->next;
+
+ if (cur->hton == hton)
+ {
+ int res;
+
+ table->field[0]->store((ulonglong)cur->domain_id, true);
+ table->field[1]->store(cur->sub_id, true);
+ if (direct_pos)
+ {
+ res= table->file->ha_rnd_pos_by_record(table->record[0]);
+ }
+ else
+ {
+ key_copy(key_buffer, table->record[0], &table->key_info[0], 0, false);
+ res= table->file->ha_index_read_map(table->record[0], key_buffer,
+ HA_WHOLE_KEY, HA_READ_KEY_EXACT);
+ }
+ DBUG_EXECUTE_IF("gtid_slave_pos_simulate_failed_delete",
+ { res= 1;
+ err= ENOENT;
+ sql_print_error("<DEBUG> Error deleting old GTID row");
+ });
+ if (res)
+ /* We cannot find the row, assume it is already deleted. */
+ ;
+ else if ((err= table->file->ha_delete_row(table->record[0])))
+ {
+ sql_print_error("Error deleting old GTID row: %s",
+ thd->get_stmt_da()->message());
+ /*
+ In case of error, we still discard the element from the list. We do
+ not want to endlessly error on the same element in case of table
+ corruption or such.
+ */
+ }
+ *cur_ptr_ptr= next;
+ my_free(cur);
+ }
+ else
+ {
+ /* Leave this one in the list until we get to the table for its hton. */
+ cur_ptr_ptr= &cur->next;
+ }
+ cur= next;
+ if (err)
+ break;
+ } while (cur);
+end:
+ if (table_opened)
+ {
+ if (!direct_pos)
+ table->file->ha_index_end();
+
+ if (err || (err= ha_commit_trans(thd, FALSE)))
+ ha_rollback_trans(thd, FALSE);
}
+ close_thread_tables(thd);
+ thd->mdl_context.release_transactional_locks();
+ thd->lex->restore_backup_query_tables_list(&lex_backup);
+
+ if (err)
+ break;
}
- thd->lex->restore_backup_query_tables_list(&lex_backup);
thd->variables.option_bits= thd_saved_option;
- thd->resume_subsequent_commits(suspended_wfc);
- DBUG_EXECUTE_IF("inject_record_gtid_serverid_100_sleep",
- {
- if (gtid->server_id == 100)
- my_sleep(500000);
- });
- DBUG_RETURN(err);
+
+#ifdef WITH_WSREP
+ thd->wsrep_ignore_table= false;
+#endif
}
@@ -1251,7 +1368,7 @@ rpl_slave_state::load(THD *thd, const char *state_from_master, size_t len,
if (gtid_parser_helper(&state_from_master, end, &gtid) ||
!(sub_id= next_sub_id(gtid.domain_id)) ||
- record_gtid(thd, &gtid, sub_id, NULL, in_statement, &hton) ||
+ record_gtid(thd, &gtid, sub_id, false, in_statement, &hton) ||
update(gtid.domain_id, gtid.server_id, sub_id, gtid.seq_no, hton, NULL))
return 1;
if (state_from_master == end)
diff --git a/sql/rpl_gtid.h b/sql/rpl_gtid.h
index 0fc92d5e33c..60d822f7b0d 100644
--- a/sql/rpl_gtid.h
+++ b/sql/rpl_gtid.h
@@ -118,8 +118,9 @@ struct rpl_slave_state
{
struct list_element *next;
uint64 sub_id;
- uint64 seq_no;
+ uint32 domain_id;
uint32 server_id;
+ uint64 seq_no;
/*
hton of mysql.gtid_slave_pos* table used to record this GTID.
Can be NULL if the gtid table failed to load (eg. missing
@@ -191,6 +192,8 @@ struct rpl_slave_state
/* Mapping from domain_id to its element. */
HASH hash;
+ /* GTIDs added since last purge of old mysql.gtid_slave_pos rows. */
+ uint32 pending_gtid_count;
/* Mutex protecting access to the state. */
mysql_mutex_t LOCK_slave_state;
/* Auxiliary buffer to sort gtid list. */
@@ -233,7 +236,10 @@ struct rpl_slave_state
int truncate_state_table(THD *thd);
void select_gtid_pos_table(THD *thd, LEX_CSTRING *out_tablename);
int record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
- rpl_group_info *rgi, bool in_statement, void **out_hton);
+ bool in_transaction, bool in_statement, void **out_hton);
+ list_element *gtid_grab_pending_delete_list();
+ LEX_CSTRING *select_gtid_pos_table(void *hton);
+ void gtid_delete_pending(THD *thd, rpl_slave_state::list_element **list_ptr);
uint64 next_sub_id(uint32 domain_id);
int iterate(int (*cb)(rpl_gtid *, void *), void *data,
rpl_gtid *extra_gtids, uint32 num_extra,
@@ -245,7 +251,7 @@ struct rpl_slave_state
bool is_empty();
element *get_element(uint32 domain_id);
- int put_back_list(uint32 domain_id, list_element *list);
+ int put_back_list(list_element *list);
void update_state_hash(uint64 sub_id, rpl_gtid *gtid, void *hton,
rpl_group_info *rgi);
diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index b275ad884bd..2d91620c898 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -1820,6 +1820,7 @@ rpl_load_gtid_slave_state(THD *thd)
int err= 0;
uint32 i;
load_gtid_state_cb_data cb_data;
+ rpl_slave_state::list_element *old_gtids_list;
DBUG_ENTER("rpl_load_gtid_slave_state");
mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state);
@@ -1905,6 +1906,13 @@ rpl_load_gtid_slave_state(THD *thd)
rpl_global_gtid_slave_state->loaded= true;
mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state);
+ /* Clear out no longer needed elements now. */
+ old_gtids_list=
+ rpl_global_gtid_slave_state->gtid_grab_pending_delete_list();
+ rpl_global_gtid_slave_state->gtid_delete_pending(thd, &old_gtids_list);
+ if (old_gtids_list)
+ rpl_global_gtid_slave_state->put_back_list(old_gtids_list);
+
end:
if (array_inited)
delete_dynamic(&array);
@@ -2086,7 +2094,6 @@ rpl_group_info::reinit(Relay_log_info *rli)
long_find_row_note_printed= false;
did_mark_start_commit= false;
gtid_ev_flags2= 0;
- pending_gtid_delete_list= NULL;
last_master_timestamp = 0;
gtid_ignore_duplicate_state= GTID_DUPLICATE_NULL;
speculation= SPECULATE_NO;
@@ -2217,12 +2224,6 @@ void rpl_group_info::cleanup_context(THD *thd, bool error)
erroneously update the GTID position.
*/
gtid_pending= false;
-
- /*
- Rollback will have undone any deletions of old rows we might have made
- in mysql.gtid_slave_pos. Put those rows back on the list to be deleted.
- */
- pending_gtid_deletes_put_back();
}
m_table_map.clear_tables();
slave_close_thread_tables(thd);
@@ -2448,78 +2449,6 @@ rpl_group_info::unmark_start_commit()
}
-/*
- When record_gtid() has deleted any old rows from the table
- mysql.gtid_slave_pos as part of a replicated transaction, save the list of
- rows deleted here.
-
- If later the transaction fails (eg. optimistic parallel replication), the
- deletes will be undone when the transaction is rolled back. Then we can
- put back the list of rows into the rpl_global_gtid_slave_state, so that
- we can re-do the deletes and avoid accumulating old rows in the table.
-*/
-void
-rpl_group_info::pending_gtid_deletes_save(uint32 domain_id,
- rpl_slave_state::list_element *list)
-{
- /*
- We should never get to a state where we try to save a new pending list of
- gtid deletes while we still have an old one. But make sure we handle it
- anyway just in case, so we avoid leaving stray entries in the
- mysql.gtid_slave_pos table.
- */
- DBUG_ASSERT(!pending_gtid_delete_list);
- if (unlikely(pending_gtid_delete_list))
- pending_gtid_deletes_put_back();
-
- pending_gtid_delete_list= list;
- pending_gtid_delete_list_domain= domain_id;
-}
-
-
-/*
- Take the list recorded by pending_gtid_deletes_save() and put it back into
- rpl_global_gtid_slave_state. This is needed if deletion of the rows was
- rolled back due to transaction failure.
-*/
-void
-rpl_group_info::pending_gtid_deletes_put_back()
-{
- if (pending_gtid_delete_list)
- {
- rpl_global_gtid_slave_state->put_back_list(pending_gtid_delete_list_domain,
- pending_gtid_delete_list);
- pending_gtid_delete_list= NULL;
- }
-}
-
-
-/*
- Free the list recorded by pending_gtid_deletes_save(). Done when the deletes
- in the list have been permanently committed.
-*/
-void
-rpl_group_info::pending_gtid_deletes_clear()
-{
- pending_gtid_deletes_free(pending_gtid_delete_list);
- pending_gtid_delete_list= NULL;
-}
-
-
-void
-rpl_group_info::pending_gtid_deletes_free(rpl_slave_state::list_element *list)
-{
- rpl_slave_state::list_element *next;
-
- while (list)
- {
- next= list->next;
- my_free(list);
- list= next;
- }
-}
-
-
rpl_sql_thread_info::rpl_sql_thread_info(Rpl_filter *filter)
: rpl_filter(filter)
{
diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h
index d9f0e0e5d3b..b8b153c34be 100644
--- a/sql/rpl_rli.h
+++ b/sql/rpl_rli.h
@@ -757,11 +757,6 @@ struct rpl_group_info
/* Needs room for "Gtid D-S-N\x00". */
char gtid_info_buf[5+10+1+10+1+20+1];
- /* List of not yet committed deletions in mysql.gtid_slave_pos. */
- rpl_slave_state::list_element *pending_gtid_delete_list;
- /* Domain associated with pending_gtid_delete_list. */
- uint32 pending_gtid_delete_list_domain;
-
/*
The timestamp, from the master, of the commit event.
Used to do delayed update of rli->last_master_timestamp, for getting
@@ -903,12 +898,6 @@ struct rpl_group_info
char *gtid_info();
void unmark_start_commit();
- static void pending_gtid_deletes_free(rpl_slave_state::list_element *list);
- void pending_gtid_deletes_save(uint32 domain_id,
- rpl_slave_state::list_element *list);
- void pending_gtid_deletes_put_back();
- void pending_gtid_deletes_clear();
-
longlong get_row_stmt_start_timestamp()
{
return row_stmt_start_timestamp;
diff --git a/sql/slave.cc b/sql/slave.cc
index bb1300d36e6..f8499513dd6 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -465,6 +465,8 @@ static struct slave_background_gtid_pos_create_t {
void *hton;
} *slave_background_gtid_pos_create_list;
+static volatile bool slave_background_gtid_pending_delete_flag;
+
pthread_handler_t
handle_slave_background(void *arg __attribute__((unused)))
@@ -499,6 +501,7 @@ handle_slave_background(void *arg __attribute__((unused)))
{
slave_background_kill_t *kill_list;
slave_background_gtid_pos_create_t *create_list;
+ bool pending_deletes;
thd->ENTER_COND(&COND_slave_background, &LOCK_slave_background,
&stage_slave_background_wait_request,
@@ -508,13 +511,15 @@ handle_slave_background(void *arg __attribute__((unused)))
stop= abort_loop || thd->killed || slave_background_thread_stop;
kill_list= slave_background_kill_list;
create_list= slave_background_gtid_pos_create_list;
- if (stop || kill_list || create_list)
+ pending_deletes= slave_background_gtid_pending_delete_flag;
+ if (stop || kill_list || create_list || pending_deletes)
break;
mysql_cond_wait(&COND_slave_background, &LOCK_slave_background);
}
slave_background_kill_list= NULL;
slave_background_gtid_pos_create_list= NULL;
+ slave_background_gtid_pending_delete_flag= false;
thd->EXIT_COND(&old_stage);
while (kill_list)
@@ -541,6 +546,17 @@ handle_slave_background(void *arg __attribute__((unused)))
create_list= next;
}
+ if (pending_deletes)
+ {
+ rpl_slave_state::list_element *list;
+
+ slave_background_gtid_pending_delete_flag= false;
+ list= rpl_global_gtid_slave_state->gtid_grab_pending_delete_list();
+ rpl_global_gtid_slave_state->gtid_delete_pending(thd, &list);
+ if (list)
+ rpl_global_gtid_slave_state->put_back_list(list);
+ }
+
mysql_mutex_lock(&LOCK_slave_background);
} while (!stop);
@@ -615,6 +631,23 @@ slave_background_gtid_pos_create_request(
/*
+ Request the slave background thread to delete no longer used rows from the
+ mysql.gtid_slave_pos* tables.
+
+ This is called from time-critical rpl_slave_state::update(), so we avoid
+ taking any locks here. This means we may race with the background thread
+ to occasionally lose a signal. This is not a problem; any pending rows to
+ be deleted will just be deleted a bit later as part of the next batch.
+*/
+void
+slave_background_gtid_pending_delete_request(void)
+{
+ slave_background_gtid_pending_delete_flag= true;
+ mysql_cond_signal(&COND_slave_background);
+}
+
+
+/*
Start the slave background thread.
This thread is currently used for two purposes:
diff --git a/sql/slave.h b/sql/slave.h
index 649d55b45b9..12d569b0333 100644
--- a/sql/slave.h
+++ b/sql/slave.h
@@ -276,6 +276,7 @@ bool net_request_file(NET* net, const char* fname);
void slave_background_kill_request(THD *to_kill);
void slave_background_gtid_pos_create_request
(rpl_slave_state::gtid_pos_table *table_entry);
+void slave_background_gtid_pending_delete_request(void);
extern bool volatile abort_loop;
extern Master_info *active_mi; /* active_mi for multi-master */
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index 6d4c135683a..9348f4e5c98 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -1942,6 +1942,19 @@ Sys_var_last_gtid::session_value_ptr(THD *thd, const LEX_CSTRING *base)
}
+static Sys_var_uint Sys_gtid_cleanup_batch_size(
+ "gtid_cleanup_batch_size",
+ "Normally does not need tuning. How many old rows must accumulate in "
+ "the mysql.gtid_slave_pos table before a background job will be run to "
+ "delete them. Can be increased to reduce number of commits if "
+ "using many different engines with --gtid_pos_auto_engines, or to "
+ "reduce CPU overhead if using a huge number of different "
+ "gtid_domain_ids. Can be decreased to reduce number of old rows in the "
+ "table.",
+ GLOBAL_VAR(opt_gtid_cleanup_batch_size), CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(0,2147483647), DEFAULT(64), BLOCK_SIZE(1));
+
+
static bool
check_slave_parallel_threads(sys_var *self, THD *thd, set_var *var)
{
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/r/mdev12179.result b/storage/rocksdb/mysql-test/rocksdb_rpl/r/mdev12179.result
index 9c20fea97ae..a1e501f78f4 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/r/mdev12179.result
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/r/mdev12179.result
@@ -2,6 +2,7 @@ include/master-slave.inc
[connection master]
connection server_2;
include/stop_slave.inc
+SET GLOBAL gtid_cleanup_batch_size = 999999999;
CHANGE MASTER TO master_use_gtid=slave_pos;
SET sql_log_bin=0;
CREATE TABLE mysql.gtid_slave_pos_innodb LIKE mysql.gtid_slave_pos;
@@ -41,6 +42,8 @@ a
1
SELECT * FROM mysql.gtid_slave_pos ORDER BY sub_id;
domain_id sub_id server_id seq_no
+0 1 1 1
+0 2 1 2
0 3 1 3
0 4 1 4
SELECT * FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
@@ -121,6 +124,21 @@ Transactions_multi_engine 6
DELETE FROM t1 WHERE a >= 100;
DELETE FROM t2 WHERE a >= 100;
DELETE FROM t3 WHERE a >= 100;
+connection server_1;
+include/save_master_gtid.inc
+connection server_2;
+include/sync_with_master_gtid.inc
+SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos;
+COUNT(*)>=10
+1
+SELECT COUNT(*)>=10 FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
+UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select;
+COUNT(*)>=10
+1
+SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos_rocksdb;
+COUNT(*)>=10
+1
+SET GLOBAL gtid_cleanup_batch_size = 3;
connection server_2;
include/stop_slave.inc
SET sql_log_bin=0;
diff --git a/storage/rocksdb/mysql-test/rocksdb_rpl/t/mdev12179.test b/storage/rocksdb/mysql-test/rocksdb_rpl/t/mdev12179.test
index e0d16e7f242..631d9ca533f 100644
--- a/storage/rocksdb/mysql-test/rocksdb_rpl/t/mdev12179.test
+++ b/storage/rocksdb/mysql-test/rocksdb_rpl/t/mdev12179.test
@@ -4,6 +4,12 @@
--connection server_2
--source include/stop_slave.inc
+
+# Set GTID cleanup limit high enough that cleanup will not run and we
+# can rely on consistent table output in .result.
+--let $old_gtid_cleanup_batch_size=`SELECT @@GLOBAL.gtid_cleanup_batch_size`
+SET GLOBAL gtid_cleanup_batch_size = 999999999;
+
CHANGE MASTER TO master_use_gtid=slave_pos;
SET sql_log_bin=0;
CREATE TABLE mysql.gtid_slave_pos_innodb LIKE mysql.gtid_slave_pos;
@@ -89,6 +95,82 @@ DELETE FROM t2 WHERE a >= 100;
DELETE FROM t3 WHERE a >= 100;
+# Create a bunch more GTIDs in mysql.gtid_slave_pos* tables to test with.
+--connection server_1
+--disable_query_log
+let $i=10;
+while ($i) {
+ eval INSERT INTO t1 VALUES (300+$i);
+ eval INSERT INTO t2 VALUES (300+$i);
+ eval INSERT INTO t3 VALUES (300+$i);
+ dec $i;
+}
+--enable_query_log
+--source include/save_master_gtid.inc
+
+--connection server_2
+--source include/sync_with_master_gtid.inc
+
+# Check that we have many rows in mysql.gtid_slave_pos now (since
+# @@gtid_cleanup_batch_size was set to a huge value). No need to check
+# for an exact number, since that will require changing .result if
+# anything changes prior to this point, and we just need to know that
+# we still have some data in the tables to make the following
+# test effective.
+SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos;
+SELECT COUNT(*)>=10 FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
+ UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select;
+SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos_rocksdb;
+
+# Check that old GTID rows will be deleted when batch delete size is
+# set reasonably. Old row deletion is not 100% deterministic (by design), so
+# we must wait for it to occur, but it should occur eventually.
+SET GLOBAL gtid_cleanup_batch_size = 3;
+let $i=40;
+--disable_query_log
+--let $keep_include_silent=1
+while ($i) {
+ let N=`SELECT 1+($i MOD 3)`;
+ --connection server_1
+ eval UPDATE t$N SET a=a+1 WHERE a=(SELECT MAX(a) FROM t$N);
+ --source include/save_master_gtid.inc
+ --connection server_2
+ --source include/sync_with_master_gtid.inc
+ let $j=50;
+ while ($j) {
+ let $is_done=`SELECT SUM(a)=1 FROM (
+ SELECT COUNT(*) AS a FROM mysql.gtid_slave_pos
+ UNION ALL
+ SELECT COUNT(*) AS a FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
+ UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select
+ UNION ALL
+ SELECT COUNT(*) AS a FROM mysql.gtid_slave_pos_rocksdb) outer_select`;
+ if ($is_done) {
+ let $j=0;
+ }
+ if (!$is_done) {
+ real_sleep 0.1;
+ dec $j;
+ }
+ }
+ dec $i;
+ if ($is_done) {
+ let $i=0;
+ }
+}
+--enable_query_log
+--let $keep_include_silent=0
+if (!$is_done) {
+ --echo Timed out waiting for mysql.gtid_slave_pos* tables to be cleaned up
+}
+
+--disable_query_log
+DELETE FROM t1 WHERE a >= 100;
+DELETE FROM t2 WHERE a >= 100;
+DELETE FROM t3 WHERE a >= 100;
+--enable_query_log
+
+
# Test status variables Rpl_transactions_multi_engine and Transactions_gtid_foreign_engine.
# Have mysql.gtid_slave_pos* for myisam and innodb but not rocksdb.
--connection server_2
@@ -223,6 +305,9 @@ SHOW STATUS LIKE "%transactions%engine";
SET sql_log_bin=0;
DROP TABLE mysql.gtid_slave_pos_innodb;
SET sql_log_bin=1;
+--disable_query_log
+eval SET GLOBAL gtid_cleanup_batch_size = $old_gtid_cleanup_batch_size;
+--enable_query_log
--connection server_1
DROP TABLE t1;
diff --git a/storage/tokudb/mysql-test/tokudb_rpl/r/mdev12179.result b/storage/tokudb/mysql-test/tokudb_rpl/r/mdev12179.result
index d4532eec4e2..d79e7e59aa4 100644
--- a/storage/tokudb/mysql-test/tokudb_rpl/r/mdev12179.result
+++ b/storage/tokudb/mysql-test/tokudb_rpl/r/mdev12179.result
@@ -2,6 +2,7 @@ include/master-slave.inc
[connection master]
connection server_2;
include/stop_slave.inc
+SET GLOBAL gtid_cleanup_batch_size = 999999999;
CHANGE MASTER TO master_use_gtid=slave_pos;
SET sql_log_bin=0;
CREATE TABLE mysql.gtid_slave_pos_innodb LIKE mysql.gtid_slave_pos;
@@ -41,6 +42,8 @@ a
1
SELECT * FROM mysql.gtid_slave_pos ORDER BY sub_id;
domain_id sub_id server_id seq_no
+0 1 1 1
+0 2 1 2
0 3 1 3
0 4 1 4
SELECT * FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
@@ -121,6 +124,21 @@ Transactions_multi_engine 6
DELETE FROM t1 WHERE a >= 100;
DELETE FROM t2 WHERE a >= 100;
DELETE FROM t3 WHERE a >= 100;
+connection server_1;
+include/save_master_gtid.inc
+connection server_2;
+include/sync_with_master_gtid.inc
+SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos;
+COUNT(*)>=10
+1
+SELECT COUNT(*)>=10 FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
+UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select;
+COUNT(*)>=10
+1
+SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos_tokudb;
+COUNT(*)>=10
+1
+SET GLOBAL gtid_cleanup_batch_size = 3;
connection server_2;
include/stop_slave.inc
SET sql_log_bin=0;
diff --git a/storage/tokudb/mysql-test/tokudb_rpl/t/mdev12179.test b/storage/tokudb/mysql-test/tokudb_rpl/t/mdev12179.test
index ceb119cd0dc..1d19a25889e 100644
--- a/storage/tokudb/mysql-test/tokudb_rpl/t/mdev12179.test
+++ b/storage/tokudb/mysql-test/tokudb_rpl/t/mdev12179.test
@@ -4,6 +4,12 @@
--connection server_2
--source include/stop_slave.inc
+
+# Set GTID cleanup limit high enough that cleanup will not run and we
+# can rely on consistent table output in .result.
+--let $old_gtid_cleanup_batch_size=`SELECT @@GLOBAL.gtid_cleanup_batch_size`
+SET GLOBAL gtid_cleanup_batch_size = 999999999;
+
CHANGE MASTER TO master_use_gtid=slave_pos;
SET sql_log_bin=0;
CREATE TABLE mysql.gtid_slave_pos_innodb LIKE mysql.gtid_slave_pos;
@@ -89,6 +95,82 @@ DELETE FROM t2 WHERE a >= 100;
DELETE FROM t3 WHERE a >= 100;
+# Create a bunch more GTIDs in mysql.gtid_slave_pos* tables to test with.
+--connection server_1
+--disable_query_log
+let $i=10;
+while ($i) {
+ eval INSERT INTO t1 VALUES (300+$i);
+ eval INSERT INTO t2 VALUES (300+$i);
+ eval INSERT INTO t3 VALUES (300+$i);
+ dec $i;
+}
+--enable_query_log
+--source include/save_master_gtid.inc
+
+--connection server_2
+--source include/sync_with_master_gtid.inc
+
+# Check that we have many rows in mysql.gtid_slave_pos now (since
+# @@gtid_cleanup_batch_size was set to a huge value). No need to check
+# for an exact number, since that will require changing .result if
+# anything changes prior to this point, and we just need to know that
+# we still have some data in the tables to make the following
+# test effective.
+SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos;
+SELECT COUNT(*)>=10 FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
+ UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select;
+SELECT COUNT(*)>=10 FROM mysql.gtid_slave_pos_tokudb;
+
+# Check that old GTID rows will be deleted when batch delete size is
+# set reasonably. Old row deletion is not 100% deterministic (by design), so
+# we must wait for it to occur, but it should occur eventually.
+SET GLOBAL gtid_cleanup_batch_size = 3;
+let $i=40;
+--disable_query_log
+--let $keep_include_silent=1
+while ($i) {
+ let N=`SELECT 1+($i MOD 3)`;
+ --connection server_1
+ eval UPDATE t$N SET a=a+1 WHERE a=(SELECT MAX(a) FROM t$N);
+ --source include/save_master_gtid.inc
+ --connection server_2
+ --source include/sync_with_master_gtid.inc
+ let $j=50;
+ while ($j) {
+ let $is_done=`SELECT SUM(a)=1 FROM (
+ SELECT COUNT(*) AS a FROM mysql.gtid_slave_pos
+ UNION ALL
+ SELECT COUNT(*) AS a FROM ( SELECT * FROM mysql.gtid_slave_pos_innodb
+ UNION ALL SELECT * FROM mysql.gtid_slave_pos_innodb_redundant) inner_select
+ UNION ALL
+ SELECT COUNT(*) AS a FROM mysql.gtid_slave_pos_tokudb) outer_select`;
+ if ($is_done) {
+ let $j=0;
+ }
+ if (!$is_done) {
+ real_sleep 0.1;
+ dec $j;
+ }
+ }
+ dec $i;
+ if ($is_done) {
+ let $i=0;
+ }
+}
+--enable_query_log
+--let $keep_include_silent=0
+if (!$is_done) {
+ --echo Timed out waiting for mysql.gtid_slave_pos* tables to be cleaned up
+}
+
+--disable_query_log
+DELETE FROM t1 WHERE a >= 100;
+DELETE FROM t2 WHERE a >= 100;
+DELETE FROM t3 WHERE a >= 100;
+--enable_query_log
+
+
# Test status variables Rpl_transactions_multi_engine and Transactions_gtid_foreign_engine.
# Have mysql.gtid_slave_pos* for myisam and innodb but not tokudb.
--connection server_2
@@ -223,6 +305,9 @@ SHOW STATUS LIKE "%transactions%engine";
SET sql_log_bin=0;
DROP TABLE mysql.gtid_slave_pos_innodb;
SET sql_log_bin=1;
+--disable_query_log
+eval SET GLOBAL gtid_cleanup_batch_size = $old_gtid_cleanup_batch_size;
+--enable_query_log
--connection server_1
DROP TABLE t1;
2
3
06 Dec '18
revision-id: 35cd9eb6c5d8a0922b1544736320f013a485395d (mariadb-10.3.6-266-g35cd9eb6c5d)
parent(s): e0739064450f2c2be6d5de1d799582121747dd39
author: Varun Gupta
committer: Varun Gupta
timestamp: 2018-12-07 02:56:18 +0530
message:
MDEV-17255: New optimizer defaults and ANALYZE TABLE
Added two new values to the server variable use_stat_tables.
The values are COMPLEMENTARY_FOR_QUERIES and PREFERABLY_FOR_QUERIES.
Both these values don't allow to collect EITS for queries like
analyze table t1;
To collect EITS we would need to use the syntax with persistent like
analyze table t1 persistent for columns (col1,col2...) index (idx1, idx2...) / ALL
Changing the default value from NEVER to PREFERABLY_FOR_QUERIES.
---
mysql-test/main/stat_tables.result | 56 +++++++++++++++++++++++++++++++
mysql-test/main/stat_tables.test | 42 +++++++++++++++++++++++
mysql-test/main/stat_tables_innodb.result | 56 +++++++++++++++++++++++++++++++
sql/sql_admin.cc | 2 +-
sql/sql_statistics.cc | 5 ++-
sql/sql_statistics.h | 27 +++++++++++++++
sql/sys_vars.cc | 5 +--
7 files changed, 187 insertions(+), 6 deletions(-)
diff --git a/mysql-test/main/stat_tables.result b/mysql-test/main/stat_tables.result
index 308529ece47..e7ace3dd85a 100644
--- a/mysql-test/main/stat_tables.result
+++ b/mysql-test/main/stat_tables.result
@@ -606,3 +606,59 @@ MAX(pk)
NULL
DROP TABLE t1;
set use_stat_tables=@save_use_stat_tables;
+#
+# MDEV-17255: New optimizer defaults and ANALYZE TABLE
+#
+create table t1 (a int, b int);
+insert into t1(a,b) values (1,2),(1,3),(1,4),(1,5),(2,6),(2,7),(3,8),(3,9),(3,9),(4,10);
+set use_stat_tables= preferably_for_queries;
+#
+# with use_stat_tables= PREFERABLY_FOR_QUERIES
+# analyze table t1 will not collect statistics
+#
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+select * from mysql.column_stats;
+db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
+analyze
+select * from t1 where a = 1 and b=3;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 100.00 10.00 Using where
+#
+# with use_stat_tables= PREFERABLY_FOR_QUERIES
+# analyze table t1 will collect statistics if we use PERSISTENT
+# for columns, indexes or everything
+#
+analyze table t1 persistent for columns (a) indexes ();
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status Table is already up to date
+select * from mysql.column_stats;
+db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
+test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL
+# filtered shows that we used the data from stat tables
+analyze
+select * from t1 where a = 1 and b=3;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 25.00 10.00 Using where
+#
+# with use_stat_tables= PREFERABLY
+# analyze table t1 will collect statistics
+#
+set use_stat_tables=PREFERABLY;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status Table is already up to date
+select * from mysql.column_stats;
+db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
+test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL
+test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL
+# filtered shows that we used the data from stat tables
+analyze
+select * from t1 where a=1 and b=3;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where
+drop table t1;
+set use_stat_tables=@save_use_stat_tables;
diff --git a/mysql-test/main/stat_tables.test b/mysql-test/main/stat_tables.test
index 19bc0fa2f46..3e6e2cf3093 100644
--- a/mysql-test/main/stat_tables.test
+++ b/mysql-test/main/stat_tables.test
@@ -383,5 +383,47 @@ CREATE OR REPLACE TABLE t1 (pk INT PRIMARY KEY, t TEXT);
SELECT MAX(pk) FROM t1;
DROP TABLE t1;
+set use_stat_tables=@save_use_stat_tables;
+
+--echo #
+--echo # MDEV-17255: New optimizer defaults and ANALYZE TABLE
+--echo #
+
+create table t1 (a int, b int);
+insert into t1(a,b) values (1,2),(1,3),(1,4),(1,5),(2,6),(2,7),(3,8),(3,9),(3,9),(4,10);
+set use_stat_tables= preferably_for_queries;
+--echo #
+--echo # with use_stat_tables= PREFERABLY_FOR_QUERIES
+--echo # analyze table t1 will not collect statistics
+--echo #
+
+analyze table t1;
+select * from mysql.column_stats;
+analyze
+select * from t1 where a = 1 and b=3;
+--echo #
+--echo # with use_stat_tables= PREFERABLY_FOR_QUERIES
+--echo # analyze table t1 will collect statistics if we use PERSISTENT
+--echo # for columns, indexes or everything
+--echo #
+
+analyze table t1 persistent for columns (a) indexes ();
+select * from mysql.column_stats;
+--echo # filtered shows that we used the data from stat tables
+analyze
+select * from t1 where a = 1 and b=3;
+
+--echo #
+--echo # with use_stat_tables= PREFERABLY
+--echo # analyze table t1 will collect statistics
+--echo #
+
+set use_stat_tables=PREFERABLY;
+analyze table t1;
+select * from mysql.column_stats;
+--echo # filtered shows that we used the data from stat tables
+analyze
+select * from t1 where a=1 and b=3;
+drop table t1;
set use_stat_tables=@save_use_stat_tables;
diff --git a/mysql-test/main/stat_tables_innodb.result b/mysql-test/main/stat_tables_innodb.result
index 8198e94dc10..369d85da985 100644
--- a/mysql-test/main/stat_tables_innodb.result
+++ b/mysql-test/main/stat_tables_innodb.result
@@ -633,5 +633,61 @@ MAX(pk)
NULL
DROP TABLE t1;
set use_stat_tables=@save_use_stat_tables;
+#
+# MDEV-17255: New optimizer defaults and ANALYZE TABLE
+#
+create table t1 (a int, b int);
+insert into t1(a,b) values (1,2),(1,3),(1,4),(1,5),(2,6),(2,7),(3,8),(3,9),(3,9),(4,10);
+set use_stat_tables= preferably_for_queries;
+#
+# with use_stat_tables= PREFERABLY_FOR_QUERIES
+# analyze table t1 will not collect statistics
+#
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status OK
+select * from mysql.column_stats;
+db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
+analyze
+select * from t1 where a = 1 and b=3;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 100.00 10.00 Using where
+#
+# with use_stat_tables= PREFERABLY_FOR_QUERIES
+# analyze table t1 will collect statistics if we use PERSISTENT
+# for columns, indexes or everything
+#
+analyze table t1 persistent for columns (a) indexes ();
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+select * from mysql.column_stats;
+db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
+test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL
+# filtered shows that we used the data from stat tables
+analyze
+select * from t1 where a = 1 and b=3;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 25.00 10.00 Using where
+#
+# with use_stat_tables= PREFERABLY
+# analyze table t1 will collect statistics
+#
+set use_stat_tables=PREFERABLY;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+select * from mysql.column_stats;
+db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
+test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL
+test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL
+# filtered shows that we used the data from stat tables
+analyze
+select * from t1 where a=1 and b=3;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where
+drop table t1;
+set use_stat_tables=@save_use_stat_tables;
set optimizer_switch=@save_optimizer_switch_for_stat_tables_test;
SET SESSION STORAGE_ENGINE=DEFAULT;
diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc
index d0d959de8f9..b39103e382a 100644
--- a/sql/sql_admin.cc
+++ b/sql/sql_admin.cc
@@ -767,7 +767,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
}
collect_eis=
(table->table->s->table_category == TABLE_CATEGORY_USER &&
- (get_use_stat_tables_mode(thd) > NEVER ||
+ (check_eits_collection_allowed(thd) ||
lex->with_persistent_for_clause));
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index 04806f07b3b..8c88f7f927f 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -3720,9 +3720,8 @@ void set_statistics_for_table(THD *thd, TABLE *table)
{
TABLE_STATISTICS_CB *stats_cb= &table->s->stats_cb;
Table_statistics *read_stats= stats_cb->table_stats;
- Use_stat_tables_mode use_stat_table_mode= get_use_stat_tables_mode(thd);
table->used_stat_records=
- (use_stat_table_mode <= COMPLEMENTARY ||
+ (!check_eits_preferred(thd) ||
!table->stats_is_read || read_stats->cardinality_is_null) ?
table->file->stats.records : read_stats->cardinality;
KEY *key_info, *key_info_end;
@@ -3730,7 +3729,7 @@ void set_statistics_for_table(THD *thd, TABLE *table)
key_info < key_info_end; key_info++)
{
key_info->is_statistics_from_stat_tables=
- (use_stat_table_mode > COMPLEMENTARY &&
+ (check_eits_preferred(thd) &&
table->stats_is_read &&
key_info->read_stats->avg_frequency_is_inited() &&
key_info->read_stats->get_avg_frequency(0) > 0.5);
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index 39cddf95188..8439ac8db53 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -16,12 +16,26 @@
#ifndef SQL_STATISTICS_H
#define SQL_STATISTICS_H
+/*
+ For COMPLEMENTARY_FOR_QUERIES and PREFERABLY_FOR_QUERIES they are
+ similar to the COMPLEMENTARY and PREFERABLY respectively except that
+ with these values we would not be collecting EITS for queries like
+ ANALYZE TABLE t1;
+ To collect EITS with these values, we have to use PERSISTENT FOR
+ analyze table t1 persistent for
+ columns (col1,col2...) indexes (idx1, idx2...)
+ or
+ analyze table t1 persistent for all
+*/
+
typedef
enum enum_use_stat_tables_mode
{
NEVER,
COMPLEMENTARY,
PREFERABLY,
+ COMPLEMENTARY_FOR_QUERIES,
+ PREFERABLY_FOR_QUERIES
} Use_stat_tables_mode;
typedef
@@ -87,6 +101,19 @@ Use_stat_tables_mode get_use_stat_tables_mode(THD *thd)
{
return (Use_stat_tables_mode) (thd->variables.use_stat_tables);
}
+inline
+bool check_eits_collection_allowed(THD *thd)
+{
+ return (get_use_stat_tables_mode(thd) == COMPLEMENTARY ||
+ get_use_stat_tables_mode(thd) == PREFERABLY);
+}
+
+inline
+bool check_eits_preferred(THD *thd)
+{
+ return (get_use_stat_tables_mode(thd) == PREFERABLY ||
+ get_use_stat_tables_mode(thd) == PREFERABLY_FOR_QUERIES);
+}
int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables);
int collect_statistics_for_table(THD *thd, TABLE *table);
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index 420e7feabde..b925de34db4 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -5842,12 +5842,13 @@ static Sys_var_ulong Sys_progress_report_time(
VALID_RANGE(0, UINT_MAX), DEFAULT(5), BLOCK_SIZE(1));
const char *use_stat_tables_modes[] =
- {"NEVER", "COMPLEMENTARY", "PREFERABLY", 0};
+ {"NEVER", "COMPLEMENTARY", "PREFERABLY",
+ "COMPLEMENTARY_FOR_QUERIES", "PREFERABLY_FOR_QUERIES", 0};
static Sys_var_enum Sys_optimizer_use_stat_tables(
"use_stat_tables",
"Specifies how to use system statistics tables",
SESSION_VAR(use_stat_tables), CMD_LINE(REQUIRED_ARG),
- use_stat_tables_modes, DEFAULT(2));
+ use_stat_tables_modes, DEFAULT(4));
static Sys_var_ulong Sys_histogram_size(
"histogram_size",
1
0
[Commits] 2b340b1742c: MDEV-17032: Estimates are higher for partitions of a table with @@use_stat_tables= PREFERABLY
by Varun 06 Dec '18
by Varun 06 Dec '18
06 Dec '18
revision-id: 2b340b1742cfc6f04a0ff24c597ed224cbaf0fc6 (mariadb-10.0.36-81-g2b340b1742c)
parent(s): 14f6b0cdfd696ec0e4f24d914fc3123deaece2f6
author: Varun Gupta
committer: Varun Gupta
timestamp: 2018-12-03 02:11:45 +0530
message:
MDEV-17032: Estimates are higher for partitions of a table with @@use_stat_tables= PREFERABLY
The problem here is that EITS does not calculate statistics for the individual partitions of a table.
So a temporary solution would be to not read EITS statistics for partitioned tables.
Also disable reading of EITS statistics for columns that participate in the partition list of a table.
---
mysql-test/r/partition.result | 100 ++++++++++++++++++++++++++++++++++++++++++
mysql-test/t/partition.test | 61 ++++++++++++++++++++++++++
sql/opt_range.cc | 19 ++++----
sql/partition_info.cc | 17 +++++++
sql/partition_info.h | 1 +
sql/sql_statistics.cc | 45 +++++++++++++++++++
sql/sql_statistics.h | 1 +
7 files changed, 235 insertions(+), 9 deletions(-)
diff --git a/mysql-test/r/partition.result b/mysql-test/r/partition.result
index c6669176b3d..6732782c5f7 100644
--- a/mysql-test/r/partition.result
+++ b/mysql-test/r/partition.result
@@ -2645,3 +2645,103 @@ Warnings:
Note 1517 Duplicate partition name p2
DEALLOCATE PREPARE stmt;
DROP TABLE t1;
+#
+# MDEV-17032: Estimates are higher for partitions of a table with @@use_stat_tables= PREFERABLY
+#
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t1(a int);
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+create table t2 (
+part_key int,
+a int,
+b int
+) partition by list(part_key) (
+partition p0 values in (0),
+partition p1 values in (1),
+partition p2 values in (2),
+partition p3 values in (3),
+partition p4 values in (4)
+);
+insert into t2
+select mod(a,5), a/100, mod(a,5) from t1;
+set @save_use_stat_tables= @@use_stat_tables;
+set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
+#
+# Tests using stats provided by the storage engine
+#
+explain extended select * from t2 where part_key=1;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 200 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` = 1)
+explain partitions select * from t2 where part_key=1;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 p1 ALL NULL NULL NULL NULL 200 Using where
+explain extended select * from t2 where part_key in (1,2);
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 400 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` in (1,2))
+explain partitions select * from t2 where part_key in (1,2);
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 p1,p2 ALL NULL NULL NULL NULL 400 Using where
+explain extended select * from t2 where b=5;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 1000 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`b` = 5)
+explain partitions select * from t2 where b=5;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 p0,p1,p2,p3,p4 ALL NULL NULL NULL NULL 1000 Using where
+explain extended select * from t2 partition(p0) where b=1;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 200 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` PARTITION (`p0`) where (`test`.`t2`.`b` = 1)
+set @save_histogram_size=@@histogram_size;
+set @@histogram_size=100;
+set @@use_stat_tables= PREFERABLY;
+set @@optimizer_use_condition_selectivity=4;
+analyze table t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status OK
+#
+# Tests using EITS
+#
+# filtered should be 100
+explain extended select * from t2 where part_key=1;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 200 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` = 1)
+explain partitions select * from t2 where part_key=1;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 p1 ALL NULL NULL NULL NULL 200 Using where
+# filtered should be 100
+explain extended select * from t2 where part_key in (1,2);
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 400 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`part_key` in (1,2))
+explain partitions select * from t2 where part_key in (1,2);
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 p1,p2 ALL NULL NULL NULL NULL 400 Using where
+explain extended select * from t2 where b=5;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 1000 19.80 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` where (`test`.`t2`.`b` = 5)
+explain partitions select * from t2 where b=5;
+id select_type table partitions type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 p0,p1,p2,p3,p4 ALL NULL NULL NULL NULL 1000 Using where
+explain extended select * from t2 partition(p0) where b=1;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 200 19.80 Using where
+Warnings:
+Note 1003 select `test`.`t2`.`part_key` AS `part_key`,`test`.`t2`.`a` AS `a`,`test`.`t2`.`b` AS `b` from `test`.`t2` PARTITION (`p0`) where (`test`.`t2`.`b` = 1)
+set @@use_stat_tables= @save_use_stat_tables;
+set @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
+set @@histogram_size= @save_histogram_size;
+drop table t0,t1,t2;
diff --git a/mysql-test/t/partition.test b/mysql-test/t/partition.test
index 1c8cd0375d6..b6a5db2db7c 100644
--- a/mysql-test/t/partition.test
+++ b/mysql-test/t/partition.test
@@ -2897,3 +2897,64 @@ EXECUTE stmt;
DEALLOCATE PREPARE stmt;
DROP TABLE t1;
+--echo #
+--echo # MDEV-17032: Estimates are higher for partitions of a table with @@use_stat_tables= PREFERABLY
+--echo #
+
+create table t0(a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
+create table t1(a int);
+insert into t1 select A.a + B.a* 10 + C.a * 100 from t0 A, t0 B, t0 C;
+
+
+create table t2 (
+ part_key int,
+ a int,
+ b int
+) partition by list(part_key) (
+ partition p0 values in (0),
+ partition p1 values in (1),
+ partition p2 values in (2),
+ partition p3 values in (3),
+ partition p4 values in (4)
+);
+insert into t2
+select mod(a,5), a/100, mod(a,5) from t1;
+
+set @save_use_stat_tables= @@use_stat_tables;
+set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
+--echo #
+--echo # Tests using stats provided by the storage engine
+--echo #
+explain extended select * from t2 where part_key=1;
+explain partitions select * from t2 where part_key=1;
+explain extended select * from t2 where part_key in (1,2);
+explain partitions select * from t2 where part_key in (1,2);
+explain extended select * from t2 where b=5;
+explain partitions select * from t2 where b=5;
+explain extended select * from t2 partition(p0) where b=1;
+
+
+set @save_histogram_size=@@histogram_size;
+set @@histogram_size=100;
+set @@use_stat_tables= PREFERABLY;
+set @@optimizer_use_condition_selectivity=4;
+analyze table t2;
+--echo #
+--echo # Tests using EITS
+--echo #
+--echo # filtered should be 100
+explain extended select * from t2 where part_key=1;
+explain partitions select * from t2 where part_key=1;
+--echo # filtered should be 100
+explain extended select * from t2 where part_key in (1,2);
+explain partitions select * from t2 where part_key in (1,2);
+explain extended select * from t2 where b=5;
+explain partitions select * from t2 where b=5;
+explain extended select * from t2 partition(p0) where b=1;
+
+set @@use_stat_tables= @save_use_stat_tables;
+set @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
+set @@histogram_size= @save_histogram_size;
+drop table t0,t1,t2;
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index a3943cbe3ff..005ae92a665 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -3322,14 +3322,17 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
{
Field **field_ptr;
TABLE *table= param->table;
+ partition_info *part_info= NULL;
+ #ifdef WITH_PARTITION_STORAGE_ENGINE
+ part_info= table->part_info;
+ #endif
uint parts= 0;
for (field_ptr= table->field; *field_ptr; field_ptr++)
{
- Column_statistics* col_stats= (*field_ptr)->read_stats;
- if (bitmap_is_set(used_fields, (*field_ptr)->field_index)
- && col_stats && !col_stats->no_stat_values_provided()
- && !((*field_ptr)->type() == MYSQL_TYPE_GEOMETRY))
+ Field *field= *field_ptr;
+ if (bitmap_is_set(used_fields, field->field_index) &&
+ is_eits_usable(field))
parts++;
}
@@ -3347,12 +3350,10 @@ bool create_key_parts_for_pseudo_indexes(RANGE_OPT_PARAM *param,
uint max_key_len= 0;
for (field_ptr= table->field; *field_ptr; field_ptr++)
{
- if (bitmap_is_set(used_fields, (*field_ptr)->field_index))
+ Field *field= *field_ptr;
+ if (bitmap_is_set(used_fields, field->field_index))
{
- Field *field= *field_ptr;
- Column_statistics* col_stats= field->read_stats;
- if (field->type() == MYSQL_TYPE_GEOMETRY ||
- !col_stats || col_stats->no_stat_values_provided())
+ if (!is_eits_usable(field))
continue;
uint16 store_length;
diff --git a/sql/partition_info.cc b/sql/partition_info.cc
index 52bda560c1c..9d7d0d92686 100644
--- a/sql/partition_info.cc
+++ b/sql/partition_info.cc
@@ -3164,6 +3164,23 @@ void partition_info::print_debug(const char *str, uint *value)
DBUG_PRINT("info", ("parser: %s", str));
DBUG_VOID_RETURN;
}
+
+bool partition_info::field_in_partition_expr(Field *field) const
+{
+ uint i;
+ for (i= 0; i < num_part_fields; i++)
+ {
+ if (field->eq(part_field_array[i]))
+ return TRUE;
+ }
+ for (i= 0; i < num_subpart_fields; i++)
+ {
+ if (field->eq(subpart_field_array[i]))
+ return TRUE;
+ }
+ return FALSE;
+}
+
#else /* WITH_PARTITION_STORAGE_ENGINE */
/*
For builds without partitioning we need to define these functions
diff --git a/sql/partition_info.h b/sql/partition_info.h
index f250c5496bf..10b8954ace7 100644
--- a/sql/partition_info.h
+++ b/sql/partition_info.h
@@ -384,6 +384,7 @@ class partition_info : public Sql_alloc
bool is_full_part_expr_in_fields(List<Item> &fields);
public:
bool has_unique_name(partition_element *element);
+ bool field_in_partition_expr(Field *field) const;
};
uint32 get_next_partition_id_range(struct st_partition_iter* part_iter);
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index cb75a5c2176..0c359a29431 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -30,6 +30,7 @@
#include "opt_range.h"
#include "my_atomic.h"
#include "sql_show.h"
+#include "sql_partition.h"
/*
The system variable 'use_stat_tables' can take one of the
@@ -3589,6 +3590,22 @@ void set_statistics_for_table(THD *thd, TABLE *table)
(use_stat_table_mode <= COMPLEMENTARY ||
!table->stats_is_read || read_stats->cardinality_is_null) ?
table->file->stats.records : read_stats->cardinality;
+
+ /*
+ For partitioned table, EITS statistics is based on data from all partitions.
+
+ On the other hand, Partition Pruning figures which partitions will be
+ accessed and then computes the estimate of rows in used_partitions.
+
+ Use the estimate from Partition Pruning as it is typically more precise.
+ Ideally, EITS should provide per-partition statistics but this is not
+ implemented currently.
+ */
+ #ifdef WITH_PARTITION_STORAGE_ENGINE
+ if (table->part_info)
+ table->used_stat_records= table->file->stats.records;
+ #endif
+
KEY *key_info, *key_info_end;
for (key_info= table->key_info, key_info_end= key_info+table->s->keys;
key_info < key_info_end; key_info++)
@@ -3904,3 +3921,31 @@ bool is_stat_table(const char *db, const char *table)
}
return false;
}
+
+/*
+ Check whether we can use EITS statistics for a field or not
+
+ TRUE : Use EITS for the columns
+ FALSE: Otherwise
+*/
+
+bool is_eits_usable(Field *field)
+{
+ partition_info *part_info= NULL;
+ #ifdef WITH_PARTITION_STORAGE_ENGINE
+ part_info= field->table->part_info;
+ #endif
+ /*
+ (1): checks if we have EITS statistics for a particular column
+ (2): Don't use EITS for GEOMETRY columns
+ (3): Disabling reading EITS statistics for columns involved in the
+ partition list of a table. We assume the selectivity for
+ such columns would be handled during partition pruning.
+ */
+ Column_statistics* col_stats= field->read_stats;
+ if (col_stats && !col_stats->no_stat_values_provided() && //(1)
+ field->type() != MYSQL_TYPE_GEOMETRY && //(2)
+ (!part_info || !part_info->field_in_partition_expr(field))) //(3)
+ return TRUE;
+ return FALSE;
+}
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index f28d56e4a69..a891bef3164 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -109,6 +109,7 @@ double get_column_range_cardinality(Field *field,
key_range *max_endp,
uint range_flag);
bool is_stat_table(const char *db, const char *table);
+bool is_eits_usable(Field* field);
class Histogram
{
2
1
06 Dec '18
revision-id: 2da93752430d0dd07344c9d62f2468bba8962b77 (mariadb-10.3.6-266-g2da93752430)
parent(s): e0739064450f2c2be6d5de1d799582121747dd39
author: Varun Gupta
committer: Varun Gupta
timestamp: 2018-12-05 19:56:23 +0530
message:
MDEV-17255: New optimizer defaults and ANALYZE TABLE
Added two new values to the server variable use_stat_tables.
The values are COMPLEMENTARY_FOR_QUERIES and PREFERABLY_FOR_QUERIES.
Neither of these values allows EITS to be collected for queries like
analyze table t1;
To collect EITS we would need to use the syntax with persistent like
analyze table t1 persistent for columns (col1,col2...) indexes (idx1, idx2...) / ALL
Changing the default value from NEVER to PREFERABLY_FOR_QUERIES.
---
mysql-test/include/default_mysqld.cnf | 1 +
mysql-test/main/stat_tables.result | 59 +++++++++++++++++++++++++++++++
mysql-test/main/stat_tables.test | 43 ++++++++++++++++++++++
mysql-test/main/stat_tables_innodb.result | 59 +++++++++++++++++++++++++++++++
sql/sql_admin.cc | 2 +-
sql/sql_statistics.cc | 5 ++-
sql/sql_statistics.h | 27 ++++++++++++++
sql/sys_vars.cc | 5 +--
8 files changed, 195 insertions(+), 6 deletions(-)
diff --git a/mysql-test/include/default_mysqld.cnf b/mysql-test/include/default_mysqld.cnf
index 69a2b58288b..5ba3bdeb92c 100644
--- a/mysql-test/include/default_mysqld.cnf
+++ b/mysql-test/include/default_mysqld.cnf
@@ -107,6 +107,7 @@ loose-performance-schema-consumer-thread-instrumentation=ON
binlog-direct-non-transactional-updates
default-storage-engine=myisam
+use-stat-tables=preferably_for_queries
loose-ssl-ca=(a)ENV.MYSQL_TEST_DIR/std_data/cacert.pem
loose-ssl-cert=(a)ENV.MYSQL_TEST_DIR/std_data/server-cert.pem
diff --git a/mysql-test/main/stat_tables.result b/mysql-test/main/stat_tables.result
index 308529ece47..67bf3f9237d 100644
--- a/mysql-test/main/stat_tables.result
+++ b/mysql-test/main/stat_tables.result
@@ -605,4 +605,63 @@ SELECT MAX(pk) FROM t1;
MAX(pk)
NULL
DROP TABLE t1;
+#
+# MDEV-17255: New optimizer defaults and ANALYZE TABLE
+#
+set @save_use_stat_tables= @@use_stat_tables;
+create table t1 (a int, b int);
+insert into t1(a,b) values (1,2),(1,3),(1,4),(1,5),(2,6),(2,7),(3,8),(3,9),(3,9),(4,10);
+#
+# with use_stat_tables= PREFERABLY_FOR_QUERIES
+# analyze table t1 will not collect statistics
+#
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+select * from mysql.column_stats;
+db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
+test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL
+test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL
+analyze
+select * from t1 where a = 1 and b=3;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where
+#
+# with use_stat_tables= PREFERABLY_FOR_QUERIES
+# analyze table t1 will collect statistics if we use PERSISTENT
+# for columns, indexes or everything
+#
+analyze table t1 persistent for columns (a) indexes ();
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status Table is already up to date
+select * from mysql.column_stats;
+db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
+test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL
+test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL
+# filtered shows that we used the data from stat tables
+analyze
+select * from t1 where a = 1 and b=3;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where
+#
+# with use_stat_tables= PREFERABLY
+# analyze table t1 will collect statistics
+#
+set @@use_stat_tables=PREFERABLY;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status Table is already up to date
+select * from mysql.column_stats;
+db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
+test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL
+test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL
+# filtered shows that we used the data from stat tables
+analyze
+select * from t1 where a=1 and b=3;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where
+drop table t1;
set use_stat_tables=@save_use_stat_tables;
diff --git a/mysql-test/main/stat_tables.test b/mysql-test/main/stat_tables.test
index 19bc0fa2f46..8616f6386e1 100644
--- a/mysql-test/main/stat_tables.test
+++ b/mysql-test/main/stat_tables.test
@@ -384,4 +384,47 @@ SELECT MAX(pk) FROM t1;
DROP TABLE t1;
+--echo #
+--echo # MDEV-17255: New optimizer defaults and ANALYZE TABLE
+--echo #
+
+set @save_use_stat_tables= @@use_stat_tables;
+create table t1 (a int, b int);
+insert into t1(a,b) values (1,2),(1,3),(1,4),(1,5),(2,6),(2,7),(3,8),(3,9),(3,9),(4,10);
+
+--echo #
+--echo # with use_stat_tables= PREFERABLY_FOR_QUERIES
+--echo # analyze table t1 will not collect statistics
+--echo #
+
+analyze table t1;
+select * from mysql.column_stats;
+analyze
+select * from t1 where a = 1 and b=3;
+
+--echo #
+--echo # with use_stat_tables= PREFERABLY_FOR_QUERIES
+--echo # analyze table t1 will collect statistics if we use PERSISTENT
+--echo # for columns, indexes or everything
+--echo #
+
+analyze table t1 persistent for columns (a) indexes ();
+select * from mysql.column_stats;
+--echo # filtered shows that we used the data from stat tables
+analyze
+select * from t1 where a = 1 and b=3;
+
+--echo #
+--echo # with use_stat_tables= PREFERABLY
+--echo # analyze table t1 will collect statistics
+--echo #
+
+set @@use_stat_tables=PREFERABLY;
+analyze table t1;
+select * from mysql.column_stats;
+--echo # filtered shows that we used the data from stat tables
+analyze
+select * from t1 where a=1 and b=3;
+drop table t1;
+
set use_stat_tables=@save_use_stat_tables;
diff --git a/mysql-test/main/stat_tables_innodb.result b/mysql-test/main/stat_tables_innodb.result
index 8198e94dc10..4bb8f34712c 100644
--- a/mysql-test/main/stat_tables_innodb.result
+++ b/mysql-test/main/stat_tables_innodb.result
@@ -632,6 +632,65 @@ SELECT MAX(pk) FROM t1;
MAX(pk)
NULL
DROP TABLE t1;
+#
+# MDEV-17255: New optimizer defaults and ANALYZE TABLE
+#
+set @save_use_stat_tables= @@use_stat_tables;
+create table t1 (a int, b int);
+insert into t1(a,b) values (1,2),(1,3),(1,4),(1,5),(2,6),(2,7),(3,8),(3,9),(3,9),(4,10);
+#
+# with use_stat_tables= PREFERABLY_FOR_QUERIES
+# analyze table t1 will not collect statistics
+#
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+select * from mysql.column_stats;
+db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
+test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL
+test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL
+analyze
+select * from t1 where a = 1 and b=3;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where
+#
+# with use_stat_tables= PREFERABLY_FOR_QUERIES
+# analyze table t1 will collect statistics if we use PERSISTENT
+# for columns, indexes or everything
+#
+analyze table t1 persistent for columns (a) indexes ();
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+select * from mysql.column_stats;
+db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
+test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL
+test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL
+# filtered shows that we used the data from stat tables
+analyze
+select * from t1 where a = 1 and b=3;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where
+#
+# with use_stat_tables= PREFERABLY
+# analyze table t1 will collect statistics
+#
+set @@use_stat_tables=PREFERABLY;
+analyze table t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+select * from mysql.column_stats;
+db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
+test t1 a 1 4 0.0000 4.0000 2.5000 0 NULL NULL
+test t1 b 2 10 0.0000 4.0000 1.1111 0 NULL NULL
+# filtered shows that we used the data from stat tables
+analyze
+select * from t1 where a=1 and b=3;
+id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 10 10.00 2.78 10.00 Using where
+drop table t1;
set use_stat_tables=@save_use_stat_tables;
set optimizer_switch=@save_optimizer_switch_for_stat_tables_test;
SET SESSION STORAGE_ENGINE=DEFAULT;
diff --git a/sql/sql_admin.cc b/sql/sql_admin.cc
index d0d959de8f9..b39103e382a 100644
--- a/sql/sql_admin.cc
+++ b/sql/sql_admin.cc
@@ -767,7 +767,7 @@ static bool mysql_admin_table(THD* thd, TABLE_LIST* tables,
}
collect_eis=
(table->table->s->table_category == TABLE_CATEGORY_USER &&
- (get_use_stat_tables_mode(thd) > NEVER ||
+ (check_eits_collection_allowed(thd) ||
lex->with_persistent_for_clause));
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index 04806f07b3b..8c88f7f927f 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -3720,9 +3720,8 @@ void set_statistics_for_table(THD *thd, TABLE *table)
{
TABLE_STATISTICS_CB *stats_cb= &table->s->stats_cb;
Table_statistics *read_stats= stats_cb->table_stats;
- Use_stat_tables_mode use_stat_table_mode= get_use_stat_tables_mode(thd);
table->used_stat_records=
- (use_stat_table_mode <= COMPLEMENTARY ||
+ (!check_eits_preferred(thd) ||
!table->stats_is_read || read_stats->cardinality_is_null) ?
table->file->stats.records : read_stats->cardinality;
KEY *key_info, *key_info_end;
@@ -3730,7 +3729,7 @@ void set_statistics_for_table(THD *thd, TABLE *table)
key_info < key_info_end; key_info++)
{
key_info->is_statistics_from_stat_tables=
- (use_stat_table_mode > COMPLEMENTARY &&
+ (check_eits_preferred(thd) &&
table->stats_is_read &&
key_info->read_stats->avg_frequency_is_inited() &&
key_info->read_stats->get_avg_frequency(0) > 0.5);
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index 39cddf95188..8439ac8db53 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -16,12 +16,26 @@
#ifndef SQL_STATISTICS_H
#define SQL_STATISTICS_H
+/*
+ For COMPLEMENTARY_FOR_QUERIES and PREFERABLY_FOR_QUERIES they are
+ similar to the COMPLEMENTARY and PREFERABLY respectively except that
+ with these values we would not be collecting EITS for queries like
+ ANALYZE TABLE t1;
+ To collect EITS with these values, we have to use PERSISTENT FOR
+ analyze table t1 persistent for
+ columns (col1,col2...) indexes (idx1, idx2...)
+ or
+ analyze table t1 persistent for all
+*/
+
typedef
enum enum_use_stat_tables_mode
{
NEVER,
COMPLEMENTARY,
PREFERABLY,
+ COMPLEMENTARY_FOR_QUERIES,
+ PREFERABLY_FOR_QUERIES
} Use_stat_tables_mode;
typedef
@@ -87,6 +101,19 @@ Use_stat_tables_mode get_use_stat_tables_mode(THD *thd)
{
return (Use_stat_tables_mode) (thd->variables.use_stat_tables);
}
+inline
+bool check_eits_collection_allowed(THD *thd)
+{
+ return (get_use_stat_tables_mode(thd) == COMPLEMENTARY ||
+ get_use_stat_tables_mode(thd) == PREFERABLY);
+}
+
+inline
+bool check_eits_preferred(THD *thd)
+{
+ return (get_use_stat_tables_mode(thd) == PREFERABLY ||
+ get_use_stat_tables_mode(thd) == PREFERABLY_FOR_QUERIES);
+}
int read_statistics_for_tables_if_needed(THD *thd, TABLE_LIST *tables);
int collect_statistics_for_table(THD *thd, TABLE *table);
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index 420e7feabde..b925de34db4 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -5842,12 +5842,13 @@ static Sys_var_ulong Sys_progress_report_time(
VALID_RANGE(0, UINT_MAX), DEFAULT(5), BLOCK_SIZE(1));
const char *use_stat_tables_modes[] =
- {"NEVER", "COMPLEMENTARY", "PREFERABLY", 0};
+ {"NEVER", "COMPLEMENTARY", "PREFERABLY",
+ "COMPLEMENTARY_FOR_QUERIES", "PREFERABLY_FOR_QUERIES", 0};
static Sys_var_enum Sys_optimizer_use_stat_tables(
"use_stat_tables",
"Specifies how to use system statistics tables",
SESSION_VAR(use_stat_tables), CMD_LINE(REQUIRED_ARG),
- use_stat_tables_modes, DEFAULT(2));
+ use_stat_tables_modes, DEFAULT(4));
static Sys_var_ulong Sys_histogram_size(
"histogram_size",
2
1