[Commits] 5a733ab: MDEV-19179 Regression: SELECT ... UNION ... with inconsistent column names fails
by IgorBabaev 18 Nov '20
by IgorBabaev 18 Nov '20
18 Nov '20
revision-id: 5a733abaa2780164ddbdb9a26d8eeaba47fb3642 (mariadb-10.2.31-580-g5a733ab)
parent(s): 984a06db2ce2b2e3c7c5028245905417f2141cd7
author: Igor Babaev
committer: Igor Babaev
timestamp: 2020-11-18 12:20:00 -0800
message:
MDEV-19179 Regression: SELECT ... UNION ... with inconsistent column names fails
A bogus error message was issued when a condition was pushed into a
materialized derived table or view specified as union of selects with
aggregation when the corresponding columns of the selects had different
names. This happened because the expression pushed into having clauses of
the selects was adjusted for the names of the first select of the union.
The easiest solution was to rename the columns of the other selects to be
name compatible with the columns of the first select.
---
mysql-test/r/derived_cond_pushdown.result | 41 +++++++++++++++++++++++++++++++
mysql-test/t/derived_cond_pushdown.test | 28 +++++++++++++++++++++
sql/item.h | 6 +++++
sql/sql_derived.cc | 22 +++++++++++++++--
4 files changed, 95 insertions(+), 2 deletions(-)
diff --git a/mysql-test/r/derived_cond_pushdown.result b/mysql-test/r/derived_cond_pushdown.result
index d4e8fef..25237aa 100644
--- a/mysql-test/r/derived_cond_pushdown.result
+++ b/mysql-test/r/derived_cond_pushdown.result
@@ -10593,4 +10593,45 @@ a
abc
DROP VIEW v1;
DROP TABLE t1;
+#
+# MDEV-19179: pushdown into UNION of aggregation selects whose
+# corresponding columns have different names
+#
+create table t1 (a int);
+insert into t1 values (3), (7), (1);
+select *
+from (select min(a) as x from t1 union all select max(a) as y from t1) t
+where x>0;
+x
+1
+7
+explain extended select *
+from (select min(a) as x from t1 union all select max(a) as y from t1) t
+where x>0;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 6 100.00 Using where
+2 DERIVED t1 ALL NULL NULL NULL NULL 3 100.00
+3 UNION t1 ALL NULL NULL NULL NULL 3 100.00
+Warnings:
+Note 1003 select `t`.`x` AS `x` from (select min(`test`.`t1`.`a`) AS `x` from `test`.`t1` having `x` > 0 union all select max(`test`.`t1`.`a`) AS `x` from `test`.`t1` having `x` > 0) `t` where `t`.`x` > 0
+prepare stmt from "select *
+from (select min(a) as x from t1 union all select max(a) as y from t1) t
+where x>0";
+execute stmt;
+x
+1
+7
+execute stmt;
+x
+1
+7
+deallocate prepare stmt;
+create view v1(m) as
+select min(a) as x from t1 union all select max(a) as y from t1;
+select * from v1 where m > 0;
+m
+1
+7
+drop view v1;
+drop table t1;
# End of 10.2 tests
diff --git a/mysql-test/t/derived_cond_pushdown.test b/mysql-test/t/derived_cond_pushdown.test
index a7df65f..31b4904 100644
--- a/mysql-test/t/derived_cond_pushdown.test
+++ b/mysql-test/t/derived_cond_pushdown.test
@@ -2184,4 +2184,32 @@ SELECT * FROM v1 WHERE IF( a REGEXP 'def', 'foo', a ) IN ('abc', 'foobar');
DROP VIEW v1;
DROP TABLE t1;
+--echo #
+--echo # MDEV-19179: pushdown into UNION of aggregation selects whose
+--echo # corresponding columns have different names
+--echo #
+
+create table t1 (a int);
+insert into t1 values (3), (7), (1);
+
+let $q=
+select *
+from (select min(a) as x from t1 union all select max(a) as y from t1) t
+where x>0;
+
+eval $q;
+eval explain extended $q;
+
+eval prepare stmt from "$q";
+execute stmt;
+execute stmt;
+deallocate prepare stmt;
+
+create view v1(m) as
+select min(a) as x from t1 union all select max(a) as y from t1;
+select * from v1 where m > 0;
+
+drop view v1;
+drop table t1;
+
--echo # End of 10.2 tests
diff --git a/sql/item.h b/sql/item.h
index a49f9e8..ed20074 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -818,6 +818,12 @@ class Item: public Value_source,
void set_name_for_rollback(THD *thd, const char *str, uint length,
CHARSET_INFO *cs);
void rename(char *new_name);
+ void share_name_with(Item *item)
+ {
+ name= item->name;
+ name_length= item->name_length;
+ is_autogenerated_name= item->is_autogenerated_name;
+ }
void init_make_field(Send_field *tmp_field,enum enum_field_types type);
virtual void cleanup();
virtual void make_field(THD *thd, Send_field *field);
diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc
index 39499e6..f1bd2a5 100644
--- a/sql/sql_derived.cc
+++ b/sql/sql_derived.cc
@@ -1199,7 +1199,8 @@ bool pushdown_cond_for_derived(THD *thd, Item *cond, TABLE_LIST *derived)
DBUG_RETURN(false);
st_select_lex_unit *unit= derived->get_unit();
- st_select_lex *sl= unit->first_select();
+ st_select_lex *first_sl= unit->first_select();
+ st_select_lex *sl= first_sl;
if (derived->prohibit_cond_pushdown)
DBUG_RETURN(false);
@@ -1311,7 +1312,24 @@ bool pushdown_cond_for_derived(THD *thd, Item *cond, TABLE_LIST *derived)
if (!extracted_cond_copy)
continue;
}
-
+
+ /*
+ Rename the columns of all non-first selects of a union to be compatible
+ by names with the columns of the first select. It will allow to use copies
+ of the same expression pushed into having clauses of different selects.
+ */
+ if (sl != first_sl)
+ {
+ DBUG_ASSERT(sl->item_list.elements == first_sl->item_list.elements);
+ List_iterator_fast<Item> it(sl->item_list);
+ List_iterator_fast<Item> nm_it(sl->master_unit()->types);
+ Item * item;
+ while((item= it++))
+ {
+ item->share_name_with(nm_it++);
+ }
+ }
+
/*
Transform the references to the 'derived' columns from the condition
pushed into the having clause of sl to make them usable in the new context
1
0
17 Nov '20
revision-id: d809cdf8e4a68fd3af147d99f9308fbf789d0b86 (mariadb-10.2.31-581-gd809cdf)
parent(s): 190e8a4c2aeb417b405756b193e135c542d46b34
author: Igor Babaev
committer: Igor Babaev
timestamp: 2020-11-17 14:28:30 -0800
message:
MDEV-24220 Server crash in base_list_iterator::next or
in TABLE_LIST::is_recursive_with_tables
After the patch for MDEV-23619 the code of st_select_lex::cleanup started
using the list st_select_lex::leaf_tables. This list is built for any
query with FROM clause in the function setup_tables(). If such query is
used in a stored procedure it must be ensured that the list is empty
before each new call of the procedure. Otherwise if the first call of
the procedure is successful while the second call reports an error before
the setup_tables() is invoked then list st_select_lex::leaf_tables would
point to a piece of memory that has been already freed.
Approved by Oleksandr Byelkin <sanja(a)mariadb.com>
---
mysql-test/r/sp.result | 20 ++++++++++++++++++++
mysql-test/t/sp.test | 25 +++++++++++++++++++++++++
sql/sql_union.cc | 1 +
3 files changed, 46 insertions(+)
diff --git a/mysql-test/r/sp.result b/mysql-test/r/sp.result
index c4d3779..b679f3f 100644
--- a/mysql-test/r/sp.result
+++ b/mysql-test/r/sp.result
@@ -8467,3 +8467,23 @@ $$
ERROR 22007: Incorrect integer value: 'y' for column ``.``.`a` at row 1
DROP TABLE t1;
SET sql_mode=DEFAULT;
+#
+# MDEV-24220: error when opening a table for the second call of SP
+#
+CREATE TABLE t1 (a INT, b INT);
+INSERT INTO t1 VALUES (1,1),(2,2);
+CREATE VIEW v1 AS SELECT MAX(a) as f FROM t1;
+CREATE PROCEDURE p1()
+BEGIN
+SELECT * FROM v1;
+END $
+CALL p1;
+f
+2
+ALTER TABLE t1 DROP a;
+CALL p1;
+ERROR HY000: View 'test.v1' references invalid table(s) or column(s) or function(s) or definer/invoker of view lack rights to use them
+DROP PROCEDURE p1;
+DROP VIEW v1;
+DROP TABLE t1;
+#End of 10.2 tests
diff --git a/mysql-test/t/sp.test b/mysql-test/t/sp.test
index 99b8430..f13b3fb 100644
--- a/mysql-test/t/sp.test
+++ b/mysql-test/t/sp.test
@@ -10001,3 +10001,28 @@ $$
DELIMITER ;$$
DROP TABLE t1;
SET sql_mode=DEFAULT;
+
+--echo #
+--echo # MDEV-24220: error when opening a table for the second call of SP
+--echo #
+
+CREATE TABLE t1 (a INT, b INT);
+INSERT INTO t1 VALUES (1,1),(2,2);
+CREATE VIEW v1 AS SELECT MAX(a) as f FROM t1;
+--delimiter $
+CREATE PROCEDURE p1()
+BEGIN
+ SELECT * FROM v1;
+END $
+--delimiter ;
+
+CALL p1;
+ALTER TABLE t1 DROP a;
+-- error ER_VIEW_INVALID
+CALL p1;
+
+DROP PROCEDURE p1;
+DROP VIEW v1;
+DROP TABLE t1;
+
+--echo #End of 10.2 tests
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
index 9a16237..7716f79 100644
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@@ -1568,6 +1568,7 @@ bool st_select_lex::cleanup()
delete join;
join= 0;
}
+ leaf_tables.empty();
for (SELECT_LEX_UNIT *lex_unit= first_inner_unit(); lex_unit ;
lex_unit= lex_unit->next_unit())
{
1
0
17 Nov '20
revision-id: 4d1644216fde19c7198b9c15110b513dac0f5a72 (mariadb-10.2.31-581-g4d16442)
parent(s): 190e8a4c2aeb417b405756b193e135c542d46b34
author: Igor Babaev
committer: Igor Babaev
timestamp: 2020-11-16 19:59:50 -0800
message:
MDEV-24220 Server crash in base_list_iterator::next or
in TABLE_LIST::is_recursive_with_tables
After the patch for MDEV-23619 the code of st_select_lex::cleanup started
using the list st_select_lex::leaf_tables. This list is built for any
query with FROM clause in the function setup_tables(). If such query is
used in a stored procedure it must be ensured that the list is empty
before each new call of the procedure. Otherwise if the first call of
the procedure is successful while the second call reports an error before
the setup_tables() is invoked then list st_select_lex::leaf_tables would
point to a piece of memory that has been already freed.
---
mysql-test/r/sp.result | 20 ++++++++++++++++++++
mysql-test/t/sp.test | 25 +++++++++++++++++++++++++
sql/sql_union.cc | 1 +
3 files changed, 46 insertions(+)
diff --git a/mysql-test/r/sp.result b/mysql-test/r/sp.result
index c4d3779..b679f3f 100644
--- a/mysql-test/r/sp.result
+++ b/mysql-test/r/sp.result
@@ -8467,3 +8467,23 @@ $$
ERROR 22007: Incorrect integer value: 'y' for column ``.``.`a` at row 1
DROP TABLE t1;
SET sql_mode=DEFAULT;
+#
+# MDEV-24220: error when opening a table for the second call of SP
+#
+CREATE TABLE t1 (a INT, b INT);
+INSERT INTO t1 VALUES (1,1),(2,2);
+CREATE VIEW v1 AS SELECT MAX(a) as f FROM t1;
+CREATE PROCEDURE p1()
+BEGIN
+SELECT * FROM v1;
+END $
+CALL p1;
+f
+2
+ALTER TABLE t1 DROP a;
+CALL p1;
+ERROR HY000: View 'test.v1' references invalid table(s) or column(s) or function(s) or definer/invoker of view lack rights to use them
+DROP PROCEDURE p1;
+DROP VIEW v1;
+DROP TABLE t1;
+#End of 10.2 tests
diff --git a/mysql-test/t/sp.test b/mysql-test/t/sp.test
index 99b8430..f13b3fb 100644
--- a/mysql-test/t/sp.test
+++ b/mysql-test/t/sp.test
@@ -10001,3 +10001,28 @@ $$
DELIMITER ;$$
DROP TABLE t1;
SET sql_mode=DEFAULT;
+
+--echo #
+--echo # MDEV-24220: error when opening a table for the second call of SP
+--echo #
+
+CREATE TABLE t1 (a INT, b INT);
+INSERT INTO t1 VALUES (1,1),(2,2);
+CREATE VIEW v1 AS SELECT MAX(a) as f FROM t1;
+--delimiter $
+CREATE PROCEDURE p1()
+BEGIN
+ SELECT * FROM v1;
+END $
+--delimiter ;
+
+CALL p1;
+ALTER TABLE t1 DROP a;
+-- error ER_VIEW_INVALID
+CALL p1;
+
+DROP PROCEDURE p1;
+DROP VIEW v1;
+DROP TABLE t1;
+
+--echo #End of 10.2 tests
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
index 9a16237..7716f79 100644
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@@ -1568,6 +1568,7 @@ bool st_select_lex::cleanup()
delete join;
join= 0;
}
+ leaf_tables.empty();
for (SELECT_LEX_UNIT *lex_unit= first_inner_unit(); lex_unit ;
lex_unit= lex_unit->next_unit())
{
1
0
revision-id: 840f2d390209dd8ae553171c02909230ac15f03c (v5.8-3048-g840f2d390)
parent(s): 00751e4292e55c1604b28b7b93fe7a538fa05f29
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2020-11-17 00:06:38 +0300
message:
Range Locking - the interface part
Definitions and changes in the generic code (RangeTree-based implementation is
itself not included).
- DeadlockInfoBuffer has been changed into a template (as it is reused by
RangeDeadlockInfo)
- PointLockManagerTest is split into AnyLockManagerTest (has tests which
can be used for any lock manager) and PointLockManagerTest (tests for
this specific lock manager)
- Added #ifndef ROCKSDB_LITE: as LockTracker now depends on Endpoint
which is defined in transaction.h under #ifndef ROCKSDB_LITE
---
include/rocksdb/utilities/transaction.h | 82 +++++-
include/rocksdb/utilities/transaction_db.h | 100 +++++++-
utilities/transactions/lock/lock_manager.cc | 14 +-
utilities/transactions/lock/lock_manager.h | 4 +-
utilities/transactions/lock/lock_tracker.h | 10 +-
.../transactions/lock/point/point_lock_manager.cc | 58 -----
.../transactions/lock/point/point_lock_manager.h | 76 +++++-
.../lock/point/point_lock_manager_test.cc | 245 ++----------------
.../lock/point/point_lock_manager_test.h | 276 +++++++++++++++++++++
.../transactions/lock/point/point_lock_tracker.cc | 4 +
.../transactions/lock/point/point_lock_tracker.h | 2 +
.../transactions/lock/range/range_lock_manager.h | 30 +++
utilities/transactions/pessimistic_transaction.cc | 48 +++-
utilities/transactions/pessimistic_transaction.h | 4 +
.../transactions/pessimistic_transaction_db.cc | 8 +
.../transactions/pessimistic_transaction_db.h | 5 +-
16 files changed, 656 insertions(+), 310 deletions(-)
diff --git a/include/rocksdb/utilities/transaction.h b/include/rocksdb/utilities/transaction.h
index b553100f3..2f4804255 100644
--- a/include/rocksdb/utilities/transaction.h
+++ b/include/rocksdb/utilities/transaction.h
@@ -24,9 +24,81 @@ using TransactionName = std::string;
using TransactionID = uint64_t;
-// An endpoint for a range of keys.
+/*
+ class Endpoint allows to define prefix ranges.
+
+ Prefix ranges are introduced below.
+
+ == Basic Ranges ==
+ Let's start from basic ranges. Key Comparator defines ordering of rowkeys.
+ Then, one can specify finite closed ranges by just providing rowkeys of their
+ endpoints:
+
+ lower_endpoint <= X <= upper_endpoint
+
+ However our goal is to provide a richer set of endpoints. Read on.
+
+ == Lexicographic ordering ==
+ A lexicographic (or dictionary) ordering satisfies these criteria: If there
+ are two keys in form
+ key_a = {prefix_a, suffix_a}
+ key_b = {prefix_b, suffix_b}
+ and
+ prefix_a < prefix_b
+ then
+ key_a < key_b.
+
+ == Prefix ranges ==
+ With lexicographic ordering, one may want to define ranges in form
+
+ "prefix is $PREFIX"
+
+ which translates to a range in form
+
+ {$PREFIX, -infinity} < X < {$PREFIX, +infinity}
+
+ where -infinity will compare less than any possible suffix, and +infinity
+ will compare as greater than any possible suffix.
+
+ class Endpoint allows to define these kind of rangtes.
+
+ == Notes ==
+ BytewiseComparator and ReverseBytewiseComparator produce lexicographic
+ ordering.
+
+ The row comparison function is able to compare key prefixes. If the data
+ domain includes keys A and B, then the comparison function is able to compare
+ equal-length prefixes:
+
+ min_len= min(byte_length(A), byte_length(B));
+ cmp(Slice(A, min_len), Slice(B, min_len)); // this call is valid
+
+ == Other options ==
+ As far as MyRocks is concerned, the alternative to prefix ranges would be to
+ support both open (non-inclusive) and closed (inclusive) range endpoints.
+*/
+
class Endpoint {
- // TODO
+ public:
+ Slice slice;
+
+ /*
+ true : the key has a "+infinity" suffix. A suffix that would compare as
+ greater than any other suffix
+ false : otherwise
+ */
+ bool inf_suffix;
+
+ Endpoint(const Slice& slice_arg, bool inf_suffix_arg = false)
+ : slice(slice_arg), inf_suffix(inf_suffix_arg) {}
+
+ Endpoint(const char* s, bool inf_suffix_arg = false)
+ : slice(s), inf_suffix(inf_suffix_arg) {}
+
+ Endpoint(const char* s, size_t size, bool inf_suffix_arg = false)
+ : slice(s, size), inf_suffix(inf_suffix_arg) {}
+
+ Endpoint() : inf_suffix(false) {}
};
// Provides notification to the caller of SetSnapshotOnNextOperation when
@@ -282,6 +354,12 @@ class Transaction {
}
}
+ // Get a range lock on [start_endpoint; end_endpoint].
+ virtual Status GetRangeLock(ColumnFamilyHandle*, const Endpoint&,
+ const Endpoint&) {
+ return Status::NotSupported();
+ }
+
virtual Status GetForUpdate(const ReadOptions& options, const Slice& key,
std::string* value, bool exclusive = true,
const bool do_validate = true) = 0;
diff --git a/include/rocksdb/utilities/transaction_db.h b/include/rocksdb/utilities/transaction_db.h
index 2e1a0a171..b9daf84a3 100644
--- a/include/rocksdb/utilities/transaction_db.h
+++ b/include/rocksdb/utilities/transaction_db.h
@@ -31,6 +31,98 @@ enum TxnDBWritePolicy {
const uint32_t kInitialMaxDeadlocks = 5;
+class LockManager;
+struct RangeLockInfo;
+
+// A lock manager handle
+// The workflow is as follows:
+// * Use a factory method (like NewRangeLockManager()) to create a lock
+// manager and get its handle.
+// * A Handle for a particular kind of lock manager will have extra
+// methods and parameters to control the lock manager
+// * Pass the handle to RocksDB in TransactionDBOptions::lock_mgr_handle. It
+// will be used to perform locking.
+class LockManagerHandle {
+ public:
+ // PessimisticTransactionDB will call this to get the Lock Manager it's going
+ // to use.
+ virtual LockManager* getLockManager() = 0;
+
+ virtual ~LockManagerHandle() {}
+};
+
+// Same as class Endpoint, but use std::string to manage the buffer allocation
+struct EndpointWithString {
+ std::string slice;
+ bool inf_suffix;
+};
+
+struct RangeDeadlockInfo {
+ TransactionID m_txn_id;
+ uint32_t m_cf_id;
+ bool m_exclusive;
+
+ EndpointWithString m_start;
+ EndpointWithString m_end;
+};
+
+struct RangeDeadlockPath {
+ std::vector<RangeDeadlockInfo> path;
+ bool limit_exceeded;
+ int64_t deadlock_time;
+
+ explicit RangeDeadlockPath(std::vector<RangeDeadlockInfo> path_entry,
+ const int64_t& dl_time)
+ : path(path_entry), limit_exceeded(false), deadlock_time(dl_time) {}
+
+ // empty path, limit exceeded constructor and default constructor
+ explicit RangeDeadlockPath(const int64_t& dl_time = 0, bool limit = false)
+ : path(0), limit_exceeded(limit), deadlock_time(dl_time) {}
+
+ bool empty() { return path.empty() && !limit_exceeded; }
+};
+
+// A handle to control RangeLockManager (Range-based lock manager) from outside
+// RocksDB
+class RangeLockManagerHandle : public LockManagerHandle {
+ public:
+ // Total amount of lock memory to use (per column family)
+ virtual int SetMaxLockMemory(size_t max_lock_memory) = 0;
+ virtual size_t GetMaxLockMemory() = 0;
+
+ using RangeLockStatus =
+ std::unordered_multimap<ColumnFamilyId, RangeLockInfo>;
+
+ virtual RangeLockStatus GetRangeLockStatusData() = 0;
+
+ class Counters {
+ public:
+ // Number of times lock escalation was triggered (for all column families)
+ uint64_t escalation_count;
+
+ // How much memory is currently used for locks (total for all column
+ // families)
+ uint64_t current_lock_memory;
+ };
+
+ // Get the current counter values
+ virtual Counters GetStatus() = 0;
+
+ // Functions for range-based Deadlock reporting.
+ virtual std::vector<RangeDeadlockPath> GetRangeDeadlockInfoBuffer() = 0;
+ virtual void SetRangeDeadlockInfoBufferSize(uint32_t target_size) = 0;
+
+ virtual ~RangeLockManagerHandle(){};
+};
+
+// A factory function to create a Range Lock Manager. The created object should
+// be:
+// 1. Passed in TransactionDBOptions::lock_mgr_handle to open the database in
+// range-locking mode
+// 2. Used to control the lock manager when the DB is already open.
+RangeLockManagerHandle* NewRangeLockManager(
+ std::shared_ptr<TransactionDBMutexFactory> mutex_factory);
+
struct TransactionDBOptions {
// Specifies the maximum number of keys that can be locked at the same time
// per column family.
@@ -92,6 +184,10 @@ struct TransactionDBOptions {
// for the special way that myrocks uses this operands.
bool rollback_merge_operands = false;
+ // nullptr means use default lock manager.
+ // Other value means the user provides a custom lock manager.
+ std::shared_ptr<LockManagerHandle> lock_mgr_handle;
+
// If true, the TransactionDB implementation might skip concurrency control
// unless it is overridden by TransactionOptions or
// TransactionDBWriteOptimizations. This can be used in conjuction with
@@ -203,8 +299,8 @@ struct KeyLockInfo {
};
struct RangeLockInfo {
- Endpoint start;
- Endpoint end;
+ EndpointWithString start;
+ EndpointWithString end;
std::vector<TransactionID> ids;
bool exclusive;
};
diff --git a/utilities/transactions/lock/lock_manager.cc b/utilities/transactions/lock/lock_manager.cc
index 200b15390..df16b32ad 100644
--- a/utilities/transactions/lock/lock_manager.cc
+++ b/utilities/transactions/lock/lock_manager.cc
@@ -11,11 +11,17 @@
namespace ROCKSDB_NAMESPACE {
-LockManager* NewLockManager(PessimisticTransactionDB* db,
- const TransactionDBOptions& opt) {
+std::shared_ptr<LockManager> NewLockManager(PessimisticTransactionDB* db,
+ const TransactionDBOptions& opt) {
assert(db);
- // TODO: determine the lock manager implementation based on configuration.
- return new PointLockManager(db, opt);
+ if (opt.lock_mgr_handle) {
+ // A custom lock manager was provided in options
+ auto mgr = opt.lock_mgr_handle->getLockManager();
+ return std::shared_ptr<LockManager>(opt.lock_mgr_handle, mgr);
+ } else {
+ // Use a point lock manager by default
+ return std::shared_ptr<LockManager>(new PointLockManager(db, opt));
+ }
}
} // namespace ROCKSDB_NAMESPACE
diff --git a/utilities/transactions/lock/lock_manager.h b/utilities/transactions/lock/lock_manager.h
index 32b3f9473..a5ce1948c 100644
--- a/utilities/transactions/lock/lock_manager.h
+++ b/utilities/transactions/lock/lock_manager.h
@@ -74,8 +74,8 @@ class LockManager {
// LockManager should always be constructed through this factory method,
// instead of constructing through concrete implementations' constructor.
// Caller owns the returned pointer.
-LockManager* NewLockManager(PessimisticTransactionDB* db,
- const TransactionDBOptions& opt);
+std::shared_ptr<LockManager> NewLockManager(PessimisticTransactionDB* db,
+ const TransactionDBOptions& opt);
} // namespace ROCKSDB_NAMESPACE
diff --git a/utilities/transactions/lock/lock_tracker.h b/utilities/transactions/lock/lock_tracker.h
index 0d3abded7..5fa228a82 100644
--- a/utilities/transactions/lock/lock_tracker.h
+++ b/utilities/transactions/lock/lock_tracker.h
@@ -4,12 +4,14 @@
// (found in the LICENSE.Apache file in the root directory).
#pragma once
+#ifndef ROCKSDB_LITE
#include <memory>
#include "rocksdb/rocksdb_namespace.h"
#include "rocksdb/status.h"
#include "rocksdb/types.h"
+#include "rocksdb/utilities/transaction_db.h"
namespace ROCKSDB_NAMESPACE {
@@ -29,7 +31,12 @@ struct PointLockRequest {
// Request for locking a range of keys.
struct RangeLockRequest {
- // TODO
+ // The id of the key's column family.
+ ColumnFamilyId column_family_id;
+
+ // The range to be locked
+ Endpoint start_endp;
+ Endpoint end_endp;
};
struct PointLockStatus {
@@ -199,3 +206,4 @@ class LockTrackerFactory {
};
} // namespace ROCKSDB_NAMESPACE
+#endif // ROCKSDB_LITE
diff --git a/utilities/transactions/lock/point/point_lock_manager.cc b/utilities/transactions/lock/point/point_lock_manager.cc
index 0ca6e38f0..0bb7e8a40 100644
--- a/utilities/transactions/lock/point/point_lock_manager.cc
+++ b/utilities/transactions/lock/point/point_lock_manager.cc
@@ -94,64 +94,6 @@ struct LockMap {
size_t GetStripe(const std::string& key) const;
};
-void DeadlockInfoBuffer::AddNewPath(DeadlockPath path) {
- std::lock_guard<std::mutex> lock(paths_buffer_mutex_);
-
- if (paths_buffer_.empty()) {
- return;
- }
-
- paths_buffer_[buffer_idx_] = std::move(path);
- buffer_idx_ = (buffer_idx_ + 1) % paths_buffer_.size();
-}
-
-void DeadlockInfoBuffer::Resize(uint32_t target_size) {
- std::lock_guard<std::mutex> lock(paths_buffer_mutex_);
-
- paths_buffer_ = Normalize();
-
- // Drop the deadlocks that will no longer be needed ater the normalize
- if (target_size < paths_buffer_.size()) {
- paths_buffer_.erase(
- paths_buffer_.begin(),
- paths_buffer_.begin() + (paths_buffer_.size() - target_size));
- buffer_idx_ = 0;
- }
- // Resize the buffer to the target size and restore the buffer's idx
- else {
- auto prev_size = paths_buffer_.size();
- paths_buffer_.resize(target_size);
- buffer_idx_ = (uint32_t)prev_size;
- }
-}
-
-std::vector<DeadlockPath> DeadlockInfoBuffer::Normalize() {
- auto working = paths_buffer_;
-
- if (working.empty()) {
- return working;
- }
-
- // Next write occurs at a nonexistent path's slot
- if (paths_buffer_[buffer_idx_].empty()) {
- working.resize(buffer_idx_);
- } else {
- std::rotate(working.begin(), working.begin() + buffer_idx_, working.end());
- }
-
- return working;
-}
-
-std::vector<DeadlockPath> DeadlockInfoBuffer::PrepareBuffer() {
- std::lock_guard<std::mutex> lock(paths_buffer_mutex_);
-
- // Reversing the normalized vector returns the latest deadlocks first
- auto working = Normalize();
- std::reverse(working.begin(), working.end());
-
- return working;
-}
-
namespace {
void UnrefLockMapsCache(void* ptr) {
// Called when a thread exits or a ThreadLocalPtr gets destroyed.
diff --git a/utilities/transactions/lock/point/point_lock_manager.h b/utilities/transactions/lock/point/point_lock_manager.h
index b22884424..3c541eb3a 100644
--- a/utilities/transactions/lock/point/point_lock_manager.h
+++ b/utilities/transactions/lock/point/point_lock_manager.h
@@ -27,21 +27,79 @@ struct LockInfo;
struct LockMap;
struct LockMapStripe;
-struct DeadlockInfoBuffer {
+template <class Path>
+class DeadlockInfoBufferTempl {
private:
- std::vector<DeadlockPath> paths_buffer_;
+ std::vector<Path> paths_buffer_;
uint32_t buffer_idx_;
std::mutex paths_buffer_mutex_;
- std::vector<DeadlockPath> Normalize();
+
+ std::vector<Path> Normalize() {
+ auto working = paths_buffer_;
+
+ if (working.empty()) {
+ return working;
+ }
+
+ // Next write occurs at a nonexistent path's slot
+ if (paths_buffer_[buffer_idx_].empty()) {
+ working.resize(buffer_idx_);
+ } else {
+ std::rotate(working.begin(), working.begin() + buffer_idx_,
+ working.end());
+ }
+
+ return working;
+ }
public:
- explicit DeadlockInfoBuffer(uint32_t n_latest_dlocks)
+ explicit DeadlockInfoBufferTempl(uint32_t n_latest_dlocks)
: paths_buffer_(n_latest_dlocks), buffer_idx_(0) {}
- void AddNewPath(DeadlockPath path);
- void Resize(uint32_t target_size);
- std::vector<DeadlockPath> PrepareBuffer();
+
+ void AddNewPath(Path path) {
+ std::lock_guard<std::mutex> lock(paths_buffer_mutex_);
+
+ if (paths_buffer_.empty()) {
+ return;
+ }
+
+ paths_buffer_[buffer_idx_] = std::move(path);
+ buffer_idx_ = (buffer_idx_ + 1) % paths_buffer_.size();
+ }
+
+ void Resize(uint32_t target_size) {
+ std::lock_guard<std::mutex> lock(paths_buffer_mutex_);
+
+ paths_buffer_ = Normalize();
+
+ // Drop the deadlocks that will no longer be needed ater the normalize
+ if (target_size < paths_buffer_.size()) {
+ paths_buffer_.erase(
+ paths_buffer_.begin(),
+ paths_buffer_.begin() + (paths_buffer_.size() - target_size));
+ buffer_idx_ = 0;
+ }
+ // Resize the buffer to the target size and restore the buffer's idx
+ else {
+ auto prev_size = paths_buffer_.size();
+ paths_buffer_.resize(target_size);
+ buffer_idx_ = (uint32_t)prev_size;
+ }
+ }
+
+ std::vector<Path> PrepareBuffer() {
+ std::lock_guard<std::mutex> lock(paths_buffer_mutex_);
+
+ // Reversing the normalized vector returns the latest deadlocks first
+ auto working = Normalize();
+ std::reverse(working.begin(), working.end());
+
+ return working;
+ }
};
+typedef DeadlockInfoBufferTempl<DeadlockPath> DeadlockInfoBuffer;
+
struct TrackedTrxInfo {
autovector<TransactionID> m_neighbors;
uint32_t m_cf_id;
@@ -67,7 +125,11 @@ class PointLockManager : public LockManager {
return PointLockTrackerFactory::Get();
}
+ // Creates a new LockMap for this column family. Caller should guarantee
+ // that this column family does not already exist.
void AddColumnFamily(const ColumnFamilyHandle* cf) override;
+ // Deletes the LockMap for this column family. Caller should guarantee that
+ // this column family is no longer in use.
void RemoveColumnFamily(const ColumnFamilyHandle* cf) override;
Status TryLock(PessimisticTransaction* txn, ColumnFamilyId column_family_id,
diff --git a/utilities/transactions/lock/point/point_lock_manager_test.cc b/utilities/transactions/lock/point/point_lock_manager_test.cc
index 211f17b99..206a0395a 100644
--- a/utilities/transactions/lock/point/point_lock_manager_test.cc
+++ b/utilities/transactions/lock/point/point_lock_manager_test.cc
@@ -5,80 +5,12 @@
#ifndef ROCKSDB_LITE
-#include "utilities/transactions/lock/point/point_lock_manager.h"
-
-#include "file/file_util.h"
-#include "port/port.h"
-#include "port/stack_trace.h"
-#include "rocksdb/utilities/transaction_db.h"
-#include "test_util/testharness.h"
-#include "test_util/testutil.h"
-#include "utilities/transactions/pessimistic_transaction_db.h"
-#include "utilities/transactions/transaction_db_mutex_impl.h"
+#include "point_lock_manager_test.h"
namespace ROCKSDB_NAMESPACE {
-class MockColumnFamilyHandle : public ColumnFamilyHandle {
- public:
- explicit MockColumnFamilyHandle(ColumnFamilyId cf_id) : cf_id_(cf_id) {}
-
- ~MockColumnFamilyHandle() override {}
-
- const std::string& GetName() const override { return name_; }
-
- ColumnFamilyId GetID() const override { return cf_id_; }
-
- Status GetDescriptor(ColumnFamilyDescriptor*) override {
- return Status::OK();
- }
-
- const Comparator* GetComparator() const override { return nullptr; }
-
- private:
- ColumnFamilyId cf_id_;
- std::string name_ = "MockCF";
-};
-
-class PointLockManagerTest : public testing::Test {
- public:
- void SetUp() override {
- env_ = Env::Default();
- db_dir_ = test::PerThreadDBPath("point_lock_manager_test");
- ASSERT_OK(env_->CreateDir(db_dir_));
- mutex_factory_ = std::make_shared<TransactionDBMutexFactoryImpl>();
-
- Options opt;
- opt.create_if_missing = true;
- TransactionDBOptions txn_opt;
- txn_opt.transaction_lock_timeout = 0;
- txn_opt.custom_mutex_factory = mutex_factory_;
- ASSERT_OK(TransactionDB::Open(opt, txn_opt, db_dir_, &db_));
-
- locker_.reset(new PointLockManager(
- static_cast<PessimisticTransactionDB*>(db_), txn_opt));
- }
-
- void TearDown() override {
- delete db_;
- EXPECT_OK(DestroyDir(env_, db_dir_));
- }
-
- PessimisticTransaction* NewTxn(
- TransactionOptions txn_opt = TransactionOptions()) {
- Transaction* txn = db_->BeginTransaction(WriteOptions(), txn_opt);
- return reinterpret_cast<PessimisticTransaction*>(txn);
- }
-
- protected:
- Env* env_;
- std::unique_ptr<PointLockManager> locker_;
-
- private:
- std::string db_dir_;
- std::shared_ptr<TransactionDBMutexFactory> mutex_factory_;
- TransactionDB* db_;
-};
-
+// This test is not applicable for Range Lock manager as Range Lock Manager
+// operates on Column Families, not their ids.
TEST_F(PointLockManagerTest, LockNonExistingColumnFamily) {
MockColumnFamilyHandle cf(1024);
locker_->RemoveColumnFamily(&cf);
@@ -121,6 +53,12 @@ TEST_F(PointLockManagerTest, LockStatus) {
}
}
+ // Cleanup
+ locker_->UnLock(txn1, 1024, "k1", env_);
+ locker_->UnLock(txn1, 2048, "k1", env_);
+ locker_->UnLock(txn2, 1024, "k2", env_);
+ locker_->UnLock(txn2, 2048, "k2", env_);
+
delete txn1;
delete txn2;
}
@@ -136,6 +74,9 @@ TEST_F(PointLockManagerTest, UnlockExclusive) {
auto txn2 = NewTxn();
ASSERT_OK(locker_->TryLock(txn2, 1, "k", env_, true));
+ // Cleanup
+ locker_->UnLock(txn2, 1, "k", env_);
+
delete txn1;
delete txn2;
}
@@ -151,162 +92,15 @@ TEST_F(PointLockManagerTest, UnlockShared) {
auto txn2 = NewTxn();
ASSERT_OK(locker_->TryLock(txn2, 1, "k", env_, true));
- delete txn1;
- delete txn2;
-}
-
-TEST_F(PointLockManagerTest, ReentrantExclusiveLock) {
- // Tests that a txn can acquire exclusive lock on the same key repeatedly.
- MockColumnFamilyHandle cf(1);
- locker_->AddColumnFamily(&cf);
- auto txn = NewTxn();
- ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, true));
- ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, true));
- delete txn;
-}
-
-TEST_F(PointLockManagerTest, ReentrantSharedLock) {
- // Tests that a txn can acquire shared lock on the same key repeatedly.
- MockColumnFamilyHandle cf(1);
- locker_->AddColumnFamily(&cf);
- auto txn = NewTxn();
- ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, false));
- ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, false));
- delete txn;
-}
-
-TEST_F(PointLockManagerTest, LockUpgrade) {
- // Tests that a txn can upgrade from a shared lock to an exclusive lock.
- MockColumnFamilyHandle cf(1);
- locker_->AddColumnFamily(&cf);
- auto txn = NewTxn();
- ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, false));
- ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, true));
- delete txn;
-}
-
-TEST_F(PointLockManagerTest, LockDowngrade) {
- // Tests that a txn can acquire a shared lock after acquiring an exclusive
- // lock on the same key.
- MockColumnFamilyHandle cf(1);
- locker_->AddColumnFamily(&cf);
- auto txn = NewTxn();
- ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, true));
- ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, false));
- delete txn;
-}
-
-TEST_F(PointLockManagerTest, LockConflict) {
- // Tests that lock conflicts lead to lock timeout.
- MockColumnFamilyHandle cf(1);
- locker_->AddColumnFamily(&cf);
- auto txn1 = NewTxn();
- auto txn2 = NewTxn();
-
- {
- // exclusive-exclusive conflict.
- ASSERT_OK(locker_->TryLock(txn1, 1, "k1", env_, true));
- auto s = locker_->TryLock(txn2, 1, "k1", env_, true);
- ASSERT_TRUE(s.IsTimedOut());
- }
-
- {
- // exclusive-shared conflict.
- ASSERT_OK(locker_->TryLock(txn1, 1, "k2", env_, true));
- auto s = locker_->TryLock(txn2, 1, "k2", env_, false);
- ASSERT_TRUE(s.IsTimedOut());
- }
-
- {
- // shared-exclusive conflict.
- ASSERT_OK(locker_->TryLock(txn1, 1, "k2", env_, false));
- auto s = locker_->TryLock(txn2, 1, "k2", env_, true);
- ASSERT_TRUE(s.IsTimedOut());
- }
-
- delete txn1;
- delete txn2;
-}
-
-port::Thread BlockUntilWaitingTxn(std::function<void()> f) {
- std::atomic<bool> reached(false);
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "PointLockManager::AcquireWithTimeout:WaitingTxn",
- [&](void* /*arg*/) { reached.store(true); });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
+ // Cleanup
+ locker_->UnLock(txn2, 1, "k", env_);
- port::Thread t(f);
-
- while (!reached.load()) {
- std::this_thread::sleep_for(std::chrono::milliseconds(100));
- }
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
-
- return t;
-}
-
-TEST_F(PointLockManagerTest, SharedLocks) {
- // Tests that shared locks can be concurrently held by multiple transactions.
- MockColumnFamilyHandle cf(1);
- locker_->AddColumnFamily(&cf);
- auto txn1 = NewTxn();
- auto txn2 = NewTxn();
- ASSERT_OK(locker_->TryLock(txn1, 1, "k", env_, false));
- ASSERT_OK(locker_->TryLock(txn2, 1, "k", env_, false));
delete txn1;
delete txn2;
}
-TEST_F(PointLockManagerTest, Deadlock) {
- // Tests that deadlock can be detected.
- // Deadlock scenario:
- // txn1 exclusively locks k1, and wants to lock k2;
- // txn2 exclusively locks k2, and wants to lock k1.
- MockColumnFamilyHandle cf(1);
- locker_->AddColumnFamily(&cf);
- TransactionOptions txn_opt;
- txn_opt.deadlock_detect = true;
- txn_opt.lock_timeout = 1000000;
- auto txn1 = NewTxn(txn_opt);
- auto txn2 = NewTxn(txn_opt);
-
- ASSERT_OK(locker_->TryLock(txn1, 1, "k1", env_, true));
- ASSERT_OK(locker_->TryLock(txn2, 1, "k2", env_, true));
-
- // txn1 tries to lock k2, will block forever.
- port::Thread t = BlockUntilWaitingTxn([&]() {
- // block because txn2 is holding a lock on k2.
- locker_->TryLock(txn1, 1, "k2", env_, true);
- });
-
- auto s = locker_->TryLock(txn2, 1, "k1", env_, true);
- ASSERT_TRUE(s.IsBusy());
- ASSERT_EQ(s.subcode(), Status::SubCode::kDeadlock);
-
- std::vector<DeadlockPath> deadlock_paths = locker_->GetDeadlockInfoBuffer();
- ASSERT_EQ(deadlock_paths.size(), 1u);
- ASSERT_FALSE(deadlock_paths[0].limit_exceeded);
-
- std::vector<DeadlockInfo> deadlocks = deadlock_paths[0].path;
- ASSERT_EQ(deadlocks.size(), 2u);
-
- ASSERT_EQ(deadlocks[0].m_txn_id, txn1->GetID());
- ASSERT_EQ(deadlocks[0].m_cf_id, 1u);
- ASSERT_TRUE(deadlocks[0].m_exclusive);
- ASSERT_EQ(deadlocks[0].m_waiting_key, "k2");
-
- ASSERT_EQ(deadlocks[1].m_txn_id, txn2->GetID());
- ASSERT_EQ(deadlocks[1].m_cf_id, 1u);
- ASSERT_TRUE(deadlocks[1].m_exclusive);
- ASSERT_EQ(deadlocks[1].m_waiting_key, "k1");
-
- locker_->UnLock(txn2, 1, "k2", env_);
- t.join();
-
- delete txn2;
- delete txn1;
-}
+// This test doesn't work with Range Lock Manager, because Range Lock Manager
+// doesn't support deadlock_detect_depth.
TEST_F(PointLockManagerTest, DeadlockDepthExceeded) {
// Tests that when detecting deadlock, if the detection depth is exceeded,
@@ -332,7 +126,7 @@ TEST_F(PointLockManagerTest, DeadlockDepthExceeded) {
// it must have another txn waiting on it, which is txn4 in this case.
ASSERT_OK(locker_->TryLock(txn1, 1, "k1", env_, true));
- port::Thread t1 = BlockUntilWaitingTxn([&]() {
+ port::Thread t1 = BlockUntilWaitingTxn(wait_sync_point_name_, [&]() {
ASSERT_OK(locker_->TryLock(txn2, 1, "k2", env_, true));
// block because txn1 is holding a lock on k1.
locker_->TryLock(txn2, 1, "k1", env_, true);
@@ -340,7 +134,7 @@ TEST_F(PointLockManagerTest, DeadlockDepthExceeded) {
ASSERT_OK(locker_->TryLock(txn3, 1, "k3", env_, true));
- port::Thread t2 = BlockUntilWaitingTxn([&]() {
+ port::Thread t2 = BlockUntilWaitingTxn(wait_sync_point_name_, [&]() {
// block because txn3 is holding a lock on k1.
locker_->TryLock(txn4, 1, "k3", env_, true);
});
@@ -364,6 +158,9 @@ TEST_F(PointLockManagerTest, DeadlockDepthExceeded) {
delete txn1;
}
+INSTANTIATE_TEST_CASE_P(PointLockManager, AnyLockManagerTest,
+ ::testing::Values(nullptr));
+
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
diff --git a/utilities/transactions/lock/point/point_lock_manager_test.h b/utilities/transactions/lock/point/point_lock_manager_test.h
new file mode 100644
index 000000000..63c580501
--- /dev/null
+++ b/utilities/transactions/lock/point/point_lock_manager_test.h
@@ -0,0 +1,276 @@
+
+#include "file/file_util.h"
+#include "port/port.h"
+#include "port/stack_trace.h"
+#include "rocksdb/utilities/transaction_db.h"
+#include "test_util/testharness.h"
+#include "test_util/testutil.h"
+#include "utilities/transactions/lock/point/point_lock_manager.h"
+#include "utilities/transactions/pessimistic_transaction_db.h"
+#include "utilities/transactions/transaction_db_mutex_impl.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class MockColumnFamilyHandle : public ColumnFamilyHandle {
+ public:
+ explicit MockColumnFamilyHandle(ColumnFamilyId cf_id) : cf_id_(cf_id) {}
+
+ ~MockColumnFamilyHandle() override {}
+
+ const std::string& GetName() const override { return name_; }
+
+ ColumnFamilyId GetID() const override { return cf_id_; }
+
+ Status GetDescriptor(ColumnFamilyDescriptor*) override {
+ return Status::OK();
+ }
+
+ const Comparator* GetComparator() const override {
+ return BytewiseComparator();
+ }
+
+ private:
+ ColumnFamilyId cf_id_;
+ std::string name_ = "MockCF";
+};
+
+class PointLockManagerTest : public testing::Test {
+ public:
+ void SetUp() override {
+ env_ = Env::Default();
+ db_dir_ = test::PerThreadDBPath("point_lock_manager_test");
+ ASSERT_OK(env_->CreateDir(db_dir_));
+
+ Options opt;
+ opt.create_if_missing = true;
+ TransactionDBOptions txn_opt;
+ txn_opt.transaction_lock_timeout = 0;
+
+ ASSERT_OK(TransactionDB::Open(opt, txn_opt, db_dir_, &db_));
+
+ // CAUTION: This test creates a separate lock manager object (right, NOT
+ // the one that the TransactionDB is using!), and runs tests on it.
+ locker_.reset(new PointLockManager(
+ static_cast<PessimisticTransactionDB*>(db_), txn_opt));
+
+ wait_sync_point_name_ = "PointLockManager::AcquireWithTimeout:WaitingTxn";
+ }
+
+ void TearDown() override {
+ delete db_;
+ EXPECT_OK(DestroyDir(env_, db_dir_));
+ }
+
+ PessimisticTransaction* NewTxn(
+ TransactionOptions txn_opt = TransactionOptions()) {
+ Transaction* txn = db_->BeginTransaction(WriteOptions(), txn_opt);
+ return reinterpret_cast<PessimisticTransaction*>(txn);
+ }
+
+ protected:
+ Env* env_;
+ std::shared_ptr<LockManager> locker_;
+ const char* wait_sync_point_name_;
+ friend void PointLockManagerTestExternalSetup(PointLockManagerTest*);
+
+ private:
+ std::string db_dir_;
+ TransactionDB* db_;
+};
+
+typedef void (*init_func_t)(PointLockManagerTest*);
+
+class AnyLockManagerTest : public PointLockManagerTest,
+ public testing::WithParamInterface<init_func_t> {
+ public:
+ void SetUp() override {
+ // If a custom setup function was provided, use it. Otherwise, use what we
+ // have inherited.
+ auto init_func = GetParam();
+ if (init_func)
+ (*init_func)(this);
+ else
+ PointLockManagerTest::SetUp();
+ }
+};
+
+TEST_P(AnyLockManagerTest, ReentrantExclusiveLock) {
+ // Tests that a txn can acquire exclusive lock on the same key repeatedly.
+ MockColumnFamilyHandle cf(1);
+ locker_->AddColumnFamily(&cf);
+ auto txn = NewTxn();
+ ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, true));
+ ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, true));
+
+ // Cleanup
+ locker_->UnLock(txn, 1, "k", env_);
+
+ delete txn;
+}
+
+TEST_P(AnyLockManagerTest, ReentrantSharedLock) {
+ // Tests that a txn can acquire shared lock on the same key repeatedly.
+ MockColumnFamilyHandle cf(1);
+ locker_->AddColumnFamily(&cf);
+ auto txn = NewTxn();
+ ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, false));
+ ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, false));
+
+ // Cleanup
+ locker_->UnLock(txn, 1, "k", env_);
+
+ delete txn;
+}
+
+TEST_P(AnyLockManagerTest, LockUpgrade) {
+ // Tests that a txn can upgrade from a shared lock to an exclusive lock.
+ MockColumnFamilyHandle cf(1);
+ locker_->AddColumnFamily(&cf);
+ auto txn = NewTxn();
+ ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, false));
+ ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, true));
+
+ // Cleanup
+ locker_->UnLock(txn, 1, "k", env_);
+ delete txn;
+}
+
+TEST_P(AnyLockManagerTest, LockDowngrade) {
+ // Tests that a txn can acquire a shared lock after acquiring an exclusive
+ // lock on the same key.
+ MockColumnFamilyHandle cf(1);
+ locker_->AddColumnFamily(&cf);
+ auto txn = NewTxn();
+ ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, true));
+ ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, false));
+
+ // Cleanup
+ locker_->UnLock(txn, 1, "k", env_);
+ delete txn;
+}
+
+TEST_P(AnyLockManagerTest, LockConflict) {
+ // Tests that lock conflicts lead to lock timeout.
+ MockColumnFamilyHandle cf(1);
+ locker_->AddColumnFamily(&cf);
+ auto txn1 = NewTxn();
+ auto txn2 = NewTxn();
+
+ {
+ // exclusive-exclusive conflict.
+ ASSERT_OK(locker_->TryLock(txn1, 1, "k1", env_, true));
+ auto s = locker_->TryLock(txn2, 1, "k1", env_, true);
+ ASSERT_TRUE(s.IsTimedOut());
+ }
+
+ {
+ // exclusive-shared conflict.
+ ASSERT_OK(locker_->TryLock(txn1, 1, "k2", env_, true));
+ auto s = locker_->TryLock(txn2, 1, "k2", env_, false);
+ ASSERT_TRUE(s.IsTimedOut());
+ }
+
+ {
+ // shared-exclusive conflict.
+ ASSERT_OK(locker_->TryLock(txn1, 1, "k2", env_, false));
+ auto s = locker_->TryLock(txn2, 1, "k2", env_, true);
+ ASSERT_TRUE(s.IsTimedOut());
+ }
+
+ // Cleanup
+ locker_->UnLock(txn1, 1, "k1", env_);
+ locker_->UnLock(txn1, 1, "k2", env_);
+
+ delete txn1;
+ delete txn2;
+}
+
+port::Thread BlockUntilWaitingTxn(const char* sync_point_name,
+ std::function<void()> f) {
+ std::atomic<bool> reached(false);
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
+ sync_point_name, [&](void* /*arg*/) { reached.store(true); });
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
+
+ port::Thread t(f);
+
+ while (!reached.load()) {
+ std::this_thread::sleep_for(std::chrono::milliseconds(100));
+ }
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
+
+ return t;
+}
+
+TEST_P(AnyLockManagerTest, SharedLocks) {
+ // Tests that shared locks can be concurrently held by multiple transactions.
+ MockColumnFamilyHandle cf(1);
+ locker_->AddColumnFamily(&cf);
+ auto txn1 = NewTxn();
+ auto txn2 = NewTxn();
+ ASSERT_OK(locker_->TryLock(txn1, 1, "k", env_, false));
+ ASSERT_OK(locker_->TryLock(txn2, 1, "k", env_, false));
+
+ // Cleanup
+ locker_->UnLock(txn1, 1, "k", env_);
+ locker_->UnLock(txn2, 1, "k", env_);
+
+ delete txn1;
+ delete txn2;
+}
+
+TEST_P(AnyLockManagerTest, Deadlock) {
+ // Tests that deadlock can be detected.
+ // Deadlock scenario:
+ // txn1 exclusively locks k1, and wants to lock k2;
+ // txn2 exclusively locks k2, and wants to lock k1.
+ MockColumnFamilyHandle cf(1);
+ locker_->AddColumnFamily(&cf);
+ TransactionOptions txn_opt;
+ txn_opt.deadlock_detect = true;
+ txn_opt.lock_timeout = 1000000;
+ auto txn1 = NewTxn(txn_opt);
+ auto txn2 = NewTxn(txn_opt);
+
+ ASSERT_OK(locker_->TryLock(txn1, 1, "k1", env_, true));
+ ASSERT_OK(locker_->TryLock(txn2, 1, "k2", env_, true));
+
+ // txn1 tries to lock k2, will block forever.
+ port::Thread t = BlockUntilWaitingTxn(wait_sync_point_name_, [&]() {
+ // block because txn2 is holding a lock on k2.
+ locker_->TryLock(txn1, 1, "k2", env_, true);
+ });
+
+ auto s = locker_->TryLock(txn2, 1, "k1", env_, true);
+ ASSERT_TRUE(s.IsBusy());
+ ASSERT_EQ(s.subcode(), Status::SubCode::kDeadlock);
+
+ std::vector<DeadlockPath> deadlock_paths = locker_->GetDeadlockInfoBuffer();
+ ASSERT_EQ(deadlock_paths.size(), 1u);
+ ASSERT_FALSE(deadlock_paths[0].limit_exceeded);
+
+ std::vector<DeadlockInfo> deadlocks = deadlock_paths[0].path;
+ ASSERT_EQ(deadlocks.size(), 2u);
+
+ ASSERT_EQ(deadlocks[0].m_txn_id, txn1->GetID());
+ ASSERT_EQ(deadlocks[0].m_cf_id, 1u);
+ ASSERT_TRUE(deadlocks[0].m_exclusive);
+ ASSERT_EQ(deadlocks[0].m_waiting_key, "k2");
+
+ ASSERT_EQ(deadlocks[1].m_txn_id, txn2->GetID());
+ ASSERT_EQ(deadlocks[1].m_cf_id, 1u);
+ ASSERT_TRUE(deadlocks[1].m_exclusive);
+ ASSERT_EQ(deadlocks[1].m_waiting_key, "k1");
+
+ locker_->UnLock(txn2, 1, "k2", env_);
+ t.join();
+
+ // Cleanup
+ locker_->UnLock(txn1, 1, "k1", env_);
+ locker_->UnLock(txn1, 1, "k2", env_);
+ delete txn2;
+ delete txn1;
+}
+
+} // namespace ROCKSDB_NAMESPACE
diff --git a/utilities/transactions/lock/point/point_lock_tracker.cc b/utilities/transactions/lock/point/point_lock_tracker.cc
index 22eb6a0b8..837f377de 100644
--- a/utilities/transactions/lock/point/point_lock_tracker.cc
+++ b/utilities/transactions/lock/point/point_lock_tracker.cc
@@ -3,6 +3,8 @@
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
+#ifndef ROCKSDB_LITE
+
#include "utilities/transactions/lock/point/point_lock_tracker.h"
namespace ROCKSDB_NAMESPACE {
@@ -264,3 +266,5 @@ LockTracker::KeyIterator* PointLockTracker::GetKeyIterator(
void PointLockTracker::Clear() { tracked_keys_.clear(); }
} // namespace ROCKSDB_NAMESPACE
+
+#endif // ROCKSDB_LITE
diff --git a/utilities/transactions/lock/point/point_lock_tracker.h b/utilities/transactions/lock/point/point_lock_tracker.h
index 57e1b8437..daf6f9aa2 100644
--- a/utilities/transactions/lock/point/point_lock_tracker.h
+++ b/utilities/transactions/lock/point/point_lock_tracker.h
@@ -4,6 +4,7 @@
// (found in the LICENSE.Apache file in the root directory).
#pragma once
+#ifndef ROCKSDB_LITE
#include <memory>
#include <string>
@@ -95,3 +96,4 @@ class PointLockTrackerFactory : public LockTrackerFactory {
};
} // namespace ROCKSDB_NAMESPACE
+#endif // ROCKSDB_LITE
diff --git a/utilities/transactions/lock/range/range_lock_manager.h b/utilities/transactions/lock/range/range_lock_manager.h
new file mode 100644
index 000000000..91619934b
--- /dev/null
+++ b/utilities/transactions/lock/range/range_lock_manager.h
@@ -0,0 +1,30 @@
+//
+// Generic definitions for a Range-based Lock Manager
+//
+#pragma once
+#ifndef ROCKSDB_LITE
+
+#include "utilities/transactions/lock/lock_manager.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+/*
+ A base class for all Range-based lock managers
+
+ See also class RangeLockManagerHandle in
+ include/rocksdb/utilities/transaction_db.h
+*/
+class RangeLockManagerBase : public LockManager {
+ public:
+ // Geting a point lock is reduced to getting a range lock on a single-point
+ // range
+ using LockManager::TryLock;
+ Status TryLock(PessimisticTransaction* txn, ColumnFamilyId column_family_id,
+ const std::string& key, Env* env, bool exclusive) override {
+ Endpoint endp(key.data(), key.size(), false);
+ return TryLock(txn, column_family_id, endp, endp, env, exclusive);
+ }
+};
+
+} // namespace ROCKSDB_NAMESPACE
+#endif // ROCKSDB_LITE
diff --git a/utilities/transactions/pessimistic_transaction.cc b/utilities/transactions/pessimistic_transaction.cc
index 6531773ec..e8a8a6f0d 100644
--- a/utilities/transactions/pessimistic_transaction.cc
+++ b/utilities/transactions/pessimistic_transaction.cc
@@ -558,9 +558,20 @@ Status PessimisticTransaction::TryLock(ColumnFamilyHandle* column_family,
}
uint32_t cfh_id = GetColumnFamilyID(column_family);
std::string key_str = key.ToString();
- PointLockStatus status = tracked_locks_->GetPointLockStatus(cfh_id, key_str);
- bool previously_locked = status.locked;
- bool lock_upgrade = previously_locked && exclusive && !status.exclusive;
+
+ PointLockStatus status;
+ bool lock_upgrade;
+ bool previously_locked;
+ if (tracked_locks_->IsPointLockSupported()) {
+ status = tracked_locks_->GetPointLockStatus(cfh_id, key_str);
+ previously_locked = status.locked;
+ lock_upgrade = previously_locked && exclusive && !status.exclusive;
+ } else {
+ // If the record is tracked, we can assume it was locked, too.
+ previously_locked = assume_tracked;
+ status.locked = false;
+ lock_upgrade = false;
+ }
// Lock this key if this transactions hasn't already locked it or we require
// an upgrade.
@@ -579,7 +590,8 @@ Status PessimisticTransaction::TryLock(ColumnFamilyHandle* column_family,
SequenceNumber tracked_at_seq =
status.locked ? status.seq : kMaxSequenceNumber;
if (!do_validate || snapshot_ == nullptr) {
- if (assume_tracked && !previously_locked) {
+ if (assume_tracked && !previously_locked &&
+ tracked_locks_->IsPointLockSupported()) {
s = Status::InvalidArgument(
"assume_tracked is set but it is not tracked yet");
}
@@ -636,11 +648,13 @@ Status PessimisticTransaction::TryLock(ColumnFamilyHandle* column_family,
TrackKey(cfh_id, key_str, tracked_at_seq, read_only, exclusive);
} else {
#ifndef NDEBUG
- PointLockStatus lock_status =
- tracked_locks_->GetPointLockStatus(cfh_id, key_str);
- assert(lock_status.locked);
- assert(lock_status.seq <= tracked_at_seq);
- assert(lock_status.exclusive == exclusive);
+ if (tracked_locks_->IsPointLockSupported()) {
+ PointLockStatus lock_status =
+ tracked_locks_->GetPointLockStatus(cfh_id, key_str);
+ assert(lock_status.locked);
+ assert(lock_status.seq <= tracked_at_seq);
+ assert(lock_status.exclusive == exclusive);
+ }
#endif
}
}
@@ -648,6 +662,22 @@ Status PessimisticTransaction::TryLock(ColumnFamilyHandle* column_family,
return s;
}
+Status PessimisticTransaction::GetRangeLock(ColumnFamilyHandle* column_family,
+ const Endpoint& start_endp,
+ const Endpoint& end_endp) {
+ ColumnFamilyHandle* cfh =
+ column_family ? column_family : db_impl_->DefaultColumnFamily();
+ uint32_t cfh_id = GetColumnFamilyID(cfh);
+
+ Status s = txn_db_impl_->TryRangeLock(this, cfh_id, start_endp, end_endp);
+
+ if (s.ok()) {
+ RangeLockRequest req{cfh_id, start_endp, end_endp};
+ tracked_locks_->Track(req);
+ }
+ return s;
+}
+
// Return OK() if this key has not been modified more recently than the
// transaction snapshot_.
// tracked_at_seq is the global seq at which we either locked the key or already
diff --git a/utilities/transactions/pessimistic_transaction.h b/utilities/transactions/pessimistic_transaction.h
index 98eea7e2d..6c5754ac6 100644
--- a/utilities/transactions/pessimistic_transaction.h
+++ b/utilities/transactions/pessimistic_transaction.h
@@ -116,6 +116,10 @@ class PessimisticTransaction : public TransactionBaseImpl {
int64_t GetDeadlockDetectDepth() const { return deadlock_detect_depth_; }
+ virtual Status GetRangeLock(ColumnFamilyHandle* column_family,
+ const Endpoint& start_key,
+ const Endpoint& end_key) override;
+
protected:
// Refer to
// TransactionOptions::use_only_the_last_commit_time_batch_for_recovery
diff --git a/utilities/transactions/pessimistic_transaction_db.cc b/utilities/transactions/pessimistic_transaction_db.cc
index 73520f9ab..2c69f7359 100644
--- a/utilities/transactions/pessimistic_transaction_db.cc
+++ b/utilities/transactions/pessimistic_transaction_db.cc
@@ -391,6 +391,14 @@ Status PessimisticTransactionDB::TryLock(PessimisticTransaction* txn,
return lock_manager_->TryLock(txn, cfh_id, key, GetEnv(), exclusive);
}
+Status PessimisticTransactionDB::TryRangeLock(PessimisticTransaction* txn,
+ uint32_t cfh_id,
+ const Endpoint& start_endp,
+ const Endpoint& end_endp) {
+ return lock_manager_->TryLock(txn, cfh_id, start_endp, end_endp, GetEnv(),
+ /*exclusive=*/true);
+}
+
void PessimisticTransactionDB::UnLock(PessimisticTransaction* txn,
const LockTracker& keys) {
lock_manager_->UnLock(txn, keys, GetEnv());
diff --git a/utilities/transactions/pessimistic_transaction_db.h b/utilities/transactions/pessimistic_transaction_db.h
index 619a83e97..eb0dd2f05 100644
--- a/utilities/transactions/pessimistic_transaction_db.h
+++ b/utilities/transactions/pessimistic_transaction_db.h
@@ -21,6 +21,7 @@
#include "rocksdb/utilities/transaction_db.h"
#include "util/cast_util.h"
#include "utilities/transactions/lock/lock_manager.h"
+#include "utilities/transactions/lock/range/range_lock_manager.h"
#include "utilities/transactions/pessimistic_transaction.h"
#include "utilities/transactions/write_prepared_txn.h"
@@ -98,6 +99,8 @@ class PessimisticTransactionDB : public TransactionDB {
Status TryLock(PessimisticTransaction* txn, uint32_t cfh_id,
const std::string& key, bool exclusive);
+ Status TryRangeLock(PessimisticTransaction* txn, uint32_t cfh_id,
+ const Endpoint& start_endp, const Endpoint& end_endp);
void UnLock(PessimisticTransaction* txn, const LockTracker& keys);
void UnLock(PessimisticTransaction* txn, uint32_t cfh_id,
@@ -172,7 +175,7 @@ class PessimisticTransactionDB : public TransactionDB {
friend class WriteUnpreparedTransactionTest_RecoveryTest_Test;
friend class WriteUnpreparedTransactionTest_MarkLogWithPrepSection_Test;
- std::unique_ptr<LockManager> lock_manager_;
+ std::shared_ptr<LockManager> lock_manager_;
// Must be held when adding/dropping column families.
InstrumentedMutex column_family_mutex_;
1
0
revision-id: 55e2dc5e4f2f06efb25b53c48020dce5ea819b66 (v5.8-3069-g55e2dc5e4)
parent(s): ce0df6a41388f73fac44be21a0629854de2cf836
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2020-11-16 23:38:46 +0300
message:
make check-format
---
include/rocksdb/utilities/transaction_db.h | 2 +-
.../transactions/lock/point/point_lock_manager.h | 7 +++----
.../lock/point/point_lock_manager_test.cc | 3 +--
.../lock/point/point_lock_manager_test.h | 17 ++++++-----------
.../transactions/lock/range/range_locking_test.cc | 7 ++-----
.../range/range_tree/lib/locktree/lock_request.cc | 2 +-
.../range/range_tree/lib/locktree/lock_request.h | 2 +-
.../lock/range/range_tree/range_tree_lock_manager.cc | 20 ++++++++++----------
8 files changed, 25 insertions(+), 35 deletions(-)
diff --git a/include/rocksdb/utilities/transaction_db.h b/include/rocksdb/utilities/transaction_db.h
index 7b89ffb77..b9daf84a3 100644
--- a/include/rocksdb/utilities/transaction_db.h
+++ b/include/rocksdb/utilities/transaction_db.h
@@ -72,7 +72,7 @@ struct RangeDeadlockPath {
int64_t deadlock_time;
explicit RangeDeadlockPath(std::vector<RangeDeadlockInfo> path_entry,
- const int64_t& dl_time)
+ const int64_t& dl_time)
: path(path_entry), limit_exceeded(false), deadlock_time(dl_time) {}
// empty path, limit exceeded constructor and default constructor
diff --git a/utilities/transactions/lock/point/point_lock_manager.h b/utilities/transactions/lock/point/point_lock_manager.h
index 966700c2a..3c541eb3a 100644
--- a/utilities/transactions/lock/point/point_lock_manager.h
+++ b/utilities/transactions/lock/point/point_lock_manager.h
@@ -27,15 +27,13 @@ struct LockInfo;
struct LockMap;
struct LockMapStripe;
-
-template<class Path>
+template <class Path>
class DeadlockInfoBufferTempl {
private:
std::vector<Path> paths_buffer_;
uint32_t buffer_idx_;
std::mutex paths_buffer_mutex_;
-
std::vector<Path> Normalize() {
auto working = paths_buffer_;
@@ -47,7 +45,8 @@ class DeadlockInfoBufferTempl {
if (paths_buffer_[buffer_idx_].empty()) {
working.resize(buffer_idx_);
} else {
- std::rotate(working.begin(), working.begin() + buffer_idx_, working.end());
+ std::rotate(working.begin(), working.begin() + buffer_idx_,
+ working.end());
}
return working;
diff --git a/utilities/transactions/lock/point/point_lock_manager_test.cc b/utilities/transactions/lock/point/point_lock_manager_test.cc
index 367f3310c..206a0395a 100644
--- a/utilities/transactions/lock/point/point_lock_manager_test.cc
+++ b/utilities/transactions/lock/point/point_lock_manager_test.cc
@@ -40,8 +40,7 @@ TEST_F(PointLockManagerTest, LockStatus) {
ASSERT_EQ(s.count(cf_id), 2u);
auto range = s.equal_range(cf_id);
for (auto it = range.first; it != range.second; it++) {
- ASSERT_TRUE(it->second.key == "k1" ||
- it->second.key == "k2");
+ ASSERT_TRUE(it->second.key == "k1" || it->second.key == "k2");
if (it->second.key == "k1") {
ASSERT_EQ(it->second.exclusive, true);
ASSERT_EQ(it->second.ids.size(), 1u);
diff --git a/utilities/transactions/lock/point/point_lock_manager_test.h b/utilities/transactions/lock/point/point_lock_manager_test.h
index b6534b5ca..63c580501 100644
--- a/utilities/transactions/lock/point/point_lock_manager_test.h
+++ b/utilities/transactions/lock/point/point_lock_manager_test.h
@@ -1,14 +1,12 @@
-#include "utilities/transactions/lock/point/point_lock_manager.h"
-
#include "file/file_util.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "rocksdb/utilities/transaction_db.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
+#include "utilities/transactions/lock/point/point_lock_manager.h"
#include "utilities/transactions/pessimistic_transaction_db.h"
-
#include "utilities/transactions/transaction_db_mutex_impl.h"
namespace ROCKSDB_NAMESPACE {
@@ -53,7 +51,7 @@ class PointLockManagerTest : public testing::Test {
// CAUTION: This test creates a separate lock manager object (right, NOT
// the one that the TransactionDB is using!), and runs tests on it.
locker_.reset(new PointLockManager(
- static_cast<PessimisticTransactionDB*>(db_), txn_opt));
+ static_cast<PessimisticTransactionDB*>(db_), txn_opt));
wait_sync_point_name_ = "PointLockManager::AcquireWithTimeout:WaitingTxn";
}
@@ -72,7 +70,7 @@ class PointLockManagerTest : public testing::Test {
protected:
Env* env_;
std::shared_ptr<LockManager> locker_;
- const char *wait_sync_point_name_;
+ const char* wait_sync_point_name_;
friend void PointLockManagerTestExternalSetup(PointLockManagerTest*);
private:
@@ -80,13 +78,11 @@ class PointLockManagerTest : public testing::Test {
TransactionDB* db_;
};
-
typedef void (*init_func_t)(PointLockManagerTest*);
class AnyLockManagerTest : public PointLockManagerTest,
- public testing::WithParamInterface<init_func_t>
-{
-public:
+ public testing::WithParamInterface<init_func_t> {
+ public:
void SetUp() override {
// If a custom setup function was provided, use it. Otherwise, use what we
// have inherited.
@@ -189,7 +185,7 @@ TEST_P(AnyLockManagerTest, LockConflict) {
delete txn2;
}
-port::Thread BlockUntilWaitingTxn(const char *sync_point_name,
+port::Thread BlockUntilWaitingTxn(const char* sync_point_name,
std::function<void()> f) {
std::atomic<bool> reached(false);
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
@@ -278,4 +274,3 @@ TEST_P(AnyLockManagerTest, Deadlock) {
}
} // namespace ROCKSDB_NAMESPACE
-
diff --git a/utilities/transactions/lock/range/range_locking_test.cc b/utilities/transactions/lock/range/range_locking_test.cc
index 48d96d7d0..b5051dbee 100644
--- a/utilities/transactions/lock/range/range_locking_test.cc
+++ b/utilities/transactions/lock/range/range_locking_test.cc
@@ -17,11 +17,10 @@
#include "rocksdb/perf_context.h"
#include "rocksdb/utilities/transaction.h"
#include "rocksdb/utilities/transaction_db.h"
+#include "utilities/transactions/lock/point/point_lock_manager_test.h"
#include "utilities/transactions/pessimistic_transaction_db.h"
#include "utilities/transactions/transaction_test.h"
-#include "utilities/transactions/lock/point/point_lock_manager_test.h"
-
using std::string;
namespace ROCKSDB_NAMESPACE {
@@ -221,9 +220,7 @@ TEST_F(RangeLockingTest, MultipleTrxLockStatusData) {
delete txn1;
}
-
-void PointLockManagerTestExternalSetup(PointLockManagerTest* self)
-{
+void PointLockManagerTestExternalSetup(PointLockManagerTest* self) {
self->env_ = Env::Default();
self->db_dir_ = test::PerThreadDBPath("point_lock_manager_test");
ASSERT_OK(self->env_->CreateDir(self->db_dir_));
diff --git a/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc b/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc
index 471eb42b7..9df8f3cb3 100644
--- a/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc
+++ b/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc
@@ -179,7 +179,7 @@ bool lock_request::deadlock_exists(const txnid_set &conflicts) {
lock_request *req = find_lock_request(a);
if (req) {
m_deadlock_cb(req->m_txnid, (req->m_type == lock_request::WRITE),
- req->m_left_key,req->m_right_key);
+ req->m_left_key, req->m_right_key);
}
};
}
diff --git a/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.h b/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.h
index f2e8bcb54..1dce4973f 100644
--- a/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.h
+++ b/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.h
@@ -226,7 +226,7 @@ class lock_request {
void (*m_retry_test_callback)(void);
public:
- std::function<void(TXNID, bool, const DBT*, const DBT*)> m_deadlock_cb;
+ std::function<void(TXNID, bool, const DBT *, const DBT *)> m_deadlock_cb;
friend class lock_request_unit_test;
};
diff --git a/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc b/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc
index 7e039083d..32c678688 100644
--- a/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc
+++ b/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc
@@ -96,7 +96,7 @@ Status RangeTreeLockManager::TryLock(PessimisticTransaction* txn,
std::vector<RangeDeadlockInfo> di_path;
request.m_deadlock_cb = [&](TXNID txnid, bool is_exclusive,
- const DBT *start_dbt, const DBT *end_dbt) {
+ const DBT* start_dbt, const DBT* end_dbt) {
EndpointWithString start;
EndpointWithString end;
deserialize_endpoint(start_dbt, &start);
@@ -161,8 +161,8 @@ Status RangeTreeLockManager::TryLock(PessimisticTransaction* txn,
return Status::Busy(Status::SubCode::kLockLimit);
case DB_LOCK_DEADLOCK: {
std::reverse(di_path.begin(), di_path.end());
- dlock_buffer_.AddNewPath(RangeDeadlockPath(di_path,
- request.get_start_time()));
+ dlock_buffer_.AddNewPath(
+ RangeDeadlockPath(di_path, request.get_start_time()));
return Status::Busy(Status::SubCode::kDeadlock);
}
default:
@@ -331,7 +331,8 @@ RangeTreeLockManager::RangeTreeLockManager(
ltm_.create(on_create, on_destroy, on_escalate, NULL, mutex_factory_);
}
-void RangeTreeLockManager::SetRangeDeadlockInfoBufferSize(uint32_t target_size) {
+void RangeTreeLockManager::SetRangeDeadlockInfoBufferSize(
+ uint32_t target_size) {
dlock_buffer_.Resize(target_size);
}
@@ -339,7 +340,8 @@ void RangeTreeLockManager::Resize(uint32_t target_size) {
SetRangeDeadlockInfoBufferSize(target_size);
}
-std::vector<RangeDeadlockPath> RangeTreeLockManager::GetRangeDeadlockInfoBuffer() {
+std::vector<RangeDeadlockPath>
+RangeTreeLockManager::GetRangeDeadlockInfoBuffer() {
return dlock_buffer_.PrepareBuffer();
}
@@ -351,15 +353,14 @@ std::vector<DeadlockPath> RangeTreeLockManager::GetDeadlockInfoBuffer() {
std::vector<DeadlockInfo> path;
for (auto it2 = it->path.begin(); it2 != it->path.end(); ++it2) {
- path.push_back({it2->m_txn_id, it2->m_cf_id, it2->m_exclusive,
- it2->m_start.slice});
+ path.push_back(
+ {it2->m_txn_id, it2->m_cf_id, it2->m_exclusive, it2->m_start.slice});
}
res.push_back(DeadlockPath(path, it->deadlock_time));
}
return res;
}
-
/*
@brief Lock Escalation Callback function
@@ -528,7 +529,6 @@ struct LOCK_PRINT_CONTEXT {
uint32_t cfh_id; // Column Family whose tree we are traversing
};
-
// Report left endpoints of the acquired locks
LockManager::PointLockStatus RangeTreeLockManager::GetPointLockStatus() {
PointLockStatus res;
@@ -536,7 +536,7 @@ LockManager::PointLockStatus RangeTreeLockManager::GetPointLockStatus() {
// report left endpoints
for (auto it = data.begin(); it != data.end(); ++it) {
auto& val = it->second;
- res.insert({ it->first, { val.start.slice, val.ids, val.exclusive}});
+ res.insert({it->first, {val.start.slice, val.ids, val.exclusive}});
}
return res;
}
1
0
revision-id: ce0df6a41388f73fac44be21a0629854de2cf836 (v5.8-3068-gce0df6a41)
parent(s): d47cf5d0bff01681f18f2e0bb49558574140cf7c
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2020-11-16 23:33:07 +0300
message:
Address input
- Report range-based deadlocks
- point_lock_manager_test should only test point lock manager
- however certain tests can be reused to make sure that range
lock manager can serve as its substitute.
---
include/rocksdb/utilities/transaction_db.h | 42 ++-
.../transactions/lock/point/point_lock_manager.cc | 58 ----
.../transactions/lock/point/point_lock_manager.h | 73 ++++-
.../lock/point/point_lock_manager_test.cc | 322 +--------------------
.../lock/point/point_lock_manager_test.h | 281 ++++++++++++++++++
.../transactions/lock/range/range_locking_test.cc | 28 ++
.../range/range_tree/lib/locktree/lock_request.cc | 3 +-
.../range/range_tree/lib/locktree/lock_request.h | 2 +-
.../range/range_tree/range_tree_lock_manager.cc | 68 +++--
.../range/range_tree/range_tree_lock_manager.h | 17 +-
.../range/range_tree/range_tree_lock_tracker.h | 5 +-
11 files changed, 483 insertions(+), 416 deletions(-)
diff --git a/include/rocksdb/utilities/transaction_db.h b/include/rocksdb/utilities/transaction_db.h
index 1175bc8d9..7b89ffb77 100644
--- a/include/rocksdb/utilities/transaction_db.h
+++ b/include/rocksdb/utilities/transaction_db.h
@@ -51,6 +51,37 @@ class LockManagerHandle {
virtual ~LockManagerHandle() {}
};
+// Same as class Endpoint, but use std::string to manage the buffer allocation
+struct EndpointWithString {
+ std::string slice;
+ bool inf_suffix;
+};
+
+struct RangeDeadlockInfo {
+ TransactionID m_txn_id;
+ uint32_t m_cf_id;
+ bool m_exclusive;
+
+ EndpointWithString m_start;
+ EndpointWithString m_end;
+};
+
+struct RangeDeadlockPath {
+ std::vector<RangeDeadlockInfo> path;
+ bool limit_exceeded;
+ int64_t deadlock_time;
+
+ explicit RangeDeadlockPath(std::vector<RangeDeadlockInfo> path_entry,
+ const int64_t& dl_time)
+ : path(path_entry), limit_exceeded(false), deadlock_time(dl_time) {}
+
+ // empty path, limit exceeded constructor and default constructor
+ explicit RangeDeadlockPath(const int64_t& dl_time = 0, bool limit = false)
+ : path(0), limit_exceeded(limit), deadlock_time(dl_time) {}
+
+ bool empty() { return path.empty() && !limit_exceeded; }
+};
+
// A handle to control RangeLockManager (Range-based lock manager) from outside
// RocksDB
class RangeLockManagerHandle : public LockManagerHandle {
@@ -76,6 +107,11 @@ class RangeLockManagerHandle : public LockManagerHandle {
// Get the current counter values
virtual Counters GetStatus() = 0;
+
+ // Functions for range-based Deadlock reporting.
+ virtual std::vector<RangeDeadlockPath> GetRangeDeadlockInfoBuffer() = 0;
+ virtual void SetRangeDeadlockInfoBufferSize(uint32_t target_size) = 0;
+
virtual ~RangeLockManagerHandle(){};
};
@@ -262,12 +298,6 @@ struct KeyLockInfo {
bool exclusive;
};
-// Same as class Endpoint, but use std::string to manage the buffer allocation
-struct EndpointWithString {
- std::string slice;
- bool inf_suffix;
-};
-
struct RangeLockInfo {
EndpointWithString start;
EndpointWithString end;
diff --git a/utilities/transactions/lock/point/point_lock_manager.cc b/utilities/transactions/lock/point/point_lock_manager.cc
index 0ca6e38f0..0bb7e8a40 100644
--- a/utilities/transactions/lock/point/point_lock_manager.cc
+++ b/utilities/transactions/lock/point/point_lock_manager.cc
@@ -94,64 +94,6 @@ struct LockMap {
size_t GetStripe(const std::string& key) const;
};
-void DeadlockInfoBuffer::AddNewPath(DeadlockPath path) {
- std::lock_guard<std::mutex> lock(paths_buffer_mutex_);
-
- if (paths_buffer_.empty()) {
- return;
- }
-
- paths_buffer_[buffer_idx_] = std::move(path);
- buffer_idx_ = (buffer_idx_ + 1) % paths_buffer_.size();
-}
-
-void DeadlockInfoBuffer::Resize(uint32_t target_size) {
- std::lock_guard<std::mutex> lock(paths_buffer_mutex_);
-
- paths_buffer_ = Normalize();
-
- // Drop the deadlocks that will no longer be needed ater the normalize
- if (target_size < paths_buffer_.size()) {
- paths_buffer_.erase(
- paths_buffer_.begin(),
- paths_buffer_.begin() + (paths_buffer_.size() - target_size));
- buffer_idx_ = 0;
- }
- // Resize the buffer to the target size and restore the buffer's idx
- else {
- auto prev_size = paths_buffer_.size();
- paths_buffer_.resize(target_size);
- buffer_idx_ = (uint32_t)prev_size;
- }
-}
-
-std::vector<DeadlockPath> DeadlockInfoBuffer::Normalize() {
- auto working = paths_buffer_;
-
- if (working.empty()) {
- return working;
- }
-
- // Next write occurs at a nonexistent path's slot
- if (paths_buffer_[buffer_idx_].empty()) {
- working.resize(buffer_idx_);
- } else {
- std::rotate(working.begin(), working.begin() + buffer_idx_, working.end());
- }
-
- return working;
-}
-
-std::vector<DeadlockPath> DeadlockInfoBuffer::PrepareBuffer() {
- std::lock_guard<std::mutex> lock(paths_buffer_mutex_);
-
- // Reversing the normalized vector returns the latest deadlocks first
- auto working = Normalize();
- std::reverse(working.begin(), working.end());
-
- return working;
-}
-
namespace {
void UnrefLockMapsCache(void* ptr) {
// Called when a thread exits or a ThreadLocalPtr gets destroyed.
diff --git a/utilities/transactions/lock/point/point_lock_manager.h b/utilities/transactions/lock/point/point_lock_manager.h
index 3f4328d89..966700c2a 100644
--- a/utilities/transactions/lock/point/point_lock_manager.h
+++ b/utilities/transactions/lock/point/point_lock_manager.h
@@ -27,21 +27,80 @@ struct LockInfo;
struct LockMap;
struct LockMapStripe;
-struct DeadlockInfoBuffer {
+
+template<class Path>
+class DeadlockInfoBufferTempl {
private:
- std::vector<DeadlockPath> paths_buffer_;
+ std::vector<Path> paths_buffer_;
uint32_t buffer_idx_;
std::mutex paths_buffer_mutex_;
- std::vector<DeadlockPath> Normalize();
+
+
+ std::vector<Path> Normalize() {
+ auto working = paths_buffer_;
+
+ if (working.empty()) {
+ return working;
+ }
+
+ // Next write occurs at a nonexistent path's slot
+ if (paths_buffer_[buffer_idx_].empty()) {
+ working.resize(buffer_idx_);
+ } else {
+ std::rotate(working.begin(), working.begin() + buffer_idx_, working.end());
+ }
+
+ return working;
+ }
public:
- explicit DeadlockInfoBuffer(uint32_t n_latest_dlocks)
+ explicit DeadlockInfoBufferTempl(uint32_t n_latest_dlocks)
: paths_buffer_(n_latest_dlocks), buffer_idx_(0) {}
- void AddNewPath(DeadlockPath path);
- void Resize(uint32_t target_size);
- std::vector<DeadlockPath> PrepareBuffer();
+
+ void AddNewPath(Path path) {
+ std::lock_guard<std::mutex> lock(paths_buffer_mutex_);
+
+ if (paths_buffer_.empty()) {
+ return;
+ }
+
+ paths_buffer_[buffer_idx_] = std::move(path);
+ buffer_idx_ = (buffer_idx_ + 1) % paths_buffer_.size();
+ }
+
+ void Resize(uint32_t target_size) {
+ std::lock_guard<std::mutex> lock(paths_buffer_mutex_);
+
+ paths_buffer_ = Normalize();
+
+ // Drop the deadlocks that will no longer be needed ater the normalize
+ if (target_size < paths_buffer_.size()) {
+ paths_buffer_.erase(
+ paths_buffer_.begin(),
+ paths_buffer_.begin() + (paths_buffer_.size() - target_size));
+ buffer_idx_ = 0;
+ }
+ // Resize the buffer to the target size and restore the buffer's idx
+ else {
+ auto prev_size = paths_buffer_.size();
+ paths_buffer_.resize(target_size);
+ buffer_idx_ = (uint32_t)prev_size;
+ }
+ }
+
+ std::vector<Path> PrepareBuffer() {
+ std::lock_guard<std::mutex> lock(paths_buffer_mutex_);
+
+ // Reversing the normalized vector returns the latest deadlocks first
+ auto working = Normalize();
+ std::reverse(working.begin(), working.end());
+
+ return working;
+ }
};
+typedef DeadlockInfoBufferTempl<DeadlockPath> DeadlockInfoBuffer;
+
struct TrackedTrxInfo {
autovector<TransactionID> m_neighbors;
uint32_t m_cf_id;
diff --git a/utilities/transactions/lock/point/point_lock_manager_test.cc b/utilities/transactions/lock/point/point_lock_manager_test.cc
index 21d6b680f..367f3310c 100644
--- a/utilities/transactions/lock/point/point_lock_manager_test.cc
+++ b/utilities/transactions/lock/point/point_lock_manager_test.cc
@@ -5,128 +5,10 @@
#ifndef ROCKSDB_LITE
-#ifndef OS_WIN
-#define ENABLE_RANGE_LOCKING_TESTS
-#endif
-
-#include "utilities/transactions/lock/point/point_lock_manager.h"
-
-#include "file/file_util.h"
-#include "port/port.h"
-#include "port/stack_trace.h"
-#include "rocksdb/utilities/transaction_db.h"
-#include "test_util/testharness.h"
-#include "test_util/testutil.h"
-#include "utilities/transactions/pessimistic_transaction_db.h"
-
-#ifdef ENABLE_RANGE_LOCKING_TESTS
-#include "utilities/transactions/lock/range/range_lock_manager.h"
-#endif
-
-#include "utilities/transactions/transaction_db_mutex_impl.h"
+#include "point_lock_manager_test.h"
namespace ROCKSDB_NAMESPACE {
-class MockColumnFamilyHandle : public ColumnFamilyHandle {
- public:
- explicit MockColumnFamilyHandle(ColumnFamilyId cf_id) : cf_id_(cf_id) {}
-
- ~MockColumnFamilyHandle() override {}
-
- const std::string& GetName() const override { return name_; }
-
- ColumnFamilyId GetID() const override { return cf_id_; }
-
- Status GetDescriptor(ColumnFamilyDescriptor*) override {
- return Status::OK();
- }
-
- const Comparator* GetComparator() const override {
- return BytewiseComparator();
- }
-
- private:
- ColumnFamilyId cf_id_;
- std::string name_ = "MockCF";
-};
-
-class PointLockManagerTest : public testing::Test {
- public:
- void SetUp() override {
- env_ = Env::Default();
- db_dir_ = test::PerThreadDBPath("point_lock_manager_test");
- ASSERT_OK(env_->CreateDir(db_dir_));
- mutex_factory_ = std::make_shared<TransactionDBMutexFactoryImpl>();
-
- Options opt;
- opt.create_if_missing = true;
- TransactionDBOptions txn_opt;
- txn_opt.transaction_lock_timeout = 0;
- txn_opt.custom_mutex_factory = mutex_factory_;
-
-#ifdef ENABLE_RANGE_LOCKING_TESTS
- if (use_range_locking) {
- /*
- With Range Locking, we must use the same lock manager object that the
- TransactionDB is using.
- Create it here and pass it to the database through lock_mgr_handle.
- */
- locker_.reset(NewRangeLockManager(mutex_factory_)->getLockManager());
- range_lock_mgr =
- std::dynamic_pointer_cast<RangeLockManagerHandle>(locker_);
- txn_opt.lock_mgr_handle = range_lock_mgr;
- }
-#endif
-
- ASSERT_OK(TransactionDB::Open(opt, txn_opt, db_dir_, &db_));
-
- if (!use_range_locking) {
- // If not using range locking, this test creates a separate lock manager
- // object (right, NOT the one TransactionDB is using!), and runs tests on
- // that.
- locker_.reset(new PointLockManager(
- static_cast<PessimisticTransactionDB*>(db_), txn_opt));
- }
- }
-
- void TearDown() override {
- delete db_;
- EXPECT_OK(DestroyDir(env_, db_dir_));
- }
-
- PessimisticTransaction* NewTxn(
- TransactionOptions txn_opt = TransactionOptions()) {
- Transaction* txn = db_->BeginTransaction(WriteOptions(), txn_opt);
- return reinterpret_cast<PessimisticTransaction*>(txn);
- }
-
- protected:
- Env* env_;
- std::shared_ptr<LockManager> locker_;
- bool use_range_locking = false;
- std::shared_ptr<RangeLockManagerHandle> range_lock_mgr;
-
- // With Range Locking, functions like GetLockStatusData() return range
- // endpoints, not the keys themselves. This function extracts the key.
- std::string key_value(const std::string& s) {
- if (use_range_locking)
- return s.substr(1);
- else
- return s;
- }
-
- private:
- std::string db_dir_;
- std::shared_ptr<TransactionDBMutexFactory> mutex_factory_;
- TransactionDB* db_;
-};
-
-class AnyLockManagerTest : public PointLockManagerTest,
- public testing::WithParamInterface<bool> {
- public:
- AnyLockManagerTest() { use_range_locking = GetParam(); }
-};
-
// This test is not applicable for Range Lock manager as Range Lock Manager
// operates on Column Families, not their ids.
TEST_F(PointLockManagerTest, LockNonExistingColumnFamily) {
@@ -158,13 +40,13 @@ TEST_F(PointLockManagerTest, LockStatus) {
ASSERT_EQ(s.count(cf_id), 2u);
auto range = s.equal_range(cf_id);
for (auto it = range.first; it != range.second; it++) {
- ASSERT_TRUE(key_value(it->second.key) == "k1" ||
- key_value(it->second.key) == "k2");
- if (key_value(it->second.key) == "k1") {
+ ASSERT_TRUE(it->second.key == "k1" ||
+ it->second.key == "k2");
+ if (it->second.key == "k1") {
ASSERT_EQ(it->second.exclusive, true);
ASSERT_EQ(it->second.ids.size(), 1u);
ASSERT_EQ(it->second.ids[0], txn1->GetID());
- } else if (key_value(it->second.key) == "k2") {
+ } else if (it->second.key == "k2") {
ASSERT_EQ(it->second.exclusive, false);
ASSERT_EQ(it->second.ids.size(), 1u);
ASSERT_EQ(it->second.ids[0], txn2->GetID());
@@ -218,188 +100,6 @@ TEST_F(PointLockManagerTest, UnlockShared) {
delete txn2;
}
-TEST_P(AnyLockManagerTest, ReentrantExclusiveLock) {
- // Tests that a txn can acquire exclusive lock on the same key repeatedly.
- MockColumnFamilyHandle cf(1);
- locker_->AddColumnFamily(&cf);
- auto txn = NewTxn();
- ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, true));
- ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, true));
-
- // Cleanup
- locker_->UnLock(txn, 1, "k", env_);
-
- delete txn;
-}
-
-TEST_P(AnyLockManagerTest, ReentrantSharedLock) {
- // Tests that a txn can acquire shared lock on the same key repeatedly.
- MockColumnFamilyHandle cf(1);
- locker_->AddColumnFamily(&cf);
- auto txn = NewTxn();
- ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, false));
- ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, false));
-
- // Cleanup
- locker_->UnLock(txn, 1, "k", env_);
-
- delete txn;
-}
-
-TEST_P(AnyLockManagerTest, LockUpgrade) {
- // Tests that a txn can upgrade from a shared lock to an exclusive lock.
- MockColumnFamilyHandle cf(1);
- locker_->AddColumnFamily(&cf);
- auto txn = NewTxn();
- ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, false));
- ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, true));
-
- // Cleanup
- locker_->UnLock(txn, 1, "k", env_);
- delete txn;
-}
-
-TEST_P(AnyLockManagerTest, LockDowngrade) {
- // Tests that a txn can acquire a shared lock after acquiring an exclusive
- // lock on the same key.
- MockColumnFamilyHandle cf(1);
- locker_->AddColumnFamily(&cf);
- auto txn = NewTxn();
- ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, true));
- ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, false));
-
- // Cleanup
- locker_->UnLock(txn, 1, "k", env_);
- delete txn;
-}
-
-TEST_P(AnyLockManagerTest, LockConflict) {
- // Tests that lock conflicts lead to lock timeout.
- MockColumnFamilyHandle cf(1);
- locker_->AddColumnFamily(&cf);
- auto txn1 = NewTxn();
- auto txn2 = NewTxn();
-
- {
- // exclusive-exclusive conflict.
- ASSERT_OK(locker_->TryLock(txn1, 1, "k1", env_, true));
- auto s = locker_->TryLock(txn2, 1, "k1", env_, true);
- ASSERT_TRUE(s.IsTimedOut());
- }
-
- {
- // exclusive-shared conflict.
- ASSERT_OK(locker_->TryLock(txn1, 1, "k2", env_, true));
- auto s = locker_->TryLock(txn2, 1, "k2", env_, false);
- ASSERT_TRUE(s.IsTimedOut());
- }
-
- {
- // shared-exclusive conflict.
- ASSERT_OK(locker_->TryLock(txn1, 1, "k2", env_, false));
- auto s = locker_->TryLock(txn2, 1, "k2", env_, true);
- ASSERT_TRUE(s.IsTimedOut());
- }
-
- // Cleanup
- locker_->UnLock(txn1, 1, "k1", env_);
- locker_->UnLock(txn1, 1, "k2", env_);
-
- delete txn1;
- delete txn2;
-}
-
-port::Thread BlockUntilWaitingTxn(bool use_range_locking,
- std::function<void()> f) {
- std::atomic<bool> reached(false);
- const char* sync_point_name =
- use_range_locking ? "RangeTreeLockManager::TryRangeLock:WaitingTxn"
- : "PointLockManager::AcquireWithTimeout:WaitingTxn";
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- sync_point_name, [&](void* /*arg*/) { reached.store(true); });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
-
- port::Thread t(f);
-
- while (!reached.load()) {
- std::this_thread::sleep_for(std::chrono::milliseconds(100));
- }
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
-
- return t;
-}
-
-TEST_P(AnyLockManagerTest, SharedLocks) {
- // Tests that shared locks can be concurrently held by multiple transactions.
- MockColumnFamilyHandle cf(1);
- locker_->AddColumnFamily(&cf);
- auto txn1 = NewTxn();
- auto txn2 = NewTxn();
- ASSERT_OK(locker_->TryLock(txn1, 1, "k", env_, false));
- ASSERT_OK(locker_->TryLock(txn2, 1, "k", env_, false));
-
- // Cleanup
- locker_->UnLock(txn1, 1, "k", env_);
- locker_->UnLock(txn2, 1, "k", env_);
-
- delete txn1;
- delete txn2;
-}
-
-TEST_P(AnyLockManagerTest, Deadlock) {
- // Tests that deadlock can be detected.
- // Deadlock scenario:
- // txn1 exclusively locks k1, and wants to lock k2;
- // txn2 exclusively locks k2, and wants to lock k1.
- MockColumnFamilyHandle cf(1);
- locker_->AddColumnFamily(&cf);
- TransactionOptions txn_opt;
- txn_opt.deadlock_detect = true;
- txn_opt.lock_timeout = 1000000;
- auto txn1 = NewTxn(txn_opt);
- auto txn2 = NewTxn(txn_opt);
-
- ASSERT_OK(locker_->TryLock(txn1, 1, "k1", env_, true));
- ASSERT_OK(locker_->TryLock(txn2, 1, "k2", env_, true));
-
- // txn1 tries to lock k2, will block forever.
- port::Thread t = BlockUntilWaitingTxn(use_range_locking, [&]() {
- // block because txn2 is holding a lock on k2.
- locker_->TryLock(txn1, 1, "k2", env_, true);
- });
-
- auto s = locker_->TryLock(txn2, 1, "k1", env_, true);
- ASSERT_TRUE(s.IsBusy());
- ASSERT_EQ(s.subcode(), Status::SubCode::kDeadlock);
-
- std::vector<DeadlockPath> deadlock_paths = locker_->GetDeadlockInfoBuffer();
- ASSERT_EQ(deadlock_paths.size(), 1u);
- ASSERT_FALSE(deadlock_paths[0].limit_exceeded);
-
- std::vector<DeadlockInfo> deadlocks = deadlock_paths[0].path;
- ASSERT_EQ(deadlocks.size(), 2u);
-
- ASSERT_EQ(deadlocks[0].m_txn_id, txn1->GetID());
- ASSERT_EQ(deadlocks[0].m_cf_id, 1u);
- ASSERT_TRUE(deadlocks[0].m_exclusive);
- ASSERT_EQ(key_value(deadlocks[0].m_waiting_key), "k2");
-
- ASSERT_EQ(deadlocks[1].m_txn_id, txn2->GetID());
- ASSERT_EQ(deadlocks[1].m_cf_id, 1u);
- ASSERT_TRUE(deadlocks[1].m_exclusive);
- ASSERT_EQ(key_value(deadlocks[1].m_waiting_key), "k1");
-
- locker_->UnLock(txn2, 1, "k2", env_);
- t.join();
-
- // Cleanup
- locker_->UnLock(txn1, 1, "k1", env_);
- locker_->UnLock(txn1, 1, "k2", env_);
- delete txn2;
- delete txn1;
-}
-
// This test doesn't work with Range Lock Manager, because Range Lock Manager
// doesn't support deadlock_detect_depth.
@@ -427,7 +127,7 @@ TEST_F(PointLockManagerTest, DeadlockDepthExceeded) {
// it must have another txn waiting on it, which is txn4 in this case.
ASSERT_OK(locker_->TryLock(txn1, 1, "k1", env_, true));
- port::Thread t1 = BlockUntilWaitingTxn(false, [&]() {
+ port::Thread t1 = BlockUntilWaitingTxn(wait_sync_point_name_, [&]() {
ASSERT_OK(locker_->TryLock(txn2, 1, "k2", env_, true));
// block because txn1 is holding a lock on k1.
locker_->TryLock(txn2, 1, "k1", env_, true);
@@ -435,7 +135,7 @@ TEST_F(PointLockManagerTest, DeadlockDepthExceeded) {
ASSERT_OK(locker_->TryLock(txn3, 1, "k3", env_, true));
- port::Thread t2 = BlockUntilWaitingTxn(false, [&]() {
+ port::Thread t2 = BlockUntilWaitingTxn(wait_sync_point_name_, [&]() {
// block because txn3 is holding a lock on k1.
locker_->TryLock(txn4, 1, "k3", env_, true);
});
@@ -459,12 +159,8 @@ TEST_F(PointLockManagerTest, DeadlockDepthExceeded) {
delete txn1;
}
-#ifdef ENABLE_RANGE_LOCKING_TESTS
-INSTANTIATE_TEST_CASE_P(AnyLockManager, AnyLockManagerTest, ::testing::Bool());
-#else
-INSTANTIATE_TEST_CASE_P(AnyLockManager, AnyLockManagerTest,
- ::testing::Values(false));
-#endif
+INSTANTIATE_TEST_CASE_P(PointLockManager, AnyLockManagerTest,
+ ::testing::Values(nullptr));
} // namespace ROCKSDB_NAMESPACE
diff --git a/utilities/transactions/lock/point/point_lock_manager_test.h b/utilities/transactions/lock/point/point_lock_manager_test.h
new file mode 100644
index 000000000..b6534b5ca
--- /dev/null
+++ b/utilities/transactions/lock/point/point_lock_manager_test.h
@@ -0,0 +1,281 @@
+
+#include "utilities/transactions/lock/point/point_lock_manager.h"
+
+#include "file/file_util.h"
+#include "port/port.h"
+#include "port/stack_trace.h"
+#include "rocksdb/utilities/transaction_db.h"
+#include "test_util/testharness.h"
+#include "test_util/testutil.h"
+#include "utilities/transactions/pessimistic_transaction_db.h"
+
+#include "utilities/transactions/transaction_db_mutex_impl.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+class MockColumnFamilyHandle : public ColumnFamilyHandle {
+ public:
+ explicit MockColumnFamilyHandle(ColumnFamilyId cf_id) : cf_id_(cf_id) {}
+
+ ~MockColumnFamilyHandle() override {}
+
+ const std::string& GetName() const override { return name_; }
+
+ ColumnFamilyId GetID() const override { return cf_id_; }
+
+ Status GetDescriptor(ColumnFamilyDescriptor*) override {
+ return Status::OK();
+ }
+
+ const Comparator* GetComparator() const override {
+ return BytewiseComparator();
+ }
+
+ private:
+ ColumnFamilyId cf_id_;
+ std::string name_ = "MockCF";
+};
+
+class PointLockManagerTest : public testing::Test {
+ public:
+ void SetUp() override {
+ env_ = Env::Default();
+ db_dir_ = test::PerThreadDBPath("point_lock_manager_test");
+ ASSERT_OK(env_->CreateDir(db_dir_));
+
+ Options opt;
+ opt.create_if_missing = true;
+ TransactionDBOptions txn_opt;
+ txn_opt.transaction_lock_timeout = 0;
+
+ ASSERT_OK(TransactionDB::Open(opt, txn_opt, db_dir_, &db_));
+
+ // CAUTION: This test creates a separate lock manager object (right, NOT
+ // the one that the TransactionDB is using!), and runs tests on it.
+ locker_.reset(new PointLockManager(
+ static_cast<PessimisticTransactionDB*>(db_), txn_opt));
+
+ wait_sync_point_name_ = "PointLockManager::AcquireWithTimeout:WaitingTxn";
+ }
+
+ void TearDown() override {
+ delete db_;
+ EXPECT_OK(DestroyDir(env_, db_dir_));
+ }
+
+ PessimisticTransaction* NewTxn(
+ TransactionOptions txn_opt = TransactionOptions()) {
+ Transaction* txn = db_->BeginTransaction(WriteOptions(), txn_opt);
+ return reinterpret_cast<PessimisticTransaction*>(txn);
+ }
+
+ protected:
+ Env* env_;
+ std::shared_ptr<LockManager> locker_;
+ const char *wait_sync_point_name_;
+ friend void PointLockManagerTestExternalSetup(PointLockManagerTest*);
+
+ private:
+ std::string db_dir_;
+ TransactionDB* db_;
+};
+
+
+typedef void (*init_func_t)(PointLockManagerTest*);
+
+class AnyLockManagerTest : public PointLockManagerTest,
+ public testing::WithParamInterface<init_func_t>
+{
+public:
+ void SetUp() override {
+ // If a custom setup function was provided, use it. Otherwise, use what we
+ // have inherited.
+ auto init_func = GetParam();
+ if (init_func)
+ (*init_func)(this);
+ else
+ PointLockManagerTest::SetUp();
+ }
+};
+
+TEST_P(AnyLockManagerTest, ReentrantExclusiveLock) {
+ // Tests that a txn can acquire exclusive lock on the same key repeatedly.
+ MockColumnFamilyHandle cf(1);
+ locker_->AddColumnFamily(&cf);
+ auto txn = NewTxn();
+ ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, true));
+ ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, true));
+
+ // Cleanup
+ locker_->UnLock(txn, 1, "k", env_);
+
+ delete txn;
+}
+
+TEST_P(AnyLockManagerTest, ReentrantSharedLock) {
+ // Tests that a txn can acquire shared lock on the same key repeatedly.
+ MockColumnFamilyHandle cf(1);
+ locker_->AddColumnFamily(&cf);
+ auto txn = NewTxn();
+ ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, false));
+ ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, false));
+
+ // Cleanup
+ locker_->UnLock(txn, 1, "k", env_);
+
+ delete txn;
+}
+
+TEST_P(AnyLockManagerTest, LockUpgrade) {
+ // Tests that a txn can upgrade from a shared lock to an exclusive lock.
+ MockColumnFamilyHandle cf(1);
+ locker_->AddColumnFamily(&cf);
+ auto txn = NewTxn();
+ ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, false));
+ ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, true));
+
+ // Cleanup
+ locker_->UnLock(txn, 1, "k", env_);
+ delete txn;
+}
+
+TEST_P(AnyLockManagerTest, LockDowngrade) {
+ // Tests that a txn can acquire a shared lock after acquiring an exclusive
+ // lock on the same key.
+ MockColumnFamilyHandle cf(1);
+ locker_->AddColumnFamily(&cf);
+ auto txn = NewTxn();
+ ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, true));
+ ASSERT_OK(locker_->TryLock(txn, 1, "k", env_, false));
+
+ // Cleanup
+ locker_->UnLock(txn, 1, "k", env_);
+ delete txn;
+}
+
+TEST_P(AnyLockManagerTest, LockConflict) {
+ // Tests that lock conflicts lead to lock timeout.
+ MockColumnFamilyHandle cf(1);
+ locker_->AddColumnFamily(&cf);
+ auto txn1 = NewTxn();
+ auto txn2 = NewTxn();
+
+ {
+ // exclusive-exclusive conflict.
+ ASSERT_OK(locker_->TryLock(txn1, 1, "k1", env_, true));
+ auto s = locker_->TryLock(txn2, 1, "k1", env_, true);
+ ASSERT_TRUE(s.IsTimedOut());
+ }
+
+ {
+ // exclusive-shared conflict.
+ ASSERT_OK(locker_->TryLock(txn1, 1, "k2", env_, true));
+ auto s = locker_->TryLock(txn2, 1, "k2", env_, false);
+ ASSERT_TRUE(s.IsTimedOut());
+ }
+
+ {
+ // shared-exclusive conflict.
+ ASSERT_OK(locker_->TryLock(txn1, 1, "k2", env_, false));
+ auto s = locker_->TryLock(txn2, 1, "k2", env_, true);
+ ASSERT_TRUE(s.IsTimedOut());
+ }
+
+ // Cleanup
+ locker_->UnLock(txn1, 1, "k1", env_);
+ locker_->UnLock(txn1, 1, "k2", env_);
+
+ delete txn1;
+ delete txn2;
+}
+
+port::Thread BlockUntilWaitingTxn(const char *sync_point_name,
+ std::function<void()> f) {
+ std::atomic<bool> reached(false);
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
+ sync_point_name, [&](void* /*arg*/) { reached.store(true); });
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
+
+ port::Thread t(f);
+
+ while (!reached.load()) {
+ std::this_thread::sleep_for(std::chrono::milliseconds(100));
+ }
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
+
+ return t;
+}
+
+TEST_P(AnyLockManagerTest, SharedLocks) {
+ // Tests that shared locks can be concurrently held by multiple transactions.
+ MockColumnFamilyHandle cf(1);
+ locker_->AddColumnFamily(&cf);
+ auto txn1 = NewTxn();
+ auto txn2 = NewTxn();
+ ASSERT_OK(locker_->TryLock(txn1, 1, "k", env_, false));
+ ASSERT_OK(locker_->TryLock(txn2, 1, "k", env_, false));
+
+ // Cleanup
+ locker_->UnLock(txn1, 1, "k", env_);
+ locker_->UnLock(txn2, 1, "k", env_);
+
+ delete txn1;
+ delete txn2;
+}
+
+TEST_P(AnyLockManagerTest, Deadlock) {
+ // Tests that deadlock can be detected.
+ // Deadlock scenario:
+ // txn1 exclusively locks k1, and wants to lock k2;
+ // txn2 exclusively locks k2, and wants to lock k1.
+ MockColumnFamilyHandle cf(1);
+ locker_->AddColumnFamily(&cf);
+ TransactionOptions txn_opt;
+ txn_opt.deadlock_detect = true;
+ txn_opt.lock_timeout = 1000000;
+ auto txn1 = NewTxn(txn_opt);
+ auto txn2 = NewTxn(txn_opt);
+
+ ASSERT_OK(locker_->TryLock(txn1, 1, "k1", env_, true));
+ ASSERT_OK(locker_->TryLock(txn2, 1, "k2", env_, true));
+
+ // txn1 tries to lock k2, will block forever.
+ port::Thread t = BlockUntilWaitingTxn(wait_sync_point_name_, [&]() {
+ // block because txn2 is holding a lock on k2.
+ locker_->TryLock(txn1, 1, "k2", env_, true);
+ });
+
+ auto s = locker_->TryLock(txn2, 1, "k1", env_, true);
+ ASSERT_TRUE(s.IsBusy());
+ ASSERT_EQ(s.subcode(), Status::SubCode::kDeadlock);
+
+ std::vector<DeadlockPath> deadlock_paths = locker_->GetDeadlockInfoBuffer();
+ ASSERT_EQ(deadlock_paths.size(), 1u);
+ ASSERT_FALSE(deadlock_paths[0].limit_exceeded);
+
+ std::vector<DeadlockInfo> deadlocks = deadlock_paths[0].path;
+ ASSERT_EQ(deadlocks.size(), 2u);
+
+ ASSERT_EQ(deadlocks[0].m_txn_id, txn1->GetID());
+ ASSERT_EQ(deadlocks[0].m_cf_id, 1u);
+ ASSERT_TRUE(deadlocks[0].m_exclusive);
+ ASSERT_EQ(deadlocks[0].m_waiting_key, "k2");
+
+ ASSERT_EQ(deadlocks[1].m_txn_id, txn2->GetID());
+ ASSERT_EQ(deadlocks[1].m_cf_id, 1u);
+ ASSERT_TRUE(deadlocks[1].m_exclusive);
+ ASSERT_EQ(deadlocks[1].m_waiting_key, "k1");
+
+ locker_->UnLock(txn2, 1, "k2", env_);
+ t.join();
+
+ // Cleanup
+ locker_->UnLock(txn1, 1, "k1", env_);
+ locker_->UnLock(txn1, 1, "k2", env_);
+ delete txn2;
+ delete txn1;
+}
+
+} // namespace ROCKSDB_NAMESPACE
+
diff --git a/utilities/transactions/lock/range/range_locking_test.cc b/utilities/transactions/lock/range/range_locking_test.cc
index d0f3312d8..48d96d7d0 100644
--- a/utilities/transactions/lock/range/range_locking_test.cc
+++ b/utilities/transactions/lock/range/range_locking_test.cc
@@ -20,6 +20,8 @@
#include "utilities/transactions/pessimistic_transaction_db.h"
#include "utilities/transactions/transaction_test.h"
+#include "utilities/transactions/lock/point/point_lock_manager_test.h"
+
using std::string;
namespace ROCKSDB_NAMESPACE {
@@ -219,9 +221,35 @@ TEST_F(RangeLockingTest, MultipleTrxLockStatusData) {
delete txn1;
}
+
+void PointLockManagerTestExternalSetup(PointLockManagerTest* self)
+{
+ self->env_ = Env::Default();
+ self->db_dir_ = test::PerThreadDBPath("point_lock_manager_test");
+ ASSERT_OK(self->env_->CreateDir(self->db_dir_));
+
+ Options opt;
+ opt.create_if_missing = true;
+ TransactionDBOptions txn_opt;
+ txn_opt.transaction_lock_timeout = 0;
+
+ auto mutex_factory = std::make_shared<TransactionDBMutexFactoryImpl>();
+ self->locker_.reset(NewRangeLockManager(mutex_factory)->getLockManager());
+ std::shared_ptr<RangeLockManagerHandle> range_lock_mgr =
+ std::dynamic_pointer_cast<RangeLockManagerHandle>(self->locker_);
+ txn_opt.lock_mgr_handle = range_lock_mgr;
+
+ ASSERT_OK(TransactionDB::Open(opt, txn_opt, self->db_dir_, &self->db_));
+ self->wait_sync_point_name_ = "RangeTreeLockManager::TryRangeLock:WaitingTxn";
+}
+
+INSTANTIATE_TEST_CASE_P(RangeLockManager, AnyLockManagerTest,
+ ::testing::Values(PointLockManagerTestExternalSetup));
+
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
+ ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
diff --git a/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc b/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc
index b8650fa0e..471eb42b7 100644
--- a/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc
+++ b/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc
@@ -179,8 +179,7 @@ bool lock_request::deadlock_exists(const txnid_set &conflicts) {
lock_request *req = find_lock_request(a);
if (req) {
m_deadlock_cb(req->m_txnid, (req->m_type == lock_request::WRITE),
- std::string((const char *)req->m_left_key->data,
- req->m_left_key->size));
+ req->m_left_key,req->m_right_key);
}
};
}
diff --git a/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.h b/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.h
index 8fdd92fef..f2e8bcb54 100644
--- a/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.h
+++ b/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.h
@@ -226,7 +226,7 @@ class lock_request {
void (*m_retry_test_callback)(void);
public:
- std::function<void(TXNID, bool, std::string)> m_deadlock_cb;
+ std::function<void(TXNID, bool, const DBT*, const DBT*)> m_deadlock_cb;
friend class lock_request_unit_test;
};
diff --git a/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc b/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc
index f28c85c1e..7e039083d 100644
--- a/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc
+++ b/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc
@@ -85,16 +85,26 @@ Status RangeTreeLockManager::TryLock(PessimisticTransaction* txn,
exclusive ? toku::lock_request::WRITE : toku::lock_request::READ,
false /* not a big txn */, (void*)wait_txn_id);
- uint64_t killed_time_msec = 0; // TODO: what should this have?
+ // This is for "periodically wake up and check if the wait is killed" feature
+ // which we are not using.
+ uint64_t killed_time_msec = 0;
uint64_t wait_time_msec = txn->GetLockTimeout();
+
// convert microseconds to milliseconds
if (wait_time_msec != (uint64_t)-1)
wait_time_msec = (wait_time_msec + 500) / 1000;
- std::vector<DeadlockInfo> di_path;
- request.m_deadlock_cb = [&](TXNID txnid, bool is_exclusive, std::string key) {
+ std::vector<RangeDeadlockInfo> di_path;
+ request.m_deadlock_cb = [&](TXNID txnid, bool is_exclusive,
+ const DBT *start_dbt, const DBT *end_dbt) {
+ EndpointWithString start;
+ EndpointWithString end;
+ deserialize_endpoint(start_dbt, &start);
+ deserialize_endpoint(end_dbt, &end);
+
di_path.push_back({((PessimisticTransaction*)txnid)->GetID(),
- column_family_id, is_exclusive, key});
+ column_family_id, is_exclusive, std::move(start),
+ std::move(end)});
};
request.start();
@@ -151,7 +161,8 @@ Status RangeTreeLockManager::TryLock(PessimisticTransaction* txn,
return Status::Busy(Status::SubCode::kLockLimit);
case DB_LOCK_DEADLOCK: {
std::reverse(di_path.begin(), di_path.end());
- dlock_buffer_.AddNewPath(DeadlockPath(di_path, request.get_start_time()));
+ dlock_buffer_.AddNewPath(RangeDeadlockPath(di_path,
+ request.get_start_time()));
return Status::Busy(Status::SubCode::kDeadlock);
}
default:
@@ -320,14 +331,35 @@ RangeTreeLockManager::RangeTreeLockManager(
ltm_.create(on_create, on_destroy, on_escalate, NULL, mutex_factory_);
}
-void RangeTreeLockManager::Resize(uint32_t target_size) {
+void RangeTreeLockManager::SetRangeDeadlockInfoBufferSize(uint32_t target_size) {
dlock_buffer_.Resize(target_size);
}
-std::vector<DeadlockPath> RangeTreeLockManager::GetDeadlockInfoBuffer() {
+void RangeTreeLockManager::Resize(uint32_t target_size) {
+ SetRangeDeadlockInfoBufferSize(target_size);
+}
+
+std::vector<RangeDeadlockPath> RangeTreeLockManager::GetRangeDeadlockInfoBuffer() {
return dlock_buffer_.PrepareBuffer();
}
+std::vector<DeadlockPath> RangeTreeLockManager::GetDeadlockInfoBuffer() {
+ std::vector<DeadlockPath> res;
+ std::vector<RangeDeadlockPath> data = GetRangeDeadlockInfoBuffer();
+ // report left endpoints
+ for (auto it = data.begin(); it != data.end(); ++it) {
+ std::vector<DeadlockInfo> path;
+
+ for (auto it2 = it->path.begin(); it2 != it->path.end(); ++it2) {
+ path.push_back({it2->m_txn_id, it2->m_cf_id, it2->m_exclusive,
+ it2->m_start.slice});
+ }
+ res.push_back(DeadlockPath(path, it->deadlock_time));
+ }
+ return res;
+}
+
+
/*
@brief Lock Escalation Callback function
@@ -496,20 +528,18 @@ struct LOCK_PRINT_CONTEXT {
uint32_t cfh_id; // Column Family whose tree we are traversing
};
-/*
-For reporting point locks:
- struct KeyLockInfo info;
-
- info.key.append((const char*)left->data, (size_t)left->size);
- info.exclusive = !is_shared;
- if (!(left->size == right->size &&
- !memcmp(left->data, right->data, left->size))) {
- // not a single-point lock
- info.has_key2 = true;
- info.key2.append((const char*)right->data, right->size);
+// Report left endpoints of the acquired locks
+LockManager::PointLockStatus RangeTreeLockManager::GetPointLockStatus() {
+ PointLockStatus res;
+ LockManager::RangeLockStatus data = GetRangeLockStatus();
+ // report left endpoints
+ for (auto it = data.begin(); it != data.end(); ++it) {
+ auto& val = it->second;
+ res.insert({ it->first, { val.start.slice, val.ids, val.exclusive}});
}
-*/
+ return res;
+}
static void push_into_lock_status_data(void* param, const DBT* left,
const DBT* right, TXNID txnid_arg,
diff --git a/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h b/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h
index 8511865ec..b53fe90a1 100644
--- a/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h
+++ b/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h
@@ -19,6 +19,8 @@ namespace ROCKSDB_NAMESPACE {
using namespace toku;
+typedef DeadlockInfoBufferTempl<RangeDeadlockPath> RangeDeadlockInfoBuffer;
+
/*
A Range Lock Manager that uses PerconaFT's locktree library
*/
@@ -30,10 +32,12 @@ class RangeTreeLockManager : public RangeLockManagerBase,
void AddColumnFamily(const ColumnFamilyHandle* cfh) override;
void RemoveColumnFamily(const ColumnFamilyHandle* cfh) override;
- // Resize the deadlock-info buffer, does nothing currently
void Resize(uint32_t) override;
std::vector<DeadlockPath> GetDeadlockInfoBuffer() override;
+ std::vector<RangeDeadlockPath> GetRangeDeadlockInfoBuffer() override;
+ void SetRangeDeadlockInfoBufferSize(uint32_t target_size) override;
+
// Get a lock on a range
// @note only exclusive locks are currently supported (requesting a
// non-exclusive lock will get an exclusive one)
@@ -66,15 +70,10 @@ class RangeTreeLockManager : public RangeLockManagerBase,
bool IsPointLockSupported() const override {
// One could have acquired a point lock (it is reduced to range lock)
- // but This doesn't mean that one could not have acquired point locks.
- // this means we can't implement GetPointLockStatus()
- return false;
+ return true;
}
- PointLockStatus GetPointLockStatus() override {
- // No point locks
- return {};
- }
+ PointLockStatus GetPointLockStatus() override;
// This is from LockManager
LockManager::RangeLockStatus GetRangeLockStatus() override;
@@ -107,7 +106,7 @@ class RangeTreeLockManager : public RangeLockManagerBase,
// (uses the same approach as TransactionLockMgr::lock_maps_cache_)
std::unique_ptr<ThreadLocalPtr> ltree_lookup_cache_;
- DeadlockInfoBuffer dlock_buffer_;
+ RangeDeadlockInfoBuffer dlock_buffer_;
// Get the lock tree which stores locks for Column Family with given cf_id
toku::locktree* get_locktree_by_cfid(uint32_t cf_id);
diff --git a/utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.h b/utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.h
index 670417031..8e9e0852d 100644
--- a/utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.h
+++ b/utilities/transactions/lock/range/range_tree/range_tree_lock_tracker.h
@@ -80,7 +80,10 @@ class RangeTreeLockTracker : public LockTracker {
void Track(const PointLockRequest&) override;
void Track(const RangeLockRequest&) override;
- bool IsPointLockSupported() const override { return false; }
+ bool IsPointLockSupported() const override {
+ // This indicates that we don't implement GetPointLockStatus()
+ return false;
+ }
bool IsRangeLockSupported() const override { return true; }
// a Not-supported dummy implementation.
1
0
[Commits] 9892a46: MDEV-19179 Regression: SELECT ... UNION ... with inconsistent column names fails
by IgorBabaev 13 Nov '20
by IgorBabaev 13 Nov '20
13 Nov '20
revision-id: 9892a4698fa9fe1b3424efa431b75989d3e429a5 (mariadb-10.2.31-580-g9892a46)
parent(s): 984a06db2ce2b2e3c7c5028245905417f2141cd7
author: Igor Babaev
committer: Igor Babaev
timestamp: 2020-11-12 21:02:07 -0800
message:
MDEV-19179 Regression: SELECT ... UNION ... with inconsistent column names fails
A bogus error message was issued when a condition was pushed into a
materialized derived table or view specified as union of selects with
aggregation when the corresponding columns of the selects had different
names. This happened because the expression pushed into having clauses of
the selects was adjusted for the names of the first select of the union.
The easiest solution was to rename the columns of the other selects to be
name compatible with the columns of the first select.
---
mysql-test/r/derived_cond_pushdown.result | 41 +++++++++++++++++++++++++++++++
mysql-test/t/derived_cond_pushdown.test | 28 +++++++++++++++++++++
sql/sql_derived.cc | 25 +++++++++++++++++--
3 files changed, 92 insertions(+), 2 deletions(-)
diff --git a/mysql-test/r/derived_cond_pushdown.result b/mysql-test/r/derived_cond_pushdown.result
index d4e8fef..25237aa 100644
--- a/mysql-test/r/derived_cond_pushdown.result
+++ b/mysql-test/r/derived_cond_pushdown.result
@@ -10593,4 +10593,45 @@ a
abc
DROP VIEW v1;
DROP TABLE t1;
+#
+# MDEV-19179: pushdown into UNION of aggregation selects whose
+# corresponding columns have different names
+#
+create table t1 (a int);
+insert into t1 values (3), (7), (1);
+select *
+from (select min(a) as x from t1 union all select max(a) as y from t1) t
+where x>0;
+x
+1
+7
+explain extended select *
+from (select min(a) as x from t1 union all select max(a) as y from t1) t
+where x>0;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 6 100.00 Using where
+2 DERIVED t1 ALL NULL NULL NULL NULL 3 100.00
+3 UNION t1 ALL NULL NULL NULL NULL 3 100.00
+Warnings:
+Note 1003 select `t`.`x` AS `x` from (select min(`test`.`t1`.`a`) AS `x` from `test`.`t1` having `x` > 0 union all select max(`test`.`t1`.`a`) AS `x` from `test`.`t1` having `x` > 0) `t` where `t`.`x` > 0
+prepare stmt from "select *
+from (select min(a) as x from t1 union all select max(a) as y from t1) t
+where x>0";
+execute stmt;
+x
+1
+7
+execute stmt;
+x
+1
+7
+deallocate prepare stmt;
+create view v1(m) as
+select min(a) as x from t1 union all select max(a) as y from t1;
+select * from v1 where m > 0;
+m
+1
+7
+drop view v1;
+drop table t1;
# End of 10.2 tests
diff --git a/mysql-test/t/derived_cond_pushdown.test b/mysql-test/t/derived_cond_pushdown.test
index a7df65f..31b4904 100644
--- a/mysql-test/t/derived_cond_pushdown.test
+++ b/mysql-test/t/derived_cond_pushdown.test
@@ -2184,4 +2184,32 @@ SELECT * FROM v1 WHERE IF( a REGEXP 'def', 'foo', a ) IN ('abc', 'foobar');
DROP VIEW v1;
DROP TABLE t1;
+--echo #
+--echo # MDEV-19179: pushdown into UNION of aggregation selects whose
+--echo # corresponding columns have different names
+--echo #
+
+create table t1 (a int);
+insert into t1 values (3), (7), (1);
+
+let $q=
+select *
+from (select min(a) as x from t1 union all select max(a) as y from t1) t
+where x>0;
+
+eval $q;
+eval explain extended $q;
+
+eval prepare stmt from "$q";
+execute stmt;
+execute stmt;
+deallocate prepare stmt;
+
+create view v1(m) as
+select min(a) as x from t1 union all select max(a) as y from t1;
+select * from v1 where m > 0;
+
+drop view v1;
+drop table t1;
+
--echo # End of 10.2 tests
diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc
index 39499e6..984044c 100644
--- a/sql/sql_derived.cc
+++ b/sql/sql_derived.cc
@@ -1199,7 +1199,8 @@ bool pushdown_cond_for_derived(THD *thd, Item *cond, TABLE_LIST *derived)
DBUG_RETURN(false);
st_select_lex_unit *unit= derived->get_unit();
- st_select_lex *sl= unit->first_select();
+ st_select_lex *first_sl= unit->first_select();
+ st_select_lex *sl= first_sl;
if (derived->prohibit_cond_pushdown)
DBUG_RETURN(false);
@@ -1311,7 +1312,27 @@ bool pushdown_cond_for_derived(THD *thd, Item *cond, TABLE_LIST *derived)
if (!extracted_cond_copy)
continue;
}
-
+
+ /*
+ Rename the columns of all non-first selects of a union to be compatible
+ by names with the columns of the first select. It will allow to use copies
+ of the same expression pushed into having clauses of different selects.
+ */
+ if (sl != first_sl)
+ {
+ DBUG_ASSERT(sl->item_list.elements == first_sl->item_list.elements);
+ List_iterator_fast<Item> it(sl->item_list);
+ List_iterator_fast<Item> nm_it(first_sl->item_list);
+ Item * item;
+ char *name;
+ while((item= it++))
+ {
+ name= (nm_it++)->name;
+ item->set_name(thd, name, (uint) strlen(name), system_charset_info);
+ item->is_autogenerated_name= FALSE;
+ }
+ }
+
/*
Transform the references to the 'derived' columns from the condition
pushed into the having clause of sl to make them usable in the new context
1
0
[Commits] 2842692: MDEV-19179 Regression: SELECT ... UNION ... with inconsistent column names fails
by IgorBabaev 13 Nov '20
by IgorBabaev 13 Nov '20
13 Nov '20
revision-id: 284269212d2c114f3f81b0e18a6eeeaa9247bf08 (mariadb-10.2.31-580-g2842692)
parent(s): 984a06db2ce2b2e3c7c5028245905417f2141cd7
author: Igor Babaev
committer: Igor Babaev
timestamp: 2020-11-12 20:46:08 -0800
message:
MDEV-19179 Regression: SELECT ... UNION ... with inconsistent column names fails
A bogus error message was issued when a condition was pushed into the
having clause of materialized derived table or view specified as union
of selects when the corresponding columns of the selects had different
names. This happened because the pushed expression was adjusted for
the names of the first select of the union.
The easiest solution was to rename the columns of the other selects to be
name compatible with the columns of the first select.
---
mysql-test/r/derived_cond_pushdown.result | 41 +++++++++++++++++++++++++++++++
mysql-test/t/derived_cond_pushdown.test | 28 +++++++++++++++++++++
sql/sql_derived.cc | 25 +++++++++++++++++--
3 files changed, 92 insertions(+), 2 deletions(-)
diff --git a/mysql-test/r/derived_cond_pushdown.result b/mysql-test/r/derived_cond_pushdown.result
index d4e8fef..25237aa 100644
--- a/mysql-test/r/derived_cond_pushdown.result
+++ b/mysql-test/r/derived_cond_pushdown.result
@@ -10593,4 +10593,45 @@ a
abc
DROP VIEW v1;
DROP TABLE t1;
+#
+# MDEV-19179: pushdown into UNION of aggregation selects whose
+# corresponding columns have different names
+#
+create table t1 (a int);
+insert into t1 values (3), (7), (1);
+select *
+from (select min(a) as x from t1 union all select max(a) as y from t1) t
+where x>0;
+x
+1
+7
+explain extended select *
+from (select min(a) as x from t1 union all select max(a) as y from t1) t
+where x>0;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 PRIMARY <derived2> ALL NULL NULL NULL NULL 6 100.00 Using where
+2 DERIVED t1 ALL NULL NULL NULL NULL 3 100.00
+3 UNION t1 ALL NULL NULL NULL NULL 3 100.00
+Warnings:
+Note 1003 select `t`.`x` AS `x` from (select min(`test`.`t1`.`a`) AS `x` from `test`.`t1` having `x` > 0 union all select max(`test`.`t1`.`a`) AS `x` from `test`.`t1` having `x` > 0) `t` where `t`.`x` > 0
+prepare stmt from "select *
+from (select min(a) as x from t1 union all select max(a) as y from t1) t
+where x>0";
+execute stmt;
+x
+1
+7
+execute stmt;
+x
+1
+7
+deallocate prepare stmt;
+create view v1(m) as
+select min(a) as x from t1 union all select max(a) as y from t1;
+select * from v1 where m > 0;
+m
+1
+7
+drop view v1;
+drop table t1;
# End of 10.2 tests
diff --git a/mysql-test/t/derived_cond_pushdown.test b/mysql-test/t/derived_cond_pushdown.test
index a7df65f..31b4904 100644
--- a/mysql-test/t/derived_cond_pushdown.test
+++ b/mysql-test/t/derived_cond_pushdown.test
@@ -2184,4 +2184,32 @@ SELECT * FROM v1 WHERE IF( a REGEXP 'def', 'foo', a ) IN ('abc', 'foobar');
DROP VIEW v1;
DROP TABLE t1;
+--echo #
+--echo # MDEV-19179: pushdown into UNION of aggregation selects whose
+--echo # corresponding columns have different names
+--echo #
+
+create table t1 (a int);
+insert into t1 values (3), (7), (1);
+
+let $q=
+select *
+from (select min(a) as x from t1 union all select max(a) as y from t1) t
+where x>0;
+
+eval $q;
+eval explain extended $q;
+
+eval prepare stmt from "$q";
+execute stmt;
+execute stmt;
+deallocate prepare stmt;
+
+create view v1(m) as
+select min(a) as x from t1 union all select max(a) as y from t1;
+select * from v1 where m > 0;
+
+drop view v1;
+drop table t1;
+
--echo # End of 10.2 tests
diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc
index 39499e6..984044c 100644
--- a/sql/sql_derived.cc
+++ b/sql/sql_derived.cc
@@ -1199,7 +1199,8 @@ bool pushdown_cond_for_derived(THD *thd, Item *cond, TABLE_LIST *derived)
DBUG_RETURN(false);
st_select_lex_unit *unit= derived->get_unit();
- st_select_lex *sl= unit->first_select();
+ st_select_lex *first_sl= unit->first_select();
+ st_select_lex *sl= first_sl;
if (derived->prohibit_cond_pushdown)
DBUG_RETURN(false);
@@ -1311,7 +1312,27 @@ bool pushdown_cond_for_derived(THD *thd, Item *cond, TABLE_LIST *derived)
if (!extracted_cond_copy)
continue;
}
-
+
+ /*
+ Rename the columns of all non-first selects of a union to be compatible
+ by names with the columns of the first select. It will allow to use copies
+ of the same expression pushed into having clauses of different selects.
+ */
+ if (sl != first_sl)
+ {
+ DBUG_ASSERT(sl->item_list.elements == first_sl->item_list.elements);
+ List_iterator_fast<Item> it(sl->item_list);
+ List_iterator_fast<Item> nm_it(first_sl->item_list);
+ Item * item;
+ char *name;
+ while((item= it++))
+ {
+ name= (nm_it++)->name;
+ item->set_name(thd, name, (uint) strlen(name), system_charset_info);
+ item->is_autogenerated_name= FALSE;
+ }
+ }
+
/*
Transform the references to the 'derived' columns from the condition
pushed into the having clause of sl to make them usable in the new context
1
0
[Commits] 8f7e4642c56: MDEV-9750: Quick memory exhaustion with 'extended_keys=on' ...
by Sergei Petrunia 11 Nov '20
by Sergei Petrunia 11 Nov '20
11 Nov '20
revision-id: 8f7e4642c56bd03327947043c5d93a7cbfaaaed3 (mariadb-10.4.11-420-g8f7e4642c56)
parent(s): c2ac0ce1f02e3ae2b1de5c07ba40bed25c30dc40
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2020-11-11 23:26:54 +0300
message:
MDEV-9750: Quick memory exhaustion with 'extended_keys=on' ...
(Variant #2)
Do not produce SEL_ARG graphs that would yield huge numbers of ranges.
Introduce a concept of SEL_ARG graph's "weight". If we are about to
produce a graph whose "weight" exceeds the limit, remove the parts
of SEL_ARG graph that represent the biggest key parts. Do so until
the graph's is within the limit.
Variant #2: Don't call enforce_sel_arg_weight_limit() for sub-graphs,
as this has complicated semantics if the subgraph has shared
sub-sub-graphs. Instead, do pruning it only after we've constructed
the entire SEL_ARG graph.
---
mysql-test/main/range.result | 46 +++++++++
mysql-test/main/range.test | 34 +++++++
mysql-test/main/range_mrr_icp.result | 46 +++++++++
sql/opt_range.cc | 182 +++++++++++++++++++++++++++++++++--
sql/opt_range.h | 53 +++++++++-
5 files changed, 354 insertions(+), 7 deletions(-)
diff --git a/mysql-test/main/range.result b/mysql-test/main/range.result
index b708628b625..9800d931dd6 100644
--- a/mysql-test/main/range.result
+++ b/mysql-test/main/range.result
@@ -3135,6 +3135,52 @@ drop table t1,ten,t2;
#
# End of 10.2 tests
#
+#
+# MDEV-9750: Quick memory exhaustion with 'extended_keys=on'...
+#
+create table t1 (
+kp1 int,
+kp2 int,
+kp3 int,
+kp4 int,
+key key1(kp1, kp2, kp3,kp4)
+);
+insert into t1 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+set @tmp_9750=@@optimizer_trace;
+set optimizer_trace=1;
+explain select * from t1 where
+kp1 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
+kp2 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
+kp3 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
+kp4 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20)
+;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index key1 key1 20 NULL 3 Using where; Using index
+set @json= (select json_detailed(JSON_EXTRACT(trace, '$**.range_scan_alternatives'))
+from information_schema.optimizer_trace);
+# This will show 3-component ranges.
+# The ranges were produced, but the optimizer has cut away kp4
+# to keep the number of ranges at manageable level:
+select left(@json, 500);
+left(@json, 500)
+[
+
+ [
+
+ {
+ "index": "key1",
+ "ranges":
+ [
+ "(1,1,1) <= (kp1,kp2,kp3) <= (1,1,1)",
+ "(1,1,2) <= (kp1,kp2,kp3) <= (1,1,2)",
+ "(1,1,3) <= (kp1,kp2,kp3) <= (1,1,3)",
+ "(1,1,4) <= (kp1,kp2,kp3) <= (1,1,4)",
+ "(1,1,5) <= (kp1,kp2,kp3) <= (1,1,5)",
+ "(1,1,6) <= (kp1,kp2,kp3) <= (1,1,6)",
+ "(1,1,7) <= (kp1,kp2,kp3) <= (1,1,7)",
+ "
+set optimizer_trace=@tmp_9750;
+drop table t1;
set global innodb_stats_persistent= @innodb_stats_persistent_save;
set global innodb_stats_persistent_sample_pages=
@innodb_stats_persistent_sample_pages_save;
diff --git a/mysql-test/main/range.test b/mysql-test/main/range.test
index b5980a8f616..642ae3f8a08 100644
--- a/mysql-test/main/range.test
+++ b/mysql-test/main/range.test
@@ -2119,6 +2119,40 @@ drop table t1,ten,t2;
--echo # End of 10.2 tests
--echo #
+--echo #
+--echo # MDEV-9750: Quick memory exhaustion with 'extended_keys=on'...
+--echo #
+
+create table t1 (
+ kp1 int,
+ kp2 int,
+ kp3 int,
+ kp4 int,
+ key key1(kp1, kp2, kp3,kp4)
+);
+
+insert into t1 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+
+# 20 * 20 * 20 *20 = 400*400 = 160,000 ranges
+set @tmp_9750=@@optimizer_trace;
+set optimizer_trace=1;
+explain select * from t1 where
+ kp1 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
+ kp2 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
+ kp3 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
+ kp4 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20)
+;
+
+set @json= (select json_detailed(JSON_EXTRACT(trace, '$**.range_scan_alternatives'))
+ from information_schema.optimizer_trace);
+--echo # This will show 3-component ranges.
+--echo # The ranges were produced, but the optimizer has cut away kp4
+--echo # to keep the number of ranges at manageable level:
+select left(@json, 500);
+
+set optimizer_trace=@tmp_9750;
+drop table t1;
+
set global innodb_stats_persistent= @innodb_stats_persistent_save;
set global innodb_stats_persistent_sample_pages=
@innodb_stats_persistent_sample_pages_save;
diff --git a/mysql-test/main/range_mrr_icp.result b/mysql-test/main/range_mrr_icp.result
index 04c3ad2780d..128f23d71f6 100644
--- a/mysql-test/main/range_mrr_icp.result
+++ b/mysql-test/main/range_mrr_icp.result
@@ -3132,6 +3132,52 @@ drop table t1,ten,t2;
#
# End of 10.2 tests
#
+#
+# MDEV-9750: Quick memory exhaustion with 'extended_keys=on'...
+#
+create table t1 (
+kp1 int,
+kp2 int,
+kp3 int,
+kp4 int,
+key key1(kp1, kp2, kp3,kp4)
+);
+insert into t1 values (1,1,1,1),(2,2,2,2),(3,3,3,3);
+set @tmp_9750=@@optimizer_trace;
+set optimizer_trace=1;
+explain select * from t1 where
+kp1 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
+kp2 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
+kp3 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20) and
+kp4 in (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20)
+;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 index key1 key1 20 NULL 3 Using where; Using index
+set @json= (select json_detailed(JSON_EXTRACT(trace, '$**.range_scan_alternatives'))
+from information_schema.optimizer_trace);
+# This will show 3-component ranges.
+# The ranges were produced, but the optimizer has cut away kp4
+# to keep the number of ranges at manageable level:
+select left(@json, 500);
+left(@json, 500)
+[
+
+ [
+
+ {
+ "index": "key1",
+ "ranges":
+ [
+ "(1,1,1) <= (kp1,kp2,kp3) <= (1,1,1)",
+ "(1,1,2) <= (kp1,kp2,kp3) <= (1,1,2)",
+ "(1,1,3) <= (kp1,kp2,kp3) <= (1,1,3)",
+ "(1,1,4) <= (kp1,kp2,kp3) <= (1,1,4)",
+ "(1,1,5) <= (kp1,kp2,kp3) <= (1,1,5)",
+ "(1,1,6) <= (kp1,kp2,kp3) <= (1,1,6)",
+ "(1,1,7) <= (kp1,kp2,kp3) <= (1,1,7)",
+ "
+set optimizer_trace=@tmp_9750;
+drop table t1;
set global innodb_stats_persistent= @innodb_stats_persistent_save;
set global innodb_stats_persistent_sample_pages=
@innodb_stats_persistent_sample_pages_save;
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index eed7baab377..eb093e39011 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -399,6 +399,11 @@ static SEL_ARG *key_or(RANGE_OPT_PARAM *param,
static SEL_ARG *key_and(RANGE_OPT_PARAM *param,
SEL_ARG *key1, SEL_ARG *key2,
uint clone_flag);
+static SEL_ARG *key_or_with_limit(RANGE_OPT_PARAM *param,
+ SEL_ARG *key1, SEL_ARG *key2);
+static SEL_ARG *key_and_with_limit(RANGE_OPT_PARAM *param,
+ SEL_ARG *key1, SEL_ARG *key2,
+ uint clone_flag);
static bool get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1);
bool get_quick_keys(PARAM *param,QUICK_RANGE_SELECT *quick,KEY_PART *key,
SEL_ARG *key_tree, uchar *min_key,uint min_key_flag,
@@ -410,6 +415,8 @@ static bool null_part_in_key(KEY_PART *key_part, const uchar *key,
uint length);
static bool is_key_scan_ror(PARAM *param, uint keynr, uint8 nparts);
+static SEL_ARG *enforce_sel_arg_weight_limit(SEL_ARG *sel_arg);
+
#include "opt_range_mrr.cc"
static bool sel_trees_have_common_keys(SEL_TREE *tree1, SEL_TREE *tree2,
@@ -706,7 +713,7 @@ int SEL_IMERGE::or_sel_tree_with_checks(RANGE_OPT_PARAM *param,
SEL_ARG *key1= (*or_tree)->keys[key_no];
SEL_ARG *key2= tree->keys[key_no];
key2->incr_refs();
- if ((result->keys[key_no]= key_or(param, key1, key2)))
+ if ((result->keys[key_no]= key_or_with_limit(param, key1, key2)))
{
result_keys.set_bit(key_no);
@@ -1877,6 +1884,7 @@ SEL_ARG::SEL_ARG(SEL_ARG &arg) :Sql_alloc()
next_key_part=arg.next_key_part;
max_part_no= arg.max_part_no;
use_count=1; elements=1;
+ weight=1;
}
@@ -1893,7 +1901,7 @@ SEL_ARG::SEL_ARG(Field *f,const uchar *min_value_arg,
:min_flag(0), max_flag(0), maybe_flag(0), maybe_null(f->real_maybe_null()),
elements(1), use_count(1), field(f), min_value((uchar*) min_value_arg),
max_value((uchar*) max_value_arg), next(0),prev(0),
- next_key_part(0), color(BLACK), type(KEY_RANGE)
+ next_key_part(0), color(BLACK), type(KEY_RANGE), weight(1)
{
left=right= &null_element;
max_part_no= 1;
@@ -1905,7 +1913,7 @@ SEL_ARG::SEL_ARG(Field *field_,uint8 part_,
:min_flag(min_flag_),max_flag(max_flag_),maybe_flag(maybe_flag_),
part(part_),maybe_null(field_->real_maybe_null()), elements(1),use_count(1),
field(field_), min_value(min_value_), max_value(max_value_),
- next(0),prev(0),next_key_part(0),color(BLACK),type(KEY_RANGE)
+ next(0),prev(0),next_key_part(0),color(BLACK),type(KEY_RANGE), weight(1)
{
max_part_no= part+1;
left=right= &null_element;
@@ -5432,7 +5440,7 @@ TABLE_READ_PLAN *merge_same_index_scans(PARAM *param, SEL_IMERGE *imerge,
if ((*tree)->keys[key_idx])
(*tree)->keys[key_idx]->incr_refs();
if (((*changed_tree)->keys[key_idx]=
- key_or(param, key, (*tree)->keys[key_idx])))
+ key_or_with_limit(param, key, (*tree)->keys[key_idx])))
(*changed_tree)->keys_map.set_bit(key_idx);
*tree= NULL;
removed_cnt++;
@@ -9094,7 +9102,7 @@ int and_range_trees(RANGE_OPT_PARAM *param, SEL_TREE *tree1, SEL_TREE *tree2,
key2->incr_refs();
}
SEL_ARG *key;
- if ((result->keys[key_no]= key =key_and(param, key1, key2, flag)))
+ if ((result->keys[key_no]= key =key_and_with_limit(param, key1, key2, flag)))
{
if (key && key->type == SEL_ARG::IMPOSSIBLE)
{
@@ -9637,7 +9645,7 @@ tree_or(RANGE_OPT_PARAM *param,SEL_TREE *tree1,SEL_TREE *tree2)
key1->incr_refs();
key2->incr_refs();
}
- if ((result->keys[key_no]= key_or(param, key1, key2)))
+ if ((result->keys[key_no]= key_or_with_limit(param, key1, key2)))
result->keys_map.set_bit(key_no);
}
result->type= tree1->type;
@@ -9723,6 +9731,8 @@ and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
key1->right= key1->left= &null_element;
key1->next= key1->prev= 0;
}
+ uint new_weight= 0;
+
for (next=key1->first(); next ; next=next->next)
{
if (next->next_key_part)
@@ -9734,18 +9744,23 @@ and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
continue;
}
next->next_key_part=tmp;
+ new_weight += 1 + tmp->weight;
if (use_count)
next->increment_use_count(use_count);
if (param->alloced_sel_args > SEL_ARG::MAX_SEL_ARGS)
break;
}
else
+ {
+ new_weight += 1 + key2->weight;
next->next_key_part=key2;
+ }
}
if (!key1)
return &null_element; // Impossible ranges
key1->use_count++;
key1->max_part_no= MY_MAX(key2->max_part_no, key2->part+1);
+ key1->weight= new_weight;
return key1;
}
@@ -9780,6 +9795,17 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
clone_flag=swap_clone_flag(clone_flag);
}
// key1->part < key2->part
+
+ /*
+ Do not combine the trees if their total weight is likely to exceed the
+ MAX_WEIGHT.
+ (It is possible that key1 has next_key_part that has empty overlap with
+ key2. In this case, the combined tree will have a smaller weight than we
+ predict. We assume this is rare.)
+ */
+ if (key1->weight + key1->elements*key2->weight > SEL_ARG::MAX_WEIGHT)
+ return key1;
+
key1->use_count--;
if (key1->use_count > 0)
if (!(key1= key1->clone_tree(param)))
@@ -9810,6 +9836,9 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
{ // Both are maybe key
key1->next_key_part=key_and(param, key1->next_key_part,
key2->next_key_part, clone_flag);
+
+ key1->weight = 1 + (key1->next_key_part? key1->next_key_part->weight : 0);
+
if (key1->next_key_part &&
key1->next_key_part->type == SEL_ARG::IMPOSSIBLE)
return key1;
@@ -9839,6 +9868,7 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
key2->use_count--;
SEL_ARG *e1=key1->first(), *e2=key2->first(), *new_tree=0;
uint max_part_no= MY_MAX(key1->max_part_no, key2->max_part_no);
+ uint new_weight= 0; // Weight of the result tree
while (e1 && e2)
{
@@ -9860,6 +9890,7 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
if (!new_arg)
return &null_element; // End of memory
new_arg->next_key_part=next;
+ new_weight += 1 + (next? next->weight: 0);
if (!new_tree)
{
new_tree=new_arg;
@@ -9877,6 +9908,7 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
if (!new_tree)
return &null_element; // Impossible range
new_tree->max_part_no= max_part_no;
+ new_tree->weight= new_weight;
return new_tree;
}
@@ -9899,6 +9931,21 @@ get_range(SEL_ARG **e1,SEL_ARG **e2,SEL_ARG *root1)
}
+static SEL_ARG *key_or_with_limit(RANGE_OPT_PARAM *param,
+ SEL_ARG *key1, SEL_ARG *key2)
+{
+ return enforce_sel_arg_weight_limit(key_or(param, key1, key2));
+}
+
+
+static SEL_ARG *key_and_with_limit(RANGE_OPT_PARAM *param,
+ SEL_ARG *key1, SEL_ARG *key2,
+ uint clone_flag)
+{
+ return enforce_sel_arg_weight_limit(key_and(param, key1, key2, clone_flag));
+}
+
+
/**
Combine two range expression under a common OR. On a logical level, the
transformation is key_or( expr1, expr2 ) => expr1 OR expr2.
@@ -10553,6 +10600,19 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
}
key1->use_count++;
+ /* Re-compute the result tree's weight. */
+ {
+ uint new_weight= 0;
+ const SEL_ARG *sl;
+ for (sl= key1->first(); sl ; sl= sl->next)
+ {
+ new_weight++;
+ if (sl->next_key_part)
+ new_weight += sl->next_key_part->weight;
+ }
+ key1->weight= new_weight;
+ }
+
key1->max_part_no= max_part_no;
return key1;
}
@@ -10590,6 +10650,108 @@ static bool eq_tree(SEL_ARG* a,SEL_ARG *b)
}
+/*
+ Compute the MAX(key part) in this SEL_ARG graph.
+*/
+uint SEL_ARG::get_max_key_part() const
+{
+ const SEL_ARG *cur;
+ uint max_part= part;
+ for (cur= first(); cur ; cur=cur->next)
+ {
+ if (cur->next_key_part)
+ {
+ uint mp= cur->next_key_part->get_max_key_part();
+ max_part = MY_MAX(part, mp);
+ }
+ }
+ return max_part;
+}
+
+
+/*
+ Remove the SEL_ARG graph elements which have part > max_part.
+
+ @detail
+ Also update weight for the graph and any modified subgraphs.
+*/
+
+void prune_sel_arg_graph(SEL_ARG *sel_arg, uint max_part)
+{
+ SEL_ARG *cur;
+ DBUG_ASSERT(max_part >= sel_arg->part);
+
+ for (cur= sel_arg->first(); cur ; cur=cur->next)
+ {
+ if (cur->next_key_part)
+ {
+ if (cur->next_key_part->part > max_part)
+ {
+ // Remove cur->next_key_part.
+ sel_arg->weight -= cur->next_key_part->weight;
+ cur->next_key_part= NULL;
+ }
+ else
+ {
+ uint old_weight = cur->next_key_part->weight;
+ prune_sel_arg_graph(cur->next_key_part, max_part);
+ old_weight -= cur->next_key_part->weight;
+ sel_arg->weight -= old_weight;
+ }
+ }
+ }
+}
+
+
+/*
+ @brief
+ Make sure the passed SEL_ARG graph's weight is below SEL_ARG::MAX_WEIGHT,
+ by cutting off branches if necessary.
+
+ @detail
+ @see declaration of SEL_ARG::weight for definition of weight.
+
+ This function attempts to reduce the graph's weight by cutting off
+ SEL_ARG::next_key_part connections if necessary.
+
+ We start with maximum used keypart and then remove one keypart after
+ another until the graph's weight is within the limit.
+
+ @return
+ tree pointer The tree after processing,
+ NULL If it was not possible to reduce the weight of the tree below the
+ limit.
+*/
+
+SEL_ARG *enforce_sel_arg_weight_limit(SEL_ARG *sel_arg)
+{
+ if (!sel_arg || sel_arg->type != SEL_ARG::KEY_RANGE)
+ return sel_arg;
+
+ while (1)
+ {
+ if (sel_arg->weight <= SEL_ARG::MAX_WEIGHT)
+ return sel_arg;
+
+ uint max_part= sel_arg->get_max_key_part();
+ if (max_part == sel_arg->part)
+ return NULL;
+
+#ifdef EXTRA_DEBUG
+ uint weight1= sel_arg->weight;
+#endif
+
+ max_part--;
+ prune_sel_arg_graph(sel_arg, max_part);
+
+#ifdef EXTRA_DEBUG
+ DBUG_PRINT("info", ("enforce_sel_arg_weight_limit: %d->%d", weight1,
+ sel_arg->weight));
+#endif
+ }
+}
+
+
SEL_ARG *
SEL_ARG::insert(SEL_ARG *key)
{
@@ -10628,6 +10790,8 @@ SEL_ARG::insert(SEL_ARG *key)
SEL_ARG *root=rb_insert(key); // rebalance tree
root->use_count=this->use_count; // copy root info
root->elements= this->elements+1;
+ // Add the weight: weight of this element (=1) + next_key_part's weight
+ root->weight += 1 + (next_key_part? next_key_part->weight: 0);
root->maybe_flag=this->maybe_flag;
return root;
}
@@ -10685,6 +10849,11 @@ SEL_ARG::tree_delete(SEL_ARG *key)
root=this;
this->parent= 0;
+ // Compute the weight the tree will have after the element is removed
+ // We remove the element itself (weight=1)
+ // and the sub-graph connected to its next_key_part.
+ uint new_weight= root->weight - (1 + (key->next_key_part?
+ key->next_key_part->weight : 0));
/* Unlink from list */
if (key->prev)
key->prev->next=key->next;
@@ -10736,6 +10905,7 @@ SEL_ARG::tree_delete(SEL_ARG *key)
test_rb_tree(root,root->parent);
root->use_count=this->use_count; // Fix root counters
+ root->weight = new_weight;
root->elements=this->elements-1;
root->maybe_flag=this->maybe_flag;
DBUG_RETURN(root);
diff --git a/sql/opt_range.h b/sql/opt_range.h
index 11d9f80865a..d37339707b8 100644
--- a/sql/opt_range.h
+++ b/sql/opt_range.h
@@ -223,6 +223,43 @@ class RANGE_OPT_PARAM;
We avoid consuming too much memory by setting a limit on the number of
SEL_ARG object we can construct during one range analysis invocation.
+
+ 5. SEL_ARG GRAPH WEIGHT
+
+ A SEL_ARG graph has a property we call weight, and we define it as follows:
+
+ If the SEL_ARG graph does not have any node with multiple incoming
+ next_key_part edges, then its weight is the number of SEL_ARG objects used.
+
+ If there is a node with multiple next_key_part edges, clone the node (and
+ the nodes connected via prev/next links to it) and redirect one of the
+ incoming next_key_part to the clone. (If the node has "peer" nodes
+ connected to it via prev/next links, they will have to be cloned as well)
+
+ Repeat this until we get a graph without multiple next_key_part edges
+ coming into the same node. Then, the number of SEL_ARG objects in the
+ graph is the weight.
+
+ Example:
+
+ | +-------+ $ $
+ \->| kp1=2 |--$--------------$-+
+ +-------+ $ $ | +--------+
+ | $ $ ==>| kp3=11 |
+ +-------+ $ $ | +--------+
+ | kp1=3 |--$--------------$-+ |
+ +-------+ $ $ +--------+
+ $ $ | kp3=14 |
+ $ $ +--------+
+
+ Here, the weight is 2 + 2*2=6.
+
+ The rationale behind the weight is:
+ - it has the same order-of-magnitude as the number of ranges that the
+ SEL_ARG graph is describing,
+ - it is a lot easier to compute,
+ - it can be updated incrementally when performing AND/OR operations on
+ parts of the graph.
*/
class SEL_ARG :public Sql_alloc
@@ -236,6 +273,9 @@ class SEL_ARG :public Sql_alloc
/*
The ordinal number the least significant component encountered in
the ranges of the SEL_ARG tree (the first component has number 1)
+
+ Note: this number is currently not precise, it is an upper bound.
+ @seealso SEL_ARG::get_max_key_part()
*/
uint16 max_part_no;
/*
@@ -263,6 +303,14 @@ class SEL_ARG :public Sql_alloc
enum leaf_color { BLACK,RED } color;
enum Type { IMPOSSIBLE, MAYBE, MAYBE_KEY, KEY_RANGE } type;
+ /*
+ For R-B root nodes only: the graph weight, as defined above in the
+ SEL_ARG GRAPH WEIGHT section.
+ */
+ uint weight;
+ enum { MAX_WEIGHT = 32000 };
+
+ /* See RANGE_OPT_PARAM::alloced_sel_args */
enum { MAX_SEL_ARGS = 16000 };
SEL_ARG() {}
@@ -273,7 +321,7 @@ class SEL_ARG :public Sql_alloc
SEL_ARG(enum Type type_arg)
:min_flag(0), max_part_no(0) /* first key part means 1. 0 mean 'no parts'*/,
elements(1),use_count(1),left(0),right(0),
- next_key_part(0), color(BLACK), type(type_arg)
+ next_key_part(0), color(BLACK), type(type_arg), weight(1)
{}
/**
returns true if a range predicate is equal. Use all_same()
@@ -287,6 +335,9 @@ class SEL_ARG :public Sql_alloc
return true;
return cmp_min_to_min(arg) == 0 && cmp_max_to_max(arg) == 0;
}
+
+ uint get_max_key_part() const;
+
/**
returns true if all the predicates in the keypart tree are equal
*/
1
0
[Commits] 451c3b3: MDEV-23619 MariaDB crash on WITH RECURSIVE UNION ALL (CTE) query
by IgorBabaev 11 Nov '20
by IgorBabaev 11 Nov '20
11 Nov '20
revision-id: 451c3b36edb8f63fa66599bf5deefa9e4494979b (mariadb-10.2.31-561-g451c3b3)
parent(s): c7902186129cae888c45a87150d33059528a7033
author: Igor Babaev
committer: Igor Babaev
timestamp: 2020-11-10 16:02:30 -0800
message:
MDEV-23619 MariaDB crash on WITH RECURSIVE UNION ALL (CTE) query
Due to a premature cleanup of the unit that specified a recursive CTE
used in the second operand of union the server fell into an infinite
loop in the reported test case. In other cases this premature cleanup
could cause other problems.
The bug is the result of a not quite correct fix for MDEV-17024. The
unit that specifies a recursive CTE has to be cleaned only after the
cleanup of the last external reference to this CTE. It means that
cleanups of the unit triggered not by the cleanup of a external
reference to the CTE must be blocked.
Usage of local table chains in selects to get external references to
recursive CTEs was not correct either because of possible merges of
some selects.
Also fixed a minor bug in st_select_lex::set_explain_type() that caused
typing 'RECURSIVE UNION' instead of 'UNION' in EXPLAIN output for external
references to a recursive CTE.
---
mysql-test/r/cte_recursive.result | 229 +++++++++++++++++++++++++++++++++++++-
mysql-test/t/cte_recursive.test | 50 +++++++++
sql/sql_lex.cc | 8 +-
sql/sql_select.cc | 5 +-
sql/sql_union.cc | 42 +++----
5 files changed, 309 insertions(+), 25 deletions(-)
diff --git a/mysql-test/r/cte_recursive.result b/mysql-test/r/cte_recursive.result
index 6404931..85883d3 100644
--- a/mysql-test/r/cte_recursive.result
+++ b/mysql-test/r/cte_recursive.result
@@ -1301,7 +1301,7 @@ select ancestors.name, ancestors.dob from ancestors;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <derived4> ALL NULL NULL NULL NULL 24
4 DERIVED folks ALL NULL NULL NULL NULL 12 Using where
-6 RECURSIVE UNION <derived3> ALL NULL NULL NULL NULL 12
+6 UNION <derived3> ALL NULL NULL NULL NULL 12
5 RECURSIVE UNION <derived4> ALL NULL NULL NULL NULL 24
NULL UNION RESULT <union4,6,5> ALL NULL NULL NULL NULL NULL
3 DERIVED folks ALL NULL NULL NULL NULL 12 Using where
@@ -3964,5 +3964,232 @@ YEAR d1 d2
DROP PROCEDURE p;
DROP TABLE t1,t2,t3,t4;
#
+# MDEV-23619: recursive CTE used only in the second operand of UNION
+#
+create table t1 (
+a bigint(10) not null auto_increment,
+b int(5) not null,
+c bigint(10) default null,
+primary key (a)
+) engine myisam;
+insert into t1 values
+(1,3,12), (2,7,15), (3,1,3), (4,3,1);
+explain with recursive r_cte as
+( select * from t1 as s
+union
+select t1.* from t1, r_cte as r where t1.c = r.a )
+select 0 as b FROM dual union all select b FROM r_cte as t;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY NULL NULL NULL NULL NULL NULL NULL No tables used
+2 DERIVED s ALL NULL NULL NULL NULL 4
+3 RECURSIVE UNION t1 ALL NULL NULL NULL NULL 4 Using where
+3 RECURSIVE UNION <derived2> ref key0 key0 8 test.t1.c 2
+NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL
+4 UNION <derived2> ALL NULL NULL NULL NULL 4
+with recursive r_cte as
+( select * from t1 as s
+union
+select t1.* from t1, r_cte as r where t1.c = r.a )
+select 0 as b FROM dual union all select b FROM r_cte as t;
+b
+0
+3
+7
+1
+3
+analyze format=json with recursive r_cte as
+( select * from t1 as s
+union
+select t1.* from t1, r_cte as r where t1.c = r.a )
+select 0 as b FROM dual union all select b FROM r_cte as t;
+ANALYZE
+{
+ "query_block": {
+ "union_result": {
+ "table_name": "<union1,4>",
+ "access_type": "ALL",
+ "r_loops": 0,
+ "r_rows": null,
+ "query_specifications": [
+ {
+ "query_block": {
+ "select_id": 1,
+ "table": {
+ "message": "No tables used"
+ }
+ }
+ },
+ {
+ "query_block": {
+ "select_id": 4,
+ "r_loops": 1,
+ "r_total_time_ms": "REPLACED",
+ "table": {
+ "table_name": "<derived2>",
+ "access_type": "ALL",
+ "r_loops": 1,
+ "rows": 4,
+ "r_rows": 4,
+ "r_total_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100,
+ "materialized": {
+ "query_block": {
+ "recursive_union": {
+ "table_name": "<union2,3>",
+ "access_type": "ALL",
+ "r_loops": 0,
+ "r_rows": null,
+ "query_specifications": [
+ {
+ "query_block": {
+ "select_id": 2,
+ "r_loops": 1,
+ "r_total_time_ms": "REPLACED",
+ "table": {
+ "table_name": "s",
+ "access_type": "ALL",
+ "r_loops": 1,
+ "rows": 4,
+ "r_rows": 4,
+ "r_total_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100
+ }
+ }
+ },
+ {
+ "query_block": {
+ "select_id": 3,
+ "r_loops": 1,
+ "r_total_time_ms": "REPLACED",
+ "table": {
+ "table_name": "t1",
+ "access_type": "ALL",
+ "r_loops": 1,
+ "rows": 4,
+ "r_rows": 4,
+ "r_total_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100,
+ "attached_condition": "t1.c is not null"
+ },
+ "table": {
+ "table_name": "<derived2>",
+ "access_type": "ref",
+ "possible_keys": ["key0"],
+ "key": "key0",
+ "key_length": "8",
+ "used_key_parts": ["a"],
+ "ref": ["test.t1.c"],
+ "r_loops": 4,
+ "rows": 2,
+ "r_rows": 0.5,
+ "r_total_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100
+ }
+ }
+ }
+ ]
+ }
+ }
+ }
+ }
+ }
+ }
+ ]
+ }
+ }
+}
+prepare stmt from "with recursive r_cte as
+( select * from t1 as s
+union
+select t1.* from t1, r_cte as r where t1.c = r.a )
+select 0 as b FROM dual union all select b FROM r_cte as t";
+execute stmt;
+b
+0
+3
+7
+1
+3
+execute stmt;
+b
+0
+3
+7
+1
+3
+deallocate prepare stmt;
+#checking hanging cte that uses a recursive cte
+explain with h_cte as
+( with recursive r_cte as
+( select * from t1 as s
+union
+select t1.* from t1, r_cte as r where t1.c = r.a )
+select 0 as b FROM dual union all select b FROM r_cte as t)
+select * from t1 as tt;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY tt ALL NULL NULL NULL NULL 4
+with h_cte as
+( with recursive r_cte as
+( select * from t1 as s
+union
+select t1.* from t1, r_cte as r where t1.c = r.a )
+select 0 as b FROM dual union all select b FROM r_cte as t)
+select * from t1 as tt;
+a b c
+1 3 12
+2 7 15
+3 1 3
+4 3 1
+analyze format=json with h_cte as
+( with recursive r_cte as
+( select * from t1 as s
+union
+select t1.* from t1, r_cte as r where t1.c = r.a )
+select 0 as b FROM dual union all select b FROM r_cte as t)
+select * from t1 as tt;
+ANALYZE
+{
+ "query_block": {
+ "select_id": 1,
+ "r_loops": 1,
+ "r_total_time_ms": "REPLACED",
+ "table": {
+ "table_name": "tt",
+ "access_type": "ALL",
+ "r_loops": 1,
+ "rows": 4,
+ "r_rows": 4,
+ "r_total_time_ms": "REPLACED",
+ "filtered": 100,
+ "r_filtered": 100
+ }
+ }
+}
+prepare stmt from "with h_cte as
+( with recursive r_cte as
+( select * from t1 as s
+union
+select t1.* from t1, r_cte as r where t1.c = r.a )
+select 0 as b FROM dual union all select b FROM r_cte as t)
+select * from t1 as tt";
+execute stmt;
+a b c
+1 3 12
+2 7 15
+3 1 3
+4 3 1
+execute stmt;
+a b c
+1 3 12
+2 7 15
+3 1 3
+4 3 1
+deallocate prepare stmt;
+drop table t1;
+#
# End of 10.2 tests
#
diff --git a/mysql-test/t/cte_recursive.test b/mysql-test/t/cte_recursive.test
index d190458..082e9be 100644
--- a/mysql-test/t/cte_recursive.test
+++ b/mysql-test/t/cte_recursive.test
@@ -2641,5 +2641,55 @@ DROP PROCEDURE p;
DROP TABLE t1,t2,t3,t4;
--echo #
+--echo # MDEV-23619: recursive CTE used only in the second operand of UNION
+--echo #
+
+create table t1 (
+ a bigint(10) not null auto_increment,
+ b int(5) not null,
+ c bigint(10) default null,
+ primary key (a)
+) engine myisam;
+insert into t1 values
+ (1,3,12), (2,7,15), (3,1,3), (4,3,1);
+
+let $q=
+with recursive r_cte as
+( select * from t1 as s
+ union
+ select t1.* from t1, r_cte as r where t1.c = r.a )
+select 0 as b FROM dual union all select b FROM r_cte as t;
+
+eval explain $q;
+eval $q;
+--source include/analyze-format.inc
+eval analyze format=json $q;
+eval prepare stmt from "$q";
+execute stmt;
+execute stmt;
+deallocate prepare stmt;
+
+--echo #checking hanging cte that uses a recursive cte
+let $q1=
+with h_cte as
+( with recursive r_cte as
+ ( select * from t1 as s
+ union
+ select t1.* from t1, r_cte as r where t1.c = r.a )
+ select 0 as b FROM dual union all select b FROM r_cte as t)
+select * from t1 as tt;
+
+eval explain $q1;
+eval $q1;
+--source include/analyze-format.inc
+eval analyze format=json $q1;
+eval prepare stmt from "$q1";
+execute stmt;
+execute stmt;
+deallocate prepare stmt;
+
+drop table t1;
+
+--echo #
--echo # End of 10.2 tests
--echo #
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc
index e1e3073..77e6b2b 100644
--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -4450,9 +4450,11 @@ void st_select_lex::set_explain_type(bool on_the_fly)
/*
pos_in_table_list=NULL for e.g. post-join aggregation JOIN_TABs.
*/
- if (tab->table && tab->table->pos_in_table_list &&
- tab->table->pos_in_table_list->with &&
- tab->table->pos_in_table_list->with->is_recursive)
+ if (!(tab->table && tab->table->pos_in_table_list))
+ continue;
+ TABLE_LIST *tbl= tab->table->pos_in_table_list;
+ if (tbl->with && tbl->with->is_recursive &&
+ tbl->is_with_table_recursive_reference())
{
uses_cte= true;
break;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 3b09009..82ff4d3 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -12275,6 +12275,9 @@ void JOIN::join_free()
for (tmp_unit= select_lex->first_inner_unit();
tmp_unit;
tmp_unit= tmp_unit->next_unit())
+ {
+ if (tmp_unit->with_element && tmp_unit->with_element->is_recursive)
+ continue;
for (sl= tmp_unit->first_select(); sl; sl= sl->next_select())
{
Item_subselect *subselect= sl->master_unit()->item;
@@ -12292,7 +12295,7 @@ void JOIN::join_free()
/* Can't unlock if at least one JOIN is still needed */
can_unlock= can_unlock && full_local;
}
-
+ }
/*
We are not using tables anymore
Unlock all tables. We may be in an INSERT .... SELECT statement.
diff --git a/sql/sql_union.cc b/sql/sql_union.cc
index 8b1719c..9a16237 100644
--- a/sql/sql_union.cc
+++ b/sql/sql_union.cc
@@ -1370,13 +1370,7 @@ bool st_select_lex_unit::cleanup()
{
DBUG_RETURN(FALSE);
}
- /*
- When processing a PS/SP or an EXPLAIN command cleanup of a unit can
- be performed immediately when the unit is reached in the cleanup
- traversal initiated by the cleanup of the main unit.
- */
- if (!thd->stmt_arena->is_stmt_prepare() && !thd->lex->describe &&
- with_element && with_element->is_recursive && union_result)
+ if (with_element && with_element->is_recursive && union_result)
{
select_union_recursive *result= with_element->rec_result;
if (++result->cleanup_count == with_element->rec_outer_references)
@@ -1554,27 +1548,31 @@ bool st_select_lex::cleanup()
if (join)
{
+ List_iterator<TABLE_LIST> ti(leaf_tables);
+ TABLE_LIST *tbl;
+ while ((tbl= ti++))
+ {
+ if (tbl->is_recursive_with_table() &&
+ !tbl->is_with_table_recursive_reference())
+ {
+ /*
+ If query is killed before open_and_process_table() for tbl
+ is called then 'with' is already set, but 'derived' is not.
+ */
+ st_select_lex_unit *unit= tbl->with->spec;
+ error|= (bool) error | (uint) unit->cleanup();
+ }
+ }
DBUG_ASSERT((st_select_lex*)join->select_lex == this);
error= join->destroy();
delete join;
join= 0;
}
- for (TABLE_LIST *tbl= get_table_list(); tbl; tbl= tbl->next_local)
- {
- if (tbl->is_recursive_with_table() &&
- !tbl->is_with_table_recursive_reference())
- {
- /*
- If query is killed before open_and_process_table() for tbl
- is called then 'with' is already set, but 'derived' is not.
- */
- st_select_lex_unit *unit= tbl->with->spec;
- error|= (bool) error | (uint) unit->cleanup();
- }
- }
for (SELECT_LEX_UNIT *lex_unit= first_inner_unit(); lex_unit ;
lex_unit= lex_unit->next_unit())
{
+ if (lex_unit->with_element && lex_unit->with_element->is_recursive)
+ continue;
error= (bool) ((uint) error | (uint) lex_unit->cleanup());
}
inner_refs_list.empty();
@@ -1594,8 +1592,12 @@ void st_select_lex::cleanup_all_joins(bool full)
join->cleanup(full);
for (unit= first_inner_unit(); unit; unit= unit->next_unit())
+ {
+ if (unit->with_element && unit->with_element->is_recursive)
+ continue;
for (sl= unit->first_select(); sl; sl= sl->next_select())
sl->cleanup_all_joins(full);
+ }
DBUG_VOID_RETURN;
}
1
0