revision-id: 99cc5123c10a4c65007e15e4ae6d06ac51bd2c36 (v5.8-2684-g99cc5123c) parent(s): 8787e79af4ebe931eeb0a1ef6633576996576126 author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2020-06-15 23:04:16 +0300 message: Range Locking: do Snapshot Checking on Put/Delete/etc if assume_tracked=false. Before this patch, Range Locking relied on the caller to obtain a lock on the rowkey before that rowkey is modified with a Transaction::Put/Delete/etc call. This property holds for MyRocks. However a RocksDB user using Transaction with txn->SetSnapshot(); v= txn->Get(k); txn->Put(k, v+1); txn->Commit(); will observe a weaker transactional isolation than with point locking: the Put() will blindly overwrite any changes commited after Get(). Fixed this by doing similar to what point lock manager does. If Get/Update/etc is called with assume_tracked=false (this is the default argument value), then: - Get a point lock - Perform Snapshot Checking of the - then do the write. This will have has extra overhead for MyRocks-like users that get locks before calling Put/Delete/etc. These callers can pass assume_tracked=true and then extra checks are avoided. so this is avloid. --- utilities/transactions/pessimistic_transaction.cc | 9 +++- utilities/transactions/range_locking_test.cc | 61 ++++++++++++++++++++++- utilities/transactions/transaction_base.cc | 42 +++++----------- 3 files changed, 80 insertions(+), 32 deletions(-) diff --git a/utilities/transactions/pessimistic_transaction.cc b/utilities/transactions/pessimistic_transaction.cc index 04f6c6c8d..8f7629bbf 100644 --- a/utilities/transactions/pessimistic_transaction.cc +++ b/utilities/transactions/pessimistic_transaction.cc @@ -558,7 +558,14 @@ Status PessimisticTransaction::TryLock(ColumnFamilyHandle* column_family, // If we are not doing key tracking, just get the lock and return (this // also assumes locks are "idempotent") if (!do_key_tracking_) { - return txn_db_impl_->TryLock(this, cfh_id, key_str, exclusive); + s = txn_db_impl_->TryLock(this, cfh_id, key_str, exclusive); + if (s.ok() && !assume_tracked && snapshot_ != nullptr) { + // Perform validation + SequenceNumber tracked_at_seq = kMaxSequenceNumber; + SetSnapshotIfNeeded(); + s = ValidateSnapshot(column_family, key, &tracked_at_seq); + } + return s; } // lock this key if this transactions hasn't already locked it diff --git a/utilities/transactions/range_locking_test.cc b/utilities/transactions/range_locking_test.cc index ff1ca5aae..1de512079 100644 --- a/utilities/transactions/range_locking_test.cc +++ b/utilities/transactions/range_locking_test.cc @@ -48,7 +48,7 @@ class RangeLockingTest : public ::testing::Test { RangeLockingTest() : db(nullptr) { options.create_if_missing = true; - dbname = test::PerThreadDBPath("transaction_testdb"); + dbname = test::PerThreadDBPath("range_locking_testdb"); DestroyDB(dbname, options); Status s; @@ -70,6 +70,13 @@ class RangeLockingTest : public ::testing::Test { // from attempting to delete such files in DestroyDB. DestroyDB(dbname, options); } + + PessimisticTransaction* NewTxn( + TransactionOptions txn_opt = TransactionOptions()) { + Transaction* txn = db->BeginTransaction(WriteOptions(), txn_opt); + return reinterpret_cast<PessimisticTransaction*>(txn); + } + }; // TODO: set a smaller lock wait timeout so that the test runs faster. @@ -122,6 +129,58 @@ TEST_F(RangeLockingTest, BasicRangeLocking) { delete txn1; } +TEST_F(RangeLockingTest, SnapshotValidation) { + Status s; + Slice key_slice= Slice("k"); + ColumnFamilyHandle *cfh= db->DefaultColumnFamily(); + + auto txn0 = NewTxn(); + txn0->Put(key_slice, Slice("initial")); + txn0->Commit(); + + // txn1 + auto txn1 = NewTxn(); + txn1->SetSnapshot(); + std::string val1; + s = txn1->Get(ReadOptions(), cfh, key_slice, &val1); + ASSERT_TRUE(s.ok()); + val1 = val1 + std::string("-txn1"); + + s = txn1->Put(cfh, key_slice, Slice(val1)); + ASSERT_TRUE(s.ok()); + + // txn2 + auto txn2 = NewTxn(); + txn2->SetSnapshot(); + std::string val2; + // This will see the original value as nothing is committed + // This is also Get, so it is doesn't acquire any locks. + s = txn2->Get(ReadOptions(), cfh, key_slice, &val2); + ASSERT_TRUE(s.ok()); + + // txn1 + s = txn1->Commit(); + ASSERT_TRUE(s.ok()); + + // txn2 + val2 = val2 + std::string("-txn2"); + // Now, this call should do Snapshot Validation and fail: + s = txn2->Put(cfh, key_slice, Slice(val2)); + ASSERT_TRUE(s.IsBusy()); + + s = txn2->Commit(); + ASSERT_TRUE(s.ok()); + + /* + // Not meaningful if s.IsBusy() above is true: + // Examine the result + auto txn3= NewTxn(); + std::string val3; + Status s3 = txn3->Get(ReadOptions(), cfh, key_slice, &val3); + fprintf(stderr, "Final: %s\n", val3.c_str()); + */ +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/utilities/transactions/transaction_base.cc b/utilities/transactions/transaction_base.cc index 3a2c75bec..fb79b9d5d 100644 --- a/utilities/transactions/transaction_base.cc +++ b/utilities/transactions/transaction_base.cc @@ -378,11 +378,8 @@ Status TransactionBaseImpl::Put(ColumnFamilyHandle* column_family, const Slice& key, const Slice& value, const bool assume_tracked) { const bool do_validate = !assume_tracked; - Status s; - if (do_key_tracking_) { - s = TryLock(column_family, key, false /* read_only */, - true /* exclusive */, do_validate, assume_tracked); - } + Status s = TryLock(column_family, key, false /* read_only */, + true /* exclusive */, do_validate, assume_tracked); if (s.ok()) { s = GetBatchForWrite()->Put(column_family, key, value); @@ -398,11 +395,8 @@ Status TransactionBaseImpl::Put(ColumnFamilyHandle* column_family, const SliceParts& key, const SliceParts& value, const bool assume_tracked) { const bool do_validate = !assume_tracked; - Status s; - if (do_key_tracking_) { - s = TryLock(column_family, key, false /* read_only */, - true /* exclusive */, do_validate, assume_tracked); - } + Status s = TryLock(column_family, key, false /* read_only */, + true /* exclusive */, do_validate, assume_tracked); if (s.ok()) { s = GetBatchForWrite()->Put(column_family, key, value); @@ -435,11 +429,8 @@ Status TransactionBaseImpl::Delete(ColumnFamilyHandle* column_family, const Slice& key, const bool assume_tracked) { const bool do_validate = !assume_tracked; - Status s; - if (do_key_tracking_) { - s = TryLock(column_family, key, false /* read_only */, - true /* exclusive */, do_validate, assume_tracked); - } + Status s = TryLock(column_family, key, false /* read_only */, + true /* exclusive */, do_validate, assume_tracked); if (s.ok()) { s = GetBatchForWrite()->Delete(column_family, key); @@ -455,11 +446,8 @@ Status TransactionBaseImpl::Delete(ColumnFamilyHandle* column_family, const SliceParts& key, const bool assume_tracked) { const bool do_validate = !assume_tracked; - Status s; - if (do_key_tracking_) { - s = TryLock(column_family, key, false /* read_only */, - true /* exclusive */, do_validate, assume_tracked); - } + Status s = TryLock(column_family, key, false /* read_only */, + true /* exclusive */, do_validate, assume_tracked); if (s.ok()) { s = GetBatchForWrite()->Delete(column_family, key); @@ -475,11 +463,8 @@ Status TransactionBaseImpl::SingleDelete(ColumnFamilyHandle* column_family, const Slice& key, const bool assume_tracked) { const bool do_validate = !assume_tracked; - Status s; - if (do_key_tracking_) { - s = TryLock(column_family, key, false /* read_only */, - true /* exclusive */, do_validate, assume_tracked); - } + Status s = TryLock(column_family, key, false /* read_only */, + true /* exclusive */, do_validate, assume_tracked); if (s.ok()) { s = GetBatchForWrite()->SingleDelete(column_family, key); @@ -495,11 +480,8 @@ Status TransactionBaseImpl::SingleDelete(ColumnFamilyHandle* column_family, const SliceParts& key, const bool assume_tracked) { const bool do_validate = !assume_tracked; - Status s; - if (do_key_tracking_) { - s = TryLock(column_family, key, false /* read_only */, - true /* exclusive */, do_validate, assume_tracked); - } + Status s = TryLock(column_family, key, false /* read_only */, + true /* exclusive */, do_validate, assume_tracked); if (s.ok()) { s = GetBatchForWrite()->SingleDelete(column_family, key);