[Commits] ae4869e88: Range Locking: shared point lock support: support lock escalations
revision-id: ae4869e880a4118bd912fafe7fbeb6f464c4d893 (v5.8-1043-gae4869e88) parent(s): 2f0ee897552bb4a8aa66b933c0d6f8529a82e2e8 author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2019-06-10 17:16:12 +0300 message: Range Locking: shared point lock support: support lock escalations - Make Lock Escalation keep shared locks. Shared locks are not collapsed with other kinds of locks. - Replace RangeLockMgrHandle::get_escalation_count() with GetStatus() which also reports amount of memory used for Range Locking (and there is more data we could report through this) - Initialize LTM_STATUS_S::m_initialized. --- include/rocksdb/utilities/transaction_db.h | 9 ++- .../transactions/range_locking/ft/ft-status.h | 2 +- .../range_locking/locktree/locktree.cc | 78 ++++++++++++++++------ .../range_locking/locktree/treenode.cc | 10 +++ .../transactions/range_locking/locktree/treenode.h | 11 --- utilities/transactions/transaction_lock_mgr.cc | 18 ++--- utilities/transactions/transaction_lock_mgr.h | 6 +- 7 files changed, 89 insertions(+), 45 deletions(-) diff --git a/include/rocksdb/utilities/transaction_db.h b/include/rocksdb/utilities/transaction_db.h index 7ebac3e06..7444cb5f2 100644 --- a/include/rocksdb/utilities/transaction_db.h +++ b/include/rocksdb/utilities/transaction_db.h @@ -38,7 +38,14 @@ const uint32_t kInitialMaxDeadlocks = 5; class RangeLockMgrHandle { public: virtual int set_max_lock_memory(size_t max_lock_memory) = 0; - virtual uint64_t get_escalation_count() = 0; + + class Counters { + public: + uint64_t escalation_count; + uint64_t current_lock_memory; + }; + + virtual Counters GetStatus() = 0; virtual ~RangeLockMgrHandle() {}; }; diff --git a/utilities/transactions/range_locking/ft/ft-status.h b/utilities/transactions/range_locking/ft/ft-status.h index 25051f1ed..242964f0b 100644 --- a/utilities/transactions/range_locking/ft/ft-status.h +++ b/utilities/transactions/range_locking/ft/ft-status.h @@ -80,7 +80,7 @@ public: TOKU_ENGINE_STATUS_ROW_S 
status[LTM_STATUS_NUM_ROWS]; private: - bool m_initialized; + bool m_initialized = false; }; typedef LTM_STATUS_S* LTM_STATUS; extern LTM_STATUS_S ltm_status; diff --git a/utilities/transactions/range_locking/locktree/locktree.cc b/utilities/transactions/range_locking/locktree/locktree.cc index 0e5f7c307..00ce5aace 100644 --- a/utilities/transactions/range_locking/locktree/locktree.cc +++ b/utilities/transactions/range_locking/locktree/locktree.cc @@ -742,11 +742,16 @@ static int extract_first_n_row_locks(concurrent_tree::locked_keyrange *lkr, int num_to_extract; row_lock *row_locks; bool fn(const keyrange &range, TXNID txnid, bool is_shared, TxnidVector *owners) { - // psergey-todo: multiple owners! if (num_extracted < num_to_extract) { row_lock lock; lock.range.create_copy(range); lock.txnid = txnid; + lock.is_shared= is_shared; + // deep-copy the set of owners: + if (owners) + lock.owners = new TxnidVector(*owners); + else + lock.owners = nullptr; row_locks[num_extracted++] = lock; return true; } else { @@ -834,38 +839,60 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca // through them and merge adjacent locks with the same txnid into // one dominating lock and save it to a set of escalated locks. 
// - // first, find the index of the next row lock with a different txnid + // first, find the index of the next row lock that + // - belongs to a different txnid, or + // - belongs to several txnids, or + // - is a shared lock (we could potentially merge those but + // currently we don't) int next_txnid_index = current_index + 1; + while (next_txnid_index < num_extracted && - extracted_buf[current_index].txnid == extracted_buf[next_txnid_index].txnid) { + (extracted_buf[current_index].txnid == + extracted_buf[next_txnid_index].txnid) && + !extracted_buf[next_txnid_index].is_shared && + !extracted_buf[next_txnid_index].owners) { next_txnid_index++; } // Create an escalated range for the current txnid that dominates // each range between the current index and the next txnid's index. - const TXNID current_txnid = extracted_buf[current_index].txnid; + //const TXNID current_txnid = extracted_buf[current_index].txnid; const DBT *escalated_left_key = extracted_buf[current_index].range.get_left_key(); const DBT *escalated_right_key = extracted_buf[next_txnid_index - 1].range.get_right_key(); // Try to find a range buffer for the current txnid. Create one if it doesn't exist. // Then, append the new escalated range to the buffer. 
- uint32_t idx; - struct txnid_range_buffer *existing_range_buffer; - int r = range_buffers.find_zero<TXNID, txnid_range_buffer::find_by_txnid>( - current_txnid, - &existing_range_buffer, - &idx - ); - if (r == DB_NOTFOUND) { - struct txnid_range_buffer *XMALLOC(new_range_buffer); - new_range_buffer->txnid = current_txnid; - new_range_buffer->buffer.create(); - new_range_buffer->buffer.append(escalated_left_key, escalated_right_key); - range_buffers.insert_at(new_range_buffer, idx); - } else { - invariant_zero(r); - invariant(existing_range_buffer->txnid == current_txnid); - existing_range_buffer->buffer.append(escalated_left_key, escalated_right_key); + // (If a lock is shared by multiple txnids, append it to each txnid's list) + TxnidVector *owners_ptr; + TxnidVector singleton_owner; + if (extracted_buf[current_index].owners) + owners_ptr = extracted_buf[current_index].owners; + else { + singleton_owner.insert(extracted_buf[current_index].txnid); + owners_ptr = &singleton_owner; + } + + for (auto cur_txnid : *owners_ptr ) { + uint32_t idx; + struct txnid_range_buffer *existing_range_buffer; + int r = range_buffers.find_zero<TXNID, txnid_range_buffer::find_by_txnid>( + cur_txnid, + &existing_range_buffer, + &idx + ); + if (r == DB_NOTFOUND) { + struct txnid_range_buffer *XMALLOC(new_range_buffer); + new_range_buffer->txnid = cur_txnid; + new_range_buffer->buffer.create(); + new_range_buffer->buffer.append(escalated_left_key, escalated_right_key, + !extracted_buf[current_index].is_shared); + range_buffers.insert_at(new_range_buffer, idx); + } else { + invariant_zero(r); + invariant(existing_range_buffer->txnid == cur_txnid); + existing_range_buffer->buffer.append(escalated_left_key, escalated_right_key, + !extracted_buf[current_index].is_shared); + } } current_index = next_txnid_index; @@ -873,6 +900,7 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca // destroy the ranges copied during the extraction for (int i = 0; i < 
num_extracted; i++) { + delete extracted_buf[i].owners; extracted_buf[i].range.destroy(); } } @@ -880,6 +908,12 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca // Rebuild the locktree from each range in each range buffer, // then notify higher layers that the txnid's locks have changed. + // + // (shared locks: if a lock was initially shared between transactions TRX1, + // TRX2, etc, we will now try to acquire it acting on behalf of TRX1, of + // TRX2, etc. This will succeed and an identical shared lock will be + // constructed) + invariant(m_rangetree->is_empty()); const size_t num_range_buffers = range_buffers.size(); for (size_t i = 0; i < num_range_buffers; i++) { @@ -894,7 +928,7 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca keyrange range; range.create(rec.get_left_key(), rec.get_right_key()); row_lock lock = { .range = range, .txnid = current_txnid, - .is_shared= false, // psergey-todo: SharedLockEscalation + .is_shared= !rec.get_exclusive_flag(), .owners= nullptr }; insert_row_lock_into_tree(&lkr, lock, m_mgr); iter.next(); diff --git a/utilities/transactions/range_locking/locktree/treenode.cc b/utilities/transactions/range_locking/locktree/treenode.cc index f44918a1b..5bf349749 100644 --- a/utilities/transactions/range_locking/locktree/treenode.cc +++ b/utilities/transactions/range_locking/locktree/treenode.cc @@ -134,6 +134,16 @@ void treenode::swap_in_place(treenode *node1, treenode *node2) { node2->m_is_shared= tmp_is_shared; } +void treenode::add_shared_owner(TXNID txnid) { + assert(m_is_shared); + if (m_txnid != TXNID_SHARED) { + m_owners= new TxnidVector; + m_owners->insert(m_txnid); + m_txnid= TXNID_SHARED; + } + m_owners->insert(txnid); +} + void treenode::free(treenode *node) { // destroy the range, freeing any copied keys node->m_range.destroy(); diff --git a/utilities/transactions/range_locking/locktree/treenode.h b/utilities/transactions/range_locking/locktree/treenode.h 
index 6b082acc4..f23324f03 100644 --- a/utilities/transactions/range_locking/locktree/treenode.h +++ b/utilities/transactions/range_locking/locktree/treenode.h @@ -167,17 +167,6 @@ private: // it has at the time of destruction. keyrange m_range; - void add_shared_owner(TXNID txnid) - { - assert(m_is_shared); - if (m_txnid != TXNID_SHARED) - { - m_owners= new TxnidVector; - m_owners->insert(m_txnid); - m_txnid= TXNID_SHARED; - } - m_owners->insert(txnid); - } void remove_shared_owner(TXNID txnid); bool has_multiple_owners() { return (m_txnid == TXNID_SHARED); } diff --git a/utilities/transactions/transaction_lock_mgr.cc b/utilities/transactions/transaction_lock_mgr.cc index 34144c2af..564cd8600 100644 --- a/utilities/transactions/transaction_lock_mgr.cc +++ b/utilities/transactions/transaction_lock_mgr.cc @@ -1154,25 +1154,25 @@ RangeLockMgr::~RangeLockMgr() { ltm_.destroy(); } -uint64_t RangeLockMgr::get_escalation_count() { +RangeLockMgrHandle::Counters RangeLockMgr::GetStatus() { LTM_STATUS_S ltm_status_test; ltm_.get_status(<m_status_test); + Counters res; // Searching status variable by its string name is how Toku's unit tests // do it (why didn't they make LTM_ESCALATION_COUNT constant visible?) 
- TOKU_ENGINE_STATUS_ROW key_status = NULL; // lookup keyname in status - for (int i = 0; ; i++) { + for (int i = 0; i < LTM_STATUS_S::LTM_STATUS_NUM_ROWS; i++) { TOKU_ENGINE_STATUS_ROW status = <m_status_test.status[i]; - if (status->keyname == NULL) - break; if (strcmp(status->keyname, "LTM_ESCALATION_COUNT") == 0) { - key_status = status; - break; + res.escalation_count = status->value.num; + continue; + } + if (strcmp(status->keyname, "LTM_SIZE_CURRENT") == 0) { + res.current_lock_memory = status->value.num; } } - assert(key_status); - return key_status->value.num; + return res; } void RangeLockMgr::AddColumnFamily(const ColumnFamilyHandle *cfh) { diff --git a/utilities/transactions/transaction_lock_mgr.h b/utilities/transactions/transaction_lock_mgr.h index 608d6f34f..6c8e0638c 100644 --- a/utilities/transactions/transaction_lock_mgr.h +++ b/utilities/transactions/transaction_lock_mgr.h @@ -239,7 +239,11 @@ class RangeLockMgr : return ltm_.set_max_lock_memory(max_lock_memory); } - uint64_t get_escalation_count() override; + size_t get_max_lock_memory() { + return ltm_.get_max_lock_memory(); + } + + Counters GetStatus() override; LockStatusData GetLockStatusData() override;
participants (1)
-
Sergei Petrunia