revision-id: d88e173890f85c5138067c0dec897cc60566972a (fb-prod201801-235-gd88e173890f) parent(s): eb6f779acccf4e0f86de98eb894155fa28616cc9 author: Sergei Petrunia committer: Sergei Petrunia timestamp: 2019-06-10 17:40:00 +0300 message: Range Locking: Limited support for shared point locks: Lock escalation - Update RocksDB to a revision with it MyRocks part of it: - Testcase - @@rocksdb_max_lock_memory is now a writable global variable - When in range locking mode, there is 'rocksdb_locktree_current_lock_memory' status variable that shows amount of memory currently used for locks --- .../rocksdb/include/select_from_rocksdb_locks.inc | 25 ++++- .../rocksdb/r/range_locking_shared_locks.result | 125 ++++++++++++++++++++- .../rocksdb/t/range_locking_shared_locks.test | 85 +++++++++++++- rocksdb | 2 +- storage/rocksdb/ha_rocksdb.cc | 46 ++++++-- 5 files changed, 265 insertions(+), 18 deletions(-) diff --git a/mysql-test/suite/rocksdb/include/select_from_rocksdb_locks.inc b/mysql-test/suite/rocksdb/include/select_from_rocksdb_locks.inc index 31280b0510c..79bd261d7b4 100644 --- a/mysql-test/suite/rocksdb/include/select_from_rocksdb_locks.inc +++ b/mysql-test/suite/rocksdb/include/select_from_rocksdb_locks.inc @@ -1,3 +1,16 @@ +# +# An include to print contents of I_S.ROCKSDB_LOCKS +# +# Implicit "parameters" +# - Currently it prints locks on t1.PRIMARY +# +# Explicit "parameter" variables: +# - $TRX1_ID - print this transaction as "TRX1" +# - $TRX2_ID - print this transaction as "TRX2" +# +# - $select_from_is_rowlocks_current_trx_only +# - $order_by_rowkey + --echo # select * from information_schema.rocksdb_locks; # With replacements by select_from_rocksdb_locks.inc --disable_query_log set @cf_id=(select column_family from information_schema.rocksdb_ddl @@ -24,12 +37,20 @@ if ($TRX2_ID) let $transaction_col = replace($transaction_col, '$TRX2_ID', "\$TRX2_ID"); } ---sorted_result +if ($order_by_rowkey) +{ + let $extra_order_by = ORDER BY 3,2; +} + +if (!$order_by_rowkey) +{ +
--sorted_result +} eval select replace(column_family_id, @cf_id, "\$cf_id") as COLUMN_FAMILY_ID, $transaction_col as TRANSACTION_ID, replace(`key`, @indexnr, '\${indexnr}') as `KEY`, mode -from information_schema.rocksdb_locks $extra_where; +from information_schema.rocksdb_locks $extra_where $extra_order_by; --enable_query_log diff --git a/mysql-test/suite/rocksdb/r/range_locking_shared_locks.result b/mysql-test/suite/rocksdb/r/range_locking_shared_locks.result index 014b107aea5..12b6ae9a6ea 100644 --- a/mysql-test/suite/rocksdb/r/range_locking_shared_locks.result +++ b/mysql-test/suite/rocksdb/r/range_locking_shared_locks.result @@ -112,8 +112,6 @@ pk a 3 3 4 4 5 5 -# TODO: the following prints an X lock on the range, because GetRangeLock API -# currently only supports write locks: # select * from information_schema.rocksdb_locks; # With replacements by select_from_rocksdb_locks.inc COLUMN_FAMILY_ID TRANSACTION_ID KEY mode $cf_id $TRX1_ID 0000${indexnr}80000002 S @@ -126,3 +124,126 @@ $cf_id $TRX2_ID 0000${indexnr}80000384 X rollback; disconnect con1; drop table t0,t1; +# +# Test shared point locks and lock escalation +# +create table t0 (a int); +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); +create table t1 ( +pk int primary key, +a int +) engine=rocksdb; +insert into t1 +select 1000 + 100*A.a + 10*B.a + C.a, 12345 from t0 A, t0 B, t0 C; +show status like 'rocksdb_locktree_current_lock_memory'; +Variable_name Value +rocksdb_locktree_current_lock_memory 0 +connect con1,localhost,root,,; +connection con1; +begin; +# CON1: get some shared locks +select * from t1 where pk=1001 lock in share mode; +pk a +1001 12345 +select * from t1 where pk=1100 lock in share mode; +pk a +1100 12345 +select * from t1 where pk=1200 lock in share mode; +pk a +1200 12345 +select * from t1 where pk=2500 lock in share mode; +pk a +connection default; +begin; +# DEFAULT: get the same locks so we have locks with multiple owners +select * from t1 where pk=1001 lock in share 
mode; +pk a +1001 12345 +select * from t1 where pk=1100 lock in share mode; +pk a +1100 12345 +select * from t1 where pk=1200 lock in share mode; +pk a +1200 12345 +# DEFAULT: get shared locks with one owner: +select * from t1 where pk=2510 lock in share mode; +pk a +# DEFAULT: exclusive locks on 0-10: +insert into t1 select A.a, 0 from t0 A; +connection con1; +# CON1: exclusive locks on 2000-2010: +insert into t1 select 2000+A.a, 0 from t0 A; +# select * from information_schema.rocksdb_locks; # With replacements by select_from_rocksdb_locks.inc +COLUMN_FAMILY_ID TRANSACTION_ID KEY mode +$cf_id $TRX2_ID 0000${indexnr}80000000 X +$cf_id $TRX2_ID 0000${indexnr}80000001 X +$cf_id $TRX2_ID 0000${indexnr}80000002 X +$cf_id $TRX2_ID 0000${indexnr}80000003 X +$cf_id $TRX2_ID 0000${indexnr}80000004 X +$cf_id $TRX2_ID 0000${indexnr}80000005 X +$cf_id $TRX2_ID 0000${indexnr}80000006 X +$cf_id $TRX2_ID 0000${indexnr}80000007 X +$cf_id $TRX2_ID 0000${indexnr}80000008 X +$cf_id $TRX2_ID 0000${indexnr}80000009 X +$cf_id $TRX1_ID 0000${indexnr}800003e9 S +$cf_id $TRX2_ID 0000${indexnr}800003e9 S +$cf_id $TRX1_ID 0000${indexnr}8000044c S +$cf_id $TRX2_ID 0000${indexnr}8000044c S +$cf_id $TRX1_ID 0000${indexnr}800004b0 S +$cf_id $TRX2_ID 0000${indexnr}800004b0 S +$cf_id $TRX1_ID 0000${indexnr}800007d0 X +$cf_id $TRX1_ID 0000${indexnr}800007d1 X +$cf_id $TRX1_ID 0000${indexnr}800007d2 X +$cf_id $TRX1_ID 0000${indexnr}800007d3 X +$cf_id $TRX1_ID 0000${indexnr}800007d4 X +$cf_id $TRX1_ID 0000${indexnr}800007d5 X +$cf_id $TRX1_ID 0000${indexnr}800007d6 X +$cf_id $TRX1_ID 0000${indexnr}800007d7 X +$cf_id $TRX1_ID 0000${indexnr}800007d8 X +$cf_id $TRX1_ID 0000${indexnr}800007d9 X +$cf_id $TRX1_ID 0000${indexnr}800009c4 S +$cf_id $TRX2_ID 0000${indexnr}800009ce S +connection default; +show status like 'rocksdb_locktree_current_lock_memory'; +Variable_name Value +rocksdb_locktree_current_lock_memory 7896 +set @save_mlm= @@rocksdb_max_lock_memory; +# Set the limit to cause lock escalation: 
+set @cur_mem_usage= (select +variable_value +from +information_schema.GLOBAL_STATUS +where +variable_name='rocksdb_locktree_current_lock_memory'); +set global rocksdb_max_lock_memory = cast(@cur_mem_usage+4 as SIGNED); +connection con1; +insert into t1 select 3000+A.a, 0 from t0 A; +# select * from information_schema.rocksdb_locks; # With replacements by select_from_rocksdb_locks.inc +COLUMN_FAMILY_ID TRANSACTION_ID KEY mode +$cf_id $TRX2_ID 0000${indexnr}80000000 - 0000${indexnr}80000009 X +$cf_id $TRX1_ID 0000${indexnr}800003e9 S +$cf_id $TRX2_ID 0000${indexnr}800003e9 S +$cf_id $TRX1_ID 0000${indexnr}8000044c S +$cf_id $TRX2_ID 0000${indexnr}8000044c S +$cf_id $TRX1_ID 0000${indexnr}800004b0 S +$cf_id $TRX2_ID 0000${indexnr}800004b0 S +$cf_id $TRX1_ID 0000${indexnr}800007d0 - 0000${indexnr}800007d9 X +$cf_id $TRX1_ID 0000${indexnr}800009c4 S +$cf_id $TRX2_ID 0000${indexnr}800009ce S +$cf_id $TRX1_ID 0000${indexnr}80000bb8 X +$cf_id $TRX1_ID 0000${indexnr}80000bb9 X +$cf_id $TRX1_ID 0000${indexnr}80000bba X +$cf_id $TRX1_ID 0000${indexnr}80000bbb X +$cf_id $TRX1_ID 0000${indexnr}80000bbc X +$cf_id $TRX1_ID 0000${indexnr}80000bbd X +$cf_id $TRX1_ID 0000${indexnr}80000bbe X +$cf_id $TRX1_ID 0000${indexnr}80000bbf X +$cf_id $TRX1_ID 0000${indexnr}80000bc0 X +$cf_id $TRX1_ID 0000${indexnr}80000bc1 X +connection con1; +rollback; +connection default; +rollback; +disconnect con1; +set global rocksdb_max_lock_memory= cast(@save_mlm as SIGNED); +drop table t0, t1; diff --git a/mysql-test/suite/rocksdb/t/range_locking_shared_locks.test b/mysql-test/suite/rocksdb/t/range_locking_shared_locks.test index 52118aa343f..abc437742bb 100644 --- a/mysql-test/suite/rocksdb/t/range_locking_shared_locks.test +++ b/mysql-test/suite/rocksdb/t/range_locking_shared_locks.test @@ -110,9 +110,6 @@ select * from t1 where a between 2 and 5 lock in share mode; select * from t1 where a between 2 and 5 lock in share mode; let $TRX1_ID=`select transaction_id from information_schema.rocksdb_trx 
where thread_id=connection_id()`; ---echo # TODO: the following prints an X lock on the range, because GetRangeLock API ---echo # currently only supports write locks: - --source suite/rocksdb/include/select_from_rocksdb_locks.inc rollback; @@ -121,3 +118,85 @@ disconnect con1; drop table t0,t1; +--echo # +--echo # Test shared point locks and lock escalation +--echo # +create table t0 (a int); +insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); + +create table t1 ( + pk int primary key, + a int +) engine=rocksdb; + +insert into t1 +select 1000 + 100*A.a + 10*B.a + C.a, 12345 from t0 A, t0 B, t0 C; + +show status like 'rocksdb_locktree_current_lock_memory'; + +connect (con1,localhost,root,,); +connection con1; + +begin; +--echo # CON1: get some shared locks +select * from t1 where pk=1001 lock in share mode; +select * from t1 where pk=1100 lock in share mode; +select * from t1 where pk=1200 lock in share mode; + +select * from t1 where pk=2500 lock in share mode; +let $TRX1_ID=`select transaction_id from information_schema.rocksdb_trx where thread_id=connection_id()`; + +connection default; +begin; +--echo # DEFAULT: get the same locks so we have locks with multiple owners +select * from t1 where pk=1001 lock in share mode; +select * from t1 where pk=1100 lock in share mode; +select * from t1 where pk=1200 lock in share mode; + +--echo # DEFAULT: get shared locks with one owner: +select * from t1 where pk=2510 lock in share mode; +let $TRX2_ID=`select transaction_id from information_schema.rocksdb_trx where thread_id=connection_id()`; + + +--echo # DEFAULT: exclusive locks on 0-10: +insert into t1 select A.a, 0 from t0 A; + +connection con1; +--echo # CON1: exclusive locks on 2000-2010: +insert into t1 select 2000+A.a, 0 from t0 A; + +let $order_by_rowkey=1; +#select * from information_schema.rocksdb_locks; +--source suite/rocksdb/include/select_from_rocksdb_locks.inc + +connection default; +show status like 'rocksdb_locktree_current_lock_memory'; +set 
@save_mlm= @@rocksdb_max_lock_memory; + +--echo # Set the limit to cause lock escalation: +set @cur_mem_usage= (select + variable_value + from + information_schema.GLOBAL_STATUS + where + variable_name='rocksdb_locktree_current_lock_memory'); + +set global rocksdb_max_lock_memory = cast(@cur_mem_usage+4 as SIGNED); + +connection con1; +insert into t1 select 3000+A.a, 0 from t0 A; + +#select * from information_schema.rocksdb_locks; +--source suite/rocksdb/include/select_from_rocksdb_locks.inc + +connection con1; +rollback; +connection default; +rollback; + +disconnect con1; +set global rocksdb_max_lock_memory= cast(@save_mlm as SIGNED); + +drop table t0, t1; + + diff --git a/rocksdb b/rocksdb index 2f0ee897552..ae4869e880a 160000 --- a/rocksdb +++ b/rocksdb @@ -1 +1 @@ -Subproject commit 2f0ee897552bb4a8aa66b933c0d6f8529a82e2e8 +Subproject commit ae4869e880a4118bd912fafe7fbeb6f464c4d893 diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index af6bc90ede7..c7a05f581a3 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -487,6 +487,10 @@ static void rocksdb_set_max_latest_deadlocks(THD *thd, struct st_mysql_sys_var *var, void *var_ptr, const void *save); +static void rocksdb_set_max_lock_memory(THD *thd, + struct st_mysql_sys_var *var, + void *var_ptr, const void *save); + static void rdb_set_collation_exception_list(const char *exception_list); static void rocksdb_set_collation_exception_list(THD *thd, struct st_mysql_sys_var *var, @@ -595,7 +599,7 @@ static uint32_t rocksdb_max_manual_compactions = 0; // (note that this is different from rocksdb_max_row_locks as // that one is a hard per-thread count limit, and this one is a // global memory limit) -static ulong rocksdb_max_lock_memory; +static ulonglong rocksdb_max_lock_memory; static my_bool rocksdb_use_range_locking = 0; static std::shared_ptr<rocksdb::RangeLockMgrHandle> range_lock_mgr; @@ -980,6 +984,13 @@ static MYSQL_SYSVAR_UINT(max_latest_deadlocks, 
rocksdb_max_latest_deadlocks, nullptr, rocksdb_set_max_latest_deadlocks, rocksdb::kInitialMaxDeadlocks, 0, UINT32_MAX, 0); +static MYSQL_SYSVAR_ULONGLONG(max_lock_memory, rocksdb_max_lock_memory, + PLUGIN_VAR_RQCMDARG, + "Range-locking mode: Maximum amount of memory " + "that locks from all transactions can use at a time", + nullptr, rocksdb_set_max_lock_memory, + /*initial*/1073741824, 0, UINT64_MAX, 0); + static MYSQL_SYSVAR_ENUM( info_log_level, rocksdb_info_log_level, PLUGIN_VAR_RQCMDARG, "Filter level for info logs to be written mysqld error log. " @@ -1715,13 +1726,6 @@ static MYSQL_SYSVAR_BOOL(use_range_locking, rocksdb_use_range_locking, "Use Range Locking (NEW, incomplete yet)", nullptr, nullptr, FALSE); -static MYSQL_SYSVAR_ULONG( - max_lock_memory, rocksdb_max_lock_memory, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Max Lock Memory when using Range Locking (like in TokuDB)", - nullptr, nullptr, 1024*1024*1024 /* default value */, 1024 /* min value */, - ULONG_MAX /* max value */, 0); - static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100; static struct st_mysql_sys_var *rocksdb_system_variables[] = { @@ -5006,7 +5010,7 @@ static int rocksdb_init_func(void *const p) { { range_lock_mgr->set_max_lock_memory(rocksdb_max_lock_memory); sql_print_information("RocksDB: USING NEW RANGE LOCKING"); - sql_print_information("RocksDB: Max lock memory=%lu", rocksdb_max_lock_memory); + sql_print_information("RocksDB: Max lock memory=%llu", rocksdb_max_lock_memory); } else sql_print_information("RocksDB: USING POINT LOCKING"); @@ -12932,10 +12936,13 @@ static void show_rocksdb_stall_vars(THD *thd, SHOW_VAR *var, char *buff) { // psergey: lock tree escalation count status variable. 
// static longlong rocksdb_locktree_escalation_count=1234; +static longlong rocksdb_locktree_current_lock_memory=0; static SHOW_VAR rocksdb_locktree_status_variables[] = { DEF_STATUS_VAR_FUNC("escalation_count", &rocksdb_locktree_escalation_count, SHOW_LONGLONG), + DEF_STATUS_VAR_FUNC("current_lock_memory", + &rocksdb_locktree_current_lock_memory, SHOW_LONGLONG), // end of the array marker {NullS, NullS, SHOW_LONG}}; @@ -12946,7 +12953,9 @@ static void show_rocksdb_locktree_vars(THD *thd, SHOW_VAR *var, char *buff) { var->type = SHOW_ARRAY; if (range_lock_mgr) { - rocksdb_locktree_escalation_count= range_lock_mgr->get_escalation_count(); + auto status = range_lock_mgr->GetStatus(); + rocksdb_locktree_escalation_count = status.escalation_count; + rocksdb_locktree_current_lock_memory = status.current_lock_memory; var->value = reinterpret_cast<char *>(&rocksdb_locktree_status_variables); } else @@ -13614,6 +13623,23 @@ void rocksdb_set_delayed_write_rate(THD *thd, struct st_mysql_sys_var *var, RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); } +void rocksdb_set_max_lock_memory(THD *thd, struct st_mysql_sys_var *var, + void *var_ptr, const void *save) { + const uint64_t new_val = *static_cast<const uint64_t *>(save); + if (rocksdb_max_lock_memory != new_val) { + if (range_lock_mgr->set_max_lock_memory(new_val)) { + /* NO_LINT_DEBUG */ + sql_print_warning("MyRocks: failed to set max_lock_memory"); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_ERROR_WHEN_EXECUTING_COMMAND, + "Cannot set max_lock_memory to size below currently used"); + } else { + // Succeeded + rocksdb_max_lock_memory = new_val; + } + } +} + void rocksdb_set_max_latest_deadlocks(THD *thd, struct st_mysql_sys_var *var, void *var_ptr, const void *save) { RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);