[Commits] d64d583: MDEV-20056 Assertion `!prebuilt->index->is_primary()' failed
by IgorBabaev 26 Nov '19
by IgorBabaev 26 Nov '19
26 Nov '19
revision-id: d64d583aa46a1ec618812497dbe0a15c877f198f (mariadb-10.4.10-25-gd64d583)
parent(s): f95288211ce1023e0d268229fbe5febbf0b2edd3
author: Igor Babaev
committer: Igor Babaev
timestamp: 2019-11-25 17:40:47 -0800
message:
MDEV-20056 Assertion `!prebuilt->index->is_primary()' failed
in row_search_idx_cond_check
When usage of rowid filter is evaluated by the optimizer to join a table
to the current partial join employing a certain index it should be checked
that a key for at least the major component of this index can be constructed
using values from the columns of the partial join.
---
mysql-test/main/rowid_filter_innodb.result | 76 ++++++++++++++++++++++++++++++
mysql-test/main/rowid_filter_innodb.test | 53 +++++++++++++++++++++
sql/sql_select.cc | 9 +++-
3 files changed, 137 insertions(+), 1 deletion(-)
diff --git a/mysql-test/main/rowid_filter_innodb.result b/mysql-test/main/rowid_filter_innodb.result
index 37e32f0..eaad4ef 100644
--- a/mysql-test/main/rowid_filter_innodb.result
+++ b/mysql-test/main/rowid_filter_innodb.result
@@ -2245,3 +2245,79 @@ a
5
DROP TABLE t1;
SET GLOBAL innodb_stats_persistent= @stats.save;
+#
+# MDEV-20056: index to build range filter should not be
+# the same as table access index
+#
+SET @stats.save= @@innodb_stats_persistent;
+SET GLOBAL innodb_stats_persistent= ON;
+CREATE TABLE t1 (ch varchar(1), id int, id2 int) ENGINE=InnoDB;
+INSERT INTO t1 VALUES
+('l',3,2), ('e',NULL,NULL), ('r',7,3), ('h',NULL,2), (NULL,4,4), ('c',4,NULL),
+('k',NULL,NULL), ('h',NULL,NULL), ('b',9,NULL), ('f',6,NULL);
+CREATE TABLE t2 (
+pk int NOT NULL, col_date_key date, ch2 varchar(1), id2 int,
+PRIMARY KEY (pk), KEY (col_date_key), KEY (ch2), KEY (id2)
+) ENGINE=InnoDB;
+INSERT INTO t2 VALUES
+(1,'2034-04-21','g',9), (2,'2006-09-08','y',1), (3,NULL,'h',2),
+(4,'1987-03-02','s',2), (5,'2019-07-02','u',NULL),(6,'2012-12-18','z',1),
+(7,NULL,'w',4), (8,'2005-03-10','o',8), (9,'1987-02-12','d',4);
+CREATE TABLE t3 (id int) ENGINE=InnoDB;
+INSERT INTO t3 VALUES (6);
+ANALYZE TABLE t1,t2,t3;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+test.t2 analyze status Engine-independent statistics collected
+test.t2 analyze status OK
+test.t3 analyze status Engine-independent statistics collected
+test.t3 analyze status OK
+EXPLAIN EXTENDED SELECT 1 FROM t3
+WHERE EXISTS ( SELECT 1 FROM t1
+WHERE t3.id IN ( SELECT bt1.id FROM t2, t1 AS bt1
+WHERE bt1.id = t2.pk AND
+t2.ch2 <= 'g' ) OR
+t1.id2 = t1.id);
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 PRIMARY t3 ALL NULL NULL NULL NULL 1 100.00 Using where
+2 DEPENDENT SUBQUERY t1 ALL NULL NULL NULL NULL 10 100.00 Using where
+3 MATERIALIZED t2 range PRIMARY,col_date_key,ch2,id2 ch2 4 NULL 2 100.00 Using where; Using index
+3 MATERIALIZED bt1 ALL NULL NULL NULL NULL 10 100.00 Using where; Using join buffer (flat, BNL join)
+Warnings:
+Note 1276 Field or reference 'test.t3.id' of SELECT #2 was resolved in SELECT #1
+Note 1003 /* select#1 */ select 1 AS `1` from `test`.`t3` where <in_optimizer>(1,<expr_cache><`test`.`t3`.`id`>(exists(/* select#2 */ select 1 from `test`.`t1` where <expr_cache><`test`.`t3`.`id`>(<in_optimizer>(`test`.`t3`.`id`,`test`.`t3`.`id` in ( <materialize> (/* select#3 */ select `test`.`bt1`.`id` from `test`.`t2` join `test`.`t1` `bt1` where `test`.`bt1`.`id` = `test`.`t2`.`pk` and `test`.`t2`.`ch2` <= 'g' ), <primary_index_lookup>(`test`.`t3`.`id` in <temporary table> on distinct_key where `test`.`t3`.`id` = `<subquery3>`.`id`)))) or `test`.`t1`.`id2` = `test`.`t1`.`id` limit 1)))
+SELECT 1 FROM t3
+WHERE EXISTS ( SELECT 1 FROM t1
+WHERE t3.id IN ( SELECT bt1.id FROM t2, t1 AS bt1
+WHERE bt1.id = t2.pk AND
+t2.ch2 <= 'g' ) OR
+t1.id2 = t1.id);
+1
+1
+EXPLAIN EXTENDED SELECT 1 FROM t3
+WHERE EXISTS ( SELECT 1 FROM t1
+WHERE t3.id IN ( SELECT bt1.id FROM t2, t1 AS bt1
+WHERE bt1.ch = t2.ch2 AND
+bt1.id = t2.pk AND
+t2.ch2 <= 'g' ) OR
+t1.id2 = t1.id);
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 PRIMARY t3 ALL NULL NULL NULL NULL 1 100.00 Using where
+2 DEPENDENT SUBQUERY t1 ALL NULL NULL NULL NULL 10 100.00 Using where
+3 MATERIALIZED t2 range PRIMARY,col_date_key,ch2,id2 ch2 4 NULL 2 100.00 Using where; Using index
+3 MATERIALIZED bt1 ALL NULL NULL NULL NULL 10 100.00 Using where; Using join buffer (flat, BNL join)
+Warnings:
+Note 1276 Field or reference 'test.t3.id' of SELECT #2 was resolved in SELECT #1
+Note 1003 /* select#1 */ select 1 AS `1` from `test`.`t3` where <in_optimizer>(1,<expr_cache><`test`.`t3`.`id`>(exists(/* select#2 */ select 1 from `test`.`t1` where <expr_cache><`test`.`t3`.`id`>(<in_optimizer>(`test`.`t3`.`id`,`test`.`t3`.`id` in ( <materialize> (/* select#3 */ select `test`.`bt1`.`id` from `test`.`t2` join `test`.`t1` `bt1` where `test`.`bt1`.`ch` = `test`.`t2`.`ch2` and `test`.`bt1`.`id` = `test`.`t2`.`pk` and `test`.`t2`.`ch2` <= 'g' ), <primary_index_lookup>(`test`.`t3`.`id` in <temporary table> on distinct_key where `test`.`t3`.`id` = `<subquery3>`.`id`)))) or `test`.`t1`.`id2` = `test`.`t1`.`id` limit 1)))
+SELECT 1 FROM t3
+WHERE EXISTS ( SELECT 1 FROM t1
+WHERE t3.id IN ( SELECT bt1.id FROM t2, t1 AS bt1
+WHERE bt1.ch = t2.ch2 AND
+bt1.id = t2.pk AND
+t2.ch2 <= 'g' ) OR
+t1.id2 = t1.id);
+1
+1
+DROP TABLE t1, t2, t3;
+SET GLOBAL innodb_stats_persistent= @stats.save;
diff --git a/mysql-test/main/rowid_filter_innodb.test b/mysql-test/main/rowid_filter_innodb.test
index 4a6c431..cfca162 100644
--- a/mysql-test/main/rowid_filter_innodb.test
+++ b/mysql-test/main/rowid_filter_innodb.test
@@ -129,3 +129,56 @@ SELECT a FROM t1 WHERE c < 'k' AND b > 't' ORDER BY a;
DROP TABLE t1;
SET GLOBAL innodb_stats_persistent= @stats.save;
+
+--echo #
+--echo # MDEV-20056: index to build range filter should not be
+--echo # the same as table access index
+--echo #
+
+SET @stats.save= @@innodb_stats_persistent;
+SET GLOBAL innodb_stats_persistent= ON;
+
+CREATE TABLE t1 (ch varchar(1), id int, id2 int) ENGINE=InnoDB;
+INSERT INTO t1 VALUES
+('l',3,2), ('e',NULL,NULL), ('r',7,3), ('h',NULL,2), (NULL,4,4), ('c',4,NULL),
+('k',NULL,NULL), ('h',NULL,NULL), ('b',9,NULL), ('f',6,NULL);
+
+CREATE TABLE t2 (
+ pk int NOT NULL, col_date_key date, ch2 varchar(1), id2 int,
+ PRIMARY KEY (pk), KEY (col_date_key), KEY (ch2), KEY (id2)
+) ENGINE=InnoDB;
+INSERT INTO t2 VALUES
+(1,'2034-04-21','g',9), (2,'2006-09-08','y',1), (3,NULL,'h',2),
+(4,'1987-03-02','s',2), (5,'2019-07-02','u',NULL),(6,'2012-12-18','z',1),
+(7,NULL,'w',4), (8,'2005-03-10','o',8), (9,'1987-02-12','d',4);
+
+CREATE TABLE t3 (id int) ENGINE=InnoDB;
+INSERT INTO t3 VALUES (6);
+
+ANALYZE TABLE t1,t2,t3;
+
+let $q1=
+SELECT 1 FROM t3
+WHERE EXISTS ( SELECT 1 FROM t1
+ WHERE t3.id IN ( SELECT bt1.id FROM t2, t1 AS bt1
+ WHERE bt1.id = t2.pk AND
+ t2.ch2 <= 'g' ) OR
+ t1.id2 = t1.id);
+
+eval EXPLAIN EXTENDED $q1;
+eval $q1;
+
+let $q2=
+SELECT 1 FROM t3
+WHERE EXISTS ( SELECT 1 FROM t1
+ WHERE t3.id IN ( SELECT bt1.id FROM t2, t1 AS bt1
+ WHERE bt1.ch = t2.ch2 AND
+ bt1.id = t2.pk AND
+ t2.ch2 <= 'g' ) OR
+ t1.id2 = t1.id);
+
+eval EXPLAIN EXTENDED $q2;
+eval $q2;
+
+DROP TABLE t1, t2, t3;
+SET GLOBAL innodb_stats_persistent= @stats.save;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 1ee2a17..3286224 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -1602,6 +1602,12 @@ bool JOIN::make_range_rowid_filters()
{
if (!tab->range_rowid_filter_info)
continue;
+
+ DBUG_ASSERT(!(tab->ref.key >= 0 &&
+ tab->ref.key == (int) tab->range_rowid_filter_info->key_no));
+ DBUG_ASSERT(!(tab->ref.key == -1 && tab->quick &&
+ tab->quick->index == tab->range_rowid_filter_info->key_no));
+
int err;
SQL_SELECT *sel= NULL;
Rowid_filter_container *filter_container= NULL;
@@ -7665,7 +7671,8 @@ best_access_path(JOIN *join,
found_ref);
} /* not ft_key */
- if (records < DBL_MAX)
+ if (records < DBL_MAX &&
+ (found_part & 1)) // start_key->key can be used for index access
{
double rows= record_count * records;
double access_cost_factor= MY_MIN(tmp / rows, 1.0);
1
0
[Commits] 86500865916: Cleanup in tests: "${indexnr}" takes 8 hex digits, not 6.
by psergey 24 Nov '19
by psergey 24 Nov '19
24 Nov '19
revision-id: 86500865916f729216b7081f4bdc2cd9c0c37e70 (fb-prod201903-259-g86500865916)
parent(s): b3fb7f7674a9cd1df3931aa1121247713af75a89
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-11-24 22:29:47 +0300
message:
Cleanup in tests: "${indexnr}" takes 8 hex digits, not 6.
---
.../suite/rocksdb/include/select_from_is_rowlocks.inc | 4 ++--
mysql-test/suite/rocksdb/r/range_locking.result | 18 +++++++++---------
mysql-test/suite/rocksdb/r/range_locking_rev_cf.result | 18 +++++++++---------
.../rocksdb/r/range_locking_seek_for_update.result | 16 ++++++++--------
4 files changed, 28 insertions(+), 28 deletions(-)
diff --git a/mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc b/mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc
index 74330efca21..e5f54d68914 100644
--- a/mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc
+++ b/mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc
@@ -17,10 +17,10 @@ set @cf_id=(select column_family from information_schema.rocksdb_ddl
where table_name='t1' and index_name='PRIMARY');
set @rtrx_id=(select transaction_id from information_schema.rocksdb_trx
where thread_id=connection_id());
-set @indexnr= (select lower(lpad(hex(index_number),6,'0')) from information_schema.rocksdb_ddl
+set @indexnr= (select lower(lpad(hex(index_number),8,'0')) from information_schema.rocksdb_ddl
where table_name='t1' and index_name='PRIMARY');
-set @indexnr_next= (select lower(lpad(hex(index_number+1),6,'0'))
+set @indexnr_next= (select lower(lpad(hex(index_number+1),8,'0'))
from information_schema.rocksdb_ddl
where table_name='t1' and index_name='PRIMARY');
diff --git a/mysql-test/suite/rocksdb/r/range_locking.result b/mysql-test/suite/rocksdb/r/range_locking.result
index bee37926ddf..5a58ddf148e 100644
--- a/mysql-test/suite/rocksdb/r/range_locking.result
+++ b/mysql-test/suite/rocksdb/r/range_locking.result
@@ -99,12 +99,12 @@ pk a
10 10
# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
-$cf_id $trx_id 0000${indexnr}8000000a X
+$cf_id $trx_id 00${indexnr}8000000a X
delete from t1 where pk between 25 and 40;
# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
-$cf_id $trx_id 0000${indexnr}8000000a X
-$cf_id $trx_id 0000${indexnr}80000019 - 0100${indexnr}80000028 X
+$cf_id $trx_id 00${indexnr}8000000a X
+$cf_id $trx_id 00${indexnr}80000019 - 01${indexnr}80000028 X
rollback;
begin;
# The following will show a range lock on 2-9 and also a point lock on 10.
@@ -113,8 +113,8 @@ select * from t1 where pk between 2 and 9 for update;
pk a
# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
-$cf_id $trx_id 0000${indexnr}80000002 - 0100${indexnr}80000009 X
-$cf_id $trx_id 0000${indexnr}8000000a X
+$cf_id $trx_id 00${indexnr}80000002 - 01${indexnr}80000009 X
+$cf_id $trx_id 00${indexnr}8000000a X
rollback;
drop table t1;
connection con1;
@@ -437,8 +437,8 @@ pk1 pk2 a
3 0 30
# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
-$cf_id $trx_id 0000${indexnr}8000000280000009 X
-$cf_id $trx_id 0000${indexnr}80000003 - 0100${indexnr}80000003 X
+$cf_id $trx_id 00${indexnr}8000000280000009 X
+$cf_id $trx_id 00${indexnr}80000003 - 01${indexnr}80000003 X
rollback;
#
# Test range locking for ranges with HA_READ_PREFIX_LAST_OR_PREV
@@ -463,8 +463,8 @@ pk1 pk2 a
4 5 45
# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
-$cf_id $trx_id 0000${indexnr}8000000480000004 X
-$cf_id $trx_id 0000${indexnr}8000000480000005 - 0100${indexnr}8000000480000008 X
+$cf_id $trx_id 00${indexnr}8000000480000004 X
+$cf_id $trx_id 00${indexnr}8000000480000005 - 01${indexnr}8000000480000008 X
rollback;
connection con1;
rollback;
diff --git a/mysql-test/suite/rocksdb/r/range_locking_rev_cf.result b/mysql-test/suite/rocksdb/r/range_locking_rev_cf.result
index 04f88ab656a..811ad88e91b 100644
--- a/mysql-test/suite/rocksdb/r/range_locking_rev_cf.result
+++ b/mysql-test/suite/rocksdb/r/range_locking_rev_cf.result
@@ -99,12 +99,12 @@ pk a
10 10
# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
-$cf_id $trx_id 0000${indexnr}8000000a X
+$cf_id $trx_id 00${indexnr}8000000a X
delete from t1 where pk between 25 and 40;
# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
-$cf_id $trx_id 0000${indexnr}8000000a X
-$cf_id $trx_id 0000${indexnr}80000028 - 0100${indexnr}80000019 X
+$cf_id $trx_id 00${indexnr}8000000a X
+$cf_id $trx_id 00${indexnr}80000028 - 01${indexnr}80000019 X
rollback;
begin;
# The following will show a range lock on 2-9 and also a point lock on 10.
@@ -113,8 +113,8 @@ select * from t1 where pk between 2 and 9 for update;
pk a
# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
-$cf_id $trx_id 0000${indexnr}80000009 - 0100${indexnr}80000002 X
-$cf_id $trx_id 0000${indexnr}8000000a X
+$cf_id $trx_id 00${indexnr}80000009 - 01${indexnr}80000002 X
+$cf_id $trx_id 00${indexnr}8000000a X
rollback;
drop table t1;
connection con1;
@@ -397,8 +397,8 @@ pk1 pk2 a
3 0 30
# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
-$cf_id $trx_id 0000${indexnr}8000000280000009 X
-$cf_id $trx_id 0000${indexnr}80000003 - 0100${indexnr}80000003 X
+$cf_id $trx_id 00${indexnr}8000000280000009 X
+$cf_id $trx_id 00${indexnr}80000003 - 01${indexnr}80000003 X
rollback;
#
# Test range locking for ranges with HA_READ_PREFIX_LAST_OR_PREV
@@ -423,8 +423,8 @@ pk1 pk2 a
4 5 45
# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
-$cf_id $trx_id 0000${indexnr}8000000480000004 X
-$cf_id $trx_id 0000${indexnr}8000000480000008 - 0100${indexnr}8000000480000005 X
+$cf_id $trx_id 00${indexnr}8000000480000004 X
+$cf_id $trx_id 00${indexnr}8000000480000008 - 01${indexnr}8000000480000005 X
rollback;
connection con1;
rollback;
diff --git a/mysql-test/suite/rocksdb/r/range_locking_seek_for_update.result b/mysql-test/suite/rocksdb/r/range_locking_seek_for_update.result
index aa6f52a02bc..2614e8ce66e 100644
--- a/mysql-test/suite/rocksdb/r/range_locking_seek_for_update.result
+++ b/mysql-test/suite/rocksdb/r/range_locking_seek_for_update.result
@@ -28,7 +28,7 @@ pk a
# Now, we will just see locks on 10=0xA and 11=0xB:
# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
-$cf_id $trx_id 0000${indexnr}8000000b X
+$cf_id $trx_id 00${indexnr}8000000b X
#
# SeekForUpdate Test #1: A query with type=range (without upper bound) and LIMIT
#
@@ -43,8 +43,8 @@ pk a
502 502
# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
-$cf_id $trx_id 0000${indexnr}8000000b X
-$cf_id $trx_id 0000${indexnr}800001f4 - 0000${indexnr}800001f6 X
+$cf_id $trx_id 00${indexnr}8000000b X
+$cf_id $trx_id 00${indexnr}800001f4 - 00${indexnr}800001f6 X
rollback;
begin;
select * from t1 where pk=11 for update;
@@ -61,8 +61,8 @@ pk a
2 2
# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
-$cf_id $trx_id 0000${indexnr} - 0000${indexnr}80000002 X
-$cf_id $trx_id 0000${indexnr}8000000b X
+$cf_id $trx_id 00${indexnr} - 00${indexnr}80000002 X
+$cf_id $trx_id 00${indexnr}8000000b X
rollback;
connection con1;
rollback;
@@ -140,7 +140,7 @@ pk a
8 8
# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
-$cf_id $trx_id 0000${indexnr}80000005 - 0000${indexnr}8000000a X
+$cf_id $trx_id 00${indexnr}80000005 - 00${indexnr}8000000a X
rollback;
delete from t1 where pk=8;
#
@@ -200,7 +200,7 @@ pk a
# The below will lock from pk=1007 (0x3ef) till the end of the table:
# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
-$cf_id $trx_id 0000${indexnr}800003ef - 0000${indexnr+1} X
+$cf_id $trx_id 00${indexnr}800003ef - 00${indexnr+1} X
rollback;
begin;
select * from t1 where pk <1007 order by pk desc limit 2 for update;
@@ -209,7 +209,7 @@ pk a
1001 1001
# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
-$cf_id $trx_id 0000${indexnr}800003e9 - 0000${indexnr}800003ef X
+$cf_id $trx_id 00${indexnr}800003e9 - 00${indexnr}800003ef X
connection con1;
rollback;
connection default;
1
0
24 Nov '19
revision-id: b3fb7f7674a9cd1df3931aa1121247713af75a89 (fb-prod201903-258-gb3fb7f7674a)
parent(s): 8599cfdb73e671ef867b2eec1a911cb461b9e5ec
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-11-24 22:19:55 +0300
message:
Fix rocksdb.range_locking_seek_for_update test
Make it work when ran after other tests
---
mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc | 3 ++-
mysql-test/suite/rocksdb/r/range_locking_seek_for_update.result | 3 ---
mysql-test/suite/rocksdb/t/range_locking_seek_for_update.test | 1 +
3 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc b/mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc
index cb95b149bae..74330efca21 100644
--- a/mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc
+++ b/mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc
@@ -20,7 +20,8 @@ set @rtrx_id=(select transaction_id from information_schema.rocksdb_trx
set @indexnr= (select lower(lpad(hex(index_number),6,'0')) from information_schema.rocksdb_ddl
where table_name='t1' and index_name='PRIMARY');
-set @indexnr_next= (select lpad(hex(index_number+1),6,'0') from information_schema.rocksdb_ddl
+set @indexnr_next= (select lower(lpad(hex(index_number+1),6,'0'))
+ from information_schema.rocksdb_ddl
where table_name='t1' and index_name='PRIMARY');
let $extra_where = where 1;
diff --git a/mysql-test/suite/rocksdb/r/range_locking_seek_for_update.result b/mysql-test/suite/rocksdb/r/range_locking_seek_for_update.result
index b31be59d35a..aa6f52a02bc 100644
--- a/mysql-test/suite/rocksdb/r/range_locking_seek_for_update.result
+++ b/mysql-test/suite/rocksdb/r/range_locking_seek_for_update.result
@@ -29,7 +29,6 @@ pk a
# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
$cf_id $trx_id 0000${indexnr}8000000b X
-$cf_id 6 0000${indexnr}8000000a X
#
# SeekForUpdate Test #1: A query with type=range (without upper bound) and LIMIT
#
@@ -46,7 +45,6 @@ pk a
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
$cf_id $trx_id 0000${indexnr}8000000b X
$cf_id $trx_id 0000${indexnr}800001f4 - 0000${indexnr}800001f6 X
-$cf_id 6 0000${indexnr}8000000a X
rollback;
begin;
select * from t1 where pk=11 for update;
@@ -65,7 +63,6 @@ pk a
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
$cf_id $trx_id 0000${indexnr} - 0000${indexnr}80000002 X
$cf_id $trx_id 0000${indexnr}8000000b X
-$cf_id 6 0000${indexnr}8000000a X
rollback;
connection con1;
rollback;
diff --git a/mysql-test/suite/rocksdb/t/range_locking_seek_for_update.test b/mysql-test/suite/rocksdb/t/range_locking_seek_for_update.test
index edef8ac5bfa..0230113eeb1 100644
--- a/mysql-test/suite/rocksdb/t/range_locking_seek_for_update.test
+++ b/mysql-test/suite/rocksdb/t/range_locking_seek_for_update.test
@@ -33,6 +33,7 @@ connection default;
begin;
select * from t1 where pk=11 for update;
+let $select_from_is_rowlocks_current_trx_only=1;
--echo # Now, we will just see locks on 10=0xA and 11=0xB:
--source suite/rocksdb/include/select_from_is_rowlocks.inc
1
0
revision-id: 8599cfdb73e671ef867b2eec1a911cb461b9e5ec (fb-prod201903-257-g8599cfdb73e)
parent(s): 30c4f566dca5d8a8bec2969faa7132c746afad92
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-11-24 22:18:02 +0300
message:
Range locking fixes
- Update RocksDB to revision that prints the locks in STO-mode, too.
- Make rocksdb.range_locking test survive --repeat=N runs.
---
.../suite/rocksdb/include/select_from_is_rowlocks.inc | 2 +-
mysql-test/suite/rocksdb/r/range_locking.result | 11 +++++++++++
mysql-test/suite/rocksdb/r/range_locking_rev_cf.result | 11 +++++++++++
mysql-test/suite/rocksdb/t/range_locking.inc | 18 +++++++++++++++++-
rocksdb | 2 +-
5 files changed, 41 insertions(+), 3 deletions(-)
diff --git a/mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc b/mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc
index da80d466012..cb95b149bae 100644
--- a/mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc
+++ b/mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc
@@ -17,7 +17,7 @@ set @cf_id=(select column_family from information_schema.rocksdb_ddl
where table_name='t1' and index_name='PRIMARY');
set @rtrx_id=(select transaction_id from information_schema.rocksdb_trx
where thread_id=connection_id());
-set @indexnr= (select lpad(hex(index_number),6,'0') from information_schema.rocksdb_ddl
+set @indexnr= (select lower(lpad(hex(index_number),6,'0')) from information_schema.rocksdb_ddl
where table_name='t1' and index_name='PRIMARY');
set @indexnr_next= (select lpad(hex(index_number+1),6,'0') from information_schema.rocksdb_ddl
diff --git a/mysql-test/suite/rocksdb/r/range_locking.result b/mysql-test/suite/rocksdb/r/range_locking.result
index f9f77a31476..bee37926ddf 100644
--- a/mysql-test/suite/rocksdb/r/range_locking.result
+++ b/mysql-test/suite/rocksdb/r/range_locking.result
@@ -80,6 +80,12 @@ drop table t1;
#
# Test INFORMATION_SCHEMA.lock_info in range-locking mode
#
+connect con1,localhost,root,,;
+connection con1;
+create table t0 (a int primary key);
+begin;
+insert into t0 values (1);
+connection default;
create table t1 (
pk int,
a int,
@@ -111,6 +117,11 @@ $cf_id $trx_id 0000${indexnr}80000002 - 0100${indexnr}80000009 X
$cf_id $trx_id 0000${indexnr}8000000a X
rollback;
drop table t1;
+connection con1;
+rollback;
+drop table t0;
+connection default;
+disconnect con1;
#
# MDEV-18104: MyRocks-Gap-Lock: range locking bounds are incorrect for multi-part keys
#
diff --git a/mysql-test/suite/rocksdb/r/range_locking_rev_cf.result b/mysql-test/suite/rocksdb/r/range_locking_rev_cf.result
index b07c654e7d1..04f88ab656a 100644
--- a/mysql-test/suite/rocksdb/r/range_locking_rev_cf.result
+++ b/mysql-test/suite/rocksdb/r/range_locking_rev_cf.result
@@ -80,6 +80,12 @@ drop table t1;
#
# Test INFORMATION_SCHEMA.lock_info in range-locking mode
#
+connect con1,localhost,root,,;
+connection con1;
+create table t0 (a int primary key);
+begin;
+insert into t0 values (1);
+connection default;
create table t1 (
pk int,
a int,
@@ -111,6 +117,11 @@ $cf_id $trx_id 0000${indexnr}80000009 - 0100${indexnr}80000002 X
$cf_id $trx_id 0000${indexnr}8000000a X
rollback;
drop table t1;
+connection con1;
+rollback;
+drop table t0;
+connection default;
+disconnect con1;
#
# MDEV-18104: MyRocks-Gap-Lock: range locking bounds are incorrect for multi-part keys
#
diff --git a/mysql-test/suite/rocksdb/t/range_locking.inc b/mysql-test/suite/rocksdb/t/range_locking.inc
index 1c5fb92b584..ecc8e4432bb 100644
--- a/mysql-test/suite/rocksdb/t/range_locking.inc
+++ b/mysql-test/suite/rocksdb/t/range_locking.inc
@@ -90,7 +90,7 @@ connection con1;
rollback;
drop table t2;
-# Cleanup
+# cleanup
connection default;
disconnect con1;
disconnect con2;
@@ -99,6 +99,15 @@ drop table t1;
--echo #
--echo # Test INFORMATION_SCHEMA.lock_info in range-locking mode
--echo #
+
+connect (con1,localhost,root,,);
+connection con1;
+eval create table t0 (a int primary key);
+begin;
+insert into t0 values (1);
+connection default;
+
+
eval
create table t1 (
pk int,
@@ -112,6 +121,8 @@ insert into t1 values
begin;
select * from t1 where pk=10 for update;
+#let TRX1_ID=`(select transaction_id from information_schema.rocksdb_trx where thread_id=connection_id())` ;
+let $select_from_is_rowlocks_current_trx_only=1;
--source suite/rocksdb/include/select_from_is_rowlocks.inc
delete from t1 where pk between 25 and 40;
@@ -127,6 +138,11 @@ select * from t1 where pk between 2 and 9 for update;
rollback;
drop table t1;
+connection con1;
+rollback;
+drop table t0;
+connection default;
+disconnect con1;
--echo #
--echo # MDEV-18104: MyRocks-Gap-Lock: range locking bounds are incorrect for multi-part keys
diff --git a/rocksdb b/rocksdb
index c15474bf496..670bbd25c10 160000
--- a/rocksdb
+++ b/rocksdb
@@ -1 +1 @@
-Subproject commit c15474bf496a227c6247e2e56040009c55c4bbc0
+Subproject commit 670bbd25c10c47638f41ee7f635a99443d21e937
1
0
[Commits] ef1ce998bd0: Make rocksdb.range_locking_seek_for_update survive --repeat
by psergey 24 Nov '19
by psergey 24 Nov '19
24 Nov '19
revision-id: ef1ce998bd035756f545d45afe4c133f922313a7 (fb-prod201903-258-gef1ce998bd0)
parent(s): 34a0cf718aa19d5bda2d471b2c6e20dac8256989
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-11-24 20:17:54 +0300
message:
Make rocksdb.range_locking_seek_for_update survive --repeat
---
mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc | 3 ++-
mysql-test/suite/rocksdb/r/range_locking_seek_for_update.result | 3 ---
mysql-test/suite/rocksdb/t/range_locking_seek_for_update.test | 1 +
3 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc b/mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc
index cb95b149bae..74330efca21 100644
--- a/mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc
+++ b/mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc
@@ -20,7 +20,8 @@ set @rtrx_id=(select transaction_id from information_schema.rocksdb_trx
set @indexnr= (select lower(lpad(hex(index_number),6,'0')) from information_schema.rocksdb_ddl
where table_name='t1' and index_name='PRIMARY');
-set @indexnr_next= (select lpad(hex(index_number+1),6,'0') from information_schema.rocksdb_ddl
+set @indexnr_next= (select lower(lpad(hex(index_number+1),6,'0'))
+ from information_schema.rocksdb_ddl
where table_name='t1' and index_name='PRIMARY');
let $extra_where = where 1;
diff --git a/mysql-test/suite/rocksdb/r/range_locking_seek_for_update.result b/mysql-test/suite/rocksdb/r/range_locking_seek_for_update.result
index b31be59d35a..aa6f52a02bc 100644
--- a/mysql-test/suite/rocksdb/r/range_locking_seek_for_update.result
+++ b/mysql-test/suite/rocksdb/r/range_locking_seek_for_update.result
@@ -29,7 +29,6 @@ pk a
# select * from information_schema.rocksdb_locks; # With replacements by select_from_is_rowlocks.inc
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
$cf_id $trx_id 0000${indexnr}8000000b X
-$cf_id 6 0000${indexnr}8000000a X
#
# SeekForUpdate Test #1: A query with type=range (without upper bound) and LIMIT
#
@@ -46,7 +45,6 @@ pk a
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
$cf_id $trx_id 0000${indexnr}8000000b X
$cf_id $trx_id 0000${indexnr}800001f4 - 0000${indexnr}800001f6 X
-$cf_id 6 0000${indexnr}8000000a X
rollback;
begin;
select * from t1 where pk=11 for update;
@@ -65,7 +63,6 @@ pk a
COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
$cf_id $trx_id 0000${indexnr} - 0000${indexnr}80000002 X
$cf_id $trx_id 0000${indexnr}8000000b X
-$cf_id 6 0000${indexnr}8000000a X
rollback;
connection con1;
rollback;
diff --git a/mysql-test/suite/rocksdb/t/range_locking_seek_for_update.test b/mysql-test/suite/rocksdb/t/range_locking_seek_for_update.test
index edef8ac5bfa..0230113eeb1 100644
--- a/mysql-test/suite/rocksdb/t/range_locking_seek_for_update.test
+++ b/mysql-test/suite/rocksdb/t/range_locking_seek_for_update.test
@@ -33,6 +33,7 @@ connection default;
begin;
select * from t1 where pk=11 for update;
+let $select_from_is_rowlocks_current_trx_only=1;
--echo # Now, we will just see locks on 10=0xA and 11=0xB:
--source suite/rocksdb/include/select_from_is_rowlocks.inc
1
0
revision-id: 34a0cf718aa19d5bda2d471b2c6e20dac8256989 (fb-prod201903-257-g34a0cf718aa)
parent(s): 30c4f566dca5d8a8bec2969faa7132c746afad92
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-11-24 20:01:53 +0300
message:
Range locking fixes
- Update RocksDB to revision that prints the locks in STO-mode, too.
- Make rocksdb.range_locking test survive --repeat=N runs.
---
.../suite/rocksdb/include/select_from_is_rowlocks.inc | 2 +-
mysql-test/suite/rocksdb/r/range_locking.result | 11 +++++++++++
mysql-test/suite/rocksdb/t/range_locking.inc | 18 +++++++++++++++++-
rocksdb | 2 +-
4 files changed, 30 insertions(+), 3 deletions(-)
diff --git a/mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc b/mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc
index da80d466012..cb95b149bae 100644
--- a/mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc
+++ b/mysql-test/suite/rocksdb/include/select_from_is_rowlocks.inc
@@ -17,7 +17,7 @@ set @cf_id=(select column_family from information_schema.rocksdb_ddl
where table_name='t1' and index_name='PRIMARY');
set @rtrx_id=(select transaction_id from information_schema.rocksdb_trx
where thread_id=connection_id());
-set @indexnr= (select lpad(hex(index_number),6,'0') from information_schema.rocksdb_ddl
+set @indexnr= (select lower(lpad(hex(index_number),6,'0')) from information_schema.rocksdb_ddl
where table_name='t1' and index_name='PRIMARY');
set @indexnr_next= (select lpad(hex(index_number+1),6,'0') from information_schema.rocksdb_ddl
diff --git a/mysql-test/suite/rocksdb/r/range_locking.result b/mysql-test/suite/rocksdb/r/range_locking.result
index f9f77a31476..bee37926ddf 100644
--- a/mysql-test/suite/rocksdb/r/range_locking.result
+++ b/mysql-test/suite/rocksdb/r/range_locking.result
@@ -80,6 +80,12 @@ drop table t1;
#
# Test INFORMATION_SCHEMA.lock_info in range-locking mode
#
+connect con1,localhost,root,,;
+connection con1;
+create table t0 (a int primary key);
+begin;
+insert into t0 values (1);
+connection default;
create table t1 (
pk int,
a int,
@@ -111,6 +117,11 @@ $cf_id $trx_id 0000${indexnr}80000002 - 0100${indexnr}80000009 X
$cf_id $trx_id 0000${indexnr}8000000a X
rollback;
drop table t1;
+connection con1;
+rollback;
+drop table t0;
+connection default;
+disconnect con1;
#
# MDEV-18104: MyRocks-Gap-Lock: range locking bounds are incorrect for multi-part keys
#
diff --git a/mysql-test/suite/rocksdb/t/range_locking.inc b/mysql-test/suite/rocksdb/t/range_locking.inc
index 1c5fb92b584..ecc8e4432bb 100644
--- a/mysql-test/suite/rocksdb/t/range_locking.inc
+++ b/mysql-test/suite/rocksdb/t/range_locking.inc
@@ -90,7 +90,7 @@ connection con1;
rollback;
drop table t2;
-# Cleanup
+# cleanup
connection default;
disconnect con1;
disconnect con2;
@@ -99,6 +99,15 @@ drop table t1;
--echo #
--echo # Test INFORMATION_SCHEMA.lock_info in range-locking mode
--echo #
+
+connect (con1,localhost,root,,);
+connection con1;
+eval create table t0 (a int primary key);
+begin;
+insert into t0 values (1);
+connection default;
+
+
eval
create table t1 (
pk int,
@@ -112,6 +121,8 @@ insert into t1 values
begin;
select * from t1 where pk=10 for update;
+#let TRX1_ID=`(select transaction_id from information_schema.rocksdb_trx where thread_id=connection_id())` ;
+let $select_from_is_rowlocks_current_trx_only=1;
--source suite/rocksdb/include/select_from_is_rowlocks.inc
delete from t1 where pk between 25 and 40;
@@ -127,6 +138,11 @@ select * from t1 where pk between 2 and 9 for update;
rollback;
drop table t1;
+connection con1;
+rollback;
+drop table t0;
+connection default;
+disconnect con1;
--echo #
--echo # MDEV-18104: MyRocks-Gap-Lock: range locking bounds are incorrect for multi-part keys
diff --git a/rocksdb b/rocksdb
index c15474bf496..670bbd25c10 160000
--- a/rocksdb
+++ b/rocksdb
@@ -1 +1 @@
-Subproject commit c15474bf496a227c6247e2e56040009c55c4bbc0
+Subproject commit 670bbd25c10c47638f41ee7f635a99443d21e937
1
0
[Commits] 670bbd25c: Range Locking: make GetLockStatusData report locks in STO mode
by psergey 24 Nov '19
by psergey 24 Nov '19
24 Nov '19
revision-id: 670bbd25c10c47638f41ee7f635a99443d21e937 (v5.8-1896-g670bbd25c)
parent(s): c15474bf496a227c6247e2e56040009c55c4bbc0
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-11-24 18:26:26 +0300
message:
Range Locking: make GetLockStatusData report locks in STO mode
(Implement this in locktree:dump_locks)
Before this patch, GetLockStatusData would return empty list when
the lock tree is in STO (Single Transaction Optimization mode.
This is confusing.
Note: some MTR testcases have workarounds for this: they get a lock in
another connection to make sure we're not in STO-mode.
They will still have to do that, because STO-mode may report different
set of locks (e.g. the locks can overlap with one another).
---
.../range_locking/locktree/locktree.cc | 44 +++++++++++++++-------
1 file changed, 30 insertions(+), 14 deletions(-)
diff --git a/utilities/transactions/range_locking/locktree/locktree.cc b/utilities/transactions/range_locking/locktree/locktree.cc
index 00ce5aace..4c7ce1bfe 100644
--- a/utilities/transactions/range_locking/locktree/locktree.cc
+++ b/utilities/transactions/range_locking/locktree/locktree.cc
@@ -543,22 +543,38 @@ void locktree::dump_locks(void *cdata, dump_callback cb)
lkr.prepare(m_rangetree);
lkr.acquire(range);
- GrowableArray<row_lock> all_locks;
- all_locks.init();
- iterate_and_get_overlapping_row_locks(&lkr, &all_locks);
-
- const size_t n_locks = all_locks.get_size();
- for (size_t i = 0; i < n_locks; i++) {
- const row_lock lock = all_locks.fetch_unchecked(i);
- (*cb)(cdata,
- lock.range.get_left_key(),
- lock.range.get_right_key(),
- lock.txnid,
- lock.is_shared,
- lock.owners);
+ TXNID sto_txn;
+ if ((sto_txn = toku_unsafe_fetch(m_sto_txnid)) != TXNID_NONE) {
+ // insert all of the ranges from the single txnid buffer into a new rangtree
+ range_buffer::iterator iter(&m_sto_buffer);
+ range_buffer::iterator::record rec;
+ while (iter.current(&rec)) {
+ (*cb)(cdata,
+ rec.get_left_key(),
+ rec.get_right_key(),
+ sto_txn,
+ !rec.get_exclusive_flag(),
+ nullptr);
+ iter.next();
+ }
+ } else {
+ GrowableArray<row_lock> all_locks;
+ all_locks.init();
+ iterate_and_get_overlapping_row_locks(&lkr, &all_locks);
+
+ const size_t n_locks = all_locks.get_size();
+ for (size_t i = 0; i < n_locks; i++) {
+ const row_lock lock = all_locks.fetch_unchecked(i);
+ (*cb)(cdata,
+ lock.range.get_left_key(),
+ lock.range.get_right_key(),
+ lock.txnid,
+ lock.is_shared,
+ lock.owners);
+ }
+ all_locks.deinit();
}
lkr.release();
- all_locks.deinit();
range.destroy();
}
1
0
revision-id: 30c4f566dca5d8a8bec2969faa7132c746afad92 (fb-prod201903-256-g30c4f566dca)
parent(s): b4a950866d15a0dbcbee527c2ab24ae9f2c8f2c6
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-11-24 17:23:49 +0300
message:
Update to merged range-locking rocksdb
---
rocksdb | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/rocksdb b/rocksdb
index 156a12604b8..c15474bf496 160000
--- a/rocksdb
+++ b/rocksdb
@@ -1 +1 @@
-Subproject commit 156a12604b830df3765a12d7194ca5f9ca7d67e6
+Subproject commit c15474bf496a227c6247e2e56040009c55c4bbc0
1
0
[Commits] c15474bf4: Backport of: Initial support for shared point locks, support lock escalations
by psergey 24 Nov '19
by psergey 24 Nov '19
24 Nov '19
revision-id: c15474bf496a227c6247e2e56040009c55c4bbc0 (v5.8-1895-gc15474bf4)
parent(s): 156a12604b830df3765a12d7194ca5f9ca7d67e6
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-11-24 17:03:40 +0300
message:
Backport of: Initial support for shared point locks, support lock escalations
- Locks can now be shared.
- Sharing only supported as long as the locked ranges are an exact match.
If this requirement is not met, the locks behave as exclusive locks.
- Make Lock Escalation keep shared locks. Shared locks are not collapsed
with other kinds of locks.
- Replace RangeLockMgrHandle::get_escalation_count() with GetStatus()
which also reports amount of memory used for Range Locking (and there
is more data we could report through this)
- Initialize LTM_STATUS_S::m_initialized.
---
include/rocksdb/utilities/transaction_db.h | 9 +-
utilities/transactions/range_locking/db.h | 3 +
.../transactions/range_locking/ft/ft-status.h | 2 +-
.../range_locking/locktree/concurrent_tree.cc | 17 +-
.../range_locking/locktree/concurrent_tree.h | 18 +-
.../range_locking/locktree/locktree.cc | 240 +++++++++++++++++----
.../transactions/range_locking/locktree/locktree.h | 19 +-
.../range_locking/locktree/range_buffer.cc | 20 +-
.../range_locking/locktree/range_buffer.h | 15 +-
.../range_locking/locktree/treenode.cc | 84 ++++++--
.../transactions/range_locking/locktree/treenode.h | 30 ++-
.../range_locking/portability/txn_subst.h | 16 ++
utilities/transactions/transaction_lock_mgr.cc | 42 ++--
utilities/transactions/transaction_lock_mgr.h | 6 +-
14 files changed, 410 insertions(+), 111 deletions(-)
diff --git a/include/rocksdb/utilities/transaction_db.h b/include/rocksdb/utilities/transaction_db.h
index 19e6d2411..bb13bba07 100644
--- a/include/rocksdb/utilities/transaction_db.h
+++ b/include/rocksdb/utilities/transaction_db.h
@@ -56,7 +56,14 @@ class LockManagerHandle {
class RangeLockMgrHandle : public LockManagerHandle {
public:
virtual int set_max_lock_memory(size_t max_lock_memory) = 0;
- virtual uint64_t get_escalation_count() = 0;
+
+ class Counters {
+ public:
+ uint64_t escalation_count;
+ uint64_t current_lock_memory;
+ };
+
+ virtual Counters GetStatus() = 0;
virtual ~RangeLockMgrHandle() {};
};
diff --git a/utilities/transactions/range_locking/db.h b/utilities/transactions/range_locking/db.h
index f9349c6ae..64ea28345 100644
--- a/utilities/transactions/range_locking/db.h
+++ b/utilities/transactions/range_locking/db.h
@@ -6,6 +6,8 @@
typedef struct __toku_db DB;
typedef struct __toku_dbt DBT;
+
+// port: this is currently not used
struct simple_dbt {
uint32_t len;
void *data;
@@ -72,6 +74,7 @@ struct __toku_dbt {
void*data;
uint32_t size;
uint32_t ulen;
+ // One of DB_DBT_XXX flags
uint32_t flags;
};
typedef struct __toku_descriptor {
diff --git a/utilities/transactions/range_locking/ft/ft-status.h b/utilities/transactions/range_locking/ft/ft-status.h
index 25051f1ed..242964f0b 100644
--- a/utilities/transactions/range_locking/ft/ft-status.h
+++ b/utilities/transactions/range_locking/ft/ft-status.h
@@ -80,7 +80,7 @@ public:
TOKU_ENGINE_STATUS_ROW_S status[LTM_STATUS_NUM_ROWS];
private:
- bool m_initialized;
+ bool m_initialized = false;
};
typedef LTM_STATUS_S* LTM_STATUS;
extern LTM_STATUS_S ltm_status;
diff --git a/utilities/transactions/range_locking/locktree/concurrent_tree.cc b/utilities/transactions/range_locking/locktree/concurrent_tree.cc
index a35a9e40b..74d65f710 100644
--- a/utilities/transactions/range_locking/locktree/concurrent_tree.cc
+++ b/utilities/transactions/range_locking/locktree/concurrent_tree.cc
@@ -97,6 +97,12 @@ void concurrent_tree::locked_keyrange::acquire(const keyrange &range) {
m_subtree = subtree;
}
+void concurrent_tree::locked_keyrange::add_shared_owner(const keyrange &range,
+ TXNID new_owner)
+{
+ m_subtree->insert(range, new_owner, /*is_shared*/ true);
+}
+
void concurrent_tree::locked_keyrange::release(void) {
m_subtree->mutex_unlock();
}
@@ -110,18 +116,19 @@ void concurrent_tree::locked_keyrange::iterate(F *function) const {
}
}
-void concurrent_tree::locked_keyrange::insert(const keyrange &range, TXNID txnid) {
+void concurrent_tree::locked_keyrange::insert(const keyrange &range,
+ TXNID txnid, bool is_shared) {
// empty means no children, and only the root should ever be empty
if (m_subtree->is_empty()) {
- m_subtree->set_range_and_txnid(range, txnid);
+ m_subtree->set_range_and_txnid(range, txnid, is_shared);
} else {
- m_subtree->insert(range, txnid);
+ m_subtree->insert(range, txnid, is_shared);
}
}
-void concurrent_tree::locked_keyrange::remove(const keyrange &range) {
+void concurrent_tree::locked_keyrange::remove(const keyrange &range, TXNID txnid) {
invariant(!m_subtree->is_empty());
- treenode *new_subtree = m_subtree->remove(range);
+ treenode *new_subtree = m_subtree->remove(range, txnid);
// if removing range changed the root of the subtree,
// then the subtree must be the root of the entire tree.
if (new_subtree == nullptr) {
diff --git a/utilities/transactions/range_locking/locktree/concurrent_tree.h b/utilities/transactions/range_locking/locktree/concurrent_tree.h
index 66a7ff176..fabda7294 100644
--- a/utilities/transactions/range_locking/locktree/concurrent_tree.h
+++ b/utilities/transactions/range_locking/locktree/concurrent_tree.h
@@ -106,15 +106,25 @@ public:
template <class F>
void iterate(F *function) const;
+ // Adds another owner to the lock on the specified keyrange.
+ // requires: the keyrange contains one treenode whose bounds are
+ // exactly equal to the specifed range (no sub/supersets)
+ void add_shared_owner(const keyrange &range, TXNID new_owner);
+
// inserts the given range into the tree, with an associated txnid.
// requires: range does not overlap with anything in this locked_keyrange
// rationale: caller is responsible for only inserting unique ranges
- void insert(const keyrange &range, TXNID txnid);
-
- // effect: removes the given range from the tree
+ void insert(const keyrange &range, TXNID txnid, bool is_shared);
+
+ // effect: removes the given range from the tree.
+ // - txnid=TXNID_ANY means remove the range no matter what its
+ // owners are
+ // - Other value means remove the specified txnid from
+ // ownership (if the range has other owners, it will remain
+ // in the tree)
// requires: range exists exactly in this locked_keyrange
// rationale: caller is responsible for only removing existing ranges
- void remove(const keyrange &range);
+ void remove(const keyrange &range, TXNID txnid);
// effect: removes all of the keys represented by this locked keyrange
// rationale: we'd like a fast way to empty out a tree
diff --git a/utilities/transactions/range_locking/locktree/locktree.cc b/utilities/transactions/range_locking/locktree/locktree.cc
index 9b530c7b0..00ce5aace 100644
--- a/utilities/transactions/range_locking/locktree/locktree.cc
+++ b/utilities/transactions/range_locking/locktree/locktree.cc
@@ -147,6 +147,8 @@ uint32_t locktree::get_reference_count(void) {
struct row_lock {
keyrange range;
TXNID txnid;
+ bool is_shared;
+ TxnidVector *owners;
};
// iterate over a locked keyrange and copy out all of the data,
@@ -157,8 +159,10 @@ static void iterate_and_get_overlapping_row_locks(const concurrent_tree::locked_
GrowableArray<row_lock> *row_locks) {
struct copy_fn_obj {
GrowableArray<row_lock> *row_locks;
- bool fn(const keyrange &range, TXNID txnid) {
- row_lock lock = { .range = range, .txnid = txnid };
+ bool fn(const keyrange &range, TXNID txnid, bool is_shared,
+ TxnidVector *owners) {
+ row_lock lock = { .range = range, .txnid = txnid,
+ .is_shared = is_shared, .owners = owners};
row_locks->push(lock);
return true;
}
@@ -196,9 +200,10 @@ static uint64_t row_lock_size_in_tree(const row_lock &lock) {
// remove and destroy the given row lock from the locked keyrange,
// then notify the memory tracker of the newly freed lock.
static void remove_row_lock_from_tree(concurrent_tree::locked_keyrange *lkr,
- const row_lock &lock, locktree_manager *mgr) {
+ const row_lock &lock, TXNID txnid,
+ locktree_manager *mgr) {
const uint64_t mem_released = row_lock_size_in_tree(lock);
- lkr->remove(lock.range);
+ lkr->remove(lock.range, txnid);
if (mgr != nullptr) {
mgr->note_mem_released(mem_released);
}
@@ -209,7 +214,7 @@ static void remove_row_lock_from_tree(concurrent_tree::locked_keyrange *lkr,
static void insert_row_lock_into_tree(concurrent_tree::locked_keyrange *lkr,
const row_lock &lock, locktree_manager *mgr) {
uint64_t mem_used = row_lock_size_in_tree(lock);
- lkr->insert(lock.range, lock.txnid);
+ lkr->insert(lock.range, lock.txnid, lock.is_shared);
if (mgr != nullptr) {
mgr->note_mem_used(mem_used);
}
@@ -221,13 +226,17 @@ void locktree::sto_begin(TXNID txnid) {
m_sto_txnid = txnid;
}
-void locktree::sto_append(const DBT *left_key, const DBT *right_key) {
+void locktree::sto_append(const DBT *left_key, const DBT *right_key,
+ bool is_write_request) {
uint64_t buffer_mem, delta;
+
+ // psergey: the below two lines do not make any sense
+ // (and it's the same in upstream TokuDB)
keyrange range;
range.create(left_key, right_key);
buffer_mem = m_sto_buffer.total_memory_size();
- m_sto_buffer.append(left_key, right_key);
+ m_sto_buffer.append(left_key, right_key, is_write_request);
delta = m_sto_buffer.total_memory_size() - buffer_mem;
if (m_mgr != nullptr) {
m_mgr->note_mem_used(delta);
@@ -274,8 +283,10 @@ void locktree::sto_migrate_buffer_ranges_to_tree(void *prepared_lkr) {
range_buffer::iterator::record rec;
while (iter.current(&rec)) {
sto_lkr.prepare(&sto_rangetree);
- int r = acquire_lock_consolidated(&sto_lkr,
- m_sto_txnid, rec.get_left_key(), rec.get_right_key(), nullptr);
+ int r = acquire_lock_consolidated(&sto_lkr, m_sto_txnid,
+ rec.get_left_key(),
+ rec.get_right_key(),
+ rec.get_exclusive_flag(), nullptr);
invariant_zero(r);
sto_lkr.release();
iter.next();
@@ -285,8 +296,10 @@ void locktree::sto_migrate_buffer_ranges_to_tree(void *prepared_lkr) {
// locktree's rangetree, on behalf of the old single txnid.
struct migrate_fn_obj {
concurrent_tree::locked_keyrange *dst_lkr;
- bool fn(const keyrange &range, TXNID txnid) {
- dst_lkr->insert(range, txnid);
+ bool fn(const keyrange &range, TXNID txnid, bool is_shared,
+ TxnidVector *owners) {
+ assert(owners == nullptr);
+ dst_lkr->insert(range, txnid, is_shared);
return true;
}
} migrate_fn;
@@ -301,7 +314,8 @@ void locktree::sto_migrate_buffer_ranges_to_tree(void *prepared_lkr) {
bool locktree::sto_try_acquire(void *prepared_lkr,
TXNID txnid,
- const DBT *left_key, const DBT *right_key) {
+ const DBT *left_key, const DBT *right_key,
+ bool is_write_request) {
if (m_rangetree->is_empty() && m_sto_buffer.is_empty() && toku_unsafe_fetch(m_sto_score) >= STO_SCORE_THRESHOLD) {
// We can do the optimization because the rangetree is empty, and
// we know its worth trying because the sto score is big enough.
@@ -319,7 +333,7 @@ bool locktree::sto_try_acquire(void *prepared_lkr,
// this txnid can append its lock to the sto buffer successfully.
if (m_sto_txnid != TXNID_NONE) {
invariant(m_sto_txnid == txnid);
- sto_append(left_key, right_key);
+ sto_append(left_key, right_key, is_write_request);
return true;
} else {
invariant(m_sto_buffer.is_empty());
@@ -327,12 +341,66 @@ bool locktree::sto_try_acquire(void *prepared_lkr,
}
}
+
+/*
+ Do the same as iterate_and_get_overlapping_row_locks does, but also check for
+ this:
+ The set of overlapping rows locks consists of just one read-only shared
+ lock with the same endpoints as specified (in that case, we can just add
+ ourselves into that list)
+
+ @return true - One compatible shared lock
+ false - Otherwise
+*/
+static
+bool iterate_and_get_overlapping_row_locks2(const concurrent_tree::locked_keyrange *lkr,
+ const DBT *left_key, const DBT *right_key,
+ comparator *cmp,
+ TXNID txnid,
+ GrowableArray<row_lock> *row_locks) {
+ struct copy_fn_obj {
+ GrowableArray<row_lock> *row_locks;
+ bool first_call= true;
+ bool matching_lock_found = false;
+ const DBT *left_key, *right_key;
+ comparator *cmp;
+
+ bool fn(const keyrange &range, TXNID txnid, bool is_shared,
+ TxnidVector *owners) {
+
+ if (first_call) {
+ first_call = false;
+ if (is_shared &&
+ !(*cmp)(left_key, range.get_left_key()) &&
+ !(*cmp)(right_key, range.get_right_key())) {
+ matching_lock_found = true;
+ }
+ } else {
+ // if we see multiple matching locks, it doesn't matter whether
+ // the first one was matching.
+ matching_lock_found = false;
+ }
+ row_lock lock = { .range = range, .txnid = txnid,
+ .is_shared = is_shared, .owners = owners };
+ row_locks->push(lock);
+ return true;
+ }
+ } copy_fn;
+ copy_fn.row_locks = row_locks;
+ copy_fn.left_key = left_key;
+ copy_fn.right_key = right_key;
+ copy_fn.cmp = cmp;
+ lkr->iterate(©_fn);
+ return copy_fn.matching_lock_found;
+}
+
// try to acquire a lock and consolidate it with existing locks if possible
// param: lkr, a prepared locked keyrange
// return: 0 on success, DB_LOCK_NOTGRANTED if conflicting locks exist.
int locktree::acquire_lock_consolidated(void *prepared_lkr,
TXNID txnid,
const DBT *left_key, const DBT *right_key,
+ bool is_write_request,
txnid_set *conflicts) {
int r = 0;
concurrent_tree::locked_keyrange *lkr;
@@ -345,24 +413,60 @@ int locktree::acquire_lock_consolidated(void *prepared_lkr,
// copy out the set of overlapping row locks.
GrowableArray<row_lock> overlapping_row_locks;
overlapping_row_locks.init();
- iterate_and_get_overlapping_row_locks(lkr, &overlapping_row_locks);
+ bool matching_shared_lock_found= false;
+
+ if (is_write_request)
+ iterate_and_get_overlapping_row_locks(lkr, &overlapping_row_locks);
+ else {
+ matching_shared_lock_found=
+ iterate_and_get_overlapping_row_locks2(lkr, left_key, right_key, &m_cmp,
+ txnid, &overlapping_row_locks);
+ // psergey-todo: what to do now? So, we have figured we have just one
+ // shareable lock. Need to add us into it as an owner but the lock
+ // pointer cannot be kept?
+ // A: use find_node_with_overlapping_child(key_range, nullptr);
+ // then, add ourselves to the owner list.
+ // Dont' foreget to release the subtree after that.
+ }
+
+ if (matching_shared_lock_found) {
+ // there is just one non-confliting matching shared lock.
+ // we are hilding a lock on it (see acquire() call above).
+ // we need to modify it to indicate there is another locker...
+ lkr->add_shared_owner(requested_range, txnid);
+
+ // Pretend shared lock uses as much memory.
+ row_lock new_lock = { .range = requested_range, .txnid = txnid,
+ .is_shared = false, .owners = nullptr };
+ uint64_t mem_used = row_lock_size_in_tree(new_lock);
+ if (m_mgr) {
+ m_mgr->note_mem_used(mem_used);
+ }
+ return 0;
+ }
+
+
size_t num_overlapping_row_locks = overlapping_row_locks.get_size();
// if any overlapping row locks conflict with this request, bail out.
+
bool conflicts_exist = determine_conflicting_txnids(overlapping_row_locks,
txnid, conflicts);
if (!conflicts_exist) {
// there are no conflicts, so all of the overlaps are for the requesting txnid.
// so, we must consolidate all existing overlapping ranges and the requested
// range into one dominating range. then we insert the dominating range.
+ bool all_shared = !is_write_request;
for (size_t i = 0; i < num_overlapping_row_locks; i++) {
row_lock overlapping_lock = overlapping_row_locks.fetch_unchecked(i);
invariant(overlapping_lock.txnid == txnid);
requested_range.extend(m_cmp, overlapping_lock.range);
- remove_row_lock_from_tree(lkr, overlapping_lock, m_mgr);
+ remove_row_lock_from_tree(lkr, overlapping_lock, TXNID_ANY, m_mgr);
+ all_shared = all_shared && overlapping_lock.is_shared;
}
- row_lock new_lock = { .range = requested_range, .txnid = txnid };
+ row_lock new_lock = { .range = requested_range, .txnid = txnid,
+ .is_shared = all_shared, .owners = nullptr };
insert_row_lock_into_tree(lkr, new_lock, m_mgr);
} else {
r = DB_LOCK_NOTGRANTED;
@@ -383,7 +487,7 @@ int locktree::acquire_lock(bool is_write_request,
int r = 0;
// we are only supporting write locks for simplicity
- invariant(is_write_request);
+ //invariant(is_write_request);
// acquire and prepare a locked keyrange over the requested range.
// prepare is a serialzation point, so we take the opportunity to
@@ -391,9 +495,11 @@ int locktree::acquire_lock(bool is_write_request,
concurrent_tree::locked_keyrange lkr;
lkr.prepare(m_rangetree);
- bool acquired = sto_try_acquire(&lkr, txnid, left_key, right_key);
+ bool acquired = sto_try_acquire(&lkr, txnid, left_key, right_key,
+ is_write_request);
if (!acquired) {
- r = acquire_lock_consolidated(&lkr, txnid, left_key, right_key, conflicts);
+ r = acquire_lock_consolidated(&lkr, txnid, left_key, right_key,
+ is_write_request, conflicts);
}
lkr.release();
@@ -418,7 +524,7 @@ int locktree::try_acquire_lock(bool is_write_request,
// the locktree silently upgrades read locks to write locks for simplicity
int locktree::acquire_read_lock(TXNID txnid, const DBT *left_key, const DBT *right_key,
txnid_set *conflicts, bool big_txn) {
- return acquire_write_lock(txnid, left_key, right_key, conflicts, big_txn);
+ return try_acquire_lock(false, txnid, left_key, right_key, conflicts, big_txn);
}
int locktree::acquire_write_lock(TXNID txnid, const DBT *left_key, const DBT *right_key,
@@ -447,7 +553,9 @@ void locktree::dump_locks(void *cdata, dump_callback cb)
(*cb)(cdata,
lock.range.get_left_key(),
lock.range.get_right_key(),
- lock.txnid);
+ lock.txnid,
+ lock.is_shared,
+ lock.owners);
}
lkr.release();
all_locks.deinit();
@@ -525,8 +633,11 @@ void locktree::remove_overlapping_locks_for_txnid(TXNID txnid,
row_lock lock = overlapping_row_locks.fetch_unchecked(i);
// If this isn't our lock, that's ok, just don't remove it.
// See rationale above.
- if (lock.txnid == txnid) {
- remove_row_lock_from_tree(&lkr, lock, m_mgr);
+ // psergey-todo: for shared locks, just remove ourselves from the
+ // owners.
+ if (lock.txnid == txnid ||
+ (lock.owners && lock.owners->contains(txnid))) {
+ remove_row_lock_from_tree(&lkr, lock, txnid, m_mgr);
}
}
@@ -630,11 +741,17 @@ static int extract_first_n_row_locks(concurrent_tree::locked_keyrange *lkr,
int num_extracted;
int num_to_extract;
row_lock *row_locks;
- bool fn(const keyrange &range, TXNID txnid) {
+ bool fn(const keyrange &range, TXNID txnid, bool is_shared, TxnidVector *owners) {
if (num_extracted < num_to_extract) {
row_lock lock;
lock.range.create_copy(range);
lock.txnid = txnid;
+ lock.is_shared= is_shared;
+ // deep-copy the set of owners:
+ if (owners)
+ lock.owners = new TxnidVector(*owners);
+ else
+ lock.owners = nullptr;
row_locks[num_extracted++] = lock;
return true;
} else {
@@ -655,7 +772,7 @@ static int extract_first_n_row_locks(concurrent_tree::locked_keyrange *lkr,
int num_extracted = extract_fn.num_extracted;
invariant(num_extracted <= num_to_extract);
for (int i = 0; i < num_extracted; i++) {
- remove_row_lock_from_tree(lkr, row_locks[i], mgr);
+ remove_row_lock_from_tree(lkr, row_locks[i], TXNID_ANY, mgr);
}
return num_extracted;
@@ -722,38 +839,60 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca
// through them and merge adjacent locks with the same txnid into
// one dominating lock and save it to a set of escalated locks.
//
- // first, find the index of the next row lock with a different txnid
+ // first, find the index of the next row lock that
+ // - belongs to a different txnid, or
+ // - belongs to several txnids, or
+ // - is a shared lock (we could potentially merge those but
+ // currently we don't)
int next_txnid_index = current_index + 1;
+
while (next_txnid_index < num_extracted &&
- extracted_buf[current_index].txnid == extracted_buf[next_txnid_index].txnid) {
+ (extracted_buf[current_index].txnid ==
+ extracted_buf[next_txnid_index].txnid) &&
+ !extracted_buf[next_txnid_index].is_shared &&
+ !extracted_buf[next_txnid_index].owners) {
next_txnid_index++;
}
// Create an escalated range for the current txnid that dominates
// each range between the current indext and the next txnid's index.
- const TXNID current_txnid = extracted_buf[current_index].txnid;
+ //const TXNID current_txnid = extracted_buf[current_index].txnid;
const DBT *escalated_left_key = extracted_buf[current_index].range.get_left_key();
const DBT *escalated_right_key = extracted_buf[next_txnid_index - 1].range.get_right_key();
// Try to find a range buffer for the current txnid. Create one if it doesn't exist.
// Then, append the new escalated range to the buffer.
- uint32_t idx;
- struct txnid_range_buffer *existing_range_buffer;
- int r = range_buffers.find_zero<TXNID, txnid_range_buffer::find_by_txnid>(
- current_txnid,
- &existing_range_buffer,
- &idx
- );
- if (r == DB_NOTFOUND) {
- struct txnid_range_buffer *XMALLOC(new_range_buffer);
- new_range_buffer->txnid = current_txnid;
- new_range_buffer->buffer.create();
- new_range_buffer->buffer.append(escalated_left_key, escalated_right_key);
- range_buffers.insert_at(new_range_buffer, idx);
- } else {
- invariant_zero(r);
- invariant(existing_range_buffer->txnid == current_txnid);
- existing_range_buffer->buffer.append(escalated_left_key, escalated_right_key);
+ // (If a lock is shared by multiple txnids, append it each of txnid's lists)
+ TxnidVector *owners_ptr;
+ TxnidVector singleton_owner;
+ if (extracted_buf[current_index].owners)
+ owners_ptr = extracted_buf[current_index].owners;
+ else {
+ singleton_owner.insert(extracted_buf[current_index].txnid);
+ owners_ptr = &singleton_owner;
+ }
+
+ for (auto cur_txnid : *owners_ptr ) {
+ uint32_t idx;
+ struct txnid_range_buffer *existing_range_buffer;
+ int r = range_buffers.find_zero<TXNID, txnid_range_buffer::find_by_txnid>(
+ cur_txnid,
+ &existing_range_buffer,
+ &idx
+ );
+ if (r == DB_NOTFOUND) {
+ struct txnid_range_buffer *XMALLOC(new_range_buffer);
+ new_range_buffer->txnid = cur_txnid;
+ new_range_buffer->buffer.create();
+ new_range_buffer->buffer.append(escalated_left_key, escalated_right_key,
+ !extracted_buf[current_index].is_shared);
+ range_buffers.insert_at(new_range_buffer, idx);
+ } else {
+ invariant_zero(r);
+ invariant(existing_range_buffer->txnid == cur_txnid);
+ existing_range_buffer->buffer.append(escalated_left_key, escalated_right_key,
+ !extracted_buf[current_index].is_shared);
+ }
}
current_index = next_txnid_index;
@@ -761,6 +900,7 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca
// destroy the ranges copied during the extraction
for (int i = 0; i < num_extracted; i++) {
+ delete extracted_buf[i].owners;
extracted_buf[i].range.destroy();
}
}
@@ -768,6 +908,12 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca
// Rebuild the locktree from each range in each range buffer,
// then notify higher layers that the txnid's locks have changed.
+ //
+ // (shared locks: if a lock was initially shared between transactions TRX1,
+ // TRX2, etc, we will now try to acquire it acting on behalf on TRX1, on
+ // TRX2, etc. This will succeed and an identical shared lock will be
+ // constructed)
+
invariant(m_rangetree->is_empty());
const size_t num_range_buffers = range_buffers.size();
for (size_t i = 0; i < num_range_buffers; i++) {
@@ -781,7 +927,9 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca
while (iter.current(&rec)) {
keyrange range;
range.create(rec.get_left_key(), rec.get_right_key());
- row_lock lock = { .range = range, .txnid = current_txnid };
+ row_lock lock = { .range = range, .txnid = current_txnid,
+ .is_shared= !rec.get_exclusive_flag(),
+ .owners= nullptr };
insert_row_lock_into_tree(&lkr, lock, m_mgr);
iter.next();
}
diff --git a/utilities/transactions/range_locking/locktree/locktree.h b/utilities/transactions/range_locking/locktree/locktree.h
index 5ff4f7449..e7c909be0 100644
--- a/utilities/transactions/range_locking/locktree/locktree.h
+++ b/utilities/transactions/range_locking/locktree/locktree.h
@@ -339,7 +339,10 @@ namespace toku {
// since the lock_request object is opaque
struct lt_lock_request_info *get_lock_request_info(void);
- typedef void (*dump_callback)(void *cdata, const DBT *left, const DBT *right, TXNID txnid);
+ typedef void (*dump_callback)(void *cdata,
+ const DBT *left, const DBT *right,
+ TXNID txnid, bool is_shared,
+ TxnidVector *owners);
void dump_locks(void *cdata, dump_callback cb);
private:
locktree_manager *m_mgr;
@@ -360,6 +363,12 @@ namespace toku {
void *m_userdata;
struct lt_lock_request_info m_lock_request_info;
+ // psergey-todo:
+ // Each transaction also keeps a list of ranges it has locked.
+ // So, when a transaction is running in STO mode, two identical
+ // lists are kept: the STO lock list and transaction's owned locks
+ // list. Why can't we do with just one list?
+
// The following fields and members prefixed with "sto_" are for
// the single txnid optimization, intended to speed up the case
// when only one transaction is using the locktree. If we know
@@ -453,7 +462,8 @@ namespace toku {
// effect: append a range to the sto buffer
// requires: m_sto_txnid is valid
- void sto_append(const DBT *left_key, const DBT *right_key);
+ void sto_append(const DBT *left_key, const DBT *right_key,
+ bool is_write_request);
// effect: ends the single txnid optimization, releaseing any memory
// stored in the sto buffer, notifying the tracker, and
@@ -494,7 +504,8 @@ namespace toku {
// back to zero.
// returns: true if the lock was acquired for this txnid
bool sto_try_acquire(void *prepared_lkr, TXNID txnid,
- const DBT *left_key, const DBT *right_key);
+ const DBT *left_key, const DBT *right_key,
+ bool is_write_request);
// Effect:
// Provides a hook for a helgrind suppression.
@@ -513,7 +524,7 @@ namespace toku {
int acquire_lock_consolidated(void *prepared_lkr, TXNID txnid,
const DBT *left_key, const DBT *right_key,
- txnid_set *conflicts);
+ bool is_write_request, txnid_set *conflicts);
int acquire_lock(bool is_write_request, TXNID txnid,
const DBT *left_key, const DBT *right_key,
diff --git a/utilities/transactions/range_locking/locktree/range_buffer.cc b/utilities/transactions/range_locking/locktree/range_buffer.cc
index d1f14fc4a..eab374945 100644
--- a/utilities/transactions/range_locking/locktree/range_buffer.cc
+++ b/utilities/transactions/range_locking/locktree/range_buffer.cc
@@ -66,7 +66,9 @@ namespace toku {
return right_neg_inf || right_pos_inf;
}
- void range_buffer::record_header::init(const DBT *left_key, const DBT *right_key) {
+ void range_buffer::record_header::init(const DBT *left_key, const DBT *right_key,
+ bool is_exclusive) {
+ is_exclusive_lock= is_exclusive;
left_neg_inf = left_key == toku_dbt_negative_infinity();
left_pos_inf = left_key == toku_dbt_positive_infinity();
left_key_size = toku_dbt_is_infinite(left_key) ? 0 : left_key->size;
@@ -186,15 +188,16 @@ namespace toku {
_num_ranges = 0;
}
- void range_buffer::append(const DBT *left_key, const DBT *right_key) {
+ void range_buffer::append(const DBT *left_key, const DBT *right_key,
+ bool is_write_request) {
// if the keys are equal, then only one copy is stored.
if (toku_dbt_equals(left_key, right_key)) {
invariant(left_key->size <= MAX_KEY_SIZE);
- append_point(left_key);
+ append_point(left_key, is_write_request);
} else {
invariant(left_key->size <= MAX_KEY_SIZE);
invariant(right_key->size <= MAX_KEY_SIZE);
- append_range(left_key, right_key);
+ append_range(left_key, right_key, is_write_request);
}
_num_ranges++;
}
@@ -215,12 +218,13 @@ namespace toku {
_arena.destroy();
}
- void range_buffer::append_range(const DBT *left_key, const DBT *right_key) {
+ void range_buffer::append_range(const DBT *left_key, const DBT *right_key,
+ bool is_exclusive) {
size_t record_length = sizeof(record_header) + left_key->size + right_key->size;
char *buf = reinterpret_cast<char *>(_arena.malloc_from_arena(record_length));
record_header h;
- h.init(left_key, right_key);
+ h.init(left_key, right_key, is_exclusive);
// serialize the header
memcpy(buf, &h, sizeof(record_header));
@@ -238,12 +242,12 @@ namespace toku {
}
}
- void range_buffer::append_point(const DBT *key) {
+ void range_buffer::append_point(const DBT *key, bool is_exclusive) {
size_t record_length = sizeof(record_header) + key->size;
char *buf = reinterpret_cast<char *>(_arena.malloc_from_arena(record_length));
record_header h;
- h.init(key, nullptr);
+ h.init(key, nullptr, is_exclusive);
// serialize the header
memcpy(buf, &h, sizeof(record_header));
diff --git a/utilities/transactions/range_locking/locktree/range_buffer.h b/utilities/transactions/range_locking/locktree/range_buffer.h
index 9bc02dc22..e8869fae5 100644
--- a/utilities/transactions/range_locking/locktree/range_buffer.h
+++ b/utilities/transactions/range_locking/locktree/range_buffer.h
@@ -77,12 +77,14 @@ namespace toku {
bool right_neg_inf;
uint16_t left_key_size;
uint16_t right_key_size;
+ bool is_exclusive_lock;
bool left_is_infinite(void) const;
bool right_is_infinite(void) const;
- void init(const DBT *left_key, const DBT *right_key);
+ void init(const DBT *left_key, const DBT *right_key,
+ bool is_exclusive);
};
// PORT static_assert(sizeof(record_header) == 8, "record header format is off");
@@ -109,6 +111,10 @@ namespace toku {
// how big is this record? this tells us where the next record is
size_t size(void) const;
+ bool get_exclusive_flag() const {
+ return _header.is_exclusive_lock;
+ }
+
// populate a record header and point our DBT's
// buffers into ours if they are not infinite.
void deserialize(const char *buf);
@@ -145,7 +151,8 @@ namespace toku {
// append a left/right key range to the buffer.
// if the keys are equal, then only one copy is stored.
- void append(const DBT *left_key, const DBT *right_key);
+ void append(const DBT *left_key, const DBT *right_key,
+ bool is_write_request=false);
// is this range buffer empty?
bool is_empty(void) const;
@@ -162,11 +169,11 @@ namespace toku {
memarena _arena;
int _num_ranges;
- void append_range(const DBT *left_key, const DBT *right_key);
+ void append_range(const DBT *left_key, const DBT *right_key, bool is_write_request);
// append a point to the buffer. this is the space/time saving
// optimization for key ranges where left == right.
- void append_point(const DBT *key);
+ void append_point(const DBT *key, bool is_write_request);
};
} /* namespace toku */
diff --git a/utilities/transactions/range_locking/locktree/treenode.cc b/utilities/transactions/range_locking/locktree/treenode.cc
index 051ec7d1c..5bf349749 100644
--- a/utilities/transactions/range_locking/locktree/treenode.cc
+++ b/utilities/transactions/range_locking/locktree/treenode.cc
@@ -64,6 +64,10 @@ void treenode::init(const comparator *cmp) {
m_is_root = false;
m_is_empty = true;
m_cmp = cmp;
+
+ m_is_shared= false;
+ m_owners= nullptr;
+
// use an adaptive mutex at each node since we expect the time the
// lock is held to be relatively short compared to a context switch.
// indeed, this improves performance at high thread counts considerably.
@@ -89,10 +93,11 @@ void treenode::destroy_root(void) {
m_cmp = nullptr;
}
-void treenode::set_range_and_txnid(const keyrange &range, TXNID txnid) {
+void treenode::set_range_and_txnid(const keyrange &range, TXNID txnid, bool is_shared) {
// allocates a new copy of the range for this node
m_range.create_copy(range);
m_txnid = txnid;
+ m_is_shared= is_shared;
m_is_empty = false;
}
@@ -108,10 +113,11 @@ bool treenode::range_overlaps(const keyrange &range) {
return m_range.overlaps(*m_cmp, range);
}
-treenode *treenode::alloc(const comparator *cmp, const keyrange &range, TXNID txnid) {
+treenode *treenode::alloc(const comparator *cmp, const keyrange &range,
+ TXNID txnid, bool is_shared) {
treenode *XCALLOC(node);
node->init(cmp);
- node->set_range_and_txnid(range, txnid);
+ node->set_range_and_txnid(range, txnid, is_shared);
return node;
}
@@ -122,12 +128,31 @@ void treenode::swap_in_place(treenode *node1, treenode *node2) {
node1->m_txnid = node2->m_txnid;
node2->m_range = tmp_range;
node2->m_txnid = tmp_txnid;
+
+ bool tmp_is_shared= node1->m_is_shared;
+ node1->m_is_shared= node2->m_is_shared;
+ node2->m_is_shared= tmp_is_shared;
+}
+
+void treenode::add_shared_owner(TXNID txnid) {
+ assert(m_is_shared);
+ if (m_txnid != TXNID_SHARED) {
+ m_owners= new TxnidVector;
+ m_owners->insert(m_txnid);
+ m_txnid= TXNID_SHARED;
+ }
+ m_owners->insert(txnid);
}
void treenode::free(treenode *node) {
// destroy the range, freeing any copied keys
node->m_range.destroy();
+ if (node->m_owners) {
+ delete node->m_owners;
+ node->m_owners = nullptr; // need this?
+ }
+
// the root is simply marked as empty.
if (node->is_root()) {
// PORT toku_mutex_assert_locked(&node->m_mutex);
@@ -189,7 +214,7 @@ void treenode::traverse_overlaps(const keyrange &range, F *function) {
if (c == keyrange::comparison::EQUALS) {
// Doesn't matter if fn wants to keep going, there
// is nothing left, so return.
- function->fn(m_range, m_txnid);
+ function->fn(m_range, m_txnid, m_is_shared, m_owners);
return;
}
@@ -204,7 +229,7 @@ void treenode::traverse_overlaps(const keyrange &range, F *function) {
}
if (c == keyrange::comparison::OVERLAPS) {
- bool keep_going = function->fn(m_range, m_txnid);
+ bool keep_going = function->fn(m_range, m_txnid, m_is_shared, m_owners);
if (!keep_going) {
return;
}
@@ -221,29 +246,35 @@ void treenode::traverse_overlaps(const keyrange &range, F *function) {
}
}
-void treenode::insert(const keyrange &range, TXNID txnid) {
+void treenode::insert(const keyrange &range, TXNID txnid, bool is_shared) {
// choose a child to check. if that child is null, then insert the new node there.
// otherwise recur down that child's subtree
keyrange::comparison c = range.compare(*m_cmp, m_range);
if (c == keyrange::comparison::LESS_THAN) {
treenode *left_child = lock_and_rebalance_left();
if (left_child == nullptr) {
- left_child = treenode::alloc(m_cmp, range, txnid);
+ left_child = treenode::alloc(m_cmp, range, txnid, is_shared);
m_left_child.set(left_child);
} else {
- left_child->insert(range, txnid);
+ left_child->insert(range, txnid, is_shared);
left_child->mutex_unlock();
}
- } else {
- invariant(c == keyrange::comparison::GREATER_THAN);
+ } else if (c == keyrange::comparison::GREATER_THAN) {
+ //invariant(c == keyrange::comparison::GREATER_THAN);
treenode *right_child = lock_and_rebalance_right();
if (right_child == nullptr) {
- right_child = treenode::alloc(m_cmp, range, txnid);
+ right_child = treenode::alloc(m_cmp, range, txnid, is_shared);
m_right_child.set(right_child);
} else {
- right_child->insert(range, txnid);
+ right_child->insert(range, txnid, is_shared);
right_child->mutex_unlock();
}
+ } else if (c == keyrange::comparison::EQUALS) {
+ invariant(is_shared);
+ invariant(m_is_shared);
+ add_shared_owner(txnid);
+ } else {
+ invariant(0);
}
}
@@ -337,19 +368,38 @@ void treenode::recursive_remove(void) {
treenode::free(this);
}
-treenode *treenode::remove(const keyrange &range) {
+void treenode::remove_shared_owner(TXNID txnid) {
+ m_owners->erase(txnid);
+ /* if there is just one owner left, move it to m_txnid */
+ if (m_owners->size() == 1)
+ {
+ m_txnid = * m_owners->begin();
+ delete m_owners;
+ m_owners = nullptr;
+ }
+}
+
+treenode *treenode::remove(const keyrange &range, TXNID txnid) {
treenode *child;
// if the range is equal to this node's range, then just remove
// the root of this subtree. otherwise search down the tree
// in either the left or right children.
keyrange::comparison c = range.compare(*m_cmp, m_range);
switch (c) {
- case keyrange::comparison::EQUALS:
- return remove_root_of_subtree();
+ case keyrange::comparison::EQUALS: {
+ // if we are the only owners, remove. Otherwise, just remove
+ // us from the owners list.
+ if (txnid != TXNID_ANY && has_multiple_owners()) {
+ remove_shared_owner(txnid);
+ return this;
+ } else {
+ return remove_root_of_subtree();
+ }
+ }
case keyrange::comparison::LESS_THAN:
child = m_left_child.get_locked();
invariant_notnull(child);
- child = child->remove(range);
+ child = child->remove(range, txnid);
// unlock the child if there still is one.
// regardless, set the right child pointer
@@ -361,7 +411,7 @@ treenode *treenode::remove(const keyrange &range) {
case keyrange::comparison::GREATER_THAN:
child = m_right_child.get_locked();
invariant_notnull(child);
- child = child->remove(range);
+ child = child->remove(range, txnid);
// unlock the child if there still is one.
// regardless, set the right child pointer
diff --git a/utilities/transactions/range_locking/locktree/treenode.h b/utilities/transactions/range_locking/locktree/treenode.h
index a4b01f1cc..f23324f03 100644
--- a/utilities/transactions/range_locking/locktree/treenode.h
+++ b/utilities/transactions/range_locking/locktree/treenode.h
@@ -92,7 +92,7 @@ public:
void destroy_root(void);
// effect: sets the txnid and copies the given range for this node
- void set_range_and_txnid(const keyrange &range, TXNID txnid);
+ void set_range_and_txnid(const keyrange &range, TXNID txnid, bool is_shared);
// returns: true iff this node is marked as empty
bool is_empty(void);
@@ -127,12 +127,12 @@ public:
// effect: inserts the given range and txnid into a subtree, recursively
// requires: range does not overlap with any node below the subtree
- void insert(const keyrange &range, TXNID txnid);
+ void insert(const keyrange &range, TXNID txnid, bool is_shared);
// effect: removes the given range from the subtree
// requires: range exists in the subtree
// returns: the root of the resulting subtree
- treenode *remove(const keyrange &range);
+ treenode *remove(const keyrange &range, TXNID txnid);
// effect: removes this node and all of its children, recursively
// requires: every node at and below this node is unlocked
@@ -166,13 +166,30 @@ private:
// destroyed, it frees the memory associated with whatever range
// it has at the time of destruction.
keyrange m_range;
+
+ void remove_shared_owner(TXNID txnid);
+
+ bool has_multiple_owners() { return (m_txnid == TXNID_SHARED); }
+
+private:
+ // Owner transaction id.
+ // A value of TXNID_SHARED means this node has multiple owners
TXNID m_txnid;
+ // If true, this lock is a non-exclusive lock, and it can have either
+ // one or several owners.
+ bool m_is_shared;
+
+ // List of the owners, or nullptr if there's just one owner.
+ TxnidVector *m_owners;
+
// two child pointers
child_ptr m_left_child;
child_ptr m_right_child;
// comparator for ranges
+ // psergey-todo: Is there any sense to store the comparator in each tree
+ // node?
const comparator *m_cmp;
// marked for the root node. the root node is never free()'d
@@ -185,6 +202,10 @@ private:
// effect: initializes an empty node with the given comparator
void init(const comparator *cmp);
+ // requires: this is a shared node (m_is_shared==true)
+ // effect: another transaction is added as an owner.
+ void add_shared_owner(TXNID txnid);
+
// requires: *parent is initialized to something meaningful.
// requires: subtree is non-empty
// returns: the leftmost child of the given subtree
@@ -230,7 +251,8 @@ private:
treenode *maybe_rebalance(void);
// returns: allocated treenode populated with a copy of the range and txnid
- static treenode *alloc(const comparator *cmp, const keyrange &range, TXNID txnid);
+ static treenode *alloc(const comparator *cmp, const keyrange &range,
+ TXNID txnid, bool is_shared);
// requires: node is a locked root node, or an unlocked non-root node
static void free(treenode *node);
diff --git a/utilities/transactions/range_locking/portability/txn_subst.h b/utilities/transactions/range_locking/portability/txn_subst.h
index 3882eb1c5..58c3fced0 100644
--- a/utilities/transactions/range_locking/portability/txn_subst.h
+++ b/utilities/transactions/range_locking/portability/txn_subst.h
@@ -3,8 +3,24 @@
//
#pragma once
+#include <set>
#include "util/omt.h"
typedef uint64_t TXNID;
#define TXNID_NONE ((TXNID)0)
+// A set of transactions
+// (TODO: consider using class toku::txnid_set. The reason for using STL
+// container was that its API is easier)
+class TxnidVector : public std::set<TXNID> {
+public:
+ bool contains(TXNID txnid) { return find(txnid) != end(); }
+};
+
+// A value for lock structures with a meaning "the lock is owned by multiple
+// transactions (and one has to check the TxnidVector to get their ids)
+#define TXNID_SHARED (TXNID(-1))
+
+// Auxiliary value meaning "any transaction id will do". No real transaction
+// may have this is as id.
+#define TXNID_ANY (TXNID(-2))
diff --git a/utilities/transactions/transaction_lock_mgr.cc b/utilities/transactions/transaction_lock_mgr.cc
index bd81dbdfc..fa8e49476 100644
--- a/utilities/transactions/transaction_lock_mgr.cc
+++ b/utilities/transactions/transaction_lock_mgr.cc
@@ -820,7 +820,7 @@ Status RangeLockMgr::TryRangeLock(PessimisticTransaction* txn,
uint32_t column_family_id,
const Endpoint &start_endp,
const Endpoint &end_endp,
- bool /*exclusive*/) {
+ bool exclusive) {
toku::lock_request request;
request.create(mutex_factory_);
DBT start_key_dbt, end_key_dbt;
@@ -842,7 +842,8 @@ Status RangeLockMgr::TryRangeLock(PessimisticTransaction* txn,
auto lt= get_locktree_by_cfid(column_family_id);
request.set(lt, (TXNID)txn, &start_key_dbt, &end_key_dbt,
- toku::lock_request::WRITE, false /* not a big txn */,
+ exclusive? toku::lock_request::WRITE: toku::lock_request::READ,
+ false /* not a big txn */,
(void*)wait_txn_id);
uint64_t killed_time_msec = 0; // TODO: what should this have?
@@ -1147,25 +1148,25 @@ RangeLockMgr::~RangeLockMgr() {
ltm_.destroy();
}
-uint64_t RangeLockMgr::get_escalation_count() {
+RangeLockMgrHandle::Counters RangeLockMgr::GetStatus() {
LTM_STATUS_S ltm_status_test;
ltm_.get_status(<m_status_test);
+ Counters res;
// Searching status variable by its string name is how Toku's unit tests
// do it (why didn't they make LTM_ESCALATION_COUNT constant visible?)
- TOKU_ENGINE_STATUS_ROW key_status = NULL;
// lookup keyname in status
- for (int i = 0; ; i++) {
+ for (int i = 0; i < LTM_STATUS_S::LTM_STATUS_NUM_ROWS; i++) {
TOKU_ENGINE_STATUS_ROW status = <m_status_test.status[i];
- if (status->keyname == NULL)
- break;
if (strcmp(status->keyname, "LTM_ESCALATION_COUNT") == 0) {
- key_status = status;
- break;
+ res.escalation_count = status->value.num;
+ continue;
+ }
+ if (strcmp(status->keyname, "LTM_SIZE_CURRENT") == 0) {
+ res.current_lock_memory = status->value.num;
}
}
- assert(key_status);
- return key_status->value.num;
+ return res;
}
void RangeLockMgr::AddColumnFamily(const ColumnFamilyHandle *cfh) {
@@ -1254,13 +1255,15 @@ struct LOCK_PRINT_CONTEXT {
};
static
-void push_into_lock_status_data(void* param, const DBT *left,
- const DBT *right, TXNID txnid_arg) {
+void push_into_lock_status_data(void* param,
+ const DBT *left, const DBT *right,
+ TXNID txnid_arg, bool is_shared,
+ TxnidVector *owners) {
struct LOCK_PRINT_CONTEXT *ctx= (LOCK_PRINT_CONTEXT*)param;
struct KeyLockInfo info;
info.key.append((const char*)left->data, (size_t)left->size);
- info.exclusive= true;
+ info.exclusive= !is_shared;
if (!(left->size == right->size &&
!memcmp(left->data, right->data, left->size)))
@@ -1270,8 +1273,15 @@ void push_into_lock_status_data(void* param, const DBT *left,
info.key2.append((const char*)right->data, right->size);
}
- TXNID txnid= ((PessimisticTransaction*)txnid_arg)->GetID();
- info.ids.push_back(txnid);
+ if (txnid_arg != TXNID_SHARED) {
+ TXNID txnid= ((PessimisticTransaction*)txnid_arg)->GetID();
+ info.ids.push_back(txnid);
+ } else {
+ for (auto it : *owners) {
+ TXNID real_id= ((PessimisticTransaction*)it)->GetID();
+ info.ids.push_back(real_id);
+ }
+ }
ctx->data->insert({ctx->cfh_id, info});
}
diff --git a/utilities/transactions/transaction_lock_mgr.h b/utilities/transactions/transaction_lock_mgr.h
index 33ebbeb03..4bb66febf 100644
--- a/utilities/transactions/transaction_lock_mgr.h
+++ b/utilities/transactions/transaction_lock_mgr.h
@@ -240,7 +240,11 @@ class RangeLockMgr :
return ltm_.set_max_lock_memory(max_lock_memory);
}
- uint64_t get_escalation_count() override;
+ size_t get_max_lock_memory() {
+ return ltm_.get_max_lock_memory();
+ }
+
+ Counters GetStatus() override;
LockStatusData GetLockStatusData() override;
1
0
[Commits] be83f81: MDEV-19919 Assertion `!prebuilt->index->is_primary()' failed
by IgorBabaev 23 Nov '19
by IgorBabaev 23 Nov '19
23 Nov '19
revision-id: be83f81fbf8fea75955e047c13501443bb953452 (mariadb-10.4.10-23-gbe83f81)
parent(s): 6cedb671e99038f1a10e0d8504f835aaabed9780
author: Igor Babaev
committer: Igor Babaev
timestamp: 2019-11-22 19:11:58 -0800
message:
MDEV-19919 Assertion `!prebuilt->index->is_primary()' failed
in row_search_idx_cond_check
For a single table query with ORDER BY and several sargable range
conditions the optimizer may choose an execution plan that employs
a rowid filter. In this case it is important to build the filter before
calling the function JOIN_TAB::sort_table() that creates sort index
for the result set, because when this is index created the filter has
to be already filled. After the sort index has been created the
filter must be deactivated. If not to do this the innodb function
row_search_idx_cond_check() is getting confused when it has to read rows
from the created sort index by using ha_rnd_pos().
The order of actions mentioned above is needed also when processing a
join query if sorting is performed for the first non constant table in
the chosen execution plan.
---
mysql-test/main/rowid_filter_innodb.result | 35 ++++++++++++++++++++++++++++++
mysql-test/main/rowid_filter_innodb.test | 33 ++++++++++++++++++++++++++++
sql/sql_select.cc | 8 +++++--
3 files changed, 74 insertions(+), 2 deletions(-)
diff --git a/mysql-test/main/rowid_filter_innodb.result b/mysql-test/main/rowid_filter_innodb.result
index 390c783..37e32f0 100644
--- a/mysql-test/main/rowid_filter_innodb.result
+++ b/mysql-test/main/rowid_filter_innodb.result
@@ -2210,3 +2210,38 @@ a b
drop table t1;
set optimizer_switch=@save_optimizer_switch;
SET SESSION STORAGE_ENGINE=DEFAULT;
+#
+# MDEV-19919: use of rowid filter for innodb table + ORDER BY
+#
+SET @stats.save= @@innodb_stats_persistent;
+SET GLOBAL innodb_stats_persistent= ON;
+CREATE TABLE t1 (
+a INT,
+b VARCHAR(10),
+c VARCHAR(1024),
+KEY (b),
+KEY (c)
+) ENGINE=InnoDB;
+INSERT INTO t1 VALUES
+(1,'w','z'), (1,'X','o'), (1,'q','c'), (5,'w','c'), (2,'j','m'),
+(2,'Q','s'), (9,'e','J'), (2,'p','W'), (9,'o','F'), (2,'g','S'),
+(1,'Y','a'), (NULL,'Y','p'), (NULL,'s','x'), (NULL,'i','S'),
+(1,'l','q'), (7,'r','e'), (4,'b','h'), (NULL,'E','c'),
+(NULL,'M','a'), (3,'e','X'), (NULL,'p','r'), (9,'e','i'),
+(3,'g','x'), (2,'h','y');
+ANALYZE TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 analyze status Engine-independent statistics collected
+test.t1 analyze status OK
+EXPLAIN EXTENDED
+SELECT a FROM t1 WHERE c < 'k' AND b > 't' ORDER BY a;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t1 range|filter b,c b|c 13|1027 NULL 5 (42%) 41.67 Using index condition; Using where; Using filesort; Using rowid filter
+Warnings:
+Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where `test`.`t1`.`c` < 'k' and `test`.`t1`.`b` > 't' order by `test`.`t1`.`a`
+SELECT a FROM t1 WHERE c < 'k' AND b > 't' ORDER BY a;
+a
+1
+5
+DROP TABLE t1;
+SET GLOBAL innodb_stats_persistent= @stats.save;
diff --git a/mysql-test/main/rowid_filter_innodb.test b/mysql-test/main/rowid_filter_innodb.test
index 240cd92..4a6c431 100644
--- a/mysql-test/main/rowid_filter_innodb.test
+++ b/mysql-test/main/rowid_filter_innodb.test
@@ -96,3 +96,36 @@ drop table t1;
set optimizer_switch=@save_optimizer_switch;
SET SESSION STORAGE_ENGINE=DEFAULT;
+
+--echo #
+--echo # MDEV-19919: use of rowid filter for innodb table + ORDER BY
+--echo #
+
+SET @stats.save= @@innodb_stats_persistent;
+SET GLOBAL innodb_stats_persistent= ON;
+
+CREATE TABLE t1 (
+ a INT,
+ b VARCHAR(10),
+ c VARCHAR(1024),
+ KEY (b),
+ KEY (c)
+) ENGINE=InnoDB;
+
+INSERT INTO t1 VALUES
+ (1,'w','z'), (1,'X','o'), (1,'q','c'), (5,'w','c'), (2,'j','m'),
+ (2,'Q','s'), (9,'e','J'), (2,'p','W'), (9,'o','F'), (2,'g','S'),
+ (1,'Y','a'), (NULL,'Y','p'), (NULL,'s','x'), (NULL,'i','S'),
+ (1,'l','q'), (7,'r','e'), (4,'b','h'), (NULL,'E','c'),
+ (NULL,'M','a'), (3,'e','X'), (NULL,'p','r'), (9,'e','i'),
+ (3,'g','x'), (2,'h','y');
+
+ANALYZE TABLE t1;
+
+EXPLAIN EXTENDED
+SELECT a FROM t1 WHERE c < 'k' AND b > 't' ORDER BY a;
+
+SELECT a FROM t1 WHERE c < 'k' AND b > 't' ORDER BY a;
+
+DROP TABLE t1;
+SET GLOBAL innodb_stats_persistent= @stats.save;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index a05c2f8..1ee2a17 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -21101,11 +21101,12 @@ int join_init_read_record(JOIN_TAB *tab)
*/
if (tab->distinct && tab->remove_duplicates()) // Remove duplicates.
return 1;
- if (tab->filesort && tab->sort_table()) // Sort table.
- return 1;
tab->build_range_rowid_filter_if_needed();
+ if (tab->filesort && tab->sort_table()) // Sort table.
+ return 1;
+
DBUG_EXECUTE_IF("kill_join_init_read_record",
tab->join->thd->set_killed(KILL_QUERY););
if (tab->select && tab->select->quick && tab->select->quick->reset())
@@ -21165,6 +21166,9 @@ JOIN_TAB::sort_table()
JOIN::ordered_index_order_by :
JOIN::ordered_index_group_by));
rc= create_sort_index(join->thd, join, this, NULL);
+ /* Disactivate rowid filter if it was used when creating sort index */
+ if (rowid_filter)
+ table->file->rowid_filter_is_active= false;
return (rc != 0);
}
1
0