revision-id: 62edf4ed553963fc40eef7087689e9d64d174538 (mariadb-10.1.39-31-g62edf4e)
parent(s): 47637a3dd13d19e897a7cbfd1499f1bf3b2fdb2a
author: Igor Babaev
committer: Igor Babaev
timestamp: 2019-05-14 23:50:08 -0700
message:
MDEV-19258 RIGHT JOIN hangs in MariaDB
This patch corrects the patch for the bug 10006. The latter incorrectly
calculates the attribute TABLE_LIST::dep_tables for inner tables
of outer joins that are to be converted into inner joins.
As a result after the patch some valid join orders were not evaluated
and the optimizer could choose an execution plan that was far from
being optimal.
---
mysql-test/r/join_outer.result | 139 +++++++++++++++++++++++++++++++++++-
mysql-test/r/join_outer_jcl6.result | 139 +++++++++++++++++++++++++++++++++++-
mysql-test/t/join_outer.test | 127 +++++++++++++++++++++++++++++++-
sql/sql_select.cc | 16 ++++-
4 files changed, 416 insertions(+), 5 deletions(-)
diff --git a/mysql-test/r/join_outer.result b/mysql-test/r/join_outer.result
index 6bd12db..3120a6d 100644
--- a/mysql-test/r/join_outer.result
+++ b/mysql-test/r/join_outer.result
@@ -2447,7 +2447,7 @@ t1.b1+'0' t2.b2 + '0'
0 0
1 1
DROP TABLE t1, t2;
-set @join_cache_level= @save_join_cache_level;
+set @@join_cache_level= @save_join_cache_level;
#
# MDEV-14779: using left join causes incorrect results with materialization and derived tables
#
@@ -2513,4 +2513,141 @@ ORDER BY tb1.i1;
v2
DROP TABLE t1,t2;
# end of 5.5 tests
+#
+# MDEV-19258: chained right joins all converted to inner joins
+#
+CREATE TABLE t1 (
+id int NOT NULL AUTO_INCREMENT,
+timestamp bigint NOT NULL,
+modifiedBy varchar(255) DEFAULT NULL,
+PRIMARY KEY (id)
+);
+CREATE TABLE t2 (
+id int NOT NULL,
+REV int NOT NULL,
+REVTYPE tinyint DEFAULT NULL,
+profile_id int DEFAULT NULL,
+PRIMARY KEY (id,REV)
+);
+CREATE TABLE t3 (
+id int NOT NULL,
+REV int NOT NULL,
+person_id int DEFAULT NULL,
+PRIMARY KEY (id,REV)
+);
+CREATE TABLE t4 (
+id int NOT NULL,
+REV int NOT NULL,
+PRIMARY KEY (id,REV)
+);
+INSERT INTO t1 VALUES
+(1,1294391193890,'Cxqy$*9.kKeE'),(2,1294643906883,'rE4wqGV0gif@'),
+(3,1294643927456,'L?3yt(%dY$Br'),(4,1294644343525,'WH&ObiZ$#2S4'),
+(5,1294644616416,'YXnCbt?olUZ0'),(6,1294644954537,'8Npe4!(#lU@k'),
+(7,1294645046659,'knc0GhXB1#ib'),(8,1294645183829,'w*oPpVfuS8^m'),
+(9,1294645386701,'hwXR@3qVzrbU'),(10,1294645525982,'BeLW*Y9ndP0l'),
+(11,1294645627723,'nTegib^)qZ$I'),(12,1294650860266,'u62C^Kzx3wH8'),
+(13,1294657613745,'4&BkFjGa!qLg'),(14,1294660627161,')anpt312SCoh'),
+(15,1294661023336,'LtJ2PX?*kTmx'),(16,1294662838066,'POGRr@?#ofpl'),
+(17,1294663020989,'o.)1EOT2jnF7'),(18,1294663308065,'&TZ0F0LHE6.h'),
+(19,1294664900039,'j)kSC%^In$9d'),(20,1294668904556,'97glN50)cAo.'),
+(21,1294728056853,'lrKZxmw?I.Ek'),(22,1294728157174,'@P*SRg!pT.q?'),
+(23,1294728327099,'W9gPrptF.)8n'),(24,1294728418481,'$q*c^sM&URd#'),
+(25,1294728729620,'9*f4&bTPRtHo'),(26,1294728906014,')4VtTEnS7$oI'),
+(27,1294732190003,'8dkNSPq2u3AQ'),(28,1294733205065,'SV2N6IoEf438'),
+(29,1294741984927,'rBKj.0S^Ey%*'),(30,1294751748352,'j$2DvlBqk)Fw'),
+(31,1294753902212,'C$N6OrEw8elz'),(32,1294758120598,'DCSVZw!rnxXq'),
+(33,1294761769556,'OTS@QU8a6s5c'),(34,1294816845305,'IUE2stG0D3L5'),
+(35,1294816966909,'Xd16yka.9nHe'),(36,1294817116302,'lOQHZpm%!8qb'),
+(37,1294817374775,'^&pE3IhNf7ey'),(38,1294817538907,'oEn4#7C0Vhfp'),
+(39,1294818482950,'bx54J*O0Va&?'),(40,1294819047024,'J%@a&1.qgdb?'),
+(41,1294821826077,'C9kojr$L3Phz'),(42,1294825454458,'gG#BOnM80ZPi'),
+(43,1294904129918,'F^!TrjM#zdvc'),(44,1294904254166,'Va&Tb)k0RvlM'),
+(45,1294904414964,'dJjq0M6HvhR#'),(46,1294904505784,'nJmxg)ELqY(b'),
+(47,1294904602835,'dhF#or$Vge!7'),(48,1294904684728,'?bIh5E3l!0em'),
+(49,1294904877898,'Y*WflOdcxnk.'),(50,1294905002390,'*?H!lUgez5A.'),
+(51,1294905096043,'wlEIY3n9uz!p'),(52,1294905404621,'T?qv3H6&hlQD'),
+(53,1294905603922,'S@Bhys^Ti7bt'),(54,1294905788416,'KR?a5NVukz#l'),
+(55,1294905993190,'A*&q4kWhED!o'),(56,1294906205254,'fT0%7z0DF6h*'),
+(57,1294906319680,'LhzdW4?ivjR0'),(58,1294906424296,'h0KDlns%U*6T'),
+(59,1294906623844,'b$CfB1noI6Ax'),(60,1294911258896,'#T1*LP!3$Oys');
+INSERT INTO t2 VALUES
+(1,1,0,10209),(1,42480,1,10209),(1,61612,1,10209),(1,257545,1,10209),
+(1,385332,1,10209),(1,1687999,1,10209),(3,1,0,10210),(3,617411,2,10210),
+(4,11,0,14),(4,95149,1,10211),(4,607890,2,10211),(5,1,0,10212),
+(6,1,0,10213),(6,93344,1,10213),(6,295578,1,10213),(6,295579,1,10213),
+(6,295644,1,10213),(7,1,0,10214),(7,12,1,7),(7,688796,1,10214),
+(7,1140433,1,10214),(7,1715227,1,10214),(8,1,0,10215),(8,74253,1,10215),
+(8,93345,1,10215),(8,12,2,2),(9,1,0,10216),(9,93342,1,10216),
+(9,122354,1,10216),(9,301499,2,10216),(10,11,0,5),(10,93343,1,10217),
+(10,122355,1,10217),(10,123050,1,10217),(10,301500,2,10217),(11,1,0,10218),
+(11,87852,1,10218),(11,605499,2,10218),(12,1,0,10219),(12,88024,1,10219),
+(12,605892,2,10219),(13,1,0,10220);
+INSERT INTO t3 VALUES
+(1,1,300003),(1,117548,NULL),(2,1,300003),(2,117548,300006),
+(3,1,300153),(3,117548,NULL),(4,1,300153),(4,117548,NULL),
+(5,1,300153),(5,117548,NULL),(6,1,300182),(6,117548,NULL),
+(7,1,300205),(7,117548,NULL),(8,1,300217),(8,117548,NULL),
+(9,1,300290),(9,117548,NULL),(10,1,300290),(10,117548,NULL),
+(11,1,300405),(11,117548,NULL),(12,1,300670),(12,117548,NULL),
+(13,1,300670),(13,117548,NULL),(14,1,300006),(14,117548,NULL),
+(15,1,300671),(15,117548,NULL),(16,1,300732),(16,117548,NULL);
+INSERT INTO t4 VALUES
+(300000,1),(300001,1),(300003,1),(300004,1),
+(300005,1),(300005,688796),(300006,1),(300006,97697),
+(300009,1),(300010,1),(300011,1),(300012,1),(300013,1),
+(300014,1),(300015,1),(300016,1),(300017,1),(300018,1),
+(300019,1),(300020,1),(300021,1),(300022,1),(300023,1),
+(300024,1),(300025,1),(300026,1),(300027,1),(300028,1);
+# This should have join order of t2,t3,t4,t1
+EXPLAIN EXTENDED SELECT *
+FROM t1 INNER JOIN t2 ON t2.REV=t1.id
+INNER JOIN t3 ON t3.id=t2.profile_id
+INNER JOIN t4 ON t4.id=t3.person_id
+WHERE t1.timestamp < 1294664900039 AND t1.timestamp > 1294644616416 AND
+t2.REVTYPE=2;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 42 100.00 Using where
+1 SIMPLE t3 ref PRIMARY PRIMARY 4 test.t2.profile_id 1 100.00 Using where
+1 SIMPLE t4 ref PRIMARY PRIMARY 4 test.t3.person_id 1 100.00 Using index
+1 SIMPLE t1 eq_ref PRIMARY PRIMARY 4 test.t2.REV 1 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`id` AS `id`,`test`.`t1`.`timestamp` AS `timestamp`,`test`.`t1`.`modifiedBy` AS `modifiedBy`,`test`.`t2`.`id` AS `id`,`test`.`t2`.`REV` AS `REV`,`test`.`t2`.`REVTYPE` AS `REVTYPE`,`test`.`t2`.`profile_id` AS `profile_id`,`test`.`t3`.`id` AS `id`,`test`.`t3`.`REV` AS `REV`,`test`.`t3`.`person_id` AS `person_id`,`test`.`t4`.`id` AS `id`,`test`.`t4`.`REV` AS `REV` from `test`.`t1` join `test`.`t2` join `test`.`t3` join `test`.`t4` where ((`test`.`t2`.`REVTYPE` = 2) and (`test`.`t4`.`id` = `test`.`t3`.`person_id`) and (`test`.`t3`.`id` = `test`.`t2`.`profile_id`) and (`test`.`t1`.`id` = `test`.`t2`.`REV`) and (`test`.`t1`.`timestamp` < 1294664900039) and (`test`.`t1`.`timestamp` > 1294644616416))
+SELECT *
+FROM t1 INNER JOIN t2 ON t2.REV=t1.id
+INNER JOIN t3 ON t3.id=t2.profile_id
+INNER JOIN t4 ON t4.id=t3.person_id
+WHERE t1.timestamp < 1294664900039 AND t1.timestamp > 1294644616416 AND
+t2.REVTYPE=2;
+id timestamp modifiedBy id REV REVTYPE profile_id id REV person_id id REV
+12 1294650860266 u62C^Kzx3wH8 8 12 2 2 2 1 300003 300003 1
+12 1294650860266 u62C^Kzx3wH8 8 12 2 2 2 117548 300006 300006 1
+12 1294650860266 u62C^Kzx3wH8 8 12 2 2 2 117548 300006 300006 97697
+# This should have join order of t2,t3,t4,t1 with the same plan as above
+# because all RIGHT JOIN operations are converted into INNER JOIN
+EXPLAIN EXTENDED SELECT *
+FROM t1 RIGHT JOIN t2 ON t2.REV=t1.id
+RIGHT JOIN t3 ON t3.id=t2.profile_id
+RIGHT JOIN t4 ON t4.id=t3.person_id
+WHERE t1.timestamp < 1294664900039 AND t1.timestamp > 1294644616416
+AND t2.REVTYPE=2;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 42 100.00 Using where
+1 SIMPLE t3 ref PRIMARY PRIMARY 4 test.t2.profile_id 1 100.00 Using where
+1 SIMPLE t4 ref PRIMARY PRIMARY 4 test.t3.person_id 1 100.00 Using index
+1 SIMPLE t1 eq_ref PRIMARY PRIMARY 4 test.t2.REV 1 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`id` AS `id`,`test`.`t1`.`timestamp` AS `timestamp`,`test`.`t1`.`modifiedBy` AS `modifiedBy`,`test`.`t2`.`id` AS `id`,`test`.`t2`.`REV` AS `REV`,`test`.`t2`.`REVTYPE` AS `REVTYPE`,`test`.`t2`.`profile_id` AS `profile_id`,`test`.`t3`.`id` AS `id`,`test`.`t3`.`REV` AS `REV`,`test`.`t3`.`person_id` AS `person_id`,`test`.`t4`.`id` AS `id`,`test`.`t4`.`REV` AS `REV` from `test`.`t4` join `test`.`t3` join `test`.`t2` join `test`.`t1` where ((`test`.`t2`.`REVTYPE` = 2) and (`test`.`t1`.`id` = `test`.`t2`.`REV`) and (`test`.`t3`.`id` = `test`.`t2`.`profile_id`) and (`test`.`t4`.`id` = `test`.`t3`.`person_id`) and (`test`.`t1`.`timestamp` < 1294664900039) and (`test`.`t1`.`timestamp` > 1294644616416))
+SELECT *
+FROM t1 RIGHT JOIN t2 ON t2.REV=t1.id
+RIGHT JOIN t3 ON t3.id=t2.profile_id
+RIGHT JOIN t4 ON t4.id=t3.person_id
+WHERE t1.timestamp < 1294664900039 AND t1.timestamp > 1294644616416
+AND t2.REVTYPE=2;
+id timestamp modifiedBy id REV REVTYPE profile_id id REV person_id id REV
+12 1294650860266 u62C^Kzx3wH8 8 12 2 2 2 1 300003 300003 1
+12 1294650860266 u62C^Kzx3wH8 8 12 2 2 2 117548 300006 300006 1
+12 1294650860266 u62C^Kzx3wH8 8 12 2 2 2 117548 300006 300006 97697
+DROP TABLE t1,t2,t3,t4;
+# end of 10.1 tests
SET optimizer_switch=@save_optimizer_switch;
diff --git a/mysql-test/r/join_outer_jcl6.result b/mysql-test/r/join_outer_jcl6.result
index faad874..44250a9 100644
--- a/mysql-test/r/join_outer_jcl6.result
+++ b/mysql-test/r/join_outer_jcl6.result
@@ -2458,7 +2458,7 @@ t1.b1+'0' t2.b2 + '0'
0 0
1 1
DROP TABLE t1, t2;
-set @join_cache_level= @save_join_cache_level;
+set @@join_cache_level= @save_join_cache_level;
#
# MDEV-14779: using left join causes incorrect results with materialization and derived tables
#
@@ -2524,6 +2524,143 @@ ORDER BY tb1.i1;
v2
DROP TABLE t1,t2;
# end of 5.5 tests
+#
+# MDEV-19258: chained right joins all converted to inner joins
+#
+CREATE TABLE t1 (
+id int NOT NULL AUTO_INCREMENT,
+timestamp bigint NOT NULL,
+modifiedBy varchar(255) DEFAULT NULL,
+PRIMARY KEY (id)
+);
+CREATE TABLE t2 (
+id int NOT NULL,
+REV int NOT NULL,
+REVTYPE tinyint DEFAULT NULL,
+profile_id int DEFAULT NULL,
+PRIMARY KEY (id,REV)
+);
+CREATE TABLE t3 (
+id int NOT NULL,
+REV int NOT NULL,
+person_id int DEFAULT NULL,
+PRIMARY KEY (id,REV)
+);
+CREATE TABLE t4 (
+id int NOT NULL,
+REV int NOT NULL,
+PRIMARY KEY (id,REV)
+);
+INSERT INTO t1 VALUES
+(1,1294391193890,'Cxqy$*9.kKeE'),(2,1294643906883,'rE4wqGV0gif@'),
+(3,1294643927456,'L?3yt(%dY$Br'),(4,1294644343525,'WH&ObiZ$#2S4'),
+(5,1294644616416,'YXnCbt?olUZ0'),(6,1294644954537,'8Npe4!(#lU@k'),
+(7,1294645046659,'knc0GhXB1#ib'),(8,1294645183829,'w*oPpVfuS8^m'),
+(9,1294645386701,'hwXR@3qVzrbU'),(10,1294645525982,'BeLW*Y9ndP0l'),
+(11,1294645627723,'nTegib^)qZ$I'),(12,1294650860266,'u62C^Kzx3wH8'),
+(13,1294657613745,'4&BkFjGa!qLg'),(14,1294660627161,')anpt312SCoh'),
+(15,1294661023336,'LtJ2PX?*kTmx'),(16,1294662838066,'POGRr@?#ofpl'),
+(17,1294663020989,'o.)1EOT2jnF7'),(18,1294663308065,'&TZ0F0LHE6.h'),
+(19,1294664900039,'j)kSC%^In$9d'),(20,1294668904556,'97glN50)cAo.'),
+(21,1294728056853,'lrKZxmw?I.Ek'),(22,1294728157174,'@P*SRg!pT.q?'),
+(23,1294728327099,'W9gPrptF.)8n'),(24,1294728418481,'$q*c^sM&URd#'),
+(25,1294728729620,'9*f4&bTPRtHo'),(26,1294728906014,')4VtTEnS7$oI'),
+(27,1294732190003,'8dkNSPq2u3AQ'),(28,1294733205065,'SV2N6IoEf438'),
+(29,1294741984927,'rBKj.0S^Ey%*'),(30,1294751748352,'j$2DvlBqk)Fw'),
+(31,1294753902212,'C$N6OrEw8elz'),(32,1294758120598,'DCSVZw!rnxXq'),
+(33,1294761769556,'OTS@QU8a6s5c'),(34,1294816845305,'IUE2stG0D3L5'),
+(35,1294816966909,'Xd16yka.9nHe'),(36,1294817116302,'lOQHZpm%!8qb'),
+(37,1294817374775,'^&pE3IhNf7ey'),(38,1294817538907,'oEn4#7C0Vhfp'),
+(39,1294818482950,'bx54J*O0Va&?'),(40,1294819047024,'J%@a&1.qgdb?'),
+(41,1294821826077,'C9kojr$L3Phz'),(42,1294825454458,'gG#BOnM80ZPi'),
+(43,1294904129918,'F^!TrjM#zdvc'),(44,1294904254166,'Va&Tb)k0RvlM'),
+(45,1294904414964,'dJjq0M6HvhR#'),(46,1294904505784,'nJmxg)ELqY(b'),
+(47,1294904602835,'dhF#or$Vge!7'),(48,1294904684728,'?bIh5E3l!0em'),
+(49,1294904877898,'Y*WflOdcxnk.'),(50,1294905002390,'*?H!lUgez5A.'),
+(51,1294905096043,'wlEIY3n9uz!p'),(52,1294905404621,'T?qv3H6&hlQD'),
+(53,1294905603922,'S@Bhys^Ti7bt'),(54,1294905788416,'KR?a5NVukz#l'),
+(55,1294905993190,'A*&q4kWhED!o'),(56,1294906205254,'fT0%7z0DF6h*'),
+(57,1294906319680,'LhzdW4?ivjR0'),(58,1294906424296,'h0KDlns%U*6T'),
+(59,1294906623844,'b$CfB1noI6Ax'),(60,1294911258896,'#T1*LP!3$Oys');
+INSERT INTO t2 VALUES
+(1,1,0,10209),(1,42480,1,10209),(1,61612,1,10209),(1,257545,1,10209),
+(1,385332,1,10209),(1,1687999,1,10209),(3,1,0,10210),(3,617411,2,10210),
+(4,11,0,14),(4,95149,1,10211),(4,607890,2,10211),(5,1,0,10212),
+(6,1,0,10213),(6,93344,1,10213),(6,295578,1,10213),(6,295579,1,10213),
+(6,295644,1,10213),(7,1,0,10214),(7,12,1,7),(7,688796,1,10214),
+(7,1140433,1,10214),(7,1715227,1,10214),(8,1,0,10215),(8,74253,1,10215),
+(8,93345,1,10215),(8,12,2,2),(9,1,0,10216),(9,93342,1,10216),
+(9,122354,1,10216),(9,301499,2,10216),(10,11,0,5),(10,93343,1,10217),
+(10,122355,1,10217),(10,123050,1,10217),(10,301500,2,10217),(11,1,0,10218),
+(11,87852,1,10218),(11,605499,2,10218),(12,1,0,10219),(12,88024,1,10219),
+(12,605892,2,10219),(13,1,0,10220);
+INSERT INTO t3 VALUES
+(1,1,300003),(1,117548,NULL),(2,1,300003),(2,117548,300006),
+(3,1,300153),(3,117548,NULL),(4,1,300153),(4,117548,NULL),
+(5,1,300153),(5,117548,NULL),(6,1,300182),(6,117548,NULL),
+(7,1,300205),(7,117548,NULL),(8,1,300217),(8,117548,NULL),
+(9,1,300290),(9,117548,NULL),(10,1,300290),(10,117548,NULL),
+(11,1,300405),(11,117548,NULL),(12,1,300670),(12,117548,NULL),
+(13,1,300670),(13,117548,NULL),(14,1,300006),(14,117548,NULL),
+(15,1,300671),(15,117548,NULL),(16,1,300732),(16,117548,NULL);
+INSERT INTO t4 VALUES
+(300000,1),(300001,1),(300003,1),(300004,1),
+(300005,1),(300005,688796),(300006,1),(300006,97697),
+(300009,1),(300010,1),(300011,1),(300012,1),(300013,1),
+(300014,1),(300015,1),(300016,1),(300017,1),(300018,1),
+(300019,1),(300020,1),(300021,1),(300022,1),(300023,1),
+(300024,1),(300025,1),(300026,1),(300027,1),(300028,1);
+# This should have join order of t2,t3,t4,t1
+EXPLAIN EXTENDED SELECT *
+FROM t1 INNER JOIN t2 ON t2.REV=t1.id
+INNER JOIN t3 ON t3.id=t2.profile_id
+INNER JOIN t4 ON t4.id=t3.person_id
+WHERE t1.timestamp < 1294664900039 AND t1.timestamp > 1294644616416 AND
+t2.REVTYPE=2;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 42 100.00 Using where
+1 SIMPLE t3 ref PRIMARY PRIMARY 4 test.t2.profile_id 1 100.00 Using where
+1 SIMPLE t4 ref PRIMARY PRIMARY 4 test.t3.person_id 1 100.00 Using index
+1 SIMPLE t1 eq_ref PRIMARY PRIMARY 4 test.t2.REV 1 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`id` AS `id`,`test`.`t1`.`timestamp` AS `timestamp`,`test`.`t1`.`modifiedBy` AS `modifiedBy`,`test`.`t2`.`id` AS `id`,`test`.`t2`.`REV` AS `REV`,`test`.`t2`.`REVTYPE` AS `REVTYPE`,`test`.`t2`.`profile_id` AS `profile_id`,`test`.`t3`.`id` AS `id`,`test`.`t3`.`REV` AS `REV`,`test`.`t3`.`person_id` AS `person_id`,`test`.`t4`.`id` AS `id`,`test`.`t4`.`REV` AS `REV` from `test`.`t1` join `test`.`t2` join `test`.`t3` join `test`.`t4` where ((`test`.`t2`.`REVTYPE` = 2) and (`test`.`t4`.`id` = `test`.`t3`.`person_id`) and (`test`.`t3`.`id` = `test`.`t2`.`profile_id`) and (`test`.`t1`.`id` = `test`.`t2`.`REV`) and (`test`.`t1`.`timestamp` < 1294664900039) and (`test`.`t1`.`timestamp` > 1294644616416))
+SELECT *
+FROM t1 INNER JOIN t2 ON t2.REV=t1.id
+INNER JOIN t3 ON t3.id=t2.profile_id
+INNER JOIN t4 ON t4.id=t3.person_id
+WHERE t1.timestamp < 1294664900039 AND t1.timestamp > 1294644616416 AND
+t2.REVTYPE=2;
+id timestamp modifiedBy id REV REVTYPE profile_id id REV person_id id REV
+12 1294650860266 u62C^Kzx3wH8 8 12 2 2 2 1 300003 300003 1
+12 1294650860266 u62C^Kzx3wH8 8 12 2 2 2 117548 300006 300006 1
+12 1294650860266 u62C^Kzx3wH8 8 12 2 2 2 117548 300006 300006 97697
+# This should have join order of t2,t3,t4,t1 with the same plan as above
+# because all RIGHT JOIN operations are converted into INNER JOIN
+EXPLAIN EXTENDED SELECT *
+FROM t1 RIGHT JOIN t2 ON t2.REV=t1.id
+RIGHT JOIN t3 ON t3.id=t2.profile_id
+RIGHT JOIN t4 ON t4.id=t3.person_id
+WHERE t1.timestamp < 1294664900039 AND t1.timestamp > 1294644616416
+AND t2.REVTYPE=2;
+id select_type table type possible_keys key key_len ref rows filtered Extra
+1 SIMPLE t2 ALL NULL NULL NULL NULL 42 100.00 Using where
+1 SIMPLE t3 ref PRIMARY PRIMARY 4 test.t2.profile_id 1 100.00 Using where
+1 SIMPLE t4 ref PRIMARY PRIMARY 4 test.t3.person_id 1 100.00 Using index
+1 SIMPLE t1 eq_ref PRIMARY PRIMARY 4 test.t2.REV 1 100.00 Using where
+Warnings:
+Note 1003 select `test`.`t1`.`id` AS `id`,`test`.`t1`.`timestamp` AS `timestamp`,`test`.`t1`.`modifiedBy` AS `modifiedBy`,`test`.`t2`.`id` AS `id`,`test`.`t2`.`REV` AS `REV`,`test`.`t2`.`REVTYPE` AS `REVTYPE`,`test`.`t2`.`profile_id` AS `profile_id`,`test`.`t3`.`id` AS `id`,`test`.`t3`.`REV` AS `REV`,`test`.`t3`.`person_id` AS `person_id`,`test`.`t4`.`id` AS `id`,`test`.`t4`.`REV` AS `REV` from `test`.`t4` join `test`.`t3` join `test`.`t2` join `test`.`t1` where ((`test`.`t2`.`REVTYPE` = 2) and (`test`.`t1`.`id` = `test`.`t2`.`REV`) and (`test`.`t3`.`id` = `test`.`t2`.`profile_id`) and (`test`.`t4`.`id` = `test`.`t3`.`person_id`) and (`test`.`t1`.`timestamp` < 1294664900039) and (`test`.`t1`.`timestamp` > 1294644616416))
+SELECT *
+FROM t1 RIGHT JOIN t2 ON t2.REV=t1.id
+RIGHT JOIN t3 ON t3.id=t2.profile_id
+RIGHT JOIN t4 ON t4.id=t3.person_id
+WHERE t1.timestamp < 1294664900039 AND t1.timestamp > 1294644616416
+AND t2.REVTYPE=2;
+id timestamp modifiedBy id REV REVTYPE profile_id id REV person_id id REV
+12 1294650860266 u62C^Kzx3wH8 8 12 2 2 2 1 300003 300003 1
+12 1294650860266 u62C^Kzx3wH8 8 12 2 2 2 117548 300006 300006 1
+12 1294650860266 u62C^Kzx3wH8 8 12 2 2 2 117548 300006 300006 97697
+DROP TABLE t1,t2,t3,t4;
+# end of 10.1 tests
SET optimizer_switch=@save_optimizer_switch;
set join_cache_level=default;
show variables like 'join_cache_level';
diff --git a/mysql-test/t/join_outer.test b/mysql-test/t/join_outer.test
index 6d20c08..19cc5b8 100644
--- a/mysql-test/t/join_outer.test
+++ b/mysql-test/t/join_outer.test
@@ -1976,7 +1976,7 @@ set @save_join_cache_level= @@join_cache_level;
SET @@join_cache_level = 3;
SELECT t1.b1+'0' , t2.b2 + '0' FROM t1 LEFT JOIN t2 ON b1 = b2;
DROP TABLE t1, t2;
-set @join_cache_level= @save_join_cache_level;
+set @@join_cache_level= @save_join_cache_level;
--echo #
--echo # MDEV-14779: using left join causes incorrect results with materialization and derived tables
@@ -2042,4 +2042,129 @@ DROP TABLE t1,t2;
--echo # end of 5.5 tests
+--echo #
+--echo # MDEV-19258: chained right joins all converted to inner joins
+--echo #
+
+ CREATE TABLE t1 (
+ id int NOT NULL AUTO_INCREMENT,
+ timestamp bigint NOT NULL,
+ modifiedBy varchar(255) DEFAULT NULL,
+ PRIMARY KEY (id)
+);
+
+CREATE TABLE t2 (
+ id int NOT NULL,
+ REV int NOT NULL,
+ REVTYPE tinyint DEFAULT NULL,
+ profile_id int DEFAULT NULL,
+ PRIMARY KEY (id,REV)
+);
+
+CREATE TABLE t3 (
+ id int NOT NULL,
+ REV int NOT NULL,
+ person_id int DEFAULT NULL,
+ PRIMARY KEY (id,REV)
+);
+
+CREATE TABLE t4 (
+ id int NOT NULL,
+ REV int NOT NULL,
+ PRIMARY KEY (id,REV)
+);
+
+INSERT INTO t1 VALUES
+(1,1294391193890,'Cxqy$*9.kKeE'),(2,1294643906883,'rE4wqGV0gif@'),
+(3,1294643927456,'L?3yt(%dY$Br'),(4,1294644343525,'WH&ObiZ$#2S4'),
+(5,1294644616416,'YXnCbt?olUZ0'),(6,1294644954537,'8Npe4!(#lU@k'),
+(7,1294645046659,'knc0GhXB1#ib'),(8,1294645183829,'w*oPpVfuS8^m'),
+(9,1294645386701,'hwXR@3qVzrbU'),(10,1294645525982,'BeLW*Y9ndP0l'),
+(11,1294645627723,'nTegib^)qZ$I'),(12,1294650860266,'u62C^Kzx3wH8'),
+(13,1294657613745,'4&BkFjGa!qLg'),(14,1294660627161,')anpt312SCoh'),
+(15,1294661023336,'LtJ2PX?*kTmx'),(16,1294662838066,'POGRr@?#ofpl'),
+(17,1294663020989,'o.)1EOT2jnF7'),(18,1294663308065,'&TZ0F0LHE6.h'),
+(19,1294664900039,'j)kSC%^In$9d'),(20,1294668904556,'97glN50)cAo.'),
+(21,1294728056853,'lrKZxmw?I.Ek'),(22,1294728157174,'@P*SRg!pT.q?'),
+(23,1294728327099,'W9gPrptF.)8n'),(24,1294728418481,'$q*c^sM&URd#'),
+(25,1294728729620,'9*f4&bTPRtHo'),(26,1294728906014,')4VtTEnS7$oI'),
+(27,1294732190003,'8dkNSPq2u3AQ'),(28,1294733205065,'SV2N6IoEf438'),
+(29,1294741984927,'rBKj.0S^Ey%*'),(30,1294751748352,'j$2DvlBqk)Fw'),
+(31,1294753902212,'C$N6OrEw8elz'),(32,1294758120598,'DCSVZw!rnxXq'),
+(33,1294761769556,'OTS@QU8a6s5c'),(34,1294816845305,'IUE2stG0D3L5'),
+(35,1294816966909,'Xd16yka.9nHe'),(36,1294817116302,'lOQHZpm%!8qb'),
+(37,1294817374775,'^&pE3IhNf7ey'),(38,1294817538907,'oEn4#7C0Vhfp'),
+(39,1294818482950,'bx54J*O0Va&?'),(40,1294819047024,'J%@a&1.qgdb?'),
+(41,1294821826077,'C9kojr$L3Phz'),(42,1294825454458,'gG#BOnM80ZPi'),
+(43,1294904129918,'F^!TrjM#zdvc'),(44,1294904254166,'Va&Tb)k0RvlM'),
+(45,1294904414964,'dJjq0M6HvhR#'),(46,1294904505784,'nJmxg)ELqY(b'),
+(47,1294904602835,'dhF#or$Vge!7'),(48,1294904684728,'?bIh5E3l!0em'),
+(49,1294904877898,'Y*WflOdcxnk.'),(50,1294905002390,'*?H!lUgez5A.'),
+(51,1294905096043,'wlEIY3n9uz!p'),(52,1294905404621,'T?qv3H6&hlQD'),
+(53,1294905603922,'S@Bhys^Ti7bt'),(54,1294905788416,'KR?a5NVukz#l'),
+(55,1294905993190,'A*&q4kWhED!o'),(56,1294906205254,'fT0%7z0DF6h*'),
+(57,1294906319680,'LhzdW4?ivjR0'),(58,1294906424296,'h0KDlns%U*6T'),
+(59,1294906623844,'b$CfB1noI6Ax'),(60,1294911258896,'#T1*LP!3$Oys');
+
+
+INSERT INTO t2 VALUES
+(1,1,0,10209),(1,42480,1,10209),(1,61612,1,10209),(1,257545,1,10209),
+(1,385332,1,10209),(1,1687999,1,10209),(3,1,0,10210),(3,617411,2,10210),
+(4,11,0,14),(4,95149,1,10211),(4,607890,2,10211),(5,1,0,10212),
+(6,1,0,10213),(6,93344,1,10213),(6,295578,1,10213),(6,295579,1,10213),
+(6,295644,1,10213),(7,1,0,10214),(7,12,1,7),(7,688796,1,10214),
+(7,1140433,1,10214),(7,1715227,1,10214),(8,1,0,10215),(8,74253,1,10215),
+(8,93345,1,10215),(8,12,2,2),(9,1,0,10216),(9,93342,1,10216),
+(9,122354,1,10216),(9,301499,2,10216),(10,11,0,5),(10,93343,1,10217),
+(10,122355,1,10217),(10,123050,1,10217),(10,301500,2,10217),(11,1,0,10218),
+(11,87852,1,10218),(11,605499,2,10218),(12,1,0,10219),(12,88024,1,10219),
+(12,605892,2,10219),(13,1,0,10220);
+
+INSERT INTO t3 VALUES
+(1,1,300003),(1,117548,NULL),(2,1,300003),(2,117548,300006),
+(3,1,300153),(3,117548,NULL),(4,1,300153),(4,117548,NULL),
+(5,1,300153),(5,117548,NULL),(6,1,300182),(6,117548,NULL),
+(7,1,300205),(7,117548,NULL),(8,1,300217),(8,117548,NULL),
+(9,1,300290),(9,117548,NULL),(10,1,300290),(10,117548,NULL),
+(11,1,300405),(11,117548,NULL),(12,1,300670),(12,117548,NULL),
+(13,1,300670),(13,117548,NULL),(14,1,300006),(14,117548,NULL),
+(15,1,300671),(15,117548,NULL),(16,1,300732),(16,117548,NULL);
+
+INSERT INTO t4 VALUES
+(300000,1),(300001,1),(300003,1),(300004,1),
+(300005,1),(300005,688796),(300006,1),(300006,97697),
+(300009,1),(300010,1),(300011,1),(300012,1),(300013,1),
+(300014,1),(300015,1),(300016,1),(300017,1),(300018,1),
+(300019,1),(300020,1),(300021,1),(300022,1),(300023,1),
+(300024,1),(300025,1),(300026,1),(300027,1),(300028,1);
+
+let $q1=
+SELECT *
+FROM t1 INNER JOIN t2 ON t2.REV=t1.id
+ INNER JOIN t3 ON t3.id=t2.profile_id
+ INNER JOIN t4 ON t4.id=t3.person_id
+WHERE t1.timestamp < 1294664900039 AND t1.timestamp > 1294644616416 AND
+ t2.REVTYPE=2;
+
+--echo # This should have join order of t2,t3,t4,t1
+eval EXPLAIN EXTENDED $q1;
+eval $q1;
+
+let $q2=
+SELECT *
+FROM t1 RIGHT JOIN t2 ON t2.REV=t1.id
+ RIGHT JOIN t3 ON t3.id=t2.profile_id
+ RIGHT JOIN t4 ON t4.id=t3.person_id
+WHERE t1.timestamp < 1294664900039 AND t1.timestamp > 1294644616416
+ AND t2.REVTYPE=2;
+
+--echo # This should have join order of t2,t3,t4,t1 with the same plan as above
+--echo # because all RIGHT JOIN operations are converted into INNER JOIN
+eval EXPLAIN EXTENDED $q2;
+eval $q2;
+
+DROP TABLE t1,t2,t3,t4;
+
+--echo # end of 10.1 tests
+
SET optimizer_switch=@save_optimizer_switch;
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index cb8c042..242e787 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -14625,8 +14625,20 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top,
table->table->maybe_null= FALSE;
table->outer_join= 0;
if (!(straight_join || table->straight))
- table->dep_tables= table->embedding && !table->embedding->sj_subq_pred ?
- table->embedding->dep_tables : 0;
+ {
+ table->dep_tables= 0;
+ TABLE_LIST *embedding= table->embedding;
+ while (embedding)
+ {
+ if (embedding->nested_join->join_list.head()->outer_join)
+ {
+ if (!embedding->sj_subq_pred)
+ table->dep_tables= embedding->dep_tables;
+ break;
+ }
+ embedding= embedding->embedding;
+ }
+ }
if (table->on_expr)
{
/* Add ON expression to the WHERE or upper-level ON condition. */
1
0
[Commits] fc80bc5e623: MDEV-19407: Assertion `field->table->stats_is_read' failed in is_eits_usable
by Varun 14 May '19
by Varun 14 May '19
14 May '19
revision-id: fc80bc5e623790f3b26b5dee54d4b47d3ee7753d (mariadb-10.1.38-185-gfc80bc5e623)
parent(s): 2647fd101db922b1e6c7363124adfc03e81ab8a0
author: Varun Gupta
committer: Varun Gupta
timestamp: 2019-05-15 01:44:16 +0530
message:
MDEV-19407: Assertion `field->table->stats_is_read' failed in is_eits_usable
Statistics were not read for a table when we had a CREATE TABLE query.
Enforce reading statistics for commands CREATE TABLE, SET and DO.
---
mysql-test/r/stat_tables.result | 19 +++++++++++++++++++
mysql-test/r/stat_tables_innodb.result | 19 +++++++++++++++++++
mysql-test/t/stat_tables.test | 20 ++++++++++++++++++++
sql/sql_statistics.cc | 3 +++
4 files changed, 61 insertions(+)
diff --git a/mysql-test/r/stat_tables.result b/mysql-test/r/stat_tables.result
index 0a53a5ae99d..afb3e12a474 100644
--- a/mysql-test/r/stat_tables.result
+++ b/mysql-test/r/stat_tables.result
@@ -707,8 +707,27 @@ ERROR 42S02: Table 'test.x' doesn't exist
select * from information_schema.tables where table_name='v';
TABLE_CATALOG TABLE_SCHEMA TABLE_NAME TABLE_TYPE ENGINE VERSION ROW_FORMAT TABLE_ROWS AVG_ROW_LENGTH DATA_LENGTH MAX_DATA_LENGTH INDEX_LENGTH DATA_FREE AUTO_INCREMENT CREATE_TIME UPDATE_TIME CHECK_TIME TABLE_COLLATION CHECKSUM CREATE_OPTIONS TABLE_COMMENT
def test v VIEW NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL VIEW
+set @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
drop table t1,t2;
drop view v;
+#
+# MDEV-19407: Assertion `field->table->stats_is_read' failed in is_eits_usable
+#
+set @save_optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity;
+set @@optimizer_use_condition_selectivity= 1;
+set @@use_stat_tables='never';
+create table t1(pk int);
+insert into t1 values (4),(3);
+set @@optimizer_use_condition_selectivity= 4;
+set use_stat_tables='preferably';
+INSERT INTO t1 SELECT * FROM x;
+ERROR 42S02: Table 'test.x' doesn't exist
+CREATE TABLE t2 SELECT pk FROM t1 WHERE pk>2;
+select * from t2;
+pk
+4
+3
+drop table t1,t2;
set @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
set @save_optimizer_switch=@@optimizer_switch;
set use_stat_tables=@save_use_stat_tables;
diff --git a/mysql-test/r/stat_tables_innodb.result b/mysql-test/r/stat_tables_innodb.result
index 9a93b479664..643a7aa55ea 100644
--- a/mysql-test/r/stat_tables_innodb.result
+++ b/mysql-test/r/stat_tables_innodb.result
@@ -734,8 +734,27 @@ ERROR 42S02: Table 'test.x' doesn't exist
select * from information_schema.tables where table_name='v';
TABLE_CATALOG TABLE_SCHEMA TABLE_NAME TABLE_TYPE ENGINE VERSION ROW_FORMAT TABLE_ROWS AVG_ROW_LENGTH DATA_LENGTH MAX_DATA_LENGTH INDEX_LENGTH DATA_FREE AUTO_INCREMENT CREATE_TIME UPDATE_TIME CHECK_TIME TABLE_COLLATION CHECKSUM CREATE_OPTIONS TABLE_COMMENT
def test v VIEW NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL VIEW
+set @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
drop table t1,t2;
drop view v;
+#
+# MDEV-19407: Assertion `field->table->stats_is_read' failed in is_eits_usable
+#
+set @save_optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity;
+set @@optimizer_use_condition_selectivity= 1;
+set @@use_stat_tables='never';
+create table t1(pk int);
+insert into t1 values (4),(3);
+set @@optimizer_use_condition_selectivity= 4;
+set use_stat_tables='preferably';
+INSERT INTO t1 SELECT * FROM x;
+ERROR 42S02: Table 'test.x' doesn't exist
+CREATE TABLE t2 SELECT pk FROM t1 WHERE pk>2;
+select * from t2;
+pk
+4
+3
+drop table t1,t2;
set @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
set @save_optimizer_switch=@@optimizer_switch;
set use_stat_tables=@save_use_stat_tables;
diff --git a/mysql-test/t/stat_tables.test b/mysql-test/t/stat_tables.test
index 2727f8d8bb1..1e4d261c3f3 100644
--- a/mysql-test/t/stat_tables.test
+++ b/mysql-test/t/stat_tables.test
@@ -469,9 +469,29 @@ CREATE VIEW v AS SELECT * FROM t1 JOIN t2;
INSERT INTO t2 SELECT * FROM x;
select * from information_schema.tables where table_name='v';
+set @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
drop table t1,t2;
drop view v;
+
+--echo #
+--echo # MDEV-19407: Assertion `field->table->stats_is_read' failed in is_eits_usable
+--echo #
+
+set @save_optimizer_use_condition_selectivity= @@optimizer_use_condition_selectivity;
+set @@optimizer_use_condition_selectivity= 1;
+set @@use_stat_tables='never';
+create table t1(pk int);
+insert into t1 values (4),(3);
+set @@optimizer_use_condition_selectivity= 4;
+set use_stat_tables='preferably';
+
+--error ER_NO_SUCH_TABLE
+INSERT INTO t1 SELECT * FROM x;
+CREATE TABLE t2 SELECT pk FROM t1 WHERE pk>2;
+select * from t2;
+drop table t1,t2;
+
set @@optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;
set @save_optimizer_switch=@@optimizer_switch;
diff --git a/sql/sql_statistics.cc b/sql/sql_statistics.cc
index 81ab599ed64..37f73adccb3 100644
--- a/sql/sql_statistics.cc
+++ b/sql/sql_statistics.cc
@@ -2191,6 +2191,9 @@ inline bool statistics_for_command_is_needed(THD *thd)
case SQLCOM_DELETE_MULTI:
case SQLCOM_REPLACE:
case SQLCOM_REPLACE_SELECT:
+ case SQLCOM_CREATE_TABLE:
+ case SQLCOM_SET_OPTION:
+ case SQLCOM_DO:
break;
default:
return FALSE;
1
0
revision-id: abde8dcc4fd4fa551cead438affa70be707d6367 (mariadb-10.4.4-87-gabde8dcc4fd)
parent(s): fda846b4af2fdb78a9c347650a51931ce499f351
author: Oleksandr Byelkin
committer: Oleksandr Byelkin
timestamp: 2019-05-14 16:09:44 +0200
message:
postreview
---
mysql-test/main/connect.result | 11 +++++++++++
mysql-test/main/connect.test | 25 +++++++++++++++++++++++--
2 files changed, 34 insertions(+), 2 deletions(-)
diff --git a/mysql-test/main/connect.result b/mysql-test/main/connect.result
index 8fa7724cff0..4e5b1d7f6e2 100644
--- a/mysql-test/main/connect.result
+++ b/mysql-test/main/connect.result
@@ -428,5 +428,16 @@ Aborted_clients 0
Aborted_connects 1
Aborted_connects_preauth 1
FOUND 1 /This connection closed normally without authentication/ in mysqld.1.err
+FOUND 52 /unauthenticated/ in mysqld.1.err
+SET @save_connect_timeout= @@connect_timeout;
+SET GLOBAL connect_timeout=2;
+SHOW GLOBAL STATUS LIKE 'Aborted%';
+Variable_name Value
+Aborted_clients 0
+Aborted_connects 2
+Aborted_connects_preauth 2
+FOUND 2 /This connection closed normally without authentication/ in mysqld.1.err
+FOUND 53 /unauthenticated/ in mysqld.1.err
SET GLOBAL log_warnings=default;
+SET GLOBAL connect_timeout= @save_connect_timeout;
# End of 10.4 tests
diff --git a/mysql-test/main/connect.test b/mysql-test/main/connect.test
index 1b804c2b0bb..083436d8630 100644
--- a/mysql-test/main/connect.test
+++ b/mysql-test/main/connect.test
@@ -479,11 +479,32 @@ EOF
SHOW GLOBAL STATUS LIKE 'Aborted%';
---let SEARCH_FILE=$MYSQLTEST_VARDIR/log/mysqld.1.err
--let SEARCH_PATTERN= This connection closed normally without authentication
--source include/search_pattern_in_file.inc
-SET GLOBAL log_warnings=default;
+--let SEARCH_PATTERN= unauthenticated
+--source include/search_pattern_in_file.inc
+SET @save_connect_timeout= @@connect_timeout;
+SET GLOBAL connect_timeout=2;
+
+--perl
+use Socket;
+use autodie;
+socket(SOCK, PF_INET, SOCK_STREAM, getprotobyname("tcp"));
+connect(SOCK, pack_sockaddr_in($ENV{MASTER_MYPORT}, inet_aton("localhost")));
+sleep 3;
+exit(0);
+EOF
+
+SHOW GLOBAL STATUS LIKE 'Aborted%';
+
+--let SEARCH_PATTERN= This connection closed normally without authentication
+--source include/search_pattern_in_file.inc
+
+--let SEARCH_PATTERN= unauthenticated
+--source include/search_pattern_in_file.inc
+SET GLOBAL log_warnings=default;
+SET GLOBAL connect_timeout= @save_connect_timeout;
--echo # End of 10.4 tests
1
0
[Commits] ddec45aa50e: MDEV-16932: ASAN heap-use-after-free in my_charlen_utf8 / my_well_formed_char_length_utf8 on 2nd execution of SP with ALTER trying to add bad CHECK
by Oleksandr Byelkin 14 May '19
by Oleksandr Byelkin 14 May '19
14 May '19
revision-id: ddec45aa50e94c137d35dfb16b79feb8b119174b (mariadb-10.2.24-11-gddec45aa50e)
parent(s): 50999738eaed907cfd94b554582b5416e0107642
author: Oleksandr Byelkin
committer: Oleksandr Byelkin
timestamp: 2019-05-14 14:01:15 +0200
message:
MDEV-16932: ASAN heap-use-after-free in my_charlen_utf8 / my_well_formed_char_length_utf8 on 2nd execution of SP with ALTER trying to add bad CHECK
In case of error the SP can be executed without re-compilation and so will reuse the constructed constraint name, so the name should be allocated in the statement memory.
---
mysql-test/r/constraints.result | 14 ++++++++++++++
mysql-test/t/constraints.test | 20 ++++++++++++++++++++
sql/sql_table.cc | 6 ++++++
3 files changed, 40 insertions(+)
diff --git a/mysql-test/r/constraints.result b/mysql-test/r/constraints.result
index df93b69cb9e..47fe0c323be 100644
--- a/mysql-test/r/constraints.result
+++ b/mysql-test/r/constraints.result
@@ -130,3 +130,17 @@ t CREATE TABLE `t` (
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP table test.t;
SET @@SQL_MODE=@OLD_SQL_MODE;
+#
+# MDEV-16932 - ASAN heap-use-after-free in my_charlen_utf8 /
+# my_well_formed_char_length_utf8 on 2nd execution of SP with
+# ALTER trying to add bad CHECK
+#
+CREATE TABLE t1 (a INT);
+CREATE PROCEDURE sp() ALTER TABLE t1 ADD CONSTRAINT CHECK (b > 0);
+CALL sp;
+ERROR 42S22: Unknown column 'b' in 'CHECK'
+CALL sp;
+ERROR 42S22: Unknown column 'b' in 'CHECK'
+DROP PROCEDURE sp;
+DROP TABLE t1;
+# End of 10.2 tests
diff --git a/mysql-test/t/constraints.test b/mysql-test/t/constraints.test
index 39b2eb52a9f..669ef08a227 100644
--- a/mysql-test/t/constraints.test
+++ b/mysql-test/t/constraints.test
@@ -119,3 +119,23 @@ CREATE TABLE test.t (f int foo=bar check(f>0));
SHOW CREATE TABLE t;
DROP table test.t;
SET @@SQL_MODE=@OLD_SQL_MODE;
+
+--echo #
+--echo # MDEV-16932 - ASAN heap-use-after-free in my_charlen_utf8 /
+--echo # my_well_formed_char_length_utf8 on 2nd execution of SP with
+--echo # ALTER trying to add bad CHECK
+--echo #
+
+CREATE TABLE t1 (a INT);
+CREATE PROCEDURE sp() ALTER TABLE t1 ADD CONSTRAINT CHECK (b > 0);
+--error ER_BAD_FIELD_ERROR
+CALL sp;
+--error ER_BAD_FIELD_ERROR
+CALL sp;
+
+# Cleanup
+DROP PROCEDURE sp;
+DROP TABLE t1;
+
+
+--echo # End of 10.2 tests
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 3df877792cc..ecd5de3a09d 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -4185,9 +4185,15 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
while ((check= c_it++))
{
if (!check->name.length)
+ {
+ Query_arena backup;
+ Query_arena *arena= thd->activate_stmt_arena_if_needed(&backup);
make_unique_constraint_name(thd, &check->name,
&alter_info->check_constraint_list,
&nr);
+ if (arena)
+ thd->restore_active_arena(arena, &backup);
+ }
{
/* Check that there's no repeating constraint names. */
List_iterator_fast<Virtual_column_info>
1
0
[Commits] d87e960a68c: MDEV-18378: Estimates for a non-merged join having implicit grouping is not showing 1 with optimizer_use_condition_selectivity >1
by Varun 13 May '19
by Varun 13 May '19
13 May '19
revision-id: d87e960a68c5fb0625cd7b4ff5a01af5e51a0d07 (mariadb-10.1.38-181-gd87e960a68c)
parent(s): cb248f880619431850e5638009a6098048050edf
author: Varun Gupta
committer: Varun Gupta
timestamp: 2019-05-14 03:56:21 +0530
message:
MDEV-18378: Estimates for a non-merged join having implicit grouping is not showing 1 with optimizer_use_condition_selectivity >1
The estimates for the number of records in non-merged joins are stored
in Item_in_subselect::jtbm_record_count. Use this estimate instead of
stats.records when we have optimizer_use_condition_selectivity > 1.
The reason is we don't have selectivity estimates for tables filled
at execution, so use the estimates provided by the function
get_delayed_table_estimates() for such tables
---
mysql-test/r/subselect_mat.result | 31 +++++++++++++++++++++++++++++++
mysql-test/r/subselect_sj_mat.result | 31 +++++++++++++++++++++++++++++++
mysql-test/t/subselect_sj_mat.test | 21 +++++++++++++++++++++
sql/sql_select.cc | 8 ++++++++
4 files changed, 91 insertions(+)
diff --git a/mysql-test/r/subselect_mat.result b/mysql-test/r/subselect_mat.result
index 6fae057dd17..f9b84341431 100644
--- a/mysql-test/r/subselect_mat.result
+++ b/mysql-test/r/subselect_mat.result
@@ -2568,6 +2568,37 @@ i1 i2
1 4
2 6
DROP TABLE t1;
+#
+# MDEV-18378: Estimates for a non-merged join having implicit grouping is not showing 1
+# with optimizer_use_condition_selectivity >1
+#
+CREATE TABLE t1 (a INT, KEY(a)) ENGINE=MyISAM;
+INSERT INTO t1 VALUES (8),(0);
+CREATE TABLE t2 (b INT, c VARCHAR(1)) ENGINE=MyISAM;
+INSERT INTO t2 VALUES (4,'j'),(6,'v');
+CREATE TABLE t3 (d VARCHAR(1)) ENGINE=MyISAM;
+INSERT INTO t3 VALUES ('b'),('c');
+set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
+set optimizer_use_condition_selectivity=1;
+explain SELECT * FROM t1 WHERE a = (SELECT MAX(b) FROM t2 WHERE c IN (SELECT MAX(d) FROM t3)) OR a = 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 index a a 5 NULL 2 Using where; Using index
+2 SUBQUERY <subquery3> ALL distinct_key NULL NULL NULL 1
+2 SUBQUERY t2 ALL NULL NULL NULL NULL 2 Using where; Using join buffer (flat, BNL join)
+3 MATERIALIZED t3 ALL NULL NULL NULL NULL 2
+SELECT * FROM t1 WHERE a = (SELECT MAX(b) FROM t2 WHERE c IN (SELECT MAX(d) FROM t3)) OR a = 10;
+a
+set optimizer_use_condition_selectivity=4;
+explain SELECT * FROM t1 WHERE a = (SELECT MAX(b) FROM t2 WHERE c IN (SELECT MAX(d) FROM t3)) OR a = 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 index a a 5 NULL 2 Using where; Using index
+2 SUBQUERY <subquery3> ALL distinct_key NULL NULL NULL 1
+2 SUBQUERY t2 ALL NULL NULL NULL NULL 2 Using where; Using join buffer (flat, BNL join)
+3 MATERIALIZED t3 ALL NULL NULL NULL NULL 2
+SELECT * FROM t1 WHERE a = (SELECT MAX(b) FROM t2 WHERE c IN (SELECT MAX(d) FROM t3)) OR a = 10;
+a
+set @@optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
+drop table t1,t2,t3;
set @subselect_mat_test_optimizer_switch_value=null;
set @@optimizer_switch='materialization=on,in_to_exists=off,semijoin=off';
set optimizer_switch='mrr=on,mrr_sort_keys=on,index_condition_pushdown=on';
diff --git a/mysql-test/r/subselect_sj_mat.result b/mysql-test/r/subselect_sj_mat.result
index 3cb3750cbd6..f7593945e41 100644
--- a/mysql-test/r/subselect_sj_mat.result
+++ b/mysql-test/r/subselect_sj_mat.result
@@ -2608,3 +2608,34 @@ i1 i2
1 4
2 6
DROP TABLE t1;
+#
+# MDEV-18378: Estimates for a non-merged join having implicit grouping is not showing 1
+# with optimizer_use_condition_selectivity >1
+#
+CREATE TABLE t1 (a INT, KEY(a)) ENGINE=MyISAM;
+INSERT INTO t1 VALUES (8),(0);
+CREATE TABLE t2 (b INT, c VARCHAR(1)) ENGINE=MyISAM;
+INSERT INTO t2 VALUES (4,'j'),(6,'v');
+CREATE TABLE t3 (d VARCHAR(1)) ENGINE=MyISAM;
+INSERT INTO t3 VALUES ('b'),('c');
+set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
+set optimizer_use_condition_selectivity=1;
+explain SELECT * FROM t1 WHERE a = (SELECT MAX(b) FROM t2 WHERE c IN (SELECT MAX(d) FROM t3)) OR a = 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 index a a 5 NULL 2 Using where; Using index
+2 SUBQUERY <subquery3> ALL distinct_key NULL NULL NULL 1
+2 SUBQUERY t2 ALL NULL NULL NULL NULL 2 Using where; Using join buffer (flat, BNL join)
+3 MATERIALIZED t3 ALL NULL NULL NULL NULL 2
+SELECT * FROM t1 WHERE a = (SELECT MAX(b) FROM t2 WHERE c IN (SELECT MAX(d) FROM t3)) OR a = 10;
+a
+set optimizer_use_condition_selectivity=4;
+explain SELECT * FROM t1 WHERE a = (SELECT MAX(b) FROM t2 WHERE c IN (SELECT MAX(d) FROM t3)) OR a = 10;
+id select_type table type possible_keys key key_len ref rows Extra
+1 PRIMARY t1 index a a 5 NULL 2 Using where; Using index
+2 SUBQUERY <subquery3> ALL distinct_key NULL NULL NULL 1
+2 SUBQUERY t2 ALL NULL NULL NULL NULL 2 Using where; Using join buffer (flat, BNL join)
+3 MATERIALIZED t3 ALL NULL NULL NULL NULL 2
+SELECT * FROM t1 WHERE a = (SELECT MAX(b) FROM t2 WHERE c IN (SELECT MAX(d) FROM t3)) OR a = 10;
+a
+set @@optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
+drop table t1,t2,t3;
diff --git a/mysql-test/t/subselect_sj_mat.test b/mysql-test/t/subselect_sj_mat.test
index 1f514be23d2..cd6d614bad8 100644
--- a/mysql-test/t/subselect_sj_mat.test
+++ b/mysql-test/t/subselect_sj_mat.test
@@ -2318,3 +2318,24 @@ WHERE alias1.i1 IN (
);
DROP TABLE t1;
+--echo #
+--echo # MDEV-18378: Estimates for a non-merged join having implicit grouping is not showing 1
+--echo # with optimizer_use_condition_selectivity >1
+--echo #
+
+CREATE TABLE t1 (a INT, KEY(a)) ENGINE=MyISAM;
+INSERT INTO t1 VALUES (8),(0);
+CREATE TABLE t2 (b INT, c VARCHAR(1)) ENGINE=MyISAM;
+INSERT INTO t2 VALUES (4,'j'),(6,'v');
+CREATE TABLE t3 (d VARCHAR(1)) ENGINE=MyISAM;
+INSERT INTO t3 VALUES ('b'),('c');
+set @save_optimizer_use_condition_selectivity=@@optimizer_use_condition_selectivity;
+set optimizer_use_condition_selectivity=1;
+explain SELECT * FROM t1 WHERE a = (SELECT MAX(b) FROM t2 WHERE c IN (SELECT MAX(d) FROM t3)) OR a = 10;
+SELECT * FROM t1 WHERE a = (SELECT MAX(b) FROM t2 WHERE c IN (SELECT MAX(d) FROM t3)) OR a = 10;
+set optimizer_use_condition_selectivity=4;
+explain SELECT * FROM t1 WHERE a = (SELECT MAX(b) FROM t2 WHERE c IN (SELECT MAX(d) FROM t3)) OR a = 10;
+SELECT * FROM t1 WHERE a = (SELECT MAX(b) FROM t2 WHERE c IN (SELECT MAX(d) FROM t3)) OR a = 10;
+set @@optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity;
+drop table t1,t2,t3;
+
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index cb8c0429674..de8753d64cc 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -5859,6 +5859,14 @@ double matching_candidates_in_table(JOIN_TAB *s, bool with_found_constraint,
TABLE *table= s->table;
double sel= table->cond_selectivity;
double table_records= table->stat_records();
+ /*
+ For tables that are filled at execution, use the estimates that
+ are provided by get_delayed_table_estimates(). This is due to the
+ fact that we don't collect statistics for tables filled at execution.
+ see the function get_delayed_table_estimates()
+ */
+ if (table->is_filled_at_execution())
+ table_records= (double)s->found_records;
dbl_records= table_records * sel;
return dbl_records;
}
1
0
13 May '19
revision-id: eb6f779acccf4e0f86de98eb894155fa28616cc9 (fb-prod201801-234-geb6f779accc)
parent(s): d099340b9105d4fd5397c5a8548218372f963083
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-05-13 18:29:39 +0300
message:
Range Locking: Shared locks continued
- More test coverage
- ha_rocksdb::set_range_lock should set a range lock for LOCK IN SHARE MODE
- But the Range API only allows to get a write range lock
---
.../rocksdb/r/range_locking_shared_locks.result | 79 +++++++++++++++++++++-
.../rocksdb/t/range_locking_shared_locks.test | 75 +++++++++++++++++++-
storage/rocksdb/ha_rocksdb.cc | 3 +-
3 files changed, 149 insertions(+), 8 deletions(-)
diff --git a/mysql-test/suite/rocksdb/r/range_locking_shared_locks.result b/mysql-test/suite/rocksdb/r/range_locking_shared_locks.result
index 33c7b421f9b..014b107aea5 100644
--- a/mysql-test/suite/rocksdb/r/range_locking_shared_locks.result
+++ b/mysql-test/suite/rocksdb/r/range_locking_shared_locks.result
@@ -1,9 +1,13 @@
+select @@rocksdb_use_range_locking;
+@@rocksdb_use_range_locking
+1
+create table t0 (a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1 (
pk int primary key,
a int
) engine=rocksdb;
-insert into t1 values
-(0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9);
+insert into t1 select a,a from t0;
# A basic test for shared locks
begin;
select * from t1 where pk=3 for update;
@@ -51,5 +55,74 @@ $cf_id $TRX1_ID 0000${indexnr}80000003 X
$cf_id $TRX1_ID 0000${indexnr}80000005 X
connection default;
rollback;
-disconnect con1;
+#
+# Test if a read lock inhibits write locks
+#
+begin;
+select * from t1 where pk=2 lock in share mode;
+pk a
+2 2
+select * from t1 where pk=8 for update;
+pk a
+8 8
+connection con1;
+begin;
+select * from t1 where pk=2 for update;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t1.PRIMARY
+select * from t1 where pk between 0 and 4 for update;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t1.PRIMARY
+delete from t1 where pk=2;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t1.PRIMARY
+# Get a shared lock
+select * from t1 where pk=2 lock in share mode;
+pk a
+2 2
+# But this should still prevent us from acquiring a write lock on that value:
+select * from t1 where pk=2 for update;
+ERROR HY000: Lock wait timeout exceeded; try restarting transaction: Timeout on index: test.t1.PRIMARY
+rollback;
+connection default;
+rollback;
drop table t1;
+create table t1 (
+pk int not null primary key,
+a int not null,
+key(a)
+) engine=rocksdb;
+insert into t1
+select
+A.a+10*B.a+100*C.a+1000*D.a, A.a+10*B.a+100*C.a+1000*D.a
+from
+t0 A, t0 B, t0 C, t0 D;
+set global rocksdb_force_flush_memtable_now=1;
+connection con1;
+begin;
+select * from t1 where pk=900 for update;
+pk a
+900 900
+connection default;
+begin;
+explain
+select * from t1 where a between 2 and 5 lock in share mode;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 4 NULL # Using where; Using index
+select * from t1 where a between 2 and 5 lock in share mode;
+pk a
+2 2
+3 3
+4 4
+5 5
+# TODO: the following prints an X lock on the range, because GetRangeLock API
+# currently only supports write locks:
+# select * from information_schema.rocksdb_locks; # With replacements by select_from_rocksdb_locks.inc
+COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
+$cf_id $TRX1_ID 0000${indexnr}80000002 S
+$cf_id $TRX1_ID 0000${indexnr}80000003 S
+$cf_id $TRX1_ID 0000${indexnr}80000004 S
+$cf_id $TRX1_ID 0000${indexnr}80000005 S
+$cf_id $TRX1_ID 0000${indexnr}80000006 S
+$cf_id $TRX1_ID 000000010780000002 - 010000010780000005 X
+$cf_id $TRX2_ID 0000${indexnr}80000384 X
+rollback;
+disconnect con1;
+drop table t0,t1;
diff --git a/mysql-test/suite/rocksdb/t/range_locking_shared_locks.test b/mysql-test/suite/rocksdb/t/range_locking_shared_locks.test
index 372ced5bfac..52118aa343f 100644
--- a/mysql-test/suite/rocksdb/t/range_locking_shared_locks.test
+++ b/mysql-test/suite/rocksdb/t/range_locking_shared_locks.test
@@ -5,14 +5,18 @@
--source suite/rocksdb/include/have_range_locking.inc
--enable_connect_log
+select @@rocksdb_use_range_locking;
+
+create table t0 (a int);
+insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+
create table t1 (
pk int primary key,
a int
) engine=rocksdb;
-insert into t1 values
-(0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9);
+insert into t1 select a,a from t0;
--echo # A basic test for shared locks
@@ -48,7 +52,72 @@ select * from t1 where pk=5 for update;
connection default;
rollback;
-disconnect con1;
+--echo #
+--echo # Test if a read lock inhibits write locks
+--echo #
+
+begin;
+select * from t1 where pk=2 lock in share mode;
+select * from t1 where pk=8 for update;
+
+connection con1;
+begin;
+
+--error ER_LOCK_WAIT_TIMEOUT
+select * from t1 where pk=2 for update;
+
+--error ER_LOCK_WAIT_TIMEOUT
+select * from t1 where pk between 0 and 4 for update;
+
+--error ER_LOCK_WAIT_TIMEOUT
+delete from t1 where pk=2;
+
+--echo # Get a shared lock
+select * from t1 where pk=2 lock in share mode;
+
+--echo # But this should still prevent us from acquiring a write lock on that value:
+--error ER_LOCK_WAIT_TIMEOUT
+select * from t1 where pk=2 for update;
+
+rollback;
+connection default;
+rollback;
drop table t1;
+create table t1 (
+ pk int not null primary key,
+ a int not null,
+ key(a)
+) engine=rocksdb;
+
+insert into t1
+select
+ A.a+10*B.a+100*C.a+1000*D.a, A.a+10*B.a+100*C.a+1000*D.a
+from
+ t0 A, t0 B, t0 C, t0 D;
+set global rocksdb_force_flush_memtable_now=1;
+
+connection con1;
+begin;
+select * from t1 where pk=900 for update;
+let $TRX2_ID=`select transaction_id from information_schema.rocksdb_trx where thread_id=connection_id()`;
+
+connection default;
+begin;
+--replace_column 9 #
+explain
+select * from t1 where a between 2 and 5 lock in share mode;
+select * from t1 where a between 2 and 5 lock in share mode;
+let $TRX1_ID=`select transaction_id from information_schema.rocksdb_trx where thread_id=connection_id()`;
+
+--echo # TODO: the following prints an X lock on the range, because GetRangeLock API
+--echo # currently only supports write locks:
+
+--source suite/rocksdb/include/select_from_rocksdb_locks.inc
+
+rollback;
+
+disconnect con1;
+
+drop table t0,t1;
diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc
index 9342ca84b04..af6bc90ede7 100644
--- a/storage/rocksdb/ha_rocksdb.cc
+++ b/storage/rocksdb/ha_rocksdb.cc
@@ -8168,8 +8168,7 @@ int ha_rocksdb::set_range_lock(Rdb_transaction *tx,
bool start_has_inf_suffix = false, end_has_inf_suffix = false;
rocksdb::Slice slice(slice_arg);
-
- if (m_lock_rows != RDB_LOCK_WRITE || !rocksdb_use_range_locking) {
+ if (m_lock_rows == RDB_LOCK_NONE || !rocksdb_use_range_locking) {
return 0;
}
1
0
[Commits] d099340b910: MyRocks, Range Locking, Shared point locks: got basic cases to work
by Sergei Petrunia 13 May '19
by Sergei Petrunia 13 May '19
13 May '19
revision-id: d099340b9105d4fd5397c5a8548218372f963083 (fb-prod201801-233-gd099340b910)
parent(s): a679ed01453c1898ec9d1fac95d8d4d149607b81
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-05-13 17:15:11 +0300
message:
MyRocks, Range Locking, Shared point locks: got basic cases to work
- Point rocksdb submodule to the shared locks revision
- Add testcases
- (No changes were necessary on MyRocks side so far)
---
.../rocksdb/include/select_from_rocksdb_locks.inc | 35 ++++++++++++++
.../rocksdb/r/range_locking_shared_locks.result | 55 ++++++++++++++++++++++
.../rocksdb/t/range_locking_shared_locks.test | 54 +++++++++++++++++++++
rocksdb | 2 +-
4 files changed, 145 insertions(+), 1 deletion(-)
diff --git a/mysql-test/suite/rocksdb/include/select_from_rocksdb_locks.inc b/mysql-test/suite/rocksdb/include/select_from_rocksdb_locks.inc
new file mode 100644
index 00000000000..31280b0510c
--- /dev/null
+++ b/mysql-test/suite/rocksdb/include/select_from_rocksdb_locks.inc
@@ -0,0 +1,35 @@
+--echo # select * from information_schema.rocksdb_locks; # With replacements by select_from_rocksdb_locks.inc
+--disable_query_log
+set @cf_id=(select column_family from information_schema.rocksdb_ddl
+ where table_name='t1' and index_name='PRIMARY');
+set @indexnr= (select lpad(hex(index_number),6,'0') from information_schema.rocksdb_ddl
+ where table_name='t1' and index_name='PRIMARY');
+
+let $extra_where = where 1;
+
+if ($select_from_is_rowlocks_current_trx_only)
+{
+ let $extra_where = where transaction_id=(select transaction_id from information_schema.rocksdb_trx where connection_id()=thread_id);
+}
+
+let $transaction_col= transaction_id;
+
+if ($TRX1_ID)
+{
+ let $transaction_col = replace($transaction_col, '$TRX1_ID', "\$TRX1_ID");
+}
+
+if ($TRX2_ID)
+{
+ let $transaction_col = replace($transaction_col, '$TRX2_ID', "\$TRX2_ID");
+}
+
+--sorted_result
+eval select
+ replace(column_family_id, @cf_id, "\$cf_id") as COLUMN_FAMILY_ID,
+ $transaction_col as TRANSACTION_ID,
+ replace(`key`, @indexnr, '\${indexnr}') as `KEY`,
+ mode
+from information_schema.rocksdb_locks $extra_where;
+
+--enable_query_log
diff --git a/mysql-test/suite/rocksdb/r/range_locking_shared_locks.result b/mysql-test/suite/rocksdb/r/range_locking_shared_locks.result
new file mode 100644
index 00000000000..33c7b421f9b
--- /dev/null
+++ b/mysql-test/suite/rocksdb/r/range_locking_shared_locks.result
@@ -0,0 +1,55 @@
+create table t1 (
+pk int primary key,
+a int
+) engine=rocksdb;
+insert into t1 values
+(0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9);
+# A basic test for shared locks
+begin;
+select * from t1 where pk=3 for update;
+pk a
+3 3
+select * from t1 where pk=5 lock in share mode;
+pk a
+5 5
+connect con1,localhost,root,,;
+connection con1;
+begin;
+select * from t1 where pk=5 lock in share mode;
+pk a
+5 5
+# Now for pk=5 we should see two locks by TRX1 and TRX2 with mode=S:
+# select * from information_schema.rocksdb_locks; # With replacements by select_from_rocksdb_locks.inc
+COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
+$cf_id $TRX1_ID 0000${indexnr}80000003 X
+$cf_id $TRX1_ID 0000${indexnr}80000005 S
+$cf_id $TRX2_ID 0000${indexnr}80000005 S
+rollback;
+# Now, TRX2_ID should be gone:
+# select * from information_schema.rocksdb_locks; # With replacements by select_from_rocksdb_locks.inc
+COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
+$cf_id $TRX1_ID 0000${indexnr}80000003 X
+$cf_id $TRX1_ID 0000${indexnr}80000005 S
+connection default;
+# Get a read lock on pk=3 (where we have a write lock).
+# The result should be that we will still have a write lock
+select * from t1 where pk=3 for update;
+pk a
+3 3
+# select * from information_schema.rocksdb_locks; # With replacements by select_from_rocksdb_locks.inc
+COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
+$cf_id $TRX1_ID 0000${indexnr}80000003 X
+$cf_id $TRX1_ID 0000${indexnr}80000005 S
+# Get a write lock on pk=5 (where we have a read lock).
+# The result should be that we will have a write lock.
+select * from t1 where pk=5 for update;
+pk a
+5 5
+# select * from information_schema.rocksdb_locks; # With replacements by select_from_rocksdb_locks.inc
+COLUMN_FAMILY_ID TRANSACTION_ID KEY mode
+$cf_id $TRX1_ID 0000${indexnr}80000003 X
+$cf_id $TRX1_ID 0000${indexnr}80000005 X
+connection default;
+rollback;
+disconnect con1;
+drop table t1;
diff --git a/mysql-test/suite/rocksdb/t/range_locking_shared_locks.test b/mysql-test/suite/rocksdb/t/range_locking_shared_locks.test
new file mode 100644
index 00000000000..372ced5bfac
--- /dev/null
+++ b/mysql-test/suite/rocksdb/t/range_locking_shared_locks.test
@@ -0,0 +1,54 @@
+#
+# Test for shared lock support for range locking
+#
+--source include/have_rocksdb.inc
+--source suite/rocksdb/include/have_range_locking.inc
+--enable_connect_log
+
+create table t1 (
+ pk int primary key,
+ a int
+) engine=rocksdb;
+
+
+insert into t1 values
+(0,0),(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(9,9);
+
+--echo # A basic test for shared locks
+
+begin;
+select * from t1 where pk=3 for update;
+select * from t1 where pk=5 lock in share mode;
+let $TRX1_ID=`select transaction_id from information_schema.rocksdb_trx where thread_id=connection_id()`;
+
+connect (con1,localhost,root,,);
+connection con1;
+begin;
+select * from t1 where pk=5 lock in share mode;
+let $TRX2_ID=`select transaction_id from information_schema.rocksdb_trx where thread_id=connection_id()`;
+--echo # Now for pk=5 we should see two locks by TRX1 and TRX2 with mode=S:
+--source suite/rocksdb/include/select_from_rocksdb_locks.inc
+
+rollback;
+--echo # Now, TRX2_ID should be gone:
+--source suite/rocksdb/include/select_from_rocksdb_locks.inc
+
+connection default;
+
+--echo # Get a read lock on pk=3 (where we have a write lock).
+--echo # The result should be that we will still have a write lock
+select * from t1 where pk=3 for update;
+--source suite/rocksdb/include/select_from_rocksdb_locks.inc
+
+--echo # Get a write lock on pk=5 (where we have a read lock).
+--echo # The result should be that we will have a write lock.
+select * from t1 where pk=5 for update;
+--source suite/rocksdb/include/select_from_rocksdb_locks.inc
+
+connection default;
+rollback;
+
+disconnect con1;
+
+drop table t1;
+
diff --git a/rocksdb b/rocksdb
index 1b423e3954d..2f0ee897552 160000
--- a/rocksdb
+++ b/rocksdb
@@ -1 +1 @@
-Subproject commit 1b423e3954d932c4624337308e3e1cd98481a495
+Subproject commit 2f0ee897552bb4a8aa66b933c0d6f8529a82e2e8
1
0
13 May '19
revision-id: 2f0ee897552bb4a8aa66b933c0d6f8529a82e2e8 (v5.8-1042-g2f0ee8975)
parent(s): 1b423e3954d932c4624337308e3e1cd98481a495
author: Sergei Petrunia
committer: Sergei Petrunia
timestamp: 2019-05-13 17:09:36 +0300
message:
Initial support for shared point locks.
- Locks can now be shared.
- Sharing only supported as long as the locked ranges are an exact match.
If this requirement is not met, the locks behave as exclusive locks.
- TODO Lock escalation is not supported yet (the intent is to not escalate
the shared locks at all; but even this is not yet implemented, current
code will exhibit incorrect behaviour if escalation hits a shared lock)
---
utilities/transactions/range_locking/db.h | 3 +
.../range_locking/locktree/concurrent_tree.cc | 17 ++-
.../range_locking/locktree/concurrent_tree.h | 18 ++-
.../range_locking/locktree/locktree.cc | 166 +++++++++++++++++----
.../transactions/range_locking/locktree/locktree.h | 19 ++-
.../range_locking/locktree/range_buffer.cc | 20 ++-
.../range_locking/locktree/range_buffer.h | 15 +-
.../range_locking/locktree/treenode.cc | 74 ++++++---
.../transactions/range_locking/locktree/treenode.h | 41 ++++-
.../range_locking/portability/txn_subst.h | 16 ++
utilities/transactions/transaction_lock_mgr.cc | 24 ++-
11 files changed, 334 insertions(+), 79 deletions(-)
diff --git a/utilities/transactions/range_locking/db.h b/utilities/transactions/range_locking/db.h
index f9349c6ae..64ea28345 100644
--- a/utilities/transactions/range_locking/db.h
+++ b/utilities/transactions/range_locking/db.h
@@ -6,6 +6,8 @@
typedef struct __toku_db DB;
typedef struct __toku_dbt DBT;
+
+// port: this is currently not used
struct simple_dbt {
uint32_t len;
void *data;
@@ -72,6 +74,7 @@ struct __toku_dbt {
void*data;
uint32_t size;
uint32_t ulen;
+ // One of DB_DBT_XXX flags
uint32_t flags;
};
typedef struct __toku_descriptor {
diff --git a/utilities/transactions/range_locking/locktree/concurrent_tree.cc b/utilities/transactions/range_locking/locktree/concurrent_tree.cc
index a35a9e40b..74d65f710 100644
--- a/utilities/transactions/range_locking/locktree/concurrent_tree.cc
+++ b/utilities/transactions/range_locking/locktree/concurrent_tree.cc
@@ -97,6 +97,12 @@ void concurrent_tree::locked_keyrange::acquire(const keyrange &range) {
m_subtree = subtree;
}
+void concurrent_tree::locked_keyrange::add_shared_owner(const keyrange &range,
+ TXNID new_owner)
+{
+ m_subtree->insert(range, new_owner, /*is_shared*/ true);
+}
+
void concurrent_tree::locked_keyrange::release(void) {
m_subtree->mutex_unlock();
}
@@ -110,18 +116,19 @@ void concurrent_tree::locked_keyrange::iterate(F *function) const {
}
}
-void concurrent_tree::locked_keyrange::insert(const keyrange &range, TXNID txnid) {
+void concurrent_tree::locked_keyrange::insert(const keyrange &range,
+ TXNID txnid, bool is_shared) {
// empty means no children, and only the root should ever be empty
if (m_subtree->is_empty()) {
- m_subtree->set_range_and_txnid(range, txnid);
+ m_subtree->set_range_and_txnid(range, txnid, is_shared);
} else {
- m_subtree->insert(range, txnid);
+ m_subtree->insert(range, txnid, is_shared);
}
}
-void concurrent_tree::locked_keyrange::remove(const keyrange &range) {
+void concurrent_tree::locked_keyrange::remove(const keyrange &range, TXNID txnid) {
invariant(!m_subtree->is_empty());
- treenode *new_subtree = m_subtree->remove(range);
+ treenode *new_subtree = m_subtree->remove(range, txnid);
// if removing range changed the root of the subtree,
// then the subtree must be the root of the entire tree.
if (new_subtree == nullptr) {
diff --git a/utilities/transactions/range_locking/locktree/concurrent_tree.h b/utilities/transactions/range_locking/locktree/concurrent_tree.h
index 66a7ff176..fabda7294 100644
--- a/utilities/transactions/range_locking/locktree/concurrent_tree.h
+++ b/utilities/transactions/range_locking/locktree/concurrent_tree.h
@@ -106,15 +106,25 @@ public:
template <class F>
void iterate(F *function) const;
+ // Adds another owner to the lock on the specified keyrange.
+ // requires: the keyrange contains one treenode whose bounds are
+ // exactly equal to the specifed range (no sub/supersets)
+ void add_shared_owner(const keyrange &range, TXNID new_owner);
+
// inserts the given range into the tree, with an associated txnid.
// requires: range does not overlap with anything in this locked_keyrange
// rationale: caller is responsible for only inserting unique ranges
- void insert(const keyrange &range, TXNID txnid);
-
- // effect: removes the given range from the tree
+ void insert(const keyrange &range, TXNID txnid, bool is_shared);
+
+ // effect: removes the given range from the tree.
+ // - txnid=TXNID_ANY means remove the range no matter what its
+ // owners are
+ // - Other value means remove the specified txnid from
+ // ownership (if the range has other owners, it will remain
+ // in the tree)
// requires: range exists exactly in this locked_keyrange
// rationale: caller is responsible for only removing existing ranges
- void remove(const keyrange &range);
+ void remove(const keyrange &range, TXNID txnid);
// effect: removes all of the keys represented by this locked keyrange
// rationale: we'd like a fast way to empty out a tree
diff --git a/utilities/transactions/range_locking/locktree/locktree.cc b/utilities/transactions/range_locking/locktree/locktree.cc
index 9b530c7b0..0e5f7c307 100644
--- a/utilities/transactions/range_locking/locktree/locktree.cc
+++ b/utilities/transactions/range_locking/locktree/locktree.cc
@@ -147,6 +147,8 @@ uint32_t locktree::get_reference_count(void) {
struct row_lock {
keyrange range;
TXNID txnid;
+ bool is_shared;
+ TxnidVector *owners;
};
// iterate over a locked keyrange and copy out all of the data,
@@ -157,8 +159,10 @@ static void iterate_and_get_overlapping_row_locks(const concurrent_tree::locked_
GrowableArray<row_lock> *row_locks) {
struct copy_fn_obj {
GrowableArray<row_lock> *row_locks;
- bool fn(const keyrange &range, TXNID txnid) {
- row_lock lock = { .range = range, .txnid = txnid };
+ bool fn(const keyrange &range, TXNID txnid, bool is_shared,
+ TxnidVector *owners) {
+ row_lock lock = { .range = range, .txnid = txnid,
+ .is_shared = is_shared, .owners = owners};
row_locks->push(lock);
return true;
}
@@ -196,9 +200,10 @@ static uint64_t row_lock_size_in_tree(const row_lock &lock) {
// remove and destroy the given row lock from the locked keyrange,
// then notify the memory tracker of the newly freed lock.
static void remove_row_lock_from_tree(concurrent_tree::locked_keyrange *lkr,
- const row_lock &lock, locktree_manager *mgr) {
+ const row_lock &lock, TXNID txnid,
+ locktree_manager *mgr) {
const uint64_t mem_released = row_lock_size_in_tree(lock);
- lkr->remove(lock.range);
+ lkr->remove(lock.range, txnid);
if (mgr != nullptr) {
mgr->note_mem_released(mem_released);
}
@@ -209,7 +214,7 @@ static void remove_row_lock_from_tree(concurrent_tree::locked_keyrange *lkr,
static void insert_row_lock_into_tree(concurrent_tree::locked_keyrange *lkr,
const row_lock &lock, locktree_manager *mgr) {
uint64_t mem_used = row_lock_size_in_tree(lock);
- lkr->insert(lock.range, lock.txnid);
+ lkr->insert(lock.range, lock.txnid, lock.is_shared);
if (mgr != nullptr) {
mgr->note_mem_used(mem_used);
}
@@ -221,13 +226,17 @@ void locktree::sto_begin(TXNID txnid) {
m_sto_txnid = txnid;
}
-void locktree::sto_append(const DBT *left_key, const DBT *right_key) {
+void locktree::sto_append(const DBT *left_key, const DBT *right_key,
+ bool is_write_request) {
uint64_t buffer_mem, delta;
+
+ // psergey: the below two lines do not make any sense
+ // (and it's the same in upstream TokuDB)
keyrange range;
range.create(left_key, right_key);
buffer_mem = m_sto_buffer.total_memory_size();
- m_sto_buffer.append(left_key, right_key);
+ m_sto_buffer.append(left_key, right_key, is_write_request);
delta = m_sto_buffer.total_memory_size() - buffer_mem;
if (m_mgr != nullptr) {
m_mgr->note_mem_used(delta);
@@ -274,8 +283,10 @@ void locktree::sto_migrate_buffer_ranges_to_tree(void *prepared_lkr) {
range_buffer::iterator::record rec;
while (iter.current(&rec)) {
sto_lkr.prepare(&sto_rangetree);
- int r = acquire_lock_consolidated(&sto_lkr,
- m_sto_txnid, rec.get_left_key(), rec.get_right_key(), nullptr);
+ int r = acquire_lock_consolidated(&sto_lkr, m_sto_txnid,
+ rec.get_left_key(),
+ rec.get_right_key(),
+ rec.get_exclusive_flag(), nullptr);
invariant_zero(r);
sto_lkr.release();
iter.next();
@@ -285,8 +296,10 @@ void locktree::sto_migrate_buffer_ranges_to_tree(void *prepared_lkr) {
// locktree's rangetree, on behalf of the old single txnid.
struct migrate_fn_obj {
concurrent_tree::locked_keyrange *dst_lkr;
- bool fn(const keyrange &range, TXNID txnid) {
- dst_lkr->insert(range, txnid);
+ bool fn(const keyrange &range, TXNID txnid, bool is_shared,
+ TxnidVector *owners) {
+ assert(owners == nullptr);
+ dst_lkr->insert(range, txnid, is_shared);
return true;
}
} migrate_fn;
@@ -301,7 +314,8 @@ void locktree::sto_migrate_buffer_ranges_to_tree(void *prepared_lkr) {
bool locktree::sto_try_acquire(void *prepared_lkr,
TXNID txnid,
- const DBT *left_key, const DBT *right_key) {
+ const DBT *left_key, const DBT *right_key,
+ bool is_write_request) {
if (m_rangetree->is_empty() && m_sto_buffer.is_empty() && toku_unsafe_fetch(m_sto_score) >= STO_SCORE_THRESHOLD) {
// We can do the optimization because the rangetree is empty, and
// we know its worth trying because the sto score is big enough.
@@ -319,7 +333,7 @@ bool locktree::sto_try_acquire(void *prepared_lkr,
// this txnid can append its lock to the sto buffer successfully.
if (m_sto_txnid != TXNID_NONE) {
invariant(m_sto_txnid == txnid);
- sto_append(left_key, right_key);
+ sto_append(left_key, right_key, is_write_request);
return true;
} else {
invariant(m_sto_buffer.is_empty());
@@ -327,12 +341,66 @@ bool locktree::sto_try_acquire(void *prepared_lkr,
}
}
+
+/*
+ Do the same as iterate_and_get_overlapping_row_locks does, but also check for
+ this:
+ The set of overlapping rows locks consists of just one read-only shared
+ lock with the same endpoints as specified (in that case, we can just add
+ ourselves into that list)
+
+ @return true - One compatible shared lock
+ false - Otherwise
+*/
+static
+bool iterate_and_get_overlapping_row_locks2(const concurrent_tree::locked_keyrange *lkr,
+ const DBT *left_key, const DBT *right_key,
+ comparator *cmp,
+ TXNID txnid,
+ GrowableArray<row_lock> *row_locks) {
+ struct copy_fn_obj {
+ GrowableArray<row_lock> *row_locks;
+ bool first_call= true;
+ bool matching_lock_found = false;
+ const DBT *left_key, *right_key;
+ comparator *cmp;
+
+ bool fn(const keyrange &range, TXNID txnid, bool is_shared,
+ TxnidVector *owners) {
+
+ if (first_call) {
+ first_call = false;
+ if (is_shared &&
+ !(*cmp)(left_key, range.get_left_key()) &&
+ !(*cmp)(right_key, range.get_right_key())) {
+ matching_lock_found = true;
+ }
+ } else {
+ // if we see multiple matching locks, it doesn't matter whether
+ // the first one was matching.
+ matching_lock_found = false;
+ }
+ row_lock lock = { .range = range, .txnid = txnid,
+ .is_shared = is_shared, .owners = owners };
+ row_locks->push(lock);
+ return true;
+ }
+ } copy_fn;
+ copy_fn.row_locks = row_locks;
+ copy_fn.left_key = left_key;
+ copy_fn.right_key = right_key;
+ copy_fn.cmp = cmp;
+ lkr->iterate(©_fn);
+ return copy_fn.matching_lock_found;
+}
+
// try to acquire a lock and consolidate it with existing locks if possible
// param: lkr, a prepared locked keyrange
// return: 0 on success, DB_LOCK_NOTGRANTED if conflicting locks exist.
int locktree::acquire_lock_consolidated(void *prepared_lkr,
TXNID txnid,
const DBT *left_key, const DBT *right_key,
+ bool is_write_request,
txnid_set *conflicts) {
int r = 0;
concurrent_tree::locked_keyrange *lkr;
@@ -345,24 +413,60 @@ int locktree::acquire_lock_consolidated(void *prepared_lkr,
// copy out the set of overlapping row locks.
GrowableArray<row_lock> overlapping_row_locks;
overlapping_row_locks.init();
- iterate_and_get_overlapping_row_locks(lkr, &overlapping_row_locks);
+ bool matching_shared_lock_found= false;
+
+ if (is_write_request)
+ iterate_and_get_overlapping_row_locks(lkr, &overlapping_row_locks);
+ else {
+ matching_shared_lock_found=
+ iterate_and_get_overlapping_row_locks2(lkr, left_key, right_key, &m_cmp,
+ txnid, &overlapping_row_locks);
+ // psergey-todo: what to do now? So, we have figured we have just one
+ // shareable lock. Need to add us into it as an owner but the lock
+ // pointer cannot be kept?
+ // A: use find_node_with_overlapping_child(key_range, nullptr);
+ // then, add ourselves to the owner list.
+ // Don't forget to release the subtree after that.
+ }
+
+ if (matching_shared_lock_found) {
+ // there is just one non-conflicting matching shared lock.
+ // we are holding a lock on it (see acquire() call above).
+ // we need to modify it to indicate there is another locker...
+ lkr->add_shared_owner(requested_range, txnid);
+
+ // Pretend shared lock uses as much memory.
+ row_lock new_lock = { .range = requested_range, .txnid = txnid,
+ .is_shared = false, .owners = nullptr };
+ uint64_t mem_used = row_lock_size_in_tree(new_lock);
+ if (m_mgr) {
+ m_mgr->note_mem_used(mem_used);
+ }
+ return 0;
+ }
+
+
size_t num_overlapping_row_locks = overlapping_row_locks.get_size();
// if any overlapping row locks conflict with this request, bail out.
+
bool conflicts_exist = determine_conflicting_txnids(overlapping_row_locks,
txnid, conflicts);
if (!conflicts_exist) {
// there are no conflicts, so all of the overlaps are for the requesting txnid.
// so, we must consolidate all existing overlapping ranges and the requested
// range into one dominating range. then we insert the dominating range.
+ bool all_shared = !is_write_request;
for (size_t i = 0; i < num_overlapping_row_locks; i++) {
row_lock overlapping_lock = overlapping_row_locks.fetch_unchecked(i);
invariant(overlapping_lock.txnid == txnid);
requested_range.extend(m_cmp, overlapping_lock.range);
- remove_row_lock_from_tree(lkr, overlapping_lock, m_mgr);
+ remove_row_lock_from_tree(lkr, overlapping_lock, TXNID_ANY, m_mgr);
+ all_shared = all_shared && overlapping_lock.is_shared;
}
- row_lock new_lock = { .range = requested_range, .txnid = txnid };
+ row_lock new_lock = { .range = requested_range, .txnid = txnid,
+ .is_shared = all_shared, .owners = nullptr };
insert_row_lock_into_tree(lkr, new_lock, m_mgr);
} else {
r = DB_LOCK_NOTGRANTED;
@@ -383,7 +487,7 @@ int locktree::acquire_lock(bool is_write_request,
int r = 0;
// we are only supporting write locks for simplicity
- invariant(is_write_request);
+ //invariant(is_write_request);
// acquire and prepare a locked keyrange over the requested range.
 // prepare is a serialzation point, so we take the opportunity to
@@ -391,9 +495,11 @@ int locktree::acquire_lock(bool is_write_request,
concurrent_tree::locked_keyrange lkr;
lkr.prepare(m_rangetree);
- bool acquired = sto_try_acquire(&lkr, txnid, left_key, right_key);
+ bool acquired = sto_try_acquire(&lkr, txnid, left_key, right_key,
+ is_write_request);
if (!acquired) {
- r = acquire_lock_consolidated(&lkr, txnid, left_key, right_key, conflicts);
+ r = acquire_lock_consolidated(&lkr, txnid, left_key, right_key,
+ is_write_request, conflicts);
}
lkr.release();
@@ -418,7 +524,7 @@ int locktree::try_acquire_lock(bool is_write_request,
// the locktree silently upgrades read locks to write locks for simplicity
int locktree::acquire_read_lock(TXNID txnid, const DBT *left_key, const DBT *right_key,
txnid_set *conflicts, bool big_txn) {
- return acquire_write_lock(txnid, left_key, right_key, conflicts, big_txn);
+ return try_acquire_lock(false, txnid, left_key, right_key, conflicts, big_txn);
}
int locktree::acquire_write_lock(TXNID txnid, const DBT *left_key, const DBT *right_key,
@@ -447,7 +553,9 @@ void locktree::dump_locks(void *cdata, dump_callback cb)
(*cb)(cdata,
lock.range.get_left_key(),
lock.range.get_right_key(),
- lock.txnid);
+ lock.txnid,
+ lock.is_shared,
+ lock.owners);
}
lkr.release();
all_locks.deinit();
@@ -525,8 +633,11 @@ void locktree::remove_overlapping_locks_for_txnid(TXNID txnid,
row_lock lock = overlapping_row_locks.fetch_unchecked(i);
// If this isn't our lock, that's ok, just don't remove it.
// See rationale above.
- if (lock.txnid == txnid) {
- remove_row_lock_from_tree(&lkr, lock, m_mgr);
+ // psergey-todo: for shared locks, just remove ourselves from the
+ // owners.
+ if (lock.txnid == txnid ||
+ (lock.owners && lock.owners->contains(txnid))) {
+ remove_row_lock_from_tree(&lkr, lock, txnid, m_mgr);
}
}
@@ -630,7 +741,8 @@ static int extract_first_n_row_locks(concurrent_tree::locked_keyrange *lkr,
int num_extracted;
int num_to_extract;
row_lock *row_locks;
- bool fn(const keyrange &range, TXNID txnid) {
+ bool fn(const keyrange &range, TXNID txnid, bool is_shared, TxnidVector *owners) {
+ // psergey-todo: multiple owners!
if (num_extracted < num_to_extract) {
row_lock lock;
lock.range.create_copy(range);
@@ -655,7 +767,7 @@ static int extract_first_n_row_locks(concurrent_tree::locked_keyrange *lkr,
int num_extracted = extract_fn.num_extracted;
invariant(num_extracted <= num_to_extract);
for (int i = 0; i < num_extracted; i++) {
- remove_row_lock_from_tree(lkr, row_locks[i], mgr);
+ remove_row_lock_from_tree(lkr, row_locks[i], TXNID_ANY, mgr);
}
return num_extracted;
@@ -781,7 +893,9 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca
while (iter.current(&rec)) {
keyrange range;
range.create(rec.get_left_key(), rec.get_right_key());
- row_lock lock = { .range = range, .txnid = current_txnid };
+ row_lock lock = { .range = range, .txnid = current_txnid,
+ .is_shared= false, // psergey-todo: SharedLockEscalation
+ .owners= nullptr };
insert_row_lock_into_tree(&lkr, lock, m_mgr);
iter.next();
}
diff --git a/utilities/transactions/range_locking/locktree/locktree.h b/utilities/transactions/range_locking/locktree/locktree.h
index 5ff4f7449..e7c909be0 100644
--- a/utilities/transactions/range_locking/locktree/locktree.h
+++ b/utilities/transactions/range_locking/locktree/locktree.h
@@ -339,7 +339,10 @@ namespace toku {
// since the lock_request object is opaque
struct lt_lock_request_info *get_lock_request_info(void);
- typedef void (*dump_callback)(void *cdata, const DBT *left, const DBT *right, TXNID txnid);
+ typedef void (*dump_callback)(void *cdata,
+ const DBT *left, const DBT *right,
+ TXNID txnid, bool is_shared,
+ TxnidVector *owners);
void dump_locks(void *cdata, dump_callback cb);
private:
locktree_manager *m_mgr;
@@ -360,6 +363,12 @@ namespace toku {
void *m_userdata;
struct lt_lock_request_info m_lock_request_info;
+ // psergey-todo:
+ // Each transaction also keeps a list of ranges it has locked.
+ // So, when a transaction is running in STO mode, two identical
+ // lists are kept: the STO lock list and transaction's owned locks
+ // list. Why can't we do with just one list?
+
// The following fields and members prefixed with "sto_" are for
// the single txnid optimization, intended to speed up the case
// when only one transaction is using the locktree. If we know
@@ -453,7 +462,8 @@ namespace toku {
// effect: append a range to the sto buffer
// requires: m_sto_txnid is valid
- void sto_append(const DBT *left_key, const DBT *right_key);
+ void sto_append(const DBT *left_key, const DBT *right_key,
+ bool is_write_request);
 // effect: ends the single txnid optimization, releasing any memory
// stored in the sto buffer, notifying the tracker, and
@@ -494,7 +504,8 @@ namespace toku {
// back to zero.
// returns: true if the lock was acquired for this txnid
bool sto_try_acquire(void *prepared_lkr, TXNID txnid,
- const DBT *left_key, const DBT *right_key);
+ const DBT *left_key, const DBT *right_key,
+ bool is_write_request);
// Effect:
// Provides a hook for a helgrind suppression.
@@ -513,7 +524,7 @@ namespace toku {
int acquire_lock_consolidated(void *prepared_lkr, TXNID txnid,
const DBT *left_key, const DBT *right_key,
- txnid_set *conflicts);
+ bool is_write_request, txnid_set *conflicts);
int acquire_lock(bool is_write_request, TXNID txnid,
const DBT *left_key, const DBT *right_key,
diff --git a/utilities/transactions/range_locking/locktree/range_buffer.cc b/utilities/transactions/range_locking/locktree/range_buffer.cc
index d1f14fc4a..eab374945 100644
--- a/utilities/transactions/range_locking/locktree/range_buffer.cc
+++ b/utilities/transactions/range_locking/locktree/range_buffer.cc
@@ -66,7 +66,9 @@ namespace toku {
return right_neg_inf || right_pos_inf;
}
- void range_buffer::record_header::init(const DBT *left_key, const DBT *right_key) {
+ void range_buffer::record_header::init(const DBT *left_key, const DBT *right_key,
+ bool is_exclusive) {
+ is_exclusive_lock= is_exclusive;
left_neg_inf = left_key == toku_dbt_negative_infinity();
left_pos_inf = left_key == toku_dbt_positive_infinity();
left_key_size = toku_dbt_is_infinite(left_key) ? 0 : left_key->size;
@@ -186,15 +188,16 @@ namespace toku {
_num_ranges = 0;
}
- void range_buffer::append(const DBT *left_key, const DBT *right_key) {
+ void range_buffer::append(const DBT *left_key, const DBT *right_key,
+ bool is_write_request) {
// if the keys are equal, then only one copy is stored.
if (toku_dbt_equals(left_key, right_key)) {
invariant(left_key->size <= MAX_KEY_SIZE);
- append_point(left_key);
+ append_point(left_key, is_write_request);
} else {
invariant(left_key->size <= MAX_KEY_SIZE);
invariant(right_key->size <= MAX_KEY_SIZE);
- append_range(left_key, right_key);
+ append_range(left_key, right_key, is_write_request);
}
_num_ranges++;
}
@@ -215,12 +218,13 @@ namespace toku {
_arena.destroy();
}
- void range_buffer::append_range(const DBT *left_key, const DBT *right_key) {
+ void range_buffer::append_range(const DBT *left_key, const DBT *right_key,
+ bool is_exclusive) {
size_t record_length = sizeof(record_header) + left_key->size + right_key->size;
char *buf = reinterpret_cast<char *>(_arena.malloc_from_arena(record_length));
record_header h;
- h.init(left_key, right_key);
+ h.init(left_key, right_key, is_exclusive);
// serialize the header
memcpy(buf, &h, sizeof(record_header));
@@ -238,12 +242,12 @@ namespace toku {
}
}
- void range_buffer::append_point(const DBT *key) {
+ void range_buffer::append_point(const DBT *key, bool is_exclusive) {
size_t record_length = sizeof(record_header) + key->size;
char *buf = reinterpret_cast<char *>(_arena.malloc_from_arena(record_length));
record_header h;
- h.init(key, nullptr);
+ h.init(key, nullptr, is_exclusive);
// serialize the header
memcpy(buf, &h, sizeof(record_header));
diff --git a/utilities/transactions/range_locking/locktree/range_buffer.h b/utilities/transactions/range_locking/locktree/range_buffer.h
index 9bc02dc22..e8869fae5 100644
--- a/utilities/transactions/range_locking/locktree/range_buffer.h
+++ b/utilities/transactions/range_locking/locktree/range_buffer.h
@@ -77,12 +77,14 @@ namespace toku {
bool right_neg_inf;
uint16_t left_key_size;
uint16_t right_key_size;
+ bool is_exclusive_lock;
bool left_is_infinite(void) const;
bool right_is_infinite(void) const;
- void init(const DBT *left_key, const DBT *right_key);
+ void init(const DBT *left_key, const DBT *right_key,
+ bool is_exclusive);
};
// PORT static_assert(sizeof(record_header) == 8, "record header format is off");
@@ -109,6 +111,10 @@ namespace toku {
// how big is this record? this tells us where the next record is
size_t size(void) const;
+ bool get_exclusive_flag() const {
+ return _header.is_exclusive_lock;
+ }
+
// populate a record header and point our DBT's
// buffers into ours if they are not infinite.
void deserialize(const char *buf);
@@ -145,7 +151,8 @@ namespace toku {
// append a left/right key range to the buffer.
// if the keys are equal, then only one copy is stored.
- void append(const DBT *left_key, const DBT *right_key);
+ void append(const DBT *left_key, const DBT *right_key,
+ bool is_write_request=false);
// is this range buffer empty?
bool is_empty(void) const;
@@ -162,11 +169,11 @@ namespace toku {
memarena _arena;
int _num_ranges;
- void append_range(const DBT *left_key, const DBT *right_key);
+ void append_range(const DBT *left_key, const DBT *right_key, bool is_write_request);
// append a point to the buffer. this is the space/time saving
// optimization for key ranges where left == right.
- void append_point(const DBT *key);
+ void append_point(const DBT *key, bool is_write_request);
};
} /* namespace toku */
diff --git a/utilities/transactions/range_locking/locktree/treenode.cc b/utilities/transactions/range_locking/locktree/treenode.cc
index 051ec7d1c..f44918a1b 100644
--- a/utilities/transactions/range_locking/locktree/treenode.cc
+++ b/utilities/transactions/range_locking/locktree/treenode.cc
@@ -64,6 +64,10 @@ void treenode::init(const comparator *cmp) {
m_is_root = false;
m_is_empty = true;
m_cmp = cmp;
+
+ m_is_shared= false;
+ m_owners= nullptr;
+
// use an adaptive mutex at each node since we expect the time the
// lock is held to be relatively short compared to a context switch.
// indeed, this improves performance at high thread counts considerably.
@@ -89,10 +93,11 @@ void treenode::destroy_root(void) {
m_cmp = nullptr;
}
-void treenode::set_range_and_txnid(const keyrange &range, TXNID txnid) {
+void treenode::set_range_and_txnid(const keyrange &range, TXNID txnid, bool is_shared) {
// allocates a new copy of the range for this node
m_range.create_copy(range);
m_txnid = txnid;
+ m_is_shared= is_shared;
m_is_empty = false;
}
@@ -108,10 +113,11 @@ bool treenode::range_overlaps(const keyrange &range) {
return m_range.overlaps(*m_cmp, range);
}
-treenode *treenode::alloc(const comparator *cmp, const keyrange &range, TXNID txnid) {
+treenode *treenode::alloc(const comparator *cmp, const keyrange &range,
+ TXNID txnid, bool is_shared) {
treenode *XCALLOC(node);
node->init(cmp);
- node->set_range_and_txnid(range, txnid);
+ node->set_range_and_txnid(range, txnid, is_shared);
return node;
}
@@ -122,12 +128,21 @@ void treenode::swap_in_place(treenode *node1, treenode *node2) {
node1->m_txnid = node2->m_txnid;
node2->m_range = tmp_range;
node2->m_txnid = tmp_txnid;
+
+ bool tmp_is_shared= node1->m_is_shared;
+ node1->m_is_shared= node2->m_is_shared;
+ node2->m_is_shared= tmp_is_shared;
}
void treenode::free(treenode *node) {
// destroy the range, freeing any copied keys
node->m_range.destroy();
+ if (node->m_owners) {
+ delete node->m_owners;
+ node->m_owners = nullptr; // need this?
+ }
+
// the root is simply marked as empty.
if (node->is_root()) {
// PORT toku_mutex_assert_locked(&node->m_mutex);
@@ -189,7 +204,7 @@ void treenode::traverse_overlaps(const keyrange &range, F *function) {
if (c == keyrange::comparison::EQUALS) {
// Doesn't matter if fn wants to keep going, there
// is nothing left, so return.
- function->fn(m_range, m_txnid);
+ function->fn(m_range, m_txnid, m_is_shared, m_owners);
return;
}
@@ -204,7 +219,7 @@ void treenode::traverse_overlaps(const keyrange &range, F *function) {
}
if (c == keyrange::comparison::OVERLAPS) {
- bool keep_going = function->fn(m_range, m_txnid);
+ bool keep_going = function->fn(m_range, m_txnid, m_is_shared, m_owners);
if (!keep_going) {
return;
}
@@ -221,29 +236,35 @@ void treenode::traverse_overlaps(const keyrange &range, F *function) {
}
}
-void treenode::insert(const keyrange &range, TXNID txnid) {
+void treenode::insert(const keyrange &range, TXNID txnid, bool is_shared) {
// choose a child to check. if that child is null, then insert the new node there.
// otherwise recur down that child's subtree
keyrange::comparison c = range.compare(*m_cmp, m_range);
if (c == keyrange::comparison::LESS_THAN) {
treenode *left_child = lock_and_rebalance_left();
if (left_child == nullptr) {
- left_child = treenode::alloc(m_cmp, range, txnid);
+ left_child = treenode::alloc(m_cmp, range, txnid, is_shared);
m_left_child.set(left_child);
} else {
- left_child->insert(range, txnid);
+ left_child->insert(range, txnid, is_shared);
left_child->mutex_unlock();
}
- } else {
- invariant(c == keyrange::comparison::GREATER_THAN);
+ } else if (c == keyrange::comparison::GREATER_THAN) {
+ //invariant(c == keyrange::comparison::GREATER_THAN);
treenode *right_child = lock_and_rebalance_right();
if (right_child == nullptr) {
- right_child = treenode::alloc(m_cmp, range, txnid);
+ right_child = treenode::alloc(m_cmp, range, txnid, is_shared);
m_right_child.set(right_child);
} else {
- right_child->insert(range, txnid);
+ right_child->insert(range, txnid, is_shared);
right_child->mutex_unlock();
}
+ } else if (c == keyrange::comparison::EQUALS) {
+ invariant(is_shared);
+ invariant(m_is_shared);
+ add_shared_owner(txnid);
+ } else {
+ invariant(0);
}
}
@@ -337,19 +358,38 @@ void treenode::recursive_remove(void) {
treenode::free(this);
}
-treenode *treenode::remove(const keyrange &range) {
+void treenode::remove_shared_owner(TXNID txnid) {
+ m_owners->erase(txnid);
+ /* if there is just one owner left, move it to m_txnid */
+ if (m_owners->size() == 1)
+ {
+ m_txnid = * m_owners->begin();
+ delete m_owners;
+ m_owners = nullptr;
+ }
+}
+
+treenode *treenode::remove(const keyrange &range, TXNID txnid) {
treenode *child;
// if the range is equal to this node's range, then just remove
// the root of this subtree. otherwise search down the tree
// in either the left or right children.
keyrange::comparison c = range.compare(*m_cmp, m_range);
switch (c) {
- case keyrange::comparison::EQUALS:
- return remove_root_of_subtree();
+ case keyrange::comparison::EQUALS: {
+ // if we are the only owners, remove. Otherwise, just remove
+ // us from the owners list.
+ if (txnid != TXNID_ANY && has_multiple_owners()) {
+ remove_shared_owner(txnid);
+ return this;
+ } else {
+ return remove_root_of_subtree();
+ }
+ }
case keyrange::comparison::LESS_THAN:
child = m_left_child.get_locked();
invariant_notnull(child);
- child = child->remove(range);
+ child = child->remove(range, txnid);
// unlock the child if there still is one.
// regardless, set the right child pointer
@@ -361,7 +401,7 @@ treenode *treenode::remove(const keyrange &range) {
case keyrange::comparison::GREATER_THAN:
child = m_right_child.get_locked();
invariant_notnull(child);
- child = child->remove(range);
+ child = child->remove(range, txnid);
// unlock the child if there still is one.
// regardless, set the right child pointer
diff --git a/utilities/transactions/range_locking/locktree/treenode.h b/utilities/transactions/range_locking/locktree/treenode.h
index a4b01f1cc..6b082acc4 100644
--- a/utilities/transactions/range_locking/locktree/treenode.h
+++ b/utilities/transactions/range_locking/locktree/treenode.h
@@ -92,7 +92,7 @@ public:
void destroy_root(void);
// effect: sets the txnid and copies the given range for this node
- void set_range_and_txnid(const keyrange &range, TXNID txnid);
+ void set_range_and_txnid(const keyrange &range, TXNID txnid, bool is_shared);
// returns: true iff this node is marked as empty
bool is_empty(void);
@@ -127,12 +127,12 @@ public:
// effect: inserts the given range and txnid into a subtree, recursively
// requires: range does not overlap with any node below the subtree
- void insert(const keyrange &range, TXNID txnid);
+ void insert(const keyrange &range, TXNID txnid, bool is_shared);
// effect: removes the given range from the subtree
// requires: range exists in the subtree
// returns: the root of the resulting subtree
- treenode *remove(const keyrange &range);
+ treenode *remove(const keyrange &range, TXNID txnid);
// effect: removes this node and all of its children, recursively
// requires: every node at and below this node is unlocked
@@ -166,13 +166,41 @@ private:
// destroyed, it frees the memory associated with whatever range
// it has at the time of destruction.
keyrange m_range;
+
+ void add_shared_owner(TXNID txnid)
+ {
+ assert(m_is_shared);
+ if (m_txnid != TXNID_SHARED)
+ {
+ m_owners= new TxnidVector;
+ m_owners->insert(m_txnid);
+ m_txnid= TXNID_SHARED;
+ }
+ m_owners->insert(txnid);
+ }
+ void remove_shared_owner(TXNID txnid);
+
+ bool has_multiple_owners() { return (m_txnid == TXNID_SHARED); }
+
+private:
+ // Owner transaction id.
+ // A value of TXNID_SHARED means this node has multiple owners
TXNID m_txnid;
+ // If true, this lock is a non-exclusive lock, and it can have either
+ // one or several owners.
+ bool m_is_shared;
+
+ // List of the owners, or nullptr if there's just one owner.
+ TxnidVector *m_owners;
+
// two child pointers
child_ptr m_left_child;
child_ptr m_right_child;
// comparator for ranges
+ // psergey-todo: Is there any sense to store the comparator in each tree
+ // node?
const comparator *m_cmp;
// marked for the root node. the root node is never free()'d
@@ -185,6 +213,10 @@ private:
// effect: initializes an empty node with the given comparator
void init(const comparator *cmp);
+ // requires: this is a shared node (m_is_shared==true)
+ // effect: another transaction is added as an owner.
+ void add_shared_owner(TXNID txnid);
+
// requires: *parent is initialized to something meaningful.
// requires: subtree is non-empty
// returns: the leftmost child of the given subtree
@@ -230,7 +262,8 @@ private:
treenode *maybe_rebalance(void);
// returns: allocated treenode populated with a copy of the range and txnid
- static treenode *alloc(const comparator *cmp, const keyrange &range, TXNID txnid);
+ static treenode *alloc(const comparator *cmp, const keyrange &range,
+ TXNID txnid, bool is_shared);
// requires: node is a locked root node, or an unlocked non-root node
static void free(treenode *node);
diff --git a/utilities/transactions/range_locking/portability/txn_subst.h b/utilities/transactions/range_locking/portability/txn_subst.h
index 3882eb1c5..58c3fced0 100644
--- a/utilities/transactions/range_locking/portability/txn_subst.h
+++ b/utilities/transactions/range_locking/portability/txn_subst.h
@@ -3,8 +3,24 @@
//
#pragma once
+#include <set>
#include "util/omt.h"
typedef uint64_t TXNID;
#define TXNID_NONE ((TXNID)0)
+// A set of transactions
+// (TODO: consider using class toku::txnid_set. The reason for using STL
+// container was that its API is easier)
+class TxnidVector : public std::set<TXNID> {
+public:
+ bool contains(TXNID txnid) { return find(txnid) != end(); }
+};
+
+// A value for lock structures with a meaning "the lock is owned by multiple
+// transactions (and one has to check the TxnidVector to get their ids)"
+#define TXNID_SHARED (TXNID(-1))
+
+// Auxiliary value meaning "any transaction id will do". No real transaction
+// may have this as its id.
+#define TXNID_ANY (TXNID(-2))
diff --git a/utilities/transactions/transaction_lock_mgr.cc b/utilities/transactions/transaction_lock_mgr.cc
index b6c91393d..34144c2af 100644
--- a/utilities/transactions/transaction_lock_mgr.cc
+++ b/utilities/transactions/transaction_lock_mgr.cc
@@ -826,7 +826,7 @@ Status RangeLockMgr::TryRangeLock(PessimisticTransaction* txn,
uint32_t column_family_id,
const Endpoint &start_endp,
const Endpoint &end_endp,
- bool /*exclusive*/) {
+ bool exclusive) {
toku::lock_request request;
request.create(mutex_factory_);
DBT start_key_dbt, end_key_dbt;
@@ -848,7 +848,8 @@ Status RangeLockMgr::TryRangeLock(PessimisticTransaction* txn,
auto lt= get_locktree_by_cfid(column_family_id);
request.set(lt, (TXNID)txn, &start_key_dbt, &end_key_dbt,
- toku::lock_request::WRITE, false /* not a big txn */,
+ exclusive? toku::lock_request::WRITE: toku::lock_request::READ,
+ false /* not a big txn */,
(void*)wait_txn_id);
uint64_t killed_time_msec = 0; // TODO: what should this have?
@@ -1260,13 +1261,15 @@ struct LOCK_PRINT_CONTEXT {
};
static
-void push_into_lock_status_data(void* param, const DBT *left,
- const DBT *right, TXNID txnid_arg) {
+void push_into_lock_status_data(void* param,
+ const DBT *left, const DBT *right,
+ TXNID txnid_arg, bool is_shared,
+ TxnidVector *owners) {
struct LOCK_PRINT_CONTEXT *ctx= (LOCK_PRINT_CONTEXT*)param;
struct KeyLockInfo info;
info.key.append((const char*)left->data, (size_t)left->size);
- info.exclusive= true;
+ info.exclusive= !is_shared;
if (!(left->size == right->size &&
!memcmp(left->data, right->data, left->size)))
@@ -1276,8 +1279,15 @@ void push_into_lock_status_data(void* param, const DBT *left,
info.key2.append((const char*)right->data, right->size);
}
- TXNID txnid= ((PessimisticTransaction*)txnid_arg)->GetID();
- info.ids.push_back(txnid);
+ if (txnid_arg != TXNID_SHARED) {
+ TXNID txnid= ((PessimisticTransaction*)txnid_arg)->GetID();
+ info.ids.push_back(txnid);
+ } else {
+ for (auto it : *owners) {
+ TXNID real_id= ((PessimisticTransaction*)it)->GetID();
+ info.ids.push_back(real_id);
+ }
+ }
ctx->data->insert({ctx->cfh_id, info});
}
1
0
[Commits] 41a6e949504: MDEV-18015: Assertion `global_status_var.global_memory_used == 0' failed when using UDF , window functions and views
by Varun 13 May '19
by Varun 13 May '19
13 May '19
revision-id: 41a6e949504bb2ee4d81d24f8952b675e971625b (mariadb-10.2.23-130-g41a6e949504)
parent(s): 8ce702aa90c174566f4ac950e85cc7570bf9b647
author: Varun Gupta
committer: Varun Gupta
timestamp: 2019-05-13 15:15:06 +0530
message:
MDEV-18015: Assertion `global_status_var.global_memory_used == 0' failed when using UDF , window functions and views
Adding destructor for Group_bound_tracker to free Cached_item_str.
The Cached_item for window functions are allocated on THD:mem_root
but the Cached_item_str has value of type string which is allocated on
the heap, so we need to call free() for it
---
mysql-test/r/win.result | 33 +++++++++++++++++++++++++++++++++
mysql-test/t/win.test | 17 +++++++++++++++++
sql/item_windowfunc.h | 12 ++++--------
3 files changed, 54 insertions(+), 8 deletions(-)
diff --git a/mysql-test/r/win.result b/mysql-test/r/win.result
index 5da8a7f8a3c..2c50ba29d11 100644
--- a/mysql-test/r/win.result
+++ b/mysql-test/r/win.result
@@ -3607,5 +3607,38 @@ b row_number() over (partition by sum(a)+1)
2000 1
drop table t1;
#
+# MDEV-18015: Assertion `global_status_var.global_memory_used == 0' failed when using UDF,
+# window functions and views
+#
+create table t1 (id int, n1 int);
+insert into t1 values (1,1),(2,1),(3,2),(4,4);
+explain
+select max(n1) over (partition by 'abc') from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using temporary
+select max(n1) over (partition by 'abc') from t1;
+max(n1) over (partition by 'abc')
+4
+4
+4
+4
+explain
+select rank() over (partition by 'abc' order by 'xyz') from t1;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using temporary
+select rank() over (partition by 'abc' order by 'xyz') from t1;
+rank() over (partition by 'abc' order by 'xyz')
+1
+1
+1
+1
+select lag(id) over (order by null desc), sum(id) over (order by null) from t1;
+lag(id) over (order by null desc) sum(id) over (order by null)
+NULL 10
+1 10
+2 10
+3 10
+drop table t1;
+#
# End of 10.2 tests
#
diff --git a/mysql-test/t/win.test b/mysql-test/t/win.test
index 15c48c31250..7c2514b7114 100644
--- a/mysql-test/t/win.test
+++ b/mysql-test/t/win.test
@@ -2325,6 +2325,23 @@ select b, row_number() over (partition by sum(a)+1) from t1 group by b;
drop table t1;
+--echo #
+--echo # MDEV-18015: Assertion `global_status_var.global_memory_used == 0' failed when using UDF,
+--echo # window functions and views
+--echo #
+
+create table t1 (id int, n1 int);
+insert into t1 values (1,1),(2,1),(3,2),(4,4);
+explain
+select max(n1) over (partition by 'abc') from t1;
+select max(n1) over (partition by 'abc') from t1;
+
+explain
+select rank() over (partition by 'abc' order by 'xyz') from t1;
+select rank() over (partition by 'abc' order by 'xyz') from t1;
+select lag(id) over (order by null desc), sum(id) over (order by null) from t1;
+drop table t1;
+
--echo #
--echo # End of 10.2 tests
--echo #
diff --git a/sql/item_windowfunc.h b/sql/item_windowfunc.h
index 21270733051..b9df1b7482b 100644
--- a/sql/item_windowfunc.h
+++ b/sql/item_windowfunc.h
@@ -28,11 +28,6 @@ class Group_bound_tracker
first_check= true;
}
- void cleanup()
- {
- group_fields.empty();
- }
-
/*
Check if the current row is in a different group than the previous row
this function was called for.
@@ -70,6 +65,10 @@ class Group_bound_tracker
}
return 0;
}
+ ~Group_bound_tracker()
+ {
+ group_fields.delete_elements();
+ }
private:
List<Cached_item> group_fields;
@@ -199,7 +198,6 @@ class Item_sum_rank: public Item_sum_int
{
if (peer_tracker)
{
- peer_tracker->cleanup();
delete peer_tracker;
peer_tracker= NULL;
}
@@ -269,7 +267,6 @@ class Item_sum_dense_rank: public Item_sum_int
{
if (peer_tracker)
{
- peer_tracker->cleanup();
delete peer_tracker;
peer_tracker= NULL;
}
@@ -537,7 +534,6 @@ class Item_sum_percent_rank: public Item_sum_window_with_row_count
{
if (peer_tracker)
{
- peer_tracker->cleanup();
delete peer_tracker;
peer_tracker= NULL;
}
1
0
[Commits] e1ffe81bb08: MDEV-19076: rpl_parallel_temptable result mismatch '-33 optimistic'
by sujatha 13 May '19
by sujatha 13 May '19
13 May '19
revision-id: e1ffe81bb08b67ad246fd4d3b2a7970e051cec69 (mariadb-10.2.24-8-ge1ffe81bb08)
parent(s): 8ce702aa90c174566f4ac950e85cc7570bf9b647
author: Sujatha
committer: Sujatha
timestamp: 2019-05-13 13:16:36 +0530
message:
MDEV-19076: rpl_parallel_temptable result mismatch '-33 optimistic'
Problem:
========
The test now fails with the following trace:
CURRENT_TEST: rpl.rpl_parallel_temptable
--- /mariadb/10.4/mysql-test/suite/rpl/r/rpl_parallel_temptable.result
+++ /mariadb/10.4/mysql-test/suite/rpl/r/rpl_parallel_temptable.reject
@@ -194,7 +194,6 @@
30 conservative
31 conservative
32 optimistic
-33 optimistic
Analysis:
=========
The part of test which fails with result content mismatch is given below.
CREATE TEMPORARY TABLE t4 (a INT PRIMARY KEY) ENGINE=InnoDB;
INSERT INTO t4 VALUES (32);
INSERT INTO t4 VALUES (33);
INSERT INTO t1 SELECT a, "optimistic" FROM t4;
slave_parallel_mode=optimistic
The expectation of the above test script is that INSERT ... SELECT should read
both 32 and 33 and populate table 't1'. But this expectation fails occasionally.
All three INSERT statements are handed over to three different slave parallel
workers. Temporary tables are not safe for parallel replication. They were
designed to be visible to one thread only, so have no table locking. Thus there
is no protection against two conflicting transactions committing in parallel and
things like that.
So anything that uses temporary tables will be serialized with anything before
it when using parallel replication, by means of a "wait_for_prior_commit" function
call. This ensures that each such transaction is executed sequentially.
But there exists a code path in which the above wait doesn't happen. Because of
this, at times INSERT ... SELECT doesn't wait for the INSERT (33) to complete;
it finishes its execution and enters the commit stage. Hence only row 32 is
found in those cases, resulting in the test failure.
The wait needs to be added within the "open_temporary_table" call. The code
looks like this within "open_temporary_table".
Each thread tries to open temporary table in 3 different ways:
case 1: Find a temporary table which is already in use by using
find_temporary_table(tl) && wait_for_prior_commit()
case 2: If the above failed, then try to look for a temporary table which is
        marked as free for reuse. This internally calls "wait_for_prior_commit()"
        if such a table is found.
find_and_use_tmp_table(tl, &table)
case 3: If none of the above succeeded, open a new table handle from the table share.
if (!table && (share= find_tmp_table_share(tl)))
{ table= open_temporary_table(share, tl->get_table_name(), true); }
At present the "wait_for_prior_commit" happens only in case 1 & 2.
Fix:
====
On the slave, add a call to "wait_for_prior_commit" for case 3.
The above wait on slave will solve the issue. A more detailed fix would be to
mark temporary tables as not safe for parallel execution on the master side.
In order to do that, on the master side, set the Gtid_log_event specific flag
FL_TRANSACTIONAL to false all the time, so that such transactions are not
scheduled in parallel.
---
sql/log_event.cc | 3 ++-
sql/sql_lex.h | 32 ++++++++++++++++++++++++++++++++
sql/temporary_tables.cc | 13 +++++++++++++
3 files changed, 47 insertions(+), 1 deletion(-)
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 8aea32004c3..5b916495c20 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -7546,6 +7546,7 @@ Gtid_log_event::Gtid_log_event(THD *thd_arg, uint64 seq_no_arg,
flags2((standalone ? FL_STANDALONE : 0) | (commit_id_arg ? FL_GROUP_COMMIT_ID : 0))
{
cache_type= Log_event::EVENT_NO_CACHE;
+ bool is_tmp_table= thd_arg->lex->stmt_accessed_temp_table();
if (thd_arg->transaction.stmt.trans_did_wait() ||
thd_arg->transaction.all.trans_did_wait())
flags2|= FL_WAITED;
@@ -7554,7 +7555,7 @@ Gtid_log_event::Gtid_log_event(THD *thd_arg, uint64 seq_no_arg,
thd_arg->transaction.all.trans_did_ddl() ||
thd_arg->transaction.all.has_created_dropped_temp_table())
flags2|= FL_DDL;
- else if (is_transactional)
+ else if (is_transactional && !is_tmp_table)
flags2|= FL_TRANSACTIONAL;
if (!(thd_arg->variables.option_bits & OPTION_RPL_SKIP_PARALLEL))
flags2|= FL_ALLOW_PARALLEL;
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index a880b6e4283..1889a25b0fb 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -1761,6 +1761,38 @@ class Query_tables_list
DBUG_RETURN((stmt_accessed_table_flag & (1U << accessed_table)) != 0);
}
+ /**
+ Checks either a trans/non trans temporary table is being accessed while
+ executing a statement.
+
+ @return
+ @retval TRUE if a temporary table is being accessed
+ @retval FALSE otherwise
+ */
+ inline bool stmt_accessed_temp_table()
+ {
+ DBUG_ENTER("THD::stmt_accessed_temp_table");
+ DBUG_RETURN(stmt_accessed_non_trans_temp_table() ||
+ stmt_accessed_trans_temp_table());
+ }
+
+ /**
+ Checks if a temporary transactional table is being accessed while executing
+ a statement.
+
+ @return
+ @retval TRUE if a temporary transactional table is being accessed
+ @retval FALSE otherwise
+ */
+ inline bool stmt_accessed_trans_temp_table()
+ {
+ DBUG_ENTER("THD::stmt_accessed_trans_temp_table");
+
+ DBUG_RETURN((stmt_accessed_table_flag &
+ ((1U << STMT_READS_TEMP_TRANS_TABLE) |
+ (1U << STMT_WRITES_TEMP_TRANS_TABLE))) != 0);
+ }
+
/**
Checks if a temporary non-transactional table is about to be accessed
while executing a statement.
diff --git a/sql/temporary_tables.cc b/sql/temporary_tables.cc
index ed23dae06d2..da938db00f9 100644
--- a/sql/temporary_tables.cc
+++ b/sql/temporary_tables.cc
@@ -369,6 +369,19 @@ bool THD::open_temporary_table(TABLE_LIST *tl)
if (!table && (share= find_tmp_table_share(tl)))
{
table= open_temporary_table(share, tl->get_table_name(), true);
+ /*
+ Temporary tables are not safe for parallel replication. They were
+ designed to be visible to one thread only, so have no table locking.
+ Thus there is no protection against two conflicting transactions
+ committing in parallel and things like that.
+
+ So for now, anything that uses temporary tables will be serialised
+ with anything before it, when using parallel replication.
+ */
+ if (table && rgi_slave &&
+ rgi_slave->is_parallel_exec &&
+ wait_for_prior_commit())
+ DBUG_RETURN(true);
}
if (!table)
1
0