- commits - lists.mariadb.org

[Commits] 0bafdc478c4: MDEV-17230: encryption_key_id from alter is ignored by encryption threads
by jan 10 Oct '18

10 Oct '18

revision-id: 0bafdc478c4ab8c577331b950dc7719abd6781c1 (mariadb-10.1.35-84-g0bafdc478c4) parent(s): 3c3c4ae22545d3242a8b7c4f2bec3bf2d245890a author: Jan Lindström committer: Jan Lindström timestamp: 2018-10-10 18:25:53 +0300 message: MDEV-17230: encryption_key_id from alter is ignored by encryption threads Background: The encryption key_id in use is stored in the encryption metadata, i.e. crypt_data, which is stored on page 0 of the table's tablespace. crypt_data is created only if implicit encryption/not encryption is requested, i.e. the ENCRYPTED=[YES|NO] table option is used; see fil_create_new_single_table_tablespace in fil0fil.cc. Later, if encryption is enabled, all tables that use the default encryption mode (i.e. no encryption table option is set) are encrypted with the default encryption key_id, which is 1. See fil_crypt_start_encrypting_space in fil0crypt.cc. ha_innobase::check_table_options(): If default encryption is used and encryption is disabled, you may not use a nondefault encryption_key_id, as it is not stored anywhere. 
--- .../encryption/r/innodb-encryption-alter.result | 37 ++++++++++++++++++++++ .../encryption/t/innodb-encryption-alter.test | 24 ++++++++++++++ storage/innobase/handler/ha_innodb.cc | 23 ++++++-------- storage/xtradb/handler/ha_innodb.cc | 23 ++++++-------- 4 files changed, 81 insertions(+), 26 deletions(-) diff --git a/mysql-test/suite/encryption/r/innodb-encryption-alter.result b/mysql-test/suite/encryption/r/innodb-encryption-alter.result index 9ff0f492034..75417074fb0 100644 --- a/mysql-test/suite/encryption/r/innodb-encryption-alter.result +++ b/mysql-test/suite/encryption/r/innodb-encryption-alter.result @@ -50,3 +50,40 @@ Warning 140 InnoDB: ENCRYPTION_KEY_ID 99 not available Error 1478 Table storage engine 'InnoDB' does not support the create option 'ENCRYPTION_KEY_ID' set innodb_default_encryption_key_id = 1; drop table t1,t2; +SET GLOBAL innodb_encrypt_tables=OFF; +CREATE TABLE t1 (a int not null primary key) engine=innodb; +ALTER TABLE t1 ENCRYPTION_KEY_ID=4; +ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'ENCRYPTION_KEY_ID' +SHOW WARNINGS; +Level Code Message +Warning 140 InnoDB: Incorrect ENCRYPTION_KEY_ID 4 when encryption is disabled +Error 1478 Table storage engine 'InnoDB' does not support the create option 'ENCRYPTION_KEY_ID' +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + PRIMARY KEY (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +DROP TABLE t1; +CREATE TABLE t2 (a int not null primary key) engine=innodb; +ALTER TABLE t2 ENCRYPTION_KEY_ID=4, ALGORITHM=COPY; +ERROR HY000: Can't create table `test`.`#sql-temporary` (errno: 140 "Wrong create options") +SHOW WARNINGS; +Level Code Message +Warning 140 InnoDB: Incorrect ENCRYPTION_KEY_ID 4 when encryption is disabled +Error 1005 Can't create table `test`.`#sql-temporary` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB +SHOW CREATE TABLE t2; +Table Create Table 
+t2 CREATE TABLE `t2` ( + `a` int(11) NOT NULL, + PRIMARY KEY (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +DROP TABLE t2; +CREATE TABLE t3 (a int not null primary key) engine=innodb ENCRYPTION_KEY_ID=4; +ERROR HY000: Can't create table `test`.`t3` (errno: 140 "Wrong create options") +SHOW WARNINGS; +Level Code Message +Warning 140 InnoDB: Incorrect ENCRYPTION_KEY_ID 4 when encryption is disabled +Error 1005 Can't create table `test`.`t3` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB diff --git a/mysql-test/suite/encryption/t/innodb-encryption-alter.test b/mysql-test/suite/encryption/t/innodb-encryption-alter.test index 9420fb74a4c..9465226dd96 100644 --- a/mysql-test/suite/encryption/t/innodb-encryption-alter.test +++ b/mysql-test/suite/encryption/t/innodb-encryption-alter.test @@ -87,6 +87,30 @@ connection default; drop table t1,t2; +# +# MDEV-17230: encryption_key_id from alter is ignored by encryption threads +# +SET GLOBAL innodb_encrypt_tables=OFF; +CREATE TABLE t1 (a int not null primary key) engine=innodb; +--error ER_ILLEGAL_HA_CREATE_OPTION +ALTER TABLE t1 ENCRYPTION_KEY_ID=4; +SHOW WARNINGS; +SHOW CREATE TABLE t1; +DROP TABLE t1; + +CREATE TABLE t2 (a int not null primary key) engine=innodb; +--replace_regex /#sql-[0-9a-f_]*`/#sql-temporary`/ +--error ER_CANT_CREATE_TABLE +ALTER TABLE t2 ENCRYPTION_KEY_ID=4, ALGORITHM=COPY; +--replace_regex /#sql-[0-9a-f_]*`/#sql-temporary`/ +SHOW WARNINGS; +SHOW CREATE TABLE t2; +DROP TABLE t2; + +--error ER_CANT_CREATE_TABLE +CREATE TABLE t3 (a int not null primary key) engine=innodb ENCRYPTION_KEY_ID=4; +SHOW WARNINGS; + # reset system --disable_query_log EVAL SET GLOBAL innodb_file_per_table = $innodb_file_per_table_orig; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 084272124b7..50c081b960e 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -11958,21 
+11958,18 @@ ha_innobase::check_table_options( options->encryption_key_id = FIL_DEFAULT_ENCRYPTION_KEY; } - /* If default encryption is used make sure that used kay is found - from key file. */ + /* If default encryption is used and encryption is disabled, you may + not use nondefault encryption_key_id as it is not stored anywhere. */ if (encrypt == FIL_ENCRYPTION_DEFAULT && - !srv_encrypt_tables && - options->encryption_key_id != FIL_DEFAULT_ENCRYPTION_KEY) { - if (!encryption_key_id_exists((unsigned int)options->encryption_key_id)) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: ENCRYPTION_KEY_ID %u not available", - (uint)options->encryption_key_id + !srv_encrypt_tables && + options->encryption_key_id != FIL_DEFAULT_ENCRYPTION_KEY) { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: Incorrect ENCRYPTION_KEY_ID %u when encryption is disabled", + (uint)options->encryption_key_id ); - return "ENCRYPTION_KEY_ID"; - - } + return "ENCRYPTION_KEY_ID"; } /* Check atomic writes requirements */ diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index fa63ed58292..896a230ab6b 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -12524,21 +12524,18 @@ ha_innobase::check_table_options( options->encryption_key_id = FIL_DEFAULT_ENCRYPTION_KEY; } - /* If default encryption is used make sure that used kay is found - from key file. */ + /* If default encryption is used and encryption is disabled, you may + not use nondefault encryption_key_id as it is not stored anywhere. 
*/ if (encrypt == FIL_ENCRYPTION_DEFAULT && - !srv_encrypt_tables && - options->encryption_key_id != FIL_DEFAULT_ENCRYPTION_KEY) { - if (!encryption_key_id_exists((unsigned int)options->encryption_key_id)) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: ENCRYPTION_KEY_ID %u not available", - (uint)options->encryption_key_id + !srv_encrypt_tables && + options->encryption_key_id != FIL_DEFAULT_ENCRYPTION_KEY) { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: Incorrect ENCRYPTION_KEY_ID %u when encryption is disabled", + (uint)options->encryption_key_id ); - return "ENCRYPTION_KEY_ID"; - - } + return "ENCRYPTION_KEY_ID"; } /* Check atomic writes requirements */

1 0

[Commits] a86bb52cca0: MDEV-17229: Encryption threads ignore innodb_default_encryption_key_id
by jan 10 Oct '18

10 Oct '18

revision-id: a86bb52cca013eebdb1d7813e537eda0e14c7395 (mariadb-10.1.35-84-ga86bb52cca0) parent(s): 3c3c4ae22545d3242a8b7c4f2bec3bf2d245890a author: Jan Lindström committer: Jan Lindström timestamp: 2018-10-10 18:12:27 +0300 message: MDEV-17229: Encryption threads ignore innodb_default_encryption_key_id Background: The encryption key_id in use is stored in the encryption metadata, i.e. crypt_data, which is stored on page 0 of the table's tablespace. crypt_data is created only if implicit encryption/not encryption is requested, i.e. the ENCRYPTED=[YES|NO] table option is used; see fil_create_new_single_table_tablespace in fil0fil.cc. The innodb_default_encryption_key_id setting should affect only tables created with the ENCRYPTED=YES table option. Similarly, the ENCRYPTION_KEY_ID table option should be allowed only for tables using the ENCRYPTED=YES table option. Later, if encryption is enabled, all tables that use the default encryption mode (i.e. no encryption table option is set) are encrypted with the default encryption key_id, which is 1. See fil_crypt_start_encrypting_space in fil0crypt.cc. 
--- .../encryption/r/innodb-encryption-alter.result | 37 ++++++++++++++++++++++ .../encryption/t/innodb-encryption-alter.test | 24 ++++++++++++++ storage/innobase/handler/ha_innodb.cc | 23 ++++++-------- storage/xtradb/handler/ha_innodb.cc | 23 ++++++-------- 4 files changed, 81 insertions(+), 26 deletions(-) diff --git a/mysql-test/suite/encryption/r/innodb-encryption-alter.result b/mysql-test/suite/encryption/r/innodb-encryption-alter.result index 9ff0f492034..75417074fb0 100644 --- a/mysql-test/suite/encryption/r/innodb-encryption-alter.result +++ b/mysql-test/suite/encryption/r/innodb-encryption-alter.result @@ -50,3 +50,40 @@ Warning 140 InnoDB: ENCRYPTION_KEY_ID 99 not available Error 1478 Table storage engine 'InnoDB' does not support the create option 'ENCRYPTION_KEY_ID' set innodb_default_encryption_key_id = 1; drop table t1,t2; +SET GLOBAL innodb_encrypt_tables=OFF; +CREATE TABLE t1 (a int not null primary key) engine=innodb; +ALTER TABLE t1 ENCRYPTION_KEY_ID=4; +ERROR HY000: Table storage engine 'InnoDB' does not support the create option 'ENCRYPTION_KEY_ID' +SHOW WARNINGS; +Level Code Message +Warning 140 InnoDB: Incorrect ENCRYPTION_KEY_ID 4 when encryption is disabled +Error 1478 Table storage engine 'InnoDB' does not support the create option 'ENCRYPTION_KEY_ID' +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) NOT NULL, + PRIMARY KEY (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +DROP TABLE t1; +CREATE TABLE t2 (a int not null primary key) engine=innodb; +ALTER TABLE t2 ENCRYPTION_KEY_ID=4, ALGORITHM=COPY; +ERROR HY000: Can't create table `test`.`#sql-temporary` (errno: 140 "Wrong create options") +SHOW WARNINGS; +Level Code Message +Warning 140 InnoDB: Incorrect ENCRYPTION_KEY_ID 4 when encryption is disabled +Error 1005 Can't create table `test`.`#sql-temporary` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB +SHOW CREATE TABLE t2; +Table Create Table 
+t2 CREATE TABLE `t2` ( + `a` int(11) NOT NULL, + PRIMARY KEY (`a`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1 +DROP TABLE t2; +CREATE TABLE t3 (a int not null primary key) engine=innodb ENCRYPTION_KEY_ID=4; +ERROR HY000: Can't create table `test`.`t3` (errno: 140 "Wrong create options") +SHOW WARNINGS; +Level Code Message +Warning 140 InnoDB: Incorrect ENCRYPTION_KEY_ID 4 when encryption is disabled +Error 1005 Can't create table `test`.`t3` (errno: 140 "Wrong create options") +Warning 1030 Got error 140 "Wrong create options" from storage engine InnoDB diff --git a/mysql-test/suite/encryption/t/innodb-encryption-alter.test b/mysql-test/suite/encryption/t/innodb-encryption-alter.test index 9420fb74a4c..9465226dd96 100644 --- a/mysql-test/suite/encryption/t/innodb-encryption-alter.test +++ b/mysql-test/suite/encryption/t/innodb-encryption-alter.test @@ -87,6 +87,30 @@ connection default; drop table t1,t2; +# +# MDEV-17230: encryption_key_id from alter is ignored by encryption threads +# +SET GLOBAL innodb_encrypt_tables=OFF; +CREATE TABLE t1 (a int not null primary key) engine=innodb; +--error ER_ILLEGAL_HA_CREATE_OPTION +ALTER TABLE t1 ENCRYPTION_KEY_ID=4; +SHOW WARNINGS; +SHOW CREATE TABLE t1; +DROP TABLE t1; + +CREATE TABLE t2 (a int not null primary key) engine=innodb; +--replace_regex /#sql-[0-9a-f_]*`/#sql-temporary`/ +--error ER_CANT_CREATE_TABLE +ALTER TABLE t2 ENCRYPTION_KEY_ID=4, ALGORITHM=COPY; +--replace_regex /#sql-[0-9a-f_]*`/#sql-temporary`/ +SHOW WARNINGS; +SHOW CREATE TABLE t2; +DROP TABLE t2; + +--error ER_CANT_CREATE_TABLE +CREATE TABLE t3 (a int not null primary key) engine=innodb ENCRYPTION_KEY_ID=4; +SHOW WARNINGS; + # reset system --disable_query_log EVAL SET GLOBAL innodb_file_per_table = $innodb_file_per_table_orig; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 084272124b7..50c081b960e 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -11958,21 
+11958,18 @@ ha_innobase::check_table_options( options->encryption_key_id = FIL_DEFAULT_ENCRYPTION_KEY; } - /* If default encryption is used make sure that used kay is found - from key file. */ + /* If default encryption is used and encryption is disabled, you may + not use nondefault encryption_key_id as it is not stored anywhere. */ if (encrypt == FIL_ENCRYPTION_DEFAULT && - !srv_encrypt_tables && - options->encryption_key_id != FIL_DEFAULT_ENCRYPTION_KEY) { - if (!encryption_key_id_exists((unsigned int)options->encryption_key_id)) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: ENCRYPTION_KEY_ID %u not available", - (uint)options->encryption_key_id + !srv_encrypt_tables && + options->encryption_key_id != FIL_DEFAULT_ENCRYPTION_KEY) { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: Incorrect ENCRYPTION_KEY_ID %u when encryption is disabled", + (uint)options->encryption_key_id ); - return "ENCRYPTION_KEY_ID"; - - } + return "ENCRYPTION_KEY_ID"; } /* Check atomic writes requirements */ diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index fa63ed58292..896a230ab6b 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -12524,21 +12524,18 @@ ha_innobase::check_table_options( options->encryption_key_id = FIL_DEFAULT_ENCRYPTION_KEY; } - /* If default encryption is used make sure that used kay is found - from key file. */ + /* If default encryption is used and encryption is disabled, you may + not use nondefault encryption_key_id as it is not stored anywhere. 
*/ if (encrypt == FIL_ENCRYPTION_DEFAULT && - !srv_encrypt_tables && - options->encryption_key_id != FIL_DEFAULT_ENCRYPTION_KEY) { - if (!encryption_key_id_exists((unsigned int)options->encryption_key_id)) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: ENCRYPTION_KEY_ID %u not available", - (uint)options->encryption_key_id + !srv_encrypt_tables && + options->encryption_key_id != FIL_DEFAULT_ENCRYPTION_KEY) { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: Incorrect ENCRYPTION_KEY_ID %u when encryption is disabled", + (uint)options->encryption_key_id ); - return "ENCRYPTION_KEY_ID"; - - } + return "ENCRYPTION_KEY_ID"; } /* Check atomic writes requirements */

1 0

[Commits] 5aeaba1: MDEV-17382 Hash join algorithm should not be used to join materialized
by IgorBabaev 10 Oct '18

10 Oct '18

revision-id: 5aeaba11c7dcbda40fd9cd68ab060a446ab6eb77 (mariadb-10.2.2-867-g5aeaba1) parent(s): ac7d142d11010c28fdf15aca8a40008566abc4ad author: Igor Babaev committer: Igor Babaev timestamp: 2018-10-10 04:40:30 -0700 message: MDEV-17382 Hash join algorithm should not be used to join materialized derived table / view by equality Now rows of a materialized derived table are always put into a temporary table before join operation. If BNLH is used to join this table with the result of a partial join then both operands of the join are actually put into main memory. In most cases this is not efficient. We could avoid this by sending the rows of the derived table directly to the join operation. However this kind of data flow is not supported yet. Fixed by not allowing usage of hash join algorithm to join a materialized derived table if it's joined by an equality predicate of the form f=e where f is a field of the derived table. Adjusted the results of the test case for 10.2-compatibility. --- mysql-test/r/derived_opt.result | 35 ++++++++++++++++++++++++++++++++++- mysql-test/r/innodb_mrr_cpk.result | 2 +- mysql-test/t/derived_opt.test | 38 ++++++++++++++++++++++++++++++++++++++ sql/sql_select.cc | 8 ++++++++ 4 files changed, 81 insertions(+), 2 deletions(-) diff --git a/mysql-test/r/derived_opt.result b/mysql-test/r/derived_opt.result index 6e4ea1b..e43243c 100644 --- a/mysql-test/r/derived_opt.result +++ b/mysql-test/r/derived_opt.result @@ -499,9 +499,42 @@ where D1.a= t1.a; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 10 Using where -1 PRIMARY <derived2> hash_ALL key0 #hash#key0 5 test.t1.a 100 Using join buffer (flat, BNLH join) +1 PRIMARY <derived2> ref key0 key0 5 test.t1.a 10 2 DERIVED t2 ALL NULL NULL NULL NULL 100 Using filesort set join_cache_level=@tmp_jcl; set optimizer_switch=@tmp_os; drop table t1, t2; +# +# Bug mdev-17382: equi-join of derived table with join_cache_level=4 +# +CREATE TABLE t1 ( +id 
int NOT NULL, +amount decimal DEFAULT NULL, +PRIMARY KEY (id) +); +CREATE TABLE t2 ( +id int NOT NULL, +name varchar(50) DEFAULT NULL, +PRIMARY KEY (id) +); +INSERT INTO t1 VALUES +(1, 10.0000), (2, 20.0000), (3, 30.0000), (4, 40.0000), +(5, NULL), (6, NULL), (7, 70.0000), (8, 80.0000); +INSERT INTO t2 VALUES +(1,'A'), (2,'B'), (3,'C'), (4,'D'), (5, NULL), (6, NULL), +(7,'E'), (8,'F'), (9,'G'), (10,'H'), (11, NULL), (12, NULL); +set join_cache_level=4; +EXPLAIN +SELECT t2.id,t2.name,t.total_amt +FROM t2 +LEFT JOIN +(SELECT id, sum(amount) total_amt FROM t1 GROUP BY id) AS t +ON t2.id=t.id +WHERE t2.id < 3; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2 range PRIMARY PRIMARY 4 NULL 3 Using index condition +1 PRIMARY <derived2> ref key0 key0 5 test.t2.id 2 +2 LATERAL DERIVED t1 eq_ref PRIMARY PRIMARY 4 test.t2.id 1 +set join_cache_level=default; +DROP TABLE t1,t2; set optimizer_switch=@exit_optimizer_switch; diff --git a/mysql-test/r/innodb_mrr_cpk.result b/mysql-test/r/innodb_mrr_cpk.result index 28d7dd5..a2e43d7 100644 --- a/mysql-test/r/innodb_mrr_cpk.result +++ b/mysql-test/r/innodb_mrr_cpk.result @@ -226,7 +226,7 @@ set join_cache_level=3; explain SELECT 1 FROM (SELECT url, id FROM t2 LIMIT 1 OFFSET 20) derived RIGHT JOIN t1 ON t1.id = derived.id; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL # -1 PRIMARY <derived2> hash_ALL key0 #hash#key0 25 test.t1.id # Using join buffer (flat, BNLH join) +1 PRIMARY <derived2> ref key0 key0 25 test.t1.id # 2 DERIVED t2 ALL NULL NULL NULL NULL # set join_cache_level= @tmp_mdev5037; drop table t0,t1,t2; diff --git a/mysql-test/t/derived_opt.test b/mysql-test/t/derived_opt.test index 7f19553..aab95f6 100644 --- a/mysql-test/t/derived_opt.test +++ b/mysql-test/t/derived_opt.test @@ -363,5 +363,43 @@ set join_cache_level=@tmp_jcl; set optimizer_switch=@tmp_os; drop table t1, t2; +--echo # +--echo # Bug mdev-17382: equi-join of derived 
table with join_cache_level=4 +--echo # + +CREATE TABLE t1 ( + id int NOT NULL, + amount decimal DEFAULT NULL, +PRIMARY KEY (id) +); + +CREATE TABLE t2 ( + id int NOT NULL, + name varchar(50) DEFAULT NULL, +PRIMARY KEY (id) +); + +INSERT INTO t1 VALUES +(1, 10.0000), (2, 20.0000), (3, 30.0000), (4, 40.0000), +(5, NULL), (6, NULL), (7, 70.0000), (8, 80.0000); + +INSERT INTO t2 VALUES +(1,'A'), (2,'B'), (3,'C'), (4,'D'), (5, NULL), (6, NULL), +(7,'E'), (8,'F'), (9,'G'), (10,'H'), (11, NULL), (12, NULL); + +set join_cache_level=4; + +EXPLAIN +SELECT t2.id,t2.name,t.total_amt + FROM t2 + LEFT JOIN + (SELECT id, sum(amount) total_amt FROM t1 GROUP BY id) AS t + ON t2.id=t.id + WHERE t2.id < 3; + +set join_cache_level=default; + +DROP TABLE t1,t2; + # The following command must be the last one the file set optimizer_switch=@exit_optimizer_switch; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 6fd161f..2e123df 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -11415,7 +11415,15 @@ uint check_join_cache_usage(JOIN_TAB *tab, effort now. */ if (tab->table->pos_in_table_list->is_materialized_derived()) + { no_bka_cache= true; + /* + Don't use hash join algorithm if the temporary table for the rows + of the derived table will be created with an equi-join key. + */ + if (tab->table->s->keys) + no_hashed_cache= true; + } /* Don't use join buffering if we're dictated not to by no_jbuf_after

1 0

[Commits] 56e6c0f: MDEV-17382 Hash join algorithm should not be used to join materialized
by IgorBabaev 10 Oct '18

10 Oct '18

revision-id: 56e6c0f8301733c166dc678d48d9086630e2865f (mariadb-10.2.2-867-g56e6c0f) parent(s): ac7d142d11010c28fdf15aca8a40008566abc4ad author: Igor Babaev committer: Igor Babaev timestamp: 2018-10-10 04:24:39 -0700 message: MDEV-17382 Hash join algorithm should not be used to join materialized derived table / view by equality Now rows of a materialized derived table are always put into a temporary table before join operation. If BNLH is used to join this table with the result of a partial join then both operands of the join are actually put into main memory. In most cases this is not efficient. We could avoid this by sending the rows of the derived table directly to the join operation. However this kind of data flow is not supported yet. Fixed by not allowing usage of hash join algorithm to join a materialized derived table if it's joined by an equality predicate of the form f=e where f is a field of the derived table. --- mysql-test/r/derived_opt.result | 35 ++++++++++++++++++++++++++++++++++- mysql-test/r/innodb_mrr_cpk.result | 2 +- mysql-test/t/derived_opt.test | 38 ++++++++++++++++++++++++++++++++++++++ sql/sql_select.cc | 8 ++++++++ 4 files changed, 81 insertions(+), 2 deletions(-) diff --git a/mysql-test/r/derived_opt.result b/mysql-test/r/derived_opt.result index 6e4ea1b..0e8b49d 100644 --- a/mysql-test/r/derived_opt.result +++ b/mysql-test/r/derived_opt.result @@ -499,9 +499,42 @@ where D1.a= t1.a; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL 10 Using where -1 PRIMARY <derived2> hash_ALL key0 #hash#key0 5 test.t1.a 100 Using join buffer (flat, BNLH join) +1 PRIMARY <derived2> ref key0 key0 5 test.t1.a 10 2 DERIVED t2 ALL NULL NULL NULL NULL 100 Using filesort set join_cache_level=@tmp_jcl; set optimizer_switch=@tmp_os; drop table t1, t2; +# +# Bug mdev-17382: equi-join of derived table with join_cache_level=4 +# +CREATE TABLE t1 ( +id int NOT NULL, +amount decimal DEFAULT NULL, +PRIMARY KEY (id) 
+); +CREATE TABLE t2 ( +id int NOT NULL, +name varchar(50) DEFAULT NULL, +PRIMARY KEY (id) +); +INSERT INTO t1 VALUES +(1, 10.0000), (2, 20.0000), (3, 30.0000), (4, 40.0000), +(5, NULL), (6, NULL), (7, 70.0000), (8, 80.0000); +INSERT INTO t2 VALUES +(1,'A'), (2,'B'), (3,'C'), (4,'D'), (5, NULL), (6, NULL), +(7,'E'), (8,'F'), (9,'G'), (10,'H'), (11, NULL), (12, NULL); +set join_cache_level=4; +EXPLAIN +SELECT t2.id,t2.name,t.total_amt +FROM t2 +LEFT JOIN +(SELECT id, sum(amount) total_amt FROM t1 GROUP BY id) AS t +ON t2.id=t.id +WHERE t2.id < 3; +id select_type table type possible_keys key key_len ref rows Extra +1 PRIMARY t2 range PRIMARY PRIMARY 4 NULL 3 Using index condition +1 PRIMARY <derived2> ref key0 key0 5 test.t2.id 2 +2 DERIVED t1 ALL NULL NULL NULL NULL 8 Using temporary; Using filesort +set join_cache_level=default; +DROP TABLE t1,t2; set optimizer_switch=@exit_optimizer_switch; diff --git a/mysql-test/r/innodb_mrr_cpk.result b/mysql-test/r/innodb_mrr_cpk.result index 28d7dd5..a2e43d7 100644 --- a/mysql-test/r/innodb_mrr_cpk.result +++ b/mysql-test/r/innodb_mrr_cpk.result @@ -226,7 +226,7 @@ set join_cache_level=3; explain SELECT 1 FROM (SELECT url, id FROM t2 LIMIT 1 OFFSET 20) derived RIGHT JOIN t1 ON t1.id = derived.id; id select_type table type possible_keys key key_len ref rows Extra 1 PRIMARY t1 ALL NULL NULL NULL NULL # -1 PRIMARY <derived2> hash_ALL key0 #hash#key0 25 test.t1.id # Using join buffer (flat, BNLH join) +1 PRIMARY <derived2> ref key0 key0 25 test.t1.id # 2 DERIVED t2 ALL NULL NULL NULL NULL # set join_cache_level= @tmp_mdev5037; drop table t0,t1,t2; diff --git a/mysql-test/t/derived_opt.test b/mysql-test/t/derived_opt.test index 7f19553..aab95f6 100644 --- a/mysql-test/t/derived_opt.test +++ b/mysql-test/t/derived_opt.test @@ -363,5 +363,43 @@ set join_cache_level=@tmp_jcl; set optimizer_switch=@tmp_os; drop table t1, t2; +--echo # +--echo # Bug mdev-17382: equi-join of derived table with join_cache_level=4 +--echo # + +CREATE 
TABLE t1 ( + id int NOT NULL, + amount decimal DEFAULT NULL, +PRIMARY KEY (id) +); + +CREATE TABLE t2 ( + id int NOT NULL, + name varchar(50) DEFAULT NULL, +PRIMARY KEY (id) +); + +INSERT INTO t1 VALUES +(1, 10.0000), (2, 20.0000), (3, 30.0000), (4, 40.0000), +(5, NULL), (6, NULL), (7, 70.0000), (8, 80.0000); + +INSERT INTO t2 VALUES +(1,'A'), (2,'B'), (3,'C'), (4,'D'), (5, NULL), (6, NULL), +(7,'E'), (8,'F'), (9,'G'), (10,'H'), (11, NULL), (12, NULL); + +set join_cache_level=4; + +EXPLAIN +SELECT t2.id,t2.name,t.total_amt + FROM t2 + LEFT JOIN + (SELECT id, sum(amount) total_amt FROM t1 GROUP BY id) AS t + ON t2.id=t.id + WHERE t2.id < 3; + +set join_cache_level=default; + +DROP TABLE t1,t2; + # The following command must be the last one the file set optimizer_switch=@exit_optimizer_switch; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 6fd161f..2e123df 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -11415,7 +11415,15 @@ uint check_join_cache_usage(JOIN_TAB *tab, effort now. */ if (tab->table->pos_in_table_list->is_materialized_derived()) + { no_bka_cache= true; + /* + Don't use hash join algorithm if the temporary table for the rows + of the derived table will be created with an equi-join key. + */ + if (tab->table->s->keys) + no_hashed_cache= true; + } /* Don't use join buffering if we're dictated not to by no_jbuf_after

1 0

[Commits] f1a69879f24: MDEV-17403: Test failure on galera.galera_enum
by jan 10 Oct '18

10 Oct '18

revision-id: f1a69879f2498deccc16c317d78d9f91ad7acc63 (mariadb-10.1.35-82-gf1a69879f24) parent(s): f517d8c7425257b6b9fe81c82c489e1e5619898d author: Jan Lindström committer: Jan Lindström timestamp: 2018-10-10 09:14:16 +0300 message: MDEV-17403: Test failure on galera.galera_enum Add wait on second node. --- mysql-test/suite/galera/r/galera_enum.result | 37 ++++++++++++++++------------ mysql-test/suite/galera/t/galera_enum.test | 18 +++++++++----- 2 files changed, 33 insertions(+), 22 deletions(-) diff --git a/mysql-test/suite/galera/r/galera_enum.result b/mysql-test/suite/galera/r/galera_enum.result index e853c5c9943..7b42aab264c 100644 --- a/mysql-test/suite/galera/r/galera_enum.result +++ b/mysql-test/suite/galera/r/galera_enum.result @@ -4,23 +4,23 @@ INSERT INTO t1 VALUES ('one'), ('two'); INSERT INTO t1 VALUES (0), (1), (2); Warnings: Warning 1265 Data truncated for column 'f1' at row 1 -SELECT COUNT(*) = 6 FROM t1; -COUNT(*) = 6 -1 -SELECT COUNT(*) = 2 FROM t1 where f1 = ''; -COUNT(*) = 2 -1 -SELECT COUNT(*) = 2 FROM t1 where f1 = 'one'; -COUNT(*) = 2 -1 +SELECT COUNT(*) FROM t1; +COUNT(*) +6 +SELECT COUNT(*) FROM t1 where f1 = ''; +COUNT(*) +2 +SELECT COUNT(*) FROM t1 where f1 = 'one'; +COUNT(*) +2 DROP TABLE t1; CREATE TABLE t1 (f1 ENUM('', 'one', 'two', 'three', 'four') PRIMARY KEY) ENGINE=InnoDB; INSERT INTO t1 VALUES (''), ('one'), ('two'); -SELECT COUNT(*) = 3 FROM t1; -COUNT(*) = 3 -1 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = ''; -COUNT(*) = 1 +SELECT COUNT(*) FROM t1; +COUNT(*) +3 +SELECT COUNT(*) FROM t1 WHERE f1 = ''; +COUNT(*) 1 SET AUTOCOMMIT=OFF; START TRANSACTION; @@ -31,7 +31,12 @@ UPDATE t1 SET f1 = 'four' where f1 = ''; COMMIT; COMMIT; ERROR 40001: Deadlock found when trying to get lock; try restarting transaction -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 'three'; -COUNT(*) = 1 +SELECT COUNT(*) FROM t1 WHERE f1 = 'three'; +COUNT(*) 1 +SELECT * FROM t1; +f1 +one +two +three DROP TABLE t1; diff --git a/mysql-test/suite/galera/t/galera_enum.test 
b/mysql-test/suite/galera/t/galera_enum.test index ff5332486aa..782180a3aa1 100644 --- a/mysql-test/suite/galera/t/galera_enum.test +++ b/mysql-test/suite/galera/t/galera_enum.test @@ -17,9 +17,12 @@ INSERT INTO t1 VALUES ('one'), ('two'); INSERT INTO t1 VALUES (0), (1), (2); --connection node_2 -SELECT COUNT(*) = 6 FROM t1; -SELECT COUNT(*) = 2 FROM t1 where f1 = ''; -SELECT COUNT(*) = 2 FROM t1 where f1 = 'one'; +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/t1'; +--source include/wait_condition.inc + +SELECT COUNT(*) FROM t1; +SELECT COUNT(*) FROM t1 where f1 = ''; +SELECT COUNT(*) FROM t1 where f1 = 'one'; DROP TABLE t1; @@ -33,8 +36,10 @@ CREATE TABLE t1 (f1 ENUM('', 'one', 'two', 'three', 'four') PRIMARY KEY) ENGINE= INSERT INTO t1 VALUES (''), ('one'), ('two'); --connection node_2 -SELECT COUNT(*) = 3 FROM t1; -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = ''; +--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.INNODB_SYS_TABLES WHERE NAME LIKE 'test/t1'; +--source include/wait_condition.inc +SELECT COUNT(*) FROM t1; +SELECT COUNT(*) FROM t1 WHERE f1 = ''; # Conflict @@ -57,6 +62,7 @@ COMMIT; --connection node_1 -SELECT COUNT(*) = 1 FROM t1 WHERE f1 = 'three'; +SELECT COUNT(*) FROM t1 WHERE f1 = 'three'; +SELECT * FROM t1; DROP TABLE t1;

1 0

[Commits] 5dc4ca6: MDEV-17096 Pushdown of simple derived tables to storage engines
by IgorBabaev 09 Oct '18

09 Oct '18

revision-id: 5dc4ca6554c9bb4685b64c35f345c06d28c8d3f9 (mariadb-10.3.6-130-g5dc4ca6) parent(s): 171fbbb968ed52dc7e2bbd33a6f8f72bbc6f5e88 author: Igor Babaev committer: Igor Babaev timestamp: 2018-10-09 02:36:09 -0700 message: MDEV-17096 Pushdown of simple derived tables to storage engines Interface + Proof of Concept for federatedx. --- libmysqld/CMakeLists.txt | 2 +- sql/CMakeLists.txt | 2 +- sql/derived_handler.cc | 72 ++++++++++++++++++++ sql/derived_handler.h | 59 +++++++++++++++++ sql/handler.h | 10 +++ sql/sql_derived.cc | 78 +++++++++++++++++++++- sql/sql_explain.cc | 22 ++++-- sql/sql_explain.h | 1 + sql/sql_lex.cc | 6 +- sql/sql_select.cc | 8 ++- sql/sql_select.h | 18 ++++- sql/table.h | 7 ++ storage/federatedx/ha_federatedx.cc | 129 +++++++++++++++++++++++++++++++++++- storage/federatedx/ha_federatedx.h | 24 +++++++ 14 files changed, 424 insertions(+), 14 deletions(-) diff --git a/libmysqld/CMakeLists.txt b/libmysqld/CMakeLists.txt index 99b6208..dc3d190 100644 --- a/libmysqld/CMakeLists.txt +++ b/libmysqld/CMakeLists.txt @@ -77,7 +77,7 @@ SET(SQL_EMBEDDED_SOURCES emb_qcache.cc libmysqld.c lib_sql.cc ../sql/debug_sync.cc ../sql/opt_table_elimination.cc ../sql/sql_prepare.cc ../sql/sql_rename.cc ../sql/sql_repl.cc ../sql/sql_select.cc ../sql/sql_servers.cc - ../sql/group_by_handler.cc + ../sql/group_by_handler.cc ../sql/derived_handler.cc ../sql/sql_show.cc ../sql/sql_state.c ../sql/sql_statistics.cc ../sql/sql_string.cc ../sql/sql_tablespace.cc ../sql/sql_table.cc ../sql/sql_test.cc diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 708c36a..f76753a 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -96,7 +96,7 @@ SET (SQL_SOURCE sql_partition.cc sql_plugin.cc sql_prepare.cc sql_rename.cc debug_sync.cc sql_repl.cc sql_select.cc sql_show.cc sql_state.c - group_by_handler.cc + group_by_handler.cc derived_handler.cc sql_statistics.cc sql_string.cc lex_string.h sql_table.cc sql_test.cc sql_trigger.cc sql_udf.cc sql_union.cc sql_update.cc 
sql_view.cc strfunc.cc table.cc thr_malloc.cc diff --git a/sql/derived_handler.cc b/sql/derived_handler.cc new file mode 100644 index 0000000..dd017fd --- /dev/null +++ b/sql/derived_handler.cc @@ -0,0 +1,72 @@ +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_select.h" +#include "derived_handler.h" + +void derived_handler::set_derived(TABLE_LIST *tbl) +{ + derived= tbl; + table= tbl->table; + unit= tbl->derived; + select= unit->first_select(); + tmp_table_param= select->next_select() ? + ((select_unit *)(unit->result))->get_tmp_table_param() : + &select->join->tmp_table_param; +} + +Pushdown_derived::~Pushdown_derived() +{ + delete handler; +} + +int Pushdown_derived::execute() +{ + int err; + THD *thd= handler->thd; + TABLE *table= handler->table; + TMP_TABLE_PARAM *tmp_table_param= handler->tmp_table_param; + + DBUG_ENTER("Pushdown_query::execute"); + + if ((err= handler->init_scan())) + goto error; + + while (!(err= handler->next_row())) + { + if (unlikely(thd->check_killed())) + { + handler->end_scan(); + DBUG_RETURN(-1); + } + + if ((err= table->file->ha_write_tmp_row(table->record[0]))) + { + bool is_duplicate; + if (likely(!table->file->is_fatal_error(err, HA_CHECK_DUP))) + continue; // Distinct elimination + + if (create_internal_tmp_table_from_heap(thd, table, + tmp_table_param->start_recinfo, + &tmp_table_param->recinfo, + err, 1, &is_duplicate)) + DBUG_RETURN(1); + if (is_duplicate) + continue; + } + } + + if (err != 0 && err != HA_ERR_END_OF_FILE) + goto error; + + if ((err= handler->end_scan())) + goto error_2; + + DBUG_RETURN(0); + +error: + handler->end_scan(); +error_2: + handler->print_error(err, MYF(0)); + DBUG_RETURN(-1); // Error not sent to client +} + diff --git a/sql/derived_handler.h b/sql/derived_handler.h new file mode 100644 index 0000000..a7b1294 --- /dev/null +++ b/sql/derived_handler.h @@ -0,0 +1,59 @@ +#ifndef DERIVED_HANDLER_INCLUDED +#define DERIVED_HANDLER_INCLUDED + +#include "mariadb.h" +#include "sql_priv.h" + +class 
TMP_TABLE_PARAM; + +class derived_handler +{ +public: + THD *thd; + handlerton *ht; + + TABLE_LIST *derived; + + /* + Temporary table where all results should be stored in record[0] + The table has a field for every item from the select list of + the specification of derived. + */ + TABLE *table; + + TMP_TABLE_PARAM *tmp_table_param; + + struct st_select_lex_unit *unit; + + struct st_select_lex *select; + + derived_handler(THD *thd_arg, handlerton *ht_arg) + : thd(thd_arg), ht(ht_arg), derived(0),table(0), tmp_table_param(0), + unit(0), select(0) {} + virtual ~derived_handler() {} + + /* + Functions to scan data. All of these return 0 if ok, error code in case + of error + */ + + /* Initialize the process of producing rows of the derived table */ + virtual int init_scan()= 0; + + /* + Put the next produced row of the derived in table->record[0] and return 0. + Return HA_ERR_END_OF_FILE if there are no more rows, return other error + number in case of fatal error. + */ + virtual int next_row()= 0; + + /* End producing rows */ + virtual int end_scan()=0; + + /* Report errors */ + virtual void print_error(int error, myf errflag)=0; + + void set_derived(TABLE_LIST *tbl); +}; + +#endif /* DERIVED_HANDLER_INCLUDED */ diff --git a/sql/handler.h b/sql/handler.h index 68a54cc..ce6dd35 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -1183,6 +1183,7 @@ struct handler_iterator { class handler; class group_by_handler; +class derived_handler; struct Query; typedef class st_select_lex SELECT_LEX; typedef struct st_order ORDER; @@ -1502,6 +1503,15 @@ struct handlerton */ group_by_handler *(*create_group_by)(THD *thd, Query *query); + /* + Create and return a derived_handler if the storage engine can execute + the derived table 'derived', otherwise return NULL. + In a general case 'derived' may contain tables not from the engine. + If the engine cannot handle or does not want to handle such pushed derived + the function create_derived has to return NULL.
+ */ + derived_handler *(*create_derived)(THD *thd, TABLE_LIST *derived); + /********************************************************************* Table discovery API. It allows the server to "discover" tables that exist in the storage diff --git a/sql/sql_derived.cc b/sql/sql_derived.cc index d65969d..7e7ac02 100644 --- a/sql/sql_derived.cc +++ b/sql/sql_derived.cc @@ -27,6 +27,7 @@ #include "unireg.h" #include "sql_derived.h" #include "sql_select.h" +#include "derived_handler.h" #include "sql_base.h" #include "sql_view.h" // check_duplicate_names #include "sql_acl.h" // SELECT_ACL @@ -384,9 +385,16 @@ bool mysql_derived_merge(THD *thd, LEX *lex, TABLE_LIST *derived) DBUG_RETURN(FALSE); } - if (thd->lex->sql_command == SQLCOM_UPDATE_MULTI || - thd->lex->sql_command == SQLCOM_DELETE_MULTI) - thd->save_prep_leaf_list= TRUE; + if ((derived->dt_handler= derived->find_derived_handler(thd))) + { + derived->change_refs_to_fields(); + derived->set_materialized_derived(); + DBUG_RETURN(FALSE); + } + + if (thd->lex->sql_command == SQLCOM_UPDATE_MULTI || + thd->lex->sql_command == SQLCOM_DELETE_MULTI) + thd->save_prep_leaf_list= TRUE; arena= thd->activate_stmt_arena_if_needed(&backup); // For easier test @@ -904,6 +912,15 @@ bool mysql_derived_optimize(THD *thd, LEX *lex, TABLE_LIST *derived) DBUG_RETURN(FALSE); } + if (derived->is_materialized_derived() && !derived->dt_handler) + derived->dt_handler= derived->find_derived_handler(thd); + if (derived->dt_handler) + { + if (!(derived->pushdown_derived= + new (thd->mem_root) Pushdown_derived(derived, derived->dt_handler))) + DBUG_RETURN(1); + } + lex->current_select= first_select; if (unit->is_unit_op()) @@ -1108,6 +1125,17 @@ bool mysql_derived_fill(THD *thd, LEX *lex, TABLE_LIST *derived) select_unit *derived_result= derived->derived_result; SELECT_LEX *save_current_select= lex->current_select; + if (derived->pushdown_derived) + { + int res; + if (unit->executed) + DBUG_RETURN(FALSE); + res= 
derived->pushdown_derived->execute(); + unit->executed= true; + delete derived->pushdown_derived; + DBUG_RETURN(res); + } + if (unit->executed && !derived_is_recursive && (unit->uncacheable & UNCACHEABLE_DEPENDENT)) { @@ -1404,3 +1432,47 @@ bool pushdown_cond_for_derived(THD *thd, Item *cond, TABLE_LIST *derived) thd->lex->current_select= save_curr_select; DBUG_RETURN(false); } + + +derived_handler *TABLE_LIST::find_derived_handler(THD *thd) +{ + if (!derived || is_recursive_with_table()) + return 0; + for (SELECT_LEX *sl= derived->first_select(); sl; sl= sl->next_select()) + { + if (!(sl->join)) + continue; + for (TABLE_LIST *tbl= sl->join->tables_list; tbl; tbl= tbl->next_local) + { + if (!tbl->table) + continue; + handlerton *ht= tbl->table->file->partition_ht(); + if (!ht->create_derived) + continue; + derived_handler *dh= ht->create_derived(thd, this); + if (dh) + { + dh->set_derived(this); + return dh; + } + } + } + return 0; +} + + +TABLE_LIST *TABLE_LIST::get_first_table() +{ + for (SELECT_LEX *sl= derived->first_select(); sl; sl= sl->next_select()) + { + if (!(sl->join)) + continue; + for (TABLE_LIST *tbl= sl->join->tables_list; tbl; tbl= tbl->next_local) + { + if (!tbl->table) + continue; + return tbl; + } + } + return 0; +} diff --git a/sql/sql_explain.cc b/sql/sql_explain.cc index 1c45b05..3f1dc80 100644 --- a/sql/sql_explain.cc +++ b/sql/sql_explain.cc @@ -34,6 +34,8 @@ const char *unit_operation_text[4]= "UNIT RESULT","UNION RESULT","INTERSECT RESULT","EXCEPT RESULT" }; +const char *pushed_derived_text= "PUSHED DERIVED"; + static void write_item(Json_writer *writer, Item *item); static void append_item_to_str(String *out, Item *item); @@ -334,6 +336,9 @@ int print_explain_row(select_result_sink *result, List<Item> item_list; Item *item; + if (!select_type[0]) + return 0; + item_list.push_back(new (mem_root) Item_int(thd, (int32) select_number), mem_root); item_list.push_back(new (mem_root) Item_string_sys(thd, select_type), @@ -746,7 +751,15 @@ int 
Explain_select::print_explain(Explain_query *query, THD *thd= output->thd; MEM_ROOT *mem_root= thd->mem_root; - if (message) + if (select_type == pushed_derived_text) + { + print_explain_message_line(output, explain_flags, is_analyze, + select_id /*select number*/, + select_type, + NULL, /* rows */ + NULL); + } + else if (message) { List<Item> item_list; Item *item_null= new (mem_root) Item_null(thd); @@ -869,14 +882,15 @@ void Explain_select::print_explain_json(Explain_query *query, bool started_cache= print_explain_json_cache(writer, is_analyze); - if (message) + if (message || select_type == pushed_derived_text) { writer->add_member("query_block").start_object(); writer->add_member("select_id").add_ll(select_id); add_linkage(writer); writer->add_member("table").start_object(); - writer->add_member("message").add_str(message); + writer->add_member("message").add_str(select_type == pushed_derived_text ? + "Pushed derived" : message); writer->end_object(); print_explain_json_for_children(query, writer, is_analyze); @@ -1205,7 +1219,7 @@ int Explain_table_access::print_explain(select_result_sink *output, uint8 explai { THD *thd= output->thd; MEM_ROOT *mem_root= thd->mem_root; - + List<Item> item_list; Item *item_null= new (mem_root) Item_null(thd); diff --git a/sql/sql_explain.h b/sql/sql_explain.h index 38250cc..549b085 100644 --- a/sql/sql_explain.h +++ b/sql/sql_explain.h @@ -328,6 +328,7 @@ class Explain_aggr_window_funcs : public Explain_aggr_node ///////////////////////////////////////////////////////////////////////////// extern const char *unit_operation_text[4]; +extern const char *pushed_derived_text; /* Explain structure for a UNION. 
diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index bff6dfb..b309a0a 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -4659,7 +4659,11 @@ void st_select_lex::set_explain_type(bool on_the_fly) /* If we're a direct child of a UNION, we're the first sibling there */ if (linkage == DERIVED_TABLE_TYPE) { - if (is_uncacheable & UNCACHEABLE_DEPENDENT) + bool is_pushed_master_unit= master_unit()->derived && + master_unit()->derived->pushdown_derived; + if (is_pushed_master_unit) + type= pushed_derived_text; + else if (is_uncacheable & UNCACHEABLE_DEPENDENT) type= "LATERAL DERIVED"; else type= "DERIVED"; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 3b92751..d0acbef 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -25681,6 +25681,7 @@ bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result) DBUG_ENTER("mysql_explain_union"); bool res= 0; SELECT_LEX *first= unit->first_select(); + bool is_pushed_union= unit->derived && unit->derived->pushdown_derived; for (SELECT_LEX *sl= first; sl; sl= sl->next_select()) { @@ -25698,9 +25699,12 @@ bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result) } if (!(res= unit->prepare(unit->derived, result, SELECT_NO_UNLOCK | SELECT_DESCRIBE))) - res= unit->exec(); + { + if (!is_pushed_union) + res= unit->exec(); + } } - else + else { thd->lex->current_select= first; unit->set_limit(unit->global_parameters()); diff --git a/sql/sql_select.h b/sql/sql_select.h index 4140a02..e51367c 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -2442,7 +2442,23 @@ class Pushdown_query: public Sql_alloc ~Pushdown_query() { delete handler; } /* Function that calls the above scan functions */ - int execute(JOIN *join); + int execute(JOIN *); +}; + +class derived_handler; + +class Pushdown_derived: public Sql_alloc +{ +public: + TABLE_LIST *derived; + derived_handler *handler; + + Pushdown_derived(TABLE_LIST *tbl, derived_handler *h) + : derived(tbl), handler(h) {} + + ~Pushdown_derived(); 
+ + int execute(); }; bool test_if_order_compatible(SQL_I_List<ORDER> &a, SQL_I_List<ORDER> &b); diff --git a/sql/table.h b/sql/table.h index b75fa90..33cf23b 100644 --- a/sql/table.h +++ b/sql/table.h @@ -55,6 +55,8 @@ class Virtual_column_info; class Table_triggers_list; class TMP_TABLE_PARAM; class SEQUENCE; +class derived_handler; +class Pushdown_derived; /* Used to identify NESTED_JOIN structures within a join (applicable only to @@ -2118,6 +2120,8 @@ struct TABLE_LIST TABLE_LIST * next_with_rec_ref; bool is_derived_with_recursive_reference; bool block_handle_derived; + derived_handler *dt_handler; + Pushdown_derived *pushdown_derived; ST_SCHEMA_TABLE *schema_table; /* Information_schema table */ st_select_lex *schema_select_lex; /* @@ -2584,6 +2588,9 @@ struct TABLE_LIST } void set_lock_type(THD* thd, enum thr_lock_type lock); + derived_handler *find_derived_handler(THD *thd); + TABLE_LIST *get_first_table(); + private: bool prep_check_option(THD *thd, uint8 check_opt_type); bool prep_where(THD *thd, Item **conds, bool no_where_clause); diff --git a/storage/federatedx/ha_federatedx.cc b/storage/federatedx/ha_federatedx.cc index 74d547c..45cd14e 100644 --- a/storage/federatedx/ha_federatedx.cc +++ b/storage/federatedx/ha_federatedx.cc @@ -319,6 +319,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "sql_analyse.h" // append_escaped() #include "sql_show.h" // append_identifier() #include "tztime.h" // my_tz_find() +#include "sql_select.h" #ifdef I_AM_PARANOID #define MIN_PORT 1023 @@ -401,6 +402,10 @@ static void init_federated_psi_keys(void) #define init_federated_psi_keys() /* no-op */ #endif /* HAVE_PSI_INTERFACE */ +handlerton* federatedx_hton; + +static derived_handler* +create_federatedx_derived_handler(THD* thd, TABLE_LIST *derived); /* Initialize the federatedx handler. 
@@ -418,7 +423,7 @@ int federatedx_db_init(void *p) { DBUG_ENTER("federatedx_db_init"); init_federated_psi_keys(); - handlerton *federatedx_hton= (handlerton *)p; + federatedx_hton= (handlerton *)p; federatedx_hton->state= SHOW_OPTION_YES; /* Needed to work with old .frm files */ federatedx_hton->db_type= DB_TYPE_FEDERATED_DB; @@ -432,6 +437,7 @@ int federatedx_db_init(void *p) federatedx_hton->discover_table_structure= ha_federatedx::discover_assisted; federatedx_hton->create= federatedx_create_handler; federatedx_hton->flags= HTON_ALTER_NOT_SUPPORTED; + federatedx_hton->create_derived= create_federatedx_derived_handler; if (mysql_mutex_init(fe_key_mutex_federatedx, &federatedx_mutex, MY_MUTEX_INIT_FAST)) @@ -3668,6 +3674,126 @@ int ha_federatedx::discover_assisted(handlerton *hton, THD* thd, return error; } +static derived_handler* +create_federatedx_derived_handler(THD* thd, TABLE_LIST *derived) +{ + ha_federatedx_derived_handler* handler = NULL; + handlerton *ht= 0; + + SELECT_LEX_UNIT *unit= derived->derived; + + for (SELECT_LEX *sl= unit->first_select(); sl; sl= sl->next_select()) + { + if (!(sl->join)) + return 0; + for (TABLE_LIST *tbl= sl->join->tables_list; tbl; tbl= tbl->next_local) + { + if (!tbl->table) + return 0; + if (!ht) + ht= tbl->table->file->partition_ht(); + else if (ht != tbl->table->file->partition_ht()) + return 0; + } + } + + handler= new ha_federatedx_derived_handler(thd, derived); + + return handler; +} + + +ha_federatedx_derived_handler::ha_federatedx_derived_handler(THD *thd, + TABLE_LIST *dt) + : derived_handler(thd, federatedx_hton) +{ + derived= dt; +} + +ha_federatedx_derived_handler::~ha_federatedx_derived_handler() {} + +int ha_federatedx_derived_handler::init_scan() +{ + char query_buff[4096]; + THD *thd; + int rc= 0; + + DBUG_ENTER("ha_federatedx_derived_handler::init_scan"); + + TABLE *table= derived->get_first_table()->table; + ha_federatedx *h= (ha_federatedx *) table->file; + io= h->io; + share= 
get_share(table->s->table_name.str, table); + thd= table->in_use; + txn= h->get_txn(thd); + if ((rc= txn->acquire(share, thd, TRUE, &io))) + DBUG_RETURN(rc); + + String derived_query(query_buff, sizeof(query_buff), thd->charset()); + derived_query.length(0); + derived->derived->print(&derived_query, QT_ORDINARY); + + // if (stored_result) + // (void) free_result(); + + if (io->query(derived_query.ptr(), derived_query.length())) + goto err; + + stored_result= io->store_result(); + if (!stored_result) + goto err; + + DBUG_RETURN(0); + +err: + DBUG_RETURN(HA_FEDERATEDX_ERROR_WITH_REMOTE_SYSTEM); +} + +int ha_federatedx_derived_handler::next_row() +{ + int rc; + FEDERATEDX_IO_ROW *row; + ulong *lengths; + Field **field; + int column= 0; + Time_zone *saved_time_zone= table->in_use->variables.time_zone; + DBUG_ENTER("ha_federatedx_derived_handler::next_row"); + + if ((rc= txn->acquire(share, table->in_use, TRUE, &io))) + DBUG_RETURN(rc); + + if (!(row= io->fetch_row(stored_result))) + DBUG_RETURN(HA_ERR_END_OF_FILE); + + /* Convert row to internal format */ + table->in_use->variables.time_zone= UTC; + lengths= io->fetch_lengths(stored_result); + + for (field= table->field; *field; field++, column++) + { + if (io->is_column_null(row, column)) + (*field)->set_null(); + else + { + (*field)->set_notnull(); + (*field)->store(io->get_column_data(row, column), + lengths[column], &my_charset_bin); + } + } + table->in_use->variables.time_zone= saved_time_zone; + + DBUG_RETURN(rc); +} + +int ha_federatedx_derived_handler::end_scan() +{ + DBUG_ENTER("ha_federatedx_derived_handler::end_scan"); + DBUG_RETURN(0); +} + +void ha_federatedx_derived_handler::print_error(int, unsigned long) +{ +} struct st_mysql_storage_engine federatedx_storage_engine= { MYSQL_HANDLERTON_INTERFACE_VERSION }; @@ -3689,3 +3815,4 @@ maria_declare_plugin(federatedx) MariaDB_PLUGIN_MATURITY_STABLE /* maturity */ } maria_declare_plugin_end; + diff --git a/storage/federatedx/ha_federatedx.h 
b/storage/federatedx/ha_federatedx.h index 16a1944..61c7029 100644 --- a/storage/federatedx/ha_federatedx.h +++ b/storage/federatedx/ha_federatedx.h @@ -1,3 +1,5 @@ +#ifndef HA_FEDERATEDX_INCLUDED +#define HA_FEDERATEDX_INCLUDED /* Copyright (c) 2008, Patrick Galbraith All rights reserved. @@ -40,6 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include <my_global.h> #include <thr_lock.h> #include "handler.h" +#include "derived_handler.h" class federatedx_io; @@ -445,6 +448,8 @@ class ha_federatedx: public handler int external_lock(THD *thd, int lock_type); int reset(void); int free_result(void); + + friend class ha_federatedx_derived_handler; }; extern const char ident_quote_char; // Character for quoting @@ -460,3 +465,22 @@ extern federatedx_io *instantiate_io_mysql(MEM_ROOT *server_root, FEDERATEDX_SERVER *server); extern federatedx_io *instantiate_io_null(MEM_ROOT *server_root, FEDERATEDX_SERVER *server); + +class ha_federatedx_derived_handler: public derived_handler +{ +private: + FEDERATEDX_SHARE *share; + federatedx_txn *txn; + federatedx_io *io; + FEDERATEDX_IO_RESULT *stored_result; + +public: + ha_federatedx_derived_handler(THD* thd_arg, TABLE_LIST *tbl); + ~ha_federatedx_derived_handler(); + int init_scan(); + int next_row(); + int end_scan(); + void print_error(int, unsigned long); +}; + +#endif /* HA_FEDERATEDX_INCLUDED */

1 0

[Commits] 9267625: Make RangeLockMgr::UnLock() only unlock the keys that it is asked to unlock.
by psergey＠askmonty.org 08 Oct '18

08 Oct '18

revision-id: 926762540d9873042d0a1d2afc5fc2c456260908 parent(s): aaa8e2bf45cdebcbbb10fb33e87503a2f4064c33 committer: Sergei Petrunia branch nick: mysql-5.6-rocksdb-rangelocking2 timestamp: 2018-10-08 19:35:19 +0300 message: Make RangeLockMgr::UnLock() only unlock the keys that it is asked to unlock. The MyRocks part. --- rocksdb | 2 +- storage/rocksdb/range_locking/locktree/locktree.cc | 32 ++++++++++++++++++++-- storage/rocksdb/range_locking/locktree/locktree.h | 2 +- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/rocksdb b/rocksdb index d629f93..b2be543 160000 --- a/rocksdb +++ b/rocksdb @@ -1 +1 @@ -Subproject commit d629f934d907f506bd114d9d48852e6570d333da +Subproject commit b2be543c0d6084c1bd5ff99d6ffed90ff3d0bd24 diff --git a/storage/rocksdb/range_locking/locktree/locktree.cc b/storage/rocksdb/range_locking/locktree/locktree.cc index 069aae2..75f7830 100644 --- a/storage/rocksdb/range_locking/locktree/locktree.cc +++ b/storage/rocksdb/range_locking/locktree/locktree.cc @@ -521,12 +521,40 @@ bool locktree::sto_try_release(TXNID txnid) { return released; } + // release all of the locks for a txnid whose endpoints are pairs // in the given range buffer. -void locktree::release_locks(TXNID txnid, const range_buffer *ranges) { +void locktree::release_locks(TXNID txnid, const range_buffer *ranges, + bool all_trx_locks_hint) { // try the single txn optimization. if it worked, then all of the // locks are already released, otherwise we need to do it here. - bool released = sto_try_release(txnid); + bool released; + if (all_trx_locks_hint) + { + // This will release all of the locks the transaction is holding + released = sto_try_release(txnid); + } + else + { + /* + psergey: we are asked to release *Some* of the locks the transaction + is holding. + We could try doing that without leaving the STO mode, but right now, + the easiest way is to exit the STO mode and let the non-STO code path + handle it. 
+ */ + if (toku_unsafe_fetch(m_sto_txnid) != TXNID_NONE) { + // check the bit again with a prepared locked keyrange, + // which protects the optimization bits and rangetree data + concurrent_tree::locked_keyrange lkr; + lkr.prepare(m_rangetree); + if (m_sto_txnid != TXNID_NONE) { + sto_end_early(&lkr); + } + lkr.release(); + } + released = false; + } if (!released) { range_buffer::iterator iter(ranges); range_buffer::iterator::record rec; diff --git a/storage/rocksdb/range_locking/locktree/locktree.h b/storage/rocksdb/range_locking/locktree/locktree.h index bb3ea9d..0e6b5d6 100644 --- a/storage/rocksdb/range_locking/locktree/locktree.h +++ b/storage/rocksdb/range_locking/locktree/locktree.h @@ -296,7 +296,7 @@ namespace toku { const DBT *left_key, const DBT *right_key, txnid_set *conflicts); // effect: Release all of the lock ranges represented by the range buffer for a txnid. - void release_locks(TXNID txnid, const range_buffer *ranges); + void release_locks(TXNID txnid, const range_buffer *ranges, bool all_trx_locks_hint= false); // effect: Runs escalation on this locktree void escalate(lt_escalate_cb after_escalate_callback, void *extra);

1 0

[Commits] b2be543: - Make RangeLockMgr::UnLock() only unlock the keys that it is asked
by psergey＠askmonty.org 08 Oct '18

08 Oct '18

revision-id: b2be543c0d6084c1bd5ff99d6ffed90ff3d0bd24 parent(s): d629f934d907f506bd114d9d48852e6570d333da committer: Sergei Petrunia branch nick: modules timestamp: 2018-10-08 19:29:10 +0300 message: - Make RangeLockMgr::UnLock() only unlock the keys that it is asked to unlock. = In STO-mode, it used to unlock all keys. = Now fixed by leaving the STO-mode. This is the easiest fix, although probably not optimal. (We could have walked the STO array from its back.) - Add RangeLockMgr::UnLockAll() that does release all of the transaction's locks. - Performance of unlock operation can still be improved. --- utilities/transactions/pessimistic_transaction.cc | 2 +- .../transactions/pessimistic_transaction_db.cc | 10 ++++++-- .../transactions/pessimistic_transaction_db.h | 3 ++- utilities/transactions/transaction_lock_mgr.cc | 29 ++++++++++++++++------ utilities/transactions/transaction_lock_mgr.h | 6 +++++ 5 files changed, 39 insertions(+), 11 deletions(-) diff --git a/utilities/transactions/pessimistic_transaction.cc b/utilities/transactions/pessimistic_transaction.cc index befa19f..a83db5e 100644 --- a/utilities/transactions/pessimistic_transaction.cc +++ b/utilities/transactions/pessimistic_transaction.cc @@ -97,7 +97,7 @@ PessimisticTransaction::~PessimisticTransaction() { } void PessimisticTransaction::Clear() { - txn_db_impl_->UnLock(this, &GetTrackedKeys()); + txn_db_impl_->UnLock(this, &GetTrackedKeys(), /*all_keys_hint=*/true); TransactionBaseImpl::Clear(); } diff --git a/utilities/transactions/pessimistic_transaction_db.cc b/utilities/transactions/pessimistic_transaction_db.cc index dcc420a..2325ea9 100644 --- a/utilities/transactions/pessimistic_transaction_db.cc +++ b/utilities/transactions/pessimistic_transaction_db.cc @@ -380,9 +380,15 @@ Status PessimisticTransactionDB::TryLock(PessimisticTransaction* txn, } void PessimisticTransactionDB::UnLock(PessimisticTransaction* txn, - const TransactionKeyMap* keys) { + const TransactionKeyMap* keys, + bool all_keys_hint) {
if (use_range_locking) - range_lock_mgr_.UnLock(txn, keys, GetEnv()); + { + if (all_keys_hint) + range_lock_mgr_.UnLockAll(txn, keys, GetEnv()); + else + range_lock_mgr_.UnLock(txn, keys, GetEnv()); + } else lock_mgr_.UnLock(txn, keys, GetEnv()); } diff --git a/utilities/transactions/pessimistic_transaction_db.h b/utilities/transactions/pessimistic_transaction_db.h index 1876de5..262ca99 100644 --- a/utilities/transactions/pessimistic_transaction_db.h +++ b/utilities/transactions/pessimistic_transaction_db.h @@ -79,7 +79,8 @@ class PessimisticTransactionDB : public TransactionDB { Status TryLock(PessimisticTransaction* txn, uint32_t cfh_id, const std::string& key, bool exclusive); - void UnLock(PessimisticTransaction* txn, const TransactionKeyMap* keys); + void UnLock(PessimisticTransaction* txn, const TransactionKeyMap* keys, + bool all_keys_hint=false); void UnLock(PessimisticTransaction* txn, uint32_t cfh_id, const std::string& key); diff --git a/utilities/transactions/transaction_lock_mgr.cc b/utilities/transactions/transaction_lock_mgr.cc index 0ce5124..3482082 100644 --- a/utilities/transactions/transaction_lock_mgr.cc +++ b/utilities/transactions/transaction_lock_mgr.cc @@ -691,10 +691,11 @@ void TransactionLockMgr::UnLock(PessimisticTransaction* txn, } static void -another_lock_mgr_release_lock_int(toku::locktree *lt, +range_lock_mgr_release_lock_int(toku::locktree *lt, const PessimisticTransaction* txn, uint32_t column_family_id, - const std::string& key) + const std::string& key, + bool releasing_all_locks_hint= false) { DBT key_dbt; toku_fill_dbt(&key_dbt, key.data(), key.size()); @@ -708,31 +709,45 @@ another_lock_mgr_release_lock_int(toku::locktree *lt, void RangeLockMgr::UnLock(PessimisticTransaction* txn, uint32_t column_family_id, const std::string& key, Env* env) { - //fprintf(stderr, "RangeLockMgr::UnLock (key)\n"); - another_lock_mgr_release_lock_int(lt, txn, column_family_id, key); + range_lock_mgr_release_lock_int(lt, txn, column_family_id, 
key); toku::lock_request::retry_all_lock_requests(lt, nullptr /* lock_wait_needed_callback */); } void RangeLockMgr::UnLock(const PessimisticTransaction* txn, const TransactionKeyMap* key_map, Env* env) { + //TODO: if we collect all locks in a range buffer and then + // make one call to lock_tree::release_locks(), will that be faster? + for (auto& key_map_iter : *key_map) { + uint32_t column_family_id = key_map_iter.first; + auto& keys = key_map_iter.second; - //fprintf(stderr, "RangeLockMgr::UnLock(key_map)\n"); + for (auto& key_iter : keys) { + const std::string& key = key_iter.first; + range_lock_mgr_release_lock_int(lt, txn, column_family_id, key); + } + } + toku::lock_request::retry_all_lock_requests(lt, nullptr /* lock_wait_needed_callback */); +} +void RangeLockMgr::UnLockAll(const PessimisticTransaction* txn, + const TransactionKeyMap* key_map, Env* env) { + //TODO: collecting multiple locks into a buffer and then making one call + // to lock_tree::release_locks() will be faster. for (auto& key_map_iter : *key_map) { uint32_t column_family_id = key_map_iter.first; - //TODO: ^ What to do about the above? 
auto& keys = key_map_iter.second; for (auto& key_iter : keys) { const std::string& key = key_iter.first; - another_lock_mgr_release_lock_int(lt, txn, column_family_id, key); + range_lock_mgr_release_lock_int(lt, txn, column_family_id, key, true); } } toku::lock_request::retry_all_lock_requests(lt, nullptr /* lock_wait_needed_callback */); #if 0 + Original usage: void release_locks(TXNID txnid, const range_buffer *ranges); // release all of the locks this txn has ever successfully diff --git a/utilities/transactions/transaction_lock_mgr.h b/utilities/transactions/transaction_lock_mgr.h index 3580441..c2de992 100644 --- a/utilities/transactions/transaction_lock_mgr.h +++ b/utilities/transactions/transaction_lock_mgr.h @@ -92,6 +92,12 @@ class RangeLockMgr :public BaseLockMgr { void UnLock(const PessimisticTransaction* txn, const TransactionKeyMap* keys, Env* env) override ; + /* + Same as above, but *keys is guaranteed to hold all the locks obtained by + the transaction. + */ + void UnLockAll(const PessimisticTransaction* txn, const TransactionKeyMap* keys, + Env* env); void UnLock(PessimisticTransaction* txn, uint32_t column_family_id, const std::string& key, Env* env) override ;

1 0

[Commits] 2f4a0c5be2c: Fix accumulation of old rows in mysql.gtid_slave_pos
by Kristian Nielsen 08 Oct '18

08 Oct '18

revision-id: 2f4a0c5be2c5d5153c4253a49ba8820ab333a9a0 (mariadb-10.1.35-71-g2f4a0c5be2c) parent(s): 1fc5a6f30c3a9c047dcf9a36b00026d98f286f6b author: Kristian Nielsen committer: Kristian Nielsen timestamp: 2018-10-07 18:59:52 +0200 message: Fix accumulation of old rows in mysql.gtid_slave_pos This would happen especially in optimistic parallel replication, where there is a good chance that a transaction will be rolled back (due to conflicts) after it has executed record_gtid(). If the transaction did any deletions of old rows as part of record_gtid(), those deletions will be undone as well. And the code did not properly ensure that the deletions would be re-tried. This patch makes record_gtid() remember the list of deletions done as part of a transaction. Then in rpl_slave_state::update() when the changes have been committed, we discard the list. However, in case of error and rollback, in cleanup_context() we will instead put the list back into rpl_global_gtid_slave_state so that the deletions will be re-tried later. Probably fixes part of the cause of MDEV-12147 as well. 
Signed-off-by: Kristian Nielsen <knielsen(a)knielsen-hq.org> --- .../suite/rpl/r/rpl_parallel_optimistic.result | 6 ++ .../suite/rpl/t/rpl_parallel_optimistic.test | 17 +++++ sql/log_event.cc | 6 +- sql/rpl_gtid.cc | 64 ++++++++++++------ sql/rpl_gtid.h | 2 +- sql/rpl_rli.cc | 79 ++++++++++++++++++++++ sql/rpl_rli.h | 11 +++ 7 files changed, 161 insertions(+), 24 deletions(-) diff --git a/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result b/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result index 3cd4f8231bf..99bd8562ffe 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result +++ b/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result @@ -571,4 +571,10 @@ SET GLOBAL slave_parallel_mode=@old_parallel_mode; SET GLOBAL slave_parallel_threads=@old_parallel_threads; include/start_slave.inc DROP TABLE t1, t2, t3; +include/save_master_gtid.inc +include/sync_with_master_gtid.inc +Check that no more than the expected last two GTIDs are in mysql.gtid_slave_pos +select count(*) from mysql.gtid_slave_pos order by domain_id, sub_id; +count(*) +2 include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test b/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test index 9f6669279db..3867a3fdf3a 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test @@ -549,5 +549,22 @@ SET GLOBAL slave_parallel_threads=@old_parallel_threads; --connection server_1 DROP TABLE t1, t2, t3; +--source include/save_master_gtid.inc + +--connection server_2 +--source include/sync_with_master_gtid.inc +# Check for left-over rows in table mysql.gtid_slave_pos (MDEV-12147). +# +# There was a bug when a transaction got a conflict and was rolled back. It +# might have also handled deletion of some old rows, and these deletions would +# then also be rolled back. And since the deletes were never re-tried, old no +# longer needed rows would accumulate in the table without limit. 
+# +# The earlier part of this test file has plenty of transactions being rolled +# back. But the last DROP TABLE statement runs on its own and should never +# conflict, thus at this point the mysql.gtid_slave_pos table should be clean. +--echo Check that no more than the expected last two GTIDs are in mysql.gtid_slave_pos +select count(*) from mysql.gtid_slave_pos order by domain_id, sub_id; +--connection server_1 --source include/rpl_end.inc diff --git a/sql/log_event.cc b/sql/log_event.cc index e1912ad4620..e07b7002398 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -4429,7 +4429,7 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi, gtid= rgi->current_gtid; if (rpl_global_gtid_slave_state->record_gtid(thd, &gtid, sub_id, - true, false)) + rgi, false)) { int errcode= thd->get_stmt_da()->sql_errno(); if (!is_parallel_retry_error(rgi, errcode)) @@ -7132,7 +7132,7 @@ Gtid_list_log_event::do_apply_event(rpl_group_info *rgi) { if ((ret= rpl_global_gtid_slave_state->record_gtid(thd, &list[i], sub_id_list[i], - false, false))) + NULL, false))) return ret; rpl_global_gtid_slave_state->update_state_hash(sub_id_list[i], &list[i], NULL); @@ -7639,7 +7639,7 @@ int Xid_log_event::do_apply_event(rpl_group_info *rgi) rgi->gtid_pending= false; gtid= rgi->current_gtid; - err= rpl_global_gtid_slave_state->record_gtid(thd, &gtid, sub_id, true, + err= rpl_global_gtid_slave_state->record_gtid(thd, &gtid, sub_id, rgi, false); if (err) { diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc index 7b1acf17ef5..94944b5b3e5 100644 --- a/sql/rpl_gtid.cc +++ b/sql/rpl_gtid.cc @@ -77,7 +77,7 @@ rpl_slave_state::record_and_update_gtid(THD *thd, rpl_group_info *rgi) rgi->gtid_pending= false; if (rgi->gtid_ignore_duplicate_state!=rpl_group_info::GTID_DUPLICATE_IGNORE) { - if (record_gtid(thd, &rgi->current_gtid, sub_id, false, false)) + if (record_gtid(thd, &rgi->current_gtid, sub_id, NULL, false)) DBUG_RETURN(1); update_state_hash(sub_id, &rgi->current_gtid, rgi); } @@ -328,6 +328,8
@@ rpl_slave_state::update(uint32 domain_id, uint32 server_id, uint64 sub_id, } } rgi->gtid_ignore_duplicate_state= rpl_group_info::GTID_DUPLICATE_NULL; + + rgi->pending_gtid_deletes_clear(); } if (!(list_elem= (list_element *)my_malloc(sizeof(*list_elem), MYF(MY_WME)))) @@ -377,15 +379,24 @@ int rpl_slave_state::put_back_list(uint32 domain_id, list_element *list) { element *e; + int err= 0; + + mysql_mutex_lock(&LOCK_slave_state); if (!(e= (element *)my_hash_search(&hash, (const uchar *)&domain_id, 0))) - return 1; + { + err= 1; + goto end; + } while (list) { list_element *next= list->next; e->add(list); list= next; } - return 0; + +end: + mysql_mutex_unlock(&LOCK_slave_state); + return err; } @@ -468,12 +479,12 @@ gtid_check_rpl_slave_state_table(TABLE *table) /* Write a gtid to the replication slave state table. - Do it as part of the transaction, to get slave crash safety, or as a separate - transaction if !in_transaction (eg. MyISAM or DDL). - gtid The global transaction id for this event group. sub_id Value allocated within the sub_id when the event group was read (sub_id must be consistent with commit order in master binlog). + rgi rpl_group_info context, if we are recording the gtid transactionally + as part of replicating a transactional event. NULL if called from + outside of a replicated transaction. 
Note that caller must later ensure that the new gtid and sub_id is inserted into the appropriate HASH element with rpl_slave_state.add(), so that it can @@ -481,13 +492,13 @@ gtid_check_rpl_slave_state_table(TABLE *table) */ int rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, - bool in_transaction, bool in_statement) + rpl_group_info *rgi, bool in_statement) { TABLE_LIST tlist; int err= 0; bool table_opened= false; TABLE *table; - list_element *elist= 0, *next; + list_element *elist= 0, *cur, *next; element *elem; ulonglong thd_saved_option= thd->variables.option_bits; Query_tables_list lex_backup; @@ -558,7 +569,7 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, thd->wsrep_ignore_table= true; #endif - if (!in_transaction) + if (!rgi) { DBUG_PRINT("info", ("resetting OPTION_BEGIN")); thd->variables.option_bits&= @@ -601,9 +612,9 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, if ((elist= elem->grab_list()) != NULL) { /* Delete any old stuff, but keep around the most recent one. */ - list_element *cur= elist; - uint64 best_sub_id= cur->sub_id; + uint64 best_sub_id= elist->sub_id; list_element **best_ptr_ptr= &elist; + cur= elist; while ((next= cur->next)) { if (next->sub_id > best_sub_id) @@ -636,7 +647,8 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, table->file->print_error(err, MYF(0)); goto end; } - while (elist) + cur = elist; + while (cur) { uchar key_buffer[4+8]; @@ -646,9 +658,9 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, /* `break' does not work inside DBUG_EXECUTE_IF */ goto dbug_break; }); - next= elist->next; + next= cur->next; - table->field[1]->store(elist->sub_id, true); + table->field[1]->store(cur->sub_id, true); /* domain_id is already set in table->record[0] from write_row() above. 
*/ key_copy(key_buffer, table->record[0], &table->key_info[0], 0, false); if (table->file->ha_index_read_map(table->record[1], key_buffer, @@ -662,8 +674,7 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, not want to endlessly error on the same element in case of table corruption or such. */ - my_free(elist); - elist= next; + cur= next; if (err) break; } @@ -686,18 +697,31 @@ IF_DBUG(dbug_break:, ) */ if (elist) { - mysql_mutex_lock(&LOCK_slave_state); put_back_list(gtid->domain_id, elist); - mysql_mutex_unlock(&LOCK_slave_state); + elist = 0; } ha_rollback_trans(thd, FALSE); } close_thread_tables(thd); - if (in_transaction) + if (rgi) + { thd->mdl_context.release_statement_locks(); + /* + Save the list of old gtid entries we deleted. If this transaction + fails later for some reason and is rolled back, the deletion of those + entries will be rolled back as well, and we will need to put them back + on the to-be-deleted list so we can re-do the deletion. Otherwise + redundant rows in mysql.gtid_slave_pos may accumulate if transactions + are rolled back and retried after record_gtid(). 
+ */ + rgi->pending_gtid_deletes_save(gtid->domain_id, elist); + } else + { thd->mdl_context.release_transactional_locks(); + rpl_group_info::pending_gtid_deletes_free(elist); + } } thd->lex->restore_backup_query_tables_list(&lex_backup); thd->variables.option_bits= thd_saved_option; @@ -1080,7 +1104,7 @@ rpl_slave_state::load(THD *thd, char *state_from_master, size_t len, if (gtid_parser_helper(&state_from_master, end, &gtid) || !(sub_id= next_sub_id(gtid.domain_id)) || - record_gtid(thd, &gtid, sub_id, false, in_statement) || + record_gtid(thd, &gtid, sub_id, NULL, in_statement) || update(gtid.domain_id, gtid.server_id, sub_id, gtid.seq_no, NULL)) return 1; if (state_from_master == end) diff --git a/sql/rpl_gtid.h b/sql/rpl_gtid.h index 79d566bddbf..7bd639b768f 100644 --- a/sql/rpl_gtid.h +++ b/sql/rpl_gtid.h @@ -182,7 +182,7 @@ struct rpl_slave_state uint64 seq_no, rpl_group_info *rgi); int truncate_state_table(THD *thd); int record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, - bool in_transaction, bool in_statement); + rpl_group_info *rgi, bool in_statement); uint64 next_sub_id(uint32 domain_id); int iterate(int (*cb)(rpl_gtid *, void *), void *data, rpl_gtid *extra_gtids, uint32 num_extra, diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 64a1b535307..b35130c1505 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -1680,6 +1680,7 @@ rpl_group_info::reinit(Relay_log_info *rli) long_find_row_note_printed= false; did_mark_start_commit= false; gtid_ev_flags2= 0; + pending_gtid_delete_list= NULL; last_master_timestamp = 0; gtid_ignore_duplicate_state= GTID_DUPLICATE_NULL; speculation= SPECULATE_NO; @@ -1804,6 +1805,12 @@ void rpl_group_info::cleanup_context(THD *thd, bool error) erroneously update the GTID position. */ gtid_pending= false; + + /* + Rollback will have undone any deletions of old rows we might have made + in mysql.gtid_slave_pos. Put those rows back on the list to be deleted. 
+ */ + pending_gtid_deletes_put_back(); } m_table_map.clear_tables(); slave_close_thread_tables(thd); @@ -2027,6 +2034,78 @@ rpl_group_info::unmark_start_commit() } +/* + When record_gtid() has deleted any old rows from the table + mysql.gtid_slave_pos as part of a replicated transaction, save the list of + rows deleted here. + + If later the transaction fails (eg. optimistic parallel replication), the + deletes will be undone when the transaction is rolled back. Then we can + put back the list of rows into the rpl_global_gtid_slave_state, so that + we can re-do the deletes and avoid accumulating old rows in the table. +*/ +void +rpl_group_info::pending_gtid_deletes_save(uint32 domain_id, + rpl_slave_state::list_element *list) +{ + /* + We should never get to a state where we try to save a new pending list of + gtid deletes while we still have an old one. But make sure we handle it + anyway just in case, so we avoid leaving stray entries in the + mysql.gtid_slave_pos table. + */ + DBUG_ASSERT(!pending_gtid_delete_list); + if (unlikely(pending_gtid_delete_list)) + pending_gtid_deletes_put_back(); + + pending_gtid_delete_list= list; + pending_gtid_delete_list_domain= domain_id; +} + + +/* + Take the list recorded by pending_gtid_deletes_save() and put it back into + rpl_global_gtid_slave_state. This is needed if deletion of the rows was + rolled back due to transaction failure. +*/ +void +rpl_group_info::pending_gtid_deletes_put_back() +{ + if (pending_gtid_delete_list) + { + rpl_global_gtid_slave_state->put_back_list(pending_gtid_delete_list_domain, + pending_gtid_delete_list); + pending_gtid_delete_list= NULL; + } +} + + +/* + Free the list recorded by pending_gtid_deletes_save(). Done when the deletes + in the list have been permanently committed. 
+*/ +void +rpl_group_info::pending_gtid_deletes_clear() +{ + pending_gtid_deletes_free(pending_gtid_delete_list); + pending_gtid_delete_list= NULL; +} + + +void +rpl_group_info::pending_gtid_deletes_free(rpl_slave_state::list_element *list) +{ + rpl_slave_state::list_element *next; + + while (list) + { + next= list->next; + my_free(list); + list= next; + } +} + + rpl_sql_thread_info::rpl_sql_thread_info(Rpl_filter *filter) : rpl_filter(filter) { diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 74d5b6fe416..b40a34a54e6 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -676,6 +676,11 @@ struct rpl_group_info /* Needs room for "Gtid D-S-N\x00". */ char gtid_info_buf[5+10+1+10+1+20+1]; + /* List of not yet committed deletions in mysql.gtid_slave_pos. */ + rpl_slave_state::list_element *pending_gtid_delete_list; + /* Domain associated with pending_gtid_delete_list. */ + uint32 pending_gtid_delete_list_domain; + /* The timestamp, from the master, of the commit event. Used to do delayed update of rli->last_master_timestamp, for getting @@ -817,6 +822,12 @@ struct rpl_group_info char *gtid_info(); void unmark_start_commit(); + static void pending_gtid_deletes_free(rpl_slave_state::list_element *list); + void pending_gtid_deletes_save(uint32 domain_id, + rpl_slave_state::list_element *list); + void pending_gtid_deletes_put_back(); + void pending_gtid_deletes_clear(); + time_t get_row_stmt_start_timestamp() { return row_stmt_start_timestamp;

1 0

[Commits] 7a883d4: Issue #881: Issue #809 still occurs for reverse scans on forward cfs
by psergey＠askmonty.org 08 Oct '18

08 Oct '18

revision-id: 7a883d4a5093c5e25dbc7fdfd6b8b91de6a9b853 parent(s): f06c79b76b1896324d816399efaaf711ad177dc0 committer: Sergei Petrunia branch nick: mysql-5.6-rocksdb-spetrunia timestamp: 2018-10-08 19:01:06 +0300 message: Issue #881: Issue #809 still occurs for reverse scans on forward cfs When constructing a lookup key for reverse full index scan, pass the correct eq_cond_len to setup_scan_iterator(). The code mirrors the code in ha_rocksdb::index_first_intern(). --- mysql-test/suite/rocksdb/r/bloomfilter5.result | 25 +++++++++++++++++++- mysql-test/suite/rocksdb/t/bloomfilter5-master.opt | 2 +- mysql-test/suite/rocksdb/t/bloomfilter5.test | 27 +++++++++++++++++++++- storage/rocksdb/ha_rocksdb.cc | 8 ++----- storage/rocksdb/rdb_datadic.h | 21 +++++++++++++++++ 5 files changed, 74 insertions(+), 9 deletions(-) diff --git a/mysql-test/suite/rocksdb/r/bloomfilter5.result b/mysql-test/suite/rocksdb/r/bloomfilter5.result index 058d360..4cde60d 100644 --- a/mysql-test/suite/rocksdb/r/bloomfilter5.result +++ b/mysql-test/suite/rocksdb/r/bloomfilter5.result @@ -59,4 +59,27 @@ insert into t4 values (1, 0xFFFF, 0xFFF, 12345); # This must not fail an assert: select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc; pk kp1 kp2 col1 -drop table t1,t2,t3,t4; +# +# Issue #881: Issue #809 still occurs for reverse scans on forward cfs +# +create table t5 ( +id1 bigint not null, +id2 bigint not null, +id3 varchar(100) not null, +id4 int not null, +id5 int not null, +value bigint, +value2 varchar(100), +primary key (id1, id2, id3, id4) COMMENT 'bf5_1' +) engine=ROCKSDB; +insert into t5 select * from t1; +set global rocksdb_force_flush_memtable_now=1; +# An index scan starting from the end of the table: +explain +select * from t5 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t5 index NULL PRIMARY 122 NULL 1 NULL +select * from t5 order by id1 desc,id2 desc, id3 
desc, id4 desc limit 1; +id1 id2 id3 id4 id5 value value2 +1000 2000 2000 10000 10000 1000 aaabbbccc +drop table t1,t2,t3,t4,t5; diff --git a/mysql-test/suite/rocksdb/t/bloomfilter5-master.opt b/mysql-test/suite/rocksdb/t/bloomfilter5-master.opt index efcd69b..4576d20 100644 --- a/mysql-test/suite/rocksdb/t/bloomfilter5-master.opt +++ b/mysql-test/suite/rocksdb/t/bloomfilter5-master.opt @@ -1,3 +1,3 @@ --rocksdb_default_cf_options=write_buffer_size=256k;block_based_table_factory={filter_policy=bloomfilter:10:false;whole_key_filtering=0;} ---rocksdb_override_cf_options=rev:bf5_1={prefix_extractor=capped:4}; +--rocksdb_override_cf_options=rev:bf5_1={prefix_extractor=capped:4};bf5_1={prefix_extractor=capped:4} diff --git a/mysql-test/suite/rocksdb/t/bloomfilter5.test b/mysql-test/suite/rocksdb/t/bloomfilter5.test index 00968ae..11890dc 100644 --- a/mysql-test/suite/rocksdb/t/bloomfilter5.test +++ b/mysql-test/suite/rocksdb/t/bloomfilter5.test @@ -56,6 +56,31 @@ insert into t4 values (1, 0xFFFF, 0xFFF, 12345); --echo # This must not fail an assert: select * from t4 force index(kp1) where kp1=0xFFFFFFFF and kp2<=0xFFFFFFFF order by kp2 desc; -drop table t1,t2,t3,t4; +--echo # +--echo # Issue #881: Issue #809 still occurs for reverse scans on forward cfs +--echo # + +# The same as t1 above but uses forward-ordered column family: + +create table t5 ( + id1 bigint not null, + id2 bigint not null, + id3 varchar(100) not null, + id4 int not null, + id5 int not null, + value bigint, + value2 varchar(100), + primary key (id1, id2, id3, id4) COMMENT 'bf5_1' +) engine=ROCKSDB; + +insert into t5 select * from t1; + +set global rocksdb_force_flush_memtable_now=1; + +--echo # An index scan starting from the end of the table: +explain +select * from t5 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +select * from t5 order by id1 desc,id2 desc, id3 desc, id4 desc limit 1; +drop table t1,t2,t3,t4,t5; diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc 
index 2c63947..e883f1d 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -8788,11 +8788,7 @@ int ha_rocksdb::index_last_intern(uchar *const buf) { DBUG_ASSERT(key != nullptr); const Rdb_key_def &kd = *m_key_descr_arr[active_index]; - if (kd.m_is_reverse_cf) { - kd.get_infimum_key(key, &key_size); - } else { - kd.get_supremum_key(key, &key_size); - } + int key_end_matching_bytes = kd.get_last_key(key, &key_size); rocksdb::Slice index_key((const char *)key, key_size); @@ -8803,7 +8799,7 @@ int ha_rocksdb::index_last_intern(uchar *const buf) { // Loop as long as we get a deadlock error AND we end up creating the // snapshot here (i.e. it did not exist prior to this) for (;;) { - setup_scan_iterator(kd, &index_key, false, Rdb_key_def::INDEX_NUMBER_SIZE); + setup_scan_iterator(kd, &index_key, false, key_end_matching_bytes); m_scan_it->SeekForPrev(index_key); m_skip_scan_it_next_call = false; diff --git a/storage/rocksdb/rdb_datadic.h b/storage/rocksdb/rdb_datadic.h index 25a6281..2085bfa 100644 --- a/storage/rocksdb/rdb_datadic.h +++ b/storage/rocksdb/rdb_datadic.h @@ -256,6 +256,27 @@ public: return i; } + /* + The same as get_first_key, but get the key for the last entry in the index + */ + inline int get_last_key(uchar *const key, uint *const size) const { + if (m_is_reverse_cf) + get_infimum_key(key, size); + else + get_supremum_key(key, size); + + /* Find out how many bytes of infimum are the same as m_index_number */ + uchar unmodified_key[INDEX_NUMBER_SIZE]; + rdb_netbuf_store_index(unmodified_key, m_index_number); + int i; + for (i = 0; i < INDEX_NUMBER_SIZE; i++) { + if (key[i] != unmodified_key[i]) + break; + } + return i; + } + + /* Make a key that is right after the given key. */ static int successor(uchar *const packed_tuple, const uint &len);

1 0