Re: [Maria-developers] 22fb7f8995c: Updated client and server to use new binary names in --debug traces
by Sergei Golubchik 13 Apr '20
Hi, Michael!
On Apr 13, Michael Widenius wrote:
> revision-id: 22fb7f8995c (mariadb-10.5.2-122-g22fb7f8995c)
> parent(s): faf8de3aa98
> author: Michael Widenius <monty(a)mariadb.com>
> committer: Michael Widenius <monty(a)mariadb.com>
> timestamp: 2020-04-09 01:37:02 +0300
> message:
>
> Updated client and server to use new binary names in --debug traces
ok to push
Regards,
Sergei
VP of MariaDB Server Engineering
and security(a)mariadb.org

Re: [Maria-developers] 9941c6a3179: MDEV-17554 Auto-create new partition for system versioned tables with history partitioned by INTERVAL/LIMIT
by Sergei Golubchik 13 Apr '20
Hi, Aleksey!
On Apr 07, Aleksey Midenkov wrote:
> revision-id: 9941c6a3179 (mariadb-10.5.2-164-g9941c6a3179)
> parent(s): 920c3c6b237
> author: Aleksey Midenkov <midenok(a)gmail.com>
> committer: Aleksey Midenkov <midenok(a)gmail.com>
> timestamp: 2020-04-06 08:05:43 +0300
> message:
>
> MDEV-17554 Auto-create new partition for system versioned tables with history partitioned by INTERVAL/LIMIT
See some comment below, please
> diff --git a/mysql-test/suite/versioning/common.inc b/mysql-test/suite/versioning/common.inc
> index 355b571e5a0..b35a5138015 100644
> --- a/mysql-test/suite/versioning/common.inc
> +++ b/mysql-test/suite/versioning/common.inc
> @@ -6,6 +6,7 @@ if (!$TEST_VERSIONING_SO)
> source include/have_innodb.inc;
>
> set @@session.time_zone='+00:00';
> +set @@global.time_zone='+00:00';
Why is that? I understand you might've needed it when a partition was added
in a separate thread, but why now?
> select ifnull(max(transaction_id), 0) into @start_trx_id from mysql.transaction_registry;
> set @test_start=now(6);
>
> diff --git a/mysql-test/suite/versioning/r/delete_history.result b/mysql-test/suite/versioning/r/delete_history.result
> index cb865a835b3..2e4a2bf9974 100644
> --- a/mysql-test/suite/versioning/r/delete_history.result
> +++ b/mysql-test/suite/versioning/r/delete_history.result
> @@ -154,3 +154,18 @@ select * from t1;
> a
> 1
> drop table t1;
> +#
> +# MDEV-17554 Auto-create new partition for system versioned tables with history partitioned by INTERVAL/LIMIT
> +#
> +# Don't auto-create new partition on DELETE HISTORY:
> +create or replace table t (a int) with system versioning
> +partition by system_time limit 1000 auto_increment;
this looks like a hack; I think we need a dedicated syntax for that,
but I couldn't think of anything good now.
ok, I see that you allow both AUTO_INCREMENT and AUTO.
Maybe it's better to just use AUTO?
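For illustration, a minimal sketch of the same statement with only AUTO (the AUTO spelling already appears in other tests in this patch; the exact statement here is just an example):
create or replace table t (a int) with system versioning
partition by system_time limit 1000 auto;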
> +delete history from t;
> +show create table t;
> +Table Create Table
> +t CREATE TABLE `t` (
> + `a` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME LIMIT 1000 AUTO_INCREMENT
> +PARTITIONS 2
> +drop table t;
> diff --git a/mysql-test/suite/versioning/r/partition.result b/mysql-test/suite/versioning/r/partition.result
> index a7047cbd11b..660d2c81961 100644
> --- a/mysql-test/suite/versioning/r/partition.result
> +++ b/mysql-test/suite/versioning/r/partition.result
> @@ -289,11 +291,27 @@ x
> 6
> 7
> 8
> -## rotation by INTERVAL
> +# Auto-create history partitions
> +create or replace table t1 (x int) with system versioning
> +partition by system_time limit 1000 auto_increment;
> +lock tables t1 write;
> +insert into t1 values (1);
> +update t1 set x= x + 1;
> +unlock tables;
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME LIMIT 1000 AUTO_INCREMENT
> +PARTITIONS 3
this test needs a comment, I don't understand what's happening here.
why lock tables?
> +#
> +# Rotation by INTERVAL
> +#
> create or replace table t1 (x int)
> with system versioning
> partition by system_time interval 0 second partitions 3;
> -ERROR HY000: Wrong parameters for partitioned `t1`: wrong value for 'INTERVAL'
> +ERROR HY000: Wrong parameters for partitioned `t1`: wrong value for INTERVAL
> create table t1 (i int) with system versioning
> partition by system_time interval 6 day limit 98;
> ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'limit 98' at line 2
> @@ -302,7 +320,7 @@ partition by system_time interval 10 year partitions 3;
> ERROR 22003: TIMESTAMP value is out of range in 'INTERVAL'
> # INTERVAL and ALTER TABLE
> create or replace table t1 (i int) with system versioning
> -partition by system_time interval 1 hour;
> +partition by system_time interval 59 minute;
why?
> set @ts=(select partition_description from information_schema.partitions
> where table_schema='test' and table_name='t1' and partition_name='p0');
> alter table t1 add column b int;
> @@ -353,28 +371,51 @@ Warning 4114 Versioned table `test`.`t1`: last HISTORY partition (`p1`) is out o
> delete from t1;
> Warnings:
> Warning 4114 Versioned table `test`.`t1`: last HISTORY partition (`p1`) is out of INTERVAL, need more HISTORY partitions
> -select subpartition_name,partition_description,table_rows from information_schema.partitions where table_schema='test' and table_name='t1';
> +select subpartition_name,partition_description from information_schema.partitions where table_schema='test' and table_name='t1';
why?
> -subpartition_name partition_description table_rows
> -p1sp0 2001-02-04 00:00:00 1
> -p1sp1 2001-02-04 00:00:00 1
> -pnsp0 CURRENT 0
> -pnsp1 CURRENT 0
> +subpartition_name partition_description
> +p1sp0 2001-02-04 00:00:00
> +p1sp1 2001-02-04 00:00:00
> +pnsp0 CURRENT
> +pnsp1 CURRENT
> +select * from t1 partition (p1);
> +i
> +1
> +2
> set timestamp=unix_timestamp('2001-02-04 10:20:55');
> alter table t1 add partition (partition p0 history, partition p2 history);
> set timestamp=unix_timestamp('2001-02-04 10:30:00');
> insert t1 values (4),(5);
> set timestamp=unix_timestamp('2001-02-04 10:30:10');
> update t1 set i=6 where i=5;
> -select subpartition_name,partition_description,table_rows from information_schema.partitions where table_schema='test' and table_name='t1';
> -subpartition_name partition_description table_rows
> -p1sp0 2001-02-04 00:00:00 1
> -p1sp1 2001-02-04 00:00:00 0
> -p0sp0 2001-02-05 00:00:00 1
> -p0sp1 2001-02-05 00:00:00 1
> -p2sp0 2001-02-06 00:00:00 0
> -p2sp1 2001-02-06 00:00:00 0
> -pnsp0 CURRENT 0
> -pnsp1 CURRENT 2
> +select subpartition_name, partition_description from information_schema.partitions where table_schema='test' and table_name='t1';
> +subpartition_name partition_description
> +p1sp0 2001-02-04 00:00:00
> +p1sp1 2001-02-04 00:00:00
> +p0sp0 2001-02-05 00:00:00
> +p0sp1 2001-02-05 00:00:00
> +p2sp0 2001-02-06 00:00:00
> +p2sp1 2001-02-06 00:00:00
> +pnsp0 CURRENT
> +pnsp1 CURRENT
> +select * from t1 partition (p1);
> +i
> +1
> +select * from t1 partition (p0);
> +i
> +5
> +2
> +select * from t1 partition (p2);
> +i
> +alter table t1 rebuild partition p0, p1, p2;
> +select * from t1 partition (p1);
> +i
> +1
> +select * from t1 partition (p0);
> +i
> +5
> +2
> +select * from t1 partition (p2);
> +i
> ## pruning check
> set @ts=(select partition_description from information_schema.partitions
> where table_schema='test' and table_name='t1' and partition_name='p0' limit 1);
> @@ -1044,3 +1085,568 @@ t1 CREATE TABLE `t1` (
> PARTITION BY SYSTEM_TIME
> PARTITIONS 8
> drop tables t1;
> +#
> +# MDEV-17554 Auto-create new partition for system versioned tables with history partitioned by INTERVAL/LIMIT
> +#
> +create or replace table t1 (x int) with system versioning
> +partition by system_time limit 999 auto_increment;
> +ERROR HY000: Wrong parameters for partitioned `t1`: wrong value for LIMIT (< 1000)
> +create or replace table t1 (x int) with system versioning
> +partition by system_time interval 3599 second auto_increment;
> +ERROR HY000: Wrong parameters for partitioned `t1`: wrong value for INTERVAL (< 1 HOUR)
no arbitrary limitations, please
> +create or replace table t1 (x int) with system versioning
> +partition by system_time limit 1000 auto_increment;
> +affected rows: 0
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME LIMIT 1000 AUTO_INCREMENT
> +PARTITIONS 2
> +affected rows: 1
> +insert into t1 values (1);
> +affected rows: 1
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME LIMIT 1000 AUTO_INCREMENT
> +PARTITIONS 3
> +affected rows: 1
> +# Increment from 2 to 5
> +create or replace table t1 (x int) with system versioning
> +partition by system_time interval 3600 second
> +starts '2000-01-01 00:00:00' auto_increment;
> +affected rows: 0
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 3600 SECOND STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +PARTITIONS 2
> +affected rows: 1
> +set timestamp= unix_timestamp('2000-01-01 00:00:00');
> +affected rows: 0
> +insert into t1 values (1);
> +affected rows: 1
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 3600 SECOND STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +PARTITIONS 3
> +affected rows: 1
> +set timestamp= unix_timestamp('2000-01-01 01:00:00');
> +affected rows: 0
> +update t1 set x= x + 1;
> +affected rows: 1
> +info: Rows matched: 1 Changed: 1 Inserted: 1 Warnings: 0
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 3600 SECOND STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +PARTITIONS 4
> +affected rows: 1
> +set timestamp= unix_timestamp('2000-01-01 02:00:00');
> +affected rows: 0
> +update t1 set x= x + 2;
> +affected rows: 1
> +info: Rows matched: 1 Changed: 1 Inserted: 1 Warnings: 0
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 3600 SECOND STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +PARTITIONS 5
> +affected rows: 1
> +# Increment from 3 to 6, manual names, LOCK TABLES
again, I think this is overcomplication and overengineering.
I don't see any need for mixing automatic and manual partition names.
> +set timestamp= unix_timestamp('2000-01-01 00:00:00');
> +affected rows: 0
> +create or replace table t1 (x int) with system versioning
> +partition by system_time interval 1 hour auto_increment (
> +partition p1 history,
> +partition p3 history,
> +partition pn current);
> +affected rows: 0
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +(PARTITION `p1` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `p3` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `pn` CURRENT ENGINE = DEFAULT_ENGINE)
> +affected rows: 1
> +insert into t1 values (1);
> +affected rows: 1
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +(PARTITION `p1` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `p3` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `pn` CURRENT ENGINE = DEFAULT_ENGINE)
> +affected rows: 1
> +set timestamp= unix_timestamp('2000-01-01 01:00:00');
> +affected rows: 0
> +update t1 set x= x + 3;
> +affected rows: 1
> +info: Rows matched: 1 Changed: 1 Inserted: 1 Warnings: 0
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +(PARTITION `p1` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `p3` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `p2` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `pn` CURRENT ENGINE = DEFAULT_ENGINE)
> +affected rows: 1
> +set timestamp= unix_timestamp('2000-01-01 02:00:00');
> +affected rows: 0
> +update t1 set x= x + 4;
> +affected rows: 1
> +info: Rows matched: 1 Changed: 1 Inserted: 1 Warnings: 0
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +(PARTITION `p1` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `p3` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `p2` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `p4` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `pn` CURRENT ENGINE = DEFAULT_ENGINE)
> +affected rows: 1
> +lock tables t1 write;
> +affected rows: 0
> +set timestamp= unix_timestamp('2000-01-01 03:00:00');
> +affected rows: 0
> +update t1 set x= x + 5;
> +affected rows: 1
> +info: Rows matched: 1 Changed: 1 Inserted: 1 Warnings: 0
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +(PARTITION `p1` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `p3` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `p2` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `p4` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `p5` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `pn` CURRENT ENGINE = DEFAULT_ENGINE)
> +affected rows: 1
> +unlock tables;
> +affected rows: 0
> +# Test VIEW, LOCK TABLES
> +set timestamp= unix_timestamp('2000-01-01 00:00:00');
> +affected rows: 0
> +create or replace table t1 (x int) with system versioning
> +partition by system_time interval 1 hour auto_increment;
> +affected rows: 0
> +create or replace view v1 as select * from t1;
> +affected rows: 0
> +insert into v1 values (1);
> +affected rows: 1
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +PARTITIONS 3
> +affected rows: 1
> +lock tables t1 write;
> +affected rows: 0
> +set timestamp= unix_timestamp('2000-01-01 01:00:00');
> +affected rows: 0
> +update t1 set x= x + 3;
> +affected rows: 1
> +info: Rows matched: 1 Changed: 1 Inserted: 1 Warnings: 0
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +PARTITIONS 4
> +affected rows: 1
> +unlock tables;
> +affected rows: 0
> +drop view v1;
> +affected rows: 0
> +drop tables t1;
> +affected rows: 0
> +# Multiple increments in single command
> +set timestamp= unix_timestamp('2000-01-01 00:00:00');
> +affected rows: 0
> +create or replace table t1 (x int) with system versioning
> +partition by system_time interval 1 hour auto_increment partitions 3;
> +affected rows: 0
> +create or replace table t2 (y int) with system versioning
> +partition by system_time interval 1 hour auto_increment partitions 4;
> +affected rows: 0
> +insert into t1 values (1);
> +affected rows: 1
> +insert into t2 values (2);
> +affected rows: 1
> +set timestamp= unix_timestamp('2000-01-01 01:00:00');
> +affected rows: 0
> +update t1, t2 set x= x + 1, y= y + 1;
> +affected rows: 2
> +info: Rows matched: 2 Changed: 2 Warnings: 0
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +PARTITIONS 4
> +affected rows: 1
> +show create table t2;
> +Table Create Table
> +t2 CREATE TABLE `t2` (
> + `y` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +PARTITIONS 4
> +affected rows: 1
> +set timestamp= unix_timestamp('2000-01-01 02:00:00');
> +affected rows: 0
> +update t1, t2 set x= x + 1, y= y + 1;
> +affected rows: 2
> +info: Rows matched: 2 Changed: 2 Warnings: 0
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +PARTITIONS 5
> +affected rows: 1
> +show create table t2;
> +Table Create Table
> +t2 CREATE TABLE `t2` (
> + `y` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +PARTITIONS 5
> +affected rows: 1
> +drop tables t1, t2;
> +affected rows: 0
> +# PS, SP, LOCK TABLES
> +set timestamp= unix_timestamp('2000-01-01 00:00:00');
> +affected rows: 0
> +create or replace table t1 (x int) with system versioning
> +partition by system_time interval 1 hour auto_increment;
> +affected rows: 0
> +execute immediate 'insert into t1 values (1)';
> +affected rows: 1
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +PARTITIONS 3
> +affected rows: 1
> +prepare s from 'update t1 set x= x + 6';
> +affected rows: 0
> +info: Statement prepared
> +set timestamp= unix_timestamp('2000-01-01 01:00:00');
> +affected rows: 0
> +execute s;
> +affected rows: 1
> +info: Rows matched: 1 Changed: 1 Inserted: 1 Warnings: 0
> +execute s;
> +affected rows: 1
> +info: Rows matched: 1 Changed: 1 Inserted: 1 Warnings: 0
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +PARTITIONS 4
> +affected rows: 1
> +lock tables t1 write;
> +affected rows: 0
> +set timestamp= unix_timestamp('2000-01-01 02:00:00');
> +affected rows: 0
> +execute s;
> +affected rows: 1
> +info: Rows matched: 1 Changed: 1 Inserted: 1 Warnings: 0
> +execute s;
> +affected rows: 1
> +info: Rows matched: 1 Changed: 1 Inserted: 1 Warnings: 0
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +PARTITIONS 5
add a test where timestamp is incremented by, say, 24 hours, please
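For example, a hypothetical continuation of this test, following the set-timestamp pattern above (the exact values are only illustrative):
set timestamp= unix_timestamp('2000-01-02 02:00:00');
execute s;
show create table t1;
# how many HISTORY partitions should exist after a 24-hour jump?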
> +affected rows: 1
> +unlock tables;
> +affected rows: 0
> +drop prepare s;
> +affected rows: 0
> +create procedure sp() update t1 set x= x + 7;
> +affected rows: 0
> +set timestamp= unix_timestamp('2000-01-01 03:00:00');
> +affected rows: 0
> +call sp;
> +affected rows: 1
> +call sp;
> +affected rows: 1
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +PARTITIONS 6
> +affected rows: 1
> +lock tables t1 write;
> +affected rows: 0
> +set timestamp= unix_timestamp('2000-01-01 04:00:00');
> +affected rows: 0
> +call sp;
> +affected rows: 1
> +call sp;
> +affected rows: 1
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +PARTITIONS 7
> +affected rows: 1
> +unlock tables;
> +affected rows: 0
> +drop procedure sp;
> +affected rows: 0
> +# Complex table
> +set timestamp= unix_timestamp('2000-01-01 00:00:00');
> +affected rows: 0
> +create or replace table t1 (
> +x int primary key auto_increment,
> +t timestamp(6) default '2001-11-11 11:11:11',
> +b blob(4096) compressed null,
> +c varchar(1033) character set utf8 not null,
> +u int unique,
> +m enum('a', 'b', 'c') not null default 'a' comment 'absolute',
> +i1 tinyint, i2 smallint, i3 bigint,
> +index three(i1, i2, i3),
> +v1 timestamp(6) generated always as (t + interval 1 day),
> +v2 timestamp(6) generated always as (t + interval 1 month) stored,
> +s timestamp(6) as row start,
> +e timestamp(6) as row end,
> +period for system_time (s, e),
> +ps date, pe date,
> +period for app_time (ps, pe),
> +constraint check_constr check (u > -1))
> +with system versioning default charset=ucs2
> +partition by system_time interval 1 hour auto_increment (
> +partition p2 history,
> +partition pn current);
> +affected rows: 0
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) NOT NULL AUTO_INCREMENT,
> + `t` timestamp(6) NOT NULL DEFAULT '2001-11-11 11:11:11.000000',
> + `b` blob /*!100301 COMPRESSED*/ DEFAULT NULL,
> + `c` varchar(1033) CHARACTER SET utf8 NOT NULL,
> + `u` int(11) DEFAULT NULL,
> + `m` enum('a','b','c') NOT NULL DEFAULT 'a' COMMENT 'absolute',
> + `i1` tinyint(4) DEFAULT NULL,
> + `i2` smallint(6) DEFAULT NULL,
> + `i3` bigint(20) DEFAULT NULL,
> + `v1` timestamp(6) GENERATED ALWAYS AS (`t` + interval 1 day) VIRTUAL,
> + `v2` timestamp(6) GENERATED ALWAYS AS (`t` + interval 1 month) STORED,
> + `s` timestamp(6) GENERATED ALWAYS AS ROW START,
> + `e` timestamp(6) GENERATED ALWAYS AS ROW END,
> + `ps` date NOT NULL,
> + `pe` date NOT NULL,
> + PRIMARY KEY (`x`,`e`),
> + UNIQUE KEY `u` (`u`,`e`),
> + KEY `three` (`i1`,`i2`,`i3`),
> + PERIOD FOR SYSTEM_TIME (`s`, `e`),
> + PERIOD FOR `app_time` (`ps`, `pe`),
> + CONSTRAINT `check_constr` CHECK (`u` > -1)
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=ucs2 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +(PARTITION `p2` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `pn` CURRENT ENGINE = DEFAULT_ENGINE)
> +affected rows: 1
> +insert into t1 (x, c, u, i1, i2, i3, ps, pe)
> +values (1, 'cc', 0, 1, 2, 3, '1999-01-01', '2000-01-01');
> +affected rows: 1
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) NOT NULL AUTO_INCREMENT,
> + `t` timestamp(6) NOT NULL DEFAULT '2001-11-11 11:11:11.000000',
> + `b` blob /*!100301 COMPRESSED*/ DEFAULT NULL,
> + `c` varchar(1033) CHARACTER SET utf8 NOT NULL,
> + `u` int(11) DEFAULT NULL,
> + `m` enum('a','b','c') NOT NULL DEFAULT 'a' COMMENT 'absolute',
> + `i1` tinyint(4) DEFAULT NULL,
> + `i2` smallint(6) DEFAULT NULL,
> + `i3` bigint(20) DEFAULT NULL,
> + `v1` timestamp(6) GENERATED ALWAYS AS (`t` + interval 1 day) VIRTUAL,
> + `v2` timestamp(6) GENERATED ALWAYS AS (`t` + interval 1 month) STORED,
> + `s` timestamp(6) GENERATED ALWAYS AS ROW START,
> + `e` timestamp(6) GENERATED ALWAYS AS ROW END,
> + `ps` date NOT NULL,
> + `pe` date NOT NULL,
> + PRIMARY KEY (`x`,`e`),
> + UNIQUE KEY `u` (`u`,`e`),
> + KEY `three` (`i1`,`i2`,`i3`),
> + PERIOD FOR SYSTEM_TIME (`s`, `e`),
> + PERIOD FOR `app_time` (`ps`, `pe`),
> + CONSTRAINT `check_constr` CHECK (`u` > -1)
> +) ENGINE=DEFAULT_ENGINE AUTO_INCREMENT=2 DEFAULT CHARSET=ucs2 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +(PARTITION `p2` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `p1` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `pn` CURRENT ENGINE = DEFAULT_ENGINE)
> +affected rows: 1
> +set timestamp= unix_timestamp('2000-01-01 01:00:00');
> +affected rows: 0
> +update t1 set x= x + 8;
> +affected rows: 1
> +info: Rows matched: 1 Changed: 1 Inserted: 1 Warnings: 0
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) NOT NULL AUTO_INCREMENT,
> + `t` timestamp(6) NOT NULL DEFAULT '2001-11-11 11:11:11.000000',
> + `b` blob /*!100301 COMPRESSED*/ DEFAULT NULL,
> + `c` varchar(1033) CHARACTER SET utf8 NOT NULL,
> + `u` int(11) DEFAULT NULL,
> + `m` enum('a','b','c') NOT NULL DEFAULT 'a' COMMENT 'absolute',
> + `i1` tinyint(4) DEFAULT NULL,
> + `i2` smallint(6) DEFAULT NULL,
> + `i3` bigint(20) DEFAULT NULL,
> + `v1` timestamp(6) GENERATED ALWAYS AS (`t` + interval 1 day) VIRTUAL,
> + `v2` timestamp(6) GENERATED ALWAYS AS (`t` + interval 1 month) STORED,
> + `s` timestamp(6) GENERATED ALWAYS AS ROW START,
> + `e` timestamp(6) GENERATED ALWAYS AS ROW END,
> + `ps` date NOT NULL,
> + `pe` date NOT NULL,
> + PRIMARY KEY (`x`,`e`),
> + UNIQUE KEY `u` (`u`,`e`),
> + KEY `three` (`i1`,`i2`,`i3`),
> + PERIOD FOR SYSTEM_TIME (`s`, `e`),
> + PERIOD FOR `app_time` (`ps`, `pe`),
> + CONSTRAINT `check_constr` CHECK (`u` > -1)
> +) ENGINE=DEFAULT_ENGINE AUTO_INCREMENT=10 DEFAULT CHARSET=ucs2 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +(PARTITION `p2` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `p1` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `p3` HISTORY ENGINE = DEFAULT_ENGINE,
> + PARTITION `pn` CURRENT ENGINE = DEFAULT_ENGINE)
> +affected rows: 1
> +# Concurrent DML
> +set timestamp= unix_timestamp('2000-01-01 00:00:00');
> +create or replace table t1 (x int) with system versioning
> +partition by system_time interval 1 hour auto_increment;
> +insert into t1 values (1);
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +PARTITIONS 3
> +connect con8, localhost, root;
> +connect con7, localhost, root;
> +connect con6, localhost, root;
> +connect con5, localhost, root;
> +connect con4, localhost, root;
> +connect con3, localhost, root;
> +connect con2, localhost, root;
> +connect con1, localhost, root;
> +set timestamp= unix_timestamp('2000-01-01 01:00:00');
> +update t1 set x= x + 10;
> +connection con2;
> +set timestamp= unix_timestamp('2000-01-01 01:00:00');
> +update t1 set x= x + 20;
> +connection con3;
> +set timestamp= unix_timestamp('2000-01-01 01:00:00');
> +update t1 set x= x + 30;
> +connection con4;
> +set timestamp= unix_timestamp('2000-01-01 01:00:00');
> +update t1 set x= x + 40;
> +connection con5;
> +set timestamp= unix_timestamp('2000-01-01 01:00:00');
> +update t1 set x= x + 50;
> +connection con6;
> +set timestamp= unix_timestamp('2000-01-01 01:00:00');
> +update t1 set x= x + 60;
> +connection con7;
> +set timestamp= unix_timestamp('2000-01-01 01:00:00');
> +update t1 set x= x + 70;
> +connection con8;
> +set timestamp= unix_timestamp('2000-01-01 01:00:00');
> +update t1 set x= x + 80;
> +connection con1;
> +disconnect con1;
> +connection con2;
> +disconnect con2;
> +connection con3;
> +disconnect con3;
> +connection con4;
> +disconnect con4;
> +connection con5;
> +disconnect con5;
> +connection con6;
> +disconnect con6;
> +connection con7;
> +disconnect con7;
> +disconnect con8;
> +connection default;
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +PARTITIONS 4
> +drop tables t1;
> +create or replace table t1 (x int) with system versioning engine innodb
> +partition by system_time interval 1 hour auto;
> +start transaction;
> +select * from t1;
> +x
> +connect con1, localhost, root;
> +set lock_wait_timeout= 1;
> +insert into t1 values (1);
> +Warnings:
> +Warning 4114 Versioned table `test`.`t1`: last HISTORY partition (`p0`) is out of INTERVAL, need more HISTORY partitions
> +Error 1205 Lock wait timeout exceeded; try restarting transaction
> +Warning 4171 Auto-increment history partition: alter partition table failed
> +Warning 4171 Versioned table `test`.`t1`: adding HISTORY partition failed with error 0, see error log for details
"error 0" is strange and "see error log for details" isn't very
user-friendly, a user might not have access to the error log at all
> +select * from t1;
> +x
> +1
I don't understand, there was an error above, why did insert succeed?
> +disconnect con1;
> +connection default;
> +drop table t1;
> diff --git a/mysql-test/suite/versioning/r/rpl.result b/mysql-test/suite/versioning/r/rpl.result
> index 627f3991499..68113190889 100644
> --- a/mysql-test/suite/versioning/r/rpl.result
> +++ b/mysql-test/suite/versioning/r/rpl.result
> @@ -164,4 +164,65 @@ update t1 set i = 0;
> connection slave;
> connection master;
> drop table t1;
> +#
> +# MDEV-17554 Auto-create new partition for system versioned tables with history partitioned by INTERVAL/LIMIT
> +#
> +set timestamp= unix_timestamp('2000-01-01 00:00:00');
> +create or replace table t1 (x int) with system versioning
> +partition by system_time interval 1 hour auto_increment;
> +insert t1 values ();
> +set timestamp= unix_timestamp('2000-01-01 01:00:00');
> +delete from t1;
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +PARTITIONS 4
> +connection slave;
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO_INCREMENT
> +PARTITIONS 4
> +connection master;
> +drop table t1;
> +#
> +# MENT-685 DML events for auto-partitioned tables are written into binary log twice
> +#
the test below doesn't seem to match the description above
> +create table t1 (x int) partition by hash (x);
> +alter table t1 add partition partitions 1 auto_increment;
> +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use
> +drop table t1;
> +create table t1 (x int) with system versioning
> +partition by system_time limit 1000 auto
> +(partition p1 history, partition pn current);
> +insert into t1 values (1);
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME LIMIT 1000 AUTO_INCREMENT
> +(PARTITION `p1` HISTORY ENGINE = ENGINE,
> + PARTITION `p2` HISTORY ENGINE = ENGINE,
> + PARTITION `pn` CURRENT ENGINE = ENGINE)
> +connection slave;
> +show create table t1;
> +Table Create Table
> +t1 CREATE TABLE `t1` (
> + `x` int(11) DEFAULT NULL
> +) ENGINE=ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING
> + PARTITION BY SYSTEM_TIME LIMIT 1000 AUTO_INCREMENT
> +(PARTITION `p1` HISTORY ENGINE = ENGINE,
> + PARTITION `p2` HISTORY ENGINE = ENGINE,
> + PARTITION `pn` CURRENT ENGINE = ENGINE)
> +select * from t1;
> +x
> +1
> +connection master;
> +drop table t1;
> include/rpl_end.inc
> diff --git a/sql/handler.cc b/sql/handler.cc
> index c6ecc9566d8..c86f96b9689 100644
> --- a/sql/handler.cc
> +++ b/sql/handler.cc
> @@ -1512,7 +1512,7 @@ int ha_commit_trans(THD *thd, bool all)
> DBUG_ASSERT(thd->transaction.stmt.ha_list == NULL ||
> trans == &thd->transaction.stmt);
>
> - if (thd->in_sub_stmt)
> + if (thd->in_sub_stmt & ~SUB_STMT_AUTO_HIST)
1. why? That is, why do you call ha_commit_trans() when adding a partition?
2. please add a test with insert under start transaction.
what should happen in this case?
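Something along these lines, as a rough sketch only (the table name and timestamps are hypothetical):
set timestamp= unix_timestamp('2000-01-01 00:00:00');
create or replace table t2 (x int) with system versioning
partition by system_time interval 1 hour auto_increment;
insert into t2 values (1);
start transaction;
set timestamp= unix_timestamp('2000-01-01 01:00:00');
# does this auto-add a HISTORY partition, and does the implicit ALTER commit the open transaction?
insert into t2 values (2);
commit;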
> {
> DBUG_ASSERT(0);
> /*
> diff --git a/sql/partition_info.cc b/sql/partition_info.cc
> index f4b7260f8b0..617c839721b 100644
> --- a/sql/partition_info.cc
> +++ b/sql/partition_info.cc
> @@ -848,29 +850,289 @@ void partition_info::vers_set_hist_part(THD *thd)
> else
> vers_info->hist_part= next;
> }
> + }
> + else if (vers_info->interval.is_set())
> + {
> + if (vers_info->hist_part->range_value <= thd->query_start())
> + {
> + partition_element *next= NULL;
> + bool error= true;
> + List_iterator<partition_element> it(partitions);
> + while (next != vers_info->hist_part)
> + next= it++;
> +
> + while ((next= it++) != vers_info->now_part)
> + {
> + vers_info->hist_part= next;
> + if (next->range_value > thd->query_start())
> + {
> + error= false;
> + break;
> + }
> + }
> + if (error)
> + my_error(WARN_VERS_PART_FULL, MYF(ME_WARNING|ME_ERROR_LOG),
> + table->s->db.str, table->s->table_name.str,
> + vers_info->hist_part->partition_name, "INTERVAL");
> + }
> + }
> +
> + if (!vers_info->auto_inc ||
> + vers_info->hist_part->id + VERS_MIN_EMPTY < vers_info->now_part->id)
> return;
> +
> + switch (thd->lex->sql_command)
> + {
> + case SQLCOM_DELETE:
> + if (thd->lex->last_table()->vers_conditions.delete_history)
> + break;
> + /* fallthrough */
> + case SQLCOM_UPDATE:
> + case SQLCOM_INSERT:
> + case SQLCOM_INSERT_SELECT:
> + case SQLCOM_LOAD:
> + case SQLCOM_REPLACE:
> + case SQLCOM_REPLACE_SELECT:
> + case SQLCOM_DELETE_MULTI:
> + case SQLCOM_UPDATE_MULTI:
it's rather fragile to check for specific SQL statements.
Why not look at the table lock instead?
(with a special check for delete history)
Ok, it's a bug. Please add a test with multi-update,
where a partitioned table is *not* updated. Like
update t1, tpart set t1.x=5 where t1.y=tpart.z;
here a new partition should clearly not be created.
also, a simpler example (multi-update is difficult):
insert t1 select * from tpart;
add both, please.
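A rough sketch of both cases (the setup is only illustrative; t1/tpart are the names from the examples above):
create table t1 (x int, y int);
create table tpart (z int) with system versioning
partition by system_time limit 1000 auto_increment;
# tpart is only read, never modified: no new HISTORY partition should be created
update t1, tpart set t1.x=5 where t1.y=tpart.z;
# same here: tpart is only the source of the SELECT
insert t1 select * from tpart;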
> + {
> + TABLE_SHARE *share;
> + List_iterator_fast<TABLE_SHARE> it(thd->vers_auto_part_tables);
> + while ((share= it++))
> + {
> + if (table->s == share)
> + break;
> + }
> + if (share)
> + break;
> + /* Prevent spawning multiple instances of vers_add_auto_parts() */
> + bool altering;
> + mysql_mutex_lock(&table->s->LOCK_share);
> + altering= table->s->vers_auto_part;
> + if (!altering)
> + table->s->vers_auto_part= true;
> + mysql_mutex_unlock(&table->s->LOCK_share);
> + if (altering)
> + break;
what happens if an ALTER is already in progress?
Logically this thread should wait. Where does it do that?
> + if (thd->vers_auto_part_tables.push_back(table->s))
> + {
> + my_error(ER_OUT_OF_RESOURCES, MYF(0));
> + }
> + }
> + default:;
> }
> +}
>
> - if (vers_info->interval.is_set())
> - {
> - if (vers_info->hist_part->range_value > thd->query_start())
> - return;
>
> - partition_element *next= NULL;
> - List_iterator<partition_element> it(partitions);
> - while (next != vers_info->hist_part)
> - next= it++;
> +/**
> + @brief Run fast_alter_partition_table() to add new history partitions
> + for tables requiring them.
> +*/
> +void vers_add_auto_parts(THD *thd)
> +{
> + HA_CREATE_INFO create_info;
> + Alter_info alter_info;
> + String query;
> + TABLE_LIST *table_list= NULL;
> + partition_info *save_part_info= thd->work_part_info;
> + Query_tables_list save_query_tables;
> + Reprepare_observer *save_reprepare_observer= thd->m_reprepare_observer;
> + Diagnostics_area new_stmt_da(thd->query_id, false, true);
> + Diagnostics_area *save_stmt_da= thd->get_stmt_da();
> + bool save_no_write_to_binlog= thd->lex->no_write_to_binlog;
> + const CSET_STRING save_query= thd->query_string;
> + thd->m_reprepare_observer= NULL;
> + thd->lex->reset_n_backup_query_tables_list(&save_query_tables);
> + thd->in_sub_stmt|= SUB_STMT_AUTO_HIST;
> + thd->lex->no_write_to_binlog= !thd->is_current_stmt_binlog_format_row();
> + TABLE_LIST *tl;
> +
> + DBUG_ASSERT(!thd->vers_auto_part_tables.is_empty());
> +
> + for (TABLE_SHARE &share: thd->vers_auto_part_tables)
> + {
> + tl= (TABLE_LIST *) thd->alloc(sizeof(TABLE_LIST));
> + tl->init_one_table(&share.db, &share.table_name, NULL, TL_READ_NO_INSERT);
> + tl->open_type= OT_BASE_ONLY;
> + tl->i_s_requested_object= OPEN_TABLE_ONLY;
> + tl->next_global= table_list;
> + table_list= tl;
> + }
> +
> + /* NB: mysql_execute_command() can be recursive because of PS/SP.
> + Don't duplicate any processing including error messages. */
> + thd->vers_auto_part_tables.empty();
> +
> + DBUG_ASSERT(!thd->is_error());
> + /* NB: we have to preserve m_affected_rows, m_row_count_func, m_last_insert_id, etc */
> + thd->set_stmt_da(&new_stmt_da);
> + new_stmt_da.set_overwrite_status(true);
> +
> + DDL_options_st ddl_opts_none;
> + ddl_opts_none.init();
> + if (open_and_lock_tables(thd, ddl_opts_none, table_list, false, 0))
> + goto open_err;
> +
> + for (tl= table_list; tl; tl= tl->next_global)
> + {
> + TABLE *table= tl->table;
> + DBUG_ASSERT(table);
> + DBUG_ASSERT(table->s->get_table_ref_type() == TABLE_REF_BASE_TABLE);
> + DBUG_ASSERT(table->versioned());
> + DBUG_ASSERT(table->part_info);
> + DBUG_ASSERT(table->part_info->vers_info);
> + alter_info.reset();
> + alter_info.partition_flags= ALTER_PARTITION_ADD|ALTER_PARTITION_AUTO_HIST;
> + create_info.init();
> + create_info.alter_info= &alter_info;
> + Alter_table_ctx alter_ctx(thd, tl, 1, &table->s->db, &table->s->table_name);
> +
> + if (thd->mdl_context.upgrade_shared_lock(table->mdl_ticket,
> + MDL_SHARED_NO_WRITE,
> + thd->variables.lock_wait_timeout))
> + goto exit;
> +
> + create_info.db_type= table->s->db_type();
> + create_info.options|= HA_VERSIONED_TABLE;
> + DBUG_ASSERT(create_info.db_type);
> +
> + create_info.vers_info.set_start(table->s->vers_start_field()->field_name);
> + create_info.vers_info.set_end(table->s->vers_end_field()->field_name);
> +
> + partition_info *part_info= new partition_info();
> + if (unlikely(!part_info))
> + {
> + my_error(ER_OUT_OF_RESOURCES, MYF(0));
> + goto exit;
> + }
> + part_info->use_default_num_partitions= false;
> + part_info->use_default_num_subpartitions= false;
> + part_info->num_parts= 1;
> + part_info->num_subparts= table->part_info->num_subparts;
> + part_info->subpart_type= table->part_info->subpart_type;
> + if (unlikely(part_info->vers_init_info(thd)))
> + {
> + my_error(ER_OUT_OF_RESOURCES, MYF(0));
> + goto exit;
> + }
> + /* Choose first non-occupied name suffix */
> + uint32 suffix= table->part_info->num_parts - 1;
> + DBUG_ASSERT(suffix > 0);
> + char part_name[MAX_PART_NAME_SIZE + 1];
> + if (make_partition_name(part_name, suffix))
> + {
> +vers_make_name_err:
> + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
> + WARN_VERS_HIST_PART_ERROR,
> + "Auto-increment history partition: "
> + "name generation failed for suffix %d", suffix);
> + my_error(WARN_VERS_HIST_PART_ERROR, MYF(ME_WARNING),
> + table->s->db.str, table->s->table_name.str, 0);
> + goto exit;
> + }
> + List_iterator_fast<partition_element> it(table->part_info->partitions);
> + partition_element *el;
> + while ((el= it++))
> + {
> + if (0 == my_strcasecmp(&my_charset_latin1, el->partition_name, part_name))
> + {
> + if (make_partition_name(part_name, ++suffix))
> + goto vers_make_name_err;
> + it.rewind();
> + }
> + }
>
> - while ((next= it++) != vers_info->now_part)
> + // NB: set_ok_status() requires DA_EMPTY
> + thd->get_stmt_da()->reset_diagnostics_area();
> +
> + thd->work_part_info= part_info;
> + if (part_info->set_up_defaults_for_partitioning(thd, table->file,
> + NULL, suffix + 1))
> {
> - vers_info->hist_part= next;
> - if (next->range_value > thd->query_start())
> - return;
> + push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
> + WARN_VERS_HIST_PART_ERROR,
> + "Auto-increment history partition: "
> + "setting up defaults failed");
> + goto exit;
> + }
> + bool partition_changed= false;
> + bool fast_alter_partition= false;
> + if (prep_alter_part_table(thd, table, &alter_info, &create_info,
> + &partition_changed, &fast_alter_partition))
> + {
> + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, WARN_VERS_HIST_PART_ERROR,
> + "Auto-increment history partition: "
> + "alter partitition prepare failed");
> + goto exit;
> + }
> + if (!fast_alter_partition)
> + {
> + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, WARN_VERS_HIST_PART_ERROR,
> + "Auto-increment history partition: "
> + "fast alter partitition is not possible");
> + my_error(WARN_VERS_HIST_PART_ERROR, MYF(ME_WARNING),
> + table->s->db.str, table->s->table_name.str, 0);
> + goto exit;
> + }
> + DBUG_ASSERT(partition_changed);
> + if (mysql_prepare_alter_table(thd, table, &create_info, &alter_info,
> + &alter_ctx))
> + {
> + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, WARN_VERS_HIST_PART_ERROR,
> + "Auto-increment history partition: "
> + "alter prepare failed");
> + goto exit;
> + }
> +
> + // Forge query string for rpl logging
> + if (!thd->lex->no_write_to_binlog)
> + {
> + query.set(STRING_WITH_LEN("ALTER TABLE `"), &my_charset_latin1);
> +
> + if (query.append(table->s->db) ||
> + query.append(STRING_WITH_LEN("`.`")) ||
> + query.append(table->s->table_name) ||
> + query.append("` ADD PARTITION (PARTITION `") ||
> + query.append(part_name) ||
> + query.append("` HISTORY) AUTO_INCREMENT"))
> + {
> + my_error(ER_OUT_OF_RESOURCES, MYF(ME_ERROR_LOG));
> + goto exit;
> + }
> + CSET_STRING qs(query.c_ptr(), query.length(), &my_charset_latin1);
> + thd->set_query(qs);
> + }
> +
> + if (fast_alter_partition_table(thd, table, &alter_info, &create_info,
> + tl, &table->s->db, &table->s->table_name))
> + {
> + push_warning(thd, Sql_condition::WARN_LEVEL_WARN, WARN_VERS_HIST_PART_ERROR,
> + "Auto-increment history partition: "
> + "alter partition table failed");
> + my_error(WARN_VERS_HIST_PART_ERROR, MYF(ME_WARNING),
> + tl->db.str, tl->table_name.str, 0);
> }
> - my_error(WARN_VERS_PART_FULL, MYF(ME_WARNING|ME_ERROR_LOG),
> - table->s->db.str, table->s->table_name.str,
> - vers_info->hist_part->partition_name, "INTERVAL");
> }
> +
> + if (!thd->transaction.stmt.is_empty())
> + trans_commit_stmt(thd);
> +
> +exit:
> + // If we failed with error allow non-processed tables to be processed next time
> + if (tl)
> + while ((tl= tl->next_global))
> + tl->table->s->vers_auto_part= false;
> + close_thread_tables(thd);
> +open_err:
> + thd->work_part_info= save_part_info;
> + thd->m_reprepare_observer= save_reprepare_observer;
> + thd->lex->restore_backup_query_tables_list(&save_query_tables);
> + thd->in_sub_stmt&= ~SUB_STMT_AUTO_HIST;
> + if (!new_stmt_da.is_warning_info_empty())
> + save_stmt_da->copy_sql_conditions_from_wi(thd, new_stmt_da.get_warning_info());
> + thd->set_stmt_da(save_stmt_da);
> + thd->lex->no_write_to_binlog= save_no_write_to_binlog;
> + thd->set_query(save_query);
> }
>
>
> diff --git a/sql/partition_info.h b/sql/partition_info.h
> index eb8e53a381a..d02eecea073 100644
> --- a/sql/partition_info.h
> +++ b/sql/partition_info.h
> @@ -34,10 +34,19 @@ typedef bool (*check_constants_func)(THD *thd, partition_info *part_info);
>
> struct st_ddl_log_memory_entry;
>
> +
> +/* Auto-create history partition configuration */
> +static const uint VERS_MIN_EMPTY= 1;
No, this doesn't work. One can easily set @@timestamp to do a multi-hour jump and no
fixed value of VERS_MIN_EMPTY will help. You need to add partitions before the statement, as
I wrote in my previous review.
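To illustrate the concern, a hypothetical sequence reusing the INTERVAL 1 HOUR pattern from the tests above:
set timestamp= unix_timestamp('2000-01-01 00:00:00');
create or replace table t1 (x int) with system versioning
partition by system_time interval 1 hour auto_increment;
insert into t1 values (1);
# jump over many intervals at once; a fixed VERS_MIN_EMPTY of 1 cannot keep enough empty partitions ready
set timestamp= unix_timestamp('2000-01-01 12:00:00');
update t1 set x= x + 1;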
> +static const uint VERS_MIN_INTERVAL= 3600; // seconds
> +static const uint VERS_MIN_LIMIT= 1000;
> +static const uint VERS_ERROR_TIMEOUT= 300; // seconds
> +
> +
> struct Vers_part_info : public Sql_alloc
> {
> Vers_part_info() :
> limit(0),
> + auto_inc(false),
> now_part(NULL),
> hist_part(NULL)
> {
> diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
> index 7cc1faea79b..4be3342e78e 100644
> --- a/sql/sql_yacc.yy
> +++ b/sql/sql_yacc.yy
> @@ -7521,14 +7533,19 @@ add_partition_rule:
>
> add_part_extra:
> /* empty */
> - | '(' part_def_list ')'
> + | '(' part_def_list ')' opt_vers_auto_inc
> {
> LEX *lex= Lex;
> lex->part_info->num_parts= lex->part_info->partitions.elements;
> + if ($4)
> + lex->alter_info.partition_flags|= ALTER_PARTITION_AUTO_HIST;
> }
> - | PARTITIONS_SYM real_ulong_num
> + | PARTITIONS_SYM real_ulong_num opt_vers_auto_inc
> {
> - Lex->part_info->num_parts= $2;
> + LEX *lex= Lex;
^^^ this pattern has been obsolete for about 10 years.
It does no harm, but adds no value either.
> + lex->part_info->num_parts= $2;
> + if ($3)
> + lex->alter_info.partition_flags|= ALTER_PARTITION_AUTO_HIST;
> }
> ;
>
> diff --git a/storage/mroonga/mrn_table.cpp b/storage/mroonga/mrn_table.cpp
> index b10668cfcce..6458402f572 100644
> --- a/storage/mroonga/mrn_table.cpp
> +++ b/storage/mroonga/mrn_table.cpp
> @@ -932,7 +932,7 @@ MRN_SHARE *mrn_get_share(const char *table_name, TABLE *table, int *error)
> share->wrap_key_info = NULL;
> share->wrap_primary_key = MAX_KEY;
> }
> - memcpy(wrap_table_share, table->s, sizeof(*wrap_table_share));
> + memcpy((void *)wrap_table_share, (void *)table->s, sizeof(*wrap_table_share));
why is that?
> mrn_init_sql_alloc(current_thd, &(wrap_table_share->mem_root));
> wrap_table_share->keys = share->wrap_keys;
> wrap_table_share->key_info = share->wrap_key_info;
Regards,
Sergei
VP of MariaDB Server Engineering
and security(a)mariadb.org

Re: [Maria-developers] d4d332d196d: Added error message to --die in mtr
by Sergei Golubchik 13 Apr '20
Hi, Michael!
On Apr 13, Michael Widenius wrote:
> revision-id: d4d332d196d (mariadb-10.5.2-119-gd4d332d196d)
> parent(s): c7ab676192d
> author: Michael Widenius <monty(a)mariadb.com>
> committer: Michael Widenius <monty(a)mariadb.com>
> timestamp: 2020-04-09 01:37:01 +0300
> message:
>
> Added error message to --die in mtr
>
> ---
> client/mysqltest.cc | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/client/mysqltest.cc b/client/mysqltest.cc
> index 3e08d3f62d7..f593d8e949e 100644
> --- a/client/mysqltest.cc
> +++ b/client/mysqltest.cc
> @@ -9748,7 +9748,8 @@ int main(int argc, char **argv)
> break;
> case Q_DIE:
> /* Abort test with error code and error message */
> - die("%s", command->first_argument);
> + die("%s", command->first_argument[0] ? command->first_argument :
> + "Explicite --die command executed");
typo. "Explicit"
> break;
> case Q_EXIT:
> /* Stop processing any more commands */
Regards,
Sergei
VP of MariaDB Server Engineering
and security(a)mariadb.org

12 Apr '20
Hi Alexey,
Please find my review input below.
There is one big issue and a number of smaller ones.
> commit 654fdfee33e3eafe3b7f25d7e213717c22ea1e18
> Author: Alexey Botchkov <holyfoot(a)askmonty.org>
> Date: Mon Mar 30 01:00:28 2020 +0400
>
> MDEV-17399 Add support for JSON_TABLE.
>
> Syntax for JSON_TABLE added.
> The ha_json_table handler added. Executing the JSON_TABLE we
> create the temporary table of the ha_json_table, add dependencies
> of other tables and sometimes conditions to WHERE.
>
== The big issue ==
I think some design choices of this patch are questionable:
The temporary table has a unique key. What is it for? The key is defined over
the field holding the JSON text. What if the JSON text gets bigger than
MAX_KEY_LEN?
Then, there is an Item_func_eq(json_text, temp_table_field), which is set to
be "always equal" with set_always_equal(). This looks like a hack.
EXPLAIN shows that JSON_TABLE uses a kind of ref access which I think is
counter-intuitive.
What is the goal of all this?
The JSON_TABLE table is "laterally dependent" on the tables that are referred
from its arguments.
For the optimizer, this means:
1. The JSON_TABLE table must be put after its dependencies (this is similar to
outer joins)
2. The JSON_TABLE can only be read when there are "current records" available
for each of the dependencies (an example when they are not available: join
buffering: there are many records in the buffer, so which one should we produce
matches for? For outer joins, the join buffering code has a complex system to
track this).
The above "kludges" attempt to trick the optimizer into meeting
requirements #1 and #2, but I think this approach is hack-ish and has
holes.
It would be much better if the optimizer was explicitly aware that a table is
"laterally dependent" on other tables.
"table dependencies" are already there, so the property #1 is already taken
care of.
We still need to take care of #2 which means disable join buffering in such
cases. I think this should be easy to do.
== Small bits from trying out the patch ==
=== FOR ORDINALITY doesn't seem to work ===
select *
from
json_table('[{"a": 1, "b": [11,111]}, {"a": 2, "b": [22,222]}]',
'$[*]' COLUMNS(a for ordinality)
) as tt;
+------+
| a |
+------+
| |
| |
+------+
2 rows in set (0.001 sec)
=== NULL ON ERROR doesn't seem to work ===
select *
from
json_table('[{"a": 1, "b": [11,111]}, {"a": "bbbb", "b": [22,222]}]',
'$[*]' COLUMNS( a DECIMAL(6,3) PATH '$.a' NULL ON ERROR)) as tt;
+-------+
| a |
+-------+
| 1.000 |
| 0.000 |
+-------+
I would expect the second row to have NULL, not 0.
=== Lack of test coverage ===
Other parts, like NESTED PATH, also have no test coverage at all. I think this
must be addressed before this is done.
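For example, a NESTED PATH query along these lines (a hypothetical test case built from the JSON documents used above) has no coverage:
select *
from
json_table('[{"a": 1, "b": [11,111]}, {"a": 2, "b": [22,222]}]',
'$[*]' COLUMNS(a INT PATH '$.a',
NESTED PATH '$.b[*]' COLUMNS(b INT PATH '$'))
) as tt;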
=== No error on Invalid Json input ===
If I pass invalid JSON, I get no warning or error or anything:
MariaDB [test]> select * from json_table('[{"a": 1, invalid-json', '$[*]' COLUMNS( a INT PATH '$.a')) as tt;
+------+
| a |
+------+
| 1 |
+------+
1 row in set (0.001 sec)
=== Recursive ON-rule in the grammar ===
The json_opt_on_empty_or_error rule in sql_yacc.yy is recursive and causes the
following to be accepted:
select *
from
json_table(
'[{"a": 1, "b": [11,111]}, {"a": 2, "b": [22,222]}]',
'$[*]'
COLUMNS(a INT PATH '$.a' NULL ON EMPTY NULL ON EMPTY NULL ON EMPTY)
) as tt;
Is this intentional?
=== No error on invalid LATERAL dependency ===
create table t1 (item_name varchar(32), item_props varchar(1024));
insert into t1 values ('Laptop', '{"color": "black", "price": 1000}');
insert into t1 values ('Jeans', '{"color": "blue", "price": 50}');
MariaDB [test]> select * from t1 right join json_table(t1.item_props,'$' columns( color varchar(100) path '$.color')) as T on 1;
Empty set (0.000 sec)
The above query cannot be executed, as left join execution requires that T is
computed before t1, but T is dependent on t1. We don't get an error for this,
though.
MySQL produces this:
ERROR 3668 (HY000): INNER or LEFT JOIN must be used for LATERAL references made by 'T'
with left join, there's a demo of where the trickery with the optimizer falls
short (and I think one can construct other examples of this):
MariaDB [test]> select * from t1 left join json_table(t1.item_props,'$' columns( color varchar(100) path '$.color')) as T on 1;
+-----------+-----------------------------------+-------+
| item_name | item_props | color |
+-----------+-----------------------------------+-------+
| Laptop | {"color": "black", "price": 1000} | blue |
| Jeans | {"color": "blue", "price": 50} | blue |
+-----------+-----------------------------------+-------+
2 rows in set (0.002 sec)
MariaDB [test]> explain select * from t1 left join json_table(t1.item_props,'$' columns( color varchar(100) path '$.color')) as T on 1;
+------+-------------+-------+------+---------------+------+---------+------+------+-------------------------------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+------+-------------+-------+------+---------------+------+---------+------+------+-------------------------------------------------+
| 1 | SIMPLE | t1 | ALL | NULL | NULL | NULL | NULL | 2 | |
| 1 | SIMPLE | T | ALL | NULL | NULL | NULL | NULL | 40 | Using where; Using join buffer (flat, BNL join) |
+------+-------------+-------+------+---------------+------+---------+------+------+-------------------------------------------------+
2 rows in set (0.001 sec)
=== AS is required with table alias ===
sql_yacc.yy has this:
> +table_function:
> + JSON_TABLE_SYM '(' expr ',' TEXT_STRING_sys
> + { ...
> + }
> + json_table_columns_clause ')' AS ident_table_alias
Is the 'AS' really required? MySQL-8 doesn't seem to require it.
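That is, one might expect a form like this to parse as well (a hypothetical example; the grammar quoted above would reject it without AS):
select * from json_table('[{"a": 1}]', '$[*]' COLUMNS(a INT PATH '$.a')) tt;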
BR
Sergei
--
Sergei Petrunia, Software Developer
MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog

Re: [Maria-developers] [Commits] c1394ab6b5c: MDEV-22191: Range access is not picked when index_merge_sort_union is turned off
by Sergey Petrunia 09 Apr '20
Hi Varun,
On Wed, Apr 08, 2020 at 11:48:28PM +0530, Varun wrote:
> revision-id: c1394ab6b5c0830ec09f6afdae11fa82bae1a123 (mariadb-5.5.67-9-gc1394ab6b5c)
> parent(s): 64b70b09e6ac253b7915f6120ade5e69fa750b18
> author: Varun Gupta
> committer: Varun Gupta
> timestamp: 2020-04-08 23:47:03 +0530
> message:
>
> MDEV-22191: Range access is not picked when index_merge_sort_union is turned off
>
> When index_merge_sort_union is turned off only ror scans were considered for range
> scans, which is wrong.
> To fix the problem ensure both ror scans and non ror scans are considered for range
> access
>
> ---
> mysql-test/r/range.result | 19 +++++++++++++++++++
> mysql-test/r/range_mrr_icp.result | 19 +++++++++++++++++++
> mysql-test/t/range.test | 14 ++++++++++++++
> sql/opt_range.cc | 16 ++++++++++------
> 4 files changed, 62 insertions(+), 6 deletions(-)
...
> --- a/sql/opt_range.cc
> +++ b/sql/opt_range.cc
> @@ -949,7 +949,8 @@ QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
> static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
> bool index_read_must_be_used,
> bool update_tbl_stats,
> - double read_time);
> + double read_time,
> + bool ror_scans_required);
> static
> TRP_INDEX_INTERSECT *get_best_index_intersect(PARAM *param, SEL_TREE *tree,
> double read_time);
> @@ -3146,7 +3147,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
>
> /* Get best 'range' plan and prepare data for making other plans */
> if ((range_trp= get_key_scans_params(¶m, tree, FALSE, TRUE,
> - best_read_time)))
> + best_read_time, FALSE)))
> {
> best_trp= range_trp;
> best_read_time= best_trp->read_cost;
> @@ -4708,7 +4709,8 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
> {
> DBUG_EXECUTE("info", print_sel_tree(param, *ptree, &(*ptree)->keys_map,
> "tree in SEL_IMERGE"););
> - if (!(*cur_child= get_key_scans_params(param, *ptree, TRUE, FALSE, read_time)))
> + if (!(*cur_child= get_key_scans_params(param, *ptree, TRUE, FALSE,
> + read_time, TRUE)))
> {
> /*
> One of index scans in this index_merge is more expensive than entire
> @@ -5030,7 +5032,7 @@ TABLE_READ_PLAN *merge_same_index_scans(PARAM *param, SEL_IMERGE *imerge,
> index merge retrievals are not well calibrated
> */
> trp= get_key_scans_params(param, *imerge->trees, FALSE, TRUE,
> - read_time);
> + read_time, TRUE);
> }
As far as I understand, this call constructs a range scan, not a portion of an
index_merge scan.
So, it is not correct to require a ROR scan here.
>
> DBUG_RETURN(trp);
> @@ -6747,6 +6749,7 @@ TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
> index_read_must_be_used if TRUE, assume 'index only' option will be set
> (except for clustered PK indexes)
> read_time don't create read plans with cost > read_time.
> + ror_scans_required set to TRUE for index merge
> RETURN
> Best range read plan
> NULL if no plan found or error occurred
> @@ -6755,7 +6758,8 @@ TRP_ROR_INTERSECT *get_best_covering_ror_intersect(PARAM *param,
> static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
> bool index_read_must_be_used,
> bool update_tbl_stats,
> - double read_time)
> + double read_time,
> + bool ror_scans_required)
> {
> uint idx;
> SEL_ARG **key,**end, **key_to_read= NULL;
> @@ -6802,7 +6806,7 @@ static TRP_RANGE *get_key_scans_params(PARAM *param, SEL_TREE *tree,
> update_tbl_stats, &mrr_flags,
> &buf_size, &cost);
>
> - if (!param->is_ror_scan &&
> + if (ror_scans_required && !param->is_ror_scan &&
> !optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION))
So, the meaning of the "ror_scans_required" parameter is actually:
"require ROR scans if the index_merge optimizer settings are such that it can
only use ROR-scans"
This is very complicated and non-orthogonal.
How about moving the
"!optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION)"
check up into get_best_disjunct_quick()?
That is, get_best_disjunct_quick() will pass ror_scans_required=true iff
OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION is not set.
This way, index_merge switches will only be checked in index_merge code, and
ror_scans_required will mean what its name says.
> {
> /* The scan is not a ROR-scan, just skip it */
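For illustration, a rough sketch of that suggestion (hypothetical and untested,
just to show where the check would move):

  /* In get_best_disjunct_quick(): without sort-union, index_merge can only
     combine ROR scans, so require them from the child range scans */
  bool ror_scans_required=
    !optimizer_flag(param->thd, OPTIMIZER_SWITCH_INDEX_MERGE_SORT_UNION);
  ...
  if (!(*cur_child= get_key_scans_params(param, *ptree, TRUE, FALSE,
                                         read_time, ror_scans_required)))

  /* In get_key_scans_params(): the parameter now means exactly what it says */
  if (ror_scans_required && !param->is_ror_scan)
  {
    /* The scan is not a ROR-scan, just skip it */
    continue;
  }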
BR
Sergei
--
Sergei Petrunia, Software Developer
MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog
[Maria-developers] 383e1a5b71e: MENT-703: Disable rpl.rpl_skip_replication, rpl.rpl_set_statement_default_master, rpl.rpl_extra_col_master_myisam, rpl.rpl_slave_load_tmpdir_not_exist tests in ES 10.2 and above
by Sujatha 08 Apr '20
revision-id: 383e1a5b71e164f1c1b17a085ba90a8c30cbdf46 (mariadb-10.2.24-716-g383e1a5b71e)
parent(s): 2969d0702d56405d1aec8c16a272ac85fef7bd61
author: Sujatha
committer: Sujatha
timestamp: 2020-04-08 13:31:33 +0530
message:
MENT-703: Disable rpl.rpl_skip_replication, rpl.rpl_set_statement_default_master, rpl.rpl_extra_col_master_myisam, rpl.rpl_slave_load_tmpdir_not_exist tests in ES 10.2 and above
Problem:
=======
Disable following tests in ES 10.2 and above, till MDEV-13258 and MDEV-14203 are fixed.
rpl.rpl_skip_replication, rpl.rpl_set_statement_default_master,
rpl.rpl_extra_col_master_myisam, rpl.rpl_slave_load_tmpdir_not_exist
---
mysql-test/suite/rpl/disabled.def | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/mysql-test/suite/rpl/disabled.def b/mysql-test/suite/rpl/disabled.def
index bdefb1660bd..951f8a6aa0f 100644
--- a/mysql-test/suite/rpl/disabled.def
+++ b/mysql-test/suite/rpl/disabled.def
@@ -16,3 +16,7 @@ rpl_partition_archive : MDEV-5077 2013-09-27 svoj Cannot exchange partition
rpl_row_binlog_max_cache_size : MDEV-11092
rpl_row_index_choice : MDEV-11666
rpl_slave_grp_exec: MDEV-10514
+rpl_skip_replication : MDEV-13258
+rpl_set_statement_default_master : MDEV-13258
+rpl_extra_col_master_myisam : MDEV-14203
+rpl_slave_load_tmpdir_not_exist : MDEV-14203
Re: [Maria-developers] 7665323d02a: MENT-202 Implement system variable that makes slow master shutdown the default behavior
by Sergei Golubchik 07 Apr '20
Hi, Andrei!
ok to push. one comment below.
On Apr 07, Andrei Elkin wrote:
> revision-id: 7665323d02a (mariadb-10.3.22-194-g7665323d02a)
> parent(s): 04e38549af8
> author: Andrei Elkin <andrei.elkin(a)mariadb.com>
> committer: Andrei Elkin <andrei.elkin(a)mariadb.com>
> timestamp: 2020-04-07 14:25:25 +0300
> message:
>
> MENT-202 Implement system variable that makes slow master shutdown the default behavior
>
> @@global.shutdown_wait_slaves is introduced to hold a desirable
> shutdown mode that the SHUTDOWN sql command runs with implicitly
> when invoked without any option.
>
> diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
> index 8bb9f86c8b6..2b23edc7000 100644
> --- a/sql/sys_vars.cc
> +++ b/sql/sys_vars.cc
> @@ -4270,6 +4270,12 @@ static Sys_var_ulong Sys_profiling_history_size(
> VALID_RANGE(0, 100), DEFAULT(15), BLOCK_SIZE(1));
> #endif
>
> +static Sys_var_mybool Sys_shutdown_wait_for_slaves(
> + "shutdown_wait_for_slaves",
> + "when ON, SHUTDOWN command runs with implicit WAIT FOR ALL SLAVES option.",
> + GLOBAL_VAR(opt_shutdown_wait_for_slaves), CMD_LINE(OPT_ARG),
> + DEFAULT(FALSE), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(0));
You don't need to write out this whole tail of default values. The last
line could be just
DEFAULT(FALSE));
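i.e. the whole declaration would then read (a sketch of the hunk above with the
trailing arguments dropped, assuming they really match the constructor's
defaults):

  static Sys_var_mybool Sys_shutdown_wait_for_slaves(
         "shutdown_wait_for_slaves",
         "when ON, SHUTDOWN command runs with implicit WAIT FOR ALL SLAVES option.",
         GLOBAL_VAR(opt_shutdown_wait_for_slaves), CMD_LINE(OPT_ARG),
         DEFAULT(FALSE));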
Regards,
Sergei
VP of MariaDB Server Engineering
and security(a)mariadb.org
[Maria-developers] 2888a225214: MDEV-21117: --tc-heuristic-recover=rollback is not replication safe
by sujatha 07 Apr '20
revision-id: 2888a22521475f734b28ed65d199cfe2a754aed7 (mariadb-10.1.43-100-g2888a225214)
parent(s): 5720db2b43491e5aec9265bce9637e00c72fa0aa
author: Sujatha
committer: Sujatha
timestamp: 2020-04-07 19:21:45 +0530
message:
MDEV-21117: --tc-heuristic-recover=rollback is not replication safe
Problem:
=======
When run after a master server crash, --tc-heuristic-recover=rollback produces
an inconsistent server state, with the binlog still containing transactions that
were rolled back by the option. A server recovered this way may not be used for
replication.
Fix:
===
During "--tc-heuristic-recover=ROLLBACK", query the storage engine to get
binlog file name and position corresponding to the last committed transaction.
This marks the consistent binlog state. If last_commit_file is not set then
checkpoint binary log file is considered as starting point.
Look for first transactional event beyond the consistent binlog state. This
will be the starting point for heuristic rollback. Consider this event
specific starting point as binlog truncation position. Now traverse the rest
of binlogs beyond this point. During this traversal check for the presence of
DDL or non transactional operations, as they cannot be safely rolled back. If
such events are found the truncation will not happen it will return an error.
If only transactional events are found beyond the binlog truncation position
it is safe to truncate binlog. The log gets truncated to the identified
position and the GTID state is adjusted accordingly. If there are more binary
logs beyond the being truncated file they are all removed.
---
.../r/binlog_heuristic_rollback_active_log.result | 18 +
.../binlog/r/binlog_truncate_multi_log.result | 21 +
.../suite/binlog/r/binlog_truncate_none.result | 42 ++
.../t/binlog_heuristic_rollback_active_log.test | 75 ++++
.../suite/binlog/t/binlog_truncate_multi_log.test | 81 ++++
.../suite/binlog/t/binlog_truncate_none.test | 130 ++++++
.../suite/rpl/r/rpl_heuristic_fail_over.result | 53 +++
.../suite/rpl/t/rpl_heuristic_fail_over.test | 160 +++++++
sql/log.cc | 479 ++++++++++++++++++++-
sql/log.h | 7 +-
storage/innobase/handler/ha_innodb.cc | 3 +
storage/xtradb/handler/ha_innodb.cc | 3 +
12 files changed, 1068 insertions(+), 4 deletions(-)
diff --git a/mysql-test/suite/binlog/r/binlog_heuristic_rollback_active_log.result b/mysql-test/suite/binlog/r/binlog_heuristic_rollback_active_log.result
new file mode 100644
index 00000000000..eff95d05aac
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_heuristic_rollback_active_log.result
@@ -0,0 +1,18 @@
+call mtr.add_suppression("Can't init tc log");
+call mtr.add_suppression("Aborting");
+RESET MASTER;
+CREATE TABLE t ( f INT ) ENGINE=INNODB;
+INSERT INTO t VALUES (10);
+SET DEBUG_SYNC= "commit_before_update_end_pos SIGNAL con1_ready WAIT_FOR con1_go";
+INSERT INTO t VALUES (20);
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+# Kill the server
+"One row should be present in table 't'"
+SELECT COUNT(*) FROM t;
+COUNT(*)
+1
+"gtid_binlog_state should be 0-1-2
+SELECT @@GLOBAL.gtid_binlog_state;
+@@GLOBAL.gtid_binlog_state
+0-1-2
+DROP TABLE t;
diff --git a/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result b/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result
new file mode 100644
index 00000000000..9796a480cca
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_truncate_multi_log.result
@@ -0,0 +1,21 @@
+SET @old_max_binlog_size= @@global.max_binlog_size;
+SET GLOBAL max_binlog_size= 4096;
+call mtr.add_suppression("Can't init tc log");
+call mtr.add_suppression("Aborting");
+RESET MASTER;
+CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+SET DEBUG_SYNC= "commit_before_update_end_pos SIGNAL con1_ready WAIT_FOR con1_go";
+INSERT INTO t1 VALUES (2, REPEAT("x", 4100));
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+# Kill the server
+"Zero rows shoule be present in table"
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+SELECT @@GLOBAL.gtid_current_pos;
+@@GLOBAL.gtid_current_pos
+0-1-1
+DROP TABLE t1;
+SELECT @@GLOBAL.gtid_binlog_state;
+@@GLOBAL.gtid_binlog_state
+0-1-2
diff --git a/mysql-test/suite/binlog/r/binlog_truncate_none.result b/mysql-test/suite/binlog/r/binlog_truncate_none.result
new file mode 100644
index 00000000000..bae87985208
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_truncate_none.result
@@ -0,0 +1,42 @@
+SET @old_max_binlog_size= @@global.max_binlog_size;
+SET GLOBAL max_binlog_size= 4096;
+call mtr.add_suppression("Can't init tc log");
+call mtr.add_suppression("Aborting");
+RESET MASTER;
+CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+INSERT INTO t1 VALUES (1, REPEAT("x", 4100));
+CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+show binary logs;
+Log_name File_size
+master-bin.000001 #
+master-bin.000002 #
+# Kill the server
+"Zero records should be there."
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+1
+show binary logs;
+Log_name File_size
+master-bin.000001 #
+master-bin.000002 #
+master-bin.000003 #
+master-bin.000004 #
+DROP TABLE t1,t2;
+call mtr.add_suppression("Can't init tc log");
+call mtr.add_suppression("Aborting");
+RESET MASTER;
+CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+show binary logs;
+Log_name File_size
+master-bin.000001 #
+# Kill the server
+"Zero records should be there."
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+0
+SHOW TABLES;
+Tables_in_test
+t1
+t2
+DROP TABLE t1,t2;
diff --git a/mysql-test/suite/binlog/t/binlog_heuristic_rollback_active_log.test b/mysql-test/suite/binlog/t/binlog_heuristic_rollback_active_log.test
new file mode 100644
index 00000000000..87af83a7a92
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_heuristic_rollback_active_log.test
@@ -0,0 +1,75 @@
+# ==== Purpose ====
+#
+# Test verifies the truncation of single binary log file.
+#
+# ==== Implementation ====
+#
+# Steps:
+# 0 - Create table t1 and insert a row.
+# 1 - Insert an another row such that it gets written to binlog but commit
+# in engine fails as server crashed at this point.
+# 2 - Restart server with --tc-heuristic-recover=ROLLBACK
+# 3 - Upon server start 'master-bin.000001' will be truncated to contain
+# only the first insert
+#
+# ==== References ====
+#
+# MDEV-21117: --tc-heuristic-recover=rollback is not replication safe
+
+
+--source include/have_innodb.inc
+--source include/have_log_bin.inc
+--source include/have_debug.inc
+--source include/have_binlog_format_statement.inc
+
+call mtr.add_suppression("Can't init tc log");
+call mtr.add_suppression("Aborting");
+
+connect(master,localhost,root,,);
+connect(master1,localhost,root,,);
+
+--connection master
+RESET MASTER;
+CREATE TABLE t ( f INT ) ENGINE=INNODB;
+INSERT INTO t VALUES (10);
+
+--connection master1
+# Hold insert after write to binlog and before "run_commit_ordered" in engine
+SET DEBUG_SYNC= "commit_before_update_end_pos SIGNAL con1_ready WAIT_FOR con1_go";
+send INSERT INTO t VALUES (20);
+
+--connection master
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+
+--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+wait
+EOF
+
+--source include/kill_mysqld.inc
+--source include/wait_until_disconnected.inc
+#
+# Server restart
+#
+--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+restart: --tc-heuristic-recover=ROLLBACK
+EOF
+--source include/wait_until_disconnected.inc
+
+--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+restart:
+EOF
+
+connection default;
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+--connection master
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+--echo "One row should be present in table 't'"
+SELECT COUNT(*) FROM t;
+
+--echo "gtid_binlog_state should be 0-1-2
+SELECT @@GLOBAL.gtid_binlog_state;
+DROP TABLE t;
diff --git a/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test b/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test
new file mode 100644
index 00000000000..976b987d3bc
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_truncate_multi_log.test
@@ -0,0 +1,81 @@
+# ==== Purpose ====
+#
+# Test verifies truncation of multiple binary logs.
+#
+# ==== Implementation ====
+#
+# Steps:
+# 0 - Set max_binlog_size= 4096. Create a table do an insert such that the
+# max_binlog_size is reached and binary log gets rotated.
+# 1 - Using debug simulation make the server crash at a point where the DML
+# transaction is written to binary log but not committed in engine.
+# 2 - At the time of crash two binary logs will be there master-bin.0000001
+# and master-bin.000002.
+# 3 - Restart server with --tc-heuristic-recover=ROLLBACK
+# 4 - Since the prepared DML in master-bin.000001 is rolled back the first
+# binlog will be truncated prior to the DML and master-bin.000002 will be
+# removed.
+#
+# ==== References ====
+#
+# MDEV-21117: --tc-heuristic-recover=rollback is not replication safe
+
+
+--source include/have_innodb.inc
+--source include/have_log_bin.inc
+--source include/have_debug.inc
+--source include/have_debug_sync.inc
+--source include/have_binlog_format_row.inc
+
+SET @old_max_binlog_size= @@global.max_binlog_size;
+SET GLOBAL max_binlog_size= 4096;
+
+call mtr.add_suppression("Can't init tc log");
+call mtr.add_suppression("Aborting");
+
+RESET MASTER;
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+
+connect(master1,localhost,root,,);
+connect(master2,localhost,root,,);
+
+--connection master1
+# Hold insert after write to binlog and before "run_commit_ordered" in engine
+SET DEBUG_SYNC= "commit_before_update_end_pos SIGNAL con1_ready WAIT_FOR con1_go";
+send INSERT INTO t1 VALUES (2, REPEAT("x", 4100));
+
+--connection master2
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+
+--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+wait
+EOF
+
+--source include/kill_mysqld.inc
+--source include/wait_until_disconnected.inc
+
+#
+# Server restart
+#
+--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+restart: --tc-heuristic-recover=ROLLBACK --debug-dbug=d,simulate_innodb_forget_commit_pos
+EOF
+--source include/wait_until_disconnected.inc
+
+--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+restart:
+EOF
+
+connection default;
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+--echo "Zero rows shoule be present in table"
+SELECT COUNT(*) FROM t1;
+
+SELECT @@GLOBAL.gtid_current_pos;
+
+DROP TABLE t1;
+SELECT @@GLOBAL.gtid_binlog_state;
+
diff --git a/mysql-test/suite/binlog/t/binlog_truncate_none.test b/mysql-test/suite/binlog/t/binlog_truncate_none.test
new file mode 100644
index 00000000000..cc3fd4f0b37
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_truncate_none.test
@@ -0,0 +1,130 @@
+# ==== Purpose ====
+#
+# Test case verifies no binlog truncation happens when non transactional
+# events are found in binlog after the last committed transaction.
+#
+# ==== Implementation ====
+#
+# Steps:
+# 0 - Set max_binlog_size= 4096. Create a table and do an insert such that
+# the max_binlog_size is reached and binary log gets rotated.
+# 1 - Create a table in newly created binary log and crash the server
+# 2 - Restart server with --tc-heuristic-recover=ROLLBACK
+# 3 - Recovery code will get the last committed DML specific postion and
+# will try to check if binlog can be truncated upto this position.
+# Since a DDL is present beyond this no truncation will happen.
+# ==== References ====
+#
+# MDEV-21117: --tc-heuristic-recover=rollback is not replication safe
+
+
+--source include/have_innodb.inc
+--source include/have_log_bin.inc
+--source include/have_debug.inc
+--source include/have_binlog_format_row.inc
+
+SET @old_max_binlog_size= @@global.max_binlog_size;
+SET GLOBAL max_binlog_size= 4096;
+
+call mtr.add_suppression("Can't init tc log");
+call mtr.add_suppression("Aborting");
+
+RESET MASTER;
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+INSERT INTO t1 VALUES (1, REPEAT("x", 4100));
+CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+--source include/show_binary_logs.inc
+
+--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+wait
+EOF
+
+--source include/kill_mysqld.inc
+--source include/wait_until_disconnected.inc
+
+#
+# Server restart
+#
+--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+restart: --tc-heuristic-recover=ROLLBACK
+EOF
+--source include/wait_until_disconnected.inc
+
+--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+restart:
+EOF
+
+connection default;
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+--echo "Zero records should be there."
+SELECT COUNT(*) FROM t1;
+--source include/show_binary_logs.inc
+DROP TABLE t1,t2;
+
+# ==== Purpose ====
+#
+# Test case verifies no binlog truncation happens when only DDLs are present in
+# the binary log. Since none of the DMLs are performed in storage engine,
+# Engine will not have last committed transaction file name or position.
+# Truncation code should return success.
+#
+# ==== Implementation ====
+#
+# Steps:
+# 0 - Create two table t1, t2
+# 1 - Kill and restart server with --tc-heuristic-recover=ROLLBACK
+# 2 - Only DDL statements are present in the binary log. Since
+# no DML was performed engine will not have last commited transaction
+# specific binary log name and position. Since no transactional events
+# are found, truncation code should simply return.
+#
+# ==== References ====
+#
+# MDEV-21117: --tc-heuristic-recover=rollback is not replication safe
+
+
+--source include/have_innodb.inc
+--source include/have_log_bin.inc
+--source include/have_debug.inc
+--source include/have_binlog_format_row.inc
+
+call mtr.add_suppression("Can't init tc log");
+call mtr.add_suppression("Aborting");
+
+RESET MASTER;
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+CREATE TABLE t2 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+--source include/show_binary_logs.inc
+
+--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+wait
+EOF
+
+--source include/kill_mysqld.inc
+--source include/wait_until_disconnected.inc
+
+#
+# Server restart
+#
+--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+restart: --tc-heuristic-recover=ROLLBACK --debug-dbug=d,simulate_innodb_forget_commit_pos
+EOF
+--source include/wait_until_disconnected.inc
+
+--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+restart:
+EOF
+
+connection default;
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+--echo "Zero records should be there."
+SELECT COUNT(*) FROM t1;
+SHOW TABLES;
+DROP TABLE t1,t2;
+
diff --git a/mysql-test/suite/rpl/r/rpl_heuristic_fail_over.result b/mysql-test/suite/rpl/r/rpl_heuristic_fail_over.result
new file mode 100644
index 00000000000..8391245ba27
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_heuristic_fail_over.result
@@ -0,0 +1,53 @@
+include/rpl_init.inc [topology=1->2]
+include/stop_slave.inc
+set global rpl_semi_sync_slave_enabled = 1;
+CHANGE MASTER TO master_use_gtid= current_pos;
+include/start_slave.inc
+ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
+set global rpl_semi_sync_master_enabled = 1;
+set global rpl_semi_sync_master_wait_point=AFTER_SYNC;
+SET @old_max_binlog_size= @@global.max_binlog_size;
+SET GLOBAL max_binlog_size= 4096;
+call mtr.add_suppression("Can't init tc log");
+call mtr.add_suppression("Aborting");
+CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+INSERT INTO t1 VALUES (1, 'dummy1');
+SET DEBUG_SYNC= "commit_before_update_end_pos SIGNAL con1_ready WAIT_FOR con1_go";
+INSERT INTO t1 VALUES (2, REPEAT("x", 4100));
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+# Kill the server
+include/stop_slave.inc
+SELECT @@GLOBAL.gtid_current_pos;
+@@GLOBAL.gtid_current_pos
+0-1-5
+FOUND /Crashed binlog file \.\/master\-bin\.000001 size is [0-9]*, but truncated to */ in mysqld.1.err
+set global rpl_semi_sync_master_enabled = 1;
+set global rpl_semi_sync_master_wait_point=AFTER_SYNC;
+CHANGE MASTER TO master_host='127.0.0.1', master_port=SERVER_MYPORT_2, master_user='root', master_use_gtid=CURRENT_POS;
+set global rpl_semi_sync_slave_enabled = 1;
+include/start_slave.inc
+INSERT INTO t1 VALUES (3, 'dummy3');
+SELECT COUNT(*) FROM t1;
+COUNT(*)
+2
+SHOW VARIABLES LIKE 'gtid_current_pos';
+Variable_name Value
+gtid_current_pos 0-2-6
+SHOW VARIABLES LIKE 'gtid_current_pos';
+Variable_name Value
+gtid_current_pos 0-2-6
+DROP TABLE t1;
+set global rpl_semi_sync_master_enabled = 0;
+set global rpl_semi_sync_slave_enabled = 0;
+set global rpl_semi_sync_master_wait_point=default;
+set global rpl_semi_sync_master_enabled = 0;
+set global rpl_semi_sync_slave_enabled = 0;
+set global rpl_semi_sync_master_wait_point=default;
+include/stop_slave.inc
+RESET MASTER;
+RESET SLAVE;
+RESET MASTER;
+RESET SLAVE;
+CHANGE MASTER TO master_host='127.0.0.1', master_port=SERVER_MYPORT_1, master_user='root', master_use_gtid=no;
+include/start_slave.inc
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_heuristic_fail_over.test b/mysql-test/suite/rpl/t/rpl_heuristic_fail_over.test
new file mode 100644
index 00000000000..5e9273de62d
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_heuristic_fail_over.test
@@ -0,0 +1,160 @@
+# ==== Purpose ====
+#
+# Test verifies replication failover scenario.
+#
+# ==== Implementation ====
+#
+# Steps:
+# 0 - Have two servers with id's 1 and 2. Enable semi-sync based
+# replication. Have semi sync master wait point as 'after_sync'.
+# 1 - Create a table and insert a row. While inserting second row simulate
+# a server crash at once the transaction is written to binlog, flushed
+# and synced but the binlog position is not updated.
+# 2 - Restart the server using tc-heuristic-recover=ROLLBACK
+# 3 - Post restart switch the crashed master to slave. Execute CHANGE MASTER
+# TO command to connect to server id 2.
+# 4 - Slave should be able to connect to master.
+# 5 - Execute some DML on new master with server id 2. Ensure that it gets
+# replicated to server id 1.
+# 6 - Verify the "gtid_current_pos" for correctness.
+# 7 - Clean up
+#
+# ==== References ====
+#
+# MDEV-21117: --tc-heuristic-recover=rollback is not replication safe
+
+
+--source include/have_semisync.inc
+--source include/have_innodb.inc
+--source include/have_log_bin.inc
+--source include/have_debug.inc
+--source include/have_debug_sync.inc
+--source include/have_binlog_format_row.inc
+--let $rpl_topology=1->2
+--source include/rpl_init.inc
+
+--connection server_2
+--source include/stop_slave.inc
+set global rpl_semi_sync_slave_enabled = 1;
+CHANGE MASTER TO master_use_gtid= current_pos;
+--source include/start_slave.inc
+
+
+--connection server_1
+ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
+set global rpl_semi_sync_master_enabled = 1;
+set global rpl_semi_sync_master_wait_point=AFTER_SYNC;
+SET @old_max_binlog_size= @@global.max_binlog_size;
+SET GLOBAL max_binlog_size= 4096;
+
+call mtr.add_suppression("Can't init tc log");
+call mtr.add_suppression("Aborting");
+
+CREATE TABLE t1 (a INT PRIMARY KEY, b MEDIUMTEXT) ENGINE=Innodb;
+INSERT INTO t1 VALUES (1, 'dummy1');
+--save_master_pos
+
+--connection server_2
+--sync_with_master
+
+--connection server_1
+connect(master1,localhost,root,,);
+connect(master2,localhost,root,,);
+
+--connection master1
+# Hold insert after write to binlog and before "run_commit_ordered" in engine
+SET DEBUG_SYNC= "commit_before_update_end_pos SIGNAL con1_ready WAIT_FOR con1_go";
+send INSERT INTO t1 VALUES (2, REPEAT("x", 4100));
+
+--connection master2
+SET DEBUG_SYNC= "now WAIT_FOR con1_ready";
+--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+wait
+EOF
+
+--source include/kill_mysqld.inc
+--source include/wait_until_disconnected.inc
+
+--connection server_2
+--error 2003
+--source include/stop_slave.inc
+SELECT @@GLOBAL.gtid_current_pos;
+
+--connection server_1
+#
+# Server restart
+#
+--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+restart: --tc-heuristic-recover=ROLLBACK
+EOF
+--source include/wait_until_disconnected.inc
+
+--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+restart:
+EOF
+
+connection default;
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+--connection server_1
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+# Check error log for correct messages.
+let $log_error_= `SELECT @@GLOBAL.log_error`;
+if(!$log_error_)
+{
+ # MySQL Server on windows is started with --console and thus
+ # does not know the location of its .err log, use default location
+ let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.1.err;
+}
+--let SEARCH_FILE=$log_error_
+--let SEARCH_RANGE=-50000
+--let SEARCH_PATTERN=Crashed binlog file \.\/master\-bin\.000001 size is [0-9]*, but truncated to *
+--source include/search_pattern_in_file.inc
+
+--connection server_2
+set global rpl_semi_sync_master_enabled = 1;
+set global rpl_semi_sync_master_wait_point=AFTER_SYNC;
+
+--connection server_1
+--replace_result $SERVER_MYPORT_2 SERVER_MYPORT_2
+eval CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_2, master_user='root', master_use_gtid=CURRENT_POS;
+set global rpl_semi_sync_slave_enabled = 1;
+--source include/start_slave.inc
+
+--connection server_2
+INSERT INTO t1 VALUES (3, 'dummy3');
+--save_master_pos
+
+--connection server_1
+--sync_with_master
+SELECT COUNT(*) FROM t1;
+SHOW VARIABLES LIKE 'gtid_current_pos';
+
+--connection server_2
+SHOW VARIABLES LIKE 'gtid_current_pos';
+DROP TABLE t1;
+set global rpl_semi_sync_master_enabled = 0;
+set global rpl_semi_sync_slave_enabled = 0;
+set global rpl_semi_sync_master_wait_point=default;
+--save_master_pos
+
+--connection server_1
+--sync_with_master
+set global rpl_semi_sync_master_enabled = 0;
+set global rpl_semi_sync_slave_enabled = 0;
+set global rpl_semi_sync_master_wait_point=default;
+--source include/stop_slave.inc
+RESET MASTER;
+RESET SLAVE;
+
+--connection server_2
+RESET MASTER;
+RESET SLAVE;
+--replace_result $SERVER_MYPORT_1 SERVER_MYPORT_1
+eval CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_1, master_user='root', master_use_gtid=no;
+--source include/start_slave.inc
+
+--source include/rpl_end.inc
diff --git a/sql/log.cc b/sql/log.cc
index 0efef6d1e29..f383033e975 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -3164,6 +3164,7 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
checksum_alg_reset(BINLOG_CHECKSUM_ALG_UNDEF),
relay_log_checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF),
description_event_for_exec(0), description_event_for_queue(0),
+ last_commit_pos_offset(0),
current_binlog_id(0)
{
/*
@@ -3173,6 +3174,7 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
before main().
*/
index_file_name[0] = 0;
+ last_commit_pos_file[0]= 0;
bzero((char*) &index_file, sizeof(index_file));
bzero((char*) &purge_index_file, sizeof(purge_index_file));
}
@@ -7877,6 +7879,7 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
first= false;
}
+ DEBUG_SYNC(leader->thd, "commit_before_update_end_pos");
/* update binlog_end_pos so it can be read by dump thread
*
* note: must be _after_ the RUN_HOOK(after_flush) or else
@@ -8964,7 +8967,7 @@ int TC_LOG_MMAP::open(const char *opt_name)
{
if (my_errno != ENOENT)
goto err;
- if (using_heuristic_recover())
+ if (using_heuristic_recover(opt_name))
return 1;
if ((fd= mysql_file_create(key_file_tclog, logname, CREATE_MODE,
O_RDWR | O_CLOEXEC, MYF(MY_WME))) < 0)
@@ -9497,14 +9500,42 @@ TC_LOG_MMAP tc_log_mmap;
1 heuristic recovery was performed
*/
-int TC_LOG::using_heuristic_recover()
+int TC_LOG::using_heuristic_recover(const char* opt_name)
{
+ LOG_INFO log_info;
+ int error;
+
if (!tc_heuristic_recover)
return 0;
sql_print_information("Heuristic crash recovery mode");
+
if (ha_recover(0))
+ {
sql_print_error("Heuristic crash recovery failed");
+ }
+
+ /*
+ Check if TC log is referring to binary log not memory map. Ensure that
+ tc_heuristic_recover being ROLLBACK". If both match initiate binlog
+ truncation mechanism.
+ */
+ if (!strcmp(opt_name,opt_bin_logname) &&
+ tc_heuristic_recover == TC_HEURISTIC_RECOVER_ROLLBACK)
+ {
+ if ((error= mysql_bin_log.find_log_pos(&log_info, NullS, 1)))
+ {
+ if (error != LOG_INFO_EOF)
+ sql_print_error("find_log_pos() failed (error: %d)", error);
+ }
+ else
+ {
+ if ((error= heuristic_binlog_rollback()))
+ sql_print_error("Heuristic crash recovery failed to remove "
+ "rolled back trancations from binary log");
+ }
+ }
+
sql_print_information("Please restart mysqld without --tc-heuristic-recover");
return 1;
}
@@ -9512,6 +9543,448 @@ int TC_LOG::using_heuristic_recover()
/****** transaction coordinator log for 2pc - binlog() based solution ******/
#define TC_LOG_BINLOG MYSQL_BIN_LOG
+/**
+ Truncates the current binlog to specified position. Removes the rest of binlogs
+ which are present after this binlog file.
+
+ @param truncate_file Holds the binlog name to be truncated
+ @param truncate_pos Position within binlog from where it needs to
+ truncated.
+
+ @retval true ok
+ @retval false error
+
+*/
+bool MYSQL_BIN_LOG::truncate_and_remove_binlogs(const char *truncate_file,
+ my_off_t truncate_pos)
+{
+ int error= 0;
+#ifdef HAVE_REPLICATION
+ LOG_INFO log_info;
+ THD *thd= current_thd;
+ my_off_t index_file_offset=0;
+ File file= -1;
+ MY_STAT s;
+ my_off_t binlog_size;
+
+ if ((error= open_purge_index_file(TRUE)))
+ {
+ sql_print_error("MYSQL_BIN_LOG::purge_logs failed to sync the index file.");
+ goto err;
+ }
+
+ if ((error=find_log_pos(&log_info, truncate_file, 1)))
+ goto err;
+
+ while (!(error= find_next_log(&log_info, 1)))
+ {
+ if (!index_file_offset)
+ index_file_offset= log_info.index_file_start_offset;
+ if((error= register_purge_index_entry(log_info.log_file_name)))
+ {
+ sql_print_error("MYSQL_BIN_LOG::purge_logs failed to copy %s to register file.",
+ log_info.log_file_name);
+ goto err;
+ }
+ }
+
+ if (error != LOG_INFO_EOF)
+ {
+ sql_print_error("Error while looking for next binlog file to be truncated. "
+ "Error:%d", error);
+ goto err;
+ }
+
+ if (!index_file_offset)
+ index_file_offset= log_info.index_file_start_offset;
+
+ if ((error= sync_purge_index_file()))
+ {
+ sql_print_error("MYSQL_BIN_LOG::purge_logs failed to flush register file.");
+ goto err;
+ }
+
+ DBUG_ASSERT(index_file_offset != 0);
+ // Trim index file
+ if (mysql_file_chsize(index_file.file, index_file_offset, '\n', MYF(MY_WME)) ||
+ mysql_file_sync(index_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)))
+ {
+ sql_print_error("Failed to trim binlog index file "
+ "when master server is recovering it.");
+ mysql_file_close(index_file.file, MYF(MY_WME));
+ goto err;
+ }
+
+ /* Reset data in old index cache */
+ reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 1);
+
+ /* Read each entry from purge_index_file and delete the file. */
+ if (is_inited_purge_index_file() &&
+ (error= purge_index_entry(thd, NULL, TRUE)))
+ {
+ sql_print_error("MYSQL_BIN_LOG::purge_logs failed to process registered files"
+ " that would be purged.");
+ goto err;
+ }
+
+ if (truncate_pos)
+ {
+ if ((file= mysql_file_open(key_file_binlog, truncate_file,
+ O_RDWR | O_BINARY, MYF(MY_WME))) < 0)
+ {
+ sql_print_error("Failed to open binlog file:%s for truncation.",
+ truncate_file);
+ error= 1;
+ goto err;
+ }
+ my_stat(truncate_file, &s, MYF(0));
+ binlog_size= s.st_size;
+
+ /* Change binlog file size to truncate_pos */
+ if (mysql_file_chsize(file, truncate_pos, 0, MYF(MY_WME)) ||
+ mysql_file_sync(file, MYF(MY_WME|MY_SYNC_FILESIZE)))
+ {
+ sql_print_error("Failed to trim the crashed binlog file:%s to size:%llu",
+ truncate_file, truncate_pos);
+ mysql_file_close(file, MYF(MY_WME));
+ error= 1;
+ goto err;
+ }
+ else
+ {
+ sql_print_information("Crashed binlog file %s size is %llu, "
+ "but truncated to %llu.",
+ truncate_file, binlog_size, truncate_pos);
+ }
+ mysql_file_close(file, MYF(MY_WME));
+ file= -1;
+ }
+
+err:
+ if (file >= 0)
+ mysql_file_close(file, MYF(MY_WME));
+ close_purge_index_file();
+#endif
+ return error;
+}
+
+/**
+ Returns the checkpoint binlog file name found in the lastest binlog file.
+
+ @param checkpoint_file Holds the binlog checkpoint file name.
+
+ @retval 0 ok
+ @retval 1 error
+
+*/
+int TC_LOG_BINLOG::get_binlog_checkpoint_file(char* checkpoint_file)
+{
+ Log_event *ev= NULL;
+ bool binlog_checkpoint_found= false;
+ LOG_INFO log_info;
+ const char *errmsg;
+ IO_CACHE log;
+ File file;
+ Format_description_log_event fdle(BINLOG_VERSION);
+ char log_name[FN_REFLEN];
+ int error=1;
+
+ if (!fdle.is_valid())
+ return 1;
+
+ if ((error= find_log_pos(&log_info, NullS, 1)))
+ {
+ sql_print_error("find_log_pos() failed (error: %d)", error);
+ return error;
+ }
+
+ // Move to the last binary log.
+ do
+ {
+ strmake_buf(log_name, log_info.log_file_name);
+ } while (!(error= find_next_log(&log_info, 1)));
+
+ if (error != LOG_INFO_EOF)
+ {
+ sql_print_error("find_log_pos() failed (error: %d)", error);
+ return error;
+ }
+ if ((file= open_binlog(&log, log_name, &errmsg)) < 0)
+ {
+ sql_print_error("%s", errmsg);
+ return error;
+ }
+ while (!binlog_checkpoint_found &&
+ (ev= Log_event::read_log_event(&log, 0, &fdle,
+ opt_master_verify_checksum)) &&
+ ev->is_valid())
+ {
+ enum Log_event_type typ= ev->get_type_code();
+ if (typ == BINLOG_CHECKPOINT_EVENT)
+ {
+ uint dir_len;
+ Binlog_checkpoint_log_event *cev= (Binlog_checkpoint_log_event *)ev;
+ if (cev->binlog_file_len >= FN_REFLEN)
+ {
+ sql_print_error("Incorrect binlog checkpoint event with too "
+ "long file name found.");
+ delete ev;
+ ev= NULL;
+ end_io_cache(&log);
+ mysql_file_close(file, MYF(MY_WME));
+ return 1;
+ }
+ else
+ {
+ dir_len= dirname_length(log_name);
+ strmake(strnmov(checkpoint_file, log_name, dir_len),
+ cev->binlog_file_name, FN_REFLEN - 1 - dir_len);
+ binlog_checkpoint_found= true;
+ }
+ }
+ delete ev;
+ ev= NULL;
+ } // End of while
+ end_io_cache(&log);
+ mysql_file_close(file, MYF(MY_WME));
+ file= -1;
+ /*
+ Old binary log without checkpoint found, binlog truncation is not
+ possible. Hence return error.
+ */
+ if (!binlog_checkpoint_found)
+ return 1;
+ else
+ {
+ // Look for binlog checkpoint file in binlog index file.
+ if (find_log_pos(&log_info, checkpoint_file, 1))
+ {
+ sql_print_error("Binlog file '%s' not found in binlog index, needed "
+ "for recovery. Aborting.", checkpoint_file);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+
+/**
+ Truncates the binary log, according to the transactions that got rolled
+ back from engine, during heuristic-recover=ROLLBACK. Global GTID state is
+ adjusted as per the truncated binlog.
+
+ Called from @c TC_LOG::using_heuristic_recover(const char* opt_name)
+
+ @param opt_name The base name of binary log.
+
+ @return indicates success or failure of binlog rollback
+ @retval 0 success
+ @retval 1 failure
+
+*/
+int TC_LOG_BINLOG::heuristic_binlog_rollback()
+{
+ int error=0;
+#ifdef HAVE_REPLICATION
+ Log_event *ev= NULL;
+ char engine_commit_file[FN_REFLEN];
+ char binlog_truncate_file_name[FN_REFLEN];
+ char checkpoint_file[FN_REFLEN];
+ my_off_t binlog_truncate_pos= 0;
+ my_off_t engine_commit_pos= 0;
+ LOG_INFO log_info;
+ const char *errmsg;
+ IO_CACHE log;
+ File file;
+ Format_description_log_event fdle(BINLOG_VERSION);
+ bool found_engine_commit_pos= false;
+ bool found_truncate_pos= false;
+ bool is_safe= true;
+ my_off_t tmp_truncate_pos=0;
+ rpl_gtid last_gtid;
+ bool last_gtid_standalone= false;
+ bool last_gtid_valid= false;
+
+ if (!fdle.is_valid())
+ return 1;
+
+ DBUG_EXECUTE_IF("simulate_innodb_forget_commit_pos",
+ {
+ last_commit_pos_file[0]= 0;
+ };);
+
+ // Initialize engine_commit_file/pos
+ if (last_commit_pos_file[0] != 0)
+ {
+ strmake_buf(engine_commit_file, last_commit_pos_file);
+ engine_commit_pos= last_commit_pos_offset;
+ }
+ else
+ {
+ if ((error= get_binlog_checkpoint_file(checkpoint_file)))
+ return error;
+ strmake_buf(engine_commit_file, checkpoint_file);
+ }
+
+ // Look for last last_commit_pos_file in binlog index file
+ if ((error= find_log_pos(&log_info, engine_commit_file, 1)))
+ {
+ sql_print_error("find_log_pos() failed (error: %d)", error);
+ return error;
+ }
+ if ((file= open_binlog(&log, log_info.log_file_name, &errmsg)) < 0)
+ {
+ sql_print_error("Failed to open the binlog:%s for recovery. Error: %s",
+ binlog_truncate_file_name, errmsg);
+ goto err;
+ }
+
+ /*
+ If there is no engine specific commit file we are doing checkpoint file
+ based recovery. Hence we mark "found_engine_commit_pos" true. Next start
+ looking for the first transactional event group with is the candidate for
+ rollback.
+ */
+ if (engine_commit_pos == 0)
+ found_engine_commit_pos= true;
+
+ error= read_state_from_file();
+ if (error && error != 2)
+ {
+ sql_print_error("Failed to load global gtid binlog state from file");
+ goto err;
+ }
+
+ for(;;)
+ {
+ while (is_safe && (ev= Log_event::read_log_event(&log, 0, &fdle,
+ opt_master_verify_checksum)) &&
+ ev->is_valid())
+ {
+ enum Log_event_type typ= ev->get_type_code();
+ switch (typ)
+ {
+ case XID_EVENT:
+ if (ev->log_pos == engine_commit_pos)
+ {
+ found_engine_commit_pos= true;
+ }
+ break;
+ case GTID_LIST_EVENT:
+ {
+ Gtid_list_log_event *glev= (Gtid_list_log_event *)ev;
+ /* Initialise the binlog state from the Gtid_list event. */
+ if (!found_truncate_pos && glev->count > 0 &&
+ rpl_global_gtid_binlog_state.load(glev->list, glev->count))
+ goto err;
+ }
+ break;
+ case GTID_EVENT:
+ {
+ Gtid_log_event *gev= (Gtid_log_event *)ev;
+
+ /* Update the binlog state with any GTID logged after Gtid_list. */
+ last_gtid.domain_id= gev->domain_id;
+ last_gtid.server_id= gev->server_id;
+ last_gtid.seq_no= gev->seq_no;
+ last_gtid_standalone=
+ ((gev->flags2 & Gtid_log_event::FL_STANDALONE) ? true : false);
+ last_gtid_valid= true;
+ if (gev->flags2 & Gtid_log_event::FL_TRANSACTIONAL &&
+ !found_truncate_pos)
+ {
+ if ((engine_commit_pos == 0 || found_engine_commit_pos))
+ {
+ found_truncate_pos=true;
+ strmake_buf(binlog_truncate_file_name, log_info.log_file_name);
+ binlog_truncate_pos= tmp_truncate_pos;
+ }
+ }
+ else
+ {
+ if (found_truncate_pos)
+ is_safe=false;
+ }
+ }
+ break;
+ default:
+ /* Nothing. */
+ break;
+ }// End switch
+ if (!found_truncate_pos && last_gtid_valid &&
+ ((last_gtid_standalone && !ev->is_part_of_group(typ)) ||
+ (!last_gtid_standalone &&
+ (typ == XID_EVENT ||
+ (typ == QUERY_EVENT &&
+ (((Query_log_event *)ev)->is_commit() ||
+ ((Query_log_event *)ev)->is_rollback()))))))
+ {
+ if (rpl_global_gtid_binlog_state.update_nolock(&last_gtid, false))
+ goto err;
+ last_gtid_valid= false;
+ }
+ // Used to identify the last group specific end position.
+ tmp_truncate_pos= ev->log_pos;
+ delete ev;
+ ev= NULL;
+ }// End While
+ end_io_cache(&log);
+ mysql_file_close(file, MYF(MY_WME));
+ file= -1;
+
+ if (is_safe)
+ {
+ if ((error=find_next_log(&log_info, 1)))
+ {
+ if (error != LOG_INFO_EOF)
+ {
+ sql_print_error("Error reading binlog files during recovery. Aborting.");
+ goto err;
+ }
+ else
+ break;
+ }
+
+ if ((file= open_binlog(&log, log_info.log_file_name, &errmsg)) < 0)
+ {
+ sql_print_error("%s", errmsg);
+ goto err;
+ }
+ }
+ } //end of for(;;)
+ if (!found_truncate_pos)
+ {
+ return 0; // Nothing to truncate
+ }
+ else
+ DBUG_ASSERT(binlog_truncate_pos > 0);
+
+ if (is_safe)
+ {
+ if (truncate_and_remove_binlogs(binlog_truncate_file_name,
+ binlog_truncate_pos))
+ goto err;
+ }
+ if (write_state_to_file())
+ {
+ sql_print_error("Failed to save binlog GTID state during heuristic "
+ "binlog rollback. ");
+ goto err;
+ }
+ return 0;
+
+err:
+ error= 1;
+ if (file >= 0)
+ {
+ end_io_cache(&log);
+ mysql_file_close(file, MYF(MY_WME));
+ }
+ sql_print_error("Binlog truncation failed");
+#endif
+ return error;
+}
+
int TC_LOG_BINLOG::open(const char *opt_name)
{
int error= 1;
@@ -9526,7 +9999,7 @@ int TC_LOG_BINLOG::open(const char *opt_name)
return 1;
}
- if (using_heuristic_recover())
+ if (using_heuristic_recover(opt_name))
{
mysql_mutex_lock(&LOCK_log);
/* generate a new binlog to mask a corrupted one */
diff --git a/sql/log.h b/sql/log.h
index 277e5c6f69c..e91bdd94083 100644
--- a/sql/log.h
+++ b/sql/log.h
@@ -41,7 +41,8 @@ bool stmt_has_updated_non_trans_table(const THD* thd);
class TC_LOG
{
public:
- int using_heuristic_recover();
+ int using_heuristic_recover(const char* opt_name);
+ virtual int heuristic_binlog_rollback() { return 0; };
TC_LOG() {}
virtual ~TC_LOG() {}
@@ -694,6 +695,7 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
void commit_checkpoint_notify(void *cookie);
int recover(LOG_INFO *linfo, const char *last_log_name, IO_CACHE *first_log,
Format_description_log_event *fdle, bool do_xa);
+ int heuristic_binlog_rollback();
int do_binlog_recovery(const char *opt_name, bool do_xa_recovery);
#if !defined(MYSQL_CLIENT)
@@ -794,6 +796,9 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
int purge_first_log(Relay_log_info* rli, bool included);
int set_purge_index_file_name(const char *base_file_name);
int open_purge_index_file(bool destroy);
+ bool truncate_and_remove_binlogs(const char *truncate_file,
+ my_off_t truncate_pos);
+ int get_binlog_checkpoint_file(char* checkpoint_file);
bool is_inited_purge_index_file();
int close_purge_index_file();
int clean_purge_index_file();
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 4de2cdbeaec..42d655ee0e8 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -16493,6 +16493,9 @@ innobase_xa_recover(
{
DBUG_ASSERT(hton == innodb_hton_ptr);
+ mysql_bin_log.last_commit_pos_offset= trx_sys_mysql_bin_log_pos;
+ strmake_buf(mysql_bin_log.last_commit_pos_file, trx_sys_mysql_bin_log_name);
+
if (len == 0 || xid_list == NULL) {
return(0);
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index 2aafb1a44ee..dec106cac54 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -17165,6 +17165,9 @@ innobase_xa_recover(
{
DBUG_ASSERT(hton == innodb_hton_ptr);
+ mysql_bin_log.last_commit_pos_offset= trx_sys_mysql_bin_log_pos;
+ strmake_buf(mysql_bin_log.last_commit_pos_file, trx_sys_mysql_bin_log_name);
+
if (len == 0 || xid_list == NULL) {
return(0);
Re: [Maria-developers] 7b03ce7af3d: MDEV-17395 REPLACE/INSERT ODKU: support WITHOUT OVERLAPS
by Sergei Golubchik 06 Apr '20
Hi, Nikita!
First, about tests:
On Mar 06, Nikita Malyavin wrote:
> revision-id: 7b03ce7af3d (mariadb-10.4.4-504-g7b03ce7af3d)
> parent(s): 5c94cf3bf44
> author: Nikita Malyavin <nikitamalyavin(a)gmail.com>
> committer: Nikita Malyavin <nikitamalyavin(a)gmail.com>
> timestamp: 2019-11-28 01:38:53 +1000
> message:
>
> MDEV-17395 REPLACE/INSERT ODKU: support WITHOUT OVERLAPS
>
> ---
> mysql-test/suite/period/r/insert_replace.result | 56 +++
> mysql-test/suite/period/t/insert_replace.test | 37 ++
> sql/handler.cc | 14 +
> sql/handler.h | 3 +-
> sql/sql_insert.cc | 492 ++++++++++++++----------
> sql/sql_load.cc | 6 +
> sql/table.cc | 4 +-
> sql/table.h | 5 +-
> 8 files changed, 409 insertions(+), 208 deletions(-)
>
> diff --git a/mysql-test/suite/period/r/insert_replace.result b/mysql-test/suite/period/r/insert_replace.result
> --- /dev/null
> +++ b/mysql-test/suite/period/r/insert_replace.result
> @@ -0,0 +1,56 @@
> +create or replace table t(id int, val int, s date, e date,
> +period for p(s,e),
> +primary key(id, p without overlaps)) engine=myisam;
> +insert into t values (1, 1, '2003-01-01', '2003-03-01'),
> +(1, 2, '2003-05-01', '2003-07-01');
> +# This just inserts a row; no rows matched
> +insert into t values (2, 3, '2003-01-01', '2003-04-01')
> +on duplicate key update val=3;
> +# The following command is equivalent to
> +# MERGE INTO t USING t
> +# ON id = 1 AND s <= 2003-04-01 AND e > 2003-01-01
> +# WHEN MATCHED UPDATE SET val=3
> +# WHEN NOT MATCHED INSERT VALUES (1, 3, '2003-01-01', '2003-04-01');
> +insert into t values (1, 3, '2003-01-01', '2003-04-01')
> +on duplicate key update val=3;
> +select row_count();
> +row_count()
> +2
> +select * from t;
> +id val s e
> +1 3 2003-01-01 2003-03-01
> +1 2 2003-05-01 2003-07-01
> +2 3 2003-01-01 2003-04-01
It's somewhat misleading, because you've used '3' everywhere.
Please rewrite your tests (all of them, also tests below) to identify
every operation uniquely. For example:
insert into t values (1, 1, '2003-01-01', '2003-03-01'),
(1, 2, '2003-05-01', '2003-07-01');
# This just inserts a row; no rows matched
insert into t values (2, 3, '2003-01-01', '2003-04-01')
on duplicate key update val=4;
# The following command is equivalent to
# MERGE INTO t USING t
# ON id = 1 AND s <= 2003-04-01 AND e > 2003-01-01
# WHEN MATCHED UPDATE SET val=6
# WHEN NOT MATCHED INSERT VALUES (1, 5, '2003-01-01', '2003-04-01');
insert into t values (1, 5, '2003-01-01', '2003-04-01')
on duplicate key update val=6;
> +insert into t values (1, 3, '2003-01-01', '2003-06-01')
> +on duplicate key update val=4;
> +select row_count();
> +row_count()
> +4
> +select * from t;
> +id val s e
> +1 4 2003-01-01 2003-03-01
> +1 4 2003-05-01 2003-07-01
> +2 3 2003-01-01 2003-04-01
I don't think IODKU is defined via MERGE. Unfortunately.
See, how it works:
create table t1 (a int not null, b int not null, c int, unique(a), unique(b));
insert t1 values (1,1,1), (2,2,2);
insert t1 values (1,2,3) on duplicate key update c=4;
select * from t1;
a b c
1 1 4
2 2 2
Here only one row was updated. If it would've been defined as
MERGE INTO t1 USING t1
ON a=1 OR b=2
WHEN MATCHED UPDATE c=4
WHEN NOT MATCHED INSERT VALUES (1,2,3)
then it would've updated both rows.
As you can see, it literally is defined as "insert, and if there's a
duplicate key error, then update the conflicting row instead".
That is, in your case it should've updated only one row too.
Also, please, add this statement to your test:
insert into t values (1, 3, '2003-01-01', '2003-02-01')
on duplicate key update val=4;
> +# No rows matched
> +insert into t values (1, 3, '2003-07-01', '2003-08-01')
> +on duplicate key update val=5;
> +select row_count();
> +row_count()
> +1
> +select * from t;
> +id val s e
> +1 4 2003-01-01 2003-03-01
> +1 4 2003-05-01 2003-07-01
> +2 3 2003-01-01 2003-04-01
> +1 3 2003-07-01 2003-08-01
> +replace into t values(1, 6, '2003-01-01', '2003-06-01');
> +select row_count();
> +row_count()
> +4
> +select * from t;
> +id val s e
> +1 6 2003-01-01 2003-06-01
> +1 4 2003-06-01 2003-07-01
> +2 3 2003-01-01 2003-04-01
> +1 3 2003-07-01 2003-08-01
Here you do DELETE FOR PERIOD. But above you didn't do UPDATE FOR PERIOD.
Add also this: replace into t values(1, 6, '2003-01-01', '2003-02-01');
And tests for INSERT SELECT (also with IGNORE, REPLACE, ODKU) and for
LOAD DATA (also with IGNORE and REPLACE).
> +drop table t;
Now, about semantics. It is very arguable here. One option is to do,
literally, "insert, and if it fails, delete/update the conflicting row". No
periods involved here. The other option is to use FOR PERIOD implicitly
for updates and deletes.
For example, in all test cases below I'll assume:
insert t1 values (1,1,'2003-01-01','2003-03-01');
insert t1 values (1,2,'2003-05-01','2003-06-01');
-> 1 1 2003-01-01 2003-03-01
1 2 2003-05-01 2003-06-01
So, option one:
insert t1 values (1,3,'2003-01-01','2003-02-01')
on duplicate key update val=4;
-> 1 4 2003-01-01 2003-02-01
1 2 2003-05-01 2003-06-01
Option two:
insert t1 values (1,3,'2003-01-01','2003-02-01')
on duplicate key update val=4;
-> 1 4 2003-01-01 2003-02-01
1 1 2003-02-01 2003-03-01
1 2 2003-05-01 2003-06-01
Overlapping range, option two:
insert t1 values (1,3,'2003-01-01','2003-04-01')
on duplicate key update val=4;
-> 1 4 2003-01-01 2003-03-01
1 3 2003-03-01 2003-04-01
1 2 2003-05-01 2003-06-01
And the same for replace, basically:
replace t1 values (1,3,'2003-01-01','2003-02-01');
-> 1 3 2003-01-01 2003-02-01
1 2 2003-05-01 2003-06-01
or
-> 1 3 2003-01-01 2003-02-01
1 1 2003-02-01 2003-03-01
1 2 2003-05-01 2003-06-01
Regards,
Sergei
VP of MariaDB Server Engineering
and security(a)mariadb.org
Re: [Maria-developers] 6daf451415f: Let "FTWRL <table_list>" use extra(HA_EXTRA_FLUSH)
by Sergei Golubchik 02 Apr '20
Hi, Sergey!
On Apr 01, Sergey Vojtovich wrote:
> revision-id: 6daf451415f (mariadb-10.5.0-69-g6daf451415f)
> parent(s): c24253d0fa3
> author: Sergey Vojtovich <svoj(a)mariadb.org>
> committer: Sergey Vojtovich <svoj(a)mariadb.org>
> timestamp: 2019-12-25 20:24:24 +0400
> message:
>
> Let "FTWRL <table_list>" use extra(HA_EXTRA_FLUSH)
>
> Rather than flushing caches with tdc_remove_table(TDC_RT_REMOVE_UNUSED)
> flush them with extra(HA_EXTRA_FLUSH) instead. This goes inline with
> regular FTWRL.
Not quite. FTWRL calls flush_tables(thd, FLUSH_ALL), and flush_tables()
does extra(HA_EXTRA_FLUSH) + tc_release_table() + tdc_release_share(share).
So FTWRL still closes all tables properly.
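In other words, roughly (a paraphrase of the description above as a sketch, not
the actual flush_tables() source):

  /* per open, unused table covered by flush_tables(thd, FLUSH_ALL): */
  table->file->extra(HA_EXTRA_FLUSH);   /* flush the handler's caches */
  tc_release_table(table);              /* release the TABLE instance */
  tdc_release_share(share);             /* release the table share */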
> diff --git a/sql/sql_reload.cc b/sql/sql_reload.cc
> --- a/sql/sql_reload.cc
> +++ b/sql/sql_reload.cc
> @@ -539,16 +538,10 @@ bool flush_tables_with_read_lock(THD *thd, TABLE_LIST *all_tables)
>
> DEBUG_SYNC(thd,"flush_tables_with_read_lock_after_acquire_locks");
>
> - for (table_list= all_tables; table_list;
> + /* Reset ticket to satisfy asserts in open_tables(). */
> + for (auto table_list= all_tables; table_list;
> table_list= table_list->next_global)
> - {
> - /* Request removal of table from cache. */
> - tdc_remove_table(thd, TDC_RT_REMOVE_UNUSED,
> - table_list->db.str,
> - table_list->table_name.str);
> - /* Reset ticket to satisfy asserts in open_tables(). */
> table_list->mdl_request.ticket= NULL;
> - }
> }
>
> thd->variables.option_bits|= OPTION_TABLE_LOCK;
> @@ -583,6 +576,16 @@ bool flush_tables_with_read_lock(THD *thd, TABLE_LIST *all_tables)
> }
> }
>
> + if (thd->lex->type & REFRESH_READ_LOCK)
> + {
> + for (auto table_list= all_tables; table_list;
> + table_list= table_list->next_global)
> + {
> + if (table_list->table->file->extra(HA_EXTRA_FLUSH))
> + goto error_reset_bits;
> + }
> + }
> +
> if (thd->locked_tables_list.init_locked_tables(thd))
> goto error_reset_bits;
>
>
Regards,
Sergei
VP of MariaDB Server Engineering
and security(a)mariadb.org