Hi, Aleksey! Like last time, it's a review of the diff bc04ded2353 22158b3c05a, so not only commit 38a888da0f1. The main thing below - I didn't understand a couple of changes and asked for clarifications. On May 30, Aleksey Midenkov wrote:
revision-id: 38a888da0f1 (mariadb-10.5.2-653-g38a888da0f1) parent(s): bc04ded2353 author: Aleksey Midenkov <midenok@gmail.com> committer: Aleksey Midenkov <midenok@gmail.com> timestamp: 2021-04-22 23:35:06 +0300 message:
MDEV-17554 Auto-create new partition for system versioned tables with history partitioned by INTERVAL/LIMIT
diff --git a/mysql-test/suite/versioning/r/delete_history.result b/mysql-test/suite/versioning/r/delete_history.result --- a/mysql-test/suite/versioning/r/delete_history.result +++ b/mysql-test/suite/versioning/r/delete_history.result @@ -154,3 +152,21 @@ select * from t1; a 1 drop table t1; +# +# MDEV-17554 Auto-create new partition for system versioned tables with history partitioned by INTERVAL/LIMIT +# +# Don't auto-create new partition on DELETE HISTORY: +set timestamp= unix_timestamp('2000-01-01 00:00:00'); +create or replace table t (a int) with system versioning +partition by system_time interval 1 hour auto; +set timestamp= unix_timestamp('2000-01-01 10:00:00');
perhaps, make it only 02:00:00 to avoid any chance that a partition won't be created because of max-auto-create limit. it's possible we put the limit at 10, but 2 is very unlikely. or does max-auto-create limit always result in a warning?
+delete history from t; +set timestamp= default; +show create table t; +Table Create Table +t CREATE TABLE `t` ( + `a` int(11) DEFAULT NULL +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING + PARTITION BY SYSTEM_TIME INTERVAL 1 HOUR STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO +PARTITIONS 2 +drop table t; diff --git a/mysql-test/suite/versioning/r/partition.result b/mysql-test/suite/versioning/r/partition.result --- a/mysql-test/suite/versioning/r/partition.result +++ b/mysql-test/suite/versioning/r/partition.result @@ -1236,27 +1270,826 @@ t1 CREATE TABLE `t1` ( PARTITION `p5` HISTORY ENGINE = DEFAULT_ENGINE, PARTITION `pn` CURRENT ENGINE = DEFAULT_ENGINE) alter table t1 add partition partitions 8; +drop table t1; +# +# End of 10.5 tests +# +# +# MDEV-17554 Auto-create new partition for system versioned tables with history partitioned by INTERVAL/LIMIT +# +create or replace table t1 (x int) with system versioning +partition by system_time limit 1 auto; show create table t1; Table Create Table t1 CREATE TABLE `t1` ( `x` int(11) DEFAULT NULL ) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING - PARTITION BY SYSTEM_TIME LIMIT 1 -(PARTITION `p2` HISTORY ENGINE = DEFAULT_ENGINE, - PARTITION `p8` HISTORY ENGINE = DEFAULT_ENGINE, + PARTITION BY SYSTEM_TIME LIMIT 1 AUTO +PARTITIONS 2 +insert into t1 values (1); +create or replace table t2 (y int); +insert into t2 values (2); +insert into t1 select * from t2; +insert into t2 select * from t1; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `x` int(11) DEFAULT NULL +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING + PARTITION BY SYSTEM_TIME LIMIT 1 AUTO +PARTITIONS 2 +# Too many partitions error +set timestamp= unix_timestamp('2000-01-01 00:00:00'); +create or replace table t1 (x int) with system versioning +partition by system_time interval 1 hour auto; +set timestamp= unix_timestamp('2001-01-01 00:01:00'); +update t1 set x= x + 1; +ERROR HY000: Too many partitions (including subpartitions) were defined +# Partition overflow error and manual fix
seems to be a wrong comment? no manual fix here.
+set timestamp= unix_timestamp('2000-01-01 00:00:00'); +create or replace table t1 (x int) with system versioning +partition by system_time interval 1 hour; +insert into t1 values (333); +set timestamp= unix_timestamp('2000-01-01 01:00:00'); +update t1 set x= x + 1; +Warnings: +Warning 4114 Versioned table `test`.`t1`: last HISTORY partition (`p0`) is out of INTERVAL, need more HISTORY partitions +drop table t1; +# Partition overflow error and manual fix +set timestamp= unix_timestamp('2000-01-01 00:00:00'); +create or replace table t1 (x int) with system versioning +partition by system_time interval 1 hour; +insert into t1 values (440); +set timestamp= unix_timestamp('2000-01-01 00:10:00'); +update t1 set x= x + 1; +# Check how pruning boundaries work +explain partitions select * from t1 for system_time as of '2000-01-01 00:59:58'; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t1 p0,pn # NULL NULL NULL NULL # # +explain partitions select * from t1 for system_time as of '2000-01-01 00:59:59'; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t1 pn # NULL NULL NULL NULL # #
Why the first select looks in both partitions and the second one - only in pn?
+explain partitions select * from t1 for system_time as of '2000-01-01 01:00:00'; +id select_type table partitions type possible_keys key key_len ref rows Extra +1 SIMPLE t1 pn # NULL NULL NULL NULL # # +select * from t1 for system_time as of '2000-01-01 00:09:59'; +x +440 +set timestamp= unix_timestamp('2000-01-01 02:00:00'); +update t1 set x= x + 1; +Warnings: +Warning 4114 Versioned table `test`.`t1`: last HISTORY partition (`p0`) is out of INTERVAL, need more HISTORY partitions +select * from t1 for system_time as of '2000-01-01 01:00:00'; +x +select * from t1 partition (p0) order by x; +x +440 +441 +alter table t1 add partition partitions 3; +select * from t1 for system_time as of '2000-01-01 01:00:00'; +x +441 +select * from t1 partition (p0) order by x; +x +440
would be good to have EXPLAINs after the overflow and after the manual fix. to show how ALTER changes what partition will be pruned
+drop table t1; +create or replace table t1 (x int) with system versioning +partition by system_time interval 3600 second +starts '2000-01-01 00:00:00' auto partitions 3; +affected rows: 0 +insert into t1 values (1); +affected rows: 1 +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `x` int(11) DEFAULT NULL +) ENGINE=DEFAULT_ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING + PARTITION BY SYSTEM_TIME INTERVAL 3600 SECOND STARTS TIMESTAMP'2000-01-01 00:00:00' AUTO +PARTITIONS 3 ... diff --git a/mysql-test/suite/versioning/r/rpl_mix.result b/mysql-test/suite/versioning/r/rpl_mix.result --- a/mysql-test/suite/versioning/r/rpl_mix.result +++ b/mysql-test/suite/versioning/r/rpl_mix.result @@ -8,4 +8,59 @@ DELETE HISTORY FROM t1; connection slave; connection master; drop table t1; +# +# MDEV-25347 DML events for auto-partitioned tables are written into binary log twice +# +flush binary logs; +create table t1 (a int) with system versioning +partition by system_time limit 1 auto; +insert into t1 values (1); +update t1 set a= a + 1; +update t1 set a= a + 1;
better do a+2 the second time. otherwise it's a bit confusing, the bug says "written twice" and you indeed have update a=a+1 written twice :) with clearly different statements the test will prove that every event is written once, and not one is skipped and the other is duplicated
+show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING + PARTITION BY SYSTEM_TIME LIMIT 1 AUTO +PARTITIONS 3 +select * from t1; +a +3 +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000002 # Binlog_checkpoint # # master-bin.000002 +master-bin.000002 # Gtid # # GTID #-#-# +master-bin.000002 # Query # # use `test`; create table t1 (a int) with system versioning +partition by system_time limit 1 auto +master-bin.000002 # Gtid # # BEGIN GTID #-#-# +master-bin.000002 # Annotate_rows # # insert into t1 values (1) +master-bin.000002 # Table_map # # table_id: # (test.t1) +master-bin.000002 # Write_rows_v1 # # table_id: # flags: STMT_END_F +master-bin.000002 # Query # # COMMIT +master-bin.000002 # Gtid # # BEGIN GTID #-#-# +master-bin.000002 # Annotate_rows # # update t1 set a= a + 1 +master-bin.000002 # Table_map # # table_id: # (test.t1) +master-bin.000002 # Update_rows_v1 # # table_id: # +master-bin.000002 # Write_rows_v1 # # table_id: # flags: STMT_END_F +master-bin.000002 # Query # # COMMIT +master-bin.000002 # Gtid # # BEGIN GTID #-#-# +master-bin.000002 # Annotate_rows # # update t1 set a= a + 1 +master-bin.000002 # Table_map # # table_id: # (test.t1) +master-bin.000002 # Update_rows_v1 # # table_id: # +master-bin.000002 # Write_rows_v1 # # table_id: # flags: STMT_END_F +master-bin.000002 # Query # # COMMIT +connection slave; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING + PARTITION BY SYSTEM_TIME LIMIT 1 AUTO +PARTITIONS 3 +select * from t1; +a +3 +connection master; +drop table t1; include/rpl_end.inc diff --git a/mysql-test/suite/versioning/r/rpl_row.result b/mysql-test/suite/versioning/r/rpl_row.result --- a/mysql-test/suite/versioning/r/rpl_row.result +++ b/mysql-test/suite/versioning/r/rpl_row.result @@ -11,4 +11,59 @@ connection slave; connection master; drop table t1; set binlog_row_image= @old_row_image; +# +# MDEV-25347 DML events for auto-partitioned tables are written into binary log twice +# +flush binary logs; +create table t1 (a int) with system versioning +partition by system_time limit 1 auto; +insert into t1 values (1); +update t1 set a= a + 1; +update t1 set a= a + 1;
same. as a general rule it's helpful to use slightly different statement, to be able to map them to binlog events uniquely. a+2, or 1+a, or whatever.
+show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING + PARTITION BY SYSTEM_TIME LIMIT 1 AUTO +PARTITIONS 3 +select * from t1; +a +3 +include/show_binlog_events.inc +Log_name Pos Event_type Server_id End_log_pos Info +master-bin.000002 # Binlog_checkpoint # # master-bin.000002 +master-bin.000002 # Gtid # # GTID #-#-# +master-bin.000002 # Query # # use `test`; create table t1 (a int) with system versioning +partition by system_time limit 1 auto +master-bin.000002 # Gtid # # BEGIN GTID #-#-# +master-bin.000002 # Annotate_rows # # insert into t1 values (1) +master-bin.000002 # Table_map # # table_id: # (test.t1) +master-bin.000002 # Write_rows_v1 # # table_id: # flags: STMT_END_F +master-bin.000002 # Query # # COMMIT +master-bin.000002 # Gtid # # BEGIN GTID #-#-# +master-bin.000002 # Annotate_rows # # update t1 set a= a + 1 +master-bin.000002 # Table_map # # table_id: # (test.t1) +master-bin.000002 # Update_rows_v1 # # table_id: # +master-bin.000002 # Write_rows_v1 # # table_id: # flags: STMT_END_F +master-bin.000002 # Query # # COMMIT +master-bin.000002 # Gtid # # BEGIN GTID #-#-# +master-bin.000002 # Annotate_rows # # update t1 set a= a + 1 +master-bin.000002 # Table_map # # table_id: # (test.t1) +master-bin.000002 # Update_rows_v1 # # table_id: # +master-bin.000002 # Write_rows_v1 # # table_id: # flags: STMT_END_F +master-bin.000002 # Query # # COMMIT +connection slave; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` int(11) DEFAULT NULL +) ENGINE=ENGINE DEFAULT CHARSET=latin1 WITH SYSTEM VERSIONING + PARTITION BY SYSTEM_TIME LIMIT 1 AUTO +PARTITIONS 3 +select * from t1; +a +3 +connection master; +drop table t1; include/rpl_end.inc diff --git a/mysql-test/suite/versioning/t/partition.test b/mysql-test/suite/versioning/t/partition.test --- a/mysql-test/suite/versioning/t/partition.test +++ b/mysql-test/suite/versioning/t/partition.test @@ -1068,13 +1078,456 @@ alter table t1 add partition partitions 2; --replace_result $default_engine DEFAULT_ENGINE show create table t1; alter table t1 add partition partitions 8; +drop table t1; + +--echo # +--echo # End of 10.5 tests +--echo # + +--echo # +--echo # MDEV-17554 Auto-create new partition for system versioned tables with history partitioned by INTERVAL/LIMIT +--echo # +create or replace table t1 (x int) with system versioning +partition by system_time limit 1 auto; +--replace_result $default_engine DEFAULT_ENGINE +show create table t1; +
in the previous version of this patch you had clarifying comments here, like --echo # INSERT, INSERT .. SELECT don't auto-create partitions or --echo # Number of partitions goes from 3 to 5 I kind of miss them now, they were helpful
+insert into t1 values (1); + +create or replace table t2 (y int); +insert into t2 values (2); + +insert into t1 select * from t2; +insert into t2 select * from t1; +--replace_result $default_engine DEFAULT_ENGINE +show create table t1; + ... diff --git a/sql/ha_partition.h b/sql/ha_partition.h --- a/sql/ha_partition.h +++ b/sql/ha_partition.h @@ -1607,7 +1607,7 @@ class ha_partition :public handler for (; part_id < part_id_end; ++part_id) { handler *file= m_file[part_id]; - DBUG_ASSERT(bitmap_is_set(&(m_part_info->read_partitions), part_id)); + bitmap_set_bit(&(m_part_info->read_partitions), part_id);
would be good to have a comment before the method that it's only used in vers_set_hist_part(), if (vers_info->limit). because if it's a generic method to return the number of rows in a partition (incl. subpartitions) then it's totally not clear why it modifies read_partitions bitmap. or may be you'd want to rename the method to make it look more specific for LIMIT partitions in vers_set_hist_part() ?
file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK | HA_STATUS_OPEN); part_recs+= file->stats.records; } diff --git a/sql/handler.cc b/sql/handler.cc --- a/sql/handler.cc +++ b/sql/handler.cc @@ -1574,6 +1574,8 @@ int ha_commit_trans(THD *thd, bool all) DBUG_ASSERT(thd->transaction->stmt.ha_list == NULL || trans == &thd->transaction->stmt);
+ DBUG_ASSERT(!thd->in_sub_stmt); if (thd->in_sub_stmt) { DBUG_ASSERT(0);
This is a bit redundant, don't you think? One DBUG_ASSERT would be enough. (personally, I'd prefer the one you added, not DBUG_ASSERT(0))
diff --git a/sql/lock.cc b/sql/lock.cc --- a/sql/lock.cc +++ b/sql/lock.cc @@ -662,16 +662,28 @@ MYSQL_LOCK *mysql_lock_merge(MYSQL_LOCK *a,MYSQL_LOCK *b) DBUG_PRINT("enter", ("a->lock_count: %u b->lock_count: %u", a->lock_count, b->lock_count));
- if (!(sql_lock= (MYSQL_LOCK*) - my_malloc(key_memory_MYSQL_LOCK, sizeof(*sql_lock) + - sizeof(THR_LOCK_DATA*)*((a->lock_count+b->lock_count)*2) + - sizeof(TABLE*)*(a->table_count+b->table_count),MYF(MY_WME)))) - DBUG_RETURN(0); // Fatal error + const size_t lock_size= sizeof(*sql_lock) + + sizeof(THR_LOCK_DATA *) * ((a->lock_count + b->lock_count) * 2) + + sizeof(TABLE *) * (a->table_count + b->table_count); + if (thd) + { + sql_lock= (MYSQL_LOCK *) thd->alloc(lock_size); + if (!sql_lock) + DBUG_RETURN(0); + sql_lock->flags= GET_LOCK_ON_THD;
why? the commit comment for MDEV-23639 doesn't mention it. generally, I think it's a good idea and, likely, should be used more than just in recover_from_failed_open(). But why right now and in this commit? Does this commit (22158b3c) rely on locks being on THD memroot? How so?
+ } + else + { + sql_lock= (MYSQL_LOCK *) + my_malloc(key_memory_MYSQL_LOCK, lock_size, MYF(MY_WME)); + if (!sql_lock) + DBUG_RETURN(0); + sql_lock->flags= 0; + } sql_lock->lock_count=a->lock_count+b->lock_count; sql_lock->table_count=a->table_count+b->table_count; sql_lock->locks=(THR_LOCK_DATA**) (sql_lock+1); sql_lock->table=(TABLE**) (sql_lock->locks+sql_lock->lock_count*2); - sql_lock->flags= 0; memcpy(sql_lock->locks,a->locks,a->lock_count*sizeof(*a->locks)); memcpy(sql_lock->locks+a->lock_count,b->locks, b->lock_count*sizeof(*b->locks)); @@ -705,8 +717,10 @@ MYSQL_LOCK *mysql_lock_merge(MYSQL_LOCK *a,MYSQL_LOCK *b) a->lock_count, b->lock_count);
/* Delete old, not needed locks */ - my_free(a); - my_free(b); + if (!(a->flags & GET_LOCK_ON_THD)) + my_free(a); + if (!(b->flags & GET_LOCK_ON_THD)) + my_free(b);
this even looks like a bug, no check that old locks weren't on THD memroot. (probably impossible in this particular case, but still rather fragile) thanks!
DBUG_RETURN(sql_lock); }
diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt --- a/sql/share/errmsg-utf8.txt +++ b/sql/share/errmsg-utf8.txt @@ -7985,3 +7985,5 @@ ER_JSON_TABLE_MULTIPLE_MATCHES eng "Can't store multiple matches of the path in the column '%s' of JSON_TABLE '%s'." ER_WITH_TIES_NEEDS_ORDER eng "FETCH ... WITH TIES requires ORDER BY clause to be present" +ER_VERS_HIST_PART_FAILED + eng "Versioned table %`s.%`s: adding HISTORY partition failed"
Can you come up with a test for this error?
diff --git a/sql/sql_base.cc b/sql/sql_base.cc --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -931,7 +934,7 @@ void close_thread_table(THD *thd, TABLE **table_ptr) DBUG_PRINT("tcache", ("table: '%s'.'%s' %p", table->s->db.str, table->s->table_name.str, table)); DBUG_ASSERT(!table->file->keyread_enabled()); - DBUG_ASSERT(!table->file || table->file->inited == handler::NONE); + DBUG_ASSERT(table->file->inited == handler::NONE);
indeed
/* The metadata lock must be released after giving back @@ -1625,6 +1625,125 @@ bool is_locked_view(THD *thd, TABLE_LIST *t) }
+#ifdef WITH_PARTITION_STORAGE_ENGINE +/** + Switch part_info->hist_part and request partition creation if needed. + + @retval true Error or partition creation was requested. + @retval false No error +*/ +bool TABLE::vers_switch_partition(THD *thd, TABLE_LIST *table_list, + Open_table_context *ot_ctx) +{ + if (!part_info || part_info->part_type != VERSIONING_PARTITION || + table_list->vers_conditions.delete_history || + thd->stmt_arena->is_stmt_prepare() || + table_list->lock_type < TL_WRITE_ALLOW_WRITE || + table_list->mdl_request.type < MDL_SHARED_WRITE || + table_list->mdl_request.type == MDL_EXCLUSIVE) + { + return false; + } + + switch (thd->lex->sql_command) + { + case SQLCOM_INSERT: + if (thd->lex->duplicates != DUP_UPDATE) + return false; + break; + case SQLCOM_LOAD: + if (thd->lex->duplicates != DUP_REPLACE) + return false; + break; + case SQLCOM_LOCK_TABLES: + case SQLCOM_DELETE: + case SQLCOM_UPDATE: + case SQLCOM_REPLACE: + case SQLCOM_REPLACE_SELECT: + case SQLCOM_DELETE_MULTI: + case SQLCOM_UPDATE_MULTI: + break; + default: + /* + TODO: make row events set thd->lex->sql_command appropriately. + + Sergei Golubchik: f.ex. currently row events increment + thd->status_var.com_stat[] each event for its own SQLCOM_xxx, it won't be + needed if they'll just set thd->lex->sql_command. + */ + if (thd->rgi_slave && thd->rgi_slave->current_event && + thd->lex->sql_command == SQLCOM_END) + { + switch (thd->rgi_slave->current_event->get_type_code()) + { + case UPDATE_ROWS_EVENT: + case UPDATE_ROWS_EVENT_V1: + case DELETE_ROWS_EVENT: + case DELETE_ROWS_EVENT_V1: + break; + default:; + return false; + } + } + break; + } + + TABLE *table= this; + + /* + NOTE: The semantics of vers_set_hist_part() is double: even when we
twofold
+ don't need auto-create, we need to update part_info->hist_part. + */ + uint *create_count= table_list->vers_skip_create ? + NULL : &ot_ctx->vers_create_count; + table_list->vers_skip_create= true; + if (table->part_info->vers_set_hist_part(thd, create_count)) + { + MYSQL_UNBIND_TABLE(table->file); + tc_release_table(table); + return true; + } + if (ot_ctx->vers_create_count) + { + Open_table_context::enum_open_table_action action; + TABLE_LIST *table_arg; + mysql_mutex_lock(&table->s->LOCK_share); + if (!table->s->vers_skip_auto_create) + { + table->s->vers_skip_auto_create= true; + action= Open_table_context::OT_ADD_HISTORY_PARTITION; + table_arg= table_list; + } + else + { + /* + NOTE: this may repeat multiple times until creating thread acquires + MDL_EXCLUSIVE. Since auto-creation is rare operation this is acceptable. + We could suspend this thread on cond-var but we must first exit + MDL_SHARED_WRITE first and we cannot store cond-var into TABLE_SHARE + because it is already released and there is no guarantee that it will + be same instance if we acquire it again. + */ + table_list->vers_skip_create= false; + ot_ctx->vers_create_count= 0; + action= Open_table_context::OT_REOPEN_TABLES; + table_arg= NULL; + }
I'm afraid I don't understand. All this business with vers_skip_create and vers_skip_auto_create, it wasn't in the previous version of the patch. So, I believe it was a fix for one of the MDEV bugs reported and fixed meanwhile. What MDEV was it? Could you explain what was the issue and what was the fix, please?
+ mysql_mutex_unlock(&table->s->LOCK_share); + if (!thd->locked_tables_mode) + { + MYSQL_UNBIND_TABLE(table->file); + tc_release_table(table); + } + ot_ctx->request_backoff_action(action, table_arg); + return true; + } + + return false; +} +#endif /* WITH_PARTITION_STORAGE_ENGINE */ + + /** Open a base table.
@@ -2572,11 +2699,13 @@ void Locked_tables_list::mark_table_for_reopen(THD *thd, TABLE *table) table_list; table_list= table_list->next_global) { if (table_list->table->s == share) + { table_list->table->internal_set_needs_reopen(true); + some_table_marked_for_reopen= 1; + } } /* This is needed in the case where lock tables where not used */ table->internal_set_needs_reopen(true);
why ^^^ this doesn't count as "some_table_marked_for_reopen" ?
- some_table_marked_for_reopen= 1; }
@@ -3172,13 +3304,49 @@ Open_table_context::recover_from_failed_open() break; case OT_DISCOVER: case OT_REPAIR: - if ((result= lock_table_names(m_thd, m_thd->lex->create_info, - m_failed_table, NULL, - get_timeout(), 0))) + case OT_ADD_HISTORY_PARTITION: + if (!m_thd->locked_tables_mode) + result= lock_table_names(m_thd, m_thd->lex->create_info, m_failed_table, + NULL, get_timeout(), 0); + else + { + DBUG_ASSERT(!result); + DBUG_ASSERT(m_action == OT_ADD_HISTORY_PARTITION);
It seems you've solved recover_from_failed_open under LOCK TABLES, so it could be possible to make OT_DISCOVER and OT_REPAIR to work there too. Is that right? It's just a question, I don't imply it should be part of this MDEV.
+ } + /* + We are now under MDL_EXCLUSIVE mode. Other threads have no table share + acquired: they are blocked either at open_table_get_mdl_lock() in + open_table() or at lock_table_names() here. + */ + if (result) + { + if (m_action == OT_ADD_HISTORY_PARTITION) + { + TABLE_SHARE *share= tdc_acquire_share(m_thd, m_failed_table, + GTS_TABLE, NULL); + if (share) + { + share->vers_skip_auto_create= false; + tdc_release_share(share); + } + if (m_thd->get_stmt_da()->sql_errno() == ER_LOCK_WAIT_TIMEOUT) + { + // MDEV-23642 Locking timeout caused by auto-creation affects original DML + m_thd->clear_error(); + vers_create_count= 0; + result= false; + } + } break; + }
- tdc_remove_table(m_thd, m_failed_table->db.str, - m_failed_table->table_name.str); + /* + We don't need to remove share under OT_ADD_HISTORY_PARTITION. + Moreover fast_alter_partition_table() works with TABLE instance. + */ + if (m_action != OT_ADD_HISTORY_PARTITION) + tdc_remove_table(m_thd, m_failed_table->db.str, + m_failed_table->table_name.str);
switch (m_action) { @@ -3206,6 +3374,70 @@ Open_table_context::recover_from_failed_open() case OT_REPAIR: result= auto_repair_table(m_thd, m_failed_table); break; + case OT_ADD_HISTORY_PARTITION: + { + result= false; + TABLE *table= open_ltable(m_thd, m_failed_table, TL_WRITE, + MYSQL_OPEN_HAS_MDL_LOCK | MYSQL_OPEN_IGNORE_LOGGING_FORMAT); + if (table == NULL) + { + m_thd->clear_error(); + break; + } + + DBUG_ASSERT(vers_create_count); + result= vers_create_partitions(m_thd, m_failed_table, vers_create_count); + vers_create_count= 0; + if (!m_thd->transaction->stmt.is_empty()) + trans_commit_stmt(m_thd); + DBUG_ASSERT(!result || + !m_thd->locked_tables_mode || + m_thd->lock->lock_count); + if (result) + break; + if (!m_thd->locked_tables_mode) + { + /* + alter_partition_lock_handling() does mysql_lock_remove() but + does not clear thd->lock completely. + */ + DBUG_ASSERT(m_thd->lock->lock_count == 0); + if (!(m_thd->lock->flags & GET_LOCK_ON_THD)) + my_free(m_thd->lock); + m_thd->lock= NULL; + } + else if (m_thd->locked_tables_mode == LTM_PRELOCKED) + { + MYSQL_LOCK *lock; + MYSQL_LOCK *merged_lock; + + /* + In LTM_LOCK_TABLES table was reopened via locked_tables_list, + but not in prelocked environment where we have to reopen + the table manually. + */ + Open_table_context ot_ctx(m_thd, MYSQL_OPEN_REOPEN); + if (open_table(m_thd, m_failed_table, &ot_ctx)) + { + result= true; + break; + } + TABLE *table= m_failed_table->table; + table->reginfo.lock_type= m_thd->update_lock_default; + m_thd->in_lock_tables= 1; + lock= mysql_lock_tables(m_thd, &table, 1, + MYSQL_OPEN_REOPEN | MYSQL_LOCK_USE_MALLOC); + m_thd->in_lock_tables= 0; + if (lock == NULL || + !(merged_lock= mysql_lock_merge(m_thd->lock, lock, m_thd))) + { + result= true; + break; + } + m_thd->lock= merged_lock;
What MDEV does it fix? I wonder why other cases in recover_from_failed_open doesn't do that
+ } + break; + } case OT_BACKOFF_AND_RETRY: case OT_REOPEN_TABLES: case OT_NO_ACTION: @@ -4370,6 +4611,8 @@ bool open_tables(THD *thd, const DDL_options_st &options, { if (ot_ctx.can_recover_from_failed_open()) { + // FIXME: is this really used? + DBUG_ASSERT(0);
I take it, you weren't able to get this assert to fire?
close_tables_for_reopen(thd, start, ot_ctx.start_of_statement_svp()); if (ot_ctx.recover_from_failed_open())
Regards, Sergei VP of MariaDB Server Engineering and security@mariadb.org