Hi, Michael! On May 12, Michael Widenius wrote:
revision-id: c80f867b0df (mariadb-10.6.0-78-gc80f867b0df) parent(s): eebebe8ef75 author: Michael Widenius <michael.widenius@gmail.com> committer: Michael Widenius <michael.widenius@gmail.com> timestamp: 2021-05-04 21:09:11 +0300 message:
MDEV-23844 Atomic DROP TABLE ... diff --git a/sql/sql_view.cc b/sql/sql_view.cc index e6d726b30d7..ff8aa55adef 100644 --- a/sql/sql_view.cc +++ b/sql/sql_view.cc @@ -1861,8 +1868,18 @@ bool mysql_drop_view(THD *thd, TABLE_LIST *views, enum_drop_mode drop_mode) not_exists_count++; continue; } + if (!first_table++)
yuck. so your "first_table" actually means "not_first_view", because when it's FALSE, it's the first view. why not to rename it to "not_first_view" ?
+ { + if (ddl_log_drop_view_init(thd, &ddl_log_state)) + DBUG_RETURN(TRUE);
what about a crash here? the first entry is written, the second is not.
+ } + if (ddl_log_drop_view(thd, &ddl_log_state, &cpath, &view->db, + &view->table_name)) + DBUG_RETURN(TRUE); + debug_crash_here("ddl_log_drop_before_delete_view"); if (unlikely(mysql_file_delete(key_file_frm, path, MYF(MY_WME)))) delete_error= TRUE; + debug_crash_here("ddl_log_drop_after_delete_view");
some_views_deleted= TRUE;
diff --git a/sql/log.cc b/sql/log.cc index 6e0a9706ec8..dba999054e4 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -10471,16 +10483,16 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name, bool last_gtid_standalone= false; bool last_gtid_valid= false; #endif + DBUG_ENTER("TC_LOG_BINLOG::recover");
if (! fdle->is_valid() || - (do_xa && my_hash_init(key_memory_binlog_recover_exec, &xids, - &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0, - sizeof(my_xid), 0, 0, MYF(0)))) + (my_hash_init(key_memory_binlog_recover_exec, &xids, + &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0, + sizeof(my_xid), 0, 0, MYF(0)))) goto err1;
- if (do_xa) - init_alloc_root(key_memory_binlog_recover_exec, &mem_root, - TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE, MYF(0)); + init_alloc_root(key_memory_binlog_recover_exec, &mem_root, + TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE, MYF(0));
I don't think these two changes are correct. do_xa is TRUE on crash recovery and FALSE on gtid state recovery on 5.5->10.0 upgade or if gtid state file was deleted. There is no point to initialize the xid hash or mem_root, as it's not a crash recovery and there can be no incomplete transactions or ddl statements. Yes, I've seen the commit comment:
- TC_LOG_BINLOG::recover() was changed to always collect Xid's from the binary log and always call ddl_log_close_binlogged_events(). This was needed to be able to collect DROP TABLE events with embedded Xid's (used by ddl log).
but I cannot understand how it applies here.
fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error
diff --git a/sql/sql_table.cc b/sql/sql_table.cc index 7a88ffdfd1b..0a83396ae14 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -1363,6 +1373,17 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists,
thd->replication_flags= 0; was_view= table_type == TABLE_TYPE_VIEW; + + if (!first_table++)
first_table -> not_first_table
+ { + LEX_CSTRING comment= {comment_start, (size_t) comment_len}; + if (ddl_log_drop_table_init(thd, &ddl_log_state, &comment)) + { + error= 1; + goto err; + } + } + if ((table_type == TABLE_TYPE_UNKNOWN) || (was_view && !drop_view) || (table_type != TABLE_TYPE_SEQUENCE && drop_sequence)) { @@ -1565,6 +1611,7 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists, table_name.str, (uint)table_name.length); mysql_audit_drop_table(thd, table); } + ddl_log_update_phase(&ddl_log_state, DDL_DROP_PHASE_COLLECT);
better call it DDL_DROP_PHASE_BINLOG. It would've saved me ~10 minutes reading the code back and forth, as ""DDL_DROP_PHASE_BINLOG" immediately explains what this phase is for.
if (!dont_log_query && (!error || table_dropped || non_existing_table_error(error))) diff --git a/sql/ddl_log.h b/sql/ddl_log.h index 316e6708f22..b843404fc3c 100644 --- a/sql/ddl_log.h +++ b/sql/ddl_log.h @@ -97,6 +101,14 @@ enum enum_ddl_log_rename_table_phase { DDL_RENAME_PHASE_TABLE, };
+enum enum_ddl_log_drop_table_phase { + DDL_DROP_PHASE_TABLE=0, + DDL_DROP_PHASE_TRIGGER,
no phase for deleting data from stat tables?
+ DDL_DROP_PHASE_COLLECT, + DDL_DROP_PHASE_RESET, /* Reset found list of dropped tables */
this one is unused (also unused in further patches, I've checked)
+ DDL_DROP_PHASE_END +}; + /* Setting ddl_log_entry.phase to this has the same effect as setting action_type to DDL_IGNORE_LOG_ENTRY_CODE diff --git a/sql/ddl_log.cc b/sql/ddl_log.cc index 777dfdddbc7..6145b2f7318 100644 --- a/sql/ddl_log.cc +++ b/sql/ddl_log.cc @@ -109,6 +112,7 @@ struct st_global_ddl_log };
st_global_ddl_log global_ddl_log; +String ddl_drop_query; // Used during startup recovery
static
mysql_mutex_t LOCK_gdl;
@@ -1132,6 +1149,122 @@ static int ddl_log_execute_action(THD *thd, MEM_ROOT *mem_root, (void) update_phase(entry_pos, DDL_LOG_FINAL_PHASE); } break; + case DDL_LOG_DROP_TABLE_INIT_ACTION: + { + LEX_CSTRING *comment= &ddl_log_entry->tmp_name; + ddl_drop_query.length(0); + ddl_drop_query.set_charset(system_charset_info); + ddl_drop_query.append(STRING_WITH_LEN("DROP TABLE IF EXISTS ")); + if (comment->length) + { + ddl_drop_query.append(comment); + ddl_drop_query.append(' '); + } + /* We don't increment phase as we want to retry this in case of crash */ + break; + } + case DDL_LOG_DROP_TABLE_ACTION: + { + LEX_CSTRING db, table, path; + db= ddl_log_entry->db; + table= ddl_log_entry->name; + /* Note that path is without .frm extension */ + path= ddl_log_entry->tmp_name; + + switch (ddl_log_entry->phase) { + case DDL_DROP_PHASE_TABLE: + if (hton) + { + if ((error= hton->drop_table(hton, path.str))) + { + if (!non_existing_table_error(error)) + break; + error= -1; + } + } + else + error= ha_delete_table_force(thd, path.str, &db, &table); + if (error <= 0) + { + /* Not found or already deleted. Delete .frm if it exists */ + strxnmov(to_path, sizeof(to_path)-1, path.str, reg_ext, NullS); + mysql_file_delete(key_file_frm, to_path, MYF(MY_WME|MY_IGNORE_ENOENT));
this should rather be inside the previous if(). no need to do it for ha_delete_table_force(), as it doesn't leave any traces and removes all remnants of a table.
+ } + if (ddl_log_increment_phase_no_lock(entry_pos)) + break; + (void) ddl_log_sync_no_lock();
No need to, ddl_log_increment_phase_no_lock() syncs internally
+ /* Fall through */ + case DDL_DROP_PHASE_TRIGGER: + Table_triggers_list::drop_all_triggers(thd, &db, &table, + MYF(MY_WME | MY_IGNORE_ENOENT)); + if (ddl_log_increment_phase_no_lock(entry_pos)) + break; + (void) ddl_log_sync_no_lock();
same. and in other places too, I'd expect.
+ /* Fall through */ + + case DDL_DROP_PHASE_COLLECT:
indentation?
+ append_identifier(thd, &ddl_drop_query, &db); + ddl_drop_query.append('.'); + append_identifier(thd, &ddl_drop_query, &table); + ddl_drop_query.append(','); + /* We don't increment phase as we want to retry this in case of crash */ + + if (!ddl_log_entry->next_entry && mysql_bin_log.is_open()) + { + /* Last drop table. Write query to binlog */ + LEX_CSTRING end_comment= + { STRING_WITH_LEN(" /* generated by ddl log */")};
"generated by ddl log" ? a "log" cannot "generate" a statement, may be "generated during ddl recovery" ?
+ ddl_drop_query.length(ddl_drop_query.length()-1); + ddl_drop_query.append(&end_comment); + + mysql_mutex_unlock(&LOCK_gdl); + (void) thd->binlog_query(THD::STMT_QUERY_TYPE, ddl_drop_query.ptr(), + ddl_drop_query.length(), TRUE, FALSE, + FALSE, 0); + mysql_mutex_lock(&LOCK_gdl); + } + break; + } + break; + } + case DDL_LOG_DROP_VIEW_INIT_ACTION: + { + ddl_drop_query.length(0); + ddl_drop_query.set_charset(system_charset_info); + ddl_drop_query.append(STRING_WITH_LEN("DROP VIEW IF EXISTS ")); + /* We don't increment phase as we want to retry this in case of crash */ + break; + } + case DDL_LOG_DROP_VIEW_ACTION: + { + LEX_CSTRING db, table, path; + db= ddl_log_entry->db; + table= ddl_log_entry->name; + /* Note that for views path is WITH .frm extension */ + path= ddl_log_entry->tmp_name; + + mysql_file_delete(key_file_frm, path.str, MYF(MY_WME|MY_IGNORE_ENOENT)); + append_identifier(thd, &ddl_drop_query, &db); + ddl_drop_query.append('.'); + append_identifier(thd, &ddl_drop_query, &table); + ddl_drop_query.append(','); + + if (!ddl_log_entry->next_entry) + { + /* Last drop view. Write query to binlog */ + LEX_CSTRING end_comment= + { STRING_WITH_LEN(" /* generated by ddl log */")}; + ddl_drop_query.length(ddl_drop_query.length()-1); + ddl_drop_query.append(&end_comment); + + mysql_mutex_unlock(&LOCK_gdl); + (void) thd->binlog_query(THD::STMT_QUERY_TYPE, ddl_drop_query.ptr(), + ddl_drop_query.length(), TRUE, FALSE, + FALSE, 0); + mysql_mutex_lock(&LOCK_gdl); + } + break; + } default: DBUG_ASSERT(0); break; @@ -1682,6 +1823,7 @@ void ddl_log_release_entries(DDL_LOG_STATE *ddl_log_state) next= log_entry->next_active_log_entry; ddl_log_release_memory_entry(log_entry); } + ddl_log_state->list= 0;
may be you should also clear ddl_log_state->execute_entry below?
if (ddl_log_state->execute_entry) ddl_log_release_memory_entry(ddl_log_state->execute_entry); @@ -1776,6 +1918,33 @@ bool ddl_log_update_xid(DDL_LOG_STATE *state, ulonglong xid) }
+/* + Write ddl_log_entry and write or update ddl_execute_entry +*/ + +static bool ddl_log_write(DDL_LOG_STATE *ddl_state, + DDL_LOG_ENTRY *ddl_log_entry)
could you use a more descriptive name, please?
+{ + int error; + DDL_LOG_MEMORY_ENTRY *log_entry; + DBUG_ENTER("ddl_log_write"); + + mysql_mutex_lock(&LOCK_gdl); + error= ((ddl_log_write_entry(ddl_log_entry, &log_entry)) || + ddl_log_write_execute_entry(log_entry->entry_pos, + &ddl_state->execute_entry)); + mysql_mutex_unlock(&LOCK_gdl); + if (error) + { + if (log_entry) + ddl_log_release_memory_entry(log_entry); + DBUG_RETURN(1); + } + add_log_entry(ddl_state, log_entry); + DBUG_RETURN(0); +} + + /** Logging of rename table */
Regards, Sergei VP of MariaDB Server Engineering and security@mariadb.org