revision-id: 2f4a0c5be2c5d5153c4253a49ba8820ab333a9a0 (mariadb-10.1.35-71-g2f4a0c5be2c) parent(s): 1fc5a6f30c3a9c047dcf9a36b00026d98f286f6b author: Kristian Nielsen committer: Kristian Nielsen timestamp: 2018-10-07 18:59:52 +0200 message: Fix accumulation of old rows in mysql.gtid_slave_pos This would happen especially in optimistic parallel replication, where there is a good chance that a transaction will be rolled back (due to conflicts) after it has executed record_gtid(). If the transaction did any deletions of old rows as part of record_gtid(), those deletions will be undone as well. And the code did not properly ensure that the deletions would be re-tried. This patch makes record_gtid() remember the list of deletions done as part of a transaction. Then in rpl_slave_state::update() when the changes have been committed, we discard the list. However, in case of error and rollback, in cleanup_context() we will instead put the list back into rpl_global_gtid_slave_state so that the deletions will be re-tried later. Probably fixes part of the cause of MDEV-12147 as well. Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org> --- .../suite/rpl/r/rpl_parallel_optimistic.result | 6 ++ .../suite/rpl/t/rpl_parallel_optimistic.test | 17 +++++ sql/log_event.cc | 6 +- sql/rpl_gtid.cc | 64 ++++++++++++------ sql/rpl_gtid.h | 2 +- sql/rpl_rli.cc | 79 ++++++++++++++++++++++ sql/rpl_rli.h | 11 +++ 7 files changed, 161 insertions(+), 24 deletions(-) diff --git a/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result b/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result index 3cd4f8231bf..99bd8562ffe 100644 --- a/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result +++ b/mysql-test/suite/rpl/r/rpl_parallel_optimistic.result @@ -571,4 +571,10 @@ SET GLOBAL slave_parallel_mode=@old_parallel_mode; SET GLOBAL slave_parallel_threads=@old_parallel_threads; include/start_slave.inc DROP TABLE t1, t2, t3; +include/save_master_gtid.inc +include/sync_with_master_gtid.inc +Check that no more than the expected last two GTIDs are in mysql.gtid_slave_pos +select count(*) from mysql.gtid_slave_pos order by domain_id, sub_id; +count(*) +2 include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test b/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test index 9f6669279db..3867a3fdf3a 100644 --- a/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test +++ b/mysql-test/suite/rpl/t/rpl_parallel_optimistic.test @@ -549,5 +549,22 @@ SET GLOBAL slave_parallel_threads=@old_parallel_threads; --connection server_1 DROP TABLE t1, t2, t3; +--source include/save_master_gtid.inc + +--connection server_2 +--source include/sync_with_master_gtid.inc +# Check for left-over rows in table mysql.gtid_slave_pos (MDEV-12147). +# +# There was a bug when a transaction got a conflict and was rolled back. It +# might have also handled deletion of some old rows, and these deletions would +# then also be rolled back. And since the deletes were never re-tried, old no +# longer needed rows would accumulate in the table without limit. +# +# The earlier part of this test file have plenty of transactions being rolled +# back. But the last DROP TABLE statement runs on its own and should never +# conflict, thus at this point the mysql.gtid_slave_pos table should be clean. +--echo Check that no more than the expected last two GTIDs are in mysql.gtid_slave_pos +select count(*) from mysql.gtid_slave_pos order by domain_id, sub_id; +--connection server_1 --source include/rpl_end.inc diff --git a/sql/log_event.cc b/sql/log_event.cc index e1912ad4620..e07b7002398 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -4429,7 +4429,7 @@ int Query_log_event::do_apply_event(rpl_group_info *rgi, gtid= rgi->current_gtid; if (rpl_global_gtid_slave_state->record_gtid(thd, >id, sub_id, - true, false)) + rgi, false)) { int errcode= thd->get_stmt_da()->sql_errno(); if (!is_parallel_retry_error(rgi, errcode)) @@ -7132,7 +7132,7 @@ Gtid_list_log_event::do_apply_event(rpl_group_info *rgi) { if ((ret= rpl_global_gtid_slave_state->record_gtid(thd, &list[i], sub_id_list[i], - false, false))) + NULL, false))) return ret; rpl_global_gtid_slave_state->update_state_hash(sub_id_list[i], &list[i], NULL); @@ -7639,7 +7639,7 @@ int Xid_log_event::do_apply_event(rpl_group_info *rgi) rgi->gtid_pending= false; gtid= rgi->current_gtid; - err= rpl_global_gtid_slave_state->record_gtid(thd, >id, sub_id, true, + err= rpl_global_gtid_slave_state->record_gtid(thd, >id, sub_id, rgi, false); if (err) { diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc index 7b1acf17ef5..94944b5b3e5 100644 --- a/sql/rpl_gtid.cc +++ b/sql/rpl_gtid.cc @@ -77,7 +77,7 @@ rpl_slave_state::record_and_update_gtid(THD *thd, rpl_group_info *rgi) rgi->gtid_pending= false; if (rgi->gtid_ignore_duplicate_state!=rpl_group_info::GTID_DUPLICATE_IGNORE) { - if (record_gtid(thd, &rgi->current_gtid, sub_id, false, false)) + if (record_gtid(thd, &rgi->current_gtid, sub_id, NULL, false)) DBUG_RETURN(1); update_state_hash(sub_id, &rgi->current_gtid, rgi); } @@ -328,6 +328,8 @@ rpl_slave_state::update(uint32 domain_id, uint32 server_id, uint64 sub_id, } } rgi->gtid_ignore_duplicate_state= rpl_group_info::GTID_DUPLICATE_NULL; + + rgi->pending_gtid_deletes_clear(); } if (!(list_elem= (list_element *)my_malloc(sizeof(*list_elem), MYF(MY_WME)))) @@ -377,15 +379,24 @@ int rpl_slave_state::put_back_list(uint32 domain_id, list_element *list) { element *e; + int err= 0; + + mysql_mutex_lock(&LOCK_slave_state); if (!(e= (element *)my_hash_search(&hash, (const uchar *)&domain_id, 0))) - return 1; + { + err= 1; + goto end; + } while (list) { list_element *next= list->next; e->add(list); list= next; } - return 0; + +end: + mysql_mutex_unlock(&LOCK_slave_state); + return err; } @@ -468,12 +479,12 @@ gtid_check_rpl_slave_state_table(TABLE *table) /* Write a gtid to the replication slave state table. - Do it as part of the transaction, to get slave crash safety, or as a separate - transaction if !in_transaction (eg. MyISAM or DDL). - gtid The global transaction id for this event group. sub_id Value allocated within the sub_id when the event group was read (sub_id must be consistent with commit order in master binlog). + rgi rpl_group_info context, if we are recording the gtid transactionally + as part of replicating a transactional event. NULL if called from + outside of a replicated transaction. Note that caller must later ensure that the new gtid and sub_id is inserted into the appropriate HASH element with rpl_slave_state.add(), so that it can @@ -481,13 +492,13 @@ gtid_check_rpl_slave_state_table(TABLE *table) */ int rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, - bool in_transaction, bool in_statement) + rpl_group_info *rgi, bool in_statement) { TABLE_LIST tlist; int err= 0; bool table_opened= false; TABLE *table; - list_element *elist= 0, *next; + list_element *elist= 0, *cur, *next; element *elem; ulonglong thd_saved_option= thd->variables.option_bits; Query_tables_list lex_backup; @@ -558,7 +569,7 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, thd->wsrep_ignore_table= true; #endif - if (!in_transaction) + if (!rgi) { DBUG_PRINT("info", ("resetting OPTION_BEGIN")); thd->variables.option_bits&= @@ -601,9 +612,9 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, if ((elist= elem->grab_list()) != NULL) { /* Delete any old stuff, but keep around the most recent one. */ - list_element *cur= elist; - uint64 best_sub_id= cur->sub_id; + uint64 best_sub_id= elist->sub_id; list_element **best_ptr_ptr= &elist; + cur= elist; while ((next= cur->next)) { if (next->sub_id > best_sub_id) @@ -636,7 +647,8 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, table->file->print_error(err, MYF(0)); goto end; } - while (elist) + cur = elist; + while (cur) { uchar key_buffer[4+8]; @@ -646,9 +658,9 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, /* `break' does not work inside DBUG_EXECUTE_IF */ goto dbug_break; }); - next= elist->next; + next= cur->next; - table->field[1]->store(elist->sub_id, true); + table->field[1]->store(cur->sub_id, true); /* domain_id is already set in table->record[0] from write_row() above. */ key_copy(key_buffer, table->record[0], &table->key_info[0], 0, false); if (table->file->ha_index_read_map(table->record[1], key_buffer, @@ -662,8 +674,7 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, not want to endlessly error on the same element in case of table corruption or such. */ - my_free(elist); - elist= next; + cur= next; if (err) break; } @@ -686,18 +697,31 @@ IF_DBUG(dbug_break:, ) */ if (elist) { - mysql_mutex_lock(&LOCK_slave_state); put_back_list(gtid->domain_id, elist); - mysql_mutex_unlock(&LOCK_slave_state); + elist = 0; } ha_rollback_trans(thd, FALSE); } close_thread_tables(thd); - if (in_transaction) + if (rgi) + { thd->mdl_context.release_statement_locks(); + /* + Save the list of old gtid entries we deleted. If this transaction + fails later for some reason and is rolled back, the deletion of those + entries will be rolled back as well, and we will need to put them back + on the to-be-deleted list so we can re-do the deletion. Otherwise + redundant rows in mysql.gtid_slave_pos may accumulate if transactions + are rolled back and retried after record_gtid(). + */ + rgi->pending_gtid_deletes_save(gtid->domain_id, elist); + } else + { thd->mdl_context.release_transactional_locks(); + rpl_group_info::pending_gtid_deletes_free(elist); + } } thd->lex->restore_backup_query_tables_list(&lex_backup); thd->variables.option_bits= thd_saved_option; @@ -1080,7 +1104,7 @@ rpl_slave_state::load(THD *thd, char *state_from_master, size_t len, if (gtid_parser_helper(&state_from_master, end, >id) || !(sub_id= next_sub_id(gtid.domain_id)) || - record_gtid(thd, >id, sub_id, false, in_statement) || + record_gtid(thd, >id, sub_id, NULL, in_statement) || update(gtid.domain_id, gtid.server_id, sub_id, gtid.seq_no, NULL)) return 1; if (state_from_master == end) diff --git a/sql/rpl_gtid.h b/sql/rpl_gtid.h index 79d566bddbf..7bd639b768f 100644 --- a/sql/rpl_gtid.h +++ b/sql/rpl_gtid.h @@ -182,7 +182,7 @@ struct rpl_slave_state uint64 seq_no, rpl_group_info *rgi); int truncate_state_table(THD *thd); int record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id, - bool in_transaction, bool in_statement); + rpl_group_info *rgi, bool in_statement); uint64 next_sub_id(uint32 domain_id); int iterate(int (*cb)(rpl_gtid *, void *), void *data, rpl_gtid *extra_gtids, uint32 num_extra, diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 64a1b535307..b35130c1505 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -1680,6 +1680,7 @@ rpl_group_info::reinit(Relay_log_info *rli) long_find_row_note_printed= false; did_mark_start_commit= false; gtid_ev_flags2= 0; + pending_gtid_delete_list= NULL; last_master_timestamp = 0; gtid_ignore_duplicate_state= GTID_DUPLICATE_NULL; speculation= SPECULATE_NO; @@ -1804,6 +1805,12 @@ void rpl_group_info::cleanup_context(THD *thd, bool error) erroneously update the GTID position. */ gtid_pending= false; + + /* + Rollback will have undone any deletions of old rows we might have made + in mysql.gtid_slave_pos. Put those rows back on the list to be deleted. + */ + pending_gtid_deletes_put_back(); } m_table_map.clear_tables(); slave_close_thread_tables(thd); @@ -2027,6 +2034,78 @@ rpl_group_info::unmark_start_commit() } +/* + When record_gtid() has deleted any old rows from the table + mysql.gtid_slave_pos as part of a replicated transaction, save the list of + rows deleted here. + + If later the transaction fails (eg. optimistic parallel replication), the + deletes will be undone when the transaction is rolled back. Then we can + put back the list of rows into the rpl_global_gtid_slave_state, so that + we can re-do the deletes and avoid accumulating old rows in the table. +*/ +void +rpl_group_info::pending_gtid_deletes_save(uint32 domain_id, + rpl_slave_state::list_element *list) +{ + /* + We should never get to a state where we try to save a new pending list of + gtid deletes while we still have an old one. But make sure we handle it + anyway just in case, so we avoid leaving stray entries in the + mysql.gtid_slave_pos table. + */ + DBUG_ASSERT(!pending_gtid_delete_list); + if (unlikely(pending_gtid_delete_list)) + pending_gtid_deletes_put_back(); + + pending_gtid_delete_list= list; + pending_gtid_delete_list_domain= domain_id; +} + + +/* + Take the list recorded by pending_gtid_deletes_save() and put it back into + rpl_global_gtid_slave_state. This is needed if deletion of the rows was + rolled back due to transaction failure. +*/ +void +rpl_group_info::pending_gtid_deletes_put_back() +{ + if (pending_gtid_delete_list) + { + rpl_global_gtid_slave_state->put_back_list(pending_gtid_delete_list_domain, + pending_gtid_delete_list); + pending_gtid_delete_list= NULL; + } +} + + +/* + Free the list recorded by pending_gtid_deletes_save(). Done when the deletes + in the list have been permanently committed. +*/ +void +rpl_group_info::pending_gtid_deletes_clear() +{ + pending_gtid_deletes_free(pending_gtid_delete_list); + pending_gtid_delete_list= NULL; +} + + +void +rpl_group_info::pending_gtid_deletes_free(rpl_slave_state::list_element *list) +{ + rpl_slave_state::list_element *next; + + while (list) + { + next= list->next; + my_free(list); + list= next; + } +} + + rpl_sql_thread_info::rpl_sql_thread_info(Rpl_filter *filter) : rpl_filter(filter) { diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h index 74d5b6fe416..b40a34a54e6 100644 --- a/sql/rpl_rli.h +++ b/sql/rpl_rli.h @@ -676,6 +676,11 @@ struct rpl_group_info /* Needs room for "Gtid D-S-N\x00". */ char gtid_info_buf[5+10+1+10+1+20+1]; + /* List of not yet committed deletions in mysql.gtid_slave_pos. */ + rpl_slave_state::list_element *pending_gtid_delete_list; + /* Domain associated with pending_gtid_delete_list. */ + uint32 pending_gtid_delete_list_domain; + /* The timestamp, from the master, of the commit event. Used to do delayed update of rli->last_master_timestamp, for getting @@ -817,6 +822,12 @@ struct rpl_group_info char *gtid_info(); void unmark_start_commit(); + static void pending_gtid_deletes_free(rpl_slave_state::list_element *list); + void pending_gtid_deletes_save(uint32 domain_id, + rpl_slave_state::list_element *list); + void pending_gtid_deletes_put_back(); + void pending_gtid_deletes_clear(); + time_t get_row_stmt_start_timestamp() { return row_stmt_start_timestamp;