Hi Nirbhay, I'm not expert on this code area but some questions, comments below. On Tue, May 3, 2016 at 1:26 AM, Nirbhay Choubey <nirbhay@mariadb.com> wrote:
revision-id: a6fb98fd74ab6f14fab0c1ef4630b010ff28880f (mariadb-10.1.13-6-ga6fb98f) parent(s): f8cdb9e7e8fb692f8eb3b9bb4792cd60653e0eb8 author: Nirbhay Choubey committer: Nirbhay Choubey timestamp: 2016-05-02 18:26:58 -0400 message:
MDEV-6368: assertion xid_seqno > trx_sys_cur_xid_seqno
- Validate the specified wsrep_start_position value by also checking the return status of wsrep->sst_received. This also ensures that changes in wsrep_start_position is not allowed when the node is not in JOINING state. - Do not allow decrease in seqno within same UUID. - The initial checkpoint in SEs should be [0...:-1].
diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc index 4f6cc51..44fdd03 100644 --- a/sql/wsrep_sst.cc +++ b/sql/wsrep_sst.cc @@ -264,42 +264,91 @@ void wsrep_sst_complete (const wsrep_uuid_t* sst_uuid, mysql_mutex_unlock (&LOCK_wsrep_sst); }
-void wsrep_sst_received (wsrep_t* const wsrep, +/* + If wsrep provider is loaded, inform that the new state snapshot + has been received. Also update the local checkpoint. + + @param wsrep [IN] wsrep handle + @param uuid [IN] Initial state UUID + @param seqno [IN] Initial state sequence number + @param state [IN] Always NULL, also ignored by wsrep provider (?) + @param state_len [IN] Always 0, also ignored by wsrep provider (?) + @param implicit [IN] Whether invoked implicitly due to SST + (true) or explicitly because if change + in wsrep_start_position by user (false). + @return false Success + true Error + +*/ +bool wsrep_sst_received (wsrep_t* const wsrep, const wsrep_uuid_t& uuid, wsrep_seqno_t const seqno, const void* const state, - size_t const state_len)
Why not const size_t state_len ? Use of const here is confusing.
+ size_t const state_len, + bool const implicit) { - wsrep_get_SE_checkpoint(local_uuid, local_seqno); + /* + To keep track of whether the local uuid:seqno should be updated. Also, note + that local state (uuid:seqno) is updated/checkpointed only after we get an + OK from wsrep provider. By doing so, the values remain consistent across + the server & wsrep provider. + */ + bool do_update= false; + + // Get the locally stored uuid:seqno. + wsrep_get_SE_checkpoint(local_uuid, local_seqno);
What if this fails, no error handling ?
- if (memcmp(&local_uuid, &uuid, sizeof(wsrep_uuid_t)) || - local_seqno < seqno || seqno < 0) + if (memcmp(&local_uuid, &uuid, sizeof(wsrep_uuid_t)) || + local_seqno < seqno) + { + do_update= true; + } + else if (local_seqno > seqno) + { + WSREP_WARN("SST position can't be set in past. Requested: %lld, Current: " + " %lld.", (long long)seqno, (long long)local_seqno); + /* + If we are here because of SET command, simply return true (error) instead of + aborting. + */ + if (implicit) { - wsrep_set_SE_checkpoint(uuid, seqno); - local_uuid = uuid; - local_seqno = seqno; + WSREP_WARN("Can't continue."); + unireg_abort(1); } - else if (local_seqno > seqno) + else { - WSREP_WARN("SST postion is in the past: %lld, current: %lld. " - "Can't continue.", - (long long)seqno, (long long)local_seqno); - unireg_abort(1); + return true; } + }
#ifdef GTID_SUPPORT - wsrep_init_sidno(uuid); + wsrep_init_sidno(uuid); #endif /* GTID_SUPPORT */
- if (wsrep) - { - int const rcode(seqno < 0 ? seqno : 0); - wsrep_gtid_t const state_id = { - uuid, (rcode ? WSREP_SEQNO_UNDEFINED : seqno) - }; + if (wsrep) + { + int const rcode(seqno < 0 ? seqno : 0);
When seqno can be < 0 ?
+ wsrep_gtid_t const state_id= {uuid, + (rcode ? WSREP_SEQNO_UNDEFINED : seqno)}; + + wsrep_status_t ret= wsrep->sst_received(wsrep, &state_id, state, + state_len, rcode);
- wsrep->sst_received(wsrep, &state_id, state, state_len, rcode); + if (ret != WSREP_OK) + { + return true; } + } + + // Now is the good time to update the local state and checkpoint. + if (do_update) { + wsrep_set_SE_checkpoint(uuid, seqno);
Again, I would add error handling.
+ local_uuid= uuid; + local_seqno= seqno; + } + + return false; }
// Let applier threads to continue @@ -308,7 +357,7 @@ void wsrep_sst_continue () if (sst_needed) { WSREP_INFO("Signalling provider to continue."); - wsrep_sst_received (wsrep, local_uuid, local_seqno, NULL, 0); + wsrep_sst_received (wsrep, local_uuid, local_seqno, NULL, 0, true);
You should check the return code.
} }
diff --git a/sql/wsrep_var.cc b/sql/wsrep_var.cc index 9c01e54..bd94190 100644
bool wsrep_start_position_update (sys_var *self, THD* thd, enum_var_type type) { - WSREP_INFO ("wsrep_start_position var submitted: '%s'", - wsrep_start_position); - // since this value passed wsrep_start_position_check, don't check anything - // here - wsrep_set_local_position (wsrep_start_position, true); - return 0; + // Print a confirmation that wsrep_start_position has been updated. + WSREP_INFO ("wsrep_start_position set to '%s'", wsrep_start_position); + return false;
Hmm, normally I thought that system variable set would go like if (variable_check()) variable_update() but now update does not do nothing ?
}
void wsrep_start_position_init (const char* val) @@ -164,7 +194,7 @@ void wsrep_start_position_init (const char* val) return; }
- wsrep_set_local_position (val, false); + wsrep_set_local_position (val, strlen(val), false);
Add error handling.
}
static bool refresh_provider_options() diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc index 76c8613..1625e8d 100644 --- a/storage/innobase/trx/trx0sys.cc +++ b/storage/innobase/trx/trx0sys.cc @@ -349,10 +349,10 @@ void read_wsrep_xid_uuid(const XID* xid, unsigned char* buf) unsigned char xid_uuid[16]; long long xid_seqno = read_wsrep_xid_seqno(xid); read_wsrep_xid_uuid(xid, xid_uuid); - if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 8)) + if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 16) && + (xid_seqno > -1))
Does this mean that you ignore uuid if it is exactly the same, or is there error handling on else? R: Jan