Hi Nirbhay,I'm not expert on this code area
but some questions, comments below.
On Tue, May 3, 2016 at 1:26 AM, Nirbhay Choubey <nirbhay@mariadb.com> wrote:revision-id: a6fb98fd74ab6f14fab0c1ef4630b010ff28880f (mariadb-10.1.13-6-ga6fb98f)
parent(s): f8cdb9e7e8fb692f8eb3b9bb4792cd60653e0eb8
author: Nirbhay Choubey
committer: Nirbhay Choubey
timestamp: 2016-05-02 18:26:58 -0400
message:
MDEV-6368: assertion xid_seqno > trx_sys_cur_xid_seqno
- Validate the specified wsrep_start_position value by also
checking the return status of wsrep->sst_received. This also
ensures that changes in wsrep_start_position is not allowed
when the node is not in JOINING state.
- Do not allow decrease in seqno within same UUID.
- The initial checkpoint in SEs should be [0...:-1].
diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc
index 4f6cc51..44fdd03 100644
--- a/sql/wsrep_sst.cc
+++ b/sql/wsrep_sst.cc
@@ -264,42 +264,91 @@ void wsrep_sst_complete (const wsrep_uuid_t* sst_uuid,
mysql_mutex_unlock (&LOCK_wsrep_sst);
}
-void wsrep_sst_received (wsrep_t* const wsrep,
+/*
+ If wsrep provider is loaded, inform that the new state snapshot
+ has been received. Also update the local checkpoint.
+
+ @param wsrep [IN] wsrep handle
+ @param uuid [IN] Initial state UUID
+ @param seqno [IN] Initial state sequence number
+ @param state [IN] Always NULL, also ignored by wsrep provider (?)
+ @param state_len [IN] Always 0, also ignored by wsrep provider (?)
+ @param implicit [IN] Whether invoked implicitly due to SST
+ (true) or explicitly because if change
+ in wsrep_start_position by user (false).
+ @return false Success
+ true Error
+
+*/
+bool wsrep_sst_received (wsrep_t* const wsrep,
const wsrep_uuid_t& uuid,
wsrep_seqno_t const seqno,
const void* const state,
- size_t const state_len)Why not const size_t state_len ? Use of const here is confusing.
+ size_t const state_len,
+ bool const implicit)
{
- wsrep_get_SE_checkpoint(local_uuid, local_seqno);
+ /*
+ To keep track of whether the local uuid:seqno should be updated. Also, note
+ that local state (uuid:seqno) is updated/checkpointed only after we get an
+ OK from wsrep provider. By doing so, the values remain consistent across
+ the server & wsrep provider.
+ */
+ bool do_update= false;
+
+ // Get the locally stored uuid:seqno.
+ wsrep_get_SE_checkpoint(local_uuid, local_seqno);What if this fails, no error handling ?
- if (memcmp(&local_uuid, &uuid, sizeof(wsrep_uuid_t)) ||
- local_seqno < seqno || seqno < 0)
+ if (memcmp(&local_uuid, &uuid, sizeof(wsrep_uuid_t)) ||
+ local_seqno < seqno)
+ {
+ do_update= true;
+ }
+ else if (local_seqno > seqno)
+ {
+ WSREP_WARN("SST position can't be set in past. Requested: %lld, Current: "
+ " %lld.", (long long)seqno, (long long)local_seqno);
+ /*
+ If we are here because of SET command, simply return true (error) instead of
+ aborting.
+ */
+ if (implicit)
{
- wsrep_set_SE_checkpoint(uuid, seqno);
- local_uuid = uuid;
- local_seqno = seqno;
+ WSREP_WARN("Can't continue.");
+ unireg_abort(1);
}
- else if (local_seqno > seqno)
+ else
{
- WSREP_WARN("SST postion is in the past: %lld, current: %lld. "
- "Can't continue.",
- (long long)seqno, (long long)local_seqno);
- unireg_abort(1);
+ return true;
}
+ }
#ifdef GTID_SUPPORT
- wsrep_init_sidno(uuid);
+ wsrep_init_sidno(uuid);
#endif /* GTID_SUPPORT */
- if (wsrep)
- {
- int const rcode(seqno < 0 ? seqno : 0);
- wsrep_gtid_t const state_id = {
- uuid, (rcode ? WSREP_SEQNO_UNDEFINED : seqno)
- };
+ if (wsrep)
+ {
+ int const rcode(seqno < 0 ? seqno : 0);When seqno can be < 0 ?
+ wsrep_gtid_t const state_id= {uuid,
+ (rcode ? WSREP_SEQNO_UNDEFINED : seqno)};
+
+ wsrep_status_t ret= wsrep->sst_received(wsrep, &state_id, state,
+ state_len, rcode);
- wsrep->sst_received(wsrep, &state_id, state, state_len, rcode);
+ if (ret != WSREP_OK)
+ {
+ return true;
}
+ }
+
+ // Now is the good time to update the local state and checkpoint.
+ if (do_update) {
+ wsrep_set_SE_checkpoint(uuid, seqno);Again, I would add error handling.
+ local_uuid= uuid;
+ local_seqno= seqno;
+ }
+
+ return false;
}
// Let applier threads to continue
@@ -308,7 +357,7 @@ void wsrep_sst_continue ()
if (sst_needed)
{
WSREP_INFO("Signalling provider to continue.");
- wsrep_sst_received (wsrep, local_uuid, local_seqno, NULL, 0);
+ wsrep_sst_received (wsrep, local_uuid, local_seqno, NULL, 0, true);You should check the return code.
}
}
diff --git a/sql/wsrep_var.cc b/sql/wsrep_var.cc
index 9c01e54..bd94190 100644
bool wsrep_start_position_update (sys_var *self, THD* thd, enum_var_type type)
{
- WSREP_INFO ("wsrep_start_position var submitted: '%s'",
- wsrep_start_position);
- // since this value passed wsrep_start_position_check, don't check anything
- // here
- wsrep_set_local_position (wsrep_start_position, true);
- return 0;
+ // Print a confirmation that wsrep_start_position has been updated.
+ WSREP_INFO ("wsrep_start_position set to '%s'", wsrep_start_position);
+ return false;Hmm, normally I thought that system variable set would go like if (variable_check()) variable_update()but now update does not do nothing ?
}
void wsrep_start_position_init (const char* val)
@@ -164,7 +194,7 @@ void wsrep_start_position_init (const char* val)
return;
}
- wsrep_set_local_position (val, false);
+ wsrep_set_local_position (val, strlen(val), false);Add error handling.
}
static bool refresh_provider_options()
diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc
index 76c8613..1625e8d 100644
--- a/storage/innobase/trx/trx0sys.cc
+++ b/storage/innobase/trx/trx0sys.cc
@@ -349,10 +349,10 @@ void read_wsrep_xid_uuid(const XID* xid, unsigned char* buf)
unsigned char xid_uuid[16];
long long xid_seqno = read_wsrep_xid_seqno(xid);
read_wsrep_xid_uuid(xid, xid_uuid);
- if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 8))
+ if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 16) &&
+ (xid_seqno > -1))Does this mean that you ignore uuid if it is exactly the same, or is there error handling on else?
R: Jan