From 658195ade8223a3a27ed3dd102dfb9566b514206 Mon Sep 17 00:00:00 2001 From: Joshua Zhang Date: Mon, 8 Jul 2024 03:00:08 +0000 Subject: [PATCH] Address review feedbacks. 1. Remove type annotations which are not needed here. 2. Add more comments for the actions. 3. Fix simulation argument in valide-model.sh. 4. Let validate-model.sh use all cores to improve performance. Signed-off-by: Joshua Zhang --- tla/etcdraft.tla | 120 +++-- tla/etcdraft2.tla | 1035 ----------------------------------------- tla/validate-model.sh | 6 +- 3 files changed, 61 insertions(+), 1100 deletions(-) delete mode 100644 tla/etcdraft2.tla diff --git a/tla/etcdraft.tla b/tla/etcdraft.tla index 3b7bdb21..34e958d8 100644 --- a/tla/etcdraft.tla +++ b/tla/etcdraft.tla @@ -32,27 +32,19 @@ CONSTANT ValueEntry, ConfigEntry \* Server states. CONSTANTS - \* @type: Str; Follower, - \* @type: Str; Candidate, - \* @type: Str; Leader \* A reserved value. CONSTANTS - \* @type: Int; Nil \* Message types: CONSTANTS - \* @type: Str; RequestVoteRequest, - \* @type: Str; RequestVoteResponse, - \* @type: Str; AppendEntriesRequest, - \* @type: Str; AppendEntriesResponse @@ -62,14 +54,6 @@ CONSTANTS \* A bag of records representing requests and responses sent from one server \* to another. We differentiate between the message types to support Apalache. VARIABLE - \* @typeAlias: ENTRY = [term: Int, value: Int]; - \* @typeAlias: LOGT = Seq(ENTRY); - \* @typeAlias: RVREQT = [mtype: Str, mterm: Int, mlastLogTerm: Int, mlastLogIndex: Int, msource: Int, mdest: Int]; - \* @typeAlias: RVRESPT = [mtype: Str, mterm: Int, mvoteGranted: Bool, msource: Int, mdest: Int ]; - \* @typeAlias: AEREQT = [mtype: Str, mterm: Int, mprevLogIndex: Int, mprevLogTerm: Int, mentries: LOGT, mcommitIndex: Int, msource: Int, mdest: Int ]; - \* @typeAlias: AERESPT = [mtype: Str, mterm: Int, msuccess: Bool, mmatchIndex: Int, msource: Int, mdest: Int ]; - \* @typeAlias: MSG = [ wrapped: Bool, mtype: Str, mterm: Int, msource: Int, mdest: Int, RVReq: RVREQT, RVResp: RVRESPT, AEReq: AEREQT, AEResp: AERESPT ]; - \* @type: MSG -> Int; messages VARIABLE pendingMessages @@ -80,16 +64,13 @@ messageVars == <> \* The server's term number. VARIABLE - \* @type: Int -> Int; currentTerm \* The server's state (Follower, Candidate, or Leader). VARIABLE - \* @type: Int -> Str; state \* The candidate the server voted for in its current term, or \* Nil if it hasn't voted for any. VARIABLE - \* @type: Int -> Int; votedFor serverVars == <> @@ -97,12 +78,11 @@ serverVars == <> \* log entry. Unfortunately, the Sequence module defines Head(s) as the entry \* with index 1, so be careful not to use that! VARIABLE - \* @type: Int -> [ entries: LOGT, len: Int ]; log \* The index of the latest entry in the log the state machine may apply. VARIABLE - \* @type: Int -> Int; commitIndex +\* The index of the latest entry in the log the state machine has applied. VARIABLE applied logVars == <> @@ -111,36 +91,37 @@ logVars == <> \* The set of servers from which the candidate has received a RequestVote \* response in its currentTerm. VARIABLE - \* @type: Int -> Set(Int); votesResponded \* The set of servers from which the candidate has received a vote in its \* currentTerm. VARIABLE - \* @type: Int -> Set(Int); votesGranted -\* @type: Seq(Int -> Set(Int)); candidateVars == <> \* The following variables are used only on leaders: \* The latest entry that each follower has acknowledged is the same as the \* leader's. This is used to calculate commitIndex on the leader. VARIABLE - \* @type: Int -> (Int -> Int); matchIndex +\* The upper bound index of the latest appended configuration change entry that +\* has not been applied to the leader. 0 means all configuration change entries +\* have been applied in the leader. VARIABLE pendingConfChangeIndex leaderVars == <> -\* @type: Int -> [jointConfig: Seq(Set(int)), learners: Set(int)] +\* Current configurations of each node. VARIABLE config +\* The upper bound index of the latest applied configuration change entry in each node. VARIABLE appliedConfChange - configVars == <> +\* States that are persisted in durable storage. VARIABLE durableState +\* The ready states to be processed in each node in ready phase. VARIABLE ready readyVars == <> @@ -160,72 +141,77 @@ vars == < Cardinality(c)} \* The term of the last entry in a log, or 0 if the log is empty. -\* @type: LOGT => Int; LastTerm(xlog) == IF xlog = <<>> THEN 0 ELSE xlog[Len(xlog)].term \* Helper for Send and Reply. Given a message m and bag of messages, return a \* new bag of messages with one more m in it. -\* @type: (MSG, MSG -> Int) => MSG -> Int; WithMessage(m, msgs) == msgs (+) SetToBag({m}) \* Helper for Discard and Reply. Given a message m and bag of messages, return \* a new bag of messages with one less m in it. -\* @type: (MSG, MSG -> Int) => MSG -> Int; WithoutMessage(m, msgs) == msgs (-) SetToBag({m}) \* Add a message to the bag of pendingMessages. SendDirect(m) == pendingMessages' = WithMessage(m, pendingMessages) -\* All pending messages sent from node i +\* All pending messages sent from node i. PendingMessages(i) == [ m \in { mm \in DOMAIN pendingMessages : mm.msource = i } |-> pendingMessages[m] ] -\* Remove all messages in pendingMessages that were sent from node i +\* Remove all messages in pendingMessages that were sent from node i. ClearPendingMessages(i) == pendingMessages (-) PendingMessages(i) -\* Remove a message from the bag of messages. Used when a server is done +\* Remove a message from the bag of messages. Used when a server is done. DiscardDirect(m) == messages' = WithoutMessage(m, messages) -\* Combination of Send and Discard +\* Combination of Send and Discard. ReplyDirect(response, request) == /\ pendingMessages' = WithMessage(response, pendingMessages) /\ messages' = WithoutMessage(request, messages) -\* Default: change when needed +\* Default: change when needed. Send(m) == SendDirect(m) Reply(response, request) == ReplyDirect(response, request) Discard(m) == DiscardDirect(m) MaxOrZero(s) == IF s = {} THEN 0 ELSE Max(s) +\* Get joint configuration of node i. GetJointConfig(i) == config[i].jointConfig +\* Get incoming configuration of node i. GetConfig(i) == GetJointConfig(i)[1] +\* Get outgoing configuration of node i. GetOutgoingConfig(i) == GetJointConfig(i)[2] +\* Check if current configuration of node i is a joint configuration (has outgoing configuration). IsJointConfig(i) == /\ GetJointConfig(i)[2] # {} +\* Get learners in current configuration of node i. GetLearners(i) == config[i].learners +\* Compute effective configuration change for configuraiton change entry in index k of log in node i. ConfFromLog(i, k) == [jointConfig |-> << log[i][k].value.newconf, {} >>, learners |-> log[i][k].value.learners] -\* Apply conf change log entry to configuration +\* Apply conf change log entry to configuration. ApplyConfigUpdate(i, k) == config' = [config EXCEPT ![i]= ConfFromLog(i, k)] +\* Try to commit entries up to index c in log of node i. CommitTo(i, c) == commitIndex' = [commitIndex EXCEPT ![i] = Max({@, c})] +\* Set of all leaders at this moment. CurrentLeaders == {i \in Server : state[i] = Leader} ---- @@ -294,9 +280,14 @@ SpecActions == { "DeleteServer" } +\* Default implementation of IsEnabled which controls if the action is enabled or not. IsEnabled(action, args) == action \in SpecActions +\* Default implementation of PostAction which is called after the action. PostAction(action, args) == TRUE +\* This is a wrapper of top level actions in the spec which allows injecting of custom +\* IsEnabled to constrain the action and custom PostAction to process necessary operations +\* after the action (validation for example). SpecAction(action, args, op(_)) == /\ IsEnabled(action, args) /\ op(action) @@ -304,7 +295,6 @@ SpecAction(action, args, op(_)) == \* Server i restarts from stable storage. \* It loses everything but its currentTerm, commitIndex, votedFor, log, and config in durable state. -\* @type: Int => Bool; Restart(i) == SpecAction("Restart", <>, LAMBDA act: /\ state' = [state EXCEPT ![i] = Follower] /\ votesResponded' = [votesResponded EXCEPT ![i] = {}] @@ -324,7 +314,6 @@ Restart(i) == SpecAction("Restart", <>, LAMBDA act: ) \* Server i times out and starts a new election. -\* @type: Int => Bool; Timeout(i) == SpecAction("Timeout", <>, LAMBDA act: /\ ~InReadyPhase(i) /\ state[i] \in {Follower, Candidate} @@ -338,7 +327,6 @@ Timeout(i) == SpecAction("Timeout", <>, LAMBDA act: ) \* Candidate i sends j a RequestVote request. -\* @type: (Int, Int) => Bool; RequestVote(i, j) == SpecAction("RequestVote", <>, LAMBDA act: /\ state[i] = Candidate /\ j \in ((GetConfig(i) \union GetLearners(i)) \ votesResponded[i]) @@ -359,7 +347,6 @@ RequestVote(i, j) == SpecAction("RequestVote", <>, LAMBDA act: \* Leader i sends j an AppendEntries request containing entries in [b,e) range. \* N.B. range is right open -\* @type: (Int, Int, <>, Int) => Bool; AppendEntriesInRangeToPeer(subtype, i, j, range) == /\ i /= j /\ range[1] <= range[2] @@ -388,7 +375,7 @@ AppendEntriesInRangeToPeer(subtype, i, j, range) == mdest |-> j]) /\ UNCHANGED <> -\* etcd leader sends MsgAppResp to itself immediately after appending log entry +\* etcd leader sends MsgAppResp to itself immediately after appending log entry. AppendEntriesToSelf(i) == SpecAction("AppendEntriesToSelf", <>, LAMBDA act: /\ state[i] = Leader /\ Send([mtype |-> AppendEntriesResponse, @@ -401,21 +388,23 @@ AppendEntriesToSelf(i) == SpecAction("AppendEntriesToSelf", <>, LAMBDA act: /\ UNCHANGED <> ) +\* Leader i replicates its entries in range to peer j. AppendEntries(i, j, range) == SpecAction("AppendEntries", <>, LAMBDA act: AppendEntriesInRangeToPeer("app", i, j, range) ) +\* Leader i sends heartbeat to peer j. Heartbeat(i, j) == SpecAction("Heartbeat", <>, LAMBDA act: - \* heartbeat is equivalent to an append-entry request with 0 entry index 1 + \* heartbeat is equivalent to an append-entry request with 0 entry index 1. AppendEntriesInRangeToPeer("heartbeat", i, j, <<1,1>>) ) +\* Leader i sends snapshot to peer j. SendSnapshot(i, j, index) == SpecAction("SendSnapshot", <>, LAMBDA act: AppendEntriesInRangeToPeer("snapshot", i, j, <<1,index+1>>) ) \* Candidate i transitions to leader. -\* @type: Int => Bool; BecomeLeader(i) == SpecAction("BecomeLeader", <>, LAMBDA act: /\ ~InReadyPhase(i) /\ state[i] = Candidate @@ -425,7 +414,8 @@ BecomeLeader(i) == SpecAction("BecomeLeader", <>, LAMBDA act: [j \in Server |-> IF j = i THEN Len(log[i]) ELSE 0]] /\ UNCHANGED <> ) - + +\* Leader i appends one entry of type t and value v to its log. Replicate(i, v, t) == /\ t \in {ValueEntry, ConfigEntry} /\ state[i] = Leader @@ -436,7 +426,6 @@ Replicate(i, v, t) == IN /\ log' = [log EXCEPT ![i] = newLog] \* Leader i receives a client request to add v to the log. -\* @type: (Int, Int) => Bool; ClientRequest(i, v) == SpecAction("ClientRequest", <>, LAMBDA act: /\ Replicate(i, [val |-> v], ValueEntry) /\ UNCHANGED <> @@ -446,7 +435,6 @@ ClientRequest(i, v) == SpecAction("ClientRequest", <>, LAMBDA act: \* This is done as a separate step from handling AppendEntries responses, \* in part to minimize atomic regions, and in part so that leaders of \* single-server clusters are able to mark entries committed. -\* @type: Int => Bool; AdvanceCommitIndex(i) == SpecAction("AdvanceCommitIndex", <>, LAMBDA act: /\ state[i] = Leader /\ LET \* The set of servers that agree up through index. @@ -469,7 +457,7 @@ AdvanceCommitIndex(i) == SpecAction("AdvanceCommitIndex", <>, LAMBDA act: /\ UNCHANGED <> ) -\* Leader i adds a new server j or promote learner j +\* Leader i adds a new server j or promote learner j. AddNewServer(i, j) == SpecAction("AddNewServer", <>, LAMBDA act: /\ state[i] = Leader /\ j \notin GetConfig(i) @@ -511,6 +499,7 @@ DeleteServer(i, j) == SpecAction("DeleteServer", <>, LAMBDA act: /\ UNCHANGED <> ) +\* Get current ready data in node i. ReadyData(i) == [ msgs |-> PendingMessages(i), @@ -522,6 +511,7 @@ ReadyData(i) == config |-> config[i] ] +\* Node i enters ready phase. Ready(i) == SpecAction("Ready", <>, LAMBDA act: /\ ~InReadyPhase(i) /\ ready' = [ready EXCEPT ![i] = ReadyData(i)] @@ -529,6 +519,7 @@ Ready(i) == SpecAction("Ready", <>, LAMBDA act: /\ UNCHANGED <> ) +\* Get states to be persisted from ready data. DurableStateFromReady(rd) == [ currentTerm |-> rd.currentTerm, @@ -538,6 +529,8 @@ DurableStateFromReady(rd) == config |-> rd.config, applied |-> rd.applied ] + +\* Node i persists states to durable storage. PersistState(i) == SpecAction("PersistState", <>, LAMBDA act: /\ InReadyPhase(i) /\ durableState[i] /= DurableStateFromReady(ready[i]) @@ -545,15 +538,18 @@ PersistState(i) == SpecAction("PersistState", <>, LAMBDA act: /\ UNCHANGED <> ) +\* Get the index of the first unapplied configuration change entry in log of node i. UnappliedConfChange(i) == SelectInSubSeq(log[i], Max({applied[i], appliedConfChange[i]})+1, ready[i].commitIndex, LAMBDA x: x.type = ConfigEntry) +\* Check if node i has unapplied configuration change entry in its log. HasUnappliedConfChange(i) == LET k == UnappliedConfChange(i) IN /\ k > 0 /\ config[i] /= ConfFromLog(i, k) +\* Node i applies one unapplied configuration change entry in its log. ApplyConfChange(i) == SpecAction("ApplyConfChange", <>, LAMBDA act: /\ InReadyPhase(i) /\ ~IsJointConfig(i) @@ -569,6 +565,7 @@ ApplyConfChange(i) == SpecAction("ApplyConfChange", <>, LAMBDA act: /\ UNCHANGED <> ) +\* Node i sends pending messages to its peers. SendMessages(i) == SpecAction("SendMessages", <>, LAMBDA act: /\ InReadyPhase(i) /\ ready[i].msgs /= EmptyBag @@ -577,6 +574,7 @@ SendMessages(i) == SpecAction("SendMessages", <>, LAMBDA act: /\ UNCHANGED <> ) +\* Node i exits ready phase. Advance(i) == SpecAction("Advance", <>, LAMBDA act: /\ InReadyPhase(i) /\ ready[i].msgs = EmptyBag \* all pending messages in ready are sent out @@ -586,6 +584,7 @@ Advance(i) == SpecAction("Advance", <>, LAMBDA act: /\ UNCHANGED <> ) +\* Node i becomes follower of term t. BecomeFollowerOfTerm(i, t) == /\ currentTerm' = [currentTerm EXCEPT ![i] = t] /\ state' = [state EXCEPT ![i] = Follower] @@ -594,6 +593,7 @@ BecomeFollowerOfTerm(i, t) == ELSE UNCHANGED <> +\* Node i steps down to follower of current term. StepDownToFollower(i) == SpecAction("StepDownToFollower", <>, LAMBDA act: /\ BecomeFollowerOfTerm(i, currentTerm[i]) /\ UNCHANGED <> @@ -605,7 +605,6 @@ StepDownToFollower(i) == SpecAction("StepDownToFollower", <>, LAMBDA act: \* Server i receives a RequestVote request from server j with \* m.mterm <= currentTerm[i]. -\* @type: (Int, Int, RVREQT) => Bool; HandleRequestVoteRequest(i, j, m) == LET logOk == \/ m.mlastLogTerm > LastTerm(log[i]) \/ /\ m.mlastLogTerm = LastTerm(log[i]) @@ -626,7 +625,6 @@ HandleRequestVoteRequest(i, j, m) == \* Server i receives a RequestVote response from server j with \* m.mterm = currentTerm[i]. -\* @type: (Int, Int, RVRESPT) => Bool; HandleRequestVoteResponse(i, j, m) == \* This tallies votes even when the current state is not Candidate, but \* they won't be looked at, so it doesn't matter. @@ -641,7 +639,7 @@ HandleRequestVoteResponse(i, j, m) == /\ Discard(m) /\ UNCHANGED <> -\* @type: (Int, Int, AEREQT, Bool) => Bool; +\* Server i rejects the AppendEntriesRequest message m from node j and replies a AppendEntriesResponse message. RejectAppendEntriesRequest(i, j, m, logOk) == /\ \/ m.mterm < currentTerm[i] \/ /\ m.mterm = currentTerm[i] @@ -657,18 +655,21 @@ RejectAppendEntriesRequest(i, j, m, logOk) == m) /\ UNCHANGED <> -\* @type: (Int, MSG) => Bool; +\* Candidate i steps down to follower of same term due to receiving AppendEntriesRequest messages of +\* other leader in current term. ReturnToFollowerState(i, m) == /\ m.mterm = currentTerm[i] /\ state[i] = Candidate /\ state' = [state EXCEPT ![i] = Follower] /\ UNCHANGED <> +\* Check if the entries received by node i do not conflict with its own entries starting from index. HasNoConflict(i, index, ents) == /\ index <= Len(log[i]) + 1 /\ \A k \in 1..Len(ents): index + k - 1 <= Len(log[i]) => log[i][index+k-1].term = ents[k].term -\* @type: (Int, Int, Int, AEREQT) => Bool; +\* All entries in message m sent from node j has been replicated into log of node i. Node i will sends +\* succeess AppendEntriesResponse message to acknowledge this. AppendEntriesAlreadyDone(i, j, index, m) == /\ \/ index <= commitIndex[i] \/ /\ index > commitIndex[i] @@ -690,7 +691,7 @@ AppendEntriesAlreadyDone(i, j, index, m) == m) /\ UNCHANGED <> -\* @type: (Int, Int, AEREQT) => Bool; +\* Entries in message m conflicts with entries in log of node i starting from index. ConflictAppendEntriesRequest(i, index, m) == /\ m.mentries /= << >> /\ index > commitIndex[i] @@ -698,7 +699,7 @@ ConflictAppendEntriesRequest(i, index, m) == /\ log' = [log EXCEPT ![i] = SubSeq(@, 1, Len(@) - 1)] /\ UNCHANGED <> -\* @type: (Int, AEREQT) => Bool; +\* Entries in message m do not conflict with entries in log of node i starting from index. NoConflictAppendEntriesRequest(i, index, m) == /\ m.mentries /= << >> /\ index > commitIndex[i] @@ -706,7 +707,8 @@ NoConflictAppendEntriesRequest(i, index, m) == /\ log' = [log EXCEPT ![i] = @ \o SubSeq(m.mentries, Len(@)-index+2, Len(m.mentries))] /\ UNCHANGED <> -\* @type: (Int, Int, Bool, AEREQT) => Bool; +\* Node i accepts message m sent from node j. logOK indicates if the index of term of +\* the previous log for inserting matches that in node j. AcceptAppendEntriesRequest(i, j, logOk, m) == \* accept request /\ m.mterm = currentTerm[i] @@ -721,7 +723,6 @@ AcceptAppendEntriesRequest(i, j, logOk, m) == \* m.mterm <= currentTerm[i]. This just handles m.entries of length 0 or 1, but \* implementations could safely accept more by treating them the same as \* multiple independent requests of 1 entry. -\* @type: (Int, Int, AEREQT) => Bool; HandleAppendEntriesRequest(i, j, m) == LET logOk == \/ m.mprevLogIndex = 0 \/ /\ m.mprevLogIndex > 0 @@ -736,7 +737,6 @@ HandleAppendEntriesRequest(i, j, m) == \* Server i receives an AppendEntries response from server j with \* m.mterm = currentTerm[i]. -\* @type: (Int, Int, AERESPT) => Bool; HandleAppendEntriesResponse(i, j, m) == /\ m.mterm = currentTerm[i] /\ \/ /\ m.msuccess \* successful @@ -748,7 +748,6 @@ HandleAppendEntriesResponse(i, j, m) == /\ UNCHANGED <> \* Any RPC with a newer term causes the recipient to advance its term first. -\* @type: (Int, Int, MSG) => Bool; UpdateTerm(i, j, m) == /\ m.mterm > currentTerm[i] /\ BecomeFollowerOfTerm(i, m.mterm) @@ -756,7 +755,6 @@ UpdateTerm(i, j, m) == /\ UNCHANGED <> \* Responses with stale terms are ignored. -\* @type: (Int, Int, MSG) => Bool; DropStaleResponse(i, j, m) == /\ m.mterm < currentTerm[i] /\ Discard(m) @@ -781,6 +779,7 @@ ReceiveDirect(m) == /\ \/ DropStaleResponse(i, j, m) \/ HandleAppendEntriesResponse(i, j, m) +\* Receive message m. Receive(m) == SpecAction("Receive", <>, LAMBDA act: ReceiveDirect(m) ) @@ -795,7 +794,6 @@ NextAppendEntriesResponse == \E m \in DOMAIN messages : m.mtype = AppendEntriesR \* Network state transitions \* The network duplicates a message -\* @type: MSG => Bool; DuplicateMessage(m) == SpecAction("DuplicateMessage", <>, LAMBDA act: /\ m \in DOMAIN messages /\ messages' = WithMessage(m, messages) @@ -803,7 +801,6 @@ DuplicateMessage(m) == SpecAction("DuplicateMessage", <>, LAMBDA act: ) \* The network drops a message -\* @type: MSG => Bool; DropMessage(m) == SpecAction("DropMessage", <>, LAMBDA act: \* Do not drop loopback messages \* /\ m.msource /= m.mdest @@ -879,7 +876,6 @@ Committed(i) == SubSeq(log[i],1,commitIndex[i]) \* The current term of any server is at least the term \* of any message sent by that server -\* @type: MSG => Bool; MessageTermsLtCurrentTerm(m) == m.mterm <= currentTerm[m.msource] diff --git a/tla/etcdraft2.tla b/tla/etcdraft2.tla deleted file mode 100644 index 4e16da8f..00000000 --- a/tla/etcdraft2.tla +++ /dev/null @@ -1,1035 +0,0 @@ ---------------------------------- MODULE etcdraft2 --------------------------------- -\* Copyright 2024 The etcd Authors -\* -\* Licensed under the Apache License, Version 2.0 (the "License"); -\* you may not use this file except in compliance with the License. -\* You may obtain a copy of the License at -\* -\* http://www.apache.org/licenses/LICENSE-2.0 -\* -\* Unless required by applicable law or agreed to in writing, software -\* distributed under the License is distributed on an "AS IS" BASIS, -\* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -\* See the License for the specific language governing permissions and -\* limitations under the License. -\* -\* -\* This is the formal specification for the Raft consensus algorithm. -\* -\* Copyright 2014 Diego Ongaro, 2015 Brandon Amos and Huanchen Zhang, -\* 2016 Daniel Ricketts, 2021 George Pîrlea and Darius Foo. -\* -\* This work is licensed under the Creative Commons Attribution-4.0 -\* International License https://creativecommons.org/licenses/by/4.0/ - -EXTENDS Naturals, Integers, Bags, FiniteSets, Sequences, SequencesExt, FiniteSetsExt, BagsExt, TLC - -\* The initial and global set of server IDs. -CONSTANTS InitServer, Server - -\* Log metadata to distinguish values from configuration changes. -CONSTANT ValueEntry, ConfigEntry - -\* Server states. -CONSTANTS - \* @type: Str; - Follower, - \* @type: Str; - Candidate, - \* @type: Str; - Leader - -\* A reserved value. -CONSTANTS - \* @type: Int; - Nil - -\* Message types: -CONSTANTS - \* @type: Str; - RequestVoteRequest, - \* @type: Str; - RequestVoteResponse, - \* @type: Str; - AppendEntriesRequest, - \* @type: Str; - AppendEntriesResponse - - ----- -\* Global variables - -\* A bag of records representing requests and responses sent from one server -\* to another. We differentiate between the message types to support Apalache. -VARIABLE - \* @typeAlias: ENTRY = [term: Int, value: Int]; - \* @typeAlias: LOGT = Seq(ENTRY); - \* @typeAlias: RVREQT = [mtype: Str, mterm: Int, mlastLogTerm: Int, mlastLogIndex: Int, msource: Int, mdest: Int]; - \* @typeAlias: RVRESPT = [mtype: Str, mterm: Int, mvoteGranted: Bool, msource: Int, mdest: Int ]; - \* @typeAlias: AEREQT = [mtype: Str, mterm: Int, mprevLogIndex: Int, mprevLogTerm: Int, mentries: LOGT, mcommitIndex: Int, msource: Int, mdest: Int ]; - \* @typeAlias: AERESPT = [mtype: Str, mterm: Int, msuccess: Bool, mmatchIndex: Int, msource: Int, mdest: Int ]; - \* @typeAlias: MSG = [ wrapped: Bool, mtype: Str, mterm: Int, msource: Int, mdest: Int, RVReq: RVREQT, RVResp: RVRESPT, AEReq: AEREQT, AEResp: AERESPT ]; - \* @type: MSG -> Int; - messages -VARIABLE - pendingMessages -messageVars == <> - ----- -\* The following variables are all per server (functions with domain Server). - -\* The server's term number. -VARIABLE - \* @type: Int -> Int; - currentTerm -\* The server's state (Follower, Candidate, or Leader). -VARIABLE - \* @type: Int -> Str; - state -\* The candidate the server voted for in its current term, or -\* Nil if it hasn't voted for any. -VARIABLE - \* @type: Int -> Int; - votedFor -serverVars == <> - -\* A Sequence of log entries. The index into this sequence is the index of the -\* log entry. Unfortunately, the Sequence module defines Head(s) as the entry -\* with index 1, so be careful not to use that! -VARIABLE - \* @type: Int -> [ entries: LOGT, len: Int ]; - log -\* The index of the latest entry in the log the state machine may apply. -VARIABLE - \* @type: Int -> Int; - commitIndex -\* The index of the latest applied entry. In this spec we only apply records for -\* configuration changes. -VARIABLE - applied -logVars == <> - -\* The following variables are used only on candidates: -\* The set of servers from which the candidate has received a RequestVote -\* response in its currentTerm. -VARIABLE - \* @type: Int -> Set(Int); - votesResponded -\* The set of servers from which the candidate has received a vote in its -\* currentTerm. -VARIABLE - \* @type: Int -> Set(Int); - votesGranted -\* @type: Seq(Int -> Set(Int)); -candidateVars == <> - -\* The following variables are used only on leaders: -\* The latest entry that each follower has acknowledged is the same as the -\* leader's. This is used to calculate commitIndex on the leader. -VARIABLE - \* @type: Int -> (Int -> Int); - matchIndex -VARIABLE - pendingConfChangeIndex -leaderVars == <> - -\* @type: Int -> [jointConfig: Seq(Set(int)), learners: Set(int)] -VARIABLE - config -VARIABLE - reconfigCount - -configVars == <> - -VARIABLE - durableState -VARIABLE - ready -readyVars == <> - -\* End of per server variables. ----- - -\* All variables; used for stuttering (asserting state hasn't changed). -vars == <> - -Vars == vars - - ----- -\* Helpers - -\* The set of all quorums. This just calculates simple majorities, but the only -\* important property is that every quorum overlaps with every other. -Quorum(c) == {i \in SUBSET(c) : Cardinality(i) * 2 > Cardinality(c)} - -\* The term of the last entry in a log, or 0 if the log is empty. -\* @type: LOGT => Int; -LastTerm(xlog) == IF xlog = <<>> THEN 0 ELSE xlog[Len(xlog)].term - -\* Helper for Send and Reply. Given a message m and bag of messages, return a -\* new bag of messages with one more m in it. -\* @type: (MSG, MSG -> Int) => MSG -> Int; -WithMessage(m, msgs) == msgs (+) SetToBag({m}) - -\* Helper for Discard and Reply. Given a message m and bag of messages, return -\* a new bag of messages with one less m in it. -\* @type: (MSG, MSG -> Int) => MSG -> Int; -WithoutMessage(m, msgs) == msgs (-) SetToBag({m}) - -\* Add a message to the bag of pendingMessages. -SendDirect(m) == - pendingMessages' = WithMessage(m, pendingMessages) - -\* All pending messages sent from node i -PendingMessages(i) == - [ m \in { mm \in DOMAIN pendingMessages : mm.msource = i } |-> pendingMessages[m] ] - -\* Remove all messages in pendingMessages that were sent from node i -ClearPendingMessages(i) == - pendingMessages (-) PendingMessages(i) - -\* Move all messages which was sent from node i in pendingMessages to messages -SendPendingMessagesInReady(i) == - LET msgs == ready[i].msgs - IN /\ messages' = msgs (+) messages - /\ pendingMessages' = pendingMessages (-) msgs - -\* Remove a message from the bag of messages. Used when a server is done -DiscardDirect(m) == - messages' = WithoutMessage(m, messages) - -\* Combination of Send and Discard -ReplyDirect(response, request) == - /\ pendingMessages' = WithMessage(response, pendingMessages) - /\ messages' = WithoutMessage(request, messages) - -\* Default: change when needed - Send(m) == SendDirect(m) - Reply(response, request) == ReplyDirect(response, request) - Discard(m) == DiscardDirect(m) - -MaxOrZero(s) == IF s = {} THEN 0 ELSE Max(s) - -GetJointConfig(i) == - config[i].jointConfig - -GetConfig(i) == - GetJointConfig(i)[1] - -GetOutgoingConfig(i) == - GetJointConfig(i)[2] - -IsJointConfig(i) == - /\ GetJointConfig(i)[2] # {} - -GetLearners(i) == - config[i].learners - -\* Apply conf change log entry to configuration -ApplyConfigUpdate(i, k) == - config' = [config EXCEPT ![i]= [jointConfig |-> << log[i][k].value.newconf, {} >>, learners |-> log[i][k].value.learners]] - -CommitTo(i, c) == - commitIndex' = [commitIndex EXCEPT ![i] = Max({@, c})] - -CurrentLeaders == {i \in Server : state[i] = Leader} - ----- -\* Define initial values for all variables -InitMessageVars == /\ messages = EmptyBag - /\ pendingMessages = EmptyBag -InitServerVars == /\ currentTerm = [i \in Server |-> 0] - /\ state = [i \in Server |-> Follower] - /\ votedFor = [i \in Server |-> Nil] -InitCandidateVars == /\ votesResponded = [i \in Server |-> {}] - /\ votesGranted = [i \in Server |-> {}] -InitLeaderVars == /\ matchIndex = [i \in Server |-> [j \in Server |-> 0]] - /\ pendingConfChangeIndex = [i \in Server |-> 0] -InitLogVars == /\ log = [i \in Server |-> <<>>] - /\ commitIndex = [i \in Server |-> 0] - /\ applied = [i \in Server |-> 0] -InitConfigVars == /\ config = [i \in Server |-> [ jointConfig |-> <>, learners |-> {}]] - /\ reconfigCount = 0 - -InReadyPhase(i) == ready[i] /= <<>> - -InitReadyVars == - /\ durableState = [ i \in Server |-> [ - currentTerm |-> currentTerm[i], - votedFor |-> votedFor[i], - log |-> Len(log[i]), - applied |-> 0, - commitIndex |-> commitIndex[i], - config |-> config[i] - ]] - /\ ready = [i \in Server |-> <<>>] - -Init == /\ InitMessageVars - /\ InitServerVars - /\ InitCandidateVars - /\ InitLeaderVars - /\ InitLogVars - /\ InitConfigVars - /\ InitReadyVars - ----- -\* Define state transitions - -Transitions == { - "RequestVote", - "BecomeLeader", - "ClientRequest", - "AdvanceCommitIndex", - "AppendEntries", - "AppendEntriesToSelf", - "Heartbeat", - "SendSnapshot", - "Receive", - "Timeout", - "StepDownToFollower", - "Ready", - "PersistState", - "ApplyConfChange", - "SendMessages", - "Advance", - "Restart", - "DuplicateMessage", - "DropMessage", - "AddNewServer", - "AddLearner", - "DeleteServer" -} - -IsEnabled(action, args) == action \in Transitions - -\* Server i restarts from stable storage. -\* It loses everything but its currentTerm, commitIndex, votedFor, log, and config in durable state. -\* @type: Int => Bool; -Restart(i) == - /\ IsEnabled("Restart", <>) - /\ state' = [state EXCEPT ![i] = Follower] - /\ votesResponded' = [votesResponded EXCEPT ![i] = {}] - /\ votesGranted' = [votesGranted EXCEPT ![i] = {}] - /\ matchIndex' = [matchIndex EXCEPT ![i] = [j \in Server |-> 0]] - /\ pendingConfChangeIndex' = [pendingConfChangeIndex EXCEPT ![i] = 0] - /\ pendingMessages' = ClearPendingMessages(i) - /\ currentTerm' = [currentTerm EXCEPT ![i] = durableState[i].currentTerm] - /\ commitIndex' = [commitIndex EXCEPT ![i] = durableState[i].commitIndex] - /\ applied' = [applied EXCEPT ![i] = durableState[i].applied] - /\ votedFor' = [votedFor EXCEPT ![i] = durableState[i].votedFor] - /\ log' = [log EXCEPT ![i] = SubSeq(@, 1, durableState[i].log)] - /\ config' = [config EXCEPT ![i] = durableState[i].config] - /\ ready' = [ready EXCEPT ![i] = <<>>] - /\ UNCHANGED <> - -\* Server i times out and starts a new election. -\* @type: Int => Bool; -Timeout(i) == /\ IsEnabled("Timeout", <>) - /\ ~InReadyPhase(i) - /\ state[i] \in {Follower, Candidate} - /\ i \in GetConfig(i) - /\ state' = [state EXCEPT ![i] = Candidate] - /\ currentTerm' = [currentTerm EXCEPT ![i] = currentTerm[i] + 1] - /\ votedFor' = [votedFor EXCEPT ![i] = i] - /\ votesResponded' = [votesResponded EXCEPT ![i] = {}] - /\ votesGranted' = [votesGranted EXCEPT ![i] = {}] - /\ UNCHANGED <> - -\* Candidate i sends j a RequestVote request. -\* @type: (Int, Int) => Bool; -RequestVote(i, j) == - /\ IsEnabled("RequestVote", <>) - /\ state[i] = Candidate - /\ j \in ((GetConfig(i) \union GetLearners(i)) \ votesResponded[i]) - /\ IF i # j - THEN Send([mtype |-> RequestVoteRequest, - mterm |-> currentTerm[i], - mlastLogTerm |-> LastTerm(log[i]), - mlastLogIndex |-> Len(log[i]), - msource |-> i, - mdest |-> j]) - ELSE Send([mtype |-> RequestVoteResponse, - mterm |-> currentTerm[i], - mvoteGranted |-> TRUE, - msource |-> i, - mdest |-> i]) - /\ UNCHANGED <> - -\* Leader i sends j an AppendEntries request containing entries in [b,e) range. -\* N.B. range is right open -\* @type: (Int, Int, <>, Int) => Bool; -AppendEntriesInRangeToPeer(subtype, i, j, range) == - /\ i /= j - /\ range[1] <= range[2] - /\ state[i] = Leader - /\ j \in GetConfig(i) \union GetLearners(i) - /\ LET - prevLogIndex == range[1] - 1 - \* The following upper bound on prevLogIndex is unnecessary - \* but makes verification substantially simpler. - prevLogTerm == IF prevLogIndex > 0 /\ prevLogIndex <= Len(log[i]) THEN - log[i][prevLogIndex].term - ELSE - 0 - \* Send the entries - lastEntry == Min({Len(log[i]), range[2]-1}) - entries == SubSeq(log[i], range[1], lastEntry) - commit == IF subtype = "heartbeat" THEN Min({commitIndex[i], matchIndex[i][j]}) ELSE Min({commitIndex[i], lastEntry}) - IN /\ Send( [mtype |-> AppendEntriesRequest, - msubtype |-> subtype, - mterm |-> currentTerm[i], - mprevLogIndex |-> prevLogIndex, - mprevLogTerm |-> prevLogTerm, - mentries |-> entries, - mcommitIndex |-> commit, - msource |-> i, - mdest |-> j]) - /\ UNCHANGED <> - -\* etcd leader sends MsgAppResp to itself immediately after appending log entry -AppendEntriesToSelf(i) == - /\ IsEnabled("AppendEntriesToSelf", <>) - /\ state[i] = Leader - /\ Send([mtype |-> AppendEntriesResponse, - msubtype |-> "app", - mterm |-> currentTerm[i], - msuccess |-> TRUE, - mmatchIndex |-> Len(log[i]), - msource |-> i, - mdest |-> i]) - /\ UNCHANGED <> - -AppendEntries(i, j, range) == - /\ IsEnabled("AppendEntries", <>) - /\ AppendEntriesInRangeToPeer("app", i, j, range) - -Heartbeat(i, j) == - /\ IsEnabled("Heartbeat", <>) - \* heartbeat is equivalent to an append-entry request with 0 entry index 1 - /\ AppendEntriesInRangeToPeer("heartbeat", i, j, <<1,1>>) - -SendSnapshot(i, j, index) == - /\ IsEnabled("SendSnapshot", <>) - /\ AppendEntriesInRangeToPeer("snapshot", i, j, <<1,index+1>>) - -\* Candidate i transitions to leader. -\* @type: Int => Bool; -BecomeLeader(i) == - /\ IsEnabled("BecomeLeader", <>) - /\ ~InReadyPhase(i) - /\ state[i] = Candidate - /\ votesGranted[i] \in Quorum(GetConfig(i)) - /\ state' = [state EXCEPT ![i] = Leader] - /\ matchIndex' = [matchIndex EXCEPT ![i] = - [j \in Server |-> IF j = i THEN Len(log[i]) ELSE 0]] - /\ UNCHANGED <> - -Replicate(i, v, t) == - /\ t \in {ValueEntry, ConfigEntry} - /\ state[i] = Leader - /\ LET entry == [term |-> currentTerm[i], - type |-> t, - value |-> v] - newLog == Append(log[i], entry) - IN /\ log' = [log EXCEPT ![i] = newLog] - -\* Leader i receives a client request to add v to the log. -\* @type: (Int, Int) => Bool; -ClientRequest(i, v) == - /\ IsEnabled("ClientRequest", <>) - /\ Replicate(i, [val |-> v], ValueEntry) - /\ UNCHANGED <> - -\* Leader i advances its commitIndex. -\* This is done as a separate step from handling AppendEntries responses, -\* in part to minimize atomic regions, and in part so that leaders of -\* single-server clusters are able to mark entries committed. -\* @type: Int => Bool; -AdvanceCommitIndex(i) == - /\ IsEnabled("AdvanceCommitIndex", <>) - /\ state[i] = Leader - /\ LET \* The set of servers that agree up through index. - Agree(index) == {k \in GetConfig(i) : matchIndex[i][k] >= index} - logSize == Len(log[i]) - \* logSize == MaxLogLength - \* The maximum indexes for which a quorum agrees - agreeIndexes == {index \in 1..logSize : - Agree(index) \in Quorum(GetConfig(i))} - \* New value for commitIndex'[i] - newCommitIndex == - IF /\ agreeIndexes /= {} - /\ log[i][Max(agreeIndexes)].term = currentTerm[i] - THEN - Max(agreeIndexes) - ELSE - commitIndex[i] - IN - /\ CommitTo(i, newCommitIndex) - /\ UNCHANGED <> - - -\* Leader i adds a new server j or promote learner j -AddNewServer(i, j) == - /\ IsEnabled("AddNewServer", <>) - /\ state[i] = Leader - /\ j \notin GetConfig(i) - /\ ~IsJointConfig(i) - /\ IF pendingConfChangeIndex[i] = 0 THEN - /\ Replicate(i, [newconf |-> GetConfig(i) \union {j}, learners |-> GetLearners(i)], ConfigEntry) - /\ pendingConfChangeIndex' = [pendingConfChangeIndex EXCEPT ![i]=Len(log'[i])] - ELSE - /\ Replicate(i, <<>>, ValueEntry) - /\ UNCHANGED <> - /\ UNCHANGED <> - -\* Leader i adds a leaner j to the cluster. -AddLearner(i, j) == - /\ IsEnabled("AddLearner", <>) - /\ state[i] = Leader - /\ j \notin GetConfig(i) \union GetLearners(i) - /\ ~IsJointConfig(i) - /\ IF pendingConfChangeIndex[i] = 0 THEN - /\ Replicate(i, [newconf |-> GetConfig(i), learners |-> GetLearners(i) \union {j}], ConfigEntry) - /\ pendingConfChangeIndex' = [pendingConfChangeIndex EXCEPT ![i]=Len(log'[i])] - ELSE - /\ Replicate(i, <<>>, ValueEntry) - /\ UNCHANGED <> - /\ UNCHANGED <> - -\* Leader i removes a server j (possibly itself) from the cluster. -DeleteServer(i, j) == - /\ IsEnabled("DeleteServer", <>) - /\ state[i] = Leader - /\ j \in GetConfig(i) \union GetLearners(i) - /\ ~IsJointConfig(i) - /\ IF pendingConfChangeIndex[i] = 0 THEN - /\ Replicate(i, [newconf |-> GetConfig(i) \ {j}, learners |-> GetLearners(i) \ {j}], ConfigEntry) - /\ pendingConfChangeIndex' = [pendingConfChangeIndex EXCEPT ![i]=Len(log'[i])] - ELSE - /\ Replicate(i, <<>>, ValueEntry) - /\ UNCHANGED <> - /\ UNCHANGED <> - -Ready(i) == - /\ IsEnabled("Ready", <>) - /\ ~InReadyPhase(i) - /\ ready' = [ready EXCEPT ![i] = [ - msgs |-> PendingMessages(i), - lastEntry |-> Len(log[i]), - currentTerm |-> currentTerm[i], - votedFor |-> votedFor[i], - commitIndex |-> commitIndex[i], - applied |-> applied[i], - config |-> config[i]]] - /\ UNCHANGED <> - -PersistState(i) == - /\ IsEnabled("PersistState", <>) - /\ InReadyPhase(i) - /\ durableState' = [durableState EXCEPT ![i] = [ - currentTerm |-> ready[i].currentTerm, - votedFor |-> ready[i].votedFor, - log |-> ready[i].lastEntry, - commitIndex |-> ready[i].commitIndex, - config |-> ready[i].config, - applied |-> ready[i].applied - ]] - /\ UNCHANGED <> - -HasUnappliedConfChange(i) == - SelectLastInSubSeq(log[i], applied[i]+1, ready[i].commitIndex, LAMBDA x: x.type = ConfigEntry) > 0 - -ApplyConfChange(i) == - /\ IsEnabled("ApplyConfChange", <>) - /\ InReadyPhase(i) - /\ ~IsJointConfig(i) - /\ LET k == SelectInSubSeq(log[i], applied[i]+1, ready[i].commitIndex, LAMBDA x: x.type = ConfigEntry) - IN - /\ k > 0 - /\ ApplyConfigUpdate(i, k) - /\ applied' = [applied EXCEPT ![i] = k] - /\ IF state[i] = Leader /\ pendingConfChangeIndex[i] >= k THEN - /\ reconfigCount' = reconfigCount + 1 - /\ pendingConfChangeIndex' = [pendingConfChangeIndex EXCEPT ![i] = 0] - ELSE UNCHANGED <> - /\ UNCHANGED <> - -Debug == - \E m \in DOMAIN pendingMessages: m.mtype = "RequestVoteResponse" /\ m.msource /= m.mdest - -SendMessages(i) == - /\ IsEnabled("SendMessages", <>) - /\ InReadyPhase(i) - /\ SendPendingMessagesInReady(i) - /\ ready' = [ready EXCEPT ![i]["msgs"] = EmptyBag] - /\ UNCHANGED <> - -Advance(i) == - /\ IsEnabled("Advance", <>) - /\ InReadyPhase(i) - /\ ready[i].msgs = EmptyBag \* all pending messages in ready are sent out - /\ ~HasUnappliedConfChange(i) \* all committed conf change entries in ready are applied - /\ ready' = [ready EXCEPT ![i] = <<>>] - /\ applied' = [applied EXCEPT ![i] = ready[i].commitIndex] - /\ UNCHANGED <> - -BecomeFollowerOfTerm(i, t) == - /\ currentTerm' = [currentTerm EXCEPT ![i] = t] - /\ state' = [state EXCEPT ![i] = Follower] - /\ IF currentTerm[i] # t THEN - votedFor' = [votedFor EXCEPT ![i] = Nil] - ELSE - UNCHANGED <> - -StepDownToFollower(i) == - /\ IsEnabled("StepDownToFollower", <>) - /\ BecomeFollowerOfTerm(i, currentTerm[i]) - /\ UNCHANGED <> - ----- -\* Message handlers -\* i = recipient, j = sender, m = message - -\* Server i receives a RequestVote request from server j with -\* m.mterm <= currentTerm[i]. -\* @type: (Int, Int, RVREQT) => Bool; -HandleRequestVoteRequest(i, j, m) == - LET logOk == \/ m.mlastLogTerm > LastTerm(log[i]) - \/ /\ m.mlastLogTerm = LastTerm(log[i]) - /\ m.mlastLogIndex >= Len(log[i]) - grant == /\ m.mterm = currentTerm[i] - /\ logOk - /\ votedFor[i] \in {Nil, j} - IN /\ m.mterm <= currentTerm[i] - /\ \/ grant /\ votedFor' = [votedFor EXCEPT ![i] = j] - \/ ~grant /\ UNCHANGED votedFor - /\ Reply([mtype |-> RequestVoteResponse, - mterm |-> currentTerm[i], - mvoteGranted |-> grant, - msource |-> i, - mdest |-> j], - m) - /\ UNCHANGED <> - -\* Server i receives a RequestVote response from server j with -\* m.mterm = currentTerm[i]. -\* @type: (Int, Int, RVRESPT) => Bool; -HandleRequestVoteResponse(i, j, m) == - \* This tallies votes even when the current state is not Candidate, but - \* they won't be looked at, so it doesn't matter. - /\ m.mterm = currentTerm[i] - /\ votesResponded' = [votesResponded EXCEPT ![i] = - votesResponded[i] \cup {j}] - /\ \/ /\ m.mvoteGranted - /\ votesGranted' = [votesGranted EXCEPT ![i] = - votesGranted[i] \cup {j}] - \/ /\ ~m.mvoteGranted - /\ UNCHANGED <> - /\ Discard(m) - /\ UNCHANGED <> - -\* @type: (Int, Int, AEREQT, Bool) => Bool; -RejectAppendEntriesRequest(i, j, m, logOk) == - /\ \/ m.mterm < currentTerm[i] - \/ /\ m.mterm = currentTerm[i] - /\ state[i] = Follower - /\ \lnot logOk - /\ Reply([mtype |-> AppendEntriesResponse, - msubtype |-> "app", - mterm |-> currentTerm[i], - msuccess |-> FALSE, - mmatchIndex |-> 0, - msource |-> i, - mdest |-> j], - m) - /\ UNCHANGED <> - -\* @type: (Int, MSG) => Bool; -ReturnToFollowerState(i, m) == - /\ m.mterm = currentTerm[i] - /\ state[i] = Candidate - /\ state' = [state EXCEPT ![i] = Follower] - /\ UNCHANGED <> - -HasNoConflict(i, index, ents) == - /\ index <= Len(log[i]) + 1 - /\ \A k \in 1..Len(ents): index + k - 1 <= Len(log[i]) => log[i][index+k-1].term = ents[k].term - -\* @type: (Int, Int, Int, AEREQT) => Bool; -AppendEntriesAlreadyDone(i, j, index, m) == - /\ \/ index <= commitIndex[i] - \/ /\ index > commitIndex[i] - /\ \/ m.mentries = << >> - \/ /\ m.mentries /= << >> - /\ m.mprevLogIndex + Len(m.mentries) <= Len(log[i]) - /\ HasNoConflict(i, index, m.mentries) - /\ IF index <= commitIndex[i] THEN - IF m.msubtype = "heartbeat" THEN CommitTo(i, m.mcommitIndex) ELSE UNCHANGED commitIndex - ELSE - CommitTo(i, Min({m.mcommitIndex, m.mprevLogIndex+Len(m.mentries)})) - /\ Reply([ mtype |-> AppendEntriesResponse, - msubtype |-> m.msubtype, - mterm |-> currentTerm[i], - msuccess |-> TRUE, - mmatchIndex |-> IF m.msubtype = "heartbeat" \/ index > commitIndex[i] THEN m.mprevLogIndex+Len(m.mentries) ELSE commitIndex[i], - msource |-> i, - mdest |-> j], - m) - /\ UNCHANGED <> - -\* @type: (Int, Int, AEREQT) => Bool; -ConflictAppendEntriesRequest(i, index, m) == - /\ m.mentries /= << >> - /\ index > commitIndex[i] - /\ ~HasNoConflict(i, index, m.mentries) - /\ log' = [log EXCEPT ![i] = SubSeq(@, 1, Len(@) - 1)] - /\ UNCHANGED <> - -\* @type: (Int, AEREQT) => Bool; -NoConflictAppendEntriesRequest(i, index, m) == - /\ m.mentries /= << >> - /\ index > commitIndex[i] - /\ HasNoConflict(i, index, m.mentries) - /\ log' = [log EXCEPT ![i] = @ \o SubSeq(m.mentries, Len(@)-index+2, Len(m.mentries))] - /\ UNCHANGED <> - -\* @type: (Int, Int, Bool, AEREQT) => Bool; -AcceptAppendEntriesRequest(i, j, logOk, m) == - \* accept request - /\ m.mterm = currentTerm[i] - /\ state[i] = Follower - /\ logOk - /\ LET index == m.mprevLogIndex + 1 - IN \/ AppendEntriesAlreadyDone(i, j, index, m) - \/ ConflictAppendEntriesRequest(i, index, m) - \/ NoConflictAppendEntriesRequest(i, index, m) - -\* Server i receives an AppendEntries request from server j with -\* m.mterm <= currentTerm[i]. This just handles m.entries of length 0 or 1, but -\* implementations could safely accept more by treating them the same as -\* multiple independent requests of 1 entry. -\* @type: (Int, Int, AEREQT) => Bool; -HandleAppendEntriesRequest(i, j, m) == - LET logOk == \/ m.mprevLogIndex = 0 - \/ /\ m.mprevLogIndex > 0 - /\ m.mprevLogIndex <= Len(log[i]) - /\ m.mprevLogTerm = log[i][m.mprevLogIndex].term - IN - /\ m.mterm <= currentTerm[i] - /\ \/ RejectAppendEntriesRequest(i, j, m, logOk) - \/ ReturnToFollowerState(i, m) - \/ AcceptAppendEntriesRequest(i, j, logOk, m) - /\ UNCHANGED <> - -\* Server i receives an AppendEntries response from server j with -\* m.mterm = currentTerm[i]. -\* @type: (Int, Int, AERESPT) => Bool; -HandleAppendEntriesResponse(i, j, m) == - /\ m.mterm = currentTerm[i] - /\ \/ /\ m.msuccess \* successful - /\ matchIndex' = [matchIndex EXCEPT ![i][j] = Max({@, m.mmatchIndex})] - /\ UNCHANGED <> - \/ /\ \lnot m.msuccess \* not successful - /\ UNCHANGED <> - /\ Discard(m) - /\ UNCHANGED <> - -\* Any RPC with a newer term causes the recipient to advance its term first. -\* @type: (Int, Int, MSG) => Bool; -UpdateTerm(i, j, m) == - /\ m.mterm > currentTerm[i] - /\ BecomeFollowerOfTerm(i, m.mterm) - \* messages is unchanged so m can be processed further. - /\ UNCHANGED <> - -\* Responses with stale terms are ignored. -\* @type: (Int, Int, MSG) => Bool; -DropStaleResponse(i, j, m) == - /\ m.mterm < currentTerm[i] - /\ Discard(m) - /\ UNCHANGED <> - -\* Receive a message. -ReceiveDirect(m) == - LET i == m.mdest - j == m.msource - IN - \* Any RPC with a newer term causes the recipient to advance - \* its term first. Responses with stale terms are ignored. - \/ UpdateTerm(i, j, m) - \/ /\ m.mtype = RequestVoteRequest - /\ HandleRequestVoteRequest(i, j, m) - \/ /\ m.mtype = RequestVoteResponse - /\ \/ DropStaleResponse(i, j, m) - \/ HandleRequestVoteResponse(i, j, m) - \/ /\ m.mtype = AppendEntriesRequest - /\ HandleAppendEntriesRequest(i, j, m) - \/ /\ m.mtype = AppendEntriesResponse - /\ \/ DropStaleResponse(i, j, m) - \/ HandleAppendEntriesResponse(i, j, m) - -Receive(m) == - /\ IsEnabled("Receive", <>) - /\ ReceiveDirect(m) - -NextRequestVoteRequest == \E m \in DOMAIN messages : m.mtype = RequestVoteRequest /\ Receive(m) -NextRequestVoteResponse == \E m \in DOMAIN messages : m.mtype = RequestVoteResponse /\ Receive(m) -NextAppendEntriesRequest == \E m \in DOMAIN messages : m.mtype = AppendEntriesRequest /\ Receive(m) -NextAppendEntriesResponse == \E m \in DOMAIN messages : m.mtype = AppendEntriesResponse /\ Receive(m) - -\* End of message handlers. ----- -\* Network state transitions - -\* The network duplicates a message -\* @type: MSG => Bool; -DuplicateMessage(m) == - /\ IsEnabled("DuplicateMessage", <>) - /\ m \in DOMAIN messages - /\ messages' = WithMessage(m, messages) - /\ UNCHANGED <> - -\* The network drops a message -\* @type: MSG => Bool; -DropMessage(m) == - /\ IsEnabled("DropMessage", <>) - \* Do not drop loopback messages - \* /\ m.msource /= m.mdest - /\ Discard(m) - /\ UNCHANGED <> - ----- - -\* Defines how the variables may transition. -NextAsync == - \/ \E i,j \in Server : RequestVote(i, j) - \/ \E i \in Server : BecomeLeader(i) - \/ \E i \in Server: ClientRequest(i, 0) - \/ \E i \in Server : AdvanceCommitIndex(i) - \/ \E i,j \in Server : \E b,e \in matchIndex[i][j]+1..Len(log[i])+1 : AppendEntries(i, j, <>) - \/ \E i \in Server : AppendEntriesToSelf(i) - \/ \E i,j \in Server : Heartbeat(i, j) - \/ \E i,j \in Server : \E index \in 1..commitIndex[i] : SendSnapshot(i, j, index) - \/ \E m \in DOMAIN messages : Receive(m) - \/ \E i \in Server : Timeout(i) - \/ \E i \in Server : StepDownToFollower(i) - \/ \E i \in Server : Ready(i) - \/ \E i \in Server : PersistState(i) - \/ \E i \in Server : ApplyConfChange(i) - \/ \E i \in Server : SendMessages(i) - \/ \E i \in Server : Advance(i) - -NextCrash == \E i \in Server : Restart(i) - -NextAsyncCrash == - \/ NextAsync - \/ NextCrash - -NextUnreliable == - \* Only duplicate once - \/ \E m \in DOMAIN messages : - /\ messages[m] = 1 - /\ DuplicateMessage(m) - \* Only drop if it makes a difference - \/ \E m \in DOMAIN messages : - /\ messages[m] = 1 - /\ DropMessage(m) - -\* Most pessimistic network model -Next == \/ NextAsync - \/ NextCrash - \/ NextUnreliable - -\* Membership changes -NextDynamic == - \/ Next - \/ \E i, j \in Server : AddNewServer(i, j) - \/ \E i, j \in Server : AddLearner(i, j) - \/ \E i, j \in Server : DeleteServer(i, j) - -\* The specification must start with the initial state and transition according -\* to Next. -Spec == Init /\ [][Next]_Vars - -(***************************************************************************) -(* The main safety properties are below *) -(***************************************************************************) ----- - -ASSUME DistinctRoles == /\ Leader /= Candidate - /\ Candidate /= Follower - /\ Follower /= Leader - -ASSUME DistinctMessageTypes == /\ RequestVoteRequest /= AppendEntriesRequest - /\ RequestVoteRequest /= RequestVoteResponse - /\ RequestVoteRequest /= AppendEntriesResponse - /\ AppendEntriesRequest /= RequestVoteResponse - /\ AppendEntriesRequest /= AppendEntriesResponse - /\ RequestVoteResponse /= AppendEntriesResponse - ----- -\* Correctness invariants - -\* The prefix of the log of server i that has been committed -Committed(i) == SubSeq(log[i],1,commitIndex[i]) - -\* The current term of any server is at least the term -\* of any message sent by that server -\* @type: MSG => Bool; -MessageTermsLtCurrentTerm(m) == - m.mterm <= currentTerm[m.msource] - -\* Committed log entries should never conflict between servers -LogInv == - \A i, j \in Server : - \/ IsPrefix(Committed(i),Committed(j)) - \/ IsPrefix(Committed(j),Committed(i)) - -\* Note that LogInv checks for safety violations across space -\* This is a key safety invariant and should always be checked -THEOREM Spec => []LogInv - -\* There should not be more than one leader per term at the same time -\* Note that this does not rule out multiple leaders in the same term at different times -MoreThanOneLeaderInv == - \A i,j \in Server : - (/\ currentTerm[i] = currentTerm[j] - /\ state[i] = Leader - /\ state[j] = Leader) - => i = j - -\* A leader always has the greatest index for its current term -ElectionSafetyInv == - \A i \in Server : - state[i] = Leader => - \A j \in Server : - MaxOrZero({n \in DOMAIN log[i] : log[i][n].term = currentTerm[i]}) >= - MaxOrZero({n \in DOMAIN log[j] : log[j][n].term = currentTerm[i]}) - -\* Every (index, term) pair determines a log prefix -LogMatchingInv == - \A i, j \in Server : - \A n \in (1..Len(log[i])) \cap (1..Len(log[j])) : - log[i][n].term = log[j][n].term => - SubSeq(log[i],1,n) = SubSeq(log[j],1,n) - -\* When two candidates competes in a campaign, if a follower voted to -\* a candidate that did not win in the end, the follower's votedFor will -\* not reset nor change to the winner (the other candidate) because its -\* term remains same. This will violate this invariant. -\* -\* This invariant can be false because a server's votedFor is not reset -\* for messages with same term. Refer to the case below. -\* 1. This is a 3 node cluster with nodes A, B, and C. Let's assume they are all followers with same term 1 and log at beginning. -\* 2. Now B and C starts compaign and both become candidates of term 2. -\* 3. B requests vote to A and A grant it. Now A is a term 2 follower whose votedFor is B. -\* 4. A's response to B is lost. -\* 5. C requests vote to B and B grant it. Now B is a term 2 follower whose votedFor is C. -\* 6. C becomes leader of term 2. -\* 7. C replicates logs to A but not B. -\* 8. A's votedFor is not changed because the incoming messages has same term (see UpdateTerm and ReturnToFollowerState) -\* 9. Now the commited log in A will exceed B's log. The invariant is violated. -\* VotesGrantedInv == -\* \A i, j \in Server : -\* \* if i has voted for j -\* votedFor[i] = j => -\* IsPrefix(Committed(i), log[j]) - -\* All committed entries are contained in the log -\* of at least one server in every quorum -QuorumLogInv == - \A i \in Server : - ~InReadyPhase(i) => - \A S \in Quorum(GetConfig(i)) : - \E j \in S : - IsPrefix(Committed(i), log[j]) - -\* The "up-to-date" check performed by servers -\* before issuing a vote implies that i receives -\* a vote from j only if i has all of j's committed -\* entries -MoreUpToDateCorrectInv == - \A i, j \in Server : - (\/ LastTerm(log[i]) > LastTerm(log[j]) - \/ /\ LastTerm(log[i]) = LastTerm(log[j]) - /\ Len(log[i]) >= Len(log[j])) => - IsPrefix(Committed(j), log[i]) - -\* If a log entry is committed in a given term, then that -\* entry will be present in the logs of the leaders -\* for all higher-numbered terms -\* See: https://github.com/uwplse/verdi-raft/blob/master/raft/LeaderCompletenessInterface.v -LeaderCompletenessInv == - \A i \in Server : - LET committed == Committed(i) IN - \A idx \in 1..Len(committed) : - LET entry == log[i][idx] IN - \* if the entry is committed - \A l \in CurrentLeaders : - \* all leaders with higher-number terms - currentTerm[l] > entry.term => - \* have the entry at the same log position - log[l][idx] = entry - -\* Any entry committed by leader shall be persisted already -CommittedIsDurableInv == - \A i \in Server : - state[i] = Leader => commitIndex[i] <= durableState[i].log - -\* Applied entry must be committed -AppliedIsCommittedInv == - \A i \in Server : - applied[i] <= commitIndex[i] - ------ - - -=============================================================================== - - - -\* Changelog: -\* -\* 2023-11-23: -\* - Replace configuration actions by those in etcd implementation. -\* - Refactor spec structure to decouple core spec and model checker spec for -\* better readness and future update -\* - Remove unused actions and properties, e.g. wrapper -\* -\* 2021-04-??: -\* - Abandoned Apalache due to slowness and went back to TLC. There are remains -\* of the Apalache-specific annotations and message wrapping/unwrapping, but -\* those are not actually used. The annotations are no longer up-to-date. -\* -\* 2021-04-09: -\* - Added type annotations for Apalache symbolic model checker. As part of -\* this change, the message format was changed to a tagged union. -\* -\* 2016-09-09: -\* - Daniel Ricketts added the major safety invariants and proved them in -\* TLAPS. -\* -\* 2015-05-10: -\* - Add cluster membership changes as described in Section 4 of -\* Diego Ongaro. Consensus: Bridging theory and practice. -\* PhD thesis, Stanford University, 2014. -\* This introduces: InitServer, ValueEntry, ConfigEntry, CatchupRequest, -\* CatchupResponse, CheckOldConfig, GetMaxConfigIndex, -\* GetConfig (parameterized), AddNewServer, DeleteServer, -\* HandleCatchupRequest, HandleCatchupResponse, -\* HandleCheckOldConfig -\* -\* -\* 2014-12-02: -\* - Fix AppendEntries to only send one entry at a time, as originally -\* intended. Since SubSeq is inclusive, the upper bound of the range should -\* have been nextIndex, not nextIndex + 1. Thanks to Igor Kovalenko for -\* reporting the issue. -\* - Change matchIndex' to matchIndex (without the apostrophe) in -\* AdvanceCommitIndex. This apostrophe was not intentional and perhaps -\* confusing, though it makes no practical difference (matchIndex' equals -\* matchIndex). Thanks to Hugues Evrard for reporting the issue. -\* -\* 2014-07-06: -\* - Version from PhD dissertation diff --git a/tla/validate-model.sh b/tla/validate-model.sh index 04817e08..14ceb10b 100755 --- a/tla/validate-model.sh +++ b/tla/validate-model.sh @@ -25,10 +25,10 @@ function validate { local sim=${5:-false} set -o pipefail - if [ "$sim" ]; then - java -XX:+UseParallelGC -cp ${tooldir}/tla2tools.jar:${tooldir}/CommunityModules-deps.jar tlc2.TLC -config "${config}" "${spec}" -lncheck final -metadir "${statedir}" -fpmem 0.9 + if [ "$sim" = true ]; then + java -XX:+UseParallelGC -cp ${tooldir}/tla2tools.jar:${tooldir}/CommunityModules-deps.jar tlc2.TLC -config "${config}" "${spec}" -lncheck final -metadir "${statedir}" -fpmem 0.9 -workers auto -simulate else - java -XX:+UseParallelGC -cp ${tooldir}/tla2tools.jar:${tooldir}/CommunityModules-deps.jar tlc2.TLC -config "${config}" "${spec}" -lncheck final -metadir "${statedir}" -fpmem 0.9 -simulate + java -XX:+UseParallelGC -cp ${tooldir}/tla2tools.jar:${tooldir}/CommunityModules-deps.jar tlc2.TLC -config "${config}" "${spec}" -lncheck final -metadir "${statedir}" -fpmem 0.9 -workers auto fi }