Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance etcd raft TLA+ spec #226

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,5 +76,7 @@ func (rn *RawNode) Bootstrap(peers []Peer) error {
for _, peer := range peers {
rn.raft.applyConfChange(pb.ConfChange{NodeID: peer.ID, Type: pb.ConfChangeAddNode}.AsV2())
}

traceBootstrap(rn.raft)
return nil
}
4 changes: 2 additions & 2 deletions raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -1963,6 +1963,8 @@ func (r *raft) applyConfChange(cc pb.ConfChangeV2) pb.ConfState {
panic(err)
}

traceConfChangeEvent(cfg, r)

return r.switchToConfig(cfg, trk)
}

Expand All @@ -1973,8 +1975,6 @@ func (r *raft) applyConfChange(cc pb.ConfChangeV2) pb.ConfState {
//
// The inputs usually result from restoring a ConfState or applying a ConfChange.
func (r *raft) switchToConfig(cfg tracker.Config, trk tracker.ProgressMap) pb.ConfState {
traceConfChangeEvent(cfg, r)

r.trk.Config = cfg
r.trk.Progress = trk

Expand Down
2 changes: 2 additions & 0 deletions rawnode.go
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,8 @@ func (rn *RawNode) Advance(_ Ready) {
rn.stepsOnAdvance[i] = pb.Message{}
}
rn.stepsOnAdvance = rn.stepsOnAdvance[:0]

traceAdvance(rn.raft)
}

// Status returns the current status of the given group. This allocates, see
Expand Down
14 changes: 14 additions & 0 deletions state_trace.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ type stateMachineEventType int

const (
rsmInitState stateMachineEventType = iota
rsmBootstrap
rsmBecomeCandidate
rsmBecomeFollower
rsmBecomeLeader
Expand All @@ -38,6 +39,7 @@ const (
rsmChangeConf
rsmApplyConfChange
rsmReady
rsmAdvance
rsmSendAppendEntriesRequest
rsmReceiveAppendEntriesRequest
rsmSendAppendEntriesResponse
Expand All @@ -53,6 +55,7 @@ const (
func (e stateMachineEventType) String() string {
return []string{
"InitState",
"Bootstrap",
"BecomeCandidate",
"BecomeFollower",
"BecomeLeader",
Expand All @@ -61,6 +64,7 @@ func (e stateMachineEventType) String() string {
"ChangeConf",
"ApplyConfChange",
"Ready",
"Advance",
"SendAppendEntriesRequest",
"ReceiveAppendEntriesRequest",
"SendAppendEntriesResponse",
Expand All @@ -86,6 +90,7 @@ type TracingEvent struct {
State TracingState `json:"state"`
Role string `json:"role"`
LogSize uint64 `json:"log"`
Applied uint64 `json:"applied"`
Conf [2][]string `json:"conf"`
Message *TracingMessage `json:"msg,omitempty"`
ConfChange *TracingConfChange `json:"cc,omitempty"`
Expand Down Expand Up @@ -173,6 +178,7 @@ func traceEvent(evt stateMachineEventType, r *raft, m *raftpb.Message, prop map[
NodeID: strconv.FormatUint(r.id, 10),
State: makeTracingState(r),
LogSize: r.raftLog.lastIndex(),
Applied: r.raftLog.applied,
Conf: [2][]string{formatConf(r.trk.Voters[0].Slice()), formatConf(r.trk.Voters[1].Slice())},
Role: r.state.String(),
Message: makeTracingMessage(m),
Expand Down Expand Up @@ -206,10 +212,18 @@ func traceInitState(r *raft) {
traceNodeEvent(rsmInitState, r)
}

func traceBootstrap(r *raft) {
traceNodeEvent(rsmBootstrap, r)
}

func traceReady(r *raft) {
traceNodeEvent(rsmReady, r)
}

func traceAdvance(r *raft) {
traceNodeEvent(rsmAdvance, r)
}

func traceCommit(r *raft) {
traceNodeEvent(rsmCommit, r)
}
Expand Down
4 changes: 4 additions & 0 deletions state_trace_nop.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,12 @@ type TracingEvent struct{}

func traceInitState(*raft) {}

func traceBootstrap(*raft) {}

func traceReady(*raft) {}

func traceAdvance(*raft) {}

func traceCommit(*raft) {}

func traceReplicate(*raft, ...raftpb.Entry) {}
Expand Down
33 changes: 15 additions & 18 deletions tla/MCetcdraft.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -12,33 +12,31 @@
\* See the License for the specific language governing permissions and
\* limitations under the License.
\*
SPECIFICATION mc_etcdSpec
SPECIFICATION MCSpec

CONSTANTS
s1 = 1
s2 = 2
s3 = 3
s4 = 4
s5 = 5
s1 = s1
s2 = s2
s3 = s3
s4 = s4
s5 = s5

InitServer = {s1, s2, s3}
Server = {s1, s2, s3, s4}

ReconfigurationLimit = 2
MaxTermLimit = 10
RequestLimit = 5

Timeout <- MCTimeout
Send <- MCSend
ClientRequest <- MCClientRequest
AddNewServer <- MCAddNewServer
DeleteServer <- MCDeleteServer
AddLearner <- MCAddLearner

MaxTermLimit = 3
RequestLimit = 3
MaxRestart = 2
MaxStepDownToFollower = 1

InitServerVars <- etcdInitServerVars
InitLogVars <- etcdInitLogVars
InitConfigVars <- etcdInitConfigVars

IsEnabled <- MCIsEnabled
PostAction <- MCPostAction

Nil = 0

ValueEntry = "ValueEntry"
Expand All @@ -65,5 +63,4 @@ INVARIANTS
LogMatchingInv
QuorumLogInv
MoreUpToDateCorrectInv
LeaderCompletenessInv
CommittedIsDurableInv
LeaderCompletenessInv
144 changes: 80 additions & 64 deletions tla/MCetcdraft.tla
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,39 @@
\* limitations under the License.
\*

EXTENDS etcdraft
EXTENDS etcdraft_control

CONSTANTS s1,s2,s3,s4,s5

CONSTANT ReconfigurationLimit
ASSUME ReconfigurationLimit \in Nat

CONSTANT MaxTermLimit
ASSUME MaxTermLimit \in Nat

CONSTANT MaxRestart
ASSUME MaxRestart \in Nat

\* Limit on client requests
CONSTANT RequestLimit
ASSUME RequestLimit \in Nat

etcd == INSTANCE etcdraft
CONSTANT MaxStepDownToFollower
ASSUME MaxStepDownToFollower \in Nat

etcd_control == INSTANCE etcdraft_control

VARIABLE restartCount
VARIABLE stepDownCount
constaintVars == <<restartCount, stepDownCount>>

mcVars == << vars, constaintVars >>

ChangeOneVar(_vars, index, op(_)) ==
IF op(index) THEN
UNCHANGED RemoveAt(_vars, index)
ELSE
UNCHANGED _vars

\* Application uses Node (instead of RawNode) will have multiple ConfigEntry entries appended to log in bootstrapping.
BootstrapLog ==
Expand All @@ -42,79 +62,75 @@ etcdInitServerVars == /\ currentTerm = [i \in Server |-> IF i \in InitServer TH
/\ votedFor = [i \in Server |-> Nil]
etcdInitLogVars == /\ log = [i \in Server |-> IF i \in InitServer THEN BootstrapLog ELSE <<>>]
/\ commitIndex = [i \in Server |-> IF i \in InitServer THEN Cardinality(InitServer) ELSE 0]
/\ applied = [i \in Server |-> 0]
etcdInitConfigVars == /\ config = [i \in Server |-> [ jointConfig |-> IF i \in InitServer THEN <<InitServer, {}>> ELSE <<{}, {}>>, learners |-> {}]]
/\ reconfigCount = 0 \* the bootstrap configuraitons are not counted
/\ appliedConfChange = [i \in Server |-> Len(BootstrapLog)]

\* This file controls the constants as seen below.
\* In addition to basic settings of how many nodes are to be model checked,
\* the model allows to place additional limitations on the state space of the program.

\* Limit the # of reconfigurations to ReconfigurationLimit
MCAddNewServer(i, j) ==
/\ reconfigCount < ReconfigurationLimit
/\ etcd!AddNewServer(i, j)
MCDeleteServer(i, j) ==
/\ reconfigCount < ReconfigurationLimit
/\ etcd!DeleteServer(i, j)
MCAddLearner(i, j) ==
/\ reconfigCount < ReconfigurationLimit
/\ etcd!AddLearner(i, j)

\* Limit the terms that can be reached. Needs to be set to at least 3 to
\* evaluate all relevant states. If set to only 2, the candidate_quorum
\* constraint below is too restrictive.
MCTimeout(i) ==
\* Limit the term of each server to reduce state space
/\ currentTerm[i] < MaxTermLimit
\* Limit max number of simultaneous candidates
\* We made several restrictions to the state space of Raft. However since we
\* made these restrictions, Deadlocks can occur at places that Raft would in
\* real-world deployments handle graciously.
\* One example of this is if a Quorum of nodes becomes Candidate but can not
\* timeout anymore since we constrained the terms. Then, an artificial Deadlock
\* is reached. We solve this below. If TermLimit is set to any number >2, this is
\* not an issue since breadth-first search will make sure that a similar
\* situation is simulated at term==1 which results in a term increase to 2.
/\ Cardinality({ s \in GetConfig(i) : state[s] = Candidate}) < 1
/\ etcd!Timeout(i)

\* Limit number of requests (new entries) that can be made
MCClientRequest(i, v) ==
\* Allocation-free variant of Len(SelectSeq(log[i], LAMBDA e: e.contentType = TypeEntry)) < RequestLimit
/\ FoldSeq(LAMBDA e, count: IF e.type = ValueEntry THEN count + 1 ELSE count, 0, log[i]) < RequestLimit
/\ etcd!ClientRequest(i, v)

\* Limit how many identical append entries messages each node can send to another
\* Limit number of duplicate messages sent to the same server
MCSend(msg) ==
\* One AppendEntriesRequest per node-pair at a time:
\* a) No AppendEntries request from i to j.
/\ ~ \E n \in DOMAIN messages \union DOMAIN pendingMessages:
/\ n.mdest = msg.mdest
/\ n.msource = msg.msource
/\ n.mterm = msg.mterm
/\ n.mtype = AppendEntriesRequest
/\ msg.mtype = AppendEntriesRequest
\* b) No (corresponding) AppendEntries response from j to i.
/\ ~ \E n \in DOMAIN messages \union DOMAIN pendingMessages:
/\ n.mdest = msg.msource
/\ n.msource = msg.mdest
/\ n.mterm = msg.mterm
/\ n.mtype = AppendEntriesResponse
/\ msg.mtype = AppendEntriesRequest
/\ etcd!Send(msg)

mc_etcdSpec ==
/\ Init
/\ [][NextDynamic]_vars

\* Check the # of reconfigurations and return true if it is lower than ReconfigurationLimit
UnderReconfigLimit ==
LET leaders == {i \in Server : state[i] = Leader}
IN
\E i \in leaders :
/\ \A j \in leaders \ {i} : Len(log[i]) >= Len(log[j])
/\ ReconfigurationLimit > FoldSeq(LAMBDA x, y: IF x.type = ConfigEntry THEN 1 ELSE 0, -Cardinality(InitServer), log[i])

MCIsEnabled(action, args) ==
CASE action \in { "AddNewServer", "DeleteServer", "AddLearner" } -> UnderReconfigLimit
[] action = "Restart" -> restartCount < MaxRestart
[] action = "Timeout" ->
\* Limit the term of each server to reduce state space
/\ currentTerm[args[1]] < MaxTermLimit
\* Limit max number of simultaneous candidates
\* We made several restrictions to the state space of Raft. However since we
\* made these restrictions, Deadlocks can occur at places that Raft would in
\* real-world deployments handle graciously.
\* One example of this is if a Quorum of nodes becomes Candidate but can not
\* timeout anymore since we constrained the terms. Then, an artificial Deadlock
\* is reached. We solve this below. If TermLimit is set to any number >2, this is
\* not an issue since breadth-first search will make sure that a similar
\* situation is simulated at term==1 which results in a term increase to 2.
/\ Cardinality({ s \in GetConfig(args[1]) : state[s] = Candidate}) < 1
[] action = "ClientRequest" ->
\* Allocation-free variant of Len(SelectSeq(log[i], LAMBDA e: e.contentType = TypeEntry)) < RequestLimit
FoldSeq(LAMBDA e, count: IF e.type = ValueEntry THEN count + 1 ELSE count, 0, log[args[1]]) < RequestLimit
[] action = "Ready" ->
\* If there is no new message and no state change, we can skip Advance to reduce state spece
\* to be explored
LET rd == ReadyData(args[1])
IN
\/ rd.msgs /= EmptyBag
\/ DurableStateFromReady(rd) /= durableState[args[1]]
[] action = "StepDownToFollower" -> stepDownCount < MaxStepDownToFollower
[] OTHER -> etcd_control!IsEnabled(action, args)

MCPostAction(action, args) ==
CASE action = "Restart" ->
/\ restartCount' = restartCount + 1
/\ UNCHANGED <<stepDownCount>>
[] action = "StepDownToFollower" ->
/\ stepDownCount' = stepDownCount + 1
/\ UNCHANGED <<restartCount>>
[] OTHER -> UNCHANGED <<constaintVars>>
\* Symmetry set over possible servers. May dangerous and is only enabled
\* via the Symmetry option in cfg file.
Symmetry == Permutations(Server)

\* Include all variables in the view, which is similar to defining no view.
View == << vars >>
View == mcVars

----
InitConstraintVars ==
/\ restartCount = 0
/\ stepDownCount = 0

MCInit ==
/\ Init
/\ InitConstraintVars

MCSpec == MCInit /\ [][Controlled_Next]_mcVars

----
===================================
5 changes: 5 additions & 0 deletions tla/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ The TLA+ spec defines the desired behaviors of the model. To validate the correc
./validate-model.sh -s ./MCetcdraft.tla -c ./MCetcdraft.cfg
```

You can also add `-m` option to run model checking in simulation mode, which will randomly walk in the state machine and is helpful for finding issues quickly.

```console
./validate-model.sh -m -s ./MCetcdraft.tla -c ./MCetcdraft.cfg
```

## Validate Collected Traces
With above example trace logger, validate.sh can be used to validate traces parallelly.
Expand Down
9 changes: 4 additions & 5 deletions tla/Traceetcdraft.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@ VIEW
PROPERTIES
TraceMatched

PROPERTIES
etcdSpec

\* Checking for deadlocks during trace validation is disabled, as it may lead to false
\* counterexamples. A trace specification defines a set of traces, where at least one
\* trace is expected to match the log file in terms of variable values and length.
Expand Down Expand Up @@ -64,12 +61,14 @@ CONSTANTS
InitServer <- TraceInitServer
Server <- TraceServer

IsEnabled <- TraceIsEnabled
PostAction <- TracePostAction

INVARIANTS
LogInv
MoreThanOneLeaderInv
ElectionSafetyInv
LogMatchingInv
QuorumLogInv
MoreUpToDateCorrectInv
LeaderCompletenessInv
CommittedIsDurableInv
LeaderCompletenessInv
Loading
Loading