diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 6bb94cf54..000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,89 +0,0 @@ -version: 2.1 - -workflows: - ci: - jobs: - - lint - - go-test: - name: test go1.15 - version: "1.15" - - go-test: - name: test go1.16 - version: "1.16" - - go-test: - name: test go1.16 32bit - version: "1.16" - goarch: "386" - args: "" # remove -race - -executors: - golang: - parameters: - version: - type: string - goarch: - type: string - default: amd64 - docker: - - image: docker.mirror.hashicorp.services/circleci/golang:<> - environment: - TEST_RESULTS_DIR: /tmp/test-results - GOTRACEBACK: "all" - GO111MODULE: "on" - GOMAXPROCS: 2 - GOARCH: <> - -jobs: - lint: - executor: - name: golang - version: "1.16" - steps: - - checkout - - run: go mod download - - # check go fmt output because it does not report non-zero when there are fmt changes - - run: - name: check go fmt - command: | - files=$(go fmt ./...) - if [ -n "$files" ]; then - echo "The following file(s) do not conform to go fmt:" - echo "$files" - exit 1 - fi - - run: | - PACKAGE_NAMES=$(go list ./... | grep -v github.com/hashicorp/raft/fuzzy) - go vet $PACKAGE_NAMES - - go-test: - parameters: - version: - type: string - goarch: - type: string - default: amd64 - args: - type: string - default: "-race" - executor: - name: golang - version: <> - goarch: <> - steps: - - run: go env - - checkout - - run: mkdir -p $TEST_RESULTS_DIR - - run: - name: run tests - environment: - INTEG_TESTS: "yes" - GOTESTSUM_FORMAT: short-verbose - command: | - gotestsum --junitfile ${TEST_RESULTS_DIR}/junit.xml -- -timeout=240s <> . - gotestsum --junitfile ${TEST_RESULTS_DIR}/junit-batch.xml -- -timeout=240s <> -tags batchtest . - - - store_test_results: - path: /tmp/test-results - - store_artifacts: - path: /tmp/test-results diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 000000000..28773e7da --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,4 @@ +* @hashicorp/consul-core-reviewers @hashicorp/nomad-eng + +/.release/ @hashicorp/release-engineering +/.github/workflows/ci.yml @hashicorp/release-engineering diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..efba123cd --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,7 @@ +version: 2 + +updates: + - package-ecosystem: "gomod" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/stale.yml b/.github/stale.yml index 45624202f..69aadab02 100644 --- a/.github/stale.yml +++ b/.github/stale.yml @@ -1,3 +1,6 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: MPL-2.0 + # Number of days of inactivity before an Issue becomes stale daysUntilStale: 60 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000..a6daa7c40 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,88 @@ +name: ci + +on: + pull_request: + branches: ["main"] + push: + branches: ["main"] + tags: ["*"] + +permissions: + contents: read + +jobs: + go-fmt-and-vet: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0 + - uses: actions/setup-go@6edd4406fa81c3da01a34fa6f6343087c207a568 # v3.5.0 + with: + go-version: '1.20' + cache: true + - run: | + files=$(go fmt ./...) + if [ -n "$files" ]; then + echo "The following file(s) do not conform to go fmt:" + echo "$files" + exit 1 + fi + - run: | + PACKAGE_NAMES=$(go list ./... 
| grep -v github.com/hashicorp/raft/fuzzy) + go vet $PACKAGE_NAMES + + go-test: + needs: go-fmt-and-vet + strategy: + matrix: + go: ['1.19', '1.20'] + arch: ['x32', 'x64'] + runs-on: ubuntu-22.04 + env: + INTEG_TESTS: yes + steps: + - uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0 + - uses: actions/setup-go@6edd4406fa81c3da01a34fa6f6343087c207a568 # v3.5.0 + with: + go-version: ${{ matrix.go }} + architecture: ${{ matrix.arch }} + cache: true + # x86 specific build. + - if: matrix.arch == 'x32' + run: | + sudo apt-get update + sudo apt-get install gcc-multilib + go test --tags batchtest ./... + # x86-64 specific build. + - if: matrix.arch == 'x64' + run: go test -race --tags batchtest ./... + go-test-compat: + needs: go-test + strategy: + matrix: + go: [ '1.20', '1.21', '1.22' ] + arch: [ 'x32', 'x64' ] + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c # v3.3.0 + - uses: actions/setup-go@6edd4406fa81c3da01a34fa6f6343087c207a568 # v3.5.0 + with: + go-version: ${{ matrix.go }} + architecture: ${{ matrix.arch }} + cache: true + submodules: true + # x86 specific build. + - if: matrix.arch == 'x32' + run: | + sudo apt-get update + sudo apt-get install gcc-multilib + git submodule update --init --recursive + cd raft-compat + go mod tidy + go test ./... + # x86-64 specific build. + - if: matrix.arch == 'x64' + run: | + git submodule update --init --recursive + cd raft-compat + go mod tidy + go test -race ./... diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..cbcd5cc91 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "raft-compat/raft-latest"] + path = raft-compat/raft-previous-version + url = https://github.com/hashicorp/raft.git diff --git a/.golangci-lint.yml b/.golangci-lint.yml index a021e196e..5f2a2d9f3 100644 --- a/.golangci-lint.yml +++ b/.golangci-lint.yml @@ -1,3 +1,6 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: MPL-2.0 + run: deadline: 5m diff --git a/.travis.yml b/.travis.yml index badd7ff92..f214436ca 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,6 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: MPL-2.0 + language: go go: diff --git a/CHANGELOG.md b/CHANGELOG.md index 49476897c..b0fef7eb6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,70 @@ # UNRELEASED +# 1.7.0 (June 5th, 2024) + +CHANGES + +* Raft multi version testing [GH-559](https://github.com/hashicorp/raft/pull/559) + +IMPROVEMENTS + +* Raft pre-vote extension implementation, activated by default. [GH-530](https://github.com/hashicorp/raft/pull/530) + +BUG FIXES + +* Fix serialize NetworkTransport data race on ServerAddr(). [GH-591](https://github.com/hashicorp/raft/pull/591) + +# 1.6.1 (January 8th, 2024) + +CHANGES + +* Add reference use of Hashicorp Raft. [GH-584](https://github.com/hashicorp/raft/pull/584) +* [COMPLIANCE] Add Copyright and License Headers. [GH-580](https://github.com/hashicorp/raft/pull/580) + +IMPROVEMENTS + +* Bump github.com/hashicorp/go-hclog from 1.5.0 to 1.6.2. [GH-583](https://github.com/hashicorp/raft/pull/583) + +BUG FIXES + +* Fix rare leadership transfer failures when writes happen during transfer. [GH-581](https://github.com/hashicorp/raft/pull/581) + +# 1.6.0 (November 15th, 2023) + +CHANGES + +* Upgrade hashicorp/go-msgpack to v2, with go.mod upgraded from v0.5.5 to v2.1.1. 
[GH-577](https://github.com/hashicorp/raft/pull/577) + + go-msgpack v2.1.1 is by default binary compatible with v0.5.5 ("non-builtin" encoding of `time.Time`), but can decode messages produced by v1.1.5 as well ("builtin" encoding of `time.Time`). + + However, if users of this libary overrode the version of go-msgpack (especially to v1), this **could break** compatibility if raft nodes are running a mix of versions. + + This compatibility can be configured at runtime in Raft using `NetworkTransportConfig.MsgpackUseNewTimeFormat` -- the default is `false`, which maintains compatibility with `go-msgpack` v0.5.5, but if set to `true`, will be compatible with `go-msgpack` v1.1.5. + +IMPROVEMENTS + +* Push to notify channel when shutting down. [GH-567](https://github.com/hashicorp/raft/pull/567) +* Add CommitIndex API [GH-560](https://github.com/hashicorp/raft/pull/560) +* Document some Apply error cases better [GH-561](https://github.com/hashicorp/raft/pull/561) + +BUG FIXES + +* Race with `candidateFromLeadershipTransfer` [GH-570](https://github.com/hashicorp/raft/pull/570) + + +# 1.5.0 (April 21st, 2023) + +IMPROVEMENTS +* Fixed a performance anomaly related to pipelining RPCs that caused large increases in commit latency under high write throughput. Default behavior has changed. For more information see #541. + +# 1.4.0 (March 17th, 2023) + +FEATURES +* Support log stores with a monotonically increasing index. Implementing a log store with the `MonotonicLogStore` interface where `IsMonotonic()` returns true will allow Raft to clear all previous logs on user restores of Raft snapshots. + +BUG FIXES +* Restoring a snapshot with the raft-wal log store caused a panic due to index gap that is created during snapshot restores. + # 1.3.0 (April 22nd, 2021) IMPROVEMENTS diff --git a/LICENSE b/LICENSE index c33dcc7c9..c72625e4c 100644 --- a/LICENSE +++ b/LICENSE @@ -1,3 +1,5 @@ +Copyright (c) 2013 HashiCorp, Inc. + Mozilla Public License, version 2.0 1. Definitions diff --git a/README.md b/README.md index 11239ecb4..ded5bd02e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -raft [![CircleCI](https://circleci.com/gh/hashicorp/raft.svg?style=svg)](https://circleci.com/gh/hashicorp/raft) +raft [![Build Status](https://github.com/hashicorp/raft/workflows/ci/badge.svg)](https://github.com/hashicorp/raft/actions) ==== raft is a [Go](http://www.golang.org) library that manages a replicated @@ -12,7 +12,7 @@ fault tolerance as well. ## Building -If you wish to build raft you'll need Go version 1.2+ installed. +If you wish to build raft you'll need Go version 1.16+ installed. Please check your installation with: @@ -34,7 +34,8 @@ and `StableStore`. ## Community Contributed Examples -[Raft gRPC Example](https://github.com/Jille/raft-grpc-example) - Utilizing the Raft repository with gRPC +- [Raft gRPC Example](https://github.com/Jille/raft-grpc-example) - Utilizing the Raft repository with gRPC +- [Raft-based KV-store Example](https://github.com/otoolep/hraftd) - Uses Hashicorp Raft to build a distributed key-value store ## Tagged Releases diff --git a/api.go b/api.go index 9152cf620..cff2eaac2 100644 --- a/api.go +++ b/api.go @@ -1,10 +1,12 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( "errors" "fmt" "io" - "os" "strconv" "sync" "sync/atomic" @@ -15,15 +17,16 @@ import ( ) const ( - // This is the current suggested max size of the data in a raft log entry. - // This is based on current architecture, default timing, etc. 
Clients can + // SuggestedMaxDataSize of the data in a raft log entry, in bytes. + // + // The value is based on current architecture, default timing, etc. Clients can // ignore this value if they want as there is no actual hard checking // within the library. As the library is enhanced this value may change // over time to reflect current suggested maximums. // - // Increasing beyond this risks RPC IO taking too long and preventing - // timely heartbeat signals which are sent in serial in current transports, - // potentially causing leadership instability. + // Applying log entries with data greater than this size risks RPC IO taking + // too long and preventing timely heartbeat signals. These signals are sent in serial + // in current transports, potentially causing leadership instability. SuggestedMaxDataSize = 512 * 1024 ) @@ -36,6 +39,10 @@ var ( // follower or candidate node. ErrNotLeader = errors.New("node is not the leader") + // ErrNotVoter is returned when an operation can't be completed on a + // non-voter node. + ErrNotVoter = errors.New("node is not a voter") + // ErrLeadershipLost is returned when a leader fails to commit a log entry // because it's been deposed in the process. ErrLeadershipLost = errors.New("leadership lost while committing log") @@ -111,8 +118,10 @@ type Raft struct { lastContact time.Time lastContactLock sync.RWMutex - // Leader is the current cluster leader - leader ServerAddress + // leaderAddr is the current cluster leader Address + leaderAddr ServerAddress + // LeaderID is the current cluster leader ID + leaderID ServerID leaderLock sync.RWMutex // leaderCh is used to notify of leadership changes @@ -125,7 +134,7 @@ type Raft struct { // candidate because the leader tries to transfer leadership. This flag is // used in RequestVoteRequest to express that a leadership transfer is going // on. - candidateFromLeadershipTransfer bool + candidateFromLeadershipTransfer atomic.Bool // Stores our local server ID, used to avoid sending RPCs to ourself localID ServerID @@ -146,8 +155,8 @@ type Raft struct { // the log/snapshot. configurations configurations - // Holds a copy of the latest configuration which can be read - // independently from main loop. + // Holds a copy of the latest configuration which can be read independently + // of the main loop. latestConfiguration atomic.Value // RPC chan comes from the transport layer @@ -195,6 +204,19 @@ type Raft struct { // leadershipTransferCh is used to start a leadership transfer from outside of // the main thread. leadershipTransferCh chan *leadershipTransferFuture + + // leaderNotifyCh is used to tell leader that config has changed + leaderNotifyCh chan struct{} + + // followerNotifyCh is used to tell followers that config has changed + followerNotifyCh chan struct{} + + // mainThreadSaturation measures the saturation of the main raft goroutine. + mainThreadSaturation *saturationMetric + + // preVoteDisabled control if the pre-vote feature is activated, + // prevote feature is disabled if set to true. 
+ preVoteDisabled bool } // BootstrapCluster initializes a server's storage with the given cluster @@ -314,6 +336,9 @@ func RecoverCluster(conf *Config, fsm FSM, logs LogStore, stable StableStore, if err != nil { return fmt.Errorf("failed to list snapshots: %v", err) } + + logger := conf.getOrCreateLogger() + for _, snapshot := range snapshots { var source io.ReadCloser _, source, err = snaps.Open(snapshot.ID) @@ -329,9 +354,18 @@ func RecoverCluster(conf *Config, fsm FSM, logs LogStore, stable StableStore, // server instance. If the same process will eventually become a Raft peer // then it will call NewRaft and restore again from disk then which will // report metrics. - err = fsm.Restore(source) + snapLogger := logger.With( + "id", snapshot.ID, + "last-index", snapshot.Index, + "last-term", snapshot.Term, + "size-in-bytes", snapshot.Size, + ) + crc := newCountingReadCloser(source) + monitor := startSnapshotRestoreMonitor(snapLogger, crc, snapshot.Size, false) + err = fsm.Restore(crc) // Close the source after the restore has completed source.Close() + monitor.StopAndWait() if err != nil { // Same here, skip and try the next one. continue @@ -462,20 +496,7 @@ func NewRaft(conf *Config, fsm FSM, logs LogStore, stable StableStore, snaps Sna } // Ensure we have a LogOutput. - var logger hclog.Logger - if conf.Logger != nil { - logger = conf.Logger - } else { - if conf.LogOutput == nil { - conf.LogOutput = os.Stderr - } - - logger = hclog.New(&hclog.LoggerOptions{ - Name: "raft", - Level: hclog.LevelFromString(conf.LogLevel), - Output: conf.LogOutput, - }) - } + logger := conf.getOrCreateLogger() // Try to restore the current term. currentTerm, err := stable.GetUint64(keyCurrentTerm) @@ -514,6 +535,7 @@ func NewRaft(conf *Config, fsm FSM, logs LogStore, stable StableStore, snaps Sna applyCh = make(chan *logFuture, conf.MaxAppendEntries) } + _, transportSupportPreVote := trans.(WithPreVote) // Create Raft struct. 
r := &Raft{ protocolVersion: protocolVersion, @@ -540,6 +562,13 @@ func NewRaft(conf *Config, fsm FSM, logs LogStore, stable StableStore, snaps Sna bootstrapCh: make(chan *bootstrapFuture), observers: make(map[uint64]*Observer), leadershipTransferCh: make(chan *leadershipTransferFuture, 1), + leaderNotifyCh: make(chan struct{}, 1), + followerNotifyCh: make(chan struct{}, 1), + mainThreadSaturation: newSaturationMetric([]string{"raft", "thread", "main", "saturation"}, 1*time.Second), + preVoteDisabled: conf.PreVoteDisabled || !transportSupportPreVote, + } + if !transportSupportPreVote && !conf.PreVoteDisabled { + r.logger.Warn("pre-vote is disabled because it is not supported by the Transport") } r.conf.Store(*conf) @@ -599,21 +628,8 @@ func (r *Raft) restoreSnapshot() error { // Try to load in order of newest to oldest for _, snapshot := range snapshots { - if !r.config().NoSnapshotRestoreOnStart { - _, source, err := r.snapshots.Open(snapshot.ID) - if err != nil { - r.logger.Error("failed to open snapshot", "id", snapshot.ID, "error", err) - continue - } - - if err := fsmRestoreAndMeasure(r.fsm, source); err != nil { - source.Close() - r.logger.Error("failed to restore snapshot", "id", snapshot.ID, "error", err) - continue - } - source.Close() - - r.logger.Info("restored from snapshot", "id", snapshot.ID) + if success := r.tryRestoreSingleSnapshot(snapshot); !success { + continue } // Update the lastApplied so we don't replay old logs @@ -649,6 +665,38 @@ func (r *Raft) restoreSnapshot() error { return nil } +func (r *Raft) tryRestoreSingleSnapshot(snapshot *SnapshotMeta) bool { + if r.config().NoSnapshotRestoreOnStart { + return true + } + + snapLogger := r.logger.With( + "id", snapshot.ID, + "last-index", snapshot.Index, + "last-term", snapshot.Term, + "size-in-bytes", snapshot.Size, + ) + + snapLogger.Info("starting restore from snapshot") + + _, source, err := r.snapshots.Open(snapshot.ID) + if err != nil { + snapLogger.Error("failed to open snapshot", "error", err) + return false + } + + if err := fsmRestoreAndMeasure(snapLogger, r.fsm, source, snapshot.Size); err != nil { + source.Close() + snapLogger.Error("failed to restore snapshot", "error", err) + return false + } + source.Close() + + snapLogger.Info("restored from snapshot") + + return true +} + func (r *Raft) config() Config { return r.conf.Load().(Config) } @@ -672,6 +720,14 @@ func (r *Raft) ReloadConfig(rc ReloadableConfig) error { return err } r.conf.Store(newCfg) + + if rc.HeartbeatTimeout < oldCfg.HeartbeatTimeout { + // On leader, ensure replication loops running with a longer + // timeout than what we want now discover the change. + asyncNotifyCh(r.leaderNotifyCh) + // On follower, update current timer to use the shorter new value. + asyncNotifyCh(r.followerNotifyCh) + } return nil } @@ -714,13 +770,26 @@ func (r *Raft) BootstrapCluster(configuration Configuration) Future { } // Leader is used to return the current leader of the cluster. +// Deprecated: use LeaderWithID instead // It may return empty string if there is no current leader // or the leader is unknown. +// Deprecated: use LeaderWithID instead. func (r *Raft) Leader() ServerAddress { r.leaderLock.RLock() - leader := r.leader + leaderAddr := r.leaderAddr r.leaderLock.RUnlock() - return leader + return leaderAddr +} + +// LeaderWithID is used to return the current leader address and ID of the cluster. +// It may return empty strings if there is no current leader +// or the leader is unknown. 
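+//
+// A minimal usage sketch (illustrative only; assumes r is an existing *Raft
+// node, which is not part of this change):
+//
+//	addr, id := r.LeaderWithID()
+//	if id == "" {
+//		// leader is currently unknown; addr will also be empty
+//	}
+//	_ = addr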
+func (r *Raft) LeaderWithID() (ServerAddress, ServerID) { + r.leaderLock.RLock() + leaderAddr := r.leaderAddr + leaderID := r.leaderID + r.leaderLock.RUnlock() + return leaderAddr, leaderID } // Apply is used to apply a command to the FSM in a highly consistent @@ -728,12 +797,23 @@ func (r *Raft) Leader() ServerAddress { // An optional timeout can be provided to limit the amount of time we wait // for the command to be started. This must be run on the leader or it // will fail. +// +// If the node discovers it is no longer the leader while applying the command, +// it will return ErrLeadershipLost. There is no way to guarantee whether the +// write succeeded or failed in this case. For example, if the leader is +// partitioned it can't know if a quorum of followers wrote the log to disk. If +// at least one did, it may survive into the next leader's term. +// +// If a user snapshot is restored while the command is in-flight, an +// ErrAbortedByRestore is returned. In this case the write effectively failed +// since its effects will not be present in the FSM after the restore. func (r *Raft) Apply(cmd []byte, timeout time.Duration) ApplyFuture { return r.ApplyLog(Log{Data: cmd}, timeout) } // ApplyLog performs Apply but takes in a Log directly. The only values -// currently taken from the submitted Log are Data and Extensions. +// currently taken from the submitted Log are Data and Extensions. See +// Apply for details on error cases. func (r *Raft) ApplyLog(log Log, timeout time.Duration) ApplyFuture { metrics.IncrCounter([]string{"raft", "apply"}, 1) @@ -762,11 +842,11 @@ func (r *Raft) ApplyLog(log Log, timeout time.Duration) ApplyFuture { } } -// Barrier is used to issue a command that blocks until all preceeding +// Barrier is used to issue a command that blocks until all preceding // operations have been applied to the FSM. It can be used to ensure the // FSM reflects all queued writes. An optional timeout can be provided to // limit the amount of time we wait for the command to be started. This -// must be run on the leader or it will fail. +// must be run on the leader, or it will fail. func (r *Raft) Barrier(timeout time.Duration) Future { metrics.IncrCounter([]string{"raft", "barrier"}, 1) var timer <-chan time.Time @@ -775,11 +855,7 @@ func (r *Raft) Barrier(timeout time.Duration) Future { } // Create a log future, no index or term yet - logFuture := &logFuture{ - log: Log{ - Type: LogBarrier, - }, - } + logFuture := &logFuture{log: Log{Type: LogBarrier}} logFuture.init() select { @@ -792,9 +868,9 @@ func (r *Raft) Barrier(timeout time.Duration) Future { } } -// VerifyLeader is used to ensure the current node is still -// the leader. This can be done to prevent stale reads when a -// new leader has potentially been elected. +// VerifyLeader is used to ensure this peer is still the leader. It may be used +// to prevent returning stale data from the FSM after the peer has lost +// leadership. func (r *Raft) VerifyLeader() Future { metrics.IncrCounter([]string{"raft", "verify_leader"}, 1) verifyFuture := &verifyFuture{} @@ -817,25 +893,27 @@ func (r *Raft) GetConfiguration() ConfigurationFuture { return configReq } -// AddPeer (deprecated) is used to add a new peer into the cluster. This must be -// run on the leader or it will fail. Use AddVoter/AddNonvoter instead. +// AddPeer to the cluster configuration. Must be run on the leader, or it will fail. +// +// Deprecated: Use AddVoter/AddNonvoter instead. 
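+//
+// Migration sketch (illustrative only; the server ID and address values below
+// are assumptions, not part of this change). A call such as
+//
+//	f := r.AddPeer("10.0.0.2:8300")
+//
+// can be replaced on protocol version 3 clusters with the ID-based API:
+//
+//	future := r.AddVoter(ServerID("node2"), ServerAddress("10.0.0.2:8300"), 0, 0)
+//	if err := future.Error(); err != nil {
+//		// handle the failed configuration change
+//	}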
func (r *Raft) AddPeer(peer ServerAddress) Future { if r.protocolVersion > 2 { return errorFuture{ErrUnsupportedProtocol} } return r.requestConfigChange(configurationChangeRequest{ - command: AddStaging, + command: AddVoter, serverID: ServerID(peer), serverAddress: peer, prevIndex: 0, }, 0) } -// RemovePeer (deprecated) is used to remove a peer from the cluster. If the -// current leader is being removed, it will cause a new election -// to occur. This must be run on the leader or it will fail. -// Use RemoveServer instead. +// RemovePeer from the cluster configuration. If the current leader is being +// removed, it will cause a new election to occur. Must be run on the leader, +// or it will fail. + +// Deprecated: Use RemoveServer instead. func (r *Raft) RemovePeer(peer ServerAddress) Future { if r.protocolVersion > 2 { return errorFuture{ErrUnsupportedProtocol} @@ -862,7 +940,7 @@ func (r *Raft) AddVoter(id ServerID, address ServerAddress, prevIndex uint64, ti } return r.requestConfigChange(configurationChangeRequest{ - command: AddStaging, + command: AddVoter, serverID: id, serverAddress: address, prevIndex: prevIndex, @@ -955,7 +1033,7 @@ func (r *Raft) Snapshot() SnapshotFuture { // Restore is used to manually force Raft to consume an external snapshot, such // as if restoring from a backup. We will use the current Raft configuration, // not the one from the snapshot, so that we can restore into a new cluster. We -// will also use the higher of the index of the snapshot, or the current index, +// will also use the max of the index of the snapshot, or the current index, // and then add 1 to that, so we force a new state with a hole in the Raft log, // so that the snapshot will be sent to followers and used for any new joiners. // This can only be run on the leader, and blocks until the restore is complete @@ -1011,7 +1089,7 @@ func (r *Raft) Restore(meta *SnapshotMeta, reader io.Reader, timeout time.Durati } } -// State is used to return the current raft state. +// State returns the state of this raft peer. func (r *Raft) State() RaftState { return r.getState() } @@ -1136,6 +1214,13 @@ func (r *Raft) LastIndex() uint64 { return r.getLastIndex() } +// CommitIndex returns the committed index. +// This API maybe helpful for server to implement the read index optimization +// as described in the Raft paper. +func (r *Raft) CommitIndex() uint64 { + return r.getCommitIndex() +} + // AppliedIndex returns the last index applied to the FSM. This is generally // lagging behind the last index, especially for indexes that are persisted but // have not yet been considered committed by the leader. NOTE - this reflects @@ -1151,7 +1236,7 @@ func (r *Raft) AppliedIndex() uint64 { // This can only be called from the leader, or it will fail. The leader will // stop accepting client requests, make sure the target server is up to date // and starts the transfer with a TimeoutNow message. This message has the same -// effect as if the election timeout on the on the target server fires. Since +// effect as if the election timeout on the target server fires. Since // it is unlikely that another server is starting an election, it is very // likely that the target server is able to win the election. Note that raft // protocol version 3 is not sufficient to use LeadershipTransfer. A recent diff --git a/bench/bench.go b/bench/bench.go index 51156a5c7..270a72303 100644 --- a/bench/bench.go +++ b/bench/bench.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: MPL-2.0 + package raftbench // raftbench provides common benchmarking functions which can be used by @@ -6,8 +9,9 @@ package raftbench // makes comparing backend performance easier by sharing the tests. import ( - "github.com/hashicorp/raft" "testing" + + "github.com/hashicorp/raft" ) func FirstIndex(b *testing.B, store raft.LogStore) { @@ -164,7 +168,7 @@ func GetUint64(b *testing.B, store raft.StableStore) { // Run GetUint64 a number of times for n := 0; n < b.N; n++ { - if _, err := store.Get([]byte{0x05}); err != nil { + if _, err := store.GetUint64([]byte{0x05}); err != nil { b.Fatalf("err: %s", err) } } diff --git a/bench_test.go b/bench_test.go new file mode 100644 index 000000000..1dd6f7e3e --- /dev/null +++ b/bench_test.go @@ -0,0 +1,46 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package raft + +import ( + "testing" + "time" + + "github.com/hashicorp/go-hclog" +) + +func BenchmarkStoreLogInMem(b *testing.B) { + conf := DefaultConfig() + conf.LocalID = "first" + conf.HeartbeatTimeout = 50 * time.Millisecond + conf.ElectionTimeout = 50 * time.Millisecond + conf.LeaderLeaseTimeout = 50 * time.Millisecond + conf.CommitTimeout = 5 * time.Millisecond + conf.SnapshotThreshold = 100 + conf.TrailingLogs = 10 + conf.LogLevel = "OFF" + raft := MakeRaft(b, conf, true) + raft.logger.SetLevel(hclog.Off) + + NoErr(WaitFor(raft, Leader), b) + + applyAndWait := func(leader *RaftEnv, n, sz int) { + // Do some commits + var futures []ApplyFuture + for i := 0; i < n; i++ { + futures = append(futures, leader.raft.Apply(logBytes(i, sz), 0)) + } + for _, f := range futures { + NoErr(WaitFuture(f), b) + leader.logger.Debug("applied", "index", f.Index(), "size", sz) + } + } + + for i := 0; i < b.N; i++ { + // Do some commits + applyAndWait(raft, 100, 10) + // Do a snapshot + NoErr(WaitFuture(raft.raft.Snapshot()), b) + } +} diff --git a/commands.go b/commands.go index 3358a3284..1ec76cb27 100644 --- a/commands.go +++ b/commands.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft // RPCHeader is a common sub-structure used to pass along protocol version and @@ -8,6 +11,10 @@ type RPCHeader struct { // ProtocolVersion is the version of the protocol the sender is // speaking. ProtocolVersion ProtocolVersion + // ID is the ServerID of the node sending the RPC Request or Response + ID []byte + // Addr is the ServerAddr of the node sending the RPC Request or Response + Addr []byte } // WithRPCHeader is an interface that exposes the RPC header. @@ -21,7 +28,9 @@ type AppendEntriesRequest struct { RPCHeader // Provide the current term and leader - Term uint64 + Term uint64 + + // Deprecated: use RPCHeader.Addr instead Leader []byte // Provide the previous entries for integrity checking @@ -70,7 +79,9 @@ type RequestVoteRequest struct { RPCHeader // Provide the term and our id - Term uint64 + Term uint64 + + // Deprecated: use RPCHeader.Addr instead Candidate []byte // Used to ensure safety @@ -109,6 +120,40 @@ func (r *RequestVoteResponse) GetRPCHeader() RPCHeader { return r.RPCHeader } +// RequestPreVoteRequest is the command used by a candidate to ask a Raft peer +// for a vote in an election. +type RequestPreVoteRequest struct { + RPCHeader + + // Provide the term and our id + Term uint64 + + // Used to ensure safety + LastLogIndex uint64 + LastLogTerm uint64 +} + +// GetRPCHeader - See WithRPCHeader. 
+func (r *RequestPreVoteRequest) GetRPCHeader() RPCHeader { + return r.RPCHeader +} + +// RequestPreVoteResponse is the response returned from a RequestPreVoteRequest. +type RequestPreVoteResponse struct { + RPCHeader + + // Newer term if leader is out of date. + Term uint64 + + // Is the vote granted. + Granted bool +} + +// GetRPCHeader - See WithRPCHeader. +func (r *RequestPreVoteResponse) GetRPCHeader() RPCHeader { + return r.RPCHeader +} + // InstallSnapshotRequest is the command sent to a Raft peer to bootstrap its // log (and state machine) from a snapshot on another peer. type InstallSnapshotRequest struct { @@ -122,9 +167,10 @@ type InstallSnapshotRequest struct { LastLogIndex uint64 LastLogTerm uint64 - // Peer Set in the snapshot. This is deprecated in favor of Configuration + // Peer Set in the snapshot. // but remains here in case we receive an InstallSnapshot from a leader // that's running old code. + // Deprecated: This is deprecated in favor of Configuration Peers []byte // Cluster membership. diff --git a/commitment.go b/commitment.go index 9fdef3035..7d100a63e 100644 --- a/commitment.go +++ b/commitment.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( @@ -6,7 +9,7 @@ import ( ) // Commitment is used to advance the leader's commit index. The leader and -// replication goroutines report in newly written entries with Match(), and +// replication goroutines report in newly written entries with match(), and // this notifies on commitCh when the commit index has advanced. type commitment struct { // protects matchIndexes and commitIndex diff --git a/commitment_test.go b/commitment_test.go index 6b78a641a..458ab3022 100644 --- a/commitment_test.go +++ b/commitment_test.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( @@ -212,7 +215,6 @@ func TestCommitment_noVoterSanity(t *testing.T) { if drainNotifyCh(commitCh) { t.Fatalf("unexpected commit notify") } - } // Single voter commits immediately. diff --git a/config.go b/config.go index 78dde9225..d14392fc3 100644 --- a/config.go +++ b/config.go @@ -1,8 +1,12 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( "fmt" "io" + "os" "time" "github.com/hashicorp/go-hclog" @@ -24,67 +28,74 @@ import ( // The version details are complicated, but here's a summary of what's required // to get from a version 0 cluster to version 3: // -// 1. In version N of your app that starts using the new Raft library with -// versioning, set ProtocolVersion to 1. -// 2. Make version N+1 of your app require version N as a prerequisite (all -// servers must be upgraded). For version N+1 of your app set ProtocolVersion -// to 2. -// 3. Similarly, make version N+2 of your app require version N+1 as a -// prerequisite. For version N+2 of your app, set ProtocolVersion to 3. +// 1. In version N of your app that starts using the new Raft library with +// versioning, set ProtocolVersion to 1. +// 2. Make version N+1 of your app require version N as a prerequisite (all +// servers must be upgraded). For version N+1 of your app set ProtocolVersion +// to 2. +// 3. Similarly, make version N+2 of your app require version N+1 as a +// prerequisite. For version N+2 of your app, set ProtocolVersion to 3. // // During this upgrade, older cluster members will still have Server IDs equal // to their network addresses. 
To upgrade an older member and give it an ID, it // needs to leave the cluster and re-enter: // -// 1. Remove the server from the cluster with RemoveServer, using its network -// address as its ServerID. -// 2. Update the server's config to use a UUID or something else that is -// not tied to the machine as the ServerID (restarting the server). -// 3. Add the server back to the cluster with AddVoter, using its new ID. +// 1. Remove the server from the cluster with RemoveServer, using its network +// address as its ServerID. +// 2. Update the server's config to use a UUID or something else that is +// not tied to the machine as the ServerID (restarting the server). +// 3. Add the server back to the cluster with AddVoter, using its new ID. // // You can do this during the rolling upgrade from N+1 to N+2 of your app, or // as a rolling change at any time after the upgrade. // -// Version History +// # Version History // // 0: Original Raft library before versioning was added. Servers running this -// version of the Raft library use AddPeerDeprecated/RemovePeerDeprecated -// for all configuration changes, and have no support for LogConfiguration. +// +// version of the Raft library use AddPeerDeprecated/RemovePeerDeprecated +// for all configuration changes, and have no support for LogConfiguration. +// // 1: First versioned protocol, used to interoperate with old servers, and begin -// the migration path to newer versions of the protocol. Under this version -// all configuration changes are propagated using the now-deprecated -// RemovePeerDeprecated Raft log entry. This means that server IDs are always -// set to be the same as the server addresses (since the old log entry type -// cannot transmit an ID), and only AddPeer/RemovePeer APIs are supported. -// Servers running this version of the protocol can understand the new -// LogConfiguration Raft log entry but will never generate one so they can -// remain compatible with version 0 Raft servers in the cluster. +// +// the migration path to newer versions of the protocol. Under this version +// all configuration changes are propagated using the now-deprecated +// RemovePeerDeprecated Raft log entry. This means that server IDs are always +// set to be the same as the server addresses (since the old log entry type +// cannot transmit an ID), and only AddPeer/RemovePeer APIs are supported. +// Servers running this version of the protocol can understand the new +// LogConfiguration Raft log entry but will never generate one so they can +// remain compatible with version 0 Raft servers in the cluster. +// // 2: Transitional protocol used when migrating an existing cluster to the new -// server ID system. Server IDs are still set to be the same as server -// addresses, but all configuration changes are propagated using the new -// LogConfiguration Raft log entry type, which can carry full ID information. -// This version supports the old AddPeer/RemovePeer APIs as well as the new -// ID-based AddVoter/RemoveServer APIs which should be used when adding -// version 3 servers to the cluster later. This version sheds all -// interoperability with version 0 servers, but can interoperate with newer -// Raft servers running with protocol version 1 since they can understand the -// new LogConfiguration Raft log entry, and this version can still understand -// their RemovePeerDeprecated Raft log entries. 
We need this protocol version -// as an intermediate step between 1 and 3 so that servers will propagate the -// ID information that will come from newly-added (or -rolled) servers using -// protocol version 3, but since they are still using their address-based IDs -// from the previous step they will still be able to track commitments and -// their own voting status properly. If we skipped this step, servers would -// be started with their new IDs, but they wouldn't see themselves in the old -// address-based configuration, so none of the servers would think they had a -// vote. +// +// server ID system. Server IDs are still set to be the same as server +// addresses, but all configuration changes are propagated using the new +// LogConfiguration Raft log entry type, which can carry full ID information. +// This version supports the old AddPeer/RemovePeer APIs as well as the new +// ID-based AddVoter/RemoveServer APIs which should be used when adding +// version 3 servers to the cluster later. This version sheds all +// interoperability with version 0 servers, but can interoperate with newer +// Raft servers running with protocol version 1 since they can understand the +// new LogConfiguration Raft log entry, and this version can still understand +// their RemovePeerDeprecated Raft log entries. We need this protocol version +// as an intermediate step between 1 and 3 so that servers will propagate the +// ID information that will come from newly-added (or -rolled) servers using +// protocol version 3, but since they are still using their address-based IDs +// from the previous step they will still be able to track commitments and +// their own voting status properly. If we skipped this step, servers would +// be started with their new IDs, but they wouldn't see themselves in the old +// address-based configuration, so none of the servers would think they had a +// vote. +// // 3: Protocol adding full support for server IDs and new ID-based server APIs -// (AddVoter, AddNonvoter, etc.), old AddPeer/RemovePeer APIs are no longer -// supported. Version 2 servers should be swapped out by removing them from -// the cluster one-by-one and re-adding them with updated configuration for -// this protocol version, along with their server ID. The remove/add cycle -// is required to populate their server ID. Note that removing must be done -// by ID, which will be the old server's address. +// +// (AddVoter, AddNonvoter, etc.), old AddPeer/RemovePeer APIs are no longer +// supported. Version 2 servers should be swapped out by removing them from +// the cluster one-by-one and re-adding them with updated configuration for +// this protocol version, along with their server ID. The remove/add cycle +// is required to populate their server ID. Note that removing must be done +// by ID, which will be the old server's address. type ProtocolVersion int const ( @@ -98,19 +109,22 @@ const ( // Currently, it is always assumed that the server generates the latest version, // though this may be changed in the future to include a configurable version. // -// Version History +// # Version History // // 0: Original Raft library before versioning was added. The peers portion of -// these snapshots is encoded in the legacy format which requires decodePeers -// to parse. This version of snapshots should only be produced by the -// unversioned Raft library. +// +// these snapshots is encoded in the legacy format which requires decodePeers +// to parse. 
This version of snapshots should only be produced by the +// unversioned Raft library. +// // 1: New format which adds support for a full configuration structure and its -// associated log index, with support for server IDs and non-voting server -// modes. To ease upgrades, this also includes the legacy peers structure but -// that will never be used by servers that understand version 1 snapshots. -// Since the original Raft library didn't enforce any versioning, we must -// include the legacy peers structure for this version, but we can deprecate -// it in the next snapshot version. +// +// associated log index, with support for server IDs and non-voting server +// modes. To ease upgrades, this also includes the legacy peers structure but +// that will never be used by servers that understand version 1 snapshots. +// Since the original Raft library didn't enforce any versioning, we must +// include the legacy peers structure for this version, but we can deprecate +// it in the next snapshot version. type SnapshotVersion int const ( @@ -132,17 +146,18 @@ type Config struct { // can _understand_. ProtocolVersion ProtocolVersion - // HeartbeatTimeout specifies the time in follower state without - // a leader before we attempt an election. + // HeartbeatTimeout specifies the time in follower state without contact + // from a leader before we attempt an election. HeartbeatTimeout time.Duration - // ElectionTimeout specifies the time in candidate state without - // a leader before we attempt an election. + // ElectionTimeout specifies the time in candidate state without contact + // from a leader before we attempt an election. ElectionTimeout time.Duration - // CommitTimeout controls the time without an Apply() operation - // before we heartbeat to ensure a timely commit. Due to random - // staggering, may be delayed as much as 2x this value. + // CommitTimeout specifies the time without an Apply operation before the + // leader sends an AppendEntry RPC to followers, to ensure a timely commit of + // log entries. + // Due to random staggering, may be delayed as much as 2x this value. CommitTimeout time.Duration // MaxAppendEntries controls the maximum number of append entries @@ -217,10 +232,28 @@ type Config struct { // raft's configuration and index values. NoSnapshotRestoreOnStart bool + // PreVoteDisabled deactivate the pre-vote feature when set to true + PreVoteDisabled bool + // skipStartup allows NewRaft() to bypass all background work goroutines skipStartup bool } +func (conf *Config) getOrCreateLogger() hclog.Logger { + if conf.Logger != nil { + return conf.Logger + } + if conf.LogOutput == nil { + conf.LogOutput = os.Stderr + } + + return hclog.New(&hclog.LoggerOptions{ + Name: "raft", + Level: hclog.LevelFromString(conf.LogLevel), + Output: conf.LogOutput, + }) +} + // ReloadableConfig is the subset of Config that may be reconfigured during // runtime using raft.ReloadConfig. We choose to duplicate fields over embedding // or accepting a Config but only using specific fields to keep the API clear. @@ -243,6 +276,14 @@ type ReloadableConfig struct { // we perform a snapshot. This is to prevent excessive snapshots when we can // just replay a small set of logs. SnapshotThreshold uint64 + + // HeartbeatTimeout specifies the time in follower state without + // a leader before we attempt an election. + HeartbeatTimeout time.Duration + + // ElectionTimeout specifies the time in candidate state without + // a leader before we attempt an election. 
+ ElectionTimeout time.Duration } // apply sets the reloadable fields on the passed Config to the values in @@ -252,6 +293,8 @@ func (rc *ReloadableConfig) apply(to Config) Config { to.TrailingLogs = rc.TrailingLogs to.SnapshotInterval = rc.SnapshotInterval to.SnapshotThreshold = rc.SnapshotThreshold + to.HeartbeatTimeout = rc.HeartbeatTimeout + to.ElectionTimeout = rc.ElectionTimeout return to } @@ -260,6 +303,8 @@ func (rc *ReloadableConfig) fromConfig(from Config) { rc.TrailingLogs = from.TrailingLogs rc.SnapshotInterval = from.SnapshotInterval rc.SnapshotThreshold = from.SnapshotThreshold + rc.HeartbeatTimeout = from.HeartbeatTimeout + rc.ElectionTimeout = from.ElectionTimeout } // DefaultConfig returns a Config with usable defaults. @@ -317,10 +362,10 @@ func ValidateConfig(config *Config) error { return fmt.Errorf("LeaderLeaseTimeout is too low") } if config.LeaderLeaseTimeout > config.HeartbeatTimeout { - return fmt.Errorf("LeaderLeaseTimeout cannot be larger than heartbeat timeout") + return fmt.Errorf("LeaderLeaseTimeout (%s) cannot be larger than heartbeat timeout (%s)", config.LeaderLeaseTimeout, config.HeartbeatTimeout) } if config.ElectionTimeout < config.HeartbeatTimeout { - return fmt.Errorf("ElectionTimeout must be equal or greater than Heartbeat Timeout") + return fmt.Errorf("ElectionTimeout (%s) must be equal or greater than Heartbeat Timeout (%s)", config.ElectionTimeout, config.HeartbeatTimeout) } return nil } diff --git a/configuration.go b/configuration.go index 1ac92fdc0..9bfad14f7 100644 --- a/configuration.go +++ b/configuration.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import "fmt" @@ -13,10 +16,10 @@ const ( // Nonvoter is a server that receives log entries but is not considered for // elections or commitment purposes. Nonvoter - // Staging is a server that acts like a nonvoter with one exception: once a - // staging server receives enough log entries to be sufficiently caught up to - // the leader's log, the leader will invoke a membership change to change - // the Staging server to a Voter. + // Staging is a server that acts like a Nonvoter. A configuration change + // with a ConfigurationChangeCommand of Promote can change a Staging server + // into a Voter. + // Deprecated: use Nonvoter instead. Staging ) @@ -87,23 +90,27 @@ func (c *Configuration) Clone() (copy Configuration) { type ConfigurationChangeCommand uint8 const ( - // AddStaging makes a server Staging unless its Voter. - AddStaging ConfigurationChangeCommand = iota + // AddVoter adds a server with Suffrage of Voter. + AddVoter ConfigurationChangeCommand = iota // AddNonvoter makes a server Nonvoter unless its Staging or Voter. AddNonvoter // DemoteVoter makes a server Nonvoter unless its absent. DemoteVoter // RemoveServer removes a server entirely from the cluster membership. RemoveServer - // Promote is created automatically by a leader; it turns a Staging server - // into a Voter. + // Promote changes a server from Staging to Voter. The command will be a + // no-op if the server is not Staging. + // Deprecated: use AddVoter instead. Promote + // AddStaging makes a server a Voter. + // Deprecated: AddStaging was actually AddVoter. Use AddVoter instead. + AddStaging = 0 // explicit 0 to preserve the old value. 
) func (c ConfigurationChangeCommand) String() string { switch c { - case AddStaging: - return "AddStaging" + case AddVoter: + return "AddVoter" case AddNonvoter: return "AddNonvoter" case DemoteVoter: @@ -122,7 +129,7 @@ func (c ConfigurationChangeCommand) String() string { type configurationChangeRequest struct { command ConfigurationChangeCommand serverID ServerID - serverAddress ServerAddress // only present for AddStaging, AddNonvoter + serverAddress ServerAddress // only present for AddVoter, AddNonvoter // prevIndex, if nonzero, is the index of the only configuration upon which // this change may be applied; if another configuration entry has been // added in the meantime, this request will fail. @@ -173,9 +180,9 @@ func hasVote(configuration Configuration, id ServerID) bool { return false } -// hasVote returns true if the server identified by 'id' is a Voter in the +// inConfiguration returns true if the server identified by 'id' is in in the // provided Configuration. -func inConfig(configuration Configuration, id ServerID) bool { +func inConfiguration(configuration Configuration, id ServerID) bool { for _, server := range configuration.Servers { if server.ID == id { return true @@ -225,15 +232,8 @@ func nextConfiguration(current Configuration, currentIndex uint64, change config configuration := current.Clone() switch change.command { - case AddStaging: - // TODO: barf on new address? + case AddVoter: newServer := Server{ - // TODO: This should add the server as Staging, to be automatically - // promoted to Voter later. However, the promotion to Voter is not yet - // implemented, and doing so is not trivial with the way the leader loop - // coordinates with the replication goroutines today. So, for now, the - // server will have a vote right away, and the Promote case below is - // unused. Suffrage: Voter, ID: change.serverID, Address: change.serverAddress, diff --git a/configuration_test.go b/configuration_test.go index edbe545b1..a7eab9929 100644 --- a/configuration_test.go +++ b/configuration_test.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( @@ -179,6 +182,16 @@ var nextConfigurationTests = []struct { // AddStaging: was Nonvoter. {oneOfEach, AddStaging, 3, "{[{Voter id1 addr1x} {Staging id2 addr2x} {Voter id3 addr3}]}"}, + // AddVoter: was missing. + {Configuration{}, AddVoter, 1, "{[{Voter id1 addr1}]}"}, + {singleServer, AddVoter, 2, "{[{Voter id1 addr1x} {Voter id2 addr2}]}"}, + // AddVoter: was Voter. + {singleServer, AddVoter, 1, "{[{Voter id1 addr1}]}"}, + // AddVoter: was Staging. + {oneOfEach, AddVoter, 2, "{[{Voter id1 addr1x} {Voter id2 addr2} {Nonvoter id3 addr3x}]}"}, + // AddVoter: was Nonvoter. + {oneOfEach, AddVoter, 3, "{[{Voter id1 addr1x} {Staging id2 addr2x} {Voter id3 addr3}]}"}, + // AddNonvoter: was missing. {singleServer, AddNonvoter, 2, "{[{Voter id1 addr1x} {Nonvoter id2 addr2}]}"}, // AddNonvoter: was Voter. @@ -238,7 +251,7 @@ func TestConfiguration_nextConfiguration_table(t *testing.T) { func TestConfiguration_nextConfiguration_prevIndex(t *testing.T) { // Stale prevIndex. req := configurationChangeRequest{ - command: AddStaging, + command: AddVoter, serverID: ServerID("id1"), serverAddress: ServerAddress("addr1"), prevIndex: 1, @@ -250,7 +263,7 @@ func TestConfiguration_nextConfiguration_prevIndex(t *testing.T) { // Current prevIndex. 
req = configurationChangeRequest{ - command: AddStaging, + command: AddVoter, serverID: ServerID("id2"), serverAddress: ServerAddress("addr2"), prevIndex: 2, @@ -262,7 +275,7 @@ func TestConfiguration_nextConfiguration_prevIndex(t *testing.T) { // Zero prevIndex. req = configurationChangeRequest{ - command: AddStaging, + command: AddVoter, serverID: ServerID("id3"), serverAddress: ServerAddress("addr3"), prevIndex: 0, diff --git a/discard_snapshot.go b/discard_snapshot.go index fb15d4d3e..aa148fb78 100644 --- a/discard_snapshot.go +++ b/discard_snapshot.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( diff --git a/discard_snapshot_test.go b/discard_snapshot_test.go index 5abedfe2c..7bf2d972c 100644 --- a/discard_snapshot_test.go +++ b/discard_snapshot_test.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import "testing" diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 000000000..0762c5994 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,106 @@ +# Raft Developer Documentation + +This documentation provides a high level introduction to the `hashicorp/raft` +implementation. The intended audience is anyone interested in understanding +or contributing to the code. + +## Contents + +1. [Terminology](#terminology) +2. [Operations](#operations) + 1. [Apply](./apply.md) +3. [Threads](#threads) + + +## Terminology + +This documentation uses the following terms as defined. + +* **Cluster** - the set of peers in the raft configuration +* **Peer** - a node that participates in the consensus protocol using `hashicorp/raft`. A + peer may be in one of the following states: **follower**, **candidate**, or **leader**. +* **Log** - the full set of log entries. +* **Log Entry** - an entry in the log. Each entry has an index that is used to order it + relative to other log entries. + * **Committed** - A log entry is considered committed if it is safe for that entry to be + applied to state machines. A log entry is committed once the leader that created the + entry has replicated it on a majority of the peers. A peer has successfully + replicated the entry once it is persisted. + * **Applied** - log entry applied to the state machine (FSM) +* **Term** - raft divides time into terms of arbitrary length. Terms are numbered with + consecutive integers. Each term begins with an election, in which one or more candidates + attempt to become leader. If a candidate wins the election, then it serves as leader for + the rest of the term. If the election ends with a split vote, the term will end with no + leader. +* **FSM** - finite state machine, stores the cluster state +* **Client** - the application that uses the `hashicorp/raft` library + +## Operations + +### Leader Write + +Most write operations must be performed on the leader. + +* RequestConfigChange - update the raft peer list configuration +* Apply - apply a log entry to the log on a majority of peers, and the FSM. See [raft apply](apply.md) for more details. 
+* Barrier - a special Apply that does not modify the FSM, used to wait for previous logs to be applied +* LeadershipTransfer - stop accepting client requests, and tell a different peer to start a leadership election +* Restore (Snapshot) - overwrite the cluster state with the contents of the snapshot (excluding cluster configuration) +* VerifyLeader - send a heartbeat to all voters to confirm the peer is still the leader + +### Follower Write + +* BootstrapCluster - store the cluster configuration in the local log store + + +### Read + +Read operations can be performed on a peer in any state. + +* AppliedIndex - get the index of the last log entry applied to the FSM +* GetConfiguration - return the latest cluster configuration +* LastContact - get the last time this peer made contact with the leader +* LastIndex - get the index of the latest stored log entry +* Leader - get the address of the peer that is currently the leader +* Snapshot - snapshot the current state of the FSM into a file +* State - return the state of the peer +* Stats - return some stats about the peer and the cluster + +## Threads + +Raft uses the following threads to handle operations. The name of the thread is in bold, +and a short description of the operation handled by the thread follows. The main thread is +responsible for handling many operations. + +* **run** (main thread) - different behaviour based on peer state + * follower + * processRPC (from rpcCh) + * AppendEntries + * RequestVote + * InstallSnapshot + * TimeoutNow + * liveBootstrap (from bootstrapCh) + * periodic heartbeatTimer (HeartbeatTimeout) + * candidate - starts an election for itself when called + * processRPC (from rpcCh) - same as follower + * acceptVote (from askPeerForVote) + * leader - first starts replication to all peers, and applies a Noop log to ensure the new leader has committed up to the commit index + * processRPC (from rpcCh) - same as follower, however we don’t actually expect to receive any RPCs other than a RequestVote + * leadershipTransfer (from leadershipTransferCh) - + * commit (from commitCh) - + * verifyLeader (from verifyCh) - + * user restore snapshot (from userRestoreCh) - + * changeConfig (from configurationChangeCh) - + * dispatchLogs (from applyCh) - handle client Raft.Apply requests by persisting logs to disk, and notifying replication goroutines to replicate the new logs + * checkLease (periodically LeaseTimeout) - +* **runFSM** - has exclusive access to the FSM, all reads and writes must send a message to this thread. Commands: + * apply logs to the FSM, from the fsmMutateCh, from processLogs, from leaderLoop (leader) or appendEntries RPC (follower/candidate) + * restore a snapshot to the FSM, from the fsmMutateCh, from restoreUserSnapshot (leader) or installSnapshot RPC (follower/candidate) + * capture snapshot, from fsmSnapshotCh, from takeSnapshot (runSnapshot thread) +* **runSnapshot** - handles the slower part of taking a snapshot. From a pointer captured by the FSM.Snapshot operation, this thread persists the snapshot by calling FSMSnapshot.Persist. Also calls compactLogs to delete old logs. + * periodically (SnapshotInterval) takeSnapshot for log compaction + * user snapshot, from userSnapshotCh, takeSnapshot to return to the user +* **askPeerForVote (candidate only)** - short lived goroutine that synchronously sends a RequestVote RPC to all voting peers, and waits for the response. One goroutine per voting peer. 
+* **replicate (leader only)** - long running goroutine that synchronously sends log entry AppendEntry RPCs to all peers. Also starts the heartbeat thread, and possibly the pipelineDecode thread. Runs sendLatestSnapshot when AppendEntry fails. + * **heartbeat (leader only)** - long running goroutine that synchronously sends heartbeat AppendEntry RPCs to all peers. + * **pipelineDecode (leader only)** diff --git a/docs/apply.md b/docs/apply.md new file mode 100644 index 000000000..29404d5a7 --- /dev/null +++ b/docs/apply.md @@ -0,0 +1,116 @@ +# Raft Apply + +Apply is the primary operation provided by raft. A client calls `raft.Apply` to apply +a command to the FSM. A command will first be commited, i.e., durably stored on a +quorum of raft nodes. Then, the committed command is applied to fsm. + +This sequence diagram shows the steps involved in a `raft.Apply` operation. Each box +across the top is a separate thread. The name in the box identifies the state of the peer +(leader or follower) and the thread (`:`). When there are +multiple copies of the thread, it is indicated with `(each peer)`. + +```mermaid +sequenceDiagram + autonumber + + participant client + participant leadermain as leader:main + participant leaderfsm as leader:fsm + participant leaderreplicate as leader:replicate (each peer) + participant followermain as follower:main (each peer) + participant followerfsm as follower:fsm (each peer) + + client-)leadermain: applyCh to dispatchLogs + leadermain->>leadermain: store logs to disk + + leadermain-)leaderreplicate: triggerCh + leaderreplicate-->>followermain: Transport.AppendEntries RPC + + followermain->>followermain: store logs to disk + + opt leader commit index is ahead of peer commit index + followermain-)followerfsm: fsmMutateCh
+        followerfsm->>followerfsm: fsm.Apply
+    end
+
+    followermain-->>leaderreplicate: respond success=true
+    leaderreplicate->>leaderreplicate: update commitment
+
+    opt quorum commit index has increased
+        leaderreplicate-)leadermain: commitCh
+        leadermain-)leaderfsm: fsmMutateCh
+        leaderfsm->>leaderfsm: fsm.Apply
+        leaderfsm-)client: future.respond
+    end
+
+```
+
+Following is a description of each step shown in the diagram above.
+
+1. The raft node handles the `raft.Apply` call by creating a new log entry and sending the entry
+to the `applyCh` channel.
+
+2. If the node is not the leader, the method returns an `ErrNotLeader` error. Otherwise,
+the main loop of the leader node calls `raft.dispatchLogs` to write the log entry locally.
+
+3. `raft.dispatchLogs` also sends a notification to the `f.triggerCh` of each follower (`map[ServerID]*followerReplication`) to start replicating log entries to the followers.
+
+4. For each follower, the leader has started a long running routine (`replicate`) to
+replicate log entries. On receiving a notification on the `triggerCh`, the `replicate`
+routine makes the `Transport.AppendEntries` RPC call to do the replication. The log entries
+to be replicated are from the follower's nextIndex to min(nextIndex + maxAppendEntries,
+leader's lastIndex). Another parameter to AppendEntries is the LeaderCommitIndex. Following
+are some examples:
+
+```
+AppendEntries(Log: 1..5, LeaderCommitIndex: 0) // Replicating log entries 1..5,
+                                               // the leader hasn't committed any log entry;
+AppendEntries(Log: 6..8, LeaderCommitIndex: 4) // Replicating log entries 6..8,
+                                               // log 0..4 are committed after the leader receives
+                                               // a quorum of responses
+AppendEntries(Log: 9, LeaderCommitIndex: 8)    // Replicating log entry 9,
+                                               // log 5..8 are committed.
+AppendEntries(Log: , LeaderCommitIndex: 9)     // no new log, bumping the commit index
+                                               // to let the follower stay up to date with the
+                                               // latest committed entries
+```
+
+5. The follower that receives the `appendEntries` RPC call invokes `raft.appendEntries` to handle
+the request. It appends any new entries to the local log store.
+
+6. In the same method on the follower as step 5, if the LeaderCommitIndex > this follower's
+commitIndex, the follower updates its commitIndex to min(LeaderCommitIndex, index of its last
+log entry). In the first `AppendEntries` call of the above example, the follower won't
+update its commitIndex, because LeaderCommitIndex is 0. The last RPC call doesn't contain
+any new log, but the follower will still update its commitIndex to 9.
+
+Further, the follower starts `processLogs` to send all the committed entries that haven't been
+applied to the FSM (`fsmMutateCh <- batch`). Otherwise (i.e., `commitIndex <= lastApplied`),
+the appendEntries RPC call simply returns success.
+
+Therefore, there is a small window of time in which all followers have committed the log to
+disk and the write has been applied to the leader's FSM, but the followers have not yet
+applied the log to their own FSMs.
+
+7. The peer applies the committed entries to the FSM.
+
+8. If all went well, the follower responds success (`resp.Success = true`) to the
+`appendEntries` RPC call.
+
+9. On receiving the successful response from `Transport.AppendEntries`, the leader updates
+its commit index based on the replicated log entries. Specifically, the leader finds the
+highest log entry index that has been replicated to a quorum of the servers
+(`if quorumMatchIndex > c.commitIndex`), updates `commitIndex` to that index, and
+notifies the leader's main loop through the `commitCh` channel.
+
+10. The leader receives the notification on the `r.leaderState.commitCh` channel and starts
+grouping the entries that can be applied to the FSM.
+
+11. `processLogs` applies all the committed entries that haven't been applied yet by batching the log entries and forwarding them through the `fsmMutateCh` channel to the FSM.
+
+12. The committed log entries are actually applied to the FSM in the main loop of `runFSM()`.
+
+13. After the log entries that contain the client request are applied to the FSM, the FSM
+module sets the response for the client request (`req.future.respond(nil)`). From the
+client's point of view, the future returned by `raft.Apply` is now unblocked, and calls to
+`Error()` or `Response()` return the result.
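To connect these steps back to client code, the following is a minimal, non-authoritative sketch of the caller's side of the flow (the surrounding `*raft.Raft` setup, the FSM, and the command encoding are omitted and assumed to exist elsewhere):

```go
package example

import (
	"time"

	"github.com/hashicorp/raft"
)

// applyCommand submits one command and blocks until the flow described above
// completes: the entry is committed by a quorum and applied to the leader's FSM.
// It must run on the leader; otherwise the future fails with raft.ErrNotLeader (step 2).
func applyCommand(r *raft.Raft, cmd []byte) (interface{}, error) {
	// Step 1: the command is turned into a log entry and enqueued on applyCh.
	// The timeout only bounds how long we wait to enqueue, not replication itself.
	f := r.Apply(cmd, 500*time.Millisecond)

	// Error() unblocks once runFSM has applied the entry and responded to the
	// future (step 13), or returns an error such as raft.ErrNotLeader.
	if err := f.Error(); err != nil {
		return nil, err
	}

	// Response() is whatever the FSM's Apply returned for this log entry.
	return f.Response(), nil
}
```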
diff --git a/file_snapshot.go b/file_snapshot.go
index e4d1ea4f9..25ace6c3b 100644
--- a/file_snapshot.go
+++ b/file_snapshot.go
@@ -1,3 +1,6 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: MPL-2.0
+
 package raft
 
 import (
@@ -8,7 +11,6 @@ import (
 	"hash"
 	"hash/crc64"
 	"io"
-	"io/ioutil"
 	"os"
 	"path/filepath"
 	"runtime"
@@ -97,7 +99,7 @@ func NewFileSnapshotStoreWithLogger(base string, retain int, logger hclog.Logger
 	// Ensure our path exists
 	path := filepath.Join(base, snapPath)
-	if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
+	if err := os.MkdirAll(path, 0o755); err != nil && !os.IsExist(err) {
 		return nil, fmt.Errorf("snapshot path not accessible: %v", err)
 	}
@@ -168,7 +170,7 @@ func (f *FileSnapshotStore) Create(version SnapshotVersion, index, term uint64,
 	f.logger.Info("creating new snapshot", "path", path)
 
 	// Make the directory
-	if err := os.MkdirAll(path, 0755); err != nil {
+	if err := os.MkdirAll(path, 0o755); err != nil {
 		f.logger.Error("failed to make snapshot directly", "error", err)
 		return nil, err
 	}
@@ -242,7 +244,7 @@ func (f *FileSnapshotStore) List() ([]*SnapshotMeta, error) {
 // getSnapshots returns all the known snapshots.
 func (f *FileSnapshotStore) getSnapshots() ([]*fileSnapshotMeta, error) {
 	// Get the eligible snapshots
-	snapshots, err := ioutil.ReadDir(f.path)
+	snapshots, err := os.ReadDir(f.path)
 	if err != nil {
 		f.logger.Error("failed to scan snapshot directory", "error", err)
 		return nil, err
@@ -424,11 +426,11 @@ func (s *FileSnapshotSink) Close() error {
 	if !s.noSync && runtime.GOOS != "windows" { // skipping fsync for directory entry edits on Windows, only needed for *nix style file systems
 		parentFH, err := os.Open(s.parentDir)
-		defer parentFH.Close()
 		if err != nil {
 			s.logger.Error("failed to open snapshot parent directory", "path", s.parentDir, "error", err)
 			return err
 		}
+		defer parentFH.Close()
 
 		if err = parentFH.Sync(); err != nil {
 			s.logger.Error("failed syncing parent directory", "path", s.parentDir, "error", err)
diff --git a/file_snapshot_test.go b/file_snapshot_test.go
index 007b44ead..83d680917 100644
--- a/file_snapshot_test.go
+++ b/file_snapshot_test.go
@@ -1,9 +1,11 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: MPL-2.0 + package raft import ( "bytes" "io" - "io/ioutil" "os" "reflect" "runtime" @@ -25,13 +27,13 @@ func TestFileSnapshotSinkImpl(t *testing.T) { } func TestFileSS_CreateSnapshotMissingParentDir(t *testing.T) { - parent, err := ioutil.TempDir("", "raft") + parent, err := os.MkdirTemp("", "raft") if err != nil { t.Fatalf("err: %v ", err) } defer os.RemoveAll(parent) - dir, err := ioutil.TempDir(parent, "raft") + dir, err := os.MkdirTemp(parent, "raft") if err != nil { t.Fatalf("err: %v ", err) } @@ -47,11 +49,11 @@ func TestFileSS_CreateSnapshotMissingParentDir(t *testing.T) { if err != nil { t.Fatalf("should not fail when using non existing parent") } - } + func TestFileSS_CreateSnapshot(t *testing.T) { // Create a test dir - dir, err := ioutil.TempDir("", "raft") + dir, err := os.MkdirTemp("", "raft") if err != nil { t.Fatalf("err: %v ", err) } @@ -159,7 +161,7 @@ func TestFileSS_CreateSnapshot(t *testing.T) { func TestFileSS_CancelSnapshot(t *testing.T) { // Create a test dir - dir, err := ioutil.TempDir("", "raft") + dir, err := os.MkdirTemp("", "raft") if err != nil { t.Fatalf("err: %v ", err) } @@ -197,7 +199,7 @@ func TestFileSS_Retention(t *testing.T) { var err error // Create a test dir var dir string - dir, err = ioutil.TempDir("", "raft") + dir, err = os.MkdirTemp("", "raft") if err != nil { t.Fatalf("err: %v ", err) } @@ -250,7 +252,7 @@ func TestFileSS_BadPerm(t *testing.T) { // Create a temp dir var dir1 string - dir1, err = ioutil.TempDir("", "raft") + dir1, err = os.MkdirTemp("", "raft") if err != nil { t.Fatalf("err: %s", err) } @@ -258,11 +260,11 @@ func TestFileSS_BadPerm(t *testing.T) { // Create a sub dir and remove all permissions var dir2 string - dir2, err = ioutil.TempDir(dir1, "badperm") + dir2, err = os.MkdirTemp(dir1, "badperm") if err != nil { t.Fatalf("err: %s", err) } - if err = os.Chmod(dir2, 000); err != nil { + if err = os.Chmod(dir2, 0o00); err != nil { t.Fatalf("err: %s", err) } defer os.Chmod(dir2, 777) // Set perms back for delete @@ -274,13 +276,13 @@ func TestFileSS_BadPerm(t *testing.T) { } func TestFileSS_MissingParentDir(t *testing.T) { - parent, err := ioutil.TempDir("", "raft") + parent, err := os.MkdirTemp("", "raft") if err != nil { t.Fatalf("err: %v ", err) } defer os.RemoveAll(parent) - dir, err := ioutil.TempDir(parent, "raft") + dir, err := os.MkdirTemp(parent, "raft") if err != nil { t.Fatalf("err: %v ", err) } @@ -294,7 +296,7 @@ func TestFileSS_MissingParentDir(t *testing.T) { func TestFileSS_Ordering(t *testing.T) { // Create a test dir - dir, err := ioutil.TempDir("", "raft") + dir, err := os.MkdirTemp("", "raft") if err != nil { t.Fatalf("err: %v ", err) } diff --git a/fsm.go b/fsm.go index 4c11bc29b..9d9a45346 100644 --- a/fsm.go +++ b/fsm.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( @@ -6,29 +9,43 @@ import ( "time" "github.com/armon/go-metrics" + hclog "github.com/hashicorp/go-hclog" ) -// FSM provides an interface that can be implemented by -// clients to make use of the replicated log. +// FSM is implemented by clients to make use of the replicated log. type FSM interface { - // Apply log is invoked once a log entry is committed. - // It returns a value which will be made available in the - // ApplyFuture returned by Raft.Apply method if that - // method was called on the same Raft node as the FSM. + // Apply is called once a log entry is committed by a majority of the cluster. + // + // Apply should apply the log to the FSM. 
Apply must be deterministic and + // produce the same result on all peers in the cluster. + // + // The returned value is returned to the client as the ApplyFuture.Response. Apply(*Log) interface{} - // Snapshot is used to support log compaction. This call should - // return an FSMSnapshot which can be used to save a point-in-time - // snapshot of the FSM. Apply and Snapshot are not called in multiple - // threads, but Apply will be called concurrently with Persist. This means - // the FSM should be implemented in a fashion that allows for concurrent - // updates while a snapshot is happening. + // Snapshot returns an FSMSnapshot used to: support log compaction, to + // restore the FSM to a previous state, or to bring out-of-date followers up + // to a recent log index. + // + // The Snapshot implementation should return quickly, because Apply can not + // be called while Snapshot is running. Generally this means Snapshot should + // only capture a pointer to the state, and any expensive IO should happen + // as part of FSMSnapshot.Persist. + // + // Apply and Snapshot are always called from the same thread, but Apply will + // be called concurrently with FSMSnapshot.Persist. This means the FSM should + // be implemented to allow for concurrent updates while a snapshot is happening. + // + // Clients of this library should make no assumptions about whether a returned + // Snapshot() will actually be stored by Raft. In fact it's quite possible that + // any Snapshot returned by this call will be discarded, and that + // FSMSnapshot.Persist will never be called. Raft will always call + // FSMSnapshot.Release however. Snapshot() (FSMSnapshot, error) // Restore is used to restore an FSM from a snapshot. It is not called // concurrently with any other command. The FSM must discard all previous - // state. - Restore(io.ReadCloser) error + // state before restoring the snapshot. + Restore(snapshot io.ReadCloser) error } // BatchingFSM extends the FSM interface to add an ApplyBatch function. 
This can @@ -72,7 +89,7 @@ func (r *Raft) runFSM() { batchingFSM, batchingEnabled := r.fsm.(BatchingFSM) configStore, configStoreEnabled := r.fsm.(ConfigurationStore) - commitSingle := func(req *commitTuple) { + applySingle := func(req *commitTuple) { // Apply the log if a command or config change var resp interface{} // Make sure we send a response @@ -107,10 +124,10 @@ func (r *Raft) runFSM() { lastTerm = req.log.Term } - commitBatch := func(reqs []*commitTuple) { + applyBatch := func(reqs []*commitTuple) { if !batchingEnabled { for _, ct := range reqs { - commitSingle(ct) + applySingle(ct) } return } @@ -177,8 +194,15 @@ func (r *Raft) runFSM() { } defer source.Close() + snapLogger := r.logger.With( + "id", req.ID, + "last-index", meta.Index, + "last-term", meta.Term, + "size-in-bytes", meta.Size, + ) + // Attempt to restore - if err := fsmRestoreAndMeasure(r.fsm, source); err != nil { + if err := fsmRestoreAndMeasure(snapLogger, r.fsm, source, meta.Size); err != nil { req.respond(fmt.Errorf("failed to restore snapshot %v: %v", req.ID, err)) return } @@ -208,12 +232,18 @@ func (r *Raft) runFSM() { req.respond(err) } + saturation := newSaturationMetric([]string{"raft", "thread", "fsm", "saturation"}, 1*time.Second) + for { + saturation.sleeping() + select { case ptr := <-r.fsmMutateCh: + saturation.working() + switch req := ptr.(type) { case []*commitTuple: - commitBatch(req) + applyBatch(req) case *restoreFuture: restore(req) @@ -223,6 +253,8 @@ func (r *Raft) runFSM() { } case req := <-r.fsmSnapshotCh: + saturation.working() + snapshot(req) case <-r.shutdownCh: @@ -234,13 +266,20 @@ func (r *Raft) runFSM() { // fsmRestoreAndMeasure wraps the Restore call on an FSM to consistently measure // and report timing metrics. The caller is still responsible for calling Close // on the source in all cases. -func fsmRestoreAndMeasure(fsm FSM, source io.ReadCloser) error { +func fsmRestoreAndMeasure(logger hclog.Logger, fsm FSM, source io.ReadCloser, snapshotSize int64) error { start := time.Now() - if err := fsm.Restore(source); err != nil { + + crc := newCountingReadCloser(source) + + monitor := startSnapshotRestoreMonitor(logger, crc, snapshotSize, false) + defer monitor.StopAndWait() + + if err := fsm.Restore(crc); err != nil { return err } metrics.MeasureSince([]string{"raft", "fsm", "restore"}, start) metrics.SetGauge([]string{"raft", "fsm", "lastRestoreDuration"}, float32(time.Since(start).Milliseconds())) + return nil } diff --git a/future.go b/future.go index 1411ae219..303da4487 100644 --- a/future.go +++ b/future.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( diff --git a/future_test.go b/future_test.go index 8bb958329..5ed428776 100644 --- a/future_test.go +++ b/future_test.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( diff --git a/fuzzy/apply_src.go b/fuzzy/apply_src.go index 2bb1cadfa..95144d92f 100644 --- a/fuzzy/apply_src.go +++ b/fuzzy/apply_src.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package fuzzy import ( diff --git a/fuzzy/cluster.go b/fuzzy/cluster.go index 57abfc4c8..93025aefe 100644 --- a/fuzzy/cluster.go +++ b/fuzzy/cluster.go @@ -1,10 +1,12 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: MPL-2.0 + package fuzzy import ( "bytes" "fmt" "io" - "io/ioutil" "os" "path/filepath" "testing" @@ -291,12 +293,12 @@ func (c *cluster) VerifyFSM(t *testing.T) { } func (c *cluster) RecordState(t *testing.T) { - td, _ := ioutil.TempDir(os.Getenv("TEST_FAIL_DIR"), "failure") + td, _ := os.MkdirTemp(os.Getenv("TEST_FAIL_DIR"), "failure") sd, _ := resolveDirectory("data", false) copyDir(td, sd) dump := func(n *raftNode) { nt := filepath.Join(td, n.name) - os.Mkdir(nt, 0777) + os.Mkdir(nt, 0o777) n.fsm.WriteTo(filepath.Join(nt, "fsm.txt")) n.transport.DumpLog(nt) } @@ -313,7 +315,7 @@ func copyDir(target, src string) { filepath.Walk(src, func(path string, info os.FileInfo, err error) error { relPath := path[len(src):] if info.IsDir() { - return os.MkdirAll(filepath.Join(target, relPath), 0777) + return os.MkdirAll(filepath.Join(target, relPath), 0o777) } return copyFile(filepath.Join(target, relPath), path) }) diff --git a/fuzzy/fsm.go b/fuzzy/fsm.go index 07ded50d0..56d2dbd59 100644 --- a/fuzzy/fsm.go +++ b/fuzzy/fsm.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package fuzzy import ( diff --git a/fuzzy/fsm_batch.go b/fuzzy/fsm_batch.go index 5330de53e..5667c0aac 100644 --- a/fuzzy/fsm_batch.go +++ b/fuzzy/fsm_batch.go @@ -1,3 +1,7 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build batchtest // +build batchtest package fuzzy diff --git a/fuzzy/go.mod b/fuzzy/go.mod index b2362e026..196abb440 100644 --- a/fuzzy/go.mod +++ b/fuzzy/go.mod @@ -1,14 +1,24 @@ module github.com/hashicorp/raft/fuzzy -go 1.16 +go 1.20 require ( - github.com/boltdb/bolt v1.3.1 // indirect - github.com/hashicorp/go-hclog v0.9.1 - github.com/hashicorp/go-msgpack v0.5.5 + github.com/hashicorp/go-hclog v1.6.2 + github.com/hashicorp/go-msgpack/v2 v2.1.1 github.com/hashicorp/raft v1.2.0 github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea - golang.org/x/sys v0.0.0-20210414055047-fe65e336abe0 // indirect +) + +require ( + github.com/armon/go-metrics v0.4.1 // indirect + github.com/boltdb/bolt v1.3.1 // indirect + github.com/fatih/color v1.13.0 // indirect + github.com/hashicorp/go-immutable-radix v1.0.0 // indirect + github.com/hashicorp/go-msgpack v0.5.5 // indirect + github.com/hashicorp/golang-lru v0.5.0 // indirect + github.com/mattn/go-colorable v0.1.12 // indirect + github.com/mattn/go-isatty v0.0.14 // indirect + golang.org/x/sys v0.13.0 // indirect ) replace github.com/hashicorp/raft => ../ diff --git a/fuzzy/go.sum b/fuzzy/go.sum index 8b8798340..de7a9127c 100644 --- a/fuzzy/go.sum +++ b/fuzzy/go.sum @@ -1,28 +1,45 @@ -github.com/DataDog/datadog-go v2.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= -github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878 h1:EFSB7Zo9Eg91v7MJPVsifUysc/wPdN+NOnVe6bWbdBM= -github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878/go.mod h1:3AMJUQhVx52RsWOnlkpikZr01T/yAVN2gn0861vByNg= +github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= +github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod 
h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA= +github.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= +github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/boltdb/bolt v1.3.1 h1:JQmyP4ZBrce+ZQu0dY660FMfatumYDLun9hBCUVIkF4= github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps= +github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/fatih/color v1.7.0 h1:DkWD4oS2D8LGGgTQ6IvwJJXSL5Vp2ffcQg58nFV38Ys= -github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= +github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= +github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= +github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= +github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= +github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= +github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= -github.com/hashicorp/go-hclog v0.9.1 h1:9PZfAcVEvez4yhLH2TBU64/h/z4xlFI80cWXRrxuKuM= -github.com/hashicorp/go-hclog v0.9.1/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ= -github.com/hashicorp/go-hclog v0.16.0 h1:uCeOEwSWGMwhJUdpUjk+1cVKIEfGu2/1nFXukimi2MU= -github.com/hashicorp/go-hclog v0.16.0/go.mod h1:whpDNt7SSdeAju8AWKIWsul05p54N/39EeqMAyrmvFQ= +github.com/hashicorp/go-hclog v1.5.0 h1:bI2ocEMgcVlz55Oj1xZNBsVi900c7II+fWDyV9o+13c= +github.com/hashicorp/go-hclog v1.5.0/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= +github.com/hashicorp/go-hclog v1.6.2/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= github.com/hashicorp/go-immutable-radix v1.0.0 h1:AKDB1HM5PWEA7i4nhcpwOrO2byshxBjXVn/J/3+z5/0= github.com/hashicorp/go-immutable-radix v1.0.0/go.mod 
h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= github.com/hashicorp/go-msgpack v0.5.5 h1:i9R9JSrqIz0QVLz3sz+i3YJdT7TTSLcfLLzJi9aZTuI= github.com/hashicorp/go-msgpack v0.5.5/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= -github.com/hashicorp/go-msgpack v1.1.5 h1:9byZdVjKTe5mce63pRVNP1L7UAmdHOTEMGehn6KvJWs= -github.com/hashicorp/go-msgpack v1.1.5/go.mod h1:gWVc3sv/wbDmR3rQsj1CAktEZzoz1YNK9NfGLXJ69/4= +github.com/hashicorp/go-msgpack/v2 v2.1.1 h1:xQEY9yB2wnHitoSzk/B9UjXWRQ67QKu5AOm8aFp8N3I= +github.com/hashicorp/go-msgpack/v2 v2.1.1/go.mod h1:upybraOAblm4S7rx0+jeNy+CWWhzywQsSRV5033mMu4= github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= github.com/hashicorp/go-uuid v1.0.0 h1:RS8zrF7PhGwyNPOtxSClXXj9HA8feRnJzgnI1RJCSnM= github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= @@ -30,39 +47,83 @@ github.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCO github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea h1:xykPFhrBAS2J0VBzVa5e80b5ZtYuNQtgXjN40qBZlD4= github.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea/go.mod h1:pNv7Wc3ycL6F5oOWn+tPGo2gWD4a5X+yp/ntwdKLjRk= -github.com/hashicorp/raft-boltdb v0.0.0-20210409134258-03c10cc3d4ea h1:RxcPJuutPRM8PUOyiweMmkuNO+RJyfy2jds2gfvgNmU= -github.com/hashicorp/raft-boltdb v0.0.0-20210409134258-03c10cc3d4ea/go.mod h1:qRd6nFJYYS6Iqnc/8HcUmko2/2Gw8qTFEmxDLii6W5I= -github.com/mattn/go-colorable v0.1.4 h1:snbPLB8fVfU9iwbbo30TPtbLRzwWu6aJS6Xh4eaaviA= -github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= -github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= -github.com/mattn/go-isatty v0.0.10 h1:qxFzApOv4WsAL965uUPIsXzAKCZxN2p9UqdhFS4ZW10= -github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84= +github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= +github.com/mattn/go-colorable v0.1.12 h1:jF+Du6AlPIjs2BiUiQlKOX0rt3SujHxPnksPKZbaA40= +github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= +github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= +github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= +github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 
+github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= +github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v0.9.2/go.mod h1:OsXs2jCmiKlQ1lTBmv21f2mNfw4xf/QclQDMrYNZzcM= +github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= +github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= +github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/common v0.0.0-20181126121408-4724e9255275/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= -github.com/prometheus/procfs v0.0.0-20181204211112-1dc9a6cbc91a/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= +github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= +github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= +github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify 
v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= +golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190523142557-0e01d883c5c5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191008105621-543471e840be h1:QAcqgptGM8IQBC9K/RC4o+O9YmqEm0diQn9QmZw/0mU= -golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210414055047-fe65e336abe0 h1:g9s1Ppvvun/fI+BptTMj909BBIcGrzQ32k9FNlcevOE= -golang.org/x/sys v0.0.0-20210414055047-fe65e336abe0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/tools v0.0.0-20190424220101-1e8e1cfdf96b/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= +gopkg.in/check.v1 
v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/fuzzy/leadershiptransfer_test.go b/fuzzy/leadershiptransfer_test.go index 8d254a2e7..1e3b2b157 100644 --- a/fuzzy/leadershiptransfer_test.go +++ b/fuzzy/leadershiptransfer_test.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package fuzzy import ( diff --git a/fuzzy/membership_test.go b/fuzzy/membership_test.go index 9eb643b88..9c12e20b5 100644 --- a/fuzzy/membership_test.go +++ b/fuzzy/membership_test.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package fuzzy import ( diff --git a/fuzzy/node.go b/fuzzy/node.go index cbcc37c34..0b382b7f7 100644 --- a/fuzzy/node.go +++ b/fuzzy/node.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package fuzzy import ( diff --git a/fuzzy/partition_test.go b/fuzzy/partition_test.go index 9007c425b..0b40c0adc 100644 --- a/fuzzy/partition_test.go +++ b/fuzzy/partition_test.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package fuzzy import ( diff --git a/fuzzy/resolve.go b/fuzzy/resolve.go index 965fb4b37..14eb38efb 100644 --- a/fuzzy/resolve.go +++ b/fuzzy/resolve.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package fuzzy import ( @@ -28,7 +31,7 @@ func resolveDirectory(dir string, create bool) (string, error) { } if create { if _, err := os.Stat(resolved); os.IsNotExist(err) { - if err := os.MkdirAll(resolved, 0744); err != nil { + if err := os.MkdirAll(resolved, 0o744); err != nil { return "", err } } diff --git a/fuzzy/simple_test.go b/fuzzy/simple_test.go index ff4b7f072..1e3027cf4 100644 --- a/fuzzy/simple_test.go +++ b/fuzzy/simple_test.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package fuzzy import ( diff --git a/fuzzy/slowvoter_test.go b/fuzzy/slowvoter_test.go index c3c2b5692..f5c62c228 100644 --- a/fuzzy/slowvoter_test.go +++ b/fuzzy/slowvoter_test.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package fuzzy import ( diff --git a/fuzzy/transport.go b/fuzzy/transport.go index b7e0a65fe..d6030d9b0 100644 --- a/fuzzy/transport.go +++ b/fuzzy/transport.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: MPL-2.0 + package fuzzy import ( @@ -5,20 +8,18 @@ import ( "bytes" "errors" "fmt" - "github.com/hashicorp/go-hclog" "io" "os" "path/filepath" "sync" "time" - "github.com/hashicorp/go-msgpack/codec" + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-msgpack/v2/codec" "github.com/hashicorp/raft" ) -var ( - codecHandle codec.MsgpackHandle -) +var codecHandle codec.MsgpackHandle type appendEntries struct { source string @@ -220,6 +221,11 @@ func (t *transport) RequestVote(id raft.ServerID, target raft.ServerAddress, arg return t.sendRPC(string(target), args, resp) } +// RequestPreVote sends the appropriate RPC to the target node. +func (t *transport) RequestPreVote(id raft.ServerID, target raft.ServerAddress, args *raft.RequestPreVoteRequest, resp *raft.RequestPreVoteResponse) error { + return t.sendRPC(string(target), args, resp) +} + // InstallSnapshot is used to push a snapshot down to a follower. The data is read from // the ReadCloser and streamed to the client. func (t *transport) InstallSnapshot(id raft.ServerID, target raft.ServerAddress, args *raft.InstallSnapshotRequest, resp *raft.InstallSnapshotResponse, data io.Reader) error { diff --git a/fuzzy/verifier.go b/fuzzy/verifier.go index 44b5ad5b5..95a815c9f 100644 --- a/fuzzy/verifier.go +++ b/fuzzy/verifier.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package fuzzy import ( @@ -45,7 +48,13 @@ func (v *appendEntriesVerifier) PreRequestVote(src, target string, rv *raft.Requ func (v *appendEntriesVerifier) PreAppendEntries(src, target string, req *raft.AppendEntriesRequest) (*raft.AppendEntriesResponse, error) { term := req.Term - ldr := string(req.Leader) + var ldr string + if len(req.RPCHeader.Addr) > 0 { + ldr = string(req.RPCHeader.Addr) + } else { + ldr = string(req.Leader) + } + if ldr != src { v.Lock() defer v.Unlock() diff --git a/go.mod b/go.mod index 09803b688..7d35dd979 100644 --- a/go.mod +++ b/go.mod @@ -1,10 +1,25 @@ module github.com/hashicorp/raft -go 1.12 +go 1.20 + +retract v1.1.3 // Deleted original tag; module checksum may not be accurate. 
+ +require ( + github.com/armon/go-metrics v0.4.1 + github.com/hashicorp/go-hclog v1.6.2 + github.com/hashicorp/go-msgpack/v2 v2.1.2 + github.com/stretchr/testify v1.8.4 +) require ( - github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878 - github.com/hashicorp/go-hclog v0.9.1 - github.com/hashicorp/go-msgpack v0.5.5 - github.com/stretchr/testify v1.3.0 + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/fatih/color v1.13.0 // indirect + github.com/hashicorp/go-immutable-radix v1.0.0 // indirect + github.com/hashicorp/golang-lru v0.5.0 // indirect + github.com/kr/pretty v0.2.1 // indirect + github.com/mattn/go-colorable v0.1.12 // indirect + github.com/mattn/go-isatty v0.0.14 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + golang.org/x/sys v0.13.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index f087772cc..0801b78b5 100644 --- a/go.sum +++ b/go.sum @@ -1,39 +1,124 @@ -github.com/DataDog/datadog-go v2.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= -github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878 h1:EFSB7Zo9Eg91v7MJPVsifUysc/wPdN+NOnVe6bWbdBM= -github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878/go.mod h1:3AMJUQhVx52RsWOnlkpikZr01T/yAVN2gn0861vByNg= +github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= +github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA= +github.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= +github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= +github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= +github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= +github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= +github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= +github.com/gogo/protobuf 
v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= -github.com/hashicorp/go-hclog v0.9.1 h1:9PZfAcVEvez4yhLH2TBU64/h/z4xlFI80cWXRrxuKuM= -github.com/hashicorp/go-hclog v0.9.1/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ= +github.com/hashicorp/go-hclog v1.6.2 h1:NOtoftovWkDheyUM/8JW3QMiXyxJK3uHRK7wV04nD2I= +github.com/hashicorp/go-hclog v1.6.2/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= github.com/hashicorp/go-immutable-radix v1.0.0 h1:AKDB1HM5PWEA7i4nhcpwOrO2byshxBjXVn/J/3+z5/0= github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= -github.com/hashicorp/go-msgpack v0.5.5 h1:i9R9JSrqIz0QVLz3sz+i3YJdT7TTSLcfLLzJi9aZTuI= -github.com/hashicorp/go-msgpack v0.5.5/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= +github.com/hashicorp/go-msgpack/v2 v2.1.2 h1:4Ee8FTp834e+ewB71RDrQ0VKpyFdrKOjvYtnQ/ltVj0= +github.com/hashicorp/go-msgpack/v2 v2.1.2/go.mod h1:upybraOAblm4S7rx0+jeNy+CWWhzywQsSRV5033mMu4= github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= github.com/hashicorp/go-uuid v1.0.0 h1:RS8zrF7PhGwyNPOtxSClXXj9HA8feRnJzgnI1RJCSnM= github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= +github.com/mattn/go-colorable v0.1.12 h1:jF+Du6AlPIjs2BiUiQlKOX0rt3SujHxPnksPKZbaA40= +github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= +github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= +github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= +github.com/mattn/go-isatty 
v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= +github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v0.9.2/go.mod h1:OsXs2jCmiKlQ1lTBmv21f2mNfw4xf/QclQDMrYNZzcM= +github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= +github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= +github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/common v0.0.0-20181126121408-4724e9255275/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= -github.com/prometheus/procfs v0.0.0-20181204211112-1dc9a6cbc91a/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= +github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= +github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= +github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= 
+github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= -golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/inmem_snapshot.go b/inmem_snapshot.go index 5e0c202fa..d23bc2099 
100644 --- a/inmem_snapshot.go +++ b/inmem_snapshot.go @@ -1,10 +1,12 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( "bytes" "fmt" "io" - "io/ioutil" "sync" ) @@ -85,7 +87,7 @@ func (m *InmemSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, erro // Make a copy of the contents, since a bytes.Buffer can only be read // once. contents := bytes.NewBuffer(m.latest.contents.Bytes()) - return &m.latest.meta, ioutil.NopCloser(contents), nil + return &m.latest.meta, io.NopCloser(contents), nil } // Write appends the given bytes to the snapshot contents diff --git a/inmem_snapshot_test.go b/inmem_snapshot_test.go index 9fc7ae295..345286c25 100644 --- a/inmem_snapshot_test.go +++ b/inmem_snapshot_test.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( diff --git a/inmem_store.go b/inmem_store.go index 6285610f9..730d03f28 100644 --- a/inmem_store.go +++ b/inmem_store.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( diff --git a/inmem_transport.go b/inmem_transport.go index b5bdecc73..561ba73d7 100644 --- a/inmem_transport.go +++ b/inmem_transport.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( @@ -122,6 +125,18 @@ func (i *InmemTransport) RequestVote(id ServerID, target ServerAddress, args *Re return nil } +func (i *InmemTransport) RequestPreVote(id ServerID, target ServerAddress, args *RequestPreVoteRequest, resp *RequestPreVoteResponse) error { + rpcResp, err := i.makeRPC(target, args, nil, i.timeout) + if err != nil { + return err + } + + // Copy the result back + out := rpcResp.Response.(*RequestPreVoteResponse) + *resp = *out + return nil +} + // InstallSnapshot implements the Transport interface. func (i *InmemTransport) InstallSnapshot(id ServerID, target ServerAddress, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error { rpcResp, err := i.makeRPC(target, args, data, 10*i.timeout) diff --git a/inmem_transport_test.go b/inmem_transport_test.go index 2ac8709a0..fa4b889b0 100644 --- a/inmem_transport_test.go +++ b/inmem_transport_test.go @@ -1,9 +1,13 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( - "github.com/stretchr/testify/require" "testing" "time" + + "github.com/stretchr/testify/require" ) func TestInmemTransportImpl(t *testing.T) { diff --git a/integ_test.go b/integ_test.go index 310e5868e..f5bb65121 100644 --- a/integ_test.go +++ b/integ_test.go @@ -1,14 +1,19 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( "bytes" + "context" "fmt" - "io/ioutil" "os" + "sync/atomic" "testing" "time" "github.com/hashicorp/go-hclog" + "github.com/stretchr/testify/require" ) // CheckInteg will skip a test if integration testing is not enabled. 
@@ -66,22 +71,22 @@ func (r *RaftEnv) Restart(t *testing.T) { r.raft = raft } -func MakeRaft(t *testing.T, conf *Config, bootstrap bool) *RaftEnv { +func MakeRaft(tb testing.TB, conf *Config, bootstrap bool) *RaftEnv { // Set the config if conf == nil { - conf = inmemConfig(t) + conf = inmemConfig(tb) } - dir, err := ioutil.TempDir("", "raft") + dir, err := os.MkdirTemp("", "raft") if err != nil { - t.Fatalf("err: %v ", err) + tb.Fatalf("err: %v ", err) } stable := NewInmemStore() snap, err := NewFileSnapshotStore(dir, 3, nil) if err != nil { - t.Fatalf("err: %v", err) + tb.Fatalf("err: %v", err) } env := &RaftEnv{ @@ -93,7 +98,7 @@ func MakeRaft(t *testing.T, conf *Config, bootstrap bool) *RaftEnv { } trans, err := NewTCPTransport("localhost:0", nil, 2, time.Second, nil) if err != nil { - t.Fatalf("err: %v", err) + tb.Fatalf("err: %v", err) } env.logger = hclog.New(&hclog.LoggerOptions{ @@ -110,14 +115,14 @@ func MakeRaft(t *testing.T, conf *Config, bootstrap bool) *RaftEnv { }) err = BootstrapCluster(conf, stable, stable, snap, trans, configuration) if err != nil { - t.Fatalf("err: %v", err) + tb.Fatalf("err: %v", err) } } env.logger.Info("starting node", "addr", trans.LocalAddr()) conf.Logger = env.logger raft, err := NewRaft(conf, env.fsm, stable, stable, snap, trans) if err != nil { - t.Fatalf("err: %v", err) + tb.Fatalf("err: %v", err) } env.raft = raft return env @@ -152,7 +157,7 @@ WAIT: goto CHECK } -func WaitFuture(f Future, t *testing.T) error { +func WaitFuture(f Future) error { timer := time.AfterFunc(1000*time.Millisecond, func() { panic(fmt.Errorf("timeout waiting for future %v", f)) }) @@ -160,10 +165,10 @@ func WaitFuture(f Future, t *testing.T) error { return f.Error() } -func NoErr(err error, t *testing.T) { - t.Helper() +func NoErr(err error, tb testing.TB) { + tb.Helper() if err != nil { - t.Fatalf("err: %v", err) + tb.Fatalf("err: %v", err) } } @@ -240,7 +245,7 @@ func TestRaft_Integ(t *testing.T) { futures = append(futures, leader.raft.Apply(logBytes(i, sz), 0)) } for _, f := range futures { - NoErr(WaitFuture(f, t), t) + NoErr(WaitFuture(f), t) leader.logger.Debug("applied", "index", f.Index(), "size", sz) } totalApplied += n @@ -249,7 +254,7 @@ func TestRaft_Integ(t *testing.T) { applyAndWait(env1, 100, 10) // Do a snapshot - NoErr(WaitFuture(env1.raft.Snapshot(), t), t) + NoErr(WaitFuture(env1.raft.Snapshot()), t) // Join a few nodes! 
var envs []*RaftEnv @@ -257,7 +262,7 @@ func TestRaft_Integ(t *testing.T) { conf.LocalID = ServerID(fmt.Sprintf("next-batch-%d", i)) env := MakeRaft(t, conf, false) addr := env.trans.LocalAddr() - NoErr(WaitFuture(env1.raft.AddVoter(conf.LocalID, addr, 0, 0), t), t) + NoErr(WaitFuture(env1.raft.AddVoter(conf.LocalID, addr, 0, 0)), t) envs = append(envs, env) } @@ -269,7 +274,7 @@ func TestRaft_Integ(t *testing.T) { applyAndWait(leader, 100, 10) // Snapshot the leader - NoErr(WaitFuture(leader.raft.Snapshot(), t), t) + NoErr(WaitFuture(leader.raft.Snapshot()), t) CheckConsistent(append([]*RaftEnv{env1}, envs...), t) @@ -281,7 +286,7 @@ func TestRaft_Integ(t *testing.T) { applyAndWait(leader, 100, 10000) // snapshot the leader [leaders log should be compacted past the disconnected follower log now] - NoErr(WaitFuture(leader.raft.Snapshot(), t), t) + NoErr(WaitFuture(leader.raft.Snapshot()), t) // Unfortunately we need to wait for the leader to start backing off RPCs to the down follower // such that when the follower comes back up it'll run an election before it gets an rpc from @@ -321,7 +326,7 @@ func TestRaft_Integ(t *testing.T) { conf.LocalID = ServerID(fmt.Sprintf("final-batch-%d", i)) env := MakeRaft(t, conf, false) addr := env.trans.LocalAddr() - NoErr(WaitFuture(leader.raft.AddVoter(conf.LocalID, addr, 0, 0), t), t) + NoErr(WaitFuture(leader.raft.AddVoter(conf.LocalID, addr, 0, 0)), t) envs = append(envs, env) leader, err = WaitForAny(Leader, append([]*RaftEnv{env1}, envs...)) @@ -333,8 +338,8 @@ func TestRaft_Integ(t *testing.T) { NoErr(err, t) // Remove the old nodes - NoErr(WaitFuture(leader.raft.RemoveServer(rm1.raft.localID, 0, 0), t), t) - NoErr(WaitFuture(leader.raft.RemoveServer(rm2.raft.localID, 0, 0), t), t) + NoErr(WaitFuture(leader.raft.RemoveServer(rm1.raft.localID, 0, 0)), t) + NoErr(WaitFuture(leader.raft.RemoveServer(rm2.raft.localID, 0, 0)), t) // Shoot the leader env1.Release() @@ -355,3 +360,229 @@ func TestRaft_Integ(t *testing.T) { e.Release() } } + +func TestRaft_RestartFollower_LongInitialHeartbeat(t *testing.T) { + CheckInteg(t) + tests := []struct { + name string + restartInitialTimeouts time.Duration + expectNewLeader bool + }{ + {"Default", 0, true}, + {"InitialHigher", time.Second, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + conf := DefaultConfig() + conf.LocalID = ServerID("first") + conf.HeartbeatTimeout = 50 * time.Millisecond + conf.ElectionTimeout = 50 * time.Millisecond + conf.LeaderLeaseTimeout = 50 * time.Millisecond + conf.CommitTimeout = 5 * time.Millisecond + conf.SnapshotThreshold = 100 + conf.TrailingLogs = 10 + + // Create a single node + env1 := MakeRaft(t, conf, true) + NoErr(WaitFor(env1, Leader), t) + + // Join a few nodes! + var envs []*RaftEnv + for i := 0; i < 2; i++ { + conf.LocalID = ServerID(fmt.Sprintf("next-batch-%d", i)) + env := MakeRaft(t, conf, false) + addr := env.trans.LocalAddr() + NoErr(WaitFuture(env1.raft.AddVoter(conf.LocalID, addr, 0, 0)), t) + envs = append(envs, env) + } + allEnvs := append([]*RaftEnv{env1}, envs...) + + // Wait for a leader + _, err := WaitForAny(Leader, append([]*RaftEnv{env1}, envs...)) + NoErr(err, t) + + CheckConsistent(append([]*RaftEnv{env1}, envs...), t) + // TODO without this sleep, the restarted follower doesn't have any stored config + // and aborts the election because it doesn't know of any peers. Shouldn't + // CheckConsistent prevent that? 
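WaitFuture drops its unused *testing.T parameter because every cluster operation exercised here (Apply, AddVoter, RemoveServer, Snapshot) already returns a future whose Error() blocks until the operation completes or fails. A hedged sketch of that future-based flow against the public API; the function name, payload, and timeout values are placeholders:

```go
package raftexample

import (
	"time"

	"github.com/hashicorp/raft"
)

// applyThenAddVoter sketches the future-based API the tests lean on: start the
// operation, then block on Error() to learn the outcome.
func applyThenAddVoter(r *raft.Raft, payload []byte, id raft.ServerID, addr raft.ServerAddress) error {
	// Apply returns an ApplyFuture; Error() blocks until the entry is
	// committed and applied to the FSM, or the operation fails.
	if err := r.Apply(payload, 10*time.Second).Error(); err != nil {
		return err
	}
	// AddVoter returns an IndexFuture with the same blocking Error() contract.
	return r.AddVoter(id, addr, 0, 0).Error()
}
```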
+ time.Sleep(time.Second) + + // shutdown a follower + disconnected := envs[len(envs)-1] + disconnected.logger.Info("stopping follower") + disconnected.Shutdown() + + seeNewLeader := func(o *Observation) bool { _, ok := o.Data.(LeaderObservation); return ok } + leaderCh := make(chan Observation) + // TODO Closing this channel results in panics, even though we're calling Release. + // defer close(leaderCh) + leaderChanges := new(uint32) + go func() { + for range leaderCh { + atomic.AddUint32(leaderChanges, 1) + } + }() + + requestVoteCh := make(chan Observation) + seeRequestVote := func(o *Observation) bool { _, ok := o.Data.(RequestVoteRequest); return ok } + requestVotes := new(uint32) + go func() { + for range requestVoteCh { + atomic.AddUint32(requestVotes, 1) + } + }() + + for _, env := range allEnvs { + env.raft.RegisterObserver(NewObserver(leaderCh, false, seeNewLeader)) + } + + // Unfortunately we need to wait for the leader to start backing off RPCs to the down follower + // such that when the follower comes back up it'll run an election before it gets an rpc from + // the leader + time.Sleep(time.Second * 5) + + if tt.restartInitialTimeouts != 0 { + disconnected.conf.HeartbeatTimeout = tt.restartInitialTimeouts + disconnected.conf.ElectionTimeout = tt.restartInitialTimeouts + } + disconnected.logger.Info("restarting follower") + disconnected.Restart(t) + + time.Sleep(time.Second * 2) + + if tt.expectNewLeader { + require.NotEqual(t, 0, atomic.LoadUint32(leaderChanges)) + } else { + require.Equal(t, uint32(0), atomic.LoadUint32(leaderChanges)) + } + + if tt.restartInitialTimeouts != 0 { + for _, env := range envs { + env.raft.RegisterObserver(NewObserver(requestVoteCh, false, seeRequestVote)) + NoErr(env.raft.ReloadConfig(ReloadableConfig{ + TrailingLogs: conf.TrailingLogs, + SnapshotInterval: conf.SnapshotInterval, + SnapshotThreshold: conf.SnapshotThreshold, + HeartbeatTimeout: 250 * time.Millisecond, + ElectionTimeout: 250 * time.Millisecond, + }), t) + } + // Make sure that reload by itself doesn't trigger a vote + time.Sleep(300 * time.Millisecond) + require.Equal(t, uint32(0), atomic.LoadUint32(requestVotes)) + + // Stop the leader, ensure that we don't see a request vote within the first 50ms + // (original config of the non-restarted follower), but that we do see one within + // the 250ms both followers should now be using for heartbeat timeout. Well, not + // quite: we wait for two heartbeat intervals (plus a fudge factor), because the + // first time around, last contact will have been recent enough that no vote will + // be triggered. + env1.logger.Info("stopping leader") + env1.Shutdown() + time.Sleep(50 * time.Millisecond) + require.Equal(t, uint32(0), atomic.LoadUint32(requestVotes)) + time.Sleep(600 * time.Millisecond) + require.NotEqual(t, uint32(0), atomic.LoadUint32(requestVotes)) + } + + for _, e := range allEnvs { + e.Release() + } + }) + } +} + +// TestRaft_PreVote_LeaderSpam tests that when a leader spams the followers +// with pre-vote requests they can still transition to candidate. +// This test needs to live here because we rely on the transport heartbeat fast-path +// as a trick to keep heartbeats from stabilizing the cluster. +// That fast-path only exists in the net transport.
+func TestRaft_PreVote_LeaderSpam(t *testing.T) { + CheckInteg(t) + conf := DefaultConfig() + conf.LocalID = ServerID("first") + conf.HeartbeatTimeout = 50 * time.Millisecond + conf.ElectionTimeout = 50 * time.Millisecond + conf.LeaderLeaseTimeout = 50 * time.Millisecond + conf.CommitTimeout = 5 * time.Second + conf.SnapshotThreshold = 100 + conf.TrailingLogs = 10 + + // Create a single node + leader := MakeRaft(t, conf, true) + NoErr(WaitFor(leader, Leader), t) + + // Join a few nodes! + var followers []*RaftEnv + for i := 0; i < 2; i++ { + conf.LocalID = ServerID(fmt.Sprintf("next-batch-%d", i)) + env := MakeRaft(t, conf, false) + addr := env.trans.LocalAddr() + NoErr(WaitFuture(leader.raft.AddVoter(conf.LocalID, addr, 0, 0)), t) + followers = append(followers, env) + } + + // Wait for a leader + _, err := WaitForAny(Leader, append([]*RaftEnv{leader}, followers...)) + NoErr(err, t) + + CheckConsistent(append([]*RaftEnv{leader}, followers...), t) + + leaderT := leader.raft.trans + + // spam all the followers with pre-vote requests from the leader + // those requests should be granted as long as the leader haven't changed. + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go func() { + for { + ticker := time.NewTicker(conf.HeartbeatTimeout / 2) + for _, f := range followers { + rsp := RequestPreVoteResponse{} + reqPreVote := RequestPreVoteRequest{ + RPCHeader: leader.raft.getRPCHeader(), + Term: leader.raft.getCurrentTerm() + 1, + LastLogIndex: leader.raft.getLastIndex(), + LastLogTerm: leader.raft.getCurrentTerm(), + } + // We don't need to check the error here because when leader change + // it will start failing with "rejecting pre-vote request since we have a leader" + _ = leaderT.(WithPreVote).RequestPreVote(f.raft.localID, f.raft.localAddr, &reqPreVote, &rsp) + } + select { + case <-ticker.C: + case <-ctx.Done(): + return + } + } + }() + time.Sleep(time.Second) + + // for all followers ignore heartbeat from current leader, so we can transition to candidate state. + // the purpose of this test is to verify that spamming nodes with pre-votes don't cause them to never + // transition to Candidates. + for _, f := range followers { + //copy f to avoid data race + f1 := f + f1.trans.SetHeartbeatHandler(func(rpc RPC) { + if a, ok := rpc.Command.(*AppendEntriesRequest); ok { + if ServerID(a.GetRPCHeader().ID) == leader.raft.localID { + resp := &AppendEntriesResponse{ + RPCHeader: f1.raft.getRPCHeader(), + Term: f1.raft.getCurrentTerm(), + LastLog: f1.raft.getLastIndex(), + Success: false, + NoRetryBackoff: false, + } + rpc.Respond(resp, nil) + } else { + f.raft.processHeartbeat(rpc) + } + } + }) + } + time.Sleep(1 * time.Second) + // New leader should be one of the former followers. + _, err = WaitForAny(Leader, followers) + NoErr(err, t) +} diff --git a/log.go b/log.go index a637d5193..4ae219327 100644 --- a/log.go +++ b/log.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( @@ -29,7 +32,7 @@ const ( // LogBarrier is used to ensure all preceding operations have been // applied to the FSM. It is similar to LogNoop, but instead of returning - // once committed, it only returns once the FSM manager acks it. Otherwise + // once committed, it only returns once the FSM manager acks it. Otherwise, // it is possible there are operations committed but not yet applied to // the FSM. LogBarrier @@ -119,13 +122,26 @@ type LogStore interface { // StoreLog stores a log entry. 
StoreLog(log *Log) error - // StoreLogs stores multiple log entries. + // StoreLogs stores multiple log entries. By default the logs stored may not be contiguous with previous logs (i.e. may have a gap in Index since the last log written). If an implementation can't tolerate this it may optionally implement `MonotonicLogStore` to indicate that this is not allowed. This changes Raft's behaviour after restoring a user snapshot to remove all previous logs instead of relying on a "gap" to signal the discontinuity between logs before the snapshot and logs after. StoreLogs(logs []*Log) error // DeleteRange deletes a range of log entries. The range is inclusive. DeleteRange(min, max uint64) error } +// MonotonicLogStore is an optional interface for LogStore implementations that +// cannot tolerate gaps in between the Index values of consecutive log entries. For example, +// this may allow more efficient indexing because the Index values are densely populated. If true is +// returned, Raft will avoid relying on gaps to trigger re-synching logs on followers after a +// snapshot is restored. The LogStore must have an efficient implementation of +// DeleteLogs for the case where all logs are removed, as this must be called after snapshot restore when gaps are not allowed. +// We avoid deleting all records for LogStores that do not implement MonotonicLogStore +// because although it's always correct to do so, it has a major negative performance impact on the BoltDB store that is currently +// the most widely used. +type MonotonicLogStore interface { + IsMonotonic() bool +} + func oldestLog(s LogStore) (Log, error) { var l Log diff --git a/log_cache.go b/log_cache.go index 7328a1203..2cc3885aa 100644 --- a/log_cache.go +++ b/log_cache.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( @@ -30,6 +33,16 @@ func NewLogCache(capacity int, store LogStore) (*LogCache, error) { return c, nil } +// IsMonotonic implements the MonotonicLogStore interface. This is a shim to +// expose the underyling store as monotonically indexed or not. +func (c *LogCache) IsMonotonic() bool { + if store, ok := c.store.(MonotonicLogStore); ok { + return store.IsMonotonic() + } + + return false +} + func (c *LogCache) GetLog(idx uint64, log *Log) error { // Check the buffer for an entry c.l.RLock() diff --git a/log_cache_test.go b/log_cache_test.go index 95bfa0f9b..fb9612dd6 100644 --- a/log_cache_test.go +++ b/log_cache_test.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( diff --git a/log_test.go b/log_test.go index 92a7da0c2..b1d7b3168 100644 --- a/log_test.go +++ b/log_test.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( @@ -25,15 +28,15 @@ func TestOldestLog(t *testing.T) { { Name: "simple case", Logs: []*Log{ - &Log{ + { Index: 1234, Term: 1, }, - &Log{ + { Index: 1235, Term: 1, }, - &Log{ + { Index: 1236, Term: 2, }, @@ -73,16 +76,16 @@ func TestEmitsLogStoreMetrics(t *testing.T) { s := NewInmemStore() logs := []*Log{ - &Log{ + { Index: 1234, Term: 1, AppendedAt: time.Now(), }, - &Log{ + { Index: 1235, Term: 1, }, - &Log{ + { Index: 1236, Term: 2, }, diff --git a/net_transport.go b/net_transport.go index 3ac845290..1bac17d66 100644 --- a/net_transport.go +++ b/net_transport.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. 
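The MonotonicLogStore interface introduced above is optional: a LogStore implements it only when it cannot tolerate gaps between consecutive Index values, and LogCache simply forwards the answer from the wrapped store. A minimal sketch of a store opting in; denseStore is a hypothetical type that embeds the in-memory store purely for illustration:

```go
package raftexample

import "github.com/hashicorp/raft"

// denseStore is a hypothetical LogStore that requires densely packed indexes.
// By implementing MonotonicLogStore it tells raft to delete all existing logs
// after a user snapshot is restored rather than leaving a gap in the index.
type denseStore struct {
	*raft.InmemStore // embed a real LogStore implementation for the sketch
}

// IsMonotonic marks the store as unable to tolerate index gaps.
func (s *denseStore) IsMonotonic() bool { return true }

// Compile-time checks that the sketch satisfies both interfaces.
var (
	_ raft.LogStore          = (*denseStore)(nil)
	_ raft.MonotonicLogStore = (*denseStore)(nil)
)
```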
+// SPDX-License-Identifier: MPL-2.0 + package raft import ( @@ -11,8 +14,9 @@ import ( "sync" "time" + "github.com/armon/go-metrics" "github.com/hashicorp/go-hclog" - "github.com/hashicorp/go-msgpack/codec" + "github.com/hashicorp/go-msgpack/v2/codec" ) const ( @@ -20,13 +24,34 @@ const ( rpcRequestVote rpcInstallSnapshot rpcTimeoutNow + rpcRequestPreVote // DefaultTimeoutScale is the default TimeoutScale in a NetworkTransport. DefaultTimeoutScale = 256 * 1024 // 256KB - // rpcMaxPipeline controls the maximum number of outstanding - // AppendEntries RPC calls. - rpcMaxPipeline = 128 + // DefaultMaxRPCsInFlight is the default value used for pipelining configuration + // if a zero value is passed. See https://github.com/hashicorp/raft/pull/541 + // for rationale. Note, if this is changed we should update the doc comments + // below for NetworkTransportConfig.MaxRPCsInFlight. + DefaultMaxRPCsInFlight = 2 + + // connReceiveBufferSize is the size of the buffer we will use for reading RPC requests into + // on followers + connReceiveBufferSize = 256 * 1024 // 256KB + + // connSendBufferSize is the size of the buffer we will use for sending RPC request data from + // the leader to followers. + connSendBufferSize = 256 * 1024 // 256KB + + // minInFlightForPipelining is a property of our current pipelining + // implementation and must not be changed unless we change the invariants of + // that implementation. Roughly speaking even with a zero-length in-flight + // buffer we still allow 2 requests to be in-flight before we block because we + // only block after sending and the receiving go-routine always unblocks the + // chan right after first send. This is a constant just to provide context + // rather than a magic number in a few places we have to check invariants to + // avoid panics etc. + minInFlightForPipelining = 2 ) var ( @@ -38,25 +63,21 @@ var ( ErrPipelineShutdown = errors.New("append pipeline closed") ) -/* - -NetworkTransport provides a network based transport that can be -used to communicate with Raft on remote machines. It requires -an underlying stream layer to provide a stream abstraction, which can -be simple TCP, TLS, etc. - -This transport is very simple and lightweight. Each RPC request is -framed by sending a byte that indicates the message type, followed -by the MsgPack encoded request. - -The response is an error string followed by the response object, -both are encoded using MsgPack. - -InstallSnapshot is special, in that after the RPC request we stream -the entire state. That socket is not re-used as the connection state -is not known if there is an error. - -*/ +// NetworkTransport provides a network based transport that can be +// used to communicate with Raft on remote machines. It requires +// an underlying stream layer to provide a stream abstraction, which can +// be simple TCP, TLS, etc. +// +// This transport is very simple and lightweight. Each RPC request is +// framed by sending a byte that indicates the message type, followed +// by the MsgPack encoded request. +// +// The response is an error string followed by the response object, +// both are encoded using MsgPack. +// +// InstallSnapshot is special, in that after the RPC request we stream +// the entire state. That socket is not re-used as the connection state +// is not known if there is an error. 
type NetworkTransport struct { connPool map[ServerAddress][]*netConn connPoolLock sync.Mutex @@ -68,8 +89,10 @@ type NetworkTransport struct { logger hclog.Logger - maxPool int + maxPool int + maxInFlight int + serverAddressLock sync.RWMutex serverAddressProvider ServerAddressProvider shutdown bool @@ -85,6 +108,8 @@ type NetworkTransport struct { timeout time.Duration TimeoutScale int + + msgpackUseNewTimeFormat bool } // NetworkTransportConfig encapsulates configuration for the network transport layer. @@ -100,9 +125,48 @@ type NetworkTransportConfig struct { // MaxPool controls how many connections we will pool MaxPool int + // MaxRPCsInFlight controls the pipelining "optimization" when replicating + // entries to followers. + // + // Setting this to 1 explicitly disables pipelining since no overlapping of + // request processing is allowed. If set to 1 the pipelining code path is + // skipped entirely and every request is entirely synchronous. + // + // If zero is set (or left as default), DefaultMaxRPCsInFlight is used which + // is currently 2. A value of 2 overlaps the preparation and sending of the + // next request while waiting for the previous response, but avoids additional + // queuing. + // + // Historically this was internally fixed at (effectively) 130 however + // performance testing has shown that in practice the pipelining optimization + // combines badly with batching and actually has a very large negative impact + // on commit latency when throughput is high, whilst having very little + // benefit on latency or throughput in any other case! See + // [#541](https://github.com/hashicorp/raft/pull/541) for more analysis of the + // performance impacts. + // + // Increasing this beyond 2 is likely to be beneficial only in very + // high-latency network conditions. HashiCorp doesn't recommend using our own + // products this way. + // + // To maintain the behavior from before version 1.4.1 exactly, set this to + // 130. The old internal constant was 128 but was used directly as a channel + // buffer size. Since we send before blocking on the channel and unblock the + // channel as soon as the receiver is done with the earliest outstanding + // request, even an unbuffered channel (buffer=0) allows one request to be + // sent while waiting for the previous one (i.e. 2 inflight). so the old + // buffer actually allowed 130 RPCs to be inflight at once. + MaxRPCsInFlight int + // Timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply // the timeout by (SnapshotSize / TimeoutScale). Timeout time.Duration + + // MsgpackUseNewTimeFormat when set to true, force the underlying msgpack + // codec to use the new format of time.Time when encoding (used in + // go-msgpack v1.1.5 by default). Decoding is not affected, as all + // go-msgpack v2.1.0+ decoders know how to decode both formats. 
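MaxRPCsInFlight and MsgpackUseNewTimeFormat are plain fields on NetworkTransportConfig, so tuning them is just a matter of passing a config to the transport constructor, as the updated tests later in this diff do. A hedged sketch; the bind address and chosen values are placeholders, not recommendations:

```go
package raftexample

import (
	"log"
	"time"

	"github.com/hashicorp/raft"
)

func newTransport() *raft.NetworkTransport {
	config := &raft.NetworkTransportConfig{
		MaxPool: 3,
		// Zero means "use DefaultMaxRPCsInFlight (2)"; 1 disables pipelining entirely.
		MaxRPCsInFlight: raft.DefaultMaxRPCsInFlight,
		Timeout:         10 * time.Second,
		// Only enable once every peer decodes with go-msgpack v2.1.0 or newer.
		MsgpackUseNewTimeFormat: true,
	}
	trans, err := raft.NewTCPTransportWithConfig("127.0.0.1:0", nil, config)
	if err != nil {
		log.Fatalf("failed to create transport: %v", err)
	}
	return trans
}
```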
+ MsgpackUseNewTimeFormat bool } // ServerAddressProvider is a target address to which we invoke an RPC when establishing a connection @@ -154,16 +218,23 @@ func NewNetworkTransportWithConfig( Level: hclog.DefaultLevel, }) } + maxInFlight := config.MaxRPCsInFlight + if maxInFlight == 0 { + // Default zero value + maxInFlight = DefaultMaxRPCsInFlight + } trans := &NetworkTransport{ - connPool: make(map[ServerAddress][]*netConn), - consumeCh: make(chan RPC), - logger: config.Logger, - maxPool: config.MaxPool, - shutdownCh: make(chan struct{}), - stream: config.Stream, - timeout: config.Timeout, - TimeoutScale: DefaultTimeoutScale, - serverAddressProvider: config.ServerAddressProvider, + connPool: make(map[ServerAddress][]*netConn), + consumeCh: make(chan RPC), + logger: config.Logger, + maxPool: config.MaxPool, + maxInFlight: maxInFlight, + shutdownCh: make(chan struct{}), + stream: config.Stream, + timeout: config.Timeout, + TimeoutScale: DefaultTimeoutScale, + serverAddressProvider: config.ServerAddressProvider, + msgpackUseNewTimeFormat: config.MsgpackUseNewTimeFormat, } // Create the connection context and then start our listener. @@ -224,7 +295,7 @@ func (n *NetworkTransport) getStreamContext() context.Context { return n.streamCtx } -// SetHeartbeatHandler is used to setup a heartbeat handler +// SetHeartbeatHandler is used to set up a heartbeat handler // as a fast-pass. This is to avoid head-of-line blocking from // disk IO. func (n *NetworkTransport) SetHeartbeatHandler(cb func(rpc RPC)) { @@ -315,6 +386,8 @@ func (n *NetworkTransport) getConnFromAddressProvider(id ServerID, target Server } func (n *NetworkTransport) getProviderAddressOrFallback(id ServerID, target ServerAddress) ServerAddress { + n.serverAddressLock.RLock() + defer n.serverAddressLock.RUnlock() if n.serverAddressProvider != nil { serverAddressOverride, err := n.serverAddressProvider.ServerAddr(id) if err != nil { @@ -344,10 +417,14 @@ func (n *NetworkTransport) getConn(target ServerAddress) (*netConn, error) { target: target, conn: conn, dec: codec.NewDecoder(bufio.NewReader(conn), &codec.MsgpackHandle{}), - w: bufio.NewWriter(conn), + w: bufio.NewWriterSize(conn, connSendBufferSize), } - netConn.enc = codec.NewEncoder(netConn.w, &codec.MsgpackHandle{}) + netConn.enc = codec.NewEncoder(netConn.w, &codec.MsgpackHandle{ + BasicHandle: codec.BasicHandle{ + TimeNotBuiltin: !n.msgpackUseNewTimeFormat, + }, + }) // Done return netConn, nil @@ -359,7 +436,7 @@ func (n *NetworkTransport) returnConn(conn *netConn) { defer n.connPoolLock.Unlock() key := conn.target - conns, _ := n.connPool[key] + conns := n.connPool[key] if !n.IsShutdown() && len(conns) < n.maxPool { n.connPool[key] = append(conns, conn) @@ -371,6 +448,12 @@ func (n *NetworkTransport) returnConn(conn *netConn) { // AppendEntriesPipeline returns an interface that can be used to pipeline // AppendEntries requests. func (n *NetworkTransport) AppendEntriesPipeline(id ServerID, target ServerAddress) (AppendPipeline, error) { + if n.maxInFlight < minInFlightForPipelining { + // Pipelining is disabled since no more than one request can be outstanding + // at once. Skip the whole code path and use synchronous requests. 
+ return nil, ErrPipelineReplicationNotSupported + } + // Get a connection conn, err := n.getConnFromAddressProvider(id, target) if err != nil { @@ -378,7 +461,7 @@ func (n *NetworkTransport) AppendEntriesPipeline(id ServerID, target ServerAddre } // Create the pipeline - return newNetPipeline(n, conn), nil + return newNetPipeline(n, conn, n.maxInFlight), nil } // AppendEntries implements the Transport interface. @@ -391,6 +474,11 @@ func (n *NetworkTransport) RequestVote(id ServerID, target ServerAddress, args * return n.genericRPC(id, target, rpcRequestVote, args, resp) } +// RequestPreVote implements the Transport interface. +func (n *NetworkTransport) RequestPreVote(id ServerID, target ServerAddress, args *RequestPreVoteRequest, resp *RequestPreVoteResponse) error { + return n.genericRPC(id, target, rpcRequestPreVote, args, resp) +} + // genericRPC handles a simple request/response RPC. func (n *NetworkTransport) genericRPC(id ServerID, target ServerAddress, rpcType uint8, args interface{}, resp interface{}) error { // Get a conn @@ -517,10 +605,14 @@ func (n *NetworkTransport) listen() { // closed. func (n *NetworkTransport) handleConn(connCtx context.Context, conn net.Conn) { defer conn.Close() - r := bufio.NewReader(conn) + r := bufio.NewReaderSize(conn, connReceiveBufferSize) w := bufio.NewWriter(conn) dec := codec.NewDecoder(r, &codec.MsgpackHandle{}) - enc := codec.NewEncoder(w, &codec.MsgpackHandle{}) + enc := codec.NewEncoder(w, &codec.MsgpackHandle{ + BasicHandle: codec.BasicHandle{ + TimeNotBuiltin: !n.msgpackUseNewTimeFormat, + }, + }) for { select { @@ -545,12 +637,19 @@ func (n *NetworkTransport) handleConn(connCtx context.Context, conn net.Conn) { // handleCommand is used to decode and dispatch a single command. func (n *NetworkTransport) handleCommand(r *bufio.Reader, dec *codec.Decoder, enc *codec.Encoder) error { + getTypeStart := time.Now() + // Get the rpc type rpcType, err := r.ReadByte() if err != nil { return err } + // measuring the time to get the first byte separately because the heartbeat conn will hang out here + // for a good while waiting for a heartbeat whereas the append entries/rpc conn should not. 
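When pipelining is disabled (MaxRPCsInFlight set to 1), AppendEntriesPipeline now returns ErrPipelineReplicationNotSupported instead of building a pipeline, and replication falls back to synchronous AppendEntries calls. A sketch of how a direct Transport caller can honor that sentinel; replicateOnce, peerID, peerAddr, and req are placeholders:

```go
package raftexample

import (
	"errors"

	"github.com/hashicorp/raft"
)

// replicateOnce treats ErrPipelineReplicationNotSupported as "send one at a
// time" rather than as a hard failure.
func replicateOnce(trans raft.Transport, peerID raft.ServerID, peerAddr raft.ServerAddress,
	req *raft.AppendEntriesRequest) error {

	pipeline, err := trans.AppendEntriesPipeline(peerID, peerAddr)
	if errors.Is(err, raft.ErrPipelineReplicationNotSupported) {
		// Pipelining disabled or unsupported: fall back to a synchronous RPC.
		var resp raft.AppendEntriesResponse
		return trans.AppendEntries(peerID, peerAddr, req, &resp)
	}
	if err != nil {
		return err
	}
	defer pipeline.Close()

	var resp raft.AppendEntriesResponse
	if _, err := pipeline.AppendEntries(req, &resp); err != nil {
		return err
	}
	// Completion is reported asynchronously on the pipeline's consumer channel.
	future := <-pipeline.Consumer()
	return future.Error()
}
```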
+ metrics.MeasureSince([]string{"raft", "net", "getRPCType"}, getTypeStart) + decodeStart := time.Now() + // Create the RPC object respCh := make(chan RPCResponse, 1) rpc := RPC{ @@ -559,6 +658,7 @@ func (n *NetworkTransport) handleCommand(r *bufio.Reader, dec *codec.Decoder, en // Decode the command isHeartbeat := false + var labels []metrics.Label switch rpcType { case rpcAppendEntries: var req AppendEntriesRequest @@ -567,20 +667,37 @@ func (n *NetworkTransport) handleCommand(r *bufio.Reader, dec *codec.Decoder, en } rpc.Command = &req + leaderAddr := req.RPCHeader.Addr + if len(leaderAddr) == 0 { + leaderAddr = req.Leader + } + // Check if this is a heartbeat - if req.Term != 0 && req.Leader != nil && + if req.Term != 0 && leaderAddr != nil && req.PrevLogEntry == 0 && req.PrevLogTerm == 0 && len(req.Entries) == 0 && req.LeaderCommitIndex == 0 { isHeartbeat = true } + if isHeartbeat { + labels = []metrics.Label{{Name: "rpcType", Value: "Heartbeat"}} + } else { + labels = []metrics.Label{{Name: "rpcType", Value: "AppendEntries"}} + } case rpcRequestVote: var req RequestVoteRequest if err := dec.Decode(&req); err != nil { return err } rpc.Command = &req - + labels = []metrics.Label{{Name: "rpcType", Value: "RequestVote"}} + case rpcRequestPreVote: + var req RequestPreVoteRequest + if err := dec.Decode(&req); err != nil { + return err + } + rpc.Command = &req + labels = []metrics.Label{{Name: "rpcType", Value: "RequestPreVote"}} case rpcInstallSnapshot: var req InstallSnapshotRequest if err := dec.Decode(&req); err != nil { @@ -588,18 +705,22 @@ func (n *NetworkTransport) handleCommand(r *bufio.Reader, dec *codec.Decoder, en } rpc.Command = &req rpc.Reader = io.LimitReader(r, req.Size) - + labels = []metrics.Label{{Name: "rpcType", Value: "InstallSnapshot"}} case rpcTimeoutNow: var req TimeoutNowRequest if err := dec.Decode(&req); err != nil { return err } rpc.Command = &req - + labels = []metrics.Label{{Name: "rpcType", Value: "TimeoutNow"}} default: return fmt.Errorf("unknown rpc type %d", rpcType) } + metrics.MeasureSinceWithLabels([]string{"raft", "net", "rpcDecode"}, decodeStart, labels) + + processStart := time.Now() + // Check for heartbeat fast-path if isHeartbeat { n.heartbeatFnLock.Lock() @@ -620,8 +741,12 @@ func (n *NetworkTransport) handleCommand(r *bufio.Reader, dec *codec.Decoder, en // Wait for response RESP: + // we will differentiate the heartbeat fast path from normal RPCs with labels + metrics.MeasureSinceWithLabels([]string{"raft", "net", "rpcEnqueue"}, processStart, labels) + respWaitStart := time.Now() select { case resp := <-respCh: + defer metrics.MeasureSinceWithLabels([]string{"raft", "net", "rpcRespond"}, respWaitStart, labels) // Send the error first respErr := "" if resp.Error != nil { @@ -686,14 +811,25 @@ func sendRPC(conn *netConn, rpcType uint8, args interface{}) error { return nil } -// newNetPipeline is used to construct a netPipeline from a given -// transport and connection. -func newNetPipeline(trans *NetworkTransport, conn *netConn) *netPipeline { +// newNetPipeline is used to construct a netPipeline from a given transport and +// connection. It is a bug to ever call this with maxInFlight less than 2 +// (minInFlightForPipelining) and will cause a panic. +func newNetPipeline(trans *NetworkTransport, conn *netConn, maxInFlight int) *netPipeline { + if maxInFlight < minInFlightForPipelining { + // Shouldn't happen (tm) since we validate this in the one call site and + // skip pipelining if it's lower. 
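The transport's timing metrics are now labelled by RPC type (raft.net.getRPCType, rpcDecode, rpcEnqueue, rpcRespond), which separates the long-lived heartbeat connection from regular AppendEntries traffic. A sketch of capturing those metrics with the go-metrics in-memory sink the library already depends on; the service name is a placeholder:

```go
package raftexample

import (
	"fmt"
	"time"

	metrics "github.com/armon/go-metrics"
)

// setupMetrics installs an in-memory sink as the global go-metrics target so
// the labelled raft.net.* timings emitted by the transport can be inspected.
func setupMetrics() (*metrics.InmemSink, error) {
	sink := metrics.NewInmemSink(10*time.Second, time.Minute)
	if _, err := metrics.NewGlobal(metrics.DefaultConfig("my-raft-app"), sink); err != nil {
		return nil, err
	}
	return sink, nil
}

// dumpSamples prints whatever timing samples have been recorded so far, for
// example the per-rpcType variants of raft.net.rpcDecode.
func dumpSamples(sink *metrics.InmemSink) {
	for _, interval := range sink.Data() {
		for name, sample := range interval.Samples {
			fmt.Printf("%s: count=%d mean=%.3fms\n", name, sample.Count, sample.Mean())
		}
	}
}
```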
+ panic("pipelining makes no sense if maxInFlight < 2") + } n := &netPipeline{ - conn: conn, - trans: trans, - doneCh: make(chan AppendFuture, rpcMaxPipeline), - inprogressCh: make(chan *appendFuture, rpcMaxPipeline), + conn: conn, + trans: trans, + // The buffer size is 2 less than the configured max because we send before + // waiting on the channel and the decode routine unblocks the channel as + // soon as it's waiting on the first request. So a zero-buffered channel + // still allows 1 request to be sent even while decode is still waiting for + // a response from the previous one. i.e. two are inflight at the same time. + inprogressCh: make(chan *appendFuture, maxInFlight-2), + doneCh: make(chan AppendFuture, maxInFlight-2), shutdownCh: make(chan struct{}), } go n.decodeResponses() @@ -759,7 +895,7 @@ func (n *netPipeline) Consumer() <-chan AppendFuture { return n.doneCh } -// Closed is used to shutdown the pipeline connection. +// Close is used to shut down the pipeline connection. func (n *netPipeline) Close() error { n.shutdownLock.Lock() defer n.shutdownLock.Unlock() diff --git a/net_transport_test.go b/net_transport_test.go index d42d469db..8c824df77 100644 --- a/net_transport_test.go +++ b/net_transport_test.go @@ -1,7 +1,11 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( "bytes" + "context" "fmt" "net" "reflect" @@ -35,7 +39,6 @@ func TestNetworkTransport_CloseStreams(t *testing.T) { // Make the RPC request args := AppendEntriesRequest{ Term: 10, - Leader: []byte("cartman"), PrevLogEntry: 100, PrevLogTerm: 4, Entries: []*Log{ @@ -46,13 +49,20 @@ func TestNetworkTransport_CloseStreams(t *testing.T) { }, }, LeaderCommitIndex: 90, + RPCHeader: RPCHeader{Addr: []byte("cartman")}, } + resp := AppendEntriesResponse{ Term: 4, LastLog: 90, Success: true, } + // errCh is used to report errors from any of the goroutines + // created in this test. + // It is buffered as to not block. + errCh := make(chan error, 100) + // Listen for a request go func() { for { @@ -61,7 +71,7 @@ func TestNetworkTransport_CloseStreams(t *testing.T) { // Verify the command req := rpc.Command.(*AppendEntriesRequest) if !reflect.DeepEqual(req, &args) { - t.Errorf("command mismatch: %#v %#v", *req, args) + errCh <- fmt.Errorf("command mismatch: %#v %#v", *req, args) return } rpc.Respond(&resp, nil) @@ -78,33 +88,38 @@ func TestNetworkTransport_CloseStreams(t *testing.T) { t.Fatalf("err: %v", err) } defer trans2.Close() - var i int - for i = 0; i < 2; i++ { + + for i := 0; i < 2; i++ { // Create wait group wg := &sync.WaitGroup{} - wg.Add(5) - - appendFunc := func() { - defer wg.Done() - var out AppendEntriesResponse - if err := trans2.AppendEntries("id1", trans1.LocalAddr(), &args, &out); err != nil { - t.Fatalf("err: %v", err) - } - - // Verify the response - if !reflect.DeepEqual(resp, out) { - t.Fatalf("command mismatch: %#v %#v", resp, out) - } - } // Try to do parallel appends, should stress the conn pool for i = 0; i < 5; i++ { - go appendFunc() + wg.Add(1) + go func() { + defer wg.Done() + var out AppendEntriesResponse + if err := trans2.AppendEntries("id1", trans1.LocalAddr(), &args, &out); err != nil { + errCh <- err + return + } + + // Verify the response + if !reflect.DeepEqual(resp, out) { + errCh <- fmt.Errorf("command mismatch: %#v %#v", resp, out) + return + } + }() } // Wait for the routines to finish wg.Wait() + // Check if we received any errors from the above goroutines. 
+ if len(errCh) > 0 { + t.Fatal(<-errCh) + } + // Check the conn pool size addr := trans1.LocalAddr() if len(trans2.connPool[addr]) != 3 { @@ -138,9 +153,11 @@ func TestNetworkTransport_Heartbeat_FastPath(t *testing.T) { // Make the RPC request args := AppendEntriesRequest{ - Term: 10, - Leader: []byte("cartman"), + Term: 10, + RPCHeader: RPCHeader{ProtocolVersion: ProtocolVersionMax, Addr: []byte("cartman")}, + Leader: []byte("cartman"), } + resp := AppendEntriesResponse{ Term: 4, LastLog: 90, @@ -183,8 +200,32 @@ func TestNetworkTransport_Heartbeat_FastPath(t *testing.T) { } } -func TestNetworkTransport_AppendEntries(t *testing.T) { +func makeAppendRPC() AppendEntriesRequest { + return AppendEntriesRequest{ + Term: 10, + PrevLogEntry: 100, + PrevLogTerm: 4, + Entries: []*Log{ + { + Index: 101, + Term: 4, + Type: LogNoop, + }, + }, + LeaderCommitIndex: 90, + RPCHeader: RPCHeader{Addr: []byte("cartman")}, + } +} + +func makeAppendRPCResponse() AppendEntriesResponse { + return AppendEntriesResponse{ + Term: 4, + LastLog: 90, + Success: true, + } +} +func TestNetworkTransport_AppendEntries(t *testing.T) { for _, useAddrProvider := range []bool{true, false} { // Transport 1 is consumer trans1, err := makeTransport(t, useAddrProvider, "localhost:0") @@ -195,25 +236,8 @@ func TestNetworkTransport_AppendEntries(t *testing.T) { rpcCh := trans1.Consumer() // Make the RPC request - args := AppendEntriesRequest{ - Term: 10, - Leader: []byte("cartman"), - PrevLogEntry: 100, - PrevLogTerm: 4, - Entries: []*Log{ - { - Index: 101, - Term: 4, - Type: LogNoop, - }, - }, - LeaderCommitIndex: 90, - } - resp := AppendEntriesResponse{ - Term: 4, - LastLog: 90, - Success: true, - } + args := makeAppendRPC() + resp := makeAppendRPCResponse() // Listen for a request go func() { @@ -254,7 +278,6 @@ func TestNetworkTransport_AppendEntries(t *testing.T) { } func TestNetworkTransport_AppendEntriesPipeline(t *testing.T) { - for _, useAddrProvider := range []bool{true, false} { // Transport 1 is consumer trans1, err := makeTransport(t, useAddrProvider, "localhost:0") @@ -265,25 +288,8 @@ func TestNetworkTransport_AppendEntriesPipeline(t *testing.T) { rpcCh := trans1.Consumer() // Make the RPC request - args := AppendEntriesRequest{ - Term: 10, - Leader: []byte("cartman"), - PrevLogEntry: 100, - PrevLogTerm: 4, - Entries: []*Log{ - { - Index: 101, - Term: 4, - Type: LogNoop, - }, - }, - LeaderCommitIndex: 90, - } - resp := AppendEntriesResponse{ - Term: 4, - LastLog: 90, - Success: true, - } + args := makeAppendRPC() + resp := makeAppendRPCResponse() // Listen for a request go func() { @@ -350,25 +356,8 @@ func TestNetworkTransport_AppendEntriesPipeline_CloseStreams(t *testing.T) { rpcCh := trans1.Consumer() // Make the RPC request - args := AppendEntriesRequest{ - Term: 10, - Leader: []byte("cartman"), - PrevLogEntry: 100, - PrevLogTerm: 4, - Entries: []*Log{ - { - Index: 101, - Term: 4, - Type: LogNoop, - }, - }, - LeaderCommitIndex: 90, - } - resp := AppendEntriesResponse{ - Term: 4, - LastLog: 90, - Success: true, - } + args := makeAppendRPC() + resp := makeAppendRPCResponse() shutdownCh := make(chan struct{}) defer close(shutdownCh) @@ -448,8 +437,106 @@ func TestNetworkTransport_AppendEntriesPipeline_CloseStreams(t *testing.T) { } } -func TestNetworkTransport_RequestVote(t *testing.T) { +func TestNetworkTransport_AppendEntriesPipeline_MaxRPCsInFlight(t *testing.T) { + // Test the important cases 0 (default to 2), 1 (disabled), 2 and "some" + for _, max := range []int{0, 1, 2, 10} { + t.Run(fmt.Sprintf("max=%d", 
max), func(t *testing.T) { + config := &NetworkTransportConfig{ + MaxPool: 2, + MaxRPCsInFlight: max, + Timeout: time.Second, + // Don't use test logger as the transport has multiple goroutines and + // causes panics. + ServerAddressProvider: &testAddrProvider{"localhost:0"}, + } + + // Transport 1 is consumer + trans1, err := NewTCPTransportWithConfig("localhost:0", nil, config) + require.NoError(t, err) + defer trans1.Close() + + // Make the RPC request + args := makeAppendRPC() + resp := makeAppendRPCResponse() + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + // Transport 2 makes outbound request + config.ServerAddressProvider = &testAddrProvider{string(trans1.LocalAddr())} + trans2, err := NewTCPTransportWithConfig("localhost:0", nil, config) + require.NoError(t, err) + defer trans2.Close() + + // Kill the transports on the timeout to unblock. That means things that + // shouldn't have blocked did block. + go func() { + <-ctx.Done() + trans2.Close() + trans1.Close() + }() + + // Attempt to pipeline + pipeline, err := trans2.AppendEntriesPipeline("id1", trans1.LocalAddr()) + if max == 1 { + // Max == 1 implies no pipelining + require.EqualError(t, err, ErrPipelineReplicationNotSupported.Error()) + return + } + require.NoError(t, err) + + expectedMax := max + if max == 0 { + // Should have defaulted to 2 + expectedMax = 2 + } + for i := 0; i < expectedMax-1; i++ { + // We should be able to send `max - 1` rpcs before `AppendEntries` + // blocks. It blocks on the `max` one because it it sends before pushing + // to the chan. It will block forever when it does because nothing is + // responding yet. + out := new(AppendEntriesResponse) + _, err := pipeline.AppendEntries(&args, out) + require.NoError(t, err) + } + + // Verify the next send blocks without blocking test forever + errCh := make(chan error, 1) + go func() { + out := new(AppendEntriesResponse) + _, err := pipeline.AppendEntries(&args, out) + errCh <- err + }() + + select { + case err := <-errCh: + require.NoError(t, err) + t.Fatalf("AppendEntries didn't block with %d in flight", max) + case <-time.After(50 * time.Millisecond): + // OK it's probably blocked or we got _really_ unlucky with scheduling! + } + + // Verify that once we receive/respond another one can be sent. + rpc := <-trans1.Consumer() + rpc.Respond(resp, nil) + + // We also need to consume the response from the pipeline in case chan is + // unbuffered (inflight is 2 or 1) + <-pipeline.Consumer() + + // The last append should unblock once the response is received. 
+ select { + case <-errCh: + // OK + case <-time.After(50 * time.Millisecond): + t.Fatalf("last append didn't unblock") + } + }) + } +} + +func TestNetworkTransport_RequestVote(t *testing.T) { for _, useAddrProvider := range []bool{true, false} { // Transport 1 is consumer trans1, err := makeTransport(t, useAddrProvider, "localhost:0") @@ -462,10 +549,11 @@ func TestNetworkTransport_RequestVote(t *testing.T) { // Make the RPC request args := RequestVoteRequest{ Term: 20, - Candidate: []byte("butters"), LastLogIndex: 100, LastLogTerm: 19, + RPCHeader: RPCHeader{Addr: []byte("butters")}, } + resp := RequestVoteResponse{ Term: 100, Granted: false, @@ -510,7 +598,6 @@ func TestNetworkTransport_RequestVote(t *testing.T) { } func TestNetworkTransport_InstallSnapshot(t *testing.T) { - for _, useAddrProvider := range []bool{true, false} { // Transport 1 is consumer trans1, err := makeTransport(t, useAddrProvider, "localhost:0") @@ -523,12 +610,13 @@ func TestNetworkTransport_InstallSnapshot(t *testing.T) { // Make the RPC request args := InstallSnapshotRequest{ Term: 10, - Leader: []byte("kyle"), LastLogIndex: 100, LastLogTerm: 9, Peers: []byte("blah blah"), Size: 10, + RPCHeader: RPCHeader{Addr: []byte("kyle")}, } + resp := InstallSnapshotResponse{ Term: 10, Success: true, @@ -550,7 +638,7 @@ func TestNetworkTransport_InstallSnapshot(t *testing.T) { rpc.Reader.Read(buf) // Compare - if bytes.Compare(buf, []byte("0123456789")) != 0 { + if !bytes.Equal(buf, []byte("0123456789")) { t.Errorf("bad buf %v", buf) return } @@ -631,7 +719,6 @@ func TestNetworkTransport_PooledConn(t *testing.T) { // Make the RPC request args := AppendEntriesRequest{ Term: 10, - Leader: []byte("cartman"), PrevLogEntry: 100, PrevLogTerm: 4, Entries: []*Log{ @@ -642,13 +729,20 @@ func TestNetworkTransport_PooledConn(t *testing.T) { }, }, LeaderCommitIndex: 90, + RPCHeader: RPCHeader{Addr: []byte("cartman")}, } + resp := AppendEntriesResponse{ Term: 4, LastLog: 90, Success: true, } + // errCh is used to report errors from any of the goroutines + // created in this test. + // It is buffered as to not block. + errCh := make(chan error, 100) + // Listen for a request go func() { for { @@ -657,7 +751,7 @@ func TestNetworkTransport_PooledConn(t *testing.T) { // Verify the command req := rpc.Command.(*AppendEntriesRequest) if !reflect.DeepEqual(req, &args) { - t.Errorf("command mismatch: %#v %#v", *req, args) + errCh <- fmt.Errorf("command mismatch: %#v %#v", *req, args) return } rpc.Respond(&resp, nil) @@ -677,29 +771,35 @@ func TestNetworkTransport_PooledConn(t *testing.T) { // Create wait group wg := &sync.WaitGroup{} - wg.Add(5) - - appendFunc := func() { - defer wg.Done() - var out AppendEntriesResponse - if err := trans2.AppendEntries("id1", trans1.LocalAddr(), &args, &out); err != nil { - t.Fatalf("err: %v", err) - } - - // Verify the response - if !reflect.DeepEqual(resp, out) { - t.Fatalf("command mismatch: %#v %#v", resp, out) - } - } // Try to do parallel appends, should stress the conn pool for i := 0; i < 5; i++ { - go appendFunc() + wg.Add(1) + + go func() { + defer wg.Done() + var out AppendEntriesResponse + if err := trans2.AppendEntries("id1", trans1.LocalAddr(), &args, &out); err != nil { + errCh <- err + return + } + + // Verify the response + if !reflect.DeepEqual(resp, out) { + errCh <- fmt.Errorf("command mismatch: %#v %#v", resp, out) + return + } + }() } // Wait for the routines to finish wg.Wait() + // Check if we received any errors from the above goroutines. 
+ if len(errCh) > 0 { + t.Fatal(<-errCh) + } + // Check the conn pool size addr := trans1.LocalAddr() if len(trans2.connPool[addr]) != 3 { @@ -708,11 +808,18 @@ func TestNetworkTransport_PooledConn(t *testing.T) { } func makeTransport(t *testing.T, useAddrProvider bool, addressOverride string) (*NetworkTransport, error) { + config := &NetworkTransportConfig{ + MaxPool: 2, + // Setting this because older tests for pipelining were written when this + // was a constant and block forever if it's not large enough. + MaxRPCsInFlight: 130, + Timeout: time.Second, + Logger: newTestLogger(t), + } if useAddrProvider { - config := &NetworkTransportConfig{MaxPool: 2, Timeout: time.Second, Logger: newTestLogger(t), ServerAddressProvider: &testAddrProvider{addressOverride}} - return NewTCPTransportWithConfig("localhost:0", nil, config) + config.ServerAddressProvider = &testAddrProvider{addressOverride} } - return NewTCPTransportWithLogger("localhost:0", nil, 2, time.Second, newTestLogger(t)) + return NewTCPTransportWithConfig("localhost:0", nil, config) } type testCountingWriter struct { @@ -754,7 +861,6 @@ func (sl testCountingStreamLayer) Dial(address ServerAddress, timeout time.Durat // do not result in a tight loop and spam the log. We verify this here by counting the number // of calls against Accept() and the logger func TestNetworkTransport_ListenBackoff(t *testing.T) { - // testTime is the amount of time we will allow NetworkTransport#listen() to run // This needs to be long enough that to verify that maxDelay is in force, // but not so long as to be obnoxious when running the test suite. diff --git a/observer.go b/observer.go index 29f2d5802..400a381ed 100644 --- a/observer.go +++ b/observer.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( @@ -10,7 +13,7 @@ type Observation struct { // Raft holds the Raft instance generating the observation. Raft *Raft // Data holds observation-specific data. Possible types are - // *RequestVoteRequest + // RequestVoteRequest // RaftState // PeerObservation // LeaderObservation @@ -19,7 +22,10 @@ type Observation struct { // LeaderObservation is used for the data when leadership changes. type LeaderObservation struct { - Leader ServerAddress + // DEPRECATED The LeaderAddr field should now be used + Leader ServerAddress + LeaderAddr ServerAddress + LeaderID ServerID } // PeerObservation is sent to observers when peers change. diff --git a/peersjson.go b/peersjson.go index 38ca2a8b8..d81d5ec4c 100644 --- a/peersjson.go +++ b/peersjson.go @@ -1,9 +1,12 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( "bytes" "encoding/json" - "io/ioutil" + "os" ) // ReadPeersJSON consumes a legacy peers.json file in the format of the old JSON @@ -14,7 +17,7 @@ import ( // support for these, nor non-voter suffrage types. func ReadPeersJSON(path string) (Configuration, error) { // Read in the file. - buf, err := ioutil.ReadFile(path) + buf, err := os.ReadFile(path) if err != nil { return Configuration{}, err } @@ -63,7 +66,7 @@ type configEntry struct { // versions that use server IDs. func ReadConfigJSON(path string) (Configuration, error) { // Read in the file. - buf, err := ioutil.ReadFile(path) + buf, err := os.ReadFile(path) if err != nil { return Configuration{}, err } diff --git a/peersjson_test.go b/peersjson_test.go index a0504af7f..1e98d932d 100644 --- a/peersjson_test.go +++ b/peersjson_test.go @@ -1,7 +1,9 @@ +// Copyright (c) HashiCorp, Inc. 
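observer.go deprecates LeaderObservation.Leader in favour of the LeaderAddr and LeaderID fields, and its doc comment now lists RequestVoteRequest (a value, not a pointer) among the possible Data types. A sketch of consuming leadership changes with the new fields; the channel size and logging are placeholders:

```go
package raftexample

import (
	"log"

	"github.com/hashicorp/raft"
)

// watchLeadership registers an observer that only receives LeaderObservation
// events and reads the new LeaderID/LeaderAddr fields instead of the
// deprecated Leader field.
func watchLeadership(r *raft.Raft) {
	obsCh := make(chan raft.Observation, 16)
	filter := func(o *raft.Observation) bool {
		_, ok := o.Data.(raft.LeaderObservation)
		return ok
	}
	r.RegisterObserver(raft.NewObserver(obsCh, false, filter))

	go func() {
		for o := range obsCh {
			lo := o.Data.(raft.LeaderObservation)
			log.Printf("new leader: id=%s addr=%s", lo.LeaderID, lo.LeaderAddr)
		}
	}()
}
```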
+// SPDX-License-Identifier: MPL-2.0 + package raft import ( - "io/ioutil" "os" "path/filepath" "reflect" @@ -12,14 +14,14 @@ import ( func TestPeersJSON_BadConfiguration(t *testing.T) { var err error var base string - base, err = ioutil.TempDir("", "") + base, err = os.MkdirTemp("", "") if err != nil { t.Fatalf("err: %v", err) } defer os.RemoveAll(base) peers := filepath.Join(base, "peers.json") - if err = ioutil.WriteFile(peers, []byte("null"), 0666); err != nil { + if err = os.WriteFile(peers, []byte("null"), 0o666); err != nil { t.Fatalf("err: %v", err) } @@ -32,7 +34,7 @@ func TestPeersJSON_BadConfiguration(t *testing.T) { func TestPeersJSON_ReadPeersJSON(t *testing.T) { var err error var base string - base, err = ioutil.TempDir("", "") + base, err = os.MkdirTemp("", "") if err != nil { t.Fatalf("err: %v", err) } @@ -44,7 +46,7 @@ func TestPeersJSON_ReadPeersJSON(t *testing.T) { "127.0.0.3:123"] `) peers := filepath.Join(base, "peers.json") - if err = ioutil.WriteFile(peers, content, 0666); err != nil { + if err = os.WriteFile(peers, content, 0o666); err != nil { t.Fatalf("err: %v", err) } var configuration Configuration @@ -80,7 +82,7 @@ func TestPeersJSON_ReadPeersJSON(t *testing.T) { func TestPeersJSON_ReadConfigJSON(t *testing.T) { var err error var base string - base, err = ioutil.TempDir("", "") + base, err = os.MkdirTemp("", "") if err != nil { t.Fatalf("err: %v", err) } @@ -105,7 +107,7 @@ func TestPeersJSON_ReadConfigJSON(t *testing.T) { ] `) peers := filepath.Join(base, "peers.json") - if err = ioutil.WriteFile(peers, content, 0666); err != nil { + if err = os.WriteFile(peers, content, 0o666); err != nil { t.Fatalf("err: %v", err) } diff --git a/progress.go b/progress.go new file mode 100644 index 000000000..6b4df53f5 --- /dev/null +++ b/progress.go @@ -0,0 +1,149 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: MPL-2.0 + +package raft + +import ( + "context" + "io" + "sync" + "time" + + hclog "github.com/hashicorp/go-hclog" +) + +const ( + snapshotRestoreMonitorInterval = 10 * time.Second +) + +type snapshotRestoreMonitor struct { + logger hclog.Logger + cr CountingReader + size int64 + networkTransfer bool + + once sync.Once + cancel func() + doneCh chan struct{} +} + +func startSnapshotRestoreMonitor( + logger hclog.Logger, + cr CountingReader, + size int64, + networkTransfer bool, +) *snapshotRestoreMonitor { + ctx, cancel := context.WithCancel(context.Background()) + + m := &snapshotRestoreMonitor{ + logger: logger, + cr: cr, + size: size, + networkTransfer: networkTransfer, + cancel: cancel, + doneCh: make(chan struct{}), + } + go m.run(ctx) + return m +} + +func (m *snapshotRestoreMonitor) run(ctx context.Context) { + defer close(m.doneCh) + + ticker := time.NewTicker(snapshotRestoreMonitorInterval) + defer ticker.Stop() + + ranOnce := false + for { + select { + case <-ctx.Done(): + if !ranOnce { + m.runOnce() + } + return + case <-ticker.C: + m.runOnce() + ranOnce = true + } + } +} + +func (m *snapshotRestoreMonitor) runOnce() { + readBytes := m.cr.Count() + pct := float64(100*readBytes) / float64(m.size) + + message := "snapshot restore progress" + if m.networkTransfer { + message = "snapshot network transfer progress" + } + + m.logger.Info(message, + "read-bytes", readBytes, + "percent-complete", hclog.Fmt("%0.2f%%", pct), + ) +} + +func (m *snapshotRestoreMonitor) StopAndWait() { + m.once.Do(func() { + m.cancel() + <-m.doneCh + }) +} + +type CountingReader interface { + io.Reader + Count() int64 +} + +type countingReader struct { + reader io.Reader + + mu sync.Mutex + bytes int64 +} + +func (r *countingReader) Read(p []byte) (n int, err error) { + n, err = r.reader.Read(p) + r.mu.Lock() + r.bytes += int64(n) + r.mu.Unlock() + return n, err +} + +func (r *countingReader) Count() int64 { + r.mu.Lock() + defer r.mu.Unlock() + return r.bytes +} + +func newCountingReader(r io.Reader) *countingReader { + return &countingReader{reader: r} +} + +type countingReadCloser struct { + *countingReader + readCloser io.ReadCloser +} + +func newCountingReadCloser(rc io.ReadCloser) *countingReadCloser { + return &countingReadCloser{ + countingReader: newCountingReader(rc), + readCloser: rc, + } +} + +func (c countingReadCloser) Close() error { + return c.readCloser.Close() +} + +func (c countingReadCloser) WrappedReadCloser() io.ReadCloser { + return c.readCloser +} + +// ReadCloserWrapper allows access to an underlying ReadCloser from a wrapper. 
+type ReadCloserWrapper interface { + io.ReadCloser + WrappedReadCloser() io.ReadCloser +} + +var _ ReadCloserWrapper = &countingReadCloser{} diff --git a/raft-compat/go.mod b/raft-compat/go.mod new file mode 100644 index 000000000..5d86c2a96 --- /dev/null +++ b/raft-compat/go.mod @@ -0,0 +1,30 @@ +module github.com/hashicorp/raft/compat + +go 1.20 + +require github.com/stretchr/testify v1.8.4 + +require ( + github.com/armon/go-metrics v0.4.1 // indirect + github.com/fatih/color v1.13.0 // indirect + github.com/hashicorp/go-hclog v1.6.2 // indirect + github.com/hashicorp/go-immutable-radix v1.0.0 // indirect + github.com/hashicorp/go-msgpack v0.5.5 // indirect + github.com/hashicorp/go-msgpack/v2 v2.1.1 // indirect + github.com/hashicorp/golang-lru v0.5.0 // indirect + github.com/mattn/go-colorable v0.1.12 // indirect + github.com/mattn/go-isatty v0.0.14 // indirect + golang.org/x/sys v0.13.0 // indirect +) + +replace github.com/hashicorp/raft-previous-version => ./raft-previous-version + +replace github.com/hashicorp/raft => ../ + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/hashicorp/raft v1.6.1 + github.com/hashicorp/raft-previous-version v1.2.0 + github.com/pmezard/go-difflib v1.0.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/raft-compat/go.sum b/raft-compat/go.sum new file mode 100644 index 000000000..9608c05c1 --- /dev/null +++ b/raft-compat/go.sum @@ -0,0 +1,134 @@ +github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= +github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA= +github.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4= +github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= +github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= +github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= +github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= +github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= +github.com/go-logfmt/logfmt v0.4.0/go.mod 
h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= +github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= +github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= +github.com/hashicorp/go-hclog v1.5.0 h1:bI2ocEMgcVlz55Oj1xZNBsVi900c7II+fWDyV9o+13c= +github.com/hashicorp/go-hclog v1.5.0/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= +github.com/hashicorp/go-hclog v1.6.2 h1:NOtoftovWkDheyUM/8JW3QMiXyxJK3uHRK7wV04nD2I= +github.com/hashicorp/go-hclog v1.6.2/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= +github.com/hashicorp/go-immutable-radix v1.0.0 h1:AKDB1HM5PWEA7i4nhcpwOrO2byshxBjXVn/J/3+z5/0= +github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= +github.com/hashicorp/go-msgpack v0.5.5 h1:i9R9JSrqIz0QVLz3sz+i3YJdT7TTSLcfLLzJi9aZTuI= +github.com/hashicorp/go-msgpack v0.5.5/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= +github.com/hashicorp/go-msgpack/v2 v2.1.1 h1:xQEY9yB2wnHitoSzk/B9UjXWRQ67QKu5AOm8aFp8N3I= +github.com/hashicorp/go-msgpack/v2 v2.1.1/go.mod h1:upybraOAblm4S7rx0+jeNy+CWWhzywQsSRV5033mMu4= +github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= +github.com/hashicorp/go-uuid v1.0.0 h1:RS8zrF7PhGwyNPOtxSClXXj9HA8feRnJzgnI1RJCSnM= +github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo= +github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= +github.com/mattn/go-colorable v0.1.12 h1:jF+Du6AlPIjs2BiUiQlKOX0rt3SujHxPnksPKZbaA40= +github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= +github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= +github.com/mattn/go-isatty v0.0.14 
h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= +github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY= +github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= +github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= +github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= +github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= +github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= +github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= +github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= +github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= +github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= +github.com/stretchr/testify v1.8.0/go.mod 
h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= +golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6 h1:nonptSpoQ4vQjyraW20DXPAglgQfVnM9ZC6MmNLMR60= +golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod 
h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/raft-compat/prevote_test.go b/raft-compat/prevote_test.go new file mode 100644 index 000000000..e8d41d8a1 --- /dev/null +++ b/raft-compat/prevote_test.go @@ -0,0 +1,293 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package raft_compat + +import ( + "github.com/hashicorp/raft" + raftprevious "github.com/hashicorp/raft-previous-version" + "github.com/hashicorp/raft/compat/testcluster" + "github.com/hashicorp/raft/compat/utils" + "github.com/stretchr/testify/require" + "testing" + "time" +) + +func TestRaft_PreVote_BootStrap_PreVote(t *testing.T) { + leaveTransfer := func(t *testing.T, cluster testcluster.RaftCluster, id string) { + if cluster.GetLeader().GetLocalID() == id { + transfer := cluster.Raft(id).(*raftprevious.Raft).LeadershipTransfer() + utils.WaitFuture(t, transfer) + } + f := cluster.Raft(id).(*raftprevious.Raft).Shutdown() + utils.WaitFuture(t, f) + } + leaveNoTransfer := func(t *testing.T, cluster testcluster.RaftCluster, id string) { + fr := cluster.GetLeader().GetRaft().(*raftprevious.Raft).RemoveServer(raftprevious.ServerID(id), 0, 0) + utils.WaitFuture(t, fr) + f := cluster.Raft(id).(*raftprevious.Raft).Shutdown() + utils.WaitFuture(t, f) + } + tcs := []struct { + name string + numNodes int + preVote bool + Leave func(t *testing.T, cluster testcluster.RaftCluster, id string) + }{ + {"no prevote -> prevote (leave transfer)", 3, true, leaveTransfer}, + {"no prevote -> prevote (leave no transfer)", 3, true, leaveNoTransfer}, + {"no prevote -> prevote (leave transfer) 5", 5, true, leaveTransfer}, + {"no prevote -> prevote (leave no transfer) 5", 5, true, leaveNoTransfer}, + {"no prevote -> no prevote (leave transfer)", 3, false, leaveTransfer}, + {"no prevote -> no prevote (leave no transfer)", 3, false, leaveNoTransfer}, + {"no prevote -> no prevote (leave transfer) 5", 5, false, leaveTransfer}, + {"no prevote -> no prevote (leave no transfer) 5", 5, false, leaveNoTransfer}, + } + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + + cluster := testcluster.NewPreviousRaftCluster(t, tc.numNodes, "raftNode") + configuration := raftprevious.Configuration{} + + for i := 0; i < tc.numNodes; i++ { + var err error + require.NoError(t, err) + configuration.Servers = append(configuration.Servers, raftprevious.Server{ + ID: raftprevious.ServerID(cluster.ID(i)), + Address: raftprevious.ServerAddress(cluster.Addr(i)), + }) + } + raft0 := cluster.Raft(cluster.ID(0)).(*raftprevious.Raft) + boot := raft0.BootstrapCluster(configuration) + if err := boot.Error(); err != nil { + t.Fatalf("bootstrap err: %v", err) + } + utils.WaitForNewLeader(t, "", cluster) + getLeader := cluster.GetLeader() + require.NotEmpty(t, getLeader) + a, _ := getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID() + require.NotEmpty(t, a) + future := getLeader.GetRaft().(*raftprevious.Raft).Apply([]byte("test"), time.Second) + utils.WaitFuture(t, future) + + leader, _ := getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID() + require.NotEmpty(t, leader) + // Upgrade all the followers + for i := 0; i < tc.numNodes; i++ { + if getLeader.GetLocalID() == cluster.ID(i) { + continue + } + + // Check Leader haven't changed + a, _ := getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID() + require.Equal(t, a, leader) + tc.Leave(t, cluster, cluster.ID(i)) + + // Keep the 
store, to be passed to the upgraded node. + store := cluster.Store(cluster.ID(i)) + id := cluster.ID(i) + + //Delete the node from the cluster + cluster.DeleteNode(cluster.ID(i)) + + //Create an upgraded node with the store + rUIT := testcluster.InitUITWithStore(t, id, store.(*raftprevious.InmemStore), func(config *raft.Config) { + config.PreVoteDisabled = !tc.preVote + }) + future := getLeader.GetRaft().(*raftprevious.Raft).AddVoter(raftprevious.ServerID(rUIT.GetLocalID()), raftprevious.ServerAddress(rUIT.GetLocalAddr()), 0, 0) + utils.WaitFuture(t, future) + //Add the new node to the cluster + cluster.AddNode(rUIT) + + // Wait enough to have the configuration propagated. + time.Sleep(time.Second) + + //Apply some logs + future = getLeader.GetRaft().(*raftprevious.Raft).Apply([]byte("test2"), time.Second) + require.NoError(t, future.Error()) + + // Check Leader haven't changed as we haven't replaced the leader yet + a, _ = getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID() + require.Equal(t, a, leader) + } + // keep a reference to the store + store := cluster.Store(getLeader.GetLocalID()) + id := getLeader.GetLocalID() + + //Remove and shutdown the leader node + tc.Leave(t, cluster, getLeader.GetLocalID()) + + // Delete the old leader node from the cluster + cluster.DeleteNode(getLeader.GetLocalID()) + oldLeaderID := getLeader.GetLocalID() + + // Wait for a new leader to be elected + utils.WaitForNewLeader(t, oldLeaderID, cluster) + getLeader = cluster.GetLeader() + require.NotEmpty(t, getLeader) + + // Create a new node to replace the deleted one + rUIT := testcluster.InitUITWithStore(t, id, store.(*raftprevious.InmemStore), func(config *raft.Config) { config.PreVoteDisabled = false }) + fa := getLeader.GetRaft().(*raft.Raft).AddVoter(raft.ServerID(rUIT.GetLocalID()), raft.ServerAddress(rUIT.GetLocalAddr()), 0, 0) + utils.WaitFuture(t, fa) + + // Wait for new leader, (this happens because of not having prevote) + utils.WaitForNewLeader(t, "", cluster) + newLeaderID := rUIT.GetLeaderID() + require.NotEmpty(t, newLeaderID) + + require.NotEqual(t, newLeaderID, leader) + + newLeader := cluster.GetLeader() + //Apply some logs + future = newLeader.GetRaft().(*raft.Raft).Apply([]byte("test2"), time.Second) + require.NoError(t, future.Error()) + + // Check Leader haven't changed as we haven't replaced the leader yet + newAddr, _ := newLeader.GetRaft().(*raft.Raft).LeaderWithID() + require.Equal(t, string(newAddr), newLeader.GetLocalAddr()) + + require.Equal(t, tc.numNodes, rUIT.NumLogs()) + }) + } + +} + +func TestRaft_PreVote_Rollback(t *testing.T) { + leaveTransfer := func(t *testing.T, cluster testcluster.RaftCluster, id string) { + if cluster.GetLeader().GetLocalID() == id { + transfer := cluster.Raft(id).(*raft.Raft).LeadershipTransfer() + utils.WaitFuture(t, transfer) + } + f := cluster.Raft(id).(*raft.Raft).Shutdown() + utils.WaitFuture(t, f) + } + leaveNoTransfer := func(t *testing.T, cluster testcluster.RaftCluster, id string) { + fr := cluster.GetLeader().GetRaft().(*raft.Raft).RemoveServer(raft.ServerID(id), 0, 0) + utils.WaitFuture(t, fr) + f := cluster.Raft(id).(*raft.Raft).Shutdown() + utils.WaitFuture(t, f) + } + tcs := []struct { + name string + numNodes int + preVote bool + Leave func(t *testing.T, cluster testcluster.RaftCluster, id string) + }{ + {"no prevote -> prevote (leave transfer)", 3, true, leaveTransfer}, + {"no prevote -> prevote (leave no transfer)", 3, true, leaveNoTransfer}, + {"no prevote -> prevote (leave transfer) 5", 5, true, leaveTransfer}, + {"no prevote 
-> prevote (leave no transfer) 5", 5, true, leaveNoTransfer}, + {"no prevote -> no prevote (leave transfer)", 3, false, leaveTransfer}, + {"no prevote -> no prevote (leave no transfer)", 3, false, leaveNoTransfer}, + {"no prevote -> no prevote (leave transfer) 5", 5, false, leaveTransfer}, + {"no prevote -> no prevote (leave no transfer) 5", 5, false, leaveNoTransfer}, + } + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + + cluster := testcluster.NewUITRaftCluster(t, tc.numNodes, "raftIUTNode") + configuration := raft.Configuration{} + + for i := 0; i < tc.numNodes; i++ { + var err error + require.NoError(t, err) + configuration.Servers = append(configuration.Servers, raft.Server{ + ID: raft.ServerID(cluster.ID(i)), + Address: raft.ServerAddress(cluster.Addr(i)), + }) + } + raft0 := cluster.Raft(cluster.ID(0)).(*raft.Raft) + boot := raft0.BootstrapCluster(configuration) + if err := boot.Error(); err != nil { + t.Fatalf("bootstrap err: %v", err) + } + utils.WaitForNewLeader(t, "", cluster) + getLeader := cluster.GetLeader() + require.NotEmpty(t, getLeader) + a, _ := getLeader.GetRaft().(*raft.Raft).LeaderWithID() + require.NotEmpty(t, a) + future := getLeader.GetRaft().(*raft.Raft).Apply([]byte("test"), time.Second) + utils.WaitFuture(t, future) + + leader, _ := getLeader.GetRaft().(*raft.Raft).LeaderWithID() + require.NotEmpty(t, leader) + // Upgrade all the followers + for i := 0; i < tc.numNodes; i++ { + if getLeader.GetLocalID() == cluster.ID(i) { + continue + } + + // Check Leader haven't changed + a, _ := getLeader.GetRaft().(*raft.Raft).LeaderWithID() + require.Equal(t, a, leader) + tc.Leave(t, cluster, cluster.ID(i)) + + // Keep the store, to be passed to the upgraded node. + store := cluster.Store(cluster.ID(i)) + id := cluster.ID(i) + + //Delete the node from the cluster + cluster.DeleteNode(cluster.ID(i)) + + //Create an upgraded node with the store + rUIT := testcluster.InitPreviousWithStore(t, id, store.(*raft.InmemStore), func(config *raftprevious.Config) { + }) + future := getLeader.GetRaft().(*raft.Raft).AddVoter(raft.ServerID(rUIT.GetLocalID()), raft.ServerAddress(rUIT.GetLocalAddr()), 0, 0) + utils.WaitFuture(t, future) + //Add the new node to the cluster + cluster.AddNode(rUIT) + + // Wait enough to have the configuration propagated. 
+ time.Sleep(time.Second) + + //Apply some logs + future = getLeader.GetRaft().(*raft.Raft).Apply([]byte("test2"), time.Second) + require.NoError(t, future.Error()) + + // Check Leader haven't changed as we haven't replaced the leader yet + a, _ = getLeader.GetRaft().(*raft.Raft).LeaderWithID() + require.Equal(t, a, leader) + } + // keep a reference to the store + store := cluster.Store(getLeader.GetLocalID()) + id := getLeader.GetLocalID() + + //Remove and shutdown the leader node + tc.Leave(t, cluster, getLeader.GetLocalID()) + + // Delete the old leader node from the cluster + cluster.DeleteNode(getLeader.GetLocalID()) + oldLeaderID := getLeader.GetLocalID() + + // Wait for a new leader to be elected + utils.WaitForNewLeader(t, oldLeaderID, cluster) + getLeader = cluster.GetLeader() + require.NotEmpty(t, getLeader) + + // Create a new node to replace the deleted one + rUIT := testcluster.InitPreviousWithStore(t, id, store.(*raft.InmemStore), func(config *raftprevious.Config) {}) + fa := getLeader.GetRaft().(*raftprevious.Raft).AddVoter(raftprevious.ServerID(rUIT.GetLocalID()), raftprevious.ServerAddress(rUIT.GetLocalAddr()), 0, 0) + utils.WaitFuture(t, fa) + + // Wait for new leader, (this happens because of not having prevote) + utils.WaitForNewLeader(t, "", cluster) + newLeaderID := rUIT.GetLeaderID() + require.NotEmpty(t, newLeaderID) + + require.NotEqual(t, newLeaderID, leader) + + newLeader := cluster.GetLeader() + //Apply some logs + future = newLeader.GetRaft().(*raftprevious.Raft).Apply([]byte("test2"), time.Second) + require.NoError(t, future.Error()) + + // Check Leader haven't changed as we haven't replaced the leader yet + newAddr, _ := newLeader.GetRaft().(*raftprevious.Raft).LeaderWithID() + require.Equal(t, string(newAddr), newLeader.GetLocalAddr()) + + require.Equal(t, tc.numNodes, rUIT.NumLogs()) + }) + } + +} diff --git a/raft-compat/raft-previous-version b/raft-compat/raft-previous-version new file mode 160000 index 000000000..b96f998ff --- /dev/null +++ b/raft-compat/raft-previous-version @@ -0,0 +1 @@ +Subproject commit b96f998ff7e752c7eb68615f086a9c52008a40b6 diff --git a/raft-compat/rolling_upgrade_test.go b/raft-compat/rolling_upgrade_test.go new file mode 100644 index 000000000..f28c3cd2b --- /dev/null +++ b/raft-compat/rolling_upgrade_test.go @@ -0,0 +1,297 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: MPL-2.0 + +package raft_compat + +import ( + "fmt" + "github.com/hashicorp/raft" + raftprevious "github.com/hashicorp/raft-previous-version" + "github.com/hashicorp/raft/compat/testcluster" + "github.com/hashicorp/raft/compat/utils" + "github.com/stretchr/testify/require" + "testing" + "time" +) + +// TestRaft_RollingUpgrade This test perform a rolling upgrade by adding a new node, +// wait for it to join the cluster and remove one of the old nodes, until all nodes +// are cycled +func TestRaft_RollingUpgrade(t *testing.T) { + tcs := []struct { + Name string + Leave func(t *testing.T, cluster testcluster.RaftCluster, id string) + }{ + { + Name: "leave before shutdown", + Leave: func(t *testing.T, cluster testcluster.RaftCluster, id string) { + fr := cluster.GetLeader().GetRaft().(*raftprevious.Raft).RemoveServer(raftprevious.ServerID(id), 0, 0) + utils.WaitFuture(t, fr) + f := cluster.Raft(id).(*raftprevious.Raft).Shutdown() + utils.WaitFuture(t, f) + }, + }, + { + Name: "leader transfer", + Leave: func(t *testing.T, cluster testcluster.RaftCluster, id string) { + if cluster.GetLeader().GetLocalID() == id { + transfer := cluster.Raft(id).(*raftprevious.Raft).LeadershipTransfer() + utils.WaitFuture(t, transfer) + utils.WaitForNewLeader(t, id, cluster) + } + switch cluster.GetLeader().GetRaft().(type) { + case *raftprevious.Raft: + fr := cluster.GetLeader().GetRaft().(*raftprevious.Raft).RemoveServer(raftprevious.ServerID(id), 0, 0) + utils.WaitFuture(t, fr) + f := cluster.Raft(id).(*raftprevious.Raft).Shutdown() + utils.WaitFuture(t, f) + case *raft.Raft: + fr := cluster.GetLeader().GetRaft().(*raft.Raft).RemoveServer(raft.ServerID(id), 0, 0) + utils.WaitFuture(t, fr) + f := cluster.Raft(id).(*raftprevious.Raft).Shutdown() + utils.WaitFuture(t, f) + } + + }, + }, + } + + for _, tc := range tcs { + t.Run(tc.Name, func(t *testing.T) { + initCount := 3 + cluster := testcluster.NewPreviousRaftCluster(t, initCount, "raftNode") + configuration := raftprevious.Configuration{} + + for i := 0; i < initCount; i++ { + var err error + require.NoError(t, err) + configuration.Servers = append(configuration.Servers, raftprevious.Server{ + ID: raftprevious.ServerID(cluster.ID(i)), + Address: raftprevious.ServerAddress(cluster.Addr(i)), + }) + } + raft0 := cluster.Raft(cluster.ID(0)).(*raftprevious.Raft) + boot := raft0.BootstrapCluster(configuration) + if err := boot.Error(); err != nil { + t.Fatalf("bootstrap err: %v", err) + } + utils.WaitForNewLeader(t, "", cluster) + getLeader := cluster.GetLeader() + require.NotEmpty(t, getLeader) + a, _ := getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID() + require.NotEmpty(t, a) + future := getLeader.GetRaft().(*raftprevious.Raft).Apply([]byte("test"), time.Second) + utils.WaitFuture(t, future) + + leader, _ := getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID() + require.NotEmpty(t, leader) + // Upgrade all the followers + leaderIdx := 0 + + followers := make([]string, 0) + for i := 0; i < initCount; i++ { + if getLeader.GetLocalID() == cluster.ID(i) { + leaderIdx = i + continue + } + followers = append(followers, cluster.ID(i)) + } + + for _, f := range followers { + require.NotEqual(t, f, getLeader.GetLocalID()) + // Check Leader haven't changed + a, _ := getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID() + require.Equal(t, a, leader) + + //Create an upgraded node with the store + rUIT := testcluster.InitUIT(t, fmt.Sprintf("%s-new", f)) + future := 
getLeader.GetRaft().(*raftprevious.Raft).AddVoter(raftprevious.ServerID(rUIT.GetLocalID()), raftprevious.ServerAddress(rUIT.GetLocalAddr()), 0, 0) + utils.WaitFuture(t, future) + + //Add the new node to the cluster + + tc.Leave(t, cluster, f) + + //Delete the node from the cluster + cluster.AddNode(rUIT) + cluster.DeleteNode(f) + } + + // Wait enough to have the configuration propagated. + time.Sleep(time.Second) + + //Apply some logs + future = getLeader.GetRaft().(*raftprevious.Raft).Apply([]byte("test2"), time.Second) + require.NoError(t, future.Error()) + + // Check Leader haven't changed as we haven't replaced the leader yet + a, _ = getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID() + require.Equal(t, a, leader) + + //Remove and shutdown the leader node + tc.Leave(t, cluster, getLeader.GetLocalID()) + + // Delete the old leader node from the cluster + + oldLeaderID := getLeader.GetLocalID() + + // Wait for a new leader to be elected + utils.WaitForNewLeader(t, oldLeaderID, cluster) + getLeader = cluster.GetLeader() + require.NotEmpty(t, getLeader) + + // Create a new node to replace the deleted one + rUIT := testcluster.InitUIT(t, fmt.Sprintf("raftNew-%d", leaderIdx)) + fa := getLeader.GetRaft().(*raft.Raft).AddVoter(raft.ServerID(rUIT.GetLocalID()), raft.ServerAddress(rUIT.GetLocalAddr()), 0, 0) + utils.WaitFuture(t, fa) + + // Wait for new leader, (this happens because of not having prevote) + utils.WaitForNewLeader(t, "", cluster) + newLeader := rUIT.GetLeaderID() + require.NotEmpty(t, newLeader) + require.NotEqual(t, newLeader, leader) + + cluster.DeleteNode(getLeader.GetLocalID()) + require.Equal(t, rUIT.NumLogs(), 2) + }) + } +} + +// TestRaft_ReplaceUpgrade This test perform a rolling upgrade by removing an old node, +// and create a new node with the same store until all old nodes are cycled to new nodes. +// This simulate the advised way of upgrading in Consul. 
+func TestRaft_ReplaceUpgrade(t *testing.T) { + + tcs := []struct { + Name string + Leave func(t *testing.T, cluster testcluster.RaftCluster, id string) + }{ + { + Name: "leave before shutdown", + Leave: func(t *testing.T, cluster testcluster.RaftCluster, id string) { + fr := cluster.GetLeader().GetRaft().(*raftprevious.Raft).RemoveServer(raftprevious.ServerID(id), 0, 0) + utils.WaitFuture(t, fr) + f := cluster.Raft(id).(*raftprevious.Raft).Shutdown() + utils.WaitFuture(t, f) + }, + }, + { + Name: "shutdown without leave", + Leave: func(t *testing.T, cluster testcluster.RaftCluster, id string) { + f := cluster.Raft(id).(*raftprevious.Raft).Shutdown() + utils.WaitFuture(t, f) + }, + }, + { + Name: "leader transfer", + Leave: func(t *testing.T, cluster testcluster.RaftCluster, id string) { + if cluster.GetLeader().GetLocalID() == id { + transfer := cluster.Raft(id).(*raftprevious.Raft).LeadershipTransfer() + utils.WaitFuture(t, transfer) + } + f := cluster.Raft(id).(*raftprevious.Raft).Shutdown() + utils.WaitFuture(t, f) + }, + }, + } + + for _, tc := range tcs { + t.Run(tc.Name, func(t *testing.T) { + initCount := 3 + cluster := testcluster.NewPreviousRaftCluster(t, initCount, "raftNode") + configuration := raftprevious.Configuration{} + + for i := 0; i < initCount; i++ { + var err error + require.NoError(t, err) + configuration.Servers = append(configuration.Servers, raftprevious.Server{ + ID: raftprevious.ServerID(cluster.ID(i)), + Address: raftprevious.ServerAddress(cluster.Addr(i)), + }) + } + raft0 := cluster.Raft(cluster.ID(0)).(*raftprevious.Raft) + boot := raft0.BootstrapCluster(configuration) + if err := boot.Error(); err != nil { + t.Fatalf("bootstrap err: %v", err) + } + utils.WaitForNewLeader(t, "", cluster) + getLeader := cluster.GetLeader() + require.NotEmpty(t, getLeader) + a, _ := getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID() + require.NotEmpty(t, a) + future := getLeader.GetRaft().(*raftprevious.Raft).Apply([]byte("test"), time.Second) + utils.WaitFuture(t, future) + + leader, _ := getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID() + require.NotEmpty(t, leader) + // Upgrade all the followers + for i := 0; i < initCount; i++ { + if getLeader.GetLocalID() == cluster.ID(i) { + continue + } + + // Check Leader haven't changed + a, _ := getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID() + require.Equal(t, a, leader) + + // + tc.Leave(t, cluster, cluster.ID(i)) + + // Keep the store, to be passed to the upgraded node. + store := cluster.Store(cluster.ID(i)) + id := cluster.ID(i) + + //Delete the node from the cluster + cluster.DeleteNode(cluster.ID(i)) + + //Create an upgraded node with the store + rUIT := testcluster.InitUITWithStore(t, id, store.(*raftprevious.InmemStore), func(config *raft.Config) {}) + future := getLeader.GetRaft().(*raftprevious.Raft).AddVoter(raftprevious.ServerID(rUIT.GetLocalID()), raftprevious.ServerAddress(rUIT.GetLocalAddr()), 0, 0) + utils.WaitFuture(t, future) + //Add the new node to the cluster + cluster.AddNode(rUIT) + } + + // Wait enough to have the configuration propagated. 
+ time.Sleep(time.Second) + + //Apply some logs + future = getLeader.GetRaft().(*raftprevious.Raft).Apply([]byte("test2"), time.Second) + require.NoError(t, future.Error()) + + // Check Leader haven't changed as we haven't replaced the leader yet + a, _ = getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID() + require.Equal(t, a, leader) + + // keep a reference to the store + store := cluster.Store(getLeader.GetLocalID()) + id := getLeader.GetLocalID() + + //Remove and shutdown the leader node + tc.Leave(t, cluster, getLeader.GetLocalID()) + + // Delete the old leader node from the cluster + cluster.DeleteNode(getLeader.GetLocalID()) + oldLeaderID := getLeader.GetLocalID() + + // Wait for a new leader to be elected + utils.WaitForNewLeader(t, oldLeaderID, cluster) + getLeader = cluster.GetLeader() + require.NotEmpty(t, getLeader) + + // Create a new node to replace the deleted one + rUIT := testcluster.InitUITWithStore(t, id, store.(*raftprevious.InmemStore), func(config *raft.Config) {}) + fa := getLeader.GetRaft().(*raft.Raft).AddVoter(raft.ServerID(rUIT.GetLocalID()), raft.ServerAddress(rUIT.GetLocalAddr()), 0, 0) + utils.WaitFuture(t, fa) + + // Wait for new leader, (this happens because of not having prevote) + utils.WaitForNewLeader(t, "", cluster) + newLeader := rUIT.GetLeaderID() + require.NotEmpty(t, newLeader) + + require.NotEqual(t, newLeader, leader) + + require.Equal(t, rUIT.NumLogs(), 2) + }) + } +} diff --git a/raft-compat/testcluster/cluster.go b/raft-compat/testcluster/cluster.go new file mode 100644 index 000000000..348c79cc4 --- /dev/null +++ b/raft-compat/testcluster/cluster.go @@ -0,0 +1,314 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package testcluster + +import ( + "fmt" + "github.com/hashicorp/raft" + raftprevious "github.com/hashicorp/raft-previous-version" + "github.com/stretchr/testify/require" + "testing" + "time" +) + +type RaftUIT struct { + raft *raft.Raft + trans *raft.NetworkTransport + Config *raft.Config + Store *raft.InmemStore + Snap *raft.InmemSnapshotStore + id raft.ServerID + fsm *raft.MockFSM +} + +func (r RaftUIT) NumLogs() int { + return len(r.fsm.Logs()) +} + +func (r RaftUIT) GetLocalAddr() string { + return string(r.trans.LocalAddr()) +} + +func (r RaftUIT) GetRaft() interface{} { + return r.raft +} + +func (r RaftUIT) GetStore() interface{} { + return r.Store +} + +func (r RaftUIT) GetLocalID() string { + return string(r.id) +} + +func (r RaftUIT) GetLeaderID() string { + _, id := r.raft.LeaderWithID() + return string(id) +} + +func (r *RaftCluster) ID(i int) string { + return r.rafts[i].GetLocalID() +} +func (r *RaftCluster) Addr(i int) string { + return r.rafts[i].GetLocalAddr() +} + +func (r *RaftCluster) Raft(id string) interface{} { + i := r.GetIndex(id) + return r.rafts[i].GetRaft() +} + +func (r *RaftCluster) Store(id string) interface{} { + i := r.GetIndex(id) + return r.rafts[i].GetStore() +} + +type RaftLatest struct { + raft *raftprevious.Raft + trans *raftprevious.NetworkTransport + Config *raftprevious.Config + Store *raftprevious.InmemStore + Snap *raftprevious.InmemSnapshotStore + id raftprevious.ServerID + fsm *raftprevious.MockFSM +} + +func (r RaftLatest) NumLogs() int { + return len(r.fsm.Logs()) +} + +func (r RaftLatest) GetLocalAddr() string { + return string(r.trans.LocalAddr()) +} + +func (r RaftLatest) GetRaft() interface{} { + return r.raft +} +func (r RaftLatest) GetStore() interface{} { + return r.Store +} + +func (r RaftLatest) GetLocalID() string { + return string(r.id) +} + 
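The RaftUIT and RaftLatest wrappers above expose their underlying raft instance only through an untyped GetRaft() accessor, so the compatibility tests reach version-specific APIs through type switches. A minimal sketch of that pattern, assuming the RaftNode interface defined just below and the same raft / raftprevious imports already used in this package (the helper name leaderAddrOf is illustrative and not part of this change):

func leaderAddrOf(n RaftNode) string {
	// Type-switch on the concrete raft version backing the node; both
	// versions expose LeaderWithID(), as used throughout these tests.
	switch r := n.GetRaft().(type) {
	case *raft.Raft:
		addr, _ := r.LeaderWithID()
		return string(addr)
	case *raftprevious.Raft:
		addr, _ := r.LeaderWithID()
		return string(addr)
	default:
		return ""
	}
}

utils.WaitForNewLeader later in this change uses the same type-switch approach to poll State() on either version.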
+func (r RaftLatest) GetLeaderID() string { + _, id := r.raft.LeaderWithID() + return string(id) +} + +type RaftNode interface { + GetLocalID() string + GetLocalAddr() string + GetLeaderID() string + GetRaft() interface{} + GetStore() interface{} + NumLogs() int +} + +type RaftCluster struct { + rafts []RaftNode +} + +func NewRaftCluster(t *testing.T, f func(t *testing.T, id string) RaftNode, count int, name string) RaftCluster { + rc := RaftCluster{} + rc.rafts = make([]RaftNode, count) + for i := 0; i < count; i++ { + rc.rafts[i] = f(t, fmt.Sprintf("%s-%d", name, i)) + } + return rc +} + +func NewPreviousRaftCluster(t *testing.T, count int, name string) RaftCluster { + return NewRaftCluster(t, InitPrevious, count, name) +} + +func NewUITRaftCluster(t *testing.T, count int, name string) RaftCluster { + return NewRaftCluster(t, InitUIT, count, name) +} + +func (r *RaftCluster) GetLeader() RaftNode { + for _, n := range r.rafts { + if n.GetLocalID() == n.GetLeaderID() { + return n + } + } + return nil +} + +func (r *RaftCluster) Len() int { + return len(r.rafts) +} + +func (r *RaftCluster) AddNode(node RaftNode) { + r.rafts = append([]RaftNode{node}, r.rafts...) +} + +func (r *RaftCluster) DeleteNode(id string) { + i := r.GetIndex(id) + r.rafts = append(r.rafts[:i], r.rafts[i+1:]...) +} + +func (r *RaftCluster) GetIndex(id string) int { + i := 0 + for _, r := range r.rafts { + if r.GetLocalID() == id { + return i + } + i++ + } + return -1 +} + +func InitUIT(t *testing.T, id string) RaftNode { + return InitUITWithStore(t, id, nil, func(config *raft.Config) {}) +} + +func InitUITWithStore(t *testing.T, id string, store *raftprevious.InmemStore, cfgMod func(config *raft.Config)) RaftNode { + node := RaftUIT{} + node.Config = raft.DefaultConfig() + cfgMod(node.Config) + node.Config.HeartbeatTimeout = 50 * time.Millisecond + node.Config.ElectionTimeout = 50 * time.Millisecond + node.Config.LeaderLeaseTimeout = 50 * time.Millisecond + node.Config.CommitTimeout = 5 * time.Millisecond + node.id = raft.ServerID(id) + node.Config.LocalID = node.id + if store != nil { + node.Store = convertInMemStoreToUIT(store) + } else { + node.Store = raft.NewInmemStore() + } + + node.Snap = raft.NewInmemSnapshotStore() + node.fsm = &raft.MockFSM{} + var err error + node.trans, err = raft.NewTCPTransport("localhost:0", nil, 2, time.Second, nil) + require.NoError(t, err) + node.raft, err = raft.NewRaft(node.Config, node.fsm, node.Store, + node.Store, node.Snap, node.trans) + require.NoError(t, err) + return node +} + +func InitPrevious(t *testing.T, id string) RaftNode { + return InitPreviousWithStore(t, id, nil, func(config *raftprevious.Config) { + }) +} + +func InitPreviousWithStore(t *testing.T, id string, store *raft.InmemStore, f func(config *raftprevious.Config)) RaftNode { + node := RaftLatest{} + node.Config = raftprevious.DefaultConfig() + node.Config.HeartbeatTimeout = 50 * time.Millisecond + node.Config.ElectionTimeout = 50 * time.Millisecond + node.Config.LeaderLeaseTimeout = 50 * time.Millisecond + node.Config.CommitTimeout = 5 * time.Millisecond + node.id = raftprevious.ServerID(id) + node.Config.LocalID = node.id + f(node.Config) + + if store != nil { + node.Store = convertInMemStoreToPrevious(store) + } else { + node.Store = raftprevious.NewInmemStore() + } + node.Snap = raftprevious.NewInmemSnapshotStore() + node.fsm = &raftprevious.MockFSM{} + var err error + node.trans, err = raftprevious.NewTCPTransport("localhost:0", nil, 2, time.Second, nil) + require.NoError(t, err) + node.raft, err = 
raftprevious.NewRaft(node.Config, node.fsm, node.Store, + node.Store, node.Snap, node.trans) + require.NoError(t, err) + return node +} + +func convertLogToUIT(ll *raftprevious.Log) *raft.Log { + l := new(raft.Log) + l.Index = ll.Index + l.AppendedAt = ll.AppendedAt + l.Type = raft.LogType(ll.Type) + l.Term = ll.Term + l.Data = ll.Data + l.Extensions = ll.Extensions + return l +} +func convertLogToPrevious(ll *raft.Log) *raftprevious.Log { + l := new(raftprevious.Log) + l.Index = ll.Index + l.AppendedAt = ll.AppendedAt + l.Type = raftprevious.LogType(ll.Type) + l.Term = ll.Term + l.Data = ll.Data + l.Extensions = ll.Extensions + return l +} + +var ( + keyCurrentTerm = []byte("CurrentTerm") + keyLastVoteTerm = []byte("LastVoteTerm") + keyLastVoteCand = []byte("LastVoteCand") +) + +func convertInMemStoreToPrevious(s *raft.InmemStore) *raftprevious.InmemStore { + ss := raftprevious.NewInmemStore() + fi, _ := s.FirstIndex() + li, _ := s.LastIndex() + for i := fi; i <= li; i++ { + log := new(raft.Log) + s.GetLog(i, log) + ss.StoreLog(convertLogToPrevious(log)) + } + + get, _ := ss.Get(keyCurrentTerm) + ss.Set(keyCurrentTerm, get) + + get, _ = ss.Get(keyLastVoteTerm) + ss.Set(keyLastVoteTerm, get) + + get, _ = ss.Get(keyLastVoteCand) + ss.Set(keyLastVoteCand, get) + + get64, _ := ss.GetUint64(keyCurrentTerm) + ss.SetUint64(keyCurrentTerm, get64) + + get64, _ = ss.GetUint64(keyLastVoteTerm) + ss.SetUint64(keyLastVoteTerm, get64) + + get64, _ = ss.GetUint64(keyLastVoteCand) + ss.SetUint64(keyLastVoteCand, get64) + + return ss +} + +func convertInMemStoreToUIT(s *raftprevious.InmemStore) *raft.InmemStore { + ss := raft.NewInmemStore() + fi, _ := s.FirstIndex() + li, _ := s.LastIndex() + for i := fi; i <= li; i++ { + log := new(raftprevious.Log) + s.GetLog(i, log) + ss.StoreLog(convertLogToUIT(log)) + } + + get, _ := ss.Get(keyCurrentTerm) + ss.Set(keyCurrentTerm, get) + + get, _ = ss.Get(keyLastVoteTerm) + ss.Set(keyLastVoteTerm, get) + + get, _ = ss.Get(keyLastVoteCand) + ss.Set(keyLastVoteCand, get) + + get64, _ := ss.GetUint64(keyCurrentTerm) + ss.SetUint64(keyCurrentTerm, get64) + + get64, _ = ss.GetUint64(keyLastVoteTerm) + ss.SetUint64(keyLastVoteTerm, get64) + + get64, _ = ss.GetUint64(keyLastVoteCand) + ss.SetUint64(keyLastVoteCand, get64) + + return ss +} diff --git a/raft-compat/utils/test_utils.go b/raft-compat/utils/test_utils.go new file mode 100644 index 000000000..39d883332 --- /dev/null +++ b/raft-compat/utils/test_utils.go @@ -0,0 +1,60 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: MPL-2.0 + +package utils + +import ( + "fmt" + "github.com/hashicorp/raft" + raftprevious "github.com/hashicorp/raft-previous-version" + "github.com/hashicorp/raft/compat/testcluster" + "github.com/stretchr/testify/require" + "testing" + "time" +) + +func WaitForNewLeader(t *testing.T, oldLeader string, c testcluster.RaftCluster) { + + leader := func() string { + for i := 0; i < c.Len(); i++ { + switch r := c.Raft(c.ID(i)).(type) { + case *raft.Raft: + if r.State() == raft.Leader { + return c.ID(i) + } + case *raftprevious.Raft: + if r.State() == raftprevious.Leader { + return c.ID(i) + } + } + } + return "" + } + after := time.After(5 * time.Second) + ticker := time.NewTicker(100 * time.Millisecond) + for { + select { + case <-after: + t.Fatalf("timedout") + case <-ticker.C: + id := leader() + if id != "" { + if id != oldLeader || oldLeader == "" { + return + } + } + } + } +} + +type future interface { + Error() error +} + +func WaitFuture(t *testing.T, f future) { + timer := time.AfterFunc(1000*time.Millisecond, func() { + panic(fmt.Errorf("timeout waiting for future %v", f)) + }) + defer timer.Stop() + require.NoError(t, f.Error()) +} diff --git a/raft.go b/raft.go index a53492bd4..cbc9a59af 100644 --- a/raft.go +++ b/raft.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( @@ -5,7 +8,7 @@ import ( "container/list" "fmt" "io" - "io/ioutil" + "strings" "sync/atomic" "time" @@ -15,8 +18,9 @@ import ( ) const ( - minCheckInterval = 10 * time.Millisecond - oldestLogGaugeInterval = 10 * time.Second + minCheckInterval = 10 * time.Millisecond + oldestLogGaugeInterval = 10 * time.Second + rpcUnexpectedCommandError = "unexpected command" ) var ( @@ -31,6 +35,8 @@ var ( func (r *Raft) getRPCHeader() RPCHeader { return RPCHeader{ ProtocolVersion: r.config().ProtocolVersion, + ID: []byte(r.config().LocalID), + Addr: r.trans.EncodePeer(r.config().LocalID, r.localAddr), } } @@ -90,14 +96,16 @@ type leaderState struct { stepDown chan struct{} } -// setLeader is used to modify the current leader of the cluster -func (r *Raft) setLeader(leader ServerAddress) { +// setLeader is used to modify the current leader Address and ID of the cluster +func (r *Raft) setLeader(leaderAddr ServerAddress, leaderID ServerID) { r.leaderLock.Lock() - oldLeader := r.leader - r.leader = leader + oldLeaderAddr := r.leaderAddr + r.leaderAddr = leaderAddr + oldLeaderID := r.leaderID + r.leaderID = leaderID r.leaderLock.Unlock() - if oldLeader != leader { - r.observe(LeaderObservation{Leader: leader}) + if oldLeaderAddr != leaderAddr || oldLeaderID != leaderID { + r.observe(LeaderObservation{Leader: leaderAddr, LeaderAddr: leaderAddr, LeaderID: leaderID}) } } @@ -123,19 +131,18 @@ func (r *Raft) requestConfigChange(req configurationChangeRequest, timeout time. } } -// run is a long running goroutine that runs the Raft FSM. +// run the main thread that handles leadership and RPC requests. func (r *Raft) run() { for { // Check if we are doing a shutdown select { case <-r.shutdownCh: // Clear the leader to prevent forwarding - r.setLeader("") + r.setLeader("", "") return default: } - // Enter into a sub-FSM switch r.getState() { case Follower: r.runFollower() @@ -147,59 +154,77 @@ func (r *Raft) run() { } } -// runFollower runs the FSM for a follower. +// runFollower runs the main loop while in the follower state. 
func (r *Raft) runFollower() { didWarn := false - r.logger.Info("entering follower state", "follower", r, "leader", r.Leader()) + leaderAddr, leaderID := r.LeaderWithID() + r.logger.Info("entering follower state", "follower", r, "leader-address", leaderAddr, "leader-id", leaderID) metrics.IncrCounter([]string{"raft", "state", "follower"}, 1) heartbeatTimer := randomTimeout(r.config().HeartbeatTimeout) for r.getState() == Follower { + r.mainThreadSaturation.sleeping() + select { case rpc := <-r.rpcCh: + r.mainThreadSaturation.working() r.processRPC(rpc) case c := <-r.configurationChangeCh: + r.mainThreadSaturation.working() // Reject any operations since we are not the leader c.respond(ErrNotLeader) case a := <-r.applyCh: + r.mainThreadSaturation.working() // Reject any operations since we are not the leader a.respond(ErrNotLeader) case v := <-r.verifyCh: + r.mainThreadSaturation.working() // Reject any operations since we are not the leader v.respond(ErrNotLeader) - case r := <-r.userRestoreCh: + case ur := <-r.userRestoreCh: + r.mainThreadSaturation.working() // Reject any restores since we are not the leader - r.respond(ErrNotLeader) + ur.respond(ErrNotLeader) - case r := <-r.leadershipTransferCh: + case l := <-r.leadershipTransferCh: + r.mainThreadSaturation.working() // Reject any operations since we are not the leader - r.respond(ErrNotLeader) + l.respond(ErrNotLeader) case c := <-r.configurationsCh: + r.mainThreadSaturation.working() c.configurations = r.configurations.Clone() c.respond(nil) case b := <-r.bootstrapCh: + r.mainThreadSaturation.working() b.respond(r.liveBootstrap(b.configuration)) + case <-r.leaderNotifyCh: + // Ignore since we are not the leader + + case <-r.followerNotifyCh: + heartbeatTimer = time.After(0) + case <-heartbeatTimer: + r.mainThreadSaturation.working() // Restart the heartbeat timer hbTimeout := r.config().HeartbeatTimeout heartbeatTimer = randomTimeout(hbTimeout) // Check if we have had a successful contact lastContact := r.LastContact() - if time.Now().Sub(lastContact) < hbTimeout { + if time.Since(lastContact) < hbTimeout { continue } // Heartbeat failed! Transition to the candidate state - lastLeader := r.Leader() - r.setLeader("") + lastLeaderAddr, lastLeaderID := r.LeaderWithID() + r.setLeader("", "") if r.configurations.latestIndex == 0 { if !didWarn { @@ -214,15 +239,13 @@ func (r *Raft) runFollower() { } } else { metrics.IncrCounter([]string{"raft", "transition", "heartbeat_timeout"}, 1) - if inConfig(r.configurations.latest, r.localID) { - r.logger.Warn("heartbeat timeout reached, starting election", "last-leader", lastLeader) + if hasVote(r.configurations.latest, r.localID) { + r.logger.Warn("heartbeat timeout reached, starting election", "last-leader-addr", lastLeaderAddr, "last-leader-id", lastLeaderID) r.setState(Candidate) return - } else { - if !didWarn { - r.logger.Warn("heartbeat timeout reached, not part of stable configuration, not triggering a leader election") - didWarn = true - } + } else if !didWarn { + r.logger.Warn("heartbeat timeout reached, not part of a stable configuration or a non-voter, not triggering a leader election") + didWarn = true } } @@ -236,10 +259,14 @@ func (r *Raft) runFollower() { // the Raft object's member BootstrapCluster for more details. This must only be // called on the main thread, and only makes sense in the follower state. 
func (r *Raft) liveBootstrap(configuration Configuration) error { + if !hasVote(configuration, r.localID) { + // Reject this operation since we are not a voter + return ErrNotVoter + } + // Use the pre-init API to make the static updates. cfg := r.config() - err := BootstrapCluster(&cfg, r.logs, r.stable, r.snapshots, - r.trans, configuration) + err := BootstrapCluster(&cfg, r.logs, r.stable, r.snapshots, r.trans, configuration) if err != nil { return err } @@ -254,37 +281,90 @@ func (r *Raft) liveBootstrap(configuration Configuration) error { return r.processConfigurationLogEntry(&entry) } -// runCandidate runs the FSM for a candidate. +// runCandidate runs the main loop while in the candidate state. func (r *Raft) runCandidate() { - r.logger.Info("entering candidate state", "node", r, "term", r.getCurrentTerm()+1) + term := r.getCurrentTerm() + 1 + r.logger.Info("entering candidate state", "node", r, "term", term) metrics.IncrCounter([]string{"raft", "state", "candidate"}, 1) // Start vote for us, and set a timeout - voteCh := r.electSelf() + var voteCh <-chan *voteResult + var prevoteCh <-chan *preVoteResult + + // check if pre-vote is active and that this is not a leader transfer. + // Leader transfer do not perform prevote by design + if !r.preVoteDisabled && !r.candidateFromLeadershipTransfer.Load() { + prevoteCh = r.preElectSelf() + } else { + voteCh = r.electSelf() + } // Make sure the leadership transfer flag is reset after each run. Having this // flag will set the field LeadershipTransfer in a RequestVoteRequst to true, // which will make other servers vote even though they have a leader already. // It is important to reset that flag, because this priviledge could be abused // otherwise. - defer func() { r.candidateFromLeadershipTransfer = false }() + defer func() { r.candidateFromLeadershipTransfer.Store(false) }() - electionTimer := randomTimeout(r.config().ElectionTimeout) + electionTimeout := r.config().ElectionTimeout + electionTimer := randomTimeout(electionTimeout) // Tally the votes, need a simple majority + preVoteGrantedVotes := 0 + preVoteRefusedVotes := 0 grantedVotes := 0 votesNeeded := r.quorumSize() - r.logger.Debug("votes", "needed", votesNeeded) + r.logger.Debug("calculated votes needed", "needed", votesNeeded, "term", term) for r.getState() == Candidate { + r.mainThreadSaturation.sleeping() + select { case rpc := <-r.rpcCh: + r.mainThreadSaturation.working() r.processRPC(rpc) + case preVote := <-prevoteCh: + // This a pre-vote case it should trigger a "real" election if the pre-vote is won. 
+ r.mainThreadSaturation.working() + r.logger.Debug("pre-vote received", "from", preVote.voterID, "term", preVote.Term, "tally", preVoteGrantedVotes) + // Check if the term is greater than ours, bail + if preVote.Term > term { + r.logger.Debug("pre-vote denied: found newer term, falling back to follower", "term", preVote.Term) + r.setState(Follower) + r.setCurrentTerm(preVote.Term) + return + } + // Check if the preVote is granted + if preVote.Granted { + preVoteGrantedVotes++ + r.logger.Debug("pre-vote granted", "from", preVote.voterID, "term", preVote.Term, "tally", preVoteGrantedVotes) + } else { + preVoteRefusedVotes++ + r.logger.Debug("pre-vote denied", "from", preVote.voterID, "term", preVote.Term, "tally", preVoteGrantedVotes) + } + + // Check if we've won the pre-vote and proceed to election if so + if preVoteGrantedVotes >= votesNeeded { + r.logger.Info("pre-vote successful, starting election", "term", preVote.Term, + "tally", preVoteGrantedVotes, "refused", preVoteRefusedVotes, "votesNeeded", votesNeeded) + preVoteGrantedVotes = 0 + preVoteRefusedVotes = 0 + electionTimer = randomTimeout(electionTimeout) + prevoteCh = nil + voteCh = r.electSelf() + } + // Check if we've lost the pre-vote and wait for the election to timeout so we can do another time of + // prevote. + if preVoteRefusedVotes >= votesNeeded { + r.logger.Info("pre-vote campaign failed, waiting for election timeout", "term", preVote.Term, + "tally", preVoteGrantedVotes, "refused", preVoteRefusedVotes, "votesNeeded", votesNeeded) + } case vote := <-voteCh: + r.mainThreadSaturation.working() // Check if the term is greater than ours, bail if vote.Term > r.getCurrentTerm() { - r.logger.Debug("newer term discovered, fallback to follower") + r.logger.Debug("newer term discovered, fallback to follower", "term", vote.Term) r.setState(Follower) r.setCurrentTerm(vote.Term) return @@ -298,40 +378,56 @@ func (r *Raft) runCandidate() { // Check if we've become the leader if grantedVotes >= votesNeeded { - r.logger.Info("election won", "tally", grantedVotes) + r.logger.Info("election won", "term", vote.Term, "tally", grantedVotes) r.setState(Leader) - r.setLeader(r.localAddr) + r.setLeader(r.localAddr, r.localID) return } - case c := <-r.configurationChangeCh: + r.mainThreadSaturation.working() // Reject any operations since we are not the leader c.respond(ErrNotLeader) case a := <-r.applyCh: + r.mainThreadSaturation.working() // Reject any operations since we are not the leader a.respond(ErrNotLeader) case v := <-r.verifyCh: + r.mainThreadSaturation.working() // Reject any operations since we are not the leader v.respond(ErrNotLeader) - case r := <-r.userRestoreCh: + case ur := <-r.userRestoreCh: + r.mainThreadSaturation.working() // Reject any restores since we are not the leader - r.respond(ErrNotLeader) + ur.respond(ErrNotLeader) - case r := <-r.leadershipTransferCh: + case l := <-r.leadershipTransferCh: + r.mainThreadSaturation.working() // Reject any operations since we are not the leader - r.respond(ErrNotLeader) + l.respond(ErrNotLeader) case c := <-r.configurationsCh: + r.mainThreadSaturation.working() c.configurations = r.configurations.Clone() c.respond(nil) case b := <-r.bootstrapCh: + r.mainThreadSaturation.working() b.respond(ErrCantBootstrap) + case <-r.leaderNotifyCh: + // Ignore since we are not the leader + + case <-r.followerNotifyCh: + if electionTimeout != r.config().ElectionTimeout { + electionTimeout = r.config().ElectionTimeout + electionTimer = randomTimeout(electionTimeout) + } + case <-electionTimer: + 
r.mainThreadSaturation.working() // Election failed! Restart the election. We simply return, // which will kick us back into runCandidate r.logger.Warn("Election timeout reached, restarting election") @@ -367,7 +463,7 @@ func (r *Raft) setupLeaderState() { r.leaderState.stepDown = make(chan struct{}, 1) } -// runLeader runs the FSM for a leader. Do the setup here and drop into +// runLeader runs the main loop while in leader state. Do the setup here and drop into // the leaderLoop for the hot loop. func (r *Raft) runLeader() { r.logger.Info("entering leader state", "leader", r) @@ -386,6 +482,11 @@ func (r *Raft) runLeader() { select { case notify <- true: case <-r.shutdownCh: + // make sure push to the notify channel ( if given ) + select { + case notify <- true: + default: + } } } @@ -435,8 +536,9 @@ func (r *Raft) runLeader() { // We may have stepped down due to an RPC call, which would // provide the leader, so we cannot always blank this out. r.leaderLock.Lock() - if r.leader == r.localAddr { - r.leader = "" + if r.leaderAddr == r.localAddr && r.leaderID == r.localID { + r.leaderAddr = "" + r.leaderID = "" } r.leaderLock.Unlock() @@ -466,11 +568,7 @@ func (r *Raft) runLeader() { // an unbounded number of uncommitted configurations in the log. We now // maintain that there exists at most one uncommitted configuration entry in // any log, so we have to do proper no-ops here. - noop := &logFuture{ - log: Log{ - Type: LogNoop, - }, - } + noop := &logFuture{log: Log{Type: LogNoop}} r.dispatchLogs([]*logFuture{noop}) // Sit in the leader loop until we step down @@ -579,14 +677,19 @@ func (r *Raft) leaderLoop() { lease := time.After(r.config().LeaderLeaseTimeout) for r.getState() == Leader { + r.mainThreadSaturation.sleeping() + select { case rpc := <-r.rpcCh: + r.mainThreadSaturation.working() r.processRPC(rpc) case <-r.leaderState.stepDown: + r.mainThreadSaturation.working() r.setState(Follower) case future := <-r.leadershipTransferCh: + r.mainThreadSaturation.working() if r.getLeadershipTransferInProgress() { r.logger.Debug(ErrLeadershipTransferInProgress.Error()) future.respond(ErrLeadershipTransferInProgress) @@ -609,7 +712,18 @@ func (r *Raft) leaderLoop() { // in case eg the timer expires. // The leadershipTransfer function is controlled with // the stopCh and doneCh. + // No matter how this exits, have this function set + // leadership transfer to false before we return + // + // Note that this leaves a window where callers of + // LeadershipTransfer() and LeadershipTransferToServer() + // may start executing after they get their future but before + // this routine has set leadershipTransferInProgress back to false. + // It may be safe to modify things such that setLeadershipTransferInProgress + // is set to false before calling future.Respond, but that still needs + // to be tested and this situation mirrors what callers already had to deal with. go func() { + defer r.setLeadershipTransferInProgress(false) select { case <-time.After(r.config().ElectionTimeout): close(stopCh) @@ -626,8 +740,21 @@ func (r *Raft) leaderLoop() { case err := <-doneCh: if err != nil { r.logger.Debug(err.Error()) + future.respond(err) + } else { + // Wait for up to ElectionTimeout before flagging the + // leadership transfer as done and unblocking applies in + // the leaderLoop. 
+ select { + case <-time.After(r.config().ElectionTimeout): + err := fmt.Errorf("leadership transfer timeout") + r.logger.Debug(err.Error()) + future.respond(err) + case <-leftLeaderLoop: + r.logger.Debug("lost leadership during transfer (expected)") + future.respond(nil) + } } - future.respond(err) } }() @@ -652,16 +779,17 @@ func (r *Raft) leaderLoop() { doneCh <- fmt.Errorf("cannot find replication state for %v", id) continue } - + r.setLeadershipTransferInProgress(true) go r.leadershipTransfer(*id, *address, state, stopCh, doneCh) case <-r.leaderState.commitCh: + r.mainThreadSaturation.working() // Process the newly committed entries oldCommitIndex := r.getCommitIndex() commitIndex := r.leaderState.commitment.getCommitIndex() r.setCommitIndex(commitIndex) - // New configration has been committed, set it as the committed + // New configuration has been committed, set it as the committed // value. if r.configurations.latestIndex > oldCommitIndex && r.configurations.latestIndex <= commitIndex { @@ -673,7 +801,7 @@ func (r *Raft) leaderLoop() { start := time.Now() var groupReady []*list.Element - var groupFutures = make(map[uint64]*logFuture) + groupFutures := make(map[uint64]*logFuture) var lastIdxInGroup uint64 // Pull all inflight logs that are committed off the queue. @@ -718,10 +846,10 @@ func (r *Raft) leaderLoop() { } case v := <-r.verifyCh: + r.mainThreadSaturation.working() if v.quorumSize == 0 { // Just dispatched, start the verification r.verifyLeader(v) - } else if v.votes < v.quorumSize { // Early return, means there must be a new leader r.logger.Warn("new leader elected, stepping down") @@ -742,6 +870,7 @@ func (r *Raft) leaderLoop() { } case future := <-r.userRestoreCh: + r.mainThreadSaturation.working() if r.getLeadershipTransferInProgress() { r.logger.Debug(ErrLeadershipTransferInProgress.Error()) future.respond(ErrLeadershipTransferInProgress) @@ -751,6 +880,7 @@ func (r *Raft) leaderLoop() { future.respond(err) case future := <-r.configurationsCh: + r.mainThreadSaturation.working() if r.getLeadershipTransferInProgress() { r.logger.Debug(ErrLeadershipTransferInProgress.Error()) future.respond(ErrLeadershipTransferInProgress) @@ -760,6 +890,7 @@ func (r *Raft) leaderLoop() { future.respond(nil) case future := <-r.configurationChangeChIfStable(): + r.mainThreadSaturation.working() if r.getLeadershipTransferInProgress() { r.logger.Debug(ErrLeadershipTransferInProgress.Error()) future.respond(ErrLeadershipTransferInProgress) @@ -768,9 +899,11 @@ func (r *Raft) leaderLoop() { r.appendConfigurationEntry(future) case b := <-r.bootstrapCh: + r.mainThreadSaturation.working() b.respond(ErrCantBootstrap) case newLog := <-r.applyCh: + r.mainThreadSaturation.working() if r.getLeadershipTransferInProgress() { r.logger.Debug(ErrLeadershipTransferInProgress.Error()) newLog.respond(ErrLeadershipTransferInProgress) @@ -799,6 +932,7 @@ func (r *Raft) leaderLoop() { } case <-lease: + r.mainThreadSaturation.working() // Check if we've exceeded the lease, potentially stepping down maxDiff := r.checkLeaderLease() @@ -812,6 +946,14 @@ func (r *Raft) leaderLoop() { // Renew the lease timer lease = time.After(checkInterval) + case <-r.leaderNotifyCh: + for _, repl := range r.leaderState.replState { + asyncNotifyCh(repl.notifyCh) + } + + case <-r.followerNotifyCh: + // Ignore since we are not a follower + case <-r.shutdownCh: return } @@ -846,7 +988,6 @@ func (r *Raft) verifyLeader(v *verifyFuture) { // leadershipTransfer is doing the heavy lifting for the leadership transfer. 
func (r *Raft) leadershipTransfer(id ServerID, address ServerAddress, repl *followerReplication, stopCh chan struct{}, doneCh chan error) { - // make sure we are not already stopped select { case <-stopCh: @@ -855,10 +996,6 @@ func (r *Raft) leadershipTransfer(id ServerID, address ServerAddress, repl *foll default: } - // Step 1: set this field which stops this leader from responding to any client requests. - r.setLeadershipTransferInProgress(true) - defer func() { r.setLeadershipTransferInProgress(false) }() - for atomic.LoadUint64(&repl.nextIndex) <= r.getLastIndex() { err := &deferError{} err.init() @@ -1049,7 +1186,14 @@ func (r *Raft) restoreUserSnapshot(meta *SnapshotMeta, reader io.Reader) error { r.setLastApplied(lastIndex) r.setLastSnapshot(lastIndex, term) - r.logger.Info("restored user snapshot", "index", latestIndex) + // Remove old logs if r.logs is a MonotonicLogStore. Log any errors and continue. + if logs, ok := r.logs.(MonotonicLogStore); ok && logs.IsMonotonic() { + if err := r.removeOldLogs(); err != nil { + r.logger.Error("failed to remove old logs", "error", err) + } + } + + r.logger.Info("restored user snapshot", "index", lastIndex) return nil } @@ -1254,6 +1398,8 @@ func (r *Raft) processRPC(rpc RPC) { r.appendEntries(rpc, cmd) case *RequestVoteRequest: r.requestVote(rpc, cmd) + case *RequestPreVoteRequest: + r.requestPreVote(rpc, cmd) case *InstallSnapshotRequest: r.installSnapshot(rpc, cmd) case *TimeoutNowRequest: @@ -1261,7 +1407,8 @@ func (r *Raft) processRPC(rpc RPC) { default: r.logger.Error("got unexpected command", "command", hclog.Fmt("%#v", rpc.Command)) - rpc.Respond(nil, fmt.Errorf("unexpected command")) + + rpc.Respond(nil, fmt.Errorf(rpcUnexpectedCommandError)) } } @@ -1312,7 +1459,7 @@ func (r *Raft) appendEntries(rpc RPC, a *AppendEntriesRequest) { // Increase the term if we see a newer one, also transition to follower // if we ever get an appendEntries call - if a.Term > r.getCurrentTerm() || r.getState() != Follower { + if a.Term > r.getCurrentTerm() || (r.getState() != Follower && !r.candidateFromLeadershipTransfer.Load()) { // Ensure transition to follower r.setState(Follower) r.setCurrentTerm(a.Term) @@ -1320,8 +1467,11 @@ func (r *Raft) appendEntries(rpc RPC, a *AppendEntriesRequest) { } // Save the current leader - r.setLeader(r.trans.DecodePeer(a.Leader)) - + if len(a.Addr) > 0 { + r.setLeader(r.trans.DecodePeer(a.Addr), ServerID(a.ID)) + } else { + r.setLeader(r.trans.DecodePeer(a.Leader), ServerID(a.ID)) + } // Verify the last log entry if a.PrevLogEntry > 0 { lastIdx, lastTerm := r.getLastEntry() @@ -1329,7 +1479,6 @@ func (r *Raft) appendEntries(rpc RPC, a *AppendEntriesRequest) { var prevLogTerm uint64 if a.PrevLogEntry == lastIdx { prevLogTerm = lastTerm - } else { var prevLog Log if err := r.logs.GetLog(a.PrevLogEntry, &prevLog); err != nil { @@ -1372,9 +1521,7 @@ func (r *Raft) appendEntries(rpc RPC, a *AppendEntriesRequest) { return } if entry.Term != storeEntry.Term { - r.logger.Warn("clearing log suffix", - "from", entry.Index, - "to", lastLogIdx) + r.logger.Warn("clearing log suffix", "from", entry.Index, "to", lastLogIdx) if err := r.logs.DeleteRange(entry.Index, lastLogIdx); err != nil { r.logger.Error("failed to clear log suffix", "error", err) return @@ -1430,7 +1577,6 @@ func (r *Raft) appendEntries(rpc RPC, a *AppendEntriesRequest) { // Everything went well, set success resp.Success = true r.setLastContact() - return } // processConfigurationLogEntry takes a log entry and updates the latest @@ -1453,7 +1599,7 @@ func (r 
*Raft) processConfigurationLogEntry(entry *Log) error { return nil } -// requestVote is invoked when we get an request vote RPC call. +// requestVote is invoked when we get a request vote RPC call. func (r *Raft) requestVote(rpc RPC, req *RequestVoteRequest) { defer metrics.MeasureSince([]string{"raft", "rpc", "requestVote"}, time.Now()) r.observe(*req) @@ -1479,11 +1625,33 @@ func (r *Raft) requestVote(rpc RPC, req *RequestVoteRequest) { // check the LeadershipTransfer flag is set. Usually votes are rejected if // there is a known leader. But if the leader initiated a leadership transfer, // vote! - candidate := r.trans.DecodePeer(req.Candidate) - if leader := r.Leader(); leader != "" && leader != candidate && !req.LeadershipTransfer { + var candidate ServerAddress + var candidateBytes []byte + if len(req.RPCHeader.Addr) > 0 { + candidate = r.trans.DecodePeer(req.RPCHeader.Addr) + candidateBytes = req.RPCHeader.Addr + } else { + candidate = r.trans.DecodePeer(req.Candidate) + candidateBytes = req.Candidate + } + + // For older raft version ID is not part of the packed message + // We assume that the peer is part of the configuration and skip this check + if len(req.ID) > 0 { + candidateID := ServerID(req.ID) + // if the Servers list is empty that mean the cluster is very likely trying to bootstrap, + // Grant the vote + if len(r.configurations.latest.Servers) > 0 && !inConfiguration(r.configurations.latest, candidateID) { + r.logger.Warn("rejecting vote request since node is not in configuration", + "from", candidate) + return + } + } + if leaderAddr, leaderID := r.LeaderWithID(); leaderAddr != "" && leaderAddr != candidate && !req.LeadershipTransfer { r.logger.Warn("rejecting vote request since we have a leader", "from", candidate, - "leader", leader) + "leader", leaderAddr, + "leader-id", string(leaderID)) return } @@ -1498,9 +1666,22 @@ func (r *Raft) requestVote(rpc RPC, req *RequestVoteRequest) { r.logger.Debug("lost leadership because received a requestVote with a newer term") r.setState(Follower) r.setCurrentTerm(req.Term) + resp.Term = req.Term } + // if we get a request for vote from a nonVoter and the request term is higher, + // step down and update term, but reject the vote request + // This could happen when a node, previously voter, is converted to non-voter + // The reason we need to step in is to permit to the cluster to make progress in such a scenario + // More details about that in https://github.com/hashicorp/raft/pull/526 + if len(req.ID) > 0 { + candidateID := ServerID(req.ID) + if len(r.configurations.latest.Servers) > 0 && !hasVote(r.configurations.latest, candidateID) { + r.logger.Warn("rejecting vote request since node is not a voter", "from", candidate) + return + } + } // Check if we have voted yet lastVoteTerm, err := r.stable.GetUint64(keyLastVoteTerm) if err != nil && err.Error() != "not found" { @@ -1516,7 +1697,7 @@ func (r *Raft) requestVote(rpc RPC, req *RequestVoteRequest) { // Check if we've voted in this election before if lastVoteTerm == req.Term && lastVoteCandBytes != nil { r.logger.Info("duplicate requestVote for same term", "term", req.Term) - if bytes.Compare(lastVoteCandBytes, req.Candidate) == 0 { + if bytes.Equal(lastVoteCandBytes, candidateBytes) { r.logger.Warn("duplicate requestVote from", "candidate", candidate) resp.Granted = true } @@ -1542,14 +1723,88 @@ func (r *Raft) requestVote(rpc RPC, req *RequestVoteRequest) { } // Persist a vote for safety - if err := r.persistVote(req.Term, req.Candidate); err != nil { + if err := 
r.persistVote(req.Term, candidateBytes); err != nil { r.logger.Error("failed to persist vote", "error", err) return } resp.Granted = true r.setLastContact() - return +} + +// requestPreVote is invoked when we get a request Pre-Vote RPC call. +func (r *Raft) requestPreVote(rpc RPC, req *RequestPreVoteRequest) { + defer metrics.MeasureSince([]string{"raft", "rpc", "requestVote"}, time.Now()) + r.observe(*req) + + // Setup a response + resp := &RequestPreVoteResponse{ + RPCHeader: r.getRPCHeader(), + Term: r.getCurrentTerm(), + Granted: false, + } + var rpcErr error + defer func() { + rpc.Respond(resp, rpcErr) + }() + + // Check if we have an existing leader [who's not the candidate] and also + candidate := r.trans.DecodePeer(req.GetRPCHeader().Addr) + candidateID := ServerID(req.ID) + + // if the Servers list is empty that mean the cluster is very likely trying to bootstrap, + // Grant the vote + if len(r.configurations.latest.Servers) > 0 && !inConfiguration(r.configurations.latest, candidateID) { + r.logger.Warn("rejecting pre-vote request since node is not in configuration", + "from", candidate) + return + } + + if leaderAddr, leaderID := r.LeaderWithID(); leaderAddr != "" && leaderAddr != candidate { + r.logger.Warn("rejecting pre-vote request since we have a leader", + "from", candidate, + "leader", leaderAddr, + "leader-id", string(leaderID)) + return + } + + // Ignore an older term + if req.Term < r.getCurrentTerm() { + return + } + + if req.Term > r.getCurrentTerm() { + // continue processing here to possibly grant the pre-vote as in a "real" vote this will transition us to follower + r.logger.Debug("received a requestPreVote with a newer term, grant the pre-vote") + resp.Term = req.Term + } + + // if we get a request for a pre-vote from a nonVoter and the request term is higher, do not grant the Pre-Vote + // This could happen when a node, previously voter, is converted to non-voter + if len(r.configurations.latest.Servers) > 0 && !hasVote(r.configurations.latest, candidateID) { + r.logger.Warn("rejecting pre-vote request since node is not a voter", "from", candidate) + return + } + + // Reject if their term is older + lastIdx, lastTerm := r.getLastEntry() + if lastTerm > req.LastLogTerm { + r.logger.Warn("rejecting pre-vote request since our last term is greater", + "candidate", candidate, + "last-term", lastTerm, + "last-candidate-term", req.LastLogTerm) + return + } + + if lastTerm == req.LastLogTerm && lastIdx > req.LastLogIndex { + r.logger.Warn("rejecting pre-vote request since our last index is greater", + "candidate", candidate, + "last-index", lastIdx, + "last-candidate-index", req.LastLogIndex) + return + } + + resp.Granted = true } // installSnapshot is invoked when we get a InstallSnapshot RPC call. 
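The requestPreVote handler added in the hunk above grants or rejects a pre-vote from a handful of comparisons: the candidate must be a voting member of the latest configuration, there must be no other known leader, the proposed term must not be older than ours, and the candidate's log must be at least as up to date as ours. The standalone sketch below condenses those checks into one pure function so the flow is easier to trace; it is illustrative only, all names are hypothetical and not part of the hashicorp/raft API, and it omits the bootstrap special case in which an empty server list still allows the pre-vote.

package main

import "fmt"

// wouldGrantPreVote is a hypothetical, flattened restatement of the checks
// performed by requestPreVote: membership, known leader, term, and log
// freshness. It is a sketch for readers, not code from this patch.
func wouldGrantPreVote(
	currentTerm, lastIdx, lastTerm uint64, // local node state
	reqTerm, reqLastIdx, reqLastTerm uint64, // candidate's request
	candidateInConfig, candidateIsVoter, knownOtherLeader bool,
) bool {
	// Reject candidates that are not voting members of the latest configuration.
	// (The real handler also grants when the configuration is still empty,
	// i.e. during bootstrap; that case is omitted here.)
	if !candidateInConfig || !candidateIsVoter {
		return false
	}
	// Reject if we already follow a different leader.
	if knownOtherLeader {
		return false
	}
	// Ignore requests with a term older than our own.
	if reqTerm < currentTerm {
		return false
	}
	// Reject if the candidate's log is behind ours, same rule as a real vote.
	if lastTerm > reqLastTerm {
		return false
	}
	if lastTerm == reqLastTerm && lastIdx > reqLastIdx {
		return false
	}
	return true
}

func main() {
	// Up-to-date log and a newer term: pre-vote granted.
	fmt.Println(wouldGrantPreVote(5, 100, 5, 6, 100, 5, true, true, false)) // true
	// Log one term behind the local node: pre-vote rejected.
	fmt.Println(wouldGrantPreVote(5, 100, 5, 6, 100, 4, true, true, false)) // false
}

Because none of these checks mutate state or bump the current term, a partitioned node that keeps failing pre-votes cannot inflate its term and force an election when it rejoins, which is the behaviour the later TestRaft_PreVoteAvoidElectionWithPartition test in this patch exercises.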
@@ -1565,7 +1820,7 @@ func (r *Raft) installSnapshot(rpc RPC, req *InstallSnapshotRequest) { } var rpcErr error defer func() { - io.Copy(ioutil.Discard, rpc.Reader) // ensure we always consume all the snapshot data from the stream [see issue #212] + _, _ = io.Copy(io.Discard, rpc.Reader) // ensure we always consume all the snapshot data from the stream [see issue #212] rpc.Respond(resp, rpcErr) }() @@ -1593,7 +1848,11 @@ func (r *Raft) installSnapshot(rpc RPC, req *InstallSnapshotRequest) { } // Save the current leader - r.setLeader(r.trans.DecodePeer(req.Leader)) + if len(req.ID) > 0 { + r.setLeader(r.trans.DecodePeer(req.RPCHeader.Addr), ServerID(req.ID)) + } else { + r.setLeader(r.trans.DecodePeer(req.Leader), ServerID(req.ID)) + } // Create a new snapshot var reqConfiguration Configuration @@ -1618,8 +1877,14 @@ func (r *Raft) installSnapshot(rpc RPC, req *InstallSnapshotRequest) { return } + // Separately track the progress of streaming a snapshot over the network + // because this too can take a long time. + countingRPCReader := newCountingReader(rpc.Reader) + // Spill the remote snapshot to disk - n, err := io.Copy(sink, rpc.Reader) + transferMonitor := startSnapshotRestoreMonitor(r.logger, countingRPCReader, req.Size, true) + n, err := io.Copy(sink, countingRPCReader) + transferMonitor.StopAndWait() if err != nil { sink.Cancel() r.logger.Error("failed to copy snapshot", "error", err) @@ -1672,15 +1937,19 @@ func (r *Raft) installSnapshot(rpc RPC, req *InstallSnapshotRequest) { r.setLatestConfiguration(reqConfiguration, reqConfigurationIndex) r.setCommittedConfiguration(reqConfiguration, reqConfigurationIndex) - // Compact logs, continue even if this fails - if err := r.compactLogs(req.LastLogIndex); err != nil { + // Clear old logs if r.logs is a MonotonicLogStore. Otherwise compact the + // logs. In both cases, log any errors and continue. + if mlogs, ok := r.logs.(MonotonicLogStore); ok && mlogs.IsMonotonic() { + if err := r.removeOldLogs(); err != nil { + r.logger.Error("failed to reset logs", "error", err) + } + } else if err := r.compactLogs(req.LastLogIndex); err != nil { r.logger.Error("failed to compact logs", "error", err) } r.logger.Info("Installed remote snapshot") resp.Success = true r.setLastContact() - return } // setLastContact is used to set the last contact time to now @@ -1695,6 +1964,11 @@ type voteResult struct { voterID ServerID } +type preVoteResult struct { + RequestPreVoteResponse + voterID ServerID +} + // electSelf is used to send a RequestVote RPC to all peers, and vote for // ourself. This has the side affecting of incrementing the current term. 
The // response channel returned is used to wait for all the responses (including a @@ -1704,17 +1978,19 @@ func (r *Raft) electSelf() <-chan *voteResult { respCh := make(chan *voteResult, len(r.configurations.latest.Servers)) // Increment the term - r.setCurrentTerm(r.getCurrentTerm() + 1) + newTerm := r.getCurrentTerm() + 1 + r.setCurrentTerm(newTerm) // Construct the request lastIdx, lastTerm := r.getLastEntry() req := &RequestVoteRequest{ - RPCHeader: r.getRPCHeader(), - Term: r.getCurrentTerm(), + RPCHeader: r.getRPCHeader(), + Term: newTerm, + // this is needed for retro compatibility, before RPCHeader.Addr was added Candidate: r.trans.EncodePeer(r.localID, r.localAddr), LastLogIndex: lastIdx, LastLogTerm: lastTerm, - LeadershipTransfer: r.candidateFromLeadershipTransfer, + LeadershipTransfer: r.candidateFromLeadershipTransfer.Load(), } // Construct a function to ask for a vote @@ -1726,7 +2002,8 @@ func (r *Raft) electSelf() <-chan *voteResult { if err != nil { r.logger.Error("failed to make requestVote RPC", "target", peer, - "error", err) + "error", err, + "term", req.Term) resp.Term = req.Term resp.Granted = false } @@ -1738,10 +2015,13 @@ func (r *Raft) electSelf() <-chan *voteResult { for _, server := range r.configurations.latest.Servers { if server.Suffrage == Voter { if server.ID == r.localID { + r.logger.Debug("voting for self", "term", req.Term, "id", r.localID) + // Persist a vote for ourselves - if err := r.persistVote(req.Term, req.Candidate); err != nil { + if err := r.persistVote(req.Term, req.RPCHeader.Addr); err != nil { r.logger.Error("failed to persist vote", "error", err) return nil + } // Include our own vote respCh <- &voteResult{ @@ -1753,6 +2033,91 @@ func (r *Raft) electSelf() <-chan *voteResult { voterID: r.localID, } } else { + r.logger.Debug("asking for vote", "term", req.Term, "from", server.ID, "address", server.Address) + askPeer(server) + } + } + } + + return respCh +} + +// preElectSelf is used to send a RequestPreVote RPC to all peers, and vote for +// ourself. This will not increment the current term. The +// response channel returned is used to wait for all the responses (including a +// vote for ourself). +// This must only be called from the main thread. +func (r *Raft) preElectSelf() <-chan *preVoteResult { + + // At this point transport should support pre-vote + // but check just in case + prevoteTrans, prevoteTransSupported := r.trans.(WithPreVote) + if !prevoteTransSupported { + panic("preElection is not possible if the transport don't support pre-vote") + } + + // Create a response channel + respCh := make(chan *preVoteResult, len(r.configurations.latest.Servers)) + + // Propose the next term without actually changing our state + newTerm := r.getCurrentTerm() + 1 + + // Construct the request + lastIdx, lastTerm := r.getLastEntry() + req := &RequestPreVoteRequest{ + RPCHeader: r.getRPCHeader(), + Term: newTerm, + LastLogIndex: lastIdx, + LastLogTerm: lastTerm, + } + + // Construct a function to ask for a vote + askPeer := func(peer Server) { + r.goFunc(func() { + defer metrics.MeasureSince([]string{"raft", "candidate", "preElectSelf"}, time.Now()) + resp := &preVoteResult{voterID: peer.ID} + + err := prevoteTrans.RequestPreVote(peer.ID, peer.Address, req, &resp.RequestPreVoteResponse) + + // If the target server do not support Pre-vote RPC we count this as a granted vote to allow + // the cluster to progress. 
+ if err != nil && strings.Contains(err.Error(), rpcUnexpectedCommandError) { + r.logger.Error("target does not support pre-vote RPC, treating as granted", + "target", peer, + "error", err, + "term", req.Term) + resp.Term = req.Term + resp.Granted = true + } else if err != nil { + r.logger.Error("failed to make requestVote RPC", + "target", peer, + "error", err, + "term", req.Term) + resp.Term = req.Term + resp.Granted = false + } + respCh <- resp + + }) + } + + // For each peer, request a vote + for _, server := range r.configurations.latest.Servers { + if server.Suffrage == Voter { + if server.ID == r.localID { + r.logger.Debug("pre-voting for self", "term", req.Term, "id", r.localID) + + // cast a pre-vote for our self + respCh <- &preVoteResult{ + RequestPreVoteResponse: RequestPreVoteResponse{ + RPCHeader: r.getRPCHeader(), + Term: req.Term, + Granted: true, + }, + voterID: r.localID, + } + } else { + r.logger.Debug("asking for pre-vote", "term", req.Term, "from", server.ID, "address", server.Address) askPeer(server) } } @@ -1785,7 +2150,7 @@ func (r *Raft) setCurrentTerm(t uint64) { // transition causes the known leader to be cleared. This means // that leader should be set only after updating the state. func (r *Raft) setState(state RaftState) { - r.setLeader("") + r.setLeader("", "") oldState := r.raftState.getState() r.raftState.setState(state) if oldState != state { @@ -1842,9 +2207,9 @@ func (r *Raft) initiateLeadershipTransfer(id *ServerID, address *ServerAddress) // timeoutNow is what happens when a server receives a TimeoutNowRequest. func (r *Raft) timeoutNow(rpc RPC, req *TimeoutNowRequest) { - r.setLeader("") + r.setLeader("", "") r.setState(Candidate) - r.candidateFromLeadershipTransfer = true + r.candidateFromLeadershipTransfer.Store(true) rpc.Respond(&TimeoutNowResponse{}, nil) } diff --git a/raft_test.go b/raft_test.go index bf4bfcbd4..2db115b68 100644 --- a/raft_test.go +++ b/raft_test.go @@ -1,9 +1,13 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( "bytes" + "encoding/json" + "errors" "fmt" - "io/ioutil" "os" "path/filepath" "reflect" @@ -14,6 +18,7 @@ import ( "time" "github.com/hashicorp/go-hclog" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -55,7 +60,6 @@ func TestRaft_AfterShutdown(t *testing.T) { if f := raft.Shutdown(); f.Error() != nil { t.Fatalf("shutdown should be idempotent") } - } func TestRaft_LiveBootstrap(t *testing.T) { @@ -98,6 +102,31 @@ func TestRaft_LiveBootstrap(t *testing.T) { } } +func TestRaft_LiveBootstrap_From_NonVoter(t *testing.T) { + // Make the cluster. + c := MakeClusterNoBootstrap(2, t, nil) + defer c.Close() + + // Build the configuration. + configuration := Configuration{} + for i, r := range c.rafts { + server := Server{ + ID: r.localID, + Address: r.localAddr, + } + if i == 0 { + server.Suffrage = Nonvoter + } + configuration.Servers = append(configuration.Servers, server) + } + + // Bootstrap one of the nodes live (the non-voter). + boot := c.rafts[0].BootstrapCluster(configuration) + if err := boot.Error(); err != ErrNotVoter { + t.Fatalf("bootstrap should have failed: %v", err) + } +} + func TestRaft_RecoverCluster_NoState(t *testing.T) { c := MakeClusterNoBootstrap(1, t, nil) defer c.Close() @@ -396,12 +425,9 @@ func TestRaft_LeaderFail(t *testing.T) { if len(fsm.logs) != 2 { t.Fatalf("did not apply both to FSM! 
%v", fsm.logs) } - if bytes.Compare(fsm.logs[0], []byte("test")) != 0 { - t.Fatalf("first entry should be 'test'") - } - if bytes.Compare(fsm.logs[1], []byte("apply")) != 0 { - t.Fatalf("second entry should be 'apply'") - } + + require.Equal(t, fsm.logs[0], []byte("test")) + require.Equal(t, fsm.logs[1], []byte("apply")) fsm.Unlock() } } @@ -658,7 +684,6 @@ func TestRaft_JoinNode_ConfigStore(t *testing.T) { t.Fatalf("unexpected number of servers in config change: %v", fsm.configurations[2].Servers) } } - } func TestRaft_RemoveFollower(t *testing.T) { @@ -988,6 +1013,201 @@ func TestRaft_SnapshotRestore(t *testing.T) { } } +func TestRaft_RestoreSnapshotOnStartup_Monotonic(t *testing.T) { + // Make the cluster + conf := inmemConfig(t) + conf.TrailingLogs = 10 + opts := &MakeClusterOpts{ + Peers: 1, + Bootstrap: true, + Conf: conf, + MonotonicLogs: true, + } + c := MakeClusterCustom(t, opts) + defer c.Close() + + leader := c.Leader() + + // Commit a lot of things + var future Future + for i := 0; i < 100; i++ { + future = leader.Apply([]byte(fmt.Sprintf("test%d", i)), 0) + } + + // Wait for the last future to apply + if err := future.Error(); err != nil { + t.Fatalf("err: %v", err) + } + + // Take a snapshot + snapFuture := leader.Snapshot() + if err := snapFuture.Error(); err != nil { + t.Fatalf("err: %v", err) + } + + // Check for snapshot + snaps, _ := leader.snapshots.List() + if len(snaps) != 1 { + t.Fatalf("should have a snapshot") + } + snap := snaps[0] + + // Logs should be trimmed + firstIdx, err := leader.logs.FirstIndex() + if err != nil { + t.Fatalf("err: %v", err) + } + lastIdx, err := leader.logs.LastIndex() + if err != nil { + t.Fatalf("err: %v", err) + } + + if firstIdx != snap.Index-conf.TrailingLogs+1 { + t.Fatalf("should trim logs to %d: but is %d", snap.Index-conf.TrailingLogs+1, firstIdx) + } + + // Shutdown + shutdown := leader.Shutdown() + if err := shutdown.Error(); err != nil { + t.Fatalf("err: %v", err) + } + + // Restart the Raft + r := leader + // Can't just reuse the old transport as it will be closed + _, trans2 := NewInmemTransport(r.trans.LocalAddr()) + cfg := r.config() + r, err = NewRaft(&cfg, r.fsm, r.logs, r.stable, r.snapshots, trans2) + if err != nil { + t.Fatalf("err: %v", err) + } + c.rafts[0] = r + + // We should have restored from the snapshot! 
+ if last := r.getLastApplied(); last != snap.Index { + t.Fatalf("bad last index: %d, expecting %d", last, snap.Index) + } + + // Verify that logs have not been reset + first, _ := r.logs.FirstIndex() + last, _ := r.logs.LastIndex() + assert.Equal(t, firstIdx, first) + assert.Equal(t, lastIdx, last) +} + +func TestRaft_SnapshotRestore_Progress(t *testing.T) { + // Make the cluster + conf := inmemConfig(t) + conf.TrailingLogs = 10 + c := MakeCluster(1, t, conf) + defer c.Close() + + // Commit a lot of things + leader := c.Leader() + var future Future + for i := 0; i < 100; i++ { + future = leader.Apply([]byte(fmt.Sprintf("test%d", i)), 0) + } + + // Wait for the last future to apply + if err := future.Error(); err != nil { + t.Fatalf("err: %v", err) + } + + // Take a snapshot + snapFuture := leader.Snapshot() + if err := snapFuture.Error(); err != nil { + t.Fatalf("err: %v", err) + } + + // Check for snapshot + snaps, _ := leader.snapshots.List() + if len(snaps) != 1 { + t.Fatalf("should have a snapshot") + } + snap := snaps[0] + + // Logs should be trimmed + if idx, _ := leader.logs.FirstIndex(); idx != snap.Index-conf.TrailingLogs+1 { + t.Fatalf("should trim logs to %d: but is %d", snap.Index-conf.TrailingLogs+1, idx) + } + + // Shutdown + shutdown := leader.Shutdown() + if err := shutdown.Error(); err != nil { + t.Fatalf("err: %v", err) + } + + // Restart the Raft + r := leader + // Can't just reuse the old transport as it will be closed + _, trans2 := NewInmemTransport(r.trans.LocalAddr()) + cfg := r.config() + + // Intercept logs and look for specific log messages. + var logbuf lockedBytesBuffer + cfg.Logger = hclog.New(&hclog.LoggerOptions{ + Name: "test", + JSONFormat: true, + Level: hclog.Info, + Output: &logbuf, + }) + r, err := NewRaft(&cfg, r.fsm, r.logs, r.stable, r.snapshots, trans2) + if err != nil { + t.Fatalf("err: %v", err) + } + c.rafts[0] = r + + // We should have restored from the snapshot! + if last := r.getLastApplied(); last != snap.Index { + t.Fatalf("bad last index: %d, expecting %d", last, snap.Index) + } + + { + dec := json.NewDecoder(strings.NewReader(logbuf.String())) + + found := false + + type partialRecord struct { + Message string `json:"@message"` + PercentComplete string `json:"percent-complete"` + } + + for !found { + var record partialRecord + if err := dec.Decode(&record); err != nil { + t.Fatalf("error while decoding json logs: %v", err) + } + + if record.Message == "snapshot restore progress" && record.PercentComplete == "100.00%" { + found = true + break + } + + } + if !found { + t.Fatalf("could not find a log line indicating that snapshot restore progress was being logged") + } + } +} + +type lockedBytesBuffer struct { + mu sync.Mutex + buf bytes.Buffer +} + +func (b *lockedBytesBuffer) Write(p []byte) (n int, err error) { + b.mu.Lock() + defer b.mu.Unlock() + return b.buf.Write(p) +} + +func (b *lockedBytesBuffer) String() string { + b.mu.Lock() + defer b.mu.Unlock() + return b.buf.String() +} + // TODO: Need a test that has a previous format Snapshot and check that it can // be read/installed on the new code. @@ -1095,13 +1315,13 @@ func TestRaft_SnapshotRestore_PeerChange(t *testing.T) { content := []byte(fmt.Sprintf("[%s]", strings.Join(peers, ","))) // Perform a manual recovery on the cluster. 
- base, err := ioutil.TempDir("", "") + base, err := os.MkdirTemp("", "") if err != nil { t.Fatalf("err: %v", err) } defer os.RemoveAll(base) peersFile := filepath.Join(base, "peers.json") - if err = ioutil.WriteFile(peersFile, content, 0666); err != nil { + if err = os.WriteFile(peersFile, content, 0o666); err != nil { t.Fatalf("[ERR] err: %v", err) } configuration, err := ReadPeersJSON(peersFile) @@ -1211,7 +1431,9 @@ func TestRaft_UserSnapshot(t *testing.T) { // snapshotAndRestore does a snapshot and restore sequence and applies the given // offset to the snapshot index, so we can try out different situations. -func snapshotAndRestore(t *testing.T, offset uint64) { +func snapshotAndRestore(t *testing.T, offset uint64, monotonicLogStore bool, restoreNewCluster bool) { + t.Helper() + // Make the cluster. conf := inmemConfig(t) @@ -1221,7 +1443,19 @@ func snapshotAndRestore(t *testing.T, offset uint64) { conf.ElectionTimeout = 500 * time.Millisecond conf.LeaderLeaseTimeout = 500 * time.Millisecond - c := MakeCluster(3, t, conf) + var c *cluster + numPeers := 3 + optsMonotonic := &MakeClusterOpts{ + Peers: numPeers, + Bootstrap: true, + Conf: conf, + MonotonicLogs: true, + } + if monotonicLogStore { + c = MakeClusterCustom(t, optsMonotonic) + } else { + c = MakeCluster(numPeers, t, conf) + } defer c.Close() // Wait for things to get stable and commit some things. @@ -1251,6 +1485,17 @@ func snapshotAndRestore(t *testing.T, offset uint64) { // Get the last index before the restore. preIndex := leader.getLastIndex() + if restoreNewCluster { + var c2 *cluster + if monotonicLogStore { + c2 = MakeClusterCustom(t, optsMonotonic) + } else { + c2 = MakeCluster(numPeers, t, conf) + } + c = c2 + leader = c.Leader() + } + // Restore the snapshot, twiddling the index with the offset. meta, reader, err := snap.Open() meta.Index += offset @@ -1266,17 +1511,40 @@ func snapshotAndRestore(t *testing.T, offset uint64) { // an index to create a hole, and then we apply a no-op after the // restore. var expected uint64 - if meta.Index < preIndex { + if !restoreNewCluster && meta.Index < preIndex { expected = preIndex + 2 } else { + // restoring onto a new cluster should always have a last index based + // off of the snaphsot meta index expected = meta.Index + 2 } + lastIndex := leader.getLastIndex() if lastIndex != expected { t.Fatalf("Index was not updated correctly: %d vs. %d", lastIndex, expected) } - // Ensure all the logs are the same and that we have everything that was + // Ensure raft logs are removed for monotonic log stores but remain + // untouched for non-monotic (BoltDB) logstores. + // When first index = 1, then logs have remained untouched. + // When first indext is set to the next commit index / last index, then + // it means logs have been removed. + raftNodes := make([]*Raft, 0, numPeers+1) + raftNodes = append(raftNodes, leader) + raftNodes = append(raftNodes, c.Followers()...) + for _, raftNode := range raftNodes { + firstLogIndex, err := raftNode.logs.FirstIndex() + require.NoError(t, err) + lastLogIndex, err := raftNode.logs.LastIndex() + require.NoError(t, err) + if monotonicLogStore { + require.Equal(t, expected, firstLogIndex) + } else { + require.Equal(t, uint64(1), firstLogIndex) + } + require.Equal(t, expected, lastLogIndex) + } + // Ensure all the fsm logs are the same and that we have everything that was // part of the original snapshot, and that the contents after were // reverted. 
c.EnsureSame(t) @@ -1287,9 +1555,7 @@ func snapshotAndRestore(t *testing.T, offset uint64) { } for i, entry := range fsm.logs { expected := []byte(fmt.Sprintf("test %d", i)) - if bytes.Compare(entry, expected) != 0 { - t.Fatalf("Log entry bad: %v", entry) - } + require.Equal(t, entry, expected) } fsm.Unlock() @@ -1315,10 +1581,17 @@ func TestRaft_UserRestore(t *testing.T) { 10000, } + restoreToNewClusterCases := []bool{false, true} + for _, c := range cases { - t.Run(fmt.Sprintf("case %v", c), func(t *testing.T) { - snapshotAndRestore(t, c) - }) + for _, restoreNewCluster := range restoreToNewClusterCases { + t.Run(fmt.Sprintf("case %v | restored to new cluster: %t", c, restoreNewCluster), func(t *testing.T) { + snapshotAndRestore(t, c, false, restoreNewCluster) + }) + t.Run(fmt.Sprintf("monotonic case %v | restored to new cluster: %t", c, restoreNewCluster), func(t *testing.T) { + snapshotAndRestore(t, c, true, restoreNewCluster) + }) + } } } @@ -1554,10 +1827,10 @@ LOOP: } // Ensure both have cleared their leader - if l := leader.Leader(); l != "" { + if l, id := leader.LeaderWithID(); l != "" && id != "" { t.Fatalf("bad: %v", l) } - if l := follower.Leader(); l != "" { + if l, id := follower.LeaderWithID(); l != "" && id != "" { t.Fatalf("bad: %v", l) } } @@ -1659,7 +1932,7 @@ func TestRaft_VerifyLeader_Fail(t *testing.T) { } // Ensure the known leader is cleared - if l := leader.Leader(); l != "" { + if l, _ := leader.LeaderWithID(); l != "" { t.Fatalf("bad: %v", l) } } @@ -1729,18 +2002,172 @@ func TestRaft_NotifyCh(t *testing.T) { } } -func TestRaft_Voting(t *testing.T) { +func TestRaft_AppendEntry(t *testing.T) { c := MakeCluster(3, t, nil) defer c.Close() followers := c.Followers() ldr := c.Leader() ldrT := c.trans[c.IndexOf(ldr)] + reqAppendEntries := AppendEntriesRequest{ + RPCHeader: ldr.getRPCHeader(), + Term: ldr.getCurrentTerm() + 1, + PrevLogEntry: 0, + PrevLogTerm: ldr.getCurrentTerm(), + Leader: nil, + Entries: []*Log{ + { + Index: 1, + Term: ldr.getCurrentTerm() + 1, + Type: LogCommand, + Data: []byte("log 1"), + }, + }, + LeaderCommitIndex: 90, + } + // a follower that thinks there's a leader should vote for that leader. + var resp AppendEntriesResponse + if err := ldrT.AppendEntries(followers[0].localID, followers[0].localAddr, &reqAppendEntries, &resp); err != nil { + t.Fatalf("RequestVote RPC failed %v", err) + } + + require.True(t, resp.Success) + + headers := ldr.getRPCHeader() + headers.ID = nil + headers.Addr = nil + reqAppendEntries = AppendEntriesRequest{ + RPCHeader: headers, + Term: ldr.getCurrentTerm() + 1, + PrevLogEntry: 0, + PrevLogTerm: ldr.getCurrentTerm(), + Leader: ldr.trans.EncodePeer(ldr.config().LocalID, ldr.localAddr), + Entries: []*Log{ + { + Index: 1, + Term: ldr.getCurrentTerm() + 1, + Type: LogCommand, + Data: []byte("log 1"), + }, + }, + LeaderCommitIndex: 90, + } + // a follower that thinks there's a leader should vote for that leader. + var resp2 AppendEntriesResponse + if err := ldrT.AppendEntries(followers[0].localID, followers[0].localAddr, &reqAppendEntries, &resp2); err != nil { + t.Fatalf("RequestVote RPC failed %v", err) + } + + require.True(t, resp2.Success) +} + +// TestRaft_PreVoteMixedCluster focus on testing a cluster with +// a mix of nodes that have pre-vote activated and deactivated. +// Once the cluster is created, we force an election by partioning the leader +// and verify that the cluster regain stability. 
+func TestRaft_PreVoteMixedCluster(t *testing.T) { + + tcs := []struct { + name string + prevoteNum int + noprevoteNum int + }{ + {"majority no pre-vote", 2, 3}, + {"majority pre-vote", 3, 2}, + {"majority no pre-vote", 1, 2}, + {"majority pre-vote", 2, 1}, + {"all pre-vote", 3, 0}, + {"all no pre-vote", 0, 3}, + } + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + + // Make majority cluster. + majority := tc.prevoteNum + minority := tc.noprevoteNum + if tc.prevoteNum < tc.noprevoteNum { + majority = tc.noprevoteNum + minority = tc.prevoteNum + } + + conf := inmemConfig(t) + conf.PreVoteDisabled = tc.prevoteNum <= tc.noprevoteNum + c := MakeCluster(majority, t, conf) + defer c.Close() + + // Set up another server speaking protocol version 2. + conf = inmemConfig(t) + conf.PreVoteDisabled = tc.prevoteNum >= tc.noprevoteNum + c1 := MakeClusterNoBootstrap(minority, t, conf) + + // Merge clusters. + c.Merge(c1) + c.FullyConnect() + + for _, r := range c1.rafts { + future := c.Leader().AddVoter(r.localID, r.localAddr, 0, 0) + if err := future.Error(); err != nil { + t.Fatalf("err: %v", err) + } + } + time.Sleep(c.propagateTimeout * 10) + + leaderOld := c.Leader() + c.Followers() + c.Partition([]ServerAddress{leaderOld.localAddr}) + time.Sleep(c.propagateTimeout * 3) + leader := c.Leader() + require.NotEqual(t, leader.leaderID, leaderOld.leaderID) + }) + } + +} + +func TestRaft_PreVoteAvoidElectionWithPartition(t *testing.T) { + // Make a prevote cluster. + conf := inmemConfig(t) + conf.PreVoteDisabled = false + c := MakeCluster(5, t, conf) + defer c.Close() + + oldLeaderTerm := c.Leader().getCurrentTerm() + followers := c.Followers() + require.Len(t, followers, 4) + + //Partition a node and wait enough for it to increase its term + c.Partition([]ServerAddress{followers[0].localAddr}) + time.Sleep(10 * c.propagateTimeout) + + // Check the leader is stable and the followers are as expected + leaderTerm := c.Leader().getCurrentTerm() + require.Equal(t, leaderTerm, oldLeaderTerm) + require.Len(t, c.WaitForFollowers(3), 3) + + // reconnect the partitioned node + c.FullyConnect() + time.Sleep(3 * c.propagateTimeout) + + // Check that the number of followers increase and the term is not increased + require.Len(t, c.Followers(), 4) + leaderTerm = c.Leader().getCurrentTerm() + require.Equal(t, leaderTerm, oldLeaderTerm) + +} + +func TestRaft_VotingGrant_WhenLeaderAvailable(t *testing.T) { + conf := inmemConfig(t) + conf.ProtocolVersion = 3 + c := MakeCluster(3, t, conf) + defer c.Close() + followers := c.Followers() + ldr := c.Leader() + ldrT := c.trans[c.IndexOf(ldr)] + reqVote := RequestVoteRequest{ RPCHeader: ldr.getRPCHeader(), Term: ldr.getCurrentTerm() + 10, - Candidate: ldrT.EncodePeer(ldr.localID, ldr.localAddr), LastLogIndex: ldr.LastIndex(), + Candidate: ldrT.EncodePeer(ldr.localID, ldr.localAddr), LastLogTerm: ldr.getCurrentTerm(), LeadershipTransfer: false, } @@ -1753,6 +2180,7 @@ func TestRaft_Voting(t *testing.T) { t.Fatalf("expected vote to be granted, but wasn't %+v", resp) } // a follower that thinks there's a leader shouldn't vote for a different candidate + reqVote.Addr = ldrT.EncodePeer(followers[0].localID, followers[0].localAddr) reqVote.Candidate = ldrT.EncodePeer(followers[0].localID, followers[0].localAddr) if err := ldrT.RequestVote(followers[1].localID, followers[1].localAddr, &reqVote, &resp); err != nil { t.Fatalf("RequestVote RPC failed %v", err) @@ -1763,6 +2191,7 @@ func TestRaft_Voting(t *testing.T) { // a follower that thinks there's a leader, but the 
request has the leadership transfer flag, should // vote for a different candidate reqVote.LeadershipTransfer = true + reqVote.Addr = ldrT.EncodePeer(followers[0].localID, followers[0].localAddr) reqVote.Candidate = ldrT.EncodePeer(followers[0].localID, followers[0].localAddr) if err := ldrT.RequestVote(followers[1].localID, followers[1].localAddr, &reqVote, &resp); err != nil { t.Fatalf("RequestVote RPC failed %v", err) @@ -1782,9 +2211,9 @@ func TestRaft_ProtocolVersion_RejectRPC(t *testing.T) { reqVote := RequestVoteRequest{ RPCHeader: RPCHeader{ ProtocolVersion: ProtocolVersionMax + 1, + Addr: ldrT.EncodePeer(ldr.localID, ldr.localAddr), }, Term: ldr.getCurrentTerm() + 10, - Candidate: ldrT.EncodePeer(ldr.localID, ldr.localAddr), LastLogIndex: ldr.LastIndex(), LastLogTerm: ldr.getCurrentTerm(), } @@ -1881,6 +2310,35 @@ func TestRaft_ProtocolVersion_Upgrade_2_3(t *testing.T) { } } +func TestRaft_LeaderID_Propagated(t *testing.T) { + // Make a cluster on protocol version 3. + conf := inmemConfig(t) + c := MakeCluster(3, t, conf) + defer c.Close() + err := waitForLeader(c) + require.NoError(t, err) + + for _, n := range c.rafts { + require.Equal(t, ProtocolVersion(3), n.protocolVersion) + addr, id := n.LeaderWithID() + require.NotEmpty(t, id) + require.NotEmpty(t, addr) + } + for i := 0; i < 5; i++ { + future := c.Leader().Apply([]byte(fmt.Sprintf("test%d", i)), 0) + if err := future.Error(); err != nil { + t.Fatalf("[ERR] err: %v", err) + } + } + // Wait a while + time.Sleep(c.propagateTimeout) + + // Sanity check the cluster. + c.EnsureSame(t) + c.EnsureSamePeers(t) + c.EnsureLeader(t, c.Leader().localAddr) +} + func TestRaft_LeadershipTransferInProgress(t *testing.T) { r := &Raft{leaderState: leaderState{}} r.setupLeaderState() @@ -1972,17 +2430,71 @@ func TestRaft_LeadershipTransferWithOneNode(t *testing.T) { } } +func TestRaft_LeadershipTransferWithWrites(t *testing.T) { + conf := inmemConfig(t) + conf.Logger = hclog.New(&hclog.LoggerOptions{Level: hclog.Trace}) + c := MakeCluster(7, t, conf) + defer c.Close() + + doneCh := make(chan struct{}) + var writerErr error + var wg sync.WaitGroup + var writes int + wg.Add(1) + leader := c.Leader() + go func() { + defer wg.Done() + for { + select { + case <-doneCh: + return + default: + future := leader.Apply([]byte("test"), 0) + switch err := future.Error(); { + case errors.Is(err, ErrRaftShutdown): + return + case errors.Is(err, ErrNotLeader): + leader = c.Leader() + case errors.Is(err, ErrLeadershipTransferInProgress): + continue + case errors.Is(err, ErrLeadershipLost): + continue + case err == nil: + writes++ + default: + writerErr = err + } + time.Sleep(time.Millisecond) + } + } + }() + + follower := c.Followers()[0] + future := c.Leader().LeadershipTransferToServer(follower.localID, follower.localAddr) + if future.Error() != nil { + t.Fatalf("Didn't expect error: %v", future.Error()) + } + if follower.localID != c.Leader().localID { + t.Error("Leadership should have been transitioned to specified server.") + } + close(doneCh) + wg.Wait() + if writerErr != nil { + t.Fatal(writerErr) + } + t.Logf("writes: %d", writes) +} + func TestRaft_LeadershipTransferWithSevenNodes(t *testing.T) { c := MakeCluster(7, t, nil) defer c.Close() - oldLeader := c.Leader().localID follower := c.GetInState(Follower)[0] future := c.Leader().LeadershipTransferToServer(follower.localID, follower.localAddr) if future.Error() != nil { t.Fatalf("Didn't expect error: %v", future.Error()) } - if oldLeader == c.Leader().localID { + if follower.localID != 
c.Leader().localID { t.Error("Leadership should have been transitioned to specified server.") } } @@ -2145,7 +2657,7 @@ func TestRaft_LeadershipTransferIgnoresNonvoters(t *testing.T) { } func TestRaft_LeadershipTransferStopRightAway(t *testing.T) { - r := Raft{leaderState: leaderState{}} + r := Raft{leaderState: leaderState{}, logger: hclog.New(nil)} r.setupLeaderState() stopCh := make(chan struct{}) @@ -2157,6 +2669,7 @@ func TestRaft_LeadershipTransferStopRightAway(t *testing.T) { t.Errorf("leadership shouldn't have started, but instead it error with: %v", err) } } + func TestRaft_GetConfigurationNoBootstrap(t *testing.T) { c := MakeCluster(2, t, nil) defer c.Close() @@ -2194,6 +2707,41 @@ func TestRaft_GetConfigurationNoBootstrap(t *testing.T) { } } +func TestRaft_LogStoreIsMonotonic(t *testing.T) { + c := MakeCluster(1, t, nil) + defer c.Close() + + // Should be one leader + leader := c.Leader() + c.EnsureLeader(t, leader.localAddr) + + // Test the monotonic type assertion on the InmemStore. + _, ok := leader.logs.(MonotonicLogStore) + assert.False(t, ok) + + var log LogStore + + // Wrapping the non-monotonic store as a LogCache should make it pass the + // type assertion, but the underlying store is still non-monotonic. + log, _ = NewLogCache(100, leader.logs) + mcast, ok := log.(MonotonicLogStore) + require.True(t, ok) + assert.False(t, mcast.IsMonotonic()) + + // Now create a new MockMonotonicLogStore using the leader logs and expect + // it to work. + log = &MockMonotonicLogStore{s: leader.logs} + mcast, ok = log.(MonotonicLogStore) + require.True(t, ok) + assert.True(t, mcast.IsMonotonic()) + + // Wrap the mock logstore in a LogCache and check again. + log, _ = NewLogCache(100, log) + mcast, ok = log.(MonotonicLogStore) + require.True(t, ok) + assert.True(t, mcast.IsMonotonic()) +} + func TestRaft_CacheLogWithStoreError(t *testing.T) { c := MakeCluster(2, t, nil) defer c.Close() @@ -2246,6 +2794,7 @@ func TestRaft_CacheLogWithStoreError(t *testing.T) { func TestRaft_ReloadConfig(t *testing.T) { conf := inmemConfig(t) + conf.LeaderLeaseTimeout = 40 * time.Millisecond c := MakeCluster(1, t, conf) defer c.Close() raft := c.rafts[0] @@ -2260,6 +2809,8 @@ func TestRaft_ReloadConfig(t *testing.T) { TrailingLogs: 12345, SnapshotInterval: 234 * time.Second, SnapshotThreshold: 6789, + HeartbeatTimeout: 45 * time.Millisecond, + ElectionTimeout: 46 * time.Millisecond, } require.NoError(t, raft.ReloadConfig(newCfg)) @@ -2268,6 +2819,8 @@ func TestRaft_ReloadConfig(t *testing.T) { require.Equal(t, newCfg.TrailingLogs, raft.config().TrailingLogs) require.Equal(t, newCfg.SnapshotInterval, raft.config().SnapshotInterval) require.Equal(t, newCfg.SnapshotThreshold, raft.config().SnapshotThreshold) + require.Equal(t, newCfg.HeartbeatTimeout, raft.config().HeartbeatTimeout) + require.Equal(t, newCfg.ElectionTimeout, raft.config().ElectionTimeout) } func TestRaft_ReloadConfigValidates(t *testing.T) { @@ -2345,3 +2898,407 @@ func TestRaft_InstallSnapshot_InvalidPeers(t *testing.T) { require.Error(t, resp.Error) require.Contains(t, resp.Error.Error(), "failed to decode peers") } + +func TestRaft_VoteNotGranted_WhenNodeNotInCluster(t *testing.T) { + // Make a cluster + c := MakeCluster(3, t, nil) + + defer c.Close() + + // Get the leader + leader := c.Leader() + + // Wait until we have 2 followers + limit := time.Now().Add(c.longstopTimeout) + var followers []*Raft + for time.Now().Before(limit) && len(followers) != 2 { + c.WaitEvent(nil, c.conf.CommitTimeout) + followers = c.GetInState(Follower) + } + 
if len(followers) != 2 { + t.Fatalf("expected two followers: %v", followers) + } + + // Remove a follower + followerRemoved := followers[0] + future := leader.RemoveServer(followerRemoved.localID, 0, 0) + if err := future.Error(); err != nil { + t.Fatalf("err: %v", err) + } + + // Wait a while + time.Sleep(c.propagateTimeout) + + // Other nodes should have fewer peers + if configuration := c.getConfiguration(leader); len(configuration.Servers) != 2 { + t.Fatalf("too many peers") + } + if configuration := c.getConfiguration(followers[1]); len(configuration.Servers) != 2 { + t.Fatalf("too many peers") + } + waitForState(followerRemoved, Follower) + // The removed node should be still in Follower state + require.Equal(t, Follower, followerRemoved.getState()) + + // Prepare a Vote request from the removed follower + follower := followers[1] + followerRemovedT := c.trans[c.IndexOf(followerRemoved)] + reqVote := RequestVoteRequest{ + RPCHeader: followerRemoved.getRPCHeader(), + Term: followerRemoved.getCurrentTerm() + 10, + LastLogIndex: followerRemoved.LastIndex(), + LastLogTerm: followerRemoved.getCurrentTerm(), + LeadershipTransfer: false, + } + // a follower that thinks there's a leader should vote for that leader. + var resp RequestVoteResponse + + // partiton the leader to simulate an unstable cluster + c.Partition([]ServerAddress{leader.localAddr}) + time.Sleep(c.propagateTimeout) + + // wait for the remaining follower to trigger an election + waitForState(follower, Candidate) + + // send a vote request from the removed follower to the Candidate follower + if err := followerRemovedT.RequestVote(follower.localID, follower.localAddr, &reqVote, &resp); err != nil { + t.Fatalf("RequestVote RPC failed %v", err) + } + + // the vote request should not be granted, because the voter is not part of the cluster anymore + if resp.Granted { + t.Fatalf("expected vote to not be granted, but it was %+v", resp) + } +} + +func TestRaft_ClusterCanRegainStability_WhenNonVoterWithHigherTermJoin(t *testing.T) { + // Make a cluster + c := MakeCluster(3, t, nil) + + defer c.Close() + + // Get the leader + leader := c.Leader() + + // Wait until we have 2 followers + limit := time.Now().Add(c.longstopTimeout) + var followers []*Raft + for time.Now().Before(limit) && len(followers) != 2 { + c.WaitEvent(nil, c.conf.CommitTimeout) + followers = c.GetInState(Follower) + } + if len(followers) != 2 { + t.Fatalf("expected two followers: %v", followers) + } + + // Remove a follower + followerRemoved := followers[0] + c.Disconnect(followerRemoved.localAddr) + time.Sleep(c.propagateTimeout) + + future := leader.RemoveServer(followerRemoved.localID, 0, 0) + if err := future.Error(); err != nil { + t.Fatalf("err: %v", err) + } + + // set that follower term to higher term to faster simulate a partitioning + newTerm := leader.getCurrentTerm() + 20 + followerRemoved.setCurrentTerm(newTerm) + // Add the node back as NonVoter + future = leader.AddNonvoter(followerRemoved.localID, followerRemoved.localAddr, 0, 0) + if err := future.Error(); err != nil { + t.Fatalf("err: %v", err) + } + + c.FullyConnect() + + // Wait a while + time.Sleep(c.propagateTimeout) + // Check the term is now a new term + leader = c.Leader() + currentTerm := leader.getCurrentTerm() + if newTerm > currentTerm { + t.Fatalf("term should have changed,%d < %d", newTerm, currentTerm) + } + + // check nonVoter is not elected + if leader.localID == followerRemoved.localID { + t.Fatalf("Should not be leader %s", followerRemoved.localID) + } + + // Write some logs to 
ensure they replicate + for i := 0; i < 100; i++ { + future := leader.Apply([]byte(fmt.Sprintf("test%d", i)), 0) + if err := future.Error(); err != nil { + t.Fatalf("[ERR] apply err: %v", err) + } + } + c.WaitForReplication(100) + + // Remove the server and add it back as Voter + future = leader.RemoveServer(followerRemoved.localID, 0, 0) + if err := future.Error(); err != nil { + t.Fatalf("err: %v", err) + } + leader.AddVoter(followerRemoved.localID, followerRemoved.localAddr, 0, 0) + + // Wait a while + time.Sleep(c.propagateTimeout * 10) + + // Write some logs to ensure they replicate + for i := 100; i < 200; i++ { + future := leader.Apply([]byte(fmt.Sprintf("test%d", i)), 0) + if err := future.Error(); err != nil { + t.Fatalf("[ERR] apply err: %v", err) + } + } + c.WaitForReplication(200) + + // Check leader stable + newLeader := c.Leader() + if newLeader.leaderID != leader.leaderID { + t.Fatalf("leader changed") + } +} + +// TestRaft_FollowerRemovalNoElection ensures that a leader election is not +// started when a standby is shut down and restarted. +func TestRaft_FollowerRemovalNoElection(t *testing.T) { + // Make a cluster + inmemConf := inmemConfig(t) + inmemConf.HeartbeatTimeout = 100 * time.Millisecond + inmemConf.ElectionTimeout = 100 * time.Millisecond + c := MakeCluster(3, t, inmemConf) + + defer c.Close() + err := waitForLeader(c) + require.NoError(t, err) + leader := c.Leader() + + // Wait until we have 2 followers + limit := time.Now().Add(c.longstopTimeout) + var followers []*Raft + for time.Now().Before(limit) && len(followers) != 2 { + c.WaitEvent(nil, c.conf.CommitTimeout) + followers = c.GetInState(Follower) + } + if len(followers) != 2 { + t.Fatalf("expected two followers: %v", followers) + } + + // Disconnect one of the followers and wait for the heartbeat timeout + i := 0 + follower := c.rafts[i] + if follower == c.Leader() { + i = 1 + follower = c.rafts[i] + } + logs := follower.logs + t.Logf("[INFO] restarting %v", follower) + // Shutdown follower + if f := follower.Shutdown(); f.Error() != nil { + t.Fatalf("error shuting down follower: %v", f.Error()) + } + + _, trans := NewInmemTransport(follower.localAddr) + conf := follower.config() + n, err := NewRaft(&conf, &MockFSM{}, logs, follower.stable, follower.snapshots, trans) + if err != nil { + t.Fatalf("error restarting follower: %v", err) + } + c.rafts[i] = n + c.trans[i] = n.trans.(*InmemTransport) + c.fsms[i] = n.fsm.(*MockFSM) + c.FullyConnect() + // There should be no re-election during this sleep + time.Sleep(250 * time.Millisecond) + + // Let things settle and make sure we recovered. 
+ c.EnsureLeader(t, leader.localAddr) + c.EnsureSame(t) + c.EnsureSamePeers(t) + n.Shutdown() +} + +func waitForState(follower *Raft, state RaftState) { + count := 0 + for follower.getState() != state && count < 1000 { + count++ + time.Sleep(1 * time.Millisecond) + } +} + +func waitForLeader(c *cluster) error { + count := 0 + for count < 100 { + r := c.GetInState(Leader) + if len(r) >= 1 { + return nil + } + count++ + time.Sleep(50 * time.Millisecond) + } + return errors.New("no leader elected") +} + +func TestRaft_runFollower_State_Transition(t *testing.T) { + type fields struct { + conf *Config + servers []Server + serverID ServerID + } + tests := []struct { + name string + fields fields + expectedState RaftState + }{ + {"NonVoter", fields{conf: DefaultConfig(), servers: []Server{{Nonvoter, "first", ""}}, serverID: "first"}, Follower}, + {"Voter", fields{conf: DefaultConfig(), servers: []Server{{Voter, "first", ""}}, serverID: "first"}, Candidate}, + {"Not in Config", fields{conf: DefaultConfig(), servers: []Server{{Voter, "second", ""}}, serverID: "first"}, Follower}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // set timeout to tests specific + tt.fields.conf.LocalID = tt.fields.serverID + tt.fields.conf.HeartbeatTimeout = 50 * time.Millisecond + tt.fields.conf.ElectionTimeout = 50 * time.Millisecond + tt.fields.conf.LeaderLeaseTimeout = 50 * time.Millisecond + tt.fields.conf.CommitTimeout = 5 * time.Millisecond + tt.fields.conf.SnapshotThreshold = 100 + tt.fields.conf.TrailingLogs = 10 + tt.fields.conf.skipStartup = true + + // Create a raft instance and set the latest configuration + env1 := MakeRaft(t, tt.fields.conf, false) + env1.raft.setLatestConfiguration(Configuration{Servers: tt.fields.servers}, 1) + env1.raft.setState(Follower) + + // run the follower loop exclusively + go env1.raft.runFollower() + + // wait enough time to have HeartbeatTimeout + time.Sleep(tt.fields.conf.HeartbeatTimeout * 3) + + // Check the follower loop set the right state + require.Equal(t, tt.expectedState, env1.raft.getState()) + }) + } +} + +func TestRaft_runFollower_ReloadTimeoutConfigs(t *testing.T) { + conf := DefaultConfig() + conf.LocalID = ServerID("first") + conf.HeartbeatTimeout = 500 * time.Millisecond + conf.ElectionTimeout = 500 * time.Millisecond + conf.LeaderLeaseTimeout = 50 * time.Millisecond + conf.CommitTimeout = 5 * time.Millisecond + conf.SnapshotThreshold = 100 + conf.TrailingLogs = 10 + conf.skipStartup = true + + env := MakeRaft(t, conf, false) + servers := []Server{{Voter, "first", ""}} + env.raft.setLatestConfiguration(Configuration{Servers: servers}, 1) + env.raft.setState(Follower) + + // run the follower loop exclusively + go env.raft.runFollower() + + newCfg := ReloadableConfig{ + TrailingLogs: conf.TrailingLogs, + SnapshotInterval: conf.SnapshotInterval, + SnapshotThreshold: conf.SnapshotThreshold, + HeartbeatTimeout: 50 * time.Millisecond, + ElectionTimeout: 50 * time.Millisecond, + } + require.NoError(t, env.raft.ReloadConfig(newCfg)) + // wait enough time to have HeartbeatTimeout + time.Sleep(3 * newCfg.HeartbeatTimeout) + + // Check the follower loop set the right state + require.Equal(t, Candidate, env.raft.getState()) +} + +func TestRaft_PreVote_ShouldNotRejectLeader(t *testing.T) { + // Make a cluster + c := MakeCluster(3, t, nil) + defer c.Close() + err := waitForLeader(c) + require.NoError(t, err) + leader := c.Leader() + + // Wait until we have 2 followers + limit := time.Now().Add(c.longstopTimeout) + var followers []*Raft + for 
time.Now().Before(limit) && len(followers) != 2 { + c.WaitEvent(nil, c.conf.CommitTimeout) + followers = c.GetInState(Follower) + } + if len(followers) != 2 { + t.Fatalf("expected two followers: %v", followers) + } + + // A follower who thinks that x is the leader should not reject x's pre-vote + follower := followers[0] + require.Equal(t, leader.localAddr, follower.Leader()) + + reqPreVote := RequestPreVoteRequest{ + RPCHeader: leader.getRPCHeader(), + Term: leader.getCurrentTerm() + 1, + LastLogIndex: leader.lastLogIndex, + LastLogTerm: leader.getCurrentTerm(), + } + + var resp RequestPreVoteResponse + leaderT := c.trans[c.IndexOf(leader)] + if err := leaderT.RequestPreVote(follower.localID, follower.localAddr, &reqPreVote, &resp); err != nil { + t.Fatalf("RequestPreVote RPC failed %v", err) + } + + // the pre-vote should be granted + if !resp.Granted { + t.Fatalf("expected pre-vote to be granted, but it wasn't, %+v", resp) + } +} + +func TestRaft_PreVote_ShouldRejectNonLeader(t *testing.T) { + // Make a cluster + c := MakeCluster(3, t, nil) + defer c.Close() + err := waitForLeader(c) + require.NoError(t, err) + + // Wait until we have 2 followers + limit := time.Now().Add(c.longstopTimeout) + var followers []*Raft + for time.Now().Before(limit) && len(followers) != 2 { + c.WaitEvent(nil, c.conf.CommitTimeout) + followers = c.GetInState(Follower) + } + if len(followers) != 2 { + t.Fatalf("expected two followers: %v", followers) + } + + // A follower who thinks that x is the leader should reject another node's pre-vote request + follower := followers[0] + anotherFollower := followers[1] + require.NotEqual(t, anotherFollower.localAddr, follower.Leader()) + + reqPreVote := RequestPreVoteRequest{ + RPCHeader: anotherFollower.getRPCHeader(), + Term: anotherFollower.getCurrentTerm() + 1, + LastLogIndex: anotherFollower.lastLogIndex, + LastLogTerm: anotherFollower.getCurrentTerm(), + } + + var resp RequestPreVoteResponse + anotherFollowerT := c.trans[c.IndexOf(anotherFollower)] + if err := anotherFollowerT.RequestPreVote(follower.localID, follower.localAddr, &reqPreVote, &resp); err != nil { + t.Fatalf("RequestPreVote RPC failed %v", err) + } + + // the pre-vote should not be granted + if resp.Granted { + t.Fatalf("expected pre-vote to not be granted, but it was granted, %+v", resp) + } +} diff --git a/replication.go b/replication.go index f5e81924b..c0343df32 100644 --- a/replication.go +++ b/replication.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: MPL-2.0 + package raft import ( @@ -317,9 +320,10 @@ func (r *Raft) sendLatestSnapshot(s *followerReplication) (bool, error) { // Setup the request req := InstallSnapshotRequest{ - RPCHeader: r.getRPCHeader(), - SnapshotVersion: meta.Version, - Term: s.currentTerm, + RPCHeader: r.getRPCHeader(), + SnapshotVersion: meta.Version, + Term: s.currentTerm, + // this is needed for retro compatibility, before RPCHeader.Addr was added Leader: r.trans.EncodePeer(r.localID, r.localAddr), LastLogIndex: meta.Index, LastLogTerm: meta.Term, @@ -381,8 +385,10 @@ func (r *Raft) heartbeat(s *followerReplication, stopCh chan struct{}) { req := AppendEntriesRequest{ RPCHeader: r.getRPCHeader(), Term: s.currentTerm, - Leader: r.trans.EncodePeer(r.localID, r.localAddr), + // this is needed for retro compatibility, before RPCHeader.Addr was added + Leader: r.trans.EncodePeer(r.localID, r.localAddr), } + var resp AppendEntriesResponse for { // Wait for the next heartbeat interval or forced notify @@ -399,12 +405,15 @@ func (r *Raft) heartbeat(s *followerReplication, stopCh chan struct{}) { start := time.Now() if err := r.trans.AppendEntries(peer.ID, peer.Address, &req, &resp); err != nil { - r.logger.Error("failed to heartbeat to", "peer", peer.Address, "error", err) + nextBackoffTime := cappedExponentialBackoff(failureWait, failures, maxFailureScale, r.config().HeartbeatTimeout/2) + r.logger.Error("failed to heartbeat to", "peer", peer.Address, "backoff time", + nextBackoffTime, "error", err) r.observe(FailedHeartbeatObservation{PeerID: peer.ID, LastContact: s.LastContact()}) failures++ select { - case <-time.After(backoff(failureWait, failures, maxFailureScale)): + case <-time.After(nextBackoffTime): case <-stopCh: + return } } else { if failures > 0 { @@ -552,6 +561,7 @@ func (r *Raft) pipelineDecode(s *followerReplication, p AppendPipeline, stopCh, func (r *Raft) setupAppendEntries(s *followerReplication, req *AppendEntriesRequest, nextIndex, lastIndex uint64) error { req.RPCHeader = r.getRPCHeader() req.Term = s.currentTerm + // this is needed for retro compatibility, before RPCHeader.Addr was added req.Leader = r.trans.EncodePeer(r.localID, r.localAddr) req.LeaderCommitIndex = r.getCommitIndex() if err := r.setPreviousLog(req, nextIndex); err != nil { diff --git a/saturation.go b/saturation.go new file mode 100644 index 000000000..508f08fd7 --- /dev/null +++ b/saturation.go @@ -0,0 +1,114 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package raft + +import ( + "math" + "time" + + "github.com/armon/go-metrics" +) + +// saturationMetric measures the saturation (percentage of time spent working vs +// waiting for work) of an event processing loop, such as runFSM. It reports the +// saturation as a gauge metric (at most) once every reportInterval. +// +// Callers must instrument their loop with calls to sleeping and working, starting +// with a call to sleeping. +// +// Note: the caller must be single-threaded and saturationMetric is not safe for +// concurrent use by multiple goroutines. +type saturationMetric struct { + reportInterval time.Duration + + // slept contains time for which the event processing loop was sleeping rather + // than working in the period since lastReport. + slept time.Duration + + // lost contains time that is considered lost due to incorrect use of + // saturationMetricBucket (e.g. calling sleeping() or working() multiple + // times in succession) in the period since lastReport. 
+ lost time.Duration + + lastReport, sleepBegan, workBegan time.Time + + // These are overwritten in tests. + nowFn func() time.Time + reportFn func(float32) +} + +// newSaturationMetric creates a saturationMetric that will update the gauge +// with the given name at the given reportInterval. keepPrev determines the +// number of previous measurements that will be used to smooth out spikes. +func newSaturationMetric(name []string, reportInterval time.Duration) *saturationMetric { + m := &saturationMetric{ + reportInterval: reportInterval, + nowFn: time.Now, + lastReport: time.Now(), + reportFn: func(sat float32) { metrics.AddSample(name, sat) }, + } + return m +} + +// sleeping records the time at which the loop began waiting for work. After the +// initial call it must always be proceeded by a call to working. +func (s *saturationMetric) sleeping() { + now := s.nowFn() + + if !s.sleepBegan.IsZero() { + // sleeping called twice in succession. Count that time as lost rather than + // measuring nonsense. + s.lost += now.Sub(s.sleepBegan) + } + + s.sleepBegan = now + s.workBegan = time.Time{} + s.report() +} + +// working records the time at which the loop began working. It must always be +// proceeded by a call to sleeping. +func (s *saturationMetric) working() { + now := s.nowFn() + + if s.workBegan.IsZero() { + if s.sleepBegan.IsZero() { + // working called before the initial call to sleeping. Count that time as + // lost rather than measuring nonsense. + s.lost += now.Sub(s.lastReport) + } else { + s.slept += now.Sub(s.sleepBegan) + } + } else { + // working called twice in succession. Count that time as lost rather than + // measuring nonsense. + s.lost += now.Sub(s.workBegan) + } + + s.workBegan = now + s.sleepBegan = time.Time{} + s.report() +} + +// report updates the gauge if reportInterval has passed since our last report. +func (s *saturationMetric) report() { + now := s.nowFn() + timeSinceLastReport := now.Sub(s.lastReport) + + if timeSinceLastReport < s.reportInterval { + return + } + + var saturation float64 + total := timeSinceLastReport - s.lost + if total != 0 { + saturation = float64(total-s.slept) / float64(total) + saturation = math.Round(saturation*100) / 100 + } + s.reportFn(float32(saturation)) + + s.slept = 0 + s.lost = 0 + s.lastReport = now +} diff --git a/saturation_test.go b/saturation_test.go new file mode 100644 index 000000000..ec3731ad2 --- /dev/null +++ b/saturation_test.go @@ -0,0 +1,147 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package raft + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestSaturationMetric(t *testing.T) { + t.Run("without smoothing", func(t *testing.T) { + sat := newSaturationMetric([]string{"metric"}, 100*time.Millisecond) + + now := sat.lastReport + sat.nowFn = func() time.Time { return now } + + var reported float32 + sat.reportFn = func(val float32) { reported = val } + + sat.sleeping() + + // First window: 50ms sleeping + 75ms working. + now = now.Add(50 * time.Millisecond) + sat.working() + + now = now.Add(75 * time.Millisecond) + sat.sleeping() + + // Should be 60% saturation. + require.Equal(t, float32(0.6), reported) + + // Second window: 90ms sleeping + 10ms working. + now = now.Add(90 * time.Millisecond) + sat.working() + + now = now.Add(10 * time.Millisecond) + sat.sleeping() + + // Should be 10% saturation. + require.Equal(t, float32(0.1), reported) + + // Third window: 100ms sleeping + 0ms working. 
+ now = now.Add(100 * time.Millisecond) + sat.working() + + // Should be 0% saturation. + require.Equal(t, float32(0), reported) + }) +} + +func TestSaturationMetric_IncorrectUsage(t *testing.T) { + t.Run("calling sleeping() consecutively", func(t *testing.T) { + sat := newSaturationMetric([]string{"metric"}, 50*time.Millisecond) + + now := sat.lastReport + sat.nowFn = func() time.Time { return now } + + var reported float32 + sat.reportFn = func(v float32) { reported = v } + + // Calling sleeping() consecutively should reset sleepBegan without recording + // a sample, such that we "lose" time rather than recording nonsense data. + // + // 0 | sleeping() | + // => Sleeping (10ms) + // +10ms | working() | + // => Working (10ms) + // +20ms | sleeping() | + // => [!] LOST [!] (10ms) + // +30ms | sleeping() | + // => Sleeping (10ms) + // +40ms | working() | + // => Working (10ms) + // +50ms | sleeping() | + // + // Total reportable time: 40ms. Saturation: 50%. + sat.sleeping() + now = now.Add(10 * time.Millisecond) + sat.working() + now = now.Add(10 * time.Millisecond) + sat.sleeping() + now = now.Add(10 * time.Millisecond) + sat.sleeping() + now = now.Add(10 * time.Millisecond) + sat.working() + now = now.Add(10 * time.Millisecond) + sat.sleeping() + + require.Equal(t, float32(0.5), reported) + }) + + t.Run("calling working() consecutively", func(t *testing.T) { + sat := newSaturationMetric([]string{"metric"}, 30*time.Millisecond) + + now := sat.lastReport + sat.nowFn = func() time.Time { return now } + + var reported float32 + sat.reportFn = func(v float32) { reported = v } + + // Calling working() consecutively should reset workBegan without recording + // a sample, such that we "lose" time rather than recording nonsense data. + // + // 0 | sleeping() | + // => Sleeping (10ms) + // +10ms | working() | + // => [!] LOST [!] (10ms) + // +20ms | working() | + // => Working (10ms) + // +30ms | sleeping() | + // + // Total reportable time: 20ms. Saturation: 50%. + sat.sleeping() + now = now.Add(10 * time.Millisecond) + sat.working() + now = now.Add(10 * time.Millisecond) + sat.working() + now = now.Add(10 * time.Millisecond) + sat.sleeping() + + require.Equal(t, float32(0.5), reported) + }) + + t.Run("calling working() first", func(t *testing.T) { + sat := newSaturationMetric([]string{"metric"}, 10*time.Millisecond) + + now := sat.lastReport + sat.nowFn = func() time.Time { return now } + + var reported float32 + sat.reportFn = func(v float32) { reported = v } + + // Time from start until working() is treated as lost. + sat.working() + require.Equal(t, float32(0), reported) + + sat.sleeping() + now = now.Add(5 * time.Millisecond) + sat.working() + now = now.Add(5 * time.Millisecond) + sat.sleeping() + require.Equal(t, float32(0.5), reported) + }) +} diff --git a/snapshot.go b/snapshot.go index d6b267963..89d11fda4 100644 --- a/snapshot.go +++ b/snapshot.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( @@ -207,10 +210,10 @@ func (r *Raft) takeSnapshot() (string, error) { return sink.ID(), nil } -// compactLogs takes the last inclusive index of a snapshot -// and trims the logs that are no longer needed. -func (r *Raft) compactLogs(snapIdx uint64) error { - defer metrics.MeasureSince([]string{"raft", "compactLogs"}, time.Now()) +// compactLogsWithTrailing takes the last inclusive index of a snapshot, +// the lastLogIdx, and and the trailingLogs and trims the logs that +// are no longer needed. 
+func (r *Raft) compactLogsWithTrailing(snapIdx uint64, lastLogIdx uint64, trailingLogs uint64) error { // Determine log ranges to compact minLog, err := r.logs.FirstIndex() if err != nil { @@ -218,11 +221,8 @@ func (r *Raft) compactLogs(snapIdx uint64) error { } // Check if we have enough logs to truncate - lastLogIdx, _ := r.getLastLog() - // Use a consistent value for trailingLogs for the duration of this method // call to avoid surprising behaviour. - trailingLogs := r.config().TrailingLogs if lastLogIdx <= trailingLogs { return nil } @@ -246,3 +246,33 @@ func (r *Raft) compactLogs(snapIdx uint64) error { } return nil } + +// compactLogs takes the last inclusive index of a snapshot +// and trims the logs that are no longer needed. +func (r *Raft) compactLogs(snapIdx uint64) error { + defer metrics.MeasureSince([]string{"raft", "compactLogs"}, time.Now()) + + lastLogIdx, _ := r.getLastLog() + trailingLogs := r.config().TrailingLogs + + return r.compactLogsWithTrailing(snapIdx, lastLogIdx, trailingLogs) +} + +// removeOldLogs removes all old logs from the store. This is used for +// MonotonicLogStores after restore. Callers should verify that the store +// implementation is monotonic prior to calling. +func (r *Raft) removeOldLogs() error { + defer metrics.MeasureSince([]string{"raft", "removeOldLogs"}, time.Now()) + + lastLogIdx, err := r.logs.LastIndex() + if err != nil { + return fmt.Errorf("failed to get last log index: %w", err) + } + + r.logger.Info("removing all old logs from log store") + + // call compactLogsWithTrailing with lastLogIdx for snapIdx since + // it will take the lesser of lastLogIdx and snapIdx to figure out + // the end for which to apply trailingLogs. + return r.compactLogsWithTrailing(lastLogIdx, lastLogIdx, 0) +} diff --git a/stable.go b/stable.go index ff59a8c57..3d5a57644 100644 --- a/stable.go +++ b/stable.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft // StableStore is used to provide stable storage diff --git a/state.go b/state.go index a58cd0d19..edbccae72 100644 --- a/state.go +++ b/state.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( diff --git a/tag.sh b/tag.sh index ddea0cf31..c6eb8a066 100755 --- a/tag.sh +++ b/tag.sh @@ -1,4 +1,7 @@ #!/usr/bin/env bash +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: MPL-2.0 + set -e # The version must be supplied from the environment. Do not include the diff --git a/tcp_transport.go b/tcp_transport.go index 3bd421958..573696e46 100644 --- a/tcp_transport.go +++ b/tcp_transport.go @@ -1,11 +1,15 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( "errors" - "github.com/hashicorp/go-hclog" "io" "net" "time" + + "github.com/hashicorp/go-hclog" ) var ( diff --git a/tcp_transport_test.go b/tcp_transport_test.go index 86735e9d0..131dec27c 100644 --- a/tcp_transport_test.go +++ b/tcp_transport_test.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( diff --git a/testing.go b/testing.go index 1dd61b94c..351a9abab 100644 --- a/testing.go +++ b/testing.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: MPL-2.0 + package raft import ( @@ -5,7 +8,6 @@ import ( "context" "fmt" "io" - "io/ioutil" "os" "reflect" "sync" @@ -13,15 +15,13 @@ import ( "time" "github.com/hashicorp/go-hclog" - "github.com/hashicorp/go-msgpack/codec" + "github.com/hashicorp/go-msgpack/v2/codec" ) -var ( - userSnapshotErrorsOnNoData = true -) +var userSnapshotErrorsOnNoData = true // Return configurations optimized for in-memory -func inmemConfig(t *testing.T) *Config { +func inmemConfig(t testing.TB) *Config { conf := DefaultConfig() conf.HeartbeatTimeout = 50 * time.Millisecond conf.ElectionTimeout = 50 * time.Millisecond @@ -130,11 +130,52 @@ func (m *MockSnapshot) Persist(sink SnapshotSink) error { func (m *MockSnapshot) Release() { } +// MockMonotonicLogStore is a LogStore wrapper for testing the +// MonotonicLogStore interface. +type MockMonotonicLogStore struct { + s LogStore +} + +// IsMonotonic implements the MonotonicLogStore interface. +func (m *MockMonotonicLogStore) IsMonotonic() bool { + return true +} + +// FirstIndex implements the LogStore interface. +func (m *MockMonotonicLogStore) FirstIndex() (uint64, error) { + return m.s.FirstIndex() +} + +// LastIndex implements the LogStore interface. +func (m *MockMonotonicLogStore) LastIndex() (uint64, error) { + return m.s.LastIndex() +} + +// GetLog implements the LogStore interface. +func (m *MockMonotonicLogStore) GetLog(index uint64, log *Log) error { + return m.s.GetLog(index, log) +} + +// StoreLog implements the LogStore interface. +func (m *MockMonotonicLogStore) StoreLog(log *Log) error { + return m.s.StoreLog(log) +} + +// StoreLogs implements the LogStore interface. +func (m *MockMonotonicLogStore) StoreLogs(logs []*Log) error { + return m.s.StoreLogs(logs) +} + +// DeleteRange implements the LogStore interface. +func (m *MockMonotonicLogStore) DeleteRange(min uint64, max uint64) error { + return m.s.DeleteRange(min, max) +} + // This can be used as the destination for a logger and it'll // map them into calls to testing.T.Log, so that you only see // the logging for failed tests. type testLoggerAdapter struct { - t *testing.T + tb testing.TB prefix string } @@ -144,33 +185,38 @@ func (a *testLoggerAdapter) Write(d []byte) (int, error) { } if a.prefix != "" { l := a.prefix + ": " + string(d) - a.t.Log(l) + a.tb.Log(l) return len(l), nil } - a.t.Log(string(d)) + a.tb.Log(string(d)) return len(d), nil } -func newTestLogger(t *testing.T) hclog.Logger { - return newTestLoggerWithPrefix(t, "") +func newTestLogger(tb testing.TB) hclog.Logger { + return newTestLoggerWithPrefix(tb, "") } -// newTestLoggerWithPrefix returns a Logger that can be used in tests. prefix will -// be added as the name of the logger. +// newTestLoggerWithPrefix returns a Logger that can be used in tests. prefix +// will be added as the name of the logger. // // If tests are run with -v (verbose mode, or -json which implies verbose) the -// log output will go to stderr directly. -// If tests are run in regular "quiet" mode, logs will be sent to t.Log so that -// the logs only appear when a test fails. -func newTestLoggerWithPrefix(t *testing.T, prefix string) hclog.Logger { +// log output will go to stderr directly. If tests are run in regular "quiet" +// mode, logs will be sent to t.Log so that the logs only appear when a test +// fails. +// +// Be careful where this is used though - calling t.Log after the test completes +// causes a panic. 
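MockMonotonicLogStore above always answers true so tests can exercise the monotonic-store paths (such as removeOldLogs in snapshot.go) without a real backend. The capability check itself is just a type assertion against the MonotonicLogStore interface; the helper below is an illustrative sketch, not code from this diff.

```go
// Sketch of the detection pattern MonotonicLogStore enables: a LogStore is
// treated as monotonic only if it implements the interface and reports true.
// storeIsMonotonic is a hypothetical helper for illustration.
func storeIsMonotonic(s LogStore) bool {
	m, ok := s.(MonotonicLogStore)
	return ok && m.IsMonotonic()
}
```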
This is common if you use it for a NetworkTransport for +// example and then close the transport at the end of the test because an error +// is logged after the test is complete. +func newTestLoggerWithPrefix(tb testing.TB, prefix string) hclog.Logger { if testing.Verbose() { - return hclog.New(&hclog.LoggerOptions{Name: prefix}) + return hclog.New(&hclog.LoggerOptions{Name: prefix, Level: hclog.Trace}) } return hclog.New(&hclog.LoggerOptions{ Name: prefix, - Output: &testLoggerAdapter{t: t, prefix: prefix}, + Output: &testLoggerAdapter{tb: tb, prefix: prefix}, }) } @@ -203,6 +249,15 @@ func (c *cluster) Merge(other *cluster) { c.rafts = append(c.rafts, other.rafts...) } +func (c *cluster) RemoveServer(id ServerID) { + for i, n := range c.rafts { + if n.localID == id { + c.rafts = append(c.rafts[:i], c.rafts[i+1:]...) + return + } + } +} + // notifyFailed will close the failed channel which can signal the goroutine // running the test that another goroutine has detected a failure in order to // terminate the test. @@ -376,9 +431,9 @@ func (c *cluster) GetInState(s RaftState) []*Raft { // Wait until we have a stable instate slice. Each time we see an // observation a state has changed, recheck it and if it has changed, // restart the timer. - var pollStartTime = time.Now() + pollStartTime := time.Now() for { - inState, highestTerm := c.pollState(s) + _, highestTerm := c.pollState(s) inStateTime := time.Now() // Sometimes this routine is called very early on before the @@ -424,8 +479,9 @@ func (c *cluster) GetInState(s RaftState) []*Raft { c.t.Fatalf("timer channel errored") } - c.logger.Info(fmt.Sprintf("stable state for %s reached at %s (%d nodes), %s from start of poll, %s from cluster start. Timeout at %s, %s after stability", - s, inStateTime, len(inState), inStateTime.Sub(pollStartTime), inStateTime.Sub(c.startTime), t, t.Sub(inStateTime))) + inState, highestTerm := c.pollState(s) + c.logger.Info(fmt.Sprintf("stable state for %s reached at %s (%d nodes), highestTerm is %d, %s from start of poll, %s from cluster start. Timeout at %s, %s after stability", + s, inStateTime, len(inState), highestTerm, inStateTime.Sub(pollStartTime), inStateTime.Sub(c.startTime), t, t.Sub(inStateTime))) return inState } } @@ -445,6 +501,12 @@ func (c *cluster) Leader() *Raft { // state. func (c *cluster) Followers() []*Raft { expFollowers := len(c.rafts) - 1 + return c.WaitForFollowers(expFollowers) +} + +// WaitForFollowers waits for the cluster to have a given number of followers and stay in a stable +// state. 
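RemoveServer and WaitForFollowers above are intended to be used together in multi-step tests. The fragment below is a hypothetical sketch, not code from this diff, and assumes the removed ID is not the current leader.

```go
// Hypothetical test helper combining the new cluster methods. RemoveServer
// only drops the node from the harness's bookkeeping (it does not shut the
// node down), so the remaining tracked nodes should settle as one leader
// plus len(c.rafts)-1 followers.
func waitAfterForgetting(c *cluster, id ServerID) []*Raft {
	c.RemoveServer(id)
	return c.WaitForFollowers(len(c.rafts) - 1)
}
```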
+func (c *cluster) WaitForFollowers(expFollowers int) []*Raft { followers := c.GetInState(Follower) if len(followers) != expFollowers { c.t.Fatalf("timeout waiting for %d followers (followers are %v)", expFollowers, followers) @@ -529,15 +591,16 @@ func (c *cluster) EnsureLeader(t *testing.T, expect ServerAddress) { // think the leader is correct fail := false for _, r := range c.rafts { - leader := ServerAddress(r.Leader()) - if leader != expect { - if leader == "" { - leader = "[none]" + leaderAddr, _ := r.LeaderWithID() + + if leaderAddr != expect { + if leaderAddr == "" { + leaderAddr = "[none]" } if expect == "" { - c.logger.Error("peer sees incorrect leader", "peer", r, "leader", leader, "expected-leader", "[none]") + c.logger.Error("peer sees incorrect leader", "peer", r, "leader", leaderAddr, "expected-leader", "[none]") } else { - c.logger.Error("peer sees incorrect leader", "peer", r, "leader", leader, "expected-leader", expect) + c.logger.Error("peer sees incorrect leader", "peer", r, "leader", leaderAddr, "expected-leader", expect) } fail = true } @@ -660,6 +723,7 @@ type MakeClusterOpts struct { ConfigStoreFSM bool MakeFSMFunc func() FSM LongstopTimeout time.Duration + MonotonicLogs bool } // makeCluster will return a cluster with the given config and number of peers. @@ -690,7 +754,7 @@ func makeCluster(t *testing.T, opts *MakeClusterOpts) *cluster { // Setup the stores and transports for i := 0; i < opts.Peers; i++ { - dir, err := ioutil.TempDir("", "raft") + dir, err := os.MkdirTemp("", "raft") if err != nil { t.Fatalf("err: %v", err) } @@ -735,11 +799,16 @@ func makeCluster(t *testing.T, opts *MakeClusterOpts) *cluster { // Create all the rafts c.startTime = time.Now() for i := 0; i < opts.Peers; i++ { - logs := c.stores[i] + var logs LogStore + logs = c.stores[i] store := c.stores[i] snap := c.snaps[i] trans := c.trans[i] + if opts.MonotonicLogs { + logs = &MockMonotonicLogStore{s: logs} + } + peerConf := opts.Conf peerConf.LocalID = configuration.Servers[i].ID peerConf.Logger = newTestLoggerWithPrefix(t, string(configuration.Servers[i].ID)) @@ -791,7 +860,7 @@ func MakeClusterCustom(t *testing.T, opts *MakeClusterOpts) *cluster { // NOTE: This is exposed for middleware testing purposes and is not a stable API func FileSnapTest(t *testing.T) (string, *FileSnapshotStore) { // Create a test dir - dir, err := ioutil.TempDir("", "raft") + dir, err := os.MkdirTemp("", "raft") if err != nil { t.Fatalf("err: %v ", err) } diff --git a/testing_batch.go b/testing_batch.go index afb228561..3903d95a5 100644 --- a/testing_batch.go +++ b/testing_batch.go @@ -1,3 +1,7 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build batchtest // +build batchtest package raft diff --git a/transport.go b/transport.go index b18d24593..c64fff6ec 100644 --- a/transport.go +++ b/transport.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( @@ -63,6 +66,16 @@ type Transport interface { TimeoutNow(id ServerID, target ServerAddress, args *TimeoutNowRequest, resp *TimeoutNowResponse) error } +// WithPreVote is an interface that a transport may provide which +// allows a transport to support a PreVote request. +// +// It is defined separately from Transport as unfortunately it wasn't in the +// original interface specification. +type WithPreVote interface { + // RequestPreVote sends the appropriate RPC to the target node. 
+ RequestPreVote(id ServerID, target ServerAddress, args *RequestPreVoteRequest, resp *RequestPreVoteResponse) error +} + // WithClose is an interface that a transport may provide which // allows a transport to be shut down cleanly when a Raft instance // shuts down. @@ -78,9 +91,10 @@ type WithClose interface { // LoopbackTransport is an interface that provides a loopback transport suitable for testing // e.g. InmemTransport. It's there so we don't have to rewrite tests. type LoopbackTransport interface { - Transport // Embedded transport reference - WithPeers // Embedded peer management - WithClose // with a close routine + Transport // Embedded transport reference + WithPeers // Embedded peer management + WithClose // with a close routine + WithPreVote // with a prevote } // WithPeers is an interface that a transport may provide which allows for connection and diff --git a/transport_test.go b/transport_test.go index 5a59253df..6218b3de2 100644 --- a/transport_test.go +++ b/transport_test.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( @@ -41,7 +44,6 @@ func TestTransport_AppendEntries(t *testing.T) { // Make the RPC request args := AppendEntriesRequest{ Term: 10, - Leader: []byte("cartman"), PrevLogEntry: 100, PrevLogTerm: 4, Entries: []*Log{ @@ -52,7 +54,9 @@ func TestTransport_AppendEntries(t *testing.T) { }, }, LeaderCommitIndex: 90, + RPCHeader: RPCHeader{Addr: []byte("cartman")}, } + resp := AppendEntriesResponse{ Term: 4, LastLog: 90, @@ -104,7 +108,6 @@ func TestTransport_AppendEntriesPipeline(t *testing.T) { // Make the RPC request args := AppendEntriesRequest{ Term: 10, - Leader: []byte("cartman"), PrevLogEntry: 100, PrevLogTerm: 4, Entries: []*Log{ @@ -115,7 +118,9 @@ func TestTransport_AppendEntriesPipeline(t *testing.T) { }, }, LeaderCommitIndex: 90, + RPCHeader: RPCHeader{Addr: []byte("cartman")}, } + resp := AppendEntriesResponse{ Term: 4, LastLog: 90, @@ -185,9 +190,9 @@ func TestTransport_RequestVote(t *testing.T) { // Make the RPC request args := RequestVoteRequest{ Term: 20, - Candidate: []byte("butters"), LastLogIndex: 100, LastLogTerm: 19, + RPCHeader: RPCHeader{Addr: []byte("butters")}, } resp := RequestVoteResponse{ Term: 100, @@ -240,12 +245,13 @@ func TestTransport_InstallSnapshot(t *testing.T) { // Make the RPC request args := InstallSnapshotRequest{ Term: 10, - Leader: []byte("kyle"), LastLogIndex: 100, LastLogTerm: 9, Peers: []byte("blah blah"), Size: 10, + RPCHeader: RPCHeader{Addr: []byte("kyle")}, } + resp := InstallSnapshotResponse{ Term: 10, Success: true, diff --git a/util.go b/util.go index 59a3f71d3..09c7742b2 100644 --- a/util.go +++ b/util.go @@ -1,3 +1,6 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( @@ -9,7 +12,7 @@ import ( "math/rand" "time" - "github.com/hashicorp/go-msgpack/codec" + "github.com/hashicorp/go-msgpack/v2/codec" ) func init() { @@ -32,7 +35,7 @@ func randomTimeout(minVal time.Duration) <-chan time.Time { if minVal == 0 { return nil } - extra := (time.Duration(rand.Int63()) % minVal) + extra := time.Duration(rand.Int63()) % minVal return time.After(minVal + extra) } @@ -126,7 +129,11 @@ func decodeMsgPack(buf []byte, out interface{}) error { // Encode writes an encoded object to a new bytes buffer. 
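Because WithPreVote is deliberately kept out of the base Transport interface, callers have to discover support at runtime. The helper below is an illustrative sketch of that probe, not code from this diff.

```go
// supportsPreVote is a hypothetical helper: transports written before the
// pre-vote extension simply fail the type assertion and can be driven with
// the plain RequestVote path instead.
func supportsPreVote(t Transport) bool {
	_, ok := t.(WithPreVote)
	return ok
}
```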
func encodeMsgPack(in interface{}) (*bytes.Buffer, error) { buf := bytes.NewBuffer(nil) - hd := codec.MsgpackHandle{} + hd := codec.MsgpackHandle{ + BasicHandle: codec.BasicHandle{ + TimeNotBuiltin: true, + }, + } enc := codec.NewEncoder(buf, &hd) err := enc.Encode(in) return buf, err @@ -144,6 +151,23 @@ func backoff(base time.Duration, round, limit uint64) time.Duration { return base } +// cappedExponentialBackoff computes the exponential backoff with an adjustable +// cap on the max timeout. +func cappedExponentialBackoff(base time.Duration, round, limit uint64, cap time.Duration) time.Duration { + power := min(round, limit) + for power > 2 { + if base > cap { + return cap + } + base *= 2 + power-- + } + if base > cap { + return cap + } + return base +} + // Needed for sorting []uint64, used to determine commitment type uint64Slice []uint64 diff --git a/util_test.go b/util_test.go index b029706dc..9e3959fd5 100644 --- a/util_test.go +++ b/util_test.go @@ -1,11 +1,32 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + package raft import ( + "bytes" "regexp" "testing" "time" ) +// TestMsgpackEncodeTime ensures that we don't break backwards compatibility when updating go-msgpack with +// Raft binary formats. +func TestMsgpackEncodeTimeDefaultFormat(t *testing.T) { + stamp := "2006-01-02T15:04:05Z" + tm, err := time.Parse(time.RFC3339, stamp) + if err != nil { + t.Fatal(err) + } + buf, err := encodeMsgPack(tm) + + expected := []byte{175, 1, 0, 0, 0, 14, 187, 75, 55, 229, 0, 0, 0, 0, 255, 255} + + if !bytes.Equal(buf.Bytes(), expected) { + t.Errorf("Expected time %s to encode as %+v but got %+v", stamp, expected, buf.Bytes()) + } +} + func TestRandomTimeout(t *testing.T) { start := time.Now() timeout := randomTimeout(time.Millisecond)
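To see what the heartbeat change in replication.go buys, the sketch below (not part of this diff) prints how cappedExponentialBackoff grows the retry delay and then pins it at half the heartbeat timeout, which is the cap the heartbeat loop passes in. failureWait and maxFailureScale are the package's existing constants, as used in that hunk.

```go
package raft

import (
	"fmt"
	"time"
)

// printHeartbeatBackoff is a throwaway illustration: the wait doubles for
// each failure beyond the second, until it reaches the supplied cap of
// heartbeatTimeout/2.
func printHeartbeatBackoff(heartbeatTimeout time.Duration) {
	for failures := uint64(1); failures <= 10; failures++ {
		wait := cappedExponentialBackoff(failureWait, failures, maxFailureScale, heartbeatTimeout/2)
		fmt.Printf("failures=%2d wait=%s\n", failures, wait)
	}
}
```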