Skip to content

Commit

Permalink
Non-quorum member support.
Browse files Browse the repository at this point in the history
1. Currently, all nodes participate in the quorum decision-making process.
2. This patch allows nodes to be marked as non-quorum members.
3. Such members are not counted among the nodes that must be alive for
quorum to exist.
4. Quorum = a majority of the quorum members are online.
5. Add tests to ensure:
- even if a majority of non-quorum members die, the cluster stays in quorum
- add/remove of quorum members does not change cluster quorum
  • Loading branch information
sangleganesh committed Mar 28, 2017
1 parent 02f3e3a commit aa5bca5
Show file tree
Hide file tree
Showing 13 changed files with 566 additions and 267 deletions.
6 changes: 3 additions & 3 deletions api.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ type GossipStore interface {
GetLocalNodeInfo(types.NodeId) (types.NodeInfo, error)

// Add a new node in the database
AddNode(types.NodeId, types.NodeStatus)
AddNode(types.NodeId, types.NodeStatus, bool)

// Remove a node from the database
RemoveNode(types.NodeId) error
Expand All @@ -72,8 +72,8 @@ type Gossiper interface {
// GetNodes returns a list of the connection addresses
GetNodes() []string

// UpdateCluster updates gossip with latest peer nodes Id-Ip mapping
UpdateCluster(map[types.NodeId]string)
// UpdateCluster updates gossip with latest peer nodes info
UpdateCluster(map[types.NodeId]types.NodeUpdate)

// ExternalNodeLeave is used to indicate to gossip that one of the nodes might be down.
// It checks quorum and appropriately marks either self down or the other node down.
Expand Down
8 changes: 4 additions & 4 deletions proto/gossip.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ func (g *GossiperImpl) Init(
}

func (g *GossiperImpl) Start(knownIps []string) error {
g.InitCurrentState(len(knownIps) + 1)
g.InitCurrentState(uint(len(knownIps) + 1))
list, err := ml.Create(g.mlConf)
if err != nil {
log.Warnf("gossip: Unable to create memberlist: " + err.Error())
Expand Down Expand Up @@ -166,10 +166,9 @@ func (g *GossiperImpl) GetNodes() []string {
nodeList[i] = node.Addr.String()
}
return nodeList

}

func (g *GossiperImpl) UpdateCluster(peers map[types.NodeId]string) {
func (g *GossiperImpl) UpdateCluster(peers map[types.NodeId]types.NodeUpdate) {
g.updateCluster(peers)
g.triggerStateEvent(types.UPDATE_CLUSTER_SIZE)
}
Expand All @@ -181,7 +180,8 @@ func (g *GossiperImpl) ExternalNodeLeave(nodeId types.NodeId) types.NodeId {
return nodeId
} else {
// We are the culprit as we are not in quorum
log.Infof("gossip: Our Status: %v. We should go down.", g.GetSelfStatus())
log.Infof("gossip: Our Status: %v. We should go down.",
g.GetSelfStatus())
return g.NodeId()
}
}
17 changes: 11 additions & 6 deletions proto/gossip_delegates.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,10 @@ func (gd *GossipDelegate) InitGossipDelegate(
gd.quorumTimeout = quorumTimeout
}

func (gd *GossipDelegate) InitCurrentState(clusterSize int) {
func (gd *GossipDelegate) InitCurrentState(clusterSize uint) {
// Our initial state is NOT_IN_QUORUM
gd.currentState = state.GetNotInQuorum(clusterSize, types.NodeId(gd.nodeId), gd.stateEvent)
gd.currentState = state.GetNotInQuorum(
uint(clusterSize), types.NodeId(gd.nodeId), gd.stateEvent)
// Start the go routine which handles all the events
// and changes state of the node
go gd.handleStateEvents()
Expand Down Expand Up @@ -302,16 +303,20 @@ func (gd *GossipDelegate) handleStateEvents() {
case types.NODE_LEAVE:
gd.currentState, _ = gd.currentState.NodeLeave(gd.GetLocalState())
case types.UPDATE_CLUSTER_SIZE:
gd.currentState, _ = gd.currentState.UpdateClusterSize(gd.getClusterSize(), gd.GetLocalState())
gd.currentState, _ = gd.currentState.UpdateClusterSize(
gd.getNumQuorumMembers(), gd.GetLocalState())
case types.TIMEOUT:
newState, _ := gd.currentState.Timeout(gd.getClusterSize(), gd.GetLocalState())
newState, _ := gd.currentState.Timeout(
gd.getNumQuorumMembers(), gd.GetLocalState())
if newState.NodeStatus() != gd.currentState.NodeStatus() {
logrus.Infof("gossip: Quorum Timeout. Waited for (%v)", gd.quorumTimeout)
logrus.Infof("gossip: Quorum Timeout. Waited for (%v)",
gd.quorumTimeout)
}
gd.currentState = newState
}
newStatus := gd.currentState.NodeStatus()
if previousStatus == types.NODE_STATUS_UP && newStatus == types.NODE_STATUS_SUSPECT_NOT_IN_QUORUM {
if previousStatus == types.NODE_STATUS_UP &&
newStatus == types.NODE_STATUS_SUSPECT_NOT_IN_QUORUM {
// Start a timer
go gd.startQuorumTimer()
}
Expand Down
Loading

0 comments on commit aa5bca5

Please sign in to comment.