chore: Refactor host in scope and separate to new package topology (#356)

* chore: Refactor host in scope and separate to new package topology

*Motivation:*
Right now a lot of the code base in chproxy resides in a shared main package. This has led to tightly coupled code and a code base that is very hard to read.

This PR continues the work started with the heartbeat package: moving code out of the main package, decoupling it, and improving readability.

*Additions and Changes:*
- Create a new `Node` struct in the new `topology` package that exposes the methods of the previous `host` struct without exposing its internal state (a usage sketch follows this list).
- Improve the Node code by using more modern constructs such as `atomic.Bool`.
- Update the scope package and every usage of `host` with `topology.Node`.
- Include a new test case for `Node.StartHeartbeat`.
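
A rough usage sketch of the new API (not part of this commit): the address, cluster and replica names are placeholders, the heartbeat value is assumed to be built elsewhere from the existing `heartbeat` package, and `topology.RegisterMetrics` is assumed to have been called once at startup because the heartbeat reports health metrics.

package example

import (
	"net/url"

	"github.com/contentsquare/chproxy/internal/heartbeat"
	"github.com/contentsquare/chproxy/internal/topology"
)

// useNode sketches how a caller drives a topology.Node.
func useNode(hb heartbeat.HeartBeat) {
	u, _ := url.Parse("http://clickhouse-1:8123") // placeholder address
	node := topology.NewNode(u, hb, "example-cluster", "replica-1",
		topology.WithDefaultActiveState(true))

	// Health-check the node in the background until done is closed.
	done := make(chan struct{})
	defer close(done)
	go node.StartHeartbeat(done)

	if node.IsActive() {
		node.IncrementConnections()
		defer node.DecrementConnections()
		// ... proxy a request to node.Scheme() + "://" + node.Host() ...
	}
}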

*Notes:*
Due to the coupled nature of the code around scope, I didn't see an opportunity to do this incrementally. The PR will sadly be large and hard to review.

Signed-off-by: Lennard Eijsackers <[email protected]>

* chore: Resolve comments on PR

---------

Signed-off-by: Lennard Eijsackers <[email protected]>
Blokje5 authored Sep 15, 2023
1 parent 466b4bf commit 73bdd51
Showing 11 changed files with 500 additions and 304 deletions.
15 changes: 15 additions & 0 deletions internal/counter/counter.go
@@ -0,0 +1,15 @@
package counter

import "sync/atomic"

type Counter struct {
value atomic.Uint32
}

func (c *Counter) Store(n uint32) { c.value.Store(n) }

func (c *Counter) Load() uint32 { return c.value.Load() }

// Dec decrements the counter by one: adding ^uint32(0) (all bits set)
// wraps the unsigned value around, which is equivalent to subtracting 1.
func (c *Counter) Dec() { c.value.Add(^uint32(0)) }

func (c *Counter) Inc() uint32 { return c.value.Add(1) }
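
A minimal usage sketch of the counter (not part of the diff): `Dec` relies on unsigned wrap-around, so decrementing a zero counter wraps to `math.MaxUint32`, and callers are expected to pair every `Dec` with a prior `Inc`.

package example

import (
	"fmt"

	"github.com/contentsquare/chproxy/internal/counter"
)

func counterDemo() {
	var c counter.Counter // the zero value is ready to use
	c.Inc()               // 1
	c.Inc()               // 2
	c.Dec()               // 1
	fmt.Println(c.Load()) // prints 1
	// Dec on a zero counter would wrap to math.MaxUint32, so every Dec
	// should be paired with a prior Inc (as Node does for connections).
}
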
23 changes: 23 additions & 0 deletions internal/topology/main_test.go
@@ -0,0 +1,23 @@
package topology

import (
"os"
"testing"

"github.com/contentsquare/chproxy/config"
)

func TestMain(m *testing.M) {
cfg := &config.Config{
Server: config.Server{
Metrics: config.Metrics{
Namespace: "test",
},
},
}

// Metrics should be preregistered to avoid nil-panics.
RegisterMetrics(cfg)
code := m.Run()
os.Exit(code)
}
61 changes: 61 additions & 0 deletions internal/topology/metrics.go
@@ -0,0 +1,61 @@
package topology

// TODO: this is only here to avoid import cycles. We should have a separate package for metrics.
import (
"github.com/contentsquare/chproxy/config"
"github.com/prometheus/client_golang/prometheus"
)

var (
HostHealth *prometheus.GaugeVec
HostPenalties *prometheus.CounterVec
)

func initMetrics(cfg *config.Config) {
namespace := cfg.Server.Metrics.Namespace
HostHealth = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Name: "host_health",
Help: "Health state of hosts by clusters",
},
[]string{"cluster", "replica", "cluster_node"},
)
HostPenalties = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: namespace,
Name: "host_penalties_total",
Help: "Total number of given penalties by host",
},
[]string{"cluster", "replica", "cluster_node"},
)
}

func RegisterMetrics(cfg *config.Config) {
initMetrics(cfg)
prometheus.MustRegister(HostHealth, HostPenalties)
}

func reportNodeHealthMetric(clusterName, replicaName, nodeName string, active bool) {
label := prometheus.Labels{
"cluster": clusterName,
"replica": replicaName,
"cluster_node": nodeName,
}

if active {
HostHealth.With(label).Set(1)
} else {
HostHealth.With(label).Set(0)
}
}

func incrementPenaltiesMetric(clusterName, replicaName, nodeName string) {
label := prometheus.Labels{
"cluster": clusterName,
"replica": replicaName,
"cluster_node": nodeName,
}

HostPenalties.With(label).Inc()
}
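
A minimal sketch (assumed wiring, not from this commit) of registering the topology metrics once at startup and exposing them over HTTP. The namespace and port are placeholders; how chproxy itself wires its metrics endpoint is outside this diff.

package example

import (
	"net/http"

	"github.com/contentsquare/chproxy/config"
	"github.com/contentsquare/chproxy/internal/topology"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

func setupMetrics() {
	cfg := &config.Config{
		Server: config.Server{
			Metrics: config.Metrics{Namespace: "chproxy"}, // placeholder namespace
		},
	}
	// Must run once before any Node starts heartbeating; otherwise
	// HostHealth/HostPenalties are nil and reporting panics.
	topology.RegisterMetrics(cfg)

	http.Handle("/metrics", promhttp.Handler())
	// http.ListenAndServe(":9090", nil) // placeholder port; omitted here
}
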
194 changes: 194 additions & 0 deletions internal/topology/node.go
@@ -0,0 +1,194 @@
package topology

import (
"context"
"net/url"
"sync/atomic"
"time"

"github.com/contentsquare/chproxy/internal/counter"
"github.com/contentsquare/chproxy/internal/heartbeat"
"github.com/contentsquare/chproxy/log"
)

const (
// prevents excess goroutine creation while penalizing an overloaded host
DefaultPenaltySize = 5
DefaultMaxSize = 300
DefaultPenaltyDuration = time.Second * 10
)

type nodeOpts struct {
defaultActive bool
penaltySize uint32
penaltyMaxSize uint32
penaltyDuration time.Duration
}

func defaultNodeOpts() nodeOpts {
return nodeOpts{
penaltySize: DefaultPenaltySize,
penaltyMaxSize: DefaultMaxSize,
penaltyDuration: DefaultPenaltyDuration,
}
}

type NodeOption interface {
apply(*nodeOpts)
}

type defaultActive struct {
active bool
}

func (o defaultActive) apply(opts *nodeOpts) {
opts.defaultActive = o.active
}

func WithDefaultActiveState(active bool) NodeOption {
return defaultActive{
active: active,
}
}

type Node struct {
// Node Address.
addr *url.URL

// Whether this node is alive.
active atomic.Bool

// Counter of currently running connections.
connections counter.Counter

// Counter of unsuccessful requests, used to decrease the host's priority.
penalty atomic.Uint32

// Heartbeat function
hb heartbeat.HeartBeat

// TODO These fields are only used for labels in prometheus. We should have a different way to pass the labels.
// For metrics only
clusterName string
replicaName string

// Additional configuration options
opts nodeOpts
}

func NewNode(addr *url.URL, hb heartbeat.HeartBeat, clusterName, replicaName string, opts ...NodeOption) *Node {
nodeOpts := defaultNodeOpts()

for _, opt := range opts {
opt.apply(&nodeOpts)
}

n := &Node{
addr: addr,
hb: hb,
clusterName: clusterName,
replicaName: replicaName,
opts: nodeOpts,
}

if n.opts.defaultActive {
n.SetIsActive(true)
}

return n
}

func (n *Node) IsActive() bool {
return n.active.Load()
}

func (n *Node) SetIsActive(active bool) {
n.active.Store(active)
}

// StartHeartbeat runs the heartbeat healthcheck against the node
// until the done channel is closed.
// If the heartbeat fails, the active status of the node is changed.
func (n *Node) StartHeartbeat(done <-chan struct{}) {
ctx, cancel := context.WithCancel(context.Background())
for {
n.heartbeat(ctx)
select {
case <-done:
cancel()
return
case <-time.After(n.hb.Interval()):
}
}
}

func (n *Node) heartbeat(ctx context.Context) {
if err := n.hb.IsHealthy(ctx, n.addr.String()); err == nil {
n.active.Store(true)
reportNodeHealthMetric(n.clusterName, n.replicaName, n.Host(), true)
} else {
log.Errorf("error while health-checking %q host: %s", n.Host(), err)
n.active.Store(false)
reportNodeHealthMetric(n.clusterName, n.replicaName, n.Host(), false)
}
}

// Penalize penalizes the node when a request fails, decreasing its priority.
// If the penalty is already at the maximum allowed size, this function
// will not penalize the node further.
// A function is registered to run after the penalty duration and restore
// the priority again.
func (n *Node) Penalize() {
penalty := n.penalty.Load()
if penalty >= n.opts.penaltyMaxSize {
return
}

incrementPenaltiesMetric(n.clusterName, n.replicaName, n.Host())

n.penalty.Add(n.opts.penaltySize)

time.AfterFunc(n.opts.penaltyDuration, func() {
// ^uint32(penaltySize-1) equals -penaltySize in unsigned (two's complement)
// arithmetic, so this removes the penalty again once the duration elapses.
n.penalty.Add(^uint32(n.opts.penaltySize - 1))
})
}

// CurrentLoad returns the node's current load: the number of open
// connections plus the penalty.
func (n *Node) CurrentLoad() uint32 {
c := n.connections.Load()
p := n.penalty.Load()
return c + p
}

func (n *Node) CurrentConnections() uint32 {
return n.connections.Load()
}

func (n *Node) CurrentPenalty() uint32 {
return n.penalty.Load()
}

func (n *Node) IncrementConnections() {
n.connections.Inc()
}

func (n *Node) DecrementConnections() {
n.connections.Dec()
}

func (n *Node) Scheme() string {
return n.addr.Scheme
}

func (n *Node) Host() string {
return n.addr.Host
}

func (n *Node) ReplicaName() string {
return n.replicaName
}

func (n *Node) String() string {
return n.addr.String()
}
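
To illustrate how the exported load and state methods are meant to be consumed, here is a sketch of picking the least-loaded active node. The actual selection logic in the scope package is more involved and is not shown in this diff.

package example

import "github.com/contentsquare/chproxy/internal/topology"

// pickLeastLoaded returns the active node with the lowest current load
// (open connections plus penalty), or nil if no node is active.
func pickLeastLoaded(nodes []*topology.Node) *topology.Node {
	var best *topology.Node
	for _, n := range nodes {
		if !n.IsActive() {
			continue
		}
		if best == nil || n.CurrentLoad() < best.CurrentLoad() {
			best = n
		}
	}
	return best
}
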
84 changes: 84 additions & 0 deletions internal/topology/node_test.go
@@ -0,0 +1,84 @@
package topology

import (
"context"
"errors"
"net/url"
"testing"
"time"

"github.com/contentsquare/chproxy/internal/heartbeat"
"github.com/stretchr/testify/assert"
)

var _ heartbeat.HeartBeat = &mockHeartbeat{}

type mockHeartbeat struct {
interval time.Duration
err error
}

func (hb *mockHeartbeat) Interval() time.Duration {
return hb.interval
}

func (hb *mockHeartbeat) IsHealthy(ctx context.Context, addr string) error {
return hb.err
}

func TestPenalize(t *testing.T) {
node := NewNode(&url.URL{Host: "127.0.0.1"}, nil, "test", "test")
expectedLoad := uint32(0)
assert.Equal(t, expectedLoad, node.CurrentLoad(), "got running queries %d; expected %d", node.CurrentLoad(), expectedLoad)

node.Penalize()
expectedLoad = uint32(DefaultPenaltySize)
assert.Equal(t, expectedLoad, node.CurrentLoad(), "got running queries %d; expected %d", node.CurrentLoad(), expectedLoad)

// do more penalties than `penaltyMaxSize` allows
max := int(DefaultMaxSize/DefaultPenaltySize) * 2
for i := 0; i < max; i++ {
node.Penalize()
}

expectedLoad = uint32(DefaultMaxSize)
assert.Equal(t, expectedLoad, node.CurrentLoad(), "got running queries %d; expected %d", node.CurrentLoad(), expectedLoad)

// Still allow connections to increase.
node.IncrementConnections()
expectedLoad++
assert.Equal(t, expectedLoad, node.CurrentLoad(), "got running queries %d; expected %d", node.CurrentLoad(), expectedLoad)
}

func TestStartHeartbeat(t *testing.T) {
hb := &mockHeartbeat{
interval: 10 * time.Millisecond,
err: nil,
}

done := make(chan struct{})
defer close(done)

node := NewNode(&url.URL{Host: "127.0.0.1"}, hb, "test", "test")

// Node is eventually active after start.
go node.StartHeartbeat(done)

assert.Eventually(t, func() bool {
return node.IsActive()
}, time.Second, 100*time.Millisecond)

// change heartbeat to error, node eventually becomes inactive.
hb.err = errors.New("failed connection")

assert.Eventually(t, func() bool {
return !node.IsActive()
}, time.Second, 100*time.Millisecond)

// If error is removed node becomes active again.
hb.err = nil

assert.Eventually(t, func() bool {
return node.IsActive()
}, time.Second, 100*time.Millisecond)
}