-
Notifications
You must be signed in to change notification settings - Fork 259
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore: Refactor host in scope and seperate to new package topology (#356
) * chore: Refactor host in scope and seperate to new package topology *Motivation:* Right now a lot of the code base in chproxy resides in a shared main package. This has lead to a lot of coupled code and a code base that is very hard to read. This PR is a continuation of the work started with the heartbeat to move code away from the main package, decouple the code and improve readability. *Additions and Changes:* - Create a new `Node` struct in the new `topology` package that exposes methods from the previous `host` struct. However, it doesn't expose internal state. - Improve the Node code by using more modern constructs such as `atomic.Bool`. - Update the scope package and every usage of `host` with `topology.Node`. - Include a new test case for `Node.StartHeartbeat` *Notes:* Due to the coupled nature of the code around scope, I didn't see an opportunity to do this incrementally. The PR will sadly be large and hard to review. Signed-off-by: Lennard Eijsackers <[email protected]> * chore: Resolve comments on PR --------- Signed-off-by: Lennard Eijsackers <[email protected]>
- Loading branch information
Showing
11 changed files
with
500 additions
and
304 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
package counter | ||
|
||
import "sync/atomic" | ||
|
||
type Counter struct { | ||
value atomic.Uint32 | ||
} | ||
|
||
func (c *Counter) Store(n uint32) { c.value.Store(n) } | ||
|
||
func (c *Counter) Load() uint32 { return c.value.Load() } | ||
|
||
func (c *Counter) Dec() { c.value.Add(^uint32(0)) } | ||
|
||
func (c *Counter) Inc() uint32 { return c.value.Add(1) } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
package topology | ||
|
||
import ( | ||
"os" | ||
"testing" | ||
|
||
"github.com/contentsquare/chproxy/config" | ||
) | ||
|
||
func TestMain(m *testing.M) { | ||
cfg := &config.Config{ | ||
Server: config.Server{ | ||
Metrics: config.Metrics{ | ||
Namespace: "test", | ||
}, | ||
}, | ||
} | ||
|
||
// Metrics should be preregistered to avoid nil-panics. | ||
RegisterMetrics(cfg) | ||
code := m.Run() | ||
os.Exit(code) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
package topology | ||
|
||
// TODO this is only here to avoid recursive imports. We should have a separate package for metrics. | ||
import ( | ||
"github.com/contentsquare/chproxy/config" | ||
"github.com/prometheus/client_golang/prometheus" | ||
) | ||
|
||
var ( | ||
HostHealth *prometheus.GaugeVec | ||
HostPenalties *prometheus.CounterVec | ||
) | ||
|
||
func initMetrics(cfg *config.Config) { | ||
namespace := cfg.Server.Metrics.Namespace | ||
HostHealth = prometheus.NewGaugeVec( | ||
prometheus.GaugeOpts{ | ||
Namespace: namespace, | ||
Name: "host_health", | ||
Help: "Health state of hosts by clusters", | ||
}, | ||
[]string{"cluster", "replica", "cluster_node"}, | ||
) | ||
HostPenalties = prometheus.NewCounterVec( | ||
prometheus.CounterOpts{ | ||
Namespace: namespace, | ||
Name: "host_penalties_total", | ||
Help: "Total number of given penalties by host", | ||
}, | ||
[]string{"cluster", "replica", "cluster_node"}, | ||
) | ||
} | ||
|
||
func RegisterMetrics(cfg *config.Config) { | ||
initMetrics(cfg) | ||
prometheus.MustRegister(HostHealth, HostPenalties) | ||
} | ||
|
||
func reportNodeHealthMetric(clusterName, replicaName, nodeName string, active bool) { | ||
label := prometheus.Labels{ | ||
"cluster": clusterName, | ||
"replica": replicaName, | ||
"cluster_node": nodeName, | ||
} | ||
|
||
if active { | ||
HostHealth.With(label).Set(1) | ||
} else { | ||
HostHealth.With(label).Set(0) | ||
} | ||
} | ||
|
||
func incrementPenaltiesMetric(clusterName, replicaName, nodeName string) { | ||
label := prometheus.Labels{ | ||
"cluster": clusterName, | ||
"replica": replicaName, | ||
"cluster_node": nodeName, | ||
} | ||
|
||
HostPenalties.With(label).Inc() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,194 @@ | ||
package topology | ||
|
||
import ( | ||
"context" | ||
"net/url" | ||
"sync/atomic" | ||
"time" | ||
|
||
"github.com/contentsquare/chproxy/internal/counter" | ||
"github.com/contentsquare/chproxy/internal/heartbeat" | ||
"github.com/contentsquare/chproxy/log" | ||
) | ||
|
||
const ( | ||
// prevents excess goroutine creating while penalizing overloaded host | ||
DefaultPenaltySize = 5 | ||
DefaultMaxSize = 300 | ||
DefaultPenaltyDuration = time.Second * 10 | ||
) | ||
|
||
type nodeOpts struct { | ||
defaultActive bool | ||
penaltySize uint32 | ||
penaltyMaxSize uint32 | ||
penaltyDuration time.Duration | ||
} | ||
|
||
func defaultNodeOpts() nodeOpts { | ||
return nodeOpts{ | ||
penaltySize: DefaultPenaltySize, | ||
penaltyMaxSize: DefaultMaxSize, | ||
penaltyDuration: DefaultPenaltyDuration, | ||
} | ||
} | ||
|
||
type NodeOption interface { | ||
apply(*nodeOpts) | ||
} | ||
|
||
type defaultActive struct { | ||
active bool | ||
} | ||
|
||
func (o defaultActive) apply(opts *nodeOpts) { | ||
opts.defaultActive = o.active | ||
} | ||
|
||
func WithDefaultActiveState(active bool) NodeOption { | ||
return defaultActive{ | ||
active: active, | ||
} | ||
} | ||
|
||
type Node struct { | ||
// Node Address. | ||
addr *url.URL | ||
|
||
// Whether this node is alive. | ||
active atomic.Bool | ||
|
||
// Counter of currently running connections. | ||
connections counter.Counter | ||
|
||
// Counter of unsuccesfull request to decrease host priority. | ||
penalty atomic.Uint32 | ||
|
||
// Heartbeat function | ||
hb heartbeat.HeartBeat | ||
|
||
// TODO These fields are only used for labels in prometheus. We should have a different way to pass the labels. | ||
// For metrics only | ||
clusterName string | ||
replicaName string | ||
|
||
// Additional configuration options | ||
opts nodeOpts | ||
} | ||
|
||
func NewNode(addr *url.URL, hb heartbeat.HeartBeat, clusterName, replicaName string, opts ...NodeOption) *Node { | ||
nodeOpts := defaultNodeOpts() | ||
|
||
for _, opt := range opts { | ||
opt.apply(&nodeOpts) | ||
} | ||
|
||
n := &Node{ | ||
addr: addr, | ||
hb: hb, | ||
clusterName: clusterName, | ||
replicaName: replicaName, | ||
opts: nodeOpts, | ||
} | ||
|
||
if n.opts.defaultActive { | ||
n.SetIsActive(true) | ||
} | ||
|
||
return n | ||
} | ||
|
||
func (n *Node) IsActive() bool { | ||
return n.active.Load() | ||
} | ||
|
||
func (n *Node) SetIsActive(active bool) { | ||
n.active.Store(active) | ||
} | ||
|
||
// StartHeartbeat runs the heartbeat healthcheck against the node | ||
// until the done channel is closed. | ||
// If the heartbeat fails, the active status of the node is changed. | ||
func (n *Node) StartHeartbeat(done <-chan struct{}) { | ||
ctx, cancel := context.WithCancel(context.Background()) | ||
for { | ||
n.heartbeat(ctx) | ||
select { | ||
case <-done: | ||
cancel() | ||
return | ||
case <-time.After(n.hb.Interval()): | ||
} | ||
} | ||
} | ||
|
||
func (n *Node) heartbeat(ctx context.Context) { | ||
if err := n.hb.IsHealthy(ctx, n.addr.String()); err == nil { | ||
n.active.Store(true) | ||
reportNodeHealthMetric(n.clusterName, n.replicaName, n.Host(), true) | ||
} else { | ||
log.Errorf("error while health-checking %q host: %s", n.Host(), err) | ||
n.active.Store(false) | ||
reportNodeHealthMetric(n.clusterName, n.replicaName, n.Host(), false) | ||
} | ||
} | ||
|
||
// Penalize a node if a request failed to decrease it's priority. | ||
// If the penalty is already at the maximum allowed size this function | ||
// will not penalize the node further. | ||
// A function will be registered to run after the penalty duration to | ||
// increase the priority again. | ||
func (n *Node) Penalize() { | ||
penalty := n.penalty.Load() | ||
if penalty >= n.opts.penaltyMaxSize { | ||
return | ||
} | ||
|
||
incrementPenaltiesMetric(n.clusterName, n.replicaName, n.Host()) | ||
|
||
n.penalty.Add(n.opts.penaltySize) | ||
|
||
time.AfterFunc(n.opts.penaltyDuration, func() { | ||
n.penalty.Add(^uint32(n.opts.penaltySize - 1)) | ||
}) | ||
} | ||
|
||
// CurrentLoad returns the current node returns the number of open connections | ||
// plus the penalty. | ||
func (n *Node) CurrentLoad() uint32 { | ||
c := n.connections.Load() | ||
p := n.penalty.Load() | ||
return c + p | ||
} | ||
|
||
func (n *Node) CurrentConnections() uint32 { | ||
return n.connections.Load() | ||
} | ||
|
||
func (n *Node) CurrentPenalty() uint32 { | ||
return n.penalty.Load() | ||
} | ||
|
||
func (n *Node) IncrementConnections() { | ||
n.connections.Inc() | ||
} | ||
|
||
func (n *Node) DecrementConnections() { | ||
n.connections.Dec() | ||
} | ||
|
||
func (n *Node) Scheme() string { | ||
return n.addr.Scheme | ||
} | ||
|
||
func (n *Node) Host() string { | ||
return n.addr.Host | ||
} | ||
|
||
func (n *Node) ReplicaName() string { | ||
return n.replicaName | ||
} | ||
|
||
func (n *Node) String() string { | ||
return n.addr.String() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
package topology | ||
|
||
import ( | ||
"context" | ||
"errors" | ||
"net/url" | ||
"testing" | ||
"time" | ||
|
||
"github.com/contentsquare/chproxy/internal/heartbeat" | ||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
var _ heartbeat.HeartBeat = &mockHeartbeat{} | ||
|
||
type mockHeartbeat struct { | ||
interval time.Duration | ||
err error | ||
} | ||
|
||
func (hb *mockHeartbeat) Interval() time.Duration { | ||
return hb.interval | ||
} | ||
|
||
func (hb *mockHeartbeat) IsHealthy(ctx context.Context, addr string) error { | ||
return hb.err | ||
} | ||
|
||
func TestPenalize(t *testing.T) { | ||
node := NewNode(&url.URL{Host: "127.0.0.1"}, nil, "test", "test") | ||
expectedLoad := uint32(0) | ||
assert.Equal(t, expectedLoad, node.CurrentLoad(), "got running queries %d; expected %d", node.CurrentLoad(), expectedLoad) | ||
|
||
node.Penalize() | ||
expectedLoad = uint32(DefaultPenaltySize) | ||
assert.Equal(t, expectedLoad, node.CurrentLoad(), "got running queries %d; expected %d", node.CurrentLoad(), expectedLoad) | ||
|
||
// do more penalties than `penaltyMaxSize` allows | ||
max := int(DefaultMaxSize/DefaultPenaltySize) * 2 | ||
for i := 0; i < max; i++ { | ||
node.Penalize() | ||
} | ||
|
||
expectedLoad = uint32(DefaultMaxSize) | ||
assert.Equal(t, expectedLoad, node.CurrentLoad(), "got running queries %d; expected %d", node.CurrentLoad(), expectedLoad) | ||
|
||
// Still allow connections to increase. | ||
node.IncrementConnections() | ||
expectedLoad++ | ||
assert.Equal(t, expectedLoad, node.CurrentLoad(), "got running queries %d; expected %d", node.CurrentLoad(), expectedLoad) | ||
} | ||
|
||
func TestStartHeartbeat(t *testing.T) { | ||
hb := &mockHeartbeat{ | ||
interval: 10 * time.Millisecond, | ||
err: nil, | ||
} | ||
|
||
done := make(chan struct{}) | ||
defer close(done) | ||
|
||
node := NewNode(&url.URL{Host: "127.0.0.1"}, hb, "test", "test") | ||
|
||
// Node is eventually active after start. | ||
go node.StartHeartbeat(done) | ||
|
||
assert.Eventually(t, func() bool { | ||
return node.IsActive() | ||
}, time.Second, 100*time.Millisecond) | ||
|
||
// change heartbeat to error, node eventually becomes inactive. | ||
hb.err = errors.New("failed connection") | ||
|
||
assert.Eventually(t, func() bool { | ||
return !node.IsActive() | ||
}, time.Second, 100*time.Millisecond) | ||
|
||
// If error is removed node becomes active again. | ||
hb.err = nil | ||
|
||
assert.Eventually(t, func() bool { | ||
return node.IsActive() | ||
}, time.Second, 100*time.Millisecond) | ||
} |
Oops, something went wrong.