-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(health): fancy composable status
- Loading branch information
Showing
6 changed files
with
411 additions
and
179 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
package healthstatus | ||
|
||
import ( | ||
"context" | ||
"log/slog" | ||
"time" | ||
|
||
"golang.org/x/sync/semaphore" | ||
) | ||
|
||
// AsyncHealthCheck wraps a HealthCheck and answers Check calls from the most
// recently stored result instead of invoking the wrapped check on every call.
// The stored result is refreshed by the goroutine launched in Start and by
// ForceUpdateStatus.
type AsyncHealthCheck struct {
	// healthCheck is the wrapped check that is evaluated on every update.
	healthCheck HealthCheck
	// log receives debug, info and error messages about the update cycle.
	log *slog.Logger
	// healthCheckInterval is the ticker period; half of it is used as the
	// timeout of a single update.
	healthCheckInterval time.Duration

	// sem is created with weight 1 and is acquired around ticker-driven and
	// forced updates.
	sem *semaphore.Weighted
	// current holds the result and error of the last finished update.
	current currentState
	// ticker is nil until Start has been called for the first time.
	ticker *time.Ticker
}
|
||
func Async(log *slog.Logger, interval time.Duration, hc HealthCheck) *AsyncHealthCheck { | ||
return &AsyncHealthCheck{ | ||
healthCheckInterval: interval, | ||
healthCheck: hc, | ||
log: log, | ||
sem: semaphore.NewWeighted(1), | ||
current: currentState{ | ||
Status: HealthResult{ | ||
Status: HealthStatusHealthy, | ||
Message: "", | ||
}, | ||
}, | ||
} | ||
} | ||
|
||
func (c *AsyncHealthCheck) ServiceName() string { | ||
return c.healthCheck.ServiceName() | ||
} | ||
|
||
func (c *AsyncHealthCheck) Check(context.Context) (HealthResult, error) { | ||
c.log.Debug("checked async") | ||
if c.ticker == nil { | ||
c.Start(context.Background()) | ||
} | ||
return c.current.Status, c.current.Err | ||
} | ||
|
||
func (r *AsyncHealthCheck) Start(ctx context.Context) { | ||
r.log.Debug("started async updates") | ||
if r.ticker != nil { | ||
r.ticker.Reset(r.healthCheckInterval) | ||
} else { | ||
r.ticker = time.NewTicker(r.healthCheckInterval) | ||
} | ||
go func() { | ||
err := r.updateStatus(ctx) | ||
if err != nil { | ||
r.log.Error("services are unhealthy", "error", err) | ||
} | ||
|
||
for { | ||
select { | ||
case <-ctx.Done(): | ||
r.log.Info("stop health checking, context is done") | ||
return | ||
case <-r.ticker.C: | ||
if r.sem.TryAcquire(1) { | ||
err := r.updateStatus(ctx) | ||
if err != nil { | ||
r.log.Error("services are unhealthy", "error", err) | ||
} | ||
r.sem.Release(1) | ||
} else { | ||
r.log.Info("skip updating health status because update is still running") | ||
} | ||
} | ||
} | ||
}() | ||
} | ||
|
||
func (r *AsyncHealthCheck) Stop(ctx context.Context) { | ||
r.ticker.Stop() | ||
} | ||
|
||
func (r *AsyncHealthCheck) ForceUpdateStatus(ctx context.Context) error { | ||
err := r.sem.Acquire(ctx, 1) | ||
if err != nil { | ||
return err | ||
} | ||
err = r.updateStatus(ctx) | ||
if err != nil { | ||
r.log.Error("services are unhealthy", "error", err) | ||
} | ||
r.sem.Release(1) | ||
return err | ||
} | ||
|
||
func (r *AsyncHealthCheck) updateStatus(ctx context.Context) error { | ||
r.log.Info("evaluating current service health statuses") | ||
if ctx == nil { | ||
ctx = context.Background() | ||
} | ||
|
||
ctx, cancel := context.WithTimeout(ctx, r.healthCheckInterval/2) | ||
defer cancel() | ||
|
||
res, err := r.healthCheck.Check(ctx) | ||
r.current = currentState{res, err} | ||
r.log.Debug("evaluated current service health statuses", "current", r.current) | ||
return err | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
package healthstatus | ||
|
||
import "context" | ||
|
||
// HealthStatus indicates the health of a service. It is a string type; the
// values defined by this package are the HealthStatus* constants.
type HealthStatus string
|
||
// The HealthStatus values defined by this package.
const (
	// HealthStatusHealthy is returned when the service is healthy.
	HealthStatusHealthy HealthStatus = "healthy"
	// HealthStatusUnhealthy is returned when the service is not healthy.
	HealthStatusUnhealthy HealthStatus = "unhealthy"
	// HealthStatusDegraded is returned when the service is degraded.
	HealthStatusDegraded HealthStatus = "degraded"
	// HealthStatusPartiallyUnhealthy is returned when the service is partially
	// not healthy. Note that its string value is "partial-outage".
	HealthStatusPartiallyUnhealthy HealthStatus = "partial-outage"
)
|
||
// HealthCheck defines an interface for health checks.
type HealthCheck interface {
	// ServiceName returns the name of the service that is health checked.
	ServiceName() string
	// Check evaluates the service and returns its health result together with
	// an error.
	Check(ctx context.Context) (HealthResult, error)
}
|
||
// HealthResult holds the health state of a service.
type HealthResult struct {
	// Status indicates the overall health state.
	Status HealthStatus
	// Message gives additional information on the overall health state.
	Message string
	// Services contains the individual health results of the services as
	// evaluated by the HealthCheck interface. The overall HealthStatus is then
	// derived automatically from the results of the health checks.
	//
	// Note that the individual HealthResults evaluated by the HealthCheck
	// interface may themselves consist of several services. This is optional,
	// but it allows nested health structures to be created. These can be used
	// for more sophisticated scenarios, such as evaluating platform health by
	// describing service availability in different locations.
	//
	// When using nested HealthResults, the status of the parent service can be
	// derived automatically from the status of its children by leaving the
	// parent's health status field blank.
	Services map[string]HealthResult
}
|
||
// currentState pairs the result of a health check evaluation with the error
// returned by that same evaluation.
//
// The field order matters: this package also builds values with unkeyed
// literals of the form currentState{result, err}.
type currentState struct {
	// Status is the health result returned by the check.
	Status HealthResult
	// Err is the error returned by the check; nil when the check succeeded.
	Err error
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
package healthstatus | ||
|
||
import ( | ||
"context" | ||
) | ||
|
||
// DeferredErrorHealthCheck wraps a HealthCheck and hides a limited number of
// consecutive errors by answering with the last successful result instead.
type DeferredErrorHealthCheck struct {
	// maxIgnoredErrors is the number of consecutive failing checks that are
	// answered with the last successful result.
	maxIgnoredErrors int
	// errorCountSinceSuccess counts failing checks; it is reset to zero by
	// every successful check.
	errorCountSinceSuccess int
	// lastSuccess is the outcome of the most recent check that returned no error.
	lastSuccess currentState
	// healthCheck is the wrapped check.
	healthCheck HealthCheck
}
|
||
func DeferErrors(maxIgnoredErrors int, hc HealthCheck) *DeferredErrorHealthCheck { | ||
return &DeferredErrorHealthCheck{ | ||
maxIgnoredErrors: maxIgnoredErrors, | ||
healthCheck: hc, | ||
lastSuccess: currentState{ | ||
Status: HealthResult{ | ||
Status: HealthStatusHealthy, | ||
Message: "", | ||
}, | ||
}, | ||
} | ||
} | ||
|
||
func (c *DeferredErrorHealthCheck) ServiceName() string { | ||
return c.healthCheck.ServiceName() | ||
} | ||
|
||
func (c *DeferredErrorHealthCheck) Check(ctx context.Context) (HealthResult, error) { | ||
status, err := c.healthCheck.Check(ctx) | ||
state := currentState{status, err} | ||
|
||
if err == nil { | ||
c.errorCountSinceSuccess = 0 | ||
c.lastSuccess = state | ||
return status, err | ||
} | ||
c.errorCountSinceSuccess++ | ||
if c.errorCountSinceSuccess > c.maxIgnoredErrors { | ||
return status, err | ||
} | ||
return c.lastSuccess.Status, c.lastSuccess.Err | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
package healthstatus | ||
|
||
import ( | ||
"context" | ||
"log/slog" | ||
"sync" | ||
|
||
"golang.org/x/sync/errgroup" | ||
) | ||
|
||
// groupedHealthCheck bundles several health checks under one service name.
// Its Check method evaluates all of them and reports their results per
// service name.
type groupedHealthCheck struct {
	// serviceName is the name reported by ServiceName.
	serviceName string
	// hcs are the bundled health checks.
	hcs []HealthCheck
	// log receives an error message for every bundled check that fails.
	log *slog.Logger
}
|
||
func Grouped(log *slog.Logger, serviceName string, checks ...HealthCheck) *groupedHealthCheck { | ||
return &groupedHealthCheck{ | ||
serviceName: serviceName, | ||
hcs: checks, | ||
log: log, | ||
} | ||
} | ||
|
||
func (c *groupedHealthCheck) Add(hc HealthCheck) { | ||
c.hcs = append(c.hcs, hc) | ||
} | ||
|
||
func (c *groupedHealthCheck) ServiceName() string { | ||
return c.serviceName | ||
} | ||
func (h *groupedHealthCheck) Check(ctx context.Context) (HealthResult, error) { | ||
type chanResult struct { | ||
name string | ||
HealthResult | ||
} | ||
if len(h.hcs) == 0 { | ||
return HealthResult{ | ||
Status: HealthStatusHealthy, | ||
Message: "", | ||
Services: nil, | ||
}, nil | ||
} | ||
var ( | ||
result = HealthResult{ | ||
Status: HealthStatusHealthy, | ||
Message: "", | ||
Services: map[string]HealthResult{}, | ||
} | ||
|
||
resultChan = make(chan chanResult) | ||
once sync.Once | ||
) | ||
defer once.Do(func() { close(resultChan) }) | ||
|
||
g, _ := errgroup.WithContext(ctx) | ||
|
||
for _, healthCheck := range h.hcs { | ||
name := healthCheck.ServiceName() | ||
healthCheck := healthCheck | ||
|
||
g.Go(func() error { | ||
result := chanResult{ | ||
name: name, | ||
HealthResult: HealthResult{ | ||
Status: HealthStatusHealthy, | ||
Message: "", | ||
}, | ||
} | ||
defer func() { | ||
resultChan <- result | ||
}() | ||
|
||
var err error | ||
result.HealthResult, err = healthCheck.Check(ctx) | ||
if err != nil { | ||
result.Message = err.Error() | ||
h.log.Error("unhealthy service", "name", name, "status", result.Status, "error", err) | ||
} | ||
|
||
return err | ||
}) | ||
} | ||
|
||
finished := make(chan bool) | ||
go func() { | ||
for r := range resultChan { | ||
result.Services[r.name] = r.HealthResult | ||
} | ||
finished <- true | ||
}() | ||
err := g.Wait() | ||
once.Do(func() { close(resultChan) }) | ||
|
||
<-finished | ||
|
||
if err != nil { | ||
result.Message = err.Error() | ||
result.Status = HealthStatusUnhealthy | ||
} | ||
result.Status = DeriveOverallHealthStatus(result.Services) | ||
return result, err | ||
} | ||
|
||
func DeriveOverallHealthStatus(services map[string]HealthResult) HealthStatus { | ||
var ( | ||
result = HealthStatusHealthy | ||
degraded int | ||
unhealthy int | ||
) | ||
|
||
for k, service := range services { | ||
if len(service.Services) > 0 && service.Status == "" { | ||
service.Status = DeriveOverallHealthStatus(service.Services) | ||
} | ||
services[k] = service | ||
switch service.Status { | ||
case HealthStatusHealthy: | ||
case HealthStatusDegraded: | ||
degraded++ | ||
case HealthStatusUnhealthy, HealthStatusPartiallyUnhealthy: | ||
unhealthy++ | ||
default: | ||
unhealthy++ | ||
} | ||
} | ||
|
||
if len(services) > 0 { | ||
if degraded > 0 { | ||
result = HealthStatusDegraded | ||
} | ||
if unhealthy > 0 { | ||
result = HealthStatusPartiallyUnhealthy | ||
} | ||
if unhealthy == len(services) { | ||
result = HealthStatusUnhealthy | ||
} | ||
} | ||
|
||
return result | ||
} |
Oops, something went wrong.