Skip to content

Commit

Permalink
Add timeout option for webhook notifier.
Browse files Browse the repository at this point in the history
  • Loading branch information
stevesg committed Nov 27, 2024
1 parent 82b89dc commit fb57eba
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 0 deletions.
3 changes: 3 additions & 0 deletions config/notifiers.go
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,9 @@ type WebhookConfig struct {
// Alerts exceeding this threshold will be truncated. Setting this to 0
// allows an unlimited number of alerts.
MaxAlerts uint64 `yaml:"max_alerts" json:"max_alerts"`

// Timeout is the maximum time allowed to invoke the webhook.
Timeout *time.Duration `yaml:"timeout" json:"timeout"`
}

// UnmarshalYAML implements the yaml.Unmarshaler interface.
Expand Down
10 changes: 10 additions & 0 deletions notify/webhook/webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"net/http"
Expand Down Expand Up @@ -112,8 +113,17 @@ func (n *Notifier) Notify(ctx context.Context, alerts ...*types.Alert) (bool, er
url = strings.TrimSpace(string(content))
}

if n.conf.Timeout != nil {
postCtx, cancel := context.WithTimeoutCause(ctx, *n.conf.Timeout, fmt.Errorf("configured webhook timeout (%s) reached", *n.conf.Timeout))
defer cancel()
ctx = postCtx
}

resp, err := notify.PostJSON(ctx, n.client, url, &buf)
if err != nil {
if errors.Is(err, context.DeadlineExceeded) && ctx.Err() != nil {
err = context.Cause(ctx)
}
return true, notify.RedactURL(err)
}
defer notify.Drain(resp)
Expand Down
52 changes: 52 additions & 0 deletions test/with_api_v2/acceptance/send_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ package test

import (
"fmt"
"sync"
"testing"
"time"

Expand Down Expand Up @@ -464,3 +465,54 @@ receivers:

t.Log(co.Check())
}

// TestWebhookTimeout exercises the per-webhook `timeout` setting: the
// receiver is configured with a 1s timeout while the mock webhook stalls its
// first request for 2s, and the test then expects the alert to be collected
// twice (once per delivery attempt).
func TestWebhookTimeout(t *testing.T) {
	t.Parallel()

	// The %s placeholder is filled with the mock webhook's address below.
	// Nesting matters here: the routing options belong to `route`, the
	// webhook list belongs to the "default" receiver, and `timeout` belongs
	// to the individual webhook entry.
	conf := `
route:
  receiver: "default"
  group_by: [alertname]
  group_wait: 1s
  group_interval: 30s
  repeat_interval: 1m
receivers:
- name: "default"
  webhook_configs:
  - url: 'http://%s'
    timeout: 1s
`

	at := NewAcceptanceTest(t, &AcceptanceOpts{
		Tolerance: 150 * time.Millisecond,
	})

	co := at.Collector("webhook")
	wh := NewWebhook(t, co)

	// Guards the artificial delay so that only the first request is slow.
	var once sync.Once

	wh.Func = func(ts float64) bool {
		// Make the first webhook request slow enough to hit
		// the webhook timeout, but not so slow as to hit the
		// dispatcher timeout.
		once.Do(func() {
			time.Sleep(2 * time.Second)
		})
		return false
	}

	am := at.AlertmanagerCluster(fmt.Sprintf(conf, wh.Address()), 1)

	am.Push(At(1), Alert("alertname", "test1"))

	// First alert will be considered a failure due to timeout, and retried.
	co.Want(Between(4, 5), Alert("alertname", "test1").Active(1))
	// Second attempt will not be delayed, so successful.
	co.Want(Between(4, 5), Alert("alertname", "test1").Active(1))

	at.Run()

	t.Log(co.Check())
}

0 comments on commit fb57eba

Please sign in to comment.