diff --git a/cmd/vault-raft-snapshot-agent/main.go b/cmd/vault-raft-snapshot-agent/main.go index 65c04c4..dc6a8a0 100644 --- a/cmd/vault-raft-snapshot-agent/main.go +++ b/cmd/vault-raft-snapshot-agent/main.go @@ -43,15 +43,18 @@ import ( "github.com/Argelbargel/vault-raft-snapshot-agent/internal/agent" "github.com/Argelbargel/vault-raft-snapshot-agent/internal/agent/logging" "log" + "net/http" "os" "os/signal" "syscall" + "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/urfave/cli/v2" ) var Version = "development" var Platform = "linux/amd64" +var MetricsPort int var agentOptions = agent.SnapshotAgentOptions{ ConfigFileName: "snapshots", @@ -60,10 +63,11 @@ var agentOptions = agent.SnapshotAgentOptions{ } const ( - optionConfig = "config" - optionLogFormat = "log-format" - optionLogOutput = "log-output" - optionLogLevel = "log-level" + optionConfig = "config" + optionLogFormat = "log-format" + optionLogOutput = "log-output" + optionLogLevel = "log-level" + optionMetricsPort = "metrics-port" ) var cliFlags = []cli.Flag{ @@ -94,6 +98,14 @@ var cliFlags = []cli.Flag{ EnvVars: []string{agentOptions.EnvPrefix + "_LOG_LEVEL"}, Value: logging.LevelInfo, }, + &cli.IntFlag{ + Name: optionMetricsPort, + Aliases: []string{"p"}, + Usage: "Port to serve metrics on", + EnvVars: []string{agentOptions.EnvPrefix + "_METRICS_PORT"}, + Value: 2112, + Destination: &MetricsPort, + }, } type quietBoolFlag struct { @@ -159,9 +171,20 @@ func run() error { cancel() }() + // serve metrics in a go routine. + go serveMetrics() return runAgent(ctx) } +func serveMetrics() { + // serve prometheus metrics + http.Handle("/metrics", promhttp.Handler()) + err := http.ListenAndServe(fmt.Sprintf(":%d", MetricsPort), nil) + if err != nil { + logging.Fatal("failed to setup metrics", "error", err) + } +} + func runAgent(ctx context.Context) error { snapshotAgent, err := agent.CreateSnapshotAgent(ctx, agentOptions) if err != nil { diff --git a/go.mod b/go.mod index ed47d35..747583e 100644 --- a/go.mod +++ b/go.mod @@ -61,6 +61,8 @@ require golang.org/x/crypto v0.25.0 // indirect // testing require github.com/stretchr/testify v1.9.0 +require github.com/prometheus/client_golang v1.20.5 + require ( cloud.google.com/go v0.115.0 // indirect cloud.google.com/go/auth v0.7.3 // indirect @@ -84,7 +86,9 @@ require ( github.com/aws/aws-sdk-go-v2/service/ssooidc v1.26.4 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.30.3 // indirect github.com/aws/smithy-go v1.20.3 // indirect + github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v3 v3.2.2 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/dustin/go-humanize v1.0.1 // indirect @@ -124,9 +128,13 @@ require ( github.com/mattn/go-isatty v0.0.20 // indirect github.com/minio/md5-simd v1.1.2 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pelletier/go-toml/v2 v2.2.2 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/prometheus/client_model v0.6.1 // indirect + github.com/prometheus/common v0.55.0 // indirect + github.com/prometheus/procfs v0.15.1 // indirect github.com/rs/xid v1.5.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/ryanuber/go-glob v1.0.0 // indirect diff --git a/go.sum b/go.sum index 420dcd8..45ccdd1 100644 --- a/go.sum +++ b/go.sum @@ -67,9 +67,13 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.30.3 h1:ZsDKRLXGWHk8WdtyYMoGNO7bTudr github.com/aws/aws-sdk-go-v2/service/sts v1.30.3/go.mod h1:zwySh8fpFyXp9yOr/KVzxOl8SRqgf/IDw5aUt9UKFcQ= github.com/aws/smithy-go v1.20.3 h1:ryHwveWzPV5BIof6fyDvor6V3iUL7nTfiTKXHiW05nE= github.com/aws/smithy-go v1.20.3/go.mod h1:krry+ya/rV9RDcV/Q16kpu6ypI4K2czasz0NC3qS14E= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cenkalti/backoff/v3 v3.2.2 h1:cfUAAO3yvKMYKPrvhDuHSwQnhZNk/RMHKdZqKTxfm6M= github.com/cenkalti/backoff/v3 v3.2.2/go.mod h1:cIeZDE3IrqwwJl6VUwCN6trj1oXrTS4rc0ij+ULvLYs= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4= @@ -240,6 +244,8 @@ github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/ncw/swift/v2 v2.0.2 h1:jx282pcAKFhmoZBSdMcCRFn9VWkoBIRsCpe+yZq7vEk= github.com/ncw/swift/v2 v2.0.2/go.mod h1:z0A9RVdYPjNjXVo2pDOPxZ4eu3oarO1P91fTItcb+Kg= github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= @@ -251,7 +257,15 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= +github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= +github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= @@ -405,8 +419,9 @@ gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/agent/snapshot-agent.go b/internal/agent/snapshot-agent.go index 3401e98..f64c948 100644 --- a/internal/agent/snapshot-agent.go +++ b/internal/agent/snapshot-agent.go @@ -2,6 +2,8 @@ package agent import ( "context" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" "io" "os" "sync" @@ -61,6 +63,33 @@ func (c SnapshotsConfig) HasStorages() bool { return c.Storages.AWS != nil || c.Storages.Azure != nil || c.Storages.GCP != nil || c.Storages.Local != nil || c.Storages.Swift != nil || c.Storages.S3 != nil } +var ( + lastSnapshotTime = promauto.NewGauge( + prometheus.GaugeOpts{ + Name: "vrsa_last_snapshot_time", + Help: "Unix timestamp of the last snapshot time", + }, + ) + nextSnapshotTime = promauto.NewGauge( + prometheus.GaugeOpts{ + Name: "vrsa_next_snapshot_time", + Help: "Unix timestamp of the next scheduled snapshot time", + }, + ) + lastSnapshotSuccess = promauto.NewGauge( + prometheus.GaugeOpts{ + Name: "vrsa_last_snapshot_success", + Help: "Returns 1 if the last snapshot was successful and 0 if not", + }, + ) + lastSnapshotSize = promauto.NewGauge( + prometheus.GaugeOpts{ + Name: "vrsa_last_snapshot_size", + Help: "Size of the last snapshot in bytes", + }, + ) +) + func CreateSnapshotAgent(ctx context.Context, options SnapshotAgentOptions) (*SnapshotAgent, error) { data := SnapshotAgentConfig{} parser := config.NewParser[*SnapshotAgentConfig](options.EnvPrefix, options.ConfigFileName, options.ConfigFileSearchPaths...) @@ -130,11 +159,13 @@ func (a *SnapshotAgent) TakeSnapshot(ctx context.Context) *time.Ticker { a.lastSnapshotTime = time.Now() // ensure that we do not hammer on vault in case of errors nextSnapshot := a.lastSnapshotTime.Add(a.storageConfigDefaults.Frequency) + nextSnapshotTime.Set(float64(nextSnapshot.Unix())) a.updateTicker(nextSnapshot) snapshot, err := os.CreateTemp(a.tempDir, "snapshot") if err != nil { logging.Warn("Could not create snapshot-temp-file", "nextSnapshot", nextSnapshot, "error", err) + lastSnapshotSuccess.Set(0.0) return a.snapshotTicker } @@ -149,21 +180,26 @@ func (a *SnapshotAgent) TakeSnapshot(ctx context.Context) *time.Ticker { err = a.client.TakeSnapshot(ctx, snapshot) if err != nil { logging.Error("Could not take snapshot of vault", "nextSnapshot", nextSnapshot, "error", err) + lastSnapshotSuccess.Set(0.0) return a.snapshotTicker } info, err := snapshot.Stat() if err != nil { logging.Error("Could not stat snapshot-temp-file", "file", snapshot.Name(), "nextSnapshot", nextSnapshot, "error", err) + lastSnapshotSuccess.Set(0.0) return a.snapshotTicker } + lastSnapshotSize.Set(float64(info.Size())) if info.Size() < 1 { logging.Warn("Ignoring empty snapshot", "file", snapshot.Name(), "nextSnapshot", nextSnapshot) return a.snapshotTicker } nextSnapshot = a.manager.UploadSnapshot(ctx, snapshot, info.Size(), a.lastSnapshotTime, a.storageConfigDefaults) + lastSnapshotTime.Set(float64(a.lastSnapshotTime.Unix())) + lastSnapshotSuccess.Set(1.0) return a.updateTicker(nextSnapshot) }