Skip to content

Commit

Permalink
feat: record file descriptors used as a prom metric
Browse files Browse the repository at this point in the history
  • Loading branch information
brianluong committed Sep 27, 2024
1 parent a8e6e90 commit 69d9970
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 9 deletions.
7 changes: 3 additions & 4 deletions cmd/gateway/main.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package main

import (
"context"
"fmt"
"net/http"
"os"
Expand Down Expand Up @@ -53,7 +52,7 @@ func main() {

var rpcServer *server.RPCServer

var metricsServer *http.Server
var metricsServer *metrics.Server

zap.L().Info("Starting node gateway.", zap.String("env", env), zap.Any("config", conf))

Expand All @@ -71,7 +70,7 @@ func main() {
go func() {
metricsServer = metrics.NewMetricsServer()

if err := metricsServer.ListenAndServe(); err != http.ErrServerClosed {
if err := metricsServer.Start(); err != http.ErrServerClosed {
zap.L().Fatal("Failed to start metrics server.", zap.Error(err))
}
}()
Expand All @@ -90,7 +89,7 @@ func main() {
zap.L().Fatal("Failed to gracefully shut down RPC server.", zap.Error(err))
}

if err := metricsServer.Shutdown(context.Background()); err != nil {
if err := metricsServer.Shutdown(); err != nil {
zap.L().Fatal("Failed to gracefully shut down metrics server.", zap.Error(err))
}
}
Expand Down
87 changes: 82 additions & 5 deletions internal/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
package metrics

import (
	"context"
	"fmt"
	"net/http"
	"os"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/client_golang/prometheus/promhttp"
	"go.uber.org/zap"
)

const (
DefaultPort = 9090
metricsNamespace = "node_gateway"
defaultReadHeaderTimeout = 10 * time.Second
DefaultPort = 9090
metricsNamespace = "node_gateway"
defaultReadHeaderTimeout = 10 * time.Second
systemStatsEmissionInterval = 60 * time.Second

// Metric labels

Expand Down Expand Up @@ -285,6 +289,15 @@ var (
},
[]string{"chain_name", "upstream_id", "url", "errorType", "method"},
)

// System metrics
fileDescriptorsUsed = promauto.NewGauge(
prometheus.GaugeOpts{
Namespace: metricsNamespace,
Name: "file_descriptors_used",
Help: "Count of Unix file descriptors used.",
},
)
)

type Container struct {
Expand Down Expand Up @@ -358,15 +371,79 @@ func NewContainer(chainName string) *Container {
return result
}

func NewMetricsServer() *http.Server {
func NewMetricsServer() *Server {
mux := http.NewServeMux()
mux.Handle("/", promhttp.Handler())

return &http.Server{
server := &http.Server{
Addr: fmt.Sprintf(":%d", DefaultPort),
Handler: mux,
ReadHeaderTimeout: defaultReadHeaderTimeout,
}

return &Server{
server: server,
shutdownChannel: make(chan int),
}
}

// Server pairs the HTTP server that exposes metrics with the background
// goroutine that periodically records system statistics.
//
// A Server must not be copied after first use because it contains a sync.Once.
type Server struct {
	server *http.Server

	// shutdownChannel tells the system-stats goroutine to exit. It is closed
	// (never sent on) by Shutdown, so the signal is observed even if the
	// goroutine is not waiting in its select at the moment Shutdown runs.
	shutdownChannel chan int

	// shutdownOnce ensures shutdownChannel is closed at most once, which
	// keeps repeated or concurrent Shutdown calls from panicking.
	shutdownOnce sync.Once
}

// Start launches the system-stats goroutine and then serves HTTP on the
// wrapped server. It blocks until ListenAndServe returns and passes that
// error through unchanged.
func (m *Server) Start() error {
	m.StartEmittingSystemStats()
	return m.server.ListenAndServe()
}

// Shutdown stops the system-stats goroutine and then shuts down the wrapped
// HTTP server, returning the HTTP server's shutdown error. It may be called
// more than once; only the first call closes the stop channel.
func (m *Server) Shutdown() error {
	signalled := false

	// Closing instead of doing a non-blocking send guarantees the goroutine
	// sees the stop request regardless of what it is doing right now, and
	// also covers the case where Shutdown runs before Start.
	m.shutdownOnce.Do(func() {
		close(m.shutdownChannel)
		signalled = true
	})

	if signalled {
		zap.L().Debug("Metrics server is stopping")
	} else {
		zap.L().Debug("Metrics server has likely already shutdown.")
	}

	return m.server.Shutdown(context.Background())
}

// getNumFileDesciptors returns the number of entries listed under
// /proc/<pid>/fd for the current process. It returns 0 and the underlying
// error if that directory cannot be opened or read.
//
// NOTE(review): the directory handle opened here is presumably included in
// the listing, so the result may be one higher than the steady-state count —
// confirm if exact numbers matter.
func getNumFileDesciptors() (int, error) {
	fdDir, openErr := os.Open(fmt.Sprintf("/proc/%d/fd", os.Getpid()))
	if openErr != nil {
		return 0, openErr
	}
	defer fdDir.Close()

	// A negative count asks for every entry in a single call.
	entries, readErr := fdDir.Readdirnames(-1)
	if readErr == nil {
		return len(entries), nil
	}

	return 0, readErr
}

// StartEmittingSystemStats starts a goroutine that, once per
// systemStatsEmissionInterval, reads the process's file descriptor count and
// stores it in the fileDescriptorsUsed gauge. The goroutine exits as soon as
// a receive on m.shutdownChannel succeeds. This method returns immediately.
func (m *Server) StartEmittingSystemStats() {
	go func() {
		// A single ticker replaces a fresh time.After timer per iteration.
		ticker := time.NewTicker(systemStatsEmissionInterval)
		defer ticker.Stop()

		for {
			select {
			case <-m.shutdownChannel:
				return
			case <-ticker.C:
				numFileDescriptors, err := getNumFileDesciptors()
				if err != nil {
					// Skip the gauge update and the debug line so a failed
					// read is not reported as a count of zero.
					zap.L().Error("Failed to get number of file descriptors.", zap.Error(err))
					continue
				}

				zap.L().Debug("Emitting system stats.", zap.Int("numFileDescriptors", numFileDescriptors))
				fileDescriptorsUsed.Set(float64(numFileDescriptors))
			}
		}
	}()
}

func InstrumentHandler(handler http.Handler, container *Container) http.Handler {
Expand Down

0 comments on commit 69d9970

Please sign in to comment.