diff --git a/router/dataplane.go b/router/dataplane.go index d19d8fa724..2a1e06e3d6 100644 --- a/router/dataplane.go +++ b/router/dataplane.go @@ -39,6 +39,7 @@ import ( "github.com/scionproto/scion/pkg/drkey" libepic "github.com/scionproto/scion/pkg/experimental/epic" "github.com/scionproto/scion/pkg/log" + "github.com/scionproto/scion/pkg/private/processmetrics" "github.com/scionproto/scion/pkg/private/serrors" "github.com/scionproto/scion/pkg/private/util" "github.com/scionproto/scion/pkg/scrypto" @@ -396,7 +397,7 @@ func (d *DataPlane) AddSvc(svc addr.SVC, a *net.UDPAddr) error { } d.svc.AddSvc(svc, a) if d.Metrics != nil { - labels := serviceMetricLabels(d.localIA, svc) + labels := serviceLabels(d.localIA, svc) d.Metrics.ServiceInstanceChanges.With(labels).Add(1) d.Metrics.ServiceInstanceCount.With(labels).Add(1) } @@ -415,7 +416,7 @@ func (d *DataPlane) DelSvc(svc addr.SVC, a *net.UDPAddr) error { } d.svc.DelSvc(svc, a) if d.Metrics != nil { - labels := serviceMetricLabels(d.localIA, svc) + labels := serviceLabels(d.localIA, svc) d.Metrics.ServiceInstanceChanges.With(labels).Add(1) d.Metrics.ServiceInstanceCount.With(labels).Add(-1) } @@ -620,10 +621,6 @@ func (d *DataPlane) runReceiver(ifID uint16, conn BatchConn, cfg *RunConfig, enqueueForProcessing := func(pkt ipv4.Message) { srcAddr := pkt.Addr.(*net.UDPAddr) - - // For non-broken packets, we defer the ingress-side metrics accounting - // until we have a chance to figure the traffic type. - // That's in runProcessor and processPkt. 
size := pkt.N sc := classOfSize(size) metrics[sc].InputPacketsTotal.Inc() @@ -924,21 +921,17 @@ func updateOutputMetrics(metrics interfaceMetrics, packets []packet) { writtenBytes[tt][sc] += s } for t := ttOther; t < ttMax; t++ { - for sc := sizeClass(0); sc < maxSizeClass; sc++ { + for sc := minSizeClass; sc < maxSizeClass; sc++ { if writtenPkts[t][sc] > 0 { - metrics[sc].Output[t].OutputPacketsTotal.Add( - float64(writtenPkts[t][sc])) - metrics[sc].Output[t].OutputBytesTotal.Add( - float64(writtenBytes[t][sc])) + metrics[sc].Output[t].OutputPacketsTotal.Add(float64(writtenPkts[t][sc])) + metrics[sc].Output[t].OutputBytesTotal.Add(float64(writtenBytes[t][sc])) } } } } -func (d *DataPlane) runForwarder(ifID uint16, conn BatchConn, - cfg *RunConfig, c <-chan packet) { +func (d *DataPlane) runForwarder(ifID uint16, conn BatchConn, cfg *RunConfig, c <-chan packet) { - fmt.Println("Initialize forwarder for", "interface") log.Debug("Initialize forwarder for", "interface", ifID) // We use this somewhat like a ring buffer. @@ -955,8 +948,7 @@ func (d *DataPlane) runForwarder(ifID uint16, conn BatchConn, toWrite := 0 for d.running { - toWrite += readUpTo(c, cfg.BatchSize-toWrite, toWrite == 0, - pkts[toWrite:]) + toWrite += readUpTo(c, cfg.BatchSize-toWrite, toWrite == 0, pkts[toWrite:]) // Turn the packets into underlay messages that WriteBatch can send. for i, p := range pkts[:toWrite] { @@ -2417,3 +2409,26 @@ func nextHdr(layer gopacket.DecodingLayer) slayers.L4ProtocolType { return slayers.L4None } } + +// initMetrics initializes the metrics related to packet forwarding. The counters are already +// instantiated for all the relevant interfaces so this will not have to be repeated during packet +// forwarding. 
+func (d *DataPlane) initMetrics() { + d.forwardingMetrics = make(map[uint16]interfaceMetrics) + d.forwardingMetrics[0] = newInterfaceMetrics(d.Metrics, 0, d.localIA, d.neighborIAs) + for id := range d.external { + if _, notOwned := d.internalNextHops[id]; notOwned { + continue + } + d.forwardingMetrics[id] = newInterfaceMetrics(d.Metrics, id, d.localIA, d.neighborIAs) + } + + // Start our custom /proc/pid/stat collector to export iowait time and (in the future) other + // process-wide metrics that prometheus does not. + err := processmetrics.Init() + + // we can live without these metrics. Just log the error. + if err != nil { + log.Error("Could not initialize processmetrics", "err", err) + } +} diff --git a/router/dataplane_test.go b/router/dataplane_test.go index 5b47f38c82..baf163fb53 100644 --- a/router/dataplane_test.go +++ b/router/dataplane_test.go @@ -1482,7 +1482,6 @@ func TestProcessPkt(t *testing.T) { t.Run(name, func(t *testing.T) { t.Parallel() dp := tc.prepareDP(ctrl) - router.InitDPMetrics(dp) input, want := tc.mockMsg(false), tc.mockMsg(true) result, err := dp.ProcessPkt(tc.srcInterface, input) tc.assertFunc(t, err) diff --git a/router/export_test.go b/router/export_test.go index 82e67763fa..f8134b4a3d 100644 --- a/router/export_test.go +++ b/router/export_test.go @@ -63,11 +63,8 @@ func NewDP( if err := dp.SetKey(key); err != nil { panic(err) } - return dp -} - -func InitDPMetrics(dp *DataPlane) { dp.initMetrics() + return dp } func (d *DataPlane) FakeStart() { diff --git a/router/metrics.go b/router/metrics.go index 27d49980f1..df7af4c74f 100644 --- a/router/metrics.go +++ b/router/metrics.go @@ -23,8 +23,6 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" "github.com/scionproto/scion/pkg/addr" - "github.com/scionproto/scion/pkg/log" - "github.com/scionproto/scion/pkg/private/processmetrics" ) // Metrics defines the data-plane metrics for the BR. 
@@ -47,8 +45,8 @@ type Metrics struct { SiblingBFDStateChanges *prometheus.CounterVec } -// NewMetrics initializes the metrics for the Border Router, and registers them -// with the default registry. +// NewMetrics initializes the metrics for the Border Router, and registers them with the default +// registry. func NewMetrics() *Metrics { return &Metrics{ ProcessedPackets: promauto.NewCounterVec( @@ -168,30 +166,48 @@ func NewMetrics() *Metrics { } } -// interfaceMetrics is the set of metrics that are relevant for one given interface. -// It is a map that associates each (traffic-type, size-class) pair -// with the set of metrics belonging to that interface that have these label values. -// This set of metrics is itself a trafficMetric structure. -// Explanation: -// Metrics are labeled by interface, local-as, neighbor-as, packet size, and (for output -// metrics only) traffic type. -// Instances are grouped in a hierarchical manner for efficient access by the using code. -// forwardingMetrics is a map of interface to interfaceMetrics. -// To access a specific InputPacketsTotal counter, one refers to: -// dataplane.forwardingMetrics[interface][size-class]. -// trafficMetrics.Output is a map of traffic type to outputMetrics. -type interfaceMetrics map[sizeClass]trafficMetrics +// trafficType labels traffic as being of either of the following types: in, out, inTransit, +// outTransit, brTransit. inTransit or outTransit means that traffic is crossing the local AS via +// two routers. If the router being observed is the one receiving the packet from the outside, then +// the type is inTransit; else it is outTransit. brTransit means that traffic is crossing only the +// observed router. Non-scion traffic or somehow malformed traffic has type Other. +type trafficType uint8 -// The number of bits needed to represent some given size. -// This is quicker than computing Log2 and serves the same purpose. 
+const ( + ttOther trafficType = iota + ttIn + ttOut + ttInTransit + ttOutTransit + ttBrTransit + ttMax +) + +// Returns a human-friendly representation of the given traffic type. +func (t trafficType) String() string { + switch t { + case ttIn: + return "in" + case ttOut: + return "out" + case ttInTransit: + return "in_transit" + case ttOutTransit: + return "out_transit" + case ttBrTransit: + return "br_transit" + } + return "other" +} + +// sizeClass is the number of bits needed to represent some given size. This is quicker than +// computing Log2 and serves the same purpose. type sizeClass uint8 -// maxSizeClass is the smallest NOT-supported sizeClass. This must be -// enough to support the largest valid packet size (defined by -// bufSize). Since this must be a constant (to allow efficient fixed-sized -// arrays), we have to assert it's large enough for bufSize. Just in case -// we do get packets larger than bufSize, they are simply put in the last -// class. +// maxSizeClass is the smallest NOT-supported sizeClass. This must be enough to support the largest +// valid packet size (defined by bufSize). Since this must be a constant (to allow efficient +// fixed-sized arrays), we have to assert it's large enough for bufSize. Just in case we do get +// packets larger than bufSize, they are simply put in the last class. const maxSizeClass sizeClass = 15 // This will failto compile if bufSize cannot fit in (maxSizeClass - 1) bits. @@ -212,16 +228,34 @@ func classOfSize(pktSize int) sizeClass { return cs } -// Returns a human-friendly representation of the given size class. -// Avoid bracket notation to make the values possibly easier to use -// in monitoring queries. +// Returns a human-friendly representation of the given size class. Avoid bracket notation to make +// the values possibly easier to use in monitoring queries. 
func (sc sizeClass) String() string { - return strings.Join( - []string{strconv.Itoa((1 << sc) >> 1), strconv.Itoa(1<> 1) + high := strconv.Itoa((1 << sc) - 1) + if sc == minSizeClass { + low = "0" + } + if sc == maxSizeClass { + high = "inf" + } + + return strings.Join([]string{low, high}, "_") } +// interfaceMetrics is the set of metrics that are relevant for one given interface. It is a map +// that associates each (traffic-type, size-class) pair with the set of metrics belonging to that +// interface that have these label values. This set of metrics is itself a trafficMetric structure. +// Explanation: Metrics are labeled by interface, local-as, neighbor-as, packet size, and (for +// output metrics only) traffic type. Instances are grouped in a hierarchical manner for efficient +// access by the using code. forwardingMetrics is a map of interface to interfaceMetrics. To access +// a specific InputPacketsTotal counter, one refers to: +// +// dataplane.forwardingMetrics[interface][size-class]. +// +// trafficMetrics.Output is an array of outputMetrics indexed by traffic type. +type interfaceMetrics map[sizeClass]trafficMetrics + // trafficMetrics groups all the metrics instances that all share the same interface AND // sizeClass label values (but have different names - i.e. they count different things). type trafficMetrics struct { @@ -232,100 +266,47 @@ type trafficMetrics struct { DroppedPacketsBusyForwarder prometheus.Counter DroppedPacketsBusySlowPath prometheus.Counter ProcessedPackets prometheus.Counter - Output map[trafficType]outputMetrics -} - -// trafficType labels traffic as being of either of the following types: -// in, out, inTransit, outTransit, brTransit. -// inTransit or outTransit means that traffic is crossing the local AS via two routers. -// If the router being observed is the one receiving the packet from the outside, -// then the type is inTransit; else it is outTransit. -// brTransit means that traffic is crossing only the observed router. 
-// Non-scion traffic or somehow malformed traffic has type Other. -type trafficType uint8 - -const ( - ttOther trafficType = iota - ttIn - ttOut - ttInTransit - ttOutTransit - ttBrTransit - ttMax -) - -// Returns a human-friendly representation of the given traffic type. -func (t trafficType) String() string { - switch t { - case ttIn: - return "in" - case ttOut: - return "out" - case ttInTransit: - return "in_transit" - case ttOutTransit: - return "out_transit" - case ttBrTransit: - return "br_transit" - } - return "other" + Output [ttMax]outputMetrics } -// outputMetrics groups all the metrics about traffic that has reached the output stage. -// Metrics instances in each of these all have the same interface AND sizeClass AND -// trafficType label values. +// outputMetrics groups all the metrics about traffic that has reached the output stage. Metrics +// instances in each of these all have the same interface AND sizeClass AND trafficType label +// values. type outputMetrics struct { OutputBytesTotal prometheus.Counter OutputPacketsTotal prometheus.Counter } -// initMetrics initializes the metrics related to packet forwarding. The -// counters are already instantiated for all the relevant interfaces so this -// will not have to be repeated during packet forwarding. -func (d *DataPlane) initMetrics() { - d.forwardingMetrics = make(map[uint16]interfaceMetrics) - labels := interfaceToMetricLabels(0, d.localIA, d.neighborIAs) - d.forwardingMetrics[0] = initInterfaceMetrics(d.Metrics, labels) - for id := range d.external { - if _, notOwned := d.internalNextHops[id]; notOwned { - continue - } - labels = interfaceToMetricLabels(id, d.localIA, d.neighborIAs) - d.forwardingMetrics[id] = initInterfaceMetrics(d.Metrics, labels) - } - - // Start our custom /proc/pid/stat collector to export iowait time and - // (in the future) other process-wide metrics that prometheus does not. 
- err := processmetrics.Init() +func newInterfaceMetrics( + metrics *Metrics, + id uint16, + localIA addr.IA, + neighbors map[uint16]addr.IA) interfaceMetrics { - // we can live without these metrics. Just log the error. - if err != nil { - log.Error("Could not initialize processmetrics", "err", err) - } -} - -func initInterfaceMetrics(metrics *Metrics, labels prometheus.Labels) interfaceMetrics { + ifLabels := interfaceLabels(id, localIA, neighbors) m := interfaceMetrics{} for sc := minSizeClass; sc < maxSizeClass; sc++ { scLabels := prometheus.Labels{"sizeclass": sc.String()} - m[sc] = initTrafficMetrics(metrics, labels, scLabels) + m[sc] = newTrafficMetrics(metrics, ifLabels, scLabels) } return m } -func initTrafficMetrics(metrics *Metrics, - labels prometheus.Labels, scLabels prometheus.Labels) trafficMetrics { +func newTrafficMetrics( + metrics *Metrics, + ifLabels prometheus.Labels, + scLabels prometheus.Labels) trafficMetrics { + c := trafficMetrics{ - InputBytesTotal: metrics.InputBytesTotal.MustCurryWith(labels).With(scLabels), - InputPacketsTotal: metrics.InputPacketsTotal.MustCurryWith(labels).With(scLabels), - ProcessedPackets: metrics.ProcessedPackets.MustCurryWith(labels).With(scLabels), - Output: make(map[trafficType]outputMetrics), + InputBytesTotal: metrics.InputBytesTotal.MustCurryWith(ifLabels).With(scLabels), + InputPacketsTotal: metrics.InputPacketsTotal.MustCurryWith(ifLabels).With(scLabels), + ProcessedPackets: metrics.ProcessedPackets.MustCurryWith(ifLabels).With(scLabels), } // Output metrics have the extra "trafficType" label. for t := ttOther; t < ttMax; t++ { ttLabels := prometheus.Labels{"type": t.String()} - c.Output[t] = initOutputMetrics(metrics, labels, scLabels, ttLabels) + c.Output[t] = newOutputMetrics(metrics, ifLabels, scLabels, ttLabels) } // Dropped metrics have the extra "Reason" label.
@@ -333,19 +314,19 @@ func initTrafficMetrics(metrics *Metrics, reasonMap["reason"] = "invalid" c.DroppedPacketsInvalid = - metrics.DroppedPacketsTotal.MustCurryWith(labels).MustCurryWith(scLabels).With(reasonMap) + metrics.DroppedPacketsTotal.MustCurryWith(ifLabels).MustCurryWith(scLabels).With(reasonMap) reasonMap["reason"] = "busy_processor" c.DroppedPacketsBusyProcessor = - metrics.DroppedPacketsTotal.MustCurryWith(labels).MustCurryWith(scLabels).With(reasonMap) + metrics.DroppedPacketsTotal.MustCurryWith(ifLabels).MustCurryWith(scLabels).With(reasonMap) reasonMap["reason"] = "busy_forwarder" c.DroppedPacketsBusyForwarder = - metrics.DroppedPacketsTotal.MustCurryWith(labels).MustCurryWith(scLabels).With(reasonMap) + metrics.DroppedPacketsTotal.MustCurryWith(ifLabels).MustCurryWith(scLabels).With(reasonMap) reasonMap["reason"] = "busy_slow_path" c.DroppedPacketsBusySlowPath = - metrics.DroppedPacketsTotal.MustCurryWith(labels).MustCurryWith(scLabels).With(reasonMap) + metrics.DroppedPacketsTotal.MustCurryWith(ifLabels).MustCurryWith(scLabels).With(reasonMap) c.InputBytesTotal.Add(0) c.InputPacketsTotal.Add(0) @@ -357,21 +338,23 @@ func initTrafficMetrics(metrics *Metrics, return c } -func initOutputMetrics(metrics *Metrics, labels prometheus.Labels, - scLabels prometheus.Labels, ttLabels prometheus.Labels) outputMetrics { +func newOutputMetrics( + metrics *Metrics, + ifLabels prometheus.Labels, + scLabels prometheus.Labels, + ttLabels prometheus.Labels) outputMetrics { + om := outputMetrics{} om.OutputBytesTotal = - metrics.OutputBytesTotal.MustCurryWith(labels).MustCurryWith(scLabels).With(ttLabels) + metrics.OutputBytesTotal.MustCurryWith(ifLabels).MustCurryWith(scLabels).With(ttLabels) om.OutputPacketsTotal = - metrics.OutputPacketsTotal.MustCurryWith(labels).MustCurryWith(scLabels).With(ttLabels) + metrics.OutputPacketsTotal.MustCurryWith(ifLabels).MustCurryWith(scLabels).With(ttLabels) om.OutputBytesTotal.Add(0) om.OutputPacketsTotal.Add(0) return om } 
-func interfaceToMetricLabels(id uint16, localIA addr.IA, - neighbors map[uint16]addr.IA) prometheus.Labels { - +func interfaceLabels(id uint16, localIA addr.IA, neighbors map[uint16]addr.IA) prometheus.Labels { if id == 0 { return prometheus.Labels{ "isd_as": localIA.String(), @@ -386,7 +369,7 @@ func interfaceToMetricLabels(id uint16, localIA addr.IA, } } -func serviceMetricLabels(localIA addr.IA, svc addr.SVC) prometheus.Labels { +func serviceLabels(localIA addr.IA, svc addr.SVC) prometheus.Labels { return prometheus.Labels{ "isd_as": localIA.String(), "service": svc.BaseString(),