diff --git a/go.mod b/go.mod index 4d563cbf1..53a3490ee 100644 --- a/go.mod +++ b/go.mod @@ -19,6 +19,7 @@ require ( k8s.io/api v0.29.1 k8s.io/apiextensions-apiserver v0.29.1 k8s.io/apimachinery v0.29.1 + k8s.io/apiserver v0.29.1 k8s.io/client-go v0.29.1 k8s.io/component-base v0.29.1 k8s.io/klog/v2 v2.110.1 diff --git a/go.sum b/go.sum index 46cd0c925..4f88534dc 100644 --- a/go.sum +++ b/go.sum @@ -352,6 +352,8 @@ k8s.io/apiextensions-apiserver v0.29.1 h1:S9xOtyk9M3Sk1tIpQMu9wXHm5O2MX6Y1kIpPMi k8s.io/apiextensions-apiserver v0.29.1/go.mod h1:zZECpujY5yTW58co8V2EQR4BD6A9pktVgHhvc0uLfeU= k8s.io/apimachinery v0.29.1 h1:KY4/E6km/wLBguvCZv8cKTeOwwOBqFNjwJIdMkMbbRc= k8s.io/apimachinery v0.29.1/go.mod h1:6HVkd1FwxIagpYrHSwJlQqZI3G9LfYWRPAkUvLnXTKU= +k8s.io/apiserver v0.29.1 h1:e2wwHUfEmMsa8+cuft8MT56+16EONIEK8A/gpBSco+g= +k8s.io/apiserver v0.29.1/go.mod h1:V0EpkTRrJymyVT3M49we8uh2RvXf7fWC5XLB0P3SwRw= k8s.io/client-go v0.29.1 h1:19B/+2NGEwnFLzt0uB5kNJnfTsbV8w6TgQRz9l7ti7A= k8s.io/client-go v0.29.1/go.mod h1:TDG/psL9hdet0TI9mGyHJSgRkW3H9JZk2dNEUS7bRks= k8s.io/component-base v0.29.1 h1:MUimqJPCRnnHsskTTjKD+IC1EHBbRCVyi37IoFBrkYw= diff --git a/pkg/controller/options.go b/pkg/controller/options.go index ee55c1ae2..b4afe37f4 100644 --- a/pkg/controller/options.go +++ b/pkg/controller/options.go @@ -27,6 +27,7 @@ import ( "github.com/crossplane/crossplane-runtime/pkg/logging" "github.com/crossplane/crossplane-runtime/pkg/ratelimiter" "github.com/crossplane/crossplane-runtime/pkg/reconciler/managed" + "github.com/crossplane/crossplane-runtime/pkg/statemetrics" ) // DefaultOptions returns a functional set of options with conservative @@ -65,7 +66,7 @@ type Options struct { MetricRecorder managed.MetricRecorder - StateRecorder managed.StateRecorder + StateRecorder statemetrics.StateRecorder } // ForControllerRuntime extracts options for controller-runtime. diff --git a/pkg/reconciler/managed/metrics.go b/pkg/reconciler/managed/metrics.go index 0255ae34d..0099ddf2b 100644 --- a/pkg/reconciler/managed/metrics.go +++ b/pkg/reconciler/managed/metrics.go @@ -61,25 +61,25 @@ func NewMRMetricRecorder() *MRMetricRecorder { Name: "managed_resource_time_to_first_reconcile_seconds", Help: "The time it took for a managed resource to be detected by the controller", Buckets: kmetrics.ExponentialBuckets(10e-9, 10, 10), - }, []string{"gvk", "claim", "composite"}), + }, []string{"gvk"}), mrFirstTimeReady: prometheus.NewHistogramVec(prometheus.HistogramOpts{ Subsystem: subSystem, Name: "managed_resource_first_time_to_readiness_seconds", Help: "The time it took for a managed resource to become ready first time after creation", Buckets: []float64{1, 5, 10, 15, 30, 60, 120, 300, 600, 1800, 3600}, - }, []string{"gvk", "claim", "composite"}), + }, []string{"gvk"}), mrDeletion: prometheus.NewHistogramVec(prometheus.HistogramOpts{ Subsystem: subSystem, Name: "managed_resource_deletion_seconds", Help: "The time it took for a managed resource to be deleted", Buckets: []float64{1, 5, 10, 15, 30, 60, 120, 300, 600, 1800, 3600}, - }, []string{"gvk", "claim", "composite"}), + }, []string{"gvk"}), mrDrift: prometheus.NewHistogramVec(prometheus.HistogramOpts{ Subsystem: subSystem, Name: "managed_resource_drift_seconds", Help: "ALPHA: How long since the previous successful reconcile when a resource was found to be out of sync; excludes restart of the provider", Buckets: kmetrics.ExponentialBuckets(10e-9, 10, 10), - }, []string{"gvk", "claim", "composite"}), + }, []string{"gvk"}), } } @@ -109,7 +109,8 @@ func (r *MRMetricRecorder) recordUnchanged(name string) { func (r *MRMetricRecorder) recordFirstTimeReconciled(managed resource.Managed) { if managed.GetCondition(xpv1.TypeSynced).Status == corev1.ConditionUnknown { - r.mrDetected.With(getMRMetricLabels(managed)).Observe(time.Since(managed.GetCreationTimestamp().Time).Seconds()) + r.mrDetected.WithLabelValues("gvk", managed.GetObjectKind().GroupVersionKind().String()). + Observe(time.Since(managed.GetCreationTimestamp().Time).Seconds()) r.firstObservation.Store(managed.GetName(), time.Now()) // this is the first time we reconciled on this resource } } @@ -125,13 +126,15 @@ func (r *MRMetricRecorder) recordDrift(managed resource.Managed) { return } - r.mrDrift.With(getMRMetricLabels(managed)).Observe(time.Since(lt).Seconds()) + r.mrDrift.WithLabelValues("gvk", managed.GetObjectKind().GroupVersionKind().String()). + Observe(time.Since(lt).Seconds()) r.lastObservation.Store(name, time.Now()) } func (r *MRMetricRecorder) recordDeleted(managed resource.Managed) { - r.mrDeletion.With(getMRMetricLabels(managed)).Observe(time.Since(managed.GetDeletionTimestamp().Time).Seconds()) + r.mrDeletion.WithLabelValues("gvk", managed.GetObjectKind().GroupVersionKind().String()). + Observe(time.Since(managed.GetDeletionTimestamp().Time).Seconds()) } func (r *MRMetricRecorder) recordFirstTimeReady(managed resource.Managed) { @@ -142,7 +145,8 @@ func (r *MRMetricRecorder) recordFirstTimeReady(managed resource.Managed) { if !ok { return } - r.mrFirstTimeReady.With(getMRMetricLabels(managed)).Observe(time.Since(managed.GetCreationTimestamp().Time).Seconds()) + r.mrFirstTimeReady.WithLabelValues("gvk", managed.GetObjectKind().GroupVersionKind().String()). + Observe(time.Since(managed.GetCreationTimestamp().Time).Seconds()) r.firstObservation.Delete(managed.GetName()) } } @@ -170,17 +174,3 @@ func (r *NopMetricRecorder) recordDrift(_ resource.Managed) {} func (r *NopMetricRecorder) recordDeleted(_ resource.Managed) {} func (r *NopMetricRecorder) recordFirstTimeReady(_ resource.Managed) {} - -func getMRMetricLabels(managed resource.Managed) prometheus.Labels { - l := prometheus.Labels{ - "gvk": managed.GetObjectKind().GroupVersionKind().String(), - "claim": "", - "composite": managed.GetLabels()["crossplane.io/composite"], - } - - if managed.GetLabels()["crossplane.io/claim-namespace"] != "" && managed.GetLabels()["crossplane.io/claim-name"] != "" { - l["claim"] = managed.GetLabels()["crossplane.io/claim-namespace"] + "/" + managed.GetLabels()["crossplane.io/claim-name"] - } - - return l -} diff --git a/pkg/reconciler/managed/reconciler.go b/pkg/reconciler/managed/reconciler.go index 0f2b99d4e..2be38e18e 100644 --- a/pkg/reconciler/managed/reconciler.go +++ b/pkg/reconciler/managed/reconciler.go @@ -36,6 +36,7 @@ import ( "github.com/crossplane/crossplane-runtime/pkg/logging" "github.com/crossplane/crossplane-runtime/pkg/meta" "github.com/crossplane/crossplane-runtime/pkg/resource" + "github.com/crossplane/crossplane-runtime/pkg/statemetrics" ) const ( @@ -488,7 +489,7 @@ type Reconciler struct { log logging.Logger record event.Recorder metricRecorder MetricRecorder - stateRecorder StateRecorder + stateRecorder statemetrics.StateRecorder } type mrManaged struct { @@ -554,7 +555,7 @@ func WithMetricRecorder(recorder MetricRecorder) ReconcilerOption { } // WithStateRecorder configures the Reconciler to use the supplied StateRecorder. -func WithStateRecorder(recorder StateRecorder) ReconcilerOption { +func WithStateRecorder(recorder statemetrics.StateRecorder) ReconcilerOption { return func(r *Reconciler) { r.stateRecorder = recorder } @@ -718,7 +719,7 @@ func NewReconciler(m manager.Manager, of resource.ManagedKind, o ...ReconcilerOp log: logging.NewNopLogger(), record: event.NewNopRecorder(), metricRecorder: NewNopMetricRecorder(), - stateRecorder: NewNopStateRecorder(), + stateRecorder: statemetrics.NewNopStateRecorder(), } for _, ro := range o { diff --git a/pkg/reconciler/managed/state_metrics.go b/pkg/statemetrics/mr_state_metrics.go similarity index 67% rename from pkg/reconciler/managed/state_metrics.go rename to pkg/statemetrics/mr_state_metrics.go index 0c63b4ece..cb4eae966 100644 --- a/pkg/reconciler/managed/state_metrics.go +++ b/pkg/statemetrics/mr_state_metrics.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package managed +package statemetrics import ( "context" @@ -31,20 +31,14 @@ import ( "github.com/crossplane/crossplane-runtime/pkg/logging" ) -// A StateRecorder records the state of given GroupVersionKind. -type StateRecorder interface { - Describe(ch chan<- *prometheus.Desc) - Collect(ch chan<- prometheus.Metric) - - Record(ctx context.Context, gvk schema.GroupVersionKind) - Run(ctx context.Context, gvk schema.GroupVersionKind) -} +// A MRStateRecorderOption configures a MRStateRecorder. +type MRStateRecorderOption func(*MRStateRecorder) // A MRStateRecorder records the state of managed resources. type MRStateRecorder struct { - client client.Client - log logging.Logger - frequency time.Duration + client client.Client + log logging.Logger + interval time.Duration mrExists *prometheus.GaugeVec mrReady *prometheus.GaugeVec @@ -52,10 +46,11 @@ type MRStateRecorder struct { } // NewMRStateRecorder returns a new MRStateRecorder which records the state of managed resources. -func NewMRStateRecorder(client client.Client, log logging.Logger, o ...StateRecorderOption) *MRStateRecorder { - r := &MRStateRecorder{ - client: client, - log: log, +func NewMRStateRecorder(client client.Client, log logging.Logger, interval time.Duration) *MRStateRecorder { + return &MRStateRecorder{ + client: client, + log: log, + interval: interval, mrExists: prometheus.NewGaugeVec(prometheus.GaugeOpts{ Subsystem: subSystem, @@ -73,23 +68,6 @@ func NewMRStateRecorder(client client.Client, log logging.Logger, o ...StateReco Help: "The number of managed resources in Synced=True state", }, []string{"gvk"}), } - - for _, ro := range o { - ro(r) - } - - return r -} - -// A StateRecorderOption configures a MRStateRecorder. -type StateRecorderOption func(*MRStateRecorder) - -// WithFrequency configures the frequency at which the MRStateRecorder -// will record. -func WithFrequency(f time.Duration) StateRecorderOption { - return func(r *MRStateRecorder) { - r.frequency = f - } } // Describe sends the super-set of all possible descriptors of metrics @@ -133,10 +111,13 @@ func (r *MRStateRecorder) Record(ctx context.Context, gvk schema.GroupVersionKin } for _, condition := range conditioned.Conditions { - if condition.Type == xpv1.TypeReady && condition.Status == corev1.ConditionTrue { - numReady++ - } else if condition.Type == xpv1.TypeSynced && condition.Status == corev1.ConditionTrue { - numSynced++ + if condition.Status == corev1.ConditionTrue { + switch condition.Type { + case xpv1.TypeReady: + numReady++ + case xpv1.TypeSynced: + numSynced++ + } } } } @@ -145,39 +126,18 @@ func (r *MRStateRecorder) Record(ctx context.Context, gvk schema.GroupVersionKin r.mrSynced.WithLabelValues(label).Set(numSynced) } -// Run records state of managed resources with given frequency. +// Run records state of managed resources with given interval. func (r *MRStateRecorder) Run(ctx context.Context, gvk schema.GroupVersionKind) { - ticker := time.NewTicker(r.frequency) - quit := make(chan struct{}) + ticker := time.NewTicker(r.interval) go func() { for { select { case <-ticker.C: r.Record(ctx, gvk) - case <-quit: + case <-ctx.Done(): ticker.Stop() return } } }() } - -// A NopStateRecorder does nothing. -type NopStateRecorder struct{} - -// NewNopStateRecorder returns a NopStateRecorder that does nothing. -func NewNopStateRecorder() *NopStateRecorder { - return &NopStateRecorder{} -} - -// Describe does nothing. -func (r *NopStateRecorder) Describe(_ chan<- *prometheus.Desc) {} - -// Collect does nothing. -func (r *NopStateRecorder) Collect(_ chan<- prometheus.Metric) {} - -// Record does nothing. -func (r *NopStateRecorder) Record(_ context.Context, _ schema.GroupVersionKind) {} - -// Run does nothing. -func (r *NopStateRecorder) Run(_ context.Context, _ schema.GroupVersionKind) {} diff --git a/pkg/statemetrics/state_recorder.go b/pkg/statemetrics/state_recorder.go new file mode 100644 index 000000000..e0629bcf9 --- /dev/null +++ b/pkg/statemetrics/state_recorder.go @@ -0,0 +1,56 @@ +/* +Copyright 2024 The Crossplane Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package statemetrics contains utilities for recording Crossplane resource state metrics. +package statemetrics + +import ( + "context" + + "github.com/prometheus/client_golang/prometheus" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +const subSystem = "crossplane" + +// A StateRecorder records the state of given GroupVersionKind. +type StateRecorder interface { + Describe(ch chan<- *prometheus.Desc) + Collect(ch chan<- prometheus.Metric) + + Record(ctx context.Context, gvk schema.GroupVersionKind) + Run(ctx context.Context, gvk schema.GroupVersionKind) +} + +// A NopStateRecorder does nothing. +type NopStateRecorder struct{} + +// NewNopStateRecorder returns a NopStateRecorder that does nothing. +func NewNopStateRecorder() *NopStateRecorder { + return &NopStateRecorder{} +} + +// Describe does nothing. +func (r *NopStateRecorder) Describe(_ chan<- *prometheus.Desc) {} + +// Collect does nothing. +func (r *NopStateRecorder) Collect(_ chan<- prometheus.Metric) {} + +// Record does nothing. +func (r *NopStateRecorder) Record(_ context.Context, _ schema.GroupVersionKind) {} + +// Run does nothing. +func (r *NopStateRecorder) Run(_ context.Context, _ schema.GroupVersionKind) {} diff --git a/pkg/statemetrics/xr_state_metrics.go b/pkg/statemetrics/xr_state_metrics.go new file mode 100644 index 000000000..7d374de4b --- /dev/null +++ b/pkg/statemetrics/xr_state_metrics.go @@ -0,0 +1,182 @@ +/* +Copyright 2024 The Crossplane Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package statemetrics + +import ( + "context" + "time" + + "github.com/prometheus/client_golang/prometheus" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + v1 "k8s.io/apiserver/pkg/apis/audit/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + xpv1 "github.com/crossplane/crossplane-runtime/apis/common/v1" + "github.com/crossplane/crossplane-runtime/pkg/fieldpath" + "github.com/crossplane/crossplane-runtime/pkg/logging" +) + +// A XRStateRecorder records the state of composite resources. +type XRStateRecorder struct { + client client.Client + log logging.Logger + interval time.Duration + + compositeExists *prometheus.GaugeVec + compositeReady *prometheus.GaugeVec + compositeSynced *prometheus.GaugeVec + compositeComposedCount *prometheus.GaugeVec +} + +// A APIExtStateRecorderOption configures a MRStateRecorder. +type APIExtStateRecorderOption func(*XRStateRecorder) + +// NewXRStateRecorder returns a new XRStateRecorder which records the state of claim, +// composite and composition metrics. +func NewXRStateRecorder(client client.Client, log logging.Logger, interval time.Duration) *XRStateRecorder { + return &XRStateRecorder{ + client: client, + log: log, + interval: interval, + + compositeExists: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Subsystem: subSystem, + Name: "composite_resource_exists", + Help: "The number of composite resources that exist", + }, []string{"gvk", "composition"}), + compositeReady: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Subsystem: subSystem, + Name: "composite_resource_ready", + Help: "The number of composite resources in Ready=True state", + }, []string{"gvk", "composition"}), + compositeSynced: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Subsystem: subSystem, + Name: "composite_resource_synced", + Help: "The number of composite resources in Synced=True state", + }, []string{"gvk", "composition"}), + compositeComposedCount: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Subsystem: subSystem, + Name: "composite_resource_composed_count", + Help: "The number of composed resources in total", + }, []string{"gvk", "composition"}), + } +} + +// Describe sends the super-set of all possible descriptors of metrics +// collected by this Collector to the provided channel and returns once +// the last descriptor has been sent. +func (r *XRStateRecorder) Describe(ch chan<- *prometheus.Desc) { + r.compositeExists.Describe(ch) + r.compositeReady.Describe(ch) + r.compositeSynced.Describe(ch) + r.compositeComposedCount.Describe(ch) +} + +// Collect is called by the Prometheus registry when collecting +// metrics. The implementation sends each collected metric via the +// provided channel and returns once the last metric has been sent. +func (r *XRStateRecorder) Collect(ch chan<- prometheus.Metric) { + r.compositeExists.Collect(ch) + r.compositeReady.Collect(ch) + r.compositeSynced.Collect(ch) + r.compositeComposedCount.Collect(ch) +} + +// Record records the state of managed resources. +func (r *XRStateRecorder) Record(ctx context.Context, gvk schema.GroupVersionKind) { + xrs := &unstructured.UnstructuredList{} + xrs.SetGroupVersionKind(gvk) + err := r.client.List(ctx, xrs) + if err != nil { + r.log.Info("Failed to list composite resources", "error", err) + return + } + + composition, err := getCompositionRef(xrs) + if err != nil { + r.log.Info("Failed to get composition reference of composite resource", "error", err) + return + } + + labels := prometheus.Labels{ + "gvk": gvk.String(), + "composition": composition, + } + r.compositeExists.With(labels).Set(float64(len(xrs.Items))) + + var numReady, numSynced, numComposed float64 = 0, 0, 0 + for _, xr := range xrs.Items { + conditioned := xpv1.ConditionedStatus{} + if err := fieldpath.Pave(xr.Object).GetValueInto("status", &conditioned); err != nil { + r.log.Info("Failed to get conditions of managed resource", "error", err) + continue + } + + for _, condition := range conditioned.Conditions { + if condition.Type == xpv1.TypeReady && condition.Status == corev1.ConditionTrue { + numReady++ + } else if condition.Type == xpv1.TypeSynced && condition.Status == corev1.ConditionTrue { + numSynced++ + } + } + + resourceRefs := make([]v1.ObjectReference, 0) + if err := fieldpath.Pave(xr.Object).GetValueInto("spec.resourceRefs", &resourceRefs); err != nil { + r.log.Info("Failed to get resource references of composed resource", "error", err) + continue + } + + numComposed += float64(len(resourceRefs)) + } + + r.compositeReady.With(labels).Set(numReady) + r.compositeSynced.With(labels).Set(numSynced) + r.compositeComposedCount.With(labels).Set(numComposed) +} + +// Run records state of managed resources with given interval. +func (r *XRStateRecorder) Run(ctx context.Context, gvk schema.GroupVersionKind) { + ticker := time.NewTicker(r.interval) + quit := make(chan struct{}) + go func() { + for { + select { + case <-ticker.C: + r.Record(ctx, gvk) + case <-quit: + ticker.Stop() + return + } + } + }() +} + +func getCompositionRef(l *unstructured.UnstructuredList) (string, error) { + if len(l.Items) == 0 { + return "", nil + } + + xr := l.Items[0].Object + compRef, err := fieldpath.Pave(xr).GetString("spec.compositionRef") + if err != nil { + return "", err + } + + return compRef, nil +}