Skip to content

Commit

Permalink
[issue-464] Create a Prometheus ServiceMonitor object that can captur…
Browse files Browse the repository at this point in the history
…e/collect metrics from deployed SonataFlow instances
  • Loading branch information
jianrongzhang89 committed Oct 4, 2024
1 parent 645b09f commit 6231535
Show file tree
Hide file tree
Showing 36 changed files with 746 additions and 76 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ env:
PYTHON_VERSION: "3.10"
KIND_VERSION: v0.20.0
KNATIVE_VERSION: v1.12.5
PROMETHEUS_VERSION: v0.70.0
OPERATOR_IMAGE_NAME: "127.0.0.1:5001/kogito-serverless-operator:0.0.1"

jobs:
Expand Down Expand Up @@ -68,6 +69,9 @@ jobs:
- name: Deploy Knative Eventing and Serving
run: make KNATIVE_VERSION=${{ env.KNATIVE_VERSION }} deploy-knative

- name: Deploy Prometheus
run: make PROMETHEUS_VERSION=${{ env.PROMETHEUS_VERSION }} deploy-prometheus

- name: Set OPERATOR_IMAGE_NAME to Point to Kind's Local Registry
run: echo "OPERATOR_IMAGE_NAME=${{ env.OPERATOR_IMAGE_NAME }}" >> $GITHUB_ENV

Expand Down
17 changes: 15 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,7 @@ vet: ## Run go vet against code.
.PHONY: test
test: manifests generate envtest test-api ## Run tests.
@echo "🔍 Running controller tests..."
@KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" \
go test $(shell go list ./... | grep -v /test/) -coverprofile cover.out > /dev/null 2>&1
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $(shell go list ./... | grep -v /test/) -coverprofile cover.out
@echo "✅ Tests completed successfully. Coverage report generated: cover.out."

.PHONY: test-api
Expand Down Expand Up @@ -261,6 +260,8 @@ GOLANGCI_LINT_VERSION ?= v1.57.2
KIND_VERSION ?= v0.20.0
KNATIVE_VERSION ?= v1.13.2
TIMEOUT_SECS ?= 180s
PROMETHEUS_VERSION ?= v0.70.0
GRAFANA_VERSION ?= v5.13.0

KNATIVE_SERVING_PREFIX ?= "https://github.com/knative/serving/releases/download/knative-$(KNATIVE_VERSION)"
KNATIVE_EVENTING_PREFIX ?= "https://github.com/knative/eventing/releases/download/knative-$(KNATIVE_VERSION)"
Expand Down Expand Up @@ -447,6 +448,18 @@ deploy-knative:
kubectl wait --for=condition=Ready=True KnativeServing/knative-serving -n knative-serving --timeout=$(TIMEOUT_SECS)
kubectl wait --for=condition=Ready=True KnativeEventing/knative-eventing -n knative-eventing --timeout=$(TIMEOUT_SECS)

.PHONY: deploy-prometheus
deploy-prometheus: create-cluster
kubectl create -f https://github.com/prometheus-operator/prometheus-operator/releases/download/$(PROMETHEUS_VERSION)/bundle.yaml
kubectl wait --for=condition=Available=True deploy/prometheus-operator -n default --timeout=$(TIMEOUT_SECS)
kubectl apply -f ./test/testdata/prometheus.yaml
kubectl wait --for=condition=Available=True prometheus/prometheus -n default --timeout=$(TIMEOUT_SECS)

.PHONY: deploy-grafana
deploy-grafana: create-cluster
kubectl create -f https://github.com/grafana/grafana-operator/releases/download/$(GRAFANA_VERSION)/kustomize-cluster_scoped.yaml
kubectl wait --for=condition=Available=True deploy/grafana-operator-controller-manager -n grafana --timeout=$(TIMEOUT_SECS)

.PHONY: delete-cluster
delete-cluster: install-kind
kind delete cluster && $(BUILDER) rm -f kind-registry
4 changes: 4 additions & 0 deletions api/v1alpha08/sonataflowplatform_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ type SonataFlowPlatformSpec struct {
// These properties MAY NOT be propagated to a SonataFlowClusterPlatform since PropertyVarSource can only refer local context sources.
// +optional
Properties *PropertyPlatformSpec `json:"properties,omitempty"`
// MonitoringEnabled indicates whether prometheus service monitors should be created for metrics scraping
// +optional
// +default: false
MonitoringEnabled bool `json:"monitoringEnabled"`
}

// PlatformEventingSpec specifies the Knative Eventing integration details in the platform.
Expand Down
4 changes: 4 additions & 0 deletions bundle/manifests/sonataflow.org_sonataflowplatforms.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,10 @@ spec:
type: string
type: object
type: object
monitoringEnabled:
description: MonitoringEnabled indicates whether prometheus service
monitors should be created for metrics scraping
type: boolean
persistence:
description: |-
Persistence defines the platform persistence configuration. When this field is set,
Expand Down
5 changes: 3 additions & 2 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ import (
"github.com/apache/incubator-kie-kogito-serverless-operator/internal/controller"
"github.com/apache/incubator-kie-kogito-serverless-operator/internal/controller/cfg"
"github.com/apache/incubator-kie-kogito-serverless-operator/version"
prometheus "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"k8s.io/klog/v2/klogr"
eventingv1 "knative.dev/eventing/pkg/apis/eventing/v1"
sourcesv1 "knative.dev/eventing/pkg/apis/sources/v1"
servingv1 "knative.dev/serving/pkg/apis/serving/v1"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
"sigs.k8s.io/controller-runtime/pkg/webhook"

"k8s.io/klog/v2/klogr"

"k8s.io/klog/v2"

"github.com/apache/incubator-kie-kogito-serverless-operator/utils"
Expand Down Expand Up @@ -66,6 +66,7 @@ func init() {
utilruntime.Must(sourcesv1.AddToScheme(scheme))
utilruntime.Must(eventingv1.AddToScheme(scheme))
utilruntime.Must(servingv1.AddToScheme(scheme))
utilruntime.Must(prometheus.AddToScheme(scheme))
//+kubebuilder:scaffold:scheme
}

Expand Down
4 changes: 4 additions & 0 deletions config/crd/bases/sonataflow.org_sonataflowplatforms.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,10 @@ spec:
type: string
type: object
type: object
monitoringEnabled:
description: MonitoringEnabled indicates whether prometheus service
monitors should be created for metrics scraping
type: boolean
persistence:
description: |-
Persistence defines the platform persistence configuration. When this field is set,
Expand Down
11 changes: 11 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,17 @@ kind: ClusterRole
metadata:
name: manager-role
rules:
- apiGroups:
- monitoring.coreos.com
resources:
- servicemonitors
verbs:
- create
- delete
- get
- list
- update
- watch
- apiGroups:
- sonataflow.org
resources:
Expand Down
2 changes: 2 additions & 0 deletions go.work.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2174,6 +2174,7 @@ github.com/google/go-containerregistry/pkg/authn/kubernetes v0.0.0-2023020916533
github.com/google/go-containerregistry/pkg/authn/kubernetes v0.0.0-20230209165335-3624968304fd/go.mod h1:6pjZpt+0dg+Z0kUEn53qLtD57raiZo/bqWzsuX6dDjo=
github.com/google/go-github v17.0.0+incompatible h1:N0LgJ1j65A7kfXrZnUDaYCs/Sf4rEjNlfyDHW9dolSY=
github.com/google/go-github/v27 v27.0.6 h1:oiOZuBmGHvrGM1X9uNUAUlLgp5r1UUO/M/KnbHnLRlQ=
github.com/google/go-jsonnet v0.18.0/go.mod h1:C3fTzyVJDslXdiTqw/bTFk7vSGyCtH3MGRbDfvEwGd0=
github.com/google/go-pkcs11 v0.2.1-0.20230907215043-c6f79328ddf9 h1:OF1IPgv+F4NmqmJ98KTjdN97Vs1JxDPB3vbmYzV2dpk=
github.com/google/go-pkcs11 v0.2.1-0.20230907215043-c6f79328ddf9/go.mod h1:6eQoGcuNJpa7jnd5pMGdkSaQpNDYvPlXWMcjXXThLlY=
github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk=
Expand Down Expand Up @@ -2466,6 +2467,7 @@ github.com/openzipkin/zipkin-go v0.3.0/go.mod h1:4c3sLeE8xjNqehmF5RpAFLPLJxXscc0
github.com/openzipkin/zipkin-go v0.4.2 h1:zjqfqHjUpPmB3c1GlCvvgsM1G4LkvqQbBDueDOCg/jA=
github.com/openzipkin/zipkin-go v0.4.2/go.mod h1:ZeVkFjuuBiSy13y8vpSDCjMi9GoI3hPpCJSBx/EYFhY=
github.com/operator-framework/api v0.1.1 h1:DbfxRJUPMQlQW6nbfoNzWLxv1rIv13Gt8GbsF2aglFk=
github.com/operator-framework/operator-lib v0.11.0/go.mod h1:RpyKhFAoG6DmKTDIwMuO6pI3LRc8IE9rxEYWy476o6g=
github.com/operator-framework/operator-registry v1.6.1 h1:Ow0Ko9DRIZ4xvH55vFAslcTy6A9FhlIeXvm+FhyRd84=
github.com/otiai10/copy v1.0.2 h1:DDNipYy6RkIkjMwy+AWzgKiNTyj2RUI9yEMeETEpVyc=
github.com/otiai10/curr v0.0.0-20190513014714-f5a3d24e5776 h1:o59bHXu8Ejas8Kq6pjoVJQ9/neN66SM8AKh6wI42BBs=
Expand Down
19 changes: 1 addition & 18 deletions internal/controller/knative/knative.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/discovery"
"k8s.io/client-go/rest"
eventingv1 "knative.dev/eventing/pkg/apis/eventing/v1"
sourcesv1 "knative.dev/eventing/pkg/apis/sources/v1"
Expand All @@ -44,7 +43,6 @@ import (

var servingClient clientservingv1.ServingV1Interface
var eventingClient clienteventingv1.EventingV1Interface
var discoveryClient discovery.DiscoveryInterface

type Availability struct {
Eventing bool
Expand Down Expand Up @@ -92,23 +90,8 @@ func NewKnativeEventingClient(cfg *rest.Config) (*clienteventingv1.EventingV1Cli
return clienteventingv1.NewForConfig(cfg)
}

func getDiscoveryClient(cfg *rest.Config) (discovery.DiscoveryInterface, error) {
if discoveryClient == nil {
if cli, err := discovery.NewDiscoveryClientForConfig(cfg); err != nil {
return nil, err
} else {
discoveryClient = cli
}
}
return discoveryClient, nil
}

func SetDiscoveryClient(cli discovery.DiscoveryInterface) {
discoveryClient = cli
}

func GetKnativeAvailability(cfg *rest.Config) (*Availability, error) {
if cli, err := getDiscoveryClient(cfg); err != nil {
if cli, err := utils.GetDiscoveryClient(cfg); err != nil {
return nil, err
} else {
apiList, err := cli.ServerGroups()
Expand Down
47 changes: 47 additions & 0 deletions internal/controller/monitoring/monitoring.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package monitoring

import (
"github.com/apache/incubator-kie-kogito-serverless-operator/utils"
"k8s.io/client-go/rest"
)

const (
prometheusGroup = "monitoring.coreos.com"
)

func GetPrometheusAvailability(cfg *rest.Config) (bool, error) {
cli, err := utils.GetDiscoveryClient(cfg)
if err != nil {
return false, err
}
apiList, err := cli.ServerGroups()
if err != nil {
return false, err
}
for _, group := range apiList.Groups {
if group.Name == prometheusGroup {
return true, nil
}

}
return false, nil
}
8 changes: 7 additions & 1 deletion internal/controller/profiles/common/mutate_visitors.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,13 @@ func ServiceMutateVisitor(workflow *operatorapi.SonataFlow) MutateVisitor {
if kubeutil.IsObjectNew(object) {
return nil
}
original, err := ServiceCreator(workflow)
var original client.Object
var err error
if workflow.IsKnativeDeployment() {
original, err = KnativeMetricsServiceCreator(workflow)
} else {
original, err = ServiceCreator(workflow)
}
if err != nil {
return err
}
Expand Down
76 changes: 75 additions & 1 deletion internal/controller/profiles/common/object_creators.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
cncfmodel "github.com/serverlessworkflow/sdk-go/v2/model"

"github.com/imdario/mergo"
prometheus "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
Expand Down Expand Up @@ -64,6 +65,10 @@ const (
deploymentKind = "Deployment"
k8sServiceAPIVersion = "v1"
k8sServiceKind = "Service"
k8sServicePortName = "web"
knativeServicePortName = "user-port"
metricsServicePortPath = "/q/metrics"
knativeServiceRouteLabel = "serving.knative.dev/route"
)

// ObjectCreator is the func that creates the initial reference object, if the object doesn't exist in the cluster, this one is created.
Expand Down Expand Up @@ -262,6 +267,7 @@ func ServiceCreator(workflow *operatorapi.SonataFlow) (client.Object, error) {
Spec: corev1.ServiceSpec{
Selector: lbl,
Ports: []corev1.ServicePort{{
Name: k8sServicePortName,
Protocol: corev1.ProtocolTCP,
Port: defaultHTTPServicePort,
TargetPort: variables.DefaultHTTPWorkflowPortIntStr,
Expand All @@ -272,6 +278,29 @@ func ServiceCreator(workflow *operatorapi.SonataFlow) (client.Object, error) {
return service, nil
}

// KnativeMetricsServiceCreator is an objectCreator for a kubernetes service for exposing prometheus metrics for serverless workflows.
// It maps the default HTTP port (80) to the target Java application webserver on port 8080.
func KnativeMetricsServiceCreator(workflow *operatorapi.SonataFlow) (client.Object, error) {
lbl := workflowproj.GetMergedLabels(workflow)
service := &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("%s-metrics", workflow.Name), // default service name is already used by knaitve service
Namespace: workflow.Namespace,
Labels: lbl,
},
Spec: corev1.ServiceSpec{
Selector: lbl,
Ports: []corev1.ServicePort{{
Name: knativeServicePortName,
Protocol: corev1.ProtocolTCP,
Port: defaultHTTPServicePort,
TargetPort: variables.DefaultHTTPWorkflowPortIntStr,
}},
},
}
return service, nil
}

// SinkBindingCreator is an ObjectsCreator for SinkBinding.
// It will create v1.SinkBinding based on events defined in workflow.
func SinkBindingCreator(workflow *operatorapi.SonataFlow, plf *operatorapi.SonataFlowPlatform) (client.Object, error) {
Expand Down Expand Up @@ -439,10 +468,55 @@ func UserPropsConfigMapCreator(workflow *operatorapi.SonataFlow) (client.Object,

// ManagedPropsConfigMapCreator creates an empty ConfigMap to hold the external application properties
func ManagedPropsConfigMapCreator(workflow *operatorapi.SonataFlow, platform *operatorapi.SonataFlowPlatform) (client.Object, error) {

props, err := properties.ApplicationManagedProperties(workflow, platform)
if err != nil {
return nil, err
}
return workflowproj.CreateNewManagedPropsConfigMap(workflow, props), nil
}

// ServiceMonitorCreator is an ObjectsCreator for Service Monitor for the workflow service.
func ServiceMonitorCreator(workflow *operatorapi.SonataFlow) (client.Object, error) {
lbl := workflowproj.GetMergedLabels(workflow)
var spec *prometheus.ServiceMonitorSpec
if workflow.IsKnativeDeployment() {
spec = &prometheus.ServiceMonitorSpec{
Selector: metav1.LabelSelector{
MatchLabels: map[string]string{
workflowproj.LabelWorkflow: workflow.Name,
workflowproj.LabelWorkflowNamespace: workflow.Namespace,
},
},
Endpoints: []prometheus.Endpoint{
{
Port: knativeServicePortName,
Path: metricsServicePortPath,
},
},
}
} else {
spec = &prometheus.ServiceMonitorSpec{
Selector: metav1.LabelSelector{
MatchLabels: map[string]string{
workflowproj.LabelWorkflow: workflow.Name,
workflowproj.LabelWorkflowNamespace: workflow.Namespace,
},
},
Endpoints: []prometheus.Endpoint{
{
Port: k8sServicePortName,
Path: metricsServicePortPath,
},
},
}
}
serviceMonitor := &prometheus.ServiceMonitor{
ObjectMeta: metav1.ObjectMeta{
Name: workflow.Name,
Namespace: workflow.Namespace,
Labels: lbl,
},
Spec: *spec,
}
return serviceMonitor, nil
}
Loading

0 comments on commit 6231535

Please sign in to comment.