From a1f77d7338fc9a9b3648b48e2bb9d8d990ef1e87 Mon Sep 17 00:00:00 2001 From: gourishkb <104021126+gourishkb@users.noreply.github.com> Date: Thu, 3 Oct 2024 12:06:05 +0530 Subject: [PATCH 01/23] fix(): initial changes for offboarding replicated ns Signed-off-by: gourishkb <104021126+gourishkb@users.noreply.github.com> --- pkg/webhook/pod/webhook.go | 75 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/pkg/webhook/pod/webhook.go b/pkg/webhook/pod/webhook.go index 1bc7a1f73..10b3afbed 100644 --- a/pkg/webhook/pod/webhook.go +++ b/pkg/webhook/pod/webhook.go @@ -80,6 +80,10 @@ func (wh *WebhookServer) Handle(ctx context.Context, req admission.Request) admi if mutate, sliceName := wh.MutationRequired(pod.ObjectMeta, ctx, req.Kind.Kind); !mutate { log.Info("mutation not required for pod", "pod metadata", pod.ObjectMeta.Name) + if offBoard := wh.OffboardRequired(pod.ObjectMeta, ctx, req.Kind.Kind); offBoard { + log.Info("mutation to offboard required for pod", "pod metadata", pod.ObjectMeta.Name) + pod = OffBoardPod(pod, ctx) + } } else { log.Info("mutating pod", "pod metadata", pod.ObjectMeta.Name) pod = MutatePod(pod, sliceName) @@ -177,6 +181,44 @@ func (wh *WebhookServer) Handle(ctx context.Context, req admission.Request) admi }} } +func OffBoardPod(pod *corev1.Pod, ctx context.Context) *corev1.Pod { + log := logger.FromContext(ctx) + + metadata := pod.ObjectMeta + annotations := metadata.GetAnnotations() + labels := metadata.GetLabels() + + //TODO: + // a. if not part of slice but has kubeslice or nsm labels -> yes ? remove them + // i. remove labels + //TODO: move as global variable + // Remove kubeslice and nsm labels if present + //TODO: use constants + labelsToRemove := []string{"kubeslice.io/nsmIP", "kubeslice.io/pod-type", "kubeslice.io/slice"} + for _, labelKey := range labelsToRemove { + if _, exists := labels[labelKey]; exists { + log.Info("Removing label", "labelKey", labelKey) + delete(labels, labelKey) + } + } + metadata.SetLabels(labels) + //TODO: ii. remove annotations + //TODO: move as global variable + // Remove annotations if necessary + //TODO: use constants + annotationsToRemove := []string{"kubeslice.io/status", "ns.networkservicemesh.io", "networkservicemesh.io"} + for _, annotationKey := range annotationsToRemove { + if _, exists := annotations[annotationKey]; exists { + log.Info("Removing annotation", "annotationKey", annotationKey) + delete(annotations, annotationKey) + } + } + metadata.SetAnnotations(annotations) + pod.ObjectMeta = metadata + //TODO iii. remove containers + return pod +} + func MutatePod(pod *corev1.Pod, sliceName string) *corev1.Pod { // Add injection status to pod annotations if pod.ObjectMeta.Annotations == nil { @@ -317,6 +359,39 @@ func (wh *WebhookServer) ValidateServiceExport(svcex *v1beta1.ServiceExport, ctx return true, "", nil } +func (wh *WebhookServer) OffboardRequired(metadata metav1.ObjectMeta, ctx context.Context, kind string) bool { + log := logger.FromContext(ctx) + annotations := metadata.GetAnnotations() + labels := metadata.GetLabels() + + //TODO: + // a. if not part of slice but has kubeslice or nsm labels -> yes ? remove them + // i. remove labels + // ii. remove annotations + //TODO: move as global variable + // Remove kubeslice and nsm labels if present + //TODO: use constants + labelsToRemove := []string{"kubeslice.io/nsmIP", "kubeslice.io/pod-type", "kubeslice.io/slice"} + for _, labelKey := range labelsToRemove { + if _, exists := labels[labelKey]; exists { + log.Info("Removing label", "labelKey", labelKey) + return true + } + } + + //TODO: move as global variable + // Remove annotations if necessary + //TODO: use constants + annotationsToRemove := []string{"kubeslice.io/status", "ns.networkservicemesh.io", "networkservicemesh.io"} + for _, annotationKey := range annotationsToRemove { + if _, exists := annotations[annotationKey]; exists { + log.Info("Removing annotation", "annotationKey", annotationKey) + return true + } + } + return false +} + // returns mutationRequired bool, sliceName string func (wh *WebhookServer) MutationRequired(metadata metav1.ObjectMeta, ctx context.Context, kind string) (bool, string) { log := logger.FromContext(ctx) From 6973574a231f27f0ed6592dc944a1968463691a9 Mon Sep 17 00:00:00 2001 From: gourishkb <104021126+gourishkb@users.noreply.github.com> Date: Fri, 4 Oct 2024 11:35:28 +0530 Subject: [PATCH 02/23] fix(): expose offBoardPod function Signed-off-by: gourishkb <104021126+gourishkb@users.noreply.github.com> --- pkg/webhook/pod/webhook.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/webhook/pod/webhook.go b/pkg/webhook/pod/webhook.go index 10b3afbed..0c768a219 100644 --- a/pkg/webhook/pod/webhook.go +++ b/pkg/webhook/pod/webhook.go @@ -82,7 +82,7 @@ func (wh *WebhookServer) Handle(ctx context.Context, req admission.Request) admi log.Info("mutation not required for pod", "pod metadata", pod.ObjectMeta.Name) if offBoard := wh.OffboardRequired(pod.ObjectMeta, ctx, req.Kind.Kind); offBoard { log.Info("mutation to offboard required for pod", "pod metadata", pod.ObjectMeta.Name) - pod = OffBoardPod(pod, ctx) + pod = wh.OffBoardPod(pod, ctx) } } else { log.Info("mutating pod", "pod metadata", pod.ObjectMeta.Name) @@ -181,7 +181,7 @@ func (wh *WebhookServer) Handle(ctx context.Context, req admission.Request) admi }} } -func OffBoardPod(pod *corev1.Pod, ctx context.Context) *corev1.Pod { +func (wh *WebhookServer) OffBoardPod(pod *corev1.Pod, ctx context.Context) *corev1.Pod { log := logger.FromContext(ctx) metadata := pod.ObjectMeta From 10ebd099a1742d5e353c9128ccd453dc008aa114 Mon Sep 17 00:00:00 2001 From: gourishkb <104021126+gourishkb@users.noreply.github.com> Date: Wed, 9 Oct 2024 16:19:14 +0530 Subject: [PATCH 03/23] fix(): offboard objects, while cleaning up ns Signed-off-by: gourishkb <104021126+gourishkb@users.noreply.github.com> --- pkg/namespace/controllers/reconciler.go | 13 +++++ pkg/webhook/pod/webhook.go | 72 +++++++++++++++---------- 2 files changed, 56 insertions(+), 29 deletions(-) diff --git a/pkg/namespace/controllers/reconciler.go b/pkg/namespace/controllers/reconciler.go index a234663da..b74596fd7 100644 --- a/pkg/namespace/controllers/reconciler.go +++ b/pkg/namespace/controllers/reconciler.go @@ -26,6 +26,7 @@ import ( "github.com/kubeslice/worker-operator/controllers" + cntrl "github.com/kubeslice/worker-operator/controllers" hub "github.com/kubeslice/worker-operator/pkg/hub/hubclient" "github.com/kubeslice/worker-operator/pkg/logger" "github.com/kubeslice/worker-operator/pkg/utils" @@ -100,6 +101,18 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu log.Error(err, "error while retrieving labels from namespace") return ctrl.Result{}, err } + if sliceName != "" { + isNsConfigured, err := cntrl.SliceAppNamespaceConfigured(ctx, sliceName, namespace.Name) + if err != nil { + log.Error(err, "Failed to get app namespace info for slice", + "slice", sliceName, "namespace", namespace.Name) + return ctrl.Result{}, nil + } + if !isNsConfigured { + log.Info("Namespace not part of slice", "namespace", namespace.Name, "slice", sliceName) + return ctrl.Result{}, nil + } + } *r.EventRecorder = (*r.EventRecorder).WithSlice(sliceName) err = hub.UpdateNamespaceInfoToHub(ctx, r.Hubclient, namespace.Name, sliceName) if err != nil { diff --git a/pkg/webhook/pod/webhook.go b/pkg/webhook/pod/webhook.go index 0c768a219..39253acef 100644 --- a/pkg/webhook/pod/webhook.go +++ b/pkg/webhook/pod/webhook.go @@ -80,9 +80,9 @@ func (wh *WebhookServer) Handle(ctx context.Context, req admission.Request) admi if mutate, sliceName := wh.MutationRequired(pod.ObjectMeta, ctx, req.Kind.Kind); !mutate { log.Info("mutation not required for pod", "pod metadata", pod.ObjectMeta.Name) - if offBoard := wh.OffboardRequired(pod.ObjectMeta, ctx, req.Kind.Kind); offBoard { + if offBoard := wh.OffboardRequired(pod.ObjectMeta, ctx, req.Kind.Kind, sliceName); offBoard { log.Info("mutation to offboard required for pod", "pod metadata", pod.ObjectMeta.Name) - pod = wh.OffBoardPod(pod, ctx) + pod.ObjectMeta = wh.OffBoardObject(pod.ObjectMeta, ctx) } } else { log.Info("mutating pod", "pod metadata", pod.ObjectMeta.Name) @@ -181,10 +181,9 @@ func (wh *WebhookServer) Handle(ctx context.Context, req admission.Request) admi }} } -func (wh *WebhookServer) OffBoardPod(pod *corev1.Pod, ctx context.Context) *corev1.Pod { - log := logger.FromContext(ctx) +func (wh *WebhookServer) OffBoardObject(metadata metav1.ObjectMeta, ctx context.Context) metav1.ObjectMeta { + log := logger.FromContext(ctx).WithName("Webhook") - metadata := pod.ObjectMeta annotations := metadata.GetAnnotations() labels := metadata.GetLabels() @@ -214,9 +213,8 @@ func (wh *WebhookServer) OffBoardPod(pod *corev1.Pod, ctx context.Context) *core } } metadata.SetAnnotations(annotations) - pod.ObjectMeta = metadata //TODO iii. remove containers - return pod + return metadata } func MutatePod(pod *corev1.Pod, sliceName string) *corev1.Pod { @@ -359,8 +357,8 @@ func (wh *WebhookServer) ValidateServiceExport(svcex *v1beta1.ServiceExport, ctx return true, "", nil } -func (wh *WebhookServer) OffboardRequired(metadata metav1.ObjectMeta, ctx context.Context, kind string) bool { - log := logger.FromContext(ctx) +func (wh *WebhookServer) OffboardRequired(metadata metav1.ObjectMeta, ctx context.Context, kind, sliceName string) bool { + log := logger.FromContext(ctx).WithName("Webhook") annotations := metadata.GetAnnotations() labels := metadata.GetLabels() @@ -371,10 +369,26 @@ func (wh *WebhookServer) OffboardRequired(metadata metav1.ObjectMeta, ctx contex //TODO: move as global variable // Remove kubeslice and nsm labels if present //TODO: use constants + if sliceNameInNs, exists := labels[admissionWebhookSliceNamespaceSelectorKey]; exists { + if sliceNameInNs != sliceName { + nsConfigured, err := wh.SliceInfoClient.SliceAppNamespaceConfigured(context.Background(), sliceNameInNs, metadata.Namespace) + if err != nil { + log.Error(err, "Failed to get app namespace info for slice", + "slice", sliceNameInNs, "namespace", metadata.Namespace) + return false + } + if !nsConfigured { + log.Info("Namespace not part of slice", "namespace", metadata.Namespace, "slice", sliceNameInNs) + return false + } + return true + } + } + labelsToRemove := []string{"kubeslice.io/nsmIP", "kubeslice.io/pod-type", "kubeslice.io/slice"} for _, labelKey := range labelsToRemove { if _, exists := labels[labelKey]; exists { - log.Info("Removing label", "labelKey", labelKey) + log.Info("Found label", "labelKey", labelKey) return true } } @@ -385,7 +399,7 @@ func (wh *WebhookServer) OffboardRequired(metadata metav1.ObjectMeta, ctx contex annotationsToRemove := []string{"kubeslice.io/status", "ns.networkservicemesh.io", "networkservicemesh.io"} for _, annotationKey := range annotationsToRemove { if _, exists := annotations[annotationKey]; exists { - log.Info("Removing annotation", "annotationKey", annotationKey) + log.Info("Found annotation", "annotationKey", annotationKey) return true } } @@ -394,7 +408,7 @@ func (wh *WebhookServer) OffboardRequired(metadata metav1.ObjectMeta, ctx contex // returns mutationRequired bool, sliceName string func (wh *WebhookServer) MutationRequired(metadata metav1.ObjectMeta, ctx context.Context, kind string) (bool, string) { - log := logger.FromContext(ctx) + log := logger.FromContext(ctx).WithName("Webhook") annotations := metadata.GetAnnotations() labels := metadata.GetLabels() @@ -413,19 +427,6 @@ func (wh *WebhookServer) MutationRequired(metadata metav1.ObjectMeta, ctx contex return false, "" } - // do not inject if it is already injected - //TODO(rahulsawra): need better way to define injected status - if annotations[AdmissionWebhookAnnotationStatusKey] == "injected" { - log.Info("obj is already injected", "kind", kind) - return false, "" - } - - // Do not auto onboard control plane namespace. Ideally, we should not have any deployment/pod in the control plane ns connect to a slice - if metadata.Namespace == controlPlaneNamespace { - log.Info("namespace is same as controle plane") - return false, "" - } - nsLabels, err := wh.SliceInfoClient.GetNamespaceLabels(context.Background(), wh.Client, metadata.Namespace) if err != nil { log.Error(err, "Error getting namespace labels") @@ -442,25 +443,38 @@ func (wh *WebhookServer) MutationRequired(metadata metav1.ObjectMeta, ctx contex return false, "" } + // do not inject if it is already injected + //TODO(rahulsawra): need better way to define injected status + if annotations[AdmissionWebhookAnnotationStatusKey] == "injected" { + log.Info("obj is already injected", "kind", kind) + return false, sliceNameInNs + } + + // Do not auto onboard control plane namespace. Ideally, we should not have any deployment/pod in the control plane ns connect to a slice + if metadata.Namespace == controlPlaneNamespace { + log.Info("namespace is same as controle plane") + return false, sliceNameInNs + } + sliceNetworkType, err := wh.SliceInfoClient.GetSliceOverlayNetworkType(context.Background(), wh.Client, sliceNameInNs) if err != nil { log.Error(err, "Error getting slice overlay network type") - return false, "" + return false, sliceNameInNs } if sliceNetworkType != "" && sliceNetworkType != v1alpha1.SINGLENET { log.Info("Slice overlay type is not single-network. Skip pod mutation...") - return false, "" + return false, sliceNameInNs } nsConfigured, err := wh.SliceInfoClient.SliceAppNamespaceConfigured(context.Background(), sliceNameInNs, metadata.Namespace) if err != nil { log.Error(err, "Failed to get app namespace info for slice", "slice", sliceNameInNs, "namespace", metadata.Namespace) - return false, "" + return false, sliceNameInNs } if !nsConfigured { log.Info("Namespace not part of slice", "namespace", metadata.Namespace, "slice", sliceNameInNs) - return false, "" + return false, sliceNameInNs } // The annotation kubeslice.io/slice:SLICENAME is present, enable mutation return true, sliceNameInNs From acea6a806eb23650a6a78d4ad59fe155c5c32cb7 Mon Sep 17 00:00:00 2001 From: Mridul Gain Date: Tue, 22 Oct 2024 14:48:05 +0530 Subject: [PATCH 04/23] select the slice gateway services only This filter is to prevent envoy gateway services to be fetched in the slice gateway edge list --- controllers/controller.go | 1 + 1 file changed, 1 insertion(+) diff --git a/controllers/controller.go b/controllers/controller.go index c1d249f02..de77f91b8 100644 --- a/controllers/controller.go +++ b/controllers/controller.go @@ -98,6 +98,7 @@ func GetSliceGatewayServers(ctx context.Context, c client.Client, sliceName stri func GetSliceGwServices(ctx context.Context, c client.Client, sliceName string) (*corev1.ServiceList, error) { sliceGwSvcList := &corev1.ServiceList{} listOpts := []client.ListOption{ + client.InNamespace(ControlPlaneNamespace), client.MatchingLabels(map[string]string{ApplicationNamespaceSelectorLabelKey: sliceName}), } From 242a3238fb3ed72636504ceb89baf585fc76b552 Mon Sep 17 00:00:00 2001 From: gourishkb <104021126+gourishkb@users.noreply.github.com> Date: Wed, 23 Oct 2024 19:30:57 +0530 Subject: [PATCH 05/23] fix(): offboardRequired function Signed-off-by: gourishkb <104021126+gourishkb@users.noreply.github.com> --- pkg/webhook/pod/webhook.go | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/pkg/webhook/pod/webhook.go b/pkg/webhook/pod/webhook.go index 39253acef..5e117422d 100644 --- a/pkg/webhook/pod/webhook.go +++ b/pkg/webhook/pod/webhook.go @@ -104,9 +104,13 @@ func (wh *WebhookServer) Handle(ctx context.Context, req admission.Request) admi if mutate, sliceName := wh.MutationRequired(deploy.ObjectMeta, ctx, req.Kind.Kind); !mutate { log.Info("mutation not required for deployment", "pod metadata", deploy.Spec.Template.ObjectMeta) + if offBoard := wh.OffboardRequired(deploy.ObjectMeta, ctx, req.Kind.Kind, sliceName); offBoard { + log.Info("mutation to offboard required for deploy", "deploy metadata", deploy.ObjectMeta.Name) + deploy.ObjectMeta = wh.OffBoardObject(deploy.ObjectMeta, ctx) + } } else { deploy = MutateDeployment(deploy, sliceName) - log.Info("mutated deploy", "pod metadata", deploy.Spec.Template.ObjectMeta) + log.Info("mutated deploy", "deploy metadata", deploy.Spec.Template.ObjectMeta) } marshaled, err := json.Marshal(deploy) @@ -123,10 +127,14 @@ func (wh *WebhookServer) Handle(ctx context.Context, req admission.Request) admi log := logger.FromContext(ctx) if mutate, sliceName := wh.MutationRequired(statefulset.ObjectMeta, ctx, req.Kind.Kind); !mutate { - log.Info("mutation not required for statefulsets", "pod metadata", statefulset.Spec.Template.ObjectMeta) + log.Info("mutation not required for statefulsets", "statefulset metadata", statefulset.Spec.Template.ObjectMeta) + if offBoard := wh.OffboardRequired(statefulset.ObjectMeta, ctx, req.Kind.Kind, sliceName); offBoard { + log.Info("mutation to offboard required for statefulset", "statefulset metadata", statefulset.ObjectMeta.Name) + statefulset.ObjectMeta = wh.OffBoardObject(statefulset.ObjectMeta, ctx) + } } else { statefulset = MutateStatefulset(statefulset, sliceName) - log.Info("mutated statefulset", "pod metadata", statefulset.Spec.Template.ObjectMeta) + log.Info("mutated statefulset", "statefulset metadata", statefulset.Spec.Template.ObjectMeta) } marshaled, err := json.Marshal(statefulset) @@ -143,10 +151,14 @@ func (wh *WebhookServer) Handle(ctx context.Context, req admission.Request) admi log := logger.FromContext(ctx) if mutate, sliceName := wh.MutationRequired(daemonset.ObjectMeta, ctx, req.Kind.Kind); !mutate { - log.Info("mutation not required for daemonset", "pod metadata", daemonset.Spec.Template.ObjectMeta) + log.Info("mutation not required for daemonset", "daemonset metadata", daemonset.Spec.Template.ObjectMeta) + if offBoard := wh.OffboardRequired(daemonset.ObjectMeta, ctx, req.Kind.Kind, sliceName); offBoard { + log.Info("mutation to offboard required for daemonset", "daemonset metadata", daemonset.ObjectMeta.Name) + daemonset.ObjectMeta = wh.OffBoardObject(daemonset.ObjectMeta, ctx) + } } else { daemonset = MutateDaemonSet(daemonset, sliceName) - log.Info("mutated daemonset", "pod metadata", daemonset.Spec.Template.ObjectMeta) + log.Info("mutated daemonset", "daemonset metadata", daemonset.Spec.Template.ObjectMeta) } marshaled, err := json.Marshal(daemonset) @@ -370,19 +382,22 @@ func (wh *WebhookServer) OffboardRequired(metadata metav1.ObjectMeta, ctx contex // Remove kubeslice and nsm labels if present //TODO: use constants if sliceNameInNs, exists := labels[admissionWebhookSliceNamespaceSelectorKey]; exists { + log.Info("slice name in namespace exists", "sliceNameInNs", sliceNameInNs) if sliceNameInNs != sliceName { - nsConfigured, err := wh.SliceInfoClient.SliceAppNamespaceConfigured(context.Background(), sliceNameInNs, metadata.Namespace) + log.Info("slice name in namespace does not match sliceName", "sliceNameInNs", sliceNameInNs, "sliceName", sliceName) + nsConfigured, err := wh.SliceInfoClient.SliceAppNamespaceConfigured(context.Background(), sliceName, metadata.Namespace) if err != nil { log.Error(err, "Failed to get app namespace info for slice", - "slice", sliceNameInNs, "namespace", metadata.Namespace) + "slice", sliceName, "namespace", metadata.Namespace) return false } if !nsConfigured { - log.Info("Namespace not part of slice", "namespace", metadata.Namespace, "slice", sliceNameInNs) - return false + log.Info("Namespace not part of slice", "namespace", metadata.Namespace, "slice", sliceName) + return true } - return true + return false } + return false } labelsToRemove := []string{"kubeslice.io/nsmIP", "kubeslice.io/pod-type", "kubeslice.io/slice"} From 81f2f58954b486954f46d7cbd51dd63c0295681c Mon Sep 17 00:00:00 2001 From: Kranthi Kumar Date: Tue, 29 Oct 2024 16:57:16 +0530 Subject: [PATCH 06/23] fix(): Add ignore_critical parameter to pipeline config --- Jenkinsfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 4612aed9e..d055acd83 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -3,7 +3,8 @@ dockerbuildtestPipeline( script: this, service: 'worker-operator', buildArguments: [PLATFORM:"amd64"], - run_unit_tests: 'false' + run_unit_tests: 'false', + ignore_critical: 'true' ) From a693a2e5f323fc99193216b529eae361c35883f3 Mon Sep 17 00:00:00 2001 From: Kranthi Kumar Date: Tue, 29 Oct 2024 20:47:30 +0530 Subject: [PATCH 07/23] fix(): Set ignore_critical to false in pipeline configuration --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index d055acd83..5d39401c5 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -4,7 +4,7 @@ dockerbuildtestPipeline( service: 'worker-operator', buildArguments: [PLATFORM:"amd64"], run_unit_tests: 'false', - ignore_critical: 'true' + ignore_critical: 'false' ) From 91094ac82f896b911e689e061fadad69f42e17a0 Mon Sep 17 00:00:00 2001 From: Kranthi Kumar Date: Wed, 30 Oct 2024 01:10:58 +0530 Subject: [PATCH 08/23] fix(): Add update_trivy flag with default 'false' --- Jenkinsfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 5d39401c5..69d9a3cfe 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -4,7 +4,8 @@ dockerbuildtestPipeline( service: 'worker-operator', buildArguments: [PLATFORM:"amd64"], run_unit_tests: 'false', - ignore_critical: 'false' + ignore_critical: 'false', + update_trivy: 'false' ) From 07d43aaae5282a3d24a2ca29d555b532794cc78e Mon Sep 17 00:00:00 2001 From: gourishkb <104021126+gourishkb@users.noreply.github.com> Date: Wed, 30 Oct 2024 11:55:10 +0530 Subject: [PATCH 09/23] fix(): vulnerabilities CVE-2024-24790 CVE-2023-45288 CVE-2024-24788 CVE-2024-34156 CVE-2024-24789 CVE-2024-24791 CVE-2024-34155 CVE-2024-34158 Signed-off-by: gourishkb <104021126+gourishkb@users.noreply.github.com> --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index ebe5c9d66..3c5be5bf2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,7 +18,7 @@ ########################################################## # Build the manager binary -FROM golang:1.22.1 as builder +FROM golang:1.23.1 as builder WORKDIR /workspace # Copy the Go Modules manifests From 210435b16f1bb4db9105cfe8d1e3f9f94e482d29 Mon Sep 17 00:00:00 2001 From: gourishkb <104021126+gourishkb@users.noreply.github.com> Date: Mon, 4 Nov 2024 11:03:33 +0530 Subject: [PATCH 10/23] fix(): code cleanup Signed-off-by: gourishkb <104021126+gourishkb@users.noreply.github.com> --- pkg/webhook/pod/webhook.go | 47 +++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/pkg/webhook/pod/webhook.go b/pkg/webhook/pod/webhook.go index 5e117422d..d79461bcd 100644 --- a/pkg/webhook/pod/webhook.go +++ b/pkg/webhook/pod/webhook.go @@ -48,7 +48,18 @@ const ( ) var ( - log = logger.NewWrappedLogger().WithName("Webhook").V(1) + log = logger.NewWrappedLogger().WithName("Webhook").V(1) + LabelsToRemove = []string{ + "kubeslice.io/nsmIP", + "kubeslice.io/pod-type", + "kubeslice.io/slice", + } + + AnnotationsToRemove = []string{ + "kubeslice.io/status", + "ns.networkservicemesh.io", + "networkservicemesh.io", + } ) type SliceInfoProvider interface { @@ -199,33 +210,25 @@ func (wh *WebhookServer) OffBoardObject(metadata metav1.ObjectMeta, ctx context. annotations := metadata.GetAnnotations() labels := metadata.GetLabels() - //TODO: - // a. if not part of slice but has kubeslice or nsm labels -> yes ? remove them - // i. remove labels - //TODO: move as global variable // Remove kubeslice and nsm labels if present - //TODO: use constants - labelsToRemove := []string{"kubeslice.io/nsmIP", "kubeslice.io/pod-type", "kubeslice.io/slice"} - for _, labelKey := range labelsToRemove { + LabelsToRemove := []string{"kubeslice.io/nsmIP", "kubeslice.io/pod-type", "kubeslice.io/slice"} + for _, labelKey := range LabelsToRemove { if _, exists := labels[labelKey]; exists { log.Info("Removing label", "labelKey", labelKey) delete(labels, labelKey) } } metadata.SetLabels(labels) - //TODO: ii. remove annotations - //TODO: move as global variable + // Remove annotations if necessary - //TODO: use constants - annotationsToRemove := []string{"kubeslice.io/status", "ns.networkservicemesh.io", "networkservicemesh.io"} - for _, annotationKey := range annotationsToRemove { + AnnotationsToRemove := []string{"kubeslice.io/status", "ns.networkservicemesh.io", "networkservicemesh.io"} + for _, annotationKey := range AnnotationsToRemove { if _, exists := annotations[annotationKey]; exists { log.Info("Removing annotation", "annotationKey", annotationKey) delete(annotations, annotationKey) } } metadata.SetAnnotations(annotations) - //TODO iii. remove containers return metadata } @@ -374,13 +377,7 @@ func (wh *WebhookServer) OffboardRequired(metadata metav1.ObjectMeta, ctx contex annotations := metadata.GetAnnotations() labels := metadata.GetLabels() - //TODO: - // a. if not part of slice but has kubeslice or nsm labels -> yes ? remove them - // i. remove labels - // ii. remove annotations - //TODO: move as global variable // Remove kubeslice and nsm labels if present - //TODO: use constants if sliceNameInNs, exists := labels[admissionWebhookSliceNamespaceSelectorKey]; exists { log.Info("slice name in namespace exists", "sliceNameInNs", sliceNameInNs) if sliceNameInNs != sliceName { @@ -400,19 +397,17 @@ func (wh *WebhookServer) OffboardRequired(metadata metav1.ObjectMeta, ctx contex return false } - labelsToRemove := []string{"kubeslice.io/nsmIP", "kubeslice.io/pod-type", "kubeslice.io/slice"} - for _, labelKey := range labelsToRemove { + LabelsToRemove := []string{"kubeslice.io/nsmIP", "kubeslice.io/pod-type", "kubeslice.io/slice"} + for _, labelKey := range LabelsToRemove { if _, exists := labels[labelKey]; exists { log.Info("Found label", "labelKey", labelKey) return true } } - //TODO: move as global variable // Remove annotations if necessary - //TODO: use constants - annotationsToRemove := []string{"kubeslice.io/status", "ns.networkservicemesh.io", "networkservicemesh.io"} - for _, annotationKey := range annotationsToRemove { + AnnotationsToRemove := []string{"kubeslice.io/status", "ns.networkservicemesh.io", "networkservicemesh.io"} + for _, annotationKey := range AnnotationsToRemove { if _, exists := annotations[annotationKey]; exists { log.Info("Found annotation", "annotationKey", annotationKey) return true From 5db5228877e99f9fe05811224485bf5849a7ee4c Mon Sep 17 00:00:00 2001 From: Kranthi Kumar Date: Tue, 5 Nov 2024 15:33:44 +0530 Subject: [PATCH 11/23] fix(): Update Jenkinsfile --- Jenkinsfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 69d9a3cfe..4612aed9e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -3,9 +3,7 @@ dockerbuildtestPipeline( script: this, service: 'worker-operator', buildArguments: [PLATFORM:"amd64"], - run_unit_tests: 'false', - ignore_critical: 'false', - update_trivy: 'false' + run_unit_tests: 'false' ) From 01e11368ce48aebb83bd75320a2482e723b5d4e7 Mon Sep 17 00:00:00 2001 From: uma-kt <105046156+uma-kt@users.noreply.github.com> Date: Tue, 29 Oct 2024 10:19:21 +0530 Subject: [PATCH 12/23] Update README.md worker-operator Updated the docs URL link --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 2d283da4d..3f4080585 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,8 @@ The `kubeslice-worker` operator uses Kubebuilder, a framework for building Kuber It is strongly recommended that you use a released version. Please refer to our documentation on: -- [Get Started on KubeSlice](https://kubeslice.io/documentation/open-source/1.3.0/category/get-started) -- [Install KubeSlice](https://kubeslice.io/documentation/open-source/1.3.0/category/install-kubeslice) +- [Get Started on KubeSlice](https://kubeslice.io/documentation/open-source/latest/category/get-started) +- [Install KubeSlice](https://kubeslice.io/documentation/open-source/latest/category/install-kubeslice) ## Install `kubeslice-worker` on a Kind Cluster @@ -19,7 +19,7 @@ Before you begin, make sure the following prerequisites are met: * Docker is installed and running on your local machine. * A running [`kind`](https://kind.sigs.k8s.io/) cluster. * [`kubectl`](https://kubernetes.io/docs/tasks/tools/) is installed and configured. -* You have prepared the environment to install [`kubeslice-controller`](https://github.com/kubeslice/kubeslice-controller) on the controller cluster and [`worker-operator`](https://github.com/kubeslice/worker-operator) on the worker cluster. For more information, see [Prerequisites](https://kubeslice.io/documentation/open-source/1.3.0/category/prerequisites). +* You have prepared the environment to install [`kubeslice-controller`](https://github.com/kubeslice/kubeslice-controller) on the controller cluster and [`worker-operator`](https://github.com/kubeslice/worker-operator) on the worker cluster. For more information, see [Prerequisites](https://kubeslice.io/documentation/open-source/latest/category/prerequisites). ### Build and Deploy a Worker Operator on a Kind Cluster @@ -31,13 +31,13 @@ docker pull aveshasystems/worker-operator:latest ### Setting up Your Helm Repo -If you have not added avesha helm repo yet, add it. +If you have not added Avesha's `helm repo` yet, add it. ```console helm repo add avesha https://kubeslice.github.io/charts/ ``` -Upgrade the avesha helm repo. +Upgrade Avesha's `helm repo`. ```console helm repo update @@ -69,7 +69,7 @@ deploy/controller_secret.sh gke_avesha-dev_us-east1-c_xxxx kubeslice-cisco my-aw ``` 2. Edit the `VERSION` variable in the Makefile to change the docker tag to be built. -The image is set as `docker.io/aveshasystems/worker-operator:$(VERSION)` in the Makefile. Modify this if required. +The image is set as `docker.io/aveshasystems/worker-operator:$(VERSION)` in the Makefile. Modify this as required. ```console make docker-build @@ -93,13 +93,13 @@ The image is set as `docker.io/aveshasystems/worker-operator:$(VERSION)` in the 2. Check the loaded image in the cluster. Modify the node name if required. ```console - docker exec -it crictl images + docker exec -it critical images ``` Example: ```console - docker exec -it kind-control-plane crictl images + docker exec -it kind-control-plane critical images ``` ### Deploy the Worker Operator on a Cluster @@ -155,7 +155,7 @@ prefix-service-76bd89c44f-2p6dw 1/1 Running 0 48s ### Uninstall the Worker Operator -For more information, see [deregister the worker cluster](https://kubeslice.io/documentation/open-source/1.3.0/uninstall-kubeslice/#deregister-worker-clusters). +For more information, see [deregister the worker cluster](https://kubeslice.io/documentation/open-source/latest/uninstall-kubeslice/#deregister-worker-clusters). ```console helm uninstall kubeslice-worker -n kubeslice-system From 247409d5426d2fdd774943461c685307ca1b6a4c Mon Sep 17 00:00:00 2001 From: uma-kt <105046156+uma-kt@users.noreply.github.com> Date: Tue, 29 Oct 2024 10:44:46 +0530 Subject: [PATCH 13/23] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3f4080585..c91dc8bb2 100644 --- a/README.md +++ b/README.md @@ -93,13 +93,13 @@ The image is set as `docker.io/aveshasystems/worker-operator:$(VERSION)` in the 2. Check the loaded image in the cluster. Modify the node name if required. ```console - docker exec -it critical images + docker exec -it crictl images ``` Example: ```console - docker exec -it kind-control-plane critical images + docker exec -it kind-control-plane crictl images ``` ### Deploy the Worker Operator on a Cluster From fbf2deccd1a9fc3e497ec01ae437741ba57fa8af Mon Sep 17 00:00:00 2001 From: Priyank Upadhyay Date: Wed, 30 Oct 2024 11:31:19 +0530 Subject: [PATCH 14/23] fix(): configurable ns exclusion list (#408) * fix(): configurable ns exclusion list Signed-off-by: Priyank Upadhyay * fix(): refactors Signed-off-by: Priyank Upadhyay --------- Signed-off-by: Priyank Upadhyay --- pkg/namespace/controllers/reconciler.go | 7 +++++-- pkg/utils/constants.go | 5 +++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pkg/namespace/controllers/reconciler.go b/pkg/namespace/controllers/reconciler.go index b74596fd7..ddeeee205 100644 --- a/pkg/namespace/controllers/reconciler.go +++ b/pkg/namespace/controllers/reconciler.go @@ -19,6 +19,7 @@ package namespace import ( "context" + "strings" "github.com/go-logr/logr" "github.com/kubeslice/kubeslice-monitoring/pkg/events" @@ -49,8 +50,7 @@ type Reconciler struct { Hubclient *hub.HubClientConfig } -var excludedNs = []string{"kube-system", "default", "kubeslice-system", "kube-node-lease", - "kube-public", "istio-system"} +var excludedNs []string var controllerName string = "namespaceReconciler" @@ -69,6 +69,9 @@ func (c *Reconciler) getSliceNameFromNs(ns string) (string, error) { } func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + + excludedNsEnv := utils.GetEnvOrDefault("EXCLUDED_NS", utils.DefaultExcludedNS) + excludedNs = strings.Split(excludedNsEnv, ",") for _, v := range excludedNs { if v == req.Name { return ctrl.Result{}, nil diff --git a/pkg/utils/constants.go b/pkg/utils/constants.go index bca137326..678c78124 100644 --- a/pkg/utils/constants.go +++ b/pkg/utils/constants.go @@ -1,6 +1,7 @@ package utils const ( - NotApplicable = "NA" - EventsVersion = "v1alpha1" + NotApplicable = "NA" + EventsVersion = "v1alpha1" + DefaultExcludedNS = "kube-system,default,kubeslice-system,kube-node-lease,kube-public,istio-system" ) From f3eb9a5bba4c49e6c8bbd7b8af95b2be6f90dd78 Mon Sep 17 00:00:00 2001 From: bharath-avesha <99859949+bharath-avesha@users.noreply.github.com> Date: Mon, 4 Nov 2024 15:37:09 +0530 Subject: [PATCH 15/23] fix(): Fixed tunnel status reporting in the slicegw CR (#406) Signed-off-by: Bharath Horatti --- api/v1beta1/slicegateway_types.go | 14 ++++- ...networking.kubeslice.io_slicegateways.yaml | 14 +++++ controllers/slicegateway/slicegateway.go | 55 +++++++++++++++---- controllers/slicegateway/utils.go | 19 ++++--- pkg/gwsidecar/gwsidecar.go | 45 ++++++++++----- 5 files changed, 112 insertions(+), 35 deletions(-) diff --git a/api/v1beta1/slicegateway_types.go b/api/v1beta1/slicegateway_types.go index b501687a4..c0ce78cf3 100644 --- a/api/v1beta1/slicegateway_types.go +++ b/api/v1beta1/slicegateway_types.go @@ -132,9 +132,14 @@ type GwPodInfo struct { PeerPodName string `json:"peerPodName,omitempty"` PodIP string `json:"podIP,omitempty"` LocalNsmIP string `json:"localNsmIP,omitempty"` - TunnelStatus TunnelStatus `json:"tunnelStatus,omitempty"` - RouteRemoved int32 `json:"routeRemoved,omitempty"` + // TunnelStatus is the status of the tunnel between this gw pod and its peer + TunnelStatus TunnelStatus `json:"tunnelStatus,omitempty"` + RouteRemoved int32 `json:"routeRemoved,omitempty"` + // RemotePort is the port number this gw pod is connected to on the remote cluster. + // Applicable only for gw clients. Would be set to 0 for gw servers. + RemotePort int32 `json:"remotePort,omitempty"` } + type TunnelStatus struct { IntfName string `json:"IntfName,omitempty"` LocalIP string `json:"LocalIP,omitempty"` @@ -143,7 +148,10 @@ type TunnelStatus struct { TxRate uint64 `json:"TxRate,omitempty"` RxRate uint64 `json:"RxRate,omitempty"` PacketLoss uint64 `json:"PacketLoss,omitempty"` - Status int32 `json:"Status,omitempty"` + // Status is the status of the tunnel. 0: DOWN, 1: UP + Status int32 `json:"Status,omitempty"` + // TunnelState is the state of the tunnel in string format: UP, DOWN, UNKNOWN + TunnelState string `json:"TunnelState,omitempty"` } func init() { diff --git a/config/crd/bases/networking.kubeslice.io_slicegateways.yaml b/config/crd/bases/networking.kubeslice.io_slicegateways.yaml index e05e95083..ac28fcc93 100644 --- a/config/crd/bases/networking.kubeslice.io_slicegateways.yaml +++ b/config/crd/bases/networking.kubeslice.io_slicegateways.yaml @@ -169,10 +169,18 @@ spec: type: string podName: type: string + remotePort: + description: |- + RemotePort is the port number this gw pod is connected to on the remote cluster. + Applicable only for gw clients. Would be set to 0 for gw servers. + format: int32 + type: integer routeRemoved: format: int32 type: integer tunnelStatus: + description: TunnelStatus is the status of the tunnel between + this gw pod and its peer properties: IntfName: type: string @@ -190,8 +198,14 @@ spec: format: int64 type: integer Status: + description: 'Status is the status of the tunnel. 0: DOWN, + 1: UP' format: int32 type: integer + TunnelState: + description: 'TunnelState is the state of the tunnel in + string format: UP, DOWN, UNKNOWN' + type: string TxRate: format: int64 type: integer diff --git a/controllers/slicegateway/slicegateway.go b/controllers/slicegateway/slicegateway.go index b4e779b63..6ce391ef7 100644 --- a/controllers/slicegateway/slicegateway.go +++ b/controllers/slicegateway/slicegateway.go @@ -78,7 +78,7 @@ func labelsForSliceGwDeployment(name, slice, depName string) map[string]string { } } -func labelsForSliceGwService(name, svcName, depName string) map[string]string { +func labelsForSliceGwService(name, depName string) map[string]string { return map[string]string{ controllers.SliceGatewaySelectorLabelKey: name, "kubeslice.io/slice-gw-dep": depName, @@ -360,7 +360,7 @@ func (r *SliceGwReconciler) serviceForGateway(g *kubeslicev1beta1.SliceGateway, }, Spec: corev1.ServiceSpec{ Type: "NodePort", - Selector: labelsForSliceGwService(g.Name, svcName, depName), + Selector: labelsForSliceGwService(g.Name, depName), Ports: []corev1.ServicePort{{ Port: 11194, Protocol: proto, @@ -661,8 +661,25 @@ func (r *SliceGwReconciler) ReconcileGwPodStatus(ctx context.Context, slicegatew return ctrl.Result{}, err, true } gwPod.LocalNsmIP = status.NsmStatus.LocalIP - gwPod.TunnelStatus = kubeslicev1beta1.TunnelStatus(status.TunnelStatus) - // this grpc call fails untill the openvpn tunnel connection is not established, so its better to do not reconcile in case of errors, hence the reconciler does not proceedes further + gwPod.TunnelStatus = kubeslicev1beta1.TunnelStatus{ + IntfName: status.TunnelStatus.IntfName, + LocalIP: status.TunnelStatus.LocalIP, + RemoteIP: status.TunnelStatus.RemoteIP, + Latency: status.TunnelStatus.Latency, + TxRate: status.TunnelStatus.TxRate, + RxRate: status.TunnelStatus.RxRate, + PacketLoss: status.TunnelStatus.PacketLoss, + Status: int32(status.TunnelStatus.Status), + TunnelState: status.TunnelStatus.TunnelState, + } + + if isClient(slicegateway) { + // get the remote port number this gw pod is connected to on the remote cluster + _, remotePortInUse := getClientGwRemotePortInUse(ctx, r.Client, slicegateway, GetDepNameFromPodName(slicegateway.Status.Config.SliceGatewayID, gwPod.PodName)) + gwPod.RemotePort = int32(remotePortInUse) + } + + // this grpc call fails untill the openvpn tunnel connection is not established, so its better to do not reconcile in case of errors, hence the reconciler does not proceeds further gwPod.PeerPodName, err = r.getRemoteGwPodName(ctx, slicegateway.Status.Config.SliceGatewayRemoteVpnIP, gwPod.PodIP) if err != nil { log.Error(err, "Error getting peer pod name", "PodName", gwPod.PodName, "PodIP", gwPod.PodIP) @@ -671,10 +688,11 @@ func (r *SliceGwReconciler) ReconcileGwPodStatus(ctx context.Context, slicegatew if isGatewayStatusChanged(slicegateway, gwPod) { toUpdate = true } - if len(slicegateway.Status.GatewayPodStatus) != len(gwPodsInfo) { - toUpdate = true - } } + if len(slicegateway.Status.GatewayPodStatus) != len(gwPodsInfo) { + toUpdate = true + } + if toUpdate { log.Info("gwPodsInfo", "gwPodsInfo", gwPodsInfo) slicegateway.Status.GatewayPodStatus = gwPodsInfo @@ -725,6 +743,10 @@ func (r *SliceGwReconciler) SendConnectionContextAndQosToGwPod(ctx context.Conte err = retry.RetryOnConflict(retry.DefaultRetry, func() error { err := r.Get(ctx, req.NamespacedName, slicegateway) + if err != nil { + log.Error(err, "Failed to get SliceGateway") + return err + } slicegateway.Status.ConnectionContextUpdatedOn = time.Now().Unix() err = r.Status().Update(ctx, slicegateway) if err != nil { @@ -1094,6 +1116,15 @@ func (r *SliceGwReconciler) gwPodPlacementIsSkewed(ctx context.Context, sliceGw func (r *SliceGwReconciler) ReconcileGwPodPlacement(ctx context.Context, sliceGw *kubeslicev1beta1.SliceGateway) error { log := r.Log + + // if the env variable is set, do not perform any gw pod rebalancing. This is useful in clusters where + // the k8s scheduler does not honor the pod anti-affinity rule and places the gw pods on the same node. Such scenarios + // could occur if the node with the kubeslice gateway label is cordoned off or if the node has insufficient resources or + // if the node has some taints that the gw pods cannot tolerate. + if os.Getenv("DISABLE_GW_POD_REBALANCING") == "true" { + return nil + } + // The gw pod rebalancing is always performed on a deployment. We expect the gw pods belonging to a slicegateway // object between any two clusters to placed on different nodes marked as kubeslice gateway nodes. If they are // initially placed on the same node due to lack of kubeslice-gateway nodes, the rebalancing algorithim is expected @@ -1170,7 +1201,7 @@ func (r *SliceGwReconciler) handleSliceGwSvcCreation(ctx context.Context, sliceG return ctrl.Result{Requeue: true}, nil, true } -func (r *SliceGwReconciler) handleSliceGwSvcDeletion(ctx context.Context, sliceGw *kubeslicev1beta1.SliceGateway, svcName, depName string) error { +func (r *SliceGwReconciler) handleSliceGwSvcDeletion(ctx context.Context, sliceGw *kubeslicev1beta1.SliceGateway, svcName string) error { log := logger.FromContext(ctx).WithName("slicegw") serviceFound := corev1.Service{} err := r.Get(ctx, types.NamespacedName{Namespace: sliceGw.Namespace, Name: svcName}, &serviceFound) @@ -1385,7 +1416,7 @@ func (r *SliceGwReconciler) ReconcileGatewayDeployments(ctx context.Context, sli } // Update the port map gwClientToRemotePortMap.Store(deployment.Name, portNumToUpdate) - err = r.updateGatewayDeploymentNodePort(ctx, r.Client, sliceGw, &deployment, portNumToUpdate) + err = r.updateGatewayDeploymentNodePort(ctx, sliceGw, &deployment, portNumToUpdate) if err != nil { return ctrl.Result{}, err, true } @@ -1399,7 +1430,7 @@ func (r *SliceGwReconciler) ReconcileGatewayDeployments(ctx context.Context, sli if foundInMap { if portInMap != nodePortInUse { // Update the deployment since the port numbers do not match - err := r.updateGatewayDeploymentNodePort(ctx, r.Client, sliceGw, &deployment, portInMap.(int)) + err := r.updateGatewayDeploymentNodePort(ctx, sliceGw, &deployment, portInMap.(int)) if err != nil { return ctrl.Result{}, err, true } @@ -1425,7 +1456,7 @@ func (r *SliceGwReconciler) ReconcileGatewayDeployments(ctx context.Context, sli if deploymentsToDelete != nil { for _, depToDelete := range deploymentsToDelete.Items { // Delete the gw svc associated with the deployment - err := r.handleSliceGwSvcDeletion(ctx, sliceGw, getGwSvcNameFromDepName(depToDelete.Name), depToDelete.Name) + err := r.handleSliceGwSvcDeletion(ctx, sliceGw, getGwSvcNameFromDepName(depToDelete.Name)) if err != nil { log.Error(err, "Failed to delete gw svc", "svcName", depToDelete.Name) return ctrl.Result{}, err, true @@ -1615,7 +1646,7 @@ func (r *SliceGwReconciler) createPodDisruptionBudgetForSliceGatewayPods(ctx con // updateGatewayDeploymentNodePort updates the gateway client deployments with the relevant updated ports // from the workersliceconfig -func (r *SliceGwReconciler) updateGatewayDeploymentNodePort(ctx context.Context, c client.Client, g *kubeslicev1beta1.SliceGateway, deployment *appsv1.Deployment, nodePort int) error { +func (r *SliceGwReconciler) updateGatewayDeploymentNodePort(ctx context.Context, g *kubeslicev1beta1.SliceGateway, deployment *appsv1.Deployment, nodePort int) error { containers := deployment.Spec.Template.Spec.Containers for contIndex, cont := range containers { if cont.Name == "kubeslice-sidecar" { diff --git a/controllers/slicegateway/utils.go b/controllers/slicegateway/utils.go index 1938411f1..7e32a9ecb 100644 --- a/controllers/slicegateway/utils.go +++ b/controllers/slicegateway/utils.go @@ -22,16 +22,17 @@ import ( "context" "errors" "fmt" + "os" + "strconv" + "strings" + "sync" + gwsidecarpb "github.com/kubeslice/gateway-sidecar/pkg/sidecar/sidecarpb" kubeslicev1beta1 "github.com/kubeslice/worker-operator/api/v1beta1" "github.com/kubeslice/worker-operator/controllers" ossEvents "github.com/kubeslice/worker-operator/events" "github.com/kubeslice/worker-operator/pkg/utils" webhook "github.com/kubeslice/worker-operator/pkg/webhook/pod" - "os" - "strconv" - "strings" - "sync" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -112,6 +113,10 @@ func getPodNames(slicegateway *kubeslicev1beta1.SliceGateway) []string { } func GetDepNameFromPodName(sliceGwID, podName string) string { + if sliceGwID == "" || podName == "" { + return "" + } + after, found := strings.CutPrefix(podName, sliceGwID) if !found { return "" @@ -204,13 +209,13 @@ func getPodPairToRebalance(podsOnNode []corev1.Pod, sliceGw *kubeslicev1beta1.Sl func GetPeerGwPodName(gwPodName string, sliceGw *kubeslicev1beta1.SliceGateway) (string, error) { podInfo := findGwPodInfo(sliceGw.Status.GatewayPodStatus, gwPodName) if podInfo == nil { - return "", errors.New("Gw pod not found") + return "", errors.New("gw pod not found") } if podInfo.TunnelStatus.Status != int32(gwsidecarpb.TunnelStatusType_GW_TUNNEL_STATE_UP) { - return "", errors.New("Gw tunnel is down") + return "", errors.New("gw tunnel is down") } if podInfo.PeerPodName == "" { - return "", errors.New("Gw peer pod info unavailable") + return "", errors.New("gw peer pod info unavailable") } return podInfo.PeerPodName, nil diff --git a/pkg/gwsidecar/gwsidecar.go b/pkg/gwsidecar/gwsidecar.go index 1be8704e3..a9ec5458a 100644 --- a/pkg/gwsidecar/gwsidecar.go +++ b/pkg/gwsidecar/gwsidecar.go @@ -33,16 +33,19 @@ type NsmStatus struct { IntfName string LocalIP string } + type TunnelStatus struct { - IntfName string - LocalIP string - RemoteIP string - Latency uint64 - TxRate uint64 - RxRate uint64 - PacketLoss uint64 - Status int32 + IntfName string + LocalIP string + RemoteIP string + Latency uint64 + TxRate uint64 + RxRate uint64 + PacketLoss uint64 + Status int32 + TunnelState string } + type GwStatus struct { NsmStatus TunnelStatus @@ -79,6 +82,17 @@ func (worker gwSidecarClient) GetSliceGwRemotePodName(ctx context.Context, gwRem return res.GatewayPodName, nil } +func getTunnelState(tunnelState sidecar.TunnelStatusType) string { + switch tunnelState { + case sidecar.TunnelStatusType_GW_TUNNEL_STATE_UP: + return "UP" + case sidecar.TunnelStatusType_GW_TUNNEL_STATE_DOWN: + return "DOWN" + default: + return "UNKNOWN" + } +} + // GetStatus retrieves sidecar status func (worker gwSidecarClient) GetStatus(ctx context.Context, serverAddr string) (*GwStatus, error) { conn, err := grpc.Dial(serverAddr, grpc.WithTransportCredentials(insecure.NewCredentials())) @@ -103,14 +117,19 @@ func (worker gwSidecarClient) GetStatus(ctx context.Context, serverAddr string) } if res.TunnelStatus != nil { gwStatus.TunnelStatus = TunnelStatus{ - IntfName: res.TunnelStatus.NetInterface, - LocalIP: res.TunnelStatus.LocalIP, - RemoteIP: res.TunnelStatus.PeerIP, - PacketLoss: res.TunnelStatus.PacketLoss, - Status: int32(res.TunnelStatus.Status), + IntfName: res.TunnelStatus.NetInterface, + LocalIP: res.TunnelStatus.LocalIP, + RemoteIP: res.TunnelStatus.PeerIP, + Latency: res.TunnelStatus.Latency, + TxRate: res.TunnelStatus.TxRate, + RxRate: res.TunnelStatus.RxRate, + PacketLoss: res.TunnelStatus.PacketLoss, + Status: int32(res.TunnelStatus.Status), + TunnelState: getTunnelState(res.TunnelStatus.Status), } } else { gwStatus.TunnelStatus.Status = int32(sidecar.TunnelStatusType_GW_TUNNEL_STATE_DOWN) + gwStatus.TunnelStatus.TunnelState = getTunnelState(sidecar.TunnelStatusType_GW_TUNNEL_STATE_DOWN) } return gwStatus, err From 63cb4aada2e70dff6b78128244d47abc18694dda Mon Sep 17 00:00:00 2001 From: Mridul Gain Date: Tue, 5 Nov 2024 20:22:47 +0530 Subject: [PATCH 16/23] fix(): update gw deploy if gateway sidecar image has been changed in worker env vars Signed-off-by: Mridul Gain --- controllers/slicegateway/slicegateway.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/controllers/slicegateway/slicegateway.go b/controllers/slicegateway/slicegateway.go index 6ce391ef7..179976e18 100644 --- a/controllers/slicegateway/slicegateway.go +++ b/controllers/slicegateway/slicegateway.go @@ -1386,6 +1386,27 @@ func (r *SliceGwReconciler) ReconcileGatewayDeployments(ctx context.Context, sli return ctrl.Result{}, err, true } return ctrl.Result{Requeue: true}, nil, true + } else { + // update logic for gateways + for i := range deployments.Items { + deployment := &deployments.Items[i] + if deployment.Name == sliceGwName+"-"+fmt.Sprint(gwInstance)+"-"+"0" { + // update if gateway sidecar image has been changed in worker env vars + for j := range deployment.Spec.Template.Spec.Containers { + container := &deployment.Spec.Template.Spec.Containers[j] + if container.Name == "kubeslice-sidecar" && container.Image != gwSidecarImage { + container.Image = gwSidecarImage + log.Info("updating gw Deployment sidecar", "Name", deployment.Name, "image", gwSidecarImage) + err = r.Update(ctx, deployment) + if err != nil { + log.Error(err, "Failed to update Deployment", "Name", deployment.Name) + return ctrl.Result{}, err, true + } + return ctrl.Result{Requeue: true}, nil, true + } + } + } + } } } From ffb8b62d806b66b86e713fbc37fe1394d2db98c5 Mon Sep 17 00:00:00 2001 From: Mridul Gain Date: Tue, 5 Nov 2024 22:53:09 +0530 Subject: [PATCH 17/23] add empty string check for sidecar image name Signed-off-by: Mridul Gain --- controllers/slicegateway/slicegateway.go | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/controllers/slicegateway/slicegateway.go b/controllers/slicegateway/slicegateway.go index 179976e18..14958fbf0 100644 --- a/controllers/slicegateway/slicegateway.go +++ b/controllers/slicegateway/slicegateway.go @@ -53,6 +53,10 @@ import ( webhook "github.com/kubeslice/worker-operator/pkg/webhook/pod" ) +const ( + DEFAULT_SIDECAR_IMG = "nexus.dev.aveshalabs.io/kubeslice/gw-sidecar:1.0.0" +) + var ( vpnClientFileName = "openvpn_client.ovpn" gwSidecarImage = os.Getenv("AVESHA_GW_SIDECAR_IMAGE") @@ -110,7 +114,7 @@ func (r *SliceGwReconciler) deploymentForGatewayServer(g *kubeslicev1beta1.Slice var privileged = true - sidecarImg := "nexus.dev.aveshalabs.io/kubeslice/gw-sidecar:1.0.0" + sidecarImg := DEFAULT_SIDECAR_IMG sidecarPullPolicy := corev1.PullAlways vpnImg := "nexus.dev.aveshalabs.io/kubeslice/openvpn-server.ubuntu.18.04:1.0.0" vpnPullPolicy := corev1.PullAlways @@ -1376,6 +1380,11 @@ func (r *SliceGwReconciler) ReconcileGatewayDeployments(ctx context.Context, sli } } + sidecarImg := DEFAULT_SIDECAR_IMG + if len(gwSidecarImage) != 0 { + sidecarImg = gwSidecarImage + } + for gwInstance := 0; gwInstance < numGwInstances; gwInstance++ { if !gwDeploymentIsPresent(sliceGwName, gwInstance, deployments) { dep := r.deploymentForGateway(sliceGw, sliceGwName+"-"+fmt.Sprint(gwInstance)+"-"+"0", gwConfigKey) @@ -1394,8 +1403,8 @@ func (r *SliceGwReconciler) ReconcileGatewayDeployments(ctx context.Context, sli // update if gateway sidecar image has been changed in worker env vars for j := range deployment.Spec.Template.Spec.Containers { container := &deployment.Spec.Template.Spec.Containers[j] - if container.Name == "kubeslice-sidecar" && container.Image != gwSidecarImage { - container.Image = gwSidecarImage + if container.Name == "kubeslice-sidecar" && container.Image != sidecarImg { + container.Image = sidecarImg log.Info("updating gw Deployment sidecar", "Name", deployment.Name, "image", gwSidecarImage) err = r.Update(ctx, deployment) if err != nil { From 3e5301d121e39369b8bf40bf27e5a2f795b1f462 Mon Sep 17 00:00:00 2001 From: Mridul Gain Date: Tue, 5 Nov 2024 23:37:20 +0530 Subject: [PATCH 18/23] fix(): test build Signed-off-by: Mridul Gain --- go.mod | 2 +- test.Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index d3bd5e283..3029cf6de 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/kubeslice/worker-operator -go 1.22.1 +go 1.23.2 // replace github.com/kubeslice/apis => ../../misc/apis diff --git a/test.Dockerfile b/test.Dockerfile index dcf82caff..cce8e352b 100644 --- a/test.Dockerfile +++ b/test.Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.22.3 as builder +FROM golang:1.23.2 AS builder WORKDIR /workspace # Copy the Go Modules manifests From 71628a84dee6e7c9bb0e9d028329ceaab8631c87 Mon Sep 17 00:00:00 2001 From: Mridul Gain Date: Wed, 6 Nov 2024 12:33:54 +0530 Subject: [PATCH 19/23] fix(): update gw deploy if gateway sidecar image pull policy is changed in worker env vars Signed-off-by: Mridul Gain --- controllers/slicegateway/slicegateway.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/controllers/slicegateway/slicegateway.go b/controllers/slicegateway/slicegateway.go index 14958fbf0..a1019cd7d 100644 --- a/controllers/slicegateway/slicegateway.go +++ b/controllers/slicegateway/slicegateway.go @@ -54,7 +54,8 @@ import ( ) const ( - DEFAULT_SIDECAR_IMG = "nexus.dev.aveshalabs.io/kubeslice/gw-sidecar:1.0.0" + DEFAULT_SIDECAR_IMG = "nexus.dev.aveshalabs.io/kubeslice/gw-sidecar:1.0.0" + DEFAULT_SIDECAR_PULLPOLICY = corev1.PullAlways ) var ( @@ -115,7 +116,7 @@ func (r *SliceGwReconciler) deploymentForGatewayServer(g *kubeslicev1beta1.Slice var privileged = true sidecarImg := DEFAULT_SIDECAR_IMG - sidecarPullPolicy := corev1.PullAlways + sidecarPullPolicy := DEFAULT_SIDECAR_PULLPOLICY vpnImg := "nexus.dev.aveshalabs.io/kubeslice/openvpn-server.ubuntu.18.04:1.0.0" vpnPullPolicy := corev1.PullAlways baseFileName := os.Getenv("CLUSTER_NAME") + "-" + g.Spec.SliceName + "-" + g.Status.Config.SliceGatewayName + ".vpn.aveshasystems.com" @@ -1384,6 +1385,10 @@ func (r *SliceGwReconciler) ReconcileGatewayDeployments(ctx context.Context, sli if len(gwSidecarImage) != 0 { sidecarImg = gwSidecarImage } + sidecarPullPolicy := DEFAULT_SIDECAR_PULLPOLICY + if len(gwSidecarImagePullPolicy) != 0 { + sidecarPullPolicy = corev1.PullPolicy(gwSidecarImagePullPolicy) + } for gwInstance := 0; gwInstance < numGwInstances; gwInstance++ { if !gwDeploymentIsPresent(sliceGwName, gwInstance, deployments) { @@ -1403,8 +1408,9 @@ func (r *SliceGwReconciler) ReconcileGatewayDeployments(ctx context.Context, sli // update if gateway sidecar image has been changed in worker env vars for j := range deployment.Spec.Template.Spec.Containers { container := &deployment.Spec.Template.Spec.Containers[j] - if container.Name == "kubeslice-sidecar" && container.Image != sidecarImg { + if container.Name == "kubeslice-sidecar" && (container.Image != sidecarImg || container.ImagePullPolicy != sidecarPullPolicy) { container.Image = sidecarImg + container.ImagePullPolicy = sidecarPullPolicy log.Info("updating gw Deployment sidecar", "Name", deployment.Name, "image", gwSidecarImage) err = r.Update(ctx, deployment) if err != nil { From 46a48e9f5fe24665820dd1de832a657dfc3c0ca7 Mon Sep 17 00:00:00 2001 From: Mridul Gain Date: Wed, 13 Nov 2024 10:30:18 +0530 Subject: [PATCH 20/23] fix: getNodeIp logic for no-network mode Signed-off-by: Mridul Gain --- pkg/cluster/node.go | 15 +++++++++------ pkg/hub/controllers/cluster/reconciler.go | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/pkg/cluster/node.go b/pkg/cluster/node.go index d6fc1667f..a109f1ed6 100644 --- a/pkg/cluster/node.go +++ b/pkg/cluster/node.go @@ -53,20 +53,23 @@ type NodeInfo struct { //GetNodeExternalIpList gets the list of External Node IPs of kubeslice-gateway nodes -func (n *NodeInfo) getNodeExternalIpList() ([]string, error) { +func (n *NodeInfo) getNodeExternalIpList(networkMode bool) ([]string, error) { n.Lock() defer n.Unlock() - err := n.populateNodeIpList() + err := n.populateNodeIpList(networkMode) if err != nil { return nil, err } return n.NodeIPList, nil } -func (n *NodeInfo) populateNodeIpList() error { +func (n *NodeInfo) populateNodeIpList(networkMode bool) error { ctx := context.Background() nodeList := corev1.NodeList{} - labels := map[string]string{controllers.NodeTypeSelectorLabelKey: "gateway"} + labels := map[string]string{} + if networkMode { + labels = map[string]string{controllers.NodeTypeSelectorLabelKey: "gateway"} + } listOptions := []client.ListOption{ client.MatchingLabels(labels), } @@ -101,10 +104,10 @@ func (n *NodeInfo) populateNodeIpList() error { return err } -func GetNodeIP(client client.Client) ([]string, error) { +func GetNodeIP(client client.Client, networkMode bool) ([]string, error) { nodeInfo.Client = client // nodeIPs will either have list of ExternalIPs if available, else Internal IPs - nodeIps, err := nodeInfo.getNodeExternalIpList() + nodeIps, err := nodeInfo.getNodeExternalIpList(networkMode) if err != nil || len(nodeIps) == 0 { log.Error(err, "Getting NodeIP From kube-api-server") return []string{""}, err diff --git a/pkg/hub/controllers/cluster/reconciler.go b/pkg/hub/controllers/cluster/reconciler.go index d4ab17977..7c4d25026 100644 --- a/pkg/hub/controllers/cluster/reconciler.go +++ b/pkg/hub/controllers/cluster/reconciler.go @@ -455,7 +455,7 @@ func (r *Reconciler) updateNodeIps(ctx context.Context, cr *hubv1alpha1.Cluster) if err != nil { return err } - nodeIPs, err := cluster.GetNodeIP(r.MeshClient) + nodeIPs, err := cluster.GetNodeIP(r.MeshClient, cr.Status.NetworkPresent) if err != nil { log.Error(err, "Error Getting nodeIP") return err From 403decd3a745129bc5b8c614afe1b78887128dff Mon Sep 17 00:00:00 2001 From: Mridul Gain Date: Wed, 13 Nov 2024 14:06:07 +0530 Subject: [PATCH 21/23] address review comments Signed-off-by: Mridul Gain --- pkg/cluster/node.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pkg/cluster/node.go b/pkg/cluster/node.go index a109f1ed6..58922b407 100644 --- a/pkg/cluster/node.go +++ b/pkg/cluster/node.go @@ -51,23 +51,23 @@ type NodeInfo struct { sync.Mutex } -//GetNodeExternalIpList gets the list of External Node IPs of kubeslice-gateway nodes - -func (n *NodeInfo) getNodeExternalIpList(networkMode bool) ([]string, error) { +// When kubeslice network is enabled this method gets the list of External/Internal Node IPs of ready nodes that are labeled as kubeslice-gateway. +// If kubeslice networking is disabled it returns IPs of all ready nodes +func (n *NodeInfo) getNodeExternalIpList(isNetworkPresent bool) ([]string, error) { n.Lock() defer n.Unlock() - err := n.populateNodeIpList(networkMode) + err := n.populateNodeIpList(isNetworkPresent) if err != nil { return nil, err } return n.NodeIPList, nil } -func (n *NodeInfo) populateNodeIpList(networkMode bool) error { +func (n *NodeInfo) populateNodeIpList(isNetworkPresent bool) error { ctx := context.Background() nodeList := corev1.NodeList{} labels := map[string]string{} - if networkMode { + if isNetworkPresent { labels = map[string]string{controllers.NodeTypeSelectorLabelKey: "gateway"} } listOptions := []client.ListOption{ @@ -104,10 +104,10 @@ func (n *NodeInfo) populateNodeIpList(networkMode bool) error { return err } -func GetNodeIP(client client.Client, networkMode bool) ([]string, error) { +func GetNodeIP(client client.Client, isNetworkPresent bool) ([]string, error) { nodeInfo.Client = client // nodeIPs will either have list of ExternalIPs if available, else Internal IPs - nodeIps, err := nodeInfo.getNodeExternalIpList(networkMode) + nodeIps, err := nodeInfo.getNodeExternalIpList(isNetworkPresent) if err != nil || len(nodeIps) == 0 { log.Error(err, "Getting NodeIP From kube-api-server") return []string{""}, err From d54ebc57ad059d998f7717f056e780dc16606037 Mon Sep 17 00:00:00 2001 From: Kranthi Kumar Date: Wed, 13 Nov 2024 18:23:23 +0530 Subject: [PATCH 22/23] Set custom Trivy DB repository in GitHub Action env --- .github/workflows/trivy.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index bd9a09831..fcee60d26 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -30,6 +30,8 @@ jobs: format: 'sarif' output: 'trivy-results.sarif' severity: 'CRITICAL' + env: + TRIVY_DB_REPOSITORY: "public.ecr.aws/aquasecurity/trivy-db" - name: Upload Trivy scan results to GitHub Security tab uses: github/codeql-action/upload-sarif@v2 From 5c889b8f7f2da5bc33bb51f24ae7882c43a98ea1 Mon Sep 17 00:00:00 2001 From: gourishkb <104021126+gourishkb@users.noreply.github.com> Date: Tue, 19 Nov 2024 11:00:49 +0530 Subject: [PATCH 23/23] fix(): Dockerfile golang 1.23.2 Signed-off-by: gourishkb <104021126+gourishkb@users.noreply.github.com> --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 3c5be5bf2..72166e07a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,7 +18,7 @@ ########################################################## # Build the manager binary -FROM golang:1.23.1 as builder +FROM golang:1.23.2 as builder WORKDIR /workspace # Copy the Go Modules manifests