diff --git a/cmd/main.go b/cmd/main.go index 702cae6ce..acf62c376 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -226,6 +226,7 @@ func main() { Client: mgr.GetClient(), Scheme: mgr.GetScheme(), Config: mgr.GetConfig(), + DynamicClient: dc, SystemNamespace: currentNamespace, CreateTemplateManagement: createTemplateManagement, }).SetupWithManager(mgr); err != nil { diff --git a/internal/controller/managedcluster_controller.go b/internal/controller/managedcluster_controller.go index ec5fe2a4d..c8d5840a5 100644 --- a/internal/controller/managedcluster_controller.go +++ b/internal/controller/managedcluster_controller.go @@ -33,9 +33,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/client-go/dynamic" "k8s.io/client-go/rest" @@ -53,6 +51,7 @@ import ( "github.com/Mirantis/hmc/internal/helm" "github.com/Mirantis/hmc/internal/sveltos" "github.com/Mirantis/hmc/internal/telemetry" + "github.com/Mirantis/hmc/internal/utils/status" ) const ( @@ -96,7 +95,8 @@ func (r *ManagedClusterReconciler) Reconcile(ctx context.Context, req ctrl.Reque l.Error(err, "Failed to get Management object") return ctrl.Result{}, err } - if err := telemetry.TrackManagedClusterCreate(string(mgmt.UID), string(managedCluster.UID), managedCluster.Spec.Template, managedCluster.Spec.DryRun); err != nil { + if err := telemetry.TrackManagedClusterCreate( + string(mgmt.UID), string(managedCluster.UID), managedCluster.Spec.Template, managedCluster.Spec.DryRun); err != nil { l.Error(err, "Failed to track ManagedCluster creation") } } @@ -104,52 +104,27 @@ func (r *ManagedClusterReconciler) Reconcile(ctx context.Context, req ctrl.Reque return r.Update(ctx, managedCluster) } -func (r *ManagedClusterReconciler) setStatusFromClusterStatus(ctx 
context.Context, managedCluster *hmc.ManagedCluster) (requeue bool, _ error) { +func (r *ManagedClusterReconciler) setStatusFromClusterStatus( + ctx context.Context, managedCluster *hmc.ManagedCluster, +) (bool, error) { l := ctrl.LoggerFrom(ctx) - resourceID := schema.GroupVersionResource{ + resourceConditions, err := status.GetResourceConditions(ctx, managedCluster.Namespace, r.DynamicClient, schema.GroupVersionResource{ Group: "cluster.x-k8s.io", Version: "v1beta1", Resource: "clusters", - } - - list, err := r.DynamicClient.Resource(resourceID).Namespace(managedCluster.Namespace).List(ctx, metav1.ListOptions{ - LabelSelector: labels.SelectorFromSet(map[string]string{hmc.FluxHelmChartNameKey: managedCluster.Name}).String(), - }) - - if apierrors.IsNotFound(err) || len(list.Items) == 0 { - l.Info("Clusters not found, ignoring since object must be deleted or not yet created") - return true, nil - } - - if err != nil { - return true, fmt.Errorf("failed to get cluster information for managedCluster %s in namespace: %s: %w", - managedCluster.Namespace, managedCluster.Name, err) - } - conditions, found, err := unstructured.NestedSlice(list.Items[0].Object, "status", "conditions") + }, labels.SelectorFromSet(map[string]string{hmc.FluxHelmChartNameKey: managedCluster.Name}).String()) if err != nil { - return true, fmt.Errorf("failed to get cluster information for managedCluster %s in namespace: %s: %w", - managedCluster.Namespace, managedCluster.Name, err) - } - if !found { - return true, fmt.Errorf("failed to get cluster information for managedCluster %s in namespace: %s: status.conditions not found", - managedCluster.Namespace, managedCluster.Name) + notFoundErr := status.ResourceNotFoundError{} + if errors.As(err, &notFoundErr) { + l.Info(err.Error()) + return true, nil + } + return false, fmt.Errorf("failed to get conditions: %w", err) } allConditionsComplete := true - for _, condition := range conditions { - conditionMap, ok := condition.(map[string]any) - if !ok { - 
return true, fmt.Errorf("failed to cast condition to map[string]any for managedCluster: %s in namespace: %s: %w", - managedCluster.Namespace, managedCluster.Name, err) - } - - var metaCondition metav1.Condition - if err := runtime.DefaultUnstructuredConverter.FromUnstructured(conditionMap, &metaCondition); err != nil { - return true, fmt.Errorf("failed to convert unstructured conditions to metav1.Condition for managedCluster %s in namespace: %s: %w", - managedCluster.Namespace, managedCluster.Name, err) - } - + for _, metaCondition := range resourceConditions.Conditions { if metaCondition.Status != "True" { allConditionsComplete = false } diff --git a/internal/controller/management_controller.go b/internal/controller/management_controller.go index 5d4b324a9..e2ade51c1 100644 --- a/internal/controller/management_controller.go +++ b/internal/controller/management_controller.go @@ -20,6 +20,7 @@ import ( "errors" "fmt" "slices" + "strings" fluxv2 "github.com/fluxcd/helm-controller/api/v2" "github.com/fluxcd/pkg/apis/meta" @@ -30,6 +31,8 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" "k8s.io/client-go/rest" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -39,6 +42,7 @@ import ( "github.com/Mirantis/hmc/internal/certmanager" "github.com/Mirantis/hmc/internal/helm" "github.com/Mirantis/hmc/internal/utils" + "github.com/Mirantis/hmc/internal/utils/status" ) // Those are only needed for the initial installation @@ -56,6 +60,7 @@ type ManagementReconciler struct { Scheme *runtime.Scheme Config *rest.Config SystemNamespace string + DynamicClient *dynamic.DynamicClient CreateTemplateManagement bool } @@ -154,6 +159,15 @@ func (r *ManagementReconciler) Update(ctx context.Context, management *hmc.Manag errs = errors.Join(errs, errors.New(errMsg)) continue } + + if component.Template != 
hmc.CoreHMCName { + if err := r.checkProviderStatus(ctx, component.Template); err != nil { + updateComponentsStatus(detectedComponents, &detectedProviders, detectedContracts, component.helmReleaseName, component.Template, template.Status.Providers, template.Status.CAPIContracts, err.Error()) + errs = errors.Join(errs, err) + continue + } + } + updateComponentsStatus(detectedComponents, &detectedProviders, detectedContracts, component.helmReleaseName, component.Template, template.Status.Providers, template.Status.CAPIContracts, "") } @@ -206,6 +220,58 @@ func (r *ManagementReconciler) ensureTemplateManagement(ctx context.Context, mgm return fmt.Errorf("failed to create %s TemplateManagement object: %w", hmc.TemplateManagementName, err) } l.Info("Successfully created TemplateManagement object") + + return nil +} + +// checkProviderStatus checks the status of a provider associated with a given +// ProviderTemplate name. Since there's no way to determine resource Kind from +// the given template, iterate over all possible provider types. +func (r *ManagementReconciler) checkProviderStatus(ctx context.Context, providerTemplateName string) error { + var errs error + + for _, resourceType := range []string{ + "coreproviders", + "infrastructureproviders", + "controlplaneproviders", + "bootstrapproviders", + } { + gvr := schema.GroupVersionResource{ + Group: "operator.cluster.x-k8s.io", + Version: "v1alpha2", + Resource: resourceType, + } + + resourceConditions, err := status.GetResourceConditions(ctx, r.SystemNamespace, r.DynamicClient, gvr, + labels.SelectorFromSet(map[string]string{hmc.FluxHelmChartNameKey: providerTemplateName}).String(), + ) + if err != nil { + notFoundErr := status.ResourceNotFoundError{} + if errors.As(err, &notFoundErr) { + // Check the next resource type. 
+ continue + } + + return fmt.Errorf("failed to get status for template: %s: %w", providerTemplateName, err) + } + + var falseConditionTypes []string + for _, condition := range resourceConditions.Conditions { + if condition.Status != metav1.ConditionTrue { + falseConditionTypes = append(falseConditionTypes, condition.Type) + } + } + + if len(falseConditionTypes) > 0 { + errs = errors.Join(errs, fmt.Errorf("%s: %s is not yet ready, has false conditions: %s", + resourceConditions.Name, resourceConditions.Kind, strings.Join(falseConditionTypes, ", "))) + } + } + + if errs != nil { + return errs + } + return nil } diff --git a/internal/utils/status/status.go b/internal/utils/status/status.go new file mode 100644 index 000000000..389005f28 --- /dev/null +++ b/internal/utils/status/status.go @@ -0,0 +1,126 @@ +// Copyright 2024 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package status + +import ( + "context" + "fmt" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" +) + +// ConditionsFromUnstructured fetches all of the status.conditions from an +// unstructured object and returns them as a slice of metav1.Condition. The +// status.conditions field is expected to be a slice of map[string]any +// which can be cast into a metav1.Condition. 
+func ConditionsFromUnstructured(unstrObj *unstructured.Unstructured) ([]metav1.Condition, error) { + objKind, objName := ObjKindName(unstrObj) + + // Check err before found: NestedSlice reports found=false together with a + // non-nil error when status.conditions exists but is not a slice. + unstrConditions, found, err := unstructured.NestedSlice(unstrObj.Object, "status", "conditions") + if err != nil { + return nil, fmt.Errorf("failed to get status conditions for %s: %s: %w", objKind, objName, err) + } + if !found { + return nil, fmt.Errorf("no status conditions found for %s: %s", objKind, objName) + } + + conditions := make([]metav1.Condition, 0, len(unstrConditions)) + + for _, condition := range unstrConditions { + conditionMap, ok := condition.(map[string]any) + if !ok { + return nil, fmt.Errorf("expected %s: %s condition to be type map[string]any, got: %T", + objKind, objName, condition) + } + + var c metav1.Condition + + if err := runtime.DefaultUnstructuredConverter.FromUnstructured(conditionMap, &c); err != nil { + return nil, fmt.Errorf("failed to convert condition map to metav1.Condition: %w", err) + } + + conditions = append(conditions, c) + } + + return conditions, nil +} + +type ResourceNotFoundError struct { + Resource string +} + +func (e ResourceNotFoundError) Error() string { + return fmt.Sprintf("no %s found, ignoring since object must be deleted or not yet created", e.Resource) +} + +type ResourceConditions struct { + Kind string + Name string + Conditions []metav1.Condition +} + +// GetResourceConditions fetches the conditions from a resource identified by +// the provided GroupVersionResource and labelSelector. The function returns +// a ResourceConditions struct containing the name/kind of the resource +// and the conditions. +// If the resource is not found, returns a ResourceNotFoundError which can be +// checked by the caller to prevent reconciliation loops. 
+func GetResourceConditions( + ctx context.Context, namespace string, dynamicClient dynamic.Interface, + gvr schema.GroupVersionResource, labelSelector string, +) (resourceConditions *ResourceConditions, err error) { + list, err := dynamicClient.Resource(gvr).Namespace(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: labelSelector, Limit: 2, + }) + if err != nil { + if apierrors.IsNotFound(err) { + return nil, ResourceNotFoundError{Resource: gvr.Resource} + } + + return nil, fmt.Errorf("failed to list %s: %w", gvr.Resource, err) + } + + if len(list.Items) == 0 { + return nil, ResourceNotFoundError{Resource: gvr.Resource} + } + + if len(list.Items) > 1 { + return nil, fmt.Errorf("expected to find only one of resource: %s with label: %q, found: %d", + gvr.Resource, labelSelector, len(list.Items)) + } + + kind, name := ObjKindName(&list.Items[0]) + conditions, err := ConditionsFromUnstructured(&list.Items[0]) + if err != nil { + return nil, fmt.Errorf("failed to get conditions: %w", err) + } + + return &ResourceConditions{ + Kind: kind, + Name: name, + Conditions: conditions, + }, nil +} + +func ObjKindName(unstrObj *unstructured.Unstructured) (kind, name string) { + return unstrObj.GetKind(), unstrObj.GetName() +} diff --git a/templates/provider/hmc/templates/rbac/controller/roles.yaml b/templates/provider/hmc/templates/rbac/controller/roles.yaml index 446058fed..9334495ee 100644 --- a/templates/provider/hmc/templates/rbac/controller/roles.yaml +++ b/templates/provider/hmc/templates/rbac/controller/roles.yaml @@ -5,6 +5,16 @@ metadata: labels: {{- include "hmc.labels" . 
| nindent 4 }} rules: +- apiGroups: + - operator.cluster.x-k8s.io + resources: + - coreproviders + - infrastructureproviders + - bootstrapproviders + - controlplaneproviders + verbs: + - get + - list - apiGroups: - cluster.x-k8s.io resources: diff --git a/test/e2e/kubeclient/kubeclient.go b/test/e2e/kubeclient/kubeclient.go index 089dce519..e3801e4e0 100644 --- a/test/e2e/kubeclient/kubeclient.go +++ b/test/e2e/kubeclient/kubeclient.go @@ -33,7 +33,7 @@ import ( "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" - "github.com/Mirantis/hmc/test/utils" + "github.com/Mirantis/hmc/internal/utils/status" ) type KubeClient struct { @@ -162,7 +162,7 @@ func (kc *KubeClient) CreateOrUpdateUnstructuredObject(gvr schema.GroupVersionRe client := kc.GetDynamicClient(gvr, namespaced) - kind, name := utils.ObjKindName(obj) + kind, name := status.ObjKindName(obj) resp, err := client.Get(context.Background(), name, metav1.GetOptions{}) if apierrors.IsNotFound(err) { diff --git a/test/e2e/managedcluster/validate_deleted.go b/test/e2e/managedcluster/validate_deleted.go index 582c27623..e09d4c254 100644 --- a/test/e2e/managedcluster/validate_deleted.go +++ b/test/e2e/managedcluster/validate_deleted.go @@ -22,6 +22,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "github.com/Mirantis/hmc/internal/utils/status" "github.com/Mirantis/hmc/test/e2e/kubeclient" "github.com/Mirantis/hmc/test/utils" ) @@ -44,7 +45,7 @@ func validateClusterDeleted(ctx context.Context, kc *kubeclient.KubeClient, clus return fmt.Errorf("cluster: %q exists, but is not in 'Deleting' phase", clusterName) } - conditions, err := utils.GetConditionsFromUnstructured(cluster) + conditions, err := status.ConditionsFromUnstructured(cluster) if err != nil { return fmt.Errorf("failed to get conditions from unstructured object: %w", err) } diff --git a/test/e2e/managedcluster/validate_deployed.go b/test/e2e/managedcluster/validate_deployed.go index 
abb8c9511..bae823f75 100644 --- a/test/e2e/managedcluster/validate_deployed.go +++ b/test/e2e/managedcluster/validate_deployed.go @@ -27,6 +27,7 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/util/intstr" + "github.com/Mirantis/hmc/internal/utils/status" "github.com/Mirantis/hmc/test/e2e/kubeclient" "github.com/Mirantis/hmc/test/utils" ) @@ -108,12 +109,12 @@ func validateK0sControlPlanes(ctx context.Context, kc *kubeclient.KubeClient, cl Fail(err.Error()) } - objKind, objName := utils.ObjKindName(&controlPlane) + objKind, objName := status.ObjKindName(&controlPlane) // k0s does not use the metav1.Condition type for status.conditions, // instead it uses a custom type so we can't use // ValidateConditionsTrue here, instead we'll check for "ready: true". - status, found, err := unstructured.NestedFieldCopy(controlPlane.Object, "status") + objStatus, found, err := unstructured.NestedFieldCopy(controlPlane.Object, "status") if !found { return fmt.Errorf("no status found for %s: %s", objKind, objName) } @@ -121,9 +122,9 @@ func validateK0sControlPlanes(ctx context.Context, kc *kubeclient.KubeClient, cl return fmt.Errorf("failed to get status conditions for %s: %s: %w", objKind, objName, err) } - st, ok := status.(map[string]any) + st, ok := objStatus.(map[string]any) if !ok { - return fmt.Errorf("expected K0sControlPlane condition to be type map[string]any, got: %T", status) + return fmt.Errorf("expected K0sControlPlane condition to be type map[string]any, got: %T", objStatus) } if _, ok := st["ready"]; !ok { diff --git a/test/utils/utils.go b/test/utils/utils.go index f158af7ba..56ce5ee33 100644 --- a/test/utils/utils.go +++ b/test/utils/utils.go @@ -24,7 +24,8 @@ import ( . 
"github.com/onsi/ginkgo/v2" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime" + + "github.com/Mirantis/hmc/internal/utils/status" ) const ( @@ -114,9 +115,9 @@ func GetProjectDir() (string, error) { // unstructured object and returns an error if any of the conditions are not // true. Conditions are expected to be of type metav1.Condition. func ValidateConditionsTrue(unstrObj *unstructured.Unstructured) error { - objKind, objName := ObjKindName(unstrObj) + objKind, objName := status.ObjKindName(unstrObj) - conditions, err := GetConditionsFromUnstructured(unstrObj) + conditions, err := status.ConditionsFromUnstructured(unstrObj) if err != nil { return fmt.Errorf("failed to get conditions from unstructured object: %w", err) } @@ -143,43 +144,9 @@ func ConvertConditionsToString(condition metav1.Condition) string { condition.Type, condition.Status, condition.Reason, condition.Message) } -func GetConditionsFromUnstructured(unstrObj *unstructured.Unstructured) ([]metav1.Condition, error) { - objKind, objName := ObjKindName(unstrObj) - - // Iterate the status conditions and ensure each condition reports a "Ready" - // status. 
- unstrConditions, found, err := unstructured.NestedSlice(unstrObj.Object, "status", "conditions") - if !found { - return nil, fmt.Errorf("no status conditions found for %s: %s", objKind, objName) - } - if err != nil { - return nil, fmt.Errorf("failed to get status conditions for %s: %s: %w", objKind, objName, err) - } - - conditions := make([]metav1.Condition, 0, len(unstrConditions)) - - for _, condition := range unstrConditions { - conditionMap, ok := condition.(map[string]any) - if !ok { - return nil, fmt.Errorf("expected %s: %s condition to be type map[string]any, got: %T", - objKind, objName, conditionMap) - } - - var c *metav1.Condition - - if err := runtime.DefaultUnstructuredConverter.FromUnstructured(conditionMap, &c); err != nil { - return nil, fmt.Errorf("failed to convert condition map to metav1.Condition: %w", err) - } - - conditions = append(conditions, *c) - } - - return conditions, nil -} - // ValidateObjectNamePrefix checks if the given object name has the given prefix. func ValidateObjectNamePrefix(unstrObj *unstructured.Unstructured, clusterName string) error { - objKind, objName := ObjKindName(unstrObj) + objKind, objName := status.ObjKindName(unstrObj) // Verify the machines are prefixed with the cluster name and fail // the test if they are not. @@ -190,10 +157,6 @@ func ValidateObjectNamePrefix(unstrObj *unstructured.Unstructured, clusterName s return nil } -func ObjKindName(unstrObj *unstructured.Unstructured) (kind, name string) { - return unstrObj.GetKind(), unstrObj.GetName() -} - func WarnError(err error) { _, _ = fmt.Fprintf(GinkgoWriter, "Warning: %v\n", err) }