diff --git a/config/ci/manager/manager_image_patch.yaml b/config/ci/manager/manager_image_patch.yaml
index 472f75963741..d8e3820c8f50 100644
--- a/config/ci/manager/manager_image_patch.yaml
+++ b/config/ci/manager/manager_image_patch.yaml
@@ -7,5 +7,5 @@ spec:
   template:
     spec:
       containers:
-      - image: gcr.io/k8s-staging-cluster-api/cluster-api-controller:master
+      - image: diamanti/capi:v0.3.3.12
         name: manager
diff --git a/controllers/cluster_controller.go b/controllers/cluster_controller.go
index 8999f0e2f7cf..c581fc4dd373 100644
--- a/controllers/cluster_controller.go
+++ b/controllers/cluster_controller.go
@@ -131,8 +131,6 @@ func (r *ClusterReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, reterr e
 	}
 
 	defer func() {
-		// Always reconcile the Status.Phase field.
-		r.reconcilePhase(ctx, cluster)
 		r.reconcileMetrics(ctx, cluster)
 
 		// Always attempt to Patch the Cluster object and status after each reconciliation.
@@ -190,6 +188,7 @@ func (r *ClusterReconciler) reconcile(ctx context.Context, cluster *clusterv1.Cl
 		r.reconcileControlPlane(ctx, cluster),
 		r.reconcileKubeconfig(ctx, cluster),
 		r.reconcileControlPlaneInitialized(ctx, cluster),
+		r.reconcilePhase(ctx, cluster),
 	}
 
 	// Parse the errors, making sure we record if there is a RequeueAfterError.
diff --git a/controllers/cluster_controller_phases.go b/controllers/cluster_controller_phases.go
index 7ab3cf9ffe30..478ebcc7d9d9 100644
--- a/controllers/cluster_controller_phases.go
+++ b/controllers/cluster_controller_phases.go
@@ -19,6 +19,8 @@ package controllers
 import (
 	"context"
 	"fmt"
+	"io/ioutil"
+	"os"
 	"time"
 
 	"github.com/pkg/errors"
@@ -26,6 +28,8 @@ import (
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/tools/clientcmd"
 	"k8s.io/utils/pointer"
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
 	"sigs.k8s.io/cluster-api/controllers/external"
@@ -39,6 +43,9 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/handler"
 )
 
+const deploymentName = "enforcer-prod"
+const deploymentNamespace = "kube-system"
+
 // Check if the cluster has at least one control plane in running state
 func isClusterRunning(cluster *clusterv1.Cluster) bool {
 	annotations := cluster.ObjectMeta.GetAnnotations()
@@ -48,8 +55,63 @@ func isClusterRunning(cluster *clusterv1.Cluster) bool {
 	return false
 }
 
-func (r *ClusterReconciler) reconcilePhase(_ context.Context, cluster *clusterv1.Cluster) {
+func (r *ClusterReconciler) isTenantClusterReady(cluster *clusterv1.Cluster) bool {
 	logger := r.Log.WithValues("cluster", cluster.Name, "namespace", cluster.Namespace)
+
+	obj := &corev1.Secret{}
+	secretName := fmt.Sprintf("%s-kubeconfig", cluster.Name)
+	err := r.Client.Get(context.TODO(),
+		types.NamespacedName{Name: secretName, Namespace: cluster.Namespace}, obj)
+	if err != nil {
+		return false
+	}
+	token, ok := obj.Data["value"]
+	if !ok {
+		token, ok = obj.Data["config"]
+		if !ok {
+			logger.Info(fmt.Sprintf("Cluster %s-kubeconfig secret does not contain a kubeconfig", cluster.Name))
+			return false
+		}
+	}
+	dir := os.TempDir()
+	tmpFile, err := ioutil.TempFile(dir, "tkubeconfig-")
+	if err != nil {
+		logger.Info(fmt.Sprintf("tempfile create failed for cluster %s, status: %v", cluster.Name, err))
+		return false
+	}
+
+	// Remember to clean up the file afterwards
+	defer os.Remove(tmpFile.Name())
+
+	err = ioutil.WriteFile(tmpFile.Name(), token, 0644)
+	if err != nil {
+		logger.Info(fmt.Sprintf("tempfile write failed for cluster %s, status: %v", cluster.Name, err))
+		return false
+	}
+
+	config, err := clientcmd.BuildConfigFromFlags("", tmpFile.Name())
+	if err != nil {
+		logger.Info(fmt.Sprintf("Cluster %s clientcmd config not found", cluster.Name))
+		return false
+	}
+	clientset, err := kubernetes.NewForConfig(config)
+	if err != nil {
+		logger.Info(fmt.Sprintf("Cluster %s clientset not found", cluster.Name))
+		return false
+	}
+
+	deploymentsClient := clientset.AppsV1().Deployments(deploymentNamespace)
+
+	deployment, err := deploymentsClient.Get(deploymentName, metav1.GetOptions{})
+
+	if err == nil && deployment.Status.AvailableReplicas > 0 {
+		logger.Info("Cluster initialized by tenant controller, moving to Running phase")
+		return true
+	}
+	return false
+}
+
+func (r *ClusterReconciler) reconcilePhase(_ context.Context, cluster *clusterv1.Cluster) error {
 	if cluster.Status.Phase == "" {
 		cluster.Status.SetTypedPhase(clusterv1.ClusterPhasePending)
 	}
@@ -63,25 +125,10 @@ func (r *ClusterReconciler) reconcilePhase(_ context.Context, cluster *clusterv1
 	}
 
 	if isClusterRunning(cluster) {
-		retries := 300
-		for {
-			if retries == 0 {
-				break
-			}
-			kubeconfigProxySecret := &corev1.Secret{}
-			secretName := fmt.Sprintf("%s-kubeconfig-proxy", cluster.Name)
-
-			err := r.Client.Get(context.TODO(),
-				types.NamespacedName{Name: secretName, Namespace: cluster.Namespace}, kubeconfigProxySecret)
-			if err == nil {
-				logger.Info("Cluster initialized by tenant controller, moving to Running phase")
-				cluster.Status.SetTypedPhase(clusterv1.ClusterPhaseRunning)
-				break
-			} else {
-				time.Sleep(1 * time.Second)
-				retries--
-			}
+		if !r.isTenantClusterReady(cluster) {
+			return errors.New("cluster is not yet ready to be used")
 		}
+		cluster.Status.SetTypedPhase(clusterv1.ClusterPhaseRunning)
 	}
 
 	if cluster.Status.FailureReason != nil || cluster.Status.FailureMessage != nil {
@@ -91,6 +138,7 @@ func (r *ClusterReconciler) reconcilePhase(_ context.Context, cluster *clusterv1
 	if !cluster.DeletionTimestamp.IsZero() {
 		cluster.Status.SetTypedPhase(clusterv1.ClusterPhaseDeleting)
 	}
+	return nil
 }
 
 // reconcileExternal handles generic unstructured objects referenced by a Cluster.
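
Note on the readiness probe added above: the kubeconfig is written to a temp file only so that clientcmd.BuildConfigFromFlags can read it back from disk. client-go can also build the rest.Config directly from the secret bytes via clientcmd.RESTConfigFromKubeConfig, which avoids the temp-file round trip. Below is a minimal sketch of that alternative, not part of this PR: it assumes the same client-go vintage used here (where the typed Deployments Get call does not yet take a context), reuses this PR's deploymentName/deploymentNamespace constants, and uses a hypothetical helper name.

package controllers

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

// isEnforcerAvailable is a hypothetical helper: it builds a client for the
// tenant cluster straight from the kubeconfig bytes and reports whether the
// enforcer deployment has at least one available replica.
func isEnforcerAvailable(kubeconfig []byte) bool {
	// Parse the kubeconfig from memory; no temp file needed.
	config, err := clientcmd.RESTConfigFromKubeConfig(kubeconfig)
	if err != nil {
		return false
	}
	clientset, err := kubernetes.NewForConfig(config)
	if err != nil {
		return false
	}
	// Same check as isTenantClusterReady: the deployment must exist and
	// report at least one available replica.
	deployment, err := clientset.AppsV1().Deployments(deploymentNamespace).Get(deploymentName, metav1.GetOptions{})
	return err == nil && deployment.Status.AvailableReplicas > 0
}

The caller would pass the bytes fetched from the <cluster-name>-kubeconfig secret, e.g. isEnforcerAvailable(obj.Data["value"]), keeping the secret lookup and logging in the reconciler as in this PR.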