Skip to content

Commit

Permalink
Fix pods rolling restart when number of replicas changed (#25)
Browse files Browse the repository at this point in the history
* Remove filtered annotations from Pod Template Spec

Annotation change cause changes of controller-hash-revision of the
StatefulSet. It caused unnesesary restars of all pods.

"kubectl.kubernetes.io/last-applied-configuration" is updated with every
update of the HBase CRD including just replicas count update. It
should not be included into pod temlpate metadata to avoid unnesesary
pods restart.

ensureStatefulSet function logging is more clear.

Minor changes with logging messages and test fixes.

* Fix typos, detailed logging for # of pod replicas
  • Loading branch information
mihkulemin authored May 29, 2023
1 parent 8a814c9 commit d0b77bf
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 39 deletions.
80 changes: 43 additions & 37 deletions controllers/hbase_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,32 +90,33 @@ type HBaseReconciler struct {
// +kubebuilder:rbac:groups="apps",namespace="hbase",resources=statefulsets,verbs=*

func (r *HBaseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
log := r.Log.WithValues("hbase", req.NamespacedName)
log := r.Log.WithValues("hbase_resource", req.NamespacedName)

log.Info("got request")
log.Info("Start reconciliation")

// Fetch the App instance.
app := &hbasev1.HBase{}
err := r.Get(ctx, req.NamespacedName, app)
if err != nil {
if errors.IsNotFound(err) {
r.Log.Error(err, "hbase crd is not found")
r.Log.Error(err, "HBase CRD is not found")
return ctrl.Result{}, nil
}
r.Log.Error(err, "failed getting hbase crd")
r.Log.Error(err, "Failed getting HBase CRD")
return ctrl.Result{}, err
}

log.Info("got hbase crd")
log.Info("got HBase CRD")

serviceOk, err := r.ensureService(app)
if err != nil {
return ctrl.Result{}, err
}
if !serviceOk {
log.Info("HBase service reconfigured, reconciling again")
return ctrl.Result{Requeue: true}, nil
}
log.Info("hbase headless service is in sync")
log.Info("HBase headless service is in sync")

// deploy configmap if it doesn't exist
configMapName := getConfigMapName(app)
Expand All @@ -124,35 +125,36 @@ func (r *HBaseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
return ctrl.Result{}, err
}
if !cmOk {
log.Info("HBase ConfigMap reconfigured, reconciling again")
return ctrl.Result{Requeue: true}, nil
}
log.Info("hbase configmap is in sync")
log.Info("HBase ConfigMap is in sync")

// update hbasemaster statefulset
masterName := types.NamespacedName{Name: "hbasemaster", Namespace: app.Namespace}
masterSts, masterUpdated, err := r.ensureStatefulSet(app, masterName, configMapName, app.Spec.MasterSpec)
if err != nil {
r.Log.Error(err, "failed reconciling HBase Master StatefulSet")
r.Log.Error(err, "Failed reconciling HBase Master StatefulSet")
return ctrl.Result{}, err
}
if masterUpdated {
log.Info("hbase master statefulset updated")
log.Info("HBase Master StatefulSet updated, reconciling again")
return ctrl.Result{Requeue: true}, nil
}
log.Info("hbasemaster statefulset is in sync")
log.Info("HBase Master StatefulSet is in sync")

// update regionserver statefulset
rsName := types.NamespacedName{Name: "regionserver", Namespace: app.Namespace}
rsSts, rsUpdated, err := r.ensureStatefulSet(app, rsName, configMapName, app.Spec.RegionServerSpec)
if err != nil {
r.Log.Error(err, "failed reconciling HBase RegionServer StatefulSet")
r.Log.Error(err, "Failed reconciling HBase RegionServer StatefulSet")
return ctrl.Result{}, err
}
if rsUpdated {
log.Info("hbase regionserver statefulset updated")
log.Info("HBase RegionServer StatefulSet updated")
return ctrl.Result{Requeue: true}, nil
}
log.Info("regionserver statefulset is in sync")
log.Info("RegionServer StatefulSet is in sync")

// make sure there are no regions in transition.
// we want this to happen after we've deployed all manifests in order to
Expand All @@ -162,35 +164,37 @@ func (r *HBaseReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
return ctrl.Result{}, fmt.Errorf("failed to get regions in transition: %v", err)
}
if rit != 0 {
log.Info("there are regions in transition, wait", "regions", rit)
log.Info("There are regions in transition, wait and restart reconciling", "regions", rit)
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
}
log.Info("there are no regions in transition")
log.Info("There are no regions in transition")

r.Log.Info("reconciling masters")
r.Log.Info("Reconciling Master pods")
mastersOk, err := r.ensureStatefulSetPods(ctx, masterSts, r.pickMasterToDelete)
if err != nil {
r.Log.Error(err, "failed reconciling HBase Masters")
r.Log.Error(err, "Failed reconciling HBase Master pods")
return ctrl.Result{}, err
}
if !mastersOk {
return ctrl.Result{RequeueAfter: 15 * time.Second}, nil
}

r.Log.Info("reconciling regionservers")
r.Log.Info("Reconciling RegionServer Pods")
rsOk, err := r.ensureStatefulSetPods(ctx, rsSts, r.pickRegionServerToDelete)
if err != nil {
r.Log.Error(err, "failed reconciling HBase RegionServers")
r.Log.Error(err, "Failed reconciling HBase RegionServer pods")
return ctrl.Result{}, err
}
if !rsOk {
return ctrl.Result{RequeueAfter: 15 * time.Second}, nil
}

r.Log.Info("Deleting unused config maps")
if err := r.deleteUnusedConfigMaps(ctx, app, configMapName); err != nil {
return ctrl.Result{}, err
}

r.Log.Info("Everything is up to date!")
return ctrl.Result{}, nil
}

Expand Down Expand Up @@ -550,7 +554,7 @@ func (r *HBaseReconciler) ensureStatefulSetPods(ctx context.Context, sts *appsv1
}
}

r.Log.Info("pick pod to delete", "statefulset", sts.Name, "pods", sprintPodList(toDelete))
r.Log.Info("pick pod to delete", "StatefulSet", sts.Name, "pods", sprintPodList(toDelete))

// delete one pod at a time
p, err := pickToDelete(ctx, toDelete, upToDate)
Expand All @@ -563,7 +567,7 @@ func (r *HBaseReconciler) ensureStatefulSetPods(ctx context.Context, sts *appsv1
return false, r.Delete(ctx, p)
}

r.Log.Info("statefulsets pods are reconciled")
r.Log.Info("pods are up to date", "StatefulSet", sts.Name)
// all is perfect, ensured
return true, nil

Expand All @@ -587,24 +591,29 @@ func (r *HBaseReconciler) ensureStatefulSet(hb *hbasev1.HBase,
}
return nil, false, err
}
r.Log.Info("updating revision", "sameRevision", expectedRevision != actual.Annotations[HBaseControllerRevisionKey])
r.Log.Info("updating replica count", "sameReplicaCount", *actual.Spec.Replicas != *expected.Spec.Replicas)

if actual.Annotations != nil &&
actual.Annotations[HBaseControllerRevisionKey] == expectedRevision &&
*actual.Spec.Replicas == *expected.Spec.Replicas {
r.Log.Info("StatefulSet is up-to-date", "name", stsName)
return actual, false, nil
var actualRevision string
if actual.Annotations != nil {
actualRevision = actual.Annotations[HBaseControllerRevisionKey]
}

// update
if err := r.Update(context.TODO(), expected); err != nil {
r.Log.Info("StatefulSet is up-to-date", "name", stsName)
return nil, false, err
r.Log.Info("StatefulSet reconciliation",
"name", stsName,
"revision is up to date", actualRevision == expectedRevision,
"expected replicas", *actual.Spec.Replicas,
"actual replicas", *expected.Spec.Replicas)

if *actual.Spec.Replicas != *expected.Spec.Replicas || actualRevision != expectedRevision {
// Update StatefulSet to expected configuration
if err := r.Update(context.TODO(), expected); err != nil {
return nil, false, err
}
r.Log.Info("updated StatefulSet", "name", stsName)
return expected, true, nil
}
r.Log.Info("updated StatefulSet", "name", stsName)
return expected, true, nil

r.Log.Info("StatefulSet is up to date", "name", stsName)
return actual, false, nil
}

func configMapVolume(cmName types.NamespacedName) corev1.Volume {
Expand Down Expand Up @@ -688,9 +697,6 @@ func (r *HBaseReconciler) statefulSet(hb *hbasev1.HBase,
// After revision hash calculation, actually update replica count
stsSpec.Replicas = pointer.Int32Ptr(ss.Count)

// Restore metadata annotations without filtering
stsSpec.Template.ObjectMeta.Annotations = templateMetadataAnnotations

return &appsv1.StatefulSet{
ObjectMeta: metav1.ObjectMeta{
Name: stsName.Name,
Expand Down
4 changes: 2 additions & 2 deletions controllers/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ var _ = Describe("HBase controller", func() {
getExistingSts("hbasemaster", namespace, createdMasterStatefulSet)

By("By checking master statefulset has correct number of replicas")
Ω(hb.Spec.MasterSpec.Count).Should(Equal(int32(2)))
Ω(*createdMasterStatefulSet.Spec.Replicas).Should(Equal(int32(2)))

By("By checking master statefulset has mounted correct confgmap")
vs := createdMasterStatefulSet.Spec.Template.Spec.Volumes
Expand All @@ -263,7 +263,7 @@ var _ = Describe("HBase controller", func() {
getExistingSts("regionserver", namespace, createdRegionServerStatefulSet)

By("By checking regionserver statefulset has correct number of replicas")
Ω(hb.Spec.RegionServerSpec.Count).Should(Equal(int32(3)))
Ω(*createdRegionServerStatefulSet.Spec.Replicas).Should(Equal(int32(3)))

By("By checking regionserver statefulset has mounted correct confgmap")
vs = createdRegionServerStatefulSet.Spec.Template.Spec.Volumes
Expand Down

0 comments on commit d0b77bf

Please sign in to comment.