Skip to content

Commit

Permalink
Merge pull request #33 from kubeslice/hotfix-worker-pods-timeout
Browse files Browse the repository at this point in the history
fix(): Increased timeout duration for slice-operator installation
  • Loading branch information
priyank-upadhyay authored Jan 12, 2023
2 parents b9ee345 + 961c1ef commit ad0a074
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 5 deletions.
2 changes: 1 addition & 1 deletion cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
// "github.com/spf13/cobra/doc"
)

var version = "0.4.1"
var version = "0.4.2"
var rootCmd = &cobra.Command{
Use: "kubeslice-cli",
Version: version,
Expand Down
8 changes: 5 additions & 3 deletions pkg/internal/kubernetes-operation.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ const (
func PodVerification(message string, cluster Cluster, namespace string) {
var i = 0
var backoffCount = 0
var backoffLimit = 6
var backoffLimit = 20
for {
i = i + 1
time.Sleep(5 * time.Second)
Expand All @@ -33,11 +33,13 @@ func PodVerification(message string, cluster Cluster, namespace string) {
break
} else if status == PodVerificationStatusFailed {
backoffCount = backoffCount + 1
util.Printf("%s %s... Pod(s) in error state, waiting to recover... %d seconds elapsed", util.Wait, message, i*5)
if backoffCount > backoffLimit {
log.Fatalf("Pod(s) in error state\n%s", output)
log.Fatalf("Pod(s) in error state,\n%s", output)
}
} else {
util.Printf("%s %s... %d seconds elapsed", util.Wait, message, i*5)
}
util.Printf("%s %s... %d seconds elapsed", util.Wait, message, i*5)
}
}

Expand Down
29 changes: 28 additions & 1 deletion pkg/internal/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,35 @@ func UninstallKubeSliceWorker(ApplicationConfiguration *ConfigurationSpecs, work
time.Sleep(200 * time.Millisecond)
}

// Retry calls f until it returns nil or backoffLimit attempts have been made.
//
// The first attempt runs immediately. Before every later attempt Retry sleeps
// for the current delay and then doubles it, so the waits are sleep, 2*sleep,
// 4*sleep, and so on.
//
// It returns nil as soon as f succeeds. If every attempt fails, the returned
// error reports the attempt count and elapsed seconds and wraps the last error
// from f, so callers can inspect it with errors.Is / errors.As. If
// backoffLimit is not positive, f is never called and an error is returned.
func Retry(backoffLimit int, sleep time.Duration, f func() error) (err error) {
	// Without this guard the loop body never runs and the final message would
	// format a nil error.
	if backoffLimit <= 0 {
		return fmt.Errorf("retry not attempted: backoffLimit must be positive, got %d", backoffLimit)
	}

	start := time.Now()
	for i := 0; i < backoffLimit; i++ {
		if i > 0 {
			time.Sleep(sleep)
			sleep *= 2
		}
		if err = f(); err == nil {
			return nil
		}
	}
	elapsed := time.Since(start)
	// %w renders the same text as %s for an error but keeps the chain intact.
	return fmt.Errorf("retry failed after %d attempts (took %d seconds), last error: %w", backoffLimit, int(elapsed.Seconds()), err)
}

// generateWorkerValuesFile renders the worker values template for cluster and
// writes it to kubesliceDirectory/valuesFile via util.DumpFile.
//
// The secret is fetched through Retry (up to 3 attempts, starting with a
// one-second delay that doubles between attempts). An attempt counts as failed
// while any of the "namespace", "controllerEndpoint", "ca.crt" or "token"
// entries is empty. If all attempts fail the process exits through log.Fatalf.
func generateWorkerValuesFile(cluster Cluster, valuesFile string, imagePullSecrets ImagePullSecrets, cc Cluster, projectName string) {
	// Declared outside the closure so the last fetched value is visible below.
	var secrets map[string]string
	err := Retry(3, 1*time.Second, func() error {
		secrets = fetchSecret(cluster.Name, cc, projectName)
		if secrets["namespace"] == "" || secrets["controllerEndpoint"] == "" || secrets["ca.crt"] == "" || secrets["token"] == "" {
			return fmt.Errorf("secret is empty")
		}
		return nil
	})
	if err != nil {
		log.Fatalf("Unable to fetch secrets\n%s", err)
	}
	util.DumpFile(fmt.Sprintf(workerValuesTemplate+generateImagePullSecretsValue(imagePullSecrets), secrets["namespace"], secrets["controllerEndpoint"], secrets["ca.crt"], secrets["token"], cluster.Name, cluster.NodeIP, cluster.ControlPlaneAddress), kubesliceDirectory+"/"+valuesFile)
}

Expand Down

0 comments on commit ad0a074

Please sign in to comment.