diff --git a/handlers/idler/handler.go b/handlers/idler/handler.go index d3f8cd2..6afc0d4 100644 --- a/handlers/idler/handler.go +++ b/handlers/idler/handler.go @@ -4,6 +4,7 @@ import ( "bytes" "fmt" "io" + "time" "github.com/go-logr/logr" prometheusapi "github.com/prometheus/client_golang/api" @@ -23,14 +24,14 @@ import ( // Idler handles idling of cli and services. type Idler struct { Client client.Client - PodCheckInterval int + PodCheckInterval time.Duration Log logr.Logger Scheme *runtime.Scheme DryRun bool Debug bool Selectors *Data PrometheusClient prometheusapi.Client - PrometheusCheckInterval string + PrometheusCheckInterval time.Duration } type idlerSelector struct { diff --git a/handlers/idler/service-kubernetes.go b/handlers/idler/service-kubernetes.go index 9748a92..9195d97 100644 --- a/handlers/idler/service-kubernetes.go +++ b/handlers/idler/service-kubernetes.go @@ -28,6 +28,22 @@ func (h *Idler) KubernetesServiceIdler(ctx context.Context, opLog logr.Logger, n Selector: labels.NewSelector().Add(labelRequirements...), }, }) + podIntervalCheck := h.PodCheckInterval + prometheusInternalCheck := h.PrometheusCheckInterval + // allow namespace interval overrides + if podinterval, ok := namespace.ObjectMeta.Annotations["idling.amazee.io/pod-interval"]; ok { + t, err := time.ParseDuration(podinterval) + if err == nil { + podIntervalCheck = t + } + + } + if promethusinterval, ok := namespace.ObjectMeta.Annotations["idling.amazee.io/prometheus-interval"]; ok { + t, err := time.ParseDuration(promethusinterval) + if err == nil { + prometheusInternalCheck = t + } + } builds := &corev1.PodList{} runningBuild := false if !h.Selectors.Service.SkipBuildCheck { @@ -89,11 +105,11 @@ func (h *Idler) KubernetesServiceIdler(ctx context.Context, opLog logr.Logger, n for _, pod := range pods.Items { // check if the runtime of the pod is more than our interval if pod.Status.StartTime != nil { - hs := time.Now().Sub(pod.Status.StartTime.Time).Hours() + hs := 
time.Now().Sub(pod.Status.StartTime.Time) if h.Debug { - opLog.Info(fmt.Sprintf("Pod %s has been running for %d hours", pod.ObjectMeta.Name, int(hs))) + opLog.Info(fmt.Sprintf("Pod %s has been running for %v", pod.ObjectMeta.Name, hs)) } - if int(hs) >= h.PodCheckInterval { + if hs > podIntervalCheck { // if it is, set the idle flag idle = true } @@ -114,7 +130,7 @@ func (h *Idler) KubernetesServiceIdler(ctx context.Context, opLog logr.Logger, n promQuery := fmt.Sprintf( `round(sum(increase(nginx_ingress_controller_requests{exported_namespace="%s",status="200"}[%s])) by (status))`, namespace.ObjectMeta.Name, - h.PrometheusCheckInterval, + prometheusInternalCheck, ) result, warnings, err := v1api.Query(ctx, promQuery, time.Now()) if err != nil { @@ -133,7 +149,7 @@ func (h *Idler) KubernetesServiceIdler(ctx context.Context, opLog logr.Logger, n } } // if the hits are not 0, then the environment doesn't need to be idled - opLog.Info(fmt.Sprintf("Environment has had %d hits in the last %s", numHits, h.PrometheusCheckInterval)) + opLog.Info(fmt.Sprintf("Environment has had %d hits in the last %s", numHits, prometheusInternalCheck)) if numHits != 0 { opLog.Info(fmt.Sprintf("Environment does not need idling")) return diff --git a/main.go b/main.go index 7e1ecd1..ce45513 100644 --- a/main.go +++ b/main.go @@ -17,7 +17,9 @@ package main import ( "flag" + "fmt" "os" + "time" "github.com/amazeeio/aergia-controller/controllers" "github.com/amazeeio/aergia-controller/handlers/idler" @@ -63,7 +65,7 @@ func main() { var prometheusAddress string var prometheusCheckInterval string - var podCheckInterval int + var podCheckInterval string var enableCLIIdler bool var enableServiceIdler bool @@ -86,8 +88,8 @@ func main() { "The address for the prometheus endpoint to check against") flag.StringVar(&prometheusCheckInterval, "prometheus-interval", "4h", "The time range interval for how long to check prometheus for (default: 4h)") - flag.IntVar(&podCheckInterval, "pod-check-interval", 4, 
- "The time range interval for how long to check pod update (default: 4)") + flag.StringVar(&podCheckInterval, "pod-check-interval", "4h", + "The time range interval for how long to check pod update (default: 4h)") flag.BoolVar(&skipHitCheck, "skip-hit-check", false, "Flag to determine if the idler should check the hit backend or not. If true, this overrides what is in the selectors file.") flag.BoolVar(&enableCLIIdler, "enable-cli-idler", true, "Flag to enable cli idler.") @@ -102,11 +104,25 @@ func main() { serviceCron = variables.GetEnv("SERVICE_CRON", serviceCron) enableServiceIdler = variables.GetEnvBool("ENABLE_SERVICE_IDLER", enableServiceIdler) enableCLIIdler = variables.GetEnvBool("ENABLE_CLI_IDLER", enableCLIIdler) - podCheckInterval = variables.GetEnvInt("POD_CHECK_INTERVAL", podCheckInterval) + podCheckInterval = variables.GetEnv("POD_CHECK_INTERVAL", podCheckInterval) + timePodCheckInterval, err := time.ParseDuration(podCheckInterval) + if err != nil { + // if the first parse fails, it may be because the user is using a single integer hour value from a previous release + // this handles the conversion from the previous integer value to the new time.Duration value. 
+ timePodCheckInterval, err = time.ParseDuration(fmt.Sprintf("%sh", podCheckInterval)) + if err != nil { + setupLog.Error(err, "unable to decode pod check interval") + os.Exit(1) + } + } prometheusAddress = variables.GetEnv("PROMETHEUS_ADDRESS", prometheusAddress) prometheusCheckInterval = variables.GetEnv("PROMETHEUS_CHECK_INTERVAL", prometheusCheckInterval) - + timePrometheusCheckInterval, err := time.ParseDuration(prometheusCheckInterval) + if err != nil { + setupLog.Error(err, "unable to decode prometheus check interval") + os.Exit(1) + } ctrl.SetLogger(zap.New(func(o *zap.Options) { o.Development = true })) @@ -176,9 +192,9 @@ func main() { idler := &idler.Idler{ Client: mgr.GetClient(), Log: ctrl.Log.WithName("aergia-controller").WithName("ServiceIdler"), - PodCheckInterval: podCheckInterval, + PodCheckInterval: timePodCheckInterval, PrometheusClient: prometheusClient, - PrometheusCheckInterval: prometheusCheckInterval, + PrometheusCheckInterval: timePrometheusCheckInterval, DryRun: dryRun, Debug: debug, Selectors: selectors,