diff --git a/README.md b/README.md index b7f058bc..5eec035c 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,7 @@ kubectl apply -f https://raw.githubusercontent.com/abahmed/kwatch/v0.3.0/deploy/ |:-------------------------------------|:-----------------------------------------------------------------------------------------------------| | `maxRecentLogLines` | Optional Max tail log lines in messages, if it's not provided it will get all log lines | | `namespaces` | Optional list of namespaces that you want to watch, if it's not provided it will watch all namespaces| +| `ignoreFailedGracefulShutdown` | Optional. If set to true, containers that are forcefully killed during shutdown (because their graceful shutdown failed) are not reported as errors | #### Slack diff --git a/controller/controller.go b/controller/controller.go index a5aae9b6..721455bd 100644 --- a/controller/controller.go +++ b/controller/controller.go @@ -23,13 +23,14 @@ import ( // Controller holds necessary type Controller struct { - name string - informer cache.Controller - indexer cache.Indexer - kclient kubernetes.Interface - queue workqueue.RateLimitingInterface - providers []provider.Provider - store storage.Storage + name string + informer cache.Controller + indexer cache.Indexer + kclient kubernetes.Interface + queue workqueue.RateLimitingInterface + providers []provider.Provider + store storage.Storage + ignoreFailedGracefulShutdown bool } // run starts the controller @@ -144,13 +145,24 @@ func (c *Controller) processPod(key string, pod *v1.Pod) { continue } - if (container.State.Waiting != nil && - container.State.Waiting.Reason == "ContainerCreating") || - (container.State.Waiting != nil && - container.State.Waiting.Reason == "PodInitializing") || - (container.State.Terminated != nil && - container.State.Terminated.Reason == "Completed") { - continue + if container.State.Waiting != nil { + switch { + case container.State.Waiting.Reason == "ContainerCreating": + continue + case
container.State.Waiting.Reason == "PodInitializing": + continue + } + } else if container.State.Terminated != nil { + switch { + case container.State.Terminated.Reason == "Completed": + continue + case container.State.Terminated.ExitCode == 143: + // 143 (128 + 15, SIGTERM) is the exit code for graceful termination + continue + case container.State.Terminated.ExitCode == 0: + // 0 is the exit code for an intentional, successful stop + continue + } + } // if reported, continue @@ -158,6 +170,12 @@ func (c *Controller) processPod(key string, pod *v1.Pod) { continue } + if c.ignoreFailedGracefulShutdown && util.ContainsKillingStoppingContainerEvents(c.kclient, pod.Name, pod.Namespace) { + // Graceful shutdown did not work and container was killed during shutdown. + // Not really an error + continue + } + logrus.Debugf( "processing container %s in pod %s@%s", container.Name, diff --git a/controller/start.go b/controller/start.go index 5866f5e5..d09c3f39 100644 --- a/controller/start.go +++ b/controller/start.go @@ -17,7 +17,7 @@ import ( ) // Start creates an instance of controller after initialization and runs it -func Start(providers []provider.Provider) { +func Start(providers []provider.Provider, ignoreFailedGracefulShutdown bool) { // create kubernetes client kclient := client.Create() @@ -67,13 +67,14 @@ func Start(providers []provider.Provider) { }, cache.Indexers{}) controller := Controller{ - name: "pod-crash", - informer: informer, - indexer: indexer, - queue: queue, - kclient: kclient, - providers: providers, - store: memory.NewMemory(), + name: "pod-crash", + informer: informer, + indexer: indexer, + queue: queue, + kclient: kclient, + providers: providers, + store: memory.NewMemory(), + ignoreFailedGracefulShutdown: ignoreFailedGracefulShutdown, } stopCh := make(chan struct{}) diff --git a/main.go b/main.go index 24636dd4..f3cbadee 100644 --- a/main.go +++ b/main.go @@ -38,5 +38,5 @@ func main() { go upgrader.CheckUpdates(providers) // start controller - controller.Start(providers) +
controller.Start(providers, viper.GetBool("ignoreFailedGracefulShutdown")) } diff --git a/util/util.go b/util/util.go index 36fc9b02..0f8e587e 100644 --- a/util/util.go +++ b/util/util.go @@ -39,6 +39,24 @@ func GetPodEventsStr(c kubernetes.Interface, name, namespace string) string { return strings.TrimSpace(eventsString) } +// ContainsKillingStoppingContainerEvents checks if the events contain an event with reason "Killing" and a message containing +// "Stopping container" (case-insensitive), which indicates that a container could not be shut down gracefully +func ContainsKillingStoppingContainerEvents(c kubernetes.Interface, name, namespace string) bool { + events, err := getPodEvents(c, name, namespace) + if err != nil { + return false + } + + for _, ev := range events.Items { + if strings.ToLower(ev.Reason) == "killing" && + strings.Contains(strings.ToLower(ev.Message), "stopping container") { + return true + } + } + + return false +} + // GetPodContainerLogs returns logs for specified container in pod func GetPodContainerLogs( c kubernetes.Interface, name, container, namespace string,