Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow to ignore if a container has to forcefully shutdown due to graceful shutdown not working #58

Merged
merged 5 commits into from
Jan 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ kubectl apply -f https://raw.githubusercontent.com/abahmed/kwatch/v0.3.0/deploy/
|:-------------------------------------|:-----------------------------------------------------------------------------------------------------|
| `maxRecentLogLines` | Optional Max tail log lines in messages, if it's not provided it will get all log lines |
| `namespaces` | Optional list of namespaces that you want to watch, if it's not provided it will watch all namespaces|
| `ignoreFailedGracefulShutdown` | Optional flag; if set to true, containers that are forcefully killed during shutdown (because their graceful shutdown failed) are not reported as errors |


#### Slack
Expand Down
46 changes: 32 additions & 14 deletions controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,14 @@ import (

// Controller holds the informer, indexer, work queue, Kubernetes client, providers and store needed to process pods
type Controller struct {
name string
informer cache.Controller
indexer cache.Indexer
kclient kubernetes.Interface
queue workqueue.RateLimitingInterface
providers []provider.Provider
store storage.Storage
name string
informer cache.Controller
indexer cache.Indexer
kclient kubernetes.Interface
queue workqueue.RateLimitingInterface
providers []provider.Provider
store storage.Storage
ignoreFailedGracefulShutdown bool
}

// run starts the controller
Expand Down Expand Up @@ -144,20 +145,37 @@ func (c *Controller) processPod(key string, pod *v1.Pod) {
continue
}

if (container.State.Waiting != nil &&
container.State.Waiting.Reason == "ContainerCreating") ||
(container.State.Waiting != nil &&
container.State.Waiting.Reason == "PodInitializing") ||
(container.State.Terminated != nil &&
container.State.Terminated.Reason == "Completed") {
continue
if container.State.Waiting != nil {
switch {
case container.State.Waiting.Reason == "ContainerCreating":
continue
case container.State.Waiting.Reason == "PodInitializing":
continue
}
} else if container.State.Terminated != nil {
switch {
case container.State.Terminated.Reason == "Completed":
continue
case container.State.Terminated.ExitCode == 143:
// 143 is the exit code for graceful termination
continue
case container.State.Terminated.ExitCode == 0:
// 0 is the exit code of a container that stopped on purpose (exited successfully)
continue
}
}

// if reported, continue
if c.store.HasPodContainer(key, container.Name) {
continue
}

if c.ignoreFailedGracefulShutdown && util.ContainsKillingStoppingContainerEvents(c.kclient, pod.Name, pod.Namespace) {
// Graceful shutdown did not work and container was killed during shutdown.
// Not really an error
continue
}

logrus.Debugf(
"processing container %s in pod %s@%s",
container.Name,
Expand Down
17 changes: 9 additions & 8 deletions controller/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import (
)

// Start creates an instance of controller after initialization and runs it
func Start(providers []provider.Provider) {
func Start(providers []provider.Provider, ignoreFailedGracefulShutdown bool) {
// create kubernetes client
kclient := client.Create()

Expand Down Expand Up @@ -67,13 +67,14 @@ func Start(providers []provider.Provider) {
}, cache.Indexers{})

controller := Controller{
name: "pod-crash",
informer: informer,
indexer: indexer,
queue: queue,
kclient: kclient,
providers: providers,
store: memory.NewMemory(),
name: "pod-crash",
informer: informer,
indexer: indexer,
queue: queue,
kclient: kclient,
providers: providers,
store: memory.NewMemory(),
ignoreFailedGracefulShutdown: ignoreFailedGracefulShutdown,
}

stopCh := make(chan struct{})
Expand Down
2 changes: 1 addition & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,5 @@ func main() {
go upgrader.CheckUpdates(providers)

// start controller
controller.Start(providers)
controller.Start(providers, viper.GetBool("ignoreFailedGracefulShutdown"))
}
18 changes: 18 additions & 0 deletions util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,24 @@ func GetPodEventsStr(c kubernetes.Interface, name, namespace string) string {
return strings.TrimSpace(eventsString)
}

// ContainsKillingStoppingContainerEvents reports whether the events fetched
// for the pod identified by name and namespace include at least one event
// whose reason is "Killing" and whose message contains "Stopping container".
// Both comparisons are case-insensitive. Such an event indicates that a
// container could not be gracefully shut down.
//
// It returns false when no matching event exists or when the pod events
// cannot be retrieved.
func ContainsKillingStoppingContainerEvents(c kubernetes.Interface, name, namespace string) bool {
	events, err := getPodEvents(c, name, namespace)
	if err != nil {
		// Best effort: without events there is nothing to match against, so
		// report that no such event was found rather than failing the caller.
		return false
	}

	for _, ev := range events.Items {
		// EqualFold compares case-insensitively without allocating a
		// lower-cased copy of the reason for every event.
		if !strings.EqualFold(ev.Reason, "killing") {
			continue
		}

		if strings.Contains(strings.ToLower(ev.Message), "stopping container") {
			return true
		}
	}

	return false
}

// GetPodContainerLogs returns logs for specified container in pod
func GetPodContainerLogs(
c kubernetes.Interface, name, container, namespace string,
Expand Down