Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feat] Added Google Chat Integration #42

Open
wants to merge 28 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
The diff you're trying to view is too large. We only load the first 3000 changed files.
7 changes: 7 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
GOOGLECHAT_WEBHOOK_URL=https://chat.googleapis.com/v1/spaces/SPACE_ID/messages?key=KEY&token=TOKEN
USE_GOOGLE_CHAT=true
CLUSTER_NAME=minikube
MUTE_SECONDS=600
IGNORE_RESTART_COUNT=5
SLACK_WEBHOOK_URL=https://hooks.slack.com/services/xxxxx/xxxxx
SLACK_CHANNEL=test
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.env
27 changes: 20 additions & 7 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,22 @@
FROM golang:1.17.5-alpine3.15 AS builder
COPY go.* /
RUN go mod download
COPY *.go /
RUN CGO_ENABLED=0 go build -o /k8s-pod-restart-info-collector /
FROM golang:alpine3.19 AS builder
WORKDIR /app

FROM alpine:3.15
# Copy go.mod and go.sum files
COPY go.mod go.sum ./

# Download all dependencies
# RUN go mod download

# Vendor dependencies
RUN go mod vendor

# Copy the entire project
COPY . .

# Build the Go app
RUN GOOS=linux go build -mod vendor -o /k8s-pod-restart-info-collector .

# Start a new stage from scratch
FROM alpine:3.19.1
COPY --from=builder /k8s-pod-restart-info-collector /k8s-pod-restart-info-collector
CMD ["/k8s-pod-restart-info-collector"]
CMD ["/k8s-pod-restart-info-collector"]
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ As shown below, by clicking “Show more”, we can see the Reason, “Pod Statu

```bash
export SLACK_WEBHOOK_URL=https://hooks.slack.com/services/xxxxx/xxxxx
export GOOGLECHAT_WEBHOOK_URL=https://chat.googleapis.com/v1/spaces/xxxxx
go run .
```

Expand Down Expand Up @@ -84,7 +85,12 @@ helm uninstall k8s-pod-restart-info-collector
| `ignoreRestartsWithExitCodeZero` | Whether restart events with an exit code of 0 should be ignored | default: `false`
| `slackWebhookUrl` | Slack webhook URL | required if slackWebhookurlSecretKeyRef is not present |
| `slackWebhookurlSecretKeyRef.key` | Slack webhook URL SecretKeyRef.key | |
| `slackWebhookurlSecretKeyRef.name` | Slack webhook URL SecretKeyRef.name | |
| `slackWebhookurlSecretKeyRef.name` | Slack webhook URL SecretKeyRef.name | |
| `useGooglechat` | Whether to use Google Chat for notifications | default: `true` |
| `googlechatWebhookUrl` | Google Chat webhook URL | required if googlechatWebhookUrlSecretKeyRef is not present |
| `googlechatWebhookUrlSecretKeyRef.key` | Google Chat webhook URL SecretKeyRef.key | |
| `googlechatWebhookUrlSecretKeyRef.name` | Google Chat webhook URL SecretKeyRef.name | |


## FAQ

Expand Down
186 changes: 182 additions & 4 deletions controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ import (
"bytes"
"context"
"fmt"
"os"

// "os"
"sort"
"time"

Expand All @@ -29,6 +32,7 @@ const (
type Controller struct {
clientset kubernetes.Interface
slack Slack
googleChat GoogleChat
informerFactory informers.SharedInformerFactory
podInformer coreinformers.PodInformer
queue workqueue.RateLimitingInterface
Expand Down Expand Up @@ -89,6 +93,60 @@ func NewController(clientset kubernetes.Interface, slack Slack) *Controller {
}
}

// NewControllerGooglechat builds a Controller whose notifications go to
// Google Chat.
//
// It creates a rate-limited work queue and a shared informer factory with a
// resync period of 0, then registers an update handler on the pod informer.
// The handler enqueues the pod's namespace/name key when the restart count of
// the new object is greater than that of the old object. Before that it drops
// updates for namespaces or pod names rejected by the watched/ignored filters
// and for pods whose restart count is above getIgnoreRestartCount().
//
// Only the googleChat field is populated on the returned Controller; the
// slack field keeps its zero value.
func NewControllerGooglechat(clientset kubernetes.Interface, googleChat GoogleChat) *Controller {
	const noResync = 0
	maxRestarts := getIgnoreRestartCount()

	workQueue := workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter())
	factory := informers.NewSharedInformerFactory(clientset, noResync)
	pods := factory.Core().V1().Pods()

	// onUpdate is invoked by the informer with the previous and current
	// versions of an updated object.
	onUpdate := func(prevObj interface{}, curObj interface{}) {
		previous, isPod := prevObj.(*v1.Pod)
		if !isPod {
			return
		}
		current, isPod := curObj.(*v1.Pod)
		if !isPod {
			return
		}

		// Namespace and pod-name filters, evaluated in the same order as a
		// short-circuiting "||" chain.
		switch {
		case !isWatchedNamespace(current.Namespace), isIgnoredNamespace(current.Namespace):
			return
		case !isWatchedPod(current.Name), isIgnoredPod(current.Name):
			return
		}

		restartsNow := getPodRestartCount(current)
		// Ignore when restartCount > ignoreRestartCount
		if restartsNow > maxRestarts {
			klog.Infof("Ignore: %s/%s restartCount: %d > %d\n", current.Namespace, current.Name, restartsNow, maxRestarts)
			return
		}

		restartsBefore := getPodRestartCount(previous)
		if restartsNow <= restartsBefore {
			return
		}

		// The key is only enqueued when it could be derived; the "Found"
		// line is logged either way.
		if podKey, keyErr := cache.MetaNamespaceKeyFunc(curObj); keyErr == nil {
			workQueue.Add(podKey)
		}
		klog.Infof("Found: %s/%s restarted, restartCount: %d -> %d\n", current.Namespace, current.Name, restartsBefore, restartsNow)
	}

	pods.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		UpdateFunc: onUpdate,
	})

	return &Controller{
		clientset:       clientset,
		informerFactory: factory,
		podInformer:     pods,
		queue:           workQueue,
		googleChat:      googleChat,
	}
}

// Run begins watching and syncing.
func (c *Controller) Run(workers int, stopCh chan struct{}) {
defer runtime.HandleCrash()
Expand Down Expand Up @@ -149,7 +207,7 @@ func (c *Controller) handleErr(err error, key interface{}) {
}

// This controller retries 3 times if something goes wrong. After that, it stops trying.
if c.queue.NumRequeues(key) < 3 {
if c.queue.NumRequeues(key) < 0 {
klog.Infof("Error syncing Pod %v: %v", key, err)

// Re-enqueue the key rate limited. Based on the rate limiter on the
Expand All @@ -173,10 +231,17 @@ func (c *Controller) getAndHandlePod(key string) error {
return err
}

err = c.handlePod(pod)
if err != nil {
return err
var errHandle error
if os.Getenv("USE_GOOGLE_CHAT") == "true" {
errHandle = c.handlePodGooglechat(pod)
} else {
errHandle = c.handlePod(pod)
}

if errHandle != nil {
return errHandle
}

return nil
}

Expand Down Expand Up @@ -293,6 +358,110 @@ func (c *Controller) handlePod(pod *v1.Pod) error {
return nil
}

// handlePodGooglechat sends a restart report for pod to Google Chat.
//
// If an entry for the pod exists in c.googleChat.History and fewer than
// c.googleChat.MuteSeconds seconds have elapsed since it was recorded, the
// pod is skipped and nil is returned.
//
// Otherwise the first container status that has a non-zero restart count and
// is not filtered out by shouldIgnoreRestartsWithExitCodeZero is reported as
// five consecutive messages: a headline, the pod status, the pod events, the
// node events and the container logs. After a successful report the send time
// is stored in the history, old history entries are cleaned, and no further
// container statuses are examined.
//
// Any error from gathering the information or from sending a message is
// returned immediately; in that case the history is not updated.
func (c *Controller) handlePodGooglechat(pod *v1.Pod) error {
	podKey := pod.Namespace + "/" + pod.Name
	now := time.Now().Local()

	// Mute window: do not report the same pod again too soon.
	if lastSent, seen := c.googleChat.History[podKey]; seen && int(now.Sub(lastSent).Seconds()) < c.googleChat.MuteSeconds {
		klog.Infof("Skip: %s, already sent %s ago.\n", podKey, duration.HumanDuration(time.Since(lastSent)))
		return nil
	}

	for _, cs := range pod.Status.ContainerStatuses {
		if cs.RestartCount == 0 {
			continue
		}
		if shouldIgnoreRestartsWithExitCodeZero(cs) {
			continue
		}

		klog.Infof("Handle: %s restarted, restartCount: %d\n", podKey, cs.RestartCount)

		podInfo, err := printPod(pod)
		if err != nil {
			return err
		}

		state, err := describeContainerState(cs)
		if err != nil {
			return err
		}

		reason := printContainerLastStateReason(cs)

		// Find the spec of the restarted container; it stays the zero value
		// when no container in the pod spec carries the same name.
		var spec v1.Container
		for i := range pod.Spec.Containers {
			if pod.Spec.Containers[i].Name == cs.Name {
				spec = pod.Spec.Containers[i]
				break
			}
		}
		resources, err := getContainerResource(spec)
		if err != nil {
			return err
		}

		podStatus := fmt.Sprintf("```%s```\n• Reason: `%s`\n• Pod Status\n```\n%s%s```\n", podInfo, reason, state, resources)

		// Gather everything before sending anything, so a lookup failure
		// produces no partial report.
		podEvents, err := c.getPodEvents(pod)
		if err != nil {
			return err
		}
		nodeEvents, err := c.getNodeAndEvents(pod)
		if err != nil {
			return err
		}
		rawLogs, err := c.getContainerLogs(pod, cs)
		if err != nil {
			return err
		}

		logSection := "• No Logs Before Restart\n"
		if rawLogs != "" {
			// Keep only the tail of the logs when the remaining budget
			// (4000 minus the length of the other sections) is positive
			// and the logs exceed it.
			budget := 4000 - (len(podStatus) + len(podEvents) + len(nodeEvents))
			if budget > 0 && len(rawLogs) > budget {
				rawLogs = rawLogs[len(rawLogs)-budget:]
			}
			logSection = fmt.Sprintf("• Pod Logs Before Restart\n```\n%s```\n", rawLogs)
		}

		msg := GoogleChatMessage{
			Text: fmt.Sprintf("*Pod restarted!*\n*cluster: `%s`, pod: `%s`, namespace: `%s`*\n", c.googleChat.ClusterName, pod.Name, pod.Namespace),
		}
		if err := c.googleChat.sendToRoom(msg); err != nil {
			return err
		}

		msg.Text = podStatus
		if err := c.googleChat.sendToRoomPodStatus(msg); err != nil {
			return err
		}

		msg.Text = podEvents
		if err := c.googleChat.sendToRoomPodEvent(msg); err != nil {
			return err
		}

		msg.Text = nodeEvents
		if err := c.googleChat.sendToRoomNodeEvents(msg); err != nil {
			return err
		}

		msg.Text = logSection
		if err := c.googleChat.sendToRoomContainerLogs(msg); err != nil {
			return err
		}

		// One report per pod: record the send time and stop after the first
		// reported container.
		c.googleChat.History[podKey] = now
		c.cleanOldGoogleChatHistory()
		return nil
	}

	return nil
}

func (c *Controller) getPodEvents(pod *v1.Pod) (out string, err error) {
events, err := c.clientset.CoreV1().Events(pod.Namespace).List(context.TODO(), metav1.ListOptions{FieldSelector: "type!=Normal"})
if err != nil {
Expand Down Expand Up @@ -375,6 +544,15 @@ func (c *Controller) cleanOldSlackHistory() {
}
}

// cleanOldGoogleChatHistory removes every entry from c.googleChat.History
// whose recorded send time is more than one hour before the current time.
//
// The loop ranges over the Google Chat history itself. It previously ranged
// over the Slack history, so stale Google Chat entries were never pruned when
// the Slack history was empty, and fresh ones could be removed based on
// unrelated Slack timestamps.
func (c *Controller) cleanOldGoogleChatHistory() {
	currentTime := time.Now().Local()
	// Deleting the current key while ranging over a map is permitted in Go.
	for pod, lastSentTime := range c.googleChat.History {
		if currentTime.Sub(lastSentTime).Hours() > 1 {
			delete(c.googleChat.History, pod)
		}
	}
}

// getSlackChannelFromPod gets custom slack channel from pod annotations or labels.
func getSlackChannelFromPod(pod *v1.Pod) string {
if slackChannel, ok := pod.GetAnnotations()[SlackChannelKey]; ok {
Expand Down
79 changes: 74 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,14 +1,83 @@
module kubecollect

go 1.16
go 1.22.2

require (
github.com/slack-go/slack v0.10.0 // indirect
golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f // indirect
github.com/joho/godotenv v1.5.1
github.com/slack-go/slack v0.10.0
k8s.io/api v0.23.0
k8s.io/apimachinery v0.23.0
k8s.io/client-go v0.23.0
k8s.io/klog/v2 v2.30.0
k8s.io/kubectl v0.23.0 // indirect
k8s.io/utils v0.0.0-20210930125809-cb0fa318a74b // indirect
k8s.io/kubectl v0.23.0
k8s.io/utils v0.0.0-20210930125809-cb0fa318a74b
)

require (
cloud.google.com/go/compute v1.23.4 // indirect
cloud.google.com/go/compute/metadata v0.2.3 // indirect
github.com/Azure/go-autorest v14.2.0+incompatible // indirect
github.com/Azure/go-autorest/autorest v0.11.18 // indirect
github.com/Azure/go-autorest/autorest/adal v0.9.13 // indirect
github.com/Azure/go-autorest/autorest/date v0.3.0 // indirect
github.com/Azure/go-autorest/logger v0.2.1 // indirect
github.com/Azure/go-autorest/tracing v0.6.0 // indirect
github.com/PuerkitoBio/purell v1.1.1 // indirect
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/evanphx/json-patch v4.12.0+incompatible // indirect
github.com/fatih/camelcase v1.0.0 // indirect
github.com/form3tech-oss/jwt-go v3.2.3+incompatible // indirect
github.com/go-errors/errors v1.0.1 // indirect
github.com/go-logr/logr v1.4.1 // indirect
github.com/go-openapi/jsonpointer v0.19.5 // indirect
github.com/go-openapi/jsonreference v0.19.5 // indirect
github.com/go-openapi/swag v0.19.14 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/google/btree v1.0.1 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.1.0 // indirect
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/googleapis/gnostic v0.5.5 // indirect
github.com/gorilla/websocket v1.4.2 // indirect
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7 // indirect
github.com/imdario/mergo v0.3.5 // indirect
github.com/inconshreveable/mousetrap v1.0.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect
github.com/mailru/easyjson v0.7.6 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/spf13/cobra v1.2.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/testify v1.7.0 // indirect
github.com/xlab/treeprint v0.0.0-20181112141820-a009c3971eca // indirect
go.starlark.net v0.0.0-20200306205701-8dd3e2ee1dd5 // indirect
golang.org/x/crypto v0.21.0 // indirect
golang.org/x/net v0.22.0 // indirect
golang.org/x/oauth2 v0.18.0 // indirect
golang.org/x/sys v0.18.0 // indirect
golang.org/x/term v0.18.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac // indirect
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect
google.golang.org/appengine v1.6.8 // indirect
google.golang.org/protobuf v1.33.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
k8s.io/cli-runtime v0.23.0 // indirect
k8s.io/kube-openapi v0.0.0-20211115234752-e816edb12b65 // indirect
sigs.k8s.io/json v0.0.0-20211020170558-c049b76a60c6 // indirect
sigs.k8s.io/kustomize/api v0.10.1 // indirect
sigs.k8s.io/kustomize/kyaml v0.13.0 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.1.2 // indirect
sigs.k8s.io/yaml v1.2.0 // indirect
)
Loading