Skip to content

Commit

Permalink
CHAOS-139: Reconcile disruptions when chaos pods are manually deleted (
Browse files Browse the repository at this point in the history
…#451)

* add a reconcile map func

* WIP, but functional commit that informs on namespaced pods

* finished commit that watches pods

* increase informer resync

* we watch pods, we dont own them

* used channel backwards

* add the MetricInformed metric

* slow down the informer

* align tags for logger and metric
  • Loading branch information
ptnapoleon authored Dec 8, 2021
1 parent be96435 commit a567915
Show file tree
Hide file tree
Showing 311 changed files with 23,272 additions and 7 deletions.
98 changes: 98 additions & 0 deletions LICENSE-3rdparty.csv
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,65 @@ k8s.io/client-go,k8s.io/client-go/applyconfigurations/storage/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/discovery,Apache-2.0
k8s.io/client-go,k8s.io/client-go/discovery/fake,Apache-2.0
k8s.io/client-go,k8s.io/client-go/dynamic,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/admissionregistration,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/admissionregistration/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/admissionregistration/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/apiserverinternal,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/apiserverinternal/v1alpha1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/apps,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/apps/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/apps/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/apps/v1beta2,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/autoscaling,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/autoscaling/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/autoscaling/v2beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/autoscaling/v2beta2,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/batch,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/batch/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/batch/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/certificates,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/certificates/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/certificates/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/coordination,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/coordination/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/coordination/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/core,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/core/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/discovery,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/discovery/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/discovery/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/events,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/events/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/events/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/extensions,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/extensions/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/flowcontrol,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/flowcontrol/v1alpha1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/flowcontrol/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/internalinterfaces,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/networking,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/networking/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/networking/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/node,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/node/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/node/v1alpha1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/node/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/policy,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/policy/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/policy/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/rbac,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/rbac/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/rbac/v1alpha1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/rbac/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/scheduling,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/scheduling/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/scheduling/v1alpha1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/scheduling/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/storage,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/storage/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/storage/v1alpha1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/informers/storage/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/kubernetes,Apache-2.0
k8s.io/client-go,k8s.io/client-go/kubernetes/fake,Apache-2.0
k8s.io/client-go,k8s.io/client-go/kubernetes/scheme,Apache-2.0
Expand Down Expand Up @@ -688,6 +747,45 @@ k8s.io/client-go,k8s.io/client-go/kubernetes/typed/storage/v1alpha1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/kubernetes/typed/storage/v1alpha1/fake,Apache-2.0
k8s.io/client-go,k8s.io/client-go/kubernetes/typed/storage/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/kubernetes/typed/storage/v1beta1/fake,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/admissionregistration/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/admissionregistration/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/apiserverinternal/v1alpha1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/apps/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/apps/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/apps/v1beta2,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/autoscaling/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/autoscaling/v2beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/autoscaling/v2beta2,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/batch/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/batch/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/certificates/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/certificates/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/coordination/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/coordination/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/core/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/discovery/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/discovery/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/events/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/events/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/extensions/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/flowcontrol/v1alpha1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/flowcontrol/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/networking/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/networking/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/node/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/node/v1alpha1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/node/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/policy/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/policy/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/rbac/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/rbac/v1alpha1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/rbac/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/scheduling/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/scheduling/v1alpha1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/scheduling/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/storage/v1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/storage/v1alpha1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/listers/storage/v1beta1,Apache-2.0
k8s.io/client-go,k8s.io/client-go/metadata,Apache-2.0
k8s.io/client-go,k8s.io/client-go/pkg/apis/clientauthentication,Apache-2.0
k8s.io/client-go,k8s.io/client-go/pkg/apis/clientauthentication/install,Apache-2.0
Expand Down
33 changes: 31 additions & 2 deletions controllers/disruption_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,14 @@ import (
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
kubeinformers "k8s.io/client-go/informers"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"

chaosv1beta1 "github.com/DataDog/chaos-controller/api/v1beta1"
"github.com/DataDog/chaos-controller/env"
Expand Down Expand Up @@ -1046,10 +1050,35 @@ func (r *DisruptionReconciler) recordEventOnTarget(instance *chaosv1beta1.Disrup
}

// SetupWithManager setups the current reconciler with the given manager
func (r *DisruptionReconciler) SetupWithManager(mgr ctrl.Manager) error {
func (r *DisruptionReconciler) SetupWithManager(mgr ctrl.Manager, kubeInformerFactory kubeinformers.SharedInformerFactory) error {
podToDisruption := func(c client.Object) []reconcile.Request {
// podtoDisruption is a function that maps pods to disruptions. it is meant to be used as an event handler on a pod informer
// this function should safely return an empty list of requests to reconcile if the object we receive is not actually a chaos pod
// which we determine by checking the object labels for the name and namespace labels that we add to all injector pods
disruption := []reconcile.Request{}

if r.log != nil {
r.log.Infow("watching event from pod", "podName", c.GetName(), "podNamespace", c.GetNamespace())
}

r.handleMetricSinkError(r.MetricsSink.MetricInformed([]string{"podName:" + c.GetName(), "podNamespace:" + c.GetNamespace()}))

labels := c.GetLabels()
name := labels[chaostypes.DisruptionNameLabel]
namespace := labels[chaostypes.DisruptionNamespaceLabel]

if name != "" && namespace != "" {
disruption = append(disruption, reconcile.Request{NamespacedName: types.NamespacedName{Name: name, Namespace: namespace}})
}

return disruption
}

informer := kubeInformerFactory.Core().V1().Pods().Informer()

return ctrl.NewControllerManagedBy(mgr).
For(&chaosv1beta1.Disruption{}).
Owns(&corev1.Pod{}).
Watches(&source.Informer{Informer: informer}, handler.EnqueueRequestsFromMapFunc(podToDisruption)).
Complete(r)
}

Expand Down
12 changes: 11 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ import (
"github.com/fsnotify/fsnotify"
"github.com/spf13/pflag"
"k8s.io/apimachinery/pkg/runtime"
kubeinformers "k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"

Expand Down Expand Up @@ -294,11 +296,17 @@ func main() {
ExpiredDisruptionGCDelay: cfg.Controller.ExpiredDisruptionGCDelay,
}

if err := r.SetupWithManager(mgr); err != nil {
informerClient := kubernetes.NewForConfigOrDie(ctrl.GetConfigOrDie())
kubeInformerFactory := kubeinformers.NewSharedInformerFactoryWithOptions(informerClient, time.Minute*5, kubeinformers.WithNamespace(cfg.Injector.ServiceAccount.Namespace))

if err := r.SetupWithManager(mgr, kubeInformerFactory); err != nil {
logger.Errorw("unable to create controller", "controller", "Disruption", "error", err)
os.Exit(1) //nolint:gocritic
}

stopCh := make(chan struct{})
kubeInformerFactory.Start(stopCh)

go r.ReportMetrics()

// register disruption validating webhook
Expand Down Expand Up @@ -330,6 +338,8 @@ func main() {
logger.Infow("restarting chaos-controller")

if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
stopCh <- struct{}{} // stop the informer

logger.Errorw("problem running manager", "error", err)
os.Exit(1) //nolint:gocritic
}
Expand Down
13 changes: 9 additions & 4 deletions metrics/datadog/datadog.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ func (d *Sink) MetricPodsGauge(gauge float64) error {
return d.client.Gauge(metricPrefixController+"pods.gauge", gauge, []string{}, 1)
}

// MetricRestart sends an increment of the
// MetricRestart sends an increment of the controller restart metric
func (d *Sink) MetricRestart() error {
return d.metricWithStatus(metricPrefixController+"restart", []string{})
}
Expand All @@ -141,21 +141,26 @@ func (d *Sink) MetricValidationFailed(tags []string) error {
return d.metricWithStatus(metricPrefixController+"validation.failed", tags)
}

// MetricValidationCreated increments the failed validation metric
// MetricValidationCreated increments the created validation metric
func (d *Sink) MetricValidationCreated(tags []string) error {
return d.metricWithStatus(metricPrefixController+"validation.created", tags)
}

// MetricValidationUpdated increments the failed validation metric
// MetricValidationUpdated increments the updated validation metric
func (d *Sink) MetricValidationUpdated(tags []string) error {
return d.metricWithStatus(metricPrefixController+"validation.updated", tags)
}

// MetricValidationDeleted increments the failed validation metric
// MetricValidationDeleted increments the deleted validation metric
func (d *Sink) MetricValidationDeleted(tags []string) error {
return d.metricWithStatus(metricPrefixController+"validation.deleted", tags)
}

// MetricInformed increments when the pod informer receives an event to process before reconciliation
func (d *Sink) MetricInformed(tags []string) error {
return d.metricWithStatus(metricPrefixController+"informed", tags)
}

func boolToStatus(succeed bool) string {
var status string
if succeed {
Expand Down
1 change: 1 addition & 0 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ type Sink interface {
MetricValidationCreated(tags []string) error
MetricValidationUpdated(tags []string) error
MetricValidationDeleted(tags []string) error
MetricInformed(tags []string) error
}

// GetSink returns an initiated sink
Expand Down
6 changes: 6 additions & 0 deletions metrics/noop/noop.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,9 @@ func (n *Sink) MetricValidationDeleted(tags []string) error {

return nil
}

func (n *Sink) MetricInformed(tags []string) error {
fmt.Printf("NOOP: MetricInformed %s\n", tags)

return nil
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit a567915

Please sign in to comment.