Skip to content

Commit

Permalink
CHAOS-471: Dont allow disruptions to be created unless they specify w…
Browse files Browse the repository at this point in the history
…here to run (#663)

* Dont allow disruptions to be created unless they specify where to run
  • Loading branch information
ptnapoleon authored Mar 8, 2023
1 parent 83bc409 commit 0dc33f0
Show file tree
Hide file tree
Showing 43 changed files with 115 additions and 12 deletions.
15 changes: 15 additions & 0 deletions api/v1beta1/disruption_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ var handlerEnabled bool
var defaultDuration time.Duration
var cloudServicesProvidersManager *cloudservice.CloudServicesProvidersManager
var chaosNamespace string
var safemodeEnvironment string

const SafemodeEnvironmentAnnotation = "chaos.datadoghq.com/environment"

func (r *Disruption) SetupWebhookWithManager(setupWebhookConfig utils.SetupWebhookWithManagerConfig) error {
if err := ddmark.InitLibrary(EmbeddedChaosAPI, chaostypes.DDMarkChaoslibPrefix); err != nil {
Expand All @@ -63,6 +66,7 @@ func (r *Disruption) SetupWebhookWithManager(setupWebhookConfig utils.SetupWebho
defaultDuration = setupWebhookConfig.DefaultDurationFlag
cloudServicesProvidersManager = setupWebhookConfig.CloudServicesProvidersManager
chaosNamespace = setupWebhookConfig.ChaosNamespace
safemodeEnvironment = setupWebhookConfig.Environment

return ctrl.NewWebhookManagedBy(setupWebhookConfig.Manager).
For(r).
Expand Down Expand Up @@ -100,6 +104,17 @@ func (r *Disruption) ValidateCreate() error {
return fmt.Errorf("invalid disruption name: %w", err)
}

if safemodeEnvironment != "" {
disruptionEnv, ok := r.Annotations[SafemodeEnvironmentAnnotation]
if !ok {
return fmt.Errorf("disruption does not specify an environment to run, but this controller requires it. Set the annotation `%s:\"%s\"` to run on this controller", SafemodeEnvironmentAnnotation, safemodeEnvironment)
}

if disruptionEnv != safemodeEnvironment {
return fmt.Errorf("disruption is configured to run in %s but has been applied in %s. Set the annotation `%s:\"%s\"` to run on this controller", disruptionEnv, safemodeEnvironment, SafemodeEnvironmentAnnotation, safemodeEnvironment)
}
}

// handle a disruption using the onInit feature without the handler being enabled
if !handlerEnabled && r.Spec.OnInit {
return errors.New("the chaos handler is disabled but the disruption onInit field is set to true, please enable the handler by specifying the --handler-enabled flag to the controller if you want to use the onInit feature (requires Kubernetes >= 1.15)")
Expand Down
1 change: 1 addition & 0 deletions chart/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ data:
host: {{ .Values.controller.webhook.host | quote }}
port: {{ .Values.controller.webhook.port }}
safeMode:
environment: {{ .Values.controller.safeMode.environment }}
enable: {{ .Values.controller.safeMode.enable }}
namespaceThreshold: {{ .Values.controller.safeMode.namespaceThreshold }}
clusterThreshold: {{ .Values.controller.safeMode.clusterThreshold }}
Expand Down
1 change: 1 addition & 0 deletions chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ controller:
host: "" # host to use to serve requests
port: 9443 # port to use to serve requests
safeMode:
environment: lima
enable: false
namespaceThreshold: 80
clusterThreshold: 66
Expand Down
5 changes: 3 additions & 2 deletions controllers/cache_handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,9 @@ var _ = Describe("Cache Handler verifications", func() {
targetLabels = targetPodA.Labels
disruption = &v1beta1.Disruption{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
Name: "foo",
Namespace: "default",
Annotations: map[string]string{v1beta1.SafemodeEnvironmentAnnotation: "lima"},
},
Spec: v1beta1.DisruptionSpec{
DryRun: false,
Expand Down
15 changes: 9 additions & 6 deletions controllers/disruption_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,9 @@ var _ = Describe("Disruption Controller", func() {
BeforeEach(func() {
disruption = &chaosv1beta1.Disruption{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
Name: "foo",
Namespace: "default",
Annotations: map[string]string{chaosv1beta1.SafemodeEnvironmentAnnotation: "lima"},
},
Spec: chaosv1beta1.DisruptionSpec{
DryRun: true,
Expand Down Expand Up @@ -280,8 +281,9 @@ var _ = Describe("Disruption Controller", func() {
BeforeEach(func() {
disruption = &chaosv1beta1.Disruption{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
Name: "foo",
Namespace: "default",
Annotations: map[string]string{chaosv1beta1.SafemodeEnvironmentAnnotation: "lima"},
},
Spec: chaosv1beta1.DisruptionSpec{
DryRun: false,
Expand Down Expand Up @@ -598,8 +600,9 @@ var _ = Describe("Disruption Controller", func() {
BeforeEach(func() {
disruption = &chaosv1beta1.Disruption{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
Namespace: "default",
Name: "foo",
Namespace: "default",
Annotations: map[string]string{chaosv1beta1.SafemodeEnvironmentAnnotation: "lima"},
},
Spec: chaosv1beta1.DisruptionSpec{
DryRun: false,
Expand Down
8 changes: 7 additions & 1 deletion docs/safemode.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,16 @@ Safemode is always enabled by default and will require manual disabling of safet
`unsafeMode.disableAll` turns off all safety nets. The other options under `unsafeMode` represent individual safety nets which can be disabled independently.
Please take a look at the example below to see how to use `unsafeMode`.

## Explicit Disruption Environments

The chaos-controller operates within the boundaries of a single Kubernetes cluster. Users with multiple Kubernetes clusters in their environments may want to enforce the added safety that Disruptions
intended for a given cluster cannot be accidentally run anywhere else, especially across the dev/prod boundary. Operators of the chaos-controller can optionally set the
`controller.safeMode.environment` field in the config map to a string of their choice. Disruptions will then be rejected at creation unless they have a `chaos.datadoghq.com/environment` annotation set to an identical string.

## Ignoring Safety Nets

Because the list of safety nets to be implemented will grow in the future, there will surely be overlap with safety nets which will make it difficult for a user who is confident a specific safety net is not necessary but unsure if others will be.
Therefore the controller allows for you to disable specific safety nets in the Safemode Spec. Checkout out example below to see how to remove certain safety nets.
Therefore, the controller allows you to disable specific safety nets in the Safemode Spec. Check out the example below to see how to remove certain safety nets.
Keep in mind that all safety nets are turned on by default.

## Configuring Safety Nets
Expand Down
2 changes: 2 additions & 0 deletions examples/advanced_selector.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: advanced-selector
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: pod
advancedSelector: # advanced selectors can select targets on something else than an exact key/value match
Expand Down
2 changes: 2 additions & 0 deletions examples/annotation_filter.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: network-drop
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: pod
selector:
Expand Down
2 changes: 2 additions & 0 deletions examples/complete.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: disruption-sample
namespace: chaos-demo # disruption resource must be in the same namespace as targeted pods
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
dryRun: false # optional, enable dry-run mode (chaos pods will be created but won't inject anything)
reporting: # optional, add custom notification for this disruption
Expand Down
2 changes: 2 additions & 0 deletions examples/container_failure_all_forced.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: container-failure-all-forced
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
selector:
app: demo-curl
Expand Down
2 changes: 2 additions & 0 deletions examples/container_failure_all_graceful.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: container-failure-all-graceful
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
selector:
app: demo-curl
Expand Down
2 changes: 2 additions & 0 deletions examples/container_failure_forced.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: container-failure-forced
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
selector:
app: demo-curl
Expand Down
2 changes: 2 additions & 0 deletions examples/container_failure_graceful.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: container-failure-graceful
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
selector:
app: demo-curl
Expand Down
2 changes: 2 additions & 0 deletions examples/containers_targeting.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: containers-targeting
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: pod
selector:
Expand Down
2 changes: 2 additions & 0 deletions examples/count_percentage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: count-percentage
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: pod
selector:
Expand Down
2 changes: 2 additions & 0 deletions examples/cpu_pressure.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: cpu-pressure
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: pod
selector:
Expand Down
2 changes: 2 additions & 0 deletions examples/disk_pressure_read.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: disk-pressure-read
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: pod
selector:
Expand Down
2 changes: 2 additions & 0 deletions examples/disk_pressure_read_unsafemode.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: disk-pressure-read
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: pod
selector:
Expand Down
2 changes: 2 additions & 0 deletions examples/disk_pressure_write.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: disk-pressure-write
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: pod
selector:
Expand Down
2 changes: 2 additions & 0 deletions examples/dns.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: dns
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: pod
selector:
Expand Down
2 changes: 2 additions & 0 deletions examples/dry_run.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: dry-run
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
dryRun: true # enable dry-run mode (chaos pods will be created but won't inject anything)
level: pod
Expand Down
2 changes: 2 additions & 0 deletions examples/grpc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: grpc
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: pod
selector:
Expand Down
2 changes: 2 additions & 0 deletions examples/grpc_error.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: grpc-error
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: pod
selector:
Expand Down
2 changes: 2 additions & 0 deletions examples/grpc_override.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: grpc-override
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: pod
selector:
Expand Down
2 changes: 2 additions & 0 deletions examples/level_node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: level-node
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: node # impact the whole node instead of a single pod
selector:
Expand Down
2 changes: 2 additions & 0 deletions examples/network_allowed_hosts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: network-drop
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: pod
selector:
Expand Down
2 changes: 2 additions & 0 deletions examples/network_bandwidth_limitation.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: network-bandwidth-limitation
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: pod
selector:
Expand Down
2 changes: 2 additions & 0 deletions examples/network_cloud.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: network-cloud
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: pod
selector:
Expand Down
2 changes: 2 additions & 0 deletions examples/network_corrupt.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: network-corrupt
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: pod
selector:
Expand Down
2 changes: 2 additions & 0 deletions examples/network_delay.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: network-delay
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: pod
selector:
Expand Down
2 changes: 2 additions & 0 deletions examples/network_drop.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: network-drop
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: pod
selector:
Expand Down
2 changes: 2 additions & 0 deletions examples/network_duplication.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: network-duplication
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
selector:
app: demo-curl
Expand Down
2 changes: 2 additions & 0 deletions examples/network_filters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: network-filters
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: pod
selector:
Expand Down
2 changes: 2 additions & 0 deletions examples/network_ingress.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: network-ingress
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
level: pod
selector:
Expand Down
2 changes: 2 additions & 0 deletions examples/node_failure.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: node-failure
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
selector:
app: demo-curl
Expand Down
2 changes: 2 additions & 0 deletions examples/node_failure_shutdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ kind: Disruption
metadata:
name: node-failure-shutdown
namespace: chaos-demo
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
selector:
app: demo-curl
Expand Down
2 changes: 2 additions & 0 deletions examples/on_init.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ apiVersion: chaos.datadoghq.com/v1beta1
kind: Disruption
metadata:
name: on-init
annotations:
chaos.datadoghq.com/environment: "lima"
spec:
onInit: true # apply the disruption on pod initialization (it requires the pod to target to be redeployed with the chaos.datadoghq.com/disrupt-on-init label to be held in the pending state)
level: pod
Expand Down
Loading

0 comments on commit 0dc33f0

Please sign in to comment.