From 481344ed19d34b13cee73557c74af89399adb26d Mon Sep 17 00:00:00 2001 From: Rocio Roman Date: Tue, 8 Nov 2022 14:03:07 -0800 Subject: [PATCH 1/3] bumped golang version from 1.17->1.19 Signed-off-by: rociomroman Signed-off-by: Rocio Roman --- build/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/Dockerfile b/build/Dockerfile index 5840d24a9..149d95855 100644 --- a/build/Dockerfile +++ b/build/Dockerfile @@ -1,6 +1,6 @@ # Multi-stage docker build # Build stage -FROM golang:1.17 AS builder +FROM golang:1.19 AS builder ARG TARGETOS=linux ARG TARGETARCH From e461c82c6a4c48a41a2c88db9803d8e15442728e Mon Sep 17 00:00:00 2001 From: Shubham Chaudhary Date: Mon, 14 Nov 2022 11:30:41 +0530 Subject: [PATCH 2/3] chore(spring-boot): spliting spring-boot-chaos experiment to separate experiments (#594) Signed-off-by: Shubham Chaudhary Signed-off-by: Shubham Chaudhary Signed-off-by: Rocio Roman --- bin/experiment/experiment.go | 18 +- .../lib/spring-boot-chaos.go | 15 +- experiments/spring-boot/README.md | 15 -- .../spring-boot-app-kill/README.md | 15 ++ .../experiment/spring-boot-app-kill.go | 214 ++++++++++++++++++ .../spring-boot-app-kill/rbac.yaml | 36 +++ .../spring-boot-app-kill/test/test.yml | 84 +++++++ .../spring-boot-cpu-stress/README.md | 15 ++ .../experiment/spring-boot-cpu-stress.go | 214 ++++++++++++++++++ .../spring-boot-cpu-stress/rbac.yaml | 36 +++ .../spring-boot-cpu-stress/test/test.yml | 91 ++++++++ .../spring-boot-exceptions/README.md | 15 ++ .../experiment/spring-boot-exceptions.go | 214 ++++++++++++++++++ .../spring-boot-exceptions/rbac.yaml | 36 +++ .../spring-boot-exceptions/test/test.yml | 86 +++++++ .../spring-boot/spring-boot-latency/README.md | 15 ++ .../experiment/spring-boot-latency.go} | 2 +- .../{ => spring-boot-latency}/rbac.yaml | 16 +- .../spring-boot-latency/test/test.yml | 86 +++++++ .../spring-boot-memory-stress/README.md | 15 ++ .../experiment/spring-boot-memory-stress.go | 214 ++++++++++++++++++ 
.../spring-boot-memory-stress/rbac.yaml | 36 +++ .../test/test.yml | 49 +--- .../environment/environment.go | 158 ++++++++----- .../spring-boot-chaos/types/types.go | 68 ++++-- 25 files changed, 1602 insertions(+), 161 deletions(-) delete mode 100644 experiments/spring-boot/README.md create mode 100644 experiments/spring-boot/spring-boot-app-kill/README.md create mode 100644 experiments/spring-boot/spring-boot-app-kill/experiment/spring-boot-app-kill.go create mode 100644 experiments/spring-boot/spring-boot-app-kill/rbac.yaml create mode 100644 experiments/spring-boot/spring-boot-app-kill/test/test.yml create mode 100644 experiments/spring-boot/spring-boot-cpu-stress/README.md create mode 100644 experiments/spring-boot/spring-boot-cpu-stress/experiment/spring-boot-cpu-stress.go create mode 100644 experiments/spring-boot/spring-boot-cpu-stress/rbac.yaml create mode 100644 experiments/spring-boot/spring-boot-cpu-stress/test/test.yml create mode 100644 experiments/spring-boot/spring-boot-exceptions/README.md create mode 100644 experiments/spring-boot/spring-boot-exceptions/experiment/spring-boot-exceptions.go create mode 100644 experiments/spring-boot/spring-boot-exceptions/rbac.yaml create mode 100644 experiments/spring-boot/spring-boot-exceptions/test/test.yml create mode 100644 experiments/spring-boot/spring-boot-latency/README.md rename experiments/spring-boot/{spring-boot-chaos/experiment/spring-boot-chaos.go => spring-boot-latency/experiment/spring-boot-latency.go} (99%) rename experiments/spring-boot/{ => spring-boot-latency}/rbac.yaml (73%) create mode 100644 experiments/spring-boot/spring-boot-latency/test/test.yml create mode 100644 experiments/spring-boot/spring-boot-memory-stress/README.md create mode 100644 experiments/spring-boot/spring-boot-memory-stress/experiment/spring-boot-memory-stress.go create mode 100644 experiments/spring-boot/spring-boot-memory-stress/rbac.yaml rename experiments/spring-boot/{spring-boot-chaos => 
spring-boot-memory-stress}/test/test.yml (66%) diff --git a/bin/experiment/experiment.go b/bin/experiment/experiment.go index faae6377c..3b39f267b 100755 --- a/bin/experiment/experiment.go +++ b/bin/experiment/experiment.go @@ -56,7 +56,11 @@ import ( ebsLossByTag "github.com/litmuschaos/litmus-go/experiments/kube-aws/ebs-loss-by-tag/experiment" ec2TerminateByID "github.com/litmuschaos/litmus-go/experiments/kube-aws/ec2-terminate-by-id/experiment" ec2TerminateByTag "github.com/litmuschaos/litmus-go/experiments/kube-aws/ec2-terminate-by-tag/experiment" - springBootChaos "github.com/litmuschaos/litmus-go/experiments/spring-boot/spring-boot-chaos/experiment" + springBootAppKill "github.com/litmuschaos/litmus-go/experiments/spring-boot/spring-boot-app-kill/experiment" + springBootCpuStress "github.com/litmuschaos/litmus-go/experiments/spring-boot/spring-boot-cpu-stress/experiment" + springBootExceptions "github.com/litmuschaos/litmus-go/experiments/spring-boot/spring-boot-exceptions/experiment" + springBootLatency "github.com/litmuschaos/litmus-go/experiments/spring-boot/spring-boot-latency/experiment" + springBootMemoryStress "github.com/litmuschaos/litmus-go/experiments/spring-boot/spring-boot-memory-stress/experiment" vmpoweroff "github.com/litmuschaos/litmus-go/experiments/vmware/vm-poweroff/experiment" "github.com/litmuschaos/litmus-go/pkg/clients" @@ -182,8 +186,16 @@ func main() { gcpVMInstanceStopByLabel.GCPVMInstanceStopByLabel(clients) case "gcp-vm-disk-loss-by-label": gcpVMDiskLossByLabel.GCPVMDiskLossByLabel(clients) - case "spring-boot-chaos": - springBootChaos.Experiment(clients) + case "spring-boot-cpu-stress": + springBootCpuStress.Experiment(clients) + case "spring-boot-memory-stress": + springBootMemoryStress.Experiment(clients) + case "spring-boot-latency": + springBootLatency.Experiment(clients) + case "spring-boot-exceptions": + springBootExceptions.Experiment(clients) + case "spring-boot-app-kill": + springBootAppKill.Experiment(clients) default: 
log.Errorf("Unsupported -name %v, please provide the correct value of -name args", *experimentName) return diff --git a/chaoslib/litmus/spring-boot-chaos/lib/spring-boot-chaos.go b/chaoslib/litmus/spring-boot-chaos/lib/spring-boot-chaos.go index 9b0b614a5..4e1d3501c 100644 --- a/chaoslib/litmus/spring-boot-chaos/lib/spring-boot-chaos.go +++ b/chaoslib/litmus/spring-boot-chaos/lib/spring-boot-chaos.go @@ -64,13 +64,6 @@ func PrepareChaos(experimentsDetails *experimentTypes.ExperimentDetails, clients "Controller": experimentsDetails.ChaosMonkeyWatchers.Controller, "RestController": experimentsDetails.ChaosMonkeyWatchers.RestController, }) - log.InfoWithValues("[Info]: Chaos monkeys assaults will be injected to the target pods as follows", logrus.Fields{ - "CPU Assault": experimentsDetails.ChaosMonkeyAssault.CPUActive, - "Memory Assault": experimentsDetails.ChaosMonkeyAssault.MemoryActive, - "Kill App Assault": experimentsDetails.ChaosMonkeyAssault.KillApplicationActive, - "Latency Assault": experimentsDetails.ChaosMonkeyAssault.LatencyActive, - "Exception Assault": experimentsDetails.ChaosMonkeyAssault.ExceptionsActive, - }) switch strings.ToLower(experimentsDetails.Sequence) { case "serial": @@ -156,12 +149,8 @@ func setChaosMonkeyWatchers(chaosMonkeyPort string, chaosMonkeyPath string, watc return nil } -func startAssault(chaosMonkeyPort string, chaosMonkeyPath string, assault experimentTypes.ChaosMonkeyAssault, pod corev1.Pod) error { - jsonValue, err := json.Marshal(assault) - if err != nil { - return err - } - if err := setChaosMonkeyAssault(chaosMonkeyPort, chaosMonkeyPath, jsonValue, pod); err != nil { +func startAssault(chaosMonkeyPort string, chaosMonkeyPath string, assault []byte, pod corev1.Pod) error { + if err := setChaosMonkeyAssault(chaosMonkeyPort, chaosMonkeyPath, assault, pod); err != nil { return err } log.Infof("[Chaos]: Activating Chaos Monkey assault on pod: %v", pod.Name) diff --git a/experiments/spring-boot/README.md 
b/experiments/spring-boot/README.md deleted file mode 100644 index 1b3a9608d..000000000 --- a/experiments/spring-boot/README.md +++ /dev/null @@ -1,15 +0,0 @@ -## Experiment Metadata - - - - - - - - - - - - -
Name Description Documentation Link
Chaos Monkey Spring Boot This experiment allows injecting Chaos Monkey assaults on Spring Boot applications which have the [Chaos Monkey for Spring Boot](https://codecentric.github.io/chaos-monkey-spring-boot/) in their classpath. It can target random pods with a Spring Boot application and allows configuring the assaults to inject (exception, latency, CPU, memory). It tests the resiliency of the system when some applications are having unexpected faulty behavior. TODO
- diff --git a/experiments/spring-boot/spring-boot-app-kill/README.md b/experiments/spring-boot/spring-boot-app-kill/README.md new file mode 100644 index 000000000..030ed188a --- /dev/null +++ b/experiments/spring-boot/spring-boot-app-kill/README.md @@ -0,0 +1,15 @@ +## Experiment Metadata + + + + + + + + + + + + +
Name Description Documentation Link
Spring Boot App Kill This experiment allows injecting Chaos Monkey app-kill assaults on Spring Boot applications, which have the [Chaos Monkey for Spring Boot](https://codecentric.github.io/chaos-monkey-spring-boot/) in their classpath. It can target random pods with a Spring Boot application and allows configuring the assaults to inject app-kill. It tests the resiliency of the system when some applications are having unexpected faulty behavior. TODO
+ diff --git a/experiments/spring-boot/spring-boot-app-kill/experiment/spring-boot-app-kill.go b/experiments/spring-boot/spring-boot-app-kill/experiment/spring-boot-app-kill.go new file mode 100644 index 000000000..63fd85374 --- /dev/null +++ b/experiments/spring-boot/spring-boot-app-kill/experiment/spring-boot-app-kill.go @@ -0,0 +1,214 @@ +package experiment + +import ( + "os" + + "github.com/litmuschaos/chaos-operator/api/litmuschaos/v1alpha1" + litmusLIB "github.com/litmuschaos/litmus-go/chaoslib/litmus/spring-boot-chaos/lib" + "github.com/litmuschaos/litmus-go/pkg/clients" + "github.com/litmuschaos/litmus-go/pkg/events" + "github.com/litmuschaos/litmus-go/pkg/log" + "github.com/litmuschaos/litmus-go/pkg/probe" + "github.com/litmuschaos/litmus-go/pkg/result" + experimentEnv "github.com/litmuschaos/litmus-go/pkg/spring-boot/spring-boot-chaos/environment" + experimentTypes "github.com/litmuschaos/litmus-go/pkg/spring-boot/spring-boot-chaos/types" + "github.com/litmuschaos/litmus-go/pkg/status" + "github.com/litmuschaos/litmus-go/pkg/types" + "github.com/litmuschaos/litmus-go/pkg/utils/common" + "github.com/sirupsen/logrus" +) + +// Experiment contains steps to inject chaos +func Experiment(clients clients.ClientSets) { + + experimentsDetails := experimentTypes.ExperimentDetails{} + resultDetails := types.ResultDetails{} + eventsDetails := types.EventDetails{} + chaosDetails := types.ChaosDetails{} + + //Fetching all the ENV passed from the runner pod + log.Infof("[PreReq]: Getting the ENV for the %v experiment", os.Getenv("EXPERIMENT_NAME")) + experimentEnv.GetENV(&experimentsDetails, "spring-boot-app-kill") + + // Initialize the chaos attributes + types.InitialiseChaosVariables(&chaosDetails) + + // Initialize Chaos Result Parameters + types.SetResultAttributes(&resultDetails, chaosDetails) + + if experimentsDetails.EngineName != "" { + // Initialize the probe details. 
Bail out upon error, as we haven't entered exp business logic yet + if err := probe.InitializeProbesInChaosResultDetails(&chaosDetails, clients, &resultDetails); err != nil { + log.Errorf("Unable to initialize the probes, err: %v", err) + return + } + } + + //Updating the chaos result in the beginning of experiment + log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) + if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { + log.Errorf("Unable to Create the Chaos Result, err: %v", err) + failStep := "[pre-chaos]: Failed to update the chaos result of spring-boot-chaos experiment (SOT), err: " + err.Error() + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + + // Set the chaos result uid + _ = result.SetResultUID(&resultDetails, clients, &chaosDetails) + + // generating the event in chaosResult to mark the verdict as awaited + msg := "experiment: " + experimentsDetails.ExperimentName + ", Result: Awaited" + types.SetResultEventAttributes(&eventsDetails, types.AwaitedVerdict, msg, "Normal", &resultDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosResult") + + //DISPLAY THE APP INFORMATION + log.InfoWithValues("[Info]: The application information is as follows", logrus.Fields{ + "Namespace": experimentsDetails.AppNS, + "Label": experimentsDetails.AppLabel, + "Chaos Duration": experimentsDetails.ChaosDuration, + }) + + // Calling AbortWatcher go routine, it will continuously watch for the abort signal and generate the required events and result + go common.AbortWatcherWithoutExit(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) + + // Select targeted pods + log.Infof("[PreCheck]: Geting targeted pods list") + if err := litmusLIB.SetTargetPodList(&experimentsDetails, clients, &chaosDetails); err != nil { + log.Errorf("Failed to get target pod list, 
err: %v", err) + failStep := "[pre-chaos]: Failed to get pod list, err: " + err.Error() + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "Pods: Not Found", "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + podNames := make([]string, 0, 1) + for _, pod := range experimentsDetails.TargetPodList.Items { + podNames = append(podNames, pod.Name) + } + log.Infof("[PreCheck]: Target pods list for chaos, %v", podNames) + + // Check if the targeted pods have the chaos monkey endpoint + log.Infof("[PreCheck]: Checking for ChaosMonkey endpoint in target pods") + if _, err := litmusLIB.CheckChaosMonkey(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, experimentsDetails.TargetPodList); err != nil { + log.Errorf("Some target pods don't have the chaos monkey endpoint, err: %v", err) + failStep := "[pre-chaos]: Some target pods don't have the chaos monkey endpoint, err: " + err.Error() + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "ChaosMonkey: Not Found", "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + + //PRE-CHAOS APPLICATION STATUS CHECK + if chaosDetails.DefaultHealthCheck { + log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") + if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { + log.Errorf("Application status check failed, err: %v", err) + failStep := "[pre-chaos]: Failed to verify that the AUT (Application Under Test) is in running state, err: " + err.Error() + 
types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + } + + if experimentsDetails.EngineName != "" { + // marking AUT as running, as we already checked the status of application under test + msg := "AUT: Running" + + // run the probes in the pre-chaos check + if len(resultDetails.ProbeDetails) != 0 { + if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails); err != nil { + log.Errorf("Probe Failed, err: %v", err) + failStep := "[pre-chaos]: Failed while running probes, err: " + err.Error() + msg := "AUT: Running, Probes: Unsuccessful" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + msg = "AUT: Running, Probes: Successful" + } + // generating the events for the pre-chaos check + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Normal", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } + + // Including the litmus lib + switch experimentsDetails.ChaosLib { + case "litmus": + if err := litmusLIB.PrepareChaos(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "[chaos]: Failed inside the chaoslib, err: " + err.Error() + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + default: + log.Error("[Invalid]: Please Provide the correct LIB") + failStep := "[chaos]: no match found for specified lib" + 
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + + log.Infof("[Confirmation]: %v chaos has been injected successfully", experimentsDetails.ExperimentName) + resultDetails.Verdict = v1alpha1.ResultVerdictPassed + + // POST-CHAOS APPLICATION STATUS CHECK + if chaosDetails.DefaultHealthCheck { + log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") + if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { + log.Errorf("Application status check failed, err: %v", err) + failStep := "[post-chaos]: Failed to verify that the AUT (Application Under Test) is running, err: " + err.Error() + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + } + + if experimentsDetails.EngineName != "" { + // marking AUT as running, as we already checked the status of application under test + msg := "AUT: Running" + + // run the probes in the post-chaos check + if len(resultDetails.ProbeDetails) != 0 { + if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails); err != nil { + log.Errorf("Probes Failed, err: %v", err) + failStep := "[post-chaos]: Failed while running probes, err: " + err.Error() + msg := "AUT: Running, Probes: Unsuccessful" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + msg = "AUT: Running, Probes: 
Successful" + } + + // generating post chaos event + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Normal", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } + + //Updating the chaosResult in the end of experiment + log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) + if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { + log.Errorf("Unable to Update the Chaos Result, err: %v", err) + return + } + + // generating the event in chaosResult to mark the verdict as pass/fail + msg = "experiment: " + experimentsDetails.ExperimentName + ", Result: " + string(resultDetails.Verdict) + reason := types.PassVerdict + eventType := "Normal" + if resultDetails.Verdict != "Pass" { + reason = types.FailVerdict + eventType = "Warning" + } + types.SetResultEventAttributes(&eventsDetails, reason, msg, eventType, &resultDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosResult") + + if experimentsDetails.EngineName != "" { + msg := experimentsDetails.ExperimentName + " experiment has been " + string(resultDetails.Verdict) + "ed" + types.SetEngineEventAttributes(&eventsDetails, types.Summary, msg, "Normal", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } +} diff --git a/experiments/spring-boot/spring-boot-app-kill/rbac.yaml b/experiments/spring-boot/spring-boot-app-kill/rbac.yaml new file mode 100644 index 000000000..6107575f1 --- /dev/null +++ b/experiments/spring-boot/spring-boot-app-kill/rbac.yaml @@ -0,0 +1,36 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: spring-boot-app-kill-sa + namespace: podtato + labels: + name: spring-boot-app-kill-sa +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: spring-boot-app-kill-sa + labels: + name: spring-boot-app-kill-sa +rules: + - apiGroups: [ 
"","litmuschaos.io","batch","apps" ] + resources: [ "pods","deployments","pods/log","events","jobs","pods/exec","statefulsets","configmaps","chaosengines","chaosexperiments","chaosresults" ] + verbs: [ "create","list","get","patch","delete","update" ] + - apiGroups: [ "" ] + resources: [ "nodes" ] + verbs: [ "get","list" ] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: spring-boot-app-kill-sa + labels: + name: spring-boot-app-kill-sa +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: spring-boot-app-kill-sa +subjects: + - kind: ServiceAccount + name: spring-boot-app-kill-sa + namespace: podtato diff --git a/experiments/spring-boot/spring-boot-app-kill/test/test.yml b/experiments/spring-boot/spring-boot-app-kill/test/test.yml new file mode 100644 index 000000000..acb540662 --- /dev/null +++ b/experiments/spring-boot/spring-boot-app-kill/test/test.yml @@ -0,0 +1,84 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: litmus-experiment +spec: + replicas: 1 + selector: + matchLabels: + app: litmus-experiment + template: + metadata: + labels: + app: litmus-experiment + spec: + serviceAccountName: spring-boot-app-kill-sa + containers: + - name: gotest + image: litmusgodev:latest + imagePullPolicy: IfNotPresent + command: + - sleep + - "3600" + env: + # provide application namespace + - name: APP_NAMESPACE + value: 'podtato' + - name: EXPERIMENT_NAME + value: 'spring-boot-app-kill' + # provide application labels + - name: APP_LABEL + value: 'app=spring-boot-demo' + - name: SEQUENCE + value: 'serial' + # provide application kind + - name: APP_KIND + value: 'deployment' + # provide the chaos namespace + - name: CHAOS_NAMESPACE + value: 'podtato' + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: CHAOS_SERVICE_ACCOUNT + valueFrom: + fieldRef: + fieldPath: spec.serviceAccountName + + - name: TOTAL_CHAOS_DURATION + value: '600' + # provide auxiliary application details - 
namespace and labels of the applications + # sample input is - "ns1:app=percona,ns2:name=nginx" + - name: AUXILIARY_APPINFO + value: '' + ## Period to wait before injection of chaos in sec + - name: RAMP_TIME + value: '' + ## env var that describes the library used to execute the chaos + ## default: litmus. Supported values: litmus, powerfulseal, chaoskube + - name: LIB + value: 'litmus' + + # Chaos Monkey Spring Boot configuration + # Level determines at which frequency the assault happens on the application. For a value N, the assaults happens every N requests + - name: CM_LEVEL + value: '1' + + # Whether the level should be used as a deterministic value (attack every x requests) or a chance (on average, 1 in x requests will be attacked) + - name: CM_DETERMINISTIC + value: 'true' + + # Comma separated list of watched Java services. Ex: com.example.application.controller.HelloController.sayHelle + - name: CM_WATCHED_CUSTOM_SERVICES + value: '' + + # Comma separated list of watchers. Possible values: controller, restController, service, repository, component, restTemplate, webClient, actuatorHealth + - name: CM_WATCHERS + value: 'restController' + + # AppKiller assault active + # Memory cron. Cron expression like */1 * * * * ? can be set to enable chaos monkey AppKiller assault on a schedule + - name: CM_KILL_APPLICATION_CRON + value: 'OFF' diff --git a/experiments/spring-boot/spring-boot-cpu-stress/README.md b/experiments/spring-boot/spring-boot-cpu-stress/README.md new file mode 100644 index 000000000..3b412a204 --- /dev/null +++ b/experiments/spring-boot/spring-boot-cpu-stress/README.md @@ -0,0 +1,15 @@ +## Experiment Metadata + + + + + + + + + + + + +
Name Description Documentation Link
Spring Boot CPU Stress This experiment allows injecting Chaos Monkey cpu stress assaults on Spring Boot applications, which have the [Chaos Monkey for Spring Boot](https://codecentric.github.io/chaos-monkey-spring-boot/) in their classpath. It can target random pods with a Spring Boot application and allows configuring the assaults to inject CPU stress. It tests the resiliency of the system when some applications are having unexpected faulty behavior. TODO
+ diff --git a/experiments/spring-boot/spring-boot-cpu-stress/experiment/spring-boot-cpu-stress.go b/experiments/spring-boot/spring-boot-cpu-stress/experiment/spring-boot-cpu-stress.go new file mode 100644 index 000000000..b88e55d9c --- /dev/null +++ b/experiments/spring-boot/spring-boot-cpu-stress/experiment/spring-boot-cpu-stress.go @@ -0,0 +1,214 @@ +package experiment + +import ( + "os" + + "github.com/litmuschaos/chaos-operator/api/litmuschaos/v1alpha1" + litmusLIB "github.com/litmuschaos/litmus-go/chaoslib/litmus/spring-boot-chaos/lib" + "github.com/litmuschaos/litmus-go/pkg/clients" + "github.com/litmuschaos/litmus-go/pkg/events" + "github.com/litmuschaos/litmus-go/pkg/log" + "github.com/litmuschaos/litmus-go/pkg/probe" + "github.com/litmuschaos/litmus-go/pkg/result" + experimentEnv "github.com/litmuschaos/litmus-go/pkg/spring-boot/spring-boot-chaos/environment" + experimentTypes "github.com/litmuschaos/litmus-go/pkg/spring-boot/spring-boot-chaos/types" + "github.com/litmuschaos/litmus-go/pkg/status" + "github.com/litmuschaos/litmus-go/pkg/types" + "github.com/litmuschaos/litmus-go/pkg/utils/common" + "github.com/sirupsen/logrus" +) + +// Experiment contains steps to inject chaos +func Experiment(clients clients.ClientSets) { + + experimentsDetails := experimentTypes.ExperimentDetails{} + resultDetails := types.ResultDetails{} + eventsDetails := types.EventDetails{} + chaosDetails := types.ChaosDetails{} + + //Fetching all the ENV passed from the runner pod + log.Infof("[PreReq]: Getting the ENV for the %v experiment", os.Getenv("EXPERIMENT_NAME")) + experimentEnv.GetENV(&experimentsDetails, "spring-boot-cpu-stress") + + // Initialize the chaos attributes + types.InitialiseChaosVariables(&chaosDetails) + + // Initialize Chaos Result Parameters + types.SetResultAttributes(&resultDetails, chaosDetails) + + if experimentsDetails.EngineName != "" { + // Initialize the probe details. 
Bail out upon error, as we haven't entered exp business logic yet + if err := probe.InitializeProbesInChaosResultDetails(&chaosDetails, clients, &resultDetails); err != nil { + log.Errorf("Unable to initialize the probes, err: %v", err) + return + } + } + + //Updating the chaos result in the beginning of experiment + log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) + if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { + log.Errorf("Unable to Create the Chaos Result, err: %v", err) + failStep := "[pre-chaos]: Failed to update the chaos result of spring-boot-chaos experiment (SOT), err: " + err.Error() + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + + // Set the chaos result uid + _ = result.SetResultUID(&resultDetails, clients, &chaosDetails) + + // generating the event in chaosResult to mark the verdict as awaited + msg := "experiment: " + experimentsDetails.ExperimentName + ", Result: Awaited" + types.SetResultEventAttributes(&eventsDetails, types.AwaitedVerdict, msg, "Normal", &resultDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosResult") + + //DISPLAY THE APP INFORMATION + log.InfoWithValues("[Info]: The application information is as follows", logrus.Fields{ + "Namespace": experimentsDetails.AppNS, + "Label": experimentsDetails.AppLabel, + "Chaos Duration": experimentsDetails.ChaosDuration, + }) + + // Calling AbortWatcher go routine, it will continuously watch for the abort signal and generate the required events and result + go common.AbortWatcherWithoutExit(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) + + // Select targeted pods + log.Infof("[PreCheck]: Getting targeted pods list") + if err := litmusLIB.SetTargetPodList(&experimentsDetails, clients, &chaosDetails); err != nil { + log.Errorf("Failed to get target pod list, 
err: %v", err) + failStep := "[pre-chaos]: Failed to get pod list, err: " + err.Error() + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "Pods: Not Found", "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + podNames := make([]string, 0, 1) + for _, pod := range experimentsDetails.TargetPodList.Items { + podNames = append(podNames, pod.Name) + } + log.Infof("[PreCheck]: Target pods list for chaos, %v", podNames) + + // Check if the targeted pods have the chaos monkey endpoint + log.Infof("[PreCheck]: Checking for ChaosMonkey endpoint in target pods") + if _, err := litmusLIB.CheckChaosMonkey(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, experimentsDetails.TargetPodList); err != nil { + log.Errorf("Some target pods don't have the chaos monkey endpoint, err: %v", err) + failStep := "[pre-chaos]: Some target pods don't have the chaos monkey endpoint, err: " + err.Error() + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "ChaosMonkey: Not Found", "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + + //PRE-CHAOS APPLICATION STATUS CHECK + if chaosDetails.DefaultHealthCheck { + log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") + if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { + log.Errorf("Application status check failed, err: %v", err) + failStep := "[pre-chaos]: Failed to verify that the AUT (Application Under Test) is in running state, err: " + err.Error() + 
types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + } + + if experimentsDetails.EngineName != "" { + // marking AUT as running, as we already checked the status of application under test + msg := "AUT: Running" + + // run the probes in the pre-chaos check + if len(resultDetails.ProbeDetails) != 0 { + if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails); err != nil { + log.Errorf("Probe Failed, err: %v", err) + failStep := "[pre-chaos]: Failed while running probes, err: " + err.Error() + msg := "AUT: Running, Probes: Unsuccessful" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + msg = "AUT: Running, Probes: Successful" + } + // generating the events for the pre-chaos check + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Normal", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } + + // Including the litmus lib + switch experimentsDetails.ChaosLib { + case "litmus": + if err := litmusLIB.PrepareChaos(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "[chaos]: Failed inside the chaoslib, err: " + err.Error() + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + default: + log.Error("[Invalid]: Please Provide the correct LIB") + failStep := "[chaos]: no match found for specified lib" + 
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + + log.Infof("[Confirmation]: %v chaos has been injected successfully", experimentsDetails.ExperimentName) + resultDetails.Verdict = v1alpha1.ResultVerdictPassed + + // POST-CHAOS APPLICATION STATUS CHECK + if chaosDetails.DefaultHealthCheck { + log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") + if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { + log.Errorf("Application status check failed, err: %v", err) + failStep := "[post-chaos]: Failed to verify that the AUT (Application Under Test) is running, err: " + err.Error() + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + } + + if experimentsDetails.EngineName != "" { + // marking AUT as running, as we already checked the status of application under test + msg := "AUT: Running" + + // run the probes in the post-chaos check + if len(resultDetails.ProbeDetails) != 0 { + if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails); err != nil { + log.Errorf("Probes Failed, err: %v", err) + failStep := "[post-chaos]: Failed while running probes, err: " + err.Error() + msg := "AUT: Running, Probes: Unsuccessful" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + msg = "AUT: Running, Probes: 
Successful" + } + + // generating post chaos event + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Normal", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } + + //Updating the chaosResult in the end of experiment + log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) + if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { + log.Errorf("Unable to Update the Chaos Result, err: %v", err) + return + } + + // generating the event in chaosResult to mark the verdict as pass/fail + msg = "experiment: " + experimentsDetails.ExperimentName + ", Result: " + string(resultDetails.Verdict) + reason := types.PassVerdict + eventType := "Normal" + if resultDetails.Verdict != "Pass" { + reason = types.FailVerdict + eventType = "Warning" + } + types.SetResultEventAttributes(&eventsDetails, reason, msg, eventType, &resultDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosResult") + + if experimentsDetails.EngineName != "" { + msg := experimentsDetails.ExperimentName + " experiment has been " + string(resultDetails.Verdict) + "ed" + types.SetEngineEventAttributes(&eventsDetails, types.Summary, msg, "Normal", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } +} diff --git a/experiments/spring-boot/spring-boot-cpu-stress/rbac.yaml b/experiments/spring-boot/spring-boot-cpu-stress/rbac.yaml new file mode 100644 index 000000000..a15f6a9d4 --- /dev/null +++ b/experiments/spring-boot/spring-boot-cpu-stress/rbac.yaml @@ -0,0 +1,36 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: spring-boot-cpu-stress-sa + namespace: podtato + labels: + name: spring-boot-cpu-stress-sa +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: spring-boot-cpu-stress-sa + labels: + name: spring-boot-cpu-stress-sa +rules: + - 
apiGroups: [ "","litmuschaos.io","batch","apps" ] + resources: [ "pods","deployments","pods/log","events","jobs","pods/exec","statefulsets","configmaps","chaosengines","chaosexperiments","chaosresults" ] + verbs: [ "create","list","get","patch","delete","update" ] + - apiGroups: [ "" ] + resources: [ "nodes" ] + verbs: [ "get","list" ] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: spring-boot-cpu-stress-sa + labels: + name: spring-boot-cpu-stress-sa +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: spring-boot-cpu-stress-sa +subjects: + - kind: ServiceAccount + name: spring-boot-cpu-stress-sa + namespace: podtato diff --git a/experiments/spring-boot/spring-boot-cpu-stress/test/test.yml b/experiments/spring-boot/spring-boot-cpu-stress/test/test.yml new file mode 100644 index 000000000..9ccf1f719 --- /dev/null +++ b/experiments/spring-boot/spring-boot-cpu-stress/test/test.yml @@ -0,0 +1,91 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: litmus-experiment +spec: + replicas: 1 + selector: + matchLabels: + app: litmus-experiment + template: + metadata: + labels: + app: litmus-experiment + spec: + serviceAccountName: spring-boot-cpu-stress-sa + containers: + - name: gotest + image: litmusgodev:latest + imagePullPolicy: IfNotPresent + command: + - sleep + - "3600" + env: + # provide application namespace + - name: APP_NAMESPACE + value: 'podtato' + - name: EXPERIMENT_NAME + value: 'spring-boot-cpu-stress' + # provide application labels + - name: APP_LABEL + value: 'app=spring-boot-demo' + - name: SEQUENCE + value: 'serial' + # provide application kind + - name: APP_KIND + value: 'deployment' + # provide the chaos namespace + - name: CHAOS_NAMESPACE + value: 'podtato' + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: CHAOS_SERVICE_ACCOUNT + valueFrom: + fieldRef: + fieldPath: spec.serviceAccountName + + - name: TOTAL_CHAOS_DURATION + value: '600' + # provide auxiliary 
application details - namespace and labels of the applications + # sample input is - "ns1:app=percona,ns2:name=nginx" + - name: AUXILIARY_APPINFO + value: '' + ## Period to wait before injection of chaos in sec + - name: RAMP_TIME + value: '' + ## env var that describes the library used to execute the chaos + ## default: litmus. Supported values: litmus, powerfulseal, chaoskube + - name: LIB + value: 'litmus' + + # Chaos Monkey Spring Boot configuration + # Level determines at which frequency the assault happens on the application. For a value N, the assaults happens every N requests + - name: CM_LEVEL + value: '1' + + # Whether the level should be used as a deterministic value (attack every x requests) or a chance (on average, 1 in x requests will be attacked) + - name: CM_DETERMINISTIC + value: 'true' + + # Comma separated list of watched Java services. Ex: com.example.application.controller.HelloController.sayHelle + - name: CM_WATCHED_CUSTOM_SERVICES + value: '' + + # Comma separated list of watchers. Possible values: controller, restController, service, repository, component, restTemplate, webClient, actuatorHealth + - name: CM_WATCHERS + value: 'restController' + + # Duration to assault cpu when requested load is reached in ms. + - name: CM_CPU_MS_HOLD_LOAD + value: '90000' + # Final fraction of used cpu by assault. 0.95 equals 95 %. + - name: CM_CPU_LOAD_TARGET_FRACTION + value: '0.9' + # CPU cron. Cron expression like */1 * * * * ? can be set to enable chaos monkey cpu assault on a schedule + - name: CM_CPU_CRON + value: 'OFF' + + diff --git a/experiments/spring-boot/spring-boot-exceptions/README.md b/experiments/spring-boot/spring-boot-exceptions/README.md new file mode 100644 index 000000000..985799bcf --- /dev/null +++ b/experiments/spring-boot/spring-boot-exceptions/README.md @@ -0,0 +1,15 @@ +## Experiment Metadata + + + + + + + + + + + + +
Name Description Documentation Link
Spring Boot Exceptions This experiment allows injecting Chaos Monkey exception assaults on Spring Boot applications, which have the [Chaos Monkey for Spring Boot](https://codecentric.github.io/chaos-monkey-spring-boot/) in their classpath. It can target random pods with a Spring Boot application and allows configuring the assaults to inject exceptions. It tests the resiliency of the system when some applications are having unexpected faulty behavior. TODO
+ diff --git a/experiments/spring-boot/spring-boot-exceptions/experiment/spring-boot-exceptions.go b/experiments/spring-boot/spring-boot-exceptions/experiment/spring-boot-exceptions.go new file mode 100644 index 000000000..8a6e0794a --- /dev/null +++ b/experiments/spring-boot/spring-boot-exceptions/experiment/spring-boot-exceptions.go @@ -0,0 +1,214 @@ +package experiment + +import ( + "os" + + "github.com/litmuschaos/chaos-operator/api/litmuschaos/v1alpha1" + litmusLIB "github.com/litmuschaos/litmus-go/chaoslib/litmus/spring-boot-chaos/lib" + "github.com/litmuschaos/litmus-go/pkg/clients" + "github.com/litmuschaos/litmus-go/pkg/events" + "github.com/litmuschaos/litmus-go/pkg/log" + "github.com/litmuschaos/litmus-go/pkg/probe" + "github.com/litmuschaos/litmus-go/pkg/result" + experimentEnv "github.com/litmuschaos/litmus-go/pkg/spring-boot/spring-boot-chaos/environment" + experimentTypes "github.com/litmuschaos/litmus-go/pkg/spring-boot/spring-boot-chaos/types" + "github.com/litmuschaos/litmus-go/pkg/status" + "github.com/litmuschaos/litmus-go/pkg/types" + "github.com/litmuschaos/litmus-go/pkg/utils/common" + "github.com/sirupsen/logrus" +) + +// Experiment contains steps to inject chaos +func Experiment(clients clients.ClientSets) { + + experimentsDetails := experimentTypes.ExperimentDetails{} + resultDetails := types.ResultDetails{} + eventsDetails := types.EventDetails{} + chaosDetails := types.ChaosDetails{} + + //Fetching all the ENV passed from the runner pod + log.Infof("[PreReq]: Getting the ENV for the %v experiment", os.Getenv("EXPERIMENT_NAME")) + experimentEnv.GetENV(&experimentsDetails, "spring-boot-exceptions") + + // Initialize the chaos attributes + types.InitialiseChaosVariables(&chaosDetails) + + // Initialize Chaos Result Parameters + types.SetResultAttributes(&resultDetails, chaosDetails) + + if experimentsDetails.EngineName != "" { + // Initialize the probe details. 
Bail out upon error, as we haven't entered exp business logic yet + if err := probe.InitializeProbesInChaosResultDetails(&chaosDetails, clients, &resultDetails); err != nil { + log.Errorf("Unable to initialize the probes, err: %v", err) + return + } + } + + //Updating the chaos result in the beginning of experiment + log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) + if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { + log.Errorf("Unable to Create the Chaos Result, err: %v", err) + failStep := "[pre-chaos]: Failed to update the chaos result of spring-boot-chaos experiment (SOT), err: " + err.Error() + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + + // Set the chaos result uid + _ = result.SetResultUID(&resultDetails, clients, &chaosDetails) + + // generating the event in chaosResult to mark the verdict as awaited + msg := "experiment: " + experimentsDetails.ExperimentName + ", Result: Awaited" + types.SetResultEventAttributes(&eventsDetails, types.AwaitedVerdict, msg, "Normal", &resultDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosResult") + + //DISPLAY THE APP INFORMATION + log.InfoWithValues("[Info]: The application information is as follows", logrus.Fields{ + "Namespace": experimentsDetails.AppNS, + "Label": experimentsDetails.AppLabel, + "Chaos Duration": experimentsDetails.ChaosDuration, + }) + + // Calling AbortWatcher go routine, it will continuously watch for the abort signal and generate the required events and result + go common.AbortWatcherWithoutExit(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) + + // Select targeted pods + log.Infof("[PreCheck]: Geting targeted pods list") + if err := litmusLIB.SetTargetPodList(&experimentsDetails, clients, &chaosDetails); err != nil { + log.Errorf("Failed to get target pod list, 
err: %v", err) + failStep := "[pre-chaos]: Failed to get pod list, err: " + err.Error() + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "Pods: Not Found", "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + podNames := make([]string, 0, 1) + for _, pod := range experimentsDetails.TargetPodList.Items { + podNames = append(podNames, pod.Name) + } + log.Infof("[PreCheck]: Target pods list for chaos, %v", podNames) + + // Check if the targeted pods have the chaos monkey endpoint + log.Infof("[PreCheck]: Checking for ChaosMonkey endpoint in target pods") + if _, err := litmusLIB.CheckChaosMonkey(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, experimentsDetails.TargetPodList); err != nil { + log.Errorf("Some target pods don't have the chaos monkey endpoint, err: %v", err) + failStep := "[pre-chaos]: Some target pods don't have the chaos monkey endpoint, err: " + err.Error() + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "ChaosMonkey: Not Found", "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + + //PRE-CHAOS APPLICATION STATUS CHECK + if chaosDetails.DefaultHealthCheck { + log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") + if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { + log.Errorf("Application status check failed, err: %v", err) + failStep := "[pre-chaos]: Failed to verify that the AUT (Application Under Test) is in running state, err: " + err.Error() + 
types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + } + + if experimentsDetails.EngineName != "" { + // marking AUT as running, as we already checked the status of application under test + msg := "AUT: Running" + + // run the probes in the pre-chaos check + if len(resultDetails.ProbeDetails) != 0 { + if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails); err != nil { + log.Errorf("Probe Failed, err: %v", err) + failStep := "[pre-chaos]: Failed while running probes, err: " + err.Error() + msg := "AUT: Running, Probes: Unsuccessful" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + msg = "AUT: Running, Probes: Successful" + } + // generating the events for the pre-chaos check + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Normal", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } + + // Including the litmus lib + switch experimentsDetails.ChaosLib { + case "litmus": + if err := litmusLIB.PrepareChaos(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "[chaos]: Failed inside the chaoslib, err: " + err.Error() + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + default: + log.Error("[Invalid]: Please Provide the correct LIB") + failStep := "[chaos]: no match found for specified lib" + 
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + + log.Infof("[Confirmation]: %v chaos has been injected successfully", experimentsDetails.ExperimentName) + resultDetails.Verdict = v1alpha1.ResultVerdictPassed + + // POST-CHAOS APPLICATION STATUS CHECK + if chaosDetails.DefaultHealthCheck { + log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") + if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { + log.Errorf("Application status check failed, err: %v", err) + failStep := "[post-chaos]: Failed to verify that the AUT (Application Under Test) is running, err: " + err.Error() + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + } + + if experimentsDetails.EngineName != "" { + // marking AUT as running, as we already checked the status of application under test + msg := "AUT: Running" + + // run the probes in the post-chaos check + if len(resultDetails.ProbeDetails) != 0 { + if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails); err != nil { + log.Errorf("Probes Failed, err: %v", err) + failStep := "[post-chaos]: Failed while running probes, err: " + err.Error() + msg := "AUT: Running, Probes: Unsuccessful" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + msg = "AUT: Running, Probes: 
Successful" + } + + // generating post chaos event + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Normal", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } + + //Updating the chaosResult in the end of experiment + log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) + if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { + log.Errorf("Unable to Update the Chaos Result, err: %v", err) + return + } + + // generating the event in chaosResult to mark the verdict as pass/fail + msg = "experiment: " + experimentsDetails.ExperimentName + ", Result: " + string(resultDetails.Verdict) + reason := types.PassVerdict + eventType := "Normal" + if resultDetails.Verdict != "Pass" { + reason = types.FailVerdict + eventType = "Warning" + } + types.SetResultEventAttributes(&eventsDetails, reason, msg, eventType, &resultDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosResult") + + if experimentsDetails.EngineName != "" { + msg := experimentsDetails.ExperimentName + " experiment has been " + string(resultDetails.Verdict) + "ed" + types.SetEngineEventAttributes(&eventsDetails, types.Summary, msg, "Normal", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } +} diff --git a/experiments/spring-boot/spring-boot-exceptions/rbac.yaml b/experiments/spring-boot/spring-boot-exceptions/rbac.yaml new file mode 100644 index 000000000..91c0db49e --- /dev/null +++ b/experiments/spring-boot/spring-boot-exceptions/rbac.yaml @@ -0,0 +1,36 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: spring-boot-exceptions-sa + namespace: podtato + labels: + name: spring-boot-exceptions-sa +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: spring-boot-exceptions-sa + labels: + name: spring-boot-exceptions-sa +rules: + - 
apiGroups: [ "","litmuschaos.io","batch","apps" ] + resources: [ "pods","deployments","pods/log","events","jobs","pods/exec","statefulsets","configmaps","chaosengines","chaosexperiments","chaosresults" ] + verbs: [ "create","list","get","patch","delete","update" ] + - apiGroups: [ "" ] + resources: [ "nodes" ] + verbs: [ "get","list" ] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: spring-boot-exceptions-sa + labels: + name: spring-boot-exceptions-sa +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: spring-boot-exceptions-sa +subjects: + - kind: ServiceAccount + name: spring-boot-exceptions-sa + namespace: podtato diff --git a/experiments/spring-boot/spring-boot-exceptions/test/test.yml b/experiments/spring-boot/spring-boot-exceptions/test/test.yml new file mode 100644 index 000000000..afd95d939 --- /dev/null +++ b/experiments/spring-boot/spring-boot-exceptions/test/test.yml @@ -0,0 +1,86 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: litmus-experiment +spec: + replicas: 1 + selector: + matchLabels: + app: litmus-experiment + template: + metadata: + labels: + app: litmus-experiment + spec: + serviceAccountName: spring-boot-exceptions-sa + containers: + - name: gotest + image: litmusgodev:latest + imagePullPolicy: IfNotPresent + command: + - sleep + - "3600" + env: + # provide application namespace + - name: APP_NAMESPACE + value: 'podtato' + - name: EXPERIMENT_NAME + value: 'spring-boot-exceptions' + # provide application labels + - name: APP_LABEL + value: 'app=spring-boot-demo' + - name: SEQUENCE + value: 'serial' + # provide application kind + - name: APP_KIND + value: 'deployment' + # provide the chaos namespace + - name: CHAOS_NAMESPACE + value: 'podtato' + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: CHAOS_SERVICE_ACCOUNT + valueFrom: + fieldRef: + fieldPath: spec.serviceAccountName + + - name: TOTAL_CHAOS_DURATION + value: '600' + # provide auxiliary 
application details - namespace and labels of the applications + # sample input is - "ns1:app=percona,ns2:name=nginx" + - name: AUXILIARY_APPINFO + value: '' + ## Period to wait before injection of chaos in sec + - name: RAMP_TIME + value: '' + ## env var that describes the library used to execute the chaos + ## default: litmus. Supported values: litmus, powerfulseal, chaoskube + - name: LIB + value: 'litmus' + + # Chaos Monkey Spring Boot configuration + # Level determines at which frequency the assault happens on the application. For a value N, the assaults happens every N requests + - name: CM_LEVEL + value: '1' + + # Whether the level should be used as a deterministic value (attack every x requests) or a chance (on average, 1 in x requests will be attacked) + - name: CM_DETERMINISTIC + value: 'true' + + # Comma separated list of watched Java services. Ex: com.example.application.controller.HelloController.sayHelle + - name: CM_WATCHED_CUSTOM_SERVICES + value: '' + + # Comma separated list of watchers. Possible values: controller, restController, service, repository, component, restTemplate, webClient, actuatorHealth + - name: CM_WATCHERS + value: 'restController' + + # Type of raised exception + - name: CM_EXCEPTIONS_TYPE + value: 'java.lang.IllegalArgumentException' + # Argument of raised exception + - name: CM_EXCEPTIONS_ARGUMENTS + value: 'java.lang.String:custom illegal argument exception' diff --git a/experiments/spring-boot/spring-boot-latency/README.md b/experiments/spring-boot/spring-boot-latency/README.md new file mode 100644 index 000000000..aba5b37f7 --- /dev/null +++ b/experiments/spring-boot/spring-boot-latency/README.md @@ -0,0 +1,15 @@ +## Experiment Metadata + + + + + + + + + + + + +
Name Description Documentation Link
Spring Boot Latency This experiment allows injecting Chaos Monkey network latency assaults on Spring Boot applications, which have the [Chaos Monkey for Spring Boot](https://codecentric.github.io/chaos-monkey-spring-boot/) in their classpath. It can target random pods with a Spring Boot application and allows configuring the assaults to inject latency. It tests the resiliency of the system when some applications are having unexpected faulty behavior. TODO
+ diff --git a/experiments/spring-boot/spring-boot-chaos/experiment/spring-boot-chaos.go b/experiments/spring-boot/spring-boot-latency/experiment/spring-boot-latency.go similarity index 99% rename from experiments/spring-boot/spring-boot-chaos/experiment/spring-boot-chaos.go rename to experiments/spring-boot/spring-boot-latency/experiment/spring-boot-latency.go index 50e0f6219..2e280ecea 100644 --- a/experiments/spring-boot/spring-boot-chaos/experiment/spring-boot-chaos.go +++ b/experiments/spring-boot/spring-boot-latency/experiment/spring-boot-latency.go @@ -28,7 +28,7 @@ func Experiment(clients clients.ClientSets) { //Fetching all the ENV passed from the runner pod log.Infof("[PreReq]: Getting the ENV for the %v experiment", os.Getenv("EXPERIMENT_NAME")) - experimentEnv.GetENV(&experimentsDetails) + experimentEnv.GetENV(&experimentsDetails, "spring-boot-latency") // Initialize the chaos attributes types.InitialiseChaosVariables(&chaosDetails) diff --git a/experiments/spring-boot/rbac.yaml b/experiments/spring-boot/spring-boot-latency/rbac.yaml similarity index 73% rename from experiments/spring-boot/rbac.yaml rename to experiments/spring-boot/spring-boot-latency/rbac.yaml index 924d97246..e342e5059 100644 --- a/experiments/spring-boot/rbac.yaml +++ b/experiments/spring-boot/spring-boot-latency/rbac.yaml @@ -1,17 +1,17 @@ apiVersion: v1 kind: ServiceAccount metadata: - name: spring-boot-chaos-sa + name: spring-boot-latency-sa namespace: podtato labels: - name: spring-boot-chaos-sa + name: spring-boot-latency-sa --- apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: - name: spring-boot-chaos-sa + name: spring-boot-latency-sa labels: - name: spring-boot-chaos-sa + name: spring-boot-latency-sa rules: - apiGroups: [ "","litmuschaos.io","batch","apps" ] resources: [ "pods","deployments","pods/log","events","jobs","pods/exec","statefulsets","configmaps","chaosengines","chaosexperiments","chaosresults" ] @@ -23,14 +23,14 @@ rules: apiVersion: 
rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: - name: spring-boot-chaos-sa + name: spring-boot-latency-sa labels: - name: spring-boot-chaos-sa + name: spring-boot-latency-sa roleRef: apiGroup: rbac.authorization.k8s.io kind: Role - name: spring-boot-chaos-sa + name: spring-boot-latency-sa subjects: - kind: ServiceAccount - name: spring-boot-chaos-sa + name: spring-boot-latency-sa namespace: podtato diff --git a/experiments/spring-boot/spring-boot-latency/test/test.yml b/experiments/spring-boot/spring-boot-latency/test/test.yml new file mode 100644 index 000000000..42f4bc18e --- /dev/null +++ b/experiments/spring-boot/spring-boot-latency/test/test.yml @@ -0,0 +1,86 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: litmus-experiment +spec: + replicas: 1 + selector: + matchLabels: + app: litmus-experiment + template: + metadata: + labels: + app: litmus-experiment + spec: + serviceAccountName: spring-boot-latency-sa + containers: + - name: gotest + image: litmusgodev:latest + imagePullPolicy: IfNotPresent + command: + - sleep + - "3600" + env: + # provide application namespace + - name: APP_NAMESPACE + value: 'podtato' + - name: EXPERIMENT_NAME + value: 'spring-boot-latency' + # provide application labels + - name: APP_LABEL + value: 'app=spring-boot-demo' + - name: SEQUENCE + value: 'serial' + # provide application kind + - name: APP_KIND + value: 'deployment' + # provide the chaos namespace + - name: CHAOS_NAMESPACE + value: 'podtato' + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: CHAOS_SERVICE_ACCOUNT + valueFrom: + fieldRef: + fieldPath: spec.serviceAccountName + + - name: TOTAL_CHAOS_DURATION + value: '600' + # provide auxiliary application details - namespace and labels of the applications + # sample input is - "ns1:app=percona,ns2:name=nginx" + - name: AUXILIARY_APPINFO + value: '' + ## Period to wait before injection of chaos in sec + - name: RAMP_TIME + value: '' + ## env var that describes the 
library used to execute the chaos + ## default: litmus. Supported values: litmus, powerfulseal, chaoskube + - name: LIB + value: 'litmus' + + # Chaos Monkey Spring Boot configuration + # Level determines at which frequency the assault happens on the application. For a value N, the assaults happens every N requests + - name: CM_LEVEL + value: '1' + + # Whether the level should be used as a deterministic value (attack every x requests) or a chance (on average, 1 in x requests will be attacked) + - name: CM_DETERMINISTIC + value: 'true' + + # Comma separated list of watched Java services. Ex: com.example.application.controller.HelloController.sayHelle + - name: CM_WATCHED_CUSTOM_SERVICES + value: '' + + # Comma separated list of watchers. Possible values: controller, restController, service, repository, component, restTemplate, webClient, actuatorHealth + - name: CM_WATCHERS + value: 'restController' + + # Minimum latency (ms) + - name: CM_LATENCY_RANGE_START + value: '500' + # Maxiumu latency (ms) + - name: CM_LATENCY_RANGE_END + value: '500' diff --git a/experiments/spring-boot/spring-boot-memory-stress/README.md b/experiments/spring-boot/spring-boot-memory-stress/README.md new file mode 100644 index 000000000..ab47ce6c3 --- /dev/null +++ b/experiments/spring-boot/spring-boot-memory-stress/README.md @@ -0,0 +1,15 @@ +## Experiment Metadata + + + + + + + + + + + + +
Name Description Documentation Link
Spring Boot Memory Stress This experiment allows injecting Chaos Monkey memory-stress assaults on Spring Boot applications, which have the [Chaos Monkey for Spring Boot](https://codecentric.github.io/chaos-monkey-spring-boot/) in their classpath. It can target random pods with a Spring Boot application and allows configuring the assaults to inject memory-stress. It tests the resiliency of the system when some applications are having unexpected faulty behavior. TODO
+ diff --git a/experiments/spring-boot/spring-boot-memory-stress/experiment/spring-boot-memory-stress.go b/experiments/spring-boot/spring-boot-memory-stress/experiment/spring-boot-memory-stress.go new file mode 100644 index 000000000..f5e9d6e0d --- /dev/null +++ b/experiments/spring-boot/spring-boot-memory-stress/experiment/spring-boot-memory-stress.go @@ -0,0 +1,214 @@ +package experiment + +import ( + "os" + + "github.com/litmuschaos/chaos-operator/api/litmuschaos/v1alpha1" + litmusLIB "github.com/litmuschaos/litmus-go/chaoslib/litmus/spring-boot-chaos/lib" + "github.com/litmuschaos/litmus-go/pkg/clients" + "github.com/litmuschaos/litmus-go/pkg/events" + "github.com/litmuschaos/litmus-go/pkg/log" + "github.com/litmuschaos/litmus-go/pkg/probe" + "github.com/litmuschaos/litmus-go/pkg/result" + experimentEnv "github.com/litmuschaos/litmus-go/pkg/spring-boot/spring-boot-chaos/environment" + experimentTypes "github.com/litmuschaos/litmus-go/pkg/spring-boot/spring-boot-chaos/types" + "github.com/litmuschaos/litmus-go/pkg/status" + "github.com/litmuschaos/litmus-go/pkg/types" + "github.com/litmuschaos/litmus-go/pkg/utils/common" + "github.com/sirupsen/logrus" +) + +// Experiment contains steps to inject chaos +func Experiment(clients clients.ClientSets) { + + experimentsDetails := experimentTypes.ExperimentDetails{} + resultDetails := types.ResultDetails{} + eventsDetails := types.EventDetails{} + chaosDetails := types.ChaosDetails{} + + //Fetching all the ENV passed from the runner pod + log.Infof("[PreReq]: Getting the ENV for the %v experiment", os.Getenv("EXPERIMENT_NAME")) + experimentEnv.GetENV(&experimentsDetails, "spring-boot-memory-stress") + + // Initialize the chaos attributes + types.InitialiseChaosVariables(&chaosDetails) + + // Initialize Chaos Result Parameters + types.SetResultAttributes(&resultDetails, chaosDetails) + + if experimentsDetails.EngineName != "" { + // Initialize the probe details. 
Bail out upon error, as we haven't entered exp business logic yet + if err := probe.InitializeProbesInChaosResultDetails(&chaosDetails, clients, &resultDetails); err != nil { + log.Errorf("Unable to initialize the probes, err: %v", err) + return + } + } + + //Updating the chaos result in the beginning of experiment + log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) + if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { + log.Errorf("Unable to Create the Chaos Result, err: %v", err) + failStep := "[pre-chaos]: Failed to update the chaos result of spring-boot-chaos experiment (SOT), err: " + err.Error() + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + + // Set the chaos result uid + _ = result.SetResultUID(&resultDetails, clients, &chaosDetails) + + // generating the event in chaosResult to mark the verdict as awaited + msg := "experiment: " + experimentsDetails.ExperimentName + ", Result: Awaited" + types.SetResultEventAttributes(&eventsDetails, types.AwaitedVerdict, msg, "Normal", &resultDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosResult") + + //DISPLAY THE APP INFORMATION + log.InfoWithValues("[Info]: The application information is as follows", logrus.Fields{ + "Namespace": experimentsDetails.AppNS, + "Label": experimentsDetails.AppLabel, + "Chaos Duration": experimentsDetails.ChaosDuration, + }) + + // Calling AbortWatcher go routine, it will continuously watch for the abort signal and generate the required events and result + go common.AbortWatcherWithoutExit(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) + + // Select targeted pods + log.Infof("[PreCheck]: Geting targeted pods list") + if err := litmusLIB.SetTargetPodList(&experimentsDetails, clients, &chaosDetails); err != nil { + log.Errorf("Failed to get target pod list, 
err: %v", err) + failStep := "[pre-chaos]: Failed to get pod list, err: " + err.Error() + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "Pods: Not Found", "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + podNames := make([]string, 0, 1) + for _, pod := range experimentsDetails.TargetPodList.Items { + podNames = append(podNames, pod.Name) + } + log.Infof("[PreCheck]: Target pods list for chaos, %v", podNames) + + // Check if the targeted pods have the chaos monkey endpoint + log.Infof("[PreCheck]: Checking for ChaosMonkey endpoint in target pods") + if _, err := litmusLIB.CheckChaosMonkey(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, experimentsDetails.TargetPodList); err != nil { + log.Errorf("Some target pods don't have the chaos monkey endpoint, err: %v", err) + failStep := "[pre-chaos]: Some target pods don't have the chaos monkey endpoint, err: " + err.Error() + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "ChaosMonkey: Not Found", "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + + //PRE-CHAOS APPLICATION STATUS CHECK + if chaosDetails.DefaultHealthCheck { + log.Info("[Status]: Verify that the AUT (Application Under Test) is running (pre-chaos)") + if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { + log.Errorf("Application status check failed, err: %v", err) + failStep := "[pre-chaos]: Failed to verify that the AUT (Application Under Test) is in running state, err: " + err.Error() + 
types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + } + + if experimentsDetails.EngineName != "" { + // marking AUT as running, as we already checked the status of application under test + msg := "AUT: Running" + + // run the probes in the pre-chaos check + if len(resultDetails.ProbeDetails) != 0 { + if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails); err != nil { + log.Errorf("Probe Failed, err: %v", err) + failStep := "[pre-chaos]: Failed while running probes, err: " + err.Error() + msg := "AUT: Running, Probes: Unsuccessful" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + msg = "AUT: Running, Probes: Successful" + } + // generating the events for the pre-chaos check + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Normal", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } + + // Including the litmus lib + switch experimentsDetails.ChaosLib { + case "litmus": + if err := litmusLIB.PrepareChaos(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "[chaos]: Failed inside the chaoslib, err: " + err.Error() + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + default: + log.Error("[Invalid]: Please Provide the correct LIB") + failStep := "[chaos]: no match found for specified lib" + 
result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + + log.Infof("[Confirmation]: %v chaos has been injected successfully", experimentsDetails.ExperimentName) + resultDetails.Verdict = v1alpha1.ResultVerdictPassed + + // POST-CHAOS APPLICATION STATUS CHECK + if chaosDetails.DefaultHealthCheck { + log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") + if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { + log.Errorf("Application status check failed, err: %v", err) + failStep := "[post-chaos]: Failed to verify that the AUT (Application Under Test) is running, err: " + err.Error() + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + } + + if experimentsDetails.EngineName != "" { + // marking AUT as running, as we already checked the status of application under test + msg := "AUT: Running" + + // run the probes in the post-chaos check + if len(resultDetails.ProbeDetails) != 0 { + if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails); err != nil { + log.Errorf("Probes Failed, err: %v", err) + failStep := "[post-chaos]: Failed while running probes, err: " + err.Error() + msg := "AUT: Running, Probes: Unsuccessful" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + msg = "AUT: Running, Probes: 
Successful" + } + + // generating post chaos event + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Normal", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } + + //Updating the chaosResult in the end of experiment + log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) + if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { + log.Errorf("Unable to Update the Chaos Result, err: %v", err) + return + } + + // generating the event in chaosResult to mark the verdict as pass/fail + msg = "experiment: " + experimentsDetails.ExperimentName + ", Result: " + string(resultDetails.Verdict) + reason := types.PassVerdict + eventType := "Normal" + if resultDetails.Verdict != "Pass" { + reason = types.FailVerdict + eventType = "Warning" + } + types.SetResultEventAttributes(&eventsDetails, reason, msg, eventType, &resultDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosResult") + + if experimentsDetails.EngineName != "" { + msg := experimentsDetails.ExperimentName + " experiment has been " + string(resultDetails.Verdict) + "ed" + types.SetEngineEventAttributes(&eventsDetails, types.Summary, msg, "Normal", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } +} diff --git a/experiments/spring-boot/spring-boot-memory-stress/rbac.yaml b/experiments/spring-boot/spring-boot-memory-stress/rbac.yaml new file mode 100644 index 000000000..6c3aba806 --- /dev/null +++ b/experiments/spring-boot/spring-boot-memory-stress/rbac.yaml @@ -0,0 +1,36 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: spring-boot-memory-stress-sa + namespace: podtato + labels: + name: spring-boot-memory-stress-sa +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: spring-boot-memory-stress-sa + labels: + name: 
spring-boot-memory-stress-sa +rules: + - apiGroups: [ "","litmuschaos.io","batch","apps" ] + resources: [ "pods","deployments","pods/log","events","jobs","pods/exec","statefulsets","configmaps","chaosengines","chaosexperiments","chaosresults" ] + verbs: [ "create","list","get","patch","delete","update" ] + - apiGroups: [ "" ] + resources: [ "nodes" ] + verbs: [ "get","list" ] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: spring-boot-memory-stress-sa + labels: + name: spring-boot-memory-stress-sa +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: spring-boot-memory-stress-sa +subjects: + - kind: ServiceAccount + name: spring-boot-memory-stress-sa + namespace: podtato diff --git a/experiments/spring-boot/spring-boot-chaos/test/test.yml b/experiments/spring-boot/spring-boot-memory-stress/test/test.yml similarity index 66% rename from experiments/spring-boot/spring-boot-chaos/test/test.yml rename to experiments/spring-boot/spring-boot-memory-stress/test/test.yml index 0aac7005e..a42465d1e 100644 --- a/experiments/spring-boot/spring-boot-chaos/test/test.yml +++ b/experiments/spring-boot/spring-boot-memory-stress/test/test.yml @@ -13,7 +13,7 @@ spec: labels: app: litmus-experiment spec: - serviceAccountName: spring-boot-chaos-sa + serviceAccountName: spring-boot-memory-stress-sa containers: - name: gotest image: litmusgodev:latest @@ -26,7 +26,7 @@ spec: - name: APP_NAMESPACE value: 'podtato' - name: EXPERIMENT_NAME - value: 'spring-boot-chaos' + value: 'spring-boot-memory-stress' # provide application labels - name: APP_LABEL value: 'app=spring-boot-demo' @@ -78,36 +78,6 @@ spec: - name: CM_WATCHERS value: 'restController' - # AppKiller assault active - - name: CM_KILL_APPLICATION_ACTIVE - value: 'false' - # Memory cron. Cron expression like */1 * * * * ? 
can be set to enable chaos monkey AppKiller assault on a schedule - - name: CM_KILL_APPLICATION_CRON - value: 'OFF' - - # Latency assault active - - name: CM_LATENCY_ACTIVE - value: 'true' - # Minimum latency (ms) - - name: CM_LATENCY_RANGE_START - value: '500' - # Maxiumu latency (ms) - - name: CM_LATENCY_RANGE_END - value: '500' - - # Exception assault active - - name: CM_EXCEPTIONS_ACTIVE - value: 'false' - # Type of raised exception - - name: CM_EXCEPTIONS_TYPE - value: 'java.lang.IllegalArgumentException' - # Argument of raised exception - - name: CM_EXCEPTIONS_ARGUMENTS - value: 'java.lang.String:custom illegal argument exception' - - # Memory assault active - - name: CM_MEMORY_ACTIVE - value: 'false' # Duration to assault memory when requested fill amount is reached in ms. - name: CM_MEMORY_MS_HOLD_FILLED_MEM value: '90000' @@ -123,18 +93,3 @@ spec: # Memory cron. Cron expression like */1 * * * * ? can be set to enable chaos monkey memory assault on a schedule - name: CM_MEMORY_CRON value: 'OFF' - - # CPU assault active - - name: CM_CPU_ACTIVE - value: 'false' - # Duration to assault cpu when requested load is reached in ms. - - name: CM_CPU_MS_HOLD_LOAD - value: '90000' - # Final fraction of used cpu by assault. 0.95 equals 95 %. - - name: CM_CPU_LOAD_TARGET_FRACTION - value: '0.9' - # CPU cron. Cron expression like */1 * * * * ? 
can be set to enable chaos monkey cpu assault on a schedule - - name: CM_CPU_CRON - value: 'OFF' - - diff --git a/pkg/spring-boot/spring-boot-chaos/environment/environment.go b/pkg/spring-boot/spring-boot-chaos/environment/environment.go index 52a4706a2..64fbb8935 100644 --- a/pkg/spring-boot/spring-boot-chaos/environment/environment.go +++ b/pkg/spring-boot/spring-boot-chaos/environment/environment.go @@ -1,6 +1,9 @@ package environment import ( + "encoding/json" + "github.com/litmuschaos/litmus-go/pkg/log" + "github.com/sirupsen/logrus" "strconv" "strings" @@ -11,8 +14,8 @@ import ( ) // GetENV fetches all the env variables from the runner pod -func GetENV(experimentDetails *experimentTypes.ExperimentDetails) { - experimentDetails.ExperimentName = types.Getenv("EXPERIMENT_NAME", "spring-boot-chaos") +func GetENV(experimentDetails *experimentTypes.ExperimentDetails, expName string) { + experimentDetails.ExperimentName = types.Getenv("EXPERIMENT_NAME", expName) experimentDetails.ChaosNamespace = types.Getenv("CHAOS_NAMESPACE", "litmus") experimentDetails.EngineName = types.Getenv("CHAOSENGINE", "") experimentDetails.ChaosDuration, _ = strconv.Atoi(types.Getenv("TOTAL_CHAOS_DURATION", "30")) @@ -36,62 +39,113 @@ func GetENV(experimentDetails *experimentTypes.ExperimentDetails) { experimentDetails.ChaosMonkeyPath = types.Getenv("CM_PATH", "/actuator/chaosmonkey") experimentDetails.ChaosMonkeyPort = types.Getenv("CM_PORT", "8080") - // Basic assault parameters - assault := experimentTypes.ChaosMonkeyAssault{} - assault.Level, _ = strconv.Atoi(types.Getenv("CM_LEVEL", "1")) - assault.Deterministic, _ = strconv.ParseBool(types.Getenv("CM_DETERMINISTIC", "true")) - assault.WatchedCustomServices = strings.Split(types.Getenv("CM_WATCHED_CUSTOM_SERVICES", ""), ",") - - // kill application assault - assault.KillApplicationActive, _ = strconv.ParseBool(types.Getenv("CM_KILL_APPLICATION_ACTIVE", "false")) - assault.KillApplicationCron = types.Getenv("CM_KILL_APPLICATION_CRON", 
"OFF") - - // Latency assault - assault.LatencyActive, _ = strconv.ParseBool(types.Getenv("CM_LATENCY_ACTIVE", "false")) - assault.LatencyRangeStart, _ = strconv.Atoi(types.Getenv("CM_LATENCY_RANGE_START", "500")) - assault.LatencyRangeEnd, _ = strconv.Atoi(types.Getenv("CM_LATENCY_RANGE_END", "500")) - - // Memory assault - assault.MemoryActive, _ = strconv.ParseBool(types.Getenv("CM_MEMORY_ACTIVE", "false")) - assault.MemoryMillisecondsHoldFilledMemory, _ = strconv.Atoi(types.Getenv("CM_MEMORY_MS_HOLD_FILLED_MEM", "90000")) - assault.MemoryMillisecondsWaitNextIncrease, _ = strconv.Atoi(types.Getenv("CM_MEMORY_MS_NEXT_INCREASE", "1000")) - assault.MemoryFillIncrementFraction, _ = strconv.ParseFloat(types.Getenv("CM_MEMORY_FILL_INC_FRACTION", "0.15"), 64) - assault.MemoryFillTargetFraction, _ = strconv.ParseFloat(types.Getenv("CM_MEMORY_FILL_TARGET_FRACTION", "0.25"), 64) - assault.MemoryCron = types.Getenv("CM_MEMORY_CRON", "OFF") - - // CPU assault - assault.CPUActive, _ = strconv.ParseBool(types.Getenv("CM_CPU_ACTIVE", "false")) - assault.CPUMillisecondsHoldLoad, _ = strconv.Atoi(types.Getenv("CM_CPU_MS_HOLD_LOAD", "90000")) - assault.CPULoadTargetFraction, _ = strconv.ParseFloat(types.Getenv("CM_CPU_LOAD_TARGET_FRACTION", "0.9"), 64) - assault.CPUCron = types.Getenv("CM_CPU_CRON", "OFF") - - // Exception assault - assault.ExceptionsActive, _ = strconv.ParseBool(types.Getenv("CM_EXCEPTIONS_ACTIVE", "false")) + level, _ := strconv.Atoi(types.Getenv("CM_LEVEL", "1")) + deterministic, _ := strconv.ParseBool(types.Getenv("CM_DETERMINISTIC", "true")) + watchedCustomServices := strings.Split(types.Getenv("CM_WATCHED_CUSTOM_SERVICES", ""), ",") + + switch expName { + case "spring-boot-app-kill": + // kill application assault + assault := experimentTypes.AppKillAssault{ + Level: level, + Deterministic: deterministic, + WatchedCustomServices: watchedCustomServices, + KillApplicationActive: true, + } + assault.KillApplicationCron = types.Getenv("CM_KILL_APPLICATION_CRON", 
"OFF") + log.InfoWithValues("[Info]: Chaos monkeys app-kill assaults details", logrus.Fields{ + "KillApplicationCron": assault.KillApplicationCron, + }) + experimentDetails.ChaosMonkeyAssault, _ = json.Marshal(assault) + case "spring-boot-latency": + // Latency assault + assault := experimentTypes.LatencyAssault{ + Level: level, + Deterministic: deterministic, + WatchedCustomServices: watchedCustomServices, + LatencyActive: true, + } + assault.LatencyRangeStart, _ = strconv.Atoi(types.Getenv("CM_LATENCY_RANGE_START", "500")) + assault.LatencyRangeEnd, _ = strconv.Atoi(types.Getenv("CM_LATENCY_RANGE_END", "500")) + log.InfoWithValues("[Info]: Chaos monkeys latency assaults details", logrus.Fields{ + "LatencyRangeStart": assault.LatencyRangeStart, + "LatencyRangeEnd": assault.LatencyRangeEnd, + }) + experimentDetails.ChaosMonkeyAssault, _ = json.Marshal(assault) + case "spring-boot-memory-stress": + // Memory assault + assault := experimentTypes.MemoryStressAssault{ + Level: level, + Deterministic: deterministic, + WatchedCustomServices: watchedCustomServices, + MemoryActive: true, + } + assault.MemoryMillisecondsHoldFilledMemory, _ = strconv.Atoi(types.Getenv("CM_MEMORY_MS_HOLD_FILLED_MEM", "90000")) + assault.MemoryMillisecondsWaitNextIncrease, _ = strconv.Atoi(types.Getenv("CM_MEMORY_MS_NEXT_INCREASE", "1000")) + assault.MemoryFillIncrementFraction, _ = strconv.ParseFloat(types.Getenv("CM_MEMORY_FILL_INC_FRACTION", "0.15"), 64) + assault.MemoryFillTargetFraction, _ = strconv.ParseFloat(types.Getenv("CM_MEMORY_FILL_TARGET_FRACTION", "0.25"), 64) + assault.MemoryCron = types.Getenv("CM_MEMORY_CRON", "OFF") + log.InfoWithValues("[Info]: Chaos monkeys memory-stress assaults details", logrus.Fields{ + "MemoryMillisecondsHoldFilledMemory": assault.MemoryMillisecondsHoldFilledMemory, + "MemoryMillisecondsWaitNextIncrease": assault.MemoryMillisecondsWaitNextIncrease, + "MemoryFillIncrementFraction": assault.MemoryFillIncrementFraction, + "MemoryFillTargetFraction": 
assault.MemoryFillTargetFraction, + "MemoryCron": assault.MemoryCron, + }) + experimentDetails.ChaosMonkeyAssault, _ = json.Marshal(assault) + case "spring-boot-cpu-stress": + // CPU assault + assault := experimentTypes.CPUStressAssault{ + Level: level, + Deterministic: deterministic, + WatchedCustomServices: watchedCustomServices, + CPUActive: true, + } + assault.CPUMillisecondsHoldLoad, _ = strconv.Atoi(types.Getenv("CM_CPU_MS_HOLD_LOAD", "90000")) + assault.CPULoadTargetFraction, _ = strconv.ParseFloat(types.Getenv("CM_CPU_LOAD_TARGET_FRACTION", "0.9"), 64) + assault.CPUCron = types.Getenv("CM_CPU_CRON", "OFF") + log.InfoWithValues("[Info]: Chaos monkeys cpu-stress assaults details", logrus.Fields{ + "CPUMillisecondsHoldLoad": assault.CPUMillisecondsHoldLoad, + "CPULoadTargetFraction": assault.CPULoadTargetFraction, + "CPUCron": assault.CPUCron, + }) + experimentDetails.ChaosMonkeyAssault, _ = json.Marshal(assault) + case "spring-boot-exceptions": + // Exception assault + assault := experimentTypes.ExceptionAssault{ + Level: level, + Deterministic: deterministic, + WatchedCustomServices: watchedCustomServices, + ExceptionsActive: true, + } - // Exception structure, will be like : {type: "", arguments: [{className: "", value: ""]} - assaultException := experimentTypes.AssaultException{} - assaultExceptionArguments := make([]experimentTypes.AssaultExceptionArgument, 0) + // Exception structure, will be like : {type: "", arguments: [{className: "", value: ""]} + assaultException := experimentTypes.AssaultException{} + assaultExceptionArguments := make([]experimentTypes.AssaultExceptionArgument, 0) - assaultException.Type = types.Getenv("CM_EXCEPTIONS_TYPE", "") + assaultException.Type = types.Getenv("CM_EXCEPTIONS_TYPE", "") - envAssaultExceptionArguments := strings.Split(types.Getenv("CM_EXCEPTIONS_ARGUMENTS", ""), ",") + envAssaultExceptionArguments := strings.Split(types.Getenv("CM_EXCEPTIONS_ARGUMENTS", ""), ",") - for _, argument := range 
envAssaultExceptionArguments { - splitArgument := strings.Split(argument, ":") - assaultExceptionArgument := experimentTypes.AssaultExceptionArgument{ - ClassName: splitArgument[0], - Value: "", - } - if len(splitArgument) > 0 { - assaultExceptionArgument.Value = splitArgument[1] + for _, argument := range envAssaultExceptionArguments { + splitArgument := strings.Split(argument, ":") + assaultExceptionArgument := experimentTypes.AssaultExceptionArgument{ + ClassName: splitArgument[0], + Value: "", + } + if len(splitArgument) > 0 { + assaultExceptionArgument.Value = splitArgument[1] + } + assaultExceptionArguments = append(assaultExceptionArguments, assaultExceptionArgument) } - assaultExceptionArguments = append(assaultExceptionArguments, assaultExceptionArgument) + assaultException.Arguments = assaultExceptionArguments + assault.Exception = assaultException + log.InfoWithValues("[Info]: Chaos monkeys exceptions assaults details", logrus.Fields{ + "Exception Type": assault.Exception.Type, + "Exception Arguments": assault.Exception.Arguments, + }) + experimentDetails.ChaosMonkeyAssault, _ = json.Marshal(assault) } - assaultException.Arguments = assaultExceptionArguments - assault.Exception = assaultException - - // End of assault building - experimentDetails.ChaosMonkeyAssault = assault // Building watchers watchers := experimentTypes.ChaosMonkeyWatchers{ diff --git a/pkg/spring-boot/spring-boot-chaos/types/types.go b/pkg/spring-boot/spring-boot-chaos/types/types.go index e96f1502b..479d3c722 100644 --- a/pkg/spring-boot/spring-boot-chaos/types/types.go +++ b/pkg/spring-boot/spring-boot-chaos/types/types.go @@ -30,7 +30,7 @@ type ExperimentDetails struct { TargetPodList v1.PodList // Chaos monkey parameters - ChaosMonkeyAssault ChaosMonkeyAssault + ChaosMonkeyAssault []byte ChaosMonkeyWatchers ChaosMonkeyWatchers ChaosMonkeyPath string ChaosMonkeyPort string @@ -44,27 +44,51 @@ type ChaosMonkeyAssaultRevert struct { ExceptionsActive bool `json:"exceptionsActive"` } 
-type ChaosMonkeyAssault struct { - Level int `json:"level"` - Deterministic bool `json:"deterministic"` - LatencyRangeStart int `json:"latencyRangeStart"` - LatencyRangeEnd int `json:"latencyRangeEnd"` - LatencyActive bool `json:"latencyActive"` - ExceptionsActive bool `json:"exceptionsActive"` - Exception AssaultException `json:"exceptions"` - KillApplicationActive bool `json:"killApplicationActive"` - KillApplicationCron string `json:"killApplicationCronExpression"` - WatchedCustomServices []string `json:"watchedCustomServices"` - MemoryActive bool `json:"memoryActive"` - MemoryMillisecondsHoldFilledMemory int `json:"memoryMillisecondsHoldFilledMemory"` - MemoryMillisecondsWaitNextIncrease int `json:"memoryMillisecondsWaitNextIncrease"` - MemoryFillIncrementFraction float64 `json:"memoryFillIncrementFraction"` - MemoryFillTargetFraction float64 `json:"memoryFillTargetFraction"` - MemoryCron string `json:"memoryCronExpression"` - CPUActive bool `json:"cpuActive"` - CPUMillisecondsHoldLoad int `json:"cpuMillisecondsHoldLoad"` - CPULoadTargetFraction float64 `json:"cpuLoadTargetFraction"` - CPUCron string `json:"cpuCronExpression"` +type CPUStressAssault struct { + Level int `json:"level"` + Deterministic bool `json:"deterministic"` + WatchedCustomServices []string `json:"watchedCustomServices"` + CPUActive bool `json:"cpuActive"` + CPUMillisecondsHoldLoad int `json:"cpuMillisecondsHoldLoad"` + CPULoadTargetFraction float64 `json:"cpuLoadTargetFraction"` + CPUCron string `json:"cpuCronExpression"` +} + +type MemoryStressAssault struct { + Level int `json:"level"` + Deterministic bool `json:"deterministic"` + WatchedCustomServices []string `json:"watchedCustomServices"` + MemoryActive bool `json:"memoryActive"` + MemoryMillisecondsHoldFilledMemory int `json:"memoryMillisecondsHoldFilledMemory"` + MemoryMillisecondsWaitNextIncrease int `json:"memoryMillisecondsWaitNextIncrease"` + MemoryFillIncrementFraction float64 `json:"memoryFillIncrementFraction"` + 
MemoryFillTargetFraction float64 `json:"memoryFillTargetFraction"` + MemoryCron string `json:"memoryCronExpression"` +} + +type LatencyAssault struct { + Level int `json:"level"` + Deterministic bool `json:"deterministic"` + WatchedCustomServices []string `json:"watchedCustomServices"` + LatencyRangeStart int `json:"latencyRangeStart"` + LatencyRangeEnd int `json:"latencyRangeEnd"` + LatencyActive bool `json:"latencyActive"` +} + +type AppKillAssault struct { + Level int `json:"level"` + Deterministic bool `json:"deterministic"` + WatchedCustomServices []string `json:"watchedCustomServices"` + KillApplicationActive bool `json:"killApplicationActive"` + KillApplicationCron string `json:"killApplicationCronExpression"` +} + +type ExceptionAssault struct { + Level int `json:"level"` + Deterministic bool `json:"deterministic"` + WatchedCustomServices []string `json:"watchedCustomServices"` + ExceptionsActive bool `json:"exceptionsActive"` + Exception AssaultException `json:"exceptions"` } type ChaosMonkeyWatchers struct { From f06936493e99e5f48b3d348d2c414ada523b2bd7 Mon Sep 17 00:00:00 2001 From: Soumya Ghosh Dastidar <44349253+gdsoumya@users.noreply.github.com> Date: Mon, 14 Nov 2022 12:49:55 +0530 Subject: [PATCH 3/3] feat: add resource name filtering in k8s probe (#598) * feat: add resource name filtering in k8s probe Signed-off-by: Soumya Ghosh Dastidar Signed-off-by: Rocio Roman --- .../azure-disk-loss/lib/azure-disk-loss.go | 2 +- .../node-memory-hog/lib/node-memory-hog.go | 2 +- go.mod | 2 +- go.sum | 4 +- pkg/clients/clientset.go | 2 +- pkg/probe/k8sprobe.go | 139 ++++++++++++++---- 6 files changed, 115 insertions(+), 36 deletions(-) diff --git a/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go b/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go index 867506603..a2d672bc0 100644 --- a/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go +++ b/chaoslib/litmus/azure-disk-loss/lib/azure-disk-loss.go @@ -275,7 +275,7 @@ func 
abortWatcher(experimentsDetails *experimentTypes.ExperimentDetails, attache } if diskStatusString != "Attached" { if err := diskStatus.AttachDisk(experimentsDetails.SubscriptionID, experimentsDetails.ResourceGroup, instanceName, experimentsDetails.ScaleSet, diskList); err != nil { - log.Errorf("failed to attach disk '%v, manual revert required, err: %v", err) + log.Errorf("failed to attach disk, manual revert required, err: %v", err) } else { common.SetTargets(*disk.Name, "re-attached", "VirtualDisk", chaosDetails) } diff --git a/chaoslib/litmus/node-memory-hog/lib/node-memory-hog.go b/chaoslib/litmus/node-memory-hog/lib/node-memory-hog.go index f2321cfb1..fb4691cd5 100644 --- a/chaoslib/litmus/node-memory-hog/lib/node-memory-hog.go +++ b/chaoslib/litmus/node-memory-hog/lib/node-memory-hog.go @@ -284,7 +284,7 @@ func calculateMemoryConsumption(experimentsDetails *experimentTypes.ExperimentDe //Get the percentage of memory under chaos wrt allocatable memory totalMemoryConsumption = int((float64(memoryForChaos) / float64(memoryAllocatable)) * 100) if totalMemoryConsumption > 100 { - log.Infof("[Info]: PercentageOfMemoryCapacity To Be Used: %d percent, which is more than 100 percent (%d percent) of Allocatable Memory, so the experiment will only consume upto 100 percent of Allocatable Memory", experimentsDetails.MemoryConsumptionPercentage, totalMemoryConsumption) + log.Infof("[Info]: PercentageOfMemoryCapacity To Be Used: %v percent, which is more than 100 percent (%d percent) of Allocatable Memory, so the experiment will only consume upto 100 percent of Allocatable Memory", experimentsDetails.MemoryConsumptionPercentage, totalMemoryConsumption) MemoryConsumption = "100%" } else { log.Infof("[Info]: PercentageOfMemoryCapacity To Be Used: %v percent, which is %d percent of Allocatable Memory", experimentsDetails.MemoryConsumptionPercentage, totalMemoryConsumption) diff --git a/go.mod b/go.mod index b73b59481..453a174d5 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ 
require ( github.com/aws/aws-sdk-go v1.38.59 github.com/containerd/cgroups v1.0.1 github.com/kyokomi/emoji v2.2.4+incompatible - github.com/litmuschaos/chaos-operator v0.0.0-20220929101337-868b2827f820 + github.com/litmuschaos/chaos-operator v0.0.0-20221114055503-3d12d34d2032 github.com/pkg/errors v0.9.1 github.com/sirupsen/logrus v1.7.0 github.com/spf13/cobra v1.1.1 diff --git a/go.sum b/go.sum index f63cf932f..a0608613c 100644 --- a/go.sum +++ b/go.sum @@ -767,8 +767,8 @@ github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9 github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM= github.com/lightstep/lightstep-tracer-go v0.18.0/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4= github.com/lithammer/dedent v1.1.0/go.mod h1:jrXYCQtgg0nJiN+StA2KgR7w6CiQNv9Fd/Z9BP0jIOc= -github.com/litmuschaos/chaos-operator v0.0.0-20220929101337-868b2827f820 h1:xMlb6eMbWzdR/2IB6F095p0NDadccZIkiovJBE9fg9I= -github.com/litmuschaos/chaos-operator v0.0.0-20220929101337-868b2827f820/go.mod h1:CJGiHqC06PQkIBySk/JroB7B2zFebDbkhQ1A6ZbYmHA= +github.com/litmuschaos/chaos-operator v0.0.0-20221114055503-3d12d34d2032 h1:VeVpXvz5JVj28rQZs4DI101b+vVKHIKlUNWGfbDF6V0= +github.com/litmuschaos/chaos-operator v0.0.0-20221114055503-3d12d34d2032/go.mod h1:CJGiHqC06PQkIBySk/JroB7B2zFebDbkhQ1A6ZbYmHA= github.com/litmuschaos/elves v0.0.0-20201107015738-552d74669e3c/go.mod h1:DsbHGNUq/78NZozWVVI9Q6eBei4I+JjlkkD5aibJ3MQ= github.com/logrusorgru/aurora v0.0.0-20181002194514-a7b3b318ed4e/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4= github.com/lovoo/gcloud-opentracing v0.3.0/go.mod h1:ZFqk2y38kMDDikZPAK7ynTTGuyt17nSPdS3K5e+ZTBY= diff --git a/pkg/clients/clientset.go b/pkg/clients/clientset.go index 732038dc8..5cc252638 100644 --- a/pkg/clients/clientset.go +++ b/pkg/clients/clientset.go @@ -83,7 +83,7 @@ func buildConfigFromFlags(masterUrl, kubeconfigPath string) (*restclient.Config, 
if err == nil { return kubeconfig, nil } - klog.Warningf("Neither --kubeconfig nor --master was specified. Using the inClusterConfig. Error creating inClusterConfig: ", err) + klog.Warningf("Neither --kubeconfig nor --master was specified. Using the inClusterConfig. Error creating inClusterConfig: %v", err) } return clientcmd.NewNonInteractiveDeferredLoadingClientConfig( &clientcmd.ClientConfigLoadingRules{ExplicitPath: kubeconfigPath}, diff --git a/pkg/probe/k8sprobe.go b/pkg/probe/k8sprobe.go index 443bbc809..c1079051f 100644 --- a/pkg/probe/k8sprobe.go +++ b/pkg/probe/k8sprobe.go @@ -2,6 +2,7 @@ package probe import ( "context" + k8serrors "k8s.io/apimachinery/pkg/api/errors" "strings" "time" @@ -42,7 +43,7 @@ func prepareK8sProbe(probe v1alpha1.ProbeAttributes, resultDetails *types.Result // triggerK8sProbe run the k8s probe command func triggerK8sProbe(probe v1alpha1.ProbeAttributes, clients clients.ClientSets, resultDetails *types.ResultDetails) error { - inputs := probe.K8sProbeInputs + inputs := &probe.K8sProbeInputs // It parse the templated command and return normal string // if command doesn't have template, it will return the same command @@ -56,6 +57,19 @@ func triggerK8sProbe(probe v1alpha1.ProbeAttributes, clients clients.ClientSets, return err } + inputs.ResourceNames, err = parseCommand(inputs.ResourceNames, resultDetails) + if err != nil { + return err + } + + parsedResourceNames := []string{} + if inputs.ResourceNames != "" { + parsedResourceNames = strings.Split(inputs.ResourceNames, ",") + for i := range parsedResourceNames { + parsedResourceNames[i] = strings.TrimSpace(parsedResourceNames[i]) + } + } + // it will retry for some retry count, in each iterations of try it contains following things // it contains a timeout per iteration of retry. 
if the timeout expires without success then it will go to next try // for a timeout, it will run the command, if it fails wait for the iterval and again execute the command until timeout expires @@ -77,32 +91,19 @@ func triggerK8sProbe(probe v1alpha1.ProbeAttributes, clients clients.ClientSets, return err } case "delete": - if err = deleteResource(probe, gvr, clients); err != nil { + if err = deleteResource(probe, gvr, parsedResourceNames, clients); err != nil { log.Errorf("the %v k8s probe has Failed, err: %v", probe.Name, err) return err } case "present": - resourceList, err := clients.DynamicClient.Resource(gvr).Namespace(inputs.Namespace).List(context.Background(), v1.ListOptions{ - FieldSelector: inputs.FieldSelector, - LabelSelector: inputs.LabelSelector, - }) - if err != nil { + if err = resourcesPresent(probe, gvr, parsedResourceNames, clients); err != nil { log.Errorf("the %v k8s probe has Failed, err: %v", probe.Name, err) - return errors.Errorf("unable to list the resources with matching selector, err: %v", err) - } else if len(resourceList.Items) == 0 { - return errors.Errorf("no resource found with provided selectors") + return err } case "absent": - resourceList, err := clients.DynamicClient.Resource(gvr).Namespace(inputs.Namespace).List(context.Background(), v1.ListOptions{ - FieldSelector: inputs.FieldSelector, - LabelSelector: inputs.LabelSelector, - }) - if err != nil { - return errors.Errorf("unable to list the resources with matching selector, err: %v", err) - } - if len(resourceList.Items) != 0 { + if err = resourcesAbsent(probe, gvr, parsedResourceNames, clients); err != nil { log.Errorf("the %v k8s probe has Failed, err: %v", probe.Name, err) - return errors.Errorf("resource is not deleted yet due to, err: %v", err) + return err } default: return errors.Errorf("operation type '%s' not supported in the k8s probe", inputs.Operation) @@ -165,21 +166,99 @@ func createResource(probe v1alpha1.ProbeAttributes, gvr schema.GroupVersionResou } // 
deleteResource deletes the resource with matching label & field selector -func deleteResource(probe v1alpha1.ProbeAttributes, gvr schema.GroupVersionResource, clients clients.ClientSets) error { - resourceList, err := clients.DynamicClient.Resource(gvr).Namespace(probe.K8sProbeInputs.Namespace).List(context.Background(), v1.ListOptions{ - FieldSelector: probe.K8sProbeInputs.FieldSelector, - LabelSelector: probe.K8sProbeInputs.LabelSelector, - }) - if err != nil { - return errors.Errorf("unable to list the resources with matching selector, err: %v", err) - } else if len(resourceList.Items) == 0 { - return errors.Errorf("no resource found with provided selectors") +func deleteResource(probe v1alpha1.ProbeAttributes, gvr schema.GroupVersionResource, parsedResourceNames []string, clients clients.ClientSets) error { + // resource name has higher priority + if len(parsedResourceNames) > 0 { + // check if all resources are available + if err := areResourcesWithNamePresent(probe, gvr, parsedResourceNames, clients); err != nil { + return err + } + // delete resources + for _, res := range parsedResourceNames { + if err := clients.DynamicClient.Resource(gvr).Namespace(probe.K8sProbeInputs.Namespace).Delete(context.Background(), res, v1.DeleteOptions{}); err != nil { + return err + } + } + } else { + resourceList, err := clients.DynamicClient.Resource(gvr).Namespace(probe.K8sProbeInputs.Namespace).List(context.Background(), v1.ListOptions{ + FieldSelector: probe.K8sProbeInputs.FieldSelector, + LabelSelector: probe.K8sProbeInputs.LabelSelector, + }) + if err != nil { + return errors.Errorf("unable to list the resources with matching selector, err: %v", err) + } else if len(resourceList.Items) == 0 { + return errors.Errorf("no resource found with provided selectors") + } + + for index := range resourceList.Items { + if err = clients.DynamicClient.Resource(gvr).Namespace(probe.K8sProbeInputs.Namespace).Delete(context.Background(), resourceList.Items[index].GetName(),
v1.DeleteOptions{}); err != nil { + return err + } + } } + return nil +} - for index := range resourceList.Items { - if err = clients.DynamicClient.Resource(gvr).Namespace(probe.K8sProbeInputs.Namespace).Delete(context.Background(), resourceList.Items[index].GetName(), v1.DeleteOptions{}); err != nil { +func resourcesPresent(probe v1alpha1.ProbeAttributes, gvr schema.GroupVersionResource, parsedResourceNames []string, clients clients.ClientSets) error { + // resource name has higher priority + if len(parsedResourceNames) > 0 { + // check if all resources are available + if err := areResourcesWithNamePresent(probe, gvr, parsedResourceNames, clients); err != nil { return err } + } else { + resourceList, err := clients.DynamicClient.Resource(gvr).Namespace(probe.K8sProbeInputs.Namespace).List(context.Background(), v1.ListOptions{ + FieldSelector: probe.K8sProbeInputs.FieldSelector, + LabelSelector: probe.K8sProbeInputs.LabelSelector, + }) + if err != nil { + log.Errorf("the %v k8s probe has Failed, err: %v", probe.Name, err) + return errors.Errorf("unable to list the resources with matching selector, err: %v", err) + } else if len(resourceList.Items) == 0 { + return errors.Errorf("no resource found with provided selectors") + } + } + return nil +} + +func areResourcesWithNamePresent(probe v1alpha1.ProbeAttributes, gvr schema.GroupVersionResource, parsedResourceNames []string, clients clients.ClientSets) error { + for _, res := range parsedResourceNames { + resource, err := clients.DynamicClient.Resource(gvr).Namespace(probe.K8sProbeInputs.Namespace).Get(context.Background(), res, v1.GetOptions{}) + if err != nil { + return errors.Errorf("unable to get the resources with name %v, err: %v", res, err) + } else if resource == nil { + return errors.Errorf("unable to get the resources with name %v", res) + } + } + return nil +} + +func resourcesAbsent(probe v1alpha1.ProbeAttributes, gvr schema.GroupVersionResource, parsedResourceNames []string, clients clients.ClientSets) 
error { + // resource name has higher priority + if len(parsedResourceNames) > 0 { + // check if all resources are absent + for _, res := range parsedResourceNames { + resource, err := clients.DynamicClient.Resource(gvr).Namespace(probe.K8sProbeInputs.Namespace).Get(context.Background(), res, v1.GetOptions{}) + if err != nil { + // ignore not found error, that is the expected outcome + if !k8serrors.IsNotFound(err) { + return errors.Errorf("unable to get the resources with name %v from k8s, err: %v", res, err) + } + } else if resource != nil { + return errors.Errorf("resource '%v' still exists but is expected to be absent", res) + } + } + } else { + resourceList, err := clients.DynamicClient.Resource(gvr).Namespace(probe.K8sProbeInputs.Namespace).List(context.Background(), v1.ListOptions{ + FieldSelector: probe.K8sProbeInputs.FieldSelector, + LabelSelector: probe.K8sProbeInputs.LabelSelector, + }) + if err != nil { + return errors.Errorf("unable to list the resources with matching selector, err: %v", err) + } + if len(resourceList.Items) != 0 { + return errors.Errorf("resource with provided selectors still exists, found %v resources with matching selectors", len(resourceList.Items)) + } } return nil }