From 9abf08b4c5c8d30508054e722523dd52508f6fb5 Mon Sep 17 00:00:00 2001 From: robnester-rh Date: Thu, 12 Dec 2024 12:18:52 -0500 Subject: [PATCH] Add workers flag to `validate input` command This commit reworks the concurrency model of `validate input` to be like that found in `validate image`. This commit adds a `--workers` flag, with a default of 5 that can be used to configure how many simultaneous fetches occur. This avoids situations where validating multiple input files can trigger HTTP 429 errors. Signed-off-by: robnester-rh --- cmd/validate/input.go | 63 ++-- cmd/validate/input_test.go | 290 ++++++++++++++++++ .../modules/ROOT/pages/ec_validate_input.adoc | 1 + 3 files changed, 334 insertions(+), 20 deletions(-) diff --git a/cmd/validate/input.go b/cmd/validate/input.go index d5c36d275..31821ad4e 100644 --- a/cmd/validate/input.go +++ b/cmd/validate/input.go @@ -23,9 +23,9 @@ import ( "runtime/trace" "sort" "strings" - "sync" hd "github.com/MakeNowJust/heredoc" + log "github.com/sirupsen/logrus" "github.com/spf13/cobra" "github.com/enterprise-contract/ec-cli/internal/applicationsnapshot" @@ -50,8 +50,10 @@ func validateInputCmd(validate InputValidationFunc) *cobra.Command { policy policy.Policy policyConfiguration string strict bool + workers int }{ - strict: true, + strict: true, + workers: 5, } cmd := &cobra.Command{ Use: "input", @@ -116,23 +118,25 @@ func validateInputCmd(validate InputValidationFunc) *cobra.Command { policyInput []byte } - ch := make(chan result, len(data.filePaths)) + showSuccesses, _ := cmd.Flags().GetBool("show-successes") - var lock sync.WaitGroup + // Set numWorkers to the value from our flag. The default is 5. + numWorkers := data.workers - showSuccesses, _ := cmd.Flags().GetBool("show-successes") + jobs := make(chan string, len(data.filePaths)) + results := make(chan result, len(data.filePaths)) - for _, f := range data.filePaths { - lock.Add(1) - go func(fpath string) { + // worker function processes one file path at a time. + worker := func(id int, jobs <-chan string, results chan<- result) { + log.Debugf("Starting worker %d", id) + for fpath := range jobs { ctx := cmd.Context() var task *trace.Task if trace.IsEnabled() { ctx, task = trace.NewTask(ctx, "ec:validate-input") + trace.Logf(ctx, "", "workerID=%d, file=%s", id, fpath) } - defer lock.Done() - out, err := validate(ctx, fpath, data.policy, data.info) res := result{ err: err, @@ -141,7 +145,7 @@ func validateInputCmd(validate InputValidationFunc) *cobra.Command { Success: err == nil, }, } - // Skip on err to not panic. Error is return on routine completion. + if err == nil { res.input.Violations = out.Violations() res.input.Warnings = out.Warnings() @@ -151,43 +155,59 @@ func validateInputCmd(validate InputValidationFunc) *cobra.Command { if showSuccesses { res.input.Successes = successes } - res.data = out.Data + if containsOutput(data.output, "data") { + res.data = out.Data + } + res.input.Success = (len(res.input.Violations) == 0) + res.policyInput = out.PolicyInput } - res.input.Success = err == nil && len(res.input.Violations) == 0 if task != nil { task.End() } - ch <- res - }(f) + results <- res + } + log.Debugf("Done with worker %d", id) } - lock.Wait() - close(ch) + // Start the worker pool + for i := 0; i < numWorkers; i++ { + go worker(i, jobs, results) + } + + // Push all jobs (file paths) to the jobs channel + for _, f := range data.filePaths { + jobs <- f + } + close(jobs) var inputs []input.Input var evaluatorData [][]evaluator.Data var manyPolicyInput [][]byte var allErrors error = nil - for r := range ch { + // Collect all results + for i := 0; i < len(data.filePaths); i++ { + r := <-results if r.err != nil { e := fmt.Errorf("error validating file %s: %w", r.input.FilePath, r.err) allErrors = errors.Join(allErrors, e) } else { inputs = append(inputs, r.input) - // evaluator data is duplicated per component, so only collect it once. + // evaluator data is duplicated per input, so only collect it once. if len(evaluatorData) == 0 && containsOutput(data.output, "data") { evaluatorData = append(evaluatorData, r.data) } manyPolicyInput = append(manyPolicyInput, r.policyInput) } } + close(results) + if allErrors != nil { return allErrors } - // Ensure some consistency in output. + // Sort inputs for consistent output sort.Slice(inputs, func(i, j int) bool { return inputs[i].FilePath > inputs[j].FilePath }) @@ -240,6 +260,9 @@ func validateInputCmd(validate InputValidationFunc) *cobra.Command { violations, include the title and the description of the failed policy rule.`)) + cmd.Flags().IntVar(&data.workers, "workers", data.workers, hd.Doc(` + Number of workers to use for validation. Defaults to 5.`)) + if err := cmd.MarkFlagRequired("file"); err != nil { panic(err) } diff --git a/cmd/validate/input_test.go b/cmd/validate/input_test.go index 937c02d25..66d64e7ae 100644 --- a/cmd/validate/input_test.go +++ b/cmd/validate/input_test.go @@ -17,3 +17,293 @@ //go:build unit package validate + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "sort" + "testing" + + "github.com/spf13/afero" + "github.com/spf13/cobra" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/enterprise-contract/ec-cli/internal/evaluator" + "github.com/enterprise-contract/ec-cli/internal/output" + "github.com/enterprise-contract/ec-cli/internal/policy" + "github.com/enterprise-contract/ec-cli/internal/utils" + "github.com/enterprise-contract/ec-cli/internal/utils/oci" + "github.com/enterprise-contract/ec-cli/internal/utils/oci/fake" +) + +// mockValidate is a helper function that returns a specified Output and error for testing. +func mockValidate(out *output.Output, err error) InputValidationFunc { + return func(_ context.Context, fpath string, _ policy.Policy, _ bool) (*output.Output, error) { + // This function ignores the actual file content and always returns the provided output and error. + return out, err + } +} + +func setUpValidateInputCmd(validate InputValidationFunc, fs afero.Fs) (*cobra.Command, *bytes.Buffer) { + cmd := validateInputCmd(validate) + + // Create a fake client and context with a memory filesystem. + client := fake.FakeClient{} + ctx := utils.WithFS(context.Background(), fs) + ctx = oci.WithClient(ctx, &client) + cmd.SetContext(ctx) + + var out bytes.Buffer + cmd.SetOut(&out) + + return cmd, &out +} + +func Test_ValidateInputCmd_SuccessSingleFile(t *testing.T) { + fs := afero.NewMemMapFs() + // Write a dummy file to simulate input + require.NoError(t, afero.WriteFile(fs, "/input.yaml", []byte("some: data"), 0644)) + + // Mock validator: returns success with no violations, one success result. + outMock := &output.Output{ + PolicyCheck: []evaluator.Outcome{ + { + Successes: []evaluator.Result{ + {Message: "Everything looks great!"}, + }, + }, + }, + } + + cmd, buf := setUpValidateInputCmd(mockValidate(outMock, nil), fs) + cmd.SetArgs([]string{ + "input", + "--file", "/input.yaml", + "--policy", `{"publicKey": "testkey"}`, + }) + + utils.SetTestRekorPublicKey(t) + err := cmd.Execute() + assert.NoError(t, err) + + // Parse the JSON output + var outJSON map[string]interface{} + err = json.Unmarshal(buf.Bytes(), &outJSON) + assert.NoError(t, err) + + // Ensure success is true + assert.True(t, outJSON["success"].(bool)) + + inputFiles, ok := outJSON["filepaths"].([]interface{}) + if !ok { + t.Fatal("expected 'filepaths' key in output") + } + assert.Len(t, inputFiles, 1) + inputObj := inputFiles[0].(map[string]interface{}) + assert.Equal(t, "/input.yaml", inputObj["filepath"]) + assert.True(t, inputObj["success"].(bool)) +} + +func Test_ValidateInputCmd_SuccessMultipleFiles(t *testing.T) { + fs := afero.NewMemMapFs() + require.NoError(t, afero.WriteFile(fs, "/input1.yaml", []byte("some: data"), 0644)) + require.NoError(t, afero.WriteFile(fs, "/input2.yaml", []byte("other: data"), 0644)) + + // Mock validator: always returns success. + outMock := &output.Output{ + PolicyCheck: []evaluator.Outcome{ + { + Successes: []evaluator.Result{ + {Message: "Pass"}, + }, + }, + }, + } + + cmd, buf := setUpValidateInputCmd(mockValidate(outMock, nil), fs) + cmd.SetArgs([]string{ + "input", + "--file", "/input1.yaml", + "--file", "/input2.yaml", + "--policy", `{"name":"Default","description":"Stuff and things","sources":[{"name":"Default","policy":["/bacon/and/eggs/policy/lib","/bacon/and/eggs/policy/release"],"data":["/bacon/and/eggs/example/data"],"config":{"include":["sbom_cyclonedx"],"exclude":[]}}]}`, + }) + + utils.SetTestRekorPublicKey(t) + err := cmd.Execute() + assert.NoError(t, err) + + var outJSON map[string]interface{} + err = json.Unmarshal(buf.Bytes(), &outJSON) + assert.NoError(t, err) + assert.True(t, outJSON["success"].(bool)) + + inputFiles, ok := outJSON["filepaths"].([]interface{}) + if !ok { + t.Fatal("expected 'filepaths' key in output") + } + assert.Len(t, inputFiles, 2) + + // Verify sorting by filepath descending as per code + filePaths := []string{} + for _, input := range inputFiles { + f := input.(map[string]interface{})["filepath"].(string) + filePaths = append(filePaths, f) + } + sorted := append([]string{}, filePaths...) + sort.Slice(sorted, func(i, j int) bool { + return sorted[i] > sorted[j] + }) + assert.Equal(t, sorted, filePaths) +} + +func Test_ValidateInputCmd_Failure(t *testing.T) { + fs := afero.NewMemMapFs() + require.NoError(t, afero.WriteFile(fs, "/bad.yaml", []byte("invalid"), 0644)) + + // Mock validator: returns an error + cmd, _ := setUpValidateInputCmd(mockValidate(nil, errors.New("validation failed")), fs) + cmd.SetArgs([]string{ + "input", + "--file", "/bad.yaml", + "--policy", `{"publicKey": "testkey"}`, + }) + + utils.SetTestRekorPublicKey(t) + err := cmd.Execute() + assert.Error(t, err) + assert.EqualError(t, err, "error validating file /bad.yaml: validation failed") +} + +func Test_ValidateInputCmd_StrictMode(t *testing.T) { + fs := afero.NewMemMapFs() + require.NoError(t, afero.WriteFile(fs, "/file.yaml", []byte("some: data"), 0644)) + + // Mock validator: returns no error, but a violation. + outMock := &output.Output{ + PolicyCheck: []evaluator.Outcome{ + { + Failures: []evaluator.Result{ + {Message: "Some violation"}, + }, + }, + }, + } + + cmd, _ := setUpValidateInputCmd(mockValidate(outMock, nil), fs) + cmd.SetArgs([]string{ + "input", + "--file", "/file.yaml", + "--policy", `{"publicKey": "testkey"}`, + "--strict", "true", + }) + + utils.SetTestRekorPublicKey(t) + err := cmd.Execute() + assert.Error(t, err) + assert.Contains(t, err.Error(), "success criteria not met") +} + +func Test_ValidateInputCmd_NonStrictMode(t *testing.T) { + fs := afero.NewMemMapFs() + require.NoError(t, afero.WriteFile(fs, "/file.yaml", []byte("some: data"), 0644)) + + // Mock validator: returns no error but a violation (should not cause non-zero exit in non-strict mode). + outMock := &output.Output{ + PolicyCheck: []evaluator.Outcome{ + { + Failures: []evaluator.Result{ + {Message: "Some violation"}, + }, + }, + }, + } + + cmd, _ := setUpValidateInputCmd(mockValidate(outMock, nil), fs) + cmd.SetArgs([]string{ + "input", + "--file", "/file.yaml", + "--policy", `{"publicKey": "testkey"}`, + "--strict", "false", + }) + + utils.SetTestRekorPublicKey(t) + + // Capture output for assertions + outputBuffer := &bytes.Buffer{} + cmd.SetOut(outputBuffer) + cmd.SetErr(outputBuffer) + + // Execute the command + err := cmd.Execute() + + // Ensure no error is returned in non-strict mode + assert.Error(t, err) + + // Check that the output mentions violations, but the command succeeds + output := outputBuffer.String() + assert.Contains(t, output, "Some violation") + assert.Contains(t, output, "success criteria not met") +} + +func Test_ValidateInputCmd_NoPolicyProvided(t *testing.T) { + fs := afero.NewMemMapFs() + require.NoError(t, afero.WriteFile(fs, "/file.yaml", []byte("some: data"), 0644)) + + cmd, _ := setUpValidateInputCmd(nil, fs) + cmd.SetArgs([]string{ + "input", + "--file", "/file.yaml", + }) + + err := cmd.Execute() + assert.Error(t, err) + assert.Contains(t, err.Error(), "required flag(s) \"policy\" not set") +} + +func Test_ValidateInputCmd_NoFileProvided(t *testing.T) { + cmd, _ := setUpValidateInputCmd(nil, afero.NewMemMapFs()) + cmd.SetArgs([]string{ + "input", + "--policy", `{"publicKey":"testkey"}`, + }) + err := cmd.Execute() + assert.Error(t, err) + assert.Contains(t, err.Error(), "required flag(s) \"file\" not set") +} + +func Test_ValidateInputCmd_PolicyParsingError(t *testing.T) { + fs := afero.NewMemMapFs() + require.NoError(t, afero.WriteFile(fs, "/file.yaml", []byte("some: data"), 0644)) + + cmd, _ := setUpValidateInputCmd(nil, fs) + cmd.SetArgs([]string{ + "input", + "--file", "/file.yaml", + "--policy", `{"invalidjson":"}`, + }) + + err := cmd.Execute() + assert.Error(t, err) + // Adjust the assertion if a different error message is thrown by your policy parser + assert.Contains(t, err.Error(), "unable to parse EnterpriseContractPolicySpec") +} + +func Test_ValidateInputCmd_EmptyPolicyFile(t *testing.T) { + fs := afero.NewMemMapFs() + require.NoError(t, afero.WriteFile(fs, "/file.yaml", []byte("data"), 0644)) + require.NoError(t, afero.WriteFile(fs, "/policy.yaml", []byte{}, 0644)) + + cmd, _ := setUpValidateInputCmd(nil, fs) + cmd.SetArgs([]string{ + "input", + "--file", "/file.yaml", + "--policy", "/policy.yaml", + }) + + err := cmd.Execute() + assert.Error(t, err) + assert.Contains(t, err.Error(), "file /policy.yaml is empty") +} diff --git a/docs/modules/ROOT/pages/ec_validate_input.adoc b/docs/modules/ROOT/pages/ec_validate_input.adoc index a8f6ad930..8310911df 100644 --- a/docs/modules/ROOT/pages/ec_validate_input.adoc +++ b/docs/modules/ROOT/pages/ec_validate_input.adoc @@ -58,6 +58,7 @@ mark (?) sign, for example: --output text=output.txt?show-successes=false * git reference (github.com/user/repo//default?ref=main), or * inline JSON ('{sources: {...}}')") -s, --strict:: Return non-zero status on non-successful validation (Default: true) +--workers:: Number of workers to use for validation. Defaults to 5. (Default: 5) == Options inherited from parent commands