diff --git a/CHANGELOG.md b/CHANGELOG.md
index c87b846..af8d43b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## v0.16.0 (Unreleased)
+
+`Sanitize` now only removes property `pattern` regexes not supported by Go.
+
 ## v0.15.0 (January 26, 2022)
 
 Expand properties in arrays of objects.
diff --git a/sanitize.go b/sanitize.go
index c65e931..98e1f92 100644
--- a/sanitize.go
+++ b/sanitize.go
@@ -1,20 +1,64 @@
 package cfschema
 
 import (
+	"bufio"
+	"encoding/json"
 	"regexp"
+	"strings"
 )
 
 var (
-	sanitizePattern           = regexp.MustCompile(`(?m)^(\s+"pattern"\s*:\s*)".*"`)
+	sanitizePattern           = regexp.MustCompile(`^(\s*"pattern"\s*:\s*)"(.*)"(\s*,?\s*)$`)
 	sanitizePatternProperties = regexp.MustCompile(`(?m)^(\s+"patternProperties"\s*:\s*{\s*)".*?"`)
 )
 
 // Sanitize returns a sanitized copy of the specified JSON Schema document.
-// The santized copy rewrites all pattern and patternProperty regexes to the empty string,
-// working around any problems with JSON Schema regex validation.
-func Sanitize(document string) string {
-	document = sanitizePattern.ReplaceAllString(document, `$1""`)
+// The sanitized copy works around any problems with JSON Schema regex validation by:
+//   - Rewriting all patternProperty regexes to the empty string (the regex is never used anyway)
+//   - Rewriting all unsupported (valid for ECMA-262 but not for Go) pattern regexes to the empty string
+//
+// Only a "pattern" property that occupies a whole line is inspected, and every line
+// of the returned document is terminated by "\n".
+func Sanitize(document string) (string, error) {
 	document = sanitizePatternProperties.ReplaceAllString(document, `$1""`)
 
-	return document
+	var sb strings.Builder
+	sb.Grow(len(document) + 1)
+
+	scanner := bufio.NewScanner(strings.NewReader(document))
+	// The default token limit (bufio.MaxScanTokenSize) makes Scan fail with bufio.ErrTooLong
+	// on very long lines such as minified schemas; no line can be longer than the document.
+	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), len(document)+1)
+	for scanner.Scan() {
+		line := scanner.Text()
+
+		if v := sanitizePattern.FindStringSubmatch(line); len(v) == 4 {
+			if expr := v[2]; expr != "" && !isSupportedRegexp(expr) {
+				line = v[1] + "\"\"" + v[3]
+			}
+		}
+		// strings.Builder writes never return a non-nil error.
+		sb.WriteString(line)
+		sb.WriteByte('\n')
+	}
+	if err := scanner.Err(); err != nil {
+		return "", err
+	}
+
+	return sb.String(), nil
+}
+
+// isSupportedRegexp reports whether expr, the raw (still JSON-escaped) contents of a
+// JSON string, decodes to a regular expression that Go's regexp package can compile.
+func isSupportedRegexp(expr string) bool {
+	// github.com/xeipuuv/gojsonschema attempts to compile the regex after it has been unmarshaled from JSON.
+	var v string
+	b := []byte("\"" + expr + "\"")
+
+	if err := json.Unmarshal(b, &v); err != nil {
+		return false
+	}
+
+	_, err := regexp.Compile(v)
+	return err == nil
 }
diff --git a/sanitize_test.go b/sanitize_test.go
index 8bd67f6..cc24507 100644
--- a/sanitize_test.go
+++ b/sanitize_test.go
@@ -1,6 +1,7 @@
 package cfschema_test
 
 import (
+	"strings"
 	"testing"
 
 	cfschema "github.com/hashicorp/aws-cloudformation-resource-schema-sdk-go"
@@ -49,8 +50,33 @@ func TestSanitize(t *testing.T) {
   "KmsKeyId": {
     "description": "The Amazon Resource Name (ARN) of the CMK to use when encrypting log data.",
     "type": "string",
-    "pattern": "^arn:[a-z0-9-]+:kms:[a-z0-9-]+:\\d{12}:(key|alias)/.+\\Z",
+    "pattern":"^arn:[a-z0-9-]+:kms:[a-z0-9-]+:\\d{12}:(key|alias)/.+\\Z",
     "maxLength": 256
+  },
+  "Key" : {
+    "type" : "string",
+    "pattern" : "^(?!aws:)[a-zA-Z+-=._:/]+$",
+    "description" : "The key name of the tag. You can specify a value that is 1 to 128 Unicode characters in length and cannot be prefixed with aws:. You can use any of the following characters: the set of Unicode letters, digits, whitespace, _, ., /, =, +, and -.",
+    "minLength" : 1,
+    "maxLength" : 128
+  },
+  "VirtualMfaDeviceName": {
+    "minLength": 1,
+    "maxLength": 226,
+    "pattern": "[\\w+=,.@-]+",
+    "type": "string"
+  },
+  "Path": {
+    "minLength": 1,
+    "maxLength": 512,
+    "pattern": "(\\u002F)|(\\u002F[\\u0021-\\u007F]+\\u002F)",
+    "type": "string"
+  },
+  "SerialNumber": {
+    "minLength": 9,
+    "maxLength": 256,
+    "pattern": "[\\w+=/:,.@-]+",
+    "type": "string"
   }
 }
 `,
@@ -66,8 +92,33 @@ func TestSanitize(t *testing.T) {
   "KmsKeyId": {
     "description": "The Amazon Resource Name (ARN) of the CMK to use when encrypting log data.",
     "type": "string",
-    "pattern": "",
+    "pattern":"",
     "maxLength": 256
+  },
+  "Key" : {
+    "type" : "string",
+    "pattern" : "",
+    "description" : "The key name of the tag. You can specify a value that is 1 to 128 Unicode characters in length and cannot be prefixed with aws:. You can use any of the following characters: the set of Unicode letters, digits, whitespace, _, ., /, =, +, and -.",
+    "minLength" : 1,
+    "maxLength" : 128
+  },
+  "VirtualMfaDeviceName": {
+    "minLength": 1,
+    "maxLength": 226,
+    "pattern": "[\\w+=,.@-]+",
+    "type": "string"
+  },
+  "Path": {
+    "minLength": 1,
+    "maxLength": 512,
+    "pattern": "",
+    "type": "string"
+  },
+  "SerialNumber": {
+    "minLength": 9,
+    "maxLength": 256,
+    "pattern": "[\\w+=/:,.@-]+",
+    "type": "string"
   }
 }
 `,
@@ -125,10 +176,14 @@ func TestSanitize(t *testing.T) {
 		testCase := testCase
 
 		t.Run(testCase.TestDescription, func(t *testing.T) {
-			got := cfschema.Sanitize(testCase.InputDocument)
+			got, err := cfschema.Sanitize(testCase.InputDocument)
+
+			if err != nil {
+				t.Fatalf("%s", err)
+			}
 
-			if got != testCase.SanitizedDocument {
-				t.Errorf("expected: %s, got: %s", testCase.SanitizedDocument, got)
+			if strings.TrimSpace(got) != strings.TrimSpace(testCase.SanitizedDocument) {
+				t.Errorf("expected: %s\ngot: %s", testCase.SanitizedDocument, got)
 			}
 		})
 	}