Skip to content

Commit

Permalink
Merge pull request #38 from hashicorp/check-valid-regexp-pattern
Browse files Browse the repository at this point in the history
Only sanitize invalid `pattern` regexes
  • Loading branch information
ewbankkit authored Feb 20, 2022
2 parents 57ca04a + 4f461f1 commit fb5e43f
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 11 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## v0.16.0 (Unreleased)

`Sanitize` now only removes property `pattern` regexes not supported by Go.

## v0.15.0 (January 26, 2022)

Expand properties in arrays of objects.
Expand Down
49 changes: 43 additions & 6 deletions sanitize.go
Original file line number Diff line number Diff line change
@@ -1,20 +1,57 @@
package cfschema

import (
"bufio"
"encoding/json"
"regexp"
"strings"
)

var (
// Multi-line form: matches an indented `"pattern": "<regex>"` property at the
// start of any line and captures only the key prefix.
// NOTE(review): sanitizePattern is declared twice in this block; this first
// form looks like the pre-change line shown by the diff view - confirm which
// declaration is the current one.
sanitizePattern = regexp.MustCompile(`(?m)^(\s+"pattern"\s*:\s*)".*"`)
// Single-line form: matches a whole line holding a `"pattern": "<regex>"`
// property. Capture 1 is the key prefix, capture 2 the raw (still
// JSON-escaped) regex text, capture 3 the optional trailing comma and
// whitespace.
sanitizePattern = regexp.MustCompile(`^(\s*"pattern"\s*:\s*)"(.*)"(\s*,?\s*)$`)
// Matches an indented `"patternProperties": {` key followed by the first
// quoted string inside the object; capture 1 is everything up to that
// quoted string.
sanitizePatternProperties = regexp.MustCompile(`(?m)^(\s+"patternProperties"\s*:\s*{\s*)".*?"`)
)

// Sanitize returns a sanitized copy of the specified JSON Schema document.
// The sanitized copy rewrites all pattern and patternProperty regexes to the empty string,
// working around any problems with JSON Schema regex validation.
func Sanitize(document string) string {
document = sanitizePattern.ReplaceAllString(document, `$1""`)
// Sanitize returns a sanitized copy of the specified JSON Schema document.
// The sanitized copy works around any problems with JSON Schema regex validation by
// - Rewriting all patternProperty regexes to the empty string (the regex is never used anyway)
// - Rewriting all unsupported (valid for ECMA-262 but not for Go) pattern regexes to the empty string
//
// The document is processed line by line, so only a "pattern" property that
// occupies a line of its own is inspected. Every line of the result is
// terminated with "\n", including the last one. A non-nil error is returned
// only if scanning the document fails.
func Sanitize(document string) (string, error) {
	document = sanitizePatternProperties.ReplaceAllString(document, `$1""`)

	scanner := bufio.NewScanner(strings.NewReader(document))
	// By default bufio.Scanner rejects any line longer than
	// bufio.MaxScanTokenSize (64 KiB) with bufio.ErrTooLong. Schema documents
	// can contain very long lines (e.g. minified JSON), so allow a single
	// line to be as large as the whole document.
	scanner.Buffer(make([]byte, 0, 4096), len(document)+1)

	var sb strings.Builder
	sb.Grow(len(document) + 1)

	for scanner.Scan() {
		line := scanner.Text()

		// v[1] is the `"pattern":` prefix, v[2] the raw regex text and v[3]
		// the trailing comma/whitespace. Only a non-empty regex that Go
		// cannot compile is blanked out; supported regexes are kept as-is.
		if v := sanitizePattern.FindStringSubmatch(line); len(v) == 4 {
			if expr := v[2]; expr != "" && !isSupportedRegexp(expr) {
				line = v[1] + `""` + v[3]
			}
		}

		// strings.Builder write methods always return a nil error, so there
		// is nothing to check here.
		sb.WriteString(line)
		sb.WriteByte('\n')
	}
	if err := scanner.Err(); err != nil {
		return "", err
	}

	return sb.String(), nil
}

// isSupportedRegexp reports whether expr, given as the raw text found between
// the double quotes of a JSON string, decodes to a regular expression that
// Go's regexp package can compile.
//
// The text is JSON-decoded first because github.com/xeipuuv/gojsonschema
// attempts to compile the regex after it has been unmarshaled from JSON.
// Text that is not a valid JSON string body is reported as unsupported.
func isSupportedRegexp(expr string) bool {
	// Re-wrap the raw text in quotes so it forms a complete JSON string value.
	quoted := []byte(`"` + expr + `"`)

	var decoded string
	if json.Unmarshal(quoted, &decoded) != nil {
		return false
	}

	if _, err := regexp.Compile(decoded); err != nil {
		return false
	}
	return true
}
65 changes: 60 additions & 5 deletions sanitize_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package cfschema_test

import (
"strings"
"testing"

cfschema "github.com/hashicorp/aws-cloudformation-resource-schema-sdk-go"
Expand Down Expand Up @@ -49,8 +50,33 @@ func TestSanitize(t *testing.T) {
"KmsKeyId": {
"description": "The Amazon Resource Name (ARN) of the CMK to use when encrypting log data.",
"type": "string",
"pattern": "^arn:[a-z0-9-]+:kms:[a-z0-9-]+:\\d{12}:(key|alias)/.+\\Z",
"pattern":"^arn:[a-z0-9-]+:kms:[a-z0-9-]+:\\d{12}:(key|alias)/.+\\Z",
"maxLength": 256
},
"Key" : {
"type" : "string",
"pattern" : "^(?!aws:)[a-zA-Z+-=._:/]+$",
"description" : "The key name of the tag. You can specify a value that is 1 to 128 Unicode characters in length and cannot be prefixed with aws:. You can use any of the following characters: the set of Unicode letters, digits, whitespace, _, ., /, =, +, and -.",
"minLength" : 1,
"maxLength" : 128
},
"VirtualMfaDeviceName": {
"minLength": 1,
"maxLength": 226,
"pattern": "[\\w+=,.@-]+",
"type": "string"
},
"Path": {
"minLength": 1,
"maxLength": 512,
"pattern": "(\\u002F)|(\\u002F[\\u0021-\\u007F]+\\u002F)",
"type": "string"
},
"SerialNumber": {
"minLength": 9,
"maxLength": 256,
"pattern": "[\\w+=/:,.@-]+",
"type": "string"
}
}
`,
Expand All @@ -66,8 +92,33 @@ func TestSanitize(t *testing.T) {
"KmsKeyId": {
"description": "The Amazon Resource Name (ARN) of the CMK to use when encrypting log data.",
"type": "string",
"pattern": "",
"pattern":"",
"maxLength": 256
},
"Key" : {
"type" : "string",
"pattern" : "",
"description" : "The key name of the tag. You can specify a value that is 1 to 128 Unicode characters in length and cannot be prefixed with aws:. You can use any of the following characters: the set of Unicode letters, digits, whitespace, _, ., /, =, +, and -.",
"minLength" : 1,
"maxLength" : 128
},
"VirtualMfaDeviceName": {
"minLength": 1,
"maxLength": 226,
"pattern": "[\\w+=,.@-]+",
"type": "string"
},
"Path": {
"minLength": 1,
"maxLength": 512,
"pattern": "",
"type": "string"
},
"SerialNumber": {
"minLength": 9,
"maxLength": 256,
"pattern": "[\\w+=/:,.@-]+",
"type": "string"
}
}
`,
Expand Down Expand Up @@ -125,10 +176,14 @@ func TestSanitize(t *testing.T) {
testCase := testCase

t.Run(testCase.TestDescription, func(t *testing.T) {
got := cfschema.Sanitize(testCase.InputDocument)
got, err := cfschema.Sanitize(testCase.InputDocument)

if err != nil {
t.Fatalf("%s", err)
}

if got != testCase.SanitizedDocument {
t.Errorf("expected: %s, got: %s", testCase.SanitizedDocument, got)
if strings.TrimSpace(got) != strings.TrimSpace(testCase.SanitizedDocument) {
t.Errorf("expected: %s\ngot: %s", testCase.SanitizedDocument, got)
}
})
}
Expand Down

0 comments on commit fb5e43f

Please sign in to comment.