diff --git a/collection/collection.go b/collection/collection.go index 668608966..5a3eb9d9a 100644 --- a/collection/collection.go +++ b/collection/collection.go @@ -4,8 +4,7 @@ package collection import ( - "regexp" - + "github.com/corazawaf/coraza/v3/internal/regexp" "github.com/corazawaf/coraza/v3/types" ) @@ -37,7 +36,7 @@ type Keyed interface { Get(key string) []string // FindRegex returns a slice of MatchData for the regex - FindRegex(key *regexp.Regexp) []types.MatchData + FindRegex(key regexp.Regexp) []types.MatchData // FindString returns a slice of MatchData for the string FindString(key string) []types.MatchData diff --git a/experimental/regexp/regexp.go b/experimental/regexp/regexp.go new file mode 100644 index 000000000..02a5b69a2 --- /dev/null +++ b/experimental/regexp/regexp.go @@ -0,0 +1,28 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +package experimental + +import ( + "fmt" + + "github.com/corazawaf/coraza/v3/experimental/regexp/regexptypes" + "github.com/corazawaf/coraza/v3/internal/regexp" +) + +// SetRegexpCompiler sets the regex compiler used by the WAF. This is especially +// useful when we want to lazily compile regexes in a single-threaded environment, as +// we don't need to synchronize the regex compilation. 
+func SetRegexpCompiler(fn func(expr string) (regexptypes.Regexp, error)) { + if fn == nil { + fmt.Println("invalid regex compiler") + return + } + + if regexp.RegexCompiler != nil { + fmt.Println("regex compiler already set") + return + } + + regexp.RegexCompiler = fn +} diff --git a/experimental/regexp/regexptypes/types.go b/experimental/regexp/regexptypes/types.go new file mode 100644 index 000000000..34697387a --- /dev/null +++ b/experimental/regexp/regexptypes/types.go @@ -0,0 +1,19 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +package regexptypes + +import "regexp" + +// Regexp is the interface that wraps the basic MatchString, FindStringSubmatch, +// FindAllStringSubmatch, SubexpNames, Match and String methods. +type Regexp interface { + MatchString(s string) bool + FindStringSubmatch(s string) []string + FindAllStringSubmatch(s string, n int) [][]string + SubexpNames() []string + Match(s []byte) bool + String() string +} + +var _ Regexp = (*regexp.Regexp)(nil) diff --git a/internal/collections/concat.go b/internal/collections/concat.go index f54489e0d..2b235527f 100644 --- a/internal/collections/concat.go +++ b/internal/collections/concat.go @@ -4,11 +4,11 @@ package collections import ( - "regexp" "strings" "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/internal/corazarules" + "github.com/corazawaf/coraza/v3/internal/regexp" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) @@ -67,7 +67,7 @@ func (c *ConcatKeyed) Get(key string) []string { } // FindRegex returns a slice of MatchData for the regex -func (c *ConcatKeyed) FindRegex(key *regexp.Regexp) []types.MatchData { +func (c *ConcatKeyed) FindRegex(key regexp.Regexp) []types.MatchData { var res []types.MatchData for _, d := range c.data { res = append(res, replaceVariable(c.variable, d.FindRegex(key))...) 
diff --git a/internal/collections/concat_test.go b/internal/collections/concat_test.go index 77761e4b7..b19223d91 100644 --- a/internal/collections/concat_test.go +++ b/internal/collections/concat_test.go @@ -4,10 +4,10 @@ package collections import ( - "regexp" "strings" "testing" + "github.com/corazawaf/coraza/v3/internal/regexp" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/internal/collections/map.go b/internal/collections/map.go index eca4dcd38..104b0ca16 100644 --- a/internal/collections/map.go +++ b/internal/collections/map.go @@ -4,11 +4,11 @@ package collections import ( - "regexp" "strings" "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/internal/corazarules" + "github.com/corazawaf/coraza/v3/internal/regexp" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) @@ -40,7 +40,7 @@ func (c *Map) Get(key string) []string { return values } -func (c *Map) FindRegex(key *regexp.Regexp) []types.MatchData { +func (c *Map) FindRegex(key regexp.Regexp) []types.MatchData { var result []types.MatchData for k, data := range c.data { if key.MatchString(k) { diff --git a/internal/collections/map_test.go b/internal/collections/map_test.go index c73e09dd5..0e1f5e136 100644 --- a/internal/collections/map_test.go +++ b/internal/collections/map_test.go @@ -15,9 +15,9 @@ package collections import ( "fmt" - "regexp" "testing" + "github.com/corazawaf/coraza/v3/internal/regexp" "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/internal/collections/named.go b/internal/collections/named.go index 8cfb81240..a49a8a252 100644 --- a/internal/collections/named.go +++ b/internal/collections/named.go @@ -5,11 +5,11 @@ package collections import ( "fmt" - "regexp" "strings" "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/internal/corazarules" + "github.com/corazawaf/coraza/v3/internal/regexp" 
"github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) @@ -94,7 +94,7 @@ type NamedCollectionNames struct { collection *NamedCollection } -func (c *NamedCollectionNames) FindRegex(key *regexp.Regexp) []types.MatchData { +func (c *NamedCollectionNames) FindRegex(key regexp.Regexp) []types.MatchData { panic("selection operator not supported") } diff --git a/internal/collections/named_test.go b/internal/collections/named_test.go index 0a773b4d4..572b630e3 100644 --- a/internal/collections/named_test.go +++ b/internal/collections/named_test.go @@ -5,9 +5,9 @@ package collections import ( "fmt" - "regexp" "testing" + "github.com/corazawaf/coraza/v3/internal/regexp" "github.com/corazawaf/coraza/v3/types/variables" ) diff --git a/internal/collections/sized.go b/internal/collections/sized.go index 146395ddc..67eb3ec23 100644 --- a/internal/collections/sized.go +++ b/internal/collections/sized.go @@ -5,12 +5,12 @@ package collections import ( "fmt" - "regexp" "strconv" "strings" "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/internal/corazarules" + "github.com/corazawaf/coraza/v3/internal/regexp" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) @@ -32,7 +32,7 @@ func NewSizeCollection(variable variables.RuleVariable, data ...*NamedCollection } // FindRegex returns a slice of MatchData for the regex -func (c *SizeCollection) FindRegex(*regexp.Regexp) []types.MatchData { +func (c *SizeCollection) FindRegex(regexp.Regexp) []types.MatchData { return c.FindAll() } diff --git a/internal/corazawaf/rule.go b/internal/corazawaf/rule.go index 14016b957..659792050 100644 --- a/internal/corazawaf/rule.go +++ b/internal/corazawaf/rule.go @@ -6,7 +6,6 @@ package corazawaf import ( "fmt" "reflect" - "regexp" "strconv" "strings" "sync" @@ -16,6 +15,7 @@ import ( "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/corazarules" 
"github.com/corazawaf/coraza/v3/internal/memoize" + "github.com/corazawaf/coraza/v3/internal/regexp" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) @@ -50,7 +50,7 @@ type ruleVariableException struct { // The key for the variable that is going to be requested // If nil, KeyStr is going to be used - KeyRx *regexp.Regexp + KeyRx regexp.Regexp } // RuleVariable is compiled during runtime by transactions @@ -65,7 +65,7 @@ type ruleVariableParams struct { // The key for the variable that is going to be requested // If nil, KeyStr is going to be used - KeyRx *regexp.Regexp + KeyRx regexp.Regexp // The string key for the variable that is going to be requested // If KeyRx is not nil, KeyStr is ignored @@ -454,14 +454,14 @@ func (r *Rule) AddAction(name string, action plugintypes.Action) error { // it will be used to match the variable, in case of string it will // be a fixed match, in case of nil it will match everything func (r *Rule) AddVariable(v variables.RuleVariable, key string, iscount bool) error { - var re *regexp.Regexp + var re regexp.Regexp if len(key) > 2 && key[0] == '/' && key[len(key)-1] == '/' { key = key[1 : len(key)-1] if vare, err := memoize.Do(key, func() (interface{}, error) { return regexp.Compile(key) }); err != nil { return err } else { - re = vare.(*regexp.Regexp) + re = vare.(regexp.Regexp) } } @@ -524,13 +524,13 @@ func (r *Rule) AddVariable(v variables.RuleVariable, key string, iscount bool) e // OK: SecRule !ARGS:id "..." // ERROR: SecRule !ARGS: "..." 
func (r *Rule) AddVariableNegation(v variables.RuleVariable, key string) error { - var re *regexp.Regexp + var re regexp.Regexp if len(key) > 2 && key[0] == '/' && key[len(key)-1] == '/' { key = key[1 : len(key)-1] if vare, err := memoize.Do(key, func() (interface{}, error) { return regexp.Compile(key) }); err != nil { return err } else { - re = vare.(*regexp.Regexp) + re = vare.(regexp.Regexp) } } // Prevent sigsev diff --git a/internal/corazawaf/transaction_test.go b/internal/corazawaf/transaction_test.go index 938aef632..530036a97 100644 --- a/internal/corazawaf/transaction_test.go +++ b/internal/corazawaf/transaction_test.go @@ -7,7 +7,6 @@ import ( "bytes" "fmt" "io" - "regexp" "runtime/debug" "strconv" "strings" @@ -19,6 +18,7 @@ import ( "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/collections" "github.com/corazawaf/coraza/v3/internal/corazarules" + "github.com/corazawaf/coraza/v3/internal/regexp" utils "github.com/corazawaf/coraza/v3/internal/strings" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" diff --git a/internal/corazawaf/waf.go b/internal/corazawaf/waf.go index 7af329a10..b2365132a 100644 --- a/internal/corazawaf/waf.go +++ b/internal/corazawaf/waf.go @@ -9,7 +9,6 @@ import ( "io" "io/fs" "os" - "regexp" "strconv" "strings" "time" @@ -18,6 +17,7 @@ import ( "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/auditlog" "github.com/corazawaf/coraza/v3/internal/environment" + "github.com/corazawaf/coraza/v3/internal/regexp" stringutils "github.com/corazawaf/coraza/v3/internal/strings" "github.com/corazawaf/coraza/v3/internal/sync" "github.com/corazawaf/coraza/v3/types" @@ -119,7 +119,7 @@ type WAF struct { AuditLogParts types.AuditLogParts // Contains the regular expression for relevant status audit logging - AuditLogRelevantStatus *regexp.Regexp + AuditLogRelevantStatus regexp.Regexp 
auditLogWriter plugintypes.AuditLogWriter diff --git a/internal/operators/restpath.go b/internal/operators/restpath.go index f1e4a8911..9d86e6704 100644 --- a/internal/operators/restpath.go +++ b/internal/operators/restpath.go @@ -7,11 +7,11 @@ package operators import ( "fmt" - "regexp" "strings" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/memoize" + "github.com/corazawaf/coraza/v3/internal/regexp" ) var rePathTokenRe = regexp.MustCompile(`\{([^\}]+)\}`) @@ -21,7 +21,7 @@ var rePathTokenRe = regexp.MustCompile(`\{([^\}]+)\}`) // It will later transform the path to a regex and assign the variables to // ARGS_PATH type restpath struct { - re *regexp.Regexp + re regexp.Regexp } var _ plugintypes.Operator = (*restpath)(nil) @@ -36,7 +36,7 @@ func newRESTPath(options plugintypes.OperatorOptions) (plugintypes.Operator, err if err != nil { return nil, err } - return &restpath{re: re.(*regexp.Regexp)}, nil + return &restpath{re: re.(regexp.Regexp)}, nil } func (o *restpath) Evaluate(tx plugintypes.TransactionState, value string) bool { diff --git a/internal/operators/rx.go b/internal/operators/rx.go index e801c9f72..25db5dd90 100644 --- a/internal/operators/rx.go +++ b/internal/operators/rx.go @@ -7,7 +7,6 @@ package operators import ( "fmt" - "regexp" "strconv" "unicode/utf8" @@ -15,10 +14,11 @@ import ( "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/memoize" + "github.com/corazawaf/coraza/v3/internal/regexp" ) type rx struct { - re *regexp.Regexp + re regexp.Regexp } var _ plugintypes.Operator = (*rx)(nil) @@ -40,7 +40,7 @@ func newRX(options plugintypes.OperatorOptions) (plugintypes.Operator, error) { if err != nil { return nil, err } - return &rx{re: re.(*regexp.Regexp)}, nil + return &rx{re: re.(regexp.Regexp)}, nil } func (o *rx) Evaluate(tx plugintypes.TransactionState, value string) bool { diff --git a/internal/operators/rx_test.go 
b/internal/operators/rx_test.go index e9785713b..ffb0288e8 100644 --- a/internal/operators/rx_test.go +++ b/internal/operators/rx_test.go @@ -5,11 +5,11 @@ package operators import ( "fmt" - "regexp" "testing" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/corazawaf" + "github.com/corazawaf/coraza/v3/internal/regexp" ) func TestRx(t *testing.T) { diff --git a/internal/operators/validate_nid.go b/internal/operators/validate_nid.go index 383f160b6..a587c033a 100644 --- a/internal/operators/validate_nid.go +++ b/internal/operators/validate_nid.go @@ -7,19 +7,19 @@ package operators import ( "fmt" - "regexp" "strconv" "strings" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/memoize" + "github.com/corazawaf/coraza/v3/internal/regexp" ) type validateNidFunction = func(input string) bool type validateNid struct { fn validateNidFunction - re *regexp.Regexp + re regexp.Regexp } var _ plugintypes.Operator = (*validateNid)(nil) @@ -46,7 +46,7 @@ func newValidateNID(options plugintypes.OperatorOptions) (plugintypes.Operator, return nil, err } - return &validateNid{fn: fn, re: re.(*regexp.Regexp)}, nil + return &validateNid{fn: fn, re: re.(regexp.Regexp)}, nil } func (o *validateNid) Evaluate(tx plugintypes.TransactionState, value string) bool { diff --git a/internal/regexp/regex.go b/internal/regexp/regex.go new file mode 100644 index 000000000..ee2987303 --- /dev/null +++ b/internal/regexp/regex.go @@ -0,0 +1,31 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +package regexp + +import ( + "regexp" + + "github.com/corazawaf/coraza/v3/experimental/regexp/regexptypes" +) + +var RegexCompiler func(expr string) (regexptypes.Regexp, error) + +func init() { + RegexCompiler = func(expr string) (regexptypes.Regexp, error) { + return regexp.Compile(expr) + } +} + +type Regexp = regexptypes.Regexp + +// 
MustCompile is like Compile but panics if the expression cannot be parsed. +// It is not intended for use with user input, e.g. rules, because it panics and +// bypasses whatever logic is provided by the users for regex compilation. +func MustCompile(str string) *regexp.Regexp { + return regexp.MustCompile(str) +} + +func Compile(expr string) (regexptypes.Regexp, error) { + return RegexCompiler(expr) +} diff --git a/internal/regexp/regex_test.go b/internal/regexp/regex_test.go new file mode 100644 index 000000000..d488bb97a --- /dev/null +++ b/internal/regexp/regex_test.go @@ -0,0 +1,29 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +package regexp + +import ( + "testing" +) + +func TestCompile(t *testing.T) { + _, err := Compile(`[]`) + if err == nil { + t.Fatalf("expected error") + } + + _, err = Compile("[a-z]+") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestMustCompile(t *testing.T) { + defer func() { + if r := recover(); r != nil { + t.Errorf("MustCompile panicked with error: %v", r) + } + }() + MustCompile("[a-z]+") +} diff --git a/internal/seclang/directives.go b/internal/seclang/directives.go index 7a158c349..f609e87e9 100644 --- a/internal/seclang/directives.go +++ b/internal/seclang/directives.go @@ -9,7 +9,6 @@ import ( "errors" "fmt" "io/fs" - "regexp" "strconv" "strings" @@ -17,6 +16,7 @@ import ( "github.com/corazawaf/coraza/v3/internal/auditlog" "github.com/corazawaf/coraza/v3/internal/corazawaf" "github.com/corazawaf/coraza/v3/internal/memoize" + "github.com/corazawaf/coraza/v3/internal/regexp" utils "github.com/corazawaf/coraza/v3/internal/strings" "github.com/corazawaf/coraza/v3/types" ) @@ -737,7 +737,7 @@ func directiveSecAuditLogRelevantStatus(options *DirectiveOptions) error { return err } - options.WAF.AuditLogRelevantStatus = re.(*regexp.Regexp) + options.WAF.AuditLogRelevantStatus = re.(regexp.Regexp) return nil } diff --git 
a/internal/seclang/rules_test.go b/internal/seclang/rules_test.go index b9b2b3693..88feb85dc 100644 --- a/internal/seclang/rules_test.go +++ b/internal/seclang/rules_test.go @@ -4,11 +4,11 @@ package seclang import ( - "regexp" "strings" "testing" "github.com/corazawaf/coraza/v3/internal/corazawaf" + "github.com/corazawaf/coraza/v3/internal/regexp" "github.com/corazawaf/coraza/v3/types" ) diff --git a/internal/variables/generator/main.go b/internal/variables/generator/main.go index 07fb0d7b2..a33005b01 100644 --- a/internal/variables/generator/main.go +++ b/internal/variables/generator/main.go @@ -14,9 +14,10 @@ import ( "go/types" "log" "os" - "regexp" "strings" "text/template" + + "github.com/corazawaf/coraza/v3/internal/regexp" ) //go:embed variablesmap.go.tmpl