Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: lazy load regexes to save memory. #900

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions collection/collection.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
package collection

import (
"regexp"

"github.com/corazawaf/coraza/v3/internal/regexp"
"github.com/corazawaf/coraza/v3/types"
)

Expand Down Expand Up @@ -37,7 +36,7 @@ type Keyed interface {
Get(key string) []string

// FindRegex returns a slice of MatchData for the regex
FindRegex(key *regexp.Regexp) []types.MatchData
FindRegex(key regexp.Regexp) []types.MatchData

// FindString returns a slice of MatchData for the string
FindString(key string) []types.MatchData
Expand Down
28 changes: 28 additions & 0 deletions experimental/regexp/regexp.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0

package experimental

import (
"fmt"

"github.com/corazawaf/coraza/v3/experimental/regexp/regexptypes"
"github.com/corazawaf/coraza/v3/internal/regexp"
)

// SetRegexpCompiler sets the regex compiler used by the WAF. This is especially
// useful when we want to lazily compile regexes in a single-threaded environment,
// as we don't need to synchronize the regex compilation.
func SetRegexpCompiler(fn func(expr string) (regexptypes.Regexp, error)) {
// A nil compiler is rejected: a message is printed to stdout and the
// currently configured compiler (if any) is left untouched.
if fn == nil {
fmt.Println("invalid regex compiler")
return
}

Check warning on line 20 in experimental/regexp/regexp.go

View check run for this annotation

Codecov / codecov/patch

experimental/regexp/regexp.go#L16-L20

Added lines #L16 - L20 were not covered by tests

// The compiler can only be set once: if one is already registered, the
// first registration wins and this call only prints a message to stdout.
if regexp.RegexCompiler != nil {
fmt.Println("regex compiler already set")
return
}

Check warning on line 25 in experimental/regexp/regexp.go

View check run for this annotation

Codecov / codecov/patch

experimental/regexp/regexp.go#L22-L25

Added lines #L22 - L25 were not covered by tests

// Install fn as the package-level compiler of internal/regexp.
// NOTE(review): this assignment is not guarded by any lock here — confirm
// callers only invoke it during single-goroutine initialization.
regexp.RegexCompiler = fn

Check warning on line 27 in experimental/regexp/regexp.go

View check run for this annotation

Codecov / codecov/patch

experimental/regexp/regexp.go#L27

Added line #L27 was not covered by tests
}
19 changes: 19 additions & 0 deletions experimental/regexp/regexptypes/types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0

package regexptypes

import "regexp"

// Regexp is the interface that wraps the basic MatchString, FindStringSubmatch,
// FindAllStringSubmatch, SubexpNames, Match and String methods.
//
// The method set is a subset of the standard library's *regexp.Regexp, which
// satisfies this interface. Alternative implementations are expected to follow
// the semantics of the same-named *regexp.Regexp methods.
type Regexp interface {
// MatchString mirrors (*regexp.Regexp).MatchString.
MatchString(s string) bool
// FindStringSubmatch mirrors (*regexp.Regexp).FindStringSubmatch.
FindStringSubmatch(s string) []string
// FindAllStringSubmatch mirrors (*regexp.Regexp).FindAllStringSubmatch.
FindAllStringSubmatch(s string, n int) [][]string
// SubexpNames mirrors (*regexp.Regexp).SubexpNames.
SubexpNames() []string
// Match mirrors (*regexp.Regexp).Match; note that s is a byte slice here.
Match(s []byte) bool
// String mirrors (*regexp.Regexp).String.
String() string
}

var _ Regexp = (*regexp.Regexp)(nil)
4 changes: 2 additions & 2 deletions internal/collections/concat.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
package collections

import (
"regexp"
"strings"

"github.com/corazawaf/coraza/v3/collection"
"github.com/corazawaf/coraza/v3/internal/corazarules"
"github.com/corazawaf/coraza/v3/internal/regexp"
"github.com/corazawaf/coraza/v3/types"
"github.com/corazawaf/coraza/v3/types/variables"
)
Expand Down Expand Up @@ -67,7 +67,7 @@ func (c *ConcatKeyed) Get(key string) []string {
}

// FindRegex returns a slice of MatchData for the regex
func (c *ConcatKeyed) FindRegex(key *regexp.Regexp) []types.MatchData {
func (c *ConcatKeyed) FindRegex(key regexp.Regexp) []types.MatchData {
var res []types.MatchData
for _, d := range c.data {
res = append(res, replaceVariable(c.variable, d.FindRegex(key))...)
Expand Down
2 changes: 1 addition & 1 deletion internal/collections/concat_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
package collections

import (
"regexp"
"strings"
"testing"

"github.com/corazawaf/coraza/v3/internal/regexp"
"github.com/corazawaf/coraza/v3/types"
"github.com/corazawaf/coraza/v3/types/variables"
)
Expand Down
4 changes: 2 additions & 2 deletions internal/collections/map.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
package collections

import (
"regexp"
"strings"

"github.com/corazawaf/coraza/v3/collection"
"github.com/corazawaf/coraza/v3/internal/corazarules"
"github.com/corazawaf/coraza/v3/internal/regexp"
"github.com/corazawaf/coraza/v3/types"
"github.com/corazawaf/coraza/v3/types/variables"
)
Expand Down Expand Up @@ -40,7 +40,7 @@ func (c *Map) Get(key string) []string {
return values
}

func (c *Map) FindRegex(key *regexp.Regexp) []types.MatchData {
func (c *Map) FindRegex(key regexp.Regexp) []types.MatchData {
var result []types.MatchData
for k, data := range c.data {
if key.MatchString(k) {
Expand Down
2 changes: 1 addition & 1 deletion internal/collections/map_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ package collections

import (
"fmt"
"regexp"
"testing"

"github.com/corazawaf/coraza/v3/internal/regexp"
"github.com/corazawaf/coraza/v3/types/variables"
)

Expand Down
4 changes: 2 additions & 2 deletions internal/collections/named.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@

import (
"fmt"
"regexp"
"strings"

"github.com/corazawaf/coraza/v3/collection"
"github.com/corazawaf/coraza/v3/internal/corazarules"
"github.com/corazawaf/coraza/v3/internal/regexp"
"github.com/corazawaf/coraza/v3/types"
"github.com/corazawaf/coraza/v3/types/variables"
)
Expand Down Expand Up @@ -94,7 +94,7 @@
collection *NamedCollection
}

func (c *NamedCollectionNames) FindRegex(key *regexp.Regexp) []types.MatchData {
func (c *NamedCollectionNames) FindRegex(key regexp.Regexp) []types.MatchData {

Check warning on line 97 in internal/collections/named.go

View check run for this annotation

Codecov / codecov/patch

internal/collections/named.go#L97

Added line #L97 was not covered by tests
panic("selection operator not supported")
}

Expand Down
2 changes: 1 addition & 1 deletion internal/collections/named_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ package collections

import (
"fmt"
"regexp"
"testing"

"github.com/corazawaf/coraza/v3/internal/regexp"
"github.com/corazawaf/coraza/v3/types/variables"
)

Expand Down
4 changes: 2 additions & 2 deletions internal/collections/sized.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@

import (
"fmt"
"regexp"
"strconv"
"strings"

"github.com/corazawaf/coraza/v3/collection"
"github.com/corazawaf/coraza/v3/internal/corazarules"
"github.com/corazawaf/coraza/v3/internal/regexp"
"github.com/corazawaf/coraza/v3/types"
"github.com/corazawaf/coraza/v3/types/variables"
)
Expand All @@ -32,7 +32,7 @@
}

// FindRegex returns a slice of MatchData for the regex
func (c *SizeCollection) FindRegex(*regexp.Regexp) []types.MatchData {
func (c *SizeCollection) FindRegex(regexp.Regexp) []types.MatchData {

Check warning on line 35 in internal/collections/sized.go

View check run for this annotation

Codecov / codecov/patch

internal/collections/sized.go#L35

Added line #L35 was not covered by tests
return c.FindAll()
}

Expand Down
14 changes: 7 additions & 7 deletions internal/corazawaf/rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ package corazawaf
import (
"fmt"
"reflect"
"regexp"
"strconv"
"strings"
"sync"
Expand All @@ -16,6 +15,7 @@ import (
"github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes"
"github.com/corazawaf/coraza/v3/internal/corazarules"
"github.com/corazawaf/coraza/v3/internal/memoize"
"github.com/corazawaf/coraza/v3/internal/regexp"
"github.com/corazawaf/coraza/v3/types"
"github.com/corazawaf/coraza/v3/types/variables"
)
Expand Down Expand Up @@ -50,7 +50,7 @@ type ruleVariableException struct {

// The key for the variable that is going to be requested
// If nil, KeyStr is going to be used
KeyRx *regexp.Regexp
KeyRx regexp.Regexp
}

// RuleVariable is compiled during runtime by transactions
Expand All @@ -65,7 +65,7 @@ type ruleVariableParams struct {

// The key for the variable that is going to be requested
// If nil, KeyStr is going to be used
KeyRx *regexp.Regexp
KeyRx regexp.Regexp

// The string key for the variable that is going to be requested
// If KeyRx is not nil, KeyStr is ignored
Expand Down Expand Up @@ -454,14 +454,14 @@ func (r *Rule) AddAction(name string, action plugintypes.Action) error {
// it will be used to match the variable, in case of string it will
// be a fixed match, in case of nil it will match everything
func (r *Rule) AddVariable(v variables.RuleVariable, key string, iscount bool) error {
var re *regexp.Regexp
var re regexp.Regexp
if len(key) > 2 && key[0] == '/' && key[len(key)-1] == '/' {
key = key[1 : len(key)-1]

if vare, err := memoize.Do(key, func() (interface{}, error) { return regexp.Compile(key) }); err != nil {
return err
} else {
re = vare.(*regexp.Regexp)
re = vare.(regexp.Regexp)
}
}

Expand Down Expand Up @@ -524,13 +524,13 @@ func (r *Rule) AddVariable(v variables.RuleVariable, key string, iscount bool) e
// OK: SecRule !ARGS:id "..."
// ERROR: SecRule !ARGS: "..."
func (r *Rule) AddVariableNegation(v variables.RuleVariable, key string) error {
var re *regexp.Regexp
var re regexp.Regexp
if len(key) > 2 && key[0] == '/' && key[len(key)-1] == '/' {
key = key[1 : len(key)-1]
if vare, err := memoize.Do(key, func() (interface{}, error) { return regexp.Compile(key) }); err != nil {
return err
} else {
re = vare.(*regexp.Regexp)
re = vare.(regexp.Regexp)
}
}
// Prevent SIGSEGV
Expand Down
2 changes: 1 addition & 1 deletion internal/corazawaf/transaction_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (
"bytes"
"fmt"
"io"
"regexp"
"runtime/debug"
"strconv"
"strings"
Expand All @@ -19,6 +18,7 @@ import (
"github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes"
"github.com/corazawaf/coraza/v3/internal/collections"
"github.com/corazawaf/coraza/v3/internal/corazarules"
"github.com/corazawaf/coraza/v3/internal/regexp"
utils "github.com/corazawaf/coraza/v3/internal/strings"
"github.com/corazawaf/coraza/v3/types"
"github.com/corazawaf/coraza/v3/types/variables"
Expand Down
4 changes: 2 additions & 2 deletions internal/corazawaf/waf.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"io"
"io/fs"
"os"
"regexp"
"strconv"
"strings"
"time"
Expand All @@ -18,6 +17,7 @@ import (
"github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes"
"github.com/corazawaf/coraza/v3/internal/auditlog"
"github.com/corazawaf/coraza/v3/internal/environment"
"github.com/corazawaf/coraza/v3/internal/regexp"
stringutils "github.com/corazawaf/coraza/v3/internal/strings"
"github.com/corazawaf/coraza/v3/internal/sync"
"github.com/corazawaf/coraza/v3/types"
Expand Down Expand Up @@ -119,7 +119,7 @@ type WAF struct {
AuditLogParts types.AuditLogParts

// Contains the regular expression for relevant status audit logging
AuditLogRelevantStatus *regexp.Regexp
AuditLogRelevantStatus regexp.Regexp

auditLogWriter plugintypes.AuditLogWriter

Expand Down
6 changes: 3 additions & 3 deletions internal/operators/restpath.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ package operators

import (
"fmt"
"regexp"
"strings"

"github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes"
"github.com/corazawaf/coraza/v3/internal/memoize"
"github.com/corazawaf/coraza/v3/internal/regexp"
)

var rePathTokenRe = regexp.MustCompile(`\{([^\}]+)\}`)
Expand All @@ -21,7 +21,7 @@ var rePathTokenRe = regexp.MustCompile(`\{([^\}]+)\}`)
// It will later transform the path to a regex and assign the variables to
// ARGS_PATH
type restpath struct {
re *regexp.Regexp
re regexp.Regexp
}

var _ plugintypes.Operator = (*restpath)(nil)
Expand All @@ -36,7 +36,7 @@ func newRESTPath(options plugintypes.OperatorOptions) (plugintypes.Operator, err
if err != nil {
return nil, err
}
return &restpath{re: re.(*regexp.Regexp)}, nil
return &restpath{re: re.(regexp.Regexp)}, nil
}

func (o *restpath) Evaluate(tx plugintypes.TransactionState, value string) bool {
Expand Down
6 changes: 3 additions & 3 deletions internal/operators/rx.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,18 @@ package operators

import (
"fmt"
"regexp"
"strconv"
"unicode/utf8"

"rsc.io/binaryregexp"

"github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes"
"github.com/corazawaf/coraza/v3/internal/memoize"
"github.com/corazawaf/coraza/v3/internal/regexp"
)

type rx struct {
re *regexp.Regexp
re regexp.Regexp
}

var _ plugintypes.Operator = (*rx)(nil)
Expand All @@ -40,7 +40,7 @@ func newRX(options plugintypes.OperatorOptions) (plugintypes.Operator, error) {
if err != nil {
return nil, err
}
return &rx{re: re.(*regexp.Regexp)}, nil
return &rx{re: re.(regexp.Regexp)}, nil
}

func (o *rx) Evaluate(tx plugintypes.TransactionState, value string) bool {
Expand Down
2 changes: 1 addition & 1 deletion internal/operators/rx_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ package operators

import (
"fmt"
"regexp"
"testing"

"github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes"
"github.com/corazawaf/coraza/v3/internal/corazawaf"
"github.com/corazawaf/coraza/v3/internal/regexp"
)

func TestRx(t *testing.T) {
Expand Down
Loading
Loading