From aaed653c3671be4fbc65972a1de8f53e5b376750 Mon Sep 17 00:00:00 2001 From: soujanyanmbri <54130357+soujanyanmbri@users.noreply.github.com> Date: Wed, 9 Oct 2024 19:15:14 +0530 Subject: [PATCH] Optimise strings.lower() --- experimental/plugins/macro/macro.go | 4 +- internal/actions/actions.go | 6 +- internal/actions/ctl.go | 10 +-- internal/actions/setvar.go | 3 +- internal/auditlog/logger.go | 10 +-- internal/bodyprocessors/bodyprocessors.go | 5 +- internal/collections/concat.go | 4 +- internal/collections/map.go | 13 +-- internal/corazawaf/rule.go | 4 +- internal/corazawaf/transaction.go | 12 +-- internal/operators/pm.go | 3 +- internal/operators/pm_from_file.go | 3 +- internal/operators/validate_nid.go | 3 +- internal/seclang/directives.go | 18 ++-- internal/seclang/directives_test.go | 4 +- internal/seclang/generator/main.go | 4 +- internal/seclang/parser.go | 3 +- internal/seclang/rule_parser.go | 16 ++-- internal/strings/strings.go | 25 ++++++ internal/strings/strings_test.go | 93 ++++++++++++++++++++- internal/transformations/lowercase.go | 4 +- internal/transformations/transformations.go | 6 +- types/severity.go | 5 +- types/waf.go | 6 +- 24 files changed, 197 insertions(+), 67 deletions(-) diff --git a/experimental/plugins/macro/macro.go b/experimental/plugins/macro/macro.go index ec756f0d..39ceaeed 100644 --- a/experimental/plugins/macro/macro.go +++ b/experimental/plugins/macro/macro.go @@ -11,6 +11,8 @@ import ( "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/types/variables" + + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" ) type Macro interface { @@ -134,7 +136,7 @@ func (m *macro) compile(input string) error { m.tokens = append(m.tokens, macroToken{ text: currentToken.String(), variable: v, - key: strings.ToLower(key), + key: stringsutil.AsciiToLower(key), }) currentToken.Reset() continue diff --git a/internal/actions/actions.go 
b/internal/actions/actions.go index ed8889fb..91bec3dc 100644 --- a/internal/actions/actions.go +++ b/internal/actions/actions.go @@ -6,9 +6,9 @@ package actions import ( "errors" "fmt" - "strings" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" ) var ( @@ -28,7 +28,7 @@ var actionmap = map[string]ruleActionWrapper{} // It can be used also for plugins. // If you register an action with an existing name, it will be overwritten. func Register(name string, a func() plugintypes.Action) { - name = strings.ToLower(name) + name = stringsutil.AsciiToLower(name) actionmap[name] = a } @@ -70,7 +70,7 @@ func init() { // Get returns an unwrapped RuleAction from the actionmap based on the name // If the action does not exist it returns an error func Get(name string) (plugintypes.Action, error) { - name = strings.ToLower(name) + name = stringsutil.AsciiToLower(name) if a, ok := actionmap[name]; ok { return a(), nil } diff --git a/internal/actions/ctl.go b/internal/actions/ctl.go index 9bb0da00..89a7f337 100644 --- a/internal/actions/ctl.go +++ b/internal/actions/ctl.go @@ -13,7 +13,7 @@ import ( "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/collections" "github.com/corazawaf/coraza/v3/internal/corazawaf" - utils "github.com/corazawaf/coraza/v3/internal/strings" + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) @@ -115,7 +115,7 @@ func (a *ctlFn) Init(_ plugintypes.RuleMetadata, data string) error { // parseOnOff turns a string value into a boolean equivalent on/off into true/false func parseOnOff(s string) (bool, bool) { - val := strings.ToLower(s) + val := stringsutil.AsciiToLower(s) switch val { case "on": return true, true @@ -144,7 +144,7 @@ func (a *ctlFn) Evaluate(_ plugintypes.RuleMetadata, txS plugintypes.Transaction case 
ctlRuleRemoveTargetByTag: rules := tx.WAF.Rules.GetRules() for _, r := range rules { - if utils.InSlice(a.value, r.Tags_) { + if stringsutil.InSlice(a.value, r.Tags_) { tx.RemoveRuleTargetByID(r.ID(), a.collection, a.colKey) } } @@ -282,7 +282,7 @@ func (a *ctlFn) Evaluate(_ plugintypes.RuleMetadata, txS plugintypes.Transaction case ctlRuleRemoveByTag: rules := tx.WAF.Rules.GetRules() for _, r := range rules { - if utils.InSlice(a.value, r.Tags_) { + if stringsutil.InSlice(a.value, r.Tags_) { tx.RemoveRuleByID(r.ID_) } } @@ -386,7 +386,7 @@ func parseCtl(data string) (ctlFunctionType, string, variables.RuleVariable, str colname, colkey, _ = strings.Cut(col, ":") } collection, _ := variables.Parse(strings.TrimSpace(colname)) - colkey = strings.ToLower(colkey) + colkey = stringsutil.AsciiToLower(colkey) var act ctlFunctionType switch action { case "auditEngine": diff --git a/internal/actions/setvar.go b/internal/actions/setvar.go index 39992177..c40b7196 100644 --- a/internal/actions/setvar.go +++ b/internal/actions/setvar.go @@ -11,6 +11,7 @@ import ( "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/experimental/plugins/macro" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" "github.com/corazawaf/coraza/v3/types/variables" ) @@ -112,7 +113,7 @@ func (a *setvarFn) Evaluate(r plugintypes.RuleMetadata, tx plugintypes.Transacti Str("var_value", value). Int("rule_id", r.ID()). 
Msg("Action evaluated") - a.evaluateTxCollection(r, tx, strings.ToLower(key), value) + a.evaluateTxCollection(r, tx, stringsutil.AsciiToLower(key), value) } func (a *setvarFn) Type() plugintypes.ActionType { diff --git a/internal/auditlog/logger.go b/internal/auditlog/logger.go index b60e9d37..796427d0 100644 --- a/internal/auditlog/logger.go +++ b/internal/auditlog/logger.go @@ -5,9 +5,9 @@ package auditlog import ( "fmt" - "strings" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" ) // NewConfig returns a Config with default values. @@ -27,13 +27,13 @@ var formatters = map[string]plugintypes.AuditLogFormatter{} // RegisterWriter registers a new logger // it can be used for plugins func RegisterWriter(name string, writer func() plugintypes.AuditLogWriter) { - writers[strings.ToLower(name)] = writer + writers[stringsutil.AsciiToLower(name)] = writer } // GetWriter returns a logger by name // It returns an error if it doesn't exist func GetWriter(name string) (plugintypes.AuditLogWriter, error) { - logger := writers[strings.ToLower(name)] + logger := writers[stringsutil.AsciiToLower(name)] if logger == nil { return nil, fmt.Errorf("invalid logger %q", name) } @@ -43,13 +43,13 @@ func GetWriter(name string) (plugintypes.AuditLogWriter, error) { // RegisterFormatter registers a new logger format // it can be used for plugins func RegisterFormatter(name string, f plugintypes.AuditLogFormatter) { - formatters[strings.ToLower(name)] = f + formatters[stringsutil.AsciiToLower(name)] = f } // GetFormatter returns a formatter by name // It returns an error if it doesn't exist func GetFormatter(name string) (plugintypes.AuditLogFormatter, error) { - formatter := formatters[strings.ToLower(name)] + formatter := formatters[stringsutil.AsciiToLower(name)] if formatter == nil { return nil, fmt.Errorf("invalid formatter %q", name) } diff --git a/internal/bodyprocessors/bodyprocessors.go 
b/internal/bodyprocessors/bodyprocessors.go index 9daaf205..f3105e58 100644 --- a/internal/bodyprocessors/bodyprocessors.go +++ b/internal/bodyprocessors/bodyprocessors.go @@ -5,7 +5,8 @@ package bodyprocessors import ( "fmt" - "strings" + + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" ) @@ -24,7 +25,7 @@ func RegisterBodyProcessor(name string, fn func() plugintypes.BodyProcessor) { // GetBodyProcessor returns a body processor by name // If the body processor is not found, it returns an error func GetBodyProcessor(name string) (plugintypes.BodyProcessor, error) { - if fn, ok := processors[strings.ToLower(name)]; ok { + if fn, ok := processors[stringsutil.AsciiToLower(name)]; ok { return fn(), nil } return nil, fmt.Errorf("invalid bodyprocessor %q", name) diff --git a/internal/collections/concat.go b/internal/collections/concat.go index f54489e0..b222b902 100644 --- a/internal/collections/concat.go +++ b/internal/collections/concat.go @@ -5,10 +5,10 @@ package collections import ( "regexp" - "strings" "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/internal/corazarules" + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) @@ -58,7 +58,7 @@ func NewConcatKeyed(variable variables.RuleVariable, data ...collection.Keyed) * } func (c *ConcatKeyed) Get(key string) []string { - keyL := strings.ToLower(key) + keyL := stringsutil.AsciiToLower(key) var res []string for _, c := range c.data { res = append(res, c.Get(keyL)...) 
diff --git a/internal/collections/map.go b/internal/collections/map.go index 069c8e6a..9c33171b 100644 --- a/internal/collections/map.go +++ b/internal/collections/map.go @@ -9,6 +9,7 @@ import ( "github.com/corazawaf/coraza/v3/collection" "github.com/corazawaf/coraza/v3/internal/corazarules" + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) @@ -45,7 +46,7 @@ func (c *Map) Get(key string) []string { return nil } if !c.isCaseSensitive { - key = strings.ToLower(key) + key = stringsutil.AsciiToLower(key) } var values []string for _, a := range c.data[key] { @@ -81,7 +82,7 @@ func (c *Map) FindString(key string) []types.MatchData { return nil } if !c.isCaseSensitive { - key = strings.ToLower(key) + key = stringsutil.AsciiToLower(key) } // if key is not empty if e, ok := c.data[key]; ok { @@ -115,7 +116,7 @@ func (c *Map) FindAll() []types.MatchData { func (c *Map) Add(key string, value string) { aVal := keyValue{key: key, value: value} if !c.isCaseSensitive { - key = strings.ToLower(key) + key = stringsutil.AsciiToLower(key) } c.data[key] = append(c.data[key], aVal) } @@ -124,7 +125,7 @@ func (c *Map) Add(key string, value string) { func (c *Map) Set(key string, values []string) { originalKey := key if !c.isCaseSensitive { - key = strings.ToLower(key) + key = stringsutil.AsciiToLower(key) } c.data[key] = make([]keyValue, 0, len(values)) for _, v := range values { @@ -136,7 +137,7 @@ func (c *Map) Set(key string, values []string) { func (c *Map) SetIndex(key string, index int, value string) { originalKey := key if !c.isCaseSensitive { - key = strings.ToLower(key) + key = stringsutil.AsciiToLower(key) } values := c.data[key] av := keyValue{key: originalKey, value: value} @@ -154,7 +155,7 @@ func (c *Map) SetIndex(key string, index int, value string) { // Remove removes a key/value from the map. 
func (c *Map) Remove(key string) { if !c.isCaseSensitive { - key = strings.ToLower(key) + key = stringsutil.AsciiToLower(key) } if len(c.data) == 0 { return diff --git a/internal/corazawaf/rule.go b/internal/corazawaf/rule.go index cd6cd626..c316d3aa 100644 --- a/internal/corazawaf/rule.go +++ b/internal/corazawaf/rule.go @@ -6,7 +6,6 @@ package corazawaf import ( "fmt" "regexp" - "strings" "sync" "unsafe" @@ -15,6 +14,7 @@ import ( "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/corazarules" "github.com/corazawaf/coraza/v3/internal/memoize" + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) @@ -486,7 +486,7 @@ func caseSensitiveVariable(v variables.RuleVariable) bool { // but the knowledge of the type of the Map it not here also, so let's start with this. func newRuleVariableParams(v variables.RuleVariable, key string, re *regexp.Regexp, iscount bool) ruleVariableParams { if !caseSensitiveVariable(v) { - key = strings.ToLower(key) + key = stringsutil.AsciiToLower(key) } return ruleVariableParams{ Count: iscount, diff --git a/internal/corazawaf/transaction.go b/internal/corazawaf/transaction.go index c0512827..29212054 100644 --- a/internal/corazawaf/transaction.go +++ b/internal/corazawaf/transaction.go @@ -318,12 +318,12 @@ func (tx *Transaction) AddRequestHeader(key string, value string) { if key == "" { return } - keyl := strings.ToLower(key) + keyl := stringsutil.AsciiToLower(key) tx.variables.requestHeaders.Add(key, value) switch keyl { case "content-type": - val := strings.ToLower(value) + val := stringsutil.AsciiToLower(value) if val == "application/x-www-form-urlencoded" { tx.variables.reqbodyProcessor.Set("URLENCODED") } else if strings.HasPrefix(val, "multipart/form-data") { @@ -360,7 +360,7 @@ func (tx *Transaction) AddResponseHeader(key string, value string) { if key == "" { return } - keyl := 
strings.ToLower(key) + keyl := stringsutil.AsciiToLower(key) tx.variables.responseHeaders.Add(key, value) // Most headers can be managed like that @@ -592,9 +592,9 @@ func (tx *Transaction) GetField(rv ruleVariableParams) []types.MatchData { for _, c := range matches { isException := false - lkey := strings.ToLower(c.Key()) + lkey := stringsutil.AsciiToLower(c.Key()) for _, ex := range rv.Exceptions { - if (ex.KeyRx != nil && ex.KeyRx.MatchString(lkey)) || strings.ToLower(ex.KeyStr) == lkey { + if (ex.KeyRx != nil && ex.KeyRx.MatchString(lkey)) || stringsutil.AsciiToLower(ex.KeyStr) == lkey { isException = true break } @@ -1020,7 +1020,7 @@ func (tx *Transaction) ProcessRequestBody() (*types.Interruption, error) { } tx.variables.reqbodyProcessor.Set(rbp) } - rbp = strings.ToLower(rbp) + rbp = stringsutil.AsciiToLower(rbp) if rbp == "" { // so there is no bodyprocessor, we don't want to generate an error tx.WAF.Rules.Eval(types.PhaseRequestBody, tx) diff --git a/internal/operators/pm.go b/internal/operators/pm.go index 8c78cc35..8254670b 100644 --- a/internal/operators/pm.go +++ b/internal/operators/pm.go @@ -12,6 +12,7 @@ import ( "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/memoize" + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" ) type pm struct { @@ -23,7 +24,7 @@ var _ plugintypes.Operator = (*pm)(nil) func newPM(options plugintypes.OperatorOptions) (plugintypes.Operator, error) { data := options.Arguments - data = strings.ToLower(data) + data = stringsutil.AsciiToLower(data) dict := strings.Split(data, " ") builder := ahocorasick.NewAhoCorasickBuilder(ahocorasick.Opts{ AsciiCaseInsensitive: true, diff --git a/internal/operators/pm_from_file.go b/internal/operators/pm_from_file.go index 035a448e..b46aa84b 100644 --- a/internal/operators/pm_from_file.go +++ b/internal/operators/pm_from_file.go @@ -14,6 +14,7 @@ import ( "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" 
"github.com/corazawaf/coraza/v3/internal/memoize" + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" ) func newPMFromFile(options plugintypes.OperatorOptions) (plugintypes.Operator, error) { @@ -35,7 +36,7 @@ func newPMFromFile(options plugintypes.OperatorOptions) (plugintypes.Operator, e if l[0] == '#' { continue } - lines = append(lines, strings.ToLower(l)) + lines = append(lines, stringsutil.AsciiToLower(l)) } builder := ahocorasick.NewAhoCorasickBuilder(ahocorasick.Opts{ diff --git a/internal/operators/validate_nid.go b/internal/operators/validate_nid.go index 383f160b..720d22b3 100644 --- a/internal/operators/validate_nid.go +++ b/internal/operators/validate_nid.go @@ -13,6 +13,7 @@ import ( "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/memoize" + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" ) type validateNidFunction = func(input string) bool @@ -72,7 +73,7 @@ func nidCl(nid string) bool { if len(nid) < 8 { return false } - nid = strings.ToLower(nid) + nid = stringsutil.AsciiToLower(nid) nid = nonDigitOrK.ReplaceAllString(nid, "") rut, _ := strconv.Atoi(nid[:len(nid)-1]) dv := nid[len(nid)-1:] diff --git a/internal/seclang/directives.go b/internal/seclang/directives.go index 2bf34a3e..f94f0f4a 100644 --- a/internal/seclang/directives.go +++ b/internal/seclang/directives.go @@ -18,7 +18,7 @@ import ( "github.com/corazawaf/coraza/v3/internal/corazawaf" "github.com/corazawaf/coraza/v3/internal/environment" "github.com/corazawaf/coraza/v3/internal/memoize" - utils "github.com/corazawaf/coraza/v3/internal/strings" + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" "github.com/corazawaf/coraza/v3/types" ) @@ -222,7 +222,7 @@ func directiveSecResponseBodyAccess(options *DirectiveOptions) error { return errEmptyOptions } - b, err := parseBoolean(strings.ToLower(options.Opts)) + b, err := parseBoolean(stringsutil.AsciiToLower(options.Opts)) if err != nil { return 
err } @@ -263,7 +263,7 @@ func directiveSecRequestBodyAccess(options *DirectiveOptions) error { return errEmptyOptions } - b, err := parseBoolean(strings.ToLower(options.Opts)) + b, err := parseBoolean(stringsutil.AsciiToLower(options.Opts)) if err != nil { return err } @@ -304,7 +304,7 @@ func directiveSecServerSignature(options *DirectiveOptions) error { return errEmptyOptions } - options.WAF.ServerSignature = utils.MaybeRemoveQuotes(options.Opts) + options.WAF.ServerSignature = stringsutil.MaybeRemoveQuotes(options.Opts) return nil } @@ -415,7 +415,7 @@ func directiveSecResponseBodyMimeType(options *DirectiveOptions) error { // compress, obfuscate, or even encrypt data before it is sent back, and therefore // bypass any monitoring device. func directiveSecResponseBodyLimitAction(options *DirectiveOptions) error { - switch strings.ToLower(options.Opts) { + switch stringsutil.AsciiToLower(options.Opts) { case "reject": options.WAF.ResponseBodyLimitAction = types.BodyLimitActionReject case "processpartial": @@ -454,7 +454,7 @@ func directiveSecResponseBodyLimit(options *DirectiveOptions) error { // By default, Coraza will reject a request body that is longer than specified to // avoid OOM issues while buffering the request body prior the inspection. 
func directiveSecRequestBodyLimitAction(options *DirectiveOptions) error { - switch strings.ToLower(options.Opts) { + switch stringsutil.AsciiToLower(options.Opts) { case "reject": options.WAF.RequestBodyLimitAction = types.BodyLimitActionReject case "processpartial": @@ -489,7 +489,7 @@ func directiveSecRemoteRulesFailAction(options *DirectiveOptions) error { return errEmptyOptions } - switch strings.ToLower(options.Opts) { + switch stringsutil.AsciiToLower(options.Opts) { case "abort": options.WAF.AbortOnRemoteRulesFail = true case "warn": @@ -1063,7 +1063,7 @@ func directiveSecRuleUpdateTargetByTag(options *DirectiveOptions) error { for _, rule := range options.WAF.Rules.GetRules() { inputTag := strings.Trim(tagAndvars[0], "\"") - if utils.InSlice(inputTag, rule.Tags_) { + if stringsutil.InSlice(inputTag, rule.Tags_) { rp := RuleParser{ rule: &rule, options: RuleOptions{}, @@ -1142,7 +1142,7 @@ func directiveSecArgumentsLimit(options *DirectiveOptions) error { } func parseBoolean(data string) (bool, error) { - data = strings.ToLower(data) + data = stringsutil.AsciiToLower(data) switch data { case "on": return true, nil diff --git a/internal/seclang/directives_test.go b/internal/seclang/directives_test.go index 1d828f4c..4eaef5d8 100644 --- a/internal/seclang/directives_test.go +++ b/internal/seclang/directives_test.go @@ -6,10 +6,10 @@ package seclang import ( "os" "regexp" - "strings" "testing" "github.com/corazawaf/coraza/v3/internal/corazawaf" + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" "github.com/corazawaf/coraza/v3/types" ) @@ -281,7 +281,7 @@ func TestDirectives(t *testing.T) { for name, dCases := range directiveCases { t.Run(name, func(t *testing.T) { for _, tCase := range dCases { - d := directivesMap[strings.ToLower(name)] + d := directivesMap[stringsutil.AsciiToLower(name)] t.Run(tCase.opts, func(t *testing.T) { waf := corazawaf.NewWAF() diff --git a/internal/seclang/generator/main.go b/internal/seclang/generator/main.go index 
ff1d3be7..d4fcb38c 100644 --- a/internal/seclang/generator/main.go +++ b/internal/seclang/generator/main.go @@ -14,6 +14,8 @@ import ( "log" "os" "strings" + + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" ) //go:embed directivesmap.go.tmpl @@ -64,7 +66,7 @@ func main() { } directives = append(directives, DirectivesMap{ - Key: strings.ToLower(directiveName), + Key: stringsutil.AsciiToLower(directiveName), FnName: fnName, }) default: diff --git a/internal/seclang/parser.go b/internal/seclang/parser.go index 2532d9ea..b85c4668 100644 --- a/internal/seclang/parser.go +++ b/internal/seclang/parser.go @@ -15,6 +15,7 @@ import ( "github.com/corazawaf/coraza/v3/internal/corazawaf" "github.com/corazawaf/coraza/v3/internal/environment" "github.com/corazawaf/coraza/v3/internal/io" + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" ) // maxIncludeRecursion is used to avoid DDOS by including files that include @@ -148,7 +149,7 @@ func (p *Parser) evaluateLine(l string) error { dir, opts, _ := strings.Cut(l, " ") p.options.WAF.Logger.Debug().Str("line", l).Msg("Parsing directive") - directive := strings.ToLower(dir) + directive := stringsutil.AsciiToLower(dir) if len(opts) >= 3 && opts[0] == '"' && opts[len(opts)-1] == '"' { opts = strings.Trim(opts, `"`) diff --git a/internal/seclang/rule_parser.go b/internal/seclang/rule_parser.go index e80368c2..b7a2fa83 100644 --- a/internal/seclang/rule_parser.go +++ b/internal/seclang/rule_parser.go @@ -12,7 +12,7 @@ import ( actionsmod "github.com/corazawaf/coraza/v3/internal/actions" "github.com/corazawaf/coraza/v3/internal/corazawaf" "github.com/corazawaf/coraza/v3/internal/operators" - utils "github.com/corazawaf/coraza/v3/internal/strings" + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) @@ -264,7 +264,7 @@ func (rp *RuleParser) ParseActions(actions string) error { } // check if forbidden action: for 
_, a := range act { - if utils.InSlice(a.Key, disabledActions) { + if stringsutil.InSlice(a.Key, disabledActions) { return fmt.Errorf("%s rule action is disabled", a.Key) } } @@ -359,7 +359,7 @@ func ParseRule(options RuleOptions) (*corazawaf.Rule, error) { if err != nil { return nil, err } - if utils.InSlice(operator, disabledRuleOperators) { + if stringsutil.InSlice(operator, disabledRuleOperators) { return nil, fmt.Errorf("%s rule operator is disabled", operator) } if err := rp.ParseVariables(vars); err != nil { @@ -375,7 +375,7 @@ func ParseRule(options RuleOptions) (*corazawaf.Rule, error) { } } else { // quoted actions separated by comma (,) - actions = utils.MaybeRemoveQuotes(options.Data) + actions = stringsutil.MaybeRemoveQuotes(options.Data) err = rp.ParseActions(actions) if err != nil { return nil, err @@ -425,7 +425,7 @@ func parseActionOperator(data string) (vars string, op string, actions string, e if err != nil { return } - op = utils.MaybeRemoveQuotes(op) + op = stringsutil.MaybeRemoveQuotes(op) rest = strings.TrimLeft(rest, " ") if len(rest) == 0 { @@ -436,7 +436,7 @@ func parseActionOperator(data string) (vars string, op string, actions string, e if len(rest) < 2 || rest[0] != '"' || rest[len(rest)-1] != '"' { return "", "", "", fmt.Errorf("invalid actions for rule with operator: %q", data) } - actions = utils.MaybeRemoveQuotes(rest) + actions = stringsutil.MaybeRemoveQuotes(rest) return } @@ -547,9 +547,9 @@ func parseActions(actions string) ([]ruleAction, error) { } func appendRuleAction(res []ruleAction, key string, val string, disruptiveActionIndex int) ([]ruleAction, int, error) { - key = strings.ToLower(strings.TrimSpace(key)) + key = stringsutil.AsciiToLower(strings.TrimSpace(key)) val = strings.TrimSpace(val) // We may want to keep case sensitive values (e.g. 
Messages) - val = utils.MaybeRemoveQuotes(val) + val = stringsutil.MaybeRemoveQuotes(val) f, err := actionsmod.Get(key) if err != nil { return res, unset, err diff --git a/internal/strings/strings.go b/internal/strings/strings.go index c523a21b..38a0bd35 100644 --- a/internal/strings/strings.go +++ b/internal/strings/strings.go @@ -106,3 +106,28 @@ func InSlice(a string, list []string) bool { func WrapUnsafe(buf []byte) string { return *(*string)(unsafe.Pointer(&buf)) } + +// AsciiToLower lowercases only the ASCII letters A-Z in s and leaves every other byte unchanged. +// It returns s itself when s has no such letter; otherwise it builds one lowercased copy of s. +func AsciiToLower(s string) string { + for i := 0; i < len(s); i++ { + if s[i] >= 'A' && s[i] <= 'Z' { + return asciiToLowerInPlace(s, i) + } + } + return s +} + +// asciiToLowerInPlace returns a lowercased copy of s; despite the name, s itself is not modified. +// start must be the index of the first uppercase ASCII letter in s, as it is converted unchecked. +func asciiToLowerInPlace(s string, start int) string { + res := []byte(s) + res[start] += 'a' - 'A' + + for i := start + 1; i < len(res); i++ { + if res[i] >= 'A' && res[i] <= 'Z' { + res[i] += 'a' - 'A' + } + } + return WrapUnsafe(res) +} diff --git a/internal/strings/strings_test.go b/internal/strings/strings_test.go index 02b08308..aa896457 100644 --- a/internal/strings/strings_test.go +++ b/internal/strings/strings_test.go @@ -6,7 +6,10 @@ package strings -import "testing" +import ( + "strings" + "testing" +) func TestMaybeRemoveQuotes(t *testing.T) { tests := []struct { @@ -89,3 +92,91 @@ func TestRandomStringConcurrency(t *testing.T) { go RandomString(10000) } } + +func TestAsciiToLower(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + {"Standard Uppercase", "HELLO WORLD", "hello world"}, // Standard uppercase input + {"Already Lowercase", "hello world", "hello world"}, // Already lowercase + {"No Letters to Convert", "1234!@#$", "1234!@#$"}, // Non-alphabetic characters + {"Mixed Case", "GoLang", "golang"}, // Mixed case + {"Leading and
Trailing Spaces", " SPACES ", " spaces "}, // Leading/trailing spaces + {"Unicode Unchanged", "Привет Мир", "Привет Мир"}, // Unicode characters remain unchanged + {"Mixed with Emojis", "😀😃😄😁🤣 Emoji TEST", "😀😃😄😁🤣 emoji test"}, // Emojis with mixed case text + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + if got := AsciiToLower(test.input); got != test.expected { + t.Errorf("AsciiToLower(%q) = %q; want %q", test.input, got, test.expected) + } + }) + } +} + +func BenchmarkAsciiVsUnicodeCaseString(b *testing.B) { + benchmarkCases := []struct { + name string + str string + }{ + {"ASCII Fully Lowercase Sentence", "this is a completely lowercase sentence for testing purposes."}, + {"ASCII Fully Uppercase Sentence", "THIS IS A COMPLETELY UPPERCASE SENTENCE FOR TESTING PURPOSES."}, + {"ASCII Mixed Case Sentence", "This Is A Randomized Mixed Case Sentence For Evaluation."}, + {"ASCII Non-Alphabetic Characters", "@@@@@@@ this text contains non-alphabetic symbols."}, + + // Adding Unicode cases + {"Unicode Greek Alphabet", "Αυτό είναι ένα τεστ με ελληνικούς χαρακτήρες."}, + {"Unicode Cyrillic Alphabet", "Это тест с использованием кириллических символов."}, + {"Unicode Mixed Greek and ASCII", "This is a mixed sentence: Ελληνικά και English."}, + {"Unicode Emoji", "😀😃😄😁🤣 Emoji characters mixed with text."}, + + // Edge cases + {"Empty String", ""}, + {"Only Punctuation", "!!!???...,,,"}, + {"Only Whitespace", " "}, + {"Long Mixed Case String", "This is a really long sentence that is going to be used to test how the various implementations handle longer strings with a mix of cases. 
This should include UPPERCASE, lowercase, and a variety of symbols like $%^&*."}, + {"Special Turkish Case", "Turkish İ and i cases: İSTANBUL, istanbul, İstanbul, ıstanbul."}, + } + + // Benchmarking AsciiToLower function + b.Run("AsciiToLower Implementation", func(b *testing.B) { + for _, benchmarkCase := range benchmarkCases { + b.Run(benchmarkCase.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = AsciiToLower(benchmarkCase.str) + } + }) + } + }) + + // Benchmarking a manual ASCII conversion method + b.Run("Manual ASCII Conversion", func(b *testing.B) { + for _, benchmarkCase := range benchmarkCases { + b.Run(benchmarkCase.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + byteSlice := []byte(benchmarkCase.str) + for j := 0; j < len(byteSlice); j++ { + if byteSlice[j] >= 'A' && byteSlice[j] <= 'Z' { + byteSlice[j] += 'a' - 'A' + } + } + _ = string(byteSlice) // Convert byte slice back to string + } + }) + } + }) + + // Benchmarking standard Unicode case conversion + b.Run("Standard Unicode ToLower", func(b *testing.B) { + for _, benchmarkCase := range benchmarkCases { + b.Run(benchmarkCase.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = strings.ToLower(benchmarkCase.str) + } + }) + } + }) +} diff --git a/internal/transformations/lowercase.go b/internal/transformations/lowercase.go index 5d35a79d..6bb7d9c6 100644 --- a/internal/transformations/lowercase.go +++ b/internal/transformations/lowercase.go @@ -4,12 +4,12 @@ package transformations import ( - "strings" + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" ) func lowerCase(data string) (string, bool, error) { // TODO: Explicit implementation of ToLower would allow optimizing away the byte by byte comparison for returning the changed boolean // See https://github.com/corazawaf/coraza/pull/778#discussion_r1186963422 - transformedData := strings.ToLower(data) + transformedData := stringsutil.AsciiToLower(data) return transformedData, data != transformedData, nil } 
diff --git a/internal/transformations/transformations.go b/internal/transformations/transformations.go index 704e04ad..a1d2503e 100644 --- a/internal/transformations/transformations.go +++ b/internal/transformations/transformations.go @@ -5,9 +5,9 @@ package transformations import ( "fmt" - "strings" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" ) var transformations = map[string]plugintypes.Transformation{} @@ -15,13 +15,13 @@ var transformations = map[string]plugintypes.Transformation{} // Register registers a transformation by name // If the transformation is already registered, it will be overwritten func Register(name string, trans plugintypes.Transformation) { - transformations[strings.ToLower(name)] = trans + transformations[stringsutil.AsciiToLower(name)] = trans } // GetTransformation returns a transformation by name // If the transformation is not found, it returns an error func GetTransformation(name string) (plugintypes.Transformation, error) { - if t, ok := transformations[strings.ToLower(name)]; ok { + if t, ok := transformations[stringsutil.AsciiToLower(name)]; ok { return t, nil } return nil, fmt.Errorf("invalid transformation name %q", name) diff --git a/types/severity.go b/types/severity.go index a1fd6cde..a54e1cfe 100644 --- a/types/severity.go +++ b/types/severity.go @@ -6,7 +6,8 @@ package types import ( "fmt" "strconv" - "strings" + + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" ) // RuleSeverity represents the severity of a triggered rule @@ -84,7 +85,7 @@ func ParseRuleSeverity(input string) (RuleSeverity, error) { } return RuleSeverity(s), nil } - switch strings.ToLower(input) { + switch stringsutil.AsciiToLower(input) { case "emergency": return RuleSeverityEmergency, nil case "alert": diff --git a/types/waf.go b/types/waf.go index 489a42a8..6e882957 100644 --- a/types/waf.go +++ b/types/waf.go @@ -7,6 +7,8 @@ import ( "errors" "fmt" 
"strings" + + stringsutil "github.com/corazawaf/coraza/v3/internal/strings" ) // AuditEngineStatus represents the functionality @@ -24,7 +26,7 @@ const ( // ParseAuditEngineStatus parses the audit engine status func ParseAuditEngineStatus(as string) (AuditEngineStatus, error) { - switch strings.ToLower(as) { + switch stringsutil.AsciiToLower(as) { case "on": return AuditEngineOn, nil case "off": @@ -52,7 +54,7 @@ const ( // ParseRuleEngineStatus parses the rule engine status func ParseRuleEngineStatus(re string) (RuleEngineStatus, error) { - switch strings.ToLower(re) { + switch stringsutil.AsciiToLower(re) { case "on": return RuleEngineOn, nil case "detectiononly":