diff --git a/evaluator/bundle.go b/evaluator/bundle.go
new file mode 100644
index 0000000..dc1d7fb
--- /dev/null
+++ b/evaluator/bundle.go
@@ -0,0 +1,218 @@
+package evaluator
+
+import (
+	"context"
+	"errors"
+	aho_corasick "github.com/BobuSumisu/aho-corasick"
+	"github.com/bradleyjkemp/sigma-go"
+	"github.com/bradleyjkemp/sigma-go/evaluator/modifiers"
+	"regexp"
+	"strings"
+	"unsafe"
+)
+
+// ForRules compiles a set of rule evaluators that are evaluated together, allowing more efficient
+// string matching algorithms (a single Aho-Corasick search per field) to be used.
+func ForRules(rules []sigma.Rule, options ...Option) RuleEvaluatorBundle {
+	if len(rules) == 0 {
+		return RuleEvaluatorBundle{}
+	}
+
+	bundle := RuleEvaluatorBundle{
+		ahocorasick: map[string]ahocorasickSearcher{},
+	}
+
+	values := map[string][]string{}
+
+	for _, rule := range rules {
+		e := ForRule(rule, options...)
+		bundle.evaluators = append(bundle.evaluators, e)
+		bundle.caseSensitive = e.caseSensitive
+
+		for _, search := range rule.Detection.Searches {
+			for _, matcher := range search.EventMatchers {
+				for _, fieldMatcher := range matcher {
+					contains := false
+					regex := false
+					for _, modifier := range fieldMatcher.Modifiers {
+						if modifier == "contains" {
+							contains = true
+						}
+						if modifier == "re" {
+							regex = true
+						}
+					}
+					switch {
+					case contains: // add all values to the needle set
+						for _, value := range fieldMatcher.Values {
+							if value == nil {
+								continue
+							}
+							stringValue := modifiers.CoerceString(value)
+							if !bundle.caseSensitive {
+								stringValue = strings.ToLower(stringValue)
+							}
+							values[fieldMatcher.Field] = append(values[fieldMatcher.Field], stringValue)
+						}
+					case regex: // get "necessary" substrings and add to the needle set
+						for _, value := range fieldMatcher.Values {
+							ss, caseInsensitive, _ := regexStrings(modifiers.CoerceString(value)) // todo: benchmark this, should save the result?
+							for _, s := range ss {
+								if caseInsensitive {
+									s = strings.ToLower(s)
+								}
+								values[fieldMatcher.Field] = append(values[fieldMatcher.Field], s)
+							}
+						}
+					}
+
+				}
+			}
+		}
+	}
+
+	for field, fieldValues := range values {
+		bundle.ahocorasick[field] = ahocorasickSearcher{
+			Trie:     aho_corasick.NewTrieBuilder().AddStrings(fieldValues).Build(),
+			patterns: fieldValues,
+			results:  map[*byte]map[string]bool{}, // used for caching results
+		}
+	}
+	return bundle
+}
+
+type RuleEvaluatorBundle struct {
+	ahocorasick   map[string]ahocorasickSearcher
+	evaluators    []*RuleEvaluator
+	caseSensitive bool
+}
+
+type ahocorasickSearcher struct {
+	*aho_corasick.Trie
+	patterns []string
+	results  map[*byte]map[string]bool
+}
+
+func (as ahocorasickSearcher) getResults(s string, caseSensitive bool) map[string]bool {
+	key := unsafe.StringData(s) // keying on the string's underlying data pointer means we only compute results once per interned string
+	result, ok := as.results[key]
+	if ok {
+		return result
+	}
+
+	// haven't already computed this
+	if !caseSensitive {
+		s = strings.ToLower(s)
+	}
+	results := map[string]bool{}
+	as.results[key] = results
+	for _, match := range as.MatchString(s) {
+		// TODO: is match.MatchString equivalent to matcher.patterns[match.Pattern()]?
+		as.results[key][match.MatchString()] = true
+	}
+	return results
+}
+
+type RuleResult struct {
+	Result
+	sigma.Rule
+}
+
+func (bundle RuleEvaluatorBundle) Matches(ctx context.Context, event Event) ([]RuleResult, error) {
+	if len(bundle.evaluators) == 0 {
+		return nil, nil
+	}
+
+	// copy the current rule comparators
+	comparators := map[string]modifiers.Comparator{}
+	for name, comparator := range bundle.evaluators[0].comparators {
+		comparators[name] = comparator
+	}
+
+	// override the contains and re comparators with our Aho-Corasick backed versions
+	comparators["contains"] = &ahocorasickContains{
+		matchers:      bundle.ahocorasick,
+		caseSensitive: bundle.caseSensitive,
+	}
+	comparators["re"] = &ahocorasickRe{
+		matchers: bundle.ahocorasick,
+	}
+
+	ruleresults := []RuleResult{}
+	errs := []error{}
+	for _, rule := range bundle.evaluators {
+		result, err := rule.matches(ctx, event, comparators)
+		if err != nil {
+			errs = append(errs, err)
+			continue
+		}
+		ruleresults = append(ruleresults, RuleResult{
+			Result: result,
+			Rule:   rule.Rule,
+		})
+	}
+	return ruleresults, errors.Join(errs...)
+}
+
+type ahocorasickContains struct {
+	caseSensitive bool
+	modifiers.Comparator
+	matchers map[string]ahocorasickSearcher
+}
+
+func (a *ahocorasickContains) MatchesField(field string, actual any, expected any) (bool, error) {
+	if expected == "" {
+		// compatibility with old |contains behaviour
+		// possibly a bug?
+		return true, nil
+	}
+
+	results := a.matchers[field].getResults(modifiers.CoerceString(actual), a.caseSensitive)
+
+	needle := modifiers.CoerceString(expected)
+	if !a.caseSensitive {
+		// when operating in case-insensitive mode, search strings must be canonicalised
+		// (this is ok because search strings are much smaller than the haystack)
+		// TODO: should we just modify the rules in this case? (saving the lower-casing every time)
+		needle = strings.ToLower(needle)
+	}
+	return results[needle], nil
+}
+
+type ahocorasickRe struct {
+	modifiers.Comparator
+	matchers map[string]ahocorasickSearcher
+}
+
+func (a *ahocorasickRe) MatchesField(field string, actual any, expected any) (bool, error) {
+	stringRe := modifiers.CoerceString(expected)
+	re, err := regexp.Compile(stringRe) // todo: cache this?
+	if err != nil {
+		return false, err
+	}
+
+	// regexStrings returns a set of simple strings, at least one of which
+	// necessarily appears in any string the regex matches.
+	// If none are present in `actual`, we don't need to run the regex at all.
+	ss, caseInsensitive, err := regexStrings(stringRe)
+	if err != nil {
+		return false, err
+	}
+
+	haystack := modifiers.CoerceString(actual)
+	results := a.matchers[field].getResults(haystack, !caseInsensitive)
+	found := false
+	for _, s := range ss {
+		if results[s] {
+			found = true
+			break
+		}
+	}
+	if !found {
+		return false, nil
+	}
+
+	// our cheap heuristic says the regex *might* match the string,
+	// so we now have to run the full regex
+	return re.MatchString(haystack), nil
+}
diff --git a/evaluator/evaluate.go b/evaluator/evaluate.go
index 745b7e9..1d07ff3 100644
--- a/evaluator/evaluate.go
+++ b/evaluator/evaluate.go
@@ -4,8 +4,8 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
-
 	"github.com/bradleyjkemp/sigma-go"
+	"github.com/bradleyjkemp/sigma-go/evaluator/modifiers"
 )
 
 type RuleEvaluator struct {
@@ -17,6 +17,7 @@ type RuleEvaluator struct {
 	expandPlaceholder func(ctx context.Context, placeholderName string) ([]string, error)
 
 	caseSensitive bool
+	comparators   map[string]modifiers.Comparator
 
 	count   func(ctx context.Context, gb GroupedByValues) (float64, error)
 	average func(ctx context.Context, gb GroupedByValues, value float64) (float64, error)
@@ -30,6 +31,7 @@ type RuleEvaluator struct {
 // For example, if a Sigma rule has a condition like this (attempting to detect login brute forcing)
 //
 // detection:
+//
 //	login_attempt:
 //	  # something here
 //	condition:
@@ -40,6 +42,7 @@ type RuleEvaluator struct {
 // Each different GroupedByValues points to a different box.
 //
 // GroupedByValues
+//
 //	   ||
 //	___↓↓___  ________
 //	| User A | | User B |
@@ -65,7 +68,7 @@ func (a GroupedByValues) Key() string {
 }
 
 func ForRule(rule sigma.Rule, options ...Option) *RuleEvaluator {
-	e := &RuleEvaluator{Rule: rule}
+	e := &RuleEvaluator{Rule: rule, comparators: modifiers.Comparators}
 	for _, option := range options {
 		option(e)
 	}
@@ -93,6 +96,10 @@ func eventValue(e Event, key string) interface{} {
 }
 
 func (rule RuleEvaluator) Matches(ctx context.Context, event Event) (Result, error) {
+	return rule.matches(ctx, event, rule.comparators)
+}
+
+func (rule RuleEvaluator) matches(ctx context.Context, event Event, comparators map[string]modifiers.Comparator) (Result, error) {
 	result := Result{
 		Match:         false,
 		SearchResults: map[string]bool{},
@@ -100,7 +107,7 @@ func (rule RuleEvaluator) Matches(ctx context.Context, event Event) (Result, error) {
 	}
 	for identifier, search := range rule.Detection.Searches {
 		var err error
-		result.SearchResults[identifier], err = rule.evaluateSearch(ctx, search, event)
+		result.SearchResults[identifier], err = rule.evaluateSearch(ctx, search, event, comparators)
 		if err != nil {
 			return Result{}, fmt.Errorf("error evaluating search %s: %w", identifier, err)
 		}
diff --git a/evaluator/evaluate_search.go b/evaluator/evaluate_search.go
index 844e506..bfdcce1 100644
--- a/evaluator/evaluate_search.go
+++ b/evaluator/evaluate_search.go
@@ -4,14 +4,13 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"github.com/PaesslerAG/jsonpath"
+	"github.com/bradleyjkemp/sigma-go"
 	"github.com/bradleyjkemp/sigma-go/evaluator/modifiers"
 	"path"
 	"reflect"
 	"regexp"
 	"strings"
-
-	"github.com/PaesslerAG/jsonpath"
-	"github.com/bradleyjkemp/sigma-go"
 )
 
 func (rule RuleEvaluator) evaluateSearchExpression(search sigma.SearchExpr, searchResults map[string]bool) bool {
@@ -84,7 +83,7 @@ func (rule
RuleEvaluator) evaluateSearchExpression(search sigma.SearchExpr, sear panic(fmt.Sprintf("unhandled node type %T", search)) } -func (rule RuleEvaluator) evaluateSearch(ctx context.Context, search sigma.Search, event Event) (bool, error) { +func (rule RuleEvaluator) evaluateSearch(ctx context.Context, search sigma.Search, event Event, comparators map[string]modifiers.Comparator) (bool, error) { if len(search.Keywords) > 0 { return false, fmt.Errorf("keywords unsupported") } @@ -112,11 +111,7 @@ eventMatcher: // field matchers can specify modifiers (FieldName|modifier1|modifier2) which change the matching behaviour var comparator modifiers.ComparatorFunc var err error - if rule.caseSensitive { - comparator, err = modifiers.GetComparatorCaseSensitive(fieldModifiers...) - } else { - comparator, err = modifiers.GetComparator(fieldModifiers...) - } + comparator, err = modifiers.GetComparator(fieldMatcher.Field, comparators, fieldModifiers...) if err != nil { return false, err } @@ -199,6 +194,7 @@ func (rule *RuleEvaluator) GetFieldValuesFromEvent(field string, event Event) ([ actualValues = append(actualValues, toGenericSlice(v)...) } } + return actualValues, nil } @@ -291,7 +287,7 @@ func toGenericSlice(v interface{}) []interface{} { return []interface{}{v} } - var out []interface{} + out := make([]interface{}, 0, rv.Len()) for i := 0; i < rv.Len(); i++ { out = append(out, rv.Index(i).Interface()) } diff --git a/evaluator/evaluate_test.go b/evaluator/evaluate_test.go index 797aea1..cc7cbc5 100644 --- a/evaluator/evaluate_test.go +++ b/evaluator/evaluate_test.go @@ -92,6 +92,64 @@ func TestRuleEvaluator_Matches(t *testing.T) { } } +func TestRuleEvaluatorBundle_Matches(t *testing.T) { + r1 := sigma.Rule{ + Detection: sigma.Detection{ + Searches: map[string]sigma.Search{ + "foo": { + EventMatchers: []sigma.EventMatcher{ + { + { + Field: "field", + Modifiers: []string{"contains"}, + Values: []interface{}{ + "foo", + }, + }, + }, + }, + }, + }, + Conditions: []sigma.Condition{{ + Search: sigma.AllOfThem{}, + }, + }, + }, + } + r2 := sigma.Rule{ + Detection: sigma.Detection{ + Searches: map[string]sigma.Search{ + "foo": { + EventMatchers: []sigma.EventMatcher{ + { + { + Field: "field", + Modifiers: []string{"contains"}, + Values: []interface{}{ + "bar", + }, + }, + }, + }, + }, + }, + Conditions: []sigma.Condition{{ + Search: sigma.AllOfThem{}, + }, + }, + }, + } + + bundle := ForRules([]sigma.Rule{r1, r2}) + + _, err := bundle.Matches(context.Background(), map[string]interface{}{ + "field": "foobar", + }) + if err != nil { + t.Fatal(err) + } +} + func TestRuleEvaluator_Matches_WithPlaceholder(t *testing.T) { rule := ForRule(sigma.Rule{ Detection: sigma.Detection{ diff --git a/evaluator/fuzz_test.go b/evaluator/fuzz_test.go index 5a99f54..11e5809 100644 --- a/evaluator/fuzz_test.go +++ b/evaluator/fuzz_test.go @@ -3,6 +3,9 @@ package evaluator import ( "context" "encoding/json" + "fmt" + "reflect" + "sync" "testing" "github.com/bradleyjkemp/sigma-go" @@ -18,6 +21,16 @@ detection: condition: a and b ` +const testRuleRe = ` +id: TEST_RULE +detection: + a: + Foo|re: bar + b: + Bar|endswith: baz + condition: a and b +` + const testConfig = ` title: Test logsources: @@ -49,3 +62,80 @@ func FuzzRuleMatches(f *testing.F) { }) } + +func FuzzRuleBundleMatches(f *testing.F) { + f.Add(testRule, testRule, testConfig, `{"foo": "bar", "bar": "baz"}`, false) + f.Add(testRule, testRuleRe, testConfig, `{"foo": "bar", "bar": "baz"}`, false) + f.Fuzz(func(t *testing.T, rule1, rule2, config, payload string, 
caseSensitive bool) {
+		var r1, r2 sigma.Rule
+		var c sigma.Config
+		var err error
+		wg := sync.WaitGroup{}
+		wg.Add(1)
+
+		go func() {
+			defer func() {
+				wg.Done()
+				if r := recover(); r != nil {
+					err = fmt.Errorf("panic in parsing")
+				}
+			}()
+			r1, err = sigma.ParseRule([]byte(rule1))
+			if err != nil || len(r1.Detection.Searches) == 0 || len(r1.Detection.Conditions) == 0 {
+				return
+			}
+			r2, err = sigma.ParseRule([]byte(rule2))
+			if err != nil || len(r2.Detection.Searches) == 0 || len(r2.Detection.Conditions) == 0 {
+				return
+			}
+			c, err = sigma.ParseConfig([]byte(config))
+			if err != nil {
+				return
+			}
+		}()
+		wg.Wait()
+		if err != nil {
+			return
+		}
+
+		var e Event
+		if err := json.Unmarshal([]byte(payload), &e); err != nil {
+			return
+		}
+		if reflect.TypeOf(e).Kind() != reflect.Map {
+			return
+		}
+
+		options := []Option{WithConfig(c)}
+		if caseSensitive {
+			options = append(options, CaseSensitive)
+		}
+
+		eval1 := ForRule(r1, options...)
+		eval2 := ForRule(r2, options...)
+		match1, err1 := eval1.Matches(context.Background(), e)
+		if err1 != nil {
+			return
+		}
+		match2, err2 := eval2.Matches(context.Background(), e)
+		if err2 != nil {
+			return
+		}
+
+		bundle := ForRules([]sigma.Rule{r1, r2}, options...)
+		matches, errs := bundle.Matches(context.Background(), e)
+		if errs != nil {
+			panic(errs)
+		}
+		if len(matches) != 2 {
+			panic(fmt.Sprint("didn't get 2 matches, got ", len(matches), err))
+		}
+
+		if !reflect.DeepEqual(matches[0].Result, match1) {
+			panic(fmt.Sprint("difference in match1\nbundle: ", matches[0].Result, "\nstandalone: ", match1))
+		}
+		if !reflect.DeepEqual(matches[1].Result, match2) {
+			panic(fmt.Sprint("difference in match2\nbundle: ", matches[1].Result, "\nstandalone: ", match2))
+		}
+	})
+}
diff --git a/evaluator/index_test.go b/evaluator/index_test.go
index 2c1d259..adc66d5 100644
--- a/evaluator/index_test.go
+++ b/evaluator/index_test.go
@@ -2,7 +2,6 @@ package evaluator
 
 import (
 	"context"
-	"fmt"
 	"testing"
 	"github.com/bradleyjkemp/sigma-go"
 )
@@ -44,8 +43,6 @@ func TestRuleEvaluator_RelevantToEvent_LogsourceRewriting(t *testing.T) {
 		DefaultIndex: "",
 	}))
 
-	fmt.Println(rule.Indexes())
-
 	relevant := []string{
 		"just-category",
 		"category-rewritten-index",
diff --git a/evaluator/indexes.go b/evaluator/indexes.go
index 5b3c698..1f45ad5 100644
--- a/evaluator/indexes.go
+++ b/evaluator/indexes.go
@@ -64,8 +64,8 @@ func (rule RuleEvaluator) Indexes() []string {
 }
 
 // RelevantToEvent calculates whether a rule is applicable to an event based on:
-// * Whether the rule has been configured with a config file that matches the eventIndex
-// * Whether the event matches the conditions from the config file
+//   - Whether the rule has been configured with a config file that matches the eventIndex
+//   - Whether the event matches the conditions from the config file
 func (rule RuleEvaluator) RelevantToEvent(ctx context.Context, eventIndex string, event Event) (bool, error) {
 	matchedIndex := false
 	for _, index := range rule.indexes {
@@ -82,7 +82,7 @@ func (rule RuleEvaluator) RelevantToEvent(ctx context.Context, eventIndex string
 	// need to check for any value constraints that have been specified
 	// TODO: this doesn't yet support the logsourcemerging option to choose between ANDing/ORing these conditions
 	for _, condition := range rule.indexConditions {
-		searchMatches, err := rule.evaluateSearch(ctx, condition, event)
+		searchMatches, err := rule.evaluateSearch(ctx, condition, event, rule.comparators)
 		if err != nil {
 			return false, fmt.Errorf("failed to evaluate index condition: 
%w", err) } diff --git a/evaluator/modifiers/modifiers.go b/evaluator/modifiers/modifiers.go index 58be472..b63f29f 100644 --- a/evaluator/modifiers/modifiers.go +++ b/evaluator/modifiers/modifiers.go @@ -11,15 +11,10 @@ import ( "gopkg.in/yaml.v3" ) -func GetComparator(modifiers ...string) (ComparatorFunc, error) { - return getComparator(Comparators, modifiers...) -} - -func GetComparatorCaseSensitive(modifiers ...string) (ComparatorFunc, error) { - return getComparator(ComparatorsCaseSensitive, modifiers...) -} - -func getComparator(comparators map[string]Comparator, modifiers ...string) (ComparatorFunc, error) { +func GetComparator(field string, comparators map[string]Comparator, modifiers ...string) (ComparatorFunc, error) { + if comparators == nil { + comparators = Comparators + } if len(modifiers) == 0 { return baseComparator{}.Matches, nil } @@ -69,7 +64,11 @@ func getComparator(comparators map[string]Comparator, modifiers ...string) (Comp } } - return comparator.Matches(actual, expected) + if fieldComparator, ok := comparator.(FieldComparator); ok { + return fieldComparator.MatchesField(field, actual, expected) + } else { + return comparator.Matches(actual, expected) + } }, nil } @@ -79,6 +78,11 @@ type Comparator interface { Matches(actual any, expected any) (bool, error) } +// FieldComparator is an optional extension to Comparator which also passes the field name +type FieldComparator interface { + MatchesField(field string, actual any, expected any) (bool, error) +} + type ComparatorFunc func(actual, expected any) (bool, error) // ValueModifier modifies the expected value before it is passed to the comparator. @@ -127,7 +131,7 @@ func (baseComparator) Matches(actual, expected any) (bool, error) { return true, nil default: // The Sigma spec defines that by default comparisons are case-insensitive - return strings.EqualFold(coerceString(actual), coerceString(expected)), nil + return strings.EqualFold(CoerceString(actual), CoerceString(expected)), nil } } @@ -135,67 +139,67 @@ type contains struct{} func (contains) Matches(actual, expected any) (bool, error) { // The Sigma spec defines that by default comparisons are case-insensitive - return strings.Contains(strings.ToLower(coerceString(actual)), strings.ToLower(coerceString(expected))), nil + return strings.Contains(strings.ToLower(CoerceString(actual)), strings.ToLower(CoerceString(expected))), nil } type endswith struct{} func (endswith) Matches(actual, expected any) (bool, error) { // The Sigma spec defines that by default comparisons are case-insensitive - return strings.HasSuffix(strings.ToLower(coerceString(actual)), strings.ToLower(coerceString(expected))), nil + return strings.HasSuffix(strings.ToLower(CoerceString(actual)), strings.ToLower(CoerceString(expected))), nil } type startswith struct{} func (startswith) Matches(actual, expected any) (bool, error) { // The Sigma spec defines that by default comparisons are case-insensitive - return strings.HasPrefix(strings.ToLower(coerceString(actual)), strings.ToLower(coerceString(expected))), nil + return strings.HasPrefix(strings.ToLower(CoerceString(actual)), strings.ToLower(CoerceString(expected))), nil } type containsCS struct{} func (containsCS) Matches(actual, expected any) (bool, error) { - return strings.Contains(coerceString(actual), coerceString(expected)), nil + return strings.Contains(CoerceString(actual), CoerceString(expected)), nil } type endswithCS struct{} func (endswithCS) Matches(actual, expected any) (bool, error) { - return 
strings.HasSuffix(coerceString(actual), coerceString(expected)), nil + return strings.HasSuffix(CoerceString(actual), CoerceString(expected)), nil } type startswithCS struct{} func (startswithCS) Matches(actual, expected any) (bool, error) { - return strings.HasPrefix(coerceString(actual), coerceString(expected)), nil + return strings.HasPrefix(CoerceString(actual), CoerceString(expected)), nil } type b64 struct{} func (b64) Modify(value any) (any, error) { - return base64.StdEncoding.EncodeToString([]byte(coerceString(value))), nil + return base64.StdEncoding.EncodeToString([]byte(CoerceString(value))), nil } type re struct{} func (re) Matches(actual any, expected any) (bool, error) { - re, err := regexp.Compile(coerceString(expected)) + re, err := regexp.Compile(CoerceString(expected)) if err != nil { return false, err } - return re.MatchString(coerceString(actual)), nil + return re.MatchString(CoerceString(actual)), nil } type cidr struct{} func (cidr) Matches(actual any, expected any) (bool, error) { - _, cidr, err := net.ParseCIDR(coerceString(expected)) + _, cidr, err := net.ParseCIDR(CoerceString(expected)) if err != nil { return false, err } - ip := net.ParseIP(coerceString(actual)) + ip := net.ParseIP(CoerceString(actual)) return cidr.Contains(ip), nil } @@ -227,7 +231,7 @@ func (lte) Matches(actual any, expected any) (bool, error) { return lte, err } -func coerceString(v interface{}) string { +func CoerceString(v interface{}) string { switch vv := v.(type) { case string: return vv diff --git a/evaluator/options.go b/evaluator/options.go index 4684e54..208a2d9 100644 --- a/evaluator/options.go +++ b/evaluator/options.go @@ -2,6 +2,7 @@ package evaluator import ( "context" + "github.com/bradleyjkemp/sigma-go/evaluator/modifiers" "github.com/bradleyjkemp/sigma-go" ) @@ -45,4 +46,5 @@ func WithConfig(config ...sigma.Config) Option { // This can increase performance (especially for larger events) by skipping expensive calls to strings.ToLower func CaseSensitive(e *RuleEvaluator) { e.caseSensitive = true + e.comparators = modifiers.ComparatorsCaseSensitive } diff --git a/evaluator/restring.go b/evaluator/restring.go new file mode 100644 index 0000000..e7507ed --- /dev/null +++ b/evaluator/restring.go @@ -0,0 +1,563 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This file is based on http://code.google.com/p/codesearch/source/browse/index/regexp.go, +// modified to find strings instead of trigrams. + +package evaluator + +import ( + "regexp/syntax" + "sort" + "strings" + "unicode" +) + +// regexStrings returns a set of strings such that any string that matches re must +// contain at least one of the strings in the set. If no such set can be found, +// regexStrings returns an empty set. +func regexStrings(re string) (stringSet, bool, error) { + parsed, err := syntax.Parse(re, syntax.Perl) + if err != nil { + return nil, false, err + } + info := analyze(parsed) + return info.bestSet(), parsed.Flags&syntax.FoldCase > 0, nil +} + +// A regexpInfo summarizes the results of analyzing a regexp. +type regexpInfo struct { + // canEmpty records whether the regexp matches the empty string + canEmpty bool + + // exact is the exact set of strings matching the regexp. + exact stringSet + + // if exact is nil, prefix is the set of possible match prefixes, + // and suffix is the set of possible match suffixes. + prefix stringSet // otherwise: the exact set of matching prefixes ... 
+ suffix stringSet // ... and suffixes + + // internal is a set of strings that match internally (not as prefixes or + // suffixes). + internal stringSet +} + +const ( + // Exact sets are limited to maxExact strings. + // If they get too big, simplify will rewrite the regexpInfo + // to use prefix and suffix instead. It's not worthwhile for + // this to be bigger than maxSet. + maxExact = 100 + + // Prefix and suffix sets are limited to maxSet strings. + // If they get too big, simplify will replace groups of strings + // sharing a common leading prefix (or trailing suffix) with + // that common prefix (or suffix). + maxSet = 200 +) + +// anyMatch returns the regexpInfo describing a regexp that +// matches any string. +func anyMatch() regexpInfo { + return regexpInfo{ + canEmpty: true, + prefix: []string{""}, + suffix: []string{""}, + } +} + +// anyChar returns the regexpInfo describing a regexp that +// matches any single character. +func anyChar() regexpInfo { + return regexpInfo{ + prefix: []string{""}, + suffix: []string{""}, + } +} + +// noMatch returns the regexpInfo describing a regexp that +// matches no strings at all. +func noMatch() regexpInfo { + return regexpInfo{} +} + +// emptyString returns the regexpInfo describing a regexp that +// matches only the empty string. +func emptyString() regexpInfo { + return regexpInfo{ + canEmpty: true, + exact: []string{""}, + } +} + +// analyze returns the regexpInfo for the regexp re. +func analyze(re *syntax.Regexp) (ret regexpInfo) { + var info regexpInfo + switch re.Op { + case syntax.OpNoMatch: + return noMatch() + + case syntax.OpEmptyMatch, + syntax.OpBeginLine, syntax.OpEndLine, + syntax.OpBeginText, syntax.OpEndText, + syntax.OpWordBoundary, syntax.OpNoWordBoundary: + return emptyString() + + case syntax.OpLiteral: + if re.Flags&syntax.FoldCase != 0 { + switch len(re.Rune) { + case 0: + return emptyString() + case 1: + // Single-letter case-folded string: + // rewrite into char class and analyze. + re1 := &syntax.Regexp{ + Op: syntax.OpCharClass, + } + re1.Rune = re1.Rune0[:0] + r0 := re.Rune[0] + re1.Rune = append(re1.Rune, r0, r0) + for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) { + re1.Rune = append(re1.Rune, r1, r1) + } + info = analyze(re1) + return info + } + // Multi-letter case-folded string: + // treat as concatenation of single-letter case-folded strings. + re1 := &syntax.Regexp{ + Op: syntax.OpLiteral, + Flags: syntax.FoldCase, + } + info = emptyString() + for i := range re.Rune { + re1.Rune = re.Rune[i : i+1] + info = concat(info, analyze(re1)) + } + return info + } + info.exact = stringSet{string(re.Rune)} + + case syntax.OpAnyCharNotNL, syntax.OpAnyChar: + return anyChar() + + case syntax.OpCapture: + return analyze(re.Sub[0]) + + case syntax.OpConcat: + return fold(concat, re.Sub, emptyString()) + + case syntax.OpAlternate: + return fold(alternate, re.Sub, noMatch()) + + case syntax.OpQuest: + return alternate(analyze(re.Sub[0]), emptyString()) + + case syntax.OpStar: + // We don't know anything, so assume the worst. + return anyMatch() + + case syntax.OpRepeat: + if re.Min == 0 { + // Like OpStar + return anyMatch() + } + fallthrough + case syntax.OpPlus: + // x+ + // Since there has to be at least one x, the prefixes and suffixes + // stay the same. If x was exact, it isn't anymore. + info = analyze(re.Sub[0]) + if info.exact.have() { + info.prefix = info.exact + info.suffix = info.exact.copy() + info.exact = nil + } + + case syntax.OpCharClass: + // Special case. 
+ if len(re.Rune) == 0 { + return noMatch() + } + + // Special case. + if len(re.Rune) == 1 { + info.exact = stringSet{string(re.Rune[0])} + break + } + + n := 0 + for i := 0; i < len(re.Rune); i += 2 { + n += int(re.Rune[i+1] - re.Rune[i]) + } + // If the class is too large, it's okay to overestimate. + if n > 100 { + return anyChar() + } + + info.exact = []string{} + for i := 0; i < len(re.Rune); i += 2 { + lo, hi := re.Rune[i], re.Rune[i+1] + for rr := lo; rr <= hi; rr++ { + info.exact.add(string(rr)) + } + } + } + + info.simplify(false) + return info +} + +// fold is the usual higher-order function. +func fold(f func(x, y regexpInfo) regexpInfo, sub []*syntax.Regexp, zero regexpInfo) regexpInfo { + if len(sub) == 0 { + return zero + } + if len(sub) == 1 { + return analyze(sub[0]) + } + info := f(analyze(sub[0]), analyze(sub[1])) + for i := 2; i < len(sub); i++ { + info = f(info, analyze(sub[i])) + } + return info +} + +// concat returns the regexp info for xy given x and y. +func concat(x, y regexpInfo) (out regexpInfo) { + var xy regexpInfo + + if x.exact.have() && y.exact.have() { + xy.exact = x.exact.cross(y.exact, false) + } else { + if x.exact.have() { + xy.prefix = x.exact.cross(y.prefix, false) + } else { + xy.prefix = x.prefix + if x.canEmpty { + xy.prefix = xy.prefix.union(y.prefix, false) + } + } + if y.exact.have() { + xy.suffix = x.suffix.cross(y.exact, true) + } else { + xy.suffix = y.suffix + if y.canEmpty { + xy.suffix = xy.suffix.union(x.suffix, true) + } + } + } + + // If all the possible strings in the cross product of x.suffix + // and y.prefix are long enough, then the trigram for one + // of them must be present and would not necessarily be + // accounted for in xy.prefix or xy.suffix yet. Cut things off + // at maxSet just to keep the sets manageable. + if !x.exact.have() && !y.exact.have() && + x.suffix.size() <= maxSet && y.prefix.size() <= maxSet && + x.suffix.minLen()+y.prefix.minLen() >= 3 { + xy.internal = x.suffix.cross(y.prefix, false) + } + + xy.internal = mostDistinctive(xy.internal, x.internal, y.internal) + + xy.simplify(false) + return xy +} + +// alternate returns the regexpInfo for x|y given x and y. +func alternate(x, y regexpInfo) (out regexpInfo) { + var xy regexpInfo + if x.exact.have() && y.exact.have() { + xy.exact = x.exact.union(y.exact, false) + } else if x.exact.have() { + xy.prefix = x.exact.union(y.prefix, false) + xy.suffix = x.exact.union(y.suffix, true) + } else if y.exact.have() { + xy.prefix = x.prefix.union(y.exact, false) + xy.suffix = x.suffix.union(y.exact.copy(), true) + } else { + xy.prefix = x.prefix.union(y.prefix, false) + xy.suffix = x.suffix.union(y.suffix, true) + } + xy.canEmpty = x.canEmpty || y.canEmpty + + if !xy.exact.have() { + xb := x.bestSet() + yb := y.bestSet() + if len(xb) > 0 && len(yb) > 0 { + xy.internal = xb.union(yb, false) + } + } + + xy.simplify(false) + return xy +} + +// simplify simplifies the regexpInfo when the exact set gets too large. +func (info *regexpInfo) simplify(force bool) { + // If there are now too many exact strings, + // loop over them, moving + // the relevant pieces into prefix and suffix. + info.exact.clean(false) + if len(info.exact) > maxExact { + for _, s := range info.exact { + info.prefix.add(s) + info.suffix.add(s) + } + info.exact = nil + } + + if !info.exact.have() { + info.simplifySet(&info.prefix) + info.simplifySet(&info.suffix) + info.simplifySet(&info.internal) + } +} + +// simplifySet reduces the size of the given set (either prefix or suffix). 
+// There is no need to pass around enormous prefix or suffix sets, since +// they will only be used to create trigrams. As they get too big, simplifySet +// moves the information they contain into the match query, which is +// more efficient to pass around. +func (info *regexpInfo) simplifySet(s *stringSet) { + t := *s + t.clean(s == &info.suffix) + + n := 0 + for _, str := range t { + if len(str) > n { + n = len(str) + } + } + + for ; t.size() > maxSet; n-- { + // Replace set by strings of length n-1. + w := 0 + for _, str := range t { + if len(str) >= n { + if s == &info.prefix { + str = str[:n-1] + } else { + str = str[len(str)-n+1:] + } + } + if w == 0 || t[w-1] != str { + t[w] = str + w++ + } + } + t = t[:w] + t.clean(s == &info.suffix) + } + + // Now make sure that the prefix/suffix sets aren't redundant. + // For example, if we know "ab" is a possible prefix, then it + // doesn't help at all to know that "abc" is also a possible + // prefix, so delete "abc". + w := 0 + f := strings.HasPrefix + if s == &info.suffix { + f = strings.HasSuffix + } + for _, str := range t { + if w == 0 || !f(str, t[w-1]) { + t[w] = str + w++ + } + } + t = t[:w] + + *s = t +} + +func (info regexpInfo) String() string { + s := "" + if info.canEmpty { + s += "canempty " + } + if info.exact.have() { + s += "exact:" + strings.Join(info.exact, ",") + } else { + s += "prefix:" + strings.Join(info.prefix, ",") + s += " suffix:" + strings.Join(info.suffix, ",") + } + //s += " match: " + info.match.String() + return s +} + +// mostDistinctive returns the most distinctive stringSet in sets. +// The most distinctive set is the one that has the longest minLen. +func mostDistinctive(sets ...stringSet) stringSet { + best := stringSet(nil) + bestLen := 0 + + for _, s := range sets { + if !s.have() { + continue + } + thisLen := s.minLen() + if thisLen > bestLen { + best, bestLen = s, thisLen + } + } + + return best +} + +// bestSet returns the most distinctive set of strings in info. +func (info regexpInfo) bestSet() stringSet { + if info.exact.have() { + return info.exact + } + + return mostDistinctive(info.prefix, info.suffix, info.internal) +} + +// A stringSet is a set of strings. +// The nil stringSet indicates not having a set. +// The non-nil but empty stringSet is the empty set. +type stringSet []string + +// have reports whether we have a stringSet. +func (s stringSet) have() bool { + return s != nil +} + +// contains reports whether s contains str. +func (s stringSet) contains(str string) bool { + for _, ss := range s { + if ss == str { + return true + } + } + return false +} + +type byPrefix []string + +func (x *byPrefix) Len() int { return len(*x) } +func (x *byPrefix) Swap(i, j int) { (*x)[i], (*x)[j] = (*x)[j], (*x)[i] } +func (x *byPrefix) Less(i, j int) bool { return (*x)[i] < (*x)[j] } + +type bySuffix []string + +func (x *bySuffix) Len() int { return len(*x) } +func (x *bySuffix) Swap(i, j int) { (*x)[i], (*x)[j] = (*x)[j], (*x)[i] } +func (x *bySuffix) Less(i, j int) bool { + s := (*x)[i] + t := (*x)[j] + for i := 1; i <= len(s) && i <= len(t); i++ { + si := s[len(s)-i] + ti := t[len(t)-i] + if si < ti { + return true + } + if si > ti { + return false + } + } + return len(s) < len(t) +} + +// add adds str to the set. +func (s *stringSet) add(str string) { + *s = append(*s, str) +} + +// clean removes duplicates from the stringSet. 
+func (s *stringSet) clean(isSuffix bool) { + t := *s + if isSuffix { + sort.Sort((*bySuffix)(s)) + } else { + sort.Sort((*byPrefix)(s)) + } + w := 0 + for _, str := range t { + if w == 0 || t[w-1] != str { + t[w] = str + w++ + } + } + *s = t[:w] +} + +// size returns the number of strings in s. +func (s stringSet) size() int { + return len(s) +} + +// minLen returns the length of the shortest string in s. +func (s stringSet) minLen() int { + if len(s) == 0 { + return 0 + } + m := len(s[0]) + for _, str := range s { + if m > len(str) { + m = len(str) + } + } + return m +} + +// maxLen returns the length of the longest string in s. +func (s stringSet) maxLen() int { + if len(s) == 0 { + return 0 + } + m := len(s[0]) + for _, str := range s { + if m < len(str) { + m = len(str) + } + } + return m +} + +// union returns the union of s and t, reusing s's storage. +func (s stringSet) union(t stringSet, isSuffix bool) stringSet { + s = append(s, t...) + s.clean(isSuffix) + return s +} + +// cross returns the cross product of s and t. +func (s stringSet) cross(t stringSet, isSuffix bool) stringSet { + p := stringSet{} + for _, ss := range s { + for _, tt := range t { + p.add(ss + tt) + } + } + p.clean(isSuffix) + return p +} + +// clear empties the set but preserves the storage. +func (s *stringSet) clear() { + *s = (*s)[:0] +} + +// copy returns a copy of the set that does not share storage with the original. +func (s stringSet) copy() stringSet { + return append(stringSet{}, s...) +} + +// isSubsetOf returns true if all strings in s are also in t. +// It assumes both sets are sorted. +func (s stringSet) isSubsetOf(t stringSet) bool { + j := 0 + for _, ss := range s { + for j < len(t) && t[j] < ss { + j++ + } + if j >= len(t) || t[j] != ss { + return false + } + } + return true +} diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/1907e011ae8a6fdd b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/1907e011ae8a6fdd new file mode 100644 index 0000000..1427fc2 --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/1907e011ae8a6fdd @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection:\n 0:\n Foo: BA\n condition: A") +string("detection:\n 0:\n Foo|contains: A\n condition: A") +string("fieldmappings:\n Foo: foo") +string("{\"foo\":\"BA\"}") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/1b692dbec8c613de b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/1b692dbec8c613de new file mode 100644 index 0000000..3ae40e6 --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/1b692dbec8c613de @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection:\n 0:\n Foo|contains: A\n condition: A") +string("detection:\n 0:\n Foo|contains: a\n condition: A") +string("fieldmappings:\n Foo: foo") +string("{\"foo\":\"A\"}") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/3a94d65bc4acc663 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/3a94d65bc4acc663 new file mode 100644 index 0000000..62b3328 --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/3a94d65bc4acc663 @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection:\n 0:\n 0:") +string("detection:\n 0:\n 0:") +string("") +string("{}") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/5767f35675911705 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/5767f35675911705 new file mode 100644 index 0000000..e86d035 --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/5767f35675911705 @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection:\n 0:\n 0:\n condition: A") 
+string("detection:\n 0:\n Foo|contains: >\n condition: A") +string("fieldmappings:\n Foo: foo") +string("{\"foo\":\"0\"}") +bool(true) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/59d99db21bdb3323 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/59d99db21bdb3323 new file mode 100644 index 0000000..4991c8e --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/59d99db21bdb3323 @@ -0,0 +1,6 @@ +go test fuzz v1 +string("<<:\n? 0:") +string("0") +string("0") +string("0") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/6450716b6258ade2 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/6450716b6258ade2 new file mode 100644 index 0000000..8e08926 --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/6450716b6258ade2 @@ -0,0 +1,6 @@ +go test fuzz v1 +string("\nid: TEST_RULE\ndetection:\n a:\n Foo|contains: bar\n b:\n Bar|endswith: baz\n condition: a and b\n") +string("\nid: TEST_RULE\ndetection:\n :\n Foo|contains: bar\n b:\n Bar|endswith: baz\n condition: a and b\n") +string("\ntitle: Test\nlogsMMources:\n tes Bar: product: tes \n\nfieldmappings:\n tFoo: $.foo\n Bar: $.foobar.baz\n") +string("{\"foo\": \"bar\", \"bar\": \"baz\"}") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/754aab3cbb754e99 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/754aab3cbb754e99 new file mode 100644 index 0000000..bbf62ac --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/754aab3cbb754e99 @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection: \n 00:\n 00:") +string("detection: \n 0: \n 0:") +string("") +string("0") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/75e97febfc5feb9e b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/75e97febfc5feb9e new file mode 100644 index 0000000..cd1b85e --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/75e97febfc5feb9e @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection:\n 0:\n 0:") +string("detection:\n 0:\n 0:") +string("") +string("A") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/85bf2132f746b224 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/85bf2132f746b224 new file mode 100644 index 0000000..737b75d --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/85bf2132f746b224 @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection:\n 0:\n 0: \n condition: A") +string("detection:\n 0:\n 0|contains: >") +string("0") +string("{}") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/89bbe22b303a3d8a b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/89bbe22b303a3d8a new file mode 100644 index 0000000..b369b59 --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/89bbe22b303a3d8a @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection: \n 0: \n 0:") +string("detection: \n 00:\n 00|:") +string("") +string("{}") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/8ef99a169708daef b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/8ef99a169708daef new file mode 100644 index 0000000..a720e5b --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/8ef99a169708daef @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection:\n 0:\n 0: \n condition: A") +string("detection:\n 0:\n Foo|contains: >\n condition: A") +string("fieldmappings:\n Foo: foo") +string("{\"foo\":\"\"}") +bool(true) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/90c72819f91d52a6 b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/90c72819f91d52a6 new file mode 100644 index 0000000..adfed68 --- /dev/null +++ 
b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/90c72819f91d52a6 @@ -0,0 +1,6 @@ +go test fuzz v1 +string("\nid: TEST_RULE\ndetection:\n a:\n Foo|contains: bar\n b:\n Bar|endswith: baz\n condition: a and b\n") +string("\nid: TEST_RULE\ndetection:\n a:\n Foo|contains: bar\n b:\n Bar|endswith: baz\n condition: a and b\n") +string("\ntitle: Test\nlogsources:\n test:\n product: test\n\nfieldmappings:\n Foo: $.foo\n Bar: $.foobar.baz\n") +string("{\"foo\": \"bAr\", \"000\": \"000\"}") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/9fab5927760a02ef b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/9fab5927760a02ef new file mode 100644 index 0000000..2c34913 --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/9fab5927760a02ef @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection:\n 0:\n Foo|contains: BA\n condition: A") +string("detection:\n 0:\n Foo|contains: B\n condition: A") +string("fieldmappings:\n Foo: foo") +string("{\"foo\":\"BA\"}") +bool(false) diff --git a/evaluator/testdata/fuzz/FuzzRuleBundleMatches/ef6ebca9ff3c502d b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/ef6ebca9ff3c502d new file mode 100644 index 0000000..311973c --- /dev/null +++ b/evaluator/testdata/fuzz/FuzzRuleBundleMatches/ef6ebca9ff3c502d @@ -0,0 +1,6 @@ +go test fuzz v1 +string("detection:\n 0:\n 00:\n 1:") +string("detection:\n 0:\n 0:") +string("") +string("{}") +bool(false) diff --git a/go.mod b/go.mod index af8cb26..21e923d 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,9 @@ module github.com/bradleyjkemp/sigma-go -go 1.18 +go 1.21 require ( + github.com/BobuSumisu/aho-corasick v1.0.3 github.com/PaesslerAG/jsonpath v0.1.1 github.com/alecthomas/participle v0.7.1 github.com/bradleyjkemp/cupaloy/v2 v2.6.0 diff --git a/go.sum b/go.sum index 3c7dd53..7b18aa4 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +github.com/BobuSumisu/aho-corasick v1.0.3 h1:uuf+JHwU9CHP2Vx+wAy6jcksJThhJS9ehR8a+4nPE9g= +github.com/BobuSumisu/aho-corasick v1.0.3/go.mod h1:hm4jLcvZKI2vRF2WDU1N4p/jpWtpOzp3nLmi9AzX/XE= github.com/PaesslerAG/gval v1.0.0 h1:GEKnRwkWDdf9dOmKcNrar9EA1bz1z9DqPIO1+iLzhd8= github.com/PaesslerAG/gval v1.0.0/go.mod h1:y/nm5yEyTeX6av0OfKJNp9rBNj2XrGhAf5+v24IBN1I= github.com/PaesslerAG/jsonpath v0.1.0/go.mod h1:4BzmtoM/PI8fPO4aQGIusjGxGir2BzcV0grWtFzq1Y8=
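
A minimal usage sketch of the bundle API added in evaluator/bundle.go. The rule YAML, event fields, and values below are illustrative only and are not taken from this change; the exported names (sigma.ParseRule, evaluator.ForRules, RuleResult) are the ones introduced or used by the diff above.

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/bradleyjkemp/sigma-go"
	"github.com/bradleyjkemp/sigma-go/evaluator"
)

func main() {
	// Parse one or more Sigma rules (contents made up for this sketch).
	rule, err := sigma.ParseRule([]byte(`
id: EXAMPLE_RULE
detection:
  selection:
    CommandLine|contains: mimikatz
  condition: selection
`))
	if err != nil {
		log.Fatal(err)
	}

	// ForRules compiles the |contains values (and the "necessary" substrings
	// extracted from |re patterns) of every rule into one Aho-Corasick trie
	// per field, so a single scan of each event field serves all rules.
	bundle := evaluator.ForRules([]sigma.Rule{rule})

	results, err := bundle.Matches(context.Background(), map[string]interface{}{
		"CommandLine": `powershell -c "IEX mimikatz"`,
	})
	if err != nil {
		log.Fatal(err)
	}
	for _, r := range results {
		fmt.Println(r.Rule.ID, "matched:", r.Result.Match)
	}
}
```

A second, smaller sketch of the regex prefiltering idea behind evaluator/restring.go, written as a test inside package evaluator because regexStrings is unexported. The values in the comment are what the analysis is expected to produce for this particular pattern, not verified output.

```go
package evaluator

import (
	"fmt"
	"testing"
)

func TestRegexStringsSketch(t *testing.T) {
	// Any string matched by this pattern must contain at least one of the
	// returned substrings, so they can be fed to the Aho-Corasick prefilter
	// and the full regexp only has to run when one of them is present.
	ss, caseInsensitive, err := regexStrings(`foo(bar|baz)qux`)
	if err != nil {
		t.Fatal(err)
	}
	fmt.Println(ss, caseInsensitive) // expected: roughly [foobarqux foobazqux] false
}
```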