Skip to content

Commit

Permalink
Implement more efficient batch rule evaluator (#45)
Browse files Browse the repository at this point in the history
* mvp

* Fuzz, move to new, correct library

* Use same technique for regexes

* cleanup

* dedupe matching logic
  • Loading branch information
bradleyjkemp authored Sep 4, 2024
1 parent 07e7968 commit 0da7f75
Show file tree
Hide file tree
Showing 26 changed files with 1,064 additions and 43 deletions.
217 changes: 217 additions & 0 deletions evaluator/bundle.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
package evaluator

import (
	"context"
	"errors"
	"regexp"
	"strings"
	"unsafe"

	aho_corasick "github.com/BobuSumisu/aho-corasick"
	"github.com/bradleyjkemp/sigma-go"
	"github.com/bradleyjkemp/sigma-go/evaluator/modifiers"
)

// ForRules compiles a set of rule evaluators which are evaluated together, allowing for use of
// more efficient string matching algorithms.
//
// Every value of a `contains` field matcher, and every literal reported by regexStrings for a
// `re` field matcher, is collected per field name; one Aho-Corasick trie is then built per field
// so a field value only has to be scanned once for all of those needles.
//
// An empty rule set yields the zero RuleEvaluatorBundle.
func ForRules(rules []sigma.Rule, options ...Option) RuleEvaluatorBundle {
	if len(rules) == 0 {
		return RuleEvaluatorBundle{}
	}

	bundle := RuleEvaluatorBundle{
		ahocorasick: map[string]ahocorasickSearcher{},
	}

	// values accumulates, per field name, every needle the trie for that field must recognise.
	values := map[string][]string{}

	for _, rule := range rules {
		e := ForRule(rule, options...)
		bundle.evaluators = append(bundle.evaluators, e)
		// every evaluator is built from the same options, so the last one is representative
		bundle.caseSensitive = e.caseSensitive

		for _, search := range rule.Detection.Searches {
			for _, matcher := range search.EventMatchers {
				for _, fieldMatcher := range matcher {
					contains := false
					regex := false
					for _, modifier := range fieldMatcher.Modifiers {
						if modifier == "contains" {
							contains = true
						}
						if modifier == "re" {
							regex = true
						}
					}
					switch {
					case contains: // add all values to the needle set
						for _, value := range fieldMatcher.Values {
							if value == nil {
								continue
							}
							stringValue := modifiers.CoerceString(value)
							if !bundle.caseSensitive {
								stringValue = strings.ToLower(stringValue)
							}
							values[fieldMatcher.Field] = append(values[fieldMatcher.Field], stringValue)
						}
					case regex: // get "necessary" substrings and add to the needle set
						for _, value := range fieldMatcher.Values {
							ss, caseInsensitive, err := regexStrings(modifiers.CoerceString(value)) // todo: benchmark this, should save the result?
							if err != nil {
								// ForRules has no error return. Contribute no needles for this
								// expression; ahocorasickRe.MatchesField repeats the call at
								// evaluation time and reports the error to the caller there.
								continue
							}
							for _, s := range ss {
								if caseInsensitive {
									s = strings.ToLower(s)
								}
								values[fieldMatcher.Field] = append(values[fieldMatcher.Field], s)
							}
						}
					}
				}
			}
		}
	}

	for field, fieldValues := range values {
		bundle.ahocorasick[field] = ahocorasickSearcher{
			Trie:     aho_corasick.NewTrieBuilder().AddStrings(fieldValues).Build(),
			patterns: fieldValues,
			results:  map[resultsKey]map[string]bool{}, // used for caching results
		}
	}
	return bundle
}

// RuleEvaluatorBundle is a group of rule evaluators compiled together by ForRules.
type RuleEvaluatorBundle struct {
	// ahocorasick holds one searcher per field name that has `contains`/`re` needles.
	ahocorasick map[string]ahocorasickSearcher
	// evaluators holds one evaluator per input rule, in input order.
	evaluators []*RuleEvaluator
	// caseSensitive mirrors the evaluators' case-sensitivity setting.
	caseSensitive bool
}

// resultsKey identifies one cached scan of a haystack.
//
// The haystack is identified by its data pointer and length rather than by its contents so that
// a cache lookup does not have to hash the (potentially large) string. The case mode is part of
// the key because the same haystack yields different matches depending on whether it was
// lower-cased before scanning.
type resultsKey struct {
	data          *byte
	length        int
	caseSensitive bool
}

// ahocorasickSearcher is the compiled needle set for a single field together with a cache of
// scans already performed.
type ahocorasickSearcher struct {
	*aho_corasick.Trie
	// patterns are the needles the trie was built from.
	patterns []string
	// results caches, per scanned haystack and case mode, the set of needles that were found.
	// The map is shared by every copy of the searcher, so it is not safe for concurrent use.
	results map[resultsKey]map[string]bool
}

// getResults returns the set of needles that occur in s.
//
// When caseSensitive is false, s is lower-cased before scanning, so only lower-case needles can
// be reported. The returned map is cached and must be treated as read-only. A zero-value
// searcher (no needles compiled for the field) reports no matches.
func (as ahocorasickSearcher) getResults(s string, caseSensitive bool) map[string]bool {
	if as.Trie == nil || as.results == nil {
		// Zero value: nothing was compiled for this field, so nothing can match.
		// Reading from the returned nil map is safe and yields false.
		return nil
	}

	key := resultsKey{
		data:          unsafe.StringData(s),
		length:        len(s),
		caseSensitive: caseSensitive,
	}
	if cached, ok := as.results[key]; ok {
		return cached
	}

	// haven't already computed this
	if !caseSensitive {
		s = strings.ToLower(s)
	}
	found := map[string]bool{}
	for _, match := range as.MatchString(s) {
		// TODO: is match.MatchString equivalent to matcher.patterns[match.Pattern()]?
		found[match.MatchString()] = true
	}
	as.results[key] = found
	return found
}

// RuleResult pairs the evaluation Result of one rule with the sigma.Rule that produced it.
// Both types are embedded, so their fields are promoted onto RuleResult.
type RuleResult struct {
Result
sigma.Rule
}

// Matches evaluates every rule in the bundle against event.
//
// It returns one RuleResult per rule that evaluated without error, in the order the rules were
// passed to ForRules. Rules that fail to evaluate are skipped; their errors are combined with
// errors.Join and returned alongside the results of the remaining rules, so a non-nil error does
// not imply an empty result slice. An empty bundle returns (nil, nil).
//
// Matches mutates the searchers' result caches, which are shared by every copy of the bundle,
// so it must not be called concurrently on bundles derived from the same ForRules call.
func (bundle RuleEvaluatorBundle) Matches(ctx context.Context, event Event) ([]RuleResult, error) {
	if len(bundle.evaluators) == 0 {
		return nil, nil
	}

	// The scan caches only pay off while the rules of the bundle look at the same event.
	// Empty them before each event so they cannot grow without bound (and keep every
	// previously seen field value alive) over the lifetime of the bundle.
	for _, searcher := range bundle.ahocorasick {
		for key := range searcher.results {
			delete(searcher.results, key)
		}
	}

	// copy the current rule comparators (taken from the first evaluator)
	base := bundle.evaluators[0].comparators
	comparators := make(map[string]modifiers.Comparator, len(base)+2)
	for name, comparator := range base {
		comparators[name] = comparator
	}

	// override the contains and re comparators to use our trie-backed ones
	comparators["contains"] = &ahocorasickContains{
		matchers:      bundle.ahocorasick,
		caseSensitive: bundle.caseSensitive,
	}
	comparators["re"] = &ahocorasickRe{
		matchers: bundle.ahocorasick,
	}

	ruleresults := make([]RuleResult, 0, len(bundle.evaluators))
	var errs []error
	for _, rule := range bundle.evaluators {
		result, err := rule.matches(ctx, event, comparators)
		if err != nil {
			errs = append(errs, err)
			continue
		}
		ruleresults = append(ruleresults, RuleResult{
			Result: result,
			Rule:   rule.Rule,
		})
	}
	// errors.Join returns nil when errs is empty
	return ruleresults, errors.Join(errs...)
}

// ahocorasickContains is the `contains` comparator installed by RuleEvaluatorBundle.Matches.
// It answers substring queries from the per-field searcher results instead of
// scanning the field value once per expected value.
type ahocorasickContains struct {
// caseSensitive selects whether haystack and needle are compared as-is (true)
// or lower-cased first (false).
caseSensitive bool
// NOTE(review): the embedded Comparator is never set where this type is constructed in
// this file; presumably it is embedded only so the type satisfies the interface — confirm.
modifiers.Comparator
// matchers maps a field name to the searcher compiled for that field.
matchers map[string]ahocorasickSearcher
}

// MatchesField reports whether the value `actual` of `field` contains `expected`.
//
// The answer is normally read from the searcher compiled for field, which scans the value once
// for every needle of the bundle. If no searcher was compiled for field, the check falls back to
// a plain substring search instead of consulting a zero-value searcher. In case-insensitive mode
// both sides are lower-cased before comparison. The returned error is always nil.
func (a *ahocorasickContains) MatchesField(field string, actual any, expected any) (bool, error) {
	if expected == "" {
		// compatibility with old |contains behaviour
		// possibly a bug?
		return true, nil
	}

	haystack := modifiers.CoerceString(actual)
	needle := modifiers.CoerceString(expected)
	if !a.caseSensitive {
		// when operating in case-insensitive mode, search strings must be canonicalised
		// (this is ok because search strings are much smaller than the haystack)
		// TODO: should we just modify the rules in this case? (saving the lower-casing every time)
		needle = strings.ToLower(needle)
	}

	searcher, ok := a.matchers[field]
	if !ok {
		// No needles were compiled for this field, so there is no trie to consult.
		if !a.caseSensitive {
			haystack = strings.ToLower(haystack)
		}
		return strings.Contains(haystack, needle), nil
	}

	// getResults performs any lower-casing of the haystack itself
	return searcher.getResults(haystack, a.caseSensitive)[needle], nil
}

// ahocorasickRe is the `re` comparator installed by RuleEvaluatorBundle.Matches. It uses the
// per-field searcher results as a cheap pre-filter before running the full regular expression.
type ahocorasickRe struct {
// NOTE(review): the embedded Comparator is never set where this type is constructed in
// this file; presumably it is embedded only so the type satisfies the interface — confirm.
modifiers.Comparator
// matchers maps a field name to the searcher compiled for that field.
matchers map[string]ahocorasickSearcher
}

// MatchesField reports whether the value `actual` of `field` matches the regular expression
// `expected`.
//
// regexStrings yields literal strings that necessarily appear in any match. If the searcher for
// field shows that none of them occurs in the value, the regex cannot match and is not run.
// The pre-filter is only an optimisation: whenever it cannot be applied (no literals known, or
// no searcher compiled for field) the regex itself decides.
//
// An error is returned if `expected` does not compile or regexStrings rejects it.
func (a *ahocorasickRe) MatchesField(field string, actual any, expected any) (bool, error) {
	stringRe := modifiers.CoerceString(expected)
	re, err := regexp.Compile(stringRe) // todo: cache this?
	if err != nil {
		return false, err
	}

	// this function returns a set of simple strings
	// which necessarily appear if the regex matches
	// If none are present in `actual`, we don't need to run the regex
	ss, caseInsensitive, err := regexStrings(stringRe)
	if err != nil {
		return false, err
	}

	haystack := modifiers.CoerceString(actual)

	searcher, compiled := a.matchers[field]
	if compiled && len(ss) > 0 {
		results := searcher.getResults(haystack, !caseInsensitive)
		found := false
		for _, s := range ss {
			if caseInsensitive {
				// ForRules lower-cases these literals before adding them to the trie and the
				// haystack is lower-cased for the scan, so the lookup must use the same form.
				s = strings.ToLower(s)
			}
			if results[s] {
				found = true
				break
			}
		}
		if !found {
			return false, nil
		}
	}

	// either the cheap heuristic says the regex *might* match the string or it could not be
	// applied, so we have to now run the full regex
	return re.MatchString(haystack), nil
}
13 changes: 10 additions & 3 deletions evaluator/evaluate.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ import (
"context"
"encoding/json"
"fmt"

"github.com/bradleyjkemp/sigma-go"
"github.com/bradleyjkemp/sigma-go/evaluator/modifiers"
)

type RuleEvaluator struct {
Expand All @@ -17,6 +17,7 @@ type RuleEvaluator struct {

expandPlaceholder func(ctx context.Context, placeholderName string) ([]string, error)
caseSensitive bool
comparators map[string]modifiers.Comparator

count func(ctx context.Context, gb GroupedByValues) (float64, error)
average func(ctx context.Context, gb GroupedByValues, value float64) (float64, error)
Expand All @@ -30,6 +31,7 @@ type RuleEvaluator struct {
// For example, if a Sigma rule has a condition like this (attempting to detect login brute forcing)
//
// detection:
//
// login_attempt:
// # something here
// condition:
Expand All @@ -40,6 +42,7 @@ type RuleEvaluator struct {
// Each different GroupedByValues points to a different box.
//
// GroupedByValues
//
// ||
// ___↓↓___ ________
// | User A | | User B |
Expand All @@ -65,7 +68,7 @@ func (a GroupedByValues) Key() string {
}

func ForRule(rule sigma.Rule, options ...Option) *RuleEvaluator {
e := &RuleEvaluator{Rule: rule}
e := &RuleEvaluator{Rule: rule, comparators: modifiers.Comparators}
for _, option := range options {
option(e)
}
Expand Down Expand Up @@ -93,14 +96,18 @@ func eventValue(e Event, key string) interface{} {
}

func (rule RuleEvaluator) Matches(ctx context.Context, event Event) (Result, error) {
return rule.matches(ctx, event, rule.comparators)
}

func (rule RuleEvaluator) matches(ctx context.Context, event Event, comparators map[string]modifiers.Comparator) (Result, error) {
result := Result{
Match: false,
SearchResults: map[string]bool{},
ConditionResults: make([]bool, len(rule.Detection.Conditions)),
}
for identifier, search := range rule.Detection.Searches {
var err error
result.SearchResults[identifier], err = rule.evaluateSearch(ctx, search, event)
result.SearchResults[identifier], err = rule.evaluateSearch(ctx, search, event, rule.comparators)
if err != nil {
return Result{}, fmt.Errorf("error evaluating search %s: %w", identifier, err)
}
Expand Down
16 changes: 6 additions & 10 deletions evaluator/evaluate_search.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@ import (
"context"
"encoding/json"
"fmt"
"github.com/PaesslerAG/jsonpath"
"github.com/bradleyjkemp/sigma-go"
"github.com/bradleyjkemp/sigma-go/evaluator/modifiers"
"path"
"reflect"
"regexp"
"strings"

"github.com/PaesslerAG/jsonpath"
"github.com/bradleyjkemp/sigma-go"
)

func (rule RuleEvaluator) evaluateSearchExpression(search sigma.SearchExpr, searchResults map[string]bool) bool {
Expand Down Expand Up @@ -84,7 +83,7 @@ func (rule RuleEvaluator) evaluateSearchExpression(search sigma.SearchExpr, sear
panic(fmt.Sprintf("unhandled node type %T", search))
}

func (rule RuleEvaluator) evaluateSearch(ctx context.Context, search sigma.Search, event Event) (bool, error) {
func (rule RuleEvaluator) evaluateSearch(ctx context.Context, search sigma.Search, event Event, comparators map[string]modifiers.Comparator) (bool, error) {
if len(search.Keywords) > 0 {
return false, fmt.Errorf("keywords unsupported")
}
Expand Down Expand Up @@ -112,11 +111,7 @@ eventMatcher:
// field matchers can specify modifiers (FieldName|modifier1|modifier2) which change the matching behaviour
var comparator modifiers.ComparatorFunc
var err error
if rule.caseSensitive {
comparator, err = modifiers.GetComparatorCaseSensitive(fieldModifiers...)
} else {
comparator, err = modifiers.GetComparator(fieldModifiers...)
}
comparator, err = modifiers.GetComparator(fieldMatcher.Field, comparators, fieldModifiers...)
if err != nil {
return false, err
}
Expand Down Expand Up @@ -199,6 +194,7 @@ func (rule *RuleEvaluator) GetFieldValuesFromEvent(field string, event Event) ([
actualValues = append(actualValues, toGenericSlice(v)...)
}
}

return actualValues, nil
}

Expand Down Expand Up @@ -291,7 +287,7 @@ func toGenericSlice(v interface{}) []interface{} {
return []interface{}{v}
}

var out []interface{}
out := make([]interface{}, 0, rv.Len())
for i := 0; i < rv.Len(); i++ {
out = append(out, rv.Index(i).Interface())
}
Expand Down
Loading

0 comments on commit 0da7f75

Please sign in to comment.