Skip to content

Commit

Permalink
Adding Nested fields support
Browse files Browse the repository at this point in the history
This adds support for parsing into nested map[string]interface{} maps when
using ParseTyped().

To better deal with special characters in field names, MD5 hashing is used
for aliases.
  • Loading branch information
securitym0nkey committed Mar 8, 2024
1 parent 53ca963 commit d2256f6
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 9 deletions.
33 changes: 33 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,36 @@ COMMONAPACHELOG: 127.0.0.1 - - [23/Apr/2014:22:58:32 +0200] "GET /index.php HTTP
clientip: 127.0.0.1
ident: -
```

# Example 3 - nested
```go
package main

import (
"fmt"
"encoding/json"
"github.com/vjeantet/grok"
)

func main() {
	// g must be declared here with := (a plain = would not compile).
	g, _ := grok.NewWithConfig(&grok.Config{NamedCapturesOnly: true})

	// Bracketed field names such as [person][age] end up as nested maps in
	// the result; the optional :int / :float suffix converts the captured text.
	nestedValues, _ := g.ParseTyped("%{TIME:time_stamp}: %{USER:[name][first_name]} is %{POSINT:[person][age]:int} years old and %{NUMBER:[person][height]:float} meters tall", `12:23:31: bob is 23 years old and 4.2 meters tall`)

	// Pretty-print the nested result as tab-indented JSON.
	j, _ := json.MarshalIndent(nestedValues, "", "\t")
	fmt.Println(string(j))
}
```

output:
```
{
"name": {
"first_name": "bob"
},
"person": {
"age": 23,
"height": 4.2
},
"time_stamp": "12:23:31"
}
```
9 changes: 8 additions & 1 deletion example/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package main

import (
"fmt"

"encoding/json"
"github.com/vjeantet/grok"
)

Expand Down Expand Up @@ -31,4 +31,11 @@ func main() {
for k, v := range values {
fmt.Printf("%+15s: %s\n", k, v)
}

fmt.Println("\n# Parse into a Nested map")
g, _ = grok.NewWithConfig(&grok.Config{NamedCapturesOnly: true})
nested_values,_ := g.ParseTyped("%{TIME:time_stamp}: %{USER:[name][first_name]} is %{POSINT:[person][age]:int} years old and %{NUMBER:[person][height]:float} meters tall",`12:23:31: bob is 23 years old and 4.2 meters tall`)

j, _ := json.MarshalIndent(nested_values, "", "\t")
fmt.Println(string(j))
}
75 changes: 67 additions & 8 deletions grok.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@ import (
"strconv"
"strings"
"sync"
"crypto/md5"
)

var (
valid = regexp.MustCompile(`^\w+([-.]\w+)*(:([-.\w]+)(:(string|float|int))?)?$`)
normal = regexp.MustCompile(`%{([\w-.]+(?::[\w-.]+(?::[\w-.]+)?)?)}`)
symbolic = regexp.MustCompile(`\W`)
valid = regexp.MustCompile(`^\w+([-.]\w+)*(:(([-.\w]+)|(\[\w+\])+)(:(string|float|int))?)?$`)
normal = regexp.MustCompile(`%{([\w-.]+(?::[\w-.\[\]]+(?::[\w-.]+)?)?)}`)
nested = regexp.MustCompile(`\[(\w+)\]`)
)

// A Config structure is used to configure a Grok parser.
Expand Down Expand Up @@ -237,7 +238,8 @@ func (g *Grok) Parse(pattern, text string) (map[string]string, error) {
return g.compiledParse(gr, text)
}

// ParseTyped returns a interface{} map with typed captured fields based on provided pattern over the text
// ParseTyped returns an interface{} map with typed captured fields based on the provided pattern over the text.
// It can return nested map[string]interface{} maps when the %{PATTERN:[nested][field]} syntax is used.
func (g *Grok) ParseTyped(pattern string, text string) (map[string]interface{}, error) {
gr, err := g.compile(pattern)
if err != nil {
Expand All @@ -252,17 +254,40 @@ func (g *Grok) ParseTyped(pattern string, text string) (map[string]interface{},
continue
}
name := g.nameToAlias(segmentName)
nested_path := []string{}
nested_names := nested.FindAllStringSubmatch(name, -1)

if nested_names != nil {
for _, element := range nested_names {
nested_path = append(nested_path, element[1])
}
}

if segmentType, ok := gr.typeInfo[name]; ok {
switch segmentType {
case "int":
captures[name], _ = strconv.Atoi(match[i])
value, _ := strconv.Atoi(match[i])
if len(nested_path) > 0 {
addNested(captures, nested_path, value)
} else {
captures[name] = value
}
case "float":
captures[name], _ = strconv.ParseFloat(match[i], 64)
value, _ := strconv.ParseFloat(match[i], 64)
if len(nested_path) > 0 {
addNested(captures, nested_path, value)
} else {
captures[name] = value
}
default:
return nil, fmt.Errorf("ERROR the value %s cannot be converted to %s", match[i], segmentType)
}
} else {
captures[name] = match[i]
if len(nested_path) > 0 {
addNested(captures, nested_path, match[i])
} else {
captures[name] = match[i]
}
}
}

Expand Down Expand Up @@ -345,6 +370,7 @@ func (g *Grok) denormalizePattern(pattern string, storedPatterns map[string]*gPa
alias = g.aliasizePatternName(semantic)
}


// Add type cast information only if type set, and not string
if len(names) == 3 {
if names[2] != "string" {
Expand Down Expand Up @@ -386,7 +412,8 @@ func (g *Grok) denormalizePattern(pattern string, storedPatterns map[string]*gPa
}

// aliasizePatternName derives an alias for the given field name and records
// the alias -> name mapping in g.aliases so the original name can be looked
// up again later.
//
// The alias is the letter "h" followed by the hexadecimal MD5 digest of name.
// Hashing is used so that field names containing special characters (for
// example the brackets of nested names like [person][age]) still map to an
// alias made only of letters and digits.
func (g *Grok) aliasizePatternName(name string) string {
	digest := md5.Sum([]byte(name))
	// %x on the [16]byte digest renders it as 32 lowercase hex characters.
	alias := fmt.Sprintf("h%x", digest)
	g.aliases[alias] = name
	return alias
}
Expand Down Expand Up @@ -423,3 +450,35 @@ func (g *Grok) ParseStream(reader *bufio.Reader, pattern string, process func(ma
}
}
}

// addNested stores value in n under the sequence of keys given by path,
// creating intermediate map[string]interface{} levels as needed.
//
// For path ["a", "b", "c"] the result is n["a"]["b"]["c"] = value. Existing
// intermediate maps are reused, so several calls can fill sibling keys of the
// same parent. An existing value at the final key is overwritten.
//
// An error is returned, instead of panicking, when:
//   - n is nil (writing to a nil map would panic),
//   - path is empty (there is no key to store the value under),
//   - an intermediate key already holds a value that is not a
//     map[string]interface{}, so the path cannot be walked further.
//
// Intermediate maps created before an error is detected are left in place.
func addNested(n map[string]interface{}, path []string, value interface{}) error {
	if n == nil {
		return fmt.Errorf("nesting into a nil map")
	}
	if len(path) == 0 {
		return fmt.Errorf("nesting requires a non-empty path")
	}

	current := n
	last := len(path) - 1

	// Walk (and build, where missing) every level except the leaf.
	for _, key := range path[:last] {
		child, exists := current[key]
		if !exists {
			next := make(map[string]interface{})
			current[key] = next
			current = next
			continue
		}

		next, isMap := child.(map[string]interface{})
		if !isMap {
			// The key is already used by a plain value; it cannot act as a parent.
			return fmt.Errorf("Nesting under an already used key")
		}
		current = next
	}

	// The leaf element of the path receives the value.
	current[path[last]] = value
	return nil
}
26 changes: 26 additions & 0 deletions grok_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,32 @@ func TestParseTypedWithAlias(t *testing.T) {
}
}

// TestParseTypedWithNested checks that ParseTyped places captures named with
// the [a][b] syntax into nested map[string]interface{} values, alongside flat
// captures, and that :int conversions are applied to nested fields.
func TestParseTypedWithNested(t *testing.T) {
	g, err := NewWithConfig(&Config{NamedCapturesOnly: true})
	if err != nil {
		t.Fatalf("error can not create grok : %s", err.Error())
	}

	pattern := "%{TIMESTAMP_ISO8601:time} %{USER:[user][name]}@%{HOSTNAME:[user][host]} %{WORD:action} %{POSINT:[net][bytes]:int} bytes from %{IP:[net][source][ip]}:%{POSINT:[net][source][port]:int}"
	// The text must contain user@host so that [user][name] and [user][host]
	// can both be captured.
	text := "2023-04-08T11:55:00+0200 john.doe@example.com send 230 bytes from 198.51.100.65:2342"

	captures, err := g.ParseTyped(pattern, text)
	if err != nil {
		t.Fatalf("error can not capture : %s", err.Error())
	}

	expected := map[string]interface{}{
		"time":   "2023-04-08T11:55:00+0200",
		"action": "send",
		"user": map[string]interface{}{
			"name": "john.doe",
			"host": "example.com",
		},
		"net": map[string]interface{}{
			"bytes": 230,
			"source": map[string]interface{}{
				"ip":   "198.51.100.65",
				"port": 2342,
			},
		},
	}

	// fmt prints maps with sorted keys, so the rendered strings are comparable.
	// %v (not %s) is used in the message because the maps hold ints.
	if fmt.Sprint(expected) != fmt.Sprint(captures) {
		t.Fatalf("Expected nested map: %v got %v", expected, captures)
	}
}

var resultNew *Grok

func BenchmarkNew(b *testing.B) {
Expand Down

0 comments on commit d2256f6

Please sign in to comment.