diff --git a/compiler/semantic/op.go b/compiler/semantic/op.go index 7a041e6646..6d05f182d6 100644 --- a/compiler/semantic/op.go +++ b/compiler/semantic/op.go @@ -619,16 +619,6 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { if err != nil { return nil, err } - // We can do collision checking on static paths, so check what we can. - var fields field.List - for _, a := range assignments { - if this, ok := a.LHS.(*dag.This); ok { - fields = append(fields, this.Path) - } - } - if err := expr.CheckPutFields(fields); err != nil { - return nil, fmt.Errorf("put: %w", err) - } return append(seq, &dag.Put{ Kind: "Put", Args: assignments, diff --git a/docs/language/operators/put.md b/docs/language/operators/put.md index 7cf174ef3c..d55e566d9c 100644 --- a/docs/language/operators/put.md +++ b/docs/language/operators/put.md @@ -82,5 +82,5 @@ echo '{a:1} 1' | zq -z 'b:=2' - => ```mdtest-output {a:1,b:2} -error({message:"put: not a record",on:1}) +error({message:"put: not an array, record, or set",on:1}) ``` diff --git a/runtime/expr/cutter.go b/runtime/expr/cutter.go index 45c7e773a7..1ae4ed5c5a 100644 --- a/runtime/expr/cutter.go +++ b/runtime/expr/cutter.go @@ -94,7 +94,7 @@ func (c *Cutter) Eval(ectx Context, in *zed.Value) *zed.Value { func (c *Cutter) lookupBuilder(ectx Context, in *zed.Value) (*recordBuilderCachedTypes, field.List, error) { paths := c.fieldRefs[:0] for _, p := range c.lvals { - path, err := p.Eval(ectx, in) + path, err := p.EvalAsRecordPath(ectx, in) if err != nil { return nil, nil, err } diff --git a/runtime/expr/dynfield/path.go b/runtime/expr/dynfield/path.go new file mode 100644 index 0000000000..9e9028fadf --- /dev/null +++ b/runtime/expr/dynfield/path.go @@ -0,0 +1,41 @@ +package dynfield + +import ( + "github.com/brimdata/zed" + "github.com/brimdata/zed/zson" +) + +type Path []zed.Value + +func (p Path) Append(b []byte) []byte { + for i, v := range p { + if i > 0 { + b = append(b, 0) + } + b = append(b, v.Bytes()...) 
+ } + return b +} + +func (p Path) String() string { + var b []byte + for i, v := range p { + if i > 0 { + b = append(b, '.') + } + b = append(b, zson.FormatValue(&v)...) + } + return string(b) +} + +type List []Path + +func (l List) Append(b []byte) []byte { + for i, path := range l { + if i > 0 { + b = append(b, ',') + } + b = path.Append(b) + } + return b +} diff --git a/runtime/expr/lval.go b/runtime/expr/lval.go index 34e044152e..628a306afb 100644 --- a/runtime/expr/lval.go +++ b/runtime/expr/lval.go @@ -5,12 +5,14 @@ import ( "github.com/brimdata/zed" "github.com/brimdata/zed/pkg/field" + "github.com/brimdata/zed/runtime/expr/dynfield" "github.com/brimdata/zed/zson" ) type Lval struct { - Elems []LvalElem - cache field.Path + Elems []LvalElem + cache []zed.Value + fieldCache field.Path } func NewLval(evals []LvalElem) *Lval { @@ -19,18 +21,36 @@ func NewLval(evals []LvalElem) *Lval { // Eval returns the path of the lval. If there's an error the returned *zed.Value // will not be nill. -func (l *Lval) Eval(ectx Context, this *zed.Value) (field.Path, error) { +func (l *Lval) Eval(ectx Context, this *zed.Value) (dynfield.Path, error) { l.cache = l.cache[:0] for _, e := range l.Elems { - name, err := e.Eval(ectx, this) + val, err := e.Eval(ectx, this) if err != nil { return nil, err } - l.cache = append(l.cache, name) + l.cache = append(l.cache, *val) } return l.cache, nil } +func (l *Lval) EvalAsRecordPath(ectx Context, this *zed.Value) (field.Path, error) { + l.fieldCache = l.fieldCache[:0] + for _, e := range l.Elems { + val, err := e.Eval(ectx, this) + if err != nil { + return nil, err + } + if !val.IsString() { + // XXX Add context to error so we know what element is failing but + // let's wait until we can test this so we have a feel for what we + // want to see. + return nil, errors.New("field reference is not a string") + } + l.fieldCache = append(l.fieldCache, val.AsString()) + } + return l.fieldCache, nil +} + // Path returns the receiver's path. 
Path returns false when the receiver // contains a dynamic element. func (l *Lval) Path() (field.Path, bool) { @@ -46,15 +66,15 @@ func (l *Lval) Path() (field.Path, bool) { } type LvalElem interface { - Eval(ectx Context, this *zed.Value) (string, error) + Eval(ectx Context, this *zed.Value) (*zed.Value, error) } type StaticLvalElem struct { Name string } -func (l *StaticLvalElem) Eval(_ Context, _ *zed.Value) (string, error) { - return l.Name, nil +func (l *StaticLvalElem) Eval(_ Context, _ *zed.Value) (*zed.Value, error) { + return zed.NewString(l.Name), nil } type ExprLvalElem struct { @@ -69,17 +89,12 @@ func NewExprLvalElem(zctx *zed.Context, e Evaluator) *ExprLvalElem { } } -func (l *ExprLvalElem) Eval(ectx Context, this *zed.Value) (string, error) { +func (l *ExprLvalElem) Eval(ectx Context, this *zed.Value) (*zed.Value, error) { val := l.eval.Eval(ectx, this) if val.IsError() { - return "", lvalErr(ectx, val) - } - if !val.IsString() { - if val = l.caster.Eval(ectx, val); val.IsError() { - return "", errors.New("field reference is not a string") - } + return nil, lvalErr(ectx, val) } - return val.AsString(), nil + return val, nil } func lvalErr(ectx Context, errVal *zed.Value) error { diff --git a/runtime/expr/pathbuilder/builder.go b/runtime/expr/pathbuilder/builder.go new file mode 100644 index 0000000000..38c531e02c --- /dev/null +++ b/runtime/expr/pathbuilder/builder.go @@ -0,0 +1,97 @@ +package pathbuilder + +import ( + "errors" + + "github.com/brimdata/zed" + "github.com/brimdata/zed/runtime/expr/dynfield" +) + +type builder struct { + inputCount int + base Step +} + +func New(base zed.Type, paths []dynfield.Path, leafs []zed.Value) (Step, error) { + if len(paths) != len(leafs) { + return nil, errors.New("paths and leafs must be the same length") + } + b := &builder{base: newLeafStep(base, -1)} + for i, p := range paths { + if err := b.Put(p, leafs[i].Type); err != nil { + return nil, err + } + } + return b.base, nil +} + +func (m *builder) Put(p 
dynfield.Path, leaf zed.Type) error { + defer func() { m.inputCount++ }() + return m.put(&m.base, p, leaf) +} + +// put dispatches on the first element of p: a string addresses a record field +// and an integer addresses an array or set element. An empty p is an error. +func (m *builder) put(parent *Step, p dynfield.Path, typ zed.Type) error { + switch { + case len(p) == 0: + return errors.New("left-hand side cannot be 'this' (use 'yield' operator)") + case p[0].IsString(): + return m.putRecord(parent, p, typ) + case zed.IsInteger(p[0].Type.ID()): + return m.putVector(parent, p, typ) + } + return errors.New("unsupported types") +} + +func (m *builder) putRecord(s *Step, p dynfield.Path, typ zed.Type) error { + current, p := p[0], p[1:] + rstep, ok := (*s).(*recordStep) + if !ok { + // If this is a leafStep with a type of record then we need to + // initialize a recordStep with fields, otherwise just replace this with + // a recordStep. + var fields []zed.Field + if lstep, ok := (*s).(*leafStep); ok && zed.TypeRecordOf(lstep.typ) != nil { + fields = zed.TypeRecordOf(lstep.typ).Fields + } + rstep = newRecordStep(fields) + if *s == m.base { + rstep.isBase = true + } + *s = rstep + } + i := rstep.lookup(current.AsString()) + field := &rstep.fields[i] + if len(p) == 0 { + field.step = newLeafStep(typ, m.inputCount) + return nil + } + return m.put(&field.step, p, typ) +} + +func (m *builder) putVector(s *Step, p dynfield.Path, typ zed.Type) error { + current, p := p[0], p[1:] + vstep, ok := (*s).(*vectorStep) + if !ok { + // If this is a leafStep whose type has an inner type then seed the + // vectorStep with that inner type, otherwise just replace this with + // an empty vectorStep. 
+ vstep = &vectorStep{} + if lstep, ok := (*s).(*leafStep); ok && zed.InnerType(lstep.typ) != nil { + vstep.inner = zed.InnerType(lstep.typ) + _, vstep.isSet = zed.TypeUnder(lstep.typ).(*zed.TypeSet) + } + if *s == m.base { + vstep.isBase = true + } + *s = vstep + } + at := vstep.lookup(int(current.AsInt())) + elem := &vstep.elems[at] + if len(p) == 0 { + elem.step = newLeafStep(typ, m.inputCount) + return nil + } + return m.put(&elem.step, p, typ) +} diff --git a/runtime/expr/pathbuilder/builder_test.go b/runtime/expr/pathbuilder/builder_test.go new file mode 100644 index 0000000000..f5f5a401db --- /dev/null +++ b/runtime/expr/pathbuilder/builder_test.go @@ -0,0 +1,108 @@ +package pathbuilder + +import ( + "testing" + + "github.com/brimdata/zed" + "github.com/brimdata/zed/runtime/expr/dynfield" + "github.com/brimdata/zed/zcode" + "github.com/brimdata/zed/zson" + "github.com/stretchr/testify/require" +) + +func parsePath(zctx *zed.Context, ss ...string) dynfield.Path { + var path dynfield.Path + for _, s := range ss { + path = append(path, *zson.MustParseValue(zctx, s)) + } + return path +} + +type testCase struct { + describe string + base string + paths [][]string + values []string + expected string +} + +func runTestCase(t *testing.T, c testCase) { + zctx := zed.NewContext() + var baseTyp zed.Type + var baseBytes []byte + if c.base != "" { + base := zson.MustParseValue(zctx, c.base) + baseTyp, baseBytes = base.Type, base.Bytes() + } + var paths []dynfield.Path + for _, ss := range c.paths { + paths = append(paths, parsePath(zctx, ss...)) + } + var values []zed.Value + for _, s := range c.values { + values = append(values, *zson.MustParseValue(zctx, s)) + } + step, err := New(baseTyp, paths, values) + require.NoError(t, err) + var b zcode.Builder + typ, err := step.Build(zctx, &b, baseBytes, values) + require.NoError(t, err) + val := zed.NewValue(typ, b.Bytes()) + require.Equal(t, c.expected, zson.FormatValue(val)) +} + +func TestIt(t *testing.T) { + 
runTestCase(t, testCase{ + base: `{"a": 1, "b": 2}`, + paths: [][]string{ + {`"c"`, `"a"`, `"a"`}, + {`"c"`, `"b"`}, + {`"c"`, `"c"`}, + }, + values: []string{ + `45`, + `"string"`, + "127.0.0.1", + }, + expected: `{a:1,b:2,c:{a:{a:45},b:"string",c:127.0.0.1}}`, + }) + runTestCase(t, testCase{ + base: `{"a": [1,{foo:"bar"}]}`, + paths: [][]string{ + {`"a"`, `0`}, + {`"a"`, `1`, `"foo"`}, + }, + values: []string{ + `"hi"`, + `"baz"`, + }, + expected: `{a:["hi",{foo:"baz"}]}`, + }) + runTestCase(t, testCase{ + describe: "create from empty base", + paths: [][]string{ + {`"a"`}, + {`"b"`}, + }, + values: []string{ + `"foo"`, + `"bar"`, + }, + expected: `{a:"foo",b:"bar"}`, + }) + runTestCase(t, testCase{ + describe: "assign to base level array", + base: `["a", "b", "c"]`, + paths: [][]string{ + {`0`}, + {`1`}, + {`2`}, + }, + values: []string{ + `"foo"`, + `"bar"`, + `"baz"`, + }, + expected: `["foo","bar","baz"]`, + }) +} diff --git a/runtime/expr/pathbuilder/getter.go b/runtime/expr/pathbuilder/getter.go new file mode 100644 index 0000000000..906746e54d --- /dev/null +++ b/runtime/expr/pathbuilder/getter.go @@ -0,0 +1,38 @@ +package pathbuilder + +import ( + "fmt" + + "github.com/brimdata/zed/zcode" +) + +// A getter provides random access to values in a zcode container +// using zcode.Iter. It uses a cursor to avoid quadratic re-seeks for +// the common case where values are fetched sequentially. 
+type getter struct { + cursor int + bytes zcode.Bytes + it zcode.Iter +} + +func newGetter(cont zcode.Bytes) getter { + return getter{ + cursor: -1, + bytes: cont, + it: cont.Iter(), + } +} + +func (ig *getter) nth(n int) (zcode.Bytes, error) { + // Rewind when n is at or before the cursor; the iterator only moves forward. + if n <= ig.cursor { + ig.it, ig.cursor = ig.bytes.Iter(), -1 + } + for !ig.it.Done() { + zv := ig.it.Next() + if ig.cursor++; ig.cursor == n { + return zv, nil + } + } + return nil, fmt.Errorf("getter.nth: array index %d out of bounds", n) +} diff --git a/runtime/expr/pathbuilder/step.go b/runtime/expr/pathbuilder/step.go new file mode 100644 index 0000000000..b7b2c7582b --- /dev/null +++ b/runtime/expr/pathbuilder/step.go @@ -0,0 +1,186 @@ +package pathbuilder + +import ( + "cmp" + "fmt" + "slices" + + "github.com/brimdata/zed" + "github.com/brimdata/zed/zcode" +) + +type Step interface { + Build(*zed.Context, *zcode.Builder, zcode.Bytes, []zed.Value) (zed.Type, error) +} + +type recordStep struct { + isBase bool + getter getter + fields []recordField +} + +type recordField struct { + index int + name string + step Step +} + +func newRecordStep(fields []zed.Field) *recordStep { + var s recordStep + for i, f := range fields { + f := recordField{ + index: i, + name: f.Name, + step: newLeafStep(f.Type, -1), + } + s.fields = append(s.fields, f) + } + return &s +} + +func (s *recordStep) lookup(name string) int { + i := slices.IndexFunc(s.fields, func(f recordField) bool { + return f.name == name + }) + if i >= 0 { + return i + } + n := len(s.fields) + s.fields = append(s.fields, recordField{name: name, index: -1}) + return n +} + +func (s *recordStep) Build(zctx *zed.Context, b *zcode.Builder, in zcode.Bytes, vals []zed.Value) (zed.Type, error) { + if !s.isBase { + b.BeginContainer() + defer b.EndContainer() + } + s.getter = newGetter(in) + fields := make([]zed.Field, 0, len(s.fields)) + for _, field := range s.fields { + var vb zcode.Bytes + if field.index != -1 { + var err error + if vb, err = s.getter.nth(field.index); err != nil { +
return nil, err + } + } + typ, err := field.step.Build(zctx, b, vb, vals) + if err != nil { + return nil, err + } + fields = append(fields, zed.NewField(field.name, typ)) + } + // XXX If there are no downstream vector or map elements we can cache this + // result. + return zctx.LookupTypeRecord(fields) +} + +type vectorStep struct { + elems []vectorElem + getter getter + inner zed.Type + isSet bool + isBase bool +} + +type vectorElem struct { + index int + step Step +} + +func (s *vectorStep) lookup(i int) int { + elem := vectorElem{index: i} + at, ok := slices.BinarySearchFunc(s.elems, elem, func(a, b vectorElem) int { + return cmp.Compare(a.index, b.index) + }) + if !ok { + s.elems = slices.Insert(s.elems, at, elem) + } + return at +} + +func (s *vectorStep) Build(zctx *zed.Context, b *zcode.Builder, in zcode.Bytes, vals []zed.Value) (zed.Type, error) { + if !s.isBase { + b.BeginContainer() + defer b.EndContainer() + } + elems := s.elems + it := in.Iter() + var types []zed.Type + for i := 0; !it.Done(); i++ { + typ, vb := s.inner, it.Next() + if len(elems) > 0 && i == elems[0].index { + var err error + typ, err = elems[0].step.Build(zctx, b, vb, vals) + if err != nil { + return nil, err + } + elems = elems[1:] + } else { + b.Append(vb) + } + types = append(types, typ) + } + if len(elems) > 0 { + return nil, fmt.Errorf("element out of bounds %d", elems[0].index) + } + inner := normalizeVectorElems(zctx, types, b) + if s.isSet { + b.TransformContainer(zed.NormalizeSet) + return zctx.LookupTypeSet(inner), nil + } + return zctx.LookupTypeArray(inner), nil +} + +func normalizeVectorElems(zctx *zed.Context, types []zed.Type, b *zcode.Builder) zed.Type { + i := slices.IndexFunc(types, func(t zed.Type) bool { + _, ok := zed.TypeUnder(t).(*zed.TypeUnion) + return ok + }) + if i >= 0 { + // Untag union values. 
+ b.TransformContainer(func(bytes zcode.Bytes) zcode.Bytes { + var b2 zcode.Builder + for i, it := 0, bytes.Iter(); !it.Done(); i++ { + vb := it.Next() + if union, ok := zed.TypeUnder(types[i]).(*zed.TypeUnion); ok { + types[i], vb = union.Untag(vb) + } + b2.Append(vb) + } + return b2.Bytes() + }) + } + unique := zed.UniqueTypes(slices.Clone(types)) + if len(unique) == 1 { + return unique[0] + } + union := zctx.LookupTypeUnion(unique) + b.TransformContainer(func(bytes zcode.Bytes) zcode.Bytes { + var b2 zcode.Builder + for i, it := 0, bytes.Iter(); !it.Done(); i++ { + zed.BuildUnion(&b2, union.TagOf(types[i]), it.Next()) + } + return b2.Bytes() + }) + return union +} + +type leafStep struct { + inputIndex int + typ zed.Type +} + +func newLeafStep(typ zed.Type, inputIndex int) *leafStep { + return &leafStep{typ: typ, inputIndex: inputIndex} +} + +func (s *leafStep) Build(zctx *zed.Context, b *zcode.Builder, in zcode.Bytes, vals []zed.Value) (zed.Type, error) { + if s.inputIndex != -1 { + b.Append(vals[s.inputIndex].Bytes()) + } else { + b.Append(in) + } + return s.typ, nil +} diff --git a/runtime/expr/putter.go b/runtime/expr/putter.go index 54ba8d44ab..70c33f3d2e 100644 --- a/runtime/expr/putter.go +++ b/runtime/expr/putter.go @@ -1,11 +1,12 @@ package expr import ( + "encoding/binary" "fmt" - "slices" "github.com/brimdata/zed" - "github.com/brimdata/zed/pkg/field" + "github.com/brimdata/zed/runtime/expr/dynfield" + "github.com/brimdata/zed/runtime/expr/pathbuilder" "github.com/brimdata/zed/zcode" ) @@ -19,23 +20,12 @@ type Putter struct { zctx *zed.Context builder zcode.Builder clauses []Assignment - rules map[int]map[string]putRule + rules map[int]map[string]pathbuilder.Step // vals is a slice to avoid re-allocating for every value vals []zed.Value // paths is a slice to avoid re-allocating for every path - paths field.List -} - -// A putRule describes how a given record type is modified by describing -// which input fields should be replaced with which clause 
expression and -// which clauses should be appended. The type of each clause expression -// is recorded since a new rule must be created if any of the types change. -// Such changes aren't typically expected but are possible in the expression -// language. -type putRule struct { - typ zed.Type - clauseTypes []zed.Type - step putStep + paths dynfield.List + scratch []byte } func NewPutter(zctx *zed.Context, clauses []Assignment) *Putter { @@ -43,11 +33,38 @@ func NewPutter(zctx *zed.Context, clauses []Assignment) *Putter { zctx: zctx, clauses: clauses, vals: make([]zed.Value, len(clauses)), - rules: make(map[int]map[string]putRule), + rules: make(map[int]map[string]pathbuilder.Step), } } -func (p *Putter) eval(ectx Context, this *zed.Value) ([]zed.Value, field.List, error) { +func (p *Putter) Eval(ectx Context, this *zed.Value) *zed.Value { + if k := this.Type.Kind(); k != zed.RecordKind && k != zed.ArrayKind && k != zed.SetKind { + if this.IsError() { + // propagate errors + return this + } + return ectx.CopyValue(*p.zctx.WrapError("put: not an array, record, or set", this)) + } + paths, vals, err := p.eval(ectx, this) + if err != nil { + return ectx.CopyValue(*p.zctx.WrapError(fmt.Sprintf("put: %s", err), this)) + } + if len(vals) == 0 { + return this + } + step, err := p.lookupRule(this.Type, paths, vals) + if err != nil { + return ectx.CopyValue(*p.zctx.WrapError(err.Error(), this)) + } + p.builder.Reset() + typ, err := step.Build(p.zctx, &p.builder, this.Bytes(), vals) + if err != nil { + return ectx.CopyValue(*p.zctx.WrapError(err.Error(), this)) + } + return ectx.NewValue(typ, p.builder.Bytes()) +} + +func (p *Putter) eval(ectx Context, this *zed.Value) (dynfield.List, []zed.Value, error) { p.vals = p.vals[:0] p.paths = p.paths[:0] for _, cl := range p.clauses { @@ -62,275 +79,35 @@ func (p *Putter) eval(ectx Context, this *zed.Value) ([]zed.Value, field.List, e } p.paths = append(p.paths, path) } - return p.vals, p.paths, nil -} - -// A putStep is a 
recursive data structure encoding a series of steps to be -// carried out to construct an output record from an input record and -// a slice of evaluated clauses. -type putStep struct { - op putOp - index int - container bool - record []putStep // for op == record -} - -func (p *putStep) append(step putStep) { - p.record = append(p.record, step) -} - -type putOp int - -const ( - putFromInput putOp = iota // copy field from input record - putFromClause // copy field from put assignment - putRecord // recurse into record below us -) - -func (p *putStep) build(in zcode.Bytes, b *zcode.Builder, vals []zed.Value) zcode.Bytes { - switch p.op { - case putRecord: - b.Reset() - if err := p.buildRecord(in, b, vals); err != nil { - return nil - } - return b.Bytes() - default: - // top-level op must be a record - panic(fmt.Sprintf("put: unexpected step %v", p.op)) - } -} - -func (p *putStep) buildRecord(in zcode.Bytes, b *zcode.Builder, vals []zed.Value) error { - ig := newGetter(in) - - for _, step := range p.record { - switch step.op { - case putFromInput: - bytes, err := ig.nth(step.index) - if err != nil { - return err - } - b.Append(bytes) - case putFromClause: - b.Append(vals[step.index].Bytes()) - case putRecord: - b.BeginContainer() - bytes, err := in, error(nil) - if step.index >= 0 { - bytes, err = ig.nth(step.index) - if err != nil { - return err - } - } - if err := step.buildRecord(bytes, b, vals); err != nil { - return err - } - b.EndContainer() - } - } - return nil -} - -// A getter provides random access to values in a zcode container -// using zcode.Iter. It uses a cursor to avoid quadratic re-seeks for -// the common case where values are fetched sequentially. 
-type getter struct { - cursor int - container zcode.Bytes - it zcode.Iter -} - -func newGetter(cont zcode.Bytes) getter { - return getter{ - cursor: -1, - container: cont, - it: cont.Iter(), - } -} - -func (ig *getter) nth(n int) (zcode.Bytes, error) { - if n < ig.cursor { - ig.it = ig.container.Iter() - } - for !ig.it.Done() { - zv := ig.it.Next() - ig.cursor++ - if ig.cursor == n { - return zv, nil - } - } - return nil, fmt.Errorf("getter.nth: array index %d out of bounds", n) -} - -func findOverwriteClause(path field.Path, paths field.List) (int, field.Path, bool) { - for i, lpath := range paths { - if path.Equal(lpath) || lpath.HasStrictPrefix(path) { - return i, lpath, true - } - } - return -1, nil, false -} - -func (p *Putter) deriveSteps(inType *zed.TypeRecord, vals []zed.Value, paths field.List) (putStep, zed.Type) { - return p.deriveRecordSteps(field.Path{}, inType.Fields, vals, paths) -} - -func (p *Putter) deriveRecordSteps(parentPath field.Path, inFields []zed.Field, vals []zed.Value, paths field.List) (putStep, *zed.TypeRecord) { - s := putStep{op: putRecord} - var fields []zed.Field - - // First look at all input fields to see which should - // be copied over and which should be overwritten by - // assignments. - for i, f := range inFields { - path := append(parentPath, f.Name) - matchIndex, matchPath, found := findOverwriteClause(path, paths) - switch { - // input not overwritten by assignment: copy input value. - case !found: - s.append(putStep{ - op: putFromInput, - container: zed.IsContainerType(f.Type), - index: i, - }) - fields = append(fields, f) - // input field overwritten by non-nested assignment: copy assignment value. - case len(path) == len(matchPath): - s.append(putStep{ - op: putFromClause, - container: zed.IsContainerType(vals[matchIndex].Type), - index: matchIndex, - }) - fields = append(fields, zed.NewField(f.Name, vals[matchIndex].Type)) - // input record field overwritten by nested assignment: recurse. 
- case len(path) < len(matchPath) && zed.IsRecordType(f.Type): - nestedStep, typ := p.deriveRecordSteps(path, zed.TypeRecordOf(f.Type).Fields, vals, paths) - nestedStep.index = i - s.append(nestedStep) - fields = append(fields, zed.NewField(f.Name, typ)) - // input non-record field overwritten by nested assignment(s): recurse. - case len(path) < len(matchPath) && !zed.IsRecordType(f.Type): - nestedStep, typ := p.deriveRecordSteps(path, []zed.Field{}, vals, paths) - nestedStep.index = i - s.append(nestedStep) - fields = append(fields, zed.NewField(f.Name, typ)) - default: - panic("put: internal error computing record steps") - } - } - - appendClause := func(lpath field.Path) bool { - if !lpath.HasPrefix(parentPath) { - return false - } - return !hasField(lpath[len(parentPath)], fields) - } - // Then, look at put assignments to see if there are any new fields to append. - for i, lpath := range paths { - if appendClause(lpath) { - switch { - // Append value at this level - case len(lpath) == len(parentPath)+1: - s.append(putStep{ - op: putFromClause, - container: zed.IsContainerType(vals[i].Type), - index: i, - }) - fields = append(fields, zed.NewField(lpath[len(parentPath)], vals[i].Type)) - // Appended and nest. For example, this would happen with "put b.c=1" applied to a record {"a": 1}. 
- case len(lpath) > len(parentPath)+1: - path := append(parentPath, lpath[len(parentPath)]) - nestedStep, typ := p.deriveRecordSteps(path, []zed.Field{}, vals, paths) - nestedStep.index = -1 - fields = append(fields, zed.NewField(lpath[len(parentPath)], typ)) - s.append(nestedStep) - } - } - } - typ, err := p.zctx.LookupTypeRecord(fields) - if err != nil { - panic(err) - } - return s, typ -} - -func hasField(name string, fields []zed.Field) bool { - return slices.ContainsFunc(fields, func(f zed.Field) bool { - return f.Name == name - }) + return p.paths, p.vals, nil } -func (p *Putter) lookupRule(inType *zed.TypeRecord, vals []zed.Value, fields field.List) (putRule, error) { +func (p *Putter) lookupRule(inType zed.Type, fields dynfield.List, vals []zed.Value) (pathbuilder.Step, error) { m, ok := p.rules[inType.ID()] if !ok { - m = make(map[string]putRule) + m = make(map[string]pathbuilder.Step) p.rules[inType.ID()] = m } - rule, ok := m[fields.String()] - if ok && sameTypes(rule.clauseTypes, vals) { + p.scratch = encodePaths(p.scratch[:0], fields, vals) + if rule, ok := m[string(p.scratch)]; ok { return rule, nil } - // first check fields - if err := CheckPutFields(fields); err != nil { - return putRule{}, fmt.Errorf("put: %w", err) - } - step, typ := p.deriveSteps(inType, vals, fields) - var clauseTypes []zed.Type - for _, val := range vals { - clauseTypes = append(clauseTypes, val.Type) - } - rule = putRule{typ, clauseTypes, step} - p.rules[inType.ID()][fields.String()] = rule - return rule, nil -} - -func CheckPutFields(fields field.List) error { - for i, f := range fields { - if f.IsEmpty() { - return fmt.Errorf("left-hand side cannot be 'this' (use 'yield' operator)") - } - for _, c := range fields[i+1:] { - if f.Equal(c) { - return fmt.Errorf("multiple assignments to %s", f) - } - if c.HasStrictPrefix(f) { - return fmt.Errorf("conflicting nested assignments to %s and %s", f, c) - } - if f.HasStrictPrefix(c) { - return fmt.Errorf("conflicting nested 
assignments to %s and %s", c, f) - } - } + step, err := pathbuilder.New(inType, fields, vals) + if err != nil { + return nil, err } - return nil -} - -func sameTypes(types []zed.Type, vals []zed.Value) bool { - return slices.EqualFunc(types, vals, func(typ zed.Type, val zed.Value) bool { - return typ == val.Type - }) + p.rules[inType.ID()][string(p.scratch)] = step + return step, nil } -func (p *Putter) Eval(ectx Context, this *zed.Value) *zed.Value { - recType := zed.TypeRecordOf(this.Type) - if recType == nil { - if this.IsError() { - // propagate errors - return this +func encodePaths(b []byte, fields dynfield.List, vals []zed.Value) []byte { + for i := range fields { + if i > 0 { + b = append(b, ',') } - return ectx.CopyValue(*p.zctx.WrapError("put: not a record", this)) - } - vals, paths, err := p.eval(ectx, this) - if err != nil { - return ectx.CopyValue(*p.zctx.WrapError(fmt.Sprintf("put: %s", err), this)) - } - if len(vals) == 0 { - return this - } - rule, err := p.lookupRule(recType, vals, paths) - if err != nil { - return ectx.CopyValue(*p.zctx.WrapError(err.Error(), this)) + b = fields[i].Append(b) + b = append(b, ':') + b = binary.AppendVarint(b, int64(vals[i].Type.ID())) } - bytes := rule.step.build(this.Bytes(), &p.builder, vals) - return ectx.NewValue(rule.typ, bytes) + return b } diff --git a/runtime/expr/renamer.go b/runtime/expr/renamer.go index d9a097983a..5d9a5a87f6 100644 --- a/runtime/expr/renamer.go +++ b/runtime/expr/renamer.go @@ -70,11 +70,11 @@ func CheckRenameField(src, dst field.Path) error { func (r *Renamer) evalFields(ectx Context, this *zed.Value) (field.List, field.List, error) { var srcs, dsts field.List for i := range r.srcs { - src, err := r.srcs[i].Eval(ectx, this) + src, err := r.srcs[i].EvalAsRecordPath(ectx, this) if err != nil { return nil, nil, err } - dst, err := r.dsts[i].Eval(ectx, this) + dst, err := r.dsts[i].EvalAsRecordPath(ectx, this) if err != nil { return nil, nil, err } diff --git 
a/runtime/op/ztests/put-array-element.yaml b/runtime/op/ztests/put-array-element.yaml new file mode 100644 index 0000000000..ecf24eebca --- /dev/null +++ b/runtime/op/ztests/put-array-element.yaml @@ -0,0 +1,13 @@ +script: | + echo '["a","b","c"]' | zq -z 'this[0] := "foo", this[1] := 1, this[2] := "baz"' - + echo '|["a","b","c"]|' | zq -z 'this[0] := "foo", this[1] := 1, this[2] := "baz"' - + echo '["a",1]' | zq -z 'this[1] := "b"' - + echo '[0,{a:{b:1}}]' | zq -z 'this[1]["a"]["b"] := "foo"' - + +outputs: + - name: stdout + data: | + ["foo",1,"baz"] + |[1,"baz","foo"]| + ["a","b"] + [0,{a:{b:"foo"}}] diff --git a/runtime/op/ztests/put-dynamic-field.yaml b/runtime/op/ztests/put-dynamic-field.yaml index 7566f09213..0447d6de9a 100644 --- a/runtime/op/ztests/put-dynamic-field.yaml +++ b/runtime/op/ztests/put-dynamic-field.yaml @@ -9,7 +9,7 @@ script: | echo "// ===" echo '{a:"foo"}' | zq -z 'this[a]["bar"] := "baz" | cut foo' - echo "// ===" - # runtime error cases + # Test last value wins. echo '{a:"hello",b:"hello"}' | zq -z 'this[a] := "world1", this[b] := "world2"' - echo "// ===" echo '{a:"foo",b:"bar"}' | zq -z 'this[a][b] := "world", this[a] := "world"' - @@ -31,9 +31,9 @@ outputs: // === {foo:{bar:"baz"}} // === - error({message:"put: multiple assignments to hello",on:{a:"hello",b:"hello"}}) + {a:"hello",b:"hello",hello:"world2"} // === - error({message:"put: conflicting nested assignments to foo and foo.bar",on:{a:"foo",b:"bar"}}) + {a:"foo",b:"bar",foo:"world"} // === error({message:"put: missing",on:{}}) - name: stderr diff --git a/zcode/builder.go b/zcode/builder.go index 44c64eb62b..d6735488e5 100644 --- a/zcode/builder.go +++ b/zcode/builder.go @@ -64,10 +64,12 @@ func (b *Builder) EndContainer() { } // TransformContainer calls transform, passing it the body of the most recently -// opened container and replacing the original body with the return value. It -// panics if the receiver has no open container. 
+// opened container and replacing the original body with the return value. func (b *Builder) TransformContainer(transform func(Bytes) Bytes) { - bodyOff := b.containers[len(b.containers)-1] + bodyOff := 0 + if len(b.containers) > 0 { + bodyOff = b.containers[len(b.containers)-1] + } body := transform(b.bytes[bodyOff:]) b.bytes = append(b.bytes[:bodyOff], body...) } diff --git a/zcode/builder_test.go b/zcode/builder_test.go index 6230f84ddf..7bf9f5480a 100644 --- a/zcode/builder_test.go +++ b/zcode/builder_test.go @@ -95,12 +95,6 @@ func TestBuilder(t *testing.T) { expected := Append(nil, Append(nil, v2)) require.Exactly(t, expected, b.Bytes()) }) - t.Run("TransformContainer/panic", func(t *testing.T) { - b := NewBuilder() - require.Panics(t, func() { - b.TransformContainer(func(body Bytes) Bytes { return nil }) - }) - }) t.Run("Reset", func(t *testing.T) { b := NewBuilder() b.Append([]byte("1"))