From 271bb189d65ef1cd40529010b2e066bd25ed427e Mon Sep 17 00:00:00 2001 From: Matthew Nibecker Date: Tue, 3 Oct 2023 15:03:01 -0700 Subject: [PATCH] Support dynamic paths for put and cut op --- compiler/ast/dag/expr.go | 38 +++- compiler/ast/dag/op.go | 5 +- compiler/ast/dag/unpack.go | 2 + compiler/kernel/expr.go | 28 ++- compiler/kernel/groupby.go | 14 +- compiler/kernel/op.go | 53 +++--- compiler/optimizer/op.go | 11 +- compiler/semantic/expr.go | 192 ++++++++++++++------- compiler/semantic/op.go | 104 +++++++---- compiler/semantic/sql.go | 16 +- compiler/ztests/dynamic-field-cut.yaml | 41 +++++ compiler/ztests/dynamic-field-put.yaml | 41 +++++ compiler/ztests/where-on-func.yaml | 2 +- docs/language/operators/rename.md | 2 +- docs/tutorials/schools.md | 2 +- runtime/expr/cutter.go | 92 ++++++---- runtime/expr/eval.go | 14 +- runtime/expr/path.go | 66 +++++++ runtime/expr/putter.go | 142 ++++++++------- runtime/expr/ztests/cut-dup-fields.yaml | 9 +- runtime/expr/ztests/cut-not-adjacent.yaml | 8 +- runtime/expr/ztests/rename-error-move.yaml | 2 +- runtime/op/groupby/groupby.go | 18 +- runtime/op/join/join.go | 3 +- zfmt/dag.go | 26 ++- 25 files changed, 662 insertions(+), 269 deletions(-) create mode 100644 compiler/ztests/dynamic-field-cut.yaml create mode 100644 compiler/ztests/dynamic-field-put.yaml create mode 100644 runtime/expr/path.go diff --git a/compiler/ast/dag/expr.go b/compiler/ast/dag/expr.go index a735d4133a..f3d4fa7c69 100644 --- a/compiler/ast/dag/expr.go +++ b/compiler/ast/dag/expr.go @@ -10,6 +10,9 @@ type ( VectorElem interface { vectorElem() } + PathElem interface { + pathElem() + } ) // Exprs @@ -27,7 +30,7 @@ type ( } Assignment struct { Kind string `json:"kind" unpack:""` - LHS Expr `json:"lhs"` + LHS *Path `json:"lhs"` RHS Expr `json:"rhs"` } BinaryExpr struct { @@ -72,6 +75,10 @@ type ( Exprs []Expr `json:"exprs"` Body Seq `json:"body"` } + Path struct { + Kind string `json:"kind" unpack:""` + Path []PathElem `json:"path"` + } RecordExpr struct { Kind string `json:"kind" unpack:""` Elems []RecordElem `json:"elems"` @@ -123,6 +130,7 @@ func (*Func) ExprDAG() {} func (*Literal) ExprDAG() {} func (*MapExpr) ExprDAG() {} func (*OverExpr) ExprDAG() {} +func (*Path) ExprDAG() {} func (*RecordExpr) ExprDAG() {} func (*RegexpMatch) ExprDAG() {} func (*RegexpSearch) ExprDAG() {} @@ -159,6 +167,34 @@ func (*Spread) recordAST() {} func (*Spread) vectorElem() {} func (*VectorValue) vectorElem() {} +func (p *Path) StaticPath() *This { + this := &This{Kind: "This"} + for _, elem := range p.Path { + if p, ok := elem.(*StaticPathElem); ok { + this.Path = append(this.Path, p.Name) + continue + } + return nil + } + return this +} + +func NewStaticPath(path ...string) *Path { + p := &Path{Kind: "Path"} + for _, name := range path { + p.Path = append(p.Path, &StaticPathElem{Kind: "StaticPathElem", Name: name}) + } + return p +} + +type StaticPathElem struct { + Kind string `json:"kind" unpack:""` + Name string `json:"name"` +} + +func (*This) pathElem() {} +func (*StaticPathElem) pathElem() {} + func NewBinaryExpr(op string, lhs, rhs Expr) *BinaryExpr { return &BinaryExpr{ Kind: "BinaryExpr", diff --git a/compiler/ast/dag/op.go b/compiler/ast/dag/op.go index 42330204c4..118445cfcb 100644 --- a/compiler/ast/dag/op.go +++ b/compiler/ast/dag/op.go @@ -95,8 +95,9 @@ type ( Args []Assignment `json:"args"` } Rename struct { - Kind string `json:"kind" unpack:""` - Args []Assignment `json:"args"` + Kind string `json:"kind" unpack:""` + Dsts []*This `json:"dsts"` + Srcs []*This `json:"srcs"` } Scatter struct { Kind string `json:"kind" unpack:""` diff --git a/compiler/ast/dag/unpack.go b/compiler/ast/dag/unpack.go index 0a03034870..e849f803de 100644 --- a/compiler/ast/dag/unpack.go +++ b/compiler/ast/dag/unpack.go @@ -36,6 +36,8 @@ var unpacker = unpack.New( Over{}, OverExpr{}, Pass{}, + Path{}, + StaticPathElem{}, PoolScan{}, Put{}, RecordExpr{}, diff --git a/compiler/kernel/expr.go b/compiler/kernel/expr.go index 80d5dae6d4..a12aa7c02f 100644 --- a/compiler/kernel/expr.go +++ b/compiler/kernel/expr.go @@ -65,6 +65,12 @@ func (b *Builder) compileExpr(e dag.Expr) (expr.Evaluator, error) { return expr.NewDottedExpr(b.zctx(), field.Path(e.Path)), nil case *dag.Dot: return b.compileDotExpr(e) + case *dag.Path: + // Path only works as a general expression if it is a static path. + if this := e.StaticPath(); this != nil { + return expr.NewDottedExpr(b.zctx(), field.Path(this.Path)), nil + } + return nil, fmt.Errorf("internal error: invalid path expression %s", e) case *dag.UnaryExpr: return b.compileUnary(*e) case *dag.BinaryExpr: @@ -263,15 +269,27 @@ func (b *Builder) compileDotExpr(dot *dag.Dot) (expr.Evaluator, error) { return expr.NewDotExpr(b.zctx(), record, dot.RHS), nil } -func compileLval(e dag.Expr) (field.Path, error) { - if this, ok := e.(*dag.This); ok { - return field.Path(this.Path), nil +func (b *Builder) compilePath(e *dag.Path) (*expr.Path, error) { + elems := make([]expr.PathElem, 0, len(e.Path)) + for _, elem := range e.Path { + switch e := elem.(type) { + case *dag.This: + eval, err := b.compileExpr(e) + if err != nil { + return nil, err + } + elems = append(elems, expr.NewPathElemExpr(b.octx.Zctx, eval)) + case *dag.StaticPathElem: + elems = append(elems, &expr.StaticPathElem{Name: e.Name}) + default: + return nil, fmt.Errorf("internal error: invalid lval type %T", e) + } } - return nil, errors.New("invalid expression on lhs of assignment") + return expr.NewPath(elems), nil } func (b *Builder) compileAssignment(node *dag.Assignment) (expr.Assignment, error) { - lhs, err := compileLval(node.LHS) + lhs, err := b.compilePath(node.LHS) if err != nil { return expr.Assignment{}, err } diff --git a/compiler/kernel/groupby.go b/compiler/kernel/groupby.go index d0ab780abb..300f86a46d 100644 --- a/compiler/kernel/groupby.go +++ b/compiler/kernel/groupby.go @@ -2,7 +2,6 @@ package kernel import ( "errors" - "fmt" "github.com/brimdata/zed/compiler/ast/dag" "github.com/brimdata/zed/order" @@ -10,10 +9,11 @@ import ( "github.com/brimdata/zed/runtime/expr" "github.com/brimdata/zed/runtime/op/groupby" "github.com/brimdata/zed/zbuf" + "golang.org/x/exp/slices" ) func (b *Builder) compileGroupBy(parent zbuf.Puller, summarize *dag.Summarize) (*groupby.Op, error) { - keys, err := b.compileAssignments(summarize.Keys) + keyPaths, keyVals, err := b.compileStaticAssignments(summarize.Keys) if err != nil { return nil, err } @@ -22,7 +22,7 @@ func (b *Builder) compileGroupBy(parent zbuf.Puller, summarize *dag.Summarize) ( return nil, err } dir := order.Direction(summarize.InputSortDir) - return groupby.New(b.octx, parent, keys, names, reducers, summarize.Limit, dir, summarize.PartialsIn, summarize.PartialsOut) + return groupby.New(b.octx, parent, keyPaths, keyVals, names, reducers, summarize.Limit, dir, summarize.PartialsIn, summarize.PartialsOut) } func (b *Builder) compileAggAssignments(assignments []dag.Assignment) (field.List, []*expr.Aggregator, error) { @@ -44,12 +44,12 @@ func (b *Builder) compileAggAssignment(assignment dag.Assignment) (field.Path, * if !ok { return nil, nil, errors.New("aggregator is not an aggregation expression") } - lhs, err := compileLval(assignment.LHS) - if err != nil { - return nil, nil, fmt.Errorf("lhs of aggregation: %w", err) + this := assignment.LHS.StaticPath() + if this == nil { + return nil, nil, errors.New("internal error: aggregator assignment must be a static path") } m, err := b.compileAgg(aggAST) - return lhs, m, err + return slices.Clone(this.Path), m, err } func (b *Builder) compileAgg(agg *dag.Agg) (*expr.Aggregator, error) { diff --git a/compiler/kernel/op.go b/compiler/kernel/op.go index e51e960209..971f25dd2a 100644 --- a/compiler/kernel/op.go +++ b/compiler/kernel/op.go @@ -174,36 +174,13 @@ func (b *Builder) compileLeaf(o dag.Op, parent zbuf.Puller) (zbuf.Puller, error) if err != nil { return nil, err } - putter, err := expr.NewPutter(b.octx.Zctx, clauses) - if err != nil { - return nil, err - } + putter := expr.NewPutter(b.octx.Zctx, clauses) return op.NewApplier(b.octx, parent, putter), nil case *dag.Rename: var srcs, dsts field.List - for _, fa := range v.Args { - dst, err := compileLval(fa.LHS) - if err != nil { - return nil, err - } - // We call CompileLval on the RHS because renames are - // restricted to dotted field name expressions. - src, err := compileLval(fa.RHS) - if err != nil { - return nil, err - } - if len(dst) != len(src) { - return nil, fmt.Errorf("cannot rename %s to %s", src, dst) - } - // Check that the prefixes match and, if not, report first place - // that they don't. - for i := 0; i <= len(src)-2; i++ { - if src[i] != dst[i] { - return nil, fmt.Errorf("cannot rename %s to %s (differ in %s vs %s)", src, dst, src[i], dst[i]) - } - } - dsts = append(dsts, dst) - srcs = append(srcs, src) + for k := range v.Dsts { + srcs = append(srcs, v.Srcs[k].Path) + dsts = append(dsts, v.Dsts[k].Path) } renamer := expr.NewRenamer(b.octx.Zctx, srcs, dsts) return op.NewApplier(b.octx, parent, renamer), nil @@ -376,6 +353,24 @@ func (b *Builder) compileOver(parent zbuf.Puller, over *dag.Over) (zbuf.Puller, return scope.NewExit(exit), nil } +func (b *Builder) compileStaticAssignments(assignments []dag.Assignment) ([]field.Path, []expr.Evaluator, error) { + lhs := make([]field.Path, 0, len(assignments)) + rhs := make([]expr.Evaluator, 0, len(assignments)) + for _, a := range assignments { + this := a.LHS.StaticPath() + if this == nil { + return nil, nil, errors.New("internal error: dynamic lhs assignment when expecting a static path") + } + lhs = append(lhs, slices.Clone(this.Path)) + r, err := b.compileExpr(a.RHS) + if err != nil { + return nil, nil, err + } + rhs = append(rhs, r) + } + return lhs, rhs, nil +} + func (b *Builder) compileAssignments(assignments []dag.Assignment) ([]expr.Assignment, error) { keys := make([]expr.Assignment, 0, len(assignments)) for _, assignment := range assignments { @@ -388,9 +383,9 @@ func (b *Builder) compileAssignments(assignments []dag.Assignment) ([]expr.Assig return keys, nil } -func splitAssignments(assignments []expr.Assignment) (field.List, []expr.Evaluator) { +func splitAssignments(assignments []expr.Assignment) ([]*expr.Path, []expr.Evaluator) { n := len(assignments) - lhs := make(field.List, 0, n) + lhs := make([]*expr.Path, 0, n) rhs := make([]expr.Evaluator, 0, n) for _, a := range assignments { lhs = append(lhs, a.LHS) diff --git a/compiler/optimizer/op.go b/compiler/optimizer/op.go index d6c00cbd4b..cc716f6506 100644 --- a/compiler/optimizer/op.go +++ b/compiler/optimizer/op.go @@ -52,9 +52,9 @@ func (o *Optimizer) analyzeSortKey(op dag.Op, in order.SortKey) (order.SortKey, return in, nil case *dag.Rename: out := in - for _, assignment := range op.Args { - if fieldOf(assignment.RHS).Equal(key) { - lhs := fieldOf(assignment.LHS) + for k := range op.Dsts { + if fieldOf(op.Srcs[k]).Equal(key) { + lhs := fieldOf(op.Dsts[k]) out = order.NewSortKey(in.Order, field.List{lhs}) } } @@ -211,6 +211,11 @@ func fieldOf(e dag.Expr) field.Path { if this, ok := e.(*dag.This); ok { return this.Path } + if path, ok := e.(*dag.Path); ok { + if this := path.StaticPath(); this != nil { + return this.Path + } + } return nil } diff --git a/compiler/semantic/expr.go b/compiler/semantic/expr.go index 68c21341eb..804034bdae 100644 --- a/compiler/semantic/expr.go +++ b/compiler/semantic/expr.go @@ -420,6 +420,55 @@ func (a *analyzer) semBinary(e *ast.BinaryExpr) (dag.Expr, error) { }, nil } +func (a *analyzer) semPath(e ast.Expr) (*dag.Path, error) { + switch e := e.(type) { + case *ast.BinaryExpr: + if e.Op == "." { + lhs, err := a.semPath(e.LHS) + if err != nil || lhs == nil { + return nil, err + } + id, ok := e.RHS.(*ast.ID) + if !ok { + return nil, nil + } + lhs.Path = append(lhs.Path, &dag.StaticPathElem{Kind: "StaticPathElem", Name: id.Name}) + return lhs, nil + } + if e.Op == "[" { + lhs, err := a.semPath(e.LHS) + if lhs == nil || err != nil { + return nil, nil + } + rhs, err := a.semExpr(e.RHS) + if err != nil { + return nil, err + } + if this, ok := rhs.(*dag.This); ok { + lhs.Path = append(lhs.Path, this) + return lhs, nil + } + if p, ok := isStringConst(a.zctx, rhs); ok { + lhs.Path = append(lhs.Path, &dag.StaticPathElem{Kind: "StaticPathElem", Name: p}) + return lhs, nil + } + } + return nil, nil + case *ast.ID: + id, err := a.semID(e) + if err != nil { + return nil, err + } + if this, ok := id.(*dag.This); ok { + return dag.NewStaticPath(this.Path...), nil + } + return nil, nil + } + // This includes a null Expr, which can happen if the AST is missing + // a field or sets it to null. + return nil, nil +} + func (a *analyzer) isIndexOfThis(lhs, rhs dag.Expr) *dag.This { if this, ok := lhs.(*dag.This); ok { if s, ok := isStringConst(a.zctx, rhs); ok { @@ -507,10 +556,10 @@ func (a *analyzer) semExprs(in []ast.Expr) ([]dag.Expr, error) { return exprs, nil } -func (a *analyzer) semAssignments(assignments []ast.Assignment, summarize bool) ([]dag.Assignment, error) { +func (a *analyzer) semAssignments(assignments []ast.Assignment) ([]dag.Assignment, error) { out := make([]dag.Assignment, 0, len(assignments)) for _, e := range assignments { - a, err := a.semAssignment(e, summarize) + a, err := a.semAssignment(e) if err != nil { return nil, err } @@ -519,70 +568,74 @@ func (a *analyzer) semAssignments(assignments []ast.Assignment, summarize bool) return out, nil } -func (a *analyzer) semAssignment(assign ast.Assignment, summarize bool) (dag.Assignment, error) { - rhs, err := a.semExpr(assign.RHS) +func (a *analyzer) semAssignment(e ast.Assignment) (dag.Assignment, error) { + rhs, err := a.semExpr(e.RHS) if err != nil { - return dag.Assignment{}, fmt.Errorf("rhs of assignment expression: %w", err) - } - if _, ok := rhs.(*dag.Agg); ok { - summarize = true + return dag.Assignment{}, err } - var lhs dag.Expr - if assign.LHS != nil { - lhs, err = a.semExpr(assign.LHS) + var lhs *dag.Path + if e.LHS == nil { + path, err := derriveLHSPath(rhs) if err != nil { - return dag.Assignment{}, fmt.Errorf("lhs of assigment expression: %w", err) + return dag.Assignment{}, err + } + lhs = dag.NewStaticPath(path...) + } else { + if lhs, err = a.semPath(e.LHS); lhs == nil { + if err == nil { + err = errors.New("illegal left-hand side of assignment") + } + return dag.Assignment{}, err + } + } + if len(lhs.Path) == 0 { + return dag.Assignment{}, errors.New("cannot assign to 'this'") + } + return dag.Assignment{Kind: "Assignment", LHS: lhs, RHS: rhs}, nil +} + +func assignHasDynamicPath(assignments []dag.Assignment) bool { + for _, a := range assignments { + if a.LHS.StaticPath() == nil { + return true } - } else if call, ok := assign.RHS.(*ast.Call); ok { - path := []string{call.Name} - switch call.Name { + } + return false +} + +func derriveLHSPath(rhs dag.Expr) ([]string, error) { + var path []string + switch rhs := rhs.(type) { + case *dag.Call: + path = []string{rhs.Name} + switch rhs.Name { case "every": // If LHS is nil and the call is every() make the LHS field ts since // field ts assumed with every. path = []string{"ts"} case "quiet": - if len(call.Args) > 0 { - if p, ok := rhs.(*dag.Call).Args[0].(*dag.This); ok { - path = p.Path + if len(rhs.Args) > 0 { + if this, ok := rhs.Args[0].(*dag.This); ok { + path = this.Path } } } - lhs = &dag.This{Kind: "This", Path: path} - } else if agg, ok := assign.RHS.(*ast.Agg); ok { - lhs = &dag.This{Kind: "This", Path: []string{agg.Name}} - } else if v, ok := rhs.(*dag.Var); ok { - lhs = &dag.This{Kind: "This", Path: []string{v.Name}} - } else { - lhs, err = a.semExpr(assign.RHS) - if err != nil { - return dag.Assignment{}, errors.New("assignment name could not be inferred from rhs expression") - } - } - if summarize { - // Summarize always outputs its results as new records of "this" - // so if we have an "as" that overrides "this", we just - // convert it back to a local this. - if dot, ok := lhs.(*dag.Dot); ok { - if v, ok := dot.LHS.(*dag.Var); ok && v.Name == "this" { - lhs = &dag.This{Kind: "This", Path: []string{dot.RHS}} - } - } - } - // Make sure we have a valid lval for lhs. - this, ok := lhs.(*dag.This) - if !ok { - return dag.Assignment{}, errors.New("illegal left-hand side of assignment") - } - if len(this.Path) == 0 { - return dag.Assignment{}, errors.New("cannot assign to 'this'") + case *dag.Agg: + path = []string{rhs.Name} + case *dag.Var: + path = []string{rhs.Name} + case *dag.This: + path = rhs.Path + default: + return nil, errors.New("assignment name could not be inferred from rhs expression") } - return dag.Assignment{Kind: "Assignment", LHS: lhs, RHS: rhs}, nil + return path, nil } -func (a *analyzer) semFields(exprs []ast.Expr) ([]dag.Expr, error) { +func (a *analyzer) semStaticFields(exprs []ast.Expr) ([]dag.Expr, error) { fields := make([]dag.Expr, 0, len(exprs)) for _, e := range exprs { - f, err := a.semField(e) + f, err := a.semStaticField(e) if err != nil { return nil, err } @@ -591,9 +644,9 @@ func (a *analyzer) semFields(exprs []ast.Expr) ([]dag.Expr, error) { return fields, nil } -// semField analyzes the expression f and makes sure that it's +// semStaticField analyzes the expression f and makes sure that it's // a field reference returning an error if not. -func (a *analyzer) semField(f ast.Expr) (*dag.This, error) { +func (a *analyzer) semStaticField(f ast.Expr) (*dag.This, error) { e, err := a.semExpr(f) if err != nil { return nil, errors.New("invalid expression used as a field") @@ -641,11 +694,11 @@ func (a *analyzer) maybeConvertAgg(call *ast.Call) (dag.Expr, error) { }, nil } -func DotExprToFieldPath(e ast.Expr) *dag.This { +func (a *analyzer) dotExprToFieldPath(e ast.Expr) *dag.This { switch e := e.(type) { case *ast.BinaryExpr: if e.Op == "." { - lhs := DotExprToFieldPath(e.LHS) + lhs := a.dotExprToFieldPath(e.LHS) if lhs == nil { return nil } @@ -653,23 +706,42 @@ func DotExprToFieldPath(e ast.Expr) *dag.This { if !ok { return nil } - lhs.Path = append(lhs.Path, id.Name) + o, err := a.semID(id) + if err != nil { + return nil + } + this, ok := o.(*dag.This) + if !ok { + return nil + } + lhs.Path = append(lhs.Path, this.Path...) return lhs } if e.Op == "[" { - lhs := DotExprToFieldPath(e.LHS) + lhs := a.dotExprToFieldPath(e.LHS) if lhs == nil { return nil } - id, ok := e.RHS.(*astzed.Primitive) - if !ok || id.Type != "string" { + rhs, err := a.semExpr(e.RHS) + if err != nil { return nil } - lhs.Path = append(lhs.Path, id.Text) - return lhs + if s, ok := isStringConst(a.zctx, rhs); ok { + lhs.Path = append(lhs.Path, s) + return lhs + } + return nil } case *ast.ID: - return pathOf(e.Name) + o, err := a.semID(e) + if err != nil { + return nil + } + this, ok := o.(*dag.This) + if !ok { + return nil + } + return this } // This includes a null Expr, which can happen if the AST is missing // a field or sets it to null. diff --git a/compiler/semantic/op.go b/compiler/semantic/op.go index be1f5b658e..23e0e94f9d 100644 --- a/compiler/semantic/op.go +++ b/compiler/semantic/op.go @@ -85,7 +85,7 @@ func (a *analyzer) semTrunk(trunk ast.Trunk, out dag.Seq) (dag.Seq, error) { func (a *analyzer) semSource(source ast.Source) ([]dag.Op, error) { switch p := source.(type) { case *ast.File: - sortKey, err := semSortKey(p.SortKey) + sortKey, err := a.semSortKey(p.SortKey) if err != nil { return nil, err } @@ -110,7 +110,7 @@ func (a *analyzer) semSource(source ast.Source) ([]dag.Op, error) { }, }, nil case *ast.HTTP: - sortKey, err := semSortKey(p.SortKey) + sortKey, err := a.semSortKey(p.SortKey) if err != nil { return nil, err } @@ -191,13 +191,13 @@ func unmarshalHeaders(val *zed.Value) (map[string][]string, error) { } return headers, nil } -func semSortKey(p *ast.SortKey) (order.SortKey, error) { +func (a *analyzer) semSortKey(p *ast.SortKey) (order.SortKey, error) { if p == nil || p.Keys == nil { return order.Nil, nil } var keys field.List for _, key := range p.Keys { - this := DotExprToFieldPath(key) + this := a.dotExprToFieldPath(key) if this == nil { return order.Nil, fmt.Errorf("bad key expr of type %T in file operator", key) } @@ -410,19 +410,25 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { case *ast.From: return a.semFrom(o, seq) case *ast.Summarize: - keys, err := a.semAssignments(o.Keys, true) + keys, err := a.semAssignments(o.Keys) if err != nil { return nil, err } + if assignHasDynamicPath(keys) { + return nil, errors.New("summarize: keys must be static field references") + } if len(keys) == 0 && len(o.Aggs) == 1 { if seq := a.singletonAgg(o.Aggs[0], seq); seq != nil { return seq, nil } } - aggs, err := a.semAssignments(o.Aggs, true) + aggs, err := a.semAssignments(o.Aggs) if err != nil { return nil, err } + if assignHasDynamicPath(keys) { + return nil, errors.New("summarize: aggs must be static field references") + } // Note: InputSortDir is copied in here but it's not meaningful // coming from a parser AST, only from a worker using the kernel DSL, // which is another reason why we need separate parser and kernel ASTs. @@ -497,16 +503,26 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { case *ast.Shape: return append(seq, &dag.Shape{Kind: "Shape"}), nil case *ast.Cut: - assignments, err := a.semAssignments(o.Args, false) + assignments, err := a.semAssignments(o.Args) if err != nil { return nil, err } + // Collect static paths so we can check on what is available. + var fields field.List + for _, a := range assignments { + if this := a.LHS.StaticPath(); this != nil { + fields = append(fields, this.Path) + } + } + if _, err = zed.NewRecordBuilder(a.zctx, fields); err != nil { + return nil, fmt.Errorf("cut: %w", err) + } return append(seq, &dag.Cut{ Kind: "Cut", Args: assignments, }), nil case *ast.Drop: - args, err := a.semFields(o.Args) + args, err := a.semStaticFields(o.Args) if err != nil { return nil, fmt.Errorf("drop: %w", err) } @@ -596,10 +612,33 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { Limit: o.Limit, }), nil case *ast.Put: - assignments, err := a.semAssignments(o.Args, false) + assignments, err := a.semAssignments(o.Args) if err != nil { return nil, err } + // We can do collision checking on static paths, so check what we can. + var fields field.List + for _, a := range assignments { + if this := a.LHS.StaticPath(); this != nil { + fields = append(fields, this.Path) + } + } + for i, f := range fields { + if f.IsEmpty() { + return nil, fmt.Errorf("put: LHS cannot be 'this' (use 'yield' operator)") + } + for j, c := range fields { + if i == j { + continue + } + if f.Equal(c) { + return nil, fmt.Errorf("put: multiple assignments to %s", f) + } + if c.HasStrictPrefix(f) { + return nil, fmt.Errorf("put: conflicting nested assignments to %s and %s", f, c) + } + } + } return append(seq, &dag.Put{ Kind: "Put", Args: assignments, @@ -611,31 +650,33 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { } return append(seq, converted), nil case *ast.Rename: - var assignments []dag.Assignment - for _, fa := range o.Args { - dst, err := a.semField(fa.LHS) + var dsts, srcs []*dag.This + for _, arg := range o.Args { + dst, err := a.semStaticField(arg.LHS) if err != nil { - return nil, errors.New("'rename' requires explicit field references") + return nil, errors.New("rename: requires explicit field references") } - src, err := a.semField(fa.RHS) + src, err := a.semStaticField(arg.RHS) if err != nil { - return nil, errors.New("'rename' requires explicit field references") + return nil, errors.New("rename: requires explicit field references") } if len(dst.Path) != len(src.Path) { - return nil, fmt.Errorf("cannot rename %s to %s", src, dst) + return nil, fmt.Errorf("rename: cannot rename %s to %s", src, dst) } // Check that the prefixes match and, if not, report first place // that they don't. for i := 0; i <= len(src.Path)-2; i++ { if src.Path[i] != dst.Path[i] { - return nil, fmt.Errorf("cannot rename %s to %s (differ in %s vs %s)", src, dst, src.Path[i], dst.Path[i]) + return nil, fmt.Errorf("rename: cannot rename %s to %s (differ in %s vs %s)", src, dst, src.Path[i], dst.Path[i]) } } - assignments = append(assignments, dag.Assignment{Kind: "Assignment", LHS: dst, RHS: src}) + dsts = append(dsts, dst) + srcs = append(srcs, src) } return append(seq, &dag.Rename{ Kind: "Rename", - Args: assignments, + Dsts: dsts, + Srcs: srcs, }), nil case *ast.Fuse: return append(seq, &dag.Fuse{Kind: "Fuse"}), nil @@ -652,7 +693,7 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { if err != nil { return nil, err } - assignments, err := a.semAssignments(o.Args, false) + assignments, err := a.semAssignments(o.Args) if err != nil { return nil, err } @@ -763,14 +804,14 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { Aggs: []dag.Assignment{ { Kind: "Assignment", - LHS: &dag.This{Kind: "This", Path: field.Path{"sample"}}, + LHS: dag.NewStaticPath("sample"), RHS: &dag.Agg{Kind: "Agg", Name: "any", Expr: e}, }, }, Keys: []dag.Assignment{ { Kind: "Assignment", - LHS: &dag.This{Kind: "This", Path: field.Path{"shape"}}, + LHS: dag.NewStaticPath("shape"), RHS: &dag.Call{Kind: "Call", Name: "typeof", Args: []dag.Expr{e}}, }, }, @@ -812,15 +853,15 @@ func (a *analyzer) singletonAgg(agg ast.Assignment, seq dag.Seq) dag.Seq { if agg.LHS != nil { return nil } - out, err := a.semAssignment(agg, true) + out, err := a.semAssignment(agg) if err != nil { return nil } yield := &dag.Yield{ Kind: "Yield", } - this, ok := out.LHS.(*dag.This) - if !ok || len(this.Path) != 1 { + this := out.LHS.StaticPath() + if this == nil || len(this.Path) != 1 { return nil } yield.Exprs = append(yield.Exprs, this) @@ -942,14 +983,17 @@ func (a *analyzer) semOpAssignment(p *ast.OpAssignment) (dag.Op, error) { // Parition assignments into agg vs. puts. // It's okay to pass false here for the summarize bool because // semAssignment will check if the RHS is a dag.Agg and override. - assignment, err := a.semAssignment(assign, false) + a, err := a.semAssignment(assign) if err != nil { return nil, err } - if _, ok := assignment.RHS.(*dag.Agg); ok { - aggs = append(aggs, assignment) + if _, ok := a.RHS.(*dag.Agg); ok { + if a.LHS.StaticPath() == nil { + return nil, errors.New("summarize: illegal use of dynamic assignment in aggregation") + } + aggs = append(aggs, a) } else { - puts = append(puts, assignment) + puts = append(puts, a) } } if len(puts) > 0 && len(aggs) > 0 { @@ -1031,7 +1075,7 @@ func (a *analyzer) semCallOp(call *ast.Call, seq dag.Seq) (dag.Seq, error) { Aggs: []dag.Assignment{ { Kind: "Assignment", - LHS: &dag.This{Kind: "This", Path: field.Path{call.Name}}, + LHS: dag.NewStaticPath(call.Name), RHS: agg, }, }, diff --git a/compiler/semantic/sql.go b/compiler/semantic/sql.go index 6c6f1d9f8b..fa446d0be9 100644 --- a/compiler/semantic/sql.go +++ b/compiler/semantic/sql.go @@ -228,7 +228,7 @@ func (a *analyzer) convertSQLAlias(e ast.Expr) (*dag.Cut, string, error) { if e == nil { return nil, "", nil } - fld, err := a.semField(e) + fld, err := a.semStaticField(e) if err != nil { return nil, "", fmt.Errorf("illegal SQL alias: %w", err) } @@ -238,7 +238,7 @@ func (a *analyzer) convertSQLAlias(e ast.Expr) (*dag.Cut, string, error) { } assignment := dag.Assignment{ Kind: "Assignment", - LHS: fld, + LHS: dag.NewStaticPath(fld.Path...), RHS: &dag.This{Kind: "This"}, } return &dag.Cut{ @@ -291,7 +291,7 @@ func (a *analyzer) convertSQLJoin(leftPath []dag.Op, sqlJoin ast.SQLJoin) ([]dag } alias := dag.Assignment{ Kind: "Assignment", - LHS: &dag.This{Kind: "This", Path: field.Path{aliasID}}, + LHS: dag.NewStaticPath(aliasID), RHS: &dag.This{Kind: "This", Path: field.Path{aliasID}}, } join := &dag.Join{ @@ -433,7 +433,7 @@ func (a *analyzer) newSQLSelection(assignments []ast.Assignment) (sqlSelection, if err != nil { return nil, err } - assignment, err := a.semAssignment(assign, false) + assignment, err := a.semAssignment(assign) if err != nil { return nil, err } @@ -515,18 +515,18 @@ func (a *analyzer) isAgg(e ast.Expr) (*dag.Agg, error) { } func (a *analyzer) deriveAs(as ast.Assignment) (field.Path, error) { - sa, err := a.semAssignment(as, false) + sa, err := a.semAssignment(as) if err != nil { return nil, fmt.Errorf("AS clause of SELECT: %w", err) } - if f, ok := sa.LHS.(*dag.This); ok { - return f.Path, nil + if this := sa.LHS.StaticPath(); this != nil { + return this.Path, nil } return nil, fmt.Errorf("AS clause not a field: %w", err) } func (a *analyzer) sqlField(e ast.Expr) (field.Path, error) { - f, err := a.semField(e) + f, err := a.semStaticField(e) if err != nil { return nil, errors.New("expression is not a field reference") } diff --git a/compiler/ztests/dynamic-field-cut.yaml b/compiler/ztests/dynamic-field-cut.yaml new file mode 100644 index 0000000000..e75c488bd1 --- /dev/null +++ b/compiler/ztests/dynamic-field-cut.yaml @@ -0,0 +1,41 @@ +script: | + echo '{a:"hi",b:"hello"}' | zq -z 'cut this[a][b] := "world"' - + echo "// ===" + echo '{a:{b:"hello"}}' | zq -z 'cut this[a.b]:="world"' - + echo "// ===" + echo '{a:"hello"}' | zq -z 'cut this[this["a"]] := "world"' - + echo "// ===" + echo '{a:{},b:"hello"}' | zq -z 'cut a[b] := "world"' - + echo "// ===" + echo '{a:"foo"}' | zq -z 'cut this[a]["bar"] := "baz"' - + echo "// ===" + # runtime error cases + echo '{a:"hello",b:"hello"}' | zq -z 'cut this[a] := "world1", this[b] := "world2"' - + echo "// ===" + echo '{a:"foo",b:"bar"}' | zq -z 'cut this[a][b] := "world", this[a] := "world"' - + echo "// ===" + # will display nothing because put ignores missing error type + echo {} | zq -z 'cut this[doesnotexist] := "world"' - + # semantic error cases + ! echo {} | zq -z 'op foo(): ( yield "error" ) cut this[foo] := "hello world"' - + +outputs: + - name: stdout + data: | + {hi:{hello:"world"}} + // === + {hello:"world"} + // === + {hello:"world"} + // === + {a:{hello:"world"}} + // === + {foo:{bar:"baz"}} + // === + error("cut: duplicate field: \"hello\"") + // === + error("cut: duplicate field: \"foo\"") + // === + - name: stderr + data: | + symbol "foo" is not bound to an expression diff --git a/compiler/ztests/dynamic-field-put.yaml b/compiler/ztests/dynamic-field-put.yaml new file mode 100644 index 0000000000..5b70eb75e8 --- /dev/null +++ b/compiler/ztests/dynamic-field-put.yaml @@ -0,0 +1,41 @@ +script: | + echo '{a:"hi",b:"hello"}' | zq -z 'this[a][b] := "world" | drop a, b' - + echo "// ===" + echo '{a:{b:"hello"}}' | zq -z 'this[a.b]:="world" | drop a' - + echo "// ===" + echo '{a:"hello"}' | zq -z 'this[this["a"]] := "world" | drop a' - + echo "// ===" + echo '{a:{},b:"hello"}' | zq -z 'a[b] := "world" | drop b' - + echo "// ===" + echo '{a:"foo"}' | zq -z 'this[a]["bar"] := "baz" | cut foo' - + echo "// ===" + # runtime error cases + echo '{a:"hello",b:"hello"}' | zq -z 'this[a] := "world1", this[b] := "world2"' - + echo "// ===" + echo '{a:"foo",b:"bar"}' | zq -z 'this[a][b] := "world", this[a] := "world"' - + echo "// ===" + # will display nothing because put ignores missing error type + echo {} | zq -z 'this[doesnotexist] := "world"' - + # semantic error cases + ! echo {} | zq -z 'op foo(): ( yield "error" ) put this[foo] := "hello world"' - + +outputs: + - name: stdout + data: | + {hi:{hello:"world"}} + // === + {hello:"world"} + // === + {hello:"world"} + // === + {a:{hello:"world"}} + // === + {foo:{bar:"baz"}} + // === + error({message:"put: multiple assignments to hello",on:{a:"hello",b:"hello"}}) + // === + error({message:"put: conflicting nested assignments to foo and foo.bar",on:{a:"foo",b:"bar"}}) + // === + - name: stderr + data: | + symbol "foo" is not bound to an expression diff --git a/compiler/ztests/where-on-func.yaml b/compiler/ztests/where-on-func.yaml index 3e6ca5dd70..21ae7245d6 100644 --- a/compiler/ztests/where-on-func.yaml +++ b/compiler/ztests/where-on-func.yaml @@ -1,3 +1,3 @@ zed: cut hex := hex(this) where this % 2 == 0 -errorRE: "rhs of assignment expression: 'where' clause on non-aggregation function: hex" +errorRE: "'where' clause on non-aggregation function: hex" diff --git a/docs/language/operators/rename.md b/docs/language/operators/rename.md index f224f84ab5..70be76e194 100644 --- a/docs/language/operators/rename.md +++ b/docs/language/operators/rename.md @@ -49,7 +49,7 @@ echo '{a:1,r:{b:2,c:3}}' | zq -z 'rename w:=r.b' - ``` => ```mdtest-output -cannot rename r.b to w +rename: cannot rename r.b to w ``` _Record literals can be used instead of rename for mutation_ ```mdtest-command diff --git a/docs/tutorials/schools.md b/docs/tutorials/schools.md index 1369a1cfb0..790e3edde8 100644 --- a/docs/tutorials/schools.md +++ b/docs/tutorials/schools.md @@ -843,7 +843,7 @@ zq -Z 'rename toplevel:=outer.inner' nested.zson ``` produces this compile-time error message and the query is not run: ```mdtest-output -cannot rename outer.inner to toplevel +rename: cannot rename outer.inner to toplevel ``` This goal could instead be achieved by combining [`put`](#44-put) and [`drop`](#42-drop), e.g., diff --git a/runtime/expr/cutter.go b/runtime/expr/cutter.go index 1a8a10b6cd..c21b7949fb 100644 --- a/runtime/expr/cutter.go +++ b/runtime/expr/cutter.go @@ -1,22 +1,21 @@ package expr import ( - "errors" - "github.com/brimdata/zed" "github.com/brimdata/zed/pkg/field" ) type Cutter struct { zctx *zed.Context - builder *zed.RecordBuilder fieldRefs field.List fieldExprs []Evaluator - typeCache []zed.Type + lvals []*Path outTypes *zed.TypeVectorTable recordTypes map[int]*zed.TypeRecord + typeCache []zed.Type - droppers []*Dropper + builders map[string]*zed.RecordBuilder + droppers map[string]*Dropper dropperCache []*Dropper dirty bool quiet bool @@ -26,31 +25,18 @@ type Cutter struct { // the Cutter copies fields that are not in fieldnames. If complement // is false, the Cutter copies any fields in fieldnames, where targets // specifies the copied field names. -func NewCutter(zctx *zed.Context, fieldRefs field.List, fieldExprs []Evaluator) (*Cutter, error) { - for _, f := range fieldRefs { - if f.IsEmpty() { - return nil, errors.New("cut: 'this' not allowed (use record literal)") - } - } - var b *zed.RecordBuilder - if len(fieldRefs) == 0 || !fieldRefs[0].IsEmpty() { - // A root field will cause NewFieldBuilder to panic. - var err error - b, err = zed.NewRecordBuilder(zctx, fieldRefs) - if err != nil { - return nil, err - } - } +func NewCutter(zctx *zed.Context, fieldRefs []*Path, fieldExprs []Evaluator) (*Cutter, error) { n := len(fieldRefs) return &Cutter{ zctx: zctx, - builder: b, - fieldRefs: fieldRefs, + builders: make(map[string]*zed.RecordBuilder), + fieldRefs: make(field.List, n), fieldExprs: fieldExprs, - typeCache: make([]zed.Type, len(fieldRefs)), + lvals: fieldRefs, outTypes: zed.NewTypeVectorTable(), recordTypes: make(map[int]*zed.TypeRecord), - droppers: make([]*Dropper, n), + typeCache: make([]zed.Type, len(fieldRefs)), + droppers: make(map[string]*Dropper), dropperCache: make([]*Dropper, n), }, nil } @@ -67,30 +53,39 @@ func (c *Cutter) FoundCut() bool { // receiver's configuration. If the resulting record would be empty, Apply // returns zed.Missing. func (c *Cutter) Eval(ectx Context, in *zed.Value) *zed.Value { + rb, paths, verr := c.lookupBuilder(ectx, in) + if verr != nil { + return verr + } types := c.typeCache - b := c.builder - b.Reset() droppers := c.dropperCache[:0] for k, e := range c.fieldExprs { val := e.Eval(ectx, in) if val.IsQuiet() { // ignore this field - if c.droppers[k] == nil { - c.droppers[k] = NewDropper(c.zctx, c.fieldRefs[k:k+1]) + // This no worky because the path may be dynamic. + pathID := paths[k].String() + if c.droppers[pathID] == nil { + c.droppers[pathID] = NewDropper(c.zctx, field.List{paths[k]}) } - droppers = append(droppers, c.droppers[k]) - b.Append(val.Bytes()) + droppers = append(droppers, c.droppers[pathID]) + rb.Append(val.Bytes()) types[k] = zed.TypeNull continue } - b.Append(val.Bytes()) + rb.Append(val.Bytes()) types[k] = val.Type } - bytes, err := b.Encode() + // check paths + bytes, err := rb.Encode() if err != nil { panic(err) } - rec := ectx.NewValue(c.lookupTypeRecord(types), bytes) + typ, err := c.lookupTypeRecord(types, rb) + if err != nil { + return ectx.CopyValue(*c.zctx.NewErrorf("cut: %s", err)) + } + rec := ectx.NewValue(typ, bytes) for _, d := range droppers { rec = d.Eval(ectx, rec) } @@ -100,12 +95,37 @@ func (c *Cutter) Eval(ectx Context, in *zed.Value) *zed.Value { return rec } -func (c *Cutter) lookupTypeRecord(types []zed.Type) *zed.TypeRecord { +func (c *Cutter) lookupBuilder(ectx Context, in *zed.Value) (*zed.RecordBuilder, field.List, *zed.Value) { + paths := c.fieldRefs[:0] + for _, p := range c.lvals { + path, verr := p.Eval(ectx, in) + if verr != nil { + // XXX What to do with quiet? + return nil, nil, verr + } + if path.IsEmpty() { + return nil, nil, ectx.CopyValue(*c.zctx.WrapError("cut: 'this' not allowed (use record literal)", in)) + } + paths = append(paths, path) + } + builder, ok := c.builders[paths.String()] + if !ok { + var err error + if builder, err = zed.NewRecordBuilder(c.zctx, paths); err != nil { + return nil, nil, ectx.CopyValue(*c.zctx.NewErrorf("cut: %s", err)) + } + c.builders[paths.String()] = builder + } + builder.Reset() + return builder, paths, nil +} + +func (c *Cutter) lookupTypeRecord(types []zed.Type, builder *zed.RecordBuilder) (*zed.TypeRecord, error) { id := c.outTypes.Lookup(types) typ, ok := c.recordTypes[id] if !ok { - typ = c.builder.Type(types) + typ = builder.Type(types) c.recordTypes[id] = typ } - return typ + return typ, nil } diff --git a/runtime/expr/eval.go b/runtime/expr/eval.go index b62be5c94d..5751244482 100644 --- a/runtime/expr/eval.go +++ b/runtime/expr/eval.go @@ -807,19 +807,23 @@ func (c *Call) Eval(ectx Context, this *zed.Value) *zed.Value { } type Assignment struct { - LHS field.Path + LHS *Path RHS Evaluator } -func NewAssignments(zctx *zed.Context, dsts field.List, srcs field.List) (field.List, []Evaluator) { +func NewAssignments(zctx *zed.Context, dsts field.List, srcs field.List) ([]*Path, []Evaluator) { if len(srcs) != len(dsts) { panic("NewAssignments: argument mismatch") } var resolvers []Evaluator - var fields field.List + var paths []*Path for k, dst := range dsts { - fields = append(fields, dst) + elems := make([]PathElem, 0, len(dst)) + for _, d := range dst { + elems = append(elems, &StaticPathElem{Name: d}) + } + paths = append(paths, NewPath(elems)) resolvers = append(resolvers, NewDottedExpr(zctx, srcs[k])) } - return fields, resolvers + return paths, resolvers } diff --git a/runtime/expr/path.go b/runtime/expr/path.go new file mode 100644 index 0000000000..6b1f0f0af9 --- /dev/null +++ b/runtime/expr/path.go @@ -0,0 +1,66 @@ +package expr + +import ( + "github.com/brimdata/zed" + "github.com/brimdata/zed/pkg/field" +) + +type PathElem interface { + Eval(ectx Context, this *zed.Value) (string, *zed.Value) +} + +type Path struct { + elems []PathElem + cache field.Path +} + +func NewPath(evals []PathElem) *Path { + return &Path{elems: evals} +} + +// Eval returns the path of the lval. If there's an error the returned *zed.Value +// will not be nill. +func (l *Path) Eval(ectx Context, this *zed.Value) (field.Path, *zed.Value) { + l.cache = l.cache[:0] + for _, e := range l.elems { + name, val := e.Eval(ectx, this) + if val != nil { + return nil, val + } + l.cache = append(l.cache, name) + } + return l.cache, nil +} + +type StaticPathElem struct { + Name string +} + +func (l *StaticPathElem) Eval(_ Context, _ *zed.Value) (string, *zed.Value) { + return l.Name, nil +} + +type ExprPathElem struct { + caster Evaluator + eval Evaluator +} + +func NewPathElemExpr(zctx *zed.Context, e Evaluator) *ExprPathElem { + return &ExprPathElem{ + eval: e, + caster: LookupPrimitiveCaster(zctx, zed.TypeString), + } +} + +func (l *ExprPathElem) Eval(ectx Context, this *zed.Value) (string, *zed.Value) { + val := l.eval.Eval(ectx, this) + if val.IsError() { + return "", val + } + if !val.IsString() { + if val = l.caster.Eval(ectx, val); val.IsError() { + return "", val + } + } + return val.AsString(), nil +} diff --git a/runtime/expr/putter.go b/runtime/expr/putter.go index cc0e21dfa5..159b0e01e1 100644 --- a/runtime/expr/putter.go +++ b/runtime/expr/putter.go @@ -19,12 +19,12 @@ type Putter struct { zctx *zed.Context builder zcode.Builder clauses []Assignment - // valClauses is a slice to avoid re-allocating for every value - valClauses []Assignment + rules map[int]map[string]putRule + warned map[string]struct{} // vals is a slice to avoid re-allocating for every value - vals []zed.Value - rules map[int]putRule - warned map[string]struct{} + vals []zed.Value + // paths is a slice to avoid re-allocating for every path + paths []field.Path } // A putRule describes how a given record type is modified by describing @@ -39,43 +39,32 @@ type putRule struct { step putStep } -func NewPutter(zctx *zed.Context, clauses []Assignment) (*Putter, error) { - for i, p := range clauses { - if p.LHS.IsEmpty() { - return nil, fmt.Errorf("put: LHS cannot be 'this' (use 'yield' operator)") - } - for j, c := range clauses { - if i == j { - continue - } - if p.LHS.Equal(c.LHS) { - return nil, fmt.Errorf("put: multiple assignments to %s", p.LHS) - } - if c.LHS.HasStrictPrefix(p.LHS) { - return nil, fmt.Errorf("put: conflicting nested assignments to %s and %s", p.LHS, c.LHS) - } - } - } +func NewPutter(zctx *zed.Context, clauses []Assignment) *Putter { return &Putter{ zctx: zctx, clauses: clauses, vals: make([]zed.Value, len(clauses)), - rules: make(map[int]putRule), + rules: make(map[int]map[string]putRule), warned: make(map[string]struct{}), - }, nil + } } -func (p *Putter) eval(ectx Context, this *zed.Value) ([]zed.Value, []Assignment) { - p.valClauses = p.valClauses[:0] +func (p *Putter) eval(ectx Context, this *zed.Value) ([]zed.Value, []field.Path, *zed.Value) { p.vals = p.vals[:0] + p.paths = p.paths[:0] for _, cl := range p.clauses { val := *cl.RHS.Eval(ectx, this) - if !val.IsQuiet() { - p.vals = append(p.vals, val) - p.valClauses = append(p.valClauses, cl) + if val.IsQuiet() { + continue + } + p.vals = append(p.vals, val) + path, verr := cl.LHS.Eval(ectx, this) + if verr != nil { + return nil, nil, verr } + p.paths = append(p.paths, path) } - return p.vals, p.valClauses + return p.vals, p.paths, nil } // A putStep is a recursive data structure encoding a series of steps to be @@ -175,20 +164,20 @@ func (ig *getter) nth(n int) (zcode.Bytes, error) { return nil, fmt.Errorf("getter.nth: array index %d out of bounds", n) } -func findOverwriteClause(path field.Path, clauses []Assignment) (int, field.Path, bool) { - for i, cand := range clauses { - if path.Equal(cand.LHS) || cand.LHS.HasStrictPrefix(path) { - return i, cand.LHS, true +func findOverwriteClause(path field.Path, paths []field.Path) (int, field.Path, bool) { + for i, lpath := range paths { + if path.Equal(lpath) || lpath.HasStrictPrefix(path) { + return i, lpath, true } } return -1, nil, false } -func (p *Putter) deriveSteps(inType *zed.TypeRecord, vals []zed.Value, clauses []Assignment) (putStep, zed.Type) { - return p.deriveRecordSteps(field.Path{}, inType.Fields, vals, clauses) +func (p *Putter) deriveSteps(inType *zed.TypeRecord, vals []zed.Value, paths []field.Path) (putStep, zed.Type) { + return p.deriveRecordSteps(field.Path{}, inType.Fields, vals, paths) } -func (p *Putter) deriveRecordSteps(parentPath field.Path, inFields []zed.Field, vals []zed.Value, clauses []Assignment) (putStep, *zed.TypeRecord) { +func (p *Putter) deriveRecordSteps(parentPath field.Path, inFields []zed.Field, vals []zed.Value, paths []field.Path) (putStep, *zed.TypeRecord) { s := putStep{op: putRecord} var fields []zed.Field @@ -197,7 +186,7 @@ func (p *Putter) deriveRecordSteps(parentPath field.Path, inFields []zed.Field, // assignments. for i, f := range inFields { path := append(parentPath, f.Name) - matchIndex, matchPath, found := findOverwriteClause(path, clauses) + matchIndex, matchPath, found := findOverwriteClause(path, paths) switch { // input not overwritten by assignment: copy input value. case !found: @@ -217,13 +206,13 @@ func (p *Putter) deriveRecordSteps(parentPath field.Path, inFields []zed.Field, fields = append(fields, zed.NewField(f.Name, vals[matchIndex].Type)) // input record field overwritten by nested assignment: recurse. case len(path) < len(matchPath) && zed.IsRecordType(f.Type): - nestedStep, typ := p.deriveRecordSteps(path, zed.TypeRecordOf(f.Type).Fields, vals, clauses) + nestedStep, typ := p.deriveRecordSteps(path, zed.TypeRecordOf(f.Type).Fields, vals, paths) nestedStep.index = i s.append(nestedStep) fields = append(fields, zed.NewField(f.Name, typ)) // input non-record field overwritten by nested assignment(s): recurse. case len(path) < len(matchPath) && !zed.IsRecordType(f.Type): - nestedStep, typ := p.deriveRecordSteps(path, []zed.Field{}, vals, clauses) + nestedStep, typ := p.deriveRecordSteps(path, []zed.Field{}, vals, paths) nestedStep.index = i s.append(nestedStep) fields = append(fields, zed.NewField(f.Name, typ)) @@ -232,30 +221,30 @@ func (p *Putter) deriveRecordSteps(parentPath field.Path, inFields []zed.Field, } } - appendClause := func(cl Assignment) bool { - if !cl.LHS.HasPrefix(parentPath) { + appendClause := func(lpath field.Path) bool { + if !lpath.HasPrefix(parentPath) { return false } - return !hasField(cl.LHS[len(parentPath)], fields) + return !hasField(lpath[len(parentPath)], fields) } // Then, look at put assignments to see if there are any new fields to append. - for i, cl := range clauses { - if appendClause(cl) { + for i, lpath := range paths { + if appendClause(lpath) { switch { // Append value at this level - case len(cl.LHS) == len(parentPath)+1: + case len(lpath) == len(parentPath)+1: s.append(putStep{ op: putFromClause, container: zed.IsContainerType(vals[i].Type), index: i, }) - fields = append(fields, zed.NewField(cl.LHS[len(parentPath)], vals[i].Type)) + fields = append(fields, zed.NewField(lpath[len(parentPath)], vals[i].Type)) // Appended and nest. For example, this would happen with "put b.c=1" applied to a record {"a": 1}. - case len(cl.LHS) > len(parentPath)+1: - path := append(parentPath, cl.LHS[len(parentPath)]) - nestedStep, typ := p.deriveRecordSteps(path, []zed.Field{}, vals, clauses) + case len(lpath) > len(parentPath)+1: + path := append(parentPath, lpath[len(parentPath)]) + nestedStep, typ := p.deriveRecordSteps(path, []zed.Field{}, vals, paths) nestedStep.index = -1 - fields = append(fields, zed.NewField(cl.LHS[len(parentPath)], typ)) + fields = append(fields, zed.NewField(lpath[len(parentPath)], typ)) s.append(nestedStep) } } @@ -273,19 +262,48 @@ func hasField(name string, fields []zed.Field) bool { }) } -func (p *Putter) lookupRule(inType *zed.TypeRecord, vals []zed.Value, clauses []Assignment) putRule { - rule, ok := p.rules[inType.ID()] +func (p *Putter) lookupRule(inType *zed.TypeRecord, vals []zed.Value, fields field.List) (putRule, error) { + m, ok := p.rules[inType.ID()] + if !ok { + m = make(map[string]putRule) + p.rules[inType.ID()] = m + } + rule, ok := m[fields.String()] if ok && sameTypes(rule.clauseTypes, vals) { - return rule + return rule, nil + } + // first check fields + if err := checkFields(fields); err != nil { + return putRule{}, err } - step, typ := p.deriveSteps(inType, vals, clauses) + step, typ := p.deriveSteps(inType, vals, fields) var clauseTypes []zed.Type for _, val := range vals { clauseTypes = append(clauseTypes, val.Type) } rule = putRule{typ, clauseTypes, step} - p.rules[inType.ID()] = rule - return rule + p.rules[inType.ID()][fields.String()] = rule + return rule, nil +} + +func checkFields(fields field.List) error { + for i, f := range fields { + if f.IsEmpty() { + return fmt.Errorf("put: LHS cannot be 'this' (use 'yield' operator)") + } + for j, c := range fields { + if i == j { + continue + } + if f.Equal(c) { + return fmt.Errorf("put: multiple assignments to %s", f) + } + if c.HasStrictPrefix(f) { + return fmt.Errorf("put: conflicting nested assignments to %s and %s", f, c) + } + } + } + return nil } func sameTypes(types []zed.Type, vals []zed.Value) bool { @@ -303,11 +321,17 @@ func (p *Putter) Eval(ectx Context, this *zed.Value) *zed.Value { } return ectx.CopyValue(*p.zctx.WrapError("put: not a record", this)) } - vals, clauses := p.eval(ectx, this) + vals, paths, verr := p.eval(ectx, this) + if verr != nil { + return verr + } if len(vals) == 0 { return this } - rule := p.lookupRule(recType, vals, clauses) + rule, err := p.lookupRule(recType, vals, paths) + if err != nil { + return ectx.CopyValue(*p.zctx.WrapError(err.Error(), this)) + } bytes := rule.step.build(this.Bytes(), &p.builder, vals) return ectx.NewValue(rule.typ, bytes) } diff --git a/runtime/expr/ztests/cut-dup-fields.yaml b/runtime/expr/ztests/cut-dup-fields.yaml index c476641af2..0a1d724c0e 100644 --- a/runtime/expr/ztests/cut-dup-fields.yaml +++ b/runtime/expr/ztests/cut-dup-fields.yaml @@ -3,6 +3,7 @@ script: | ! zq -z "cut rec.sub1,rec.sub1" in.zson ! zq -z "cut rec.sub,rec.sub.sub" in.zson ! zq -z "cut rec.sub.sub,rec.sub" in.zson + # XXX Add runtime evaluatable cut (i.e., dynamic path). inputs: - name: in.zson @@ -12,7 +13,7 @@ inputs: outputs: - name: stderr data: | - duplicate field: "rec" - duplicate field: "rec.sub1" - duplicate field: "rec.sub.sub" - duplicate field: "rec.sub" + cut: duplicate field: "rec" + cut: duplicate field: "rec.sub1" + cut: duplicate field: "rec.sub.sub" + cut: duplicate field: "rec.sub" diff --git a/runtime/expr/ztests/cut-not-adjacent.yaml b/runtime/expr/ztests/cut-not-adjacent.yaml index ac4bcd713b..de13172053 100644 --- a/runtime/expr/ztests/cut-not-adjacent.yaml +++ b/runtime/expr/ztests/cut-not-adjacent.yaml @@ -12,7 +12,7 @@ inputs: outputs: - name: stderr data: | - fields in record rec must be adjacent - fields in record rec1 must be adjacent - fields in record rec1 must be adjacent - fields in record t.rec must be adjacent + cut: fields in record rec must be adjacent + cut: fields in record rec1 must be adjacent + cut: fields in record rec1 must be adjacent + cut: fields in record t.rec must be adjacent diff --git a/runtime/expr/ztests/rename-error-move.yaml b/runtime/expr/ztests/rename-error-move.yaml index 136fa0cc39..9fc181586e 100644 --- a/runtime/expr/ztests/rename-error-move.yaml +++ b/runtime/expr/ztests/rename-error-move.yaml @@ -3,4 +3,4 @@ zed: rename dst:=id.resp_h input: | {id:{orig_h:10.164.94.120,orig_p:39681(port=uint16),resp_h:10.47.3.155,resp_p:3389(port)}} -errorRE: "cannot rename id.resp_h to dst" +errorRE: "rename: cannot rename id.resp_h to dst" diff --git a/runtime/op/groupby/groupby.go b/runtime/op/groupby/groupby.go index 1b3a5d8895..090e1e3e6e 100644 --- a/runtime/op/groupby/groupby.go +++ b/runtime/op/groupby/groupby.go @@ -109,10 +109,10 @@ func NewAggregator(ctx context.Context, zctx *zed.Context, keyRefs, keyExprs, ag }, nil } -func New(octx *op.Context, parent zbuf.Puller, keys []expr.Assignment, aggNames field.List, aggs []*expr.Aggregator, limit int, inputSortDir order.Direction, partialsIn, partialsOut bool) (*Op, error) { - names := make(field.List, 0, len(keys)+len(aggNames)) - for _, e := range keys { - names = append(names, e.LHS) +func New(octx *op.Context, parent zbuf.Puller, keyPaths []field.Path, keyVals []expr.Evaluator, aggNames field.List, aggs []*expr.Aggregator, limit int, inputSortDir order.Direction, partialsIn, partialsOut bool) (*Op, error) { + names := make(field.List, 0, len(keyPaths)+len(aggNames)) + for _, p := range keyPaths { + names = append(names, p) } names = append(names, aggNames...) builder, err := zed.NewRecordBuilder(octx.Zctx, names) @@ -123,11 +123,11 @@ func New(octx *op.Context, parent zbuf.Puller, keys []expr.Assignment, aggNames for _, fieldName := range aggNames { valRefs = append(valRefs, expr.NewDottedExpr(octx.Zctx, fieldName)) } - keyRefs := make([]expr.Evaluator, 0, len(keys)) - keyExprs := make([]expr.Evaluator, 0, len(keys)) - for _, e := range keys { - keyRefs = append(keyRefs, expr.NewDottedExpr(octx.Zctx, e.LHS)) - keyExprs = append(keyExprs, e.RHS) + keyRefs := make([]expr.Evaluator, 0, len(keyPaths)) + keyExprs := make([]expr.Evaluator, 0, len(keyPaths)) + for i, p := range keyPaths { + keyRefs = append(keyRefs, expr.NewDottedExpr(octx.Zctx, p)) + keyExprs = append(keyExprs, keyVals[i]) } agg, err := NewAggregator(octx.Context, octx.Zctx, keyRefs, keyExprs, valRefs, aggs, builder, limit, inputSortDir, partialsIn, partialsOut) if err != nil { diff --git a/runtime/op/join/join.go b/runtime/op/join/join.go index d4238a93b4..d641f20541 100644 --- a/runtime/op/join/join.go +++ b/runtime/op/join/join.go @@ -7,7 +7,6 @@ import ( "github.com/brimdata/zed" "github.com/brimdata/zed/order" - "github.com/brimdata/zed/pkg/field" "github.com/brimdata/zed/runtime/expr" "github.com/brimdata/zed/runtime/op" "github.com/brimdata/zed/runtime/op/sort" @@ -35,7 +34,7 @@ type Op struct { } func New(octx *op.Context, anti, inner bool, left, right zbuf.Puller, leftKey, rightKey expr.Evaluator, - leftDir, rightDir order.Direction, lhs field.List, + leftDir, rightDir order.Direction, lhs []*expr.Path, rhs []expr.Evaluator) (*Op, error) { cutter, err := expr.NewCutter(octx.Zctx, lhs, rhs) if err != nil { diff --git a/zfmt/dag.go b/zfmt/dag.go index 713dc6b5d5..c5b7296d32 100644 --- a/zfmt/dag.go +++ b/zfmt/dag.go @@ -121,6 +121,24 @@ func (c *canonDAG) expr(e dag.Expr, parent string) { c.write("search(%s)", e.Value) case *dag.This: c.fieldpath(e.Path) + case *dag.Path: + if p := e.StaticPath(); p != nil { + c.fieldpath(p.Path) + return + } + for k, elem := range e.Path { + if k == 0 { + c.write("this") + } + c.write("[") + switch elem := elem.(type) { + case *dag.This: + c.fieldpath(elem.Path) + case *dag.StaticPathElem: + c.write(zson.QuotedName(elem.Name)) + } + c.write("]") + } case *dag.Var: c.write("%s", e.Name) case *dag.Literal: @@ -422,7 +440,13 @@ func (c *canonDAG) op(p dag.Op) { case *dag.Rename: c.next() c.write("rename ") - c.assignments(p.Args) + for k := range p.Dsts { + c.fieldpath(p.Dsts[k].Path) + c.write(":=") + c.fieldpath(p.Srcs[k].Path) + } + // XXX + // c.assignments(p.Args) case *dag.Fuse: c.next() c.write("fuse")