From fe5477abb815abb662951dc38bb3294fe3a8e590 Mon Sep 17 00:00:00 2001 From: Matthew Nibecker Date: Wed, 8 May 2024 10:20:30 -0700 Subject: [PATCH] Add positional information to semantic errors This commit changes errors produced from semantic analysis so they include a reference to the AST node which produced the error. The allows for highlighting the specific portion of the source that caused a semantic error. This commit also changes how semantic errors are collected in that the semantic analyzer no longer exits as soon as the first error is found and instead goes on the analyze the rest of the AST, reporting all semantic errors in the AST. --- compiler/ast/ast.go | 57 +- compiler/ast/dag/expr.go | 7 + compiler/ast/dag/op.go | 6 + compiler/ast/dag/unpack.go | 2 + compiler/parser/errors.go | 5 +- compiler/semantic/analyzer.go | 38 +- compiler/semantic/errors.go | 18 + compiler/semantic/expr.go | 441 +++++------ compiler/semantic/op.go | 695 ++++++++---------- compiler/ztests/badshaper.yaml | 4 +- compiler/ztests/const-source.yaml | 12 +- compiler/ztests/from-error.yaml | 20 +- compiler/ztests/head.yaml | 12 +- compiler/ztests/summarize-lhs-error.yaml | 12 +- compiler/ztests/tail.yaml | 12 +- compiler/ztests/udf-err.yaml | 12 +- compiler/ztests/where-on-func.yaml | 2 +- docs/language/operators/rename.md | 4 +- docs/language/statements.md | 4 +- docs/tutorials/schools.md | 4 +- lake/ztests/dirs.yaml | 4 +- lake/ztests/drop.yaml | 4 +- lake/ztests/query-error.yaml | 12 +- runtime/sam/expr/ztests/cut-dup-fields.yaml | 16 +- runtime/sam/expr/ztests/cut-not-adjacent.yaml | 16 +- .../sam/expr/ztests/rename-error-move.yaml | 2 +- runtime/sam/op/ztests/cut-dynamic-field.yaml | 4 +- runtime/sam/op/ztests/put-dynamic-field.yaml | 4 +- runtime/sam/op/ztests/user-errors.yaml | 10 +- service/ztests/curl-query-error.yaml | 12 +- service/ztests/python.yaml | 2 +- service/ztests/query-bad-commit.yaml | 4 +- service/ztests/query-error.yaml | 15 +- 33 files changed, 702 insertions(+), 770 deletions(-) create mode 100644 compiler/semantic/errors.go diff --git a/compiler/ast/ast.go b/compiler/ast/ast.go index 8b24aa1730..4806fe437e 100644 --- a/compiler/ast/ast.go +++ b/compiler/ast/ast.go @@ -110,7 +110,7 @@ func (c *Call) End() int { if c.Where != nil { return c.Where.End() } - return c.Rparen + return c.Rparen + 1 } type Cast struct { @@ -180,7 +180,7 @@ type Regexp struct { } func (r *Regexp) Pos() int { return r.PatternPos } -func (r *Regexp) End() int { return r.PatternPos + len(r.Pattern) } +func (r *Regexp) End() int { return r.PatternPos + len(r.Pattern) + 2 } type String struct { Kind string `json:"kind" unpack:""` @@ -432,9 +432,9 @@ type ( NullsFirst bool `json:"nullsfirst"` } Cut struct { - Kind string `json:"kind" unpack:""` - KeywordPos int `json:"keyword_pos"` - Args []Assignment `json:"args"` + Kind string `json:"kind" unpack:""` + KeywordPos int `json:"keyword_pos"` + Args Assignments `json:"args"` } Drop struct { Kind string `json:"kind" unpack:""` @@ -471,10 +471,10 @@ type ( Kind string `json:"kind" unpack:""` // StartPos is not called KeywordPos for Summarize since the "summarize" // keyword is optional. 
- StartPos int `json:"start_pos"` - Limit int `json:"limit"` - Keys []Assignment `json:"keys"` - Aggs []Assignment `json:"aggs"` + StartPos int `json:"start_pos"` + Limit int `json:"limit"` + Keys Assignments `json:"keys"` + Aggs Assignments `json:"aggs"` } Top struct { Kind string `json:"kind" unpack:""` @@ -484,9 +484,9 @@ type ( Flush bool `json:"flush"` } Put struct { - Kind string `json:"kind" unpack:""` - KeywordPos int `json:"keyword_pos"` - Args []Assignment `json:"args"` + Kind string `json:"kind" unpack:""` + KeywordPos int `json:"keyword_pos"` + Args Assignments `json:"args"` } Merge struct { Kind string `json:"kind" unpack:""` @@ -520,8 +520,8 @@ type ( // is unknown: It could be a Summarize or Put operator. This will be // determined in the semantic phase. OpAssignment struct { - Kind string `json:"kind" unpack:""` - Assignments []Assignment `json:"assignments"` + Kind string `json:"kind" unpack:""` + Assignments Assignments `json:"assignments"` } // An OpExpr operator is an expression that appears as an operator // and requires semantic analysis to determine if it is a filter, a yield, @@ -531,22 +531,22 @@ type ( Expr Expr `json:"expr"` } Rename struct { - Kind string `json:"kind" unpack:""` - KeywordPos int `json:"keyword_pos"` - Args []Assignment `json:"args"` + Kind string `json:"kind" unpack:""` + KeywordPos int `json:"keyword_pos"` + Args Assignments `json:"args"` } Fuse struct { Kind string `json:"kind" unpack:""` KeywordPos int `json:"keyword_pos"` } Join struct { - Kind string `json:"kind" unpack:""` - KeywordPos int `json:"keyword_pos"` - Style string `json:"style"` - RightInput Seq `json:"right_input"` - LeftKey Expr `json:"left_key"` - RightKey Expr `json:"right_key"` - Args []Assignment `json:"args"` + Kind string `json:"kind" unpack:""` + KeywordPos int `json:"keyword_pos"` + Style string `json:"style"` + RightInput Seq `json:"right_input"` + LeftKey Expr `json:"left_key"` + RightKey Expr `json:"right_key"` + Args Assignments `json:"args"` } Sample struct { Kind string `json:"kind" unpack:""` @@ -671,14 +671,19 @@ type Assignment struct { RHS Expr `json:"rhs"` } -func (a *Assignment) Pos() int { +func (a Assignment) Pos() int { if a.LHS != nil { return a.LHS.Pos() } return a.RHS.Pos() } -func (a *Assignment) End() int { return a.RHS.Pos() } +func (a Assignment) End() int { return a.RHS.End() } + +type Assignments []Assignment + +func (a Assignments) Pos() int { return a[0].Pos() } +func (a Assignments) End() int { return a[len(a)-1].End() } // Def is like Assignment but the LHS is an identifier that may be later // referenced. This is used for const blocks in Sequential and var blocks diff --git a/compiler/ast/dag/expr.go b/compiler/ast/dag/expr.go index e36de6c1e0..748cd4ec9d 100644 --- a/compiler/ast/dag/expr.go +++ b/compiler/ast/dag/expr.go @@ -30,6 +30,12 @@ type ( LHS Expr `json:"lhs"` RHS Expr `json:"rhs"` } + // A BadExpr node is a placeholder for an expression containing semantic + // errors. 
+ BadExpr struct { + Kind string `json:"kind" unpack:""` + } + BinaryExpr struct { Kind string `json:"kind" unpack:""` Op string `json:"op"` @@ -131,6 +137,7 @@ type ( func (*Agg) ExprDAG() {} func (*ArrayExpr) ExprDAG() {} func (*Assignment) ExprDAG() {} +func (*BadExpr) ExprDAG() {} func (*BinaryExpr) ExprDAG() {} func (*Call) ExprDAG() {} func (*Conditional) ExprDAG() {} diff --git a/compiler/ast/dag/op.go b/compiler/ast/dag/op.go index 93cce1c08f..a1d2282ab1 100644 --- a/compiler/ast/dag/op.go +++ b/compiler/ast/dag/op.go @@ -26,6 +26,11 @@ type Seq []Op // Ops type ( + // A BadOp node is a placeholder for an expression containing semantic + // errors. + BadOp struct { + Kind string `json:"kind" unpack:""` + } Combine struct { Kind string `json:"kind" unpack:""` } @@ -281,6 +286,7 @@ type ( } ) +func (*BadOp) OpNode() {} func (*Fork) OpNode() {} func (*Scatter) OpNode() {} func (*Switch) OpNode() {} diff --git a/compiler/ast/dag/unpack.go b/compiler/ast/dag/unpack.go index 89d70b2f4e..f7bed32a64 100644 --- a/compiler/ast/dag/unpack.go +++ b/compiler/ast/dag/unpack.go @@ -10,6 +10,8 @@ var unpacker = unpack.New( Agg{}, ArrayExpr{}, Assignment{}, + BadOp{}, + BadExpr{}, BinaryExpr{}, Call{}, Combine{}, diff --git a/compiler/parser/errors.go b/compiler/parser/errors.go index db01a532d8..b2f5320385 100644 --- a/compiler/parser/errors.go +++ b/compiler/parser/errors.go @@ -79,6 +79,9 @@ func (e *LocalizedError) Error() string { } func (e *LocalizedError) errorContext() string { + if !e.Open.IsValid() { + return "" + } var b strings.Builder b.WriteString(e.Line + "\n") if e.Close.IsValid() { @@ -102,7 +105,7 @@ func (e *LocalizedError) spanError(b *strings.Builder) { b.WriteString(strings.Repeat(" ", col)) end := len(e.Line) - col if e.Open.Line == e.Close.Line { - end = e.Close.Column - col + end = e.Close.Column - 1 - col } b.WriteString(strings.Repeat("~", end)) } diff --git a/compiler/semantic/analyzer.go b/compiler/semantic/analyzer.go index 824211686d..2eaa3bba6e 100644 --- a/compiler/semantic/analyzer.go +++ b/compiler/semantic/analyzer.go @@ -8,6 +8,7 @@ import ( "github.com/brimdata/zed/compiler/ast/dag" "github.com/brimdata/zed/compiler/data" "github.com/brimdata/zed/lakeparse" + "go.uber.org/multierr" ) // Analyze performs a semantic analysis of the AST, translating it from AST @@ -15,9 +16,9 @@ import ( // After semantic analysis, the DAG is ready for either optimization or compilation. func Analyze(ctx context.Context, seq ast.Seq, source *data.Source, head *lakeparse.Commitish) (dag.Seq, error) { a := newAnalyzer(ctx, source, head) - s, err := a.semSeq(seq) - if err != nil { - return nil, err + s := a.semSeq(seq) + if a.errors != nil { + return nil, a.errors } return s, nil } @@ -26,12 +27,12 @@ func Analyze(ctx context.Context, seq ast.Seq, source *data.Source, head *lakepa // DAG does not have one. 
func AnalyzeAddSource(ctx context.Context, seq ast.Seq, source *data.Source, head *lakeparse.Commitish) (dag.Seq, error) { a := newAnalyzer(ctx, source, head) - s, err := a.semSeq(seq) - if err != nil { - return nil, err + s := a.semSeq(seq) + if a.errors != nil { + return nil, a.errors } if !HasSource(s) { - if err = a.addDefaultSource(&s); err != nil { + if err := a.addDefaultSource(&s); err != nil { return nil, err } } @@ -40,6 +41,7 @@ func AnalyzeAddSource(ctx context.Context, seq ast.Seq, source *data.Source, hea type analyzer struct { ctx context.Context + errors error head *lakeparse.Commitish opStack []*ast.OpDecl source *data.Source @@ -78,6 +80,10 @@ func (a *analyzer) addDefaultSource(seq *dag.Seq) error { seq.Prepend(&dag.DefaultScan{Kind: "DefaultScan"}) return nil } + // Verify pool exists for HEAD + if _, err := a.source.PoolID(a.ctx, a.head.Pool); err != nil { + return err + } pool := &ast.Pool{ Kind: "Pool", Spec: ast.PoolSpec{ @@ -87,10 +93,7 @@ func (a *analyzer) addDefaultSource(seq *dag.Seq) error { }, }, } - ops, err := a.semPool(pool) - if err != nil { - return err - } + ops := a.semPool(pool) seq.Prepend(ops[0]) return nil } @@ -116,6 +119,7 @@ func (a *analyzer) exitScope() { type opDecl struct { ast *ast.OpDecl scope *Scope // parent scope of op declaration. + bad bool } type opCycleError []*ast.OpDecl @@ -130,3 +134,15 @@ func (e opCycleError) Error() string { } return b } + +func badExpr() dag.Expr { + return &dag.BadExpr{Kind: "BadExpr"} +} + +func badOp() dag.Op { + return &dag.BadOp{Kind: "BadOp"} +} + +func (a *analyzer) error(n ast.Node, err error) { + a.errors = multierr.Append(a.errors, &Error{n, err}) +} diff --git a/compiler/semantic/errors.go b/compiler/semantic/errors.go new file mode 100644 index 0000000000..9e429c26c5 --- /dev/null +++ b/compiler/semantic/errors.go @@ -0,0 +1,18 @@ +package semantic + +import ( + "fmt" + + "github.com/brimdata/zed/compiler/ast" + "github.com/brimdata/zed/compiler/parser" +) + +type Error struct { + ast.Node + inner error +} + +var _ parser.PositionalError = (*Error)(nil) + +func (e *Error) Error() string { return fmt.Sprintf("%d: %s", e.Pos(), e.inner.Error()) } +func (e *Error) Message() string { return e.inner.Error() } diff --git a/compiler/semantic/expr.go b/compiler/semantic/expr.go index 1e56fa0aa3..8915acadc9 100644 --- a/compiler/semantic/expr.go +++ b/compiler/semantic/expr.go @@ -16,33 +16,34 @@ import ( "github.com/brimdata/zed/zson" ) -func (a *analyzer) semExpr(e ast.Expr) (dag.Expr, error) { +func (a *analyzer) semExpr(e ast.Expr) dag.Expr { switch e := e.(type) { case nil: - return nil, errors.New("semantic analysis: illegal null value encountered in AST") + panic("semantic analysis: illegal null value encountered in AST") case *ast.Regexp: return &dag.RegexpSearch{ Kind: "RegexpSearch", Pattern: e.Pattern, Expr: pathOf("this"), - }, nil + } case *ast.Glob: return &dag.RegexpSearch{ Kind: "RegexpSearch", Pattern: reglob.Reglob(e.Pattern), Expr: pathOf("this"), - }, nil + } case *ast.Grep: return a.semGrep(e) case *astzed.Primitive: val, err := zson.ParsePrimitive(e.Type, e.Text) if err != nil { - return nil, err + a.error(e, err) + return badExpr() } return &dag.Literal{ Kind: "Literal", Value: zson.FormatValue(val), - }, nil + } case *ast.ID: return a.semID(e) case *ast.Term: @@ -51,109 +52,78 @@ func (a *analyzer) semExpr(e ast.Expr) (dag.Expr, error) { case *astzed.Primitive: v, err := zson.ParsePrimitive(t.Type, t.Text) if err != nil { - return nil, err + a.error(e, err) + return badExpr() } val 
= zson.FormatValue(v) case *astzed.TypeValue: tv, err := a.semType(t.Value) if err != nil { - return nil, err + a.error(e, err) + return badExpr() } val = "<" + tv + ">" default: - return nil, fmt.Errorf("unexpected term value: %s", e.Kind) + a.error(e, fmt.Errorf("unexpected term value: %s", e.Kind)) + return badExpr() } return &dag.Search{ Kind: "Search", Text: e.Text, Value: val, Expr: pathOf("this"), - }, nil - case *ast.UnaryExpr: - expr, err := a.semExpr(e.Operand) - if err != nil { - return nil, err } + case *ast.UnaryExpr: return &dag.UnaryExpr{ Kind: "UnaryExpr", Op: e.Op, - Operand: expr, - }, nil + Operand: a.semExpr(e.Operand), + } case *ast.BinaryExpr: return a.semBinary(e) case *ast.Conditional: - cond, err := a.semExpr(e.Cond) - if err != nil { - return nil, err - } - thenExpr, err := a.semExpr(e.Then) - if err != nil { - return nil, err - } - elseExpr, err := a.semExpr(e.Else) - if err != nil { - return nil, err - } + cond := a.semExpr(e.Cond) + thenExpr := a.semExpr(e.Then) + elseExpr := a.semExpr(e.Else) return &dag.Conditional{ Kind: "Conditional", Cond: cond, Then: thenExpr, Else: elseExpr, - }, nil + } case *ast.Call: return a.semCall(e) case *ast.Cast: - expr, err := a.semExpr(e.Expr) - if err != nil { - return nil, err - } - typ, err := a.semExpr(e.Type) - if err != nil { - return nil, err - } + expr := a.semExpr(e.Expr) + typ := a.semExpr(e.Type) return &dag.Call{ Kind: "Call", Name: "cast", Args: []dag.Expr{expr, typ}, - }, nil - case *ast.IndexExpr: - expr, err := a.semExpr(e.Expr) - if err != nil { - return nil, err - } - index, err := a.semExpr(e.Index) - if err != nil { - return nil, err } + case *ast.IndexExpr: + expr := a.semExpr(e.Expr) + index := a.semExpr(e.Index) // If expr is a path and index is a string, then just extend the path. if path := a.isIndexOfThis(expr, index); path != nil { - return path, nil + return path } return &dag.IndexExpr{ Kind: "IndexExpr", Expr: expr, Index: index, - }, nil - case *ast.SliceExpr: - expr, err := a.semExpr(e.Expr) - if err != nil { - return nil, err } + case *ast.SliceExpr: + expr := a.semExpr(e.Expr) // XXX Literal indices should be type checked as int. - from, err := a.semExprNullable(e.From) - if err != nil { - return nil, err - } - to, err := a.semExprNullable(e.To) - if err != nil { - return nil, err - } + from := a.semExprNullable(e.From) + to := a.semExprNullable(e.To) return &dag.SliceExpr{ Kind: "SliceExpr", Expr: expr, From: from, To: to, - }, nil + } case *astzed.TypeValue: typ, err := a.semType(e.Value) if err != nil { @@ -167,32 +137,28 @@ func (a *analyzer) semExpr(e ast.Expr) (dag.Expr, error) { // complaining about the type not existing. 
// XXX See issue #3413 if e := semDynamicType(e.Value); e != nil { - return e, nil + return e } - return nil, err + a.error(e, err) + return badExpr() } return &dag.Literal{ Kind: "Literal", Value: "<" + typ + ">", - }, nil - case *ast.Agg: - expr, err := a.semExprNullable(e.Expr) - if err != nil { - return nil, err } + case *ast.Agg: + expr := a.semExprNullable(e.Expr) if expr == nil && e.Name != "count" { - return nil, fmt.Errorf("aggregator '%s' requires argument", e.Name) - } - where, err := a.semExprNullable(e.Where) - if err != nil { - return nil, err + a.error(e, fmt.Errorf("aggregator '%s' requires argument", e.Name)) + return badExpr() } + where := a.semExprNullable(e.Where) return &dag.Agg{ Kind: "Agg", Name: e.Name, Expr: expr, Where: where, - }, nil + } case *ast.RecordExpr: fields := map[string]struct{}{} var out []dag.RecordElem @@ -200,13 +166,12 @@ func (a *analyzer) semExpr(e ast.Expr) (dag.Expr, error) { switch elem := elem.(type) { case *ast.Field: if _, ok := fields[elem.Name]; ok { - return nil, fmt.Errorf("record expression: %w", &zed.DuplicateFieldError{Name: elem.Name}) + a.error(elem, fmt.Errorf("record expression: %w", &zed.DuplicateFieldError{Name: elem.Name})) + // XXX Add BadRecordElem? + continue } fields[elem.Name] = struct{}{} - e, err := a.semExpr(elem.Value) - if err != nil { - return nil, err - } + e := a.semExpr(elem.Value) out = append(out, &dag.Field{ Kind: "Field", Name: elem.Name, @@ -214,23 +179,19 @@ func (a *analyzer) semExpr(e ast.Expr) (dag.Expr, error) { }) case *ast.ID: if _, ok := fields[elem.Name]; ok { - return nil, fmt.Errorf("record expression: %w", &zed.DuplicateFieldError{Name: elem.Name}) + a.error(elem, fmt.Errorf("record expression: %w", &zed.DuplicateFieldError{Name: elem.Name})) + // XXX Add BadRecordElem? 
+ continue } fields[elem.Name] = struct{}{} - v, err := a.semID(elem) - if err != nil { - return nil, err - } + v := a.semID(elem) out = append(out, &dag.Field{ Kind: "Field", Name: elem.Name, Value: v, }) case *ast.Spread: - e, err := a.semExpr(elem.Expr) - if err != nil { - return nil, err - } + e := a.semExpr(elem.Expr) out = append(out, &dag.Spread{ Kind: "Spread", Expr: e, @@ -240,71 +201,50 @@ func (a *analyzer) semExpr(e ast.Expr) (dag.Expr, error) { return &dag.RecordExpr{ Kind: "RecordExpr", Elems: out, - }, nil - case *ast.ArrayExpr: - elems, err := a.semVectorElems(e.Elems) - if err != nil { - return nil, err } + case *ast.ArrayExpr: + elems := a.semVectorElems(e.Elems) return &dag.ArrayExpr{ Kind: "ArrayExpr", Elems: elems, - }, nil - case *ast.SetExpr: - elems, err := a.semVectorElems(e.Elems) - if err != nil { - return nil, err } + case *ast.SetExpr: + elems := a.semVectorElems(e.Elems) return &dag.SetExpr{ Kind: "SetExpr", Elems: elems, - }, nil + } case *ast.MapExpr: var entries []dag.Entry for _, entry := range e.Entries { - key, err := a.semExpr(entry.Key) - if err != nil { - return nil, err - } - val, err := a.semExpr(entry.Value) - if err != nil { - return nil, err - } + key := a.semExpr(entry.Key) + val := a.semExpr(entry.Value) entries = append(entries, dag.Entry{Key: key, Value: val}) } return &dag.MapExpr{ Kind: "MapExpr", Entries: entries, - }, nil - case *ast.OverExpr: - exprs, err := a.semExprs(e.Exprs) - if err != nil { - return nil, err } + case *ast.OverExpr: + exprs := a.semExprs(e.Exprs) if e.Body == nil { - return nil, errors.New("over expression missing lateral scope") + a.error(e, errors.New("over expression missing lateral scope")) + return badExpr() } a.enterScope() defer a.exitScope() - locals, err := a.semVars(e.Locals) - if err != nil { - return nil, err - } - body, err := a.semSeq(e.Body) - if err != nil { - return nil, err - } return &dag.OverExpr{ Kind: "OverExpr", - Defs: locals, + Defs: a.semVars(e.Locals), Exprs: exprs, - Body: body, - }, nil + Body: a.semSeq(e.Body), + } } - return nil, fmt.Errorf("invalid expression type %T", e) + a.error(e, errors.New("invalid expression type")) + return badExpr() } -func (a *analyzer) semID(id *ast.ID) (dag.Expr, error) { +func (a *analyzer) semID(id *ast.ID) dag.Expr { // We use static scoping here to see if an identifier is // a "var" reference to the name or a field access // and transform the AST node appropriately. The resulting DAG @@ -312,12 +252,13 @@ func (a *analyzer) semID(id *ast.ID) (dag.Expr, error) { // one way or the other. 
ref, err := a.scope.LookupExpr(id.Name) if err != nil { - return nil, err + a.error(id, err) + return badExpr() } if ref != nil { - return ref, nil + return ref } - return pathOf(id.Name), nil + return pathOf(id.Name) } func semDynamicType(tv astzed.Type) *dag.Call { @@ -341,21 +282,15 @@ func dynamicTypeName(name string) *dag.Call { } } -func (a *analyzer) semGrep(grep *ast.Grep) (dag.Expr, error) { +func (a *analyzer) semGrep(grep *ast.Grep) dag.Expr { e := dag.Expr(&dag.This{Kind: "This"}) if grep.Expr != nil { - var err error - if e, err = a.semExpr(grep.Expr); err != nil { - return nil, err - } - } - p, err := a.semExpr(grep.Pattern) - if err != nil { - return nil, err + e = a.semExpr(grep.Expr) } + p := a.semExpr(grep.Pattern) if s, ok := p.(*dag.RegexpSearch); ok { s.Expr = e - return s, nil + return s } if s, ok := isStringConst(a.zctx, p); ok { return &dag.Search{ @@ -363,75 +298,66 @@ func (a *analyzer) semGrep(grep *ast.Grep) (dag.Expr, error) { Text: s, Value: zson.QuotedString([]byte(s)), Expr: e, - }, nil + } } return &dag.Call{ Kind: "Call", Name: "grep", Args: []dag.Expr{p, e}, - }, nil + } } -func (a *analyzer) semRegexp(b *ast.BinaryExpr) (dag.Expr, error) { +func (a *analyzer) semRegexp(b *ast.BinaryExpr) dag.Expr { if b.Op != "~" { - return nil, nil + return nil } re, ok := b.RHS.(*ast.Regexp) if !ok { - return nil, errors.New(`right-hand side of ~ expression must be a regular expression`) + a.error(b, errors.New(`right-hand side of ~ expression must be a regular expression`)) + return badExpr() } if _, err := expr.CompileRegexp(re.Pattern); err != nil { - return nil, err - } - e, err := a.semExpr(b.LHS) - if err != nil { - return nil, err + a.error(b.RHS, err) + return badExpr() } + e := a.semExpr(b.LHS) return &dag.RegexpMatch{ Kind: "RegexpMatch", Pattern: re.Pattern, Expr: e, - }, nil + } } -func (a *analyzer) semBinary(e *ast.BinaryExpr) (dag.Expr, error) { - if e, err := a.semRegexp(e); e != nil || err != nil { - return e, err +func (a *analyzer) semBinary(e *ast.BinaryExpr) dag.Expr { + if e := a.semRegexp(e); e != nil { + return e } op := e.Op if op == "." 
{ - lhs, err := a.semExpr(e.LHS) - if err != nil { - return nil, err - } + lhs := a.semExpr(e.LHS) id, ok := e.RHS.(*ast.ID) if !ok { - return nil, errors.New("RHS of dot operator is not an identifier") + a.error(e, errors.New("RHS of dot operator is not an identifier")) + return badExpr() } if lhs, ok := lhs.(*dag.This); ok { lhs.Path = append(lhs.Path, id.Name) - return lhs, nil + return lhs } return &dag.Dot{ Kind: "Dot", LHS: lhs, RHS: id.Name, - }, nil - } - lhs, err := a.semExpr(e.LHS) - if err != nil { - return nil, err - } - rhs, err := a.semExpr(e.RHS) - if err != nil { - return nil, err + } } + lhs := a.semExpr(e.LHS) + rhs := a.semExpr(e.RHS) return &dag.BinaryExpr{ Kind: "BinaryExpr", Op: e.Op, LHS: lhs, RHS: rhs, - }, nil + } } func (a *analyzer) isIndexOfThis(lhs, rhs dag.Expr) *dag.This { @@ -452,151 +378,140 @@ func isStringConst(zctx *zed.Context, e dag.Expr) (field string, ok bool) { return "", false } -func (a *analyzer) semSlice(slice *ast.BinaryExpr) (*dag.BinaryExpr, error) { - sliceFrom, err := a.semExprNullable(slice.LHS) - if err != nil { - return nil, err - } - sliceTo, err := a.semExprNullable(slice.RHS) - if err != nil { - return nil, err - } +func (a *analyzer) semSlice(slice *ast.BinaryExpr) *dag.BinaryExpr { + sliceFrom := a.semExprNullable(slice.LHS) + sliceTo := a.semExprNullable(slice.RHS) return &dag.BinaryExpr{ Kind: "BinaryExpr", Op: ":", LHS: sliceFrom, RHS: sliceTo, - }, nil + } } -func (a *analyzer) semExprNullable(e ast.Expr) (dag.Expr, error) { +func (a *analyzer) semExprNullable(e ast.Expr) dag.Expr { if e == nil { - return nil, nil + return nil } return a.semExpr(e) } -func (a *analyzer) semCall(call *ast.Call) (dag.Expr, error) { - if e, err := a.maybeConvertAgg(call); e != nil || err != nil { - return e, err +func (a *analyzer) semCall(call *ast.Call) dag.Expr { + if e := a.maybeConvertAgg(call); e != nil { + return e } if call.Where != nil { - return nil, fmt.Errorf("'where' clause on non-aggregation function: %s", call.Name) - } - exprs, err := a.semExprs(call.Args) - if err != nil { - return nil, fmt.Errorf("%s(): bad argument: %w", call.Name, err) + a.error(call, errors.New("'where' clause on non-aggregation function")) + return badExpr() } + exprs := a.semExprs(call.Args) name, nargs := call.Name, len(call.Args) // Call could be to a user defined func. Check if we have a matching func in // scope. udf, err := a.scope.LookupExpr(name) if err != nil { - return nil, err + a.error(call, err) + return badExpr() } switch { // udf should be checked first since a udf can override builtin functions. case udf != nil: f, ok := udf.(*dag.Func) if !ok { - return nil, fmt.Errorf("%s(): definition is not a function type: %T", name, udf) + a.error(call, fmt.Errorf("definition is not a function type: %T", udf)) + return badExpr() } if len(f.Params) != nargs { - return nil, fmt.Errorf("%s(): expects %d argument(s)", name, len(f.Params)) + a.error(call, fmt.Errorf("call expects %d argument(s)", len(f.Params))) + return badExpr() } case zed.LookupPrimitive(name) != nil: // Primitive function call, change this to a cast. 
if err := function.CheckArgCount(nargs, 1, 1); err != nil { - return nil, fmt.Errorf("%s(): %w", name, err) + a.error(call, err) + return badExpr() } exprs = append(exprs, &dag.Literal{Kind: "Literal", Value: "<" + name + ">"}) name = "cast" case expr.NewShaperTransform(name) != 0: if err := function.CheckArgCount(nargs, 1, 2); err != nil { - return nil, fmt.Errorf("%s(): %w", name, err) + a.error(call, err) + return badExpr() } if nargs == 1 { exprs = append([]dag.Expr{&dag.This{Kind: "This"}}, exprs...) } case name == "map": if err := function.CheckArgCount(nargs, 2, 2); err != nil { - return nil, fmt.Errorf("%s(): %w", name, err) + a.error(call, err) + return badExpr() } id, ok := call.Args[1].(*ast.ID) if !ok { - return nil, fmt.Errorf("%s(): second argument must be the identifier of a function", name) + a.error(call.Args[1], errors.New("second argument must be the identifier of a function")) + return badExpr() } - inner, err := a.semCall(&ast.Call{ + inner := a.semCall(&ast.Call{ Kind: "Call", Name: id.Name, Args: []ast.Expr{&ast.ID{Kind: "ID", Name: "this"}}, }) - if err != nil { - return nil, err - } return &dag.MapCall{ Kind: "MapCall", Expr: exprs[0], Inner: inner, - }, nil + } default: if _, _, err = function.New(a.zctx, name, nargs); err != nil { - return nil, fmt.Errorf("%s(): %w", name, err) + a.error(call, err) + return badExpr() } } return &dag.Call{ Kind: "Call", Name: name, Args: exprs, - }, nil + } } -func (a *analyzer) semExprs(in []ast.Expr) ([]dag.Expr, error) { +func (a *analyzer) semExprs(in []ast.Expr) []dag.Expr { exprs := make([]dag.Expr, 0, len(in)) for _, e := range in { - expr, err := a.semExpr(e) - if err != nil { - return nil, err - } - exprs = append(exprs, expr) + exprs = append(exprs, a.semExpr(e)) } - return exprs, nil + return exprs } -func (a *analyzer) semAssignments(assignments []ast.Assignment) ([]dag.Assignment, error) { +func (a *analyzer) semAssignments(assignments []ast.Assignment) []dag.Assignment { out := make([]dag.Assignment, 0, len(assignments)) for _, e := range assignments { - a, err := a.semAssignment(e) - if err != nil { - return nil, err - } - out = append(out, a) + out = append(out, a.semAssignment(e)) } - return out, nil + return out } -func (a *analyzer) semAssignment(assign ast.Assignment) (dag.Assignment, error) { - rhs, err := a.semExpr(assign.RHS) - if err != nil { - return dag.Assignment{}, fmt.Errorf("right-hand side of assignment: %w", err) - } +func (a *analyzer) semAssignment(assign ast.Assignment) dag.Assignment { + rhs := a.semExpr(assign.RHS) var lhs dag.Expr if assign.LHS == nil { - path, err := deriveLHSPath(rhs) - if err != nil { - return dag.Assignment{}, err + if path, err := deriveLHSPath(rhs); err != nil { + a.error(assign, err) + lhs = badExpr() + } else { + lhs = &dag.This{Kind: "This", Path: path} } - lhs = &dag.This{Kind: "This", Path: path} - } else if lhs, err = a.semExpr(assign.LHS); err != nil { - return dag.Assignment{}, fmt.Errorf("left-hand side of assignment: %w", err) + } else { + lhs = a.semExpr(assign.LHS) } if !isLval(lhs) { - return dag.Assignment{}, errors.New("illegal left-hand side of assignment") + a.error(assign, errors.New("illegal left-hand side of assignment")) + lhs = badExpr() } if this, ok := lhs.(*dag.This); ok && len(this.Path) == 0 { - return dag.Assignment{}, errors.New("cannot assign to 'this'") + a.error(assign, errors.New("cannot assign to 'this'")) + lhs = badExpr() } - return dag.Assignment{Kind: "Assignment", LHS: lhs, RHS: rhs}, nil + return dag.Assignment{Kind: "Assignment", 
LHS: lhs, RHS: rhs} } func isLval(e dag.Expr) bool { @@ -637,66 +552,54 @@ func deriveLHSPath(rhs dag.Expr) ([]string, error) { return nil, errors.New("cannot infer field from expression") } -func (a *analyzer) semFields(exprs []ast.Expr) ([]dag.Expr, error) { +func (a *analyzer) semFields(exprs []ast.Expr) []dag.Expr { fields := make([]dag.Expr, 0, len(exprs)) for _, e := range exprs { - f, err := a.semField(e) - if err != nil { - return nil, err - } - fields = append(fields, f) + fields = append(fields, a.semField(e)) } - return fields, nil + return fields } // semField analyzes the expression f and makes sure that it's // a field reference returning an error if not. -func (a *analyzer) semField(f ast.Expr) (*dag.This, error) { - e, err := a.semExpr(f) - if err != nil { - return nil, errors.New("invalid expression used as a field") - } +func (a *analyzer) semField(f ast.Expr) dag.Expr { + e := a.semExpr(f) field, ok := e.(*dag.This) if !ok { - return nil, errors.New("invalid expression used as a field") + a.error(f, errors.New("invalid expression used as a field")) + return badExpr() } if len(field.Path) == 0 { - return nil, errors.New("cannot use 'this' as a field reference") + a.error(f, errors.New("cannot use 'this' as a field reference")) + return badExpr() } - return field, nil + return field } -func (a *analyzer) maybeConvertAgg(call *ast.Call) (dag.Expr, error) { +func (a *analyzer) maybeConvertAgg(call *ast.Call) dag.Expr { if _, err := agg.NewPattern(call.Name, true); err != nil { - return nil, nil + return nil } var e dag.Expr - if len(call.Args) > 1 { + if err := function.CheckArgCount(len(call.Args), 0, 1); err != nil { if call.Name == "min" || call.Name == "max" { // min and max are special cases as they are also functions. If the // number of args is greater than 1 they're probably a function so do not // return an error. 
- return nil, nil + return nil } - return nil, fmt.Errorf("%s: wrong number of arguments", call.Name) + a.error(call, err) + return badExpr() } if len(call.Args) == 1 { - var err error - e, err = a.semExpr(call.Args[0]) - if err != nil { - return nil, err - } - } - where, err := a.semExprNullable(call.Where) - if err != nil { - return nil, err + e = a.semExpr(call.Args[0]) } return &dag.Agg{ Kind: "Agg", Name: call.Name, Expr: e, - Where: where, - }, nil + Where: a.semExprNullable(call.Where), + } } func DotExprToFieldPath(e ast.Expr) *dag.This { @@ -749,23 +652,17 @@ func (a *analyzer) semType(typ astzed.Type) (string, error) { return zson.FormatType(ztype), nil } -func (a *analyzer) semVectorElems(elems []ast.VectorElem) ([]dag.VectorElem, error) { +func (a *analyzer) semVectorElems(elems []ast.VectorElem) []dag.VectorElem { var out []dag.VectorElem for _, elem := range elems { switch elem := elem.(type) { case *ast.Spread: - e, err := a.semExpr(elem.Expr) - if err != nil { - return nil, err - } + e := a.semExpr(elem.Expr) out = append(out, &dag.Spread{Kind: "Spread", Expr: e}) case *ast.VectorValue: - e, err := a.semExpr(elem.Expr) - if err != nil { - return nil, err - } + e := a.semExpr(elem.Expr) out = append(out, &dag.VectorValue{Kind: "VectorValue", Expr: e}) } } - return out, nil + return out } diff --git a/compiler/semantic/op.go b/compiler/semantic/op.go index 9b7f8b7096..23e76bca47 100644 --- a/compiler/semantic/op.go +++ b/compiler/semantic/op.go @@ -22,165 +22,151 @@ import ( "github.com/segmentio/ksuid" ) -func (a *analyzer) semSeq(seq ast.Seq) (dag.Seq, error) { +func (a *analyzer) semSeq(seq ast.Seq) dag.Seq { var converted dag.Seq for _, op := range seq { - var err error - converted, err = a.semOp(op, converted) - if err != nil { - return nil, err - } + converted = a.semOp(op, converted) } - return converted, nil + return converted } -func (a *analyzer) semFrom(from *ast.From, seq dag.Seq) (dag.Seq, error) { +func (a *analyzer) semFrom(from *ast.From, seq dag.Seq) dag.Seq { switch len(from.Trunks) { case 0: - return nil, errors.New("internal error: from operator has no paths") + a.error(from, errors.New("from operator has no paths")) + return append(seq, badOp()) case 1: return a.semTrunk(from.Trunks[0], seq) default: paths := make([]dag.Seq, 0, len(from.Trunks)) for _, in := range from.Trunks { - converted, err := a.semTrunk(in, nil) - if err != nil { - return nil, err - } - paths = append(paths, converted) + paths = append(paths, a.semTrunk(in, nil)) } return append(seq, &dag.Fork{ Kind: "Fork", Paths: paths, - }), nil + }) } } -func (a *analyzer) semTrunk(trunk ast.Trunk, out dag.Seq) (dag.Seq, error) { +func (a *analyzer) semTrunk(trunk ast.Trunk, out dag.Seq) dag.Seq { if pool, ok := trunk.Source.(*ast.Pool); ok && trunk.Seq != nil { switch pool.Spec.Pool.(type) { case *ast.Glob, *ast.Regexp: - return nil, errors.New("=> not allowed after pool pattern in 'from' operator") + a.error(&trunk, errors.New("=> not allowed after pool pattern in 'from' operator")) + return append(out, badOp()) } } - sources, err := a.semSource(trunk.Source) - if err != nil { - return nil, err - } - seq, err := a.semSeq(trunk.Seq) - if err != nil { - return nil, err - } + sources := a.semSource(trunk.Source) + seq := a.semSeq(trunk.Seq) if len(sources) == 1 { - return append(out, append(dag.Seq{sources[0]}, seq...)...), nil + return append(out, append(dag.Seq{sources[0]}, seq...)...) 
} paths := make([]dag.Seq, 0, len(sources)) for _, source := range sources { paths = append(paths, append(dag.Seq{source}, seq...)) } - return append(out, &dag.Fork{Kind: "Fork", Paths: paths}), nil + return append(out, &dag.Fork{Kind: "Fork", Paths: paths}) } //XXX make sure you can't read files from a lake instance -func (a *analyzer) semOpSource(source ast.Source, out dag.Seq) (dag.Seq, error) { - sources, err := a.semSource(source) - if err != nil { - return nil, err - } +func (a *analyzer) semOpSource(source ast.Source, out dag.Seq) dag.Seq { + sources := a.semSource(source) if len(sources) == 1 { - return append(sources, out...), nil + return append(sources, out...) } var paths []dag.Seq for _, s := range sources { paths = append(paths, dag.Seq{s}) } - return append(out, &dag.Fork{Kind: "Fork", Paths: paths}), nil + return append(out, &dag.Fork{Kind: "Fork", Paths: paths}) } -func (a *analyzer) semSource(source ast.Source) ([]dag.Op, error) { - switch p := source.(type) { +func (a *analyzer) semSource(source ast.Source) []dag.Op { + switch s := source.(type) { case *ast.File: - sortKey, err := semSortKey(p.SortKey) + sortKey, err := semSortKey(s.SortKey) if err != nil { - return nil, err + a.error(source, err) } var path string - switch p := p.Path.(type) { + switch p := s.Path.(type) { case *ast.QuotedString: path = p.Text case *ast.String: // This can be either a reference to a constant or a string. if path, err = a.maybeStringConst(p.Text); err != nil { - return nil, fmt.Errorf("invalid file path: %w", err) + a.error(s.Path, err) } default: - return nil, fmt.Errorf("semantic analyzer: unknown AST file type %T", p) + panic(fmt.Errorf("semantic analyzer: unknown AST file type %T", p)) } return []dag.Op{ &dag.FileScan{ Kind: "FileScan", Path: path, - Format: p.Format, + Format: s.Format, SortKey: sortKey, }, - }, nil + } case *ast.HTTP: - sortKey, err := semSortKey(p.SortKey) + sortKey, err := semSortKey(s.SortKey) if err != nil { - return nil, err + a.error(source, err) } var headers map[string][]string - if p.Headers != nil { - expr, err := a.semExpr(p.Headers) - if err != nil { - return nil, err - } + if s.Headers != nil { + expr := a.semExpr(s.Headers) val, err := kernel.EvalAtCompileTime(a.zctx, expr) if err != nil { - return nil, fmt.Errorf("headers: %w", err) - } - headers, err = unmarshalHeaders(val) - if err != nil { - return nil, err + a.error(s.Headers, err) + } else { + headers, err = unmarshalHeaders(val) + if err != nil { + a.error(s.Headers, err) + } } } var url string - switch p := p.URL.(type) { + switch p := s.URL.(type) { case *ast.QuotedString: url = p.Text case *ast.String: // This can be either a reference to a constant or a string. if url, err = a.maybeStringConst(p.Text); err != nil { - return nil, fmt.Errorf("invalid file path: %w", err) + a.error(s.URL, err) + // Set url so we don't report an error for this twice. 
+ url = "http://error" } default: - return nil, fmt.Errorf("semantic analyzer: unsupported AST get type %T", p) + panic(fmt.Errorf("semantic analyzer: unsupported AST get type %T", p)) } if !strings.HasPrefix(url, "http://") && !strings.HasPrefix(url, "https://") { - return nil, fmt.Errorf("get: invalid URL %s", url) + a.error(s.URL, fmt.Errorf("invalid URL %s", url)) } return []dag.Op{ &dag.HTTPScan{ Kind: "HTTPScan", URL: url, - Format: p.Format, + Format: s.Format, SortKey: sortKey, - Method: p.Method, + Method: s.Method, Headers: headers, - Body: p.Body, + Body: s.Body, }, - }, nil + } case *ast.Pool: if !a.source.IsLake() { - return nil, errors.New("semantic analyzer: from pool cannot be used without a lake") + a.error(s, errors.New("from pool cannot be used without a lake")) + return []dag.Op{badOp()} } - return a.semPool(p) + return a.semPool(s) case *ast.Pass: //XXX just connect parent - return []dag.Op{dag.PassOp}, nil + return []dag.Op{dag.PassOp} default: - return nil, fmt.Errorf("semantic analyzer: unknown AST source type %T", p) + panic(fmt.Errorf("semantic analyzer: unknown AST source type %T", s)) } } @@ -206,6 +192,8 @@ func unmarshalHeaders(val zed.Value) (map[string][]string, error) { return headers, nil } +// XXX At some point SortKey should be an ast.Node but for now just have this +// return and error and let the parent log the semantic error. func semSortKey(p *ast.SortKey) (order.SortKey, error) { if p == nil || p.Keys == nil { return order.Nil, nil @@ -225,7 +213,7 @@ func semSortKey(p *ast.SortKey) (order.SortKey, error) { return order.NewSortKey(which, keys), nil } -func (a *analyzer) semPool(p *ast.Pool) ([]dag.Op, error) { +func (a *analyzer) semPool(p *ast.Pool) []dag.Op { var poolNames []string var err error switch specPool := p.Spec.Pool.(type) { @@ -238,28 +226,27 @@ func (a *analyzer) semPool(p *ast.Pool) ([]dag.Op, error) { poolNames, err = a.matchPools(specPool.Pattern, specPool.Pattern, "regexp") case *ast.String: // This can be either a reference to a constant or a string. - name, err := a.maybeStringConst(specPool.Text) - if err != nil { - return nil, fmt.Errorf("invalid pool name: %w", err) + if name, cerr := a.maybeStringConst(specPool.Text); cerr != nil { + err = fmt.Errorf("invalid pool name: %w", cerr) + } else { + poolNames = []string{name} } - poolNames = []string{name} case *ast.QuotedString: poolNames = []string{specPool.Text} default: - return nil, fmt.Errorf("semantic analyzer: unknown AST pool type %T", specPool) + panic(fmt.Errorf("semantic analyzer: unknown AST pool type %T", specPool)) } if err != nil { - return nil, err + // XXX PoolSpec should have a node position but for now just report + // error as entire source. 
+ a.error(p.Spec.Pool, err) + return []dag.Op{badOp()} } var sources []dag.Op for _, name := range poolNames { - source, err := a.semPoolWithName(p, name) - if err != nil { - return nil, err - } - sources = append(sources, source) + sources = append(sources, a.semPoolWithName(p, name)) } - return sources, nil + return sources } func (a *analyzer) maybeStringConst(name string) (string, error) { @@ -278,11 +265,12 @@ func (a *analyzer) maybeStringConst(name string) (string, error) { return val.AsString(), nil } -func (a *analyzer) semPoolWithName(p *ast.Pool, poolName string) (dag.Op, error) { +func (a *analyzer) semPoolWithName(p *ast.Pool, poolName string) dag.Op { commit := p.Spec.Commit if poolName == "HEAD" { if a.head == nil { - return nil, errors.New("cannot scan from unknown HEAD") + a.error(p.Spec.Pool, errors.New("cannot scan from unknown HEAD")) + return badOp() } poolName = a.head.Pool commit = a.head.Branch @@ -290,27 +278,30 @@ func (a *analyzer) semPoolWithName(p *ast.Pool, poolName string) (dag.Op, error) if poolName == "" { meta := p.Spec.Meta if meta == "" { - return nil, errors.New("pool name missing") + a.error(p, errors.New("pool name missing")) + return badOp() } if _, ok := dag.LakeMetas[meta]; !ok { - return nil, fmt.Errorf("unknown lake metadata type %q in from operator", meta) + a.error(p, fmt.Errorf("unknown lake metadata type %q in from operator", meta)) + return badOp() } return &dag.LakeMetaScan{ Kind: "LakeMetaScan", Meta: p.Spec.Meta, - }, nil + } } poolID, err := a.source.PoolID(a.ctx, poolName) if err != nil { - return nil, err + a.error(p.Spec.Pool, err) + return badOp() } var commitID ksuid.KSUID if commit != "" { - commitID, err = lakeparse.ParseID(commit) - if err != nil { + if commitID, err = lakeparse.ParseID(commit); err != nil { commitID, err = a.source.CommitObject(a.ctx, poolID, commit) if err != nil { - return nil, err + a.error(p, err) + return badOp() } } } @@ -319,7 +310,8 @@ func (a *analyzer) semPoolWithName(p *ast.Pool, poolName string) (dag.Op, error) if commitID == ksuid.Nil { commitID, err = a.source.CommitObject(a.ctx, poolID, "main") if err != nil { - return nil, err + a.error(p, err) + return badOp() } } return &dag.CommitMetaScan{ @@ -328,23 +320,25 @@ func (a *analyzer) semPoolWithName(p *ast.Pool, poolName string) (dag.Op, error) Pool: poolID, Commit: commitID, Tap: p.Spec.Tap, - }, nil + } } if _, ok := dag.PoolMetas[meta]; ok { return &dag.PoolMetaScan{ Kind: "PoolMetaScan", Meta: meta, ID: poolID, - }, nil + } } - return nil, fmt.Errorf("unknown metadata type %q in from operator", meta) + a.error(p, fmt.Errorf("unknown metadata type %q", meta)) + return badOp() } if commitID == ksuid.Nil { // This trick here allows us to default to the main branch when // there is a "from pool" operator with no meta query or commit object. 
commitID, err = a.source.CommitObject(a.ctx, poolID, "main") if err != nil { - return nil, err + a.error(p, err) + return badOp() } } if p.Delete { @@ -352,13 +346,13 @@ func (a *analyzer) semPoolWithName(p *ast.Pool, poolName string) (dag.Op, error) Kind: "DeleteScan", ID: poolID, Commit: commitID, - }, nil + } } return &dag.PoolScan{ Kind: "PoolScan", ID: poolID, Commit: commitID, - }, nil + } } func (a *analyzer) matchPools(pattern, origPattern, patternDesc string) ([]string, error) { @@ -382,23 +376,16 @@ func (a *analyzer) matchPools(pattern, origPattern, patternDesc string) ([]strin return matches, nil } -func (a *analyzer) semScope(op *ast.Scope) (*dag.Scope, error) { +func (a *analyzer) semScope(op *ast.Scope) *dag.Scope { a.scope = NewScope(a.scope) defer a.exitScope() - consts, funcs, err := a.semDecls(op.Decls) - if err != nil { - return nil, err - } - body, err := a.semSeq(op.Body) - if err != nil { - return nil, err - } + consts, funcs := a.semDecls(op.Decls) return &dag.Scope{ Kind: "Scope", Consts: consts, Funcs: funcs, - Body: body, - }, nil + Body: a.semSeq(op.Body), + } } // semOp does a semantic analysis on a flowgraph to an @@ -406,32 +393,22 @@ func (a *analyzer) semScope(op *ast.Scope) (*dag.Scope, error) { // object. Currently, it only replaces the group-by duration with // a bucket call on the ts and replaces FunctionCalls in op context // with either a group-by or filter op based on the function's name. -func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { +func (a *analyzer) semOp(o ast.Op, seq dag.Seq) dag.Seq { switch o := o.(type) { case *ast.From: return a.semFrom(o, seq) case *ast.Pool, *ast.File, *ast.HTTP: return a.semOpSource(o.(ast.Source), seq) case *ast.Summarize: - keys, err := a.semAssignments(o.Keys) - if err != nil { - return nil, err - } - if assignmentHasDynamicLHS(keys) { - return nil, errors.New("summarize: key output field must be static") - } + keys := a.semAssignments(o.Keys) + a.checkStaticAssignment(o.Keys, keys) if len(keys) == 0 && len(o.Aggs) == 1 { if seq := a.singletonAgg(o.Aggs[0], seq); seq != nil { - return seq, nil + return seq } } - aggs, err := a.semAssignments(o.Aggs) - if err != nil { - return nil, err - } - if assignmentHasDynamicLHS(aggs) { - return nil, errors.New("summarize: aggregate output field must be static") - } + aggs := a.semAssignments(o.Aggs) + a.checkStaticAssignment(o.Aggs, aggs) // Note: InputSortDir is copied in here but it's not meaningful // coming from a parser AST, only from a worker using the kernel DSL, // which is another reason why we need separate parser and kernel ASTs. 
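// Editor's note: an illustrative sketch, not part of this patch, of the
// error-accumulation behavior the commit message describes. As in
// analyzer.go's (*analyzer).error, semantic problems are appended with
// go.uber.org/multierr so a single pass over the AST can report every
// error instead of stopping at the first one. The sample positions and
// messages below are hypothetical, formatted like semantic.Error ("pos: msg").
package main

import (
	"errors"
	"fmt"

	"go.uber.org/multierr"
)

func main() {
	var errs error
	// Each semantic problem is recorded and analysis continues.
	errs = multierr.Append(errs, errors.New(`10: no fields given`))
	errs = multierr.Append(errs, errors.New(`42: illegal left-hand side of assignment`))
	// The combined error can be unpacked into the individual problems
	// for reporting against the original source text.
	for _, err := range multierr.Errors(errs) {
		fmt.Println(err)
	}
}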
@@ -446,44 +423,28 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { Limit: o.Limit, Keys: keys, Aggs: aggs, - }), nil + }) case *ast.Parallel: var paths []dag.Seq for _, seq := range o.Paths { - converted, err := a.semSeq(seq) - if err != nil { - return nil, err - } - paths = append(paths, converted) + paths = append(paths, a.semSeq(seq)) } return append(seq, &dag.Fork{ Kind: "Fork", Paths: paths, - }), nil + }) case *ast.Scope: - op, err := a.semScope(o) - if err != nil { - return nil, err - } - return append(seq, op), nil + return append(seq, a.semScope(o)) case *ast.Switch: var expr dag.Expr if o.Expr != nil { - var err error - expr, err = a.semExpr(o.Expr) - if err != nil { - return nil, err - } + expr = a.semExpr(o.Expr) } var cases []dag.Case for _, c := range o.Cases { var e dag.Expr if c.Expr != nil { - var err error - e, err = a.semExpr(c.Expr) - if err != nil { - return nil, err - } + e = a.semExpr(c.Expr) } else if o.Expr == nil { // c.Expr == nil indicates the default case, // whose handling depends on p.Expr. @@ -492,24 +453,18 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { Value: "true", } } - path, err := a.semSeq(c.Path) - if err != nil { - return nil, err - } + path := a.semSeq(c.Path) cases = append(cases, dag.Case{Expr: e, Path: path}) } return append(seq, &dag.Switch{ Kind: "Switch", Expr: expr, Cases: cases, - }), nil + }) case *ast.Shape: - return append(seq, &dag.Shape{Kind: "Shape"}), nil + return append(seq, &dag.Shape{Kind: "Shape"}) case *ast.Cut: - assignments, err := a.semAssignments(o.Args) - if err != nil { - return nil, err - } + assignments := a.semAssignments(o.Args) // Collect static paths so we can check on what is available. var fields field.List for _, a := range assignments { @@ -517,116 +472,108 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { fields = append(fields, this.Path) } } - if _, err = zed.NewRecordBuilder(a.zctx, fields); err != nil { - return nil, fmt.Errorf("cut: %w", err) + if _, err := zed.NewRecordBuilder(a.zctx, fields); err != nil { + a.error(o.Args, err) + return append(seq, badOp()) } return append(seq, &dag.Cut{ Kind: "Cut", Args: assignments, - }), nil + }) case *ast.Drop: - args, err := a.semFields(o.Args) - if err != nil { - return nil, fmt.Errorf("drop: %w", err) - } + args := a.semFields(o.Args) if len(args) == 0 { - return nil, errors.New("drop: no fields given") + a.error(o, errors.New("no fields given")) } return append(seq, &dag.Drop{ Kind: "Drop", Args: args, - }), nil + }) case *ast.Sort: - exprs, err := a.semExprs(o.Args) - if err != nil { - return nil, fmt.Errorf("sort: %w", err) - } + exprs := a.semExprs(o.Args) return append(seq, &dag.Sort{ Kind: "Sort", Args: exprs, Order: o.Order, NullsFirst: o.NullsFirst, - }), nil + }) case *ast.Head: val := zed.NewInt64(1) if o.Count != nil { - expr, err := a.semExpr(o.Count) - if err != nil { - return nil, fmt.Errorf("head: %w", err) - } + expr := a.semExpr(o.Count) + var err error if val, err = kernel.EvalAtCompileTime(a.zctx, expr); err != nil { - return nil, fmt.Errorf("head: %w", err) + a.error(o.Count, err) + return append(seq, badOp()) + } + if !zed.IsInteger(val.Type().ID()) { + a.error(o.Count, fmt.Errorf("expression value must be an integer value: %s", zson.FormatValue(val))) + return append(seq, badOp()) } } if val.AsInt() < 1 { - return nil, fmt.Errorf("head: expression value is not a positive integer: %s", zson.FormatValue(val)) + a.error(o.Count, errors.New("expression value must be a positive 
integer")) } return append(seq, &dag.Head{ Kind: "Head", Count: int(val.AsInt()), - }), nil + }) case *ast.Tail: val := zed.NewInt64(1) if o.Count != nil { - expr, err := a.semExpr(o.Count) - if err != nil { - return nil, fmt.Errorf("tail: %w", err) - } + expr := a.semExpr(o.Count) + var err error if val, err = kernel.EvalAtCompileTime(a.zctx, expr); err != nil { - return nil, fmt.Errorf("tail: %w", err) + a.error(o.Count, err) + return append(seq, badOp()) + } + if !zed.IsInteger(val.Type().ID()) { + a.error(o.Count, fmt.Errorf("expression value must be an integer value: %s", zson.FormatValue(val))) + return append(seq, badOp()) } } if val.AsInt() < 1 { - return nil, fmt.Errorf("tail: expression value is not a positive integer: %s", zson.FormatValue(val)) + a.error(o.Count, errors.New("expression value must be a positive integer")) } return append(seq, &dag.Tail{ Kind: "Tail", Count: int(val.AsInt()), - }), nil + }) case *ast.Uniq: return append(seq, &dag.Uniq{ Kind: "Uniq", Cflag: o.Cflag, - }), nil + }) case *ast.Pass: - return append(seq, dag.PassOp), nil + return append(seq, dag.PassOp) case *ast.OpExpr: return a.semOpExpr(o.Expr, seq) case *ast.Search: - e, err := a.semExpr(o.Expr) - if err != nil { - return nil, err - } - return append(seq, dag.NewFilter(e)), nil + e := a.semExpr(o.Expr) + return append(seq, dag.NewFilter(e)) case *ast.Where: - e, err := a.semExpr(o.Expr) - if err != nil { - return nil, err - } - return append(seq, dag.NewFilter(e)), nil + e := a.semExpr(o.Expr) + return append(seq, dag.NewFilter(e)) case *ast.Top: - args, err := a.semExprs(o.Args) - if err != nil { - return nil, fmt.Errorf("top: %w", err) - } + args := a.semExprs(o.Args) if len(args) == 0 { - return nil, errors.New("top: no arguments given") + a.error(o, errors.New("no arguments given")) } limit := 1 if o.Limit != nil { - l, err := a.semExpr(o.Limit) - if err != nil { - return nil, err - } + l := a.semExpr(o.Limit) val, err := kernel.EvalAtCompileTime(a.zctx, l) if err != nil { - return nil, err + a.error(o.Limit, err) + return append(seq, badOp()) } if !zed.IsSigned(val.Type().ID()) { - return nil, errors.New("top: limit argument must be an integer") + a.error(o.Limit, errors.New("limit argument must be an integer")) + return append(seq, badOp()) } if limit = int(val.Int()); limit < 1 { - return nil, errors.New("top: limit argument value must be greater than 0") + a.error(o.Limit, errors.New("limit argument value must be greater than 0")) + return append(seq, badOp()) } } return append(seq, &dag.Top{ @@ -634,12 +581,9 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { Args: args, Flush: o.Flush, Limit: limit, - }), nil + }) case *ast.Put: - assignments, err := a.semAssignments(o.Args) - if err != nil { - return nil, err - } + assignments := a.semAssignments(o.Args) // We can do collision checking on static paths, so check what we can. 
var fields field.List for _, a := range assignments { @@ -648,27 +592,20 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { } } if err := expr.CheckPutFields(fields); err != nil { - return nil, fmt.Errorf("put: %w", err) + a.error(o, err) } return append(seq, &dag.Put{ Kind: "Put", Args: assignments, - }), nil + }) case *ast.OpAssignment: - converted, err := a.semOpAssignment(o) - if err != nil { - return nil, err - } - return append(seq, converted), nil + return append(seq, a.semOpAssignment(o)) case *ast.Rename: var assignments []dag.Assignment for _, fa := range o.Args { - assign, err := a.semAssignment(fa) - if err != nil { - return nil, fmt.Errorf("rename: %w", err) - } + assign := a.semAssignment(fa) if !isLval(assign.RHS) { - return nil, fmt.Errorf("rename: illegal right-hand side of assignment") + a.error(fa.RHS, fmt.Errorf("illegal right-hand side of assignment")) } // If both paths are static validate them. Otherwise this will be // done at runtime. @@ -676,7 +613,7 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { rhs, rhsOk := assign.RHS.(*dag.This) if rhsOk && lhsOk { if err := expr.CheckRenameField(lhs.Path, rhs.Path); err != nil { - return nil, fmt.Errorf("rename: %w", err) + a.error(&fa, err) } } assignments = append(assignments, assign) @@ -684,27 +621,15 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { return append(seq, &dag.Rename{ Kind: "Rename", Args: assignments, - }), nil + }) case *ast.Fuse: - return append(seq, &dag.Fuse{Kind: "Fuse"}), nil + return append(seq, &dag.Fuse{Kind: "Fuse"}) case *ast.Join: - rightInput, err := a.semSeq(o.RightInput) - if err != nil { - return nil, err - } - leftKey, err := a.semExpr(o.LeftKey) - if err != nil { - return nil, err - } + rightInput := a.semSeq(o.RightInput) + leftKey := a.semExpr(o.LeftKey) rightKey := leftKey if o.RightKey != nil { - if rightKey, err = a.semExpr(o.RightKey); err != nil { - return nil, err - } - } - assignments, err := a.semAssignments(o.Args) - if err != nil { - return nil, err + rightKey = a.semExpr(o.RightKey) } join := &dag.Join{ Kind: "Join", @@ -713,7 +638,7 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { LeftKey: leftKey, RightDir: order.Unknown, RightKey: rightKey, - Args: assignments, + Args: a.semAssignments(o.Args), } if rightInput != nil { par := &dag.Fork{ @@ -722,29 +647,26 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { } seq = append(seq, par) } - return append(seq, join), nil + return append(seq, join) case *ast.Explode: typ, err := a.semType(o.Type) if err != nil { - return nil, err - } - args, err := a.semExprs(o.Args) - if err != nil { - return nil, err + a.error(o.Type, err) + typ = "" } + args := a.semExprs(o.Args) var as string if o.As == nil { as = "value" } else { - e, err := a.semExpr(o.As) - if err != nil { - return nil, err - } + e := a.semExpr(o.As) this, ok := e.(*dag.This) if !ok { - return nil, errors.New("explode: as clause must be a field reference") + a.error(o.As, errors.New("as clause must be a field reference")) + return append(seq, badOp()) } else if len(this.Path) != 1 { - return nil, errors.New("explode: field must be a top-level field") + a.error(o.As, errors.New("field must be a top-level field")) + return append(seq, badOp()) } as = this.Path[0] } @@ -753,51 +675,35 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { Args: args, Type: typ, As: as, - }), nil + }) case *ast.Merge: - expr, err := a.semExpr(o.Expr) - if err != nil { - return nil, 
fmt.Errorf("merge: %w", err) - } return append(seq, &dag.Merge{ Kind: "Merge", - Expr: expr, + Expr: a.semExpr(o.Expr), Order: order.Asc, //XXX - }), nil + }) case *ast.Over: if len(o.Locals) != 0 && o.Body == nil { - return nil, errors.New("over operator: cannot have a with clause without a lateral query") + a.error(o, errors.New("cannot have a with clause without a lateral query")) } a.enterScope() defer a.exitScope() - locals, err := a.semVars(o.Locals) - if err != nil { - return nil, err - } - exprs, err := a.semExprs(o.Exprs) - if err != nil { - return nil, err - } + locals := a.semVars(o.Locals) + exprs := a.semExprs(o.Exprs) var body dag.Seq if o.Body != nil { - body, err = a.semSeq(o.Body) - if err != nil { - return nil, err - } + body = a.semSeq(o.Body) } return append(seq, &dag.Over{ Kind: "Over", Defs: locals, Exprs: exprs, Body: body, - }), nil + }) case *ast.Sample: e := dag.Expr(&dag.This{Kind: "This"}) if o.Expr != nil { - var err error - if e, err = a.semExpr(o.Expr); err != nil { - return nil, err - } + e = a.semExpr(o.Expr) } seq = append(seq, &dag.Summarize{ Kind: "Summarize", @@ -819,12 +725,9 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { return append(seq, &dag.Yield{ Kind: "Yield", Exprs: []dag.Expr{&dag.This{Kind: "This", Path: field.Path{"sample"}}}, - }), nil + }) case *ast.Assert: - cond, err := a.semExpr(o.Expr) - if err != nil { - return nil, err - } + cond := a.semExpr(o.Expr) // 'assert EXPR' is equivalent to // 'yield EXPR ? this : error({message: "assertion failed", "expr": EXPR_text, "on": this}' // where EXPR_text is the literal text of EXPR. @@ -861,22 +764,20 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { }, }, }, - }), nil + }) case *ast.Yield: - exprs, err := a.semExprs(o.Exprs) - if err != nil { - return nil, err - } + exprs := a.semExprs(o.Exprs) return append(seq, &dag.Yield{ Kind: "Yield", Exprs: exprs, - }), nil + }) case *ast.Load: poolID, err := lakeparse.ParseID(o.Pool) if err != nil { poolID, err = a.source.PoolID(a.ctx, o.Pool) if err != nil { - return nil, err + a.error(o, err) + return append(seq, badOp()) } } return append(seq, &dag.Load{ @@ -886,19 +787,16 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { Author: o.Author, Message: o.Message, Meta: o.Meta, - }), nil + }) } - return nil, fmt.Errorf("semantic transform: unknown AST operator type: %T", o) + panic(fmt.Errorf("semantic transform: unknown AST operator type: %T", o)) } func (a *analyzer) singletonAgg(agg ast.Assignment, seq dag.Seq) dag.Seq { if agg.LHS != nil { return nil } - out, err := a.semAssignment(agg) - if err != nil { - return nil - } + out := a.semAssignment(agg) this, ok := out.LHS.(*dag.This) if !ok || len(this.Path) != 1 { return nil @@ -915,70 +813,55 @@ func (a *analyzer) singletonAgg(agg ast.Assignment, seq dag.Seq) dag.Seq { ) } -func (a *analyzer) semDecls(decls []ast.Decl) ([]dag.Def, []*dag.Func, error) { +func (a *analyzer) semDecls(decls []ast.Decl) ([]dag.Def, []*dag.Func) { var consts []dag.Def var fnDecls []*ast.FuncDecl for _, d := range decls { switch d := d.(type) { case *ast.ConstDecl: - c, err := a.semConstDecl(d) - if err != nil { - return nil, nil, err - } - consts = append(consts, c) + consts = append(consts, a.semConstDecl(d)) case *ast.FuncDecl: fnDecls = append(fnDecls, d) case *ast.OpDecl: - if err := a.semOpDecl(d); err != nil { - return nil, nil, err - } + a.semOpDecl(d) case *ast.TypeDecl: - c, err := a.semTypeDecl(d) - if err != nil { - return nil, nil, err - } - consts = 
append(consts, c) + consts = append(consts, a.semTypeDecl(d)) default: - return nil, nil, fmt.Errorf("invalid declaration type %T", d) + panic(fmt.Errorf("invalid declaration type %T", d)) } } - funcs, err := a.semFuncDecls(fnDecls) - if err != nil { - return nil, nil, err - } - return consts, funcs, nil + funcs := a.semFuncDecls(fnDecls) + return consts, funcs } -func (a *analyzer) semConstDecl(c *ast.ConstDecl) (dag.Def, error) { - e, err := a.semExpr(c.Expr) - if err != nil { - return dag.Def{}, err - } +func (a *analyzer) semConstDecl(c *ast.ConstDecl) dag.Def { + e := a.semExpr(c.Expr) if err := a.scope.DefineConst(a.zctx, c.Name, e); err != nil { - return dag.Def{}, err + a.error(c, err) } return dag.Def{ Name: c.Name, Expr: e, - }, nil + } } -func (a *analyzer) semTypeDecl(d *ast.TypeDecl) (dag.Def, error) { +func (a *analyzer) semTypeDecl(d *ast.TypeDecl) dag.Def { typ, err := a.semType(d.Type) if err != nil { - return dag.Def{}, err + a.error(d.Type, err) + typ = "error(\"string\")" } e := &dag.Literal{ Kind: "Literal", Value: fmt.Sprintf("<%s=%s>", zson.QuotedName(d.Name), typ), } if err := a.scope.DefineConst(a.zctx, d.Name, e); err != nil { - return dag.Def{}, err + a.error(d, err) } - return dag.Def{Name: d.Name, Expr: e}, nil + return dag.Def{Name: d.Name, Expr: e} } -func (a *analyzer) semFuncDecls(decls []*ast.FuncDecl) ([]*dag.Func, error) { +func (a *analyzer) semFuncDecls(decls []*ast.FuncDecl) []*dag.Func { funcs := make([]*dag.Func, 0, len(decls)) for _, d := range decls { f := &dag.Func{ @@ -987,118 +870,117 @@ func (a *analyzer) semFuncDecls(decls []*ast.FuncDecl) ([]*dag.Func, error) { Params: slices.Clone(d.Params), } if err := a.scope.DefineAs(f.Name, f); err != nil { - return nil, err + a.error(d, err) } funcs = append(funcs, f) } for i, d := range decls { - var err error - if funcs[i].Expr, err = a.semFuncBody(d.Params, d.Expr); err != nil { - return nil, err - } + funcs[i].Expr = a.semFuncBody(d, d.Params, d.Expr) } - return funcs, nil + return funcs } -func (a *analyzer) semFuncBody(params []string, body ast.Expr) (dag.Expr, error) { +func (a *analyzer) semFuncBody(d *ast.FuncDecl, params []string, body ast.Expr) dag.Expr { a.enterScope() defer a.exitScope() for _, p := range params { if err := a.scope.DefineVar(p); err != nil { - return nil, err + // XXX Each param should be a node but now just report the error + // as the entire declaration. 
+			a.error(d, err)
 		}
 	}
 	return a.semExpr(body)
 }
 
-func (a *analyzer) semOpDecl(d *ast.OpDecl) error {
+func (a *analyzer) semOpDecl(d *ast.OpDecl) {
 	m := make(map[string]bool)
 	for _, p := range d.Params {
 		if m[p] {
-			return fmt.Errorf("%s: duplicate parameter %q", d.Name, p)
+			a.error(d, fmt.Errorf("duplicate parameter %q", p))
+			a.scope.DefineAs(d.Name, &opDecl{bad: true})
+			return
 		}
 		m[p] = true
 	}
-	return a.scope.DefineAs(d.Name, &opDecl{ast: d, scope: a.scope})
+	if err := a.scope.DefineAs(d.Name, &opDecl{ast: d, scope: a.scope}); err != nil {
+		a.error(d, err)
+	}
 }
 
-func (a *analyzer) semVars(defs []ast.Def) ([]dag.Def, error) {
+func (a *analyzer) semVars(defs []ast.Def) []dag.Def {
 	var locals []dag.Def
 	for _, def := range defs {
-		e, err := a.semExpr(def.Expr)
-		if err != nil {
-			return nil, err
-		}
-		name := def.Name
-		if err := a.scope.DefineVar(name); err != nil {
-			return nil, err
+		e := a.semExpr(def.Expr)
+		if err := a.scope.DefineVar(def.Name); err != nil {
+			a.error(def, err)
+			continue
 		}
 		locals = append(locals, dag.Def{
-			Name: name,
+			Name: def.Name,
 			Expr: e,
 		})
 	}
-	return locals, nil
+	return locals
 }
 
-func (a *analyzer) semOpAssignment(p *ast.OpAssignment) (dag.Op, error) {
+func (a *analyzer) semOpAssignment(p *ast.OpAssignment) dag.Op {
 	var aggs, puts []dag.Assignment
-	for _, assign := range p.Assignments {
+	for _, astAssign := range p.Assignments {
 		// Partition assignments into agg vs. puts.
-		a, err := a.semAssignment(assign)
-		if err != nil {
-			return nil, err
-		}
-		if _, ok := a.RHS.(*dag.Agg); ok {
-			if _, ok := a.LHS.(*dag.This); !ok {
-				return nil, errors.New("summarize: aggregate output field must be static")
+		assign := a.semAssignment(astAssign)
+		if _, ok := assign.RHS.(*dag.Agg); ok {
+			if _, ok := assign.LHS.(*dag.This); !ok {
+				a.error(astAssign.LHS, errors.New("aggregate output field must be static"))
 			}
-			aggs = append(aggs, a)
+			aggs = append(aggs, assign)
 		} else {
-			puts = append(puts, a)
+			puts = append(puts, assign)
 		}
 	}
 	if len(puts) > 0 && len(aggs) > 0 {
-		return nil, errors.New("mix of aggregations and non-aggregations in assignment list")
+		a.error(p, errors.New("mix of aggregations and non-aggregations in assignment list"))
+		return badOp()
 	}
 	if len(puts) > 0 {
 		return &dag.Put{
 			Kind: "Put",
 			Args: puts,
-		}, nil
+		}
 	}
 	return &dag.Summarize{
 		Kind: "Summarize",
 		Aggs: aggs,
-	}, nil
+	}
 }
 
-func assignmentHasDynamicLHS(assignments []dag.Assignment) bool {
-	for _, a := range assignments {
-		if _, ok := a.LHS.(*dag.This); !ok {
+func (a *analyzer) checkStaticAssignment(asts []ast.Assignment, assignments []dag.Assignment) bool {
+	for k, assign := range assignments {
+		if _, ok := assign.LHS.(*dag.BadExpr); ok {
+			continue
+		}
+		if _, ok := assign.LHS.(*dag.This); !ok {
+			a.error(asts[k].LHS, errors.New("assignment key must be static"))
 			return true
 		}
 	}
 	return false
 }
 
-func (a *analyzer) semOpExpr(e ast.Expr, seq dag.Seq) (dag.Seq, error) {
+func (a *analyzer) semOpExpr(e ast.Expr, seq dag.Seq) dag.Seq {
 	if call, ok := e.(*ast.Call); ok {
-		if seq, err := a.semCallOp(call, seq); seq != nil || err != nil {
-			return seq, err
+		if seq := a.semCallOp(call, seq); seq != nil {
+			return seq
 		}
 	}
-	out, err := a.semExpr(e)
-	if err != nil {
-		return nil, err
-	}
+	out := a.semExpr(e)
 	if a.isBool(out) {
-		return append(seq, dag.NewFilter(out)), nil
+		return append(seq, dag.NewFilter(out))
 	}
 	return append(seq, &dag.Yield{
 		Kind:  "Yield",
 		Exprs: []dag.Expr{out},
-	}), nil
+	})
 }
 
 func (a *analyzer) isBool(e dag.Expr) bool {
@@ -1138,13 +1020,11 @@ func (a *analyzer) isBool(e dag.Expr) bool {
 	}
 }
 
-func (a *analyzer) semCallOp(call *ast.Call, seq dag.Seq) (dag.Seq, error) {
-	if body, err := a.maybeConvertUserOp(call, seq); err != nil {
-		return nil, err
-	} else if body != nil {
-		return append(seq, body...), nil
+func (a *analyzer) semCallOp(call *ast.Call, seq dag.Seq) dag.Seq {
+	if body := a.maybeConvertUserOp(call); body != nil {
+		return append(seq, body...)
 	}
-	if agg, err := a.maybeConvertAgg(call); err == nil && agg != nil {
+	if agg := a.maybeConvertAgg(call); agg != nil {
 		summarize := &dag.Summarize{
 			Kind: "Summarize",
 			Aggs: []dag.Assignment{
@@ -1159,49 +1039,54 @@ func (a *analyzer) semCallOp(call *ast.Call, seq dag.Seq) (dag.Seq, error) {
 			Kind:  "Yield",
 			Exprs: []dag.Expr{&dag.This{Kind: "This", Path: field.Path{call.Name}}},
 		}
-		return append(append(seq, summarize), yield), nil
+		return append(append(seq, summarize), yield)
 	}
 	if !function.HasBoolResult(call.Name) {
-		return nil, nil
-	}
-	c, err := a.semCall(call)
-	if err != nil {
-		return nil, err
+		return nil
 	}
-	return append(seq, dag.NewFilter(c)), nil
+	c := a.semCall(call)
+	return append(seq, dag.NewFilter(c))
 }
 
-// maybeConvertUserOp returns nil, nil if the call is determined to not be a
-// UserOp, otherwise it returns the compiled op or the encountered error.
-func (a *analyzer) maybeConvertUserOp(call *ast.Call, seq dag.Seq) (dag.Seq, error) {
+// maybeConvertUserOp returns nil if the call is determined to not be a
+// UserOp, otherwise it returns the compiled op.
+func (a *analyzer) maybeConvertUserOp(call *ast.Call) dag.Seq {
 	decl, err := a.scope.lookupOp(call.Name)
-	if decl == nil || err != nil {
-		return nil, nil
+	if decl == nil {
+		return nil
+	}
+	if err != nil {
+		a.error(call, err)
+		return dag.Seq{badOp()}
+	}
+	if decl.bad {
+		return dag.Seq{badOp()}
 	}
 	if call.Where != nil {
-		return nil, fmt.Errorf("%s(): user defined operators cannot have a where clause", call.Name)
+		a.error(call, errors.New("user defined operators cannot have a where clause"))
+		return dag.Seq{badOp()}
 	}
 	params, args := decl.ast.Params, call.Args
 	if len(params) != len(args) {
-		return nil, fmt.Errorf("%s(): %d arg%s provided when operator expects %d arg%s", call.Name, len(params), plural.Slice(params, "s"), len(args), plural.Slice(args, "s"))
+		a.error(call, fmt.Errorf("%d arg%s provided when operator expects %d arg%s", len(params), plural.Slice(params, "s"), len(args), plural.Slice(args, "s")))
+		return dag.Seq{badOp()}
 	}
 	exprs := make([]dag.Expr, len(decl.ast.Params))
 	for i, arg := range args {
-		e, err := a.semExpr(arg)
-		if err != nil {
-			return nil, err
-		}
+		e := a.semExpr(arg)
 		// Transform non-path arguments into literals.
 		if _, ok := e.(*dag.This); !ok {
 			val, err := kernel.EvalAtCompileTime(a.zctx, e)
 			if err != nil {
-				return nil, err
+				a.error(arg, err)
+				exprs[i] = badExpr()
+				continue
 			}
 			if val.IsError() {
 				if val.IsMissing() {
-					return nil, fmt.Errorf("%q: non-path arguments cannot have variable dependency", decl.ast.Params[i])
+					a.error(arg, errors.New("non-path arguments cannot have variable dependency"))
 				} else {
-					return nil, fmt.Errorf("%q: %q", decl.ast.Params[i], string(val.Bytes()))
+					a.error(arg, errors.New(string(val.Bytes())))
 				}
 			}
 			e = &dag.Literal{
@@ -1212,7 +1097,8 @@ func (a *analyzer) maybeConvertUserOp(call *ast.Call, seq dag.Seq) (dag.Seq, err
 		exprs[i] = e
 	}
 	if slices.Contains(a.opStack, decl.ast) {
-		return nil, opCycleError(append(a.opStack, decl.ast))
+		a.error(call, opCycleError(append(a.opStack, decl.ast)))
+		return dag.Seq{badOp()}
 	}
 	a.opStack = append(a.opStack, decl.ast)
 	oldscope := a.scope
@@ -1223,7 +1109,8 @@ func (a *analyzer) maybeConvertUserOp(call *ast.Call, seq dag.Seq) (dag.Seq, err
 	}()
 	for i, p := range params {
 		if err := a.scope.DefineAs(p, exprs[i]); err != nil {
-			return nil, err
+			a.error(call, err)
+			return dag.Seq{badOp()}
 		}
 	}
 	return a.semSeq(decl.ast.Body)
diff --git a/compiler/ztests/badshaper.yaml b/compiler/ztests/badshaper.yaml
index 91745b9e49..e70adf50e7 100644
--- a/compiler/ztests/badshaper.yaml
+++ b/compiler/ztests/badshaper.yaml
@@ -13,4 +13,6 @@ inputs:
 outputs:
   - name: stderr
     data: |
-      no such type name: "null"
+      no such type name: "null" (badshaper.zed: line 1, column 10):
+      type foo={_path:string,testfield:"null"}
+               ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/compiler/ztests/const-source.yaml b/compiler/ztests/const-source.yaml
index 661433b912..0f6b819e3e 100644
--- a/compiler/ztests/const-source.yaml
+++ b/compiler/ztests/const-source.yaml
@@ -33,6 +33,12 @@ outputs:
       )
   - name: stderr
     data: |
-      invalid pool name: POOL: string value required
-      invalid file path: FILE: string value required
-      invalid file path: URL: string value required
+      invalid pool name: POOL: string value required (line 1, column 24):
+      const POOL = 3.14 from POOL
+                             ~~~~
+      FILE: string value required (line 1, column 29):
+      const FILE = 127.0.0.1 file FILE
+                                  ~~~~
+      URL: string value required (line 1, column 22):
+      const URL = true get URL
+                           ~~~
diff --git a/compiler/ztests/from-error.yaml b/compiler/ztests/from-error.yaml
index c40e85b392..0ca5d7d787 100644
--- a/compiler/ztests/from-error.yaml
+++ b/compiler/ztests/from-error.yaml
@@ -15,12 +15,22 @@ script: |
 outputs:
   - name: stderr
     data: |
-      semantic analyzer: from pool cannot be used without a lake
+      from pool cannot be used without a lake (line 1, column 1):
+      from p
+      ~~~~~~
       ===
-      test: pool not found
+      test: pool not found (line 1, column 6):
+      from test
+           ~~~~
       ===
-      test*: pool matching glob not found
+      test*: pool matching glob not found (line 1, column 6):
+      from test*
+           ~~~~~
       ===
-      test: pool matching regexp not found
+      test: pool matching regexp not found (line 1, column 6):
+      from /test/
+           ~~~~~~
       ===
-      => not allowed after pool pattern in 'from' operator
+      => not allowed after pool pattern in 'from' operator (line 1, column 7):
+      from (pool * => count())
+            ~~~~~~~~~~~~~~~~~
diff --git a/compiler/ztests/head.yaml b/compiler/ztests/head.yaml
index a6406b03c5..e8bcc39ebe 100644
--- a/compiler/ztests/head.yaml
+++ b/compiler/ztests/head.yaml
@@ -20,6 +20,12 @@ outputs:
       )
   - name: stderr
    data: |
-      head: expression value is not a positive integer: 1.
-      head: expression value is not a positive integer: "1"
-      head: expression value is not a positive integer: error("missing")
+      expression value must be an integer value: 1. (line 1, column 6):
+      head 1.
+           ~~
+      expression value must be an integer value: "1" (line 1, column 7):
+      head "1"
+            ~
+      expression value must be an integer value: error("missing") (line 1, column 6):
+      head x
+           ~
diff --git a/compiler/ztests/summarize-lhs-error.yaml b/compiler/ztests/summarize-lhs-error.yaml
index 1f9e865c1d..cdbee3f0b0 100644
--- a/compiler/ztests/summarize-lhs-error.yaml
+++ b/compiler/ztests/summarize-lhs-error.yaml
@@ -6,6 +6,12 @@ script: |
 outputs:
   - name: stderr
     data: |
-      summarize: key output field must be static
-      summarize: aggregate output field must be static
-      summarize: aggregate output field must be static
+      assignment key must be static (line 1, column 12):
+      count() by this[a] := key
+                 ~~~~~~~
+      assignment key must be static (line 1, column 1):
+      this[a] := count() by key
+      ~~~~~~~
+      aggregate output field must be static (line 1, column 1):
+      this[a] := count()
+      ~~~~~~~
diff --git a/compiler/ztests/tail.yaml b/compiler/ztests/tail.yaml
index b4da9ef4a7..8b621b1816 100644
--- a/compiler/ztests/tail.yaml
+++ b/compiler/ztests/tail.yaml
@@ -20,6 +20,12 @@ outputs:
       )
   - name: stderr
     data: |
-      tail: expression value is not a positive integer: 1.
-      tail: expression value is not a positive integer: "1"
-      tail: expression value is not a positive integer: error("missing")
+      expression value must be an integer value: 1. (line 1, column 6):
+      tail 1.
+           ~~
+      expression value must be an integer value: "1" (line 1, column 7):
+      tail "1"
+            ~
+      expression value must be an integer value: error("missing") (line 1, column 6):
+      tail x
+           ~
diff --git a/compiler/ztests/udf-err.yaml b/compiler/ztests/udf-err.yaml
index a70ec602bd..41771d3a56 100644
--- a/compiler/ztests/udf-err.yaml
+++ b/compiler/ztests/udf-err.yaml
@@ -21,6 +21,12 @@ inputs:
 outputs:
   - name: stderr
     data: |
-      symbol "dup" redefined
-      notAFunc(): definition is not a function type: *dag.Literal
-      f(): expects 2 argument(s)
+      symbol "dup" redefined (duplicate.zed: line 2, column 1):
+      func dup(n): (n+2)
+      ~~~~~~~~~~~~~~~~~
+      definition is not a function type: *dag.Literal (call-non-func.zed: line 2, column 7):
+      yield notAFunc(this)
+            ~~~~~~~~~~~~~~
+      call expects 2 argument(s) (wrong-args.zed: line 2, column 7):
+      yield f(this)
+            ~~~~~~~
diff --git a/compiler/ztests/where-on-func.yaml b/compiler/ztests/where-on-func.yaml
index 21ae7245d6..6866bb9ad6 100644
--- a/compiler/ztests/where-on-func.yaml
+++ b/compiler/ztests/where-on-func.yaml
@@ -1,3 +1,3 @@
 zed: cut hex := hex(this) where this % 2 == 0
 
-errorRE: "'where' clause on non-aggregation function: hex"
+errorRE: "'where' clause on non-aggregation function"
diff --git a/docs/language/operators/rename.md b/docs/language/operators/rename.md
index 094359ed54..c9acd3d363 100644
--- a/docs/language/operators/rename.md
+++ b/docs/language/operators/rename.md
@@ -49,7 +49,9 @@ echo '{a:1,r:{b:2,c:3}}' | zq -z 'rename w:=r.b' -
 ```
 =>
 ```mdtest-output
-rename: left-hand side and right-hand side must have the same depth (w vs r.b)
+left-hand side and right-hand side must have the same depth (w vs r.b) (line 1, column 8):
+rename w:=r.b
+       ~~~~~~
 ```
 _Record literals can be used instead of rename for mutation_
 ```mdtest-command
diff --git a/docs/language/statements.md b/docs/language/statements.md
index ed58f90a38..f2f56676d8 100644
--- a/docs/language/statements.md
+++ b/docs/language/statements.md
@@ -158,7 +158,9 @@ echo '{greeting: "hi"}' | zq -z -I params.zed 'AddMessage("message", "hello")' -
 ```
 which produces
 ```mdtest-output
-illegal left-hand side of assignment
+illegal left-hand side of assignment (params.zed: line 2, column 3):
+  field_for_message:=msg
+  ~~~~~~~~~~~~~~~~~~~~~~
 ```
 
 A constant value must be used to pass a parameter that will be referenced as
diff --git a/docs/tutorials/schools.md b/docs/tutorials/schools.md
index dfef58d0e7..1b8e1297d0 100644
--- a/docs/tutorials/schools.md
+++ b/docs/tutorials/schools.md
@@ -843,7 +843,9 @@ zq -Z 'rename toplevel:=outer.inner' nested.zson
 ```
 produces this compile-time error message and the query is not run:
 ```mdtest-output
-rename: left-hand side and right-hand side must have the same depth (toplevel vs outer.inner)
+left-hand side and right-hand side must have the same depth (toplevel vs outer.inner) (line 1, column 8):
+rename toplevel:=outer.inner
+       ~~~~~~~~~~~~~~~~~~~~~
 ```
 This goal could instead be achieved by combining [`put`](#44-put) and
 [`drop`](#42-drop), e.g.,
diff --git a/lake/ztests/dirs.yaml b/lake/ztests/dirs.yaml
index 7005fd147f..6fb495095e 100644
--- a/lake/ztests/dirs.yaml
+++ b/lake/ztests/dirs.yaml
@@ -17,4 +17,6 @@ outputs:
       {min:2020-04-21T22:40:30.06852324Z,max:2020-04-22T01:23:40.0622373Z}
   - name: stderr
     data: |
-      logs: pool not found
+      logs: pool not found (line 1, column 6):
+      from logs@main:objects
+           ~~~~
diff --git a/lake/ztests/drop.yaml b/lake/ztests/drop.yaml
index 18667a7e91..db4ffa6a84 100644
--- a/lake/ztests/drop.yaml
+++ b/lake/ztests/drop.yaml
@@ -12,4 +12,6 @@ inputs:
 outputs:
   - name: stderr
     data: |
-      logs: pool not found
+      logs: pool not found (line 1, column 6):
+      from 'logs'@'main':log
+           ~~~~~~
diff --git a/lake/ztests/query-error.yaml b/lake/ztests/query-error.yaml
index d140c166df..01c7517f17 100644
--- a/lake/ztests/query-error.yaml
+++ b/lake/ztests/query-error.yaml
@@ -10,6 +10,12 @@ outputs:
   - name: stderr
     data: |
       query must include a 'from' operator
-      cannot scan from unknown HEAD
-      unknown lake metadata type "unknownmeta" in from operator
-      doesnotexist: pool not found
+      cannot scan from unknown HEAD (line 1, column 6):
+      from HEAD
+           ~~~~
+      unknown lake metadata type "unknownmeta" in from operator (line 1, column 1):
+      from :unknownmeta
+      ~~~~~~~~~~~~~~~~~
+      doesnotexist: pool not found (line 1, column 6):
+      from doesnotexist
+           ~~~~~~~~~~~~
diff --git a/runtime/sam/expr/ztests/cut-dup-fields.yaml b/runtime/sam/expr/ztests/cut-dup-fields.yaml
index 4ffd78207c..5cf99c92a6 100644
--- a/runtime/sam/expr/ztests/cut-dup-fields.yaml
+++ b/runtime/sam/expr/ztests/cut-dup-fields.yaml
@@ -12,7 +12,15 @@ inputs:
 outputs:
   - name: stderr
     data: |
-      cut: duplicate field: "rec"
-      cut: duplicate field: "rec.sub1"
-      cut: duplicate field: "rec.sub.sub"
-      cut: duplicate field: "rec.sub"
+      duplicate field: "rec" (line 1, column 5):
+      cut rec,other,rec
+          ~~~~~~~~~~~~~
+      duplicate field: "rec.sub1" (line 1, column 5):
+      cut rec.sub1,rec.sub1
+          ~~~~~~~~~~~~~~~~~
+      duplicate field: "rec.sub.sub" (line 1, column 5):
+      cut rec.sub,rec.sub.sub
+          ~~~~~~~~~~~~~~~~~~~
+      duplicate field: "rec.sub" (line 1, column 5):
+      cut rec.sub.sub,rec.sub
+          ~~~~~~~~~~~~~~~~~~~
diff --git a/runtime/sam/expr/ztests/cut-not-adjacent.yaml b/runtime/sam/expr/ztests/cut-not-adjacent.yaml
index de13172053..4bb9f17eb2 100644
--- a/runtime/sam/expr/ztests/cut-not-adjacent.yaml
+++ b/runtime/sam/expr/ztests/cut-not-adjacent.yaml
@@ -12,7 +12,15 @@ inputs:
 outputs:
   - name: stderr
    data: |
-      cut: fields in record rec must be adjacent
-      cut: fields in record rec1 must be adjacent
-      cut: fields in record rec1 must be adjacent
-      cut: fields in record t.rec must be adjacent
+      fields in record rec must be adjacent (line 1, column 5):
+      cut rec.sub1,other,rec.sub2
+          ~~~~~~~~~~~~~~~~~~~~~~~
+      fields in record rec1 must be adjacent (line 1, column 5):
+      cut rec1.rec2.sub1,other,rec1.sub2
+          ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+      fields in record rec1 must be adjacent (line 1, column 5):
+      cut rec1.rec2.sub1,other,rec1.rec2.sub2
+          ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+      fields in record t.rec must be adjacent (line 1, column 5):
+      cut t.rec.sub1,t.other,t.rec.sub2
+          ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/runtime/sam/expr/ztests/rename-error-move.yaml b/runtime/sam/expr/ztests/rename-error-move.yaml
index 4aa70ad092..08b5d973fa 100644
--- a/runtime/sam/expr/ztests/rename-error-move.yaml
+++ b/runtime/sam/expr/ztests/rename-error-move.yaml
@@ -3,4 +3,4 @@ zed: rename dst:=id.resp_h
 input: |
   {id:{orig_h:10.164.94.120,orig_p:39681(port=uint16),resp_h:10.47.3.155,resp_p:3389(port)}}
 
-errorRE: "rename: left-hand side and right-hand side must have the same depth \\(dst vs id.resp_h\\)"
+errorRE: "left-hand side and right-hand side must have the same depth \\(dst vs id.resp_h\\)"
diff --git a/runtime/sam/op/ztests/cut-dynamic-field.yaml b/runtime/sam/op/ztests/cut-dynamic-field.yaml
index 22f43a2eb5..38c86319f0 100644
--- a/runtime/sam/op/ztests/cut-dynamic-field.yaml
+++ b/runtime/sam/op/ztests/cut-dynamic-field.yaml
@@ -43,4 +43,6 @@ outputs:
       error({message:"cut: missing",on:{}})
   - name: stderr
     data: |
-      left-hand side of assignment: symbol "foo" is not bound to an expression
+      symbol "foo" is not bound to an expression (line 1, column 38):
+      op foo(): ( yield "error" ) cut this[foo] := "hello world"
+                                           ~~~
diff --git a/runtime/sam/op/ztests/put-dynamic-field.yaml b/runtime/sam/op/ztests/put-dynamic-field.yaml
index 7566f09213..dcd6a12410 100644
--- a/runtime/sam/op/ztests/put-dynamic-field.yaml
+++ b/runtime/sam/op/ztests/put-dynamic-field.yaml
@@ -38,4 +38,6 @@ outputs:
       error({message:"put: missing",on:{}})
   - name: stderr
     data: |
-      left-hand side of assignment: symbol "foo" is not bound to an expression
+      symbol "foo" is not bound to an expression (line 1, column 38):
+      op foo(): ( yield "error" ) put this[foo] := "hello world"
+                                           ~~~
diff --git a/runtime/sam/op/ztests/user-errors.yaml b/runtime/sam/op/ztests/user-errors.yaml
index 1a951ba938..701b9977cd 100644
--- a/runtime/sam/op/ztests/user-errors.yaml
+++ b/runtime/sam/op/ztests/user-errors.yaml
@@ -8,7 +8,7 @@ inputs:
       op test(a, a): (
         pass
       )
-      op("a", "b")
+      test("a", "b")
   - name: error-const-lhs.zed
     data: |
       op test(a): (
@@ -19,5 +19,9 @@ inputs:
 outputs:
   - name: stderr
     data: |
-      test: duplicate parameter "a"
-      illegal left-hand side of assignment
+      duplicate parameter "a" (line 2, column 1):
+      op test(a, a): (
+      ~~~~~~~~~~~~~~~~
+      illegal left-hand side of assignment (line 3, column 3):
+        a := a
+        ~~~~~~
diff --git a/service/ztests/curl-query-error.yaml b/service/ztests/curl-query-error.yaml
index 375fcd9ede..e8823352f2 100644
--- a/service/ztests/curl-query-error.yaml
+++ b/service/ztests/curl-query-error.yaml
@@ -13,13 +13,13 @@ inputs:
 outputs:
   - name: stdout
     data: |
-      {"type":"Error","kind":"invalid operation","error":"pool name missing"}
+      {"type":"Error","kind":"invalid operation","error":"no pool name given"}
       code 400
-      {"type":"Error","kind":"invalid operation","error":"pool name missing"}
+      {"type":"Error","kind":"invalid operation","error":"no pool name given"}
       code 400
-      {"type":"Error","kind":"invalid operation","error":"pool name missing"}
+      {"type":"Error","kind":"invalid operation","error":"pool name missing (line 1, column 1):\nfrom HEAD\n~~~~~~~~~","info":[{"kind":"LocalizedError","filename":"","line":"from HEAD","open":{"pos":0,"offset":0,"line":1,"column":1},"close":{"pos":9,"offset":9,"line":1,"column":10},"error":"pool name missing"}]}
       code 400
-      {"type":"Error","kind":"invalid operation","error":"unknown lake metadata type \"unknownmeta\" in from operator"}
+      {"type":"Error","kind":"invalid operation","error":"unknown lake metadata type \"unknownmeta\" in from operator (line 1, column 1):\nfrom :unknownmeta\n~~~~~~~~~~~~~~~~~","info":[{"kind":"LocalizedError","filename":"","line":"from :unknownmeta","open":{"pos":0,"offset":0,"line":1,"column":1},"close":{"pos":17,"offset":17,"line":1,"column":18},"error":"unknown lake metadata type \"unknownmeta\" in from operator"}]}
+      code 400
+      {"type":"Error","kind":"invalid operation","error":"doesnotexist: pool not found (line 1, column 6):\nfrom doesnotexist\n     ~~~~~~~~~~~~","info":[{"kind":"LocalizedError","filename":"","line":"from doesnotexist","open":{"pos":5,"offset":5,"line":1,"column":6},"close":{"pos":17,"offset":17,"line":1,"column":18},"error":"doesnotexist: pool not found"}]}
       code 400
-      {"type":"Error","kind":"item does not exist","error":"doesnotexist: pool not found"}
-      code 404
diff --git a/service/ztests/python.yaml b/service/ztests/python.yaml
index c897028ddb..79fd287d01 100644
--- a/service/ztests/python.yaml
+++ b/service/ztests/python.yaml
@@ -161,4 +161,4 @@ outputs:
       ===
       RequestError('test: pool already exists')
       ===
-      RequestError('nosuchpool: pool not found')
+      RequestError('nosuchpool: pool not found (line 1, column 6):\nfrom nosuchpool\n     ~~~~~~~~~~')
diff --git a/service/ztests/query-bad-commit.yaml b/service/ztests/query-bad-commit.yaml
index c375e795cc..81c0388a62 100644
--- a/service/ztests/query-bad-commit.yaml
+++ b/service/ztests/query-bad-commit.yaml
@@ -10,4 +10,6 @@ inputs:
 outputs:
   - name: stderr
     data: |
-      status code 404: "doesnotexist": branch not found
+      "doesnotexist": branch not found (line 1, column 1):
+      from test@doesnotexist
+      ~~~~~~~~~~~~~~~~~~~~~~
diff --git a/service/ztests/query-error.yaml b/service/ztests/query-error.yaml
index ae74877fbb..73b306297e 100644
--- a/service/ztests/query-error.yaml
+++ b/service/ztests/query-error.yaml
@@ -12,7 +12,14 @@ inputs:
 outputs:
   - name: stderr
     data: |
-      status code 400: pool name missing
-      status code 400: pool name missing
-      status code 400: unknown lake metadata type "unknownmeta" in from operator
-      status code 404: doesnotexist: pool not found
+      status code 400: no pool name given
+      pool name missing (line 1, column 1):
+      from HEAD
+      ~~~~~~~~~
+      unknown lake metadata type "unknownmeta" in from operator (line 1, column 1):
+      from :unknownmeta
+      ~~~~~~~~~~~~~~~~~
+      doesnotexist: pool not found (line 1, column 6):
+      from doesnotexist
+           ~~~~~~~~~~~~
+