From 280ebf1df1ec813808b1f9efba8bbd34429ebdf9 Mon Sep 17 00:00:00 2001
From: Matthew Nibecker
Date: Wed, 8 May 2024 10:20:30 -0700
Subject: [PATCH] Add positional information to semantic errors

This commit changes errors produced from semantic analysis so they
include a reference to the AST node which produced the error. This
allows for highlighting the specific portion of the source that caused
a semantic error.

This commit also changes how semantic errors are collected: the
semantic analyzer no longer exits as soon as the first error is found
and instead goes on to analyze the rest of the AST, reporting all
semantic errors it finds.
---
 cli/queryflags/flags.go | 16 +-
 cli/zq/command.go | 4 +-
 cmd/zed/dev/compile/command.go | 7 +-
 cmd/zed/log/command.go | 4 +
 compiler/ast/ast.go | 55 +-
 compiler/ast/dag/expr.go | 6 +
 compiler/ast/dag/op.go | 6 +
 compiler/ast/dag/unpack.go | 2 +
 compiler/job.go | 19 +-
 compiler/parser/parser_test.go | 2 +-
 compiler/semantic/analyzer.go | 38 +-
 compiler/semantic/expr.go | 437 +++++------
 compiler/semantic/op.go | 694 ++++++++----------
 compiler/ztests/badshaper.yaml | 4 +-
 compiler/ztests/const-source.yaml | 12 +-
 compiler/ztests/from-error.yaml | 20 +-
 compiler/ztests/head.yaml | 12 +-
 compiler/ztests/summarize-lhs-error.yaml | 12 +-
 compiler/ztests/tail.yaml | 12 +-
 compiler/ztests/udf-err.yaml | 12 +-
 compiler/ztests/where-on-func.yaml | 2 +-
 docs/language/operators/rename.md | 4 +-
 docs/language/statements.md | 4 +-
 docs/tutorials/schools.md | 4 +-
 fuzz/fuzz.go | 4 +-
 lake/api/local.go | 13 +-
 lake/ztests/dirs.yaml | 4 +-
 lake/ztests/query-error.yaml | 12 +-
 runtime/compiler.go | 13 +-
 runtime/sam/expr/filter_test.go | 4 +-
 runtime/sam/expr/ztests/cut-dup-fields.yaml | 16 +-
 runtime/sam/expr/ztests/cut-not-adjacent.yaml | 16 +-
 .../sam/expr/ztests/rename-error-move.yaml | 2 +-
 runtime/sam/op/groupby/groupby_test.go | 2 +-
 runtime/sam/op/ztests/cut-dynamic-field.yaml | 4 +-
 runtime/sam/op/ztests/put-dynamic-field.yaml | 4 +-
 runtime/sam/op/ztests/user-errors.yaml | 10 +-
 service/handlers.go | 17 +-
 service/ztests/curl-query-error.yaml | 12 +-
 service/ztests/python.yaml | 2 +-
 service/ztests/query-bad-commit.yaml | 4 +-
 service/ztests/query-error.yaml | 15 +-
 zed_test.go | 2 +-
 ztest/ztest.go | 4 +-
 44 files changed, 740 insertions(+), 808 deletions(-)

diff --git a/cli/queryflags/flags.go b/cli/queryflags/flags.go
index cde18ff72f..f1295f5d41 100644
--- a/cli/queryflags/flags.go
+++ b/cli/queryflags/flags.go
@@ -32,7 +32,7 @@ func (f *Flags) SetFlags(fs *flag.FlagSet) {
 	fs.Var(&f.Includes, "I", "source file containing Zed query text (may be used multiple times)")
 }
 
-func (f *Flags) ParseSourcesAndInputs(paths []string) ([]string, ast.Seq, bool, error) {
+func (f *Flags) ParseSourcesAndInputs(paths []string) ([]string, ast.Seq, *parser.SourceSet, bool, error) {
 	var src string
 	if len(paths) != 0 && !cli.FileExists(paths[0]) && !isURLWithKnownScheme(paths[0], "http", "https", "s3") {
 		src = paths[0]
@@ -41,25 +41,25 @@ func (f *Flags) ParseSourcesAndInputs(paths []string) ([]string, ast.Seq, bool,
 		// Consider a lone argument to be a query if it compiles
 		// and appears to start with a from or yield operator.
 		// Otherwise, consider it a file.
-		query, err := compiler.Parse(src, f.Includes...)
+		query, sset, err := compiler.Parse(src, f.Includes...)
if err == nil { if s, err := semantic.Analyze(context.Background(), query, data.NewSource(storage.NewLocalEngine(), nil), nil); err == nil { if semantic.HasSource(s) { - return nil, query, false, nil + return nil, query, sset, false, nil } if semantic.StartsWithYield(s) { - return nil, query, true, nil + return nil, query, sset, true, nil } } } - return nil, nil, false, singleArgError(src, err) + return nil, nil, nil, false, singleArgError(src, err) } } - query, err := compiler.Parse(src, f.Includes...) + query, sset, err := parser.ParseZed(f.Includes, src) if err != nil { - return nil, nil, false, err + return nil, nil, nil, false, err } - return paths, query, false, nil + return paths, query, sset, false, nil } func isURLWithKnownScheme(path string, schemes ...string) bool { diff --git a/cli/zq/command.go b/cli/zq/command.go index bf0e6f7661..ab22100229 100644 --- a/cli/zq/command.go +++ b/cli/zq/command.go @@ -129,7 +129,7 @@ func (c *Command) Run(args []string) error { // Prevent ParseSourcesAndInputs from treating args[0] as a path. args = append(args, "-") } - paths, flowgraph, null, err := c.queryFlags.ParseSourcesAndInputs(args) + paths, flowgraph, sset, null, err := c.queryFlags.ParseSourcesAndInputs(args) if err != nil { return fmt.Errorf("zq: %w", err) } @@ -154,7 +154,7 @@ func (c *Command) Run(args []string) error { return err } comp := compiler.NewFileSystemCompiler(local) - query, err := runtime.CompileQuery(ctx, zctx, comp, flowgraph, readers) + query, err := runtime.CompileQuery(ctx, zctx, comp, flowgraph, sset, readers) if err != nil { return err } diff --git a/cmd/zed/dev/compile/command.go b/cmd/zed/dev/compile/command.go index 6fd85e5dec..02f0279b78 100644 --- a/cmd/zed/dev/compile/command.go +++ b/cmd/zed/dev/compile/command.go @@ -12,6 +12,7 @@ import ( "github.com/brimdata/zed/compiler/ast" "github.com/brimdata/zed/compiler/ast/dag" "github.com/brimdata/zed/compiler/data" + "github.com/brimdata/zed/compiler/parser" "github.com/brimdata/zed/lake" "github.com/brimdata/zed/pkg/charm" "github.com/brimdata/zed/runtime" @@ -130,7 +131,7 @@ func (c *Command) header(msg string) { } func (c *Command) parse(z string, lk *lake.Root) error { - seq, err := compiler.Parse(z, c.includes...) + seq, sset, err := compiler.Parse(z, c.includes...) 
if err != nil { return err } @@ -143,6 +144,7 @@ func (c *Command) parse(z string, lk *lake.Root) error { fmt.Println(normalize(b)) } if c.proc { + c.header("proc") c.writeAST(seq) } @@ -151,6 +153,9 @@ func (c *Command) parse(z string, lk *lake.Root) error { } runtime, err := compiler.NewJob(runtime.DefaultContext(), seq, data.NewSource(nil, lk), nil) if err != nil { + if list, ok := err.(parser.ErrorList); ok { + list.SetSourceSet(sset) + } return err } if c.semantic { diff --git a/cmd/zed/log/command.go b/cmd/zed/log/command.go index b396df3073..d4390e990e 100644 --- a/cmd/zed/log/command.go +++ b/cmd/zed/log/command.go @@ -7,6 +7,7 @@ import ( "github.com/brimdata/zed/cli/outputflags" "github.com/brimdata/zed/cli/poolflags" "github.com/brimdata/zed/cmd/zed/root" + "github.com/brimdata/zed/compiler/parser" "github.com/brimdata/zed/pkg/charm" "github.com/brimdata/zed/pkg/storage" "github.com/brimdata/zed/zio" @@ -70,6 +71,9 @@ func (c *Command) Run(args []string) error { defer w.Close() q, err := lake.Query(ctx, nil, query) if err != nil { + if list := (parser.ErrorList)(nil); errors.As(err, &list) && len(list) == 1 { + return errors.New(list[0].Msg) + } return err } defer q.Close() diff --git a/compiler/ast/ast.go b/compiler/ast/ast.go index 8b24aa1730..d08b73abae 100644 --- a/compiler/ast/ast.go +++ b/compiler/ast/ast.go @@ -110,7 +110,7 @@ func (c *Call) End() int { if c.Where != nil { return c.Where.End() } - return c.Rparen + return c.Rparen + 1 } type Cast struct { @@ -180,7 +180,7 @@ type Regexp struct { } func (r *Regexp) Pos() int { return r.PatternPos } -func (r *Regexp) End() int { return r.PatternPos + len(r.Pattern) } +func (r *Regexp) End() int { return r.PatternPos + len(r.Pattern) + 2 } type String struct { Kind string `json:"kind" unpack:""` @@ -432,9 +432,9 @@ type ( NullsFirst bool `json:"nullsfirst"` } Cut struct { - Kind string `json:"kind" unpack:""` - KeywordPos int `json:"keyword_pos"` - Args []Assignment `json:"args"` + Kind string `json:"kind" unpack:""` + KeywordPos int `json:"keyword_pos"` + Args Assignments `json:"args"` } Drop struct { Kind string `json:"kind" unpack:""` @@ -471,10 +471,10 @@ type ( Kind string `json:"kind" unpack:""` // StartPos is not called KeywordPos for Summarize since the "summarize" // keyword is optional. - StartPos int `json:"start_pos"` - Limit int `json:"limit"` - Keys []Assignment `json:"keys"` - Aggs []Assignment `json:"aggs"` + StartPos int `json:"start_pos"` + Limit int `json:"limit"` + Keys Assignments `json:"keys"` + Aggs Assignments `json:"aggs"` } Top struct { Kind string `json:"kind" unpack:""` @@ -484,9 +484,9 @@ type ( Flush bool `json:"flush"` } Put struct { - Kind string `json:"kind" unpack:""` - KeywordPos int `json:"keyword_pos"` - Args []Assignment `json:"args"` + Kind string `json:"kind" unpack:""` + KeywordPos int `json:"keyword_pos"` + Args Assignments `json:"args"` } Merge struct { Kind string `json:"kind" unpack:""` @@ -520,8 +520,8 @@ type ( // is unknown: It could be a Summarize or Put operator. This will be // determined in the semantic phase. 
 	OpAssignment struct {
-		Kind        string       `json:"kind" unpack:""`
-		Assignments []Assignment `json:"assignments"`
+		Kind        string      `json:"kind" unpack:""`
+		Assignments Assignments `json:"assignments"`
 	}
 	// An OpExpr operator is an expression that appears as an operator
 	// and requires semantic analysis to determine if it is a filter, a yield,
@@ -531,22 +531,22 @@ type (
 		Expr Expr   `json:"expr"`
 	}
 	Rename struct {
-		Kind       string       `json:"kind" unpack:""`
-		KeywordPos int          `json:"keyword_pos"`
-		Args       []Assignment `json:"args"`
+		Kind       string      `json:"kind" unpack:""`
+		KeywordPos int         `json:"keyword_pos"`
+		Args       Assignments `json:"args"`
 	}
 	Fuse struct {
 		Kind       string `json:"kind" unpack:""`
 		KeywordPos int    `json:"keyword_pos"`
 	}
 	Join struct {
-		Kind       string       `json:"kind" unpack:""`
-		KeywordPos int          `json:"keyword_pos"`
-		Style      string       `json:"style"`
-		RightInput Seq          `json:"right_input"`
-		LeftKey    Expr         `json:"left_key"`
-		RightKey   Expr         `json:"right_key"`
-		Args       []Assignment `json:"args"`
+		Kind       string      `json:"kind" unpack:""`
+		KeywordPos int         `json:"keyword_pos"`
+		Style      string      `json:"style"`
+		RightInput Seq         `json:"right_input"`
+		LeftKey    Expr        `json:"left_key"`
+		RightKey   Expr        `json:"right_key"`
+		Args       Assignments `json:"args"`
 	}
 	Sample struct {
 		Kind       string `json:"kind" unpack:""`
@@ -678,7 +678,12 @@ func (a *Assignment) Pos() int {
 	return a.RHS.Pos()
 }
 
-func (a *Assignment) End() int { return a.RHS.Pos() }
+func (a *Assignment) End() int { return a.RHS.End() }
+
+type Assignments []Assignment
+
+func (a Assignments) Pos() int { return a[0].Pos() }
+func (a Assignments) End() int { return a[len(a)-1].End() }
 
 // Def is like Assignment but the LHS is an identifier that may be later
 // referenced. This is used for const blocks in Sequential and var blocks
diff --git a/compiler/ast/dag/expr.go b/compiler/ast/dag/expr.go
index e36de6c1e0..2a5cac887c 100644
--- a/compiler/ast/dag/expr.go
+++ b/compiler/ast/dag/expr.go
@@ -30,6 +30,11 @@ type (
 		LHS  Expr   `json:"lhs"`
 		RHS  Expr   `json:"rhs"`
 	}
+	// A BadExpr node is a placeholder for an expression containing semantic
+	// errors.
+	BadExpr struct {
+		Kind string `json:"kind" unpack:""`
+	}
 	BinaryExpr struct {
 		Kind string `json:"kind" unpack:""`
 		Op   string `json:"op"`
@@ -131,6 +136,7 @@ type (
 func (*Agg) ExprDAG() {}
 func (*ArrayExpr) ExprDAG() {}
 func (*Assignment) ExprDAG() {}
+func (*BadExpr) ExprDAG() {}
 func (*BinaryExpr) ExprDAG() {}
 func (*Call) ExprDAG() {}
 func (*Conditional) ExprDAG() {}
diff --git a/compiler/ast/dag/op.go b/compiler/ast/dag/op.go
index 93cce1c08f..a1d2282ab1 100644
--- a/compiler/ast/dag/op.go
+++ b/compiler/ast/dag/op.go
@@ -26,6 +26,11 @@ type Seq []Op
 
 // Ops
 type (
+	// A BadOp node is a placeholder for an operator containing semantic
+	// errors.
+ BadOp struct { + Kind string `json:"kind" unpack:""` + } Combine struct { Kind string `json:"kind" unpack:""` } @@ -281,6 +286,7 @@ type ( } ) +func (*BadOp) OpNode() {} func (*Fork) OpNode() {} func (*Scatter) OpNode() {} func (*Switch) OpNode() {} diff --git a/compiler/ast/dag/unpack.go b/compiler/ast/dag/unpack.go index 89d70b2f4e..f7bed32a64 100644 --- a/compiler/ast/dag/unpack.go +++ b/compiler/ast/dag/unpack.go @@ -10,6 +10,8 @@ var unpacker = unpack.New( Agg{}, ArrayExpr{}, Assignment{}, + BadOp{}, + BadExpr{}, BinaryExpr{}, Call{}, Combine{}, diff --git a/compiler/job.go b/compiler/job.go index 68fd9a2c27..8d9b3a87a4 100644 --- a/compiler/job.go +++ b/compiler/job.go @@ -88,14 +88,13 @@ func (j *Job) Parallelize(n int) error { return err } -func Parse(src string, filenames ...string) (ast.Seq, error) { - seq, _, err := parser.ParseZed(filenames, src) - return seq, err +func Parse(src string, filenames ...string) (ast.Seq, *parser.SourceSet, error) { + return parser.ParseZed(filenames, src) } // MustParse is like Parse but panics if an error is encountered. func MustParse(query string) ast.Seq { - seq, err := (*anyCompiler)(nil).Parse(query) + seq, _, err := (*anyCompiler)(nil).Parse(query) if err != nil { panic(err) } @@ -136,7 +135,7 @@ type anyCompiler struct{} // Parse concatenates the source files in filenames followed by src and parses // the resulting program. -func (*anyCompiler) Parse(src string, filenames ...string) (ast.Seq, error) { +func (*anyCompiler) Parse(src string, filenames ...string) (ast.Seq, *parser.SourceSet, error) { return Parse(src, filenames...) } @@ -145,13 +144,16 @@ func (*anyCompiler) Parse(src string, filenames ...string) (ast.Seq, error) { // nor does it compute the demand of the query to prune the projection // from the vcache. 
func VectorCompile(rctx *runtime.Context, query string, object *vcache.Object) (zbuf.Puller, error) { - seq, err := Parse(query) + seq, sset, err := Parse(query) if err != nil { return nil, err } src := &data.Source{} entry, err := semantic.Analyze(rctx.Context, seq, src, nil) if err != nil { + if list := (parser.ErrorList)(nil); errors.As(err, &list) { + list.SetSourceSet(sset) + } return nil, err } puller := vam.NewVectorProjection(rctx.Zctx, object, nil) //XXX project all @@ -177,12 +179,15 @@ func VectorFilterCompile(rctx *runtime.Context, query string, src *data.Source, if err != nil { return nil, err } - seq, err := Parse(query) + seq, sset, err := Parse(query) if err != nil { return nil, err } entry, err := semantic.Analyze(rctx.Context, seq, src, nil) if err != nil { + if list := (parser.ErrorList)(nil); errors.As(err, &list) { + list.SetSourceSet(sset) + } return nil, err } if len(entry) != 1 { diff --git a/compiler/parser/parser_test.go b/compiler/parser/parser_test.go index 25c3bc7a51..9a3bcde7c7 100644 --- a/compiler/parser/parser_test.go +++ b/compiler/parser/parser_test.go @@ -40,7 +40,7 @@ func searchForZed() ([]string, error) { } func parseOp(z string) ([]byte, error) { - o, err := compiler.Parse(z) + o, _, err := compiler.Parse(z) if err != nil { return nil, err } diff --git a/compiler/semantic/analyzer.go b/compiler/semantic/analyzer.go index cfaf292566..81dadeca22 100644 --- a/compiler/semantic/analyzer.go +++ b/compiler/semantic/analyzer.go @@ -7,6 +7,7 @@ import ( "github.com/brimdata/zed/compiler/ast" "github.com/brimdata/zed/compiler/ast/dag" "github.com/brimdata/zed/compiler/data" + "github.com/brimdata/zed/compiler/parser" "github.com/brimdata/zed/lakeparse" ) @@ -15,9 +16,9 @@ import ( // After semantic analysis, the DAG is ready for either optimization or compilation. func Analyze(ctx context.Context, seq ast.Seq, source *data.Source, head *lakeparse.Commitish) (dag.Seq, error) { a := newAnalyzer(ctx, source, head) - s, err := a.semSeq(seq) - if err != nil { - return nil, err + s := a.semSeq(seq) + if a.errors != nil { + return nil, a.errors } return s, nil } @@ -26,12 +27,12 @@ func Analyze(ctx context.Context, seq ast.Seq, source *data.Source, head *lakepa // DAG does not have one. 
func AnalyzeAddSource(ctx context.Context, seq ast.Seq, source *data.Source, head *lakeparse.Commitish) (dag.Seq, error) { a := newAnalyzer(ctx, source, head) - s, err := a.semSeq(seq) - if err != nil { - return nil, err + s := a.semSeq(seq) + if a.errors != nil { + return nil, a.errors } if !HasSource(s) { - if err = a.addDefaultSource(&s); err != nil { + if err := a.addDefaultSource(&s); err != nil { return nil, err } } @@ -40,6 +41,7 @@ func AnalyzeAddSource(ctx context.Context, seq ast.Seq, source *data.Source, hea type analyzer struct { ctx context.Context + errors parser.ErrorList head *lakeparse.Commitish opStack []*ast.OpDecl source *data.Source @@ -80,6 +82,10 @@ func (a *analyzer) addDefaultSource(seq *dag.Seq) error { seq.Prepend(&dag.DefaultScan{Kind: "DefaultScan"}) return nil } + // Verify pool exists for HEAD + if _, err := a.source.PoolID(a.ctx, a.head.Pool); err != nil { + return err + } pool := &ast.Pool{ Kind: "Pool", Spec: ast.PoolSpec{ @@ -89,10 +95,7 @@ func (a *analyzer) addDefaultSource(seq *dag.Seq) error { }, }, } - ops, err := a.semPool(pool) - if err != nil { - return err - } + ops := a.semPool(pool) seq.Prepend(ops[0]) return nil } @@ -118,6 +121,7 @@ func (a *analyzer) exitScope() { type opDecl struct { ast *ast.OpDecl scope *Scope // parent scope of op declaration. + bad bool } type opCycleError []*ast.OpDecl @@ -132,3 +136,15 @@ func (e opCycleError) Error() string { } return b } + +func badExpr() dag.Expr { + return &dag.BadExpr{Kind: "BadExpr"} +} + +func badOp() dag.Op { + return &dag.BadOp{Kind: "BadOp"} +} + +func (a *analyzer) error(n ast.Node, err error) { + a.errors.Append(err.Error(), n.Pos(), n.End()) +} diff --git a/compiler/semantic/expr.go b/compiler/semantic/expr.go index c7ebc05361..5bd23e48a2 100644 --- a/compiler/semantic/expr.go +++ b/compiler/semantic/expr.go @@ -16,33 +16,34 @@ import ( "github.com/brimdata/zed/zson" ) -func (a *analyzer) semExpr(e ast.Expr) (dag.Expr, error) { +func (a *analyzer) semExpr(e ast.Expr) dag.Expr { switch e := e.(type) { case nil: - return nil, errors.New("semantic analysis: illegal null value encountered in AST") + panic("semantic analysis: illegal null value encountered in AST") case *ast.Regexp: return &dag.RegexpSearch{ Kind: "RegexpSearch", Pattern: e.Pattern, Expr: pathOf("this"), - }, nil + } case *ast.Glob: return &dag.RegexpSearch{ Kind: "RegexpSearch", Pattern: reglob.Reglob(e.Pattern), Expr: pathOf("this"), - }, nil + } case *ast.Grep: return a.semGrep(e) case *astzed.Primitive: val, err := zson.ParsePrimitive(a.arena, e.Type, e.Text) if err != nil { - return nil, err + a.error(e, err) + return badExpr() } return &dag.Literal{ Kind: "Literal", Value: zson.FormatValue(val), - }, nil + } case *ast.ID: return a.semID(e) case *ast.Term: @@ -51,109 +52,77 @@ func (a *analyzer) semExpr(e ast.Expr) (dag.Expr, error) { case *astzed.Primitive: v, err := zson.ParsePrimitive(a.arena, t.Type, t.Text) if err != nil { - return nil, err + a.error(e, err) + return badExpr() } val = zson.FormatValue(v) case *astzed.TypeValue: tv, err := a.semType(t.Value) if err != nil { - return nil, err + a.error(e, err) + return badExpr() } val = "<" + tv + ">" default: - return nil, fmt.Errorf("unexpected term value: %s", e.Kind) + panic(fmt.Errorf("unexpected term value: %s", e.Kind)) } return &dag.Search{ Kind: "Search", Text: e.Text, Value: val, Expr: pathOf("this"), - }, nil - case *ast.UnaryExpr: - expr, err := a.semExpr(e.Operand) - if err != nil { - return nil, err } + case *ast.UnaryExpr: return &dag.UnaryExpr{ Kind: 
"UnaryExpr", Op: e.Op, - Operand: expr, - }, nil + Operand: a.semExpr(e.Operand), + } case *ast.BinaryExpr: return a.semBinary(e) case *ast.Conditional: - cond, err := a.semExpr(e.Cond) - if err != nil { - return nil, err - } - thenExpr, err := a.semExpr(e.Then) - if err != nil { - return nil, err - } - elseExpr, err := a.semExpr(e.Else) - if err != nil { - return nil, err - } + cond := a.semExpr(e.Cond) + thenExpr := a.semExpr(e.Then) + elseExpr := a.semExpr(e.Else) return &dag.Conditional{ Kind: "Conditional", Cond: cond, Then: thenExpr, Else: elseExpr, - }, nil + } case *ast.Call: return a.semCall(e) case *ast.Cast: - expr, err := a.semExpr(e.Expr) - if err != nil { - return nil, err - } - typ, err := a.semExpr(e.Type) - if err != nil { - return nil, err - } + expr := a.semExpr(e.Expr) + typ := a.semExpr(e.Type) return &dag.Call{ Kind: "Call", Name: "cast", Args: []dag.Expr{expr, typ}, - }, nil - case *ast.IndexExpr: - expr, err := a.semExpr(e.Expr) - if err != nil { - return nil, err - } - index, err := a.semExpr(e.Index) - if err != nil { - return nil, err } + case *ast.IndexExpr: + expr := a.semExpr(e.Expr) + index := a.semExpr(e.Index) // If expr is a path and index is a string, then just extend the path. if path := a.isIndexOfThis(expr, index); path != nil { - return path, nil + return path } return &dag.IndexExpr{ Kind: "IndexExpr", Expr: expr, Index: index, - }, nil - case *ast.SliceExpr: - expr, err := a.semExpr(e.Expr) - if err != nil { - return nil, err } + case *ast.SliceExpr: + expr := a.semExpr(e.Expr) // XXX Literal indices should be type checked as int. - from, err := a.semExprNullable(e.From) - if err != nil { - return nil, err - } - to, err := a.semExprNullable(e.To) - if err != nil { - return nil, err - } + from := a.semExprNullable(e.From) + to := a.semExprNullable(e.To) return &dag.SliceExpr{ Kind: "SliceExpr", Expr: expr, From: from, To: to, - }, nil + } case *astzed.TypeValue: typ, err := a.semType(e.Value) if err != nil { @@ -167,32 +136,28 @@ func (a *analyzer) semExpr(e ast.Expr) (dag.Expr, error) { // complaining about the type not existing. 
// XXX See issue #3413 if e := semDynamicType(e.Value); e != nil { - return e, nil + return e } - return nil, err + a.error(e, err) + return badExpr() } return &dag.Literal{ Kind: "Literal", Value: "<" + typ + ">", - }, nil - case *ast.Agg: - expr, err := a.semExprNullable(e.Expr) - if err != nil { - return nil, err } + case *ast.Agg: + expr := a.semExprNullable(e.Expr) if expr == nil && e.Name != "count" { - return nil, fmt.Errorf("aggregator '%s' requires argument", e.Name) - } - where, err := a.semExprNullable(e.Where) - if err != nil { - return nil, err + a.error(e, fmt.Errorf("aggregator '%s' requires argument", e.Name)) + return badExpr() } + where := a.semExprNullable(e.Where) return &dag.Agg{ Kind: "Agg", Name: e.Name, Expr: expr, Where: where, - }, nil + } case *ast.RecordExpr: fields := map[string]struct{}{} var out []dag.RecordElem @@ -200,13 +165,11 @@ func (a *analyzer) semExpr(e ast.Expr) (dag.Expr, error) { switch elem := elem.(type) { case *ast.Field: if _, ok := fields[elem.Name]; ok { - return nil, fmt.Errorf("record expression: %w", &zed.DuplicateFieldError{Name: elem.Name}) + a.error(elem, fmt.Errorf("record expression: %w", &zed.DuplicateFieldError{Name: elem.Name})) + continue } fields[elem.Name] = struct{}{} - e, err := a.semExpr(elem.Value) - if err != nil { - return nil, err - } + e := a.semExpr(elem.Value) out = append(out, &dag.Field{ Kind: "Field", Name: elem.Name, @@ -214,23 +177,18 @@ func (a *analyzer) semExpr(e ast.Expr) (dag.Expr, error) { }) case *ast.ID: if _, ok := fields[elem.Name]; ok { - return nil, fmt.Errorf("record expression: %w", &zed.DuplicateFieldError{Name: elem.Name}) + a.error(elem, fmt.Errorf("record expression: %w", &zed.DuplicateFieldError{Name: elem.Name})) + continue } fields[elem.Name] = struct{}{} - v, err := a.semID(elem) - if err != nil { - return nil, err - } + v := a.semID(elem) out = append(out, &dag.Field{ Kind: "Field", Name: elem.Name, Value: v, }) case *ast.Spread: - e, err := a.semExpr(elem.Expr) - if err != nil { - return nil, err - } + e := a.semExpr(elem.Expr) out = append(out, &dag.Spread{ Kind: "Spread", Expr: e, @@ -240,71 +198,49 @@ func (a *analyzer) semExpr(e ast.Expr) (dag.Expr, error) { return &dag.RecordExpr{ Kind: "RecordExpr", Elems: out, - }, nil - case *ast.ArrayExpr: - elems, err := a.semVectorElems(e.Elems) - if err != nil { - return nil, err } + case *ast.ArrayExpr: + elems := a.semVectorElems(e.Elems) return &dag.ArrayExpr{ Kind: "ArrayExpr", Elems: elems, - }, nil - case *ast.SetExpr: - elems, err := a.semVectorElems(e.Elems) - if err != nil { - return nil, err } + case *ast.SetExpr: + elems := a.semVectorElems(e.Elems) return &dag.SetExpr{ Kind: "SetExpr", Elems: elems, - }, nil + } case *ast.MapExpr: var entries []dag.Entry for _, entry := range e.Entries { - key, err := a.semExpr(entry.Key) - if err != nil { - return nil, err - } - val, err := a.semExpr(entry.Value) - if err != nil { - return nil, err - } + key := a.semExpr(entry.Key) + val := a.semExpr(entry.Value) entries = append(entries, dag.Entry{Key: key, Value: val}) } return &dag.MapExpr{ Kind: "MapExpr", Entries: entries, - }, nil - case *ast.OverExpr: - exprs, err := a.semExprs(e.Exprs) - if err != nil { - return nil, err } + case *ast.OverExpr: + exprs := a.semExprs(e.Exprs) if e.Body == nil { - return nil, errors.New("over expression missing lateral scope") + a.error(e, errors.New("over expression missing lateral scope")) + return badExpr() } a.enterScope() defer a.exitScope() - locals, err := a.semVars(e.Locals) - if err != nil { - return 
nil, err - } - body, err := a.semSeq(e.Body) - if err != nil { - return nil, err - } return &dag.OverExpr{ Kind: "OverExpr", - Defs: locals, + Defs: a.semVars(e.Locals), Exprs: exprs, - Body: body, - }, nil + Body: a.semSeq(e.Body), + } } - return nil, fmt.Errorf("invalid expression type %T", e) + panic(errors.New("invalid expression type")) } -func (a *analyzer) semID(id *ast.ID) (dag.Expr, error) { +func (a *analyzer) semID(id *ast.ID) dag.Expr { // We use static scoping here to see if an identifier is // a "var" reference to the name or a field access // and transform the AST node appropriately. The resulting DAG @@ -312,12 +248,13 @@ func (a *analyzer) semID(id *ast.ID) (dag.Expr, error) { // one way or the other. ref, err := a.scope.LookupExpr(id.Name) if err != nil { - return nil, err + a.error(id, err) + return badExpr() } if ref != nil { - return ref, nil + return ref } - return pathOf(id.Name), nil + return pathOf(id.Name) } func semDynamicType(tv astzed.Type) *dag.Call { @@ -341,21 +278,15 @@ func dynamicTypeName(name string) *dag.Call { } } -func (a *analyzer) semGrep(grep *ast.Grep) (dag.Expr, error) { +func (a *analyzer) semGrep(grep *ast.Grep) dag.Expr { e := dag.Expr(&dag.This{Kind: "This"}) if grep.Expr != nil { - var err error - if e, err = a.semExpr(grep.Expr); err != nil { - return nil, err - } - } - p, err := a.semExpr(grep.Pattern) - if err != nil { - return nil, err + e = a.semExpr(grep.Expr) } + p := a.semExpr(grep.Pattern) if s, ok := p.(*dag.RegexpSearch); ok { s.Expr = e - return s, nil + return s } if s, ok := a.isStringConst(p); ok { return &dag.Search{ @@ -363,75 +294,66 @@ func (a *analyzer) semGrep(grep *ast.Grep) (dag.Expr, error) { Text: s, Value: zson.QuotedString([]byte(s)), Expr: e, - }, nil + } } return &dag.Call{ Kind: "Call", Name: "grep", Args: []dag.Expr{p, e}, - }, nil + } } -func (a *analyzer) semRegexp(b *ast.BinaryExpr) (dag.Expr, error) { +func (a *analyzer) semRegexp(b *ast.BinaryExpr) dag.Expr { if b.Op != "~" { - return nil, nil + return nil } re, ok := b.RHS.(*ast.Regexp) if !ok { - return nil, errors.New(`right-hand side of ~ expression must be a regular expression`) + a.error(b, errors.New(`right-hand side of ~ expression must be a regular expression`)) + return badExpr() } if _, err := expr.CompileRegexp(re.Pattern); err != nil { - return nil, err - } - e, err := a.semExpr(b.LHS) - if err != nil { - return nil, err + a.error(b.RHS, err) + return badExpr() } + e := a.semExpr(b.LHS) return &dag.RegexpMatch{ Kind: "RegexpMatch", Pattern: re.Pattern, Expr: e, - }, nil + } } -func (a *analyzer) semBinary(e *ast.BinaryExpr) (dag.Expr, error) { - if e, err := a.semRegexp(e); e != nil || err != nil { - return e, err +func (a *analyzer) semBinary(e *ast.BinaryExpr) dag.Expr { + if e := a.semRegexp(e); e != nil { + return e } op := e.Op if op == "." 
{ - lhs, err := a.semExpr(e.LHS) - if err != nil { - return nil, err - } + lhs := a.semExpr(e.LHS) id, ok := e.RHS.(*ast.ID) if !ok { - return nil, errors.New("RHS of dot operator is not an identifier") + a.error(e, errors.New("RHS of dot operator is not an identifier")) + return badExpr() } if lhs, ok := lhs.(*dag.This); ok { lhs.Path = append(lhs.Path, id.Name) - return lhs, nil + return lhs } return &dag.Dot{ Kind: "Dot", LHS: lhs, RHS: id.Name, - }, nil - } - lhs, err := a.semExpr(e.LHS) - if err != nil { - return nil, err - } - rhs, err := a.semExpr(e.RHS) - if err != nil { - return nil, err + } } + lhs := a.semExpr(e.LHS) + rhs := a.semExpr(e.RHS) return &dag.BinaryExpr{ Kind: "BinaryExpr", Op: e.Op, LHS: lhs, RHS: rhs, - }, nil + } } func (a *analyzer) isIndexOfThis(lhs, rhs dag.Expr) *dag.This { @@ -452,151 +374,140 @@ func (a *analyzer) isStringConst(e dag.Expr) (field string, ok bool) { return "", false } -func (a *analyzer) semSlice(slice *ast.BinaryExpr) (*dag.BinaryExpr, error) { - sliceFrom, err := a.semExprNullable(slice.LHS) - if err != nil { - return nil, err - } - sliceTo, err := a.semExprNullable(slice.RHS) - if err != nil { - return nil, err - } +func (a *analyzer) semSlice(slice *ast.BinaryExpr) *dag.BinaryExpr { + sliceFrom := a.semExprNullable(slice.LHS) + sliceTo := a.semExprNullable(slice.RHS) return &dag.BinaryExpr{ Kind: "BinaryExpr", Op: ":", LHS: sliceFrom, RHS: sliceTo, - }, nil + } } -func (a *analyzer) semExprNullable(e ast.Expr) (dag.Expr, error) { +func (a *analyzer) semExprNullable(e ast.Expr) dag.Expr { if e == nil { - return nil, nil + return nil } return a.semExpr(e) } -func (a *analyzer) semCall(call *ast.Call) (dag.Expr, error) { - if e, err := a.maybeConvertAgg(call); e != nil || err != nil { - return e, err +func (a *analyzer) semCall(call *ast.Call) dag.Expr { + if e := a.maybeConvertAgg(call); e != nil { + return e } if call.Where != nil { - return nil, fmt.Errorf("'where' clause on non-aggregation function: %s", call.Name) - } - exprs, err := a.semExprs(call.Args) - if err != nil { - return nil, fmt.Errorf("%s(): bad argument: %w", call.Name, err) + a.error(call, errors.New("'where' clause on non-aggregation function")) + return badExpr() } + exprs := a.semExprs(call.Args) name, nargs := call.Name, len(call.Args) // Call could be to a user defined func. Check if we have a matching func in // scope. udf, err := a.scope.LookupExpr(name) if err != nil { - return nil, err + a.error(call, err) + return badExpr() } switch { // udf should be checked first since a udf can override builtin functions. case udf != nil: f, ok := udf.(*dag.Func) if !ok { - return nil, fmt.Errorf("%s(): definition is not a function type: %T", name, udf) + a.error(call, errors.New("not a function")) + return badExpr() } if len(f.Params) != nargs { - return nil, fmt.Errorf("%s(): expects %d argument(s)", name, len(f.Params)) + a.error(call, fmt.Errorf("call expects %d argument(s)", len(f.Params))) + return badExpr() } case zed.LookupPrimitive(name) != nil: // Primitive function call, change this to a cast. 
if err := function.CheckArgCount(nargs, 1, 1); err != nil { - return nil, fmt.Errorf("%s(): %w", name, err) + a.error(call, err) + return badExpr() } exprs = append(exprs, &dag.Literal{Kind: "Literal", Value: "<" + name + ">"}) name = "cast" case expr.NewShaperTransform(name) != 0: if err := function.CheckArgCount(nargs, 1, 2); err != nil { - return nil, fmt.Errorf("%s(): %w", name, err) + a.error(call, err) + return badExpr() } if nargs == 1 { exprs = append([]dag.Expr{&dag.This{Kind: "This"}}, exprs...) } case name == "map": if err := function.CheckArgCount(nargs, 2, 2); err != nil { - return nil, fmt.Errorf("%s(): %w", name, err) + a.error(call, err) + return badExpr() } id, ok := call.Args[1].(*ast.ID) if !ok { - return nil, fmt.Errorf("%s(): second argument must be the identifier of a function", name) + a.error(call.Args[1], errors.New("second argument must be the identifier of a function")) + return badExpr() } - inner, err := a.semCall(&ast.Call{ + inner := a.semCall(&ast.Call{ Kind: "Call", Name: id.Name, Args: []ast.Expr{&ast.ID{Kind: "ID", Name: "this"}}, }) - if err != nil { - return nil, err - } return &dag.MapCall{ Kind: "MapCall", Expr: exprs[0], Inner: inner, - }, nil + } default: if _, _, err = function.New(a.zctx, name, nargs); err != nil { - return nil, fmt.Errorf("%s(): %w", name, err) + a.error(call, err) + return badExpr() } } return &dag.Call{ Kind: "Call", Name: name, Args: exprs, - }, nil + } } -func (a *analyzer) semExprs(in []ast.Expr) ([]dag.Expr, error) { +func (a *analyzer) semExprs(in []ast.Expr) []dag.Expr { exprs := make([]dag.Expr, 0, len(in)) for _, e := range in { - expr, err := a.semExpr(e) - if err != nil { - return nil, err - } - exprs = append(exprs, expr) + exprs = append(exprs, a.semExpr(e)) } - return exprs, nil + return exprs } -func (a *analyzer) semAssignments(assignments []ast.Assignment) ([]dag.Assignment, error) { +func (a *analyzer) semAssignments(assignments []ast.Assignment) []dag.Assignment { out := make([]dag.Assignment, 0, len(assignments)) for _, e := range assignments { - a, err := a.semAssignment(e) - if err != nil { - return nil, err - } - out = append(out, a) + out = append(out, a.semAssignment(e)) } - return out, nil + return out } -func (a *analyzer) semAssignment(assign ast.Assignment) (dag.Assignment, error) { - rhs, err := a.semExpr(assign.RHS) - if err != nil { - return dag.Assignment{}, fmt.Errorf("right-hand side of assignment: %w", err) - } +func (a *analyzer) semAssignment(assign ast.Assignment) dag.Assignment { + rhs := a.semExpr(assign.RHS) var lhs dag.Expr if assign.LHS == nil { - path, err := deriveLHSPath(rhs) - if err != nil { - return dag.Assignment{}, err + if path, err := deriveLHSPath(rhs); err != nil { + a.error(&assign, err) + lhs = badExpr() + } else { + lhs = &dag.This{Kind: "This", Path: path} } - lhs = &dag.This{Kind: "This", Path: path} - } else if lhs, err = a.semExpr(assign.LHS); err != nil { - return dag.Assignment{}, fmt.Errorf("left-hand side of assignment: %w", err) + } else { + lhs = a.semExpr(assign.LHS) } if !isLval(lhs) { - return dag.Assignment{}, errors.New("illegal left-hand side of assignment") + a.error(&assign, errors.New("illegal left-hand side of assignment")) + lhs = badExpr() } if this, ok := lhs.(*dag.This); ok && len(this.Path) == 0 { - return dag.Assignment{}, errors.New("cannot assign to 'this'") + a.error(&assign, errors.New("cannot assign to 'this'")) + lhs = badExpr() } - return dag.Assignment{Kind: "Assignment", LHS: lhs, RHS: rhs}, nil + return dag.Assignment{Kind: 
"Assignment", LHS: lhs, RHS: rhs} } func isLval(e dag.Expr) bool { @@ -637,66 +548,54 @@ func deriveLHSPath(rhs dag.Expr) ([]string, error) { return nil, errors.New("cannot infer field from expression") } -func (a *analyzer) semFields(exprs []ast.Expr) ([]dag.Expr, error) { +func (a *analyzer) semFields(exprs []ast.Expr) []dag.Expr { fields := make([]dag.Expr, 0, len(exprs)) for _, e := range exprs { - f, err := a.semField(e) - if err != nil { - return nil, err - } - fields = append(fields, f) + fields = append(fields, a.semField(e)) } - return fields, nil + return fields } // semField analyzes the expression f and makes sure that it's // a field reference returning an error if not. -func (a *analyzer) semField(f ast.Expr) (*dag.This, error) { - e, err := a.semExpr(f) - if err != nil { - return nil, errors.New("invalid expression used as a field") - } +func (a *analyzer) semField(f ast.Expr) dag.Expr { + e := a.semExpr(f) field, ok := e.(*dag.This) if !ok { - return nil, errors.New("invalid expression used as a field") + a.error(f, errors.New("invalid expression used as a field")) + return badExpr() } if len(field.Path) == 0 { - return nil, errors.New("cannot use 'this' as a field reference") + a.error(f, errors.New("cannot use 'this' as a field reference")) + return badExpr() } - return field, nil + return field } -func (a *analyzer) maybeConvertAgg(call *ast.Call) (dag.Expr, error) { +func (a *analyzer) maybeConvertAgg(call *ast.Call) dag.Expr { if _, err := agg.NewPattern(call.Name, true); err != nil { - return nil, nil + return nil } var e dag.Expr - if len(call.Args) > 1 { + if err := function.CheckArgCount(len(call.Args), 0, 1); err != nil { if call.Name == "min" || call.Name == "max" { // min and max are special cases as they are also functions. If the // number of args is greater than 1 they're probably a function so do not // return an error. 
- return nil, nil + return nil } - return nil, fmt.Errorf("%s: wrong number of arguments", call.Name) + a.error(call, err) + return badExpr() } if len(call.Args) == 1 { - var err error - e, err = a.semExpr(call.Args[0]) - if err != nil { - return nil, err - } - } - where, err := a.semExprNullable(call.Where) - if err != nil { - return nil, err + e = a.semExpr(call.Args[0]) } return &dag.Agg{ Kind: "Agg", Name: call.Name, Expr: e, - Where: where, - }, nil + Where: a.semExprNullable(call.Where), + } } func DotExprToFieldPath(e ast.Expr) *dag.This { @@ -749,23 +648,17 @@ func (a *analyzer) semType(typ astzed.Type) (string, error) { return zson.FormatType(ztype), nil } -func (a *analyzer) semVectorElems(elems []ast.VectorElem) ([]dag.VectorElem, error) { +func (a *analyzer) semVectorElems(elems []ast.VectorElem) []dag.VectorElem { var out []dag.VectorElem for _, elem := range elems { switch elem := elem.(type) { case *ast.Spread: - e, err := a.semExpr(elem.Expr) - if err != nil { - return nil, err - } + e := a.semExpr(elem.Expr) out = append(out, &dag.Spread{Kind: "Spread", Expr: e}) case *ast.VectorValue: - e, err := a.semExpr(elem.Expr) - if err != nil { - return nil, err - } + e := a.semExpr(elem.Expr) out = append(out, &dag.VectorValue{Kind: "VectorValue", Expr: e}) } } - return out, nil + return out } diff --git a/compiler/semantic/op.go b/compiler/semantic/op.go index a8a9923c51..4d3dd571ee 100644 --- a/compiler/semantic/op.go +++ b/compiler/semantic/op.go @@ -22,165 +22,151 @@ import ( "github.com/segmentio/ksuid" ) -func (a *analyzer) semSeq(seq ast.Seq) (dag.Seq, error) { +func (a *analyzer) semSeq(seq ast.Seq) dag.Seq { var converted dag.Seq for _, op := range seq { - var err error - converted, err = a.semOp(op, converted) - if err != nil { - return nil, err - } + converted = a.semOp(op, converted) } - return converted, nil + return converted } -func (a *analyzer) semFrom(from *ast.From, seq dag.Seq) (dag.Seq, error) { +func (a *analyzer) semFrom(from *ast.From, seq dag.Seq) dag.Seq { switch len(from.Trunks) { case 0: - return nil, errors.New("internal error: from operator has no paths") + a.error(from, errors.New("from operator has no paths")) + return append(seq, badOp()) case 1: return a.semTrunk(from.Trunks[0], seq) default: paths := make([]dag.Seq, 0, len(from.Trunks)) for _, in := range from.Trunks { - converted, err := a.semTrunk(in, nil) - if err != nil { - return nil, err - } - paths = append(paths, converted) + paths = append(paths, a.semTrunk(in, nil)) } return append(seq, &dag.Fork{ Kind: "Fork", Paths: paths, - }), nil + }) } } -func (a *analyzer) semTrunk(trunk ast.Trunk, out dag.Seq) (dag.Seq, error) { +func (a *analyzer) semTrunk(trunk ast.Trunk, out dag.Seq) dag.Seq { if pool, ok := trunk.Source.(*ast.Pool); ok && trunk.Seq != nil { switch pool.Spec.Pool.(type) { case *ast.Glob, *ast.Regexp: - return nil, errors.New("=> not allowed after pool pattern in 'from' operator") + a.error(&trunk, errors.New("=> not allowed after pool pattern in 'from' operator")) + return append(out, badOp()) } } - sources, err := a.semSource(trunk.Source) - if err != nil { - return nil, err - } - seq, err := a.semSeq(trunk.Seq) - if err != nil { - return nil, err - } + sources := a.semSource(trunk.Source) + seq := a.semSeq(trunk.Seq) if len(sources) == 1 { - return append(out, append(dag.Seq{sources[0]}, seq...)...), nil + return append(out, append(dag.Seq{sources[0]}, seq...)...) 
} paths := make([]dag.Seq, 0, len(sources)) for _, source := range sources { paths = append(paths, append(dag.Seq{source}, seq...)) } - return append(out, &dag.Fork{Kind: "Fork", Paths: paths}), nil + return append(out, &dag.Fork{Kind: "Fork", Paths: paths}) } //XXX make sure you can't read files from a lake instance -func (a *analyzer) semOpSource(source ast.Source, out dag.Seq) (dag.Seq, error) { - sources, err := a.semSource(source) - if err != nil { - return nil, err - } +func (a *analyzer) semOpSource(source ast.Source, out dag.Seq) dag.Seq { + sources := a.semSource(source) if len(sources) == 1 { - return append(sources, out...), nil + return append(sources, out...) } var paths []dag.Seq for _, s := range sources { paths = append(paths, dag.Seq{s}) } - return append(out, &dag.Fork{Kind: "Fork", Paths: paths}), nil + return append(out, &dag.Fork{Kind: "Fork", Paths: paths}) } -func (a *analyzer) semSource(source ast.Source) ([]dag.Op, error) { - switch p := source.(type) { +func (a *analyzer) semSource(source ast.Source) []dag.Op { + switch s := source.(type) { case *ast.File: - sortKey, err := semSortKey(p.SortKey) + sortKey, err := semSortKey(s.SortKey) if err != nil { - return nil, err + a.error(source, err) } var path string - switch p := p.Path.(type) { + switch p := s.Path.(type) { case *ast.QuotedString: path = p.Text case *ast.String: // This can be either a reference to a constant or a string. if path, err = a.maybeStringConst(p.Text); err != nil { - return nil, fmt.Errorf("invalid file path: %w", err) + a.error(s.Path, err) } default: - return nil, fmt.Errorf("semantic analyzer: unknown AST file type %T", p) + panic(fmt.Errorf("semantic analyzer: unknown AST file type %T", p)) } return []dag.Op{ &dag.FileScan{ Kind: "FileScan", Path: path, - Format: p.Format, + Format: s.Format, SortKey: sortKey, }, - }, nil + } case *ast.HTTP: - sortKey, err := semSortKey(p.SortKey) + sortKey, err := semSortKey(s.SortKey) if err != nil { - return nil, err + a.error(source, err) } var headers map[string][]string - if p.Headers != nil { - expr, err := a.semExpr(p.Headers) - if err != nil { - return nil, err - } + if s.Headers != nil { + expr := a.semExpr(s.Headers) val, err := kernel.EvalAtCompileTime(a.zctx, a.arena, expr) if err != nil { - return nil, fmt.Errorf("headers: %w", err) - } - headers, err = unmarshalHeaders(a.arena, val) - if err != nil { - return nil, err + a.error(s.Headers, err) + } else { + headers, err = unmarshalHeaders(a.arena, val) + if err != nil { + a.error(s.Headers, err) + } } } var url string - switch p := p.URL.(type) { + switch p := s.URL.(type) { case *ast.QuotedString: url = p.Text case *ast.String: // This can be either a reference to a constant or a string. if url, err = a.maybeStringConst(p.Text); err != nil { - return nil, fmt.Errorf("invalid file path: %w", err) + a.error(s.URL, err) + // Set url so we don't report an error for this twice. 
+				url = "http://error"
 			}
 		default:
-			return nil, fmt.Errorf("semantic analyzer: unsupported AST get type %T", p)
+			panic(fmt.Errorf("semantic analyzer: unsupported AST get type %T", p))
 		}
 		if !strings.HasPrefix(url, "http://") && !strings.HasPrefix(url, "https://") {
-			return nil, fmt.Errorf("get: invalid URL %s", url)
+			a.error(s.URL, fmt.Errorf("invalid URL %s", url))
 		}
 		return []dag.Op{
 			&dag.HTTPScan{
 				Kind:    "HTTPScan",
 				URL:     url,
-				Format:  p.Format,
+				Format:  s.Format,
 				SortKey: sortKey,
-				Method:  p.Method,
+				Method:  s.Method,
 				Headers: headers,
-				Body:    p.Body,
+				Body:    s.Body,
 			},
-		}, nil
+		}
 	case *ast.Pool:
 		if !a.source.IsLake() {
-			return nil, errors.New("semantic analyzer: from pool cannot be used without a lake")
+			a.error(s, errors.New("from pool cannot be used without a lake"))
+			return []dag.Op{badOp()}
 		}
-		return a.semPool(p)
+		return a.semPool(s)
 	case *ast.Pass:
 		//XXX just connect parent
-		return []dag.Op{dag.PassOp}, nil
+		return []dag.Op{dag.PassOp}
 	default:
-		return nil, fmt.Errorf("semantic analyzer: unknown AST source type %T", p)
+		panic(fmt.Errorf("semantic analyzer: unknown AST source type %T", s))
 	}
 }
@@ -206,6 +192,8 @@ func unmarshalHeaders(arena *zed.Arena, val zed.Value) (map[string][]string, err
 	return headers, nil
 }
 
+// XXX At some point SortKey should be an ast.Node but for now just have this
+// return an error and let the parent log the semantic error.
 func semSortKey(p *ast.SortKey) (order.SortKey, error) {
 	if p == nil || p.Keys == nil {
 		return order.Nil, nil
@@ -225,7 +213,7 @@ func semSortKey(p *ast.SortKey) (order.SortKey, error) {
 	return order.NewSortKey(which, keys), nil
 }
 
-func (a *analyzer) semPool(p *ast.Pool) ([]dag.Op, error) {
+func (a *analyzer) semPool(p *ast.Pool) []dag.Op {
 	var poolNames []string
 	var err error
 	switch specPool := p.Spec.Pool.(type) {
@@ -238,28 +226,26 @@ func (a *analyzer) semPool(p *ast.Pool) ([]dag.Op, error) {
 		poolNames, err = a.matchPools(specPool.Pattern, specPool.Pattern, "regexp")
 	case *ast.String:
 		// This can be either a reference to a constant or a string.
-		name, err := a.maybeStringConst(specPool.Text)
-		if err != nil {
-			return nil, fmt.Errorf("invalid pool name: %w", err)
+		var name string
+		if name, err = a.maybeStringConst(specPool.Text); err == nil {
+			poolNames = []string{name}
 		}
-		poolNames = []string{name}
 	case *ast.QuotedString:
 		poolNames = []string{specPool.Text}
 	default:
-		return nil, fmt.Errorf("semantic analyzer: unknown AST pool type %T", specPool)
+		panic(fmt.Errorf("semantic analyzer: unknown AST pool type %T", specPool))
 	}
 	if err != nil {
-		return nil, err
+		// XXX PoolSpec should have a node position but for now just report
+		// the error as the entire source.
+ a.error(p.Spec.Pool, err) + return []dag.Op{badOp()} } var sources []dag.Op for _, name := range poolNames { - source, err := a.semPoolWithName(p, name) - if err != nil { - return nil, err - } - sources = append(sources, source) + sources = append(sources, a.semPoolWithName(p, name)) } - return sources, nil + return sources } func (a *analyzer) maybeStringConst(name string) (string, error) { @@ -278,11 +264,12 @@ func (a *analyzer) maybeStringConst(name string) (string, error) { return val.AsString(), nil } -func (a *analyzer) semPoolWithName(p *ast.Pool, poolName string) (dag.Op, error) { +func (a *analyzer) semPoolWithName(p *ast.Pool, poolName string) dag.Op { commit := p.Spec.Commit if poolName == "HEAD" { if a.head == nil { - return nil, errors.New("cannot scan from unknown HEAD") + a.error(p.Spec.Pool, errors.New("cannot scan from unknown HEAD")) + return badOp() } poolName = a.head.Pool commit = a.head.Branch @@ -290,27 +277,30 @@ func (a *analyzer) semPoolWithName(p *ast.Pool, poolName string) (dag.Op, error) if poolName == "" { meta := p.Spec.Meta if meta == "" { - return nil, errors.New("pool name missing") + a.error(p, errors.New("pool name missing")) + return badOp() } if _, ok := dag.LakeMetas[meta]; !ok { - return nil, fmt.Errorf("unknown lake metadata type %q in from operator", meta) + a.error(p, fmt.Errorf("unknown lake metadata type %q in from operator", meta)) + return badOp() } return &dag.LakeMetaScan{ Kind: "LakeMetaScan", Meta: p.Spec.Meta, - }, nil + } } poolID, err := a.source.PoolID(a.ctx, poolName) if err != nil { - return nil, err + a.error(p.Spec.Pool, err) + return badOp() } var commitID ksuid.KSUID if commit != "" { - commitID, err = lakeparse.ParseID(commit) - if err != nil { + if commitID, err = lakeparse.ParseID(commit); err != nil { commitID, err = a.source.CommitObject(a.ctx, poolID, commit) if err != nil { - return nil, err + a.error(p, err) + return badOp() } } } @@ -319,7 +309,8 @@ func (a *analyzer) semPoolWithName(p *ast.Pool, poolName string) (dag.Op, error) if commitID == ksuid.Nil { commitID, err = a.source.CommitObject(a.ctx, poolID, "main") if err != nil { - return nil, err + a.error(p, err) + return badOp() } } return &dag.CommitMetaScan{ @@ -328,23 +319,25 @@ func (a *analyzer) semPoolWithName(p *ast.Pool, poolName string) (dag.Op, error) Pool: poolID, Commit: commitID, Tap: p.Spec.Tap, - }, nil + } } if _, ok := dag.PoolMetas[meta]; ok { return &dag.PoolMetaScan{ Kind: "PoolMetaScan", Meta: meta, ID: poolID, - }, nil + } } - return nil, fmt.Errorf("unknown metadata type %q in from operator", meta) + a.error(p, fmt.Errorf("unknown metadata type %q", meta)) + return badOp() } if commitID == ksuid.Nil { // This trick here allows us to default to the main branch when // there is a "from pool" operator with no meta query or commit object. 
commitID, err = a.source.CommitObject(a.ctx, poolID, "main") if err != nil { - return nil, err + a.error(p, err) + return badOp() } } if p.Delete { @@ -352,13 +345,13 @@ func (a *analyzer) semPoolWithName(p *ast.Pool, poolName string) (dag.Op, error) Kind: "DeleteScan", ID: poolID, Commit: commitID, - }, nil + } } return &dag.PoolScan{ Kind: "PoolScan", ID: poolID, Commit: commitID, - }, nil + } } func (a *analyzer) matchPools(pattern, origPattern, patternDesc string) ([]string, error) { @@ -382,23 +375,16 @@ func (a *analyzer) matchPools(pattern, origPattern, patternDesc string) ([]strin return matches, nil } -func (a *analyzer) semScope(op *ast.Scope) (*dag.Scope, error) { +func (a *analyzer) semScope(op *ast.Scope) *dag.Scope { a.scope = NewScope(a.scope) defer a.exitScope() - consts, funcs, err := a.semDecls(op.Decls) - if err != nil { - return nil, err - } - body, err := a.semSeq(op.Body) - if err != nil { - return nil, err - } + consts, funcs := a.semDecls(op.Decls) return &dag.Scope{ Kind: "Scope", Consts: consts, Funcs: funcs, - Body: body, - }, nil + Body: a.semSeq(op.Body), + } } // semOp does a semantic analysis on a flowgraph to an @@ -406,32 +392,22 @@ func (a *analyzer) semScope(op *ast.Scope) (*dag.Scope, error) { // object. Currently, it only replaces the group-by duration with // a bucket call on the ts and replaces FunctionCalls in op context // with either a group-by or filter op based on the function's name. -func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { +func (a *analyzer) semOp(o ast.Op, seq dag.Seq) dag.Seq { switch o := o.(type) { case *ast.From: return a.semFrom(o, seq) case *ast.Pool, *ast.File, *ast.HTTP: return a.semOpSource(o.(ast.Source), seq) case *ast.Summarize: - keys, err := a.semAssignments(o.Keys) - if err != nil { - return nil, err - } - if assignmentHasDynamicLHS(keys) { - return nil, errors.New("summarize: key output field must be static") - } + keys := a.semAssignments(o.Keys) + a.checkStaticAssignment(o.Keys, keys) if len(keys) == 0 && len(o.Aggs) == 1 { if seq := a.singletonAgg(o.Aggs[0], seq); seq != nil { - return seq, nil + return seq } } - aggs, err := a.semAssignments(o.Aggs) - if err != nil { - return nil, err - } - if assignmentHasDynamicLHS(aggs) { - return nil, errors.New("summarize: aggregate output field must be static") - } + aggs := a.semAssignments(o.Aggs) + a.checkStaticAssignment(o.Aggs, aggs) // Note: InputSortDir is copied in here but it's not meaningful // coming from a parser AST, only from a worker using the kernel DSL, // which is another reason why we need separate parser and kernel ASTs. 
@@ -446,44 +422,28 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { Limit: o.Limit, Keys: keys, Aggs: aggs, - }), nil + }) case *ast.Parallel: var paths []dag.Seq for _, seq := range o.Paths { - converted, err := a.semSeq(seq) - if err != nil { - return nil, err - } - paths = append(paths, converted) + paths = append(paths, a.semSeq(seq)) } return append(seq, &dag.Fork{ Kind: "Fork", Paths: paths, - }), nil + }) case *ast.Scope: - op, err := a.semScope(o) - if err != nil { - return nil, err - } - return append(seq, op), nil + return append(seq, a.semScope(o)) case *ast.Switch: var expr dag.Expr if o.Expr != nil { - var err error - expr, err = a.semExpr(o.Expr) - if err != nil { - return nil, err - } + expr = a.semExpr(o.Expr) } var cases []dag.Case for _, c := range o.Cases { var e dag.Expr if c.Expr != nil { - var err error - e, err = a.semExpr(c.Expr) - if err != nil { - return nil, err - } + e = a.semExpr(c.Expr) } else if o.Expr == nil { // c.Expr == nil indicates the default case, // whose handling depends on p.Expr. @@ -492,24 +452,18 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { Value: "true", } } - path, err := a.semSeq(c.Path) - if err != nil { - return nil, err - } + path := a.semSeq(c.Path) cases = append(cases, dag.Case{Expr: e, Path: path}) } return append(seq, &dag.Switch{ Kind: "Switch", Expr: expr, Cases: cases, - }), nil + }) case *ast.Shape: - return append(seq, &dag.Shape{Kind: "Shape"}), nil + return append(seq, &dag.Shape{Kind: "Shape"}) case *ast.Cut: - assignments, err := a.semAssignments(o.Args) - if err != nil { - return nil, err - } + assignments := a.semAssignments(o.Args) // Collect static paths so we can check on what is available. var fields field.List for _, a := range assignments { @@ -517,116 +471,108 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { fields = append(fields, this.Path) } } - if _, err = zed.NewRecordBuilder(a.zctx, fields); err != nil { - return nil, fmt.Errorf("cut: %w", err) + if _, err := zed.NewRecordBuilder(a.zctx, fields); err != nil { + a.error(o.Args, err) + return append(seq, badOp()) } return append(seq, &dag.Cut{ Kind: "Cut", Args: assignments, - }), nil + }) case *ast.Drop: - args, err := a.semFields(o.Args) - if err != nil { - return nil, fmt.Errorf("drop: %w", err) - } + args := a.semFields(o.Args) if len(args) == 0 { - return nil, errors.New("drop: no fields given") + a.error(o, errors.New("no fields given")) } return append(seq, &dag.Drop{ Kind: "Drop", Args: args, - }), nil + }) case *ast.Sort: - exprs, err := a.semExprs(o.Args) - if err != nil { - return nil, fmt.Errorf("sort: %w", err) - } + exprs := a.semExprs(o.Args) return append(seq, &dag.Sort{ Kind: "Sort", Args: exprs, Order: o.Order, NullsFirst: o.NullsFirst, - }), nil + }) case *ast.Head: val := zed.NewInt64(1) if o.Count != nil { - expr, err := a.semExpr(o.Count) - if err != nil { - return nil, fmt.Errorf("head: %w", err) - } + expr := a.semExpr(o.Count) + var err error if val, err = kernel.EvalAtCompileTime(a.zctx, a.arena, expr); err != nil { - return nil, fmt.Errorf("head: %w", err) + a.error(o.Count, err) + return append(seq, badOp()) + } + if !zed.IsInteger(val.Type().ID()) { + a.error(o.Count, fmt.Errorf("expression value must be an integer value: %s", zson.FormatValue(val))) + return append(seq, badOp()) } } if val.AsInt() < 1 { - return nil, fmt.Errorf("head: expression value is not a positive integer: %s", zson.FormatValue(val)) + a.error(o.Count, errors.New("expression value must be a 
positive integer")) } return append(seq, &dag.Head{ Kind: "Head", Count: int(val.AsInt()), - }), nil + }) case *ast.Tail: val := zed.NewInt64(1) if o.Count != nil { - expr, err := a.semExpr(o.Count) - if err != nil { - return nil, fmt.Errorf("tail: %w", err) - } + expr := a.semExpr(o.Count) + var err error if val, err = kernel.EvalAtCompileTime(a.zctx, a.arena, expr); err != nil { - return nil, fmt.Errorf("tail: %w", err) + a.error(o.Count, err) + return append(seq, badOp()) + } + if !zed.IsInteger(val.Type().ID()) { + a.error(o.Count, fmt.Errorf("expression value must be an integer value: %s", zson.FormatValue(val))) + return append(seq, badOp()) } } if val.AsInt() < 1 { - return nil, fmt.Errorf("tail: expression value is not a positive integer: %s", zson.FormatValue(val)) + a.error(o.Count, errors.New("expression value must be a positive integer")) } return append(seq, &dag.Tail{ Kind: "Tail", Count: int(val.AsInt()), - }), nil + }) case *ast.Uniq: return append(seq, &dag.Uniq{ Kind: "Uniq", Cflag: o.Cflag, - }), nil + }) case *ast.Pass: - return append(seq, dag.PassOp), nil + return append(seq, dag.PassOp) case *ast.OpExpr: return a.semOpExpr(o.Expr, seq) case *ast.Search: - e, err := a.semExpr(o.Expr) - if err != nil { - return nil, err - } - return append(seq, dag.NewFilter(e)), nil + e := a.semExpr(o.Expr) + return append(seq, dag.NewFilter(e)) case *ast.Where: - e, err := a.semExpr(o.Expr) - if err != nil { - return nil, err - } - return append(seq, dag.NewFilter(e)), nil + e := a.semExpr(o.Expr) + return append(seq, dag.NewFilter(e)) case *ast.Top: - args, err := a.semExprs(o.Args) - if err != nil { - return nil, fmt.Errorf("top: %w", err) - } + args := a.semExprs(o.Args) if len(args) == 0 { - return nil, errors.New("top: no arguments given") + a.error(o, errors.New("no arguments given")) } limit := 1 if o.Limit != nil { - l, err := a.semExpr(o.Limit) - if err != nil { - return nil, err - } + l := a.semExpr(o.Limit) val, err := kernel.EvalAtCompileTime(a.zctx, a.arena, l) if err != nil { - return nil, err + a.error(o.Limit, err) + return append(seq, badOp()) } if !zed.IsSigned(val.Type().ID()) { - return nil, errors.New("top: limit argument must be an integer") + a.error(o.Limit, errors.New("limit argument must be an integer")) + return append(seq, badOp()) } if limit = int(val.Int()); limit < 1 { - return nil, errors.New("top: limit argument value must be greater than 0") + a.error(o.Limit, errors.New("limit argument value must be greater than 0")) + return append(seq, badOp()) } } return append(seq, &dag.Top{ @@ -634,12 +580,9 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { Args: args, Flush: o.Flush, Limit: limit, - }), nil + }) case *ast.Put: - assignments, err := a.semAssignments(o.Args) - if err != nil { - return nil, err - } + assignments := a.semAssignments(o.Args) // We can do collision checking on static paths, so check what we can. 
var fields field.List for _, a := range assignments { @@ -648,27 +591,20 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { } } if err := expr.CheckPutFields(fields); err != nil { - return nil, fmt.Errorf("put: %w", err) + a.error(o, err) } return append(seq, &dag.Put{ Kind: "Put", Args: assignments, - }), nil + }) case *ast.OpAssignment: - converted, err := a.semOpAssignment(o) - if err != nil { - return nil, err - } - return append(seq, converted), nil + return append(seq, a.semOpAssignment(o)) case *ast.Rename: var assignments []dag.Assignment for _, fa := range o.Args { - assign, err := a.semAssignment(fa) - if err != nil { - return nil, fmt.Errorf("rename: %w", err) - } + assign := a.semAssignment(fa) if !isLval(assign.RHS) { - return nil, fmt.Errorf("rename: illegal right-hand side of assignment") + a.error(fa.RHS, fmt.Errorf("illegal right-hand side of assignment")) } // If both paths are static validate them. Otherwise this will be // done at runtime. @@ -676,7 +612,7 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { rhs, rhsOk := assign.RHS.(*dag.This) if rhsOk && lhsOk { if err := expr.CheckRenameField(lhs.Path, rhs.Path); err != nil { - return nil, fmt.Errorf("rename: %w", err) + a.error(&fa, err) } } assignments = append(assignments, assign) @@ -684,27 +620,15 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { return append(seq, &dag.Rename{ Kind: "Rename", Args: assignments, - }), nil + }) case *ast.Fuse: - return append(seq, &dag.Fuse{Kind: "Fuse"}), nil + return append(seq, &dag.Fuse{Kind: "Fuse"}) case *ast.Join: - rightInput, err := a.semSeq(o.RightInput) - if err != nil { - return nil, err - } - leftKey, err := a.semExpr(o.LeftKey) - if err != nil { - return nil, err - } + rightInput := a.semSeq(o.RightInput) + leftKey := a.semExpr(o.LeftKey) rightKey := leftKey if o.RightKey != nil { - if rightKey, err = a.semExpr(o.RightKey); err != nil { - return nil, err - } - } - assignments, err := a.semAssignments(o.Args) - if err != nil { - return nil, err + rightKey = a.semExpr(o.RightKey) } join := &dag.Join{ Kind: "Join", @@ -713,7 +637,7 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { LeftKey: leftKey, RightDir: order.Unknown, RightKey: rightKey, - Args: assignments, + Args: a.semAssignments(o.Args), } if rightInput != nil { par := &dag.Fork{ @@ -722,29 +646,26 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { } seq = append(seq, par) } - return append(seq, join), nil + return append(seq, join) case *ast.Explode: typ, err := a.semType(o.Type) if err != nil { - return nil, err - } - args, err := a.semExprs(o.Args) - if err != nil { - return nil, err + a.error(o.Type, err) + typ = "" } + args := a.semExprs(o.Args) var as string if o.As == nil { as = "value" } else { - e, err := a.semExpr(o.As) - if err != nil { - return nil, err - } + e := a.semExpr(o.As) this, ok := e.(*dag.This) if !ok { - return nil, errors.New("explode: as clause must be a field reference") + a.error(o.As, errors.New("as clause must be a field reference")) + return append(seq, badOp()) } else if len(this.Path) != 1 { - return nil, errors.New("explode: field must be a top-level field") + a.error(o.As, errors.New("field must be a top-level field")) + return append(seq, badOp()) } as = this.Path[0] } @@ -753,51 +674,35 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { Args: args, Type: typ, As: as, - }), nil + }) case *ast.Merge: - expr, err := a.semExpr(o.Expr) - if err != nil { - return nil, 
fmt.Errorf("merge: %w", err) - } return append(seq, &dag.Merge{ Kind: "Merge", - Expr: expr, + Expr: a.semExpr(o.Expr), Order: order.Asc, //XXX - }), nil + }) case *ast.Over: if len(o.Locals) != 0 && o.Body == nil { - return nil, errors.New("over operator: cannot have a with clause without a lateral query") + a.error(o, errors.New("cannot have a with clause without a lateral query")) } a.enterScope() defer a.exitScope() - locals, err := a.semVars(o.Locals) - if err != nil { - return nil, err - } - exprs, err := a.semExprs(o.Exprs) - if err != nil { - return nil, err - } + locals := a.semVars(o.Locals) + exprs := a.semExprs(o.Exprs) var body dag.Seq if o.Body != nil { - body, err = a.semSeq(o.Body) - if err != nil { - return nil, err - } + body = a.semSeq(o.Body) } return append(seq, &dag.Over{ Kind: "Over", Defs: locals, Exprs: exprs, Body: body, - }), nil + }) case *ast.Sample: e := dag.Expr(&dag.This{Kind: "This"}) if o.Expr != nil { - var err error - if e, err = a.semExpr(o.Expr); err != nil { - return nil, err - } + e = a.semExpr(o.Expr) } seq = append(seq, &dag.Summarize{ Kind: "Summarize", @@ -819,12 +724,9 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { return append(seq, &dag.Yield{ Kind: "Yield", Exprs: []dag.Expr{&dag.This{Kind: "This", Path: field.Path{"sample"}}}, - }), nil + }) case *ast.Assert: - cond, err := a.semExpr(o.Expr) - if err != nil { - return nil, err - } + cond := a.semExpr(o.Expr) // 'assert EXPR' is equivalent to // 'yield EXPR ? this : error({message: "assertion failed", "expr": EXPR_text, "on": this}' // where EXPR_text is the literal text of EXPR. @@ -861,22 +763,20 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { }, }, }, - }), nil + }) case *ast.Yield: - exprs, err := a.semExprs(o.Exprs) - if err != nil { - return nil, err - } + exprs := a.semExprs(o.Exprs) return append(seq, &dag.Yield{ Kind: "Yield", Exprs: exprs, - }), nil + }) case *ast.Load: poolID, err := lakeparse.ParseID(o.Pool) if err != nil { poolID, err = a.source.PoolID(a.ctx, o.Pool) if err != nil { - return nil, err + a.error(o, err) + return append(seq, badOp()) } } return append(seq, &dag.Load{ @@ -886,19 +786,16 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { Author: o.Author, Message: o.Message, Meta: o.Meta, - }), nil + }) } - return nil, fmt.Errorf("semantic transform: unknown AST operator type: %T", o) + panic(fmt.Errorf("semantic transform: unknown AST operator type: %T", o)) } func (a *analyzer) singletonAgg(agg ast.Assignment, seq dag.Seq) dag.Seq { if agg.LHS != nil { return nil } - out, err := a.semAssignment(agg) - if err != nil { - return nil - } + out := a.semAssignment(agg) this, ok := out.LHS.(*dag.This) if !ok || len(this.Path) != 1 { return nil @@ -915,70 +812,55 @@ func (a *analyzer) singletonAgg(agg ast.Assignment, seq dag.Seq) dag.Seq { ) } -func (a *analyzer) semDecls(decls []ast.Decl) ([]dag.Def, []*dag.Func, error) { +func (a *analyzer) semDecls(decls []ast.Decl) ([]dag.Def, []*dag.Func) { var consts []dag.Def var fnDecls []*ast.FuncDecl for _, d := range decls { switch d := d.(type) { case *ast.ConstDecl: - c, err := a.semConstDecl(d) - if err != nil { - return nil, nil, err - } - consts = append(consts, c) + consts = append(consts, a.semConstDecl(d)) case *ast.FuncDecl: fnDecls = append(fnDecls, d) case *ast.OpDecl: - if err := a.semOpDecl(d); err != nil { - return nil, nil, err - } + a.semOpDecl(d) case *ast.TypeDecl: - c, err := a.semTypeDecl(d) - if err != nil { - return nil, nil, err - } - consts = 
append(consts, c) + consts = append(consts, a.semTypeDecl(d)) default: - return nil, nil, fmt.Errorf("invalid declaration type %T", d) + panic(fmt.Errorf("invalid declaration type %T", d)) } } - funcs, err := a.semFuncDecls(fnDecls) - if err != nil { - return nil, nil, err - } - return consts, funcs, nil + funcs := a.semFuncDecls(fnDecls) + return consts, funcs } -func (a *analyzer) semConstDecl(c *ast.ConstDecl) (dag.Def, error) { - e, err := a.semExpr(c.Expr) - if err != nil { - return dag.Def{}, err - } +func (a *analyzer) semConstDecl(c *ast.ConstDecl) dag.Def { + e := a.semExpr(c.Expr) if err := a.scope.DefineConst(a.zctx, a.arena, c.Name, e); err != nil { - return dag.Def{}, err + a.error(c, err) } return dag.Def{ Name: c.Name, Expr: e, - }, nil + } } -func (a *analyzer) semTypeDecl(d *ast.TypeDecl) (dag.Def, error) { +func (a *analyzer) semTypeDecl(d *ast.TypeDecl) dag.Def { typ, err := a.semType(d.Type) if err != nil { - return dag.Def{}, err + a.error(d.Type, err) + typ = "null" } e := &dag.Literal{ Kind: "Literal", Value: fmt.Sprintf("<%s=%s>", zson.QuotedName(d.Name), typ), } if err := a.scope.DefineConst(a.zctx, a.arena, d.Name, e); err != nil { - return dag.Def{}, err + a.error(d, err) } - return dag.Def{Name: d.Name, Expr: e}, nil + return dag.Def{Name: d.Name, Expr: e} } -func (a *analyzer) semFuncDecls(decls []*ast.FuncDecl) ([]*dag.Func, error) { +func (a *analyzer) semFuncDecls(decls []*ast.FuncDecl) []*dag.Func { funcs := make([]*dag.Func, 0, len(decls)) for _, d := range decls { f := &dag.Func{ @@ -987,118 +869,117 @@ func (a *analyzer) semFuncDecls(decls []*ast.FuncDecl) ([]*dag.Func, error) { Params: slices.Clone(d.Params), } if err := a.scope.DefineAs(f.Name, f); err != nil { - return nil, err + a.error(d, err) } funcs = append(funcs, f) } for i, d := range decls { - var err error - if funcs[i].Expr, err = a.semFuncBody(d.Params, d.Expr); err != nil { - return nil, err - } + funcs[i].Expr = a.semFuncBody(d, d.Params, d.Expr) } - return funcs, nil + return funcs } -func (a *analyzer) semFuncBody(params []string, body ast.Expr) (dag.Expr, error) { +func (a *analyzer) semFuncBody(d *ast.FuncDecl, params []string, body ast.Expr) dag.Expr { a.enterScope() defer a.exitScope() for _, p := range params { if err := a.scope.DefineVar(p); err != nil { - return nil, err + // XXX Each param should be a node but now just report the error + // as the entire declaration. 
+ a.error(d, err) } } return a.semExpr(body) } -func (a *analyzer) semOpDecl(d *ast.OpDecl) error { +func (a *analyzer) semOpDecl(d *ast.OpDecl) { m := make(map[string]bool) for _, p := range d.Params { if m[p] { - return fmt.Errorf("%s: duplicate parameter %q", d.Name, p) + a.error(d, fmt.Errorf("duplicate parameter %q", p)) + a.scope.DefineAs(d.Name, &opDecl{bad: true}) + return } m[p] = true } - return a.scope.DefineAs(d.Name, &opDecl{ast: d, scope: a.scope}) + if err := a.scope.DefineAs(d.Name, &opDecl{ast: d, scope: a.scope}); err != nil { + a.error(d, err) + } } -func (a *analyzer) semVars(defs []ast.Def) ([]dag.Def, error) { +func (a *analyzer) semVars(defs []ast.Def) []dag.Def { var locals []dag.Def for _, def := range defs { - e, err := a.semExpr(def.Expr) - if err != nil { - return nil, err - } - name := def.Name - if err := a.scope.DefineVar(name); err != nil { - return nil, err + e := a.semExpr(def.Expr) + if err := a.scope.DefineVar(def.Name); err != nil { + a.error(def, err) + continue } locals = append(locals, dag.Def{ - Name: name, + Name: def.Name, Expr: e, }) } - return locals, nil + return locals } -func (a *analyzer) semOpAssignment(p *ast.OpAssignment) (dag.Op, error) { +func (a *analyzer) semOpAssignment(p *ast.OpAssignment) dag.Op { var aggs, puts []dag.Assignment - for _, assign := range p.Assignments { + for _, astAssign := range p.Assignments { // Parition assignments into agg vs. puts. - a, err := a.semAssignment(assign) - if err != nil { - return nil, err - } - if _, ok := a.RHS.(*dag.Agg); ok { - if _, ok := a.LHS.(*dag.This); !ok { - return nil, errors.New("summarize: aggregate output field must be static") + assign := a.semAssignment(astAssign) + if _, ok := assign.RHS.(*dag.Agg); ok { + if _, ok := assign.LHS.(*dag.This); !ok { + a.error(astAssign.LHS, errors.New("aggregate output field must be static")) } - aggs = append(aggs, a) + aggs = append(aggs, assign) } else { - puts = append(puts, a) + puts = append(puts, assign) } } if len(puts) > 0 && len(aggs) > 0 { - return nil, errors.New("mix of aggregations and non-aggregations in assignment list") + a.error(p, errors.New("mix of aggregations and non-aggregations in assignment list")) + return badOp() } if len(puts) > 0 { return &dag.Put{ Kind: "Put", Args: puts, - }, nil + } } return &dag.Summarize{ Kind: "Summarize", Aggs: aggs, - }, nil + } } -func assignmentHasDynamicLHS(assignments []dag.Assignment) bool { - for _, a := range assignments { - if _, ok := a.LHS.(*dag.This); !ok { +func (a *analyzer) checkStaticAssignment(asts []ast.Assignment, assignments []dag.Assignment) bool { + for k, assign := range assignments { + if _, ok := assign.LHS.(*dag.BadExpr); ok { + continue + } + if _, ok := assign.LHS.(*dag.This); !ok { + a.error(asts[k].LHS, errors.New("output field must be static")) return true } } return false } -func (a *analyzer) semOpExpr(e ast.Expr, seq dag.Seq) (dag.Seq, error) { +func (a *analyzer) semOpExpr(e ast.Expr, seq dag.Seq) dag.Seq { if call, ok := e.(*ast.Call); ok { - if seq, err := a.semCallOp(call, seq); seq != nil || err != nil { - return seq, err + if seq := a.semCallOp(call, seq); seq != nil { + return seq } } - out, err := a.semExpr(e) - if err != nil { - return nil, err - } + out := a.semExpr(e) if a.isBool(out) { - return append(seq, dag.NewFilter(out)), nil + return append(seq, dag.NewFilter(out)) } return append(seq, &dag.Yield{ Kind: "Yield", Exprs: []dag.Expr{out}, - }), nil + }) } func (a *analyzer) isBool(e dag.Expr) bool { @@ -1138,13 +1019,11 @@ func (a *analyzer) 
isBool(e dag.Expr) bool { } } -func (a *analyzer) semCallOp(call *ast.Call, seq dag.Seq) (dag.Seq, error) { - if body, err := a.maybeConvertUserOp(call, seq); err != nil { - return nil, err - } else if body != nil { - return append(seq, body...), nil +func (a *analyzer) semCallOp(call *ast.Call, seq dag.Seq) dag.Seq { + if body := a.maybeConvertUserOp(call); body != nil { + return append(seq, body...) } - if agg, err := a.maybeConvertAgg(call); err == nil && agg != nil { + if agg := a.maybeConvertAgg(call); agg != nil { summarize := &dag.Summarize{ Kind: "Summarize", Aggs: []dag.Assignment{ @@ -1159,49 +1038,54 @@ func (a *analyzer) semCallOp(call *ast.Call, seq dag.Seq) (dag.Seq, error) { Kind: "Yield", Exprs: []dag.Expr{&dag.This{Kind: "This", Path: field.Path{call.Name}}}, } - return append(append(seq, summarize), yield), nil + return append(append(seq, summarize), yield) } if !function.HasBoolResult(call.Name) { - return nil, nil - } - c, err := a.semCall(call) - if err != nil { - return nil, err + return nil } - return append(seq, dag.NewFilter(c)), nil + c := a.semCall(call) + return append(seq, dag.NewFilter(c)) } // maybeConvertUserOp returns nil, nil if the call is determined to not be a // UserOp, otherwise it returns the compiled op or the encountered error. -func (a *analyzer) maybeConvertUserOp(call *ast.Call, seq dag.Seq) (dag.Seq, error) { +func (a *analyzer) maybeConvertUserOp(call *ast.Call) dag.Seq { decl, err := a.scope.lookupOp(call.Name) - if decl == nil || err != nil { - return nil, nil + if decl == nil { + return nil + } + if err != nil { + a.error(call, err) + return dag.Seq{badOp()} + } + if decl.bad { + return dag.Seq{badOp()} } if call.Where != nil { - return nil, fmt.Errorf("%s(): user defined operators cannot have a where clause", call.Name) + a.error(call, errors.New("user defined operators cannot have a where clause")) + return dag.Seq{badOp()} } params, args := decl.ast.Params, call.Args if len(params) != len(args) { - return nil, fmt.Errorf("%s(): %d arg%s provided when operator expects %d arg%s", call.Name, len(params), plural.Slice(params, "s"), len(args), plural.Slice(args, "s")) + a.error(call, fmt.Errorf("%d arg%s provided when operator expects %d arg%s", len(params), plural.Slice(params, "s"), len(args), plural.Slice(args, "s"))) + return dag.Seq{badOp()} } exprs := make([]dag.Expr, len(decl.ast.Params)) for i, arg := range args { - e, err := a.semExpr(arg) - if err != nil { - return nil, err - } + e := a.semExpr(arg) // Transform non-path arguments into literals. 
if _, ok := e.(*dag.This); !ok { val, err := kernel.EvalAtCompileTime(a.zctx, a.arena, e) if err != nil { - return nil, err + a.error(arg, err) + exprs[i] = badExpr() + continue } if val.IsError() { if val.IsMissing() { - return nil, fmt.Errorf("%q: non-path arguments cannot have variable dependency", decl.ast.Params[i]) + a.error(arg, errors.New("non-path arguments cannot have variable dependency")) } else { - return nil, fmt.Errorf("%q: %q", decl.ast.Params[i], string(val.Bytes())) + a.error(arg, errors.New(string(val.Bytes()))) } } e = &dag.Literal{ @@ -1212,7 +1096,8 @@ func (a *analyzer) maybeConvertUserOp(call *ast.Call, seq dag.Seq) (dag.Seq, err exprs[i] = e } if slices.Contains(a.opStack, decl.ast) { - return nil, opCycleError(append(a.opStack, decl.ast)) + a.error(call, opCycleError(append(a.opStack, decl.ast))) + return dag.Seq{badOp()} } a.opStack = append(a.opStack, decl.ast) oldscope := a.scope @@ -1223,7 +1108,8 @@ func (a *analyzer) maybeConvertUserOp(call *ast.Call, seq dag.Seq) (dag.Seq, err }() for i, p := range params { if err := a.scope.DefineAs(p, exprs[i]); err != nil { - return nil, err + a.error(call, err) + return dag.Seq{badOp()} } } return a.semSeq(decl.ast.Body) diff --git a/compiler/ztests/badshaper.yaml b/compiler/ztests/badshaper.yaml index 91745b9e49..bf3277dcf9 100644 --- a/compiler/ztests/badshaper.yaml +++ b/compiler/ztests/badshaper.yaml @@ -13,4 +13,6 @@ inputs: outputs: - name: stderr data: | - no such type name: "null" + no such type name: "null" in badshaper.zed at line 1, column 10: + type foo={_path:string,testfield:"null"} + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/compiler/ztests/const-source.yaml b/compiler/ztests/const-source.yaml index 661433b912..2f74d6ff07 100644 --- a/compiler/ztests/const-source.yaml +++ b/compiler/ztests/const-source.yaml @@ -33,6 +33,12 @@ outputs: ) - name: stderr data: | - invalid pool name: POOL: string value required - invalid file path: FILE: string value required - invalid file path: URL: string value required + POOL: string value required at line 1, column 24: + const POOL = 3.14 from POOL + ~~~~ + FILE: string value required at line 1, column 29: + const FILE = 127.0.0.1 file FILE + ~~~~ + URL: string value required at line 1, column 22: + const URL = true get URL + ~~~ diff --git a/compiler/ztests/from-error.yaml b/compiler/ztests/from-error.yaml index c40e85b392..ee31a22987 100644 --- a/compiler/ztests/from-error.yaml +++ b/compiler/ztests/from-error.yaml @@ -15,12 +15,22 @@ script: | outputs: - name: stderr data: | - semantic analyzer: from pool cannot be used without a lake + from pool cannot be used without a lake at line 1, column 1: + from p + ~~~~~~ === - test: pool not found + test: pool not found at line 1, column 6: + from test + ~~~~ === - test*: pool matching glob not found + test*: pool matching glob not found at line 1, column 6: + from test* + ~~~~~ === - test: pool matching regexp not found + test: pool matching regexp not found at line 1, column 6: + from /test/ + ~~~~~~ === - => not allowed after pool pattern in 'from' operator + => not allowed after pool pattern in 'from' operator at line 1, column 7: + from (pool * => count()) + ~~~~~~~~~~~~~~~~~ diff --git a/compiler/ztests/head.yaml b/compiler/ztests/head.yaml index a6406b03c5..e4e6ca31ac 100644 --- a/compiler/ztests/head.yaml +++ b/compiler/ztests/head.yaml @@ -20,6 +20,12 @@ outputs: ) - name: stderr data: | - head: expression value is not a positive integer: 1. 
- head: expression value is not a positive integer: "1" - head: expression value is not a positive integer: error("missing") + expression value must be an integer value: 1. at line 1, column 6: + head 1. + ~~ + expression value must be an integer value: "1" at line 1, column 7: + head "1" + ~ + expression value must be an integer value: error("missing") at line 1, column 6: + head x + ~ diff --git a/compiler/ztests/summarize-lhs-error.yaml b/compiler/ztests/summarize-lhs-error.yaml index 1f9e865c1d..1686e677f0 100644 --- a/compiler/ztests/summarize-lhs-error.yaml +++ b/compiler/ztests/summarize-lhs-error.yaml @@ -6,6 +6,12 @@ script: | outputs: - name: stderr data: | - summarize: key output field must be static - summarize: aggregate output field must be static - summarize: aggregate output field must be static + output field must be static at line 1, column 12: + count() by this[a] := key + ~~~~~~~ + output field must be static at line 1, column 1: + this[a] := count() by key + ~~~~~~~ + aggregate output field must be static at line 1, column 1: + this[a] := count() + ~~~~~~~ diff --git a/compiler/ztests/tail.yaml b/compiler/ztests/tail.yaml index b4da9ef4a7..76a4f5f767 100644 --- a/compiler/ztests/tail.yaml +++ b/compiler/ztests/tail.yaml @@ -20,6 +20,12 @@ outputs: ) - name: stderr data: | - tail: expression value is not a positive integer: 1. - tail: expression value is not a positive integer: "1" - tail: expression value is not a positive integer: error("missing") + expression value must be an integer value: 1. at line 1, column 6: + tail 1. + ~~ + expression value must be an integer value: "1" at line 1, column 7: + tail "1" + ~ + expression value must be an integer value: error("missing") at line 1, column 6: + tail x + ~ diff --git a/compiler/ztests/udf-err.yaml b/compiler/ztests/udf-err.yaml index a70ec602bd..b18ef8d985 100644 --- a/compiler/ztests/udf-err.yaml +++ b/compiler/ztests/udf-err.yaml @@ -21,6 +21,12 @@ inputs: outputs: - name: stderr data: | - symbol "dup" redefined - notAFunc(): definition is not a function type: *dag.Literal - f(): expects 2 argument(s) + symbol "dup" redefined in duplicate.zed at line 2, column 1: + func dup(n): (n+2) + ~~~~~~~~~~~~~~~~~ + not a function in call-non-func.zed at line 2, column 7: + yield notAFunc(this) + ~~~~~~~~~~~~~~ + call expects 2 argument(s) in wrong-args.zed at line 2, column 7: + yield f(this) + ~~~~~~~ diff --git a/compiler/ztests/where-on-func.yaml b/compiler/ztests/where-on-func.yaml index 21ae7245d6..6866bb9ad6 100644 --- a/compiler/ztests/where-on-func.yaml +++ b/compiler/ztests/where-on-func.yaml @@ -1,3 +1,3 @@ zed: cut hex := hex(this) where this % 2 == 0 -errorRE: "'where' clause on non-aggregation function: hex" +errorRE: "'where' clause on non-aggregation function" diff --git a/docs/language/operators/rename.md b/docs/language/operators/rename.md index 094359ed54..0a60058a4f 100644 --- a/docs/language/operators/rename.md +++ b/docs/language/operators/rename.md @@ -49,7 +49,9 @@ echo '{a:1,r:{b:2,c:3}}' | zq -z 'rename w:=r.b' - ``` => ```mdtest-output -rename: left-hand side and right-hand side must have the same depth (w vs r.b) +left-hand side and right-hand side must have the same depth (w vs r.b) at line 1, column 8: +rename w:=r.b + ~~~~~~ ``` _Record literals can be used instead of rename for mutation_ ```mdtest-command diff --git a/docs/language/statements.md b/docs/language/statements.md index ed58f90a38..601de40dd1 100644 --- a/docs/language/statements.md +++ b/docs/language/statements.md @@ -158,7 +158,9 
@@ echo '{greeting: "hi"}' | zq -z -I params.zed 'AddMessage("message", "hello")' - ``` which produces ```mdtest-output -illegal left-hand side of assignment +illegal left-hand side of assignment in params.zed at line 2, column 3: + field_for_message:=msg + ~~~~~~~~~~~~~~~~~~~~~~ ``` A constant value must be used to pass a parameter that will be referenced as diff --git a/docs/tutorials/schools.md b/docs/tutorials/schools.md index 0e3df2986f..24968787d8 100644 --- a/docs/tutorials/schools.md +++ b/docs/tutorials/schools.md @@ -843,7 +843,9 @@ zq -Z 'rename toplevel:=outer.inner' nested.zson ``` produces this compile-time error message and the query is not run: ```mdtest-output -rename: left-hand side and right-hand side must have the same depth (toplevel vs outer.inner) +left-hand side and right-hand side must have the same depth (toplevel vs outer.inner) at line 1, column 8: +rename toplevel:=outer.inner + ~~~~~~~~~~~~~~~~~~~~~ ``` This goal could instead be achieved by combining [`put`](#44-put) and [`drop`](#42-drop), e.g., diff --git a/fuzz/fuzz.go b/fuzz/fuzz.go index 865f4c5863..8bc9702b2c 100644 --- a/fuzz/fuzz.go +++ b/fuzz/fuzz.go @@ -95,11 +95,11 @@ func RunQuery(t testing.TB, zctx *zed.Context, readers []zio.Reader, querySource // Compile query engine := mock.NewMockEngine(gomock.NewController(t)) comp := compiler.NewFileSystemCompiler(engine) - ast, err := compiler.Parse(querySource) + ast, sset, err := compiler.Parse(querySource) if err != nil { t.Skipf("%v", err) } - query, err := runtime.CompileQuery(ctx, zctx, comp, ast, readers) + query, err := runtime.CompileQuery(ctx, zctx, comp, ast, sset, readers) if err != nil { t.Skipf("%v", err) } diff --git a/lake/api/local.go b/lake/api/local.go index bffb699208..74d4a25df7 100644 --- a/lake/api/local.go +++ b/lake/api/local.go @@ -7,6 +7,7 @@ import ( "github.com/brimdata/zed" "github.com/brimdata/zed/api" "github.com/brimdata/zed/compiler" + "github.com/brimdata/zed/compiler/parser" "github.com/brimdata/zed/lake" "github.com/brimdata/zed/lakeparse" "github.com/brimdata/zed/order" @@ -113,11 +114,11 @@ func (l *local) Query(ctx context.Context, head *lakeparse.Commitish, src string } func (l *local) QueryWithControl(ctx context.Context, head *lakeparse.Commitish, src string, srcfiles ...string) (zbuf.ProgressReadCloser, error) { - flowgraph, err := l.compiler.Parse(src, srcfiles...) 
+ flowgraph, sset, err := parser.ParseZed(srcfiles, src) if err != nil { return nil, err } - q, err := runtime.CompileLakeQuery(ctx, zed.NewContext(), l.compiler, flowgraph, head) + q, err := runtime.CompileLakeQuery(ctx, zed.NewContext(), l.compiler, flowgraph, sset, head) if err != nil { return nil, err } @@ -173,7 +174,7 @@ func (l *local) Delete(ctx context.Context, poolID ksuid.KSUID, branchName strin } func (l *local) DeleteWhere(ctx context.Context, poolID ksuid.KSUID, branchName, src string, commit api.CommitMessage) (ksuid.KSUID, error) { - op, err := l.compiler.Parse(src) + op, sset, err := l.compiler.Parse(src) if err != nil { return ksuid.Nil, err } @@ -181,7 +182,11 @@ func (l *local) DeleteWhere(ctx context.Context, poolID ksuid.KSUID, branchName, if err != nil { return ksuid.Nil, err } - return branch.DeleteWhere(ctx, l.compiler, op, commit.Author, commit.Body, commit.Meta) + commitID, err := branch.DeleteWhere(ctx, l.compiler, op, commit.Author, commit.Body, commit.Meta) + if list, ok := err.(parser.ErrorList); ok { + list.SetSourceSet(sset) + } + return commitID, err } func (l *local) Revert(ctx context.Context, poolID ksuid.KSUID, branchName string, commitID ksuid.KSUID, message api.CommitMessage) (ksuid.KSUID, error) { diff --git a/lake/ztests/dirs.yaml b/lake/ztests/dirs.yaml index 7005fd147f..0105563742 100644 --- a/lake/ztests/dirs.yaml +++ b/lake/ztests/dirs.yaml @@ -17,4 +17,6 @@ outputs: {min:2020-04-21T22:40:30.06852324Z,max:2020-04-22T01:23:40.0622373Z} - name: stderr data: | - logs: pool not found + logs: pool not found at line 1, column 6: + from logs@main:objects + ~~~~ diff --git a/lake/ztests/query-error.yaml b/lake/ztests/query-error.yaml index d140c166df..8499ece1bf 100644 --- a/lake/ztests/query-error.yaml +++ b/lake/ztests/query-error.yaml @@ -10,6 +10,12 @@ outputs: - name: stderr data: | query must include a 'from' operator - cannot scan from unknown HEAD - unknown lake metadata type "unknownmeta" in from operator - doesnotexist: pool not found + cannot scan from unknown HEAD at line 1, column 6: + from HEAD + ~~~~ + unknown lake metadata type "unknownmeta" in from operator at line 1, column 1: + from :unknownmeta + ~~~~~~~~~~~~~~~~~ + doesnotexist: pool not found at line 1, column 6: + from doesnotexist + ~~~~~~~~~~~~ diff --git a/runtime/compiler.go b/runtime/compiler.go index ba6b478e22..5918910e75 100644 --- a/runtime/compiler.go +++ b/runtime/compiler.go @@ -6,6 +6,7 @@ import ( "github.com/brimdata/zed" "github.com/brimdata/zed/compiler/ast" + "github.com/brimdata/zed/compiler/parser" "github.com/brimdata/zed/lakeparse" "github.com/brimdata/zed/zbuf" "github.com/brimdata/zed/zio" @@ -16,7 +17,7 @@ type Compiler interface { NewQuery(*Context, ast.Seq, []zio.Reader) (Query, error) NewLakeQuery(*Context, ast.Seq, int, *lakeparse.Commitish) (Query, error) NewLakeDeleteQuery(*Context, ast.Seq, *lakeparse.Commitish) (DeleteQuery, error) - Parse(string, ...string) (ast.Seq, error) + Parse(string, ...string) (ast.Seq, *parser.SourceSet, error) } type Query interface { @@ -43,21 +44,27 @@ func AsProgressReadCloser(q Query) zbuf.ProgressReadCloser { }{AsReader(q), q, q} } -func CompileQuery(ctx context.Context, zctx *zed.Context, c Compiler, program ast.Seq, readers []zio.Reader) (Query, error) { +func CompileQuery(ctx context.Context, zctx *zed.Context, c Compiler, program ast.Seq, sset *parser.SourceSet, readers []zio.Reader) (Query, error) { rctx := NewContext(ctx, zctx) q, err := c.NewQuery(rctx, program, readers) if err != nil { rctx.Cancel() + if 
list, ok := err.(parser.ErrorList); ok { + list.SetSourceSet(sset) + } return nil, err } return q, nil } -func CompileLakeQuery(ctx context.Context, zctx *zed.Context, c Compiler, program ast.Seq, head *lakeparse.Commitish) (Query, error) { +func CompileLakeQuery(ctx context.Context, zctx *zed.Context, c Compiler, program ast.Seq, sset *parser.SourceSet, head *lakeparse.Commitish) (Query, error) { rctx := NewContext(ctx, zctx) q, err := c.NewLakeQuery(rctx, program, 0, head) if err != nil { rctx.Cancel() + if list, ok := err.(parser.ErrorList); ok { + list.SetSourceSet(sset) + } return nil, err } return q, nil diff --git a/runtime/sam/expr/filter_test.go b/runtime/sam/expr/filter_test.go index be9c5a9217..75d659075b 100644 --- a/runtime/sam/expr/filter_test.go +++ b/runtime/sam/expr/filter_test.go @@ -54,7 +54,7 @@ func runCasesHelper(t *testing.T, record string, cases []testcase, expectBufferF for _, c := range cases { t.Run(c.filter, func(t *testing.T) { t.Helper() - p, err := compiler.Parse(c.filter) + p, _, err := compiler.Parse(c.filter) require.NoError(t, err, "filter: %q", c.filter) rctx := runtime.NewContext(context.Background(), zctx) job, err := compiler.NewJob(rctx, p, nil, nil) @@ -405,6 +405,6 @@ func TestFilters(t *testing.T) { func TestBadFilter(t *testing.T) { t.Parallel() - _, err := compiler.Parse(`s matches \xa8*`) + _, _, err := compiler.Parse(`s matches \xa8*`) require.Error(t, err) } diff --git a/runtime/sam/expr/ztests/cut-dup-fields.yaml b/runtime/sam/expr/ztests/cut-dup-fields.yaml index 4ffd78207c..3dc3652ec8 100644 --- a/runtime/sam/expr/ztests/cut-dup-fields.yaml +++ b/runtime/sam/expr/ztests/cut-dup-fields.yaml @@ -12,7 +12,15 @@ inputs: outputs: - name: stderr data: | - cut: duplicate field: "rec" - cut: duplicate field: "rec.sub1" - cut: duplicate field: "rec.sub.sub" - cut: duplicate field: "rec.sub" + duplicate field: "rec" at line 1, column 5: + cut rec,other,rec + ~~~~~~~~~~~~~ + duplicate field: "rec.sub1" at line 1, column 5: + cut rec.sub1,rec.sub1 + ~~~~~~~~~~~~~~~~~ + duplicate field: "rec.sub.sub" at line 1, column 5: + cut rec.sub,rec.sub.sub + ~~~~~~~~~~~~~~~~~~~ + duplicate field: "rec.sub" at line 1, column 5: + cut rec.sub.sub,rec.sub + ~~~~~~~~~~~~~~~~~~~ diff --git a/runtime/sam/expr/ztests/cut-not-adjacent.yaml b/runtime/sam/expr/ztests/cut-not-adjacent.yaml index de13172053..df2db99381 100644 --- a/runtime/sam/expr/ztests/cut-not-adjacent.yaml +++ b/runtime/sam/expr/ztests/cut-not-adjacent.yaml @@ -12,7 +12,15 @@ inputs: outputs: - name: stderr data: | - cut: fields in record rec must be adjacent - cut: fields in record rec1 must be adjacent - cut: fields in record rec1 must be adjacent - cut: fields in record t.rec must be adjacent + fields in record rec must be adjacent at line 1, column 5: + cut rec.sub1,other,rec.sub2 + ~~~~~~~~~~~~~~~~~~~~~~~ + fields in record rec1 must be adjacent at line 1, column 5: + cut rec1.rec2.sub1,other,rec1.sub2 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + fields in record rec1 must be adjacent at line 1, column 5: + cut rec1.rec2.sub1,other,rec1.rec2.sub2 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + fields in record t.rec must be adjacent at line 1, column 5: + cut t.rec.sub1,t.other,t.rec.sub2 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/runtime/sam/expr/ztests/rename-error-move.yaml b/runtime/sam/expr/ztests/rename-error-move.yaml index 4aa70ad092..08b5d973fa 100644 --- a/runtime/sam/expr/ztests/rename-error-move.yaml +++ b/runtime/sam/expr/ztests/rename-error-move.yaml @@ -3,4 +3,4 @@ zed: rename dst:=id.resp_h input: 
| {id:{orig_h:10.164.94.120,orig_p:39681(port=uint16),resp_h:10.47.3.155,resp_p:3389(port)}} -errorRE: "rename: left-hand side and right-hand side must have the same depth \\(dst vs id.resp_h\\)" +errorRE: "left-hand side and right-hand side must have the same depth \\(dst vs id.resp_h\\)" diff --git a/runtime/sam/op/groupby/groupby_test.go b/runtime/sam/op/groupby/groupby_test.go index e979f8f502..1f6e81ed51 100644 --- a/runtime/sam/op/groupby/groupby_test.go +++ b/runtime/sam/op/groupby/groupby_test.go @@ -106,7 +106,7 @@ func TestGroupbyStreamingSpill(t *testing.T) { defer arena.Unref() runOne := func(inputSortKey string) []string { - proc, err := compiler.Parse("count() by every(1s), ip") + proc, _, err := compiler.Parse("count() by every(1s), ip") assert.NoError(t, err) zctx := zed.NewContext() diff --git a/runtime/sam/op/ztests/cut-dynamic-field.yaml b/runtime/sam/op/ztests/cut-dynamic-field.yaml index 22f43a2eb5..16a70a561c 100644 --- a/runtime/sam/op/ztests/cut-dynamic-field.yaml +++ b/runtime/sam/op/ztests/cut-dynamic-field.yaml @@ -43,4 +43,6 @@ outputs: error({message:"cut: missing",on:{}}) - name: stderr data: | - left-hand side of assignment: symbol "foo" is not bound to an expression + symbol "foo" is not bound to an expression at line 1, column 38: + op foo(): ( yield "error" ) cut this[foo] := "hello world" + ~~~ diff --git a/runtime/sam/op/ztests/put-dynamic-field.yaml b/runtime/sam/op/ztests/put-dynamic-field.yaml index 7566f09213..ed5d924853 100644 --- a/runtime/sam/op/ztests/put-dynamic-field.yaml +++ b/runtime/sam/op/ztests/put-dynamic-field.yaml @@ -38,4 +38,6 @@ outputs: error({message:"put: missing",on:{}}) - name: stderr data: | - left-hand side of assignment: symbol "foo" is not bound to an expression + symbol "foo" is not bound to an expression at line 1, column 38: + op foo(): ( yield "error" ) put this[foo] := "hello world" + ~~~ diff --git a/runtime/sam/op/ztests/user-errors.yaml b/runtime/sam/op/ztests/user-errors.yaml index 1a951ba938..c2a005a835 100644 --- a/runtime/sam/op/ztests/user-errors.yaml +++ b/runtime/sam/op/ztests/user-errors.yaml @@ -8,7 +8,7 @@ inputs: op test(a, a): ( pass ) - op("a", "b") + test("a", "b") - name: error-const-lhs.zed data: | op test(a): ( @@ -19,5 +19,9 @@ inputs: outputs: - name: stderr data: | - test: duplicate parameter "a" - illegal left-hand side of assignment + duplicate parameter "a" in error-duplicate-parameters.zed at line 1, column 1: + op test(a, a): ( + ~~~~~~~~~~~~~~~~ + illegal left-hand side of assignment in error-const-lhs.zed at line 2, column 3: + a := a + ~~~~~~ diff --git a/service/handlers.go b/service/handlers.go index 42995e7030..32cc944c19 100644 --- a/service/handlers.go +++ b/service/handlers.go @@ -11,8 +11,8 @@ import ( "github.com/brimdata/zed/api" "github.com/brimdata/zed/api/queryio" "github.com/brimdata/zed/compiler" - "github.com/brimdata/zed/compiler/ast" "github.com/brimdata/zed/compiler/optimizer/demand" + "github.com/brimdata/zed/compiler/parser" "github.com/brimdata/zed/lake" lakeapi "github.com/brimdata/zed/lake/api" "github.com/brimdata/zed/lake/commits" @@ -51,12 +51,12 @@ func handleQuery(c *Core, w *ResponseWriter, r *Request) { // The client must look at the return code and interpret the result // accordingly and when it sees a ZNG error after underway, // the error should be relay that to the caller/user. 
-	query, err := c.compiler.Parse(req.Query)
+	query, sset, err := parser.ParseZed(nil, req.Query)
 	if err != nil {
 		w.Error(srverr.ErrInvalid(err))
 		return
 	}
-	flowgraph, err := runtime.CompileLakeQuery(r.Context(), zed.NewContext(), c.compiler, query, &req.Head)
+	flowgraph, err := runtime.CompileLakeQuery(r.Context(), zed.NewContext(), c.compiler, query, sset, &req.Head)
 	if err != nil {
 		w.Error(srverr.ErrInvalid(err))
 		return
@@ -162,7 +162,7 @@ func handleCompile(c *Core, w *ResponseWriter, r *Request) {
 	if !r.Unmarshal(w, &req) {
 		return
 	}
-	ast, err := c.compiler.Parse(req.Query)
+	ast, _, err := c.compiler.Parse(req.Query)
 	if err != nil {
 		w.Error(srverr.ErrInvalid(err))
 		return
@@ -552,12 +552,15 @@ func handleDelete(c *Core, w *ResponseWriter, r *Request) {
 		w.Error(srverr.ErrInvalid("either object_ids or where must be set"))
 		return
 	}
-	var program ast.Seq
-	if program, err = c.compiler.Parse(payload.Where); err != nil {
-		w.Error(srverr.ErrInvalid(err))
+	program, sset, err2 := c.compiler.Parse(payload.Where)
+	if err2 != nil {
+		w.Error(srverr.ErrInvalid(err2))
 		return
 	}
 	commit, err = branch.DeleteWhere(r.Context(), c.compiler, program, message.Author, message.Body, message.Meta)
+	if list, ok := err.(parser.ErrorList); ok {
+		list.SetSourceSet(sset)
+	}
 	if errors.Is(err, commits.ErrEmptyTransaction) ||
 		errors.Is(err, &compiler.InvalidDeleteWhereQuery{}) {
 		err = srverr.ErrInvalid(err)
diff --git a/service/ztests/curl-query-error.yaml b/service/ztests/curl-query-error.yaml
index 375fcd9ede..5bb7515a74 100644
--- a/service/ztests/curl-query-error.yaml
+++ b/service/ztests/curl-query-error.yaml
@@ -13,13 +13,13 @@ inputs:
 outputs:
   - name: stdout
     data: |
-      {"type":"Error","kind":"invalid operation","error":"pool name missing"}
+      {"type":"Error","kind":"invalid operation","error":"no pool name given"}
       code 400
-      {"type":"Error","kind":"invalid operation","error":"pool name missing"}
+      {"type":"Error","kind":"invalid operation","error":"no pool name given"}
       code 400
-      {"type":"Error","kind":"invalid operation","error":"pool name missing"}
+      {"type":"Error","kind":"invalid operation","error":"pool name missing at line 1, column 1:\nfrom HEAD\n~~~~~~~~~","compilation_errors":[{"Msg":"pool name missing","Pos":0,"End":9}]}
       code 400
-      {"type":"Error","kind":"invalid operation","error":"unknown lake metadata type \"unknownmeta\" in from operator"}
+      {"type":"Error","kind":"invalid operation","error":"unknown lake metadata type \"unknownmeta\" in from operator at line 1, column 1:\nfrom :unknownmeta\n~~~~~~~~~~~~~~~~~","compilation_errors":[{"Msg":"unknown lake metadata type \"unknownmeta\" in from operator","Pos":0,"End":17}]}
+      code 400
+      {"type":"Error","kind":"invalid operation","error":"doesnotexist: pool not found at line 1, column 6:\nfrom doesnotexist\n     ~~~~~~~~~~~~","compilation_errors":[{"Msg":"doesnotexist: pool not found","Pos":5,"End":17}]}
       code 400
-      {"type":"Error","kind":"item does not exist","error":"doesnotexist: pool not found"}
-      code 404
diff --git a/service/ztests/python.yaml b/service/ztests/python.yaml
index c897028ddb..da582d8c54 100644
--- a/service/ztests/python.yaml
+++ b/service/ztests/python.yaml
@@ -161,4 +161,4 @@ outputs:
       ===
       RequestError('test: pool already exists')
       ===
-      RequestError('nosuchpool: pool not found')
+      RequestError('nosuchpool: pool not found at line 1, column 6:\nfrom nosuchpool\n     ~~~~~~~~~~')
diff --git a/service/ztests/query-bad-commit.yaml b/service/ztests/query-bad-commit.yaml
index c375e795cc..90438292a9 100644
---
a/service/ztests/query-bad-commit.yaml +++ b/service/ztests/query-bad-commit.yaml @@ -10,4 +10,6 @@ inputs: outputs: - name: stderr data: | - status code 404: "doesnotexist": branch not found + "doesnotexist": branch not found at line 1, column 1: + from test@doesnotexist + ~~~~~~~~~~~~~~~~~~~~~~ diff --git a/service/ztests/query-error.yaml b/service/ztests/query-error.yaml index ae74877fbb..597e5ec4f7 100644 --- a/service/ztests/query-error.yaml +++ b/service/ztests/query-error.yaml @@ -12,7 +12,14 @@ inputs: outputs: - name: stderr data: | - status code 400: pool name missing - status code 400: pool name missing - status code 400: unknown lake metadata type "unknownmeta" in from operator - status code 404: doesnotexist: pool not found + status code 400: no pool name given + pool name missing at line 1, column 1: + from HEAD + ~~~~~~~~~ + unknown lake metadata type "unknownmeta" in from operator at line 1, column 1: + from :unknownmeta + ~~~~~~~~~~~~~~~~~ + doesnotexist: pool not found at line 1, column 6: + from doesnotexist + ~~~~~~~~~~~~ + diff --git a/zed_test.go b/zed_test.go index 65a623a48d..2e914969a0 100644 --- a/zed_test.go +++ b/zed_test.go @@ -136,7 +136,7 @@ func runOneBoomerang(t *testing.T, format, data string) { dataReader := zio.Reader(dataReadCloser) if format == "parquet" { // Fuse for formats that require uniform values. - proc, err := compiler.NewCompiler().Parse("fuse") + proc, _, err := compiler.NewCompiler().Parse("fuse") require.NoError(t, err) rctx := runtime.NewContext(context.Background(), zctx) q, err := compiler.NewCompiler().NewQuery(rctx, proc, []zio.Reader{dataReadCloser}) diff --git a/ztest/ztest.go b/ztest/ztest.go index b3eb4dca14..847db75bdc 100644 --- a/ztest/ztest.go +++ b/ztest/ztest.go @@ -480,7 +480,7 @@ func runzq(path, zedProgram, input string, outputFlags []string, inputFlags []st // tests. return outbuf.String(), errbuf.String(), err } - proc, err := compiler.NewCompiler().Parse(zedProgram) + proc, sset, err := compiler.Parse(zedProgram) if err != nil { return "", err.Error(), err } @@ -513,7 +513,7 @@ func runzq(path, zedProgram, input string, outputFlags []string, inputFlags []st if err != nil { return "", "", err } - q, err := compiler.NewCompiler().NewQuery(runtime.NewContext(context.Background(), zctx), proc, []zio.Reader{zrc}) + q, err := runtime.CompileQuery(context.Background(), zctx, compiler.NewCompiler(), proc, sset, []zio.Reader{zrc}) if err != nil { zw.Close() return "", err.Error(), err