diff --git a/cli/queryflags/flags.go b/cli/queryflags/flags.go index cde18ff72f..f1295f5d41 100644 --- a/cli/queryflags/flags.go +++ b/cli/queryflags/flags.go @@ -32,7 +32,7 @@ func (f *Flags) SetFlags(fs *flag.FlagSet) { fs.Var(&f.Includes, "I", "source file containing Zed query text (may be used multiple times)") } -func (f *Flags) ParseSourcesAndInputs(paths []string) ([]string, ast.Seq, bool, error) { +func (f *Flags) ParseSourcesAndInputs(paths []string) ([]string, ast.Seq, *parser.SourceSet, bool, error) { var src string if len(paths) != 0 && !cli.FileExists(paths[0]) && !isURLWithKnownScheme(paths[0], "http", "https", "s3") { src = paths[0] @@ -41,25 +41,25 @@ func (f *Flags) ParseSourcesAndInputs(paths []string) ([]string, ast.Seq, bool, // Consider a lone argument to be a query if it compiles // and appears to start with a from or yield operator. // Otherwise, consider it a file. - query, err := compiler.Parse(src, f.Includes...) + query, sset, err := compiler.Parse(src, f.Includes...) if err == nil { if s, err := semantic.Analyze(context.Background(), query, data.NewSource(storage.NewLocalEngine(), nil), nil); err == nil { if semantic.HasSource(s) { - return nil, query, false, nil + return nil, query, sset, false, nil } if semantic.StartsWithYield(s) { - return nil, query, true, nil + return nil, query, sset, true, nil } } } - return nil, nil, false, singleArgError(src, err) + return nil, nil, nil, false, singleArgError(src, err) } } - query, err := compiler.Parse(src, f.Includes...) + query, sset, err := parser.ParseZed(f.Includes, src) if err != nil { - return nil, nil, false, err + return nil, nil, nil, false, err } - return paths, query, false, nil + return paths, query, sset, false, nil } func isURLWithKnownScheme(path string, schemes ...string) bool { diff --git a/cli/zq/command.go b/cli/zq/command.go index bf0e6f7661..ab22100229 100644 --- a/cli/zq/command.go +++ b/cli/zq/command.go @@ -129,7 +129,7 @@ func (c *Command) Run(args []string) error { // Prevent ParseSourcesAndInputs from treating args[0] as a path. args = append(args, "-") } - paths, flowgraph, null, err := c.queryFlags.ParseSourcesAndInputs(args) + paths, flowgraph, sset, null, err := c.queryFlags.ParseSourcesAndInputs(args) if err != nil { return fmt.Errorf("zq: %w", err) } @@ -154,7 +154,7 @@ func (c *Command) Run(args []string) error { return err } comp := compiler.NewFileSystemCompiler(local) - query, err := runtime.CompileQuery(ctx, zctx, comp, flowgraph, readers) + query, err := runtime.CompileQuery(ctx, zctx, comp, flowgraph, sset, readers) if err != nil { return err } diff --git a/cmd/zed/dev/compile/command.go b/cmd/zed/dev/compile/command.go index 6fd85e5dec..02f0279b78 100644 --- a/cmd/zed/dev/compile/command.go +++ b/cmd/zed/dev/compile/command.go @@ -12,6 +12,7 @@ import ( "github.com/brimdata/zed/compiler/ast" "github.com/brimdata/zed/compiler/ast/dag" "github.com/brimdata/zed/compiler/data" + "github.com/brimdata/zed/compiler/parser" "github.com/brimdata/zed/lake" "github.com/brimdata/zed/pkg/charm" "github.com/brimdata/zed/runtime" @@ -130,7 +131,7 @@ func (c *Command) header(msg string) { } func (c *Command) parse(z string, lk *lake.Root) error { - seq, err := compiler.Parse(z, c.includes...) + seq, sset, err := compiler.Parse(z, c.includes...) 
if err != nil { return err } @@ -143,6 +144,7 @@ func (c *Command) parse(z string, lk *lake.Root) error { fmt.Println(normalize(b)) } if c.proc { + c.header("proc") c.writeAST(seq) } @@ -151,6 +153,9 @@ func (c *Command) parse(z string, lk *lake.Root) error { } runtime, err := compiler.NewJob(runtime.DefaultContext(), seq, data.NewSource(nil, lk), nil) if err != nil { + if list, ok := err.(parser.ErrorList); ok { + list.SetSourceSet(sset) + } return err } if c.semantic { diff --git a/cmd/zed/log/command.go b/cmd/zed/log/command.go index b396df3073..d4390e990e 100644 --- a/cmd/zed/log/command.go +++ b/cmd/zed/log/command.go @@ -7,6 +7,7 @@ import ( "github.com/brimdata/zed/cli/outputflags" "github.com/brimdata/zed/cli/poolflags" "github.com/brimdata/zed/cmd/zed/root" + "github.com/brimdata/zed/compiler/parser" "github.com/brimdata/zed/pkg/charm" "github.com/brimdata/zed/pkg/storage" "github.com/brimdata/zed/zio" @@ -70,6 +71,9 @@ func (c *Command) Run(args []string) error { defer w.Close() q, err := lake.Query(ctx, nil, query) if err != nil { + if list := (parser.ErrorList)(nil); errors.As(err, &list) && len(list) == 1 { + return errors.New(list[0].Msg) + } return err } defer q.Close() diff --git a/compiler/ast/ast.go b/compiler/ast/ast.go index 8b24aa1730..d08b73abae 100644 --- a/compiler/ast/ast.go +++ b/compiler/ast/ast.go @@ -110,7 +110,7 @@ func (c *Call) End() int { if c.Where != nil { return c.Where.End() } - return c.Rparen + return c.Rparen + 1 } type Cast struct { @@ -180,7 +180,7 @@ type Regexp struct { } func (r *Regexp) Pos() int { return r.PatternPos } -func (r *Regexp) End() int { return r.PatternPos + len(r.Pattern) } +func (r *Regexp) End() int { return r.PatternPos + len(r.Pattern) + 2 } type String struct { Kind string `json:"kind" unpack:""` @@ -432,9 +432,9 @@ type ( NullsFirst bool `json:"nullsfirst"` } Cut struct { - Kind string `json:"kind" unpack:""` - KeywordPos int `json:"keyword_pos"` - Args []Assignment `json:"args"` + Kind string `json:"kind" unpack:""` + KeywordPos int `json:"keyword_pos"` + Args Assignments `json:"args"` } Drop struct { Kind string `json:"kind" unpack:""` @@ -471,10 +471,10 @@ type ( Kind string `json:"kind" unpack:""` // StartPos is not called KeywordPos for Summarize since the "summarize" // keyword is optional. - StartPos int `json:"start_pos"` - Limit int `json:"limit"` - Keys []Assignment `json:"keys"` - Aggs []Assignment `json:"aggs"` + StartPos int `json:"start_pos"` + Limit int `json:"limit"` + Keys Assignments `json:"keys"` + Aggs Assignments `json:"aggs"` } Top struct { Kind string `json:"kind" unpack:""` @@ -484,9 +484,9 @@ type ( Flush bool `json:"flush"` } Put struct { - Kind string `json:"kind" unpack:""` - KeywordPos int `json:"keyword_pos"` - Args []Assignment `json:"args"` + Kind string `json:"kind" unpack:""` + KeywordPos int `json:"keyword_pos"` + Args Assignments `json:"args"` } Merge struct { Kind string `json:"kind" unpack:""` @@ -520,8 +520,8 @@ type ( // is unknown: It could be a Summarize or Put operator. This will be // determined in the semantic phase. 
OpAssignment struct { - Kind string `json:"kind" unpack:""` - Assignments []Assignment `json:"assignments"` + Kind string `json:"kind" unpack:""` + Assignments Assignments `json:"assignments"` } // An OpExpr operator is an expression that appears as an operator // and requires semantic analysis to determine if it is a filter, a yield, @@ -531,22 +531,22 @@ type ( Expr Expr `json:"expr"` } Rename struct { - Kind string `json:"kind" unpack:""` - KeywordPos int `json:"keyword_pos"` - Args []Assignment `json:"args"` + Kind string `json:"kind" unpack:""` + KeywordPos int `json:"keyword_pos"` + Args Assignments `json:"args"` } Fuse struct { Kind string `json:"kind" unpack:""` KeywordPos int `json:"keyword_pos"` } Join struct { - Kind string `json:"kind" unpack:""` - KeywordPos int `json:"keyword_pos"` - Style string `json:"style"` - RightInput Seq `json:"right_input"` - LeftKey Expr `json:"left_key"` - RightKey Expr `json:"right_key"` - Args []Assignment `json:"args"` + Kind string `json:"kind" unpack:""` + KeywordPos int `json:"keyword_pos"` + Style string `json:"style"` + RightInput Seq `json:"right_input"` + LeftKey Expr `json:"left_key"` + RightKey Expr `json:"right_key"` + Args Assignments `json:"args"` } Sample struct { Kind string `json:"kind" unpack:""` @@ -678,7 +678,12 @@ func (a *Assignment) Pos() int { return a.RHS.Pos() } -func (a *Assignment) End() int { return a.RHS.Pos() } +func (a *Assignment) End() int { return a.RHS.End() } + +type Assignments []Assignment + +func (a Assignments) Pos() int { return a[0].Pos() } +func (a Assignments) End() int { return a[len(a)-1].End() } // Def is like Assignment but the LHS is an identifier that may be later // referenced. This is used for const blocks in Sequential and var blocks diff --git a/compiler/ast/dag/expr.go b/compiler/ast/dag/expr.go index e36de6c1e0..2a5cac887c 100644 --- a/compiler/ast/dag/expr.go +++ b/compiler/ast/dag/expr.go @@ -30,6 +30,11 @@ type ( LHS Expr `json:"lhs"` RHS Expr `json:"rhs"` } + // A BadExpr node is a placeholder for an expression containing semantic + // errors. + BadExpr struct { + Kind string `json:"kind" unpack:""` + } BinaryExpr struct { Kind string `json:"kind" unpack:""` Op string `json:"op"` @@ -131,6 +136,7 @@ type ( func (*Agg) ExprDAG() {} func (*ArrayExpr) ExprDAG() {} func (*Assignment) ExprDAG() {} +func (*BadExpr) ExprDAG() {} func (*BinaryExpr) ExprDAG() {} func (*Call) ExprDAG() {} func (*Conditional) ExprDAG() {} diff --git a/compiler/ast/dag/op.go b/compiler/ast/dag/op.go index 93cce1c08f..a1d2282ab1 100644 --- a/compiler/ast/dag/op.go +++ b/compiler/ast/dag/op.go @@ -26,6 +26,11 @@ type Seq []Op // Ops type ( + // A BadOp node is a placeholder for an operator containing semantic + // errors. 
+ BadOp struct { + Kind string `json:"kind" unpack:""` + } Combine struct { Kind string `json:"kind" unpack:""` } @@ -281,6 +286,7 @@ type ( } ) +func (*BadOp) OpNode() {} func (*Fork) OpNode() {} func (*Scatter) OpNode() {} func (*Switch) OpNode() {} diff --git a/compiler/ast/dag/unpack.go b/compiler/ast/dag/unpack.go index 89d70b2f4e..f7bed32a64 100644 --- a/compiler/ast/dag/unpack.go +++ b/compiler/ast/dag/unpack.go @@ -10,6 +10,8 @@ var unpacker = unpack.New( Agg{}, ArrayExpr{}, Assignment{}, + BadOp{}, + BadExpr{}, BinaryExpr{}, Call{}, Combine{}, diff --git a/compiler/job.go b/compiler/job.go index 68fd9a2c27..8d9b3a87a4 100644 --- a/compiler/job.go +++ b/compiler/job.go @@ -88,14 +88,13 @@ func (j *Job) Parallelize(n int) error { return err } -func Parse(src string, filenames ...string) (ast.Seq, error) { - seq, _, err := parser.ParseZed(filenames, src) - return seq, err +func Parse(src string, filenames ...string) (ast.Seq, *parser.SourceSet, error) { + return parser.ParseZed(filenames, src) } // MustParse is like Parse but panics if an error is encountered. func MustParse(query string) ast.Seq { - seq, err := (*anyCompiler)(nil).Parse(query) + seq, _, err := (*anyCompiler)(nil).Parse(query) if err != nil { panic(err) } @@ -136,7 +135,7 @@ type anyCompiler struct{} // Parse concatenates the source files in filenames followed by src and parses // the resulting program. -func (*anyCompiler) Parse(src string, filenames ...string) (ast.Seq, error) { +func (*anyCompiler) Parse(src string, filenames ...string) (ast.Seq, *parser.SourceSet, error) { return Parse(src, filenames...) } @@ -145,13 +144,16 @@ func (*anyCompiler) Parse(src string, filenames ...string) (ast.Seq, error) { // nor does it compute the demand of the query to prune the projection // from the vcache. 
func VectorCompile(rctx *runtime.Context, query string, object *vcache.Object) (zbuf.Puller, error) { - seq, err := Parse(query) + seq, sset, err := Parse(query) if err != nil { return nil, err } src := &data.Source{} entry, err := semantic.Analyze(rctx.Context, seq, src, nil) if err != nil { + if list := (parser.ErrorList)(nil); errors.As(err, &list) { + list.SetSourceSet(sset) + } return nil, err } puller := vam.NewVectorProjection(rctx.Zctx, object, nil) //XXX project all @@ -177,12 +179,15 @@ func VectorFilterCompile(rctx *runtime.Context, query string, src *data.Source, if err != nil { return nil, err } - seq, err := Parse(query) + seq, sset, err := Parse(query) if err != nil { return nil, err } entry, err := semantic.Analyze(rctx.Context, seq, src, nil) if err != nil { + if list := (parser.ErrorList)(nil); errors.As(err, &list) { + list.SetSourceSet(sset) + } return nil, err } if len(entry) != 1 { diff --git a/compiler/parser/parser_test.go b/compiler/parser/parser_test.go index 25c3bc7a51..9a3bcde7c7 100644 --- a/compiler/parser/parser_test.go +++ b/compiler/parser/parser_test.go @@ -40,7 +40,7 @@ func searchForZed() ([]string, error) { } func parseOp(z string) ([]byte, error) { - o, err := compiler.Parse(z) + o, _, err := compiler.Parse(z) if err != nil { return nil, err } diff --git a/compiler/semantic/analyzer.go b/compiler/semantic/analyzer.go index cfaf292566..81dadeca22 100644 --- a/compiler/semantic/analyzer.go +++ b/compiler/semantic/analyzer.go @@ -7,6 +7,7 @@ import ( "github.com/brimdata/zed/compiler/ast" "github.com/brimdata/zed/compiler/ast/dag" "github.com/brimdata/zed/compiler/data" + "github.com/brimdata/zed/compiler/parser" "github.com/brimdata/zed/lakeparse" ) @@ -15,9 +16,9 @@ import ( // After semantic analysis, the DAG is ready for either optimization or compilation. func Analyze(ctx context.Context, seq ast.Seq, source *data.Source, head *lakeparse.Commitish) (dag.Seq, error) { a := newAnalyzer(ctx, source, head) - s, err := a.semSeq(seq) - if err != nil { - return nil, err + s := a.semSeq(seq) + if a.errors != nil { + return nil, a.errors } return s, nil } @@ -26,12 +27,12 @@ func Analyze(ctx context.Context, seq ast.Seq, source *data.Source, head *lakepa // DAG does not have one. 
func AnalyzeAddSource(ctx context.Context, seq ast.Seq, source *data.Source, head *lakeparse.Commitish) (dag.Seq, error) { a := newAnalyzer(ctx, source, head) - s, err := a.semSeq(seq) - if err != nil { - return nil, err + s := a.semSeq(seq) + if a.errors != nil { + return nil, a.errors } if !HasSource(s) { - if err = a.addDefaultSource(&s); err != nil { + if err := a.addDefaultSource(&s); err != nil { return nil, err } } @@ -40,6 +41,7 @@ func AnalyzeAddSource(ctx context.Context, seq ast.Seq, source *data.Source, hea type analyzer struct { ctx context.Context + errors parser.ErrorList head *lakeparse.Commitish opStack []*ast.OpDecl source *data.Source @@ -80,6 +82,10 @@ func (a *analyzer) addDefaultSource(seq *dag.Seq) error { seq.Prepend(&dag.DefaultScan{Kind: "DefaultScan"}) return nil } + // Verify pool exists for HEAD + if _, err := a.source.PoolID(a.ctx, a.head.Pool); err != nil { + return err + } pool := &ast.Pool{ Kind: "Pool", Spec: ast.PoolSpec{ @@ -89,10 +95,7 @@ func (a *analyzer) addDefaultSource(seq *dag.Seq) error { }, }, } - ops, err := a.semPool(pool) - if err != nil { - return err - } + ops := a.semPool(pool) seq.Prepend(ops[0]) return nil } @@ -118,6 +121,7 @@ func (a *analyzer) exitScope() { type opDecl struct { ast *ast.OpDecl scope *Scope // parent scope of op declaration. + bad bool } type opCycleError []*ast.OpDecl @@ -132,3 +136,15 @@ func (e opCycleError) Error() string { } return b } + +func badExpr() dag.Expr { + return &dag.BadExpr{Kind: "BadExpr"} +} + +func badOp() dag.Op { + return &dag.BadOp{Kind: "BadOp"} +} + +func (a *analyzer) error(n ast.Node, err error) { + a.errors.Append(err.Error(), n.Pos(), n.End()) +} diff --git a/compiler/semantic/expr.go b/compiler/semantic/expr.go index c7ebc05361..8056b4fdb8 100644 --- a/compiler/semantic/expr.go +++ b/compiler/semantic/expr.go @@ -16,33 +16,34 @@ import ( "github.com/brimdata/zed/zson" ) -func (a *analyzer) semExpr(e ast.Expr) (dag.Expr, error) { +func (a *analyzer) semExpr(e ast.Expr) dag.Expr { switch e := e.(type) { case nil: - return nil, errors.New("semantic analysis: illegal null value encountered in AST") + panic("semantic analysis: illegal null value encountered in AST") case *ast.Regexp: return &dag.RegexpSearch{ Kind: "RegexpSearch", Pattern: e.Pattern, Expr: pathOf("this"), - }, nil + } case *ast.Glob: return &dag.RegexpSearch{ Kind: "RegexpSearch", Pattern: reglob.Reglob(e.Pattern), Expr: pathOf("this"), - }, nil + } case *ast.Grep: return a.semGrep(e) case *astzed.Primitive: val, err := zson.ParsePrimitive(a.arena, e.Type, e.Text) if err != nil { - return nil, err + a.error(e, err) + return badExpr() } return &dag.Literal{ Kind: "Literal", Value: zson.FormatValue(val), - }, nil + } case *ast.ID: return a.semID(e) case *ast.Term: @@ -51,109 +52,77 @@ func (a *analyzer) semExpr(e ast.Expr) (dag.Expr, error) { case *astzed.Primitive: v, err := zson.ParsePrimitive(a.arena, t.Type, t.Text) if err != nil { - return nil, err + a.error(e, err) + return badExpr() } val = zson.FormatValue(v) case *astzed.TypeValue: tv, err := a.semType(t.Value) if err != nil { - return nil, err + a.error(e, err) + return badExpr() } val = "<" + tv + ">" default: - return nil, fmt.Errorf("unexpected term value: %s", e.Kind) + panic(fmt.Errorf("unexpected term value: %s", e.Kind)) } return &dag.Search{ Kind: "Search", Text: e.Text, Value: val, Expr: pathOf("this"), - }, nil - case *ast.UnaryExpr: - expr, err := a.semExpr(e.Operand) - if err != nil { - return nil, err } + case *ast.UnaryExpr: return &dag.UnaryExpr{ Kind: 
"UnaryExpr", Op: e.Op, - Operand: expr, - }, nil + Operand: a.semExpr(e.Operand), + } case *ast.BinaryExpr: return a.semBinary(e) case *ast.Conditional: - cond, err := a.semExpr(e.Cond) - if err != nil { - return nil, err - } - thenExpr, err := a.semExpr(e.Then) - if err != nil { - return nil, err - } - elseExpr, err := a.semExpr(e.Else) - if err != nil { - return nil, err - } + cond := a.semExpr(e.Cond) + thenExpr := a.semExpr(e.Then) + elseExpr := a.semExpr(e.Else) return &dag.Conditional{ Kind: "Conditional", Cond: cond, Then: thenExpr, Else: elseExpr, - }, nil + } case *ast.Call: return a.semCall(e) case *ast.Cast: - expr, err := a.semExpr(e.Expr) - if err != nil { - return nil, err - } - typ, err := a.semExpr(e.Type) - if err != nil { - return nil, err - } + expr := a.semExpr(e.Expr) + typ := a.semExpr(e.Type) return &dag.Call{ Kind: "Call", Name: "cast", Args: []dag.Expr{expr, typ}, - }, nil - case *ast.IndexExpr: - expr, err := a.semExpr(e.Expr) - if err != nil { - return nil, err - } - index, err := a.semExpr(e.Index) - if err != nil { - return nil, err } + case *ast.IndexExpr: + expr := a.semExpr(e.Expr) + index := a.semExpr(e.Index) // If expr is a path and index is a string, then just extend the path. if path := a.isIndexOfThis(expr, index); path != nil { - return path, nil + return path } return &dag.IndexExpr{ Kind: "IndexExpr", Expr: expr, Index: index, - }, nil - case *ast.SliceExpr: - expr, err := a.semExpr(e.Expr) - if err != nil { - return nil, err } + case *ast.SliceExpr: + expr := a.semExpr(e.Expr) // XXX Literal indices should be type checked as int. - from, err := a.semExprNullable(e.From) - if err != nil { - return nil, err - } - to, err := a.semExprNullable(e.To) - if err != nil { - return nil, err - } + from := a.semExprNullable(e.From) + to := a.semExprNullable(e.To) return &dag.SliceExpr{ Kind: "SliceExpr", Expr: expr, From: from, To: to, - }, nil + } case *astzed.TypeValue: typ, err := a.semType(e.Value) if err != nil { @@ -167,32 +136,28 @@ func (a *analyzer) semExpr(e ast.Expr) (dag.Expr, error) { // complaining about the type not existing. 
// XXX See issue #3413 if e := semDynamicType(e.Value); e != nil { - return e, nil + return e } - return nil, err + a.error(e, err) + return badExpr() } return &dag.Literal{ Kind: "Literal", Value: "<" + typ + ">", - }, nil - case *ast.Agg: - expr, err := a.semExprNullable(e.Expr) - if err != nil { - return nil, err } + case *ast.Agg: + expr := a.semExprNullable(e.Expr) if expr == nil && e.Name != "count" { - return nil, fmt.Errorf("aggregator '%s' requires argument", e.Name) - } - where, err := a.semExprNullable(e.Where) - if err != nil { - return nil, err + a.error(e, fmt.Errorf("aggregator '%s' requires argument", e.Name)) + return badExpr() } + where := a.semExprNullable(e.Where) return &dag.Agg{ Kind: "Agg", Name: e.Name, Expr: expr, Where: where, - }, nil + } case *ast.RecordExpr: fields := map[string]struct{}{} var out []dag.RecordElem @@ -200,13 +165,11 @@ func (a *analyzer) semExpr(e ast.Expr) (dag.Expr, error) { switch elem := elem.(type) { case *ast.Field: if _, ok := fields[elem.Name]; ok { - return nil, fmt.Errorf("record expression: %w", &zed.DuplicateFieldError{Name: elem.Name}) + a.error(elem, fmt.Errorf("record expression: %w", &zed.DuplicateFieldError{Name: elem.Name})) + continue } fields[elem.Name] = struct{}{} - e, err := a.semExpr(elem.Value) - if err != nil { - return nil, err - } + e := a.semExpr(elem.Value) out = append(out, &dag.Field{ Kind: "Field", Name: elem.Name, @@ -214,23 +177,19 @@ func (a *analyzer) semExpr(e ast.Expr) (dag.Expr, error) { }) case *ast.ID: if _, ok := fields[elem.Name]; ok { - return nil, fmt.Errorf("record expression: %w", &zed.DuplicateFieldError{Name: elem.Name}) + a.error(elem, fmt.Errorf("record expression: %w", &zed.DuplicateFieldError{Name: elem.Name})) + // XXX Add BadRecordElem? + continue } fields[elem.Name] = struct{}{} - v, err := a.semID(elem) - if err != nil { - return nil, err - } + v := a.semID(elem) out = append(out, &dag.Field{ Kind: "Field", Name: elem.Name, Value: v, }) case *ast.Spread: - e, err := a.semExpr(elem.Expr) - if err != nil { - return nil, err - } + e := a.semExpr(elem.Expr) out = append(out, &dag.Spread{ Kind: "Spread", Expr: e, @@ -240,71 +199,49 @@ func (a *analyzer) semExpr(e ast.Expr) (dag.Expr, error) { return &dag.RecordExpr{ Kind: "RecordExpr", Elems: out, - }, nil - case *ast.ArrayExpr: - elems, err := a.semVectorElems(e.Elems) - if err != nil { - return nil, err } + case *ast.ArrayExpr: + elems := a.semVectorElems(e.Elems) return &dag.ArrayExpr{ Kind: "ArrayExpr", Elems: elems, - }, nil - case *ast.SetExpr: - elems, err := a.semVectorElems(e.Elems) - if err != nil { - return nil, err } + case *ast.SetExpr: + elems := a.semVectorElems(e.Elems) return &dag.SetExpr{ Kind: "SetExpr", Elems: elems, - }, nil + } case *ast.MapExpr: var entries []dag.Entry for _, entry := range e.Entries { - key, err := a.semExpr(entry.Key) - if err != nil { - return nil, err - } - val, err := a.semExpr(entry.Value) - if err != nil { - return nil, err - } + key := a.semExpr(entry.Key) + val := a.semExpr(entry.Value) entries = append(entries, dag.Entry{Key: key, Value: val}) } return &dag.MapExpr{ Kind: "MapExpr", Entries: entries, - }, nil - case *ast.OverExpr: - exprs, err := a.semExprs(e.Exprs) - if err != nil { - return nil, err } + case *ast.OverExpr: + exprs := a.semExprs(e.Exprs) if e.Body == nil { - return nil, errors.New("over expression missing lateral scope") + a.error(e, errors.New("over expression missing lateral scope")) + return badExpr() } a.enterScope() defer a.exitScope() - locals, err := 
a.semVars(e.Locals) - if err != nil { - return nil, err - } - body, err := a.semSeq(e.Body) - if err != nil { - return nil, err - } return &dag.OverExpr{ Kind: "OverExpr", - Defs: locals, + Defs: a.semVars(e.Locals), Exprs: exprs, - Body: body, - }, nil + Body: a.semSeq(e.Body), + } } - return nil, fmt.Errorf("invalid expression type %T", e) + panic(errors.New("invalid expression type")) } -func (a *analyzer) semID(id *ast.ID) (dag.Expr, error) { +func (a *analyzer) semID(id *ast.ID) dag.Expr { // We use static scoping here to see if an identifier is // a "var" reference to the name or a field access // and transform the AST node appropriately. The resulting DAG @@ -312,12 +249,13 @@ func (a *analyzer) semID(id *ast.ID) (dag.Expr, error) { // one way or the other. ref, err := a.scope.LookupExpr(id.Name) if err != nil { - return nil, err + a.error(id, err) + return badExpr() } if ref != nil { - return ref, nil + return ref } - return pathOf(id.Name), nil + return pathOf(id.Name) } func semDynamicType(tv astzed.Type) *dag.Call { @@ -341,21 +279,15 @@ func dynamicTypeName(name string) *dag.Call { } } -func (a *analyzer) semGrep(grep *ast.Grep) (dag.Expr, error) { +func (a *analyzer) semGrep(grep *ast.Grep) dag.Expr { e := dag.Expr(&dag.This{Kind: "This"}) if grep.Expr != nil { - var err error - if e, err = a.semExpr(grep.Expr); err != nil { - return nil, err - } - } - p, err := a.semExpr(grep.Pattern) - if err != nil { - return nil, err + e = a.semExpr(grep.Expr) } + p := a.semExpr(grep.Pattern) if s, ok := p.(*dag.RegexpSearch); ok { s.Expr = e - return s, nil + return s } if s, ok := a.isStringConst(p); ok { return &dag.Search{ @@ -363,75 +295,66 @@ func (a *analyzer) semGrep(grep *ast.Grep) (dag.Expr, error) { Text: s, Value: zson.QuotedString([]byte(s)), Expr: e, - }, nil + } } return &dag.Call{ Kind: "Call", Name: "grep", Args: []dag.Expr{p, e}, - }, nil + } } -func (a *analyzer) semRegexp(b *ast.BinaryExpr) (dag.Expr, error) { +func (a *analyzer) semRegexp(b *ast.BinaryExpr) dag.Expr { if b.Op != "~" { - return nil, nil + return nil } re, ok := b.RHS.(*ast.Regexp) if !ok { - return nil, errors.New(`right-hand side of ~ expression must be a regular expression`) + a.error(b, errors.New(`right-hand side of ~ expression must be a regular expression`)) + return badExpr() } if _, err := expr.CompileRegexp(re.Pattern); err != nil { - return nil, err - } - e, err := a.semExpr(b.LHS) - if err != nil { - return nil, err + a.error(b.RHS, err) + return badExpr() } + e := a.semExpr(b.LHS) return &dag.RegexpMatch{ Kind: "RegexpMatch", Pattern: re.Pattern, Expr: e, - }, nil + } } -func (a *analyzer) semBinary(e *ast.BinaryExpr) (dag.Expr, error) { - if e, err := a.semRegexp(e); e != nil || err != nil { - return e, err +func (a *analyzer) semBinary(e *ast.BinaryExpr) dag.Expr { + if e := a.semRegexp(e); e != nil { + return e } op := e.Op if op == "." 
{ - lhs, err := a.semExpr(e.LHS) - if err != nil { - return nil, err - } + lhs := a.semExpr(e.LHS) id, ok := e.RHS.(*ast.ID) if !ok { - return nil, errors.New("RHS of dot operator is not an identifier") + a.error(e, errors.New("RHS of dot operator is not an identifier")) + return badExpr() } if lhs, ok := lhs.(*dag.This); ok { lhs.Path = append(lhs.Path, id.Name) - return lhs, nil + return lhs } return &dag.Dot{ Kind: "Dot", LHS: lhs, RHS: id.Name, - }, nil - } - lhs, err := a.semExpr(e.LHS) - if err != nil { - return nil, err - } - rhs, err := a.semExpr(e.RHS) - if err != nil { - return nil, err + } } + lhs := a.semExpr(e.LHS) + rhs := a.semExpr(e.RHS) return &dag.BinaryExpr{ Kind: "BinaryExpr", Op: e.Op, LHS: lhs, RHS: rhs, - }, nil + } } func (a *analyzer) isIndexOfThis(lhs, rhs dag.Expr) *dag.This { @@ -452,151 +375,140 @@ func (a *analyzer) isStringConst(e dag.Expr) (field string, ok bool) { return "", false } -func (a *analyzer) semSlice(slice *ast.BinaryExpr) (*dag.BinaryExpr, error) { - sliceFrom, err := a.semExprNullable(slice.LHS) - if err != nil { - return nil, err - } - sliceTo, err := a.semExprNullable(slice.RHS) - if err != nil { - return nil, err - } +func (a *analyzer) semSlice(slice *ast.BinaryExpr) *dag.BinaryExpr { + sliceFrom := a.semExprNullable(slice.LHS) + sliceTo := a.semExprNullable(slice.RHS) return &dag.BinaryExpr{ Kind: "BinaryExpr", Op: ":", LHS: sliceFrom, RHS: sliceTo, - }, nil + } } -func (a *analyzer) semExprNullable(e ast.Expr) (dag.Expr, error) { +func (a *analyzer) semExprNullable(e ast.Expr) dag.Expr { if e == nil { - return nil, nil + return nil } return a.semExpr(e) } -func (a *analyzer) semCall(call *ast.Call) (dag.Expr, error) { - if e, err := a.maybeConvertAgg(call); e != nil || err != nil { - return e, err +func (a *analyzer) semCall(call *ast.Call) dag.Expr { + if e := a.maybeConvertAgg(call); e != nil { + return e } if call.Where != nil { - return nil, fmt.Errorf("'where' clause on non-aggregation function: %s", call.Name) - } - exprs, err := a.semExprs(call.Args) - if err != nil { - return nil, fmt.Errorf("%s(): bad argument: %w", call.Name, err) + a.error(call, errors.New("'where' clause on non-aggregation function")) + return badExpr() } + exprs := a.semExprs(call.Args) name, nargs := call.Name, len(call.Args) // Call could be to a user defined func. Check if we have a matching func in // scope. udf, err := a.scope.LookupExpr(name) if err != nil { - return nil, err + a.error(call, err) + return badExpr() } switch { // udf should be checked first since a udf can override builtin functions. case udf != nil: f, ok := udf.(*dag.Func) if !ok { - return nil, fmt.Errorf("%s(): definition is not a function type: %T", name, udf) + a.error(call, errors.New("not a function")) + return badExpr() } if len(f.Params) != nargs { - return nil, fmt.Errorf("%s(): expects %d argument(s)", name, len(f.Params)) + a.error(call, fmt.Errorf("call expects %d argument(s)", len(f.Params))) + return badExpr() } case zed.LookupPrimitive(name) != nil: // Primitive function call, change this to a cast. 
if err := function.CheckArgCount(nargs, 1, 1); err != nil { - return nil, fmt.Errorf("%s(): %w", name, err) + a.error(call, err) + return badExpr() } exprs = append(exprs, &dag.Literal{Kind: "Literal", Value: "<" + name + ">"}) name = "cast" case expr.NewShaperTransform(name) != 0: if err := function.CheckArgCount(nargs, 1, 2); err != nil { - return nil, fmt.Errorf("%s(): %w", name, err) + a.error(call, err) + return badExpr() } if nargs == 1 { exprs = append([]dag.Expr{&dag.This{Kind: "This"}}, exprs...) } case name == "map": if err := function.CheckArgCount(nargs, 2, 2); err != nil { - return nil, fmt.Errorf("%s(): %w", name, err) + a.error(call, err) + return badExpr() } id, ok := call.Args[1].(*ast.ID) if !ok { - return nil, fmt.Errorf("%s(): second argument must be the identifier of a function", name) + a.error(call.Args[1], errors.New("second argument must be the identifier of a function")) + return badExpr() } - inner, err := a.semCall(&ast.Call{ + inner := a.semCall(&ast.Call{ Kind: "Call", Name: id.Name, Args: []ast.Expr{&ast.ID{Kind: "ID", Name: "this"}}, }) - if err != nil { - return nil, err - } return &dag.MapCall{ Kind: "MapCall", Expr: exprs[0], Inner: inner, - }, nil + } default: if _, _, err = function.New(a.zctx, name, nargs); err != nil { - return nil, fmt.Errorf("%s(): %w", name, err) + a.error(call, err) + return badExpr() } } return &dag.Call{ Kind: "Call", Name: name, Args: exprs, - }, nil + } } -func (a *analyzer) semExprs(in []ast.Expr) ([]dag.Expr, error) { +func (a *analyzer) semExprs(in []ast.Expr) []dag.Expr { exprs := make([]dag.Expr, 0, len(in)) for _, e := range in { - expr, err := a.semExpr(e) - if err != nil { - return nil, err - } - exprs = append(exprs, expr) + exprs = append(exprs, a.semExpr(e)) } - return exprs, nil + return exprs } -func (a *analyzer) semAssignments(assignments []ast.Assignment) ([]dag.Assignment, error) { +func (a *analyzer) semAssignments(assignments []ast.Assignment) []dag.Assignment { out := make([]dag.Assignment, 0, len(assignments)) for _, e := range assignments { - a, err := a.semAssignment(e) - if err != nil { - return nil, err - } - out = append(out, a) + out = append(out, a.semAssignment(e)) } - return out, nil + return out } -func (a *analyzer) semAssignment(assign ast.Assignment) (dag.Assignment, error) { - rhs, err := a.semExpr(assign.RHS) - if err != nil { - return dag.Assignment{}, fmt.Errorf("right-hand side of assignment: %w", err) - } +func (a *analyzer) semAssignment(assign ast.Assignment) dag.Assignment { + rhs := a.semExpr(assign.RHS) var lhs dag.Expr if assign.LHS == nil { - path, err := deriveLHSPath(rhs) - if err != nil { - return dag.Assignment{}, err + if path, err := deriveLHSPath(rhs); err != nil { + a.error(&assign, err) + lhs = badExpr() + } else { + lhs = &dag.This{Kind: "This", Path: path} } - lhs = &dag.This{Kind: "This", Path: path} - } else if lhs, err = a.semExpr(assign.LHS); err != nil { - return dag.Assignment{}, fmt.Errorf("left-hand side of assignment: %w", err) + } else { + lhs = a.semExpr(assign.LHS) } if !isLval(lhs) { - return dag.Assignment{}, errors.New("illegal left-hand side of assignment") + a.error(&assign, errors.New("illegal left-hand side of assignment")) + lhs = badExpr() } if this, ok := lhs.(*dag.This); ok && len(this.Path) == 0 { - return dag.Assignment{}, errors.New("cannot assign to 'this'") + a.error(&assign, errors.New("cannot assign to 'this'")) + lhs = badExpr() } - return dag.Assignment{Kind: "Assignment", LHS: lhs, RHS: rhs}, nil + return dag.Assignment{Kind: 
"Assignment", LHS: lhs, RHS: rhs} } func isLval(e dag.Expr) bool { @@ -637,66 +549,54 @@ func deriveLHSPath(rhs dag.Expr) ([]string, error) { return nil, errors.New("cannot infer field from expression") } -func (a *analyzer) semFields(exprs []ast.Expr) ([]dag.Expr, error) { +func (a *analyzer) semFields(exprs []ast.Expr) []dag.Expr { fields := make([]dag.Expr, 0, len(exprs)) for _, e := range exprs { - f, err := a.semField(e) - if err != nil { - return nil, err - } - fields = append(fields, f) + fields = append(fields, a.semField(e)) } - return fields, nil + return fields } // semField analyzes the expression f and makes sure that it's // a field reference returning an error if not. -func (a *analyzer) semField(f ast.Expr) (*dag.This, error) { - e, err := a.semExpr(f) - if err != nil { - return nil, errors.New("invalid expression used as a field") - } +func (a *analyzer) semField(f ast.Expr) dag.Expr { + e := a.semExpr(f) field, ok := e.(*dag.This) if !ok { - return nil, errors.New("invalid expression used as a field") + a.error(f, errors.New("invalid expression used as a field")) + return badExpr() } if len(field.Path) == 0 { - return nil, errors.New("cannot use 'this' as a field reference") + a.error(f, errors.New("cannot use 'this' as a field reference")) + return badExpr() } - return field, nil + return field } -func (a *analyzer) maybeConvertAgg(call *ast.Call) (dag.Expr, error) { +func (a *analyzer) maybeConvertAgg(call *ast.Call) dag.Expr { if _, err := agg.NewPattern(call.Name, true); err != nil { - return nil, nil + return nil } var e dag.Expr - if len(call.Args) > 1 { + if err := function.CheckArgCount(len(call.Args), 0, 1); err != nil { if call.Name == "min" || call.Name == "max" { // min and max are special cases as they are also functions. If the // number of args is greater than 1 they're probably a function so do not // return an error. 
- return nil, nil + return nil } - return nil, fmt.Errorf("%s: wrong number of arguments", call.Name) + a.error(call, err) + return badExpr() } if len(call.Args) == 1 { - var err error - e, err = a.semExpr(call.Args[0]) - if err != nil { - return nil, err - } - } - where, err := a.semExprNullable(call.Where) - if err != nil { - return nil, err + e = a.semExpr(call.Args[0]) } return &dag.Agg{ Kind: "Agg", Name: call.Name, Expr: e, - Where: where, - }, nil + Where: a.semExprNullable(call.Where), + } } func DotExprToFieldPath(e ast.Expr) *dag.This { @@ -749,23 +649,17 @@ func (a *analyzer) semType(typ astzed.Type) (string, error) { return zson.FormatType(ztype), nil } -func (a *analyzer) semVectorElems(elems []ast.VectorElem) ([]dag.VectorElem, error) { +func (a *analyzer) semVectorElems(elems []ast.VectorElem) []dag.VectorElem { var out []dag.VectorElem for _, elem := range elems { switch elem := elem.(type) { case *ast.Spread: - e, err := a.semExpr(elem.Expr) - if err != nil { - return nil, err - } + e := a.semExpr(elem.Expr) out = append(out, &dag.Spread{Kind: "Spread", Expr: e}) case *ast.VectorValue: - e, err := a.semExpr(elem.Expr) - if err != nil { - return nil, err - } + e := a.semExpr(elem.Expr) out = append(out, &dag.VectorValue{Kind: "VectorValue", Expr: e}) } } - return out, nil + return out } diff --git a/compiler/semantic/op.go b/compiler/semantic/op.go index a8a9923c51..5bf614b2c8 100644 --- a/compiler/semantic/op.go +++ b/compiler/semantic/op.go @@ -22,165 +22,151 @@ import ( "github.com/segmentio/ksuid" ) -func (a *analyzer) semSeq(seq ast.Seq) (dag.Seq, error) { +func (a *analyzer) semSeq(seq ast.Seq) dag.Seq { var converted dag.Seq for _, op := range seq { - var err error - converted, err = a.semOp(op, converted) - if err != nil { - return nil, err - } + converted = a.semOp(op, converted) } - return converted, nil + return converted } -func (a *analyzer) semFrom(from *ast.From, seq dag.Seq) (dag.Seq, error) { +func (a *analyzer) semFrom(from *ast.From, seq dag.Seq) dag.Seq { switch len(from.Trunks) { case 0: - return nil, errors.New("internal error: from operator has no paths") + a.error(from, errors.New("from operator has no paths")) + return append(seq, badOp()) case 1: return a.semTrunk(from.Trunks[0], seq) default: paths := make([]dag.Seq, 0, len(from.Trunks)) for _, in := range from.Trunks { - converted, err := a.semTrunk(in, nil) - if err != nil { - return nil, err - } - paths = append(paths, converted) + paths = append(paths, a.semTrunk(in, nil)) } return append(seq, &dag.Fork{ Kind: "Fork", Paths: paths, - }), nil + }) } } -func (a *analyzer) semTrunk(trunk ast.Trunk, out dag.Seq) (dag.Seq, error) { +func (a *analyzer) semTrunk(trunk ast.Trunk, out dag.Seq) dag.Seq { if pool, ok := trunk.Source.(*ast.Pool); ok && trunk.Seq != nil { switch pool.Spec.Pool.(type) { case *ast.Glob, *ast.Regexp: - return nil, errors.New("=> not allowed after pool pattern in 'from' operator") + a.error(&trunk, errors.New("=> not allowed after pool pattern in 'from' operator")) + return append(out, badOp()) } } - sources, err := a.semSource(trunk.Source) - if err != nil { - return nil, err - } - seq, err := a.semSeq(trunk.Seq) - if err != nil { - return nil, err - } + sources := a.semSource(trunk.Source) + seq := a.semSeq(trunk.Seq) if len(sources) == 1 { - return append(out, append(dag.Seq{sources[0]}, seq...)...), nil + return append(out, append(dag.Seq{sources[0]}, seq...)...) 
} paths := make([]dag.Seq, 0, len(sources)) for _, source := range sources { paths = append(paths, append(dag.Seq{source}, seq...)) } - return append(out, &dag.Fork{Kind: "Fork", Paths: paths}), nil + return append(out, &dag.Fork{Kind: "Fork", Paths: paths}) } //XXX make sure you can't read files from a lake instance -func (a *analyzer) semOpSource(source ast.Source, out dag.Seq) (dag.Seq, error) { - sources, err := a.semSource(source) - if err != nil { - return nil, err - } +func (a *analyzer) semOpSource(source ast.Source, out dag.Seq) dag.Seq { + sources := a.semSource(source) if len(sources) == 1 { - return append(sources, out...), nil + return append(sources, out...) } var paths []dag.Seq for _, s := range sources { paths = append(paths, dag.Seq{s}) } - return append(out, &dag.Fork{Kind: "Fork", Paths: paths}), nil + return append(out, &dag.Fork{Kind: "Fork", Paths: paths}) } -func (a *analyzer) semSource(source ast.Source) ([]dag.Op, error) { - switch p := source.(type) { +func (a *analyzer) semSource(source ast.Source) []dag.Op { + switch s := source.(type) { case *ast.File: - sortKey, err := semSortKey(p.SortKey) + sortKey, err := semSortKey(s.SortKey) if err != nil { - return nil, err + a.error(source, err) } var path string - switch p := p.Path.(type) { + switch p := s.Path.(type) { case *ast.QuotedString: path = p.Text case *ast.String: // This can be either a reference to a constant or a string. if path, err = a.maybeStringConst(p.Text); err != nil { - return nil, fmt.Errorf("invalid file path: %w", err) + a.error(s.Path, err) } default: - return nil, fmt.Errorf("semantic analyzer: unknown AST file type %T", p) + panic(fmt.Errorf("semantic analyzer: unknown AST file type %T", p)) } return []dag.Op{ &dag.FileScan{ Kind: "FileScan", Path: path, - Format: p.Format, + Format: s.Format, SortKey: sortKey, }, - }, nil + } case *ast.HTTP: - sortKey, err := semSortKey(p.SortKey) + sortKey, err := semSortKey(s.SortKey) if err != nil { - return nil, err + a.error(source, err) } var headers map[string][]string - if p.Headers != nil { - expr, err := a.semExpr(p.Headers) - if err != nil { - return nil, err - } + if s.Headers != nil { + expr := a.semExpr(s.Headers) val, err := kernel.EvalAtCompileTime(a.zctx, a.arena, expr) if err != nil { - return nil, fmt.Errorf("headers: %w", err) - } - headers, err = unmarshalHeaders(a.arena, val) - if err != nil { - return nil, err + a.error(s.Headers, err) + } else { + headers, err = unmarshalHeaders(a.arena, val) + if err != nil { + a.error(s.Headers, err) + } } } var url string - switch p := p.URL.(type) { + switch p := s.URL.(type) { case *ast.QuotedString: url = p.Text case *ast.String: // This can be either a reference to a constant or a string. if url, err = a.maybeStringConst(p.Text); err != nil { - return nil, fmt.Errorf("invalid file path: %w", err) + a.error(s.URL, err) + // Set url so we don't report an error for this twice. 
+ url = "http://error" } default: - return nil, fmt.Errorf("semantic analyzer: unsupported AST get type %T", p) + panic(fmt.Errorf("semantic analyzer: unsupported AST get type %T", p)) } if !strings.HasPrefix(url, "http://") && !strings.HasPrefix(url, "https://") { - return nil, fmt.Errorf("get: invalid URL %s", url) + a.error(s.URL, fmt.Errorf("invalid URL %s", url)) } return []dag.Op{ &dag.HTTPScan{ Kind: "HTTPScan", URL: url, - Format: p.Format, + Format: s.Format, SortKey: sortKey, - Method: p.Method, + Method: s.Method, Headers: headers, - Body: p.Body, + Body: s.Body, }, - }, nil + } case *ast.Pool: if !a.source.IsLake() { - return nil, errors.New("semantic analyzer: from pool cannot be used without a lake") + a.error(s, errors.New("from pool cannot be used without a lake")) + return []dag.Op{badOp()} } - return a.semPool(p) + return a.semPool(s) case *ast.Pass: //XXX just connect parent - return []dag.Op{dag.PassOp}, nil + return []dag.Op{dag.PassOp} default: - return nil, fmt.Errorf("semantic analyzer: unknown AST source type %T", p) + panic(fmt.Errorf("semantic analyzer: unknown AST source type %T", s)) } } @@ -206,6 +192,8 @@ func unmarshalHeaders(arena *zed.Arena, val zed.Value) (map[string][]string, err return headers, nil } +// XXX At some point SortKey should be an ast.Node but for now just have this +// return and error and let the parent log the semantic error. func semSortKey(p *ast.SortKey) (order.SortKey, error) { if p == nil || p.Keys == nil { return order.Nil, nil @@ -225,7 +213,7 @@ func semSortKey(p *ast.SortKey) (order.SortKey, error) { return order.NewSortKey(which, keys), nil } -func (a *analyzer) semPool(p *ast.Pool) ([]dag.Op, error) { +func (a *analyzer) semPool(p *ast.Pool) []dag.Op { var poolNames []string var err error switch specPool := p.Spec.Pool.(type) { @@ -238,28 +226,26 @@ func (a *analyzer) semPool(p *ast.Pool) ([]dag.Op, error) { poolNames, err = a.matchPools(specPool.Pattern, specPool.Pattern, "regexp") case *ast.String: // This can be either a reference to a constant or a string. - name, err := a.maybeStringConst(specPool.Text) - if err != nil { - return nil, fmt.Errorf("invalid pool name: %w", err) + var name string + if name, err = a.maybeStringConst(specPool.Text); err == nil { + poolNames = []string{name} } - poolNames = []string{name} case *ast.QuotedString: poolNames = []string{specPool.Text} default: - return nil, fmt.Errorf("semantic analyzer: unknown AST pool type %T", specPool) + panic(fmt.Errorf("semantic analyzer: unknown AST pool type %T", specPool)) } if err != nil { - return nil, err + // XXX PoolSpec should have a node position but for now just report + // error as entire source. 
+ a.error(p.Spec.Pool, err) + return []dag.Op{badOp()} } var sources []dag.Op for _, name := range poolNames { - source, err := a.semPoolWithName(p, name) - if err != nil { - return nil, err - } - sources = append(sources, source) + sources = append(sources, a.semPoolWithName(p, name)) } - return sources, nil + return sources } func (a *analyzer) maybeStringConst(name string) (string, error) { @@ -278,11 +264,12 @@ func (a *analyzer) maybeStringConst(name string) (string, error) { return val.AsString(), nil } -func (a *analyzer) semPoolWithName(p *ast.Pool, poolName string) (dag.Op, error) { +func (a *analyzer) semPoolWithName(p *ast.Pool, poolName string) dag.Op { commit := p.Spec.Commit if poolName == "HEAD" { if a.head == nil { - return nil, errors.New("cannot scan from unknown HEAD") + a.error(p.Spec.Pool, errors.New("cannot scan from unknown HEAD")) + return badOp() } poolName = a.head.Pool commit = a.head.Branch @@ -290,27 +277,30 @@ func (a *analyzer) semPoolWithName(p *ast.Pool, poolName string) (dag.Op, error) if poolName == "" { meta := p.Spec.Meta if meta == "" { - return nil, errors.New("pool name missing") + a.error(p, errors.New("pool name missing")) + return badOp() } if _, ok := dag.LakeMetas[meta]; !ok { - return nil, fmt.Errorf("unknown lake metadata type %q in from operator", meta) + a.error(p, fmt.Errorf("unknown lake metadata type %q in from operator", meta)) + return badOp() } return &dag.LakeMetaScan{ Kind: "LakeMetaScan", Meta: p.Spec.Meta, - }, nil + } } poolID, err := a.source.PoolID(a.ctx, poolName) if err != nil { - return nil, err + a.error(p.Spec.Pool, err) + return badOp() } var commitID ksuid.KSUID if commit != "" { - commitID, err = lakeparse.ParseID(commit) - if err != nil { + if commitID, err = lakeparse.ParseID(commit); err != nil { commitID, err = a.source.CommitObject(a.ctx, poolID, commit) if err != nil { - return nil, err + a.error(p, err) + return badOp() } } } @@ -319,7 +309,8 @@ func (a *analyzer) semPoolWithName(p *ast.Pool, poolName string) (dag.Op, error) if commitID == ksuid.Nil { commitID, err = a.source.CommitObject(a.ctx, poolID, "main") if err != nil { - return nil, err + a.error(p, err) + return badOp() } } return &dag.CommitMetaScan{ @@ -328,23 +319,25 @@ func (a *analyzer) semPoolWithName(p *ast.Pool, poolName string) (dag.Op, error) Pool: poolID, Commit: commitID, Tap: p.Spec.Tap, - }, nil + } } if _, ok := dag.PoolMetas[meta]; ok { return &dag.PoolMetaScan{ Kind: "PoolMetaScan", Meta: meta, ID: poolID, - }, nil + } } - return nil, fmt.Errorf("unknown metadata type %q in from operator", meta) + a.error(p, fmt.Errorf("unknown metadata type %q", meta)) + return badOp() } if commitID == ksuid.Nil { // This trick here allows us to default to the main branch when // there is a "from pool" operator with no meta query or commit object. 
commitID, err = a.source.CommitObject(a.ctx, poolID, "main") if err != nil { - return nil, err + a.error(p, err) + return badOp() } } if p.Delete { @@ -352,13 +345,13 @@ func (a *analyzer) semPoolWithName(p *ast.Pool, poolName string) (dag.Op, error) Kind: "DeleteScan", ID: poolID, Commit: commitID, - }, nil + } } return &dag.PoolScan{ Kind: "PoolScan", ID: poolID, Commit: commitID, - }, nil + } } func (a *analyzer) matchPools(pattern, origPattern, patternDesc string) ([]string, error) { @@ -382,23 +375,16 @@ func (a *analyzer) matchPools(pattern, origPattern, patternDesc string) ([]strin return matches, nil } -func (a *analyzer) semScope(op *ast.Scope) (*dag.Scope, error) { +func (a *analyzer) semScope(op *ast.Scope) *dag.Scope { a.scope = NewScope(a.scope) defer a.exitScope() - consts, funcs, err := a.semDecls(op.Decls) - if err != nil { - return nil, err - } - body, err := a.semSeq(op.Body) - if err != nil { - return nil, err - } + consts, funcs := a.semDecls(op.Decls) return &dag.Scope{ Kind: "Scope", Consts: consts, Funcs: funcs, - Body: body, - }, nil + Body: a.semSeq(op.Body), + } } // semOp does a semantic analysis on a flowgraph to an @@ -406,32 +392,22 @@ func (a *analyzer) semScope(op *ast.Scope) (*dag.Scope, error) { // object. Currently, it only replaces the group-by duration with // a bucket call on the ts and replaces FunctionCalls in op context // with either a group-by or filter op based on the function's name. -func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { +func (a *analyzer) semOp(o ast.Op, seq dag.Seq) dag.Seq { switch o := o.(type) { case *ast.From: return a.semFrom(o, seq) case *ast.Pool, *ast.File, *ast.HTTP: return a.semOpSource(o.(ast.Source), seq) case *ast.Summarize: - keys, err := a.semAssignments(o.Keys) - if err != nil { - return nil, err - } - if assignmentHasDynamicLHS(keys) { - return nil, errors.New("summarize: key output field must be static") - } + keys := a.semAssignments(o.Keys) + a.checkStaticAssignment(o.Keys, keys) if len(keys) == 0 && len(o.Aggs) == 1 { if seq := a.singletonAgg(o.Aggs[0], seq); seq != nil { - return seq, nil + return seq } } - aggs, err := a.semAssignments(o.Aggs) - if err != nil { - return nil, err - } - if assignmentHasDynamicLHS(aggs) { - return nil, errors.New("summarize: aggregate output field must be static") - } + aggs := a.semAssignments(o.Aggs) + a.checkStaticAssignment(o.Aggs, aggs) // Note: InputSortDir is copied in here but it's not meaningful // coming from a parser AST, only from a worker using the kernel DSL, // which is another reason why we need separate parser and kernel ASTs. 
@@ -446,44 +422,28 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { Limit: o.Limit, Keys: keys, Aggs: aggs, - }), nil + }) case *ast.Parallel: var paths []dag.Seq for _, seq := range o.Paths { - converted, err := a.semSeq(seq) - if err != nil { - return nil, err - } - paths = append(paths, converted) + paths = append(paths, a.semSeq(seq)) } return append(seq, &dag.Fork{ Kind: "Fork", Paths: paths, - }), nil + }) case *ast.Scope: - op, err := a.semScope(o) - if err != nil { - return nil, err - } - return append(seq, op), nil + return append(seq, a.semScope(o)) case *ast.Switch: var expr dag.Expr if o.Expr != nil { - var err error - expr, err = a.semExpr(o.Expr) - if err != nil { - return nil, err - } + expr = a.semExpr(o.Expr) } var cases []dag.Case for _, c := range o.Cases { var e dag.Expr if c.Expr != nil { - var err error - e, err = a.semExpr(c.Expr) - if err != nil { - return nil, err - } + e = a.semExpr(c.Expr) } else if o.Expr == nil { // c.Expr == nil indicates the default case, // whose handling depends on p.Expr. @@ -492,24 +452,18 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { Value: "true", } } - path, err := a.semSeq(c.Path) - if err != nil { - return nil, err - } + path := a.semSeq(c.Path) cases = append(cases, dag.Case{Expr: e, Path: path}) } return append(seq, &dag.Switch{ Kind: "Switch", Expr: expr, Cases: cases, - }), nil + }) case *ast.Shape: - return append(seq, &dag.Shape{Kind: "Shape"}), nil + return append(seq, &dag.Shape{Kind: "Shape"}) case *ast.Cut: - assignments, err := a.semAssignments(o.Args) - if err != nil { - return nil, err - } + assignments := a.semAssignments(o.Args) // Collect static paths so we can check on what is available. var fields field.List for _, a := range assignments { @@ -517,116 +471,108 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { fields = append(fields, this.Path) } } - if _, err = zed.NewRecordBuilder(a.zctx, fields); err != nil { - return nil, fmt.Errorf("cut: %w", err) + if _, err := zed.NewRecordBuilder(a.zctx, fields); err != nil { + a.error(o.Args, err) + return append(seq, badOp()) } return append(seq, &dag.Cut{ Kind: "Cut", Args: assignments, - }), nil + }) case *ast.Drop: - args, err := a.semFields(o.Args) - if err != nil { - return nil, fmt.Errorf("drop: %w", err) - } + args := a.semFields(o.Args) if len(args) == 0 { - return nil, errors.New("drop: no fields given") + a.error(o, errors.New("no fields given")) } return append(seq, &dag.Drop{ Kind: "Drop", Args: args, - }), nil + }) case *ast.Sort: - exprs, err := a.semExprs(o.Args) - if err != nil { - return nil, fmt.Errorf("sort: %w", err) - } + exprs := a.semExprs(o.Args) return append(seq, &dag.Sort{ Kind: "Sort", Args: exprs, Order: o.Order, NullsFirst: o.NullsFirst, - }), nil + }) case *ast.Head: val := zed.NewInt64(1) if o.Count != nil { - expr, err := a.semExpr(o.Count) - if err != nil { - return nil, fmt.Errorf("head: %w", err) - } + expr := a.semExpr(o.Count) + var err error if val, err = kernel.EvalAtCompileTime(a.zctx, a.arena, expr); err != nil { - return nil, fmt.Errorf("head: %w", err) + a.error(o.Count, err) + return append(seq, badOp()) + } + if !zed.IsInteger(val.Type().ID()) { + a.error(o.Count, fmt.Errorf("expression value must be an integer value: %s", zson.FormatValue(val))) + return append(seq, badOp()) } } if val.AsInt() < 1 { - return nil, fmt.Errorf("head: expression value is not a positive integer: %s", zson.FormatValue(val)) + a.error(o.Count, errors.New("expression value must be a 
positive integer")) } return append(seq, &dag.Head{ Kind: "Head", Count: int(val.AsInt()), - }), nil + }) case *ast.Tail: val := zed.NewInt64(1) if o.Count != nil { - expr, err := a.semExpr(o.Count) - if err != nil { - return nil, fmt.Errorf("tail: %w", err) - } + expr := a.semExpr(o.Count) + var err error if val, err = kernel.EvalAtCompileTime(a.zctx, a.arena, expr); err != nil { - return nil, fmt.Errorf("tail: %w", err) + a.error(o.Count, err) + return append(seq, badOp()) + } + if !zed.IsInteger(val.Type().ID()) { + a.error(o.Count, fmt.Errorf("expression value must be an integer value: %s", zson.FormatValue(val))) + return append(seq, badOp()) } } if val.AsInt() < 1 { - return nil, fmt.Errorf("tail: expression value is not a positive integer: %s", zson.FormatValue(val)) + a.error(o.Count, errors.New("expression value must be a positive integer")) } return append(seq, &dag.Tail{ Kind: "Tail", Count: int(val.AsInt()), - }), nil + }) case *ast.Uniq: return append(seq, &dag.Uniq{ Kind: "Uniq", Cflag: o.Cflag, - }), nil + }) case *ast.Pass: - return append(seq, dag.PassOp), nil + return append(seq, dag.PassOp) case *ast.OpExpr: return a.semOpExpr(o.Expr, seq) case *ast.Search: - e, err := a.semExpr(o.Expr) - if err != nil { - return nil, err - } - return append(seq, dag.NewFilter(e)), nil + e := a.semExpr(o.Expr) + return append(seq, dag.NewFilter(e)) case *ast.Where: - e, err := a.semExpr(o.Expr) - if err != nil { - return nil, err - } - return append(seq, dag.NewFilter(e)), nil + e := a.semExpr(o.Expr) + return append(seq, dag.NewFilter(e)) case *ast.Top: - args, err := a.semExprs(o.Args) - if err != nil { - return nil, fmt.Errorf("top: %w", err) - } + args := a.semExprs(o.Args) if len(args) == 0 { - return nil, errors.New("top: no arguments given") + a.error(o, errors.New("no arguments given")) } limit := 1 if o.Limit != nil { - l, err := a.semExpr(o.Limit) - if err != nil { - return nil, err - } + l := a.semExpr(o.Limit) val, err := kernel.EvalAtCompileTime(a.zctx, a.arena, l) if err != nil { - return nil, err + a.error(o.Limit, err) + return append(seq, badOp()) } if !zed.IsSigned(val.Type().ID()) { - return nil, errors.New("top: limit argument must be an integer") + a.error(o.Limit, errors.New("limit argument must be an integer")) + return append(seq, badOp()) } if limit = int(val.Int()); limit < 1 { - return nil, errors.New("top: limit argument value must be greater than 0") + a.error(o.Limit, errors.New("limit argument value must be greater than 0")) + return append(seq, badOp()) } } return append(seq, &dag.Top{ @@ -634,12 +580,9 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { Args: args, Flush: o.Flush, Limit: limit, - }), nil + }) case *ast.Put: - assignments, err := a.semAssignments(o.Args) - if err != nil { - return nil, err - } + assignments := a.semAssignments(o.Args) // We can do collision checking on static paths, so check what we can. 
var fields field.List for _, a := range assignments { @@ -648,27 +591,20 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { } } if err := expr.CheckPutFields(fields); err != nil { - return nil, fmt.Errorf("put: %w", err) + a.error(o, err) } return append(seq, &dag.Put{ Kind: "Put", Args: assignments, - }), nil + }) case *ast.OpAssignment: - converted, err := a.semOpAssignment(o) - if err != nil { - return nil, err - } - return append(seq, converted), nil + return append(seq, a.semOpAssignment(o)) case *ast.Rename: var assignments []dag.Assignment for _, fa := range o.Args { - assign, err := a.semAssignment(fa) - if err != nil { - return nil, fmt.Errorf("rename: %w", err) - } + assign := a.semAssignment(fa) if !isLval(assign.RHS) { - return nil, fmt.Errorf("rename: illegal right-hand side of assignment") + a.error(fa.RHS, fmt.Errorf("illegal right-hand side of assignment")) } // If both paths are static validate them. Otherwise this will be // done at runtime. @@ -676,7 +612,7 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { rhs, rhsOk := assign.RHS.(*dag.This) if rhsOk && lhsOk { if err := expr.CheckRenameField(lhs.Path, rhs.Path); err != nil { - return nil, fmt.Errorf("rename: %w", err) + a.error(&fa, err) } } assignments = append(assignments, assign) @@ -684,27 +620,15 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { return append(seq, &dag.Rename{ Kind: "Rename", Args: assignments, - }), nil + }) case *ast.Fuse: - return append(seq, &dag.Fuse{Kind: "Fuse"}), nil + return append(seq, &dag.Fuse{Kind: "Fuse"}) case *ast.Join: - rightInput, err := a.semSeq(o.RightInput) - if err != nil { - return nil, err - } - leftKey, err := a.semExpr(o.LeftKey) - if err != nil { - return nil, err - } + rightInput := a.semSeq(o.RightInput) + leftKey := a.semExpr(o.LeftKey) rightKey := leftKey if o.RightKey != nil { - if rightKey, err = a.semExpr(o.RightKey); err != nil { - return nil, err - } - } - assignments, err := a.semAssignments(o.Args) - if err != nil { - return nil, err + rightKey = a.semExpr(o.RightKey) } join := &dag.Join{ Kind: "Join", @@ -713,7 +637,7 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { LeftKey: leftKey, RightDir: order.Unknown, RightKey: rightKey, - Args: assignments, + Args: a.semAssignments(o.Args), } if rightInput != nil { par := &dag.Fork{ @@ -722,29 +646,26 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { } seq = append(seq, par) } - return append(seq, join), nil + return append(seq, join) case *ast.Explode: typ, err := a.semType(o.Type) if err != nil { - return nil, err - } - args, err := a.semExprs(o.Args) - if err != nil { - return nil, err + a.error(o.Type, err) + typ = "" } + args := a.semExprs(o.Args) var as string if o.As == nil { as = "value" } else { - e, err := a.semExpr(o.As) - if err != nil { - return nil, err - } + e := a.semExpr(o.As) this, ok := e.(*dag.This) if !ok { - return nil, errors.New("explode: as clause must be a field reference") + a.error(o.As, errors.New("as clause must be a field reference")) + return append(seq, badOp()) } else if len(this.Path) != 1 { - return nil, errors.New("explode: field must be a top-level field") + a.error(o.As, errors.New("field must be a top-level field")) + return append(seq, badOp()) } as = this.Path[0] } @@ -753,51 +674,35 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { Args: args, Type: typ, As: as, - }), nil + }) case *ast.Merge: - expr, err := a.semExpr(o.Expr) - if err != nil { - return nil, 
fmt.Errorf("merge: %w", err) - } return append(seq, &dag.Merge{ Kind: "Merge", - Expr: expr, + Expr: a.semExpr(o.Expr), Order: order.Asc, //XXX - }), nil + }) case *ast.Over: if len(o.Locals) != 0 && o.Body == nil { - return nil, errors.New("over operator: cannot have a with clause without a lateral query") + a.error(o, errors.New("cannot have a with clause without a lateral query")) } a.enterScope() defer a.exitScope() - locals, err := a.semVars(o.Locals) - if err != nil { - return nil, err - } - exprs, err := a.semExprs(o.Exprs) - if err != nil { - return nil, err - } + locals := a.semVars(o.Locals) + exprs := a.semExprs(o.Exprs) var body dag.Seq if o.Body != nil { - body, err = a.semSeq(o.Body) - if err != nil { - return nil, err - } + body = a.semSeq(o.Body) } return append(seq, &dag.Over{ Kind: "Over", Defs: locals, Exprs: exprs, Body: body, - }), nil + }) case *ast.Sample: e := dag.Expr(&dag.This{Kind: "This"}) if o.Expr != nil { - var err error - if e, err = a.semExpr(o.Expr); err != nil { - return nil, err - } + e = a.semExpr(o.Expr) } seq = append(seq, &dag.Summarize{ Kind: "Summarize", @@ -819,12 +724,9 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { return append(seq, &dag.Yield{ Kind: "Yield", Exprs: []dag.Expr{&dag.This{Kind: "This", Path: field.Path{"sample"}}}, - }), nil + }) case *ast.Assert: - cond, err := a.semExpr(o.Expr) - if err != nil { - return nil, err - } + cond := a.semExpr(o.Expr) // 'assert EXPR' is equivalent to // 'yield EXPR ? this : error({message: "assertion failed", "expr": EXPR_text, "on": this}' // where EXPR_text is the literal text of EXPR. @@ -861,22 +763,20 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { }, }, }, - }), nil + }) case *ast.Yield: - exprs, err := a.semExprs(o.Exprs) - if err != nil { - return nil, err - } + exprs := a.semExprs(o.Exprs) return append(seq, &dag.Yield{ Kind: "Yield", Exprs: exprs, - }), nil + }) case *ast.Load: poolID, err := lakeparse.ParseID(o.Pool) if err != nil { poolID, err = a.source.PoolID(a.ctx, o.Pool) if err != nil { - return nil, err + a.error(o, err) + return append(seq, badOp()) } } return append(seq, &dag.Load{ @@ -886,19 +786,16 @@ func (a *analyzer) semOp(o ast.Op, seq dag.Seq) (dag.Seq, error) { Author: o.Author, Message: o.Message, Meta: o.Meta, - }), nil + }) } - return nil, fmt.Errorf("semantic transform: unknown AST operator type: %T", o) + panic(fmt.Errorf("semantic transform: unknown AST operator type: %T", o)) } func (a *analyzer) singletonAgg(agg ast.Assignment, seq dag.Seq) dag.Seq { if agg.LHS != nil { return nil } - out, err := a.semAssignment(agg) - if err != nil { - return nil - } + out := a.semAssignment(agg) this, ok := out.LHS.(*dag.This) if !ok || len(this.Path) != 1 { return nil @@ -915,70 +812,55 @@ func (a *analyzer) singletonAgg(agg ast.Assignment, seq dag.Seq) dag.Seq { ) } -func (a *analyzer) semDecls(decls []ast.Decl) ([]dag.Def, []*dag.Func, error) { +func (a *analyzer) semDecls(decls []ast.Decl) ([]dag.Def, []*dag.Func) { var consts []dag.Def var fnDecls []*ast.FuncDecl for _, d := range decls { switch d := d.(type) { case *ast.ConstDecl: - c, err := a.semConstDecl(d) - if err != nil { - return nil, nil, err - } - consts = append(consts, c) + consts = append(consts, a.semConstDecl(d)) case *ast.FuncDecl: fnDecls = append(fnDecls, d) case *ast.OpDecl: - if err := a.semOpDecl(d); err != nil { - return nil, nil, err - } + a.semOpDecl(d) case *ast.TypeDecl: - c, err := a.semTypeDecl(d) - if err != nil { - return nil, nil, err - } - consts = 
append(consts, c) + consts = append(consts, a.semTypeDecl(d)) default: - return nil, nil, fmt.Errorf("invalid declaration type %T", d) + panic(fmt.Errorf("invalid declaration type %T", d)) } } - funcs, err := a.semFuncDecls(fnDecls) - if err != nil { - return nil, nil, err - } - return consts, funcs, nil + funcs := a.semFuncDecls(fnDecls) + return consts, funcs } -func (a *analyzer) semConstDecl(c *ast.ConstDecl) (dag.Def, error) { - e, err := a.semExpr(c.Expr) - if err != nil { - return dag.Def{}, err - } +func (a *analyzer) semConstDecl(c *ast.ConstDecl) dag.Def { + e := a.semExpr(c.Expr) if err := a.scope.DefineConst(a.zctx, a.arena, c.Name, e); err != nil { - return dag.Def{}, err + a.error(c, err) } return dag.Def{ Name: c.Name, Expr: e, - }, nil + } } -func (a *analyzer) semTypeDecl(d *ast.TypeDecl) (dag.Def, error) { +func (a *analyzer) semTypeDecl(d *ast.TypeDecl) dag.Def { typ, err := a.semType(d.Type) if err != nil { - return dag.Def{}, err + a.error(d.Type, err) + typ = "null" } e := &dag.Literal{ Kind: "Literal", Value: fmt.Sprintf("<%s=%s>", zson.QuotedName(d.Name), typ), } if err := a.scope.DefineConst(a.zctx, a.arena, d.Name, e); err != nil { - return dag.Def{}, err + a.error(d, err) } - return dag.Def{Name: d.Name, Expr: e}, nil + return dag.Def{Name: d.Name, Expr: e} } -func (a *analyzer) semFuncDecls(decls []*ast.FuncDecl) ([]*dag.Func, error) { +func (a *analyzer) semFuncDecls(decls []*ast.FuncDecl) []*dag.Func { funcs := make([]*dag.Func, 0, len(decls)) for _, d := range decls { f := &dag.Func{ @@ -987,118 +869,117 @@ func (a *analyzer) semFuncDecls(decls []*ast.FuncDecl) ([]*dag.Func, error) { Params: slices.Clone(d.Params), } if err := a.scope.DefineAs(f.Name, f); err != nil { - return nil, err + a.error(d, err) } funcs = append(funcs, f) } for i, d := range decls { - var err error - if funcs[i].Expr, err = a.semFuncBody(d.Params, d.Expr); err != nil { - return nil, err - } + funcs[i].Expr = a.semFuncBody(d, d.Params, d.Expr) } - return funcs, nil + return funcs } -func (a *analyzer) semFuncBody(params []string, body ast.Expr) (dag.Expr, error) { +func (a *analyzer) semFuncBody(d *ast.FuncDecl, params []string, body ast.Expr) dag.Expr { a.enterScope() defer a.exitScope() for _, p := range params { if err := a.scope.DefineVar(p); err != nil { - return nil, err + // XXX Each param should be a node but now just report the error + // as the entire declaration. 
+ a.error(d, err) } } return a.semExpr(body) } -func (a *analyzer) semOpDecl(d *ast.OpDecl) error { +func (a *analyzer) semOpDecl(d *ast.OpDecl) { m := make(map[string]bool) for _, p := range d.Params { if m[p] { - return fmt.Errorf("%s: duplicate parameter %q", d.Name, p) + a.error(d, fmt.Errorf("duplicate parameter %q", p)) + a.scope.DefineAs(d.Name, &opDecl{bad: true}) + return } m[p] = true } - return a.scope.DefineAs(d.Name, &opDecl{ast: d, scope: a.scope}) + if err := a.scope.DefineAs(d.Name, &opDecl{ast: d, scope: a.scope}); err != nil { + a.error(d, err) + } } -func (a *analyzer) semVars(defs []ast.Def) ([]dag.Def, error) { +func (a *analyzer) semVars(defs []ast.Def) []dag.Def { var locals []dag.Def for _, def := range defs { - e, err := a.semExpr(def.Expr) - if err != nil { - return nil, err - } - name := def.Name - if err := a.scope.DefineVar(name); err != nil { - return nil, err + e := a.semExpr(def.Expr) + if err := a.scope.DefineVar(def.Name); err != nil { + a.error(def, err) + continue } locals = append(locals, dag.Def{ - Name: name, + Name: def.Name, Expr: e, }) } - return locals, nil + return locals } -func (a *analyzer) semOpAssignment(p *ast.OpAssignment) (dag.Op, error) { +func (a *analyzer) semOpAssignment(p *ast.OpAssignment) dag.Op { var aggs, puts []dag.Assignment - for _, assign := range p.Assignments { + for _, astAssign := range p.Assignments { // Parition assignments into agg vs. puts. - a, err := a.semAssignment(assign) - if err != nil { - return nil, err - } - if _, ok := a.RHS.(*dag.Agg); ok { - if _, ok := a.LHS.(*dag.This); !ok { - return nil, errors.New("summarize: aggregate output field must be static") + assign := a.semAssignment(astAssign) + if _, ok := assign.RHS.(*dag.Agg); ok { + if _, ok := assign.LHS.(*dag.This); !ok { + a.error(astAssign.LHS, errors.New("aggregate output field must be static")) } - aggs = append(aggs, a) + aggs = append(aggs, assign) } else { - puts = append(puts, a) + puts = append(puts, assign) } } if len(puts) > 0 && len(aggs) > 0 { - return nil, errors.New("mix of aggregations and non-aggregations in assignment list") + a.error(p, errors.New("mix of aggregations and non-aggregations in assignment list")) + return badOp() } if len(puts) > 0 { return &dag.Put{ Kind: "Put", Args: puts, - }, nil + } } return &dag.Summarize{ Kind: "Summarize", Aggs: aggs, - }, nil + } } -func assignmentHasDynamicLHS(assignments []dag.Assignment) bool { - for _, a := range assignments { - if _, ok := a.LHS.(*dag.This); !ok { +func (a *analyzer) checkStaticAssignment(asts []ast.Assignment, assignments []dag.Assignment) bool { + for k, assign := range assignments { + if _, ok := assign.LHS.(*dag.BadExpr); ok { + continue + } + if _, ok := assign.LHS.(*dag.This); !ok { + a.error(asts[k].LHS, errors.New("output key must be static")) return true } } return false } -func (a *analyzer) semOpExpr(e ast.Expr, seq dag.Seq) (dag.Seq, error) { +func (a *analyzer) semOpExpr(e ast.Expr, seq dag.Seq) dag.Seq { if call, ok := e.(*ast.Call); ok { - if seq, err := a.semCallOp(call, seq); seq != nil || err != nil { - return seq, err + if seq := a.semCallOp(call, seq); seq != nil { + return seq } } - out, err := a.semExpr(e) - if err != nil { - return nil, err - } + out := a.semExpr(e) if a.isBool(out) { - return append(seq, dag.NewFilter(out)), nil + return append(seq, dag.NewFilter(out)) } return append(seq, &dag.Yield{ Kind: "Yield", Exprs: []dag.Expr{out}, - }), nil + }) } func (a *analyzer) isBool(e dag.Expr) bool { @@ -1138,13 +1019,11 @@ func (a *analyzer) 
isBool(e dag.Expr) bool { } } -func (a *analyzer) semCallOp(call *ast.Call, seq dag.Seq) (dag.Seq, error) { - if body, err := a.maybeConvertUserOp(call, seq); err != nil { - return nil, err - } else if body != nil { - return append(seq, body...), nil +func (a *analyzer) semCallOp(call *ast.Call, seq dag.Seq) dag.Seq { + if body := a.maybeConvertUserOp(call); body != nil { + return append(seq, body...) } - if agg, err := a.maybeConvertAgg(call); err == nil && agg != nil { + if agg := a.maybeConvertAgg(call); agg != nil { summarize := &dag.Summarize{ Kind: "Summarize", Aggs: []dag.Assignment{ @@ -1159,49 +1038,54 @@ func (a *analyzer) semCallOp(call *ast.Call, seq dag.Seq) (dag.Seq, error) { Kind: "Yield", Exprs: []dag.Expr{&dag.This{Kind: "This", Path: field.Path{call.Name}}}, } - return append(append(seq, summarize), yield), nil + return append(append(seq, summarize), yield) } if !function.HasBoolResult(call.Name) { - return nil, nil - } - c, err := a.semCall(call) - if err != nil { - return nil, err + return nil } - return append(seq, dag.NewFilter(c)), nil + c := a.semCall(call) + return append(seq, dag.NewFilter(c)) } // maybeConvertUserOp returns nil, nil if the call is determined to not be a // UserOp, otherwise it returns the compiled op or the encountered error. -func (a *analyzer) maybeConvertUserOp(call *ast.Call, seq dag.Seq) (dag.Seq, error) { +func (a *analyzer) maybeConvertUserOp(call *ast.Call) dag.Seq { decl, err := a.scope.lookupOp(call.Name) - if decl == nil || err != nil { - return nil, nil + if decl == nil { + return nil + } + if err != nil { + a.error(call, err) + return dag.Seq{badOp()} + } + if decl.bad { + return dag.Seq{badOp()} } if call.Where != nil { - return nil, fmt.Errorf("%s(): user defined operators cannot have a where clause", call.Name) + a.error(call, errors.New("user defined operators cannot have a where clause")) + return dag.Seq{badOp()} } params, args := decl.ast.Params, call.Args if len(params) != len(args) { - return nil, fmt.Errorf("%s(): %d arg%s provided when operator expects %d arg%s", call.Name, len(params), plural.Slice(params, "s"), len(args), plural.Slice(args, "s")) + a.error(call, fmt.Errorf("%d arg%s provided when operator expects %d arg%s", len(params), plural.Slice(params, "s"), len(args), plural.Slice(args, "s"))) + return dag.Seq{badOp()} } exprs := make([]dag.Expr, len(decl.ast.Params)) for i, arg := range args { - e, err := a.semExpr(arg) - if err != nil { - return nil, err - } + e := a.semExpr(arg) // Transform non-path arguments into literals. 
if _, ok := e.(*dag.This); !ok { val, err := kernel.EvalAtCompileTime(a.zctx, a.arena, e) if err != nil { - return nil, err + a.error(arg, err) + exprs[i] = badExpr() + continue } if val.IsError() { if val.IsMissing() { - return nil, fmt.Errorf("%q: non-path arguments cannot have variable dependency", decl.ast.Params[i]) + a.error(arg, errors.New("non-path arguments cannot have variable dependency")) } else { - return nil, fmt.Errorf("%q: %q", decl.ast.Params[i], string(val.Bytes())) + a.error(arg, errors.New(string(val.Bytes()))) } } e = &dag.Literal{ @@ -1212,7 +1096,8 @@ func (a *analyzer) maybeConvertUserOp(call *ast.Call, seq dag.Seq) (dag.Seq, err exprs[i] = e } if slices.Contains(a.opStack, decl.ast) { - return nil, opCycleError(append(a.opStack, decl.ast)) + a.error(call, opCycleError(append(a.opStack, decl.ast))) + return dag.Seq{badOp()} } a.opStack = append(a.opStack, decl.ast) oldscope := a.scope @@ -1223,7 +1108,8 @@ func (a *analyzer) maybeConvertUserOp(call *ast.Call, seq dag.Seq) (dag.Seq, err }() for i, p := range params { if err := a.scope.DefineAs(p, exprs[i]); err != nil { - return nil, err + a.error(call, err) + return dag.Seq{badOp()} } } return a.semSeq(decl.ast.Body) diff --git a/compiler/ztests/badshaper.yaml b/compiler/ztests/badshaper.yaml index 91745b9e49..bf3277dcf9 100644 --- a/compiler/ztests/badshaper.yaml +++ b/compiler/ztests/badshaper.yaml @@ -13,4 +13,6 @@ inputs: outputs: - name: stderr data: | - no such type name: "null" + no such type name: "null" in badshaper.zed at line 1, column 10: + type foo={_path:string,testfield:"null"} + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/compiler/ztests/const-source.yaml b/compiler/ztests/const-source.yaml index 661433b912..e80350054d 100644 --- a/compiler/ztests/const-source.yaml +++ b/compiler/ztests/const-source.yaml @@ -33,6 +33,12 @@ outputs: ) - name: stderr data: | - invalid pool name: POOL: string value required - invalid file path: FILE: string value required - invalid file path: URL: string value required + invalid pool name: POOL: string value required at line 1, column 24: + const POOL = 3.14 from POOL + ~~~~ + FILE: string value required at line 1, column 29: + const FILE = 127.0.0.1 file FILE + ~~~~ + URL: string value required at line 1, column 22: + const URL = true get URL + ~~~ diff --git a/compiler/ztests/from-error.yaml b/compiler/ztests/from-error.yaml index c40e85b392..ee31a22987 100644 --- a/compiler/ztests/from-error.yaml +++ b/compiler/ztests/from-error.yaml @@ -15,12 +15,22 @@ script: | outputs: - name: stderr data: | - semantic analyzer: from pool cannot be used without a lake + from pool cannot be used without a lake at line 1, column 1: + from p + ~~~~~~ === - test: pool not found + test: pool not found at line 1, column 6: + from test + ~~~~ === - test*: pool matching glob not found + test*: pool matching glob not found at line 1, column 6: + from test* + ~~~~~ === - test: pool matching regexp not found + test: pool matching regexp not found at line 1, column 6: + from /test/ + ~~~~~~ === - => not allowed after pool pattern in 'from' operator + => not allowed after pool pattern in 'from' operator at line 1, column 7: + from (pool * => count()) + ~~~~~~~~~~~~~~~~~ diff --git a/compiler/ztests/head.yaml b/compiler/ztests/head.yaml index a6406b03c5..e4e6ca31ac 100644 --- a/compiler/ztests/head.yaml +++ b/compiler/ztests/head.yaml @@ -20,6 +20,12 @@ outputs: ) - name: stderr data: | - head: expression value is not a positive integer: 1. 
- head: expression value is not a positive integer: "1" - head: expression value is not a positive integer: error("missing") + expression value must be an integer value: 1. at line 1, column 6: + head 1. + ~~ + expression value must be an integer value: "1" at line 1, column 7: + head "1" + ~ + expression value must be an integer value: error("missing") at line 1, column 6: + head x + ~ diff --git a/compiler/ztests/summarize-lhs-error.yaml b/compiler/ztests/summarize-lhs-error.yaml index 1f9e865c1d..c03ec7084e 100644 --- a/compiler/ztests/summarize-lhs-error.yaml +++ b/compiler/ztests/summarize-lhs-error.yaml @@ -6,6 +6,12 @@ script: | outputs: - name: stderr data: | - summarize: key output field must be static - summarize: aggregate output field must be static - summarize: aggregate output field must be static + assignment key must be static at line 1, column 12: + count() by this[a] := key + ~~~~~~~ + assignment key must be static at line 1, column 1: + this[a] := count() by key + ~~~~~~~ + aggregate output field must be static at line 1, column 1: + this[a] := count() + ~~~~~~~ diff --git a/compiler/ztests/tail.yaml b/compiler/ztests/tail.yaml index b4da9ef4a7..76a4f5f767 100644 --- a/compiler/ztests/tail.yaml +++ b/compiler/ztests/tail.yaml @@ -20,6 +20,12 @@ outputs: ) - name: stderr data: | - tail: expression value is not a positive integer: 1. - tail: expression value is not a positive integer: "1" - tail: expression value is not a positive integer: error("missing") + expression value must be an integer value: 1. at line 1, column 6: + tail 1. + ~~ + expression value must be an integer value: "1" at line 1, column 7: + tail "1" + ~ + expression value must be an integer value: error("missing") at line 1, column 6: + tail x + ~ diff --git a/compiler/ztests/udf-err.yaml b/compiler/ztests/udf-err.yaml index a70ec602bd..7c13300fdf 100644 --- a/compiler/ztests/udf-err.yaml +++ b/compiler/ztests/udf-err.yaml @@ -21,6 +21,12 @@ inputs: outputs: - name: stderr data: | - symbol "dup" redefined - notAFunc(): definition is not a function type: *dag.Literal - f(): expects 2 argument(s) + symbol "dup" redefined in duplicate.zed at line 2, column 1: + func dup(n): (n+2) + ~~~~~~~~~~~~~~~~~ + definition is not a function type: *dag.Literal in call-non-func.zed at line 2, column 7: + yield notAFunc(this) + ~~~~~~~~~~~~~~ + call expects 2 argument(s) in wrong-args.zed at line 2, column 7: + yield f(this) + ~~~~~~~ diff --git a/compiler/ztests/where-on-func.yaml b/compiler/ztests/where-on-func.yaml index 21ae7245d6..6866bb9ad6 100644 --- a/compiler/ztests/where-on-func.yaml +++ b/compiler/ztests/where-on-func.yaml @@ -1,3 +1,3 @@ zed: cut hex := hex(this) where this % 2 == 0 -errorRE: "'where' clause on non-aggregation function: hex" +errorRE: "'where' clause on non-aggregation function" diff --git a/docs/language/operators/rename.md b/docs/language/operators/rename.md index 094359ed54..0a60058a4f 100644 --- a/docs/language/operators/rename.md +++ b/docs/language/operators/rename.md @@ -49,7 +49,9 @@ echo '{a:1,r:{b:2,c:3}}' | zq -z 'rename w:=r.b' - ``` => ```mdtest-output -rename: left-hand side and right-hand side must have the same depth (w vs r.b) +left-hand side and right-hand side must have the same depth (w vs r.b) at line 1, column 8: +rename w:=r.b + ~~~~~~ ``` _Record literals can be used instead of rename for mutation_ ```mdtest-command diff --git a/docs/language/statements.md b/docs/language/statements.md index ed58f90a38..601de40dd1 100644 --- a/docs/language/statements.md +++ 
b/docs/language/statements.md @@ -158,7 +158,9 @@ echo '{greeting: "hi"}' | zq -z -I params.zed 'AddMessage("message", "hello")' - ``` which produces ```mdtest-output -illegal left-hand side of assignment +illegal left-hand side of assignment in params.zed at line 2, column 3: + field_for_message:=msg + ~~~~~~~~~~~~~~~~~~~~~~ ``` A constant value must be used to pass a parameter that will be referenced as diff --git a/docs/tutorials/schools.md b/docs/tutorials/schools.md index 0e3df2986f..24968787d8 100644 --- a/docs/tutorials/schools.md +++ b/docs/tutorials/schools.md @@ -843,7 +843,9 @@ zq -Z 'rename toplevel:=outer.inner' nested.zson ``` produces this compile-time error message and the query is not run: ```mdtest-output -rename: left-hand side and right-hand side must have the same depth (toplevel vs outer.inner) +left-hand side and right-hand side must have the same depth (toplevel vs outer.inner) at line 1, column 8: +rename toplevel:=outer.inner + ~~~~~~~~~~~~~~~~~~~~~ ``` This goal could instead be achieved by combining [`put`](#44-put) and [`drop`](#42-drop), e.g., diff --git a/fuzz/fuzz.go b/fuzz/fuzz.go index 865f4c5863..8bc9702b2c 100644 --- a/fuzz/fuzz.go +++ b/fuzz/fuzz.go @@ -95,11 +95,11 @@ func RunQuery(t testing.TB, zctx *zed.Context, readers []zio.Reader, querySource // Compile query engine := mock.NewMockEngine(gomock.NewController(t)) comp := compiler.NewFileSystemCompiler(engine) - ast, err := compiler.Parse(querySource) + ast, sset, err := compiler.Parse(querySource) if err != nil { t.Skipf("%v", err) } - query, err := runtime.CompileQuery(ctx, zctx, comp, ast, readers) + query, err := runtime.CompileQuery(ctx, zctx, comp, ast, sset, readers) if err != nil { t.Skipf("%v", err) } diff --git a/lake/api/local.go b/lake/api/local.go index bffb699208..74d4a25df7 100644 --- a/lake/api/local.go +++ b/lake/api/local.go @@ -7,6 +7,7 @@ import ( "github.com/brimdata/zed" "github.com/brimdata/zed/api" "github.com/brimdata/zed/compiler" + "github.com/brimdata/zed/compiler/parser" "github.com/brimdata/zed/lake" "github.com/brimdata/zed/lakeparse" "github.com/brimdata/zed/order" @@ -113,11 +114,11 @@ func (l *local) Query(ctx context.Context, head *lakeparse.Commitish, src string } func (l *local) QueryWithControl(ctx context.Context, head *lakeparse.Commitish, src string, srcfiles ...string) (zbuf.ProgressReadCloser, error) { - flowgraph, err := l.compiler.Parse(src, srcfiles...) 
+ flowgraph, sset, err := parser.ParseZed(srcfiles, src) if err != nil { return nil, err } - q, err := runtime.CompileLakeQuery(ctx, zed.NewContext(), l.compiler, flowgraph, head) + q, err := runtime.CompileLakeQuery(ctx, zed.NewContext(), l.compiler, flowgraph, sset, head) if err != nil { return nil, err } @@ -173,7 +174,7 @@ func (l *local) Delete(ctx context.Context, poolID ksuid.KSUID, branchName strin } func (l *local) DeleteWhere(ctx context.Context, poolID ksuid.KSUID, branchName, src string, commit api.CommitMessage) (ksuid.KSUID, error) { - op, err := l.compiler.Parse(src) + op, sset, err := l.compiler.Parse(src) if err != nil { return ksuid.Nil, err } @@ -181,7 +182,11 @@ func (l *local) DeleteWhere(ctx context.Context, poolID ksuid.KSUID, branchName, if err != nil { return ksuid.Nil, err } - return branch.DeleteWhere(ctx, l.compiler, op, commit.Author, commit.Body, commit.Meta) + commitID, err := branch.DeleteWhere(ctx, l.compiler, op, commit.Author, commit.Body, commit.Meta) + if list, ok := err.(parser.ErrorList); ok { + list.SetSourceSet(sset) + } + return commitID, err } func (l *local) Revert(ctx context.Context, poolID ksuid.KSUID, branchName string, commitID ksuid.KSUID, message api.CommitMessage) (ksuid.KSUID, error) { diff --git a/lake/ztests/dirs.yaml b/lake/ztests/dirs.yaml index 7005fd147f..0105563742 100644 --- a/lake/ztests/dirs.yaml +++ b/lake/ztests/dirs.yaml @@ -17,4 +17,6 @@ outputs: {min:2020-04-21T22:40:30.06852324Z,max:2020-04-22T01:23:40.0622373Z} - name: stderr data: | - logs: pool not found + logs: pool not found at line 1, column 6: + from logs@main:objects + ~~~~ diff --git a/lake/ztests/query-error.yaml b/lake/ztests/query-error.yaml index d140c166df..8499ece1bf 100644 --- a/lake/ztests/query-error.yaml +++ b/lake/ztests/query-error.yaml @@ -10,6 +10,12 @@ outputs: - name: stderr data: | query must include a 'from' operator - cannot scan from unknown HEAD - unknown lake metadata type "unknownmeta" in from operator - doesnotexist: pool not found + cannot scan from unknown HEAD at line 1, column 6: + from HEAD + ~~~~ + unknown lake metadata type "unknownmeta" in from operator at line 1, column 1: + from :unknownmeta + ~~~~~~~~~~~~~~~~~ + doesnotexist: pool not found at line 1, column 6: + from doesnotexist + ~~~~~~~~~~~~ diff --git a/runtime/compiler.go b/runtime/compiler.go index ba6b478e22..5918910e75 100644 --- a/runtime/compiler.go +++ b/runtime/compiler.go @@ -6,6 +6,7 @@ import ( "github.com/brimdata/zed" "github.com/brimdata/zed/compiler/ast" + "github.com/brimdata/zed/compiler/parser" "github.com/brimdata/zed/lakeparse" "github.com/brimdata/zed/zbuf" "github.com/brimdata/zed/zio" @@ -16,7 +17,7 @@ type Compiler interface { NewQuery(*Context, ast.Seq, []zio.Reader) (Query, error) NewLakeQuery(*Context, ast.Seq, int, *lakeparse.Commitish) (Query, error) NewLakeDeleteQuery(*Context, ast.Seq, *lakeparse.Commitish) (DeleteQuery, error) - Parse(string, ...string) (ast.Seq, error) + Parse(string, ...string) (ast.Seq, *parser.SourceSet, error) } type Query interface { @@ -43,21 +44,27 @@ func AsProgressReadCloser(q Query) zbuf.ProgressReadCloser { }{AsReader(q), q, q} } -func CompileQuery(ctx context.Context, zctx *zed.Context, c Compiler, program ast.Seq, readers []zio.Reader) (Query, error) { +func CompileQuery(ctx context.Context, zctx *zed.Context, c Compiler, program ast.Seq, sset *parser.SourceSet, readers []zio.Reader) (Query, error) { rctx := NewContext(ctx, zctx) q, err := c.NewQuery(rctx, program, readers) if err != nil { rctx.Cancel() + if 
list, ok := err.(parser.ErrorList); ok { + list.SetSourceSet(sset) + } return nil, err } return q, nil } -func CompileLakeQuery(ctx context.Context, zctx *zed.Context, c Compiler, program ast.Seq, head *lakeparse.Commitish) (Query, error) { +func CompileLakeQuery(ctx context.Context, zctx *zed.Context, c Compiler, program ast.Seq, sset *parser.SourceSet, head *lakeparse.Commitish) (Query, error) { rctx := NewContext(ctx, zctx) q, err := c.NewLakeQuery(rctx, program, 0, head) if err != nil { rctx.Cancel() + if list, ok := err.(parser.ErrorList); ok { + list.SetSourceSet(sset) + } return nil, err } return q, nil diff --git a/runtime/sam/expr/filter_test.go b/runtime/sam/expr/filter_test.go index be9c5a9217..75d659075b 100644 --- a/runtime/sam/expr/filter_test.go +++ b/runtime/sam/expr/filter_test.go @@ -54,7 +54,7 @@ func runCasesHelper(t *testing.T, record string, cases []testcase, expectBufferF for _, c := range cases { t.Run(c.filter, func(t *testing.T) { t.Helper() - p, err := compiler.Parse(c.filter) + p, _, err := compiler.Parse(c.filter) require.NoError(t, err, "filter: %q", c.filter) rctx := runtime.NewContext(context.Background(), zctx) job, err := compiler.NewJob(rctx, p, nil, nil) @@ -405,6 +405,6 @@ func TestFilters(t *testing.T) { func TestBadFilter(t *testing.T) { t.Parallel() - _, err := compiler.Parse(`s matches \xa8*`) + _, _, err := compiler.Parse(`s matches \xa8*`) require.Error(t, err) } diff --git a/runtime/sam/expr/ztests/cut-dup-fields.yaml b/runtime/sam/expr/ztests/cut-dup-fields.yaml index 4ffd78207c..3dc3652ec8 100644 --- a/runtime/sam/expr/ztests/cut-dup-fields.yaml +++ b/runtime/sam/expr/ztests/cut-dup-fields.yaml @@ -12,7 +12,15 @@ inputs: outputs: - name: stderr data: | - cut: duplicate field: "rec" - cut: duplicate field: "rec.sub1" - cut: duplicate field: "rec.sub.sub" - cut: duplicate field: "rec.sub" + duplicate field: "rec" at line 1, column 5: + cut rec,other,rec + ~~~~~~~~~~~~~ + duplicate field: "rec.sub1" at line 1, column 5: + cut rec.sub1,rec.sub1 + ~~~~~~~~~~~~~~~~~ + duplicate field: "rec.sub.sub" at line 1, column 5: + cut rec.sub,rec.sub.sub + ~~~~~~~~~~~~~~~~~~~ + duplicate field: "rec.sub" at line 1, column 5: + cut rec.sub.sub,rec.sub + ~~~~~~~~~~~~~~~~~~~ diff --git a/runtime/sam/expr/ztests/cut-not-adjacent.yaml b/runtime/sam/expr/ztests/cut-not-adjacent.yaml index de13172053..df2db99381 100644 --- a/runtime/sam/expr/ztests/cut-not-adjacent.yaml +++ b/runtime/sam/expr/ztests/cut-not-adjacent.yaml @@ -12,7 +12,15 @@ inputs: outputs: - name: stderr data: | - cut: fields in record rec must be adjacent - cut: fields in record rec1 must be adjacent - cut: fields in record rec1 must be adjacent - cut: fields in record t.rec must be adjacent + fields in record rec must be adjacent at line 1, column 5: + cut rec.sub1,other,rec.sub2 + ~~~~~~~~~~~~~~~~~~~~~~~ + fields in record rec1 must be adjacent at line 1, column 5: + cut rec1.rec2.sub1,other,rec1.sub2 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + fields in record rec1 must be adjacent at line 1, column 5: + cut rec1.rec2.sub1,other,rec1.rec2.sub2 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + fields in record t.rec must be adjacent at line 1, column 5: + cut t.rec.sub1,t.other,t.rec.sub2 + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/runtime/sam/expr/ztests/rename-error-move.yaml b/runtime/sam/expr/ztests/rename-error-move.yaml index 4aa70ad092..08b5d973fa 100644 --- a/runtime/sam/expr/ztests/rename-error-move.yaml +++ b/runtime/sam/expr/ztests/rename-error-move.yaml @@ -3,4 +3,4 @@ zed: rename dst:=id.resp_h input: 
| {id:{orig_h:10.164.94.120,orig_p:39681(port=uint16),resp_h:10.47.3.155,resp_p:3389(port)}} -errorRE: "rename: left-hand side and right-hand side must have the same depth \\(dst vs id.resp_h\\)" +errorRE: "left-hand side and right-hand side must have the same depth \\(dst vs id.resp_h\\)" diff --git a/runtime/sam/op/groupby/groupby_test.go b/runtime/sam/op/groupby/groupby_test.go index e979f8f502..1f6e81ed51 100644 --- a/runtime/sam/op/groupby/groupby_test.go +++ b/runtime/sam/op/groupby/groupby_test.go @@ -106,7 +106,7 @@ func TestGroupbyStreamingSpill(t *testing.T) { defer arena.Unref() runOne := func(inputSortKey string) []string { - proc, err := compiler.Parse("count() by every(1s), ip") + proc, _, err := compiler.Parse("count() by every(1s), ip") assert.NoError(t, err) zctx := zed.NewContext() diff --git a/runtime/sam/op/ztests/cut-dynamic-field.yaml b/runtime/sam/op/ztests/cut-dynamic-field.yaml index 22f43a2eb5..16a70a561c 100644 --- a/runtime/sam/op/ztests/cut-dynamic-field.yaml +++ b/runtime/sam/op/ztests/cut-dynamic-field.yaml @@ -43,4 +43,6 @@ outputs: error({message:"cut: missing",on:{}}) - name: stderr data: | - left-hand side of assignment: symbol "foo" is not bound to an expression + symbol "foo" is not bound to an expression at line 1, column 38: + op foo(): ( yield "error" ) cut this[foo] := "hello world" + ~~~ diff --git a/runtime/sam/op/ztests/put-dynamic-field.yaml b/runtime/sam/op/ztests/put-dynamic-field.yaml index 7566f09213..ed5d924853 100644 --- a/runtime/sam/op/ztests/put-dynamic-field.yaml +++ b/runtime/sam/op/ztests/put-dynamic-field.yaml @@ -38,4 +38,6 @@ outputs: error({message:"put: missing",on:{}}) - name: stderr data: | - left-hand side of assignment: symbol "foo" is not bound to an expression + symbol "foo" is not bound to an expression at line 1, column 38: + op foo(): ( yield "error" ) put this[foo] := "hello world" + ~~~ diff --git a/runtime/sam/op/ztests/user-errors.yaml b/runtime/sam/op/ztests/user-errors.yaml index 1a951ba938..c2a005a835 100644 --- a/runtime/sam/op/ztests/user-errors.yaml +++ b/runtime/sam/op/ztests/user-errors.yaml @@ -8,7 +8,7 @@ inputs: op test(a, a): ( pass ) - op("a", "b") + test("a", "b") - name: error-const-lhs.zed data: | op test(a): ( @@ -19,5 +19,9 @@ inputs: outputs: - name: stderr data: | - test: duplicate parameter "a" - illegal left-hand side of assignment + duplicate parameter "a" in error-duplicate-parameters.zed at line 1, column 1: + op test(a, a): ( + ~~~~~~~~~~~~~~~~ + illegal left-hand side of assignment in error-const-lhs.zed at line 2, column 3: + a := a + ~~~~~~ diff --git a/service/handlers.go b/service/handlers.go index 42995e7030..32cc944c19 100644 --- a/service/handlers.go +++ b/service/handlers.go @@ -11,8 +11,8 @@ import ( "github.com/brimdata/zed/api" "github.com/brimdata/zed/api/queryio" "github.com/brimdata/zed/compiler" - "github.com/brimdata/zed/compiler/ast" "github.com/brimdata/zed/compiler/optimizer/demand" + "github.com/brimdata/zed/compiler/parser" "github.com/brimdata/zed/lake" lakeapi "github.com/brimdata/zed/lake/api" "github.com/brimdata/zed/lake/commits" @@ -51,12 +51,12 @@ func handleQuery(c *Core, w *ResponseWriter, r *Request) { // The client must look at the return code and interpret the result // accordingly and when it sees a ZNG error after underway, // the error should be relay that to the caller/user. 
- query, err := c.compiler.Parse(req.Query) + query, sset, err := parser.ParseZed(nil, req.Query) if err != nil { w.Error(srverr.ErrInvalid(err)) return } - flowgraph, err := runtime.CompileLakeQuery(r.Context(), zed.NewContext(), c.compiler, query, &req.Head) + flowgraph, err := runtime.CompileLakeQuery(r.Context(), zed.NewContext(), c.compiler, query, sset, &req.Head) if err != nil { w.Error(srverr.ErrInvalid(err)) return @@ -162,7 +162,7 @@ func handleCompile(c *Core, w *ResponseWriter, r *Request) { if !r.Unmarshal(w, &req) { return } - ast, err := c.compiler.Parse(req.Query) + ast, _, err := c.compiler.Parse(req.Query) if err != nil { w.Error(srverr.ErrInvalid(err)) return @@ -552,12 +552,15 @@ func handleDelete(c *Core, w *ResponseWriter, r *Request) { w.Error(srverr.ErrInvalid("either object_ids or where must be set")) return } - var program ast.Seq - if program, err = c.compiler.Parse(payload.Where); err != nil { - w.Error(srverr.ErrInvalid(err)) + program, sset, err2 := c.compiler.Parse(payload.Where) + if err != nil { + w.Error(srverr.ErrInvalid(err2)) return } commit, err = branch.DeleteWhere(r.Context(), c.compiler, program, message.Author, message.Body, message.Meta) + if list, ok := err.(parser.ErrorList); ok { + list.SetSourceSet(sset) + } if errors.Is(err, commits.ErrEmptyTransaction) || errors.Is(err, &compiler.InvalidDeleteWhereQuery{}) { err = srverr.ErrInvalid(err) diff --git a/service/ztests/curl-query-error.yaml b/service/ztests/curl-query-error.yaml index 375fcd9ede..5bb7515a74 100644 --- a/service/ztests/curl-query-error.yaml +++ b/service/ztests/curl-query-error.yaml @@ -13,13 +13,13 @@ inputs: outputs: - name: stdout data: | - {"type":"Error","kind":"invalid operation","error":"pool name missing"} + {"type":"Error","kind":"invalid operation","error":"no pool name given"} code 400 - {"type":"Error","kind":"invalid operation","error":"pool name missing"} + {"type":"Error","kind":"invalid operation","error":"no pool name given"} code 400 - {"type":"Error","kind":"invalid operation","error":"pool name missing"} + {"type":"Error","kind":"invalid operation","error":"pool name missing at line 1, column 1:\nfrom HEAD\n~~~~~~~~~","compilation_errors":[{"Msg":"pool name missing","Pos":0,"End":9}]} code 400 - {"type":"Error","kind":"invalid operation","error":"unknown lake metadata type \"unknownmeta\" in from operator"} + {"type":"Error","kind":"invalid operation","error":"unknown lake metadata type \"unknownmeta\" in from operator at line 1, column 1:\nfrom :unknownmeta\n~~~~~~~~~~~~~~~~~","compilation_errors":[{"Msg":"unknown lake metadata type \"unknownmeta\" in from operator","Pos":0,"End":17}]} + code 400 + {"type":"Error","kind":"invalid operation","error":"doesnotexist: pool not found at line 1, column 6:\nfrom doesnotexist\n ~~~~~~~~~~~~","compilation_errors":[{"Msg":"doesnotexist: pool not found","Pos":5,"End":17}]} code 400 - {"type":"Error","kind":"item does not exist","error":"doesnotexist: pool not found"} - code 404 diff --git a/service/ztests/python.yaml b/service/ztests/python.yaml index c897028ddb..da582d8c54 100644 --- a/service/ztests/python.yaml +++ b/service/ztests/python.yaml @@ -161,4 +161,4 @@ outputs: === RequestError('test: pool already exists') === - RequestError('nosuchpool: pool not found') + RequestError('nosuchpool: pool not found at line 1, column 6:\nfrom nosuchpool\n ~~~~~~~~~~') diff --git a/service/ztests/query-bad-commit.yaml b/service/ztests/query-bad-commit.yaml index c375e795cc..90438292a9 100644 --- 
a/service/ztests/query-bad-commit.yaml +++ b/service/ztests/query-bad-commit.yaml @@ -10,4 +10,6 @@ inputs: outputs: - name: stderr data: | - status code 404: "doesnotexist": branch not found + "doesnotexist": branch not found at line 1, column 1: + from test@doesnotexist + ~~~~~~~~~~~~~~~~~~~~~~ diff --git a/service/ztests/query-error.yaml b/service/ztests/query-error.yaml index ae74877fbb..597e5ec4f7 100644 --- a/service/ztests/query-error.yaml +++ b/service/ztests/query-error.yaml @@ -12,7 +12,14 @@ inputs: outputs: - name: stderr data: | - status code 400: pool name missing - status code 400: pool name missing - status code 400: unknown lake metadata type "unknownmeta" in from operator - status code 404: doesnotexist: pool not found + status code 400: no pool name given + pool name missing at line 1, column 1: + from HEAD + ~~~~~~~~~ + unknown lake metadata type "unknownmeta" in from operator at line 1, column 1: + from :unknownmeta + ~~~~~~~~~~~~~~~~~ + doesnotexist: pool not found at line 1, column 6: + from doesnotexist + ~~~~~~~~~~~~ + diff --git a/zed_test.go b/zed_test.go index 65a623a48d..2e914969a0 100644 --- a/zed_test.go +++ b/zed_test.go @@ -136,7 +136,7 @@ func runOneBoomerang(t *testing.T, format, data string) { dataReader := zio.Reader(dataReadCloser) if format == "parquet" { // Fuse for formats that require uniform values. - proc, err := compiler.NewCompiler().Parse("fuse") + proc, _, err := compiler.NewCompiler().Parse("fuse") require.NoError(t, err) rctx := runtime.NewContext(context.Background(), zctx) q, err := compiler.NewCompiler().NewQuery(rctx, proc, []zio.Reader{dataReadCloser}) diff --git a/ztest/ztest.go b/ztest/ztest.go index b3eb4dca14..847db75bdc 100644 --- a/ztest/ztest.go +++ b/ztest/ztest.go @@ -480,7 +480,7 @@ func runzq(path, zedProgram, input string, outputFlags []string, inputFlags []st // tests. return outbuf.String(), errbuf.String(), err } - proc, err := compiler.NewCompiler().Parse(zedProgram) + proc, sset, err := compiler.Parse(zedProgram) if err != nil { return "", err.Error(), err } @@ -513,7 +513,7 @@ func runzq(path, zedProgram, input string, outputFlags []string, inputFlags []st if err != nil { return "", "", err } - q, err := compiler.NewCompiler().NewQuery(runtime.NewContext(context.Background(), zctx), proc, []zio.Reader{zrc}) + q, err := runtime.CompileQuery(context.Background(), zctx, compiler.NewCompiler(), proc, sset, []zio.Reader{zrc}) if err != nil { zw.Close() return "", err.Error(), err