From 261d933757f1e5bc5e43d4fc4296431da0688aac Mon Sep 17 00:00:00 2001 From: Matthew Nibecker Date: Fri, 3 May 2024 11:09:55 -0700 Subject: [PATCH] ast.Errors This commit introduces ast.Errors- an error type that should be used whenever a generated error refers to a place in the query source. This commit switches the parser package to use ast.Error but a follow up commit will use ast.Errors for the semantic package as well. --- api/client/connection.go | 28 +++-- cli/clierrors/format.go | 69 ++++++++++++ cli/queryflags/flags.go | 53 +++++---- cli/zq/command.go | 5 +- cmd/zed/dev/compile/command.go | 49 ++++---- cmd/zed/query/command.go | 10 +- cmd/zq/ztests/single-arg-error.yaml | 3 +- compiler/ast/ast.go | 41 +++++++ compiler/job.go | 8 +- compiler/parser/api.go | 137 +++-------------------- compiler/parser/source.go | 110 ++++++++++++++++++ compiler/parser/ztests/syntax-error.yaml | 2 +- docs/tutorials/schools.md | 2 +- go.mod | 2 +- lake/api/api.go | 4 +- lake/api/local.go | 8 +- lake/api/remote.go | 8 +- lake/ztests/query-parse-error.yaml | 16 +-- runtime/compiler.go | 2 +- service/request.go | 22 +++- service/ztests/compile.yaml | 2 +- service/ztests/query-parse-error.yaml | 16 +-- 22 files changed, 373 insertions(+), 224 deletions(-) create mode 100644 cli/clierrors/format.go create mode 100644 compiler/parser/source.go diff --git a/api/client/connection.go b/api/client/connection.go index 636486f7f6..3cb3d1fbfa 100644 --- a/api/client/connection.go +++ b/api/client/connection.go @@ -16,14 +16,16 @@ import ( "github.com/brimdata/zed" "github.com/brimdata/zed/api" "github.com/brimdata/zed/api/client/auth0" - "github.com/brimdata/zed/compiler/parser" + "github.com/brimdata/zed/compiler/ast" "github.com/brimdata/zed/lake" "github.com/brimdata/zed/lake/branches" "github.com/brimdata/zed/lakeparse" + "github.com/brimdata/zed/pkg/unpack" "github.com/brimdata/zed/runtime/exec" "github.com/brimdata/zed/zio/zngio" "github.com/brimdata/zed/zson" "github.com/segmentio/ksuid" + "go.uber.org/multierr" ) const ( @@ -292,15 +294,13 @@ func (c *Connection) Revert(ctx context.Context, poolID ksuid.KSUID, branchName return commit, err } +var queryErrUnpacker = unpack.New(ast.Error{}) + // Query assembles a query from src and filenames and runs it. // // As for Connection.Do, if the returned error is nil, the user is expected to // call Response.Body.Close. -func (c *Connection) Query(ctx context.Context, head *lakeparse.Commitish, src string, filenames ...string) (*Response, error) { - src, srcInfo, err := parser.ConcatSource(filenames, src) - if err != nil { - return nil, err - } +func (c *Connection) Query(ctx context.Context, head *lakeparse.Commitish, src string) (*Response, error) { body := api.QueryRequest{Query: src} if head != nil { body.Head = *head @@ -308,11 +308,19 @@ func (c *Connection) Query(ctx context.Context, head *lakeparse.Commitish, src s req := c.NewRequest(ctx, http.MethodPost, "/query?ctrl=T", body) res, err := c.Do(req) var ae *api.Error - if errors.As(err, &ae) { - if m, ok := ae.Info.(map[string]interface{}); ok { - if offset, ok := m["parse_error_offset"].(float64); ok { - return res, parser.NewError(src, srcInfo, int(offset)) + if errors.As(err, &ae) && ae.Info != nil { + if list, ok := ae.Info.([]any); ok { + var errs error + for _, l := range list { + var lerr *ast.Error + if queryErrUnpacker.UnmarshalObject(l, &lerr) != nil { + // If an error is encountered here just return the parent + // error since this is more interesting. + return nil, err + } + errs = multierr.Append(errs, lerr) } + return nil, errs } } return res, err diff --git a/cli/clierrors/format.go b/cli/clierrors/format.go new file mode 100644 index 0000000000..9e118fa48e --- /dev/null +++ b/cli/clierrors/format.go @@ -0,0 +1,69 @@ +package clierrors + +import ( + "errors" + "fmt" + "strings" + + "github.com/brimdata/zed/compiler/ast" + "github.com/brimdata/zed/compiler/parser" + "go.uber.org/multierr" +) + +func Format(set *parser.SourceSet, err error) error { + if err == nil { + return err + } + var errs []error + for _, err := range multierr.Errors(err) { + if asterr, ok := err.(*ast.Error); ok { + err = formatASTError(set, asterr) + } + errs = append(errs, err) + } + return errors.Join(errs...) +} + +func formatASTError(set *parser.SourceSet, err *ast.Error) error { + src := set.SourceOf(err.Pos) + start := src.Position(err.Pos) + end := src.Position(err.End) + var b strings.Builder + b.WriteString(err.Error()) + b.WriteString(" (") + if src.Filename != "" { + fmt.Fprintf(&b, "%s: ", src.Filename) + } + fmt.Fprintf(&b, "line %d, ", start.Line) + fmt.Fprintf(&b, "column %d):\n", start.Column) + line := src.LineOfPos(set.Contents, err.Pos) + b.WriteString(line + "\n") + if end.IsValid() { + formatSpanError(&b, line, start, end) + } else { + formatPointError(&b, start) + } + return errors.New(b.String()) +} + +func formatSpanError(b *strings.Builder, line string, start, end parser.Position) { + col := start.Column - 1 + b.WriteString(strings.Repeat(" ", col)) + n := len(line) - col + if start.Line == end.Line { + n = end.Column - col + } + b.WriteString(strings.Repeat("~", n)) +} + +func formatPointError(b *strings.Builder, start parser.Position) { + col := start.Column - 1 + for k := 0; k < col; k++ { + if k >= col-4 && k != col-1 { + b.WriteByte('=') + } else { + b.WriteByte(' ') + } + } + b.WriteString("^ ===") +} diff --git a/cli/queryflags/flags.go b/cli/queryflags/flags.go index 618a4b0fc4..07fa69da4a 100644 --- a/cli/queryflags/flags.go +++ b/cli/queryflags/flags.go @@ -11,7 +11,7 @@ import ( "strings" "github.com/brimdata/zed/cli" - "github.com/brimdata/zed/compiler" + "github.com/brimdata/zed/cli/clierrors" "github.com/brimdata/zed/compiler/ast" "github.com/brimdata/zed/compiler/data" "github.com/brimdata/zed/compiler/parser" @@ -19,6 +19,7 @@ import ( "github.com/brimdata/zed/pkg/storage" "github.com/brimdata/zed/zbuf" "github.com/brimdata/zed/zson" + "go.uber.org/multierr" ) type Flags struct { @@ -32,7 +33,7 @@ func (f *Flags) SetFlags(fs *flag.FlagSet) { fs.Var(&f.Includes, "I", "source file containing Zed query text (may be used multiple times)") } -func (f *Flags) ParseSourcesAndInputs(paths []string) ([]string, ast.Seq, bool, error) { +func (f *Flags) ParseSourcesAndInputs(paths []string) ([]string, ast.Seq, *parser.SourceSet, bool, error) { var src string if len(paths) != 0 && !cli.FileExists(paths[0]) && !isURLWithKnownScheme(paths[0], "http", "https", "s3") { src = paths[0] @@ -41,25 +42,34 @@ func (f *Flags) ParseSourcesAndInputs(paths []string) ([]string, ast.Seq, bool, // Consider a lone argument to be a query if it compiles // and appears to start with a from or yield operator. // Otherwise, consider it a file. - query, err := compiler.Parse(src, f.Includes...) + query, set, err := parse(src, f.Includes...) if err == nil { if s, err := semantic.Analyze(context.Background(), query, data.NewSource(storage.NewLocalEngine(), nil), nil); err == nil { if semantic.HasSource(s) { - return nil, query, false, nil + return nil, query, set, false, nil } if semantic.StartsWithYield(s) { - return nil, query, true, nil + return nil, query, set, true, nil } } } - return nil, nil, false, singleArgError(src, err) + return nil, nil, nil, false, singleArgError(src, set, err) } } - query, err := compiler.Parse(src, f.Includes...) + query, set, err := parse(src, f.Includes...) if err != nil { - return nil, nil, false, err + return nil, nil, set, false, clierrors.Format(set, err) } - return paths, query, false, nil + return paths, query, set, false, nil +} + +func parse(src string, includes ...string) (ast.Seq, *parser.SourceSet, error) { + set, err := parser.ConcatSource(includes, src) + if err != nil { + return nil, nil, err + } + seq, err := parser.ParseZed(string(set.Contents)) + return seq, set, err } func isURLWithKnownScheme(path string, schemes ...string) bool { @@ -80,25 +90,30 @@ func (f *Flags) PrintStats(stats zbuf.Progress) { } } -func singleArgError(src string, err error) error { +func singleArgError(src string, set *parser.SourceSet, err error) error { var b strings.Builder b.WriteString("could not invoke zq with a single argument because:") if len(src) > 20 { src = src[:20] + "..." } fmt.Fprintf(&b, "\n - a file could not be found with the name %q", src) - var perr *parser.Error - if errors.As(err, &perr) { - b.WriteString("\n - the argument could not be compiled as a valid Zed query due to parse error (") - if perr.LineNum > 0 { - fmt.Fprintf(&b, "line %d, ", perr.LineNum) - } - fmt.Fprintf(&b, "column %d):", perr.Column) - for _, l := range strings.Split(perr.ParseErrorContext(), "\n") { - fmt.Fprintf(&b, "\n %s", l) + if hasASTErrors(err) { + b.WriteString("\n - the argument could not be compiled as a valid Zed query:") + for _, line := range strings.Split(clierrors.Format(set, err).Error(), "\n") { + fmt.Fprintf(&b, "\n %s", line) } } else { b.WriteString("\n - the argument did not parse as a valid Zed query") } return errors.New(b.String()) } + +func hasASTErrors(errs error) bool { + for _, err := range multierr.Errors(errs) { + var aerr *ast.Error + if errors.As(err, &aerr) { + return true + } + } + return false +} diff --git a/cli/zq/command.go b/cli/zq/command.go index c5238df676..e8c60572be 100644 --- a/cli/zq/command.go +++ b/cli/zq/command.go @@ -6,6 +6,7 @@ import ( "github.com/brimdata/zed" "github.com/brimdata/zed/cli" + "github.com/brimdata/zed/cli/clierrors" "github.com/brimdata/zed/cli/inputflags" "github.com/brimdata/zed/cli/outputflags" "github.com/brimdata/zed/cli/queryflags" @@ -129,7 +130,7 @@ func (c *Command) Run(args []string) error { // Prevent ParseSourcesAndInputs from treating args[0] as a path. args = append(args, "-") } - paths, flowgraph, null, err := c.queryFlags.ParseSourcesAndInputs(args) + paths, flowgraph, set, null, err := c.queryFlags.ParseSourcesAndInputs(args) if err != nil { return fmt.Errorf("zq: %w", err) } @@ -156,7 +157,7 @@ func (c *Command) Run(args []string) error { comp := compiler.NewFileSystemCompiler(local) query, err := runtime.CompileQuery(ctx, zctx, comp, flowgraph, readers) if err != nil { - return err + return clierrors.Format(set, err) } defer query.Pull(true) err = zbuf.CopyPuller(writer, query) diff --git a/cmd/zed/dev/compile/command.go b/cmd/zed/dev/compile/command.go index ee214a14ac..215f0d9561 100644 --- a/cmd/zed/dev/compile/command.go +++ b/cmd/zed/dev/compile/command.go @@ -5,9 +5,9 @@ import ( "errors" "flag" "fmt" - "os" "strings" + "github.com/brimdata/zed/cli/clierrors" "github.com/brimdata/zed/cmd/zed/dev" "github.com/brimdata/zed/cmd/zed/root" "github.com/brimdata/zed/compiler" @@ -112,17 +112,10 @@ func (c *Command) Run(args []string) error { c.pigeon = true } } - var src string - if len(c.includes) > 0 { - for _, path := range c.includes { - b, err := os.ReadFile(path) - if err != nil { - return err - } - src += "\n" + string(b) - } + set, err := parser.ConcatSource(c.includes, strings.Join(args, " ")) + if err != nil { + return err } - src += strings.Join(args, " ") var lk *lake.Root if c.semantic || c.optimize || c.parallel != 0 { lakeAPI, err := c.LakeFlags.Open(ctx) @@ -130,7 +123,7 @@ func (c *Command) Run(args []string) error { lk = lakeAPI.Root() } } - return c.parse(src, lk) + return c.parse(set, lk) } func (c *Command) header(msg string) { @@ -142,9 +135,9 @@ func (c *Command) header(msg string) { } } -func (c *Command) parse(z string, lk *lake.Root) error { +func (c *Command) parse(set *parser.SourceSet, lk *lake.Root) error { if c.pigeon { - s, err := parsePigeon(z) + s, err := parsePigeon(set) if err != nil { return err } @@ -152,7 +145,7 @@ func (c *Command) parse(z string, lk *lake.Root) error { fmt.Println(s) } if c.proc { - seq, err := compiler.Parse(z) + seq, err := compiler.Parse(string(set.Contents)) if err != nil { return err } @@ -160,7 +153,7 @@ func (c *Command) parse(z string, lk *lake.Root) error { c.writeAST(seq) } if c.semantic { - runtime, err := c.compile(z, lk) + runtime, err := c.compile(set, lk) if err != nil { return err } @@ -168,7 +161,7 @@ func (c *Command) parse(z string, lk *lake.Root) error { c.writeDAG(runtime.Entry()) } if c.optimize { - runtime, err := c.compile(z, lk) + runtime, err := c.compile(set, lk) if err != nil { return err } @@ -179,7 +172,7 @@ func (c *Command) parse(z string, lk *lake.Root) error { c.writeDAG(runtime.Entry()) } if c.parallel > 0 { - runtime, err := c.compile(z, lk) + runtime, err := c.compile(set, lk) if err != nil { return err } @@ -213,12 +206,16 @@ func (c *Command) writeDAG(seq dag.Seq) { } } -func (c *Command) compile(z string, lk *lake.Root) (*compiler.Job, error) { - p, err := compiler.Parse(z) +func (c *Command) compile(set *parser.SourceSet, lk *lake.Root) (*compiler.Job, error) { + p, err := compiler.Parse(string(set.Contents)) if err != nil { - return nil, err + return nil, clierrors.Format(set, err) } - return compiler.NewJob(runtime.DefaultContext(), p, data.NewSource(nil, lk), nil) + job, err := compiler.NewJob(runtime.DefaultContext(), p, data.NewSource(nil, lk), nil) + if err != nil { + return nil, clierrors.Format(set, err) + } + return job, nil } func normalize(b []byte) (string, error) { @@ -253,14 +250,14 @@ func dagFmt(seq dag.Seq, canon bool) (string, error) { return normalize(dagJSON) } -func parsePigeon(z string) (string, error) { - ast, err := parser.Parse("", []byte(z)) +func parsePigeon(set *parser.SourceSet) (string, error) { + ast, err := parser.ParseZed(string(set.Contents)) if err != nil { - return "", err + return "", clierrors.Format(set, err) } goPEGJSON, err := json.Marshal(ast) if err != nil { - return "", errors.New("go peg parser returned bad value for: " + z) + return "", errors.New("go peg parser returned bad value for: " + string(set.Contents)) } return normalize(goPEGJSON) } diff --git a/cmd/zed/query/command.go b/cmd/zed/query/command.go index c81139d440..ca2da4dff9 100644 --- a/cmd/zed/query/command.go +++ b/cmd/zed/query/command.go @@ -3,11 +3,13 @@ package query import ( "flag" + "github.com/brimdata/zed/cli/clierrors" "github.com/brimdata/zed/cli/outputflags" "github.com/brimdata/zed/cli/poolflags" "github.com/brimdata/zed/cli/queryflags" "github.com/brimdata/zed/cli/runtimeflags" "github.com/brimdata/zed/cmd/zed/root" + "github.com/brimdata/zed/compiler/parser" "github.com/brimdata/zed/pkg/charm" "github.com/brimdata/zed/pkg/storage" "github.com/brimdata/zed/zbuf" @@ -63,11 +65,15 @@ func (c *Command) Run(args []string) error { return err } head, _ := c.poolFlags.HEAD() - query, err := lake.QueryWithControl(ctx, head, src, c.queryFlags.Includes...) + set, err := parser.ConcatSource(c.queryFlags.Includes, src) if err != nil { - w.Close() return err } + query, err := lake.QueryWithControl(ctx, head, string(set.Contents)) + if err != nil { + w.Close() + return clierrors.Format(set, err) + } defer query.Close() err = zio.Copy(w, zbuf.NoControl(query)) if closeErr := w.Close(); err == nil { diff --git a/cmd/zq/ztests/single-arg-error.yaml b/cmd/zq/ztests/single-arg-error.yaml index e59212acc9..9702eea859 100644 --- a/cmd/zq/ztests/single-arg-error.yaml +++ b/cmd/zq/ztests/single-arg-error.yaml @@ -6,6 +6,7 @@ outputs: data: | zq: could not invoke zq with a single argument because: - a file could not be found with the name "file sample.zson | c..." - - the argument could not be compiled as a valid Zed query due to parse error (column 25): + - the argument could not be compiled as a valid Zed query: + error parsing Zed (line 1, column 26): file sample.zson | count( === ^ === diff --git a/compiler/ast/ast.go b/compiler/ast/ast.go index 8b24aa1730..d57b953e32 100644 --- a/compiler/ast/ast.go +++ b/compiler/ast/ast.go @@ -3,6 +3,9 @@ package ast import ( + "encoding/json" + "errors" + astzed "github.com/brimdata/zed/compiler/ast/zed" "github.com/brimdata/zed/order" "github.com/brimdata/zed/pkg/field" @@ -897,3 +900,41 @@ func (a *Agg) End() int { } return a.Rparen + 1 } + +// Error represents an error attached to a particular Node or place in the AST. +type Error struct { + Kind string `json:"kind" unpack:""` + Err error + Pos int + End int +} + +func NewError(err error, pos, end int) *Error { + return &Error{Kind: "Kind", Err: err, Pos: pos, End: end} +} + +func (e *Error) Error() string { + return e.Err.Error() +} + +func (e *Error) Unwrap() error { return e.Err } + +type errJSON struct { + Kind string `json:"kind"` + Error string `json:"error"` + Pos int `json:"pos"` + End int `json:"end"` +} + +func (e *Error) MarshalJSON() ([]byte, error) { + return json.Marshal(errJSON{e.Kind, e.Err.Error(), e.Pos, e.End}) +} + +func (e *Error) UnmarshalJSON(b []byte) error { + var v errJSON + if err := json.Unmarshal(b, &v); err != nil { + return err + } + e.Kind, e.Err, e.Pos, e.End = v.Kind, errors.New(v.Error), v.Pos, v.End + return nil +} diff --git a/compiler/job.go b/compiler/job.go index 41f84e047d..8b99accceb 100644 --- a/compiler/job.go +++ b/compiler/job.go @@ -88,8 +88,8 @@ func (j *Job) Parallelize(n int) error { return err } -func Parse(src string, filenames ...string) (ast.Seq, error) { - return parser.ParseZed(filenames, src) +func Parse(src string) (ast.Seq, error) { + return parser.ParseZed(src) } // MustParse is like Parse but panics if an error is encountered. @@ -135,8 +135,8 @@ type anyCompiler struct{} // Parse concatenates the source files in filenames followed by src and parses // the resulting program. -func (*anyCompiler) Parse(src string, filenames ...string) (ast.Seq, error) { - return Parse(src, filenames...) +func (*anyCompiler) Parse(src string) (ast.Seq, error) { + return Parse(src) } // VectorCompile is used for testing queries over single VNG object scans diff --git a/compiler/parser/api.go b/compiler/parser/api.go index 047b0249b4..0bfe315023 100644 --- a/compiler/parser/api.go +++ b/compiler/parser/api.go @@ -1,145 +1,36 @@ package parser import ( - "fmt" - "os" - "strings" + "errors" "github.com/brimdata/zed/compiler/ast" + "go.uber.org/multierr" ) +var ParsingError = errors.New("error parsing Zed") + // ParseZed calls ConcatSource followed by Parse. If Parse fails, it calls // ImproveError. -func ParseZed(filenames []string, src string) (ast.Seq, error) { - src, srcInfo, err := ConcatSource(filenames, src) - if err != nil { - return nil, err +func ParseZed(src string) (ast.Seq, error) { + if src == "" { + src = "*" } p, err := Parse("", []byte(src)) if err != nil { - return nil, ImproveError(err, src, srcInfo) + return nil, convertErrors(err) } return sliceOf[ast.Op](p), nil } -// SourceInfo holds source file offsets. -type SourceInfo struct { - filename string - start int - end int -} - -// ConcatSource concatenates the source files in filenames followed by src, -// returning the result and a corresponding slice of SourceInfos. -func ConcatSource(filenames []string, src string) (string, []SourceInfo, error) { - var b strings.Builder - var sis []SourceInfo - for _, f := range filenames { - bb, err := os.ReadFile(f) - if err != nil { - return "", nil, err - } - start := b.Len() - b.Write(bb) - sis = append(sis, SourceInfo{f, start, b.Len()}) - b.WriteByte('\n') - } - start := b.Len() - b.WriteString(src) - sis = append(sis, SourceInfo{"", start, b.Len()}) - if b.Len() == 0 { - return "*", nil, nil - } - return b.String(), sis, nil -} - -// ImproveError tries to improve an error from Parse. err is the error. src is -// the source code for which Parse return err. If src came from ConcatSource, -// sis is the corresponding slice of SourceInfo; otherwise, sis is nil. -func ImproveError(err error, src string, sis []SourceInfo) error { - el, ok := err.(errList) - if !ok || len(el) != 1 { - return err - } - pe, ok := el[0].(*parserError) +func convertErrors(err error) error { + errs, ok := err.(errList) if !ok { return err } - return NewError(src, sis, pe.pos.offset) -} - -// Error is a parse error with nice formatting. It includes the source code -// line containing the error. -type Error struct { - Offset int // offset into original source code - - filename string // omitted from formatting if "" - LineNum int // zero-based; omitted from formatting if negative - - line string // contains no newlines - Column int // zero-based -} - -// NewError returns an Error. src is the source code containing the error. If -// src came from ConcatSource, sis is the corresponding slice of SourceInfo; -// otherwise, src is nil. offset is the offset of the error within src. -func NewError(src string, sis []SourceInfo, offset int) error { - var filename string - for _, si := range sis { - if offset < si.end { - filename = si.filename - offset -= si.start - src = src[si.start:si.end] - break - } - } - lineNum := -1 - if filename != "" || strings.Count(src, "\n") > 0 { - lineNum = strings.Count(src[:offset], "\n") - } - column := offset - if i := strings.LastIndexByte(src[:offset], '\n'); i != -1 { - column -= i + 1 - src = src[i+1:] - } - if i := strings.IndexByte(src, '\n'); i != -1 { - src = src[:i] - } - return &Error{ - Offset: offset, - LineNum: lineNum, - Column: column, - filename: filename, - line: src, - } -} - -func (e *Error) Error() string { - var b strings.Builder - b.WriteString("error parsing Zed ") - if e.filename != "" { - fmt.Fprintf(&b, "in %s ", e.filename) - } - b.WriteString("at ") - if e.LineNum >= 0 { - fmt.Fprintf(&b, "line %d, ", e.LineNum+1) - } - fmt.Fprintf(&b, "column %d:\n", e.Column+1) - b.WriteString(e.ParseErrorContext()) - return b.String() -} - -func (e *Error) ParseErrorContext() string { - var b strings.Builder - b.WriteString(e.line + "\n") - for k := 0; k < e.Column; k++ { - if k >= e.Column-4 && k != e.Column-1 { - b.WriteByte('=') - } else { - b.WriteByte(' ') + for i, err := range errs { + if pe, ok := err.(*parserError); ok { + errs[i] = ast.NewError(ParsingError, pe.pos.offset, -1) } } - b.WriteByte('^') - b.WriteString(" ===") - return b.String() + return multierr.Combine(errs...) } diff --git a/compiler/parser/source.go b/compiler/parser/source.go new file mode 100644 index 0000000000..2a6c775942 --- /dev/null +++ b/compiler/parser/source.go @@ -0,0 +1,110 @@ +package parser + +import ( + "bytes" + "os" + "sort" +) + +// ConcatSource concatenates the source files in filenames followed by src, +// returning the result and a corresponding slice of SourceInfos. +func ConcatSource(filenames []string, src string) (*SourceSet, error) { + var b bytes.Buffer + set := new(SourceSet) + for _, f := range filenames { + bb, err := os.ReadFile(f) + if err != nil { + return nil, err + } + set.Sources = append(set.Sources, newSourceInfo(f, b.Len(), bb)) + b.Write(bb) + b.WriteByte('\n') + } + if b.Len() == 0 && src == "" { + src = "*" + } + set.Sources = append(set.Sources, newSourceInfo("", b.Len(), []byte(src))) + b.WriteString(src) + set.Contents = b.Bytes() + return set, nil +} + +type SourceSet struct { + Contents []byte + Sources []*SourceInfo +} + +func (s *SourceSet) SourceOf(pos int) *SourceInfo { + i := sort.Search(len(s.Sources), func(i int) bool { return s.Sources[i].start > pos }) - 1 + return s.Sources[i] +} + +// SourceInfo holds source file offsets. +type SourceInfo struct { + Filename string + lines []int + size int + start int +} + +func newSourceInfo(filename string, start int, src []byte) *SourceInfo { + var lines []int + line := 0 + for offset, b := range src { + if line >= 0 { + lines = append(lines, line) + } + line = -1 + if b == '\n' { + line = offset + 1 + } + } + return &SourceInfo{ + Filename: filename, + lines: lines, + size: len(src), + start: start, + } +} + +func (s *SourceInfo) Position(pos int) Position { + if pos < 0 { + return Position{-1, -1, -1, -1} + } + offset := pos - s.start + i := searchLine(s.lines, offset) + return Position{ + Pos: pos, + Offset: offset, + Line: i + 1, + Column: offset - s.lines[i] + 1, + } +} + +func (s *SourceInfo) LineOfPos(src []byte, pos int) string { + i := searchLine(s.lines, pos-s.start) + start := s.lines[i] + end := s.size + if i+1 < len(s.lines) { + end = s.lines[i+1] + } + b := src[s.start+start : s.start+end] + if b[len(b)-1] == '\n' { + b = b[:len(b)-1] + } + return string(b) +} + +func searchLine(lines []int, offset int) int { + return sort.Search(len(lines), func(i int) bool { return lines[i] > offset }) - 1 + +} + +type Position struct { + Pos int `json:"pos"` // Offset relative to SourceSet. + Offset int `json:"offset"` // Offset relative to file start. + Line int `json:"line"` // 1-based line number. + Column int `json:"column"` // 1-based column number. +} + +func (p Position) IsValid() bool { return p.Pos >= 0 } diff --git a/compiler/parser/ztests/syntax-error.yaml b/compiler/parser/ztests/syntax-error.yaml index 949ab350ce..111e1d0abc 100644 --- a/compiler/parser/ztests/syntax-error.yaml +++ b/compiler/parser/ztests/syntax-error.yaml @@ -8,6 +8,6 @@ inputs: outputs: - name: stderr data: | - zq: error parsing Zed at column 12: + zq: error parsing Zed (line 1, column 12): count() by ,x,y === ^ === diff --git a/docs/tutorials/schools.md b/docs/tutorials/schools.md index daeacc4d3a..dfef58d0e7 100644 --- a/docs/tutorials/schools.md +++ b/docs/tutorials/schools.md @@ -229,7 +229,7 @@ zq -z 'Defunct=' *.zson ``` produces ```mdtest-output -zq: error parsing Zed at column 8: +zq: error parsing Zed (line 1, column 8): Defunct= === ^ === ``` diff --git a/go.mod b/go.mod index 2098930d8f..c69d7433d4 100644 --- a/go.mod +++ b/go.mod @@ -29,6 +29,7 @@ require ( github.com/stretchr/testify v1.8.4 github.com/x448/float16 v0.8.4 github.com/yuin/goldmark v1.4.13 + go.uber.org/multierr v1.8.0 go.uber.org/zap v1.23.0 golang.org/x/exp v0.0.0-20231006140011-7918f672742d golang.org/x/sync v0.4.0 @@ -67,7 +68,6 @@ require ( github.com/zeebo/xxh3 v1.0.2 // indirect go.opentelemetry.io/otel v0.16.0 // indirect go.uber.org/atomic v1.7.0 // indirect - go.uber.org/multierr v1.8.0 // indirect golang.org/x/mod v0.13.0 // indirect golang.org/x/net v0.17.0 // indirect golang.org/x/tools v0.14.0 // indirect diff --git a/lake/api/api.go b/lake/api/api.go index 2be1408f7c..033f795aa1 100644 --- a/lake/api/api.go +++ b/lake/api/api.go @@ -22,8 +22,8 @@ import ( type Interface interface { Root() *lake.Root - Query(ctx context.Context, head *lakeparse.Commitish, src string, srcfiles ...string) (zio.ReadCloser, error) - QueryWithControl(ctx context.Context, head *lakeparse.Commitish, src string, srcfiles ...string) (zbuf.ProgressReadCloser, error) + Query(ctx context.Context, head *lakeparse.Commitish, src string) (zio.ReadCloser, error) + QueryWithControl(ctx context.Context, head *lakeparse.Commitish, src string) (zbuf.ProgressReadCloser, error) PoolID(ctx context.Context, poolName string) (ksuid.KSUID, error) CommitObject(ctx context.Context, poolID ksuid.KSUID, branchName string) (ksuid.KSUID, error) CreatePool(context.Context, string, order.SortKey, int, int64) (ksuid.KSUID, error) diff --git a/lake/api/local.go b/lake/api/local.go index bffb699208..c0345648f4 100644 --- a/lake/api/local.go +++ b/lake/api/local.go @@ -104,16 +104,16 @@ func (l *local) Compact(ctx context.Context, poolID ksuid.KSUID, branchName stri return exec.Compact(ctx, l.root, pool, branchName, objects, writeVectors, commit.Author, commit.Body, commit.Meta) } -func (l *local) Query(ctx context.Context, head *lakeparse.Commitish, src string, srcfiles ...string) (zio.ReadCloser, error) { - q, err := l.QueryWithControl(ctx, head, src, srcfiles...) +func (l *local) Query(ctx context.Context, head *lakeparse.Commitish, src string) (zio.ReadCloser, error) { + q, err := l.QueryWithControl(ctx, head, src) if err != nil { return nil, err } return zio.NewReadCloser(zbuf.NoControl(q), q), nil } -func (l *local) QueryWithControl(ctx context.Context, head *lakeparse.Commitish, src string, srcfiles ...string) (zbuf.ProgressReadCloser, error) { - flowgraph, err := l.compiler.Parse(src, srcfiles...) +func (l *local) QueryWithControl(ctx context.Context, head *lakeparse.Commitish, src string) (zbuf.ProgressReadCloser, error) { + flowgraph, err := l.compiler.Parse(src) if err != nil { return nil, err } diff --git a/lake/api/remote.go b/lake/api/remote.go index 57fd1be941..0a1aed23b8 100644 --- a/lake/api/remote.go +++ b/lake/api/remote.go @@ -115,16 +115,16 @@ func (r *remote) Revert(ctx context.Context, poolID ksuid.KSUID, branchName stri return res.Commit, err } -func (r *remote) Query(ctx context.Context, head *lakeparse.Commitish, src string, srcfiles ...string) (zio.ReadCloser, error) { - q, err := r.QueryWithControl(ctx, head, src, srcfiles...) +func (r *remote) Query(ctx context.Context, head *lakeparse.Commitish, src string) (zio.ReadCloser, error) { + q, err := r.QueryWithControl(ctx, head, src) if err != nil { return nil, err } return zio.NewReadCloser(zbuf.NoControl(q), q), nil } -func (r *remote) QueryWithControl(ctx context.Context, head *lakeparse.Commitish, src string, srcfiles ...string) (zbuf.ProgressReadCloser, error) { - res, err := r.conn.Query(ctx, head, src, srcfiles...) +func (r *remote) QueryWithControl(ctx context.Context, head *lakeparse.Commitish, src string) (zbuf.ProgressReadCloser, error) { + res, err := r.conn.Query(ctx, head, src) if err != nil { return nil, err } diff --git a/lake/ztests/query-parse-error.yaml b/lake/ztests/query-parse-error.yaml index 4c7138c253..ae9b507730 100644 --- a/lake/ztests/query-parse-error.yaml +++ b/lake/ztests/query-parse-error.yaml @@ -34,34 +34,34 @@ outputs: - name: stderr data: | =1= - error parsing Zed at column 11: + error parsing Zed (line 1, column 11): from test \ count() === ^ === =2= - error parsing Zed at line 2, column 6: + error parsing Zed (line 2, column 6): test \ count() === ^ === =3= - error parsing Zed at column 11: + error parsing Zed (line 1, column 11): from test \ count() === ^ === =4= - error parsing Zed at line 2, column 6: + error parsing Zed (line 2, column 6): test \ count() === ^ === =5= - error parsing Zed in bad-single-line.zed at line 1, column 11: + error parsing Zed (bad-single-line.zed: line 1, column 11): from test \ count() === ^ === =6= - error parsing Zed in bad-multiple-lines.zed at line 2, column 6: + error parsing Zed (bad-multiple-lines.zed: line 2, column 6): test \ count() === ^ === =7= - error parsing Zed in bad-single-line.zed at line 1, column 11: + error parsing Zed (bad-single-line.zed: line 1, column 11): from test \ count() === ^ === =8= - error parsing Zed in bad-multiple-lines.zed at line 2, column 6: + error parsing Zed (bad-multiple-lines.zed: line 2, column 6): test \ count() === ^ === diff --git a/runtime/compiler.go b/runtime/compiler.go index ba6b478e22..6d71094325 100644 --- a/runtime/compiler.go +++ b/runtime/compiler.go @@ -16,7 +16,7 @@ type Compiler interface { NewQuery(*Context, ast.Seq, []zio.Reader) (Query, error) NewLakeQuery(*Context, ast.Seq, int, *lakeparse.Commitish) (Query, error) NewLakeDeleteQuery(*Context, ast.Seq, *lakeparse.Commitish) (DeleteQuery, error) - Parse(string, ...string) (ast.Seq, error) + Parse(string) (ast.Seq, error) } type Query interface { diff --git a/service/request.go b/service/request.go index ec83afa0bf..8def7dd49d 100644 --- a/service/request.go +++ b/service/request.go @@ -13,8 +13,8 @@ import ( "github.com/brimdata/zed" "github.com/brimdata/zed/api" + "github.com/brimdata/zed/compiler/ast" "github.com/brimdata/zed/compiler/optimizer/demand" - "github.com/brimdata/zed/compiler/parser" "github.com/brimdata/zed/lake" "github.com/brimdata/zed/lake/branches" "github.com/brimdata/zed/lake/commits" @@ -27,6 +27,7 @@ import ( "github.com/brimdata/zed/zson" "github.com/gorilla/mux" "github.com/segmentio/ksuid" + "go.uber.org/multierr" "go.uber.org/zap" ) @@ -303,15 +304,11 @@ func errorResponse(e error) (status int, ae *api.Error) { status = http.StatusInternalServerError ae = &api.Error{Type: "Error"} - var pe *parser.Error - if errors.As(e, &pe) { - ae.Info = map[string]int{"parse_error_offset": pe.Offset} - } - var ze *srverr.Error if !errors.As(e, &ze) { ze = &srverr.Error{Err: e} } + checkASTErrors(ae, ze.Err) switch { case errors.Is(e, branches.ErrExists) || errors.Is(e, pools.ErrExists): @@ -340,3 +337,16 @@ func errorResponse(e error) (status int, ae *api.Error) { ae.Message = ze.Message() return } + +func checkASTErrors(ae *api.Error, zerr error) { + var errs []*ast.Error + for _, err := range multierr.Errors(zerr) { + var aerr *ast.Error + if errors.As(err, &aerr) { + errs = append(errs, aerr) + } + } + if len(errs) > 0 { + ae.Info = errs + } +} diff --git a/service/ztests/compile.yaml b/service/ztests/compile.yaml index 8431371bbb..aa06d52fab 100644 --- a/service/ztests/compile.yaml +++ b/service/ztests/compile.yaml @@ -8,4 +8,4 @@ inputs: outputs: - name: stdout data: | - {info:{parse_error_offset:6}} + {info:[{kind:"Kind",error:"error parsing Zed",pos:6,end:-1}]} diff --git a/service/ztests/query-parse-error.yaml b/service/ztests/query-parse-error.yaml index f9197cbc1f..723d061d3c 100644 --- a/service/ztests/query-parse-error.yaml +++ b/service/ztests/query-parse-error.yaml @@ -34,34 +34,34 @@ outputs: - name: stderr data: | =1= - error parsing Zed at column 11: + error parsing Zed (line 1, column 11): from test \ count() === ^ === =2= - error parsing Zed at line 2, column 6: + error parsing Zed (line 2, column 6): test \ count() === ^ === =3= - error parsing Zed at column 11: + error parsing Zed (line 1, column 11): from test \ count() === ^ === =4= - error parsing Zed at line 2, column 6: + error parsing Zed (line 2, column 6): test \ count() === ^ === =5= - error parsing Zed in bad-single-line.zed at line 1, column 11: + error parsing Zed (bad-single-line.zed: line 1, column 11): from test \ count() === ^ === =6= - error parsing Zed in bad-multiple-lines.zed at line 2, column 6: + error parsing Zed (bad-multiple-lines.zed: line 2, column 6): test \ count() === ^ === =7= - error parsing Zed in bad-single-line.zed at line 1, column 11: + error parsing Zed (bad-single-line.zed: line 1, column 11): from test \ count() === ^ === =8= - error parsing Zed in bad-multiple-lines.zed at line 2, column 6: + error parsing Zed (bad-multiple-lines.zed: line 2, column 6): test \ count() === ^ ===