From 24eb6a0bcacd2e0b2ac082a08892f59df56f6b83 Mon Sep 17 00:00:00 2001 From: Noah Treuhaft Date: Thu, 30 May 2024 12:11:38 -0400 Subject: [PATCH] Rework errors in compiler/parser * Add ErrorList, a list of Errors, and SourceSet, a struct containing Zed program text and source file offsets. In the future, compiler/semantic will use these types to return multiple errors that point into the program. * Remove ImproveError and NewError. * Change ParseZed to return an ErrorList when Parse fails. * Change ParseZed and ConcatSource to return a SourceSet. * In api.Error, replace the `Info interface{}` field with `ComplationErrors parser.ErrorList`. Co-authored-by: Matthew Nibecker --- api/api.go | 9 +- api/client/connection.go | 14 +- cli/queryflags/flags.go | 13 +- cmd/zq/ztests/single-arg-error.yaml | 3 +- compiler/job.go | 3 +- compiler/parser/api.go | 177 +++++++++++------------ compiler/parser/source.go | 85 +++++++++++ compiler/parser/ztests/syntax-error.yaml | 2 +- docs/tutorials/schools.md | 2 +- lake/ztests/query-parse-error.yaml | 4 +- service/request.go | 5 +- service/ztests/compile.yaml | 4 +- service/ztests/query-parse-error.yaml | 4 +- 13 files changed, 199 insertions(+), 126 deletions(-) create mode 100644 compiler/parser/source.go diff --git a/api/api.go b/api/api.go index 7c0b27f208..63e92a58f1 100644 --- a/api/api.go +++ b/api/api.go @@ -3,6 +3,7 @@ package api import ( "context" + "github.com/brimdata/zed/compiler/parser" "github.com/brimdata/zed/lakeparse" "github.com/brimdata/zed/order" "github.com/brimdata/zed/pkg/nano" @@ -20,10 +21,10 @@ func RequestIDFromContext(ctx context.Context) string { } type Error struct { - Type string `json:"type"` - Kind string `json:"kind"` - Message string `json:"error"` - Info interface{} `json:"info,omitempty"` + Type string `json:"type"` + Kind string `json:"kind"` + Message string `json:"error"` + CompilationErrors parser.ErrorList `json:"compilation_errors,omitempty"` } func (e Error) Error() string { diff --git a/api/client/connection.go b/api/client/connection.go index 636486f7f6..16a0f19b33 100644 --- a/api/client/connection.go +++ b/api/client/connection.go @@ -297,23 +297,19 @@ func (c *Connection) Revert(ctx context.Context, poolID ksuid.KSUID, branchName // As for Connection.Do, if the returned error is nil, the user is expected to // call Response.Body.Close. func (c *Connection) Query(ctx context.Context, head *lakeparse.Commitish, src string, filenames ...string) (*Response, error) { - src, srcInfo, err := parser.ConcatSource(filenames, src) + sset, err := parser.ConcatSource(filenames, src) if err != nil { return nil, err } - body := api.QueryRequest{Query: src} + body := api.QueryRequest{Query: string(sset.Text)} if head != nil { body.Head = *head } req := c.NewRequest(ctx, http.MethodPost, "/query?ctrl=T", body) res, err := c.Do(req) - var ae *api.Error - if errors.As(err, &ae) { - if m, ok := ae.Info.(map[string]interface{}); ok { - if offset, ok := m["parse_error_offset"].(float64); ok { - return res, parser.NewError(src, srcInfo, int(offset)) - } - } + if ae := (*api.Error)(nil); errors.As(err, &ae) && len(ae.CompilationErrors) > 0 { + ae.CompilationErrors.SetSourceSet(sset) + return nil, ae.CompilationErrors } return res, err } diff --git a/cli/queryflags/flags.go b/cli/queryflags/flags.go index 618a4b0fc4..cde18ff72f 100644 --- a/cli/queryflags/flags.go +++ b/cli/queryflags/flags.go @@ -87,15 +87,10 @@ func singleArgError(src string, err error) error { src = src[:20] + "..." } fmt.Fprintf(&b, "\n - a file could not be found with the name %q", src) - var perr *parser.Error - if errors.As(err, &perr) { - b.WriteString("\n - the argument could not be compiled as a valid Zed query due to parse error (") - if perr.LineNum > 0 { - fmt.Fprintf(&b, "line %d, ", perr.LineNum) - } - fmt.Fprintf(&b, "column %d):", perr.Column) - for _, l := range strings.Split(perr.ParseErrorContext(), "\n") { - fmt.Fprintf(&b, "\n %s", l) + if list := (parser.ErrorList)(nil); errors.As(err, &list) { + b.WriteString("\n - the argument could not be compiled as a valid Zed query:") + for _, line := range strings.Split(list.Error(), "\n") { + fmt.Fprintf(&b, "\n %s", line) } } else { b.WriteString("\n - the argument did not parse as a valid Zed query") diff --git a/cmd/zq/ztests/single-arg-error.yaml b/cmd/zq/ztests/single-arg-error.yaml index e59212acc9..701036ffc6 100644 --- a/cmd/zq/ztests/single-arg-error.yaml +++ b/cmd/zq/ztests/single-arg-error.yaml @@ -6,6 +6,7 @@ outputs: data: | zq: could not invoke zq with a single argument because: - a file could not be found with the name "file sample.zson | c..." - - the argument could not be compiled as a valid Zed query due to parse error (column 25): + - the argument could not be compiled as a valid Zed query: + error parsing Zed at line 1, column 26: file sample.zson | count( === ^ === diff --git a/compiler/job.go b/compiler/job.go index 41f84e047d..68fd9a2c27 100644 --- a/compiler/job.go +++ b/compiler/job.go @@ -89,7 +89,8 @@ func (j *Job) Parallelize(n int) error { } func Parse(src string, filenames ...string) (ast.Seq, error) { - return parser.ParseZed(filenames, src) + seq, _, err := parser.ParseZed(filenames, src) + return seq, err } // MustParse is like Parse but panics if an error is encountered. diff --git a/compiler/parser/api.go b/compiler/parser/api.go index 047b0249b4..be73b641ed 100644 --- a/compiler/parser/api.go +++ b/compiler/parser/api.go @@ -8,138 +8,133 @@ import ( "github.com/brimdata/zed/compiler/ast" ) -// ParseZed calls ConcatSource followed by Parse. If Parse fails, it calls -// ImproveError. -func ParseZed(filenames []string, src string) (ast.Seq, error) { - src, srcInfo, err := ConcatSource(filenames, src) +// ParseZed calls ConcatSource followed by Parse. If Parse returns an error, +// ConcatSource tries to convert it to an ErrorList. +func ParseZed(filenames []string, src string) (ast.Seq, *SourceSet, error) { + sset, err := ConcatSource(filenames, src) if err != nil { - return nil, err + return nil, nil, err } - p, err := Parse("", []byte(src)) + p, err := Parse("", []byte(sset.Text)) if err != nil { - return nil, ImproveError(err, src, srcInfo) + return nil, nil, convertErrList(err, sset) } - return sliceOf[ast.Op](p), nil -} - -// SourceInfo holds source file offsets. -type SourceInfo struct { - filename string - start int - end int + return sliceOf[ast.Op](p), sset, nil } // ConcatSource concatenates the source files in filenames followed by src, -// returning the result and a corresponding slice of SourceInfos. -func ConcatSource(filenames []string, src string) (string, []SourceInfo, error) { +// returning a SourceSet. +func ConcatSource(filenames []string, src string) (*SourceSet, error) { var b strings.Builder - var sis []SourceInfo + var sis []*SourceInfo for _, f := range filenames { bb, err := os.ReadFile(f) if err != nil { - return "", nil, err + return nil, err } - start := b.Len() + sis = append(sis, newSourceInfo(f, b.Len(), bb)) b.Write(bb) - sis = append(sis, SourceInfo{f, start, b.Len()}) b.WriteByte('\n') } - start := b.Len() - b.WriteString(src) - sis = append(sis, SourceInfo{"", start, b.Len()}) - if b.Len() == 0 { - return "*", nil, nil + if b.Len() == 0 && src == "" { + src = "*" } - return b.String(), sis, nil + sis = append(sis, newSourceInfo("", b.Len(), []byte(src))) + b.WriteString(src) + return &SourceSet{b.String(), sis}, nil } -// ImproveError tries to improve an error from Parse. err is the error. src is -// the source code for which Parse return err. If src came from ConcatSource, -// sis is the corresponding slice of SourceInfo; otherwise, sis is nil. -func ImproveError(err error, src string, sis []SourceInfo) error { - el, ok := err.(errList) - if !ok || len(el) != 1 { - return err - } - pe, ok := el[0].(*parserError) +func convertErrList(err error, sset *SourceSet) error { + errs, ok := err.(errList) if !ok { return err } - return NewError(src, sis, pe.pos.offset) + var out ErrorList + for _, e := range errs { + pe, ok := e.(*parserError) + if !ok { + return err + } + out.Append("error parsing Zed", pe.pos.offset, -1) + } + out.SetSourceSet(sset) + return out } -// Error is a parse error with nice formatting. It includes the source code -// line containing the error. -type Error struct { - Offset int // offset into original source code - - filename string // omitted from formatting if "" - LineNum int // zero-based; omitted from formatting if negative +// ErrList is a list of Errors. +type ErrorList []*Error - line string // contains no newlines - Column int // zero-based +// Append appends an Error to e. +func (e *ErrorList) Append(msg string, pos, end int) { + *e = append(*e, &Error{msg, pos, end, nil}) } -// NewError returns an Error. src is the source code containing the error. If -// src came from ConcatSource, sis is the corresponding slice of SourceInfo; -// otherwise, src is nil. offset is the offset of the error within src. -func NewError(src string, sis []SourceInfo, offset int) error { - var filename string - for _, si := range sis { - if offset < si.end { - filename = si.filename - offset -= si.start - src = src[si.start:si.end] - break +// Error concatenates the errors in e with a newline between each. +func (e ErrorList) Error() string { + var b strings.Builder + for i, err := range e { + if i > 0 { + b.WriteByte('\n') } + b.WriteString(err.Error()) } - lineNum := -1 - if filename != "" || strings.Count(src, "\n") > 0 { - lineNum = strings.Count(src[:offset], "\n") - } - column := offset - if i := strings.LastIndexByte(src[:offset], '\n'); i != -1 { - column -= i + 1 - src = src[i+1:] - } - if i := strings.IndexByte(src, '\n'); i != -1 { - src = src[:i] - } - return &Error{ - Offset: offset, - LineNum: lineNum, - Column: column, - filename: filename, - line: src, + return b.String() +} + +// SetSourceSet sets the SourceSet for every Error in e. +func (e ErrorList) SetSourceSet(sset *SourceSet) { + for i := range e { + e[i].sset = sset } } +type Error struct { + Msg string + Pos int + End int + sset *SourceSet +} + func (e *Error) Error() string { + if e.sset == nil { + return e.Msg + } + src := e.sset.SourceOf(e.Pos) + start := src.Position(e.Pos) + end := src.Position(e.End) var b strings.Builder - b.WriteString("error parsing Zed ") - if e.filename != "" { - fmt.Fprintf(&b, "in %s ", e.filename) + b.WriteString(e.Msg) + if src.Filename != "" { + fmt.Fprintf(&b, " in %s", src.Filename) } - b.WriteString("at ") - if e.LineNum >= 0 { - fmt.Fprintf(&b, "line %d, ", e.LineNum+1) + line := src.LineOfPos(e.sset.Text, e.Pos) + fmt.Fprintf(&b, " at line %d, column %d:\n%s\n", start.Line, start.Column, line) + if end.IsValid() { + formatSpanError(&b, line, start, end) + } else { + formatPointError(&b, start) } - fmt.Fprintf(&b, "column %d:\n", e.Column+1) - b.WriteString(e.ParseErrorContext()) return b.String() } -func (e *Error) ParseErrorContext() string { - var b strings.Builder - b.WriteString(e.line + "\n") - for k := 0; k < e.Column; k++ { - if k >= e.Column-4 && k != e.Column-1 { +func formatSpanError(b *strings.Builder, line string, start, end Position) { + col := start.Column - 1 + b.WriteString(strings.Repeat(" ", col)) + n := len(line) - col + if start.Line == end.Line { + n = end.Column - 1 - col + } + b.WriteString(strings.Repeat("~", n)) +} + +func formatPointError(b *strings.Builder, start Position) { + col := start.Column - 1 + for k := 0; k < col; k++ { + if k >= col-4 && k != col-1 { b.WriteByte('=') } else { b.WriteByte(' ') } } - b.WriteByte('^') - b.WriteString(" ===") - return b.String() + b.WriteString("^ ===") } diff --git a/compiler/parser/source.go b/compiler/parser/source.go new file mode 100644 index 0000000000..c80110ed57 --- /dev/null +++ b/compiler/parser/source.go @@ -0,0 +1,85 @@ +package parser + +import ( + "sort" +) + +type SourceSet struct { + Text string + Sources []*SourceInfo +} + +func (s *SourceSet) SourceOf(pos int) *SourceInfo { + i := sort.Search(len(s.Sources), func(i int) bool { return s.Sources[i].start > pos }) - 1 + return s.Sources[i] +} + +// SourceInfo holds source file offsets. +type SourceInfo struct { + Filename string + lines []int + size int + start int +} + +func newSourceInfo(filename string, start int, src []byte) *SourceInfo { + var lines []int + line := 0 + for offset, b := range src { + if line >= 0 { + lines = append(lines, line) + } + line = -1 + if b == '\n' { + line = offset + 1 + } + } + return &SourceInfo{ + Filename: filename, + lines: lines, + size: len(src), + start: start, + } +} + +func (s *SourceInfo) Position(pos int) Position { + if pos < 0 { + return Position{-1, -1, -1, -1} + } + offset := pos - s.start + i := searchLine(s.lines, offset) + return Position{ + Pos: pos, + Offset: offset, + Line: i + 1, + Column: offset - s.lines[i] + 1, + } +} + +func (s *SourceInfo) LineOfPos(src string, pos int) string { + i := searchLine(s.lines, pos-s.start) + start := s.lines[i] + end := s.size + if i+1 < len(s.lines) { + end = s.lines[i+1] + } + b := src[s.start+start : s.start+end] + if b[len(b)-1] == '\n' { + b = b[:len(b)-1] + } + return string(b) +} + +func searchLine(lines []int, offset int) int { + return sort.Search(len(lines), func(i int) bool { return lines[i] > offset }) - 1 + +} + +type Position struct { + Pos int `json:"pos"` // Offset relative to SourceSet. + Offset int `json:"offset"` // Offset relative to file start. + Line int `json:"line"` // 1-based line number. + Column int `json:"column"` // 1-based column number. +} + +func (p Position) IsValid() bool { return p.Pos >= 0 } diff --git a/compiler/parser/ztests/syntax-error.yaml b/compiler/parser/ztests/syntax-error.yaml index 949ab350ce..5a2b05a32c 100644 --- a/compiler/parser/ztests/syntax-error.yaml +++ b/compiler/parser/ztests/syntax-error.yaml @@ -8,6 +8,6 @@ inputs: outputs: - name: stderr data: | - zq: error parsing Zed at column 12: + zq: error parsing Zed at line 1, column 12: count() by ,x,y === ^ === diff --git a/docs/tutorials/schools.md b/docs/tutorials/schools.md index daeacc4d3a..0e3df2986f 100644 --- a/docs/tutorials/schools.md +++ b/docs/tutorials/schools.md @@ -229,7 +229,7 @@ zq -z 'Defunct=' *.zson ``` produces ```mdtest-output -zq: error parsing Zed at column 8: +zq: error parsing Zed at line 1, column 8: Defunct= === ^ === ``` diff --git a/lake/ztests/query-parse-error.yaml b/lake/ztests/query-parse-error.yaml index 4c7138c253..96a7844523 100644 --- a/lake/ztests/query-parse-error.yaml +++ b/lake/ztests/query-parse-error.yaml @@ -34,7 +34,7 @@ outputs: - name: stderr data: | =1= - error parsing Zed at column 11: + error parsing Zed at line 1, column 11: from test \ count() === ^ === =2= @@ -42,7 +42,7 @@ outputs: test \ count() === ^ === =3= - error parsing Zed at column 11: + error parsing Zed at line 1, column 11: from test \ count() === ^ === =4= diff --git a/service/request.go b/service/request.go index 2a97b2d046..118d0a1a59 100644 --- a/service/request.go +++ b/service/request.go @@ -303,9 +303,8 @@ func errorResponse(e error) (status int, ae *api.Error) { status = http.StatusInternalServerError ae = &api.Error{Type: "Error"} - var pe *parser.Error - if errors.As(e, &pe) { - ae.Info = map[string]int{"parse_error_offset": pe.Offset} + if list := (parser.ErrorList)(nil); errors.As(e, &list) { + ae.CompilationErrors = list } var ze *srverr.Error diff --git a/service/ztests/compile.yaml b/service/ztests/compile.yaml index 8431371bbb..08d7d41733 100644 --- a/service/ztests/compile.yaml +++ b/service/ztests/compile.yaml @@ -1,6 +1,6 @@ script: | source service.sh - curl -d '{"query":"count("}' $ZED_LAKE/compile | zq -z 'cut info' - + curl -d '{"query":"count("}' $ZED_LAKE/compile | zq -z 'cut compilation_errors' - inputs: - name: service.sh @@ -8,4 +8,4 @@ inputs: outputs: - name: stdout data: | - {info:{parse_error_offset:6}} + {compilation_errors:[{Msg:"error parsing Zed",Pos:6,End:-1}]} diff --git a/service/ztests/query-parse-error.yaml b/service/ztests/query-parse-error.yaml index f9197cbc1f..9385ba584f 100644 --- a/service/ztests/query-parse-error.yaml +++ b/service/ztests/query-parse-error.yaml @@ -34,7 +34,7 @@ outputs: - name: stderr data: | =1= - error parsing Zed at column 11: + error parsing Zed at line 1, column 11: from test \ count() === ^ === =2= @@ -42,7 +42,7 @@ outputs: test \ count() === ^ === =3= - error parsing Zed at column 11: + error parsing Zed at line 1, column 11: from test \ count() === ^ === =4=