Skip to content

Commit

Permalink
Add positional information to semantic errors
Browse files Browse the repository at this point in the history
This commit changes errors produced from semantic analysis so they
include a reference to the AST node which produced the error. The allows
for highlighting the specific portion of the source that caused a
semantic error.

This commit also changes how semantic errors are collected in that the
semantic analyzer no longer exits as soon as the first error is found
and instead goes on the analyze the rest of the AST, reporting all
semantic errors in the AST.
  • Loading branch information
mattnibs committed May 8, 2024
1 parent 1352416 commit fe5477a
Show file tree
Hide file tree
Showing 33 changed files with 702 additions and 770 deletions.
57 changes: 31 additions & 26 deletions compiler/ast/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ func (c *Call) End() int {
if c.Where != nil {
return c.Where.End()
}
return c.Rparen
return c.Rparen + 1
}

type Cast struct {
Expand Down Expand Up @@ -180,7 +180,7 @@ type Regexp struct {
}

func (r *Regexp) Pos() int { return r.PatternPos }
func (r *Regexp) End() int { return r.PatternPos + len(r.Pattern) }
func (r *Regexp) End() int { return r.PatternPos + len(r.Pattern) + 2 }

type String struct {
Kind string `json:"kind" unpack:""`
Expand Down Expand Up @@ -432,9 +432,9 @@ type (
NullsFirst bool `json:"nullsfirst"`
}
Cut struct {
Kind string `json:"kind" unpack:""`
KeywordPos int `json:"keyword_pos"`
Args []Assignment `json:"args"`
Kind string `json:"kind" unpack:""`
KeywordPos int `json:"keyword_pos"`
Args Assignments `json:"args"`
}
Drop struct {
Kind string `json:"kind" unpack:""`
Expand Down Expand Up @@ -471,10 +471,10 @@ type (
Kind string `json:"kind" unpack:""`
// StartPos is not called KeywordPos for Summarize since the "summarize"
// keyword is optional.
StartPos int `json:"start_pos"`
Limit int `json:"limit"`
Keys []Assignment `json:"keys"`
Aggs []Assignment `json:"aggs"`
StartPos int `json:"start_pos"`
Limit int `json:"limit"`
Keys Assignments `json:"keys"`
Aggs Assignments `json:"aggs"`
}
Top struct {
Kind string `json:"kind" unpack:""`
Expand All @@ -484,9 +484,9 @@ type (
Flush bool `json:"flush"`
}
Put struct {
Kind string `json:"kind" unpack:""`
KeywordPos int `json:"keyword_pos"`
Args []Assignment `json:"args"`
Kind string `json:"kind" unpack:""`
KeywordPos int `json:"keyword_pos"`
Args Assignments `json:"args"`
}
Merge struct {
Kind string `json:"kind" unpack:""`
Expand Down Expand Up @@ -520,8 +520,8 @@ type (
// is unknown: It could be a Summarize or Put operator. This will be
// determined in the semantic phase.
OpAssignment struct {
Kind string `json:"kind" unpack:""`
Assignments []Assignment `json:"assignments"`
Kind string `json:"kind" unpack:""`
Assignments Assignments `json:"assignments"`
}
// An OpExpr operator is an expression that appears as an operator
// and requires semantic analysis to determine if it is a filter, a yield,
Expand All @@ -531,22 +531,22 @@ type (
Expr Expr `json:"expr"`
}
Rename struct {
Kind string `json:"kind" unpack:""`
KeywordPos int `json:"keyword_pos"`
Args []Assignment `json:"args"`
Kind string `json:"kind" unpack:""`
KeywordPos int `json:"keyword_pos"`
Args Assignments `json:"args"`
}
Fuse struct {
Kind string `json:"kind" unpack:""`
KeywordPos int `json:"keyword_pos"`
}
Join struct {
Kind string `json:"kind" unpack:""`
KeywordPos int `json:"keyword_pos"`
Style string `json:"style"`
RightInput Seq `json:"right_input"`
LeftKey Expr `json:"left_key"`
RightKey Expr `json:"right_key"`
Args []Assignment `json:"args"`
Kind string `json:"kind" unpack:""`
KeywordPos int `json:"keyword_pos"`
Style string `json:"style"`
RightInput Seq `json:"right_input"`
LeftKey Expr `json:"left_key"`
RightKey Expr `json:"right_key"`
Args Assignments `json:"args"`
}
Sample struct {
Kind string `json:"kind" unpack:""`
Expand Down Expand Up @@ -671,14 +671,19 @@ type Assignment struct {
RHS Expr `json:"rhs"`
}

func (a *Assignment) Pos() int {
func (a Assignment) Pos() int {
if a.LHS != nil {
return a.LHS.Pos()
}
return a.RHS.Pos()
}

func (a *Assignment) End() int { return a.RHS.Pos() }
func (a Assignment) End() int { return a.RHS.End() }

type Assignments []Assignment

func (a Assignments) Pos() int { return a[0].Pos() }
func (a Assignments) End() int { return a[len(a)-1].End() }

// Def is like Assignment but the LHS is an identifier that may be later
// referenced. This is used for const blocks in Sequential and var blocks
Expand Down
7 changes: 7 additions & 0 deletions compiler/ast/dag/expr.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ type (
LHS Expr `json:"lhs"`
RHS Expr `json:"rhs"`
}
// A BadExpr node is a placeholder for an expression containing semantic
// errors.
BadExpr struct {
Kind string `json:"kind" unpack:""`
}

BinaryExpr struct {
Kind string `json:"kind" unpack:""`
Op string `json:"op"`
Expand Down Expand Up @@ -131,6 +137,7 @@ type (
func (*Agg) ExprDAG() {}
func (*ArrayExpr) ExprDAG() {}
func (*Assignment) ExprDAG() {}
func (*BadExpr) ExprDAG() {}
func (*BinaryExpr) ExprDAG() {}
func (*Call) ExprDAG() {}
func (*Conditional) ExprDAG() {}
Expand Down
6 changes: 6 additions & 0 deletions compiler/ast/dag/op.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ type Seq []Op
// Ops

type (
// A BadOp node is a placeholder for an expression containing semantic
// errors.
BadOp struct {
Kind string `json:"kind" unpack:""`
}
Combine struct {
Kind string `json:"kind" unpack:""`
}
Expand Down Expand Up @@ -281,6 +286,7 @@ type (
}
)

func (*BadOp) OpNode() {}
func (*Fork) OpNode() {}
func (*Scatter) OpNode() {}
func (*Switch) OpNode() {}
Expand Down
2 changes: 2 additions & 0 deletions compiler/ast/dag/unpack.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ var unpacker = unpack.New(
Agg{},
ArrayExpr{},
Assignment{},
BadOp{},
BadExpr{},
BinaryExpr{},
Call{},
Combine{},
Expand Down
5 changes: 4 additions & 1 deletion compiler/parser/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ func (e *LocalizedError) Error() string {
}

func (e *LocalizedError) errorContext() string {
if !e.Open.IsValid() {
return ""
}
var b strings.Builder
b.WriteString(e.Line + "\n")
if e.Close.IsValid() {
Expand All @@ -102,7 +105,7 @@ func (e *LocalizedError) spanError(b *strings.Builder) {
b.WriteString(strings.Repeat(" ", col))
end := len(e.Line) - col
if e.Open.Line == e.Close.Line {
end = e.Close.Column - col
end = e.Close.Column - 1 - col
}
b.WriteString(strings.Repeat("~", end))
}
Expand Down
38 changes: 27 additions & 11 deletions compiler/semantic/analyzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,17 @@ import (
"github.com/brimdata/zed/compiler/ast/dag"
"github.com/brimdata/zed/compiler/data"
"github.com/brimdata/zed/lakeparse"
"go.uber.org/multierr"
)

// Analyze performs a semantic analysis of the AST, translating it from AST
// to DAG form, resolving syntax ambiguities, and performing constant propagation.
// After semantic analysis, the DAG is ready for either optimization or compilation.
func Analyze(ctx context.Context, seq ast.Seq, source *data.Source, head *lakeparse.Commitish) (dag.Seq, error) {
a := newAnalyzer(ctx, source, head)
s, err := a.semSeq(seq)
if err != nil {
return nil, err
s := a.semSeq(seq)
if a.errors != nil {
return nil, a.errors
}
return s, nil
}
Expand All @@ -26,12 +27,12 @@ func Analyze(ctx context.Context, seq ast.Seq, source *data.Source, head *lakepa
// DAG does not have one.
func AnalyzeAddSource(ctx context.Context, seq ast.Seq, source *data.Source, head *lakeparse.Commitish) (dag.Seq, error) {
a := newAnalyzer(ctx, source, head)
s, err := a.semSeq(seq)
if err != nil {
return nil, err
s := a.semSeq(seq)
if a.errors != nil {
return nil, a.errors
}
if !HasSource(s) {
if err = a.addDefaultSource(&s); err != nil {
if err := a.addDefaultSource(&s); err != nil {
return nil, err
}
}
Expand All @@ -40,6 +41,7 @@ func AnalyzeAddSource(ctx context.Context, seq ast.Seq, source *data.Source, hea

type analyzer struct {
ctx context.Context
errors error
head *lakeparse.Commitish
opStack []*ast.OpDecl
source *data.Source
Expand Down Expand Up @@ -78,6 +80,10 @@ func (a *analyzer) addDefaultSource(seq *dag.Seq) error {
seq.Prepend(&dag.DefaultScan{Kind: "DefaultScan"})
return nil
}
// Verify pool exists for HEAD
if _, err := a.source.PoolID(a.ctx, a.head.Pool); err != nil {
return err
}
pool := &ast.Pool{
Kind: "Pool",
Spec: ast.PoolSpec{
Expand All @@ -87,10 +93,7 @@ func (a *analyzer) addDefaultSource(seq *dag.Seq) error {
},
},
}
ops, err := a.semPool(pool)
if err != nil {
return err
}
ops := a.semPool(pool)
seq.Prepend(ops[0])
return nil
}
Expand All @@ -116,6 +119,7 @@ func (a *analyzer) exitScope() {
type opDecl struct {
ast *ast.OpDecl
scope *Scope // parent scope of op declaration.
bad bool
}

type opCycleError []*ast.OpDecl
Expand All @@ -130,3 +134,15 @@ func (e opCycleError) Error() string {
}
return b
}

func badExpr() dag.Expr {
return &dag.BadExpr{Kind: "BadExpr"}
}

func badOp() dag.Op {
return &dag.BadOp{Kind: "BadOp"}
}

func (a *analyzer) error(n ast.Node, err error) {
a.errors = multierr.Append(a.errors, &Error{n, err})
}
18 changes: 18 additions & 0 deletions compiler/semantic/errors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package semantic

import (
"fmt"

"github.com/brimdata/zed/compiler/ast"
"github.com/brimdata/zed/compiler/parser"
)

type Error struct {
ast.Node
inner error
}

var _ parser.PositionalError = (*Error)(nil)

func (e *Error) Error() string { return fmt.Sprintf("%d: %s", e.Pos(), e.inner.Error()) }
func (e *Error) Message() string { return e.inner.Error() }
Loading

0 comments on commit fe5477a

Please sign in to comment.