Add positional information to semantic errors

This commit changes errors produced from semantic analysis so they include a reference to the AST node which produced the error. The allows for highlighting the specific portion of the source that caused a semantic error. This commit also changes how semantic errors are collected in that the semantic analyzer no longer exits as soon as the first error is found and instead goes on the analyze the rest of the AST, reporting all semantic errors in the AST.
brimdata · May 8, 2024 · fe5477a · fe5477a
1 parent 1352416
commit fe5477a
Show file tree

Hide file tree

Showing 33 changed files with 702 additions and 770 deletions.
diff --git a/compiler/ast/ast.go b/compiler/ast/ast.go
@@ -110,7 +110,7 @@ func (c *Call) End() int {
 	if c.Where != nil {
 		return c.Where.End()
 	}
-	return c.Rparen
+	return c.Rparen + 1
 }
 
 type Cast struct {
@@ -180,7 +180,7 @@ type Regexp struct {
 }
 
 func (r *Regexp) Pos() int { return r.PatternPos }
-func (r *Regexp) End() int { return r.PatternPos + len(r.Pattern) }
+func (r *Regexp) End() int { return r.PatternPos + len(r.Pattern) + 2 }
 
 type String struct {
 	Kind    string `json:"kind" unpack:""`
@@ -432,9 +432,9 @@ type (
 		NullsFirst bool        `json:"nullsfirst"`
 	}
 	Cut struct {
-		Kind       string       `json:"kind" unpack:""`
-		KeywordPos int          `json:"keyword_pos"`
-		Args       []Assignment `json:"args"`
+		Kind       string      `json:"kind" unpack:""`
+		KeywordPos int         `json:"keyword_pos"`
+		Args       Assignments `json:"args"`
 	}
 	Drop struct {
 		Kind       string `json:"kind" unpack:""`
@@ -471,10 +471,10 @@ type (
 		Kind string `json:"kind" unpack:""`
 		// StartPos is not called KeywordPos for Summarize since the "summarize"
 		// keyword is optional.
-		StartPos int          `json:"start_pos"`
-		Limit    int          `json:"limit"`
-		Keys     []Assignment `json:"keys"`
-		Aggs     []Assignment `json:"aggs"`
+		StartPos int         `json:"start_pos"`
+		Limit    int         `json:"limit"`
+		Keys     Assignments `json:"keys"`
+		Aggs     Assignments `json:"aggs"`
 	}
 	Top struct {
 		Kind       string `json:"kind" unpack:""`
@@ -484,9 +484,9 @@ type (
 		Flush      bool   `json:"flush"`
 	}
 	Put struct {
-		Kind       string       `json:"kind" unpack:""`
-		KeywordPos int          `json:"keyword_pos"`
-		Args       []Assignment `json:"args"`
+		Kind       string      `json:"kind" unpack:""`
+		KeywordPos int         `json:"keyword_pos"`
+		Args       Assignments `json:"args"`
 	}
 	Merge struct {
 		Kind       string `json:"kind" unpack:""`
@@ -520,8 +520,8 @@ type (
 	// is unknown: It could be a Summarize or Put operator. This will be
 	// determined in the semantic phase.
 	OpAssignment struct {
-		Kind        string       `json:"kind" unpack:""`
-		Assignments []Assignment `json:"assignments"`
+		Kind        string      `json:"kind" unpack:""`
+		Assignments Assignments `json:"assignments"`
 	}
 	// An OpExpr operator is an expression that appears as an operator
 	// and requires semantic analysis to determine if it is a filter, a yield,
@@ -531,22 +531,22 @@ type (
 		Expr Expr   `json:"expr"`
 	}
 	Rename struct {
-		Kind       string       `json:"kind" unpack:""`
-		KeywordPos int          `json:"keyword_pos"`
-		Args       []Assignment `json:"args"`
+		Kind       string      `json:"kind" unpack:""`
+		KeywordPos int         `json:"keyword_pos"`
+		Args       Assignments `json:"args"`
 	}
 	Fuse struct {
 		Kind       string `json:"kind" unpack:""`
 		KeywordPos int    `json:"keyword_pos"`
 	}
 	Join struct {
-		Kind       string       `json:"kind" unpack:""`
-		KeywordPos int          `json:"keyword_pos"`
-		Style      string       `json:"style"`
-		RightInput Seq          `json:"right_input"`
-		LeftKey    Expr         `json:"left_key"`
-		RightKey   Expr         `json:"right_key"`
-		Args       []Assignment `json:"args"`
+		Kind       string      `json:"kind" unpack:""`
+		KeywordPos int         `json:"keyword_pos"`
+		Style      string      `json:"style"`
+		RightInput Seq         `json:"right_input"`
+		LeftKey    Expr        `json:"left_key"`
+		RightKey   Expr        `json:"right_key"`
+		Args       Assignments `json:"args"`
 	}
 	Sample struct {
 		Kind       string `json:"kind" unpack:""`
@@ -671,14 +671,19 @@ type Assignment struct {
 	RHS  Expr   `json:"rhs"`
 }
 
-func (a *Assignment) Pos() int {
+func (a Assignment) Pos() int {
 	if a.LHS != nil {
 		return a.LHS.Pos()
 	}
 	return a.RHS.Pos()
 }
 
-func (a *Assignment) End() int { return a.RHS.Pos() }
+func (a Assignment) End() int { return a.RHS.End() }
+
+type Assignments []Assignment
+
+func (a Assignments) Pos() int { return a[0].Pos() }
+func (a Assignments) End() int { return a[len(a)-1].End() }
 
 // Def is like Assignment but the LHS is an identifier that may be later
 // referenced.  This is used for const blocks in Sequential and var blocks

diff --git a/compiler/ast/dag/expr.go b/compiler/ast/dag/expr.go
@@ -30,6 +30,12 @@ type (
 		LHS  Expr   `json:"lhs"`
 		RHS  Expr   `json:"rhs"`
 	}
+	// A BadExpr node is a placeholder for an expression containing semantic
+	// errors.
+	BadExpr struct {
+		Kind string `json:"kind" unpack:""`
+	}
+
 	BinaryExpr struct {
 		Kind string `json:"kind" unpack:""`
 		Op   string `json:"op"`
@@ -131,6 +137,7 @@ type (
 func (*Agg) ExprDAG()          {}
 func (*ArrayExpr) ExprDAG()    {}
 func (*Assignment) ExprDAG()   {}
+func (*BadExpr) ExprDAG()      {}
 func (*BinaryExpr) ExprDAG()   {}
 func (*Call) ExprDAG()         {}
 func (*Conditional) ExprDAG()  {}

diff --git a/compiler/ast/dag/op.go b/compiler/ast/dag/op.go
@@ -26,6 +26,11 @@ type Seq []Op
 // Ops
 
 type (
+	// A BadOp node is a placeholder for an expression containing semantic
+	// errors.
+	BadOp struct {
+		Kind string `json:"kind" unpack:""`
+	}
 	Combine struct {
 		Kind string `json:"kind" unpack:""`
 	}
@@ -281,6 +286,7 @@ type (
 	}
 )
 
+func (*BadOp) OpNode()     {}
 func (*Fork) OpNode()      {}
 func (*Scatter) OpNode()   {}
 func (*Switch) OpNode()    {}

diff --git a/compiler/ast/dag/unpack.go b/compiler/ast/dag/unpack.go
@@ -10,6 +10,8 @@ var unpacker = unpack.New(
 	Agg{},
 	ArrayExpr{},
 	Assignment{},
+	BadOp{},
+	BadExpr{},
 	BinaryExpr{},
 	Call{},
 	Combine{},

diff --git a/compiler/parser/errors.go b/compiler/parser/errors.go
@@ -79,6 +79,9 @@ func (e *LocalizedError) Error() string {
 }
 
 func (e *LocalizedError) errorContext() string {
+	if !e.Open.IsValid() {
+		return ""
+	}
 	var b strings.Builder
 	b.WriteString(e.Line + "\n")
 	if e.Close.IsValid() {
@@ -102,7 +105,7 @@ func (e *LocalizedError) spanError(b *strings.Builder) {
 	b.WriteString(strings.Repeat(" ", col))
 	end := len(e.Line) - col
 	if e.Open.Line == e.Close.Line {
-		end = e.Close.Column - col
+		end = e.Close.Column - 1 - col
 	}
 	b.WriteString(strings.Repeat("~", end))
 }

diff --git a/compiler/semantic/analyzer.go b/compiler/semantic/analyzer.go
@@ -8,16 +8,17 @@ import (
 	"github.com/brimdata/zed/compiler/ast/dag"
 	"github.com/brimdata/zed/compiler/data"
 	"github.com/brimdata/zed/lakeparse"
+	"go.uber.org/multierr"
 )
 
 // Analyze performs a semantic analysis of the AST, translating it from AST
 // to DAG form, resolving syntax ambiguities, and performing constant propagation.
 // After semantic analysis, the DAG is ready for either optimization or compilation.
 func Analyze(ctx context.Context, seq ast.Seq, source *data.Source, head *lakeparse.Commitish) (dag.Seq, error) {
 	a := newAnalyzer(ctx, source, head)
-	s, err := a.semSeq(seq)
-	if err != nil {
-		return nil, err
+	s := a.semSeq(seq)
+	if a.errors != nil {
+		return nil, a.errors
 	}
 	return s, nil
 }
@@ -26,12 +27,12 @@ func Analyze(ctx context.Context, seq ast.Seq, source *data.Source, head *lakepa
 // DAG does not have one.
 func AnalyzeAddSource(ctx context.Context, seq ast.Seq, source *data.Source, head *lakeparse.Commitish) (dag.Seq, error) {
 	a := newAnalyzer(ctx, source, head)
-	s, err := a.semSeq(seq)
-	if err != nil {
-		return nil, err
+	s := a.semSeq(seq)
+	if a.errors != nil {
+		return nil, a.errors
 	}
 	if !HasSource(s) {
-		if err = a.addDefaultSource(&s); err != nil {
+		if err := a.addDefaultSource(&s); err != nil {
 			return nil, err
 		}
 	}
@@ -40,6 +41,7 @@ func AnalyzeAddSource(ctx context.Context, seq ast.Seq, source *data.Source, hea
 
 type analyzer struct {
 	ctx     context.Context
+	errors  error
 	head    *lakeparse.Commitish
 	opStack []*ast.OpDecl
 	source  *data.Source
@@ -78,6 +80,10 @@ func (a *analyzer) addDefaultSource(seq *dag.Seq) error {
 		seq.Prepend(&dag.DefaultScan{Kind: "DefaultScan"})
 		return nil
 	}
+	// Verify pool exists for HEAD
+	if _, err := a.source.PoolID(a.ctx, a.head.Pool); err != nil {
+		return err
+	}
 	pool := &ast.Pool{
 		Kind: "Pool",
 		Spec: ast.PoolSpec{
@@ -87,10 +93,7 @@ func (a *analyzer) addDefaultSource(seq *dag.Seq) error {
 			},
 		},
 	}
-	ops, err := a.semPool(pool)
-	if err != nil {
-		return err
-	}
+	ops := a.semPool(pool)
 	seq.Prepend(ops[0])
 	return nil
 }
@@ -116,6 +119,7 @@ func (a *analyzer) exitScope() {
 type opDecl struct {
 	ast   *ast.OpDecl
 	scope *Scope // parent scope of op declaration.
+	bad   bool
 }
 
 type opCycleError []*ast.OpDecl
@@ -130,3 +134,15 @@ func (e opCycleError) Error() string {
 	}
 	return b
 }
+
+func badExpr() dag.Expr {
+	return &dag.BadExpr{Kind: "BadExpr"}
+}
+
+func badOp() dag.Op {
+	return &dag.BadOp{Kind: "BadOp"}
+}
+
+func (a *analyzer) error(n ast.Node, err error) {
+	a.errors = multierr.Append(a.errors, &Error{n, err})
+}
diff --git a/compiler/semantic/errors.go b/compiler/semantic/errors.go
@@ -0,0 +1,18 @@
+package semantic
+
+import (
+	"fmt"
+
+	"github.com/brimdata/zed/compiler/ast"
+	"github.com/brimdata/zed/compiler/parser"
+)
+
+type Error struct {
+	ast.Node
+	inner error
+}
+
+var _ parser.PositionalError = (*Error)(nil)
+
+func (e *Error) Error() string   { return fmt.Sprintf("%d: %s", e.Pos(), e.inner.Error()) }
+func (e *Error) Message() string { return e.inner.Error() }