cmd/compile/internal/syntax: rudimentary support for error reporting

If an ErrorHandler is passed to a syntax.ReadXXX
function, it is invoked for each error encountered.
This mechanism will need to be refined, but it should
enable progress with all.bash.

Also:
- added tests for lexical errors
- fixed endless loops when encountering non-terminated
  strings and comments
This commit is contained in:
Robert Griesemer 2016-06-07 18:29:22 -07:00 committed by Matthew Dempsky
parent f50329534a
commit fe52bcc8e6
8 changed files with 147 additions and 78 deletions

View File

@ -14,7 +14,7 @@ func TestDump(t *testing.T) {
t.Skip("skipping test in short mode")
}
ast, err := ReadFile(*src, 0)
ast, err := ReadFile(*src, nil, 0)
if err != nil {
t.Fatal(err)
}

View File

@ -6,33 +6,38 @@ package syntax
import (
"fmt"
"io"
"strings"
)
const debug = false
const trace = false
// ----------------------------------------------------------------------------
// "Inherited" globals - TODO(gri): eliminate
var nerrors int
// Yyerror formats the message, prints it followed by a newline to standard
// output, increments the package-level error counter nerrors, and then
// panics with the value 7. It never returns normally.
//
//go:noinline
func Yyerror(format string, args ...interface{}) {
	fmt.Println(fmt.Sprintf(format, args...))
	nerrors++
	panic(7)
}
// ----------------------------------------------------------------------------
// parser carries the per-source parsing state. It embeds the scanner, so
// scanner fields and methods are accessible directly on the parser, and it
// keeps its own count of the errors reported while processing the source.
type parser struct {
scanner
fnest int // function nesting level (for error handling)
xnest int // expression nesting level (for complit ambiguity resolution)
indent []byte // tracing support
nerrors int // error count
}
// init prepares p for reading from src.
//
// The embedded scanner is given a wrapper handler that counts every
// reported error in p.nerrors and then forwards it to errh. If errh is nil,
// the error is printed to standard output as "line: msg" instead.
// The nesting levels, the trace indentation, and the error count are reset.
func (p *parser) init(src io.Reader, errh ErrorHandler) {
	report := func(pos, line int, msg string) {
		p.nerrors++
		if errh == nil {
			// no handler installed: fall back to printing the error
			fmt.Printf("%d: %s\n", line, msg)
			return
		}
		errh(pos, line, msg)
	}
	p.scanner.init(src, report)

	p.fnest, p.xnest = 0, 0
	p.indent = nil
	p.nerrors = 0
}
func (p *parser) got(tok token) bool {
@ -53,12 +58,13 @@ func (p *parser) want(tok token) {
// ----------------------------------------------------------------------------
// Error handling
// syntax_error reports a syntax error at the current line.
func (p *parser) syntax_error(msg string) {
if trace {
defer p.trace("syntax_error (" + msg + ")")()
}
if p.tok == _EOF && nerrors > 0 {
if p.tok == _EOF && p.nerrors > 0 {
return // avoid meaningless follow-up errors
}
@ -72,7 +78,7 @@ func (p *parser) syntax_error(msg string) {
msg = ", " + msg
default:
// plain error - we don't care about current token
Yyerror("%d: syntax error: %s", p.line, msg)
p.error("syntax error: " + msg)
return
}
@ -92,11 +98,12 @@ func (p *parser) syntax_error(msg string) {
tok = tokstring(p.tok)
}
Yyerror("%d: syntax error: unexpected %s%s", p.line, tok, msg)
p.error("syntax error: unexpected " + tok + msg)
}
// Like syntax_error, but reports error at given line rather than current lexer line.
func (p *parser) syntax_error_at(lineno uint32, msg string) {
// TODO(gri) fix this
// defer func(lineno int32) {
// lexlineno = lineno
// }(lexlineno)
@ -193,7 +200,7 @@ func (p *parser) file() *File {
p.want(_Semi)
// don't bother continuing if package clause has errors
if nerrors > 0 {
if p.nerrors > 0 {
return nil
}
@ -376,12 +383,12 @@ func (p *parser) funcDecl() *FuncDecl {
rcvr := p.paramList()
switch len(rcvr) {
case 0:
Yyerror("method has no receiver")
p.error("method has no receiver")
return nil // TODO(gri) better solution
case 1:
f.Recv = rcvr[0]
default:
Yyerror("method has multiple receivers")
p.error("method has multiple receivers")
return nil // TODO(gri) better solution
}
}
@ -396,13 +403,13 @@ func (p *parser) funcDecl() *FuncDecl {
// if name.Sym.Name == "init" {
// name = renameinit()
// if params != nil || result != nil {
// Yyerror("func init must have no arguments and no return values")
// p.error("func init must have no arguments and no return values")
// }
// }
// if localpkg.Name == "main" && name.Name == "main" {
// if params != nil || result != nil {
// Yyerror("func main must have no arguments and no return values")
// p.error("func main must have no arguments and no return values")
// }
// }
@ -412,7 +419,7 @@ func (p *parser) funcDecl() *FuncDecl {
// TODO(gri) deal with function properties
// if noescape && body != nil {
// Yyerror("can only use //go:noescape with external func implementations")
// p.error("can only use //go:noescape with external func implementations")
// }
return f
@ -543,10 +550,10 @@ func (p *parser) callStmt() *CallStmt {
case *CallExpr:
s.Call = x
case *ParenExpr:
Yyerror("expression in %s must not be parenthesized", s.Tok)
p.error(fmt.Sprintf("expression in %s must not be parenthesized", s.Tok))
// already progressed, no need to advance
default:
Yyerror("expression in %s must be function call", s.Tok)
p.error(fmt.Sprintf("expression in %s must be function call", s.Tok))
// already progressed, no need to advance
}
@ -728,13 +735,13 @@ loop:
if p.got(_Colon) {
// x[i:j:...]
if t.Index[1] == nil {
Yyerror("middle index required in 3-index slice")
p.error("middle index required in 3-index slice")
}
if p.tok != _Rbrack {
// x[i:j:k...
t.Index[2] = p.expr()
} else {
Yyerror("final index required in 3-index slice")
p.error("final index required in 3-index slice")
}
}
p.want(_Rbrack)
@ -1141,7 +1148,7 @@ func (p *parser) fieldDecl(styp *StructType) {
p.want(_Rparen)
tag := p.oliteral()
p.addField(styp, nil, typ, tag)
Yyerror("cannot parenthesize embedded type")
p.error("cannot parenthesize embedded type")
} else {
// '(' embed ')' oliteral
@ -1149,7 +1156,7 @@ func (p *parser) fieldDecl(styp *StructType) {
p.want(_Rparen)
tag := p.oliteral()
p.addField(styp, nil, typ, tag)
Yyerror("cannot parenthesize embedded type")
p.error("cannot parenthesize embedded type")
}
case _Star:
@ -1160,7 +1167,7 @@ func (p *parser) fieldDecl(styp *StructType) {
p.want(_Rparen)
tag := p.oliteral()
p.addField(styp, nil, typ, tag)
Yyerror("cannot parenthesize embedded type")
p.error("cannot parenthesize embedded type")
} else {
// '*' embed oliteral
@ -1227,7 +1234,7 @@ func (p *parser) methodDecl() *Field {
f.init(p)
f.Type = p.qualifiedName(nil)
p.want(_Rparen)
Yyerror("cannot parenthesize embedded type")
p.error("cannot parenthesize embedded type")
return f
default:
@ -1294,7 +1301,7 @@ func (p *parser) dotsType() *DotsType {
p.want(_DotDotDot)
t.Elem = p.tryType()
if t.Elem == nil {
Yyerror("final argument in variadic function missing type")
p.error("final argument in variadic function missing type")
}
return t
@ -1441,12 +1448,12 @@ func (p *parser) simpleStmt(lhs Expr, rangeOk bool) SimpleStmt {
if x, ok := rhs.(*AssertExpr); ok && x.Type == nil {
// x.(type)
// if len(rhs) > 1 {
// Yyerror("expr.(type) must be alone on ths")
// p.error("expr.(type) must be alone on ths")
// }
// if len(lhs) > 1 {
// Yyerror("argument count mismatch: %d = %d", len(lhs), 1)
// p.error("argument count mismatch: %d = %d", len(lhs), 1)
// } else if x, ok := lhs[0].(*Name); !ok {
// Yyerror("invalid variable name %s in type switch", x)
// p.error("invalid variable name %s in type switch", x)
// }
}
@ -1557,7 +1564,7 @@ func (p *parser) header(forStmt bool) (init SimpleStmt, cond Expr, post SimpleSt
if p.tok != _Semi {
// accept potential varDecl but complain
if p.got(_Var) {
Yyerror("var declaration not allowed in initializer")
p.error("var declaration not allowed in initializer")
}
init = p.simpleStmt(nil, forStmt)
// If we have a range clause, we are done.
@ -1597,7 +1604,7 @@ func (p *parser) header(forStmt bool) (init SimpleStmt, cond Expr, post SimpleSt
cond = p.unpackCond(name, s.Rhs)
}
default:
Yyerror("invalid condition, tag, or type switch guard")
p.error("invalid condition, tag, or type switch guard")
}
p.xnest = outer
@ -1614,7 +1621,7 @@ func (p *parser) unpackCond(lhs *Name, x Expr) Expr {
}
if lhs != nil {
Yyerror("invalid type switch guard")
p.error("invalid type switch guard")
}
return x
@ -1631,7 +1638,7 @@ func (p *parser) ifStmt() *IfStmt {
p.want(_If)
s.Init, s.Cond, _ = p.header(false)
if s.Cond == nil {
Yyerror("missing condition in if statement")
p.error("missing condition in if statement")
}
s.Then = p.stmtBody("if clause")

View File

@ -22,7 +22,7 @@ var src = flag.String("src", "parser.go", "source file to parse")
var verify = flag.Bool("verify", false, "verify idempotent printing")
func TestParse(t *testing.T) {
_, err := ReadFile(*src, 0)
_, err := ReadFile(*src, nil, 0)
if err != nil {
t.Fatal(err)
}
@ -48,7 +48,7 @@ func TestStdLib(t *testing.T) {
if debug {
fmt.Printf("parsing %s\n", filename)
}
ast, err := ReadFile(filename, 0)
ast, err := ReadFile(filename, nil, 0)
if err != nil {
t.Fatal(err)
}
@ -129,7 +129,7 @@ func verifyPrint(filename string, ast1 *File) {
panic(err)
}
ast2, err := ReadBytes(buf1.Bytes(), 0)
ast2, err := ReadBytes(buf1.Bytes(), nil, 0)
if err != nil {
panic(err)
}

View File

@ -15,7 +15,7 @@ func TestPrint(t *testing.T) {
t.Skip("skipping test in short mode")
}
ast, err := ReadFile(*src, 0)
ast, err := ReadFile(*src, nil, 0)
if err != nil {
t.Fatal(err)
}

View File

@ -26,8 +26,8 @@ type scanner struct {
pragmas []Pragma
}
func (s *scanner) init(src io.Reader) {
s.source.init(src)
func (s *scanner) init(src io.Reader, errh ErrorHandler) {
s.source.init(src, errh)
s.nlsemi = false
}
@ -218,7 +218,7 @@ redo:
goto assignop
case '~':
panic("bitwise complement operator is ^")
s.error("bitwise complement operator is ^")
fallthrough
case '^':
@ -283,8 +283,7 @@ redo:
default:
s.tok = 0
fmt.Printf("invalid rune %q\n", c)
panic("invalid rune")
s.error(fmt.Sprintf("invalid rune %q", c))
goto redo
}
@ -386,7 +385,7 @@ func (s *scanner) number(c rune) {
hasDigit = true
}
if !hasDigit {
panic("malformed hex constant")
s.error("malformed hex constant")
}
s.ungetr()
s.lit = string(s.stopLit())
@ -404,7 +403,7 @@ func (s *scanner) number(c rune) {
if c != '.' && c != 'e' && c != 'E' && c != 'i' {
// octal
if has8or9 {
panic("malformed octal constant")
s.error("malformed octal constant")
}
s.ungetr()
s.lit = string(s.stopLit())
@ -434,7 +433,7 @@ func (s *scanner) number(c rune) {
c = s.getr()
}
if !isDigit(c) {
panic("malformed floating-point constant exponent")
s.error("malformed floating-point constant exponent")
}
for isDigit(c) {
c = s.getr()
@ -454,13 +453,14 @@ func (s *scanner) stdString() {
for {
r := s.getr()
if r == '\\' && !s.escape('"') {
panic(0)
continue // error already reported
}
if r == '"' {
break
}
if r < 0 {
panic("string not terminated")
s.error("string not terminated")
break
}
}
s.lit = string(s.stopLit())
@ -474,7 +474,8 @@ func (s *scanner) rawString() {
break
}
if r < 0 {
panic("string not terminated")
s.error("string not terminated")
break
}
// TODO(gri) deal with CRs (or don't?)
}
@ -547,7 +548,8 @@ func (s *scanner) fullComment() {
}
}
if r < 0 {
panic("comment not terminated")
s.error("comment not terminated")
return
}
}
}
@ -578,7 +580,7 @@ func (s *scanner) escape(quote rune) bool {
} else {
msg = "escape sequence not terminated"
}
panic(msg)
s.error(msg)
return false
}
@ -601,7 +603,7 @@ loop:
} else {
msg = "escape sequence not terminated"
}
panic(msg)
s.error(msg)
break loop
}
// d < base
@ -611,7 +613,7 @@ loop:
s.ungetr()
if x > max || 0xD800 <= x && x < 0xE000 /* surrogate range */ {
panic("escape sequence is invalid Unicode code point")
s.error("escape sequence is invalid Unicode code point")
return false
}

View File

@ -22,7 +22,7 @@ func TestScanner(t *testing.T) {
defer src.Close()
var s scanner
s.init(src)
s.init(src, nil)
for {
s.next()
if s.tok == _EOF {
@ -51,7 +51,7 @@ func TestTokens(t *testing.T) {
// scan source
var got scanner
got.init(&bytesReader{buf})
got.init(&bytesReader{buf}, nil)
got.next()
for i, want := range sampleTokens {
nlsemi := false
@ -252,3 +252,45 @@ var sampleTokens = [...]struct {
{_Type, "type", 0, 0},
{_Var, "var", 0, 0},
}
// TestScanErrors feeds a table of malformed sources to the scanner and
// checks that scanning the first token reports the expected error.
//
// For each entry, every error delivered to the handler during the single
// s.next() call must be on line 1 and carry exactly the expected message;
// an entry that triggers no error at all is also a failure. Every failure
// message is prefixed with the offending source so that it can be traced
// back to its table entry.
func TestScanErrors(t *testing.T) {
	for _, test := range []struct {
		src, msg string
	}{
		// rune-level errors
		{"fo\x00o", "invalid NUL character"},
		{"fo\ufeffo", "invalid BOM in the middle of the file"},
		{"\xff", "invalid UTF-8 encoding"},

		// token-level errors
		{"~", "bitwise complement operator is ^"},
		{"$", "invalid rune '$'"},
		{"0xyz", "malformed hex constant"},
		{"08", "malformed octal constant"},
		{"1.0e+x", "malformed floating-point constant exponent"},
		{`"foo`, "string not terminated"},
		{"`foo", "string not terminated"},
		{"/* foo", "comment not terminated"},
		{`"foo\z"`, "unknown escape sequence"},
		// {`"\x`, "escape sequence not terminated"},
		{`"\x"`, "illegal character U+0022 '\"' in escape sequence"},
		{`"\Uffffffff"`, "escape sequence is invalid Unicode code point"},
	} {
		var s scanner
		hasError := false
		// The handler runs synchronously inside s.next(), so capturing
		// the loop variable test here is safe.
		s.init(&bytesReader{[]byte(test.src)}, func(_, line int, msg string) {
			hasError = true
			// TODO(gri) test exact position as well
			if line != 1 {
				t.Errorf("%q: got line = %d; want 1", test.src, line)
			}
			if msg != test.msg {
				t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
			}
		})

		s.next()

		if !hasError {
			t.Errorf("%q: got no error; want %q", test.src, test.msg)
		}
	}
}

View File

@ -5,6 +5,7 @@
package syntax
import (
"fmt"
"io"
"unicode/utf8"
)
@ -15,7 +16,8 @@ import (
// suf r0 r w
type source struct {
src io.Reader
src io.Reader
errh ErrorHandler
// source buffer
buf [4 << 10]byte
@ -29,8 +31,10 @@ type source struct {
suf int // literal suffix; suf >= 0 means we are scanning a literal
}
func (s *source) init(src io.Reader) {
func (s *source) init(src io.Reader, errh ErrorHandler) {
s.src = src
s.errh = errh
s.buf[0] = utf8.RuneSelf // terminate with sentinel
s.offs = 0
s.r0, s.r, s.w = 0, 0, 0
@ -41,6 +45,18 @@ func (s *source) init(src io.Reader) {
s.suf = -1
}
// error reports msg at the source's current position (s.pos()) and
// current line (s.line) by delegating to error_at.
func (s *source) error(msg string) {
	pos, line := s.pos(), s.line
	s.error_at(pos, line, msg)
}
// error_at reports msg for the given position and line.
//
// If an error handler is installed, it is called with pos, line, and msg
// and error_at returns normally. Without a handler, error_at panics with
// the string "line: msg".
func (s *source) error_at(pos, line int, msg string) {
	if s.errh == nil {
		panic(fmt.Sprintf("%d: %s", line, msg))
	}
	s.errh(pos, line, msg)
}
// pos returns the sum of s.offs and the buffer read index s.r.
// NOTE(review): presumably s.offs is the source offset of the start of the
// buffer, making the result an absolute source offset; confirm against the
// field documentation of source.
func (s *source) pos() int {
	offset := s.offs + s.r
	return offset
}
@ -57,7 +73,7 @@ func (s *source) getr() rune {
if b := s.buf[s.r]; b < utf8.RuneSelf {
s.r++
if b == 0 {
panic("invalid NUL character")
s.error("invalid NUL character")
continue
}
if b == '\n' {
@ -73,7 +89,7 @@ func (s *source) getr() rune {
// BOM's are only allowed as the first character in a file
const BOM = 0xfeff
if r == BOM && s.r0 > 0 { // s.r0 is always > 0 after 1st character (fill will set it to 1)
panic("invalid BOM in the middle of the file")
s.error("invalid BOM in the middle of the file")
continue
}
return r
@ -81,14 +97,14 @@ func (s *source) getr() rune {
if w == 0 && s.err != nil {
if s.err != io.EOF {
panic(s.err)
s.error(s.err.Error())
}
return -1
}
if w == 1 && (s.r+utf8.UTFMax <= s.w || utf8.FullRune(s.buf[s.r:s.w])) {
s.r++
panic("invalid UTF-8 encoding")
s.error("invalid UTF-8 encoding")
continue
}
@ -119,7 +135,7 @@ func (s *source) fill() {
for i := 100; i > 0; i-- {
n, err := s.src.Read(s.buf[s.w : len(s.buf)-1]) // -1 to leave space for sentinel
if n < 0 {
panic("negative read")
s.error("negative read")
}
s.w += n
if n > 0 || err != nil {
@ -131,7 +147,7 @@ func (s *source) fill() {
}
}
panic("no progress")
s.error("no progress")
}
func (s *source) startLit() {

View File

@ -12,15 +12,17 @@ import (
type Mode uint
// ErrorHandler is the type of the callback accepted by the ReadXXX
// functions. When a non-nil handler is supplied, it is invoked for each
// error encountered, with the position and line at which the error was
// reported and the error message msg.
type ErrorHandler func(pos, line int, msg string)
// TODO(gri) These need a lot more work.
func ReadFile(filename string, mode Mode) (*File, error) {
func ReadFile(filename string, errh ErrorHandler, mode Mode) (*File, error) {
src, err := os.Open(filename)
if err != nil {
return nil, err
}
defer src.Close()
return Read(src, mode)
return Read(src, errh, mode)
}
type bytesReader struct {
@ -36,13 +38,13 @@ func (r *bytesReader) Read(p []byte) (int, error) {
return 0, io.EOF
}
func ReadBytes(src []byte, mode Mode) (*File, error) {
return Read(&bytesReader{src}, mode)
func ReadBytes(src []byte, errh ErrorHandler, mode Mode) (*File, error) {
return Read(&bytesReader{src}, errh, mode)
}
func Read(src io.Reader, mode Mode) (*File, error) {
func Read(src io.Reader, errh ErrorHandler, mode Mode) (*File, error) {
var p parser
p.init(src)
p.init(src, errh)
// skip initial BOM if present
if p.getr() != '\ufeff' {
@ -52,8 +54,8 @@ func Read(src io.Reader, mode Mode) (*File, error) {
p.next()
ast := p.file()
if nerrors > 0 {
return nil, fmt.Errorf("%d syntax errors", nerrors)
if p.nerrors > 0 {
return nil, fmt.Errorf("%d syntax errors", p.nerrors)
}
return ast, nil