mirror of https://github.com/golang/go.git
383 lines
11 KiB
Go
383 lines
11 KiB
Go
// Copyright 2016 The Go Authors. All rights reserved.
|
||
// Use of this source code is governed by a BSD-style
|
||
// license that can be found in the LICENSE file.
|
||
|
||
package syntax
|
||
|
||
import (
|
||
"fmt"
|
||
"os"
|
||
"strings"
|
||
"testing"
|
||
)
|
||
|
||
func TestScanner(t *testing.T) {
|
||
if testing.Short() {
|
||
t.Skip("skipping test in short mode")
|
||
}
|
||
|
||
src, err := os.Open("parser.go")
|
||
if err != nil {
|
||
t.Fatal(err)
|
||
}
|
||
defer src.Close()
|
||
|
||
var s scanner
|
||
s.init(src, nil, nil)
|
||
for {
|
||
s.next()
|
||
if s.tok == _EOF {
|
||
break
|
||
}
|
||
switch s.tok {
|
||
case _Name:
|
||
fmt.Println(s.line, s.tok, "=>", s.lit)
|
||
case _Operator:
|
||
fmt.Println(s.line, s.tok, "=>", s.op, s.prec)
|
||
default:
|
||
fmt.Println(s.line, s.tok)
|
||
}
|
||
}
|
||
}
|
||
|
||
func TestTokens(t *testing.T) {
|
||
// make source
|
||
var buf []byte
|
||
for i, s := range sampleTokens {
|
||
buf = append(buf, "\t\t\t\t"[:i&3]...) // leading indentation
|
||
buf = append(buf, s.src...) // token
|
||
buf = append(buf, " "[:i&7]...) // trailing spaces
|
||
buf = append(buf, "/* foo */ // bar\n"...) // comments
|
||
}
|
||
|
||
// scan source
|
||
var got scanner
|
||
got.init(&bytesReader{buf}, nil, nil)
|
||
got.next()
|
||
for i, want := range sampleTokens {
|
||
nlsemi := false
|
||
|
||
if got.line != uint(i+linebase) {
|
||
t.Errorf("got line %d; want %d", got.line, i+linebase)
|
||
}
|
||
|
||
if got.tok != want.tok {
|
||
t.Errorf("got tok = %s; want %s", got.tok, want.tok)
|
||
continue
|
||
}
|
||
|
||
switch want.tok {
|
||
case _Semi:
|
||
if got.lit != "semicolon" {
|
||
t.Errorf("got %s; want semicolon", got.lit)
|
||
}
|
||
|
||
case _Name, _Literal:
|
||
if got.lit != want.src {
|
||
t.Errorf("got lit = %q; want %q", got.lit, want.src)
|
||
continue
|
||
}
|
||
nlsemi = true
|
||
|
||
case _Operator, _AssignOp, _IncOp:
|
||
if got.op != want.op {
|
||
t.Errorf("got op = %s; want %s", got.op, want.op)
|
||
continue
|
||
}
|
||
if got.prec != want.prec {
|
||
t.Errorf("got prec = %d; want %d", got.prec, want.prec)
|
||
continue
|
||
}
|
||
nlsemi = want.tok == _IncOp
|
||
|
||
case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return:
|
||
nlsemi = true
|
||
}
|
||
|
||
if nlsemi {
|
||
got.next()
|
||
if got.tok != _Semi {
|
||
t.Errorf("got tok = %s; want ;", got.tok)
|
||
continue
|
||
}
|
||
if got.lit != "newline" {
|
||
t.Errorf("got %s; want newline", got.lit)
|
||
}
|
||
}
|
||
|
||
got.next()
|
||
}
|
||
|
||
if got.tok != _EOF {
|
||
t.Errorf("got %q; want _EOF", got.tok)
|
||
}
|
||
}
|
||
|
||
var sampleTokens = [...]struct {
|
||
tok token
|
||
src string
|
||
op Operator
|
||
prec int
|
||
}{
|
||
// name samples
|
||
{_Name, "x", 0, 0},
|
||
{_Name, "X123", 0, 0},
|
||
{_Name, "foo", 0, 0},
|
||
{_Name, "Foo123", 0, 0},
|
||
{_Name, "foo_bar", 0, 0},
|
||
{_Name, "_", 0, 0},
|
||
{_Name, "_foobar", 0, 0},
|
||
{_Name, "a۰۱۸", 0, 0},
|
||
{_Name, "foo६४", 0, 0},
|
||
{_Name, "bar9876", 0, 0},
|
||
{_Name, "ŝ", 0, 0},
|
||
{_Name, "ŝfoo", 0, 0},
|
||
|
||
// literal samples
|
||
{_Literal, "0", 0, 0},
|
||
{_Literal, "1", 0, 0},
|
||
{_Literal, "12345", 0, 0},
|
||
{_Literal, "123456789012345678890123456789012345678890", 0, 0},
|
||
{_Literal, "01234567", 0, 0},
|
||
{_Literal, "0x0", 0, 0},
|
||
{_Literal, "0xcafebabe", 0, 0},
|
||
{_Literal, "0.", 0, 0},
|
||
{_Literal, "0.e0", 0, 0},
|
||
{_Literal, "0.e-1", 0, 0},
|
||
{_Literal, "0.e+123", 0, 0},
|
||
{_Literal, ".0", 0, 0},
|
||
{_Literal, ".0E00", 0, 0},
|
||
{_Literal, ".0E-0123", 0, 0},
|
||
{_Literal, ".0E+12345678901234567890", 0, 0},
|
||
{_Literal, ".45e1", 0, 0},
|
||
{_Literal, "3.14159265", 0, 0},
|
||
{_Literal, "1e0", 0, 0},
|
||
{_Literal, "1e+100", 0, 0},
|
||
{_Literal, "1e-100", 0, 0},
|
||
{_Literal, "2.71828e-1000", 0, 0},
|
||
{_Literal, "0i", 0, 0},
|
||
{_Literal, "1i", 0, 0},
|
||
{_Literal, "012345678901234567889i", 0, 0},
|
||
{_Literal, "123456789012345678890i", 0, 0},
|
||
{_Literal, "0.i", 0, 0},
|
||
{_Literal, ".0i", 0, 0},
|
||
{_Literal, "3.14159265i", 0, 0},
|
||
{_Literal, "1e0i", 0, 0},
|
||
{_Literal, "1e+100i", 0, 0},
|
||
{_Literal, "1e-100i", 0, 0},
|
||
{_Literal, "2.71828e-1000i", 0, 0},
|
||
{_Literal, "'a'", 0, 0},
|
||
{_Literal, "'\\000'", 0, 0},
|
||
{_Literal, "'\\xFF'", 0, 0},
|
||
{_Literal, "'\\uff16'", 0, 0},
|
||
{_Literal, "'\\U0000ff16'", 0, 0},
|
||
{_Literal, "`foobar`", 0, 0},
|
||
{_Literal, "`foo\tbar`", 0, 0},
|
||
{_Literal, "`\r`", 0, 0},
|
||
|
||
// operators
|
||
{_Operator, "||", OrOr, precOrOr},
|
||
|
||
{_Operator, "&&", AndAnd, precAndAnd},
|
||
|
||
{_Operator, "==", Eql, precCmp},
|
||
{_Operator, "!=", Neq, precCmp},
|
||
{_Operator, "<", Lss, precCmp},
|
||
{_Operator, "<=", Leq, precCmp},
|
||
{_Operator, ">", Gtr, precCmp},
|
||
{_Operator, ">=", Geq, precCmp},
|
||
|
||
{_Operator, "+", Add, precAdd},
|
||
{_Operator, "-", Sub, precAdd},
|
||
{_Operator, "|", Or, precAdd},
|
||
{_Operator, "^", Xor, precAdd},
|
||
|
||
{_Star, "*", Mul, precMul},
|
||
{_Operator, "/", Div, precMul},
|
||
{_Operator, "%", Rem, precMul},
|
||
{_Operator, "&", And, precMul},
|
||
{_Operator, "&^", AndNot, precMul},
|
||
{_Operator, "<<", Shl, precMul},
|
||
{_Operator, ">>", Shr, precMul},
|
||
|
||
// assignment operations
|
||
{_AssignOp, "+=", Add, precAdd},
|
||
{_AssignOp, "-=", Sub, precAdd},
|
||
{_AssignOp, "|=", Or, precAdd},
|
||
{_AssignOp, "^=", Xor, precAdd},
|
||
|
||
{_AssignOp, "*=", Mul, precMul},
|
||
{_AssignOp, "/=", Div, precMul},
|
||
{_AssignOp, "%=", Rem, precMul},
|
||
{_AssignOp, "&=", And, precMul},
|
||
{_AssignOp, "&^=", AndNot, precMul},
|
||
{_AssignOp, "<<=", Shl, precMul},
|
||
{_AssignOp, ">>=", Shr, precMul},
|
||
|
||
// other operations
|
||
{_IncOp, "++", Add, precAdd},
|
||
{_IncOp, "--", Sub, precAdd},
|
||
{_Assign, "=", 0, 0},
|
||
{_Define, ":=", 0, 0},
|
||
{_Arrow, "<-", 0, 0},
|
||
|
||
// delimiters
|
||
{_Lparen, "(", 0, 0},
|
||
{_Lbrack, "[", 0, 0},
|
||
{_Lbrace, "{", 0, 0},
|
||
{_Rparen, ")", 0, 0},
|
||
{_Rbrack, "]", 0, 0},
|
||
{_Rbrace, "}", 0, 0},
|
||
{_Comma, ",", 0, 0},
|
||
{_Semi, ";", 0, 0},
|
||
{_Colon, ":", 0, 0},
|
||
{_Dot, ".", 0, 0},
|
||
{_DotDotDot, "...", 0, 0},
|
||
|
||
// keywords
|
||
{_Break, "break", 0, 0},
|
||
{_Case, "case", 0, 0},
|
||
{_Chan, "chan", 0, 0},
|
||
{_Const, "const", 0, 0},
|
||
{_Continue, "continue", 0, 0},
|
||
{_Default, "default", 0, 0},
|
||
{_Defer, "defer", 0, 0},
|
||
{_Else, "else", 0, 0},
|
||
{_Fallthrough, "fallthrough", 0, 0},
|
||
{_For, "for", 0, 0},
|
||
{_Func, "func", 0, 0},
|
||
{_Go, "go", 0, 0},
|
||
{_Goto, "goto", 0, 0},
|
||
{_If, "if", 0, 0},
|
||
{_Import, "import", 0, 0},
|
||
{_Interface, "interface", 0, 0},
|
||
{_Map, "map", 0, 0},
|
||
{_Package, "package", 0, 0},
|
||
{_Range, "range", 0, 0},
|
||
{_Return, "return", 0, 0},
|
||
{_Select, "select", 0, 0},
|
||
{_Struct, "struct", 0, 0},
|
||
{_Switch, "switch", 0, 0},
|
||
{_Type, "type", 0, 0},
|
||
{_Var, "var", 0, 0},
|
||
}
|
||
|
||
func TestScanErrors(t *testing.T) {
|
||
for _, test := range []struct {
|
||
src, msg string
|
||
line, col uint // 0-based
|
||
}{
|
||
// Note: Positions for lexical errors are the earliest position
|
||
// where the error is apparent, not the beginning of the respective
|
||
// token.
|
||
|
||
// rune-level errors
|
||
{"fo\x00o", "invalid NUL character", 0, 2},
|
||
{"foo\n\ufeff bar", "invalid BOM in the middle of the file", 1, 0},
|
||
{"foo\n\n\xff ", "invalid UTF-8 encoding", 2, 0},
|
||
|
||
// token-level errors
|
||
{"\u00BD" /* ½ */, "invalid identifier character U+00BD '½'", 0, 0},
|
||
{"\U0001d736\U0001d737\U0001d738_½" /* 𝜶𝜷𝜸_½ */, "invalid identifier character U+00BD '½'", 0, 13 /* byte offset */},
|
||
{"\U0001d7d8" /* 𝟘 */, "identifier cannot begin with digit U+1D7D8 '𝟘'", 0, 0},
|
||
{"foo\U0001d7d8_½" /* foo𝟘_½ */, "invalid identifier character U+00BD '½'", 0, 8 /* byte offset */},
|
||
|
||
{"x + ~y", "bitwise complement operator is ^", 0, 4},
|
||
{"foo$bar = 0", "invalid character U+0024 '$'", 0, 3},
|
||
{"const x = 0xyz", "malformed hex constant", 0, 12},
|
||
{"0123456789", "malformed octal constant", 0, 10},
|
||
{"0123456789. /* foobar", "comment not terminated", 0, 12}, // valid float constant
|
||
{"0123456789e0 /*\nfoobar", "comment not terminated", 0, 13}, // valid float constant
|
||
{"var a, b = 08, 07\n", "malformed octal constant", 0, 13},
|
||
{"(x + 1.0e+x)", "malformed floating-point constant exponent", 0, 10},
|
||
|
||
{`''`, "empty character literal or unescaped ' in character literal", 0, 1},
|
||
{"'\n", "newline in character literal", 0, 1},
|
||
{`'\`, "invalid character literal (missing closing ')", 0, 0},
|
||
{`'\'`, "invalid character literal (missing closing ')", 0, 0},
|
||
{`'\x`, "invalid character literal (missing closing ')", 0, 0},
|
||
{`'\x'`, "non-hex character in escape sequence: '", 0, 3},
|
||
{`'\y'`, "unknown escape sequence", 0, 2},
|
||
{`'\x0'`, "non-hex character in escape sequence: '", 0, 4},
|
||
{`'\00'`, "non-octal character in escape sequence: '", 0, 4},
|
||
{`'\377' /*`, "comment not terminated", 0, 7}, // valid octal escape
|
||
{`'\378`, "non-octal character in escape sequence: 8", 0, 4},
|
||
{`'\400'`, "octal escape value > 255: 256", 0, 5},
|
||
{`'xx`, "invalid character literal (missing closing ')", 0, 0},
|
||
{`'xx'`, "invalid character literal (more than one character)", 0, 0},
|
||
|
||
{"\"\n", "newline in string", 0, 1},
|
||
{`"`, "string not terminated", 0, 0},
|
||
{`"foo`, "string not terminated", 0, 0},
|
||
{"`", "string not terminated", 0, 0},
|
||
{"`foo", "string not terminated", 0, 0},
|
||
{"/*/", "comment not terminated", 0, 0},
|
||
{"/*\n\nfoo", "comment not terminated", 0, 0},
|
||
{"/*\n\nfoo", "comment not terminated", 0, 0},
|
||
{`"\`, "string not terminated", 0, 0},
|
||
{`"\"`, "string not terminated", 0, 0},
|
||
{`"\x`, "string not terminated", 0, 0},
|
||
{`"\x"`, "non-hex character in escape sequence: \"", 0, 3},
|
||
{`"\y"`, "unknown escape sequence", 0, 2},
|
||
{`"\x0"`, "non-hex character in escape sequence: \"", 0, 4},
|
||
{`"\00"`, "non-octal character in escape sequence: \"", 0, 4},
|
||
{`"\377" /*`, "comment not terminated", 0, 7}, // valid octal escape
|
||
{`"\378"`, "non-octal character in escape sequence: 8", 0, 4},
|
||
{`"\400"`, "octal escape value > 255: 256", 0, 5},
|
||
|
||
{`s := "foo\z"`, "unknown escape sequence", 0, 10},
|
||
{`s := "foo\z00\nbar"`, "unknown escape sequence", 0, 10},
|
||
{`"\x`, "string not terminated", 0, 0},
|
||
{`"\x"`, "non-hex character in escape sequence: \"", 0, 3},
|
||
{`var s string = "\x"`, "non-hex character in escape sequence: \"", 0, 18},
|
||
{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 0, 18},
|
||
|
||
// former problem cases
|
||
{"package p\n\n\xef", "invalid UTF-8 encoding", 2, 0},
|
||
} {
|
||
var s scanner
|
||
nerrors := 0
|
||
s.init(&bytesReader{[]byte(test.src)}, func(line, col uint, msg string) {
|
||
nerrors++
|
||
// only check the first error
|
||
if nerrors == 1 {
|
||
if msg != test.msg {
|
||
t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
|
||
}
|
||
if line != test.line+linebase {
|
||
t.Errorf("%q: got line = %d; want %d", test.src, line, test.line+linebase)
|
||
}
|
||
if col != test.col+colbase {
|
||
t.Errorf("%q: got col = %d; want %d", test.src, col, test.col+colbase)
|
||
}
|
||
} else if nerrors > 1 {
|
||
// TODO(gri) make this use position info
|
||
t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line)
|
||
}
|
||
}, nil)
|
||
|
||
for {
|
||
s.next()
|
||
if s.tok == _EOF {
|
||
break
|
||
}
|
||
}
|
||
|
||
if nerrors == 0 {
|
||
t.Errorf("%q: got no error; want %q", test.src, test.msg)
|
||
}
|
||
}
|
||
}
|
||
|
||
func TestIssue21938(t *testing.T) {
|
||
s := "/*" + strings.Repeat(" ", 4089) + "*/ .5"
|
||
|
||
var got scanner
|
||
got.init(strings.NewReader(s), nil, nil)
|
||
got.next()
|
||
|
||
if got.tok != _Literal || got.lit != ".5" {
|
||
t.Errorf("got %s %q; want %s %q", got.tok, got.lit, _Literal, ".5")
|
||
}
|
||
}
|