go/src/cmd/compile/internal/syntax/scanner_test.go

383 lines
11 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package syntax
import (
"fmt"
"os"
"strings"
"testing"
)
func TestScanner(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode")
}
src, err := os.Open("parser.go")
if err != nil {
t.Fatal(err)
}
defer src.Close()
var s scanner
s.init(src, nil, nil)
for {
s.next()
if s.tok == _EOF {
break
}
switch s.tok {
case _Name:
fmt.Println(s.line, s.tok, "=>", s.lit)
case _Operator:
fmt.Println(s.line, s.tok, "=>", s.op, s.prec)
default:
fmt.Println(s.line, s.tok)
}
}
}
func TestTokens(t *testing.T) {
// make source
var buf []byte
for i, s := range sampleTokens {
buf = append(buf, "\t\t\t\t"[:i&3]...) // leading indentation
buf = append(buf, s.src...) // token
buf = append(buf, " "[:i&7]...) // trailing spaces
buf = append(buf, "/* foo */ // bar\n"...) // comments
}
// scan source
var got scanner
got.init(&bytesReader{buf}, nil, nil)
got.next()
for i, want := range sampleTokens {
nlsemi := false
if got.line != uint(i+linebase) {
t.Errorf("got line %d; want %d", got.line, i+linebase)
}
if got.tok != want.tok {
t.Errorf("got tok = %s; want %s", got.tok, want.tok)
continue
}
switch want.tok {
case _Semi:
if got.lit != "semicolon" {
t.Errorf("got %s; want semicolon", got.lit)
}
case _Name, _Literal:
if got.lit != want.src {
t.Errorf("got lit = %q; want %q", got.lit, want.src)
continue
}
nlsemi = true
case _Operator, _AssignOp, _IncOp:
if got.op != want.op {
t.Errorf("got op = %s; want %s", got.op, want.op)
continue
}
if got.prec != want.prec {
t.Errorf("got prec = %d; want %d", got.prec, want.prec)
continue
}
nlsemi = want.tok == _IncOp
case _Rparen, _Rbrack, _Rbrace, _Break, _Continue, _Fallthrough, _Return:
nlsemi = true
}
if nlsemi {
got.next()
if got.tok != _Semi {
t.Errorf("got tok = %s; want ;", got.tok)
continue
}
if got.lit != "newline" {
t.Errorf("got %s; want newline", got.lit)
}
}
got.next()
}
if got.tok != _EOF {
t.Errorf("got %q; want _EOF", got.tok)
}
}
var sampleTokens = [...]struct {
tok token
src string
op Operator
prec int
}{
// name samples
{_Name, "x", 0, 0},
{_Name, "X123", 0, 0},
{_Name, "foo", 0, 0},
{_Name, "Foo123", 0, 0},
{_Name, "foo_bar", 0, 0},
{_Name, "_", 0, 0},
{_Name, "_foobar", 0, 0},
{_Name, "a۰۱۸", 0, 0},
{_Name, "foo६४", 0, 0},
{_Name, "bar", 0, 0},
{_Name, "ŝ", 0, 0},
{_Name, "ŝfoo", 0, 0},
// literal samples
{_Literal, "0", 0, 0},
{_Literal, "1", 0, 0},
{_Literal, "12345", 0, 0},
{_Literal, "123456789012345678890123456789012345678890", 0, 0},
{_Literal, "01234567", 0, 0},
{_Literal, "0x0", 0, 0},
{_Literal, "0xcafebabe", 0, 0},
{_Literal, "0.", 0, 0},
{_Literal, "0.e0", 0, 0},
{_Literal, "0.e-1", 0, 0},
{_Literal, "0.e+123", 0, 0},
{_Literal, ".0", 0, 0},
{_Literal, ".0E00", 0, 0},
{_Literal, ".0E-0123", 0, 0},
{_Literal, ".0E+12345678901234567890", 0, 0},
{_Literal, ".45e1", 0, 0},
{_Literal, "3.14159265", 0, 0},
{_Literal, "1e0", 0, 0},
{_Literal, "1e+100", 0, 0},
{_Literal, "1e-100", 0, 0},
{_Literal, "2.71828e-1000", 0, 0},
{_Literal, "0i", 0, 0},
{_Literal, "1i", 0, 0},
{_Literal, "012345678901234567889i", 0, 0},
{_Literal, "123456789012345678890i", 0, 0},
{_Literal, "0.i", 0, 0},
{_Literal, ".0i", 0, 0},
{_Literal, "3.14159265i", 0, 0},
{_Literal, "1e0i", 0, 0},
{_Literal, "1e+100i", 0, 0},
{_Literal, "1e-100i", 0, 0},
{_Literal, "2.71828e-1000i", 0, 0},
{_Literal, "'a'", 0, 0},
{_Literal, "'\\000'", 0, 0},
{_Literal, "'\\xFF'", 0, 0},
{_Literal, "'\\uff16'", 0, 0},
{_Literal, "'\\U0000ff16'", 0, 0},
{_Literal, "`foobar`", 0, 0},
{_Literal, "`foo\tbar`", 0, 0},
{_Literal, "`\r`", 0, 0},
// operators
{_Operator, "||", OrOr, precOrOr},
{_Operator, "&&", AndAnd, precAndAnd},
{_Operator, "==", Eql, precCmp},
{_Operator, "!=", Neq, precCmp},
{_Operator, "<", Lss, precCmp},
{_Operator, "<=", Leq, precCmp},
{_Operator, ">", Gtr, precCmp},
{_Operator, ">=", Geq, precCmp},
{_Operator, "+", Add, precAdd},
{_Operator, "-", Sub, precAdd},
{_Operator, "|", Or, precAdd},
{_Operator, "^", Xor, precAdd},
{_Star, "*", Mul, precMul},
{_Operator, "/", Div, precMul},
{_Operator, "%", Rem, precMul},
{_Operator, "&", And, precMul},
{_Operator, "&^", AndNot, precMul},
{_Operator, "<<", Shl, precMul},
{_Operator, ">>", Shr, precMul},
// assignment operations
{_AssignOp, "+=", Add, precAdd},
{_AssignOp, "-=", Sub, precAdd},
{_AssignOp, "|=", Or, precAdd},
{_AssignOp, "^=", Xor, precAdd},
{_AssignOp, "*=", Mul, precMul},
{_AssignOp, "/=", Div, precMul},
{_AssignOp, "%=", Rem, precMul},
{_AssignOp, "&=", And, precMul},
{_AssignOp, "&^=", AndNot, precMul},
{_AssignOp, "<<=", Shl, precMul},
{_AssignOp, ">>=", Shr, precMul},
// other operations
{_IncOp, "++", Add, precAdd},
{_IncOp, "--", Sub, precAdd},
{_Assign, "=", 0, 0},
{_Define, ":=", 0, 0},
{_Arrow, "<-", 0, 0},
// delimiters
{_Lparen, "(", 0, 0},
{_Lbrack, "[", 0, 0},
{_Lbrace, "{", 0, 0},
{_Rparen, ")", 0, 0},
{_Rbrack, "]", 0, 0},
{_Rbrace, "}", 0, 0},
{_Comma, ",", 0, 0},
{_Semi, ";", 0, 0},
{_Colon, ":", 0, 0},
{_Dot, ".", 0, 0},
{_DotDotDot, "...", 0, 0},
// keywords
{_Break, "break", 0, 0},
{_Case, "case", 0, 0},
{_Chan, "chan", 0, 0},
{_Const, "const", 0, 0},
{_Continue, "continue", 0, 0},
{_Default, "default", 0, 0},
{_Defer, "defer", 0, 0},
{_Else, "else", 0, 0},
{_Fallthrough, "fallthrough", 0, 0},
{_For, "for", 0, 0},
{_Func, "func", 0, 0},
{_Go, "go", 0, 0},
{_Goto, "goto", 0, 0},
{_If, "if", 0, 0},
{_Import, "import", 0, 0},
{_Interface, "interface", 0, 0},
{_Map, "map", 0, 0},
{_Package, "package", 0, 0},
{_Range, "range", 0, 0},
{_Return, "return", 0, 0},
{_Select, "select", 0, 0},
{_Struct, "struct", 0, 0},
{_Switch, "switch", 0, 0},
{_Type, "type", 0, 0},
{_Var, "var", 0, 0},
}
func TestScanErrors(t *testing.T) {
for _, test := range []struct {
src, msg string
line, col uint // 0-based
}{
// Note: Positions for lexical errors are the earliest position
// where the error is apparent, not the beginning of the respective
// token.
// rune-level errors
{"fo\x00o", "invalid NUL character", 0, 2},
{"foo\n\ufeff bar", "invalid BOM in the middle of the file", 1, 0},
{"foo\n\n\xff ", "invalid UTF-8 encoding", 2, 0},
// token-level errors
{"\u00BD" /* ½ */, "invalid identifier character U+00BD '½'", 0, 0},
{"\U0001d736\U0001d737\U0001d738_½" /* 𝜶𝜷𝜸_½ */, "invalid identifier character U+00BD '½'", 0, 13 /* byte offset */},
{"\U0001d7d8" /* 𝟘 */, "identifier cannot begin with digit U+1D7D8 '𝟘'", 0, 0},
{"foo\U0001d7d8_½" /* foo𝟘_½ */, "invalid identifier character U+00BD '½'", 0, 8 /* byte offset */},
{"x + ~y", "bitwise complement operator is ^", 0, 4},
{"foo$bar = 0", "invalid character U+0024 '$'", 0, 3},
{"const x = 0xyz", "malformed hex constant", 0, 12},
{"0123456789", "malformed octal constant", 0, 10},
{"0123456789. /* foobar", "comment not terminated", 0, 12}, // valid float constant
{"0123456789e0 /*\nfoobar", "comment not terminated", 0, 13}, // valid float constant
{"var a, b = 08, 07\n", "malformed octal constant", 0, 13},
{"(x + 1.0e+x)", "malformed floating-point constant exponent", 0, 10},
{`''`, "empty character literal or unescaped ' in character literal", 0, 1},
{"'\n", "newline in character literal", 0, 1},
{`'\`, "invalid character literal (missing closing ')", 0, 0},
{`'\'`, "invalid character literal (missing closing ')", 0, 0},
{`'\x`, "invalid character literal (missing closing ')", 0, 0},
{`'\x'`, "non-hex character in escape sequence: '", 0, 3},
{`'\y'`, "unknown escape sequence", 0, 2},
{`'\x0'`, "non-hex character in escape sequence: '", 0, 4},
{`'\00'`, "non-octal character in escape sequence: '", 0, 4},
{`'\377' /*`, "comment not terminated", 0, 7}, // valid octal escape
{`'\378`, "non-octal character in escape sequence: 8", 0, 4},
{`'\400'`, "octal escape value > 255: 256", 0, 5},
{`'xx`, "invalid character literal (missing closing ')", 0, 0},
{`'xx'`, "invalid character literal (more than one character)", 0, 0},
{"\"\n", "newline in string", 0, 1},
{`"`, "string not terminated", 0, 0},
{`"foo`, "string not terminated", 0, 0},
{"`", "string not terminated", 0, 0},
{"`foo", "string not terminated", 0, 0},
{"/*/", "comment not terminated", 0, 0},
{"/*\n\nfoo", "comment not terminated", 0, 0},
{"/*\n\nfoo", "comment not terminated", 0, 0},
{`"\`, "string not terminated", 0, 0},
{`"\"`, "string not terminated", 0, 0},
{`"\x`, "string not terminated", 0, 0},
{`"\x"`, "non-hex character in escape sequence: \"", 0, 3},
{`"\y"`, "unknown escape sequence", 0, 2},
{`"\x0"`, "non-hex character in escape sequence: \"", 0, 4},
{`"\00"`, "non-octal character in escape sequence: \"", 0, 4},
{`"\377" /*`, "comment not terminated", 0, 7}, // valid octal escape
{`"\378"`, "non-octal character in escape sequence: 8", 0, 4},
{`"\400"`, "octal escape value > 255: 256", 0, 5},
{`s := "foo\z"`, "unknown escape sequence", 0, 10},
{`s := "foo\z00\nbar"`, "unknown escape sequence", 0, 10},
{`"\x`, "string not terminated", 0, 0},
{`"\x"`, "non-hex character in escape sequence: \"", 0, 3},
{`var s string = "\x"`, "non-hex character in escape sequence: \"", 0, 18},
{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 0, 18},
// former problem cases
{"package p\n\n\xef", "invalid UTF-8 encoding", 2, 0},
} {
var s scanner
nerrors := 0
s.init(&bytesReader{[]byte(test.src)}, func(line, col uint, msg string) {
nerrors++
// only check the first error
if nerrors == 1 {
if msg != test.msg {
t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
}
if line != test.line+linebase {
t.Errorf("%q: got line = %d; want %d", test.src, line, test.line+linebase)
}
if col != test.col+colbase {
t.Errorf("%q: got col = %d; want %d", test.src, col, test.col+colbase)
}
} else if nerrors > 1 {
// TODO(gri) make this use position info
t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line)
}
}, nil)
for {
s.next()
if s.tok == _EOF {
break
}
}
if nerrors == 0 {
t.Errorf("%q: got no error; want %q", test.src, test.msg)
}
}
}
func TestIssue21938(t *testing.T) {
s := "/*" + strings.Repeat(" ", 4089) + "*/ .5"
var got scanner
got.init(strings.NewReader(s), nil, nil)
got.next()
if got.tok != _Literal || got.lit != ".5" {
t.Errorf("got %s %q; want %s %q", got.tok, got.lit, _Literal, ".5")
}
}