diff --git a/src/go/parser/error_test.go b/src/go/parser/error_test.go index c2b9ca0b72..a4e17dd6db 100644 --- a/src/go/parser/error_test.go +++ b/src/go/parser/error_test.go @@ -62,8 +62,9 @@ func getPos(fset *token.FileSet, filename string, offset int) token.Pos { // a regular expression that matches the expected error message. // The special form /* ERROR HERE "rx" */ must be used for error // messages that appear immediately after a token, rather than at -// a token's position. -var errRx = regexp.MustCompile(`^/\* *ERROR *(HERE)? *"([^"]*)" *\*/$`) +// a token's position, and ERROR AFTER means after the comment +// (e.g. at end of line). +var errRx = regexp.MustCompile(`^/\* *ERROR *(HERE|AFTER)? *"([^"]*)" *\*/$`) // expectedErrors collects the regular expressions of ERROR comments found // in files and returns them as a map of error positions to error messages. @@ -86,9 +87,12 @@ func expectedErrors(fset *token.FileSet, filename string, src []byte) map[token. case token.COMMENT: s := errRx.FindStringSubmatch(lit) if len(s) == 3 { - pos := prev if s[1] == "HERE" { - pos = here + pos = here // start of comment + } else if s[1] == "AFTER" { + pos += token.Pos(len(lit)) // end of comment + } else { + pos = prev // token prior to comment } errors[pos] = s[2] } diff --git a/src/go/parser/parser.go b/src/go/parser/parser.go index 1edc2e9a4d..5ee53fc81e 100644 --- a/src/go/parser/parser.go +++ b/src/go/parser/parser.go @@ -213,7 +213,7 @@ func (p *parser) next() { // The comment is on same line as the previous token; it // cannot be a lead comment but may be a line comment. comment, endline = p.consumeCommentGroup(0) - if p.file.Line(p.pos) != endline || p.tok == token.EOF { + if p.file.Line(p.pos) != endline || p.tok == token.SEMICOLON || p.tok == token.EOF { // The next token is on a different line, thus // the last comment group is a line comment. 
p.lineComment = comment @@ -313,7 +313,8 @@ func (p *parser) expectClosing(tok token.Token, context string) token.Pos { return p.expect(tok) } -func (p *parser) expectSemi() { +// expectSemi consumes a semicolon and returns the applicable line comment. +func (p *parser) expectSemi() (comment *ast.CommentGroup) { // semicolon is optional before a closing ')' or '}' if p.tok != token.RPAREN && p.tok != token.RBRACE { switch p.tok { @@ -322,12 +323,22 @@ func (p *parser) expectSemi() { p.errorExpected(p.pos, "';'") fallthrough case token.SEMICOLON: - p.next() + if p.lit == ";" { + // explicit semicolon + p.next() + comment = p.lineComment // use following comments + } else { + // artificial semicolon + comment = p.lineComment // use preceding comments + p.next() + } + return comment default: p.errorExpected(p.pos, "';'") p.advance(stmtStart) } } + return nil } func (p *parser) atComma(context string, follow token.Token) bool { @@ -695,9 +706,9 @@ func (p *parser) parseFieldDecl() *ast.Field { p.next() } - p.expectSemi() // call before accessing p.linecomment + comment := p.expectSemi() - field := &ast.Field{Doc: doc, Names: names, Type: typ, Tag: tag, Comment: p.lineComment} + field := &ast.Field{Doc: doc, Names: names, Type: typ, Tag: tag, Comment: comment} return field } @@ -1176,19 +1187,16 @@ parseElements: if f.Names == nil { f.Type = p.embeddedElem(f.Type) } - p.expectSemi() - f.Comment = p.lineComment + f.Comment = p.expectSemi() list = append(list, f) case p.tok == token.TILDE: typ := p.embeddedElem(nil) - p.expectSemi() - comment := p.lineComment + comment := p.expectSemi() list = append(list, &ast.Field{Type: typ, Comment: comment}) default: if t := p.tryIdentOrType(); t != nil { typ := p.embeddedElem(t) - p.expectSemi() - comment := p.lineComment + comment := p.expectSemi() list = append(list, &ast.Field{Type: typ, Comment: comment}) } else { break parseElements @@ -2446,14 +2454,14 @@ func (p *parser) parseImportSpec(doc *ast.CommentGroup, _ token.Token, _ 
int) as p.error(pos, "missing import path") p.advance(exprEnd) } - p.expectSemi() // call before accessing p.linecomment + comment := p.expectSemi() // collect imports spec := &ast.ImportSpec{ Doc: doc, Name: ident, Path: &ast.BasicLit{ValuePos: pos, Kind: token.STRING, Value: path}, - Comment: p.lineComment, + Comment: comment, } p.imports = append(p.imports, spec) @@ -2489,14 +2497,14 @@ func (p *parser) parseValueSpec(doc *ast.CommentGroup, keyword token.Token, iota default: panic("unreachable") } - p.expectSemi() // call before accessing p.linecomment + comment := p.expectSemi() spec := &ast.ValueSpec{ Doc: doc, Names: idents, Type: typ, Values: values, - Comment: p.lineComment, + Comment: comment, } return spec } @@ -2589,8 +2597,7 @@ func (p *parser) parseTypeSpec(doc *ast.CommentGroup, _ token.Token, _ int) ast. spec.Type = p.parseType() } - p.expectSemi() // call before accessing p.linecomment - spec.Comment = p.lineComment + spec.Comment = p.expectSemi() return spec } diff --git a/src/go/parser/parser_test.go b/src/go/parser/parser_test.go index cae5602882..6d559e231c 100644 --- a/src/go/parser/parser_test.go +++ b/src/go/parser/parser_test.go @@ -415,6 +415,11 @@ type T struct { F2 int // F2 line comment // f3 lead comment f3 int // f3 line comment + + f4 int /* not a line comment */ ; + f5 int ; // f5 line comment + f6 int ; /* f6 line comment */ + f7 int ; /*f7a*/ /*f7b*/ //f7c } `, ParseComments) if err != nil { @@ -423,6 +428,11 @@ type T struct { checkFieldComments(t, f, "T.F1", "/* F1 lead comment *///", "/* F1 */// line comment") checkFieldComments(t, f, "T.F2", "// F2 lead// comment", "// F2 line comment") checkFieldComments(t, f, "T.f3", "// f3 lead comment", "// f3 line comment") + checkFieldComments(t, f, "T.f4", "", "") + checkFieldComments(t, f, "T.f5", "", "// f5 line comment") + checkFieldComments(t, f, "T.f6", "", "/* f6 line comment */") + checkFieldComments(t, f, "T.f7", "", "/*f7a*//*f7b*///f7c") + ast.FileExports(f) 
checkFieldComments(t, f, "T.F1", "/* F1 lead comment *///", "/* F1 */// line comment") checkFieldComments(t, f, "T.F2", "// F2 lead// comment", "// F2 line comment") diff --git a/src/go/parser/testdata/commas.src b/src/go/parser/testdata/commas.src index e0603cf9f7..2c52ae5ae4 100644 --- a/src/go/parser/testdata/commas.src +++ b/src/go/parser/testdata/commas.src @@ -8,12 +8,12 @@ package p var _ = []int{ - 0/* ERROR HERE "missing ','" */ + 0/* ERROR AFTER "missing ','" */ } var _ = []int{ 0, 1, 2, - 3/* ERROR HERE "missing ','" */ + 3/* ERROR AFTER "missing ','" */ } diff --git a/src/go/scanner/example_test.go b/src/go/scanner/example_test.go index 9004a4ad33..7493585372 100644 --- a/src/go/scanner/example_test.go +++ b/src/go/scanner/example_test.go @@ -41,6 +41,6 @@ func ExampleScanner_Scan() { // 1:16 ( "" // 1:17 IDENT "x" // 1:18 ) "" - // 1:20 ; "\n" // 1:20 COMMENT "// Euler" + // 1:28 ; "\n" } diff --git a/src/go/scanner/scanner.go b/src/go/scanner/scanner.go index 07e07581f7..1ff1b07dd8 100644 --- a/src/go/scanner/scanner.go +++ b/src/go/scanner/scanner.go @@ -35,11 +35,12 @@ type Scanner struct { mode Mode // scanning mode // scanning state - ch rune // current character - offset int // character offset - rdOffset int // reading offset (position after current character) - lineOffset int // current line offset - insertSemi bool // insert a semicolon before next newline + ch rune // current character + offset int // character offset + rdOffset int // reading offset (position after current character) + lineOffset int // current line offset + insertSemi bool // insert a semicolon before next newline + nlPos token.Pos // position of newline in preceding comment // public state - ok to modify ErrorCount int // number of errors encountered @@ -154,11 +155,15 @@ func (s *Scanner) errorf(offs int, format string, args ...any) { s.error(offs, fmt.Sprintf(format, args...)) } -func (s *Scanner) scanComment() string { +// scanComment returns the text of the comment and 
(if nonzero) +// the offset of the first newline within it, which implies a +// /*...*/ comment. +func (s *Scanner) scanComment() (string, int) { // initial '/' already consumed; s.ch == '/' || s.ch == '*' offs := s.offset - 1 // position of initial '/' next := -1 // position immediately following the comment; < 0 means invalid comment numCR := 0 + nlOffset := 0 // offset of first newline within /*...*/ comment if s.ch == '/' { //-style comment @@ -184,6 +189,8 @@ func (s *Scanner) scanComment() string { ch := s.ch if ch == '\r' { numCR++ + } else if ch == '\n' && nlOffset == 0 { + nlOffset = s.offset } s.next() if ch == '*' && s.ch == '/' { @@ -218,7 +225,7 @@ exit: lit = stripCR(lit, lit[1] == '*') } - return string(lit) + return string(lit), nlOffset } var prefix = []byte("line ") @@ -295,50 +302,6 @@ func trailingDigits(text []byte) (int, int, bool) { return i + 1, int(n), err == nil } -func (s *Scanner) findLineEnd() bool { - // initial '/' already consumed - - defer func(offs int) { - // reset scanner state to where it was upon calling findLineEnd - s.ch = '/' - s.offset = offs - s.rdOffset = offs + 1 - s.next() // consume initial '/' again - }(s.offset - 1) - - // read ahead until a newline, EOF, or non-comment token is found - for s.ch == '/' || s.ch == '*' { - if s.ch == '/' { - //-style comment always contains a newline - return true - } - /*-style comment: look for newline */ - s.next() - for s.ch >= 0 { - ch := s.ch - if ch == '\n' { - return true - } - s.next() - if ch == '*' && s.ch == '/' { - s.next() - break - } - } - s.skipWhitespace() // s.insertSemi is set - if s.ch < 0 || s.ch == '\n' { - return true - } - if s.ch != '/' { - // non-comment token - return false - } - s.next() // consume '/' - } - - return false -} - func isLetter(ch rune) bool { return 'a' <= lower(ch) && lower(ch) <= 'z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch) } @@ -822,6 +785,14 @@ func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 
token.Tok // and thus relative to the file set. func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) { scanAgain: + if s.nlPos.IsValid() { + // Return artificial ';' token after /*...*/ comment + // containing newline, at position of first newline. + pos, tok, lit = s.nlPos, token.SEMICOLON, "\n" + s.nlPos = token.NoPos + return + } + s.skipWhitespace() // current token start @@ -918,23 +889,23 @@ scanAgain: case '/': if s.ch == '/' || s.ch == '*' { // comment - if s.insertSemi && s.findLineEnd() { - // reset position to the beginning of the comment - s.ch = '/' - s.offset = s.file.Offset(pos) - s.rdOffset = s.offset + 1 - s.insertSemi = false // newline consumed - return pos, token.SEMICOLON, "\n" + comment, nlOffset := s.scanComment() + if s.insertSemi && nlOffset != 0 { + // For /*...*/ containing \n, return + // COMMENT then artificial SEMICOLON. + s.nlPos = s.file.Pos(nlOffset) + s.insertSemi = false + } else { + insertSemi = s.insertSemi // preserve insertSemi info } - comment := s.scanComment() if s.mode&ScanComments == 0 { // skip comment - s.insertSemi = false // newline consumed goto scanAgain } tok = token.COMMENT lit = comment } else { + // division tok = s.switch2(token.QUO, token.QUO_ASSIGN) } case '%': diff --git a/src/go/scanner/scanner_test.go b/src/go/scanner/scanner_test.go index de45e16606..d31b34a4a0 100644 --- a/src/go/scanner/scanner_test.go +++ b/src/go/scanner/scanner_test.go @@ -334,173 +334,171 @@ func TestStripCR(t *testing.T) { } } -func checkSemi(t *testing.T, line string, mode Mode) { - var S Scanner - file := fset.AddFile("TestSemis", fset.Base(), len(line)) - S.Init(file, []byte(line), nil, mode) - pos, tok, lit := S.Scan() - for tok != token.EOF { - if tok == token.ILLEGAL { - // the illegal token literal indicates what - // kind of semicolon literal to expect - semiLit := "\n" - if lit[0] == '#' { - semiLit = ";" - } - // next token must be a semicolon - semiPos := file.Position(pos) - semiPos.Offset++ - 
semiPos.Column++ - pos, tok, lit = S.Scan() - if tok == token.SEMICOLON { - if lit != semiLit { - t.Errorf(`bad literal for %q: got %q, expected %q`, line, lit, semiLit) - } - checkPos(t, line, pos, semiPos) - } else { - t.Errorf("bad token for %q: got %s, expected ;", line, tok) - } - } else if tok == token.SEMICOLON { - t.Errorf("bad token for %q: got ;, expected no ;", line) +func checkSemi(t *testing.T, input, want string, mode Mode) { + if mode&ScanComments == 0 { + want = strings.ReplaceAll(want, "COMMENT ", "") + want = strings.ReplaceAll(want, " COMMENT", "") // if at end + want = strings.ReplaceAll(want, "COMMENT", "") // if sole token + } + + file := fset.AddFile("TestSemis", fset.Base(), len(input)) + var scan Scanner + scan.Init(file, []byte(input), nil, mode) + var tokens []string + for { + pos, tok, lit := scan.Scan() + if tok == token.EOF { + break } - pos, tok, lit = S.Scan() + if tok == token.SEMICOLON && lit != ";" { + // Artificial semicolon: + // assert that position is EOF or that of a newline. 
+ off := file.Offset(pos) + if off != len(input) && input[off] != '\n' { + t.Errorf("scanning <<%s>>, got SEMICOLON at offset %d, want newline or EOF", input, off) + } + } + lit = tok.String() // "\n" => ";" + tokens = append(tokens, lit) + } + if got := strings.Join(tokens, " "); got != want { + t.Errorf("scanning <<%s>>, got [%s], want [%s]", input, got, want) } } -var lines = []string{ - // # indicates a semicolon present in the source - // $ indicates an automatically inserted semicolon - "", - "\ufeff#;", // first BOM is ignored - "#;", - "foo$\n", - "123$\n", - "1.2$\n", - "'x'$\n", - `"x"` + "$\n", - "`x`$\n", +var semicolonTests = [...]struct{ input, want string }{ + {"", ""}, + {"\ufeff;", ";"}, // first BOM is ignored + {";", ";"}, + {"foo\n", "IDENT ;"}, + {"123\n", "INT ;"}, + {"1.2\n", "FLOAT ;"}, + {"'x'\n", "CHAR ;"}, + {`"x"` + "\n", "STRING ;"}, + {"`x`\n", "STRING ;"}, - "+\n", - "-\n", - "*\n", - "/\n", - "%\n", + {"+\n", "+"}, + {"-\n", "-"}, + {"*\n", "*"}, + {"/\n", "/"}, + {"%\n", "%"}, - "&\n", - "|\n", - "^\n", - "<<\n", - ">>\n", - "&^\n", + {"&\n", "&"}, + {"|\n", "|"}, + {"^\n", "^"}, + {"<<\n", "<<"}, + {">>\n", ">>"}, + {"&^\n", "&^"}, - "+=\n", - "-=\n", - "*=\n", - "/=\n", - "%=\n", + {"+=\n", "+="}, + {"-=\n", "-="}, + {"*=\n", "*="}, + {"/=\n", "/="}, + {"%=\n", "%="}, - "&=\n", - "|=\n", - "^=\n", - "<<=\n", - ">>=\n", - "&^=\n", + {"&=\n", "&="}, + {"|=\n", "|="}, + {"^=\n", "^="}, + {"<<=\n", "<<="}, + {">>=\n", ">>="}, + {"&^=\n", "&^="}, - "&&\n", - "||\n", - "<-\n", - "++$\n", - "--$\n", + {"&&\n", "&&"}, + {"||\n", "||"}, + {"<-\n", "<-"}, + {"++\n", "++ ;"}, + {"--\n", "-- ;"}, - "==\n", - "<\n", - ">\n", - "=\n", - "!\n", + {"==\n", "=="}, + {"<\n", "<"}, + {">\n", ">"}, + {"=\n", "="}, + {"!\n", "!"}, - "!=\n", - "<=\n", - ">=\n", - ":=\n", - "...\n", + {"!=\n", "!="}, + {"<=\n", "<="}, + {">=\n", ">="}, + {":=\n", ":="}, + {"...\n", "..."}, - "(\n", - "[\n", - "{\n", - ",\n", - ".\n", + {"(\n", "("}, + {"[\n", "["}, + 
{"{\n", "{"}, + {",\n", ","}, + {".\n", "."}, - ")$\n", - "]$\n", - "}$\n", - "#;\n", - ":\n", + {")\n", ") ;"}, + {"]\n", "] ;"}, + {"}\n", "} ;"}, + {";\n", ";"}, + {":\n", ":"}, - "break$\n", - "case\n", - "chan\n", - "const\n", - "continue$\n", + {"break\n", "break ;"}, + {"case\n", "case"}, + {"chan\n", "chan"}, + {"const\n", "const"}, + {"continue\n", "continue ;"}, - "default\n", - "defer\n", - "else\n", - "fallthrough$\n", - "for\n", + {"default\n", "default"}, + {"defer\n", "defer"}, + {"else\n", "else"}, + {"fallthrough\n", "fallthrough ;"}, + {"for\n", "for"}, - "func\n", - "go\n", - "goto\n", - "if\n", - "import\n", + {"func\n", "func"}, + {"go\n", "go"}, + {"goto\n", "goto"}, + {"if\n", "if"}, + {"import\n", "import"}, - "interface\n", - "map\n", - "package\n", - "range\n", - "return$\n", + {"interface\n", "interface"}, + {"map\n", "map"}, + {"package\n", "package"}, + {"range\n", "range"}, + {"return\n", "return ;"}, - "select\n", - "struct\n", - "switch\n", - "type\n", - "var\n", + {"select\n", "select"}, + {"struct\n", "struct"}, + {"switch\n", "switch"}, + {"type\n", "type"}, + {"var\n", "var"}, - "foo$//comment\n", - "foo$//comment", - "foo$/*comment*/\n", - "foo$/*\n*/", - "foo$/*comment*/ \n", - "foo$/*\n*/ ", + {"foo//comment\n", "IDENT COMMENT ;"}, + {"foo//comment", "IDENT COMMENT ;"}, + {"foo/*comment*/\n", "IDENT COMMENT ;"}, + {"foo/*\n*/", "IDENT COMMENT ;"}, + {"foo/*comment*/ \n", "IDENT COMMENT ;"}, + {"foo/*\n*/ ", "IDENT COMMENT ;"}, - "foo $// comment\n", - "foo $// comment", - "foo $/*comment*/\n", - "foo $/*\n*/", - "foo $/* */ /* \n */ bar$/**/\n", - "foo $/*0*/ /*1*/ /*2*/\n", + {"foo // comment\n", "IDENT COMMENT ;"}, + {"foo // comment", "IDENT COMMENT ;"}, + {"foo /*comment*/\n", "IDENT COMMENT ;"}, + {"foo /*\n*/", "IDENT COMMENT ;"}, + {"foo /* */ /* \n */ bar/**/\n", "IDENT COMMENT COMMENT ; IDENT COMMENT ;"}, + {"foo /*0*/ /*1*/ /*2*/\n", "IDENT COMMENT COMMENT COMMENT ;"}, - "foo $/*comment*/ \n", - "foo $/*0*/ /*1*/ 
/*2*/ \n", - "foo $/**/ /*-------------*/ /*----\n*/bar $/* \n*/baa$\n", - "foo $/* an EOF terminates a line */", - "foo $/* an EOF terminates a line */ /*", - "foo $/* an EOF terminates a line */ //", + {"foo /*comment*/ \n", "IDENT COMMENT ;"}, + {"foo /*0*/ /*1*/ /*2*/ \n", "IDENT COMMENT COMMENT COMMENT ;"}, + {"foo /**/ /*-------------*/ /*----\n*/bar /* \n*/baa\n", "IDENT COMMENT COMMENT COMMENT ; IDENT COMMENT ; IDENT ;"}, + {"foo /* an EOF terminates a line */", "IDENT COMMENT ;"}, + {"foo /* an EOF terminates a line */ /*", "IDENT COMMENT COMMENT ;"}, + {"foo /* an EOF terminates a line */ //", "IDENT COMMENT COMMENT ;"}, - "package main$\n\nfunc main() {\n\tif {\n\t\treturn /* */ }$\n}$\n", - "package main$", + {"package main\n\nfunc main() {\n\tif {\n\t\treturn /* */ }\n}\n", "package IDENT ; func IDENT ( ) { if { return COMMENT } ; } ;"}, + {"package main", "package IDENT ;"}, } -func TestSemis(t *testing.T) { - for _, line := range lines { - checkSemi(t, line, 0) - checkSemi(t, line, ScanComments) +func TestSemicolons(t *testing.T) { + for _, test := range semicolonTests { + input, want := test.input, test.want + checkSemi(t, input, want, 0) + checkSemi(t, input, want, ScanComments) // if the input ended in newlines, the input must tokenize the // same with or without those newlines - for i := len(line) - 1; i >= 0 && line[i] == '\n'; i-- { - checkSemi(t, line[0:i], 0) - checkSemi(t, line[0:i], ScanComments) + for i := len(input) - 1; i >= 0 && input[i] == '\n'; i-- { + checkSemi(t, input[0:i], want, 0) + checkSemi(t, input[0:i], want, ScanComments) } } }