mirror of https://github.com/golang/go.git
go/scanner: reject BOMs that are not at the beginning of the file
For compliance with gc. See also issue 5265. Not critical for Go 1.1, but harmless. R=r CC=golang-dev https://golang.org/cl/8736043
This commit is contained in:
parent
d4d063580f
commit
968732b677
|
|
@ -48,6 +48,8 @@ type Scanner struct {
|
|||
ErrorCount int // number of errors encountered
|
||||
}
|
||||
|
||||
// bom is the Unicode byte order mark (U+FEFF). Init skips it when it is the
// very first character of the source; next reports "illegal byte order mark"
// for any occurrence at a later offset.
const bom = 0xFEFF // byte order mark, only permitted as very first character
|
||||
|
||||
// Read the next Unicode char into s.ch.
|
||||
// s.ch < 0 means end-of-file.
|
||||
//
|
||||
|
|
@ -67,6 +69,8 @@ func (s *Scanner) next() {
|
|||
r, w = utf8.DecodeRune(s.src[s.rdOffset:])
|
||||
if r == utf8.RuneError && w == 1 {
|
||||
s.error(s.offset, "illegal UTF-8 encoding")
|
||||
} else if r == bom && s.offset > 0 {
|
||||
s.error(s.offset, "illegal byte order mark")
|
||||
}
|
||||
}
|
||||
s.rdOffset += w
|
||||
|
|
@ -125,8 +129,8 @@ func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode
|
|||
s.ErrorCount = 0
|
||||
|
||||
s.next()
|
||||
if s.ch == '\uFEFF' {
|
||||
s.next() // ignore BOM
|
||||
if s.ch == bom {
|
||||
s.next() // ignore BOM at file beginning
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -713,7 +717,10 @@ scanAgain:
|
|||
case '|':
|
||||
tok = s.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR)
|
||||
default:
|
||||
s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch))
|
||||
// next reports unexpected BOMs - don't repeat
|
||||
if ch != bom {
|
||||
s.error(s.file.Offset(pos), fmt.Sprintf("illegal character %#U", ch))
|
||||
}
|
||||
insertSemi = s.insertSemi // preserve insertSemi info
|
||||
tok = token.ILLEGAL
|
||||
lit = string(ch)
|
||||
|
|
|
|||
|
|
@ -695,7 +695,10 @@ var errors = []struct {
|
|||
{"0X", token.INT, 0, "illegal hexadecimal number"},
|
||||
{"\"abc\x00def\"", token.STRING, 4, "illegal character NUL"},
|
||||
{"\"abc\x80def\"", token.STRING, 4, "illegal UTF-8 encoding"},
|
||||
{"\ufeff\ufeff", token.ILLEGAL, 3, "illegal character U+FEFF"}, // only first BOM is ignored
|
||||
{"\ufeff\ufeff", token.ILLEGAL, 3, "illegal byte order mark"}, // only first BOM is ignored
|
||||
{"//\ufeff", token.COMMENT, 2, "illegal byte order mark"}, // only first BOM is ignored
|
||||
{"'\ufeff" + `'`, token.CHAR, 1, "illegal byte order mark"}, // only first BOM is ignored
|
||||
{`"` + "abc\ufeffdef" + `"`, token.STRING, 4, "illegal byte order mark"}, // only first BOM is ignored
|
||||
}
|
||||
|
||||
func TestScanErrors(t *testing.T) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue