mirror of https://github.com/golang/go.git
scanner: match go/scanner and disallow NUL character;
also check for illegal UTF-8 sequences

R=rsc
CC=golang-dev
https://golang.org/cl/218061
This commit is contained in:
parent
0485a999ff
commit
22e960547f
|
|
@ -2,9 +2,10 @@
|
||||||
// Use of this source code is governed by a BSD-style
|
// Use of this source code is governed by a BSD-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
// A general-purpose scanner for text. Takes an io.Reader
|
// A general-purpose scanner for UTF-8 encoded text. Takes an io.Reader
|
||||||
// providing the source which then can be tokenized through
|
// providing the source which then can be tokenized through repeated
|
||||||
// repeated calls to the Scan function.
|
// calls to the Scan function. For compatibility with existing tools,
|
||||||
|
// the NUL character is not allowed (implementation restriction).
|
||||||
//
|
//
|
||||||
// By default, a Scanner skips white space and comments and
|
// By default, a Scanner skips white space and comments and
|
||||||
// recognizes literals as defined by the Go language spec.
|
// recognizes literals as defined by the Go language spec.
|
||||||
|
|
@ -245,13 +246,20 @@ func (s *Scanner) next() int {
|
||||||
// uncommon case: not ASCII
|
// uncommon case: not ASCII
|
||||||
var width int
|
var width int
|
||||||
ch, width = utf8.DecodeRune(s.srcBuf[s.srcPos:s.srcEnd])
|
ch, width = utf8.DecodeRune(s.srcBuf[s.srcPos:s.srcEnd])
|
||||||
|
if ch == utf8.RuneError && width == 1 {
|
||||||
|
s.error("illegal UTF-8 encoding")
|
||||||
|
}
|
||||||
s.srcPos += width - 1
|
s.srcPos += width - 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
s.srcPos++
|
s.srcPos++
|
||||||
s.column++
|
s.column++
|
||||||
if ch == '\n' {
|
switch ch {
|
||||||
|
case 0:
|
||||||
|
// implementation restriction for compatibility with other tools
|
||||||
|
s.error("illegal character NUL")
|
||||||
|
case '\n':
|
||||||
s.line++
|
s.line++
|
||||||
s.column = 0
|
s.column = 0
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -226,7 +226,7 @@ var tokenList = []token{
|
||||||
token{String, "`" + f100 + "`"},
|
token{String, "`" + f100 + "`"},
|
||||||
|
|
||||||
token{Comment, "// individual characters\n"},
|
token{Comment, "// individual characters\n"},
|
||||||
token{'\x00', "\x00"},
|
// NUL character is not allowed
|
||||||
token{'\x01', "\x01"},
|
token{'\x01', "\x01"},
|
||||||
token{' ' - 1, string(' ' - 1)},
|
token{' ' - 1, string(' ' - 1)},
|
||||||
token{'+', "+"},
|
token{'+', "+"},
|
||||||
|
|
@ -390,7 +390,8 @@ func TestScanNext(t *testing.T) {
|
||||||
func TestScanWhitespace(t *testing.T) {
|
func TestScanWhitespace(t *testing.T) {
|
||||||
var buf bytes.Buffer
|
var buf bytes.Buffer
|
||||||
var ws uint64
|
var ws uint64
|
||||||
for ch := byte(0); ch < ' '; ch++ {
|
// start at 1, NUL character is not allowed
|
||||||
|
for ch := byte(1); ch < ' '; ch++ {
|
||||||
buf.WriteByte(ch)
|
buf.WriteByte(ch)
|
||||||
ws |= 1 << ch
|
ws |= 1 << ch
|
||||||
}
|
}
|
||||||
|
|
@ -442,6 +443,8 @@ func TestError(t *testing.T) {
|
||||||
testError(t, "`abc", "literal not terminated", String)
|
testError(t, "`abc", "literal not terminated", String)
|
||||||
testError(t, `//`, "comment not terminated", EOF)
|
testError(t, `//`, "comment not terminated", EOF)
|
||||||
testError(t, `/*/`, "comment not terminated", EOF)
|
testError(t, `/*/`, "comment not terminated", EOF)
|
||||||
|
testError(t, `"abc`+"\x00"+`def"`, "illegal character NUL", String)
|
||||||
|
testError(t, `"abc`+"\xff"+`def"`, "illegal UTF-8 encoding", String)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue