mirror of https://github.com/golang/go.git
text/scanner: provide facility for custom identifiers
LGTM=r R=golang-codereviews, r CC=golang-codereviews https://golang.org/cl/108030044
This commit is contained in:
parent
54bc760ad7
commit
60c0b3b5cf
|
|
@ -11,7 +11,7 @@
|
|||
// By default, a Scanner skips white space and Go comments and recognizes all
|
||||
// literals as defined by the Go language specification. It may be
|
||||
// customized to recognize only a subset of those literals and to recognize
|
||||
// different white space characters.
|
||||
// different identifier and white space characters.
|
||||
//
|
||||
// Basic usage pattern:
|
||||
//
|
||||
|
|
@ -34,8 +34,6 @@ import (
|
|||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// TODO(gri): Consider changing this to use the new (token) Position package.
|
||||
|
||||
// A source position is represented by a Position value.
|
||||
// A position is valid if Line > 0.
|
||||
type Position struct {
|
||||
|
|
@ -164,6 +162,13 @@ type Scanner struct {
|
|||
// for values ch > ' '). The field may be changed at any time.
|
||||
Whitespace uint64
|
||||
|
||||
// IsIdentRune is a predicate controlling the characters accepted
|
||||
// as the ith rune in an identifier. The set of valid characters
|
||||
// must not intersect with the set of white space characters.
|
||||
// If no IsIdentRune function is set, regular Go identifiers are
|
||||
// accepted instead. The field may be changed at any time.
|
||||
IsIdentRune func(ch rune, i int) bool
|
||||
|
||||
// Start position of most recently scanned token; set by Scan.
|
||||
// Calling Init or Next invalidates the position (Line == 0).
|
||||
// The Filename field is always left untouched by the Scanner.
|
||||
|
|
@ -334,9 +339,17 @@ func (s *Scanner) error(msg string) {
|
|||
fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
|
||||
}
|
||||
|
||||
func (s *Scanner) isIdentRune(ch rune, i int) bool {
|
||||
if s.IsIdentRune != nil {
|
||||
return s.IsIdentRune(ch, i)
|
||||
}
|
||||
return ch == '_' || unicode.IsLetter(ch) || unicode.IsDigit(ch) && i > 0
|
||||
}
|
||||
|
||||
func (s *Scanner) scanIdentifier() rune {
|
||||
ch := s.next() // read character after first '_' or letter
|
||||
for ch == '_' || unicode.IsLetter(ch) || unicode.IsDigit(ch) {
|
||||
// we know the zeroth rune is OK; start with the second one (index 1)
|
||||
ch := s.next()
|
||||
for i := 1; s.isIdentRune(ch, i); i++ {
|
||||
ch = s.next()
|
||||
}
|
||||
return ch
|
||||
|
|
@ -563,7 +576,7 @@ redo:
|
|||
// determine token value
|
||||
tok := ch
|
||||
switch {
|
||||
case unicode.IsLetter(ch) || ch == '_':
|
||||
case s.isIdentRune(ch, 0):
|
||||
if s.Mode&ScanIdents != 0 {
|
||||
tok = Ident
|
||||
ch = s.scanIdentifier()
|
||||
|
|
|
|||
|
|
@ -357,6 +357,28 @@ func TestScanSelectedMask(t *testing.T) {
|
|||
testScanSelectedMode(t, ScanComments, Comment)
|
||||
}
|
||||
|
||||
func TestScanCustomIdent(t *testing.T) {
|
||||
const src = "faab12345 a12b123 a12 3b"
|
||||
s := new(Scanner).Init(strings.NewReader(src))
|
||||
// ident = ( 'a' | 'b' ) { digit } .
|
||||
// digit = '0' .. '3' .
|
||||
// with a maximum length of 4
|
||||
s.IsIdentRune = func(ch rune, i int) bool {
|
||||
return i == 0 && (ch == 'a' || ch == 'b') || 0 < i && i < 4 && '0' <= ch && ch <= '3'
|
||||
}
|
||||
checkTok(t, s, 1, s.Scan(), 'f', "f")
|
||||
checkTok(t, s, 1, s.Scan(), Ident, "a")
|
||||
checkTok(t, s, 1, s.Scan(), Ident, "a")
|
||||
checkTok(t, s, 1, s.Scan(), Ident, "b123")
|
||||
checkTok(t, s, 1, s.Scan(), Int, "45")
|
||||
checkTok(t, s, 1, s.Scan(), Ident, "a12")
|
||||
checkTok(t, s, 1, s.Scan(), Ident, "b123")
|
||||
checkTok(t, s, 1, s.Scan(), Ident, "a12")
|
||||
checkTok(t, s, 1, s.Scan(), Int, "3")
|
||||
checkTok(t, s, 1, s.Scan(), Ident, "b")
|
||||
checkTok(t, s, 1, s.Scan(), EOF, "")
|
||||
}
|
||||
|
||||
func TestScanNext(t *testing.T) {
|
||||
const BOM = '\uFEFF'
|
||||
BOMs := string(BOM)
|
||||
|
|
|
|||
Loading…
Reference in New Issue