- scanner to track line/col number instead of byte position only

- fixed a parameter name in tabwriter R=rsc DELTA=110 (21 added, 17 deleted, 72 changed) OCL=26123 CL=26127
2009-03-11 12:48:45 -07:00 · 2009-03-11 12:48:45 -07:00 · 68c69fac9e
parent 32bf48c6d8
commit 68c69fac9e
3 changed files with 73 additions and 69 deletions
--- a/src/lib/go/scanner.go
+++ b/src/lib/go/scanner.go
@ -32,13 +32,21 @@ import (
 )


+// Source locations are represented by a Location value.
+type Location struct {
+	Pos int;  // byte position in source
+	Line int;  // line count, starting at 1
+	Col int;  // column, starting at 1 (character count)
+}
+
+
 // An implementation of an ErrorHandler must be provided to the Scanner.
-// If a syntax error is encountered, Error is called with the exact
-// token position (the byte position of the token in the source) and the
-// error message.
+// If a syntax error is encountered, Error is called with a location and
+// an error message. The location points at the beginning of the offending
+// token.
 //
 type ErrorHandler interface {
-	Error(pos int, msg string);
+	Error(loc Location, msg string);
 }


@ -54,9 +62,9 @@ type Scanner struct {
 	scan_comments bool;  // if set, comments are reported as tokens

 	// scanning state
-	pos int;  // current reading position
+	loc Location;  // location of ch
+	pos int;  // current reading position (position after ch)
 	ch int;  // one char look-ahead
-	chpos int;  // position of ch
 }


@ -64,18 +72,22 @@ type Scanner struct {
 // S.ch < 0 means end-of-file.
 func (S *Scanner) next() {
 	if S.pos < len(S.src) {
-		// assume ASCII
+		S.loc.Pos = S.pos;
+		S.loc.Col++;
 		r, w := int(S.src[S.pos]), 1;
-		if r >= 0x80 {
+		switch {
+		case r == '\n':
+			S.loc.Line++;
+			S.loc.Col = 1;
+		case r >= 0x80:
 			// not ASCII
 			r, w = utf8.DecodeRune(S.src[S.pos : len(S.src)]);
 		}
-		S.ch = r;
-		S.chpos = S.pos;
 		S.pos += w;
+		S.ch = r;
 	} else {
+		S.loc.Pos = len(S.src);
 		S.ch = -1;  // eof
-		S.chpos = len(S.src);
 	}
 }

@ -90,6 +102,7 @@ func (S *Scanner) Init(src []byte, err ErrorHandler, scan_comments bool) {
 	S.src = src;
 	S.err = err;
 	S.scan_comments = scan_comments;
+	S.loc.Line = 1;
 	S.next();
 }

@ -111,14 +124,14 @@ func charString(ch int) string {
 }


-func (S *Scanner) error(pos int, msg string) {
-	S.err.Error(pos, msg);
+func (S *Scanner) error(loc Location, msg string) {
+	S.err.Error(loc, msg);
 }


 func (S *Scanner) expect(ch int) {
 	if S.ch != ch {
-		S.error(S.chpos, "expected " + charString(ch) + ", found " + charString(S.ch));
+		S.error(S.loc, "expected " + charString(ch) + ", found " + charString(S.ch));
 	}
 	S.next();  // always make progress
 }
@ -142,9 +155,8 @@ func (S *Scanner) skipWhitespace() {
 }


-func (S *Scanner) scanComment() []byte {
+func (S *Scanner) scanComment(loc Location) {
 	// first '/' already consumed
-	pos := S.chpos - 1;

 	if S.ch == '/' {
 		//-style comment
@ -154,7 +166,7 @@ func (S *Scanner) scanComment() []byte {
 				// '\n' terminates comment but we do not include
 				// it in the comment (otherwise we don't see the
 				// start of a newline in skipWhitespace()).
-				return S.src[pos : S.chpos];
+				return;
 			}
 		}

@ -166,13 +178,12 @@ func (S *Scanner) scanComment() []byte {
 			S.next();
 			if ch == '*' && S.ch == '/' {
 				S.next();
-				return S.src[pos : S.chpos];
+				return;
 			}
 		}
 	}

-	S.error(pos, "comment not terminated");
-	return S.src[pos : S.chpos];
+	S.error(loc, "comment not terminated");
 }


@ -192,13 +203,12 @@ func isDigit(ch int) bool {
 }


-func (S *Scanner) scanIdentifier() (tok int, lit []byte) {
-	pos := S.chpos;
+func (S *Scanner) scanIdentifier() int {
+	pos := S.loc.Pos;
 	for isLetter(S.ch) || isDigit(S.ch) {
 		S.next();
 	}
-	lit = S.src[pos : S.chpos];
-	return token.Lookup(lit), lit;
+	return token.Lookup(S.src[pos : S.loc.Pos]);
 }


@ -219,13 +229,11 @@ func (S *Scanner) scanMantissa(base int) {
 }


-func (S *Scanner) scanNumber(seen_decimal_point bool) (tok int, lit []byte) {
-	pos := S.chpos;
-	tok = token.INT;
+func (S *Scanner) scanNumber(seen_decimal_point bool) int {
+	tok := token.INT;

 	if seen_decimal_point {
 		tok = token.FLOAT;
-		pos--;  // '.' is one byte
 		S.scanMantissa(10);
 		goto exponent;
 	}
@ -273,7 +281,7 @@ exponent:
 	}

 exit:
-	return tok, S.src[pos : S.chpos];
+	return tok;
 }


@ -283,14 +291,14 @@ func (S *Scanner) scanDigits(base, length int) {
 		length--;
 	}
 	if length > 0 {
-		S.error(S.chpos, "illegal char escape");
+		S.error(S.loc, "illegal char escape");
 	}
 }


 func (S *Scanner) scanEscape(quote int) {
+	loc := S.loc;
 	ch := S.ch;
-	pos := S.chpos;
 	S.next();
 	switch ch {
 	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
@ -304,15 +312,14 @@ func (S *Scanner) scanEscape(quote int) {
 	case 'U':
 		S.scanDigits(16, 8);
 	default:
-		S.error(pos, "illegal char escape");
+		S.error(loc, "illegal char escape");
 	}
 }


-func (S *Scanner) scanChar() []byte {
+func (S *Scanner) scanChar() {
 	// '\'' already consumed

-	pos := S.chpos - 1;
 	ch := S.ch;
 	S.next();
 	if ch == '\\' {
@ -320,19 +327,17 @@ func (S *Scanner) scanChar() []byte {
 	}

 	S.expect('\'');
-	return S.src[pos : S.chpos];
 }


-func (S *Scanner) scanString() []byte {
+func (S *Scanner) scanString(loc Location) {
 	// '"' already consumed

-	pos := S.chpos - 1;
 	for S.ch != '"' {
 		ch := S.ch;
 		S.next();
 		if ch == '\n' || ch < 0 {
-			S.error(pos, "string not terminated");
+			S.error(loc, "string not terminated");
 			break;
 		}
 		if ch == '\\' {
@ -341,25 +346,22 @@ func (S *Scanner) scanString() []byte {
 	}

 	S.next();
-	return S.src[pos : S.chpos];
 }


-func (S *Scanner) scanRawString() []byte {
+func (S *Scanner) scanRawString(loc Location) {
 	// '`' already consumed

-	pos := S.chpos - 1;
 	for S.ch != '`' {
 		ch := S.ch;
 		S.next();
 		if ch == '\n' || ch < 0 {
-			S.error(pos, "string not terminated");
+			S.error(loc, "string not terminated");
 			break;
 		}
 	}

 	S.next();
-	return S.src[pos : S.chpos];
 }


@ -408,34 +410,33 @@ func (S *Scanner) switch4(tok0, tok1, ch2, tok2, tok3 int) int {
 }


-// Scan scans the next token and returns the token byte position in the
-// source, its token value, and the corresponding literal text if the token
-// is an identifier, basic type literal (token.IsLiteral(tok) == true), or
-// comment.
+// Scan scans the next token and returns the token location loc,
+// the token tok, and the literal text lit corresponding to the
+// token.
 //
-func (S *Scanner) Scan() (pos, tok int, lit []byte) {
+func (S *Scanner) Scan() (loc Location, tok int, lit []byte) {
 scan_again:
 	S.skipWhitespace();

-	pos, tok = S.chpos, token.ILLEGAL;
+	loc, tok = S.loc, token.ILLEGAL;

 	switch ch := S.ch; {
 	case isLetter(ch):
-		tok, lit = S.scanIdentifier();
+		tok = S.scanIdentifier();
 	case digitVal(ch) < 10:
-		tok, lit = S.scanNumber(false);
+		tok = S.scanNumber(false);
 	default:
 		S.next();  // always make progress
 		switch ch {
 		case -1  : tok = token.EOF;
-		case '\n': tok, lit = token.COMMENT, []byte{'\n'};
-		case '"' : tok, lit = token.STRING, S.scanString();
-		case '\'': tok, lit = token.CHAR, S.scanChar();
-		case '`' : tok, lit = token.STRING, S.scanRawString();
+		case '\n': tok = token.COMMENT;
+		case '"' : tok = token.STRING; S.scanString(loc);
+		case '\'': tok = token.CHAR; S.scanChar();
+		case '`' : tok = token.STRING; S.scanRawString(loc);
 		case ':' : tok = S.switch2(token.COLON, token.DEFINE);
 		case '.' :
 			if digitVal(S.ch) < 10 {
-				tok, lit = S.scanNumber(true);
+				tok = S.scanNumber(true);
 			} else if S.ch == '.' {
 				S.next();
 				if S.ch == '.' {
@ -458,7 +459,8 @@ scan_again:
 		case '*': tok = S.switch2(token.MUL, token.MUL_ASSIGN);
 		case '/':
 			if S.ch == '/' || S.ch == '*' {
-				tok, lit = token.COMMENT, S.scanComment();
+				S.scanComment(loc);
+				tok = token.COMMENT;
 				if !S.scan_comments {
 					goto scan_again;
 				}
@ -479,9 +481,9 @@ scan_again:
 		case '!': tok = S.switch2(token.NOT, token.NEQ);
 		case '&': tok = S.switch3(token.AND, token.AND_ASSIGN, '&', token.LAND);
 		case '|': tok = S.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR);
-		default: S.error(pos, "illegal character " + charString(ch));
+		default: S.error(loc, "illegal character " + charString(ch));
 		}
 	}

-	return pos, tok, lit;
+	return loc, tok, S.src[loc.Pos : S.loc.Pos];
 }
--- a/src/lib/go/scanner_test.go
+++ b/src/lib/go/scanner_test.go
@ -153,12 +153,14 @@ var tokens = [...]elt{
 }


+const whitespace = "  \t  ";  // to separate tokens
+
 func init() {
 	// set pos fields
 	pos := 0;
 	for i := 0; i < len(tokens); i++ {
 		tokens[i].pos = pos;
-		pos += len(tokens[i].lit) + 1;  // + 1 for space in between
+		pos += len(tokens[i].lit) + len(whitespace);
 	}
 }

@ -167,8 +169,8 @@ type TestErrorHandler struct {
 	t *testing.T
 }

-func (h *TestErrorHandler) Error(pos int, msg string) {
-	h.t.Errorf("Error() called (pos = %d, msg = %s)", pos, msg);
+func (h *TestErrorHandler) Error(loc scanner.Location, msg string) {
+	h.t.Errorf("Error() called (msg = %s)", msg);
 }


@ -176,7 +178,7 @@ func Test(t *testing.T) {
 	// make source
 	var src string;
 	for i, e := range tokens {
-		src += e.lit + " ";
+		src += e.lit + whitespace;
 	}

 	// set up scanner
@ -185,9 +187,9 @@ func Test(t *testing.T) {

 	// verify scan
 	for i, e := range tokens {
-		pos, tok, lit := s.Scan();
-		if pos != e.pos {
-			t.Errorf("bad position for %s: got %d, expected %d", e.lit, pos, e.pos);
+		loc, tok, lit := s.Scan();
+		if loc.Pos != e.pos {
+			t.Errorf("bad position for %s: got %d, expected %d", e.lit, loc.Pos, e.pos);
 		}
 		if tok != e.tok {
 			t.Errorf("bad token for %s: got %s, expected %s", e.lit, token.TokenString(tok), token.TokenString(e.tok));
@ -199,7 +201,7 @@ func Test(t *testing.T) {
 			t.Errorf("bad class for %s: got %d, expected %d", e.lit, tokenclass(tok), e.class);
 		}
 	}
-	pos, tok, lit := s.Scan();
+	loc, tok, lit := s.Scan();
 	if tok != token.EOF {
 		t.Errorf("bad token at eof: got %s, expected EOF", token.TokenString(tok));
 	}
--- a/src/lib/tabwriter/tabwriter.go
+++ b/src/lib/tabwriter/tabwriter.go
@ -485,6 +485,6 @@ func (b *Writer) Write(buf []byte) (written int, err *os.Error) {
 // NewWriter allocates and initializes a new tabwriter.Writer.
 // The parameters are the same as for the the Init function.
 //
-func NewWriter(writer io.Write, cellwidth, padding int, padchar byte, flags uint) *Writer {
-	return new(Writer).Init(writer, cellwidth, padding, padchar, flags)
+func NewWriter(output io.Write, cellwidth, padding int, padchar byte, flags uint) *Writer {
+	return new(Writer).Init(output, cellwidth, padding, padchar, flags)
 }