go/build: refactor per-file info & reader

Make code cleaner and a bit more adaptable: instead of an ever-growing list of arguments and results for readImports, put everything in a fileInfo struct, and rename function to readGoInfo. (Not a goInfo struct because it gets used for non-Go source files as well, but that processing is much simpler.) The refactoring simplifies the embed work in the next CL, but this CL makes no semantic changes. For #41191. Change-Id: Id2de2a3b8d351adc1c919dcf79dfbe79fc3d5301 Reviewed-on: https://go-review.googlesource.com/c/go/+/243940 Trust: Russ Cox <rsc@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Jay Conrod <jayconrod@google.com>
2020-10-18 20:26:46 -04:00 · 2020-10-18 20:26:46 -04:00 · 80182d45b5
parent 7f736694fe
commit 80182d45b5
4 changed files with 145 additions and 111 deletions
--- a/src/go/build/build.go
+++ b/src/go/build/build.go
@ -10,7 +10,6 @@ import (
 	"fmt"
 	"go/ast"
 	"go/doc"
-	"go/parser"
 	"go/token"
 	"internal/goroot"
 	"internal/goversion"
@ -812,12 +811,12 @@ Found:
 			p.InvalidGoFiles = append(p.InvalidGoFiles, name)
 		}

-		match, data, filename, err := ctxt.matchFile(p.Dir, name, allTags, &p.BinaryOnly)
+		info, err := ctxt.matchFile(p.Dir, name, allTags, &p.BinaryOnly, fset)
 		if err != nil {
 			badFile(err)
 			continue
 		}
-		if !match {
+		if info == nil {
 			if strings.HasPrefix(name, "_") || strings.HasPrefix(name, ".") {
 				// not due to build constraints - don't report
 			} else if ext == ".go" {
@ -827,6 +826,7 @@ Found:
 			}
 			continue
 		}
+		data, filename := info.header, info.name

 		// Going to save the file. For non-Go files, can stop here.
 		switch ext {
@ -843,11 +843,11 @@ Found:
 			continue
 		}

-		pf, err := parser.ParseFile(fset, filename, data, parser.ImportsOnly|parser.ParseComments)
-		if err != nil {
-			badFile(err)
+		if info.parseErr != nil {
+			badFile(info.parseErr)
 			continue
 		}
+		pf := info.parsed

 		pkg := pf.Name.Name
 		if pkg == "documentation" {
@ -894,42 +894,17 @@ Found:
 		}

 		// Record imports and information about cgo.
-		type importPos struct {
-			path string
-			pos  token.Pos
-		}
-		var fileImports []importPos
 		isCgo := false
-		for _, decl := range pf.Decls {
-			d, ok := decl.(*ast.GenDecl)
-			if !ok {
-				continue
-			}
-			for _, dspec := range d.Specs {
-				spec, ok := dspec.(*ast.ImportSpec)
-				if !ok {
+		for _, imp := range info.imports {
+			if imp.path == "C" {
+				if isTest {
+					badFile(fmt.Errorf("use of cgo in test %s not supported", filename))
 					continue
 				}
-				quoted := spec.Path.Value
-				path, err := strconv.Unquote(quoted)
-				if err != nil {
-					panic(fmt.Sprintf("%s: parser returned invalid quoted string: <%s>", filename, quoted))
-				}
-				fileImports = append(fileImports, importPos{path, spec.Pos()})
-				if path == "C" {
-					if isTest {
-						badFile(fmt.Errorf("use of cgo in test %s not supported", filename))
-					} else {
-						cg := spec.Doc
-						if cg == nil && len(d.Specs) == 1 {
-							cg = d.Doc
-						}
-						if cg != nil {
-							if err := ctxt.saveCgo(filename, p, cg); err != nil {
-								badFile(err)
-							}
-						}
-						isCgo = true
+				isCgo = true
+				if imp.doc != nil {
+					if err := ctxt.saveCgo(filename, p, imp.doc); err != nil {
+						badFile(err)
 					}
 				}
 			}
@ -959,7 +934,7 @@ Found:
 		}
 		*fileList = append(*fileList, name)
 		if importMap != nil {
-			for _, imp := range fileImports {
+			for _, imp := range info.imports {
 				importMap[imp.path] = append(importMap[imp.path], fset.Position(imp.pos))
 			}
 		}
@ -1309,24 +1284,44 @@ func parseWord(data []byte) (word, rest []byte) {
 // MatchFile considers the name of the file and may use ctxt.OpenFile to
 // read some or all of the file's content.
 func (ctxt *Context) MatchFile(dir, name string) (match bool, err error) {
-	match, _, _, err = ctxt.matchFile(dir, name, nil, nil)
-	return
+	info, err := ctxt.matchFile(dir, name, nil, nil, nil)
+	return info != nil, err
 }

 var dummyPkg Package

+// fileInfo records information learned about a file included in a build.
+type fileInfo struct {
+	name     string // full name including dir
+	header   []byte
+	fset     *token.FileSet
+	parsed   *ast.File
+	parseErr error
+	imports  []fileImport
+}
+
+type fileImport struct {
+	path string
+	pos  token.Pos
+	doc  *ast.CommentGroup
+}
+
 // matchFile determines whether the file with the given name in the given directory
 // should be included in the package being constructed.
-// It returns the data read from the file.
+// If the file should be included, matchFile returns a non-nil *fileInfo (and a nil error).
+// Non-nil errors are reserved for unexpected problems.
+//
 // If name denotes a Go program, matchFile reads until the end of the
-// imports (and returns that data) even though it only considers text
-// until the first non-comment.
+// imports and returns that section of the file in the fileInfo's header field,
+// even though it only considers text until the first non-comment
+// for +build lines.
+//
 // If allTags is non-nil, matchFile records any encountered build tag
 // by setting allTags[tag] = true.
-func (ctxt *Context) matchFile(dir, name string, allTags map[string]bool, binaryOnly *bool) (match bool, data []byte, filename string, err error) {
+func (ctxt *Context) matchFile(dir, name string, allTags map[string]bool, binaryOnly *bool, fset *token.FileSet) (*fileInfo, error) {
 	if strings.HasPrefix(name, "_") ||
 		strings.HasPrefix(name, ".") {
-		return
+		return nil, nil
 	}

 	i := strings.LastIndex(name, ".")
@ -1336,55 +1331,53 @@ func (ctxt *Context) matchFile(dir, name string, allTags map[string]bool, binary
 	ext := name[i:]

 	if !ctxt.goodOSArchFile(name, allTags) && !ctxt.UseAllFiles {
-		return
+		return nil, nil
 	}

 	if ext != ".go" && fileListForExt(&dummyPkg, ext) == nil {
 		// skip
-		return
+		return nil, nil
 	}

+	info := &fileInfo{name: ctxt.joinPath(dir, name), fset: fset}
 	if ext == ".syso" {
 		// binary, no reading
-		match = true
-		return
+		return info, nil
 	}

-	filename = ctxt.joinPath(dir, name)
-	f, err := ctxt.openFile(filename)
+	f, err := ctxt.openFile(info.name)
 	if err != nil {
-		return
+		return nil, err
 	}

-	if strings.HasSuffix(filename, ".go") {
-		data, err = readImports(f, false, nil)
-		if strings.HasSuffix(filename, "_test.go") {
+	if strings.HasSuffix(name, ".go") {
+		err = readGoInfo(f, info)
+		if strings.HasSuffix(name, "_test.go") {
 			binaryOnly = nil // ignore //go:binary-only-package comments in _test.go files
 		}
 	} else {
 		binaryOnly = nil // ignore //go:binary-only-package comments in non-Go sources
-		data, err = readComments(f)
+		info.header, err = readComments(f)
 	}
 	f.Close()
 	if err != nil {
-		err = fmt.Errorf("read %s: %v", filename, err)
-		return
+		return nil, fmt.Errorf("read %s: %v", info.name, err)
 	}

 	// Look for +build comments to accept or reject the file.
-	ok, sawBinaryOnly, err := ctxt.shouldBuild(data, allTags)
+	ok, sawBinaryOnly, err := ctxt.shouldBuild(info.header, allTags)
 	if err != nil {
-		return // non-nil err
+		return nil, err
 	}
 	if !ok && !ctxt.UseAllFiles {
-		return // nil err
+		return nil, nil
 	}

 	if binaryOnly != nil && sawBinaryOnly {
 		*binaryOnly = true
 	}
-	match = true
-	return
+
+	return info, nil
 }

 func cleanImports(m map[string][]token.Position) ([]string, map[string][]token.Position) {
--- a/src/go/build/deps_test.go
+++ b/src/go/build/deps_test.go
@ -17,7 +17,6 @@ import (
 	"path/filepath"
 	"runtime"
 	"sort"
-	"strconv"
 	"strings"
 	"testing"
 )
@ -606,24 +605,22 @@ func findImports(pkg string) ([]string, error) {
 		if !strings.HasSuffix(name, ".go") || strings.HasSuffix(name, "_test.go") {
 			continue
 		}
-		f, err := os.Open(filepath.Join(dir, name))
+		var info fileInfo
+		info.name = filepath.Join(dir, name)
+		f, err := os.Open(info.name)
 		if err != nil {
 			return nil, err
 		}
-		var imp []string
-		data, err := readImports(f, false, &imp)
+		err = readGoInfo(f, &info)
 		f.Close()
 		if err != nil {
 			return nil, fmt.Errorf("reading %v: %v", name, err)
 		}
-		if bytes.Contains(data, buildIgnore) {
+		if bytes.Contains(info.header, buildIgnore) {
 			continue
 		}
-		for _, quoted := range imp {
-			path, err := strconv.Unquote(quoted)
-			if err != nil {
-				continue
-			}
+		for _, imp := range info.imports {
+			path := imp.path
 			if !haveImport[path] {
 				haveImport[path] = true
 				imports = append(imports, path)
--- a/src/go/build/read.go
+++ b/src/go/build/read.go
@ -7,7 +7,11 @@ package build
 import (
 	"bufio"
 	"errors"
+	"fmt"
+	"go/ast"
+	"go/parser"
 	"io"
+	"strconv"
 	"unicode/utf8"
 )

@ -147,15 +151,11 @@ func (r *importReader) readIdent() {

 // readString reads a quoted string literal from the input.
 // If an identifier is not present, readString records a syntax error.
-func (r *importReader) readString(save *[]string) {
+func (r *importReader) readString() {
 	switch r.nextByte(true) {
 	case '`':
-		start := len(r.buf) - 1
 		for r.err == nil {
 			if r.nextByte(false) == '`' {
-				if save != nil {
-					*save = append(*save, string(r.buf[start:]))
-				}
 				break
 			}
 			if r.eof {
@ -163,13 +163,9 @@ func (r *importReader) readString(save *[]string) {
 			}
 		}
 	case '"':
-		start := len(r.buf) - 1
 		for r.err == nil {
 			c := r.nextByte(false)
 			if c == '"' {
-				if save != nil {
-					*save = append(*save, string(r.buf[start:]))
-				}
 				break
 			}
 			if r.eof || c == '\n' {
@ -186,17 +182,17 @@ func (r *importReader) readString(save *[]string) {

 // readImport reads an import clause - optional identifier followed by quoted string -
 // from the input.
-func (r *importReader) readImport(imports *[]string) {
+func (r *importReader) readImport() {
 	c := r.peekByte(true)
 	if c == '.' {
 		r.peek = 0
 	} else if isIdent(c) {
 		r.readIdent()
 	}
-	r.readString(imports)
+	r.readString()
 }

-// readComments is like ioutil.ReadAll, except that it only reads the leading
+// readComments is like io.ReadAll, except that it only reads the leading
 // block of comments in the file.
 func readComments(f io.Reader) ([]byte, error) {
 	r := &importReader{b: bufio.NewReader(f)}
@ -208,9 +204,14 @@ func readComments(f io.Reader) ([]byte, error) {
 	return r.buf, r.err
 }

-// readImports is like ioutil.ReadAll, except that it expects a Go file as input
-// and stops reading the input once the imports have completed.
-func readImports(f io.Reader, reportSyntaxError bool, imports *[]string) ([]byte, error) {
+// readGoInfo expects a Go file as input and reads the file up to and including the import section.
+// It records what it learned in *info.
+// If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr,
+// and info.imports.
+//
+// It only returns an error if there are problems reading the file,
+// not for syntax errors in the file itself.
+func readGoInfo(f io.Reader, info *fileInfo) error {
 	r := &importReader{b: bufio.NewReader(f)}

 	r.readKeyword("package")
@ -220,28 +221,68 @@ func readImports(f io.Reader, reportSyntaxError bool, imports *[]string) ([]byte
 		if r.peekByte(true) == '(' {
 			r.nextByte(false)
 			for r.peekByte(true) != ')' && r.err == nil {
-				r.readImport(imports)
+				r.readImport()
 			}
 			r.nextByte(false)
 		} else {
-			r.readImport(imports)
+			r.readImport()
 		}
 	}

+	info.header = r.buf
+
 	// If we stopped successfully before EOF, we read a byte that told us we were done.
 	// Return all but that last byte, which would cause a syntax error if we let it through.
 	if r.err == nil && !r.eof {
-		return r.buf[:len(r.buf)-1], nil
+		info.header = r.buf[:len(r.buf)-1]
 	}

 	// If we stopped for a syntax error, consume the whole file so that
 	// we are sure we don't change the errors that go/parser returns.
-	if r.err == errSyntax && !reportSyntaxError {
+	if r.err == errSyntax {
 		r.err = nil
 		for r.err == nil && !r.eof {
 			r.readByte()
 		}
+		info.header = r.buf
+	}
+	if r.err != nil {
+		return r.err
 	}

-	return r.buf, r.err
+	if info.fset == nil {
+		return nil
+	}
+
+	// Parse file header & record imports.
+	info.parsed, info.parseErr = parser.ParseFile(info.fset, info.name, info.header, parser.ImportsOnly|parser.ParseComments)
+	if info.parseErr != nil {
+		return nil
+	}
+
+	for _, decl := range info.parsed.Decls {
+		d, ok := decl.(*ast.GenDecl)
+		if !ok {
+			continue
+		}
+		for _, dspec := range d.Specs {
+			spec, ok := dspec.(*ast.ImportSpec)
+			if !ok {
+				continue
+			}
+			quoted := spec.Path.Value
+			path, err := strconv.Unquote(quoted)
+			if err != nil {
+				return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted)
+			}
+
+			doc := spec.Doc
+			if doc == nil && len(d.Specs) == 1 {
+				doc = d.Doc
+			}
+			info.imports = append(info.imports, fileImport{path, spec.Pos(), doc})
+		}
+	}
+
+	return nil
 }
--- a/src/go/build/read_test.go
+++ b/src/go/build/read_test.go
@ -13,12 +13,12 @@ import (
 const quote = "`"

 type readTest struct {
-	// Test input contains ℙ where readImports should stop.
+	// Test input contains ℙ where readGoInfo should stop.
 	in  string
 	err string
 }

-var readImportsTests = []readTest{
+var readGoInfoTests = []readTest{
 	{
 		`package p`,
 		"",
@ -37,15 +37,15 @@ var readImportsTests = []readTest{
 	},
 	{
 		`package p
-		
+
 		// comment
-		
+
 		import "x"
 		import _ "x"
 		import a "x"
-		
+
 		/* comment */
-		
+
 		import (
 			"x" /* comment */
 			_ "x"
@ -59,7 +59,7 @@ var readImportsTests = []readTest{
 		import ()
 		import()import()import()
 		import();import();import()
-		
+
 		ℙvar x = 1
 		`,
 		"",
@ -85,7 +85,7 @@ var readCommentsTests = []readTest{
 		/* bar */

 		/* quux */ // baz
-		
+
 		/*/ zot */

 		// asdf
@ -127,8 +127,12 @@ func testRead(t *testing.T, tests []readTest, read func(io.Reader) ([]byte, erro
 	}
 }

-func TestReadImports(t *testing.T) {
-	testRead(t, readImportsTests, func(r io.Reader) ([]byte, error) { return readImports(r, true, nil) })
+func TestReadGoInfo(t *testing.T) {
+	testRead(t, readGoInfoTests, func(r io.Reader) ([]byte, error) {
+		var info fileInfo
+		err := readGoInfo(r, &info)
+		return info.header, err
+	})
 }

 func TestReadComments(t *testing.T) {
@ -202,11 +206,6 @@ var readFailuresTests = []readTest{
 	},
 }

-func TestReadFailures(t *testing.T) {
-	// Errors should be reported (true arg to readImports).
-	testRead(t, readFailuresTests, func(r io.Reader) ([]byte, error) { return readImports(r, true, nil) })
-}
-
 func TestReadFailuresIgnored(t *testing.T) {
 	// Syntax errors should not be reported (false arg to readImports).
 	// Instead, entire file should be the output and no error.
@ -219,5 +218,9 @@ func TestReadFailuresIgnored(t *testing.T) {
 			tt.err = ""
 		}
 	}
-	testRead(t, tests, func(r io.Reader) ([]byte, error) { return readImports(r, false, nil) })
+	testRead(t, tests, func(r io.Reader) ([]byte, error) {
+		var info fileInfo
+		err := readGoInfo(r, &info)
+		return info.header, err
+	})
 }