cmd/compile/internal/gc: escape binary export data so it contains no '$'

Necessary to ensure that subsequent tools can continue to find the end of the export data section simply by searching for "$$". Adjusted gcimporter used by go/types accordingly. Also, fixed a bug in gcimporter related to reading export data in debug format. Change-Id: Iaea4ed05edd8a5bab28ebe5b19a4740f5e537d35 Reviewed-on: https://go-review.googlesource.com/16283 Reviewed-by: Chris Manghane <cmang@golang.org>
2015-10-23 16:01:09 -07:00 · 2015-10-23 16:01:09 -07:00 · e6ccfc1ad1
parent d3df04cd8c
commit e6ccfc1ad1
4 changed files with 131 additions and 67 deletions
--- a/src/cmd/compile/internal/gc/bexport.go
+++ b/src/cmd/compile/internal/gc/bexport.go
@ -828,10 +828,8 @@ func (p *exporter) string(s string) {
 		p.tracef("%q ", s)
 	}
 	p.rawInt64(int64(len(s)))
-	w, err := obj.Bwritestring(p.out, s)
+	for i := 0; i < len(s); i++ {
-	p.written += w
+		p.byte(s[i])
 	if w != len(s) || err != nil {
 		Fatalf("write error: %v (wrote %d bytes of %d)", err, w, len(s))
 	}
 }
@ -843,22 +841,39 @@ func (p *exporter) marker(m byte) {
 	p.rawInt64(int64(p.written))
 }
 // byte writes b to p.out unchanged and counts it in p.written.
 // NOTE(review): a second byte method that escapes '$' and '|' appears
 // later in this diff; this one looks like the pre-change version — confirm.
 func (p *exporter) byte(b byte) {
 	obj.Bputc(p.out, b)
 	p.written++
 }
 // rawInt64 should only be used by low-level encoders
 func (p *exporter) rawInt64(x int64) {
 	var tmp [binary.MaxVarintLen64]byte
 	n := binary.PutVarint(tmp[:], x)
-	w, err := p.out.Write(tmp[:n])
+	for i := 0; i < n; i++ {
-	p.written += w
+		p.byte(tmp[i])
 	if err != nil {
 		Fatalf("write error: %v", err)
 	}
 }
 // byte is the bottleneck interface to write to p.out.
 // byte escapes b as follows (any encoding that hides '$'
 // would do):
 //
 //	'$'  => '|' 'S'
 //	'|'  => '|' '|'
 //
 // Necessary so other tools can find the end of the
 // export data by searching for "$$".
 //
 // p.written counts the bytes actually emitted, so an escaped
 // byte advances it by two.
 func (p *exporter) byte(b byte) {
 	switch b {
 	case '$':
 		// write '$' as '|' 'S': replace b by 'S' and fall through
 		// so that the '|' prefix is emitted by the next case
 		b = 'S'
 		fallthrough
 	case '|':
 		// write '|' as '|' '|': emit the escape prefix here; the
 		// second byte (b) is written by the common code below
 		obj.Bputc(p.out, '|')
 		p.written++
 	}
 	// emit b itself (or its replacement if it was escaped)
 	obj.Bputc(p.out, b)
 	p.written++
 }
 // tracef is like fmt.Printf but it rewrites the format string
 // to take care of indentation.
 func (p *exporter) tracef(format string, args ...interface{}) {
--- a/src/cmd/compile/internal/gc/bimport.go
+++ b/src/cmd/compile/internal/gc/bimport.go
@ -573,10 +573,8 @@ func (p *importer) string() string {
 		} else {
 			p.buf = p.buf[:n]
 		}
-		r := obj.Bread(p.in, p.buf)
+		for i := 0; i < n; i++ {
-		p.read += r
+			p.buf[i] = p.byte()
 		if r != n {
 			Fatalf("read error: read %d bytes of %d", r, n)
 		}
 		return string(p.buf)
 	}
@ -595,15 +593,6 @@ func (p *importer) marker(want byte) {
 	}
 }
 // byte reads the next byte from p.in without any unescaping and
 // counts it in p.read. A negative result from obj.Bgetc is treated
 // as a read error and reported via Fatalf.
 func (p *importer) byte() byte {
 	if c := obj.Bgetc(p.in); c >= 0 {
 		p.read++
 		return byte(c)
 	}
 	Fatalf("read error")
 	return 0 // presumably not reached if Fatalf does not return
 }
 // rawInt64 should only be used by low-level decoders
 func (p *importer) rawInt64() int64 {
 	i, err := binary.ReadVarint(p)
@ -617,3 +606,29 @@ func (p *importer) rawInt64() int64 {
 // ReadByte lets p be passed to binary.ReadVarint (see rawInt64).
 // It delegates to p.byte and always returns a nil error.
 func (p *importer) ReadByte() (byte, error) {
 	return p.byte(), nil
 }
 // byte is the bottleneck interface for reading from p.in.
 // It unescapes '|' 'S' to '$' and '|' '|' to '|'; any other byte
 // following '|' is reported as an error via Fatalf.
 //
 // p.read counts the bytes consumed from p.in, so an escape
 // sequence advances it by two even though one byte is returned.
 func (p *importer) byte() byte {
 	c := obj.Bgetc(p.in)
 	p.read++
 	if c < 0 {
 		// a negative result from obj.Bgetc is treated as a read failure
 		Fatalf("read error")
 	}
 	if c == '|' {
 		// '|' introduces an escape sequence; the next byte selects the result
 		c = obj.Bgetc(p.in)
 		p.read++
 		if c < 0 {
 			Fatalf("read error")
 		}
 		switch c {
 		case 'S':
 			c = '$'
 		case '|':
 			// nothing to do: c already holds the decoded '|'
 		default:
 			Fatalf("unexpected escape sequence in export data")
 		}
 	}
 	return byte(c)
 }
--- a/src/cmd/compile/internal/gc/export.go
+++ b/src/cmd/compile/internal/gc/export.go
@ -375,13 +375,9 @@ func dumpexport() {
 			if n, err := bout.Write(copy.Bytes()); n != size || err != nil {
 				Fatalf("error writing export data: got %d bytes, want %d bytes, err = %v", n, size, err)
 			}
-
+			// export data must contain no '$' so that we can find the end by searching for "$$"
-			// verify there's no "\n$$\n" inside the export data
+			if bytes.IndexByte(copy.Bytes(), '$') >= 0 {
-			// TODO(gri) fragile - the end marker needs to be fixed
+				Fatalf("export data contains $")
 			// TODO(gri) investigate if exporting a string containing "\n$$\n"
 			//           causes problems (old and new format)
 			if bytes.Index(copy.Bytes(), []byte("\n$$\n")) >= 0 {
 				Fatalf("export data contains end marker in its midst")
 			}
 			// verify that we can read the copied export data back in
--- a/src/go/internal/gcimporter/bimport.go
+++ b/src/go/internal/gcimporter/bimport.go
@ -20,27 +20,24 @@ import (
 // If data is obviously malformed, an error is returned but in
 // general it is not recommended to call BImportData on untrusted data.
 func BImportData(imports map[string]*types.Package, data []byte, path string) (int, *types.Package, error) {
-	// determine low-level encoding format
+	p := importer{
-	read := 0
+		imports: imports,
-	var format byte = 'm' // missing format
+		data:    data,
 	if len(data) > 0 {
 		format = data[0]
 		data = data[1:]
 		read++
 	}
-	if format != 'c' && format != 'd' {
+	p.buf = p.bufarray[:]
-		return read, nil, fmt.Errorf("invalid encoding format in export data: got %q; want 'c' or 'd'", format)
+
 	// read low-level encoding format
 	switch format := p.byte(); format {
 	case 'c':
 		// compact format - nothing to do
 	case 'd':
 		p.debugFormat = true
 	default:
 		return p.read, nil, fmt.Errorf("invalid encoding format in export data: got %q; want 'c' or 'd'", format)
 	}
 	// --- generic export data ---
 	p := importer{
 		imports:     imports,
 		data:        data,
 		debugFormat: format == 'd',
 		read:        read,
 	}
 	if v := p.string(); v != "v0" {
 		return p.read, nil, fmt.Errorf("unknown version: %s", v)
 	}
@ -103,6 +100,8 @@ func BImportData(imports map[string]*types.Package, data []byte, path string) (i
 		_ = p.typ().(*types.Named)
 	}
 	// ignore compiler-specific import data
 	// complete interfaces
 	for _, typ := range p.typList {
 		if it, ok := typ.(*types.Interface); ok {
@ -122,10 +121,12 @@ func BImportData(imports map[string]*types.Package, data []byte, path string) (i
 }
 type importer struct {
-	imports map[string]*types.Package
+	imports  map[string]*types.Package
-	data    []byte
+	data     []byte
-	pkgList []*types.Package
+	buf      []byte   // for reading strings
-	typList []types.Type
+	bufarray [64]byte // initial underlying array for buf, large enough to avoid allocation when compiling std lib
 	pkgList  []*types.Package
 	typList  []types.Type
 	debugFormat bool
 	read        int // bytes read
@ -440,7 +441,7 @@ func exported(name string) bool {
 }
 func (p *importer) value() constant.Value {
-	switch kind := constant.Kind(p.int()); kind {
+	switch tag := p.tagOrIndex(); tag {
 	case falseTag:
 		return constant.MakeBool(false)
 	case trueTag:
@ -456,7 +457,7 @@ func (p *importer) value() constant.Value {
 	case stringTag:
 		return constant.MakeString(p.string())
 	default:
-		panic(fmt.Sprintf("unexpected value kind %d", kind))
+		panic(fmt.Sprintf("unexpected value tag %d", tag))
 	}
 }
@ -517,7 +518,11 @@ func (p *importer) tagOrIndex() int {
 }
 func (p *importer) int() int {
-	return int(p.int64())
+	x := p.int64()
 	if int64(int(x)) != x {
 		panic("exported integer too large")
 	}
 	return int(x)
 }
 func (p *importer) int64() int64 {
@ -533,21 +538,25 @@ func (p *importer) string() string {
 		p.marker('s')
 	}
 	var b []byte
 	if n := int(p.rawInt64()); n > 0 {
-		b = p.data[:n]
+		if cap(p.buf) < n {
-		p.data = p.data[n:]
+			p.buf = make([]byte, n)
-		p.read += n
+		} else {
 			p.buf = p.buf[:n]
 		}
 		for i := 0; i < n; i++ {
 			p.buf[i] = p.byte()
 		}
 		return string(p.buf)
 	}
-	return string(b)
+
 	return ""
 }
 func (p *importer) marker(want byte) {
-	if got := p.data[0]; got != want {
+	if got := p.byte(); got != want {
 		panic(fmt.Sprintf("incorrect marker: got %c; want %c (pos = %d)", got, want, p.read))
 	}
 	p.data = p.data[1:]
 	p.read++
 	pos := p.read
 	if n := int(p.rawInt64()); n != pos {
@ -557,12 +566,41 @@ func (p *importer) marker(want byte) {
 // rawInt64 should only be used by low-level decoders
 func (p *importer) rawInt64() int64 {
-	i, n := binary.Varint(p.data)
+	i, err := binary.ReadVarint(p)
-	p.data = p.data[n:]
+	if err != nil {
-	p.read += n
+		panic(fmt.Sprintf("read error: %v", err))
 	}
 	return i
 }
 // ReadByte is needed so that p can be passed to binary.ReadVarint
 // in rawInt64. It delegates to p.byte and always returns a nil error;
 // p.byte panics rather than returning an error on a bad escape sequence.
 func (p *importer) ReadByte() (byte, error) {
 	return p.byte(), nil
 }
 // byte is the bottleneck interface for reading p.data.
 // It unescapes '|' 'S' to '$' and '|' '|' to '|' and panics on any
 // other escape sequence. The consumed bytes (1, or 2 for an escape
 // sequence) are removed from the front of p.data and added to p.read.
 //
 // NOTE(review): p.data is indexed without a length check, so empty or
 // truncated data would cause an index-out-of-range panic — confirm
 // that callers are prepared for that.
 func (p *importer) byte() byte {
 	b := p.data[0]
 	r := 1 // number of bytes consumed from p.data
 	if b == '|' {
 		// '|' introduces an escape sequence; the next byte selects the result
 		b = p.data[1]
 		r = 2
 		switch b {
 		case 'S':
 			b = '$'
 		case '|':
 			// nothing to do: b already holds the decoded '|'
 		default:
 			panic("unexpected escape sequence in export data")
 		}
 	}
 	p.data = p.data[r:]
 	p.read += r
 	return b
 }
 // ----------------------------------------------------------------------------
 // Export format