mirror of https://github.com/golang/go.git
gopls/internal/hooks: fixes to diff disaster logic
Previously, the disaster logic in the new diff implementation would "encrypt" the before/after files using a monoalphabetic substitution, which has been insecure since the 9th century. Instead, save plain text, in file with mode 0600, and invite the user to audit the file before sharing it with us. Also, separate the two files using a NUL byte, not a newline, which is highly ambiguous. Also, in the JSON diff stats writer: - print a warning if we can't create the log file. (The previous code was subtle--it stored a nil *os.File in an io.Writer, which caused Writes to fail with an error, in effect, silently.) - Don't hold the mutex around the write operation. - Fix minor off-by-one error (re: 15) - Crash if JSON encoding fails; it "can't happen". Change-Id: I9b6a4145451afd77594f0ef9868143634a9c4561 Reviewed-on: https://go-review.googlesource.com/c/tools/+/445580 Run-TryBot: Alan Donovan <adonovan@google.com> Reviewed-by: Robert Findley <rfindley@google.com> gopls-CI: kokoro <noreply+kokoro@google.com> TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
parent
e4bb34383f
commit
2af106efed
|
|
@ -5,19 +5,15 @@
|
|||
package hooks
|
||||
|
||||
import (
|
||||
"crypto/rand"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"math/big"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
"unicode"
|
||||
|
||||
"github.com/sergi/go-diff/diffmatchpatch"
|
||||
"golang.org/x/tools/internal/bug"
|
||||
|
|
@ -36,108 +32,74 @@ type diffstat struct {
|
|||
}
|
||||
|
||||
var (
|
||||
mu sync.Mutex // serializes writes and protects ignored
|
||||
difffd io.Writer
|
||||
ignored int // lots of the diff calls have 0 diffs
|
||||
ignoredMu sync.Mutex
|
||||
ignored int // counter of diff requests on equal strings
|
||||
|
||||
diffStatsOnce sync.Once
|
||||
diffStats *os.File // never closed
|
||||
)
|
||||
|
||||
var fileonce sync.Once
|
||||
|
||||
// save writes a JSON record of statistics about diff requests to a temporary file.
|
||||
func (s *diffstat) save() {
|
||||
// save log records in a file in os.TempDir().
|
||||
// diff is frequently called with identical strings, so
|
||||
// these are somewhat compressed out
|
||||
fileonce.Do(func() {
|
||||
fname := filepath.Join(os.TempDir(), fmt.Sprintf("gopls-diff-%x", os.Getpid()))
|
||||
fd, err := os.Create(fname)
|
||||
diffStatsOnce.Do(func() {
|
||||
f, err := ioutil.TempFile("", "gopls-diff-stats-*")
|
||||
if err != nil {
|
||||
// now what?
|
||||
}
|
||||
difffd = fd
|
||||
})
|
||||
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
if s.Oldedits == 0 && s.Newedits == 0 {
|
||||
if ignored < 15 {
|
||||
// keep track of repeated instances of no diffs
|
||||
// but only print every 15th
|
||||
ignored++
|
||||
log.Printf("can't create diff stats temp file: %v", err) // e.g. disk full
|
||||
return
|
||||
}
|
||||
s.Ignored = ignored + 1
|
||||
} else {
|
||||
s.Ignored = ignored
|
||||
diffStats = f
|
||||
})
|
||||
if diffStats == nil {
|
||||
return
|
||||
}
|
||||
|
||||
// diff is frequently called with equal strings,
|
||||
// so we count repeated instances but only print every 15th.
|
||||
ignoredMu.Lock()
|
||||
if s.Oldedits == 0 && s.Newedits == 0 {
|
||||
ignored++
|
||||
if ignored < 15 {
|
||||
ignoredMu.Unlock()
|
||||
return
|
||||
}
|
||||
}
|
||||
s.Ignored = ignored
|
||||
ignored = 0
|
||||
// it would be really nice to see why diff was called
|
||||
_, f, l, ok := runtime.Caller(2)
|
||||
if ok {
|
||||
var fname string
|
||||
fname = filepath.Base(f) // diff is only called from a few places
|
||||
s.Stack = fmt.Sprintf("%s:%d", fname, l)
|
||||
ignoredMu.Unlock()
|
||||
|
||||
// Record the name of the file in which diff was called.
|
||||
// There aren't many calls, so only the base name is needed.
|
||||
if _, file, line, ok := runtime.Caller(2); ok {
|
||||
s.Stack = fmt.Sprintf("%s:%d", filepath.Base(file), line)
|
||||
}
|
||||
x, err := json.Marshal(s)
|
||||
if err != nil {
|
||||
log.Print(err) // failure to print statistics should not stop gopls
|
||||
log.Fatalf("internal error marshalling JSON: %v", err)
|
||||
}
|
||||
fmt.Fprintf(difffd, "%s\n", x)
|
||||
fmt.Fprintf(diffStats, "%s\n", x)
|
||||
}
|
||||
|
||||
// save encrypted versions of the broken input and return the file name
|
||||
// (the saved strings will have the same diff behavior as the user's strings)
|
||||
// disaster is called when the diff algorithm panics or produces a
|
||||
// diff that cannot be applied. It saves the broken input in a
|
||||
// new temporary file and logs the file name, which is returned.
|
||||
func disaster(before, after string) string {
|
||||
// encrypt before and after for privacy. (randomized monoalphabetic cipher)
|
||||
// got will contain the substitution cipher
|
||||
// for the runes in before and after
|
||||
got := map[rune]rune{}
|
||||
for _, r := range before {
|
||||
got[r] = ' ' // value doesn't matter
|
||||
}
|
||||
for _, r := range after {
|
||||
got[r] = ' '
|
||||
}
|
||||
repl := initrepl(len(got))
|
||||
i := 0
|
||||
for k := range got { // randomized
|
||||
got[k] = repl[i]
|
||||
i++
|
||||
}
|
||||
// use got to encrypt before and after
|
||||
subst := func(r rune) rune { return got[r] }
|
||||
first := strings.Map(subst, before)
|
||||
second := strings.Map(subst, after)
|
||||
// We use the pid to salt the name, not os.TempFile,
|
||||
// so that each process creates at most one file.
|
||||
// One is sufficient for a bug report.
|
||||
filename := fmt.Sprintf("%s/gopls-diff-bug-%x", os.TempDir(), os.Getpid())
|
||||
|
||||
// one failure per session is enough, and more private.
|
||||
// this saves the last one.
|
||||
fname := fmt.Sprintf("%s/gopls-failed-%x", os.TempDir(), os.Getpid())
|
||||
fd, err := os.Create(fname)
|
||||
defer fd.Close()
|
||||
_, err = fmt.Fprintf(fd, "%s\n%s\n", first, second)
|
||||
if err != nil {
|
||||
// what do we tell the user?
|
||||
// We use NUL as a separator: it should never appear in Go source.
|
||||
data := before + "\x00" + after
|
||||
|
||||
if err := ioutil.WriteFile(filename, []byte(data), 0600); err != nil {
|
||||
log.Printf("failed to write diff bug report: %v", err)
|
||||
return ""
|
||||
}
|
||||
// ask the user to send us the file, somehow
|
||||
return fname
|
||||
}
|
||||
|
||||
func initrepl(n int) []rune {
|
||||
repl := make([]rune, 0, n)
|
||||
for r := rune(0); len(repl) < n; r++ {
|
||||
if unicode.IsLetter(r) || unicode.IsNumber(r) {
|
||||
repl = append(repl, r)
|
||||
}
|
||||
}
|
||||
// randomize repl
|
||||
rdr := rand.Reader
|
||||
lim := big.NewInt(int64(len(repl)))
|
||||
for i := 1; i < n; i++ {
|
||||
v, _ := rand.Int(rdr, lim)
|
||||
k := v.Int64()
|
||||
repl[i], repl[k] = repl[k], repl[i]
|
||||
}
|
||||
return repl
|
||||
// TODO(adonovan): is there a better way to surface this?
|
||||
log.Printf("Bug detected in diff algorithm! Please send file %s to the maintainers of gopls if you are comfortable sharing its contents.", filename)
|
||||
|
||||
return filename
|
||||
}
|
||||
|
||||
// BothDiffs edits calls both the new and old diffs, checks that the new diffs
|
||||
|
|
@ -145,7 +107,7 @@ func initrepl(n int) []rune {
|
|||
func BothDiffs(before, after string) (edits []diff.Edit) {
|
||||
// The new diff code contains a lot of internal checks that panic when they
|
||||
// fail. This code catches the panics, or other failures, tries to save
|
||||
// the failing example (and ut wiykd ask the user to send it back to us, and
|
||||
// the failing example (and it would ask the user to send it back to us, and
|
||||
// changes options.newDiff to 'old', if only we could figure out how.)
|
||||
stat := diffstat{Before: len(before), After: len(after)}
|
||||
now := time.Now()
|
||||
|
|
|
|||
|
|
@ -5,11 +5,9 @@
|
|||
package hooks
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/tools/internal/diff/difftest"
|
||||
)
|
||||
|
|
@ -18,40 +16,18 @@ func TestDiff(t *testing.T) {
|
|||
difftest.DiffTest(t, ComputeEdits)
|
||||
}
|
||||
|
||||
func TestRepl(t *testing.T) {
|
||||
t.Skip("just for checking repl by looking at it")
|
||||
repl := initrepl(800)
|
||||
t.Errorf("%q", string(repl))
|
||||
t.Errorf("%d", len(repl))
|
||||
}
|
||||
|
||||
func TestDisaster(t *testing.T) {
|
||||
a := "This is a string,(\u0995) just for basic functionality"
|
||||
b := "Ths is another string, (\u0996) to see if disaster will store stuff correctly"
|
||||
a := "This is a string,(\u0995) just for basic\nfunctionality"
|
||||
b := "This is another string, (\u0996) to see if disaster will store stuff correctly"
|
||||
fname := disaster(a, b)
|
||||
buf, err := ioutil.ReadFile(fname)
|
||||
if err != nil {
|
||||
t.Errorf("error %v reading %s", err, fname)
|
||||
t.Fatal(err)
|
||||
}
|
||||
var x, y string
|
||||
n, err := fmt.Sscanf(string(buf), "%s\n%s\n", &x, &y)
|
||||
if n != 2 {
|
||||
t.Errorf("got %d, expected 2", n)
|
||||
t.Logf("read %q", string(buf))
|
||||
if string(buf) != a+"\x00"+b {
|
||||
t.Error("failed to record original strings")
|
||||
}
|
||||
if a == x || b == y {
|
||||
t.Error("failed to encrypt")
|
||||
}
|
||||
err = os.Remove(fname)
|
||||
if err != nil {
|
||||
t.Errorf("%v removing %s", err, fname)
|
||||
}
|
||||
alen, blen := utf8.RuneCount([]byte(a)), utf8.RuneCount([]byte(b))
|
||||
xlen, ylen := utf8.RuneCount([]byte(x)), utf8.RuneCount([]byte(y))
|
||||
if alen != xlen {
|
||||
t.Errorf("a; got %d, expected %d", xlen, alen)
|
||||
}
|
||||
if blen != ylen {
|
||||
t.Errorf("b: got %d expected %d", ylen, blen)
|
||||
if err := os.Remove(fname); err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue