gopls/internal/hooks: fixes to diff disaster logic

Previously, the disaster logic in the new diff implementation
would "encrypt" the before/after files using a monoalphabetic
substitution, which has been insecure since the 9th century.
Instead, save plain text, in file with mode 0600, and invite
the user to audit the file before sharing it with us.

Also, separate the two files using a NUL byte, not a newline,
which is highly ambiguous.

Also, in the JSON diff stats writer:
- print a warning if we can't create the log file.
  (The previous code was subtle--it stored a nil *os.File in
  an io.Writer, which caused Writes to fail with an error,
  in effect, silently.)
- Don't hold the mutex around the write operation.
- Fix minor off-by-one error (re: 15)
- Crash if JSON encoding fails; it "can't happen".

Change-Id: I9b6a4145451afd77594f0ef9868143634a9c4561
Reviewed-on: https://go-review.googlesource.com/c/tools/+/445580
Run-TryBot: Alan Donovan <adonovan@google.com>
Reviewed-by: Robert Findley <rfindley@google.com>
gopls-CI: kokoro <noreply+kokoro@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
Alan Donovan 2022-10-26 18:09:24 -04:00
parent e4bb34383f
commit 2af106efed
2 changed files with 58 additions and 120 deletions

View File

@ -5,19 +5,15 @@
package hooks
import (
"crypto/rand"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"log"
"math/big"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"time"
"unicode"
"github.com/sergi/go-diff/diffmatchpatch"
"golang.org/x/tools/internal/bug"
@ -36,108 +32,74 @@ type diffstat struct {
}
var (
mu sync.Mutex // serializes writes and protects ignored
difffd io.Writer
ignored int // lots of the diff calls have 0 diffs
ignoredMu sync.Mutex
ignored int // counter of diff requests on equal strings
diffStatsOnce sync.Once
diffStats *os.File // never closed
)
var fileonce sync.Once
// save writes a JSON record of statistics about diff requests to a temporary file.
func (s *diffstat) save() {
// save log records in a file in os.TempDir().
// diff is frequently called with identical strings, so
// these are somewhat compressed out
fileonce.Do(func() {
fname := filepath.Join(os.TempDir(), fmt.Sprintf("gopls-diff-%x", os.Getpid()))
fd, err := os.Create(fname)
diffStatsOnce.Do(func() {
f, err := ioutil.TempFile("", "gopls-diff-stats-*")
if err != nil {
// now what?
}
difffd = fd
})
mu.Lock()
defer mu.Unlock()
if s.Oldedits == 0 && s.Newedits == 0 {
if ignored < 15 {
// keep track of repeated instances of no diffs
// but only print every 15th
ignored++
log.Printf("can't create diff stats temp file: %v", err) // e.g. disk full
return
}
s.Ignored = ignored + 1
} else {
s.Ignored = ignored
diffStats = f
})
if diffStats == nil {
return
}
// diff is frequently called with equal strings,
// so we count repeated instances but only print every 15th.
ignoredMu.Lock()
if s.Oldedits == 0 && s.Newedits == 0 {
ignored++
if ignored < 15 {
ignoredMu.Unlock()
return
}
}
s.Ignored = ignored
ignored = 0
// it would be really nice to see why diff was called
_, f, l, ok := runtime.Caller(2)
if ok {
var fname string
fname = filepath.Base(f) // diff is only called from a few places
s.Stack = fmt.Sprintf("%s:%d", fname, l)
ignoredMu.Unlock()
// Record the name of the file in which diff was called.
// There aren't many calls, so only the base name is needed.
if _, file, line, ok := runtime.Caller(2); ok {
s.Stack = fmt.Sprintf("%s:%d", filepath.Base(file), line)
}
x, err := json.Marshal(s)
if err != nil {
log.Print(err) // failure to print statistics should not stop gopls
log.Fatalf("internal error marshalling JSON: %v", err)
}
fmt.Fprintf(difffd, "%s\n", x)
fmt.Fprintf(diffStats, "%s\n", x)
}
// save encrypted versions of the broken input and return the file name
// (the saved strings will have the same diff behavior as the user's strings)
// disaster is called when the diff algorithm panics or produces a
// diff that cannot be applied. It saves the broken input in a
// new temporary file and logs the file name, which is returned.
func disaster(before, after string) string {
// encrypt before and after for privacy. (randomized monoalphabetic cipher)
// got will contain the substitution cipher
// for the runes in before and after
got := map[rune]rune{}
for _, r := range before {
got[r] = ' ' // value doesn't matter
}
for _, r := range after {
got[r] = ' '
}
repl := initrepl(len(got))
i := 0
for k := range got { // randomized
got[k] = repl[i]
i++
}
// use got to encrypt before and after
subst := func(r rune) rune { return got[r] }
first := strings.Map(subst, before)
second := strings.Map(subst, after)
// We use the pid to salt the name, not os.TempFile,
// so that each process creates at most one file.
// One is sufficient for a bug report.
filename := fmt.Sprintf("%s/gopls-diff-bug-%x", os.TempDir(), os.Getpid())
// one failure per session is enough, and more private.
// this saves the last one.
fname := fmt.Sprintf("%s/gopls-failed-%x", os.TempDir(), os.Getpid())
fd, err := os.Create(fname)
defer fd.Close()
_, err = fmt.Fprintf(fd, "%s\n%s\n", first, second)
if err != nil {
// what do we tell the user?
// We use NUL as a separator: it should never appear in Go source.
data := before + "\x00" + after
if err := ioutil.WriteFile(filename, []byte(data), 0600); err != nil {
log.Printf("failed to write diff bug report: %v", err)
return ""
}
// ask the user to send us the file, somehow
return fname
}
func initrepl(n int) []rune {
repl := make([]rune, 0, n)
for r := rune(0); len(repl) < n; r++ {
if unicode.IsLetter(r) || unicode.IsNumber(r) {
repl = append(repl, r)
}
}
// randomize repl
rdr := rand.Reader
lim := big.NewInt(int64(len(repl)))
for i := 1; i < n; i++ {
v, _ := rand.Int(rdr, lim)
k := v.Int64()
repl[i], repl[k] = repl[k], repl[i]
}
return repl
// TODO(adonovan): is there a better way to surface this?
log.Printf("Bug detected in diff algorithm! Please send file %s to the maintainers of gopls if you are comfortable sharing its contents.", filename)
return filename
}
// BothDiffs edits calls both the new and old diffs, checks that the new diffs
@ -145,7 +107,7 @@ func initrepl(n int) []rune {
func BothDiffs(before, after string) (edits []diff.Edit) {
// The new diff code contains a lot of internal checks that panic when they
// fail. This code catches the panics, or other failures, tries to save
// the failing example (and ut wiykd ask the user to send it back to us, and
// the failing example (and it would ask the user to send it back to us, and
// changes options.newDiff to 'old', if only we could figure out how.)
stat := diffstat{Before: len(before), After: len(after)}
now := time.Now()

View File

@ -5,11 +5,9 @@
package hooks
import (
"fmt"
"io/ioutil"
"os"
"testing"
"unicode/utf8"
"golang.org/x/tools/internal/diff/difftest"
)
@ -18,40 +16,18 @@ func TestDiff(t *testing.T) {
difftest.DiffTest(t, ComputeEdits)
}
func TestRepl(t *testing.T) {
t.Skip("just for checking repl by looking at it")
repl := initrepl(800)
t.Errorf("%q", string(repl))
t.Errorf("%d", len(repl))
}
func TestDisaster(t *testing.T) {
a := "This is a string,(\u0995) just for basic functionality"
b := "Ths is another string, (\u0996) to see if disaster will store stuff correctly"
a := "This is a string,(\u0995) just for basic\nfunctionality"
b := "This is another string, (\u0996) to see if disaster will store stuff correctly"
fname := disaster(a, b)
buf, err := ioutil.ReadFile(fname)
if err != nil {
t.Errorf("error %v reading %s", err, fname)
t.Fatal(err)
}
var x, y string
n, err := fmt.Sscanf(string(buf), "%s\n%s\n", &x, &y)
if n != 2 {
t.Errorf("got %d, expected 2", n)
t.Logf("read %q", string(buf))
if string(buf) != a+"\x00"+b {
t.Error("failed to record original strings")
}
if a == x || b == y {
t.Error("failed to encrypt")
}
err = os.Remove(fname)
if err != nil {
t.Errorf("%v removing %s", err, fname)
}
alen, blen := utf8.RuneCount([]byte(a)), utf8.RuneCount([]byte(b))
xlen, ylen := utf8.RuneCount([]byte(x)), utf8.RuneCount([]byte(y))
if alen != xlen {
t.Errorf("a; got %d, expected %d", xlen, alen)
}
if blen != ylen {
t.Errorf("b: got %d expected %d", ylen, blen)
if err := os.Remove(fname); err != nil {
t.Error(err)
}
}