From 817f567fa8be4753acfad0b795ec41ab1c0cdb65 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 5 Nov 2018 12:39:11 +0100 Subject: [PATCH 01/25] runtime/internal/sys: regenerate zgoos_*.go files zgoos_aix.go is missing GoosJs, the order of GoosAndroid and GoosAix is mixed up in all files and GoosHurd was added after CL 146023 introduced GOOS=hurd. Change-Id: I7e2f5a15645272e9020cfca86e44c364fc072a2b Reviewed-on: https://go-review.googlesource.com/c/147397 Run-TryBot: Tobias Klauser TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- src/runtime/internal/sys/zgoos_aix.go | 4 +++- src/runtime/internal/sys/zgoos_android.go | 3 ++- src/runtime/internal/sys/zgoos_darwin.go | 3 ++- src/runtime/internal/sys/zgoos_dragonfly.go | 3 ++- src/runtime/internal/sys/zgoos_freebsd.go | 3 ++- src/runtime/internal/sys/zgoos_hurd.go | 23 +++++++++++++++++++++ src/runtime/internal/sys/zgoos_js.go | 2 ++ src/runtime/internal/sys/zgoos_linux.go | 3 ++- src/runtime/internal/sys/zgoos_nacl.go | 3 ++- src/runtime/internal/sys/zgoos_netbsd.go | 3 ++- src/runtime/internal/sys/zgoos_openbsd.go | 3 ++- src/runtime/internal/sys/zgoos_plan9.go | 3 ++- src/runtime/internal/sys/zgoos_solaris.go | 3 ++- src/runtime/internal/sys/zgoos_windows.go | 3 ++- src/runtime/internal/sys/zgoos_zos.go | 3 ++- 15 files changed, 52 insertions(+), 13 deletions(-) create mode 100644 src/runtime/internal/sys/zgoos_hurd.go diff --git a/src/runtime/internal/sys/zgoos_aix.go b/src/runtime/internal/sys/zgoos_aix.go index 9ce5b3434f..909bfc5e93 100644 --- a/src/runtime/internal/sys/zgoos_aix.go +++ b/src/runtime/internal/sys/zgoos_aix.go @@ -6,11 +6,13 @@ package sys const GOOS = `aix` -const GoosAndroid = 0 const GoosAix = 1 +const GoosAndroid = 0 const GoosDarwin = 0 const GoosDragonfly = 0 const GoosFreebsd = 0 +const GoosHurd = 0 +const GoosJs = 0 const GoosLinux = 0 const GoosNacl = 0 const GoosNetbsd = 0 diff --git a/src/runtime/internal/sys/zgoos_android.go 
b/src/runtime/internal/sys/zgoos_android.go index 36a5768ab6..434ce46712 100644 --- a/src/runtime/internal/sys/zgoos_android.go +++ b/src/runtime/internal/sys/zgoos_android.go @@ -6,11 +6,12 @@ package sys const GOOS = `android` -const GoosAndroid = 1 const GoosAix = 0 +const GoosAndroid = 1 const GoosDarwin = 0 const GoosDragonfly = 0 const GoosFreebsd = 0 +const GoosHurd = 0 const GoosJs = 0 const GoosLinux = 0 const GoosNacl = 0 diff --git a/src/runtime/internal/sys/zgoos_darwin.go b/src/runtime/internal/sys/zgoos_darwin.go index 10c0e88e9a..b645d1cf5f 100644 --- a/src/runtime/internal/sys/zgoos_darwin.go +++ b/src/runtime/internal/sys/zgoos_darwin.go @@ -6,11 +6,12 @@ package sys const GOOS = `darwin` -const GoosAndroid = 0 const GoosAix = 0 +const GoosAndroid = 0 const GoosDarwin = 1 const GoosDragonfly = 0 const GoosFreebsd = 0 +const GoosHurd = 0 const GoosJs = 0 const GoosLinux = 0 const GoosNacl = 0 diff --git a/src/runtime/internal/sys/zgoos_dragonfly.go b/src/runtime/internal/sys/zgoos_dragonfly.go index 5cb47cb84e..154cec370f 100644 --- a/src/runtime/internal/sys/zgoos_dragonfly.go +++ b/src/runtime/internal/sys/zgoos_dragonfly.go @@ -6,11 +6,12 @@ package sys const GOOS = `dragonfly` -const GoosAndroid = 0 const GoosAix = 0 +const GoosAndroid = 0 const GoosDarwin = 0 const GoosDragonfly = 1 const GoosFreebsd = 0 +const GoosHurd = 0 const GoosJs = 0 const GoosLinux = 0 const GoosNacl = 0 diff --git a/src/runtime/internal/sys/zgoos_freebsd.go b/src/runtime/internal/sys/zgoos_freebsd.go index 470406ce5f..5f41c03445 100644 --- a/src/runtime/internal/sys/zgoos_freebsd.go +++ b/src/runtime/internal/sys/zgoos_freebsd.go @@ -6,11 +6,12 @@ package sys const GOOS = `freebsd` -const GoosAndroid = 0 const GoosAix = 0 +const GoosAndroid = 0 const GoosDarwin = 0 const GoosDragonfly = 0 const GoosFreebsd = 1 +const GoosHurd = 0 const GoosJs = 0 const GoosLinux = 0 const GoosNacl = 0 diff --git a/src/runtime/internal/sys/zgoos_hurd.go 
b/src/runtime/internal/sys/zgoos_hurd.go new file mode 100644 index 0000000000..53f7fc384b --- /dev/null +++ b/src/runtime/internal/sys/zgoos_hurd.go @@ -0,0 +1,23 @@ +// Code generated by gengoos.go using 'go generate'. DO NOT EDIT. + +// +build hurd + +package sys + +const GOOS = `hurd` + +const GoosAix = 0 +const GoosAndroid = 0 +const GoosDarwin = 0 +const GoosDragonfly = 0 +const GoosFreebsd = 0 +const GoosHurd = 1 +const GoosJs = 0 +const GoosLinux = 0 +const GoosNacl = 0 +const GoosNetbsd = 0 +const GoosOpenbsd = 0 +const GoosPlan9 = 0 +const GoosSolaris = 0 +const GoosWindows = 0 +const GoosZos = 0 diff --git a/src/runtime/internal/sys/zgoos_js.go b/src/runtime/internal/sys/zgoos_js.go index cc8eef080f..c6cca49bd9 100644 --- a/src/runtime/internal/sys/zgoos_js.go +++ b/src/runtime/internal/sys/zgoos_js.go @@ -6,10 +6,12 @@ package sys const GOOS = `js` +const GoosAix = 0 const GoosAndroid = 0 const GoosDarwin = 0 const GoosDragonfly = 0 const GoosFreebsd = 0 +const GoosHurd = 0 const GoosJs = 1 const GoosLinux = 0 const GoosNacl = 0 diff --git a/src/runtime/internal/sys/zgoos_linux.go b/src/runtime/internal/sys/zgoos_linux.go index 76235b748c..088dbc105b 100644 --- a/src/runtime/internal/sys/zgoos_linux.go +++ b/src/runtime/internal/sys/zgoos_linux.go @@ -7,11 +7,12 @@ package sys const GOOS = `linux` -const GoosAndroid = 0 const GoosAix = 0 +const GoosAndroid = 0 const GoosDarwin = 0 const GoosDragonfly = 0 const GoosFreebsd = 0 +const GoosHurd = 0 const GoosJs = 0 const GoosLinux = 1 const GoosNacl = 0 diff --git a/src/runtime/internal/sys/zgoos_nacl.go b/src/runtime/internal/sys/zgoos_nacl.go index 6d28b59667..65bec4af9e 100644 --- a/src/runtime/internal/sys/zgoos_nacl.go +++ b/src/runtime/internal/sys/zgoos_nacl.go @@ -6,11 +6,12 @@ package sys const GOOS = `nacl` -const GoosAndroid = 0 const GoosAix = 0 +const GoosAndroid = 0 const GoosDarwin = 0 const GoosDragonfly = 0 const GoosFreebsd = 0 +const GoosHurd = 0 const GoosJs = 0 const GoosLinux = 0 
const GoosNacl = 1 diff --git a/src/runtime/internal/sys/zgoos_netbsd.go b/src/runtime/internal/sys/zgoos_netbsd.go index ef8d938ddb..93d0fa7e11 100644 --- a/src/runtime/internal/sys/zgoos_netbsd.go +++ b/src/runtime/internal/sys/zgoos_netbsd.go @@ -6,11 +6,12 @@ package sys const GOOS = `netbsd` -const GoosAndroid = 0 const GoosAix = 0 +const GoosAndroid = 0 const GoosDarwin = 0 const GoosDragonfly = 0 const GoosFreebsd = 0 +const GoosHurd = 0 const GoosJs = 0 const GoosLinux = 0 const GoosNacl = 0 diff --git a/src/runtime/internal/sys/zgoos_openbsd.go b/src/runtime/internal/sys/zgoos_openbsd.go index 2e43847396..79193593f5 100644 --- a/src/runtime/internal/sys/zgoos_openbsd.go +++ b/src/runtime/internal/sys/zgoos_openbsd.go @@ -6,11 +6,12 @@ package sys const GOOS = `openbsd` -const GoosAndroid = 0 const GoosAix = 0 +const GoosAndroid = 0 const GoosDarwin = 0 const GoosDragonfly = 0 const GoosFreebsd = 0 +const GoosHurd = 0 const GoosJs = 0 const GoosLinux = 0 const GoosNacl = 0 diff --git a/src/runtime/internal/sys/zgoos_plan9.go b/src/runtime/internal/sys/zgoos_plan9.go index ed598dcaac..2b95e08080 100644 --- a/src/runtime/internal/sys/zgoos_plan9.go +++ b/src/runtime/internal/sys/zgoos_plan9.go @@ -6,11 +6,12 @@ package sys const GOOS = `plan9` -const GoosAndroid = 0 const GoosAix = 0 +const GoosAndroid = 0 const GoosDarwin = 0 const GoosDragonfly = 0 const GoosFreebsd = 0 +const GoosHurd = 0 const GoosJs = 0 const GoosLinux = 0 const GoosNacl = 0 diff --git a/src/runtime/internal/sys/zgoos_solaris.go b/src/runtime/internal/sys/zgoos_solaris.go index fe690df6c2..6e3988aed0 100644 --- a/src/runtime/internal/sys/zgoos_solaris.go +++ b/src/runtime/internal/sys/zgoos_solaris.go @@ -6,11 +6,12 @@ package sys const GOOS = `solaris` -const GoosAndroid = 0 const GoosAix = 0 +const GoosAndroid = 0 const GoosDarwin = 0 const GoosDragonfly = 0 const GoosFreebsd = 0 +const GoosHurd = 0 const GoosJs = 0 const GoosLinux = 0 const GoosNacl = 0 diff --git 
a/src/runtime/internal/sys/zgoos_windows.go b/src/runtime/internal/sys/zgoos_windows.go index ea7c43bdf4..a56e12544a 100644 --- a/src/runtime/internal/sys/zgoos_windows.go +++ b/src/runtime/internal/sys/zgoos_windows.go @@ -6,11 +6,12 @@ package sys const GOOS = `windows` -const GoosAndroid = 0 const GoosAix = 0 +const GoosAndroid = 0 const GoosDarwin = 0 const GoosDragonfly = 0 const GoosFreebsd = 0 +const GoosHurd = 0 const GoosJs = 0 const GoosLinux = 0 const GoosNacl = 0 diff --git a/src/runtime/internal/sys/zgoos_zos.go b/src/runtime/internal/sys/zgoos_zos.go index d4027cf876..0f56e46002 100644 --- a/src/runtime/internal/sys/zgoos_zos.go +++ b/src/runtime/internal/sys/zgoos_zos.go @@ -6,11 +6,12 @@ package sys const GOOS = `zos` -const GoosAndroid = 0 const GoosAix = 0 +const GoosAndroid = 0 const GoosDarwin = 0 const GoosDragonfly = 0 const GoosFreebsd = 0 +const GoosHurd = 0 const GoosJs = 0 const GoosLinux = 0 const GoosNacl = 0 From f999576dd8df6e4d09e3c67c23ba4d8dc18d53d3 Mon Sep 17 00:00:00 2001 From: Muhammad Falak R Wani Date: Mon, 5 Nov 2018 22:04:46 +0530 Subject: [PATCH 02/25] cmd/addr2line: defer closing objfile Change-Id: I19ff9d231c4cc779b0737802c3c40ee2e00934dd Reviewed-on: https://go-review.googlesource.com/c/147477 Reviewed-by: Brad Fitzpatrick --- src/cmd/addr2line/main.go | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cmd/addr2line/main.go b/src/cmd/addr2line/main.go index 267f4170a8..018802940b 100644 --- a/src/cmd/addr2line/main.go +++ b/src/cmd/addr2line/main.go @@ -61,6 +61,7 @@ func main() { if err != nil { log.Fatal(err) } + defer f.Close() tab, err := f.PCLineTable() if err != nil { From 3053788cac0343c2fd29806ebc358d2f63976695 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Mon, 24 Jul 2017 11:37:59 -0400 Subject: [PATCH 03/25] cmd/trace: add minimum mutator utilization (MMU) plot This adds an endpoint to the trace tool that plots the minimum mutator utilization curve using information on mark assists and GC pauses from 
the trace. This commit implements a fairly straightforward O(nm) algorithm for computing the MMU (and tests against an even more direct but slower algorithm). Future commits will extend and optimize this algorithm. This should be useful for debugging and understanding mutator utilization issues like #14951, #14812, #18155. #18534, #21107, particularly once follow-up CLs add trace cross-referencing. Change-Id: Ic2866869e7da1e6c56ba3e809abbcb2eb9c4923a Reviewed-on: https://go-review.googlesource.com/c/60790 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Hyang-Ah Hana Kim --- src/cmd/trace/main.go | 1 + src/cmd/trace/mmu.go | 160 ++++++++++++++++++ src/internal/traceparser/gc.go | 248 ++++++++++++++++++++++++++++ src/internal/traceparser/gc_test.go | 158 ++++++++++++++++++ 4 files changed, 567 insertions(+) create mode 100644 src/cmd/trace/mmu.go create mode 100644 src/internal/traceparser/gc.go create mode 100644 src/internal/traceparser/gc_test.go diff --git a/src/cmd/trace/main.go b/src/cmd/trace/main.go index a33d2f4679..f6ec38d673 100644 --- a/src/cmd/trace/main.go +++ b/src/cmd/trace/main.go @@ -202,6 +202,7 @@ var templMain = template.Must(template.New("").Parse(` Scheduler latency profile ()
User-defined tasks
User-defined regions
+Minimum mutator utilization
`)) diff --git a/src/cmd/trace/mmu.go b/src/cmd/trace/mmu.go new file mode 100644 index 0000000000..cc14025d38 --- /dev/null +++ b/src/cmd/trace/mmu.go @@ -0,0 +1,160 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Minimum mutator utilization (MMU) graphing. + +package main + +import ( + "encoding/json" + "fmt" + trace "internal/traceparser" + "log" + "math" + "net/http" + "strings" + "sync" + "time" +) + +func init() { + http.HandleFunc("/mmu", httpMMU) + http.HandleFunc("/mmuPlot", httpMMUPlot) +} + +var mmuCache struct { + init sync.Once + util []trace.MutatorUtil + mmuCurve *trace.MMUCurve + err error +} + +func getMMUCurve() ([]trace.MutatorUtil, *trace.MMUCurve, error) { + mmuCache.init.Do(func() { + tr, err := parseTrace() + if err != nil { + mmuCache.err = err + } else { + mmuCache.util = tr.MutatorUtilization() + mmuCache.mmuCurve = trace.NewMMUCurve(mmuCache.util) + } + }) + return mmuCache.util, mmuCache.mmuCurve, mmuCache.err +} + +// httpMMU serves the MMU plot page. +func httpMMU(w http.ResponseWriter, r *http.Request) { + http.ServeContent(w, r, "", time.Time{}, strings.NewReader(templMMU)) +} + +// httpMMUPlot serves the JSON data for the MMU plot. +func httpMMUPlot(w http.ResponseWriter, r *http.Request) { + mu, mmuCurve, err := getMMUCurve() + if err != nil { + http.Error(w, fmt.Sprintf("failed to parse events: %v", err), http.StatusInternalServerError) + return + } + + // Find a nice starting point for the plot. + xMin := time.Second + for xMin > 1 { + if mmu := mmuCurve.MMU(xMin); mmu < 0.0001 { + break + } + xMin /= 1000 + } + // Cover six orders of magnitude. + xMax := xMin * 1e6 + // But no more than the length of the trace. + if maxMax := time.Duration(mu[len(mu)-1].Time - mu[0].Time); xMax > maxMax { + xMax = maxMax + } + // Compute MMU curve. 
+ logMin, logMax := math.Log(float64(xMin)), math.Log(float64(xMax)) + const samples = 100 + plot := make([][2]float64, samples) + for i := 0; i < samples; i++ { + window := time.Duration(math.Exp(float64(i)/(samples-1)*(logMax-logMin) + logMin)) + y := mmuCurve.MMU(window) + plot[i] = [2]float64{float64(window), y} + } + + // Create JSON response. + err = json.NewEncoder(w).Encode(map[string]interface{}{"xMin": int64(xMin), "xMax": int64(xMax), "curve": plot}) + if err != nil { + log.Printf("failed to serialize response: %v", err) + return + } +} + +var templMMU = ` + + + + + + + + +
Loading plot...
+ + +` diff --git a/src/internal/traceparser/gc.go b/src/internal/traceparser/gc.go new file mode 100644 index 0000000000..7e349308d7 --- /dev/null +++ b/src/internal/traceparser/gc.go @@ -0,0 +1,248 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package traceparser + +import ( + "strings" + "time" +) + +// MutatorUtil is a change in mutator utilization at a particular +// time. Mutator utilization functions are represented as a +// time-ordered []MutatorUtil. +type MutatorUtil struct { + Time int64 + // Util is the mean mutator utilization starting at Time. This + // is in the range [0, 1]. + Util float64 +} + +// MutatorUtilization returns the mutator utilization function for the +// given trace. This function will always end with 0 utilization. The +// bounds of the function are implicit in the first and last event; +// outside of these bounds the function is undefined. +func (p *Parsed) MutatorUtilization() []MutatorUtil { + events := p.Events + if len(events) == 0 { + return nil + } + + gomaxprocs, gcPs, stw := 1, 0, 0 + out := []MutatorUtil{{events[0].Ts, 1}} + assists := map[uint64]bool{} + block := map[uint64]*Event{} + bgMark := map[uint64]bool{} + for _, ev := range events { + switch ev.Type { + case EvGomaxprocs: + gomaxprocs = int(ev.Args[0]) + case EvGCSTWStart: + stw++ + case EvGCSTWDone: + stw-- + case EvGCMarkAssistStart: + gcPs++ + assists[ev.G] = true + case EvGCMarkAssistDone: + gcPs-- + delete(assists, ev.G) + case EvGoStartLabel: + if strings.HasPrefix(ev.SArgs[0], "GC ") && ev.SArgs[0] != "GC (idle)" { + // Background mark worker. + bgMark[ev.G] = true + gcPs++ + } + fallthrough + case EvGoStart: + if assists[ev.G] { + // Unblocked during assist. + gcPs++ + } + block[ev.G] = ev.Link + default: + if ev != block[ev.G] { + continue + } + + if assists[ev.G] { + // Blocked during assist. 
+ gcPs-- + } + if bgMark[ev.G] { + // Background mark worker done. + gcPs-- + delete(bgMark, ev.G) + } + delete(block, ev.G) + } + + ps := gcPs + if stw > 0 { + ps = gomaxprocs + } + mu := MutatorUtil{ev.Ts, 1 - float64(ps)/float64(gomaxprocs)} + if mu.Util == out[len(out)-1].Util { + // No change. + continue + } + if mu.Time == out[len(out)-1].Time { + // Take the lowest utilization at a time stamp. + if mu.Util < out[len(out)-1].Util { + out[len(out)-1] = mu + } + } else { + out = append(out, mu) + } + } + + // Add final 0 utilization event. This is important to mark + // the end of the trace. The exact value shouldn't matter + // since no window should extend beyond this, but using 0 is + // symmetric with the start of the trace. + endTime := events[len(events)-1].Ts + if out[len(out)-1].Time == endTime { + out[len(out)-1].Util = 0 + } else { + out = append(out, MutatorUtil{endTime, 0}) + } + + return out +} + +// totalUtil is total utilization, measured in nanoseconds. This is a +// separate type primarily to distinguish it from mean utilization, +// which is also a float64. +type totalUtil float64 + +func totalUtilOf(meanUtil float64, dur int64) totalUtil { + return totalUtil(meanUtil * float64(dur)) +} + +// mean returns the mean utilization over dur. +func (u totalUtil) mean(dur time.Duration) float64 { + return float64(u) / float64(dur) +} + +// An MMUCurve is the minimum mutator utilization curve across +// multiple window sizes. +type MMUCurve struct { + util []MutatorUtil + // sums[j] is the cumulative sum of util[:j]. + sums []totalUtil +} + +// NewMMUCurve returns an MMU curve for the given mutator utilization +// function. +func NewMMUCurve(util []MutatorUtil) *MMUCurve { + // Compute cumulative sum. 
+ sums := make([]totalUtil, len(util)) + var prev MutatorUtil + var sum totalUtil + for j, u := range util { + sum += totalUtilOf(prev.Util, u.Time-prev.Time) + sums[j] = sum + prev = u + } + + return &MMUCurve{util, sums} +} + +// MMU returns the minimum mutator utilization for the given time +// window. This is the minimum utilization for all windows of this +// duration across the execution. The returned value is in the range +// [0, 1]. +func (c *MMUCurve) MMU(window time.Duration) (mmu float64) { + if window <= 0 { + return 0 + } + util := c.util + if max := time.Duration(util[len(util)-1].Time - util[0].Time); window > max { + window = max + } + + mmu = 1.0 + + // We think of the mutator utilization over time as the + // box-filtered utilization function, which we call the + // "windowed mutator utilization function". The resulting + // function is continuous and piecewise linear (unless + // window==0, which we handle elsewhere), where the boundaries + // between segments occur when either edge of the window + // encounters a change in the instantaneous mutator + // utilization function. Hence, the minimum of this function + // will always occur when one of the edges of the window + // aligns with a utilization change, so these are the only + // points we need to consider. + // + // We compute the mutator utilization function incrementally + // by tracking the integral from t=0 to the left edge of the + // window and to the right edge of the window. + left := integrator{c, 0} + right := left + time := util[0].Time + for { + // Advance edges to time and time+window. + mu := (right.advance(time+int64(window)) - left.advance(time)).mean(window) + if mu < mmu { + mmu = mu + if mmu == 0 { + // The minimum can't go any lower than + // zero, so stop early. + break + } + } + + // Advance the window to the next time where either + // the left or right edge of the window encounters a + // change in the utilization curve. 
+ if t1, t2 := left.next(time), right.next(time+int64(window))-int64(window); t1 < t2 { + time = t1 + } else { + time = t2 + } + if time > util[len(util)-1].Time-int64(window) { + break + } + } + return mmu +} + +// An integrator tracks a position in a utilization function and +// integrates it. +type integrator struct { + u *MMUCurve + // pos is the index in u.util of the current time's non-strict + // predecessor. + pos int +} + +// advance returns the integral of the utilization function from 0 to +// time. advance must be called on monotonically increasing values of +// times. +func (in *integrator) advance(time int64) totalUtil { + util, pos := in.u.util, in.pos + // Advance pos until pos+1 is time's strict successor (making + // pos time's non-strict predecessor). + for pos+1 < len(util) && util[pos+1].Time <= time { + pos++ + } + in.pos = pos + var partial totalUtil + if time != util[pos].Time { + partial = totalUtilOf(util[pos].Util, time-util[pos].Time) + } + return in.u.sums[pos] + partial +} + +// next returns the smallest time t' > time of a change in the +// utilization function. +func (in *integrator) next(time int64) int64 { + for _, u := range in.u.util[in.pos:] { + if u.Time > time { + return u.Time + } + } + return 1<<63 - 1 +} diff --git a/src/internal/traceparser/gc_test.go b/src/internal/traceparser/gc_test.go new file mode 100644 index 0000000000..821b0f217c --- /dev/null +++ b/src/internal/traceparser/gc_test.go @@ -0,0 +1,158 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package traceparser + +import ( + "math" + "testing" + "time" +) + +// aeq returns true if x and y are equal up to 8 digits (1 part in 100 +// million). 
+func aeq(x, y float64) bool { + if x < 0 && y < 0 { + x, y = -x, -y + } + const digits = 8 + factor := 1 - math.Pow(10, -digits+1) + return x*factor <= y && y*factor <= x +} + +func TestMMU(t *testing.T) { + t.Parallel() + + // MU + // 1.0 ***** ***** ***** + // 0.5 * * * * + // 0.0 ***** ***** + // 0 1 2 3 4 5 + util := []MutatorUtil{ + {0e9, 1}, + {1e9, 0}, + {2e9, 1}, + {3e9, 0}, + {4e9, 1}, + {5e9, 0}, + } + mmuCurve := NewMMUCurve(util) + + for _, test := range []struct { + window time.Duration + want float64 + }{ + {0, 0}, + {time.Millisecond, 0}, + {time.Second, 0}, + {2 * time.Second, 0.5}, + {3 * time.Second, 1 / 3.0}, + {4 * time.Second, 0.5}, + {5 * time.Second, 3 / 5.0}, + {6 * time.Second, 3 / 5.0}, + } { + if got := mmuCurve.MMU(test.window); !aeq(test.want, got) { + t.Errorf("for %s window, want mu = %f, got %f", test.window, test.want, got) + } + } +} + +func TestMMUTrace(t *testing.T) { + t.Parallel() + + p, err := New("../trace/testdata/stress_1_10_good") + if err != nil { + t.Fatalf("failed to read input file: %v", err) + } + if err := p.Parse(0, 1<<62, nil); err != nil { + t.Fatalf("failed to parse trace: %s", err) + } + mu := p.MutatorUtilization() + mmuCurve := NewMMUCurve(mu) + + // Test the optimized implementation against the "obviously + // correct" implementation. 
+ for window := time.Nanosecond; window < 10*time.Second; window *= 10 { + want := mmuSlow(mu, window) + got := mmuCurve.MMU(window) + if !aeq(want, got) { + t.Errorf("want %f, got %f mutator utilization in window %s", want, got, window) + } + } +} + +func BenchmarkMMU(b *testing.B) { + p, err := New("../trace/testdata/stress_1_10_good") + if err != nil { + b.Fatalf("failed to read input file: %v", err) + } + if err := p.Parse(0, 1<<62, nil); err != nil { + b.Fatalf("failed to parse trace: %s", err) + } + mu := p.MutatorUtilization() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + mmuCurve := NewMMUCurve(mu) + xMin, xMax := time.Microsecond, time.Second + logMin, logMax := math.Log(float64(xMin)), math.Log(float64(xMax)) + const samples = 100 + for i := 0; i < samples; i++ { + window := time.Duration(math.Exp(float64(i)/(samples-1)*(logMax-logMin) + logMin)) + mmuCurve.MMU(window) + } + } +} + +func mmuSlow(util []MutatorUtil, window time.Duration) (mmu float64) { + if max := time.Duration(util[len(util)-1].Time - util[0].Time); window > max { + window = max + } + + mmu = 1.0 + + // muInWindow returns the mean mutator utilization between + // util[0].Time and end. + muInWindow := func(util []MutatorUtil, end int64) float64 { + total := 0.0 + var prevU MutatorUtil + for _, u := range util { + if u.Time > end { + total += prevU.Util * float64(end-prevU.Time) + break + } + total += prevU.Util * float64(u.Time-prevU.Time) + prevU = u + } + return total / float64(end-util[0].Time) + } + update := func() { + for i, u := range util { + if u.Time+int64(window) > util[len(util)-1].Time { + break + } + mmu = math.Min(mmu, muInWindow(util[i:], u.Time+int64(window))) + } + } + + // Consider all left-aligned windows. + update() + // Reverse the trace. Slightly subtle because each MutatorUtil + // is a *change*. 
+ rutil := make([]MutatorUtil, len(util)) + if util[len(util)-1].Util != 0 { + panic("irreversible trace") + } + for i, u := range util { + util1 := 0.0 + if i != 0 { + util1 = util[i-1].Util + } + rutil[len(rutil)-i-1] = MutatorUtil{Time: -u.Time, Util: util1} + } + util = rutil + // Consider all right-aligned windows. + update() + return +} From c6c602a92612a855d8eb2f649f3dff75bb5fb9ad Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Mon, 28 Aug 2017 12:29:07 -0400 Subject: [PATCH 04/25] internal/trace: use MU slope to optimize MMU This commit speeds up MMU construction by ~10X (and reduces the number of windows considered by ~20X) by using an observation about the maximum slope of the windowed mutator utilization function to advance the window time in jumps if the window's current mean mutator utilization is much larger than the current minimum. Change-Id: If3cba5da0c4adc37b568740f940793e491e96a51 Reviewed-on: https://go-review.googlesource.com/c/60791 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Hyang-Ah Hana Kim --- src/internal/traceparser/gc.go | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/src/internal/traceparser/gc.go b/src/internal/traceparser/gc.go index 7e349308d7..66c68cb450 100644 --- a/src/internal/traceparser/gc.go +++ b/src/internal/traceparser/gc.go @@ -194,6 +194,12 @@ func (c *MMUCurve) MMU(window time.Duration) (mmu float64) { } } + // The maximum slope of the windowed mutator + // utilization function is 1/window, so we can always + // advance the time by at least (mu - mmu) * window + // without dropping below mmu. + minTime := time + int64((mu-mmu)*float64(window)) + // Advance the window to the next time where either // the left or right edge of the window encounters a // change in the utilization curve. 
@@ -202,6 +208,9 @@ func (c *MMUCurve) MMU(window time.Duration) (mmu float64) { } else { time = t2 } + if time < minTime { + time = minTime + } if time > util[len(util)-1].Time-int64(window) { break } @@ -225,8 +234,28 @@ func (in *integrator) advance(time int64) totalUtil { util, pos := in.u.util, in.pos // Advance pos until pos+1 is time's strict successor (making // pos time's non-strict predecessor). - for pos+1 < len(util) && util[pos+1].Time <= time { - pos++ + // + // Very often, this will be nearby, so we optimize that case, + // but it may be arbitrarily far away, so we handled that + // efficiently, too. + const maxSeq = 8 + if pos+maxSeq < len(util) && util[pos+maxSeq].Time > time { + // Nearby. Use a linear scan. + for pos+1 < len(util) && util[pos+1].Time <= time { + pos++ + } + } else { + // Far. Binary search for time's strict successor. + l, r := pos, len(util) + for l < r { + h := int(uint(l+r) >> 1) + if util[h].Time <= time { + l = h + 1 + } else { + r = h + } + } + pos = l - 1 // Non-strict predecessor. } in.pos = pos var partial totalUtil From 52ee654b25970a0b8c0ce9d2c8eda604df2026bb Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Tue, 8 Aug 2017 18:15:32 -0400 Subject: [PATCH 05/25] internal/trace: use banding to optimize MMU computation This further optimizes MMU construction by first computing a low-resolution summary of the utilization curve. This "band" summary lets us compute the worst-possible window starting in each of these low-resolution bands (even without knowing where in the band the window falls). This in turn lets us compute precise minimum mutator utilization only in the worst low-resolution bands until we can show that any remaining bands can't possibly contain a worse window. This slows down MMU construction for small traces, but these are reasonably fast to compute either way. For large traces (e.g., 150,000+ utilization changes) it's significantly faster. 
Change-Id: Ie66454e71f3fb06be3f6173b6d91ad75c61bda48 Reviewed-on: https://go-review.googlesource.com/c/60792 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Hyang-Ah Hana Kim --- src/go/build/deps_test.go | 2 +- src/internal/traceparser/gc.go | 182 ++++++++++++++++++++++++++++++++- 2 files changed, 179 insertions(+), 5 deletions(-) diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go index ec6e6b4890..d632954d0c 100644 --- a/src/go/build/deps_test.go +++ b/src/go/build/deps_test.go @@ -273,7 +273,7 @@ var pkgDeps = map[string][]string{ "internal/goroot": {"L4", "OS"}, "internal/singleflight": {"sync"}, "internal/trace": {"L4", "OS"}, - "internal/traceparser": {"L4", "internal/traceparser/filebuf"}, + "internal/traceparser": {"L4", "internal/traceparser/filebuf", "container/heap"}, "internal/traceparser/filebuf": {"L4", "OS"}, "math/big": {"L4"}, "mime": {"L4", "OS", "syscall", "internal/syscall/windows/registry"}, diff --git a/src/internal/traceparser/gc.go b/src/internal/traceparser/gc.go index 66c68cb450..0be78e71e3 100644 --- a/src/internal/traceparser/gc.go +++ b/src/internal/traceparser/gc.go @@ -5,6 +5,8 @@ package traceparser import ( + "container/heap" + "math" "strings" "time" ) @@ -131,6 +133,24 @@ type MMUCurve struct { util []MutatorUtil // sums[j] is the cumulative sum of util[:j]. sums []totalUtil + // bands summarizes util in non-overlapping bands of duration + // bandDur. + bands []mmuBand + // bandDur is the duration of each band. + bandDur int64 +} + +type mmuBand struct { + // minUtil is the minimum instantaneous mutator utilization in + // this band. + minUtil float64 + // cumUtil is the cumulative total mutator utilization between + // time 0 and the left edge of this band. + cumUtil totalUtil + + // integrator is the integrator for the left edge of this + // band. 
+ integrator integrator } // NewMMUCurve returns an MMU curve for the given mutator utilization @@ -146,7 +166,77 @@ func NewMMUCurve(util []MutatorUtil) *MMUCurve { prev = u } - return &MMUCurve{util, sums} + // Divide the utilization curve up into equal size + // non-overlapping "bands" and compute a summary for each of + // these bands. + // + // Compute the duration of each band. + numBands := 1000 + if numBands > len(util) { + // There's no point in having lots of bands if there + // aren't many events. + numBands = len(util) + } + dur := util[len(util)-1].Time - util[0].Time + bandDur := (dur + int64(numBands) - 1) / int64(numBands) + if bandDur < 1 { + bandDur = 1 + } + // Compute the bands. There are numBands+1 bands in order to + // record the final cumulative sum. + bands := make([]mmuBand, numBands+1) + c := MMUCurve{util, sums, bands, bandDur} + leftSum := integrator{&c, 0} + for i := range bands { + startTime, endTime := c.bandTime(i) + cumUtil := leftSum.advance(startTime) + predIdx := leftSum.pos + minUtil := 1.0 + for i := predIdx; i < len(util) && util[i].Time < endTime; i++ { + minUtil = math.Min(minUtil, util[i].Util) + } + bands[i] = mmuBand{minUtil, cumUtil, leftSum} + } + + return &c +} + +func (c *MMUCurve) bandTime(i int) (start, end int64) { + start = int64(i)*c.bandDur + c.util[0].Time + end = start + c.bandDur + return +} + +type bandUtil struct { + // Band index + i int + // Lower bound of mutator utilization for all windows + // with a left edge in this band. 
+ utilBound float64 +} + +type bandUtilHeap []bandUtil + +func (h bandUtilHeap) Len() int { + return len(h) +} + +func (h bandUtilHeap) Less(i, j int) bool { + return h[i].utilBound < h[j].utilBound +} + +func (h bandUtilHeap) Swap(i, j int) { + h[i], h[j] = h[j], h[i] +} + +func (h *bandUtilHeap) Push(x interface{}) { + *h = append(*h, x.(bandUtil)) +} + +func (h *bandUtilHeap) Pop() interface{} { + x := (*h)[len(*h)-1] + *h = (*h)[:len(*h)-1] + return x } // MMU returns the minimum mutator utilization for the given time @@ -162,7 +252,88 @@ func (c *MMUCurve) MMU(window time.Duration) (mmu float64) { window = max } + bandU := bandUtilHeap(c.mkBandUtil(window)) + + // Process bands from lowest utilization bound to highest. + heap.Init(&bandU) + + // Refine each band into a precise window and MMU until the + // precise MMU is less than the lowest band bound. mmu = 1.0 + for len(bandU) > 0 && bandU[0].utilBound < mmu { + mmu = c.bandMMU(bandU[0].i, window, mmu) + heap.Pop(&bandU) + } + return mmu +} + +func (c *MMUCurve) mkBandUtil(window time.Duration) []bandUtil { + // For each band, compute the worst-possible total mutator + // utilization for all windows that start in that band. + + // minBands is the minimum number of bands a window can span + // and maxBands is the maximum number of bands a window can + // span in any alignment. + minBands := int((int64(window) + c.bandDur - 1) / c.bandDur) + maxBands := int((int64(window) + 2*(c.bandDur-1)) / c.bandDur) + if window > 1 && maxBands < 2 { + panic("maxBands < 2") + } + tailDur := int64(window) % c.bandDur + nUtil := len(c.bands) - maxBands + 1 + if nUtil < 0 { + nUtil = 0 + } + bandU := make([]bandUtil, nUtil) + for i := range bandU { + // To compute the worst-case MU, we assume the minimum + // for any bands that are only partially overlapped by + // some window and the mean for any bands that are + // completely covered by all windows. 
+ var util totalUtil + + // Find the lowest and second lowest of the partial + // bands. + l := c.bands[i].minUtil + r1 := c.bands[i+minBands-1].minUtil + r2 := c.bands[i+maxBands-1].minUtil + minBand := math.Min(l, math.Min(r1, r2)) + // Assume the worst window maximally overlaps the + // worst minimum and then the rest overlaps the second + // worst minimum. + if minBands == 1 { + util += totalUtilOf(minBand, int64(window)) + } else { + util += totalUtilOf(minBand, c.bandDur) + midBand := 0.0 + switch { + case minBand == l: + midBand = math.Min(r1, r2) + case minBand == r1: + midBand = math.Min(l, r2) + case minBand == r2: + midBand = math.Min(l, r1) + } + util += totalUtilOf(midBand, tailDur) + } + + // Add the total mean MU of bands that are completely + // overlapped by all windows. + if minBands > 2 { + util += c.bands[i+minBands-1].cumUtil - c.bands[i+1].cumUtil + } + + bandU[i] = bandUtil{i, util.mean(window)} + } + + return bandU +} + +// bandMMU computes the precise minimum mutator utilization for +// windows with a left edge in band bandIdx. +func (c *MMUCurve) bandMMU(bandIdx int, window time.Duration, curMMU float64) (mmu float64) { + util := c.util + mmu = curMMU // We think of the mutator utilization over time as the // box-filtered utilization function, which we call the @@ -179,9 +350,12 @@ func (c *MMUCurve) MMU(window time.Duration) (mmu float64) { // We compute the mutator utilization function incrementally // by tracking the integral from t=0 to the left edge of the // window and to the right edge of the window. - left := integrator{c, 0} + left := c.bands[bandIdx].integrator right := left - time := util[0].Time + time, endTime := c.bandTime(bandIdx) + if utilEnd := util[len(util)-1].Time - int64(window); utilEnd < endTime { + endTime = utilEnd + } for { // Advance edges to time and time+window. 
mu := (right.advance(time+int64(window)) - left.advance(time)).mean(window) @@ -211,7 +385,7 @@ func (c *MMUCurve) MMU(window time.Duration) (mmu float64) { if time < minTime { time = minTime } - if time > util[len(util)-1].Time-int64(window) { + if time >= endTime { break } } From cbe04e8d70c32b9477ebcde5265bc17cc41af4a7 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 26 Jul 2017 12:21:15 -0400 Subject: [PATCH 06/25] internal/trace: track worst N mutator utilization windows This will let the trace viewer show specifically when poor utilization happened and link to specific instances in the trace. Change-Id: I1f03a0f9d9a7570009bb15762e7b8b6f215e9423 Reviewed-on: https://go-review.googlesource.com/c/60793 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Hyang-Ah Hana Kim --- src/internal/traceparser/gc.go | 151 ++++++++++++++++++++++++---- src/internal/traceparser/gc_test.go | 32 ++++-- 2 files changed, 157 insertions(+), 26 deletions(-) diff --git a/src/internal/traceparser/gc.go b/src/internal/traceparser/gc.go index 0be78e71e3..313e23edf6 100644 --- a/src/internal/traceparser/gc.go +++ b/src/internal/traceparser/gc.go @@ -7,6 +7,7 @@ package traceparser import ( "container/heap" "math" + "sort" "strings" "time" ) @@ -239,13 +240,135 @@ func (h *bandUtilHeap) Pop() interface{} { return x } +// UtilWindow is a specific window at Time. +type UtilWindow struct { + Time int64 + // MutatorUtil is the mean mutator utilization in this window. 
+ MutatorUtil float64 +} + +type utilHeap []UtilWindow + +func (h utilHeap) Len() int { + return len(h) +} + +func (h utilHeap) Less(i, j int) bool { + if h[i].MutatorUtil != h[j].MutatorUtil { + return h[i].MutatorUtil > h[j].MutatorUtil + } + return h[i].Time > h[j].Time +} + +func (h utilHeap) Swap(i, j int) { + h[i], h[j] = h[j], h[i] +} + +func (h *utilHeap) Push(x interface{}) { + *h = append(*h, x.(UtilWindow)) +} + +func (h *utilHeap) Pop() interface{} { + x := (*h)[len(*h)-1] + *h = (*h)[:len(*h)-1] + return x +} + +// An accumulator collects different MMU-related statistics depending +// on what's desired. +type accumulator struct { + mmu float64 + + // bound is the mutator utilization bound where adding any + // mutator utilization above this bound cannot affect the + // accumulated statistics. + bound float64 + + // Worst N window tracking + nWorst int + wHeap utilHeap +} + +// addMU records mutator utilization mu over the given window starting +// at time. +// +// It returns true if further calls to addMU would be pointless. +func (acc *accumulator) addMU(time int64, mu float64, window time.Duration) bool { + if mu < acc.mmu { + acc.mmu = mu + } + acc.bound = acc.mmu + + if acc.nWorst == 0 { + // If the minimum has reached zero, it can't go any + // lower, so we can stop early. + return mu == 0 + } + + // Consider adding this window to the n worst. + if len(acc.wHeap) < acc.nWorst || mu < acc.wHeap[0].MutatorUtil { + // This window is lower than the K'th worst window. + // + // Check if there's any overlapping window + // already in the heap and keep whichever is + // worse. + for i, ui := range acc.wHeap { + if time+int64(window) > ui.Time && ui.Time+int64(window) > time { + if ui.MutatorUtil <= mu { + // Keep the first window. + goto keep + } else { + // Replace it with this window. 
+ heap.Remove(&acc.wHeap, i) + break + } + } + } + + heap.Push(&acc.wHeap, UtilWindow{time, mu}) + if len(acc.wHeap) > acc.nWorst { + heap.Pop(&acc.wHeap) + } + keep: + } + if len(acc.wHeap) < acc.nWorst { + // We don't have N windows yet, so keep accumulating. + acc.bound = 1.0 + } else { + // Anything above the least worst window has no effect. + acc.bound = math.Max(acc.bound, acc.wHeap[0].MutatorUtil) + } + + // If we've found enough 0 utilizations, we can stop immediately. + return len(acc.wHeap) == acc.nWorst && acc.wHeap[0].MutatorUtil == 0 +} + // MMU returns the minimum mutator utilization for the given time // window. This is the minimum utilization for all windows of this // duration across the execution. The returned value is in the range // [0, 1]. func (c *MMUCurve) MMU(window time.Duration) (mmu float64) { + acc := accumulator{mmu: 1.0, bound: 1.0} + c.mmu(window, &acc) + return acc.mmu +} + +// Examples returns n specific examples of the lowest mutator +// utilization for the given window size. The returned windows will be +// disjoint (otherwise there would be a huge number of +// mostly-overlapping windows at the single lowest point). There are +// no guarantees on which set of disjoint windows this returns. +func (c *MMUCurve) Examples(window time.Duration, n int) (worst []UtilWindow) { + acc := accumulator{mmu: 1.0, bound: 1.0, nWorst: n} + c.mmu(window, &acc) + sort.Sort(sort.Reverse(acc.wHeap)) + return ([]UtilWindow)(acc.wHeap) +} + +func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) { if window <= 0 { - return 0 + acc.mmu = 0 + return } util := c.util if max := time.Duration(util[len(util)-1].Time - util[0].Time); window > max { @@ -257,14 +380,13 @@ func (c *MMUCurve) MMU(window time.Duration) (mmu float64) { // Process bands from lowest utilization bound to highest. heap.Init(&bandU) - // Refine each band into a precise window and MMU until the - // precise MMU is less than the lowest band bound. 
- mmu = 1.0 - for len(bandU) > 0 && bandU[0].utilBound < mmu { - mmu = c.bandMMU(bandU[0].i, window, mmu) + // Refine each band into a precise window and MMU until + // refining the next lowest band can no longer affect the MMU + // or windows. + for len(bandU) > 0 && bandU[0].utilBound < acc.bound { + c.bandMMU(bandU[0].i, window, acc) heap.Pop(&bandU) } - return mmu } func (c *MMUCurve) mkBandUtil(window time.Duration) []bandUtil { @@ -331,9 +453,8 @@ func (c *MMUCurve) mkBandUtil(window time.Duration) []bandUtil { // bandMMU computes the precise minimum mutator utilization for // windows with a left edge in band bandIdx. -func (c *MMUCurve) bandMMU(bandIdx int, window time.Duration, curMMU float64) (mmu float64) { +func (c *MMUCurve) bandMMU(bandIdx int, window time.Duration, acc *accumulator) { util := c.util - mmu = curMMU // We think of the mutator utilization over time as the // box-filtered utilization function, which we call the @@ -359,20 +480,15 @@ func (c *MMUCurve) bandMMU(bandIdx int, window time.Duration, curMMU float64) (m for { // Advance edges to time and time+window. mu := (right.advance(time+int64(window)) - left.advance(time)).mean(window) - if mu < mmu { - mmu = mu - if mmu == 0 { - // The minimum can't go any lower than - // zero, so stop early. - break - } + if acc.addMU(time, mu, window) { + break } // The maximum slope of the windowed mutator // utilization function is 1/window, so we can always // advance the time by at least (mu - mmu) * window // without dropping below mmu. 
- minTime := time + int64((mu-mmu)*float64(window)) + minTime := time + int64((mu-acc.bound)*float64(window)) // Advance the window to the next time where either // the left or right edge of the window encounters a @@ -389,7 +505,6 @@ func (c *MMUCurve) bandMMU(bandIdx int, window time.Duration, curMMU float64) (m break } } - return mmu } // An integrator tracks a position in a utilization function and diff --git a/src/internal/traceparser/gc_test.go b/src/internal/traceparser/gc_test.go index 821b0f217c..65772be717 100644 --- a/src/internal/traceparser/gc_test.go +++ b/src/internal/traceparser/gc_test.go @@ -42,19 +42,35 @@ func TestMMU(t *testing.T) { for _, test := range []struct { window time.Duration want float64 + worst []float64 }{ - {0, 0}, - {time.Millisecond, 0}, - {time.Second, 0}, - {2 * time.Second, 0.5}, - {3 * time.Second, 1 / 3.0}, - {4 * time.Second, 0.5}, - {5 * time.Second, 3 / 5.0}, - {6 * time.Second, 3 / 5.0}, + {0, 0, []float64{}}, + {time.Millisecond, 0, []float64{0, 0}}, + {time.Second, 0, []float64{0, 0}}, + {2 * time.Second, 0.5, []float64{0.5, 0.5}}, + {3 * time.Second, 1 / 3.0, []float64{1 / 3.0}}, + {4 * time.Second, 0.5, []float64{0.5}}, + {5 * time.Second, 3 / 5.0, []float64{3 / 5.0}}, + {6 * time.Second, 3 / 5.0, []float64{3 / 5.0}}, } { if got := mmuCurve.MMU(test.window); !aeq(test.want, got) { t.Errorf("for %s window, want mu = %f, got %f", test.window, test.want, got) } + worst := mmuCurve.Examples(test.window, 2) + // Which exact windows are returned is unspecified + // (and depends on the exact banding), so we just + // check that we got the right number with the right + // utilizations. 
+ if len(worst) != len(test.worst) { + t.Errorf("for %s window, want worst %v, got %v", test.window, test.worst, worst) + } else { + for i := range worst { + if worst[i].MutatorUtil != test.worst[i] { + t.Errorf("for %s window, want worst %v, got %v", test.window, test.worst, worst) + break + } + } + } } } From 603af813d6b93cf734b67551c2e776a1417e4603 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 28 Jul 2017 10:24:39 -0400 Subject: [PATCH 07/25] cmd/trace: list and link to worst mutator utilization windows This adds the ability to click a point on the MMU graph to show a list of the worst 10 mutator utilization windows of the selected size. This list in turn links to the trace viewer to drill down on specifically what happened in each specific window. Change-Id: Ic1b72d8b37fbf2212211c513cf36b34788b30133 Reviewed-on: https://go-review.googlesource.com/c/60795 Run-TryBot: Austin Clements Reviewed-by: Hyang-Ah Hana Kim TryBot-Result: Gobot Gobot --- src/cmd/trace/main.go | 2 +- src/cmd/trace/mmu.go | 81 ++++++++++++++++++++++++++++++++++++++++-- src/cmd/trace/trace.go | 30 +++++++++++----- 3 files changed, 101 insertions(+), 12 deletions(-) diff --git a/src/cmd/trace/main.go b/src/cmd/trace/main.go index f6ec38d673..2f71a3d4bd 100644 --- a/src/cmd/trace/main.go +++ b/src/cmd/trace/main.go @@ -189,7 +189,7 @@ var templMain = template.Must(template.New("").Parse(` {{if $}} {{range $e := $}} - View trace ({{$e.Name}})
+ View trace ({{$e.Name}})
{{end}}
{{else}} diff --git a/src/cmd/trace/mmu.go b/src/cmd/trace/mmu.go index cc14025d38..f76e0d0e5f 100644 --- a/src/cmd/trace/mmu.go +++ b/src/cmd/trace/mmu.go @@ -13,6 +13,7 @@ import ( "log" "math" "net/http" + "strconv" "strings" "sync" "time" @@ -21,6 +22,7 @@ import ( func init() { http.HandleFunc("/mmu", httpMMU) http.HandleFunc("/mmuPlot", httpMMUPlot) + http.HandleFunc("/mmuDetails", httpMMUDetails) } var mmuCache struct { @@ -98,6 +100,9 @@ var templMMU = ` google.charts.load('current', {'packages':['corechart']}); google.charts.setOnLoadCallback(refreshChart); + var chart; + var curve; + function niceDuration(ns) { if (ns < 1e3) { return ns + 'ns'; } else if (ns < 1e6) { return ns / 1e3 + 'µs'; } @@ -114,7 +119,7 @@ var templMMU = ` } function drawChart(plotData) { - var curve = plotData.curve; + curve = plotData.curve; var data = new google.visualization.DataTable(); data.addColumn('number', 'Window duration'); data.addColumn('number', 'Minimum mutator utilization'); @@ -148,13 +153,85 @@ var templMMU = ` var container = $('#mmu_chart'); container.empty(); - var chart = new google.visualization.LineChart(container[0]); + chart = new google.visualization.LineChart(container[0]); + chart = new google.visualization.LineChart(document.getElementById('mmu_chart')); chart.draw(data, options); + + google.visualization.events.addListener(chart, 'select', selectHandler); + } + + function selectHandler() { + var items = chart.getSelection(); + if (items.length === 0) { + return; + } + var details = $('#details'); + details.empty(); + var windowNS = curve[items[0].row][0]; + var url = '/mmuDetails?window=' + windowNS; + $.getJSON(url) + .fail(function(xhr, status, error) { + details.text(status + ': ' + url + ' could not be loaded'); + }) + .done(function(worst) { + details.text('Lowest mutator utilization in ' + niceDuration(windowNS) + ' windows:'); + for (var i = 0; i < worst.length; i++) { + details.append($('
')); + var text = worst[i].MutatorUtil.toFixed(3) + ' at time ' + niceDuration(worst[i].Time); + details.append($('').text(text).attr('href', worst[i].URL)); + } + }); }
Loading plot...
+
Select a point for details.
` + +// httpMMUDetails serves details of an MMU graph at a particular window. +func httpMMUDetails(w http.ResponseWriter, r *http.Request) { + _, mmuCurve, err := getMMUCurve() + if err != nil { + http.Error(w, fmt.Sprintf("failed to parse events: %v", err), http.StatusInternalServerError) + return + } + + windowStr := r.FormValue("window") + window, err := strconv.ParseUint(windowStr, 10, 64) + if err != nil { + http.Error(w, fmt.Sprintf("failed to parse window parameter %q: %v", windowStr, err), http.StatusBadRequest) + return + } + worst := mmuCurve.Examples(time.Duration(window), 10) + + // Construct a link for each window. + var links []linkedUtilWindow + for _, ui := range worst { + links = append(links, newLinkedUtilWindow(ui, time.Duration(window))) + } + + err = json.NewEncoder(w).Encode(links) + if err != nil { + log.Printf("failed to serialize trace: %v", err) + return + } +} + +type linkedUtilWindow struct { + trace.UtilWindow + URL string +} + +func newLinkedUtilWindow(ui trace.UtilWindow, window time.Duration) linkedUtilWindow { + // Find the range containing this window. 
+ var r Range + for _, r = range ranges { + if r.EndTime > ui.Time { + break + } + } + return linkedUtilWindow{ui, fmt.Sprintf("%s#%v:%v", r.URL(), float64(ui.Time)/1e6, float64(ui.Time+int64(window))/1e6)} +} diff --git a/src/cmd/trace/trace.go b/src/cmd/trace/trace.go index d0e0acd78c..d467f371fa 100644 --- a/src/cmd/trace/trace.go +++ b/src/cmd/trace/trace.go @@ -272,9 +272,15 @@ func httpJSONTrace(w http.ResponseWriter, r *http.Request) { // Range is a named range type Range struct { - Name string - Start int - End int + Name string + Start int + End int + StartTime int64 + EndTime int64 +} + +func (r Range) URL() string { + return fmt.Sprintf("/trace?start=%d&end=%d", r.Start, r.End) } // splitTrace splits the trace into a number of ranges, @@ -345,10 +351,14 @@ func splittingTraceConsumer(max int) (*splitter, traceConsumer) { start := 0 for i, ev := range sizes { if sum+ev.Sz > max { + startTime := time.Duration(sizes[start].Time * 1000) + endTime := time.Duration(ev.Time * 1000) ranges = append(ranges, Range{ - Name: fmt.Sprintf("%v-%v", time.Duration(sizes[start].Time*1000), time.Duration(ev.Time*1000)), - Start: start, - End: i + 1, + Name: fmt.Sprintf("%v-%v", startTime, endTime), + Start: start, + End: i + 1, + StartTime: int64(startTime), + EndTime: int64(endTime), }) start = i + 1 sum = minSize @@ -363,9 +373,11 @@ func splittingTraceConsumer(max int) (*splitter, traceConsumer) { if end := len(sizes) - 1; start < end { ranges = append(ranges, Range{ - Name: fmt.Sprintf("%v-%v", time.Duration(sizes[start].Time*1000), time.Duration(sizes[end].Time*1000)), - Start: start, - End: end, + Name: fmt.Sprintf("%v-%v", time.Duration(sizes[start].Time*1000), time.Duration(sizes[end].Time*1000)), + Start: start, + End: end, + StartTime: int64(sizes[start].Time * 1000), + EndTime: int64(sizes[end].Time * 1000), }) } s.Ranges = ranges From 27920c8ddc609662540deaf5a3d3b4fce03abeea Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 28 Jul 2017 13:51:58 -0400 
Subject: [PATCH 08/25] internal/trace: flags for what to include in GC utilization Change-Id: I4ba963b003cb25b39d7575d423f17930d84f3f69 Reviewed-on: https://go-review.googlesource.com/c/60796 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Hyang-Ah Hana Kim --- src/cmd/trace/mmu.go | 2 +- src/internal/traceparser/gc.go | 48 ++++++++++++++++++++++++----- src/internal/traceparser/gc_test.go | 4 +-- 3 files changed, 43 insertions(+), 11 deletions(-) diff --git a/src/cmd/trace/mmu.go b/src/cmd/trace/mmu.go index f76e0d0e5f..2a07be4ba2 100644 --- a/src/cmd/trace/mmu.go +++ b/src/cmd/trace/mmu.go @@ -38,7 +38,7 @@ func getMMUCurve() ([]trace.MutatorUtil, *trace.MMUCurve, error) { if err != nil { mmuCache.err = err } else { - mmuCache.util = tr.MutatorUtilization() + mmuCache.util = tr.MutatorUtilization(trace.UtilSTW | trace.UtilBackground | trace.UtilAssist) mmuCache.mmuCurve = trace.NewMMUCurve(mmuCache.util) } }) diff --git a/src/internal/traceparser/gc.go b/src/internal/traceparser/gc.go index 313e23edf6..ab0c640e26 100644 --- a/src/internal/traceparser/gc.go +++ b/src/internal/traceparser/gc.go @@ -22,11 +22,27 @@ type MutatorUtil struct { Util float64 } +// UtilFlags controls the behavior of MutatorUtilization. +type UtilFlags int + +const ( + // UtilSTW means utilization should account for STW events. + UtilSTW UtilFlags = 1 << iota + // UtilBackground means utilization should account for + // background mark workers. + UtilBackground + // UtilAssist means utilization should account for mark + // assists. + UtilAssist + // UtilSweep means utilization should account for sweeping. + UtilSweep +) + // MutatorUtilization returns the mutator utilization function for the // given trace. This function will always end with 0 utilization. The // bounds of the function are implicit in the first and last event; // outside of these bounds the function is undefined. 
-func (p *Parsed) MutatorUtilization() []MutatorUtil { +func (p *Parsed) MutatorUtilization(flags UtilFlags) []MutatorUtil { events := p.Events if len(events) == 0 { return nil @@ -42,17 +58,33 @@ func (p *Parsed) MutatorUtilization() []MutatorUtil { case EvGomaxprocs: gomaxprocs = int(ev.Args[0]) case EvGCSTWStart: - stw++ + if flags&UtilSTW != 0 { + stw++ + } case EvGCSTWDone: - stw-- + if flags&UtilSTW != 0 { + stw-- + } case EvGCMarkAssistStart: - gcPs++ - assists[ev.G] = true + if flags&UtilAssist != 0 { + gcPs++ + assists[ev.G] = true + } case EvGCMarkAssistDone: - gcPs-- - delete(assists, ev.G) + if flags&UtilAssist != 0 { + gcPs-- + delete(assists, ev.G) + } + case EvGCSweepStart: + if flags&UtilSweep != 0 { + gcPs++ + } + case EvGCSweepDone: + if flags&UtilSweep != 0 { + gcPs-- + } case EvGoStartLabel: - if strings.HasPrefix(ev.SArgs[0], "GC ") && ev.SArgs[0] != "GC (idle)" { + if flags&UtilBackground != 0 && strings.HasPrefix(ev.SArgs[0], "GC ") && ev.SArgs[0] != "GC (idle)" { // Background mark worker. 
bgMark[ev.G] = true gcPs++ diff --git a/src/internal/traceparser/gc_test.go b/src/internal/traceparser/gc_test.go index 65772be717..f1416fa9f9 100644 --- a/src/internal/traceparser/gc_test.go +++ b/src/internal/traceparser/gc_test.go @@ -84,7 +84,7 @@ func TestMMUTrace(t *testing.T) { if err := p.Parse(0, 1<<62, nil); err != nil { t.Fatalf("failed to parse trace: %s", err) } - mu := p.MutatorUtilization() + mu := p.MutatorUtilization(UtilSTW | UtilBackground | UtilAssist) mmuCurve := NewMMUCurve(mu) // Test the optimized implementation against the "obviously @@ -106,7 +106,7 @@ func BenchmarkMMU(b *testing.B) { if err := p.Parse(0, 1<<62, nil); err != nil { b.Fatalf("failed to parse trace: %s", err) } - mu := p.MutatorUtilization() + mu := p.MutatorUtilization(UtilSTW | UtilBackground | UtilAssist | UtilSweep) b.ResetTimer() for i := 0; i < b.N; i++ { From bef4efc822794ea2e7310756bc546bf6930fc066 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 28 Jul 2017 16:26:51 -0400 Subject: [PATCH 09/25] internal/trace: add "per-P" MMU analysis The current MMU analysis considers all Ps together, so if, for example, one of four Ps is blocked, mutator utilization is 75%. However, this is less useful for understanding the impact on individual goroutines because that one blocked goroutine could be blocked for a very long time, but we still appear to have good utilization. Hence, this introduces a new flag that does a "per-P" analysis where the utilization of each P is considered independently. The MMU is then the combination of the MMU for each P's utilization function. 
Change-Id: Id67b980d4d82b511d28300cdf92ccbb5ae8f0c78 Reviewed-on: https://go-review.googlesource.com/c/60797 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Hyang-Ah Hana Kim --- src/cmd/trace/mmu.go | 15 +- src/internal/traceparser/gc.go | 216 ++++++++++++++++++++-------- src/internal/traceparser/gc_test.go | 6 +- 3 files changed, 172 insertions(+), 65 deletions(-) diff --git a/src/cmd/trace/mmu.go b/src/cmd/trace/mmu.go index 2a07be4ba2..d3b6768686 100644 --- a/src/cmd/trace/mmu.go +++ b/src/cmd/trace/mmu.go @@ -27,12 +27,12 @@ func init() { var mmuCache struct { init sync.Once - util []trace.MutatorUtil + util [][]trace.MutatorUtil mmuCurve *trace.MMUCurve err error } -func getMMUCurve() ([]trace.MutatorUtil, *trace.MMUCurve, error) { +func getMMUCurve() ([][]trace.MutatorUtil, *trace.MMUCurve, error) { mmuCache.init.Do(func() { tr, err := parseTrace() if err != nil { @@ -69,7 +69,16 @@ func httpMMUPlot(w http.ResponseWriter, r *http.Request) { // Cover six orders of magnitude. xMax := xMin * 1e6 // But no more than the length of the trace. - if maxMax := time.Duration(mu[len(mu)-1].Time - mu[0].Time); xMax > maxMax { + minEvent, maxEvent := mu[0][0].Time, mu[0][len(mu[0])-1].Time + for _, mu1 := range mu[1:] { + if mu1[0].Time < minEvent { + minEvent = mu1[0].Time + } + if mu1[len(mu1)-1].Time > maxEvent { + maxEvent = mu1[len(mu1)-1].Time + } + } + if maxMax := time.Duration(maxEvent - minEvent); xMax > maxMax { xMax = maxMax } // Compute MMU curve. diff --git a/src/internal/traceparser/gc.go b/src/internal/traceparser/gc.go index ab0c640e26..569ab86b82 100644 --- a/src/internal/traceparser/gc.go +++ b/src/internal/traceparser/gc.go @@ -36,27 +36,64 @@ const ( UtilAssist // UtilSweep means utilization should account for sweeping. UtilSweep + + // UtilPerProc means each P should be given a separate + // utilization function. Otherwise, there is a single function + // and each P is given a fraction of the utilization. 
+ UtilPerProc ) -// MutatorUtilization returns the mutator utilization function for the -// given trace. This function will always end with 0 utilization. The -// bounds of the function are implicit in the first and last event; -// outside of these bounds the function is undefined. -func (p *Parsed) MutatorUtilization(flags UtilFlags) []MutatorUtil { +// MutatorUtilization returns a set of mutator utilization functions +// for the given trace. Each function will always end with 0 +// utilization. The bounds of each function are implicit in the first +// and last event; outside of these bounds each function is undefined. +// +// If the UtilPerProc flag is not given, this always returns a single +// utilization function. Otherwise, it returns one function per P. +func (p *Parsed) MutatorUtilization(flags UtilFlags) [][]MutatorUtil { events := p.Events if len(events) == 0 { return nil } - gomaxprocs, gcPs, stw := 1, 0, 0 - out := []MutatorUtil{{events[0].Ts, 1}} + type perP struct { + // gc > 0 indicates that GC is active on this P. + gc int + // series the logical series number for this P. This + // is necessary because Ps may be removed and then + // re-added, and then the new P needs a new series. + series int + } + ps := []perP{} + stw := 0 + + out := [][]MutatorUtil{} assists := map[uint64]bool{} block := map[uint64]*Event{} bgMark := map[uint64]bool{} + for _, ev := range events { switch ev.Type { case EvGomaxprocs: - gomaxprocs = int(ev.Args[0]) + gomaxprocs := int(ev.Args[0]) + if len(ps) > gomaxprocs { + if flags&UtilPerProc != 0 { + // End each P's series. + for _, p := range ps[gomaxprocs:] { + out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, 0}) + } + } + ps = ps[:gomaxprocs] + } + for len(ps) < gomaxprocs { + // Start new P's series. 
+ series := 0 + if flags&UtilPerProc != 0 || len(out) == 0 { + series = len(out) + out = append(out, []MutatorUtil{{ev.Ts, 1}}) + } + ps = append(ps, perP{series: series}) + } case EvGCSTWStart: if flags&UtilSTW != 0 { stw++ @@ -67,33 +104,41 @@ func (p *Parsed) MutatorUtilization(flags UtilFlags) []MutatorUtil { } case EvGCMarkAssistStart: if flags&UtilAssist != 0 { - gcPs++ + ps[ev.P].gc++ assists[ev.G] = true } case EvGCMarkAssistDone: if flags&UtilAssist != 0 { - gcPs-- + ps[ev.P].gc-- delete(assists, ev.G) } case EvGCSweepStart: if flags&UtilSweep != 0 { - gcPs++ + ps[ev.P].gc++ } case EvGCSweepDone: if flags&UtilSweep != 0 { - gcPs-- + ps[ev.P].gc-- } case EvGoStartLabel: if flags&UtilBackground != 0 && strings.HasPrefix(ev.SArgs[0], "GC ") && ev.SArgs[0] != "GC (idle)" { // Background mark worker. - bgMark[ev.G] = true - gcPs++ + // + // If we're in per-proc mode, we don't + // count dedicated workers because + // they kick all of the goroutines off + // that P, so don't directly + // contribute to goroutine latency. + if !(flags&UtilPerProc != 0 && ev.SArgs[0] == "GC (dedicated)") { + bgMark[ev.G] = true + ps[ev.P].gc++ + } } fallthrough case EvGoStart: if assists[ev.G] { // Unblocked during assist. - gcPs++ + ps[ev.P].gc++ } block[ev.G] = ev.Link default: @@ -103,49 +148,77 @@ func (p *Parsed) MutatorUtilization(flags UtilFlags) []MutatorUtil { if assists[ev.G] { // Blocked during assist. - gcPs-- + ps[ev.P].gc-- } if bgMark[ev.G] { // Background mark worker done. - gcPs-- + ps[ev.P].gc-- delete(bgMark, ev.G) } delete(block, ev.G) } - ps := gcPs - if stw > 0 { - ps = gomaxprocs - } - mu := MutatorUtil{ev.Ts, 1 - float64(ps)/float64(gomaxprocs)} - if mu.Util == out[len(out)-1].Util { - // No change. - continue - } - if mu.Time == out[len(out)-1].Time { - // Take the lowest utilization at a time stamp. - if mu.Util < out[len(out)-1].Util { - out[len(out)-1] = mu + if flags&UtilPerProc == 0 { + // Compute the current average utilization. 
+ if len(ps) == 0 { + continue } + gcPs := 0 + if stw > 0 { + gcPs = len(ps) + } else { + for i := range ps { + if ps[i].gc > 0 { + gcPs++ + } + } + } + mu := MutatorUtil{ev.Ts, 1 - float64(gcPs)/float64(len(ps))} + + // Record the utilization change. (Since + // len(ps) == len(out), we know len(out) > 0.) + out[0] = addUtil(out[0], mu) } else { - out = append(out, mu) + // Check for per-P utilization changes. + for i := range ps { + p := &ps[i] + util := 1.0 + if stw > 0 || p.gc > 0 { + util = 0.0 + } + out[p.series] = addUtil(out[p.series], MutatorUtil{ev.Ts, util}) + } } } - // Add final 0 utilization event. This is important to mark - // the end of the trace. The exact value shouldn't matter - // since no window should extend beyond this, but using 0 is - // symmetric with the start of the trace. - endTime := events[len(events)-1].Ts - if out[len(out)-1].Time == endTime { - out[len(out)-1].Util = 0 - } else { - out = append(out, MutatorUtil{endTime, 0}) + // Add final 0 utilization event to any remaining series. This + // is important to mark the end of the trace. The exact value + // shouldn't matter since no window should extend beyond this, + // but using 0 is symmetric with the start of the trace. + mu := MutatorUtil{events[len(events)-1].Ts, 0} + for i := range ps { + out[ps[i].series] = addUtil(out[ps[i].series], mu) } - return out } +func addUtil(util []MutatorUtil, mu MutatorUtil) []MutatorUtil { + if len(util) > 0 { + if mu.Util == util[len(util)-1].Util { + // No change. + return util + } + if mu.Time == util[len(util)-1].Time { + // Take the lowest utilization at a time stamp. + if mu.Util < util[len(util)-1].Util { + util[len(util)-1] = mu + } + return util + } + } + return append(util, mu) +} + // totalUtil is total utilization, measured in nanoseconds. This is a // separate type primarily to distinguish it from mean utilization, // which is also a float64. 
@@ -163,6 +236,10 @@ func (u totalUtil) mean(dur time.Duration) float64 { // An MMUCurve is the minimum mutator utilization curve across // multiple window sizes. type MMUCurve struct { + series []mmuSeries +} + +type mmuSeries struct { util []MutatorUtil // sums[j] is the cumulative sum of util[:j]. sums []totalUtil @@ -188,7 +265,15 @@ type mmuBand struct { // NewMMUCurve returns an MMU curve for the given mutator utilization // function. -func NewMMUCurve(util []MutatorUtil) *MMUCurve { +func NewMMUCurve(utils [][]MutatorUtil) *MMUCurve { + series := make([]mmuSeries, len(utils)) + for i, util := range utils { + series[i] = newMMUSeries(util) + } + return &MMUCurve{series} +} + +func newMMUSeries(util []MutatorUtil) mmuSeries { // Compute cumulative sum. sums := make([]totalUtil, len(util)) var prev MutatorUtil @@ -218,10 +303,10 @@ func NewMMUCurve(util []MutatorUtil) *MMUCurve { // Compute the bands. There are numBands+1 bands in order to // record the final cumulative sum. bands := make([]mmuBand, numBands+1) - c := MMUCurve{util, sums, bands, bandDur} - leftSum := integrator{&c, 0} + s := mmuSeries{util, sums, bands, bandDur} + leftSum := integrator{&s, 0} for i := range bands { - startTime, endTime := c.bandTime(i) + startTime, endTime := s.bandTime(i) cumUtil := leftSum.advance(startTime) predIdx := leftSum.pos minUtil := 1.0 @@ -231,16 +316,18 @@ func NewMMUCurve(util []MutatorUtil) *MMUCurve { bands[i] = mmuBand{minUtil, cumUtil, leftSum} } - return &c + return s } -func (c *MMUCurve) bandTime(i int) (start, end int64) { - start = int64(i)*c.bandDur + c.util[0].Time - end = start + c.bandDur +func (s *mmuSeries) bandTime(i int) (start, end int64) { + start = int64(i)*s.bandDur + s.util[0].Time + end = start + s.bandDur return } type bandUtil struct { + // Utilization series index + series int // Band index i int // Lower bound of mutator utilization for all windows @@ -402,12 +489,22 @@ func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) { 
acc.mmu = 0 return } - util := c.util - if max := time.Duration(util[len(util)-1].Time - util[0].Time); window > max { - window = max - } - bandU := bandUtilHeap(c.mkBandUtil(window)) + var bandU bandUtilHeap + windows := make([]time.Duration, len(c.series)) + for i, s := range c.series { + windows[i] = window + if max := time.Duration(s.util[len(s.util)-1].Time - s.util[0].Time); window > max { + windows[i] = max + } + + bandU1 := bandUtilHeap(s.mkBandUtil(i, windows[i])) + if bandU == nil { + bandU = bandU1 + } else { + bandU = append(bandU, bandU1...) + } + } // Process bands from lowest utilization bound to highest. heap.Init(&bandU) @@ -416,12 +513,13 @@ func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) { // refining the next lowest band can no longer affect the MMU // or windows. for len(bandU) > 0 && bandU[0].utilBound < acc.bound { - c.bandMMU(bandU[0].i, window, acc) + i := bandU[0].series + c.series[i].bandMMU(bandU[0].i, windows[i], acc) heap.Pop(&bandU) } } -func (c *MMUCurve) mkBandUtil(window time.Duration) []bandUtil { +func (c *mmuSeries) mkBandUtil(series int, window time.Duration) []bandUtil { // For each band, compute the worst-possible total mutator // utilization for all windows that start in that band. @@ -477,7 +575,7 @@ func (c *MMUCurve) mkBandUtil(window time.Duration) []bandUtil { util += c.bands[i+minBands-1].cumUtil - c.bands[i+1].cumUtil } - bandU[i] = bandUtil{i, util.mean(window)} + bandU[i] = bandUtil{series, i, util.mean(window)} } return bandU @@ -485,7 +583,7 @@ func (c *MMUCurve) mkBandUtil(window time.Duration) []bandUtil { // bandMMU computes the precise minimum mutator utilization for // windows with a left edge in band bandIdx. 
-func (c *MMUCurve) bandMMU(bandIdx int, window time.Duration, acc *accumulator) { +func (c *mmuSeries) bandMMU(bandIdx int, window time.Duration, acc *accumulator) { util := c.util // We think of the mutator utilization over time as the @@ -542,7 +640,7 @@ func (c *MMUCurve) bandMMU(bandIdx int, window time.Duration, acc *accumulator) // An integrator tracks a position in a utilization function and // integrates it. type integrator struct { - u *MMUCurve + u *mmuSeries // pos is the index in u.util of the current time's non-strict // predecessor. pos int diff --git a/src/internal/traceparser/gc_test.go b/src/internal/traceparser/gc_test.go index f1416fa9f9..b438a2931f 100644 --- a/src/internal/traceparser/gc_test.go +++ b/src/internal/traceparser/gc_test.go @@ -29,14 +29,14 @@ func TestMMU(t *testing.T) { // 0.5 * * * * // 0.0 ***** ***** // 0 1 2 3 4 5 - util := []MutatorUtil{ + util := [][]MutatorUtil{{ {0e9, 1}, {1e9, 0}, {2e9, 1}, {3e9, 0}, {4e9, 1}, {5e9, 0}, - } + }} mmuCurve := NewMMUCurve(util) for _, test := range []struct { @@ -90,7 +90,7 @@ func TestMMUTrace(t *testing.T) { // Test the optimized implementation against the "obviously // correct" implementation. 
for window := time.Nanosecond; window < 10*time.Second; window *= 10 { - want := mmuSlow(mu, window) + want := mmuSlow(mu[0], window) got := mmuCurve.MMU(window) if !aeq(want, got) { t.Errorf("want %f, got %f mutator utilization in window %s", want, got, window) From b2e8dd187343cf3059e373374426833d1a676a3e Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 28 Jul 2017 16:30:05 -0400 Subject: [PATCH 10/25] cmd/trace: expose MMU analysis flags in web UI Change-Id: I672240487172380c9eef61837b41698021aaf834 Reviewed-on: https://go-review.googlesource.com/c/60798 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Hyang-Ah Hana Kim --- src/cmd/trace/mmu.go | 133 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 119 insertions(+), 14 deletions(-) diff --git a/src/cmd/trace/mmu.go b/src/cmd/trace/mmu.go index d3b6768686..3fae3d6645 100644 --- a/src/cmd/trace/mmu.go +++ b/src/cmd/trace/mmu.go @@ -25,24 +25,54 @@ func init() { http.HandleFunc("/mmuDetails", httpMMUDetails) } -var mmuCache struct { +var utilFlagNames = map[string]trace.UtilFlags{ + "perProc": trace.UtilPerProc, + "stw": trace.UtilSTW, + "background": trace.UtilBackground, + "assist": trace.UtilAssist, + "sweep": trace.UtilSweep, +} + +type mmuCacheEntry struct { init sync.Once util [][]trace.MutatorUtil mmuCurve *trace.MMUCurve err error } -func getMMUCurve() ([][]trace.MutatorUtil, *trace.MMUCurve, error) { - mmuCache.init.Do(func() { +var mmuCache struct { + m map[trace.UtilFlags]*mmuCacheEntry + lock sync.Mutex +} + +func init() { + mmuCache.m = make(map[trace.UtilFlags]*mmuCacheEntry) +} + +func getMMUCurve(r *http.Request) ([][]trace.MutatorUtil, *trace.MMUCurve, error) { + var flags trace.UtilFlags + for _, flagStr := range strings.Split(r.FormValue("flags"), "|") { + flags |= utilFlagNames[flagStr] + } + + mmuCache.lock.Lock() + c := mmuCache.m[flags] + if c == nil { + c = new(mmuCacheEntry) + mmuCache.m[flags] = c + } + mmuCache.lock.Unlock() + + c.init.Do(func() { 
tr, err := parseTrace() if err != nil { - mmuCache.err = err + c.err = err } else { - mmuCache.util = tr.MutatorUtilization(trace.UtilSTW | trace.UtilBackground | trace.UtilAssist) - mmuCache.mmuCurve = trace.NewMMUCurve(mmuCache.util) + c.util = tr.MutatorUtilization(flags) + c.mmuCurve = trace.NewMMUCurve(c.util) } }) - return mmuCache.util, mmuCache.mmuCurve, mmuCache.err + return c.util, c.mmuCurve, c.err } // httpMMU serves the MMU plot page. @@ -52,7 +82,7 @@ func httpMMU(w http.ResponseWriter, r *http.Request) { // httpMMUPlot serves the JSON data for the MMU plot. func httpMMUPlot(w http.ResponseWriter, r *http.Request) { - mu, mmuCurve, err := getMMUCurve() + mu, mmuCurve, err := getMMUCurve(r) if err != nil { http.Error(w, fmt.Sprintf("failed to parse events: %v", err), http.StatusInternalServerError) return @@ -107,7 +137,8 @@ var templMMU = ` + -
Loading plot...
+
+
Loading plot...
+
+

+ View
+ + ?Consider whole system utilization. For example, if one of four procs is available to the mutator, mutator utilization will be 0.25. This is the standard definition of an MMU.
+ + ?Consider per-goroutine utilization. When even one goroutine is interrupted by GC, mutator utilization is 0.
+

+

+ Include
+ + ?Stop-the-world stops all goroutines simultaneously.
+ + ?Background workers are GC-specific goroutines. 25% of the CPU is dedicated to background workers during GC.
+ + ?Mark assists are performed by allocation to prevent the mutator from outpacing GC.
+ + ?Sweep reclaims unused memory between GCs. (Enabling this may be very slow.).
+

+
+
Select a point for details.
@@ -202,7 +307,7 @@ var templMMU = ` // httpMMUDetails serves details of an MMU graph at a particular window. func httpMMUDetails(w http.ResponseWriter, r *http.Request) { - _, mmuCurve, err := getMMUCurve() + _, mmuCurve, err := getMMUCurve(r) if err != nil { http.Error(w, fmt.Sprintf("failed to parse events: %v", err), http.StatusInternalServerError) return From 33563d1cfc7939d99d18e58cea7eedd6fb1c6ed6 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 17 Aug 2017 11:31:03 -0400 Subject: [PATCH 11/25] internal/trace: support for mutator utilization distributions This adds support for computing the quantiles of a mutator utilization distribution. Change-Id: Ia8b3ed14bf415c234e2f567360fd1b361d28bd40 Reviewed-on: https://go-review.googlesource.com/c/60799 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Hyang-Ah Hana Kim --- src/internal/traceparser/gc.go | 141 ++++++++++++++++- src/internal/traceparser/gc_test.go | 22 ++- src/internal/traceparser/mud.go | 223 +++++++++++++++++++++++++++ src/internal/traceparser/mud_test.go | 87 +++++++++++ 4 files changed, 466 insertions(+), 7 deletions(-) create mode 100644 src/internal/traceparser/mud.go create mode 100644 src/internal/traceparser/mud_test.go diff --git a/src/internal/traceparser/gc.go b/src/internal/traceparser/gc.go index 569ab86b82..a5b29112b3 100644 --- a/src/internal/traceparser/gc.go +++ b/src/internal/traceparser/gc.go @@ -273,6 +273,10 @@ func NewMMUCurve(utils [][]MutatorUtil) *MMUCurve { return &MMUCurve{series} } +// bandsPerSeries is the number of bands to divide each series into. +// This is only changed by tests. +var bandsPerSeries = 1000 + func newMMUSeries(util []MutatorUtil) mmuSeries { // Compute cumulative sum. sums := make([]totalUtil, len(util)) @@ -289,7 +293,7 @@ func newMMUSeries(util []MutatorUtil) mmuSeries { // these bands. // // Compute the duration of each band. 
- numBands := 1000 + numBands := bandsPerSeries if numBands > len(util) { // There's no point in having lots of bands if there // aren't many events. @@ -393,8 +397,8 @@ func (h *utilHeap) Pop() interface{} { return x } -// An accumulator collects different MMU-related statistics depending -// on what's desired. +// An accumulator takes a windowed mutator utilization function and +// tracks various statistics for that function. type accumulator struct { mmu float64 @@ -406,10 +410,30 @@ type accumulator struct { // Worst N window tracking nWorst int wHeap utilHeap + + // Mutator utilization distribution tracking + mud *mud + // preciseMass is the distribution mass that must be precise + // before accumulation is stopped. + preciseMass float64 + // lastTime and lastMU are the previous point added to the + // windowed mutator utilization function. + lastTime int64 + lastMU float64 } -// addMU records mutator utilization mu over the given window starting -// at time. +// resetTime declares a discontinuity in the windowed mutator +// utilization function by resetting the current time. +func (acc *accumulator) resetTime() { + // This only matters for distribution collection, since that's + // the only thing that depends on the progression of the + // windowed mutator utilization function. + acc.lastTime = math.MaxInt64 +} + +// addMU adds a point to the windowed mutator utilization function at +// (time, mu). This must be called for monotonically increasing values +// of time. // // It returns true if further calls to addMU would be pointless. func (acc *accumulator) addMU(time int64, mu float64, window time.Duration) bool { @@ -458,6 +482,25 @@ func (acc *accumulator) addMU(time int64, mu float64, window time.Duration) bool acc.bound = math.Max(acc.bound, acc.wHeap[0].MutatorUtil) } + if acc.mud != nil { + if acc.lastTime != math.MaxInt64 { + // Update distribution. 
+ acc.mud.add(acc.lastMU, mu, float64(time-acc.lastTime)) + } + acc.lastTime, acc.lastMU = time, mu + if _, mudBound, ok := acc.mud.approxInvCumulativeSum(); ok { + acc.bound = math.Max(acc.bound, mudBound) + } else { + // We haven't accumulated enough total precise + // mass yet to even reach our goal, so keep + // accumulating. + acc.bound = 1 + } + // It's not worth checking percentiles every time, so + // just keep accumulating this band. + return false + } + // If we've found enough 0 utilizations, we can stop immediately. return len(acc.wHeap) == acc.nWorst && acc.wHeap[0].MutatorUtil == 0 } @@ -484,6 +527,85 @@ func (c *MMUCurve) Examples(window time.Duration, n int) (worst []UtilWindow) { return ([]UtilWindow)(acc.wHeap) } +// MUD returns mutator utilization distribution quantiles for the +// given window size. +// +// The mutator utilization distribution is the distribution of mean +// mutator utilization across all windows of the given window size in +// the trace. +// +// The minimum mutator utilization is the minimum (0th percentile) of +// this distribution. (However, if only the minimum is desired, it's +// more efficient to use the MMU method.) +func (c *MMUCurve) MUD(window time.Duration, quantiles []float64) []float64 { + if len(quantiles) == 0 { + return []float64{} + } + + // Each unrefined band contributes a known total mass to the + // distribution (bandDur except at the end), but in an unknown + // way. However, we know that all the mass it contributes must + // be at or above its worst-case mean mutator utilization. + // + // Hence, we refine bands until the highest desired + // distribution quantile is less than the next worst-case mean + // mutator utilization. At this point, all further + // contributions to the distribution must be beyond the + // desired quantile and hence cannot affect it. + // + // First, find the highest desired distribution quantile. 
+ maxQ := quantiles[0] + for _, q := range quantiles { + if q > maxQ { + maxQ = q + } + } + // The distribution's mass is in units of time (it's not + // normalized because this would make it more annoying to + // account for future contributions of unrefined bands). The + // total final mass will be the duration of the trace itself + // minus the window size. Using this, we can compute the mass + // corresponding to quantile maxQ. + var duration int64 + for _, s := range c.series { + duration1 := s.util[len(s.util)-1].Time - s.util[0].Time + if duration1 >= int64(window) { + duration += duration1 - int64(window) + } + } + qMass := float64(duration) * maxQ + + // Accumulate the MUD until we have precise information for + // everything to the left of qMass. + acc := accumulator{mmu: 1.0, bound: 1.0, preciseMass: qMass, mud: new(mud)} + acc.mud.setTrackMass(qMass) + c.mmu(window, &acc) + + // Evaluate the quantiles on the accumulated MUD. + out := make([]float64, len(quantiles)) + for i := range out { + mu, _ := acc.mud.invCumulativeSum(float64(duration) * quantiles[i]) + if math.IsNaN(mu) { + // There are a few legitimate ways this can + // happen: + // + // 1. If the window is the full trace + // duration, then the windowed MU function is + // only defined at a single point, so the MU + // distribution is not well-defined. + // + // 2. If there are no events, then the MU + // distribution has no mass. + // + // Either way, all of the quantiles will have + // converged toward the MMU at this point. + mu = acc.mmu + } + out[i] = mu + } + return out +} + func (c *MMUCurve) mmu(window time.Duration, acc *accumulator) { if window <= 0 { acc.mmu = 0 @@ -607,12 +729,16 @@ func (c *mmuSeries) bandMMU(bandIdx int, window time.Duration, acc *accumulator) if utilEnd := util[len(util)-1].Time - int64(window); utilEnd < endTime { endTime = utilEnd } + acc.resetTime() for { // Advance edges to time and time+window. 
mu := (right.advance(time+int64(window)) - left.advance(time)).mean(window) if acc.addMU(time, mu, window) { break } + if time == endTime { + break + } // The maximum slope of the windowed mutator // utilization function is 1/window, so we can always @@ -632,7 +758,10 @@ func (c *mmuSeries) bandMMU(bandIdx int, window time.Duration, acc *accumulator) time = minTime } if time >= endTime { - break + // For MMUs we could stop here, but for MUDs + // it's important that we span the entire + // band. + time = endTime } } } diff --git a/src/internal/traceparser/gc_test.go b/src/internal/traceparser/gc_test.go index b438a2931f..1cd8fb6f78 100644 --- a/src/internal/traceparser/gc_test.go +++ b/src/internal/traceparser/gc_test.go @@ -75,7 +75,8 @@ func TestMMU(t *testing.T) { } func TestMMUTrace(t *testing.T) { - t.Parallel() + // Can't be t.Parallel() because it modifies the + // testingOneBand package variable. p, err := New("../trace/testdata/stress_1_10_good") if err != nil { @@ -96,6 +97,25 @@ func TestMMUTrace(t *testing.T) { t.Errorf("want %f, got %f mutator utilization in window %s", want, got, window) } } + + // Test MUD with band optimization against MUD without band + // optimization. We don't have a simple testing implementation + // of MUDs (the simplest implementation is still quite + // complex), but this is still a pretty good test. 
+ defer func(old int) { bandsPerSeries = old }(bandsPerSeries) + bandsPerSeries = 1 + mmuCurve2 := NewMMUCurve(mu) + quantiles := []float64{0, 1 - .999, 1 - .99} + for window := time.Microsecond; window < time.Second; window *= 10 { + mud1 := mmuCurve.MUD(window, quantiles) + mud2 := mmuCurve2.MUD(window, quantiles) + for i := range mud1 { + if !aeq(mud1[i], mud2[i]) { + t.Errorf("for quantiles %v at window %v, want %v, got %v", quantiles, window, mud2, mud1) + break + } + } + } } func BenchmarkMMU(b *testing.B) { diff --git a/src/internal/traceparser/mud.go b/src/internal/traceparser/mud.go new file mode 100644 index 0000000000..8eed89ff36 --- /dev/null +++ b/src/internal/traceparser/mud.go @@ -0,0 +1,223 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package traceparser + +import ( + "math" + "sort" +) + +// mud is an updatable mutator utilization distribution. +// +// This is a continuous distribution of duration over mutator +// utilization. For example, the integral from mutator utilization a +// to b is the total duration during which the mutator utilization was +// in the range [a, b]. +// +// This distribution is *not* normalized (it is not a probability +// distribution). This makes it easier to work with as it's being +// updated. +// +// It is represented as the sum of scaled uniform distribution +// functions and Dirac delta functions (which are treated as +// degenerate uniform distributions). +type mud struct { + sorted, unsorted []edge + + // trackMass is the inverse cumulative sum to track as the + // distribution is updated. + trackMass float64 + // trackBucket is the bucket in which trackMass falls. If the + // total mass of the distribution is < trackMass, this is + // len(hist). + trackBucket int + // trackSum is the cumulative sum of hist[:trackBucket]. Once + // trackSum >= trackMass, trackBucket must be recomputed. 
+ trackSum float64 + + // hist is a hierarchical histogram of distribution mass. + hist [mudDegree]float64 +} + +const ( + // mudDegree is the number of buckets in the MUD summary + // histogram. + mudDegree = 1024 +) + +type edge struct { + // At x, the function increases by y. + x, delta float64 + // Additionally at x is a Dirac delta function with area dirac. + dirac float64 +} + +// add adds a uniform function over [l, r] scaled so the total weight +// of the uniform is area. If l==r, this adds a Dirac delta function. +func (d *mud) add(l, r, area float64) { + if area == 0 { + return + } + + if r < l { + l, r = r, l + } + + // Add the edges. + if l == r { + d.unsorted = append(d.unsorted, edge{l, 0, area}) + } else { + delta := area / (r - l) + d.unsorted = append(d.unsorted, edge{l, delta, 0}, edge{r, -delta, 0}) + } + + // Update the histogram. + h := &d.hist + lbFloat, lf := math.Modf(l * mudDegree) + lb := int(lbFloat) + if lb >= mudDegree { + lb, lf = mudDegree-1, 1 + } + if l == r { + h[lb] += area + } else { + rbFloat, rf := math.Modf(r * mudDegree) + rb := int(rbFloat) + if rb >= mudDegree { + rb, rf = mudDegree-1, 1 + } + if lb == rb { + h[lb] += area + } else { + perBucket := area / (r - l) / mudDegree + h[lb] += perBucket * (1 - lf) + h[rb] += perBucket * rf + for i := lb + 1; i < rb; i++ { + h[i] += perBucket + } + } + } + + // Update mass tracking. + if thresh := float64(d.trackBucket) / mudDegree; l < thresh { + if r < thresh { + d.trackSum += area + } else { + d.trackSum += area * (thresh - l) / (r - l) + } + if d.trackSum >= d.trackMass { + // The tracked mass now falls in a different + // bucket. Recompute the inverse cumulative sum. + d.setTrackMass(d.trackMass) + } + } +} + +// setTrackMass sets the mass to track the inverse cumulative sum for. +// +// Specifically, mass is a cumulative duration, and the mutator +// utilization bounds for this duration can be queried using +// approxInvCumulativeSum. 
+func (d *mud) setTrackMass(mass float64) { + d.trackMass = mass + + // Find the bucket currently containing trackMass by computing + // the cumulative sum. + sum := 0.0 + for i, val := range d.hist[:] { + newSum := sum + val + if newSum > mass { + // mass falls in bucket i. + d.trackBucket = i + d.trackSum = sum + return + } + sum = newSum + } + d.trackBucket = len(d.hist) + d.trackSum = sum +} + +// approxInvCumulativeSum is like invCumulativeSum, but specifically +// operates on the tracked mass and returns an upper and lower bound +// approximation of the inverse cumulative sum. +// +// The true inverse cumulative sum will be in the range [lower, upper). +func (d *mud) approxInvCumulativeSum() (float64, float64, bool) { + if d.trackBucket == len(d.hist) { + return math.NaN(), math.NaN(), false + } + return float64(d.trackBucket) / mudDegree, float64(d.trackBucket+1) / mudDegree, true +} + +// invCumulativeSum returns x such that the integral of d from -∞ to x +// is y. If the total weight of d is less than y, it returns the +// maximum of the distribution and false. +// +// Specifically, y is a cumulative duration, and invCumulativeSum +// returns the mutator utilization x such that at least y time has +// been spent with mutator utilization <= x. +func (d *mud) invCumulativeSum(y float64) (float64, bool) { + if len(d.sorted) == 0 && len(d.unsorted) == 0 { + return math.NaN(), false + } + + // Sort edges. + edges := d.unsorted + sort.Slice(edges, func(i, j int) bool { + return edges[i].x < edges[j].x + }) + // Merge with sorted edges. 
+ d.unsorted = nil + if d.sorted == nil { + d.sorted = edges + } else { + oldSorted := d.sorted + newSorted := make([]edge, len(oldSorted)+len(edges)) + i, j := 0, 0 + for o := range newSorted { + if i >= len(oldSorted) { + copy(newSorted[o:], edges[j:]) + break + } else if j >= len(edges) { + copy(newSorted[o:], oldSorted[i:]) + break + } else if oldSorted[i].x < edges[j].x { + newSorted[o] = oldSorted[i] + i++ + } else { + newSorted[o] = edges[j] + j++ + } + } + d.sorted = newSorted + } + + // Traverse edges in order computing a cumulative sum. + csum, rate, prevX := 0.0, 0.0, 0.0 + for _, e := range d.sorted { + newCsum := csum + (e.x-prevX)*rate + if newCsum >= y { + // y was exceeded between the previous edge + // and this one. + if rate == 0 { + // Anywhere between prevX and + // e.x will do. We return e.x + // because that takes care of + // the y==0 case naturally. + return e.x, true + } + return (y-csum)/rate + prevX, true + } + newCsum += e.dirac + if newCsum >= y { + // y was exceeded by the Dirac delta at e.x. + return e.x, true + } + csum, prevX = newCsum, e.x + rate += e.delta + } + return prevX, false +} diff --git a/src/internal/traceparser/mud_test.go b/src/internal/traceparser/mud_test.go new file mode 100644 index 0000000000..6e048fcf19 --- /dev/null +++ b/src/internal/traceparser/mud_test.go @@ -0,0 +1,87 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package traceparser + +import ( + "math/rand" + "testing" +) + +func TestMUD(t *testing.T) { + // Insert random uniforms and check histogram mass and + // cumulative sum approximations. + rnd := rand.New(rand.NewSource(42)) + mass := 0.0 + var mud mud + for i := 0; i < 100; i++ { + area, l, r := rnd.Float64(), rnd.Float64(), rnd.Float64() + if rnd.Intn(10) == 0 { + r = l + } + t.Log(l, r, area) + mud.add(l, r, area) + mass += area + + // Check total histogram weight. 
+ hmass := 0.0 + for _, val := range mud.hist { + hmass += val + } + if !aeq(mass, hmass) { + t.Fatalf("want mass %g, got %g", mass, hmass) + } + + // Check inverse cumulative sum approximations. + for j := 0.0; j < mass; j += mass * 0.099 { + mud.setTrackMass(j) + l, u, ok := mud.approxInvCumulativeSum() + inv, ok2 := mud.invCumulativeSum(j) + if !ok || !ok2 { + t.Fatalf("inverse cumulative sum failed: approx %v, exact %v", ok, ok2) + } + if !(l <= inv && inv < u) { + t.Fatalf("inverse(%g) = %g, not ∈ [%g, %g)", j, inv, l, u) + } + } + } +} + +func TestMUDTracking(t *testing.T) { + // Test that the tracked mass is tracked correctly across + // updates. + rnd := rand.New(rand.NewSource(42)) + const uniforms = 100 + for trackMass := 0.0; trackMass < uniforms; trackMass += uniforms / 50 { + var mud mud + mass := 0.0 + mud.setTrackMass(trackMass) + for i := 0; i < uniforms; i++ { + area, l, r := rnd.Float64(), rnd.Float64(), rnd.Float64() + mud.add(l, r, area) + mass += area + l, u, ok := mud.approxInvCumulativeSum() + inv, ok2 := mud.invCumulativeSum(trackMass) + + if mass < trackMass { + if ok { + t.Errorf("approx(%g) = [%g, %g), but mass = %g", trackMass, l, u, mass) + } + if ok2 { + t.Errorf("exact(%g) = %g, but mass = %g", trackMass, inv, mass) + } + } else { + if !ok { + t.Errorf("approx(%g) failed, but mass = %g", trackMass, mass) + } + if !ok2 { + t.Errorf("exact(%g) failed, but mass = %g", trackMass, mass) + } + if ok && ok2 && !(l <= inv && inv < u) { + t.Errorf("inverse(%g) = %g, not ∈ [%g, %g)", trackMass, inv, l, u) + } + } + } + } +} From b251d7fbe6d69e1ce81baf7959062ae489858f31 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 17 Aug 2017 18:11:01 -0400 Subject: [PATCH 12/25] cmd/trace: display p99.9, p99 and p95 MUT This uses the mutator utilization distribution to compute the p99.9, p99, and p95 mutator utilization topograph lines and display them along with the MMU. 
Change-Id: I8c7e0ec326aa4bc00619ec7562854253f01cc802 Reviewed-on: https://go-review.googlesource.com/c/60800 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Hyang-Ah Hana Kim --- src/cmd/trace/mmu.go | 41 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/src/cmd/trace/mmu.go b/src/cmd/trace/mmu.go index 3fae3d6645..062e5ad2ca 100644 --- a/src/cmd/trace/mmu.go +++ b/src/cmd/trace/mmu.go @@ -88,6 +88,14 @@ func httpMMUPlot(w http.ResponseWriter, r *http.Request) { return } + var quantiles []float64 + for _, flagStr := range strings.Split(r.FormValue("flags"), "|") { + if flagStr == "mut" { + quantiles = []float64{0, 1 - .999, 1 - .99, 1 - .95} + break + } + } + // Find a nice starting point for the plot. xMin := time.Second for xMin > 1 { @@ -114,15 +122,21 @@ func httpMMUPlot(w http.ResponseWriter, r *http.Request) { // Compute MMU curve. logMin, logMax := math.Log(float64(xMin)), math.Log(float64(xMax)) const samples = 100 - plot := make([][2]float64, samples) + plot := make([][]float64, samples) for i := 0; i < samples; i++ { window := time.Duration(math.Exp(float64(i)/(samples-1)*(logMax-logMin) + logMin)) - y := mmuCurve.MMU(window) - plot[i] = [2]float64{float64(window), y} + if quantiles == nil { + plot[i] = make([]float64, 2) + plot[i][1] = mmuCurve.MMU(window) + } else { + plot[i] = make([]float64, 1+len(quantiles)) + copy(plot[i][1:], mmuCurve.MUD(window, quantiles)) + } + plot[i][0] = float64(window) } // Create JSON response. 
- err = json.NewEncoder(w).Encode(map[string]interface{}{"xMin": int64(xMin), "xMax": int64(xMax), "curve": plot}) + err = json.NewEncoder(w).Encode(map[string]interface{}{"xMin": int64(xMin), "xMax": int64(xMax), "quantiles": quantiles, "curve": plot}) if err != nil { log.Printf("failed to serialize response: %v", err) return @@ -150,6 +164,10 @@ var templMMU = ` else { return ns / 1e9 + 's'; } } + function niceQuantile(q) { + return 'p' + q*100; + } + function mmuFlags() { var flags = ""; $("#options input").each(function(i, elt) { @@ -181,6 +199,11 @@ var templMMU = ` var data = new google.visualization.DataTable(); data.addColumn('number', 'Window duration'); data.addColumn('number', 'Minimum mutator utilization'); + if (plotData.quantiles) { + for (var i = 1; i < plotData.quantiles.length; i++) { + data.addColumn('number', niceQuantile(1 - plotData.quantiles[i]) + ' MU'); + } + } data.addRows(curve); for (var i = 0; i < curve.length; i++) { data.setFormattedValue(i, 0, niceDuration(curve[i][0])); @@ -201,6 +224,7 @@ var templMMU = ` maxValue: 1.0, }, legend: { position: 'none' }, + focusTarget: 'category', width: 900, height: 500, chartArea: { width: '80%', height: '80%' }, @@ -208,6 +232,10 @@ var templMMU = ` for (var v = plotData.xMin; v <= plotData.xMax; v *= 10) { options.hAxis.ticks.push({v:v, f:niceDuration(v)}); } + if (plotData.quantiles) { + options.vAxis.title = 'Mutator utilization'; + options.legend.position = 'in'; + } var container = $('#mmu_chart'); container.empty(); @@ -298,6 +326,11 @@ var templMMU = ` ?Sweep reclaims unused memory between GCs. (Enabling this may be very slow.).

+

+ Display
+ + ?Display percentile mutator utilization in addition to minimum. E.g., p99 MU drops the worst 1% of windows.
+

Select a point for details.
From e72595ee0f97746be3ce594834a7003d5e804795 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Tue, 25 Jul 2017 15:03:44 -0400 Subject: [PATCH 13/25] cmd/trace: notes on MMU view improvements Change-Id: Ib9dcdc76095f6718f1cdc83349503f52567c76d4 Reviewed-on: https://go-review.googlesource.com/c/60801 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Hyang-Ah Hana Kim --- src/cmd/trace/mmu.go | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/cmd/trace/mmu.go b/src/cmd/trace/mmu.go index 062e5ad2ca..6a7d28e61d 100644 --- a/src/cmd/trace/mmu.go +++ b/src/cmd/trace/mmu.go @@ -4,6 +4,25 @@ // Minimum mutator utilization (MMU) graphing. +// TODO: +// +// In worst window list, show break-down of GC utilization sources +// (STW, assist, etc). Probably requires a different MutatorUtil +// representation. +// +// When a window size is selected, show a second plot of the mutator +// utilization distribution for that window size. +// +// Render plot progressively so rough outline is visible quickly even +// for very complex MUTs. Start by computing just a few window sizes +// and then add more window sizes. +// +// Consider using sampling to compute an approximate MUT. This would +// work by sampling the mutator utilization at randomly selected +// points in time in the trace to build an empirical distribution. We +// could potentially put confidence intervals on these estimates and +// render this progressively as we refine the distributions. + package main import ( From 2ae8bf7054b3320682e547396d9b6b5e51f5ade1 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Wed, 17 Oct 2018 20:16:45 +0000 Subject: [PATCH 14/25] runtime: fix stale comments about mheap and mspan As of 07e738e all spans are allocated out of a treap, and not just large spans or spans for large objects. Also, now we have a separate treap for spans that have been scavenged. 
Change-Id: I9c2cb7b6798fc536bbd34835da2e888224fd7ed4 Reviewed-on: https://go-review.googlesource.com/c/142958 Reviewed-by: Austin Clements --- src/runtime/mheap.go | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 8f6db8eec5..56ec3d4465 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -21,7 +21,7 @@ import ( const minPhysPageSize = 4096 // Main malloc heap. -// The heap itself is the "free[]" and "large" arrays, +// The heap itself is the "free" and "scav" treaps, // but all the other global data is here too. // // mheap must not be heap-allocated because it contains mSpanLists, @@ -147,7 +147,7 @@ type mheap struct { spanalloc fixalloc // allocator for span* cachealloc fixalloc // allocator for mcache* - treapalloc fixalloc // allocator for treapNodes* used by large objects + treapalloc fixalloc // allocator for treapNodes* specialfinalizeralloc fixalloc // allocator for specialfinalizer* specialprofilealloc fixalloc // allocator for specialprofile* speciallock mutex // lock for special record allocators. @@ -198,15 +198,16 @@ type arenaHint struct { // An MSpan is a run of pages. // -// When a MSpan is in the heap free list, state == mSpanFree +// When a MSpan is in the heap free treap, state == mSpanFree // and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span. +// If the MSpan is in the heap scav treap, then in addition to the +// above scavenged == true. scavenged == false in all other cases. // // When a MSpan is allocated, state == mSpanInUse or mSpanManual // and heapmap(i) == span for all s->start <= i < s->start+s->npages. -// Every MSpan is in one doubly-linked list, -// either one of the MHeap's free lists or one of the -// MCentral's span lists. +// Every MSpan is in one doubly-linked list, either in the MHeap's +// busy list or one of the MCentral's span lists. 
// An MSpan representing actual memory has state mSpanInUse, // mSpanManual, or mSpanFree. Transitions between these states are @@ -848,7 +849,7 @@ func (h *mheap) setSpans(base, npage uintptr, s *mspan) { // Allocates a span of the given size. h must be locked. // The returned span has been removed from the -// free list, but its state is still mSpanFree. +// free structures, but its state is still mSpanFree. func (h *mheap) allocSpanLocked(npage uintptr, stat *uint64) *mspan { var s *mspan From 3377b4673d6e0ca1a9bba1c7196d7e673ddb8108 Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Fri, 2 Nov 2018 21:41:41 -0700 Subject: [PATCH 15/25] cmd/compile: encapsulate and document two types.Type internal fields Change-Id: I5f7d2155c2c3a47dabdf16fe46b122ede81de4fc Reviewed-on: https://go-review.googlesource.com/c/147284 Reviewed-by: Matthew Dempsky --- src/cmd/compile/internal/gc/reflect.go | 2 +- src/cmd/compile/internal/gc/typecheck.go | 6 ++---- src/cmd/compile/internal/types/type.go | 20 ++++++++++++++------ 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/cmd/compile/internal/gc/reflect.go b/src/cmd/compile/internal/gc/reflect.go index 415d3cd594..50b741358f 100644 --- a/src/cmd/compile/internal/gc/reflect.go +++ b/src/cmd/compile/internal/gc/reflect.go @@ -812,7 +812,7 @@ func dcommontype(lsym *obj.LSym, t *types.Type) int { sptrWeak := true var sptr *obj.LSym - if !t.IsPtr() || t.PtrBase != nil { + if !t.IsPtr() || t.IsPtrElem() { tptr := types.NewPtr(t) if t.Sym != nil || methods(tptr) != nil { sptrWeak = false diff --git a/src/cmd/compile/internal/gc/typecheck.go b/src/cmd/compile/internal/gc/typecheck.go index be11a9841f..2a59521484 100644 --- a/src/cmd/compile/internal/gc/typecheck.go +++ b/src/cmd/compile/internal/gc/typecheck.go @@ -3671,8 +3671,7 @@ func copytype(n *Node, t *types.Type) { embedlineno := n.Type.ForwardType().Embedlineno l := n.Type.ForwardType().Copyto - ptrBase := n.Type.PtrBase - sliceOf := n.Type.SliceOf + cache := 
n.Type.Cache // TODO(mdempsky): Fix Type rekinding. *n.Type = *t @@ -3693,8 +3692,7 @@ func copytype(n *Node, t *types.Type) { t.Nod = asTypesNode(n) t.SetDeferwidth(false) - t.PtrBase = ptrBase - t.SliceOf = sliceOf + t.Cache = cache // Propagate go:notinheap pragma from the Name to the Type. if n.Name != nil && n.Name.Param != nil && n.Name.Param.Pragma&NotInHeap != 0 { diff --git a/src/cmd/compile/internal/types/type.go b/src/cmd/compile/internal/types/type.go index 39f4d2aa7b..b20039239b 100644 --- a/src/cmd/compile/internal/types/type.go +++ b/src/cmd/compile/internal/types/type.go @@ -149,8 +149,11 @@ type Type struct { Nod *Node // canonical OTYPE node Orig *Type // original type (type literal or predefined type) - SliceOf *Type - PtrBase *Type + // Cache of composite types, with this type being the element type. + Cache struct { + ptr *Type // *T, or nil + slice *Type // []T, or nil + } Sym *Sym // symbol containing name, for named types Vargen int32 // unique name for OTYPE/ONAME @@ -488,7 +491,7 @@ func NewArray(elem *Type, bound int64) *Type { // NewSlice returns the slice Type with element type elem. func NewSlice(elem *Type) *Type { - if t := elem.SliceOf; t != nil { + if t := elem.Cache.slice; t != nil { if t.Elem() != elem { Fatalf("elem mismatch") } @@ -497,7 +500,7 @@ func NewSlice(elem *Type) *Type { t := New(TSLICE) t.Extra = Slice{Elem: elem} - elem.SliceOf = t + elem.Cache.slice = t return t } @@ -551,7 +554,7 @@ func NewPtr(elem *Type) *Type { Fatalf("NewPtr: pointer to elem Type is nil") } - if t := elem.PtrBase; t != nil { + if t := elem.Cache.ptr; t != nil { if t.Elem() != elem { Fatalf("NewPtr: elem mismatch") } @@ -563,7 +566,7 @@ func NewPtr(elem *Type) *Type { t.Width = int64(Widthptr) t.Align = uint8(Widthptr) if NewPtrCacheEnabled { - elem.PtrBase = t + elem.Cache.ptr = t } return t } @@ -1258,6 +1261,11 @@ func (t *Type) IsPtr() bool { return t.Etype == TPTR } +// IsPtrElem reports whether t is the element of a pointer (to t). 
+func (t *Type) IsPtrElem() bool { + return t.Cache.ptr != nil +} + // IsUnsafePtr reports whether t is an unsafe pointer. func (t *Type) IsUnsafePtr() bool { return t.Etype == TUNSAFEPTR From e6305380a067c51223a59baf8a77575595a5f1e6 Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Fri, 2 Nov 2018 23:28:26 -0700 Subject: [PATCH 16/25] cmd/compile: reintroduce work-around for cyclic alias declarations This change re-introduces (temporarily) a work-around for recursive alias type declarations, originally in https://golang.org/cl/35831/ (intended as fix for #18640). The work-around was removed later for a more comprehensive cycle detection check. That check contained a subtle error which made the code appear to work, while in fact creating incorrect types internally. See #25838 for details. By re-introducing the original work-around, we eliminate problems with many simple recursive type declarations involving aliases; specifically cases such as #27232 and #27267. However, the more general problem remains. This CL also fixes the subtle error (incorrect variable use when analyzing a type cycle) mentioned above and now issues a fatal error with a reference to the relevant issue (rather than crashing later during the compilation). While not great, this is better than the current status. The long-term solution will need to address these cycles (see #25838). As a consequence, several old test cases are not accepted anymore by the compiler since they happened to work accidentally only. This CL disables parts or all code of those test cases. The issues are: #18640, #23823, and #24939. One of the new test cases (fixedbugs/issue27232.go) exposed a go/types issue. The test case is excluded from the go/types test suite and an issue was filed (#28576). Updates #18640. Updates #23823. Updates #24939. Updates #25838. Updates #28576. Fixes #27232. Fixes #27267. 
Change-Id: I6c2d10da98bfc6f4f445c755fcaab17fc7b214c5 Reviewed-on: https://go-review.googlesource.com/c/147286 Reviewed-by: Matthew Dempsky --- src/cmd/compile/internal/gc/main.go | 8 ++++++-- src/cmd/compile/internal/gc/typecheck.go | 12 ++++++++++-- src/go/types/stdlib_test.go | 1 + test/fixedbugs/issue18640.go | 5 ++--- test/fixedbugs/issue23823.go | 8 ++++++-- test/fixedbugs/issue24939.go | 4 +++- test/fixedbugs/issue27232.go | 19 +++++++++++++++++++ test/fixedbugs/issue27267.go | 21 +++++++++++++++++++++ 8 files changed, 68 insertions(+), 10 deletions(-) create mode 100644 test/fixedbugs/issue27232.go create mode 100644 test/fixedbugs/issue27267.go diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go index 49a4e05d99..78142d3bf8 100644 --- a/src/cmd/compile/internal/gc/main.go +++ b/src/cmd/compile/internal/gc/main.go @@ -491,13 +491,17 @@ func Main(archInit func(*Arch)) { // Phase 1: const, type, and names and types of funcs. // This will gather all the information about types // and methods but doesn't depend on any of it. + // + // We also defer type alias declarations until phase 2 + // to avoid cycles like #18640. + // TODO(gri) Remove this again once we have a fix for #25838. defercheckwidth() // Don't use range--typecheck can add closures to xtop. 
timings.Start("fe", "typecheck", "top1") for i := 0; i < len(xtop); i++ { n := xtop[i] - if op := n.Op; op != ODCL && op != OAS && op != OAS2 { + if op := n.Op; op != ODCL && op != OAS && op != OAS2 && (op != ODCLTYPE || !n.Left.Name.Param.Alias) { xtop[i] = typecheck(n, Etop) } } @@ -509,7 +513,7 @@ func Main(archInit func(*Arch)) { timings.Start("fe", "typecheck", "top2") for i := 0; i < len(xtop); i++ { n := xtop[i] - if op := n.Op; op == ODCL || op == OAS || op == OAS2 { + if op := n.Op; op == ODCL || op == OAS || op == OAS2 || op == ODCLTYPE && n.Left.Name.Param.Alias { xtop[i] = typecheck(n, Etop) } } diff --git a/src/cmd/compile/internal/gc/typecheck.go b/src/cmd/compile/internal/gc/typecheck.go index 2a59521484..06dd176b37 100644 --- a/src/cmd/compile/internal/gc/typecheck.go +++ b/src/cmd/compile/internal/gc/typecheck.go @@ -255,8 +255,16 @@ func typecheck(n *Node, top int) (res *Node) { // since it would expand indefinitely when aliases // are substituted. cycle := cycleFor(n) - for _, n := range cycle { - if n.Name != nil && !n.Name.Param.Alias { + for _, n1 := range cycle { + if n1.Name != nil && !n1.Name.Param.Alias { + // Cycle is ok. But if n is an alias type and doesn't + // have a type yet, we have a recursive type declaration + // with aliases that we can't handle properly yet. + // Report an error rather than crashing later. 
+ if n.Name != nil && n.Name.Param.Alias && n.Type == nil { + lineno = n.Pos + Fatalf("cannot handle alias type declaration (issue #25838): %v", n) + } lineno = lno return n } diff --git a/src/go/types/stdlib_test.go b/src/go/types/stdlib_test.go index 84908fd190..a4ff1ab9a8 100644 --- a/src/go/types/stdlib_test.go +++ b/src/go/types/stdlib_test.go @@ -180,6 +180,7 @@ func TestStdFixed(t *testing.T) { "issue22200b.go", // go/types does not have constraints on stack size "issue25507.go", // go/types does not have constraints on stack size "issue20780.go", // go/types does not have constraints on stack size + "issue27232.go", // go/types has a bug with alias type (issue #28576) ) } diff --git a/test/fixedbugs/issue18640.go b/test/fixedbugs/issue18640.go index 60abd31f76..091bbe596b 100644 --- a/test/fixedbugs/issue18640.go +++ b/test/fixedbugs/issue18640.go @@ -20,8 +20,7 @@ type ( d = c ) -// The compiler reports an incorrect (non-alias related) -// type cycle here (via dowith()). Disabled for now. +// The compiler cannot handle these cases. Disabled for now. // See issue #25838. /* type ( @@ -32,7 +31,6 @@ type ( i = j j = e ) -*/ type ( a1 struct{ *b1 } @@ -45,3 +43,4 @@ type ( b2 = c2 c2 struct{ *b2 } ) +*/ diff --git a/test/fixedbugs/issue23823.go b/test/fixedbugs/issue23823.go index 2f802d0988..9297966cbd 100644 --- a/test/fixedbugs/issue23823.go +++ b/test/fixedbugs/issue23823.go @@ -1,4 +1,4 @@ -// errorcheck +// compile // Copyright 2018 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style @@ -6,10 +6,14 @@ package p +// The compiler cannot handle this. Disabled for now. +// See issue #25838. 
+/* type I1 = interface { I2 } -type I2 interface { // ERROR "invalid recursive type" +type I2 interface { I1 } +*/ diff --git a/test/fixedbugs/issue24939.go b/test/fixedbugs/issue24939.go index 26530e95b2..0ae6f2b9f2 100644 --- a/test/fixedbugs/issue24939.go +++ b/test/fixedbugs/issue24939.go @@ -15,7 +15,9 @@ type M interface { } type P = interface { - I() M + // The compiler cannot handle this case. Disabled for now. + // See issue #25838. + // I() M } func main() {} diff --git a/test/fixedbugs/issue27232.go b/test/fixedbugs/issue27232.go new file mode 100644 index 0000000000..3a1cc87e4c --- /dev/null +++ b/test/fixedbugs/issue27232.go @@ -0,0 +1,19 @@ +// compile + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package p + +type F = func(T) + +type T interface { + m(F) +} + +type t struct{} + +func (t) m(F) {} + +var _ T = &t{} diff --git a/test/fixedbugs/issue27267.go b/test/fixedbugs/issue27267.go new file mode 100644 index 0000000000..ebae44f48f --- /dev/null +++ b/test/fixedbugs/issue27267.go @@ -0,0 +1,21 @@ +// compile + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package p + +// 1st test case from issue +type F = func(E) // compiles if not type alias or moved below E struct +type E struct { + f F +} + +var x = E{func(E) {}} + +// 2nd test case from issue +type P = *S +type S struct { + p P +} From a540aa338a3145ab32ca4409919c82722f8724f3 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Mon, 5 Nov 2018 10:33:28 -0800 Subject: [PATCH 17/25] test: add test that gccgo failed to compile Updates #28601 Change-Id: I734fc5ded153126d384f0df912ecd4d208005e49 Reviewed-on: https://go-review.googlesource.com/c/147537 Run-TryBot: Ian Lance Taylor Reviewed-by: Cherry Zhang --- test/fixedbugs/issue28601.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 test/fixedbugs/issue28601.go diff --git a/test/fixedbugs/issue28601.go b/test/fixedbugs/issue28601.go new file mode 100644 index 0000000000..ec367e9282 --- /dev/null +++ b/test/fixedbugs/issue28601.go @@ -0,0 +1,15 @@ +// compile + +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Failed to compile with gccgo. + +package p + +import "unsafe" + +const w int = int(unsafe.Sizeof(0)) + +var a [w]byte From f1a9f1df5070f69685e269de940c6218f899d228 Mon Sep 17 00:00:00 2001 From: Yury Smolsky Date: Wed, 31 Oct 2018 00:19:35 +0200 Subject: [PATCH 18/25] go/doc: inspect function signature for building playground examples This documentation example was broken: https://golang.org/pkg/image/png/#example_Decode. It did not have the "io" package imported, The package was referenced in the result type of the function. The "playExample" function did not inspect the result types of declared functions. This CL adds inspecting of parameters and result types of functions. 
Fixes #28492 Updates #9679 Change-Id: I6d8b11bad2db8ea8ba69039cfaa914093bdd5132 Reviewed-on: https://go-review.googlesource.com/c/146118 Run-TryBot: Yury Smolsky TryBot-Result: Gobot Gobot Reviewed-by: Robert Griesemer --- src/go/doc/example.go | 12 ++++++++ src/go/doc/example_test.go | 62 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/src/go/doc/example.go b/src/go/doc/example.go index d6d4ece3a8..cf3547810a 100644 --- a/src/go/doc/example.go +++ b/src/go/doc/example.go @@ -219,6 +219,18 @@ func playExample(file *ast.File, f *ast.FuncDecl) *ast.File { for i := 0; i < len(depDecls); i++ { switch d := depDecls[i].(type) { case *ast.FuncDecl: + // Inspect types of parameters and results. See #28492. + if d.Type.Params != nil { + for _, p := range d.Type.Params.List { + ast.Inspect(p.Type, inspectFunc) + } + } + if d.Type.Results != nil { + for _, r := range d.Type.Results.List { + ast.Inspect(r.Type, inspectFunc) + } + } + ast.Inspect(d.Body, inspectFunc) case *ast.GenDecl: for _, spec := range d.Specs { diff --git a/src/go/doc/example_test.go b/src/go/doc/example_test.go index 552a51bf74..0d2bf72e31 100644 --- a/src/go/doc/example_test.go +++ b/src/go/doc/example_test.go @@ -351,6 +351,68 @@ func TestExamplesWholeFile(t *testing.T) { } } +const exampleInspectSignature = `package foo_test + +import ( + "bytes" + "io" +) + +func getReader() io.Reader { return nil } + +func do(b bytes.Reader) {} + +func Example() { + getReader() + do() + // Output: +} + +func ExampleIgnored() { +} +` + +const exampleInspectSignatureOutput = `package main + +import ( + "bytes" + "io" +) + +func getReader() io.Reader { return nil } + +func do(b bytes.Reader) {} + +func main() { + getReader() + do() +} +` + +func TestExampleInspectSignature(t *testing.T) { + // Verify that "bytes" and "io" are imported. See issue #28492. 
+ fset := token.NewFileSet() + file, err := parser.ParseFile(fset, "test.go", strings.NewReader(exampleInspectSignature), parser.ParseComments) + if err != nil { + t.Fatal(err) + } + es := doc.Examples(file) + if len(es) != 2 { + t.Fatalf("wrong number of examples; got %d want 2", len(es)) + } + // We are interested in the first example only. + e := es[0] + if e.Name != "" { + t.Errorf("got Name == %q, want %q", e.Name, "") + } + if g, w := formatFile(t, fset, e.Play), exampleInspectSignatureOutput; g != w { + t.Errorf("got Play == %q, want %q", g, w) + } + if g, w := e.Output, ""; g != w { + t.Errorf("got Output == %q, want %q", g, w) + } +} + func formatFile(t *testing.T, fset *token.FileSet, n *ast.File) string { if n == nil { return "" From 9c89923266a372e9357dc3296b6c53bb931dd4a9 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Mon, 5 Nov 2018 12:36:42 -0500 Subject: [PATCH 19/25] runtime: deflake TestTracebackAncestors TestTracebackAncestors has a ~0.1% chance of failing with more goroutines in the traceback than expected. This happens because there's a window between each goroutine starting its child and that goroutine actually exiting. The test captures its own stack trace after everything is "done", but if this happens during that window, it will include the goroutine that's in the process of being torn down. Here's an example of such a failure: https://build.golang.org/log/fad10d0625295eb79fa879f53b8b32b9d0596af8 This CL fixes this by recording the goroutines that are expected to exit and removing them from the stack trace. With this fix, this test passed 15,000 times with no failures. 
Change-Id: I71e7c6282987a15e8b74188b9c585aa2ca97cbcd Reviewed-on: https://go-review.googlesource.com/c/147517 Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- .../testdata/testprog/traceback_ancestors.go | 56 +++++++++++++++++-- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/src/runtime/testdata/testprog/traceback_ancestors.go b/src/runtime/testdata/testprog/traceback_ancestors.go index fe57c1c157..0ee402c4bd 100644 --- a/src/runtime/testdata/testprog/traceback_ancestors.go +++ b/src/runtime/testdata/testprog/traceback_ancestors.go @@ -5,8 +5,10 @@ package main import ( + "bytes" "fmt" "runtime" + "strings" ) func init() { @@ -18,25 +20,50 @@ const numFrames = 2 func TracebackAncestors() { w := make(chan struct{}) - recurseThenCallGo(w, numGoroutines, numFrames) + recurseThenCallGo(w, numGoroutines, numFrames, true) <-w printStack() close(w) } +var ignoreGoroutines = make(map[string]bool) + func printStack() { buf := make([]byte, 1024) for { n := runtime.Stack(buf, true) if n < len(buf) { - fmt.Print(string(buf[:n])) + tb := string(buf[:n]) + + // Delete any ignored goroutines, if present. + pos := 0 + for pos < len(tb) { + next := pos + strings.Index(tb[pos:], "\n\n") + if next < pos { + next = len(tb) + } else { + next += len("\n\n") + } + + if strings.HasPrefix(tb[pos:], "goroutine ") { + id := tb[pos+len("goroutine "):] + id = id[:strings.IndexByte(id, ' ')] + if ignoreGoroutines[id] { + tb = tb[:pos] + tb[next:] + next = pos + } + } + pos = next + } + + fmt.Print(tb) return } buf = make([]byte, 2*len(buf)) } } -func recurseThenCallGo(w chan struct{}, frames int, goroutines int) { +func recurseThenCallGo(w chan struct{}, frames int, goroutines int, main bool) { if frames == 0 { // Signal to TracebackAncestors that we are done recursing and starting goroutines. 
w <- struct{}{} @@ -44,10 +71,29 @@ func recurseThenCallGo(w chan struct{}, frames int, goroutines int) { return } if goroutines == 0 { + // Record which goroutine this is so we can ignore it + // in the traceback if it hasn't finished exiting by + // the time we printStack. + if !main { + ignoreGoroutines[goroutineID()] = true + } + // Start the next goroutine now that there are no more recursions left // for this current goroutine. - go recurseThenCallGo(w, frames-1, numFrames) + go recurseThenCallGo(w, frames-1, numFrames, false) return } - recurseThenCallGo(w, frames, goroutines-1) + recurseThenCallGo(w, frames, goroutines-1, main) +} + +func goroutineID() string { + buf := make([]byte, 128) + runtime.Stack(buf, false) + const prefix = "goroutine " + if !bytes.HasPrefix(buf, []byte(prefix)) { + panic(fmt.Sprintf("expected %q at beginning of traceback:\n%s", prefix, buf)) + } + buf = buf[len(prefix):] + n := bytes.IndexByte(buf, ' ') + return string(buf[:n]) } From 44dcb5cb61aee5435e0b3c78544a1d3352a4cc98 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Mon, 5 Nov 2018 19:26:25 +0000 Subject: [PATCH 20/25] runtime: clean up MSpan* MCache* MCentral* in docs This change cleans up references to MSpan, MCache, and MCentral in the docs via a bunch of sed invocations to better reflect the Go names for the equivalent structures (i.e. mspan, mcache, mcentral) and their methods (i.e. MSpan_Sweep -> mspan.sweep). 
Change-Id: Ie911ac975a24bd25200a273086dd835ab78b1711 Reviewed-on: https://go-review.googlesource.com/c/147557 Reviewed-by: Austin Clements Run-TryBot: Austin Clements TryBot-Result: Gobot Gobot --- src/runtime/heapdump.go | 4 ++-- src/runtime/mcache.go | 2 +- src/runtime/mcentral.go | 10 ++++----- src/runtime/mfixalloc.go | 2 +- src/runtime/mgcmark.go | 2 +- src/runtime/mgcsweep.go | 20 +++++++++--------- src/runtime/mheap.go | 44 ++++++++++++++++++++-------------------- src/runtime/mstats.go | 2 +- 8 files changed, 43 insertions(+), 43 deletions(-) diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go index eadbcaeee1..ca56708a04 100644 --- a/src/runtime/heapdump.go +++ b/src/runtime/heapdump.go @@ -428,7 +428,7 @@ func dumproots() { dumpmemrange(unsafe.Pointer(firstmoduledata.bss), firstmoduledata.ebss-firstmoduledata.bss) dumpfields(firstmoduledata.gcbssmask) - // MSpan.types + // mspan.types for _, s := range mheap_.allspans { if s.state == mSpanInUse { // Finalizers @@ -661,7 +661,7 @@ func writeheapdump_m(fd uintptr) { _g_.waitreason = waitReasonDumpingHeap // Update stats so we can dump them. - // As a side effect, flushes all the MCaches so the MSpan.freelist + // As a side effect, flushes all the mcaches so the mspan.freelist // lists contain all the free objects. updatememstats() diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go index e20e92cdf4..7895e489bc 100644 --- a/src/runtime/mcache.go +++ b/src/runtime/mcache.go @@ -79,7 +79,7 @@ type stackfreelist struct { size uintptr // total size of stacks in list } -// dummy MSpan that contains no free objects. +// dummy mspan that contains no free objects. var emptymspan mspan func allocmcache() *mcache { diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go index f108bfc31e..a60eb9fd0c 100644 --- a/src/runtime/mcentral.go +++ b/src/runtime/mcentral.go @@ -6,8 +6,8 @@ // // See malloc.go for an overview. 
// -// The MCentral doesn't actually contain the list of free objects; the MSpan does. -// Each MCentral is two lists of MSpans: those with free objects (c->nonempty) +// The mcentral doesn't actually contain the list of free objects; the mspan does. +// Each mcentral is two lists of mspans: those with free objects (c->nonempty) // and those that are completely allocated (c->empty). package runtime @@ -36,7 +36,7 @@ func (c *mcentral) init(spc spanClass) { c.empty.init() } -// Allocate a span to use in an MCache. +// Allocate a span to use in an mcache. func (c *mcentral) cacheSpan() *mspan { // Deduct credit for this span allocation and sweep if necessary. spanBytes := uintptr(class_to_allocnpages[c.spanclass.sizeclass()]) * _PageSize @@ -146,7 +146,7 @@ havespan: return s } -// Return span from an MCache. +// Return span from an mcache. func (c *mcentral) uncacheSpan(s *mspan) { if s.allocCount == 0 { throw("uncaching span but s.allocCount == 0") @@ -231,7 +231,7 @@ func (c *mcentral) freeSpan(s *mspan, preserve bool, wasempty bool) bool { } // delay updating sweepgen until here. This is the signal that - // the span may be used in an MCache, so it must come after the + // the span may be used in an mcache, so it must come after the // linked list operations above (actually, just after the // lock of c above.) atomic.Store(&s.sweepgen, mheap_.sweepgen) diff --git a/src/runtime/mfixalloc.go b/src/runtime/mfixalloc.go index 1febe782bb..f9dd6ca474 100644 --- a/src/runtime/mfixalloc.go +++ b/src/runtime/mfixalloc.go @@ -12,7 +12,7 @@ import "unsafe" // FixAlloc is a simple free-list allocator for fixed size objects. // Malloc uses a FixAlloc wrapped around sysAlloc to manage its -// MCache and MSpan objects. +// mcache and mspan objects. 
// // Memory returned by fixalloc.alloc is zeroed by default, but the // caller may take responsibility for zeroing allocations by setting diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 14f09700ee..28260ab706 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -178,7 +178,7 @@ func markroot(gcw *gcWork, i uint32) { systemstack(markrootFreeGStacks) case baseSpans <= i && i < baseStacks: - // mark MSpan.specials + // mark mspan.specials markrootSpans(gcw, int(i-baseSpans)) default: diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go index 627a6a023f..6733aa9b4a 100644 --- a/src/runtime/mgcsweep.go +++ b/src/runtime/mgcsweep.go @@ -152,7 +152,7 @@ func (s *mspan) ensureSwept() { // (if GC is triggered on another goroutine). _g_ := getg() if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 { - throw("MSpan_EnsureSwept: m is not locked") + throw("mspan.ensureSwept: m is not locked") } sg := mheap_.sweepgen @@ -178,7 +178,7 @@ func (s *mspan) ensureSwept() { // Sweep frees or collects finalizers for blocks not marked in the mark phase. // It clears the mark bits in preparation for the next GC round. // Returns true if the span was returned to heap. -// If preserve=true, don't return it to heap nor relink in MCentral lists; +// If preserve=true, don't return it to heap nor relink in mcentral lists; // caller takes care of it. //TODO go:nowritebarrier func (s *mspan) sweep(preserve bool) bool { @@ -186,12 +186,12 @@ func (s *mspan) sweep(preserve bool) bool { // GC must not start while we are in the middle of this function. 
_g_ := getg() if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 { - throw("MSpan_Sweep: m is not locked") + throw("mspan.sweep: m is not locked") } sweepgen := mheap_.sweepgen if s.state != mSpanInUse || s.sweepgen != sweepgen-1 { - print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") - throw("MSpan_Sweep: bad span state") + print("mspan.sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") + throw("mspan.sweep: bad span state") } if trace.enabled { @@ -327,8 +327,8 @@ func (s *mspan) sweep(preserve bool) bool { // The span must be in our exclusive ownership until we update sweepgen, // check for potential races. if s.state != mSpanInUse || s.sweepgen != sweepgen-1 { - print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") - throw("MSpan_Sweep: bad span state after sweep") + print("mspan.sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n") + throw("mspan.sweep: bad span state after sweep") } // Serialization point. // At this point the mark bits are cleared and allocation ready @@ -339,7 +339,7 @@ func (s *mspan) sweep(preserve bool) bool { if nfreed > 0 && spc.sizeclass() != 0 { c.local_nsmallfree[spc.sizeclass()] += uintptr(nfreed) res = mheap_.central[spc].mcentral.freeSpan(s, preserve, wasempty) - // MCentral_FreeSpan updates sweepgen + // mcentral.freeSpan updates sweepgen } else if freeToHeap { // Free large span to heap @@ -351,12 +351,12 @@ func (s *mspan) sweep(preserve bool) bool { // calling sysFree here without any kind of adjustment of the // heap data structures means that when the memory does // come back to us, we have the wrong metadata for it, either in - // the MSpan structures or in the garbage collection bitmap. + // the mspan structures or in the garbage collection bitmap. 
// Using sysFault here means that the program will run out of // memory fairly quickly in efence mode, but at least it won't // have mysterious crashes due to confused memory reuse. // It should be possible to switch back to sysFree if we also - // implement and then call some kind of MHeap_DeleteSpan. + // implement and then call some kind of mheap.deleteSpan. if debug.efence > 0 { s.limit = 0 // prevent mlookup from finding this span sysFault(unsafe.Pointer(s.base()), size) diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go index 56ec3d4465..43f59adb8a 100644 --- a/src/runtime/mheap.go +++ b/src/runtime/mheap.go @@ -136,8 +136,8 @@ type mheap struct { // _ uint32 // ensure 64-bit alignment of central // central free lists for small size classes. - // the padding makes sure that the MCentrals are - // spaced CacheLinePadSize bytes apart, so that each MCentral.lock + // the padding makes sure that the mcentrals are + // spaced CacheLinePadSize bytes apart, so that each mcentral.lock // gets its own cache line. // central is indexed by spanClass. central [numSpanClasses]struct { @@ -196,20 +196,20 @@ type arenaHint struct { next *arenaHint } -// An MSpan is a run of pages. +// An mspan is a run of pages. // -// When a MSpan is in the heap free treap, state == mSpanFree +// When a mspan is in the heap free treap, state == mSpanFree // and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span. -// If the MSpan is in the heap scav treap, then in addition to the +// If the mspan is in the heap scav treap, then in addition to the // above scavenged == true. scavenged == false in all other cases. // -// When a MSpan is allocated, state == mSpanInUse or mSpanManual +// When a mspan is allocated, state == mSpanInUse or mSpanManual // and heapmap(i) == span for all s->start <= i < s->start+s->npages. -// Every MSpan is in one doubly-linked list, either in the MHeap's -// busy list or one of the MCentral's span lists. 
+// Every mspan is in one doubly-linked list, either in the mheap's +// busy list or one of the mcentral's span lists. -// An MSpan representing actual memory has state mSpanInUse, +// An mspan representing actual memory has state mSpanInUse, // mSpanManual, or mSpanFree. Transitions between these states are // constrained as follows: // @@ -880,10 +880,10 @@ func (h *mheap) allocSpanLocked(npage uintptr, stat *uint64) *mspan { HaveSpan: // Mark span in use. if s.state != mSpanFree { - throw("MHeap_AllocLocked - MSpan not free") + throw("mheap.allocLocked - mspan not free") } if s.npages < npage { - throw("MHeap_AllocLocked - bad npages") + throw("mheap.allocLocked - bad npages") } // First, subtract any memory that was released back to @@ -1022,16 +1022,16 @@ func (h *mheap) freeSpanLocked(s *mspan, acctinuse, acctidle bool, unusedsince i switch s.state { case mSpanManual: if s.allocCount != 0 { - throw("MHeap_FreeSpanLocked - invalid stack free") + throw("mheap.freeSpanLocked - invalid stack free") } case mSpanInUse: if s.allocCount != 0 || s.sweepgen != h.sweepgen { - print("MHeap_FreeSpanLocked - span ", s, " ptr ", hex(s.base()), " allocCount ", s.allocCount, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n") - throw("MHeap_FreeSpanLocked - invalid free") + print("mheap.freeSpanLocked - span ", s, " ptr ", hex(s.base()), " allocCount ", s.allocCount, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n") + throw("mheap.freeSpanLocked - invalid free") } h.pagesInUse -= uint64(s.npages) default: - throw("MHeap_FreeSpanLocked - invalid span state") + throw("mheap.freeSpanLocked - invalid span state") } if acctinuse { @@ -1251,9 +1251,9 @@ func (list *mSpanList) init() { func (list *mSpanList) remove(span *mspan) { if span.list != list { - print("runtime: failed MSpanList_Remove span.npages=", span.npages, + print("runtime: failed mSpanList.remove span.npages=", span.npages, " span=", span, " prev=", span.prev, " span.list=", span.list, " list=", list, "\n") - 
throw("MSpanList_Remove") + throw("mSpanList.remove") } if list.first == span { list.first = span.next @@ -1276,8 +1276,8 @@ func (list *mSpanList) isEmpty() bool { func (list *mSpanList) insert(span *mspan) { if span.next != nil || span.prev != nil || span.list != nil { - println("runtime: failed MSpanList_Insert", span, span.next, span.prev, span.list) - throw("MSpanList_Insert") + println("runtime: failed mSpanList.insert", span, span.next, span.prev, span.list) + throw("mSpanList.insert") } span.next = list.first if list.first != nil { @@ -1294,8 +1294,8 @@ func (list *mSpanList) insert(span *mspan) { func (list *mSpanList) insertBack(span *mspan) { if span.next != nil || span.prev != nil || span.list != nil { - println("runtime: failed MSpanList_InsertBack", span, span.next, span.prev, span.list) - throw("MSpanList_InsertBack") + println("runtime: failed mSpanList.insertBack", span, span.next, span.prev, span.list) + throw("mSpanList.insertBack") } span.prev = list.last if list.last != nil { @@ -1523,7 +1523,7 @@ func setprofilebucket(p unsafe.Pointer, b *bucket) { } // Do whatever cleanup needs to be done to deallocate s. It has -// already been unlinked from the MSpan specials list. +// already been unlinked from the mspan specials list. func freespecial(s *special, p unsafe.Pointer, size uintptr) { switch s.kind { case _KindSpecialFinalizer: diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go index fd576b7ae0..9250865ed1 100644 --- a/src/runtime/mstats.go +++ b/src/runtime/mstats.go @@ -529,7 +529,7 @@ func updatememstats() { memstats.by_size[i].nfree = 0 } - // Flush MCache's to MCentral. + // Flush mcache's to mcentral. systemstack(flushallmcaches) // Aggregate local stats. 
From 9e619739fdc4cbaeb00a10ef95ce3e5d6996e8a7 Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Sat, 3 Nov 2018 09:09:28 -0700 Subject: [PATCH 21/25] cmd/compile: copy all fields during SubstAny MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consider these functions: func f(a any) int func g(a any) int Prior to this change, since f and g have identical signatures, they would share a single generated func type. Because types.SubstAny makes a shallow type copy, even after instantiation, f and g share a single generated Result type. So if you instantiate f with any=T, call dowidth, instantiate g with any=U, and call dowidth, and if sizeof(T) != sizeof(U), then the Offset of the result for f is now wrong. I don't believe this happens at all right now, but it bit me hard when experimenting with some other compiler changes. And it's hard to debug. It results in rare stack corruption, causing problems far from the actual source of the problem. To fix this, change SubstAny to make deep copies of TSTRUCTs. 
name old alloc/op new alloc/op delta Template 35.3MB ± 0% 35.4MB ± 0% +0.23% (p=0.008 n=5+5) Unicode 29.1MB ± 0% 29.1MB ± 0% +0.16% (p=0.008 n=5+5) GoTypes 122MB ± 0% 122MB ± 0% +0.16% (p=0.008 n=5+5) Compiler 513MB ± 0% 514MB ± 0% +0.19% (p=0.008 n=5+5) SSA 1.94GB ± 0% 1.94GB ± 0% +0.01% (p=0.008 n=5+5) Flate 24.2MB ± 0% 24.2MB ± 0% +0.08% (p=0.008 n=5+5) GoParser 28.5MB ± 0% 28.5MB ± 0% +0.24% (p=0.008 n=5+5) Reflect 86.2MB ± 0% 86.3MB ± 0% +0.09% (p=0.008 n=5+5) Tar 34.9MB ± 0% 34.9MB ± 0% +0.13% (p=0.008 n=5+5) XML 47.0MB ± 0% 47.1MB ± 0% +0.18% (p=0.008 n=5+5) [Geo mean] 80.9MB 81.0MB +0.15% name old allocs/op new allocs/op delta Template 348k ± 0% 349k ± 0% +0.38% (p=0.008 n=5+5) Unicode 340k ± 0% 340k ± 0% +0.21% (p=0.008 n=5+5) GoTypes 1.27M ± 0% 1.28M ± 0% +0.27% (p=0.008 n=5+5) Compiler 4.90M ± 0% 4.92M ± 0% +0.36% (p=0.008 n=5+5) SSA 15.3M ± 0% 15.3M ± 0% +0.03% (p=0.008 n=5+5) Flate 232k ± 0% 233k ± 0% +0.14% (p=0.008 n=5+5) GoParser 291k ± 0% 292k ± 0% +0.42% (p=0.008 n=5+5) Reflect 1.05M ± 0% 1.05M ± 0% +0.14% (p=0.008 n=5+5) Tar 343k ± 0% 344k ± 0% +0.22% (p=0.008 n=5+5) XML 428k ± 0% 430k ± 0% +0.36% (p=0.008 n=5+5) [Geo mean] 807k 809k +0.25% Change-Id: I62134db642206cded01920dc1d8a7da61f7ca0ac Reviewed-on: https://go-review.googlesource.com/c/147038 Run-TryBot: Josh Bleecher Snyder TryBot-Result: Gobot Gobot Reviewed-by: Matthew Dempsky --- src/cmd/compile/internal/types/type.go | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/cmd/compile/internal/types/type.go b/src/cmd/compile/internal/types/type.go index b20039239b..45355e5798 100644 --- a/src/cmd/compile/internal/types/type.go +++ b/src/cmd/compile/internal/types/type.go @@ -665,23 +665,18 @@ func SubstAny(t *Type, types *[]*Type) *Type { } case TSTRUCT: + // Make a copy of all fields, including ones whose type does not change. + // This prevents aliasing across functions, which can lead to later + // fields getting their Offset incorrectly overwritten. 
fields := t.FieldSlice() - var nfs []*Field + nfs := make([]*Field, len(fields)) for i, f := range fields { nft := SubstAny(f.Type, types) - if nft == f.Type { - continue - } - if nfs == nil { - nfs = append([]*Field(nil), fields...) - } nfs[i] = f.Copy() nfs[i].Type = nft } - if nfs != nil { - t = t.copy() - t.SetFields(nfs) - } + t = t.copy() + t.SetFields(nfs) } return t From 5848b6c9b854546473814c8752ee117a71bb8b54 Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Mon, 29 Oct 2018 13:54:24 -0700 Subject: [PATCH 22/25] cmd/compile: shrink specialized convT2x call sites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit convT2E16 and other specialized type-to-interface routines accept a type/itab argument and return a complete interface value. However, we know enough in the routine to do without the type. And the caller can construct the interface value using the type. Doing so shrinks the call sites of ten of the specialized convT2x routines. It also lets us unify the empty and non-empty interface routines. Cuts 12k off cmd/go. 
name old time/op new time/op delta ConvT2ESmall-8 2.96ns ± 2% 2.34ns ± 4% -21.01% (p=0.000 n=175+189) ConvT2EUintptr-8 3.00ns ± 3% 2.34ns ± 4% -22.02% (p=0.000 n=189+187) ConvT2ELarge-8 21.3ns ± 7% 21.5ns ± 5% +1.02% (p=0.000 n=200+197) ConvT2ISmall-8 2.99ns ± 4% 2.33ns ± 3% -21.95% (p=0.000 n=193+184) ConvT2IUintptr-8 3.02ns ± 3% 2.33ns ± 3% -22.82% (p=0.000 n=198+190) ConvT2ILarge-8 21.7ns ± 5% 22.2ns ± 4% +2.31% (p=0.000 n=199+198) ConvT2Ezero/zero/16-8 2.96ns ± 2% 2.33ns ± 3% -21.11% (p=0.000 n=174+187) ConvT2Ezero/zero/32-8 2.96ns ± 1% 2.35ns ± 4% -20.62% (p=0.000 n=163+193) ConvT2Ezero/zero/64-8 2.99ns ± 2% 2.34ns ± 4% -21.78% (p=0.000 n=183+188) ConvT2Ezero/zero/str-8 3.27ns ± 3% 2.54ns ± 3% -22.32% (p=0.000 n=195+192) ConvT2Ezero/zero/slice-8 3.46ns ± 4% 2.81ns ± 3% -18.96% (p=0.000 n=197+164) ConvT2Ezero/zero/big-8 88.4ns ±20% 90.0ns ±20% +1.84% (p=0.000 n=196+198) ConvT2Ezero/nonzero/16-8 12.6ns ± 3% 12.3ns ± 3% -2.34% (p=0.000 n=167+196) ConvT2Ezero/nonzero/32-8 12.3ns ± 4% 11.9ns ± 3% -2.95% (p=0.000 n=187+193) ConvT2Ezero/nonzero/64-8 14.2ns ± 6% 13.8ns ± 5% -2.94% (p=0.000 n=198+199) ConvT2Ezero/nonzero/str-8 27.2ns ± 5% 26.8ns ± 5% -1.33% (p=0.000 n=200+198) ConvT2Ezero/nonzero/slice-8 33.3ns ± 8% 33.1ns ± 6% -0.82% (p=0.000 n=199+200) ConvT2Ezero/nonzero/big-8 88.8ns ±22% 90.2ns ±18% +1.58% (p=0.000 n=200+199) Negligible toolspeed impact. 
name old alloc/op new alloc/op delta Template 35.4MB ± 0% 35.3MB ± 0% -0.06% (p=0.008 n=5+5) Unicode 29.1MB ± 0% 29.1MB ± 0% ~ (p=0.310 n=5+5) GoTypes 122MB ± 0% 122MB ± 0% -0.08% (p=0.008 n=5+5) Compiler 514MB ± 0% 513MB ± 0% -0.02% (p=0.008 n=5+5) SSA 1.94GB ± 0% 1.94GB ± 0% -0.01% (p=0.008 n=5+5) Flate 24.2MB ± 0% 24.2MB ± 0% ~ (p=0.548 n=5+5) GoParser 28.5MB ± 0% 28.5MB ± 0% -0.05% (p=0.016 n=5+5) Reflect 86.3MB ± 0% 86.2MB ± 0% -0.02% (p=0.008 n=5+5) Tar 34.9MB ± 0% 34.9MB ± 0% ~ (p=0.095 n=5+5) XML 47.1MB ± 0% 47.1MB ± 0% -0.05% (p=0.008 n=5+5) [Geo mean] 81.0MB 81.0MB -0.03% name old allocs/op new allocs/op delta Template 349k ± 0% 349k ± 0% -0.08% (p=0.008 n=5+5) Unicode 340k ± 0% 340k ± 0% ~ (p=0.111 n=5+5) GoTypes 1.28M ± 0% 1.28M ± 0% -0.09% (p=0.008 n=5+5) Compiler 4.92M ± 0% 4.92M ± 0% -0.08% (p=0.008 n=5+5) SSA 15.3M ± 0% 15.3M ± 0% -0.03% (p=0.008 n=5+5) Flate 233k ± 0% 233k ± 0% ~ (p=0.500 n=5+5) GoParser 292k ± 0% 292k ± 0% -0.06% (p=0.008 n=5+5) Reflect 1.05M ± 0% 1.05M ± 0% -0.02% (p=0.008 n=5+5) Tar 344k ± 0% 343k ± 0% -0.06% (p=0.008 n=5+5) XML 430k ± 0% 429k ± 0% -0.08% (p=0.008 n=5+5) [Geo mean] 809k 809k -0.05% name old object-bytes new object-bytes delta Template 507kB ± 0% 507kB ± 0% -0.04% (p=0.008 n=5+5) Unicode 225kB ± 0% 225kB ± 0% ~ (all equal) GoTypes 1.85MB ± 0% 1.85MB ± 0% -0.08% (p=0.008 n=5+5) Compiler 6.75MB ± 0% 6.75MB ± 0% +0.01% (p=0.008 n=5+5) SSA 21.4MB ± 0% 21.4MB ± 0% -0.02% (p=0.008 n=5+5) Flate 328kB ± 0% 328kB ± 0% -0.03% (p=0.008 n=5+5) GoParser 403kB ± 0% 402kB ± 0% -0.06% (p=0.008 n=5+5) Reflect 1.41MB ± 0% 1.41MB ± 0% -0.03% (p=0.008 n=5+5) Tar 457kB ± 0% 457kB ± 0% -0.05% (p=0.008 n=5+5) XML 601kB ± 0% 600kB ± 0% -0.16% (p=0.008 n=5+5) [Geo mean] 1.05MB 1.04MB -0.05% Change-Id: I677a4108c0ecd32617549294036aa84f9214c4fe Reviewed-on: https://go-review.googlesource.com/c/147360 Run-TryBot: Josh Bleecher Snyder TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall Reviewed-by: Martin Möhrmann --- 
src/cmd/compile/internal/gc/builtin.go | 302 +++++++++--------- .../compile/internal/gc/builtin/runtime.go | 22 +- src/cmd/compile/internal/gc/walk.go | 71 ++-- src/runtime/iface.go | 134 +++----- 4 files changed, 242 insertions(+), 287 deletions(-) diff --git a/src/cmd/compile/internal/gc/builtin.go b/src/cmd/compile/internal/gc/builtin.go index 325bf4aa0e..4e9f11c8b3 100644 --- a/src/cmd/compile/internal/gc/builtin.go +++ b/src/cmd/compile/internal/gc/builtin.go @@ -51,110 +51,105 @@ var runtimeDecls = [...]struct { {"decoderune", funcTag, 50}, {"countrunes", funcTag, 51}, {"convI2I", funcTag, 52}, - {"convT2E", funcTag, 53}, - {"convT2E16", funcTag, 52}, - {"convT2E32", funcTag, 52}, - {"convT2E64", funcTag, 52}, - {"convT2Estring", funcTag, 52}, - {"convT2Eslice", funcTag, 52}, - {"convT2Enoptr", funcTag, 53}, - {"convT2I", funcTag, 53}, - {"convT2I16", funcTag, 52}, - {"convT2I32", funcTag, 52}, - {"convT2I64", funcTag, 52}, - {"convT2Istring", funcTag, 52}, - {"convT2Islice", funcTag, 52}, - {"convT2Inoptr", funcTag, 53}, + {"convT16", funcTag, 54}, + {"convT32", funcTag, 54}, + {"convT64", funcTag, 54}, + {"convTstring", funcTag, 54}, + {"convTslice", funcTag, 54}, + {"convT2E", funcTag, 55}, + {"convT2Enoptr", funcTag, 55}, + {"convT2I", funcTag, 55}, + {"convT2Inoptr", funcTag, 55}, {"assertE2I", funcTag, 52}, - {"assertE2I2", funcTag, 54}, + {"assertE2I2", funcTag, 56}, {"assertI2I", funcTag, 52}, - {"assertI2I2", funcTag, 54}, - {"panicdottypeE", funcTag, 55}, - {"panicdottypeI", funcTag, 55}, - {"panicnildottype", funcTag, 56}, - {"ifaceeq", funcTag, 59}, - {"efaceeq", funcTag, 59}, - {"fastrand", funcTag, 61}, - {"makemap64", funcTag, 63}, - {"makemap", funcTag, 64}, - {"makemap_small", funcTag, 65}, - {"mapaccess1", funcTag, 66}, - {"mapaccess1_fast32", funcTag, 67}, - {"mapaccess1_fast64", funcTag, 67}, - {"mapaccess1_faststr", funcTag, 67}, - {"mapaccess1_fat", funcTag, 68}, - {"mapaccess2", funcTag, 69}, - {"mapaccess2_fast32", funcTag, 70}, - 
{"mapaccess2_fast64", funcTag, 70}, - {"mapaccess2_faststr", funcTag, 70}, - {"mapaccess2_fat", funcTag, 71}, - {"mapassign", funcTag, 66}, - {"mapassign_fast32", funcTag, 67}, - {"mapassign_fast32ptr", funcTag, 67}, - {"mapassign_fast64", funcTag, 67}, - {"mapassign_fast64ptr", funcTag, 67}, - {"mapassign_faststr", funcTag, 67}, - {"mapiterinit", funcTag, 72}, - {"mapdelete", funcTag, 72}, - {"mapdelete_fast32", funcTag, 73}, - {"mapdelete_fast64", funcTag, 73}, - {"mapdelete_faststr", funcTag, 73}, - {"mapiternext", funcTag, 74}, - {"mapclear", funcTag, 75}, - {"makechan64", funcTag, 77}, - {"makechan", funcTag, 78}, - {"chanrecv1", funcTag, 80}, - {"chanrecv2", funcTag, 81}, - {"chansend1", funcTag, 83}, + {"assertI2I2", funcTag, 56}, + {"panicdottypeE", funcTag, 57}, + {"panicdottypeI", funcTag, 57}, + {"panicnildottype", funcTag, 58}, + {"ifaceeq", funcTag, 60}, + {"efaceeq", funcTag, 60}, + {"fastrand", funcTag, 62}, + {"makemap64", funcTag, 64}, + {"makemap", funcTag, 65}, + {"makemap_small", funcTag, 66}, + {"mapaccess1", funcTag, 67}, + {"mapaccess1_fast32", funcTag, 68}, + {"mapaccess1_fast64", funcTag, 68}, + {"mapaccess1_faststr", funcTag, 68}, + {"mapaccess1_fat", funcTag, 69}, + {"mapaccess2", funcTag, 70}, + {"mapaccess2_fast32", funcTag, 71}, + {"mapaccess2_fast64", funcTag, 71}, + {"mapaccess2_faststr", funcTag, 71}, + {"mapaccess2_fat", funcTag, 72}, + {"mapassign", funcTag, 67}, + {"mapassign_fast32", funcTag, 68}, + {"mapassign_fast32ptr", funcTag, 68}, + {"mapassign_fast64", funcTag, 68}, + {"mapassign_fast64ptr", funcTag, 68}, + {"mapassign_faststr", funcTag, 68}, + {"mapiterinit", funcTag, 73}, + {"mapdelete", funcTag, 73}, + {"mapdelete_fast32", funcTag, 74}, + {"mapdelete_fast64", funcTag, 74}, + {"mapdelete_faststr", funcTag, 74}, + {"mapiternext", funcTag, 75}, + {"mapclear", funcTag, 76}, + {"makechan64", funcTag, 78}, + {"makechan", funcTag, 79}, + {"chanrecv1", funcTag, 81}, + {"chanrecv2", funcTag, 82}, + {"chansend1", funcTag, 84}, 
{"closechan", funcTag, 23}, - {"writeBarrier", varTag, 85}, - {"typedmemmove", funcTag, 86}, - {"typedmemclr", funcTag, 87}, - {"typedslicecopy", funcTag, 88}, - {"selectnbsend", funcTag, 89}, - {"selectnbrecv", funcTag, 90}, - {"selectnbrecv2", funcTag, 92}, - {"selectsetpc", funcTag, 56}, - {"selectgo", funcTag, 93}, + {"writeBarrier", varTag, 86}, + {"typedmemmove", funcTag, 87}, + {"typedmemclr", funcTag, 88}, + {"typedslicecopy", funcTag, 89}, + {"selectnbsend", funcTag, 90}, + {"selectnbrecv", funcTag, 91}, + {"selectnbrecv2", funcTag, 93}, + {"selectsetpc", funcTag, 58}, + {"selectgo", funcTag, 94}, {"block", funcTag, 5}, - {"makeslice", funcTag, 94}, - {"makeslice64", funcTag, 95}, - {"growslice", funcTag, 97}, - {"memmove", funcTag, 98}, - {"memclrNoHeapPointers", funcTag, 99}, - {"memclrHasPointers", funcTag, 99}, - {"memequal", funcTag, 100}, - {"memequal8", funcTag, 101}, - {"memequal16", funcTag, 101}, - {"memequal32", funcTag, 101}, - {"memequal64", funcTag, 101}, - {"memequal128", funcTag, 101}, - {"int64div", funcTag, 102}, - {"uint64div", funcTag, 103}, - {"int64mod", funcTag, 102}, - {"uint64mod", funcTag, 103}, - {"float64toint64", funcTag, 104}, - {"float64touint64", funcTag, 105}, - {"float64touint32", funcTag, 106}, - {"int64tofloat64", funcTag, 107}, - {"uint64tofloat64", funcTag, 108}, - {"uint32tofloat64", funcTag, 109}, - {"complex128div", funcTag, 110}, - {"racefuncenter", funcTag, 111}, + {"makeslice", funcTag, 95}, + {"makeslice64", funcTag, 96}, + {"growslice", funcTag, 98}, + {"memmove", funcTag, 99}, + {"memclrNoHeapPointers", funcTag, 100}, + {"memclrHasPointers", funcTag, 100}, + {"memequal", funcTag, 101}, + {"memequal8", funcTag, 102}, + {"memequal16", funcTag, 102}, + {"memequal32", funcTag, 102}, + {"memequal64", funcTag, 102}, + {"memequal128", funcTag, 102}, + {"int64div", funcTag, 103}, + {"uint64div", funcTag, 104}, + {"int64mod", funcTag, 103}, + {"uint64mod", funcTag, 104}, + {"float64toint64", funcTag, 105}, + 
{"float64touint64", funcTag, 106}, + {"float64touint32", funcTag, 107}, + {"int64tofloat64", funcTag, 108}, + {"uint64tofloat64", funcTag, 109}, + {"uint32tofloat64", funcTag, 110}, + {"complex128div", funcTag, 111}, + {"racefuncenter", funcTag, 112}, {"racefuncenterfp", funcTag, 5}, {"racefuncexit", funcTag, 5}, - {"raceread", funcTag, 111}, - {"racewrite", funcTag, 111}, - {"racereadrange", funcTag, 112}, - {"racewriterange", funcTag, 112}, - {"msanread", funcTag, 112}, - {"msanwrite", funcTag, 112}, + {"raceread", funcTag, 112}, + {"racewrite", funcTag, 112}, + {"racereadrange", funcTag, 113}, + {"racewriterange", funcTag, 113}, + {"msanread", funcTag, 113}, + {"msanwrite", funcTag, 113}, {"support_popcnt", varTag, 11}, {"support_sse41", varTag, 11}, } func runtimeTypes() []*types.Type { - var typs [113]*types.Type + var typs [114]*types.Type typs[0] = types.Bytetype typs[1] = types.NewPtr(typs[0]) typs[2] = types.Types[TANY] @@ -208,65 +203,66 @@ func runtimeTypes() []*types.Type { typs[50] = functype(nil, []*Node{anonfield(typs[21]), anonfield(typs[32])}, []*Node{anonfield(typs[40]), anonfield(typs[32])}) typs[51] = functype(nil, []*Node{anonfield(typs[21])}, []*Node{anonfield(typs[32])}) typs[52] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[2])}, []*Node{anonfield(typs[2])}) - typs[53] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[3])}, []*Node{anonfield(typs[2])}) - typs[54] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[2])}, []*Node{anonfield(typs[2]), anonfield(typs[11])}) - typs[55] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[1]), anonfield(typs[1])}, nil) - typs[56] = functype(nil, []*Node{anonfield(typs[1])}, nil) - typs[57] = types.NewPtr(typs[47]) - typs[58] = types.Types[TUNSAFEPTR] - typs[59] = functype(nil, []*Node{anonfield(typs[57]), anonfield(typs[58]), anonfield(typs[58])}, []*Node{anonfield(typs[11])}) - typs[60] = types.Types[TUINT32] - typs[61] = functype(nil, nil, 
[]*Node{anonfield(typs[60])}) - typs[62] = types.NewMap(typs[2], typs[2]) - typs[63] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[3])}, []*Node{anonfield(typs[62])}) - typs[64] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[32]), anonfield(typs[3])}, []*Node{anonfield(typs[62])}) - typs[65] = functype(nil, nil, []*Node{anonfield(typs[62])}) - typs[66] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[62]), anonfield(typs[3])}, []*Node{anonfield(typs[3])}) - typs[67] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[62]), anonfield(typs[2])}, []*Node{anonfield(typs[3])}) - typs[68] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[62]), anonfield(typs[3]), anonfield(typs[1])}, []*Node{anonfield(typs[3])}) - typs[69] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[62]), anonfield(typs[3])}, []*Node{anonfield(typs[3]), anonfield(typs[11])}) - typs[70] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[62]), anonfield(typs[2])}, []*Node{anonfield(typs[3]), anonfield(typs[11])}) - typs[71] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[62]), anonfield(typs[3]), anonfield(typs[1])}, []*Node{anonfield(typs[3]), anonfield(typs[11])}) - typs[72] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[62]), anonfield(typs[3])}, nil) - typs[73] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[62]), anonfield(typs[2])}, nil) - typs[74] = functype(nil, []*Node{anonfield(typs[3])}, nil) - typs[75] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[62])}, nil) - typs[76] = types.NewChan(typs[2], types.Cboth) - typs[77] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15])}, []*Node{anonfield(typs[76])}) - typs[78] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[32])}, []*Node{anonfield(typs[76])}) - typs[79] = types.NewChan(typs[2], types.Crecv) - typs[80] = functype(nil, []*Node{anonfield(typs[79]), anonfield(typs[3])}, nil) - 
typs[81] = functype(nil, []*Node{anonfield(typs[79]), anonfield(typs[3])}, []*Node{anonfield(typs[11])}) - typs[82] = types.NewChan(typs[2], types.Csend) - typs[83] = functype(nil, []*Node{anonfield(typs[82]), anonfield(typs[3])}, nil) - typs[84] = types.NewArray(typs[0], 3) - typs[85] = tostruct([]*Node{namedfield("enabled", typs[11]), namedfield("pad", typs[84]), namedfield("needed", typs[11]), namedfield("cgo", typs[11]), namedfield("alignme", typs[17])}) - typs[86] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[3]), anonfield(typs[3])}, nil) - typs[87] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[3])}, nil) - typs[88] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[2]), anonfield(typs[2])}, []*Node{anonfield(typs[32])}) - typs[89] = functype(nil, []*Node{anonfield(typs[82]), anonfield(typs[3])}, []*Node{anonfield(typs[11])}) - typs[90] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[79])}, []*Node{anonfield(typs[11])}) - typs[91] = types.NewPtr(typs[11]) - typs[92] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[91]), anonfield(typs[79])}, []*Node{anonfield(typs[11])}) - typs[93] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[1]), anonfield(typs[32])}, []*Node{anonfield(typs[32]), anonfield(typs[11])}) - typs[94] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[32]), anonfield(typs[32])}, []*Node{anonfield(typs[58])}) - typs[95] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[15])}, []*Node{anonfield(typs[58])}) - typs[96] = types.NewSlice(typs[2]) - typs[97] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[96]), anonfield(typs[32])}, []*Node{anonfield(typs[96])}) - typs[98] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3]), anonfield(typs[47])}, nil) - typs[99] = functype(nil, []*Node{anonfield(typs[58]), anonfield(typs[47])}, nil) - typs[100] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3]), 
anonfield(typs[47])}, []*Node{anonfield(typs[11])}) - typs[101] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3])}, []*Node{anonfield(typs[11])}) - typs[102] = functype(nil, []*Node{anonfield(typs[15]), anonfield(typs[15])}, []*Node{anonfield(typs[15])}) - typs[103] = functype(nil, []*Node{anonfield(typs[17]), anonfield(typs[17])}, []*Node{anonfield(typs[17])}) - typs[104] = functype(nil, []*Node{anonfield(typs[13])}, []*Node{anonfield(typs[15])}) - typs[105] = functype(nil, []*Node{anonfield(typs[13])}, []*Node{anonfield(typs[17])}) - typs[106] = functype(nil, []*Node{anonfield(typs[13])}, []*Node{anonfield(typs[60])}) - typs[107] = functype(nil, []*Node{anonfield(typs[15])}, []*Node{anonfield(typs[13])}) - typs[108] = functype(nil, []*Node{anonfield(typs[17])}, []*Node{anonfield(typs[13])}) - typs[109] = functype(nil, []*Node{anonfield(typs[60])}, []*Node{anonfield(typs[13])}) - typs[110] = functype(nil, []*Node{anonfield(typs[19]), anonfield(typs[19])}, []*Node{anonfield(typs[19])}) - typs[111] = functype(nil, []*Node{anonfield(typs[47])}, nil) - typs[112] = functype(nil, []*Node{anonfield(typs[47]), anonfield(typs[47])}, nil) + typs[53] = types.Types[TUNSAFEPTR] + typs[54] = functype(nil, []*Node{anonfield(typs[2])}, []*Node{anonfield(typs[53])}) + typs[55] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[3])}, []*Node{anonfield(typs[2])}) + typs[56] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[2])}, []*Node{anonfield(typs[2]), anonfield(typs[11])}) + typs[57] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[1]), anonfield(typs[1])}, nil) + typs[58] = functype(nil, []*Node{anonfield(typs[1])}, nil) + typs[59] = types.NewPtr(typs[47]) + typs[60] = functype(nil, []*Node{anonfield(typs[59]), anonfield(typs[53]), anonfield(typs[53])}, []*Node{anonfield(typs[11])}) + typs[61] = types.Types[TUINT32] + typs[62] = functype(nil, nil, []*Node{anonfield(typs[61])}) + typs[63] = types.NewMap(typs[2], typs[2]) + typs[64] = 
functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[3])}, []*Node{anonfield(typs[63])}) + typs[65] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[32]), anonfield(typs[3])}, []*Node{anonfield(typs[63])}) + typs[66] = functype(nil, nil, []*Node{anonfield(typs[63])}) + typs[67] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[63]), anonfield(typs[3])}, []*Node{anonfield(typs[3])}) + typs[68] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[63]), anonfield(typs[2])}, []*Node{anonfield(typs[3])}) + typs[69] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[63]), anonfield(typs[3]), anonfield(typs[1])}, []*Node{anonfield(typs[3])}) + typs[70] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[63]), anonfield(typs[3])}, []*Node{anonfield(typs[3]), anonfield(typs[11])}) + typs[71] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[63]), anonfield(typs[2])}, []*Node{anonfield(typs[3]), anonfield(typs[11])}) + typs[72] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[63]), anonfield(typs[3]), anonfield(typs[1])}, []*Node{anonfield(typs[3]), anonfield(typs[11])}) + typs[73] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[63]), anonfield(typs[3])}, nil) + typs[74] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[63]), anonfield(typs[2])}, nil) + typs[75] = functype(nil, []*Node{anonfield(typs[3])}, nil) + typs[76] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[63])}, nil) + typs[77] = types.NewChan(typs[2], types.Cboth) + typs[78] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15])}, []*Node{anonfield(typs[77])}) + typs[79] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[32])}, []*Node{anonfield(typs[77])}) + typs[80] = types.NewChan(typs[2], types.Crecv) + typs[81] = functype(nil, []*Node{anonfield(typs[80]), anonfield(typs[3])}, nil) + typs[82] = functype(nil, []*Node{anonfield(typs[80]), anonfield(typs[3])}, 
[]*Node{anonfield(typs[11])}) + typs[83] = types.NewChan(typs[2], types.Csend) + typs[84] = functype(nil, []*Node{anonfield(typs[83]), anonfield(typs[3])}, nil) + typs[85] = types.NewArray(typs[0], 3) + typs[86] = tostruct([]*Node{namedfield("enabled", typs[11]), namedfield("pad", typs[85]), namedfield("needed", typs[11]), namedfield("cgo", typs[11]), namedfield("alignme", typs[17])}) + typs[87] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[3]), anonfield(typs[3])}, nil) + typs[88] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[3])}, nil) + typs[89] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[2]), anonfield(typs[2])}, []*Node{anonfield(typs[32])}) + typs[90] = functype(nil, []*Node{anonfield(typs[83]), anonfield(typs[3])}, []*Node{anonfield(typs[11])}) + typs[91] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[80])}, []*Node{anonfield(typs[11])}) + typs[92] = types.NewPtr(typs[11]) + typs[93] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[92]), anonfield(typs[80])}, []*Node{anonfield(typs[11])}) + typs[94] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[1]), anonfield(typs[32])}, []*Node{anonfield(typs[32]), anonfield(typs[11])}) + typs[95] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[32]), anonfield(typs[32])}, []*Node{anonfield(typs[53])}) + typs[96] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[15])}, []*Node{anonfield(typs[53])}) + typs[97] = types.NewSlice(typs[2]) + typs[98] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[97]), anonfield(typs[32])}, []*Node{anonfield(typs[97])}) + typs[99] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3]), anonfield(typs[47])}, nil) + typs[100] = functype(nil, []*Node{anonfield(typs[53]), anonfield(typs[47])}, nil) + typs[101] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3]), anonfield(typs[47])}, []*Node{anonfield(typs[11])}) + typs[102] = functype(nil, 
[]*Node{anonfield(typs[3]), anonfield(typs[3])}, []*Node{anonfield(typs[11])}) + typs[103] = functype(nil, []*Node{anonfield(typs[15]), anonfield(typs[15])}, []*Node{anonfield(typs[15])}) + typs[104] = functype(nil, []*Node{anonfield(typs[17]), anonfield(typs[17])}, []*Node{anonfield(typs[17])}) + typs[105] = functype(nil, []*Node{anonfield(typs[13])}, []*Node{anonfield(typs[15])}) + typs[106] = functype(nil, []*Node{anonfield(typs[13])}, []*Node{anonfield(typs[17])}) + typs[107] = functype(nil, []*Node{anonfield(typs[13])}, []*Node{anonfield(typs[61])}) + typs[108] = functype(nil, []*Node{anonfield(typs[15])}, []*Node{anonfield(typs[13])}) + typs[109] = functype(nil, []*Node{anonfield(typs[17])}, []*Node{anonfield(typs[13])}) + typs[110] = functype(nil, []*Node{anonfield(typs[61])}, []*Node{anonfield(typs[13])}) + typs[111] = functype(nil, []*Node{anonfield(typs[19]), anonfield(typs[19])}, []*Node{anonfield(typs[19])}) + typs[112] = functype(nil, []*Node{anonfield(typs[47])}, nil) + typs[113] = functype(nil, []*Node{anonfield(typs[47]), anonfield(typs[47])}, nil) return typs[:] } diff --git a/src/cmd/compile/internal/gc/builtin/runtime.go b/src/cmd/compile/internal/gc/builtin/runtime.go index e6d174bc4b..1eaf332e50 100644 --- a/src/cmd/compile/internal/gc/builtin/runtime.go +++ b/src/cmd/compile/internal/gc/builtin/runtime.go @@ -61,23 +61,23 @@ func slicestringcopy(to any, fr any) int func decoderune(string, int) (retv rune, retk int) func countrunes(string) int -// interface conversions +// Non-empty-interface to non-empty-interface conversion. func convI2I(typ *byte, elem any) (ret any) +// Specialized type-to-interface conversion. +// These return only a data pointer. 
+func convT16(val any) unsafe.Pointer // val must be uint16-like (same size and alignment as a uint16) +func convT32(val any) unsafe.Pointer // val must be uint32-like (same size and alignment as a uint32) +func convT64(val any) unsafe.Pointer // val must be uint64-like (same size and alignment as a uint64 and contains no pointers) +func convTstring(val any) unsafe.Pointer // val must be a string +func convTslice(val any) unsafe.Pointer // val must be a slice + +// Type to empty-interface conversion. func convT2E(typ *byte, elem *any) (ret any) -func convT2E16(typ *byte, val any) (ret any) -func convT2E32(typ *byte, val any) (ret any) -func convT2E64(typ *byte, val any) (ret any) -func convT2Estring(typ *byte, val any) (ret any) // val must be a string -func convT2Eslice(typ *byte, val any) (ret any) // val must be a slice func convT2Enoptr(typ *byte, elem *any) (ret any) +// Type to non-empty-interface conversion. func convT2I(tab *byte, elem *any) (ret any) -func convT2I16(tab *byte, val any) (ret any) -func convT2I32(tab *byte, val any) (ret any) -func convT2I64(tab *byte, val any) (ret any) -func convT2Istring(tab *byte, val any) (ret any) // val must be a string -func convT2Islice(tab *byte, val any) (ret any) // val must be a slice func convT2Inoptr(tab *byte, elem *any) (ret any) // interface type assertions x.(T) diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go index 0e07efa0d9..fd484a6472 100644 --- a/src/cmd/compile/internal/gc/walk.go +++ b/src/cmd/compile/internal/gc/walk.go @@ -384,41 +384,31 @@ func convFuncName(from, to *types.Type) (fnname string, needsaddr bool) { tkind := to.Tie() switch from.Tie() { case 'I': - switch tkind { - case 'I': + if tkind == 'I' { return "convI2I", false } case 'T': + switch { + case from.Size() == 2 && from.Align == 2: + return "convT16", false + case from.Size() == 4 && from.Align == 4 && !types.Haspointers(from): + return "convT32", false + case from.Size() == 8 && from.Align == 
types.Types[TUINT64].Align && !types.Haspointers(from): + return "convT64", false + case from.IsString(): + return "convTstring", false + case from.IsSlice(): + return "convTslice", false + } + switch tkind { case 'E': - switch { - case from.Size() == 2 && from.Align == 2: - return "convT2E16", false - case from.Size() == 4 && from.Align == 4 && !types.Haspointers(from): - return "convT2E32", false - case from.Size() == 8 && from.Align == types.Types[TUINT64].Align && !types.Haspointers(from): - return "convT2E64", false - case from.IsString(): - return "convT2Estring", false - case from.IsSlice(): - return "convT2Eslice", false - case !types.Haspointers(from): + if !types.Haspointers(from) { return "convT2Enoptr", true } return "convT2E", true case 'I': - switch { - case from.Size() == 2 && from.Align == 2: - return "convT2I16", false - case from.Size() == 4 && from.Align == 4 && !types.Haspointers(from): - return "convT2I32", false - case from.Size() == 8 && from.Align == types.Types[TUINT64].Align && !types.Haspointers(from): - return "convT2I64", false - case from.IsString(): - return "convT2Istring", false - case from.IsSlice(): - return "convT2Islice", false - case !types.Haspointers(from): + if !types.Haspointers(from) { return "convT2Inoptr", true } return "convT2I", true @@ -925,6 +915,34 @@ opswitch: break } + fnname, needsaddr := convFuncName(n.Left.Type, n.Type) + + if !needsaddr && !n.Left.Type.IsInterface() { + // Use a specialized conversion routine that only returns a data pointer. 
+ // ptr = convT2X(val) + // e = iface{typ/tab, ptr} + fn := syslook(fnname) + dowidth(n.Left.Type) + fn = substArgTypes(fn, n.Left.Type) + dowidth(fn.Type) + call := nod(OCALL, fn, nil) + call.List.Set1(n.Left) + call = typecheck(call, Erv) + call = walkexpr(call, init) + call = safeexpr(call, init) + var tab *Node + if n.Type.IsEmptyInterface() { + tab = typename(n.Left.Type) + } else { + tab = itabname(n.Left.Type, n.Type) + } + e := nod(OEFACE, tab, call) + e.Type = n.Type + e.SetTypecheck(1) + n = e + break + } + var ll []*Node if n.Type.IsEmptyInterface() { if !n.Left.Type.IsInterface() { @@ -938,7 +956,6 @@ opswitch: } } - fnname, needsaddr := convFuncName(n.Left.Type, n.Type) v := n.Left if needsaddr { // Types of large or unknown size are passed by reference. diff --git a/src/runtime/iface.go b/src/runtime/iface.go index 1ef9825a48..8eca2e849d 100644 --- a/src/runtime/iface.go +++ b/src/runtime/iface.go @@ -267,6 +267,34 @@ func panicnildottype(want *_type) { // Just to match other nil conversion errors, we don't for now. } +// The specialized convTx routines need a type descriptor to use when calling mallocgc. +// We don't need the type to be exact, just to have the correct size, alignment, and pointer-ness. +// However, when debugging, it'd be nice to have some indication in mallocgc where the types came from, +// so we use named types here. +// We then construct interface values of these types, +// and then extract the type word to use as needed. 
+type ( + uint16InterfacePtr uint16 + uint32InterfacePtr uint32 + uint64InterfacePtr uint64 + stringInterfacePtr string + sliceInterfacePtr []byte +) + +var ( + uint16Eface interface{} = uint16InterfacePtr(0) + uint32Eface interface{} = uint32InterfacePtr(0) + uint64Eface interface{} = uint64InterfacePtr(0) + stringEface interface{} = stringInterfacePtr("") + sliceEface interface{} = sliceInterfacePtr(nil) + + uint16Type *_type = (*eface)(unsafe.Pointer(&uint16Eface))._type + uint32Type *_type = (*eface)(unsafe.Pointer(&uint32Eface))._type + uint64Type *_type = (*eface)(unsafe.Pointer(&uint64Eface))._type + stringType *_type = (*eface)(unsafe.Pointer(&stringEface))._type + sliceType *_type = (*eface)(unsafe.Pointer(&sliceEface))._type +) + // The conv and assert functions below do very similar things. // The convXXX functions are guaranteed by the compiler to succeed. // The assertXXX functions may fail (either panicking or returning false, @@ -290,69 +318,54 @@ func convT2E(t *_type, elem unsafe.Pointer) (e eface) { return } -func convT2E16(t *_type, val uint16) (e eface) { - var x unsafe.Pointer +func convT16(val uint16) (x unsafe.Pointer) { if val == 0 { x = unsafe.Pointer(&zeroVal[0]) } else { - x = mallocgc(2, t, false) + x = mallocgc(2, uint16Type, false) *(*uint16)(x) = val } - e._type = t - e.data = x return } -func convT2E32(t *_type, val uint32) (e eface) { - var x unsafe.Pointer +func convT32(val uint32) (x unsafe.Pointer) { if val == 0 { x = unsafe.Pointer(&zeroVal[0]) } else { - x = mallocgc(4, t, false) + x = mallocgc(4, uint32Type, false) *(*uint32)(x) = val } - e._type = t - e.data = x return } -func convT2E64(t *_type, val uint64) (e eface) { - var x unsafe.Pointer +func convT64(val uint64) (x unsafe.Pointer) { if val == 0 { x = unsafe.Pointer(&zeroVal[0]) } else { - x = mallocgc(8, t, false) + x = mallocgc(8, uint64Type, false) *(*uint64)(x) = val } - e._type = t - e.data = x return } -func convT2Estring(t *_type, val string) (e eface) { - var x 
unsafe.Pointer +func convTstring(val string) (x unsafe.Pointer) { if val == "" { x = unsafe.Pointer(&zeroVal[0]) } else { - x = mallocgc(unsafe.Sizeof(val), t, true) + x = mallocgc(unsafe.Sizeof(val), stringType, true) *(*string)(x) = val } - e._type = t - e.data = x return } -func convT2Eslice(t *_type, val []byte) (e eface) { +func convTslice(val []byte) (x unsafe.Pointer) { // Note: this must work for any element type, not just byte. - var x unsafe.Pointer if (*slice)(unsafe.Pointer(&val)).array == nil { x = unsafe.Pointer(&zeroVal[0]) } else { - x = mallocgc(unsafe.Sizeof(val), t, true) + x = mallocgc(unsafe.Sizeof(val), sliceType, true) *(*[]byte)(x) = val } - e._type = t - e.data = x return } @@ -385,77 +398,6 @@ func convT2I(tab *itab, elem unsafe.Pointer) (i iface) { return } -func convT2I16(tab *itab, val uint16) (i iface) { - t := tab._type - var x unsafe.Pointer - if val == 0 { - x = unsafe.Pointer(&zeroVal[0]) - } else { - x = mallocgc(2, t, false) - *(*uint16)(x) = val - } - i.tab = tab - i.data = x - return -} - -func convT2I32(tab *itab, val uint32) (i iface) { - t := tab._type - var x unsafe.Pointer - if val == 0 { - x = unsafe.Pointer(&zeroVal[0]) - } else { - x = mallocgc(4, t, false) - *(*uint32)(x) = val - } - i.tab = tab - i.data = x - return -} - -func convT2I64(tab *itab, val uint64) (i iface) { - t := tab._type - var x unsafe.Pointer - if val == 0 { - x = unsafe.Pointer(&zeroVal[0]) - } else { - x = mallocgc(8, t, false) - *(*uint64)(x) = val - } - i.tab = tab - i.data = x - return -} - -func convT2Istring(tab *itab, val string) (i iface) { - t := tab._type - var x unsafe.Pointer - if val == "" { - x = unsafe.Pointer(&zeroVal[0]) - } else { - x = mallocgc(unsafe.Sizeof(val), t, true) - *(*string)(x) = val - } - i.tab = tab - i.data = x - return -} - -func convT2Islice(tab *itab, val []byte) (i iface) { - // Note: this must work for any element type, not just byte. 
- t := tab._type - var x unsafe.Pointer - if (*slice)(unsafe.Pointer(&val)).array == nil { - x = unsafe.Pointer(&zeroVal[0]) - } else { - x = mallocgc(unsafe.Sizeof(val), t, true) - *(*[]byte)(x) = val - } - i.tab = tab - i.data = x - return -} - func convT2Inoptr(tab *itab, elem unsafe.Pointer) (i iface) { t := tab._type if raceenabled { From 9fc22d29092933460fe00bdaccea179f29e9960d Mon Sep 17 00:00:00 2001 From: Michael Stapelberg Date: Fri, 2 Nov 2018 11:23:54 +0100 Subject: [PATCH 23/25] net: update zoneCache on cache misses to cover appearing interfaces performance differences are in measurement noise as per benchcmp: benchmark old ns/op new ns/op delta BenchmarkUDP6LinkLocalUnicast-12 5012 5009 -0.06% Fixes #28535 Change-Id: Id022e2ed089ce8388a2398e755848ec94e77e653 Reviewed-on: https://go-review.googlesource.com/c/146941 Run-TryBot: Mikio Hara Reviewed-by: Mikio Hara --- src/net/interface.go | 40 ++++++++++++++++++++++++--------- src/net/interface_bsd_test.go | 5 +++++ src/net/interface_linux_test.go | 25 +++++++++++++++++++++ src/net/interface_unix_test.go | 34 ++++++++++++++++++++++++++++ 4 files changed, 94 insertions(+), 10 deletions(-) diff --git a/src/net/interface.go b/src/net/interface.go index 375a4568e3..46b0400f2f 100644 --- a/src/net/interface.go +++ b/src/net/interface.go @@ -102,7 +102,7 @@ func Interfaces() ([]Interface, error) { return nil, &OpError{Op: "route", Net: "ip+net", Source: nil, Addr: nil, Err: err} } if len(ift) != 0 { - zoneCache.update(ift) + zoneCache.update(ift, false) } return ift, nil } @@ -159,7 +159,7 @@ func InterfaceByName(name string) (*Interface, error) { return nil, &OpError{Op: "route", Net: "ip+net", Source: nil, Addr: nil, Err: err} } if len(ift) != 0 { - zoneCache.update(ift) + zoneCache.update(ift, false) } for _, ifi := range ift { if name == ifi.Name { @@ -187,18 +187,21 @@ var zoneCache = ipv6ZoneCache{ toName: make(map[int]string), } -func (zc *ipv6ZoneCache) update(ift []Interface) { +// update refreshes the 
network interface information if the cache was last +// updated more than 1 minute ago, or if force is set. It returns whether the +// cache was updated. +func (zc *ipv6ZoneCache) update(ift []Interface, force bool) (updated bool) { zc.Lock() defer zc.Unlock() now := time.Now() - if zc.lastFetched.After(now.Add(-60 * time.Second)) { - return + if !force && zc.lastFetched.After(now.Add(-60*time.Second)) { + return false } zc.lastFetched = now if len(ift) == 0 { var err error if ift, err = interfaceTable(0); err != nil { - return + return false } } zc.toIndex = make(map[string]int, len(ift)) @@ -209,16 +212,25 @@ func (zc *ipv6ZoneCache) update(ift []Interface) { zc.toName[ifi.Index] = ifi.Name } } + return true } func (zc *ipv6ZoneCache) name(index int) string { if index == 0 { return "" } - zoneCache.update(nil) + updated := zoneCache.update(nil, false) zoneCache.RLock() - defer zoneCache.RUnlock() name, ok := zoneCache.toName[index] + zoneCache.RUnlock() + if !ok { + if !updated { + zoneCache.update(nil, true) + zoneCache.RLock() + name, ok = zoneCache.toName[index] + zoneCache.RUnlock() + } + } if !ok { name = uitoa(uint(index)) } @@ -229,10 +241,18 @@ func (zc *ipv6ZoneCache) index(name string) int { if name == "" { return 0 } - zoneCache.update(nil) + updated := zoneCache.update(nil, false) zoneCache.RLock() - defer zoneCache.RUnlock() index, ok := zoneCache.toIndex[name] + zoneCache.RUnlock() + if !ok { + if !updated { + zoneCache.update(nil, true) + zoneCache.RLock() + index, ok = zoneCache.toIndex[name] + zoneCache.RUnlock() + } + } if !ok { index, _, _ = dtoi(name) } diff --git a/src/net/interface_bsd_test.go b/src/net/interface_bsd_test.go index 69b0fbcab3..947dde71e6 100644 --- a/src/net/interface_bsd_test.go +++ b/src/net/interface_bsd_test.go @@ -7,6 +7,7 @@ package net import ( + "errors" "fmt" "os/exec" "runtime" @@ -53,3 +54,7 @@ func (ti *testInterface) setPointToPoint(suffix int) error { }) return nil } + +func (ti *testInterface) 
setLinkLocal(suffix int) error { + return errors.New("not yet implemented for BSD") +} diff --git a/src/net/interface_linux_test.go b/src/net/interface_linux_test.go index 6959ddb3d9..0699fec636 100644 --- a/src/net/interface_linux_test.go +++ b/src/net/interface_linux_test.go @@ -35,6 +35,31 @@ func (ti *testInterface) setBroadcast(suffix int) error { return nil } +func (ti *testInterface) setLinkLocal(suffix int) error { + ti.name = fmt.Sprintf("gotest%d", suffix) + xname, err := exec.LookPath("ip") + if err != nil { + return err + } + ti.setupCmds = append(ti.setupCmds, &exec.Cmd{ + Path: xname, + Args: []string{"ip", "link", "add", ti.name, "type", "dummy"}, + }) + ti.setupCmds = append(ti.setupCmds, &exec.Cmd{ + Path: xname, + Args: []string{"ip", "address", "add", ti.local, "dev", ti.name}, + }) + ti.teardownCmds = append(ti.teardownCmds, &exec.Cmd{ + Path: xname, + Args: []string{"ip", "address", "del", ti.local, "dev", ti.name}, + }) + ti.teardownCmds = append(ti.teardownCmds, &exec.Cmd{ + Path: xname, + Args: []string{"ip", "link", "delete", ti.name, "type", "dummy"}, + }) + return nil +} + func (ti *testInterface) setPointToPoint(suffix int) error { ti.name = fmt.Sprintf("gotest%d", suffix) xname, err := exec.LookPath("ip") diff --git a/src/net/interface_unix_test.go b/src/net/interface_unix_test.go index c3d981dc5c..20e75cd036 100644 --- a/src/net/interface_unix_test.go +++ b/src/net/interface_unix_test.go @@ -176,3 +176,37 @@ func TestInterfaceArrivalAndDeparture(t *testing.T) { } } } + +func TestInterfaceArrivalAndDepartureZoneCache(t *testing.T) { + if testing.Short() { + t.Skip("avoid external network") + } + if os.Getuid() != 0 { + t.Skip("must be root") + } + + // Ensure zoneCache is filled: + _, _ = Listen("tcp", "[fe80::1%nonexistant]:0") + + ti := &testInterface{local: "fe80::1"} + if err := ti.setLinkLocal(0); err != nil { + t.Skipf("test requires external command: %v", err) + } + if err := ti.setup(); err != nil { + t.Fatal(err) + } + defer 
ti.teardown() + + time.Sleep(3 * time.Millisecond) + + // If Listen fails (on Linux with “bind: invalid argument”), zoneCache was + // not updated when encountering a nonexistant interface: + ln, err := Listen("tcp", "[fe80::1%"+ti.name+"]:0") + if err != nil { + t.Fatal(err) + } + ln.Close() + if err := ti.teardown(); err != nil { + t.Fatal(err) + } +} From c1a16b7dadfee27b03a2a70a20c3cf339a069a40 Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Sun, 28 Oct 2018 11:19:33 -0700 Subject: [PATCH 24/25] cmd/compile: loop in disjoint OpOffPtr check We collapse OpOffPtrs during generic rewrites. However, we also use disjoint at the same time. Instead of waiting for all OpOffPtrs to be collapsed before the disjointness rules can kick in, burrow through all OpOffPtrs immediately. Change-Id: I60d0a70a9b4605b1817db7c4aab0c0d789651c90 Reviewed-on: https://go-review.googlesource.com/c/145206 Run-TryBot: Josh Bleecher Snyder Reviewed-by: Michael Munday Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/rewrite.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index ed5bce861e..17d7cb3414 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -542,7 +542,7 @@ func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool { } baseAndOffset := func(ptr *Value) (base *Value, offset int64) { base, offset = ptr, 0 - if base.Op == OpOffPtr { + for base.Op == OpOffPtr { offset += base.AuxInt base = base.Args[0] } From 510eea2dfcabbc8916c7c59aa37046269ad29497 Mon Sep 17 00:00:00 2001 From: Mikio Hara Date: Tue, 6 Nov 2018 12:48:17 +0900 Subject: [PATCH 25/25] net/http: update bundled SOCKS client Updates socks_bundle.go to git rev 26e67e7 for: - 26e67e7 internal/socks: fix socket descriptor leakage in Dialer.Dial Change-Id: I9ab27a85504d77f1ca2e97cb005f5e37fd3c3ff4 Reviewed-on: https://go-review.googlesource.com/c/147717 
Run-TryBot: Mikio Hara Reviewed-by: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- src/net/http/socks_bundle.go | 1 + 1 file changed, 1 insertion(+) diff --git a/src/net/http/socks_bundle.go b/src/net/http/socks_bundle.go index e4314b4128..e6640dd404 100644 --- a/src/net/http/socks_bundle.go +++ b/src/net/http/socks_bundle.go @@ -380,6 +380,7 @@ func (d *socksDialer) Dial(network, address string) (net.Conn, error) { return nil, &net.OpError{Op: d.cmd.String(), Net: network, Source: proxy, Addr: dst, Err: err} } if _, err := d.DialWithConn(context.Background(), c, network, address); err != nil { + c.Close() return nil, err } return c, nil