unique: add unique package and implement Make/Handle

This change adds the unique package for canonicalizing values, as
described by the proposal in #62483.

Fixes #62483.

Change-Id: I1dc3d34ec12351cb4dc3838a8ea29a5368d59e99
Reviewed-on: https://go-review.googlesource.com/c/go/+/574355
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Ingo Oeser <nightlyone@googlemail.com>
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
Michael Anthony Knyszek 2024-04-04 04:51:24 +00:00 committed by Michael Knyszek
parent 654c3368e5
commit a088e230d4
12 changed files with 537 additions and 1 deletions

3
api/next/62483.txt Normal file
View File

@ -0,0 +1,3 @@
pkg unique, func Make[$0 comparable]($0) Handle[$0] #62483
pkg unique, method (Handle[$0]) Value() $0 #62483
pkg unique, type Handle[$0 comparable] struct #62483

View File

@ -0,0 +1,13 @@
### New unique package
The new [unique](/pkg/unique) package provides facilites for
canonicalizing values (like "interning" or "hash-consing").
Any value of comparable type may be canonicalized with the new
`Make[T]` function, which produces a reference to a canonical copy of
the value in the form of a `Handle[T]`.
Two `Handle[T]` are equal if and only if the values used to produce the
handles are equal, allowing programs to deduplicate values and reduce
their memory footprint.
Comparing two `Handle[T]` values is efficient, reducing down to a simple
pointer comparison.

View File

@ -0,0 +1 @@
<!-- This is a new package; covered in 6-stdlib/2-unique.md. -->

View File

@ -164,6 +164,9 @@ var depsRules = `
bufio, path, strconv bufio, path, strconv
< STR; < STR;
RUNTIME, internal/concurrent
< unique;
# OS is basic OS access, including helpers (path/filepath, os/exec, etc). # OS is basic OS access, including helpers (path/filepath, os/exec, etc).
# OS includes string routines, but those must be layered above package os. # OS includes string routines, but those must be layered above package os.
# OS does not include reflection. # OS does not include reflection.

View File

@ -43,5 +43,6 @@ var stdPkgs = []string{
"testing", "testing",
"time", "time",
"unicode", "unicode",
"unique",
"unsafe", "unsafe",
} }

View File

@ -1695,6 +1695,7 @@ func gcResetMarkState() {
var poolcleanup func() var poolcleanup func()
var boringCaches []unsafe.Pointer // for crypto/internal/boring var boringCaches []unsafe.Pointer // for crypto/internal/boring
var uniqueMapCleanup chan struct{} // for unique
//go:linkname sync_runtime_registerPoolCleanup sync.runtime_registerPoolCleanup //go:linkname sync_runtime_registerPoolCleanup sync.runtime_registerPoolCleanup
func sync_runtime_registerPoolCleanup(f func()) { func sync_runtime_registerPoolCleanup(f func()) {
@ -1706,6 +1707,18 @@ func boring_registerCache(p unsafe.Pointer) {
boringCaches = append(boringCaches, p) boringCaches = append(boringCaches, p)
} }
//go:linkname unique_runtime_registerUniqueMapCleanup unique.runtime_registerUniqueMapCleanup
func unique_runtime_registerUniqueMapCleanup(f func()) {
// Start the goroutine in the runtime so it's counted as a system goroutine.
uniqueMapCleanup = make(chan struct{}, 1)
go func(cleanup func()) {
for {
<-uniqueMapCleanup
cleanup()
}
}(f)
}
func clearpools() { func clearpools() {
// clear sync.Pools // clear sync.Pools
if poolcleanup != nil { if poolcleanup != nil {
@ -1717,6 +1730,14 @@ func clearpools() {
atomicstorep(p, nil) atomicstorep(p, nil)
} }
// clear unique maps
if uniqueMapCleanup != nil {
select {
case uniqueMapCleanup <- struct{}{}:
default:
}
}
// Clear central sudog cache. // Clear central sudog cache.
// Leave per-P caches alone, they have strictly bounded size. // Leave per-P caches alone, they have strictly bounded size.
// Disconnect cached list before dropping it on the floor, // Disconnect cached list before dropping it on the floor,

100
src/unique/clone.go Normal file
View File

@ -0,0 +1,100 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package unique
import (
"internal/abi"
"unsafe"
)
// clone makes a copy of value, and may update string values found in value
// with a cloned version of those strings. The purpose of explicitly cloning
// strings is to avoid accidentally giving a large string a long lifetime.
//
// Note that this will clone strings in structs and arrays found in value,
// and will clone value if it itself is a string. It will not, however, clone
// strings if value is of interface or slice type (that is, found via an
// indirection).
func clone[T comparable](value T, seq *cloneSeq) T {
for _, offset := range seq.stringOffsets {
ps := (*string)(unsafe.Pointer(uintptr(unsafe.Pointer(&value)) + offset))
*ps = cloneString(*ps)
}
return value
}
// singleStringClone describes how to clone a single string.
var singleStringClone = cloneSeq{stringOffsets: []uintptr{0}}
// cloneSeq describes how to clone a value of a particular type.
type cloneSeq struct {
stringOffsets []uintptr
}
// makeCloneSeq creates a cloneSeq for a type.
func makeCloneSeq(typ *abi.Type) cloneSeq {
if typ == nil {
return cloneSeq{}
}
if typ.Kind() == abi.String {
return singleStringClone
}
var seq cloneSeq
switch typ.Kind() {
case abi.Struct:
buildStructCloneSeq(typ, &seq, 0)
case abi.Array:
buildArrayCloneSeq(typ, &seq, 0)
}
return seq
}
// buildStructCloneSeq populates a cloneSeq for an abi.Type that has Kind abi.Struct.
func buildStructCloneSeq(typ *abi.Type, seq *cloneSeq, baseOffset uintptr) {
styp := typ.StructType()
for i := range styp.Fields {
f := &styp.Fields[i]
switch f.Typ.Kind() {
case abi.String:
seq.stringOffsets = append(seq.stringOffsets, baseOffset+f.Offset)
case abi.Struct:
buildStructCloneSeq(f.Typ, seq, baseOffset+f.Offset)
case abi.Array:
buildArrayCloneSeq(f.Typ, seq, baseOffset+f.Offset)
}
}
}
// buildArrayCloneSeq populates a cloneSeq for an abi.Type that has Kind abi.Array.
func buildArrayCloneSeq(typ *abi.Type, seq *cloneSeq, baseOffset uintptr) {
atyp := typ.ArrayType()
etyp := atyp.Elem
offset := baseOffset
for range atyp.Len {
switch etyp.Kind() {
case abi.String:
seq.stringOffsets = append(seq.stringOffsets, offset)
case abi.Struct:
buildStructCloneSeq(etyp, seq, offset)
case abi.Array:
buildArrayCloneSeq(etyp, seq, offset)
}
offset += etyp.Size()
align := uintptr(etyp.FieldAlign())
offset = (offset + align - 1) &^ (align - 1)
}
}
// cloneString is a copy of strings.Clone, because we can't depend on the strings
// package here. Several packages that might make use of unique, like net, explicitly
// forbid depending on unicode, which strings depends on.
func cloneString(s string) string {
if len(s) == 0 {
return ""
}
b := make([]byte, len(s))
copy(b, s)
return unsafe.String(&b[0], len(b))
}

37
src/unique/clone_test.go Normal file
View File

@ -0,0 +1,37 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package unique
import (
"internal/abi"
"internal/goarch"
"reflect"
"testing"
)
func TestMakeCloneSeq(t *testing.T) {
testCloneSeq[testString](t, cSeq(0))
testCloneSeq[testIntArray](t, cSeq())
testCloneSeq[testEface](t, cSeq())
testCloneSeq[testStringArray](t, cSeq(0, 2*goarch.PtrSize, 4*goarch.PtrSize))
testCloneSeq[testStringStruct](t, cSeq(0))
testCloneSeq[testStringStructArrayStruct](t, cSeq(0, 2*goarch.PtrSize))
testCloneSeq[testStruct](t, cSeq(8))
}
func cSeq(stringOffsets ...uintptr) cloneSeq {
return cloneSeq{stringOffsets: stringOffsets}
}
func testCloneSeq[T any](t *testing.T, want cloneSeq) {
typName := reflect.TypeFor[T]().Name()
typ := abi.TypeOf(*new(T))
t.Run(typName, func(t *testing.T) {
got := makeCloneSeq(typ)
if !reflect.DeepEqual(got, want) {
t.Errorf("unexpected cloneSeq for type %s: got %#v, want %#v", typName, got, want)
}
})
}

9
src/unique/doc.go Normal file
View File

@ -0,0 +1,9 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
The unique package provides facilities for canonicalizing ("interning")
comparable values.
*/
package unique

174
src/unique/handle.go Normal file
View File

@ -0,0 +1,174 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package unique
import (
"internal/abi"
"internal/concurrent"
"internal/weak"
"runtime"
"sync"
_ "unsafe"
)
// Handle is a globally unique identity for some value of type T.
//
// Two handles compare equal exactly if the two values used to create the handles
// would have also compared equal. The comparison of two handles is trivial and
// typically much more efficient than comparing the values used to create them.
type Handle[T comparable] struct {
value *T
}
// Value returns a shallow copy of the T value that produced the Handle.
func (h Handle[T]) Value() T {
return *h.value
}
// Make returns a globally unique handle for a value of type T. Handles
// are equal if and only if the values used to produce them are equal.
func Make[T comparable](value T) Handle[T] {
// Find the map for type T.
typ := abi.TypeOf(value)
ma, ok := uniqueMaps.Load(typ)
if !ok {
// This is a good time to initialize cleanup, since we must go through
// this path on the first use of Make, and it's not on the hot path.
setupMake.Do(registerCleanup)
ma = addUniqueMap[T](typ)
}
m := ma.(*uniqueMap[T])
// Keep around any values we allocate for insertion. There
// are a few different ways we can race with other threads
// and create values that we might discard. By keeping
// the first one we make around, we can avoid generating
// more than one per racing thread.
var (
toInsert *T // Keep this around to keep it alive.
toInsertWeak weak.Pointer[T]
)
newValue := func() weak.Pointer[T] {
if toInsert == nil {
toInsert = new(T)
*toInsert = clone(value, &m.cloneSeq)
toInsertWeak = weak.Make(toInsert)
}
return toInsertWeak
}
var ptr *T
for {
// Check the map.
wp, ok := m.Load(value)
if !ok {
// Try to insert a new value into the map.
wp, _ = m.LoadOrStore(value, newValue())
}
// Now that we're sure there's a value in the map, let's
// try to get the pointer we need out of it.
ptr = wp.Strong()
if ptr != nil {
break
}
// The weak pointer is nil, so the old value is truly dead.
// Try to remove it and start over.
m.CompareAndDelete(value, wp)
}
runtime.KeepAlive(toInsert)
return Handle[T]{ptr}
}
var (
// uniqueMaps is an index of type-specific concurrent maps used for unique.Make.
//
// The two-level map might seem odd at first since the HashTrieMap could have "any"
// as its key type, but the issue is escape analysis. We do not want to force lookups
// to escape the argument, and using a type-specific map allows us to avoid that where
// possible (for example, for strings and plain-ol'-data structs). We also get the
// benefit of not cramming every different type into a single map, but that's certainly
// not enough to outweigh the cost of two map lookups. What is worth it though, is saving
// on those allocations.
uniqueMaps = concurrent.NewHashTrieMap[*abi.Type, any]() // any is always a *uniqueMap[T].
// cleanupFuncs are functions that clean up dead weak pointers in type-specific
// maps in uniqueMaps. We express cleanup this way because there's no way to iterate
// over the sync.Map and call functions on the type-specific data structures otherwise.
// These cleanup funcs each close over one of these type-specific maps.
//
// cleanupMu protects cleanupNotify and is held across the entire cleanup. Used for testing.
// cleanupNotify is a test-only mechanism that allow tests to wait for the cleanup to run.
cleanupMu sync.Mutex
cleanupFuncsMu sync.Mutex
cleanupFuncs []func()
cleanupNotify []func() // One-time notifcations when cleanups finish.
)
type uniqueMap[T comparable] struct {
*concurrent.HashTrieMap[T, weak.Pointer[T]]
cloneSeq
}
func addUniqueMap[T comparable](typ *abi.Type) *uniqueMap[T] {
// Create a map for T and try to register it. We could
// race with someone else, but that's fine; it's one
// small, stray allocation. The number of allocations
// this can create is bounded by a small constant.
m := &uniqueMap[T]{
HashTrieMap: concurrent.NewHashTrieMap[T, weak.Pointer[T]](),
cloneSeq: makeCloneSeq(typ),
}
a, loaded := uniqueMaps.LoadOrStore(typ, m)
if !loaded {
// Add a cleanup function for the new map.
cleanupFuncsMu.Lock()
cleanupFuncs = append(cleanupFuncs, func() {
// Delete all the entries whose weak references are nil and clean up
// deleted entries.
m.Enumerate(func(key T, wp weak.Pointer[T]) bool {
if wp.Strong() == nil {
m.CompareAndDelete(key, wp)
}
return true
})
})
cleanupFuncsMu.Unlock()
}
return a.(*uniqueMap[T])
}
// setupMake is used to perform initial setup for unique.Make.
var setupMake sync.Once
// startBackgroundCleanup sets up a background goroutine to occasionally call cleanupFuncs.
func registerCleanup() {
runtime_registerUniqueMapCleanup(func() {
// Lock for cleanup.
cleanupMu.Lock()
// Grab funcs to run.
cleanupFuncsMu.Lock()
cf := cleanupFuncs
cleanupFuncsMu.Unlock()
// Run cleanup.
for _, f := range cf {
f()
}
// Run cleanup notifications.
for _, f := range cleanupNotify {
f()
}
cleanupNotify = nil
// Finished.
cleanupMu.Unlock()
})
}
// Implemented in runtime.
//go:linkname runtime_registerUniqueMapCleanup
func runtime_registerUniqueMapCleanup(cleanup func())

View File

@ -0,0 +1,63 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package unique
import (
"fmt"
"runtime"
"testing"
)
func BenchmarkMake(b *testing.B) {
benchmarkMake(b, []string{"foo"})
}
func BenchmarkMakeMany(b *testing.B) {
benchmarkMake(b, testData[:])
}
func BenchmarkMakeManyMany(b *testing.B) {
benchmarkMake(b, testDataLarge[:])
}
func benchmarkMake(b *testing.B, testData []string) {
handles := make([]Handle[string], 0, len(testData))
for i := range testData {
handles = append(handles, Make(testData[i]))
}
b.ReportAllocs()
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
i := 0
for pb.Next() {
_ = Make(testData[i])
i++
if i >= len(testData) {
i = 0
}
}
})
b.StopTimer()
runtime.GC()
runtime.GC()
}
var (
testData [128]string
testDataLarge [128 << 10]string
)
func init() {
for i := range testData {
testData[i] = fmt.Sprintf("%b", i)
}
for i := range testDataLarge {
testDataLarge[i] = fmt.Sprintf("%b", i)
}
}

111
src/unique/handle_test.go Normal file
View File

@ -0,0 +1,111 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package unique
import (
"fmt"
"internal/abi"
"reflect"
"runtime"
"testing"
)
// Set up special types. Because the internal maps are sharded by type,
// this will ensure that we're not overlapping with other tests.
type testString string
type testIntArray [4]int
type testEface any
type testStringArray [3]string
type testStringStruct struct {
a string
}
type testStringStructArrayStruct struct {
s [2]testStringStruct
}
type testStruct struct {
z float64
b string
}
func TestHandle(t *testing.T) {
testHandle[testString](t, "foo")
testHandle[testString](t, "bar")
testHandle[testString](t, "")
testHandle[testIntArray](t, [4]int{7, 77, 777, 7777})
testHandle[testEface](t, nil)
testHandle[testStringArray](t, [3]string{"a", "b", "c"})
testHandle[testStringStruct](t, testStringStruct{"x"})
testHandle[testStringStructArrayStruct](t, testStringStructArrayStruct{
s: [2]testStringStruct{testStringStruct{"y"}, testStringStruct{"z"}},
})
testHandle[testStruct](t, testStruct{0.5, "184"})
}
func testHandle[T comparable](t *testing.T, value T) {
name := reflect.TypeFor[T]().Name()
t.Run(fmt.Sprintf("%s/%#v", name, value), func(t *testing.T) {
t.Parallel()
v0 := Make(value)
v1 := Make(value)
if v0.Value() != v1.Value() {
t.Error("v0.Value != v1.Value")
}
if v0.Value() != value {
t.Errorf("v0.Value not %#v", value)
}
if v0 != v1 {
t.Error("v0 != v1")
}
drainMaps(t)
checkMapsFor(t, value)
})
}
// drainMaps ensures that the internal maps are drained.
func drainMaps(t *testing.T) {
t.Helper()
wait := make(chan struct{}, 1)
// Set up a one-time notification for the next time the cleanup runs.
// Note: this will only run if there's no other active cleanup, so
// we can be sure that the next time cleanup runs, it'll see the new
// notification.
cleanupMu.Lock()
cleanupNotify = append(cleanupNotify, func() {
select {
case wait <- struct{}{}:
default:
}
})
runtime.GC()
cleanupMu.Unlock()
// Wait until cleanup runs.
<-wait
}
func checkMapsFor[T comparable](t *testing.T, value T) {
// Manually load the value out of the map.
typ := abi.TypeOf(value)
a, ok := uniqueMaps.Load(typ)
if !ok {
return
}
m := a.(*uniqueMap[T])
wp, ok := m.Load(value)
if !ok {
return
}
if wp.Strong() != nil {
t.Errorf("value %v still referenced a handle (or tiny block?) ", value)
return
}
t.Errorf("failed to drain internal maps of %v", value)
}