runtime,hash/maphash: make tests use less memory

Use a list + sorting instead of a map to keep track of collisions.
Should save a fair amount of memory.

Change-Id: I7b48cdb5f7060d12ebbb950d56b5c479f131c027
Reviewed-on: https://go-review.googlesource.com/c/go/+/574275
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: qiulaidongfeng <2645477756@qq.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Emmanuel Odeke <emmanuel@orijtech.com>
Reviewed-by: Keith Randall <khr@google.com>
This commit is contained in:
Keith Randall 2024-03-25 13:53:07 -07:00 committed by Keith Randall
parent 340f73ce34
commit 2b5e99455d
2 changed files with 32 additions and 14 deletions

View File

@ -11,6 +11,7 @@ import (
"math"
"math/rand"
"runtime"
"slices"
"strings"
"testing"
"unsafe"
@ -71,16 +72,14 @@ func randBytes(r *rand.Rand, b []byte) {
// A hashSet measures the frequency of hash collisions.
type hashSet struct {
m map[uint64]struct{} // set of hashes added
n int // number of hashes added
list []uint64 // list of hashes added
}
func newHashSet() *hashSet {
return &hashSet{make(map[uint64]struct{}), 0}
return &hashSet{list: make([]uint64, 0, 1024)}
}
func (s *hashSet) add(h uint64) {
s.m[h] = struct{}{}
s.n++
s.list = append(s.list, h)
}
func (s *hashSet) addS(x string) {
s.add(stringHash(x))
@ -95,9 +94,19 @@ func (s *hashSet) addS_seed(x string, seed Seed) {
s.add(h.Sum64())
}
func (s *hashSet) check(t *testing.T) {
list := s.list
slices.Sort(list)
collisions := 0
for i := 1; i < len(list); i++ {
if list[i] == list[i-1] {
collisions++
}
}
n := len(list)
const SLOP = 10.0
collisions := s.n - len(s.m)
pairs := int64(s.n) * int64(s.n-1) / 2
pairs := int64(n) * int64(n-1) / 2
expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
stddev := math.Sqrt(expected)
if float64(collisions) > expected+SLOP*(3*stddev+1) {

View File

@ -10,6 +10,7 @@ import (
"math"
"math/rand"
. "runtime"
"slices"
"strings"
"testing"
"unsafe"
@ -83,16 +84,14 @@ func TestSmhasherSanity(t *testing.T) {
}
type HashSet struct {
m map[uintptr]struct{} // set of hashes added
n int // number of hashes added
list []uintptr // list of hashes added
}
func newHashSet() *HashSet {
return &HashSet{make(map[uintptr]struct{}), 0}
return &HashSet{list: make([]uintptr, 0, 1024)}
}
func (s *HashSet) add(h uintptr) {
s.m[h] = struct{}{}
s.n++
s.list = append(s.list, h)
}
func (s *HashSet) addS(x string) {
s.add(StringHash(x, 0))
@ -104,9 +103,19 @@ func (s *HashSet) addS_seed(x string, seed uintptr) {
s.add(StringHash(x, seed))
}
func (s *HashSet) check(t *testing.T) {
list := s.list
slices.Sort(list)
collisions := 0
for i := 1; i < len(list); i++ {
if list[i] == list[i-1] {
collisions++
}
}
n := len(list)
const SLOP = 50.0
collisions := s.n - len(s.m)
pairs := int64(s.n) * int64(s.n-1) / 2
pairs := int64(n) * int64(n-1) / 2
expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
stddev := math.Sqrt(expected)
if float64(collisions) > expected+SLOP*(3*stddev+1) {