Source file src/runtime/hash_test.go

     1  // Copyright 2013 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime_test
     6  
     7  import (
     8  	"encoding/binary"
     9  	"fmt"
    10  	"internal/race"
    11  	"internal/testenv"
    12  	"math"
    13  	"math/rand"
    14  	"os"
    15  	. "runtime"
    16  	"slices"
    17  	"strings"
    18  	"testing"
    19  	"unsafe"
    20  )
    21  
    22  func TestMemHash32Equality(t *testing.T) {
    23  	if *UseAeshash {
    24  		t.Skip("skipping since AES hash implementation is used")
    25  	}
    26  	var b [4]byte
    27  	r := rand.New(rand.NewSource(1234))
    28  	seed := uintptr(r.Uint64())
    29  	for i := 0; i < 100; i++ {
    30  		randBytes(r, b[:])
    31  		got := MemHash32(unsafe.Pointer(&b), seed)
    32  		want := MemHash(unsafe.Pointer(&b), seed, 4)
    33  		if got != want {
    34  			t.Errorf("MemHash32(%x, %v) = %v; want %v", b, seed, got, want)
    35  		}
    36  	}
    37  }
    38  
    39  func TestMemHash64Equality(t *testing.T) {
    40  	if *UseAeshash {
    41  		t.Skip("skipping since AES hash implementation is used")
    42  	}
    43  	var b [8]byte
    44  	r := rand.New(rand.NewSource(1234))
    45  	seed := uintptr(r.Uint64())
    46  	for i := 0; i < 100; i++ {
    47  		randBytes(r, b[:])
    48  		got := MemHash64(unsafe.Pointer(&b), seed)
    49  		want := MemHash(unsafe.Pointer(&b), seed, 8)
    50  		if got != want {
    51  			t.Errorf("MemHash64(%x, %v) = %v; want %v", b, seed, got, want)
    52  		}
    53  	}
    54  }
    55  
    56  // Smhasher is a torture test for hash functions.
    57  // https://code.google.com/p/smhasher/
    58  // This code is a port of some of the Smhasher tests to Go.
    59  //
    60  // The current AES hash function passes Smhasher. Our fallback
    61  // hash functions don't, so we only enable the difficult tests when
    62  // we know the AES implementation is available.
    63  
    64  // Sanity checks.
    65  // hash should not depend on values outside key.
    66  // hash should not depend on alignment.
    67  func TestSmhasherSanity(t *testing.T) {
    68  	r := rand.New(rand.NewSource(1234))
    69  	const REP = 10
    70  	const KEYMAX = 128
    71  	const PAD = 16
    72  	const OFFMAX = 16
    73  	for k := 0; k < REP; k++ {
    74  		for n := 0; n < KEYMAX; n++ {
    75  			for i := 0; i < OFFMAX; i++ {
    76  				var b [KEYMAX + OFFMAX + 2*PAD]byte
    77  				var c [KEYMAX + OFFMAX + 2*PAD]byte
    78  				randBytes(r, b[:])
    79  				randBytes(r, c[:])
    80  				copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
    81  				if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) {
    82  					t.Errorf("hash depends on bytes outside key")
    83  				}
    84  			}
    85  		}
    86  	}
    87  }
    88  
    89  type HashSet struct {
    90  	list []uintptr // list of hashes added
    91  }
    92  
    93  func newHashSet() *HashSet {
    94  	return &HashSet{list: make([]uintptr, 0, 1024)}
    95  }
    96  func (s *HashSet) add(h uintptr) {
    97  	s.list = append(s.list, h)
    98  }
    99  func (s *HashSet) addS(x string) {
   100  	s.add(StringHash(x, 0))
   101  }
   102  func (s *HashSet) addB(x []byte) {
   103  	s.add(BytesHash(x, 0))
   104  }
   105  func (s *HashSet) addS_seed(x string, seed uintptr) {
   106  	s.add(StringHash(x, seed))
   107  }
   108  func (s *HashSet) check(t *testing.T) {
   109  	list := s.list
   110  	slices.Sort(list)
   111  
   112  	collisions := 0
   113  	for i := 1; i < len(list); i++ {
   114  		if list[i] == list[i-1] {
   115  			collisions++
   116  		}
   117  	}
   118  	n := len(list)
   119  
   120  	const SLOP = 50.0
   121  	pairs := int64(n) * int64(n-1) / 2
   122  	expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
   123  	stddev := math.Sqrt(expected)
   124  	if float64(collisions) > expected+SLOP*(3*stddev+1) {
   125  		t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f threshold=%f", collisions, expected, stddev, expected+SLOP*(3*stddev+1))
   126  	}
   127  	// Reset for reuse
   128  	s.list = s.list[:0]
   129  }
   130  
   131  // a string plus adding zeros must make distinct hashes
   132  func TestSmhasherAppendedZeros(t *testing.T) {
   133  	s := "hello" + strings.Repeat("\x00", 256)
   134  	h := newHashSet()
   135  	for i := 0; i <= len(s); i++ {
   136  		h.addS(s[:i])
   137  	}
   138  	h.check(t)
   139  }
   140  
   141  // All 0-3 byte strings have distinct hashes.
   142  func TestSmhasherSmallKeys(t *testing.T) {
   143  	if race.Enabled {
   144  		t.Skip("Too long for race mode")
   145  	}
   146  	h := newHashSet()
   147  	var b [3]byte
   148  	for i := 0; i < 256; i++ {
   149  		b[0] = byte(i)
   150  		h.addB(b[:1])
   151  		for j := 0; j < 256; j++ {
   152  			b[1] = byte(j)
   153  			h.addB(b[:2])
   154  			if !testing.Short() {
   155  				for k := 0; k < 256; k++ {
   156  					b[2] = byte(k)
   157  					h.addB(b[:3])
   158  				}
   159  			}
   160  		}
   161  	}
   162  	h.check(t)
   163  }
   164  
   165  // Different length strings of all zeros have distinct hashes.
   166  func TestSmhasherZeros(t *testing.T) {
   167  	N := 256 * 1024
   168  	if testing.Short() {
   169  		N = 1024
   170  	}
   171  	h := newHashSet()
   172  	b := make([]byte, N)
   173  	for i := 0; i <= N; i++ {
   174  		h.addB(b[:i])
   175  	}
   176  	h.check(t)
   177  }
   178  
   179  // Strings with up to two nonzero bytes all have distinct hashes.
   180  func TestSmhasherTwoNonzero(t *testing.T) {
   181  	if GOARCH == "wasm" {
   182  		t.Skip("Too slow on wasm")
   183  	}
   184  	if testing.Short() {
   185  		t.Skip("Skipping in short mode")
   186  	}
   187  	if race.Enabled {
   188  		t.Skip("Too long for race mode")
   189  	}
   190  	h := newHashSet()
   191  	for n := 2; n <= 16; n++ {
   192  		twoNonZero(h, n)
   193  	}
   194  	h.check(t)
   195  }
   196  func twoNonZero(h *HashSet, n int) {
   197  	b := make([]byte, n)
   198  
   199  	// all zero
   200  	h.addB(b)
   201  
   202  	// one non-zero byte
   203  	for i := 0; i < n; i++ {
   204  		for x := 1; x < 256; x++ {
   205  			b[i] = byte(x)
   206  			h.addB(b)
   207  			b[i] = 0
   208  		}
   209  	}
   210  
   211  	// two non-zero bytes
   212  	for i := 0; i < n; i++ {
   213  		for x := 1; x < 256; x++ {
   214  			b[i] = byte(x)
   215  			for j := i + 1; j < n; j++ {
   216  				for y := 1; y < 256; y++ {
   217  					b[j] = byte(y)
   218  					h.addB(b)
   219  					b[j] = 0
   220  				}
   221  			}
   222  			b[i] = 0
   223  		}
   224  	}
   225  }
   226  
   227  // Test strings with repeats, like "abcdabcdabcdabcd..."
   228  func TestSmhasherCyclic(t *testing.T) {
   229  	if testing.Short() {
   230  		t.Skip("Skipping in short mode")
   231  	}
   232  	if race.Enabled {
   233  		t.Skip("Too long for race mode")
   234  	}
   235  	r := rand.New(rand.NewSource(1234))
   236  	const REPEAT = 8
   237  	const N = 1000000
   238  	h := newHashSet()
   239  	for n := 4; n <= 12; n++ {
   240  		b := make([]byte, REPEAT*n)
   241  		for i := 0; i < N; i++ {
   242  			b[0] = byte(i * 79 % 97)
   243  			b[1] = byte(i * 43 % 137)
   244  			b[2] = byte(i * 151 % 197)
   245  			b[3] = byte(i * 199 % 251)
   246  			randBytes(r, b[4:n])
   247  			for j := n; j < n*REPEAT; j++ {
   248  				b[j] = b[j-n]
   249  			}
   250  			h.addB(b)
   251  		}
   252  		h.check(t)
   253  	}
   254  }
   255  
   256  // Test strings with only a few bits set
   257  func TestSmhasherSparse(t *testing.T) {
   258  	if GOARCH == "wasm" {
   259  		t.Skip("Too slow on wasm")
   260  	}
   261  	if testing.Short() {
   262  		t.Skip("Skipping in short mode")
   263  	}
   264  	h := newHashSet()
   265  	sparse(t, h, 32, 6)
   266  	sparse(t, h, 40, 6)
   267  	sparse(t, h, 48, 5)
   268  	sparse(t, h, 56, 5)
   269  	sparse(t, h, 64, 5)
   270  	sparse(t, h, 96, 4)
   271  	sparse(t, h, 256, 3)
   272  	sparse(t, h, 2048, 2)
   273  }
   274  func sparse(t *testing.T, h *HashSet, n int, k int) {
   275  	b := make([]byte, n/8)
   276  	setbits(h, b, 0, k)
   277  	h.check(t)
   278  }
   279  
   280  // set up to k bits at index i and greater
   281  func setbits(h *HashSet, b []byte, i int, k int) {
   282  	h.addB(b)
   283  	if k == 0 {
   284  		return
   285  	}
   286  	for j := i; j < len(b)*8; j++ {
   287  		b[j/8] |= byte(1 << uint(j&7))
   288  		setbits(h, b, j+1, k-1)
   289  		b[j/8] &= byte(^(1 << uint(j&7)))
   290  	}
   291  }
   292  
   293  // Test all possible combinations of n blocks from the set s.
   294  // "permutation" is a bad name here, but it is what Smhasher uses.
   295  func TestSmhasherPermutation(t *testing.T) {
   296  	if GOARCH == "wasm" {
   297  		t.Skip("Too slow on wasm")
   298  	}
   299  	if testing.Short() {
   300  		t.Skip("Skipping in short mode")
   301  	}
   302  	if race.Enabled {
   303  		t.Skip("Too long for race mode")
   304  	}
   305  	h := newHashSet()
   306  	permutation(t, h, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
   307  	permutation(t, h, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
   308  	permutation(t, h, []uint32{0, 1}, 20)
   309  	permutation(t, h, []uint32{0, 1 << 31}, 20)
   310  	permutation(t, h, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
   311  }
   312  func permutation(t *testing.T, h *HashSet, s []uint32, n int) {
   313  	b := make([]byte, n*4)
   314  	genPerm(h, b, s, 0)
   315  	h.check(t)
   316  }
   317  func genPerm(h *HashSet, b []byte, s []uint32, n int) {
   318  	h.addB(b[:n])
   319  	if n == len(b) {
   320  		return
   321  	}
   322  	for _, v := range s {
   323  		b[n] = byte(v)
   324  		b[n+1] = byte(v >> 8)
   325  		b[n+2] = byte(v >> 16)
   326  		b[n+3] = byte(v >> 24)
   327  		genPerm(h, b, s, n+4)
   328  	}
   329  }
   330  
   331  type Key interface {
   332  	clear()              // set bits all to 0
   333  	random(r *rand.Rand) // set key to something random
   334  	bits() int           // how many bits key has
   335  	flipBit(i int)       // flip bit i of the key
   336  	hash() uintptr       // hash the key
   337  	name() string        // for error reporting
   338  }
   339  
   340  type BytesKey struct {
   341  	b []byte
   342  }
   343  
   344  func (k *BytesKey) clear() {
   345  	clear(k.b)
   346  }
   347  func (k *BytesKey) random(r *rand.Rand) {
   348  	randBytes(r, k.b)
   349  }
   350  func (k *BytesKey) bits() int {
   351  	return len(k.b) * 8
   352  }
   353  func (k *BytesKey) flipBit(i int) {
   354  	k.b[i>>3] ^= byte(1 << uint(i&7))
   355  }
   356  func (k *BytesKey) hash() uintptr {
   357  	return BytesHash(k.b, 0)
   358  }
   359  func (k *BytesKey) name() string {
   360  	return fmt.Sprintf("bytes%d", len(k.b))
   361  }
   362  
   363  type Int32Key struct {
   364  	i uint32
   365  }
   366  
   367  func (k *Int32Key) clear() {
   368  	k.i = 0
   369  }
   370  func (k *Int32Key) random(r *rand.Rand) {
   371  	k.i = r.Uint32()
   372  }
   373  func (k *Int32Key) bits() int {
   374  	return 32
   375  }
   376  func (k *Int32Key) flipBit(i int) {
   377  	k.i ^= 1 << uint(i)
   378  }
   379  func (k *Int32Key) hash() uintptr {
   380  	return Int32Hash(k.i, 0)
   381  }
   382  func (k *Int32Key) name() string {
   383  	return "int32"
   384  }
   385  
   386  type Int64Key struct {
   387  	i uint64
   388  }
   389  
   390  func (k *Int64Key) clear() {
   391  	k.i = 0
   392  }
   393  func (k *Int64Key) random(r *rand.Rand) {
   394  	k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32
   395  }
   396  func (k *Int64Key) bits() int {
   397  	return 64
   398  }
   399  func (k *Int64Key) flipBit(i int) {
   400  	k.i ^= 1 << uint(i)
   401  }
   402  func (k *Int64Key) hash() uintptr {
   403  	return Int64Hash(k.i, 0)
   404  }
   405  func (k *Int64Key) name() string {
   406  	return "int64"
   407  }
   408  
   409  type EfaceKey struct {
   410  	i any
   411  }
   412  
   413  func (k *EfaceKey) clear() {
   414  	k.i = nil
   415  }
   416  func (k *EfaceKey) random(r *rand.Rand) {
   417  	k.i = uint64(r.Int63())
   418  }
   419  func (k *EfaceKey) bits() int {
   420  	// use 64 bits. This tests inlined interfaces
   421  	// on 64-bit targets and indirect interfaces on
   422  	// 32-bit targets.
   423  	return 64
   424  }
   425  func (k *EfaceKey) flipBit(i int) {
   426  	k.i = k.i.(uint64) ^ uint64(1)<<uint(i)
   427  }
   428  func (k *EfaceKey) hash() uintptr {
   429  	return EfaceHash(k.i, 0)
   430  }
   431  func (k *EfaceKey) name() string {
   432  	return "Eface"
   433  }
   434  
   435  type IfaceKey struct {
   436  	i interface {
   437  		F()
   438  	}
   439  }
   440  type fInter uint64
   441  
   442  func (x fInter) F() {
   443  }
   444  
   445  func (k *IfaceKey) clear() {
   446  	k.i = nil
   447  }
   448  func (k *IfaceKey) random(r *rand.Rand) {
   449  	k.i = fInter(r.Int63())
   450  }
   451  func (k *IfaceKey) bits() int {
   452  	// use 64 bits. This tests inlined interfaces
   453  	// on 64-bit targets and indirect interfaces on
   454  	// 32-bit targets.
   455  	return 64
   456  }
   457  func (k *IfaceKey) flipBit(i int) {
   458  	k.i = k.i.(fInter) ^ fInter(1)<<uint(i)
   459  }
   460  func (k *IfaceKey) hash() uintptr {
   461  	return IfaceHash(k.i, 0)
   462  }
   463  func (k *IfaceKey) name() string {
   464  	return "Iface"
   465  }
   466  
   467  // Flipping a single bit of a key should flip each output bit with 50% probability.
   468  func TestSmhasherAvalanche(t *testing.T) {
   469  	if GOARCH == "wasm" {
   470  		t.Skip("Too slow on wasm")
   471  	}
   472  	if testing.Short() {
   473  		t.Skip("Skipping in short mode")
   474  	}
   475  	if race.Enabled {
   476  		t.Skip("Too long for race mode")
   477  	}
   478  	avalancheTest1(t, &BytesKey{make([]byte, 2)})
   479  	avalancheTest1(t, &BytesKey{make([]byte, 4)})
   480  	avalancheTest1(t, &BytesKey{make([]byte, 8)})
   481  	avalancheTest1(t, &BytesKey{make([]byte, 16)})
   482  	avalancheTest1(t, &BytesKey{make([]byte, 32)})
   483  	avalancheTest1(t, &BytesKey{make([]byte, 200)})
   484  	avalancheTest1(t, &Int32Key{})
   485  	avalancheTest1(t, &Int64Key{})
   486  	avalancheTest1(t, &EfaceKey{})
   487  	avalancheTest1(t, &IfaceKey{})
   488  }
   489  func avalancheTest1(t *testing.T, k Key) {
   490  	const REP = 100000
   491  	r := rand.New(rand.NewSource(1234))
   492  	n := k.bits()
   493  
   494  	// grid[i][j] is a count of whether flipping
   495  	// input bit i affects output bit j.
   496  	grid := make([][hashSize]int, n)
   497  
   498  	for z := 0; z < REP; z++ {
   499  		// pick a random key, hash it
   500  		k.random(r)
   501  		h := k.hash()
   502  
   503  		// flip each bit, hash & compare the results
   504  		for i := 0; i < n; i++ {
   505  			k.flipBit(i)
   506  			d := h ^ k.hash()
   507  			k.flipBit(i)
   508  
   509  			// record the effects of that bit flip
   510  			g := &grid[i]
   511  			for j := 0; j < hashSize; j++ {
   512  				g[j] += int(d & 1)
   513  				d >>= 1
   514  			}
   515  		}
   516  	}
   517  
   518  	// Each entry in the grid should be about REP/2.
   519  	// More precisely, we did N = k.bits() * hashSize experiments where
   520  	// each is the sum of REP coin flips. We want to find bounds on the
   521  	// sum of coin flips such that a truly random experiment would have
   522  	// all sums inside those bounds with 99% probability.
   523  	N := n * hashSize
   524  	var c float64
   525  	// find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999
   526  	for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
   527  	}
   528  	c *= 11.0 // allowed slack: 40% to 60% - we don't need to be perfectly random
   529  	mean := .5 * REP
   530  	stddev := .5 * math.Sqrt(REP)
   531  	low := int(mean - c*stddev)
   532  	high := int(mean + c*stddev)
   533  	for i := 0; i < n; i++ {
   534  		for j := 0; j < hashSize; j++ {
   535  			x := grid[i][j]
   536  			if x < low || x > high {
   537  				t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP)
   538  			}
   539  		}
   540  	}
   541  }
   542  
   543  // All bit rotations of a set of distinct keys
   544  func TestSmhasherWindowed(t *testing.T) {
   545  	if race.Enabled {
   546  		t.Skip("Too long for race mode")
   547  	}
   548  	h := newHashSet()
   549  	t.Logf("32 bit keys")
   550  	windowed(t, h, &Int32Key{})
   551  	t.Logf("64 bit keys")
   552  	windowed(t, h, &Int64Key{})
   553  	t.Logf("string keys")
   554  	windowed(t, h, &BytesKey{make([]byte, 128)})
   555  }
   556  func windowed(t *testing.T, h *HashSet, k Key) {
   557  	if GOARCH == "wasm" {
   558  		t.Skip("Too slow on wasm")
   559  	}
   560  	if PtrSize == 4 {
   561  		// This test tends to be flaky on 32-bit systems.
   562  		// There's not enough bits in the hash output, so we
   563  		// expect a nontrivial number of collisions, and it is
   564  		// often quite a bit higher than expected. See issue 43130.
   565  		t.Skip("Flaky on 32-bit systems")
   566  	}
   567  	if testing.Short() {
   568  		t.Skip("Skipping in short mode")
   569  	}
   570  	const BITS = 16
   571  
   572  	for r := 0; r < k.bits(); r++ {
   573  		for i := 0; i < 1<<BITS; i++ {
   574  			k.clear()
   575  			for j := 0; j < BITS; j++ {
   576  				if i>>uint(j)&1 != 0 {
   577  					k.flipBit((j + r) % k.bits())
   578  				}
   579  			}
   580  			h.add(k.hash())
   581  		}
   582  		h.check(t)
   583  	}
   584  }
   585  
   586  // All keys of the form prefix + [A-Za-z0-9]*N + suffix.
   587  func TestSmhasherText(t *testing.T) {
   588  	if testing.Short() {
   589  		t.Skip("Skipping in short mode")
   590  	}
   591  	h := newHashSet()
   592  	text(t, h, "Foo", "Bar")
   593  	text(t, h, "FooBar", "")
   594  	text(t, h, "", "FooBar")
   595  }
   596  func text(t *testing.T, h *HashSet, prefix, suffix string) {
   597  	const N = 4
   598  	const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
   599  	const L = len(S)
   600  	b := make([]byte, len(prefix)+N+len(suffix))
   601  	copy(b, prefix)
   602  	copy(b[len(prefix)+N:], suffix)
   603  	c := b[len(prefix):]
   604  	for i := 0; i < L; i++ {
   605  		c[0] = S[i]
   606  		for j := 0; j < L; j++ {
   607  			c[1] = S[j]
   608  			for k := 0; k < L; k++ {
   609  				c[2] = S[k]
   610  				for x := 0; x < L; x++ {
   611  					c[3] = S[x]
   612  					h.addB(b)
   613  				}
   614  			}
   615  		}
   616  	}
   617  	h.check(t)
   618  }
   619  
   620  // Make sure different seed values generate different hashes.
   621  func TestSmhasherSeed(t *testing.T) {
   622  	h := newHashSet()
   623  	const N = 100000
   624  	s := "hello"
   625  	for i := 0; i < N; i++ {
   626  		h.addS_seed(s, uintptr(i))
   627  	}
   628  	h.check(t)
   629  }
   630  
   631  func TestIssue66841(t *testing.T) {
   632  	testenv.MustHaveExec(t)
   633  	if *UseAeshash && os.Getenv("TEST_ISSUE_66841") == "" {
   634  		// We want to test the backup hash, so if we're running on a machine
   635  		// that uses aeshash, exec ourselves while turning aes off.
   636  		cmd := testenv.CleanCmdEnv(testenv.Command(t, os.Args[0], "-test.run=^TestIssue66841$"))
   637  		cmd.Env = append(cmd.Env, "GODEBUG=cpu.aes=off", "TEST_ISSUE_66841=1")
   638  		out, err := cmd.CombinedOutput()
   639  		if err != nil {
   640  			t.Errorf("%s", string(out))
   641  		}
   642  		// Fall through. Might as well run this test when aeshash is on also.
   643  	}
   644  	h := newHashSet()
   645  	var b [16]byte
   646  	binary.LittleEndian.PutUint64(b[:8], 0xe7037ed1a0b428db) // runtime.m2
   647  	for i := 0; i < 1000; i++ {
   648  		binary.LittleEndian.PutUint64(b[8:], uint64(i))
   649  		h.addB(b[:])
   650  	}
   651  	h.check(t)
   652  }
   653  
   654  // size of the hash output (32 or 64 bits)
   655  const hashSize = 32 + int(^uintptr(0)>>63<<5)
   656  
   657  func randBytes(r *rand.Rand, b []byte) {
   658  	for i := range b {
   659  		b[i] = byte(r.Uint32())
   660  	}
   661  }
   662  
   663  func benchmarkHash(b *testing.B, n int) {
   664  	s := strings.Repeat("A", n)
   665  
   666  	for i := 0; i < b.N; i++ {
   667  		StringHash(s, 0)
   668  	}
   669  	b.SetBytes(int64(n))
   670  }
   671  
   672  func BenchmarkHash5(b *testing.B)     { benchmarkHash(b, 5) }
   673  func BenchmarkHash16(b *testing.B)    { benchmarkHash(b, 16) }
   674  func BenchmarkHash64(b *testing.B)    { benchmarkHash(b, 64) }
   675  func BenchmarkHash1024(b *testing.B)  { benchmarkHash(b, 1024) }
   676  func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) }
   677  
   678  func TestArrayHash(t *testing.T) {
   679  	// Make sure that "" in arrays hash correctly. The hash
   680  	// should at least scramble the input seed so that, e.g.,
   681  	// {"","foo"} and {"foo",""} have different hashes.
   682  
   683  	// If the hash is bad, then all (8 choose 4) = 70 keys
   684  	// have the same hash. If so, we allocate 70/8 = 8
   685  	// overflow buckets. If the hash is good we don't
   686  	// normally allocate any overflow buckets, and the
   687  	// probability of even one or two overflows goes down rapidly.
   688  	// (There is always 1 allocation of the bucket array. The map
   689  	// header is allocated on the stack.)
   690  	f := func() {
   691  		// Make the key type at most 128 bytes. Otherwise,
   692  		// we get an allocation per key.
   693  		type key [8]string
   694  		m := make(map[key]bool, 70)
   695  
   696  		// fill m with keys that have 4 "foo"s and 4 ""s.
   697  		for i := 0; i < 256; i++ {
   698  			var k key
   699  			cnt := 0
   700  			for j := uint(0); j < 8; j++ {
   701  				if i>>j&1 != 0 {
   702  					k[j] = "foo"
   703  					cnt++
   704  				}
   705  			}
   706  			if cnt == 4 {
   707  				m[k] = true
   708  			}
   709  		}
   710  		if len(m) != 70 {
   711  			t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
   712  		}
   713  	}
   714  	if n := testing.AllocsPerRun(10, f); n > 6 {
   715  		t.Errorf("too many allocs %f - hash not balanced", n)
   716  	}
   717  }
   718  func TestStructHash(t *testing.T) {
   719  	// See the comment in TestArrayHash.
   720  	f := func() {
   721  		type key struct {
   722  			a, b, c, d, e, f, g, h string
   723  		}
   724  		m := make(map[key]bool, 70)
   725  
   726  		// fill m with keys that have 4 "foo"s and 4 ""s.
   727  		for i := 0; i < 256; i++ {
   728  			var k key
   729  			cnt := 0
   730  			if i&1 != 0 {
   731  				k.a = "foo"
   732  				cnt++
   733  			}
   734  			if i&2 != 0 {
   735  				k.b = "foo"
   736  				cnt++
   737  			}
   738  			if i&4 != 0 {
   739  				k.c = "foo"
   740  				cnt++
   741  			}
   742  			if i&8 != 0 {
   743  				k.d = "foo"
   744  				cnt++
   745  			}
   746  			if i&16 != 0 {
   747  				k.e = "foo"
   748  				cnt++
   749  			}
   750  			if i&32 != 0 {
   751  				k.f = "foo"
   752  				cnt++
   753  			}
   754  			if i&64 != 0 {
   755  				k.g = "foo"
   756  				cnt++
   757  			}
   758  			if i&128 != 0 {
   759  				k.h = "foo"
   760  				cnt++
   761  			}
   762  			if cnt == 4 {
   763  				m[k] = true
   764  			}
   765  		}
   766  		if len(m) != 70 {
   767  			t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
   768  		}
   769  	}
   770  	if n := testing.AllocsPerRun(10, f); n > 6 {
   771  		t.Errorf("too many allocs %f - hash not balanced", n)
   772  	}
   773  }
   774  
   775  var sink uint64
   776  
   777  func BenchmarkAlignedLoad(b *testing.B) {
   778  	var buf [16]byte
   779  	p := unsafe.Pointer(&buf[0])
   780  	var s uint64
   781  	for i := 0; i < b.N; i++ {
   782  		s += ReadUnaligned64(p)
   783  	}
   784  	sink = s
   785  }
   786  
   787  func BenchmarkUnalignedLoad(b *testing.B) {
   788  	var buf [16]byte
   789  	p := unsafe.Pointer(&buf[1])
   790  	var s uint64
   791  	for i := 0; i < b.N; i++ {
   792  		s += ReadUnaligned64(p)
   793  	}
   794  	sink = s
   795  }
   796  
   797  func TestCollisions(t *testing.T) {
   798  	if testing.Short() {
   799  		t.Skip("Skipping in short mode")
   800  	}
   801  	for i := 0; i < 16; i++ {
   802  		for j := 0; j < 16; j++ {
   803  			if j == i {
   804  				continue
   805  			}
   806  			var a [16]byte
   807  			m := make(map[uint16]struct{}, 1<<16)
   808  			for n := 0; n < 1<<16; n++ {
   809  				a[i] = byte(n)
   810  				a[j] = byte(n >> 8)
   811  				m[uint16(BytesHash(a[:], 0))] = struct{}{}
   812  			}
   813  			// N balls in N bins, for N=65536
   814  			avg := 41427
   815  			stdDev := 123
   816  			if len(m) < avg-40*stdDev || len(m) > avg+40*stdDev {
   817  				t.Errorf("bad number of collisions i=%d j=%d outputs=%d out of 65536\n", i, j, len(m))
   818  			}
   819  		}
   820  	}
   821  }
   822  

View as plain text