blob: 6b229bdb88192d8308c90316f016b9caf4b30b1b [file] [log] [blame]
Keith Randall78338d82013-09-06 16:23:46 -07001// Copyright 2013 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package runtime_test
6
7import (
8 "fmt"
9 "math"
10 "math/rand"
11 . "runtime"
12 "strings"
13 "testing"
14)
15
16// Smhasher is a torture test for hash functions.
17// https://code.google.com/p/smhasher/
18// This code is a port of some of the Smhasher tests to Go.
19//
20// The current AES hash function passes Smhasher. Our fallback
21// hash functions don't, so we only enable the difficult tests when
22// we know the AES implementation is available.
23
24// Sanity checks.
25// hash should not depend on values outside key.
26// hash should not depend on alignment.
27func TestSmhasherSanity(t *testing.T) {
28 r := rand.New(rand.NewSource(1234))
29 const REP = 10
30 const KEYMAX = 128
31 const PAD = 16
32 const OFFMAX = 16
33 for k := 0; k < REP; k++ {
34 for n := 0; n < KEYMAX; n++ {
35 for i := 0; i < OFFMAX; i++ {
36 var b [KEYMAX + OFFMAX + 2*PAD]byte
37 var c [KEYMAX + OFFMAX + 2*PAD]byte
38 randBytes(r, b[:])
39 randBytes(r, c[:])
40 copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
41 if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) {
42 t.Errorf("hash depends on bytes outside key")
43 }
44 }
45 }
46 }
47}
48
49type HashSet struct {
50 m map[uintptr]struct{} // set of hashes added
51 n int // number of hashes added
52}
53
54func newHashSet() *HashSet {
55 return &HashSet{make(map[uintptr]struct{}), 0}
56}
57func (s *HashSet) add(h uintptr) {
58 s.m[h] = struct{}{}
59 s.n++
60}
61func (s *HashSet) addS(x string) {
62 s.add(StringHash(x, 0))
63}
64func (s *HashSet) addB(x []byte) {
65 s.add(BytesHash(x, 0))
66}
67func (s *HashSet) addS_seed(x string, seed uintptr) {
68 s.add(StringHash(x, seed))
69}
70func (s *HashSet) check(t *testing.T) {
71 const SLOP = 10.0
72 collisions := s.n - len(s.m)
73 //fmt.Printf("%d/%d\n", len(s.m), s.n)
74 pairs := int64(s.n) * int64(s.n-1) / 2
75 expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
76 stddev := math.Sqrt(expected)
77 if float64(collisions) > expected+SLOP*3*stddev {
78 t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev)
79 }
80}
81
82// a string plus adding zeros must make distinct hashes
83func TestSmhasherAppendedZeros(t *testing.T) {
84 s := "hello" + strings.Repeat("\x00", 256)
85 h := newHashSet()
86 for i := 0; i <= len(s); i++ {
87 h.addS(s[:i])
88 }
89 h.check(t)
90}
91
92// All 0-3 byte strings have distinct hashes.
93func TestSmhasherSmallKeys(t *testing.T) {
94 h := newHashSet()
95 var b [3]byte
96 for i := 0; i < 256; i++ {
97 b[0] = byte(i)
98 h.addB(b[:1])
99 for j := 0; j < 256; j++ {
100 b[1] = byte(j)
101 h.addB(b[:2])
102 if !testing.Short() {
103 for k := 0; k < 256; k++ {
104 b[2] = byte(k)
105 h.addB(b[:3])
106 }
107 }
108 }
109 }
110 h.check(t)
111}
112
113// Different length strings of all zeros have distinct hashes.
114func TestSmhasherZeros(t *testing.T) {
115 N := 256 * 1024
116 if testing.Short() {
117 N = 1024
118 }
119 h := newHashSet()
120 b := make([]byte, N)
121 for i := 0; i <= N; i++ {
122 h.addB(b[:i])
123 }
124 h.check(t)
125}
126
127// Strings with up to two nonzero bytes all have distinct hashes.
128func TestSmhasherTwoNonzero(t *testing.T) {
129 if testing.Short() {
130 t.Skip("Skipping in short mode")
131 }
132 h := newHashSet()
133 for n := 2; n <= 16; n++ {
134 twoNonZero(h, n)
135 }
136 h.check(t)
137}
138func twoNonZero(h *HashSet, n int) {
139 b := make([]byte, n)
140
141 // all zero
142 h.addB(b[:])
143
144 // one non-zero byte
145 for i := 0; i < n; i++ {
146 for x := 1; x < 256; x++ {
147 b[i] = byte(x)
148 h.addB(b[:])
149 b[i] = 0
150 }
151 }
152
153 // two non-zero bytes
154 for i := 0; i < n; i++ {
155 for x := 1; x < 256; x++ {
156 b[i] = byte(x)
157 for j := i + 1; j < n; j++ {
158 for y := 1; y < 256; y++ {
159 b[j] = byte(y)
160 h.addB(b[:])
161 b[j] = 0
162 }
163 }
164 b[i] = 0
165 }
166 }
167}
168
169// Test strings with repeats, like "abcdabcdabcdabcd..."
170func TestSmhasherCyclic(t *testing.T) {
171 if testing.Short() {
172 t.Skip("Skipping in short mode")
173 }
Keith Randall78338d82013-09-06 16:23:46 -0700174 r := rand.New(rand.NewSource(1234))
175 const REPEAT = 8
176 const N = 1000000
177 for n := 4; n <= 12; n++ {
178 h := newHashSet()
179 b := make([]byte, REPEAT*n)
180 for i := 0; i < N; i++ {
181 b[0] = byte(i * 79 % 97)
182 b[1] = byte(i * 43 % 137)
183 b[2] = byte(i * 151 % 197)
184 b[3] = byte(i * 199 % 251)
185 randBytes(r, b[4:n])
186 for j := n; j < n*REPEAT; j++ {
187 b[j] = b[j-n]
188 }
189 h.addB(b)
190 }
191 h.check(t)
192 }
193}
194
195// Test strings with only a few bits set
196func TestSmhasherSparse(t *testing.T) {
197 if testing.Short() {
198 t.Skip("Skipping in short mode")
199 }
200 sparse(t, 32, 6)
201 sparse(t, 40, 6)
202 sparse(t, 48, 5)
203 sparse(t, 56, 5)
204 sparse(t, 64, 5)
205 sparse(t, 96, 4)
206 sparse(t, 256, 3)
207 sparse(t, 2048, 2)
208}
209func sparse(t *testing.T, n int, k int) {
210 b := make([]byte, n/8)
211 h := newHashSet()
212 setbits(h, b, 0, k)
213 h.check(t)
214}
215
216// set up to k bits at index i and greater
217func setbits(h *HashSet, b []byte, i int, k int) {
218 h.addB(b)
219 if k == 0 {
220 return
221 }
222 for j := i; j < len(b)*8; j++ {
223 b[j/8] |= byte(1 << uint(j&7))
224 setbits(h, b, j+1, k-1)
225 b[j/8] &= byte(^(1 << uint(j&7)))
226 }
227}
228
229// Test all possible combinations of n blocks from the set s.
230// "permutation" is a bad name here, but it is what Smhasher uses.
231func TestSmhasherPermutation(t *testing.T) {
232 if testing.Short() {
233 t.Skip("Skipping in short mode")
234 }
Keith Randall78338d82013-09-06 16:23:46 -0700235 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
236 permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
237 permutation(t, []uint32{0, 1}, 20)
238 permutation(t, []uint32{0, 1 << 31}, 20)
239 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
240}
241func permutation(t *testing.T, s []uint32, n int) {
242 b := make([]byte, n*4)
243 h := newHashSet()
244 genPerm(h, b, s, 0)
245 h.check(t)
246}
247func genPerm(h *HashSet, b []byte, s []uint32, n int) {
248 h.addB(b[:n])
249 if n == len(b) {
250 return
251 }
252 for _, v := range s {
253 b[n] = byte(v)
254 b[n+1] = byte(v >> 8)
255 b[n+2] = byte(v >> 16)
256 b[n+3] = byte(v >> 24)
257 genPerm(h, b, s, n+4)
258 }
259}
260
261type Key interface {
262 clear() // set bits all to 0
263 random(r *rand.Rand) // set key to something random
264 bits() int // how many bits key has
265 flipBit(i int) // flip bit i of the key
266 hash() uintptr // hash the key
267 name() string // for error reporting
268}
269
270type BytesKey struct {
271 b []byte
272}
273
274func (k *BytesKey) clear() {
275 for i := range k.b {
276 k.b[i] = 0
277 }
278}
279func (k *BytesKey) random(r *rand.Rand) {
280 randBytes(r, k.b)
281}
282func (k *BytesKey) bits() int {
283 return len(k.b) * 8
284}
285func (k *BytesKey) flipBit(i int) {
286 k.b[i>>3] ^= byte(1 << uint(i&7))
287}
288func (k *BytesKey) hash() uintptr {
289 return BytesHash(k.b, 0)
290}
291func (k *BytesKey) name() string {
292 return fmt.Sprintf("bytes%d", len(k.b))
293}
294
295type Int32Key struct {
296 i uint32
297}
298
299func (k *Int32Key) clear() {
300 k.i = 0
301}
302func (k *Int32Key) random(r *rand.Rand) {
303 k.i = r.Uint32()
304}
305func (k *Int32Key) bits() int {
306 return 32
307}
308func (k *Int32Key) flipBit(i int) {
309 k.i ^= 1 << uint(i)
310}
311func (k *Int32Key) hash() uintptr {
312 return Int32Hash(k.i, 0)
313}
314func (k *Int32Key) name() string {
315 return "int32"
316}
317
318type Int64Key struct {
319 i uint64
320}
321
322func (k *Int64Key) clear() {
323 k.i = 0
324}
325func (k *Int64Key) random(r *rand.Rand) {
326 k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32
327}
328func (k *Int64Key) bits() int {
329 return 64
330}
331func (k *Int64Key) flipBit(i int) {
332 k.i ^= 1 << uint(i)
333}
334func (k *Int64Key) hash() uintptr {
335 return Int64Hash(k.i, 0)
336}
337func (k *Int64Key) name() string {
338 return "int64"
339}
340
Keith Randall3d7e3692014-08-07 12:33:20 -0700341type EfaceKey struct {
342 i interface{}
343}
344
345func (k *EfaceKey) clear() {
346 k.i = nil
347}
348func (k *EfaceKey) random(r *rand.Rand) {
349 k.i = uint64(r.Int63())
350}
351func (k *EfaceKey) bits() int {
352 // use 64 bits. This tests inlined interfaces
353 // on 64-bit targets and indirect interfaces on
354 // 32-bit targets.
355 return 64
356}
357func (k *EfaceKey) flipBit(i int) {
358 k.i = k.i.(uint64) ^ uint64(1)<<uint(i)
359}
360func (k *EfaceKey) hash() uintptr {
361 return EfaceHash(k.i, 0)
362}
363func (k *EfaceKey) name() string {
364 return "Eface"
365}
366
367type IfaceKey struct {
368 i interface {
369 F()
370 }
371}
372type fInter uint64
373
374func (x fInter) F() {
375}
376
377func (k *IfaceKey) clear() {
378 k.i = nil
379}
380func (k *IfaceKey) random(r *rand.Rand) {
381 k.i = fInter(r.Int63())
382}
383func (k *IfaceKey) bits() int {
384 // use 64 bits. This tests inlined interfaces
385 // on 64-bit targets and indirect interfaces on
386 // 32-bit targets.
387 return 64
388}
389func (k *IfaceKey) flipBit(i int) {
390 k.i = k.i.(fInter) ^ fInter(1)<<uint(i)
391}
392func (k *IfaceKey) hash() uintptr {
393 return IfaceHash(k.i, 0)
394}
395func (k *IfaceKey) name() string {
396 return "Iface"
397}
398
Keith Randall78338d82013-09-06 16:23:46 -0700399// Flipping a single bit of a key should flip each output bit with 50% probability.
400func TestSmhasherAvalanche(t *testing.T) {
Keith Randall78338d82013-09-06 16:23:46 -0700401 if testing.Short() {
402 t.Skip("Skipping in short mode")
403 }
404 avalancheTest1(t, &BytesKey{make([]byte, 2)})
405 avalancheTest1(t, &BytesKey{make([]byte, 4)})
406 avalancheTest1(t, &BytesKey{make([]byte, 8)})
407 avalancheTest1(t, &BytesKey{make([]byte, 16)})
408 avalancheTest1(t, &BytesKey{make([]byte, 32)})
409 avalancheTest1(t, &BytesKey{make([]byte, 200)})
410 avalancheTest1(t, &Int32Key{})
411 avalancheTest1(t, &Int64Key{})
Keith Randall3d7e3692014-08-07 12:33:20 -0700412 avalancheTest1(t, &EfaceKey{})
413 avalancheTest1(t, &IfaceKey{})
Keith Randall78338d82013-09-06 16:23:46 -0700414}
415func avalancheTest1(t *testing.T, k Key) {
416 const REP = 100000
417 r := rand.New(rand.NewSource(1234))
418 n := k.bits()
419
420 // grid[i][j] is a count of whether flipping
421 // input bit i affects output bit j.
422 grid := make([][hashSize]int, n)
423
424 for z := 0; z < REP; z++ {
425 // pick a random key, hash it
426 k.random(r)
427 h := k.hash()
428
429 // flip each bit, hash & compare the results
430 for i := 0; i < n; i++ {
431 k.flipBit(i)
432 d := h ^ k.hash()
433 k.flipBit(i)
434
435 // record the effects of that bit flip
436 g := &grid[i]
437 for j := 0; j < hashSize; j++ {
438 g[j] += int(d & 1)
439 d >>= 1
440 }
441 }
442 }
443
444 // Each entry in the grid should be about REP/2.
445 // More precisely, we did N = k.bits() * hashSize experiments where
446 // each is the sum of REP coin flips. We want to find bounds on the
447 // sum of coin flips such that a truly random experiment would have
448 // all sums inside those bounds with 99% probability.
449 N := n * hashSize
450 var c float64
Keith Randall711d1ad2014-05-09 15:50:57 -0700451 // find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999
452 for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
Keith Randall78338d82013-09-06 16:23:46 -0700453 }
Keith Randall711d1ad2014-05-09 15:50:57 -0700454 c *= 4.0 // allowed slack - we don't need to be perfectly random
Keith Randall78338d82013-09-06 16:23:46 -0700455 mean := .5 * REP
456 stddev := .5 * math.Sqrt(REP)
457 low := int(mean - c*stddev)
458 high := int(mean + c*stddev)
459 for i := 0; i < n; i++ {
460 for j := 0; j < hashSize; j++ {
461 x := grid[i][j]
462 if x < low || x > high {
463 t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP)
464 }
465 }
466 }
467}
468
469// All bit rotations of a set of distinct keys
470func TestSmhasherWindowed(t *testing.T) {
471 windowed(t, &Int32Key{})
472 windowed(t, &Int64Key{})
473 windowed(t, &BytesKey{make([]byte, 128)})
474}
475func windowed(t *testing.T, k Key) {
476 if testing.Short() {
477 t.Skip("Skipping in short mode")
478 }
479 const BITS = 16
480
481 for r := 0; r < k.bits(); r++ {
482 h := newHashSet()
483 for i := 0; i < 1<<BITS; i++ {
484 k.clear()
485 for j := 0; j < BITS; j++ {
486 if i>>uint(j)&1 != 0 {
487 k.flipBit((j + r) % k.bits())
488 }
489 }
490 h.add(k.hash())
491 }
492 h.check(t)
493 }
494}
495
496// All keys of the form prefix + [A-Za-z0-9]*N + suffix.
497func TestSmhasherText(t *testing.T) {
498 if testing.Short() {
499 t.Skip("Skipping in short mode")
500 }
501 text(t, "Foo", "Bar")
502 text(t, "FooBar", "")
503 text(t, "", "FooBar")
504}
505func text(t *testing.T, prefix, suffix string) {
506 const N = 4
507 const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
508 const L = len(S)
509 b := make([]byte, len(prefix)+N+len(suffix))
510 copy(b, prefix)
511 copy(b[len(prefix)+N:], suffix)
512 h := newHashSet()
513 c := b[len(prefix):]
514 for i := 0; i < L; i++ {
515 c[0] = S[i]
516 for j := 0; j < L; j++ {
517 c[1] = S[j]
518 for k := 0; k < L; k++ {
519 c[2] = S[k]
520 for x := 0; x < L; x++ {
521 c[3] = S[x]
522 h.addB(b)
523 }
524 }
525 }
526 }
527 h.check(t)
528}
529
530// Make sure different seed values generate different hashes.
531func TestSmhasherSeed(t *testing.T) {
532 h := newHashSet()
533 const N = 100000
534 s := "hello"
535 for i := 0; i < N; i++ {
536 h.addS_seed(s, uintptr(i))
537 }
538 h.check(t)
539}
540
541// size of the hash output (32 or 64 bits)
542const hashSize = 32 + int(^uintptr(0)>>63<<5)
543
544func randBytes(r *rand.Rand, b []byte) {
545 for i := range b {
546 b[i] = byte(r.Uint32())
547 }
548}
549
550func benchmarkHash(b *testing.B, n int) {
551 s := strings.Repeat("A", n)
552
553 for i := 0; i < b.N; i++ {
554 StringHash(s, 0)
555 }
556 b.SetBytes(int64(n))
557}
558
559func BenchmarkHash5(b *testing.B) { benchmarkHash(b, 5) }
560func BenchmarkHash16(b *testing.B) { benchmarkHash(b, 16) }
561func BenchmarkHash64(b *testing.B) { benchmarkHash(b, 64) }
562func BenchmarkHash1024(b *testing.B) { benchmarkHash(b, 1024) }
563func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) }