Blame - src/runtime/hash_test.go - go

blob: 6b229bdb88192d8308c90316f016b9caf4b30b1b [file] [log] [blame]

Keith Randall	78338d8	2013-09-06 16:23:46 -0700	[diff] [blame]	1	// Copyright 2013 The Go Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style
				3	// license that can be found in the LICENSE file.
				4
				5	package runtime_test
				6
				7	import (
				8	"fmt"
				9	"math"
				10	"math/rand"
				11	. "runtime"
				12	"strings"
				13	"testing"
				14	)
				15
				16	// Smhasher is a torture test for hash functions.
				17	// https://code.google.com/p/smhasher/
				18	// This code is a port of some of the Smhasher tests to Go.
				19	//
				20	// The current AES hash function passes Smhasher. Our fallback
				21	// hash functions don't, so we only enable the difficult tests when
				22	// we know the AES implementation is available.
				23
				24	// Sanity checks.
				25	// hash should not depend on values outside key.
				26	// hash should not depend on alignment.
				27	func TestSmhasherSanity(t *testing.T) {
				28	r := rand.New(rand.NewSource(1234))
				29	const REP = 10
				30	const KEYMAX = 128
				31	const PAD = 16
				32	const OFFMAX = 16
				33	for k := 0; k < REP; k++ {
				34	for n := 0; n < KEYMAX; n++ {
				35	for i := 0; i < OFFMAX; i++ {
				36	var b [KEYMAX + OFFMAX + 2*PAD]byte
				37	var c [KEYMAX + OFFMAX + 2*PAD]byte
				38	randBytes(r, b[:])
				39	randBytes(r, c[:])
				40	copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
				41	if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) {
				42	t.Errorf("hash depends on bytes outside key")
				43	}
				44	}
				45	}
				46	}
				47	}
				48
				49	type HashSet struct {
				50	m map[uintptr]struct{} // set of hashes added
				51	n int // number of hashes added
				52	}
				53
				54	func newHashSet() *HashSet {
				55	return &HashSet{make(map[uintptr]struct{}), 0}
				56	}
				57	func (s *HashSet) add(h uintptr) {
				58	s.m[h] = struct{}{}
				59	s.n++
				60	}
				61	func (s *HashSet) addS(x string) {
				62	s.add(StringHash(x, 0))
				63	}
				64	func (s *HashSet) addB(x []byte) {
				65	s.add(BytesHash(x, 0))
				66	}
				67	func (s *HashSet) addS_seed(x string, seed uintptr) {
				68	s.add(StringHash(x, seed))
				69	}
				70	func (s HashSet) check(t testing.T) {
				71	const SLOP = 10.0
				72	collisions := s.n - len(s.m)
				73	//fmt.Printf("%d/%d\n", len(s.m), s.n)
				74	pairs := int64(s.n) * int64(s.n-1) / 2
				75	expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
				76	stddev := math.Sqrt(expected)
				77	if float64(collisions) > expected+SLOP3stddev {
				78	t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev)
				79	}
				80	}
				81
				82	// a string plus adding zeros must make distinct hashes
				83	func TestSmhasherAppendedZeros(t *testing.T) {
				84	s := "hello" + strings.Repeat("\x00", 256)
				85	h := newHashSet()
				86	for i := 0; i <= len(s); i++ {
				87	h.addS(s[:i])
				88	}
				89	h.check(t)
				90	}
				91
				92	// All 0-3 byte strings have distinct hashes.
				93	func TestSmhasherSmallKeys(t *testing.T) {
				94	h := newHashSet()
				95	var b [3]byte
				96	for i := 0; i < 256; i++ {
				97	b[0] = byte(i)
				98	h.addB(b[:1])
				99	for j := 0; j < 256; j++ {
				100	b[1] = byte(j)
				101	h.addB(b[:2])
				102	if !testing.Short() {
				103	for k := 0; k < 256; k++ {
				104	b[2] = byte(k)
				105	h.addB(b[:3])
				106	}
				107	}
				108	}
				109	}
				110	h.check(t)
				111	}
				112
				113	// Different length strings of all zeros have distinct hashes.
				114	func TestSmhasherZeros(t *testing.T) {
				115	N := 256 * 1024
				116	if testing.Short() {
				117	N = 1024
				118	}
				119	h := newHashSet()
				120	b := make([]byte, N)
				121	for i := 0; i <= N; i++ {
				122	h.addB(b[:i])
				123	}
				124	h.check(t)
				125	}
				126
				127	// Strings with up to two nonzero bytes all have distinct hashes.
				128	func TestSmhasherTwoNonzero(t *testing.T) {
				129	if testing.Short() {
				130	t.Skip("Skipping in short mode")
				131	}
				132	h := newHashSet()
				133	for n := 2; n <= 16; n++ {
				134	twoNonZero(h, n)
				135	}
				136	h.check(t)
				137	}
				138	func twoNonZero(h *HashSet, n int) {
				139	b := make([]byte, n)
				140
				141	// all zero
				142	h.addB(b[:])
				143
				144	// one non-zero byte
				145	for i := 0; i < n; i++ {
				146	for x := 1; x < 256; x++ {
				147	b[i] = byte(x)
				148	h.addB(b[:])
				149	b[i] = 0
				150	}
				151	}
				152
				153	// two non-zero bytes
				154	for i := 0; i < n; i++ {
				155	for x := 1; x < 256; x++ {
				156	b[i] = byte(x)
				157	for j := i + 1; j < n; j++ {
				158	for y := 1; y < 256; y++ {
				159	b[j] = byte(y)
				160	h.addB(b[:])
				161	b[j] = 0
				162	}
				163	}
				164	b[i] = 0
				165	}
				166	}
				167	}
				168
				169	// Test strings with repeats, like "abcdabcdabcdabcd..."
				170	func TestSmhasherCyclic(t *testing.T) {
				171	if testing.Short() {
				172	t.Skip("Skipping in short mode")
				173	}
Keith Randall	78338d8	2013-09-06 16:23:46 -0700	[diff] [blame]	174	r := rand.New(rand.NewSource(1234))
				175	const REPEAT = 8
				176	const N = 1000000
				177	for n := 4; n <= 12; n++ {
				178	h := newHashSet()
				179	b := make([]byte, REPEAT*n)
				180	for i := 0; i < N; i++ {
				181	b[0] = byte(i * 79 % 97)
				182	b[1] = byte(i * 43 % 137)
				183	b[2] = byte(i * 151 % 197)
				184	b[3] = byte(i * 199 % 251)
				185	randBytes(r, b[4:n])
				186	for j := n; j < n*REPEAT; j++ {
				187	b[j] = b[j-n]
				188	}
				189	h.addB(b)
				190	}
				191	h.check(t)
				192	}
				193	}
				194
				195	// Test strings with only a few bits set
				196	func TestSmhasherSparse(t *testing.T) {
				197	if testing.Short() {
				198	t.Skip("Skipping in short mode")
				199	}
				200	sparse(t, 32, 6)
				201	sparse(t, 40, 6)
				202	sparse(t, 48, 5)
				203	sparse(t, 56, 5)
				204	sparse(t, 64, 5)
				205	sparse(t, 96, 4)
				206	sparse(t, 256, 3)
				207	sparse(t, 2048, 2)
				208	}
				209	func sparse(t *testing.T, n int, k int) {
				210	b := make([]byte, n/8)
				211	h := newHashSet()
				212	setbits(h, b, 0, k)
				213	h.check(t)
				214	}
				215
				216	// set up to k bits at index i and greater
				217	func setbits(h *HashSet, b []byte, i int, k int) {
				218	h.addB(b)
				219	if k == 0 {
				220	return
				221	}
				222	for j := i; j < len(b)*8; j++ {
				223	b[j/8] \|= byte(1 << uint(j&7))
				224	setbits(h, b, j+1, k-1)
				225	b[j/8] &= byte(^(1 << uint(j&7)))
				226	}
				227	}
				228
				229	// Test all possible combinations of n blocks from the set s.
				230	// "permutation" is a bad name here, but it is what Smhasher uses.
				231	func TestSmhasherPermutation(t *testing.T) {
				232	if testing.Short() {
				233	t.Skip("Skipping in short mode")
				234	}
Keith Randall	78338d8	2013-09-06 16:23:46 -0700	[diff] [blame]	235	permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
				236	permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
				237	permutation(t, []uint32{0, 1}, 20)
				238	permutation(t, []uint32{0, 1 << 31}, 20)
				239	permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
				240	}
				241	func permutation(t *testing.T, s []uint32, n int) {
				242	b := make([]byte, n*4)
				243	h := newHashSet()
				244	genPerm(h, b, s, 0)
				245	h.check(t)
				246	}
				247	func genPerm(h *HashSet, b []byte, s []uint32, n int) {
				248	h.addB(b[:n])
				249	if n == len(b) {
				250	return
				251	}
				252	for _, v := range s {
				253	b[n] = byte(v)
				254	b[n+1] = byte(v >> 8)
				255	b[n+2] = byte(v >> 16)
				256	b[n+3] = byte(v >> 24)
				257	genPerm(h, b, s, n+4)
				258	}
				259	}
				260
				261	type Key interface {
				262	clear() // set bits all to 0
				263	random(r *rand.Rand) // set key to something random
				264	bits() int // how many bits key has
				265	flipBit(i int) // flip bit i of the key
				266	hash() uintptr // hash the key
				267	name() string // for error reporting
				268	}
				269
				270	type BytesKey struct {
				271	b []byte
				272	}
				273
				274	func (k *BytesKey) clear() {
				275	for i := range k.b {
				276	k.b[i] = 0
				277	}
				278	}
				279	func (k BytesKey) random(r rand.Rand) {
				280	randBytes(r, k.b)
				281	}
				282	func (k *BytesKey) bits() int {
				283	return len(k.b) * 8
				284	}
				285	func (k *BytesKey) flipBit(i int) {
				286	k.b[i>>3] ^= byte(1 << uint(i&7))
				287	}
				288	func (k *BytesKey) hash() uintptr {
				289	return BytesHash(k.b, 0)
				290	}
				291	func (k *BytesKey) name() string {
				292	return fmt.Sprintf("bytes%d", len(k.b))
				293	}
				294
				295	type Int32Key struct {
				296	i uint32
				297	}
				298
				299	func (k *Int32Key) clear() {
				300	k.i = 0
				301	}
				302	func (k Int32Key) random(r rand.Rand) {
				303	k.i = r.Uint32()
				304	}
				305	func (k *Int32Key) bits() int {
				306	return 32
				307	}
				308	func (k *Int32Key) flipBit(i int) {
				309	k.i ^= 1 << uint(i)
				310	}
				311	func (k *Int32Key) hash() uintptr {
				312	return Int32Hash(k.i, 0)
				313	}
				314	func (k *Int32Key) name() string {
				315	return "int32"
				316	}
				317
				318	type Int64Key struct {
				319	i uint64
				320	}
				321
				322	func (k *Int64Key) clear() {
				323	k.i = 0
				324	}
				325	func (k Int64Key) random(r rand.Rand) {
				326	k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32
				327	}
				328	func (k *Int64Key) bits() int {
				329	return 64
				330	}
				331	func (k *Int64Key) flipBit(i int) {
				332	k.i ^= 1 << uint(i)
				333	}
				334	func (k *Int64Key) hash() uintptr {
				335	return Int64Hash(k.i, 0)
				336	}
				337	func (k *Int64Key) name() string {
				338	return "int64"
				339	}
				340
Keith Randall	3d7e369	2014-08-07 12:33:20 -0700	[diff] [blame]	341	type EfaceKey struct {
				342	i interface{}
				343	}
				344
				345	func (k *EfaceKey) clear() {
				346	k.i = nil
				347	}
				348	func (k EfaceKey) random(r rand.Rand) {
				349	k.i = uint64(r.Int63())
				350	}
				351	func (k *EfaceKey) bits() int {
				352	// use 64 bits. This tests inlined interfaces
				353	// on 64-bit targets and indirect interfaces on
				354	// 32-bit targets.
				355	return 64
				356	}
				357	func (k *EfaceKey) flipBit(i int) {
				358	k.i = k.i.(uint64) ^ uint64(1)<<uint(i)
				359	}
				360	func (k *EfaceKey) hash() uintptr {
				361	return EfaceHash(k.i, 0)
				362	}
				363	func (k *EfaceKey) name() string {
				364	return "Eface"
				365	}
				366
				367	type IfaceKey struct {
				368	i interface {
				369	F()
				370	}
				371	}
				372	type fInter uint64
				373
				374	func (x fInter) F() {
				375	}
				376
				377	func (k *IfaceKey) clear() {
				378	k.i = nil
				379	}
				380	func (k IfaceKey) random(r rand.Rand) {
				381	k.i = fInter(r.Int63())
				382	}
				383	func (k *IfaceKey) bits() int {
				384	// use 64 bits. This tests inlined interfaces
				385	// on 64-bit targets and indirect interfaces on
				386	// 32-bit targets.
				387	return 64
				388	}
				389	func (k *IfaceKey) flipBit(i int) {
				390	k.i = k.i.(fInter) ^ fInter(1)<<uint(i)
				391	}
				392	func (k *IfaceKey) hash() uintptr {
				393	return IfaceHash(k.i, 0)
				394	}
				395	func (k *IfaceKey) name() string {
				396	return "Iface"
				397	}
				398
Keith Randall	78338d8	2013-09-06 16:23:46 -0700	[diff] [blame]	399	// Flipping a single bit of a key should flip each output bit with 50% probability.
				400	func TestSmhasherAvalanche(t *testing.T) {
Keith Randall	78338d8	2013-09-06 16:23:46 -0700	[diff] [blame]	401	if testing.Short() {
				402	t.Skip("Skipping in short mode")
				403	}
				404	avalancheTest1(t, &BytesKey{make([]byte, 2)})
				405	avalancheTest1(t, &BytesKey{make([]byte, 4)})
				406	avalancheTest1(t, &BytesKey{make([]byte, 8)})
				407	avalancheTest1(t, &BytesKey{make([]byte, 16)})
				408	avalancheTest1(t, &BytesKey{make([]byte, 32)})
				409	avalancheTest1(t, &BytesKey{make([]byte, 200)})
				410	avalancheTest1(t, &Int32Key{})
				411	avalancheTest1(t, &Int64Key{})
Keith Randall	3d7e369	2014-08-07 12:33:20 -0700	[diff] [blame]	412	avalancheTest1(t, &EfaceKey{})
				413	avalancheTest1(t, &IfaceKey{})
Keith Randall	78338d8	2013-09-06 16:23:46 -0700	[diff] [blame]	414	}
				415	func avalancheTest1(t *testing.T, k Key) {
				416	const REP = 100000
				417	r := rand.New(rand.NewSource(1234))
				418	n := k.bits()
				419
				420	// grid[i][j] is a count of whether flipping
				421	// input bit i affects output bit j.
				422	grid := make([][hashSize]int, n)
				423
				424	for z := 0; z < REP; z++ {
				425	// pick a random key, hash it
				426	k.random(r)
				427	h := k.hash()
				428
				429	// flip each bit, hash & compare the results
				430	for i := 0; i < n; i++ {
				431	k.flipBit(i)
				432	d := h ^ k.hash()
				433	k.flipBit(i)
				434
				435	// record the effects of that bit flip
				436	g := &grid[i]
				437	for j := 0; j < hashSize; j++ {
				438	g[j] += int(d & 1)
				439	d >>= 1
				440	}
				441	}
				442	}
				443
				444	// Each entry in the grid should be about REP/2.
				445	// More precisely, we did N = k.bits() * hashSize experiments where
				446	// each is the sum of REP coin flips. We want to find bounds on the
				447	// sum of coin flips such that a truly random experiment would have
				448	// all sums inside those bounds with 99% probability.
				449	N := n * hashSize
				450	var c float64
Keith Randall	711d1ad	2014-05-09 15:50:57 -0700	[diff] [blame]	451	// find c such that Prob(mean-cstddev < x < mean+cstddev)^N > .9999
				452	for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
Keith Randall	78338d8	2013-09-06 16:23:46 -0700	[diff] [blame]	453	}
Keith Randall	711d1ad	2014-05-09 15:50:57 -0700	[diff] [blame]	454	c *= 4.0 // allowed slack - we don't need to be perfectly random
Keith Randall	78338d8	2013-09-06 16:23:46 -0700	[diff] [blame]	455	mean := .5 * REP
				456	stddev := .5 * math.Sqrt(REP)
				457	low := int(mean - c*stddev)
				458	high := int(mean + c*stddev)
				459	for i := 0; i < n; i++ {
				460	for j := 0; j < hashSize; j++ {
				461	x := grid[i][j]
				462	if x < low \|\| x > high {
				463	t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP)
				464	}
				465	}
				466	}
				467	}
				468
				469	// All bit rotations of a set of distinct keys
				470	func TestSmhasherWindowed(t *testing.T) {
				471	windowed(t, &Int32Key{})
				472	windowed(t, &Int64Key{})
				473	windowed(t, &BytesKey{make([]byte, 128)})
				474	}
				475	func windowed(t *testing.T, k Key) {
				476	if testing.Short() {
				477	t.Skip("Skipping in short mode")
				478	}
				479	const BITS = 16
				480
				481	for r := 0; r < k.bits(); r++ {
				482	h := newHashSet()
				483	for i := 0; i < 1<<BITS; i++ {
				484	k.clear()
				485	for j := 0; j < BITS; j++ {
				486	if i>>uint(j)&1 != 0 {
				487	k.flipBit((j + r) % k.bits())
				488	}
				489	}
				490	h.add(k.hash())
				491	}
				492	h.check(t)
				493	}
				494	}
				495
				496	// All keys of the form prefix + [A-Za-z0-9]*N + suffix.
				497	func TestSmhasherText(t *testing.T) {
				498	if testing.Short() {
				499	t.Skip("Skipping in short mode")
				500	}
				501	text(t, "Foo", "Bar")
				502	text(t, "FooBar", "")
				503	text(t, "", "FooBar")
				504	}
				505	func text(t *testing.T, prefix, suffix string) {
				506	const N = 4
				507	const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
				508	const L = len(S)
				509	b := make([]byte, len(prefix)+N+len(suffix))
				510	copy(b, prefix)
				511	copy(b[len(prefix)+N:], suffix)
				512	h := newHashSet()
				513	c := b[len(prefix):]
				514	for i := 0; i < L; i++ {
				515	c[0] = S[i]
				516	for j := 0; j < L; j++ {
				517	c[1] = S[j]
				518	for k := 0; k < L; k++ {
				519	c[2] = S[k]
				520	for x := 0; x < L; x++ {
				521	c[3] = S[x]
				522	h.addB(b)
				523	}
				524	}
				525	}
				526	}
				527	h.check(t)
				528	}
				529
				530	// Make sure different seed values generate different hashes.
				531	func TestSmhasherSeed(t *testing.T) {
				532	h := newHashSet()
				533	const N = 100000
				534	s := "hello"
				535	for i := 0; i < N; i++ {
				536	h.addS_seed(s, uintptr(i))
				537	}
				538	h.check(t)
				539	}
				540
				541	// size of the hash output (32 or 64 bits)
				542	const hashSize = 32 + int(^uintptr(0)>>63<<5)
				543
				544	func randBytes(r *rand.Rand, b []byte) {
				545	for i := range b {
				546	b[i] = byte(r.Uint32())
				547	}
				548	}
				549
				550	func benchmarkHash(b *testing.B, n int) {
				551	s := strings.Repeat("A", n)
				552
				553	for i := 0; i < b.N; i++ {
				554	StringHash(s, 0)
				555	}
				556	b.SetBytes(int64(n))
				557	}
				558
				559	func BenchmarkHash5(b *testing.B) { benchmarkHash(b, 5) }
				560	func BenchmarkHash16(b *testing.B) { benchmarkHash(b, 16) }
				561	func BenchmarkHash64(b *testing.B) { benchmarkHash(b, 64) }
				562	func BenchmarkHash1024(b *testing.B) { benchmarkHash(b, 1024) }
				563	func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) }