Ken Thompson | bc0b4f0 | 2008-11-13 10:35:44 -0800 | [diff] [blame] | 1 | // Copyright 2009 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
Ken Thompson | bc0b4f0 | 2008-11-13 10:35:44 -0800 | [diff] [blame] | 5 | /* A hash table. |
| 6 | Example, hashing nul-terminated char*s: |
| 7 | hash_hash_t str_hash (void *v) { |
| 8 | char *s; |
| 9 | hash_hash_t hash = 0; |
| 10 | for (s = *(char **)v; *s != 0; s++) { |
| 11 | hash = (hash ^ *s) * 2654435769U; |
| 12 | } |
| 13 | return (hash); |
| 14 | } |
| 15 | int str_eq (void *a, void *b) { |
| 16 | return (strcmp (*(char **)a, *(char **)b) == 0); |
| 17 | } |
| 18 | void str_del (void *arg, void *data) { |
| 19 | *(char **)arg = *(char **)data; |
| 20 | } |
| 21 | |
| 22 | struct hash *h = hash_new (sizeof (char *), &str_hash, &str_eq, &str_del, 3, 12, 15); |
| 23 | ... 3=> 2**3 entries initial size |
| 24 | ... 12=> 2**12 entries before sprouting sub-tables |
| 25 | ... 15=> number of adjacent probes to attempt before growing |
| 26 | |
| 27 | Example lookup: |
| 28 | char *key = "foobar"; |
| 29 | char **result_ptr; |
| 30 | if (hash_lookup (h, &key, (void **) &result_ptr)) { |
| 31 | printf ("found in table: %s\n", *result_ptr); |
| 32 | } else { |
| 33 | printf ("not found in table\n"); |
| 34 | } |
| 35 | |
| 36 | Example insertion: |
| 37 | char *key = strdup ("foobar"); |
| 38 | char **result_ptr; |
| 39 | if (hash_lookup (h, &key, (void **) &result_ptr)) { |
| 40 | printf ("found in table: %s\n", *result_ptr); |
| 41 | printf ("to overwrite, do *result_ptr = key\n"); |
| 42 | } else { |
| 43 | printf ("not found in table; inserted as %s\n", *result_ptr); |
| 44 | assert (*result_ptr == key); |
| 45 | } |
| 46 | |
| 47 | Example deletion: |
| 48 | char *key = "foobar"; |
| 49 | char *result; |
| 50 | if (hash_remove (h, &key, &result)) { |
| 51 | printf ("key found and deleted from table\n"); |
| 52 | printf ("called str_del (&result, data) to copy data to result: %s\n", result); |
| 53 | } else { |
| 54 | printf ("not found in table\n"); |
| 55 | } |
Russ Cox | a52fb81 | 2009-06-04 21:09:06 -0700 | [diff] [blame] | 56 | |
Ken Thompson | bc0b4f0 | 2008-11-13 10:35:44 -0800 | [diff] [blame] | 57 | Example iteration over the elements of *h: |
| 58 | char **data; |
| 59 | struct hash_iter it; |
| 60 | hash_iter_init (h, &it); |
| 61 | for (data = hash_next (&it); data != 0; data = hash_next (&it)) { |
| 62 | printf ("%s\n", *data); |
| 63 | } |
| 64 | */ |
| 65 | |
Russ Cox | 68b4255 | 2010-11-04 14:00:19 -0400 | [diff] [blame] | 66 | #define malloc runtime·mal |
Russ Cox | 68b4255 | 2010-11-04 14:00:19 -0400 | [diff] [blame] | 67 | #define memset(a,b,c) runtime·memclr((byte*)(a), (uint32)(c)) |
| 68 | #define memcpy(a,b,c) runtime·mcpy((byte*)(a),(byte*)(b),(uint32)(c)) |
| 69 | #define assert(a) if(!(a)) runtime·throw("assert") |
| 70 | #define free(x) runtime·free(x) |
| 71 | #define memmove(a,b,c) runtime·memmove(a, b, c) |
Ken Thompson | bc0b4f0 | 2008-11-13 10:35:44 -0800 | [diff] [blame] | 72 | |
Luuk van Dijk | 7400be8 | 2011-01-31 12:27:28 +0100 | [diff] [blame] | 73 | struct Hmap; /* opaque */ |
Ken Thompson | 26b357c | 2008-12-05 18:24:05 -0800 | [diff] [blame] | 74 | struct hash_subtable; /* opaque */ |
| 75 | struct hash_entry; /* opaque */ |
Ken Thompson | bc0b4f0 | 2008-11-13 10:35:44 -0800 | [diff] [blame] | 76 | |
Russ Cox | a52fb81 | 2009-06-04 21:09:06 -0700 | [diff] [blame] | 77 | typedef uintptr uintptr_t; |
Ken Thompson | bc0b4f0 | 2008-11-13 10:35:44 -0800 | [diff] [blame] | 78 | typedef uintptr_t hash_hash_t; |
| 79 | |
| 80 | struct hash_iter { |
Ken Thompson | 26b357c | 2008-12-05 18:24:05 -0800 | [diff] [blame] | 81 | uint8* data; /* returned from next */ |
| 82 | int32 elemsize; /* size of elements in table */ |
| 83 | int32 changes; /* number of changes observed last time */ |
| 84 | int32 i; /* stack pointer in subtable_state */ |
| 85 | hash_hash_t last_hash; /* last hash value returned */ |
Luuk van Dijk | 7400be8 | 2011-01-31 12:27:28 +0100 | [diff] [blame] | 86 | struct Hmap *h; /* the hash table */ |
Ken Thompson | bc0b4f0 | 2008-11-13 10:35:44 -0800 | [diff] [blame] | 87 | struct hash_iter_sub { |
Ken Thompson | 26b357c | 2008-12-05 18:24:05 -0800 | [diff] [blame] | 88 | struct hash_entry *e; /* pointer into subtable */ |
| 89 | struct hash_entry *start; /* start of subtable */ |
| 90 | struct hash_entry *end; /* end of subtable */ |
| 91 | } subtable_state[4]; /* Should be large enough unless the hashing is |
Ken Thompson | bc0b4f0 | 2008-11-13 10:35:44 -0800 | [diff] [blame] | 92 | so bad that many distinct data values hash |
| 93 | to the same hash value. */ |
| 94 | }; |
| 95 | |
| 96 | /* Return a hashtable h 2**init_power empty entries, each with |
Russ Cox | a52fb81 | 2009-06-04 21:09:06 -0700 | [diff] [blame] | 97 | "datasize" data bytes. |
Ken Thompson | bc0b4f0 | 2008-11-13 10:35:44 -0800 | [diff] [blame] | 98 | (*data_hash)(a) should return the hash value of data element *a. |
| 99 | (*data_eq)(a,b) should return whether the data at "a" and the data at "b" |
| 100 | are equal. |
| 101 | (*data_del)(arg, a) will be invoked when data element *a is about to be removed |
| 102 | from the table. "arg" is the argument passed to "hash_remove()". |
| 103 | |
| 104 | Growing is accomplished by resizing if the current tables size is less than |
| 105 | a threshold, and by adding subtables otherwise. hint should be set |
| 106 | the expected maximum size of the table. |
| 107 | "datasize" should be in [sizeof (void*), ..., 255]. If you need a |
| 108 | bigger "datasize", store a pointer to another piece of memory. */ |
| 109 | |
| 110 | //struct hash *hash_new (int32 datasize, |
| 111 | // hash_hash_t (*data_hash) (void *), |
| 112 | // int32 (*data_eq) (void *, void *), |
| 113 | // void (*data_del) (void *, void *), |
| 114 | // int64 hint); |
| 115 | |
| 116 | /* Lookup *data in *h. If the data is found, return 1 and place a pointer to |
| 117 | the found element in *pres. Otherwise return 0 and place 0 in *pres. */ |
Russ Cox | 68b4255 | 2010-11-04 14:00:19 -0400 | [diff] [blame] | 118 | // int32 hash_lookup (struct hash *h, void *data, void **pres); |
Ken Thompson | bc0b4f0 | 2008-11-13 10:35:44 -0800 | [diff] [blame] | 119 | |
| 120 | /* Lookup *data in *h. If the data is found, execute (*data_del) (arg, p) |
| 121 | where p points to the data in the table, then remove it from *h and return |
| 122 | 1. Otherwise return 0. */ |
Russ Cox | 68b4255 | 2010-11-04 14:00:19 -0400 | [diff] [blame] | 123 | // int32 hash_remove (struct hash *h, void *data, void *arg); |
Ken Thompson | bc0b4f0 | 2008-11-13 10:35:44 -0800 | [diff] [blame] | 124 | |
| 125 | /* Lookup *data in *h. If the data is found, return 1, and place a pointer |
| 126 | to the found element in *pres. Otherwise, return 0, allocate a region |
| 127 | for the data to be inserted, and place a pointer to the inserted element |
| 128 | in *pres; it is the caller's responsibility to copy the data to be |
| 129 | inserted to the pointer returned in *pres in this case. |
| 130 | |
| 131 | If using garbage collection, it is the caller's responsibility to |
| 132 | add references for **pres if HASH_ADDED is returned. */ |
Russ Cox | 68b4255 | 2010-11-04 14:00:19 -0400 | [diff] [blame] | 133 | // int32 hash_insert (struct hash *h, void *data, void **pres); |
Ken Thompson | bc0b4f0 | 2008-11-13 10:35:44 -0800 | [diff] [blame] | 134 | |
| 135 | /* Return the number of elements in the table. */ |
Russ Cox | 68b4255 | 2010-11-04 14:00:19 -0400 | [diff] [blame] | 136 | // uint32 hash_count (struct hash *h); |
Ken Thompson | bc0b4f0 | 2008-11-13 10:35:44 -0800 | [diff] [blame] | 137 | |
| 138 | /* The following call is useful only if not using garbage collection on the |
| 139 | table. |
| 140 | Remove all sub-tables associated with *h. |
| 141 | This undoes the effects of hash_init(). |
| 142 | If other memory pointed to by user data must be freed, the caller is |
| 143 | responsible for doiing do by iterating over *h first; see |
| 144 | hash_iter_init()/hash_next(). */ |
Russ Cox | 68b4255 | 2010-11-04 14:00:19 -0400 | [diff] [blame] | 145 | // void hash_destroy (struct hash *h); |
Ken Thompson | bc0b4f0 | 2008-11-13 10:35:44 -0800 | [diff] [blame] | 146 | |
| 147 | /*----- iteration -----*/ |
| 148 | |
| 149 | /* Initialize *it from *h. */ |
Russ Cox | 68b4255 | 2010-11-04 14:00:19 -0400 | [diff] [blame] | 150 | // void hash_iter_init (struct hash *h, struct hash_iter *it); |
Ken Thompson | bc0b4f0 | 2008-11-13 10:35:44 -0800 | [diff] [blame] | 151 | |
| 152 | /* Return the next used entry in the table which which *it was initialized. */ |
Russ Cox | 68b4255 | 2010-11-04 14:00:19 -0400 | [diff] [blame] | 153 | // void *hash_next (struct hash_iter *it); |
Ken Thompson | bc0b4f0 | 2008-11-13 10:35:44 -0800 | [diff] [blame] | 154 | |
| 155 | /*---- test interface ----*/ |
| 156 | /* Call (*data_visit) (arg, level, data) for every data entry in the table, |
| 157 | whether used or not. "level" is the subtable level, 0 means first level. */ |
| 158 | /* TESTING ONLY: DO NOT USE THIS ROUTINE IN NORMAL CODE */ |
Russ Cox | 68b4255 | 2010-11-04 14:00:19 -0400 | [diff] [blame] | 159 | // void hash_visit (struct hash *h, void (*data_visit) (void *arg, int32 level, void *data), void *arg); |