Line data Source code
1 : /***************************************************************************//**
2 :
3 : @file hashtable.c
4 :
5 : @author Stephen Brennan
6 :
7 : @date Created Thursday, 7 November 2013
8 :
9 : @brief Implementation of "libstephen/ht.h".
10 :
11 : @copyright Copyright (c) 2013-2016, Stephen Brennan. Released under the
12 : Revised BSD License. See the LICENSE.txt file for details.
13 :
14 : *******************************************************************************/
15 :
16 : #include <string.h>
17 : #include <stdio.h>
18 :
19 : #include "libstephen/ht.h"
20 : #include "libstephen/re.h" // nelem()
21 :
22 : /*******************************************************************************
23 :
24 : Private Functions
25 :
26 : *******************************************************************************/
27 :
28 : unsigned int ht_primes[] = {
29 : 31, // 2^5
30 : 61,
31 : 127,
32 : 257,
33 : 509,
34 : 1021,
35 : 2053,
36 : 4093,
37 : 8191,
38 : 16381,
39 : 32771,
40 : 65537,
41 : 131071,
42 : 262147,
43 : 524287,
44 : 1048573,
45 : 2097143,
46 : 4194301,
47 : 8388617,
48 : 16777213,
49 : 33554467,
50 : 67108859,
51 : 134217757,
52 : 268435459,
53 : 536870909,
54 : 1073741827,
55 : 2147483647,
56 : 4294967291 // 2^32
57 : };
58 :
59 4 : int binary_search(unsigned int *array, int len, unsigned int value)
60 : {
61 4 : int lo = 0, hi = len, mid;
62 28 : while (lo < hi) {
63 20 : mid = (lo + hi) / 2;
64 20 : if (value <= array[mid]) {
65 20 : hi = mid;
66 : } else {
67 0 : lo = mid + 1;
68 : }
69 : }
70 4 : return lo;
71 : }
72 :
73 : /**
74 : @brief Returns the next hashtable size.
75 :
76 : @param current The current size of the hash table.
77 : @returns The next size in the sequence for hash tables.
78 : */
79 4 : int ht_next_size(int current)
80 : {
81 4 : int curridx = binary_search(ht_primes, nelem(ht_primes), current);
82 4 : return ht_primes[curridx + 1];
83 : }
84 :
85 : /**
86 : @brief Find the proper index for insertion into the table.
87 : @param obj Hash table object.
88 : @param key Key we're inserting.
89 : */
90 89 : unsigned int ht_find_insert(const smb_ht *obj, DATA key)
91 : {
92 89 : unsigned int index = obj->hash(key) % obj->allocated;
93 89 : unsigned int j = 1;
94 :
95 : // Continue searching until we either find a non-full slot, or we find the key
96 : // we're trying to insert.
97 : // until (cell.mark != full || cell.key == key)
98 : // while (cell.mark == full && cell.key != key)
99 798 : while (obj->table[index].mark == HT_FULL &&
100 310 : obj->equal(key, obj->table[index].key) != 0) {
101 : // This is quadratic probing, but I'm avoiding squaring numbers:
102 : // j: 1, 3, 5, 7, 9, 11, ..
103 : // index: 0, 1, 4, 9, 16, 25, 36
104 310 : index = (index + j) % obj->allocated;
105 310 : j += 2;
106 : }
107 :
108 89 : return index;
109 : }
110 :
111 : /**
112 : @brief Find the proper index for retrieval from the table.
113 : @param obj Hash table object.
114 : @param key Key we're looking up.
115 : */
116 158 : unsigned int ht_find_retrieve(const smb_ht *obj, DATA key)
117 : {
118 158 : unsigned int index = obj->hash(key) % obj->allocated;
119 158 : unsigned int j = 1;
120 :
121 : // Continue searching until we either find an empty slot, or we find the key
122 : // we're trying to insert.
123 : // until (cell.mark == empty || cell.key == key)
124 : // while (cell.mark != empty && cell.key != key)
125 1384 : while (obj->table[index].mark != HT_EMPTY &&
126 568 : obj->equal(key, obj->table[index].key) != 0) {
127 : // This is quadratic probing, but I'm avoiding squaring numbers:
128 : // j: 1, 3, 5, 7, 9, 11, ..
129 : // index: 0, 1, 4, 9, 16, 25, 36
130 500 : index = (index + j) % obj->allocated;
131 500 : j += 2;
132 : }
133 158 : return index;
134 : }
135 :
136 : /**
137 : @brief Expand the hash table, adding increment to the capacity of the table.
138 :
139 : @param table The table to expand.
140 : */
141 2 : void ht_resize(smb_ht *table)
142 : {
143 : smb_ht_bckt *old_table;
144 : unsigned int index, old_allocated;
145 :
146 : // Step one: allocate new space for the table
147 2 : old_table = table->table;
148 2 : old_allocated = table->allocated;
149 2 : table->length = 0;
150 2 : table->allocated = ht_next_size(old_allocated);
151 2 : table->table = smb_new(smb_ht_bckt, table->allocated);
152 :
153 : // Zero out the new block too.
154 2 : memset((void*)table->table, 0, table->allocated * sizeof(smb_ht_bckt));
155 :
156 : // Step two, add the old items to the new table.
157 64 : for (index = 0; index < old_allocated; index++) {
158 62 : if (old_table[index].mark == HT_FULL) {
159 32 : ht_insert(table, old_table[index].key, old_table[index].value);
160 : }
161 : }
162 :
163 : // Step three: free old data.
164 2 : smb_free(old_table);
165 2 : }
166 :
167 : /**
168 : @brief Return the load factor of a hash table.
169 :
170 : @param table The table to find the load factor of.
171 : @returns The load factor of the hash table.
172 : */
173 92 : double ht_load_factor(smb_ht *table)
174 : {
175 92 : return ((double) table->length) / ((double) table->allocated);
176 : }
177 :
178 : /*******************************************************************************
179 :
180 : Public Interface Functions
181 :
182 : *******************************************************************************/
183 :
184 7 : void ht_init(smb_ht *table, HASH_FUNCTION hash_func, DATA_COMPARE equal)
185 : {
186 : // Initialize values
187 7 : table->length = 0;
188 7 : table->allocated = HASH_TABLE_INITIAL_SIZE;
189 7 : table->hash = hash_func;
190 7 : table->equal = equal;
191 :
192 : // Create the bucket list
193 7 : table->table = smb_new(smb_ht_bckt, HASH_TABLE_INITIAL_SIZE);
194 :
195 : // Zero out the entries in the table so we don't get segmentation faults.
196 7 : memset((void*)table->table, 0, HASH_TABLE_INITIAL_SIZE * sizeof(smb_ht_bckt));
197 7 : }
198 :
199 7 : smb_ht *ht_create(HASH_FUNCTION hash_func, DATA_COMPARE equal)
200 : {
201 : // Allocate and create the table.
202 : smb_ht *table;
203 7 : table = smb_new(smb_ht, 1);
204 7 : ht_init(table, hash_func, equal);
205 7 : return table;
206 : }
207 :
208 7 : void ht_destroy_act(smb_ht *table, DATA_ACTION deleter)
209 : {
210 : unsigned int i;
211 :
212 : // Do the action on all the items in the table, if provided.
213 7 : if (deleter) {
214 188 : for (i = 0; i < table->allocated; i++) {
215 184 : if (table->table[i].mark == HT_FULL) {
216 39 : deleter(table->table[i].value);
217 : }
218 : }
219 : }
220 :
221 : // Delete the table.
222 7 : smb_free(table->table);
223 7 : }
224 :
225 0 : void ht_destroy(smb_ht *table)
226 : {
227 0 : ht_destroy_act(table, NULL);
228 0 : }
229 :
230 7 : void ht_delete_act(smb_ht *table, DATA_ACTION deleter)
231 : {
232 7 : if (!table) {
233 7 : return;
234 : }
235 :
236 7 : ht_destroy_act(table, deleter);
237 7 : smb_free(table);
238 : }
239 :
240 3 : void ht_delete(smb_ht *table)
241 : {
242 3 : ht_delete_act(table, NULL);
243 3 : }
244 :
245 92 : void ht_insert(smb_ht *table, DATA key, DATA value)
246 : {
247 : unsigned int index;
248 92 : if (ht_load_factor(table) >= HASH_TABLE_MAX_LOAD_FACTOR) {
249 2 : ht_resize(table);
250 : }
251 :
252 : // First, probe for the key as if we're trying to return it. If we find it,
253 : // we update the existing key.
254 92 : index = ht_find_retrieve(table, key);
255 92 : if (table->table[index].mark == HT_FULL) {
256 3 : table->table[index].value = value;
257 95 : return;
258 : }
259 :
260 : // If we don't find the key, then we find the first open slot or gravestone.
261 89 : index = ht_find_insert(table, key);
262 89 : table->table[index].key = key;
263 89 : table->table[index].value = value;
264 89 : table->table[index].mark = HT_FULL;
265 89 : table->length++;
266 : }
267 :
268 8 : void ht_remove_act(smb_ht *table, DATA key, DATA_ACTION deleter,
269 : smb_status *status)
270 : {
271 8 : *status = SMB_SUCCESS;
272 8 : unsigned int index = ht_find_retrieve(table, key);
273 :
274 : // If the returned slot isn't full, that means we couldn't find it.
275 8 : if (table->table[index].mark != HT_FULL) {
276 0 : *status = SMB_NOT_FOUND_ERROR;
277 8 : return;
278 : }
279 :
280 : // Perform the action if there is one.
281 8 : if (deleter) {
282 8 : deleter(table->table[index].value);
283 : }
284 :
285 : // Mark the slot with a "grave stone", indicating it is deleted.
286 8 : table->table[index].mark = HT_GRAVE;
287 8 : table->length--;
288 : }
289 :
290 0 : void ht_remove(smb_ht *table, DATA key, smb_status *status)
291 : {
292 0 : ht_remove_act(table, key, NULL, status);
293 0 : }
294 :
295 58 : DATA ht_get(smb_ht const *table, DATA key, smb_status *status)
296 : {
297 58 : *status = SMB_SUCCESS;
298 58 : unsigned int index = ht_find_retrieve(table, key);
299 :
300 : // If the slot is not marked full, we didn't find the key.
301 58 : if (table->table[index].mark != HT_FULL) {
302 1 : *status = SMB_NOT_FOUND_ERROR;
303 1 : return PTR(NULL);
304 : }
305 :
306 : // Otherwise, return the key.
307 57 : return table->table[index].value;
308 : }
309 :
310 5 : bool ht_contains(smb_ht const *table, DATA key)
311 : {
312 5 : smb_status status = SMB_SUCCESS;
313 5 : ht_get(table, key, &status);
314 5 : return status == SMB_SUCCESS;
315 : }
316 :
317 5 : DATA ht_iter_next(smb_iter *iter, smb_status *status)
318 : {
319 5 : *status = SMB_SUCCESS;
320 5 : long long int i = iter->state.data_llint + 1;
321 5 : const smb_ht *ht = iter->ds;
322 :
323 : // Move up until we either run off the table, or find a bucket that contains
324 : // something.
325 33 : while (i < ht->allocated && ht->table[i].mark != HT_FULL) {
326 23 : i++;
327 : }
328 :
329 : // save current index back to data structure
330 5 : iter->state.data_llint = i;
331 :
332 : // If we hit the end of the table, stop iterating.
333 5 : if (i >= ht->allocated) {
334 0 : *status = SMB_STOP_ITERATION;
335 0 : return LLINT(0);
336 : } else {
337 : // Otherwise, return the key we found.
338 5 : iter->index++; // count the number of items we've found so far
339 5 : return ht->table[i].key;
340 : }
341 : }
342 :
343 6 : bool ht_iter_has_next(smb_iter *iter)
344 : {
345 6 : const smb_ht *ht = iter->ds;
346 6 : return iter->index < (int)ht->length;
347 : }
348 :
349 0 : void ht_iter_destroy(smb_iter *iter)
350 : {
351 : (void)iter; //unused
352 0 : }
353 :
354 0 : void ht_iter_delete(smb_iter *iter)
355 : {
356 0 : ht_iter_destroy(iter);
357 0 : free(iter);
358 0 : }
359 :
360 1 : smb_iter ht_get_iter(const smb_ht *ht)
361 : {
362 1 : smb_iter iter = {
363 : // Data:
364 : .ds = ht, // A reference to the data structure.
365 : .state = LLINT(-1), // This tracks the actual index into the table.
366 : .index = 0, // This tracks how many keys have been returned.
367 :
368 : // Functions
369 : .next = &ht_iter_next,
370 : .has_next = &ht_iter_has_next,
371 : .destroy = &ht_iter_destroy,
372 : .delete = &ht_iter_delete
373 : };
374 1 : return iter;
375 : }
376 :
377 72 : unsigned int ht_string_hash(DATA data)
378 : {
379 72 : char *theString = (char *)data.data_ptr;
380 72 : unsigned int hash = 0;
381 :
382 820 : while (theString && *theString != '\0' ) {
383 676 : hash = (hash << 5) - hash + *theString;
384 676 : theString++;
385 : }
386 :
387 : // printf("Hash of \"%s\": %u\n", (char*)data.data_ptr, hash);
388 72 : return hash;
389 : }
390 :
391 0 : void ht_print(smb_ht const *table, int full_mode)
392 : {
393 : unsigned int i;
394 0 : char *MARKS[] = {"EMPTY", " FULL", "GRAVE"};
395 :
396 0 : for (i = 0; i < table->allocated; i++) {
397 0 : if (full_mode || table->table[i].mark == HT_FULL) {
398 0 : printf("[%04d|%s]: key=0x%llx, value=0x%llx\n", i,
399 0 : MARKS[table->table[i].mark], table->table[i].key.data_llint,
400 0 : table->table[i].value.data_llint);
401 : }
402 : }
403 0 : }
|