LCOV - code coverage report
Current view: top level - src - hashtable.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 122 146 83.6 %
Date: 2016-12-21 02:12:01 Functions: 19 24 79.2 %

          Line data    Source code
       1             : /***************************************************************************//**
       2             : 
       3             :   @file         hashtable.c
       4             : 
       5             :   @author       Stephen Brennan
       6             : 
       7             :   @date         Created Thursday, 7 November 2013
       8             : 
       9             :   @brief        Implementation of "libstephen/ht.h".
      10             : 
      11             :   @copyright    Copyright (c) 2013-2016, Stephen Brennan.  Released under the
      12             :                 Revised BSD License.  See the LICENSE.txt file for details.
      13             : 
      14             : *******************************************************************************/
      15             : 
      16             : #include <string.h>
      17             : #include <stdio.h>
      18             : 
      19             : #include "libstephen/ht.h"
      20             : #include "libstephen/re.h" // nelem()
      21             : 
      22             : /*******************************************************************************
      23             : 
      24             :                                Private Functions
      25             : 
      26             : *******************************************************************************/
      27             : 
      28             : unsigned int ht_primes[] = {
      29             :   31, // 2^5
      30             :   61,
      31             :   127,
      32             :   257,
      33             :   509,
      34             :   1021,
      35             :   2053,
      36             :   4093,
      37             :   8191,
      38             :   16381,
      39             :   32771,
      40             :   65537,
      41             :   131071,
      42             :   262147,
      43             :   524287,
      44             :   1048573,
      45             :   2097143,
      46             :   4194301,
      47             :   8388617,
      48             :   16777213,
      49             :   33554467,
      50             :   67108859,
      51             :   134217757,
      52             :   268435459,
      53             :   536870909,
      54             :   1073741827,
      55             :   2147483647,
      56             :   4294967291 // 2^32
      57             : };
      58             : 
      59           4 : int binary_search(unsigned int *array, int len, unsigned int value)
      60             : {
      61           4 :   int lo = 0, hi = len, mid;
      62          28 :   while (lo < hi) {
      63          20 :     mid = (lo + hi) / 2;
      64          20 :     if (value <= array[mid]) {
      65          20 :       hi = mid;
      66             :     } else {
      67           0 :       lo = mid + 1;
      68             :     }
      69             :   }
      70           4 :   return lo;
      71             : }
      72             : 
      73             : /**
      74             :    @brief Returns the next hashtable size.
      75             : 
      76             :    @param current The current size of the hash table.
      77             :    @returns The next size in the sequence for hash tables.
      78             :  */
      79           4 : int ht_next_size(int current)
      80             : {
      81           4 :   int curridx = binary_search(ht_primes, nelem(ht_primes), current);
      82           4 :   return ht_primes[curridx + 1];
      83             : }
      84             : 
      85             : /**
      86             :    @brief Find the proper index for insertion into the table.
      87             :    @param obj Hash table object.
      88             :    @param key Key we're inserting.
      89             :  */
      90          89 : unsigned int ht_find_insert(const smb_ht *obj, DATA key)
      91             : {
      92          89 :   unsigned int index = obj->hash(key) % obj->allocated;
      93          89 :   unsigned int j = 1;
      94             : 
      95             :   // Continue searching until we either find a non-full slot, or we find the key
      96             :   // we're trying to insert.
      97             :   // until (cell.mark != full || cell.key == key)
      98             :   // while (cell.mark == full && cell.key != key)
      99         798 :   while (obj->table[index].mark == HT_FULL &&
     100         310 :          obj->equal(key, obj->table[index].key) != 0) {
     101             :     // This is quadratic probing, but I'm avoiding squaring numbers:
     102             :     // j:     1, 3, 5, 7,  9, 11, ..
     103             :     // index: 0, 1, 4, 9, 16, 25, 36
     104         310 :     index = (index + j) % obj->allocated;
     105         310 :     j += 2;
     106             :   }
     107             : 
     108          89 :   return index;
     109             : }
     110             : 
     111             : /**
     112             :    @brief Find the proper index for retrieval from the table.
     113             :    @param obj Hash table object.
     114             :    @param key Key we're looking up.
     115             :  */
     116         158 : unsigned int ht_find_retrieve(const smb_ht *obj, DATA key)
     117             : {
     118         158 :   unsigned int index = obj->hash(key) % obj->allocated;
     119         158 :   unsigned int j = 1;
     120             : 
     121             :   // Continue searching until we either find an empty slot, or we find the key
     122             :   // we're trying to insert.
     123             :   // until (cell.mark == empty || cell.key == key)
     124             :   // while (cell.mark != empty && cell.key != key)
     125        1384 :   while (obj->table[index].mark != HT_EMPTY &&
     126         568 :          obj->equal(key, obj->table[index].key) != 0) {
     127             :     // This is quadratic probing, but I'm avoiding squaring numbers:
     128             :     // j:     1, 3, 5, 7,  9, 11, ..
     129             :     // index: 0, 1, 4, 9, 16, 25, 36
     130         500 :     index = (index + j) % obj->allocated;
     131         500 :     j += 2;
     132             :   }
     133         158 :   return index;
     134             : }
     135             : 
     136             : /**
     137             :    @brief Expand the hash table, adding increment to the capacity of the table.
     138             : 
     139             :    @param table The table to expand.
     140             :  */
     141           2 : void ht_resize(smb_ht *table)
     142             : {
     143             :   smb_ht_bckt *old_table;
     144             :   unsigned int index, old_allocated;
     145             : 
     146             :   // Step one: allocate new space for the table
     147           2 :   old_table = table->table;
     148           2 :   old_allocated = table->allocated;
     149           2 :   table->length = 0;
     150           2 :   table->allocated = ht_next_size(old_allocated);
     151           2 :   table->table = smb_new(smb_ht_bckt, table->allocated);
     152             : 
     153             :   // Zero out the new block too.
     154           2 :   memset((void*)table->table, 0, table->allocated * sizeof(smb_ht_bckt));
     155             : 
     156             :   // Step two, add the old items to the new table.
     157          64 :   for (index = 0; index < old_allocated; index++) {
     158          62 :     if (old_table[index].mark == HT_FULL) {
     159          32 :       ht_insert(table, old_table[index].key, old_table[index].value);
     160             :     }
     161             :   }
     162             : 
     163             :   // Step three: free old data.
     164           2 :   smb_free(old_table);
     165           2 : }
     166             : 
     167             : /**
     168             :    @brief Return the load factor of a hash table.
     169             : 
     170             :    @param table The table to find the load factor of.
     171             :    @returns The load factor of the hash table.
     172             :  */
     173          92 : double ht_load_factor(smb_ht *table)
     174             : {
     175          92 :   return ((double) table->length) / ((double) table->allocated);
     176             : }
     177             : 
     178             : /*******************************************************************************
     179             : 
     180             :                            Public Interface Functions
     181             : 
     182             : *******************************************************************************/
     183             : 
     184           7 : void ht_init(smb_ht *table, HASH_FUNCTION hash_func, DATA_COMPARE equal)
     185             : {
     186             :   // Initialize values
     187           7 :   table->length = 0;
     188           7 :   table->allocated = HASH_TABLE_INITIAL_SIZE;
     189           7 :   table->hash = hash_func;
     190           7 :   table->equal = equal;
     191             : 
     192             :   // Create the bucket list
     193           7 :   table->table = smb_new(smb_ht_bckt, HASH_TABLE_INITIAL_SIZE);
     194             : 
     195             :   // Zero out the entries in the table so we don't get segmentation faults.
     196           7 :   memset((void*)table->table, 0, HASH_TABLE_INITIAL_SIZE * sizeof(smb_ht_bckt));
     197           7 : }
     198             : 
     199           7 : smb_ht *ht_create(HASH_FUNCTION hash_func, DATA_COMPARE equal)
     200             : {
     201             :   // Allocate and create the table.
     202             :   smb_ht *table;
     203           7 :   table = smb_new(smb_ht, 1);
     204           7 :   ht_init(table, hash_func, equal);
     205           7 :   return table;
     206             : }
     207             : 
     208           7 : void ht_destroy_act(smb_ht *table, DATA_ACTION deleter)
     209             : {
     210             :   unsigned int i;
     211             : 
     212             :   // Do the action on all the items in the table, if provided.
     213           7 :   if (deleter) {
     214         188 :     for (i = 0; i < table->allocated; i++) {
     215         184 :       if (table->table[i].mark == HT_FULL) {
     216          39 :         deleter(table->table[i].value);
     217             :       }
     218             :     }
     219             :   }
     220             : 
     221             :   // Delete the table.
     222           7 :   smb_free(table->table);
     223           7 : }
     224             : 
     225           0 : void ht_destroy(smb_ht *table)
     226             : {
     227           0 :   ht_destroy_act(table, NULL);
     228           0 : }
     229             : 
     230           7 : void ht_delete_act(smb_ht *table, DATA_ACTION deleter)
     231             : {
     232           7 :   if (!table) {
     233           7 :     return;
     234             :   }
     235             : 
     236           7 :   ht_destroy_act(table, deleter);
     237           7 :   smb_free(table);
     238             : }
     239             : 
     240           3 : void ht_delete(smb_ht *table)
     241             : {
     242           3 :   ht_delete_act(table, NULL);
     243           3 : }
     244             : 
     245          92 : void ht_insert(smb_ht *table, DATA key, DATA value)
     246             : {
     247             :   unsigned int index;
     248          92 :   if (ht_load_factor(table) >= HASH_TABLE_MAX_LOAD_FACTOR) {
     249           2 :     ht_resize(table);
     250             :   }
     251             : 
     252             :   // First, probe for the key as if we're trying to return it.  If we find it,
     253             :   // we update the existing key.
     254          92 :   index = ht_find_retrieve(table, key);
     255          92 :   if (table->table[index].mark == HT_FULL) {
     256           3 :     table->table[index].value = value;
     257          95 :     return;
     258             :   }
     259             : 
     260             :   // If we don't find the key, then we find the first open slot or gravestone.
     261          89 :   index = ht_find_insert(table, key);
     262          89 :   table->table[index].key = key;
     263          89 :   table->table[index].value = value;
     264          89 :   table->table[index].mark = HT_FULL;
     265          89 :   table->length++;
     266             : }
     267             : 
     268           8 : void ht_remove_act(smb_ht *table, DATA key, DATA_ACTION deleter,
     269             :                    smb_status *status)
     270             : {
     271           8 :   *status = SMB_SUCCESS;
     272           8 :   unsigned int index = ht_find_retrieve(table, key);
     273             : 
     274             :   // If the returned slot isn't full, that means we couldn't find it.
     275           8 :   if (table->table[index].mark != HT_FULL) {
     276           0 :     *status = SMB_NOT_FOUND_ERROR;
     277           8 :     return;
     278             :   }
     279             : 
     280             :   // Perform the action if there is one.
     281           8 :   if (deleter) {
     282           8 :     deleter(table->table[index].value);
     283             :   }
     284             : 
     285             :   // Mark the slot with a "grave stone", indicating it is deleted.
     286           8 :   table->table[index].mark = HT_GRAVE;
     287           8 :   table->length--;
     288             : }
     289             : 
     290           0 : void ht_remove(smb_ht *table, DATA key, smb_status *status)
     291             : {
     292           0 :   ht_remove_act(table, key, NULL, status);
     293           0 : }
     294             : 
     295          58 : DATA ht_get(smb_ht const *table, DATA key, smb_status *status)
     296             : {
     297          58 :   *status = SMB_SUCCESS;
     298          58 :   unsigned int index = ht_find_retrieve(table, key);
     299             : 
     300             :   // If the slot is not marked full, we didn't find the key.
     301          58 :   if (table->table[index].mark != HT_FULL) {
     302           1 :     *status = SMB_NOT_FOUND_ERROR;
     303           1 :     return PTR(NULL);
     304             :   }
     305             : 
     306             :   // Otherwise, return the key.
     307          57 :   return table->table[index].value;
     308             : }
     309             : 
     310           5 : bool ht_contains(smb_ht const *table, DATA key)
     311             : {
     312           5 :   smb_status status = SMB_SUCCESS;
     313           5 :   ht_get(table, key, &status);
     314           5 :   return status == SMB_SUCCESS;
     315             : }
     316             : 
     317           5 : DATA ht_iter_next(smb_iter *iter, smb_status *status)
     318             : {
     319           5 :   *status = SMB_SUCCESS;
     320           5 :   long long int i = iter->state.data_llint + 1;
     321           5 :   const smb_ht *ht = iter->ds;
     322             : 
     323             :   // Move up until we either run off the table, or find a bucket that contains
     324             :   // something.
     325          33 :   while (i < ht->allocated && ht->table[i].mark != HT_FULL) {
     326          23 :     i++;
     327             :   }
     328             : 
     329             :   // save current index back to data structure
     330           5 :   iter->state.data_llint = i;
     331             : 
     332             :   // If we hit the end of the table, stop iterating.
     333           5 :   if (i >= ht->allocated) {
     334           0 :     *status = SMB_STOP_ITERATION;
     335           0 :     return LLINT(0);
     336             :   } else {
     337             :     // Otherwise, return the key we found.
     338           5 :     iter->index++; // count the number of items we've found so far
     339           5 :     return ht->table[i].key;
     340             :   }
     341             : }
     342             : 
     343           6 : bool ht_iter_has_next(smb_iter *iter)
     344             : {
     345           6 :   const smb_ht *ht = iter->ds;
     346           6 :   return iter->index < (int)ht->length;
     347             : }
     348             : 
     349           0 : void ht_iter_destroy(smb_iter *iter)
     350             : {
     351             :   (void)iter; //unused
     352           0 : }
     353             : 
     354           0 : void ht_iter_delete(smb_iter *iter)
     355             : {
     356           0 :   ht_iter_destroy(iter);
     357           0 :   free(iter);
     358           0 : }
     359             : 
     360           1 : smb_iter ht_get_iter(const smb_ht *ht)
     361             : {
     362           1 :   smb_iter iter = {
     363             :     // Data:
     364             :     .ds = ht,           // A reference to the data structure.
     365             :     .state = LLINT(-1), // This tracks the actual index into the table.
     366             :     .index = 0,         // This tracks how many keys have been returned.
     367             : 
     368             :     // Functions
     369             :     .next = &ht_iter_next,
     370             :     .has_next = &ht_iter_has_next,
     371             :     .destroy = &ht_iter_destroy,
     372             :     .delete = &ht_iter_delete
     373             :   };
     374           1 :   return iter;
     375             : }
     376             : 
     377          72 : unsigned int ht_string_hash(DATA data)
     378             : {
     379          72 :   char *theString = (char *)data.data_ptr;
     380          72 :   unsigned int hash = 0;
     381             : 
     382         820 :   while (theString && *theString != '\0' ) {
     383         676 :     hash = (hash << 5) - hash + *theString;
     384         676 :     theString++;
     385             :   }
     386             : 
     387             :   // printf("Hash of \"%s\": %u\n", (char*)data.data_ptr, hash);
     388          72 :   return hash;
     389             : }
     390             : 
     391           0 : void ht_print(smb_ht const *table, int full_mode)
     392             : {
     393             :   unsigned int i;
     394           0 :   char *MARKS[] = {"EMPTY", " FULL", "GRAVE"};
     395             : 
     396           0 :   for (i = 0; i < table->allocated; i++) {
     397           0 :     if (full_mode || table->table[i].mark == HT_FULL) {
     398           0 :       printf("[%04d|%s]: key=0x%llx, value=0x%llx\n", i,
     399           0 :              MARKS[table->table[i].mark], table->table[i].key.data_llint,
     400           0 :              table->table[i].value.data_llint);
     401             :     }
     402             :   }
     403           0 : }

Generated by: LCOV version 1.11