@@ -10,7 +10,9 @@ in the source distribution for its full text.
1010#include "Hashtable.h"
1111
1212#include <assert.h>
13+ #include <stddef.h>
1314#include <stdint.h>
15+ #include <inttypes.h>
1416#include <stdlib.h>
1517#include <string.h>
1618
@@ -88,32 +90,94 @@ size_t Hashtable_count(const Hashtable* this) {
8890
8991#endif /* NDEBUG */
9092
/* Smallest bucket count a table can have; must equal the first entry of primes[]. */
#define MIN_TABLE_SIZE 11

/* Candidate bucket counts, in ascending order.
 *
 * Primes borrowed from gnulib/lib/gl_anyhash_primes.h.
 *
 * Array of primes, approximately in steps of factor 1.2.
 * This table was computed by executing the Common Lisp expression
 *   (dotimes (i 244) (format t "nextprime(~D)~%" (ceiling (expt 1.2d0 i))))
 * and feeding the result to PARI/gp.
 *
 * The rows below are grouped by number of decimal digits.
 */
static const size_t primes[] = {
   /* 2 digits */
   MIN_TABLE_SIZE, 13, 17, 19, 23, 29, 37, 41, 47, 59, 67, 83, 97,
   /* 3 digits */
   127, 139, 167, 199, 239, 293, 347, 419, 499, 593, 709, 853,
   /* 4 digits */
   1021, 1229, 1471, 1777, 2129, 2543, 3049, 3659, 4391, 5273, 6323, 7589, 9103,
   /* 5 digits */
   10937, 13109, 15727, 18899, 22651, 27179, 32609, 39133, 46957, 56359, 67619,
   81157, 97369,
   /* 6 digits */
   116849, 140221, 168253, 201907, 242309, 290761, 348889, 418667, 502409, 602887,
   723467, 868151,
   /* 7 digits */
   1041779, 1250141, 1500181, 1800191, 2160233, 2592277, 3110741, 3732887, 4479463,
   5375371, 6450413, 7740517, 9288589,
   /* 8 digits */
   11146307, 13375573, 16050689, 19260817, 23112977, 27735583, 33282701, 39939233,
   47927081, 57512503, 69014987, 82818011, 99381577,
   /* 9 digits */
   119257891, 143109469, 171731387, 206077643, 247293161, 296751781, 356102141,
   427322587, 512787097, 615344489, 738413383, 886096061,
   /* 10 digits, still representable in a 32-bit size_t */
   1063315271, 1275978331, 1531174013, 1837408799,
   2204890543UL, 2645868653UL, 3175042391UL, 3810050851UL,
   /* on 32-bit make sure we do not return primes not fitting in size_t */
#if SIZE_MAX > 4294967295ULL
   4572061027ULL, 5486473229ULL, 6583767889ULL, 7900521449ULL, 9480625733ULL,
   /* Largest possible size should be 13652101063ULL == GROWTH_RATE((UINT32_MAX/3)*4);
      some larger values are included in case the above math is wrong */
   11376750877ULL, 13652101063ULL, 16382521261ULL, 19659025513ULL, 23590830631ULL,
#endif
};
100123
101124static size_t nextPrime (size_t n ) {
102- /* on 32-bit make sure we do not return primes not fitting in size_t */
103- for ( size_t i = 0 ; i < ARRAYSIZE ( OEISprimes ) && OEISprimes [i ] < SIZE_MAX ; i ++ ) {
104- if ( n <= OEISprimes [i ])
105- return OEISprimes [ i ];
125+ for ( size_t i = 0 ; i < ARRAYSIZE ( primes ); i ++ ) {
126+ if ( n < primes [i ]) {
127+ return primes [i ];
128+ }
106129 }
107130
108131 CRT_fatalError ("Hashtable: no prime found" );
109132}
110133
/* USABLE_FRACTION is the maximum hash map load.
 * Currently set to 2/3 capacity.
 *
 * Testing indicates that the median and average probe length
 * increases significantly after 2/3 of the hash map capacity.
 *
 * load = items / size   (size being the table capacity)
 *
 * | load | probe max | probe avg | probe median |
 * | ---- | --------- | --------- | ------------ |
 * | 0.60 |     90.58 |     20.19 |         0.65 |
 * | 0.65 |    167.00 |     37.07 |         1.58 |
 * | 0.70 |    230.54 |     61.40 |        15.54 |
 * | 0.75 |    287.00 |     85.23 |        26.15 |
 * | 0.80 |    287.00 |     94.71 |        55.93 |
 *
 * The value is floor(2*n/3), computed as 2*(n/3) + floor(2*(n%3)/3) so that
 * the intermediate result never exceeds n. A plain (n << 1)/3 silently wraps
 * for n > SIZE_MAX/2.
 *
 * The argument is evaluated twice and must be free of side effects.
 */
#define USABLE_FRACTION(n) ((((n) / 3) * 2) + ((((n) % 3) * 2) / 3))
152+
/* SHRINK_THRESHOLD is the number of items at which the hash map should shrink.
 * Currently set to 1/4 of the USABLE_FRACTION; with a usable fraction of 2/3
 * that is 1/6, i.e. ~17% of the total hash map size.
 */
#define SHRINK_THRESHOLD(n) (USABLE_FRACTION(n) / 4)
158+
/* GROWTH_RATE yields the size requested when the table is resized:
 * three times the current number of items of table h.
 *
 * When the table grows because it hit the maximum load (2/3 full) and nothing
 * was deleted, this amounts to doubling the size. When deletions are on a par
 * with insertions, it leaves more head room relative to the item count.
 */
#define GROWTH_RATE(h) (3 * (h)->items)
166+
167+ static inline bool Hashtable_shouldResize (const Hashtable * this ) {
168+ /* grow table */
169+ return this -> items >= USABLE_FRACTION (this -> size ) ||
170+ /* shrink table */
171+ (this -> size > MIN_TABLE_SIZE && this -> items <= SHRINK_THRESHOLD (this -> size ));
172+ }
173+
111174Hashtable * Hashtable_new (size_t size , bool owner ) {
175+ assert (MIN_TABLE_SIZE == primes [0 ]);
112176 Hashtable * this ;
113177
114178 this = xMalloc (sizeof (Hashtable ));
179+ this -> size = nextPrime (size );
115180 this -> items = 0 ;
116- this -> size = size ? nextPrime (size ) : 13 ;
117181 this -> buckets = (HashtableItem * ) xCalloc (this -> size , sizeof (HashtableItem ));
118182 this -> owner = owner ;
119183
@@ -141,6 +205,10 @@ void Hashtable_clear(Hashtable* this) {
141205 assert (Hashtable_isConsistent (this ));
142206}
143207
/* Advances a bucket index by one slot, wrapping to 0 when the incremented
 * index equals size. Replaces the costlier (index + 1) % size. */
static inline size_t inc_index(size_t index, size_t size) {
   const size_t next = index + 1;

   if (next == size)
      return 0;

   return next;
}
211+
144212static void insert (Hashtable * this , ht_key_t key , void * value ) {
145213 size_t index = key % this -> size ;
146214 size_t probe = 0 ;
@@ -177,7 +245,7 @@ static void insert(Hashtable* this, ht_key_t key, void* value) {
177245 value = tmp .value ;
178246 }
179247
180- index = (index + 1 ) % this -> size ;
248+ index = inc_index (index , this -> size ) ;
181249 probe ++ ;
182250
183251 assert (index != origIndex );
@@ -188,12 +256,11 @@ void Hashtable_setSize(Hashtable* this, size_t size) {
188256
189257 assert (Hashtable_isConsistent (this ));
190258
191- if (size <= this -> items )
192- return ;
193-
259+ /* newSize will always be >= MIN_TABLE_SIZE */
194260 size_t newSize = nextPrime (size );
195261 if (newSize == this -> size )
196262 return ;
263+ assert (newSize > this -> items );
197264
198265 HashtableItem * oldBuckets = this -> buckets ;
199266 size_t oldSize = this -> size ;
@@ -221,14 +288,10 @@ void Hashtable_put(Hashtable* this, ht_key_t key, void* value) {
221288 assert (this -> size > 0 );
222289 assert (value );
223290
224- /* grow on load-factor > 0.7 */
225- if (10 * this -> items > 7 * this -> size ) {
226- if (SIZE_MAX / 2 < this -> size )
227- CRT_fatalError ("Hashtable: size overflow" );
228-
229- Hashtable_setSize (this , 2 * this -> size );
291+ /* Resize the hash table, if necessary, before inserting */
292+ if (Hashtable_shouldResize (this )) {
293+ Hashtable_setSize (this , GROWTH_RATE (this ));
230294 }
231-
232295 insert (this , key , value );
233296
234297 assert (Hashtable_isConsistent (this ));
@@ -255,14 +318,14 @@ void* Hashtable_remove(Hashtable* this, ht_key_t key) {
255318 res = this -> buckets [index ].value ;
256319 }
257320
258- size_t next = (index + 1 ) % this -> size ;
321+ size_t next = inc_index (index , this -> size ) ;
259322
260323 while (this -> buckets [next ].value && this -> buckets [next ].probe > 0 ) {
261324 this -> buckets [index ] = this -> buckets [next ];
262325 this -> buckets [index ].probe -= 1 ;
263326
264327 index = next ;
265- next = (index + 1 ) % this -> size ;
328+ next = inc_index (index , this -> size ) ;
266329 }
267330
268331 /* set empty after backward shifting */
@@ -275,7 +338,7 @@ void* Hashtable_remove(Hashtable* this, ht_key_t key) {
275338 if (this -> buckets [index ].probe < probe )
276339 break ;
277340
278- index = (index + 1 ) % this -> size ;
341+ index = inc_index (index , this -> size ) ;
279342 probe ++ ;
280343
281344 assert (index != origIndex );
@@ -284,14 +347,10 @@ void* Hashtable_remove(Hashtable* this, ht_key_t key) {
284347 assert (Hashtable_isConsistent (this ));
285348 assert (Hashtable_get (this , key ) == NULL );
286349
287- /* shrink on load-factor < 0.125 */
288- if (8 * this -> items < this -> size )
289- Hashtable_setSize (this , this -> size / 3 ); /* account for nextPrime rounding up */
290-
291350 return res ;
292351}
293352
294- void * Hashtable_get (Hashtable * this , ht_key_t key ) {
353+ void * Hashtable_get (const Hashtable * this , ht_key_t key ) {
295354 size_t index = key % this -> size ;
296355 size_t probe = 0 ;
297356 void * res = NULL ;
@@ -310,7 +369,7 @@ void* Hashtable_get(Hashtable* this, ht_key_t key) {
310369 if (this -> buckets [index ].probe < probe )
311370 break ;
312371
313- index = (index + 1 ) != this -> size ? ( index + 1 ) : 0 ;
372+ index = inc_index (index , this -> size ) ;
314373 probe ++ ;
315374
316375 assert (index != origIndex );
@@ -319,7 +378,7 @@ void* Hashtable_get(Hashtable* this, ht_key_t key) {
319378 return res ;
320379}
321380
322- void Hashtable_foreach (Hashtable * this , Hashtable_PairFunction f , void * userData ) {
381+ void Hashtable_foreach (const Hashtable * this , Hashtable_PairFunction f , void * userData ) {
323382 assert (Hashtable_isConsistent (this ));
324383 for (size_t i = 0 ; i < this -> size ; i ++ ) {
325384 HashtableItem * walk = & this -> buckets [i ];
0 commit comments