Skip to content

Commit 4c7d027

Browse files
committed
Hashtable: only shrink in Put and fix various other issues
Changes: * Only resize the hash table in Hashtable_put to reduce thrashing; otherwise a large deletion results in multiple incremental resizes. * Reduce the load factor from 0.7 to 2/3, since testing shows that the average and median probe lengths increase quickly once more than 2/3 of the capacity is used. * Eliminate modulo math when incrementing the index in put, get, and remove. * Increase the number of available prime sizes. * Define the hash key type as uint32_t instead of "unsigned int"; uint32_t is stable across platforms and large enough to hold any PID. * Change Hashtable_get to take a "const" Hashtable. This was originally meant to fix an issue with the Hashtable shrink factor that was fixed with 230dc9c.
1 parent 4b8b61f commit 4c7d027

2 files changed

Lines changed: 96 additions & 37 deletions

File tree

Hashtable.c

Lines changed: 92 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ in the source distribution for its full text.
1010
#include "Hashtable.h"
1111

1212
#include <assert.h>
13+
#include <stddef.h>
1314
#include <stdint.h>
15+
#include <inttypes.h>
1416
#include <stdlib.h>
1517
#include <string.h>
1618

@@ -88,32 +90,94 @@ size_t Hashtable_count(const Hashtable* this) {
8890

8991
#endif /* NDEBUG */
9092

91-
/* https://oeis.org/A014234 */
92-
static const uint64_t OEISprimes[] = {
93-
7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191,
94-
16381, 32749, 65521, 131071, 262139, 524287, 1048573,
95-
2097143, 4194301, 8388593, 16777213, 33554393,
96-
67108859, 134217689, 268435399, 536870909, 1073741789,
97-
2147483647, 4294967291, 8589934583, 17179869143,
98-
34359738337, 68719476731, 137438953447
93+
#define MIN_TABLE_SIZE 11
94+
95+
/* Primes borrowed from gnulib/lib/gl_anyhash_primes.h.
96+
97+
Array of primes, approximately in steps of factor 1.2.
98+
This table was computed by executing the Common Lisp expression
99+
(dotimes (i 244) (format t "nextprime(~D)~%" (ceiling (expt 1.2d0 i))))
100+
and feeding the result to PARI/gp. */
101+
static const size_t primes[] = {
102+
MIN_TABLE_SIZE, 13, 17, 19, 23, 29, 37, 41, 47, 59, 67, 83, 97, 127, 139,
103+
167, 199, 239, 293, 347, 419, 499, 593, 709, 853, 1021, 1229, 1471, 1777,
104+
2129, 2543, 3049, 3659, 4391, 5273, 6323, 7589, 9103, 10937, 13109, 15727,
105+
18899, 22651, 27179, 32609, 39133, 46957, 56359, 67619, 81157, 97369,
106+
116849, 140221, 168253, 201907, 242309, 290761, 348889, 418667, 502409,
107+
602887, 723467, 868151, 1041779, 1250141, 1500181, 1800191, 2160233,
108+
2592277, 3110741, 3732887, 4479463, 5375371, 6450413, 7740517, 9288589,
109+
11146307, 13375573, 16050689, 19260817, 23112977, 27735583, 33282701,
110+
39939233, 47927081, 57512503, 69014987, 82818011, 99381577, 119257891,
111+
143109469, 171731387, 206077643, 247293161, 296751781, 356102141, 427322587,
112+
512787097, 615344489, 738413383, 886096061, 1063315271, 1275978331,
113+
1531174013, 1837408799, 2204890543UL, 2645868653UL, 3175042391UL,
114+
3810050851UL,
115+
/* on 32-bit make sure we do not return primes not fitting in size_t */
116+
#if SIZE_MAX > 4294967295ULL
117+
4572061027ULL, 5486473229ULL, 6583767889ULL, 7900521449ULL, 9480625733ULL,
118+
/* Largest possible size should be 13652101063ULL == GROWTH_RATE((UINT32_MAX/3)*4)
119+
we include some larger values in case the above math is wrong */
120+
11376750877ULL, 13652101063ULL, 16382521261ULL, 19659025513ULL, 23590830631ULL,
121+
#endif
99122
};
100123

101124
static size_t nextPrime(size_t n) {
102-
/* on 32-bit make sure we do not return primes not fitting in size_t */
103-
for (size_t i = 0; i < ARRAYSIZE(OEISprimes) && OEISprimes[i] < SIZE_MAX; i++) {
104-
if (n <= OEISprimes[i])
105-
return OEISprimes[i];
125+
for (size_t i = 0; i < ARRAYSIZE(primes); i++) {
126+
if (n < primes[i]) {
127+
return primes[i];
128+
}
106129
}
107130

108131
CRT_fatalError("Hashtable: no prime found");
109132
}
110133

134+
/* USABLE_FRACTION is the maximum hash map load.
135+
* Currently set to 2/3 capacity.
136+
*
137+
* Testing indicates that the median and average probe lengths
138+
* increase significantly after 2/3 of the hash map capacity.
139+
*
140+
* load = items / {size,capacity}
141+
*
142+
* | load | probe max | probe avg | probe median |
143+
* | ---- | --------- | --------- | ------------ |
144+
* | 0.60 | 90.58 | 20.19 | 0.65 |
145+
* | 0.65 | 167.00 | 37.07 | 1.58 |
146+
* | 0.70 | 230.54 | 61.40 | 15.54 |
147+
* | 0.75 | 287.00 | 85.23 | 26.15 |
148+
* | 0.80 | 287.00 | 94.71 | 55.93 |
149+
*
150+
*/
151+
#define USABLE_FRACTION(n) (((n) << 1)/3)
152+
153+
/* SHRINK_THRESHOLD is the number of items at which the hash map should shrink.
154+
* Currently set to 1/4 of the USABLE_FRACTION, which is ~17% of the total
155+
* hash map size.
156+
*/
157+
#define SHRINK_THRESHOLD(n) (USABLE_FRACTION((n)) / 4)
158+
159+
/* GROWTH_RATE. Growth rate upon hitting maximum load.
160+
* Currently set to items*3.
161+
* This means that hashes double in size when growing without deletions,
162+
* but have more head room when the number of deletions is on a par with the
163+
* number of insertions.
164+
*/
165+
#define GROWTH_RATE(h) ((h)->items*3)
166+
167+
static inline bool Hashtable_shouldResize(const Hashtable* this) {
168+
/* grow table */
169+
return this->items >= USABLE_FRACTION(this->size) ||
170+
/* shrink table */
171+
(this->size > MIN_TABLE_SIZE && this->items <= SHRINK_THRESHOLD(this->size));
172+
}
173+
111174
Hashtable* Hashtable_new(size_t size, bool owner) {
175+
assert(MIN_TABLE_SIZE == primes[0]);
112176
Hashtable* this;
113177

114178
this = xMalloc(sizeof(Hashtable));
179+
this->size = nextPrime(size);
115180
this->items = 0;
116-
this->size = size ? nextPrime(size) : 13;
117181
this->buckets = (HashtableItem*) xCalloc(this->size, sizeof(HashtableItem));
118182
this->owner = owner;
119183

@@ -141,6 +205,10 @@ void Hashtable_clear(Hashtable* this) {
141205
assert(Hashtable_isConsistent(this));
142206
}
143207

208+
static inline size_t inc_index(size_t index, size_t size) {
209+
return ++index != size ? index : 0;
210+
}
211+
144212
static void insert(Hashtable* this, ht_key_t key, void* value) {
145213
size_t index = key % this->size;
146214
size_t probe = 0;
@@ -177,7 +245,7 @@ static void insert(Hashtable* this, ht_key_t key, void* value) {
177245
value = tmp.value;
178246
}
179247

180-
index = (index + 1) % this->size;
248+
index = inc_index(index, this->size);
181249
probe++;
182250

183251
assert(index != origIndex);
@@ -186,11 +254,9 @@ static void insert(Hashtable* this, ht_key_t key, void* value) {
186254

187255
void Hashtable_setSize(Hashtable* this, size_t size) {
188256

257+
assert(size == 0 || size > this->items);
189258
assert(Hashtable_isConsistent(this));
190259

191-
if (size <= this->items)
192-
return;
193-
194260
size_t newSize = nextPrime(size);
195261
if (newSize == this->size)
196262
return;
@@ -221,14 +287,10 @@ void Hashtable_put(Hashtable* this, ht_key_t key, void* value) {
221287
assert(this->size > 0);
222288
assert(value);
223289

224-
/* grow on load-factor > 0.7 */
225-
if (10 * this->items > 7 * this->size) {
226-
if (SIZE_MAX / 2 < this->size)
227-
CRT_fatalError("Hashtable: size overflow");
228-
229-
Hashtable_setSize(this, 2 * this->size);
290+
/* Resize the hash table, if necessary, before inserting */
291+
if (Hashtable_shouldResize(this)) {
292+
Hashtable_setSize(this, GROWTH_RATE(this));
230293
}
231-
232294
insert(this, key, value);
233295

234296
assert(Hashtable_isConsistent(this));
@@ -255,14 +317,14 @@ void* Hashtable_remove(Hashtable* this, ht_key_t key) {
255317
res = this->buckets[index].value;
256318
}
257319

258-
size_t next = (index + 1) % this->size;
320+
size_t next = inc_index(index, this->size);
259321

260322
while (this->buckets[next].value && this->buckets[next].probe > 0) {
261323
this->buckets[index] = this->buckets[next];
262324
this->buckets[index].probe -= 1;
263325

264326
index = next;
265-
next = (index + 1) % this->size;
327+
next = inc_index(index, this->size);
266328
}
267329

268330
/* set empty after backward shifting */
@@ -275,7 +337,7 @@ void* Hashtable_remove(Hashtable* this, ht_key_t key) {
275337
if (this->buckets[index].probe < probe)
276338
break;
277339

278-
index = (index + 1) % this->size;
340+
index = inc_index(index, this->size);
279341
probe++;
280342

281343
assert(index != origIndex);
@@ -284,14 +346,10 @@ void* Hashtable_remove(Hashtable* this, ht_key_t key) {
284346
assert(Hashtable_isConsistent(this));
285347
assert(Hashtable_get(this, key) == NULL);
286348

287-
/* shrink on load-factor < 0.125 */
288-
if (8 * this->items < this->size)
289-
Hashtable_setSize(this, this->size / 3); /* account for nextPrime rounding up */
290-
291349
return res;
292350
}
293351

294-
void* Hashtable_get(Hashtable* this, ht_key_t key) {
352+
void* Hashtable_get(const Hashtable* this, ht_key_t key) {
295353
size_t index = key % this->size;
296354
size_t probe = 0;
297355
void* res = NULL;
@@ -310,7 +368,7 @@ void* Hashtable_get(Hashtable* this, ht_key_t key) {
310368
if (this->buckets[index].probe < probe)
311369
break;
312370

313-
index = (index + 1) != this->size ? (index + 1) : 0;
371+
index = inc_index(index, this->size);
314372
probe++;
315373

316374
assert(index != origIndex);
@@ -319,7 +377,7 @@ void* Hashtable_get(Hashtable* this, ht_key_t key) {
319377
return res;
320378
}
321379

322-
void Hashtable_foreach(Hashtable* this, Hashtable_PairFunction f, void* userData) {
380+
void Hashtable_foreach(const Hashtable* this, Hashtable_PairFunction f, void* userData) {
323381
assert(Hashtable_isConsistent(this));
324382
for (size_t i = 0; i < this->size; i++) {
325383
HashtableItem* walk = &this->buckets[i];

Hashtable.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@ in the source distribution for its full text.
99

1010
#include <stdbool.h>
1111
#include <stddef.h>
12+
#include <stdint.h>
1213

1314

14-
typedef unsigned int ht_key_t;
15+
typedef uint32_t ht_key_t;
1516

1617
typedef void(*Hashtable_PairFunction)(ht_key_t key, void* value, void* userdata);
1718

@@ -35,8 +36,8 @@ void Hashtable_put(Hashtable* this, ht_key_t key, void* value);
3536

3637
void* Hashtable_remove(Hashtable* this, ht_key_t key);
3738

38-
void* Hashtable_get(Hashtable* this, ht_key_t key);
39+
void* Hashtable_get(const Hashtable* this, ht_key_t key);
3940

40-
void Hashtable_foreach(Hashtable* this, Hashtable_PairFunction f, void* userData);
41+
void Hashtable_foreach(const Hashtable* this, Hashtable_PairFunction f, void* userData);
4142

4243
#endif

0 commit comments

Comments
 (0)