raw
vtools_genesis          1 /* A generic hash table package.  */
vtools_genesis 2
vtools_genesis 3 /* Define |USE_OBSTACK| to 1 if you want the allocator to use obstacks
vtools_genesis 4 instead of |malloc|. If you change |USE_OBSTACK|, you have to
vtools_genesis 5 recompile! */
vtools_genesis 6
vtools_genesis 7 #include "hash.h"
vtools_genesis 8 #include "system.h"
vtools_genesis 9
vtools_genesis 10 #include <stdlib.h>
vtools_genesis 11 #include <limits.h>
vtools_genesis 12
vtools_genesis 13 #if USE_OBSTACK
vtools_genesis 14 # include "obstack.h"
vtools_genesis 15 # ifndef obstack_chunk_alloc
vtools_genesis 16 # define obstack_chunk_alloc malloc
vtools_genesis 17 # endif
vtools_genesis 18 # ifndef obstack_chunk_free
vtools_genesis 19 # define obstack_chunk_free free
vtools_genesis 20 # endif
vtools_genesis 21 #endif
vtools_genesis 22
vtools_genesis 23 struct hash_entry {
vtools_genesis 24 void *data;
vtools_genesis 25 struct hash_entry *next;
vtools_genesis 26 };
vtools_genesis 27
vtools_genesis 28 struct hash_table {
vtools_genesis 29 /* The array of buckets starts at |bucket| and extends to
vtools_genesis 30 |bucket_limit-1|, for a possibility of |n_buckets|. Among
vtools_genesis 31 those, |n_buckets_used| buckets are not empty, there are
vtools_genesis 32 |n_entries| active entries in the table. */
vtools_genesis 33 struct hash_entry *bucket;
vtools_genesis 34 struct hash_entry const *bucket_limit;
vtools_genesis 35 size_t n_buckets;
vtools_genesis 36 size_t n_buckets_used;
vtools_genesis 37 size_t n_entries;
vtools_genesis 38
vtools_genesis 39 /* Tuning arguments, kept in a physically separate structure. */
vtools_genesis 40 const Hash_tuning *tuning;
vtools_genesis 41
vtools_genesis 42 /* Three functions are given to |hash_initialize|, see the
vtools_genesis 43 documentation block for this function. In a word, |hasher|
vtools_genesis 44 randomizes a user entry into a number up from 0 up to some
vtools_genesis 45 maximum minus 1; |comparator| returns true if two user entries
vtools_genesis 46 compare equally; and |data_freer| is the cleanup function for a
vtools_genesis 47 user entry. */
vtools_genesis 48 Hash_hasher hasher;
vtools_genesis 49 Hash_comparator comparator;
vtools_genesis 50 Hash_data_freer data_freer;
vtools_genesis 51
vtools_genesis 52 /* A linked list of freed struct |hash_entry| structs. */
vtools_genesis 53 struct hash_entry *free_entry_list;
vtools_genesis 54
vtools_genesis 55 #if USE_OBSTACK
vtools_genesis 56 /* Whenever obstacks are used, it is possible to allocate all
vtools_genesis 57 overflowed entries into a single stack, so they all can be
vtools_genesis 58 freed in a single operation. It is not clear if the speedup is
vtools_genesis 59 worth the trouble. */
vtools_genesis 60 struct obstack entry_stack;
vtools_genesis 61 #endif
vtools_genesis 62 };
vtools_genesis 63
vtools_genesis 64 /* A hash table contains many internal entries, each holding a pointer
vtools_genesis 65 to some user-provided data (also called a user entry). An entry
vtools_genesis 66 indistinctly refers to both the internal entry and its associated
vtools_genesis 67 user entry. A user entry contents may be hashed by a randomization
vtools_genesis 68 function (the hashing function, or just "hasher" for short) into a
vtools_genesis 69 number (or "slot") between 0 and the current table size. At each
vtools_genesis 70 slot position in the hash table, starts a linked chain of entries
vtools_genesis 71 for which the user data all hash to this slot. A bucket is the
vtools_genesis 72 collection of all entries hashing to the same slot.
vtools_genesis 73
vtools_genesis 74 A good "hasher" function will distribute entries rather evenly in
vtools_genesis 75 buckets. In the ideal case, the length of each bucket is roughly
vtools_genesis 76 the number of entries divided by the table size. Finding the slot
vtools_genesis 77 for a data is usually done in constant time by the "hasher", and
vtools_genesis 78 the later finding of a precise entry is linear in time with the
vtools_genesis 79 size of the bucket. Consequently, a larger hash table size (that
vtools_genesis 80 is, a larger number of buckets) is prone to yielding shorter
vtools_genesis 81 chains, {\bf given} the "hasher" function behaves properly.
vtools_genesis 82
vtools_genesis 83 Long buckets slow down the lookup algorithm. One might use big
vtools_genesis 84 hash table sizes in hope to reduce the average length of buckets,
vtools_genesis 85 but this might become inordinate, as unused slots in the hash table
vtools_genesis 86 take some space. The best bet is to make sure you are using a good
vtools_genesis 87 "hasher" function (beware that those are not that easy to write!
vtools_genesis 88 :-), and to use a table size larger than the actual number of
vtools_genesis 89 entries. */
vtools_genesis 90
vtools_genesis 91 /* If an insertion makes the ratio of nonempty buckets to table size
vtools_genesis 92 larger than the growth threshold (a number between 0.0 and 1.0),
vtools_genesis 93 then increase the table size by multiplying by the growth factor (a
vtools_genesis 94 number greater than 1.0). The growth threshold defaults to 0.8,
vtools_genesis 95 and the growth factor defaults to 1.414, meaning that the table
vtools_genesis 96 will have doubled its size every second time 80\% of the buckets
vtools_genesis 97 get used. */
vtools_genesis 98 #define DEFAULT_GROWTH_THRESHOLD 0.8f
vtools_genesis 99 #define DEFAULT_GROWTH_FACTOR 1.414f
vtools_genesis 100
vtools_genesis 101 /* If a deletion empties a bucket and causes the ratio of used buckets
vtools_genesis 102 to table size to become smaller than the shrink threshold (a number
vtools_genesis 103 between 0.0 and 1.0), then shrink the table by multiplying by the
vtools_genesis 104 shrink factor (a number greater than the shrink threshold but
vtools_genesis 105 smaller than 1.0). The shrink threshold and factor default to 0.0
vtools_genesis 106 and 1.0, meaning that the table never shrinks. */
vtools_genesis 107 #define DEFAULT_SHRINK_THRESHOLD 0.0f
vtools_genesis 108 #define DEFAULT_SHRINK_FACTOR 1.0f
vtools_genesis 109
vtools_genesis 110 /* Use this to initialize or reset a |tuning| structure to some
vtools_genesis 111 sensible values. */
vtools_genesis 112 static const Hash_tuning default_tuning =
vtools_genesis 113 {
vtools_genesis 114 DEFAULT_SHRINK_THRESHOLD,
vtools_genesis 115 DEFAULT_SHRINK_FACTOR,
vtools_genesis 116 DEFAULT_GROWTH_THRESHOLD,
vtools_genesis 117 DEFAULT_GROWTH_FACTOR,
vtools_genesis 118 false
vtools_genesis 119 };
vtools_genesis 120
vtools_genesis 121 /* Information and lookup. */
vtools_genesis 122
vtools_genesis 123 /* The following few functions provide information about the overall
vtools_genesis 124 hash table organization: the number of entries, number of buckets
vtools_genesis 125 and maximum length of buckets. */
vtools_genesis 126
vtools_genesis 127 /* Return the number of buckets in the hash table. The table size,
vtools_genesis 128 the total number of buckets (used plus unused), or the maximum
vtools_genesis 129 number of slots, are the same quantity. */
vtools_genesis 130
vtools_genesis 131 size_t
vtools_genesis 132 hash_get_n_buckets(const Hash_table *table) {
vtools_genesis 133 return table->n_buckets;
vtools_genesis 134 }
vtools_genesis 135
vtools_genesis 136 /* Return the number of slots in use (non-empty buckets). */
vtools_genesis 137
vtools_genesis 138 size_t
vtools_genesis 139 hash_get_n_buckets_used(const Hash_table *table) {
vtools_genesis 140 return table->n_buckets_used;
vtools_genesis 141 }
vtools_genesis 142
vtools_genesis 143 /* Return the number of active entries. */
vtools_genesis 144
vtools_genesis 145 size_t
vtools_genesis 146 hash_get_n_entries(const Hash_table *table) {
vtools_genesis 147 return table->n_entries;
vtools_genesis 148 }
vtools_genesis 149
vtools_genesis 150 /* Return the length of the longest chain (bucket). */
vtools_genesis 151
vtools_genesis 152 size_t
vtools_genesis 153 hash_get_max_bucket_length(const Hash_table *table) {
vtools_genesis 154 struct hash_entry const *bucket;
vtools_genesis 155 size_t max_bucket_length = 0;
vtools_genesis 156
vtools_genesis 157 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
vtools_genesis 158 if (bucket->data) {
vtools_genesis 159 struct hash_entry const *cursor = bucket;
vtools_genesis 160 size_t bucket_length = 1;
vtools_genesis 161
vtools_genesis 162 while (cursor = cursor->next, cursor)
vtools_genesis 163 bucket_length++;
vtools_genesis 164
vtools_genesis 165 if (bucket_length > max_bucket_length)
vtools_genesis 166 max_bucket_length = bucket_length;
vtools_genesis 167 }
vtools_genesis 168 }
vtools_genesis 169
vtools_genesis 170 return max_bucket_length;
vtools_genesis 171 }
vtools_genesis 172
vtools_genesis 173 /* Do a mild validation of a hash table, by traversing it and checking
vtools_genesis 174 two statistics. */
vtools_genesis 175
vtools_genesis 176 bool
vtools_genesis 177 hash_table_ok(const Hash_table *table) {
vtools_genesis 178 struct hash_entry const *bucket;
vtools_genesis 179 size_t n_buckets_used = 0;
vtools_genesis 180 size_t n_entries = 0;
vtools_genesis 181
vtools_genesis 182 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
vtools_genesis 183 if (bucket->data) {
vtools_genesis 184 struct hash_entry const *cursor = bucket;
vtools_genesis 185
vtools_genesis 186 /* Count bucket head. */
vtools_genesis 187 n_buckets_used++;
vtools_genesis 188 n_entries++;
vtools_genesis 189
vtools_genesis 190 /* Count bucket overflow. */
vtools_genesis 191 while (cursor = cursor->next, cursor)
vtools_genesis 192 n_entries++;
vtools_genesis 193 }
vtools_genesis 194 }
vtools_genesis 195
vtools_genesis 196 if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
vtools_genesis 197 return true;
vtools_genesis 198
vtools_genesis 199 return false;
vtools_genesis 200 }
vtools_genesis 201
vtools_genesis 202 void
vtools_genesis 203 hash_print_statistics(const Hash_table *table, FILE *stream) {
vtools_genesis 204 size_t n_entries = hash_get_n_entries(table);
vtools_genesis 205 size_t n_buckets = hash_get_n_buckets(table);
vtools_genesis 206 size_t n_buckets_used = hash_get_n_buckets_used(table);
vtools_genesis 207 size_t max_bucket_length = hash_get_max_bucket_length(table);
vtools_genesis 208
vtools_genesis 209 fprintf(stream, "# entries: %lu\n", (unsigned long int) n_entries);
vtools_genesis 210 fprintf(stream, "# buckets: %lu\n", (unsigned long int) n_buckets);
vtools_genesis 211 fprintf(stream, "# buckets used: %lu (%.2f%%)\n",
vtools_genesis 212 (unsigned long int) n_buckets_used,
vtools_genesis 213 (100.0 * n_buckets_used) / n_buckets);
vtools_genesis 214 fprintf(stream, "max bucket length: %lu\n",
vtools_genesis 215 (unsigned long int) max_bucket_length);
vtools_genesis 216 }
vtools_genesis 217
vtools_genesis 218 /* Hash |key| and return a pointer to the selected bucket. If
vtools_genesis 219 |table->hasher| misbehaves, abort. */
vtools_genesis 220 static struct hash_entry *
vtools_genesis 221 safe_hasher(const Hash_table *table, const void *key) {
vtools_genesis 222 size_t n = table->hasher(key, table->n_buckets);
vtools_genesis 223 if (!(n < table->n_buckets))
vtools_genesis 224 abort();
vtools_genesis 225 return table->bucket + n;
vtools_genesis 226 }
vtools_genesis 227
vtools_genesis 228 /* If |entry| matches an entry already in the hash table, return the
vtools_genesis 229 entry from the table. Otherwise, return |NULL|. */
vtools_genesis 230
vtools_genesis 231 void *
vtools_genesis 232 hash_lookup(const Hash_table *table, const void *entry) {
vtools_genesis 233 struct hash_entry const *bucket = safe_hasher(table, entry);
vtools_genesis 234 struct hash_entry const *cursor;
vtools_genesis 235
vtools_genesis 236 if (bucket->data == NULL)
vtools_genesis 237 return NULL;
vtools_genesis 238
vtools_genesis 239 for (cursor = bucket; cursor; cursor = cursor->next)
vtools_genesis 240 if (entry == cursor->data || table->comparator(entry, cursor->data))
vtools_genesis 241 return cursor->data;
vtools_genesis 242
vtools_genesis 243 return NULL;
vtools_genesis 244 }
vtools_genesis 245
vtools_genesis 246 /* Walking. */
vtools_genesis 247
vtools_genesis 248 /* The functions in this page traverse the hash table and process the
vtools_genesis 249 contained entries. For the traversal to work properly, the hash
vtools_genesis 250 table should not be resized nor modified while any particular entry
vtools_genesis 251 is being processed. In particular, entries should not be added,
vtools_genesis 252 and an entry may be removed only if there is no shrink threshold
vtools_genesis 253 and the entry being removed has already been passed to
vtools_genesis 254 |hash_get_next|. */
vtools_genesis 255
vtools_genesis 256 /* Return the first data in the table, or |NULL| if the table is
vtools_genesis 257 empty. */
vtools_genesis 258
vtools_genesis 259 void *
vtools_genesis 260 hash_get_first(const Hash_table *table) {
vtools_genesis 261 struct hash_entry const *bucket;
vtools_genesis 262
vtools_genesis 263 if (table->n_entries == 0)
vtools_genesis 264 return NULL;
vtools_genesis 265
vtools_genesis 266 for (bucket = table->bucket;; bucket++)
vtools_genesis 267 if (!(bucket < table->bucket_limit))
vtools_genesis 268 abort();
vtools_genesis 269 else if (bucket->data)
vtools_genesis 270 return bucket->data;
vtools_genesis 271 }
vtools_genesis 272
vtools_genesis 273 /* Return the user data for the entry following |entry|, where |entry|
vtools_genesis 274 has been returned by a previous call to either |hash_get_first| or
vtools_genesis 275 |hash_get_next|. Return |NULL| if there are no more entries. */
vtools_genesis 276
vtools_genesis 277 void *
vtools_genesis 278 hash_get_next(const Hash_table *table, const void *entry) {
vtools_genesis 279 struct hash_entry const *bucket = safe_hasher(table, entry);
vtools_genesis 280 struct hash_entry const *cursor;
vtools_genesis 281
vtools_genesis 282 /* Find next entry in the same bucket. */
vtools_genesis 283 cursor = bucket;
vtools_genesis 284 do {
vtools_genesis 285 if (cursor->data == entry && cursor->next)
vtools_genesis 286 return cursor->next->data;
vtools_genesis 287 cursor = cursor->next;
vtools_genesis 288 } while (cursor != NULL);
vtools_genesis 289
vtools_genesis 290 /* Find first entry in any subsequent bucket. */
vtools_genesis 291 while (++bucket < table->bucket_limit)
vtools_genesis 292 if (bucket->data)
vtools_genesis 293 return bucket->data;
vtools_genesis 294
vtools_genesis 295 /* None found. */
vtools_genesis 296 return NULL;
vtools_genesis 297 }
vtools_genesis 298
vtools_genesis 299 /* Fill |buffer| with pointers to active user entries in the hash
vtools_genesis 300 table, then return the number of pointers copied. Do not copy more
vtools_genesis 301 than |buffer_size| pointers. */
vtools_genesis 302
vtools_genesis 303 size_t
vtools_genesis 304 hash_get_entries(const Hash_table *table, void **buffer,
vtools_genesis 305 size_t buffer_size) {
vtools_genesis 306 size_t counter = 0;
vtools_genesis 307 struct hash_entry const *bucket;
vtools_genesis 308 struct hash_entry const *cursor;
vtools_genesis 309
vtools_genesis 310 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
vtools_genesis 311 if (bucket->data) {
vtools_genesis 312 for (cursor = bucket; cursor; cursor = cursor->next) {
vtools_genesis 313 if (counter >= buffer_size)
vtools_genesis 314 return counter;
vtools_genesis 315 buffer[counter++] = cursor->data;
vtools_genesis 316 }
vtools_genesis 317 }
vtools_genesis 318 }
vtools_genesis 319
vtools_genesis 320 return counter;
vtools_genesis 321 }
vtools_genesis 322
vtools_genesis 323 /* Call a |processor| function for each entry of a hash table, and
vtools_genesis 324 return the number of entries for which the processor function
vtools_genesis 325 returned success. A pointer to some |processor_data| which will be
vtools_genesis 326 made available to each call to the processor function. The
vtools_genesis 327 |processor| accepts two arguments: the first is the user entry
vtools_genesis 328 being walked into, the second is the value of |processor_data| as
vtools_genesis 329 received. The walking continue for as long as the |processor|
vtools_genesis 330 function returns nonzero. When it returns zero, the walking is
vtools_genesis 331 interrupted. */
vtools_genesis 332
vtools_genesis 333 size_t
vtools_genesis 334 hash_do_for_each(const Hash_table *table, Hash_processor processor,
vtools_genesis 335 void *processor_data) {
vtools_genesis 336 size_t counter = 0;
vtools_genesis 337 struct hash_entry const *bucket;
vtools_genesis 338 struct hash_entry const *cursor;
vtools_genesis 339
vtools_genesis 340 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
vtools_genesis 341 if (bucket->data) {
vtools_genesis 342 for (cursor = bucket; cursor; cursor = cursor->next) {
vtools_genesis 343 if (!processor(cursor->data, processor_data))
vtools_genesis 344 return counter;
vtools_genesis 345 counter++;
vtools_genesis 346 }
vtools_genesis 347 }
vtools_genesis 348 }
vtools_genesis 349
vtools_genesis 350 return counter;
vtools_genesis 351 }
vtools_genesis 352
vtools_genesis 353 /* Allocation and clean-up. */
vtools_genesis 354
vtools_genesis 355 /* Return a hash index for a |NULL|-terminated |string| between |0| and
vtools_genesis 356 |n_buckets-1|. This is a convenience routine for constructing
vtools_genesis 357 other hashing functions. */
vtools_genesis 358
vtools_genesis 359 #if USE_DIFF_HASH
vtools_genesis 360
vtools_genesis 361 /* About hashings, Paul Eggert writes to me (FP), on 1994-01-01:
vtools_genesis 362 ``Please see B. J. McKenzie, R. Harries \& T. Bell, Selecting a
vtools_genesis 363 hashing algorithm, Software--practice \& experience 20, 2 (Feb
vtools_genesis 364 1990), 209-224. Good hash algorithms tend to be domain-specific,
vtools_genesis 365 so what's good for [diffutils'] |io.c| may not be good for your
vtools_genesis 366 application.'' */
vtools_genesis 367
vtools_genesis 368 size_t
vtools_genesis 369 hash_string (const char *string, size_t n_buckets)
vtools_genesis 370 {
vtools_genesis 371 # define HASH_ONE_CHAR(Value, Byte) \
vtools_genesis 372 ((Byte) + rotl_sz (Value, 7))
vtools_genesis 373
vtools_genesis 374 size_t value = 0;
vtools_genesis 375 unsigned char ch;
vtools_genesis 376
vtools_genesis 377 for (; (ch = *string); string++)
vtools_genesis 378 value = HASH_ONE_CHAR (value, ch);
vtools_genesis 379 return value % n_buckets;
vtools_genesis 380
vtools_genesis 381 # undef HASH_ONE_CHAR
vtools_genesis 382 }
vtools_genesis 383
vtools_genesis 384 #else
vtools_genesis 385
vtools_genesis 386 /* This one comes from |recode|, and performs a bit better than the
vtools_genesis 387 above as per a few experiments. It is inspired from a hashing
vtools_genesis 388 routine found in the very old Cyber `snoop', itself written in
vtools_genesis 389 typical Greg Mansfield style. (By the way, what happened to this
vtools_genesis 390 excellent man? Is he still alive?) */
vtools_genesis 391
vtools_genesis 392 size_t
vtools_genesis 393 hash_string(const char *string, size_t n_buckets) {
vtools_genesis 394 size_t value = 0;
vtools_genesis 395 unsigned char ch;
vtools_genesis 396
vtools_genesis 397 for (; (ch = *string); string++)
vtools_genesis 398 value = (value * 31 + ch) % n_buckets;
vtools_genesis 399 return value;
vtools_genesis 400 }
vtools_genesis 401
vtools_genesis 402 #endif
vtools_genesis 403
vtools_genesis 404 /* Return true if |candidate| is a prime number. |candidate| should
vtools_genesis 405 be an odd number at least equal to 11. */
vtools_genesis 406
vtools_genesis 407 static bool
vtools_genesis 408 is_prime(size_t candidate) {
vtools_genesis 409 size_t divisor = 3;
vtools_genesis 410 size_t square = divisor * divisor;
vtools_genesis 411
vtools_genesis 412 while (square < candidate && (candidate % divisor)) {
vtools_genesis 413 divisor++;
vtools_genesis 414 square += 4 * divisor;
vtools_genesis 415 divisor++;
vtools_genesis 416 }
vtools_genesis 417
vtools_genesis 418 return (candidate % divisor ? true : false);
vtools_genesis 419 }
vtools_genesis 420
vtools_genesis 421 /* Round a given |candidate| number up to the nearest prime, and
vtools_genesis 422 return that prime. Primes lower than 10 are merely skipped. */
vtools_genesis 423
vtools_genesis 424 static size_t
vtools_genesis 425 next_prime(size_t candidate) {
vtools_genesis 426 /* Skip small primes. */
vtools_genesis 427 if (candidate < 10)
vtools_genesis 428 candidate = 10;
vtools_genesis 429
vtools_genesis 430 /* Make it definitely odd. */
vtools_genesis 431 candidate |= 1;
vtools_genesis 432
vtools_genesis 433 while (SIZE_MAX != candidate && !is_prime(candidate))
vtools_genesis 434 candidate += 2;
vtools_genesis 435
vtools_genesis 436 return candidate;
vtools_genesis 437 }
vtools_genesis 438
vtools_genesis 439 void
vtools_genesis 440 hash_reset_tuning(Hash_tuning *tuning) {
vtools_genesis 441 *tuning = default_tuning;
vtools_genesis 442 }
vtools_genesis 443
vtools_genesis 444 /* Given a |size_t| argument |x|, return the value corresponding to
vtools_genesis 445 rotating the bits |n| steps to the right. |n| must be between 1 to
vtools_genesis 446 |(CHAR_BIT * sizeof (size_t) - 1)| inclusive. */
vtools_genesis 447 static inline size_t
vtools_genesis 448 rotr_sz(size_t x, int n) {
vtools_genesis 449 return ((x >> n) | (x << ((CHAR_BIT * sizeof x) - n))) & SIZE_MAX;
vtools_genesis 450 }
vtools_genesis 451
vtools_genesis 452 /* If the user passes a |NULL| hasher, we hash the raw pointer. */
vtools_genesis 453 static size_t
vtools_genesis 454 raw_hasher(const void *data, size_t n) {
vtools_genesis 455 /* When hashing unique pointers, it is often the case that they
vtools_genesis 456 were generated by malloc and thus have the property that the
vtools_genesis 457 low-order bits are 0. As this tends to give poorer performance
vtools_genesis 458 with small tables, we rotate the pointer value before
vtools_genesis 459 performing division, in an attempt to improve hash quality. */
vtools_genesis 460 size_t val = rotr_sz((size_t) data, 3);
vtools_genesis 461 return val % n;
vtools_genesis 462 }
vtools_genesis 463
vtools_genesis 464 /* If the user passes a |NULL| comparator, we use pointer
vtools_genesis 465 comparison. */
vtools_genesis 466 static bool
vtools_genesis 467 raw_comparator(const void *a, const void *b) {
vtools_genesis 468 return a == b;
vtools_genesis 469 }
vtools_genesis 470
vtools_genesis 471
vtools_genesis 472 /* For the given hash |table|, check the user supplied tuning
vtools_genesis 473 structure for reasonable values, and return true if there is no
vtools_genesis 474 gross error with it. Otherwise, definitively reset the |tuning|
vtools_genesis 475 field to some acceptable default in the hash table (that is, the
vtools_genesis 476 user loses the right of further modifying tuning arguments), and
vtools_genesis 477 return false. */
vtools_genesis 478
vtools_genesis 479 static bool
vtools_genesis 480 check_tuning(Hash_table *table) {
vtools_genesis 481 const Hash_tuning *tuning = table->tuning;
vtools_genesis 482 float epsilon;
vtools_genesis 483 if (tuning == &default_tuning)
vtools_genesis 484 return true;
vtools_genesis 485
vtools_genesis 486 /* Be a bit stricter than mathematics would require, so that
vtools_genesis 487 rounding errors in size calculations do not cause allocations
vtools_genesis 488 to fail to grow or shrink as they should. The smallest
vtools_genesis 489 allocation is 11 (due to |next_prime|'s algorithm), so an
vtools_genesis 490 epsilon of 0.1 should be good enough. */
vtools_genesis 491 epsilon = 0.1f;
vtools_genesis 492
vtools_genesis 493 if (epsilon < tuning->growth_threshold
vtools_genesis 494 && tuning->growth_threshold < 1 - epsilon
vtools_genesis 495 && 1 + epsilon < tuning->growth_factor
vtools_genesis 496 && 0 <= tuning->shrink_threshold
vtools_genesis 497 && tuning->shrink_threshold + epsilon < tuning->shrink_factor
vtools_genesis 498 && tuning->shrink_factor <= 1
vtools_genesis 499 && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
vtools_genesis 500 return true;
vtools_genesis 501
vtools_genesis 502 table->tuning = &default_tuning;
vtools_genesis 503 return false;
vtools_genesis 504 }
vtools_genesis 505
vtools_genesis 506 /* Compute the size of the bucket array for the given |candidate| and
vtools_genesis 507 |tuning|, or return 0 if there is no possible way to allocate that
vtools_genesis 508 many entries. */
vtools_genesis 509
vtools_genesis 510 static size_t
vtools_genesis 511 compute_bucket_size(size_t candidate, const Hash_tuning *tuning) {
vtools_genesis 512 if (!tuning->is_n_buckets) {
vtools_genesis 513 float new_candidate = candidate / tuning->growth_threshold;
vtools_genesis 514 if (SIZE_MAX <= new_candidate)
vtools_genesis 515 return 0;
vtools_genesis 516 candidate = new_candidate;
vtools_genesis 517 }
vtools_genesis 518 candidate = next_prime(candidate);
vtools_genesis 519 if (xalloc_oversized (candidate, sizeof(struct hash_entry *)))
vtools_genesis 520 return 0;
vtools_genesis 521 return candidate;
vtools_genesis 522 }
vtools_genesis 523
vtools_genesis 524 /* Allocate and return a new hash table, or |NULL| upon failure. The
vtools_genesis 525 initial number of buckets is automatically selected so as to {\it
vtools_genesis 526 guarantee} that you may insert at least |candidate| different user
vtools_genesis 527 entries before any growth of the hash table size occurs. So, if
vtools_genesis 528 have a reasonably tight a-priori upper bound on the number of
vtools_genesis 529 entries you intend to insert in the hash table, you may save some
vtools_genesis 530 table memory and insertion time, by specifying it here. If the
vtools_genesis 531 |is_n_buckets| field of the |tuning| structure is true, the
vtools_genesis 532 |candidate| argument has its meaning changed to the wanted number
vtools_genesis 533 of buckets.
vtools_genesis 534
vtools_genesis 535 |tuning| points to a structure of user-supplied values, in case
vtools_genesis 536 some fine tuning is wanted over the default behavior of the hasher.
vtools_genesis 537 If |tuning| is |NULL|, the default tuning parameters are used
vtools_genesis 538 instead. If |tuning| is provided but the values requested are out
vtools_genesis 539 of bounds or might cause rounding errors, return |NULL|.
vtools_genesis 540
vtools_genesis 541 The user-supplied |hasher| function, when not |NULL|, accepts two
vtools_genesis 542 arguments |entry| and |table_size|. It computes, by hashing
vtools_genesis 543 |entry| contents, a slot number for that entry which should be in
vtools_genesis 544 the range |0..table_size-1|. This slot number is then returned.
vtools_genesis 545
vtools_genesis 546 The user-supplied |comparator| function, when not |NULL|, accepts
vtools_genesis 547 two arguments pointing to user data, it then returns true for a
vtools_genesis 548 pair of entries that compare equal, or false otherwise. This
vtools_genesis 549 function is internally called on entries which are already known to
vtools_genesis 550 hash to the same bucket index, but which are distinct pointers.
vtools_genesis 551
vtools_genesis 552 The user-supplied |data_freer| function, when not |NULL|, may be
vtools_genesis 553 later called with the user data as an argument, just before the
vtools_genesis 554 entry containing the data gets freed. This happens from within
vtools_genesis 555 |hash_free| or |hash_clear|. You should specify this function only
vtools_genesis 556 if you want these functions to free all of your |data| data. This
vtools_genesis 557 is typically the case when your data is simply an auxiliary struct
vtools_genesis 558 that you have |malloc|'d to aggregate several values. */
vtools_genesis 559
vtools_genesis 560 Hash_table *
vtools_genesis 561 hash_initialize(size_t candidate, const Hash_tuning *tuning,
vtools_genesis 562 Hash_hasher hasher, Hash_comparator comparator,
vtools_genesis 563 Hash_data_freer data_freer) {
vtools_genesis 564 Hash_table *table;
vtools_genesis 565
vtools_genesis 566 if (hasher == NULL)
vtools_genesis 567 hasher = raw_hasher;
vtools_genesis 568 if (comparator == NULL)
vtools_genesis 569 comparator = raw_comparator;
vtools_genesis 570
vtools_genesis 571 table = malloc(sizeof *table);
vtools_genesis 572 if (table == NULL)
vtools_genesis 573 return NULL;
vtools_genesis 574
vtools_genesis 575 if (!tuning)
vtools_genesis 576 tuning = &default_tuning;
vtools_genesis 577 table->tuning = tuning;
vtools_genesis 578 if (!check_tuning(table)) {
vtools_genesis 579 /* Fail if the tuning options are invalid. This is the only
vtools_genesis 580 occasion when the user gets some feedback about it. Once
vtools_genesis 581 the table is created, if the user provides invalid tuning
vtools_genesis 582 options, we silently revert to using the defaults, and
vtools_genesis 583 ignore further request to change the tuning options. */
vtools_genesis 584 goto fail;
vtools_genesis 585 }
vtools_genesis 586
vtools_genesis 587 table->n_buckets = compute_bucket_size(candidate, tuning);
vtools_genesis 588 if (!table->n_buckets)
vtools_genesis 589 goto fail;
vtools_genesis 590
vtools_genesis 591 table->bucket = calloc(table->n_buckets, sizeof *table->bucket);
vtools_genesis 592 if (table->bucket == NULL)
vtools_genesis 593 goto fail;
vtools_genesis 594 table->bucket_limit = table->bucket + table->n_buckets;
vtools_genesis 595 table->n_buckets_used = 0;
vtools_genesis 596 table->n_entries = 0;
vtools_genesis 597
vtools_genesis 598 table->hasher = hasher;
vtools_genesis 599 table->comparator = comparator;
vtools_genesis 600 table->data_freer = data_freer;
vtools_genesis 601
vtools_genesis 602 table->free_entry_list = NULL;
vtools_genesis 603 #if USE_OBSTACK
vtools_genesis 604 obstack_init (&table->entry_stack);
vtools_genesis 605 #endif
vtools_genesis 606 return table;
vtools_genesis 607
vtools_genesis 608 fail:
vtools_genesis 609 free(table);
vtools_genesis 610 return NULL;
vtools_genesis 611 }
vtools_genesis 612
vtools_genesis 613 /* Make all buckets empty, placing any chained entries on the free
vtools_genesis 614 list. Apply the user-specified function |data_freer| (if any) to
vtools_genesis 615 the datas of any affected entries. */
vtools_genesis 616
vtools_genesis 617 void
vtools_genesis 618 hash_clear(Hash_table *table) {
vtools_genesis 619 struct hash_entry *bucket;
vtools_genesis 620
vtools_genesis 621 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
vtools_genesis 622 if (bucket->data) {
vtools_genesis 623 struct hash_entry *cursor;
vtools_genesis 624 struct hash_entry *next;
vtools_genesis 625
vtools_genesis 626 /* Free the bucket overflow. */
vtools_genesis 627 for (cursor = bucket->next; cursor; cursor = next) {
vtools_genesis 628 if (table->data_freer)
vtools_genesis 629 table->data_freer(cursor->data);
vtools_genesis 630 cursor->data = NULL;
vtools_genesis 631
vtools_genesis 632 next = cursor->next;
vtools_genesis 633 /* Relinking is done one entry at a time, as it is to
vtools_genesis 634 be expected that overflows are either rare or
vtools_genesis 635 short. */
vtools_genesis 636 cursor->next = table->free_entry_list;
vtools_genesis 637 table->free_entry_list = cursor;
vtools_genesis 638 }
vtools_genesis 639
vtools_genesis 640 /* Free the bucket head. */
vtools_genesis 641 if (table->data_freer)
vtools_genesis 642 table->data_freer(bucket->data);
vtools_genesis 643 bucket->data = NULL;
vtools_genesis 644 bucket->next = NULL;
vtools_genesis 645 }
vtools_genesis 646 }
vtools_genesis 647
vtools_genesis 648 table->n_buckets_used = 0;
vtools_genesis 649 table->n_entries = 0;
vtools_genesis 650 }
vtools_genesis 651
vtools_genesis 652 /* Reclaim all storage associated with a hash table. If a
vtools_genesis 653 |data_freer| function has been supplied by the user when the hash
vtools_genesis 654 table was created, this function applies it to the data of each
vtools_genesis 655 entry before freeing that entry. */
vtools_genesis 656
vtools_genesis 657 void
vtools_genesis 658 hash_free(Hash_table *table) {
vtools_genesis 659 struct hash_entry *bucket;
vtools_genesis 660 struct hash_entry *cursor;
vtools_genesis 661 struct hash_entry *next;
vtools_genesis 662
vtools_genesis 663 /* Call the user |data_freer| function. */
vtools_genesis 664 if (table->data_freer && table->n_entries) {
vtools_genesis 665 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
vtools_genesis 666 if (bucket->data) {
vtools_genesis 667 for (cursor = bucket; cursor; cursor = cursor->next)
vtools_genesis 668 table->data_freer(cursor->data);
vtools_genesis 669 }
vtools_genesis 670 }
vtools_genesis 671 }
vtools_genesis 672
vtools_genesis 673 #if USE_OBSTACK
vtools_genesis 674
vtools_genesis 675 obstack_free (&table->entry_stack, NULL);
vtools_genesis 676
vtools_genesis 677 #else
vtools_genesis 678
vtools_genesis 679 /* Free all bucket overflowed entries. */
vtools_genesis 680 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
vtools_genesis 681 for (cursor = bucket->next; cursor; cursor = next) {
vtools_genesis 682 next = cursor->next;
vtools_genesis 683 free(cursor);
vtools_genesis 684 }
vtools_genesis 685 }
vtools_genesis 686
vtools_genesis 687 /* Also reclaim the internal list of previously freed entries. */
vtools_genesis 688 for (cursor = table->free_entry_list; cursor; cursor = next) {
vtools_genesis 689 next = cursor->next;
vtools_genesis 690 free(cursor);
vtools_genesis 691 }
vtools_genesis 692
vtools_genesis 693 #endif
vtools_genesis 694
vtools_genesis 695 /* Free the remainder of the hash table structure. */
vtools_genesis 696 free(table->bucket);
vtools_genesis 697 free(table);
vtools_genesis 698 }
vtools_genesis 699
vtools_genesis 700 /* Insertion and deletion. */
vtools_genesis 701
vtools_genesis 702 /* Get a new hash entry for a bucket overflow, possibly by recycling a
vtools_genesis 703 previously freed one. If this is not possible, allocate a new
vtools_genesis 704 one. */
vtools_genesis 705
vtools_genesis 706 static struct hash_entry *
vtools_genesis 707 allocate_entry(Hash_table *table) {
vtools_genesis 708 struct hash_entry *new;
vtools_genesis 709
vtools_genesis 710 if (table->free_entry_list) {
vtools_genesis 711 new = table->free_entry_list;
vtools_genesis 712 table->free_entry_list = new->next;
vtools_genesis 713 } else {
vtools_genesis 714 #if USE_OBSTACK
vtools_genesis 715 new = obstack_alloc (&table->entry_stack, sizeof *new);
vtools_genesis 716 #else
vtools_genesis 717 new = malloc(sizeof *new);
vtools_genesis 718 #endif
vtools_genesis 719 }
vtools_genesis 720
vtools_genesis 721 return new;
vtools_genesis 722 }
vtools_genesis 723
vtools_genesis 724 /* Free a hash entry which was part of some bucket overflow, saving it
vtools_genesis 725 for later recycling. */
vtools_genesis 726
vtools_genesis 727 static void
vtools_genesis 728 free_entry(Hash_table *table, struct hash_entry *entry) {
vtools_genesis 729 entry->data = NULL;
vtools_genesis 730 entry->next = table->free_entry_list;
vtools_genesis 731 table->free_entry_list = entry;
vtools_genesis 732 }
vtools_genesis 733
vtools_genesis 734 /* This private function is used to help with insertion and deletion.
vtools_genesis 735 When |entry| matches an entry in the table, return a pointer to the
vtools_genesis 736 corresponding user data and set |*bucket_head| to the head of the
vtools_genesis 737 selected bucket. Otherwise, return |NULL|. When |delete| is true
vtools_genesis 738 and |entry| matches an entry in the table, unlink the matching
vtools_genesis 739 entry. */
vtools_genesis 740
vtools_genesis 741 static void *
vtools_genesis 742 hash_find_entry(Hash_table *table, const void *entry,
vtools_genesis 743 struct hash_entry **bucket_head, bool delete) {
vtools_genesis 744 struct hash_entry *bucket = safe_hasher(table, entry);
vtools_genesis 745 struct hash_entry *cursor;
vtools_genesis 746
vtools_genesis 747 *bucket_head = bucket;
vtools_genesis 748
vtools_genesis 749 /* Test for empty bucket. */
vtools_genesis 750 if (bucket->data == NULL)
vtools_genesis 751 return NULL;
vtools_genesis 752
vtools_genesis 753 /* See if the entry is the first in the bucket. */
vtools_genesis 754 if (entry == bucket->data || table->comparator(entry, bucket->data)) {
vtools_genesis 755 void *data = bucket->data;
vtools_genesis 756
vtools_genesis 757 if (delete) {
vtools_genesis 758 if (bucket->next) {
vtools_genesis 759 struct hash_entry *next = bucket->next;
vtools_genesis 760
vtools_genesis 761 /* Bump the first overflow entry into the bucket head, then save
vtools_genesis 762 the previous first overflow entry for later recycling. */
vtools_genesis 763 *bucket = *next;
vtools_genesis 764 free_entry(table, next);
vtools_genesis 765 } else {
vtools_genesis 766 bucket->data = NULL;
vtools_genesis 767 }
vtools_genesis 768 }
vtools_genesis 769
vtools_genesis 770 return data;
vtools_genesis 771 }
vtools_genesis 772
vtools_genesis 773 /* Scan the bucket overflow. */
vtools_genesis 774 for (cursor = bucket; cursor->next; cursor = cursor->next) {
vtools_genesis 775 if (entry == cursor->next->data
vtools_genesis 776 || table->comparator(entry, cursor->next->data)) {
vtools_genesis 777 void *data = cursor->next->data;
vtools_genesis 778
vtools_genesis 779 if (delete) {
vtools_genesis 780 struct hash_entry *next = cursor->next;
vtools_genesis 781
vtools_genesis 782 /* Unlink the entry to delete, then save the freed entry for later
vtools_genesis 783 recycling. */
vtools_genesis 784 cursor->next = next->next;
vtools_genesis 785 free_entry(table, next);
vtools_genesis 786 }
vtools_genesis 787
vtools_genesis 788 return data;
vtools_genesis 789 }
vtools_genesis 790 }
vtools_genesis 791
vtools_genesis 792 /* No entry found. */
vtools_genesis 793 return NULL;
vtools_genesis 794 }
vtools_genesis 795
vtools_genesis 796 /* Internal helper, to move entries from |src| to |dst|. Both tables
vtools_genesis 797 must share the same free entry list. If |safe|, only move overflow
vtools_genesis 798 entries, saving bucket heads for later, so that no allocations will
vtools_genesis 799 occur. Return false if the free entry list is exhausted and an
vtools_genesis 800 allocation fails. */
vtools_genesis 801
vtools_genesis 802 static bool
vtools_genesis 803 transfer_entries(Hash_table *dst, Hash_table *src, bool safe) {
vtools_genesis 804 struct hash_entry *bucket;
vtools_genesis 805 struct hash_entry *cursor;
vtools_genesis 806 struct hash_entry *next;
vtools_genesis 807 for (bucket = src->bucket; bucket < src->bucket_limit; bucket++)
vtools_genesis 808 if (bucket->data) {
vtools_genesis 809 void *data;
vtools_genesis 810 struct hash_entry *new_bucket;
vtools_genesis 811
vtools_genesis 812 /* Within each bucket, transfer overflow entries first and
vtools_genesis 813 then the bucket head, to minimize memory pressure.
vtools_genesis 814 After all, the only time we might allocate is when
vtools_genesis 815 moving the bucket head, but moving overflow entries
vtools_genesis 816 first may create free entries that can be recycled by
vtools_genesis 817 the time we finally get to the bucket head. */
vtools_genesis 818 for (cursor = bucket->next; cursor; cursor = next) {
vtools_genesis 819 data = cursor->data;
vtools_genesis 820 new_bucket = safe_hasher(dst, data);
vtools_genesis 821
vtools_genesis 822 next = cursor->next;
vtools_genesis 823
vtools_genesis 824 if (new_bucket->data) {
vtools_genesis 825 /* Merely relink an existing entry, when moving
vtools_genesis 826 from a bucket overflow into a bucket
vtools_genesis 827 overflow. */
vtools_genesis 828 cursor->next = new_bucket->next;
vtools_genesis 829 new_bucket->next = cursor;
vtools_genesis 830 } else {
vtools_genesis 831 /* Free an existing entry, when moving from a
vtools_genesis 832 bucket overflow into a bucket header. */
vtools_genesis 833 new_bucket->data = data;
vtools_genesis 834 dst->n_buckets_used++;
vtools_genesis 835 free_entry(dst, cursor);
vtools_genesis 836 }
vtools_genesis 837 }
vtools_genesis 838 /* Now move the bucket head. Be sure that if we fail due
vtools_genesis 839 to allocation failure that the src table is in a
vtools_genesis 840 consistent state. */
vtools_genesis 841 data = bucket->data;
vtools_genesis 842 bucket->next = NULL;
vtools_genesis 843 if (safe)
vtools_genesis 844 continue;
vtools_genesis 845 new_bucket = safe_hasher(dst, data);
vtools_genesis 846
vtools_genesis 847 if (new_bucket->data) {
vtools_genesis 848 /* Allocate or recycle an entry, when moving from a
vtools_genesis 849 bucket header into a bucket overflow. */
vtools_genesis 850 struct hash_entry *new_entry = allocate_entry(dst);
vtools_genesis 851
vtools_genesis 852 if (new_entry == NULL)
vtools_genesis 853 return false;
vtools_genesis 854
vtools_genesis 855 new_entry->data = data;
vtools_genesis 856 new_entry->next = new_bucket->next;
vtools_genesis 857 new_bucket->next = new_entry;
vtools_genesis 858 } else {
vtools_genesis 859 /* Move from one bucket header to another. */
vtools_genesis 860 new_bucket->data = data;
vtools_genesis 861 dst->n_buckets_used++;
vtools_genesis 862 }
vtools_genesis 863 bucket->data = NULL;
vtools_genesis 864 src->n_buckets_used--;
vtools_genesis 865 }
vtools_genesis 866 return true;
vtools_genesis 867 }
vtools_genesis 868
vtools_genesis 869 /* For an already existing hash table, change the number of buckets
vtools_genesis 870 through specifying |candidate|. The contents of the hash table are
vtools_genesis 871 preserved. The new number of buckets is automatically selected so
vtools_genesis 872 as to {\it guarantee} that the table may receive at least
vtools_genesis 873 |candidate| different user entries, including those already in the
vtools_genesis 874 table, before any other growth of the hash table size occurs. If
vtools_genesis 875 |tuning->is_n_buckets| is true, then |candidate| specifies the
vtools_genesis 876 exact number of buckets desired. Return true iff the rehash
vtools_genesis 877 succeeded. */
vtools_genesis 878
vtools_genesis 879 bool
vtools_genesis 880 hash_rehash(Hash_table *table, size_t candidate) {
vtools_genesis 881 Hash_table storage;
vtools_genesis 882 Hash_table *new_table;
vtools_genesis 883 size_t new_size = compute_bucket_size(candidate, table->tuning);
vtools_genesis 884
vtools_genesis 885 if (!new_size)
vtools_genesis 886 return false;
vtools_genesis 887 if (new_size == table->n_buckets)
vtools_genesis 888 return true;
vtools_genesis 889 new_table = &storage;
vtools_genesis 890 new_table->bucket = calloc(new_size, sizeof *new_table->bucket);
vtools_genesis 891 if (new_table->bucket == NULL)
vtools_genesis 892 return false;
vtools_genesis 893 new_table->n_buckets = new_size;
vtools_genesis 894 new_table->bucket_limit = new_table->bucket + new_size;
vtools_genesis 895 new_table->n_buckets_used = 0;
vtools_genesis 896 new_table->n_entries = 0;
vtools_genesis 897 new_table->tuning = table->tuning;
vtools_genesis 898 new_table->hasher = table->hasher;
vtools_genesis 899 new_table->comparator = table->comparator;
vtools_genesis 900 new_table->data_freer = table->data_freer;
vtools_genesis 901
vtools_genesis 902 /* In order for the transfer to successfully complete, we need
vtools_genesis 903 additional overflow entries when distinct buckets in the old
vtools_genesis 904 table collide into a common bucket in the new table. The worst
vtools_genesis 905 case possible is a hasher that gives a good spread with the old
vtools_genesis 906 size, but returns a constant with the new size; if we were to
vtools_genesis 907 guarantee |table->n_buckets_used-1| free entries in advance, then
vtools_genesis 908 the transfer would be guaranteed to not allocate memory.
vtools_genesis 909 However, for large tables, a guarantee of no further allocation
vtools_genesis 910 introduces a lot of extra memory pressure, all for an unlikely
vtools_genesis 911 corner case (most rehashes reduce, rather than increase, the
vtools_genesis 912 number of overflow entries needed). So, we instead ensure that
vtools_genesis 913 the transfer process can be reversed if we hit a memory
vtools_genesis 914 allocation failure mid-transfer. */
vtools_genesis 915
vtools_genesis 916 /* Merely reuse the extra old space into the new table. */
vtools_genesis 917 #if USE_OBSTACK
vtools_genesis 918 new_table->entry_stack = table->entry_stack;
vtools_genesis 919 #endif
vtools_genesis 920 new_table->free_entry_list = table->free_entry_list;
vtools_genesis 921
vtools_genesis 922 if (transfer_entries(new_table, table, false)) {
vtools_genesis 923 /* Entries transferred successfully; tie up the loose ends. */
vtools_genesis 924 free(table->bucket);
vtools_genesis 925 table->bucket = new_table->bucket;
vtools_genesis 926 table->bucket_limit = new_table->bucket_limit;
vtools_genesis 927 table->n_buckets = new_table->n_buckets;
vtools_genesis 928 table->n_buckets_used = new_table->n_buckets_used;
vtools_genesis 929 table->free_entry_list = new_table->free_entry_list;
vtools_genesis 930 /* |table->n_entries| and |table->entry_stack| already hold their value. */
vtools_genesis 931 return true;
vtools_genesis 932 }
vtools_genesis 933
vtools_genesis 934 /* We've allocated |new_table->bucket| (and possibly some
vtools_genesis 935 entries), exhausted the free list, and moved some but not all
vtools_genesis 936 entries into |new_table|. We must undo the partial move before
vtools_genesis 937 returning failure. The only way to get into this situation is
vtools_genesis 938 if |new_table| uses fewer buckets than the old table, so we
vtools_genesis 939 will reclaim some free entries as overflows in the new table
vtools_genesis 940 are put back into distinct buckets in the old table.
vtools_genesis 941
vtools_genesis 942 There are some pathological cases where a single pass through
vtools_genesis 943 the table requires more intermediate overflow entries than
vtools_genesis 944 using two passes. Two passes give worse cache performance and
vtools_genesis 945 takes longer, but at this point, we're already out of memory,
vtools_genesis 946 so slow and safe is better than failure. */
vtools_genesis 947 table->free_entry_list = new_table->free_entry_list;
vtools_genesis 948 if (!(transfer_entries(table, new_table, true)
vtools_genesis 949 && transfer_entries(table, new_table, false)))
vtools_genesis 950 abort();
vtools_genesis 951 /* |table->n_entries| already holds its value. */
vtools_genesis 952 free(new_table->bucket);
vtools_genesis 953 return false;
vtools_genesis 954 }
vtools_genesis 955
vtools_genesis 956 /* Insert |entry| into hash |table| if there is not already a matching
vtools_genesis 957 entry.
vtools_genesis 958
vtools_genesis 959 Return -1 upon memory allocation failure.
vtools_genesis 960 Return 1 if insertion succeeded.
vtools_genesis 961 Return 0 if there is already a matching entry in the table, and in
vtools_genesis 962 that case, if |matched_ent| is non-|NULL|, set |*matched_ent| to
vtools_genesis 963 that entry.
vtools_genesis 964
vtools_genesis 965 This interface is easier to use than |hash_insert| when you must
vtools_genesis 966 distinguish between the latter two cases. More importantly,
vtools_genesis 967 |hash_insert| is unusable for some types of |entry| values. When
vtools_genesis 968 using |hash_insert|, the only way to distinguish those cases is to
vtools_genesis 969 compare the return value and |entry|. That works only when you can
vtools_genesis 970 have two different |entry| values that point to data that compares
vtools_genesis 971 "equal". Thus, when the |entry| value is a simple scalar, you must
vtools_genesis 972 use |hash_insert_if_absent|. |entry| must not be |NULL|. */
vtools_genesis 973 int
vtools_genesis 974 hash_insert_if_absent(Hash_table *table, void const *entry,
vtools_genesis 975 void const **matched_ent) {
vtools_genesis 976 void *data;
vtools_genesis 977 struct hash_entry *bucket;
vtools_genesis 978
vtools_genesis 979 /* The caller cannot insert a |NULL| entry, since |hash_lookup|
vtools_genesis 980 returns |NULL| to indicate "not found", and |hash_find_entry|
vtools_genesis 981 uses |bucket->data == NULL| to indicate an empty bucket. */
vtools_genesis 982 if (!entry)
vtools_genesis 983 abort();
vtools_genesis 984
vtools_genesis 985 /* If there's a matching entry already in the table, return that. */
vtools_genesis 986 if ((data = hash_find_entry(table, entry, &bucket, false)) != NULL) {
vtools_genesis 987 if (matched_ent)
vtools_genesis 988 *matched_ent = data;
vtools_genesis 989 return 0;
vtools_genesis 990 }
vtools_genesis 991
vtools_genesis 992 /* If the growth threshold of the buckets in use has been reached,
vtools_genesis 993 increase the table size and rehash. There's no point in
vtools_genesis 994 checking the number of entries: if the hashing function is
vtools_genesis 995 ill-conditioned, rehashing is not likely to improve it. */
vtools_genesis 996
vtools_genesis 997 if (table->n_buckets_used
vtools_genesis 998 > table->tuning->growth_threshold * table->n_buckets) {
vtools_genesis 999 /* Check more fully, before starting real work. If tuning
vtools_genesis 1000 arguments became invalid, the second check will rely on
vtools_genesis 1001 proper defaults. */
vtools_genesis 1002 check_tuning(table);
vtools_genesis 1003 if (table->n_buckets_used
vtools_genesis 1004 > table->tuning->growth_threshold * table->n_buckets) {
vtools_genesis 1005 const Hash_tuning *tuning = table->tuning;
vtools_genesis 1006 float candidate =
vtools_genesis 1007 (tuning->is_n_buckets
vtools_genesis 1008 ? (table->n_buckets * tuning->growth_factor)
vtools_genesis 1009 : (table->n_buckets * tuning->growth_factor
vtools_genesis 1010 * tuning->growth_threshold));
vtools_genesis 1011
vtools_genesis 1012 if (SIZE_MAX <= candidate)
vtools_genesis 1013 return -1;
vtools_genesis 1014
vtools_genesis 1015 /* If the rehash fails, arrange to return |NULL|. */
vtools_genesis 1016 if (!hash_rehash(table, candidate))
vtools_genesis 1017 return -1;
vtools_genesis 1018
vtools_genesis 1019 /* Update the bucket we are interested in. */
vtools_genesis 1020 if (hash_find_entry(table, entry, &bucket, false) != NULL)
vtools_genesis 1021 abort();
vtools_genesis 1022 }
vtools_genesis 1023 }
vtools_genesis 1024
vtools_genesis 1025 /* |entry| is not matched, it should be inserted. */
vtools_genesis 1026
vtools_genesis 1027 if (bucket->data) {
vtools_genesis 1028 struct hash_entry *new_entry = allocate_entry(table);
vtools_genesis 1029
vtools_genesis 1030 if (new_entry == NULL)
vtools_genesis 1031 return -1;
vtools_genesis 1032
vtools_genesis 1033 /* Add |entry| in the overflow of the bucket. */
vtools_genesis 1034
vtools_genesis 1035 new_entry->data = (void *) entry;
vtools_genesis 1036 new_entry->next = bucket->next;
vtools_genesis 1037 bucket->next = new_entry;
vtools_genesis 1038 table->n_entries++;
vtools_genesis 1039 return 1;
vtools_genesis 1040 }
vtools_genesis 1041
vtools_genesis 1042 /* Add |entry| right in the bucket head. */
vtools_genesis 1043
vtools_genesis 1044 bucket->data = (void *) entry;
vtools_genesis 1045 table->n_entries++;
vtools_genesis 1046 table->n_buckets_used++;
vtools_genesis 1047
vtools_genesis 1048 return 1;
vtools_genesis 1049 }
vtools_genesis 1050
vtools_genesis 1051 /* If |entry| matches an entry already in the hash table, return the
vtools_genesis 1052 pointer to the entry from the table. Otherwise, insert |entry| and
vtools_genesis 1053 return |entry|. Return |NULL| if the storage required for
vtools_genesis 1054 insertion cannot be allocated. This implementation does not
vtools_genesis 1055 support duplicate entries or insertion of |NULL|. */
vtools_genesis 1056
vtools_genesis 1057 void *
vtools_genesis 1058 hash_insert(Hash_table *table, void const *entry) {
vtools_genesis 1059 void const *matched_ent;
vtools_genesis 1060 int err = hash_insert_if_absent(table, entry, &matched_ent);
vtools_genesis 1061 return (err == -1
vtools_genesis 1062 ? NULL
vtools_genesis 1063 : (void *) (err == 0 ? matched_ent : entry));
vtools_genesis 1064 }
vtools_genesis 1065
vtools_genesis 1066 /* If |entry| is already in the table, remove it and return the
vtools_genesis 1067 just-deleted data (the user may want to deallocate its storage).
vtools_genesis 1068 If |entry| is not in the table, don't modify the table and return
vtools_genesis 1069 |NULL|. */
vtools_genesis 1070
vtools_genesis 1071 void *
vtools_genesis 1072 hash_delete(Hash_table *table, const void *entry) {
vtools_genesis 1073 void *data;
vtools_genesis 1074 struct hash_entry *bucket;
vtools_genesis 1075
vtools_genesis 1076 data = hash_find_entry(table, entry, &bucket, true);
vtools_genesis 1077 if (!data)
vtools_genesis 1078 return NULL;
vtools_genesis 1079
vtools_genesis 1080 table->n_entries--;
vtools_genesis 1081 if (!bucket->data) {
vtools_genesis 1082 table->n_buckets_used--;
vtools_genesis 1083
vtools_genesis 1084 /* If the shrink threshold of the buckets in use has been
vtools_genesis 1085 reached, rehash into a smaller table. */
vtools_genesis 1086
vtools_genesis 1087 if (table->n_buckets_used
vtools_genesis 1088 < table->tuning->shrink_threshold * table->n_buckets) {
vtools_genesis 1089 /* Check more fully, before starting real work. If tuning
vtools_genesis 1090 arguments became invalid, the second check will rely on
vtools_genesis 1091 proper defaults. */
vtools_genesis 1092 check_tuning(table);
vtools_genesis 1093 if (table->n_buckets_used
vtools_genesis 1094 < table->tuning->shrink_threshold * table->n_buckets) {
vtools_genesis 1095 const Hash_tuning *tuning = table->tuning;
vtools_genesis 1096 size_t candidate =
vtools_genesis 1097 (tuning->is_n_buckets
vtools_genesis 1098 ? table->n_buckets * tuning->shrink_factor
vtools_genesis 1099 : (table->n_buckets * tuning->shrink_factor
vtools_genesis 1100 * tuning->growth_threshold));
vtools_genesis 1101
vtools_genesis 1102 if (!hash_rehash(table, candidate)) {
vtools_genesis 1103 /* Failure to allocate memory in an attempt to
vtools_genesis 1104 shrink the table is not fatal. But since
vtools_genesis 1105 memory is low, we can at least be kind and free
vtools_genesis 1106 any spare entries, rather than keeping them
vtools_genesis 1107 tied up in the free entry list. */
vtools_genesis 1108 #if !USE_OBSTACK
vtools_genesis 1109 struct hash_entry *cursor = table->free_entry_list;
vtools_genesis 1110 struct hash_entry *next;
vtools_genesis 1111 while (cursor) {
vtools_genesis 1112 next = cursor->next;
vtools_genesis 1113 free(cursor);
vtools_genesis 1114 cursor = next;
vtools_genesis 1115 }
vtools_genesis 1116 table->free_entry_list = NULL;
vtools_genesis 1117 #endif
vtools_genesis 1118 }
vtools_genesis 1119 }
vtools_genesis 1120 }
vtools_genesis 1121 }
vtools_genesis 1122
vtools_genesis 1123 return data;
vtools_genesis 1124 }
vtools_genesis 1125
vtools_genesis 1126 /* Testing. */
vtools_genesis 1127
vtools_genesis 1128 #if 0
vtools_genesis 1129
vtools_genesis 1130 void
vtools_genesis 1131 hash_print (const Hash_table *table)
vtools_genesis 1132 {
vtools_genesis 1133 struct hash_entry *bucket = (struct hash_entry *) table->bucket;
vtools_genesis 1134
vtools_genesis 1135 for ( ; bucket < table->bucket_limit; bucket++)
vtools_genesis 1136 {
vtools_genesis 1137 struct hash_entry *cursor;
vtools_genesis 1138
vtools_genesis 1139 if (bucket)
vtools_genesis 1140 printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));
vtools_genesis 1141
vtools_genesis 1142 for (cursor = bucket; cursor; cursor = cursor->next)
vtools_genesis 1143 {
vtools_genesis 1144 char const *s = cursor->data;
vtools_genesis 1145 /* FIXME */
vtools_genesis 1146 if (s)
vtools_genesis 1147 printf (" %s\n", s);
vtools_genesis 1148 }
vtools_genesis 1149 }
vtools_genesis 1150 }
vtools_genesis 1151
vtools_genesis 1152 #endif