File Coverage

bpc_hashtable.c
Criterion Covered Total %
statement 0 166 0.0
branch 0 104 0.0
condition n/a
subroutine n/a
pod n/a
total 0 270 0.0


line stmt bran cond sub pod time code
1             /*
2             * Routines to provide a memory-efficient hashtable.
3             *
4             * Copyright (C) 2007-2009 Wayne Davison
5             *
6             * Modified for BackupPC to use arbitrary-length binary keys, and supporting
7             * a rudimentary delete feature by Craig Barratt. In 6/2016 rewrote to
8             * make the storage an array of pointers to entries, instead of inplace.
9             * That way entries fetched from the hashtable are still value after a
10             * resize. Still no chaining.
11             *
12             * This program is free software; you can redistribute it and/or modify
13             * it under the terms of the GNU General Public License as published by
14             * the Free Software Foundation; either version 3 of the License, or
15             * (at your option) any later version.
16             *
17             * This program is distributed in the hope that it will be useful,
18             * but WITHOUT ANY WARRANTY; without even the implied warranty of
19             * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20             * GNU General Public License for more details.
21             *
22             * You should have received a copy of the GNU General Public License along
23             * with this program; if not, visit the http://fsf.org website.
24             */
25              
26             #include "backuppc.h"
27              
28             /*
29             * Simple freelist of hash table entries. We maintain a single linked list of
30             * unused entries of each size, indexed by the FREELIST_SIZE2IDX() macro.
31             *
32             * FreeList[0] isn't used,
33             * FreeList[1] is a free list of blocks of size 8,
34             * FreeList[2] is a free list of blocks of size 16, ...
35             *
36             * eg, if you ask for a block of size 9, a block of size 16 will be returned.
37             */
38             static bpc_hashtable_key **FreeList;
39             static uint32 FreeListSz;
40              
41             /*
42             * to map size to the FreeList index we round up to the nearest multiple of 8
43             */
44             #define FREELIST_SIZE2IDX(size) (((size) + 7) / 8)
45             #define FREELIST_IDX2SIZE(idx) ((idx) * 8)
46             /*
47             * how many new blocks to allocate when the free list is empty
48             */
49             #define FREELIST_ALLOC_CNT (512)
50              
51             /*
52             * allocate a single block of a given size by grabbing one off the FreeList
53             */
54 0           static bpc_hashtable_key *bpc_hashtable_entryAlloc(uint32 size)
55             {
56 0           uint32 freeListIdx = FREELIST_SIZE2IDX(size);
57             bpc_hashtable_key *key;
58              
59 0           size = FREELIST_IDX2SIZE(freeListIdx);
60 0 0         if ( freeListIdx >= FreeListSz ) {
61             /*
62             * need a bigger array of freelists
63             */
64 0 0         if ( !(FreeList = (bpc_hashtable_key**)realloc(FreeList, 2 * freeListIdx * sizeof(bpc_hashtable_key*))) ) {
65 0           bpc_logErrf("bpc_hashtable_entryAlloc: out of memory\n");
66 0           return NULL;
67             }
68 0           memset(FreeList + FreeListSz, 0, (2 * freeListIdx - FreeListSz) * sizeof(bpc_hashtable_key*));
69 0           FreeListSz = 2 * freeListIdx;
70             }
71 0 0         if ( !FreeList[freeListIdx] ) {
72             char *newBuf;
73             uint32 i;
74             /*
75             * need to populate the freelist with more blocks
76             */
77 0 0         if ( !(newBuf = (char*)malloc(size * FREELIST_ALLOC_CNT)) ) {
78 0           bpc_logErrf("bpc_hashtable_entryAlloc: out of memory\n");
79 0           return NULL;
80             }
81 0           FreeList[freeListIdx] = (bpc_hashtable_key*)newBuf;
82             /*
83             * chain all the buffers together in a linked list
84             */
85 0           key = (bpc_hashtable_key*)newBuf;
86 0 0         for ( i = 0 ; i < FREELIST_ALLOC_CNT - 1 ; i++ ) {
87 0           key->key = (void*)key + size;
88 0           key = key->key;
89             }
90 0           key->key = NULL;
91             }
92 0           key = FreeList[freeListIdx];
93 0           FreeList[freeListIdx] = key->key;
94 0           memset(key, 0, size);
95 0           return key;
96             }
97              
98             /*
99             * free a block of a given size by putting it back on the FreeList
100             */
101 0           static void bpc_hashtable_entryFree(bpc_hashtable_key *key, uint32 size)
102             {
103 0           uint32 freeListIdx = FREELIST_SIZE2IDX(size);
104              
105 0           key->key = FreeList[freeListIdx];
106 0           FreeList[freeListIdx] = key;
107 0           }
108              
109             #define HASH_LOAD_LIMIT(size) ((size)*3/4)
110              
111             /*
112             * This implements a very simple linear hash table (no chaining etc).
113             *
114             * It has rudimentary support for delete, by flagging the deleted node. It doesn't
115             * shift other nodes on delete, but can re-use a deleted node on insert.
116             */
117              
118             /*
119             * Create a hash table of the initial given size, with entries of size nodeSize
120             */
121 0           void bpc_hashtable_create(bpc_hashtable *tbl, uint32 size, uint32 nodeSize)
122             {
123             /* Pick a power of 2 that can hold the requested size. */
124 0 0         if ( (size & (size-1)) || size < 16 ) {
    0          
125 0           uint32 req = size;
126 0           size = 16;
127 0 0         while ( size < req ) {
128 0           size *= 2;
129             }
130             }
131 0 0         if ( !(tbl->nodes = calloc(size, sizeof(tbl->nodes[0]))) ) {
132 0           bpc_logErrf("bpc_hashtable_create: out of memory\n");
133 0           return;
134             }
135 0           tbl->size = size;
136 0           tbl->entries = 0;
137 0           tbl->entriesDel = 0;
138 0           tbl->nodeSize = nodeSize;
139              
140 0           return;
141             }
142              
143 0           void bpc_hashtable_destroy(bpc_hashtable *tbl)
144             {
145             uint32 i;
146 0 0         for ( i = 0 ; i < tbl->size ; i++ ) {
147 0 0         if ( tbl->nodes[i] ) {
148 0           bpc_hashtable_entryFree(tbl->nodes[i], tbl->nodeSize);
149             }
150             }
151 0           free(tbl->nodes);
152 0           }
153              
154 0           void bpc_hashtable_erase(bpc_hashtable *tbl)
155             {
156             uint32 i;
157 0 0         for ( i = 0 ; i < tbl->size ; i++ ) {
158 0 0         if ( tbl->nodes[i] ) {
159 0           bpc_hashtable_entryFree(tbl->nodes[i], tbl->nodeSize);
160             }
161             }
162 0           memset(tbl->nodes, 0, tbl->size * sizeof(tbl->nodes[0]));
163 0           tbl->entries = 0;
164 0           tbl->entriesDel = 0;
165 0           }
166              
167             /*
168             * Compute a hash for a given key. Note that it is *not* modulo the table size - the returned
169             * hash is independent of the table size, so we don't have to recompute this hash if we
170             * resize the table. However, the current implementation does recompute the hash when
171             * we resize the hash table :(. Oh well.
172             */
173 0           uint32 bpc_hashtable_hash(uchar *key, uint32 keyLen)
174             {
175             /* Based on Jenkins One-at-a-time hash. */
176             uint32 ndx;
177              
178 0 0         for ( ndx = 0 ; keyLen > 0 ; keyLen-- ) {
179 0           ndx += *key++;
180 0           ndx += (ndx << 10);
181 0           ndx ^= (ndx >> 6);
182             }
183 0           ndx += (ndx << 3);
184 0           ndx ^= (ndx >> 11);
185 0           ndx += (ndx << 15);
186              
187 0           return ndx;
188             }
189              
190             #if 0
191             static void bpc_hashttable_check(bpc_hashtable *tbl, char *str)
192             {
193             bpc_hashtable_key **node = tbl->nodes;
194             uint i, entries = 0, entriesDel = 0;
195              
196             for ( i = 0 ; i < tbl->size ; i++, node++ ) {
197             bpc_hashtable_key *keyInfo = *node;
198             if ( !keyInfo ) {
199             continue;
200             }
201             if ( !keyInfo->key && keyInfo->keyLen == 1 ) {
202             entriesDel++;
203             } else {
204             entries++;
205             }
206             }
207             if ( entries != tbl->entries ) {
208             bpc_logErrf("bpc_hashttable_check: botch at %s on HT (%u,%u): got %u entries vs %u expected\n",
209             str, tbl->size, tbl->nodeSize, entries, tbl->entries);
210             tbl->entries = entries;
211             }
212             if ( entriesDel != tbl->entriesDel ) {
213             bpc_logErrf("bpc_hashttable_check: botch at %s on HT (%u,%u): got %u entriesDel vs %u expected\n",
214             str, tbl->size, tbl->nodeSize, entriesDel, tbl->entriesDel);
215             tbl->entriesDel = entriesDel;
216             }
217             }
218             #endif
219              
220             /*
221             * Ensure the hash table is of size at least newSize
222             */
223 0           void bpc_hashtable_growSize(bpc_hashtable *tbl, uint32 newSize)
224             {
225 0           bpc_hashtable_key **old_nodes = tbl->nodes;
226 0           bpc_hashtable_key **old_node = tbl->nodes;
227 0           uint32 oldSize = tbl->size;
228             uint i, j, ndx;
229              
230             /* Pick a power of 2 that can hold the requested newSize. */
231 0 0         if ( (newSize & (newSize-1)) || newSize < 16 ) {
    0          
232 0           uint32 req = newSize;
233 0           newSize = 16;
234 0 0         while ( newSize < req ) {
235 0           newSize *= 2;
236             }
237             }
238 0 0         if ( tbl->size >= newSize ) return;
239 0 0         if ( !(tbl->nodes = (bpc_hashtable_key**)calloc(newSize, sizeof(tbl->nodes[0]))) ) {
240 0           bpc_logErrf("bpc_hashtable_create: out of memory\n");
241 0           return;
242             }
243 0           tbl->entries = 0;
244 0           tbl->entriesDel = 0;
245 0           tbl->size = newSize;
246              
247 0 0         for ( i = 0 ; i < oldSize ; i++, old_node++ ) {
248 0           bpc_hashtable_key *keyInfo = *old_node;
249              
250             /* empty slot */
251 0 0         if ( !keyInfo ) continue;
252              
253             /* deleted slot: free it and don't reinsert */
254 0 0         if ( !keyInfo->key && keyInfo->keyLen == 1 ) {
    0          
255 0           bpc_hashtable_entryFree(keyInfo, tbl->nodeSize);
256 0           continue;
257             }
258 0           ndx = keyInfo->keyHash & (tbl->size - 1);
259 0 0         for ( j = 0 ; j < tbl->size ; j++, ndx++ ) {
260 0 0         if ( ndx >= tbl->size ) ndx = 0;
261 0 0         if ( tbl->nodes[ndx] ) continue;
262 0           tbl->nodes[ndx] = keyInfo;
263 0           tbl->entries++;
264 0           break;
265             }
266 0 0         if ( j >= tbl->size ) {
267 0           bpc_logErrf("bpc_hashtable_growSize: botch on filling new hashtable (%d,%d)\n", newSize, tbl->entries);
268 0           return;
269             }
270             }
271 0           free(old_nodes);
272             }
273              
274             /*
275             * This returns the node for the indicated key, either newly created or
276             * already existing. Returns NULL if not allocating and not found.
277             */
278 0           void *bpc_hashtable_find(bpc_hashtable *tbl, unsigned char *key, unsigned int keyLen, int allocate_if_missing)
279             {
280 0           bpc_hashtable_key *keyInfo, *keyDeleted = NULL;
281             uint32 i, ndx, keyHash;
282              
283 0 0         if ( allocate_if_missing && tbl->entries + tbl->entriesDel > HASH_LOAD_LIMIT(tbl->size) ) {
    0          
284 0           bpc_hashtable_growSize(tbl, tbl->size * 2);
285             }
286              
287             /* bpc_hashttable_check(tbl, "find"); */
288              
289             /*
290             * If it already exists, return the node. If we're not
291             * allocating, return NULL if the key is not found.
292             */
293 0           ndx = keyHash = bpc_hashtable_hash(key, keyLen);
294 0           ndx &= tbl->size - 1;
295 0 0         for ( i = 0 ; i < tbl->size ; i++ ) {
296 0           keyInfo = tbl->nodes[ndx];
297              
298 0 0         if ( !keyInfo ) {
299             /*
300             * Not found since we hit an empty node (ie: not a deleted one)
301             * If requested, place the new at a prior deleted node, or here
302             */
303 0 0         if ( allocate_if_missing ) {
304 0           tbl->entries++;
305 0 0         if ( keyDeleted ) {
306             /*
307             * we found a prior deleted entry, so use it instead
308             */
309 0           keyInfo = keyDeleted;
310 0           tbl->entriesDel--;
311             } else {
312 0           tbl->nodes[ndx] = keyInfo = bpc_hashtable_entryAlloc(tbl->nodeSize);
313             }
314 0           keyInfo->key = key;
315 0           keyInfo->keyLen = keyLen;
316 0           keyInfo->keyHash = keyHash;
317             /* TODO - check this? */
318 0 0         if ( !key ) {
319 0           bpc_logErrf("bpc_hashtable_find: botch adding NULL key to hT (%d,%d)\n", tbl->size, tbl->nodeSize);
320             }
321 0           return (void*)keyInfo;
322             }
323 0           return (void*)NULL;
324             } else {
325 0 0         if ( !keyInfo->key && keyInfo->keyLen == 1 ) {
    0          
326 0 0         if ( !keyDeleted ) {
327             /*
328             * this is the first deleted slot, which we remember so we can insert a new entry
329             * here if we don't find the desired entry, and allocate_if_missing != 0
330             */
331 0           keyDeleted = keyInfo;
332             }
333 0 0         } else if ( keyInfo->keyHash == keyHash && keyInfo->keyLen == keyLen && !memcmp(key, keyInfo->key, keyLen) ) {
    0          
    0          
334 0           return (void*)keyInfo;
335             }
336             }
337 0           ndx++;
338 0 0         if ( ndx >= tbl->size ) ndx = 0;
339             }
340 0           return (void*)NULL;
341             }
342              
343             /*
344             * Remove a node from the hash table. Node must be a valid node returned by bpc_hashtable_find!
345             * Node gets cleared.
346             */
347 0           void bpc_hashtable_nodeDelete(bpc_hashtable *tbl, void *node)
348             {
349 0           bpc_hashtable_key *keyInfo = (bpc_hashtable_key*)node;
350              
351 0           memset(node, 0, tbl->nodeSize);
352             /*
353             * special delete flag (key is NULL, keyLen is 1), so that the linear hash table continues
354             * finding entries past this point.
355             * TODO optimization: if the next entry is empty, then we can make this empty too.
356             */
357 0           keyInfo->keyLen = 1;
358 0           tbl->entries--;
359 0           tbl->entriesDel++;
360              
361             /* bpc_hashttable_check(tbl, "delete"); */
362 0           }
363              
364             /*
365             * Iterate over all the entries in the hash table, calling a callback for each valid entry
366             *
367             * Note: this function won't work if the callback adds new entries to the hash table while
368             * iterating over the entries. You can update or delete entries, but adding an entry might
369             * cause the * hash table size to be bumped, which breaks the indexing. So don't add new
370             * entries while iterating over the table.
371             */
372 0           void bpc_hashtable_iterate(bpc_hashtable *tbl, void (*callback)(void*, void*), void *arg1)
373             {
374 0           uint i, entries = 0, entriesDel = 0;
375              
376             /* bpc_hashttable_check(tbl, "iterate"); */
377              
378 0 0         for ( i = 0 ; i < tbl->size ; i++ ) {
379 0           bpc_hashtable_key *keyInfo = tbl->nodes[i];
380              
381 0 0         if ( !keyInfo ) continue;
382 0 0         if ( !keyInfo->key ) {
383 0 0         if ( keyInfo->keyLen == 1 ) entriesDel++;
384 0           continue;
385             }
386 0           (*callback)((void*)keyInfo, arg1);
387 0 0         if ( !keyInfo->key ) {
388 0 0         if ( keyInfo->keyLen == 1 ) entriesDel++;
389 0           continue;
390             } else {
391 0           entries++;
392             }
393             }
394 0 0         if ( entries != tbl->entries ) {
395 0           bpc_logErrf("bpc_hashtable_iterate: botch on HT (%u,%u): got %u entries vs %u expected\n",
396             tbl->size, tbl->nodeSize, entries, tbl->entries);
397 0           tbl->entries = entries;
398             }
399 0 0         if ( entriesDel != tbl->entriesDel ) {
400 0           bpc_logErrf("bpc_hashtable_iterate: botch on HT (%u,%u): got %u entriesDel vs %u expected\n",
401             tbl->size, tbl->nodeSize, entriesDel, tbl->entriesDel);
402 0           tbl->entriesDel = entriesDel;
403             }
404 0           }
405              
406             /*
407             * An alternative way to iterate over all the hash table entries. Initially index should
408             * be zero, and is updated on each call. A pointer to each entry is returned. After
409             * the last entry, NULL is returned, and idx is set back to zero.
410             *
411             * Note: this function won't work if you add new entries to the hash table while iterating
412             * over the entries. You can update or delete entries, but adding an entry might cause the
413             * hash table size to be bumped, which breaks the indexing. So don't add new entries while
414             * iterating over the table.
415             */
416 0           void *bpc_hashtable_nextEntry(bpc_hashtable *tbl, uint *idx)
417             {
418 0           uint i = *idx;
419              
420             /* bpc_hashttable_check(tbl, "next entry"); */
421              
422 0 0         for ( ; i < (uint)tbl->size ; i++ ) {
423 0           bpc_hashtable_key *keyInfo = tbl->nodes[i];
424 0 0         if ( !keyInfo || !keyInfo->key ) continue;
    0          
425 0           *idx = i + 1;
426 0           return (void*)keyInfo;
427             }
428 0           *idx = 0;
429 0           return NULL;
430             }
431              
432             /*
433             * Return the number of entries in the hash table
434             */
435 0           int bpc_hashtable_entryCount(bpc_hashtable *tbl)
436             {
437             /* bpc_hashttable_check(tbl, "entryCount"); */
438 0           return tbl->entries;
439             }