1*1f5207b7SJohn Levon /* Copyright (C) 2004 Christopher Clark <firstname.lastname@cl.cam.ac.uk> */
2*1f5207b7SJohn Levon 
3*1f5207b7SJohn Levon #include "hashtable.h"
4*1f5207b7SJohn Levon #include "hashtable_private.h"
5*1f5207b7SJohn Levon #include <stdlib.h>
6*1f5207b7SJohn Levon #include <stdio.h>
7*1f5207b7SJohn Levon #include <string.h>
8*1f5207b7SJohn Levon #include <math.h>
9*1f5207b7SJohn Levon 
10*1f5207b7SJohn Levon /*
11*1f5207b7SJohn Levon Credit for primes table: Aaron Krowne
12*1f5207b7SJohn Levon  http://br.endernet.org/~akrowne/
13*1f5207b7SJohn Levon  http://planetmath.org/encyclopedia/GoodHashTablePrimes.html
14*1f5207b7SJohn Levon */
15*1f5207b7SJohn Levon static const unsigned int primes[] = {
16*1f5207b7SJohn Levon 53, 97, 193, 389,
17*1f5207b7SJohn Levon 769, 1543, 3079, 6151,
18*1f5207b7SJohn Levon 12289, 24593, 49157, 98317,
19*1f5207b7SJohn Levon 196613, 393241, 786433, 1572869,
20*1f5207b7SJohn Levon 3145739, 6291469, 12582917, 25165843,
21*1f5207b7SJohn Levon 50331653, 100663319, 201326611, 402653189,
22*1f5207b7SJohn Levon 805306457, 1610612741
23*1f5207b7SJohn Levon };
24*1f5207b7SJohn Levon const unsigned int prime_table_length = sizeof(primes)/sizeof(primes[0]);
25*1f5207b7SJohn Levon const float max_load_factor = 0.65;
26*1f5207b7SJohn Levon 
27*1f5207b7SJohn Levon /*****************************************************************************/
28*1f5207b7SJohn Levon struct hashtable *
create_hashtable(unsigned int minsize,unsigned int (* hashf)(void *),int (* eqf)(void *,void *))29*1f5207b7SJohn Levon create_hashtable(unsigned int minsize,
30*1f5207b7SJohn Levon                  unsigned int (*hashf) (void*),
31*1f5207b7SJohn Levon                  int (*eqf) (void*,void*))
32*1f5207b7SJohn Levon {
33*1f5207b7SJohn Levon     struct hashtable *h;
34*1f5207b7SJohn Levon     unsigned int pindex, size = primes[0];
35*1f5207b7SJohn Levon     /* Check requested hashtable isn't too large */
36*1f5207b7SJohn Levon     if (minsize > (1u << 30)) return NULL;
37*1f5207b7SJohn Levon     /* Enforce size as prime */
38*1f5207b7SJohn Levon     for (pindex=0; pindex < prime_table_length; pindex++) {
39*1f5207b7SJohn Levon         if (primes[pindex] > minsize) { size = primes[pindex]; break; }
40*1f5207b7SJohn Levon     }
41*1f5207b7SJohn Levon     h = (struct hashtable *)malloc(sizeof(struct hashtable));
42*1f5207b7SJohn Levon     if (NULL == h) return NULL; /*oom*/
43*1f5207b7SJohn Levon     h->table = (struct entry **)malloc(sizeof(struct entry*) * size);
44*1f5207b7SJohn Levon     if (NULL == h->table) { free(h); return NULL; } /*oom*/
45*1f5207b7SJohn Levon     memset(h->table, 0, size * sizeof(struct entry *));
46*1f5207b7SJohn Levon     h->tablelength  = size;
47*1f5207b7SJohn Levon     h->primeindex   = pindex;
48*1f5207b7SJohn Levon     h->entrycount   = 0;
49*1f5207b7SJohn Levon     h->hashfn       = hashf;
50*1f5207b7SJohn Levon     h->eqfn         = eqf;
51*1f5207b7SJohn Levon     h->loadlimit    = (unsigned int) ceil(size * max_load_factor);
52*1f5207b7SJohn Levon     return h;
53*1f5207b7SJohn Levon }
54*1f5207b7SJohn Levon 
55*1f5207b7SJohn Levon /*****************************************************************************/
56*1f5207b7SJohn Levon unsigned int
hash(struct hashtable * h,void * k)57*1f5207b7SJohn Levon hash(struct hashtable *h, void *k)
58*1f5207b7SJohn Levon {
59*1f5207b7SJohn Levon     /* Aim to protect against poor hash functions by adding logic here
60*1f5207b7SJohn Levon      * - logic taken from java 1.4 hashtable source */
61*1f5207b7SJohn Levon     unsigned int i = h->hashfn(k);
62*1f5207b7SJohn Levon     i += ~(i << 9);
63*1f5207b7SJohn Levon     i ^=  ((i >> 14) | (i << 18)); /* >>> */
64*1f5207b7SJohn Levon     i +=  (i << 4);
65*1f5207b7SJohn Levon     i ^=  ((i >> 10) | (i << 22)); /* >>> */
66*1f5207b7SJohn Levon     return i;
67*1f5207b7SJohn Levon }
68*1f5207b7SJohn Levon 
69*1f5207b7SJohn Levon /*****************************************************************************/
70*1f5207b7SJohn Levon static int
hashtable_expand(struct hashtable * h)71*1f5207b7SJohn Levon hashtable_expand(struct hashtable *h)
72*1f5207b7SJohn Levon {
73*1f5207b7SJohn Levon     /* Double the size of the table to accomodate more entries */
74*1f5207b7SJohn Levon     struct entry **newtable;
75*1f5207b7SJohn Levon     struct entry *e;
76*1f5207b7SJohn Levon     struct entry **pE;
77*1f5207b7SJohn Levon     unsigned int newsize, i, index;
78*1f5207b7SJohn Levon     /* Check we're not hitting max capacity */
79*1f5207b7SJohn Levon     if (h->primeindex == (prime_table_length - 1)) return 0;
80*1f5207b7SJohn Levon     newsize = primes[++(h->primeindex)];
81*1f5207b7SJohn Levon 
82*1f5207b7SJohn Levon     newtable = (struct entry **)malloc(sizeof(struct entry*) * newsize);
83*1f5207b7SJohn Levon     if (NULL != newtable)
84*1f5207b7SJohn Levon     {
85*1f5207b7SJohn Levon         memset(newtable, 0, newsize * sizeof(struct entry *));
86*1f5207b7SJohn Levon         /* This algorithm is not 'stable'. ie. it reverses the list
87*1f5207b7SJohn Levon          * when it transfers entries between the tables */
88*1f5207b7SJohn Levon         for (i = 0; i < h->tablelength; i++) {
89*1f5207b7SJohn Levon             while (NULL != (e = h->table[i])) {
90*1f5207b7SJohn Levon                 h->table[i] = e->next;
91*1f5207b7SJohn Levon                 index = indexFor(newsize,e->h);
92*1f5207b7SJohn Levon                 e->next = newtable[index];
93*1f5207b7SJohn Levon                 newtable[index] = e;
94*1f5207b7SJohn Levon             }
95*1f5207b7SJohn Levon         }
96*1f5207b7SJohn Levon         free(h->table);
97*1f5207b7SJohn Levon         h->table = newtable;
98*1f5207b7SJohn Levon     }
99*1f5207b7SJohn Levon     /* Plan B: realloc instead */
100*1f5207b7SJohn Levon     else
101*1f5207b7SJohn Levon     {
102*1f5207b7SJohn Levon         newtable = (struct entry **)
103*1f5207b7SJohn Levon                    realloc(h->table, newsize * sizeof(struct entry *));
104*1f5207b7SJohn Levon         if (NULL == newtable) { (h->primeindex)--; return 0; }
105*1f5207b7SJohn Levon         h->table = newtable;
106*1f5207b7SJohn Levon         memset(newtable[h->tablelength], 0, newsize - h->tablelength);
107*1f5207b7SJohn Levon         for (i = 0; i < h->tablelength; i++) {
108*1f5207b7SJohn Levon             for (pE = &(newtable[i]), e = *pE; e != NULL; e = *pE) {
109*1f5207b7SJohn Levon                 index = indexFor(newsize,e->h);
110*1f5207b7SJohn Levon                 if (index == i)
111*1f5207b7SJohn Levon                 {
112*1f5207b7SJohn Levon                     pE = &(e->next);
113*1f5207b7SJohn Levon                 }
114*1f5207b7SJohn Levon                 else
115*1f5207b7SJohn Levon                 {
116*1f5207b7SJohn Levon                     *pE = e->next;
117*1f5207b7SJohn Levon                     e->next = newtable[index];
118*1f5207b7SJohn Levon                     newtable[index] = e;
119*1f5207b7SJohn Levon                 }
120*1f5207b7SJohn Levon             }
121*1f5207b7SJohn Levon         }
122*1f5207b7SJohn Levon     }
123*1f5207b7SJohn Levon     h->tablelength = newsize;
124*1f5207b7SJohn Levon     h->loadlimit   = (unsigned int) ceil(newsize * max_load_factor);
125*1f5207b7SJohn Levon     return -1;
126*1f5207b7SJohn Levon }
127*1f5207b7SJohn Levon 
128*1f5207b7SJohn Levon /*****************************************************************************/
129*1f5207b7SJohn Levon unsigned int
hashtable_count(struct hashtable * h)130*1f5207b7SJohn Levon hashtable_count(struct hashtable *h)
131*1f5207b7SJohn Levon {
132*1f5207b7SJohn Levon     return h->entrycount;
133*1f5207b7SJohn Levon }
134*1f5207b7SJohn Levon 
135*1f5207b7SJohn Levon /*****************************************************************************/
136*1f5207b7SJohn Levon int
hashtable_insert(struct hashtable * h,void * k,void * v)137*1f5207b7SJohn Levon hashtable_insert(struct hashtable *h, void *k, void *v)
138*1f5207b7SJohn Levon {
139*1f5207b7SJohn Levon     /* This method allows duplicate keys - but they shouldn't be used */
140*1f5207b7SJohn Levon     unsigned int index;
141*1f5207b7SJohn Levon     struct entry *e;
142*1f5207b7SJohn Levon     if (++(h->entrycount) > h->loadlimit)
143*1f5207b7SJohn Levon     {
144*1f5207b7SJohn Levon         /* Ignore the return value. If expand fails, we should
145*1f5207b7SJohn Levon          * still try cramming just this value into the existing table
146*1f5207b7SJohn Levon          * -- we may not have memory for a larger table, but one more
147*1f5207b7SJohn Levon          * element may be ok. Next time we insert, we'll try expanding again.*/
148*1f5207b7SJohn Levon         hashtable_expand(h);
149*1f5207b7SJohn Levon     }
150*1f5207b7SJohn Levon     e = (struct entry *)malloc(sizeof(struct entry));
151*1f5207b7SJohn Levon     if (NULL == e) { --(h->entrycount); return 0; } /*oom*/
152*1f5207b7SJohn Levon     e->h = hash(h,k);
153*1f5207b7SJohn Levon     index = indexFor(h->tablelength,e->h);
154*1f5207b7SJohn Levon     e->k = k;
155*1f5207b7SJohn Levon     e->v = v;
156*1f5207b7SJohn Levon     e->next = h->table[index];
157*1f5207b7SJohn Levon     h->table[index] = e;
158*1f5207b7SJohn Levon     return -1;
159*1f5207b7SJohn Levon }
160*1f5207b7SJohn Levon 
161*1f5207b7SJohn Levon /*****************************************************************************/
162*1f5207b7SJohn Levon void * /* returns value associated with key */
hashtable_search(struct hashtable * h,void * k)163*1f5207b7SJohn Levon hashtable_search(struct hashtable *h, void *k)
164*1f5207b7SJohn Levon {
165*1f5207b7SJohn Levon     struct entry *e;
166*1f5207b7SJohn Levon     unsigned int hashvalue, index;
167*1f5207b7SJohn Levon     hashvalue = hash(h,k);
168*1f5207b7SJohn Levon     index = indexFor(h->tablelength,hashvalue);
169*1f5207b7SJohn Levon     e = h->table[index];
170*1f5207b7SJohn Levon     while (NULL != e)
171*1f5207b7SJohn Levon     {
172*1f5207b7SJohn Levon         /* Check hash value to short circuit heavier comparison */
173*1f5207b7SJohn Levon         if ((hashvalue == e->h) && (h->eqfn(k, e->k))) return e->v;
174*1f5207b7SJohn Levon         e = e->next;
175*1f5207b7SJohn Levon     }
176*1f5207b7SJohn Levon     return NULL;
177*1f5207b7SJohn Levon }
178*1f5207b7SJohn Levon 
179*1f5207b7SJohn Levon /*****************************************************************************/
180*1f5207b7SJohn Levon void * /* returns value associated with key */
hashtable_remove(struct hashtable * h,void * k)181*1f5207b7SJohn Levon hashtable_remove(struct hashtable *h, void *k)
182*1f5207b7SJohn Levon {
183*1f5207b7SJohn Levon     /* TODO: consider compacting the table when the load factor drops enough,
184*1f5207b7SJohn Levon      *       or provide a 'compact' method. */
185*1f5207b7SJohn Levon 
186*1f5207b7SJohn Levon     struct entry *e;
187*1f5207b7SJohn Levon     struct entry **pE;
188*1f5207b7SJohn Levon     void *v;
189*1f5207b7SJohn Levon     unsigned int hashvalue, index;
190*1f5207b7SJohn Levon 
191*1f5207b7SJohn Levon     hashvalue = hash(h,k);
192*1f5207b7SJohn Levon     index = indexFor(h->tablelength,hash(h,k));
193*1f5207b7SJohn Levon     pE = &(h->table[index]);
194*1f5207b7SJohn Levon     e = *pE;
195*1f5207b7SJohn Levon     while (NULL != e)
196*1f5207b7SJohn Levon     {
197*1f5207b7SJohn Levon         /* Check hash value to short circuit heavier comparison */
198*1f5207b7SJohn Levon         if ((hashvalue == e->h) && (h->eqfn(k, e->k)))
199*1f5207b7SJohn Levon         {
200*1f5207b7SJohn Levon             *pE = e->next;
201*1f5207b7SJohn Levon             h->entrycount--;
202*1f5207b7SJohn Levon             v = e->v;
203*1f5207b7SJohn Levon             freekey(e->k);
204*1f5207b7SJohn Levon             free(e);
205*1f5207b7SJohn Levon             return v;
206*1f5207b7SJohn Levon         }
207*1f5207b7SJohn Levon         pE = &(e->next);
208*1f5207b7SJohn Levon         e = e->next;
209*1f5207b7SJohn Levon     }
210*1f5207b7SJohn Levon     return NULL;
211*1f5207b7SJohn Levon }
212*1f5207b7SJohn Levon 
213*1f5207b7SJohn Levon /*****************************************************************************/
214*1f5207b7SJohn Levon /* destroy */
215*1f5207b7SJohn Levon void
hashtable_destroy(struct hashtable * h,int free_values)216*1f5207b7SJohn Levon hashtable_destroy(struct hashtable *h, int free_values)
217*1f5207b7SJohn Levon {
218*1f5207b7SJohn Levon     unsigned int i;
219*1f5207b7SJohn Levon     struct entry *e, *f;
220*1f5207b7SJohn Levon     struct entry **table = h->table;
221*1f5207b7SJohn Levon     if (free_values)
222*1f5207b7SJohn Levon     {
223*1f5207b7SJohn Levon         for (i = 0; i < h->tablelength; i++)
224*1f5207b7SJohn Levon         {
225*1f5207b7SJohn Levon             e = table[i];
226*1f5207b7SJohn Levon             while (NULL != e)
227*1f5207b7SJohn Levon             { f = e; e = e->next; freekey(f->k); free(f->v); free(f); }
228*1f5207b7SJohn Levon         }
229*1f5207b7SJohn Levon     }
230*1f5207b7SJohn Levon     else
231*1f5207b7SJohn Levon     {
232*1f5207b7SJohn Levon         for (i = 0; i < h->tablelength; i++)
233*1f5207b7SJohn Levon         {
234*1f5207b7SJohn Levon             e = table[i];
235*1f5207b7SJohn Levon             while (NULL != e)
236*1f5207b7SJohn Levon             { f = e; e = e->next; freekey(f->k); free(f); }
237*1f5207b7SJohn Levon         }
238*1f5207b7SJohn Levon     }
239*1f5207b7SJohn Levon     free(h->table);
240*1f5207b7SJohn Levon     free(h);
241*1f5207b7SJohn Levon }
242*1f5207b7SJohn Levon 
243*1f5207b7SJohn Levon /*
244*1f5207b7SJohn Levon  * Copyright (c) 2002, Christopher Clark
245*1f5207b7SJohn Levon  * All rights reserved.
246*1f5207b7SJohn Levon  *
247*1f5207b7SJohn Levon  * Redistribution and use in source and binary forms, with or without
248*1f5207b7SJohn Levon  * modification, are permitted provided that the following conditions
249*1f5207b7SJohn Levon  * are met:
250*1f5207b7SJohn Levon  *
251*1f5207b7SJohn Levon  * * Redistributions of source code must retain the above copyright
252*1f5207b7SJohn Levon  * notice, this list of conditions and the following disclaimer.
253*1f5207b7SJohn Levon  *
254*1f5207b7SJohn Levon  * * Redistributions in binary form must reproduce the above copyright
255*1f5207b7SJohn Levon  * notice, this list of conditions and the following disclaimer in the
256*1f5207b7SJohn Levon  * documentation and/or other materials provided with the distribution.
257*1f5207b7SJohn Levon  *
258*1f5207b7SJohn Levon  * * Neither the name of the original author; nor the names of any contributors
259*1f5207b7SJohn Levon  * may be used to endorse or promote products derived from this software
260*1f5207b7SJohn Levon  * without specific prior written permission.
261*1f5207b7SJohn Levon  *
262*1f5207b7SJohn Levon  *
263*1f5207b7SJohn Levon  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
264*1f5207b7SJohn Levon  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
265*1f5207b7SJohn Levon  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
266*1f5207b7SJohn Levon  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER
267*1f5207b7SJohn Levon  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
268*1f5207b7SJohn Levon  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
269*1f5207b7SJohn Levon  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
270*1f5207b7SJohn Levon  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
271*1f5207b7SJohn Levon  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
272*1f5207b7SJohn Levon  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
273*1f5207b7SJohn Levon  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
274*1f5207b7SJohn Levon */
275