xref: /illumos-gate/usr/src/cmd/mandoc/compat_ohash.c (revision 4d131170)
/* $Id: compat_ohash.c,v 1.7 2020/06/15 01:37:15 schwarze Exp $ */
/* $OpenBSD: ohash.c,v 1.1 2014/06/02 18:52:03 deraadt Exp $ */

/* Copyright (c) 1999, 2004 Marc Espie <espie@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
18*4d131170SRobert Mustacchi #include "config.h"
19371584c2SYuri Pankov 
20371584c2SYuri Pankov #include <sys/types.h>
21371584c2SYuri Pankov #include <stddef.h>
22371584c2SYuri Pankov #include <stdint.h>
23371584c2SYuri Pankov #include <stdlib.h>
24371584c2SYuri Pankov #include <string.h>
25371584c2SYuri Pankov #include <limits.h>
26371584c2SYuri Pankov #include "compat_ohash.h"
27371584c2SYuri Pankov 
/*
 * One slot of the hash table.
 * p is NULL for a slot that is not in use, DELETED for a slot whose
 * entry was removed, and otherwise points at the caller's record.
 */
struct _ohash_record {
	uint32_t	hv;	/* hash value stored for this slot */
	const char	*p;	/* record pointer, NULL, or DELETED */
};
32371584c2SYuri Pankov 
/*
 * Both markers expand to expressions involving a variable named h, so
 * they may only be used where a hash table pointer called h is in scope.
 * DELETED: the table's own address, used as the marker for removed slots.
 * NONE: the table size, used as the "no slot remembered" index.
 */
#define DELETED		((const char *)h)
#define NONE		(h->size)

/* Don't bother changing the hash table if the change is small enough.  */
#define MINSIZE		(1UL << 4)
#define MINDELETED	4
39371584c2SYuri Pankov 
/* Forward declaration: rebuilds the slot array of h. */
static void ohash_resize(struct ohash *);
41371584c2SYuri Pankov 
42371584c2SYuri Pankov 
43371584c2SYuri Pankov /* This handles the common case of variable length keys, where the
44371584c2SYuri Pankov  * key is stored at the end of the record.
45371584c2SYuri Pankov  */
46371584c2SYuri Pankov void *
ohash_create_entry(struct ohash_info * i,const char * start,const char ** end)47371584c2SYuri Pankov ohash_create_entry(struct ohash_info *i, const char *start, const char **end)
48371584c2SYuri Pankov {
49371584c2SYuri Pankov 	char *p;
50371584c2SYuri Pankov 
51371584c2SYuri Pankov 	if (!*end)
52371584c2SYuri Pankov 		*end = start + strlen(start);
53371584c2SYuri Pankov 	p = (i->alloc)(i->key_offset + (*end - start) + 1, i->data);
54371584c2SYuri Pankov 	if (p) {
55371584c2SYuri Pankov 		memcpy(p+i->key_offset, start, *end-start);
56371584c2SYuri Pankov 		p[i->key_offset + (*end - start)] = '\0';
57371584c2SYuri Pankov 	}
58371584c2SYuri Pankov 	return (void *)p;
59371584c2SYuri Pankov }
60371584c2SYuri Pankov 
61371584c2SYuri Pankov /* hash_delete only frees the hash structure. Use hash_first/hash_next
62371584c2SYuri Pankov  * to free entries as well.  */
63371584c2SYuri Pankov void
ohash_delete(struct ohash * h)64371584c2SYuri Pankov ohash_delete(struct ohash *h)
65371584c2SYuri Pankov {
66371584c2SYuri Pankov 	(h->info.free)(h->t, h->info.data);
67371584c2SYuri Pankov #ifndef NDEBUG
68371584c2SYuri Pankov 	h->t = NULL;
69371584c2SYuri Pankov #endif
70371584c2SYuri Pankov }
71371584c2SYuri Pankov 
72371584c2SYuri Pankov static void
ohash_resize(struct ohash * h)73371584c2SYuri Pankov ohash_resize(struct ohash *h)
74371584c2SYuri Pankov {
75371584c2SYuri Pankov 	struct _ohash_record *n;
76371584c2SYuri Pankov 	size_t ns;
77371584c2SYuri Pankov 	unsigned int	j;
78371584c2SYuri Pankov 	unsigned int	i, incr;
79371584c2SYuri Pankov 
80371584c2SYuri Pankov 	if (4 * h->deleted < h->total) {
81371584c2SYuri Pankov 		if (h->size >= (UINT_MAX >> 1U))
82371584c2SYuri Pankov 			ns = UINT_MAX;
83371584c2SYuri Pankov 		else
84371584c2SYuri Pankov 			ns = h->size << 1U;
85371584c2SYuri Pankov 	} else if (3 * h->deleted > 2 * h->total)
86371584c2SYuri Pankov 		ns = h->size >> 1U;
87371584c2SYuri Pankov 	else
88371584c2SYuri Pankov 		ns = h->size;
89371584c2SYuri Pankov 	if (ns < MINSIZE)
90371584c2SYuri Pankov 		ns = MINSIZE;
91371584c2SYuri Pankov #ifdef STATS_HASH
92371584c2SYuri Pankov 	STAT_HASH_EXPAND++;
93371584c2SYuri Pankov 	STAT_HASH_SIZE += ns - h->size;
94371584c2SYuri Pankov #endif
95371584c2SYuri Pankov 
96371584c2SYuri Pankov 	n = (h->info.calloc)(ns, sizeof(struct _ohash_record), h->info.data);
97371584c2SYuri Pankov 	if (!n)
98371584c2SYuri Pankov 		return;
99371584c2SYuri Pankov 
100371584c2SYuri Pankov 	for (j = 0; j < h->size; j++) {
101371584c2SYuri Pankov 		if (h->t[j].p != NULL && h->t[j].p != DELETED) {
102371584c2SYuri Pankov 			i = h->t[j].hv % ns;
103371584c2SYuri Pankov 			incr = ((h->t[j].hv % (ns - 2)) & ~1) + 1;
104371584c2SYuri Pankov 			while (n[i].p != NULL) {
105371584c2SYuri Pankov 				i += incr;
106371584c2SYuri Pankov 				if (i >= ns)
107371584c2SYuri Pankov 					i -= ns;
108371584c2SYuri Pankov 			}
109371584c2SYuri Pankov 			n[i].hv = h->t[j].hv;
110371584c2SYuri Pankov 			n[i].p = h->t[j].p;
111371584c2SYuri Pankov 		}
112371584c2SYuri Pankov 	}
113371584c2SYuri Pankov 	(h->info.free)(h->t, h->info.data);
114371584c2SYuri Pankov 	h->t = n;
115371584c2SYuri Pankov 	h->size = ns;
116371584c2SYuri Pankov 	h->total -= h->deleted;
117371584c2SYuri Pankov 	h->deleted = 0;
118371584c2SYuri Pankov }
119371584c2SYuri Pankov 
120371584c2SYuri Pankov void *
ohash_remove(struct ohash * h,unsigned int i)121371584c2SYuri Pankov ohash_remove(struct ohash *h, unsigned int i)
122371584c2SYuri Pankov {
123371584c2SYuri Pankov 	void		*result = (void *)h->t[i].p;
124371584c2SYuri Pankov 
125371584c2SYuri Pankov 	if (result == NULL || result == DELETED)
126371584c2SYuri Pankov 		return NULL;
127371584c2SYuri Pankov 
128371584c2SYuri Pankov #ifdef STATS_HASH
129371584c2SYuri Pankov 	STAT_HASH_ENTRIES--;
130371584c2SYuri Pankov #endif
131371584c2SYuri Pankov 	h->t[i].p = DELETED;
132371584c2SYuri Pankov 	h->deleted++;
133371584c2SYuri Pankov 	if (h->deleted >= MINDELETED && 4 * h->deleted > h->total)
134371584c2SYuri Pankov 		ohash_resize(h);
135371584c2SYuri Pankov 	return result;
136371584c2SYuri Pankov }
137371584c2SYuri Pankov 
138371584c2SYuri Pankov void *
ohash_find(struct ohash * h,unsigned int i)139371584c2SYuri Pankov ohash_find(struct ohash *h, unsigned int i)
140371584c2SYuri Pankov {
141371584c2SYuri Pankov 	if (h->t[i].p == DELETED)
142371584c2SYuri Pankov 		return NULL;
143371584c2SYuri Pankov 	else
144371584c2SYuri Pankov 		return (void *)h->t[i].p;
145371584c2SYuri Pankov }
146371584c2SYuri Pankov 
147371584c2SYuri Pankov void *
ohash_insert(struct ohash * h,unsigned int i,void * p)148371584c2SYuri Pankov ohash_insert(struct ohash *h, unsigned int i, void *p)
149371584c2SYuri Pankov {
150371584c2SYuri Pankov #ifdef STATS_HASH
151371584c2SYuri Pankov 	STAT_HASH_ENTRIES++;
152371584c2SYuri Pankov #endif
153371584c2SYuri Pankov 	if (h->t[i].p == DELETED) {
154371584c2SYuri Pankov 		h->deleted--;
155371584c2SYuri Pankov 		h->t[i].p = p;
156371584c2SYuri Pankov 	} else {
157371584c2SYuri Pankov 		h->t[i].p = p;
158371584c2SYuri Pankov 		/* Arbitrary resize boundary.  Tweak if not efficient enough.  */
159371584c2SYuri Pankov 		if (++h->total * 4 > h->size * 3)
160371584c2SYuri Pankov 			ohash_resize(h);
161371584c2SYuri Pankov 	}
162371584c2SYuri Pankov 	return p;
163371584c2SYuri Pankov }
164371584c2SYuri Pankov 
165371584c2SYuri Pankov unsigned int
ohash_entries(struct ohash * h)166371584c2SYuri Pankov ohash_entries(struct ohash *h)
167371584c2SYuri Pankov {
168371584c2SYuri Pankov 	return h->total - h->deleted;
169371584c2SYuri Pankov }
170371584c2SYuri Pankov 
/*
 * Start an iteration over h: reset *pos to 0 and return whatever
 * ohash_next() yields from there.
 */
void *
ohash_first(struct ohash *h, unsigned int *pos)
{
	*pos = 0;
	return ohash_next(h, pos);
}
177371584c2SYuri Pankov 
178371584c2SYuri Pankov void *
ohash_next(struct ohash * h,unsigned int * pos)179371584c2SYuri Pankov ohash_next(struct ohash *h, unsigned int *pos)
180371584c2SYuri Pankov {
181371584c2SYuri Pankov 	for (; *pos < h->size; (*pos)++)
182371584c2SYuri Pankov 		if (h->t[*pos].p != DELETED && h->t[*pos].p != NULL)
183371584c2SYuri Pankov 			return (void *)h->t[(*pos)++].p;
184371584c2SYuri Pankov 	return NULL;
185371584c2SYuri Pankov }
186371584c2SYuri Pankov 
187371584c2SYuri Pankov void
ohash_init(struct ohash * h,unsigned int size,struct ohash_info * info)188371584c2SYuri Pankov ohash_init(struct ohash *h, unsigned int size, struct ohash_info *info)
189371584c2SYuri Pankov {
190371584c2SYuri Pankov 	h->size = 1UL << size;
191371584c2SYuri Pankov 	if (h->size < MINSIZE)
192371584c2SYuri Pankov 		h->size = MINSIZE;
193371584c2SYuri Pankov #ifdef STATS_HASH
194371584c2SYuri Pankov 	STAT_HASH_CREATION++;
195371584c2SYuri Pankov 	STAT_HASH_SIZE += h->size;
196371584c2SYuri Pankov #endif
197371584c2SYuri Pankov 	/* Copy info so that caller may free it.  */
198371584c2SYuri Pankov 	h->info.key_offset = info->key_offset;
199371584c2SYuri Pankov 	h->info.calloc = info->calloc;
200371584c2SYuri Pankov 	h->info.free = info->free;
201371584c2SYuri Pankov 	h->info.alloc = info->alloc;
202371584c2SYuri Pankov 	h->info.data = info->data;
203371584c2SYuri Pankov 	h->t = (h->info.calloc)(h->size, sizeof(struct _ohash_record),
204371584c2SYuri Pankov 		    h->info.data);
205371584c2SYuri Pankov 	h->total = h->deleted = 0;
206371584c2SYuri Pankov }
207371584c2SYuri Pankov 
/*
 * Compute the hash value of the key [s, *e).
 *
 * If *e is NULL on entry, s is treated as a NUL-terminated string and
 * *e is set to point at its terminator.  An empty key hashes to 0.
 */
uint32_t
ohash_interval(const char *s, const char **e)
{
	uint32_t k;

	if (!*e)
		*e = s + strlen(s);
	if (s == *e)
		k = 0;
	else
		k = *s++;
	/* Rotate the accumulator left by two bits, then mix in the next byte. */
	while (s != *e)
		k =  ((k << 2) | (k >> 30)) ^ *s++;
	return k;
}
223371584c2SYuri Pankov 
224371584c2SYuri Pankov unsigned int
ohash_lookup_interval(struct ohash * h,const char * start,const char * end,uint32_t hv)225371584c2SYuri Pankov ohash_lookup_interval(struct ohash *h, const char *start, const char *end,
226371584c2SYuri Pankov     uint32_t hv)
227371584c2SYuri Pankov {
228371584c2SYuri Pankov 	unsigned int	i, incr;
229371584c2SYuri Pankov 	unsigned int	empty;
230371584c2SYuri Pankov 
231371584c2SYuri Pankov #ifdef STATS_HASH
232371584c2SYuri Pankov 	STAT_HASH_LOOKUP++;
233371584c2SYuri Pankov #endif
234371584c2SYuri Pankov 	empty = NONE;
235371584c2SYuri Pankov 	i = hv % h->size;
236371584c2SYuri Pankov 	incr = ((hv % (h->size-2)) & ~1) + 1;
237371584c2SYuri Pankov 	while (h->t[i].p != NULL) {
238371584c2SYuri Pankov #ifdef STATS_HASH
239371584c2SYuri Pankov 		STAT_HASH_LENGTH++;
240371584c2SYuri Pankov #endif
241371584c2SYuri Pankov 		if (h->t[i].p == DELETED) {
242371584c2SYuri Pankov 			if (empty == NONE)
243371584c2SYuri Pankov 				empty = i;
244371584c2SYuri Pankov 		} else if (h->t[i].hv == hv &&
245371584c2SYuri Pankov 		    strncmp(h->t[i].p+h->info.key_offset, start,
246371584c2SYuri Pankov 			end - start) == 0 &&
247371584c2SYuri Pankov 		    (h->t[i].p+h->info.key_offset)[end-start] == '\0') {
248371584c2SYuri Pankov 			if (empty != NONE) {
249371584c2SYuri Pankov 				h->t[empty].hv = hv;
250371584c2SYuri Pankov 				h->t[empty].p = h->t[i].p;
251371584c2SYuri Pankov 				h->t[i].p = DELETED;
252371584c2SYuri Pankov 				return empty;
253371584c2SYuri Pankov 			} else {
254371584c2SYuri Pankov #ifdef STATS_HASH
255371584c2SYuri Pankov 				STAT_HASH_POSITIVE++;
256371584c2SYuri Pankov #endif
257371584c2SYuri Pankov 				return i;
258371584c2SYuri Pankov 			}
259371584c2SYuri Pankov 		}
260371584c2SYuri Pankov 		i += incr;
261371584c2SYuri Pankov 		if (i >= h->size)
262371584c2SYuri Pankov 			i -= h->size;
263371584c2SYuri Pankov 	}
264371584c2SYuri Pankov 
265371584c2SYuri Pankov 	/* Found an empty position.  */
266371584c2SYuri Pankov 	if (empty != NONE)
267371584c2SYuri Pankov 		i = empty;
268371584c2SYuri Pankov 	h->t[i].hv = hv;
269371584c2SYuri Pankov 	return i;
270371584c2SYuri Pankov }
271371584c2SYuri Pankov 
272371584c2SYuri Pankov unsigned int
ohash_lookup_memory(struct ohash * h,const char * k,size_t size,uint32_t hv)273371584c2SYuri Pankov ohash_lookup_memory(struct ohash *h, const char *k, size_t size, uint32_t hv)
274371584c2SYuri Pankov {
275371584c2SYuri Pankov 	unsigned int	i, incr;
276371584c2SYuri Pankov 	unsigned int	empty;
277371584c2SYuri Pankov 
278371584c2SYuri Pankov #ifdef STATS_HASH
279371584c2SYuri Pankov 	STAT_HASH_LOOKUP++;
280371584c2SYuri Pankov #endif
281371584c2SYuri Pankov 	empty = NONE;
282371584c2SYuri Pankov 	i = hv % h->size;
283371584c2SYuri Pankov 	incr = ((hv % (h->size-2)) & ~1) + 1;
284371584c2SYuri Pankov 	while (h->t[i].p != NULL) {
285371584c2SYuri Pankov #ifdef STATS_HASH
286371584c2SYuri Pankov 		STAT_HASH_LENGTH++;
287371584c2SYuri Pankov #endif
288371584c2SYuri Pankov 		if (h->t[i].p == DELETED) {
289371584c2SYuri Pankov 			if (empty == NONE)
290371584c2SYuri Pankov 				empty = i;
291371584c2SYuri Pankov 		} else if (h->t[i].hv == hv &&
292371584c2SYuri Pankov 		    memcmp(h->t[i].p+h->info.key_offset, k, size) == 0) {
293371584c2SYuri Pankov 			if (empty != NONE) {
294371584c2SYuri Pankov 				h->t[empty].hv = hv;
295371584c2SYuri Pankov 				h->t[empty].p = h->t[i].p;
296371584c2SYuri Pankov 				h->t[i].p = DELETED;
297371584c2SYuri Pankov 				return empty;
298371584c2SYuri Pankov 			} else {
299371584c2SYuri Pankov #ifdef STATS_HASH
300371584c2SYuri Pankov 				STAT_HASH_POSITIVE++;
301371584c2SYuri Pankov #endif
302371584c2SYuri Pankov 			}	return i;
303371584c2SYuri Pankov 		}
304371584c2SYuri Pankov 		i += incr;
305371584c2SYuri Pankov 		if (i >= h->size)
306371584c2SYuri Pankov 			i -= h->size;
307371584c2SYuri Pankov 	}
308371584c2SYuri Pankov 
309371584c2SYuri Pankov 	/* Found an empty position.  */
310371584c2SYuri Pankov 	if (empty != NONE)
311371584c2SYuri Pankov 		i = empty;
312371584c2SYuri Pankov 	h->t[i].hv = hv;
313371584c2SYuri Pankov 	return i;
314371584c2SYuri Pankov }
315371584c2SYuri Pankov 
/*
 * Look up the NUL-terminated string s in h.
 * Calls ohash_qlookupi() with a NULL end pointer and returns its
 * slot index.
 */
unsigned int
ohash_qlookup(struct ohash *h, const char *s)
{
	const char *e = NULL;
	return ohash_qlookupi(h, s, &e);
}
322371584c2SYuri Pankov 
/*
 * Look up the key [s, *e) in h: hash it with ohash_interval() and
 * return the slot index from ohash_lookup_interval().
 * If *e is NULL on entry, ohash_interval() sets it to the end of the
 * NUL-terminated string s.
 */
unsigned int
ohash_qlookupi(struct ohash *h, const char *s, const char **e)
{
	uint32_t hv;

	hv = ohash_interval(s, e);
	return ohash_lookup_interval(h, s, *e, hv);
}
331