xref: /illumos-gate/usr/src/lib/krb5/plugins/kdb/db2/libdb2/hash/hash.c (revision 7c64d3750da7fda7e450b8f9b0b963905ded6379)
1 #pragma ident	"%Z%%M%	%I%	%E% SMI"
2 
3 
4 /*-
5  * Copyright (c) 1990, 1993, 1994
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Margo Seltzer.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the University of
22  *	California, Berkeley and its contributors.
23  * 4. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  */
39 
40 #if defined(LIBC_SCCS) && !defined(lint)
41 static char sccsid[] = "@(#)hash.c	8.12 (Berkeley) 11/7/95";
42 #endif /* LIBC_SCCS and not lint */
43 
44 #undef _TS_ERRNO_
45 #include <sys/param.h>
46 #include <sys/stat.h>
47 
48 #include <errno.h>
49 #include <fcntl.h>
50 #include <stdio.h>
51 #include <stdlib.h>
52 #include <string.h>
53 #include <unistd.h>
54 #include <libintl.h>
55 #ifdef DEBUG
56 #include <assert.h>
57 #endif
58 
59 #include "db-int.h"
60 #include "hash.h"
61 #include "page.h"
62 #include "extern.h"
63 
64 static int32_t flush_meta __P((HTAB *));
65 static int32_t hash_access __P((HTAB *, ACTION, const DBT *, DBT *));
66 static int32_t hash_close __P((DB *));
67 static int32_t hash_delete __P((const DB *, const DBT *, u_int32_t));
68 static int32_t hash_fd __P((const DB *));
69 static int32_t hash_get __P((const DB *, const DBT *, DBT *, u_int32_t));
70 static int32_t hash_put __P((const DB *, DBT *, const DBT *, u_int32_t));
71 static int32_t hash_seq __P((const DB *, DBT *, DBT *, u_int32_t));
72 static int32_t hash_sync __P((const DB *, u_int32_t));
73 static int32_t hdestroy __P((HTAB *));
74 static int32_t cursor_get __P((const DB *, CURSOR *, DBT *, DBT *, \
75 	u_int32_t));
76 static int32_t cursor_delete __P((const DB *, CURSOR *, u_int32_t));
77 static HTAB *init_hash __P((HTAB *, const char *, const HASHINFO *));
78 static int32_t init_htab __P((HTAB *, int32_t));
79 #if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
80 static void swap_header __P((HTAB *));
81 static void swap_header_copy __P((HASHHDR *, HASHHDR *));
82 #endif
83 static u_int32_t hget_header __P((HTAB *, u_int32_t));
84 static void hput_header __P((HTAB *));
85 
86 #define RETURN_ERROR(ERR, LOC)	{ save_errno = ERR; goto LOC; }
87 
88 /* Return values */
89 #define	SUCCESS	 (0)
90 #define	ERROR	(-1)
91 #define	ABNORMAL (1)
92 
93 #ifdef HASH_STATISTICS
94 u_int32_t hash_accesses, hash_collisions, hash_expansions, hash_overflows,
95 	hash_bigpages;
96 #endif
97 
98 /************************** INTERFACE ROUTINES ***************************/
99 /* OPEN/CLOSE */
100 
101 extern DB *
102 __kdb2_hash_open(file, flags, mode, info, dflags)
103 	const char *file;
104 	int32_t flags, mode, dflags;
105 	const HASHINFO *info;	/* Special directives for create */
106 {
107 	struct stat statbuf;
108 	DB *dbp;
109 	DBT mpool_key;
110 	HTAB *hashp;
111 	int32_t bpages, csize, new_table, save_errno, specified_file;
112 
113 	if ((flags & O_ACCMODE) == O_WRONLY) {
114 		errno = EINVAL;
115 		return (NULL);
116 	}
117 	if (!(hashp = (HTAB *)calloc(1, sizeof(HTAB)))) {
118 		errno = ENOMEM;
119 		return (NULL);
120 	}
121 	hashp->fp = -1;
122 
123 	/* set this now, before file goes away... */
124 	specified_file = (file != NULL);
125 	if (!file) {
126 		/*
127 		 * If we are root and thus have access to /var/run, then use
128 		 * it, else tempnam(3) will use /var/tmp.
129 		 */
130 		if (!(file = tempnam("/var/run", NULL))) {
131 			/*
132 			 * Running out of memory is not the only possible
133 			 * failure here, but it is probably the most likely one.
134 			 */
135 			errno = ENOMEM;
136 			free(hashp);
137 			return(NULL);
138 		}
139 
140 		/* store the file name so that we can unlink it later */
141 		hashp->fname = file;
142 #ifdef DEBUG
143 			fprintf(stderr, dgettext(TEXT_DOMAIN,
144 			"Using file name %s.\n"), file);
145 #endif
146 	}
147 	/*
148 	 * Even if the user wants write-only access, we need to be able
149 	 * to read the actual file, so we open it read/write.  But the
150 	 * flags field in the hashp structure needs to be accurate so
151 	 * that we can check accesses.
152 	 */
153 	hashp->flags = flags;
154 	hashp->save_file = specified_file && (hashp->flags & O_RDWR);
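	/*
	 * Note that save_file is true only when the caller supplied a file
	 * name and opened it read/write.  flush_meta() writes nothing when
	 * it is false, and hdestroy() unlinks the temporary file created
	 * for an unnamed table.
	 */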
155 
156 	new_table = 0;
157 	if (!file || (flags & O_TRUNC) ||
158 	    (stat(file, &statbuf) && (errno == ENOENT))) {
159 		if (errno == ENOENT)
160 			errno = 0;	/* In case someone looks at errno. */
161 		new_table = 1;
162 	}
163 	if (file) {
164 		if ((hashp->fp = open(file, flags|O_BINARY, mode)) == -1)
165 			RETURN_ERROR(errno, error0);
166 		(void)fcntl(hashp->fp, F_SETFD, 1);
167 	}
168 
169 	/* Process arguments to set up hash table header. */
170 	if (new_table) {
171 		if (!(hashp = init_hash(hashp, file, info)))
172 			RETURN_ERROR(errno, error1);
173 	} else {
174 		/* Table already exists */
175 		if (info && info->hash)
176 			hashp->hash = info->hash;
177 		else
178 			hashp->hash = __default_hash;
179 
180 		/* copy metadata from page into header */
181 		if (hget_header(hashp,
182 		    (info && info->bsize ? info->bsize : DEF_BUCKET_SIZE)) !=
183 		    sizeof(HASHHDR))
184 			RETURN_ERROR(EFTYPE, error1);
185 
186 		/* Verify file type, versions and hash function */
187 		if (hashp->hdr.magic != HASHMAGIC)
188 			RETURN_ERROR(EFTYPE, error1);
189 #define	OLDHASHVERSION	1
190 		if (hashp->hdr.version != HASHVERSION &&
191 		    hashp->hdr.version != OLDHASHVERSION)
192 			RETURN_ERROR(EFTYPE, error1);
193 		if (hashp->hash(CHARKEY, sizeof(CHARKEY))
194 		    != hashp->hdr.h_charkey)
195 			RETURN_ERROR(EFTYPE, error1);
196 		/*
197 		 * Figure out how many segments we need.  Max_Bucket is the
198 		 * maximum bucket number, so the number of buckets is
199 		 * max_bucket + 1.
200 		 */
201 
202 		/* Read in bitmaps */
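		/*
		 * Each bitmap page carries one allocation bit per overflow
		 * page, i.e. (bsize << BYTE_SHIFT) bits, so the expression
		 * below is a ceiling division yielding the number of bitmap
		 * pages (nmaps) currently in use.
		 */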
203 		bpages = (hashp->hdr.spares[hashp->hdr.ovfl_point] +
204 		    (hashp->hdr.bsize << BYTE_SHIFT) - 1) >>
205 		    (hashp->hdr.bshift + BYTE_SHIFT);
206 
207 		hashp->nmaps = bpages;
208 		(void)memset(&hashp->mapp[0], 0, bpages * sizeof(u_int32_t *));
209 	}
210 
211 	/* start up mpool */
212 	mpool_key.data = (u_int8_t *)file;
213 	mpool_key.size = strlen(file);
214 
215 	if (info && info->cachesize)
216 		csize = info->cachesize / hashp->hdr.bsize;
217 	else
218 		csize = DEF_CACHESIZE / hashp->hdr.bsize;
219 	hashp->mp = mpool_open(&mpool_key, hashp->fp, hashp->hdr.bsize, csize);
220 
221 	if (!hashp->mp)
222 		RETURN_ERROR(errno, error1);
223 	mpool_filter(hashp->mp, __pgin_routine, __pgout_routine, hashp);
224 
225 	/*
226 	 * For a new table, set up the bitmaps.
227 	 */
228 	if (new_table &&
229 	   init_htab(hashp, info && info->nelem ? info->nelem : 1))
230 		goto error2;
231 
232 	/* initialize the cursor queue */
233 	TAILQ_INIT(&hashp->curs_queue);
234 	hashp->seq_cursor = NULL;
235 
236 
237 	/* get a chunk of memory for our split buffer */
238 	hashp->split_buf = (PAGE16 *)malloc(hashp->hdr.bsize);
239 	if (!hashp->split_buf)
240 		goto error2;
241 
242 	hashp->new_file = new_table;
243 
244 	if (!(dbp = (DB *)malloc(sizeof(DB))))
245 		goto error2;
246 
247 	dbp->internal = hashp;
248 	dbp->close = hash_close;
249 	dbp->del = hash_delete;
250 	dbp->fd = hash_fd;
251 	dbp->get = hash_get;
252 	dbp->put = hash_put;
253 	dbp->seq = hash_seq;
254 	dbp->sync = hash_sync;
255 	dbp->type = DB_HASH;
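	/*
	 * Sketch of typical use through this method table, in the usual
	 * db(3) 1.85 style (for illustration only; file name and sizes
	 * are arbitrary):
	 *
	 *	DB *db;
	 *	DBT k, v;
	 *	int r;
	 *
	 *	db = __kdb2_hash_open("file.db", O_RDWR|O_CREAT, 0600,
	 *	    NULL, 0);
	 *	k.data = "key"; k.size = 3;
	 *	v.data = "val"; v.size = 3;
	 *	r = db->put(db, &k, &v, 0);
	 *	r = db->get(db, &k, &v, 0);
	 *	for (r = db->seq(db, &k, &v, R_FIRST); r == 0;
	 *	    r = db->seq(db, &k, &v, R_NEXT))
	 *		;
	 *	(void)db->close(db);
	 */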
256 
257 #ifdef DEBUG
258 	(void)fprintf(stderr,
259 	    "%s\n%s%lx\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n",
260 	    "init_htab:",
261 	    "TABLE POINTER   ", (void *)hashp,
262 	    "BUCKET SIZE     ", hashp->hdr.bsize,
263 	    "BUCKET SHIFT    ", hashp->hdr.bshift,
264 	    "FILL FACTOR     ", hashp->hdr.ffactor,
265 	    "MAX BUCKET      ", hashp->hdr.max_bucket,
266 	    "OVFL POINT      ", hashp->hdr.ovfl_point,
267 	    "LAST FREED      ", hashp->hdr.last_freed,
268 	    "HIGH MASK       ", hashp->hdr.high_mask,
269 	    "LOW  MASK       ", hashp->hdr.low_mask,
270 	    "NKEYS           ", hashp->hdr.nkeys);
271 #endif
272 #ifdef HASH_STATISTICS
273 	hash_overflows = hash_accesses = hash_collisions = hash_expansions = 0;
274 	hash_bigpages = 0;
275 #endif
276 	return (dbp);
277 
278 error2:	save_errno = errno;	/* gotos to error2 do not set save_errno */
279 	hdestroy(hashp);
280 	errno = save_errno;
281 	return (NULL);
282 
283 error1:
284 	if (hashp != NULL)
285 		(void)close(hashp->fp);
286 
287 error0:
288 	if (hashp != NULL && !specified_file)
289 		free((void*)(hashp->fname)); /* SUNW14resync */
290 	free(hashp);
291 	errno = save_errno;
292 	return (NULL);
293 }
294 
295 static int32_t
296 hash_close(dbp)
297 	DB *dbp;
298 {
299 	HTAB *hashp;
300 	int32_t retval;
301 
302 	if (!dbp)
303 		return (ERROR);
304 
305 	hashp = (HTAB *)dbp->internal;
306 	retval = hdestroy(hashp);
307 	free(dbp);
308 	return (retval);
309 }
310 
311 static int32_t
312 hash_fd(dbp)
313 	const DB *dbp;
314 {
315 	HTAB *hashp;
316 
317 	if (!dbp)
318 		return (ERROR);
319 
320 	hashp = (HTAB *)dbp->internal;
321 	if (hashp->fp == -1) {
322 		errno = ENOENT;
323 		return (-1);
324 	}
325 	return (hashp->fp);
326 }
327 
328 /************************** LOCAL CREATION ROUTINES **********************/
329 static HTAB *
330 init_hash(hashp, file, info)
331 	HTAB *hashp;
332 	const char *file;
333 	const HASHINFO *info;
334 {
335 	struct stat statbuf;
336 	int32_t nelem;
337 
338 	nelem = 1;
339 	hashp->hdr.nkeys = 0;
340 	hashp->hdr.lorder = DB_BYTE_ORDER;
341 	hashp->hdr.bsize = DEF_BUCKET_SIZE;
342 	hashp->hdr.bshift = DEF_BUCKET_SHIFT;
343 	hashp->hdr.ffactor = DEF_FFACTOR;
344 	hashp->hash = __default_hash;
345 	memset(hashp->hdr.spares, 0, sizeof(hashp->hdr.spares));
346 	memset(hashp->hdr.bitmaps, 0, sizeof(hashp->hdr.bitmaps));
347 
348 	/* Fix bucket size to be optimal for file system */
349 	if (file != NULL) {
350 		if (stat(file, &statbuf))
351 			return (NULL);
352 		hashp->hdr.bsize = statbuf.st_blksize;
353 		hashp->hdr.bshift = __log2(hashp->hdr.bsize);
354 	}
355 	if (info) {
356 		if (info->bsize) {
357 			/* Round pagesize up to power of 2 */
358 			hashp->hdr.bshift = __log2(info->bsize);
359 			hashp->hdr.bsize = 1 << hashp->hdr.bshift;
360 			if (hashp->hdr.bsize > MAX_BSIZE) {
361 				errno = EINVAL;
362 				return (NULL);
363 			}
364 		}
365 		if (info->ffactor)
366 			hashp->hdr.ffactor = info->ffactor;
367 		if (info->hash)
368 			hashp->hash = info->hash;
369 		if (info->lorder) {
370 			if ((info->lorder != DB_BIG_ENDIAN) &&
371 			    (info->lorder != DB_LITTLE_ENDIAN)) {
372 				errno = EINVAL;
373 				return (NULL);
374 			}
375 			hashp->hdr.lorder = info->lorder;
376 		}
377 	}
378 	return (hashp);
379 }
380 
381 /*
382  * Returns 0 on success, -1 on error.
383  */
384 static int32_t
385 init_htab(hashp, nelem)
386 	HTAB *hashp;
387 	int32_t nelem;
388 {
389 	int32_t l2, nbuckets;
390 
391 	/*
392 	 * Divide number of elements by the fill factor and determine a
393 	 * desired number of buckets.  Allocate space for the next greater
394 	 * power of two number of buckets.
395 	 */
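	/*
	 * For example, nelem = 1000 with ffactor = 8 gives
	 * (1000 - 1) / 8 + 1 = 125 desired buckets; __log2 rounds up,
	 * so l2 = 7 and 128 buckets are addressed initially.
	 */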
396 	nelem = (nelem - 1) / hashp->hdr.ffactor + 1;
397 
398 	l2 = __log2(MAX(nelem, 2));
399 	nbuckets = 1 << l2;
400 
401 	hashp->hdr.spares[l2] = l2 + 1;
402 	hashp->hdr.spares[l2 + 1] = l2 + 1;
403 	hashp->hdr.ovfl_point = l2;
404 	hashp->hdr.last_freed = 2;
405 
406 	hashp->hdr.max_bucket = hashp->hdr.low_mask = nbuckets - 1;
407 	hashp->hdr.high_mask = (nbuckets << 1) - 1;
408 
409 	/*
410 	 * The number of header pages is the size of the header divided by
411 	 * the amount of freespace on header pages (the page size - the
412 	 * size of 1 integer where the length of the header info on that
413 	 * page is stored) plus another page if it didn't divide evenly.
414 	 */
415 	hashp->hdr.hdrpages =
416 	    (sizeof(HASHHDR) / (hashp->hdr.bsize - HEADER_OVERHEAD)) +
417 	    (((sizeof(HASHHDR) % (hashp->hdr.bsize - HEADER_OVERHEAD)) == 0)
418 	    ? 0 : 1);
419 
420 	/* Create pages for these buckets */
421 	/*
422 	for (i = 0; i <= hashp->hdr.max_bucket; i++) {
423 		if (__new_page(hashp, (u_int32_t)i, A_BUCKET) != 0)
424 			return (-1);
425 	}
426 	*/
427 
428 	/* First bitmap page is at: splitpoint l2 page offset 1 */
429 	if (__ibitmap(hashp, OADDR_OF(l2, 1), l2 + 1, 0))
430 		return (-1);
431 
432 	return (0);
433 }
434 
435 /*
436  * Functions to get/put hash header.  We access the file directly.
437  */
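/*
 * The header is read and written with raw read/write on hashp->fp rather
 * than through mpool, presumably because the page size needed to set up
 * mpool is itself stored in the header.
 */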
438 static u_int32_t
439 hget_header(hashp, page_size)
440 	HTAB *hashp;
441 	u_int32_t page_size;
442 {
443 	u_int32_t num_copied, i;
444 	u_int8_t *hdr_dest;
445 
446 	num_copied = 0;
447 	i = 0;
448 
449 	hdr_dest = (u_int8_t *)&hashp->hdr;
450 
451 	/*
452 	 * XXX
453 	 * This should not be printing to stderr on a "normal" error case.
454 	 */
455 	lseek(hashp->fp, 0, SEEK_SET);
456 	num_copied = read(hashp->fp, hdr_dest, sizeof(HASHHDR));
457 	if (num_copied != sizeof(HASHHDR)) {
458 		/* Solaris Kerberos: Make sure to print a newline */
459 		fprintf(stderr, dgettext(TEXT_DOMAIN,
460 			"hash: could not retrieve header\n"));
461 		return (0);
462 	}
463 #if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
464 	swap_header(hashp);
465 #endif
466 	return (num_copied);
467 }
468 
469 static void
470 hput_header(hashp)
471 	HTAB *hashp;
472 {
473 	HASHHDR *whdrp;
474 #if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
475 	HASHHDR whdr;
476 #endif
477 	u_int32_t num_copied, i;
478 
479 	num_copied = i = 0;
480 
481 	whdrp = &hashp->hdr;
482 #if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
483 	whdrp = &whdr;
484 	swap_header_copy(&hashp->hdr, whdrp);
485 #endif
486 
487 	lseek(hashp->fp, 0, SEEK_SET);
488 	num_copied = write(hashp->fp, whdrp, sizeof(HASHHDR));
489 	if (num_copied != sizeof(HASHHDR))
490 		/* Solaris Kerberos: Make sure to print a newline */
491 		(void)fprintf(stderr, dgettext(TEXT_DOMAIN,
492 			"hash: could not write hash header\n"));
493 	return;
494 }
495 
496 /********************** DESTROY/CLOSE ROUTINES ************************/
497 
498 /*
499  * Flushes any changes to the file if necessary and destroys the hashp
500  * structure, freeing all allocated space.
501  */
502 static int32_t
503 hdestroy(hashp)
504 	HTAB *hashp;
505 {
506 	int32_t save_errno;
507 
508 	save_errno = 0;
509 
510 #ifdef HASH_STATISTICS
511 	{ int i;
512 	(void)fprintf(stderr, dgettext(TEXT_DOMAIN,
513 			"hdestroy: accesses %ld collisions %ld\n"),
514 	    hash_accesses, hash_collisions);
515 	(void)fprintf(stderr,
516 	   dgettext(TEXT_DOMAIN,
517 			 "hdestroy: expansions %ld\n"), hash_expansions);
518 	(void)fprintf(stderr,
519 	    dgettext(TEXT_DOMAIN,
520 			"hdestroy: overflows %ld\n"), hash_overflows);
521 	(void)fprintf(stderr,
522 	    dgettext(TEXT_DOMAIN,
523 			"hdestroy: big key/data pages %ld\n"), hash_bigpages);
524 	(void)fprintf(stderr,
525 	    "keys %ld maxp %d\n", hashp->hdr.nkeys, hashp->hdr.max_bucket);
526 
527 	for (i = 0; i < NCACHED; i++)
528 		(void)fprintf(stderr,
529 		    "spares[%d] = %d\n", i, hashp->hdr.spares[i]);
530 	}
531 #endif
532 
533 	if (flush_meta(hashp) && !save_errno)
534 		save_errno = errno;
535 
536 	/* Free the split page */
537 	if (hashp->split_buf)
538 		free(hashp->split_buf);
539 
540 	/* Free the big key and big data returns */
541 	if (hashp->bigkey_buf)
542 		free(hashp->bigkey_buf);
543 	if (hashp->bigdata_buf)
544 		free(hashp->bigdata_buf);
545 
546 	/* XXX This should really iterate over the cursor queue, but
547 	   it's not clear how to do that, and the only cursor a hash
548 	   table ever creates is the one used by hash_seq().  Passing
549 	   NULL as the first arg is also a kludge, but I know that
550 	   it's never used, so I do it.  The intent is to plug the
551 	   memory leak.  Correctness can come later. */
552 
553 	if (hashp->seq_cursor)
554 		hashp->seq_cursor->delete(NULL, hashp->seq_cursor, 0);
555 
556 	/* shut down mpool */
557 	mpool_sync(hashp->mp);
558 	mpool_close(hashp->mp);
559 
560 	if (hashp->fp != -1)
561 		(void)close(hashp->fp);
562 
563 	/*
564 	 * *** This may cause problems if hashp->fname is set in any case
565 	 * other than the case that we are generating a temporary file name.
566 	 * Note that the new version of mpool should support temporary
567 	 * files within mpool itself.
568 	 */
569 	if (hashp->fname && !hashp->save_file) {
570 #ifdef DEBUG
571 		fprintf(stderr, dgettext(TEXT_DOMAIN,
572 			"Unlinking file %s.\n"), hashp->fname);
573 #endif
574 		/* we need to chmod the file to allow it to be deleted... */
575 		chmod(hashp->fname, 0700);
576 		unlink(hashp->fname);
577 		/* destroy the temporary name */
578 		free((void *)(hashp->fname)); /* SUNW14resync */
579 	}
580 	free(hashp);
581 
582 	if (save_errno) {
583 		errno = save_errno;
584 		return (ERROR);
585 	}
586 	return (SUCCESS);
587 }
588 
589 /*
590  * Write modified pages to disk
591  *
592  * Returns:
593  *	 0 == OK
594  *	-1 == ERROR
595  */
596 static int32_t
597 hash_sync(dbp, flags)
598 	const DB *dbp;
599 	u_int32_t flags;
600 {
601 	HTAB *hashp;
602 
603 	hashp = (HTAB *)dbp->internal;
604 
605 	/*
606 	 * XXX
607 	 * Check success/failure conditions.
608 	 */
609 	return (flush_meta(hashp) || mpool_sync(hashp->mp));
610 }
611 
612 /*
613  * Returns:
614  *	 0 == OK
615  *	-1 indicates that errno should be set
616  */
617 static int32_t
618 flush_meta(hashp)
619 	HTAB *hashp;
620 {
621 	int32_t i;
622 
623 	if (!hashp->save_file)
624 		return (0);
625 	hashp->hdr.magic = HASHMAGIC;
626 	hashp->hdr.version = HASHVERSION;
627 	hashp->hdr.h_charkey = hashp->hash(CHARKEY, sizeof(CHARKEY));
628 
629 	/* write out metadata */
630 	hput_header(hashp);
631 
632 	for (i = 0; i < NCACHED; i++)
633 		if (hashp->mapp[i]) {
634 			if (__put_page(hashp,
635 			    (PAGE16 *)hashp->mapp[i], A_BITMAP, 1))
636 				return (-1);
637 			hashp->mapp[i] = NULL;
638 		}
639 	return (0);
640 }
641 
642 /****************************** SEARCH ROUTINES *****************************/
643 /*
644  * All the access routines use the following return convention.
645  *
646  * Returns:
647  *	 0 on SUCCESS
648  *	 1 to indicate an external ERROR (e.g., key not found)
649  *	-1 to indicate an internal ERROR (e.g., out of memory)
650  */
651 
652 /* *** make sure this is true! */
653 
654 static int32_t
655 hash_get(dbp, key, data, flag)
656 	const DB *dbp;
657 	const DBT *key;
658 	DBT *data;
659 	u_int32_t flag;
660 {
661 	HTAB *hashp;
662 
663 	hashp = (HTAB *)dbp->internal;
664 	if (flag) {
665 		hashp->local_errno = errno = EINVAL;
666 		return (ERROR);
667 	}
668 	return (hash_access(hashp, HASH_GET, key, data));
669 }
670 
671 static int32_t
672 hash_put(dbp, key, data, flag)
673 	const DB *dbp;
674 	DBT *key;
675 	const DBT *data;
676 	u_int32_t flag;
677 {
678 	HTAB *hashp;
679 
680 	hashp = (HTAB *)dbp->internal;
681 	if (flag && flag != R_NOOVERWRITE) {
682 		hashp->local_errno = errno = EINVAL;
683 		return (ERROR);
684 	}
685 	if ((hashp->flags & O_ACCMODE) == O_RDONLY) {
686 		hashp->local_errno = errno = EPERM;
687 		return (ERROR);
688 	}
689 	return (hash_access(hashp, flag == R_NOOVERWRITE ?
690 		HASH_PUTNEW : HASH_PUT, key, (DBT *)data));
691 }
692 
693 static int32_t
694 hash_delete(dbp, key, flag)
695 	const DB *dbp;
696 	const DBT *key;
697 	u_int32_t flag;		/* Ignored */
698 {
699 	HTAB *hashp;
700 
701 	hashp = (HTAB *)dbp->internal;
702 	if (flag) {
703 		hashp->local_errno = errno = EINVAL;
704 		return (ERROR);
705 	}
706 	if ((hashp->flags & O_ACCMODE) == O_RDONLY) {
707 		hashp->local_errno = errno = EPERM;
708 		return (ERROR);
709 	}
710 
711 	return (hash_access(hashp, HASH_DELETE, key, NULL));
712 }
713 
714 /*
715  * Assume that hashp has been set in wrapper routine.
716  */
717 static int32_t
718 hash_access(hashp, action, key, val)
719 	HTAB *hashp;
720 	ACTION action;
721 	const DBT *key;
722 	DBT *val;
723 {
724 	DBT page_key, page_val;
725 	CURSOR cursor;
726 	ITEM_INFO item_info;
727 	u_int32_t bucket;
728 	u_int32_t num_items;
729 
730 #ifdef HASH_STATISTICS
731 	hash_accesses++;
732 #endif
733 
734 	num_items = 0;
735 
736 	/*
737 	 * Set up item_info so that we're looking for space to add an item
738 	 * as we cycle through the pages looking for the key.
739 	 */
740 	if (action == HASH_PUT || action == HASH_PUTNEW) {
741 		if (ISBIG(key->size + val->size, hashp))
742 			item_info.seek_size = PAIR_OVERHEAD;
743 		else
744 			item_info.seek_size = key->size + val->size;
745 	} else
746 		item_info.seek_size = 0;
747 	item_info.seek_found_page = 0;
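	/*
	 * seek_size is the amount of free space a page must have to accept
	 * the new pair.  A big pair consumes only PAIR_OVERHEAD bytes on the
	 * bucket page itself; the key and data are stored on separate big
	 * key/data pages.
	 */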
748 
749 	bucket = __call_hash(hashp, (int8_t *)key->data, key->size);
750 
751 	cursor.pagep = NULL;
752 	__get_item_reset(hashp, &cursor);
753 
754 	cursor.bucket = bucket;
755 	while (1) {
756 		__get_item_next(hashp, &cursor, &page_key, &page_val, &item_info);
757 		if (item_info.status == ITEM_ERROR)
758 			return (ABNORMAL);
759 		if (item_info.status == ITEM_NO_MORE)
760 			break;
761 		num_items++;
762 		if (item_info.key_off == BIGPAIR) {
763 			/*
764 			 * !!!
765 			 * 0 is a valid index.
766 			 */
767 			if (__find_bigpair(hashp, &cursor, (int8_t *)key->data,
768 			    key->size) > 0)
769 				goto found;
770 		} else if (key->size == page_key.size &&
771 		    !memcmp(key->data, page_key.data, key->size))
772 			goto found;
773 	}
774 #ifdef HASH_STATISTICS
775 	hash_collisions++;
776 #endif
777 	__get_item_done(hashp, &cursor);
778 
779 	/*
780 	 * At this point, item_info will list either the last page in
781 	 * the chain, or the last page in the chain plus a pgno for where
782 	 * to find the first page in the chain with space for the
783 	 * item we wish to add.
784 	 */
785 
786 	/* Not found */
787 	switch (action) {
788 	case HASH_PUT:
789 	case HASH_PUTNEW:
790 		if (__addel(hashp, &item_info, key, val, num_items, 0))
791 			return (ERROR);
792 		break;
793 	case HASH_GET:
794 	case HASH_DELETE:
795 	default:
796 		return (ABNORMAL);
797 	}
798 
799 	if (item_info.caused_expand)
800 		__expand_table(hashp);
801 	return (SUCCESS);
802 
803 found:	__get_item_done(hashp, &cursor);
804 
805 	switch (action) {
806 	case HASH_PUTNEW:
807 		/* mpool_put(hashp->mp, pagep, 0); */
808 		return (ABNORMAL);
809 	case HASH_GET:
810 		if (item_info.key_off == BIGPAIR) {
811 			if (__big_return(hashp, &item_info, val, 0))
812 				return (ERROR);
813 		} else {
814 			val->data = page_val.data;
815 			val->size = page_val.size;
816 		}
817 		/* *** data may not be available! */
818 		break;
819 	case HASH_PUT:
820 		if (__delpair(hashp, &cursor, &item_info) ||
821 		    __addel(hashp, &item_info, key, val, UNKNOWN, 0))
822 			return (ERROR);
823 		__get_item_done(hashp, &cursor);
824 		if (item_info.caused_expand)
825 			__expand_table(hashp);
826 		break;
827 	case HASH_DELETE:
828 		if (__delpair(hashp, &cursor, &item_info))
829 			return (ERROR);
830 		break;
831 	default:
832 		abort();
833 	}
834 	return (SUCCESS);
835 }
836 
837 /* ****************** CURSORS ********************************** */
838 CURSOR *
839 __cursor_creat(dbp)
840 	const DB *dbp;
841 {
842 	CURSOR *new_curs;
843 	HTAB *hashp;
844 
845 	new_curs = (CURSOR *)malloc(sizeof(struct cursor_t));
846 	if (!new_curs)
847 		return NULL;
848 	new_curs->internal =
849 	    (struct item_info *)malloc(sizeof(struct item_info));
850 	if (!new_curs->internal) {
851 		free(new_curs);
852 		return NULL;
853 	}
854 	new_curs->get = cursor_get;
855 	new_curs->delete = cursor_delete;
856 
857 	new_curs->bucket = 0;
858 	new_curs->pgno = INVALID_PGNO;
859 	new_curs->ndx = 0;
860 	new_curs->pgndx = 0;
861 	new_curs->pagep = NULL;
862 
863 	/* place onto queue of cursors */
864 	hashp = (HTAB *)dbp->internal;
865 	TAILQ_INSERT_TAIL(&hashp->curs_queue, new_curs, queue);
866 
867 	return new_curs;
868 }
869 
870 static int32_t
871 cursor_get(dbp, cursorp, key, val, flags)
872 	const DB *dbp;
873 	CURSOR *cursorp;
874 	DBT *key, *val;
875 	u_int32_t flags;
876 {
877 	HTAB *hashp;
878 	ITEM_INFO item_info;
879 
880 	hashp = (HTAB *)dbp->internal;
881 
882 	if (flags && flags != R_FIRST && flags != R_NEXT) {
883 		hashp->local_errno = errno = EINVAL;
884 		return (ERROR);
885 	}
886 #ifdef HASH_STATISTICS
887 	hash_accesses++;
888 #endif
889 
890 	item_info.seek_size = 0;
891 
892 	if (flags == R_FIRST)
893 		__get_item_first(hashp, cursorp, key, val, &item_info);
894 	else
895 		__get_item_next(hashp, cursorp, key, val, &item_info);
896 
897 	/*
898 	 * This needs to be changed around.  As is, get_item_next advances
899 	 * the pointers on the page but this function actually advances
900 	 * bucket pointers.  This works, since the only other place we
901 	 * use get_item_next is in hash_access which only deals with one
902 	 * bucket at a time.  However, there is the problem that certain other
903 	 * functions (such as find_bigpair and delpair) depend on the
904 	 * pgndx member of the cursor.  Right now, they are using pgndx - 1
905 	 * since indices refer to the __next__ item that is to be fetched
906 	 * from the page.  This is ugly, as you may have noticed, whoever
907 	 * you are.  The best solution would be to depend on item_infos to
908 	 * deal with _current_ information, and have the cursors only
909 	 * deal with _next_ information.  In that scheme, get_item_next
910 	 * would also advance buckets.  Version 3...
911 	 */
912 
913 
914 	/*
915 	 * Must always enter this loop to do error handling and
916 	 * check for big key/data pair.
917 	 */
918 	while (1) {
919 		if (item_info.status == ITEM_OK) {
920 			if (item_info.key_off == BIGPAIR &&
921 			    __big_keydata(hashp, cursorp->pagep, key, val,
922 			    item_info.pgndx))
923 				return (ABNORMAL);
924 
925 			break;
926 		} else if (item_info.status != ITEM_NO_MORE)
927 			return (ABNORMAL);
928 
929 		__put_page(hashp, cursorp->pagep, A_RAW, 0);
930 		cursorp->ndx = cursorp->pgndx = 0;
931 		cursorp->bucket++;
932 		cursorp->pgno = INVALID_PGNO;
933 		cursorp->pagep = NULL;
934 		if (cursorp->bucket > hashp->hdr.max_bucket)
935 			return (ABNORMAL);
936 		__get_item_next(hashp, cursorp, key, val, &item_info);
937 	}
938 
939 	__get_item_done(hashp, cursorp);
940 	return (0);
941 }
942 
943 static int32_t
944 cursor_delete(dbp, cursor, flags)
945 	const DB *dbp;
946 	CURSOR *cursor;
947 	u_int32_t flags;
948 {
949 	/* XXX this is empirically determined, so it might not be completely
950 	   correct, but it seems to work.  At the very least it fixes
951 	   a memory leak */
952 
953 	free(cursor->internal);
954 	free(cursor);
955 
956 	return (0);
957 }
958 
959 static int32_t
960 hash_seq(dbp, key, val, flag)
961 	const DB *dbp;
962 	DBT *key, *val;
963 	u_int32_t flag;
964 {
965 	HTAB *hashp;
966 
967 	/*
968 	 * Seq just uses the default cursor to go sequencing through the
969 	 * database.  Note that the default cursor is the first in the list.
970 	 */
971 
972 	hashp = (HTAB *)dbp->internal;
973 	if (!hashp->seq_cursor &&
974 	    !(hashp->seq_cursor = __cursor_creat(dbp))) return (ERROR);
975 
976 	return (hashp->seq_cursor->get(dbp, hashp->seq_cursor, key, val, flag));
977 }
978 
979 /********************************* UTILITIES ************************/
980 
981 /*
982  * Returns:
983  *	 0 ==> OK
984  *	-1 ==> Error
985  */
986 int32_t
987 __expand_table(hashp)
988 	HTAB *hashp;
989 {
990 	u_int32_t old_bucket, new_bucket;
991 	int32_t spare_ndx;
992 
993 #ifdef HASH_STATISTICS
994 	hash_expansions++;
995 #endif
996 	new_bucket = ++hashp->hdr.max_bucket;
997 	old_bucket = (hashp->hdr.max_bucket & hashp->hdr.low_mask);
998 
999 	/* Get a page for this new bucket */
1000 	if (__new_page(hashp, new_bucket, A_BUCKET) != 0)
1001 		return (-1);
1002 
1003 	/*
1004 	 * If the split point is increasing (hdr.max_bucket's log base 2
1005 	 * increases), we need to copy the current contents of the spare
1006 	 * split bucket to the next bucket.
1007 	 */
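	/*
	 * For example, starting from a four-bucket table (ovfl_point == 2),
	 * growing max_bucket from 3 to 4 gives __log2(5) == 3 > 2, so
	 * spares[3] inherits spares[2] and the overflow point advances.
	 */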
1008 	spare_ndx = __log2(hashp->hdr.max_bucket + 1);
1009 	if (spare_ndx > hashp->hdr.ovfl_point) {
1010 		hashp->hdr.spares[spare_ndx] = hashp->hdr.spares[hashp->hdr.ovfl_point];
1011 		hashp->hdr.ovfl_point = spare_ndx;
1012 	}
1013 	if (new_bucket > hashp->hdr.high_mask) {
1014 		/* Starting a new doubling */
1015 		hashp->hdr.low_mask = hashp->hdr.high_mask;
1016 		hashp->hdr.high_mask = new_bucket | hashp->hdr.low_mask;
1017 	}
1018 	if (BUCKET_TO_PAGE(new_bucket) > MAX_PAGES(hashp)) {
1019 		fprintf(stderr, dgettext(TEXT_DOMAIN,
1020 			 "hash: Cannot allocate new bucket.  Pages exhausted.\n"));
1021 		return (-1);
1022 	}
1023 	/* Relocate records to the new bucket */
1024 	return (__split_page(hashp, old_bucket, new_bucket));
1025 }
1026 
1027 u_int32_t
1028 __call_hash(hashp, k, len)
1029 	HTAB *hashp;
1030 	int8_t *k;
1031 	int32_t len;
1032 {
1033 	int32_t n, bucket;
1034 
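	/*
	 * Classic linear-hashing address calculation: mask with high_mask
	 * first; if that selects a bucket beyond max_bucket (one that has
	 * not been split into existence yet), the key still lives in its
	 * lower-half image, found by masking with low_mask.
	 */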
1035 	n = hashp->hash(k, len);
1036 	bucket = n & hashp->hdr.high_mask;
1037 	if (bucket > hashp->hdr.max_bucket)
1038 		bucket = bucket & hashp->hdr.low_mask;
1039 	return (bucket);
1040 }
1041 
1042 #if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
1043 /*
1044  * Hashp->hdr needs to be byteswapped.
1045  */
1046 static void
1047 swap_header_copy(srcp, destp)
1048 	HASHHDR *srcp, *destp;
1049 {
1050 	int32_t i;
1051 
1052 	P_32_COPY(srcp->magic, destp->magic);
1053 	P_32_COPY(srcp->version, destp->version);
1054 	P_32_COPY(srcp->lorder, destp->lorder);
1055 	P_32_COPY(srcp->bsize, destp->bsize);
1056 	P_32_COPY(srcp->bshift, destp->bshift);
1057 	P_32_COPY(srcp->ovfl_point, destp->ovfl_point);
1058 	P_32_COPY(srcp->last_freed, destp->last_freed);
1059 	P_32_COPY(srcp->max_bucket, destp->max_bucket);
1060 	P_32_COPY(srcp->high_mask, destp->high_mask);
1061 	P_32_COPY(srcp->low_mask, destp->low_mask);
1062 	P_32_COPY(srcp->ffactor, destp->ffactor);
1063 	P_32_COPY(srcp->nkeys, destp->nkeys);
1064 	P_32_COPY(srcp->hdrpages, destp->hdrpages);
1065 	P_32_COPY(srcp->h_charkey, destp->h_charkey);
1066 	for (i = 0; i < NCACHED; i++) {
1067 		P_32_COPY(srcp->spares[i], destp->spares[i]);
1068 		P_16_COPY(srcp->bitmaps[i], destp->bitmaps[i]);
1069 	}
1070 }
1071 
1072 static void
1073 swap_header(hashp)
1074 	HTAB *hashp;
1075 {
1076 	HASHHDR *hdrp;
1077 	int32_t i;
1078 
1079 	hdrp = &hashp->hdr;
1080 
1081 	M_32_SWAP(hdrp->magic);
1082 	M_32_SWAP(hdrp->version);
1083 	M_32_SWAP(hdrp->lorder);
1084 	M_32_SWAP(hdrp->bsize);
1085 	M_32_SWAP(hdrp->bshift);
1086 	M_32_SWAP(hdrp->ovfl_point);
1087 	M_32_SWAP(hdrp->last_freed);
1088 	M_32_SWAP(hdrp->max_bucket);
1089 	M_32_SWAP(hdrp->high_mask);
1090 	M_32_SWAP(hdrp->low_mask);
1091 	M_32_SWAP(hdrp->ffactor);
1092 	M_32_SWAP(hdrp->nkeys);
1093 	M_32_SWAP(hdrp->hdrpages);
1094 	M_32_SWAP(hdrp->h_charkey);
1095 	for (i = 0; i < NCACHED; i++) {
1096 		M_32_SWAP(hdrp->spares[i]);
1097 		M_16_SWAP(hdrp->bitmaps[i]);
1098 	}
1099 }
1100 #endif /* DB_BYTE_ORDER == DB_LITTLE_ENDIAN */
1101