xref: /illumos-gate/usr/src/lib/krb5/plugins/kdb/db2/libdb2/hash/hash.c (revision 495ee6847d0d3e288f47ba026d98a830e51cbc06)
1 /*
2  * Copyright (c) 1990, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Margo Seltzer.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #undef _TS_ERRNO_
38 #include <sys/param.h>
39 #include <sys/stat.h>
40 
41 #include <errno.h>
42 #include <fcntl.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <unistd.h>
47 #include <libintl.h>
48 #ifdef DEBUG
49 #include <assert.h>
50 #endif
51 
52 #include "db-int.h"
53 #include "hash.h"
54 #include "page.h"
55 #include "extern.h"
56 
57 static int32_t flush_meta __P((HTAB *));
58 static int32_t hash_access __P((HTAB *, ACTION, const DBT *, DBT *));
59 static int32_t hash_close __P((DB *));
60 static int32_t hash_delete __P((const DB *, const DBT *, u_int32_t));
61 static int32_t hash_fd __P((const DB *));
62 static int32_t hash_get __P((const DB *, const DBT *, DBT *, u_int32_t));
63 static int32_t hash_put __P((const DB *, DBT *, const DBT *, u_int32_t));
64 static int32_t hash_seq __P((const DB *, DBT *, DBT *, u_int32_t));
65 static int32_t hash_sync __P((const DB *, u_int32_t));
66 static int32_t hdestroy __P((HTAB *));
67 static int32_t cursor_get __P((const DB *, CURSOR *, DBT *, DBT *, \
68 	u_int32_t));
69 static int32_t cursor_delete __P((const DB *, CURSOR *, u_int32_t));
70 static HTAB *init_hash __P((HTAB *, const char *, const HASHINFO *));
71 static int32_t init_htab __P((HTAB *, int32_t));
72 #if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
73 static void swap_header __P((HTAB *));
74 static void swap_header_copy __P((HASHHDR *, HASHHDR *));
75 #endif
76 static u_int32_t hget_header __P((HTAB *, u_int32_t));
77 static void hput_header __P((HTAB *));
78 
79 #define RETURN_ERROR(ERR, LOC)	{ save_errno = ERR; goto LOC; }
80 
81 /* Return values */
82 #define	SUCCESS	 (0)
83 #define	ERROR	(-1)
84 #define	ABNORMAL (1)
85 
86 #ifdef HASH_STATISTICS
87 u_int32_t hash_accesses, hash_collisions, hash_expansions, hash_overflows,
88 	hash_bigpages;
89 #endif
90 
91 /************************** INTERFACE ROUTINES ***************************/
92 /* OPEN/CLOSE */
93 
94 extern DB *
95 __kdb2_hash_open(file, flags, mode, info, dflags)
96 	const char *file;
97 	int32_t flags, mode, dflags;
98 	const HASHINFO *info;	/* Special directives for create */
99 {
100 	struct stat statbuf;
101 	DB *dbp;
102 	DBT mpool_key;
103 	HTAB *hashp;
104 	int32_t bpages, csize, new_table, save_errno, specified_file;
105 
106 	if ((flags & O_ACCMODE) == O_WRONLY) {
107 		errno = EINVAL;
108 		return (NULL);
109 	}
110 	if (!(hashp = (HTAB *)calloc(1, sizeof(HTAB))))
111 		return (NULL);
112 	hashp->fp = -1;
113 
114 	/* set this now, before file goes away... */
115 	specified_file = (file != NULL);
116 	if (!file) {
117 		/*
118 		 * If we are root and thus have access to /var/run, then use
119 		 * it, else tempnam(3) will use /var/tmp.
120 		 */
121 		if (!(file = tempnam("/var/run", NULL))) {
122 			/*
123 			 * No memory here is not the only failure
124 			 * possibility, but probably the most likely.
125 			 */
126 			errno = ENOMEM;
127 			free(hashp);
128 			return(NULL);
129 		}
130 
131 		/* store the file name so that we can unlink it later */
132 		hashp->fname = file;
133 #ifdef DEBUG
134 			fprintf(stderr, dgettext(TEXT_DOMAIN,
135 			"Using file name %s.\n"), file);
136 #endif
137 	}
138 	/*
139 	 * Even if user wants write only, we need to be able to read
140 	 * the actual file, so we need to open it read/write. But, the
141 	 * field in the hashp structure needs to be accurate so that
142 	 * we can check accesses.
143 	 */
144 	hashp->flags = flags;
145 	hashp->save_file = specified_file && (hashp->flags & O_RDWR);
146 
147 	new_table = 0;
148 	if (!file || (flags & O_TRUNC) ||
149 	    (stat(file, &statbuf) && (errno == ENOENT))) {
150 		if (errno == ENOENT)
151 			errno = 0;	/* In case someone looks at errno. */
152 		new_table = 1;
153 	}
154 	if (file) {
155 		if ((hashp->fp = open(file, flags|O_BINARY, mode)) == -1)
156 			RETURN_ERROR(errno, error0);
157 		(void)fcntl(hashp->fp, F_SETFD, 1);
158 	}
159 
160 	/* Process arguments to set up hash table header. */
161 	if (new_table) {
162 		if (!(hashp = init_hash(hashp, file, info)))
163 			RETURN_ERROR(errno, error1);
164 	} else {
165 		/* Table already exists */
166 		if (info && info->hash)
167 			hashp->hash = info->hash;
168 		else
169 			hashp->hash = __default_hash;
170 
171 		/* copy metadata from page into header */
172 		if (hget_header(hashp,
173 		    (info && info->bsize ? info->bsize : DEF_BUCKET_SIZE)) !=
174 		    sizeof(HASHHDR))
175 			RETURN_ERROR(EFTYPE, error1);
176 
177 		/* Verify file type, versions and hash function */
178 		if (hashp->hdr.magic != HASHMAGIC)
179 			RETURN_ERROR(EFTYPE, error1);
180 #define	OLDHASHVERSION	1
181 		if (hashp->hdr.version != HASHVERSION &&
182 		    hashp->hdr.version != OLDHASHVERSION)
183 			RETURN_ERROR(EFTYPE, error1);
184 		if (hashp->hash(CHARKEY, sizeof(CHARKEY))
185 		    != hashp->hdr.h_charkey)
186 			RETURN_ERROR(EFTYPE, error1);
187 		/*
188 		 * Figure out how many segments we need.  Max_Bucket is the
189 		 * maximum bucket number, so the number of buckets is
190 		 * max_bucket + 1.
191 		 */
192 
193 		/* Read in bitmaps */
194 		bpages = (hashp->hdr.spares[hashp->hdr.ovfl_point] +
195 		    (hashp->hdr.bsize << BYTE_SHIFT) - 1) >>
196 		    (hashp->hdr.bshift + BYTE_SHIFT);
197 
198 		hashp->nmaps = bpages;
199 		(void)memset(&hashp->mapp[0], 0, bpages * sizeof(u_int32_t *));
200 	}
201 
202 	/* start up mpool */
203 	mpool_key.data = (u_int8_t *)file;
204 	mpool_key.size = strlen(file);
205 
206 	if (info && info->cachesize)
207 		csize = info->cachesize / hashp->hdr.bsize;
208 	else
209 		csize = DEF_CACHESIZE / hashp->hdr.bsize;
210 	hashp->mp = mpool_open(&mpool_key, hashp->fp, hashp->hdr.bsize, csize);
211 
212 	if (!hashp->mp)
213 		RETURN_ERROR(errno, error1);
214 	mpool_filter(hashp->mp, __pgin_routine, __pgout_routine, hashp);
215 
216 	/*
217 	 * For a new table, set up the bitmaps.
218 	 */
219 	if (new_table &&
220 	   init_htab(hashp, info && info->nelem ? info->nelem : 1))
221 		goto error2;
222 
223 	/* initialize the cursor queue */
224 	TAILQ_INIT(&hashp->curs_queue);
225 	hashp->seq_cursor = NULL;
226 
227 
228 	/* get a chunk of memory for our split buffer */
229 	hashp->split_buf = (PAGE16 *)malloc(hashp->hdr.bsize);
230 	if (!hashp->split_buf)
231 		goto error2;
232 
233 	hashp->new_file = new_table;
234 
235 	if (!(dbp = (DB *)malloc(sizeof(DB))))
236 		goto error2;
237 
238 	dbp->internal = hashp;
239 	dbp->close = hash_close;
240 	dbp->del = hash_delete;
241 	dbp->fd = hash_fd;
242 	dbp->get = hash_get;
243 	dbp->put = hash_put;
244 	dbp->seq = hash_seq;
245 	dbp->sync = hash_sync;
246 	dbp->type = DB_HASH;
247 
248 #ifdef DEBUG
249 	(void)fprintf(stderr,
250 	    "%s\n%s%lx\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n",
251 	    "init_htab:",
252 	    "TABLE POINTER   ", (void *)hashp,
253 	    "BUCKET SIZE     ", hashp->hdr.bsize,
254 	    "BUCKET SHIFT    ", hashp->hdr.bshift,
255 	    "FILL FACTOR     ", hashp->hdr.ffactor,
256 	    "MAX BUCKET      ", hashp->hdr.max_bucket,
257 	    "OVFL POINT      ", hashp->hdr.ovfl_point,
258 	    "LAST FREED      ", hashp->hdr.last_freed,
259 	    "HIGH MASK       ", hashp->hdr.high_mask,
260 	    "LOW  MASK       ", hashp->hdr.low_mask,
261 	    "NKEYS           ", hashp->hdr.nkeys);
262 #endif
263 #ifdef HASH_STATISTICS
264 	hash_overflows = hash_accesses = hash_collisions = hash_expansions = 0;
265 	hash_bigpages = 0;
266 #endif
267 	return (dbp);
268 
269 error2:
270 	save_errno = errno;
271 	hdestroy(hashp);
272 	errno = save_errno;
273 	return (NULL);
274 
275 error1:
276 	if (hashp != NULL)
277 		(void)close(hashp->fp);
278 
279 error0:
280 	if (!specified_file)
281 		free((void*)(hashp->fname)); /* SUNW14resync */
282 	free(hashp);
283 	errno = save_errno;
284 	return (NULL);
285 }
286 
287 static int32_t
288 hash_close(dbp)
289 	DB *dbp;
290 {
291 	HTAB *hashp;
292 	int32_t retval;
293 
294 	if (!dbp)
295 		return (ERROR);
296 
297 	hashp = (HTAB *)dbp->internal;
298 	retval = hdestroy(hashp);
299 	free(dbp);
300 	return (retval);
301 }
302 
303 static int32_t
304 hash_fd(dbp)
305 	const DB *dbp;
306 {
307 	HTAB *hashp;
308 
309 	if (!dbp)
310 		return (ERROR);
311 
312 	hashp = (HTAB *)dbp->internal;
313 	if (hashp->fp == -1) {
314 		errno = ENOENT;
315 		return (-1);
316 	}
317 	return (hashp->fp);
318 }
319 
320 /************************** LOCAL CREATION ROUTINES **********************/
321 static HTAB *
322 init_hash(hashp, file, info)
323 	HTAB *hashp;
324 	const char *file;
325 	const HASHINFO *info;
326 {
327 	struct stat statbuf;
328 
329 	hashp->hdr.nkeys = 0;
330 	hashp->hdr.lorder = DB_BYTE_ORDER;
331 	hashp->hdr.bsize = DEF_BUCKET_SIZE;
332 	hashp->hdr.bshift = DEF_BUCKET_SHIFT;
333 	hashp->hdr.ffactor = DEF_FFACTOR;
334 	hashp->hash = __default_hash;
335 	memset(hashp->hdr.spares, 0, sizeof(hashp->hdr.spares));
336 	memset(hashp->hdr.bitmaps, 0, sizeof(hashp->hdr.bitmaps));
337 
338 	/* Fix bucket size to be optimal for file system */
339 	if (file != NULL) {
340 		if (stat(file, &statbuf))
341 			return (NULL);
342 		hashp->hdr.bsize = statbuf.st_blksize;
343 		hashp->hdr.bshift = __log2(hashp->hdr.bsize);
344 	}
345 	if (info) {
346 		if (info->bsize) {
347 			/* Round pagesize up to power of 2 */
348 			hashp->hdr.bshift = __log2(info->bsize);
349 			hashp->hdr.bsize = 1 << hashp->hdr.bshift;
350 			if (hashp->hdr.bsize > MAX_BSIZE) {
351 				errno = EINVAL;
352 				return (NULL);
353 			}
354 		}
355 		if (info->ffactor)
356 			hashp->hdr.ffactor = info->ffactor;
357 		if (info->hash)
358 			hashp->hash = info->hash;
359 		if (info->lorder) {
360 			if ((info->lorder != DB_BIG_ENDIAN) &&
361 			    (info->lorder != DB_LITTLE_ENDIAN)) {
362 				errno = EINVAL;
363 				return (NULL);
364 			}
365 			hashp->hdr.lorder = info->lorder;
366 		}
367 	}
368 	return (hashp);
369 }
370 
371 /*
372  * Returns 0 on No Error
373  */
374 static int32_t
375 init_htab(hashp, nelem)
376 	HTAB *hashp;
377 	int32_t nelem;
378 {
379 	int32_t l2, nbuckets;
380 
381 	/*
382 	 * Divide number of elements by the fill factor and determine a
383 	 * desired number of buckets.  Allocate space for the next greater
384 	 * power of two number of buckets.
385 	 */
386 	nelem = (nelem - 1) / hashp->hdr.ffactor + 1;
387 
388 	l2 = __log2(MAX(nelem, 2));
389 	nbuckets = 1 << l2;
390 
391 	hashp->hdr.spares[l2] = l2 + 1;
392 	hashp->hdr.spares[l2 + 1] = l2 + 1;
393 	hashp->hdr.ovfl_point = l2;
394 	hashp->hdr.last_freed = 2;
395 
396 	hashp->hdr.max_bucket = hashp->hdr.low_mask = nbuckets - 1;
397 	hashp->hdr.high_mask = (nbuckets << 1) - 1;
398 
399 	/*
400 	 * The number of header pages is the size of the header divided by
401 	 * the amount of freespace on header pages (the page size - the
402 	 * size of 1 integer where the length of the header info on that
403 	 * page is stored) plus another page if it didn't divide evenly.
404 	 */
405 	hashp->hdr.hdrpages =
406 	    (sizeof(HASHHDR) / (hashp->hdr.bsize - HEADER_OVERHEAD)) +
407 	    (((sizeof(HASHHDR) % (hashp->hdr.bsize - HEADER_OVERHEAD)) == 0)
408 	    ? 0 : 1);
409 
410 	/* Create pages for these buckets */
411 	/*
412 	for (i = 0; i <= hashp->hdr.max_bucket; i++) {
413 		if (__new_page(hashp, (u_int32_t)i, A_BUCKET) != 0)
414 			return (-1);
415 	}
416 	*/
417 
418 	/* First bitmap page is at: splitpoint l2 page offset 1 */
419 	if (__ibitmap(hashp, OADDR_OF(l2, 1), l2 + 1, 0))
420 		return (-1);
421 
422 	return (0);
423 }
424 
425 /*
426  * Functions to get/put hash header.  We access the file directly.
427  */
428 static u_int32_t
429 hget_header(hashp, page_size)
430 	HTAB *hashp;
431 	u_int32_t page_size;
432 {
433 	u_int32_t num_copied;
434 	u_int8_t *hdr_dest;
435 
436 	num_copied = 0;
437 
438 	hdr_dest = (u_int8_t *)&hashp->hdr;
439 
440 	/*
441 	 * XXX
442 	 * This should not be printing to stderr on a "normal" error case.
443 	 */
444 	lseek(hashp->fp, 0, SEEK_SET);
445 	num_copied = read(hashp->fp, hdr_dest, sizeof(HASHHDR));
446 	if (num_copied != sizeof(HASHHDR)) {
447 		/* Solaris Kerberos: Make sure to print a newline */
448 		fprintf(stderr, dgettext(TEXT_DOMAIN,
449 			"hash: could not retrieve header\n"));
450 		return (0);
451 	}
452 #if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
453 	swap_header(hashp);
454 #endif
455 	return (num_copied);
456 }
457 
458 static void
459 hput_header(hashp)
460 	HTAB *hashp;
461 {
462 	HASHHDR *whdrp;
463 #if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
464 	HASHHDR whdr;
465 #endif
466 	u_int32_t num_copied, i;
467 
468 	num_copied = i = 0;
469 
470 	whdrp = &hashp->hdr;
471 #if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
472 	whdrp = &whdr;
473 	swap_header_copy(&hashp->hdr, whdrp);
474 #endif
475 
476 	lseek(hashp->fp, 0, SEEK_SET);
477 	num_copied = write(hashp->fp, whdrp, sizeof(HASHHDR));
478 	if (num_copied != sizeof(HASHHDR))
479 		/* Solaris Kerberos: Make sure to print a newline */
480 		(void)fprintf(stderr, dgettext(TEXT_DOMAIN,
481 			"hash: could not write hash header\n"));
482 	return;
483 }
484 
485 /********************** DESTROY/CLOSE ROUTINES ************************/
486 
487 /*
488  * Flushes any changes to the file if necessary and destroys the hashp
489  * structure, freeing all allocated space.
490  */
491 static int32_t
492 hdestroy(hashp)
493 	HTAB *hashp;
494 {
495 	int32_t save_errno;
496 
497 	save_errno = 0;
498 
499 #ifdef HASH_STATISTICS
500 	{ int i;
501 	(void)fprintf(stderr, dgettext(TEXT_DOMAIN,
502 			"hdestroy: accesses %ld collisions %ld\n"),
503 	    hash_accesses, hash_collisions);
504 	(void)fprintf(stderr,
505 	   dgettext(TEXT_DOMAIN,
506 			 "hdestroy: expansions %ld\n"), hash_expansions);
507 	(void)fprintf(stderr,
508 	    dgettext(TEXT_DOMAIN,
509 			"hdestroy: overflows %ld\n"), hash_overflows);
510 	(void)fprintf(stderr,
511 	    dgettext(TEXT_DOMAIN,
512 			"hdestroy: big key/data pages %ld\n"), hash_bigpages);
513 	(void)fprintf(stderr,
514 	    "keys %ld maxp %d\n", hashp->hdr.nkeys, hashp->hdr.max_bucket);
515 
516 	for (i = 0; i < NCACHED; i++)
517 		(void)fprintf(stderr,
518 		    "spares[%d] = %d\n", i, hashp->hdr.spares[i]);
519 	}
520 #endif
521 
522 	if (flush_meta(hashp) && !save_errno)
523 		save_errno = errno;
524 
525 	/* Free the split page */
526 	if (hashp->split_buf)
527 		free(hashp->split_buf);
528 
529 	/* Free the big key and big data returns */
530 	if (hashp->bigkey_buf)
531 		free(hashp->bigkey_buf);
532 	if (hashp->bigdata_buf)
533 		free(hashp->bigdata_buf);
534 
535 	/* XXX This should really iterate over the cursor queue, but
536 	   it's not clear how to do that, and the only cursor a hash
537 	   table ever creates is the one used by hash_seq().  Passing
538 	   NULL as the first arg is also a kludge, but I know that
539 	   it's never used, so I do it.  The intent is to plug the
540 	   memory leak.  Correctness can come later. */
541 
542 	if (hashp->seq_cursor)
543 		hashp->seq_cursor->delete(NULL, hashp->seq_cursor, 0);
544 
545 	/* shut down mpool */
546 	mpool_sync(hashp->mp);
547 	mpool_close(hashp->mp);
548 
549 	if (hashp->fp != -1)
550 		(void)close(hashp->fp);
551 
552 	/*
553 	 * *** This may cause problems if hashp->fname is set in any case
554 	 * other than the case that we are generating a temporary file name.
555 	 * Note that the new version of mpool should support temporary
556 	 * files within mpool itself.
557 	 */
558 	if (hashp->fname && !hashp->save_file) {
559 #ifdef DEBUG
560 		fprintf(stderr, dgettext(TEXT_DOMAIN,
561 			"Unlinking file %s.\n"), hashp->fname);
562 #endif
563 		/* we need to chmod the file to allow it to be deleted... */
564 		chmod(hashp->fname, 0700);
565 		unlink(hashp->fname);
566 		/* destroy the temporary name */
567 		free((void *)(hashp->fname)); /* SUNW14resync */
568 	}
569 	free(hashp);
570 
571 	if (save_errno) {
572 		errno = save_errno;
573 		return (ERROR);
574 	}
575 	return (SUCCESS);
576 }
577 
578 /*
579  * Write modified pages to disk
580  *
581  * Returns:
582  *	 0 == OK
583  *	-1 ERROR
584  */
585 static int32_t
586 hash_sync(dbp, flags)
587 	const DB *dbp;
588 	u_int32_t flags;
589 {
590 	HTAB *hashp;
591 
592 	hashp = (HTAB *)dbp->internal;
593 
594 	/*
595 	 * XXX
596 	 * Check success/failure conditions.
597 	 */
598 	return (flush_meta(hashp) || mpool_sync(hashp->mp));
599 }
600 
601 /*
602  * Returns:
603  *	 0 == OK
604  *	-1 indicates that errno should be set
605  */
606 static int32_t
607 flush_meta(hashp)
608 	HTAB *hashp;
609 {
610 	int32_t i;
611 
612 	if (!hashp->save_file)
613 		return (0);
614 	hashp->hdr.magic = HASHMAGIC;
615 	hashp->hdr.version = HASHVERSION;
616 	hashp->hdr.h_charkey = hashp->hash(CHARKEY, sizeof(CHARKEY));
617 
618 	/* write out metadata */
619 	hput_header(hashp);
620 
621 	for (i = 0; i < NCACHED; i++)
622 		if (hashp->mapp[i]) {
623 			if (__put_page(hashp,
624 			    (PAGE16 *)hashp->mapp[i], A_BITMAP, 1))
625 				return (-1);
626 			hashp->mapp[i] = NULL;
627 		}
628 	return (0);
629 }
630 
631 /*******************************SEARCH ROUTINES *****************************/
632 /*
633  * All the access routines return
634  *
635  * Returns:
636  *	 0 on SUCCESS
637  *	 1 to indicate an external ERROR (i.e. key not found, etc)
638  *	-1 to indicate an internal ERROR (i.e. out of memory, etc)
639  */
640 
641 /* *** make sure this is true! */
642 
643 static int32_t
644 hash_get(dbp, key, data, flag)
645 	const DB *dbp;
646 	const DBT *key;
647 	DBT *data;
648 	u_int32_t flag;
649 {
650 	HTAB *hashp;
651 
652 	hashp = (HTAB *)dbp->internal;
653 	if (flag) {
654 		hashp->local_errno = errno = EINVAL;
655 		return (ERROR);
656 	}
657 	return (hash_access(hashp, HASH_GET, key, data));
658 }
659 
660 static int32_t
661 hash_put(dbp, key, data, flag)
662 	const DB *dbp;
663 	DBT *key;
664 	const DBT *data;
665 	u_int32_t flag;
666 {
667 	HTAB *hashp;
668 
669 	hashp = (HTAB *)dbp->internal;
670 	if (flag && flag != R_NOOVERWRITE) {
671 		hashp->local_errno = errno = EINVAL;
672 		return (ERROR);
673 	}
674 	if ((hashp->flags & O_ACCMODE) == O_RDONLY) {
675 		hashp->local_errno = errno = EPERM;
676 		return (ERROR);
677 	}
678 	return (hash_access(hashp, flag == R_NOOVERWRITE ?
679 		HASH_PUTNEW : HASH_PUT, key, (DBT *)data));
680 }
681 
682 static int32_t
683 hash_delete(dbp, key, flag)
684 	const DB *dbp;
685 	const DBT *key;
686 	u_int32_t flag;		/* Ignored */
687 {
688 	HTAB *hashp;
689 
690 	hashp = (HTAB *)dbp->internal;
691 	if (flag) {
692 		hashp->local_errno = errno = EINVAL;
693 		return (ERROR);
694 	}
695 	if ((hashp->flags & O_ACCMODE) == O_RDONLY) {
696 		hashp->local_errno = errno = EPERM;
697 		return (ERROR);
698 	}
699 
700 	return (hash_access(hashp, HASH_DELETE, key, NULL));
701 }
702 
703 /*
704  * Assume that hashp has been set in wrapper routine.
705  */
706 static int32_t
707 hash_access(hashp, action, key, val)
708 	HTAB *hashp;
709 	ACTION action;
710 	const DBT *key;
711 	DBT *val;
712 {
713 	DBT page_key, page_val;
714 	CURSOR cursor;
715 	ITEM_INFO item_info;
716 	u_int32_t bucket;
717 	u_int32_t num_items;
718 
719 #ifdef HASH_STATISTICS
720 	hash_accesses++;
721 #endif
722 
723 	num_items = 0;
724 
725 	/*
726 	 * Set up item_info so that we're looking for space to add an item
727 	 * as we cycle through the pages looking for the key.
728 	 */
729 	if (action == HASH_PUT || action == HASH_PUTNEW) {
730 		if (ISBIG(key->size + val->size, hashp))
731 			item_info.seek_size = PAIR_OVERHEAD;
732 		else
733 			item_info.seek_size = key->size + val->size;
734 	} else
735 		item_info.seek_size = 0;
736 	item_info.seek_found_page = 0;
737 
738 	bucket = __call_hash(hashp, (int8_t *)key->data, key->size);
739 
740 	cursor.pagep = NULL;
741 	__get_item_reset(hashp, &cursor);
742 
743 	cursor.bucket = bucket;
744 	while (1) {
745 		__get_item_next(hashp, &cursor, &page_key, &page_val, &item_info);
746 		if (item_info.status == ITEM_ERROR)
747 			return (ABNORMAL);
748 		if (item_info.status == ITEM_NO_MORE)
749 			break;
750 		num_items++;
751 		if (item_info.key_off == BIGPAIR) {
752 			/*
753 			 * !!!
754 			 * 0 is a valid index.
755 			 */
756 			if (__find_bigpair(hashp, &cursor, (int8_t *)key->data,
757 			    key->size) > 0)
758 				goto found;
759 		} else if (key->size == page_key.size &&
760 		    !memcmp(key->data, page_key.data, key->size))
761 			goto found;
762 	}
763 #ifdef HASH_STATISTICS
764 	hash_collisions++;
765 #endif
766 	__get_item_done(hashp, &cursor);
767 
768 	/*
769 	 * At this point, item_info will list either the last page in
770 	 * the chain, or the last page in the chain plus a pgno for where
771 	 * to find the first page in the chain with space for the
772 	 * item we wish to add.
773 	 */
774 
775 	/* Not found */
776 	switch (action) {
777 	case HASH_PUT:
778 	case HASH_PUTNEW:
779 		if (__addel(hashp, &item_info, key, val, num_items, 0))
780 			return (ERROR);
781 		break;
782 	case HASH_GET:
783 	case HASH_DELETE:
784 	default:
785 		return (ABNORMAL);
786 	}
787 
788 	if (item_info.caused_expand)
789 		__expand_table(hashp);
790 	return (SUCCESS);
791 
792 found:	__get_item_done(hashp, &cursor);
793 
794 	switch (action) {
795 	case HASH_PUTNEW:
796 		/* mpool_put(hashp->mp, pagep, 0); */
797 		return (ABNORMAL);
798 	case HASH_GET:
799 		if (item_info.key_off == BIGPAIR) {
800 			if (__big_return(hashp, &item_info, val, 0))
801 				return (ERROR);
802 		} else {
803 			val->data = page_val.data;
804 			val->size = page_val.size;
805 		}
806 		/* *** data may not be available! */
807 		break;
808 	case HASH_PUT:
809 		if (__delpair(hashp, &cursor, &item_info) ||
810 		    __addel(hashp, &item_info, key, val, UNKNOWN, 0))
811 			return (ERROR);
812 		__get_item_done(hashp, &cursor);
813 		if (item_info.caused_expand)
814 			__expand_table(hashp);
815 		break;
816 	case HASH_DELETE:
817 		if (__delpair(hashp, &cursor, &item_info))
818 			return (ERROR);
819 		break;
820 	default:
821 		abort();
822 	}
823 	return (SUCCESS);
824 }
825 
826 /* ****************** CURSORS ********************************** */
827 CURSOR *
828 __cursor_creat(dbp)
829 	const DB *dbp;
830 {
831 	CURSOR *new_curs;
832 	HTAB *hashp;
833 
834 	new_curs = (CURSOR *)malloc(sizeof(struct cursor_t));
835 	if (!new_curs)
836 		return NULL;
837 	new_curs->internal =
838 	    (struct item_info *)malloc(sizeof(struct item_info));
839 	if (!new_curs->internal) {
840 		free(new_curs);
841 		return NULL;
842 	}
843 	new_curs->get = cursor_get;
844 	new_curs->delete = cursor_delete;
845 
846 	new_curs->bucket = 0;
847 	new_curs->pgno = INVALID_PGNO;
848 	new_curs->ndx = 0;
849 	new_curs->pgndx = 0;
850 	new_curs->pagep = NULL;
851 
852 	/* place onto queue of cursors */
853 	hashp = (HTAB *)dbp->internal;
854 	TAILQ_INSERT_TAIL(&hashp->curs_queue, new_curs, queue);
855 
856 	return new_curs;
857 }
858 
859 static int32_t
860 cursor_get(dbp, cursorp, key, val, flags)
861 	const DB *dbp;
862 	CURSOR *cursorp;
863 	DBT *key, *val;
864 	u_int32_t flags;
865 {
866 	HTAB *hashp;
867 	ITEM_INFO item_info;
868 
869 	hashp = (HTAB *)dbp->internal;
870 
871 	if (flags && flags != R_FIRST && flags != R_NEXT) {
872 		hashp->local_errno = errno = EINVAL;
873 		return (ERROR);
874 	}
875 #ifdef HASH_STATISTICS
876 	hash_accesses++;
877 #endif
878 
879 	item_info.seek_size = 0;
880 
881 	if (flags == R_FIRST)
882 		__get_item_first(hashp, cursorp, key, val, &item_info);
883 	else
884 		__get_item_next(hashp, cursorp, key, val, &item_info);
885 
886 	/*
887 	 * This needs to be changed around.  As is, get_item_next advances
888 	 * the pointers on the page but this function actually advances
889 	 * bucket pointers.  This works, since the only other place we
890 	 * use get_item_next is in hash_access which only deals with one
891 	 * bucket at a time.  However, there is the problem that certain other
892 	 * functions (such as find_bigpair and delpair) depend on the
893 	 * pgndx member of the cursor.  Right now, they are using pngdx - 1
894 	 * since indices refer to the __next__ item that is to be fetched
895 	 * from the page.  This is ugly, as you may have noticed, whoever
896 	 * you are.  The best solution would be to depend on item_infos to
897 	 * deal with _current_ information, and have the cursors only
898 	 * deal with _next_ information.  In that scheme, get_item_next
899 	 * would also advance buckets.  Version 3...
900 	 */
901 
902 
903 	/*
904 	 * Must always enter this loop to do error handling and
905 	 * check for big key/data pair.
906 	 */
907 	while (1) {
908 		if (item_info.status == ITEM_OK) {
909 			if (item_info.key_off == BIGPAIR &&
910 			    __big_keydata(hashp, cursorp->pagep, key, val,
911 			    item_info.pgndx))
912 				return (ABNORMAL);
913 
914 			break;
915 		} else if (item_info.status != ITEM_NO_MORE)
916 			return (ABNORMAL);
917 
918 		__put_page(hashp, cursorp->pagep, A_RAW, 0);
919 		cursorp->ndx = cursorp->pgndx = 0;
920 		cursorp->bucket++;
921 		cursorp->pgno = INVALID_PGNO;
922 		cursorp->pagep = NULL;
923 		if (cursorp->bucket > hashp->hdr.max_bucket)
924 			return (ABNORMAL);
925 		__get_item_next(hashp, cursorp, key, val, &item_info);
926 	}
927 
928 	__get_item_done(hashp, cursorp);
929 	return (0);
930 }
931 
932 static int32_t
933 cursor_delete(dbp, cursor, flags)
934 	const DB *dbp;
935 	CURSOR *cursor;
936 	u_int32_t flags;
937 {
938 	/* XXX this is empirically determined, so it might not be completely
939 	   correct, but it seems to work.  At the very least it fixes
940 	   a memory leak */
941 
942 	free(cursor->internal);
943 	free(cursor);
944 
945 	return (0);
946 }
947 
948 static int32_t
949 hash_seq(dbp, key, val, flag)
950 	const DB *dbp;
951 	DBT *key, *val;
952 	u_int32_t flag;
953 {
954 	HTAB *hashp;
955 
956 	/*
957 	 * Seq just uses the default cursor to go sequecing through the
958 	 * database.  Note that the default cursor is the first in the list.
959 	 */
960 
961 	hashp = (HTAB *)dbp->internal;
962 	if (!hashp->seq_cursor)
963 		hashp->seq_cursor = __cursor_creat(dbp);
964 
965 	return (hashp->seq_cursor->get(dbp, hashp->seq_cursor, key, val, flag));
966 }
967 
968 /********************************* UTILITIES ************************/
969 
970 /*
971  * Returns:
972  *	 0 ==> OK
973  *	-1 ==> Error
974  */
975 int32_t
976 __expand_table(hashp)
977 	HTAB *hashp;
978 {
979 	u_int32_t old_bucket, new_bucket;
980 	int32_t spare_ndx;
981 
982 #ifdef HASH_STATISTICS
983 	hash_expansions++;
984 #endif
985 	new_bucket = ++hashp->hdr.max_bucket;
986 	old_bucket = (hashp->hdr.max_bucket & hashp->hdr.low_mask);
987 
988 	/* Get a page for this new bucket */
989 	if (__new_page(hashp, new_bucket, A_BUCKET) != 0)
990 		return (-1);
991 
992 	/*
993 	 * If the split point is increasing (hdr.max_bucket's log base 2
994 	 * increases), we need to copy the current contents of the spare
995 	 * split bucket to the next bucket.
996 	 */
997 	spare_ndx = __log2(hashp->hdr.max_bucket + 1);
998 	if (spare_ndx > hashp->hdr.ovfl_point) {
999 		hashp->hdr.spares[spare_ndx] = hashp->hdr.spares[hashp->hdr.ovfl_point];
1000 		hashp->hdr.ovfl_point = spare_ndx;
1001 	}
1002 	if (new_bucket > hashp->hdr.high_mask) {
1003 		/* Starting a new doubling */
1004 		hashp->hdr.low_mask = hashp->hdr.high_mask;
1005 		hashp->hdr.high_mask = new_bucket | hashp->hdr.low_mask;
1006 	}
1007 	if (BUCKET_TO_PAGE(new_bucket) > MAX_PAGES(hashp)) {
1008 		fprintf(stderr,dgettext(TEXT_DOMAIN,
1009 			 "hash: Cannot allocate new bucket.  Pages exhausted.\n"));
1010 		return (-1);
1011 	}
1012 	/* Relocate records to the new bucket */
1013 	return (__split_page(hashp, old_bucket, new_bucket));
1014 }
1015 
1016 u_int32_t
1017 __call_hash(hashp, k, len)
1018 	HTAB *hashp;
1019 	int8_t *k;
1020 	int32_t len;
1021 {
1022 	int32_t n, bucket;
1023 
1024 	n = hashp->hash(k, len);
1025 	bucket = n & hashp->hdr.high_mask;
1026 	if (bucket > hashp->hdr.max_bucket)
1027 		bucket = bucket & hashp->hdr.low_mask;
1028 	return (bucket);
1029 }
1030 
1031 #if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
1032 /*
1033  * Hashp->hdr needs to be byteswapped.
1034  */
1035 static void
1036 swap_header_copy(srcp, destp)
1037 	HASHHDR *srcp, *destp;
1038 {
1039 	int32_t i;
1040 
1041 	P_32_COPY(srcp->magic, destp->magic);
1042 	P_32_COPY(srcp->version, destp->version);
1043 	P_32_COPY(srcp->lorder, destp->lorder);
1044 	P_32_COPY(srcp->bsize, destp->bsize);
1045 	P_32_COPY(srcp->bshift, destp->bshift);
1046 	P_32_COPY(srcp->ovfl_point, destp->ovfl_point);
1047 	P_32_COPY(srcp->last_freed, destp->last_freed);
1048 	P_32_COPY(srcp->max_bucket, destp->max_bucket);
1049 	P_32_COPY(srcp->high_mask, destp->high_mask);
1050 	P_32_COPY(srcp->low_mask, destp->low_mask);
1051 	P_32_COPY(srcp->ffactor, destp->ffactor);
1052 	P_32_COPY(srcp->nkeys, destp->nkeys);
1053 	P_32_COPY(srcp->hdrpages, destp->hdrpages);
1054 	P_32_COPY(srcp->h_charkey, destp->h_charkey);
1055 	for (i = 0; i < NCACHED; i++) {
1056 		P_32_COPY(srcp->spares[i], destp->spares[i]);
1057 		P_16_COPY(srcp->bitmaps[i], destp->bitmaps[i]);
1058 	}
1059 }
1060 
1061 static void
1062 swap_header(hashp)
1063 	HTAB *hashp;
1064 {
1065 	HASHHDR *hdrp;
1066 	int32_t i;
1067 
1068 	hdrp = &hashp->hdr;
1069 
1070 	M_32_SWAP(hdrp->magic);
1071 	M_32_SWAP(hdrp->version);
1072 	M_32_SWAP(hdrp->lorder);
1073 	M_32_SWAP(hdrp->bsize);
1074 	M_32_SWAP(hdrp->bshift);
1075 	M_32_SWAP(hdrp->ovfl_point);
1076 	M_32_SWAP(hdrp->last_freed);
1077 	M_32_SWAP(hdrp->max_bucket);
1078 	M_32_SWAP(hdrp->high_mask);
1079 	M_32_SWAP(hdrp->low_mask);
1080 	M_32_SWAP(hdrp->ffactor);
1081 	M_32_SWAP(hdrp->nkeys);
1082 	M_32_SWAP(hdrp->hdrpages);
1083 	M_32_SWAP(hdrp->h_charkey);
1084 	for (i = 0; i < NCACHED; i++) {
1085 		M_32_SWAP(hdrp->spares[i]);
1086 		M_16_SWAP(hdrp->bitmaps[i]);
1087 	}
1088 }
1089 #endif /* DB_BYTE_ORDER == DB_LITTLE_ENDIAN */
1090