1/*
2 * Copyright (c) 1990, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Margo Seltzer.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#undef _TS_ERRNO_
38#include <sys/param.h>
39#include <sys/stat.h>
40
41#include <errno.h>
42#include <fcntl.h>
43#include <stdio.h>
44#include <stdlib.h>
45#include <string.h>
46#include <unistd.h>
47#include <libintl.h>
48#ifdef DEBUG
49#include <assert.h>
50#endif
51
52#include "db-int.h"
53#include "hash.h"
54#include "page.h"
55#include "extern.h"
56
57static int32_t flush_meta __P((HTAB *));
58static int32_t hash_access __P((HTAB *, ACTION, const DBT *, DBT *));
59static int32_t hash_close __P((DB *));
60static int32_t hash_delete __P((const DB *, const DBT *, u_int32_t));
61static int32_t hash_fd __P((const DB *));
62static int32_t hash_get __P((const DB *, const DBT *, DBT *, u_int32_t));
63static int32_t hash_put __P((const DB *, DBT *, const DBT *, u_int32_t));
64static int32_t hash_seq __P((const DB *, DBT *, DBT *, u_int32_t));
65static int32_t hash_sync __P((const DB *, u_int32_t));
66static int32_t hdestroy __P((HTAB *));
67static int32_t cursor_get __P((const DB *, CURSOR *, DBT *, DBT *, \
68	u_int32_t));
69static int32_t cursor_delete __P((const DB *, CURSOR *, u_int32_t));
70static HTAB *init_hash __P((HTAB *, const char *, const HASHINFO *));
71static int32_t init_htab __P((HTAB *, int32_t));
72#if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
73static void swap_header __P((HTAB *));
74static void swap_header_copy __P((HASHHDR *, HASHHDR *));
75#endif
76static u_int32_t hget_header __P((HTAB *, u_int32_t));
77static void hput_header __P((HTAB *));
78
79#define RETURN_ERROR(ERR, LOC)	{ save_errno = ERR; goto LOC; }
80
81/* Return values */
82#define	SUCCESS	 (0)
83#define	ERROR	(-1)
84#define	ABNORMAL (1)
85
86#ifdef HASH_STATISTICS
87u_int32_t hash_accesses, hash_collisions, hash_expansions, hash_overflows,
88	hash_bigpages;
89#endif
90
91/************************** INTERFACE ROUTINES ***************************/
92/* OPEN/CLOSE */
93
94extern DB *
95__kdb2_hash_open(file, flags, mode, info, dflags)
96	const char *file;
97	int32_t flags, mode, dflags;
98	const HASHINFO *info;	/* Special directives for create */
99{
100	struct stat statbuf;
101	DB *dbp;
102	DBT mpool_key;
103	HTAB *hashp;
104	int32_t bpages, csize, new_table, save_errno, specified_file;
105
106	if ((flags & O_ACCMODE) == O_WRONLY) {
107		errno = EINVAL;
108		return (NULL);
109	}
110	if (!(hashp = (HTAB *)calloc(1, sizeof(HTAB))))
111		return (NULL);
112	hashp->fp = -1;
113
114	/* set this now, before file goes away... */
115	specified_file = (file != NULL);
116	if (!file) {
117		/*
118		 * If we are root and thus have access to /var/run, then use
119		 * it, else tempnam(3) will use /var/tmp.
120		 */
121		if (!(file = tempnam("/var/run", NULL))) {
122			/*
123			 * No memory here is not the only failure
124			 * possibility, but probably the most likely.
125			 */
126			errno = ENOMEM;
127			free(hashp);
128			return(NULL);
129		}
130
131		/* store the file name so that we can unlink it later */
132		hashp->fname = file;
133#ifdef DEBUG
134			fprintf(stderr, dgettext(TEXT_DOMAIN,
135			"Using file name %s.\n"), file);
136#endif
137	}
138	/*
139	 * Even if user wants write only, we need to be able to read
140	 * the actual file, so we need to open it read/write. But, the
141	 * field in the hashp structure needs to be accurate so that
142	 * we can check accesses.
143	 */
144	hashp->flags = flags;
145	hashp->save_file = specified_file && (hashp->flags & O_RDWR);
146
147	new_table = 0;
148	if (!file || (flags & O_TRUNC) ||
149	    (stat(file, &statbuf) && (errno == ENOENT))) {
150		if (errno == ENOENT)
151			errno = 0;	/* In case someone looks at errno. */
152		new_table = 1;
153	}
154	if (file) {
155		if ((hashp->fp = open(file, flags|O_BINARY, mode)) == -1)
156			RETURN_ERROR(errno, error0);
157		(void)fcntl(hashp->fp, F_SETFD, 1);
158	}
159
160	/* Process arguments to set up hash table header. */
161	if (new_table) {
162		if (!(hashp = init_hash(hashp, file, info)))
163			RETURN_ERROR(errno, error1);
164	} else {
165		/* Table already exists */
166		if (info && info->hash)
167			hashp->hash = info->hash;
168		else
169			hashp->hash = __default_hash;
170
171		/* copy metadata from page into header */
172		if (hget_header(hashp,
173		    (info && info->bsize ? info->bsize : DEF_BUCKET_SIZE)) !=
174		    sizeof(HASHHDR))
175			RETURN_ERROR(EFTYPE, error1);
176
177		/* Verify file type, versions and hash function */
178		if (hashp->hdr.magic != HASHMAGIC)
179			RETURN_ERROR(EFTYPE, error1);
180#define	OLDHASHVERSION	1
181		if (hashp->hdr.version != HASHVERSION &&
182		    hashp->hdr.version != OLDHASHVERSION)
183			RETURN_ERROR(EFTYPE, error1);
184		if (hashp->hash(CHARKEY, sizeof(CHARKEY))
185		    != hashp->hdr.h_charkey)
186			RETURN_ERROR(EFTYPE, error1);
187		/*
188		 * Figure out how many segments we need.  Max_Bucket is the
189		 * maximum bucket number, so the number of buckets is
190		 * max_bucket + 1.
191		 */
192
193		/* Read in bitmaps */
194		bpages = (hashp->hdr.spares[hashp->hdr.ovfl_point] +
195		    (hashp->hdr.bsize << BYTE_SHIFT) - 1) >>
196		    (hashp->hdr.bshift + BYTE_SHIFT);
197
198		hashp->nmaps = bpages;
199		(void)memset(&hashp->mapp[0], 0, bpages * sizeof(u_int32_t *));
200	}
201
202	/* start up mpool */
203	mpool_key.data = (u_int8_t *)file;
204	mpool_key.size = strlen(file);
205
206	if (info && info->cachesize)
207		csize = info->cachesize / hashp->hdr.bsize;
208	else
209		csize = DEF_CACHESIZE / hashp->hdr.bsize;
210	hashp->mp = mpool_open(&mpool_key, hashp->fp, hashp->hdr.bsize, csize);
211
212	if (!hashp->mp)
213		RETURN_ERROR(errno, error1);
214	mpool_filter(hashp->mp, __pgin_routine, __pgout_routine, hashp);
215
216	/*
217	 * For a new table, set up the bitmaps.
218	 */
219	if (new_table &&
220	   init_htab(hashp, info && info->nelem ? info->nelem : 1))
221		goto error2;
222
223	/* initialize the cursor queue */
224	TAILQ_INIT(&hashp->curs_queue);
225	hashp->seq_cursor = NULL;
226
227
228	/* get a chunk of memory for our split buffer */
229	hashp->split_buf = (PAGE16 *)malloc(hashp->hdr.bsize);
230	if (!hashp->split_buf)
231		goto error2;
232
233	hashp->new_file = new_table;
234
235	if (!(dbp = (DB *)malloc(sizeof(DB))))
236		goto error2;
237
238	dbp->internal = hashp;
239	dbp->close = hash_close;
240	dbp->del = hash_delete;
241	dbp->fd = hash_fd;
242	dbp->get = hash_get;
243	dbp->put = hash_put;
244	dbp->seq = hash_seq;
245	dbp->sync = hash_sync;
246	dbp->type = DB_HASH;
247
248#ifdef DEBUG
249	(void)fprintf(stderr,
250	    "%s\n%s%lx\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n",
251	    "init_htab:",
252	    "TABLE POINTER   ", (void *)hashp,
253	    "BUCKET SIZE     ", hashp->hdr.bsize,
254	    "BUCKET SHIFT    ", hashp->hdr.bshift,
255	    "FILL FACTOR     ", hashp->hdr.ffactor,
256	    "MAX BUCKET      ", hashp->hdr.max_bucket,
257	    "OVFL POINT      ", hashp->hdr.ovfl_point,
258	    "LAST FREED      ", hashp->hdr.last_freed,
259	    "HIGH MASK       ", hashp->hdr.high_mask,
260	    "LOW  MASK       ", hashp->hdr.low_mask,
261	    "NKEYS           ", hashp->hdr.nkeys);
262#endif
263#ifdef HASH_STATISTICS
264	hash_overflows = hash_accesses = hash_collisions = hash_expansions = 0;
265	hash_bigpages = 0;
266#endif
267	return (dbp);
268
269error2:
270	save_errno = errno;
271	hdestroy(hashp);
272	errno = save_errno;
273	return (NULL);
274
275error1:
276	if (hashp != NULL)
277		(void)close(hashp->fp);
278
279error0:
280	if (!specified_file)
281		free((void*)(hashp->fname)); /* SUNW14resync */
282	free(hashp);
283	errno = save_errno;
284	return (NULL);
285}
286
287static int32_t
288hash_close(dbp)
289	DB *dbp;
290{
291	HTAB *hashp;
292	int32_t retval;
293
294	if (!dbp)
295		return (ERROR);
296
297	hashp = (HTAB *)dbp->internal;
298	retval = hdestroy(hashp);
299	free(dbp);
300	return (retval);
301}
302
303static int32_t
304hash_fd(dbp)
305	const DB *dbp;
306{
307	HTAB *hashp;
308
309	if (!dbp)
310		return (ERROR);
311
312	hashp = (HTAB *)dbp->internal;
313	if (hashp->fp == -1) {
314		errno = ENOENT;
315		return (-1);
316	}
317	return (hashp->fp);
318}
319
320/************************** LOCAL CREATION ROUTINES **********************/
321static HTAB *
322init_hash(hashp, file, info)
323	HTAB *hashp;
324	const char *file;
325	const HASHINFO *info;
326{
327	struct stat statbuf;
328
329	hashp->hdr.nkeys = 0;
330	hashp->hdr.lorder = DB_BYTE_ORDER;
331	hashp->hdr.bsize = DEF_BUCKET_SIZE;
332	hashp->hdr.bshift = DEF_BUCKET_SHIFT;
333	hashp->hdr.ffactor = DEF_FFACTOR;
334	hashp->hash = __default_hash;
335	memset(hashp->hdr.spares, 0, sizeof(hashp->hdr.spares));
336	memset(hashp->hdr.bitmaps, 0, sizeof(hashp->hdr.bitmaps));
337
338	/* Fix bucket size to be optimal for file system */
339	if (file != NULL) {
340		if (stat(file, &statbuf))
341			return (NULL);
342		hashp->hdr.bsize = statbuf.st_blksize;
343		hashp->hdr.bshift = __log2(hashp->hdr.bsize);
344	}
345	if (info) {
346		if (info->bsize) {
347			/* Round pagesize up to power of 2 */
348			hashp->hdr.bshift = __log2(info->bsize);
349			hashp->hdr.bsize = 1 << hashp->hdr.bshift;
350			if (hashp->hdr.bsize > MAX_BSIZE) {
351				errno = EINVAL;
352				return (NULL);
353			}
354		}
355		if (info->ffactor)
356			hashp->hdr.ffactor = info->ffactor;
357		if (info->hash)
358			hashp->hash = info->hash;
359		if (info->lorder) {
360			if ((info->lorder != DB_BIG_ENDIAN) &&
361			    (info->lorder != DB_LITTLE_ENDIAN)) {
362				errno = EINVAL;
363				return (NULL);
364			}
365			hashp->hdr.lorder = info->lorder;
366		}
367	}
368	return (hashp);
369}
370
371/*
372 * Returns 0 on No Error
373 */
374static int32_t
375init_htab(hashp, nelem)
376	HTAB *hashp;
377	int32_t nelem;
378{
379	int32_t l2, nbuckets;
380
381	/*
382	 * Divide number of elements by the fill factor and determine a
383	 * desired number of buckets.  Allocate space for the next greater
384	 * power of two number of buckets.
385	 */
386	nelem = (nelem - 1) / hashp->hdr.ffactor + 1;
387
388	l2 = __log2(MAX(nelem, 2));
389	nbuckets = 1 << l2;
390
391	hashp->hdr.spares[l2] = l2 + 1;
392	hashp->hdr.spares[l2 + 1] = l2 + 1;
393	hashp->hdr.ovfl_point = l2;
394	hashp->hdr.last_freed = 2;
395
396	hashp->hdr.max_bucket = hashp->hdr.low_mask = nbuckets - 1;
397	hashp->hdr.high_mask = (nbuckets << 1) - 1;
398
399	/*
400	 * The number of header pages is the size of the header divided by
401	 * the amount of freespace on header pages (the page size - the
402	 * size of 1 integer where the length of the header info on that
403	 * page is stored) plus another page if it didn't divide evenly.
404	 */
405	hashp->hdr.hdrpages =
406	    (sizeof(HASHHDR) / (hashp->hdr.bsize - HEADER_OVERHEAD)) +
407	    (((sizeof(HASHHDR) % (hashp->hdr.bsize - HEADER_OVERHEAD)) == 0)
408	    ? 0 : 1);
409
410	/* Create pages for these buckets */
411	/*
412	for (i = 0; i <= hashp->hdr.max_bucket; i++) {
413		if (__new_page(hashp, (u_int32_t)i, A_BUCKET) != 0)
414			return (-1);
415	}
416	*/
417
418	/* First bitmap page is at: splitpoint l2 page offset 1 */
419	if (__ibitmap(hashp, OADDR_OF(l2, 1), l2 + 1, 0))
420		return (-1);
421
422	return (0);
423}
424
425/*
426 * Functions to get/put hash header.  We access the file directly.
427 */
428static u_int32_t
429hget_header(hashp, page_size)
430	HTAB *hashp;
431	u_int32_t page_size;
432{
433	u_int32_t num_copied;
434	u_int8_t *hdr_dest;
435
436	num_copied = 0;
437
438	hdr_dest = (u_int8_t *)&hashp->hdr;
439
440	/*
441	 * XXX
442	 * This should not be printing to stderr on a "normal" error case.
443	 */
444	lseek(hashp->fp, 0, SEEK_SET);
445	num_copied = read(hashp->fp, hdr_dest, sizeof(HASHHDR));
446	if (num_copied != sizeof(HASHHDR)) {
447		/* Solaris Kerberos: Make sure to print a newline */
448		fprintf(stderr, dgettext(TEXT_DOMAIN,
449			"hash: could not retrieve header\n"));
450		return (0);
451	}
452#if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
453	swap_header(hashp);
454#endif
455	return (num_copied);
456}
457
458static void
459hput_header(hashp)
460	HTAB *hashp;
461{
462	HASHHDR *whdrp;
463#if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
464	HASHHDR whdr;
465#endif
466	u_int32_t num_copied, i;
467
468	num_copied = i = 0;
469
470	whdrp = &hashp->hdr;
471#if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
472	whdrp = &whdr;
473	swap_header_copy(&hashp->hdr, whdrp);
474#endif
475
476	lseek(hashp->fp, 0, SEEK_SET);
477	num_copied = write(hashp->fp, whdrp, sizeof(HASHHDR));
478	if (num_copied != sizeof(HASHHDR))
479		/* Solaris Kerberos: Make sure to print a newline */
480		(void)fprintf(stderr, dgettext(TEXT_DOMAIN,
481			"hash: could not write hash header\n"));
482	return;
483}
484
485/********************** DESTROY/CLOSE ROUTINES ************************/
486
487/*
488 * Flushes any changes to the file if necessary and destroys the hashp
489 * structure, freeing all allocated space.
490 */
491static int32_t
492hdestroy(hashp)
493	HTAB *hashp;
494{
495	int32_t save_errno;
496
497	save_errno = 0;
498
499#ifdef HASH_STATISTICS
500	{ int i;
501	(void)fprintf(stderr, dgettext(TEXT_DOMAIN,
502			"hdestroy: accesses %ld collisions %ld\n"),
503	    hash_accesses, hash_collisions);
504	(void)fprintf(stderr,
505	   dgettext(TEXT_DOMAIN,
506			 "hdestroy: expansions %ld\n"), hash_expansions);
507	(void)fprintf(stderr,
508	    dgettext(TEXT_DOMAIN,
509			"hdestroy: overflows %ld\n"), hash_overflows);
510	(void)fprintf(stderr,
511	    dgettext(TEXT_DOMAIN,
512			"hdestroy: big key/data pages %ld\n"), hash_bigpages);
513	(void)fprintf(stderr,
514	    "keys %ld maxp %d\n", hashp->hdr.nkeys, hashp->hdr.max_bucket);
515
516	for (i = 0; i < NCACHED; i++)
517		(void)fprintf(stderr,
518		    "spares[%d] = %d\n", i, hashp->hdr.spares[i]);
519	}
520#endif
521
522	if (flush_meta(hashp) && !save_errno)
523		save_errno = errno;
524
525	/* Free the split page */
526	if (hashp->split_buf)
527		free(hashp->split_buf);
528
529	/* Free the big key and big data returns */
530	if (hashp->bigkey_buf)
531		free(hashp->bigkey_buf);
532	if (hashp->bigdata_buf)
533		free(hashp->bigdata_buf);
534
535	/* XXX This should really iterate over the cursor queue, but
536	   it's not clear how to do that, and the only cursor a hash
537	   table ever creates is the one used by hash_seq().  Passing
538	   NULL as the first arg is also a kludge, but I know that
539	   it's never used, so I do it.  The intent is to plug the
540	   memory leak.  Correctness can come later. */
541
542	if (hashp->seq_cursor)
543		hashp->seq_cursor->delete(NULL, hashp->seq_cursor, 0);
544
545	/* shut down mpool */
546	mpool_sync(hashp->mp);
547	mpool_close(hashp->mp);
548
549	if (hashp->fp != -1)
550		(void)close(hashp->fp);
551
552	/*
553	 * *** This may cause problems if hashp->fname is set in any case
554	 * other than the case that we are generating a temporary file name.
555	 * Note that the new version of mpool should support temporary
556	 * files within mpool itself.
557	 */
558	if (hashp->fname && !hashp->save_file) {
559#ifdef DEBUG
560		fprintf(stderr, dgettext(TEXT_DOMAIN,
561			"Unlinking file %s.\n"), hashp->fname);
562#endif
563		/* we need to chmod the file to allow it to be deleted... */
564		chmod(hashp->fname, 0700);
565		unlink(hashp->fname);
566		/* destroy the temporary name */
567		free((void *)(hashp->fname)); /* SUNW14resync */
568	}
569	free(hashp);
570
571	if (save_errno) {
572		errno = save_errno;
573		return (ERROR);
574	}
575	return (SUCCESS);
576}
577
578/*
579 * Write modified pages to disk
580 *
581 * Returns:
582 *	 0 == OK
583 *	-1 ERROR
584 */
585static int32_t
586hash_sync(dbp, flags)
587	const DB *dbp;
588	u_int32_t flags;
589{
590	HTAB *hashp;
591
592	hashp = (HTAB *)dbp->internal;
593
594	/*
595	 * XXX
596	 * Check success/failure conditions.
597	 */
598	return (flush_meta(hashp) || mpool_sync(hashp->mp));
599}
600
601/*
602 * Returns:
603 *	 0 == OK
604 *	-1 indicates that errno should be set
605 */
606static int32_t
607flush_meta(hashp)
608	HTAB *hashp;
609{
610	int32_t i;
611
612	if (!hashp->save_file)
613		return (0);
614	hashp->hdr.magic = HASHMAGIC;
615	hashp->hdr.version = HASHVERSION;
616	hashp->hdr.h_charkey = hashp->hash(CHARKEY, sizeof(CHARKEY));
617
618	/* write out metadata */
619	hput_header(hashp);
620
621	for (i = 0; i < NCACHED; i++)
622		if (hashp->mapp[i]) {
623			if (__put_page(hashp,
624			    (PAGE16 *)hashp->mapp[i], A_BITMAP, 1))
625				return (-1);
626			hashp->mapp[i] = NULL;
627		}
628	return (0);
629}
630
631/*******************************SEARCH ROUTINES *****************************/
632/*
633 * All the access routines return
634 *
635 * Returns:
636 *	 0 on SUCCESS
637 *	 1 to indicate an external ERROR (i.e. key not found, etc)
638 *	-1 to indicate an internal ERROR (i.e. out of memory, etc)
639 */
640
641/* *** make sure this is true! */
642
643static int32_t
644hash_get(dbp, key, data, flag)
645	const DB *dbp;
646	const DBT *key;
647	DBT *data;
648	u_int32_t flag;
649{
650	HTAB *hashp;
651
652	hashp = (HTAB *)dbp->internal;
653	if (flag) {
654		hashp->local_errno = errno = EINVAL;
655		return (ERROR);
656	}
657	return (hash_access(hashp, HASH_GET, key, data));
658}
659
660static int32_t
661hash_put(dbp, key, data, flag)
662	const DB *dbp;
663	DBT *key;
664	const DBT *data;
665	u_int32_t flag;
666{
667	HTAB *hashp;
668
669	hashp = (HTAB *)dbp->internal;
670	if (flag && flag != R_NOOVERWRITE) {
671		hashp->local_errno = errno = EINVAL;
672		return (ERROR);
673	}
674	if ((hashp->flags & O_ACCMODE) == O_RDONLY) {
675		hashp->local_errno = errno = EPERM;
676		return (ERROR);
677	}
678	return (hash_access(hashp, flag == R_NOOVERWRITE ?
679		HASH_PUTNEW : HASH_PUT, key, (DBT *)data));
680}
681
682static int32_t
683hash_delete(dbp, key, flag)
684	const DB *dbp;
685	const DBT *key;
686	u_int32_t flag;		/* Ignored */
687{
688	HTAB *hashp;
689
690	hashp = (HTAB *)dbp->internal;
691	if (flag) {
692		hashp->local_errno = errno = EINVAL;
693		return (ERROR);
694	}
695	if ((hashp->flags & O_ACCMODE) == O_RDONLY) {
696		hashp->local_errno = errno = EPERM;
697		return (ERROR);
698	}
699
700	return (hash_access(hashp, HASH_DELETE, key, NULL));
701}
702
703/*
704 * Assume that hashp has been set in wrapper routine.
705 */
706static int32_t
707hash_access(hashp, action, key, val)
708	HTAB *hashp;
709	ACTION action;
710	const DBT *key;
711	DBT *val;
712{
713	DBT page_key, page_val;
714	CURSOR cursor;
715	ITEM_INFO item_info;
716	u_int32_t bucket;
717	u_int32_t num_items;
718
719#ifdef HASH_STATISTICS
720	hash_accesses++;
721#endif
722
723	num_items = 0;
724
725	/*
726	 * Set up item_info so that we're looking for space to add an item
727	 * as we cycle through the pages looking for the key.
728	 */
729	if (action == HASH_PUT || action == HASH_PUTNEW) {
730		if (ISBIG(key->size + val->size, hashp))
731			item_info.seek_size = PAIR_OVERHEAD;
732		else
733			item_info.seek_size = key->size + val->size;
734	} else
735		item_info.seek_size = 0;
736	item_info.seek_found_page = 0;
737
738	bucket = __call_hash(hashp, (int8_t *)key->data, key->size);
739
740	cursor.pagep = NULL;
741	__get_item_reset(hashp, &cursor);
742
743	cursor.bucket = bucket;
744	while (1) {
745		__get_item_next(hashp, &cursor, &page_key, &page_val, &item_info);
746		if (item_info.status == ITEM_ERROR)
747			return (ABNORMAL);
748		if (item_info.status == ITEM_NO_MORE)
749			break;
750		num_items++;
751		if (item_info.key_off == BIGPAIR) {
752			/*
753			 * !!!
754			 * 0 is a valid index.
755			 */
756			if (__find_bigpair(hashp, &cursor, (int8_t *)key->data,
757			    key->size) > 0)
758				goto found;
759		} else if (key->size == page_key.size &&
760		    !memcmp(key->data, page_key.data, key->size))
761			goto found;
762	}
763#ifdef HASH_STATISTICS
764	hash_collisions++;
765#endif
766	__get_item_done(hashp, &cursor);
767
768	/*
769	 * At this point, item_info will list either the last page in
770	 * the chain, or the last page in the chain plus a pgno for where
771	 * to find the first page in the chain with space for the
772	 * item we wish to add.
773	 */
774
775	/* Not found */
776	switch (action) {
777	case HASH_PUT:
778	case HASH_PUTNEW:
779		if (__addel(hashp, &item_info, key, val, num_items, 0))
780			return (ERROR);
781		break;
782	case HASH_GET:
783	case HASH_DELETE:
784	default:
785		return (ABNORMAL);
786	}
787
788	if (item_info.caused_expand)
789		__expand_table(hashp);
790	return (SUCCESS);
791
792found:	__get_item_done(hashp, &cursor);
793
794	switch (action) {
795	case HASH_PUTNEW:
796		/* mpool_put(hashp->mp, pagep, 0); */
797		return (ABNORMAL);
798	case HASH_GET:
799		if (item_info.key_off == BIGPAIR) {
800			if (__big_return(hashp, &item_info, val, 0))
801				return (ERROR);
802		} else {
803			val->data = page_val.data;
804			val->size = page_val.size;
805		}
806		/* *** data may not be available! */
807		break;
808	case HASH_PUT:
809		if (__delpair(hashp, &cursor, &item_info) ||
810		    __addel(hashp, &item_info, key, val, UNKNOWN, 0))
811			return (ERROR);
812		__get_item_done(hashp, &cursor);
813		if (item_info.caused_expand)
814			__expand_table(hashp);
815		break;
816	case HASH_DELETE:
817		if (__delpair(hashp, &cursor, &item_info))
818			return (ERROR);
819		break;
820	default:
821		abort();
822	}
823	return (SUCCESS);
824}
825
826/* ****************** CURSORS ********************************** */
827CURSOR *
828__cursor_creat(dbp)
829	const DB *dbp;
830{
831	CURSOR *new_curs;
832	HTAB *hashp;
833
834	new_curs = (CURSOR *)malloc(sizeof(struct cursor_t));
835	if (!new_curs)
836		return NULL;
837	new_curs->internal =
838	    (struct item_info *)malloc(sizeof(struct item_info));
839	if (!new_curs->internal) {
840		free(new_curs);
841		return NULL;
842	}
843	new_curs->get = cursor_get;
844	new_curs->delete = cursor_delete;
845
846	new_curs->bucket = 0;
847	new_curs->pgno = INVALID_PGNO;
848	new_curs->ndx = 0;
849	new_curs->pgndx = 0;
850	new_curs->pagep = NULL;
851
852	/* place onto queue of cursors */
853	hashp = (HTAB *)dbp->internal;
854	TAILQ_INSERT_TAIL(&hashp->curs_queue, new_curs, queue);
855
856	return new_curs;
857}
858
859static int32_t
860cursor_get(dbp, cursorp, key, val, flags)
861	const DB *dbp;
862	CURSOR *cursorp;
863	DBT *key, *val;
864	u_int32_t flags;
865{
866	HTAB *hashp;
867	ITEM_INFO item_info;
868
869	hashp = (HTAB *)dbp->internal;
870
871	if (flags && flags != R_FIRST && flags != R_NEXT) {
872		hashp->local_errno = errno = EINVAL;
873		return (ERROR);
874	}
875#ifdef HASH_STATISTICS
876	hash_accesses++;
877#endif
878
879	item_info.seek_size = 0;
880
881	if (flags == R_FIRST)
882		__get_item_first(hashp, cursorp, key, val, &item_info);
883	else
884		__get_item_next(hashp, cursorp, key, val, &item_info);
885
886	/*
887	 * This needs to be changed around.  As is, get_item_next advances
888	 * the pointers on the page but this function actually advances
889	 * bucket pointers.  This works, since the only other place we
890	 * use get_item_next is in hash_access which only deals with one
891	 * bucket at a time.  However, there is the problem that certain other
892	 * functions (such as find_bigpair and delpair) depend on the
893	 * pgndx member of the cursor.  Right now, they are using pngdx - 1
894	 * since indices refer to the __next__ item that is to be fetched
895	 * from the page.  This is ugly, as you may have noticed, whoever
896	 * you are.  The best solution would be to depend on item_infos to
897	 * deal with _current_ information, and have the cursors only
898	 * deal with _next_ information.  In that scheme, get_item_next
899	 * would also advance buckets.  Version 3...
900	 */
901
902
903	/*
904	 * Must always enter this loop to do error handling and
905	 * check for big key/data pair.
906	 */
907	while (1) {
908		if (item_info.status == ITEM_OK) {
909			if (item_info.key_off == BIGPAIR &&
910			    __big_keydata(hashp, cursorp->pagep, key, val,
911			    item_info.pgndx))
912				return (ABNORMAL);
913
914			break;
915		} else if (item_info.status != ITEM_NO_MORE)
916			return (ABNORMAL);
917
918		__put_page(hashp, cursorp->pagep, A_RAW, 0);
919		cursorp->ndx = cursorp->pgndx = 0;
920		cursorp->bucket++;
921		cursorp->pgno = INVALID_PGNO;
922		cursorp->pagep = NULL;
923		if (cursorp->bucket > hashp->hdr.max_bucket)
924			return (ABNORMAL);
925		__get_item_next(hashp, cursorp, key, val, &item_info);
926	}
927
928	__get_item_done(hashp, cursorp);
929	return (0);
930}
931
932static int32_t
933cursor_delete(dbp, cursor, flags)
934	const DB *dbp;
935	CURSOR *cursor;
936	u_int32_t flags;
937{
938	/* XXX this is empirically determined, so it might not be completely
939	   correct, but it seems to work.  At the very least it fixes
940	   a memory leak */
941
942	free(cursor->internal);
943	free(cursor);
944
945	return (0);
946}
947
948static int32_t
949hash_seq(dbp, key, val, flag)
950	const DB *dbp;
951	DBT *key, *val;
952	u_int32_t flag;
953{
954	HTAB *hashp;
955
956	/*
957	 * Seq just uses the default cursor to go sequecing through the
958	 * database.  Note that the default cursor is the first in the list.
959	 */
960
961	hashp = (HTAB *)dbp->internal;
962	if (!hashp->seq_cursor)
963		hashp->seq_cursor = __cursor_creat(dbp);
964
965	return (hashp->seq_cursor->get(dbp, hashp->seq_cursor, key, val, flag));
966}
967
968/********************************* UTILITIES ************************/
969
970/*
971 * Returns:
972 *	 0 ==> OK
973 *	-1 ==> Error
974 */
975int32_t
976__expand_table(hashp)
977	HTAB *hashp;
978{
979	u_int32_t old_bucket, new_bucket;
980	int32_t spare_ndx;
981
982#ifdef HASH_STATISTICS
983	hash_expansions++;
984#endif
985	new_bucket = ++hashp->hdr.max_bucket;
986	old_bucket = (hashp->hdr.max_bucket & hashp->hdr.low_mask);
987
988	/* Get a page for this new bucket */
989	if (__new_page(hashp, new_bucket, A_BUCKET) != 0)
990		return (-1);
991
992	/*
993	 * If the split point is increasing (hdr.max_bucket's log base 2
994	 * increases), we need to copy the current contents of the spare
995	 * split bucket to the next bucket.
996	 */
997	spare_ndx = __log2(hashp->hdr.max_bucket + 1);
998	if (spare_ndx > hashp->hdr.ovfl_point) {
999		hashp->hdr.spares[spare_ndx] = hashp->hdr.spares[hashp->hdr.ovfl_point];
1000		hashp->hdr.ovfl_point = spare_ndx;
1001	}
1002	if (new_bucket > hashp->hdr.high_mask) {
1003		/* Starting a new doubling */
1004		hashp->hdr.low_mask = hashp->hdr.high_mask;
1005		hashp->hdr.high_mask = new_bucket | hashp->hdr.low_mask;
1006	}
1007	if (BUCKET_TO_PAGE(new_bucket) > MAX_PAGES(hashp)) {
1008		fprintf(stderr,dgettext(TEXT_DOMAIN,
1009			 "hash: Cannot allocate new bucket.  Pages exhausted.\n"));
1010		return (-1);
1011	}
1012	/* Relocate records to the new bucket */
1013	return (__split_page(hashp, old_bucket, new_bucket));
1014}
1015
1016u_int32_t
1017__call_hash(hashp, k, len)
1018	HTAB *hashp;
1019	int8_t *k;
1020	int32_t len;
1021{
1022	int32_t n, bucket;
1023
1024	n = hashp->hash(k, len);
1025	bucket = n & hashp->hdr.high_mask;
1026	if (bucket > hashp->hdr.max_bucket)
1027		bucket = bucket & hashp->hdr.low_mask;
1028	return (bucket);
1029}
1030
1031#if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
1032/*
1033 * Hashp->hdr needs to be byteswapped.
1034 */
1035static void
1036swap_header_copy(srcp, destp)
1037	HASHHDR *srcp, *destp;
1038{
1039	int32_t i;
1040
1041	P_32_COPY(srcp->magic, destp->magic);
1042	P_32_COPY(srcp->version, destp->version);
1043	P_32_COPY(srcp->lorder, destp->lorder);
1044	P_32_COPY(srcp->bsize, destp->bsize);
1045	P_32_COPY(srcp->bshift, destp->bshift);
1046	P_32_COPY(srcp->ovfl_point, destp->ovfl_point);
1047	P_32_COPY(srcp->last_freed, destp->last_freed);
1048	P_32_COPY(srcp->max_bucket, destp->max_bucket);
1049	P_32_COPY(srcp->high_mask, destp->high_mask);
1050	P_32_COPY(srcp->low_mask, destp->low_mask);
1051	P_32_COPY(srcp->ffactor, destp->ffactor);
1052	P_32_COPY(srcp->nkeys, destp->nkeys);
1053	P_32_COPY(srcp->hdrpages, destp->hdrpages);
1054	P_32_COPY(srcp->h_charkey, destp->h_charkey);
1055	for (i = 0; i < NCACHED; i++) {
1056		P_32_COPY(srcp->spares[i], destp->spares[i]);
1057		P_16_COPY(srcp->bitmaps[i], destp->bitmaps[i]);
1058	}
1059}
1060
1061static void
1062swap_header(hashp)
1063	HTAB *hashp;
1064{
1065	HASHHDR *hdrp;
1066	int32_t i;
1067
1068	hdrp = &hashp->hdr;
1069
1070	M_32_SWAP(hdrp->magic);
1071	M_32_SWAP(hdrp->version);
1072	M_32_SWAP(hdrp->lorder);
1073	M_32_SWAP(hdrp->bsize);
1074	M_32_SWAP(hdrp->bshift);
1075	M_32_SWAP(hdrp->ovfl_point);
1076	M_32_SWAP(hdrp->last_freed);
1077	M_32_SWAP(hdrp->max_bucket);
1078	M_32_SWAP(hdrp->high_mask);
1079	M_32_SWAP(hdrp->low_mask);
1080	M_32_SWAP(hdrp->ffactor);
1081	M_32_SWAP(hdrp->nkeys);
1082	M_32_SWAP(hdrp->hdrpages);
1083	M_32_SWAP(hdrp->h_charkey);
1084	for (i = 0; i < NCACHED; i++) {
1085		M_32_SWAP(hdrp->spares[i]);
1086		M_16_SWAP(hdrp->bitmaps[i]);
1087	}
1088}
1089#endif /* DB_BYTE_ORDER == DB_LITTLE_ENDIAN */
1090