17c478bdstevel@tonic-gate/*-
27c478bdstevel@tonic-gate * See the file LICENSE for redistribution information.
37c478bdstevel@tonic-gate *
47c478bdstevel@tonic-gate * Copyright (c) 1996, 1997, 1998
57c478bdstevel@tonic-gate *	Sleepycat Software.  All rights reserved.
67c478bdstevel@tonic-gate */
77c478bdstevel@tonic-gate/*
87c478bdstevel@tonic-gate * Copyright (c) 1990, 1993, 1994
97c478bdstevel@tonic-gate *	Margo Seltzer.  All rights reserved.
107c478bdstevel@tonic-gate */
117c478bdstevel@tonic-gate/*
127c478bdstevel@tonic-gate * Copyright (c) 1990, 1993, 1994
137c478bdstevel@tonic-gate *	The Regents of the University of California.  All rights reserved.
147c478bdstevel@tonic-gate *
157c478bdstevel@tonic-gate * This code is derived from software contributed to Berkeley by
167c478bdstevel@tonic-gate * Margo Seltzer.
177c478bdstevel@tonic-gate *
187c478bdstevel@tonic-gate * Redistribution and use in source and binary forms, with or without
197c478bdstevel@tonic-gate * modification, are permitted provided that the following conditions
207c478bdstevel@tonic-gate * are met:
217c478bdstevel@tonic-gate * 1. Redistributions of source code must retain the above copyright
227c478bdstevel@tonic-gate *    notice, this list of conditions and the following disclaimer.
237c478bdstevel@tonic-gate * 2. Redistributions in binary form must reproduce the above copyright
247c478bdstevel@tonic-gate *    notice, this list of conditions and the following disclaimer in the
257c478bdstevel@tonic-gate *    documentation and/or other materials provided with the distribution.
267c478bdstevel@tonic-gate * 3. All advertising materials mentioning features or use of this software
277c478bdstevel@tonic-gate *    must display the following acknowledgement:
287c478bdstevel@tonic-gate *	This product includes software developed by the University of
297c478bdstevel@tonic-gate *	California, Berkeley and its contributors.
307c478bdstevel@tonic-gate * 4. Neither the name of the University nor the names of its contributors
317c478bdstevel@tonic-gate *    may be used to endorse or promote products derived from this software
327c478bdstevel@tonic-gate *    without specific prior written permission.
337c478bdstevel@tonic-gate *
347c478bdstevel@tonic-gate * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
357c478bdstevel@tonic-gate * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
367c478bdstevel@tonic-gate * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
377c478bdstevel@tonic-gate * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
387c478bdstevel@tonic-gate * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
397c478bdstevel@tonic-gate * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
407c478bdstevel@tonic-gate * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
417c478bdstevel@tonic-gate * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
427c478bdstevel@tonic-gate * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
437c478bdstevel@tonic-gate * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
447c478bdstevel@tonic-gate * SUCH DAMAGE.
457c478bdstevel@tonic-gate */
467c478bdstevel@tonic-gate
477c478bdstevel@tonic-gate#include "config.h"
487c478bdstevel@tonic-gate
497c478bdstevel@tonic-gate#ifndef lint
507c478bdstevel@tonic-gatestatic const char sccsid[] = "@(#)hash.c	10.63 (Sleepycat) 12/11/98";
517c478bdstevel@tonic-gate#endif /* not lint */
527c478bdstevel@tonic-gate
537c478bdstevel@tonic-gate#ifndef NO_SYSTEM_INCLUDES
547c478bdstevel@tonic-gate#include <sys/types.h>
557c478bdstevel@tonic-gate
567c478bdstevel@tonic-gate#include <errno.h>
577c478bdstevel@tonic-gate#include <stdlib.h>
587c478bdstevel@tonic-gate#include <string.h>
597c478bdstevel@tonic-gate#endif
607c478bdstevel@tonic-gate
617c478bdstevel@tonic-gate#include "db_int.h"
627c478bdstevel@tonic-gate#include "shqueue.h"
637c478bdstevel@tonic-gate#include "db_page.h"
647c478bdstevel@tonic-gate#include "db_am.h"
657c478bdstevel@tonic-gate#include "db_ext.h"
667c478bdstevel@tonic-gate#include "hash.h"
677c478bdstevel@tonic-gate#include "btree.h"
687c478bdstevel@tonic-gate#include "log.h"
697c478bdstevel@tonic-gate#include "db_shash.h"
707c478bdstevel@tonic-gate#include "lock.h"
717c478bdstevel@tonic-gate#include "lock_ext.h"
727c478bdstevel@tonic-gate
737c478bdstevel@tonic-gatestatic int  __ham_c_close __P((DBC *));
747c478bdstevel@tonic-gatestatic int  __ham_c_del __P((DBC *, u_int32_t));
757c478bdstevel@tonic-gatestatic int  __ham_c_destroy __P((DBC *));
767c478bdstevel@tonic-gatestatic int  __ham_c_get __P((DBC *, DBT *, DBT *, u_int32_t));
777c478bdstevel@tonic-gatestatic int  __ham_c_put __P((DBC *, DBT *, DBT *, u_int32_t));
787c478bdstevel@tonic-gatestatic int  __ham_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
797c478bdstevel@tonic-gatestatic int  __ham_dup_return __P((DBC *, DBT *, u_int32_t));
807c478bdstevel@tonic-gatestatic int  __ham_expand_table __P((DBC *));
817c478bdstevel@tonic-gatestatic void __ham_init_htab __P((DBC *, u_int32_t, u_int32_t));
827c478bdstevel@tonic-gatestatic int  __ham_lookup __P((DBC *, const DBT *, u_int32_t, db_lockmode_t));
837c478bdstevel@tonic-gatestatic int  __ham_overwrite __P((DBC *, DBT *));
847c478bdstevel@tonic-gate
857c478bdstevel@tonic-gate/************************** INTERFACE ROUTINES ***************************/
867c478bdstevel@tonic-gate/* OPEN/CLOSE */
877c478bdstevel@tonic-gate
887c478bdstevel@tonic-gate/*
897c478bdstevel@tonic-gate * __ham_open --
907c478bdstevel@tonic-gate *
917c478bdstevel@tonic-gate * PUBLIC: int __ham_open __P((DB *, DB_INFO *));
927c478bdstevel@tonic-gate */
937c478bdstevel@tonic-gateint
947c478bdstevel@tonic-gate__ham_open(dbp, dbinfo)
957c478bdstevel@tonic-gate	DB *dbp;
967c478bdstevel@tonic-gate	DB_INFO *dbinfo;
977c478bdstevel@tonic-gate{
987c478bdstevel@tonic-gate	DB_ENV *dbenv;
997c478bdstevel@tonic-gate	DBC *dbc;
1007c478bdstevel@tonic-gate	HASH_CURSOR *hcp;
1017c478bdstevel@tonic-gate	int file_existed, ret;
1027c478bdstevel@tonic-gate
1037c478bdstevel@tonic-gate	dbc = NULL;
1047c478bdstevel@tonic-gate	dbenv = dbp->dbenv;
1057c478bdstevel@tonic-gate
1067c478bdstevel@tonic-gate	/* Set the hash function if specified by the user. */
1077c478bdstevel@tonic-gate	if (dbinfo != NULL && dbinfo->h_hash != NULL)
1087c478bdstevel@tonic-gate		dbp->h_hash = dbinfo->h_hash;
1097c478bdstevel@tonic-gate
1107c478bdstevel@tonic-gate	/*
1117c478bdstevel@tonic-gate	 * Initialize the remaining fields of the dbp.  The only function
1127c478bdstevel@tonic-gate	 * that differs from the default set is __ham_stat().
1137c478bdstevel@tonic-gate	 */
1147c478bdstevel@tonic-gate	dbp->internal = NULL;
1157c478bdstevel@tonic-gate	dbp->am_close = __ham_close;
1167c478bdstevel@tonic-gate	dbp->del = __ham_delete;
1177c478bdstevel@tonic-gate	dbp->stat = __ham_stat;
1187c478bdstevel@tonic-gate
1197c478bdstevel@tonic-gate	/* Get a cursor we can use for the rest of this function. */
1207c478bdstevel@tonic-gate	if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
1217c478bdstevel@tonic-gate		goto out;
1227c478bdstevel@tonic-gate
1237c478bdstevel@tonic-gate	hcp = (HASH_CURSOR *)dbc->internal;
1247c478bdstevel@tonic-gate	GET_META(dbp, hcp, ret);
1257c478bdstevel@tonic-gate	if (ret != 0)
1267c478bdstevel@tonic-gate		goto out;
1277c478bdstevel@tonic-gate
1287c478bdstevel@tonic-gate	/*
1297c478bdstevel@tonic-gate	 * If this is a new file, initialize it, and put it back dirty.
1307c478bdstevel@tonic-gate	 */
1317c478bdstevel@tonic-gate
1327c478bdstevel@tonic-gate	/* Initialize the hdr structure */
1337c478bdstevel@tonic-gate	if (hcp->hdr->magic == DB_HASHMAGIC) {
1347c478bdstevel@tonic-gate		file_existed = 1;
1357c478bdstevel@tonic-gate		/* File exists, verify the data in the header. */
1367c478bdstevel@tonic-gate		if (dbp->h_hash == NULL)
1377c478bdstevel@tonic-gate			dbp->h_hash =
1387c478bdstevel@tonic-gate			    hcp->hdr->version < 5 ? __ham_func4 : __ham_func5;
1397c478bdstevel@tonic-gate		if (dbp->h_hash(CHARKEY, sizeof(CHARKEY)) !=
1407c478bdstevel@tonic-gate		    hcp->hdr->h_charkey) {
1417c478bdstevel@tonic-gate			__db_err(dbp->dbenv, "hash: incompatible hash function");
1427c478bdstevel@tonic-gate			ret = EINVAL;
1437c478bdstevel@tonic-gate			goto out;
1447c478bdstevel@tonic-gate		}
1457c478bdstevel@tonic-gate		if (F_ISSET(hcp->hdr, DB_HASH_DUP))
1467c478bdstevel@tonic-gate			F_SET(dbp, DB_AM_DUP);
1477c478bdstevel@tonic-gate	} else {
1487c478bdstevel@tonic-gate		/*
1497c478bdstevel@tonic-gate		 * File does not exist, we must initialize the header.  If
1507c478bdstevel@tonic-gate		 * locking is enabled that means getting a write lock first.
1517c478bdstevel@tonic-gate		 */
1527c478bdstevel@tonic-gate		file_existed = 0;
1537c478bdstevel@tonic-gate		if (F_ISSET(dbp, DB_AM_LOCKING) &&
1547c478bdstevel@tonic-gate		    ((ret = lock_put(dbenv->lk_info, hcp->hlock)) != 0 ||
1557c478bdstevel@tonic-gate		    (ret = lock_get(dbenv->lk_info, dbc->locker, 0,
1567c478bdstevel@tonic-gate		        &dbc->lock_dbt, DB_LOCK_WRITE, &hcp->hlock)) != 0)) {
1577c478bdstevel@tonic-gate			if (ret < 0)
1587c478bdstevel@tonic-gate				ret = EAGAIN;
1597c478bdstevel@tonic-gate			goto out;
1607c478bdstevel@tonic-gate		}
1617c478bdstevel@tonic-gate
1627c478bdstevel@tonic-gate		__ham_init_htab(dbc, dbinfo != NULL ? dbinfo->h_nelem : 0,
1637c478bdstevel@tonic-gate		    dbinfo != NULL ? dbinfo->h_ffactor : 0);
1647c478bdstevel@tonic-gate		if (F_ISSET(dbp, DB_AM_DUP))
1657c478bdstevel@tonic-gate			F_SET(hcp->hdr, DB_HASH_DUP);
1667c478bdstevel@tonic-gate		if ((ret = __ham_dirty_page(dbp, (PAGE *)hcp->hdr)) != 0)
1677c478bdstevel@tonic-gate			goto out;
1687c478bdstevel@tonic-gate	}
1697c478bdstevel@tonic-gate
1707c478bdstevel@tonic-gate	/* Release the meta data page */
1717c478bdstevel@tonic-gate	RELEASE_META(dbp, hcp);
1727c478bdstevel@tonic-gate	if ((ret  = dbc->c_close(dbc)) != 0)
1737c478bdstevel@tonic-gate		goto out;
1747c478bdstevel@tonic-gate
1757c478bdstevel@tonic-gate	/* Sync the file so that we know that the meta data goes to disk. */
1767c478bdstevel@tonic-gate	if (!file_existed && (ret = dbp->sync(dbp, 0)) != 0)
1777c478bdstevel@tonic-gate		goto out;
1787c478bdstevel@tonic-gate	return (0);
1797c478bdstevel@tonic-gate
1807c478bdstevel@tonic-gateout:	(void)__ham_close(dbp);
1817c478bdstevel@tonic-gate	return (ret);
1827c478bdstevel@tonic-gate}
1837c478bdstevel@tonic-gate
1847c478bdstevel@tonic-gate/*
1857c478bdstevel@tonic-gate * PUBLIC: int __ham_close __P((DB *));
1867c478bdstevel@tonic-gate */
1877c478bdstevel@tonic-gateint
1887c478bdstevel@tonic-gate__ham_close(dbp)
1897c478bdstevel@tonic-gate	DB *dbp;
1907c478bdstevel@tonic-gate{
1917c478bdstevel@tonic-gate	COMPQUIET(dbp, NULL);
1927c478bdstevel@tonic-gate	return (0);
1937c478bdstevel@tonic-gate}
1947c478bdstevel@tonic-gate
1957c478bdstevel@tonic-gate/************************** LOCAL CREATION ROUTINES **********************/
1967c478bdstevel@tonic-gate/*
1977c478bdstevel@tonic-gate * Returns 0 on No Error
1987c478bdstevel@tonic-gate */
1997c478bdstevel@tonic-gatestatic void
2007c478bdstevel@tonic-gate__ham_init_htab(dbc, nelem, ffactor)
2017c478bdstevel@tonic-gate	DBC *dbc;
2027c478bdstevel@tonic-gate	u_int32_t nelem, ffactor;
2037c478bdstevel@tonic-gate{
2047c478bdstevel@tonic-gate	DB *dbp;
2057c478bdstevel@tonic-gate	HASH_CURSOR *hcp;
2067c478bdstevel@tonic-gate	int32_t l2, nbuckets;
2077c478bdstevel@tonic-gate
2087c478bdstevel@tonic-gate	hcp = (HASH_CURSOR *)dbc->internal;
2097c478bdstevel@tonic-gate	dbp = dbc->dbp;
2107c478bdstevel@tonic-gate	memset(hcp->hdr, 0, sizeof(HASHHDR));
2117c478bdstevel@tonic-gate	hcp->hdr->ffactor = ffactor;
2127c478bdstevel@tonic-gate	hcp->hdr->pagesize = dbp->pgsize;
2137c478bdstevel@tonic-gate	ZERO_LSN(hcp->hdr->lsn);
2147c478bdstevel@tonic-gate	hcp->hdr->magic = DB_HASHMAGIC;
2157c478bdstevel@tonic-gate	hcp->hdr->version = DB_HASHVERSION;
2167c478bdstevel@tonic-gate
2177c478bdstevel@tonic-gate	if (dbp->h_hash == NULL)
2187c478bdstevel@tonic-gate		dbp->h_hash = hcp->hdr->version < 5 ? __ham_func4 : __ham_func5;
2197c478bdstevel@tonic-gate	hcp->hdr->h_charkey = dbp->h_hash(CHARKEY, sizeof(CHARKEY));
2207c478bdstevel@tonic-gate	if (nelem != 0 && hcp->hdr->ffactor != 0) {
2217c478bdstevel@tonic-gate		nelem = (nelem - 1) / hcp->hdr->ffactor + 1;
2227c478bdstevel@tonic-gate		l2 = __db_log2(nelem > 2 ? nelem : 2);
2237c478bdstevel@tonic-gate	} else
2247c478bdstevel@tonic-gate		l2 = 2;
2257c478bdstevel@tonic-gate
2267c478bdstevel@tonic-gate	nbuckets = 1 << l2;
2277c478bdstevel@tonic-gate
2287c478bdstevel@tonic-gate	hcp->hdr->ovfl_point = l2;
2297c478bdstevel@tonic-gate	hcp->hdr->last_freed = PGNO_INVALID;
2307c478bdstevel@tonic-gate
2317c478bdstevel@tonic-gate	hcp->hdr->max_bucket = hcp->hdr->high_mask = nbuckets - 1;
2327c478bdstevel@tonic-gate	hcp->hdr->low_mask = (nbuckets >> 1) - 1;
2337c478bdstevel@tonic-gate	memcpy(hcp->hdr->uid, dbp->fileid, DB_FILE_ID_LEN);
2347c478bdstevel@tonic-gate}
2357c478bdstevel@tonic-gate
2367c478bdstevel@tonic-gatestatic int
2377c478bdstevel@tonic-gate__ham_delete(dbp, txn, key, flags)
2387c478bdstevel@tonic-gate	DB *dbp;
2397c478bdstevel@tonic-gate	DB_TXN *txn;
2407c478bdstevel@tonic-gate	DBT *key;
2417c478bdstevel@tonic-gate	u_int32_t flags;
2427c478bdstevel@tonic-gate{
2437c478bdstevel@tonic-gate	DBC *dbc;
2447c478bdstevel@tonic-gate	HASH_CURSOR *hcp;
2457c478bdstevel@tonic-gate	int ret, tret;
2467c478bdstevel@tonic-gate
2477c478bdstevel@tonic-gate	DB_PANIC_CHECK(dbp);
2487c478bdstevel@tonic-gate
2497c478bdstevel@tonic-gate	if ((ret =
2507c478bdstevel@tonic-gate	    __db_delchk(dbp, key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
2517c478bdstevel@tonic-gate		return (ret);
2527c478bdstevel@tonic-gate
2537c478bdstevel@tonic-gate	if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
2547c478bdstevel@tonic-gate		return (ret);
2557c478bdstevel@tonic-gate
2567c478bdstevel@tonic-gate	DEBUG_LWRITE(dbc, txn, "ham_delete", key, NULL, flags);
2577c478bdstevel@tonic-gate
2587c478bdstevel@tonic-gate	hcp = (HASH_CURSOR *)dbc->internal;
2597c478bdstevel@tonic-gate	GET_META(dbp, hcp, ret);
2607c478bdstevel@tonic-gate	if (ret != 0)
2617c478bdstevel@tonic-gate		goto out;
2627c478bdstevel@tonic-gate
2637c478bdstevel@tonic-gate	hcp->stats.hash_deleted++;
2647c478bdstevel@tonic-gate	if ((ret = __ham_lookup(dbc, key, 0, DB_LOCK_WRITE)) == 0)
2657c478bdstevel@tonic-gate		if (F_ISSET(hcp, H_OK))
2667c478bdstevel@tonic-gate			ret = __ham_del_pair(dbc, 1);
2677c478bdstevel@tonic-gate		else
2687c478bdstevel@tonic-gate			ret = DB_NOTFOUND;
2697c478bdstevel@tonic-gate
2707c478bdstevel@tonic-gate	RELEASE_META(dbp, hcp);
2717c478bdstevel@tonic-gateout:	if ((tret = dbc->c_close(dbc)) != 0 && ret == 0)
2727c478bdstevel@tonic-gate		ret = tret;
2737c478bdstevel@tonic-gate	return (ret);
2747c478bdstevel@tonic-gate}
2757c478bdstevel@tonic-gate
2767c478bdstevel@tonic-gate/* ****************** CURSORS ********************************** */
2777c478bdstevel@tonic-gate/*
2787c478bdstevel@tonic-gate * __ham_c_init --
2797c478bdstevel@tonic-gate *	Initialize the hash-specific portion of a cursor.
2807c478bdstevel@tonic-gate *
2817c478bdstevel@tonic-gate * PUBLIC: int __ham_c_init __P((DBC *));
2827c478bdstevel@tonic-gate */
2837c478bdstevel@tonic-gateint
2847c478bdstevel@tonic-gate__ham_c_init(dbc)
2857c478bdstevel@tonic-gate	DBC *dbc;
2867c478bdstevel@tonic-gate  {
2877c478bdstevel@tonic-gate	HASH_CURSOR *new_curs;
2887c478bdstevel@tonic-gate	int ret;
2897c478bdstevel@tonic-gate
2907c478bdstevel@tonic-gate	if ((ret = __os_calloc(1, sizeof(struct cursor_t), &new_curs)) != 0)
2917c478bdstevel@tonic-gate		return (ret);
2927c478bdstevel@tonic-gate	if ((ret =
2937c478bdstevel@tonic-gate	    __os_malloc(dbc->dbp->pgsize, NULL, &new_curs->split_buf)) != 0) {
2947c478bdstevel@tonic-gate		__os_free(new_curs, sizeof(*new_curs));
2957c478bdstevel@tonic-gate		return (ret);
2967c478bdstevel@tonic-gate	}
2977c478bdstevel@tonic-gate
2987c478bdstevel@tonic-gate	new_curs->dbc = dbc;
2997c478bdstevel@tonic-gate
3007c478bdstevel@tonic-gate	dbc->internal = new_curs;
3017c478bdstevel@tonic-gate	dbc->c_am_close = __ham_c_close;
3027c478bdstevel@tonic-gate	dbc->c_am_destroy = __ham_c_destroy;
3037c478bdstevel@tonic-gate	dbc->c_del = __ham_c_del;
3047c478bdstevel@tonic-gate	dbc->c_get = __ham_c_get;
3057c478bdstevel@tonic-gate	dbc->c_put = __ham_c_put;
3067c478bdstevel@tonic-gate
3077c478bdstevel@tonic-gate	__ham_item_init(new_curs);
3087c478bdstevel@tonic-gate
3097c478bdstevel@tonic-gate	return (0);
3107c478bdstevel@tonic-gate}
3117c478bdstevel@tonic-gate
3127c478bdstevel@tonic-gate/*
3137c478bdstevel@tonic-gate * __ham_c_close --
3147c478bdstevel@tonic-gate *	Close down the cursor from a single use.
3157c478bdstevel@tonic-gate */
3167c478bdstevel@tonic-gatestatic int
3177c478bdstevel@tonic-gate__ham_c_close(dbc)
3187c478bdstevel@tonic-gate	DBC *dbc;
3197c478bdstevel@tonic-gate{
3207c478bdstevel@tonic-gate	int ret;
3217c478bdstevel@tonic-gate
3227c478bdstevel@tonic-gate	if ((ret = __ham_item_done(dbc, 0)) != 0)
3237c478bdstevel@tonic-gate		return (ret);
3247c478bdstevel@tonic-gate
3257c478bdstevel@tonic-gate	__ham_item_init((HASH_CURSOR *)dbc->internal);
3267c478bdstevel@tonic-gate	return (0);
3277c478bdstevel@tonic-gate}
3287c478bdstevel@tonic-gate
3297c478bdstevel@tonic-gate/*
3307c478bdstevel@tonic-gate * __ham_c_destroy --
3317c478bdstevel@tonic-gate *	Cleanup the access method private part of a cursor.
3327c478bdstevel@tonic-gate */
3337c478bdstevel@tonic-gatestatic int
3347c478bdstevel@tonic-gate__ham_c_destroy(dbc)
3357c478bdstevel@tonic-gate	DBC *dbc;
3367c478bdstevel@tonic-gate{
3377c478bdstevel@tonic-gate	HASH_CURSOR *hcp;
3387c478bdstevel@tonic-gate
3397c478bdstevel@tonic-gate	hcp = (HASH_CURSOR *)dbc->internal;
3407c478bdstevel@tonic-gate	if (hcp->split_buf != NULL)
3417c478bdstevel@tonic-gate		__os_free(hcp->split_buf, dbc->dbp->pgsize);
3427c478bdstevel@tonic-gate	__os_free(hcp, sizeof(HASH_CURSOR));
3437c478bdstevel@tonic-gate
3447c478bdstevel@tonic-gate	return (0);
3457c478bdstevel@tonic-gate}
3467c478bdstevel@tonic-gate
3477c478bdstevel@tonic-gatestatic int
3487c478bdstevel@tonic-gate__ham_c_del(dbc, flags)
3497c478bdstevel@tonic-gate	DBC *dbc;
3507c478bdstevel@tonic-gate	u_int32_t flags;
3517c478bdstevel@tonic-gate{
3527c478bdstevel@tonic-gate	DB *dbp;
3537c478bdstevel@tonic-gate	DBT repldbt;
3547c478bdstevel@tonic-gate	HASH_CURSOR *hcp;
3557c478bdstevel@tonic-gate	HASH_CURSOR save_curs;
3567c478bdstevel@tonic-gate	db_pgno_t ppgno, chg_pgno;
3577c478bdstevel@tonic-gate	int ret, t_ret;
3587c478bdstevel@tonic-gate
3597c478bdstevel@tonic-gate	DEBUG_LWRITE(dbc, dbc->txn, "ham_c_del", NULL, NULL, flags);
3607c478bdstevel@tonic-gate	dbp = dbc->dbp;
3617c478bdstevel@tonic-gate	DB_PANIC_CHECK(dbp);
3627c478bdstevel@tonic-gate	hcp = (HASH_CURSOR *)dbc->internal;
3637c478bdstevel@tonic-gate
3647c478bdstevel@tonic-gate	if ((ret = __db_cdelchk(dbc->dbp, flags,
3657c478bdstevel@tonic-gate	    F_ISSET(dbc->dbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
3667c478bdstevel@tonic-gate		return (ret);
3677c478bdstevel@tonic-gate
3687c478bdstevel@tonic-gate	if (F_ISSET(hcp, H_DELETED))
3697c478bdstevel@tonic-gate		return (DB_NOTFOUND);
3707c478bdstevel@tonic-gate
3717c478bdstevel@tonic-gate	/*
3727c478bdstevel@tonic-gate	 * If we are in the concurrent DB product and this cursor
3737c478bdstevel@tonic-gate	 * is not a write cursor, then this request is invalid.
3747c478bdstevel@tonic-gate	 * If it is a simple write cursor, then we need to upgrade its
3757c478bdstevel@tonic-gate	 * lock.
3767c478bdstevel@tonic-gate	 */
3777c478bdstevel@tonic-gate	if (F_ISSET(dbp, DB_AM_CDB)) {
3787c478bdstevel@tonic-gate		/* Make sure it's a valid update cursor. */
3797c478bdstevel@tonic-gate		if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
3807c478bdstevel@tonic-gate			return (EINVAL);
3817c478bdstevel@tonic-gate
3827c478bdstevel@tonic-gate		if (F_ISSET(dbc, DBC_RMW) &&
3837c478bdstevel@tonic-gate		    (ret = lock_get(dbp->dbenv->lk_info, dbc->locker,
3847c478bdstevel@tonic-gate		    DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE,
3857c478bdstevel@tonic-gate		    &dbc->mylock)) != 0)
3867c478bdstevel@tonic-gate			return (EAGAIN);
3877c478bdstevel@tonic-gate	}
3887c478bdstevel@tonic-gate
3897c478bdstevel@tonic-gate	GET_META(dbp, hcp, ret);
3907c478bdstevel@tonic-gate	if (ret != 0)
3917c478bdstevel@tonic-gate		return (ret);
3927c478bdstevel@tonic-gate
3937c478bdstevel@tonic-gate	SAVE_CURSOR(hcp, &save_curs);
3947c478bdstevel@tonic-gate	hcp->stats.hash_deleted++;
3957c478bdstevel@tonic-gate
3967c478bdstevel@tonic-gate	if ((ret = __ham_get_cpage(dbc, DB_LOCK_WRITE)) != 0)
3977c478bdstevel@tonic-gate		goto out;
3987c478bdstevel@tonic-gate	if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno != PGNO_INVALID) {
3997c478bdstevel@tonic-gate		/*
4007c478bdstevel@tonic-gate		 * We are about to remove a duplicate from offpage.
4017c478bdstevel@tonic-gate		 *
4027c478bdstevel@tonic-gate		 * There are 4 cases.
4037c478bdstevel@tonic-gate		 * 1. We will remove an item on a page, but there are more
4047c478bdstevel@tonic-gate		 *    items on that page.
4057c478bdstevel@tonic-gate		 * 2. We will remove the last item on a page, but there is a
4067c478bdstevel@tonic-gate		 *    following page of duplicates.
4077c478bdstevel@tonic-gate		 * 3. We will remove the last item on a page, this page was the
4087c478bdstevel@tonic-gate		 *    last page in a duplicate set, but there were dups before
4097c478bdstevel@tonic-gate		 *    it.
4107c478bdstevel@tonic-gate		 * 4. We will remove the last item on a page, removing the last
4117c478bdstevel@tonic-gate		 *    duplicate.
4127c478bdstevel@tonic-gate		 * In case 1 hcp->dpagep is unchanged.
4137c478bdstevel@tonic-gate		 * In case 2 hcp->dpagep comes back pointing to the next dup
4147c478bdstevel@tonic-gate		 *     page.
4157c478bdstevel@tonic-gate		 * In case 3 hcp->dpagep comes back NULL.
4167c478bdstevel@tonic-gate		 * In case 4 hcp->dpagep comes back NULL.
4177c478bdstevel@tonic-gate		 *
4187c478bdstevel@tonic-gate		 * Case 4 results in deleting the pair off the master page.
4197c478bdstevel@tonic-gate		 * The normal code for doing this knows how to delete the
4207c478bdstevel@tonic-gate		 * duplicates, so we will handle this case in the normal code.
4217c478bdstevel@tonic-gate		 */
4227c478bdstevel@tonic-gate		ppgno = PREV_PGNO(hcp->dpagep);
4237c478bdstevel@tonic-gate		if (ppgno == PGNO_INVALID &&
4247c478bdstevel@tonic-gate		    NEXT_PGNO(hcp->dpagep) == PGNO_INVALID &&
4257c478bdstevel@tonic-gate		    NUM_ENT(hcp->dpagep) == 1)
4267c478bdstevel@tonic-gate			goto normal;
4277c478bdstevel@tonic-gate
4287c478bdstevel@tonic-gate		/* Remove item from duplicate page. */
4297c478bdstevel@tonic-gate		chg_pgno = hcp->dpgno;
4307c478bdstevel@tonic-gate		if ((ret = __db_drem(dbc,
4317c478bdstevel@tonic-gate		    &hcp->dpagep, hcp->dndx, __ham_del_page)) != 0)
4327c478bdstevel@tonic-gate			goto out;
4337c478bdstevel@tonic-gate
4347c478bdstevel@tonic-gate		if (hcp->dpagep == NULL) {
4357c478bdstevel@tonic-gate			if (ppgno != PGNO_INVALID) {		/* Case 3 */
4367c478bdstevel@tonic-gate				hcp->dpgno = ppgno;
4377c478bdstevel@tonic-gate				if ((ret = __ham_get_cpage(dbc,
4387c478bdstevel@tonic-gate				    DB_LOCK_READ)) != 0)
4397c478bdstevel@tonic-gate					goto out;
4407c478bdstevel@tonic-gate				hcp->dndx = NUM_ENT(hcp->dpagep);
4417c478bdstevel@tonic-gate				F_SET(hcp, H_DELETED);
4427c478bdstevel@tonic-gate			} else {				/* Case 4 */
4437c478bdstevel@tonic-gate				ret = __ham_del_pair(dbc, 1);
4447c478bdstevel@tonic-gate				hcp->dpgno = PGNO_INVALID;
4457c478bdstevel@tonic-gate				/*
4467c478bdstevel@tonic-gate				 * Delpair updated the cursor queue, so we
4477c478bdstevel@tonic-gate				 * don't have to do that here.
4487c478bdstevel@tonic-gate				 */
4497c478bdstevel@tonic-gate				chg_pgno = PGNO_INVALID;
4507c478bdstevel@tonic-gate			}
4517c478bdstevel@tonic-gate		} else if (PGNO(hcp->dpagep) != hcp->dpgno) {
4527c478bdstevel@tonic-gate			hcp->dndx = 0;				/* Case 2 */
4537c478bdstevel@tonic-gate			hcp->dpgno = PGNO(hcp->dpagep);
4547c478bdstevel@tonic-gate			if (ppgno == PGNO_INVALID)
4557c478bdstevel@tonic-gate				memcpy(HOFFDUP_PGNO(P_ENTRY(hcp->pagep,
4567c478bdstevel@tonic-gate				    H_DATAINDEX(hcp->bndx))),
4577c478bdstevel@tonic-gate				    &hcp->dpgno, sizeof(db_pgno_t));
4587c478bdstevel@tonic-gate			/*
4597c478bdstevel@tonic-gate			 * We need to put the master page here, because
4607c478bdstevel@tonic-gate			 * although we have a duplicate page, the master
4617c478bdstevel@tonic-gate			 * page is dirty, and ham_item_done assumes that
4627c478bdstevel@tonic-gate			 * if you have a duplicate page, it's the only one
4637c478bdstevel@tonic-gate			 * that can be dirty.
4647c478bdstevel@tonic-gate			 */
4657c478bdstevel@tonic-gate			ret = __ham_put_page(dbp, hcp->pagep, 1);
4667c478bdstevel@tonic-gate			hcp->pagep = NULL;
4677c478bdstevel@tonic-gate			F_SET(hcp, H_DELETED);
4687c478bdstevel@tonic-gate		} else						/* Case 1 */
4697c478bdstevel@tonic-gate			F_SET(hcp, H_DELETED);
4707c478bdstevel@tonic-gate		if (chg_pgno != PGNO_INVALID)
4717c478bdstevel@tonic-gate			__ham_c_update(hcp, chg_pgno, 0, 0, 1);
4727c478bdstevel@tonic-gate	} else if (F_ISSET(hcp, H_ISDUP)) {			/* on page */
4737c478bdstevel@tonic-gate		if (hcp->dup_off == 0 && DUP_SIZE(hcp->dup_len) ==
4747c478bdstevel@tonic-gate		    LEN_HDATA(hcp->pagep, hcp->hdr->pagesize, hcp->bndx))
4757c478bdstevel@tonic-gate			ret = __ham_del_pair(dbc, 1);
4767c478bdstevel@tonic-gate		else {
4777c478bdstevel@tonic-gate			repldbt.flags = 0;
4787c478bdstevel@tonic-gate			F_SET(&repldbt, DB_DBT_PARTIAL);
4797c478bdstevel@tonic-gate			repldbt.doff = hcp->dup_off;
4807c478bdstevel@tonic-gate			repldbt.dlen = DUP_SIZE(hcp->dup_len);
4817c478bdstevel@tonic-gate			repldbt.size = 0;
4827c478bdstevel@tonic-gate			repldbt.data =
4837c478bdstevel@tonic-gate			    HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx));
4847c478bdstevel@tonic-gate			ret = __ham_replpair(dbc, &repldbt, 0);
4857c478bdstevel@tonic-gate			hcp->dup_tlen -= DUP_SIZE(hcp->dup_len);
4867c478bdstevel@tonic-gate			F_SET(hcp, H_DELETED);
4877c478bdstevel@tonic-gate			__ham_c_update(hcp, hcp->pgno,
4887c478bdstevel@tonic-gate			    DUP_SIZE(hcp->dup_len), 0, 1);
4897c478bdstevel@tonic-gate		}
4907c478bdstevel@tonic-gate
4917c478bdstevel@tonic-gate	} else
4927c478bdstevel@tonic-gate		/* Not a duplicate */
4937c478bdstevel@tonic-gatenormal:		ret = __ham_del_pair(dbc, 1);
4947c478bdstevel@tonic-gate
4957c478bdstevel@tonic-gateout:	if ((t_ret = __ham_item_done(dbc, ret == 0)) != 0 && ret == 0)
4967c478bdstevel@tonic-gate		ret = t_ret;
4977c478bdstevel@tonic-gate	RELEASE_META(dbp, hcp);
4987c478bdstevel@tonic-gate	RESTORE_CURSOR(dbp, hcp, &save_curs, ret);
4997c478bdstevel@tonic-gate	if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW))
5007c478bdstevel@tonic-gate		(void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
5017c478bdstevel@tonic-gate		    DB_LOCK_IWRITE, 0);
5027c478bdstevel@tonic-gate	return (ret);
5037c478bdstevel@tonic-gate}
5047c478bdstevel@tonic-gate
5057c478bdstevel@tonic-gatestatic int
5067c478bdstevel@tonic-gate__ham_c_get(dbc, key, data, flags)
5077c478bdstevel@tonic-gate	DBC *dbc;
5087c478bdstevel@tonic-gate	DBT *key;
5097c478bdstevel@tonic-gate	DBT *data;
5107c478bdstevel@tonic-gate	u_int32_t flags;
5117c478bdstevel@tonic-gate{
5127c478bdstevel@tonic-gate	DB *dbp;
5137c478bdstevel@tonic-gate	HASH_CURSOR *hcp, save_curs;
5147c478bdstevel@tonic-gate	db_lockmode_t lock_type;
5157c478bdstevel@tonic-gate	int get_key, ret, t_ret;
5167c478bdstevel@tonic-gate
5177c478bdstevel@tonic-gate	DEBUG_LREAD(dbc, dbc->txn, "ham_c_get",
5187c478bdstevel@tonic-gate	    flags == DB_SET || flags == DB_SET_RANGE ? key : NULL,
5197c478bdstevel@tonic-gate	    NULL, flags);
5207c478bdstevel@tonic-gate
5217c478bdstevel@tonic-gate	hcp = (HASH_CURSOR *)dbc->internal;
5227c478bdstevel@tonic-gate	dbp = dbc->dbp;
5237c478bdstevel@tonic-gate	DB_PANIC_CHECK(dbp);
5247c478bdstevel@tonic-gate	SAVE_CURSOR(hcp, &save_curs);
5257c478bdstevel@tonic-gate	if ((ret =
5267c478bdstevel@tonic-gate	    __db_cgetchk(dbp, key, data, flags, IS_VALID(hcp))) != 0)
5277c478bdstevel@tonic-gate		return (ret);
5287c478bdstevel@tonic-gate
5297c478bdstevel@tonic-gate	/* Clear OR'd in additional bits so we can check for flag equality. */
5307c478bdstevel@tonic-gate	if (LF_ISSET(DB_RMW)) {
5317c478bdstevel@tonic-gate		lock_type = DB_LOCK_WRITE;
5327c478bdstevel@tonic-gate		LF_CLR(DB_RMW);
5337c478bdstevel@tonic-gate	} else
5347c478bdstevel@tonic-gate		lock_type = DB_LOCK_READ;
5357c478bdstevel@tonic-gate
5367c478bdstevel@tonic-gate	GET_META(dbp, hcp, ret);
5377c478bdstevel@tonic-gate	if (ret != 0)
5387c478bdstevel@tonic-gate		return (ret);
5397c478bdstevel@tonic-gate	hcp->stats.hash_get++;
5407c478bdstevel@tonic-gate	hcp->seek_size = 0;
5417c478bdstevel@tonic-gate
5427c478bdstevel@tonic-gate	ret = 0;
5437c478bdstevel@tonic-gate	get_key = 1;
5447c478bdstevel@tonic-gate	switch (flags) {
5457c478bdstevel@tonic-gate	case DB_PREV:
5467c478bdstevel@tonic-gate		if (hcp->bucket != BUCKET_INVALID) {
5477c478bdstevel@tonic-gate			ret = __ham_item_prev(dbc, lock_type);
5487c478bdstevel@tonic-gate			break;
5497c478bdstevel@tonic-gate		}
5507c478bdstevel@tonic-gate		/* FALLTHROUGH */
5517c478bdstevel@tonic-gate	case DB_LAST:
5527c478bdstevel@tonic-gate		ret = __ham_item_last(dbc, lock_type);
5537c478bdstevel@tonic-gate		break;
5547c478bdstevel@tonic-gate	case DB_FIRST:
5557c478bdstevel@tonic-gate		ret = __ham_item_first(dbc, lock_type);
5567c478bdstevel@tonic-gate		break;
5577c478bdstevel@tonic-gate	case DB_NEXT_DUP:
5587c478bdstevel@tonic-gate		if (hcp->bucket == BUCKET_INVALID)
5597c478bdstevel@tonic-gate			ret = EINVAL;
5607c478bdstevel@tonic-gate		else {
5617c478bdstevel@tonic-gate			F_SET(hcp, H_DUPONLY);
5627c478bdstevel@tonic-gate			ret = __ham_item_next(dbc, lock_type);
5637c478bdstevel@tonic-gate		}
5647c478bdstevel@tonic-gate		break;
5657c478bdstevel@tonic-gate	case DB_NEXT:
5667c478bdstevel@tonic-gate		if (hcp->bucket == BUCKET_INVALID)
5677c478bdstevel@tonic-gate			hcp->bucket = 0;
5687c478bdstevel@tonic-gate		ret = __ham_item_next(dbc, lock_type);
5697c478bdstevel@tonic-gate		break;
5707c478bdstevel@tonic-gate	case DB_SET:
5717c478bdstevel@tonic-gate	case DB_SET_RANGE:
5727c478bdstevel@tonic-gate	case DB_GET_BOTH:
5737c478bdstevel@tonic-gate		if (F_ISSET(dbc, DBC_CONTINUE)) {
5747c478bdstevel@tonic-gate			F_SET(hcp, H_DUPONLY);
5757c478bdstevel@tonic-gate			ret = __ham_item_next(dbc, lock_type);
5767c478bdstevel@tonic-gate		} else if (F_ISSET(dbc, DBC_KEYSET))
5777c478bdstevel@tonic-gate			ret = __ham_item(dbc, lock_type);
5787c478bdstevel@tonic-gate		else
5797c478bdstevel@tonic-gate			ret = __ham_lookup(dbc, key, 0, lock_type);
5807c478bdstevel@tonic-gate		get_key = 0;
5817c478bdstevel@tonic-gate		break;
5827c478bdstevel@tonic-gate	case DB_CURRENT:
5837c478bdstevel@tonic-gate		if (F_ISSET(hcp, H_DELETED)) {
5847c478bdstevel@tonic-gate			ret = DB_KEYEMPTY;
5857c478bdstevel@tonic-gate			goto out;
5867c478bdstevel@tonic-gate		}
5877c478bdstevel@tonic-gate
5887c478bdstevel@tonic-gate		ret = __ham_item(dbc, lock_type);
5897c478bdstevel@tonic-gate		break;
5907c478bdstevel@tonic-gate	}
5917c478bdstevel@tonic-gate
5927c478bdstevel@tonic-gate	/*
5937c478bdstevel@tonic-gate	 * Must always enter this loop to do error handling and
5947c478bdstevel@tonic-gate	 * check for big key/data pair.
5957c478bdstevel@tonic-gate	 */
5967c478bdstevel@tonic-gate	while (1) {
5977c478bdstevel@tonic-gate		if (ret != 0 && ret != DB_NOTFOUND)
5987c478bdstevel@tonic-gate			goto out1;
5997c478bdstevel@tonic-gate		else if (F_ISSET(hcp, H_OK)) {
6007c478bdstevel@tonic-gate			/* Get the key. */
6017c478bdstevel@tonic-gate			if (get_key && (ret = __db_ret(dbp, hcp->pagep,
6027c478bdstevel@tonic-gate			    H_KEYINDEX(hcp->bndx), key, &dbc->rkey.data,
6037c478bdstevel@tonic-gate			    &dbc->rkey.size)) != 0)
6047c478bdstevel@tonic-gate				goto out1;
6057c478bdstevel@tonic-gate
6067c478bdstevel@tonic-gate			ret = __ham_dup_return(dbc, data, flags);
6077c478bdstevel@tonic-gate			break;
6087c478bdstevel@tonic-gate		} else if (!F_ISSET(hcp, H_NOMORE)) {
6097c478bdstevel@tonic-gate			abort();
6107c478bdstevel@tonic-gate			break;
6117c478bdstevel@tonic-gate		}
6127c478bdstevel@tonic-gate
6137c478bdstevel@tonic-gate		/*
6147c478bdstevel@tonic-gate		 * Ran out of entries in a bucket; change buckets.
6157c478bdstevel@tonic-gate		 */
6167c478bdstevel@tonic-gate		switch (flags) {
6177c478bdstevel@tonic-gate			case DB_LAST:
6187c478bdstevel@tonic-gate			case DB_PREV:
6197c478bdstevel@tonic-gate				ret = __ham_item_done(dbc, 0);
6207c478bdstevel@tonic-gate				if (hcp->bucket == 0) {
6217c478bdstevel@tonic-gate					ret = DB_NOTFOUND;
6227c478bdstevel@tonic-gate					goto out1;
6237c478bdstevel@tonic-gate				}
6247c478bdstevel@tonic-gate				hcp->bucket--;
6257c478bdstevel@tonic-gate				hcp->bndx = NDX_INVALID;
6267c478bdstevel@tonic-gate				if (ret == 0)
6277c478bdstevel@tonic-gate					ret = __ham_item_prev(dbc, lock_type);
6287c478bdstevel@tonic-gate				break;
6297c478bdstevel@tonic-gate			case DB_FIRST:
6307c478bdstevel@tonic-gate			case DB_NEXT:
6317c478bdstevel@tonic-gate				ret = __ham_item_done(dbc, 0);
6327c478bdstevel@tonic-gate				hcp->bndx = NDX_INVALID;
6337c478bdstevel@tonic-gate				hcp->bucket++;
6347c478bdstevel@tonic-gate				hcp->pgno = PGNO_INVALID;
6357c478bdstevel@tonic-gate				hcp->pagep = NULL;
6367c478bdstevel@tonic-gate				if (hcp->bucket > hcp->hdr->max_bucket) {
6377c478bdstevel@tonic-gate					ret = DB_NOTFOUND;
6387c478bdstevel@tonic-gate					goto out1;
6397c478bdstevel@tonic-gate				}
6407c478bdstevel@tonic-gate				if (ret == 0)
6417c478bdstevel@tonic-gate					ret = __ham_item_next(dbc, lock_type);
6427c478bdstevel@tonic-gate				break;
6437c478bdstevel@tonic-gate			case DB_GET_BOTH:
6447c478bdstevel@tonic-gate			case DB_NEXT_DUP:
6457c478bdstevel@tonic-gate			case DB_SET:
6467c478bdstevel@tonic-gate			case DB_SET_RANGE:
6477c478bdstevel@tonic-gate				/* Key not found. */
6487c478bdstevel@tonic-gate				ret = DB_NOTFOUND;
6497c478bdstevel@tonic-gate				goto out1;
6507c478bdstevel@tonic-gate		}
6517c478bdstevel@tonic-gate	}
6527c478bdstevel@tonic-gateout1:	if ((t_ret = __ham_item_done(dbc, 0)) != 0 && ret == 0)
6537c478bdstevel@tonic-gate		ret = t_ret;
6547c478bdstevel@tonic-gateout:	RELEASE_META(dbp, hcp);
6557c478bdstevel@tonic-gate	RESTORE_CURSOR(dbp, hcp, &save_curs, ret);
6567c478bdstevel@tonic-gate	return (ret);
6577c478bdstevel@tonic-gate}
6587c478bdstevel@tonic-gate
6597c478bdstevel@tonic-gatestatic int
6607c478bdstevel@tonic-gate__ham_c_put(dbc, key, data, flags)
6617c478bdstevel@tonic-gate	DBC *dbc;
6627c478bdstevel@tonic-gate	DBT *key;
6637c478bdstevel@tonic-gate	DBT *data;
6647c478bdstevel@tonic-gate	u_int32_t flags;
6657c478bdstevel@tonic-gate{
6667c478bdstevel@tonic-gate	DB *dbp;
6677c478bdstevel@tonic-gate	DBT tmp_val, *myval;
6687c478bdstevel@tonic-gate	HASH_CURSOR *hcp, save_curs;
6697c478bdstevel@tonic-gate	u_int32_t nbytes;
6707c478bdstevel@tonic-gate	int ret, t_ret;
6717c478bdstevel@tonic-gate
6727c478bdstevel@tonic-gate	dbp = dbc->dbp;
6737c478bdstevel@tonic-gate	DB_PANIC_CHECK(dbp);
6747c478bdstevel@tonic-gate	DEBUG_LWRITE(dbc, dbc->txn, "ham_c_put",
6757c478bdstevel@tonic-gate	    flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL,
6767c478bdstevel@tonic-gate	    data, flags);
6777c478bdstevel@tonic-gate	hcp = (HASH_CURSOR *)dbc->internal;
6787c478bdstevel@tonic-gate
6797c478bdstevel@tonic-gate	if ((ret = __db_cputchk(dbp, key, data, flags,
6807c478bdstevel@tonic-gate	    F_ISSET(dbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
6817c478bdstevel@tonic-gate		return (ret);
6827c478bdstevel@tonic-gate
6837c478bdstevel@tonic-gate	if (F_ISSET(hcp, H_DELETED) &&
6847c478bdstevel@tonic-gate	    flags != DB_KEYFIRST && flags != DB_KEYLAST)
6857c478bdstevel@tonic-gate		return (DB_NOTFOUND);
6867c478bdstevel@tonic-gate
6877c478bdstevel@tonic-gate	/*
6887c478bdstevel@tonic-gate	 * If we are in the concurrent DB product and this cursor
6897c478bdstevel@tonic-gate	 * is not a write cursor, then this request is invalid.
6907c478bdstevel@tonic-gate	 * If it is a simple write cursor, then we need to upgrade its
6917c478bdstevel@tonic-gate	 * lock.
6927c478bdstevel@tonic-gate	 */
6937c478bdstevel@tonic-gate	if (F_ISSET(dbp, DB_AM_CDB)) {
6947c478bdstevel@tonic-gate		/* Make sure it's a valid update cursor. */
6957c478bdstevel@tonic-gate		if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
6967c478bdstevel@tonic-gate			return (EINVAL);
6977c478bdstevel@tonic-gate
6987c478bdstevel@tonic-gate		if (F_ISSET(dbc, DBC_RMW) &&
6997c478bdstevel@tonic-gate		    (ret = lock_get(dbp->dbenv->lk_info, dbc->locker,
7007c478bdstevel@tonic-gate		    DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE,
7017c478bdstevel@tonic-gate		    &dbc->mylock)) != 0)
7027c478bdstevel@tonic-gate			return (EAGAIN);
7037c478bdstevel@tonic-gate	}
7047c478bdstevel@tonic-gate
7057c478bdstevel@tonic-gate	GET_META(dbp, hcp, ret);
7067c478bdstevel@tonic-gate	if (ret != 0)
7077c478bdstevel@tonic-gate		return (ret);
7087c478bdstevel@tonic-gate
7097c478bdstevel@tonic-gate	SAVE_CURSOR(hcp, &save_curs);
7107c478bdstevel@tonic-gate	hcp->stats.hash_put++;
7117c478bdstevel@tonic-gate
7127c478bdstevel@tonic-gate	switch (flags) {
7137c478bdstevel@tonic-gate	case DB_KEYLAST:
7147c478bdstevel@tonic-gate	case DB_KEYFIRST:
7157c478bdstevel@tonic-gate		nbytes = (ISBIG(hcp, key->size) ? HOFFPAGE_PSIZE :
7167c478bdstevel@tonic-gate		    HKEYDATA_PSIZE(key->size)) +
7177c478bdstevel@tonic-gate		    (ISBIG(hcp, data->size) ? HOFFPAGE_PSIZE :
7187c478bdstevel@tonic-gate		    HKEYDATA_PSIZE(data->size));
7197c478bdstevel@tonic-gate		if ((ret = __ham_lookup(dbc,
7207c478bdstevel@tonic-gate		    key, nbytes, DB_LOCK_WRITE)) == DB_NOTFOUND) {
7217c478bdstevel@tonic-gate			ret = 0;
7227c478bdstevel@tonic-gate			if (hcp->seek_found_page != PGNO_INVALID &&
7237c478bdstevel@tonic-gate			    hcp->seek_found_page != hcp->pgno) {
7247c478bdstevel@tonic-gate				if ((ret = __ham_item_done(dbc, 0)) != 0)
7257c478bdstevel@tonic-gate					goto out;
7267c478bdstevel@tonic-gate				hcp->pgno = hcp->seek_found_page;
7277c478bdstevel@tonic-gate				hcp->bndx = NDX_INVALID;
7287c478bdstevel@tonic-gate			}
7297c478bdstevel@tonic-gate
7307c478bdstevel@tonic-gate			if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) {
7317c478bdstevel@tonic-gate				/*
7327c478bdstevel@tonic-gate				 * A partial put, but the key does not exist
7337c478bdstevel@tonic-gate				 * and we are not beginning the write at 0.
7347c478bdstevel@tonic-gate				 * We must create a data item padded up to doff
7357c478bdstevel@tonic-gate				 * and then write the new bytes represented by
7367c478bdstevel@tonic-gate				 * val.
7377c478bdstevel@tonic-gate				 */
7387c478bdstevel@tonic-gate				if ((ret = __ham_init_dbt(&tmp_val,
7397c478bdstevel@tonic-gate				    data->size + data->doff,
7407c478bdstevel@tonic-gate				    &dbc->rdata.data, &dbc->rdata.size)) == 0) {
7417c478bdstevel@tonic-gate					memset(tmp_val.data, 0, data->doff);
7427c478bdstevel@tonic-gate					memcpy((u_int8_t *)tmp_val.data +
7437c478bdstevel@tonic-gate					    data->doff, data->data, data->size);
7447c478bdstevel@tonic-gate					myval = &tmp_val;
7457c478bdstevel@tonic-gate				}
7467c478bdstevel@tonic-gate			} else
7477c478bdstevel@tonic-gate				myval = (DBT *)data;
7487c478bdstevel@tonic-gate
7497c478bdstevel@tonic-gate			if (ret == 0)
7507c478bdstevel@tonic-gate				ret = __ham_add_el(dbc, key, myval, H_KEYDATA);
7517c478bdstevel@tonic-gate			goto done;
7527c478bdstevel@tonic-gate		}
7537c478bdstevel@tonic-gate		break;
7547c478bdstevel@tonic-gate	case DB_BEFORE:
7557c478bdstevel@tonic-gate	case DB_AFTER:
7567c478bdstevel@tonic-gate	case DB_CURRENT:
7577c478bdstevel@tonic-gate		ret = __ham_item(dbc, DB_LOCK_WRITE);
7587c478bdstevel@tonic-gate		break;
7597c478bdstevel@tonic-gate	}
7607c478bdstevel@tonic-gate
7617c478bdstevel@tonic-gate	if (ret == 0) {
7627c478bdstevel@tonic-gate		if ((flags == DB_CURRENT && !F_ISSET(hcp, H_ISDUP)) ||
7637c478bdstevel@tonic-gate		    ((flags == DB_KEYFIRST || flags == DB_KEYLAST) &&
7647c478bdstevel@tonic-gate		    !F_ISSET(dbp, DB_AM_DUP)))
7657c478bdstevel@tonic-gate			ret = __ham_overwrite(dbc, data);
7667c478bdstevel@tonic-gate		else
7677c478bdstevel@tonic-gate			ret = __ham_add_dup(dbc, data, flags);
7687c478bdstevel@tonic-gate	}
7697c478bdstevel@tonic-gate
7707c478bdstevel@tonic-gatedone:	if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
7717c478bdstevel@tonic-gate		ret = __ham_expand_table(dbc);
7727c478bdstevel@tonic-gate		F_CLR(hcp, H_EXPAND);
7737c478bdstevel@tonic-gate	}
7747c478bdstevel@tonic-gate
7757c478bdstevel@tonic-gate	if ((t_ret = __ham_item_done(dbc, ret == 0)) != 0 && ret == 0)
7767c478bdstevel@tonic-gate		ret = t_ret;
7777c478bdstevel@tonic-gate
7787c478bdstevel@tonic-gateout:	RELEASE_META(dbp, hcp);
7797c478bdstevel@tonic-gate	RESTORE_CURSOR(dbp, hcp, &save_curs, ret);
7807c478bdstevel@tonic-gate	if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW))
7817c478bdstevel@tonic-gate		(void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
7827c478bdstevel@tonic-gate		    DB_LOCK_IWRITE, 0);
7837c478bdstevel@tonic-gate	return (ret);
7847c478bdstevel@tonic-gate}
7857c478bdstevel@tonic-gate
7867c478bdstevel@tonic-gate/********************************* UTILITIES ************************/
7877c478bdstevel@tonic-gate
7887c478bdstevel@tonic-gate/*
7897c478bdstevel@tonic-gate * __ham_expand_table --
7907c478bdstevel@tonic-gate */
7917c478bdstevel@tonic-gatestatic int
7927c478bdstevel@tonic-gate__ham_expand_table(dbc)
7937c478bdstevel@tonic-gate	DBC *dbc;
7947c478bdstevel@tonic-gate{
7957c478bdstevel@tonic-gate	DB *dbp;
7967c478bdstevel@tonic-gate	HASH_CURSOR *hcp;
7977c478bdstevel@tonic-gate	DB_LSN new_lsn;
7987c478bdstevel@tonic-gate	u_int32_t old_bucket, new_bucket, spare_ndx;
7997c478bdstevel@tonic-gate	int ret;
8007c478bdstevel@tonic-gate
8017c478bdstevel@tonic-gate	dbp = dbc->dbp;
8027c478bdstevel@tonic-gate	hcp = (HASH_CURSOR *)dbc->internal;
8037c478bdstevel@tonic-gate	ret = 0;
8047c478bdstevel@tonic-gate	DIRTY_META(dbp, hcp, ret);
8057c478bdstevel@tonic-gate	if (ret)
8067c478bdstevel@tonic-gate		return (ret);
8077c478bdstevel@tonic-gate
8087c478bdstevel@tonic-gate	/*
8097c478bdstevel@tonic-gate	 * If the split point is about to increase, make sure that we
8107c478bdstevel@tonic-gate	 * have enough extra pages.  The calculation here is weird.
8117c478bdstevel@tonic-gate	 * We'd like to do this after we've upped max_bucket, but it's
8127c478bdstevel@tonic-gate	 * too late then because we've logged the meta-data split.  What
8137c478bdstevel@tonic-gate	 * we'll do between then and now is increment max bucket and then
8147c478bdstevel@tonic-gate	 * see what the log of one greater than that is; here we have to
8157c478bdstevel@tonic-gate	 * look at the log of max + 2.  VERY NASTY STUFF.
8167c478bdstevel@tonic-gate	 */
8177c478bdstevel@tonic-gate	if (__db_log2(hcp->hdr->max_bucket + 2) > hcp->hdr->ovfl_point) {
8187c478bdstevel@tonic-gate		/*
8197c478bdstevel@tonic-gate		 * We are about to shift the split point.  Make sure that
8207c478bdstevel@tonic-gate		 * if the next doubling is going to be big (more than 8
8217c478bdstevel@tonic-gate		 * pages), we have some extra pages around.
8227c478bdstevel@tonic-gate		 */
8237c478bdstevel@tonic-gate		if (hcp->hdr->max_bucket + 1 >= 8 &&
8247c478bdstevel@tonic-gate		    hcp->hdr->spares[hcp->hdr->ovfl_point] <
8257c478bdstevel@tonic-gate		    hcp->hdr->spares[hcp->hdr->ovfl_point - 1] +
8267c478bdstevel@tonic-gate		    hcp->hdr->ovfl_point + 1)
8277c478bdstevel@tonic-gate			__ham_init_ovflpages(dbc);
8287c478bdstevel@tonic-gate	}
8297c478bdstevel@tonic-gate
8307c478bdstevel@tonic-gate	/* Now we can log the meta-data split. */
8317c478bdstevel@tonic-gate	if (DB_LOGGING(dbc)) {
8327c478bdstevel@tonic-gate		if ((ret = __ham_splitmeta_log(dbp->dbenv->lg_info,
8337c478bdstevel@tonic-gate		    dbc->txn, &new_lsn, 0, dbp->log_fileid,
8347c478bdstevel@tonic-gate		    hcp->hdr->max_bucket, hcp->hdr->ovfl_point,
8357c478bdstevel@tonic-gate		    hcp->hdr->spares[hcp->hdr->ovfl_point],
8367c478bdstevel@tonic-gate		    &hcp->hdr->lsn)) != 0)
8377c478bdstevel@tonic-gate			return (ret);
8387c478bdstevel@tonic-gate
8397c478bdstevel@tonic-gate		hcp->hdr->lsn = new_lsn;
8407c478bdstevel@tonic-gate	}
8417c478bdstevel@tonic-gate
8427c478bdstevel@tonic-gate	hcp->stats.hash_expansions++;
8437c478bdstevel@tonic-gate	new_bucket = ++hcp->hdr->max_bucket;
8447c478bdstevel@tonic-gate	old_bucket = (hcp->hdr->max_bucket & hcp->hdr->low_mask);
8457c478bdstevel@tonic-gate
8467c478bdstevel@tonic-gate	/*
8477c478bdstevel@tonic-gate	 * If the split point is increasing, copy the current contents
8487c478bdstevel@tonic-gate	 * of the spare split bucket to the next bucket.
8497c478bdstevel@tonic-gate	 */
8507c478bdstevel@tonic-gate	spare_ndx = __db_log2(hcp->hdr->max_bucket + 1);
8517c478bdstevel@tonic-gate	if (spare_ndx > hcp->hdr->ovfl_point) {
8527c478bdstevel@tonic-gate		hcp->hdr->spares[spare_ndx] =
8537c478bdstevel@tonic-gate		    hcp->hdr->spares[hcp->hdr->ovfl_point];
8547c478bdstevel@tonic-gate		hcp->hdr->ovfl_point = spare_ndx;
8557c478bdstevel@tonic-gate	}
8567c478bdstevel@tonic-gate
8577c478bdstevel@tonic-gate	if (new_bucket > hcp->hdr->high_mask) {
8587c478bdstevel@tonic-gate		/* Starting a new doubling */
8597c478bdstevel@tonic-gate		hcp->hdr->low_mask = hcp->hdr->high_mask;
8607c478bdstevel@tonic-gate		hcp->hdr->high_mask = new_bucket | hcp->hdr->low_mask;
8617c478bdstevel@tonic-gate	}
8627c478bdstevel@tonic-gate
8637c478bdstevel@tonic-gate	if (BUCKET_TO_PAGE(hcp, new_bucket) > MAX_PAGES(hcp)) {
8647c478bdstevel@tonic-gate		__db_err(dbp->dbenv,
8657c478bdstevel@tonic-gate		    "hash: Cannot allocate new bucket.  Pages exhausted.");
8667c478bdstevel@tonic-gate		return (ENOSPC);
8677c478bdstevel@tonic-gate	}
8687c478bdstevel@tonic-gate
8697c478bdstevel@tonic-gate	/* Relocate records to the new bucket */
8707c478bdstevel@tonic-gate	return (__ham_split_page(dbc, old_bucket, new_bucket));
8717c478bdstevel@tonic-gate}
8727c478bdstevel@tonic-gate
8737c478bdstevel@tonic-gate/*
8747c478bdstevel@tonic-gate * PUBLIC: u_int32_t __ham_call_hash __P((HASH_CURSOR *, u_int8_t *, int32_t));
8757c478bdstevel@tonic-gate */
8767c478bdstevel@tonic-gateu_int32_t
8777c478bdstevel@tonic-gate__ham_call_hash(hcp, k, len)
8787c478bdstevel@tonic-gate	HASH_CURSOR *hcp;
8797c478bdstevel@tonic-gate	u_int8_t *k;
8807c478bdstevel@tonic-gate	int32_t len;
8817c478bdstevel@tonic-gate{
8827c478bdstevel@tonic-gate	u_int32_t n, bucket;
8837c478bdstevel@tonic-gate
8847c478bdstevel@tonic-gate	n = (u_int32_t)(hcp->dbc->dbp->h_hash(k, len));
8857c478bdstevel@tonic-gate
8867c478bdstevel@tonic-gate	bucket = n & hcp->hdr->high_mask;
8877c478bdstevel@tonic-gate	if (bucket > hcp->hdr->max_bucket)
8887c478bdstevel@tonic-gate		bucket = bucket & hcp->hdr->low_mask;
8897c478bdstevel@tonic-gate	return (bucket);
8907c478bdstevel@tonic-gate}
8917c478bdstevel@tonic-gate
8927c478bdstevel@tonic-gate/*
8937c478bdstevel@tonic-gate * Check for duplicates, and call __db_ret appropriately.  Release
8947c478bdstevel@tonic-gate * everything held by the cursor.
8957c478bdstevel@tonic-gate */
8967c478bdstevel@tonic-gatestatic int
8977c478bdstevel@tonic-gate__ham_dup_return(dbc, val, flags)
8987c478bdstevel@tonic-gate	DBC *dbc;
8997c478bdstevel@tonic-gate	DBT *val;
9007c478bdstevel@tonic-gate	u_int32_t flags;
9017c478bdstevel@tonic-gate{
9027c478bdstevel@tonic-gate	DB *dbp;
9037c478bdstevel@tonic-gate	HASH_CURSOR *hcp;
9047c478bdstevel@tonic-gate	PAGE *pp;
9057c478bdstevel@tonic-gate	DBT *myval, tmp_val;
9067c478bdstevel@tonic-gate	db_indx_t ndx;
9077c478bdstevel@tonic-gate	db_pgno_t pgno;
9087c478bdstevel@tonic-gate	u_int32_t off, tlen;
9097c478bdstevel@tonic-gate	u_int8_t *hk, type;
9107c478bdstevel@tonic-gate	int cmp, ret;
9117c478bdstevel@tonic-gate	db_indx_t len;
9127c478bdstevel@tonic-gate
9137c478bdstevel@tonic-gate	/* Check for duplicate and return the first one. */
9147c478bdstevel@tonic-gate	dbp = dbc->dbp;
9157c478bdstevel@tonic-gate	hcp = (HASH_CURSOR *)dbc->internal;
9167c478bdstevel@tonic-gate	ndx = H_DATAINDEX(hcp->bndx);
9177c478bdstevel@tonic-gate	type = HPAGE_TYPE(hcp->pagep, ndx);
9187c478bdstevel@tonic-gate	pp = hcp->pagep;
9197c478bdstevel@tonic-gate	myval = val;
9207c478bdstevel@tonic-gate
9217c478bdstevel@tonic-gate	/*
9227c478bdstevel@tonic-gate	 * There are 4 cases:
9237c478bdstevel@tonic-gate	 * 1. We are not in duplicate, simply call db_ret.
9247c478bdstevel@tonic-gate	 * 2. We are looking at keys and stumbled onto a duplicate.
9257c478bdstevel@tonic-gate	 * 3. We are in the middle of a duplicate set. (ISDUP set)
9267c478bdstevel@tonic-gate	 * 4. This is a duplicate and we need to return a specific item.
9277c478bdstevel@tonic-gate	 */
9287c478bdstevel@tonic-gate
9297c478bdstevel@tonic-gate	/*
9307c478bdstevel@tonic-gate	 * Here we check for the case where we just stumbled onto a
9317c478bdstevel@tonic-gate	 * duplicate.  In this case, we do initialization and then
9327c478bdstevel@tonic-gate	 * let the normal duplicate code handle it.
9337c478bdstevel@tonic-gate	 */
9347c478bdstevel@tonic-gate	if (!F_ISSET(hcp, H_ISDUP))
9357c478bdstevel@tonic-gate		if (type == H_DUPLICATE) {
9367c478bdstevel@tonic-gate			F_SET(hcp, H_ISDUP);
9377c478bdstevel@tonic-gate			hcp->dup_tlen = LEN_HDATA(hcp->pagep,
9387c478bdstevel@tonic-gate			    hcp->hdr->pagesize, hcp->bndx);
9397c478bdstevel@tonic-gate			hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
9407c478bdstevel@tonic-gate			if (flags == DB_LAST || flags == DB_PREV) {
9417c478bdstevel@tonic-gate				hcp->dndx = 0;
9427c478bdstevel@tonic-gate				hcp->dup_off = 0;
9437c478bdstevel@tonic-gate				do {
9447c478bdstevel@tonic-gate					memcpy(&len,
9457c478bdstevel@tonic-gate					    HKEYDATA_DATA(hk) + hcp->dup_off,
9467c478bdstevel@tonic-gate					    sizeof(db_indx_t));
9477c478bdstevel@tonic-gate					hcp->dup_off += DUP_SIZE(len);
9487c478bdstevel@tonic-gate					hcp->dndx++;
9497c478bdstevel@tonic-gate				} while (hcp->dup_off < hcp->dup_tlen);
9507c478bdstevel@tonic-gate				hcp->dup_off -= DUP_SIZE(len);
9517c478bdstevel@tonic-gate				hcp->dndx--;
9527c478bdstevel@tonic-gate			} else {
9537c478bdstevel@tonic-gate				memcpy(&len,
9547c478bdstevel@tonic-gate				    HKEYDATA_DATA(hk), sizeof(db_indx_t));
9557c478bdstevel@tonic-gate				hcp->dup_off = 0;
9567c478bdstevel@tonic-gate				hcp->dndx = 0;
9577c478bdstevel@tonic-gate			}
9587c478bdstevel@tonic-gate			hcp->dup_len = len;
9597c478bdstevel@tonic-gate		} else if (type == H_OFFDUP) {
9607c478bdstevel@tonic-gate			F_SET(hcp, H_ISDUP);
9617c478bdstevel@tonic-gate			memcpy(&pgno, HOFFDUP_PGNO(P_ENTRY(hcp->pagep, ndx)),
9627c478bdstevel@tonic-gate			    sizeof(db_pgno_t));
9637c478bdstevel@tonic-gate			if (flags == DB_LAST || flags == DB_PREV) {
9647c478bdstevel@tonic-gate				if ((ret = __db_dend(dbc,
9657c478bdstevel@tonic-gate				    pgno, &hcp->dpagep)) != 0)
9667c478bdstevel@tonic-gate					return (ret);
9677c478bdstevel@tonic-gate				hcp->dpgno = PGNO(hcp->dpagep);
9687c478bdstevel@tonic-gate				hcp->dndx = NUM_ENT(hcp->dpagep) - 1;
9697c478bdstevel@tonic-gate			} else if ((ret = __ham_next_cpage(dbc,
9707c478bdstevel@tonic-gate			    pgno, 0, H_ISDUP)) != 0)
9717c478bdstevel@tonic-gate				return (ret);
9727c478bdstevel@tonic-gate		}
9737c478bdstevel@tonic-gate
9747c478bdstevel@tonic-gate
9757c478bdstevel@tonic-gate	/*
9767c478bdstevel@tonic-gate	 * If we are retrieving a specific key/data pair, then we
9777c478bdstevel@tonic-gate	 * may need to adjust the cursor before returning data.
9787c478bdstevel@tonic-gate	 */
9797c478bdstevel@tonic-gate	if (flags == DB_GET_BOTH) {
9807c478bdstevel@tonic-gate		if (F_ISSET(hcp, H_ISDUP)) {
9817c478bdstevel@tonic-gate			if (hcp->dpgno != PGNO_INVALID) {
9827c478bdstevel@tonic-gate				if ((ret = __db_dsearch(dbc, 0, val,
9837c478bdstevel@tonic-gate				    hcp->dpgno, &hcp->dndx, &hcp->dpagep, &cmp))
9847c478bdstevel@tonic-gate				    != 0)
9857c478bdstevel@tonic-gate					return (ret);
9867c478bdstevel@tonic-gate				if (cmp == 0)
9877c478bdstevel@tonic-gate					hcp->dpgno = PGNO(hcp->dpagep);
9887c478bdstevel@tonic-gate			} else {
9897c478bdstevel@tonic-gate				__ham_dsearch(dbc, val, &off, &cmp);
9907c478bdstevel@tonic-gate				hcp->dup_off = off;
9917c478bdstevel@tonic-gate			}
9927c478bdstevel@tonic-gate		} else {
9937c478bdstevel@tonic-gate			hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
9947c478bdstevel@tonic-gate			if (((HKEYDATA *)hk)->type == H_OFFPAGE) {
9957c478bdstevel@tonic-gate				memcpy(&tlen,
9967c478bdstevel@tonic-gate				    HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
9977c478bdstevel@tonic-gate				memcpy(&pgno,
9987c478bdstevel@tonic-gate				    HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
9997c478bdstevel@tonic-gate				if ((ret = __db_moff(dbp, val,
10007c478bdstevel@tonic-gate				    pgno, tlen, dbp->dup_compare, &cmp)) != 0)
10017c478bdstevel@tonic-gate					return (ret);
10027c478bdstevel@tonic-gate			} else {
10037c478bdstevel@tonic-gate				/*
10047c478bdstevel@tonic-gate				 * We do not zero tmp_val since the comparison
10057c478bdstevel@tonic-gate				 * routines may only look at data and size.
10067c478bdstevel@tonic-gate				 */
10077c478bdstevel@tonic-gate				tmp_val.data = HKEYDATA_DATA(hk);
10087c478bdstevel@tonic-gate				tmp_val.size = LEN_HDATA(hcp->pagep,
10097c478bdstevel@tonic-gate				    dbp->pgsize, hcp->bndx);
10107c478bdstevel@tonic-gate				cmp = dbp->dup_compare == NULL ?
10117c478bdstevel@tonic-gate				    __bam_defcmp(&tmp_val, val) :
10127c478bdstevel@tonic-gate				    dbp->dup_compare(&tmp_val, val);
10137c478bdstevel@tonic-gate			}
10147c478bdstevel@tonic-gate		}
10157c478bdstevel@tonic-gate
10167c478bdstevel@tonic-gate		if (cmp != 0)
10177c478bdstevel@tonic-gate			return (DB_NOTFOUND);
10187c478bdstevel@tonic-gate	}
10197c478bdstevel@tonic-gate
10207c478bdstevel@tonic-gate	/*
10217c478bdstevel@tonic-gate	 * Now, everything is initialized, grab a duplicate if
10227c478bdstevel@tonic-gate	 * necessary.
10237c478bdstevel@tonic-gate	 */
10247c478bdstevel@tonic-gate	if (F_ISSET(hcp, H_ISDUP))
10257c478bdstevel@tonic-gate		if (hcp->dpgno != PGNO_INVALID) {
10267c478bdstevel@tonic-gate			pp = hcp->dpagep;
10277c478bdstevel@tonic-gate			ndx = hcp->dndx;
10287c478bdstevel@tonic-gate		} else {
10297c478bdstevel@tonic-gate			/*
10307c478bdstevel@tonic-gate			 * Copy the DBT in case we are retrieving into user
10317c478bdstevel@tonic-gate			 * memory and we need the parameters for it.  If the
10327c478bdstevel@tonic-gate			 * user requested a partial, then we need to adjust
10337c478bdstevel@tonic-gate			 * the user's parameters to get the partial of the
10347c478bdstevel@tonic-gate			 * duplicate which is itself a partial.
10357c478bdstevel@tonic-gate			 */
10367c478bdstevel@tonic-gate			memcpy(&tmp_val, val, sizeof(*val));
10377c478bdstevel@tonic-gate			if (F_ISSET(&tmp_val, DB_DBT_PARTIAL)) {
10387c478bdstevel@tonic-gate				/*
10397c478bdstevel@tonic-gate				 * Take the user's length unless it would go
10407c478bdstevel@tonic-gate				 * beyond the end of the duplicate.
10417c478bdstevel@tonic-gate				 */
10427c478bdstevel@tonic-gate				if (tmp_val.doff + hcp->dup_off > hcp->dup_len)
10437c478bdstevel@tonic-gate					tmp_val.dlen = 0;
10447c478bdstevel@tonic-gate				else if (tmp_val.dlen + tmp_val.doff >
10457c478bdstevel@tonic-gate				    hcp->dup_len)
10467c478bdstevel@tonic-gate					tmp_val.dlen =
10477c478bdstevel@tonic-gate					    hcp->dup_len - tmp_val.doff;
10487c478bdstevel@tonic-gate
10497c478bdstevel@tonic-gate				/*
10507c478bdstevel@tonic-gate				 * Calculate the new offset.
10517c478bdstevel@tonic-gate				 */
10527c478bdstevel@tonic-gate				tmp_val.doff += hcp->dup_off;
10537c478bdstevel@tonic-gate			} else {
10547c478bdstevel@tonic-gate				F_SET(&tmp_val, DB_DBT_PARTIAL);
10557c478bdstevel@tonic-gate				tmp_val.dlen = hcp->dup_len;
10567c478bdstevel@tonic-gate				tmp_val.doff = hcp->dup_off + sizeof(db_indx_t);
10577c478bdstevel@tonic-gate			}
10587c478bdstevel@tonic-gate			myval = &tmp_val;
10597c478bdstevel@tonic-gate		}
10607c478bdstevel@tonic-gate
10617c478bdstevel@tonic-gate
10627c478bdstevel@tonic-gate	/*
10637c478bdstevel@tonic-gate	 * Finally, if we had a duplicate, pp, ndx, and myval should be
10647c478bdstevel@tonic-gate	 * set appropriately.
10657c478bdstevel@tonic-gate	 */
10667c478bdstevel@tonic-gate	if ((ret = __db_ret(dbp, pp, ndx, myval, &dbc->rdata.data,
10677c478bdstevel@tonic-gate	    &dbc->rdata.size)) != 0)
10687c478bdstevel@tonic-gate		return (ret);
10697c478bdstevel@tonic-gate
10707c478bdstevel@tonic-gate	/*
10717c478bdstevel@tonic-gate	 * In case we sent a temporary off to db_ret, set the real
10727c478bdstevel@tonic-gate	 * return values.
10737c478bdstevel@tonic-gate	 */
10747c478bdstevel@tonic-gate	val->data = myval->data;
10757c478bdstevel@tonic-gate	val->size = myval->size;
10767c478bdstevel@tonic-gate
10777c478bdstevel@tonic-gate	return (0);
10787c478bdstevel@tonic-gate}
10797c478bdstevel@tonic-gate
10807c478bdstevel@tonic-gatestatic int
10817c478bdstevel@tonic-gate__ham_overwrite(dbc, nval)
10827c478bdstevel@tonic-gate	DBC *dbc;
10837c478bdstevel@tonic-gate	DBT *nval;
10847c478bdstevel@tonic-gate{
10857c478bdstevel@tonic-gate	HASH_CURSOR *hcp;
10867c478bdstevel@tonic-gate	DBT *myval, tmp_val;
10877c478bdstevel@tonic-gate	u_int8_t *hk;
10887c478bdstevel@tonic-gate
10897c478bdstevel@tonic-gate	hcp = (HASH_CURSOR *)dbc->internal;
10907c478bdstevel@tonic-gate	if (F_ISSET(dbc->dbp, DB_AM_DUP))
10917c478bdstevel@tonic-gate		return (__ham_add_dup(dbc, nval, DB_KEYLAST));
10927c478bdstevel@tonic-gate	else if (!F_ISSET(nval, DB_DBT_PARTIAL)) {
10937c478bdstevel@tonic-gate		/* Put/overwrite */
10947c478bdstevel@tonic-gate		memcpy(&tmp_val, nval, sizeof(*nval));
10957c478bdstevel@tonic-gate		F_SET(&tmp_val, DB_DBT_PARTIAL);
10967c478bdstevel@tonic-gate		tmp_val.doff = 0;
10977c478bdstevel@tonic-gate		hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
10987c478bdstevel@tonic-gate		if (HPAGE_PTYPE(hk) == H_OFFPAGE)
10997c478bdstevel@tonic-gate			memcpy(&tmp_val.dlen,
11007c478bdstevel@tonic-gate			    HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
11017c478bdstevel@tonic-gate		else
11027c478bdstevel@tonic-gate			tmp_val.dlen = LEN_HDATA(hcp->pagep,
11037c478bdstevel@tonic-gate			    hcp->hdr->pagesize,hcp->bndx);
11047c478bdstevel@tonic-gate		myval = &tmp_val;
11057c478bdstevel@tonic-gate	} else /* Regular partial put */
11067c478bdstevel@tonic-gate		myval = nval;
11077c478bdstevel@tonic-gate
11087c478bdstevel@tonic-gate	return (__ham_replpair(dbc, myval, 0));
11097c478bdstevel@tonic-gate}
11107c478bdstevel@tonic-gate
11117c478bdstevel@tonic-gate/*
11127c478bdstevel@tonic-gate * Given a key and a cursor, sets the cursor to the page/ndx on which
11137c478bdstevel@tonic-gate * the key resides.  If the key is found, the cursor H_OK flag is set
11147c478bdstevel@tonic-gate * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set.
11157c478bdstevel@tonic-gate * If the key is not found, the H_OK flag is not set.  If the sought
11167c478bdstevel@tonic-gate * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields
11177c478bdstevel@tonic-gate * are set indicating where an add might take place.  If it is 0,
11187c478bdstevel@tonic-gate * non of the cursor pointer field are valid.
11197c478bdstevel@tonic-gate */
11207c478bdstevel@tonic-gatestatic int
11217c478bdstevel@tonic-gate__ham_lookup(dbc, key, sought, mode)
11227c478bdstevel@tonic-gate	DBC *dbc;
11237c478bdstevel@tonic-gate	const DBT *key;
11247c478bdstevel@tonic-gate	u_int32_t sought;
11257c478bdstevel@tonic-gate	db_lockmode_t mode;
11267c478bdstevel@tonic-gate{
11277c478bdstevel@tonic-gate	DB *dbp;
11287c478bdstevel@tonic-gate	HASH_CURSOR *hcp;
11297c478bdstevel@tonic-gate	db_pgno_t pgno;
11307c478bdstevel@tonic-gate	u_int32_t tlen;
11317c478bdstevel@tonic-gate	int match, ret, t_ret;
11327c478bdstevel@tonic-gate	u_int8_t *hk;
11337c478bdstevel@tonic-gate
11347c478bdstevel@tonic-gate	dbp = dbc->dbp;
11357c478bdstevel@tonic-gate	hcp = (HASH_CURSOR *)dbc->internal;
11367c478bdstevel@tonic-gate	/*
11377c478bdstevel@tonic-gate	 * Set up cursor so that we're looking for space to add an item
11387c478bdstevel@tonic-gate	 * as we cycle through the pages looking for the key.
11397c478bdstevel@tonic-gate	 */
11407c478bdstevel@tonic-gate	if ((ret = __ham_item_reset(dbc)) != 0)
11417c478bdstevel@tonic-gate		return (ret);
11427c478bdstevel@tonic-gate	hcp->seek_size = sought;
11437c478bdstevel@tonic-gate
11447c478bdstevel@tonic-gate	hcp->bucket = __ham_call_hash(hcp, (u_int8_t *)key->data, key->size);
11457c478bdstevel@tonic-gate	while (1) {
11467c478bdstevel@tonic-gate		if ((ret = __ham_item_next(dbc, mode)) != 0)
11477c478bdstevel@tonic-gate			return (ret);
11487c478bdstevel@tonic-gate
11497c478bdstevel@tonic-gate		if (F_ISSET(hcp, H_NOMORE))
11507c478bdstevel@tonic-gate			break;
11517c478bdstevel@tonic-gate
11527c478bdstevel@tonic-gate		hk = H_PAIRKEY(hcp->pagep, hcp->bndx);
11537c478bdstevel@tonic-gate		switch (HPAGE_PTYPE(hk)) {
11547c478bdstevel@tonic-gate		case H_OFFPAGE:
11557c478bdstevel@tonic-gate			memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
11567c478bdstevel@tonic-gate			if (tlen == key->size) {
11577c478bdstevel@tonic-gate				memcpy(&pgno,
11587c478bdstevel@tonic-gate				    HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
11597c478bdstevel@tonic-gate				if ((ret = __db_moff(dbp,
11607c478bdstevel@tonic-gate				    key, pgno, tlen, NULL, &match)) != 0)
11617c478bdstevel@tonic-gate					return (ret);
11627c478bdstevel@tonic-gate				if (match == 0) {
11637c478bdstevel@tonic-gate					F_SET(hcp, H_OK);
11647c478bdstevel@tonic-gate					return (0);
11657c478bdstevel@tonic-gate				}
11667c478bdstevel@tonic-gate			}
11677c478bdstevel@tonic-gate			break;
11687c478bdstevel@tonic-gate		case H_KEYDATA:
11697c478bdstevel@tonic-gate			if (key->size == LEN_HKEY(hcp->pagep,
11707c478bdstevel@tonic-gate			    hcp->hdr->pagesize, hcp->bndx) &&
11717c478bdstevel@tonic-gate			    memcmp(key->data,
11727c478bdstevel@tonic-gate			    HKEYDATA_DATA(hk), key->size) == 0) {
11737c478bdstevel@tonic-gate				F_SET(hcp, H_OK);
11747c478bdstevel@tonic-gate				return (0);
11757c478bdstevel@tonic-gate			}
11767c478bdstevel@tonic-gate			break;
11777c478bdstevel@tonic-gate		case H_DUPLICATE:
11787c478bdstevel@tonic-gate		case H_OFFDUP:
11797c478bdstevel@tonic-gate			/*
11807c478bdstevel@tonic-gate			 * These are errors because keys are never
11817c478bdstevel@tonic-gate			 * duplicated, only data items are.
11827c478bdstevel@tonic-gate			 */
11837c478bdstevel@tonic-gate			return (__db_pgfmt(dbp, PGNO(hcp->pagep)));
11847c478bdstevel@tonic-gate		}
11857c478bdstevel@tonic-gate		hcp->stats.hash_collisions++;
11867c478bdstevel@tonic-gate	}
11877c478bdstevel@tonic-gate
11887c478bdstevel@tonic-gate	/*
11897c478bdstevel@tonic-gate	 * Item was not found, adjust cursor properly.
11907c478bdstevel@tonic-gate	 */
11917c478bdstevel@tonic-gate
11927c478bdstevel@tonic-gate	if (sought != 0)
11937c478bdstevel@tonic-gate		return (ret);
11947c478bdstevel@tonic-gate
11957c478bdstevel@tonic-gate	if ((t_ret = __ham_item_done(dbc, 0)) != 0 && ret == 0)
11967c478bdstevel@tonic-gate		ret = t_ret;
11977c478bdstevel@tonic-gate	return (ret);
11987c478bdstevel@tonic-gate}
11997c478bdstevel@tonic-gate
12007c478bdstevel@tonic-gate/*
12017c478bdstevel@tonic-gate * Initialize a dbt using some possibly already allocated storage
12027c478bdstevel@tonic-gate * for items.
12037c478bdstevel@tonic-gate * PUBLIC: int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *));
12047c478bdstevel@tonic-gate */
12057c478bdstevel@tonic-gateint
12067c478bdstevel@tonic-gate__ham_init_dbt(dbt, size, bufp, sizep)
12077c478bdstevel@tonic-gate	DBT *dbt;
12087c478bdstevel@tonic-gate	u_int32_t size;
12097c478bdstevel@tonic-gate	void **bufp;
12107c478bdstevel@tonic-gate	u_int32_t *sizep;
12117c478bdstevel@tonic-gate{
12127c478bdstevel@tonic-gate	int ret;
12137c478bdstevel@tonic-gate
12147c478bdstevel@tonic-gate	memset(dbt, 0, sizeof(*dbt));
12157c478bdstevel@tonic-gate	if (*sizep < size) {
12167c478bdstevel@tonic-gate		if ((ret = __os_realloc(bufp, size)) != 0) {
12177c478bdstevel@tonic-gate			*sizep = 0;
12187c478bdstevel@tonic-gate			return (ret);
12197c478bdstevel@tonic-gate		}
12207c478bdstevel@tonic-gate		*sizep = size;
12217c478bdstevel@tonic-gate	}
12227c478bdstevel@tonic-gate	dbt->data = *bufp;
12237c478bdstevel@tonic-gate	dbt->size = size;
12247c478bdstevel@tonic-gate	return (0);
12257c478bdstevel@tonic-gate}
12267c478bdstevel@tonic-gate
12277c478bdstevel@tonic-gate/*
12287c478bdstevel@tonic-gate * Adjust the cursor after an insert or delete.  The cursor passed is
12297c478bdstevel@tonic-gate * the one that was operated upon; we just need to check any of the
12307c478bdstevel@tonic-gate * others.
12317c478bdstevel@tonic-gate *
12327c478bdstevel@tonic-gate * len indicates the length of the item added/deleted
12337c478bdstevel@tonic-gate * add indicates if the item indicated by the cursor has just been
12347c478bdstevel@tonic-gate * added (add == 1) or deleted (add == 0).
12357c478bdstevel@tonic-gate * dup indicates if the addition occurred into a duplicate set.
12367c478bdstevel@tonic-gate *
12377c478bdstevel@tonic-gate * PUBLIC: void __ham_c_update
12387c478bdstevel@tonic-gate * PUBLIC:    __P((HASH_CURSOR *, db_pgno_t, u_int32_t, int, int));
12397c478bdstevel@tonic-gate */
12407c478bdstevel@tonic-gatevoid
12417c478bdstevel@tonic-gate__ham_c_update(hcp, chg_pgno, len, add, is_dup)
12427c478bdstevel@tonic-gate	HASH_CURSOR *hcp;
12437c478bdstevel@tonic-gate	db_pgno_t chg_pgno;
12447c478bdstevel@tonic-gate	u_int32_t len;
12457c478bdstevel@tonic-gate	int add, is_dup;
12467c478bdstevel@tonic-gate{
12477c478bdstevel@tonic-gate	DB *dbp;
12487c478bdstevel@tonic-gate	DBC *cp;
12497c478bdstevel@tonic-gate	HASH_CURSOR *lcp;
12507c478bdstevel@tonic-gate	int page_deleted;
12517c478bdstevel@tonic-gate
12527c478bdstevel@tonic-gate	/*
12537c478bdstevel@tonic-gate	 * Regular adds are always at the end of a given page, so we never
12547c478bdstevel@tonic-gate	 * have to adjust anyone's cursor after a regular add.
12557c478bdstevel@tonic-gate	 */
12567c478bdstevel@tonic-gate	if (!is_dup && add)
12577c478bdstevel@tonic-gate		return;
12587c478bdstevel@tonic-gate
12597c478bdstevel@tonic-gate	/*
12607c478bdstevel@tonic-gate	 * Determine if a page was deleted.    If this is a regular update
12617c478bdstevel@tonic-gate	 * (i.e., not is_dup) then the deleted page's number will be that in
12627c478bdstevel@tonic-gate	 * chg_pgno, and the pgno in the cursor will be different.  If this
12637c478bdstevel@tonic-gate	 * was an onpage-duplicate, then the same conditions apply.  If this
12647c478bdstevel@tonic-gate	 * was an off-page duplicate, then we need to verify if hcp->dpgno
12657c478bdstevel@tonic-gate	 * is the same (no delete) or different (delete) than chg_pgno.
12667c478bdstevel@tonic-gate	 */
12677c478bdstevel@tonic-gate	if (!is_dup || hcp->dpgno == PGNO_INVALID)
12687c478bdstevel@tonic-gate		page_deleted =
12697c478bdstevel@tonic-gate		    chg_pgno != PGNO_INVALID && chg_pgno != hcp->pgno;
12707c478bdstevel@tonic-gate	else
12717c478bdstevel@tonic-gate		page_deleted =
12727c478bdstevel@tonic-gate		    chg_pgno != PGNO_INVALID && chg_pgno != hcp->dpgno;
12737c478bdstevel@tonic-gate
12747c478bdstevel@tonic-gate	dbp = hcp->dbc->dbp;
12757c478bdstevel@tonic-gate	DB_THREAD_LOCK(dbp);
12767c478bdstevel@tonic-gate
12777c478bdstevel@tonic-gate	for (cp = TAILQ_FIRST(&dbp->active_queue); cp != NULL;
12787c478bdstevel@tonic-gate	    cp = TAILQ_NEXT(cp, links)) {
12797c478bdstevel@tonic-gate		if (cp->internal == hcp)
12807c478bdstevel@tonic-gate			continue;
12817c478bdstevel@tonic-gate
12827c478bdstevel@tonic-gate		lcp = (HASH_CURSOR *)cp->internal;
12837c478bdstevel@tonic-gate
12847c478bdstevel@tonic-gate		if (!is_dup && lcp->pgno != chg_pgno)
12857c478bdstevel@tonic-gate			continue;
12867c478bdstevel@tonic-gate
12877c478bdstevel@tonic-gate		if (is_dup) {
12887c478bdstevel@tonic-gate			if (F_ISSET(hcp, H_DELETED) && lcp->pgno != chg_pgno)
12897c478bdstevel@tonic-gate				continue;
12907c478bdstevel@tonic-gate			if (!F_ISSET(hcp, H_DELETED) && lcp->dpgno != chg_pgno)
12917c478bdstevel@tonic-gate				continue;
12927c478bdstevel@tonic-gate		}
12937c478bdstevel@tonic-gate
12947c478bdstevel@tonic-gate		if (page_deleted) {
12957c478bdstevel@tonic-gate			if (is_dup) {
12967c478bdstevel@tonic-gate				lcp->dpgno = hcp->dpgno;
12977c478bdstevel@tonic-gate				lcp->dndx = hcp->dndx;
12987c478bdstevel@tonic-gate			} else {
12997c478bdstevel@tonic-gate				lcp->pgno = hcp->pgno;
13007c478bdstevel@tonic-gate				lcp->bndx = hcp->bndx;
13017c478bdstevel@tonic-gate				lcp->bucket = hcp->bucket;
13027c478bdstevel@tonic-gate			}
13037c478bdstevel@tonic-gate			F_CLR(lcp, H_ISDUP);
13047c478bdstevel@tonic-gate			continue;
13057c478bdstevel@tonic-gate		}
13067c478bdstevel@tonic-gate
13077c478bdstevel@tonic-gate		if (!is_dup && lcp->bndx > hcp->bndx)
13087c478bdstevel@tonic-gate			lcp->bndx--;
13097c478bdstevel@tonic-gate		else if (!is_dup && lcp->bndx == hcp->bndx)
13107c478bdstevel@tonic-gate			F_SET(lcp, H_DELETED);
13117c478bdstevel@tonic-gate		else if (is_dup && lcp->bndx == hcp->bndx) {
13127c478bdstevel@tonic-gate			/* Assign dpgno in case there was page conversion. */
13137c478bdstevel@tonic-gate			lcp->dpgno = hcp->dpgno;
13147c478bdstevel@tonic-gate			if (add && lcp->dndx >= hcp->dndx )
13157c478bdstevel@tonic-gate				lcp->dndx++;
13167c478bdstevel@tonic-gate			else if (!add && lcp->dndx > hcp->dndx)
13177c478bdstevel@tonic-gate				lcp->dndx--;
13187c478bdstevel@tonic-gate			else if (!add && lcp->dndx == hcp->dndx)
13197c478bdstevel@tonic-gate				F_SET(lcp, H_DELETED);
13207c478bdstevel@tonic-gate
13217c478bdstevel@tonic-gate			/* Now adjust on-page information. */
13227c478bdstevel@tonic-gate			if (lcp->dpgno == PGNO_INVALID)
13237c478bdstevel@tonic-gate				if (add) {
13247c478bdstevel@tonic-gate					lcp->dup_tlen += len;
13257c478bdstevel@tonic-gate					if (lcp->dndx > hcp->dndx)
13267c478bdstevel@tonic-gate						lcp->dup_off += len;
13277c478bdstevel@tonic-gate				} else {
13287c478bdstevel@tonic-gate					lcp->dup_tlen -= len;
13297c478bdstevel@tonic-gate					if (lcp->dndx > hcp->dndx)
13307c478bdstevel@tonic-gate						lcp->dup_off -= len;
13317c478bdstevel@tonic-gate				}
13327c478bdstevel@tonic-gate		}
13337c478bdstevel@tonic-gate	}
13347c478bdstevel@tonic-gate	DB_THREAD_UNLOCK(dbp);
13357c478bdstevel@tonic-gate}
13367c478bdstevel@tonic-gate
1337