1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996, 1997, 1998
5 *	Sleepycat Software.  All rights reserved.
6 */
7/*
8 * Copyright (c) 1990, 1993, 1994
9 *	Margo Seltzer.  All rights reserved.
10 */
11/*
12 * Copyright (c) 1990, 1993, 1994
13 *	The Regents of the University of California.  All rights reserved.
14 *
15 * This code is derived from software contributed to Berkeley by
16 * Margo Seltzer.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
20 * are met:
21 * 1. Redistributions of source code must retain the above copyright
22 *    notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 *    notice, this list of conditions and the following disclaimer in the
25 *    documentation and/or other materials provided with the distribution.
26 * 3. All advertising materials mentioning features or use of this software
27 *    must display the following acknowledgement:
28 *	This product includes software developed by the University of
29 *	California, Berkeley and its contributors.
30 * 4. Neither the name of the University nor the names of its contributors
31 *    may be used to endorse or promote products derived from this software
32 *    without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44 * SUCH DAMAGE.
45 */
46
47#include "config.h"
48
49#ifndef lint
50static const char sccsid[] = "@(#)hash.c	10.63 (Sleepycat) 12/11/98";
51#endif /* not lint */
52
53#ifndef NO_SYSTEM_INCLUDES
54#include <sys/types.h>
55
56#include <errno.h>
57#include <stdlib.h>
58#include <string.h>
59#endif
60
61#include "db_int.h"
62#include "shqueue.h"
63#include "db_page.h"
64#include "db_am.h"
65#include "db_ext.h"
66#include "hash.h"
67#include "btree.h"
68#include "log.h"
69#include "db_shash.h"
70#include "lock.h"
71#include "lock_ext.h"
72
73static int  __ham_c_close __P((DBC *));
74static int  __ham_c_del __P((DBC *, u_int32_t));
75static int  __ham_c_destroy __P((DBC *));
76static int  __ham_c_get __P((DBC *, DBT *, DBT *, u_int32_t));
77static int  __ham_c_put __P((DBC *, DBT *, DBT *, u_int32_t));
78static int  __ham_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
79static int  __ham_dup_return __P((DBC *, DBT *, u_int32_t));
80static int  __ham_expand_table __P((DBC *));
81static void __ham_init_htab __P((DBC *, u_int32_t, u_int32_t));
82static int  __ham_lookup __P((DBC *, const DBT *, u_int32_t, db_lockmode_t));
83static int  __ham_overwrite __P((DBC *, DBT *));
84
85/************************** INTERFACE ROUTINES ***************************/
86/* OPEN/CLOSE */
87
88/*
89 * __ham_open --
90 *
91 * PUBLIC: int __ham_open __P((DB *, DB_INFO *));
92 */
93int
94__ham_open(dbp, dbinfo)
95	DB *dbp;
96	DB_INFO *dbinfo;
97{
98	DB_ENV *dbenv;
99	DBC *dbc;
100	HASH_CURSOR *hcp;
101	int file_existed, ret;
102
103	dbc = NULL;
104	dbenv = dbp->dbenv;
105
106	/* Set the hash function if specified by the user. */
107	if (dbinfo != NULL && dbinfo->h_hash != NULL)
108		dbp->h_hash = dbinfo->h_hash;
109
110	/*
111	 * Initialize the remaining fields of the dbp.  The only function
112	 * that differs from the default set is __ham_stat().
113	 */
114	dbp->internal = NULL;
115	dbp->am_close = __ham_close;
116	dbp->del = __ham_delete;
117	dbp->stat = __ham_stat;
118
119	/* Get a cursor we can use for the rest of this function. */
120	if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
121		goto out;
122
123	hcp = (HASH_CURSOR *)dbc->internal;
124	GET_META(dbp, hcp, ret);
125	if (ret != 0)
126		goto out;
127
128	/*
129	 * If this is a new file, initialize it, and put it back dirty.
130	 */
131
132	/* Initialize the hdr structure */
133	if (hcp->hdr->magic == DB_HASHMAGIC) {
134		file_existed = 1;
135		/* File exists, verify the data in the header. */
136		if (dbp->h_hash == NULL)
137			dbp->h_hash =
138			    hcp->hdr->version < 5 ? __ham_func4 : __ham_func5;
139		if (dbp->h_hash(CHARKEY, sizeof(CHARKEY)) !=
140		    hcp->hdr->h_charkey) {
141			__db_err(dbp->dbenv, "hash: incompatible hash function");
142			ret = EINVAL;
143			goto out;
144		}
145		if (F_ISSET(hcp->hdr, DB_HASH_DUP))
146			F_SET(dbp, DB_AM_DUP);
147	} else {
148		/*
149		 * File does not exist, we must initialize the header.  If
150		 * locking is enabled that means getting a write lock first.
151		 */
152		file_existed = 0;
153		if (F_ISSET(dbp, DB_AM_LOCKING) &&
154		    ((ret = lock_put(dbenv->lk_info, hcp->hlock)) != 0 ||
155		    (ret = lock_get(dbenv->lk_info, dbc->locker, 0,
156		        &dbc->lock_dbt, DB_LOCK_WRITE, &hcp->hlock)) != 0)) {
157			if (ret < 0)
158				ret = EAGAIN;
159			goto out;
160		}
161
162		__ham_init_htab(dbc, dbinfo != NULL ? dbinfo->h_nelem : 0,
163		    dbinfo != NULL ? dbinfo->h_ffactor : 0);
164		if (F_ISSET(dbp, DB_AM_DUP))
165			F_SET(hcp->hdr, DB_HASH_DUP);
166		if ((ret = __ham_dirty_page(dbp, (PAGE *)hcp->hdr)) != 0)
167			goto out;
168	}
169
170	/* Release the meta data page */
171	RELEASE_META(dbp, hcp);
172	if ((ret  = dbc->c_close(dbc)) != 0)
173		goto out;
174
175	/* Sync the file so that we know that the meta data goes to disk. */
176	if (!file_existed && (ret = dbp->sync(dbp, 0)) != 0)
177		goto out;
178	return (0);
179
180out:	(void)__ham_close(dbp);
181	return (ret);
182}
183
184/*
185 * PUBLIC: int __ham_close __P((DB *));
186 */
187int
188__ham_close(dbp)
189	DB *dbp;
190{
191	COMPQUIET(dbp, NULL);
192	return (0);
193}
194
195/************************** LOCAL CREATION ROUTINES **********************/
196/*
197 * Returns 0 on No Error
198 */
199static void
200__ham_init_htab(dbc, nelem, ffactor)
201	DBC *dbc;
202	u_int32_t nelem, ffactor;
203{
204	DB *dbp;
205	HASH_CURSOR *hcp;
206	int32_t l2, nbuckets;
207
208	hcp = (HASH_CURSOR *)dbc->internal;
209	dbp = dbc->dbp;
210	memset(hcp->hdr, 0, sizeof(HASHHDR));
211	hcp->hdr->ffactor = ffactor;
212	hcp->hdr->pagesize = dbp->pgsize;
213	ZERO_LSN(hcp->hdr->lsn);
214	hcp->hdr->magic = DB_HASHMAGIC;
215	hcp->hdr->version = DB_HASHVERSION;
216
217	if (dbp->h_hash == NULL)
218		dbp->h_hash = hcp->hdr->version < 5 ? __ham_func4 : __ham_func5;
219	hcp->hdr->h_charkey = dbp->h_hash(CHARKEY, sizeof(CHARKEY));
220	if (nelem != 0 && hcp->hdr->ffactor != 0) {
221		nelem = (nelem - 1) / hcp->hdr->ffactor + 1;
222		l2 = __db_log2(nelem > 2 ? nelem : 2);
223	} else
224		l2 = 2;
225
226	nbuckets = 1 << l2;
227
228	hcp->hdr->ovfl_point = l2;
229	hcp->hdr->last_freed = PGNO_INVALID;
230
231	hcp->hdr->max_bucket = hcp->hdr->high_mask = nbuckets - 1;
232	hcp->hdr->low_mask = (nbuckets >> 1) - 1;
233	memcpy(hcp->hdr->uid, dbp->fileid, DB_FILE_ID_LEN);
234}
235
236static int
237__ham_delete(dbp, txn, key, flags)
238	DB *dbp;
239	DB_TXN *txn;
240	DBT *key;
241	u_int32_t flags;
242{
243	DBC *dbc;
244	HASH_CURSOR *hcp;
245	int ret, tret;
246
247	DB_PANIC_CHECK(dbp);
248
249	if ((ret =
250	    __db_delchk(dbp, key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
251		return (ret);
252
253	if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
254		return (ret);
255
256	DEBUG_LWRITE(dbc, txn, "ham_delete", key, NULL, flags);
257
258	hcp = (HASH_CURSOR *)dbc->internal;
259	GET_META(dbp, hcp, ret);
260	if (ret != 0)
261		goto out;
262
263	hcp->stats.hash_deleted++;
264	if ((ret = __ham_lookup(dbc, key, 0, DB_LOCK_WRITE)) == 0)
265		if (F_ISSET(hcp, H_OK))
266			ret = __ham_del_pair(dbc, 1);
267		else
268			ret = DB_NOTFOUND;
269
270	RELEASE_META(dbp, hcp);
271out:	if ((tret = dbc->c_close(dbc)) != 0 && ret == 0)
272		ret = tret;
273	return (ret);
274}
275
276/* ****************** CURSORS ********************************** */
277/*
278 * __ham_c_init --
279 *	Initialize the hash-specific portion of a cursor.
280 *
281 * PUBLIC: int __ham_c_init __P((DBC *));
282 */
283int
284__ham_c_init(dbc)
285	DBC *dbc;
286  {
287	HASH_CURSOR *new_curs;
288	int ret;
289
290	if ((ret = __os_calloc(1, sizeof(struct cursor_t), &new_curs)) != 0)
291		return (ret);
292	if ((ret =
293	    __os_malloc(dbc->dbp->pgsize, NULL, &new_curs->split_buf)) != 0) {
294		__os_free(new_curs, sizeof(*new_curs));
295		return (ret);
296	}
297
298	new_curs->dbc = dbc;
299
300	dbc->internal = new_curs;
301	dbc->c_am_close = __ham_c_close;
302	dbc->c_am_destroy = __ham_c_destroy;
303	dbc->c_del = __ham_c_del;
304	dbc->c_get = __ham_c_get;
305	dbc->c_put = __ham_c_put;
306
307	__ham_item_init(new_curs);
308
309	return (0);
310}
311
312/*
313 * __ham_c_close --
314 *	Close down the cursor from a single use.
315 */
316static int
317__ham_c_close(dbc)
318	DBC *dbc;
319{
320	int ret;
321
322	if ((ret = __ham_item_done(dbc, 0)) != 0)
323		return (ret);
324
325	__ham_item_init((HASH_CURSOR *)dbc->internal);
326	return (0);
327}
328
329/*
330 * __ham_c_destroy --
331 *	Cleanup the access method private part of a cursor.
332 */
333static int
334__ham_c_destroy(dbc)
335	DBC *dbc;
336{
337	HASH_CURSOR *hcp;
338
339	hcp = (HASH_CURSOR *)dbc->internal;
340	if (hcp->split_buf != NULL)
341		__os_free(hcp->split_buf, dbc->dbp->pgsize);
342	__os_free(hcp, sizeof(HASH_CURSOR));
343
344	return (0);
345}
346
347static int
348__ham_c_del(dbc, flags)
349	DBC *dbc;
350	u_int32_t flags;
351{
352	DB *dbp;
353	DBT repldbt;
354	HASH_CURSOR *hcp;
355	HASH_CURSOR save_curs;
356	db_pgno_t ppgno, chg_pgno;
357	int ret, t_ret;
358
359	DEBUG_LWRITE(dbc, dbc->txn, "ham_c_del", NULL, NULL, flags);
360	dbp = dbc->dbp;
361	DB_PANIC_CHECK(dbp);
362	hcp = (HASH_CURSOR *)dbc->internal;
363
364	if ((ret = __db_cdelchk(dbc->dbp, flags,
365	    F_ISSET(dbc->dbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
366		return (ret);
367
368	if (F_ISSET(hcp, H_DELETED))
369		return (DB_NOTFOUND);
370
371	/*
372	 * If we are in the concurrent DB product and this cursor
373	 * is not a write cursor, then this request is invalid.
374	 * If it is a simple write cursor, then we need to upgrade its
375	 * lock.
376	 */
377	if (F_ISSET(dbp, DB_AM_CDB)) {
378		/* Make sure it's a valid update cursor. */
379		if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
380			return (EINVAL);
381
382		if (F_ISSET(dbc, DBC_RMW) &&
383		    (ret = lock_get(dbp->dbenv->lk_info, dbc->locker,
384		    DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE,
385		    &dbc->mylock)) != 0)
386			return (EAGAIN);
387	}
388
389	GET_META(dbp, hcp, ret);
390	if (ret != 0)
391		return (ret);
392
393	SAVE_CURSOR(hcp, &save_curs);
394	hcp->stats.hash_deleted++;
395
396	if ((ret = __ham_get_cpage(dbc, DB_LOCK_WRITE)) != 0)
397		goto out;
398	if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno != PGNO_INVALID) {
399		/*
400		 * We are about to remove a duplicate from offpage.
401		 *
402		 * There are 4 cases.
403		 * 1. We will remove an item on a page, but there are more
404		 *    items on that page.
405		 * 2. We will remove the last item on a page, but there is a
406		 *    following page of duplicates.
407		 * 3. We will remove the last item on a page, this page was the
408		 *    last page in a duplicate set, but there were dups before
409		 *    it.
410		 * 4. We will remove the last item on a page, removing the last
411		 *    duplicate.
412		 * In case 1 hcp->dpagep is unchanged.
413		 * In case 2 hcp->dpagep comes back pointing to the next dup
414		 *     page.
415		 * In case 3 hcp->dpagep comes back NULL.
416		 * In case 4 hcp->dpagep comes back NULL.
417		 *
418		 * Case 4 results in deleting the pair off the master page.
419		 * The normal code for doing this knows how to delete the
420		 * duplicates, so we will handle this case in the normal code.
421		 */
422		ppgno = PREV_PGNO(hcp->dpagep);
423		if (ppgno == PGNO_INVALID &&
424		    NEXT_PGNO(hcp->dpagep) == PGNO_INVALID &&
425		    NUM_ENT(hcp->dpagep) == 1)
426			goto normal;
427
428		/* Remove item from duplicate page. */
429		chg_pgno = hcp->dpgno;
430		if ((ret = __db_drem(dbc,
431		    &hcp->dpagep, hcp->dndx, __ham_del_page)) != 0)
432			goto out;
433
434		if (hcp->dpagep == NULL) {
435			if (ppgno != PGNO_INVALID) {		/* Case 3 */
436				hcp->dpgno = ppgno;
437				if ((ret = __ham_get_cpage(dbc,
438				    DB_LOCK_READ)) != 0)
439					goto out;
440				hcp->dndx = NUM_ENT(hcp->dpagep);
441				F_SET(hcp, H_DELETED);
442			} else {				/* Case 4 */
443				ret = __ham_del_pair(dbc, 1);
444				hcp->dpgno = PGNO_INVALID;
445				/*
446				 * Delpair updated the cursor queue, so we
447				 * don't have to do that here.
448				 */
449				chg_pgno = PGNO_INVALID;
450			}
451		} else if (PGNO(hcp->dpagep) != hcp->dpgno) {
452			hcp->dndx = 0;				/* Case 2 */
453			hcp->dpgno = PGNO(hcp->dpagep);
454			if (ppgno == PGNO_INVALID)
455				memcpy(HOFFDUP_PGNO(P_ENTRY(hcp->pagep,
456				    H_DATAINDEX(hcp->bndx))),
457				    &hcp->dpgno, sizeof(db_pgno_t));
458			/*
459			 * We need to put the master page here, because
460			 * although we have a duplicate page, the master
461			 * page is dirty, and ham_item_done assumes that
462			 * if you have a duplicate page, it's the only one
463			 * that can be dirty.
464			 */
465			ret = __ham_put_page(dbp, hcp->pagep, 1);
466			hcp->pagep = NULL;
467			F_SET(hcp, H_DELETED);
468		} else						/* Case 1 */
469			F_SET(hcp, H_DELETED);
470		if (chg_pgno != PGNO_INVALID)
471			__ham_c_update(hcp, chg_pgno, 0, 0, 1);
472	} else if (F_ISSET(hcp, H_ISDUP)) {			/* on page */
473		if (hcp->dup_off == 0 && DUP_SIZE(hcp->dup_len) ==
474		    LEN_HDATA(hcp->pagep, hcp->hdr->pagesize, hcp->bndx))
475			ret = __ham_del_pair(dbc, 1);
476		else {
477			repldbt.flags = 0;
478			F_SET(&repldbt, DB_DBT_PARTIAL);
479			repldbt.doff = hcp->dup_off;
480			repldbt.dlen = DUP_SIZE(hcp->dup_len);
481			repldbt.size = 0;
482			repldbt.data =
483			    HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx));
484			ret = __ham_replpair(dbc, &repldbt, 0);
485			hcp->dup_tlen -= DUP_SIZE(hcp->dup_len);
486			F_SET(hcp, H_DELETED);
487			__ham_c_update(hcp, hcp->pgno,
488			    DUP_SIZE(hcp->dup_len), 0, 1);
489		}
490
491	} else
492		/* Not a duplicate */
493normal:		ret = __ham_del_pair(dbc, 1);
494
495out:	if ((t_ret = __ham_item_done(dbc, ret == 0)) != 0 && ret == 0)
496		ret = t_ret;
497	RELEASE_META(dbp, hcp);
498	RESTORE_CURSOR(dbp, hcp, &save_curs, ret);
499	if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW))
500		(void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
501		    DB_LOCK_IWRITE, 0);
502	return (ret);
503}
504
505static int
506__ham_c_get(dbc, key, data, flags)
507	DBC *dbc;
508	DBT *key;
509	DBT *data;
510	u_int32_t flags;
511{
512	DB *dbp;
513	HASH_CURSOR *hcp, save_curs;
514	db_lockmode_t lock_type;
515	int get_key, ret, t_ret;
516
517	DEBUG_LREAD(dbc, dbc->txn, "ham_c_get",
518	    flags == DB_SET || flags == DB_SET_RANGE ? key : NULL,
519	    NULL, flags);
520
521	hcp = (HASH_CURSOR *)dbc->internal;
522	dbp = dbc->dbp;
523	DB_PANIC_CHECK(dbp);
524	SAVE_CURSOR(hcp, &save_curs);
525	if ((ret =
526	    __db_cgetchk(dbp, key, data, flags, IS_VALID(hcp))) != 0)
527		return (ret);
528
529	/* Clear OR'd in additional bits so we can check for flag equality. */
530	if (LF_ISSET(DB_RMW)) {
531		lock_type = DB_LOCK_WRITE;
532		LF_CLR(DB_RMW);
533	} else
534		lock_type = DB_LOCK_READ;
535
536	GET_META(dbp, hcp, ret);
537	if (ret != 0)
538		return (ret);
539	hcp->stats.hash_get++;
540	hcp->seek_size = 0;
541
542	ret = 0;
543	get_key = 1;
544	switch (flags) {
545	case DB_PREV:
546		if (hcp->bucket != BUCKET_INVALID) {
547			ret = __ham_item_prev(dbc, lock_type);
548			break;
549		}
550		/* FALLTHROUGH */
551	case DB_LAST:
552		ret = __ham_item_last(dbc, lock_type);
553		break;
554	case DB_FIRST:
555		ret = __ham_item_first(dbc, lock_type);
556		break;
557	case DB_NEXT_DUP:
558		if (hcp->bucket == BUCKET_INVALID)
559			ret = EINVAL;
560		else {
561			F_SET(hcp, H_DUPONLY);
562			ret = __ham_item_next(dbc, lock_type);
563		}
564		break;
565	case DB_NEXT:
566		if (hcp->bucket == BUCKET_INVALID)
567			hcp->bucket = 0;
568		ret = __ham_item_next(dbc, lock_type);
569		break;
570	case DB_SET:
571	case DB_SET_RANGE:
572	case DB_GET_BOTH:
573		if (F_ISSET(dbc, DBC_CONTINUE)) {
574			F_SET(hcp, H_DUPONLY);
575			ret = __ham_item_next(dbc, lock_type);
576		} else if (F_ISSET(dbc, DBC_KEYSET))
577			ret = __ham_item(dbc, lock_type);
578		else
579			ret = __ham_lookup(dbc, key, 0, lock_type);
580		get_key = 0;
581		break;
582	case DB_CURRENT:
583		if (F_ISSET(hcp, H_DELETED)) {
584			ret = DB_KEYEMPTY;
585			goto out;
586		}
587
588		ret = __ham_item(dbc, lock_type);
589		break;
590	}
591
592	/*
593	 * Must always enter this loop to do error handling and
594	 * check for big key/data pair.
595	 */
596	while (1) {
597		if (ret != 0 && ret != DB_NOTFOUND)
598			goto out1;
599		else if (F_ISSET(hcp, H_OK)) {
600			/* Get the key. */
601			if (get_key && (ret = __db_ret(dbp, hcp->pagep,
602			    H_KEYINDEX(hcp->bndx), key, &dbc->rkey.data,
603			    &dbc->rkey.size)) != 0)
604				goto out1;
605
606			ret = __ham_dup_return(dbc, data, flags);
607			break;
608		} else if (!F_ISSET(hcp, H_NOMORE)) {
609			abort();
610			break;
611		}
612
613		/*
614		 * Ran out of entries in a bucket; change buckets.
615		 */
616		switch (flags) {
617			case DB_LAST:
618			case DB_PREV:
619				ret = __ham_item_done(dbc, 0);
620				if (hcp->bucket == 0) {
621					ret = DB_NOTFOUND;
622					goto out1;
623				}
624				hcp->bucket--;
625				hcp->bndx = NDX_INVALID;
626				if (ret == 0)
627					ret = __ham_item_prev(dbc, lock_type);
628				break;
629			case DB_FIRST:
630			case DB_NEXT:
631				ret = __ham_item_done(dbc, 0);
632				hcp->bndx = NDX_INVALID;
633				hcp->bucket++;
634				hcp->pgno = PGNO_INVALID;
635				hcp->pagep = NULL;
636				if (hcp->bucket > hcp->hdr->max_bucket) {
637					ret = DB_NOTFOUND;
638					goto out1;
639				}
640				if (ret == 0)
641					ret = __ham_item_next(dbc, lock_type);
642				break;
643			case DB_GET_BOTH:
644			case DB_NEXT_DUP:
645			case DB_SET:
646			case DB_SET_RANGE:
647				/* Key not found. */
648				ret = DB_NOTFOUND;
649				goto out1;
650		}
651	}
652out1:	if ((t_ret = __ham_item_done(dbc, 0)) != 0 && ret == 0)
653		ret = t_ret;
654out:	RELEASE_META(dbp, hcp);
655	RESTORE_CURSOR(dbp, hcp, &save_curs, ret);
656	return (ret);
657}
658
659static int
660__ham_c_put(dbc, key, data, flags)
661	DBC *dbc;
662	DBT *key;
663	DBT *data;
664	u_int32_t flags;
665{
666	DB *dbp;
667	DBT tmp_val, *myval;
668	HASH_CURSOR *hcp, save_curs;
669	u_int32_t nbytes;
670	int ret, t_ret;
671
672	dbp = dbc->dbp;
673	DB_PANIC_CHECK(dbp);
674	DEBUG_LWRITE(dbc, dbc->txn, "ham_c_put",
675	    flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL,
676	    data, flags);
677	hcp = (HASH_CURSOR *)dbc->internal;
678
679	if ((ret = __db_cputchk(dbp, key, data, flags,
680	    F_ISSET(dbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
681		return (ret);
682
683	if (F_ISSET(hcp, H_DELETED) &&
684	    flags != DB_KEYFIRST && flags != DB_KEYLAST)
685		return (DB_NOTFOUND);
686
687	/*
688	 * If we are in the concurrent DB product and this cursor
689	 * is not a write cursor, then this request is invalid.
690	 * If it is a simple write cursor, then we need to upgrade its
691	 * lock.
692	 */
693	if (F_ISSET(dbp, DB_AM_CDB)) {
694		/* Make sure it's a valid update cursor. */
695		if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
696			return (EINVAL);
697
698		if (F_ISSET(dbc, DBC_RMW) &&
699		    (ret = lock_get(dbp->dbenv->lk_info, dbc->locker,
700		    DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE,
701		    &dbc->mylock)) != 0)
702			return (EAGAIN);
703	}
704
705	GET_META(dbp, hcp, ret);
706	if (ret != 0)
707		return (ret);
708
709	SAVE_CURSOR(hcp, &save_curs);
710	hcp->stats.hash_put++;
711
712	switch (flags) {
713	case DB_KEYLAST:
714	case DB_KEYFIRST:
715		nbytes = (ISBIG(hcp, key->size) ? HOFFPAGE_PSIZE :
716		    HKEYDATA_PSIZE(key->size)) +
717		    (ISBIG(hcp, data->size) ? HOFFPAGE_PSIZE :
718		    HKEYDATA_PSIZE(data->size));
719		if ((ret = __ham_lookup(dbc,
720		    key, nbytes, DB_LOCK_WRITE)) == DB_NOTFOUND) {
721			ret = 0;
722			if (hcp->seek_found_page != PGNO_INVALID &&
723			    hcp->seek_found_page != hcp->pgno) {
724				if ((ret = __ham_item_done(dbc, 0)) != 0)
725					goto out;
726				hcp->pgno = hcp->seek_found_page;
727				hcp->bndx = NDX_INVALID;
728			}
729
730			if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) {
731				/*
732				 * A partial put, but the key does not exist
733				 * and we are not beginning the write at 0.
734				 * We must create a data item padded up to doff
735				 * and then write the new bytes represented by
736				 * val.
737				 */
738				if ((ret = __ham_init_dbt(&tmp_val,
739				    data->size + data->doff,
740				    &dbc->rdata.data, &dbc->rdata.size)) == 0) {
741					memset(tmp_val.data, 0, data->doff);
742					memcpy((u_int8_t *)tmp_val.data +
743					    data->doff, data->data, data->size);
744					myval = &tmp_val;
745				}
746			} else
747				myval = (DBT *)data;
748
749			if (ret == 0)
750				ret = __ham_add_el(dbc, key, myval, H_KEYDATA);
751			goto done;
752		}
753		break;
754	case DB_BEFORE:
755	case DB_AFTER:
756	case DB_CURRENT:
757		ret = __ham_item(dbc, DB_LOCK_WRITE);
758		break;
759	}
760
761	if (ret == 0) {
762		if ((flags == DB_CURRENT && !F_ISSET(hcp, H_ISDUP)) ||
763		    ((flags == DB_KEYFIRST || flags == DB_KEYLAST) &&
764		    !F_ISSET(dbp, DB_AM_DUP)))
765			ret = __ham_overwrite(dbc, data);
766		else
767			ret = __ham_add_dup(dbc, data, flags);
768	}
769
770done:	if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
771		ret = __ham_expand_table(dbc);
772		F_CLR(hcp, H_EXPAND);
773	}
774
775	if ((t_ret = __ham_item_done(dbc, ret == 0)) != 0 && ret == 0)
776		ret = t_ret;
777
778out:	RELEASE_META(dbp, hcp);
779	RESTORE_CURSOR(dbp, hcp, &save_curs, ret);
780	if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW))
781		(void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
782		    DB_LOCK_IWRITE, 0);
783	return (ret);
784}
785
786/********************************* UTILITIES ************************/
787
788/*
789 * __ham_expand_table --
790 */
791static int
792__ham_expand_table(dbc)
793	DBC *dbc;
794{
795	DB *dbp;
796	HASH_CURSOR *hcp;
797	DB_LSN new_lsn;
798	u_int32_t old_bucket, new_bucket, spare_ndx;
799	int ret;
800
801	dbp = dbc->dbp;
802	hcp = (HASH_CURSOR *)dbc->internal;
803	ret = 0;
804	DIRTY_META(dbp, hcp, ret);
805	if (ret)
806		return (ret);
807
808	/*
809	 * If the split point is about to increase, make sure that we
810	 * have enough extra pages.  The calculation here is weird.
811	 * We'd like to do this after we've upped max_bucket, but it's
812	 * too late then because we've logged the meta-data split.  What
813	 * we'll do between then and now is increment max bucket and then
814	 * see what the log of one greater than that is; here we have to
815	 * look at the log of max + 2.  VERY NASTY STUFF.
816	 */
817	if (__db_log2(hcp->hdr->max_bucket + 2) > hcp->hdr->ovfl_point) {
818		/*
819		 * We are about to shift the split point.  Make sure that
820		 * if the next doubling is going to be big (more than 8
821		 * pages), we have some extra pages around.
822		 */
823		if (hcp->hdr->max_bucket + 1 >= 8 &&
824		    hcp->hdr->spares[hcp->hdr->ovfl_point] <
825		    hcp->hdr->spares[hcp->hdr->ovfl_point - 1] +
826		    hcp->hdr->ovfl_point + 1)
827			__ham_init_ovflpages(dbc);
828	}
829
830	/* Now we can log the meta-data split. */
831	if (DB_LOGGING(dbc)) {
832		if ((ret = __ham_splitmeta_log(dbp->dbenv->lg_info,
833		    dbc->txn, &new_lsn, 0, dbp->log_fileid,
834		    hcp->hdr->max_bucket, hcp->hdr->ovfl_point,
835		    hcp->hdr->spares[hcp->hdr->ovfl_point],
836		    &hcp->hdr->lsn)) != 0)
837			return (ret);
838
839		hcp->hdr->lsn = new_lsn;
840	}
841
842	hcp->stats.hash_expansions++;
843	new_bucket = ++hcp->hdr->max_bucket;
844	old_bucket = (hcp->hdr->max_bucket & hcp->hdr->low_mask);
845
846	/*
847	 * If the split point is increasing, copy the current contents
848	 * of the spare split bucket to the next bucket.
849	 */
850	spare_ndx = __db_log2(hcp->hdr->max_bucket + 1);
851	if (spare_ndx > hcp->hdr->ovfl_point) {
852		hcp->hdr->spares[spare_ndx] =
853		    hcp->hdr->spares[hcp->hdr->ovfl_point];
854		hcp->hdr->ovfl_point = spare_ndx;
855	}
856
857	if (new_bucket > hcp->hdr->high_mask) {
858		/* Starting a new doubling */
859		hcp->hdr->low_mask = hcp->hdr->high_mask;
860		hcp->hdr->high_mask = new_bucket | hcp->hdr->low_mask;
861	}
862
863	if (BUCKET_TO_PAGE(hcp, new_bucket) > MAX_PAGES(hcp)) {
864		__db_err(dbp->dbenv,
865		    "hash: Cannot allocate new bucket.  Pages exhausted.");
866		return (ENOSPC);
867	}
868
869	/* Relocate records to the new bucket */
870	return (__ham_split_page(dbc, old_bucket, new_bucket));
871}
872
873/*
874 * PUBLIC: u_int32_t __ham_call_hash __P((HASH_CURSOR *, u_int8_t *, int32_t));
875 */
876u_int32_t
877__ham_call_hash(hcp, k, len)
878	HASH_CURSOR *hcp;
879	u_int8_t *k;
880	int32_t len;
881{
882	u_int32_t n, bucket;
883
884	n = (u_int32_t)(hcp->dbc->dbp->h_hash(k, len));
885
886	bucket = n & hcp->hdr->high_mask;
887	if (bucket > hcp->hdr->max_bucket)
888		bucket = bucket & hcp->hdr->low_mask;
889	return (bucket);
890}
891
892/*
893 * Check for duplicates, and call __db_ret appropriately.  Release
894 * everything held by the cursor.
895 */
896static int
897__ham_dup_return(dbc, val, flags)
898	DBC *dbc;
899	DBT *val;
900	u_int32_t flags;
901{
902	DB *dbp;
903	HASH_CURSOR *hcp;
904	PAGE *pp;
905	DBT *myval, tmp_val;
906	db_indx_t ndx;
907	db_pgno_t pgno;
908	u_int32_t off, tlen;
909	u_int8_t *hk, type;
910	int cmp, ret;
911	db_indx_t len;
912
913	/* Check for duplicate and return the first one. */
914	dbp = dbc->dbp;
915	hcp = (HASH_CURSOR *)dbc->internal;
916	ndx = H_DATAINDEX(hcp->bndx);
917	type = HPAGE_TYPE(hcp->pagep, ndx);
918	pp = hcp->pagep;
919	myval = val;
920
921	/*
922	 * There are 4 cases:
923	 * 1. We are not in duplicate, simply call db_ret.
924	 * 2. We are looking at keys and stumbled onto a duplicate.
925	 * 3. We are in the middle of a duplicate set. (ISDUP set)
926	 * 4. This is a duplicate and we need to return a specific item.
927	 */
928
929	/*
930	 * Here we check for the case where we just stumbled onto a
931	 * duplicate.  In this case, we do initialization and then
932	 * let the normal duplicate code handle it.
933	 */
934	if (!F_ISSET(hcp, H_ISDUP))
935		if (type == H_DUPLICATE) {
936			F_SET(hcp, H_ISDUP);
937			hcp->dup_tlen = LEN_HDATA(hcp->pagep,
938			    hcp->hdr->pagesize, hcp->bndx);
939			hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
940			if (flags == DB_LAST || flags == DB_PREV) {
941				hcp->dndx = 0;
942				hcp->dup_off = 0;
943				do {
944					memcpy(&len,
945					    HKEYDATA_DATA(hk) + hcp->dup_off,
946					    sizeof(db_indx_t));
947					hcp->dup_off += DUP_SIZE(len);
948					hcp->dndx++;
949				} while (hcp->dup_off < hcp->dup_tlen);
950				hcp->dup_off -= DUP_SIZE(len);
951				hcp->dndx--;
952			} else {
953				memcpy(&len,
954				    HKEYDATA_DATA(hk), sizeof(db_indx_t));
955				hcp->dup_off = 0;
956				hcp->dndx = 0;
957			}
958			hcp->dup_len = len;
959		} else if (type == H_OFFDUP) {
960			F_SET(hcp, H_ISDUP);
961			memcpy(&pgno, HOFFDUP_PGNO(P_ENTRY(hcp->pagep, ndx)),
962			    sizeof(db_pgno_t));
963			if (flags == DB_LAST || flags == DB_PREV) {
964				if ((ret = __db_dend(dbc,
965				    pgno, &hcp->dpagep)) != 0)
966					return (ret);
967				hcp->dpgno = PGNO(hcp->dpagep);
968				hcp->dndx = NUM_ENT(hcp->dpagep) - 1;
969			} else if ((ret = __ham_next_cpage(dbc,
970			    pgno, 0, H_ISDUP)) != 0)
971				return (ret);
972		}
973
974
975	/*
976	 * If we are retrieving a specific key/data pair, then we
977	 * may need to adjust the cursor before returning data.
978	 */
979	if (flags == DB_GET_BOTH) {
980		if (F_ISSET(hcp, H_ISDUP)) {
981			if (hcp->dpgno != PGNO_INVALID) {
982				if ((ret = __db_dsearch(dbc, 0, val,
983				    hcp->dpgno, &hcp->dndx, &hcp->dpagep, &cmp))
984				    != 0)
985					return (ret);
986				if (cmp == 0)
987					hcp->dpgno = PGNO(hcp->dpagep);
988			} else {
989				__ham_dsearch(dbc, val, &off, &cmp);
990				hcp->dup_off = off;
991			}
992		} else {
993			hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
994			if (((HKEYDATA *)hk)->type == H_OFFPAGE) {
995				memcpy(&tlen,
996				    HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
997				memcpy(&pgno,
998				    HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
999				if ((ret = __db_moff(dbp, val,
1000				    pgno, tlen, dbp->dup_compare, &cmp)) != 0)
1001					return (ret);
1002			} else {
1003				/*
1004				 * We do not zero tmp_val since the comparison
1005				 * routines may only look at data and size.
1006				 */
1007				tmp_val.data = HKEYDATA_DATA(hk);
1008				tmp_val.size = LEN_HDATA(hcp->pagep,
1009				    dbp->pgsize, hcp->bndx);
1010				cmp = dbp->dup_compare == NULL ?
1011				    __bam_defcmp(&tmp_val, val) :
1012				    dbp->dup_compare(&tmp_val, val);
1013			}
1014		}
1015
1016		if (cmp != 0)
1017			return (DB_NOTFOUND);
1018	}
1019
1020	/*
1021	 * Now, everything is initialized, grab a duplicate if
1022	 * necessary.
1023	 */
1024	if (F_ISSET(hcp, H_ISDUP))
1025		if (hcp->dpgno != PGNO_INVALID) {
1026			pp = hcp->dpagep;
1027			ndx = hcp->dndx;
1028		} else {
1029			/*
1030			 * Copy the DBT in case we are retrieving into user
1031			 * memory and we need the parameters for it.  If the
1032			 * user requested a partial, then we need to adjust
1033			 * the user's parameters to get the partial of the
1034			 * duplicate which is itself a partial.
1035			 */
1036			memcpy(&tmp_val, val, sizeof(*val));
1037			if (F_ISSET(&tmp_val, DB_DBT_PARTIAL)) {
1038				/*
1039				 * Take the user's length unless it would go
1040				 * beyond the end of the duplicate.
1041				 */
1042				if (tmp_val.doff + hcp->dup_off > hcp->dup_len)
1043					tmp_val.dlen = 0;
1044				else if (tmp_val.dlen + tmp_val.doff >
1045				    hcp->dup_len)
1046					tmp_val.dlen =
1047					    hcp->dup_len - tmp_val.doff;
1048
1049				/*
1050				 * Calculate the new offset.
1051				 */
1052				tmp_val.doff += hcp->dup_off;
1053			} else {
1054				F_SET(&tmp_val, DB_DBT_PARTIAL);
1055				tmp_val.dlen = hcp->dup_len;
1056				tmp_val.doff = hcp->dup_off + sizeof(db_indx_t);
1057			}
1058			myval = &tmp_val;
1059		}
1060
1061
1062	/*
1063	 * Finally, if we had a duplicate, pp, ndx, and myval should be
1064	 * set appropriately.
1065	 */
1066	if ((ret = __db_ret(dbp, pp, ndx, myval, &dbc->rdata.data,
1067	    &dbc->rdata.size)) != 0)
1068		return (ret);
1069
1070	/*
1071	 * In case we sent a temporary off to db_ret, set the real
1072	 * return values.
1073	 */
1074	val->data = myval->data;
1075	val->size = myval->size;
1076
1077	return (0);
1078}
1079
1080static int
1081__ham_overwrite(dbc, nval)
1082	DBC *dbc;
1083	DBT *nval;
1084{
1085	HASH_CURSOR *hcp;
1086	DBT *myval, tmp_val;
1087	u_int8_t *hk;
1088
1089	hcp = (HASH_CURSOR *)dbc->internal;
1090	if (F_ISSET(dbc->dbp, DB_AM_DUP))
1091		return (__ham_add_dup(dbc, nval, DB_KEYLAST));
1092	else if (!F_ISSET(nval, DB_DBT_PARTIAL)) {
1093		/* Put/overwrite */
1094		memcpy(&tmp_val, nval, sizeof(*nval));
1095		F_SET(&tmp_val, DB_DBT_PARTIAL);
1096		tmp_val.doff = 0;
1097		hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
1098		if (HPAGE_PTYPE(hk) == H_OFFPAGE)
1099			memcpy(&tmp_val.dlen,
1100			    HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
1101		else
1102			tmp_val.dlen = LEN_HDATA(hcp->pagep,
1103			    hcp->hdr->pagesize,hcp->bndx);
1104		myval = &tmp_val;
1105	} else /* Regular partial put */
1106		myval = nval;
1107
1108	return (__ham_replpair(dbc, myval, 0));
1109}
1110
1111/*
1112 * Given a key and a cursor, sets the cursor to the page/ndx on which
1113 * the key resides.  If the key is found, the cursor H_OK flag is set
1114 * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set.
1115 * If the key is not found, the H_OK flag is not set.  If the sought
1116 * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields
1117 * are set indicating where an add might take place.  If it is 0,
1118 * non of the cursor pointer field are valid.
1119 */
1120static int
1121__ham_lookup(dbc, key, sought, mode)
1122	DBC *dbc;
1123	const DBT *key;
1124	u_int32_t sought;
1125	db_lockmode_t mode;
1126{
1127	DB *dbp;
1128	HASH_CURSOR *hcp;
1129	db_pgno_t pgno;
1130	u_int32_t tlen;
1131	int match, ret, t_ret;
1132	u_int8_t *hk;
1133
1134	dbp = dbc->dbp;
1135	hcp = (HASH_CURSOR *)dbc->internal;
1136	/*
1137	 * Set up cursor so that we're looking for space to add an item
1138	 * as we cycle through the pages looking for the key.
1139	 */
1140	if ((ret = __ham_item_reset(dbc)) != 0)
1141		return (ret);
1142	hcp->seek_size = sought;
1143
1144	hcp->bucket = __ham_call_hash(hcp, (u_int8_t *)key->data, key->size);
1145	while (1) {
1146		if ((ret = __ham_item_next(dbc, mode)) != 0)
1147			return (ret);
1148
1149		if (F_ISSET(hcp, H_NOMORE))
1150			break;
1151
1152		hk = H_PAIRKEY(hcp->pagep, hcp->bndx);
1153		switch (HPAGE_PTYPE(hk)) {
1154		case H_OFFPAGE:
1155			memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
1156			if (tlen == key->size) {
1157				memcpy(&pgno,
1158				    HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
1159				if ((ret = __db_moff(dbp,
1160				    key, pgno, tlen, NULL, &match)) != 0)
1161					return (ret);
1162				if (match == 0) {
1163					F_SET(hcp, H_OK);
1164					return (0);
1165				}
1166			}
1167			break;
1168		case H_KEYDATA:
1169			if (key->size == LEN_HKEY(hcp->pagep,
1170			    hcp->hdr->pagesize, hcp->bndx) &&
1171			    memcmp(key->data,
1172			    HKEYDATA_DATA(hk), key->size) == 0) {
1173				F_SET(hcp, H_OK);
1174				return (0);
1175			}
1176			break;
1177		case H_DUPLICATE:
1178		case H_OFFDUP:
1179			/*
1180			 * These are errors because keys are never
1181			 * duplicated, only data items are.
1182			 */
1183			return (__db_pgfmt(dbp, PGNO(hcp->pagep)));
1184		}
1185		hcp->stats.hash_collisions++;
1186	}
1187
1188	/*
1189	 * Item was not found, adjust cursor properly.
1190	 */
1191
1192	if (sought != 0)
1193		return (ret);
1194
1195	if ((t_ret = __ham_item_done(dbc, 0)) != 0 && ret == 0)
1196		ret = t_ret;
1197	return (ret);
1198}
1199
1200/*
1201 * Initialize a dbt using some possibly already allocated storage
1202 * for items.
1203 * PUBLIC: int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *));
1204 */
1205int
1206__ham_init_dbt(dbt, size, bufp, sizep)
1207	DBT *dbt;
1208	u_int32_t size;
1209	void **bufp;
1210	u_int32_t *sizep;
1211{
1212	int ret;
1213
1214	memset(dbt, 0, sizeof(*dbt));
1215	if (*sizep < size) {
1216		if ((ret = __os_realloc(bufp, size)) != 0) {
1217			*sizep = 0;
1218			return (ret);
1219		}
1220		*sizep = size;
1221	}
1222	dbt->data = *bufp;
1223	dbt->size = size;
1224	return (0);
1225}
1226
1227/*
1228 * Adjust the cursor after an insert or delete.  The cursor passed is
1229 * the one that was operated upon; we just need to check any of the
1230 * others.
1231 *
1232 * len indicates the length of the item added/deleted
1233 * add indicates if the item indicated by the cursor has just been
1234 * added (add == 1) or deleted (add == 0).
1235 * dup indicates if the addition occurred into a duplicate set.
1236 *
1237 * PUBLIC: void __ham_c_update
1238 * PUBLIC:    __P((HASH_CURSOR *, db_pgno_t, u_int32_t, int, int));
1239 */
1240void
1241__ham_c_update(hcp, chg_pgno, len, add, is_dup)
1242	HASH_CURSOR *hcp;
1243	db_pgno_t chg_pgno;
1244	u_int32_t len;
1245	int add, is_dup;
1246{
1247	DB *dbp;
1248	DBC *cp;
1249	HASH_CURSOR *lcp;
1250	int page_deleted;
1251
1252	/*
1253	 * Regular adds are always at the end of a given page, so we never
1254	 * have to adjust anyone's cursor after a regular add.
1255	 */
1256	if (!is_dup && add)
1257		return;
1258
1259	/*
1260	 * Determine if a page was deleted.    If this is a regular update
1261	 * (i.e., not is_dup) then the deleted page's number will be that in
1262	 * chg_pgno, and the pgno in the cursor will be different.  If this
1263	 * was an onpage-duplicate, then the same conditions apply.  If this
1264	 * was an off-page duplicate, then we need to verify if hcp->dpgno
1265	 * is the same (no delete) or different (delete) than chg_pgno.
1266	 */
1267	if (!is_dup || hcp->dpgno == PGNO_INVALID)
1268		page_deleted =
1269		    chg_pgno != PGNO_INVALID && chg_pgno != hcp->pgno;
1270	else
1271		page_deleted =
1272		    chg_pgno != PGNO_INVALID && chg_pgno != hcp->dpgno;
1273
1274	dbp = hcp->dbc->dbp;
1275	DB_THREAD_LOCK(dbp);
1276
1277	for (cp = TAILQ_FIRST(&dbp->active_queue); cp != NULL;
1278	    cp = TAILQ_NEXT(cp, links)) {
1279		if (cp->internal == hcp)
1280			continue;
1281
1282		lcp = (HASH_CURSOR *)cp->internal;
1283
1284		if (!is_dup && lcp->pgno != chg_pgno)
1285			continue;
1286
1287		if (is_dup) {
1288			if (F_ISSET(hcp, H_DELETED) && lcp->pgno != chg_pgno)
1289				continue;
1290			if (!F_ISSET(hcp, H_DELETED) && lcp->dpgno != chg_pgno)
1291				continue;
1292		}
1293
1294		if (page_deleted) {
1295			if (is_dup) {
1296				lcp->dpgno = hcp->dpgno;
1297				lcp->dndx = hcp->dndx;
1298			} else {
1299				lcp->pgno = hcp->pgno;
1300				lcp->bndx = hcp->bndx;
1301				lcp->bucket = hcp->bucket;
1302			}
1303			F_CLR(lcp, H_ISDUP);
1304			continue;
1305		}
1306
1307		if (!is_dup && lcp->bndx > hcp->bndx)
1308			lcp->bndx--;
1309		else if (!is_dup && lcp->bndx == hcp->bndx)
1310			F_SET(lcp, H_DELETED);
1311		else if (is_dup && lcp->bndx == hcp->bndx) {
1312			/* Assign dpgno in case there was page conversion. */
1313			lcp->dpgno = hcp->dpgno;
1314			if (add && lcp->dndx >= hcp->dndx )
1315				lcp->dndx++;
1316			else if (!add && lcp->dndx > hcp->dndx)
1317				lcp->dndx--;
1318			else if (!add && lcp->dndx == hcp->dndx)
1319				F_SET(lcp, H_DELETED);
1320
1321			/* Now adjust on-page information. */
1322			if (lcp->dpgno == PGNO_INVALID)
1323				if (add) {
1324					lcp->dup_tlen += len;
1325					if (lcp->dndx > hcp->dndx)
1326						lcp->dup_off += len;
1327				} else {
1328					lcp->dup_tlen -= len;
1329					if (lcp->dndx > hcp->dndx)
1330						lcp->dup_off -= len;
1331				}
1332		}
1333	}
1334	DB_THREAD_UNLOCK(dbp);
1335}
1336
1337