1*7c478bd9Sstevel@tonic-gate /*-
2*7c478bd9Sstevel@tonic-gate  * See the file LICENSE for redistribution information.
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * Copyright (c) 1996, 1997, 1998
5*7c478bd9Sstevel@tonic-gate  *	Sleepycat Software.  All rights reserved.
6*7c478bd9Sstevel@tonic-gate  */
7*7c478bd9Sstevel@tonic-gate /*
8*7c478bd9Sstevel@tonic-gate  * Copyright (c) 1990, 1993, 1994
9*7c478bd9Sstevel@tonic-gate  *	Margo Seltzer.  All rights reserved.
10*7c478bd9Sstevel@tonic-gate  */
11*7c478bd9Sstevel@tonic-gate /*
12*7c478bd9Sstevel@tonic-gate  * Copyright (c) 1990, 1993, 1994
13*7c478bd9Sstevel@tonic-gate  *	The Regents of the University of California.  All rights reserved.
14*7c478bd9Sstevel@tonic-gate  *
15*7c478bd9Sstevel@tonic-gate  * This code is derived from software contributed to Berkeley by
16*7c478bd9Sstevel@tonic-gate  * Margo Seltzer.
17*7c478bd9Sstevel@tonic-gate  *
18*7c478bd9Sstevel@tonic-gate  * Redistribution and use in source and binary forms, with or without
19*7c478bd9Sstevel@tonic-gate  * modification, are permitted provided that the following conditions
20*7c478bd9Sstevel@tonic-gate  * are met:
21*7c478bd9Sstevel@tonic-gate  * 1. Redistributions of source code must retain the above copyright
22*7c478bd9Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer.
23*7c478bd9Sstevel@tonic-gate  * 2. Redistributions in binary form must reproduce the above copyright
24*7c478bd9Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer in the
25*7c478bd9Sstevel@tonic-gate  *    documentation and/or other materials provided with the distribution.
26*7c478bd9Sstevel@tonic-gate  * 3. All advertising materials mentioning features or use of this software
27*7c478bd9Sstevel@tonic-gate  *    must display the following acknowledgement:
28*7c478bd9Sstevel@tonic-gate  *	This product includes software developed by the University of
29*7c478bd9Sstevel@tonic-gate  *	California, Berkeley and its contributors.
30*7c478bd9Sstevel@tonic-gate  * 4. Neither the name of the University nor the names of its contributors
31*7c478bd9Sstevel@tonic-gate  *    may be used to endorse or promote products derived from this software
32*7c478bd9Sstevel@tonic-gate  *    without specific prior written permission.
33*7c478bd9Sstevel@tonic-gate  *
34*7c478bd9Sstevel@tonic-gate  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35*7c478bd9Sstevel@tonic-gate  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36*7c478bd9Sstevel@tonic-gate  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37*7c478bd9Sstevel@tonic-gate  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38*7c478bd9Sstevel@tonic-gate  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39*7c478bd9Sstevel@tonic-gate  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40*7c478bd9Sstevel@tonic-gate  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41*7c478bd9Sstevel@tonic-gate  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42*7c478bd9Sstevel@tonic-gate  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43*7c478bd9Sstevel@tonic-gate  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44*7c478bd9Sstevel@tonic-gate  * SUCH DAMAGE.
45*7c478bd9Sstevel@tonic-gate  */
46*7c478bd9Sstevel@tonic-gate 
47*7c478bd9Sstevel@tonic-gate #include "config.h"
48*7c478bd9Sstevel@tonic-gate 
49*7c478bd9Sstevel@tonic-gate #ifndef lint
50*7c478bd9Sstevel@tonic-gate static const char sccsid[] = "@(#)hash_page.c	10.55 (Sleepycat) 1/3/99";
51*7c478bd9Sstevel@tonic-gate #endif /* not lint */
52*7c478bd9Sstevel@tonic-gate 
53*7c478bd9Sstevel@tonic-gate /*
54*7c478bd9Sstevel@tonic-gate  * PACKAGE:  hashing
55*7c478bd9Sstevel@tonic-gate  *
56*7c478bd9Sstevel@tonic-gate  * DESCRIPTION:
57*7c478bd9Sstevel@tonic-gate  *      Page manipulation for hashing package.
58*7c478bd9Sstevel@tonic-gate  *
59*7c478bd9Sstevel@tonic-gate  * ROUTINES:
60*7c478bd9Sstevel@tonic-gate  *
61*7c478bd9Sstevel@tonic-gate  * External
62*7c478bd9Sstevel@tonic-gate  *      __get_page
63*7c478bd9Sstevel@tonic-gate  *      __add_ovflpage
64*7c478bd9Sstevel@tonic-gate  *      __overflow_page
65*7c478bd9Sstevel@tonic-gate  * Internal
66*7c478bd9Sstevel@tonic-gate  *      open_temp
67*7c478bd9Sstevel@tonic-gate  */
68*7c478bd9Sstevel@tonic-gate 
69*7c478bd9Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES
70*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
71*7c478bd9Sstevel@tonic-gate 
72*7c478bd9Sstevel@tonic-gate #include <errno.h>
73*7c478bd9Sstevel@tonic-gate #include <string.h>
74*7c478bd9Sstevel@tonic-gate #endif
75*7c478bd9Sstevel@tonic-gate 
76*7c478bd9Sstevel@tonic-gate #include "db_int.h"
77*7c478bd9Sstevel@tonic-gate #include "db_page.h"
78*7c478bd9Sstevel@tonic-gate #include "hash.h"
79*7c478bd9Sstevel@tonic-gate 
80*7c478bd9Sstevel@tonic-gate static int __ham_lock_bucket __P((DBC *, db_lockmode_t));
81*7c478bd9Sstevel@tonic-gate 
82*7c478bd9Sstevel@tonic-gate #ifdef DEBUG_SLOW
83*7c478bd9Sstevel@tonic-gate static void  __account_page(DB *, db_pgno_t, int);
84*7c478bd9Sstevel@tonic-gate #endif
85*7c478bd9Sstevel@tonic-gate 
86*7c478bd9Sstevel@tonic-gate /*
87*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __ham_item __P((DBC *, db_lockmode_t));
88*7c478bd9Sstevel@tonic-gate  */
89*7c478bd9Sstevel@tonic-gate int
__ham_item(dbc,mode)90*7c478bd9Sstevel@tonic-gate __ham_item(dbc, mode)
91*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
92*7c478bd9Sstevel@tonic-gate 	db_lockmode_t mode;
93*7c478bd9Sstevel@tonic-gate {
94*7c478bd9Sstevel@tonic-gate 	DB *dbp;
95*7c478bd9Sstevel@tonic-gate 	HASH_CURSOR *hcp;
96*7c478bd9Sstevel@tonic-gate 	db_pgno_t next_pgno;
97*7c478bd9Sstevel@tonic-gate 	int ret;
98*7c478bd9Sstevel@tonic-gate 
99*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
100*7c478bd9Sstevel@tonic-gate 	hcp = (HASH_CURSOR *)dbc->internal;
101*7c478bd9Sstevel@tonic-gate 
102*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(hcp, H_DELETED))
103*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
104*7c478bd9Sstevel@tonic-gate 	F_CLR(hcp, H_OK | H_NOMORE);
105*7c478bd9Sstevel@tonic-gate 
106*7c478bd9Sstevel@tonic-gate 	/* Check if we need to get a page for this cursor. */
107*7c478bd9Sstevel@tonic-gate 	if ((ret = __ham_get_cpage(dbc, mode)) != 0)
108*7c478bd9Sstevel@tonic-gate 		return (ret);
109*7c478bd9Sstevel@tonic-gate 
110*7c478bd9Sstevel@tonic-gate 	/* Check if we are looking for space in which to insert an item. */
111*7c478bd9Sstevel@tonic-gate 	if (hcp->seek_size && hcp->seek_found_page == PGNO_INVALID
112*7c478bd9Sstevel@tonic-gate 	    && hcp->seek_size < P_FREESPACE(hcp->pagep))
113*7c478bd9Sstevel@tonic-gate 		hcp->seek_found_page = hcp->pgno;
114*7c478bd9Sstevel@tonic-gate 
115*7c478bd9Sstevel@tonic-gate 	/* Check if we need to go on to the next page. */
116*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno == PGNO_INVALID)
117*7c478bd9Sstevel@tonic-gate 		/*
118*7c478bd9Sstevel@tonic-gate 		 * ISDUP is set, and offset is at the beginning of the datum.
119*7c478bd9Sstevel@tonic-gate 		 * We need to grab the length of the datum, then set the datum
120*7c478bd9Sstevel@tonic-gate 		 * pointer to be the beginning of the datum.
121*7c478bd9Sstevel@tonic-gate 		 */
122*7c478bd9Sstevel@tonic-gate 		memcpy(&hcp->dup_len,
123*7c478bd9Sstevel@tonic-gate 		    HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx)) +
124*7c478bd9Sstevel@tonic-gate 		    hcp->dup_off, sizeof(db_indx_t));
125*7c478bd9Sstevel@tonic-gate 	else if (F_ISSET(hcp, H_ISDUP)) {
126*7c478bd9Sstevel@tonic-gate 		/* Make sure we're not about to run off the page. */
127*7c478bd9Sstevel@tonic-gate 		if (hcp->dpagep == NULL && (ret = __ham_get_page(dbp,
128*7c478bd9Sstevel@tonic-gate 		    hcp->dpgno, &hcp->dpagep)) != 0)
129*7c478bd9Sstevel@tonic-gate 			return (ret);
130*7c478bd9Sstevel@tonic-gate 
131*7c478bd9Sstevel@tonic-gate 		if (hcp->dndx >= NUM_ENT(hcp->dpagep)) {
132*7c478bd9Sstevel@tonic-gate 			if (NEXT_PGNO(hcp->dpagep) == PGNO_INVALID) {
133*7c478bd9Sstevel@tonic-gate 				if (F_ISSET(hcp, H_DUPONLY)) {
134*7c478bd9Sstevel@tonic-gate 					F_CLR(hcp, H_OK);
135*7c478bd9Sstevel@tonic-gate 					F_SET(hcp, H_NOMORE);
136*7c478bd9Sstevel@tonic-gate 					return (0);
137*7c478bd9Sstevel@tonic-gate 				}
138*7c478bd9Sstevel@tonic-gate 				if ((ret = __ham_put_page(dbp,
139*7c478bd9Sstevel@tonic-gate 				    hcp->dpagep, 0)) != 0)
140*7c478bd9Sstevel@tonic-gate 					return (ret);
141*7c478bd9Sstevel@tonic-gate 				F_CLR(hcp, H_ISDUP);
142*7c478bd9Sstevel@tonic-gate 				hcp->dpagep = NULL;
143*7c478bd9Sstevel@tonic-gate 				hcp->dpgno = PGNO_INVALID;
144*7c478bd9Sstevel@tonic-gate 				hcp->dndx = NDX_INVALID;
145*7c478bd9Sstevel@tonic-gate 				hcp->bndx++;
146*7c478bd9Sstevel@tonic-gate 			} else if ((ret = __ham_next_cpage(dbc,
147*7c478bd9Sstevel@tonic-gate 			    NEXT_PGNO(hcp->dpagep), 0, H_ISDUP)) != 0)
148*7c478bd9Sstevel@tonic-gate 				return (ret);
149*7c478bd9Sstevel@tonic-gate 		}
150*7c478bd9Sstevel@tonic-gate 	}
151*7c478bd9Sstevel@tonic-gate 
152*7c478bd9Sstevel@tonic-gate 	if (hcp->bndx >= (db_indx_t)H_NUMPAIRS(hcp->pagep)) {
153*7c478bd9Sstevel@tonic-gate 		/* Fetch next page. */
154*7c478bd9Sstevel@tonic-gate 		if (NEXT_PGNO(hcp->pagep) == PGNO_INVALID) {
155*7c478bd9Sstevel@tonic-gate 			F_SET(hcp, H_NOMORE);
156*7c478bd9Sstevel@tonic-gate 			if (hcp->dpagep != NULL &&
157*7c478bd9Sstevel@tonic-gate 			    (ret = __ham_put_page(dbp, hcp->dpagep, 0)) != 0)
158*7c478bd9Sstevel@tonic-gate 				return (ret);
159*7c478bd9Sstevel@tonic-gate 			hcp->dpgno = PGNO_INVALID;
160*7c478bd9Sstevel@tonic-gate 			return (DB_NOTFOUND);
161*7c478bd9Sstevel@tonic-gate 		}
162*7c478bd9Sstevel@tonic-gate 		next_pgno = NEXT_PGNO(hcp->pagep);
163*7c478bd9Sstevel@tonic-gate 		hcp->bndx = 0;
164*7c478bd9Sstevel@tonic-gate 		if ((ret = __ham_next_cpage(dbc, next_pgno, 0, 0)) != 0)
165*7c478bd9Sstevel@tonic-gate 			return (ret);
166*7c478bd9Sstevel@tonic-gate 	}
167*7c478bd9Sstevel@tonic-gate 
168*7c478bd9Sstevel@tonic-gate 	F_SET(hcp, H_OK);
169*7c478bd9Sstevel@tonic-gate 	return (0);
170*7c478bd9Sstevel@tonic-gate }
171*7c478bd9Sstevel@tonic-gate 
172*7c478bd9Sstevel@tonic-gate /*
173*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __ham_item_reset __P((DBC *));
174*7c478bd9Sstevel@tonic-gate  */
175*7c478bd9Sstevel@tonic-gate int
__ham_item_reset(dbc)176*7c478bd9Sstevel@tonic-gate __ham_item_reset(dbc)
177*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
178*7c478bd9Sstevel@tonic-gate {
179*7c478bd9Sstevel@tonic-gate 	HASH_CURSOR *hcp;
180*7c478bd9Sstevel@tonic-gate 	DB *dbp;
181*7c478bd9Sstevel@tonic-gate 	int ret;
182*7c478bd9Sstevel@tonic-gate 
183*7c478bd9Sstevel@tonic-gate 	ret = 0;
184*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
185*7c478bd9Sstevel@tonic-gate 	hcp = (HASH_CURSOR *)dbc->internal;
186*7c478bd9Sstevel@tonic-gate 	if (hcp->pagep != NULL)
187*7c478bd9Sstevel@tonic-gate 		ret = __ham_put_page(dbp, hcp->pagep, 0);
188*7c478bd9Sstevel@tonic-gate 	if (ret == 0 && hcp->dpagep != NULL)
189*7c478bd9Sstevel@tonic-gate 		ret = __ham_put_page(dbp, hcp->dpagep, 0);
190*7c478bd9Sstevel@tonic-gate 
191*7c478bd9Sstevel@tonic-gate 	__ham_item_init(hcp);
192*7c478bd9Sstevel@tonic-gate 	return (ret);
193*7c478bd9Sstevel@tonic-gate }
194*7c478bd9Sstevel@tonic-gate 
195*7c478bd9Sstevel@tonic-gate /*
196*7c478bd9Sstevel@tonic-gate  * PUBLIC: void __ham_item_init __P((HASH_CURSOR *));
197*7c478bd9Sstevel@tonic-gate  */
198*7c478bd9Sstevel@tonic-gate void
__ham_item_init(hcp)199*7c478bd9Sstevel@tonic-gate __ham_item_init(hcp)
200*7c478bd9Sstevel@tonic-gate 	HASH_CURSOR *hcp;
201*7c478bd9Sstevel@tonic-gate {
202*7c478bd9Sstevel@tonic-gate 	/*
203*7c478bd9Sstevel@tonic-gate 	 * If this cursor still holds any locks, we must
204*7c478bd9Sstevel@tonic-gate 	 * release them if we are not running with transactions.
205*7c478bd9Sstevel@tonic-gate 	 */
206*7c478bd9Sstevel@tonic-gate 	if (hcp->lock && hcp->dbc->txn == NULL)
207*7c478bd9Sstevel@tonic-gate 	    (void)lock_put(hcp->dbc->dbp->dbenv->lk_info, hcp->lock);
208*7c478bd9Sstevel@tonic-gate 
209*7c478bd9Sstevel@tonic-gate 	/*
210*7c478bd9Sstevel@tonic-gate 	 * The following fields must *not* be initialized here
211*7c478bd9Sstevel@tonic-gate 	 * because they may have meaning across inits.
212*7c478bd9Sstevel@tonic-gate 	 * 	hlock, hdr, split_buf, stats
213*7c478bd9Sstevel@tonic-gate 	 */
214*7c478bd9Sstevel@tonic-gate 	hcp->bucket = BUCKET_INVALID;
215*7c478bd9Sstevel@tonic-gate 	hcp->lbucket = BUCKET_INVALID;
216*7c478bd9Sstevel@tonic-gate 	hcp->lock = 0;
217*7c478bd9Sstevel@tonic-gate 	hcp->pagep = NULL;
218*7c478bd9Sstevel@tonic-gate 	hcp->pgno = PGNO_INVALID;
219*7c478bd9Sstevel@tonic-gate 	hcp->bndx = NDX_INVALID;
220*7c478bd9Sstevel@tonic-gate 	hcp->dpagep = NULL;
221*7c478bd9Sstevel@tonic-gate 	hcp->dpgno = PGNO_INVALID;
222*7c478bd9Sstevel@tonic-gate 	hcp->dndx = NDX_INVALID;
223*7c478bd9Sstevel@tonic-gate 	hcp->dup_off = 0;
224*7c478bd9Sstevel@tonic-gate 	hcp->dup_len = 0;
225*7c478bd9Sstevel@tonic-gate 	hcp->dup_tlen = 0;
226*7c478bd9Sstevel@tonic-gate 	hcp->seek_size = 0;
227*7c478bd9Sstevel@tonic-gate 	hcp->seek_found_page = PGNO_INVALID;
228*7c478bd9Sstevel@tonic-gate 	hcp->flags = 0;
229*7c478bd9Sstevel@tonic-gate }
230*7c478bd9Sstevel@tonic-gate 
231*7c478bd9Sstevel@tonic-gate /*
232*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __ham_item_done __P((DBC *, int));
233*7c478bd9Sstevel@tonic-gate  */
234*7c478bd9Sstevel@tonic-gate int
__ham_item_done(dbc,dirty)235*7c478bd9Sstevel@tonic-gate __ham_item_done(dbc, dirty)
236*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
237*7c478bd9Sstevel@tonic-gate 	int dirty;
238*7c478bd9Sstevel@tonic-gate {
239*7c478bd9Sstevel@tonic-gate 	DB *dbp;
240*7c478bd9Sstevel@tonic-gate 	HASH_CURSOR *hcp;
241*7c478bd9Sstevel@tonic-gate 	int ret, t_ret;
242*7c478bd9Sstevel@tonic-gate 
243*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
244*7c478bd9Sstevel@tonic-gate 	hcp = (HASH_CURSOR *)dbc->internal;
245*7c478bd9Sstevel@tonic-gate 	t_ret = ret = 0;
246*7c478bd9Sstevel@tonic-gate 
247*7c478bd9Sstevel@tonic-gate 	if (hcp->pagep)
248*7c478bd9Sstevel@tonic-gate 		ret = __ham_put_page(dbp, hcp->pagep,
249*7c478bd9Sstevel@tonic-gate 		    dirty && hcp->dpagep == NULL);
250*7c478bd9Sstevel@tonic-gate 	hcp->pagep = NULL;
251*7c478bd9Sstevel@tonic-gate 
252*7c478bd9Sstevel@tonic-gate 	if (hcp->dpagep)
253*7c478bd9Sstevel@tonic-gate 		t_ret = __ham_put_page(dbp, hcp->dpagep, dirty);
254*7c478bd9Sstevel@tonic-gate 	hcp->dpagep = NULL;
255*7c478bd9Sstevel@tonic-gate 
256*7c478bd9Sstevel@tonic-gate 	if (ret == 0 && t_ret != 0)
257*7c478bd9Sstevel@tonic-gate 		ret = t_ret;
258*7c478bd9Sstevel@tonic-gate 
259*7c478bd9Sstevel@tonic-gate 	/*
260*7c478bd9Sstevel@tonic-gate 	 * We don't throw out the page number since we might want to
261*7c478bd9Sstevel@tonic-gate 	 * continue getting on this page.
262*7c478bd9Sstevel@tonic-gate 	 */
263*7c478bd9Sstevel@tonic-gate 	return (ret != 0 ? ret : t_ret);
264*7c478bd9Sstevel@tonic-gate }
265*7c478bd9Sstevel@tonic-gate 
266*7c478bd9Sstevel@tonic-gate /*
267*7c478bd9Sstevel@tonic-gate  * Returns the last item in a bucket.
268*7c478bd9Sstevel@tonic-gate  *
269*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __ham_item_last __P((DBC *, db_lockmode_t));
270*7c478bd9Sstevel@tonic-gate  */
271*7c478bd9Sstevel@tonic-gate int
__ham_item_last(dbc,mode)272*7c478bd9Sstevel@tonic-gate __ham_item_last(dbc, mode)
273*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
274*7c478bd9Sstevel@tonic-gate 	db_lockmode_t mode;
275*7c478bd9Sstevel@tonic-gate {
276*7c478bd9Sstevel@tonic-gate 	HASH_CURSOR *hcp;
277*7c478bd9Sstevel@tonic-gate 	int ret;
278*7c478bd9Sstevel@tonic-gate 
279*7c478bd9Sstevel@tonic-gate 	hcp = (HASH_CURSOR *)dbc->internal;
280*7c478bd9Sstevel@tonic-gate 	if ((ret = __ham_item_reset(dbc)) != 0)
281*7c478bd9Sstevel@tonic-gate 		return (ret);
282*7c478bd9Sstevel@tonic-gate 
283*7c478bd9Sstevel@tonic-gate 	hcp->bucket = hcp->hdr->max_bucket;
284*7c478bd9Sstevel@tonic-gate 	F_SET(hcp, H_OK);
285*7c478bd9Sstevel@tonic-gate 	return (__ham_item_prev(dbc, mode));
286*7c478bd9Sstevel@tonic-gate }
287*7c478bd9Sstevel@tonic-gate 
288*7c478bd9Sstevel@tonic-gate /*
289*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __ham_item_first __P((DBC *, db_lockmode_t));
290*7c478bd9Sstevel@tonic-gate  */
291*7c478bd9Sstevel@tonic-gate int
__ham_item_first(dbc,mode)292*7c478bd9Sstevel@tonic-gate __ham_item_first(dbc, mode)
293*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
294*7c478bd9Sstevel@tonic-gate 	db_lockmode_t mode;
295*7c478bd9Sstevel@tonic-gate {
296*7c478bd9Sstevel@tonic-gate 	HASH_CURSOR *hcp;
297*7c478bd9Sstevel@tonic-gate 	int ret;
298*7c478bd9Sstevel@tonic-gate 
299*7c478bd9Sstevel@tonic-gate 	hcp = (HASH_CURSOR *)dbc->internal;
300*7c478bd9Sstevel@tonic-gate 	if ((ret = __ham_item_reset(dbc)) != 0)
301*7c478bd9Sstevel@tonic-gate 		return (ret);
302*7c478bd9Sstevel@tonic-gate 	F_SET(hcp, H_OK);
303*7c478bd9Sstevel@tonic-gate 	hcp->bucket = 0;
304*7c478bd9Sstevel@tonic-gate 	return (__ham_item_next(dbc, mode));
305*7c478bd9Sstevel@tonic-gate }
306*7c478bd9Sstevel@tonic-gate 
307*7c478bd9Sstevel@tonic-gate /*
308*7c478bd9Sstevel@tonic-gate  * __ham_item_prev --
309*7c478bd9Sstevel@tonic-gate  *	Returns a pointer to key/data pair on a page.  In the case of
310*7c478bd9Sstevel@tonic-gate  *	bigkeys, just returns the page number and index of the bigkey
311*7c478bd9Sstevel@tonic-gate  *	pointer pair.
312*7c478bd9Sstevel@tonic-gate  *
313*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __ham_item_prev __P((DBC *, db_lockmode_t));
314*7c478bd9Sstevel@tonic-gate  */
315*7c478bd9Sstevel@tonic-gate int
__ham_item_prev(dbc,mode)316*7c478bd9Sstevel@tonic-gate __ham_item_prev(dbc, mode)
317*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
318*7c478bd9Sstevel@tonic-gate 	db_lockmode_t mode;
319*7c478bd9Sstevel@tonic-gate {
320*7c478bd9Sstevel@tonic-gate 	DB *dbp;
321*7c478bd9Sstevel@tonic-gate 	HASH_CURSOR *hcp;
322*7c478bd9Sstevel@tonic-gate 	db_pgno_t next_pgno;
323*7c478bd9Sstevel@tonic-gate 	int ret;
324*7c478bd9Sstevel@tonic-gate 
325*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
326*7c478bd9Sstevel@tonic-gate 	hcp = (HASH_CURSOR *)dbc->internal;
327*7c478bd9Sstevel@tonic-gate 	/*
328*7c478bd9Sstevel@tonic-gate 	 * There are N cases for backing up in a hash file.
329*7c478bd9Sstevel@tonic-gate 	 * Case 1: In the middle of a page, no duplicates, just dec the index.
330*7c478bd9Sstevel@tonic-gate 	 * Case 2: In the middle of a duplicate set, back up one.
331*7c478bd9Sstevel@tonic-gate 	 * Case 3: At the beginning of a duplicate set, get out of set and
332*7c478bd9Sstevel@tonic-gate 	 *	back up to next key.
333*7c478bd9Sstevel@tonic-gate 	 * Case 4: At the beginning of a page; go to previous page.
334*7c478bd9Sstevel@tonic-gate 	 * Case 5: At the beginning of a bucket; go to prev bucket.
335*7c478bd9Sstevel@tonic-gate 	 */
336*7c478bd9Sstevel@tonic-gate 	F_CLR(hcp, H_OK | H_NOMORE | H_DELETED);
337*7c478bd9Sstevel@tonic-gate 
338*7c478bd9Sstevel@tonic-gate 	/*
339*7c478bd9Sstevel@tonic-gate 	 * First handle the duplicates.  Either you'll get the key here
340*7c478bd9Sstevel@tonic-gate 	 * or you'll exit the duplicate set and drop into the code below
341*7c478bd9Sstevel@tonic-gate 	 * to handle backing up through keys.
342*7c478bd9Sstevel@tonic-gate 	 */
343*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(hcp, H_ISDUP)) {
344*7c478bd9Sstevel@tonic-gate 		if (hcp->dpgno == PGNO_INVALID) {
345*7c478bd9Sstevel@tonic-gate 			/* Duplicates are on-page. */
346*7c478bd9Sstevel@tonic-gate 			if (hcp->dup_off != 0)
347*7c478bd9Sstevel@tonic-gate 				if ((ret = __ham_get_cpage(dbc, mode)) != 0)
348*7c478bd9Sstevel@tonic-gate 					return (ret);
349*7c478bd9Sstevel@tonic-gate 				else {
350*7c478bd9Sstevel@tonic-gate 					HASH_CURSOR *h;
351*7c478bd9Sstevel@tonic-gate 					h = hcp;
352*7c478bd9Sstevel@tonic-gate 					memcpy(&h->dup_len, HKEYDATA_DATA(
353*7c478bd9Sstevel@tonic-gate 					    H_PAIRDATA(h->pagep, h->bndx))
354*7c478bd9Sstevel@tonic-gate 					    + h->dup_off - sizeof(db_indx_t),
355*7c478bd9Sstevel@tonic-gate 					    sizeof(db_indx_t));
356*7c478bd9Sstevel@tonic-gate 					hcp->dup_off -=
357*7c478bd9Sstevel@tonic-gate 					    DUP_SIZE(hcp->dup_len);
358*7c478bd9Sstevel@tonic-gate 					hcp->dndx--;
359*7c478bd9Sstevel@tonic-gate 					return (__ham_item(dbc, mode));
360*7c478bd9Sstevel@tonic-gate 				}
361*7c478bd9Sstevel@tonic-gate 		} else if (hcp->dndx > 0) {	/* Duplicates are off-page. */
362*7c478bd9Sstevel@tonic-gate 			hcp->dndx--;
363*7c478bd9Sstevel@tonic-gate 			return (__ham_item(dbc, mode));
364*7c478bd9Sstevel@tonic-gate 		} else if ((ret = __ham_get_cpage(dbc, mode)) != 0)
365*7c478bd9Sstevel@tonic-gate 			return (ret);
366*7c478bd9Sstevel@tonic-gate 		else if (PREV_PGNO(hcp->dpagep) == PGNO_INVALID) {
367*7c478bd9Sstevel@tonic-gate 			if (F_ISSET(hcp, H_DUPONLY)) {
368*7c478bd9Sstevel@tonic-gate 				F_CLR(hcp, H_OK);
369*7c478bd9Sstevel@tonic-gate 				F_SET(hcp, H_NOMORE);
370*7c478bd9Sstevel@tonic-gate 				return (0);
371*7c478bd9Sstevel@tonic-gate 			} else {
372*7c478bd9Sstevel@tonic-gate 				F_CLR(hcp, H_ISDUP); /* End of dups */
373*7c478bd9Sstevel@tonic-gate 				hcp->dpgno = PGNO_INVALID;
374*7c478bd9Sstevel@tonic-gate 				if (hcp->dpagep != NULL)
375*7c478bd9Sstevel@tonic-gate 					(void)__ham_put_page(dbp,
376*7c478bd9Sstevel@tonic-gate 					    hcp->dpagep, 0);
377*7c478bd9Sstevel@tonic-gate 				hcp->dpagep = NULL;
378*7c478bd9Sstevel@tonic-gate 			}
379*7c478bd9Sstevel@tonic-gate 		} else if ((ret = __ham_next_cpage(dbc,
380*7c478bd9Sstevel@tonic-gate 		    PREV_PGNO(hcp->dpagep), 0, H_ISDUP)) != 0)
381*7c478bd9Sstevel@tonic-gate 			return (ret);
382*7c478bd9Sstevel@tonic-gate 		else {
383*7c478bd9Sstevel@tonic-gate 			hcp->dndx = NUM_ENT(hcp->pagep) - 1;
384*7c478bd9Sstevel@tonic-gate 			return (__ham_item(dbc, mode));
385*7c478bd9Sstevel@tonic-gate 		}
386*7c478bd9Sstevel@tonic-gate 	}
387*7c478bd9Sstevel@tonic-gate 
388*7c478bd9Sstevel@tonic-gate 	/*
389*7c478bd9Sstevel@tonic-gate 	 * If we get here, we are not in a duplicate set, and just need
390*7c478bd9Sstevel@tonic-gate 	 * to back up the cursor.  There are still three cases:
391*7c478bd9Sstevel@tonic-gate 	 * midpage, beginning of page, beginning of bucket.
392*7c478bd9Sstevel@tonic-gate 	 */
393*7c478bd9Sstevel@tonic-gate 
394*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(hcp, H_DUPONLY)) {
395*7c478bd9Sstevel@tonic-gate 		F_CLR(hcp, H_OK);
396*7c478bd9Sstevel@tonic-gate 		F_SET(hcp, H_NOMORE);
397*7c478bd9Sstevel@tonic-gate 		return (0);
398*7c478bd9Sstevel@tonic-gate 	}
399*7c478bd9Sstevel@tonic-gate 
400*7c478bd9Sstevel@tonic-gate 	if (hcp->bndx == 0) { 		/* Beginning of page. */
401*7c478bd9Sstevel@tonic-gate 		if ((ret = __ham_get_cpage(dbc, mode)) != 0)
402*7c478bd9Sstevel@tonic-gate 			return (ret);
403*7c478bd9Sstevel@tonic-gate 		hcp->pgno = PREV_PGNO(hcp->pagep);
404*7c478bd9Sstevel@tonic-gate 		if (hcp->pgno == PGNO_INVALID) {
405*7c478bd9Sstevel@tonic-gate 			/* Beginning of bucket. */
406*7c478bd9Sstevel@tonic-gate 			F_SET(hcp, H_NOMORE);
407*7c478bd9Sstevel@tonic-gate 			return (DB_NOTFOUND);
408*7c478bd9Sstevel@tonic-gate 		} else if ((ret =
409*7c478bd9Sstevel@tonic-gate 		    __ham_next_cpage(dbc, hcp->pgno, 0, 0)) != 0)
410*7c478bd9Sstevel@tonic-gate 			return (ret);
411*7c478bd9Sstevel@tonic-gate 		else
412*7c478bd9Sstevel@tonic-gate 			hcp->bndx = H_NUMPAIRS(hcp->pagep);
413*7c478bd9Sstevel@tonic-gate 	}
414*7c478bd9Sstevel@tonic-gate 
415*7c478bd9Sstevel@tonic-gate 	/*
416*7c478bd9Sstevel@tonic-gate 	 * Either we've got the cursor set up to be decremented, or we
417*7c478bd9Sstevel@tonic-gate 	 * have to find the end of a bucket.
418*7c478bd9Sstevel@tonic-gate 	 */
419*7c478bd9Sstevel@tonic-gate 	if (hcp->bndx == NDX_INVALID) {
420*7c478bd9Sstevel@tonic-gate 		if (hcp->pagep == NULL)
421*7c478bd9Sstevel@tonic-gate 			next_pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
422*7c478bd9Sstevel@tonic-gate 		else
423*7c478bd9Sstevel@tonic-gate 			goto got_page;
424*7c478bd9Sstevel@tonic-gate 
425*7c478bd9Sstevel@tonic-gate 		do {
426*7c478bd9Sstevel@tonic-gate 			if ((ret = __ham_next_cpage(dbc, next_pgno, 0, 0)) != 0)
427*7c478bd9Sstevel@tonic-gate 				return (ret);
428*7c478bd9Sstevel@tonic-gate got_page:		next_pgno = NEXT_PGNO(hcp->pagep);
429*7c478bd9Sstevel@tonic-gate 			hcp->bndx = H_NUMPAIRS(hcp->pagep);
430*7c478bd9Sstevel@tonic-gate 		} while (next_pgno != PGNO_INVALID);
431*7c478bd9Sstevel@tonic-gate 
432*7c478bd9Sstevel@tonic-gate 		if (hcp->bndx == 0) {
433*7c478bd9Sstevel@tonic-gate 			/* Bucket was empty. */
434*7c478bd9Sstevel@tonic-gate 			F_SET(hcp, H_NOMORE);
435*7c478bd9Sstevel@tonic-gate 			return (DB_NOTFOUND);
436*7c478bd9Sstevel@tonic-gate 		}
437*7c478bd9Sstevel@tonic-gate 	}
438*7c478bd9Sstevel@tonic-gate 
439*7c478bd9Sstevel@tonic-gate 	hcp->bndx--;
440*7c478bd9Sstevel@tonic-gate 
441*7c478bd9Sstevel@tonic-gate 	return (__ham_item(dbc, mode));
442*7c478bd9Sstevel@tonic-gate }
443*7c478bd9Sstevel@tonic-gate 
444*7c478bd9Sstevel@tonic-gate /*
445*7c478bd9Sstevel@tonic-gate  * Sets the cursor to the next key/data pair on a page.
446*7c478bd9Sstevel@tonic-gate  *
447*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __ham_item_next __P((DBC *, db_lockmode_t));
448*7c478bd9Sstevel@tonic-gate  */
449*7c478bd9Sstevel@tonic-gate int
__ham_item_next(dbc,mode)450*7c478bd9Sstevel@tonic-gate __ham_item_next(dbc, mode)
451*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
452*7c478bd9Sstevel@tonic-gate 	db_lockmode_t mode;
453*7c478bd9Sstevel@tonic-gate {
454*7c478bd9Sstevel@tonic-gate 	HASH_CURSOR *hcp;
455*7c478bd9Sstevel@tonic-gate 
456*7c478bd9Sstevel@tonic-gate 	hcp = (HASH_CURSOR *)dbc->internal;
457*7c478bd9Sstevel@tonic-gate 	/*
458*7c478bd9Sstevel@tonic-gate 	 * Deleted on-page duplicates are a weird case. If we delete the last
459*7c478bd9Sstevel@tonic-gate 	 * one, then our cursor is at the very end of a duplicate set and
460*7c478bd9Sstevel@tonic-gate 	 * we actually need to go on to the next key.
461*7c478bd9Sstevel@tonic-gate 	 */
462*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(hcp, H_DELETED)) {
463*7c478bd9Sstevel@tonic-gate 		if (hcp->bndx != NDX_INVALID &&
464*7c478bd9Sstevel@tonic-gate 		    F_ISSET(hcp, H_ISDUP) &&
465*7c478bd9Sstevel@tonic-gate 		    hcp->dpgno == PGNO_INVALID &&
466*7c478bd9Sstevel@tonic-gate 		    hcp->dup_tlen == hcp->dup_off) {
467*7c478bd9Sstevel@tonic-gate 			if (F_ISSET(hcp, H_DUPONLY)) {
468*7c478bd9Sstevel@tonic-gate 				F_CLR(hcp, H_OK);
469*7c478bd9Sstevel@tonic-gate 				F_SET(hcp, H_NOMORE);
470*7c478bd9Sstevel@tonic-gate 				return (0);
471*7c478bd9Sstevel@tonic-gate 			} else {
472*7c478bd9Sstevel@tonic-gate 				F_CLR(hcp, H_ISDUP);
473*7c478bd9Sstevel@tonic-gate 				hcp->dpgno = PGNO_INVALID;
474*7c478bd9Sstevel@tonic-gate 				hcp->bndx++;
475*7c478bd9Sstevel@tonic-gate 			}
476*7c478bd9Sstevel@tonic-gate 		} else if (!F_ISSET(hcp, H_ISDUP) &&
477*7c478bd9Sstevel@tonic-gate 		    F_ISSET(hcp, H_DUPONLY)) {
478*7c478bd9Sstevel@tonic-gate 			F_CLR(hcp, H_OK);
479*7c478bd9Sstevel@tonic-gate 			F_SET(hcp, H_NOMORE);
480*7c478bd9Sstevel@tonic-gate 			return (0);
481*7c478bd9Sstevel@tonic-gate 		}
482*7c478bd9Sstevel@tonic-gate 		F_CLR(hcp, H_DELETED);
483*7c478bd9Sstevel@tonic-gate 	} else if (hcp->bndx == NDX_INVALID) {
484*7c478bd9Sstevel@tonic-gate 		hcp->bndx = 0;
485*7c478bd9Sstevel@tonic-gate 		hcp->dpgno = PGNO_INVALID;
486*7c478bd9Sstevel@tonic-gate 		F_CLR(hcp, H_ISDUP);
487*7c478bd9Sstevel@tonic-gate 	} else if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno != PGNO_INVALID)
488*7c478bd9Sstevel@tonic-gate 		hcp->dndx++;
489*7c478bd9Sstevel@tonic-gate 	else if (F_ISSET(hcp, H_ISDUP)) {
490*7c478bd9Sstevel@tonic-gate 		if (hcp->dup_off + DUP_SIZE(hcp->dup_len) >=
491*7c478bd9Sstevel@tonic-gate 		    hcp->dup_tlen && F_ISSET(hcp, H_DUPONLY)) {
492*7c478bd9Sstevel@tonic-gate 			F_CLR(hcp, H_OK);
493*7c478bd9Sstevel@tonic-gate 			F_SET(hcp, H_NOMORE);
494*7c478bd9Sstevel@tonic-gate 			return (0);
495*7c478bd9Sstevel@tonic-gate 		}
496*7c478bd9Sstevel@tonic-gate 		hcp->dndx++;
497*7c478bd9Sstevel@tonic-gate 		hcp->dup_off += DUP_SIZE(hcp->dup_len);
498*7c478bd9Sstevel@tonic-gate 		if (hcp->dup_off >= hcp->dup_tlen) {
499*7c478bd9Sstevel@tonic-gate 			F_CLR(hcp, H_ISDUP);
500*7c478bd9Sstevel@tonic-gate 			hcp->dpgno = PGNO_INVALID;
501*7c478bd9Sstevel@tonic-gate 			hcp->bndx++;
502*7c478bd9Sstevel@tonic-gate 		}
503*7c478bd9Sstevel@tonic-gate 	} else if (F_ISSET(hcp, H_DUPONLY)) {
504*7c478bd9Sstevel@tonic-gate 		F_CLR(hcp, H_OK);
505*7c478bd9Sstevel@tonic-gate 		F_SET(hcp, H_NOMORE);
506*7c478bd9Sstevel@tonic-gate 		return (0);
507*7c478bd9Sstevel@tonic-gate 	} else
508*7c478bd9Sstevel@tonic-gate 		hcp->bndx++;
509*7c478bd9Sstevel@tonic-gate 
510*7c478bd9Sstevel@tonic-gate 	return (__ham_item(dbc, mode));
511*7c478bd9Sstevel@tonic-gate }
512*7c478bd9Sstevel@tonic-gate 
513*7c478bd9Sstevel@tonic-gate /*
514*7c478bd9Sstevel@tonic-gate  * PUBLIC: void __ham_putitem __P((PAGE *p, const DBT *, int));
515*7c478bd9Sstevel@tonic-gate  *
516*7c478bd9Sstevel@tonic-gate  * This is a little bit sleazy in that we're overloading the meaning
517*7c478bd9Sstevel@tonic-gate  * of the H_OFFPAGE type here.  When we recover deletes, we have the
518*7c478bd9Sstevel@tonic-gate  * entire entry instead of having only the DBT, so we'll pass type
519*7c478bd9Sstevel@tonic-gate  * H_OFFPAGE to mean, "copy the whole entry" as opposed to constructing
520*7c478bd9Sstevel@tonic-gate  * an H_KEYDATA around it.
521*7c478bd9Sstevel@tonic-gate  */
522*7c478bd9Sstevel@tonic-gate void
__ham_putitem(p,dbt,type)523*7c478bd9Sstevel@tonic-gate __ham_putitem(p, dbt, type)
524*7c478bd9Sstevel@tonic-gate 	PAGE *p;
525*7c478bd9Sstevel@tonic-gate 	const DBT *dbt;
526*7c478bd9Sstevel@tonic-gate 	int type;
527*7c478bd9Sstevel@tonic-gate {
528*7c478bd9Sstevel@tonic-gate 	u_int16_t n, off;
529*7c478bd9Sstevel@tonic-gate 
530*7c478bd9Sstevel@tonic-gate 	n = NUM_ENT(p);
531*7c478bd9Sstevel@tonic-gate 
532*7c478bd9Sstevel@tonic-gate 	/* Put the item element on the page. */
533*7c478bd9Sstevel@tonic-gate 	if (type == H_OFFPAGE) {
534*7c478bd9Sstevel@tonic-gate 		off = HOFFSET(p) - dbt->size;
535*7c478bd9Sstevel@tonic-gate 		HOFFSET(p) = p->inp[n] = off;
536*7c478bd9Sstevel@tonic-gate 		memcpy(P_ENTRY(p, n), dbt->data, dbt->size);
537*7c478bd9Sstevel@tonic-gate 	} else {
538*7c478bd9Sstevel@tonic-gate 		off = HOFFSET(p) - HKEYDATA_SIZE(dbt->size);
539*7c478bd9Sstevel@tonic-gate 		HOFFSET(p) = p->inp[n] = off;
540*7c478bd9Sstevel@tonic-gate 		PUT_HKEYDATA(P_ENTRY(p, n), dbt->data, dbt->size, type);
541*7c478bd9Sstevel@tonic-gate 	}
542*7c478bd9Sstevel@tonic-gate 
543*7c478bd9Sstevel@tonic-gate 	/* Adjust page info. */
544*7c478bd9Sstevel@tonic-gate 	NUM_ENT(p) += 1;
545*7c478bd9Sstevel@tonic-gate }
546*7c478bd9Sstevel@tonic-gate 
547*7c478bd9Sstevel@tonic-gate /*
548*7c478bd9Sstevel@tonic-gate  * PUBLIC: void __ham_reputpair
549*7c478bd9Sstevel@tonic-gate  * PUBLIC:    __P((PAGE *p, u_int32_t, u_int32_t, const DBT *, const DBT *));
550*7c478bd9Sstevel@tonic-gate  *
551*7c478bd9Sstevel@tonic-gate  * This is a special case to restore a key/data pair to its original
552*7c478bd9Sstevel@tonic-gate  * location during recovery.  We are guaranteed that the pair fits
553*7c478bd9Sstevel@tonic-gate  * on the page and is not the last pair on the page (because if it's
554*7c478bd9Sstevel@tonic-gate  * the last pair, the normal insert works).
555*7c478bd9Sstevel@tonic-gate  */
556*7c478bd9Sstevel@tonic-gate void
__ham_reputpair(p,psize,ndx,key,data)557*7c478bd9Sstevel@tonic-gate __ham_reputpair(p, psize, ndx, key, data)
558*7c478bd9Sstevel@tonic-gate 	PAGE *p;
559*7c478bd9Sstevel@tonic-gate 	u_int32_t psize, ndx;
560*7c478bd9Sstevel@tonic-gate 	const DBT *key, *data;
561*7c478bd9Sstevel@tonic-gate {
562*7c478bd9Sstevel@tonic-gate 	db_indx_t i, movebytes, newbytes;
563*7c478bd9Sstevel@tonic-gate 	u_int8_t *from;
564*7c478bd9Sstevel@tonic-gate 
565*7c478bd9Sstevel@tonic-gate 	/* First shuffle the existing items up on the page.  */
566*7c478bd9Sstevel@tonic-gate 	movebytes =
567*7c478bd9Sstevel@tonic-gate 	    (ndx == 0 ? psize : p->inp[H_DATAINDEX(ndx - 1)]) - HOFFSET(p);
568*7c478bd9Sstevel@tonic-gate 	newbytes = key->size + data->size;
569*7c478bd9Sstevel@tonic-gate 	from = (u_int8_t *)p + HOFFSET(p);
570*7c478bd9Sstevel@tonic-gate 	memmove(from - newbytes, from, movebytes);
571*7c478bd9Sstevel@tonic-gate 
572*7c478bd9Sstevel@tonic-gate 	/*
573*7c478bd9Sstevel@tonic-gate 	 * Adjust the indices and move them up 2 spaces. Note that we
574*7c478bd9Sstevel@tonic-gate 	 * have to check the exit condition inside the loop just in case
575*7c478bd9Sstevel@tonic-gate 	 * we are dealing with index 0 (db_indx_t's are unsigned).
576*7c478bd9Sstevel@tonic-gate 	 */
577*7c478bd9Sstevel@tonic-gate 	for (i = NUM_ENT(p) - 1; ; i-- ) {
578*7c478bd9Sstevel@tonic-gate 		p->inp[i + 2] = p->inp[i] - newbytes;
579*7c478bd9Sstevel@tonic-gate 		if (i == H_KEYINDEX(ndx))
580*7c478bd9Sstevel@tonic-gate 			break;
581*7c478bd9Sstevel@tonic-gate 	}
582*7c478bd9Sstevel@tonic-gate 
583*7c478bd9Sstevel@tonic-gate 	/* Put the key and data on the page. */
584*7c478bd9Sstevel@tonic-gate 	p->inp[H_KEYINDEX(ndx)] =
585*7c478bd9Sstevel@tonic-gate 	    (ndx == 0 ? psize : p->inp[H_DATAINDEX(ndx - 1)]) - key->size;
586*7c478bd9Sstevel@tonic-gate 	p->inp[H_DATAINDEX(ndx)] = p->inp[H_KEYINDEX(ndx)] - data->size;
587*7c478bd9Sstevel@tonic-gate 	memcpy(P_ENTRY(p, H_KEYINDEX(ndx)), key->data, key->size);
588*7c478bd9Sstevel@tonic-gate 	memcpy(P_ENTRY(p, H_DATAINDEX(ndx)), data->data, data->size);
589*7c478bd9Sstevel@tonic-gate 
590*7c478bd9Sstevel@tonic-gate 	/* Adjust page info. */
591*7c478bd9Sstevel@tonic-gate 	HOFFSET(p) -= newbytes;
592*7c478bd9Sstevel@tonic-gate 	NUM_ENT(p) += 2;
593*7c478bd9Sstevel@tonic-gate }
594*7c478bd9Sstevel@tonic-gate 
595*7c478bd9Sstevel@tonic-gate 
596*7c478bd9Sstevel@tonic-gate /*
597*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __ham_del_pair __P((DBC *, int));
598*7c478bd9Sstevel@tonic-gate  */
599*7c478bd9Sstevel@tonic-gate int
__ham_del_pair(dbc,reclaim_page)600*7c478bd9Sstevel@tonic-gate __ham_del_pair(dbc, reclaim_page)
601*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
602*7c478bd9Sstevel@tonic-gate 	int reclaim_page;
603*7c478bd9Sstevel@tonic-gate {
604*7c478bd9Sstevel@tonic-gate 	DB *dbp;
605*7c478bd9Sstevel@tonic-gate 	HASH_CURSOR *hcp;
606*7c478bd9Sstevel@tonic-gate 	DBT data_dbt, key_dbt;
607*7c478bd9Sstevel@tonic-gate 	DB_ENV *dbenv;
608*7c478bd9Sstevel@tonic-gate 	DB_LSN new_lsn, *n_lsn, tmp_lsn;
609*7c478bd9Sstevel@tonic-gate 	PAGE *p;
610*7c478bd9Sstevel@tonic-gate 	db_indx_t ndx;
611*7c478bd9Sstevel@tonic-gate 	db_pgno_t chg_pgno, pgno;
612*7c478bd9Sstevel@tonic-gate 	int ret, tret;
613*7c478bd9Sstevel@tonic-gate 
614*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
615*7c478bd9Sstevel@tonic-gate 	hcp = (HASH_CURSOR *)dbc->internal;
616*7c478bd9Sstevel@tonic-gate 
617*7c478bd9Sstevel@tonic-gate 	dbenv = dbp->dbenv;
618*7c478bd9Sstevel@tonic-gate 	ndx = hcp->bndx;
619*7c478bd9Sstevel@tonic-gate 	if (hcp->pagep == NULL &&
620*7c478bd9Sstevel@tonic-gate 	    (ret = __ham_get_page(dbp, hcp->pgno, &hcp->pagep)) != 0)
621*7c478bd9Sstevel@tonic-gate 		return (ret);
622*7c478bd9Sstevel@tonic-gate 
623*7c478bd9Sstevel@tonic-gate 	p = hcp->pagep;
624*7c478bd9Sstevel@tonic-gate 
625*7c478bd9Sstevel@tonic-gate 	/*
626*7c478bd9Sstevel@tonic-gate 	 * We optimize for the normal case which is when neither the key nor
627*7c478bd9Sstevel@tonic-gate 	 * the data are large.  In this case, we write a single log record
628*7c478bd9Sstevel@tonic-gate 	 * and do the delete.  If either is large, we'll call __big_delete
629*7c478bd9Sstevel@tonic-gate 	 * to remove the big item and then update the page to remove the
630*7c478bd9Sstevel@tonic-gate 	 * entry referring to the big item.
631*7c478bd9Sstevel@tonic-gate 	 */
632*7c478bd9Sstevel@tonic-gate 	ret = 0;
633*7c478bd9Sstevel@tonic-gate 	if (HPAGE_PTYPE(H_PAIRKEY(p, ndx)) == H_OFFPAGE) {
634*7c478bd9Sstevel@tonic-gate 		memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(p, H_KEYINDEX(ndx))),
635*7c478bd9Sstevel@tonic-gate 		    sizeof(db_pgno_t));
636*7c478bd9Sstevel@tonic-gate 		ret = __db_doff(dbc, pgno, __ham_del_page);
637*7c478bd9Sstevel@tonic-gate 	}
638*7c478bd9Sstevel@tonic-gate 
639*7c478bd9Sstevel@tonic-gate 	if (ret == 0)
640*7c478bd9Sstevel@tonic-gate 		switch (HPAGE_PTYPE(H_PAIRDATA(p, ndx))) {
641*7c478bd9Sstevel@tonic-gate 		case H_OFFPAGE:
642*7c478bd9Sstevel@tonic-gate 			memcpy(&pgno,
643*7c478bd9Sstevel@tonic-gate 			    HOFFPAGE_PGNO(P_ENTRY(p, H_DATAINDEX(ndx))),
644*7c478bd9Sstevel@tonic-gate 			    sizeof(db_pgno_t));
645*7c478bd9Sstevel@tonic-gate 			ret = __db_doff(dbc, pgno, __ham_del_page);
646*7c478bd9Sstevel@tonic-gate 			break;
647*7c478bd9Sstevel@tonic-gate 		case H_OFFDUP:
648*7c478bd9Sstevel@tonic-gate 			memcpy(&pgno,
649*7c478bd9Sstevel@tonic-gate 			    HOFFDUP_PGNO(P_ENTRY(p, H_DATAINDEX(ndx))),
650*7c478bd9Sstevel@tonic-gate 			    sizeof(db_pgno_t));
651*7c478bd9Sstevel@tonic-gate 			ret = __db_ddup(dbc, pgno, __ham_del_page);
652*7c478bd9Sstevel@tonic-gate 			F_CLR(hcp, H_ISDUP);
653*7c478bd9Sstevel@tonic-gate 			break;
654*7c478bd9Sstevel@tonic-gate 		case H_DUPLICATE:
655*7c478bd9Sstevel@tonic-gate 			/*
656*7c478bd9Sstevel@tonic-gate 			 * If we delete a pair that is/was a duplicate, then
657*7c478bd9Sstevel@tonic-gate 			 * we had better clear the flag so that we update the
658*7c478bd9Sstevel@tonic-gate 			 * cursor appropriately.
659*7c478bd9Sstevel@tonic-gate 			 */
660*7c478bd9Sstevel@tonic-gate 			F_CLR(hcp, H_ISDUP);
661*7c478bd9Sstevel@tonic-gate 			break;
662*7c478bd9Sstevel@tonic-gate 		}
663*7c478bd9Sstevel@tonic-gate 
664*7c478bd9Sstevel@tonic-gate 	if (ret)
665*7c478bd9Sstevel@tonic-gate 		return (ret);
666*7c478bd9Sstevel@tonic-gate 
667*7c478bd9Sstevel@tonic-gate 	/* Now log the delete off this page. */
668*7c478bd9Sstevel@tonic-gate 	if (DB_LOGGING(dbc)) {
669*7c478bd9Sstevel@tonic-gate 		key_dbt.data = P_ENTRY(p, H_KEYINDEX(ndx));
670*7c478bd9Sstevel@tonic-gate 		key_dbt.size =
671*7c478bd9Sstevel@tonic-gate 		    LEN_HITEM(p, hcp->hdr->pagesize, H_KEYINDEX(ndx));
672*7c478bd9Sstevel@tonic-gate 		data_dbt.data = P_ENTRY(p, H_DATAINDEX(ndx));
673*7c478bd9Sstevel@tonic-gate 		data_dbt.size =
674*7c478bd9Sstevel@tonic-gate 		    LEN_HITEM(p, hcp->hdr->pagesize, H_DATAINDEX(ndx));
675*7c478bd9Sstevel@tonic-gate 
676*7c478bd9Sstevel@tonic-gate 		if ((ret = __ham_insdel_log(dbenv->lg_info,
677*7c478bd9Sstevel@tonic-gate 		    dbc->txn, &new_lsn, 0, DELPAIR,
678*7c478bd9Sstevel@tonic-gate 		    dbp->log_fileid, PGNO(p), (u_int32_t)ndx,
679*7c478bd9Sstevel@tonic-gate 		    &LSN(p), &key_dbt, &data_dbt)) != 0)
680*7c478bd9Sstevel@tonic-gate 			return (ret);
681*7c478bd9Sstevel@tonic-gate 
682*7c478bd9Sstevel@tonic-gate 		/* Move lsn onto page. */
683*7c478bd9Sstevel@tonic-gate 		LSN(p) = new_lsn;
684*7c478bd9Sstevel@tonic-gate 	}
685*7c478bd9Sstevel@tonic-gate 
686*7c478bd9Sstevel@tonic-gate 	__ham_dpair(dbp, p, ndx);
687*7c478bd9Sstevel@tonic-gate 
688*7c478bd9Sstevel@tonic-gate 	/*
689*7c478bd9Sstevel@tonic-gate 	 * If we are locking, we will not maintain this, because it is
690*7c478bd9Sstevel@tonic-gate 	 * a hot spot.
691*7c478bd9Sstevel@tonic-gate 	 * XXX perhaps we can retain incremental numbers and apply them
692*7c478bd9Sstevel@tonic-gate 	 * later.
693*7c478bd9Sstevel@tonic-gate 	 */
694*7c478bd9Sstevel@tonic-gate 	if (!F_ISSET(dbp, DB_AM_LOCKING))
695*7c478bd9Sstevel@tonic-gate 		--hcp->hdr->nelem;
696*7c478bd9Sstevel@tonic-gate 
697*7c478bd9Sstevel@tonic-gate 	/*
698*7c478bd9Sstevel@tonic-gate 	 * If we need to reclaim the page, then check if the page is empty.
699*7c478bd9Sstevel@tonic-gate 	 * There are two cases.  If it's empty and it's not the first page
700*7c478bd9Sstevel@tonic-gate 	 * in the bucket (i.e., the bucket page) then we can simply remove
701*7c478bd9Sstevel@tonic-gate 	 * it. If it is the first chain in the bucket, then we need to copy
702*7c478bd9Sstevel@tonic-gate 	 * the second page into it and remove the second page.
703*7c478bd9Sstevel@tonic-gate 	 */
704*7c478bd9Sstevel@tonic-gate 	if (reclaim_page && NUM_ENT(p) == 0 && PREV_PGNO(p) == PGNO_INVALID &&
705*7c478bd9Sstevel@tonic-gate 	    NEXT_PGNO(p) != PGNO_INVALID) {
706*7c478bd9Sstevel@tonic-gate 		PAGE *n_pagep, *nn_pagep;
707*7c478bd9Sstevel@tonic-gate 		db_pgno_t tmp_pgno;
708*7c478bd9Sstevel@tonic-gate 
709*7c478bd9Sstevel@tonic-gate 		/*
710*7c478bd9Sstevel@tonic-gate 		 * First page in chain is empty and we know that there
711*7c478bd9Sstevel@tonic-gate 		 * are more pages in the chain.
712*7c478bd9Sstevel@tonic-gate 		 */
713*7c478bd9Sstevel@tonic-gate 		if ((ret =
714*7c478bd9Sstevel@tonic-gate 		    __ham_get_page(dbp, NEXT_PGNO(p), &n_pagep)) != 0)
715*7c478bd9Sstevel@tonic-gate 			return (ret);
716*7c478bd9Sstevel@tonic-gate 
717*7c478bd9Sstevel@tonic-gate 		if (NEXT_PGNO(n_pagep) != PGNO_INVALID) {
718*7c478bd9Sstevel@tonic-gate 			if ((ret =
719*7c478bd9Sstevel@tonic-gate 			    __ham_get_page(dbp, NEXT_PGNO(n_pagep),
720*7c478bd9Sstevel@tonic-gate 			    &nn_pagep)) != 0) {
721*7c478bd9Sstevel@tonic-gate 				(void) __ham_put_page(dbp, n_pagep, 0);
722*7c478bd9Sstevel@tonic-gate 				return (ret);
723*7c478bd9Sstevel@tonic-gate 			}
724*7c478bd9Sstevel@tonic-gate 		}
725*7c478bd9Sstevel@tonic-gate 
726*7c478bd9Sstevel@tonic-gate 		if (DB_LOGGING(dbc)) {
727*7c478bd9Sstevel@tonic-gate 			key_dbt.data = n_pagep;
728*7c478bd9Sstevel@tonic-gate 			key_dbt.size = hcp->hdr->pagesize;
729*7c478bd9Sstevel@tonic-gate 			if ((ret = __ham_copypage_log(dbenv->lg_info,
730*7c478bd9Sstevel@tonic-gate 			    dbc->txn, &new_lsn, 0, dbp->log_fileid, PGNO(p),
731*7c478bd9Sstevel@tonic-gate 			    &LSN(p), PGNO(n_pagep), &LSN(n_pagep),
732*7c478bd9Sstevel@tonic-gate 			    NEXT_PGNO(n_pagep),
733*7c478bd9Sstevel@tonic-gate 			    NEXT_PGNO(n_pagep) == PGNO_INVALID ? NULL :
734*7c478bd9Sstevel@tonic-gate 			    &LSN(nn_pagep), &key_dbt)) != 0)
735*7c478bd9Sstevel@tonic-gate 				return (ret);
736*7c478bd9Sstevel@tonic-gate 
737*7c478bd9Sstevel@tonic-gate 			/* Move lsn onto page. */
738*7c478bd9Sstevel@tonic-gate 			LSN(p) = new_lsn;	/* Structure assignment. */
739*7c478bd9Sstevel@tonic-gate 			LSN(n_pagep) = new_lsn;
740*7c478bd9Sstevel@tonic-gate 			if (NEXT_PGNO(n_pagep) != PGNO_INVALID)
741*7c478bd9Sstevel@tonic-gate 				LSN(nn_pagep) = new_lsn;
742*7c478bd9Sstevel@tonic-gate 		}
743*7c478bd9Sstevel@tonic-gate 		if (NEXT_PGNO(n_pagep) != PGNO_INVALID) {
744*7c478bd9Sstevel@tonic-gate 			PREV_PGNO(nn_pagep) = PGNO(p);
745*7c478bd9Sstevel@tonic-gate 			(void)__ham_put_page(dbp, nn_pagep, 1);
746*7c478bd9Sstevel@tonic-gate 		}
747*7c478bd9Sstevel@tonic-gate 
748*7c478bd9Sstevel@tonic-gate 		tmp_pgno = PGNO(p);
749*7c478bd9Sstevel@tonic-gate 		tmp_lsn = LSN(p);
750*7c478bd9Sstevel@tonic-gate 		memcpy(p, n_pagep, hcp->hdr->pagesize);
751*7c478bd9Sstevel@tonic-gate 		PGNO(p) = tmp_pgno;
752*7c478bd9Sstevel@tonic-gate 		LSN(p) = tmp_lsn;
753*7c478bd9Sstevel@tonic-gate 		PREV_PGNO(p) = PGNO_INVALID;
754*7c478bd9Sstevel@tonic-gate 
755*7c478bd9Sstevel@tonic-gate 		/*
756*7c478bd9Sstevel@tonic-gate 		 * Cursor is advanced to the beginning of the next page.
757*7c478bd9Sstevel@tonic-gate 		 */
758*7c478bd9Sstevel@tonic-gate 		hcp->bndx = 0;
759*7c478bd9Sstevel@tonic-gate 		hcp->pgno = PGNO(p);
760*7c478bd9Sstevel@tonic-gate 		F_SET(hcp, H_DELETED);
761*7c478bd9Sstevel@tonic-gate 		chg_pgno = PGNO(p);
762*7c478bd9Sstevel@tonic-gate 		if ((ret = __ham_dirty_page(dbp, p)) != 0 ||
763*7c478bd9Sstevel@tonic-gate 		    (ret = __ham_del_page(dbc, n_pagep)) != 0)
764*7c478bd9Sstevel@tonic-gate 			return (ret);
765*7c478bd9Sstevel@tonic-gate 	} else if (reclaim_page &&
766*7c478bd9Sstevel@tonic-gate 	    NUM_ENT(p) == 0 && PREV_PGNO(p) != PGNO_INVALID) {
767*7c478bd9Sstevel@tonic-gate 		PAGE *n_pagep, *p_pagep;
768*7c478bd9Sstevel@tonic-gate 
769*7c478bd9Sstevel@tonic-gate 		if ((ret =
770*7c478bd9Sstevel@tonic-gate 		    __ham_get_page(dbp, PREV_PGNO(p), &p_pagep)) != 0)
771*7c478bd9Sstevel@tonic-gate 			return (ret);
772*7c478bd9Sstevel@tonic-gate 
773*7c478bd9Sstevel@tonic-gate 		if (NEXT_PGNO(p) != PGNO_INVALID) {
774*7c478bd9Sstevel@tonic-gate 			if ((ret = __ham_get_page(dbp,
775*7c478bd9Sstevel@tonic-gate 			    NEXT_PGNO(p), &n_pagep)) != 0) {
776*7c478bd9Sstevel@tonic-gate 				(void)__ham_put_page(dbp, p_pagep, 0);
777*7c478bd9Sstevel@tonic-gate 				return (ret);
778*7c478bd9Sstevel@tonic-gate 			}
779*7c478bd9Sstevel@tonic-gate 			n_lsn = &LSN(n_pagep);
780*7c478bd9Sstevel@tonic-gate 		} else {
781*7c478bd9Sstevel@tonic-gate 			n_pagep = NULL;
782*7c478bd9Sstevel@tonic-gate 			n_lsn = NULL;
783*7c478bd9Sstevel@tonic-gate 		}
784*7c478bd9Sstevel@tonic-gate 
785*7c478bd9Sstevel@tonic-gate 		NEXT_PGNO(p_pagep) = NEXT_PGNO(p);
786*7c478bd9Sstevel@tonic-gate 		if (n_pagep != NULL)
787*7c478bd9Sstevel@tonic-gate 			PREV_PGNO(n_pagep) = PGNO(p_pagep);
788*7c478bd9Sstevel@tonic-gate 
789*7c478bd9Sstevel@tonic-gate 		if (DB_LOGGING(dbc)) {
790*7c478bd9Sstevel@tonic-gate 			if ((ret = __ham_newpage_log(dbenv->lg_info,
791*7c478bd9Sstevel@tonic-gate 			    dbc->txn, &new_lsn, 0, DELOVFL,
792*7c478bd9Sstevel@tonic-gate 			    dbp->log_fileid, PREV_PGNO(p), &LSN(p_pagep),
793*7c478bd9Sstevel@tonic-gate 			    PGNO(p), &LSN(p), NEXT_PGNO(p), n_lsn)) != 0)
794*7c478bd9Sstevel@tonic-gate 				return (ret);
795*7c478bd9Sstevel@tonic-gate 
796*7c478bd9Sstevel@tonic-gate 			/* Move lsn onto page. */
797*7c478bd9Sstevel@tonic-gate 			LSN(p_pagep) = new_lsn;	/* Structure assignment. */
798*7c478bd9Sstevel@tonic-gate 			if (n_pagep)
799*7c478bd9Sstevel@tonic-gate 				LSN(n_pagep) = new_lsn;
800*7c478bd9Sstevel@tonic-gate 			LSN(p) = new_lsn;
801*7c478bd9Sstevel@tonic-gate 		}
802*7c478bd9Sstevel@tonic-gate 		hcp->pgno = NEXT_PGNO(p);
803*7c478bd9Sstevel@tonic-gate 		hcp->bndx = 0;
804*7c478bd9Sstevel@tonic-gate 		/*
805*7c478bd9Sstevel@tonic-gate 		 * Since we are about to delete the cursor page and we have
806*7c478bd9Sstevel@tonic-gate 		 * just moved the cursor, we need to make sure that the
807*7c478bd9Sstevel@tonic-gate 		 * old page pointer isn't left hanging around in the cursor.
808*7c478bd9Sstevel@tonic-gate 		 */
809*7c478bd9Sstevel@tonic-gate 		hcp->pagep = NULL;
810*7c478bd9Sstevel@tonic-gate 		chg_pgno = PGNO(p);
811*7c478bd9Sstevel@tonic-gate 		ret = __ham_del_page(dbc, p);
812*7c478bd9Sstevel@tonic-gate 		if ((tret = __ham_put_page(dbp, p_pagep, 1)) != 0 &&
813*7c478bd9Sstevel@tonic-gate 		    ret == 0)
814*7c478bd9Sstevel@tonic-gate 			ret = tret;
815*7c478bd9Sstevel@tonic-gate 		if (n_pagep != NULL &&
816*7c478bd9Sstevel@tonic-gate 		    (tret = __ham_put_page(dbp, n_pagep, 1)) != 0 &&
817*7c478bd9Sstevel@tonic-gate 		    ret == 0)
818*7c478bd9Sstevel@tonic-gate 			ret = tret;
819*7c478bd9Sstevel@tonic-gate 		if (ret != 0)
820*7c478bd9Sstevel@tonic-gate 			return (ret);
821*7c478bd9Sstevel@tonic-gate 	} else {
822*7c478bd9Sstevel@tonic-gate 		/*
823*7c478bd9Sstevel@tonic-gate 		 * Mark item deleted so that we don't try to return it, and
824*7c478bd9Sstevel@tonic-gate 		 * so that we update the cursor correctly on the next call
825*7c478bd9Sstevel@tonic-gate 		 * to next.
826*7c478bd9Sstevel@tonic-gate 		 */
827*7c478bd9Sstevel@tonic-gate 		F_SET(hcp, H_DELETED);
828*7c478bd9Sstevel@tonic-gate 		chg_pgno = hcp->pgno;
829*7c478bd9Sstevel@tonic-gate 		ret = __ham_dirty_page(dbp, p);
830*7c478bd9Sstevel@tonic-gate 	}
831*7c478bd9Sstevel@tonic-gate 	__ham_c_update(hcp, chg_pgno, 0, 0, 0);
832*7c478bd9Sstevel@tonic-gate 
833*7c478bd9Sstevel@tonic-gate 	/*
834*7c478bd9Sstevel@tonic-gate 	 * Since we just deleted a pair from the master page, anything
835*7c478bd9Sstevel@tonic-gate 	 * in hcp->dpgno should be cleared.
836*7c478bd9Sstevel@tonic-gate 	 */
837*7c478bd9Sstevel@tonic-gate 	hcp->dpgno = PGNO_INVALID;
838*7c478bd9Sstevel@tonic-gate 
839*7c478bd9Sstevel@tonic-gate 	F_CLR(hcp, H_OK);
840*7c478bd9Sstevel@tonic-gate 	return (ret);
841*7c478bd9Sstevel@tonic-gate }
842*7c478bd9Sstevel@tonic-gate 
843*7c478bd9Sstevel@tonic-gate /*
844*7c478bd9Sstevel@tonic-gate  * __ham_replpair --
845*7c478bd9Sstevel@tonic-gate  *	Given the key data indicated by the cursor, replace part/all of it
846*7c478bd9Sstevel@tonic-gate  *	according to the fields in the dbt.
847*7c478bd9Sstevel@tonic-gate  *
848*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __ham_replpair __P((DBC *, DBT *, u_int32_t));
849*7c478bd9Sstevel@tonic-gate  */
850*7c478bd9Sstevel@tonic-gate int
__ham_replpair(dbc,dbt,make_dup)851*7c478bd9Sstevel@tonic-gate __ham_replpair(dbc, dbt, make_dup)
852*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
853*7c478bd9Sstevel@tonic-gate 	DBT *dbt;
854*7c478bd9Sstevel@tonic-gate 	u_int32_t make_dup;
855*7c478bd9Sstevel@tonic-gate {
856*7c478bd9Sstevel@tonic-gate 	DB *dbp;
857*7c478bd9Sstevel@tonic-gate 	HASH_CURSOR *hcp;
858*7c478bd9Sstevel@tonic-gate 	DBT old_dbt, tdata, tmp;
859*7c478bd9Sstevel@tonic-gate 	DB_LSN	new_lsn;
860*7c478bd9Sstevel@tonic-gate 	int32_t change;			/* XXX: Possible overflow. */
861*7c478bd9Sstevel@tonic-gate 	u_int32_t len;
862*7c478bd9Sstevel@tonic-gate 	int is_big, ret, type;
863*7c478bd9Sstevel@tonic-gate 	u_int8_t *beg, *dest, *end, *hk, *src;
864*7c478bd9Sstevel@tonic-gate 
865*7c478bd9Sstevel@tonic-gate 	/*
866*7c478bd9Sstevel@tonic-gate 	 * Big item replacements are handled in generic code.
867*7c478bd9Sstevel@tonic-gate 	 * Items that fit on the current page fall into 4 classes.
868*7c478bd9Sstevel@tonic-gate 	 * 1. On-page element, same size
869*7c478bd9Sstevel@tonic-gate 	 * 2. On-page element, new is bigger (fits)
870*7c478bd9Sstevel@tonic-gate 	 * 3. On-page element, new is bigger (does not fit)
871*7c478bd9Sstevel@tonic-gate 	 * 4. On-page element, old is bigger
872*7c478bd9Sstevel@tonic-gate 	 * Numbers 1, 2, and 4 are essentially the same (and should
873*7c478bd9Sstevel@tonic-gate 	 * be the common case).  We handle case 3 as a delete and
874*7c478bd9Sstevel@tonic-gate 	 * add.
875*7c478bd9Sstevel@tonic-gate 	 */
876*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
877*7c478bd9Sstevel@tonic-gate 	hcp = (HASH_CURSOR *)dbc->internal;
878*7c478bd9Sstevel@tonic-gate 
879*7c478bd9Sstevel@tonic-gate 	/*
880*7c478bd9Sstevel@tonic-gate 	 * We need to compute the number of bytes that we are adding or
881*7c478bd9Sstevel@tonic-gate 	 * removing from the entry.  Normally, we can simply substract
882*7c478bd9Sstevel@tonic-gate 	 * the number of bytes we are replacing (dbt->dlen) from the
883*7c478bd9Sstevel@tonic-gate 	 * number of bytes we are inserting (dbt->size).  However, if
884*7c478bd9Sstevel@tonic-gate 	 * we are doing a partial put off the end of a record, then this
885*7c478bd9Sstevel@tonic-gate 	 * formula doesn't work, because we are essentially adding
886*7c478bd9Sstevel@tonic-gate 	 * new bytes.
887*7c478bd9Sstevel@tonic-gate 	 */
888*7c478bd9Sstevel@tonic-gate 	change = dbt->size - dbt->dlen;
889*7c478bd9Sstevel@tonic-gate 
890*7c478bd9Sstevel@tonic-gate 	hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
891*7c478bd9Sstevel@tonic-gate 	is_big = HPAGE_PTYPE(hk) == H_OFFPAGE;
892*7c478bd9Sstevel@tonic-gate 
893*7c478bd9Sstevel@tonic-gate 	if (is_big)
894*7c478bd9Sstevel@tonic-gate 		memcpy(&len, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
895*7c478bd9Sstevel@tonic-gate 	else
896*7c478bd9Sstevel@tonic-gate 		len = LEN_HKEYDATA(hcp->pagep,
897*7c478bd9Sstevel@tonic-gate 		    dbp->pgsize, H_DATAINDEX(hcp->bndx));
898*7c478bd9Sstevel@tonic-gate 
899*7c478bd9Sstevel@tonic-gate 	if (dbt->doff + dbt->dlen > len)
900*7c478bd9Sstevel@tonic-gate 		change += dbt->doff + dbt->dlen - len;
901*7c478bd9Sstevel@tonic-gate 
902*7c478bd9Sstevel@tonic-gate 
903*7c478bd9Sstevel@tonic-gate 	if (change > (int32_t)P_FREESPACE(hcp->pagep) || is_big) {
904*7c478bd9Sstevel@tonic-gate 		/*
905*7c478bd9Sstevel@tonic-gate 		 * Case 3 -- two subcases.
906*7c478bd9Sstevel@tonic-gate 		 * A. This is not really a partial operation, but an overwrite.
907*7c478bd9Sstevel@tonic-gate 		 *    Simple del and add works.
908*7c478bd9Sstevel@tonic-gate 		 * B. This is a partial and we need to construct the data that
909*7c478bd9Sstevel@tonic-gate 		 *    we are really inserting (yuck).
910*7c478bd9Sstevel@tonic-gate 		 * In both cases, we need to grab the key off the page (in
911*7c478bd9Sstevel@tonic-gate 		 * some cases we could do this outside of this routine; for
912*7c478bd9Sstevel@tonic-gate 		 * cleanliness we do it here.  If you happen to be on a big
913*7c478bd9Sstevel@tonic-gate 		 * key, this could be a performance hit).
914*7c478bd9Sstevel@tonic-gate 		 */
915*7c478bd9Sstevel@tonic-gate 		tmp.flags = 0;
916*7c478bd9Sstevel@tonic-gate 		F_SET(&tmp, DB_DBT_MALLOC | DB_DBT_INTERNAL);
917*7c478bd9Sstevel@tonic-gate 		if ((ret =
918*7c478bd9Sstevel@tonic-gate 		    __db_ret(dbp, hcp->pagep, H_KEYINDEX(hcp->bndx),
919*7c478bd9Sstevel@tonic-gate 		    &tmp, &dbc->rkey.data, &dbc->rkey.size)) != 0)
920*7c478bd9Sstevel@tonic-gate 			return (ret);
921*7c478bd9Sstevel@tonic-gate 
922*7c478bd9Sstevel@tonic-gate 		if (dbt->doff == 0 && dbt->dlen == len) {
923*7c478bd9Sstevel@tonic-gate 			ret = __ham_del_pair(dbc, 0);
924*7c478bd9Sstevel@tonic-gate 			if (ret == 0)
925*7c478bd9Sstevel@tonic-gate 			    ret = __ham_add_el(dbc, &tmp, dbt, H_KEYDATA);
926*7c478bd9Sstevel@tonic-gate 		} else {					/* Case B */
927*7c478bd9Sstevel@tonic-gate 			type = HPAGE_PTYPE(hk) != H_OFFPAGE ?
928*7c478bd9Sstevel@tonic-gate 			    HPAGE_PTYPE(hk) : H_KEYDATA;
929*7c478bd9Sstevel@tonic-gate 			tdata.flags = 0;
930*7c478bd9Sstevel@tonic-gate 			F_SET(&tdata, DB_DBT_MALLOC | DB_DBT_INTERNAL);
931*7c478bd9Sstevel@tonic-gate 
932*7c478bd9Sstevel@tonic-gate 			if ((ret = __db_ret(dbp, hcp->pagep,
933*7c478bd9Sstevel@tonic-gate 			    H_DATAINDEX(hcp->bndx), &tdata, &dbc->rdata.data,
934*7c478bd9Sstevel@tonic-gate 			    &dbc->rdata.size)) != 0)
935*7c478bd9Sstevel@tonic-gate 				goto err;
936*7c478bd9Sstevel@tonic-gate 
937*7c478bd9Sstevel@tonic-gate 			/* Now we can delete the item. */
938*7c478bd9Sstevel@tonic-gate 			if ((ret = __ham_del_pair(dbc, 0)) != 0) {
939*7c478bd9Sstevel@tonic-gate 				__os_free(tdata.data, tdata.size);
940*7c478bd9Sstevel@tonic-gate 				goto err;
941*7c478bd9Sstevel@tonic-gate 			}
942*7c478bd9Sstevel@tonic-gate 
943*7c478bd9Sstevel@tonic-gate 			/* Now shift old data around to make room for new. */
944*7c478bd9Sstevel@tonic-gate 			if (change > 0) {
945*7c478bd9Sstevel@tonic-gate 				 if ((ret = __os_realloc(&tdata.data,
946*7c478bd9Sstevel@tonic-gate 				     tdata.size + change)) != 0)
947*7c478bd9Sstevel@tonic-gate 					return (ret);
948*7c478bd9Sstevel@tonic-gate 				memset((u_int8_t *)tdata.data + tdata.size,
949*7c478bd9Sstevel@tonic-gate 				    0, change);
950*7c478bd9Sstevel@tonic-gate 			}
951*7c478bd9Sstevel@tonic-gate 			end = (u_int8_t *)tdata.data + tdata.size;
952*7c478bd9Sstevel@tonic-gate 
953*7c478bd9Sstevel@tonic-gate 			src = (u_int8_t *)tdata.data + dbt->doff + dbt->dlen;
954*7c478bd9Sstevel@tonic-gate 			if (src < end && tdata.size > dbt->doff + dbt->dlen) {
955*7c478bd9Sstevel@tonic-gate 				len = tdata.size - dbt->doff - dbt->dlen;
956*7c478bd9Sstevel@tonic-gate 				dest = src + change;
957*7c478bd9Sstevel@tonic-gate 				memmove(dest, src, len);
958*7c478bd9Sstevel@tonic-gate 			}
959*7c478bd9Sstevel@tonic-gate 			memcpy((u_int8_t *)tdata.data + dbt->doff,
960*7c478bd9Sstevel@tonic-gate 			    dbt->data, dbt->size);
961*7c478bd9Sstevel@tonic-gate 			tdata.size += change;
962*7c478bd9Sstevel@tonic-gate 
963*7c478bd9Sstevel@tonic-gate 			/* Now add the pair. */
964*7c478bd9Sstevel@tonic-gate 			ret = __ham_add_el(dbc, &tmp, &tdata, type);
965*7c478bd9Sstevel@tonic-gate 			__os_free(tdata.data, tdata.size);
966*7c478bd9Sstevel@tonic-gate 		}
967*7c478bd9Sstevel@tonic-gate err:		__os_free(tmp.data, tmp.size);
968*7c478bd9Sstevel@tonic-gate 		return (ret);
969*7c478bd9Sstevel@tonic-gate 	}
970*7c478bd9Sstevel@tonic-gate 
971*7c478bd9Sstevel@tonic-gate 	/*
972*7c478bd9Sstevel@tonic-gate 	 * Set up pointer into existing data. Do it before the log
973*7c478bd9Sstevel@tonic-gate 	 * message so we can use it inside of the log setup.
974*7c478bd9Sstevel@tonic-gate 	 */
975*7c478bd9Sstevel@tonic-gate 	beg = HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx));
976*7c478bd9Sstevel@tonic-gate 	beg += dbt->doff;
977*7c478bd9Sstevel@tonic-gate 
978*7c478bd9Sstevel@tonic-gate 	/*
979*7c478bd9Sstevel@tonic-gate 	 * If we are going to have to move bytes at all, figure out
980*7c478bd9Sstevel@tonic-gate 	 * all the parameters here.  Then log the call before moving
981*7c478bd9Sstevel@tonic-gate 	 * anything around.
982*7c478bd9Sstevel@tonic-gate 	 */
983*7c478bd9Sstevel@tonic-gate 	if (DB_LOGGING(dbc)) {
984*7c478bd9Sstevel@tonic-gate 		old_dbt.data = beg;
985*7c478bd9Sstevel@tonic-gate 		old_dbt.size = dbt->dlen;
986*7c478bd9Sstevel@tonic-gate 		if ((ret = __ham_replace_log(dbp->dbenv->lg_info,
987*7c478bd9Sstevel@tonic-gate 		    dbc->txn, &new_lsn, 0, dbp->log_fileid, PGNO(hcp->pagep),
988*7c478bd9Sstevel@tonic-gate 		    (u_int32_t)H_DATAINDEX(hcp->bndx), &LSN(hcp->pagep),
989*7c478bd9Sstevel@tonic-gate 		    (u_int32_t)dbt->doff, &old_dbt, dbt, make_dup)) != 0)
990*7c478bd9Sstevel@tonic-gate 			return (ret);
991*7c478bd9Sstevel@tonic-gate 
992*7c478bd9Sstevel@tonic-gate 		LSN(hcp->pagep) = new_lsn;	/* Structure assignment. */
993*7c478bd9Sstevel@tonic-gate 	}
994*7c478bd9Sstevel@tonic-gate 
995*7c478bd9Sstevel@tonic-gate 	__ham_onpage_replace(hcp->pagep, dbp->pgsize,
996*7c478bd9Sstevel@tonic-gate 	    (u_int32_t)H_DATAINDEX(hcp->bndx), (int32_t)dbt->doff, change, dbt);
997*7c478bd9Sstevel@tonic-gate 
998*7c478bd9Sstevel@tonic-gate 	return (0);
999*7c478bd9Sstevel@tonic-gate }
1000*7c478bd9Sstevel@tonic-gate 
1001*7c478bd9Sstevel@tonic-gate /*
1002*7c478bd9Sstevel@tonic-gate  * Replace data on a page with new data, possibly growing or shrinking what's
1003*7c478bd9Sstevel@tonic-gate  * there.  This is called on two different occasions. On one (from replpair)
1004*7c478bd9Sstevel@tonic-gate  * we are interested in changing only the data.  On the other (from recovery)
1005*7c478bd9Sstevel@tonic-gate  * we are replacing the entire data (header and all) with a new element.  In
1006*7c478bd9Sstevel@tonic-gate  * the latter case, the off argument is negative.
1007*7c478bd9Sstevel@tonic-gate  * pagep: the page that we're changing
1008*7c478bd9Sstevel@tonic-gate  * ndx: page index of the element that is growing/shrinking.
1009*7c478bd9Sstevel@tonic-gate  * off: Offset at which we are beginning the replacement.
1010*7c478bd9Sstevel@tonic-gate  * change: the number of bytes (+ or -) that the element is growing/shrinking.
1011*7c478bd9Sstevel@tonic-gate  * dbt: the new data that gets written at beg.
1012*7c478bd9Sstevel@tonic-gate  * PUBLIC: void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t,
1013*7c478bd9Sstevel@tonic-gate  * PUBLIC:     int32_t,  DBT *));
1014*7c478bd9Sstevel@tonic-gate  */
1015*7c478bd9Sstevel@tonic-gate void
__ham_onpage_replace(pagep,pgsize,ndx,off,change,dbt)1016*7c478bd9Sstevel@tonic-gate __ham_onpage_replace(pagep, pgsize, ndx, off, change, dbt)
1017*7c478bd9Sstevel@tonic-gate 	PAGE *pagep;
1018*7c478bd9Sstevel@tonic-gate 	size_t pgsize;
1019*7c478bd9Sstevel@tonic-gate 	u_int32_t ndx;
1020*7c478bd9Sstevel@tonic-gate 	int32_t off;
1021*7c478bd9Sstevel@tonic-gate 	int32_t change;
1022*7c478bd9Sstevel@tonic-gate 	DBT *dbt;
1023*7c478bd9Sstevel@tonic-gate {
1024*7c478bd9Sstevel@tonic-gate 	db_indx_t i;
1025*7c478bd9Sstevel@tonic-gate 	int32_t len;
1026*7c478bd9Sstevel@tonic-gate 	u_int8_t *src, *dest;
1027*7c478bd9Sstevel@tonic-gate 	int zero_me;
1028*7c478bd9Sstevel@tonic-gate 
1029*7c478bd9Sstevel@tonic-gate 	if (change != 0) {
1030*7c478bd9Sstevel@tonic-gate 		zero_me = 0;
1031*7c478bd9Sstevel@tonic-gate 		src = (u_int8_t *)(pagep) + HOFFSET(pagep);
1032*7c478bd9Sstevel@tonic-gate 		if (off < 0)
1033*7c478bd9Sstevel@tonic-gate 			len = pagep->inp[ndx] - HOFFSET(pagep);
1034*7c478bd9Sstevel@tonic-gate 		else if ((u_int32_t)off >= LEN_HKEYDATA(pagep, pgsize, ndx)) {
1035*7c478bd9Sstevel@tonic-gate 			len = HKEYDATA_DATA(P_ENTRY(pagep, ndx)) +
1036*7c478bd9Sstevel@tonic-gate 			    LEN_HKEYDATA(pagep, pgsize, ndx) - src;
1037*7c478bd9Sstevel@tonic-gate 			zero_me = 1;
1038*7c478bd9Sstevel@tonic-gate 		} else
1039*7c478bd9Sstevel@tonic-gate 			len = (HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + off) - src;
1040*7c478bd9Sstevel@tonic-gate 		dest = src - change;
1041*7c478bd9Sstevel@tonic-gate 		memmove(dest, src, len);
1042*7c478bd9Sstevel@tonic-gate 		if (zero_me)
1043*7c478bd9Sstevel@tonic-gate 			memset(dest + len, 0, change);
1044*7c478bd9Sstevel@tonic-gate 
1045*7c478bd9Sstevel@tonic-gate 		/* Now update the indices. */
1046*7c478bd9Sstevel@tonic-gate 		for (i = ndx; i < NUM_ENT(pagep); i++)
1047*7c478bd9Sstevel@tonic-gate 			pagep->inp[i] -= change;
1048*7c478bd9Sstevel@tonic-gate 		HOFFSET(pagep) -= change;
1049*7c478bd9Sstevel@tonic-gate 	}
1050*7c478bd9Sstevel@tonic-gate 	if (off >= 0)
1051*7c478bd9Sstevel@tonic-gate 		memcpy(HKEYDATA_DATA(P_ENTRY(pagep, ndx)) + off,
1052*7c478bd9Sstevel@tonic-gate 		    dbt->data, dbt->size);
1053*7c478bd9Sstevel@tonic-gate 	else
1054*7c478bd9Sstevel@tonic-gate 		memcpy(P_ENTRY(pagep, ndx), dbt->data, dbt->size);
1055*7c478bd9Sstevel@tonic-gate }
1056*7c478bd9Sstevel@tonic-gate 
1057*7c478bd9Sstevel@tonic-gate /*
1058*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __ham_split_page __P((DBC *, u_int32_t, u_int32_t));
1059*7c478bd9Sstevel@tonic-gate  */
1060*7c478bd9Sstevel@tonic-gate int
__ham_split_page(dbc,obucket,nbucket)1061*7c478bd9Sstevel@tonic-gate __ham_split_page(dbc, obucket, nbucket)
1062*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
1063*7c478bd9Sstevel@tonic-gate 	u_int32_t obucket, nbucket;
1064*7c478bd9Sstevel@tonic-gate {
1065*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1066*7c478bd9Sstevel@tonic-gate 	HASH_CURSOR *hcp;
1067*7c478bd9Sstevel@tonic-gate 	DBT key, page_dbt;
1068*7c478bd9Sstevel@tonic-gate 	DB_ENV *dbenv;
1069*7c478bd9Sstevel@tonic-gate 	DB_LSN new_lsn;
1070*7c478bd9Sstevel@tonic-gate 	PAGE **pp, *old_pagep, *temp_pagep, *new_pagep;
1071*7c478bd9Sstevel@tonic-gate 	db_indx_t n;
1072*7c478bd9Sstevel@tonic-gate 	db_pgno_t bucket_pgno, next_pgno;
1073*7c478bd9Sstevel@tonic-gate 	u_int32_t big_len, len;
1074*7c478bd9Sstevel@tonic-gate 	int ret, tret;
1075*7c478bd9Sstevel@tonic-gate 	void *big_buf;
1076*7c478bd9Sstevel@tonic-gate 
1077*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
1078*7c478bd9Sstevel@tonic-gate 	hcp = (HASH_CURSOR *)dbc->internal;
1079*7c478bd9Sstevel@tonic-gate 	dbenv = dbp->dbenv;
1080*7c478bd9Sstevel@tonic-gate 	temp_pagep = old_pagep = new_pagep = NULL;
1081*7c478bd9Sstevel@tonic-gate 
1082*7c478bd9Sstevel@tonic-gate 	bucket_pgno = BUCKET_TO_PAGE(hcp, obucket);
1083*7c478bd9Sstevel@tonic-gate 	if ((ret = __ham_get_page(dbp, bucket_pgno, &old_pagep)) != 0)
1084*7c478bd9Sstevel@tonic-gate 		return (ret);
1085*7c478bd9Sstevel@tonic-gate 	if ((ret = __ham_new_page(dbp, BUCKET_TO_PAGE(hcp, nbucket), P_HASH,
1086*7c478bd9Sstevel@tonic-gate 	    &new_pagep)) != 0)
1087*7c478bd9Sstevel@tonic-gate 		goto err;
1088*7c478bd9Sstevel@tonic-gate 
1089*7c478bd9Sstevel@tonic-gate 	temp_pagep = hcp->split_buf;
1090*7c478bd9Sstevel@tonic-gate 	memcpy(temp_pagep, old_pagep, hcp->hdr->pagesize);
1091*7c478bd9Sstevel@tonic-gate 
1092*7c478bd9Sstevel@tonic-gate 	if (DB_LOGGING(dbc)) {
1093*7c478bd9Sstevel@tonic-gate 		page_dbt.size = hcp->hdr->pagesize;
1094*7c478bd9Sstevel@tonic-gate 		page_dbt.data = old_pagep;
1095*7c478bd9Sstevel@tonic-gate 		if ((ret = __ham_splitdata_log(dbenv->lg_info,
1096*7c478bd9Sstevel@tonic-gate 		    dbc->txn, &new_lsn, 0, dbp->log_fileid, SPLITOLD,
1097*7c478bd9Sstevel@tonic-gate 		    PGNO(old_pagep), &page_dbt, &LSN(old_pagep))) != 0)
1098*7c478bd9Sstevel@tonic-gate 			goto err;
1099*7c478bd9Sstevel@tonic-gate 	}
1100*7c478bd9Sstevel@tonic-gate 
1101*7c478bd9Sstevel@tonic-gate 	P_INIT(old_pagep, hcp->hdr->pagesize, PGNO(old_pagep), PGNO_INVALID,
1102*7c478bd9Sstevel@tonic-gate 	    PGNO_INVALID, 0, P_HASH);
1103*7c478bd9Sstevel@tonic-gate 
1104*7c478bd9Sstevel@tonic-gate 	if (DB_LOGGING(dbc))
1105*7c478bd9Sstevel@tonic-gate 		LSN(old_pagep) = new_lsn;	/* Structure assignment. */
1106*7c478bd9Sstevel@tonic-gate 
1107*7c478bd9Sstevel@tonic-gate 	big_len = 0;
1108*7c478bd9Sstevel@tonic-gate 	big_buf = NULL;
1109*7c478bd9Sstevel@tonic-gate 	key.flags = 0;
1110*7c478bd9Sstevel@tonic-gate 	while (temp_pagep != NULL) {
1111*7c478bd9Sstevel@tonic-gate 		for (n = 0; n < (db_indx_t)H_NUMPAIRS(temp_pagep); n++) {
1112*7c478bd9Sstevel@tonic-gate 			if ((ret =
1113*7c478bd9Sstevel@tonic-gate 			    __db_ret(dbp, temp_pagep, H_KEYINDEX(n),
1114*7c478bd9Sstevel@tonic-gate 			    &key, &big_buf, &big_len)) != 0)
1115*7c478bd9Sstevel@tonic-gate 				goto err;
1116*7c478bd9Sstevel@tonic-gate 
1117*7c478bd9Sstevel@tonic-gate 			if (__ham_call_hash(hcp, key.data, key.size)
1118*7c478bd9Sstevel@tonic-gate 			    == obucket)
1119*7c478bd9Sstevel@tonic-gate 				pp = &old_pagep;
1120*7c478bd9Sstevel@tonic-gate 			else
1121*7c478bd9Sstevel@tonic-gate 				pp = &new_pagep;
1122*7c478bd9Sstevel@tonic-gate 
1123*7c478bd9Sstevel@tonic-gate 			/*
1124*7c478bd9Sstevel@tonic-gate 			 * Figure out how many bytes we need on the new
1125*7c478bd9Sstevel@tonic-gate 			 * page to store the key/data pair.
1126*7c478bd9Sstevel@tonic-gate 			 */
1127*7c478bd9Sstevel@tonic-gate 
1128*7c478bd9Sstevel@tonic-gate 			len = LEN_HITEM(temp_pagep, hcp->hdr->pagesize,
1129*7c478bd9Sstevel@tonic-gate 			    H_DATAINDEX(n)) +
1130*7c478bd9Sstevel@tonic-gate 			    LEN_HITEM(temp_pagep, hcp->hdr->pagesize,
1131*7c478bd9Sstevel@tonic-gate 			    H_KEYINDEX(n)) +
1132*7c478bd9Sstevel@tonic-gate 			    2 * sizeof(db_indx_t);
1133*7c478bd9Sstevel@tonic-gate 
1134*7c478bd9Sstevel@tonic-gate 			if (P_FREESPACE(*pp) < len) {
1135*7c478bd9Sstevel@tonic-gate 				if (DB_LOGGING(dbc)) {
1136*7c478bd9Sstevel@tonic-gate 					page_dbt.size = hcp->hdr->pagesize;
1137*7c478bd9Sstevel@tonic-gate 					page_dbt.data = *pp;
1138*7c478bd9Sstevel@tonic-gate 					if ((ret = __ham_splitdata_log(
1139*7c478bd9Sstevel@tonic-gate 					    dbenv->lg_info, dbc->txn,
1140*7c478bd9Sstevel@tonic-gate 					    &new_lsn, 0, dbp->log_fileid,
1141*7c478bd9Sstevel@tonic-gate 					    SPLITNEW, PGNO(*pp), &page_dbt,
1142*7c478bd9Sstevel@tonic-gate 					    &LSN(*pp))) != 0)
1143*7c478bd9Sstevel@tonic-gate 						goto err;
1144*7c478bd9Sstevel@tonic-gate 					LSN(*pp) = new_lsn;
1145*7c478bd9Sstevel@tonic-gate 				}
1146*7c478bd9Sstevel@tonic-gate 				if ((ret =
1147*7c478bd9Sstevel@tonic-gate 				    __ham_add_ovflpage(dbc, *pp, 1, pp)) != 0)
1148*7c478bd9Sstevel@tonic-gate 					goto err;
1149*7c478bd9Sstevel@tonic-gate 			}
1150*7c478bd9Sstevel@tonic-gate 			__ham_copy_item(dbp->pgsize,
1151*7c478bd9Sstevel@tonic-gate 			    temp_pagep, H_KEYINDEX(n), *pp);
1152*7c478bd9Sstevel@tonic-gate 			__ham_copy_item(dbp->pgsize,
1153*7c478bd9Sstevel@tonic-gate 			    temp_pagep, H_DATAINDEX(n), *pp);
1154*7c478bd9Sstevel@tonic-gate 		}
1155*7c478bd9Sstevel@tonic-gate 		next_pgno = NEXT_PGNO(temp_pagep);
1156*7c478bd9Sstevel@tonic-gate 
1157*7c478bd9Sstevel@tonic-gate 		/* Clear temp_page; if it's a link overflow page, free it. */
1158*7c478bd9Sstevel@tonic-gate 		if (PGNO(temp_pagep) != bucket_pgno && (ret =
1159*7c478bd9Sstevel@tonic-gate 		    __ham_del_page(dbc, temp_pagep)) != 0)
1160*7c478bd9Sstevel@tonic-gate 			goto err;
1161*7c478bd9Sstevel@tonic-gate 
1162*7c478bd9Sstevel@tonic-gate 		if (next_pgno == PGNO_INVALID)
1163*7c478bd9Sstevel@tonic-gate 			temp_pagep = NULL;
1164*7c478bd9Sstevel@tonic-gate 		else if ((ret =
1165*7c478bd9Sstevel@tonic-gate 		    __ham_get_page(dbp, next_pgno, &temp_pagep)) != 0)
1166*7c478bd9Sstevel@tonic-gate 			goto err;
1167*7c478bd9Sstevel@tonic-gate 
1168*7c478bd9Sstevel@tonic-gate 		if (temp_pagep != NULL && DB_LOGGING(dbc)) {
1169*7c478bd9Sstevel@tonic-gate 			page_dbt.size = hcp->hdr->pagesize;
1170*7c478bd9Sstevel@tonic-gate 			page_dbt.data = temp_pagep;
1171*7c478bd9Sstevel@tonic-gate 			if ((ret = __ham_splitdata_log(dbenv->lg_info,
1172*7c478bd9Sstevel@tonic-gate 			    dbc->txn, &new_lsn, 0, dbp->log_fileid,
1173*7c478bd9Sstevel@tonic-gate 			    SPLITOLD, PGNO(temp_pagep),
1174*7c478bd9Sstevel@tonic-gate 			    &page_dbt, &LSN(temp_pagep))) != 0)
1175*7c478bd9Sstevel@tonic-gate 				goto err;
1176*7c478bd9Sstevel@tonic-gate 			LSN(temp_pagep) = new_lsn;
1177*7c478bd9Sstevel@tonic-gate 		}
1178*7c478bd9Sstevel@tonic-gate 	}
1179*7c478bd9Sstevel@tonic-gate 	if (big_buf != NULL)
1180*7c478bd9Sstevel@tonic-gate 		__os_free(big_buf, big_len);
1181*7c478bd9Sstevel@tonic-gate 
1182*7c478bd9Sstevel@tonic-gate 	/*
1183*7c478bd9Sstevel@tonic-gate 	 * If the original bucket spanned multiple pages, then we've got
1184*7c478bd9Sstevel@tonic-gate 	 * a pointer to a page that used to be on the bucket chain.  It
1185*7c478bd9Sstevel@tonic-gate 	 * should be deleted.
1186*7c478bd9Sstevel@tonic-gate 	 */
1187*7c478bd9Sstevel@tonic-gate 	if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno &&
1188*7c478bd9Sstevel@tonic-gate 	    (ret = __ham_del_page(dbc, temp_pagep)) != 0)
1189*7c478bd9Sstevel@tonic-gate 		goto err;
1190*7c478bd9Sstevel@tonic-gate 
1191*7c478bd9Sstevel@tonic-gate 	/*
1192*7c478bd9Sstevel@tonic-gate 	 * Write new buckets out.
1193*7c478bd9Sstevel@tonic-gate 	 */
1194*7c478bd9Sstevel@tonic-gate 	if (DB_LOGGING(dbc)) {
1195*7c478bd9Sstevel@tonic-gate 		page_dbt.size = hcp->hdr->pagesize;
1196*7c478bd9Sstevel@tonic-gate 		page_dbt.data = old_pagep;
1197*7c478bd9Sstevel@tonic-gate 		if ((ret = __ham_splitdata_log(dbenv->lg_info,
1198*7c478bd9Sstevel@tonic-gate 		   dbc->txn, &new_lsn, 0, dbp->log_fileid,
1199*7c478bd9Sstevel@tonic-gate 		   SPLITNEW, PGNO(old_pagep),
1200*7c478bd9Sstevel@tonic-gate 		    &page_dbt, &LSN(old_pagep))) != 0)
1201*7c478bd9Sstevel@tonic-gate 			goto err;
1202*7c478bd9Sstevel@tonic-gate 		LSN(old_pagep) = new_lsn;
1203*7c478bd9Sstevel@tonic-gate 
1204*7c478bd9Sstevel@tonic-gate 		page_dbt.data = new_pagep;
1205*7c478bd9Sstevel@tonic-gate 		if ((ret = __ham_splitdata_log(dbenv->lg_info,
1206*7c478bd9Sstevel@tonic-gate 		    dbc->txn, &new_lsn, 0, dbp->log_fileid,
1207*7c478bd9Sstevel@tonic-gate 		    SPLITNEW, PGNO(new_pagep), &page_dbt, &LSN(new_pagep))) != 0)
1208*7c478bd9Sstevel@tonic-gate 			goto err;
1209*7c478bd9Sstevel@tonic-gate 		LSN(new_pagep) = new_lsn;
1210*7c478bd9Sstevel@tonic-gate 	}
1211*7c478bd9Sstevel@tonic-gate 	ret = __ham_put_page(dbp, old_pagep, 1);
1212*7c478bd9Sstevel@tonic-gate 	if ((tret = __ham_put_page(dbp, new_pagep, 1)) != 0 &&
1213*7c478bd9Sstevel@tonic-gate 	    ret == 0)
1214*7c478bd9Sstevel@tonic-gate 		ret = tret;
1215*7c478bd9Sstevel@tonic-gate 
1216*7c478bd9Sstevel@tonic-gate 	if (0) {
1217*7c478bd9Sstevel@tonic-gate err:		if (old_pagep != NULL)
1218*7c478bd9Sstevel@tonic-gate 			(void)__ham_put_page(dbp, old_pagep, 1);
1219*7c478bd9Sstevel@tonic-gate 		if (new_pagep != NULL)
1220*7c478bd9Sstevel@tonic-gate 			(void)__ham_put_page(dbp, new_pagep, 1);
1221*7c478bd9Sstevel@tonic-gate 		if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno)
1222*7c478bd9Sstevel@tonic-gate 			(void)__ham_put_page(dbp, temp_pagep, 1);
1223*7c478bd9Sstevel@tonic-gate 	}
1224*7c478bd9Sstevel@tonic-gate 	return (ret);
1225*7c478bd9Sstevel@tonic-gate }
1226*7c478bd9Sstevel@tonic-gate 
1227*7c478bd9Sstevel@tonic-gate /*
1228*7c478bd9Sstevel@tonic-gate  * Add the given pair to the page.  The page in question may already be
1229*7c478bd9Sstevel@tonic-gate  * held (i.e. it was already gotten).  If it is, then the page is passed
1230*7c478bd9Sstevel@tonic-gate  * in via the pagep parameter.  On return, pagep will contain the page
1231*7c478bd9Sstevel@tonic-gate  * to which we just added something.  This allows us to link overflow
1232*7c478bd9Sstevel@tonic-gate  * pages and return the new page having correctly put the last page.
1233*7c478bd9Sstevel@tonic-gate  *
1234*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __ham_add_el __P((DBC *, const DBT *, const DBT *, int));
1235*7c478bd9Sstevel@tonic-gate  */
1236*7c478bd9Sstevel@tonic-gate int
__ham_add_el(dbc,key,val,type)1237*7c478bd9Sstevel@tonic-gate __ham_add_el(dbc, key, val, type)
1238*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
1239*7c478bd9Sstevel@tonic-gate 	const DBT *key, *val;
1240*7c478bd9Sstevel@tonic-gate 	int type;
1241*7c478bd9Sstevel@tonic-gate {
1242*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1243*7c478bd9Sstevel@tonic-gate 	HASH_CURSOR *hcp;
1244*7c478bd9Sstevel@tonic-gate 	const DBT *pkey, *pdata;
1245*7c478bd9Sstevel@tonic-gate 	DBT key_dbt, data_dbt;
1246*7c478bd9Sstevel@tonic-gate 	DB_LSN new_lsn;
1247*7c478bd9Sstevel@tonic-gate 	HOFFPAGE doff, koff;
1248*7c478bd9Sstevel@tonic-gate 	db_pgno_t next_pgno;
1249*7c478bd9Sstevel@tonic-gate 	u_int32_t data_size, key_size, pairsize, rectype;
1250*7c478bd9Sstevel@tonic-gate 	int do_expand, is_keybig, is_databig, ret;
1251*7c478bd9Sstevel@tonic-gate 	int key_type, data_type;
1252*7c478bd9Sstevel@tonic-gate 
1253*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
1254*7c478bd9Sstevel@tonic-gate 	hcp = (HASH_CURSOR *)dbc->internal;
1255*7c478bd9Sstevel@tonic-gate 	do_expand = 0;
1256*7c478bd9Sstevel@tonic-gate 
1257*7c478bd9Sstevel@tonic-gate 	if (hcp->pagep == NULL && (ret = __ham_get_page(dbp,
1258*7c478bd9Sstevel@tonic-gate 	    hcp->seek_found_page != PGNO_INVALID ?  hcp->seek_found_page :
1259*7c478bd9Sstevel@tonic-gate 	    hcp->pgno, &hcp->pagep)) != 0)
1260*7c478bd9Sstevel@tonic-gate 		return (ret);
1261*7c478bd9Sstevel@tonic-gate 
1262*7c478bd9Sstevel@tonic-gate 	key_size = HKEYDATA_PSIZE(key->size);
1263*7c478bd9Sstevel@tonic-gate 	data_size = HKEYDATA_PSIZE(val->size);
1264*7c478bd9Sstevel@tonic-gate 	is_keybig = ISBIG(hcp, key->size);
1265*7c478bd9Sstevel@tonic-gate 	is_databig = ISBIG(hcp, val->size);
1266*7c478bd9Sstevel@tonic-gate 	if (is_keybig)
1267*7c478bd9Sstevel@tonic-gate 		key_size = HOFFPAGE_PSIZE;
1268*7c478bd9Sstevel@tonic-gate 	if (is_databig)
1269*7c478bd9Sstevel@tonic-gate 		data_size = HOFFPAGE_PSIZE;
1270*7c478bd9Sstevel@tonic-gate 
1271*7c478bd9Sstevel@tonic-gate 	pairsize = key_size + data_size;
1272*7c478bd9Sstevel@tonic-gate 
1273*7c478bd9Sstevel@tonic-gate 	/* Advance to first page in chain with room for item. */
1274*7c478bd9Sstevel@tonic-gate 	while (H_NUMPAIRS(hcp->pagep) && NEXT_PGNO(hcp->pagep) !=
1275*7c478bd9Sstevel@tonic-gate 	    PGNO_INVALID) {
1276*7c478bd9Sstevel@tonic-gate 		/*
1277*7c478bd9Sstevel@tonic-gate 		 * This may not be the end of the chain, but the pair may fit
1278*7c478bd9Sstevel@tonic-gate 		 * anyway.  Check if it's a bigpair that fits or a regular
1279*7c478bd9Sstevel@tonic-gate 		 * pair that fits.
1280*7c478bd9Sstevel@tonic-gate 		 */
1281*7c478bd9Sstevel@tonic-gate 		if (P_FREESPACE(hcp->pagep) >= pairsize)
1282*7c478bd9Sstevel@tonic-gate 			break;
1283*7c478bd9Sstevel@tonic-gate 		next_pgno = NEXT_PGNO(hcp->pagep);
1284*7c478bd9Sstevel@tonic-gate 		if ((ret =
1285*7c478bd9Sstevel@tonic-gate 		    __ham_next_cpage(dbc, next_pgno, 0, 0)) != 0)
1286*7c478bd9Sstevel@tonic-gate 			return (ret);
1287*7c478bd9Sstevel@tonic-gate 	}
1288*7c478bd9Sstevel@tonic-gate 
1289*7c478bd9Sstevel@tonic-gate 	/*
1290*7c478bd9Sstevel@tonic-gate 	 * Check if we need to allocate a new page.
1291*7c478bd9Sstevel@tonic-gate 	 */
1292*7c478bd9Sstevel@tonic-gate 	if (P_FREESPACE(hcp->pagep) < pairsize) {
1293*7c478bd9Sstevel@tonic-gate 		do_expand = 1;
1294*7c478bd9Sstevel@tonic-gate 		if ((ret = __ham_add_ovflpage(dbc,
1295*7c478bd9Sstevel@tonic-gate 		    hcp->pagep, 1, &hcp->pagep)) !=  0)
1296*7c478bd9Sstevel@tonic-gate 			return (ret);
1297*7c478bd9Sstevel@tonic-gate 		hcp->pgno = PGNO(hcp->pagep);
1298*7c478bd9Sstevel@tonic-gate 	}
1299*7c478bd9Sstevel@tonic-gate 
1300*7c478bd9Sstevel@tonic-gate 	/*
1301*7c478bd9Sstevel@tonic-gate 	 * Update cursor.
1302*7c478bd9Sstevel@tonic-gate 	 */
1303*7c478bd9Sstevel@tonic-gate 	hcp->bndx = H_NUMPAIRS(hcp->pagep);
1304*7c478bd9Sstevel@tonic-gate 	F_CLR(hcp, H_DELETED);
1305*7c478bd9Sstevel@tonic-gate 	if (is_keybig) {
1306*7c478bd9Sstevel@tonic-gate 		koff.type = H_OFFPAGE;
1307*7c478bd9Sstevel@tonic-gate 		UMRW(koff.unused[0]);
1308*7c478bd9Sstevel@tonic-gate 		UMRW(koff.unused[1]);
1309*7c478bd9Sstevel@tonic-gate 		UMRW(koff.unused[2]);
1310*7c478bd9Sstevel@tonic-gate 		if ((ret = __db_poff(dbc,
1311*7c478bd9Sstevel@tonic-gate 		    key, &koff.pgno, __ham_overflow_page)) != 0)
1312*7c478bd9Sstevel@tonic-gate 			return (ret);
1313*7c478bd9Sstevel@tonic-gate 		koff.tlen = key->size;
1314*7c478bd9Sstevel@tonic-gate 		key_dbt.data = &koff;
1315*7c478bd9Sstevel@tonic-gate 		key_dbt.size = sizeof(koff);
1316*7c478bd9Sstevel@tonic-gate 		pkey = &key_dbt;
1317*7c478bd9Sstevel@tonic-gate 		key_type = H_OFFPAGE;
1318*7c478bd9Sstevel@tonic-gate 	} else {
1319*7c478bd9Sstevel@tonic-gate 		pkey = key;
1320*7c478bd9Sstevel@tonic-gate 		key_type = H_KEYDATA;
1321*7c478bd9Sstevel@tonic-gate 	}
1322*7c478bd9Sstevel@tonic-gate 
1323*7c478bd9Sstevel@tonic-gate 	if (is_databig) {
1324*7c478bd9Sstevel@tonic-gate 		doff.type = H_OFFPAGE;
1325*7c478bd9Sstevel@tonic-gate 		UMRW(doff.unused[0]);
1326*7c478bd9Sstevel@tonic-gate 		UMRW(doff.unused[1]);
1327*7c478bd9Sstevel@tonic-gate 		UMRW(doff.unused[2]);
1328*7c478bd9Sstevel@tonic-gate 		if ((ret = __db_poff(dbc,
1329*7c478bd9Sstevel@tonic-gate 		    val, &doff.pgno, __ham_overflow_page)) != 0)
1330*7c478bd9Sstevel@tonic-gate 			return (ret);
1331*7c478bd9Sstevel@tonic-gate 		doff.tlen = val->size;
1332*7c478bd9Sstevel@tonic-gate 		data_dbt.data = &doff;
1333*7c478bd9Sstevel@tonic-gate 		data_dbt.size = sizeof(doff);
1334*7c478bd9Sstevel@tonic-gate 		pdata = &data_dbt;
1335*7c478bd9Sstevel@tonic-gate 		data_type = H_OFFPAGE;
1336*7c478bd9Sstevel@tonic-gate 	} else {
1337*7c478bd9Sstevel@tonic-gate 		pdata = val;
1338*7c478bd9Sstevel@tonic-gate 		data_type = type;
1339*7c478bd9Sstevel@tonic-gate 	}
1340*7c478bd9Sstevel@tonic-gate 
1341*7c478bd9Sstevel@tonic-gate 	if (DB_LOGGING(dbc)) {
1342*7c478bd9Sstevel@tonic-gate 		rectype = PUTPAIR;
1343*7c478bd9Sstevel@tonic-gate 		if (is_databig)
1344*7c478bd9Sstevel@tonic-gate 			rectype |= PAIR_DATAMASK;
1345*7c478bd9Sstevel@tonic-gate 		if (is_keybig)
1346*7c478bd9Sstevel@tonic-gate 			rectype |= PAIR_KEYMASK;
1347*7c478bd9Sstevel@tonic-gate 
1348*7c478bd9Sstevel@tonic-gate 		if ((ret = __ham_insdel_log(dbp->dbenv->lg_info,
1349*7c478bd9Sstevel@tonic-gate 		    dbc->txn, &new_lsn, 0, rectype,
1350*7c478bd9Sstevel@tonic-gate 		    dbp->log_fileid, PGNO(hcp->pagep),
1351*7c478bd9Sstevel@tonic-gate 		    (u_int32_t)H_NUMPAIRS(hcp->pagep),
1352*7c478bd9Sstevel@tonic-gate 		    &LSN(hcp->pagep), pkey, pdata)) != 0)
1353*7c478bd9Sstevel@tonic-gate 			return (ret);
1354*7c478bd9Sstevel@tonic-gate 
1355*7c478bd9Sstevel@tonic-gate 		/* Move lsn onto page. */
1356*7c478bd9Sstevel@tonic-gate 		LSN(hcp->pagep) = new_lsn;	/* Structure assignment. */
1357*7c478bd9Sstevel@tonic-gate 	}
1358*7c478bd9Sstevel@tonic-gate 
1359*7c478bd9Sstevel@tonic-gate 	__ham_putitem(hcp->pagep, pkey, key_type);
1360*7c478bd9Sstevel@tonic-gate 	__ham_putitem(hcp->pagep, pdata, data_type);
1361*7c478bd9Sstevel@tonic-gate 
1362*7c478bd9Sstevel@tonic-gate 	/*
1363*7c478bd9Sstevel@tonic-gate 	 * For splits, we are going to update item_info's page number
1364*7c478bd9Sstevel@tonic-gate 	 * field, so that we can easily return to the same page the
1365*7c478bd9Sstevel@tonic-gate 	 * next time we come in here.  For other operations, this shouldn't
1366*7c478bd9Sstevel@tonic-gate 	 * matter, since odds are this is the last thing that happens before
1367*7c478bd9Sstevel@tonic-gate 	 * we return to the user program.
1368*7c478bd9Sstevel@tonic-gate 	 */
1369*7c478bd9Sstevel@tonic-gate 	hcp->pgno = PGNO(hcp->pagep);
1370*7c478bd9Sstevel@tonic-gate 
1371*7c478bd9Sstevel@tonic-gate 	/*
1372*7c478bd9Sstevel@tonic-gate 	 * XXX Maybe keep incremental numbers here
1373*7c478bd9Sstevel@tonic-gate 	 */
1374*7c478bd9Sstevel@tonic-gate 	if (!F_ISSET(dbp, DB_AM_LOCKING))
1375*7c478bd9Sstevel@tonic-gate 		hcp->hdr->nelem++;
1376*7c478bd9Sstevel@tonic-gate 
1377*7c478bd9Sstevel@tonic-gate 	if (do_expand || (hcp->hdr->ffactor != 0 &&
1378*7c478bd9Sstevel@tonic-gate 	    (u_int32_t)H_NUMPAIRS(hcp->pagep) > hcp->hdr->ffactor))
1379*7c478bd9Sstevel@tonic-gate 		F_SET(hcp, H_EXPAND);
1380*7c478bd9Sstevel@tonic-gate 	return (0);
1381*7c478bd9Sstevel@tonic-gate }
1382*7c478bd9Sstevel@tonic-gate 
1383*7c478bd9Sstevel@tonic-gate 
1384*7c478bd9Sstevel@tonic-gate /*
1385*7c478bd9Sstevel@tonic-gate  * Special __putitem call used in splitting -- copies one entry to
1386*7c478bd9Sstevel@tonic-gate  * another.  Works for all types of hash entries (H_OFFPAGE, H_KEYDATA,
1387*7c478bd9Sstevel@tonic-gate  * H_DUPLICATE, H_OFFDUP).  Since we log splits at a high level, we
1388*7c478bd9Sstevel@tonic-gate  * do not need to do any logging here.
1389*7c478bd9Sstevel@tonic-gate  *
1390*7c478bd9Sstevel@tonic-gate  * PUBLIC: void __ham_copy_item __P((size_t, PAGE *, u_int32_t, PAGE *));
1391*7c478bd9Sstevel@tonic-gate  */
1392*7c478bd9Sstevel@tonic-gate void
__ham_copy_item(pgsize,src_page,src_ndx,dest_page)1393*7c478bd9Sstevel@tonic-gate __ham_copy_item(pgsize, src_page, src_ndx, dest_page)
1394*7c478bd9Sstevel@tonic-gate 	size_t pgsize;
1395*7c478bd9Sstevel@tonic-gate 	PAGE *src_page;
1396*7c478bd9Sstevel@tonic-gate 	u_int32_t src_ndx;
1397*7c478bd9Sstevel@tonic-gate 	PAGE *dest_page;
1398*7c478bd9Sstevel@tonic-gate {
1399*7c478bd9Sstevel@tonic-gate 	u_int32_t len;
1400*7c478bd9Sstevel@tonic-gate 	void *src, *dest;
1401*7c478bd9Sstevel@tonic-gate 
1402*7c478bd9Sstevel@tonic-gate 	/*
1403*7c478bd9Sstevel@tonic-gate 	 * Copy the key and data entries onto this new page.
1404*7c478bd9Sstevel@tonic-gate 	 */
1405*7c478bd9Sstevel@tonic-gate 	src = P_ENTRY(src_page, src_ndx);
1406*7c478bd9Sstevel@tonic-gate 
1407*7c478bd9Sstevel@tonic-gate 	/* Set up space on dest. */
1408*7c478bd9Sstevel@tonic-gate 	len = LEN_HITEM(src_page, pgsize, src_ndx);
1409*7c478bd9Sstevel@tonic-gate 	HOFFSET(dest_page) -= len;
1410*7c478bd9Sstevel@tonic-gate 	dest_page->inp[NUM_ENT(dest_page)] = HOFFSET(dest_page);
1411*7c478bd9Sstevel@tonic-gate 	dest = P_ENTRY(dest_page, NUM_ENT(dest_page));
1412*7c478bd9Sstevel@tonic-gate 	NUM_ENT(dest_page)++;
1413*7c478bd9Sstevel@tonic-gate 
1414*7c478bd9Sstevel@tonic-gate 	memcpy(dest, src, len);
1415*7c478bd9Sstevel@tonic-gate }
1416*7c478bd9Sstevel@tonic-gate 
1417*7c478bd9Sstevel@tonic-gate /*
1418*7c478bd9Sstevel@tonic-gate  *
1419*7c478bd9Sstevel@tonic-gate  * Returns:
1420*7c478bd9Sstevel@tonic-gate  *      pointer on success
1421*7c478bd9Sstevel@tonic-gate  *      NULL on error
1422*7c478bd9Sstevel@tonic-gate  *
1423*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __ham_add_ovflpage __P((DBC *, PAGE *, int, PAGE **));
1424*7c478bd9Sstevel@tonic-gate  */
1425*7c478bd9Sstevel@tonic-gate int
__ham_add_ovflpage(dbc,pagep,release,pp)1426*7c478bd9Sstevel@tonic-gate __ham_add_ovflpage(dbc, pagep, release, pp)
1427*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
1428*7c478bd9Sstevel@tonic-gate 	PAGE *pagep;
1429*7c478bd9Sstevel@tonic-gate 	int release;
1430*7c478bd9Sstevel@tonic-gate 	PAGE **pp;
1431*7c478bd9Sstevel@tonic-gate {
1432*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1433*7c478bd9Sstevel@tonic-gate 	HASH_CURSOR *hcp;
1434*7c478bd9Sstevel@tonic-gate 	DB_LSN new_lsn;
1435*7c478bd9Sstevel@tonic-gate 	PAGE *new_pagep;
1436*7c478bd9Sstevel@tonic-gate 	int ret;
1437*7c478bd9Sstevel@tonic-gate 
1438*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
1439*7c478bd9Sstevel@tonic-gate 	hcp = (HASH_CURSOR *)dbc->internal;
1440*7c478bd9Sstevel@tonic-gate 
1441*7c478bd9Sstevel@tonic-gate 	if ((ret = __ham_overflow_page(dbc, P_HASH, &new_pagep)) != 0)
1442*7c478bd9Sstevel@tonic-gate 		return (ret);
1443*7c478bd9Sstevel@tonic-gate 
1444*7c478bd9Sstevel@tonic-gate 	if (DB_LOGGING(dbc)) {
1445*7c478bd9Sstevel@tonic-gate 		if ((ret = __ham_newpage_log(dbp->dbenv->lg_info,
1446*7c478bd9Sstevel@tonic-gate 		    dbc->txn, &new_lsn, 0, PUTOVFL,
1447*7c478bd9Sstevel@tonic-gate 		    dbp->log_fileid, PGNO(pagep), &LSN(pagep),
1448*7c478bd9Sstevel@tonic-gate 		    PGNO(new_pagep), &LSN(new_pagep), PGNO_INVALID, NULL)) != 0)
1449*7c478bd9Sstevel@tonic-gate 			return (ret);
1450*7c478bd9Sstevel@tonic-gate 
1451*7c478bd9Sstevel@tonic-gate 		/* Move lsn onto page. */
1452*7c478bd9Sstevel@tonic-gate 		LSN(pagep) = LSN(new_pagep) = new_lsn;
1453*7c478bd9Sstevel@tonic-gate 	}
1454*7c478bd9Sstevel@tonic-gate 	NEXT_PGNO(pagep) = PGNO(new_pagep);
1455*7c478bd9Sstevel@tonic-gate 	PREV_PGNO(new_pagep) = PGNO(pagep);
1456*7c478bd9Sstevel@tonic-gate 
1457*7c478bd9Sstevel@tonic-gate 	if (release)
1458*7c478bd9Sstevel@tonic-gate 		ret = __ham_put_page(dbp, pagep, 1);
1459*7c478bd9Sstevel@tonic-gate 
1460*7c478bd9Sstevel@tonic-gate 	hcp->stats.hash_overflows++;
1461*7c478bd9Sstevel@tonic-gate 	*pp = new_pagep;
1462*7c478bd9Sstevel@tonic-gate 	return (ret);
1463*7c478bd9Sstevel@tonic-gate }
1464*7c478bd9Sstevel@tonic-gate 
1465*7c478bd9Sstevel@tonic-gate 
1466*7c478bd9Sstevel@tonic-gate /*
1467*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __ham_new_page __P((DB *, u_int32_t, u_int32_t, PAGE **));
1468*7c478bd9Sstevel@tonic-gate  */
1469*7c478bd9Sstevel@tonic-gate int
__ham_new_page(dbp,addr,type,pp)1470*7c478bd9Sstevel@tonic-gate __ham_new_page(dbp, addr, type, pp)
1471*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1472*7c478bd9Sstevel@tonic-gate 	u_int32_t addr, type;
1473*7c478bd9Sstevel@tonic-gate 	PAGE **pp;
1474*7c478bd9Sstevel@tonic-gate {
1475*7c478bd9Sstevel@tonic-gate 	PAGE *pagep;
1476*7c478bd9Sstevel@tonic-gate 	int ret;
1477*7c478bd9Sstevel@tonic-gate 
1478*7c478bd9Sstevel@tonic-gate 	if ((ret = memp_fget(dbp->mpf,
1479*7c478bd9Sstevel@tonic-gate 	    &addr, DB_MPOOL_CREATE, &pagep)) != 0)
1480*7c478bd9Sstevel@tonic-gate 		return (ret);
1481*7c478bd9Sstevel@tonic-gate 
1482*7c478bd9Sstevel@tonic-gate 	/* This should not be necessary because page-in should do it. */
1483*7c478bd9Sstevel@tonic-gate 	P_INIT(pagep, dbp->pgsize, addr, PGNO_INVALID, PGNO_INVALID, 0, type);
1484*7c478bd9Sstevel@tonic-gate 
1485*7c478bd9Sstevel@tonic-gate 	*pp = pagep;
1486*7c478bd9Sstevel@tonic-gate 	return (0);
1487*7c478bd9Sstevel@tonic-gate }
1488*7c478bd9Sstevel@tonic-gate 
1489*7c478bd9Sstevel@tonic-gate /*
1490*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __ham_del_page __P((DBC *, PAGE *));
1491*7c478bd9Sstevel@tonic-gate  */
1492*7c478bd9Sstevel@tonic-gate int
__ham_del_page(dbc,pagep)1493*7c478bd9Sstevel@tonic-gate __ham_del_page(dbc, pagep)
1494*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
1495*7c478bd9Sstevel@tonic-gate 	PAGE *pagep;
1496*7c478bd9Sstevel@tonic-gate {
1497*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1498*7c478bd9Sstevel@tonic-gate 	HASH_CURSOR *hcp;
1499*7c478bd9Sstevel@tonic-gate 	DB_LSN new_lsn;
1500*7c478bd9Sstevel@tonic-gate 	int ret;
1501*7c478bd9Sstevel@tonic-gate 
1502*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
1503*7c478bd9Sstevel@tonic-gate 	hcp = (HASH_CURSOR *)dbc->internal;
1504*7c478bd9Sstevel@tonic-gate 	ret = 0;
1505*7c478bd9Sstevel@tonic-gate 	DIRTY_META(dbp, hcp, ret);
1506*7c478bd9Sstevel@tonic-gate 	if (ret != 0) {
1507*7c478bd9Sstevel@tonic-gate 		if (ret != EAGAIN)
1508*7c478bd9Sstevel@tonic-gate 			__db_err(dbp->dbenv,
1509*7c478bd9Sstevel@tonic-gate 			    "free_ovflpage: unable to lock meta data page %s\n",
1510*7c478bd9Sstevel@tonic-gate 			    strerror(ret));
1511*7c478bd9Sstevel@tonic-gate 		/*
1512*7c478bd9Sstevel@tonic-gate 		 * If we are going to return an error, then we should free
1513*7c478bd9Sstevel@tonic-gate 		 * the page, so it doesn't stay pinned forever.
1514*7c478bd9Sstevel@tonic-gate 		 */
1515*7c478bd9Sstevel@tonic-gate 		(void)__ham_put_page(dbp, pagep, 0);
1516*7c478bd9Sstevel@tonic-gate 		return (ret);
1517*7c478bd9Sstevel@tonic-gate 	}
1518*7c478bd9Sstevel@tonic-gate 
1519*7c478bd9Sstevel@tonic-gate 	if (DB_LOGGING(dbc)) {
1520*7c478bd9Sstevel@tonic-gate 		if ((ret = __ham_newpgno_log(dbp->dbenv->lg_info,
1521*7c478bd9Sstevel@tonic-gate 		    dbc->txn, &new_lsn, 0, DELPGNO,
1522*7c478bd9Sstevel@tonic-gate 		    dbp->log_fileid, PGNO(pagep), hcp->hdr->last_freed,
1523*7c478bd9Sstevel@tonic-gate 		    (u_int32_t)TYPE(pagep), NEXT_PGNO(pagep), P_INVALID,
1524*7c478bd9Sstevel@tonic-gate 		    &LSN(pagep), &hcp->hdr->lsn)) != 0)
1525*7c478bd9Sstevel@tonic-gate 			return (ret);
1526*7c478bd9Sstevel@tonic-gate 
1527*7c478bd9Sstevel@tonic-gate 		hcp->hdr->lsn = new_lsn;
1528*7c478bd9Sstevel@tonic-gate 		LSN(pagep) = new_lsn;
1529*7c478bd9Sstevel@tonic-gate 	}
1530*7c478bd9Sstevel@tonic-gate 
1531*7c478bd9Sstevel@tonic-gate #ifdef DIAGNOSTIC
1532*7c478bd9Sstevel@tonic-gate 	{
1533*7c478bd9Sstevel@tonic-gate 		db_pgno_t __pgno;
1534*7c478bd9Sstevel@tonic-gate 		DB_LSN __lsn;
1535*7c478bd9Sstevel@tonic-gate 		__pgno = pagep->pgno;
1536*7c478bd9Sstevel@tonic-gate 		__lsn = pagep->lsn;
1537*7c478bd9Sstevel@tonic-gate 		memset(pagep, 0xdb, dbp->pgsize);
1538*7c478bd9Sstevel@tonic-gate 		pagep->pgno = __pgno;
1539*7c478bd9Sstevel@tonic-gate 		pagep->lsn = __lsn;
1540*7c478bd9Sstevel@tonic-gate 	}
1541*7c478bd9Sstevel@tonic-gate #endif
1542*7c478bd9Sstevel@tonic-gate 	TYPE(pagep) = P_INVALID;
1543*7c478bd9Sstevel@tonic-gate 	NEXT_PGNO(pagep) = hcp->hdr->last_freed;
1544*7c478bd9Sstevel@tonic-gate 	hcp->hdr->last_freed = PGNO(pagep);
1545*7c478bd9Sstevel@tonic-gate 
1546*7c478bd9Sstevel@tonic-gate 	return (__ham_put_page(dbp, pagep, 1));
1547*7c478bd9Sstevel@tonic-gate }
1548*7c478bd9Sstevel@tonic-gate 
1549*7c478bd9Sstevel@tonic-gate 
1550*7c478bd9Sstevel@tonic-gate /*
1551*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __ham_put_page __P((DB *, PAGE *, int32_t));
1552*7c478bd9Sstevel@tonic-gate  */
1553*7c478bd9Sstevel@tonic-gate int
__ham_put_page(dbp,pagep,is_dirty)1554*7c478bd9Sstevel@tonic-gate __ham_put_page(dbp, pagep, is_dirty)
1555*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1556*7c478bd9Sstevel@tonic-gate 	PAGE *pagep;
1557*7c478bd9Sstevel@tonic-gate 	int32_t is_dirty;
1558*7c478bd9Sstevel@tonic-gate {
1559*7c478bd9Sstevel@tonic-gate #ifdef DEBUG_SLOW
1560*7c478bd9Sstevel@tonic-gate 	__account_page(dbp, ((BKT *)((char *)pagep - sizeof(BKT)))->pgno, -1);
1561*7c478bd9Sstevel@tonic-gate #endif
1562*7c478bd9Sstevel@tonic-gate 	return (memp_fput(dbp->mpf, pagep, (is_dirty ? DB_MPOOL_DIRTY : 0)));
1563*7c478bd9Sstevel@tonic-gate }
1564*7c478bd9Sstevel@tonic-gate 
1565*7c478bd9Sstevel@tonic-gate /*
1566*7c478bd9Sstevel@tonic-gate  * __ham_dirty_page --
1567*7c478bd9Sstevel@tonic-gate  *	Mark a page dirty.
1568*7c478bd9Sstevel@tonic-gate  *
1569*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __ham_dirty_page __P((DB *, PAGE *));
1570*7c478bd9Sstevel@tonic-gate  */
1571*7c478bd9Sstevel@tonic-gate int
__ham_dirty_page(dbp,pagep)1572*7c478bd9Sstevel@tonic-gate __ham_dirty_page(dbp, pagep)
1573*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1574*7c478bd9Sstevel@tonic-gate 	PAGE *pagep;
1575*7c478bd9Sstevel@tonic-gate {
1576*7c478bd9Sstevel@tonic-gate 	return (memp_fset(dbp->mpf, pagep, DB_MPOOL_DIRTY));
1577*7c478bd9Sstevel@tonic-gate }
1578*7c478bd9Sstevel@tonic-gate 
1579*7c478bd9Sstevel@tonic-gate /*
1580*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __ham_get_page __P((DB *, db_pgno_t, PAGE **));
1581*7c478bd9Sstevel@tonic-gate  */
1582*7c478bd9Sstevel@tonic-gate int
__ham_get_page(dbp,addr,pagep)1583*7c478bd9Sstevel@tonic-gate __ham_get_page(dbp, addr, pagep)
1584*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1585*7c478bd9Sstevel@tonic-gate 	db_pgno_t addr;
1586*7c478bd9Sstevel@tonic-gate 	PAGE **pagep;
1587*7c478bd9Sstevel@tonic-gate {
1588*7c478bd9Sstevel@tonic-gate 	int ret;
1589*7c478bd9Sstevel@tonic-gate 
1590*7c478bd9Sstevel@tonic-gate 	ret = memp_fget(dbp->mpf, &addr, DB_MPOOL_CREATE, pagep);
1591*7c478bd9Sstevel@tonic-gate #ifdef DEBUG_SLOW
1592*7c478bd9Sstevel@tonic-gate 	if (*pagep != NULL)
1593*7c478bd9Sstevel@tonic-gate 		__account_page(dbp, addr, 1);
1594*7c478bd9Sstevel@tonic-gate #endif
1595*7c478bd9Sstevel@tonic-gate 	return (ret);
1596*7c478bd9Sstevel@tonic-gate }
1597*7c478bd9Sstevel@tonic-gate 
1598*7c478bd9Sstevel@tonic-gate /*
1599*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __ham_overflow_page
1600*7c478bd9Sstevel@tonic-gate  * PUBLIC:     __P((DBC *, u_int32_t, PAGE **));
1601*7c478bd9Sstevel@tonic-gate  */
1602*7c478bd9Sstevel@tonic-gate int
__ham_overflow_page(dbc,type,pp)1603*7c478bd9Sstevel@tonic-gate __ham_overflow_page(dbc, type, pp)
1604*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
1605*7c478bd9Sstevel@tonic-gate 	u_int32_t type;
1606*7c478bd9Sstevel@tonic-gate 	PAGE **pp;
1607*7c478bd9Sstevel@tonic-gate {
1608*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1609*7c478bd9Sstevel@tonic-gate 	HASH_CURSOR *hcp;
1610*7c478bd9Sstevel@tonic-gate 	DB_LSN *lsnp, new_lsn;
1611*7c478bd9Sstevel@tonic-gate 	PAGE *p;
1612*7c478bd9Sstevel@tonic-gate 	db_pgno_t new_addr, next_free, newalloc_flag;
1613*7c478bd9Sstevel@tonic-gate 	u_int32_t offset, splitnum;
1614*7c478bd9Sstevel@tonic-gate 	int ret;
1615*7c478bd9Sstevel@tonic-gate 
1616*7c478bd9Sstevel@tonic-gate 	ret = 0;
1617*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
1618*7c478bd9Sstevel@tonic-gate 	hcp = (HASH_CURSOR *)dbc->internal;
1619*7c478bd9Sstevel@tonic-gate 	DIRTY_META(dbp, hcp, ret);
1620*7c478bd9Sstevel@tonic-gate 	if (ret != 0)
1621*7c478bd9Sstevel@tonic-gate 		return (ret);
1622*7c478bd9Sstevel@tonic-gate 
1623*7c478bd9Sstevel@tonic-gate 	/*
1624*7c478bd9Sstevel@tonic-gate 	 * This routine is split up into two parts.  First we have
1625*7c478bd9Sstevel@tonic-gate 	 * to figure out the address of the new page that we are
1626*7c478bd9Sstevel@tonic-gate 	 * allocating.  Then we have to log the allocation.  Only
1627*7c478bd9Sstevel@tonic-gate 	 * after the log do we get to complete allocation of the
1628*7c478bd9Sstevel@tonic-gate 	 * new page.
1629*7c478bd9Sstevel@tonic-gate 	 */
1630*7c478bd9Sstevel@tonic-gate 	new_addr = hcp->hdr->last_freed;
1631*7c478bd9Sstevel@tonic-gate 	if (new_addr != PGNO_INVALID) {
1632*7c478bd9Sstevel@tonic-gate 		if ((ret = __ham_get_page(dbp, new_addr, &p)) != 0)
1633*7c478bd9Sstevel@tonic-gate 			return (ret);
1634*7c478bd9Sstevel@tonic-gate 		next_free = NEXT_PGNO(p);
1635*7c478bd9Sstevel@tonic-gate 		lsnp = &LSN(p);
1636*7c478bd9Sstevel@tonic-gate 		newalloc_flag = 0;
1637*7c478bd9Sstevel@tonic-gate 	} else {
1638*7c478bd9Sstevel@tonic-gate 		splitnum = hcp->hdr->ovfl_point;
1639*7c478bd9Sstevel@tonic-gate 		hcp->hdr->spares[splitnum]++;
1640*7c478bd9Sstevel@tonic-gate 		offset = hcp->hdr->spares[splitnum] -
1641*7c478bd9Sstevel@tonic-gate 		    (splitnum ? hcp->hdr->spares[splitnum - 1] : 0);
1642*7c478bd9Sstevel@tonic-gate 		new_addr = PGNO_OF(hcp, hcp->hdr->ovfl_point, offset);
1643*7c478bd9Sstevel@tonic-gate 		if (new_addr > MAX_PAGES(hcp)) {
1644*7c478bd9Sstevel@tonic-gate 			__db_err(dbp->dbenv, "hash: out of file pages");
1645*7c478bd9Sstevel@tonic-gate 			hcp->hdr->spares[splitnum]--;
1646*7c478bd9Sstevel@tonic-gate 			return (ENOMEM);
1647*7c478bd9Sstevel@tonic-gate 		}
1648*7c478bd9Sstevel@tonic-gate 		next_free = PGNO_INVALID;
1649*7c478bd9Sstevel@tonic-gate 		p = NULL;
1650*7c478bd9Sstevel@tonic-gate 		lsnp = NULL;
1651*7c478bd9Sstevel@tonic-gate 		newalloc_flag = 1;
1652*7c478bd9Sstevel@tonic-gate 	}
1653*7c478bd9Sstevel@tonic-gate 
1654*7c478bd9Sstevel@tonic-gate 	if (DB_LOGGING(dbc)) {
1655*7c478bd9Sstevel@tonic-gate 		if ((ret = __ham_newpgno_log(dbp->dbenv->lg_info,
1656*7c478bd9Sstevel@tonic-gate 		    dbc->txn, &new_lsn, 0, ALLOCPGNO,
1657*7c478bd9Sstevel@tonic-gate 		    dbp->log_fileid, new_addr, next_free,
1658*7c478bd9Sstevel@tonic-gate 		    0, newalloc_flag, type, lsnp, &hcp->hdr->lsn)) != 0)
1659*7c478bd9Sstevel@tonic-gate 			return (ret);
1660*7c478bd9Sstevel@tonic-gate 
1661*7c478bd9Sstevel@tonic-gate 		hcp->hdr->lsn = new_lsn;
1662*7c478bd9Sstevel@tonic-gate 		if (lsnp != NULL)
1663*7c478bd9Sstevel@tonic-gate 			*lsnp = new_lsn;
1664*7c478bd9Sstevel@tonic-gate 	}
1665*7c478bd9Sstevel@tonic-gate 
1666*7c478bd9Sstevel@tonic-gate 	if (p != NULL) {
1667*7c478bd9Sstevel@tonic-gate 		/* We just took something off the free list, initialize it. */
1668*7c478bd9Sstevel@tonic-gate 		hcp->hdr->last_freed = next_free;
1669*7c478bd9Sstevel@tonic-gate 		P_INIT(p, hcp->hdr->pagesize, PGNO(p), PGNO_INVALID,
1670*7c478bd9Sstevel@tonic-gate 		    PGNO_INVALID, 0, (u_int8_t)type);
1671*7c478bd9Sstevel@tonic-gate 	} else {
1672*7c478bd9Sstevel@tonic-gate 		/* Get the new page. */
1673*7c478bd9Sstevel@tonic-gate 		if ((ret = __ham_new_page(dbp, new_addr, type, &p)) != 0)
1674*7c478bd9Sstevel@tonic-gate 			return (ret);
1675*7c478bd9Sstevel@tonic-gate 	}
1676*7c478bd9Sstevel@tonic-gate 	if (DB_LOGGING(dbc))
1677*7c478bd9Sstevel@tonic-gate 		LSN(p) = new_lsn;
1678*7c478bd9Sstevel@tonic-gate 
1679*7c478bd9Sstevel@tonic-gate 	*pp = p;
1680*7c478bd9Sstevel@tonic-gate 	return (0);
1681*7c478bd9Sstevel@tonic-gate }
1682*7c478bd9Sstevel@tonic-gate 
1683*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
1684*7c478bd9Sstevel@tonic-gate /*
1685*7c478bd9Sstevel@tonic-gate  * PUBLIC: #ifdef DEBUG
1686*7c478bd9Sstevel@tonic-gate  * PUBLIC: db_pgno_t __bucket_to_page __P((HASH_CURSOR *, db_pgno_t));
1687*7c478bd9Sstevel@tonic-gate  * PUBLIC: #endif
1688*7c478bd9Sstevel@tonic-gate  */
1689*7c478bd9Sstevel@tonic-gate db_pgno_t
__bucket_to_page(hcp,n)1690*7c478bd9Sstevel@tonic-gate __bucket_to_page(hcp, n)
1691*7c478bd9Sstevel@tonic-gate 	HASH_CURSOR *hcp;
1692*7c478bd9Sstevel@tonic-gate 	db_pgno_t n;
1693*7c478bd9Sstevel@tonic-gate {
1694*7c478bd9Sstevel@tonic-gate 	int ret_val;
1695*7c478bd9Sstevel@tonic-gate 
1696*7c478bd9Sstevel@tonic-gate 	ret_val = n + 1;
1697*7c478bd9Sstevel@tonic-gate 	if (n != 0)
1698*7c478bd9Sstevel@tonic-gate 		ret_val += hcp->hdr->spares[__db_log2(n + 1) - 1];
1699*7c478bd9Sstevel@tonic-gate 	return (ret_val);
1700*7c478bd9Sstevel@tonic-gate }
1701*7c478bd9Sstevel@tonic-gate #endif
1702*7c478bd9Sstevel@tonic-gate 
1703*7c478bd9Sstevel@tonic-gate /*
1704*7c478bd9Sstevel@tonic-gate  * Create a bunch of overflow pages at the current split point.
1705*7c478bd9Sstevel@tonic-gate  * PUBLIC: void __ham_init_ovflpages __P((DBC *));
1706*7c478bd9Sstevel@tonic-gate  */
1707*7c478bd9Sstevel@tonic-gate void
__ham_init_ovflpages(dbc)1708*7c478bd9Sstevel@tonic-gate __ham_init_ovflpages(dbc)
1709*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
1710*7c478bd9Sstevel@tonic-gate {
1711*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1712*7c478bd9Sstevel@tonic-gate 	HASH_CURSOR *hcp;
1713*7c478bd9Sstevel@tonic-gate 	DB_LSN new_lsn;
1714*7c478bd9Sstevel@tonic-gate 	PAGE *p;
1715*7c478bd9Sstevel@tonic-gate 	db_pgno_t last_pgno, new_pgno;
1716*7c478bd9Sstevel@tonic-gate 	u_int32_t i, curpages, numpages;
1717*7c478bd9Sstevel@tonic-gate 
1718*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
1719*7c478bd9Sstevel@tonic-gate 	hcp = (HASH_CURSOR *)dbc->internal;
1720*7c478bd9Sstevel@tonic-gate 
1721*7c478bd9Sstevel@tonic-gate 	curpages = hcp->hdr->spares[hcp->hdr->ovfl_point] -
1722*7c478bd9Sstevel@tonic-gate 	    hcp->hdr->spares[hcp->hdr->ovfl_point - 1];
1723*7c478bd9Sstevel@tonic-gate 	numpages = hcp->hdr->ovfl_point + 1 - curpages;
1724*7c478bd9Sstevel@tonic-gate 
1725*7c478bd9Sstevel@tonic-gate 	last_pgno = hcp->hdr->last_freed;
1726*7c478bd9Sstevel@tonic-gate 	new_pgno = PGNO_OF(hcp, hcp->hdr->ovfl_point, curpages + 1);
1727*7c478bd9Sstevel@tonic-gate 	if (DB_LOGGING(dbc)) {
1728*7c478bd9Sstevel@tonic-gate 		(void)__ham_ovfl_log(dbp->dbenv->lg_info,
1729*7c478bd9Sstevel@tonic-gate 		    dbc->txn, &new_lsn, 0, dbp->log_fileid, new_pgno,
1730*7c478bd9Sstevel@tonic-gate 		    numpages, last_pgno, hcp->hdr->ovfl_point, &hcp->hdr->lsn);
1731*7c478bd9Sstevel@tonic-gate 		hcp->hdr->lsn = new_lsn;
1732*7c478bd9Sstevel@tonic-gate 	} else
1733*7c478bd9Sstevel@tonic-gate 		ZERO_LSN(new_lsn);
1734*7c478bd9Sstevel@tonic-gate 
1735*7c478bd9Sstevel@tonic-gate 	hcp->hdr->spares[hcp->hdr->ovfl_point] += numpages;
1736*7c478bd9Sstevel@tonic-gate 	for (i = numpages; i > 0; i--) {
1737*7c478bd9Sstevel@tonic-gate 		if (__ham_new_page(dbp,
1738*7c478bd9Sstevel@tonic-gate 		    PGNO_OF(hcp, hcp->hdr->ovfl_point, curpages + i),
1739*7c478bd9Sstevel@tonic-gate 		    P_INVALID, &p) != 0)
1740*7c478bd9Sstevel@tonic-gate 			break;
1741*7c478bd9Sstevel@tonic-gate 		LSN(p) = new_lsn;
1742*7c478bd9Sstevel@tonic-gate 		NEXT_PGNO(p) = last_pgno;
1743*7c478bd9Sstevel@tonic-gate 		last_pgno = PGNO(p);
1744*7c478bd9Sstevel@tonic-gate 		(void)__ham_put_page(dbp, p, 1);
1745*7c478bd9Sstevel@tonic-gate 	}
1746*7c478bd9Sstevel@tonic-gate 	hcp->hdr->last_freed = last_pgno;
1747*7c478bd9Sstevel@tonic-gate }
1748*7c478bd9Sstevel@tonic-gate 
1749*7c478bd9Sstevel@tonic-gate /*
1750*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __ham_get_cpage __P((DBC *, db_lockmode_t));
1751*7c478bd9Sstevel@tonic-gate  */
1752*7c478bd9Sstevel@tonic-gate int
__ham_get_cpage(dbc,mode)1753*7c478bd9Sstevel@tonic-gate __ham_get_cpage(dbc, mode)
1754*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
1755*7c478bd9Sstevel@tonic-gate 	db_lockmode_t mode;
1756*7c478bd9Sstevel@tonic-gate {
1757*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1758*7c478bd9Sstevel@tonic-gate 	HASH_CURSOR *hcp;
1759*7c478bd9Sstevel@tonic-gate 	int ret;
1760*7c478bd9Sstevel@tonic-gate 
1761*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
1762*7c478bd9Sstevel@tonic-gate 	hcp = (HASH_CURSOR *)dbc->internal;
1763*7c478bd9Sstevel@tonic-gate 
1764*7c478bd9Sstevel@tonic-gate 	/*
1765*7c478bd9Sstevel@tonic-gate 	 * There are three cases with respect to buckets and locks.  If there
1766*7c478bd9Sstevel@tonic-gate 	 * is no lock held, then if we are locking, we should get the lock.
1767*7c478bd9Sstevel@tonic-gate 	 * If there is a lock held and it's for the current bucket, we don't
1768*7c478bd9Sstevel@tonic-gate 	 * need to do anything.  If there is a lock, but it's for a different
1769*7c478bd9Sstevel@tonic-gate 	 * bucket, then we need to release and get.
1770*7c478bd9Sstevel@tonic-gate 	 */
1771*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(dbp, DB_AM_LOCKING)) {
1772*7c478bd9Sstevel@tonic-gate 		if (hcp->lock != 0 && hcp->lbucket != hcp->bucket) {
1773*7c478bd9Sstevel@tonic-gate 			/*
1774*7c478bd9Sstevel@tonic-gate 			 * If this is the original lock, don't release it,
1775*7c478bd9Sstevel@tonic-gate 			 * because we may need to restore it upon exit.
1776*7c478bd9Sstevel@tonic-gate 			 */
1777*7c478bd9Sstevel@tonic-gate 			if (dbc->txn == NULL &&
1778*7c478bd9Sstevel@tonic-gate 			    !F_ISSET(hcp, H_ORIGINAL) && (ret =
1779*7c478bd9Sstevel@tonic-gate 			    lock_put(dbp->dbenv->lk_info, hcp->lock)) != 0)
1780*7c478bd9Sstevel@tonic-gate 				return (ret);
1781*7c478bd9Sstevel@tonic-gate 			F_CLR(hcp, H_ORIGINAL);
1782*7c478bd9Sstevel@tonic-gate 			hcp->lock = 0;
1783*7c478bd9Sstevel@tonic-gate 		}
1784*7c478bd9Sstevel@tonic-gate 		if (hcp->lock == 0 && (ret = __ham_lock_bucket(dbc, mode)) != 0)
1785*7c478bd9Sstevel@tonic-gate 			return (ret);
1786*7c478bd9Sstevel@tonic-gate 		hcp->lbucket = hcp->bucket;
1787*7c478bd9Sstevel@tonic-gate 	}
1788*7c478bd9Sstevel@tonic-gate 
1789*7c478bd9Sstevel@tonic-gate 	if (hcp->pagep == NULL) {
1790*7c478bd9Sstevel@tonic-gate 		if (hcp->pgno == PGNO_INVALID) {
1791*7c478bd9Sstevel@tonic-gate 			hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket);
1792*7c478bd9Sstevel@tonic-gate 			hcp->bndx = 0;
1793*7c478bd9Sstevel@tonic-gate 		}
1794*7c478bd9Sstevel@tonic-gate 
1795*7c478bd9Sstevel@tonic-gate 		if ((ret =
1796*7c478bd9Sstevel@tonic-gate 		    __ham_get_page(dbp, hcp->pgno, &hcp->pagep)) != 0)
1797*7c478bd9Sstevel@tonic-gate 			return (ret);
1798*7c478bd9Sstevel@tonic-gate 	}
1799*7c478bd9Sstevel@tonic-gate 
1800*7c478bd9Sstevel@tonic-gate 	if (hcp->dpgno != PGNO_INVALID && hcp->dpagep == NULL)
1801*7c478bd9Sstevel@tonic-gate 		if ((ret =
1802*7c478bd9Sstevel@tonic-gate 		    __ham_get_page(dbp, hcp->dpgno, &hcp->dpagep)) != 0)
1803*7c478bd9Sstevel@tonic-gate 			return (ret);
1804*7c478bd9Sstevel@tonic-gate 	return (0);
1805*7c478bd9Sstevel@tonic-gate }
1806*7c478bd9Sstevel@tonic-gate 
1807*7c478bd9Sstevel@tonic-gate /*
1808*7c478bd9Sstevel@tonic-gate  * Get a new page at the cursor, putting the last page if necessary.
1809*7c478bd9Sstevel@tonic-gate  * If the flag is set to H_ISDUP, then we are talking about the
1810*7c478bd9Sstevel@tonic-gate  * duplicate page, not the main page.
1811*7c478bd9Sstevel@tonic-gate  *
1812*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __ham_next_cpage __P((DBC *, db_pgno_t, int, u_int32_t));
1813*7c478bd9Sstevel@tonic-gate  */
1814*7c478bd9Sstevel@tonic-gate int
__ham_next_cpage(dbc,pgno,dirty,flags)1815*7c478bd9Sstevel@tonic-gate __ham_next_cpage(dbc, pgno, dirty, flags)
1816*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
1817*7c478bd9Sstevel@tonic-gate 	db_pgno_t pgno;
1818*7c478bd9Sstevel@tonic-gate 	int dirty;
1819*7c478bd9Sstevel@tonic-gate 	u_int32_t flags;
1820*7c478bd9Sstevel@tonic-gate {
1821*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1822*7c478bd9Sstevel@tonic-gate 	HASH_CURSOR *hcp;
1823*7c478bd9Sstevel@tonic-gate 	PAGE *p;
1824*7c478bd9Sstevel@tonic-gate 	int ret;
1825*7c478bd9Sstevel@tonic-gate 
1826*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
1827*7c478bd9Sstevel@tonic-gate 	hcp = (HASH_CURSOR *)dbc->internal;
1828*7c478bd9Sstevel@tonic-gate 	if (LF_ISSET(H_ISDUP) && hcp->dpagep != NULL &&
1829*7c478bd9Sstevel@tonic-gate 	    (ret = __ham_put_page(dbp, hcp->dpagep, dirty)) != 0)
1830*7c478bd9Sstevel@tonic-gate 		return (ret);
1831*7c478bd9Sstevel@tonic-gate 	else if (!LF_ISSET(H_ISDUP) && hcp->pagep != NULL &&
1832*7c478bd9Sstevel@tonic-gate 	    (ret = __ham_put_page(dbp, hcp->pagep, dirty)) != 0)
1833*7c478bd9Sstevel@tonic-gate 		return (ret);
1834*7c478bd9Sstevel@tonic-gate 
1835*7c478bd9Sstevel@tonic-gate 	if ((ret = __ham_get_page(dbp, pgno, &p)) != 0)
1836*7c478bd9Sstevel@tonic-gate 		return (ret);
1837*7c478bd9Sstevel@tonic-gate 
1838*7c478bd9Sstevel@tonic-gate 	if (LF_ISSET(H_ISDUP)) {
1839*7c478bd9Sstevel@tonic-gate 		hcp->dpagep = p;
1840*7c478bd9Sstevel@tonic-gate 		hcp->dpgno = pgno;
1841*7c478bd9Sstevel@tonic-gate 		hcp->dndx = 0;
1842*7c478bd9Sstevel@tonic-gate 	} else {
1843*7c478bd9Sstevel@tonic-gate 		hcp->pagep = p;
1844*7c478bd9Sstevel@tonic-gate 		hcp->pgno = pgno;
1845*7c478bd9Sstevel@tonic-gate 		hcp->bndx = 0;
1846*7c478bd9Sstevel@tonic-gate 	}
1847*7c478bd9Sstevel@tonic-gate 
1848*7c478bd9Sstevel@tonic-gate 	return (0);
1849*7c478bd9Sstevel@tonic-gate }
1850*7c478bd9Sstevel@tonic-gate 
1851*7c478bd9Sstevel@tonic-gate /*
1852*7c478bd9Sstevel@tonic-gate  * __ham_lock_bucket --
1853*7c478bd9Sstevel@tonic-gate  *	Get the lock on a particular bucket.
1854*7c478bd9Sstevel@tonic-gate  */
1855*7c478bd9Sstevel@tonic-gate static int
__ham_lock_bucket(dbc,mode)1856*7c478bd9Sstevel@tonic-gate __ham_lock_bucket(dbc, mode)
1857*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
1858*7c478bd9Sstevel@tonic-gate 	db_lockmode_t mode;
1859*7c478bd9Sstevel@tonic-gate {
1860*7c478bd9Sstevel@tonic-gate 	HASH_CURSOR *hcp;
1861*7c478bd9Sstevel@tonic-gate 	int ret;
1862*7c478bd9Sstevel@tonic-gate 
1863*7c478bd9Sstevel@tonic-gate 	hcp = (HASH_CURSOR *)dbc->internal;
1864*7c478bd9Sstevel@tonic-gate 	dbc->lock.pgno = (db_pgno_t)(hcp->bucket);
1865*7c478bd9Sstevel@tonic-gate 	if (dbc->txn == NULL)
1866*7c478bd9Sstevel@tonic-gate 		ret = lock_get(dbc->dbp->dbenv->lk_info, dbc->locker, 0,
1867*7c478bd9Sstevel@tonic-gate 		    &dbc->lock_dbt, mode, &hcp->lock);
1868*7c478bd9Sstevel@tonic-gate 	else
1869*7c478bd9Sstevel@tonic-gate 		ret = lock_tget(dbc->dbp->dbenv->lk_info, dbc->txn, 0,
1870*7c478bd9Sstevel@tonic-gate 		    &dbc->lock_dbt, mode, &hcp->lock);
1871*7c478bd9Sstevel@tonic-gate 
1872*7c478bd9Sstevel@tonic-gate 	return (ret < 0 ? EAGAIN : ret);
1873*7c478bd9Sstevel@tonic-gate }
1874*7c478bd9Sstevel@tonic-gate 
1875*7c478bd9Sstevel@tonic-gate /*
1876*7c478bd9Sstevel@tonic-gate  * __ham_dpair --
1877*7c478bd9Sstevel@tonic-gate  *	Delete a pair on a page, paying no attention to what the pair
1878*7c478bd9Sstevel@tonic-gate  *	represents.  The caller is responsible for freeing up duplicates
1879*7c478bd9Sstevel@tonic-gate  *	or offpage entries that might be referenced by this pair.
1880*7c478bd9Sstevel@tonic-gate  *
1881*7c478bd9Sstevel@tonic-gate  * PUBLIC: void __ham_dpair __P((DB *, PAGE *, u_int32_t));
1882*7c478bd9Sstevel@tonic-gate  */
1883*7c478bd9Sstevel@tonic-gate void
__ham_dpair(dbp,p,pndx)1884*7c478bd9Sstevel@tonic-gate __ham_dpair(dbp, p, pndx)
1885*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1886*7c478bd9Sstevel@tonic-gate 	PAGE *p;
1887*7c478bd9Sstevel@tonic-gate 	u_int32_t pndx;
1888*7c478bd9Sstevel@tonic-gate {
1889*7c478bd9Sstevel@tonic-gate 	db_indx_t delta, n;
1890*7c478bd9Sstevel@tonic-gate 	u_int8_t *dest, *src;
1891*7c478bd9Sstevel@tonic-gate 
1892*7c478bd9Sstevel@tonic-gate 	/*
1893*7c478bd9Sstevel@tonic-gate 	 * Compute "delta", the amount we have to shift all of the
1894*7c478bd9Sstevel@tonic-gate 	 * offsets.  To find the delta, we just need to calculate
1895*7c478bd9Sstevel@tonic-gate 	 * the size of the pair of elements we are removing.
1896*7c478bd9Sstevel@tonic-gate 	 */
1897*7c478bd9Sstevel@tonic-gate 	delta = H_PAIRSIZE(p, dbp->pgsize, pndx);
1898*7c478bd9Sstevel@tonic-gate 
1899*7c478bd9Sstevel@tonic-gate 	/*
1900*7c478bd9Sstevel@tonic-gate 	 * The hard case: we want to remove something other than
1901*7c478bd9Sstevel@tonic-gate 	 * the last item on the page.  We need to shift data and
1902*7c478bd9Sstevel@tonic-gate 	 * offsets down.
1903*7c478bd9Sstevel@tonic-gate 	 */
1904*7c478bd9Sstevel@tonic-gate 	if ((db_indx_t)pndx != H_NUMPAIRS(p) - 1) {
1905*7c478bd9Sstevel@tonic-gate 		/*
1906*7c478bd9Sstevel@tonic-gate 		 * Move the data: src is the first occupied byte on
1907*7c478bd9Sstevel@tonic-gate 		 * the page. (Length is delta.)
1908*7c478bd9Sstevel@tonic-gate 		 */
1909*7c478bd9Sstevel@tonic-gate 		src = (u_int8_t *)p + HOFFSET(p);
1910*7c478bd9Sstevel@tonic-gate 
1911*7c478bd9Sstevel@tonic-gate 		/*
1912*7c478bd9Sstevel@tonic-gate 		 * Destination is delta bytes beyond src.  This might
1913*7c478bd9Sstevel@tonic-gate 		 * be an overlapping copy, so we have to use memmove.
1914*7c478bd9Sstevel@tonic-gate 		 */
1915*7c478bd9Sstevel@tonic-gate 		dest = src + delta;
1916*7c478bd9Sstevel@tonic-gate 		memmove(dest, src, p->inp[H_DATAINDEX(pndx)] - HOFFSET(p));
1917*7c478bd9Sstevel@tonic-gate 	}
1918*7c478bd9Sstevel@tonic-gate 
1919*7c478bd9Sstevel@tonic-gate 	/* Adjust the offsets. */
1920*7c478bd9Sstevel@tonic-gate 	for (n = (db_indx_t)pndx; n < (db_indx_t)(H_NUMPAIRS(p) - 1); n++) {
1921*7c478bd9Sstevel@tonic-gate 		p->inp[H_KEYINDEX(n)] = p->inp[H_KEYINDEX(n+1)] + delta;
1922*7c478bd9Sstevel@tonic-gate 		p->inp[H_DATAINDEX(n)] = p->inp[H_DATAINDEX(n+1)] + delta;
1923*7c478bd9Sstevel@tonic-gate 	}
1924*7c478bd9Sstevel@tonic-gate 
1925*7c478bd9Sstevel@tonic-gate 	/* Adjust page metadata. */
1926*7c478bd9Sstevel@tonic-gate 	HOFFSET(p) = HOFFSET(p) + delta;
1927*7c478bd9Sstevel@tonic-gate 	NUM_ENT(p) = NUM_ENT(p) - 2;
1928*7c478bd9Sstevel@tonic-gate }
1929*7c478bd9Sstevel@tonic-gate 
1930