xref: /illumos-gate/usr/src/cmd/sendmail/db/btree/bt_cursor.c (revision 7c478bd95313f5f23a4c958a745db2134aa0324)
1*7c478bd9Sstevel@tonic-gate /*-
2*7c478bd9Sstevel@tonic-gate  * See the file LICENSE for redistribution information.
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * Copyright (c) 1996, 1997, 1998
5*7c478bd9Sstevel@tonic-gate  *	Sleepycat Software.  All rights reserved.
6*7c478bd9Sstevel@tonic-gate  */
7*7c478bd9Sstevel@tonic-gate 
8*7c478bd9Sstevel@tonic-gate #include "config.h"
9*7c478bd9Sstevel@tonic-gate 
10*7c478bd9Sstevel@tonic-gate #ifndef lint
11*7c478bd9Sstevel@tonic-gate static const char sccsid[] = "@(#)bt_cursor.c	10.81 (Sleepycat) 12/16/98";
12*7c478bd9Sstevel@tonic-gate #endif /* not lint */
13*7c478bd9Sstevel@tonic-gate 
14*7c478bd9Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES
15*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
16*7c478bd9Sstevel@tonic-gate 
17*7c478bd9Sstevel@tonic-gate #include <errno.h>
18*7c478bd9Sstevel@tonic-gate #include <stdlib.h>
19*7c478bd9Sstevel@tonic-gate #include <string.h>
20*7c478bd9Sstevel@tonic-gate #endif
21*7c478bd9Sstevel@tonic-gate 
22*7c478bd9Sstevel@tonic-gate #include "db_int.h"
23*7c478bd9Sstevel@tonic-gate #include "db_page.h"
24*7c478bd9Sstevel@tonic-gate #include "btree.h"
25*7c478bd9Sstevel@tonic-gate #include "shqueue.h"
26*7c478bd9Sstevel@tonic-gate #include "db_shash.h"
27*7c478bd9Sstevel@tonic-gate #include "lock.h"
28*7c478bd9Sstevel@tonic-gate #include "lock_ext.h"
29*7c478bd9Sstevel@tonic-gate 
30*7c478bd9Sstevel@tonic-gate static int __bam_c_close __P((DBC *));
31*7c478bd9Sstevel@tonic-gate static int __bam_c_del __P((DBC *, u_int32_t));
32*7c478bd9Sstevel@tonic-gate static int __bam_c_destroy __P((DBC *));
33*7c478bd9Sstevel@tonic-gate static int __bam_c_first __P((DBC *, CURSOR *));
34*7c478bd9Sstevel@tonic-gate static int __bam_c_get __P((DBC *, DBT *, DBT *, u_int32_t));
35*7c478bd9Sstevel@tonic-gate static int __bam_c_getstack __P((DBC *, CURSOR *));
36*7c478bd9Sstevel@tonic-gate static int __bam_c_last __P((DBC *, CURSOR *));
37*7c478bd9Sstevel@tonic-gate static int __bam_c_next __P((DBC *, CURSOR *, int));
38*7c478bd9Sstevel@tonic-gate static int __bam_c_physdel __P((DBC *, CURSOR *, PAGE *));
39*7c478bd9Sstevel@tonic-gate static int __bam_c_prev __P((DBC *, CURSOR *));
40*7c478bd9Sstevel@tonic-gate static int __bam_c_put __P((DBC *, DBT *, DBT *, u_int32_t));
41*7c478bd9Sstevel@tonic-gate static void __bam_c_reset __P((CURSOR *));
42*7c478bd9Sstevel@tonic-gate static int __bam_c_rget __P((DBC *, DBT *, u_int32_t));
43*7c478bd9Sstevel@tonic-gate static int __bam_c_search __P((DBC *, CURSOR *, const DBT *, u_int32_t, int *));
44*7c478bd9Sstevel@tonic-gate static int __bam_dsearch __P((DBC *, CURSOR *,  DBT *, u_int32_t *));
45*7c478bd9Sstevel@tonic-gate 
/*
 * DISCARD --
 *	Drop whatever the cursor is currently holding.  If cp->page is set
 *	it is handed back to the memory pool (not marked dirty) and cleared;
 *	if cp->lock is set it is released through __BT_TLPUT and reset to
 *	LOCK_INVALID.  Return values of both release calls are ignored.
 *
 *	The macro expands to a bare brace block and names each argument
 *	more than once, so the arguments must be free of side effects.
 */
#undef	DISCARD
#define	DISCARD(dbc, cp) {						\
	if ((cp)->page != NULL) {					\
		(void)memp_fput((dbc)->dbp->mpf, (cp)->page, 0);	\
		(cp)->page = NULL;					\
	}								\
	if ((cp)->lock != LOCK_INVALID) {				\
		(void)__BT_TLPUT((dbc), (cp)->lock);			\
		(cp)->lock = LOCK_INVALID;				\
	}								\
}
58*7c478bd9Sstevel@tonic-gate 
/*
 * IS_CUR_DELETED --
 *	Evaluates to 1 if the item the cursor references is flagged as
 *	deleted, 0 otherwise.  With no duplicate page (dpgno is
 *	PGNO_INVALID) the item examined is at indx + O_INDX on cp->page;
 *	with a duplicate page it is at dindx on cp->page.
 */
#undef	IS_CUR_DELETED
#define	IS_CUR_DELETED(cp)						\
	((B_DISSET(GET_BKEYDATA((cp)->page,				\
	    ((cp)->dpgno == PGNO_INVALID ?				\
	    (cp)->indx + O_INDX : (cp)->dindx))->type)) != 0)
67*7c478bd9Sstevel@tonic-gate 
/*
 * IS_DELETED --
 *	Same test as for the cursor's own position, but for a caller-supplied
 *	index: evaluates to 1 if the item is flagged as deleted, 0 otherwise.
 *	With no duplicate page (dpgno is PGNO_INVALID) the item examined is
 *	at indx + O_INDX on cp->page; otherwise it is at indx itself.
 */
#undef	IS_DELETED
#define	IS_DELETED(cp, indx)						\
	((B_DISSET(GET_BKEYDATA((cp)->page,				\
	    ((cp)->dpgno == PGNO_INVALID ?				\
	    (indx) + O_INDX : (indx)))->type)) != 0)
75*7c478bd9Sstevel@tonic-gate 
/*
 * POSSIBLE_DUPLICATE --
 *	Could the cursor and an earlier snapshot of it be positioned on
 *	duplicates of one key (on-page or off-page)?
 *
 *	Both must name the same leaf page.  Beyond that, either:
 *	  - the leaf key indices are equal (this is what off-page
 *	    duplicates look like), or
 *	  - neither has a duplicate page set and the two key indices
 *	    resolve to the same offset in the page's inp[] array (on-page
 *	    duplicates).
 *
 *	The snapshot does not carry a valid page pointer, so the inp[]
 *	lookup for both indices goes through the live cursor's page.
 */
#undef	POSSIBLE_DUPLICATE
#define	POSSIBLE_DUPLICATE(cursor, saved_copy)				\
	((cursor)->pgno == (saved_copy).pgno &&				\
	    ((cursor)->indx == (saved_copy).indx ||			\
	    ((cursor)->dpgno == PGNO_INVALID &&				\
	    (saved_copy).dpgno == PGNO_INVALID &&			\
	    (cursor)->page->inp[(cursor)->indx] ==			\
	    (cursor)->page->inp[(saved_copy).indx])))
93*7c478bd9Sstevel@tonic-gate 
94*7c478bd9Sstevel@tonic-gate /*
95*7c478bd9Sstevel@tonic-gate  * __bam_c_reset --
96*7c478bd9Sstevel@tonic-gate  *	Initialize internal cursor structure.
97*7c478bd9Sstevel@tonic-gate  */
98*7c478bd9Sstevel@tonic-gate static void
99*7c478bd9Sstevel@tonic-gate __bam_c_reset(cp)
100*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
101*7c478bd9Sstevel@tonic-gate {
102*7c478bd9Sstevel@tonic-gate 	cp->sp = cp->csp = cp->stack;
103*7c478bd9Sstevel@tonic-gate 	cp->esp = cp->stack + sizeof(cp->stack) / sizeof(cp->stack[0]);
104*7c478bd9Sstevel@tonic-gate 	cp->page = NULL;
105*7c478bd9Sstevel@tonic-gate 	cp->pgno = PGNO_INVALID;
106*7c478bd9Sstevel@tonic-gate 	cp->indx = 0;
107*7c478bd9Sstevel@tonic-gate 	cp->dpgno = PGNO_INVALID;
108*7c478bd9Sstevel@tonic-gate 	cp->dindx = 0;
109*7c478bd9Sstevel@tonic-gate 	cp->lock = LOCK_INVALID;
110*7c478bd9Sstevel@tonic-gate 	cp->mode = DB_LOCK_NG;
111*7c478bd9Sstevel@tonic-gate 	cp->recno = RECNO_OOB;
112*7c478bd9Sstevel@tonic-gate 	cp->flags = 0;
113*7c478bd9Sstevel@tonic-gate }
114*7c478bd9Sstevel@tonic-gate 
115*7c478bd9Sstevel@tonic-gate /*
116*7c478bd9Sstevel@tonic-gate  * __bam_c_init --
117*7c478bd9Sstevel@tonic-gate  *	Initialize the access private portion of a cursor
118*7c478bd9Sstevel@tonic-gate  *
119*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __bam_c_init __P((DBC *));
120*7c478bd9Sstevel@tonic-gate  */
121*7c478bd9Sstevel@tonic-gate int
122*7c478bd9Sstevel@tonic-gate __bam_c_init(dbc)
123*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
124*7c478bd9Sstevel@tonic-gate {
125*7c478bd9Sstevel@tonic-gate 	DB *dbp;
126*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
127*7c478bd9Sstevel@tonic-gate 	int ret;
128*7c478bd9Sstevel@tonic-gate 
129*7c478bd9Sstevel@tonic-gate 	if ((ret = __os_calloc(1, sizeof(CURSOR), &cp)) != 0)
130*7c478bd9Sstevel@tonic-gate 		return (ret);
131*7c478bd9Sstevel@tonic-gate 
132*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
133*7c478bd9Sstevel@tonic-gate 	cp->dbc = dbc;
134*7c478bd9Sstevel@tonic-gate 
135*7c478bd9Sstevel@tonic-gate 	/*
136*7c478bd9Sstevel@tonic-gate 	 * Logical record numbers are always the same size, and we don't want
137*7c478bd9Sstevel@tonic-gate 	 * to have to check for space every time we return one.  Allocate it
138*7c478bd9Sstevel@tonic-gate 	 * in advance.
139*7c478bd9Sstevel@tonic-gate 	 */
140*7c478bd9Sstevel@tonic-gate 	if (dbp->type == DB_RECNO || F_ISSET(dbp, DB_BT_RECNUM)) {
141*7c478bd9Sstevel@tonic-gate 		if ((ret = __os_malloc(sizeof(db_recno_t),
142*7c478bd9Sstevel@tonic-gate 		    NULL, &dbc->rkey.data)) != 0) {
143*7c478bd9Sstevel@tonic-gate 			__os_free(cp, sizeof(CURSOR));
144*7c478bd9Sstevel@tonic-gate 			return (ret);
145*7c478bd9Sstevel@tonic-gate 		}
146*7c478bd9Sstevel@tonic-gate 		dbc->rkey.ulen = sizeof(db_recno_t);
147*7c478bd9Sstevel@tonic-gate 	}
148*7c478bd9Sstevel@tonic-gate 
149*7c478bd9Sstevel@tonic-gate 	/* Initialize methods. */
150*7c478bd9Sstevel@tonic-gate 	dbc->internal = cp;
151*7c478bd9Sstevel@tonic-gate 	if (dbp->type == DB_BTREE) {
152*7c478bd9Sstevel@tonic-gate 		dbc->c_am_close = __bam_c_close;
153*7c478bd9Sstevel@tonic-gate 		dbc->c_am_destroy = __bam_c_destroy;
154*7c478bd9Sstevel@tonic-gate 		dbc->c_del = __bam_c_del;
155*7c478bd9Sstevel@tonic-gate 		dbc->c_get = __bam_c_get;
156*7c478bd9Sstevel@tonic-gate 		dbc->c_put = __bam_c_put;
157*7c478bd9Sstevel@tonic-gate 	} else {
158*7c478bd9Sstevel@tonic-gate 		dbc->c_am_close = __bam_c_close;
159*7c478bd9Sstevel@tonic-gate 		dbc->c_am_destroy = __bam_c_destroy;
160*7c478bd9Sstevel@tonic-gate 		dbc->c_del = __ram_c_del;
161*7c478bd9Sstevel@tonic-gate 		dbc->c_get = __ram_c_get;
162*7c478bd9Sstevel@tonic-gate 		dbc->c_put = __ram_c_put;
163*7c478bd9Sstevel@tonic-gate 	}
164*7c478bd9Sstevel@tonic-gate 
165*7c478bd9Sstevel@tonic-gate 	/* Initialize dynamic information. */
166*7c478bd9Sstevel@tonic-gate 	__bam_c_reset(cp);
167*7c478bd9Sstevel@tonic-gate 
168*7c478bd9Sstevel@tonic-gate 	return (0);
169*7c478bd9Sstevel@tonic-gate }
170*7c478bd9Sstevel@tonic-gate 
171*7c478bd9Sstevel@tonic-gate /*
172*7c478bd9Sstevel@tonic-gate  * __bam_c_close --
173*7c478bd9Sstevel@tonic-gate  *	Close down the cursor from a single use.
174*7c478bd9Sstevel@tonic-gate  */
175*7c478bd9Sstevel@tonic-gate static int
176*7c478bd9Sstevel@tonic-gate __bam_c_close(dbc)
177*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
178*7c478bd9Sstevel@tonic-gate {
179*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
180*7c478bd9Sstevel@tonic-gate 	DB *dbp;
181*7c478bd9Sstevel@tonic-gate 	int ret;
182*7c478bd9Sstevel@tonic-gate 
183*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
184*7c478bd9Sstevel@tonic-gate 	cp = dbc->internal;
185*7c478bd9Sstevel@tonic-gate 	ret = 0;
186*7c478bd9Sstevel@tonic-gate 
187*7c478bd9Sstevel@tonic-gate 	/*
188*7c478bd9Sstevel@tonic-gate 	 * If a cursor deleted a btree key, perform the actual deletion.
189*7c478bd9Sstevel@tonic-gate 	 * (Recno keys are either deleted immediately or never deleted.)
190*7c478bd9Sstevel@tonic-gate 	 */
191*7c478bd9Sstevel@tonic-gate 	if (dbp->type == DB_BTREE && F_ISSET(cp, C_DELETED))
192*7c478bd9Sstevel@tonic-gate 		ret = __bam_c_physdel(dbc, cp, NULL);
193*7c478bd9Sstevel@tonic-gate 
194*7c478bd9Sstevel@tonic-gate 	/* Discard any locks not acquired inside of a transaction. */
195*7c478bd9Sstevel@tonic-gate 	if (cp->lock != LOCK_INVALID) {
196*7c478bd9Sstevel@tonic-gate 		(void)__BT_TLPUT(dbc, cp->lock);
197*7c478bd9Sstevel@tonic-gate 		cp->lock = LOCK_INVALID;
198*7c478bd9Sstevel@tonic-gate 	}
199*7c478bd9Sstevel@tonic-gate 
200*7c478bd9Sstevel@tonic-gate 	/* Sanity checks. */
201*7c478bd9Sstevel@tonic-gate #ifdef DIAGNOSTIC
202*7c478bd9Sstevel@tonic-gate 	if (cp->csp != cp->stack)
203*7c478bd9Sstevel@tonic-gate 		__db_err(dbp->dbenv, "btree cursor close: stack not empty");
204*7c478bd9Sstevel@tonic-gate #endif
205*7c478bd9Sstevel@tonic-gate 
206*7c478bd9Sstevel@tonic-gate 	/* Initialize dynamic information. */
207*7c478bd9Sstevel@tonic-gate 	__bam_c_reset(cp);
208*7c478bd9Sstevel@tonic-gate 
209*7c478bd9Sstevel@tonic-gate 	return (ret);
210*7c478bd9Sstevel@tonic-gate }
211*7c478bd9Sstevel@tonic-gate 
212*7c478bd9Sstevel@tonic-gate /*
213*7c478bd9Sstevel@tonic-gate  * __bam_c_destroy --
214*7c478bd9Sstevel@tonic-gate  *	Close a single cursor -- internal version.
215*7c478bd9Sstevel@tonic-gate  */
216*7c478bd9Sstevel@tonic-gate static int
217*7c478bd9Sstevel@tonic-gate __bam_c_destroy(dbc)
218*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
219*7c478bd9Sstevel@tonic-gate {
220*7c478bd9Sstevel@tonic-gate 	/* Discard the structures. */
221*7c478bd9Sstevel@tonic-gate 	__os_free(dbc->internal, sizeof(CURSOR));
222*7c478bd9Sstevel@tonic-gate 
223*7c478bd9Sstevel@tonic-gate 	return (0);
224*7c478bd9Sstevel@tonic-gate }
225*7c478bd9Sstevel@tonic-gate 
226*7c478bd9Sstevel@tonic-gate /*
227*7c478bd9Sstevel@tonic-gate  * __bam_c_del --
228*7c478bd9Sstevel@tonic-gate  *	Delete using a cursor.
229*7c478bd9Sstevel@tonic-gate  */
230*7c478bd9Sstevel@tonic-gate static int
231*7c478bd9Sstevel@tonic-gate __bam_c_del(dbc, flags)
232*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
233*7c478bd9Sstevel@tonic-gate 	u_int32_t flags;
234*7c478bd9Sstevel@tonic-gate {
235*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
236*7c478bd9Sstevel@tonic-gate 	DB *dbp;
237*7c478bd9Sstevel@tonic-gate 	DB_LOCK lock;
238*7c478bd9Sstevel@tonic-gate 	PAGE *h;
239*7c478bd9Sstevel@tonic-gate 	db_pgno_t pgno;
240*7c478bd9Sstevel@tonic-gate 	db_indx_t indx;
241*7c478bd9Sstevel@tonic-gate 	int ret;
242*7c478bd9Sstevel@tonic-gate 
243*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
244*7c478bd9Sstevel@tonic-gate 	cp = dbc->internal;
245*7c478bd9Sstevel@tonic-gate 	h = NULL;
246*7c478bd9Sstevel@tonic-gate 
247*7c478bd9Sstevel@tonic-gate 	DB_PANIC_CHECK(dbp);
248*7c478bd9Sstevel@tonic-gate 
249*7c478bd9Sstevel@tonic-gate 	/* Check for invalid flags. */
250*7c478bd9Sstevel@tonic-gate 	if ((ret = __db_cdelchk(dbp, flags,
251*7c478bd9Sstevel@tonic-gate 	    F_ISSET(dbp, DB_AM_RDONLY), cp->pgno != PGNO_INVALID)) != 0)
252*7c478bd9Sstevel@tonic-gate 		return (ret);
253*7c478bd9Sstevel@tonic-gate 
254*7c478bd9Sstevel@tonic-gate 	/*
255*7c478bd9Sstevel@tonic-gate 	 * If we are running CDB, this had better be either a write
256*7c478bd9Sstevel@tonic-gate 	 * cursor or an immediate writer.
257*7c478bd9Sstevel@tonic-gate 	 */
258*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(dbp, DB_AM_CDB))
259*7c478bd9Sstevel@tonic-gate 		if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
260*7c478bd9Sstevel@tonic-gate 			return (EINVAL);
261*7c478bd9Sstevel@tonic-gate 
262*7c478bd9Sstevel@tonic-gate 	DEBUG_LWRITE(dbc, dbc->txn, "bam_c_del", NULL, NULL, flags);
263*7c478bd9Sstevel@tonic-gate 
264*7c478bd9Sstevel@tonic-gate 	/* If already deleted, return failure. */
265*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(cp, C_DELETED))
266*7c478bd9Sstevel@tonic-gate 		return (DB_KEYEMPTY);
267*7c478bd9Sstevel@tonic-gate 
268*7c478bd9Sstevel@tonic-gate 	/*
269*7c478bd9Sstevel@tonic-gate 	 * We don't physically delete the record until the cursor moves,
270*7c478bd9Sstevel@tonic-gate 	 * so we have to have a long-lived write lock on the page instead
271*7c478bd9Sstevel@tonic-gate 	 * of a long-lived read lock.  Note, we have to have a read lock
272*7c478bd9Sstevel@tonic-gate 	 * to even get here, so we simply discard it.
273*7c478bd9Sstevel@tonic-gate 	 */
274*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(dbp, DB_AM_LOCKING) && cp->mode != DB_LOCK_WRITE) {
275*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_lget(dbc,
276*7c478bd9Sstevel@tonic-gate 		    0, cp->pgno, DB_LOCK_WRITE, &lock)) != 0)
277*7c478bd9Sstevel@tonic-gate 			goto err;
278*7c478bd9Sstevel@tonic-gate 		(void)__BT_TLPUT(dbc, cp->lock);
279*7c478bd9Sstevel@tonic-gate 		cp->lock = lock;
280*7c478bd9Sstevel@tonic-gate 		cp->mode = DB_LOCK_WRITE;
281*7c478bd9Sstevel@tonic-gate 	}
282*7c478bd9Sstevel@tonic-gate 
283*7c478bd9Sstevel@tonic-gate 	/*
284*7c478bd9Sstevel@tonic-gate 	 * Acquire the underlying page (which may be different from the above
285*7c478bd9Sstevel@tonic-gate 	 * page because it may be a duplicate page), and set the on-page and
286*7c478bd9Sstevel@tonic-gate 	 * in-cursor delete flags.  We don't need to lock it as we've already
287*7c478bd9Sstevel@tonic-gate 	 * write-locked the page leading to it.
288*7c478bd9Sstevel@tonic-gate 	 */
289*7c478bd9Sstevel@tonic-gate 	if (cp->dpgno == PGNO_INVALID) {
290*7c478bd9Sstevel@tonic-gate 		pgno = cp->pgno;
291*7c478bd9Sstevel@tonic-gate 		indx = cp->indx;
292*7c478bd9Sstevel@tonic-gate 	} else {
293*7c478bd9Sstevel@tonic-gate 		pgno = cp->dpgno;
294*7c478bd9Sstevel@tonic-gate 		indx = cp->dindx;
295*7c478bd9Sstevel@tonic-gate 	}
296*7c478bd9Sstevel@tonic-gate 
297*7c478bd9Sstevel@tonic-gate 	if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
298*7c478bd9Sstevel@tonic-gate 		goto err;
299*7c478bd9Sstevel@tonic-gate 
300*7c478bd9Sstevel@tonic-gate 	/* Log the change. */
301*7c478bd9Sstevel@tonic-gate 	if (DB_LOGGING(dbc) &&
302*7c478bd9Sstevel@tonic-gate 	    (ret = __bam_cdel_log(dbp->dbenv->lg_info, dbc->txn, &LSN(h),
303*7c478bd9Sstevel@tonic-gate 	    0, dbp->log_fileid, PGNO(h), &LSN(h), indx)) != 0) {
304*7c478bd9Sstevel@tonic-gate 		(void)memp_fput(dbp->mpf, h, 0);
305*7c478bd9Sstevel@tonic-gate 		goto err;
306*7c478bd9Sstevel@tonic-gate 	}
307*7c478bd9Sstevel@tonic-gate 
308*7c478bd9Sstevel@tonic-gate 	/*
309*7c478bd9Sstevel@tonic-gate 	 * Set the intent-to-delete flag on the page and update all cursors. */
310*7c478bd9Sstevel@tonic-gate 	if (cp->dpgno == PGNO_INVALID)
311*7c478bd9Sstevel@tonic-gate 		B_DSET(GET_BKEYDATA(h, indx + O_INDX)->type);
312*7c478bd9Sstevel@tonic-gate 	else
313*7c478bd9Sstevel@tonic-gate 		B_DSET(GET_BKEYDATA(h, indx)->type);
314*7c478bd9Sstevel@tonic-gate 	(void)__bam_ca_delete(dbp, pgno, indx, 1);
315*7c478bd9Sstevel@tonic-gate 
316*7c478bd9Sstevel@tonic-gate 	ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY);
317*7c478bd9Sstevel@tonic-gate 	h = NULL;
318*7c478bd9Sstevel@tonic-gate 
319*7c478bd9Sstevel@tonic-gate 	/*
320*7c478bd9Sstevel@tonic-gate 	 * If the tree has record numbers, we have to adjust the counts.
321*7c478bd9Sstevel@tonic-gate 	 *
322*7c478bd9Sstevel@tonic-gate 	 * !!!
323*7c478bd9Sstevel@tonic-gate 	 * This test is right -- we don't yet support duplicates and record
324*7c478bd9Sstevel@tonic-gate 	 * numbers in the same tree, so ignore duplicates if DB_BT_RECNUM
325*7c478bd9Sstevel@tonic-gate 	 * set.
326*7c478bd9Sstevel@tonic-gate 	 */
327*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(dbp, DB_BT_RECNUM)) {
328*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_getstack(dbc, cp)) != 0)
329*7c478bd9Sstevel@tonic-gate 			goto err;
330*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_adjust(dbc, -1)) != 0)
331*7c478bd9Sstevel@tonic-gate 			goto err;
332*7c478bd9Sstevel@tonic-gate 		(void)__bam_stkrel(dbc, 0);
333*7c478bd9Sstevel@tonic-gate 	}
334*7c478bd9Sstevel@tonic-gate 
335*7c478bd9Sstevel@tonic-gate err:	if (h != NULL)
336*7c478bd9Sstevel@tonic-gate 		(void)memp_fput(dbp->mpf, h, 0);
337*7c478bd9Sstevel@tonic-gate 	return (ret);
338*7c478bd9Sstevel@tonic-gate }
339*7c478bd9Sstevel@tonic-gate 
340*7c478bd9Sstevel@tonic-gate /*
341*7c478bd9Sstevel@tonic-gate  * __bam_c_get --
342*7c478bd9Sstevel@tonic-gate  *	Get using a cursor (btree).
343*7c478bd9Sstevel@tonic-gate  */
344*7c478bd9Sstevel@tonic-gate static int
345*7c478bd9Sstevel@tonic-gate __bam_c_get(dbc, key, data, flags)
346*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
347*7c478bd9Sstevel@tonic-gate 	DBT *key, *data;
348*7c478bd9Sstevel@tonic-gate 	u_int32_t flags;
349*7c478bd9Sstevel@tonic-gate {
350*7c478bd9Sstevel@tonic-gate 	CURSOR *cp, copy, start;
351*7c478bd9Sstevel@tonic-gate 	DB *dbp;
352*7c478bd9Sstevel@tonic-gate 	PAGE *h;
353*7c478bd9Sstevel@tonic-gate 	int exact, ret, tmp_rmw;
354*7c478bd9Sstevel@tonic-gate 
355*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
356*7c478bd9Sstevel@tonic-gate 	cp = dbc->internal;
357*7c478bd9Sstevel@tonic-gate 
358*7c478bd9Sstevel@tonic-gate 	DB_PANIC_CHECK(dbp);
359*7c478bd9Sstevel@tonic-gate 
360*7c478bd9Sstevel@tonic-gate 	/* Check for invalid flags. */
361*7c478bd9Sstevel@tonic-gate 	if ((ret = __db_cgetchk(dbp,
362*7c478bd9Sstevel@tonic-gate 	    key, data, flags, cp->pgno != PGNO_INVALID)) != 0)
363*7c478bd9Sstevel@tonic-gate 		return (ret);
364*7c478bd9Sstevel@tonic-gate 
365*7c478bd9Sstevel@tonic-gate 	/* Clear OR'd in additional bits so we can check for flag equality. */
366*7c478bd9Sstevel@tonic-gate 	tmp_rmw = 0;
367*7c478bd9Sstevel@tonic-gate 	if (LF_ISSET(DB_RMW)) {
368*7c478bd9Sstevel@tonic-gate 		if (!F_ISSET(dbp, DB_AM_CDB)) {
369*7c478bd9Sstevel@tonic-gate 			tmp_rmw = 1;
370*7c478bd9Sstevel@tonic-gate 			F_SET(dbc, DBC_RMW);
371*7c478bd9Sstevel@tonic-gate 		}
372*7c478bd9Sstevel@tonic-gate 		LF_CLR(DB_RMW);
373*7c478bd9Sstevel@tonic-gate 	}
374*7c478bd9Sstevel@tonic-gate 
375*7c478bd9Sstevel@tonic-gate 	DEBUG_LREAD(dbc, dbc->txn, "bam_c_get",
376*7c478bd9Sstevel@tonic-gate 	    flags == DB_SET || flags == DB_SET_RANGE ? key : NULL, NULL, flags);
377*7c478bd9Sstevel@tonic-gate 
378*7c478bd9Sstevel@tonic-gate 	/*
379*7c478bd9Sstevel@tonic-gate 	 * Return a cursor's record number.  It has nothing to do with the
380*7c478bd9Sstevel@tonic-gate 	 * cursor get code except that it's been rammed into the interface.
381*7c478bd9Sstevel@tonic-gate 	 */
382*7c478bd9Sstevel@tonic-gate 	if (flags == DB_GET_RECNO) {
383*7c478bd9Sstevel@tonic-gate 		ret = __bam_c_rget(dbc, data, flags);
384*7c478bd9Sstevel@tonic-gate 		if (tmp_rmw)
385*7c478bd9Sstevel@tonic-gate 			F_CLR(dbc, DBC_RMW);
386*7c478bd9Sstevel@tonic-gate 		return (ret);
387*7c478bd9Sstevel@tonic-gate 	}
388*7c478bd9Sstevel@tonic-gate 
389*7c478bd9Sstevel@tonic-gate 	/*
390*7c478bd9Sstevel@tonic-gate 	 * Initialize the cursor for a new retrieval.  Clear the cursor's
391*7c478bd9Sstevel@tonic-gate 	 * page pointer, it was set before this operation, and no longer
392*7c478bd9Sstevel@tonic-gate 	 * has any meaning.
393*7c478bd9Sstevel@tonic-gate 	 */
394*7c478bd9Sstevel@tonic-gate 	cp->page = NULL;
395*7c478bd9Sstevel@tonic-gate 	copy = *cp;
396*7c478bd9Sstevel@tonic-gate 	cp->lock = LOCK_INVALID;
397*7c478bd9Sstevel@tonic-gate 
398*7c478bd9Sstevel@tonic-gate 	switch (flags) {
399*7c478bd9Sstevel@tonic-gate 	case DB_CURRENT:
400*7c478bd9Sstevel@tonic-gate 		/* It's not possible to return a deleted record. */
401*7c478bd9Sstevel@tonic-gate 		if (F_ISSET(cp, C_DELETED)) {
402*7c478bd9Sstevel@tonic-gate 			ret = DB_KEYEMPTY;
403*7c478bd9Sstevel@tonic-gate 			goto err;
404*7c478bd9Sstevel@tonic-gate 		}
405*7c478bd9Sstevel@tonic-gate 
406*7c478bd9Sstevel@tonic-gate 		/* Acquire the current page. */
407*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_lget(dbc,
408*7c478bd9Sstevel@tonic-gate 		    0, cp->pgno, DB_LOCK_READ, &cp->lock)) == 0)
409*7c478bd9Sstevel@tonic-gate 			ret = memp_fget(dbp->mpf,
410*7c478bd9Sstevel@tonic-gate 			    cp->dpgno == PGNO_INVALID ? &cp->pgno : &cp->dpgno,
411*7c478bd9Sstevel@tonic-gate 			    0, &cp->page);
412*7c478bd9Sstevel@tonic-gate 		if (ret != 0)
413*7c478bd9Sstevel@tonic-gate 			goto err;
414*7c478bd9Sstevel@tonic-gate 		break;
415*7c478bd9Sstevel@tonic-gate 	case DB_NEXT_DUP:
416*7c478bd9Sstevel@tonic-gate 		if (cp->pgno == PGNO_INVALID) {
417*7c478bd9Sstevel@tonic-gate 			ret = EINVAL;
418*7c478bd9Sstevel@tonic-gate 			goto err;
419*7c478bd9Sstevel@tonic-gate 		}
420*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_next(dbc, cp, 1)) != 0)
421*7c478bd9Sstevel@tonic-gate 			goto err;
422*7c478bd9Sstevel@tonic-gate 
423*7c478bd9Sstevel@tonic-gate 		/* Make sure we didn't go past the end of the duplicates. */
424*7c478bd9Sstevel@tonic-gate 		if (!POSSIBLE_DUPLICATE(cp, copy)) {
425*7c478bd9Sstevel@tonic-gate 			ret = DB_NOTFOUND;
426*7c478bd9Sstevel@tonic-gate 			goto err;
427*7c478bd9Sstevel@tonic-gate 		}
428*7c478bd9Sstevel@tonic-gate 		break;
429*7c478bd9Sstevel@tonic-gate 	case DB_NEXT:
430*7c478bd9Sstevel@tonic-gate 		if (cp->pgno != PGNO_INVALID) {
431*7c478bd9Sstevel@tonic-gate 			if ((ret = __bam_c_next(dbc, cp, 1)) != 0)
432*7c478bd9Sstevel@tonic-gate 				goto err;
433*7c478bd9Sstevel@tonic-gate 			break;
434*7c478bd9Sstevel@tonic-gate 		}
435*7c478bd9Sstevel@tonic-gate 		/* FALLTHROUGH */
436*7c478bd9Sstevel@tonic-gate 	case DB_FIRST:
437*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_first(dbc, cp)) != 0)
438*7c478bd9Sstevel@tonic-gate 			goto err;
439*7c478bd9Sstevel@tonic-gate 		break;
440*7c478bd9Sstevel@tonic-gate 	case DB_PREV:
441*7c478bd9Sstevel@tonic-gate 		if (cp->pgno != PGNO_INVALID) {
442*7c478bd9Sstevel@tonic-gate 			if ((ret = __bam_c_prev(dbc, cp)) != 0)
443*7c478bd9Sstevel@tonic-gate 				goto err;
444*7c478bd9Sstevel@tonic-gate 			break;
445*7c478bd9Sstevel@tonic-gate 		}
446*7c478bd9Sstevel@tonic-gate 		/* FALLTHROUGH */
447*7c478bd9Sstevel@tonic-gate 	case DB_LAST:
448*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_last(dbc, cp)) != 0)
449*7c478bd9Sstevel@tonic-gate 			goto err;
450*7c478bd9Sstevel@tonic-gate 		break;
451*7c478bd9Sstevel@tonic-gate 	case DB_SET:
452*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_search(dbc, cp, key, flags, &exact)) != 0)
453*7c478bd9Sstevel@tonic-gate 			goto err;
454*7c478bd9Sstevel@tonic-gate 
455*7c478bd9Sstevel@tonic-gate 		/*
456*7c478bd9Sstevel@tonic-gate 		 * We cannot currently be referencing a deleted record, but we
457*7c478bd9Sstevel@tonic-gate 		 * may be referencing off-page duplicates.
458*7c478bd9Sstevel@tonic-gate 		 *
459*7c478bd9Sstevel@tonic-gate 		 * If we're referencing off-page duplicates, move off-page.
460*7c478bd9Sstevel@tonic-gate 		 * If we moved off-page, move to the next non-deleted record.
461*7c478bd9Sstevel@tonic-gate 		 * If we moved to the next non-deleted record, check to make
462*7c478bd9Sstevel@tonic-gate 		 * sure we didn't switch records because our current record
463*7c478bd9Sstevel@tonic-gate 		 * had no non-deleted data items.
464*7c478bd9Sstevel@tonic-gate 		 */
465*7c478bd9Sstevel@tonic-gate 		start = *cp;
466*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0)
467*7c478bd9Sstevel@tonic-gate 			goto err;
468*7c478bd9Sstevel@tonic-gate 		if (cp->dpgno != PGNO_INVALID && IS_CUR_DELETED(cp)) {
469*7c478bd9Sstevel@tonic-gate 			if ((ret = __bam_c_next(dbc, cp, 0)) != 0)
470*7c478bd9Sstevel@tonic-gate 				goto err;
471*7c478bd9Sstevel@tonic-gate 			if (!POSSIBLE_DUPLICATE(cp, start)) {
472*7c478bd9Sstevel@tonic-gate 				ret = DB_NOTFOUND;
473*7c478bd9Sstevel@tonic-gate 				goto err;
474*7c478bd9Sstevel@tonic-gate 			}
475*7c478bd9Sstevel@tonic-gate 		}
476*7c478bd9Sstevel@tonic-gate 		break;
477*7c478bd9Sstevel@tonic-gate 	case DB_SET_RECNO:
478*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_search(dbc, cp, key, flags, &exact)) != 0)
479*7c478bd9Sstevel@tonic-gate 			goto err;
480*7c478bd9Sstevel@tonic-gate 		break;
481*7c478bd9Sstevel@tonic-gate 	case DB_GET_BOTH:
482*7c478bd9Sstevel@tonic-gate 		if (F_ISSET(dbc, DBC_CONTINUE | DBC_KEYSET)) {
483*7c478bd9Sstevel@tonic-gate 			/* Acquire the current page. */
484*7c478bd9Sstevel@tonic-gate 			if ((ret = memp_fget(dbp->mpf,
485*7c478bd9Sstevel@tonic-gate 			    cp->dpgno == PGNO_INVALID ? &cp->pgno : &cp->dpgno,
486*7c478bd9Sstevel@tonic-gate 			    0, &cp->page)) != 0)
487*7c478bd9Sstevel@tonic-gate 				goto err;
488*7c478bd9Sstevel@tonic-gate 
489*7c478bd9Sstevel@tonic-gate 			/* If DBC_CONTINUE, move to the next item. */
490*7c478bd9Sstevel@tonic-gate 			if (F_ISSET(dbc, DBC_CONTINUE) &&
491*7c478bd9Sstevel@tonic-gate 			    (ret = __bam_c_next(dbc, cp, 1)) != 0)
492*7c478bd9Sstevel@tonic-gate 				goto err;
493*7c478bd9Sstevel@tonic-gate 		} else {
494*7c478bd9Sstevel@tonic-gate 			if ((ret =
495*7c478bd9Sstevel@tonic-gate 			    __bam_c_search(dbc, cp, key, flags, &exact)) != 0)
496*7c478bd9Sstevel@tonic-gate 				goto err;
497*7c478bd9Sstevel@tonic-gate 
498*7c478bd9Sstevel@tonic-gate 			/*
499*7c478bd9Sstevel@tonic-gate 			 * We may be referencing a duplicates page.  Move to
500*7c478bd9Sstevel@tonic-gate 			 * the first duplicate.
501*7c478bd9Sstevel@tonic-gate 			 */
502*7c478bd9Sstevel@tonic-gate 			if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0)
503*7c478bd9Sstevel@tonic-gate 				goto err;
504*7c478bd9Sstevel@tonic-gate 		}
505*7c478bd9Sstevel@tonic-gate 
506*7c478bd9Sstevel@tonic-gate 		/* Search for a matching entry. */
507*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_dsearch(dbc, cp, data, NULL)) != 0)
508*7c478bd9Sstevel@tonic-gate 			goto err;
509*7c478bd9Sstevel@tonic-gate 
510*7c478bd9Sstevel@tonic-gate 		/* Ignore deleted entries. */
511*7c478bd9Sstevel@tonic-gate 		if (IS_CUR_DELETED(cp)) {
512*7c478bd9Sstevel@tonic-gate 			ret = DB_NOTFOUND;
513*7c478bd9Sstevel@tonic-gate 			goto err;
514*7c478bd9Sstevel@tonic-gate 		}
515*7c478bd9Sstevel@tonic-gate 		break;
516*7c478bd9Sstevel@tonic-gate 	case DB_SET_RANGE:
517*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_search(dbc, cp, key, flags, &exact)) != 0)
518*7c478bd9Sstevel@tonic-gate 			goto err;
519*7c478bd9Sstevel@tonic-gate 
520*7c478bd9Sstevel@tonic-gate 		/*
521*7c478bd9Sstevel@tonic-gate 		 * As we didn't require an exact match, the search function
522*7c478bd9Sstevel@tonic-gate 		 * may have returned an entry past the end of the page.  If
523*7c478bd9Sstevel@tonic-gate 		 * so, move to the next entry.
524*7c478bd9Sstevel@tonic-gate 		 */
525*7c478bd9Sstevel@tonic-gate 		if (cp->indx == NUM_ENT(cp->page) &&
526*7c478bd9Sstevel@tonic-gate 		    (ret = __bam_c_next(dbc, cp, 0)) != 0)
527*7c478bd9Sstevel@tonic-gate 			goto err;
528*7c478bd9Sstevel@tonic-gate 
529*7c478bd9Sstevel@tonic-gate 		/*
530*7c478bd9Sstevel@tonic-gate 		 * We may be referencing off-page duplicates, if so, move
531*7c478bd9Sstevel@tonic-gate 		 * off-page.
532*7c478bd9Sstevel@tonic-gate 		 */
533*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0)
534*7c478bd9Sstevel@tonic-gate 			goto err;
535*7c478bd9Sstevel@tonic-gate 
536*7c478bd9Sstevel@tonic-gate 		/*
537*7c478bd9Sstevel@tonic-gate 		 * We may be referencing a deleted record, if so, move to
538*7c478bd9Sstevel@tonic-gate 		 * the next non-deleted record.
539*7c478bd9Sstevel@tonic-gate 		 */
540*7c478bd9Sstevel@tonic-gate 		if (IS_CUR_DELETED(cp) && (ret = __bam_c_next(dbc, cp, 0)) != 0)
541*7c478bd9Sstevel@tonic-gate 			goto err;
542*7c478bd9Sstevel@tonic-gate 		break;
543*7c478bd9Sstevel@tonic-gate 	}
544*7c478bd9Sstevel@tonic-gate 
545*7c478bd9Sstevel@tonic-gate 	/*
546*7c478bd9Sstevel@tonic-gate 	 * Return the key if the user didn't give us one.  If we've moved to
547*7c478bd9Sstevel@tonic-gate 	 * a duplicate page, we may no longer have a pointer to the main page,
548*7c478bd9Sstevel@tonic-gate 	 * so we have to go get it.  We know that it's already read-locked,
549*7c478bd9Sstevel@tonic-gate 	 * however, so we don't have to acquire a new lock.
550*7c478bd9Sstevel@tonic-gate 	 */
551*7c478bd9Sstevel@tonic-gate 	if (flags != DB_SET) {
552*7c478bd9Sstevel@tonic-gate 		if (cp->dpgno != PGNO_INVALID) {
553*7c478bd9Sstevel@tonic-gate 			if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &h)) != 0)
554*7c478bd9Sstevel@tonic-gate 				goto err;
555*7c478bd9Sstevel@tonic-gate 		} else
556*7c478bd9Sstevel@tonic-gate 			h = cp->page;
557*7c478bd9Sstevel@tonic-gate 		ret = __db_ret(dbp,
558*7c478bd9Sstevel@tonic-gate 		    h, cp->indx, key, &dbc->rkey.data, &dbc->rkey.ulen);
559*7c478bd9Sstevel@tonic-gate 		if (cp->dpgno != PGNO_INVALID)
560*7c478bd9Sstevel@tonic-gate 			(void)memp_fput(dbp->mpf, h, 0);
561*7c478bd9Sstevel@tonic-gate 		if (ret)
562*7c478bd9Sstevel@tonic-gate 			goto err;
563*7c478bd9Sstevel@tonic-gate 	}
564*7c478bd9Sstevel@tonic-gate 
565*7c478bd9Sstevel@tonic-gate 	/* Return the data. */
566*7c478bd9Sstevel@tonic-gate 	if ((ret = __db_ret(dbp, cp->page,
567*7c478bd9Sstevel@tonic-gate 	    cp->dpgno == PGNO_INVALID ? cp->indx + O_INDX : cp->dindx,
568*7c478bd9Sstevel@tonic-gate 	    data, &dbc->rdata.data, &dbc->rdata.ulen)) != 0)
569*7c478bd9Sstevel@tonic-gate 		goto err;
570*7c478bd9Sstevel@tonic-gate 
571*7c478bd9Sstevel@tonic-gate 	/*
572*7c478bd9Sstevel@tonic-gate 	 * If the previous cursor record has been deleted, physically delete
573*7c478bd9Sstevel@tonic-gate 	 * the entry from the page.  We clear the deleted flag before we call
574*7c478bd9Sstevel@tonic-gate 	 * the underlying delete routine so that, if an error occurs, and we
575*7c478bd9Sstevel@tonic-gate 	 * restore the cursor, the deleted flag is cleared.  This is because,
576*7c478bd9Sstevel@tonic-gate 	 * if we manage to physically modify the page, and then restore the
577*7c478bd9Sstevel@tonic-gate 	 * cursor, we might try to repeat the page modification when closing
578*7c478bd9Sstevel@tonic-gate 	 * the cursor.
579*7c478bd9Sstevel@tonic-gate 	 */
580*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(&copy, C_DELETED)) {
581*7c478bd9Sstevel@tonic-gate 		F_CLR(&copy, C_DELETED);
582*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_physdel(dbc, &copy, cp->page)) != 0)
583*7c478bd9Sstevel@tonic-gate 			goto err;
584*7c478bd9Sstevel@tonic-gate 	}
585*7c478bd9Sstevel@tonic-gate 	F_CLR(cp, C_DELETED);
586*7c478bd9Sstevel@tonic-gate 
587*7c478bd9Sstevel@tonic-gate 	/* Release the previous lock, if any; the current lock is retained. */
588*7c478bd9Sstevel@tonic-gate 	if (copy.lock != LOCK_INVALID)
589*7c478bd9Sstevel@tonic-gate 		(void)__BT_TLPUT(dbc, copy.lock);
590*7c478bd9Sstevel@tonic-gate 
591*7c478bd9Sstevel@tonic-gate 	/* Release the current page. */
592*7c478bd9Sstevel@tonic-gate 	if ((ret = memp_fput(dbp->mpf, cp->page, 0)) != 0)
593*7c478bd9Sstevel@tonic-gate 		goto err;
594*7c478bd9Sstevel@tonic-gate 
595*7c478bd9Sstevel@tonic-gate 	if (0) {
596*7c478bd9Sstevel@tonic-gate err:		if (cp->page != NULL)
597*7c478bd9Sstevel@tonic-gate 			(void)memp_fput(dbp->mpf, cp->page, 0);
598*7c478bd9Sstevel@tonic-gate 		if (cp->lock != LOCK_INVALID)
599*7c478bd9Sstevel@tonic-gate 			(void)__BT_TLPUT(dbc, cp->lock);
600*7c478bd9Sstevel@tonic-gate 		*cp = copy;
601*7c478bd9Sstevel@tonic-gate 	}
602*7c478bd9Sstevel@tonic-gate 
603*7c478bd9Sstevel@tonic-gate 	/* Release temporary lock upgrade. */
604*7c478bd9Sstevel@tonic-gate 	if (tmp_rmw)
605*7c478bd9Sstevel@tonic-gate 		F_CLR(dbc, DBC_RMW);
606*7c478bd9Sstevel@tonic-gate 
607*7c478bd9Sstevel@tonic-gate 	return (ret);
608*7c478bd9Sstevel@tonic-gate }
609*7c478bd9Sstevel@tonic-gate 
610*7c478bd9Sstevel@tonic-gate /*
611*7c478bd9Sstevel@tonic-gate  * __bam_dsearch --
612*7c478bd9Sstevel@tonic-gate  *	Search for a matching data item (or the first data item that's
613*7c478bd9Sstevel@tonic-gate  *	equal to or greater than the one we're searching for).
614*7c478bd9Sstevel@tonic-gate  */
615*7c478bd9Sstevel@tonic-gate static int
616*7c478bd9Sstevel@tonic-gate __bam_dsearch(dbc, cp, data, iflagp)
617*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
618*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
619*7c478bd9Sstevel@tonic-gate 	DBT *data;
620*7c478bd9Sstevel@tonic-gate 	u_int32_t *iflagp;
621*7c478bd9Sstevel@tonic-gate {
622*7c478bd9Sstevel@tonic-gate 	DB *dbp;
623*7c478bd9Sstevel@tonic-gate 	CURSOR copy, last;
624*7c478bd9Sstevel@tonic-gate 	int cmp, ret;
625*7c478bd9Sstevel@tonic-gate 
626*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
627*7c478bd9Sstevel@tonic-gate 
628*7c478bd9Sstevel@tonic-gate 	/*
629*7c478bd9Sstevel@tonic-gate 	 * If iflagp is non-NULL, we're doing an insert.
630*7c478bd9Sstevel@tonic-gate 	 *
631*7c478bd9Sstevel@tonic-gate 	 * If the duplicates are off-page, use the duplicate search routine.
632*7c478bd9Sstevel@tonic-gate 	 */
633*7c478bd9Sstevel@tonic-gate 	if (cp->dpgno != PGNO_INVALID) {
634*7c478bd9Sstevel@tonic-gate 		if ((ret = __db_dsearch(dbc, iflagp != NULL,
635*7c478bd9Sstevel@tonic-gate 		    data, cp->dpgno, &cp->dindx, &cp->page, &cmp)) != 0)
636*7c478bd9Sstevel@tonic-gate 			return (ret);
637*7c478bd9Sstevel@tonic-gate 		cp->dpgno = cp->page->pgno;
638*7c478bd9Sstevel@tonic-gate 
639*7c478bd9Sstevel@tonic-gate 		if (iflagp == NULL) {
640*7c478bd9Sstevel@tonic-gate 			if (cmp != 0)
641*7c478bd9Sstevel@tonic-gate 				return (DB_NOTFOUND);
642*7c478bd9Sstevel@tonic-gate 			return (0);
643*7c478bd9Sstevel@tonic-gate 		}
644*7c478bd9Sstevel@tonic-gate 		*iflagp = DB_BEFORE;
645*7c478bd9Sstevel@tonic-gate 		return (0);
646*7c478bd9Sstevel@tonic-gate 	}
647*7c478bd9Sstevel@tonic-gate 
648*7c478bd9Sstevel@tonic-gate 	/* Otherwise, do the search ourselves. */
649*7c478bd9Sstevel@tonic-gate 	copy = *cp;
650*7c478bd9Sstevel@tonic-gate 	for (;;) {
651*7c478bd9Sstevel@tonic-gate 		/* Save the last interesting cursor position. */
652*7c478bd9Sstevel@tonic-gate 		last = *cp;
653*7c478bd9Sstevel@tonic-gate 
654*7c478bd9Sstevel@tonic-gate 		/* See if the data item matches the one we're looking for. */
655*7c478bd9Sstevel@tonic-gate 		if ((cmp = __bam_cmp(dbp, data, cp->page, cp->indx + O_INDX,
656*7c478bd9Sstevel@tonic-gate 		    dbp->dup_compare == NULL ?
657*7c478bd9Sstevel@tonic-gate 		    __bam_defcmp : dbp->dup_compare)) == 0) {
658*7c478bd9Sstevel@tonic-gate 			if (iflagp != NULL)
659*7c478bd9Sstevel@tonic-gate 				*iflagp = DB_AFTER;
660*7c478bd9Sstevel@tonic-gate 			return (0);
661*7c478bd9Sstevel@tonic-gate 		}
662*7c478bd9Sstevel@tonic-gate 
663*7c478bd9Sstevel@tonic-gate 		/*
664*7c478bd9Sstevel@tonic-gate 		 * If duplicate entries are sorted, we're done if we find a
665*7c478bd9Sstevel@tonic-gate 		 * page entry that sorts greater than the application item.
666*7c478bd9Sstevel@tonic-gate 		 * If doing an insert, return success, otherwise DB_NOTFOUND.
667*7c478bd9Sstevel@tonic-gate 		 */
668*7c478bd9Sstevel@tonic-gate 		if (dbp->dup_compare != NULL && cmp < 0) {
669*7c478bd9Sstevel@tonic-gate 			if (iflagp == NULL)
670*7c478bd9Sstevel@tonic-gate 				return (DB_NOTFOUND);
671*7c478bd9Sstevel@tonic-gate 			*iflagp = DB_BEFORE;
672*7c478bd9Sstevel@tonic-gate 			return (0);
673*7c478bd9Sstevel@tonic-gate 		}
674*7c478bd9Sstevel@tonic-gate 
675*7c478bd9Sstevel@tonic-gate 		/*
676*7c478bd9Sstevel@tonic-gate 		 * Move to the next item.  If we reach the end of the page and
677*7c478bd9Sstevel@tonic-gate 		 * we're doing an insert, set the cursor to the last item and
678*7c478bd9Sstevel@tonic-gate 		 * set the referenced memory location so callers know to insert
679*7c478bd9Sstevel@tonic-gate 		 * after the item, instead of before it.  If not inserting, we
680*7c478bd9Sstevel@tonic-gate 		 * return DB_NOTFOUND.
681*7c478bd9Sstevel@tonic-gate 		 */
682*7c478bd9Sstevel@tonic-gate 		if ((cp->indx += P_INDX) >= NUM_ENT(cp->page)) {
683*7c478bd9Sstevel@tonic-gate 			if (iflagp == NULL)
684*7c478bd9Sstevel@tonic-gate 				return (DB_NOTFOUND);
685*7c478bd9Sstevel@tonic-gate 			goto use_last;
686*7c478bd9Sstevel@tonic-gate 		}
687*7c478bd9Sstevel@tonic-gate 
688*7c478bd9Sstevel@tonic-gate 		/*
689*7c478bd9Sstevel@tonic-gate 		 * Make sure we didn't go past the end of the duplicates.  The
690*7c478bd9Sstevel@tonic-gate 		 * error conditions are the same as above.
691*7c478bd9Sstevel@tonic-gate 		 */
692*7c478bd9Sstevel@tonic-gate 		if (!POSSIBLE_DUPLICATE(cp, copy)) {
693*7c478bd9Sstevel@tonic-gate 			if (iflagp == NULL)
694*7c478bd9Sstevel@tonic-gate 				 return (DB_NOTFOUND);
695*7c478bd9Sstevel@tonic-gate use_last:		*cp = last;
696*7c478bd9Sstevel@tonic-gate 			*iflagp = DB_AFTER;
697*7c478bd9Sstevel@tonic-gate 			return (0);
698*7c478bd9Sstevel@tonic-gate 		}
699*7c478bd9Sstevel@tonic-gate 	}
700*7c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
701*7c478bd9Sstevel@tonic-gate }
702*7c478bd9Sstevel@tonic-gate 
703*7c478bd9Sstevel@tonic-gate /*
704*7c478bd9Sstevel@tonic-gate  * __bam_c_rget --
705*7c478bd9Sstevel@tonic-gate  *	Return the record number for a cursor.
706*7c478bd9Sstevel@tonic-gate  */
707*7c478bd9Sstevel@tonic-gate static int
708*7c478bd9Sstevel@tonic-gate __bam_c_rget(dbc, data, flags)
709*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
710*7c478bd9Sstevel@tonic-gate 	DBT *data;
711*7c478bd9Sstevel@tonic-gate 	u_int32_t flags;
712*7c478bd9Sstevel@tonic-gate {
713*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
714*7c478bd9Sstevel@tonic-gate 	DB *dbp;
715*7c478bd9Sstevel@tonic-gate 	DBT dbt;
716*7c478bd9Sstevel@tonic-gate 	db_recno_t recno;
717*7c478bd9Sstevel@tonic-gate 	int exact, ret;
718*7c478bd9Sstevel@tonic-gate 
719*7c478bd9Sstevel@tonic-gate 	COMPQUIET(flags, 0);
720*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
721*7c478bd9Sstevel@tonic-gate 	cp = dbc->internal;
722*7c478bd9Sstevel@tonic-gate 
723*7c478bd9Sstevel@tonic-gate 	/* Get the page with the current item on it. */
724*7c478bd9Sstevel@tonic-gate 	if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0)
725*7c478bd9Sstevel@tonic-gate 		return (ret);
726*7c478bd9Sstevel@tonic-gate 
727*7c478bd9Sstevel@tonic-gate 	/* Get a copy of the key. */
728*7c478bd9Sstevel@tonic-gate 	memset(&dbt, 0, sizeof(DBT));
729*7c478bd9Sstevel@tonic-gate 	dbt.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL;
730*7c478bd9Sstevel@tonic-gate 	if ((ret = __db_ret(dbp, cp->page, cp->indx, &dbt, NULL, NULL)) != 0)
731*7c478bd9Sstevel@tonic-gate 		goto err;
732*7c478bd9Sstevel@tonic-gate 
733*7c478bd9Sstevel@tonic-gate 	exact = 1;
734*7c478bd9Sstevel@tonic-gate 	if ((ret = __bam_search(dbc, &dbt,
735*7c478bd9Sstevel@tonic-gate 	    F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND,
736*7c478bd9Sstevel@tonic-gate 	    1, &recno, &exact)) != 0)
737*7c478bd9Sstevel@tonic-gate 		goto err;
738*7c478bd9Sstevel@tonic-gate 
739*7c478bd9Sstevel@tonic-gate 	ret = __db_retcopy(data, &recno, sizeof(recno),
740*7c478bd9Sstevel@tonic-gate 	    &dbc->rdata.data, &dbc->rdata.ulen, dbp->db_malloc);
741*7c478bd9Sstevel@tonic-gate 
742*7c478bd9Sstevel@tonic-gate 	/* Release the stack. */
743*7c478bd9Sstevel@tonic-gate 	__bam_stkrel(dbc, 0);
744*7c478bd9Sstevel@tonic-gate 
745*7c478bd9Sstevel@tonic-gate err:	(void)memp_fput(dbp->mpf, cp->page, 0);
746*7c478bd9Sstevel@tonic-gate 	__os_free(dbt.data, dbt.size);
747*7c478bd9Sstevel@tonic-gate 	return (ret);
748*7c478bd9Sstevel@tonic-gate }
749*7c478bd9Sstevel@tonic-gate 
750*7c478bd9Sstevel@tonic-gate /*
751*7c478bd9Sstevel@tonic-gate  * __bam_c_put --
752*7c478bd9Sstevel@tonic-gate  *	Put using a cursor.
 *
 *	The flags value selects how the insert position is found:
 *	  DB_AFTER, DB_BEFORE, DB_CURRENT
 *		Work at the cursor's existing position (the off-page
 *		duplicate position when cp->dpgno is set).  If a page split
 *		is needed, the key is copied from the page.
 *	  DB_KEYFIRST, DB_KEYLAST
 *		Position the cursor by searching for "key".
 *
 *	The item is stored with __bam_iitem; whenever that returns
 *	DB_NEEDSPLIT the page is split and the whole operation is retried.
 *
 *	Returns 0 on success.  Once the cursor has been saved in "copy", any
 *	failure restores the cursor from that copy before returning the
 *	error.
753*7c478bd9Sstevel@tonic-gate  */
754*7c478bd9Sstevel@tonic-gate static int
755*7c478bd9Sstevel@tonic-gate __bam_c_put(dbc, key, data, flags)
756*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
757*7c478bd9Sstevel@tonic-gate 	DBT *key, *data;
758*7c478bd9Sstevel@tonic-gate 	u_int32_t flags;
759*7c478bd9Sstevel@tonic-gate {
760*7c478bd9Sstevel@tonic-gate 	CURSOR *cp, copy;
761*7c478bd9Sstevel@tonic-gate 	DB *dbp;
762*7c478bd9Sstevel@tonic-gate 	DBT dbt;
763*7c478bd9Sstevel@tonic-gate 	db_indx_t indx;
764*7c478bd9Sstevel@tonic-gate 	db_pgno_t pgno;
765*7c478bd9Sstevel@tonic-gate 	u_int32_t iiflags, iiop;
766*7c478bd9Sstevel@tonic-gate 	int exact, needkey, ret, stack;
767*7c478bd9Sstevel@tonic-gate 	void *arg;
768*7c478bd9Sstevel@tonic-gate 
769*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
770*7c478bd9Sstevel@tonic-gate 	cp = dbc->internal;
771*7c478bd9Sstevel@tonic-gate 
772*7c478bd9Sstevel@tonic-gate 	DB_PANIC_CHECK(dbp);
773*7c478bd9Sstevel@tonic-gate 
774*7c478bd9Sstevel@tonic-gate 	DEBUG_LWRITE(dbc, dbc->txn, "bam_c_put",
775*7c478bd9Sstevel@tonic-gate 	    flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL,
776*7c478bd9Sstevel@tonic-gate 	    data, flags);
777*7c478bd9Sstevel@tonic-gate 
	/* Validate the arguments; nothing has been modified yet on failure. */
778*7c478bd9Sstevel@tonic-gate 	if ((ret = __db_cputchk(dbp, key, data, flags,
779*7c478bd9Sstevel@tonic-gate 	    F_ISSET(dbp, DB_AM_RDONLY), cp->pgno != PGNO_INVALID)) != 0)
780*7c478bd9Sstevel@tonic-gate 		return (ret);
781*7c478bd9Sstevel@tonic-gate 
782*7c478bd9Sstevel@tonic-gate 	/*
783*7c478bd9Sstevel@tonic-gate 	 * If we are running CDB, this had better be either a write
784*7c478bd9Sstevel@tonic-gate 	 * cursor or an immediate writer.  If it's a regular writer,
785*7c478bd9Sstevel@tonic-gate 	 * that means we have an IWRITE lock and we need to upgrade
786*7c478bd9Sstevel@tonic-gate 	 * it to a write lock.
	 *
	 * Note that a failed upgrade is reported as EAGAIN; the error code
	 * returned by lock_get itself is not passed on.
787*7c478bd9Sstevel@tonic-gate 	 */
788*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(dbp, DB_AM_CDB)) {
789*7c478bd9Sstevel@tonic-gate 		if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
790*7c478bd9Sstevel@tonic-gate 			return (EINVAL);
791*7c478bd9Sstevel@tonic-gate 
792*7c478bd9Sstevel@tonic-gate 		if (F_ISSET(dbc, DBC_RMW) &&
793*7c478bd9Sstevel@tonic-gate 		    (ret = lock_get(dbp->dbenv->lk_info, dbc->locker,
794*7c478bd9Sstevel@tonic-gate 		    DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE,
795*7c478bd9Sstevel@tonic-gate 		    &dbc->mylock)) != 0)
796*7c478bd9Sstevel@tonic-gate 			return (EAGAIN);
797*7c478bd9Sstevel@tonic-gate 	}
798*7c478bd9Sstevel@tonic-gate 
	/*
	 * The block below is skipped on the way in; it is entered only by
	 * the "goto split" that follows a DB_NEEDSPLIT return from
	 * __bam_iitem, by which time needkey, indx, stack and copy have all
	 * been set by the first pass.  After the split, control falls out of
	 * the block and the operation is retried from the top.
	 */
799*7c478bd9Sstevel@tonic-gate 	if (0) {
800*7c478bd9Sstevel@tonic-gate split:		/*
801*7c478bd9Sstevel@tonic-gate 		 * To split, we need a valid key for the page.  Since it's a
802*7c478bd9Sstevel@tonic-gate 		 * cursor, we have to build one.
803*7c478bd9Sstevel@tonic-gate 		 *
804*7c478bd9Sstevel@tonic-gate 		 * Acquire a copy of a key from the page.
805*7c478bd9Sstevel@tonic-gate 		 */
806*7c478bd9Sstevel@tonic-gate 		if (needkey) {
807*7c478bd9Sstevel@tonic-gate 			memset(&dbt, 0, sizeof(DBT));
808*7c478bd9Sstevel@tonic-gate 			if ((ret = __db_ret(dbp, cp->page, indx,
809*7c478bd9Sstevel@tonic-gate 			    &dbt, &dbc->rkey.data, &dbc->rkey.ulen)) != 0)
810*7c478bd9Sstevel@tonic-gate 				goto err;
811*7c478bd9Sstevel@tonic-gate 			arg = &dbt;
812*7c478bd9Sstevel@tonic-gate 		} else
813*7c478bd9Sstevel@tonic-gate 			arg = key;
814*7c478bd9Sstevel@tonic-gate 
815*7c478bd9Sstevel@tonic-gate 		/*
816*7c478bd9Sstevel@tonic-gate 		 * Discard any locks and pinned pages (the locks are discarded
817*7c478bd9Sstevel@tonic-gate 		 * even if we're running with transactions, as they lock pages
818*7c478bd9Sstevel@tonic-gate 		 * that we're sorry we ever acquired).  If stack is set and the
819*7c478bd9Sstevel@tonic-gate 		 * cursor entries are valid, they point to the same entries as
820*7c478bd9Sstevel@tonic-gate 		 * the stack, don't free them twice.
821*7c478bd9Sstevel@tonic-gate 		 */
822*7c478bd9Sstevel@tonic-gate 		if (stack) {
823*7c478bd9Sstevel@tonic-gate 			(void)__bam_stkrel(dbc, 1);
824*7c478bd9Sstevel@tonic-gate 			stack = 0;
825*7c478bd9Sstevel@tonic-gate 		} else
826*7c478bd9Sstevel@tonic-gate 			DISCARD(dbc, cp);
827*7c478bd9Sstevel@tonic-gate 
828*7c478bd9Sstevel@tonic-gate 		/*
829*7c478bd9Sstevel@tonic-gate 		 * Restore the cursor to its original value.  This is necessary
830*7c478bd9Sstevel@tonic-gate 		 * for two reasons.  First, we are about to copy it in case of
831*7c478bd9Sstevel@tonic-gate 		 * error, again.  Second, we adjust cursors during the split,
832*7c478bd9Sstevel@tonic-gate 		 * and we have to ensure this cursor is adjusted appropriately,
833*7c478bd9Sstevel@tonic-gate 		 * along with all the other cursors.
834*7c478bd9Sstevel@tonic-gate 		 */
835*7c478bd9Sstevel@tonic-gate 		*cp = copy;
836*7c478bd9Sstevel@tonic-gate 
837*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_split(dbc, arg)) != 0)
838*7c478bd9Sstevel@tonic-gate 			goto err;
839*7c478bd9Sstevel@tonic-gate 	}
840*7c478bd9Sstevel@tonic-gate 
841*7c478bd9Sstevel@tonic-gate 	/*
842*7c478bd9Sstevel@tonic-gate 	 * Initialize the cursor for a new retrieval.  Clear the cursor's
843*7c478bd9Sstevel@tonic-gate 	 * page pointer, it was set before this operation, and no longer
844*7c478bd9Sstevel@tonic-gate 	 * has any meaning.
	 *
	 * The cursor is saved in "copy" (including its current lock) so the
	 * error path can restore it; the live cursor's lock field is then
	 * reset so that a lock acquired below can be told apart from the
	 * saved one.
845*7c478bd9Sstevel@tonic-gate 	 */
846*7c478bd9Sstevel@tonic-gate 	cp->page = NULL;
847*7c478bd9Sstevel@tonic-gate 	copy = *cp;
848*7c478bd9Sstevel@tonic-gate 	cp->lock = LOCK_INVALID;
849*7c478bd9Sstevel@tonic-gate 
850*7c478bd9Sstevel@tonic-gate 	iiflags = needkey = ret = stack = 0;
	/*
	 * NOTE(review): this switch has no default case, so iiop, pgno and
	 * indx would be left unset for any other flags value.  Presumably
	 * __db_cputchk above rejects everything but these five values --
	 * confirm against that routine.
	 */
851*7c478bd9Sstevel@tonic-gate 	switch (flags) {
852*7c478bd9Sstevel@tonic-gate 	case DB_AFTER:
853*7c478bd9Sstevel@tonic-gate 	case DB_BEFORE:
854*7c478bd9Sstevel@tonic-gate 	case DB_CURRENT:
855*7c478bd9Sstevel@tonic-gate 		needkey = 1;
856*7c478bd9Sstevel@tonic-gate 		if (cp->dpgno == PGNO_INVALID) {
857*7c478bd9Sstevel@tonic-gate 			pgno = cp->pgno;
858*7c478bd9Sstevel@tonic-gate 			indx = cp->indx;
859*7c478bd9Sstevel@tonic-gate 		} else {
860*7c478bd9Sstevel@tonic-gate 			pgno = cp->dpgno;
861*7c478bd9Sstevel@tonic-gate 			indx = cp->dindx;
862*7c478bd9Sstevel@tonic-gate 		}
863*7c478bd9Sstevel@tonic-gate 
864*7c478bd9Sstevel@tonic-gate 		/*
865*7c478bd9Sstevel@tonic-gate 		 * !!!
866*7c478bd9Sstevel@tonic-gate 		 * This test is right -- we don't yet support duplicates and
867*7c478bd9Sstevel@tonic-gate 		 * record numbers in the same tree, so ignore duplicates if
868*7c478bd9Sstevel@tonic-gate 		 * DB_BT_RECNUM set.
869*7c478bd9Sstevel@tonic-gate 		 */
870*7c478bd9Sstevel@tonic-gate 		if (F_ISSET(dbp, DB_BT_RECNUM) &&
871*7c478bd9Sstevel@tonic-gate 		    (flags != DB_CURRENT || F_ISSET(cp, C_DELETED))) {
872*7c478bd9Sstevel@tonic-gate 			/* Acquire a complete stack. */
873*7c478bd9Sstevel@tonic-gate 			if ((ret = __bam_c_getstack(dbc, cp)) != 0)
874*7c478bd9Sstevel@tonic-gate 				goto err;
875*7c478bd9Sstevel@tonic-gate 			cp->page = cp->csp->page;
876*7c478bd9Sstevel@tonic-gate 
877*7c478bd9Sstevel@tonic-gate 			stack = 1;
878*7c478bd9Sstevel@tonic-gate 			iiflags = BI_DOINCR;
879*7c478bd9Sstevel@tonic-gate 		} else {
880*7c478bd9Sstevel@tonic-gate 			/* Acquire the current page. */
			/*
			 * The write lock is taken on the main page (cp->pgno)
			 * while the page fetched is pgno, which is the
			 * duplicate page when the cursor is off-page.
			 */
881*7c478bd9Sstevel@tonic-gate 			if ((ret = __bam_lget(dbc,
882*7c478bd9Sstevel@tonic-gate 			    0, cp->pgno, DB_LOCK_WRITE, &cp->lock)) == 0)
883*7c478bd9Sstevel@tonic-gate 				ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page);
884*7c478bd9Sstevel@tonic-gate 			if (ret != 0)
885*7c478bd9Sstevel@tonic-gate 				goto err;
886*7c478bd9Sstevel@tonic-gate 
887*7c478bd9Sstevel@tonic-gate 			iiflags = 0;
888*7c478bd9Sstevel@tonic-gate 		}
889*7c478bd9Sstevel@tonic-gate 
890*7c478bd9Sstevel@tonic-gate 		/*
891*7c478bd9Sstevel@tonic-gate 		 * If the user has specified a duplicate comparison function,
892*7c478bd9Sstevel@tonic-gate 		 * we return an error if DB_CURRENT was specified and the
893*7c478bd9Sstevel@tonic-gate 		 * replacement data doesn't compare equal to the current data.
894*7c478bd9Sstevel@tonic-gate 		 * This stops apps from screwing up the duplicate sort order.
895*7c478bd9Sstevel@tonic-gate 		 */
896*7c478bd9Sstevel@tonic-gate 		if (flags == DB_CURRENT && dbp->dup_compare != NULL)
897*7c478bd9Sstevel@tonic-gate 			if (__bam_cmp(dbp, data,
898*7c478bd9Sstevel@tonic-gate 			    cp->page, indx, dbp->dup_compare) != 0) {
899*7c478bd9Sstevel@tonic-gate 				ret = EINVAL;
900*7c478bd9Sstevel@tonic-gate 				goto err;
901*7c478bd9Sstevel@tonic-gate 			}
902*7c478bd9Sstevel@tonic-gate 
903*7c478bd9Sstevel@tonic-gate 		iiop = flags;
904*7c478bd9Sstevel@tonic-gate 		break;
905*7c478bd9Sstevel@tonic-gate 	case DB_KEYFIRST:
906*7c478bd9Sstevel@tonic-gate 	case DB_KEYLAST:
907*7c478bd9Sstevel@tonic-gate 		/*
908*7c478bd9Sstevel@tonic-gate 		 * If we have a duplicate comparison function, we position to
909*7c478bd9Sstevel@tonic-gate 		 * the first of any on-page duplicates, and use __bam_dsearch
910*7c478bd9Sstevel@tonic-gate 		 * to search for the right slot.  Otherwise, we position to
911*7c478bd9Sstevel@tonic-gate 		 * the first/last of any on-page duplicates based on the flag
912*7c478bd9Sstevel@tonic-gate 		 * value.
913*7c478bd9Sstevel@tonic-gate 		 */
914*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_search(dbc, cp, key,
915*7c478bd9Sstevel@tonic-gate 		    flags == DB_KEYFIRST || dbp->dup_compare != NULL ?
916*7c478bd9Sstevel@tonic-gate 		    DB_KEYFIRST : DB_KEYLAST, &exact)) != 0)
917*7c478bd9Sstevel@tonic-gate 			goto err;
918*7c478bd9Sstevel@tonic-gate 		stack = 1;
919*7c478bd9Sstevel@tonic-gate 
920*7c478bd9Sstevel@tonic-gate 		/*
921*7c478bd9Sstevel@tonic-gate 		 * If an exact match:
922*7c478bd9Sstevel@tonic-gate 		 *	If duplicates aren't supported, replace the current
923*7c478bd9Sstevel@tonic-gate 		 *	item.  (When implementing the DB->put function, our
924*7c478bd9Sstevel@tonic-gate 		 *	caller has already checked the DB_NOOVERWRITE flag.)
925*7c478bd9Sstevel@tonic-gate 		 *
926*7c478bd9Sstevel@tonic-gate 		 *	If there's a duplicate comparison function, find the
927*7c478bd9Sstevel@tonic-gate 		 *	correct slot for this duplicate item.
928*7c478bd9Sstevel@tonic-gate 		 *
929*7c478bd9Sstevel@tonic-gate 		 *	If there's no duplicate comparison function, set the
930*7c478bd9Sstevel@tonic-gate 		 *	insert flag based on the argument flags.
931*7c478bd9Sstevel@tonic-gate 		 *
932*7c478bd9Sstevel@tonic-gate 		 * If there's no match, the search function returned the
933*7c478bd9Sstevel@tonic-gate 		 * smallest slot greater than the key, use it.
934*7c478bd9Sstevel@tonic-gate 		 */
935*7c478bd9Sstevel@tonic-gate 		if (exact) {
936*7c478bd9Sstevel@tonic-gate 			if (F_ISSET(dbp, DB_AM_DUP)) {
937*7c478bd9Sstevel@tonic-gate 				/*
938*7c478bd9Sstevel@tonic-gate 				 * If at off-page duplicate page, move to the
939*7c478bd9Sstevel@tonic-gate 				 * first or last entry -- if a comparison
940*7c478bd9Sstevel@tonic-gate 				 * function was specified, start searching at
941*7c478bd9Sstevel@tonic-gate 				 * the first entry.  Otherwise, move based on
942*7c478bd9Sstevel@tonic-gate 				 * the DB_KEYFIRST/DB_KEYLAST flags.
943*7c478bd9Sstevel@tonic-gate 				 */
944*7c478bd9Sstevel@tonic-gate 				if ((ret = __bam_dup(dbc, cp, cp->indx,
945*7c478bd9Sstevel@tonic-gate 				    dbp->dup_compare == NULL &&
946*7c478bd9Sstevel@tonic-gate 				    flags != DB_KEYFIRST)) != 0)
947*7c478bd9Sstevel@tonic-gate 					goto err;
948*7c478bd9Sstevel@tonic-gate 
949*7c478bd9Sstevel@tonic-gate 				/*
950*7c478bd9Sstevel@tonic-gate 				 * If there's a comparison function, search for
951*7c478bd9Sstevel@tonic-gate 				 * the correct slot.  Otherwise, set the insert
952*7c478bd9Sstevel@tonic-gate 				 * flag based on the argument flag.
953*7c478bd9Sstevel@tonic-gate 				 */
954*7c478bd9Sstevel@tonic-gate 				if (dbp->dup_compare == NULL)
955*7c478bd9Sstevel@tonic-gate 					iiop = flags == DB_KEYFIRST ?
956*7c478bd9Sstevel@tonic-gate 					    DB_BEFORE : DB_AFTER;
957*7c478bd9Sstevel@tonic-gate 				else
958*7c478bd9Sstevel@tonic-gate 					if ((ret = __bam_dsearch(dbc,
959*7c478bd9Sstevel@tonic-gate 					    cp, data, &iiop)) != 0)
960*7c478bd9Sstevel@tonic-gate 						goto err;
961*7c478bd9Sstevel@tonic-gate 			} else
962*7c478bd9Sstevel@tonic-gate 				iiop = DB_CURRENT;
963*7c478bd9Sstevel@tonic-gate 			iiflags = 0;
964*7c478bd9Sstevel@tonic-gate 		} else {
965*7c478bd9Sstevel@tonic-gate 			iiop = DB_BEFORE;
966*7c478bd9Sstevel@tonic-gate 			iiflags = BI_NEWKEY;
967*7c478bd9Sstevel@tonic-gate 		}
968*7c478bd9Sstevel@tonic-gate 
		/* Pick up the position the search/duplicate code settled on. */
969*7c478bd9Sstevel@tonic-gate 		if (cp->dpgno == PGNO_INVALID) {
970*7c478bd9Sstevel@tonic-gate 			pgno = cp->pgno;
971*7c478bd9Sstevel@tonic-gate 			indx = cp->indx;
972*7c478bd9Sstevel@tonic-gate 		} else {
973*7c478bd9Sstevel@tonic-gate 			pgno = cp->dpgno;
974*7c478bd9Sstevel@tonic-gate 			indx = cp->dindx;
975*7c478bd9Sstevel@tonic-gate 		}
976*7c478bd9Sstevel@tonic-gate 		break;
977*7c478bd9Sstevel@tonic-gate 	}
978*7c478bd9Sstevel@tonic-gate 
	/* Store the item; a full page sends us back through the split code. */
979*7c478bd9Sstevel@tonic-gate 	ret = __bam_iitem(dbc, &cp->page, &indx, key, data, iiop, iiflags);
980*7c478bd9Sstevel@tonic-gate 
981*7c478bd9Sstevel@tonic-gate 	if (ret == DB_NEEDSPLIT)
982*7c478bd9Sstevel@tonic-gate 		goto split;
983*7c478bd9Sstevel@tonic-gate 	if (ret != 0)
984*7c478bd9Sstevel@tonic-gate 		goto err;
985*7c478bd9Sstevel@tonic-gate 
986*7c478bd9Sstevel@tonic-gate 	/*
987*7c478bd9Sstevel@tonic-gate 	 * Reset any cursors referencing this item that might have the item
988*7c478bd9Sstevel@tonic-gate 	 * marked for deletion.
989*7c478bd9Sstevel@tonic-gate 	 */
990*7c478bd9Sstevel@tonic-gate 	if (iiop == DB_CURRENT) {
991*7c478bd9Sstevel@tonic-gate 		(void)__bam_ca_delete(dbp, pgno, indx, 0);
992*7c478bd9Sstevel@tonic-gate 
993*7c478bd9Sstevel@tonic-gate 		/*
994*7c478bd9Sstevel@tonic-gate 		 * It's also possible that we are the cursor that had the
995*7c478bd9Sstevel@tonic-gate 		 * item marked for deletion, in which case we want to make
996*7c478bd9Sstevel@tonic-gate 		 * sure that we don't delete it because we had the delete
997*7c478bd9Sstevel@tonic-gate 		 * flag set already.
998*7c478bd9Sstevel@tonic-gate 		 */
999*7c478bd9Sstevel@tonic-gate 		if (cp->pgno == copy.pgno && cp->indx == copy.indx &&
1000*7c478bd9Sstevel@tonic-gate 		    cp->dpgno == copy.dpgno && cp->dindx == copy.dindx)
1001*7c478bd9Sstevel@tonic-gate 			F_CLR(&copy, C_DELETED);
1002*7c478bd9Sstevel@tonic-gate 	}
1003*7c478bd9Sstevel@tonic-gate 
1004*7c478bd9Sstevel@tonic-gate 	/*
1005*7c478bd9Sstevel@tonic-gate 	 * Update the cursor to point to the new entry.  The new entry was
1006*7c478bd9Sstevel@tonic-gate 	 * stored on the current page, because we split pages until it was
1007*7c478bd9Sstevel@tonic-gate 	 * possible.
1008*7c478bd9Sstevel@tonic-gate 	 */
1009*7c478bd9Sstevel@tonic-gate 	if (cp->dpgno == PGNO_INVALID)
1010*7c478bd9Sstevel@tonic-gate 		cp->indx = indx;
1011*7c478bd9Sstevel@tonic-gate 	else
1012*7c478bd9Sstevel@tonic-gate 		cp->dindx = indx;
1013*7c478bd9Sstevel@tonic-gate 
1014*7c478bd9Sstevel@tonic-gate 	/*
1015*7c478bd9Sstevel@tonic-gate 	 * If the previous cursor record has been deleted, physically delete
1016*7c478bd9Sstevel@tonic-gate 	 * the entry from the page.  We clear the deleted flag before we call
1017*7c478bd9Sstevel@tonic-gate 	 * the underlying delete routine so that, if an error occurs, and we
1018*7c478bd9Sstevel@tonic-gate 	 * restore the cursor, the deleted flag is cleared.  This is because,
1019*7c478bd9Sstevel@tonic-gate 	 * if we manage to physically modify the page, and then restore the
1020*7c478bd9Sstevel@tonic-gate 	 * cursor, we might try to repeat the page modification when closing
1021*7c478bd9Sstevel@tonic-gate 	 * the cursor.
1022*7c478bd9Sstevel@tonic-gate 	 */
1023*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(&copy, C_DELETED)) {
1024*7c478bd9Sstevel@tonic-gate 		F_CLR(&copy, C_DELETED);
1025*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_physdel(dbc, &copy, cp->page)) != 0)
1026*7c478bd9Sstevel@tonic-gate 			goto err;
1027*7c478bd9Sstevel@tonic-gate 	}
1028*7c478bd9Sstevel@tonic-gate 	F_CLR(cp, C_DELETED);
1029*7c478bd9Sstevel@tonic-gate 
1030*7c478bd9Sstevel@tonic-gate 	/* Release the previous lock, if any; the current lock is retained. */
1031*7c478bd9Sstevel@tonic-gate 	if (copy.lock != LOCK_INVALID)
1032*7c478bd9Sstevel@tonic-gate 		(void)__BT_TLPUT(dbc, copy.lock);
1033*7c478bd9Sstevel@tonic-gate 
1034*7c478bd9Sstevel@tonic-gate 	/*
1035*7c478bd9Sstevel@tonic-gate 	 * Discard any pages pinned in the tree and their locks, except for
1036*7c478bd9Sstevel@tonic-gate 	 * the leaf page, for which we only discard the pin, not the lock.
1037*7c478bd9Sstevel@tonic-gate 	 *
1038*7c478bd9Sstevel@tonic-gate 	 * Note, the leaf page participated in the stack we acquired, and so
1039*7c478bd9Sstevel@tonic-gate 	 * we have to adjust the stack as necessary.  If there was only a
1040*7c478bd9Sstevel@tonic-gate 	 * single page on the stack, we don't have to free further stack pages.
1041*7c478bd9Sstevel@tonic-gate 	 */
1042*7c478bd9Sstevel@tonic-gate 	if (stack && BT_STK_POP(cp) != NULL)
1043*7c478bd9Sstevel@tonic-gate 		(void)__bam_stkrel(dbc, 0);
1044*7c478bd9Sstevel@tonic-gate 
1045*7c478bd9Sstevel@tonic-gate 	/* Release the current page. */
1046*7c478bd9Sstevel@tonic-gate 	if ((ret = memp_fput(dbp->mpf, cp->page, 0)) != 0)
1047*7c478bd9Sstevel@tonic-gate 		goto err;
1048*7c478bd9Sstevel@tonic-gate 
	/*
	 * Error exit, reached only by "goto err": release whatever is pinned
	 * or locked and put the cursor back to the value saved in "copy".
	 */
1049*7c478bd9Sstevel@tonic-gate 	if (0) {
1050*7c478bd9Sstevel@tonic-gate err:		/* Discard any pinned pages. */
1051*7c478bd9Sstevel@tonic-gate 		if (stack)
1052*7c478bd9Sstevel@tonic-gate 			(void)__bam_stkrel(dbc, 0);
1053*7c478bd9Sstevel@tonic-gate 		else
1054*7c478bd9Sstevel@tonic-gate 			DISCARD(dbc, cp);
1055*7c478bd9Sstevel@tonic-gate 		*cp = copy;
1056*7c478bd9Sstevel@tonic-gate 	}
1057*7c478bd9Sstevel@tonic-gate 
	/*
	 * Success and error paths both arrive here: under CDB, drop the
	 * write lock obtained by the upgrade above back to an IWRITE lock.
	 */
1058*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW))
1059*7c478bd9Sstevel@tonic-gate 		(void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
1060*7c478bd9Sstevel@tonic-gate 		    DB_LOCK_IWRITE, 0);
1061*7c478bd9Sstevel@tonic-gate 
1062*7c478bd9Sstevel@tonic-gate 	return (ret);
1063*7c478bd9Sstevel@tonic-gate }
1064*7c478bd9Sstevel@tonic-gate 
1065*7c478bd9Sstevel@tonic-gate /*
1066*7c478bd9Sstevel@tonic-gate  * __bam_c_first --
1067*7c478bd9Sstevel@tonic-gate  *	Return the first record.
1068*7c478bd9Sstevel@tonic-gate  */
1069*7c478bd9Sstevel@tonic-gate static int
1070*7c478bd9Sstevel@tonic-gate __bam_c_first(dbc, cp)
1071*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
1072*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
1073*7c478bd9Sstevel@tonic-gate {
1074*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1075*7c478bd9Sstevel@tonic-gate 	db_pgno_t pgno;
1076*7c478bd9Sstevel@tonic-gate 	int ret;
1077*7c478bd9Sstevel@tonic-gate 
1078*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
1079*7c478bd9Sstevel@tonic-gate 
1080*7c478bd9Sstevel@tonic-gate 	/* Walk down the left-hand side of the tree. */
1081*7c478bd9Sstevel@tonic-gate 	for (pgno = PGNO_ROOT;;) {
1082*7c478bd9Sstevel@tonic-gate 		if ((ret =
1083*7c478bd9Sstevel@tonic-gate 		    __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0)
1084*7c478bd9Sstevel@tonic-gate 			return (ret);
1085*7c478bd9Sstevel@tonic-gate 		if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0)
1086*7c478bd9Sstevel@tonic-gate 			return (ret);
1087*7c478bd9Sstevel@tonic-gate 
1088*7c478bd9Sstevel@tonic-gate 		/* If we find a leaf page, we're done. */
1089*7c478bd9Sstevel@tonic-gate 		if (ISLEAF(cp->page))
1090*7c478bd9Sstevel@tonic-gate 			break;
1091*7c478bd9Sstevel@tonic-gate 
1092*7c478bd9Sstevel@tonic-gate 		pgno = GET_BINTERNAL(cp->page, 0)->pgno;
1093*7c478bd9Sstevel@tonic-gate 		DISCARD(dbc, cp);
1094*7c478bd9Sstevel@tonic-gate 	}
1095*7c478bd9Sstevel@tonic-gate 
1096*7c478bd9Sstevel@tonic-gate 	cp->pgno = cp->page->pgno;
1097*7c478bd9Sstevel@tonic-gate 	cp->indx = 0;
1098*7c478bd9Sstevel@tonic-gate 	cp->dpgno = PGNO_INVALID;
1099*7c478bd9Sstevel@tonic-gate 
1100*7c478bd9Sstevel@tonic-gate 	/* Check for duplicates. */
1101*7c478bd9Sstevel@tonic-gate 	if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0)
1102*7c478bd9Sstevel@tonic-gate 		return (ret);
1103*7c478bd9Sstevel@tonic-gate 
1104*7c478bd9Sstevel@tonic-gate 	/* If on an empty page or a deleted record, move to the next one. */
1105*7c478bd9Sstevel@tonic-gate 	if (NUM_ENT(cp->page) == 0 || IS_CUR_DELETED(cp))
1106*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_next(dbc, cp, 0)) != 0)
1107*7c478bd9Sstevel@tonic-gate 			return (ret);
1108*7c478bd9Sstevel@tonic-gate 
1109*7c478bd9Sstevel@tonic-gate 	return (0);
1110*7c478bd9Sstevel@tonic-gate }
1111*7c478bd9Sstevel@tonic-gate 
1112*7c478bd9Sstevel@tonic-gate /*
1113*7c478bd9Sstevel@tonic-gate  * __bam_c_last --
1114*7c478bd9Sstevel@tonic-gate  *	Return the last record.
1115*7c478bd9Sstevel@tonic-gate  */
1116*7c478bd9Sstevel@tonic-gate static int
1117*7c478bd9Sstevel@tonic-gate __bam_c_last(dbc, cp)
1118*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
1119*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
1120*7c478bd9Sstevel@tonic-gate {
1121*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1122*7c478bd9Sstevel@tonic-gate 	db_pgno_t pgno;
1123*7c478bd9Sstevel@tonic-gate 	int ret;
1124*7c478bd9Sstevel@tonic-gate 
1125*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
1126*7c478bd9Sstevel@tonic-gate 
1127*7c478bd9Sstevel@tonic-gate 	/* Walk down the right-hand side of the tree. */
1128*7c478bd9Sstevel@tonic-gate 	for (pgno = PGNO_ROOT;;) {
1129*7c478bd9Sstevel@tonic-gate 		if ((ret =
1130*7c478bd9Sstevel@tonic-gate 		    __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0)
1131*7c478bd9Sstevel@tonic-gate 			return (ret);
1132*7c478bd9Sstevel@tonic-gate 		if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0)
1133*7c478bd9Sstevel@tonic-gate 			return (ret);
1134*7c478bd9Sstevel@tonic-gate 
1135*7c478bd9Sstevel@tonic-gate 		/* If we find a leaf page, we're done. */
1136*7c478bd9Sstevel@tonic-gate 		if (ISLEAF(cp->page))
1137*7c478bd9Sstevel@tonic-gate 			break;
1138*7c478bd9Sstevel@tonic-gate 
1139*7c478bd9Sstevel@tonic-gate 		pgno =
1140*7c478bd9Sstevel@tonic-gate 		    GET_BINTERNAL(cp->page, NUM_ENT(cp->page) - O_INDX)->pgno;
1141*7c478bd9Sstevel@tonic-gate 		DISCARD(dbc, cp);
1142*7c478bd9Sstevel@tonic-gate 	}
1143*7c478bd9Sstevel@tonic-gate 
1144*7c478bd9Sstevel@tonic-gate 	cp->pgno = cp->page->pgno;
1145*7c478bd9Sstevel@tonic-gate 	cp->indx = NUM_ENT(cp->page) == 0 ? 0 : NUM_ENT(cp->page) - P_INDX;
1146*7c478bd9Sstevel@tonic-gate 	cp->dpgno = PGNO_INVALID;
1147*7c478bd9Sstevel@tonic-gate 
1148*7c478bd9Sstevel@tonic-gate 	/* Check for duplicates. */
1149*7c478bd9Sstevel@tonic-gate 	if ((ret = __bam_dup(dbc, cp, cp->indx, 1)) != 0)
1150*7c478bd9Sstevel@tonic-gate 		return (ret);
1151*7c478bd9Sstevel@tonic-gate 
1152*7c478bd9Sstevel@tonic-gate 	/* If on an empty page or a deleted record, move to the next one. */
1153*7c478bd9Sstevel@tonic-gate 	if (NUM_ENT(cp->page) == 0 || IS_CUR_DELETED(cp))
1154*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_prev(dbc, cp)) != 0)
1155*7c478bd9Sstevel@tonic-gate 			return (ret);
1156*7c478bd9Sstevel@tonic-gate 
1157*7c478bd9Sstevel@tonic-gate 	return (0);
1158*7c478bd9Sstevel@tonic-gate }
1159*7c478bd9Sstevel@tonic-gate 
1160*7c478bd9Sstevel@tonic-gate /*
1161*7c478bd9Sstevel@tonic-gate  * __bam_c_next --
1162*7c478bd9Sstevel@tonic-gate  *	Move to the next record.
1163*7c478bd9Sstevel@tonic-gate  */
1164*7c478bd9Sstevel@tonic-gate static int
1165*7c478bd9Sstevel@tonic-gate __bam_c_next(dbc, cp, initial_move)
1166*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
1167*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
1168*7c478bd9Sstevel@tonic-gate 	int initial_move;
1169*7c478bd9Sstevel@tonic-gate {
1170*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1171*7c478bd9Sstevel@tonic-gate 	db_indx_t adjust, indx;
1172*7c478bd9Sstevel@tonic-gate 	db_pgno_t pgno;
1173*7c478bd9Sstevel@tonic-gate 	int ret;
1174*7c478bd9Sstevel@tonic-gate 
1175*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
1176*7c478bd9Sstevel@tonic-gate 
1177*7c478bd9Sstevel@tonic-gate 	/*
1178*7c478bd9Sstevel@tonic-gate 	 * We're either moving through a page of duplicates or a btree leaf
1179*7c478bd9Sstevel@tonic-gate 	 * page.
1180*7c478bd9Sstevel@tonic-gate 	 */
1181*7c478bd9Sstevel@tonic-gate 	if (cp->dpgno == PGNO_INVALID) {
1182*7c478bd9Sstevel@tonic-gate 		adjust = dbp->type == DB_BTREE ? P_INDX : O_INDX;
1183*7c478bd9Sstevel@tonic-gate 		pgno = cp->pgno;
1184*7c478bd9Sstevel@tonic-gate 		indx = cp->indx;
1185*7c478bd9Sstevel@tonic-gate 	} else {
1186*7c478bd9Sstevel@tonic-gate 		adjust = O_INDX;
1187*7c478bd9Sstevel@tonic-gate 		pgno = cp->dpgno;
1188*7c478bd9Sstevel@tonic-gate 		indx = cp->dindx;
1189*7c478bd9Sstevel@tonic-gate 	}
1190*7c478bd9Sstevel@tonic-gate 	if (cp->page == NULL) {
1191*7c478bd9Sstevel@tonic-gate 		if ((ret =
1192*7c478bd9Sstevel@tonic-gate 		    __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0)
1193*7c478bd9Sstevel@tonic-gate 			return (ret);
1194*7c478bd9Sstevel@tonic-gate 		if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0)
1195*7c478bd9Sstevel@tonic-gate 			return (ret);
1196*7c478bd9Sstevel@tonic-gate 	}
1197*7c478bd9Sstevel@tonic-gate 
1198*7c478bd9Sstevel@tonic-gate 	/*
1199*7c478bd9Sstevel@tonic-gate 	 * If at the end of the page, move to a subsequent page.
1200*7c478bd9Sstevel@tonic-gate 	 *
1201*7c478bd9Sstevel@tonic-gate 	 * !!!
1202*7c478bd9Sstevel@tonic-gate 	 * Check for >= NUM_ENT.  If we're here as the result of a search that
1203*7c478bd9Sstevel@tonic-gate 	 * landed us on NUM_ENT, we'll increment indx before we test.
1204*7c478bd9Sstevel@tonic-gate 	 *
1205*7c478bd9Sstevel@tonic-gate 	 * !!!
1206*7c478bd9Sstevel@tonic-gate 	 * This code handles empty pages and pages with only deleted entries.
1207*7c478bd9Sstevel@tonic-gate 	 */
1208*7c478bd9Sstevel@tonic-gate 	if (initial_move)
1209*7c478bd9Sstevel@tonic-gate 		indx += adjust;
1210*7c478bd9Sstevel@tonic-gate 	for (;;) {
1211*7c478bd9Sstevel@tonic-gate 		if (indx >= NUM_ENT(cp->page)) {
1212*7c478bd9Sstevel@tonic-gate 			/*
1213*7c478bd9Sstevel@tonic-gate 			 * If we're in a btree leaf page, we've reached the end
1214*7c478bd9Sstevel@tonic-gate 			 * of the tree.  If we've reached the end of a page of
1215*7c478bd9Sstevel@tonic-gate 			 * duplicates, continue from the btree leaf page where
1216*7c478bd9Sstevel@tonic-gate 			 * we found this page of duplicates.
1217*7c478bd9Sstevel@tonic-gate 			 */
1218*7c478bd9Sstevel@tonic-gate 			pgno = cp->page->next_pgno;
1219*7c478bd9Sstevel@tonic-gate 			if (pgno == PGNO_INVALID) {
1220*7c478bd9Sstevel@tonic-gate 				/* If in a btree leaf page, it's EOF. */
1221*7c478bd9Sstevel@tonic-gate 				if (cp->dpgno == PGNO_INVALID)
1222*7c478bd9Sstevel@tonic-gate 					return (DB_NOTFOUND);
1223*7c478bd9Sstevel@tonic-gate 
1224*7c478bd9Sstevel@tonic-gate 				/* Continue from the last btree leaf page. */
1225*7c478bd9Sstevel@tonic-gate 				cp->dpgno = PGNO_INVALID;
1226*7c478bd9Sstevel@tonic-gate 
1227*7c478bd9Sstevel@tonic-gate 				adjust = P_INDX;
1228*7c478bd9Sstevel@tonic-gate 				pgno = cp->pgno;
1229*7c478bd9Sstevel@tonic-gate 				indx = cp->indx + P_INDX;
1230*7c478bd9Sstevel@tonic-gate 			} else
1231*7c478bd9Sstevel@tonic-gate 				indx = 0;
1232*7c478bd9Sstevel@tonic-gate 
1233*7c478bd9Sstevel@tonic-gate 			DISCARD(dbc, cp);
1234*7c478bd9Sstevel@tonic-gate 			if ((ret = __bam_lget(dbc,
1235*7c478bd9Sstevel@tonic-gate 			    0, pgno, DB_LOCK_READ, &cp->lock)) != 0)
1236*7c478bd9Sstevel@tonic-gate 				return (ret);
1237*7c478bd9Sstevel@tonic-gate 			if ((ret =
1238*7c478bd9Sstevel@tonic-gate 			    memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0)
1239*7c478bd9Sstevel@tonic-gate 				return (ret);
1240*7c478bd9Sstevel@tonic-gate 			continue;
1241*7c478bd9Sstevel@tonic-gate 		}
1242*7c478bd9Sstevel@tonic-gate 
1243*7c478bd9Sstevel@tonic-gate 		/* Ignore deleted records. */
1244*7c478bd9Sstevel@tonic-gate 		if (IS_DELETED(cp, indx)) {
1245*7c478bd9Sstevel@tonic-gate 			indx += adjust;
1246*7c478bd9Sstevel@tonic-gate 			continue;
1247*7c478bd9Sstevel@tonic-gate 		}
1248*7c478bd9Sstevel@tonic-gate 
1249*7c478bd9Sstevel@tonic-gate 		/*
1250*7c478bd9Sstevel@tonic-gate 		 * If we're not in a duplicates page, check to see if we've
1251*7c478bd9Sstevel@tonic-gate 		 * found a page of duplicates, in which case we move to the
1252*7c478bd9Sstevel@tonic-gate 		 * first entry.
1253*7c478bd9Sstevel@tonic-gate 		 */
1254*7c478bd9Sstevel@tonic-gate 		if (cp->dpgno == PGNO_INVALID) {
1255*7c478bd9Sstevel@tonic-gate 			cp->pgno = cp->page->pgno;
1256*7c478bd9Sstevel@tonic-gate 			cp->indx = indx;
1257*7c478bd9Sstevel@tonic-gate 
1258*7c478bd9Sstevel@tonic-gate 			if ((ret = __bam_dup(dbc, cp, indx, 0)) != 0)
1259*7c478bd9Sstevel@tonic-gate 				return (ret);
1260*7c478bd9Sstevel@tonic-gate 			if (cp->dpgno != PGNO_INVALID) {
1261*7c478bd9Sstevel@tonic-gate 				indx = cp->dindx;
1262*7c478bd9Sstevel@tonic-gate 				adjust = O_INDX;
1263*7c478bd9Sstevel@tonic-gate 				continue;
1264*7c478bd9Sstevel@tonic-gate 			}
1265*7c478bd9Sstevel@tonic-gate 		} else {
1266*7c478bd9Sstevel@tonic-gate 			cp->dpgno = cp->page->pgno;
1267*7c478bd9Sstevel@tonic-gate 			cp->dindx = indx;
1268*7c478bd9Sstevel@tonic-gate 		}
1269*7c478bd9Sstevel@tonic-gate 		break;
1270*7c478bd9Sstevel@tonic-gate 	}
1271*7c478bd9Sstevel@tonic-gate 	return (0);
1272*7c478bd9Sstevel@tonic-gate }
1273*7c478bd9Sstevel@tonic-gate 
1274*7c478bd9Sstevel@tonic-gate /*
1275*7c478bd9Sstevel@tonic-gate  * __bam_c_prev --
1276*7c478bd9Sstevel@tonic-gate  *	Move to the previous record.
1277*7c478bd9Sstevel@tonic-gate  */
1278*7c478bd9Sstevel@tonic-gate static int
1279*7c478bd9Sstevel@tonic-gate __bam_c_prev(dbc, cp)
1280*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
1281*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
1282*7c478bd9Sstevel@tonic-gate {
1283*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1284*7c478bd9Sstevel@tonic-gate 	db_indx_t indx, adjust;
1285*7c478bd9Sstevel@tonic-gate 	db_pgno_t pgno;
1286*7c478bd9Sstevel@tonic-gate 	int ret, set_indx;
1287*7c478bd9Sstevel@tonic-gate 
1288*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
1289*7c478bd9Sstevel@tonic-gate 
1290*7c478bd9Sstevel@tonic-gate 	/*
1291*7c478bd9Sstevel@tonic-gate 	 * We're either moving through a page of duplicates or a btree leaf
1292*7c478bd9Sstevel@tonic-gate 	 * page.
1293*7c478bd9Sstevel@tonic-gate 	 */
1294*7c478bd9Sstevel@tonic-gate 	if (cp->dpgno == PGNO_INVALID) {
1295*7c478bd9Sstevel@tonic-gate 		adjust = dbp->type == DB_BTREE ? P_INDX : O_INDX;
1296*7c478bd9Sstevel@tonic-gate 		pgno = cp->pgno;
1297*7c478bd9Sstevel@tonic-gate 		indx = cp->indx;
1298*7c478bd9Sstevel@tonic-gate 	} else {
1299*7c478bd9Sstevel@tonic-gate 		adjust = O_INDX;
1300*7c478bd9Sstevel@tonic-gate 		pgno = cp->dpgno;
1301*7c478bd9Sstevel@tonic-gate 		indx = cp->dindx;
1302*7c478bd9Sstevel@tonic-gate 	}
1303*7c478bd9Sstevel@tonic-gate 	if (cp->page == NULL) {
1304*7c478bd9Sstevel@tonic-gate 		if ((ret =
1305*7c478bd9Sstevel@tonic-gate 		    __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0)
1306*7c478bd9Sstevel@tonic-gate 			return (ret);
1307*7c478bd9Sstevel@tonic-gate 		if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0)
1308*7c478bd9Sstevel@tonic-gate 			return (ret);
1309*7c478bd9Sstevel@tonic-gate 	}
1310*7c478bd9Sstevel@tonic-gate 
1311*7c478bd9Sstevel@tonic-gate 	/*
1312*7c478bd9Sstevel@tonic-gate 	 * If at the beginning of the page, move to any previous one.
1313*7c478bd9Sstevel@tonic-gate 	 *
1314*7c478bd9Sstevel@tonic-gate 	 * !!!
1315*7c478bd9Sstevel@tonic-gate 	 * This code handles empty pages and pages with only deleted entries.
1316*7c478bd9Sstevel@tonic-gate 	 */
1317*7c478bd9Sstevel@tonic-gate 	for (;;) {
1318*7c478bd9Sstevel@tonic-gate 		if (indx == 0) {
1319*7c478bd9Sstevel@tonic-gate 			/*
1320*7c478bd9Sstevel@tonic-gate 			 * If we're in a btree leaf page, we've reached the
1321*7c478bd9Sstevel@tonic-gate 			 * beginning of the tree.  If we've reached the first
1322*7c478bd9Sstevel@tonic-gate 			 * of a page of duplicates, continue from the btree
1323*7c478bd9Sstevel@tonic-gate 			 * leaf page where we found this page of duplicates.
1324*7c478bd9Sstevel@tonic-gate 			 */
1325*7c478bd9Sstevel@tonic-gate 			pgno = cp->page->prev_pgno;
1326*7c478bd9Sstevel@tonic-gate 			if (pgno == PGNO_INVALID) {
1327*7c478bd9Sstevel@tonic-gate 				/* If in a btree leaf page, it's SOF. */
1328*7c478bd9Sstevel@tonic-gate 				if (cp->dpgno == PGNO_INVALID)
1329*7c478bd9Sstevel@tonic-gate 					return (DB_NOTFOUND);
1330*7c478bd9Sstevel@tonic-gate 
1331*7c478bd9Sstevel@tonic-gate 				/* Continue from the last btree leaf page. */
1332*7c478bd9Sstevel@tonic-gate 				cp->dpgno = PGNO_INVALID;
1333*7c478bd9Sstevel@tonic-gate 
1334*7c478bd9Sstevel@tonic-gate 				adjust = P_INDX;
1335*7c478bd9Sstevel@tonic-gate 				pgno = cp->pgno;
1336*7c478bd9Sstevel@tonic-gate 				indx = cp->indx;
1337*7c478bd9Sstevel@tonic-gate 				set_indx = 0;
1338*7c478bd9Sstevel@tonic-gate 			} else
1339*7c478bd9Sstevel@tonic-gate 				set_indx = 1;
1340*7c478bd9Sstevel@tonic-gate 
1341*7c478bd9Sstevel@tonic-gate 			DISCARD(dbc, cp);
1342*7c478bd9Sstevel@tonic-gate 			if ((ret = __bam_lget(dbc,
1343*7c478bd9Sstevel@tonic-gate 			    0, pgno, DB_LOCK_READ, &cp->lock)) != 0)
1344*7c478bd9Sstevel@tonic-gate 				return (ret);
1345*7c478bd9Sstevel@tonic-gate 			if ((ret =
1346*7c478bd9Sstevel@tonic-gate 			    memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0)
1347*7c478bd9Sstevel@tonic-gate 				return (ret);
1348*7c478bd9Sstevel@tonic-gate 
1349*7c478bd9Sstevel@tonic-gate 			if (set_indx)
1350*7c478bd9Sstevel@tonic-gate 				indx = NUM_ENT(cp->page);
1351*7c478bd9Sstevel@tonic-gate 			if (indx == 0)
1352*7c478bd9Sstevel@tonic-gate 				continue;
1353*7c478bd9Sstevel@tonic-gate 		}
1354*7c478bd9Sstevel@tonic-gate 
1355*7c478bd9Sstevel@tonic-gate 		/* Ignore deleted records. */
1356*7c478bd9Sstevel@tonic-gate 		indx -= adjust;
1357*7c478bd9Sstevel@tonic-gate 		if (IS_DELETED(cp, indx))
1358*7c478bd9Sstevel@tonic-gate 			continue;
1359*7c478bd9Sstevel@tonic-gate 
1360*7c478bd9Sstevel@tonic-gate 		/*
1361*7c478bd9Sstevel@tonic-gate 		 * If we're not in a duplicates page, check to see if we've
1362*7c478bd9Sstevel@tonic-gate 		 * found a page of duplicates, in which case we move to the
1363*7c478bd9Sstevel@tonic-gate 		 * last entry.
1364*7c478bd9Sstevel@tonic-gate 		 */
1365*7c478bd9Sstevel@tonic-gate 		if (cp->dpgno == PGNO_INVALID) {
1366*7c478bd9Sstevel@tonic-gate 			cp->pgno = cp->page->pgno;
1367*7c478bd9Sstevel@tonic-gate 			cp->indx = indx;
1368*7c478bd9Sstevel@tonic-gate 
1369*7c478bd9Sstevel@tonic-gate 			if ((ret = __bam_dup(dbc, cp, indx, 1)) != 0)
1370*7c478bd9Sstevel@tonic-gate 				return (ret);
1371*7c478bd9Sstevel@tonic-gate 			if (cp->dpgno != PGNO_INVALID) {
1372*7c478bd9Sstevel@tonic-gate 				indx = cp->dindx + O_INDX;
1373*7c478bd9Sstevel@tonic-gate 				adjust = O_INDX;
1374*7c478bd9Sstevel@tonic-gate 				continue;
1375*7c478bd9Sstevel@tonic-gate 			}
1376*7c478bd9Sstevel@tonic-gate 		} else {
1377*7c478bd9Sstevel@tonic-gate 			cp->dpgno = cp->page->pgno;
1378*7c478bd9Sstevel@tonic-gate 			cp->dindx = indx;
1379*7c478bd9Sstevel@tonic-gate 		}
1380*7c478bd9Sstevel@tonic-gate 		break;
1381*7c478bd9Sstevel@tonic-gate 	}
1382*7c478bd9Sstevel@tonic-gate 	return (0);
1383*7c478bd9Sstevel@tonic-gate }
1384*7c478bd9Sstevel@tonic-gate 
1385*7c478bd9Sstevel@tonic-gate /*
1386*7c478bd9Sstevel@tonic-gate  * __bam_c_search --
1387*7c478bd9Sstevel@tonic-gate  *	Move to a specified record.
1388*7c478bd9Sstevel@tonic-gate  */
1389*7c478bd9Sstevel@tonic-gate static int
1390*7c478bd9Sstevel@tonic-gate __bam_c_search(dbc, cp, key, flags, exactp)
1391*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
1392*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
1393*7c478bd9Sstevel@tonic-gate 	const DBT *key;
1394*7c478bd9Sstevel@tonic-gate 	u_int32_t flags;
1395*7c478bd9Sstevel@tonic-gate 	int *exactp;
1396*7c478bd9Sstevel@tonic-gate {
1397*7c478bd9Sstevel@tonic-gate 	BTREE *t;
1398*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1399*7c478bd9Sstevel@tonic-gate 	DB_LOCK lock;
1400*7c478bd9Sstevel@tonic-gate 	PAGE *h;
1401*7c478bd9Sstevel@tonic-gate 	db_recno_t recno;
1402*7c478bd9Sstevel@tonic-gate 	db_indx_t indx;
1403*7c478bd9Sstevel@tonic-gate 	u_int32_t sflags;
1404*7c478bd9Sstevel@tonic-gate 	int cmp, needexact, ret;
1405*7c478bd9Sstevel@tonic-gate 
1406*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
1407*7c478bd9Sstevel@tonic-gate 	t = dbp->internal;
1408*7c478bd9Sstevel@tonic-gate 
1409*7c478bd9Sstevel@tonic-gate 	/* Find an entry in the database. */
1410*7c478bd9Sstevel@tonic-gate 	switch (flags) {
1411*7c478bd9Sstevel@tonic-gate 	case DB_SET_RECNO:
1412*7c478bd9Sstevel@tonic-gate 		if ((ret = __ram_getno(dbc, key, &recno, 0)) != 0)
1413*7c478bd9Sstevel@tonic-gate 			return (ret);
1414*7c478bd9Sstevel@tonic-gate 		sflags = F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND;
1415*7c478bd9Sstevel@tonic-gate 		needexact = *exactp = 1;
1416*7c478bd9Sstevel@tonic-gate 		ret = __bam_rsearch(dbc, &recno, sflags, 1, exactp);
1417*7c478bd9Sstevel@tonic-gate 		break;
1418*7c478bd9Sstevel@tonic-gate 	case DB_SET:
1419*7c478bd9Sstevel@tonic-gate 	case DB_GET_BOTH:
1420*7c478bd9Sstevel@tonic-gate 		sflags = F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND;
1421*7c478bd9Sstevel@tonic-gate 		needexact = *exactp = 1;
1422*7c478bd9Sstevel@tonic-gate 		goto search;
1423*7c478bd9Sstevel@tonic-gate 	case DB_SET_RANGE:
1424*7c478bd9Sstevel@tonic-gate 		sflags = F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND;
1425*7c478bd9Sstevel@tonic-gate 		needexact = *exactp = 0;
1426*7c478bd9Sstevel@tonic-gate 		goto search;
1427*7c478bd9Sstevel@tonic-gate 	case DB_KEYFIRST:
1428*7c478bd9Sstevel@tonic-gate 		sflags = S_KEYFIRST;
1429*7c478bd9Sstevel@tonic-gate 		goto fast_search;
1430*7c478bd9Sstevel@tonic-gate 	case DB_KEYLAST:
1431*7c478bd9Sstevel@tonic-gate 		sflags = S_KEYLAST;
1432*7c478bd9Sstevel@tonic-gate fast_search:	needexact = *exactp = 0;
1433*7c478bd9Sstevel@tonic-gate 		/*
1434*7c478bd9Sstevel@tonic-gate 		 * If the application has a history of inserting into the first
1435*7c478bd9Sstevel@tonic-gate 		 * or last pages of the database, we check those pages first to
1436*7c478bd9Sstevel@tonic-gate 		 * avoid doing a full search.
1437*7c478bd9Sstevel@tonic-gate 		 *
1438*7c478bd9Sstevel@tonic-gate 		 * Record numbers can't be fast-tracked, the entire tree has to
1439*7c478bd9Sstevel@tonic-gate 		 * be locked.
1440*7c478bd9Sstevel@tonic-gate 		 */
1441*7c478bd9Sstevel@tonic-gate 		h = NULL;
1442*7c478bd9Sstevel@tonic-gate 		lock = LOCK_INVALID;
1443*7c478bd9Sstevel@tonic-gate 		if (F_ISSET(dbp, DB_BT_RECNUM))
1444*7c478bd9Sstevel@tonic-gate 			goto search;
1445*7c478bd9Sstevel@tonic-gate 
1446*7c478bd9Sstevel@tonic-gate 		/* Check if the application has a history of sorted input. */
1447*7c478bd9Sstevel@tonic-gate 		if (t->bt_lpgno == PGNO_INVALID)
1448*7c478bd9Sstevel@tonic-gate 			goto search;
1449*7c478bd9Sstevel@tonic-gate 
1450*7c478bd9Sstevel@tonic-gate 		/*
1451*7c478bd9Sstevel@tonic-gate 		 * Lock and retrieve the page on which we did the last insert.
1452*7c478bd9Sstevel@tonic-gate 		 * It's okay if it doesn't exist, or if it's not the page type
1453*7c478bd9Sstevel@tonic-gate 		 * we expected, it just means that the world changed.
1454*7c478bd9Sstevel@tonic-gate 		 */
1455*7c478bd9Sstevel@tonic-gate 		if (__bam_lget(dbc, 0, t->bt_lpgno, DB_LOCK_WRITE, &lock))
1456*7c478bd9Sstevel@tonic-gate 			goto fast_miss;
1457*7c478bd9Sstevel@tonic-gate 		if (memp_fget(dbp->mpf, &t->bt_lpgno, 0, &h))
1458*7c478bd9Sstevel@tonic-gate 			goto fast_miss;
1459*7c478bd9Sstevel@tonic-gate 		if (TYPE(h) != P_LBTREE)
1460*7c478bd9Sstevel@tonic-gate 			goto fast_miss;
1461*7c478bd9Sstevel@tonic-gate 		if (NUM_ENT(h) == 0)
1462*7c478bd9Sstevel@tonic-gate 			goto fast_miss;
1463*7c478bd9Sstevel@tonic-gate 
1464*7c478bd9Sstevel@tonic-gate 		/*
1465*7c478bd9Sstevel@tonic-gate 		 * What we do here is test to see if we're at the beginning or
1466*7c478bd9Sstevel@tonic-gate 		 * end of the tree and if the new item sorts before/after the
1467*7c478bd9Sstevel@tonic-gate 		 * first/last page entry.  We don't try and catch inserts into
1468*7c478bd9Sstevel@tonic-gate 		 * the middle of the tree (although we could, as long as there
1469*7c478bd9Sstevel@tonic-gate 		 * were two keys on the page and we saved both the index and
1470*7c478bd9Sstevel@tonic-gate 		 * the page number of the last insert).
1471*7c478bd9Sstevel@tonic-gate 		 */
1472*7c478bd9Sstevel@tonic-gate 		if (h->next_pgno == PGNO_INVALID) {
1473*7c478bd9Sstevel@tonic-gate 			indx = NUM_ENT(h) - P_INDX;
1474*7c478bd9Sstevel@tonic-gate 			if ((cmp =
1475*7c478bd9Sstevel@tonic-gate 			    __bam_cmp(dbp, key, h, indx, t->bt_compare)) < 0)
1476*7c478bd9Sstevel@tonic-gate 				goto try_begin;
1477*7c478bd9Sstevel@tonic-gate 			if (cmp > 0) {
1478*7c478bd9Sstevel@tonic-gate 				indx += P_INDX;
1479*7c478bd9Sstevel@tonic-gate 				goto fast_hit;
1480*7c478bd9Sstevel@tonic-gate 			}
1481*7c478bd9Sstevel@tonic-gate 
1482*7c478bd9Sstevel@tonic-gate 			/*
1483*7c478bd9Sstevel@tonic-gate 			 * Found a duplicate.  If doing DB_KEYLAST, we're at
1484*7c478bd9Sstevel@tonic-gate 			 * the correct position, otherwise, move to the first
1485*7c478bd9Sstevel@tonic-gate 			 * of the duplicates.
1486*7c478bd9Sstevel@tonic-gate 			 */
1487*7c478bd9Sstevel@tonic-gate 			if (flags == DB_KEYLAST)
1488*7c478bd9Sstevel@tonic-gate 				goto fast_hit;
1489*7c478bd9Sstevel@tonic-gate 			for (;
1490*7c478bd9Sstevel@tonic-gate 			    indx > 0 && h->inp[indx - P_INDX] == h->inp[indx];
1491*7c478bd9Sstevel@tonic-gate 			    indx -= P_INDX)
1492*7c478bd9Sstevel@tonic-gate 				;
1493*7c478bd9Sstevel@tonic-gate 			goto fast_hit;
1494*7c478bd9Sstevel@tonic-gate 		}
1495*7c478bd9Sstevel@tonic-gate try_begin:	if (h->prev_pgno == PGNO_INVALID) {
1496*7c478bd9Sstevel@tonic-gate 			indx = 0;
1497*7c478bd9Sstevel@tonic-gate 			if ((cmp =
1498*7c478bd9Sstevel@tonic-gate 			    __bam_cmp(dbp, key, h, indx, t->bt_compare)) > 0)
1499*7c478bd9Sstevel@tonic-gate 				goto fast_miss;
1500*7c478bd9Sstevel@tonic-gate 			if (cmp < 0)
1501*7c478bd9Sstevel@tonic-gate 				goto fast_hit;
1502*7c478bd9Sstevel@tonic-gate 			/*
1503*7c478bd9Sstevel@tonic-gate 			 * Found a duplicate.  If doing DB_KEYFIRST, we're at
1504*7c478bd9Sstevel@tonic-gate 			 * the correct position, otherwise, move to the last
1505*7c478bd9Sstevel@tonic-gate 			 * of the duplicates.
1506*7c478bd9Sstevel@tonic-gate 			 */
1507*7c478bd9Sstevel@tonic-gate 			if (flags == DB_KEYFIRST)
1508*7c478bd9Sstevel@tonic-gate 				goto fast_hit;
1509*7c478bd9Sstevel@tonic-gate 			for (;
1510*7c478bd9Sstevel@tonic-gate 			    indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
1511*7c478bd9Sstevel@tonic-gate 			    h->inp[indx] == h->inp[indx + P_INDX];
1512*7c478bd9Sstevel@tonic-gate 			    indx += P_INDX)
1513*7c478bd9Sstevel@tonic-gate 				;
1514*7c478bd9Sstevel@tonic-gate 			goto fast_hit;
1515*7c478bd9Sstevel@tonic-gate 		}
1516*7c478bd9Sstevel@tonic-gate 		goto fast_miss;
1517*7c478bd9Sstevel@tonic-gate 
1518*7c478bd9Sstevel@tonic-gate fast_hit:	/* Set the exact match flag, we may have found a duplicate. */
1519*7c478bd9Sstevel@tonic-gate 		*exactp = cmp == 0;
1520*7c478bd9Sstevel@tonic-gate 
1521*7c478bd9Sstevel@tonic-gate 		/* Enter the entry in the stack. */
1522*7c478bd9Sstevel@tonic-gate 		BT_STK_CLR(cp);
1523*7c478bd9Sstevel@tonic-gate 		BT_STK_ENTER(cp, h, indx, lock, ret);
1524*7c478bd9Sstevel@tonic-gate 		break;
1525*7c478bd9Sstevel@tonic-gate 
1526*7c478bd9Sstevel@tonic-gate fast_miss:	if (h != NULL)
1527*7c478bd9Sstevel@tonic-gate 			(void)memp_fput(dbp->mpf, h, 0);
1528*7c478bd9Sstevel@tonic-gate 		if (lock != LOCK_INVALID)
1529*7c478bd9Sstevel@tonic-gate 			(void)__BT_LPUT(dbc, lock);
1530*7c478bd9Sstevel@tonic-gate 
1531*7c478bd9Sstevel@tonic-gate search:		ret = __bam_search(dbc, key, sflags, 1, NULL, exactp);
1532*7c478bd9Sstevel@tonic-gate 		break;
1533*7c478bd9Sstevel@tonic-gate 	default:				/* XXX: Impossible. */
1534*7c478bd9Sstevel@tonic-gate 		abort();
1535*7c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
1536*7c478bd9Sstevel@tonic-gate 	}
1537*7c478bd9Sstevel@tonic-gate 	if (ret != 0)
1538*7c478bd9Sstevel@tonic-gate 		return (ret);
1539*7c478bd9Sstevel@tonic-gate 
1540*7c478bd9Sstevel@tonic-gate 	/*
1541*7c478bd9Sstevel@tonic-gate 	 * Initialize the cursor to reference it.  This has to be done
1542*7c478bd9Sstevel@tonic-gate 	 * before we return (even with DB_NOTFOUND) because we have to
1543*7c478bd9Sstevel@tonic-gate 	 * free the page(s) we locked in __bam_search.
1544*7c478bd9Sstevel@tonic-gate 	 */
1545*7c478bd9Sstevel@tonic-gate 	cp->page = cp->csp->page;
1546*7c478bd9Sstevel@tonic-gate 	cp->pgno = cp->csp->page->pgno;
1547*7c478bd9Sstevel@tonic-gate 	cp->indx = cp->csp->indx;
1548*7c478bd9Sstevel@tonic-gate 	cp->lock = cp->csp->lock;
1549*7c478bd9Sstevel@tonic-gate 	cp->dpgno = PGNO_INVALID;
1550*7c478bd9Sstevel@tonic-gate 
1551*7c478bd9Sstevel@tonic-gate 	/*
1552*7c478bd9Sstevel@tonic-gate 	 * If we inserted a key into the first or last slot of the tree,
1553*7c478bd9Sstevel@tonic-gate 	 * remember where it was so we can do it more quickly next time.
1554*7c478bd9Sstevel@tonic-gate 	 */
1555*7c478bd9Sstevel@tonic-gate 	if (flags == DB_KEYFIRST || flags == DB_KEYLAST)
1556*7c478bd9Sstevel@tonic-gate 		t->bt_lpgno =
1557*7c478bd9Sstevel@tonic-gate 		    ((cp->page->next_pgno == PGNO_INVALID &&
1558*7c478bd9Sstevel@tonic-gate 		    cp->indx >= NUM_ENT(cp->page)) ||
1559*7c478bd9Sstevel@tonic-gate 		    (cp->page->prev_pgno == PGNO_INVALID && cp->indx == 0)) ?
1560*7c478bd9Sstevel@tonic-gate 		    cp->pgno : PGNO_INVALID;
1561*7c478bd9Sstevel@tonic-gate 
1562*7c478bd9Sstevel@tonic-gate 	/* If we need an exact match and didn't find one, we're done. */
1563*7c478bd9Sstevel@tonic-gate 	if (needexact && *exactp == 0)
1564*7c478bd9Sstevel@tonic-gate 		return (DB_NOTFOUND);
1565*7c478bd9Sstevel@tonic-gate 
1566*7c478bd9Sstevel@tonic-gate 	return (0);
1567*7c478bd9Sstevel@tonic-gate }
1568*7c478bd9Sstevel@tonic-gate 
1569*7c478bd9Sstevel@tonic-gate /*
1570*7c478bd9Sstevel@tonic-gate  * __bam_dup --
1571*7c478bd9Sstevel@tonic-gate  *	Check for an off-page duplicates entry, and if found, move to the
1572*7c478bd9Sstevel@tonic-gate  *	first or last entry.
1573*7c478bd9Sstevel@tonic-gate  *
1574*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __bam_dup __P((DBC *, CURSOR *, u_int32_t, int));
1575*7c478bd9Sstevel@tonic-gate  */
1576*7c478bd9Sstevel@tonic-gate int
1577*7c478bd9Sstevel@tonic-gate __bam_dup(dbc, cp, indx, last_dup)
1578*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
1579*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
1580*7c478bd9Sstevel@tonic-gate 	u_int32_t indx;
1581*7c478bd9Sstevel@tonic-gate 	int last_dup;
1582*7c478bd9Sstevel@tonic-gate {
1583*7c478bd9Sstevel@tonic-gate 	BOVERFLOW *bo;
1584*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1585*7c478bd9Sstevel@tonic-gate 	db_pgno_t pgno;
1586*7c478bd9Sstevel@tonic-gate 	int ret;
1587*7c478bd9Sstevel@tonic-gate 
1588*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
1589*7c478bd9Sstevel@tonic-gate 
1590*7c478bd9Sstevel@tonic-gate 	/*
1591*7c478bd9Sstevel@tonic-gate 	 * Check for an overflow entry.  If we find one, move to the
1592*7c478bd9Sstevel@tonic-gate 	 * duplicates page, and optionally move to the last record on
1593*7c478bd9Sstevel@tonic-gate 	 * that page.
1594*7c478bd9Sstevel@tonic-gate 	 *
1595*7c478bd9Sstevel@tonic-gate 	 * !!!
1596*7c478bd9Sstevel@tonic-gate 	 * We don't lock duplicates pages, we've already got the correct
1597*7c478bd9Sstevel@tonic-gate 	 * lock on the main page.
1598*7c478bd9Sstevel@tonic-gate 	 */
1599*7c478bd9Sstevel@tonic-gate 	bo = GET_BOVERFLOW(cp->page, indx + O_INDX);
1600*7c478bd9Sstevel@tonic-gate 	if (B_TYPE(bo->type) != B_DUPLICATE)
1601*7c478bd9Sstevel@tonic-gate 		return (0);
1602*7c478bd9Sstevel@tonic-gate 
1603*7c478bd9Sstevel@tonic-gate 	pgno = bo->pgno;
1604*7c478bd9Sstevel@tonic-gate 	if ((ret = memp_fput(dbp->mpf, cp->page, 0)) != 0)
1605*7c478bd9Sstevel@tonic-gate 		return (ret);
1606*7c478bd9Sstevel@tonic-gate 	cp->page = NULL;
1607*7c478bd9Sstevel@tonic-gate 	if (last_dup) {
1608*7c478bd9Sstevel@tonic-gate 		if ((ret = __db_dend(dbc, pgno, &cp->page)) != 0)
1609*7c478bd9Sstevel@tonic-gate 			return (ret);
1610*7c478bd9Sstevel@tonic-gate 		indx = NUM_ENT(cp->page) - O_INDX;
1611*7c478bd9Sstevel@tonic-gate 	} else {
1612*7c478bd9Sstevel@tonic-gate 		if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0)
1613*7c478bd9Sstevel@tonic-gate 			return (ret);
1614*7c478bd9Sstevel@tonic-gate 		indx = 0;
1615*7c478bd9Sstevel@tonic-gate 	}
1616*7c478bd9Sstevel@tonic-gate 
1617*7c478bd9Sstevel@tonic-gate 	/* Update the cursor's duplicate information. */
1618*7c478bd9Sstevel@tonic-gate 	cp->dpgno = cp->page->pgno;
1619*7c478bd9Sstevel@tonic-gate 	cp->dindx = indx;
1620*7c478bd9Sstevel@tonic-gate 
1621*7c478bd9Sstevel@tonic-gate 	return (0);
1622*7c478bd9Sstevel@tonic-gate }
1623*7c478bd9Sstevel@tonic-gate 
1624*7c478bd9Sstevel@tonic-gate /*
1625*7c478bd9Sstevel@tonic-gate  * __bam_c_physdel --
1626*7c478bd9Sstevel@tonic-gate  *	Actually do the cursor deletion.
1627*7c478bd9Sstevel@tonic-gate  */
1628*7c478bd9Sstevel@tonic-gate static int
1629*7c478bd9Sstevel@tonic-gate __bam_c_physdel(dbc, cp, h)
1630*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
1631*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
1632*7c478bd9Sstevel@tonic-gate 	PAGE *h;
1633*7c478bd9Sstevel@tonic-gate {
1634*7c478bd9Sstevel@tonic-gate 	enum { DELETE_ITEM, DELETE_PAGE, NOTHING_FURTHER } cmd;
1635*7c478bd9Sstevel@tonic-gate 	BOVERFLOW bo;
1636*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1637*7c478bd9Sstevel@tonic-gate 	DBT dbt;
1638*7c478bd9Sstevel@tonic-gate 	DB_LOCK lock;
1639*7c478bd9Sstevel@tonic-gate 	db_indx_t indx;
1640*7c478bd9Sstevel@tonic-gate 	db_pgno_t pgno, next_pgno, prev_pgno;
1641*7c478bd9Sstevel@tonic-gate 	int delete_page, local_page, ret;
1642*7c478bd9Sstevel@tonic-gate 
1643*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
1644*7c478bd9Sstevel@tonic-gate 
1645*7c478bd9Sstevel@tonic-gate 	delete_page = ret = 0;
1646*7c478bd9Sstevel@tonic-gate 
1647*7c478bd9Sstevel@tonic-gate 	/* Figure out what we're deleting. */
1648*7c478bd9Sstevel@tonic-gate 	if (cp->dpgno == PGNO_INVALID) {
1649*7c478bd9Sstevel@tonic-gate 		pgno = cp->pgno;
1650*7c478bd9Sstevel@tonic-gate 		indx = cp->indx;
1651*7c478bd9Sstevel@tonic-gate 	} else {
1652*7c478bd9Sstevel@tonic-gate 		pgno = cp->dpgno;
1653*7c478bd9Sstevel@tonic-gate 		indx = cp->dindx;
1654*7c478bd9Sstevel@tonic-gate 	}
1655*7c478bd9Sstevel@tonic-gate 
1656*7c478bd9Sstevel@tonic-gate 	/*
1657*7c478bd9Sstevel@tonic-gate 	 * If the item is referenced by another cursor, set that cursor's
1658*7c478bd9Sstevel@tonic-gate 	 * delete flag and leave it up to it to do the delete.
1659*7c478bd9Sstevel@tonic-gate 	 *
1660*7c478bd9Sstevel@tonic-gate 	 * !!!
1661*7c478bd9Sstevel@tonic-gate 	 * This test for > 0 is tricky.  There are two ways that we can
1662*7c478bd9Sstevel@tonic-gate 	 * be called here.  Either we are closing the cursor or we've moved
1663*7c478bd9Sstevel@tonic-gate 	 * off the page with the deleted entry.  In the first case, we've
1664*7c478bd9Sstevel@tonic-gate 	 * already removed the cursor from the active queue, so we won't see
1665*7c478bd9Sstevel@tonic-gate 	 * it in __bam_ca_delete. In the second case, it will be on a different
1666*7c478bd9Sstevel@tonic-gate 	 * item, so we won't bother with it in __bam_ca_delete.
1667*7c478bd9Sstevel@tonic-gate 	 */
1668*7c478bd9Sstevel@tonic-gate 	if (__bam_ca_delete(dbp, pgno, indx, 1) > 0)
1669*7c478bd9Sstevel@tonic-gate 		return (0);
1670*7c478bd9Sstevel@tonic-gate 
1671*7c478bd9Sstevel@tonic-gate 	/*
1672*7c478bd9Sstevel@tonic-gate 	 * If this is concurrent DB, upgrade the lock if necessary.
1673*7c478bd9Sstevel@tonic-gate 	 */
1674*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW) &&
1675*7c478bd9Sstevel@tonic-gate 	    (ret = lock_get(dbp->dbenv->lk_info,
1676*7c478bd9Sstevel@tonic-gate 	    dbc->locker, DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE,
1677*7c478bd9Sstevel@tonic-gate 	    &dbc->mylock)) != 0)
1678*7c478bd9Sstevel@tonic-gate 		return (EAGAIN);
1679*7c478bd9Sstevel@tonic-gate 
1680*7c478bd9Sstevel@tonic-gate 	/*
1681*7c478bd9Sstevel@tonic-gate 	 * If we don't already have the page locked, get it and delete the
1682*7c478bd9Sstevel@tonic-gate 	 * items.
1683*7c478bd9Sstevel@tonic-gate 	 */
1684*7c478bd9Sstevel@tonic-gate 	if ((h == NULL || h->pgno != pgno)) {
1685*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_lget(dbc, 0, pgno, DB_LOCK_WRITE, &lock)) != 0)
1686*7c478bd9Sstevel@tonic-gate 			return (ret);
1687*7c478bd9Sstevel@tonic-gate 		if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
1688*7c478bd9Sstevel@tonic-gate 			return (ret);
1689*7c478bd9Sstevel@tonic-gate 		local_page = 1;
1690*7c478bd9Sstevel@tonic-gate 	} else
1691*7c478bd9Sstevel@tonic-gate 		local_page = 0;
1692*7c478bd9Sstevel@tonic-gate 
1693*7c478bd9Sstevel@tonic-gate 	/*
1694*7c478bd9Sstevel@tonic-gate 	 * If we're deleting a duplicate entry and there are other duplicate
1695*7c478bd9Sstevel@tonic-gate 	 * entries remaining, call the common code to do the work and fix up
1696*7c478bd9Sstevel@tonic-gate 	 * the parent page as necessary.  Otherwise, do a normal btree delete.
1697*7c478bd9Sstevel@tonic-gate 	 *
1698*7c478bd9Sstevel@tonic-gate 	 * There are 5 possible cases:
1699*7c478bd9Sstevel@tonic-gate 	 *
1700*7c478bd9Sstevel@tonic-gate 	 * 1. It's not a duplicate item: do a normal btree delete.
1701*7c478bd9Sstevel@tonic-gate 	 * 2. It's a duplicate item:
1702*7c478bd9Sstevel@tonic-gate 	 *	2a: We delete an item from a page of duplicates, but there are
1703*7c478bd9Sstevel@tonic-gate 	 *	    more items on the page.
1704*7c478bd9Sstevel@tonic-gate 	 *      2b: We delete the last item from a page of duplicates, deleting
1705*7c478bd9Sstevel@tonic-gate 	 *	    the last duplicate.
1706*7c478bd9Sstevel@tonic-gate 	 *      2c: We delete the last item from a page of duplicates, but there
1707*7c478bd9Sstevel@tonic-gate 	 *	    is a previous page of duplicates.
1708*7c478bd9Sstevel@tonic-gate 	 *      2d: We delete the last item from a page of duplicates, but there
1709*7c478bd9Sstevel@tonic-gate 	 *	    is a following page of duplicates.
1710*7c478bd9Sstevel@tonic-gate 	 *
1711*7c478bd9Sstevel@tonic-gate 	 * In the case of:
1712*7c478bd9Sstevel@tonic-gate 	 *
1713*7c478bd9Sstevel@tonic-gate 	 *  1: There's nothing further to do.
1714*7c478bd9Sstevel@tonic-gate 	 * 2a: There's nothing further to do.
1715*7c478bd9Sstevel@tonic-gate 	 * 2b: Do the normal btree delete instead of a duplicate delete, as
1716*7c478bd9Sstevel@tonic-gate 	 *     that deletes both the duplicate chain and the parent page's
1717*7c478bd9Sstevel@tonic-gate 	 *     entry.
1718*7c478bd9Sstevel@tonic-gate 	 * 2c: There's nothing further to do.
1719*7c478bd9Sstevel@tonic-gate 	 * 2d: Delete the duplicate, and update the parent page's entry.
1720*7c478bd9Sstevel@tonic-gate 	 */
1721*7c478bd9Sstevel@tonic-gate 	if (TYPE(h) == P_DUPLICATE) {
1722*7c478bd9Sstevel@tonic-gate 		pgno = PGNO(h);
1723*7c478bd9Sstevel@tonic-gate 		prev_pgno = PREV_PGNO(h);
1724*7c478bd9Sstevel@tonic-gate 		next_pgno = NEXT_PGNO(h);
1725*7c478bd9Sstevel@tonic-gate 
1726*7c478bd9Sstevel@tonic-gate 		if (NUM_ENT(h) == 1 &&
1727*7c478bd9Sstevel@tonic-gate 		    prev_pgno == PGNO_INVALID && next_pgno == PGNO_INVALID)
1728*7c478bd9Sstevel@tonic-gate 			cmd = DELETE_PAGE;
1729*7c478bd9Sstevel@tonic-gate 		else {
1730*7c478bd9Sstevel@tonic-gate 			cmd = DELETE_ITEM;
1731*7c478bd9Sstevel@tonic-gate 
1732*7c478bd9Sstevel@tonic-gate 			/* Delete the duplicate. */
1733*7c478bd9Sstevel@tonic-gate 			if ((ret = __db_drem(dbc, &h, indx, __bam_free)) != 0)
1734*7c478bd9Sstevel@tonic-gate 				goto err;
1735*7c478bd9Sstevel@tonic-gate 
1736*7c478bd9Sstevel@tonic-gate 			/*
1737*7c478bd9Sstevel@tonic-gate 			 * 2a: h != NULL, h->pgno == pgno
1738*7c478bd9Sstevel@tonic-gate 			 * 2b: We don't reach this clause, as the above test
1739*7c478bd9Sstevel@tonic-gate 			 *     was true.
1740*7c478bd9Sstevel@tonic-gate 			 * 2c: h == NULL, prev_pgno != PGNO_INVALID
1741*7c478bd9Sstevel@tonic-gate 			 * 2d: h != NULL, next_pgno != PGNO_INVALID
1742*7c478bd9Sstevel@tonic-gate 			 *
1743*7c478bd9Sstevel@tonic-gate 			 * Test for 2a and 2c: if we didn't empty the current
1744*7c478bd9Sstevel@tonic-gate 			 * page or there was a previous page of duplicates, we
1745*7c478bd9Sstevel@tonic-gate 			 * don't need to touch the parent page.
1746*7c478bd9Sstevel@tonic-gate 			 */
1747*7c478bd9Sstevel@tonic-gate 			if ((h != NULL && pgno == h->pgno) ||
1748*7c478bd9Sstevel@tonic-gate 			    prev_pgno != PGNO_INVALID)
1749*7c478bd9Sstevel@tonic-gate 				cmd = NOTHING_FURTHER;
1750*7c478bd9Sstevel@tonic-gate 		}
1751*7c478bd9Sstevel@tonic-gate 
1752*7c478bd9Sstevel@tonic-gate 		/*
1753*7c478bd9Sstevel@tonic-gate 		 * Release any page we're holding and its lock.
1754*7c478bd9Sstevel@tonic-gate 		 *
1755*7c478bd9Sstevel@tonic-gate 		 * !!!
1756*7c478bd9Sstevel@tonic-gate 		 * If there is no subsequent page in the duplicate chain, then
1757*7c478bd9Sstevel@tonic-gate 		 * __db_drem will have put page "h" and set it to NULL.
1758*7c478bd9Sstevel@tonic-gate 		*/
1759*7c478bd9Sstevel@tonic-gate 		if (local_page) {
1760*7c478bd9Sstevel@tonic-gate 			if (h != NULL)
1761*7c478bd9Sstevel@tonic-gate 				(void)memp_fput(dbp->mpf, h, 0);
1762*7c478bd9Sstevel@tonic-gate 			(void)__BT_TLPUT(dbc, lock);
1763*7c478bd9Sstevel@tonic-gate 			local_page = 0;
1764*7c478bd9Sstevel@tonic-gate 		}
1765*7c478bd9Sstevel@tonic-gate 
1766*7c478bd9Sstevel@tonic-gate 		if (cmd == NOTHING_FURTHER)
1767*7c478bd9Sstevel@tonic-gate 			goto done;
1768*7c478bd9Sstevel@tonic-gate 
1769*7c478bd9Sstevel@tonic-gate 		/* Acquire the parent page and switch the index to its entry. */
1770*7c478bd9Sstevel@tonic-gate 		if ((ret =
1771*7c478bd9Sstevel@tonic-gate 		    __bam_lget(dbc, 0, cp->pgno, DB_LOCK_WRITE, &lock)) != 0)
1772*7c478bd9Sstevel@tonic-gate 			goto err;
1773*7c478bd9Sstevel@tonic-gate 		if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &h)) != 0) {
1774*7c478bd9Sstevel@tonic-gate 			(void)__BT_TLPUT(dbc, lock);
1775*7c478bd9Sstevel@tonic-gate 			goto err;
1776*7c478bd9Sstevel@tonic-gate 		}
1777*7c478bd9Sstevel@tonic-gate 		local_page = 1;
1778*7c478bd9Sstevel@tonic-gate 		indx = cp->indx;
1779*7c478bd9Sstevel@tonic-gate 
1780*7c478bd9Sstevel@tonic-gate 		if (cmd == DELETE_PAGE)
1781*7c478bd9Sstevel@tonic-gate 			goto btd;
1782*7c478bd9Sstevel@tonic-gate 
1783*7c478bd9Sstevel@tonic-gate 		/*
1784*7c478bd9Sstevel@tonic-gate 		 * Copy, delete, update, add-back the parent page's data entry.
1785*7c478bd9Sstevel@tonic-gate 		 *
1786*7c478bd9Sstevel@tonic-gate 		 * XXX
1787*7c478bd9Sstevel@tonic-gate 		 * This may be a performance/logging problem.  We should add a
1788*7c478bd9Sstevel@tonic-gate 		 * log message which simply logs/updates a random set of bytes
1789*7c478bd9Sstevel@tonic-gate 		 * on a page, and use it instead of doing a delete/add pair.
1790*7c478bd9Sstevel@tonic-gate 		 */
1791*7c478bd9Sstevel@tonic-gate 		indx += O_INDX;
1792*7c478bd9Sstevel@tonic-gate 		bo = *GET_BOVERFLOW(h, indx);
1793*7c478bd9Sstevel@tonic-gate 		(void)__db_ditem(dbc, h, indx, BOVERFLOW_SIZE);
1794*7c478bd9Sstevel@tonic-gate 		bo.pgno = next_pgno;
1795*7c478bd9Sstevel@tonic-gate 		memset(&dbt, 0, sizeof(dbt));
1796*7c478bd9Sstevel@tonic-gate 		dbt.data = &bo;
1797*7c478bd9Sstevel@tonic-gate 		dbt.size = BOVERFLOW_SIZE;
1798*7c478bd9Sstevel@tonic-gate 		(void)__db_pitem(dbc, h, indx, BOVERFLOW_SIZE, &dbt, NULL);
1799*7c478bd9Sstevel@tonic-gate 		(void)memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY);
1800*7c478bd9Sstevel@tonic-gate 		goto done;
1801*7c478bd9Sstevel@tonic-gate 	}
1802*7c478bd9Sstevel@tonic-gate 
1803*7c478bd9Sstevel@tonic-gate btd:	/*
1804*7c478bd9Sstevel@tonic-gate 	 * If the page is going to be emptied, delete it.  To delete a leaf
1805*7c478bd9Sstevel@tonic-gate 	 * page we need a copy of a key from the page.  We use the 0th page
1806*7c478bd9Sstevel@tonic-gate 	 * index since it's the last key that the page held.
1807*7c478bd9Sstevel@tonic-gate 	 *
1808*7c478bd9Sstevel@tonic-gate 	 * We malloc the page information instead of using the return key/data
1809*7c478bd9Sstevel@tonic-gate 	 * memory because we've already set them -- the reason we've already
1810*7c478bd9Sstevel@tonic-gate 	 * set them is because we're (potentially) about to do a reverse split,
1811*7c478bd9Sstevel@tonic-gate 	 * which would make our saved page information useless.
1812*7c478bd9Sstevel@tonic-gate 	 *
1813*7c478bd9Sstevel@tonic-gate 	 * !!!
1814*7c478bd9Sstevel@tonic-gate 	 * The following operations to delete a page might deadlock.  I think
1815*7c478bd9Sstevel@tonic-gate 	 * that's OK.  The problem is if we're deleting an item because we're
1816*7c478bd9Sstevel@tonic-gate 	 * closing cursors because we've already deadlocked and want to call
1817*7c478bd9Sstevel@tonic-gate 	 * txn_abort().  If we fail due to deadlock, we leave a locked empty
1818*7c478bd9Sstevel@tonic-gate 	 * page in the tree, which won't be empty long because we're going to
1819*7c478bd9Sstevel@tonic-gate 	 * undo the delete.
1820*7c478bd9Sstevel@tonic-gate 	 */
1821*7c478bd9Sstevel@tonic-gate 	if (NUM_ENT(h) == 2 && h->pgno != PGNO_ROOT) {
1822*7c478bd9Sstevel@tonic-gate 		memset(&dbt, 0, sizeof(DBT));
1823*7c478bd9Sstevel@tonic-gate 		dbt.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL;
1824*7c478bd9Sstevel@tonic-gate 		if ((ret = __db_ret(dbp, h, 0, &dbt, NULL, NULL)) != 0)
1825*7c478bd9Sstevel@tonic-gate 			goto err;
1826*7c478bd9Sstevel@tonic-gate 		delete_page = 1;
1827*7c478bd9Sstevel@tonic-gate 	}
1828*7c478bd9Sstevel@tonic-gate 
1829*7c478bd9Sstevel@tonic-gate 	/*
1830*7c478bd9Sstevel@tonic-gate 	 * Do a normal btree delete.
1831*7c478bd9Sstevel@tonic-gate 	 *
1832*7c478bd9Sstevel@tonic-gate 	 * !!!
1833*7c478bd9Sstevel@tonic-gate 	 * Delete the key item first, otherwise the duplicate checks in
1834*7c478bd9Sstevel@tonic-gate 	 * __bam_ditem() won't work!
1835*7c478bd9Sstevel@tonic-gate 	 */
1836*7c478bd9Sstevel@tonic-gate 	if ((ret = __bam_ditem(dbc, h, indx)) != 0)
1837*7c478bd9Sstevel@tonic-gate 		goto err;
1838*7c478bd9Sstevel@tonic-gate 	if ((ret = __bam_ditem(dbc, h, indx)) != 0)
1839*7c478bd9Sstevel@tonic-gate 		goto err;
1840*7c478bd9Sstevel@tonic-gate 
1841*7c478bd9Sstevel@tonic-gate 	/* Discard any remaining locks/pages. */
1842*7c478bd9Sstevel@tonic-gate 	if (local_page) {
1843*7c478bd9Sstevel@tonic-gate 		(void)memp_fput(dbp->mpf, h, 0);
1844*7c478bd9Sstevel@tonic-gate 		(void)__BT_TLPUT(dbc, lock);
1845*7c478bd9Sstevel@tonic-gate 		local_page = 0;
1846*7c478bd9Sstevel@tonic-gate 	}
1847*7c478bd9Sstevel@tonic-gate 
1848*7c478bd9Sstevel@tonic-gate 	/* Delete the page if it was emptied. */
1849*7c478bd9Sstevel@tonic-gate 	if (delete_page)
1850*7c478bd9Sstevel@tonic-gate 		ret = __bam_dpage(dbc, &dbt);
1851*7c478bd9Sstevel@tonic-gate 
1852*7c478bd9Sstevel@tonic-gate err:
1853*7c478bd9Sstevel@tonic-gate done:	if (delete_page)
1854*7c478bd9Sstevel@tonic-gate 		__os_free(dbt.data, dbt.size);
1855*7c478bd9Sstevel@tonic-gate 
1856*7c478bd9Sstevel@tonic-gate 	if (local_page) {
1857*7c478bd9Sstevel@tonic-gate 		/*
1858*7c478bd9Sstevel@tonic-gate 		 * It's possible for h to be NULL, as __db_drem may have
1859*7c478bd9Sstevel@tonic-gate 		 * been relinking pages by the time that it deadlocked.
1860*7c478bd9Sstevel@tonic-gate 		 */
1861*7c478bd9Sstevel@tonic-gate 		if (h != NULL)
1862*7c478bd9Sstevel@tonic-gate 			(void)memp_fput(dbp->mpf, h, 0);
1863*7c478bd9Sstevel@tonic-gate 		(void)__BT_TLPUT(dbc, lock);
1864*7c478bd9Sstevel@tonic-gate 	}
1865*7c478bd9Sstevel@tonic-gate 
1866*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW))
1867*7c478bd9Sstevel@tonic-gate 		(void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
1868*7c478bd9Sstevel@tonic-gate 		    DB_LOCK_IWRITE, 0);
1869*7c478bd9Sstevel@tonic-gate 
1870*7c478bd9Sstevel@tonic-gate 	return (ret);
1871*7c478bd9Sstevel@tonic-gate }
1872*7c478bd9Sstevel@tonic-gate 
1873*7c478bd9Sstevel@tonic-gate /*
1874*7c478bd9Sstevel@tonic-gate  * __bam_c_getstack --
1875*7c478bd9Sstevel@tonic-gate  *	Acquire a full stack for a cursor.
1876*7c478bd9Sstevel@tonic-gate  */
1877*7c478bd9Sstevel@tonic-gate static int
1878*7c478bd9Sstevel@tonic-gate __bam_c_getstack(dbc, cp)
1879*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
1880*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
1881*7c478bd9Sstevel@tonic-gate {
1882*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1883*7c478bd9Sstevel@tonic-gate 	DBT dbt;
1884*7c478bd9Sstevel@tonic-gate 	PAGE *h;
1885*7c478bd9Sstevel@tonic-gate 	db_pgno_t pgno;
1886*7c478bd9Sstevel@tonic-gate 	int exact, ret;
1887*7c478bd9Sstevel@tonic-gate 
1888*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
1889*7c478bd9Sstevel@tonic-gate 	h = NULL;
1890*7c478bd9Sstevel@tonic-gate 	memset(&dbt, 0, sizeof(DBT));
1891*7c478bd9Sstevel@tonic-gate 	ret = 0;
1892*7c478bd9Sstevel@tonic-gate 
1893*7c478bd9Sstevel@tonic-gate 	/* Get the page with the current item on it. */
1894*7c478bd9Sstevel@tonic-gate 	pgno = cp->pgno;
1895*7c478bd9Sstevel@tonic-gate 	if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
1896*7c478bd9Sstevel@tonic-gate 		return (ret);
1897*7c478bd9Sstevel@tonic-gate 
1898*7c478bd9Sstevel@tonic-gate 	/* Get a copy of a key from the page. */
1899*7c478bd9Sstevel@tonic-gate 	dbt.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL;
1900*7c478bd9Sstevel@tonic-gate 	if ((ret = __db_ret(dbp, h, 0, &dbt, NULL, NULL)) != 0)
1901*7c478bd9Sstevel@tonic-gate 		goto err;
1902*7c478bd9Sstevel@tonic-gate 
1903*7c478bd9Sstevel@tonic-gate 	/* Get a write-locked stack for that page. */
1904*7c478bd9Sstevel@tonic-gate 	exact = 0;
1905*7c478bd9Sstevel@tonic-gate 	ret = __bam_search(dbc, &dbt, S_KEYFIRST, 1, NULL, &exact);
1906*7c478bd9Sstevel@tonic-gate 
1907*7c478bd9Sstevel@tonic-gate 	/* We no longer need the key or the page. */
1908*7c478bd9Sstevel@tonic-gate err:	if (h != NULL)
1909*7c478bd9Sstevel@tonic-gate 		(void)memp_fput(dbp->mpf, h, 0);
1910*7c478bd9Sstevel@tonic-gate 	if (dbt.data != NULL)
1911*7c478bd9Sstevel@tonic-gate 		__os_free(dbt.data, dbt.size);
1912*7c478bd9Sstevel@tonic-gate 	return (ret);
1913*7c478bd9Sstevel@tonic-gate }
1914