1*7c478bd9Sstevel@tonic-gate /*-
2*7c478bd9Sstevel@tonic-gate  * See the file LICENSE for redistribution information.
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * Copyright (c) 1996, 1997, 1998
5*7c478bd9Sstevel@tonic-gate  *	Sleepycat Software.  All rights reserved.
6*7c478bd9Sstevel@tonic-gate  */
7*7c478bd9Sstevel@tonic-gate 
8*7c478bd9Sstevel@tonic-gate #include "config.h"
9*7c478bd9Sstevel@tonic-gate 
10*7c478bd9Sstevel@tonic-gate #ifndef lint
11*7c478bd9Sstevel@tonic-gate static const char sccsid[] = "@(#)bt_cursor.c	10.81 (Sleepycat) 12/16/98";
12*7c478bd9Sstevel@tonic-gate #endif /* not lint */
13*7c478bd9Sstevel@tonic-gate 
14*7c478bd9Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES
15*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
16*7c478bd9Sstevel@tonic-gate 
17*7c478bd9Sstevel@tonic-gate #include <errno.h>
18*7c478bd9Sstevel@tonic-gate #include <stdlib.h>
19*7c478bd9Sstevel@tonic-gate #include <string.h>
20*7c478bd9Sstevel@tonic-gate #endif
21*7c478bd9Sstevel@tonic-gate 
22*7c478bd9Sstevel@tonic-gate #include "db_int.h"
23*7c478bd9Sstevel@tonic-gate #include "db_page.h"
24*7c478bd9Sstevel@tonic-gate #include "btree.h"
25*7c478bd9Sstevel@tonic-gate #include "shqueue.h"
26*7c478bd9Sstevel@tonic-gate #include "db_shash.h"
27*7c478bd9Sstevel@tonic-gate #include "lock.h"
28*7c478bd9Sstevel@tonic-gate #include "lock_ext.h"
29*7c478bd9Sstevel@tonic-gate 
30*7c478bd9Sstevel@tonic-gate static int __bam_c_close __P((DBC *));
31*7c478bd9Sstevel@tonic-gate static int __bam_c_del __P((DBC *, u_int32_t));
32*7c478bd9Sstevel@tonic-gate static int __bam_c_destroy __P((DBC *));
33*7c478bd9Sstevel@tonic-gate static int __bam_c_first __P((DBC *, CURSOR *));
34*7c478bd9Sstevel@tonic-gate static int __bam_c_get __P((DBC *, DBT *, DBT *, u_int32_t));
35*7c478bd9Sstevel@tonic-gate static int __bam_c_getstack __P((DBC *, CURSOR *));
36*7c478bd9Sstevel@tonic-gate static int __bam_c_last __P((DBC *, CURSOR *));
37*7c478bd9Sstevel@tonic-gate static int __bam_c_next __P((DBC *, CURSOR *, int));
38*7c478bd9Sstevel@tonic-gate static int __bam_c_physdel __P((DBC *, CURSOR *, PAGE *));
39*7c478bd9Sstevel@tonic-gate static int __bam_c_prev __P((DBC *, CURSOR *));
40*7c478bd9Sstevel@tonic-gate static int __bam_c_put __P((DBC *, DBT *, DBT *, u_int32_t));
41*7c478bd9Sstevel@tonic-gate static void __bam_c_reset __P((CURSOR *));
42*7c478bd9Sstevel@tonic-gate static int __bam_c_rget __P((DBC *, DBT *, u_int32_t));
43*7c478bd9Sstevel@tonic-gate static int __bam_c_search __P((DBC *, CURSOR *, const DBT *, u_int32_t, int *));
44*7c478bd9Sstevel@tonic-gate static int __bam_dsearch __P((DBC *, CURSOR *,  DBT *, u_int32_t *));
45*7c478bd9Sstevel@tonic-gate 
/*
 * DISCARD --
 *	Give back the page and the lock recorded in the cursor, if any, and
 *	mark each of them as no longer held.  The page is returned with no
 *	flags.  Return values of the release calls are ignored.
 *
 *	The expansion is a brace-enclosed block, and both arguments are
 *	evaluated more than once.
 */
#undef	DISCARD
#define	DISCARD(dbc, cp) {						\
	if ((cp)->page != NULL) {					\
		(void)memp_fput((dbc)->dbp->mpf, (cp)->page, 0);	\
		(cp)->page = NULL;					\
	}								\
	if ((cp)->lock != LOCK_INVALID) {				\
		(void)__BT_TLPUT((dbc), (cp)->lock);			\
		(cp)->lock = LOCK_INVALID;				\
	}								\
}
58*7c478bd9Sstevel@tonic-gate 
/*
 * IS_CUR_DELETED --
 *	Non-zero (1) if the item the cursor references carries the delete
 *	flag, 0 otherwise.  When the cursor has no duplicate page (dpgno is
 *	PGNO_INVALID) the item examined is at indx + O_INDX on cp->page;
 *	otherwise it is the item at dindx.  cp is evaluated more than once.
 */
#undef	IS_CUR_DELETED
#define	IS_CUR_DELETED(cp)						\
	((B_DISSET(GET_BKEYDATA((cp)->page,				\
	    ((cp)->dpgno == PGNO_INVALID ?				\
	    (cp)->indx + O_INDX : (cp)->dindx))->type)) != 0)
67*7c478bd9Sstevel@tonic-gate 
/*
 * IS_DELETED --
 *	Non-zero (1) if the item selected by the cursor and the supplied
 *	index carries the delete flag, 0 otherwise.  When the cursor has no
 *	duplicate page (dpgno is PGNO_INVALID) the item examined is at
 *	indx + O_INDX on cp->page; otherwise it is the item at indx itself.
 *	cp is evaluated more than once; indx is evaluated once.
 */
#undef	IS_DELETED
#define	IS_DELETED(cp, indx)						\
	((B_DISSET(GET_BKEYDATA((cp)->page,				\
	    ((cp)->dpgno == PGNO_INVALID ?				\
	    (indx) + O_INDX : (indx)))->type)) != 0)
75*7c478bd9Sstevel@tonic-gate 
/*
 * POSSIBLE_DUPLICATE --
 *	Decide whether a cursor and a saved copy of a cursor could reference
 *	duplicates of the same key, on-page or off-page.
 *
 *	Both must name the same leaf page.  Beyond that, either:
 *	  - the key indices on the leaf page are equal (the off-page
 *	    duplicate case), or
 *	  - neither has a duplicate page set and the two key indices map to
 *	    the same offset in the page's index array (the on-page duplicate
 *	    case).
 *
 *	The saved copy will not have a valid page pointer, so the offset
 *	comparison is made through the cursor's page for both indices.
 *	"cursor" is a pointer; "saved_copy" is a structure, not a pointer.
 */
#undef	POSSIBLE_DUPLICATE
#define	POSSIBLE_DUPLICATE(cursor, saved_copy)				\
	((cursor)->pgno == (saved_copy).pgno &&				\
	    ((cursor)->indx == (saved_copy).indx ||			\
	    ((cursor)->dpgno == PGNO_INVALID &&				\
	    (saved_copy).dpgno == PGNO_INVALID &&			\
	    (cursor)->page->inp[(cursor)->indx] ==			\
	    (cursor)->page->inp[(saved_copy).indx])))
93*7c478bd9Sstevel@tonic-gate 
94*7c478bd9Sstevel@tonic-gate /*
95*7c478bd9Sstevel@tonic-gate  * __bam_c_reset --
96*7c478bd9Sstevel@tonic-gate  *	Initialize internal cursor structure.
97*7c478bd9Sstevel@tonic-gate  */
98*7c478bd9Sstevel@tonic-gate static void
__bam_c_reset(cp)99*7c478bd9Sstevel@tonic-gate __bam_c_reset(cp)
100*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
101*7c478bd9Sstevel@tonic-gate {
102*7c478bd9Sstevel@tonic-gate 	cp->sp = cp->csp = cp->stack;
103*7c478bd9Sstevel@tonic-gate 	cp->esp = cp->stack + sizeof(cp->stack) / sizeof(cp->stack[0]);
104*7c478bd9Sstevel@tonic-gate 	cp->page = NULL;
105*7c478bd9Sstevel@tonic-gate 	cp->pgno = PGNO_INVALID;
106*7c478bd9Sstevel@tonic-gate 	cp->indx = 0;
107*7c478bd9Sstevel@tonic-gate 	cp->dpgno = PGNO_INVALID;
108*7c478bd9Sstevel@tonic-gate 	cp->dindx = 0;
109*7c478bd9Sstevel@tonic-gate 	cp->lock = LOCK_INVALID;
110*7c478bd9Sstevel@tonic-gate 	cp->mode = DB_LOCK_NG;
111*7c478bd9Sstevel@tonic-gate 	cp->recno = RECNO_OOB;
112*7c478bd9Sstevel@tonic-gate 	cp->flags = 0;
113*7c478bd9Sstevel@tonic-gate }
114*7c478bd9Sstevel@tonic-gate 
115*7c478bd9Sstevel@tonic-gate /*
116*7c478bd9Sstevel@tonic-gate  * __bam_c_init --
117*7c478bd9Sstevel@tonic-gate  *	Initialize the access private portion of a cursor
118*7c478bd9Sstevel@tonic-gate  *
119*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __bam_c_init __P((DBC *));
120*7c478bd9Sstevel@tonic-gate  */
121*7c478bd9Sstevel@tonic-gate int
__bam_c_init(dbc)122*7c478bd9Sstevel@tonic-gate __bam_c_init(dbc)
123*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
124*7c478bd9Sstevel@tonic-gate {
125*7c478bd9Sstevel@tonic-gate 	DB *dbp;
126*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
127*7c478bd9Sstevel@tonic-gate 	int ret;
128*7c478bd9Sstevel@tonic-gate 
129*7c478bd9Sstevel@tonic-gate 	if ((ret = __os_calloc(1, sizeof(CURSOR), &cp)) != 0)
130*7c478bd9Sstevel@tonic-gate 		return (ret);
131*7c478bd9Sstevel@tonic-gate 
132*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
133*7c478bd9Sstevel@tonic-gate 	cp->dbc = dbc;
134*7c478bd9Sstevel@tonic-gate 
135*7c478bd9Sstevel@tonic-gate 	/*
136*7c478bd9Sstevel@tonic-gate 	 * Logical record numbers are always the same size, and we don't want
137*7c478bd9Sstevel@tonic-gate 	 * to have to check for space every time we return one.  Allocate it
138*7c478bd9Sstevel@tonic-gate 	 * in advance.
139*7c478bd9Sstevel@tonic-gate 	 */
140*7c478bd9Sstevel@tonic-gate 	if (dbp->type == DB_RECNO || F_ISSET(dbp, DB_BT_RECNUM)) {
141*7c478bd9Sstevel@tonic-gate 		if ((ret = __os_malloc(sizeof(db_recno_t),
142*7c478bd9Sstevel@tonic-gate 		    NULL, &dbc->rkey.data)) != 0) {
143*7c478bd9Sstevel@tonic-gate 			__os_free(cp, sizeof(CURSOR));
144*7c478bd9Sstevel@tonic-gate 			return (ret);
145*7c478bd9Sstevel@tonic-gate 		}
146*7c478bd9Sstevel@tonic-gate 		dbc->rkey.ulen = sizeof(db_recno_t);
147*7c478bd9Sstevel@tonic-gate 	}
148*7c478bd9Sstevel@tonic-gate 
149*7c478bd9Sstevel@tonic-gate 	/* Initialize methods. */
150*7c478bd9Sstevel@tonic-gate 	dbc->internal = cp;
151*7c478bd9Sstevel@tonic-gate 	if (dbp->type == DB_BTREE) {
152*7c478bd9Sstevel@tonic-gate 		dbc->c_am_close = __bam_c_close;
153*7c478bd9Sstevel@tonic-gate 		dbc->c_am_destroy = __bam_c_destroy;
154*7c478bd9Sstevel@tonic-gate 		dbc->c_del = __bam_c_del;
155*7c478bd9Sstevel@tonic-gate 		dbc->c_get = __bam_c_get;
156*7c478bd9Sstevel@tonic-gate 		dbc->c_put = __bam_c_put;
157*7c478bd9Sstevel@tonic-gate 	} else {
158*7c478bd9Sstevel@tonic-gate 		dbc->c_am_close = __bam_c_close;
159*7c478bd9Sstevel@tonic-gate 		dbc->c_am_destroy = __bam_c_destroy;
160*7c478bd9Sstevel@tonic-gate 		dbc->c_del = __ram_c_del;
161*7c478bd9Sstevel@tonic-gate 		dbc->c_get = __ram_c_get;
162*7c478bd9Sstevel@tonic-gate 		dbc->c_put = __ram_c_put;
163*7c478bd9Sstevel@tonic-gate 	}
164*7c478bd9Sstevel@tonic-gate 
165*7c478bd9Sstevel@tonic-gate 	/* Initialize dynamic information. */
166*7c478bd9Sstevel@tonic-gate 	__bam_c_reset(cp);
167*7c478bd9Sstevel@tonic-gate 
168*7c478bd9Sstevel@tonic-gate 	return (0);
169*7c478bd9Sstevel@tonic-gate }
170*7c478bd9Sstevel@tonic-gate 
171*7c478bd9Sstevel@tonic-gate /*
172*7c478bd9Sstevel@tonic-gate  * __bam_c_close --
173*7c478bd9Sstevel@tonic-gate  *	Close down the cursor from a single use.
174*7c478bd9Sstevel@tonic-gate  */
175*7c478bd9Sstevel@tonic-gate static int
__bam_c_close(dbc)176*7c478bd9Sstevel@tonic-gate __bam_c_close(dbc)
177*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
178*7c478bd9Sstevel@tonic-gate {
179*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
180*7c478bd9Sstevel@tonic-gate 	DB *dbp;
181*7c478bd9Sstevel@tonic-gate 	int ret;
182*7c478bd9Sstevel@tonic-gate 
183*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
184*7c478bd9Sstevel@tonic-gate 	cp = dbc->internal;
185*7c478bd9Sstevel@tonic-gate 	ret = 0;
186*7c478bd9Sstevel@tonic-gate 
187*7c478bd9Sstevel@tonic-gate 	/*
188*7c478bd9Sstevel@tonic-gate 	 * If a cursor deleted a btree key, perform the actual deletion.
189*7c478bd9Sstevel@tonic-gate 	 * (Recno keys are either deleted immediately or never deleted.)
190*7c478bd9Sstevel@tonic-gate 	 */
191*7c478bd9Sstevel@tonic-gate 	if (dbp->type == DB_BTREE && F_ISSET(cp, C_DELETED))
192*7c478bd9Sstevel@tonic-gate 		ret = __bam_c_physdel(dbc, cp, NULL);
193*7c478bd9Sstevel@tonic-gate 
194*7c478bd9Sstevel@tonic-gate 	/* Discard any locks not acquired inside of a transaction. */
195*7c478bd9Sstevel@tonic-gate 	if (cp->lock != LOCK_INVALID) {
196*7c478bd9Sstevel@tonic-gate 		(void)__BT_TLPUT(dbc, cp->lock);
197*7c478bd9Sstevel@tonic-gate 		cp->lock = LOCK_INVALID;
198*7c478bd9Sstevel@tonic-gate 	}
199*7c478bd9Sstevel@tonic-gate 
200*7c478bd9Sstevel@tonic-gate 	/* Sanity checks. */
201*7c478bd9Sstevel@tonic-gate #ifdef DIAGNOSTIC
202*7c478bd9Sstevel@tonic-gate 	if (cp->csp != cp->stack)
203*7c478bd9Sstevel@tonic-gate 		__db_err(dbp->dbenv, "btree cursor close: stack not empty");
204*7c478bd9Sstevel@tonic-gate #endif
205*7c478bd9Sstevel@tonic-gate 
206*7c478bd9Sstevel@tonic-gate 	/* Initialize dynamic information. */
207*7c478bd9Sstevel@tonic-gate 	__bam_c_reset(cp);
208*7c478bd9Sstevel@tonic-gate 
209*7c478bd9Sstevel@tonic-gate 	return (ret);
210*7c478bd9Sstevel@tonic-gate }
211*7c478bd9Sstevel@tonic-gate 
212*7c478bd9Sstevel@tonic-gate /*
213*7c478bd9Sstevel@tonic-gate  * __bam_c_destroy --
214*7c478bd9Sstevel@tonic-gate  *	Close a single cursor -- internal version.
215*7c478bd9Sstevel@tonic-gate  */
216*7c478bd9Sstevel@tonic-gate static int
__bam_c_destroy(dbc)217*7c478bd9Sstevel@tonic-gate __bam_c_destroy(dbc)
218*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
219*7c478bd9Sstevel@tonic-gate {
220*7c478bd9Sstevel@tonic-gate 	/* Discard the structures. */
221*7c478bd9Sstevel@tonic-gate 	__os_free(dbc->internal, sizeof(CURSOR));
222*7c478bd9Sstevel@tonic-gate 
223*7c478bd9Sstevel@tonic-gate 	return (0);
224*7c478bd9Sstevel@tonic-gate }
225*7c478bd9Sstevel@tonic-gate 
226*7c478bd9Sstevel@tonic-gate /*
227*7c478bd9Sstevel@tonic-gate  * __bam_c_del --
228*7c478bd9Sstevel@tonic-gate  *	Delete using a cursor.
229*7c478bd9Sstevel@tonic-gate  */
230*7c478bd9Sstevel@tonic-gate static int
__bam_c_del(dbc,flags)231*7c478bd9Sstevel@tonic-gate __bam_c_del(dbc, flags)
232*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
233*7c478bd9Sstevel@tonic-gate 	u_int32_t flags;
234*7c478bd9Sstevel@tonic-gate {
235*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
236*7c478bd9Sstevel@tonic-gate 	DB *dbp;
237*7c478bd9Sstevel@tonic-gate 	DB_LOCK lock;
238*7c478bd9Sstevel@tonic-gate 	PAGE *h;
239*7c478bd9Sstevel@tonic-gate 	db_pgno_t pgno;
240*7c478bd9Sstevel@tonic-gate 	db_indx_t indx;
241*7c478bd9Sstevel@tonic-gate 	int ret;
242*7c478bd9Sstevel@tonic-gate 
243*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
244*7c478bd9Sstevel@tonic-gate 	cp = dbc->internal;
245*7c478bd9Sstevel@tonic-gate 	h = NULL;
246*7c478bd9Sstevel@tonic-gate 
247*7c478bd9Sstevel@tonic-gate 	DB_PANIC_CHECK(dbp);
248*7c478bd9Sstevel@tonic-gate 
249*7c478bd9Sstevel@tonic-gate 	/* Check for invalid flags. */
250*7c478bd9Sstevel@tonic-gate 	if ((ret = __db_cdelchk(dbp, flags,
251*7c478bd9Sstevel@tonic-gate 	    F_ISSET(dbp, DB_AM_RDONLY), cp->pgno != PGNO_INVALID)) != 0)
252*7c478bd9Sstevel@tonic-gate 		return (ret);
253*7c478bd9Sstevel@tonic-gate 
254*7c478bd9Sstevel@tonic-gate 	/*
255*7c478bd9Sstevel@tonic-gate 	 * If we are running CDB, this had better be either a write
256*7c478bd9Sstevel@tonic-gate 	 * cursor or an immediate writer.
257*7c478bd9Sstevel@tonic-gate 	 */
258*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(dbp, DB_AM_CDB))
259*7c478bd9Sstevel@tonic-gate 		if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
260*7c478bd9Sstevel@tonic-gate 			return (EINVAL);
261*7c478bd9Sstevel@tonic-gate 
262*7c478bd9Sstevel@tonic-gate 	DEBUG_LWRITE(dbc, dbc->txn, "bam_c_del", NULL, NULL, flags);
263*7c478bd9Sstevel@tonic-gate 
264*7c478bd9Sstevel@tonic-gate 	/* If already deleted, return failure. */
265*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(cp, C_DELETED))
266*7c478bd9Sstevel@tonic-gate 		return (DB_KEYEMPTY);
267*7c478bd9Sstevel@tonic-gate 
268*7c478bd9Sstevel@tonic-gate 	/*
269*7c478bd9Sstevel@tonic-gate 	 * We don't physically delete the record until the cursor moves,
270*7c478bd9Sstevel@tonic-gate 	 * so we have to have a long-lived write lock on the page instead
271*7c478bd9Sstevel@tonic-gate 	 * of a long-lived read lock.  Note, we have to have a read lock
272*7c478bd9Sstevel@tonic-gate 	 * to even get here, so we simply discard it.
273*7c478bd9Sstevel@tonic-gate 	 */
274*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(dbp, DB_AM_LOCKING) && cp->mode != DB_LOCK_WRITE) {
275*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_lget(dbc,
276*7c478bd9Sstevel@tonic-gate 		    0, cp->pgno, DB_LOCK_WRITE, &lock)) != 0)
277*7c478bd9Sstevel@tonic-gate 			goto err;
278*7c478bd9Sstevel@tonic-gate 		(void)__BT_TLPUT(dbc, cp->lock);
279*7c478bd9Sstevel@tonic-gate 		cp->lock = lock;
280*7c478bd9Sstevel@tonic-gate 		cp->mode = DB_LOCK_WRITE;
281*7c478bd9Sstevel@tonic-gate 	}
282*7c478bd9Sstevel@tonic-gate 
283*7c478bd9Sstevel@tonic-gate 	/*
284*7c478bd9Sstevel@tonic-gate 	 * Acquire the underlying page (which may be different from the above
285*7c478bd9Sstevel@tonic-gate 	 * page because it may be a duplicate page), and set the on-page and
286*7c478bd9Sstevel@tonic-gate 	 * in-cursor delete flags.  We don't need to lock it as we've already
287*7c478bd9Sstevel@tonic-gate 	 * write-locked the page leading to it.
288*7c478bd9Sstevel@tonic-gate 	 */
289*7c478bd9Sstevel@tonic-gate 	if (cp->dpgno == PGNO_INVALID) {
290*7c478bd9Sstevel@tonic-gate 		pgno = cp->pgno;
291*7c478bd9Sstevel@tonic-gate 		indx = cp->indx;
292*7c478bd9Sstevel@tonic-gate 	} else {
293*7c478bd9Sstevel@tonic-gate 		pgno = cp->dpgno;
294*7c478bd9Sstevel@tonic-gate 		indx = cp->dindx;
295*7c478bd9Sstevel@tonic-gate 	}
296*7c478bd9Sstevel@tonic-gate 
297*7c478bd9Sstevel@tonic-gate 	if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0)
298*7c478bd9Sstevel@tonic-gate 		goto err;
299*7c478bd9Sstevel@tonic-gate 
300*7c478bd9Sstevel@tonic-gate 	/* Log the change. */
301*7c478bd9Sstevel@tonic-gate 	if (DB_LOGGING(dbc) &&
302*7c478bd9Sstevel@tonic-gate 	    (ret = __bam_cdel_log(dbp->dbenv->lg_info, dbc->txn, &LSN(h),
303*7c478bd9Sstevel@tonic-gate 	    0, dbp->log_fileid, PGNO(h), &LSN(h), indx)) != 0) {
304*7c478bd9Sstevel@tonic-gate 		(void)memp_fput(dbp->mpf, h, 0);
305*7c478bd9Sstevel@tonic-gate 		goto err;
306*7c478bd9Sstevel@tonic-gate 	}
307*7c478bd9Sstevel@tonic-gate 
308*7c478bd9Sstevel@tonic-gate 	/*
309*7c478bd9Sstevel@tonic-gate 	 * Set the intent-to-delete flag on the page and update all cursors. */
310*7c478bd9Sstevel@tonic-gate 	if (cp->dpgno == PGNO_INVALID)
311*7c478bd9Sstevel@tonic-gate 		B_DSET(GET_BKEYDATA(h, indx + O_INDX)->type);
312*7c478bd9Sstevel@tonic-gate 	else
313*7c478bd9Sstevel@tonic-gate 		B_DSET(GET_BKEYDATA(h, indx)->type);
314*7c478bd9Sstevel@tonic-gate 	(void)__bam_ca_delete(dbp, pgno, indx, 1);
315*7c478bd9Sstevel@tonic-gate 
316*7c478bd9Sstevel@tonic-gate 	ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY);
317*7c478bd9Sstevel@tonic-gate 	h = NULL;
318*7c478bd9Sstevel@tonic-gate 
319*7c478bd9Sstevel@tonic-gate 	/*
320*7c478bd9Sstevel@tonic-gate 	 * If the tree has record numbers, we have to adjust the counts.
321*7c478bd9Sstevel@tonic-gate 	 *
322*7c478bd9Sstevel@tonic-gate 	 * !!!
323*7c478bd9Sstevel@tonic-gate 	 * This test is right -- we don't yet support duplicates and record
324*7c478bd9Sstevel@tonic-gate 	 * numbers in the same tree, so ignore duplicates if DB_BT_RECNUM
325*7c478bd9Sstevel@tonic-gate 	 * set.
326*7c478bd9Sstevel@tonic-gate 	 */
327*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(dbp, DB_BT_RECNUM)) {
328*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_getstack(dbc, cp)) != 0)
329*7c478bd9Sstevel@tonic-gate 			goto err;
330*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_adjust(dbc, -1)) != 0)
331*7c478bd9Sstevel@tonic-gate 			goto err;
332*7c478bd9Sstevel@tonic-gate 		(void)__bam_stkrel(dbc, 0);
333*7c478bd9Sstevel@tonic-gate 	}
334*7c478bd9Sstevel@tonic-gate 
335*7c478bd9Sstevel@tonic-gate err:	if (h != NULL)
336*7c478bd9Sstevel@tonic-gate 		(void)memp_fput(dbp->mpf, h, 0);
337*7c478bd9Sstevel@tonic-gate 	return (ret);
338*7c478bd9Sstevel@tonic-gate }
339*7c478bd9Sstevel@tonic-gate 
340*7c478bd9Sstevel@tonic-gate /*
341*7c478bd9Sstevel@tonic-gate  * __bam_c_get --
342*7c478bd9Sstevel@tonic-gate  *	Get using a cursor (btree).
343*7c478bd9Sstevel@tonic-gate  */
344*7c478bd9Sstevel@tonic-gate static int
__bam_c_get(dbc,key,data,flags)345*7c478bd9Sstevel@tonic-gate __bam_c_get(dbc, key, data, flags)
346*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
347*7c478bd9Sstevel@tonic-gate 	DBT *key, *data;
348*7c478bd9Sstevel@tonic-gate 	u_int32_t flags;
349*7c478bd9Sstevel@tonic-gate {
350*7c478bd9Sstevel@tonic-gate 	CURSOR *cp, copy, start;
351*7c478bd9Sstevel@tonic-gate 	DB *dbp;
352*7c478bd9Sstevel@tonic-gate 	PAGE *h;
353*7c478bd9Sstevel@tonic-gate 	int exact, ret, tmp_rmw;
354*7c478bd9Sstevel@tonic-gate 
355*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
356*7c478bd9Sstevel@tonic-gate 	cp = dbc->internal;
357*7c478bd9Sstevel@tonic-gate 
358*7c478bd9Sstevel@tonic-gate 	DB_PANIC_CHECK(dbp);
359*7c478bd9Sstevel@tonic-gate 
360*7c478bd9Sstevel@tonic-gate 	/* Check for invalid flags. */
361*7c478bd9Sstevel@tonic-gate 	if ((ret = __db_cgetchk(dbp,
362*7c478bd9Sstevel@tonic-gate 	    key, data, flags, cp->pgno != PGNO_INVALID)) != 0)
363*7c478bd9Sstevel@tonic-gate 		return (ret);
364*7c478bd9Sstevel@tonic-gate 
365*7c478bd9Sstevel@tonic-gate 	/* Clear OR'd in additional bits so we can check for flag equality. */
366*7c478bd9Sstevel@tonic-gate 	tmp_rmw = 0;
367*7c478bd9Sstevel@tonic-gate 	if (LF_ISSET(DB_RMW)) {
368*7c478bd9Sstevel@tonic-gate 		if (!F_ISSET(dbp, DB_AM_CDB)) {
369*7c478bd9Sstevel@tonic-gate 			tmp_rmw = 1;
370*7c478bd9Sstevel@tonic-gate 			F_SET(dbc, DBC_RMW);
371*7c478bd9Sstevel@tonic-gate 		}
372*7c478bd9Sstevel@tonic-gate 		LF_CLR(DB_RMW);
373*7c478bd9Sstevel@tonic-gate 	}
374*7c478bd9Sstevel@tonic-gate 
375*7c478bd9Sstevel@tonic-gate 	DEBUG_LREAD(dbc, dbc->txn, "bam_c_get",
376*7c478bd9Sstevel@tonic-gate 	    flags == DB_SET || flags == DB_SET_RANGE ? key : NULL, NULL, flags);
377*7c478bd9Sstevel@tonic-gate 
378*7c478bd9Sstevel@tonic-gate 	/*
379*7c478bd9Sstevel@tonic-gate 	 * Return a cursor's record number.  It has nothing to do with the
380*7c478bd9Sstevel@tonic-gate 	 * cursor get code except that it's been rammed into the interface.
381*7c478bd9Sstevel@tonic-gate 	 */
382*7c478bd9Sstevel@tonic-gate 	if (flags == DB_GET_RECNO) {
383*7c478bd9Sstevel@tonic-gate 		ret = __bam_c_rget(dbc, data, flags);
384*7c478bd9Sstevel@tonic-gate 		if (tmp_rmw)
385*7c478bd9Sstevel@tonic-gate 			F_CLR(dbc, DBC_RMW);
386*7c478bd9Sstevel@tonic-gate 		return (ret);
387*7c478bd9Sstevel@tonic-gate 	}
388*7c478bd9Sstevel@tonic-gate 
389*7c478bd9Sstevel@tonic-gate 	/*
390*7c478bd9Sstevel@tonic-gate 	 * Initialize the cursor for a new retrieval.  Clear the cursor's
391*7c478bd9Sstevel@tonic-gate 	 * page pointer, it was set before this operation, and no longer
392*7c478bd9Sstevel@tonic-gate 	 * has any meaning.
393*7c478bd9Sstevel@tonic-gate 	 */
394*7c478bd9Sstevel@tonic-gate 	cp->page = NULL;
395*7c478bd9Sstevel@tonic-gate 	copy = *cp;
396*7c478bd9Sstevel@tonic-gate 	cp->lock = LOCK_INVALID;
397*7c478bd9Sstevel@tonic-gate 
398*7c478bd9Sstevel@tonic-gate 	switch (flags) {
399*7c478bd9Sstevel@tonic-gate 	case DB_CURRENT:
400*7c478bd9Sstevel@tonic-gate 		/* It's not possible to return a deleted record. */
401*7c478bd9Sstevel@tonic-gate 		if (F_ISSET(cp, C_DELETED)) {
402*7c478bd9Sstevel@tonic-gate 			ret = DB_KEYEMPTY;
403*7c478bd9Sstevel@tonic-gate 			goto err;
404*7c478bd9Sstevel@tonic-gate 		}
405*7c478bd9Sstevel@tonic-gate 
406*7c478bd9Sstevel@tonic-gate 		/* Acquire the current page. */
407*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_lget(dbc,
408*7c478bd9Sstevel@tonic-gate 		    0, cp->pgno, DB_LOCK_READ, &cp->lock)) == 0)
409*7c478bd9Sstevel@tonic-gate 			ret = memp_fget(dbp->mpf,
410*7c478bd9Sstevel@tonic-gate 			    cp->dpgno == PGNO_INVALID ? &cp->pgno : &cp->dpgno,
411*7c478bd9Sstevel@tonic-gate 			    0, &cp->page);
412*7c478bd9Sstevel@tonic-gate 		if (ret != 0)
413*7c478bd9Sstevel@tonic-gate 			goto err;
414*7c478bd9Sstevel@tonic-gate 		break;
415*7c478bd9Sstevel@tonic-gate 	case DB_NEXT_DUP:
416*7c478bd9Sstevel@tonic-gate 		if (cp->pgno == PGNO_INVALID) {
417*7c478bd9Sstevel@tonic-gate 			ret = EINVAL;
418*7c478bd9Sstevel@tonic-gate 			goto err;
419*7c478bd9Sstevel@tonic-gate 		}
420*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_next(dbc, cp, 1)) != 0)
421*7c478bd9Sstevel@tonic-gate 			goto err;
422*7c478bd9Sstevel@tonic-gate 
423*7c478bd9Sstevel@tonic-gate 		/* Make sure we didn't go past the end of the duplicates. */
424*7c478bd9Sstevel@tonic-gate 		if (!POSSIBLE_DUPLICATE(cp, copy)) {
425*7c478bd9Sstevel@tonic-gate 			ret = DB_NOTFOUND;
426*7c478bd9Sstevel@tonic-gate 			goto err;
427*7c478bd9Sstevel@tonic-gate 		}
428*7c478bd9Sstevel@tonic-gate 		break;
429*7c478bd9Sstevel@tonic-gate 	case DB_NEXT:
430*7c478bd9Sstevel@tonic-gate 		if (cp->pgno != PGNO_INVALID) {
431*7c478bd9Sstevel@tonic-gate 			if ((ret = __bam_c_next(dbc, cp, 1)) != 0)
432*7c478bd9Sstevel@tonic-gate 				goto err;
433*7c478bd9Sstevel@tonic-gate 			break;
434*7c478bd9Sstevel@tonic-gate 		}
435*7c478bd9Sstevel@tonic-gate 		/* FALLTHROUGH */
436*7c478bd9Sstevel@tonic-gate 	case DB_FIRST:
437*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_first(dbc, cp)) != 0)
438*7c478bd9Sstevel@tonic-gate 			goto err;
439*7c478bd9Sstevel@tonic-gate 		break;
440*7c478bd9Sstevel@tonic-gate 	case DB_PREV:
441*7c478bd9Sstevel@tonic-gate 		if (cp->pgno != PGNO_INVALID) {
442*7c478bd9Sstevel@tonic-gate 			if ((ret = __bam_c_prev(dbc, cp)) != 0)
443*7c478bd9Sstevel@tonic-gate 				goto err;
444*7c478bd9Sstevel@tonic-gate 			break;
445*7c478bd9Sstevel@tonic-gate 		}
446*7c478bd9Sstevel@tonic-gate 		/* FALLTHROUGH */
447*7c478bd9Sstevel@tonic-gate 	case DB_LAST:
448*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_last(dbc, cp)) != 0)
449*7c478bd9Sstevel@tonic-gate 			goto err;
450*7c478bd9Sstevel@tonic-gate 		break;
451*7c478bd9Sstevel@tonic-gate 	case DB_SET:
452*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_search(dbc, cp, key, flags, &exact)) != 0)
453*7c478bd9Sstevel@tonic-gate 			goto err;
454*7c478bd9Sstevel@tonic-gate 
455*7c478bd9Sstevel@tonic-gate 		/*
456*7c478bd9Sstevel@tonic-gate 		 * We cannot currently be referencing a deleted record, but we
457*7c478bd9Sstevel@tonic-gate 		 * may be referencing off-page duplicates.
458*7c478bd9Sstevel@tonic-gate 		 *
459*7c478bd9Sstevel@tonic-gate 		 * If we're referencing off-page duplicates, move off-page.
460*7c478bd9Sstevel@tonic-gate 		 * If we moved off-page, move to the next non-deleted record.
461*7c478bd9Sstevel@tonic-gate 		 * If we moved to the next non-deleted record, check to make
462*7c478bd9Sstevel@tonic-gate 		 * sure we didn't switch records because our current record
463*7c478bd9Sstevel@tonic-gate 		 * had no non-deleted data items.
464*7c478bd9Sstevel@tonic-gate 		 */
465*7c478bd9Sstevel@tonic-gate 		start = *cp;
466*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0)
467*7c478bd9Sstevel@tonic-gate 			goto err;
468*7c478bd9Sstevel@tonic-gate 		if (cp->dpgno != PGNO_INVALID && IS_CUR_DELETED(cp)) {
469*7c478bd9Sstevel@tonic-gate 			if ((ret = __bam_c_next(dbc, cp, 0)) != 0)
470*7c478bd9Sstevel@tonic-gate 				goto err;
471*7c478bd9Sstevel@tonic-gate 			if (!POSSIBLE_DUPLICATE(cp, start)) {
472*7c478bd9Sstevel@tonic-gate 				ret = DB_NOTFOUND;
473*7c478bd9Sstevel@tonic-gate 				goto err;
474*7c478bd9Sstevel@tonic-gate 			}
475*7c478bd9Sstevel@tonic-gate 		}
476*7c478bd9Sstevel@tonic-gate 		break;
477*7c478bd9Sstevel@tonic-gate 	case DB_SET_RECNO:
478*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_search(dbc, cp, key, flags, &exact)) != 0)
479*7c478bd9Sstevel@tonic-gate 			goto err;
480*7c478bd9Sstevel@tonic-gate 		break;
481*7c478bd9Sstevel@tonic-gate 	case DB_GET_BOTH:
482*7c478bd9Sstevel@tonic-gate 		if (F_ISSET(dbc, DBC_CONTINUE | DBC_KEYSET)) {
483*7c478bd9Sstevel@tonic-gate 			/* Acquire the current page. */
484*7c478bd9Sstevel@tonic-gate 			if ((ret = memp_fget(dbp->mpf,
485*7c478bd9Sstevel@tonic-gate 			    cp->dpgno == PGNO_INVALID ? &cp->pgno : &cp->dpgno,
486*7c478bd9Sstevel@tonic-gate 			    0, &cp->page)) != 0)
487*7c478bd9Sstevel@tonic-gate 				goto err;
488*7c478bd9Sstevel@tonic-gate 
489*7c478bd9Sstevel@tonic-gate 			/* If DBC_CONTINUE, move to the next item. */
490*7c478bd9Sstevel@tonic-gate 			if (F_ISSET(dbc, DBC_CONTINUE) &&
491*7c478bd9Sstevel@tonic-gate 			    (ret = __bam_c_next(dbc, cp, 1)) != 0)
492*7c478bd9Sstevel@tonic-gate 				goto err;
493*7c478bd9Sstevel@tonic-gate 		} else {
494*7c478bd9Sstevel@tonic-gate 			if ((ret =
495*7c478bd9Sstevel@tonic-gate 			    __bam_c_search(dbc, cp, key, flags, &exact)) != 0)
496*7c478bd9Sstevel@tonic-gate 				goto err;
497*7c478bd9Sstevel@tonic-gate 
498*7c478bd9Sstevel@tonic-gate 			/*
499*7c478bd9Sstevel@tonic-gate 			 * We may be referencing a duplicates page.  Move to
500*7c478bd9Sstevel@tonic-gate 			 * the first duplicate.
501*7c478bd9Sstevel@tonic-gate 			 */
502*7c478bd9Sstevel@tonic-gate 			if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0)
503*7c478bd9Sstevel@tonic-gate 				goto err;
504*7c478bd9Sstevel@tonic-gate 		}
505*7c478bd9Sstevel@tonic-gate 
506*7c478bd9Sstevel@tonic-gate 		/* Search for a matching entry. */
507*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_dsearch(dbc, cp, data, NULL)) != 0)
508*7c478bd9Sstevel@tonic-gate 			goto err;
509*7c478bd9Sstevel@tonic-gate 
510*7c478bd9Sstevel@tonic-gate 		/* Ignore deleted entries. */
511*7c478bd9Sstevel@tonic-gate 		if (IS_CUR_DELETED(cp)) {
512*7c478bd9Sstevel@tonic-gate 			ret = DB_NOTFOUND;
513*7c478bd9Sstevel@tonic-gate 			goto err;
514*7c478bd9Sstevel@tonic-gate 		}
515*7c478bd9Sstevel@tonic-gate 		break;
516*7c478bd9Sstevel@tonic-gate 	case DB_SET_RANGE:
517*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_search(dbc, cp, key, flags, &exact)) != 0)
518*7c478bd9Sstevel@tonic-gate 			goto err;
519*7c478bd9Sstevel@tonic-gate 
520*7c478bd9Sstevel@tonic-gate 		/*
521*7c478bd9Sstevel@tonic-gate 		 * As we didn't require an exact match, the search function
522*7c478bd9Sstevel@tonic-gate 		 * may have returned an entry past the end of the page.  If
523*7c478bd9Sstevel@tonic-gate 		 * so, move to the next entry.
524*7c478bd9Sstevel@tonic-gate 		 */
525*7c478bd9Sstevel@tonic-gate 		if (cp->indx == NUM_ENT(cp->page) &&
526*7c478bd9Sstevel@tonic-gate 		    (ret = __bam_c_next(dbc, cp, 0)) != 0)
527*7c478bd9Sstevel@tonic-gate 			goto err;
528*7c478bd9Sstevel@tonic-gate 
529*7c478bd9Sstevel@tonic-gate 		/*
530*7c478bd9Sstevel@tonic-gate 		 * We may be referencing off-page duplicates, if so, move
531*7c478bd9Sstevel@tonic-gate 		 * off-page.
532*7c478bd9Sstevel@tonic-gate 		 */
533*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0)
534*7c478bd9Sstevel@tonic-gate 			goto err;
535*7c478bd9Sstevel@tonic-gate 
536*7c478bd9Sstevel@tonic-gate 		/*
537*7c478bd9Sstevel@tonic-gate 		 * We may be referencing a deleted record, if so, move to
538*7c478bd9Sstevel@tonic-gate 		 * the next non-deleted record.
539*7c478bd9Sstevel@tonic-gate 		 */
540*7c478bd9Sstevel@tonic-gate 		if (IS_CUR_DELETED(cp) && (ret = __bam_c_next(dbc, cp, 0)) != 0)
541*7c478bd9Sstevel@tonic-gate 			goto err;
542*7c478bd9Sstevel@tonic-gate 		break;
543*7c478bd9Sstevel@tonic-gate 	}
544*7c478bd9Sstevel@tonic-gate 
545*7c478bd9Sstevel@tonic-gate 	/*
546*7c478bd9Sstevel@tonic-gate 	 * Return the key if the user didn't give us one.  If we've moved to
547*7c478bd9Sstevel@tonic-gate 	 * a duplicate page, we may no longer have a pointer to the main page,
548*7c478bd9Sstevel@tonic-gate 	 * so we have to go get it.  We know that it's already read-locked,
549*7c478bd9Sstevel@tonic-gate 	 * however, so we don't have to acquire a new lock.
550*7c478bd9Sstevel@tonic-gate 	 */
551*7c478bd9Sstevel@tonic-gate 	if (flags != DB_SET) {
552*7c478bd9Sstevel@tonic-gate 		if (cp->dpgno != PGNO_INVALID) {
553*7c478bd9Sstevel@tonic-gate 			if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &h)) != 0)
554*7c478bd9Sstevel@tonic-gate 				goto err;
555*7c478bd9Sstevel@tonic-gate 		} else
556*7c478bd9Sstevel@tonic-gate 			h = cp->page;
557*7c478bd9Sstevel@tonic-gate 		ret = __db_ret(dbp,
558*7c478bd9Sstevel@tonic-gate 		    h, cp->indx, key, &dbc->rkey.data, &dbc->rkey.ulen);
559*7c478bd9Sstevel@tonic-gate 		if (cp->dpgno != PGNO_INVALID)
560*7c478bd9Sstevel@tonic-gate 			(void)memp_fput(dbp->mpf, h, 0);
561*7c478bd9Sstevel@tonic-gate 		if (ret)
562*7c478bd9Sstevel@tonic-gate 			goto err;
563*7c478bd9Sstevel@tonic-gate 	}
564*7c478bd9Sstevel@tonic-gate 
565*7c478bd9Sstevel@tonic-gate 	/* Return the data. */
566*7c478bd9Sstevel@tonic-gate 	if ((ret = __db_ret(dbp, cp->page,
567*7c478bd9Sstevel@tonic-gate 	    cp->dpgno == PGNO_INVALID ? cp->indx + O_INDX : cp->dindx,
568*7c478bd9Sstevel@tonic-gate 	    data, &dbc->rdata.data, &dbc->rdata.ulen)) != 0)
569*7c478bd9Sstevel@tonic-gate 		goto err;
570*7c478bd9Sstevel@tonic-gate 
571*7c478bd9Sstevel@tonic-gate 	/*
572*7c478bd9Sstevel@tonic-gate 	 * If the previous cursor record has been deleted, physically delete
573*7c478bd9Sstevel@tonic-gate 	 * the entry from the page.  We clear the deleted flag before we call
574*7c478bd9Sstevel@tonic-gate 	 * the underlying delete routine so that, if an error occurs, and we
575*7c478bd9Sstevel@tonic-gate 	 * restore the cursor, the deleted flag is cleared.  This is because,
576*7c478bd9Sstevel@tonic-gate 	 * if we manage to physically modify the page, and then restore the
577*7c478bd9Sstevel@tonic-gate 	 * cursor, we might try to repeat the page modification when closing
578*7c478bd9Sstevel@tonic-gate 	 * the cursor.
579*7c478bd9Sstevel@tonic-gate 	 */
580*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(&copy, C_DELETED)) {
581*7c478bd9Sstevel@tonic-gate 		F_CLR(&copy, C_DELETED);
582*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_physdel(dbc, &copy, cp->page)) != 0)
583*7c478bd9Sstevel@tonic-gate 			goto err;
584*7c478bd9Sstevel@tonic-gate 	}
585*7c478bd9Sstevel@tonic-gate 	F_CLR(cp, C_DELETED);
586*7c478bd9Sstevel@tonic-gate 
587*7c478bd9Sstevel@tonic-gate 	/* Release the previous lock, if any; the current lock is retained. */
588*7c478bd9Sstevel@tonic-gate 	if (copy.lock != LOCK_INVALID)
589*7c478bd9Sstevel@tonic-gate 		(void)__BT_TLPUT(dbc, copy.lock);
590*7c478bd9Sstevel@tonic-gate 
591*7c478bd9Sstevel@tonic-gate 	/* Release the current page. */
592*7c478bd9Sstevel@tonic-gate 	if ((ret = memp_fput(dbp->mpf, cp->page, 0)) != 0)
593*7c478bd9Sstevel@tonic-gate 		goto err;
594*7c478bd9Sstevel@tonic-gate 
595*7c478bd9Sstevel@tonic-gate 	if (0) {
596*7c478bd9Sstevel@tonic-gate err:		if (cp->page != NULL)
597*7c478bd9Sstevel@tonic-gate 			(void)memp_fput(dbp->mpf, cp->page, 0);
598*7c478bd9Sstevel@tonic-gate 		if (cp->lock != LOCK_INVALID)
599*7c478bd9Sstevel@tonic-gate 			(void)__BT_TLPUT(dbc, cp->lock);
600*7c478bd9Sstevel@tonic-gate 		*cp = copy;
601*7c478bd9Sstevel@tonic-gate 	}
602*7c478bd9Sstevel@tonic-gate 
603*7c478bd9Sstevel@tonic-gate 	/* Release temporary lock upgrade. */
604*7c478bd9Sstevel@tonic-gate 	if (tmp_rmw)
605*7c478bd9Sstevel@tonic-gate 		F_CLR(dbc, DBC_RMW);
606*7c478bd9Sstevel@tonic-gate 
607*7c478bd9Sstevel@tonic-gate 	return (ret);
608*7c478bd9Sstevel@tonic-gate }
609*7c478bd9Sstevel@tonic-gate 
610*7c478bd9Sstevel@tonic-gate /*
611*7c478bd9Sstevel@tonic-gate  * __bam_dsearch --
612*7c478bd9Sstevel@tonic-gate  *	Search for a matching data item (or the first data item that's
613*7c478bd9Sstevel@tonic-gate  *	equal to or greater than the one we're searching for).
 *
 *	dbc:	cursor handle; the DB handle is taken from dbc->dbp.
 *	cp:	cursor position to start searching from; updated in place.
 *	data:	the data item being searched for.
 *	iflagp:	NULL for a lookup.  Non-NULL for an insert, in which case
 *		every 0 return stores DB_BEFORE or DB_AFTER through it to
 *		say where the new item goes relative to the cursor.
 *
 *	Returns 0 on success, DB_NOTFOUND when a lookup finds no match,
 *	or whatever error __db_dsearch returned.
614*7c478bd9Sstevel@tonic-gate  */
615*7c478bd9Sstevel@tonic-gate static int
__bam_dsearch(dbc,cp,data,iflagp)616*7c478bd9Sstevel@tonic-gate __bam_dsearch(dbc, cp, data, iflagp)
617*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
618*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
619*7c478bd9Sstevel@tonic-gate 	DBT *data;
620*7c478bd9Sstevel@tonic-gate 	u_int32_t *iflagp;
621*7c478bd9Sstevel@tonic-gate {
622*7c478bd9Sstevel@tonic-gate 	DB *dbp;
623*7c478bd9Sstevel@tonic-gate 	CURSOR copy, last;
624*7c478bd9Sstevel@tonic-gate 	int cmp, ret;
625*7c478bd9Sstevel@tonic-gate 
626*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
627*7c478bd9Sstevel@tonic-gate 
628*7c478bd9Sstevel@tonic-gate 	/*
629*7c478bd9Sstevel@tonic-gate 	 * If iflagp is non-NULL, we're doing an insert.
630*7c478bd9Sstevel@tonic-gate 	 *
631*7c478bd9Sstevel@tonic-gate 	 * If the duplicates are off-page, use the duplicate search routine.
	 * It fills in cp->dindx and cp->page; cp->dpgno is then refreshed
	 * from the page it returned.
632*7c478bd9Sstevel@tonic-gate 	 */
633*7c478bd9Sstevel@tonic-gate 	if (cp->dpgno != PGNO_INVALID) {
634*7c478bd9Sstevel@tonic-gate 		if ((ret = __db_dsearch(dbc, iflagp != NULL,
635*7c478bd9Sstevel@tonic-gate 		    data, cp->dpgno, &cp->dindx, &cp->page, &cmp)) != 0)
636*7c478bd9Sstevel@tonic-gate 			return (ret);
637*7c478bd9Sstevel@tonic-gate 		cp->dpgno = cp->page->pgno;
638*7c478bd9Sstevel@tonic-gate 
		/* A lookup succeeds only on an exact match (cmp == 0). */
639*7c478bd9Sstevel@tonic-gate 		if (iflagp == NULL) {
640*7c478bd9Sstevel@tonic-gate 			if (cmp != 0)
641*7c478bd9Sstevel@tonic-gate 				return (DB_NOTFOUND);
642*7c478bd9Sstevel@tonic-gate 			return (0);
643*7c478bd9Sstevel@tonic-gate 		}
		/* Off-page inserts are always reported as DB_BEFORE. */
644*7c478bd9Sstevel@tonic-gate 		*iflagp = DB_BEFORE;
645*7c478bd9Sstevel@tonic-gate 		return (0);
646*7c478bd9Sstevel@tonic-gate 	}
647*7c478bd9Sstevel@tonic-gate 
648*7c478bd9Sstevel@tonic-gate 	/* Otherwise, do the search ourselves. */
	/*
	 * "copy" keeps the starting position; it is only read by the
	 * POSSIBLE_DUPLICATE test below.
	 */
649*7c478bd9Sstevel@tonic-gate 	copy = *cp;
650*7c478bd9Sstevel@tonic-gate 	for (;;) {
651*7c478bd9Sstevel@tonic-gate 		/* Save the last interesting cursor position. */
652*7c478bd9Sstevel@tonic-gate 		last = *cp;
653*7c478bd9Sstevel@tonic-gate 
654*7c478bd9Sstevel@tonic-gate 		/* See if the data item matches the one we're looking for. */
		/*
		 * The data item is compared at cp->indx + O_INDX, using
		 * __bam_defcmp when no dup_compare function is configured.
		 */
655*7c478bd9Sstevel@tonic-gate 		if ((cmp = __bam_cmp(dbp, data, cp->page, cp->indx + O_INDX,
656*7c478bd9Sstevel@tonic-gate 		    dbp->dup_compare == NULL ?
657*7c478bd9Sstevel@tonic-gate 		    __bam_defcmp : dbp->dup_compare)) == 0) {
658*7c478bd9Sstevel@tonic-gate 			if (iflagp != NULL)
659*7c478bd9Sstevel@tonic-gate 				*iflagp = DB_AFTER;
660*7c478bd9Sstevel@tonic-gate 			return (0);
661*7c478bd9Sstevel@tonic-gate 		}
662*7c478bd9Sstevel@tonic-gate 
663*7c478bd9Sstevel@tonic-gate 		/*
664*7c478bd9Sstevel@tonic-gate 		 * If duplicate entries are sorted, we're done if we find a
665*7c478bd9Sstevel@tonic-gate 		 * page entry that sorts greater than the application item.
666*7c478bd9Sstevel@tonic-gate 		 * If doing an insert, return success, otherwise DB_NOTFOUND.
667*7c478bd9Sstevel@tonic-gate 		 */
668*7c478bd9Sstevel@tonic-gate 		if (dbp->dup_compare != NULL && cmp < 0) {
669*7c478bd9Sstevel@tonic-gate 			if (iflagp == NULL)
670*7c478bd9Sstevel@tonic-gate 				return (DB_NOTFOUND);
671*7c478bd9Sstevel@tonic-gate 			*iflagp = DB_BEFORE;
672*7c478bd9Sstevel@tonic-gate 			return (0);
673*7c478bd9Sstevel@tonic-gate 		}
674*7c478bd9Sstevel@tonic-gate 
675*7c478bd9Sstevel@tonic-gate 		/*
676*7c478bd9Sstevel@tonic-gate 		 * Move to the next item.  If we reach the end of the page and
677*7c478bd9Sstevel@tonic-gate 		 * we're doing an insert, set the cursor to the last item and
678*7c478bd9Sstevel@tonic-gate 		 * set the referenced memory location so callers know to insert
679*7c478bd9Sstevel@tonic-gate 		 * after the item, instead of before it.  If not inserting, we
680*7c478bd9Sstevel@tonic-gate 		 * return DB_NOTFOUND.
		 *
		 * Note that the DB_NOTFOUND returns below leave cp->indx at
		 * the advanced position; only the insert path restores it.
		 * NOTE(review): callers presumably restore their own saved
		 * cursor on error -- confirm.
681*7c478bd9Sstevel@tonic-gate 		 */
682*7c478bd9Sstevel@tonic-gate 		if ((cp->indx += P_INDX) >= NUM_ENT(cp->page)) {
683*7c478bd9Sstevel@tonic-gate 			if (iflagp == NULL)
684*7c478bd9Sstevel@tonic-gate 				return (DB_NOTFOUND);
685*7c478bd9Sstevel@tonic-gate 			goto use_last;
686*7c478bd9Sstevel@tonic-gate 		}
687*7c478bd9Sstevel@tonic-gate 
688*7c478bd9Sstevel@tonic-gate 		/*
689*7c478bd9Sstevel@tonic-gate 		 * Make sure we didn't go past the end of the duplicates.  The
690*7c478bd9Sstevel@tonic-gate 		 * error conditions are the same as above.
		 *
		 * The use_last label is reached either by falling through
		 * here or from the end-of-page test above; both paths have
		 * already returned when iflagp is NULL, so the store through
		 * iflagp is safe.
691*7c478bd9Sstevel@tonic-gate 		 */
692*7c478bd9Sstevel@tonic-gate 		if (!POSSIBLE_DUPLICATE(cp, copy)) {
693*7c478bd9Sstevel@tonic-gate 			if (iflagp == NULL)
694*7c478bd9Sstevel@tonic-gate 				 return (DB_NOTFOUND);
695*7c478bd9Sstevel@tonic-gate use_last:		*cp = last;
696*7c478bd9Sstevel@tonic-gate 			*iflagp = DB_AFTER;
697*7c478bd9Sstevel@tonic-gate 			return (0);
698*7c478bd9Sstevel@tonic-gate 		}
699*7c478bd9Sstevel@tonic-gate 	}
700*7c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
701*7c478bd9Sstevel@tonic-gate }
702*7c478bd9Sstevel@tonic-gate 
703*7c478bd9Sstevel@tonic-gate /*
704*7c478bd9Sstevel@tonic-gate  * __bam_c_rget --
705*7c478bd9Sstevel@tonic-gate  *	Return the record number for a cursor.
 *
 *	dbc:	cursor handle; the position is read from dbc->internal.
 *	data:	DBT that receives a copy of the record number.
 *	flags:	unused (silenced with COMPQUIET).
 *
 *	The key at the cursor position is copied out, the tree is searched
 *	for that key with record-number reporting requested, and the
 *	resulting record number is copied into "data".
 *
 *	Returns 0 on success, otherwise the error from the first call
 *	that failed (memp_fget, __db_ret, __bam_search or __db_retcopy).
706*7c478bd9Sstevel@tonic-gate  */
707*7c478bd9Sstevel@tonic-gate static int
__bam_c_rget(dbc,data,flags)708*7c478bd9Sstevel@tonic-gate __bam_c_rget(dbc, data, flags)
709*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
710*7c478bd9Sstevel@tonic-gate 	DBT *data;
711*7c478bd9Sstevel@tonic-gate 	u_int32_t flags;
712*7c478bd9Sstevel@tonic-gate {
713*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
714*7c478bd9Sstevel@tonic-gate 	DB *dbp;
715*7c478bd9Sstevel@tonic-gate 	DBT dbt;
716*7c478bd9Sstevel@tonic-gate 	db_recno_t recno;
717*7c478bd9Sstevel@tonic-gate 	int exact, ret;
718*7c478bd9Sstevel@tonic-gate 
719*7c478bd9Sstevel@tonic-gate 	COMPQUIET(flags, 0);
720*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
721*7c478bd9Sstevel@tonic-gate 	cp = dbc->internal;
722*7c478bd9Sstevel@tonic-gate 
723*7c478bd9Sstevel@tonic-gate 	/* Get the page with the current item on it. */
724*7c478bd9Sstevel@tonic-gate 	if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0)
725*7c478bd9Sstevel@tonic-gate 		return (ret);
726*7c478bd9Sstevel@tonic-gate 
727*7c478bd9Sstevel@tonic-gate 	/* Get a copy of the key. */
	/*
	 * The DBT is zeroed first, so dbt.data is NULL and dbt.size is 0
	 * until __db_ret fills it in; DB_DBT_MALLOC asks for the copy to be
	 * allocated, and it is freed at the bottom of this function.
	 */
728*7c478bd9Sstevel@tonic-gate 	memset(&dbt, 0, sizeof(DBT));
729*7c478bd9Sstevel@tonic-gate 	dbt.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL;
730*7c478bd9Sstevel@tonic-gate 	if ((ret = __db_ret(dbp, cp->page, cp->indx, &dbt, NULL, NULL)) != 0)
731*7c478bd9Sstevel@tonic-gate 		goto err;
732*7c478bd9Sstevel@tonic-gate 
	/*
	 * Search for the copied key, using the write variant of the search
	 * when the cursor has DBC_RMW set.  The record number is returned
	 * through recno.
	 */
733*7c478bd9Sstevel@tonic-gate 	exact = 1;
734*7c478bd9Sstevel@tonic-gate 	if ((ret = __bam_search(dbc, &dbt,
735*7c478bd9Sstevel@tonic-gate 	    F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND,
736*7c478bd9Sstevel@tonic-gate 	    1, &recno, &exact)) != 0)
737*7c478bd9Sstevel@tonic-gate 		goto err;
738*7c478bd9Sstevel@tonic-gate 
	/* Hand the record number back through the caller's DBT. */
739*7c478bd9Sstevel@tonic-gate 	ret = __db_retcopy(data, &recno, sizeof(recno),
740*7c478bd9Sstevel@tonic-gate 	    &dbc->rdata.data, &dbc->rdata.ulen, dbp->db_malloc);
741*7c478bd9Sstevel@tonic-gate 
742*7c478bd9Sstevel@tonic-gate 	/* Release the stack. */
743*7c478bd9Sstevel@tonic-gate 	__bam_stkrel(dbc, 0);
744*7c478bd9Sstevel@tonic-gate 
	/*
	 * Common exit, taken on success as well as on error: unpin the
	 * cursor page and free the key copy.  cp->page is not reset here.
	 * NOTE(review): when __db_ret failed, dbt.data may still be NULL;
	 * this assumes __os_free accepts that -- TODO confirm.
	 */
745*7c478bd9Sstevel@tonic-gate err:	(void)memp_fput(dbp->mpf, cp->page, 0);
746*7c478bd9Sstevel@tonic-gate 	__os_free(dbt.data, dbt.size);
747*7c478bd9Sstevel@tonic-gate 	return (ret);
748*7c478bd9Sstevel@tonic-gate }
749*7c478bd9Sstevel@tonic-gate 
750*7c478bd9Sstevel@tonic-gate /*
751*7c478bd9Sstevel@tonic-gate  * __bam_c_put --
752*7c478bd9Sstevel@tonic-gate  *	Put using a cursor.
 *
 *	dbc:	cursor handle; the position is kept in dbc->internal.
 *	key:	key to store; used for the DB_KEYFIRST/DB_KEYLAST search.
 *	data:	data item to store.
 *	flags:	DB_AFTER, DB_BEFORE, DB_CURRENT, DB_KEYFIRST or DB_KEYLAST.
 *
 *	The cursor is saved in "copy" before any work is done.  On error
 *	the cursor is restored from that copy; on success it is left
 *	referencing the newly stored item.  If the insert reports
 *	DB_NEEDSPLIT, the page is split and the whole operation is retried.
 *
 *	Returns 0 on success.  Visible failure values include EINVAL (CDB
 *	cursor without write intent, or DB_CURRENT data that does not
 *	compare equal under dup_compare), EAGAIN (CDB lock upgrade failed),
 *	and any error from the routines called below.
753*7c478bd9Sstevel@tonic-gate  */
754*7c478bd9Sstevel@tonic-gate static int
__bam_c_put(dbc,key,data,flags)755*7c478bd9Sstevel@tonic-gate __bam_c_put(dbc, key, data, flags)
756*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
757*7c478bd9Sstevel@tonic-gate 	DBT *key, *data;
758*7c478bd9Sstevel@tonic-gate 	u_int32_t flags;
759*7c478bd9Sstevel@tonic-gate {
760*7c478bd9Sstevel@tonic-gate 	CURSOR *cp, copy;
761*7c478bd9Sstevel@tonic-gate 	DB *dbp;
762*7c478bd9Sstevel@tonic-gate 	DBT dbt;
763*7c478bd9Sstevel@tonic-gate 	db_indx_t indx;
764*7c478bd9Sstevel@tonic-gate 	db_pgno_t pgno;
765*7c478bd9Sstevel@tonic-gate 	u_int32_t iiflags, iiop;
766*7c478bd9Sstevel@tonic-gate 	int exact, needkey, ret, stack;
767*7c478bd9Sstevel@tonic-gate 	void *arg;
768*7c478bd9Sstevel@tonic-gate 
769*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
770*7c478bd9Sstevel@tonic-gate 	cp = dbc->internal;
771*7c478bd9Sstevel@tonic-gate 
772*7c478bd9Sstevel@tonic-gate 	DB_PANIC_CHECK(dbp);
773*7c478bd9Sstevel@tonic-gate 
	/* The key is only passed to the debug log for the keyed flags. */
774*7c478bd9Sstevel@tonic-gate 	DEBUG_LWRITE(dbc, dbc->txn, "bam_c_put",
775*7c478bd9Sstevel@tonic-gate 	    flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL,
776*7c478bd9Sstevel@tonic-gate 	    data, flags);
777*7c478bd9Sstevel@tonic-gate 
	/*
	 * Validate the arguments.  The last argument tells the checker
	 * whether the cursor currently has a page number set.
	 */
778*7c478bd9Sstevel@tonic-gate 	if ((ret = __db_cputchk(dbp, key, data, flags,
779*7c478bd9Sstevel@tonic-gate 	    F_ISSET(dbp, DB_AM_RDONLY), cp->pgno != PGNO_INVALID)) != 0)
780*7c478bd9Sstevel@tonic-gate 		return (ret);
781*7c478bd9Sstevel@tonic-gate 
782*7c478bd9Sstevel@tonic-gate 	/*
783*7c478bd9Sstevel@tonic-gate 	 * If we are running CDB, this had better be either a write
784*7c478bd9Sstevel@tonic-gate 	 * cursor or an immediate writer.  If it's a regular writer,
785*7c478bd9Sstevel@tonic-gate 	 * that means we have an IWRITE lock and we need to upgrade
786*7c478bd9Sstevel@tonic-gate 	 * it to a write lock.
	 *
	 * Note that a failed upgrade is always reported as EAGAIN; the
	 * value lock_get returned is stored in ret but not propagated.
	 * The matching downgrade is at the bottom of the function.
787*7c478bd9Sstevel@tonic-gate 	 */
788*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(dbp, DB_AM_CDB)) {
789*7c478bd9Sstevel@tonic-gate 		if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
790*7c478bd9Sstevel@tonic-gate 			return (EINVAL);
791*7c478bd9Sstevel@tonic-gate 
792*7c478bd9Sstevel@tonic-gate 		if (F_ISSET(dbc, DBC_RMW) &&
793*7c478bd9Sstevel@tonic-gate 		    (ret = lock_get(dbp->dbenv->lk_info, dbc->locker,
794*7c478bd9Sstevel@tonic-gate 		    DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE,
795*7c478bd9Sstevel@tonic-gate 		    &dbc->mylock)) != 0)
796*7c478bd9Sstevel@tonic-gate 			return (EAGAIN);
797*7c478bd9Sstevel@tonic-gate 	}
798*7c478bd9Sstevel@tonic-gate 
	/*
	 * The "if (0)" body is skipped on the first pass.  It is entered
	 * only through the "split" label, from the DB_NEEDSPLIT test after
	 * the insert below, by which time needkey, stack, indx and copy
	 * have all been set.  After the split, control falls through into
	 * the normal path and the put is retried.
	 */
799*7c478bd9Sstevel@tonic-gate 	if (0) {
800*7c478bd9Sstevel@tonic-gate split:		/*
801*7c478bd9Sstevel@tonic-gate 		 * To split, we need a valid key for the page.  Since it's a
802*7c478bd9Sstevel@tonic-gate 		 * cursor, we have to build one.
803*7c478bd9Sstevel@tonic-gate 		 *
804*7c478bd9Sstevel@tonic-gate 		 * Acquire a copy of a key from the page.
805*7c478bd9Sstevel@tonic-gate 		 */
806*7c478bd9Sstevel@tonic-gate 		if (needkey) {
807*7c478bd9Sstevel@tonic-gate 			memset(&dbt, 0, sizeof(DBT));
808*7c478bd9Sstevel@tonic-gate 			if ((ret = __db_ret(dbp, cp->page, indx,
809*7c478bd9Sstevel@tonic-gate 			    &dbt, &dbc->rkey.data, &dbc->rkey.ulen)) != 0)
810*7c478bd9Sstevel@tonic-gate 				goto err;
811*7c478bd9Sstevel@tonic-gate 			arg = &dbt;
812*7c478bd9Sstevel@tonic-gate 		} else
813*7c478bd9Sstevel@tonic-gate 			arg = key;
814*7c478bd9Sstevel@tonic-gate 
815*7c478bd9Sstevel@tonic-gate 		/*
816*7c478bd9Sstevel@tonic-gate 		 * Discard any locks and pinned pages (the locks are discarded
817*7c478bd9Sstevel@tonic-gate 		 * even if we're running with transactions, as they lock pages
818*7c478bd9Sstevel@tonic-gate 		 * that we're sorry we ever acquired).  If stack is set and the
819*7c478bd9Sstevel@tonic-gate 		 * cursor entries are valid, they point to the same entries as
820*7c478bd9Sstevel@tonic-gate 		 * the stack, don't free them twice.
821*7c478bd9Sstevel@tonic-gate 		 */
822*7c478bd9Sstevel@tonic-gate 		if (stack) {
823*7c478bd9Sstevel@tonic-gate 			(void)__bam_stkrel(dbc, 1);
824*7c478bd9Sstevel@tonic-gate 			stack = 0;
825*7c478bd9Sstevel@tonic-gate 		} else
826*7c478bd9Sstevel@tonic-gate 			DISCARD(dbc, cp);
827*7c478bd9Sstevel@tonic-gate 
828*7c478bd9Sstevel@tonic-gate 		/*
829*7c478bd9Sstevel@tonic-gate 		 * Restore the cursor to its original value.  This is necessary
830*7c478bd9Sstevel@tonic-gate 		 * for two reasons.  First, we are about to copy it in case of
831*7c478bd9Sstevel@tonic-gate 		 * error, again.  Second, we adjust cursors during the split,
832*7c478bd9Sstevel@tonic-gate 		 * and we have to ensure this cursor is adjusted appropriately,
833*7c478bd9Sstevel@tonic-gate 		 * along with all the other cursors.
834*7c478bd9Sstevel@tonic-gate 		 */
835*7c478bd9Sstevel@tonic-gate 		*cp = copy;
836*7c478bd9Sstevel@tonic-gate 
837*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_split(dbc, arg)) != 0)
838*7c478bd9Sstevel@tonic-gate 			goto err;
839*7c478bd9Sstevel@tonic-gate 	}
840*7c478bd9Sstevel@tonic-gate 
841*7c478bd9Sstevel@tonic-gate 	/*
842*7c478bd9Sstevel@tonic-gate 	 * Initialize the cursor for a new retrieval.  Clear the cursor's
843*7c478bd9Sstevel@tonic-gate 	 * page pointer, it was set before this operation, and no longer
844*7c478bd9Sstevel@tonic-gate 	 * has any meaning.
	 *
	 * The saved copy keeps the previous lock; the working cursor's lock
	 * is invalidated only after the copy has been taken.
845*7c478bd9Sstevel@tonic-gate 	 */
846*7c478bd9Sstevel@tonic-gate 	cp->page = NULL;
847*7c478bd9Sstevel@tonic-gate 	copy = *cp;
848*7c478bd9Sstevel@tonic-gate 	cp->lock = LOCK_INVALID;
849*7c478bd9Sstevel@tonic-gate 
	/*
	 * iiop, pgno and indx are assigned only inside the switch cases
	 * below; there is no default case.
	 * NOTE(review): presumably __db_cputchk has already rejected any
	 * other flag value -- confirm.
	 */
850*7c478bd9Sstevel@tonic-gate 	iiflags = needkey = ret = stack = 0;
851*7c478bd9Sstevel@tonic-gate 	switch (flags) {
852*7c478bd9Sstevel@tonic-gate 	case DB_AFTER:
853*7c478bd9Sstevel@tonic-gate 	case DB_BEFORE:
854*7c478bd9Sstevel@tonic-gate 	case DB_CURRENT:
		/*
		 * Positional puts carry no usable key, so a split has to
		 * build one from the page (see the split code above).
		 */
855*7c478bd9Sstevel@tonic-gate 		needkey = 1;
856*7c478bd9Sstevel@tonic-gate 		if (cp->dpgno == PGNO_INVALID) {
857*7c478bd9Sstevel@tonic-gate 			pgno = cp->pgno;
858*7c478bd9Sstevel@tonic-gate 			indx = cp->indx;
859*7c478bd9Sstevel@tonic-gate 		} else {
860*7c478bd9Sstevel@tonic-gate 			pgno = cp->dpgno;
861*7c478bd9Sstevel@tonic-gate 			indx = cp->dindx;
862*7c478bd9Sstevel@tonic-gate 		}
863*7c478bd9Sstevel@tonic-gate 
864*7c478bd9Sstevel@tonic-gate 		/*
865*7c478bd9Sstevel@tonic-gate 		 * !!!
866*7c478bd9Sstevel@tonic-gate 		 * This test is right -- we don't yet support duplicates and
867*7c478bd9Sstevel@tonic-gate 		 * record numbers in the same tree, so ignore duplicates if
868*7c478bd9Sstevel@tonic-gate 		 * DB_BT_RECNUM set.
869*7c478bd9Sstevel@tonic-gate 		 */
870*7c478bd9Sstevel@tonic-gate 		if (F_ISSET(dbp, DB_BT_RECNUM) &&
871*7c478bd9Sstevel@tonic-gate 		    (flags != DB_CURRENT || F_ISSET(cp, C_DELETED))) {
872*7c478bd9Sstevel@tonic-gate 			/* Acquire a complete stack. */
873*7c478bd9Sstevel@tonic-gate 			if ((ret = __bam_c_getstack(dbc, cp)) != 0)
874*7c478bd9Sstevel@tonic-gate 				goto err;
875*7c478bd9Sstevel@tonic-gate 			cp->page = cp->csp->page;
876*7c478bd9Sstevel@tonic-gate 
877*7c478bd9Sstevel@tonic-gate 			stack = 1;
878*7c478bd9Sstevel@tonic-gate 			iiflags = BI_DOINCR;
879*7c478bd9Sstevel@tonic-gate 		} else {
880*7c478bd9Sstevel@tonic-gate 			/* Acquire the current page. */
			/*
			 * The write lock is taken on cp->pgno, while the
			 * page fetched is pgno, which is the duplicate page
			 * when the cursor is on off-page duplicates.
			 */
881*7c478bd9Sstevel@tonic-gate 			if ((ret = __bam_lget(dbc,
882*7c478bd9Sstevel@tonic-gate 			    0, cp->pgno, DB_LOCK_WRITE, &cp->lock)) == 0)
883*7c478bd9Sstevel@tonic-gate 				ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page);
884*7c478bd9Sstevel@tonic-gate 			if (ret != 0)
885*7c478bd9Sstevel@tonic-gate 				goto err;
886*7c478bd9Sstevel@tonic-gate 
887*7c478bd9Sstevel@tonic-gate 			iiflags = 0;
888*7c478bd9Sstevel@tonic-gate 		}
889*7c478bd9Sstevel@tonic-gate 
890*7c478bd9Sstevel@tonic-gate 		/*
891*7c478bd9Sstevel@tonic-gate 		 * If the user has specified a duplicate comparison function,
892*7c478bd9Sstevel@tonic-gate 		 * we return an error if DB_CURRENT was specified and the
893*7c478bd9Sstevel@tonic-gate 		 * replacement data doesn't compare equal to the current data.
894*7c478bd9Sstevel@tonic-gate 		 * This stops apps from screwing up the duplicate sort order.
		 *
		 * NOTE(review): the comparison uses indx, which is cp->indx
		 * for on-page items, whereas __bam_dsearch compares the data
		 * item at cp->indx + O_INDX.  Confirm that indx is the
		 * intended slot for the on-page case.
895*7c478bd9Sstevel@tonic-gate 		 */
896*7c478bd9Sstevel@tonic-gate 		if (flags == DB_CURRENT && dbp->dup_compare != NULL)
897*7c478bd9Sstevel@tonic-gate 			if (__bam_cmp(dbp, data,
898*7c478bd9Sstevel@tonic-gate 			    cp->page, indx, dbp->dup_compare) != 0) {
899*7c478bd9Sstevel@tonic-gate 				ret = EINVAL;
900*7c478bd9Sstevel@tonic-gate 				goto err;
901*7c478bd9Sstevel@tonic-gate 			}
902*7c478bd9Sstevel@tonic-gate 
903*7c478bd9Sstevel@tonic-gate 		iiop = flags;
904*7c478bd9Sstevel@tonic-gate 		break;
905*7c478bd9Sstevel@tonic-gate 	case DB_KEYFIRST:
906*7c478bd9Sstevel@tonic-gate 	case DB_KEYLAST:
907*7c478bd9Sstevel@tonic-gate 		/*
908*7c478bd9Sstevel@tonic-gate 		 * If we have a duplicate comparison function, we position to
909*7c478bd9Sstevel@tonic-gate 		 * the first of any on-page duplicates, and use __bam_dsearch
910*7c478bd9Sstevel@tonic-gate 		 * to search for the right slot.  Otherwise, we position to
911*7c478bd9Sstevel@tonic-gate 		 * the first/last of any on-page duplicates based on the flag
912*7c478bd9Sstevel@tonic-gate 		 * value.
913*7c478bd9Sstevel@tonic-gate 		 */
914*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_search(dbc, cp, key,
915*7c478bd9Sstevel@tonic-gate 		    flags == DB_KEYFIRST || dbp->dup_compare != NULL ?
916*7c478bd9Sstevel@tonic-gate 		    DB_KEYFIRST : DB_KEYLAST, &exact)) != 0)
917*7c478bd9Sstevel@tonic-gate 			goto err;
918*7c478bd9Sstevel@tonic-gate 		stack = 1;
919*7c478bd9Sstevel@tonic-gate 
920*7c478bd9Sstevel@tonic-gate 		/*
921*7c478bd9Sstevel@tonic-gate 		 * If an exact match:
922*7c478bd9Sstevel@tonic-gate 		 *	If duplicates aren't supported, replace the current
923*7c478bd9Sstevel@tonic-gate 		 *	item.  (When implementing the DB->put function, our
924*7c478bd9Sstevel@tonic-gate 		 *	caller has already checked the DB_NOOVERWRITE flag.)
925*7c478bd9Sstevel@tonic-gate 		 *
926*7c478bd9Sstevel@tonic-gate 		 *	If there's a duplicate comparison function, find the
927*7c478bd9Sstevel@tonic-gate 		 *	correct slot for this duplicate item.
928*7c478bd9Sstevel@tonic-gate 		 *
929*7c478bd9Sstevel@tonic-gate 		 *	If there's no duplicate comparison function, set the
930*7c478bd9Sstevel@tonic-gate 		 *	insert flag based on the argument flags.
931*7c478bd9Sstevel@tonic-gate 		 *
932*7c478bd9Sstevel@tonic-gate 		 * If there's no match, the search function returned the
933*7c478bd9Sstevel@tonic-gate 		 * smallest slot greater than the key, use it.
934*7c478bd9Sstevel@tonic-gate 		 */
935*7c478bd9Sstevel@tonic-gate 		if (exact) {
936*7c478bd9Sstevel@tonic-gate 			if (F_ISSET(dbp, DB_AM_DUP)) {
937*7c478bd9Sstevel@tonic-gate 				/*
938*7c478bd9Sstevel@tonic-gate 				 * If at off-page duplicate page, move to the
939*7c478bd9Sstevel@tonic-gate 				 * first or last entry -- if a comparison
940*7c478bd9Sstevel@tonic-gate 				 * function was specified, start searching at
941*7c478bd9Sstevel@tonic-gate 				 * the first entry.  Otherwise, move based on
942*7c478bd9Sstevel@tonic-gate 				 * the DB_KEYFIRST/DB_KEYLAST flags.
943*7c478bd9Sstevel@tonic-gate 				 */
944*7c478bd9Sstevel@tonic-gate 				if ((ret = __bam_dup(dbc, cp, cp->indx,
945*7c478bd9Sstevel@tonic-gate 				    dbp->dup_compare == NULL &&
946*7c478bd9Sstevel@tonic-gate 				    flags != DB_KEYFIRST)) != 0)
947*7c478bd9Sstevel@tonic-gate 					goto err;
948*7c478bd9Sstevel@tonic-gate 
949*7c478bd9Sstevel@tonic-gate 				/*
950*7c478bd9Sstevel@tonic-gate 				 * If there's a comparison function, search for
951*7c478bd9Sstevel@tonic-gate 				 * the correct slot.  Otherwise, set the insert
952*7c478bd9Sstevel@tonic-gate 				 * flag based on the argument flag.
953*7c478bd9Sstevel@tonic-gate 				 */
954*7c478bd9Sstevel@tonic-gate 				if (dbp->dup_compare == NULL)
955*7c478bd9Sstevel@tonic-gate 					iiop = flags == DB_KEYFIRST ?
956*7c478bd9Sstevel@tonic-gate 					    DB_BEFORE : DB_AFTER;
957*7c478bd9Sstevel@tonic-gate 				else
958*7c478bd9Sstevel@tonic-gate 					if ((ret = __bam_dsearch(dbc,
959*7c478bd9Sstevel@tonic-gate 					    cp, data, &iiop)) != 0)
960*7c478bd9Sstevel@tonic-gate 						goto err;
961*7c478bd9Sstevel@tonic-gate 			} else
962*7c478bd9Sstevel@tonic-gate 				iiop = DB_CURRENT;
963*7c478bd9Sstevel@tonic-gate 			iiflags = 0;
964*7c478bd9Sstevel@tonic-gate 		} else {
965*7c478bd9Sstevel@tonic-gate 			iiop = DB_BEFORE;
966*7c478bd9Sstevel@tonic-gate 			iiflags = BI_NEWKEY;
967*7c478bd9Sstevel@tonic-gate 		}
968*7c478bd9Sstevel@tonic-gate 
		/*
		 * Pick the page/index pair the insert operates on, after
		 * the positioning above may have moved the cursor.
		 */
969*7c478bd9Sstevel@tonic-gate 		if (cp->dpgno == PGNO_INVALID) {
970*7c478bd9Sstevel@tonic-gate 			pgno = cp->pgno;
971*7c478bd9Sstevel@tonic-gate 			indx = cp->indx;
972*7c478bd9Sstevel@tonic-gate 		} else {
973*7c478bd9Sstevel@tonic-gate 			pgno = cp->dpgno;
974*7c478bd9Sstevel@tonic-gate 			indx = cp->dindx;
975*7c478bd9Sstevel@tonic-gate 		}
976*7c478bd9Sstevel@tonic-gate 		break;
977*7c478bd9Sstevel@tonic-gate 	}
978*7c478bd9Sstevel@tonic-gate 
	/*
	 * Do the insert.  The page pointer and index are passed by address;
	 * indx is read back below as the position of the stored item.
	 */
979*7c478bd9Sstevel@tonic-gate 	ret = __bam_iitem(dbc, &cp->page, &indx, key, data, iiop, iiflags);
980*7c478bd9Sstevel@tonic-gate 
981*7c478bd9Sstevel@tonic-gate 	if (ret == DB_NEEDSPLIT)
982*7c478bd9Sstevel@tonic-gate 		goto split;
983*7c478bd9Sstevel@tonic-gate 	if (ret != 0)
984*7c478bd9Sstevel@tonic-gate 		goto err;
985*7c478bd9Sstevel@tonic-gate 
986*7c478bd9Sstevel@tonic-gate 	/*
987*7c478bd9Sstevel@tonic-gate 	 * Reset any cursors referencing this item that might have the item
988*7c478bd9Sstevel@tonic-gate 	 * marked for deletion.
989*7c478bd9Sstevel@tonic-gate 	 */
990*7c478bd9Sstevel@tonic-gate 	if (iiop == DB_CURRENT) {
991*7c478bd9Sstevel@tonic-gate 		(void)__bam_ca_delete(dbp, pgno, indx, 0);
992*7c478bd9Sstevel@tonic-gate 
993*7c478bd9Sstevel@tonic-gate 		/*
994*7c478bd9Sstevel@tonic-gate 		 * It's also possible that we are the cursor that had the
995*7c478bd9Sstevel@tonic-gate 		 * item marked for deletion, in which case we want to make
996*7c478bd9Sstevel@tonic-gate 		 * sure that we don't delete it because we had the delete
997*7c478bd9Sstevel@tonic-gate 		 * flag set already.
998*7c478bd9Sstevel@tonic-gate 		 */
999*7c478bd9Sstevel@tonic-gate 		if (cp->pgno == copy.pgno && cp->indx == copy.indx &&
1000*7c478bd9Sstevel@tonic-gate 		    cp->dpgno == copy.dpgno && cp->dindx == copy.dindx)
1001*7c478bd9Sstevel@tonic-gate 			F_CLR(&copy, C_DELETED);
1002*7c478bd9Sstevel@tonic-gate 	}
1003*7c478bd9Sstevel@tonic-gate 
1004*7c478bd9Sstevel@tonic-gate 	/*
1005*7c478bd9Sstevel@tonic-gate 	 * Update the cursor to point to the new entry.  The new entry was
1006*7c478bd9Sstevel@tonic-gate 	 * stored on the current page, because we split pages until it was
1007*7c478bd9Sstevel@tonic-gate 	 * possible.
1008*7c478bd9Sstevel@tonic-gate 	 */
1009*7c478bd9Sstevel@tonic-gate 	if (cp->dpgno == PGNO_INVALID)
1010*7c478bd9Sstevel@tonic-gate 		cp->indx = indx;
1011*7c478bd9Sstevel@tonic-gate 	else
1012*7c478bd9Sstevel@tonic-gate 		cp->dindx = indx;
1013*7c478bd9Sstevel@tonic-gate 
1014*7c478bd9Sstevel@tonic-gate 	/*
1015*7c478bd9Sstevel@tonic-gate 	 * If the previous cursor record has been deleted, physically delete
1016*7c478bd9Sstevel@tonic-gate 	 * the entry from the page.  We clear the deleted flag before we call
1017*7c478bd9Sstevel@tonic-gate 	 * the underlying delete routine so that, if an error occurs, and we
1018*7c478bd9Sstevel@tonic-gate 	 * restore the cursor, the deleted flag is cleared.  This is because,
1019*7c478bd9Sstevel@tonic-gate 	 * if we manage to physically modify the page, and then restore the
1020*7c478bd9Sstevel@tonic-gate 	 * cursor, we might try to repeat the page modification when closing
1021*7c478bd9Sstevel@tonic-gate 	 * the cursor.
1022*7c478bd9Sstevel@tonic-gate 	 */
1023*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(&copy, C_DELETED)) {
1024*7c478bd9Sstevel@tonic-gate 		F_CLR(&copy, C_DELETED);
1025*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_physdel(dbc, &copy, cp->page)) != 0)
1026*7c478bd9Sstevel@tonic-gate 			goto err;
1027*7c478bd9Sstevel@tonic-gate 	}
1028*7c478bd9Sstevel@tonic-gate 	F_CLR(cp, C_DELETED);
1029*7c478bd9Sstevel@tonic-gate 
1030*7c478bd9Sstevel@tonic-gate 	/* Release the previous lock, if any; the current lock is retained. */
1031*7c478bd9Sstevel@tonic-gate 	if (copy.lock != LOCK_INVALID)
1032*7c478bd9Sstevel@tonic-gate 		(void)__BT_TLPUT(dbc, copy.lock);
1033*7c478bd9Sstevel@tonic-gate 
1034*7c478bd9Sstevel@tonic-gate 	/*
1035*7c478bd9Sstevel@tonic-gate 	 * Discard any pages pinned in the tree and their locks, except for
1036*7c478bd9Sstevel@tonic-gate 	 * the leaf page, for which we only discard the pin, not the lock.
1037*7c478bd9Sstevel@tonic-gate 	 *
1038*7c478bd9Sstevel@tonic-gate 	 * Note, the leaf page participated in the stack we acquired, and so
1039*7c478bd9Sstevel@tonic-gate 	 * we have to adjust the stack as necessary.  If there was only a
1040*7c478bd9Sstevel@tonic-gate 	 * single page on the stack, we don't have to free further stack pages.
1041*7c478bd9Sstevel@tonic-gate 	 */
1042*7c478bd9Sstevel@tonic-gate 	if (stack && BT_STK_POP(cp) != NULL)
1043*7c478bd9Sstevel@tonic-gate 		(void)__bam_stkrel(dbc, 0);
1044*7c478bd9Sstevel@tonic-gate 
1045*7c478bd9Sstevel@tonic-gate 	/* Release the current page. */
1046*7c478bd9Sstevel@tonic-gate 	if ((ret = memp_fput(dbp->mpf, cp->page, 0)) != 0)
1047*7c478bd9Sstevel@tonic-gate 		goto err;
1048*7c478bd9Sstevel@tonic-gate 
	/*
	 * Error exit, entered only through the "err" label: release
	 * whatever is pinned and put the cursor back the way it was when
	 * the copy was taken.
	 */
1049*7c478bd9Sstevel@tonic-gate 	if (0) {
1050*7c478bd9Sstevel@tonic-gate err:		/* Discard any pinned pages. */
1051*7c478bd9Sstevel@tonic-gate 		if (stack)
1052*7c478bd9Sstevel@tonic-gate 			(void)__bam_stkrel(dbc, 0);
1053*7c478bd9Sstevel@tonic-gate 		else
1054*7c478bd9Sstevel@tonic-gate 			DISCARD(dbc, cp);
1055*7c478bd9Sstevel@tonic-gate 		*cp = copy;
1056*7c478bd9Sstevel@tonic-gate 	}
1057*7c478bd9Sstevel@tonic-gate 
	/*
	 * Undo the CDB lock upgrade done at the top of the function.  This
	 * runs on both the success and the error path.
	 */
1058*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW))
1059*7c478bd9Sstevel@tonic-gate 		(void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
1060*7c478bd9Sstevel@tonic-gate 		    DB_LOCK_IWRITE, 0);
1061*7c478bd9Sstevel@tonic-gate 
1062*7c478bd9Sstevel@tonic-gate 	return (ret);
1063*7c478bd9Sstevel@tonic-gate }
1064*7c478bd9Sstevel@tonic-gate 
1065*7c478bd9Sstevel@tonic-gate /*
1066*7c478bd9Sstevel@tonic-gate  * __bam_c_first --
1067*7c478bd9Sstevel@tonic-gate  *	Return the first record.
1068*7c478bd9Sstevel@tonic-gate  */
1069*7c478bd9Sstevel@tonic-gate static int
__bam_c_first(dbc,cp)1070*7c478bd9Sstevel@tonic-gate __bam_c_first(dbc, cp)
1071*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
1072*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
1073*7c478bd9Sstevel@tonic-gate {
1074*7c478bd9Sstevel@tonic-gate 	DB *dbp;
1075*7c478bd9Sstevel@tonic-gate 	db_pgno_t pgno;
1076*7c478bd9Sstevel@tonic-gate 	int ret;
1077*7c478bd9Sstevel@tonic-gate 
1078*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
1079*7c478bd9Sstevel@tonic-gate 
1080*7c478bd9Sstevel@tonic-gate 	/* Walk down the left-hand side of the tree. */
1081*7c478bd9Sstevel@tonic-gate 	for (pgno = PGNO_ROOT;;) {
1082*7c478bd9Sstevel@tonic-gate 		if ((ret =
1083*7c478bd9Sstevel@tonic-gate 		    __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0)
1084*7c478bd9Sstevel@tonic-gate 			return (ret);
1085*7c478bd9Sstevel@tonic-gate 		if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0)
1086*7c478bd9Sstevel@tonic-gate 			return (ret);
1087*7c478bd9Sstevel@tonic-gate 
1088*7c478bd9Sstevel@tonic-gate 		/* If we find a leaf page, we're done. */
1089*7c478bd9Sstevel@tonic-gate 		if (ISLEAF(cp->page))
1090*7c478bd9Sstevel@tonic-gate 			break;
1091*7c478bd9Sstevel@tonic-gate 
1092*7c478bd9Sstevel@tonic-gate 		pgno = GET_BINTERNAL(cp->page, 0)->pgno;
1093*7c478bd9Sstevel@tonic-gate 		DISCARD(dbc, cp);
1094*7c478bd9Sstevel@tonic-gate 	}
1095*7c478bd9Sstevel@tonic-gate 
1096*7c478bd9Sstevel@tonic-gate 	cp->pgno = cp->page->pgno;
1097*7c478bd9Sstevel@tonic-gate 	cp->indx = 0;
1098*7c478bd9Sstevel@tonic-gate 	cp->dpgno = PGNO_INVALID;
1099*7c478bd9Sstevel@tonic-gate 
1100*7c478bd9Sstevel@tonic-gate 	/* Check for duplicates. */
1101*7c478bd9Sstevel@tonic-gate 	if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0)
1102*7c478bd9Sstevel@tonic-gate 		return (ret);
1103*7c478bd9Sstevel@tonic-gate 
1104*7c478bd9Sstevel@tonic-gate 	/* If on an empty page or a deleted record, move to the next one. */
1105*7c478bd9Sstevel@tonic-gate 	if (NUM_ENT(cp->page) == 0 || IS_CUR_DELETED(cp))
1106*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_c_next(dbc, cp, 0)) != 0)
1107*7c478bd9Sstevel@tonic-gate 			return (ret);
1108*7c478bd9Sstevel@tonic-gate 
1109*7c478bd9Sstevel@tonic-gate 	return (0);
1110