1*7c478bd9Sstevel@tonic-gate /*-
2*7c478bd9Sstevel@tonic-gate  * See the file LICENSE for redistribution information.
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * Copyright (c) 1996, 1997, 1998
5*7c478bd9Sstevel@tonic-gate  *	Sleepycat Software.  All rights reserved.
6*7c478bd9Sstevel@tonic-gate  */
7*7c478bd9Sstevel@tonic-gate /*
8*7c478bd9Sstevel@tonic-gate  * Copyright (c) 1990, 1993, 1994, 1995, 1996
9*7c478bd9Sstevel@tonic-gate  *	Keith Bostic.  All rights reserved.
10*7c478bd9Sstevel@tonic-gate  */
11*7c478bd9Sstevel@tonic-gate /*
12*7c478bd9Sstevel@tonic-gate  * Copyright (c) 1990, 1993, 1994, 1995
13*7c478bd9Sstevel@tonic-gate  *	The Regents of the University of California.  All rights reserved.
14*7c478bd9Sstevel@tonic-gate  *
15*7c478bd9Sstevel@tonic-gate  * This code is derived from software contributed to Berkeley by
16*7c478bd9Sstevel@tonic-gate  * Mike Olson.
17*7c478bd9Sstevel@tonic-gate  *
18*7c478bd9Sstevel@tonic-gate  * Redistribution and use in source and binary forms, with or without
19*7c478bd9Sstevel@tonic-gate  * modification, are permitted provided that the following conditions
20*7c478bd9Sstevel@tonic-gate  * are met:
21*7c478bd9Sstevel@tonic-gate  * 1. Redistributions of source code must retain the above copyright
22*7c478bd9Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer.
23*7c478bd9Sstevel@tonic-gate  * 2. Redistributions in binary form must reproduce the above copyright
24*7c478bd9Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer in the
25*7c478bd9Sstevel@tonic-gate  *    documentation and/or other materials provided with the distribution.
26*7c478bd9Sstevel@tonic-gate  * 3. All advertising materials mentioning features or use of this software
27*7c478bd9Sstevel@tonic-gate  *    must display the following acknowledgement:
28*7c478bd9Sstevel@tonic-gate  *	This product includes software developed by the University of
29*7c478bd9Sstevel@tonic-gate  *	California, Berkeley and its contributors.
30*7c478bd9Sstevel@tonic-gate  * 4. Neither the name of the University nor the names of its contributors
31*7c478bd9Sstevel@tonic-gate  *    may be used to endorse or promote products derived from this software
32*7c478bd9Sstevel@tonic-gate  *    without specific prior written permission.
33*7c478bd9Sstevel@tonic-gate  *
34*7c478bd9Sstevel@tonic-gate  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35*7c478bd9Sstevel@tonic-gate  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36*7c478bd9Sstevel@tonic-gate  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37*7c478bd9Sstevel@tonic-gate  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38*7c478bd9Sstevel@tonic-gate  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39*7c478bd9Sstevel@tonic-gate  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40*7c478bd9Sstevel@tonic-gate  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41*7c478bd9Sstevel@tonic-gate  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42*7c478bd9Sstevel@tonic-gate  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43*7c478bd9Sstevel@tonic-gate  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44*7c478bd9Sstevel@tonic-gate  * SUCH DAMAGE.
45*7c478bd9Sstevel@tonic-gate  */
46*7c478bd9Sstevel@tonic-gate 
47*7c478bd9Sstevel@tonic-gate #include "config.h"
48*7c478bd9Sstevel@tonic-gate 
49*7c478bd9Sstevel@tonic-gate #ifndef lint
50*7c478bd9Sstevel@tonic-gate static const char sccsid[] = "@(#)bt_search.c	10.25 (Sleepycat) 12/16/98";
51*7c478bd9Sstevel@tonic-gate #endif /* not lint */
52*7c478bd9Sstevel@tonic-gate 
53*7c478bd9Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES
54*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
55*7c478bd9Sstevel@tonic-gate 
56*7c478bd9Sstevel@tonic-gate #include <errno.h>
57*7c478bd9Sstevel@tonic-gate #include <string.h>
58*7c478bd9Sstevel@tonic-gate #endif
59*7c478bd9Sstevel@tonic-gate 
60*7c478bd9Sstevel@tonic-gate #include "db_int.h"
61*7c478bd9Sstevel@tonic-gate #include "db_page.h"
62*7c478bd9Sstevel@tonic-gate #include "btree.h"
63*7c478bd9Sstevel@tonic-gate 
64*7c478bd9Sstevel@tonic-gate /*
65*7c478bd9Sstevel@tonic-gate  * __bam_search --
66*7c478bd9Sstevel@tonic-gate  *	Search a btree for a key.
67*7c478bd9Sstevel@tonic-gate  *
68*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __bam_search __P((DBC *,
69*7c478bd9Sstevel@tonic-gate  * PUBLIC:     const DBT *, u_int32_t, int, db_recno_t *, int *));
70*7c478bd9Sstevel@tonic-gate  */
71*7c478bd9Sstevel@tonic-gate int
__bam_search(dbc,key,flags,stop,recnop,exactp)72*7c478bd9Sstevel@tonic-gate __bam_search(dbc, key, flags, stop, recnop, exactp)
73*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
74*7c478bd9Sstevel@tonic-gate 	const DBT *key;
75*7c478bd9Sstevel@tonic-gate 	u_int32_t flags;
76*7c478bd9Sstevel@tonic-gate 	int stop, *exactp;
77*7c478bd9Sstevel@tonic-gate 	db_recno_t *recnop;
78*7c478bd9Sstevel@tonic-gate {
79*7c478bd9Sstevel@tonic-gate 	BTREE *t;
80*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
81*7c478bd9Sstevel@tonic-gate 	DB *dbp;
82*7c478bd9Sstevel@tonic-gate 	DB_LOCK lock;
83*7c478bd9Sstevel@tonic-gate 	PAGE *h;
84*7c478bd9Sstevel@tonic-gate 	db_indx_t base, i, indx, lim;
85*7c478bd9Sstevel@tonic-gate 	db_pgno_t pg;
86*7c478bd9Sstevel@tonic-gate 	db_recno_t recno;
87*7c478bd9Sstevel@tonic-gate 	int cmp, jump, ret, stack;
88*7c478bd9Sstevel@tonic-gate 
89*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
90*7c478bd9Sstevel@tonic-gate 	cp = dbc->internal;
91*7c478bd9Sstevel@tonic-gate 	t = dbp->internal;
92*7c478bd9Sstevel@tonic-gate 	recno = 0;
93*7c478bd9Sstevel@tonic-gate 
94*7c478bd9Sstevel@tonic-gate 	BT_STK_CLR(cp);
95*7c478bd9Sstevel@tonic-gate 
96*7c478bd9Sstevel@tonic-gate 	/*
97*7c478bd9Sstevel@tonic-gate 	 * There are several ways we search a btree tree.  The flags argument
98*7c478bd9Sstevel@tonic-gate 	 * specifies if we're acquiring read or write locks, if we position
99*7c478bd9Sstevel@tonic-gate 	 * to the first or last item in a set of duplicates, if we return
100*7c478bd9Sstevel@tonic-gate 	 * deleted items, and if we are locking pairs of pages.  In addition,
101*7c478bd9Sstevel@tonic-gate 	 * if we're modifying record numbers, we have to lock the entire tree
102*7c478bd9Sstevel@tonic-gate 	 * regardless.  See btree.h for more details.
103*7c478bd9Sstevel@tonic-gate 	 *
104*7c478bd9Sstevel@tonic-gate 	 * If write-locking pages, we need to know whether or not to acquire a
105*7c478bd9Sstevel@tonic-gate 	 * write lock on a page before getting it.  This depends on how deep it
106*7c478bd9Sstevel@tonic-gate 	 * is in tree, which we don't know until we acquire the root page.  So,
107*7c478bd9Sstevel@tonic-gate 	 * if we need to lock the root page we may have to upgrade it later,
108*7c478bd9Sstevel@tonic-gate 	 * because we won't get the correct lock initially.
109*7c478bd9Sstevel@tonic-gate 	 *
110*7c478bd9Sstevel@tonic-gate 	 * Retrieve the root page.
111*7c478bd9Sstevel@tonic-gate 	 */
112*7c478bd9Sstevel@tonic-gate 	pg = PGNO_ROOT;
113*7c478bd9Sstevel@tonic-gate 	stack = F_ISSET(dbp, DB_BT_RECNUM) && LF_ISSET(S_STACK);
114*7c478bd9Sstevel@tonic-gate 	if ((ret = __bam_lget(dbc,
115*7c478bd9Sstevel@tonic-gate 	    0, pg, stack ? DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
116*7c478bd9Sstevel@tonic-gate 		return (ret);
117*7c478bd9Sstevel@tonic-gate 	if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
118*7c478bd9Sstevel@tonic-gate 		(void)__BT_LPUT(dbc, lock);
119*7c478bd9Sstevel@tonic-gate 		return (ret);
120*7c478bd9Sstevel@tonic-gate 	}
121*7c478bd9Sstevel@tonic-gate 
122*7c478bd9Sstevel@tonic-gate 	/*
123*7c478bd9Sstevel@tonic-gate 	 * Decide if we need to save this page; if we do, write lock it.
124*7c478bd9Sstevel@tonic-gate 	 * We deliberately don't lock-couple on this call.  If the tree
125*7c478bd9Sstevel@tonic-gate 	 * is tiny, i.e., one page, and two threads are busily updating
126*7c478bd9Sstevel@tonic-gate 	 * the root page, we're almost guaranteed deadlocks galore, as
127*7c478bd9Sstevel@tonic-gate 	 * each one gets a read lock and then blocks the other's attempt
128*7c478bd9Sstevel@tonic-gate 	 * for a write lock.
129*7c478bd9Sstevel@tonic-gate 	 */
130*7c478bd9Sstevel@tonic-gate 	if (!stack &&
131*7c478bd9Sstevel@tonic-gate 	    ((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) ||
132*7c478bd9Sstevel@tonic-gate 	    (LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) {
133*7c478bd9Sstevel@tonic-gate 		(void)memp_fput(dbp->mpf, h, 0);
134*7c478bd9Sstevel@tonic-gate 		(void)__BT_LPUT(dbc, lock);
135*7c478bd9Sstevel@tonic-gate 		if ((ret = __bam_lget(dbc, 0, pg, DB_LOCK_WRITE, &lock)) != 0)
136*7c478bd9Sstevel@tonic-gate 			return (ret);
137*7c478bd9Sstevel@tonic-gate 		if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0) {
138*7c478bd9Sstevel@tonic-gate 			(void)__BT_LPUT(dbc, lock);
139*7c478bd9Sstevel@tonic-gate 			return (ret);
140*7c478bd9Sstevel@tonic-gate 		}
141*7c478bd9Sstevel@tonic-gate 		stack = 1;
142*7c478bd9Sstevel@tonic-gate 	}
143*7c478bd9Sstevel@tonic-gate 
144*7c478bd9Sstevel@tonic-gate 	for (;;) {
145*7c478bd9Sstevel@tonic-gate 		/*
146*7c478bd9Sstevel@tonic-gate 		 * Do a binary search on the current page.  If we're searching
147*7c478bd9Sstevel@tonic-gate 		 * a leaf page, we have to manipulate the indices in groups of
148*7c478bd9Sstevel@tonic-gate 		 * two.  If we're searching an internal page, they're an index
149*7c478bd9Sstevel@tonic-gate 		 * per page item.  If we find an exact match on a leaf page,
150*7c478bd9Sstevel@tonic-gate 		 * we're done.
151*7c478bd9Sstevel@tonic-gate 		 */
152*7c478bd9Sstevel@tonic-gate 		jump = TYPE(h) == P_LBTREE ? P_INDX : O_INDX;
153*7c478bd9Sstevel@tonic-gate 		for (base = 0,
154*7c478bd9Sstevel@tonic-gate 		    lim = NUM_ENT(h) / (db_indx_t)jump; lim != 0; lim >>= 1) {
155*7c478bd9Sstevel@tonic-gate 			indx = base + ((lim >> 1) * jump);
156*7c478bd9Sstevel@tonic-gate 			if ((cmp =
157*7c478bd9Sstevel@tonic-gate 			    __bam_cmp(dbp, key, h, indx, t->bt_compare)) == 0) {
158*7c478bd9Sstevel@tonic-gate 				if (TYPE(h) == P_LBTREE)
159*7c478bd9Sstevel@tonic-gate 					goto match;
160*7c478bd9Sstevel@tonic-gate 				goto next;
161*7c478bd9Sstevel@tonic-gate 			}
162*7c478bd9Sstevel@tonic-gate 			if (cmp > 0) {
163*7c478bd9Sstevel@tonic-gate 				base = indx + jump;
164*7c478bd9Sstevel@tonic-gate 				--lim;
165*7c478bd9Sstevel@tonic-gate 			}
166*7c478bd9Sstevel@tonic-gate 		}
167*7c478bd9Sstevel@tonic-gate 
168*7c478bd9Sstevel@tonic-gate 		/*
169*7c478bd9Sstevel@tonic-gate 		 * No match found.  Base is the smallest index greater than
170*7c478bd9Sstevel@tonic-gate 		 * key and may be zero or a last + O_INDX index.
171*7c478bd9Sstevel@tonic-gate 		 *
172*7c478bd9Sstevel@tonic-gate 		 * If it's a leaf page, return base as the "found" value.
173*7c478bd9Sstevel@tonic-gate 		 * Delete only deletes exact matches.
174*7c478bd9Sstevel@tonic-gate 		 */
175*7c478bd9Sstevel@tonic-gate 		if (TYPE(h) == P_LBTREE) {
176*7c478bd9Sstevel@tonic-gate 			*exactp = 0;
177*7c478bd9Sstevel@tonic-gate 
178*7c478bd9Sstevel@tonic-gate 			if (LF_ISSET(S_EXACT))
179*7c478bd9Sstevel@tonic-gate 				goto notfound;
180*7c478bd9Sstevel@tonic-gate 
181*7c478bd9Sstevel@tonic-gate 			/*
182*7c478bd9Sstevel@tonic-gate 			 * !!!
183*7c478bd9Sstevel@tonic-gate 			 * Possibly returning a deleted record -- DB_SET_RANGE,
184*7c478bd9Sstevel@tonic-gate 			 * DB_KEYFIRST and DB_KEYLAST don't require an exact
185*7c478bd9Sstevel@tonic-gate 			 * match, and we don't want to walk multiple pages here
186*7c478bd9Sstevel@tonic-gate 			 * to find an undeleted record.  This is handled in the
187*7c478bd9Sstevel@tonic-gate 			 * __bam_c_search() routine.
188*7c478bd9Sstevel@tonic-gate 			 */
189*7c478bd9Sstevel@tonic-gate 			BT_STK_ENTER(cp, h, base, lock, ret);
190*7c478bd9Sstevel@tonic-gate 			return (ret);
191*7c478bd9Sstevel@tonic-gate 		}
192*7c478bd9Sstevel@tonic-gate 
193*7c478bd9Sstevel@tonic-gate 		/*
194*7c478bd9Sstevel@tonic-gate 		 * If it's not a leaf page, record the internal page (which is
195*7c478bd9Sstevel@tonic-gate 		 * a parent page for the key).  Decrement the base by 1 if it's
196*7c478bd9Sstevel@tonic-gate 		 * non-zero so that if a split later occurs, the inserted page
197*7c478bd9Sstevel@tonic-gate 		 * will be to the right of the saved page.
198*7c478bd9Sstevel@tonic-gate 		 */
199*7c478bd9Sstevel@tonic-gate 		indx = base > 0 ? base - O_INDX : base;
200*7c478bd9Sstevel@tonic-gate 
201*7c478bd9Sstevel@tonic-gate 		/*
202*7c478bd9Sstevel@tonic-gate 		 * If we're trying to calculate the record number, sum up
203*7c478bd9Sstevel@tonic-gate 		 * all the record numbers on this page up to the indx point.
204*7c478bd9Sstevel@tonic-gate 		 */
205*7c478bd9Sstevel@tonic-gate 		if (recnop != NULL)
206*7c478bd9Sstevel@tonic-gate 			for (i = 0; i < indx; ++i)
207*7c478bd9Sstevel@tonic-gate 				recno += GET_BINTERNAL(h, i)->nrecs;
208*7c478bd9Sstevel@tonic-gate 
209*7c478bd9Sstevel@tonic-gate next:		pg = GET_BINTERNAL(h, indx)->pgno;
210*7c478bd9Sstevel@tonic-gate 		if (stack) {
211*7c478bd9Sstevel@tonic-gate 			/* Return if this is the lowest page wanted. */
212*7c478bd9Sstevel@tonic-gate 			if (LF_ISSET(S_PARENT) && stop == h->level) {
213*7c478bd9Sstevel@tonic-gate 				BT_STK_ENTER(cp, h, indx, lock, ret);
214*7c478bd9Sstevel@tonic-gate 				return (ret);
215*7c478bd9Sstevel@tonic-gate 			}
216*7c478bd9Sstevel@tonic-gate 			BT_STK_PUSH(cp, h, indx, lock, ret);
217*7c478bd9Sstevel@tonic-gate 			if (ret != 0)
218*7c478bd9Sstevel@tonic-gate 				goto err;
219*7c478bd9Sstevel@tonic-gate 
220*7c478bd9Sstevel@tonic-gate 			if ((ret =
221*7c478bd9Sstevel@tonic-gate 			    __bam_lget(dbc, 0, pg, DB_LOCK_WRITE, &lock)) != 0)
222*7c478bd9Sstevel@tonic-gate 				goto err;
223*7c478bd9Sstevel@tonic-gate 		} else {
224*7c478bd9Sstevel@tonic-gate 			/*
225*7c478bd9Sstevel@tonic-gate 			 * Decide if we want to return a reference to the next
226*7c478bd9Sstevel@tonic-gate 			 * page in the return stack.  If so, lock it and never
227*7c478bd9Sstevel@tonic-gate 			 * unlock it.
228*7c478bd9Sstevel@tonic-gate 			 */
229*7c478bd9Sstevel@tonic-gate 			if ((LF_ISSET(S_PARENT) &&
230*7c478bd9Sstevel@tonic-gate 			    (u_int8_t)(stop + 1) >= (u_int8_t)(h->level - 1)) ||
231*7c478bd9Sstevel@tonic-gate 			    (h->level - 1) == LEAFLEVEL)
232*7c478bd9Sstevel@tonic-gate 				stack = 1;
233*7c478bd9Sstevel@tonic-gate 
234*7c478bd9Sstevel@tonic-gate 			(void)memp_fput(dbp->mpf, h, 0);
235*7c478bd9Sstevel@tonic-gate 
236*7c478bd9Sstevel@tonic-gate 			if ((ret =
237*7c478bd9Sstevel@tonic-gate 			    __bam_lget(dbc, 1, pg, stack && LF_ISSET(S_WRITE) ?
238*7c478bd9Sstevel@tonic-gate 			    DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
239*7c478bd9Sstevel@tonic-gate 				goto err;
240*7c478bd9Sstevel@tonic-gate 		}
241*7c478bd9Sstevel@tonic-gate 		if ((ret = memp_fget(dbp->mpf, &pg, 0, &h)) != 0)
242*7c478bd9Sstevel@tonic-gate 			goto err;
243*7c478bd9Sstevel@tonic-gate 	}
244*7c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
245*7c478bd9Sstevel@tonic-gate 
246*7c478bd9Sstevel@tonic-gate match:	*exactp = 1;
247*7c478bd9Sstevel@tonic-gate 
248*7c478bd9Sstevel@tonic-gate 	/*
249*7c478bd9Sstevel@tonic-gate 	 * If we're trying to calculate the record number, add in the
250*7c478bd9Sstevel@tonic-gate 	 * offset on this page and correct for the fact that records
251*7c478bd9Sstevel@tonic-gate 	 * in the tree are 0-based.
252*7c478bd9Sstevel@tonic-gate 	 */
253*7c478bd9Sstevel@tonic-gate 	if (recnop != NULL)
254*7c478bd9Sstevel@tonic-gate 		*recnop = recno + (indx / P_INDX) + 1;
255*7c478bd9Sstevel@tonic-gate 
256*7c478bd9Sstevel@tonic-gate 	/*
257*7c478bd9Sstevel@tonic-gate 	 * If we got here, we know that we have a btree leaf page.
258*7c478bd9Sstevel@tonic-gate 	 *
259*7c478bd9Sstevel@tonic-gate 	 * If there are duplicates, go to the first/last one.  This is
260*7c478bd9Sstevel@tonic-gate 	 * safe because we know that we're not going to leave the page,
261*7c478bd9Sstevel@tonic-gate 	 * all duplicate sets that are not on overflow pages exist on a
262*7c478bd9Sstevel@tonic-gate 	 * single leaf page.
263*7c478bd9Sstevel@tonic-gate 	 */
264*7c478bd9Sstevel@tonic-gate 	if (LF_ISSET(S_DUPLAST))
265*7c478bd9Sstevel@tonic-gate 		while (indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
266*7c478bd9Sstevel@tonic-gate 		    h->inp[indx] == h->inp[indx + P_INDX])
267*7c478bd9Sstevel@tonic-gate 			indx += P_INDX;
268*7c478bd9Sstevel@tonic-gate 	else
269*7c478bd9Sstevel@tonic-gate 		while (indx > 0 &&
270*7c478bd9Sstevel@tonic-gate 		    h->inp[indx] == h->inp[indx - P_INDX])
271*7c478bd9Sstevel@tonic-gate 			indx -= P_INDX;
272*7c478bd9Sstevel@tonic-gate 
273*7c478bd9Sstevel@tonic-gate 	/*
274*7c478bd9Sstevel@tonic-gate 	 * Now check if we are allowed to return deleted items; if not
275*7c478bd9Sstevel@tonic-gate 	 * find the next (or previous) non-deleted item.
276*7c478bd9Sstevel@tonic-gate 	 */
277*7c478bd9Sstevel@tonic-gate 	if (LF_ISSET(S_DELNO)) {
278*7c478bd9Sstevel@tonic-gate 		if (LF_ISSET(S_DUPLAST))
279*7c478bd9Sstevel@tonic-gate 			while (B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type) &&
280*7c478bd9Sstevel@tonic-gate 			    indx > 0 &&
281*7c478bd9Sstevel@tonic-gate 			    h->inp[indx] == h->inp[indx - P_INDX])
282*7c478bd9Sstevel@tonic-gate 				indx -= P_INDX;
283*7c478bd9Sstevel@tonic-gate 		else
284*7c478bd9Sstevel@tonic-gate 			while (B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type) &&
285*7c478bd9Sstevel@tonic-gate 			    indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
286*7c478bd9Sstevel@tonic-gate 			    h->inp[indx] == h->inp[indx + P_INDX])
287*7c478bd9Sstevel@tonic-gate 				indx += P_INDX;
288*7c478bd9Sstevel@tonic-gate 
289*7c478bd9Sstevel@tonic-gate 		if (B_DISSET(GET_BKEYDATA(h, indx + O_INDX)->type))
290*7c478bd9Sstevel@tonic-gate 			goto notfound;
291*7c478bd9Sstevel@tonic-gate 	}
292*7c478bd9Sstevel@tonic-gate 
293*7c478bd9Sstevel@tonic-gate 	BT_STK_ENTER(cp, h, indx, lock, ret);
294*7c478bd9Sstevel@tonic-gate 	return (ret);
295*7c478bd9Sstevel@tonic-gate 
296*7c478bd9Sstevel@tonic-gate notfound:
297*7c478bd9Sstevel@tonic-gate 	(void)memp_fput(dbp->mpf, h, 0);
298*7c478bd9Sstevel@tonic-gate 	(void)__BT_LPUT(dbc, lock);
299*7c478bd9Sstevel@tonic-gate 	ret = DB_NOTFOUND;
300*7c478bd9Sstevel@tonic-gate 
301*7c478bd9Sstevel@tonic-gate err:	if (cp->csp > cp->sp) {
302*7c478bd9Sstevel@tonic-gate 		BT_STK_POP(cp);
303*7c478bd9Sstevel@tonic-gate 		__bam_stkrel(dbc, 0);
304*7c478bd9Sstevel@tonic-gate 	}
305*7c478bd9Sstevel@tonic-gate 	return (ret);
306*7c478bd9Sstevel@tonic-gate }
307*7c478bd9Sstevel@tonic-gate 
308*7c478bd9Sstevel@tonic-gate /*
309*7c478bd9Sstevel@tonic-gate  * __bam_stkrel --
310*7c478bd9Sstevel@tonic-gate  *	Release all pages currently held in the stack.
311*7c478bd9Sstevel@tonic-gate  *
312*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __bam_stkrel __P((DBC *, int));
313*7c478bd9Sstevel@tonic-gate  */
314*7c478bd9Sstevel@tonic-gate int
__bam_stkrel(dbc,nolocks)315*7c478bd9Sstevel@tonic-gate __bam_stkrel(dbc, nolocks)
316*7c478bd9Sstevel@tonic-gate 	DBC *dbc;
317*7c478bd9Sstevel@tonic-gate 	int nolocks;
318*7c478bd9Sstevel@tonic-gate {
319*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
320*7c478bd9Sstevel@tonic-gate 	DB *dbp;
321*7c478bd9Sstevel@tonic-gate 	EPG *epg;
322*7c478bd9Sstevel@tonic-gate 
323*7c478bd9Sstevel@tonic-gate 	dbp = dbc->dbp;
324*7c478bd9Sstevel@tonic-gate 	cp = dbc->internal;
325*7c478bd9Sstevel@tonic-gate 
326*7c478bd9Sstevel@tonic-gate 	/* Release inner pages first. */
327*7c478bd9Sstevel@tonic-gate 	for (epg = cp->sp; epg <= cp->csp; ++epg) {
328*7c478bd9Sstevel@tonic-gate 		if (epg->page != NULL)
329*7c478bd9Sstevel@tonic-gate 			(void)memp_fput(dbp->mpf, epg->page, 0);
330*7c478bd9Sstevel@tonic-gate 		if (epg->lock != LOCK_INVALID)
331*7c478bd9Sstevel@tonic-gate 			if (nolocks)
332*7c478bd9Sstevel@tonic-gate 				(void)__BT_LPUT(dbc, epg->lock);
333*7c478bd9Sstevel@tonic-gate 			else
334*7c478bd9Sstevel@tonic-gate 				(void)__BT_TLPUT(dbc, epg->lock);
335*7c478bd9Sstevel@tonic-gate 	}
336*7c478bd9Sstevel@tonic-gate 
337*7c478bd9Sstevel@tonic-gate 	/* Clear the stack, all pages have been released. */
338*7c478bd9Sstevel@tonic-gate 	BT_STK_CLR(cp);
339*7c478bd9Sstevel@tonic-gate 
340*7c478bd9Sstevel@tonic-gate 	return (0);
341*7c478bd9Sstevel@tonic-gate }
342*7c478bd9Sstevel@tonic-gate 
343*7c478bd9Sstevel@tonic-gate /*
344*7c478bd9Sstevel@tonic-gate  * __bam_stkgrow --
345*7c478bd9Sstevel@tonic-gate  *	Grow the stack.
346*7c478bd9Sstevel@tonic-gate  *
347*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __bam_stkgrow __P((CURSOR *));
348*7c478bd9Sstevel@tonic-gate  */
349*7c478bd9Sstevel@tonic-gate int
__bam_stkgrow(cp)350*7c478bd9Sstevel@tonic-gate __bam_stkgrow(cp)
351*7c478bd9Sstevel@tonic-gate 	CURSOR *cp;
352*7c478bd9Sstevel@tonic-gate {
353*7c478bd9Sstevel@tonic-gate 	EPG *p;
354*7c478bd9Sstevel@tonic-gate 	size_t entries;
355*7c478bd9Sstevel@tonic-gate 	int ret;
356*7c478bd9Sstevel@tonic-gate 
357*7c478bd9Sstevel@tonic-gate 	entries = cp->esp - cp->sp;
358*7c478bd9Sstevel@tonic-gate 
359*7c478bd9Sstevel@tonic-gate 	if ((ret = __os_calloc(entries * 2, sizeof(EPG), &p)) != 0)
360*7c478bd9Sstevel@tonic-gate 		return (ret);
361*7c478bd9Sstevel@tonic-gate 	memcpy(p, cp->sp, entries * sizeof(EPG));
362*7c478bd9Sstevel@tonic-gate 	if (cp->sp != cp->stack)
363*7c478bd9Sstevel@tonic-gate 		__os_free(cp->sp, entries * sizeof(EPG));
364*7c478bd9Sstevel@tonic-gate 	cp->sp = p;
365*7c478bd9Sstevel@tonic-gate 	cp->csp = p + entries;
366*7c478bd9Sstevel@tonic-gate 	cp->esp = p + entries * 2;
367*7c478bd9Sstevel@tonic-gate 	return (0);
368*7c478bd9Sstevel@tonic-gate }
369