1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 1997, 1998
5  *	Sleepycat Software.  All rights reserved.
6  */
7 /*
8  * Copyright (c) 1990, 1993, 1994, 1995, 1996
9  *	Keith Bostic.  All rights reserved.
10  */
11 /*
12  * Copyright (c) 1990, 1993, 1994, 1995
13  *	The Regents of the University of California.  All rights reserved.
14  *
15  * This code is derived from software contributed to Berkeley by
16  * Mike Olson.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions
20  * are met:
21  * 1. Redistributions of source code must retain the above copyright
22  *    notice, this list of conditions and the following disclaimer.
23  * 2. Redistributions in binary form must reproduce the above copyright
24  *    notice, this list of conditions and the following disclaimer in the
25  *    documentation and/or other materials provided with the distribution.
26  * 3. All advertising materials mentioning features or use of this software
27  *    must display the following acknowledgement:
28  *	This product includes software developed by the University of
29  *	California, Berkeley and its contributors.
30  * 4. Neither the name of the University nor the names of its contributors
31  *    may be used to endorse or promote products derived from this software
32  *    without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44  * SUCH DAMAGE.
45  *
46  *	@(#)btree.h	10.26 (Sleepycat) 12/16/98
47  */
48 
/*
 * Forward structure declarations, each paired with a short typedef name.
 */
struct __btree;
struct __cursor;
struct __epg;
struct __recno;

typedef struct __btree	BTREE;
typedef struct __cursor	CURSOR;
typedef struct __epg	EPG;
typedef struct __recno	RECNO;
54 
/*
 * NOTE(review): presumably the default number of minimum keys per page
 * (compare bt_minkey in struct __btree) -- confirm against the users.
 */
#define	DEFMINKEYPAGE	2
56 
/*
 * Page-type predicates.
 *
 * ISINTERNAL is true when TYPE(p) is P_IBTREE or P_IRECNO, ISLEAF when it
 * is P_LBTREE or P_LRECNO.  The argument appears twice in each expansion,
 * so it should not have side effects.
 */
#define	ISINTERNAL(p)							\
	(TYPE(p) == P_IBTREE || TYPE(p) == P_IRECNO)
#define	ISLEAF(p)							\
	(TYPE(p) == P_LBTREE || TYPE(p) == P_LRECNO)
59 
/*
 * Lock-release helpers.
 *
 * When doing transactions, the locks associated with a data item from a page
 * have to be held for the entire transaction, but the locks associated with
 * walking the tree do not.  The two cases are kept apart so that the internal
 * pages of the tree are not tied up longer than necessary:
 *
 *	__BT_LPUT	calls lock_put() whenever DB_AM_LOCKING is set.
 *	__BT_TLPUT	calls lock_put() only if DB_AM_LOCKING is set and the
 *			cursor's txn is NULL.
 *
 * Each macro evaluates to lock_put()'s return value when it makes the call
 * and to 0 when it does not.
 */
#define	__BT_LPUT(dbc, lock)						\
	(!F_ISSET((dbc)->dbp, DB_AM_LOCKING) ? 0 :			\
	    lock_put((dbc)->dbp->dbenv->lk_info, lock))
#define	__BT_TLPUT(dbc, lock)						\
	(!F_ISSET((dbc)->dbp, DB_AM_LOCKING) || (dbc)->txn != NULL ?	\
	    0 : lock_put((dbc)->dbp->dbenv->lk_info, lock))
72 
/*
 * Flags to __bam_search() and __bam_rsearch().
 *
 * Internal page searches have to find the largest record less than the key
 * so that descents work, while leaf page searches have to find the smallest
 * record greater than the key so that the index returned is the record's
 * correct position for insertion.
 *
 * The flags parameter to the search routines covers three aspects of the
 * search: the type of locking required (including whether a pair of pages is
 * being locked), which item to return in the presence of duplicates, and
 * whether or not deleted entries are returned.  The single-bit flags come
 * first and the combinations built from them follow; this simplifies both
 * the mnemonic names and the code that checks for the various cases.
 */

/* Lock type. */
#define	S_READ		0x00001		/* Read locks. */
#define	S_WRITE		0x00002		/* Write locks. */

/* Search modifiers. */
#define	S_APPEND	0x00040		/* Append to the tree. */
#define	S_DELNO		0x00080		/* Don't return deleted items. */
#define	S_DUPFIRST	0x00100		/* Return first duplicate. */
#define	S_DUPLAST	0x00200		/* Return last duplicate. */
#define	S_EXACT		0x00400		/* Exact items only. */
#define	S_PARENT	0x00800		/* Lock page pair. */
#define	S_STACK		0x01000		/* Need a complete stack. */
/*
 * S_PAST_EOF: if doing an insert search (or keyfirst or keylast operations),
 * or a split on behalf of an insert, it's okay to return an entry one past
 * end-of-page.
 */
#define	S_PAST_EOF	0x02000

/*
 * Combinations of the bits above, operands listed in bit order after the
 * lock type.  S_INSERT and S_KEYLAST expand to the same set of bits.
 */
#define	S_DELETE	(S_WRITE | S_DELNO | S_DUPFIRST | S_EXACT | S_STACK)
#define	S_FIND		(S_READ | S_DELNO | S_DUPFIRST)
#define	S_FIND_WR	(S_WRITE | S_DELNO | S_DUPFIRST)
#define	S_INSERT	(S_WRITE | S_DUPLAST | S_STACK | S_PAST_EOF)
#define	S_KEYFIRST	(S_WRITE | S_DUPFIRST | S_STACK | S_PAST_EOF)
#define	S_KEYLAST	(S_WRITE | S_DUPLAST | S_STACK | S_PAST_EOF)
#define	S_WRPAIR	(S_WRITE | S_DUPLAST | S_PARENT | S_PAST_EOF)
110 
/* Flag bits accepted by __bam_iitem(). */
#define	BI_DELETED	0x01	/* The key/data pair is only a placeholder. */
#define	BI_DOINCR	0x02	/* The record count is to be incremented. */
#define	BI_NEWKEY	0x04	/* The key is a new key. */
117 
118 /*
119  * Various routines pass around page references.  A page reference can be a
120  * pointer to the page or a page number; for either, an indx can designate
121  * an item on the page.
122  */
123 struct __epg {
124 	PAGE	 *page;			/* The page. */
125 	db_indx_t indx;			/* The index on the page. */
126 	DB_LOCK	  lock;			/* The page's lock. */
127 };
128 
/*
 * A stack of the pages being locked in the tree is maintained.  Btrees
 * (currently) only save two levels of the tree at a time, so the default
 * stack is always large enough for them.  Recno trees, however, have to
 * lock the entire tree to do inserts/deletes, so the stack is grown as
 * necessary.
 *
 * BT_STK_CLR --
 *	Reset the current stack entry (csp) to the stack pointer (sp).
 *	Evaluates to the new value of csp.
 */
#define	BT_STK_CLR(c)	((c)->csp = (c)->sp)
137 
/*
 * BT_STK_ENTER --
 *	Store a page, an item index and a lock in the current stack entry
 *	(csp) without advancing csp.
 *
 *	c		cursor holding the stack (csp/esp are read through it)
 *	pagep		value stored in csp->page
 *	page_indx	value stored in csp->indx
 *	lck		value stored in csp->lock
 *	ret		lvalue receiving the status
 *
 * If csp has reached esp, __bam_stkgrow(c) is called first and ret is set to
 * its return value; otherwise ret is set to 0.  The entry is filled in only
 * when ret is 0.
 *
 * The lock parameter is deliberately NOT spelled "lock": macro parameters
 * are substituted wherever the token appears, so a parameter named "lock"
 * would also rewrite the member name in "csp->lock", and the macro would
 * compile only for callers whose argument is literally the identifier
 * "lock".
 */
#define	BT_STK_ENTER(c, pagep, page_indx, lck, ret) do {		\
	if (((ret) =							\
	    (c)->csp == (c)->esp ? __bam_stkgrow(c) : 0) == 0) {	\
		(c)->csp->page = (pagep);				\
		(c)->csp->indx = (page_indx);				\
		(c)->csp->lock = (lck);					\
	}								\
} while (0)
146 
/*
 * BT_STK_PUSH --
 *	Fill in the current stack entry through BT_STK_ENTER, then advance
 *	csp to the next entry.  ret is set by BT_STK_ENTER.
 *
 * NOTE(review): csp is advanced whether or not BT_STK_ENTER left ret at 0;
 * confirm that callers check ret and that their error paths cope with the
 * advanced csp.
 */
#define	BT_STK_PUSH(c, pagep, page_indx, lck, ret) do {			\
	BT_STK_ENTER(c, pagep, page_indx, lck, ret);			\
	(c)->csp++;							\
} while (0)
151 
/*
 * BT_STK_POP --
 *	Step csp back by one entry and evaluate to the new current entry.
 *	If the stack is already empty, evaluate to NULL and leave csp alone.
 *
 * "Empty" means csp == sp: sp is the base that BT_STK_CLR resets csp to.
 * The test must not be made against the embedded stack[] array, because sp
 * is a separate pointer.  Whenever sp does not point at stack[] (presumably
 * after __bam_stkgrow() has replaced the stack -- TODO confirm), a test
 * against stack[] never matches and csp would be decremented below the base.
 */
#define	BT_STK_POP(c)							\
	((c)->csp == (c)->sp ? NULL : --(c)->csp)
154 
/*
 * Arguments passed to __bam_ca_replace().  The enumerators are numbered
 * consecutively from 0.
 */
typedef enum {
	REPLACE_SETUP = 0,
	REPLACE_SUCCESS = 1,
	REPLACE_FAILED = 2
} ca_replace_arg;
163 
/*
 * Arguments passed to __ram_ca().  The enumerators are numbered
 * consecutively from 0.
 */
typedef enum {
	CA_DELETE = 0,
	CA_IAFTER = 1,
	CA_IBEFORE = 2
} ca_recno_arg;
170 
/* The illegal (out-of-bounds) record number. */
#define	RECNO_OOB	0
172 
173 /* Btree/Recno cursor. */
174 struct __cursor {
175 	DBC		*dbc;		/* Enclosing DBC. */
176 
177 	/* Per-thread information: shared by btree/recno. */
178 	EPG		*sp;		/* Stack pointer. */
179 	EPG	 	*csp;		/* Current stack entry. */
180 	EPG		*esp;		/* End stack pointer. */
181 	EPG		 stack[5];
182 
183 	/* Per-thread information: btree private. */
184 	PAGE		*page;		/* Cursor page. */
185 
186 	db_pgno_t	 pgno;		/* Page. */
187 	db_indx_t	 indx;		/* Page item ref'd by the cursor. */
188 
189 	db_pgno_t	 dpgno;		/* Duplicate page. */
190 	db_indx_t	 dindx;		/* Page item ref'd by the cursor. */
191 
192 	DB_LOCK		 lock;		/* Cursor read lock. */
193 	db_lockmode_t	 mode;		/* Lock mode. */
194 
195 	/* Per-thread information: recno private. */
196 	db_recno_t	 recno;		/* Current record number. */
197 
198 	/*
199 	 * Btree:
200 	 * We set a flag in the cursor structure if the underlying object has
201 	 * been deleted.  It's not strictly necessary, we could get the same
202 	 * information by looking at the page itself.
203 	 *
204 	 * Recno:
205 	 * When renumbering recno databases during deletes, cursors referencing
206 	 * "deleted" records end up positioned between two records, and so must
207 	 * be specially adjusted on the next operation.
208 	 */
209 #define	C_DELETED	0x0001		/* Record was deleted. */
210 	u_int32_t	 flags;
211 };
212 
213 /*
214  * The in-memory recno data structure.
215  *
216  * !!!
217  * These fields are ignored as far as multi-threading is concerned.  There
218  * are no transaction semantics associated with backing files, nor is there
219  * any thread protection.
220  */
221 struct __recno {
222 	int		 re_delim;	/* Variable-length delimiting byte. */
223 	int		 re_pad;	/* Fixed-length padding byte. */
224 	u_int32_t	 re_len;	/* Length for fixed-length records. */
225 
226 	char		*re_source;	/* Source file name. */
227 	int		 re_fd;		/* Source file descriptor */
228 	db_recno_t	 re_last;	/* Last record number read. */
229 	void		*re_cmap;	/* Current point in mapped space. */
230 	void		*re_smap;	/* Start of mapped space. */
231 	void		*re_emap;	/* End of mapped space. */
232 	size_t		 re_msize;	/* Size of mapped region. */
233 					/* Recno input function. */
234 	int (*re_irec) __P((DBC *, db_recno_t));
235 
236 #define	RECNO_EOF	0x0001		/* EOF on backing source file. */
237 #define	RECNO_MODIFIED	0x0002		/* Tree was modified. */
238 	u_int32_t	 flags;
239 };
240 
241 /*
242  * The in-memory, per-tree btree data structure.
243  */
244 struct __btree {
245 	db_pgno_t	 bt_lpgno;	/* Last insert location. */
246 
247 	db_indx_t 	 bt_maxkey;	/* Maximum keys per page. */
248 	db_indx_t 	 bt_minkey;	/* Minimum keys per page. */
249 
250 	int (*bt_compare)		/* Comparison function. */
251 	    __P((const DBT *, const DBT *));
252 	size_t(*bt_prefix)		/* Prefix function. */
253 	    __P((const DBT *, const DBT *));
254 
255 	db_indx_t	 bt_ovflsize;	/* Maximum key/data on-page size. */
256 
257 	RECNO		*recno;		/* Private recno structure. */
258 };
259 
260 #include "btree_auto.h"
261 #include "btree_ext.h"
262 #include "db_am.h"
263 #include "common_ext.h"
264