1*7c478bd9Sstevel@tonic-gate /*- 2*7c478bd9Sstevel@tonic-gate * See the file LICENSE for redistribution information. 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * Copyright (c) 1996, 1997, 1998 5*7c478bd9Sstevel@tonic-gate * Sleepycat Software. All rights reserved. 6*7c478bd9Sstevel@tonic-gate */ 7*7c478bd9Sstevel@tonic-gate 8*7c478bd9Sstevel@tonic-gate #include "config.h" 9*7c478bd9Sstevel@tonic-gate 10*7c478bd9Sstevel@tonic-gate #ifndef lint 11*7c478bd9Sstevel@tonic-gate static const char sccsid[] = "@(#)bt_cursor.c 10.81 (Sleepycat) 12/16/98"; 12*7c478bd9Sstevel@tonic-gate #endif /* not lint */ 13*7c478bd9Sstevel@tonic-gate 14*7c478bd9Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES 15*7c478bd9Sstevel@tonic-gate #include <sys/types.h> 16*7c478bd9Sstevel@tonic-gate 17*7c478bd9Sstevel@tonic-gate #include <errno.h> 18*7c478bd9Sstevel@tonic-gate #include <stdlib.h> 19*7c478bd9Sstevel@tonic-gate #include <string.h> 20*7c478bd9Sstevel@tonic-gate #endif 21*7c478bd9Sstevel@tonic-gate 22*7c478bd9Sstevel@tonic-gate #include "db_int.h" 23*7c478bd9Sstevel@tonic-gate #include "db_page.h" 24*7c478bd9Sstevel@tonic-gate #include "btree.h" 25*7c478bd9Sstevel@tonic-gate #include "shqueue.h" 26*7c478bd9Sstevel@tonic-gate #include "db_shash.h" 27*7c478bd9Sstevel@tonic-gate #include "lock.h" 28*7c478bd9Sstevel@tonic-gate #include "lock_ext.h" 29*7c478bd9Sstevel@tonic-gate 30*7c478bd9Sstevel@tonic-gate static int __bam_c_close __P((DBC *)); 31*7c478bd9Sstevel@tonic-gate static int __bam_c_del __P((DBC *, u_int32_t)); 32*7c478bd9Sstevel@tonic-gate static int __bam_c_destroy __P((DBC *)); 33*7c478bd9Sstevel@tonic-gate static int __bam_c_first __P((DBC *, CURSOR *)); 34*7c478bd9Sstevel@tonic-gate static int __bam_c_get __P((DBC *, DBT *, DBT *, u_int32_t)); 35*7c478bd9Sstevel@tonic-gate static int __bam_c_getstack __P((DBC *, CURSOR *)); 36*7c478bd9Sstevel@tonic-gate static int __bam_c_last __P((DBC *, CURSOR *)); 37*7c478bd9Sstevel@tonic-gate static int __bam_c_next 
__P((DBC *, CURSOR *, int)); 38*7c478bd9Sstevel@tonic-gate static int __bam_c_physdel __P((DBC *, CURSOR *, PAGE *)); 39*7c478bd9Sstevel@tonic-gate static int __bam_c_prev __P((DBC *, CURSOR *)); 40*7c478bd9Sstevel@tonic-gate static int __bam_c_put __P((DBC *, DBT *, DBT *, u_int32_t)); 41*7c478bd9Sstevel@tonic-gate static void __bam_c_reset __P((CURSOR *)); 42*7c478bd9Sstevel@tonic-gate static int __bam_c_rget __P((DBC *, DBT *, u_int32_t)); 43*7c478bd9Sstevel@tonic-gate static int __bam_c_search __P((DBC *, CURSOR *, const DBT *, u_int32_t, int *)); 44*7c478bd9Sstevel@tonic-gate static int __bam_dsearch __P((DBC *, CURSOR *, DBT *, u_int32_t *)); 45*7c478bd9Sstevel@tonic-gate 46*7c478bd9Sstevel@tonic-gate /* Discard the current page/lock held by a cursor. */ 47*7c478bd9Sstevel@tonic-gate #undef DISCARD 48*7c478bd9Sstevel@tonic-gate #define DISCARD(dbc, cp) { \ 49*7c478bd9Sstevel@tonic-gate if ((cp)->page != NULL) { \ 50*7c478bd9Sstevel@tonic-gate (void)memp_fput((dbc)->dbp->mpf, (cp)->page, 0); \ 51*7c478bd9Sstevel@tonic-gate (cp)->page = NULL; \ 52*7c478bd9Sstevel@tonic-gate } \ 53*7c478bd9Sstevel@tonic-gate if ((cp)->lock != LOCK_INVALID) { \ 54*7c478bd9Sstevel@tonic-gate (void)__BT_TLPUT((dbc), (cp)->lock); \ 55*7c478bd9Sstevel@tonic-gate (cp)->lock = LOCK_INVALID; \ 56*7c478bd9Sstevel@tonic-gate } \ 57*7c478bd9Sstevel@tonic-gate } 58*7c478bd9Sstevel@tonic-gate 59*7c478bd9Sstevel@tonic-gate /* If the cursor references a deleted record. 
*/ 60*7c478bd9Sstevel@tonic-gate #undef IS_CUR_DELETED 61*7c478bd9Sstevel@tonic-gate #define IS_CUR_DELETED(cp) \ 62*7c478bd9Sstevel@tonic-gate (((cp)->dpgno == PGNO_INVALID && \ 63*7c478bd9Sstevel@tonic-gate B_DISSET(GET_BKEYDATA((cp)->page, \ 64*7c478bd9Sstevel@tonic-gate (cp)->indx + O_INDX)->type)) || \ 65*7c478bd9Sstevel@tonic-gate ((cp)->dpgno != PGNO_INVALID && \ 66*7c478bd9Sstevel@tonic-gate B_DISSET(GET_BKEYDATA((cp)->page, (cp)->dindx)->type))) 67*7c478bd9Sstevel@tonic-gate 68*7c478bd9Sstevel@tonic-gate /* If the cursor and index combination references a deleted record. */ 69*7c478bd9Sstevel@tonic-gate #undef IS_DELETED 70*7c478bd9Sstevel@tonic-gate #define IS_DELETED(cp, indx) \ 71*7c478bd9Sstevel@tonic-gate (((cp)->dpgno == PGNO_INVALID && \ 72*7c478bd9Sstevel@tonic-gate B_DISSET(GET_BKEYDATA((cp)->page, (indx) + O_INDX)->type)) || \ 73*7c478bd9Sstevel@tonic-gate ((cp)->dpgno != PGNO_INVALID && \ 74*7c478bd9Sstevel@tonic-gate B_DISSET(GET_BKEYDATA((cp)->page, (indx))->type))) 75*7c478bd9Sstevel@tonic-gate 76*7c478bd9Sstevel@tonic-gate /* 77*7c478bd9Sstevel@tonic-gate * Test to see if two cursors could point to duplicates of the same key, 78*7c478bd9Sstevel@tonic-gate * whether on-page or off-page. The leaf page numbers must be the same 79*7c478bd9Sstevel@tonic-gate * in both cases. In the case of off-page duplicates, the key indices 80*7c478bd9Sstevel@tonic-gate * on the leaf page will be the same. In the case of on-page duplicates, 81*7c478bd9Sstevel@tonic-gate * the duplicate page number must not be set, and the key index offsets 82*7c478bd9Sstevel@tonic-gate * must be the same. For the last test, as the saved copy of the cursor 83*7c478bd9Sstevel@tonic-gate * will not have a valid page pointer, we use the cursor's. 
84*7c478bd9Sstevel@tonic-gate */ 85*7c478bd9Sstevel@tonic-gate #undef POSSIBLE_DUPLICATE 86*7c478bd9Sstevel@tonic-gate #define POSSIBLE_DUPLICATE(cursor, saved_copy) \ 87*7c478bd9Sstevel@tonic-gate ((cursor)->pgno == (saved_copy).pgno && \ 88*7c478bd9Sstevel@tonic-gate ((cursor)->indx == (saved_copy).indx || \ 89*7c478bd9Sstevel@tonic-gate ((cursor)->dpgno == PGNO_INVALID && \ 90*7c478bd9Sstevel@tonic-gate (saved_copy).dpgno == PGNO_INVALID && \ 91*7c478bd9Sstevel@tonic-gate (cursor)->page->inp[(cursor)->indx] == \ 92*7c478bd9Sstevel@tonic-gate (cursor)->page->inp[(saved_copy).indx]))) 93*7c478bd9Sstevel@tonic-gate 94*7c478bd9Sstevel@tonic-gate /* 95*7c478bd9Sstevel@tonic-gate * __bam_c_reset -- 96*7c478bd9Sstevel@tonic-gate * Initialize internal cursor structure. 97*7c478bd9Sstevel@tonic-gate */ 98*7c478bd9Sstevel@tonic-gate static void 99*7c478bd9Sstevel@tonic-gate __bam_c_reset(cp) 100*7c478bd9Sstevel@tonic-gate CURSOR *cp; 101*7c478bd9Sstevel@tonic-gate { 102*7c478bd9Sstevel@tonic-gate cp->sp = cp->csp = cp->stack; 103*7c478bd9Sstevel@tonic-gate cp->esp = cp->stack + sizeof(cp->stack) / sizeof(cp->stack[0]); 104*7c478bd9Sstevel@tonic-gate cp->page = NULL; 105*7c478bd9Sstevel@tonic-gate cp->pgno = PGNO_INVALID; 106*7c478bd9Sstevel@tonic-gate cp->indx = 0; 107*7c478bd9Sstevel@tonic-gate cp->dpgno = PGNO_INVALID; 108*7c478bd9Sstevel@tonic-gate cp->dindx = 0; 109*7c478bd9Sstevel@tonic-gate cp->lock = LOCK_INVALID; 110*7c478bd9Sstevel@tonic-gate cp->mode = DB_LOCK_NG; 111*7c478bd9Sstevel@tonic-gate cp->recno = RECNO_OOB; 112*7c478bd9Sstevel@tonic-gate cp->flags = 0; 113*7c478bd9Sstevel@tonic-gate } 114*7c478bd9Sstevel@tonic-gate 115*7c478bd9Sstevel@tonic-gate /* 116*7c478bd9Sstevel@tonic-gate * __bam_c_init -- 117*7c478bd9Sstevel@tonic-gate * Initialize the access private portion of a cursor 118*7c478bd9Sstevel@tonic-gate * 119*7c478bd9Sstevel@tonic-gate * PUBLIC: int __bam_c_init __P((DBC *)); 120*7c478bd9Sstevel@tonic-gate */ 121*7c478bd9Sstevel@tonic-gate int 
122*7c478bd9Sstevel@tonic-gate __bam_c_init(dbc) 123*7c478bd9Sstevel@tonic-gate DBC *dbc; 124*7c478bd9Sstevel@tonic-gate { 125*7c478bd9Sstevel@tonic-gate DB *dbp; 126*7c478bd9Sstevel@tonic-gate CURSOR *cp; 127*7c478bd9Sstevel@tonic-gate int ret; 128*7c478bd9Sstevel@tonic-gate 129*7c478bd9Sstevel@tonic-gate if ((ret = __os_calloc(1, sizeof(CURSOR), &cp)) != 0) 130*7c478bd9Sstevel@tonic-gate return (ret); 131*7c478bd9Sstevel@tonic-gate 132*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 133*7c478bd9Sstevel@tonic-gate cp->dbc = dbc; 134*7c478bd9Sstevel@tonic-gate 135*7c478bd9Sstevel@tonic-gate /* 136*7c478bd9Sstevel@tonic-gate * Logical record numbers are always the same size, and we don't want 137*7c478bd9Sstevel@tonic-gate * to have to check for space every time we return one. Allocate it 138*7c478bd9Sstevel@tonic-gate * in advance. 139*7c478bd9Sstevel@tonic-gate */ 140*7c478bd9Sstevel@tonic-gate if (dbp->type == DB_RECNO || F_ISSET(dbp, DB_BT_RECNUM)) { 141*7c478bd9Sstevel@tonic-gate if ((ret = __os_malloc(sizeof(db_recno_t), 142*7c478bd9Sstevel@tonic-gate NULL, &dbc->rkey.data)) != 0) { 143*7c478bd9Sstevel@tonic-gate __os_free(cp, sizeof(CURSOR)); 144*7c478bd9Sstevel@tonic-gate return (ret); 145*7c478bd9Sstevel@tonic-gate } 146*7c478bd9Sstevel@tonic-gate dbc->rkey.ulen = sizeof(db_recno_t); 147*7c478bd9Sstevel@tonic-gate } 148*7c478bd9Sstevel@tonic-gate 149*7c478bd9Sstevel@tonic-gate /* Initialize methods. 
*/ 150*7c478bd9Sstevel@tonic-gate dbc->internal = cp; 151*7c478bd9Sstevel@tonic-gate if (dbp->type == DB_BTREE) { 152*7c478bd9Sstevel@tonic-gate dbc->c_am_close = __bam_c_close; 153*7c478bd9Sstevel@tonic-gate dbc->c_am_destroy = __bam_c_destroy; 154*7c478bd9Sstevel@tonic-gate dbc->c_del = __bam_c_del; 155*7c478bd9Sstevel@tonic-gate dbc->c_get = __bam_c_get; 156*7c478bd9Sstevel@tonic-gate dbc->c_put = __bam_c_put; 157*7c478bd9Sstevel@tonic-gate } else { 158*7c478bd9Sstevel@tonic-gate dbc->c_am_close = __bam_c_close; 159*7c478bd9Sstevel@tonic-gate dbc->c_am_destroy = __bam_c_destroy; 160*7c478bd9Sstevel@tonic-gate dbc->c_del = __ram_c_del; 161*7c478bd9Sstevel@tonic-gate dbc->c_get = __ram_c_get; 162*7c478bd9Sstevel@tonic-gate dbc->c_put = __ram_c_put; 163*7c478bd9Sstevel@tonic-gate } 164*7c478bd9Sstevel@tonic-gate 165*7c478bd9Sstevel@tonic-gate /* Initialize dynamic information. */ 166*7c478bd9Sstevel@tonic-gate __bam_c_reset(cp); 167*7c478bd9Sstevel@tonic-gate 168*7c478bd9Sstevel@tonic-gate return (0); 169*7c478bd9Sstevel@tonic-gate } 170*7c478bd9Sstevel@tonic-gate 171*7c478bd9Sstevel@tonic-gate /* 172*7c478bd9Sstevel@tonic-gate * __bam_c_close -- 173*7c478bd9Sstevel@tonic-gate * Close down the cursor from a single use. 174*7c478bd9Sstevel@tonic-gate */ 175*7c478bd9Sstevel@tonic-gate static int 176*7c478bd9Sstevel@tonic-gate __bam_c_close(dbc) 177*7c478bd9Sstevel@tonic-gate DBC *dbc; 178*7c478bd9Sstevel@tonic-gate { 179*7c478bd9Sstevel@tonic-gate CURSOR *cp; 180*7c478bd9Sstevel@tonic-gate DB *dbp; 181*7c478bd9Sstevel@tonic-gate int ret; 182*7c478bd9Sstevel@tonic-gate 183*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 184*7c478bd9Sstevel@tonic-gate cp = dbc->internal; 185*7c478bd9Sstevel@tonic-gate ret = 0; 186*7c478bd9Sstevel@tonic-gate 187*7c478bd9Sstevel@tonic-gate /* 188*7c478bd9Sstevel@tonic-gate * If a cursor deleted a btree key, perform the actual deletion. 189*7c478bd9Sstevel@tonic-gate * (Recno keys are either deleted immediately or never deleted.) 
190*7c478bd9Sstevel@tonic-gate */ 191*7c478bd9Sstevel@tonic-gate if (dbp->type == DB_BTREE && F_ISSET(cp, C_DELETED)) 192*7c478bd9Sstevel@tonic-gate ret = __bam_c_physdel(dbc, cp, NULL); 193*7c478bd9Sstevel@tonic-gate 194*7c478bd9Sstevel@tonic-gate /* Discard any locks not acquired inside of a transaction. */ 195*7c478bd9Sstevel@tonic-gate if (cp->lock != LOCK_INVALID) { 196*7c478bd9Sstevel@tonic-gate (void)__BT_TLPUT(dbc, cp->lock); 197*7c478bd9Sstevel@tonic-gate cp->lock = LOCK_INVALID; 198*7c478bd9Sstevel@tonic-gate } 199*7c478bd9Sstevel@tonic-gate 200*7c478bd9Sstevel@tonic-gate /* Sanity checks. */ 201*7c478bd9Sstevel@tonic-gate #ifdef DIAGNOSTIC 202*7c478bd9Sstevel@tonic-gate if (cp->csp != cp->stack) 203*7c478bd9Sstevel@tonic-gate __db_err(dbp->dbenv, "btree cursor close: stack not empty"); 204*7c478bd9Sstevel@tonic-gate #endif 205*7c478bd9Sstevel@tonic-gate 206*7c478bd9Sstevel@tonic-gate /* Initialize dynamic information. */ 207*7c478bd9Sstevel@tonic-gate __bam_c_reset(cp); 208*7c478bd9Sstevel@tonic-gate 209*7c478bd9Sstevel@tonic-gate return (ret); 210*7c478bd9Sstevel@tonic-gate } 211*7c478bd9Sstevel@tonic-gate 212*7c478bd9Sstevel@tonic-gate /* 213*7c478bd9Sstevel@tonic-gate * __bam_c_destroy -- 214*7c478bd9Sstevel@tonic-gate * Close a single cursor -- internal version. 215*7c478bd9Sstevel@tonic-gate */ 216*7c478bd9Sstevel@tonic-gate static int 217*7c478bd9Sstevel@tonic-gate __bam_c_destroy(dbc) 218*7c478bd9Sstevel@tonic-gate DBC *dbc; 219*7c478bd9Sstevel@tonic-gate { 220*7c478bd9Sstevel@tonic-gate /* Discard the structures. */ 221*7c478bd9Sstevel@tonic-gate __os_free(dbc->internal, sizeof(CURSOR)); 222*7c478bd9Sstevel@tonic-gate 223*7c478bd9Sstevel@tonic-gate return (0); 224*7c478bd9Sstevel@tonic-gate } 225*7c478bd9Sstevel@tonic-gate 226*7c478bd9Sstevel@tonic-gate /* 227*7c478bd9Sstevel@tonic-gate * __bam_c_del -- 228*7c478bd9Sstevel@tonic-gate * Delete using a cursor. 
229*7c478bd9Sstevel@tonic-gate */ 230*7c478bd9Sstevel@tonic-gate static int 231*7c478bd9Sstevel@tonic-gate __bam_c_del(dbc, flags) 232*7c478bd9Sstevel@tonic-gate DBC *dbc; 233*7c478bd9Sstevel@tonic-gate u_int32_t flags; 234*7c478bd9Sstevel@tonic-gate { 235*7c478bd9Sstevel@tonic-gate CURSOR *cp; 236*7c478bd9Sstevel@tonic-gate DB *dbp; 237*7c478bd9Sstevel@tonic-gate DB_LOCK lock; 238*7c478bd9Sstevel@tonic-gate PAGE *h; 239*7c478bd9Sstevel@tonic-gate db_pgno_t pgno; 240*7c478bd9Sstevel@tonic-gate db_indx_t indx; 241*7c478bd9Sstevel@tonic-gate int ret; 242*7c478bd9Sstevel@tonic-gate 243*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 244*7c478bd9Sstevel@tonic-gate cp = dbc->internal; 245*7c478bd9Sstevel@tonic-gate h = NULL; 246*7c478bd9Sstevel@tonic-gate 247*7c478bd9Sstevel@tonic-gate DB_PANIC_CHECK(dbp); 248*7c478bd9Sstevel@tonic-gate 249*7c478bd9Sstevel@tonic-gate /* Check for invalid flags. */ 250*7c478bd9Sstevel@tonic-gate if ((ret = __db_cdelchk(dbp, flags, 251*7c478bd9Sstevel@tonic-gate F_ISSET(dbp, DB_AM_RDONLY), cp->pgno != PGNO_INVALID)) != 0) 252*7c478bd9Sstevel@tonic-gate return (ret); 253*7c478bd9Sstevel@tonic-gate 254*7c478bd9Sstevel@tonic-gate /* 255*7c478bd9Sstevel@tonic-gate * If we are running CDB, this had better be either a write 256*7c478bd9Sstevel@tonic-gate * cursor or an immediate writer. 257*7c478bd9Sstevel@tonic-gate */ 258*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_AM_CDB)) 259*7c478bd9Sstevel@tonic-gate if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER)) 260*7c478bd9Sstevel@tonic-gate return (EINVAL); 261*7c478bd9Sstevel@tonic-gate 262*7c478bd9Sstevel@tonic-gate DEBUG_LWRITE(dbc, dbc->txn, "bam_c_del", NULL, NULL, flags); 263*7c478bd9Sstevel@tonic-gate 264*7c478bd9Sstevel@tonic-gate /* If already deleted, return failure. 
*/ 265*7c478bd9Sstevel@tonic-gate if (F_ISSET(cp, C_DELETED)) 266*7c478bd9Sstevel@tonic-gate return (DB_KEYEMPTY); 267*7c478bd9Sstevel@tonic-gate 268*7c478bd9Sstevel@tonic-gate /* 269*7c478bd9Sstevel@tonic-gate * We don't physically delete the record until the cursor moves, 270*7c478bd9Sstevel@tonic-gate * so we have to have a long-lived write lock on the page instead 271*7c478bd9Sstevel@tonic-gate * of a long-lived read lock. Note, we have to have a read lock 272*7c478bd9Sstevel@tonic-gate * to even get here, so we simply discard it. 273*7c478bd9Sstevel@tonic-gate */ 274*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_AM_LOCKING) && cp->mode != DB_LOCK_WRITE) { 275*7c478bd9Sstevel@tonic-gate if ((ret = __bam_lget(dbc, 276*7c478bd9Sstevel@tonic-gate 0, cp->pgno, DB_LOCK_WRITE, &lock)) != 0) 277*7c478bd9Sstevel@tonic-gate goto err; 278*7c478bd9Sstevel@tonic-gate (void)__BT_TLPUT(dbc, cp->lock); 279*7c478bd9Sstevel@tonic-gate cp->lock = lock; 280*7c478bd9Sstevel@tonic-gate cp->mode = DB_LOCK_WRITE; 281*7c478bd9Sstevel@tonic-gate } 282*7c478bd9Sstevel@tonic-gate 283*7c478bd9Sstevel@tonic-gate /* 284*7c478bd9Sstevel@tonic-gate * Acquire the underlying page (which may be different from the above 285*7c478bd9Sstevel@tonic-gate * page because it may be a duplicate page), and set the on-page and 286*7c478bd9Sstevel@tonic-gate * in-cursor delete flags. We don't need to lock it as we've already 287*7c478bd9Sstevel@tonic-gate * write-locked the page leading to it. 
288*7c478bd9Sstevel@tonic-gate */ 289*7c478bd9Sstevel@tonic-gate if (cp->dpgno == PGNO_INVALID) { 290*7c478bd9Sstevel@tonic-gate pgno = cp->pgno; 291*7c478bd9Sstevel@tonic-gate indx = cp->indx; 292*7c478bd9Sstevel@tonic-gate } else { 293*7c478bd9Sstevel@tonic-gate pgno = cp->dpgno; 294*7c478bd9Sstevel@tonic-gate indx = cp->dindx; 295*7c478bd9Sstevel@tonic-gate } 296*7c478bd9Sstevel@tonic-gate 297*7c478bd9Sstevel@tonic-gate if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) 298*7c478bd9Sstevel@tonic-gate goto err; 299*7c478bd9Sstevel@tonic-gate 300*7c478bd9Sstevel@tonic-gate /* Log the change. */ 301*7c478bd9Sstevel@tonic-gate if (DB_LOGGING(dbc) && 302*7c478bd9Sstevel@tonic-gate (ret = __bam_cdel_log(dbp->dbenv->lg_info, dbc->txn, &LSN(h), 303*7c478bd9Sstevel@tonic-gate 0, dbp->log_fileid, PGNO(h), &LSN(h), indx)) != 0) { 304*7c478bd9Sstevel@tonic-gate (void)memp_fput(dbp->mpf, h, 0); 305*7c478bd9Sstevel@tonic-gate goto err; 306*7c478bd9Sstevel@tonic-gate } 307*7c478bd9Sstevel@tonic-gate 308*7c478bd9Sstevel@tonic-gate /* 309*7c478bd9Sstevel@tonic-gate * Set the intent-to-delete flag on the page and update all cursors. */ 310*7c478bd9Sstevel@tonic-gate if (cp->dpgno == PGNO_INVALID) 311*7c478bd9Sstevel@tonic-gate B_DSET(GET_BKEYDATA(h, indx + O_INDX)->type); 312*7c478bd9Sstevel@tonic-gate else 313*7c478bd9Sstevel@tonic-gate B_DSET(GET_BKEYDATA(h, indx)->type); 314*7c478bd9Sstevel@tonic-gate (void)__bam_ca_delete(dbp, pgno, indx, 1); 315*7c478bd9Sstevel@tonic-gate 316*7c478bd9Sstevel@tonic-gate ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY); 317*7c478bd9Sstevel@tonic-gate h = NULL; 318*7c478bd9Sstevel@tonic-gate 319*7c478bd9Sstevel@tonic-gate /* 320*7c478bd9Sstevel@tonic-gate * If the tree has record numbers, we have to adjust the counts. 321*7c478bd9Sstevel@tonic-gate * 322*7c478bd9Sstevel@tonic-gate * !!! 
323*7c478bd9Sstevel@tonic-gate * This test is right -- we don't yet support duplicates and record 324*7c478bd9Sstevel@tonic-gate * numbers in the same tree, so ignore duplicates if DB_BT_RECNUM 325*7c478bd9Sstevel@tonic-gate * set. 326*7c478bd9Sstevel@tonic-gate */ 327*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_BT_RECNUM)) { 328*7c478bd9Sstevel@tonic-gate if ((ret = __bam_c_getstack(dbc, cp)) != 0) 329*7c478bd9Sstevel@tonic-gate goto err; 330*7c478bd9Sstevel@tonic-gate if ((ret = __bam_adjust(dbc, -1)) != 0) 331*7c478bd9Sstevel@tonic-gate goto err; 332*7c478bd9Sstevel@tonic-gate (void)__bam_stkrel(dbc, 0); 333*7c478bd9Sstevel@tonic-gate } 334*7c478bd9Sstevel@tonic-gate 335*7c478bd9Sstevel@tonic-gate err: if (h != NULL) 336*7c478bd9Sstevel@tonic-gate (void)memp_fput(dbp->mpf, h, 0); 337*7c478bd9Sstevel@tonic-gate return (ret); 338*7c478bd9Sstevel@tonic-gate } 339*7c478bd9Sstevel@tonic-gate 340*7c478bd9Sstevel@tonic-gate /* 341*7c478bd9Sstevel@tonic-gate * __bam_c_get -- 342*7c478bd9Sstevel@tonic-gate * Get using a cursor (btree). 343*7c478bd9Sstevel@tonic-gate */ 344*7c478bd9Sstevel@tonic-gate static int 345*7c478bd9Sstevel@tonic-gate __bam_c_get(dbc, key, data, flags) 346*7c478bd9Sstevel@tonic-gate DBC *dbc; 347*7c478bd9Sstevel@tonic-gate DBT *key, *data; 348*7c478bd9Sstevel@tonic-gate u_int32_t flags; 349*7c478bd9Sstevel@tonic-gate { 350*7c478bd9Sstevel@tonic-gate CURSOR *cp, copy, start; 351*7c478bd9Sstevel@tonic-gate DB *dbp; 352*7c478bd9Sstevel@tonic-gate PAGE *h; 353*7c478bd9Sstevel@tonic-gate int exact, ret, tmp_rmw; 354*7c478bd9Sstevel@tonic-gate 355*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 356*7c478bd9Sstevel@tonic-gate cp = dbc->internal; 357*7c478bd9Sstevel@tonic-gate 358*7c478bd9Sstevel@tonic-gate DB_PANIC_CHECK(dbp); 359*7c478bd9Sstevel@tonic-gate 360*7c478bd9Sstevel@tonic-gate /* Check for invalid flags. 
*/ 361*7c478bd9Sstevel@tonic-gate if ((ret = __db_cgetchk(dbp, 362*7c478bd9Sstevel@tonic-gate key, data, flags, cp->pgno != PGNO_INVALID)) != 0) 363*7c478bd9Sstevel@tonic-gate return (ret); 364*7c478bd9Sstevel@tonic-gate 365*7c478bd9Sstevel@tonic-gate /* Clear OR'd in additional bits so we can check for flag equality. */ 366*7c478bd9Sstevel@tonic-gate tmp_rmw = 0; 367*7c478bd9Sstevel@tonic-gate if (LF_ISSET(DB_RMW)) { 368*7c478bd9Sstevel@tonic-gate if (!F_ISSET(dbp, DB_AM_CDB)) { 369*7c478bd9Sstevel@tonic-gate tmp_rmw = 1; 370*7c478bd9Sstevel@tonic-gate F_SET(dbc, DBC_RMW); 371*7c478bd9Sstevel@tonic-gate } 372*7c478bd9Sstevel@tonic-gate LF_CLR(DB_RMW); 373*7c478bd9Sstevel@tonic-gate } 374*7c478bd9Sstevel@tonic-gate 375*7c478bd9Sstevel@tonic-gate DEBUG_LREAD(dbc, dbc->txn, "bam_c_get", 376*7c478bd9Sstevel@tonic-gate flags == DB_SET || flags == DB_SET_RANGE ? key : NULL, NULL, flags); 377*7c478bd9Sstevel@tonic-gate 378*7c478bd9Sstevel@tonic-gate /* 379*7c478bd9Sstevel@tonic-gate * Return a cursor's record number. It has nothing to do with the 380*7c478bd9Sstevel@tonic-gate * cursor get code except that it's been rammed into the interface. 381*7c478bd9Sstevel@tonic-gate */ 382*7c478bd9Sstevel@tonic-gate if (flags == DB_GET_RECNO) { 383*7c478bd9Sstevel@tonic-gate ret = __bam_c_rget(dbc, data, flags); 384*7c478bd9Sstevel@tonic-gate if (tmp_rmw) 385*7c478bd9Sstevel@tonic-gate F_CLR(dbc, DBC_RMW); 386*7c478bd9Sstevel@tonic-gate return (ret); 387*7c478bd9Sstevel@tonic-gate } 388*7c478bd9Sstevel@tonic-gate 389*7c478bd9Sstevel@tonic-gate /* 390*7c478bd9Sstevel@tonic-gate * Initialize the cursor for a new retrieval. Clear the cursor's 391*7c478bd9Sstevel@tonic-gate * page pointer, it was set before this operation, and no longer 392*7c478bd9Sstevel@tonic-gate * has any meaning. 
393*7c478bd9Sstevel@tonic-gate */ 394*7c478bd9Sstevel@tonic-gate cp->page = NULL; 395*7c478bd9Sstevel@tonic-gate copy = *cp; 396*7c478bd9Sstevel@tonic-gate cp->lock = LOCK_INVALID; 397*7c478bd9Sstevel@tonic-gate 398*7c478bd9Sstevel@tonic-gate switch (flags) { 399*7c478bd9Sstevel@tonic-gate case DB_CURRENT: 400*7c478bd9Sstevel@tonic-gate /* It's not possible to return a deleted record. */ 401*7c478bd9Sstevel@tonic-gate if (F_ISSET(cp, C_DELETED)) { 402*7c478bd9Sstevel@tonic-gate ret = DB_KEYEMPTY; 403*7c478bd9Sstevel@tonic-gate goto err; 404*7c478bd9Sstevel@tonic-gate } 405*7c478bd9Sstevel@tonic-gate 406*7c478bd9Sstevel@tonic-gate /* Acquire the current page. */ 407*7c478bd9Sstevel@tonic-gate if ((ret = __bam_lget(dbc, 408*7c478bd9Sstevel@tonic-gate 0, cp->pgno, DB_LOCK_READ, &cp->lock)) == 0) 409*7c478bd9Sstevel@tonic-gate ret = memp_fget(dbp->mpf, 410*7c478bd9Sstevel@tonic-gate cp->dpgno == PGNO_INVALID ? &cp->pgno : &cp->dpgno, 411*7c478bd9Sstevel@tonic-gate 0, &cp->page); 412*7c478bd9Sstevel@tonic-gate if (ret != 0) 413*7c478bd9Sstevel@tonic-gate goto err; 414*7c478bd9Sstevel@tonic-gate break; 415*7c478bd9Sstevel@tonic-gate case DB_NEXT_DUP: 416*7c478bd9Sstevel@tonic-gate if (cp->pgno == PGNO_INVALID) { 417*7c478bd9Sstevel@tonic-gate ret = EINVAL; 418*7c478bd9Sstevel@tonic-gate goto err; 419*7c478bd9Sstevel@tonic-gate } 420*7c478bd9Sstevel@tonic-gate if ((ret = __bam_c_next(dbc, cp, 1)) != 0) 421*7c478bd9Sstevel@tonic-gate goto err; 422*7c478bd9Sstevel@tonic-gate 423*7c478bd9Sstevel@tonic-gate /* Make sure we didn't go past the end of the duplicates. 
*/ 424*7c478bd9Sstevel@tonic-gate if (!POSSIBLE_DUPLICATE(cp, copy)) { 425*7c478bd9Sstevel@tonic-gate ret = DB_NOTFOUND; 426*7c478bd9Sstevel@tonic-gate goto err; 427*7c478bd9Sstevel@tonic-gate } 428*7c478bd9Sstevel@tonic-gate break; 429*7c478bd9Sstevel@tonic-gate case DB_NEXT: 430*7c478bd9Sstevel@tonic-gate if (cp->pgno != PGNO_INVALID) { 431*7c478bd9Sstevel@tonic-gate if ((ret = __bam_c_next(dbc, cp, 1)) != 0) 432*7c478bd9Sstevel@tonic-gate goto err; 433*7c478bd9Sstevel@tonic-gate break; 434*7c478bd9Sstevel@tonic-gate } 435*7c478bd9Sstevel@tonic-gate /* FALLTHROUGH */ 436*7c478bd9Sstevel@tonic-gate case DB_FIRST: 437*7c478bd9Sstevel@tonic-gate if ((ret = __bam_c_first(dbc, cp)) != 0) 438*7c478bd9Sstevel@tonic-gate goto err; 439*7c478bd9Sstevel@tonic-gate break; 440*7c478bd9Sstevel@tonic-gate case DB_PREV: 441*7c478bd9Sstevel@tonic-gate if (cp->pgno != PGNO_INVALID) { 442*7c478bd9Sstevel@tonic-gate if ((ret = __bam_c_prev(dbc, cp)) != 0) 443*7c478bd9Sstevel@tonic-gate goto err; 444*7c478bd9Sstevel@tonic-gate break; 445*7c478bd9Sstevel@tonic-gate } 446*7c478bd9Sstevel@tonic-gate /* FALLTHROUGH */ 447*7c478bd9Sstevel@tonic-gate case DB_LAST: 448*7c478bd9Sstevel@tonic-gate if ((ret = __bam_c_last(dbc, cp)) != 0) 449*7c478bd9Sstevel@tonic-gate goto err; 450*7c478bd9Sstevel@tonic-gate break; 451*7c478bd9Sstevel@tonic-gate case DB_SET: 452*7c478bd9Sstevel@tonic-gate if ((ret = __bam_c_search(dbc, cp, key, flags, &exact)) != 0) 453*7c478bd9Sstevel@tonic-gate goto err; 454*7c478bd9Sstevel@tonic-gate 455*7c478bd9Sstevel@tonic-gate /* 456*7c478bd9Sstevel@tonic-gate * We cannot currently be referencing a deleted record, but we 457*7c478bd9Sstevel@tonic-gate * may be referencing off-page duplicates. 458*7c478bd9Sstevel@tonic-gate * 459*7c478bd9Sstevel@tonic-gate * If we're referencing off-page duplicates, move off-page. 460*7c478bd9Sstevel@tonic-gate * If we moved off-page, move to the next non-deleted record. 
461*7c478bd9Sstevel@tonic-gate * If we moved to the next non-deleted record, check to make 462*7c478bd9Sstevel@tonic-gate * sure we didn't switch records because our current record 463*7c478bd9Sstevel@tonic-gate * had no non-deleted data items. 464*7c478bd9Sstevel@tonic-gate */ 465*7c478bd9Sstevel@tonic-gate start = *cp; 466*7c478bd9Sstevel@tonic-gate if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0) 467*7c478bd9Sstevel@tonic-gate goto err; 468*7c478bd9Sstevel@tonic-gate if (cp->dpgno != PGNO_INVALID && IS_CUR_DELETED(cp)) { 469*7c478bd9Sstevel@tonic-gate if ((ret = __bam_c_next(dbc, cp, 0)) != 0) 470*7c478bd9Sstevel@tonic-gate goto err; 471*7c478bd9Sstevel@tonic-gate if (!POSSIBLE_DUPLICATE(cp, start)) { 472*7c478bd9Sstevel@tonic-gate ret = DB_NOTFOUND; 473*7c478bd9Sstevel@tonic-gate goto err; 474*7c478bd9Sstevel@tonic-gate } 475*7c478bd9Sstevel@tonic-gate } 476*7c478bd9Sstevel@tonic-gate break; 477*7c478bd9Sstevel@tonic-gate case DB_SET_RECNO: 478*7c478bd9Sstevel@tonic-gate if ((ret = __bam_c_search(dbc, cp, key, flags, &exact)) != 0) 479*7c478bd9Sstevel@tonic-gate goto err; 480*7c478bd9Sstevel@tonic-gate break; 481*7c478bd9Sstevel@tonic-gate case DB_GET_BOTH: 482*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbc, DBC_CONTINUE | DBC_KEYSET)) { 483*7c478bd9Sstevel@tonic-gate /* Acquire the current page. */ 484*7c478bd9Sstevel@tonic-gate if ((ret = memp_fget(dbp->mpf, 485*7c478bd9Sstevel@tonic-gate cp->dpgno == PGNO_INVALID ? &cp->pgno : &cp->dpgno, 486*7c478bd9Sstevel@tonic-gate 0, &cp->page)) != 0) 487*7c478bd9Sstevel@tonic-gate goto err; 488*7c478bd9Sstevel@tonic-gate 489*7c478bd9Sstevel@tonic-gate /* If DBC_CONTINUE, move to the next item. 
*/ 490*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbc, DBC_CONTINUE) && 491*7c478bd9Sstevel@tonic-gate (ret = __bam_c_next(dbc, cp, 1)) != 0) 492*7c478bd9Sstevel@tonic-gate goto err; 493*7c478bd9Sstevel@tonic-gate } else { 494*7c478bd9Sstevel@tonic-gate if ((ret = 495*7c478bd9Sstevel@tonic-gate __bam_c_search(dbc, cp, key, flags, &exact)) != 0) 496*7c478bd9Sstevel@tonic-gate goto err; 497*7c478bd9Sstevel@tonic-gate 498*7c478bd9Sstevel@tonic-gate /* 499*7c478bd9Sstevel@tonic-gate * We may be referencing a duplicates page. Move to 500*7c478bd9Sstevel@tonic-gate * the first duplicate. 501*7c478bd9Sstevel@tonic-gate */ 502*7c478bd9Sstevel@tonic-gate if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0) 503*7c478bd9Sstevel@tonic-gate goto err; 504*7c478bd9Sstevel@tonic-gate } 505*7c478bd9Sstevel@tonic-gate 506*7c478bd9Sstevel@tonic-gate /* Search for a matching entry. */ 507*7c478bd9Sstevel@tonic-gate if ((ret = __bam_dsearch(dbc, cp, data, NULL)) != 0) 508*7c478bd9Sstevel@tonic-gate goto err; 509*7c478bd9Sstevel@tonic-gate 510*7c478bd9Sstevel@tonic-gate /* Ignore deleted entries. */ 511*7c478bd9Sstevel@tonic-gate if (IS_CUR_DELETED(cp)) { 512*7c478bd9Sstevel@tonic-gate ret = DB_NOTFOUND; 513*7c478bd9Sstevel@tonic-gate goto err; 514*7c478bd9Sstevel@tonic-gate } 515*7c478bd9Sstevel@tonic-gate break; 516*7c478bd9Sstevel@tonic-gate case DB_SET_RANGE: 517*7c478bd9Sstevel@tonic-gate if ((ret = __bam_c_search(dbc, cp, key, flags, &exact)) != 0) 518*7c478bd9Sstevel@tonic-gate goto err; 519*7c478bd9Sstevel@tonic-gate 520*7c478bd9Sstevel@tonic-gate /* 521*7c478bd9Sstevel@tonic-gate * As we didn't require an exact match, the search function 522*7c478bd9Sstevel@tonic-gate * may have returned an entry past the end of the page. If 523*7c478bd9Sstevel@tonic-gate * so, move to the next entry. 
524*7c478bd9Sstevel@tonic-gate */ 525*7c478bd9Sstevel@tonic-gate if (cp->indx == NUM_ENT(cp->page) && 526*7c478bd9Sstevel@tonic-gate (ret = __bam_c_next(dbc, cp, 0)) != 0) 527*7c478bd9Sstevel@tonic-gate goto err; 528*7c478bd9Sstevel@tonic-gate 529*7c478bd9Sstevel@tonic-gate /* 530*7c478bd9Sstevel@tonic-gate * We may be referencing off-page duplicates, if so, move 531*7c478bd9Sstevel@tonic-gate * off-page. 532*7c478bd9Sstevel@tonic-gate */ 533*7c478bd9Sstevel@tonic-gate if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0) 534*7c478bd9Sstevel@tonic-gate goto err; 535*7c478bd9Sstevel@tonic-gate 536*7c478bd9Sstevel@tonic-gate /* 537*7c478bd9Sstevel@tonic-gate * We may be referencing a deleted record, if so, move to 538*7c478bd9Sstevel@tonic-gate * the next non-deleted record. 539*7c478bd9Sstevel@tonic-gate */ 540*7c478bd9Sstevel@tonic-gate if (IS_CUR_DELETED(cp) && (ret = __bam_c_next(dbc, cp, 0)) != 0) 541*7c478bd9Sstevel@tonic-gate goto err; 542*7c478bd9Sstevel@tonic-gate break; 543*7c478bd9Sstevel@tonic-gate } 544*7c478bd9Sstevel@tonic-gate 545*7c478bd9Sstevel@tonic-gate /* 546*7c478bd9Sstevel@tonic-gate * Return the key if the user didn't give us one. If we've moved to 547*7c478bd9Sstevel@tonic-gate * a duplicate page, we may no longer have a pointer to the main page, 548*7c478bd9Sstevel@tonic-gate * so we have to go get it. We know that it's already read-locked, 549*7c478bd9Sstevel@tonic-gate * however, so we don't have to acquire a new lock. 
550*7c478bd9Sstevel@tonic-gate */ 551*7c478bd9Sstevel@tonic-gate if (flags != DB_SET) { 552*7c478bd9Sstevel@tonic-gate if (cp->dpgno != PGNO_INVALID) { 553*7c478bd9Sstevel@tonic-gate if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &h)) != 0) 554*7c478bd9Sstevel@tonic-gate goto err; 555*7c478bd9Sstevel@tonic-gate } else 556*7c478bd9Sstevel@tonic-gate h = cp->page; 557*7c478bd9Sstevel@tonic-gate ret = __db_ret(dbp, 558*7c478bd9Sstevel@tonic-gate h, cp->indx, key, &dbc->rkey.data, &dbc->rkey.ulen); 559*7c478bd9Sstevel@tonic-gate if (cp->dpgno != PGNO_INVALID) 560*7c478bd9Sstevel@tonic-gate (void)memp_fput(dbp->mpf, h, 0); 561*7c478bd9Sstevel@tonic-gate if (ret) 562*7c478bd9Sstevel@tonic-gate goto err; 563*7c478bd9Sstevel@tonic-gate } 564*7c478bd9Sstevel@tonic-gate 565*7c478bd9Sstevel@tonic-gate /* Return the data. */ 566*7c478bd9Sstevel@tonic-gate if ((ret = __db_ret(dbp, cp->page, 567*7c478bd9Sstevel@tonic-gate cp->dpgno == PGNO_INVALID ? cp->indx + O_INDX : cp->dindx, 568*7c478bd9Sstevel@tonic-gate data, &dbc->rdata.data, &dbc->rdata.ulen)) != 0) 569*7c478bd9Sstevel@tonic-gate goto err; 570*7c478bd9Sstevel@tonic-gate 571*7c478bd9Sstevel@tonic-gate /* 572*7c478bd9Sstevel@tonic-gate * If the previous cursor record has been deleted, physically delete 573*7c478bd9Sstevel@tonic-gate * the entry from the page. We clear the deleted flag before we call 574*7c478bd9Sstevel@tonic-gate * the underlying delete routine so that, if an error occurs, and we 575*7c478bd9Sstevel@tonic-gate * restore the cursor, the deleted flag is cleared. This is because, 576*7c478bd9Sstevel@tonic-gate * if we manage to physically modify the page, and then restore the 577*7c478bd9Sstevel@tonic-gate * cursor, we might try to repeat the page modification when closing 578*7c478bd9Sstevel@tonic-gate * the cursor. 
579*7c478bd9Sstevel@tonic-gate */ 580*7c478bd9Sstevel@tonic-gate if (F_ISSET(©, C_DELETED)) { 581*7c478bd9Sstevel@tonic-gate F_CLR(©, C_DELETED); 582*7c478bd9Sstevel@tonic-gate if ((ret = __bam_c_physdel(dbc, ©, cp->page)) != 0) 583*7c478bd9Sstevel@tonic-gate goto err; 584*7c478bd9Sstevel@tonic-gate } 585*7c478bd9Sstevel@tonic-gate F_CLR(cp, C_DELETED); 586*7c478bd9Sstevel@tonic-gate 587*7c478bd9Sstevel@tonic-gate /* Release the previous lock, if any; the current lock is retained. */ 588*7c478bd9Sstevel@tonic-gate if (copy.lock != LOCK_INVALID) 589*7c478bd9Sstevel@tonic-gate (void)__BT_TLPUT(dbc, copy.lock); 590*7c478bd9Sstevel@tonic-gate 591*7c478bd9Sstevel@tonic-gate /* Release the current page. */ 592*7c478bd9Sstevel@tonic-gate if ((ret = memp_fput(dbp->mpf, cp->page, 0)) != 0) 593*7c478bd9Sstevel@tonic-gate goto err; 594*7c478bd9Sstevel@tonic-gate 595*7c478bd9Sstevel@tonic-gate if (0) { 596*7c478bd9Sstevel@tonic-gate err: if (cp->page != NULL) 597*7c478bd9Sstevel@tonic-gate (void)memp_fput(dbp->mpf, cp->page, 0); 598*7c478bd9Sstevel@tonic-gate if (cp->lock != LOCK_INVALID) 599*7c478bd9Sstevel@tonic-gate (void)__BT_TLPUT(dbc, cp->lock); 600*7c478bd9Sstevel@tonic-gate *cp = copy; 601*7c478bd9Sstevel@tonic-gate } 602*7c478bd9Sstevel@tonic-gate 603*7c478bd9Sstevel@tonic-gate /* Release temporary lock upgrade. */ 604*7c478bd9Sstevel@tonic-gate if (tmp_rmw) 605*7c478bd9Sstevel@tonic-gate F_CLR(dbc, DBC_RMW); 606*7c478bd9Sstevel@tonic-gate 607*7c478bd9Sstevel@tonic-gate return (ret); 608*7c478bd9Sstevel@tonic-gate } 609*7c478bd9Sstevel@tonic-gate 610*7c478bd9Sstevel@tonic-gate /* 611*7c478bd9Sstevel@tonic-gate * __bam_dsearch -- 612*7c478bd9Sstevel@tonic-gate * Search for a matching data item (or the first data item that's 613*7c478bd9Sstevel@tonic-gate * equal to or greater than the one we're searching for). 
614*7c478bd9Sstevel@tonic-gate */ 615*7c478bd9Sstevel@tonic-gate static int 616*7c478bd9Sstevel@tonic-gate __bam_dsearch(dbc, cp, data, iflagp) 617*7c478bd9Sstevel@tonic-gate DBC *dbc; 618*7c478bd9Sstevel@tonic-gate CURSOR *cp; 619*7c478bd9Sstevel@tonic-gate DBT *data; 620*7c478bd9Sstevel@tonic-gate u_int32_t *iflagp; 621*7c478bd9Sstevel@tonic-gate { 622*7c478bd9Sstevel@tonic-gate DB *dbp; 623*7c478bd9Sstevel@tonic-gate CURSOR copy, last; 624*7c478bd9Sstevel@tonic-gate int cmp, ret; 625*7c478bd9Sstevel@tonic-gate 626*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 627*7c478bd9Sstevel@tonic-gate 628*7c478bd9Sstevel@tonic-gate /* 629*7c478bd9Sstevel@tonic-gate * If iflagp is non-NULL, we're doing an insert. 630*7c478bd9Sstevel@tonic-gate * 631*7c478bd9Sstevel@tonic-gate * If the duplicates are off-page, use the duplicate search routine. 632*7c478bd9Sstevel@tonic-gate */ 633*7c478bd9Sstevel@tonic-gate if (cp->dpgno != PGNO_INVALID) { 634*7c478bd9Sstevel@tonic-gate if ((ret = __db_dsearch(dbc, iflagp != NULL, 635*7c478bd9Sstevel@tonic-gate data, cp->dpgno, &cp->dindx, &cp->page, &cmp)) != 0) 636*7c478bd9Sstevel@tonic-gate return (ret); 637*7c478bd9Sstevel@tonic-gate cp->dpgno = cp->page->pgno; 638*7c478bd9Sstevel@tonic-gate 639*7c478bd9Sstevel@tonic-gate if (iflagp == NULL) { 640*7c478bd9Sstevel@tonic-gate if (cmp != 0) 641*7c478bd9Sstevel@tonic-gate return (DB_NOTFOUND); 642*7c478bd9Sstevel@tonic-gate return (0); 643*7c478bd9Sstevel@tonic-gate } 644*7c478bd9Sstevel@tonic-gate *iflagp = DB_BEFORE; 645*7c478bd9Sstevel@tonic-gate return (0); 646*7c478bd9Sstevel@tonic-gate } 647*7c478bd9Sstevel@tonic-gate 648*7c478bd9Sstevel@tonic-gate /* Otherwise, do the search ourselves. */ 649*7c478bd9Sstevel@tonic-gate copy = *cp; 650*7c478bd9Sstevel@tonic-gate for (;;) { 651*7c478bd9Sstevel@tonic-gate /* Save the last interesting cursor position. 
*/ 652*7c478bd9Sstevel@tonic-gate last = *cp; 653*7c478bd9Sstevel@tonic-gate 654*7c478bd9Sstevel@tonic-gate /* See if the data item matches the one we're looking for. */ 655*7c478bd9Sstevel@tonic-gate if ((cmp = __bam_cmp(dbp, data, cp->page, cp->indx + O_INDX, 656*7c478bd9Sstevel@tonic-gate dbp->dup_compare == NULL ? 657*7c478bd9Sstevel@tonic-gate __bam_defcmp : dbp->dup_compare)) == 0) { 658*7c478bd9Sstevel@tonic-gate if (iflagp != NULL) 659*7c478bd9Sstevel@tonic-gate *iflagp = DB_AFTER; 660*7c478bd9Sstevel@tonic-gate return (0); 661*7c478bd9Sstevel@tonic-gate } 662*7c478bd9Sstevel@tonic-gate 663*7c478bd9Sstevel@tonic-gate /* 664*7c478bd9Sstevel@tonic-gate * If duplicate entries are sorted, we're done if we find a 665*7c478bd9Sstevel@tonic-gate * page entry that sorts greater than the application item. 666*7c478bd9Sstevel@tonic-gate * If doing an insert, return success, otherwise DB_NOTFOUND. 667*7c478bd9Sstevel@tonic-gate */ 668*7c478bd9Sstevel@tonic-gate if (dbp->dup_compare != NULL && cmp < 0) { 669*7c478bd9Sstevel@tonic-gate if (iflagp == NULL) 670*7c478bd9Sstevel@tonic-gate return (DB_NOTFOUND); 671*7c478bd9Sstevel@tonic-gate *iflagp = DB_BEFORE; 672*7c478bd9Sstevel@tonic-gate return (0); 673*7c478bd9Sstevel@tonic-gate } 674*7c478bd9Sstevel@tonic-gate 675*7c478bd9Sstevel@tonic-gate /* 676*7c478bd9Sstevel@tonic-gate * Move to the next item. If we reach the end of the page and 677*7c478bd9Sstevel@tonic-gate * we're doing an insert, set the cursor to the last item and 678*7c478bd9Sstevel@tonic-gate * set the referenced memory location so callers know to insert 679*7c478bd9Sstevel@tonic-gate * after the item, instead of before it. If not inserting, we 680*7c478bd9Sstevel@tonic-gate * return DB_NOTFOUND. 
681*7c478bd9Sstevel@tonic-gate */ 682*7c478bd9Sstevel@tonic-gate if ((cp->indx += P_INDX) >= NUM_ENT(cp->page)) { 683*7c478bd9Sstevel@tonic-gate if (iflagp == NULL) 684*7c478bd9Sstevel@tonic-gate return (DB_NOTFOUND); 685*7c478bd9Sstevel@tonic-gate goto use_last; 686*7c478bd9Sstevel@tonic-gate } 687*7c478bd9Sstevel@tonic-gate 688*7c478bd9Sstevel@tonic-gate /* 689*7c478bd9Sstevel@tonic-gate * Make sure we didn't go past the end of the duplicates. The 690*7c478bd9Sstevel@tonic-gate * error conditions are the same as above. 691*7c478bd9Sstevel@tonic-gate */ 692*7c478bd9Sstevel@tonic-gate if (!POSSIBLE_DUPLICATE(cp, copy)) { 693*7c478bd9Sstevel@tonic-gate if (iflagp == NULL) 694*7c478bd9Sstevel@tonic-gate return (DB_NOTFOUND); 695*7c478bd9Sstevel@tonic-gate use_last: *cp = last; 696*7c478bd9Sstevel@tonic-gate *iflagp = DB_AFTER; 697*7c478bd9Sstevel@tonic-gate return (0); 698*7c478bd9Sstevel@tonic-gate } 699*7c478bd9Sstevel@tonic-gate } 700*7c478bd9Sstevel@tonic-gate /* NOTREACHED */ 701*7c478bd9Sstevel@tonic-gate } 702*7c478bd9Sstevel@tonic-gate 703*7c478bd9Sstevel@tonic-gate /* 704*7c478bd9Sstevel@tonic-gate * __bam_c_rget -- 705*7c478bd9Sstevel@tonic-gate * Return the record number for a cursor. 
706*7c478bd9Sstevel@tonic-gate */ 707*7c478bd9Sstevel@tonic-gate static int 708*7c478bd9Sstevel@tonic-gate __bam_c_rget(dbc, data, flags) 709*7c478bd9Sstevel@tonic-gate DBC *dbc; 710*7c478bd9Sstevel@tonic-gate DBT *data; 711*7c478bd9Sstevel@tonic-gate u_int32_t flags; 712*7c478bd9Sstevel@tonic-gate { 713*7c478bd9Sstevel@tonic-gate CURSOR *cp; 714*7c478bd9Sstevel@tonic-gate DB *dbp; 715*7c478bd9Sstevel@tonic-gate DBT dbt; 716*7c478bd9Sstevel@tonic-gate db_recno_t recno; 717*7c478bd9Sstevel@tonic-gate int exact, ret; 718*7c478bd9Sstevel@tonic-gate 719*7c478bd9Sstevel@tonic-gate COMPQUIET(flags, 0); 720*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 721*7c478bd9Sstevel@tonic-gate cp = dbc->internal; 722*7c478bd9Sstevel@tonic-gate 723*7c478bd9Sstevel@tonic-gate /* Get the page with the current item on it. */ 724*7c478bd9Sstevel@tonic-gate if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &cp->page)) != 0) 725*7c478bd9Sstevel@tonic-gate return (ret); 726*7c478bd9Sstevel@tonic-gate 727*7c478bd9Sstevel@tonic-gate /* Get a copy of the key. */ 728*7c478bd9Sstevel@tonic-gate memset(&dbt, 0, sizeof(DBT)); 729*7c478bd9Sstevel@tonic-gate dbt.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL; 730*7c478bd9Sstevel@tonic-gate if ((ret = __db_ret(dbp, cp->page, cp->indx, &dbt, NULL, NULL)) != 0) 731*7c478bd9Sstevel@tonic-gate goto err; 732*7c478bd9Sstevel@tonic-gate 733*7c478bd9Sstevel@tonic-gate exact = 1; 734*7c478bd9Sstevel@tonic-gate if ((ret = __bam_search(dbc, &dbt, 735*7c478bd9Sstevel@tonic-gate F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND, 736*7c478bd9Sstevel@tonic-gate 1, &recno, &exact)) != 0) 737*7c478bd9Sstevel@tonic-gate goto err; 738*7c478bd9Sstevel@tonic-gate 739*7c478bd9Sstevel@tonic-gate ret = __db_retcopy(data, &recno, sizeof(recno), 740*7c478bd9Sstevel@tonic-gate &dbc->rdata.data, &dbc->rdata.ulen, dbp->db_malloc); 741*7c478bd9Sstevel@tonic-gate 742*7c478bd9Sstevel@tonic-gate /* Release the stack. 
*/ 743*7c478bd9Sstevel@tonic-gate __bam_stkrel(dbc, 0); 744*7c478bd9Sstevel@tonic-gate 745*7c478bd9Sstevel@tonic-gate err: (void)memp_fput(dbp->mpf, cp->page, 0); 746*7c478bd9Sstevel@tonic-gate __os_free(dbt.data, dbt.size); 747*7c478bd9Sstevel@tonic-gate return (ret); 748*7c478bd9Sstevel@tonic-gate } 749*7c478bd9Sstevel@tonic-gate 750*7c478bd9Sstevel@tonic-gate /* 751*7c478bd9Sstevel@tonic-gate * __bam_c_put -- 752*7c478bd9Sstevel@tonic-gate * Put using a cursor. 753*7c478bd9Sstevel@tonic-gate */ 754*7c478bd9Sstevel@tonic-gate static int 755*7c478bd9Sstevel@tonic-gate __bam_c_put(dbc, key, data, flags) 756*7c478bd9Sstevel@tonic-gate DBC *dbc; 757*7c478bd9Sstevel@tonic-gate DBT *key, *data; 758*7c478bd9Sstevel@tonic-gate u_int32_t flags; 759*7c478bd9Sstevel@tonic-gate { 760*7c478bd9Sstevel@tonic-gate CURSOR *cp, copy; 761*7c478bd9Sstevel@tonic-gate DB *dbp; 762*7c478bd9Sstevel@tonic-gate DBT dbt; 763*7c478bd9Sstevel@tonic-gate db_indx_t indx; 764*7c478bd9Sstevel@tonic-gate db_pgno_t pgno; 765*7c478bd9Sstevel@tonic-gate u_int32_t iiflags, iiop; 766*7c478bd9Sstevel@tonic-gate int exact, needkey, ret, stack; 767*7c478bd9Sstevel@tonic-gate void *arg; 768*7c478bd9Sstevel@tonic-gate 769*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 770*7c478bd9Sstevel@tonic-gate cp = dbc->internal; 771*7c478bd9Sstevel@tonic-gate 772*7c478bd9Sstevel@tonic-gate DB_PANIC_CHECK(dbp); 773*7c478bd9Sstevel@tonic-gate 774*7c478bd9Sstevel@tonic-gate DEBUG_LWRITE(dbc, dbc->txn, "bam_c_put", 775*7c478bd9Sstevel@tonic-gate flags == DB_KEYFIRST || flags == DB_KEYLAST ? 
key : NULL, 776*7c478bd9Sstevel@tonic-gate data, flags); 777*7c478bd9Sstevel@tonic-gate 778*7c478bd9Sstevel@tonic-gate if ((ret = __db_cputchk(dbp, key, data, flags, 779*7c478bd9Sstevel@tonic-gate F_ISSET(dbp, DB_AM_RDONLY), cp->pgno != PGNO_INVALID)) != 0) 780*7c478bd9Sstevel@tonic-gate return (ret); 781*7c478bd9Sstevel@tonic-gate 782*7c478bd9Sstevel@tonic-gate /* 783*7c478bd9Sstevel@tonic-gate * If we are running CDB, this had better be either a write 784*7c478bd9Sstevel@tonic-gate * cursor or an immediate writer. If it's a regular writer, 785*7c478bd9Sstevel@tonic-gate * that means we have an IWRITE lock and we need to upgrade 786*7c478bd9Sstevel@tonic-gate * it to a write lock. 787*7c478bd9Sstevel@tonic-gate */ 788*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_AM_CDB)) { 789*7c478bd9Sstevel@tonic-gate if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER)) 790*7c478bd9Sstevel@tonic-gate return (EINVAL); 791*7c478bd9Sstevel@tonic-gate 792*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbc, DBC_RMW) && 793*7c478bd9Sstevel@tonic-gate (ret = lock_get(dbp->dbenv->lk_info, dbc->locker, 794*7c478bd9Sstevel@tonic-gate DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE, 795*7c478bd9Sstevel@tonic-gate &dbc->mylock)) != 0) 796*7c478bd9Sstevel@tonic-gate return (EAGAIN); 797*7c478bd9Sstevel@tonic-gate } 798*7c478bd9Sstevel@tonic-gate 799*7c478bd9Sstevel@tonic-gate if (0) { 800*7c478bd9Sstevel@tonic-gate split: /* 801*7c478bd9Sstevel@tonic-gate * To split, we need a valid key for the page. Since it's a 802*7c478bd9Sstevel@tonic-gate * cursor, we have to build one. 803*7c478bd9Sstevel@tonic-gate * 804*7c478bd9Sstevel@tonic-gate * Acquire a copy of a key from the page. 
805*7c478bd9Sstevel@tonic-gate */ 806*7c478bd9Sstevel@tonic-gate if (needkey) { 807*7c478bd9Sstevel@tonic-gate memset(&dbt, 0, sizeof(DBT)); 808*7c478bd9Sstevel@tonic-gate if ((ret = __db_ret(dbp, cp->page, indx, 809*7c478bd9Sstevel@tonic-gate &dbt, &dbc->rkey.data, &dbc->rkey.ulen)) != 0) 810*7c478bd9Sstevel@tonic-gate goto err; 811*7c478bd9Sstevel@tonic-gate arg = &dbt; 812*7c478bd9Sstevel@tonic-gate } else 813*7c478bd9Sstevel@tonic-gate arg = key; 814*7c478bd9Sstevel@tonic-gate 815*7c478bd9Sstevel@tonic-gate /* 816*7c478bd9Sstevel@tonic-gate * Discard any locks and pinned pages (the locks are discarded 817*7c478bd9Sstevel@tonic-gate * even if we're running with transactions, as they lock pages 818*7c478bd9Sstevel@tonic-gate * that we're sorry we ever acquired). If stack is set and the 819*7c478bd9Sstevel@tonic-gate * cursor entries are valid, they point to the same entries as 820*7c478bd9Sstevel@tonic-gate * the stack, don't free them twice. 821*7c478bd9Sstevel@tonic-gate */ 822*7c478bd9Sstevel@tonic-gate if (stack) { 823*7c478bd9Sstevel@tonic-gate (void)__bam_stkrel(dbc, 1); 824*7c478bd9Sstevel@tonic-gate stack = 0; 825*7c478bd9Sstevel@tonic-gate } else 826*7c478bd9Sstevel@tonic-gate DISCARD(dbc, cp); 827*7c478bd9Sstevel@tonic-gate 828*7c478bd9Sstevel@tonic-gate /* 829*7c478bd9Sstevel@tonic-gate * Restore the cursor to its original value. This is necessary 830*7c478bd9Sstevel@tonic-gate * for two reasons. First, we are about to copy it in case of 831*7c478bd9Sstevel@tonic-gate * error, again. Second, we adjust cursors during the split, 832*7c478bd9Sstevel@tonic-gate * and we have to ensure this cursor is adjusted appropriately, 833*7c478bd9Sstevel@tonic-gate * along with all the other cursors. 
834*7c478bd9Sstevel@tonic-gate */ 835*7c478bd9Sstevel@tonic-gate *cp = copy; 836*7c478bd9Sstevel@tonic-gate 837*7c478bd9Sstevel@tonic-gate if ((ret = __bam_split(dbc, arg)) != 0) 838*7c478bd9Sstevel@tonic-gate goto err; 839*7c478bd9Sstevel@tonic-gate } 840*7c478bd9Sstevel@tonic-gate 841*7c478bd9Sstevel@tonic-gate /* 842*7c478bd9Sstevel@tonic-gate * Initialize the cursor for a new retrieval. Clear the cursor's 843*7c478bd9Sstevel@tonic-gate * page pointer, it was set before this operation, and no longer 844*7c478bd9Sstevel@tonic-gate * has any meaning. 845*7c478bd9Sstevel@tonic-gate */ 846*7c478bd9Sstevel@tonic-gate cp->page = NULL; 847*7c478bd9Sstevel@tonic-gate copy = *cp; 848*7c478bd9Sstevel@tonic-gate cp->lock = LOCK_INVALID; 849*7c478bd9Sstevel@tonic-gate 850*7c478bd9Sstevel@tonic-gate iiflags = needkey = ret = stack = 0; 851*7c478bd9Sstevel@tonic-gate switch (flags) { 852*7c478bd9Sstevel@tonic-gate case DB_AFTER: 853*7c478bd9Sstevel@tonic-gate case DB_BEFORE: 854*7c478bd9Sstevel@tonic-gate case DB_CURRENT: 855*7c478bd9Sstevel@tonic-gate needkey = 1; 856*7c478bd9Sstevel@tonic-gate if (cp->dpgno == PGNO_INVALID) { 857*7c478bd9Sstevel@tonic-gate pgno = cp->pgno; 858*7c478bd9Sstevel@tonic-gate indx = cp->indx; 859*7c478bd9Sstevel@tonic-gate } else { 860*7c478bd9Sstevel@tonic-gate pgno = cp->dpgno; 861*7c478bd9Sstevel@tonic-gate indx = cp->dindx; 862*7c478bd9Sstevel@tonic-gate } 863*7c478bd9Sstevel@tonic-gate 864*7c478bd9Sstevel@tonic-gate /* 865*7c478bd9Sstevel@tonic-gate * !!! 866*7c478bd9Sstevel@tonic-gate * This test is right -- we don't yet support duplicates and 867*7c478bd9Sstevel@tonic-gate * record numbers in the same tree, so ignore duplicates if 868*7c478bd9Sstevel@tonic-gate * DB_BT_RECNUM set. 869*7c478bd9Sstevel@tonic-gate */ 870*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_BT_RECNUM) && 871*7c478bd9Sstevel@tonic-gate (flags != DB_CURRENT || F_ISSET(cp, C_DELETED))) { 872*7c478bd9Sstevel@tonic-gate /* Acquire a complete stack. 
*/ 873*7c478bd9Sstevel@tonic-gate if ((ret = __bam_c_getstack(dbc, cp)) != 0) 874*7c478bd9Sstevel@tonic-gate goto err; 875*7c478bd9Sstevel@tonic-gate cp->page = cp->csp->page; 876*7c478bd9Sstevel@tonic-gate 877*7c478bd9Sstevel@tonic-gate stack = 1; 878*7c478bd9Sstevel@tonic-gate iiflags = BI_DOINCR; 879*7c478bd9Sstevel@tonic-gate } else { 880*7c478bd9Sstevel@tonic-gate /* Acquire the current page. */ 881*7c478bd9Sstevel@tonic-gate if ((ret = __bam_lget(dbc, 882*7c478bd9Sstevel@tonic-gate 0, cp->pgno, DB_LOCK_WRITE, &cp->lock)) == 0) 883*7c478bd9Sstevel@tonic-gate ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page); 884*7c478bd9Sstevel@tonic-gate if (ret != 0) 885*7c478bd9Sstevel@tonic-gate goto err; 886*7c478bd9Sstevel@tonic-gate 887*7c478bd9Sstevel@tonic-gate iiflags = 0; 888*7c478bd9Sstevel@tonic-gate } 889*7c478bd9Sstevel@tonic-gate 890*7c478bd9Sstevel@tonic-gate /* 891*7c478bd9Sstevel@tonic-gate * If the user has specified a duplicate comparison function, 892*7c478bd9Sstevel@tonic-gate * we return an error if DB_CURRENT was specified and the 893*7c478bd9Sstevel@tonic-gate * replacement data doesn't compare equal to the current data. 894*7c478bd9Sstevel@tonic-gate * This stops apps from screwing up the duplicate sort order. 
895*7c478bd9Sstevel@tonic-gate */ 896*7c478bd9Sstevel@tonic-gate if (flags == DB_CURRENT && dbp->dup_compare != NULL) 897*7c478bd9Sstevel@tonic-gate if (__bam_cmp(dbp, data, 898*7c478bd9Sstevel@tonic-gate cp->page, indx, dbp->dup_compare) != 0) { 899*7c478bd9Sstevel@tonic-gate ret = EINVAL; 900*7c478bd9Sstevel@tonic-gate goto err; 901*7c478bd9Sstevel@tonic-gate } 902*7c478bd9Sstevel@tonic-gate 903*7c478bd9Sstevel@tonic-gate iiop = flags; 904*7c478bd9Sstevel@tonic-gate break; 905*7c478bd9Sstevel@tonic-gate case DB_KEYFIRST: 906*7c478bd9Sstevel@tonic-gate case DB_KEYLAST: 907*7c478bd9Sstevel@tonic-gate /* 908*7c478bd9Sstevel@tonic-gate * If we have a duplicate comparison function, we position to 909*7c478bd9Sstevel@tonic-gate * the first of any on-page duplicates, and use __bam_dsearch 910*7c478bd9Sstevel@tonic-gate * to search for the right slot. Otherwise, we position to 911*7c478bd9Sstevel@tonic-gate * the first/last of any on-page duplicates based on the flag 912*7c478bd9Sstevel@tonic-gate * value. 913*7c478bd9Sstevel@tonic-gate */ 914*7c478bd9Sstevel@tonic-gate if ((ret = __bam_c_search(dbc, cp, key, 915*7c478bd9Sstevel@tonic-gate flags == DB_KEYFIRST || dbp->dup_compare != NULL ? 916*7c478bd9Sstevel@tonic-gate DB_KEYFIRST : DB_KEYLAST, &exact)) != 0) 917*7c478bd9Sstevel@tonic-gate goto err; 918*7c478bd9Sstevel@tonic-gate stack = 1; 919*7c478bd9Sstevel@tonic-gate 920*7c478bd9Sstevel@tonic-gate /* 921*7c478bd9Sstevel@tonic-gate * If an exact match: 922*7c478bd9Sstevel@tonic-gate * If duplicates aren't supported, replace the current 923*7c478bd9Sstevel@tonic-gate * item. (When implementing the DB->put function, our 924*7c478bd9Sstevel@tonic-gate * caller has already checked the DB_NOOVERWRITE flag.) 925*7c478bd9Sstevel@tonic-gate * 926*7c478bd9Sstevel@tonic-gate * If there's a duplicate comparison function, find the 927*7c478bd9Sstevel@tonic-gate * correct slot for this duplicate item. 
928*7c478bd9Sstevel@tonic-gate * 929*7c478bd9Sstevel@tonic-gate * If there's no duplicate comparison function, set the 930*7c478bd9Sstevel@tonic-gate * insert flag based on the argument flags. 931*7c478bd9Sstevel@tonic-gate * 932*7c478bd9Sstevel@tonic-gate * If there's no match, the search function returned the 933*7c478bd9Sstevel@tonic-gate * smallest slot greater than the key, use it. 934*7c478bd9Sstevel@tonic-gate */ 935*7c478bd9Sstevel@tonic-gate if (exact) { 936*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_AM_DUP)) { 937*7c478bd9Sstevel@tonic-gate /* 938*7c478bd9Sstevel@tonic-gate * If at off-page duplicate page, move to the 939*7c478bd9Sstevel@tonic-gate * first or last entry -- if a comparison 940*7c478bd9Sstevel@tonic-gate * function was specified, start searching at 941*7c478bd9Sstevel@tonic-gate * the first entry. Otherwise, move based on 942*7c478bd9Sstevel@tonic-gate * the DB_KEYFIRST/DB_KEYLAST flags. 943*7c478bd9Sstevel@tonic-gate */ 944*7c478bd9Sstevel@tonic-gate if ((ret = __bam_dup(dbc, cp, cp->indx, 945*7c478bd9Sstevel@tonic-gate dbp->dup_compare == NULL && 946*7c478bd9Sstevel@tonic-gate flags != DB_KEYFIRST)) != 0) 947*7c478bd9Sstevel@tonic-gate goto err; 948*7c478bd9Sstevel@tonic-gate 949*7c478bd9Sstevel@tonic-gate /* 950*7c478bd9Sstevel@tonic-gate * If there's a comparison function, search for 951*7c478bd9Sstevel@tonic-gate * the correct slot. Otherwise, set the insert 952*7c478bd9Sstevel@tonic-gate * flag based on the argment flag. 953*7c478bd9Sstevel@tonic-gate */ 954*7c478bd9Sstevel@tonic-gate if (dbp->dup_compare == NULL) 955*7c478bd9Sstevel@tonic-gate iiop = flags == DB_KEYFIRST ? 
956*7c478bd9Sstevel@tonic-gate DB_BEFORE : DB_AFTER; 957*7c478bd9Sstevel@tonic-gate else 958*7c478bd9Sstevel@tonic-gate if ((ret = __bam_dsearch(dbc, 959*7c478bd9Sstevel@tonic-gate cp, data, &iiop)) != 0) 960*7c478bd9Sstevel@tonic-gate goto err; 961*7c478bd9Sstevel@tonic-gate } else 962*7c478bd9Sstevel@tonic-gate iiop = DB_CURRENT; 963*7c478bd9Sstevel@tonic-gate iiflags = 0; 964*7c478bd9Sstevel@tonic-gate } else { 965*7c478bd9Sstevel@tonic-gate iiop = DB_BEFORE; 966*7c478bd9Sstevel@tonic-gate iiflags = BI_NEWKEY; 967*7c478bd9Sstevel@tonic-gate } 968*7c478bd9Sstevel@tonic-gate 969*7c478bd9Sstevel@tonic-gate if (cp->dpgno == PGNO_INVALID) { 970*7c478bd9Sstevel@tonic-gate pgno = cp->pgno; 971*7c478bd9Sstevel@tonic-gate indx = cp->indx; 972*7c478bd9Sstevel@tonic-gate } else { 973*7c478bd9Sstevel@tonic-gate pgno = cp->dpgno; 974*7c478bd9Sstevel@tonic-gate indx = cp->dindx; 975*7c478bd9Sstevel@tonic-gate } 976*7c478bd9Sstevel@tonic-gate break; 977*7c478bd9Sstevel@tonic-gate } 978*7c478bd9Sstevel@tonic-gate 979*7c478bd9Sstevel@tonic-gate ret = __bam_iitem(dbc, &cp->page, &indx, key, data, iiop, iiflags); 980*7c478bd9Sstevel@tonic-gate 981*7c478bd9Sstevel@tonic-gate if (ret == DB_NEEDSPLIT) 982*7c478bd9Sstevel@tonic-gate goto split; 983*7c478bd9Sstevel@tonic-gate if (ret != 0) 984*7c478bd9Sstevel@tonic-gate goto err; 985*7c478bd9Sstevel@tonic-gate 986*7c478bd9Sstevel@tonic-gate /* 987*7c478bd9Sstevel@tonic-gate * Reset any cursors referencing this item that might have the item 988*7c478bd9Sstevel@tonic-gate * marked for deletion. 
989*7c478bd9Sstevel@tonic-gate */ 990*7c478bd9Sstevel@tonic-gate if (iiop == DB_CURRENT) { 991*7c478bd9Sstevel@tonic-gate (void)__bam_ca_delete(dbp, pgno, indx, 0); 992*7c478bd9Sstevel@tonic-gate 993*7c478bd9Sstevel@tonic-gate /* 994*7c478bd9Sstevel@tonic-gate * It's also possible that we are the cursor that had the 995*7c478bd9Sstevel@tonic-gate * item marked for deletion, in which case we want to make 996*7c478bd9Sstevel@tonic-gate * sure that we don't delete it because we had the delete 997*7c478bd9Sstevel@tonic-gate * flag set already. 998*7c478bd9Sstevel@tonic-gate */ 999*7c478bd9Sstevel@tonic-gate if (cp->pgno == copy.pgno && cp->indx == copy.indx && 1000*7c478bd9Sstevel@tonic-gate cp->dpgno == copy.dpgno && cp->dindx == copy.dindx) 1001*7c478bd9Sstevel@tonic-gate F_CLR(©, C_DELETED); 1002*7c478bd9Sstevel@tonic-gate } 1003*7c478bd9Sstevel@tonic-gate 1004*7c478bd9Sstevel@tonic-gate /* 1005*7c478bd9Sstevel@tonic-gate * Update the cursor to point to the new entry. The new entry was 1006*7c478bd9Sstevel@tonic-gate * stored on the current page, because we split pages until it was 1007*7c478bd9Sstevel@tonic-gate * possible. 1008*7c478bd9Sstevel@tonic-gate */ 1009*7c478bd9Sstevel@tonic-gate if (cp->dpgno == PGNO_INVALID) 1010*7c478bd9Sstevel@tonic-gate cp->indx = indx; 1011*7c478bd9Sstevel@tonic-gate else 1012*7c478bd9Sstevel@tonic-gate cp->dindx = indx; 1013*7c478bd9Sstevel@tonic-gate 1014*7c478bd9Sstevel@tonic-gate /* 1015*7c478bd9Sstevel@tonic-gate * If the previous cursor record has been deleted, physically delete 1016*7c478bd9Sstevel@tonic-gate * the entry from the page. We clear the deleted flag before we call 1017*7c478bd9Sstevel@tonic-gate * the underlying delete routine so that, if an error occurs, and we 1018*7c478bd9Sstevel@tonic-gate * restore the cursor, the deleted flag is cleared. 
This is because, 1019*7c478bd9Sstevel@tonic-gate * if we manage to physically modify the page, and then restore the 1020*7c478bd9Sstevel@tonic-gate * cursor, we might try to repeat the page modification when closing 1021*7c478bd9Sstevel@tonic-gate * the cursor. 1022*7c478bd9Sstevel@tonic-gate */ 1023*7c478bd9Sstevel@tonic-gate if (F_ISSET(©, C_DELETED)) { 1024*7c478bd9Sstevel@tonic-gate F_CLR(©, C_DELETED); 1025*7c478bd9Sstevel@tonic-gate if ((ret = __bam_c_physdel(dbc, ©, cp->page)) != 0) 1026*7c478bd9Sstevel@tonic-gate goto err; 1027*7c478bd9Sstevel@tonic-gate } 1028*7c478bd9Sstevel@tonic-gate F_CLR(cp, C_DELETED); 1029*7c478bd9Sstevel@tonic-gate 1030*7c478bd9Sstevel@tonic-gate /* Release the previous lock, if any; the current lock is retained. */ 1031*7c478bd9Sstevel@tonic-gate if (copy.lock != LOCK_INVALID) 1032*7c478bd9Sstevel@tonic-gate (void)__BT_TLPUT(dbc, copy.lock); 1033*7c478bd9Sstevel@tonic-gate 1034*7c478bd9Sstevel@tonic-gate /* 1035*7c478bd9Sstevel@tonic-gate * Discard any pages pinned in the tree and their locks, except for 1036*7c478bd9Sstevel@tonic-gate * the leaf page, for which we only discard the pin, not the lock. 1037*7c478bd9Sstevel@tonic-gate * 1038*7c478bd9Sstevel@tonic-gate * Note, the leaf page participated in the stack we acquired, and so 1039*7c478bd9Sstevel@tonic-gate * we have to adjust the stack as necessary. If there was only a 1040*7c478bd9Sstevel@tonic-gate * single page on the stack, we don't have to free further stack pages. 1041*7c478bd9Sstevel@tonic-gate */ 1042*7c478bd9Sstevel@tonic-gate if (stack && BT_STK_POP(cp) != NULL) 1043*7c478bd9Sstevel@tonic-gate (void)__bam_stkrel(dbc, 0); 1044*7c478bd9Sstevel@tonic-gate 1045*7c478bd9Sstevel@tonic-gate /* Release the current page. 
*/ 1046*7c478bd9Sstevel@tonic-gate if ((ret = memp_fput(dbp->mpf, cp->page, 0)) != 0) 1047*7c478bd9Sstevel@tonic-gate goto err; 1048*7c478bd9Sstevel@tonic-gate 1049*7c478bd9Sstevel@tonic-gate if (0) { 1050*7c478bd9Sstevel@tonic-gate err: /* Discard any pinned pages. */ 1051*7c478bd9Sstevel@tonic-gate if (stack) 1052*7c478bd9Sstevel@tonic-gate (void)__bam_stkrel(dbc, 0); 1053*7c478bd9Sstevel@tonic-gate else 1054*7c478bd9Sstevel@tonic-gate DISCARD(dbc, cp); 1055*7c478bd9Sstevel@tonic-gate *cp = copy; 1056*7c478bd9Sstevel@tonic-gate } 1057*7c478bd9Sstevel@tonic-gate 1058*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW)) 1059*7c478bd9Sstevel@tonic-gate (void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock, 1060*7c478bd9Sstevel@tonic-gate DB_LOCK_IWRITE, 0); 1061*7c478bd9Sstevel@tonic-gate 1062*7c478bd9Sstevel@tonic-gate return (ret); 1063*7c478bd9Sstevel@tonic-gate } 1064*7c478bd9Sstevel@tonic-gate 1065*7c478bd9Sstevel@tonic-gate /* 1066*7c478bd9Sstevel@tonic-gate * __bam_c_first -- 1067*7c478bd9Sstevel@tonic-gate * Return the first record. 1068*7c478bd9Sstevel@tonic-gate */ 1069*7c478bd9Sstevel@tonic-gate static int 1070*7c478bd9Sstevel@tonic-gate __bam_c_first(dbc, cp) 1071*7c478bd9Sstevel@tonic-gate DBC *dbc; 1072*7c478bd9Sstevel@tonic-gate CURSOR *cp; 1073*7c478bd9Sstevel@tonic-gate { 1074*7c478bd9Sstevel@tonic-gate DB *dbp; 1075*7c478bd9Sstevel@tonic-gate db_pgno_t pgno; 1076*7c478bd9Sstevel@tonic-gate int ret; 1077*7c478bd9Sstevel@tonic-gate 1078*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 1079*7c478bd9Sstevel@tonic-gate 1080*7c478bd9Sstevel@tonic-gate /* Walk down the left-hand side of the tree. 
*/ 1081*7c478bd9Sstevel@tonic-gate for (pgno = PGNO_ROOT;;) { 1082*7c478bd9Sstevel@tonic-gate if ((ret = 1083*7c478bd9Sstevel@tonic-gate __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0) 1084*7c478bd9Sstevel@tonic-gate return (ret); 1085*7c478bd9Sstevel@tonic-gate if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0) 1086*7c478bd9Sstevel@tonic-gate return (ret); 1087*7c478bd9Sstevel@tonic-gate 1088*7c478bd9Sstevel@tonic-gate /* If we find a leaf page, we're done. */ 1089*7c478bd9Sstevel@tonic-gate if (ISLEAF(cp->page)) 1090*7c478bd9Sstevel@tonic-gate break; 1091*7c478bd9Sstevel@tonic-gate 1092*7c478bd9Sstevel@tonic-gate pgno = GET_BINTERNAL(cp->page, 0)->pgno; 1093*7c478bd9Sstevel@tonic-gate DISCARD(dbc, cp); 1094*7c478bd9Sstevel@tonic-gate } 1095*7c478bd9Sstevel@tonic-gate 1096*7c478bd9Sstevel@tonic-gate cp->pgno = cp->page->pgno; 1097*7c478bd9Sstevel@tonic-gate cp->indx = 0; 1098*7c478bd9Sstevel@tonic-gate cp->dpgno = PGNO_INVALID; 1099*7c478bd9Sstevel@tonic-gate 1100*7c478bd9Sstevel@tonic-gate /* Check for duplicates. */ 1101*7c478bd9Sstevel@tonic-gate if ((ret = __bam_dup(dbc, cp, cp->indx, 0)) != 0) 1102*7c478bd9Sstevel@tonic-gate return (ret); 1103*7c478bd9Sstevel@tonic-gate 1104*7c478bd9Sstevel@tonic-gate /* If on an empty page or a deleted record, move to the next one. */ 1105*7c478bd9Sstevel@tonic-gate if (NUM_ENT(cp->page) == 0 || IS_CUR_DELETED(cp)) 1106*7c478bd9Sstevel@tonic-gate if ((ret = __bam_c_next(dbc, cp, 0)) != 0) 1107*7c478bd9Sstevel@tonic-gate return (ret); 1108*7c478bd9Sstevel@tonic-gate 1109*7c478bd9Sstevel@tonic-gate return (0); 1110*7c478bd9Sstevel@tonic-gate } 1111*7c478bd9Sstevel@tonic-gate 1112*7c478bd9Sstevel@tonic-gate /* 1113*7c478bd9Sstevel@tonic-gate * __bam_c_last -- 1114*7c478bd9Sstevel@tonic-gate * Return the last record. 
1115*7c478bd9Sstevel@tonic-gate */ 1116*7c478bd9Sstevel@tonic-gate static int 1117*7c478bd9Sstevel@tonic-gate __bam_c_last(dbc, cp) 1118*7c478bd9Sstevel@tonic-gate DBC *dbc; 1119*7c478bd9Sstevel@tonic-gate CURSOR *cp; 1120*7c478bd9Sstevel@tonic-gate { 1121*7c478bd9Sstevel@tonic-gate DB *dbp; 1122*7c478bd9Sstevel@tonic-gate db_pgno_t pgno; 1123*7c478bd9Sstevel@tonic-gate int ret; 1124*7c478bd9Sstevel@tonic-gate 1125*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 1126*7c478bd9Sstevel@tonic-gate 1127*7c478bd9Sstevel@tonic-gate /* Walk down the right-hand side of the tree. */ 1128*7c478bd9Sstevel@tonic-gate for (pgno = PGNO_ROOT;;) { 1129*7c478bd9Sstevel@tonic-gate if ((ret = 1130*7c478bd9Sstevel@tonic-gate __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0) 1131*7c478bd9Sstevel@tonic-gate return (ret); 1132*7c478bd9Sstevel@tonic-gate if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0) 1133*7c478bd9Sstevel@tonic-gate return (ret); 1134*7c478bd9Sstevel@tonic-gate 1135*7c478bd9Sstevel@tonic-gate /* If we find a leaf page, we're done. */ 1136*7c478bd9Sstevel@tonic-gate if (ISLEAF(cp->page)) 1137*7c478bd9Sstevel@tonic-gate break; 1138*7c478bd9Sstevel@tonic-gate 1139*7c478bd9Sstevel@tonic-gate pgno = 1140*7c478bd9Sstevel@tonic-gate GET_BINTERNAL(cp->page, NUM_ENT(cp->page) - O_INDX)->pgno; 1141*7c478bd9Sstevel@tonic-gate DISCARD(dbc, cp); 1142*7c478bd9Sstevel@tonic-gate } 1143*7c478bd9Sstevel@tonic-gate 1144*7c478bd9Sstevel@tonic-gate cp->pgno = cp->page->pgno; 1145*7c478bd9Sstevel@tonic-gate cp->indx = NUM_ENT(cp->page) == 0 ? 0 : NUM_ENT(cp->page) - P_INDX; 1146*7c478bd9Sstevel@tonic-gate cp->dpgno = PGNO_INVALID; 1147*7c478bd9Sstevel@tonic-gate 1148*7c478bd9Sstevel@tonic-gate /* Check for duplicates. */ 1149*7c478bd9Sstevel@tonic-gate if ((ret = __bam_dup(dbc, cp, cp->indx, 1)) != 0) 1150*7c478bd9Sstevel@tonic-gate return (ret); 1151*7c478bd9Sstevel@tonic-gate 1152*7c478bd9Sstevel@tonic-gate /* If on an empty page or a deleted record, move to the next one. 
*/ 1153*7c478bd9Sstevel@tonic-gate if (NUM_ENT(cp->page) == 0 || IS_CUR_DELETED(cp)) 1154*7c478bd9Sstevel@tonic-gate if ((ret = __bam_c_prev(dbc, cp)) != 0) 1155*7c478bd9Sstevel@tonic-gate return (ret); 1156*7c478bd9Sstevel@tonic-gate 1157*7c478bd9Sstevel@tonic-gate return (0); 1158*7c478bd9Sstevel@tonic-gate } 1159*7c478bd9Sstevel@tonic-gate 1160*7c478bd9Sstevel@tonic-gate /* 1161*7c478bd9Sstevel@tonic-gate * __bam_c_next -- 1162*7c478bd9Sstevel@tonic-gate * Move to the next record. 1163*7c478bd9Sstevel@tonic-gate */ 1164*7c478bd9Sstevel@tonic-gate static int 1165*7c478bd9Sstevel@tonic-gate __bam_c_next(dbc, cp, initial_move) 1166*7c478bd9Sstevel@tonic-gate DBC *dbc; 1167*7c478bd9Sstevel@tonic-gate CURSOR *cp; 1168*7c478bd9Sstevel@tonic-gate int initial_move; 1169*7c478bd9Sstevel@tonic-gate { 1170*7c478bd9Sstevel@tonic-gate DB *dbp; 1171*7c478bd9Sstevel@tonic-gate db_indx_t adjust, indx; 1172*7c478bd9Sstevel@tonic-gate db_pgno_t pgno; 1173*7c478bd9Sstevel@tonic-gate int ret; 1174*7c478bd9Sstevel@tonic-gate 1175*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 1176*7c478bd9Sstevel@tonic-gate 1177*7c478bd9Sstevel@tonic-gate /* 1178*7c478bd9Sstevel@tonic-gate * We're either moving through a page of duplicates or a btree leaf 1179*7c478bd9Sstevel@tonic-gate * page. 1180*7c478bd9Sstevel@tonic-gate */ 1181*7c478bd9Sstevel@tonic-gate if (cp->dpgno == PGNO_INVALID) { 1182*7c478bd9Sstevel@tonic-gate adjust = dbp->type == DB_BTREE ? 
P_INDX : O_INDX; 1183*7c478bd9Sstevel@tonic-gate pgno = cp->pgno; 1184*7c478bd9Sstevel@tonic-gate indx = cp->indx; 1185*7c478bd9Sstevel@tonic-gate } else { 1186*7c478bd9Sstevel@tonic-gate adjust = O_INDX; 1187*7c478bd9Sstevel@tonic-gate pgno = cp->dpgno; 1188*7c478bd9Sstevel@tonic-gate indx = cp->dindx; 1189*7c478bd9Sstevel@tonic-gate } 1190*7c478bd9Sstevel@tonic-gate if (cp->page == NULL) { 1191*7c478bd9Sstevel@tonic-gate if ((ret = 1192*7c478bd9Sstevel@tonic-gate __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0) 1193*7c478bd9Sstevel@tonic-gate return (ret); 1194*7c478bd9Sstevel@tonic-gate if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0) 1195*7c478bd9Sstevel@tonic-gate return (ret); 1196*7c478bd9Sstevel@tonic-gate } 1197*7c478bd9Sstevel@tonic-gate 1198*7c478bd9Sstevel@tonic-gate /* 1199*7c478bd9Sstevel@tonic-gate * If at the end of the page, move to a subsequent page. 1200*7c478bd9Sstevel@tonic-gate * 1201*7c478bd9Sstevel@tonic-gate * !!! 1202*7c478bd9Sstevel@tonic-gate * Check for >= NUM_ENT. If we're here as the result of a search that 1203*7c478bd9Sstevel@tonic-gate * landed us on NUM_ENT, we'll increment indx before we test. 1204*7c478bd9Sstevel@tonic-gate * 1205*7c478bd9Sstevel@tonic-gate * !!! 1206*7c478bd9Sstevel@tonic-gate * This code handles empty pages and pages with only deleted entries. 1207*7c478bd9Sstevel@tonic-gate */ 1208*7c478bd9Sstevel@tonic-gate if (initial_move) 1209*7c478bd9Sstevel@tonic-gate indx += adjust; 1210*7c478bd9Sstevel@tonic-gate for (;;) { 1211*7c478bd9Sstevel@tonic-gate if (indx >= NUM_ENT(cp->page)) { 1212*7c478bd9Sstevel@tonic-gate /* 1213*7c478bd9Sstevel@tonic-gate * If we're in a btree leaf page, we've reached the end 1214*7c478bd9Sstevel@tonic-gate * of the tree. If we've reached the end of a page of 1215*7c478bd9Sstevel@tonic-gate * duplicates, continue from the btree leaf page where 1216*7c478bd9Sstevel@tonic-gate * we found this page of duplicates. 
1217*7c478bd9Sstevel@tonic-gate */ 1218*7c478bd9Sstevel@tonic-gate pgno = cp->page->next_pgno; 1219*7c478bd9Sstevel@tonic-gate if (pgno == PGNO_INVALID) { 1220*7c478bd9Sstevel@tonic-gate /* If in a btree leaf page, it's EOF. */ 1221*7c478bd9Sstevel@tonic-gate if (cp->dpgno == PGNO_INVALID) 1222*7c478bd9Sstevel@tonic-gate return (DB_NOTFOUND); 1223*7c478bd9Sstevel@tonic-gate 1224*7c478bd9Sstevel@tonic-gate /* Continue from the last btree leaf page. */ 1225*7c478bd9Sstevel@tonic-gate cp->dpgno = PGNO_INVALID; 1226*7c478bd9Sstevel@tonic-gate 1227*7c478bd9Sstevel@tonic-gate adjust = P_INDX; 1228*7c478bd9Sstevel@tonic-gate pgno = cp->pgno; 1229*7c478bd9Sstevel@tonic-gate indx = cp->indx + P_INDX; 1230*7c478bd9Sstevel@tonic-gate } else 1231*7c478bd9Sstevel@tonic-gate indx = 0; 1232*7c478bd9Sstevel@tonic-gate 1233*7c478bd9Sstevel@tonic-gate DISCARD(dbc, cp); 1234*7c478bd9Sstevel@tonic-gate if ((ret = __bam_lget(dbc, 1235*7c478bd9Sstevel@tonic-gate 0, pgno, DB_LOCK_READ, &cp->lock)) != 0) 1236*7c478bd9Sstevel@tonic-gate return (ret); 1237*7c478bd9Sstevel@tonic-gate if ((ret = 1238*7c478bd9Sstevel@tonic-gate memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0) 1239*7c478bd9Sstevel@tonic-gate return (ret); 1240*7c478bd9Sstevel@tonic-gate continue; 1241*7c478bd9Sstevel@tonic-gate } 1242*7c478bd9Sstevel@tonic-gate 1243*7c478bd9Sstevel@tonic-gate /* Ignore deleted records. */ 1244*7c478bd9Sstevel@tonic-gate if (IS_DELETED(cp, indx)) { 1245*7c478bd9Sstevel@tonic-gate indx += adjust; 1246*7c478bd9Sstevel@tonic-gate continue; 1247*7c478bd9Sstevel@tonic-gate } 1248*7c478bd9Sstevel@tonic-gate 1249*7c478bd9Sstevel@tonic-gate /* 1250*7c478bd9Sstevel@tonic-gate * If we're not in a duplicates page, check to see if we've 1251*7c478bd9Sstevel@tonic-gate * found a page of duplicates, in which case we move to the 1252*7c478bd9Sstevel@tonic-gate * first entry. 
1253*7c478bd9Sstevel@tonic-gate */ 1254*7c478bd9Sstevel@tonic-gate if (cp->dpgno == PGNO_INVALID) { 1255*7c478bd9Sstevel@tonic-gate cp->pgno = cp->page->pgno; 1256*7c478bd9Sstevel@tonic-gate cp->indx = indx; 1257*7c478bd9Sstevel@tonic-gate 1258*7c478bd9Sstevel@tonic-gate if ((ret = __bam_dup(dbc, cp, indx, 0)) != 0) 1259*7c478bd9Sstevel@tonic-gate return (ret); 1260*7c478bd9Sstevel@tonic-gate if (cp->dpgno != PGNO_INVALID) { 1261*7c478bd9Sstevel@tonic-gate indx = cp->dindx; 1262*7c478bd9Sstevel@tonic-gate adjust = O_INDX; 1263*7c478bd9Sstevel@tonic-gate continue; 1264*7c478bd9Sstevel@tonic-gate } 1265*7c478bd9Sstevel@tonic-gate } else { 1266*7c478bd9Sstevel@tonic-gate cp->dpgno = cp->page->pgno; 1267*7c478bd9Sstevel@tonic-gate cp->dindx = indx; 1268*7c478bd9Sstevel@tonic-gate } 1269*7c478bd9Sstevel@tonic-gate break; 1270*7c478bd9Sstevel@tonic-gate } 1271*7c478bd9Sstevel@tonic-gate return (0); 1272*7c478bd9Sstevel@tonic-gate } 1273*7c478bd9Sstevel@tonic-gate 1274*7c478bd9Sstevel@tonic-gate /* 1275*7c478bd9Sstevel@tonic-gate * __bam_c_prev -- 1276*7c478bd9Sstevel@tonic-gate * Move to the previous record. 1277*7c478bd9Sstevel@tonic-gate */ 1278*7c478bd9Sstevel@tonic-gate static int 1279*7c478bd9Sstevel@tonic-gate __bam_c_prev(dbc, cp) 1280*7c478bd9Sstevel@tonic-gate DBC *dbc; 1281*7c478bd9Sstevel@tonic-gate CURSOR *cp; 1282*7c478bd9Sstevel@tonic-gate { 1283*7c478bd9Sstevel@tonic-gate DB *dbp; 1284*7c478bd9Sstevel@tonic-gate db_indx_t indx, adjust; 1285*7c478bd9Sstevel@tonic-gate db_pgno_t pgno; 1286*7c478bd9Sstevel@tonic-gate int ret, set_indx; 1287*7c478bd9Sstevel@tonic-gate 1288*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 1289*7c478bd9Sstevel@tonic-gate 1290*7c478bd9Sstevel@tonic-gate /* 1291*7c478bd9Sstevel@tonic-gate * We're either moving through a page of duplicates or a btree leaf 1292*7c478bd9Sstevel@tonic-gate * page. 
1293*7c478bd9Sstevel@tonic-gate */ 1294*7c478bd9Sstevel@tonic-gate if (cp->dpgno == PGNO_INVALID) { 1295*7c478bd9Sstevel@tonic-gate adjust = dbp->type == DB_BTREE ? P_INDX : O_INDX; 1296*7c478bd9Sstevel@tonic-gate pgno = cp->pgno; 1297*7c478bd9Sstevel@tonic-gate indx = cp->indx; 1298*7c478bd9Sstevel@tonic-gate } else { 1299*7c478bd9Sstevel@tonic-gate adjust = O_INDX; 1300*7c478bd9Sstevel@tonic-gate pgno = cp->dpgno; 1301*7c478bd9Sstevel@tonic-gate indx = cp->dindx; 1302*7c478bd9Sstevel@tonic-gate } 1303*7c478bd9Sstevel@tonic-gate if (cp->page == NULL) { 1304*7c478bd9Sstevel@tonic-gate if ((ret = 1305*7c478bd9Sstevel@tonic-gate __bam_lget(dbc, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0) 1306*7c478bd9Sstevel@tonic-gate return (ret); 1307*7c478bd9Sstevel@tonic-gate if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0) 1308*7c478bd9Sstevel@tonic-gate return (ret); 1309*7c478bd9Sstevel@tonic-gate } 1310*7c478bd9Sstevel@tonic-gate 1311*7c478bd9Sstevel@tonic-gate /* 1312*7c478bd9Sstevel@tonic-gate * If at the beginning of the page, move to any previous one. 1313*7c478bd9Sstevel@tonic-gate * 1314*7c478bd9Sstevel@tonic-gate * !!! 1315*7c478bd9Sstevel@tonic-gate * This code handles empty pages and pages with only deleted entries. 1316*7c478bd9Sstevel@tonic-gate */ 1317*7c478bd9Sstevel@tonic-gate for (;;) { 1318*7c478bd9Sstevel@tonic-gate if (indx == 0) { 1319*7c478bd9Sstevel@tonic-gate /* 1320*7c478bd9Sstevel@tonic-gate * If we're in a btree leaf page, we've reached the 1321*7c478bd9Sstevel@tonic-gate * beginning of the tree. If we've reached the first 1322*7c478bd9Sstevel@tonic-gate * of a page of duplicates, continue from the btree 1323*7c478bd9Sstevel@tonic-gate * leaf page where we found this page of duplicates. 1324*7c478bd9Sstevel@tonic-gate */ 1325*7c478bd9Sstevel@tonic-gate pgno = cp->page->prev_pgno; 1326*7c478bd9Sstevel@tonic-gate if (pgno == PGNO_INVALID) { 1327*7c478bd9Sstevel@tonic-gate /* If in a btree leaf page, it's SOF. 
*/ 1328*7c478bd9Sstevel@tonic-gate if (cp->dpgno == PGNO_INVALID) 1329*7c478bd9Sstevel@tonic-gate return (DB_NOTFOUND); 1330*7c478bd9Sstevel@tonic-gate 1331*7c478bd9Sstevel@tonic-gate /* Continue from the last btree leaf page. */ 1332*7c478bd9Sstevel@tonic-gate cp->dpgno = PGNO_INVALID; 1333*7c478bd9Sstevel@tonic-gate 1334*7c478bd9Sstevel@tonic-gate adjust = P_INDX; 1335*7c478bd9Sstevel@tonic-gate pgno = cp->pgno; 1336*7c478bd9Sstevel@tonic-gate indx = cp->indx; 1337*7c478bd9Sstevel@tonic-gate set_indx = 0; 1338*7c478bd9Sstevel@tonic-gate } else 1339*7c478bd9Sstevel@tonic-gate set_indx = 1; 1340*7c478bd9Sstevel@tonic-gate 1341*7c478bd9Sstevel@tonic-gate DISCARD(dbc, cp); 1342*7c478bd9Sstevel@tonic-gate if ((ret = __bam_lget(dbc, 1343*7c478bd9Sstevel@tonic-gate 0, pgno, DB_LOCK_READ, &cp->lock)) != 0) 1344*7c478bd9Sstevel@tonic-gate return (ret); 1345*7c478bd9Sstevel@tonic-gate if ((ret = 1346*7c478bd9Sstevel@tonic-gate memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0) 1347*7c478bd9Sstevel@tonic-gate return (ret); 1348*7c478bd9Sstevel@tonic-gate 1349*7c478bd9Sstevel@tonic-gate if (set_indx) 1350*7c478bd9Sstevel@tonic-gate indx = NUM_ENT(cp->page); 1351*7c478bd9Sstevel@tonic-gate if (indx == 0) 1352*7c478bd9Sstevel@tonic-gate continue; 1353*7c478bd9Sstevel@tonic-gate } 1354*7c478bd9Sstevel@tonic-gate 1355*7c478bd9Sstevel@tonic-gate /* Ignore deleted records. */ 1356*7c478bd9Sstevel@tonic-gate indx -= adjust; 1357*7c478bd9Sstevel@tonic-gate if (IS_DELETED(cp, indx)) 1358*7c478bd9Sstevel@tonic-gate continue; 1359*7c478bd9Sstevel@tonic-gate 1360*7c478bd9Sstevel@tonic-gate /* 1361*7c478bd9Sstevel@tonic-gate * If we're not in a duplicates page, check to see if we've 1362*7c478bd9Sstevel@tonic-gate * found a page of duplicates, in which case we move to the 1363*7c478bd9Sstevel@tonic-gate * last entry. 
1364*7c478bd9Sstevel@tonic-gate */ 1365*7c478bd9Sstevel@tonic-gate if (cp->dpgno == PGNO_INVALID) { 1366*7c478bd9Sstevel@tonic-gate cp->pgno = cp->page->pgno; 1367*7c478bd9Sstevel@tonic-gate cp->indx = indx; 1368*7c478bd9Sstevel@tonic-gate 1369*7c478bd9Sstevel@tonic-gate if ((ret = __bam_dup(dbc, cp, indx, 1)) != 0) 1370*7c478bd9Sstevel@tonic-gate return (ret); 1371*7c478bd9Sstevel@tonic-gate if (cp->dpgno != PGNO_INVALID) { 1372*7c478bd9Sstevel@tonic-gate indx = cp->dindx + O_INDX; 1373*7c478bd9Sstevel@tonic-gate adjust = O_INDX; 1374*7c478bd9Sstevel@tonic-gate continue; 1375*7c478bd9Sstevel@tonic-gate } 1376*7c478bd9Sstevel@tonic-gate } else { 1377*7c478bd9Sstevel@tonic-gate cp->dpgno = cp->page->pgno; 1378*7c478bd9Sstevel@tonic-gate cp->dindx = indx; 1379*7c478bd9Sstevel@tonic-gate } 1380*7c478bd9Sstevel@tonic-gate break; 1381*7c478bd9Sstevel@tonic-gate } 1382*7c478bd9Sstevel@tonic-gate return (0); 1383*7c478bd9Sstevel@tonic-gate } 1384*7c478bd9Sstevel@tonic-gate 1385*7c478bd9Sstevel@tonic-gate /* 1386*7c478bd9Sstevel@tonic-gate * __bam_c_search -- 1387*7c478bd9Sstevel@tonic-gate * Move to a specified record. 
1388*7c478bd9Sstevel@tonic-gate */ 1389*7c478bd9Sstevel@tonic-gate static int 1390*7c478bd9Sstevel@tonic-gate __bam_c_search(dbc, cp, key, flags, exactp) 1391*7c478bd9Sstevel@tonic-gate DBC *dbc; 1392*7c478bd9Sstevel@tonic-gate CURSOR *cp; 1393*7c478bd9Sstevel@tonic-gate const DBT *key; 1394*7c478bd9Sstevel@tonic-gate u_int32_t flags; 1395*7c478bd9Sstevel@tonic-gate int *exactp; 1396*7c478bd9Sstevel@tonic-gate { 1397*7c478bd9Sstevel@tonic-gate BTREE *t; 1398*7c478bd9Sstevel@tonic-gate DB *dbp; 1399*7c478bd9Sstevel@tonic-gate DB_LOCK lock; 1400*7c478bd9Sstevel@tonic-gate PAGE *h; 1401*7c478bd9Sstevel@tonic-gate db_recno_t recno; 1402*7c478bd9Sstevel@tonic-gate db_indx_t indx; 1403*7c478bd9Sstevel@tonic-gate u_int32_t sflags; 1404*7c478bd9Sstevel@tonic-gate int cmp, needexact, ret; 1405*7c478bd9Sstevel@tonic-gate 1406*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 1407*7c478bd9Sstevel@tonic-gate t = dbp->internal; 1408*7c478bd9Sstevel@tonic-gate 1409*7c478bd9Sstevel@tonic-gate /* Find an entry in the database. */ 1410*7c478bd9Sstevel@tonic-gate switch (flags) { 1411*7c478bd9Sstevel@tonic-gate case DB_SET_RECNO: 1412*7c478bd9Sstevel@tonic-gate if ((ret = __ram_getno(dbc, key, &recno, 0)) != 0) 1413*7c478bd9Sstevel@tonic-gate return (ret); 1414*7c478bd9Sstevel@tonic-gate sflags = F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND; 1415*7c478bd9Sstevel@tonic-gate needexact = *exactp = 1; 1416*7c478bd9Sstevel@tonic-gate ret = __bam_rsearch(dbc, &recno, sflags, 1, exactp); 1417*7c478bd9Sstevel@tonic-gate break; 1418*7c478bd9Sstevel@tonic-gate case DB_SET: 1419*7c478bd9Sstevel@tonic-gate case DB_GET_BOTH: 1420*7c478bd9Sstevel@tonic-gate sflags = F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND; 1421*7c478bd9Sstevel@tonic-gate needexact = *exactp = 1; 1422*7c478bd9Sstevel@tonic-gate goto search; 1423*7c478bd9Sstevel@tonic-gate case DB_SET_RANGE: 1424*7c478bd9Sstevel@tonic-gate sflags = F_ISSET(dbc, DBC_RMW) ? 
S_FIND_WR : S_FIND; 1425*7c478bd9Sstevel@tonic-gate needexact = *exactp = 0; 1426*7c478bd9Sstevel@tonic-gate goto search; 1427*7c478bd9Sstevel@tonic-gate case DB_KEYFIRST: 1428*7c478bd9Sstevel@tonic-gate sflags = S_KEYFIRST; 1429*7c478bd9Sstevel@tonic-gate goto fast_search; 1430*7c478bd9Sstevel@tonic-gate case DB_KEYLAST: 1431*7c478bd9Sstevel@tonic-gate sflags = S_KEYLAST; 1432*7c478bd9Sstevel@tonic-gate fast_search: needexact = *exactp = 0; 1433*7c478bd9Sstevel@tonic-gate /* 1434*7c478bd9Sstevel@tonic-gate * If the application has a history of inserting into the first 1435*7c478bd9Sstevel@tonic-gate * or last pages of the database, we check those pages first to 1436*7c478bd9Sstevel@tonic-gate * avoid doing a full search. 1437*7c478bd9Sstevel@tonic-gate * 1438*7c478bd9Sstevel@tonic-gate * Record numbers can't be fast-tracked, the entire tree has to 1439*7c478bd9Sstevel@tonic-gate * be locked. 1440*7c478bd9Sstevel@tonic-gate */ 1441*7c478bd9Sstevel@tonic-gate h = NULL; 1442*7c478bd9Sstevel@tonic-gate lock = LOCK_INVALID; 1443*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_BT_RECNUM)) 1444*7c478bd9Sstevel@tonic-gate goto search; 1445*7c478bd9Sstevel@tonic-gate 1446*7c478bd9Sstevel@tonic-gate /* Check if the application has a history of sorted input. */ 1447*7c478bd9Sstevel@tonic-gate if (t->bt_lpgno == PGNO_INVALID) 1448*7c478bd9Sstevel@tonic-gate goto search; 1449*7c478bd9Sstevel@tonic-gate 1450*7c478bd9Sstevel@tonic-gate /* 1451*7c478bd9Sstevel@tonic-gate * Lock and retrieve the page on which we did the last insert. 1452*7c478bd9Sstevel@tonic-gate * It's okay if it doesn't exist, or if it's not the page type 1453*7c478bd9Sstevel@tonic-gate * we expected, it just means that the world changed. 
1454*7c478bd9Sstevel@tonic-gate */ 1455*7c478bd9Sstevel@tonic-gate if (__bam_lget(dbc, 0, t->bt_lpgno, DB_LOCK_WRITE, &lock)) 1456*7c478bd9Sstevel@tonic-gate goto fast_miss; 1457*7c478bd9Sstevel@tonic-gate if (memp_fget(dbp->mpf, &t->bt_lpgno, 0, &h)) 1458*7c478bd9Sstevel@tonic-gate goto fast_miss; 1459*7c478bd9Sstevel@tonic-gate if (TYPE(h) != P_LBTREE) 1460*7c478bd9Sstevel@tonic-gate goto fast_miss; 1461*7c478bd9Sstevel@tonic-gate if (NUM_ENT(h) == 0) 1462*7c478bd9Sstevel@tonic-gate goto fast_miss; 1463*7c478bd9Sstevel@tonic-gate 1464*7c478bd9Sstevel@tonic-gate /* 1465*7c478bd9Sstevel@tonic-gate * What we do here is test to see if we're at the beginning or 1466*7c478bd9Sstevel@tonic-gate * end of the tree and if the new item sorts before/after the 1467*7c478bd9Sstevel@tonic-gate * first/last page entry. We don't try and catch inserts into 1468*7c478bd9Sstevel@tonic-gate * the middle of the tree (although we could, as long as there 1469*7c478bd9Sstevel@tonic-gate * were two keys on the page and we saved both the index and 1470*7c478bd9Sstevel@tonic-gate * the page number of the last insert). 1471*7c478bd9Sstevel@tonic-gate */ 1472*7c478bd9Sstevel@tonic-gate if (h->next_pgno == PGNO_INVALID) { 1473*7c478bd9Sstevel@tonic-gate indx = NUM_ENT(h) - P_INDX; 1474*7c478bd9Sstevel@tonic-gate if ((cmp = 1475*7c478bd9Sstevel@tonic-gate __bam_cmp(dbp, key, h, indx, t->bt_compare)) < 0) 1476*7c478bd9Sstevel@tonic-gate goto try_begin; 1477*7c478bd9Sstevel@tonic-gate if (cmp > 0) { 1478*7c478bd9Sstevel@tonic-gate indx += P_INDX; 1479*7c478bd9Sstevel@tonic-gate goto fast_hit; 1480*7c478bd9Sstevel@tonic-gate } 1481*7c478bd9Sstevel@tonic-gate 1482*7c478bd9Sstevel@tonic-gate /* 1483*7c478bd9Sstevel@tonic-gate * Found a duplicate. If doing DB_KEYLAST, we're at 1484*7c478bd9Sstevel@tonic-gate * the correct position, otherwise, move to the first 1485*7c478bd9Sstevel@tonic-gate * of the duplicates. 
1486*7c478bd9Sstevel@tonic-gate */ 1487*7c478bd9Sstevel@tonic-gate if (flags == DB_KEYLAST) 1488*7c478bd9Sstevel@tonic-gate goto fast_hit; 1489*7c478bd9Sstevel@tonic-gate for (; 1490*7c478bd9Sstevel@tonic-gate indx > 0 && h->inp[indx - P_INDX] == h->inp[indx]; 1491*7c478bd9Sstevel@tonic-gate indx -= P_INDX) 1492*7c478bd9Sstevel@tonic-gate ; 1493*7c478bd9Sstevel@tonic-gate goto fast_hit; 1494*7c478bd9Sstevel@tonic-gate } 1495*7c478bd9Sstevel@tonic-gate try_begin: if (h->prev_pgno == PGNO_INVALID) { 1496*7c478bd9Sstevel@tonic-gate indx = 0; 1497*7c478bd9Sstevel@tonic-gate if ((cmp = 1498*7c478bd9Sstevel@tonic-gate __bam_cmp(dbp, key, h, indx, t->bt_compare)) > 0) 1499*7c478bd9Sstevel@tonic-gate goto fast_miss; 1500*7c478bd9Sstevel@tonic-gate if (cmp < 0) 1501*7c478bd9Sstevel@tonic-gate goto fast_hit; 1502*7c478bd9Sstevel@tonic-gate /* 1503*7c478bd9Sstevel@tonic-gate * Found a duplicate. If doing DB_KEYFIRST, we're at 1504*7c478bd9Sstevel@tonic-gate * the correct position, otherwise, move to the last 1505*7c478bd9Sstevel@tonic-gate * of the duplicates. 1506*7c478bd9Sstevel@tonic-gate */ 1507*7c478bd9Sstevel@tonic-gate if (flags == DB_KEYFIRST) 1508*7c478bd9Sstevel@tonic-gate goto fast_hit; 1509*7c478bd9Sstevel@tonic-gate for (; 1510*7c478bd9Sstevel@tonic-gate indx < (db_indx_t)(NUM_ENT(h) - P_INDX) && 1511*7c478bd9Sstevel@tonic-gate h->inp[indx] == h->inp[indx + P_INDX]; 1512*7c478bd9Sstevel@tonic-gate indx += P_INDX) 1513*7c478bd9Sstevel@tonic-gate ; 1514*7c478bd9Sstevel@tonic-gate goto fast_hit; 1515*7c478bd9Sstevel@tonic-gate } 1516*7c478bd9Sstevel@tonic-gate goto fast_miss; 1517*7c478bd9Sstevel@tonic-gate 1518*7c478bd9Sstevel@tonic-gate fast_hit: /* Set the exact match flag, we may have found a duplicate. */ 1519*7c478bd9Sstevel@tonic-gate *exactp = cmp == 0; 1520*7c478bd9Sstevel@tonic-gate 1521*7c478bd9Sstevel@tonic-gate /* Enter the entry in the stack. 
*/ 1522*7c478bd9Sstevel@tonic-gate BT_STK_CLR(cp); 1523*7c478bd9Sstevel@tonic-gate BT_STK_ENTER(cp, h, indx, lock, ret); 1524*7c478bd9Sstevel@tonic-gate break; 1525*7c478bd9Sstevel@tonic-gate 1526*7c478bd9Sstevel@tonic-gate fast_miss: if (h != NULL) 1527*7c478bd9Sstevel@tonic-gate (void)memp_fput(dbp->mpf, h, 0); 1528*7c478bd9Sstevel@tonic-gate if (lock != LOCK_INVALID) 1529*7c478bd9Sstevel@tonic-gate (void)__BT_LPUT(dbc, lock); 1530*7c478bd9Sstevel@tonic-gate 1531*7c478bd9Sstevel@tonic-gate search: ret = __bam_search(dbc, key, sflags, 1, NULL, exactp); 1532*7c478bd9Sstevel@tonic-gate break; 1533*7c478bd9Sstevel@tonic-gate default: /* XXX: Impossible. */ 1534*7c478bd9Sstevel@tonic-gate abort(); 1535*7c478bd9Sstevel@tonic-gate /* NOTREACHED */ 1536*7c478bd9Sstevel@tonic-gate } 1537*7c478bd9Sstevel@tonic-gate if (ret != 0) 1538*7c478bd9Sstevel@tonic-gate return (ret); 1539*7c478bd9Sstevel@tonic-gate 1540*7c478bd9Sstevel@tonic-gate /* 1541*7c478bd9Sstevel@tonic-gate * Initialize the cursor to reference it. This has to be done 1542*7c478bd9Sstevel@tonic-gate * before we return (even with DB_NOTFOUND) because we have to 1543*7c478bd9Sstevel@tonic-gate * free the page(s) we locked in __bam_search. 1544*7c478bd9Sstevel@tonic-gate */ 1545*7c478bd9Sstevel@tonic-gate cp->page = cp->csp->page; 1546*7c478bd9Sstevel@tonic-gate cp->pgno = cp->csp->page->pgno; 1547*7c478bd9Sstevel@tonic-gate cp->indx = cp->csp->indx; 1548*7c478bd9Sstevel@tonic-gate cp->lock = cp->csp->lock; 1549*7c478bd9Sstevel@tonic-gate cp->dpgno = PGNO_INVALID; 1550*7c478bd9Sstevel@tonic-gate 1551*7c478bd9Sstevel@tonic-gate /* 1552*7c478bd9Sstevel@tonic-gate * If we inserted a key into the first or last slot of the tree, 1553*7c478bd9Sstevel@tonic-gate * remember where it was so we can do it more quickly next time. 
1554*7c478bd9Sstevel@tonic-gate */ 1555*7c478bd9Sstevel@tonic-gate if (flags == DB_KEYFIRST || flags == DB_KEYLAST) 1556*7c478bd9Sstevel@tonic-gate t->bt_lpgno = 1557*7c478bd9Sstevel@tonic-gate ((cp->page->next_pgno == PGNO_INVALID && 1558*7c478bd9Sstevel@tonic-gate cp->indx >= NUM_ENT(cp->page)) || 1559*7c478bd9Sstevel@tonic-gate (cp->page->prev_pgno == PGNO_INVALID && cp->indx == 0)) ? 1560*7c478bd9Sstevel@tonic-gate cp->pgno : PGNO_INVALID; 1561*7c478bd9Sstevel@tonic-gate 1562*7c478bd9Sstevel@tonic-gate /* If we need an exact match and didn't find one, we're done. */ 1563*7c478bd9Sstevel@tonic-gate if (needexact && *exactp == 0) 1564*7c478bd9Sstevel@tonic-gate return (DB_NOTFOUND); 1565*7c478bd9Sstevel@tonic-gate 1566*7c478bd9Sstevel@tonic-gate return (0); 1567*7c478bd9Sstevel@tonic-gate } 1568*7c478bd9Sstevel@tonic-gate 1569*7c478bd9Sstevel@tonic-gate /* 1570*7c478bd9Sstevel@tonic-gate * __bam_dup -- 1571*7c478bd9Sstevel@tonic-gate * Check for an off-page duplicates entry, and if found, move to the 1572*7c478bd9Sstevel@tonic-gate * first or last entry. 1573*7c478bd9Sstevel@tonic-gate * 1574*7c478bd9Sstevel@tonic-gate * PUBLIC: int __bam_dup __P((DBC *, CURSOR *, u_int32_t, int)); 1575*7c478bd9Sstevel@tonic-gate */ 1576*7c478bd9Sstevel@tonic-gate int 1577*7c478bd9Sstevel@tonic-gate __bam_dup(dbc, cp, indx, last_dup) 1578*7c478bd9Sstevel@tonic-gate DBC *dbc; 1579*7c478bd9Sstevel@tonic-gate CURSOR *cp; 1580*7c478bd9Sstevel@tonic-gate u_int32_t indx; 1581*7c478bd9Sstevel@tonic-gate int last_dup; 1582*7c478bd9Sstevel@tonic-gate { 1583*7c478bd9Sstevel@tonic-gate BOVERFLOW *bo; 1584*7c478bd9Sstevel@tonic-gate DB *dbp; 1585*7c478bd9Sstevel@tonic-gate db_pgno_t pgno; 1586*7c478bd9Sstevel@tonic-gate int ret; 1587*7c478bd9Sstevel@tonic-gate 1588*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 1589*7c478bd9Sstevel@tonic-gate 1590*7c478bd9Sstevel@tonic-gate /* 1591*7c478bd9Sstevel@tonic-gate * Check for an overflow entry. 
If we find one, move to the 1592*7c478bd9Sstevel@tonic-gate * duplicates page, and optionally move to the last record on 1593*7c478bd9Sstevel@tonic-gate * that page. 1594*7c478bd9Sstevel@tonic-gate * 1595*7c478bd9Sstevel@tonic-gate * !!! 1596*7c478bd9Sstevel@tonic-gate * We don't lock duplicates pages, we've already got the correct 1597*7c478bd9Sstevel@tonic-gate * lock on the main page. 1598*7c478bd9Sstevel@tonic-gate */ 1599*7c478bd9Sstevel@tonic-gate bo = GET_BOVERFLOW(cp->page, indx + O_INDX); 1600*7c478bd9Sstevel@tonic-gate if (B_TYPE(bo->type) != B_DUPLICATE) 1601*7c478bd9Sstevel@tonic-gate return (0); 1602*7c478bd9Sstevel@tonic-gate 1603*7c478bd9Sstevel@tonic-gate pgno = bo->pgno; 1604*7c478bd9Sstevel@tonic-gate if ((ret = memp_fput(dbp->mpf, cp->page, 0)) != 0) 1605*7c478bd9Sstevel@tonic-gate return (ret); 1606*7c478bd9Sstevel@tonic-gate cp->page = NULL; 1607*7c478bd9Sstevel@tonic-gate if (last_dup) { 1608*7c478bd9Sstevel@tonic-gate if ((ret = __db_dend(dbc, pgno, &cp->page)) != 0) 1609*7c478bd9Sstevel@tonic-gate return (ret); 1610*7c478bd9Sstevel@tonic-gate indx = NUM_ENT(cp->page) - O_INDX; 1611*7c478bd9Sstevel@tonic-gate } else { 1612*7c478bd9Sstevel@tonic-gate if ((ret = memp_fget(dbp->mpf, &pgno, 0, &cp->page)) != 0) 1613*7c478bd9Sstevel@tonic-gate return (ret); 1614*7c478bd9Sstevel@tonic-gate indx = 0; 1615*7c478bd9Sstevel@tonic-gate } 1616*7c478bd9Sstevel@tonic-gate 1617*7c478bd9Sstevel@tonic-gate /* Update the cursor's duplicate information. */ 1618*7c478bd9Sstevel@tonic-gate cp->dpgno = cp->page->pgno; 1619*7c478bd9Sstevel@tonic-gate cp->dindx = indx; 1620*7c478bd9Sstevel@tonic-gate 1621*7c478bd9Sstevel@tonic-gate return (0); 1622*7c478bd9Sstevel@tonic-gate } 1623*7c478bd9Sstevel@tonic-gate 1624*7c478bd9Sstevel@tonic-gate /* 1625*7c478bd9Sstevel@tonic-gate * __bam_c_physdel -- 1626*7c478bd9Sstevel@tonic-gate * Actually do the cursor deletion. 
1627*7c478bd9Sstevel@tonic-gate */ 1628*7c478bd9Sstevel@tonic-gate static int 1629*7c478bd9Sstevel@tonic-gate __bam_c_physdel(dbc, cp, h) 1630*7c478bd9Sstevel@tonic-gate DBC *dbc; 1631*7c478bd9Sstevel@tonic-gate CURSOR *cp; 1632*7c478bd9Sstevel@tonic-gate PAGE *h; 1633*7c478bd9Sstevel@tonic-gate { 1634*7c478bd9Sstevel@tonic-gate enum { DELETE_ITEM, DELETE_PAGE, NOTHING_FURTHER } cmd; 1635*7c478bd9Sstevel@tonic-gate BOVERFLOW bo; 1636*7c478bd9Sstevel@tonic-gate DB *dbp; 1637*7c478bd9Sstevel@tonic-gate DBT dbt; 1638*7c478bd9Sstevel@tonic-gate DB_LOCK lock; 1639*7c478bd9Sstevel@tonic-gate db_indx_t indx; 1640*7c478bd9Sstevel@tonic-gate db_pgno_t pgno, next_pgno, prev_pgno; 1641*7c478bd9Sstevel@tonic-gate int delete_page, local_page, ret; 1642*7c478bd9Sstevel@tonic-gate 1643*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 1644*7c478bd9Sstevel@tonic-gate 1645*7c478bd9Sstevel@tonic-gate delete_page = ret = 0; 1646*7c478bd9Sstevel@tonic-gate 1647*7c478bd9Sstevel@tonic-gate /* Figure out what we're deleting. */ 1648*7c478bd9Sstevel@tonic-gate if (cp->dpgno == PGNO_INVALID) { 1649*7c478bd9Sstevel@tonic-gate pgno = cp->pgno; 1650*7c478bd9Sstevel@tonic-gate indx = cp->indx; 1651*7c478bd9Sstevel@tonic-gate } else { 1652*7c478bd9Sstevel@tonic-gate pgno = cp->dpgno; 1653*7c478bd9Sstevel@tonic-gate indx = cp->dindx; 1654*7c478bd9Sstevel@tonic-gate } 1655*7c478bd9Sstevel@tonic-gate 1656*7c478bd9Sstevel@tonic-gate /* 1657*7c478bd9Sstevel@tonic-gate * If the item is referenced by another cursor, set that cursor's 1658*7c478bd9Sstevel@tonic-gate * delete flag and leave it up to it to do the delete. 1659*7c478bd9Sstevel@tonic-gate * 1660*7c478bd9Sstevel@tonic-gate * !!! 1661*7c478bd9Sstevel@tonic-gate * This test for > 0 is a tricky. There are two ways that we can 1662*7c478bd9Sstevel@tonic-gate * be called here. Either we are closing the cursor or we've moved 1663*7c478bd9Sstevel@tonic-gate * off the page with the deleted entry. 
In the first case, we've 1664*7c478bd9Sstevel@tonic-gate * already removed the cursor from the active queue, so we won't see 1665*7c478bd9Sstevel@tonic-gate * it in __bam_ca_delete. In the second case, it will be on a different 1666*7c478bd9Sstevel@tonic-gate * item, so we won't bother with it in __bam_ca_delete. 1667*7c478bd9Sstevel@tonic-gate */ 1668*7c478bd9Sstevel@tonic-gate if (__bam_ca_delete(dbp, pgno, indx, 1) > 0) 1669*7c478bd9Sstevel@tonic-gate return (0); 1670*7c478bd9Sstevel@tonic-gate 1671*7c478bd9Sstevel@tonic-gate /* 1672*7c478bd9Sstevel@tonic-gate * If this is concurrent DB, upgrade the lock if necessary. 1673*7c478bd9Sstevel@tonic-gate */ 1674*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW) && 1675*7c478bd9Sstevel@tonic-gate (ret = lock_get(dbp->dbenv->lk_info, 1676*7c478bd9Sstevel@tonic-gate dbc->locker, DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE, 1677*7c478bd9Sstevel@tonic-gate &dbc->mylock)) != 0) 1678*7c478bd9Sstevel@tonic-gate return (EAGAIN); 1679*7c478bd9Sstevel@tonic-gate 1680*7c478bd9Sstevel@tonic-gate /* 1681*7c478bd9Sstevel@tonic-gate * If we don't already have the page locked, get it and delete the 1682*7c478bd9Sstevel@tonic-gate * items. 
1683*7c478bd9Sstevel@tonic-gate */ 1684*7c478bd9Sstevel@tonic-gate if ((h == NULL || h->pgno != pgno)) { 1685*7c478bd9Sstevel@tonic-gate if ((ret = __bam_lget(dbc, 0, pgno, DB_LOCK_WRITE, &lock)) != 0) 1686*7c478bd9Sstevel@tonic-gate return (ret); 1687*7c478bd9Sstevel@tonic-gate if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) 1688*7c478bd9Sstevel@tonic-gate return (ret); 1689*7c478bd9Sstevel@tonic-gate local_page = 1; 1690*7c478bd9Sstevel@tonic-gate } else 1691*7c478bd9Sstevel@tonic-gate local_page = 0; 1692*7c478bd9Sstevel@tonic-gate 1693*7c478bd9Sstevel@tonic-gate /* 1694*7c478bd9Sstevel@tonic-gate * If we're deleting a duplicate entry and there are other duplicate 1695*7c478bd9Sstevel@tonic-gate * entries remaining, call the common code to do the work and fix up 1696*7c478bd9Sstevel@tonic-gate * the parent page as necessary. Otherwise, do a normal btree delete. 1697*7c478bd9Sstevel@tonic-gate * 1698*7c478bd9Sstevel@tonic-gate * There are 5 possible cases: 1699*7c478bd9Sstevel@tonic-gate * 1700*7c478bd9Sstevel@tonic-gate * 1. It's not a duplicate item: do a normal btree delete. 1701*7c478bd9Sstevel@tonic-gate * 2. It's a duplicate item: 1702*7c478bd9Sstevel@tonic-gate * 2a: We delete an item from a page of duplicates, but there are 1703*7c478bd9Sstevel@tonic-gate * more items on the page. 1704*7c478bd9Sstevel@tonic-gate * 2b: We delete the last item from a page of duplicates, deleting 1705*7c478bd9Sstevel@tonic-gate * the last duplicate. 1706*7c478bd9Sstevel@tonic-gate * 2c: We delete the last item from a page of duplicates, but there 1707*7c478bd9Sstevel@tonic-gate * is a previous page of duplicates. 1708*7c478bd9Sstevel@tonic-gate * 2d: We delete the last item from a page of duplicates, but there 1709*7c478bd9Sstevel@tonic-gate * is a following page of duplicates. 1710*7c478bd9Sstevel@tonic-gate * 1711*7c478bd9Sstevel@tonic-gate * In the case of: 1712*7c478bd9Sstevel@tonic-gate * 1713*7c478bd9Sstevel@tonic-gate * 1: There's nothing further to do. 
1714*7c478bd9Sstevel@tonic-gate * 2a: There's nothing further to do. 1715*7c478bd9Sstevel@tonic-gate * 2b: Do the normal btree delete instead of a duplicate delete, as 1716*7c478bd9Sstevel@tonic-gate * that deletes both the duplicate chain and the parent page's 1717*7c478bd9Sstevel@tonic-gate * entry. 1718*7c478bd9Sstevel@tonic-gate * 2c: There's nothing further to do. 1719*7c478bd9Sstevel@tonic-gate * 2d: Delete the duplicate, and update the parent page's entry. 1720*7c478bd9Sstevel@tonic-gate */ 1721*7c478bd9Sstevel@tonic-gate if (TYPE(h) == P_DUPLICATE) { 1722*7c478bd9Sstevel@tonic-gate pgno = PGNO(h); 1723*7c478bd9Sstevel@tonic-gate prev_pgno = PREV_PGNO(h); 1724*7c478bd9Sstevel@tonic-gate next_pgno = NEXT_PGNO(h); 1725*7c478bd9Sstevel@tonic-gate 1726*7c478bd9Sstevel@tonic-gate if (NUM_ENT(h) == 1 && 1727*7c478bd9Sstevel@tonic-gate prev_pgno == PGNO_INVALID && next_pgno == PGNO_INVALID) 1728*7c478bd9Sstevel@tonic-gate cmd = DELETE_PAGE; 1729*7c478bd9Sstevel@tonic-gate else { 1730*7c478bd9Sstevel@tonic-gate cmd = DELETE_ITEM; 1731*7c478bd9Sstevel@tonic-gate 1732*7c478bd9Sstevel@tonic-gate /* Delete the duplicate. */ 1733*7c478bd9Sstevel@tonic-gate if ((ret = __db_drem(dbc, &h, indx, __bam_free)) != 0) 1734*7c478bd9Sstevel@tonic-gate goto err; 1735*7c478bd9Sstevel@tonic-gate 1736*7c478bd9Sstevel@tonic-gate /* 1737*7c478bd9Sstevel@tonic-gate * 2a: h != NULL, h->pgno == pgno 1738*7c478bd9Sstevel@tonic-gate * 2b: We don't reach this clause, as the above test 1739*7c478bd9Sstevel@tonic-gate * was true. 1740*7c478bd9Sstevel@tonic-gate * 2c: h == NULL, prev_pgno != PGNO_INVALID 1741*7c478bd9Sstevel@tonic-gate * 2d: h != NULL, next_pgno != PGNO_INVALID 1742*7c478bd9Sstevel@tonic-gate * 1743*7c478bd9Sstevel@tonic-gate * Test for 2a and 2c: if we didn't empty the current 1744*7c478bd9Sstevel@tonic-gate * page or there was a previous page of duplicates, we 1745*7c478bd9Sstevel@tonic-gate * don't need to touch the parent page. 
1746*7c478bd9Sstevel@tonic-gate */ 1747*7c478bd9Sstevel@tonic-gate if ((h != NULL && pgno == h->pgno) || 1748*7c478bd9Sstevel@tonic-gate prev_pgno != PGNO_INVALID) 1749*7c478bd9Sstevel@tonic-gate cmd = NOTHING_FURTHER; 1750*7c478bd9Sstevel@tonic-gate } 1751*7c478bd9Sstevel@tonic-gate 1752*7c478bd9Sstevel@tonic-gate /* 1753*7c478bd9Sstevel@tonic-gate * Release any page we're holding and its lock. 1754*7c478bd9Sstevel@tonic-gate * 1755*7c478bd9Sstevel@tonic-gate * !!! 1756*7c478bd9Sstevel@tonic-gate * If there is no subsequent page in the duplicate chain, then 1757*7c478bd9Sstevel@tonic-gate * __db_drem will have put page "h" and set it to NULL. 1758*7c478bd9Sstevel@tonic-gate */ 1759*7c478bd9Sstevel@tonic-gate if (local_page) { 1760*7c478bd9Sstevel@tonic-gate if (h != NULL) 1761*7c478bd9Sstevel@tonic-gate (void)memp_fput(dbp->mpf, h, 0); 1762*7c478bd9Sstevel@tonic-gate (void)__BT_TLPUT(dbc, lock); 1763*7c478bd9Sstevel@tonic-gate local_page = 0; 1764*7c478bd9Sstevel@tonic-gate } 1765*7c478bd9Sstevel@tonic-gate 1766*7c478bd9Sstevel@tonic-gate if (cmd == NOTHING_FURTHER) 1767*7c478bd9Sstevel@tonic-gate goto done; 1768*7c478bd9Sstevel@tonic-gate 1769*7c478bd9Sstevel@tonic-gate /* Acquire the parent page and switch the index to its entry. 
*/ 1770*7c478bd9Sstevel@tonic-gate if ((ret = 1771*7c478bd9Sstevel@tonic-gate __bam_lget(dbc, 0, cp->pgno, DB_LOCK_WRITE, &lock)) != 0) 1772*7c478bd9Sstevel@tonic-gate goto err; 1773*7c478bd9Sstevel@tonic-gate if ((ret = memp_fget(dbp->mpf, &cp->pgno, 0, &h)) != 0) { 1774*7c478bd9Sstevel@tonic-gate (void)__BT_TLPUT(dbc, lock); 1775*7c478bd9Sstevel@tonic-gate goto err; 1776*7c478bd9Sstevel@tonic-gate } 1777*7c478bd9Sstevel@tonic-gate local_page = 1; 1778*7c478bd9Sstevel@tonic-gate indx = cp->indx; 1779*7c478bd9Sstevel@tonic-gate 1780*7c478bd9Sstevel@tonic-gate if (cmd == DELETE_PAGE) 1781*7c478bd9Sstevel@tonic-gate goto btd; 1782*7c478bd9Sstevel@tonic-gate 1783*7c478bd9Sstevel@tonic-gate /* 1784*7c478bd9Sstevel@tonic-gate * Copy, delete, update, add-back the parent page's data entry. 1785*7c478bd9Sstevel@tonic-gate * 1786*7c478bd9Sstevel@tonic-gate * XXX 1787*7c478bd9Sstevel@tonic-gate * This may be a performance/logging problem. We should add a 1788*7c478bd9Sstevel@tonic-gate * log message which simply logs/updates a random set of bytes 1789*7c478bd9Sstevel@tonic-gate * on a page, and use it instead of doing a delete/add pair. 1790*7c478bd9Sstevel@tonic-gate */ 1791*7c478bd9Sstevel@tonic-gate indx += O_INDX; 1792*7c478bd9Sstevel@tonic-gate bo = *GET_BOVERFLOW(h, indx); 1793*7c478bd9Sstevel@tonic-gate (void)__db_ditem(dbc, h, indx, BOVERFLOW_SIZE); 1794*7c478bd9Sstevel@tonic-gate bo.pgno = next_pgno; 1795*7c478bd9Sstevel@tonic-gate memset(&dbt, 0, sizeof(dbt)); 1796*7c478bd9Sstevel@tonic-gate dbt.data = &bo; 1797*7c478bd9Sstevel@tonic-gate dbt.size = BOVERFLOW_SIZE; 1798*7c478bd9Sstevel@tonic-gate (void)__db_pitem(dbc, h, indx, BOVERFLOW_SIZE, &dbt, NULL); 1799*7c478bd9Sstevel@tonic-gate (void)memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY); 1800*7c478bd9Sstevel@tonic-gate goto done; 1801*7c478bd9Sstevel@tonic-gate } 1802*7c478bd9Sstevel@tonic-gate 1803*7c478bd9Sstevel@tonic-gate btd: /* 1804*7c478bd9Sstevel@tonic-gate * If the page is going to be emptied, delete it. 
To delete a leaf 1805*7c478bd9Sstevel@tonic-gate * page we need a copy of a key from the page. We use the 0th page 1806*7c478bd9Sstevel@tonic-gate * index since it's the last key that the page held. 1807*7c478bd9Sstevel@tonic-gate * 1808*7c478bd9Sstevel@tonic-gate * We malloc the page information instead of using the return key/data 1809*7c478bd9Sstevel@tonic-gate * memory because we've already set them -- the reason we've already 1810*7c478bd9Sstevel@tonic-gate * set them is because we're (potentially) about to do a reverse split, 1811*7c478bd9Sstevel@tonic-gate * which would make our saved page information useless. 1812*7c478bd9Sstevel@tonic-gate * 1813*7c478bd9Sstevel@tonic-gate * !!! 1814*7c478bd9Sstevel@tonic-gate * The following operations to delete a page might deadlock. I think 1815*7c478bd9Sstevel@tonic-gate * that's OK. The problem is if we're deleting an item because we're 1816*7c478bd9Sstevel@tonic-gate * closing cursors because we've already deadlocked and want to call 1817*7c478bd9Sstevel@tonic-gate * txn_abort(). If we fail due to deadlock, we leave a locked empty 1818*7c478bd9Sstevel@tonic-gate * page in the tree, which won't be empty long because we're going to 1819*7c478bd9Sstevel@tonic-gate * undo the delete. 1820*7c478bd9Sstevel@tonic-gate */ 1821*7c478bd9Sstevel@tonic-gate if (NUM_ENT(h) == 2 && h->pgno != PGNO_ROOT) { 1822*7c478bd9Sstevel@tonic-gate memset(&dbt, 0, sizeof(DBT)); 1823*7c478bd9Sstevel@tonic-gate dbt.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL; 1824*7c478bd9Sstevel@tonic-gate if ((ret = __db_ret(dbp, h, 0, &dbt, NULL, NULL)) != 0) 1825*7c478bd9Sstevel@tonic-gate goto err; 1826*7c478bd9Sstevel@tonic-gate delete_page = 1; 1827*7c478bd9Sstevel@tonic-gate } 1828*7c478bd9Sstevel@tonic-gate 1829*7c478bd9Sstevel@tonic-gate /* 1830*7c478bd9Sstevel@tonic-gate * Do a normal btree delete. 1831*7c478bd9Sstevel@tonic-gate * 1832*7c478bd9Sstevel@tonic-gate * !!! 
1833*7c478bd9Sstevel@tonic-gate * Delete the key item first, otherwise the duplicate checks in 1834*7c478bd9Sstevel@tonic-gate * __bam_ditem() won't work! 1835*7c478bd9Sstevel@tonic-gate */ 1836*7c478bd9Sstevel@tonic-gate if ((ret = __bam_ditem(dbc, h, indx)) != 0) 1837*7c478bd9Sstevel@tonic-gate goto err; 1838*7c478bd9Sstevel@tonic-gate if ((ret = __bam_ditem(dbc, h, indx)) != 0) 1839*7c478bd9Sstevel@tonic-gate goto err; 1840*7c478bd9Sstevel@tonic-gate 1841*7c478bd9Sstevel@tonic-gate /* Discard any remaining locks/pages. */ 1842*7c478bd9Sstevel@tonic-gate if (local_page) { 1843*7c478bd9Sstevel@tonic-gate (void)memp_fput(dbp->mpf, h, 0); 1844*7c478bd9Sstevel@tonic-gate (void)__BT_TLPUT(dbc, lock); 1845*7c478bd9Sstevel@tonic-gate local_page = 0; 1846*7c478bd9Sstevel@tonic-gate } 1847*7c478bd9Sstevel@tonic-gate 1848*7c478bd9Sstevel@tonic-gate /* Delete the page if it was emptied. */ 1849*7c478bd9Sstevel@tonic-gate if (delete_page) 1850*7c478bd9Sstevel@tonic-gate ret = __bam_dpage(dbc, &dbt); 1851*7c478bd9Sstevel@tonic-gate 1852*7c478bd9Sstevel@tonic-gate err: 1853*7c478bd9Sstevel@tonic-gate done: if (delete_page) 1854*7c478bd9Sstevel@tonic-gate __os_free(dbt.data, dbt.size); 1855*7c478bd9Sstevel@tonic-gate 1856*7c478bd9Sstevel@tonic-gate if (local_page) { 1857*7c478bd9Sstevel@tonic-gate /* 1858*7c478bd9Sstevel@tonic-gate * It's possible for h to be NULL, as __db_drem may have 1859*7c478bd9Sstevel@tonic-gate * been relinking pages by the time that it deadlocked. 
1860*7c478bd9Sstevel@tonic-gate */ 1861*7c478bd9Sstevel@tonic-gate if (h != NULL) 1862*7c478bd9Sstevel@tonic-gate (void)memp_fput(dbp->mpf, h, 0); 1863*7c478bd9Sstevel@tonic-gate (void)__BT_TLPUT(dbc, lock); 1864*7c478bd9Sstevel@tonic-gate } 1865*7c478bd9Sstevel@tonic-gate 1866*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW)) 1867*7c478bd9Sstevel@tonic-gate (void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock, 1868*7c478bd9Sstevel@tonic-gate DB_LOCK_IWRITE, 0); 1869*7c478bd9Sstevel@tonic-gate 1870*7c478bd9Sstevel@tonic-gate return (ret); 1871*7c478bd9Sstevel@tonic-gate } 1872*7c478bd9Sstevel@tonic-gate 1873*7c478bd9Sstevel@tonic-gate
/*
 * __bam_c_getstack --
 *	Acquire a full stack for a cursor.
 *
 * The page named by cp->pgno is fetched from the memory pool, the item
 * at index 0 of that page is copied into freshly allocated memory, and
 * that copy is handed to __bam_search() as the search key to obtain a
 * write-locked stack for the page.
 *
 * The fetched page is returned to the pool and the key copy is freed
 * before returning, whether or not the search succeeded.
 *
 * Returns 0 on success; otherwise the non-zero value returned by
 * whichever of memp_fget(), __db_ret() or __bam_search() failed.
 */
static int
__bam_c_getstack(dbc, cp)
	DBC *dbc;
	CURSOR *cp;
{
	DB *dbp;
	DBT key;
	PAGE *pagep;
	db_pgno_t page_no;
	int exact_match, ret;

	dbp = dbc->dbp;
	pagep = NULL;
	memset(&key, 0, sizeof(key));

	/*
	 * Pin the page the cursor is positioned on.  Nothing has been
	 * acquired yet if this fails, so there is nothing to clean up.
	 */
	page_no = cp->pgno;
	ret = memp_fget(dbp->mpf, &page_no, 0, &pagep);
	if (ret != 0)
		return (ret);

	/*
	 * Take a private, allocated copy of the item at index 0 to use
	 * as the search key; only search if the copy was obtained.
	 */
	key.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL;
	ret = __db_ret(dbp, pagep, 0, &key, NULL, NULL);
	if (ret == 0) {
		/* Get a write-locked stack for that page. */
		exact_match = 0;
		ret = __bam_search(dbc,
		    &key, S_KEYFIRST, 1, NULL, &exact_match);
	}

	/* The key copy and the pinned page are no longer needed. */
	if (pagep != NULL)
		(void)memp_fput(dbp->mpf, pagep, 0);
	if (key.data != NULL)
		__os_free(key.data, key.size);
	return (ret);
}