1*7c478bd9Sstevel@tonic-gate /*- 2*7c478bd9Sstevel@tonic-gate * See the file LICENSE for redistribution information. 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * Copyright (c) 1997, 1998 5*7c478bd9Sstevel@tonic-gate * Sleepycat Software. All rights reserved. 6*7c478bd9Sstevel@tonic-gate */ 7*7c478bd9Sstevel@tonic-gate 8*7c478bd9Sstevel@tonic-gate #include "config.h" 9*7c478bd9Sstevel@tonic-gate 10*7c478bd9Sstevel@tonic-gate #ifndef lint 11*7c478bd9Sstevel@tonic-gate static const char sccsid[] = "@(#)bt_recno.c 10.53 (Sleepycat) 12/11/98"; 12*7c478bd9Sstevel@tonic-gate #endif /* not lint */ 13*7c478bd9Sstevel@tonic-gate 14*7c478bd9Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES 15*7c478bd9Sstevel@tonic-gate #include <sys/types.h> 16*7c478bd9Sstevel@tonic-gate 17*7c478bd9Sstevel@tonic-gate #include <errno.h> 18*7c478bd9Sstevel@tonic-gate #include <limits.h> 19*7c478bd9Sstevel@tonic-gate #include <string.h> 20*7c478bd9Sstevel@tonic-gate #endif 21*7c478bd9Sstevel@tonic-gate 22*7c478bd9Sstevel@tonic-gate #include "db_int.h" 23*7c478bd9Sstevel@tonic-gate #include "db_page.h" 24*7c478bd9Sstevel@tonic-gate #include "btree.h" 25*7c478bd9Sstevel@tonic-gate #include "db_ext.h" 26*7c478bd9Sstevel@tonic-gate #include "shqueue.h" 27*7c478bd9Sstevel@tonic-gate #include "db_shash.h" 28*7c478bd9Sstevel@tonic-gate #include "lock.h" 29*7c478bd9Sstevel@tonic-gate #include "lock_ext.h" 30*7c478bd9Sstevel@tonic-gate 31*7c478bd9Sstevel@tonic-gate static int __ram_add __P((DBC *, db_recno_t *, DBT *, u_int32_t, u_int32_t)); 32*7c478bd9Sstevel@tonic-gate static int __ram_delete __P((DB *, DB_TXN *, DBT *, u_int32_t)); 33*7c478bd9Sstevel@tonic-gate static int __ram_fmap __P((DBC *, db_recno_t)); 34*7c478bd9Sstevel@tonic-gate static int __ram_i_delete __P((DBC *)); 35*7c478bd9Sstevel@tonic-gate static int __ram_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); 36*7c478bd9Sstevel@tonic-gate static int __ram_source __P((DB *, RECNO *, const char *)); 37*7c478bd9Sstevel@tonic-gate static int __ram_sync __P((DB *, u_int32_t)); 38*7c478bd9Sstevel@tonic-gate static int __ram_update __P((DBC *, db_recno_t, int)); 39*7c478bd9Sstevel@tonic-gate static int __ram_vmap __P((DBC *, db_recno_t)); 40*7c478bd9Sstevel@tonic-gate static int __ram_writeback __P((DBC *)); 41*7c478bd9Sstevel@tonic-gate 42*7c478bd9Sstevel@tonic-gate /* 43*7c478bd9Sstevel@tonic-gate * In recno, there are two meanings to the on-page "deleted" flag. If we're 44*7c478bd9Sstevel@tonic-gate * re-numbering records, it means the record was implicitly created. We skip 45*7c478bd9Sstevel@tonic-gate * over implicitly created records if doing a cursor "next" or "prev", and 46*7c478bd9Sstevel@tonic-gate * return DB_KEYEMPTY if they're explicitly requested.. If not re-numbering 47*7c478bd9Sstevel@tonic-gate * records, it means that the record was implicitly created, or was deleted. 48*7c478bd9Sstevel@tonic-gate * We skip over implicitly created or deleted records if doing a cursor "next" 49*7c478bd9Sstevel@tonic-gate * or "prev", and return DB_KEYEMPTY if they're explicitly requested. 50*7c478bd9Sstevel@tonic-gate * 51*7c478bd9Sstevel@tonic-gate * If we're re-numbering records, then we have to detect in the cursor that 52*7c478bd9Sstevel@tonic-gate * a record was deleted, and adjust the cursor as necessary on the next get. 53*7c478bd9Sstevel@tonic-gate * If we're not re-numbering records, then we can detect that a record has 54*7c478bd9Sstevel@tonic-gate * been deleted by looking at the actual on-page record, so we completely 55*7c478bd9Sstevel@tonic-gate * ignore the cursor's delete flag. This is different from the B+tree code. 56*7c478bd9Sstevel@tonic-gate * It also maintains whether the cursor references a deleted record in the 57*7c478bd9Sstevel@tonic-gate * cursor, and it doesn't always check the on-page value. 58*7c478bd9Sstevel@tonic-gate */ 59*7c478bd9Sstevel@tonic-gate #define CD_SET(dbp, cp) { \ 60*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_RE_RENUMBER)) \ 61*7c478bd9Sstevel@tonic-gate F_SET(cp, C_DELETED); \ 62*7c478bd9Sstevel@tonic-gate } 63*7c478bd9Sstevel@tonic-gate #define CD_CLR(dbp, cp) { \ 64*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_RE_RENUMBER)) \ 65*7c478bd9Sstevel@tonic-gate F_CLR(cp, C_DELETED); \ 66*7c478bd9Sstevel@tonic-gate } 67*7c478bd9Sstevel@tonic-gate #define CD_ISSET(dbp, cp) \ 68*7c478bd9Sstevel@tonic-gate (F_ISSET(dbp, DB_RE_RENUMBER) && F_ISSET(cp, C_DELETED)) 69*7c478bd9Sstevel@tonic-gate 70*7c478bd9Sstevel@tonic-gate /* 71*7c478bd9Sstevel@tonic-gate * __ram_open -- 72*7c478bd9Sstevel@tonic-gate * Recno open function. 73*7c478bd9Sstevel@tonic-gate * 74*7c478bd9Sstevel@tonic-gate * PUBLIC: int __ram_open __P((DB *, DB_INFO *)); 75*7c478bd9Sstevel@tonic-gate */ 76*7c478bd9Sstevel@tonic-gate int 77*7c478bd9Sstevel@tonic-gate __ram_open(dbp, dbinfo) 78*7c478bd9Sstevel@tonic-gate DB *dbp; 79*7c478bd9Sstevel@tonic-gate DB_INFO *dbinfo; 80*7c478bd9Sstevel@tonic-gate { 81*7c478bd9Sstevel@tonic-gate BTREE *t; 82*7c478bd9Sstevel@tonic-gate DBC *dbc; 83*7c478bd9Sstevel@tonic-gate RECNO *rp; 84*7c478bd9Sstevel@tonic-gate int ret, t_ret; 85*7c478bd9Sstevel@tonic-gate 86*7c478bd9Sstevel@tonic-gate /* Allocate and initialize the private btree structure. */ 87*7c478bd9Sstevel@tonic-gate if ((ret = __os_calloc(1, sizeof(BTREE), &t)) != 0) 88*7c478bd9Sstevel@tonic-gate return (ret); 89*7c478bd9Sstevel@tonic-gate dbp->internal = t; 90*7c478bd9Sstevel@tonic-gate __bam_setovflsize(dbp); 91*7c478bd9Sstevel@tonic-gate 92*7c478bd9Sstevel@tonic-gate /* Allocate and initialize the private recno structure. */ 93*7c478bd9Sstevel@tonic-gate if ((ret = __os_calloc(1, sizeof(*rp), &rp)) != 0) 94*7c478bd9Sstevel@tonic-gate return (ret); 95*7c478bd9Sstevel@tonic-gate /* Link in the private recno structure. */ 96*7c478bd9Sstevel@tonic-gate t->recno = rp; 97*7c478bd9Sstevel@tonic-gate 98*7c478bd9Sstevel@tonic-gate /* 99*7c478bd9Sstevel@tonic-gate * Intention is to make sure all of the user's selections are okay 100*7c478bd9Sstevel@tonic-gate * here and then use them without checking. 101*7c478bd9Sstevel@tonic-gate */ 102*7c478bd9Sstevel@tonic-gate if (dbinfo == NULL) { 103*7c478bd9Sstevel@tonic-gate rp->re_delim = '\n'; 104*7c478bd9Sstevel@tonic-gate rp->re_pad = ' '; 105*7c478bd9Sstevel@tonic-gate rp->re_fd = -1; 106*7c478bd9Sstevel@tonic-gate F_SET(rp, RECNO_EOF); 107*7c478bd9Sstevel@tonic-gate } else { 108*7c478bd9Sstevel@tonic-gate /* 109*7c478bd9Sstevel@tonic-gate * If the user specified a source tree, open it and map it in. 110*7c478bd9Sstevel@tonic-gate * 111*7c478bd9Sstevel@tonic-gate * !!! 112*7c478bd9Sstevel@tonic-gate * We don't complain if the user specified transactions or 113*7c478bd9Sstevel@tonic-gate * threads. It's possible to make it work, but you'd better 114*7c478bd9Sstevel@tonic-gate * know what you're doing! 115*7c478bd9Sstevel@tonic-gate */ 116*7c478bd9Sstevel@tonic-gate if (dbinfo->re_source == NULL) { 117*7c478bd9Sstevel@tonic-gate rp->re_fd = -1; 118*7c478bd9Sstevel@tonic-gate F_SET(rp, RECNO_EOF); 119*7c478bd9Sstevel@tonic-gate } else { 120*7c478bd9Sstevel@tonic-gate if ((ret = 121*7c478bd9Sstevel@tonic-gate __ram_source(dbp, rp, dbinfo->re_source)) != 0) 122*7c478bd9Sstevel@tonic-gate goto err; 123*7c478bd9Sstevel@tonic-gate } 124*7c478bd9Sstevel@tonic-gate 125*7c478bd9Sstevel@tonic-gate /* Copy delimiter, length and padding values. */ 126*7c478bd9Sstevel@tonic-gate rp->re_delim = 127*7c478bd9Sstevel@tonic-gate F_ISSET(dbp, DB_RE_DELIMITER) ? dbinfo->re_delim : '\n'; 128*7c478bd9Sstevel@tonic-gate rp->re_pad = F_ISSET(dbp, DB_RE_PAD) ? dbinfo->re_pad : ' '; 129*7c478bd9Sstevel@tonic-gate 130*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_RE_FIXEDLEN)) { 131*7c478bd9Sstevel@tonic-gate if ((rp->re_len = dbinfo->re_len) == 0) { 132*7c478bd9Sstevel@tonic-gate __db_err(dbp->dbenv, 133*7c478bd9Sstevel@tonic-gate "record length must be greater than 0"); 134*7c478bd9Sstevel@tonic-gate ret = EINVAL; 135*7c478bd9Sstevel@tonic-gate goto err; 136*7c478bd9Sstevel@tonic-gate } 137*7c478bd9Sstevel@tonic-gate } else 138*7c478bd9Sstevel@tonic-gate rp->re_len = 0; 139*7c478bd9Sstevel@tonic-gate } 140*7c478bd9Sstevel@tonic-gate 141*7c478bd9Sstevel@tonic-gate /* Initialize the remaining fields/methods of the DB. */ 142*7c478bd9Sstevel@tonic-gate dbp->am_close = __ram_close; 143*7c478bd9Sstevel@tonic-gate dbp->del = __ram_delete; 144*7c478bd9Sstevel@tonic-gate dbp->put = __ram_put; 145*7c478bd9Sstevel@tonic-gate dbp->stat = __bam_stat; 146*7c478bd9Sstevel@tonic-gate dbp->sync = __ram_sync; 147*7c478bd9Sstevel@tonic-gate 148*7c478bd9Sstevel@tonic-gate /* Start up the tree. */ 149*7c478bd9Sstevel@tonic-gate if ((ret = __bam_read_root(dbp)) != 0) 150*7c478bd9Sstevel@tonic-gate goto err; 151*7c478bd9Sstevel@tonic-gate 152*7c478bd9Sstevel@tonic-gate /* Set the overflow page size. */ 153*7c478bd9Sstevel@tonic-gate __bam_setovflsize(dbp); 154*7c478bd9Sstevel@tonic-gate 155*7c478bd9Sstevel@tonic-gate /* If we're snapshotting an underlying source file, do it now. */ 156*7c478bd9Sstevel@tonic-gate if (dbinfo != NULL && F_ISSET(dbinfo, DB_SNAPSHOT)) { 157*7c478bd9Sstevel@tonic-gate /* Allocate a cursor. */ 158*7c478bd9Sstevel@tonic-gate if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0) 159*7c478bd9Sstevel@tonic-gate goto err; 160*7c478bd9Sstevel@tonic-gate 161*7c478bd9Sstevel@tonic-gate /* Do the snapshot. */ 162*7c478bd9Sstevel@tonic-gate if ((ret = __ram_update(dbc, 163*7c478bd9Sstevel@tonic-gate DB_MAX_RECORDS, 0)) != 0 && ret == DB_NOTFOUND) 164*7c478bd9Sstevel@tonic-gate ret = 0; 165*7c478bd9Sstevel@tonic-gate 166*7c478bd9Sstevel@tonic-gate /* Discard the cursor. */ 167*7c478bd9Sstevel@tonic-gate if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) 168*7c478bd9Sstevel@tonic-gate ret = t_ret; 169*7c478bd9Sstevel@tonic-gate 170*7c478bd9Sstevel@tonic-gate if (ret != 0) 171*7c478bd9Sstevel@tonic-gate goto err; 172*7c478bd9Sstevel@tonic-gate } 173*7c478bd9Sstevel@tonic-gate 174*7c478bd9Sstevel@tonic-gate return (0); 175*7c478bd9Sstevel@tonic-gate 176*7c478bd9Sstevel@tonic-gate err: /* If we mmap'd a source file, discard it. */ 177*7c478bd9Sstevel@tonic-gate if (rp->re_smap != NULL) 178*7c478bd9Sstevel@tonic-gate (void)__db_unmapfile(rp->re_smap, rp->re_msize); 179*7c478bd9Sstevel@tonic-gate 180*7c478bd9Sstevel@tonic-gate /* If we opened a source file, discard it. */ 181*7c478bd9Sstevel@tonic-gate if (rp->re_fd != -1) 182*7c478bd9Sstevel@tonic-gate (void)__os_close(rp->re_fd); 183*7c478bd9Sstevel@tonic-gate if (rp->re_source != NULL) 184*7c478bd9Sstevel@tonic-gate __os_freestr(rp->re_source); 185*7c478bd9Sstevel@tonic-gate 186*7c478bd9Sstevel@tonic-gate __os_free(rp, sizeof(*rp)); 187*7c478bd9Sstevel@tonic-gate 188*7c478bd9Sstevel@tonic-gate return (ret); 189*7c478bd9Sstevel@tonic-gate } 190*7c478bd9Sstevel@tonic-gate 191*7c478bd9Sstevel@tonic-gate /* 192*7c478bd9Sstevel@tonic-gate * __ram_delete -- 193*7c478bd9Sstevel@tonic-gate * Recno db->del function. 194*7c478bd9Sstevel@tonic-gate */ 195*7c478bd9Sstevel@tonic-gate static int 196*7c478bd9Sstevel@tonic-gate __ram_delete(dbp, txn, key, flags) 197*7c478bd9Sstevel@tonic-gate DB *dbp; 198*7c478bd9Sstevel@tonic-gate DB_TXN *txn; 199*7c478bd9Sstevel@tonic-gate DBT *key; 200*7c478bd9Sstevel@tonic-gate u_int32_t flags; 201*7c478bd9Sstevel@tonic-gate { 202*7c478bd9Sstevel@tonic-gate CURSOR *cp; 203*7c478bd9Sstevel@tonic-gate DBC *dbc; 204*7c478bd9Sstevel@tonic-gate db_recno_t recno; 205*7c478bd9Sstevel@tonic-gate int ret, t_ret; 206*7c478bd9Sstevel@tonic-gate 207*7c478bd9Sstevel@tonic-gate DB_PANIC_CHECK(dbp); 208*7c478bd9Sstevel@tonic-gate 209*7c478bd9Sstevel@tonic-gate /* Check for invalid flags. */ 210*7c478bd9Sstevel@tonic-gate if ((ret = __db_delchk(dbp, 211*7c478bd9Sstevel@tonic-gate key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0) 212*7c478bd9Sstevel@tonic-gate return (ret); 213*7c478bd9Sstevel@tonic-gate 214*7c478bd9Sstevel@tonic-gate /* Acquire a cursor. */ 215*7c478bd9Sstevel@tonic-gate if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0) 216*7c478bd9Sstevel@tonic-gate return (ret); 217*7c478bd9Sstevel@tonic-gate 218*7c478bd9Sstevel@tonic-gate DEBUG_LWRITE(dbc, txn, "ram_delete", key, NULL, flags); 219*7c478bd9Sstevel@tonic-gate 220*7c478bd9Sstevel@tonic-gate /* Check the user's record number and fill in as necessary. */ 221*7c478bd9Sstevel@tonic-gate if ((ret = __ram_getno(dbc, key, &recno, 0)) != 0) 222*7c478bd9Sstevel@tonic-gate goto err; 223*7c478bd9Sstevel@tonic-gate 224*7c478bd9Sstevel@tonic-gate /* Do the delete. */ 225*7c478bd9Sstevel@tonic-gate cp = dbc->internal; 226*7c478bd9Sstevel@tonic-gate cp->recno = recno; 227*7c478bd9Sstevel@tonic-gate ret = __ram_i_delete(dbc); 228*7c478bd9Sstevel@tonic-gate 229*7c478bd9Sstevel@tonic-gate /* Release the cursor. */ 230*7c478bd9Sstevel@tonic-gate err: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) 231*7c478bd9Sstevel@tonic-gate ret = t_ret; 232*7c478bd9Sstevel@tonic-gate 233*7c478bd9Sstevel@tonic-gate return (ret); 234*7c478bd9Sstevel@tonic-gate } 235*7c478bd9Sstevel@tonic-gate 236*7c478bd9Sstevel@tonic-gate /* 237*7c478bd9Sstevel@tonic-gate * __ram_i_delete -- 238*7c478bd9Sstevel@tonic-gate * Internal version of recno delete, called by __ram_delete and 239*7c478bd9Sstevel@tonic-gate * __ram_c_del. 240*7c478bd9Sstevel@tonic-gate */ 241*7c478bd9Sstevel@tonic-gate static int 242*7c478bd9Sstevel@tonic-gate __ram_i_delete(dbc) 243*7c478bd9Sstevel@tonic-gate DBC *dbc; 244*7c478bd9Sstevel@tonic-gate { 245*7c478bd9Sstevel@tonic-gate BKEYDATA bk; 246*7c478bd9Sstevel@tonic-gate BTREE *t; 247*7c478bd9Sstevel@tonic-gate CURSOR *cp; 248*7c478bd9Sstevel@tonic-gate DB *dbp; 249*7c478bd9Sstevel@tonic-gate DBT hdr, data; 250*7c478bd9Sstevel@tonic-gate PAGE *h; 251*7c478bd9Sstevel@tonic-gate db_indx_t indx; 252*7c478bd9Sstevel@tonic-gate int exact, ret, stack; 253*7c478bd9Sstevel@tonic-gate 254*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 255*7c478bd9Sstevel@tonic-gate cp = dbc->internal; 256*7c478bd9Sstevel@tonic-gate t = dbp->internal; 257*7c478bd9Sstevel@tonic-gate stack = 0; 258*7c478bd9Sstevel@tonic-gate 259*7c478bd9Sstevel@tonic-gate /* 260*7c478bd9Sstevel@tonic-gate * If this is CDB and this isn't a write cursor, then it's an error. 261*7c478bd9Sstevel@tonic-gate * If it is a write cursor, but we don't yet hold the write lock, then 262*7c478bd9Sstevel@tonic-gate * we need to upgrade to the write lock. 263*7c478bd9Sstevel@tonic-gate */ 264*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_AM_CDB)) { 265*7c478bd9Sstevel@tonic-gate /* Make sure it's a valid update cursor. */ 266*7c478bd9Sstevel@tonic-gate if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER)) 267*7c478bd9Sstevel@tonic-gate return (EINVAL); 268*7c478bd9Sstevel@tonic-gate 269*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbc, DBC_RMW) && 270*7c478bd9Sstevel@tonic-gate (ret = lock_get(dbp->dbenv->lk_info, dbc->locker, 271*7c478bd9Sstevel@tonic-gate DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE, 272*7c478bd9Sstevel@tonic-gate &dbc->mylock)) != 0) 273*7c478bd9Sstevel@tonic-gate return (EAGAIN); 274*7c478bd9Sstevel@tonic-gate } 275*7c478bd9Sstevel@tonic-gate 276*7c478bd9Sstevel@tonic-gate /* Search the tree for the key; delete only deletes exact matches. */ 277*7c478bd9Sstevel@tonic-gate if ((ret = __bam_rsearch(dbc, &cp->recno, S_DELETE, 1, &exact)) != 0) 278*7c478bd9Sstevel@tonic-gate goto err; 279*7c478bd9Sstevel@tonic-gate if (!exact) { 280*7c478bd9Sstevel@tonic-gate ret = DB_NOTFOUND; 281*7c478bd9Sstevel@tonic-gate goto err; 282*7c478bd9Sstevel@tonic-gate } 283*7c478bd9Sstevel@tonic-gate stack = 1; 284*7c478bd9Sstevel@tonic-gate 285*7c478bd9Sstevel@tonic-gate h = cp->csp->page; 286*7c478bd9Sstevel@tonic-gate indx = cp->csp->indx; 287*7c478bd9Sstevel@tonic-gate 288*7c478bd9Sstevel@tonic-gate /* 289*7c478bd9Sstevel@tonic-gate * If re-numbering records, the on-page deleted flag can only mean 290*7c478bd9Sstevel@tonic-gate * that this record was implicitly created. Applications aren't 291*7c478bd9Sstevel@tonic-gate * permitted to delete records they never created, return an error. 292*7c478bd9Sstevel@tonic-gate * 293*7c478bd9Sstevel@tonic-gate * If not re-numbering records, the on-page deleted flag means that 294*7c478bd9Sstevel@tonic-gate * this record was implicitly created, or, was deleted at some time. 295*7c478bd9Sstevel@tonic-gate * The former is an error because applications aren't permitted to 296*7c478bd9Sstevel@tonic-gate * delete records they never created, the latter is an error because 297*7c478bd9Sstevel@tonic-gate * if the record was "deleted", we could never have found it. 298*7c478bd9Sstevel@tonic-gate */ 299*7c478bd9Sstevel@tonic-gate if (B_DISSET(GET_BKEYDATA(h, indx)->type)) { 300*7c478bd9Sstevel@tonic-gate ret = DB_KEYEMPTY; 301*7c478bd9Sstevel@tonic-gate goto err; 302*7c478bd9Sstevel@tonic-gate } 303*7c478bd9Sstevel@tonic-gate 304*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_RE_RENUMBER)) { 305*7c478bd9Sstevel@tonic-gate /* Delete the item, adjust the counts, adjust the cursors. */ 306*7c478bd9Sstevel@tonic-gate if ((ret = __bam_ditem(dbc, h, indx)) != 0) 307*7c478bd9Sstevel@tonic-gate goto err; 308*7c478bd9Sstevel@tonic-gate __bam_adjust(dbc, -1); 309*7c478bd9Sstevel@tonic-gate __ram_ca(dbp, cp->recno, CA_DELETE); 310*7c478bd9Sstevel@tonic-gate 311*7c478bd9Sstevel@tonic-gate /* 312*7c478bd9Sstevel@tonic-gate * If the page is empty, delete it. The whole tree is locked 313*7c478bd9Sstevel@tonic-gate * so there are no preparations to make. 314*7c478bd9Sstevel@tonic-gate */ 315*7c478bd9Sstevel@tonic-gate if (NUM_ENT(h) == 0 && h->pgno != PGNO_ROOT) { 316*7c478bd9Sstevel@tonic-gate stack = 0; 317*7c478bd9Sstevel@tonic-gate ret = __bam_dpages(dbc); 318*7c478bd9Sstevel@tonic-gate } 319*7c478bd9Sstevel@tonic-gate } else { 320*7c478bd9Sstevel@tonic-gate /* Use a delete/put pair to replace the record with a marker. */ 321*7c478bd9Sstevel@tonic-gate if ((ret = __bam_ditem(dbc, h, indx)) != 0) 322*7c478bd9Sstevel@tonic-gate goto err; 323*7c478bd9Sstevel@tonic-gate 324*7c478bd9Sstevel@tonic-gate B_TSET(bk.type, B_KEYDATA, 1); 325*7c478bd9Sstevel@tonic-gate bk.len = 0; 326*7c478bd9Sstevel@tonic-gate memset(&hdr, 0, sizeof(hdr)); 327*7c478bd9Sstevel@tonic-gate hdr.data = &bk; 328*7c478bd9Sstevel@tonic-gate hdr.size = SSZA(BKEYDATA, data); 329*7c478bd9Sstevel@tonic-gate memset(&data, 0, sizeof(data)); 330*7c478bd9Sstevel@tonic-gate data.data = (char *)""; 331*7c478bd9Sstevel@tonic-gate data.size = 0; 332*7c478bd9Sstevel@tonic-gate if ((ret = __db_pitem(dbc, 333*7c478bd9Sstevel@tonic-gate h, indx, BKEYDATA_SIZE(0), &hdr, &data)) != 0) 334*7c478bd9Sstevel@tonic-gate goto err; 335*7c478bd9Sstevel@tonic-gate } 336*7c478bd9Sstevel@tonic-gate F_SET(t->recno, RECNO_MODIFIED); 337*7c478bd9Sstevel@tonic-gate 338*7c478bd9Sstevel@tonic-gate err: if (stack) 339*7c478bd9Sstevel@tonic-gate __bam_stkrel(dbc, 0); 340*7c478bd9Sstevel@tonic-gate 341*7c478bd9Sstevel@tonic-gate /* If we upgraded the CDB lock upon entry; downgrade it now. */ 342*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW)) 343*7c478bd9Sstevel@tonic-gate (void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock, 344*7c478bd9Sstevel@tonic-gate DB_LOCK_IWRITE, 0); 345*7c478bd9Sstevel@tonic-gate return (ret); 346*7c478bd9Sstevel@tonic-gate } 347*7c478bd9Sstevel@tonic-gate 348*7c478bd9Sstevel@tonic-gate /* 349*7c478bd9Sstevel@tonic-gate * __ram_put -- 350*7c478bd9Sstevel@tonic-gate * Recno db->put function. 351*7c478bd9Sstevel@tonic-gate */ 352*7c478bd9Sstevel@tonic-gate static int 353*7c478bd9Sstevel@tonic-gate __ram_put(dbp, txn, key, data, flags) 354*7c478bd9Sstevel@tonic-gate DB *dbp; 355*7c478bd9Sstevel@tonic-gate DB_TXN *txn; 356*7c478bd9Sstevel@tonic-gate DBT *key, *data; 357*7c478bd9Sstevel@tonic-gate u_int32_t flags; 358*7c478bd9Sstevel@tonic-gate { 359*7c478bd9Sstevel@tonic-gate DBC *dbc; 360*7c478bd9Sstevel@tonic-gate db_recno_t recno; 361*7c478bd9Sstevel@tonic-gate int ret, t_ret; 362*7c478bd9Sstevel@tonic-gate 363*7c478bd9Sstevel@tonic-gate DB_PANIC_CHECK(dbp); 364*7c478bd9Sstevel@tonic-gate 365*7c478bd9Sstevel@tonic-gate /* Check for invalid flags. */ 366*7c478bd9Sstevel@tonic-gate if ((ret = __db_putchk(dbp, 367*7c478bd9Sstevel@tonic-gate key, data, flags, F_ISSET(dbp, DB_AM_RDONLY), 0)) != 0) 368*7c478bd9Sstevel@tonic-gate return (ret); 369*7c478bd9Sstevel@tonic-gate 370*7c478bd9Sstevel@tonic-gate /* Allocate a cursor. */ 371*7c478bd9Sstevel@tonic-gate if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0) 372*7c478bd9Sstevel@tonic-gate return (ret); 373*7c478bd9Sstevel@tonic-gate 374*7c478bd9Sstevel@tonic-gate DEBUG_LWRITE(dbc, txn, "ram_put", key, data, flags); 375*7c478bd9Sstevel@tonic-gate 376*7c478bd9Sstevel@tonic-gate /* 377*7c478bd9Sstevel@tonic-gate * If we're appending to the tree, make sure we've read in all of 378*7c478bd9Sstevel@tonic-gate * the backing source file. Otherwise, check the user's record 379*7c478bd9Sstevel@tonic-gate * number and fill in as necessary. 380*7c478bd9Sstevel@tonic-gate */ 381*7c478bd9Sstevel@tonic-gate ret = flags == DB_APPEND ? 382*7c478bd9Sstevel@tonic-gate __ram_update(dbc, DB_MAX_RECORDS, 0) : 383*7c478bd9Sstevel@tonic-gate __ram_getno(dbc, key, &recno, 1); 384*7c478bd9Sstevel@tonic-gate 385*7c478bd9Sstevel@tonic-gate /* Add the record. */ 386*7c478bd9Sstevel@tonic-gate if (ret == 0) 387*7c478bd9Sstevel@tonic-gate ret = __ram_add(dbc, &recno, data, flags, 0); 388*7c478bd9Sstevel@tonic-gate 389*7c478bd9Sstevel@tonic-gate /* Discard the cursor. */ 390*7c478bd9Sstevel@tonic-gate if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) 391*7c478bd9Sstevel@tonic-gate ret = t_ret; 392*7c478bd9Sstevel@tonic-gate 393*7c478bd9Sstevel@tonic-gate /* Return the record number if we're appending to the tree. */ 394*7c478bd9Sstevel@tonic-gate if (ret == 0 && flags == DB_APPEND) 395*7c478bd9Sstevel@tonic-gate *(db_recno_t *)key->data = recno; 396*7c478bd9Sstevel@tonic-gate 397*7c478bd9Sstevel@tonic-gate return (ret); 398*7c478bd9Sstevel@tonic-gate } 399*7c478bd9Sstevel@tonic-gate 400*7c478bd9Sstevel@tonic-gate /* 401*7c478bd9Sstevel@tonic-gate * __ram_sync -- 402*7c478bd9Sstevel@tonic-gate * Recno db->sync function. 403*7c478bd9Sstevel@tonic-gate */ 404*7c478bd9Sstevel@tonic-gate static int 405*7c478bd9Sstevel@tonic-gate __ram_sync(dbp, flags) 406*7c478bd9Sstevel@tonic-gate DB *dbp; 407*7c478bd9Sstevel@tonic-gate u_int32_t flags; 408*7c478bd9Sstevel@tonic-gate { 409*7c478bd9Sstevel@tonic-gate DBC *dbc; 410*7c478bd9Sstevel@tonic-gate int ret, t_ret; 411*7c478bd9Sstevel@tonic-gate 412*7c478bd9Sstevel@tonic-gate /* 413*7c478bd9Sstevel@tonic-gate * Sync the underlying btree. 414*7c478bd9Sstevel@tonic-gate * 415*7c478bd9Sstevel@tonic-gate * !!! 416*7c478bd9Sstevel@tonic-gate * We don't need to do a panic check or flags check, the "real" 417*7c478bd9Sstevel@tonic-gate * sync function does all that for us. 418*7c478bd9Sstevel@tonic-gate */ 419*7c478bd9Sstevel@tonic-gate if ((ret = __db_sync(dbp, flags)) != 0) 420*7c478bd9Sstevel@tonic-gate return (ret); 421*7c478bd9Sstevel@tonic-gate 422*7c478bd9Sstevel@tonic-gate /* Allocate a cursor. */ 423*7c478bd9Sstevel@tonic-gate if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0) 424*7c478bd9Sstevel@tonic-gate return (ret); 425*7c478bd9Sstevel@tonic-gate 426*7c478bd9Sstevel@tonic-gate DEBUG_LWRITE(dbc, NULL, "ram_sync", NULL, NULL, flags); 427*7c478bd9Sstevel@tonic-gate 428*7c478bd9Sstevel@tonic-gate /* Copy back the backing source file. */ 429*7c478bd9Sstevel@tonic-gate ret = __ram_writeback(dbc); 430*7c478bd9Sstevel@tonic-gate 431*7c478bd9Sstevel@tonic-gate /* Discard the cursor. */ 432*7c478bd9Sstevel@tonic-gate if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) 433*7c478bd9Sstevel@tonic-gate ret = t_ret; 434*7c478bd9Sstevel@tonic-gate 435*7c478bd9Sstevel@tonic-gate return (ret); 436*7c478bd9Sstevel@tonic-gate } 437*7c478bd9Sstevel@tonic-gate 438*7c478bd9Sstevel@tonic-gate /* 439*7c478bd9Sstevel@tonic-gate * __ram_close -- 440*7c478bd9Sstevel@tonic-gate * Recno db->close function. 441*7c478bd9Sstevel@tonic-gate * 442*7c478bd9Sstevel@tonic-gate * PUBLIC: int __ram_close __P((DB *)); 443*7c478bd9Sstevel@tonic-gate */ 444*7c478bd9Sstevel@tonic-gate int 445*7c478bd9Sstevel@tonic-gate __ram_close(dbp) 446*7c478bd9Sstevel@tonic-gate DB *dbp; 447*7c478bd9Sstevel@tonic-gate { 448*7c478bd9Sstevel@tonic-gate RECNO *rp; 449*7c478bd9Sstevel@tonic-gate 450*7c478bd9Sstevel@tonic-gate rp = ((BTREE *)dbp->internal)->recno; 451*7c478bd9Sstevel@tonic-gate 452*7c478bd9Sstevel@tonic-gate /* Close any underlying mmap region. */ 453*7c478bd9Sstevel@tonic-gate if (rp->re_smap != NULL) 454*7c478bd9Sstevel@tonic-gate (void)__db_unmapfile(rp->re_smap, rp->re_msize); 455*7c478bd9Sstevel@tonic-gate 456*7c478bd9Sstevel@tonic-gate /* Close any backing source file descriptor. */ 457*7c478bd9Sstevel@tonic-gate if (rp->re_fd != -1) 458*7c478bd9Sstevel@tonic-gate (void)__os_close(rp->re_fd); 459*7c478bd9Sstevel@tonic-gate 460*7c478bd9Sstevel@tonic-gate /* Free any backing source file name. */ 461*7c478bd9Sstevel@tonic-gate if (rp->re_source != NULL) 462*7c478bd9Sstevel@tonic-gate __os_freestr(rp->re_source); 463*7c478bd9Sstevel@tonic-gate 464*7c478bd9Sstevel@tonic-gate /* Free allocated memory. */ 465*7c478bd9Sstevel@tonic-gate __os_free(rp, sizeof(RECNO)); 466*7c478bd9Sstevel@tonic-gate ((BTREE *)dbp->internal)->recno = NULL; 467*7c478bd9Sstevel@tonic-gate 468*7c478bd9Sstevel@tonic-gate /* Close the underlying btree. */ 469*7c478bd9Sstevel@tonic-gate return (__bam_close(dbp)); 470*7c478bd9Sstevel@tonic-gate } 471*7c478bd9Sstevel@tonic-gate 472*7c478bd9Sstevel@tonic-gate /* 473*7c478bd9Sstevel@tonic-gate * __ram_c_del -- 474*7c478bd9Sstevel@tonic-gate * Recno cursor->c_del function. 475*7c478bd9Sstevel@tonic-gate * 476*7c478bd9Sstevel@tonic-gate * PUBLIC: int __ram_c_del __P((DBC *, u_int32_t)); 477*7c478bd9Sstevel@tonic-gate */ 478*7c478bd9Sstevel@tonic-gate int 479*7c478bd9Sstevel@tonic-gate __ram_c_del(dbc, flags) 480*7c478bd9Sstevel@tonic-gate DBC *dbc; 481*7c478bd9Sstevel@tonic-gate u_int32_t flags; 482*7c478bd9Sstevel@tonic-gate { 483*7c478bd9Sstevel@tonic-gate CURSOR *cp; 484*7c478bd9Sstevel@tonic-gate DB *dbp; 485*7c478bd9Sstevel@tonic-gate int ret; 486*7c478bd9Sstevel@tonic-gate 487*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 488*7c478bd9Sstevel@tonic-gate cp = dbc->internal; 489*7c478bd9Sstevel@tonic-gate 490*7c478bd9Sstevel@tonic-gate DB_PANIC_CHECK(dbp); 491*7c478bd9Sstevel@tonic-gate 492*7c478bd9Sstevel@tonic-gate /* Check for invalid flags. */ 493*7c478bd9Sstevel@tonic-gate if ((ret = __db_cdelchk(dbp, flags, 494*7c478bd9Sstevel@tonic-gate F_ISSET(dbp, DB_AM_RDONLY), cp->recno != RECNO_OOB)) != 0) 495*7c478bd9Sstevel@tonic-gate return (ret); 496*7c478bd9Sstevel@tonic-gate 497*7c478bd9Sstevel@tonic-gate DEBUG_LWRITE(dbc, dbc->txn, "ram_c_del", NULL, NULL, flags); 498*7c478bd9Sstevel@tonic-gate 499*7c478bd9Sstevel@tonic-gate /* 500*7c478bd9Sstevel@tonic-gate * If we are running CDB, this had better be either a write 501*7c478bd9Sstevel@tonic-gate * cursor or an immediate writer. 502*7c478bd9Sstevel@tonic-gate */ 503*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_AM_CDB)) 504*7c478bd9Sstevel@tonic-gate if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER)) 505*7c478bd9Sstevel@tonic-gate return (EINVAL); 506*7c478bd9Sstevel@tonic-gate 507*7c478bd9Sstevel@tonic-gate /* 508*7c478bd9Sstevel@tonic-gate * The semantics of cursors during delete are as follows: if record 509*7c478bd9Sstevel@tonic-gate * numbers are mutable (DB_RE_RENUMBER is set), deleting a record 510*7c478bd9Sstevel@tonic-gate * causes the cursor to automatically point to the record immediately 511*7c478bd9Sstevel@tonic-gate * following. In this case it is possible to use a single cursor for 512*7c478bd9Sstevel@tonic-gate * repeated delete operations, without intervening operations. 513*7c478bd9Sstevel@tonic-gate * 514*7c478bd9Sstevel@tonic-gate * If record numbers are not mutable, then records are replaced with 515*7c478bd9Sstevel@tonic-gate * a marker containing a delete flag. If the record referenced by 516*7c478bd9Sstevel@tonic-gate * this cursor has already been deleted, we will detect that as part 517*7c478bd9Sstevel@tonic-gate * of the delete operation, and fail. 518*7c478bd9Sstevel@tonic-gate */ 519*7c478bd9Sstevel@tonic-gate return (__ram_i_delete(dbc)); 520*7c478bd9Sstevel@tonic-gate } 521*7c478bd9Sstevel@tonic-gate 522*7c478bd9Sstevel@tonic-gate /* 523*7c478bd9Sstevel@tonic-gate * __ram_c_get -- 524*7c478bd9Sstevel@tonic-gate * Recno cursor->c_get function. 525*7c478bd9Sstevel@tonic-gate * 526*7c478bd9Sstevel@tonic-gate * PUBLIC: int __ram_c_get __P((DBC *, DBT *, DBT *, u_int32_t)); 527*7c478bd9Sstevel@tonic-gate */ 528*7c478bd9Sstevel@tonic-gate int 529*7c478bd9Sstevel@tonic-gate __ram_c_get(dbc, key, data, flags) 530*7c478bd9Sstevel@tonic-gate DBC *dbc; 531*7c478bd9Sstevel@tonic-gate DBT *key, *data; 532*7c478bd9Sstevel@tonic-gate u_int32_t flags; 533*7c478bd9Sstevel@tonic-gate { 534*7c478bd9Sstevel@tonic-gate CURSOR *cp, copy; 535*7c478bd9Sstevel@tonic-gate DB *dbp; 536*7c478bd9Sstevel@tonic-gate PAGE *h; 537*7c478bd9Sstevel@tonic-gate db_indx_t indx; 538*7c478bd9Sstevel@tonic-gate int exact, ret, stack, tmp_rmw; 539*7c478bd9Sstevel@tonic-gate 540*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 541*7c478bd9Sstevel@tonic-gate cp = dbc->internal; 542*7c478bd9Sstevel@tonic-gate 543*7c478bd9Sstevel@tonic-gate DB_PANIC_CHECK(dbp); 544*7c478bd9Sstevel@tonic-gate 545*7c478bd9Sstevel@tonic-gate /* Check for invalid flags. */ 546*7c478bd9Sstevel@tonic-gate if ((ret = __db_cgetchk(dbc->dbp, 547*7c478bd9Sstevel@tonic-gate key, data, flags, cp->recno != RECNO_OOB)) != 0) 548*7c478bd9Sstevel@tonic-gate return (ret); 549*7c478bd9Sstevel@tonic-gate 550*7c478bd9Sstevel@tonic-gate /* Clear OR'd in additional bits so we can check for flag equality. */ 551*7c478bd9Sstevel@tonic-gate tmp_rmw = 0; 552*7c478bd9Sstevel@tonic-gate if (LF_ISSET(DB_RMW)) { 553*7c478bd9Sstevel@tonic-gate if (!F_ISSET(dbp, DB_AM_CDB)) { 554*7c478bd9Sstevel@tonic-gate tmp_rmw = 1; 555*7c478bd9Sstevel@tonic-gate F_SET(dbc, DBC_RMW); 556*7c478bd9Sstevel@tonic-gate } 557*7c478bd9Sstevel@tonic-gate LF_CLR(DB_RMW); 558*7c478bd9Sstevel@tonic-gate } 559*7c478bd9Sstevel@tonic-gate 560*7c478bd9Sstevel@tonic-gate DEBUG_LREAD(dbc, dbc->txn, "ram_c_get", 561*7c478bd9Sstevel@tonic-gate flags == DB_SET || flags == DB_SET_RANGE ? key : NULL, NULL, flags); 562*7c478bd9Sstevel@tonic-gate 563*7c478bd9Sstevel@tonic-gate /* Initialize the cursor for a new retrieval. */ 564*7c478bd9Sstevel@tonic-gate copy = *cp; 565*7c478bd9Sstevel@tonic-gate 566*7c478bd9Sstevel@tonic-gate retry: /* Update the record number. */ 567*7c478bd9Sstevel@tonic-gate stack = 0; 568*7c478bd9Sstevel@tonic-gate switch (flags) { 569*7c478bd9Sstevel@tonic-gate case DB_CURRENT: 570*7c478bd9Sstevel@tonic-gate /* 571*7c478bd9Sstevel@tonic-gate * If record numbers are mutable: if we just deleted a record, 572*7c478bd9Sstevel@tonic-gate * there is no action necessary, we return the record following 573*7c478bd9Sstevel@tonic-gate * the deleted item by virtue of renumbering the tree. 574*7c478bd9Sstevel@tonic-gate */ 575*7c478bd9Sstevel@tonic-gate break; 576*7c478bd9Sstevel@tonic-gate case DB_NEXT: 577*7c478bd9Sstevel@tonic-gate /* 578*7c478bd9Sstevel@tonic-gate * If record numbers are mutable: if we just deleted a record, 579*7c478bd9Sstevel@tonic-gate * we have to avoid incrementing the record number so that we 580*7c478bd9Sstevel@tonic-gate * return the right record by virtue of renumbering the tree. 581*7c478bd9Sstevel@tonic-gate */ 582*7c478bd9Sstevel@tonic-gate if (CD_ISSET(dbp, cp)) 583*7c478bd9Sstevel@tonic-gate break; 584*7c478bd9Sstevel@tonic-gate 585*7c478bd9Sstevel@tonic-gate if (cp->recno != RECNO_OOB) { 586*7c478bd9Sstevel@tonic-gate ++cp->recno; 587*7c478bd9Sstevel@tonic-gate break; 588*7c478bd9Sstevel@tonic-gate } 589*7c478bd9Sstevel@tonic-gate /* FALLTHROUGH */ 590*7c478bd9Sstevel@tonic-gate case DB_FIRST: 591*7c478bd9Sstevel@tonic-gate flags = DB_NEXT; 592*7c478bd9Sstevel@tonic-gate cp->recno = 1; 593*7c478bd9Sstevel@tonic-gate break; 594*7c478bd9Sstevel@tonic-gate case DB_PREV: 595*7c478bd9Sstevel@tonic-gate if (cp->recno != RECNO_OOB) { 596*7c478bd9Sstevel@tonic-gate if (cp->recno == 1) { 597*7c478bd9Sstevel@tonic-gate ret = DB_NOTFOUND; 598*7c478bd9Sstevel@tonic-gate goto err; 599*7c478bd9Sstevel@tonic-gate } 600*7c478bd9Sstevel@tonic-gate --cp->recno; 601*7c478bd9Sstevel@tonic-gate break; 602*7c478bd9Sstevel@tonic-gate } 603*7c478bd9Sstevel@tonic-gate /* FALLTHROUGH */ 604*7c478bd9Sstevel@tonic-gate case DB_LAST: 605*7c478bd9Sstevel@tonic-gate flags = DB_PREV; 606*7c478bd9Sstevel@tonic-gate if (((ret = __ram_update(dbc, 607*7c478bd9Sstevel@tonic-gate DB_MAX_RECORDS, 0)) != 0) && ret != DB_NOTFOUND) 608*7c478bd9Sstevel@tonic-gate goto err; 609*7c478bd9Sstevel@tonic-gate if ((ret = __bam_nrecs(dbc, &cp->recno)) != 0) 610*7c478bd9Sstevel@tonic-gate goto err; 611*7c478bd9Sstevel@tonic-gate if (cp->recno == 0) { 612*7c478bd9Sstevel@tonic-gate ret = DB_NOTFOUND; 613*7c478bd9Sstevel@tonic-gate goto err; 614*7c478bd9Sstevel@tonic-gate } 615*7c478bd9Sstevel@tonic-gate break; 616*7c478bd9Sstevel@tonic-gate case DB_SET: 617*7c478bd9Sstevel@tonic-gate case DB_SET_RANGE: 618*7c478bd9Sstevel@tonic-gate if ((ret = __ram_getno(dbc, key, &cp->recno, 0)) != 0) 619*7c478bd9Sstevel@tonic-gate goto err; 620*7c478bd9Sstevel@tonic-gate break; 621*7c478bd9Sstevel@tonic-gate } 622*7c478bd9Sstevel@tonic-gate 623*7c478bd9Sstevel@tonic-gate /* Return the key if the user didn't give us one. */ 624*7c478bd9Sstevel@tonic-gate if (flags != DB_SET && flags != DB_SET_RANGE && 625*7c478bd9Sstevel@tonic-gate (ret = __db_retcopy(key, &cp->recno, sizeof(cp->recno), 626*7c478bd9Sstevel@tonic-gate &dbc->rkey.data, &dbc->rkey.ulen, dbp->db_malloc)) != 0) 627*7c478bd9Sstevel@tonic-gate goto err; 628*7c478bd9Sstevel@tonic-gate 629*7c478bd9Sstevel@tonic-gate /* Search the tree for the record. */ 630*7c478bd9Sstevel@tonic-gate if ((ret = __bam_rsearch(dbc, &cp->recno, 631*7c478bd9Sstevel@tonic-gate F_ISSET(dbc, DBC_RMW) ? S_FIND_WR : S_FIND, 1, &exact)) != 0) 632*7c478bd9Sstevel@tonic-gate goto err; 633*7c478bd9Sstevel@tonic-gate stack = 1; 634*7c478bd9Sstevel@tonic-gate if (!exact) { 635*7c478bd9Sstevel@tonic-gate ret = DB_NOTFOUND; 636*7c478bd9Sstevel@tonic-gate goto err; 637*7c478bd9Sstevel@tonic-gate } 638*7c478bd9Sstevel@tonic-gate h = cp->csp->page; 639*7c478bd9Sstevel@tonic-gate indx = cp->csp->indx; 640*7c478bd9Sstevel@tonic-gate 641*7c478bd9Sstevel@tonic-gate /* 642*7c478bd9Sstevel@tonic-gate * If re-numbering records, the on-page deleted flag means this record 643*7c478bd9Sstevel@tonic-gate * was implicitly created. If not re-numbering records, the on-page 644*7c478bd9Sstevel@tonic-gate * deleted flag means this record was implicitly created, or, it was 645*7c478bd9Sstevel@tonic-gate * deleted at some time. Regardless, we skip such records if doing 646*7c478bd9Sstevel@tonic-gate * cursor next/prev operations, and fail if the application requested 647*7c478bd9Sstevel@tonic-gate * them explicitly. 648*7c478bd9Sstevel@tonic-gate */ 649*7c478bd9Sstevel@tonic-gate if (B_DISSET(GET_BKEYDATA(h, indx)->type)) { 650*7c478bd9Sstevel@tonic-gate if (flags == DB_NEXT || flags == DB_PREV) { 651*7c478bd9Sstevel@tonic-gate (void)__bam_stkrel(dbc, 0); 652*7c478bd9Sstevel@tonic-gate goto retry; 653*7c478bd9Sstevel@tonic-gate } 654*7c478bd9Sstevel@tonic-gate ret = DB_KEYEMPTY; 655*7c478bd9Sstevel@tonic-gate goto err; 656*7c478bd9Sstevel@tonic-gate } 657*7c478bd9Sstevel@tonic-gate 658*7c478bd9Sstevel@tonic-gate /* Return the data item. */ 659*7c478bd9Sstevel@tonic-gate if ((ret = __db_ret(dbp, 660*7c478bd9Sstevel@tonic-gate h, indx, data, &dbc->rdata.data, &dbc->rdata.ulen)) != 0) 661*7c478bd9Sstevel@tonic-gate goto err; 662*7c478bd9Sstevel@tonic-gate 663*7c478bd9Sstevel@tonic-gate /* The cursor was reset, no further delete adjustment is necessary. */ 664*7c478bd9Sstevel@tonic-gate CD_CLR(dbp, cp); 665*7c478bd9Sstevel@tonic-gate 666*7c478bd9Sstevel@tonic-gate err: if (stack) 667*7c478bd9Sstevel@tonic-gate (void)__bam_stkrel(dbc, 0); 668*7c478bd9Sstevel@tonic-gate 669*7c478bd9Sstevel@tonic-gate /* Release temporary lock upgrade. */ 670*7c478bd9Sstevel@tonic-gate if (tmp_rmw) 671*7c478bd9Sstevel@tonic-gate F_CLR(dbc, DBC_RMW); 672*7c478bd9Sstevel@tonic-gate 673*7c478bd9Sstevel@tonic-gate if (ret != 0) 674*7c478bd9Sstevel@tonic-gate *cp = copy; 675*7c478bd9Sstevel@tonic-gate 676*7c478bd9Sstevel@tonic-gate return (ret); 677*7c478bd9Sstevel@tonic-gate } 678*7c478bd9Sstevel@tonic-gate 679*7c478bd9Sstevel@tonic-gate /* 680*7c478bd9Sstevel@tonic-gate * __ram_c_put -- 681*7c478bd9Sstevel@tonic-gate * Recno cursor->c_put function. 682*7c478bd9Sstevel@tonic-gate * 683*7c478bd9Sstevel@tonic-gate * PUBLIC: int __ram_c_put __P((DBC *, DBT *, DBT *, u_int32_t)); 684*7c478bd9Sstevel@tonic-gate */ 685*7c478bd9Sstevel@tonic-gate int 686*7c478bd9Sstevel@tonic-gate __ram_c_put(dbc, key, data, flags) 687*7c478bd9Sstevel@tonic-gate DBC *dbc; 688*7c478bd9Sstevel@tonic-gate DBT *key, *data; 689*7c478bd9Sstevel@tonic-gate u_int32_t flags; 690*7c478bd9Sstevel@tonic-gate { 691*7c478bd9Sstevel@tonic-gate CURSOR *cp, copy; 692*7c478bd9Sstevel@tonic-gate DB *dbp; 693*7c478bd9Sstevel@tonic-gate int exact, ret; 694*7c478bd9Sstevel@tonic-gate void *arg; 695*7c478bd9Sstevel@tonic-gate 696*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 697*7c478bd9Sstevel@tonic-gate cp = dbc->internal; 698*7c478bd9Sstevel@tonic-gate 699*7c478bd9Sstevel@tonic-gate DB_PANIC_CHECK(dbp); 700*7c478bd9Sstevel@tonic-gate 701*7c478bd9Sstevel@tonic-gate if ((ret = __db_cputchk(dbc->dbp, key, data, flags, 702*7c478bd9Sstevel@tonic-gate F_ISSET(dbc->dbp, DB_AM_RDONLY), cp->recno != RECNO_OOB)) != 0) 703*7c478bd9Sstevel@tonic-gate return (ret); 704*7c478bd9Sstevel@tonic-gate 705*7c478bd9Sstevel@tonic-gate DEBUG_LWRITE(dbc, dbc->txn, "ram_c_put", NULL, data, flags); 706*7c478bd9Sstevel@tonic-gate 707*7c478bd9Sstevel@tonic-gate /* 708*7c478bd9Sstevel@tonic-gate * If we are running CDB, this had better be either a write 709*7c478bd9Sstevel@tonic-gate * cursor or an immediate writer. If it's a regular writer, 710*7c478bd9Sstevel@tonic-gate * that means we have an IWRITE lock and we need to upgrade 711*7c478bd9Sstevel@tonic-gate * it to a write lock. 712*7c478bd9Sstevel@tonic-gate */ 713*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_AM_CDB)) { 714*7c478bd9Sstevel@tonic-gate if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER)) 715*7c478bd9Sstevel@tonic-gate return (EINVAL); 716*7c478bd9Sstevel@tonic-gate 717*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbc, DBC_RMW) && 718*7c478bd9Sstevel@tonic-gate (ret = lock_get(dbp->dbenv->lk_info, dbc->locker, 719*7c478bd9Sstevel@tonic-gate DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE, 720*7c478bd9Sstevel@tonic-gate &dbc->mylock)) != 0) 721*7c478bd9Sstevel@tonic-gate return (EAGAIN); 722*7c478bd9Sstevel@tonic-gate } 723*7c478bd9Sstevel@tonic-gate 724*7c478bd9Sstevel@tonic-gate /* Initialize the cursor for a new retrieval. */ 725*7c478bd9Sstevel@tonic-gate copy = *cp; 726*7c478bd9Sstevel@tonic-gate 727*7c478bd9Sstevel@tonic-gate /* 728*7c478bd9Sstevel@tonic-gate * To split, we need a valid key for the page. Since it's a cursor, 729*7c478bd9Sstevel@tonic-gate * we have to build one. 730*7c478bd9Sstevel@tonic-gate * 731*7c478bd9Sstevel@tonic-gate * The split code discards all short-term locks and stack pages. 732*7c478bd9Sstevel@tonic-gate */ 733*7c478bd9Sstevel@tonic-gate if (0) { 734*7c478bd9Sstevel@tonic-gate split: arg = &cp->recno; 735*7c478bd9Sstevel@tonic-gate if ((ret = __bam_split(dbc, arg)) != 0) 736*7c478bd9Sstevel@tonic-gate goto err; 737*7c478bd9Sstevel@tonic-gate } 738*7c478bd9Sstevel@tonic-gate 739*7c478bd9Sstevel@tonic-gate if ((ret = __bam_rsearch(dbc, &cp->recno, S_INSERT, 1, &exact)) != 0) 740*7c478bd9Sstevel@tonic-gate goto err; 741*7c478bd9Sstevel@tonic-gate if (!exact) { 742*7c478bd9Sstevel@tonic-gate ret = DB_NOTFOUND; 743*7c478bd9Sstevel@tonic-gate goto err; 744*7c478bd9Sstevel@tonic-gate } 745*7c478bd9Sstevel@tonic-gate if ((ret = __bam_iitem(dbc, &cp->csp->page, 746*7c478bd9Sstevel@tonic-gate &cp->csp->indx, key, data, flags, 0)) == DB_NEEDSPLIT) { 747*7c478bd9Sstevel@tonic-gate if ((ret = __bam_stkrel(dbc, 0)) != 0) 748*7c478bd9Sstevel@tonic-gate goto err; 749*7c478bd9Sstevel@tonic-gate goto split; 750*7c478bd9Sstevel@tonic-gate } 751*7c478bd9Sstevel@tonic-gate if ((ret = __bam_stkrel(dbc, 0)) != 0) 752*7c478bd9Sstevel@tonic-gate goto err; 753*7c478bd9Sstevel@tonic-gate 754*7c478bd9Sstevel@tonic-gate switch (flags) { 755*7c478bd9Sstevel@tonic-gate case DB_AFTER: 756*7c478bd9Sstevel@tonic-gate /* Adjust the cursors. */ 757*7c478bd9Sstevel@tonic-gate __ram_ca(dbp, cp->recno, CA_IAFTER); 758*7c478bd9Sstevel@tonic-gate 759*7c478bd9Sstevel@tonic-gate /* Set this cursor to reference the new record. */ 760*7c478bd9Sstevel@tonic-gate cp->recno = copy.recno + 1; 761*7c478bd9Sstevel@tonic-gate break; 762*7c478bd9Sstevel@tonic-gate case DB_BEFORE: 763*7c478bd9Sstevel@tonic-gate /* Adjust the cursors. */ 764*7c478bd9Sstevel@tonic-gate __ram_ca(dbp, cp->recno, CA_IBEFORE); 765*7c478bd9Sstevel@tonic-gate 766*7c478bd9Sstevel@tonic-gate /* Set this cursor to reference the new record. */ 767*7c478bd9Sstevel@tonic-gate cp->recno = copy.recno; 768*7c478bd9Sstevel@tonic-gate break; 769*7c478bd9Sstevel@tonic-gate } 770*7c478bd9Sstevel@tonic-gate 771*7c478bd9Sstevel@tonic-gate /* The cursor was reset, no further delete adjustment is necessary. */ 772*7c478bd9Sstevel@tonic-gate CD_CLR(dbp, cp); 773*7c478bd9Sstevel@tonic-gate 774*7c478bd9Sstevel@tonic-gate err: if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW)) 775*7c478bd9Sstevel@tonic-gate (void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock, 776*7c478bd9Sstevel@tonic-gate DB_LOCK_IWRITE, 0); 777*7c478bd9Sstevel@tonic-gate 778*7c478bd9Sstevel@tonic-gate if (ret != 0) 779*7c478bd9Sstevel@tonic-gate *cp = copy; 780*7c478bd9Sstevel@tonic-gate 781*7c478bd9Sstevel@tonic-gate return (ret); 782*7c478bd9Sstevel@tonic-gate } 783*7c478bd9Sstevel@tonic-gate 784*7c478bd9Sstevel@tonic-gate /* 785*7c478bd9Sstevel@tonic-gate * __ram_ca -- 786*7c478bd9Sstevel@tonic-gate * Adjust cursors. 787*7c478bd9Sstevel@tonic-gate * 788*7c478bd9Sstevel@tonic-gate * PUBLIC: void __ram_ca __P((DB *, db_recno_t, ca_recno_arg)); 789*7c478bd9Sstevel@tonic-gate */ 790*7c478bd9Sstevel@tonic-gate void 791*7c478bd9Sstevel@tonic-gate __ram_ca(dbp, recno, op) 792*7c478bd9Sstevel@tonic-gate DB *dbp; 793*7c478bd9Sstevel@tonic-gate db_recno_t recno; 794*7c478bd9Sstevel@tonic-gate ca_recno_arg op; 795*7c478bd9Sstevel@tonic-gate { 796*7c478bd9Sstevel@tonic-gate CURSOR *cp; 797*7c478bd9Sstevel@tonic-gate DBC *dbc; 798*7c478bd9Sstevel@tonic-gate 799*7c478bd9Sstevel@tonic-gate /* 800*7c478bd9Sstevel@tonic-gate * Adjust the cursors. See the comment in __bam_ca_delete(). 801*7c478bd9Sstevel@tonic-gate */ 802*7c478bd9Sstevel@tonic-gate DB_THREAD_LOCK(dbp); 803*7c478bd9Sstevel@tonic-gate for (dbc = TAILQ_FIRST(&dbp->active_queue); 804*7c478bd9Sstevel@tonic-gate dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) { 805*7c478bd9Sstevel@tonic-gate cp = dbc->internal; 806*7c478bd9Sstevel@tonic-gate switch (op) { 807*7c478bd9Sstevel@tonic-gate case CA_DELETE: 808*7c478bd9Sstevel@tonic-gate if (recno > cp->recno) 809*7c478bd9Sstevel@tonic-gate --cp->recno; 810*7c478bd9Sstevel@tonic-gate if (recno == cp->recno) 811*7c478bd9Sstevel@tonic-gate CD_SET(dbp, cp); 812*7c478bd9Sstevel@tonic-gate break; 813*7c478bd9Sstevel@tonic-gate case CA_IAFTER: 814*7c478bd9Sstevel@tonic-gate if (recno > cp->recno) 815*7c478bd9Sstevel@tonic-gate ++cp->recno; 816*7c478bd9Sstevel@tonic-gate break; 817*7c478bd9Sstevel@tonic-gate case CA_IBEFORE: 818*7c478bd9Sstevel@tonic-gate if (recno >= cp->recno) 819*7c478bd9Sstevel@tonic-gate ++cp->recno; 820*7c478bd9Sstevel@tonic-gate break; 821*7c478bd9Sstevel@tonic-gate } 822*7c478bd9Sstevel@tonic-gate } 823*7c478bd9Sstevel@tonic-gate DB_THREAD_UNLOCK(dbp); 824*7c478bd9Sstevel@tonic-gate } 825*7c478bd9Sstevel@tonic-gate 826*7c478bd9Sstevel@tonic-gate /* 827*7c478bd9Sstevel@tonic-gate * __ram_getno -- 828*7c478bd9Sstevel@tonic-gate * Check the user's record number, and make sure we've seen it. 829*7c478bd9Sstevel@tonic-gate * 830*7c478bd9Sstevel@tonic-gate * PUBLIC: int __ram_getno __P((DBC *, const DBT *, db_recno_t *, int)); 831*7c478bd9Sstevel@tonic-gate */ 832*7c478bd9Sstevel@tonic-gate int 833*7c478bd9Sstevel@tonic-gate __ram_getno(dbc, key, rep, can_create) 834*7c478bd9Sstevel@tonic-gate DBC *dbc; 835*7c478bd9Sstevel@tonic-gate const DBT *key; 836*7c478bd9Sstevel@tonic-gate db_recno_t *rep; 837*7c478bd9Sstevel@tonic-gate int can_create; 838*7c478bd9Sstevel@tonic-gate { 839*7c478bd9Sstevel@tonic-gate DB *dbp; 840*7c478bd9Sstevel@tonic-gate db_recno_t recno; 841*7c478bd9Sstevel@tonic-gate 842*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 843*7c478bd9Sstevel@tonic-gate 844*7c478bd9Sstevel@tonic-gate /* Check the user's record number. */ 845*7c478bd9Sstevel@tonic-gate if ((recno = *(db_recno_t *)key->data) == 0) { 846*7c478bd9Sstevel@tonic-gate __db_err(dbp->dbenv, "illegal record number of 0"); 847*7c478bd9Sstevel@tonic-gate return (EINVAL); 848*7c478bd9Sstevel@tonic-gate } 849*7c478bd9Sstevel@tonic-gate if (rep != NULL) 850*7c478bd9Sstevel@tonic-gate *rep = recno; 851*7c478bd9Sstevel@tonic-gate 852*7c478bd9Sstevel@tonic-gate /* 853*7c478bd9Sstevel@tonic-gate * Btree can neither create records nor read them in. Recno can 854*7c478bd9Sstevel@tonic-gate * do both, see if we can find the record. 855*7c478bd9Sstevel@tonic-gate */ 856*7c478bd9Sstevel@tonic-gate return (dbp->type == DB_RECNO ? 857*7c478bd9Sstevel@tonic-gate __ram_update(dbc, recno, can_create) : 0); 858*7c478bd9Sstevel@tonic-gate } 859*7c478bd9Sstevel@tonic-gate 860*7c478bd9Sstevel@tonic-gate /* 861*7c478bd9Sstevel@tonic-gate * __ram_update -- 862*7c478bd9Sstevel@tonic-gate * Ensure the tree has records up to and including the specified one. 863*7c478bd9Sstevel@tonic-gate */ 864*7c478bd9Sstevel@tonic-gate static int 865*7c478bd9Sstevel@tonic-gate __ram_update(dbc, recno, can_create) 866*7c478bd9Sstevel@tonic-gate DBC *dbc; 867*7c478bd9Sstevel@tonic-gate db_recno_t recno; 868*7c478bd9Sstevel@tonic-gate int can_create; 869*7c478bd9Sstevel@tonic-gate { 870*7c478bd9Sstevel@tonic-gate BTREE *t; 871*7c478bd9Sstevel@tonic-gate DB *dbp; 872*7c478bd9Sstevel@tonic-gate RECNO *rp; 873*7c478bd9Sstevel@tonic-gate db_recno_t nrecs; 874*7c478bd9Sstevel@tonic-gate int ret; 875*7c478bd9Sstevel@tonic-gate 876*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 877*7c478bd9Sstevel@tonic-gate t = dbp->internal; 878*7c478bd9Sstevel@tonic-gate rp = t->recno; 879*7c478bd9Sstevel@tonic-gate 880*7c478bd9Sstevel@tonic-gate /* 881*7c478bd9Sstevel@tonic-gate * If we can't create records and we've read the entire backing input 882*7c478bd9Sstevel@tonic-gate * file, we're done. 883*7c478bd9Sstevel@tonic-gate */ 884*7c478bd9Sstevel@tonic-gate if (!can_create && F_ISSET(rp, RECNO_EOF)) 885*7c478bd9Sstevel@tonic-gate return (0); 886*7c478bd9Sstevel@tonic-gate 887*7c478bd9Sstevel@tonic-gate /* 888*7c478bd9Sstevel@tonic-gate * If we haven't seen this record yet, try to get it from the original 889*7c478bd9Sstevel@tonic-gate * file. 890*7c478bd9Sstevel@tonic-gate */ 891*7c478bd9Sstevel@tonic-gate if ((ret = __bam_nrecs(dbc, &nrecs)) != 0) 892*7c478bd9Sstevel@tonic-gate return (ret); 893*7c478bd9Sstevel@tonic-gate if (!F_ISSET(rp, RECNO_EOF) && recno > nrecs) { 894*7c478bd9Sstevel@tonic-gate if ((ret = rp->re_irec(dbc, recno)) != 0) 895*7c478bd9Sstevel@tonic-gate return (ret); 896*7c478bd9Sstevel@tonic-gate if ((ret = __bam_nrecs(dbc, &nrecs)) != 0) 897*7c478bd9Sstevel@tonic-gate return (ret); 898*7c478bd9Sstevel@tonic-gate } 899*7c478bd9Sstevel@tonic-gate 900*7c478bd9Sstevel@tonic-gate /* 901*7c478bd9Sstevel@tonic-gate * If we can create records, create empty ones up to the requested 902*7c478bd9Sstevel@tonic-gate * record. 903*7c478bd9Sstevel@tonic-gate */ 904*7c478bd9Sstevel@tonic-gate if (!can_create || recno <= nrecs + 1) 905*7c478bd9Sstevel@tonic-gate return (0); 906*7c478bd9Sstevel@tonic-gate 907*7c478bd9Sstevel@tonic-gate dbc->rdata.dlen = 0; 908*7c478bd9Sstevel@tonic-gate dbc->rdata.doff = 0; 909*7c478bd9Sstevel@tonic-gate dbc->rdata.flags = 0; 910*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_RE_FIXEDLEN)) { 911*7c478bd9Sstevel@tonic-gate if (dbc->rdata.ulen < rp->re_len) { 912*7c478bd9Sstevel@tonic-gate if ((ret = 913*7c478bd9Sstevel@tonic-gate __os_realloc(&dbc->rdata.data, rp->re_len)) != 0) { 914*7c478bd9Sstevel@tonic-gate dbc->rdata.ulen = 0; 915*7c478bd9Sstevel@tonic-gate dbc->rdata.data = NULL; 916*7c478bd9Sstevel@tonic-gate return (ret); 917*7c478bd9Sstevel@tonic-gate } 918*7c478bd9Sstevel@tonic-gate dbc->rdata.ulen = rp->re_len; 919*7c478bd9Sstevel@tonic-gate } 920*7c478bd9Sstevel@tonic-gate dbc->rdata.size = rp->re_len; 921*7c478bd9Sstevel@tonic-gate memset(dbc->rdata.data, rp->re_pad, rp->re_len); 922*7c478bd9Sstevel@tonic-gate } else 923*7c478bd9Sstevel@tonic-gate dbc->rdata.size = 0; 924*7c478bd9Sstevel@tonic-gate 925*7c478bd9Sstevel@tonic-gate while (recno > ++nrecs) 926*7c478bd9Sstevel@tonic-gate if ((ret = __ram_add(dbc, 927*7c478bd9Sstevel@tonic-gate &nrecs, &dbc->rdata, 0, BI_DELETED)) != 0) 928*7c478bd9Sstevel@tonic-gate return (ret); 929*7c478bd9Sstevel@tonic-gate return (0); 930*7c478bd9Sstevel@tonic-gate } 931*7c478bd9Sstevel@tonic-gate 932*7c478bd9Sstevel@tonic-gate /* 933*7c478bd9Sstevel@tonic-gate * __ram_source -- 934*7c478bd9Sstevel@tonic-gate * Load information about the backing file. 935*7c478bd9Sstevel@tonic-gate */ 936*7c478bd9Sstevel@tonic-gate static int 937*7c478bd9Sstevel@tonic-gate __ram_source(dbp, rp, fname) 938*7c478bd9Sstevel@tonic-gate DB *dbp; 939*7c478bd9Sstevel@tonic-gate RECNO *rp; 940*7c478bd9Sstevel@tonic-gate const char *fname; 941*7c478bd9Sstevel@tonic-gate { 942*7c478bd9Sstevel@tonic-gate size_t size; 943*7c478bd9Sstevel@tonic-gate u_int32_t bytes, mbytes, oflags; 944*7c478bd9Sstevel@tonic-gate int ret; 945*7c478bd9Sstevel@tonic-gate 946*7c478bd9Sstevel@tonic-gate /* 947*7c478bd9Sstevel@tonic-gate * !!! 948*7c478bd9Sstevel@tonic-gate * The caller has full responsibility for cleaning up on error -- 949*7c478bd9Sstevel@tonic-gate * (it has to anyway, in case it fails after this routine succeeds). 950*7c478bd9Sstevel@tonic-gate */ 951*7c478bd9Sstevel@tonic-gate if ((ret = __db_appname(dbp->dbenv, 952*7c478bd9Sstevel@tonic-gate DB_APP_DATA, NULL, fname, 0, NULL, &rp->re_source)) != 0) 953*7c478bd9Sstevel@tonic-gate return (ret); 954*7c478bd9Sstevel@tonic-gate 955*7c478bd9Sstevel@tonic-gate oflags = F_ISSET(dbp, DB_AM_RDONLY) ? DB_RDONLY : 0; 956*7c478bd9Sstevel@tonic-gate if ((ret = 957*7c478bd9Sstevel@tonic-gate __db_open(rp->re_source, oflags, oflags, 0, &rp->re_fd)) != 0) { 958*7c478bd9Sstevel@tonic-gate __db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret)); 959*7c478bd9Sstevel@tonic-gate return (ret); 960*7c478bd9Sstevel@tonic-gate } 961*7c478bd9Sstevel@tonic-gate 962*7c478bd9Sstevel@tonic-gate /* 963*7c478bd9Sstevel@tonic-gate * XXX 964*7c478bd9Sstevel@tonic-gate * We'd like to test to see if the file is too big to mmap. Since we 965*7c478bd9Sstevel@tonic-gate * don't know what size or type off_t's or size_t's are, or the largest 966*7c478bd9Sstevel@tonic-gate * unsigned integral type is, or what random insanity the local C 967*7c478bd9Sstevel@tonic-gate * compiler will perpetrate, doing the comparison in a portable way is 968*7c478bd9Sstevel@tonic-gate * flatly impossible. Hope that mmap fails if the file is too large. 969*7c478bd9Sstevel@tonic-gate */ 970*7c478bd9Sstevel@tonic-gate if ((ret = __os_ioinfo(rp->re_source, 971*7c478bd9Sstevel@tonic-gate rp->re_fd, &mbytes, &bytes, NULL)) != 0) { 972*7c478bd9Sstevel@tonic-gate __db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret)); 973*7c478bd9Sstevel@tonic-gate return (ret); 974*7c478bd9Sstevel@tonic-gate } 975*7c478bd9Sstevel@tonic-gate if (mbytes == 0 && bytes == 0) { 976*7c478bd9Sstevel@tonic-gate F_SET(rp, RECNO_EOF); 977*7c478bd9Sstevel@tonic-gate return (0); 978*7c478bd9Sstevel@tonic-gate } 979*7c478bd9Sstevel@tonic-gate 980*7c478bd9Sstevel@tonic-gate size = mbytes * MEGABYTE + bytes; 981*7c478bd9Sstevel@tonic-gate if ((ret = __db_mapfile(rp->re_source, 982*7c478bd9Sstevel@tonic-gate rp->re_fd, (size_t)size, 1, &rp->re_smap)) != 0) 983*7c478bd9Sstevel@tonic-gate return (ret); 984*7c478bd9Sstevel@tonic-gate rp->re_cmap = rp->re_smap; 985*7c478bd9Sstevel@tonic-gate rp->re_emap = (u_int8_t *)rp->re_smap + (rp->re_msize = size); 986*7c478bd9Sstevel@tonic-gate rp->re_irec = F_ISSET(dbp, DB_RE_FIXEDLEN) ? __ram_fmap : __ram_vmap; 987*7c478bd9Sstevel@tonic-gate return (0); 988*7c478bd9Sstevel@tonic-gate } 989*7c478bd9Sstevel@tonic-gate 990*7c478bd9Sstevel@tonic-gate /* 991*7c478bd9Sstevel@tonic-gate * __ram_writeback -- 992*7c478bd9Sstevel@tonic-gate * Rewrite the backing file. 993*7c478bd9Sstevel@tonic-gate */ 994*7c478bd9Sstevel@tonic-gate static int 995*7c478bd9Sstevel@tonic-gate __ram_writeback(dbc) 996*7c478bd9Sstevel@tonic-gate DBC *dbc; 997*7c478bd9Sstevel@tonic-gate { 998*7c478bd9Sstevel@tonic-gate DB *dbp; 999*7c478bd9Sstevel@tonic-gate DBT key, data; 1000*7c478bd9Sstevel@tonic-gate RECNO *rp; 1001*7c478bd9Sstevel@tonic-gate db_recno_t keyno; 1002*7c478bd9Sstevel@tonic-gate ssize_t nw; 1003*7c478bd9Sstevel@tonic-gate int fd, ret, t_ret; 1004*7c478bd9Sstevel@tonic-gate u_int8_t delim, *pad; 1005*7c478bd9Sstevel@tonic-gate 1006*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 1007*7c478bd9Sstevel@tonic-gate rp = ((BTREE *)dbp->internal)->recno; 1008*7c478bd9Sstevel@tonic-gate 1009*7c478bd9Sstevel@tonic-gate /* If the file wasn't modified, we're done. */ 1010*7c478bd9Sstevel@tonic-gate if (!F_ISSET(rp, RECNO_MODIFIED)) 1011*7c478bd9Sstevel@tonic-gate return (0); 1012*7c478bd9Sstevel@tonic-gate 1013*7c478bd9Sstevel@tonic-gate /* If there's no backing source file, we're done. */ 1014*7c478bd9Sstevel@tonic-gate if (rp->re_source == NULL) { 1015*7c478bd9Sstevel@tonic-gate F_CLR(rp, RECNO_MODIFIED); 1016*7c478bd9Sstevel@tonic-gate return (0); 1017*7c478bd9Sstevel@tonic-gate } 1018*7c478bd9Sstevel@tonic-gate 1019*7c478bd9Sstevel@tonic-gate /* 1020*7c478bd9Sstevel@tonic-gate * Read any remaining records into the tree. 1021*7c478bd9Sstevel@tonic-gate * 1022*7c478bd9Sstevel@tonic-gate * !!! 1023*7c478bd9Sstevel@tonic-gate * This is why we can't support transactions when applications specify 1024*7c478bd9Sstevel@tonic-gate * backing (re_source) files. At this point we have to read in the 1025*7c478bd9Sstevel@tonic-gate * rest of the records from the file so that we can write all of the 1026*7c478bd9Sstevel@tonic-gate * records back out again, which could modify a page for which we'd 1027*7c478bd9Sstevel@tonic-gate * have to log changes and which we don't have locked. This could be 1028*7c478bd9Sstevel@tonic-gate * partially fixed by taking a snapshot of the entire file during the 1029*7c478bd9Sstevel@tonic-gate * db_open(), or, since db_open() isn't transaction protected, as part 1030*7c478bd9Sstevel@tonic-gate * of the first DB operation. But, if a checkpoint occurs then, the 1031*7c478bd9Sstevel@tonic-gate * part of the log holding the copy of the file could be discarded, and 1032*7c478bd9Sstevel@tonic-gate * that would make it impossible to recover in the face of disaster. 1033*7c478bd9Sstevel@tonic-gate * This could all probably be fixed, but it would require transaction 1034*7c478bd9Sstevel@tonic-gate * protecting the backing source file, i.e. mpool would have to know 1035*7c478bd9Sstevel@tonic-gate * about it, and we don't want to go there. 1036*7c478bd9Sstevel@tonic-gate */ 1037*7c478bd9Sstevel@tonic-gate if ((ret = 1038*7c478bd9Sstevel@tonic-gate __ram_update(dbc, DB_MAX_RECORDS, 0)) != 0 && ret != DB_NOTFOUND) 1039*7c478bd9Sstevel@tonic-gate return (ret); 1040*7c478bd9Sstevel@tonic-gate 1041*7c478bd9Sstevel@tonic-gate /* 1042*7c478bd9Sstevel@tonic-gate * !!! 1043*7c478bd9Sstevel@tonic-gate * Close any underlying mmap region. This is required for Windows NT 1044*7c478bd9Sstevel@tonic-gate * (4.0, Service Pack 2) -- if the file is still mapped, the following 1045*7c478bd9Sstevel@tonic-gate * open will fail. 1046*7c478bd9Sstevel@tonic-gate */ 1047*7c478bd9Sstevel@tonic-gate if (rp->re_smap != NULL) { 1048*7c478bd9Sstevel@tonic-gate (void)__db_unmapfile(rp->re_smap, rp->re_msize); 1049*7c478bd9Sstevel@tonic-gate rp->re_smap = NULL; 1050*7c478bd9Sstevel@tonic-gate } 1051*7c478bd9Sstevel@tonic-gate 1052*7c478bd9Sstevel@tonic-gate /* Get rid of any backing file descriptor, just on GP's. */ 1053*7c478bd9Sstevel@tonic-gate if (rp->re_fd != -1) { 1054*7c478bd9Sstevel@tonic-gate (void)__os_close(rp->re_fd); 1055*7c478bd9Sstevel@tonic-gate rp->re_fd = -1; 1056*7c478bd9Sstevel@tonic-gate } 1057*7c478bd9Sstevel@tonic-gate 1058*7c478bd9Sstevel@tonic-gate /* Open the file, truncating it. */ 1059*7c478bd9Sstevel@tonic-gate if ((ret = __db_open(rp->re_source, 1060*7c478bd9Sstevel@tonic-gate DB_SEQUENTIAL | DB_TRUNCATE, 1061*7c478bd9Sstevel@tonic-gate DB_SEQUENTIAL | DB_TRUNCATE, 0, &fd)) != 0) { 1062*7c478bd9Sstevel@tonic-gate __db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret)); 1063*7c478bd9Sstevel@tonic-gate return (ret); 1064*7c478bd9Sstevel@tonic-gate } 1065*7c478bd9Sstevel@tonic-gate 1066*7c478bd9Sstevel@tonic-gate /* 1067*7c478bd9Sstevel@tonic-gate * We step through the records, writing each one out. Use the record 1068*7c478bd9Sstevel@tonic-gate * number and the dbp->get() function, instead of a cursor, so we find 1069*7c478bd9Sstevel@tonic-gate * and write out "deleted" or non-existent records. 1070*7c478bd9Sstevel@tonic-gate */ 1071*7c478bd9Sstevel@tonic-gate memset(&key, 0, sizeof(key)); 1072*7c478bd9Sstevel@tonic-gate memset(&data, 0, sizeof(data)); 1073*7c478bd9Sstevel@tonic-gate key.size = sizeof(db_recno_t); 1074*7c478bd9Sstevel@tonic-gate key.data = &keyno; 1075*7c478bd9Sstevel@tonic-gate 1076*7c478bd9Sstevel@tonic-gate /* 1077*7c478bd9Sstevel@tonic-gate * We'll need the delimiter if we're doing variable-length records, 1078*7c478bd9Sstevel@tonic-gate * and the pad character if we're doing fixed-length records. 1079*7c478bd9Sstevel@tonic-gate */ 1080*7c478bd9Sstevel@tonic-gate delim = rp->re_delim; 1081*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_RE_FIXEDLEN)) { 1082*7c478bd9Sstevel@tonic-gate if ((ret = __os_malloc(rp->re_len, NULL, &pad)) != 0) 1083*7c478bd9Sstevel@tonic-gate goto err; 1084*7c478bd9Sstevel@tonic-gate memset(pad, rp->re_pad, rp->re_len); 1085*7c478bd9Sstevel@tonic-gate } else 1086*7c478bd9Sstevel@tonic-gate COMPQUIET(pad, NULL); 1087*7c478bd9Sstevel@tonic-gate for (keyno = 1;; ++keyno) { 1088*7c478bd9Sstevel@tonic-gate switch (ret = dbp->get(dbp, NULL, &key, &data, 0)) { 1089*7c478bd9Sstevel@tonic-gate case 0: 1090*7c478bd9Sstevel@tonic-gate if ((ret = 1091*7c478bd9Sstevel@tonic-gate __os_write(fd, data.data, data.size, &nw)) != 0) 1092*7c478bd9Sstevel@tonic-gate goto err; 1093*7c478bd9Sstevel@tonic-gate if (nw != (ssize_t)data.size) { 1094*7c478bd9Sstevel@tonic-gate ret = EIO; 1095*7c478bd9Sstevel@tonic-gate goto err; 1096*7c478bd9Sstevel@tonic-gate } 1097*7c478bd9Sstevel@tonic-gate break; 1098*7c478bd9Sstevel@tonic-gate case DB_KEYEMPTY: 1099*7c478bd9Sstevel@tonic-gate if (F_ISSET(dbp, DB_RE_FIXEDLEN)) { 1100*7c478bd9Sstevel@tonic-gate if ((ret = 1101*7c478bd9Sstevel@tonic-gate __os_write(fd, pad, rp->re_len, &nw)) != 0) 1102*7c478bd9Sstevel@tonic-gate goto err; 1103*7c478bd9Sstevel@tonic-gate if (nw != (ssize_t)rp->re_len) { 1104*7c478bd9Sstevel@tonic-gate ret = EIO; 1105*7c478bd9Sstevel@tonic-gate goto err; 1106*7c478bd9Sstevel@tonic-gate } 1107*7c478bd9Sstevel@tonic-gate } 1108*7c478bd9Sstevel@tonic-gate break; 1109*7c478bd9Sstevel@tonic-gate case DB_NOTFOUND: 1110*7c478bd9Sstevel@tonic-gate ret = 0; 1111*7c478bd9Sstevel@tonic-gate goto done; 1112*7c478bd9Sstevel@tonic-gate } 1113*7c478bd9Sstevel@tonic-gate if (!F_ISSET(dbp, DB_RE_FIXEDLEN)) { 1114*7c478bd9Sstevel@tonic-gate if ((ret = __os_write(fd, &delim, 1, &nw)) != 0) 1115*7c478bd9Sstevel@tonic-gate goto err; 1116*7c478bd9Sstevel@tonic-gate if (nw != 1) { 1117*7c478bd9Sstevel@tonic-gate ret = EIO; 1118*7c478bd9Sstevel@tonic-gate goto err; 1119*7c478bd9Sstevel@tonic-gate } 1120*7c478bd9Sstevel@tonic-gate } 1121*7c478bd9Sstevel@tonic-gate } 1122*7c478bd9Sstevel@tonic-gate 1123*7c478bd9Sstevel@tonic-gate err: 1124*7c478bd9Sstevel@tonic-gate done: /* Close the file descriptor. */ 1125*7c478bd9Sstevel@tonic-gate if ((t_ret = __os_close(fd)) != 0 || ret == 0) 1126*7c478bd9Sstevel@tonic-gate ret = t_ret; 1127*7c478bd9Sstevel@tonic-gate 1128*7c478bd9Sstevel@tonic-gate if (ret == 0) 1129*7c478bd9Sstevel@tonic-gate F_CLR(rp, RECNO_MODIFIED); 1130*7c478bd9Sstevel@tonic-gate return (ret); 1131*7c478bd9Sstevel@tonic-gate } 1132*7c478bd9Sstevel@tonic-gate 1133*7c478bd9Sstevel@tonic-gate /* 1134*7c478bd9Sstevel@tonic-gate * __ram_fmap -- 1135*7c478bd9Sstevel@tonic-gate * Get fixed length records from a file. 1136*7c478bd9Sstevel@tonic-gate */ 1137*7c478bd9Sstevel@tonic-gate static int 1138*7c478bd9Sstevel@tonic-gate __ram_fmap(dbc, top) 1139*7c478bd9Sstevel@tonic-gate DBC *dbc; 1140*7c478bd9Sstevel@tonic-gate db_recno_t top; 1141*7c478bd9Sstevel@tonic-gate { 1142*7c478bd9Sstevel@tonic-gate DB *dbp; 1143*7c478bd9Sstevel@tonic-gate DBT data; 1144*7c478bd9Sstevel@tonic-gate RECNO *rp; 1145*7c478bd9Sstevel@tonic-gate db_recno_t recno; 1146*7c478bd9Sstevel@tonic-gate u_int32_t len; 1147*7c478bd9Sstevel@tonic-gate u_int8_t *sp, *ep, *p; 1148*7c478bd9Sstevel@tonic-gate int ret; 1149*7c478bd9Sstevel@tonic-gate 1150*7c478bd9Sstevel@tonic-gate if ((ret = __bam_nrecs(dbc, &recno)) != 0) 1151*7c478bd9Sstevel@tonic-gate return (ret); 1152*7c478bd9Sstevel@tonic-gate 1153*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 1154*7c478bd9Sstevel@tonic-gate rp = ((BTREE *)(dbp->internal))->recno; 1155*7c478bd9Sstevel@tonic-gate 1156*7c478bd9Sstevel@tonic-gate if (dbc->rdata.ulen < rp->re_len) { 1157*7c478bd9Sstevel@tonic-gate if ((ret = __os_realloc(&dbc->rdata.data, rp->re_len)) != 0) { 1158*7c478bd9Sstevel@tonic-gate dbc->rdata.ulen = 0; 1159*7c478bd9Sstevel@tonic-gate dbc->rdata.data = NULL; 1160*7c478bd9Sstevel@tonic-gate return (ret); 1161*7c478bd9Sstevel@tonic-gate } 1162*7c478bd9Sstevel@tonic-gate dbc->rdata.ulen = rp->re_len; 1163*7c478bd9Sstevel@tonic-gate } 1164*7c478bd9Sstevel@tonic-gate 1165*7c478bd9Sstevel@tonic-gate memset(&data, 0, sizeof(data)); 1166*7c478bd9Sstevel@tonic-gate data.data = dbc->rdata.data; 1167*7c478bd9Sstevel@tonic-gate data.size = rp->re_len; 1168*7c478bd9Sstevel@tonic-gate 1169*7c478bd9Sstevel@tonic-gate sp = (u_int8_t *)rp->re_cmap; 1170*7c478bd9Sstevel@tonic-gate ep = (u_int8_t *)rp->re_emap; 1171*7c478bd9Sstevel@tonic-gate while (recno < top) { 1172*7c478bd9Sstevel@tonic-gate if (sp >= ep) { 1173*7c478bd9Sstevel@tonic-gate F_SET(rp, RECNO_EOF); 1174*7c478bd9Sstevel@tonic-gate return (DB_NOTFOUND); 1175*7c478bd9Sstevel@tonic-gate } 1176*7c478bd9Sstevel@tonic-gate len = rp->re_len; 1177*7c478bd9Sstevel@tonic-gate for (p = dbc->rdata.data; 1178*7c478bd9Sstevel@tonic-gate sp < ep && len > 0; *p++ = *sp++, --len) 1179*7c478bd9Sstevel@tonic-gate ; 1180*7c478bd9Sstevel@tonic-gate 1181*7c478bd9Sstevel@tonic-gate /* 1182*7c478bd9Sstevel@tonic-gate * Another process may have read this record from the input 1183*7c478bd9Sstevel@tonic-gate * file and stored it into the database already, in which 1184*7c478bd9Sstevel@tonic-gate * case we don't need to repeat that operation. We detect 1185*7c478bd9Sstevel@tonic-gate * this by checking if the last record we've read is greater 1186*7c478bd9Sstevel@tonic-gate * or equal to the number of records in the database. 1187*7c478bd9Sstevel@tonic-gate * 1188*7c478bd9Sstevel@tonic-gate * XXX 1189*7c478bd9Sstevel@tonic-gate * We should just do a seek, since the records are fixed 1190*7c478bd9Sstevel@tonic-gate * length. 1191*7c478bd9Sstevel@tonic-gate */ 1192*7c478bd9Sstevel@tonic-gate if (rp->re_last >= recno) { 1193*7c478bd9Sstevel@tonic-gate if (len != 0) 1194*7c478bd9Sstevel@tonic-gate memset(p, rp->re_pad, len); 1195*7c478bd9Sstevel@tonic-gate 1196*7c478bd9Sstevel@tonic-gate ++recno; 1197*7c478bd9Sstevel@tonic-gate if ((ret = __ram_add(dbc, &recno, &data, 0, 0)) != 0) 1198*7c478bd9Sstevel@tonic-gate return (ret); 1199*7c478bd9Sstevel@tonic-gate } 1200*7c478bd9Sstevel@tonic-gate ++rp->re_last; 1201*7c478bd9Sstevel@tonic-gate } 1202*7c478bd9Sstevel@tonic-gate rp->re_cmap = sp; 1203*7c478bd9Sstevel@tonic-gate return (0); 1204*7c478bd9Sstevel@tonic-gate } 1205*7c478bd9Sstevel@tonic-gate 1206*7c478bd9Sstevel@tonic-gate /* 1207*7c478bd9Sstevel@tonic-gate * __ram_vmap -- 1208*7c478bd9Sstevel@tonic-gate * Get variable length records from a file. 1209*7c478bd9Sstevel@tonic-gate */ 1210*7c478bd9Sstevel@tonic-gate static int 1211*7c478bd9Sstevel@tonic-gate __ram_vmap(dbc, top) 1212*7c478bd9Sstevel@tonic-gate DBC *dbc; 1213*7c478bd9Sstevel@tonic-gate db_recno_t top; 1214*7c478bd9Sstevel@tonic-gate { 1215*7c478bd9Sstevel@tonic-gate DBT data; 1216*7c478bd9Sstevel@tonic-gate RECNO *rp; 1217*7c478bd9Sstevel@tonic-gate db_recno_t recno; 1218*7c478bd9Sstevel@tonic-gate u_int8_t *sp, *ep; 1219*7c478bd9Sstevel@tonic-gate int delim, ret; 1220*7c478bd9Sstevel@tonic-gate 1221*7c478bd9Sstevel@tonic-gate rp = ((BTREE *)(dbc->dbp->internal))->recno; 1222*7c478bd9Sstevel@tonic-gate 1223*7c478bd9Sstevel@tonic-gate if ((ret = __bam_nrecs(dbc, &recno)) != 0) 1224*7c478bd9Sstevel@tonic-gate return (ret); 1225*7c478bd9Sstevel@tonic-gate 1226*7c478bd9Sstevel@tonic-gate memset(&data, 0, sizeof(data)); 1227*7c478bd9Sstevel@tonic-gate 1228*7c478bd9Sstevel@tonic-gate delim = rp->re_delim; 1229*7c478bd9Sstevel@tonic-gate 1230*7c478bd9Sstevel@tonic-gate sp = (u_int8_t *)rp->re_cmap; 1231*7c478bd9Sstevel@tonic-gate ep = (u_int8_t *)rp->re_emap; 1232*7c478bd9Sstevel@tonic-gate while (recno < top) { 1233*7c478bd9Sstevel@tonic-gate if (sp >= ep) { 1234*7c478bd9Sstevel@tonic-gate F_SET(rp, RECNO_EOF); 1235*7c478bd9Sstevel@tonic-gate return (DB_NOTFOUND); 1236*7c478bd9Sstevel@tonic-gate } 1237*7c478bd9Sstevel@tonic-gate for (data.data = sp; sp < ep && *sp != delim; ++sp) 1238*7c478bd9Sstevel@tonic-gate ; 1239*7c478bd9Sstevel@tonic-gate 1240*7c478bd9Sstevel@tonic-gate /* 1241*7c478bd9Sstevel@tonic-gate * Another process may have read this record from the input 1242*7c478bd9Sstevel@tonic-gate * file and stored it into the database already, in which 1243*7c478bd9Sstevel@tonic-gate * case we don't need to repeat that operation. We detect 1244*7c478bd9Sstevel@tonic-gate * this by checking if the last record we've read is greater 1245*7c478bd9Sstevel@tonic-gate * or equal to the number of records in the database. 1246*7c478bd9Sstevel@tonic-gate */ 1247*7c478bd9Sstevel@tonic-gate if (rp->re_last >= recno) { 1248*7c478bd9Sstevel@tonic-gate data.size = sp - (u_int8_t *)data.data; 1249*7c478bd9Sstevel@tonic-gate ++recno; 1250*7c478bd9Sstevel@tonic-gate if ((ret = __ram_add(dbc, &recno, &data, 0, 0)) != 0) 1251*7c478bd9Sstevel@tonic-gate return (ret); 1252*7c478bd9Sstevel@tonic-gate } 1253*7c478bd9Sstevel@tonic-gate ++rp->re_last; 1254*7c478bd9Sstevel@tonic-gate ++sp; 1255*7c478bd9Sstevel@tonic-gate } 1256*7c478bd9Sstevel@tonic-gate rp->re_cmap = sp; 1257*7c478bd9Sstevel@tonic-gate return (0); 1258*7c478bd9Sstevel@tonic-gate } 1259*7c478bd9Sstevel@tonic-gate 1260*7c478bd9Sstevel@tonic-gate /* 1261*7c478bd9Sstevel@tonic-gate * __ram_add -- 1262*7c478bd9Sstevel@tonic-gate * Add records into the tree. 1263*7c478bd9Sstevel@tonic-gate */ 1264*7c478bd9Sstevel@tonic-gate static int 1265*7c478bd9Sstevel@tonic-gate __ram_add(dbc, recnop, data, flags, bi_flags) 1266*7c478bd9Sstevel@tonic-gate DBC *dbc; 1267*7c478bd9Sstevel@tonic-gate db_recno_t *recnop; 1268*7c478bd9Sstevel@tonic-gate DBT *data; 1269*7c478bd9Sstevel@tonic-gate u_int32_t flags, bi_flags; 1270*7c478bd9Sstevel@tonic-gate { 1271*7c478bd9Sstevel@tonic-gate BKEYDATA *bk; 1272*7c478bd9Sstevel@tonic-gate CURSOR *cp; 1273*7c478bd9Sstevel@tonic-gate DB *dbp; 1274*7c478bd9Sstevel@tonic-gate PAGE *h; 1275*7c478bd9Sstevel@tonic-gate db_indx_t indx; 1276*7c478bd9Sstevel@tonic-gate int exact, isdeleted, ret, stack; 1277*7c478bd9Sstevel@tonic-gate 1278*7c478bd9Sstevel@tonic-gate dbp = dbc->dbp; 1279*7c478bd9Sstevel@tonic-gate cp = dbc->internal; 1280*7c478bd9Sstevel@tonic-gate 1281*7c478bd9Sstevel@tonic-gate retry: /* Find the slot for insertion. */ 1282*7c478bd9Sstevel@tonic-gate if ((ret = __bam_rsearch(dbc, recnop, 1283*7c478bd9Sstevel@tonic-gate S_INSERT | (flags == DB_APPEND ? S_APPEND : 0), 1, &exact)) != 0) 1284*7c478bd9Sstevel@tonic-gate return (ret); 1285*7c478bd9Sstevel@tonic-gate h = cp->csp->page; 1286*7c478bd9Sstevel@tonic-gate indx = cp->csp->indx; 1287*7c478bd9Sstevel@tonic-gate stack = 1; 1288*7c478bd9Sstevel@tonic-gate 1289*7c478bd9Sstevel@tonic-gate /* 1290*7c478bd9Sstevel@tonic-gate * If re-numbering records, the on-page deleted flag means this record 1291*7c478bd9Sstevel@tonic-gate * was implicitly created. If not re-numbering records, the on-page 1292*7c478bd9Sstevel@tonic-gate * deleted flag means this record was implicitly created, or, it was 1293*7c478bd9Sstevel@tonic-gate * deleted at some time. 1294*7c478bd9Sstevel@tonic-gate * 1295*7c478bd9Sstevel@tonic-gate * If DB_NOOVERWRITE is set and the item already exists in the tree, 1296*7c478bd9Sstevel@tonic-gate * return an error unless the item was either marked for deletion or 1297*7c478bd9Sstevel@tonic-gate * only implicitly created. 1298*7c478bd9Sstevel@tonic-gate */ 1299*7c478bd9Sstevel@tonic-gate isdeleted = 0; 1300*7c478bd9Sstevel@tonic-gate if (exact) { 1301*7c478bd9Sstevel@tonic-gate bk = GET_BKEYDATA(h, indx); 1302*7c478bd9Sstevel@tonic-gate if (B_DISSET(bk->type)) 1303*7c478bd9Sstevel@tonic-gate isdeleted = 1; 1304*7c478bd9Sstevel@tonic-gate else 1305*7c478bd9Sstevel@tonic-gate if (flags == DB_NOOVERWRITE) { 1306*7c478bd9Sstevel@tonic-gate ret = DB_KEYEXIST; 1307*7c478bd9Sstevel@tonic-gate goto err; 1308*7c478bd9Sstevel@tonic-gate } 1309*7c478bd9Sstevel@tonic-gate } 1310*7c478bd9Sstevel@tonic-gate 1311*7c478bd9Sstevel@tonic-gate /* 1312*7c478bd9Sstevel@tonic-gate * Select the arguments for __bam_iitem() and do the insert. If the 1313*7c478bd9Sstevel@tonic-gate * key is an exact match, or we're replacing the data item with a 1314*7c478bd9Sstevel@tonic-gate * new data item, replace the current item. If the key isn't an exact 1315*7c478bd9Sstevel@tonic-gate * match, we're inserting a new key/data pair, before the search 1316*7c478bd9Sstevel@tonic-gate * location. 1317*7c478bd9Sstevel@tonic-gate */ 1318*7c478bd9Sstevel@tonic-gate switch (ret = __bam_iitem(dbc, 1319*7c478bd9Sstevel@tonic-gate &h, &indx, NULL, data, exact ? DB_CURRENT : DB_BEFORE, bi_flags)) { 1320*7c478bd9Sstevel@tonic-gate case 0: 1321*7c478bd9Sstevel@tonic-gate /* 1322*7c478bd9Sstevel@tonic-gate * Don't adjust anything. 1323*7c478bd9Sstevel@tonic-gate * 1324*7c478bd9Sstevel@tonic-gate * If we inserted a record, no cursors need adjusting because 1325*7c478bd9Sstevel@tonic-gate * the only new record it's possible to insert is at the very 1326*7c478bd9Sstevel@tonic-gate * end of the tree. The necessary adjustments to the internal 1327*7c478bd9Sstevel@tonic-gate * page counts were made by __bam_iitem(). 1328*7c478bd9Sstevel@tonic-gate * 1329*7c478bd9Sstevel@tonic-gate * If we overwrote a record, no cursors need adjusting because 1330*7c478bd9Sstevel@tonic-gate * future DBcursor->get calls will simply return the underlying 1331*7c478bd9Sstevel@tonic-gate * record (there's no adjustment made for the DB_CURRENT flag 1332*7c478bd9Sstevel@tonic-gate * when a cursor get operation immediately follows a cursor 1333*7c478bd9Sstevel@tonic-gate * delete operation, and the normal adjustment for the DB_NEXT 1334*7c478bd9Sstevel@tonic-gate * flag is still correct). 1335*7c478bd9Sstevel@tonic-gate */ 1336*7c478bd9Sstevel@tonic-gate break; 1337*7c478bd9Sstevel@tonic-gate case DB_NEEDSPLIT: 1338*7c478bd9Sstevel@tonic-gate /* Discard the stack of pages and split the page. */ 1339*7c478bd9Sstevel@tonic-gate (void)__bam_stkrel(dbc, 0); 1340*7c478bd9Sstevel@tonic-gate stack = 0; 1341*7c478bd9Sstevel@tonic-gate 1342*7c478bd9Sstevel@tonic-gate if ((ret = __bam_split(dbc, recnop)) != 0) 1343*7c478bd9Sstevel@tonic-gate goto err; 1344*7c478bd9Sstevel@tonic-gate 1345*7c478bd9Sstevel@tonic-gate goto retry; 1346*7c478bd9Sstevel@tonic-gate /* NOTREACHED */ 1347*7c478bd9Sstevel@tonic-gate default: 1348*7c478bd9Sstevel@tonic-gate goto err; 1349*7c478bd9Sstevel@tonic-gate } 1350*7c478bd9Sstevel@tonic-gate 1351*7c478bd9Sstevel@tonic-gate 1352*7c478bd9Sstevel@tonic-gate err: if (stack) 1353*7c478bd9Sstevel@tonic-gate __bam_stkrel(dbc, 0); 1354*7c478bd9Sstevel@tonic-gate 1355*7c478bd9Sstevel@tonic-gate return (ret); 1356*7c478bd9Sstevel@tonic-gate } 1357