17c478bd9Sstevel@tonic-gate /*-
27c478bd9Sstevel@tonic-gate * Copyright (c) 1990, 1993, 1994
37c478bd9Sstevel@tonic-gate * The Regents of the University of California. All rights reserved.
47c478bd9Sstevel@tonic-gate *
57c478bd9Sstevel@tonic-gate * This code is derived from software contributed to Berkeley by
67c478bd9Sstevel@tonic-gate * Mike Olson.
77c478bd9Sstevel@tonic-gate *
87c478bd9Sstevel@tonic-gate * Redistribution and use in source and binary forms, with or without
97c478bd9Sstevel@tonic-gate * modification, are permitted provided that the following conditions
107c478bd9Sstevel@tonic-gate * are met:
117c478bd9Sstevel@tonic-gate * 1. Redistributions of source code must retain the above copyright
127c478bd9Sstevel@tonic-gate * notice, this list of conditions and the following disclaimer.
137c478bd9Sstevel@tonic-gate * 2. Redistributions in binary form must reproduce the above copyright
147c478bd9Sstevel@tonic-gate * notice, this list of conditions and the following disclaimer in the
157c478bd9Sstevel@tonic-gate * documentation and/or other materials provided with the distribution.
167c478bd9Sstevel@tonic-gate * 3. All advertising materials mentioning features or use of this software
177c478bd9Sstevel@tonic-gate * must display the following acknowledgement:
187c478bd9Sstevel@tonic-gate * This product includes software developed by the University of
197c478bd9Sstevel@tonic-gate * California, Berkeley and its contributors.
207c478bd9Sstevel@tonic-gate * 4. Neither the name of the University nor the names of its contributors
217c478bd9Sstevel@tonic-gate * may be used to endorse or promote products derived from this software
227c478bd9Sstevel@tonic-gate * without specific prior written permission.
237c478bd9Sstevel@tonic-gate *
247c478bd9Sstevel@tonic-gate * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
257c478bd9Sstevel@tonic-gate * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
267c478bd9Sstevel@tonic-gate * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
277c478bd9Sstevel@tonic-gate * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
287c478bd9Sstevel@tonic-gate * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
297c478bd9Sstevel@tonic-gate * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
307c478bd9Sstevel@tonic-gate * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
317c478bd9Sstevel@tonic-gate * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
327c478bd9Sstevel@tonic-gate * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
337c478bd9Sstevel@tonic-gate * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
347c478bd9Sstevel@tonic-gate * SUCH DAMAGE.
357c478bd9Sstevel@tonic-gate */
367c478bd9Sstevel@tonic-gate
377c478bd9Sstevel@tonic-gate #if defined(LIBC_SCCS) && !defined(lint)
387c478bd9Sstevel@tonic-gate static char sccsid[] = "@(#)bt_delete.c 8.13 (Berkeley) 7/28/94";
397c478bd9Sstevel@tonic-gate #endif /* LIBC_SCCS and not lint */
407c478bd9Sstevel@tonic-gate
417c478bd9Sstevel@tonic-gate #include <sys/types.h>
427c478bd9Sstevel@tonic-gate
437c478bd9Sstevel@tonic-gate #include <errno.h>
447c478bd9Sstevel@tonic-gate #include <stdio.h>
457c478bd9Sstevel@tonic-gate #include <string.h>
467c478bd9Sstevel@tonic-gate
477c478bd9Sstevel@tonic-gate #include "db-int.h"
487c478bd9Sstevel@tonic-gate #include "btree.h"
497c478bd9Sstevel@tonic-gate
507c478bd9Sstevel@tonic-gate static int __bt_bdelete __P((BTREE *, const DBT *));
517c478bd9Sstevel@tonic-gate static int __bt_curdel __P((BTREE *, const DBT *, PAGE *, u_int));
527c478bd9Sstevel@tonic-gate static int __bt_pdelete __P((BTREE *, PAGE *));
537c478bd9Sstevel@tonic-gate static int __bt_relink __P((BTREE *, PAGE *));
547c478bd9Sstevel@tonic-gate static int __bt_stkacq __P((BTREE *, PAGE **, CURSOR *));
557c478bd9Sstevel@tonic-gate
567c478bd9Sstevel@tonic-gate /*
577c478bd9Sstevel@tonic-gate * __bt_delete
587c478bd9Sstevel@tonic-gate * Delete the item(s) referenced by a key.
597c478bd9Sstevel@tonic-gate *
607c478bd9Sstevel@tonic-gate * Return RET_SPECIAL if the key is not found.
617c478bd9Sstevel@tonic-gate */
627c478bd9Sstevel@tonic-gate int
__bt_delete(dbp,key,flags)637c478bd9Sstevel@tonic-gate __bt_delete(dbp, key, flags)
647c478bd9Sstevel@tonic-gate const DB *dbp;
657c478bd9Sstevel@tonic-gate const DBT *key;
667c478bd9Sstevel@tonic-gate u_int flags;
677c478bd9Sstevel@tonic-gate {
687c478bd9Sstevel@tonic-gate BTREE *t;
697c478bd9Sstevel@tonic-gate CURSOR *c;
707c478bd9Sstevel@tonic-gate PAGE *h;
717c478bd9Sstevel@tonic-gate int status;
727c478bd9Sstevel@tonic-gate
737c478bd9Sstevel@tonic-gate t = dbp->internal;
747c478bd9Sstevel@tonic-gate
757c478bd9Sstevel@tonic-gate /* Toss any page pinned across calls. */
767c478bd9Sstevel@tonic-gate if (t->bt_pinned != NULL) {
777c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, t->bt_pinned, 0);
787c478bd9Sstevel@tonic-gate t->bt_pinned = NULL;
797c478bd9Sstevel@tonic-gate }
807c478bd9Sstevel@tonic-gate
817c478bd9Sstevel@tonic-gate /* Check for change to a read-only tree. */
827c478bd9Sstevel@tonic-gate if (F_ISSET(t, B_RDONLY)) {
837c478bd9Sstevel@tonic-gate errno = EPERM;
847c478bd9Sstevel@tonic-gate return (RET_ERROR);
857c478bd9Sstevel@tonic-gate }
867c478bd9Sstevel@tonic-gate
877c478bd9Sstevel@tonic-gate switch (flags) {
887c478bd9Sstevel@tonic-gate case 0:
897c478bd9Sstevel@tonic-gate status = __bt_bdelete(t, key);
907c478bd9Sstevel@tonic-gate break;
917c478bd9Sstevel@tonic-gate case R_CURSOR:
927c478bd9Sstevel@tonic-gate /*
937c478bd9Sstevel@tonic-gate * If flags is R_CURSOR, delete the cursor. Must already
947c478bd9Sstevel@tonic-gate * have started a scan and not have already deleted it.
957c478bd9Sstevel@tonic-gate */
967c478bd9Sstevel@tonic-gate c = &t->bt_cursor;
977c478bd9Sstevel@tonic-gate if (F_ISSET(c, CURS_INIT)) {
987c478bd9Sstevel@tonic-gate if (F_ISSET(c, CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
997c478bd9Sstevel@tonic-gate return (RET_SPECIAL);
1007c478bd9Sstevel@tonic-gate if ((h = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL)
1017c478bd9Sstevel@tonic-gate return (RET_ERROR);
1027c478bd9Sstevel@tonic-gate
1037c478bd9Sstevel@tonic-gate /*
1047c478bd9Sstevel@tonic-gate * If the page is about to be emptied, we'll need to
1057c478bd9Sstevel@tonic-gate * delete it, which means we have to acquire a stack.
1067c478bd9Sstevel@tonic-gate */
1077c478bd9Sstevel@tonic-gate if (NEXTINDEX(h) == 1)
1087c478bd9Sstevel@tonic-gate if (__bt_stkacq(t, &h, &t->bt_cursor))
1097c478bd9Sstevel@tonic-gate return (RET_ERROR);
1107c478bd9Sstevel@tonic-gate
1117c478bd9Sstevel@tonic-gate status = __bt_dleaf(t, NULL, h, c->pg.index);
1127c478bd9Sstevel@tonic-gate
1137c478bd9Sstevel@tonic-gate if (NEXTINDEX(h) == 0 && status == RET_SUCCESS) {
1147c478bd9Sstevel@tonic-gate if (__bt_pdelete(t, h))
1157c478bd9Sstevel@tonic-gate return (RET_ERROR);
1167c478bd9Sstevel@tonic-gate } else
1177c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp,
1187c478bd9Sstevel@tonic-gate h, status == RET_SUCCESS ? MPOOL_DIRTY : 0);
1197c478bd9Sstevel@tonic-gate break;
1207c478bd9Sstevel@tonic-gate }
1217c478bd9Sstevel@tonic-gate /* FALLTHROUGH */
1227c478bd9Sstevel@tonic-gate default:
1237c478bd9Sstevel@tonic-gate errno = EINVAL;
1247c478bd9Sstevel@tonic-gate return (RET_ERROR);
1257c478bd9Sstevel@tonic-gate }
1267c478bd9Sstevel@tonic-gate if (status == RET_SUCCESS)
1277c478bd9Sstevel@tonic-gate F_SET(t, B_MODIFIED);
1287c478bd9Sstevel@tonic-gate return (status);
1297c478bd9Sstevel@tonic-gate }
1307c478bd9Sstevel@tonic-gate
1317c478bd9Sstevel@tonic-gate /*
1327c478bd9Sstevel@tonic-gate * __bt_stkacq --
1337c478bd9Sstevel@tonic-gate * Acquire a stack so we can delete a cursor entry.
1347c478bd9Sstevel@tonic-gate *
1357c478bd9Sstevel@tonic-gate * Parameters:
1367c478bd9Sstevel@tonic-gate * t: tree
1377c478bd9Sstevel@tonic-gate * hp: pointer to current, pinned PAGE pointer
1387c478bd9Sstevel@tonic-gate * c: pointer to the cursor
1397c478bd9Sstevel@tonic-gate *
1407c478bd9Sstevel@tonic-gate * Returns:
1417c478bd9Sstevel@tonic-gate * 0 on success, 1 on failure
1427c478bd9Sstevel@tonic-gate */
1437c478bd9Sstevel@tonic-gate static int
__bt_stkacq(t,hp,c)1447c478bd9Sstevel@tonic-gate __bt_stkacq(t, hp, c)
1457c478bd9Sstevel@tonic-gate BTREE *t;
1467c478bd9Sstevel@tonic-gate PAGE **hp;
1477c478bd9Sstevel@tonic-gate CURSOR *c;
1487c478bd9Sstevel@tonic-gate {
1497c478bd9Sstevel@tonic-gate BINTERNAL *bi;
1507c478bd9Sstevel@tonic-gate EPG *e;
1517c478bd9Sstevel@tonic-gate EPGNO *parent;
1527c478bd9Sstevel@tonic-gate PAGE *h;
15356a424ccSmp indx_t idx;
1547c478bd9Sstevel@tonic-gate db_pgno_t pgno;
1557c478bd9Sstevel@tonic-gate recno_t nextpg, prevpg;
1567c478bd9Sstevel@tonic-gate int exact, level;
157*1da57d55SToomas Soome
1587c478bd9Sstevel@tonic-gate /*
1597c478bd9Sstevel@tonic-gate * Find the first occurrence of the key in the tree. Toss the
1607c478bd9Sstevel@tonic-gate * currently locked page so we don't hit an already-locked page.
1617c478bd9Sstevel@tonic-gate */
1627c478bd9Sstevel@tonic-gate h = *hp;
1637c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, h, 0);
1647c478bd9Sstevel@tonic-gate if ((e = __bt_search(t, &c->key, &exact)) == NULL)
1657c478bd9Sstevel@tonic-gate return (1);
1667c478bd9Sstevel@tonic-gate h = e->page;
1677c478bd9Sstevel@tonic-gate
1687c478bd9Sstevel@tonic-gate /* See if we got it in one shot. */
1697c478bd9Sstevel@tonic-gate if (h->pgno == c->pg.pgno)
1707c478bd9Sstevel@tonic-gate goto ret;
1717c478bd9Sstevel@tonic-gate
1727c478bd9Sstevel@tonic-gate /*
1737c478bd9Sstevel@tonic-gate * Move right, looking for the page. At each move we have to move
1747c478bd9Sstevel@tonic-gate * up the stack until we don't have to move to the next page. If
1757c478bd9Sstevel@tonic-gate * we have to change pages at an internal level, we have to fix the
1767c478bd9Sstevel@tonic-gate * stack back up.
1777c478bd9Sstevel@tonic-gate */
1787c478bd9Sstevel@tonic-gate while (h->pgno != c->pg.pgno) {
1797c478bd9Sstevel@tonic-gate if ((nextpg = h->nextpg) == P_INVALID)
1807c478bd9Sstevel@tonic-gate break;
1817c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, h, 0);
1827c478bd9Sstevel@tonic-gate
1837c478bd9Sstevel@tonic-gate /* Move up the stack. */
1847c478bd9Sstevel@tonic-gate for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
1857c478bd9Sstevel@tonic-gate /* Get the parent page. */
1867c478bd9Sstevel@tonic-gate if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
1877c478bd9Sstevel@tonic-gate return (1);
1887c478bd9Sstevel@tonic-gate
1897c478bd9Sstevel@tonic-gate /* Move to the next index. */
1907c478bd9Sstevel@tonic-gate if (parent->index != NEXTINDEX(h) - 1) {
19156a424ccSmp idx = parent->index + 1;
19256a424ccSmp BT_PUSH(t, h->pgno, idx);
1937c478bd9Sstevel@tonic-gate break;
1947c478bd9Sstevel@tonic-gate }
1957c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, h, 0);
1967c478bd9Sstevel@tonic-gate }
1977c478bd9Sstevel@tonic-gate
1987c478bd9Sstevel@tonic-gate /* Restore the stack. */
1997c478bd9Sstevel@tonic-gate while (level--) {
2007c478bd9Sstevel@tonic-gate /* Push the next level down onto the stack. */
20156a424ccSmp bi = GETBINTERNAL(h, idx);
2027c478bd9Sstevel@tonic-gate pgno = bi->pgno;
2037c478bd9Sstevel@tonic-gate BT_PUSH(t, pgno, 0);
2047c478bd9Sstevel@tonic-gate
2057c478bd9Sstevel@tonic-gate /* Lose the currently pinned page. */
2067c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, h, 0);
2077c478bd9Sstevel@tonic-gate
2087c478bd9Sstevel@tonic-gate /* Get the next level down. */
2097c478bd9Sstevel@tonic-gate if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
2107c478bd9Sstevel@tonic-gate return (1);
21156a424ccSmp idx = 0;
2127c478bd9Sstevel@tonic-gate }
2137c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, h, 0);
2147c478bd9Sstevel@tonic-gate if ((h = mpool_get(t->bt_mp, nextpg, 0)) == NULL)
2157c478bd9Sstevel@tonic-gate return (1);
2167c478bd9Sstevel@tonic-gate }
2177c478bd9Sstevel@tonic-gate
2187c478bd9Sstevel@tonic-gate if (h->pgno == c->pg.pgno)
2197c478bd9Sstevel@tonic-gate goto ret;
2207c478bd9Sstevel@tonic-gate
2217c478bd9Sstevel@tonic-gate /* Reacquire the original stack. */
2227c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, h, 0);
2237c478bd9Sstevel@tonic-gate if ((e = __bt_search(t, &c->key, &exact)) == NULL)
2247c478bd9Sstevel@tonic-gate return (1);
2257c478bd9Sstevel@tonic-gate h = e->page;
2267c478bd9Sstevel@tonic-gate
2277c478bd9Sstevel@tonic-gate /*
2287c478bd9Sstevel@tonic-gate * Move left, looking for the page. At each move we have to move
2297c478bd9Sstevel@tonic-gate * up the stack until we don't have to change pages to move to the
2307c478bd9Sstevel@tonic-gate * next page. If we have to change pages at an internal level, we
2317c478bd9Sstevel@tonic-gate * have to fix the stack back up.
2327c478bd9Sstevel@tonic-gate */
2337c478bd9Sstevel@tonic-gate while (h->pgno != c->pg.pgno) {
2347c478bd9Sstevel@tonic-gate if ((prevpg = h->prevpg) == P_INVALID)
2357c478bd9Sstevel@tonic-gate break;
2367c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, h, 0);
2377c478bd9Sstevel@tonic-gate
2387c478bd9Sstevel@tonic-gate /* Move up the stack. */
2397c478bd9Sstevel@tonic-gate for (level = 0; (parent = BT_POP(t)) != NULL; ++level) {
2407c478bd9Sstevel@tonic-gate /* Get the parent page. */
2417c478bd9Sstevel@tonic-gate if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
2427c478bd9Sstevel@tonic-gate return (1);
2437c478bd9Sstevel@tonic-gate
2447c478bd9Sstevel@tonic-gate /* Move to the next index. */
2457c478bd9Sstevel@tonic-gate if (parent->index != 0) {
24656a424ccSmp idx = parent->index - 1;
24756a424ccSmp BT_PUSH(t, h->pgno, idx);
2487c478bd9Sstevel@tonic-gate break;
2497c478bd9Sstevel@tonic-gate }
2507c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, h, 0);
2517c478bd9Sstevel@tonic-gate }
2527c478bd9Sstevel@tonic-gate
2537c478bd9Sstevel@tonic-gate /* Restore the stack. */
2547c478bd9Sstevel@tonic-gate while (level--) {
2557c478bd9Sstevel@tonic-gate /* Push the next level down onto the stack. */
25656a424ccSmp bi = GETBINTERNAL(h, idx);
2577c478bd9Sstevel@tonic-gate pgno = bi->pgno;
2587c478bd9Sstevel@tonic-gate
2597c478bd9Sstevel@tonic-gate /* Lose the currently pinned page. */
2607c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, h, 0);
2617c478bd9Sstevel@tonic-gate
2627c478bd9Sstevel@tonic-gate /* Get the next level down. */
2637c478bd9Sstevel@tonic-gate if ((h = mpool_get(t->bt_mp, pgno, 0)) == NULL)
2647c478bd9Sstevel@tonic-gate return (1);
2657c478bd9Sstevel@tonic-gate
26656a424ccSmp idx = NEXTINDEX(h) - 1;
26756a424ccSmp BT_PUSH(t, pgno, idx);
2687c478bd9Sstevel@tonic-gate }
2697c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, h, 0);
2707c478bd9Sstevel@tonic-gate if ((h = mpool_get(t->bt_mp, prevpg, 0)) == NULL)
2717c478bd9Sstevel@tonic-gate return (1);
2727c478bd9Sstevel@tonic-gate }
273*1da57d55SToomas Soome
2747c478bd9Sstevel@tonic-gate
2757c478bd9Sstevel@tonic-gate ret: mpool_put(t->bt_mp, h, 0);
2767c478bd9Sstevel@tonic-gate return ((*hp = mpool_get(t->bt_mp, c->pg.pgno, 0)) == NULL);
2777c478bd9Sstevel@tonic-gate }
2787c478bd9Sstevel@tonic-gate
2797c478bd9Sstevel@tonic-gate /*
2807c478bd9Sstevel@tonic-gate * __bt_bdelete --
2817c478bd9Sstevel@tonic-gate * Delete all key/data pairs matching the specified key.
2827c478bd9Sstevel@tonic-gate *
2837c478bd9Sstevel@tonic-gate * Parameters:
2847c478bd9Sstevel@tonic-gate * t: tree
2857c478bd9Sstevel@tonic-gate * key: key to delete
2867c478bd9Sstevel@tonic-gate *
2877c478bd9Sstevel@tonic-gate * Returns:
2887c478bd9Sstevel@tonic-gate * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key not found.
2897c478bd9Sstevel@tonic-gate */
2907c478bd9Sstevel@tonic-gate static int
__bt_bdelete(t,key)2917c478bd9Sstevel@tonic-gate __bt_bdelete(t, key)
2927c478bd9Sstevel@tonic-gate BTREE *t;
2937c478bd9Sstevel@tonic-gate const DBT *key;
2947c478bd9Sstevel@tonic-gate {
2957c478bd9Sstevel@tonic-gate EPG *e;
2967c478bd9Sstevel@tonic-gate PAGE *h;
2977c478bd9Sstevel@tonic-gate int deleted, exact, redo;
2987c478bd9Sstevel@tonic-gate
2997c478bd9Sstevel@tonic-gate deleted = 0;
3007c478bd9Sstevel@tonic-gate
3017c478bd9Sstevel@tonic-gate /* Find any matching record; __bt_search pins the page. */
3027c478bd9Sstevel@tonic-gate loop: if ((e = __bt_search(t, key, &exact)) == NULL)
3037c478bd9Sstevel@tonic-gate return (deleted ? RET_SUCCESS : RET_ERROR);
3047c478bd9Sstevel@tonic-gate if (!exact) {
3057c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, e->page, 0);
3067c478bd9Sstevel@tonic-gate return (deleted ? RET_SUCCESS : RET_SPECIAL);
3077c478bd9Sstevel@tonic-gate }
3087c478bd9Sstevel@tonic-gate
3097c478bd9Sstevel@tonic-gate /*
3107c478bd9Sstevel@tonic-gate * Delete forward, then delete backward, from the found key. If
3117c478bd9Sstevel@tonic-gate * there are duplicates and we reach either side of the page, do
3127c478bd9Sstevel@tonic-gate * the key search again, so that we get them all.
3137c478bd9Sstevel@tonic-gate */
3147c478bd9Sstevel@tonic-gate redo = 0;
3157c478bd9Sstevel@tonic-gate h = e->page;
3167c478bd9Sstevel@tonic-gate do {
3177c478bd9Sstevel@tonic-gate if (__bt_dleaf(t, key, h, e->index)) {
3187c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, h, 0);
3197c478bd9Sstevel@tonic-gate return (RET_ERROR);
3207c478bd9Sstevel@tonic-gate }
3217c478bd9Sstevel@tonic-gate if (F_ISSET(t, B_NODUPS)) {
3227c478bd9Sstevel@tonic-gate if (NEXTINDEX(h) == 0) {
3237c478bd9Sstevel@tonic-gate if (__bt_pdelete(t, h))
3247c478bd9Sstevel@tonic-gate return (RET_ERROR);
3257c478bd9Sstevel@tonic-gate } else
3267c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, h, MPOOL_DIRTY);
3277c478bd9Sstevel@tonic-gate return (RET_SUCCESS);
3287c478bd9Sstevel@tonic-gate }
3297c478bd9Sstevel@tonic-gate deleted = 1;
3307c478bd9Sstevel@tonic-gate } while (e->index < NEXTINDEX(h) && __bt_cmp(t, key, e) == 0);
3317c478bd9Sstevel@tonic-gate
3327c478bd9Sstevel@tonic-gate /* Check for right-hand edge of the page. */
3337c478bd9Sstevel@tonic-gate if (e->index == NEXTINDEX(h))
3347c478bd9Sstevel@tonic-gate redo = 1;
3357c478bd9Sstevel@tonic-gate
3367c478bd9Sstevel@tonic-gate /* Delete from the key to the beginning of the page. */
3377c478bd9Sstevel@tonic-gate while (e->index-- > 0) {
3387c478bd9Sstevel@tonic-gate if (__bt_cmp(t, key, e) != 0)
3397c478bd9Sstevel@tonic-gate break;
3407c478bd9Sstevel@tonic-gate if (__bt_dleaf(t, key, h, e->index) == RET_ERROR) {
3417c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, h, 0);
3427c478bd9Sstevel@tonic-gate return (RET_ERROR);
3437c478bd9Sstevel@tonic-gate }
3447c478bd9Sstevel@tonic-gate if (e->index == 0)
3457c478bd9Sstevel@tonic-gate redo = 1;
3467c478bd9Sstevel@tonic-gate }
3477c478bd9Sstevel@tonic-gate
3487c478bd9Sstevel@tonic-gate /* Check for an empty page. */
3497c478bd9Sstevel@tonic-gate if (NEXTINDEX(h) == 0) {
3507c478bd9Sstevel@tonic-gate if (__bt_pdelete(t, h))
3517c478bd9Sstevel@tonic-gate return (RET_ERROR);
3527c478bd9Sstevel@tonic-gate goto loop;
3537c478bd9Sstevel@tonic-gate }
3547c478bd9Sstevel@tonic-gate
3557c478bd9Sstevel@tonic-gate /* Put the page. */
3567c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, h, MPOOL_DIRTY);
3577c478bd9Sstevel@tonic-gate
3587c478bd9Sstevel@tonic-gate if (redo)
3597c478bd9Sstevel@tonic-gate goto loop;
3607c478bd9Sstevel@tonic-gate return (RET_SUCCESS);
3617c478bd9Sstevel@tonic-gate }
3627c478bd9Sstevel@tonic-gate
3637c478bd9Sstevel@tonic-gate /*
3647c478bd9Sstevel@tonic-gate * __bt_pdelete --
3657c478bd9Sstevel@tonic-gate * Delete a single page from the tree.
3667c478bd9Sstevel@tonic-gate *
3677c478bd9Sstevel@tonic-gate * Parameters:
3687c478bd9Sstevel@tonic-gate * t: tree
3697c478bd9Sstevel@tonic-gate * h: leaf page
3707c478bd9Sstevel@tonic-gate *
3717c478bd9Sstevel@tonic-gate * Returns:
3727c478bd9Sstevel@tonic-gate * RET_SUCCESS, RET_ERROR.
3737c478bd9Sstevel@tonic-gate *
3747c478bd9Sstevel@tonic-gate * Side-effects:
3757c478bd9Sstevel@tonic-gate * mpool_put's the page
3767c478bd9Sstevel@tonic-gate */
3777c478bd9Sstevel@tonic-gate static int
__bt_pdelete(t,h)3787c478bd9Sstevel@tonic-gate __bt_pdelete(t, h)
3797c478bd9Sstevel@tonic-gate BTREE *t;
3807c478bd9Sstevel@tonic-gate PAGE *h;
3817c478bd9Sstevel@tonic-gate {
3827c478bd9Sstevel@tonic-gate BINTERNAL *bi;
3837c478bd9Sstevel@tonic-gate PAGE *pg;
3847c478bd9Sstevel@tonic-gate EPGNO *parent;
38556a424ccSmp indx_t cnt, idx, *ip, offset;
3867c478bd9Sstevel@tonic-gate u_int32_t nksize;
3877c478bd9Sstevel@tonic-gate char *from;
3887c478bd9Sstevel@tonic-gate
3897c478bd9Sstevel@tonic-gate /*
3907c478bd9Sstevel@tonic-gate * Walk the parent page stack -- a LIFO stack of the pages that were
3917c478bd9Sstevel@tonic-gate * traversed when we searched for the page where the delete occurred.
3927c478bd9Sstevel@tonic-gate * Each stack entry is a page number and a page index offset. The
3937c478bd9Sstevel@tonic-gate * offset is for the page traversed on the search. We've just deleted
3947c478bd9Sstevel@tonic-gate * a page, so we have to delete the key from the parent page.
3957c478bd9Sstevel@tonic-gate *
3967c478bd9Sstevel@tonic-gate * If the delete from the parent page makes it empty, this process may
3977c478bd9Sstevel@tonic-gate * continue all the way up the tree. We stop if we reach the root page
3987c478bd9Sstevel@tonic-gate * (which is never deleted, it's just not worth the effort) or if the
3997c478bd9Sstevel@tonic-gate * delete does not empty the page.
4007c478bd9Sstevel@tonic-gate */
4017c478bd9Sstevel@tonic-gate while ((parent = BT_POP(t)) != NULL) {
4027c478bd9Sstevel@tonic-gate /* Get the parent page. */
4037c478bd9Sstevel@tonic-gate if ((pg = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
4047c478bd9Sstevel@tonic-gate return (RET_ERROR);
405*1da57d55SToomas Soome
40656a424ccSmp idx = parent->index;
40756a424ccSmp bi = GETBINTERNAL(pg, idx);
4087c478bd9Sstevel@tonic-gate
4097c478bd9Sstevel@tonic-gate /* Free any overflow pages. */
4107c478bd9Sstevel@tonic-gate if (bi->flags & P_BIGKEY &&
4117c478bd9Sstevel@tonic-gate __ovfl_delete(t, bi->bytes) == RET_ERROR) {
4127c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, pg, 0);
4137c478bd9Sstevel@tonic-gate return (RET_ERROR);
4147c478bd9Sstevel@tonic-gate }
4157c478bd9Sstevel@tonic-gate
4167c478bd9Sstevel@tonic-gate /*
4177c478bd9Sstevel@tonic-gate * Free the parent if it has only the one key and it's not the
4187c478bd9Sstevel@tonic-gate * root page. If it's the rootpage, turn it back into an empty
4197c478bd9Sstevel@tonic-gate * leaf page.
4207c478bd9Sstevel@tonic-gate */
4217c478bd9Sstevel@tonic-gate if (NEXTINDEX(pg) == 1)
4227c478bd9Sstevel@tonic-gate if (pg->pgno == P_ROOT) {
4237c478bd9Sstevel@tonic-gate pg->lower = BTDATAOFF;
4247c478bd9Sstevel@tonic-gate pg->upper = t->bt_psize;
4257c478bd9Sstevel@tonic-gate pg->flags = P_BLEAF;
4267c478bd9Sstevel@tonic-gate } else {
4277c478bd9Sstevel@tonic-gate if (__bt_relink(t, pg) || __bt_free(t, pg))
4287c478bd9Sstevel@tonic-gate return (RET_ERROR);
4297c478bd9Sstevel@tonic-gate continue;
4307c478bd9Sstevel@tonic-gate }
4317c478bd9Sstevel@tonic-gate else {
4327c478bd9Sstevel@tonic-gate /* Pack remaining key items at the end of the page. */
4337c478bd9Sstevel@tonic-gate nksize = NBINTERNAL(bi->ksize);
4347c478bd9Sstevel@tonic-gate from = (char *)pg + pg->upper;
4357c478bd9Sstevel@tonic-gate memmove(from + nksize, from, (char *)bi - from);
4367c478bd9Sstevel@tonic-gate pg->upper += nksize;
4377c478bd9Sstevel@tonic-gate
4387c478bd9Sstevel@tonic-gate /* Adjust indices' offsets, shift the indices down. */
43956a424ccSmp offset = pg->linp[idx];
44056a424ccSmp for (cnt = idx, ip = &pg->linp[0]; cnt--; ++ip)
4417c478bd9Sstevel@tonic-gate if (ip[0] < offset)
4427c478bd9Sstevel@tonic-gate ip[0] += nksize;
44356a424ccSmp for (cnt = NEXTINDEX(pg) - idx; --cnt; ++ip)
4447c478bd9Sstevel@tonic-gate ip[0] = ip[1] < offset ? ip[1] + nksize : ip[1];
4457c478bd9Sstevel@tonic-gate pg->lower -= sizeof(indx_t);
4467c478bd9Sstevel@tonic-gate }
4477c478bd9Sstevel@tonic-gate
4487c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
4497c478bd9Sstevel@tonic-gate break;
4507c478bd9Sstevel@tonic-gate }
4517c478bd9Sstevel@tonic-gate
4527c478bd9Sstevel@tonic-gate /* Free the leaf page, as long as it wasn't the root. */
4537c478bd9Sstevel@tonic-gate if (h->pgno == P_ROOT) {
4547c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, h, MPOOL_DIRTY);
4557c478bd9Sstevel@tonic-gate return (RET_SUCCESS);
4567c478bd9Sstevel@tonic-gate }
4577c478bd9Sstevel@tonic-gate return (__bt_relink(t, h) || __bt_free(t, h));
4587c478bd9Sstevel@tonic-gate }
4597c478bd9Sstevel@tonic-gate
4607c478bd9Sstevel@tonic-gate /*
4617c478bd9Sstevel@tonic-gate * __bt_dleaf --
4627c478bd9Sstevel@tonic-gate * Delete a single record from a leaf page.
4637c478bd9Sstevel@tonic-gate *
4647c478bd9Sstevel@tonic-gate * Parameters:
4657c478bd9Sstevel@tonic-gate * t: tree
4667c478bd9Sstevel@tonic-gate * key: referenced key
4677c478bd9Sstevel@tonic-gate * h: page
46856a424ccSmp * idx: index on page to delete
4697c478bd9Sstevel@tonic-gate *
4707c478bd9Sstevel@tonic-gate * Returns:
4717c478bd9Sstevel@tonic-gate * RET_SUCCESS, RET_ERROR.
4727c478bd9Sstevel@tonic-gate */
4737c478bd9Sstevel@tonic-gate int
__bt_dleaf(t,key,h,idx)47456a424ccSmp __bt_dleaf(t, key, h, idx)
4757c478bd9Sstevel@tonic-gate BTREE *t;
4767c478bd9Sstevel@tonic-gate const DBT *key;
4777c478bd9Sstevel@tonic-gate PAGE *h;
47856a424ccSmp u_int idx;
4797c478bd9Sstevel@tonic-gate {
4807c478bd9Sstevel@tonic-gate BLEAF *bl;
4817c478bd9Sstevel@tonic-gate indx_t cnt, *ip, offset;
4827c478bd9Sstevel@tonic-gate u_int32_t nbytes;
4837c478bd9Sstevel@tonic-gate void *to;
4847c478bd9Sstevel@tonic-gate char *from;
4857c478bd9Sstevel@tonic-gate
4867c478bd9Sstevel@tonic-gate /* If this record is referenced by the cursor, delete the cursor. */
4877c478bd9Sstevel@tonic-gate if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
4887c478bd9Sstevel@tonic-gate !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
48956a424ccSmp t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index == idx &&
49056a424ccSmp __bt_curdel(t, key, h, idx))
4917c478bd9Sstevel@tonic-gate return (RET_ERROR);
4927c478bd9Sstevel@tonic-gate
4937c478bd9Sstevel@tonic-gate /* If the entry uses overflow pages, make them available for reuse. */
49456a424ccSmp to = bl = GETBLEAF(h, idx);
4957c478bd9Sstevel@tonic-gate if (bl->flags & P_BIGKEY && __ovfl_delete(t, bl->bytes) == RET_ERROR)
4967c478bd9Sstevel@tonic-gate return (RET_ERROR);
4977c478bd9Sstevel@tonic-gate if (bl->flags & P_BIGDATA &&
4987c478bd9Sstevel@tonic-gate __ovfl_delete(t, bl->bytes + bl->ksize) == RET_ERROR)
4997c478bd9Sstevel@tonic-gate return (RET_ERROR);
5007c478bd9Sstevel@tonic-gate
5017c478bd9Sstevel@tonic-gate /* Pack the remaining key/data items at the end of the page. */
5027c478bd9Sstevel@tonic-gate nbytes = NBLEAF(bl);
5037c478bd9Sstevel@tonic-gate from = (char *)h + h->upper;
5047c478bd9Sstevel@tonic-gate memmove(from + nbytes, from, (char *)to - from);
5057c478bd9Sstevel@tonic-gate h->upper += nbytes;
5067c478bd9Sstevel@tonic-gate
5077c478bd9Sstevel@tonic-gate /* Adjust the indices' offsets, shift the indices down. */
50856a424ccSmp offset = h->linp[idx];
50956a424ccSmp for (cnt = idx, ip = &h->linp[0]; cnt--; ++ip)
5107c478bd9Sstevel@tonic-gate if (ip[0] < offset)
5117c478bd9Sstevel@tonic-gate ip[0] += nbytes;
51256a424ccSmp for (cnt = NEXTINDEX(h) - idx; --cnt; ++ip)
5137c478bd9Sstevel@tonic-gate ip[0] = ip[1] < offset ? ip[1] + nbytes : ip[1];
5147c478bd9Sstevel@tonic-gate h->lower -= sizeof(indx_t);
5157c478bd9Sstevel@tonic-gate
5167c478bd9Sstevel@tonic-gate /* If the cursor is on this page, adjust it as necessary. */
5177c478bd9Sstevel@tonic-gate if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
5187c478bd9Sstevel@tonic-gate !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
51956a424ccSmp t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index > idx)
5207c478bd9Sstevel@tonic-gate --t->bt_cursor.pg.index;
5217c478bd9Sstevel@tonic-gate
5227c478bd9Sstevel@tonic-gate return (RET_SUCCESS);
5237c478bd9Sstevel@tonic-gate }
5247c478bd9Sstevel@tonic-gate
5257c478bd9Sstevel@tonic-gate /*
5267c478bd9Sstevel@tonic-gate * __bt_curdel --
5277c478bd9Sstevel@tonic-gate * Delete the cursor.
5287c478bd9Sstevel@tonic-gate *
5297c478bd9Sstevel@tonic-gate * Parameters:
5307c478bd9Sstevel@tonic-gate * t: tree
5317c478bd9Sstevel@tonic-gate * key: referenced key (or NULL)
5327c478bd9Sstevel@tonic-gate * h: page
53356a424ccSmp * idx: idx on page to delete
5347c478bd9Sstevel@tonic-gate *
5357c478bd9Sstevel@tonic-gate * Returns:
5367c478bd9Sstevel@tonic-gate * RET_SUCCESS, RET_ERROR.
5377c478bd9Sstevel@tonic-gate */
5387c478bd9Sstevel@tonic-gate static int
__bt_curdel(t,key,h,idx)53956a424ccSmp __bt_curdel(t, key, h, idx)
5407c478bd9Sstevel@tonic-gate BTREE *t;
5417c478bd9Sstevel@tonic-gate const DBT *key;
5427c478bd9Sstevel@tonic-gate PAGE *h;
54356a424ccSmp u_int idx;
5447c478bd9Sstevel@tonic-gate {
5457c478bd9Sstevel@tonic-gate CURSOR *c;
5467c478bd9Sstevel@tonic-gate EPG e;
5477c478bd9Sstevel@tonic-gate PAGE *pg;
5487c478bd9Sstevel@tonic-gate int curcopy, status;
5497c478bd9Sstevel@tonic-gate
5507c478bd9Sstevel@tonic-gate /*
5517c478bd9Sstevel@tonic-gate * If there are duplicates, move forward or backward to one.
5527c478bd9Sstevel@tonic-gate * Otherwise, copy the key into the cursor area.
5537c478bd9Sstevel@tonic-gate */
5547c478bd9Sstevel@tonic-gate c = &t->bt_cursor;
5557c478bd9Sstevel@tonic-gate F_CLR(c, CURS_AFTER | CURS_BEFORE | CURS_ACQUIRE);
5567c478bd9Sstevel@tonic-gate
5577c478bd9Sstevel@tonic-gate curcopy = 0;
5587c478bd9Sstevel@tonic-gate if (!F_ISSET(t, B_NODUPS)) {
5597c478bd9Sstevel@tonic-gate /*
5607c478bd9Sstevel@tonic-gate * We're going to have to do comparisons. If we weren't
5617c478bd9Sstevel@tonic-gate * provided a copy of the key, i.e. the user is deleting
5627c478bd9Sstevel@tonic-gate * the current cursor position, get one.
5637c478bd9Sstevel@tonic-gate */
5647c478bd9Sstevel@tonic-gate if (key == NULL) {
5657c478bd9Sstevel@tonic-gate e.page = h;
56656a424ccSmp e.index = idx;
5677c478bd9Sstevel@tonic-gate if ((status = __bt_ret(t, &e,
5687c478bd9Sstevel@tonic-gate &c->key, &c->key, NULL, NULL, 1)) != RET_SUCCESS)
5697c478bd9Sstevel@tonic-gate return (status);
5707c478bd9Sstevel@tonic-gate curcopy = 1;
5717c478bd9Sstevel@tonic-gate key = &c->key;
5727c478bd9Sstevel@tonic-gate }
5737c478bd9Sstevel@tonic-gate /* Check previous key, if not at the beginning of the page. */
574*1da57d55SToomas Soome if (idx > 0) {
5757c478bd9Sstevel@tonic-gate e.page = h;
57656a424ccSmp e.index = idx - 1;
5777c478bd9Sstevel@tonic-gate if (__bt_cmp(t, key, &e) == 0) {
5787c478bd9Sstevel@tonic-gate F_SET(c, CURS_BEFORE);
5797c478bd9Sstevel@tonic-gate goto dup2;
5807c478bd9Sstevel@tonic-gate }
5817c478bd9Sstevel@tonic-gate }
5827c478bd9Sstevel@tonic-gate /* Check next key, if not at the end of the page. */
58356a424ccSmp if (idx < NEXTINDEX(h) - 1) {
5847c478bd9Sstevel@tonic-gate e.page = h;
58556a424ccSmp e.index = idx + 1;
5867c478bd9Sstevel@tonic-gate if (__bt_cmp(t, key, &e) == 0) {
5877c478bd9Sstevel@tonic-gate F_SET(c, CURS_AFTER);
5887c478bd9Sstevel@tonic-gate goto dup2;
5897c478bd9Sstevel@tonic-gate }
5907c478bd9Sstevel@tonic-gate }
5917c478bd9Sstevel@tonic-gate /* Check previous key if at the beginning of the page. */
59256a424ccSmp if (idx == 0 && h->prevpg != P_INVALID) {
5937c478bd9Sstevel@tonic-gate if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
5947c478bd9Sstevel@tonic-gate return (RET_ERROR);
5957c478bd9Sstevel@tonic-gate e.page = pg;
5967c478bd9Sstevel@tonic-gate e.index = NEXTINDEX(pg) - 1;
5977c478bd9Sstevel@tonic-gate if (__bt_cmp(t, key, &e) == 0) {
5987c478bd9Sstevel@tonic-gate F_SET(c, CURS_BEFORE);
5997c478bd9Sstevel@tonic-gate goto dup1;
6007c478bd9Sstevel@tonic-gate }
6017c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, pg, 0);
6027c478bd9Sstevel@tonic-gate }
6037c478bd9Sstevel@tonic-gate /* Check next key if at the end of the page. */
60456a424ccSmp if (idx == NEXTINDEX(h) - 1 && h->nextpg != P_INVALID) {
6057c478bd9Sstevel@tonic-gate if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
6067c478bd9Sstevel@tonic-gate return (RET_ERROR);
6077c478bd9Sstevel@tonic-gate e.page = pg;
6087c478bd9Sstevel@tonic-gate e.index = 0;
6097c478bd9Sstevel@tonic-gate if (__bt_cmp(t, key, &e) == 0) {
6107c478bd9Sstevel@tonic-gate F_SET(c, CURS_AFTER);
6117c478bd9Sstevel@tonic-gate dup1: mpool_put(t->bt_mp, pg, 0);
6127c478bd9Sstevel@tonic-gate dup2: c->pg.pgno = e.page->pgno;
6137c478bd9Sstevel@tonic-gate c->pg.index = e.index;
6147c478bd9Sstevel@tonic-gate return (RET_SUCCESS);
6157c478bd9Sstevel@tonic-gate }
6167c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, pg, 0);
6177c478bd9Sstevel@tonic-gate }
6187c478bd9Sstevel@tonic-gate }
6197c478bd9Sstevel@tonic-gate e.page = h;
62056a424ccSmp e.index = idx;
6217c478bd9Sstevel@tonic-gate if (curcopy || (status =
6227c478bd9Sstevel@tonic-gate __bt_ret(t, &e, &c->key, &c->key, NULL, NULL, 1)) == RET_SUCCESS) {
6237c478bd9Sstevel@tonic-gate F_SET(c, CURS_ACQUIRE);
6247c478bd9Sstevel@tonic-gate return (RET_SUCCESS);
6257c478bd9Sstevel@tonic-gate }
6267c478bd9Sstevel@tonic-gate return (status);
6277c478bd9Sstevel@tonic-gate }
6287c478bd9Sstevel@tonic-gate
6297c478bd9Sstevel@tonic-gate /*
6307c478bd9Sstevel@tonic-gate * __bt_relink --
6317c478bd9Sstevel@tonic-gate * Link around a deleted page.
6327c478bd9Sstevel@tonic-gate *
6337c478bd9Sstevel@tonic-gate * Parameters:
6347c478bd9Sstevel@tonic-gate * t: tree
6357c478bd9Sstevel@tonic-gate * h: page to be deleted
6367c478bd9Sstevel@tonic-gate */
6377c478bd9Sstevel@tonic-gate static int
__bt_relink(t,h)6387c478bd9Sstevel@tonic-gate __bt_relink(t, h)
6397c478bd9Sstevel@tonic-gate BTREE *t;
6407c478bd9Sstevel@tonic-gate PAGE *h;
6417c478bd9Sstevel@tonic-gate {
6427c478bd9Sstevel@tonic-gate PAGE *pg;
6437c478bd9Sstevel@tonic-gate
6447c478bd9Sstevel@tonic-gate if (h->nextpg != P_INVALID) {
6457c478bd9Sstevel@tonic-gate if ((pg = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL)
6467c478bd9Sstevel@tonic-gate return (RET_ERROR);
6477c478bd9Sstevel@tonic-gate pg->prevpg = h->prevpg;
6487c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
6497c478bd9Sstevel@tonic-gate }
6507c478bd9Sstevel@tonic-gate if (h->prevpg != P_INVALID) {
6517c478bd9Sstevel@tonic-gate if ((pg = mpool_get(t->bt_mp, h->prevpg, 0)) == NULL)
6527c478bd9Sstevel@tonic-gate return (RET_ERROR);
6537c478bd9Sstevel@tonic-gate pg->nextpg = h->nextpg;
6547c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, pg, MPOOL_DIRTY);
6557c478bd9Sstevel@tonic-gate }
6567c478bd9Sstevel@tonic-gate return (0);
6577c478bd9Sstevel@tonic-gate }
658