17c478bd9Sstevel@tonic-gate /*-
27c478bd9Sstevel@tonic-gate  * Copyright (c) 1990, 1993, 1994
37c478bd9Sstevel@tonic-gate  *	The Regents of the University of California.  All rights reserved.
47c478bd9Sstevel@tonic-gate  *
57c478bd9Sstevel@tonic-gate  * This code is derived from software contributed to Berkeley by
67c478bd9Sstevel@tonic-gate  * Mike Olson.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * Redistribution and use in source and binary forms, with or without
97c478bd9Sstevel@tonic-gate  * modification, are permitted provided that the following conditions
107c478bd9Sstevel@tonic-gate  * are met:
117c478bd9Sstevel@tonic-gate  * 1. Redistributions of source code must retain the above copyright
127c478bd9Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer.
137c478bd9Sstevel@tonic-gate  * 2. Redistributions in binary form must reproduce the above copyright
147c478bd9Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer in the
157c478bd9Sstevel@tonic-gate  *    documentation and/or other materials provided with the distribution.
167c478bd9Sstevel@tonic-gate  * 3. All advertising materials mentioning features or use of this software
177c478bd9Sstevel@tonic-gate  *    must display the following acknowledgement:
187c478bd9Sstevel@tonic-gate  *	This product includes software developed by the University of
197c478bd9Sstevel@tonic-gate  *	California, Berkeley and its contributors.
207c478bd9Sstevel@tonic-gate  * 4. Neither the name of the University nor the names of its contributors
217c478bd9Sstevel@tonic-gate  *    may be used to endorse or promote products derived from this software
227c478bd9Sstevel@tonic-gate  *    without specific prior written permission.
237c478bd9Sstevel@tonic-gate  *
247c478bd9Sstevel@tonic-gate  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
257c478bd9Sstevel@tonic-gate  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
267c478bd9Sstevel@tonic-gate  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
277c478bd9Sstevel@tonic-gate  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
287c478bd9Sstevel@tonic-gate  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
297c478bd9Sstevel@tonic-gate  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
307c478bd9Sstevel@tonic-gate  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
317c478bd9Sstevel@tonic-gate  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
327c478bd9Sstevel@tonic-gate  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
337c478bd9Sstevel@tonic-gate  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
347c478bd9Sstevel@tonic-gate  * SUCH DAMAGE.
357c478bd9Sstevel@tonic-gate  */
367c478bd9Sstevel@tonic-gate 
377c478bd9Sstevel@tonic-gate #if defined(LIBC_SCCS) && !defined(lint)
387c478bd9Sstevel@tonic-gate static char sccsid[] = "@(#)bt_overflow.c	8.5 (Berkeley) 7/16/94";
397c478bd9Sstevel@tonic-gate #endif /* LIBC_SCCS and not lint */
407c478bd9Sstevel@tonic-gate 
417c478bd9Sstevel@tonic-gate #include <sys/param.h>
427c478bd9Sstevel@tonic-gate 
437c478bd9Sstevel@tonic-gate #include <stdio.h>
447c478bd9Sstevel@tonic-gate #include <stdlib.h>
457c478bd9Sstevel@tonic-gate #include <string.h>
467c478bd9Sstevel@tonic-gate 
477c478bd9Sstevel@tonic-gate #include "db-int.h"
487c478bd9Sstevel@tonic-gate #include "btree.h"
497c478bd9Sstevel@tonic-gate 
507c478bd9Sstevel@tonic-gate /*
517c478bd9Sstevel@tonic-gate  * Big key/data code.
527c478bd9Sstevel@tonic-gate  *
537c478bd9Sstevel@tonic-gate  * Big key and data entries are stored on linked lists of pages.  The initial
547c478bd9Sstevel@tonic-gate  * reference is byte string stored with the key or data and is the page number
557c478bd9Sstevel@tonic-gate  * and size.  The actual record is stored in a chain of pages linked by the
567c478bd9Sstevel@tonic-gate  * nextpg field of the PAGE header.
577c478bd9Sstevel@tonic-gate  *
587c478bd9Sstevel@tonic-gate  * The first page of the chain has a special property.  If the record is used
597c478bd9Sstevel@tonic-gate  * by an internal page, it cannot be deleted and the P_PRESERVE bit will be set
607c478bd9Sstevel@tonic-gate  * in the header.
617c478bd9Sstevel@tonic-gate  *
627c478bd9Sstevel@tonic-gate  * XXX
637c478bd9Sstevel@tonic-gate  * A single DBT is written to each chain, so a lot of space on the last page
647c478bd9Sstevel@tonic-gate  * is wasted.  This is a fairly major bug for some data sets.
657c478bd9Sstevel@tonic-gate  */
667c478bd9Sstevel@tonic-gate 
677c478bd9Sstevel@tonic-gate /*
687c478bd9Sstevel@tonic-gate  * __OVFL_GET -- Get an overflow key/data item.
697c478bd9Sstevel@tonic-gate  *
707c478bd9Sstevel@tonic-gate  * Parameters:
717c478bd9Sstevel@tonic-gate  *	t:	tree
727c478bd9Sstevel@tonic-gate  *	p:	pointer to { db_pgno_t, u_int32_t }
737c478bd9Sstevel@tonic-gate  *	buf:	storage address
747c478bd9Sstevel@tonic-gate  *	bufsz:	storage size
757c478bd9Sstevel@tonic-gate  *
767c478bd9Sstevel@tonic-gate  * Returns:
777c478bd9Sstevel@tonic-gate  *	RET_ERROR, RET_SUCCESS
787c478bd9Sstevel@tonic-gate  */
797c478bd9Sstevel@tonic-gate int
__ovfl_get(t,p,ssz,buf,bufsz)807c478bd9Sstevel@tonic-gate __ovfl_get(t, p, ssz, buf, bufsz)
817c478bd9Sstevel@tonic-gate 	BTREE *t;
827c478bd9Sstevel@tonic-gate 	void *p;
837c478bd9Sstevel@tonic-gate 	size_t *ssz;
847c478bd9Sstevel@tonic-gate 	void **buf;
857c478bd9Sstevel@tonic-gate 	size_t *bufsz;
867c478bd9Sstevel@tonic-gate {
877c478bd9Sstevel@tonic-gate 	PAGE *h;
887c478bd9Sstevel@tonic-gate 	db_pgno_t pg;
897c478bd9Sstevel@tonic-gate 	size_t nb, plen;
907c478bd9Sstevel@tonic-gate 	u_int32_t sz;
917c478bd9Sstevel@tonic-gate 
927c478bd9Sstevel@tonic-gate 	memmove(&pg, p, sizeof(db_pgno_t));
937c478bd9Sstevel@tonic-gate 	memmove(&sz, (char *)p + sizeof(db_pgno_t), sizeof(u_int32_t));
947c478bd9Sstevel@tonic-gate 	*ssz = sz;
957c478bd9Sstevel@tonic-gate 
9656a424ccSmp #ifdef DEBUG
977c478bd9Sstevel@tonic-gate 	if (pg == P_INVALID || sz == 0)
987c478bd9Sstevel@tonic-gate 		abort();
997c478bd9Sstevel@tonic-gate #endif
1007c478bd9Sstevel@tonic-gate 	/* Make the buffer bigger as necessary. */
1017c478bd9Sstevel@tonic-gate 	if (*bufsz < sz) {
1027c478bd9Sstevel@tonic-gate 		*buf = (char *)(*buf == NULL ? malloc(sz) : realloc(*buf, sz));
1037c478bd9Sstevel@tonic-gate 		if (*buf == NULL)
1047c478bd9Sstevel@tonic-gate 			return (RET_ERROR);
1057c478bd9Sstevel@tonic-gate 		*bufsz = sz;
1067c478bd9Sstevel@tonic-gate 	}
1077c478bd9Sstevel@tonic-gate 
1087c478bd9Sstevel@tonic-gate 	/*
1097c478bd9Sstevel@tonic-gate 	 * Step through the linked list of pages, copying the data on each one
1107c478bd9Sstevel@tonic-gate 	 * into the buffer.  Never copy more than the data's length.
1117c478bd9Sstevel@tonic-gate 	 */
1127c478bd9Sstevel@tonic-gate 	plen = t->bt_psize - BTDATAOFF;
1137c478bd9Sstevel@tonic-gate 	for (p = *buf;; p = (char *)p + nb, pg = h->nextpg) {
1147c478bd9Sstevel@tonic-gate 		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
1157c478bd9Sstevel@tonic-gate 			return (RET_ERROR);
1167c478bd9Sstevel@tonic-gate 
1177c478bd9Sstevel@tonic-gate 		nb = MIN(sz, plen);
1187c478bd9Sstevel@tonic-gate 		memmove(p, (char *)h + BTDATAOFF, nb);
1197c478bd9Sstevel@tonic-gate 		mpool_put(t->bt_mp, h, 0);
1207c478bd9Sstevel@tonic-gate 
1217c478bd9Sstevel@tonic-gate 		if ((sz -= nb) == 0)
1227c478bd9Sstevel@tonic-gate 			break;
1237c478bd9Sstevel@tonic-gate 	}
1247c478bd9Sstevel@tonic-gate 	return (RET_SUCCESS);
1257c478bd9Sstevel@tonic-gate }
1267c478bd9Sstevel@tonic-gate 
1277c478bd9Sstevel@tonic-gate /*
1287c478bd9Sstevel@tonic-gate  * __OVFL_PUT -- Store an overflow key/data item.
1297c478bd9Sstevel@tonic-gate  *
1307c478bd9Sstevel@tonic-gate  * Parameters:
1317c478bd9Sstevel@tonic-gate  *	t:	tree
1327c478bd9Sstevel@tonic-gate  *	data:	DBT to store
1337c478bd9Sstevel@tonic-gate  *	pgno:	storage page number
1347c478bd9Sstevel@tonic-gate  *
1357c478bd9Sstevel@tonic-gate  * Returns:
1367c478bd9Sstevel@tonic-gate  *	RET_ERROR, RET_SUCCESS
1377c478bd9Sstevel@tonic-gate  */
1387c478bd9Sstevel@tonic-gate int
__ovfl_put(t,dbt,pg)1397c478bd9Sstevel@tonic-gate __ovfl_put(t, dbt, pg)
1407c478bd9Sstevel@tonic-gate 	BTREE *t;
1417c478bd9Sstevel@tonic-gate 	const DBT *dbt;
1427c478bd9Sstevel@tonic-gate 	db_pgno_t *pg;
1437c478bd9Sstevel@tonic-gate {
1447c478bd9Sstevel@tonic-gate 	PAGE *h, *last;
1457c478bd9Sstevel@tonic-gate 	void *p;
1467c478bd9Sstevel@tonic-gate 	db_pgno_t npg;
1477c478bd9Sstevel@tonic-gate 	size_t nb, plen;
1487c478bd9Sstevel@tonic-gate 	u_int32_t sz;
1497c478bd9Sstevel@tonic-gate 
1507c478bd9Sstevel@tonic-gate 	/*
1517c478bd9Sstevel@tonic-gate 	 * Allocate pages and copy the key/data record into them.  Store the
1527c478bd9Sstevel@tonic-gate 	 * number of the first page in the chain.
1537c478bd9Sstevel@tonic-gate 	 */
1547c478bd9Sstevel@tonic-gate 	plen = t->bt_psize - BTDATAOFF;
1557c478bd9Sstevel@tonic-gate 	for (last = NULL, p = dbt->data, sz = dbt->size;;
1567c478bd9Sstevel@tonic-gate 	    p = (char *)p + plen, last = h) {
1577c478bd9Sstevel@tonic-gate 		if ((h = __bt_new(t, &npg)) == NULL)
1587c478bd9Sstevel@tonic-gate 			return (RET_ERROR);
1597c478bd9Sstevel@tonic-gate 
1607c478bd9Sstevel@tonic-gate 		h->pgno = npg;
1617c478bd9Sstevel@tonic-gate 		h->nextpg = h->prevpg = P_INVALID;
1627c478bd9Sstevel@tonic-gate 		h->flags = P_OVERFLOW;
1637c478bd9Sstevel@tonic-gate 		h->lower = h->upper = 0;
1647c478bd9Sstevel@tonic-gate 
1657c478bd9Sstevel@tonic-gate 		nb = MIN(sz, plen);
1667c478bd9Sstevel@tonic-gate 		memmove((char *)h + BTDATAOFF, p, nb);
1677c478bd9Sstevel@tonic-gate 
1687c478bd9Sstevel@tonic-gate 		if (last) {
1697c478bd9Sstevel@tonic-gate 			last->nextpg = h->pgno;
1707c478bd9Sstevel@tonic-gate 			mpool_put(t->bt_mp, last, MPOOL_DIRTY);
1717c478bd9Sstevel@tonic-gate 		} else
1727c478bd9Sstevel@tonic-gate 			*pg = h->pgno;
1737c478bd9Sstevel@tonic-gate 
1747c478bd9Sstevel@tonic-gate 		if ((sz -= nb) == 0) {
1757c478bd9Sstevel@tonic-gate 			mpool_put(t->bt_mp, h, MPOOL_DIRTY);
1767c478bd9Sstevel@tonic-gate 			break;
1777c478bd9Sstevel@tonic-gate 		}
1787c478bd9Sstevel@tonic-gate 	}
1797c478bd9Sstevel@tonic-gate 	return (RET_SUCCESS);
1807c478bd9Sstevel@tonic-gate }
1817c478bd9Sstevel@tonic-gate 
1827c478bd9Sstevel@tonic-gate /*
1837c478bd9Sstevel@tonic-gate  * __OVFL_DELETE -- Delete an overflow chain.
1847c478bd9Sstevel@tonic-gate  *
1857c478bd9Sstevel@tonic-gate  * Parameters:
1867c478bd9Sstevel@tonic-gate  *	t:	tree
1877c478bd9Sstevel@tonic-gate  *	p:	pointer to { db_pgno_t, u_int32_t }
1887c478bd9Sstevel@tonic-gate  *
1897c478bd9Sstevel@tonic-gate  * Returns:
1907c478bd9Sstevel@tonic-gate  *	RET_ERROR, RET_SUCCESS
1917c478bd9Sstevel@tonic-gate  */
1927c478bd9Sstevel@tonic-gate int
__ovfl_delete(t,p)1937c478bd9Sstevel@tonic-gate __ovfl_delete(t, p)
1947c478bd9Sstevel@tonic-gate 	BTREE *t;
1957c478bd9Sstevel@tonic-gate 	void *p;
1967c478bd9Sstevel@tonic-gate {
1977c478bd9Sstevel@tonic-gate 	PAGE *h;
1987c478bd9Sstevel@tonic-gate 	db_pgno_t pg;
1997c478bd9Sstevel@tonic-gate 	size_t plen;
2007c478bd9Sstevel@tonic-gate 	u_int32_t sz;
2017c478bd9Sstevel@tonic-gate 
2027c478bd9Sstevel@tonic-gate 	memmove(&pg, p, sizeof(db_pgno_t));
2037c478bd9Sstevel@tonic-gate 	memmove(&sz, (char *)p + sizeof(db_pgno_t), sizeof(u_int32_t));
2047c478bd9Sstevel@tonic-gate 
20556a424ccSmp #ifdef DEBUG
2067c478bd9Sstevel@tonic-gate 	if (pg == P_INVALID || sz == 0)
2077c478bd9Sstevel@tonic-gate 		abort();
2087c478bd9Sstevel@tonic-gate #endif
2097c478bd9Sstevel@tonic-gate 	if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
2107c478bd9Sstevel@tonic-gate 		return (RET_ERROR);
2117c478bd9Sstevel@tonic-gate 
2127c478bd9Sstevel@tonic-gate 	/* Don't delete chains used by internal pages. */
2137c478bd9Sstevel@tonic-gate 	if (h->flags & P_PRESERVE) {
2147c478bd9Sstevel@tonic-gate 		mpool_put(t->bt_mp, h, 0);
2157c478bd9Sstevel@tonic-gate 		return (RET_SUCCESS);
2167c478bd9Sstevel@tonic-gate 	}
2177c478bd9Sstevel@tonic-gate 
2187c478bd9Sstevel@tonic-gate 	/* Step through the chain, calling the free routine for each page. */
2197c478bd9Sstevel@tonic-gate 	for (plen = t->bt_psize - BTDATAOFF;; sz -= plen) {
2207c478bd9Sstevel@tonic-gate 		pg = h->nextpg;
2217c478bd9Sstevel@tonic-gate 		__bt_free(t, h);
2227c478bd9Sstevel@tonic-gate 		if (sz <= plen)
2237c478bd9Sstevel@tonic-gate 			break;
2247c478bd9Sstevel@tonic-gate 		if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
2257c478bd9Sstevel@tonic-gate 			return (RET_ERROR);
2267c478bd9Sstevel@tonic-gate 	}
2277c478bd9Sstevel@tonic-gate 	return (RET_SUCCESS);
2287c478bd9Sstevel@tonic-gate }
229