17c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 27c478bd9Sstevel@tonic-gate 37c478bd9Sstevel@tonic-gate /*- 47c478bd9Sstevel@tonic-gate * Copyright (c) 1990, 1993, 1994 57c478bd9Sstevel@tonic-gate * The Regents of the University of California. All rights reserved. 67c478bd9Sstevel@tonic-gate * 77c478bd9Sstevel@tonic-gate * This code is derived from software contributed to Berkeley by 87c478bd9Sstevel@tonic-gate * Mike Olson. 97c478bd9Sstevel@tonic-gate * 107c478bd9Sstevel@tonic-gate * Redistribution and use in source and binary forms, with or without 117c478bd9Sstevel@tonic-gate * modification, are permitted provided that the following conditions 127c478bd9Sstevel@tonic-gate * are met: 137c478bd9Sstevel@tonic-gate * 1. Redistributions of source code must retain the above copyright 147c478bd9Sstevel@tonic-gate * notice, this list of conditions and the following disclaimer. 157c478bd9Sstevel@tonic-gate * 2. Redistributions in binary form must reproduce the above copyright 167c478bd9Sstevel@tonic-gate * notice, this list of conditions and the following disclaimer in the 177c478bd9Sstevel@tonic-gate * documentation and/or other materials provided with the distribution. 187c478bd9Sstevel@tonic-gate * 3. All advertising materials mentioning features or use of this software 197c478bd9Sstevel@tonic-gate * must display the following acknowledgement: 207c478bd9Sstevel@tonic-gate * This product includes software developed by the University of 217c478bd9Sstevel@tonic-gate * California, Berkeley and its contributors. 227c478bd9Sstevel@tonic-gate * 4. Neither the name of the University nor the names of its contributors 237c478bd9Sstevel@tonic-gate * may be used to endorse or promote products derived from this software 247c478bd9Sstevel@tonic-gate * without specific prior written permission. 257c478bd9Sstevel@tonic-gate * 267c478bd9Sstevel@tonic-gate * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 277c478bd9Sstevel@tonic-gate * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 287c478bd9Sstevel@tonic-gate * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 297c478bd9Sstevel@tonic-gate * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 307c478bd9Sstevel@tonic-gate * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 317c478bd9Sstevel@tonic-gate * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 327c478bd9Sstevel@tonic-gate * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 337c478bd9Sstevel@tonic-gate * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 347c478bd9Sstevel@tonic-gate * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 357c478bd9Sstevel@tonic-gate * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 367c478bd9Sstevel@tonic-gate * SUCH DAMAGE. 377c478bd9Sstevel@tonic-gate */ 387c478bd9Sstevel@tonic-gate 397c478bd9Sstevel@tonic-gate #if defined(LIBC_SCCS) && !defined(lint) 407c478bd9Sstevel@tonic-gate static char sccsid[] = "@(#)bt_open.c 8.11 (Berkeley) 11/2/95"; 417c478bd9Sstevel@tonic-gate #endif /* LIBC_SCCS and not lint */ 427c478bd9Sstevel@tonic-gate 437c478bd9Sstevel@tonic-gate /* 447c478bd9Sstevel@tonic-gate * Implementation of btree access method for 4.4BSD. 457c478bd9Sstevel@tonic-gate * 467c478bd9Sstevel@tonic-gate * The design here was originally based on that of the btree access method 477c478bd9Sstevel@tonic-gate * used in the Postgres database system at UC Berkeley. This implementation 487c478bd9Sstevel@tonic-gate * is wholly independent of the Postgres code. 497c478bd9Sstevel@tonic-gate */ 507c478bd9Sstevel@tonic-gate 517c478bd9Sstevel@tonic-gate #include <sys/param.h> 527c478bd9Sstevel@tonic-gate #include <sys/stat.h> 537c478bd9Sstevel@tonic-gate 547c478bd9Sstevel@tonic-gate #include <errno.h> 557c478bd9Sstevel@tonic-gate #include <fcntl.h> 567c478bd9Sstevel@tonic-gate #include <limits.h> 577c478bd9Sstevel@tonic-gate #include <signal.h> 587c478bd9Sstevel@tonic-gate #include <stdio.h> 597c478bd9Sstevel@tonic-gate #include <stdlib.h> 607c478bd9Sstevel@tonic-gate #include <string.h> 617c478bd9Sstevel@tonic-gate #include <unistd.h> 627c478bd9Sstevel@tonic-gate 637c478bd9Sstevel@tonic-gate #include "db-int.h" 647c478bd9Sstevel@tonic-gate #include "btree.h" 657c478bd9Sstevel@tonic-gate 6656a424ccSmp #ifdef DEBUG 677c478bd9Sstevel@tonic-gate #undef MINPSIZE 687c478bd9Sstevel@tonic-gate #define MINPSIZE 128 697c478bd9Sstevel@tonic-gate #endif 707c478bd9Sstevel@tonic-gate 717c478bd9Sstevel@tonic-gate static int byteorder __P((void)); 727c478bd9Sstevel@tonic-gate static int nroot __P((BTREE *)); 737c478bd9Sstevel@tonic-gate static int tmp __P((void)); 747c478bd9Sstevel@tonic-gate 757c478bd9Sstevel@tonic-gate /* 767c478bd9Sstevel@tonic-gate * __BT_OPEN -- Open a btree. 777c478bd9Sstevel@tonic-gate * 787c478bd9Sstevel@tonic-gate * Creates and fills a DB struct, and calls the routine that actually 797c478bd9Sstevel@tonic-gate * opens the btree. 807c478bd9Sstevel@tonic-gate * 817c478bd9Sstevel@tonic-gate * Parameters: 827c478bd9Sstevel@tonic-gate * fname: filename (NULL for in-memory trees) 837c478bd9Sstevel@tonic-gate * flags: open flag bits 847c478bd9Sstevel@tonic-gate * mode: open permission bits 857c478bd9Sstevel@tonic-gate * b: BTREEINFO pointer 867c478bd9Sstevel@tonic-gate * 877c478bd9Sstevel@tonic-gate * Returns: 887c478bd9Sstevel@tonic-gate * NULL on failure, pointer to DB on success. 897c478bd9Sstevel@tonic-gate * 907c478bd9Sstevel@tonic-gate */ 917c478bd9Sstevel@tonic-gate DB * 927c478bd9Sstevel@tonic-gate __bt_open(fname, flags, mode, openinfo, dflags) 937c478bd9Sstevel@tonic-gate const char *fname; 947c478bd9Sstevel@tonic-gate int flags, mode, dflags; 957c478bd9Sstevel@tonic-gate const BTREEINFO *openinfo; 967c478bd9Sstevel@tonic-gate { 977c478bd9Sstevel@tonic-gate struct stat sb; 987c478bd9Sstevel@tonic-gate BTMETA m; 997c478bd9Sstevel@tonic-gate BTREE *t; 1007c478bd9Sstevel@tonic-gate BTREEINFO b; 1017c478bd9Sstevel@tonic-gate DB *dbp; 1027c478bd9Sstevel@tonic-gate db_pgno_t ncache; 1037c478bd9Sstevel@tonic-gate ssize_t nr; 1047c478bd9Sstevel@tonic-gate int machine_lorder; 1057c478bd9Sstevel@tonic-gate 1067c478bd9Sstevel@tonic-gate t = NULL; 1077c478bd9Sstevel@tonic-gate 1087c478bd9Sstevel@tonic-gate /* 1097c478bd9Sstevel@tonic-gate * Intention is to make sure all of the user's selections are okay 1107c478bd9Sstevel@tonic-gate * here and then use them without checking. Can't be complete, since 1117c478bd9Sstevel@tonic-gate * we don't know the right page size, lorder or flags until the backing 1127c478bd9Sstevel@tonic-gate * file is opened. Also, the file's page size can cause the cachesize 1137c478bd9Sstevel@tonic-gate * to change. 1147c478bd9Sstevel@tonic-gate */ 1157c478bd9Sstevel@tonic-gate machine_lorder = byteorder(); 1167c478bd9Sstevel@tonic-gate if (openinfo) { 1177c478bd9Sstevel@tonic-gate b = *openinfo; 1187c478bd9Sstevel@tonic-gate 1197c478bd9Sstevel@tonic-gate /* Flags: R_DUP. */ 1207c478bd9Sstevel@tonic-gate if (b.flags & ~(R_DUP)) 1217c478bd9Sstevel@tonic-gate goto einval; 1227c478bd9Sstevel@tonic-gate 1237c478bd9Sstevel@tonic-gate /* 1247c478bd9Sstevel@tonic-gate * Page size must be indx_t aligned and >= MINPSIZE. Default 1257c478bd9Sstevel@tonic-gate * page size is set farther on, based on the underlying file 1267c478bd9Sstevel@tonic-gate * transfer size. 1277c478bd9Sstevel@tonic-gate */ 1287c478bd9Sstevel@tonic-gate if (b.psize && 1297c478bd9Sstevel@tonic-gate (b.psize < MINPSIZE || b.psize > MAX_PAGE_OFFSET + 1 || 13056a424ccSmp b.psize & (sizeof(indx_t) - 1))) 1317c478bd9Sstevel@tonic-gate goto einval; 1327c478bd9Sstevel@tonic-gate 1337c478bd9Sstevel@tonic-gate /* Minimum number of keys per page; absolute minimum is 2. */ 1347c478bd9Sstevel@tonic-gate if (b.minkeypage) { 1357c478bd9Sstevel@tonic-gate if (b.minkeypage < 2) 1367c478bd9Sstevel@tonic-gate goto einval; 1377c478bd9Sstevel@tonic-gate } else 1387c478bd9Sstevel@tonic-gate b.minkeypage = DEFMINKEYPAGE; 1397c478bd9Sstevel@tonic-gate 1407c478bd9Sstevel@tonic-gate /* If no comparison, use default comparison and prefix. */ 1417c478bd9Sstevel@tonic-gate if (b.compare == NULL) { 1427c478bd9Sstevel@tonic-gate b.compare = __bt_defcmp; 1437c478bd9Sstevel@tonic-gate if (b.prefix == NULL) 1447c478bd9Sstevel@tonic-gate b.prefix = __bt_defpfx; 1457c478bd9Sstevel@tonic-gate } 1467c478bd9Sstevel@tonic-gate 1477c478bd9Sstevel@tonic-gate if (b.lorder == 0) 1487c478bd9Sstevel@tonic-gate b.lorder = machine_lorder; 1497c478bd9Sstevel@tonic-gate } else { 1507c478bd9Sstevel@tonic-gate b.compare = __bt_defcmp; 1517c478bd9Sstevel@tonic-gate b.cachesize = 0; 1527c478bd9Sstevel@tonic-gate b.flags = 0; 1537c478bd9Sstevel@tonic-gate b.lorder = machine_lorder; 1547c478bd9Sstevel@tonic-gate b.minkeypage = DEFMINKEYPAGE; 1557c478bd9Sstevel@tonic-gate b.prefix = __bt_defpfx; 1567c478bd9Sstevel@tonic-gate b.psize = 0; 1577c478bd9Sstevel@tonic-gate } 1587c478bd9Sstevel@tonic-gate 1597c478bd9Sstevel@tonic-gate /* Check for the ubiquitous PDP-11. */ 1607c478bd9Sstevel@tonic-gate if (b.lorder != DB_BIG_ENDIAN && b.lorder != DB_LITTLE_ENDIAN) 1617c478bd9Sstevel@tonic-gate goto einval; 1627c478bd9Sstevel@tonic-gate 1637c478bd9Sstevel@tonic-gate /* Allocate and initialize DB and BTREE structures. */ 1647c478bd9Sstevel@tonic-gate if ((t = (BTREE *)malloc(sizeof(BTREE))) == NULL) 1657c478bd9Sstevel@tonic-gate goto err; 1667c478bd9Sstevel@tonic-gate memset(t, 0, sizeof(BTREE)); 1677c478bd9Sstevel@tonic-gate t->bt_fd = -1; /* Don't close unopened fd on error. */ 1687c478bd9Sstevel@tonic-gate t->bt_lorder = b.lorder; 1697c478bd9Sstevel@tonic-gate t->bt_order = NOT; 1707c478bd9Sstevel@tonic-gate t->bt_cmp = b.compare; 1717c478bd9Sstevel@tonic-gate t->bt_pfx = b.prefix; 1727c478bd9Sstevel@tonic-gate t->bt_rfd = -1; 1737c478bd9Sstevel@tonic-gate 1747c478bd9Sstevel@tonic-gate if ((t->bt_dbp = dbp = (DB *)malloc(sizeof(DB))) == NULL) 1757c478bd9Sstevel@tonic-gate goto err; 1767c478bd9Sstevel@tonic-gate memset(t->bt_dbp, 0, sizeof(DB)); 1777c478bd9Sstevel@tonic-gate if (t->bt_lorder != machine_lorder) 1787c478bd9Sstevel@tonic-gate F_SET(t, B_NEEDSWAP); 1797c478bd9Sstevel@tonic-gate 1807c478bd9Sstevel@tonic-gate dbp->type = DB_BTREE; 1817c478bd9Sstevel@tonic-gate dbp->internal = t; 1827c478bd9Sstevel@tonic-gate dbp->close = __bt_close; 1837c478bd9Sstevel@tonic-gate dbp->del = __bt_delete; 1847c478bd9Sstevel@tonic-gate dbp->fd = __bt_fd; 1857c478bd9Sstevel@tonic-gate dbp->get = __bt_get; 1867c478bd9Sstevel@tonic-gate dbp->put = __bt_put; 1877c478bd9Sstevel@tonic-gate dbp->seq = __bt_seq; 1887c478bd9Sstevel@tonic-gate dbp->sync = __bt_sync; 1897c478bd9Sstevel@tonic-gate 1907c478bd9Sstevel@tonic-gate /* 1917c478bd9Sstevel@tonic-gate * If no file name was supplied, this is an in-memory btree and we 1927c478bd9Sstevel@tonic-gate * open a backing temporary file. Otherwise, it's a disk-based tree. 1937c478bd9Sstevel@tonic-gate */ 1947c478bd9Sstevel@tonic-gate if (fname) { 1957c478bd9Sstevel@tonic-gate switch (flags & O_ACCMODE) { 1967c478bd9Sstevel@tonic-gate case O_RDONLY: 1977c478bd9Sstevel@tonic-gate F_SET(t, B_RDONLY); 1987c478bd9Sstevel@tonic-gate break; 1997c478bd9Sstevel@tonic-gate case O_RDWR: 2007c478bd9Sstevel@tonic-gate break; 2017c478bd9Sstevel@tonic-gate case O_WRONLY: 2027c478bd9Sstevel@tonic-gate default: 2037c478bd9Sstevel@tonic-gate goto einval; 2047c478bd9Sstevel@tonic-gate } 2057c478bd9Sstevel@tonic-gate 2067c478bd9Sstevel@tonic-gate if ((t->bt_fd = open(fname, flags | O_BINARY, mode)) < 0) 2077c478bd9Sstevel@tonic-gate goto err; 2087c478bd9Sstevel@tonic-gate 2097c478bd9Sstevel@tonic-gate } else { 2107c478bd9Sstevel@tonic-gate if ((flags & O_ACCMODE) != O_RDWR) 2117c478bd9Sstevel@tonic-gate goto einval; 2127c478bd9Sstevel@tonic-gate if ((t->bt_fd = tmp()) == -1) 2137c478bd9Sstevel@tonic-gate goto err; 2147c478bd9Sstevel@tonic-gate F_SET(t, B_INMEM); 2157c478bd9Sstevel@tonic-gate } 2167c478bd9Sstevel@tonic-gate 2177c478bd9Sstevel@tonic-gate if (fcntl(t->bt_fd, F_SETFD, 1) == -1) 2187c478bd9Sstevel@tonic-gate goto err; 2197c478bd9Sstevel@tonic-gate 2207c478bd9Sstevel@tonic-gate if (fstat(t->bt_fd, &sb)) 2217c478bd9Sstevel@tonic-gate goto err; 2227c478bd9Sstevel@tonic-gate if (sb.st_size) { 2237c478bd9Sstevel@tonic-gate if ((nr = read(t->bt_fd, &m, sizeof(BTMETA))) < 0) 2247c478bd9Sstevel@tonic-gate goto err; 2257c478bd9Sstevel@tonic-gate if (nr != sizeof(BTMETA)) 2267c478bd9Sstevel@tonic-gate goto eftype; 2277c478bd9Sstevel@tonic-gate 2287c478bd9Sstevel@tonic-gate /* 2297c478bd9Sstevel@tonic-gate * Read in the meta-data. This can change the notion of what 2307c478bd9Sstevel@tonic-gate * the lorder, page size and flags are, and, when the page size 2317c478bd9Sstevel@tonic-gate * changes, the cachesize value can change too. If the user 2327c478bd9Sstevel@tonic-gate * specified the wrong byte order for an existing database, we 2337c478bd9Sstevel@tonic-gate * don't bother to return an error, we just clear the NEEDSWAP 2347c478bd9Sstevel@tonic-gate * bit. 2357c478bd9Sstevel@tonic-gate */ 2367c478bd9Sstevel@tonic-gate if (m.magic == BTREEMAGIC) 2377c478bd9Sstevel@tonic-gate F_CLR(t, B_NEEDSWAP); 2387c478bd9Sstevel@tonic-gate else { 2397c478bd9Sstevel@tonic-gate F_SET(t, B_NEEDSWAP); 2407c478bd9Sstevel@tonic-gate M_32_SWAP(m.magic); 2417c478bd9Sstevel@tonic-gate M_32_SWAP(m.version); 2427c478bd9Sstevel@tonic-gate M_32_SWAP(m.psize); 2437c478bd9Sstevel@tonic-gate M_32_SWAP(m.free); 2447c478bd9Sstevel@tonic-gate M_32_SWAP(m.nrecs); 2457c478bd9Sstevel@tonic-gate M_32_SWAP(m.flags); 2467c478bd9Sstevel@tonic-gate } 2477c478bd9Sstevel@tonic-gate if (m.magic != BTREEMAGIC || m.version != BTREEVERSION) 2487c478bd9Sstevel@tonic-gate goto eftype; 2497c478bd9Sstevel@tonic-gate if (m.psize < MINPSIZE || m.psize > MAX_PAGE_OFFSET + 1 || 25056a424ccSmp m.psize & (sizeof(indx_t) - 1)) 2517c478bd9Sstevel@tonic-gate goto eftype; 2527c478bd9Sstevel@tonic-gate if (m.flags & ~SAVEMETA) 2537c478bd9Sstevel@tonic-gate goto eftype; 2547c478bd9Sstevel@tonic-gate b.psize = m.psize; 2557c478bd9Sstevel@tonic-gate F_SET(t, m.flags); 2567c478bd9Sstevel@tonic-gate t->bt_free = m.free; 2577c478bd9Sstevel@tonic-gate t->bt_nrecs = m.nrecs; 2587c478bd9Sstevel@tonic-gate } else { 2597c478bd9Sstevel@tonic-gate /* 2607c478bd9Sstevel@tonic-gate * Set the page size to the best value for I/O to this file. 2617c478bd9Sstevel@tonic-gate * Don't overflow the page offset type. 2627c478bd9Sstevel@tonic-gate */ 2637c478bd9Sstevel@tonic-gate if (b.psize == 0) { 2647c478bd9Sstevel@tonic-gate b.psize = sb.st_blksize; 2657c478bd9Sstevel@tonic-gate if (b.psize < MINPSIZE) 2667c478bd9Sstevel@tonic-gate b.psize = MINPSIZE; 2677c478bd9Sstevel@tonic-gate if (b.psize > MAX_PAGE_OFFSET + 1) 2687c478bd9Sstevel@tonic-gate b.psize = MAX_PAGE_OFFSET + 1; 2697c478bd9Sstevel@tonic-gate } 2707c478bd9Sstevel@tonic-gate 2717c478bd9Sstevel@tonic-gate /* Set flag if duplicates permitted. */ 2727c478bd9Sstevel@tonic-gate if (!(b.flags & R_DUP)) 2737c478bd9Sstevel@tonic-gate F_SET(t, B_NODUPS); 2747c478bd9Sstevel@tonic-gate 2757c478bd9Sstevel@tonic-gate t->bt_free = P_INVALID; 2767c478bd9Sstevel@tonic-gate t->bt_nrecs = 0; 2777c478bd9Sstevel@tonic-gate F_SET(t, B_METADIRTY); 2787c478bd9Sstevel@tonic-gate } 2797c478bd9Sstevel@tonic-gate 2807c478bd9Sstevel@tonic-gate t->bt_psize = b.psize; 2817c478bd9Sstevel@tonic-gate 2827c478bd9Sstevel@tonic-gate /* Set the cache size; must be a multiple of the page size. */ 28356a424ccSmp if (b.cachesize && b.cachesize & (b.psize - 1)) 28456a424ccSmp b.cachesize += (~b.cachesize & (b.psize - 1)) + 1; 2857c478bd9Sstevel@tonic-gate if (b.cachesize < b.psize * MINCACHE) 2867c478bd9Sstevel@tonic-gate b.cachesize = b.psize * MINCACHE; 2877c478bd9Sstevel@tonic-gate 2887c478bd9Sstevel@tonic-gate /* Calculate number of pages to cache. */ 2897c478bd9Sstevel@tonic-gate ncache = (b.cachesize + t->bt_psize - 1) / t->bt_psize; 2907c478bd9Sstevel@tonic-gate 2917c478bd9Sstevel@tonic-gate /* 2927c478bd9Sstevel@tonic-gate * The btree data structure requires that at least two keys can fit on 2937c478bd9Sstevel@tonic-gate * a page, but other than that there's no fixed requirement. The user 2947c478bd9Sstevel@tonic-gate * specified a minimum number per page, and we translated that into the 2957c478bd9Sstevel@tonic-gate * number of bytes a key/data pair can use before being placed on an 2967c478bd9Sstevel@tonic-gate * overflow page. This calculation includes the page header, the size 2977c478bd9Sstevel@tonic-gate * of the index referencing the leaf item and the size of the leaf item 2987c478bd9Sstevel@tonic-gate * structure. Also, don't let the user specify a minkeypage such that 2997c478bd9Sstevel@tonic-gate * a key/data pair won't fit even if both key and data are on overflow 3007c478bd9Sstevel@tonic-gate * pages. 3017c478bd9Sstevel@tonic-gate */ 3027c478bd9Sstevel@tonic-gate t->bt_ovflsize = (t->bt_psize - BTDATAOFF) / b.minkeypage - 3037c478bd9Sstevel@tonic-gate (sizeof(indx_t) + NBLEAFDBT(0, 0)); 3047c478bd9Sstevel@tonic-gate if (t->bt_ovflsize < NBLEAFDBT(NOVFLSIZE, NOVFLSIZE) + sizeof(indx_t)) 3057c478bd9Sstevel@tonic-gate t->bt_ovflsize = 3067c478bd9Sstevel@tonic-gate NBLEAFDBT(NOVFLSIZE, NOVFLSIZE) + sizeof(indx_t); 3077c478bd9Sstevel@tonic-gate 3087c478bd9Sstevel@tonic-gate /* Initialize the buffer pool. */ 3097c478bd9Sstevel@tonic-gate if ((t->bt_mp = 3107c478bd9Sstevel@tonic-gate mpool_open(NULL, t->bt_fd, t->bt_psize, ncache)) == NULL) 3117c478bd9Sstevel@tonic-gate goto err; 3127c478bd9Sstevel@tonic-gate if (!F_ISSET(t, B_INMEM)) 3137c478bd9Sstevel@tonic-gate mpool_filter(t->bt_mp, __bt_pgin, __bt_pgout, t); 3147c478bd9Sstevel@tonic-gate 3157c478bd9Sstevel@tonic-gate /* Create a root page if new tree. */ 3167c478bd9Sstevel@tonic-gate if (nroot(t) == RET_ERROR) 3177c478bd9Sstevel@tonic-gate goto err; 3187c478bd9Sstevel@tonic-gate 3197c478bd9Sstevel@tonic-gate /* Global flags. */ 3207c478bd9Sstevel@tonic-gate if (dflags & DB_LOCK) 3217c478bd9Sstevel@tonic-gate F_SET(t, B_DB_LOCK); 3227c478bd9Sstevel@tonic-gate if (dflags & DB_SHMEM) 3237c478bd9Sstevel@tonic-gate F_SET(t, B_DB_SHMEM); 3247c478bd9Sstevel@tonic-gate if (dflags & DB_TXN) 3257c478bd9Sstevel@tonic-gate F_SET(t, B_DB_TXN); 3267c478bd9Sstevel@tonic-gate 3277c478bd9Sstevel@tonic-gate return (dbp); 3287c478bd9Sstevel@tonic-gate 3297c478bd9Sstevel@tonic-gate einval: errno = EINVAL; 3307c478bd9Sstevel@tonic-gate goto err; 3317c478bd9Sstevel@tonic-gate 3327c478bd9Sstevel@tonic-gate eftype: errno = EFTYPE; 3337c478bd9Sstevel@tonic-gate goto err; 3347c478bd9Sstevel@tonic-gate 3357c478bd9Sstevel@tonic-gate err: if (t) { 3367c478bd9Sstevel@tonic-gate if (t->bt_dbp) 3377c478bd9Sstevel@tonic-gate free(t->bt_dbp); 3387c478bd9Sstevel@tonic-gate if (t->bt_fd != -1) 3397c478bd9Sstevel@tonic-gate (void)close(t->bt_fd); 3407c478bd9Sstevel@tonic-gate free(t); 3417c478bd9Sstevel@tonic-gate } 3427c478bd9Sstevel@tonic-gate return (NULL); 3437c478bd9Sstevel@tonic-gate } 3447c478bd9Sstevel@tonic-gate 3457c478bd9Sstevel@tonic-gate /* 3467c478bd9Sstevel@tonic-gate * NROOT -- Create the root of a new tree. 3477c478bd9Sstevel@tonic-gate * 3487c478bd9Sstevel@tonic-gate * Parameters: 3497c478bd9Sstevel@tonic-gate * t: tree 3507c478bd9Sstevel@tonic-gate * 3517c478bd9Sstevel@tonic-gate * Returns: 3527c478bd9Sstevel@tonic-gate * RET_ERROR, RET_SUCCESS 3537c478bd9Sstevel@tonic-gate */ 3547c478bd9Sstevel@tonic-gate static int 3557c478bd9Sstevel@tonic-gate nroot(t) 3567c478bd9Sstevel@tonic-gate BTREE *t; 3577c478bd9Sstevel@tonic-gate { 3587c478bd9Sstevel@tonic-gate PAGE *meta, *root; 3597c478bd9Sstevel@tonic-gate db_pgno_t npg; 3607c478bd9Sstevel@tonic-gate 3617c478bd9Sstevel@tonic-gate if ((root = mpool_get(t->bt_mp, 1, 0)) != NULL) { 3627c478bd9Sstevel@tonic-gate if (root->lower == 0 && 3637c478bd9Sstevel@tonic-gate root->pgno == 0 && 3647c478bd9Sstevel@tonic-gate root->linp[0] == 0) { 3657c478bd9Sstevel@tonic-gate mpool_delete(t->bt_mp, root); 3667c478bd9Sstevel@tonic-gate errno = EINVAL; 3677c478bd9Sstevel@tonic-gate } else { 3687c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, root, 0); 3697c478bd9Sstevel@tonic-gate return (RET_SUCCESS); 3707c478bd9Sstevel@tonic-gate } 3717c478bd9Sstevel@tonic-gate } 3727c478bd9Sstevel@tonic-gate if (errno != EINVAL) /* It's OK to not exist. */ 3737c478bd9Sstevel@tonic-gate return (RET_ERROR); 3747c478bd9Sstevel@tonic-gate errno = 0; 3757c478bd9Sstevel@tonic-gate 3767c478bd9Sstevel@tonic-gate if ((meta = mpool_new(t->bt_mp, &npg, MPOOL_PAGE_NEXT)) == NULL) 3777c478bd9Sstevel@tonic-gate return (RET_ERROR); 3787c478bd9Sstevel@tonic-gate 3797c478bd9Sstevel@tonic-gate if ((root = mpool_new(t->bt_mp, &npg, MPOOL_PAGE_NEXT)) == NULL) 3807c478bd9Sstevel@tonic-gate return (RET_ERROR); 3817c478bd9Sstevel@tonic-gate 3827c478bd9Sstevel@tonic-gate if (npg != P_ROOT) 3837c478bd9Sstevel@tonic-gate return (RET_ERROR); 3847c478bd9Sstevel@tonic-gate root->pgno = npg; 3857c478bd9Sstevel@tonic-gate root->prevpg = root->nextpg = P_INVALID; 3867c478bd9Sstevel@tonic-gate root->lower = BTDATAOFF; 3877c478bd9Sstevel@tonic-gate root->upper = t->bt_psize; 3887c478bd9Sstevel@tonic-gate root->flags = P_BLEAF; 3897c478bd9Sstevel@tonic-gate memset(meta, 0, t->bt_psize); 3907c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, meta, MPOOL_DIRTY); 3917c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, root, MPOOL_DIRTY); 3927c478bd9Sstevel@tonic-gate return (RET_SUCCESS); 3937c478bd9Sstevel@tonic-gate } 3947c478bd9Sstevel@tonic-gate 3957c478bd9Sstevel@tonic-gate static int 3967c478bd9Sstevel@tonic-gate tmp() 3977c478bd9Sstevel@tonic-gate { 3987c478bd9Sstevel@tonic-gate #ifdef SIG_BLOCK 3997c478bd9Sstevel@tonic-gate sigset_t set, oset; 4007c478bd9Sstevel@tonic-gate #else 4017c478bd9Sstevel@tonic-gate int oset; 4027c478bd9Sstevel@tonic-gate #endif 4037c478bd9Sstevel@tonic-gate int fd; 4047c478bd9Sstevel@tonic-gate char *envtmp; 4057c478bd9Sstevel@tonic-gate char path[MAXPATHLEN]; 4067c478bd9Sstevel@tonic-gate static char fn[] = "/bt.XXXXXX"; 4077c478bd9Sstevel@tonic-gate 4087c478bd9Sstevel@tonic-gate envtmp = getenv("TMPDIR"); 4097c478bd9Sstevel@tonic-gate 4107c478bd9Sstevel@tonic-gate /* this used to be done with snprintf(), but since snprintf 4117c478bd9Sstevel@tonic-gate isn't in most operating systems, and overflow checking in 4127c478bd9Sstevel@tonic-gate this case is easy, this is what is done */ 4137c478bd9Sstevel@tonic-gate 4147c478bd9Sstevel@tonic-gate if (envtmp && ((strlen(envtmp)+sizeof(fn)+1) > sizeof(path))) 4157c478bd9Sstevel@tonic-gate return(-1); 4167c478bd9Sstevel@tonic-gate 4177c478bd9Sstevel@tonic-gate (void)sprintf(path, "%s%s", (envtmp ? envtmp : "/tmp"), fn); 4187c478bd9Sstevel@tonic-gate 4197c478bd9Sstevel@tonic-gate #ifdef SIG_BLOCK 4207c478bd9Sstevel@tonic-gate (void)sigfillset(&set); 4217c478bd9Sstevel@tonic-gate (void)sigprocmask(SIG_BLOCK, &set, &oset); 4227c478bd9Sstevel@tonic-gate #else 4237c478bd9Sstevel@tonic-gate oset = sigblock(~0); 4247c478bd9Sstevel@tonic-gate #endif 4257c478bd9Sstevel@tonic-gate if ((fd = mkstemp(path)) != -1) 4267c478bd9Sstevel@tonic-gate (void)unlink(path); 4277c478bd9Sstevel@tonic-gate #ifdef SIG_BLOCK 4287c478bd9Sstevel@tonic-gate (void)sigprocmask(SIG_SETMASK, &oset, NULL); 4297c478bd9Sstevel@tonic-gate #else 4307c478bd9Sstevel@tonic-gate sigsetmask(oset); 4317c478bd9Sstevel@tonic-gate #endif 4327c478bd9Sstevel@tonic-gate #ifdef __CYGWIN32__ 4337c478bd9Sstevel@tonic-gate /* Ensure the fd is in binary mode. */ 4347c478bd9Sstevel@tonic-gate setmode(fd, O_BINARY); 4357c478bd9Sstevel@tonic-gate #endif /* __CYGWIN32__ */ 4367c478bd9Sstevel@tonic-gate 4377c478bd9Sstevel@tonic-gate return(fd); 4387c478bd9Sstevel@tonic-gate } 4397c478bd9Sstevel@tonic-gate 4407c478bd9Sstevel@tonic-gate static int 4417c478bd9Sstevel@tonic-gate byteorder() 4427c478bd9Sstevel@tonic-gate { 4437c478bd9Sstevel@tonic-gate u_int32_t x; 4447c478bd9Sstevel@tonic-gate u_char *p; 4457c478bd9Sstevel@tonic-gate 4467c478bd9Sstevel@tonic-gate x = 0x01020304; 4477c478bd9Sstevel@tonic-gate p = (u_char *)&x; 4487c478bd9Sstevel@tonic-gate switch (*p) { 4497c478bd9Sstevel@tonic-gate case 1: 4507c478bd9Sstevel@tonic-gate return (DB_BIG_ENDIAN); 4517c478bd9Sstevel@tonic-gate case 4: 4527c478bd9Sstevel@tonic-gate return (DB_LITTLE_ENDIAN); 4537c478bd9Sstevel@tonic-gate default: 4547c478bd9Sstevel@tonic-gate return (0); 4557c478bd9Sstevel@tonic-gate } 4567c478bd9Sstevel@tonic-gate } 4577c478bd9Sstevel@tonic-gate 4587c478bd9Sstevel@tonic-gate int 4597c478bd9Sstevel@tonic-gate __bt_fd(dbp) 4607c478bd9Sstevel@tonic-gate const DB *dbp; 4617c478bd9Sstevel@tonic-gate { 4627c478bd9Sstevel@tonic-gate BTREE *t; 4637c478bd9Sstevel@tonic-gate 4647c478bd9Sstevel@tonic-gate t = dbp->internal; 4657c478bd9Sstevel@tonic-gate 4667c478bd9Sstevel@tonic-gate /* Toss any page pinned across calls. */ 4677c478bd9Sstevel@tonic-gate if (t->bt_pinned != NULL) { 4687c478bd9Sstevel@tonic-gate mpool_put(t->bt_mp, t->bt_pinned, 0); 4697c478bd9Sstevel@tonic-gate t->bt_pinned = NULL; 4707c478bd9Sstevel@tonic-gate } 4717c478bd9Sstevel@tonic-gate 4727c478bd9Sstevel@tonic-gate /* In-memory database can't have a file descriptor. */ 4737c478bd9Sstevel@tonic-gate if (F_ISSET(t, B_INMEM)) { 4747c478bd9Sstevel@tonic-gate errno = ENOENT; 4757c478bd9Sstevel@tonic-gate return (-1); 4767c478bd9Sstevel@tonic-gate } 4777c478bd9Sstevel@tonic-gate return (t->bt_fd); 4787c478bd9Sstevel@tonic-gate } 479