17c478bd9Sstevel@tonic-gate /*-
27c478bd9Sstevel@tonic-gate  * Copyright (c) 1990, 1993, 1994
37c478bd9Sstevel@tonic-gate  *	The Regents of the University of California.  All rights reserved.
47c478bd9Sstevel@tonic-gate  *
57c478bd9Sstevel@tonic-gate  * Redistribution and use in source and binary forms, with or without
67c478bd9Sstevel@tonic-gate  * modification, are permitted provided that the following conditions
77c478bd9Sstevel@tonic-gate  * are met:
87c478bd9Sstevel@tonic-gate  * 1. Redistributions of source code must retain the above copyright
97c478bd9Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer.
107c478bd9Sstevel@tonic-gate  * 2. Redistributions in binary form must reproduce the above copyright
117c478bd9Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer in the
127c478bd9Sstevel@tonic-gate  *    documentation and/or other materials provided with the distribution.
137c478bd9Sstevel@tonic-gate  * 3. All advertising materials mentioning features or use of this software
147c478bd9Sstevel@tonic-gate  *    must display the following acknowledgement:
157c478bd9Sstevel@tonic-gate  *	This product includes software developed by the University of
167c478bd9Sstevel@tonic-gate  *	California, Berkeley and its contributors.
177c478bd9Sstevel@tonic-gate  * 4. Neither the name of the University nor the names of its contributors
187c478bd9Sstevel@tonic-gate  *    may be used to endorse or promote products derived from this software
197c478bd9Sstevel@tonic-gate  *    without specific prior written permission.
207c478bd9Sstevel@tonic-gate  *
217c478bd9Sstevel@tonic-gate  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
227c478bd9Sstevel@tonic-gate  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
237c478bd9Sstevel@tonic-gate  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
247c478bd9Sstevel@tonic-gate  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
257c478bd9Sstevel@tonic-gate  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
267c478bd9Sstevel@tonic-gate  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
277c478bd9Sstevel@tonic-gate  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
287c478bd9Sstevel@tonic-gate  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
297c478bd9Sstevel@tonic-gate  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
307c478bd9Sstevel@tonic-gate  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
317c478bd9Sstevel@tonic-gate  * SUCH DAMAGE.
327c478bd9Sstevel@tonic-gate  */
337c478bd9Sstevel@tonic-gate 
347c478bd9Sstevel@tonic-gate #if defined(LIBC_SCCS) && !defined(lint)
357c478bd9Sstevel@tonic-gate static char sccsid[] = "@(#)mpool.c	8.7 (Berkeley) 11/2/95";
367c478bd9Sstevel@tonic-gate #endif /* LIBC_SCCS and not lint */
377c478bd9Sstevel@tonic-gate 
387c478bd9Sstevel@tonic-gate #include <sys/param.h>
397c478bd9Sstevel@tonic-gate #include <sys/stat.h>
407c478bd9Sstevel@tonic-gate 
417c478bd9Sstevel@tonic-gate #include <errno.h>
427c478bd9Sstevel@tonic-gate #include <stdio.h>
437c478bd9Sstevel@tonic-gate #include <stdlib.h>
447c478bd9Sstevel@tonic-gate #include <string.h>
457c478bd9Sstevel@tonic-gate #include <unistd.h>
467c478bd9Sstevel@tonic-gate 
477c478bd9Sstevel@tonic-gate #include "db-int.h"
487c478bd9Sstevel@tonic-gate #include "mpool.h"
497c478bd9Sstevel@tonic-gate 
/* Forward declarations of the file-local helpers defined later in this file. */
static BKT *mpool_bkt __P((MPOOL *));
static BKT *mpool_look __P((MPOOL *, db_pgno_t));
static int  mpool_write __P((MPOOL *, BKT *));
537c478bd9Sstevel@tonic-gate 
547c478bd9Sstevel@tonic-gate /*
557c478bd9Sstevel@tonic-gate  * mpool_open --
567c478bd9Sstevel@tonic-gate  *	Initialize a memory pool.
577c478bd9Sstevel@tonic-gate  */
587c478bd9Sstevel@tonic-gate MPOOL *
mpool_open(key,fd,pagesize,maxcache)597c478bd9Sstevel@tonic-gate mpool_open(key, fd, pagesize, maxcache)
607c478bd9Sstevel@tonic-gate 	void *key;
617c478bd9Sstevel@tonic-gate 	int fd;
627c478bd9Sstevel@tonic-gate 	db_pgno_t pagesize, maxcache;
637c478bd9Sstevel@tonic-gate {
647c478bd9Sstevel@tonic-gate 	struct stat sb;
657c478bd9Sstevel@tonic-gate 	MPOOL *mp;
667c478bd9Sstevel@tonic-gate 	int entry;
677c478bd9Sstevel@tonic-gate 
687c478bd9Sstevel@tonic-gate 	/*
697c478bd9Sstevel@tonic-gate 	 * Get information about the file.
707c478bd9Sstevel@tonic-gate 	 *
717c478bd9Sstevel@tonic-gate 	 * XXX
727c478bd9Sstevel@tonic-gate 	 * We don't currently handle pipes, although we should.
737c478bd9Sstevel@tonic-gate 	 */
747c478bd9Sstevel@tonic-gate 	if (fstat(fd, &sb))
757c478bd9Sstevel@tonic-gate 		return (NULL);
767c478bd9Sstevel@tonic-gate 	if (!S_ISREG(sb.st_mode)) {
777c478bd9Sstevel@tonic-gate 		errno = ESPIPE;
787c478bd9Sstevel@tonic-gate 		return (NULL);
797c478bd9Sstevel@tonic-gate 	}
807c478bd9Sstevel@tonic-gate 
817c478bd9Sstevel@tonic-gate 	/* Allocate and initialize the MPOOL cookie. */
827c478bd9Sstevel@tonic-gate 	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
837c478bd9Sstevel@tonic-gate 		return (NULL);
847c478bd9Sstevel@tonic-gate 	CIRCLEQ_INIT(&mp->lqh);
857c478bd9Sstevel@tonic-gate 	for (entry = 0; entry < HASHSIZE; ++entry)
867c478bd9Sstevel@tonic-gate 		CIRCLEQ_INIT(&mp->hqh[entry]);
877c478bd9Sstevel@tonic-gate 	mp->maxcache = maxcache;
887c478bd9Sstevel@tonic-gate 	mp->npages = sb.st_size / pagesize;
897c478bd9Sstevel@tonic-gate 	mp->pagesize = pagesize;
907c478bd9Sstevel@tonic-gate 	mp->fd = fd;
917c478bd9Sstevel@tonic-gate 	return (mp);
927c478bd9Sstevel@tonic-gate }
937c478bd9Sstevel@tonic-gate 
947c478bd9Sstevel@tonic-gate /*
957c478bd9Sstevel@tonic-gate  * mpool_filter --
967c478bd9Sstevel@tonic-gate  *	Initialize input/output filters.
977c478bd9Sstevel@tonic-gate  */
987c478bd9Sstevel@tonic-gate void
mpool_filter(mp,pgin,pgout,pgcookie)997c478bd9Sstevel@tonic-gate mpool_filter(mp, pgin, pgout, pgcookie)
1007c478bd9Sstevel@tonic-gate 	MPOOL *mp;
1017c478bd9Sstevel@tonic-gate 	void (*pgin) __P((void *, db_pgno_t, void *));
1027c478bd9Sstevel@tonic-gate 	void (*pgout) __P((void *, db_pgno_t, void *));
1037c478bd9Sstevel@tonic-gate 	void *pgcookie;
1047c478bd9Sstevel@tonic-gate {
1057c478bd9Sstevel@tonic-gate 	mp->pgin = pgin;
1067c478bd9Sstevel@tonic-gate 	mp->pgout = pgout;
1077c478bd9Sstevel@tonic-gate 	mp->pgcookie = pgcookie;
1087c478bd9Sstevel@tonic-gate }
109*55fea89dSDan Cross 
1107c478bd9Sstevel@tonic-gate /*
1117c478bd9Sstevel@tonic-gate  * mpool_new --
1127c478bd9Sstevel@tonic-gate  *	Get a new page of memory.
1137c478bd9Sstevel@tonic-gate  */
1147c478bd9Sstevel@tonic-gate void *
mpool_new(mp,pgnoaddr,flags)1157c478bd9Sstevel@tonic-gate mpool_new(mp, pgnoaddr, flags)
1167c478bd9Sstevel@tonic-gate 	MPOOL *mp;
1177c478bd9Sstevel@tonic-gate 	db_pgno_t *pgnoaddr;
1187c478bd9Sstevel@tonic-gate 	u_int flags;
1197c478bd9Sstevel@tonic-gate {
1207c478bd9Sstevel@tonic-gate 	struct _hqh *head;
1217c478bd9Sstevel@tonic-gate 	BKT *bp;
1227c478bd9Sstevel@tonic-gate 
1237c478bd9Sstevel@tonic-gate 	if (mp->npages == MAX_PAGE_NUMBER) {
1247c478bd9Sstevel@tonic-gate 		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
1257c478bd9Sstevel@tonic-gate 		abort();
1267c478bd9Sstevel@tonic-gate 	}
1277c478bd9Sstevel@tonic-gate #ifdef STATISTICS
1287c478bd9Sstevel@tonic-gate 	++mp->pagenew;
1297c478bd9Sstevel@tonic-gate #endif
1307c478bd9Sstevel@tonic-gate 	/*
1317c478bd9Sstevel@tonic-gate 	 * Get a BKT from the cache.  Assign a new page number, attach
1327c478bd9Sstevel@tonic-gate 	 * it to the head of the hash chain, the tail of the lru chain,
1337c478bd9Sstevel@tonic-gate 	 * and return.
1347c478bd9Sstevel@tonic-gate 	 */
1357c478bd9Sstevel@tonic-gate 	if ((bp = mpool_bkt(mp)) == NULL)
1367c478bd9Sstevel@tonic-gate 		return (NULL);
1377c478bd9Sstevel@tonic-gate 	if (flags == MPOOL_PAGE_REQUEST) {
1387c478bd9Sstevel@tonic-gate 		mp->npages++;
1397c478bd9Sstevel@tonic-gate 		bp->pgno = *pgnoaddr;
1407c478bd9Sstevel@tonic-gate 	} else
1417c478bd9Sstevel@tonic-gate 		bp->pgno = *pgnoaddr = mp->npages++;
1427c478bd9Sstevel@tonic-gate 
1437c478bd9Sstevel@tonic-gate 	bp->flags = MPOOL_PINNED | MPOOL_INUSE;
1447c478bd9Sstevel@tonic-gate 
1457c478bd9Sstevel@tonic-gate 	head = &mp->hqh[HASHKEY(bp->pgno)];
1467c478bd9Sstevel@tonic-gate 	CIRCLEQ_INSERT_HEAD(head, bp, hq);
1477c478bd9Sstevel@tonic-gate 	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
1487c478bd9Sstevel@tonic-gate 	return (bp->page);
1497c478bd9Sstevel@tonic-gate }
1507c478bd9Sstevel@tonic-gate 
1517c478bd9Sstevel@tonic-gate int
mpool_delete(mp,page)1527c478bd9Sstevel@tonic-gate mpool_delete(mp, page)
1537c478bd9Sstevel@tonic-gate 	MPOOL *mp;
1547c478bd9Sstevel@tonic-gate 	void *page;
1557c478bd9Sstevel@tonic-gate {
1567c478bd9Sstevel@tonic-gate 	struct _hqh *head;
1577c478bd9Sstevel@tonic-gate 	BKT *bp;
1587c478bd9Sstevel@tonic-gate 
1597c478bd9Sstevel@tonic-gate 	bp = (BKT *)((char *)page - sizeof(BKT));
1607c478bd9Sstevel@tonic-gate 
16156a424ccSmp #ifdef DEBUG
1627c478bd9Sstevel@tonic-gate 	if (!(bp->flags & MPOOL_PINNED)) {
16356a424ccSmp 		(void)fprintf(stderr,
16456a424ccSmp 		    "mpool_delete: page %d not pinned\n", bp->pgno);
1657c478bd9Sstevel@tonic-gate 		abort();
1667c478bd9Sstevel@tonic-gate 	}
1677c478bd9Sstevel@tonic-gate #endif
1687c478bd9Sstevel@tonic-gate 
1697c478bd9Sstevel@tonic-gate 	/* Remove from the hash and lru queues. */
1707c478bd9Sstevel@tonic-gate 	head = &mp->hqh[HASHKEY(bp->pgno)];
1717c478bd9Sstevel@tonic-gate 	CIRCLEQ_REMOVE(head, bp, hq);
1727c478bd9Sstevel@tonic-gate 	CIRCLEQ_REMOVE(&mp->lqh, bp, q);
1737c478bd9Sstevel@tonic-gate 
1747c478bd9Sstevel@tonic-gate 	free(bp);
1757c478bd9Sstevel@tonic-gate 	return (RET_SUCCESS);
176*55fea89dSDan Cross }
177*55fea89dSDan Cross 
1787c478bd9Sstevel@tonic-gate /*
1797c478bd9Sstevel@tonic-gate  * mpool_get
1807c478bd9Sstevel@tonic-gate  *	Get a page.
1817c478bd9Sstevel@tonic-gate  */
1827c478bd9Sstevel@tonic-gate void *
mpool_get(mp,pgno,flags)1837c478bd9Sstevel@tonic-gate mpool_get(mp, pgno, flags)
1847c478bd9Sstevel@tonic-gate 	MPOOL *mp;
1857c478bd9Sstevel@tonic-gate 	db_pgno_t pgno;
1867c478bd9Sstevel@tonic-gate 	u_int flags;				/* XXX not used? */
1877c478bd9Sstevel@tonic-gate {
1887c478bd9Sstevel@tonic-gate 	struct _hqh *head;
1897c478bd9Sstevel@tonic-gate 	BKT *bp;
1907c478bd9Sstevel@tonic-gate 	off_t off;
1917c478bd9Sstevel@tonic-gate 	int nr;
1927c478bd9Sstevel@tonic-gate 
1937c478bd9Sstevel@tonic-gate #ifdef STATISTICS
1947c478bd9Sstevel@tonic-gate 	++mp->pageget;
1957c478bd9Sstevel@tonic-gate #endif
1967c478bd9Sstevel@tonic-gate 
1977c478bd9Sstevel@tonic-gate 	/* Check for a page that is cached. */
1987c478bd9Sstevel@tonic-gate 	if ((bp = mpool_look(mp, pgno)) != NULL) {
19956a424ccSmp #ifdef DEBUG
2007c478bd9Sstevel@tonic-gate 		if (!(flags & MPOOL_IGNOREPIN) && bp->flags & MPOOL_PINNED) {
20156a424ccSmp 			(void)fprintf(stderr,
20256a424ccSmp 			    "mpool_get: page %d already pinned\n", bp->pgno);
2037c478bd9Sstevel@tonic-gate 			abort();
2047c478bd9Sstevel@tonic-gate 		}
2057c478bd9Sstevel@tonic-gate #endif
2067c478bd9Sstevel@tonic-gate 		/*
2077c478bd9Sstevel@tonic-gate 		 * Move the page to the head of the hash chain and the tail
2087c478bd9Sstevel@tonic-gate 		 * of the lru chain.
2097c478bd9Sstevel@tonic-gate 		 */
2107c478bd9Sstevel@tonic-gate 		head = &mp->hqh[HASHKEY(bp->pgno)];
2117c478bd9Sstevel@tonic-gate 		CIRCLEQ_REMOVE(head, bp, hq);
2127c478bd9Sstevel@tonic-gate 		CIRCLEQ_INSERT_HEAD(head, bp, hq);
2137c478bd9Sstevel@tonic-gate 		CIRCLEQ_REMOVE(&mp->lqh, bp, q);
2147c478bd9Sstevel@tonic-gate 		CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
2157c478bd9Sstevel@tonic-gate 
2167c478bd9Sstevel@tonic-gate 		/* Return a pinned page. */
2177c478bd9Sstevel@tonic-gate 		bp->flags |= MPOOL_PINNED;
2187c478bd9Sstevel@tonic-gate 		return (bp->page);
2197c478bd9Sstevel@tonic-gate 	}
2207c478bd9Sstevel@tonic-gate 
2217c478bd9Sstevel@tonic-gate 	/* Get a page from the cache. */
2227c478bd9Sstevel@tonic-gate 	if ((bp = mpool_bkt(mp)) == NULL)
2237c478bd9Sstevel@tonic-gate 		return (NULL);
2247c478bd9Sstevel@tonic-gate 
2257c478bd9Sstevel@tonic-gate 	/* Read in the contents. */
2267c478bd9Sstevel@tonic-gate #ifdef STATISTICS
2277c478bd9Sstevel@tonic-gate 	++mp->pageread;
2287c478bd9Sstevel@tonic-gate #endif
2297c478bd9Sstevel@tonic-gate 	off = mp->pagesize * pgno;
23056a424ccSmp 	if (off / mp->pagesize != pgno) {
23156a424ccSmp 	    /* Run past the end of the file, or at least the part we
23256a424ccSmp 	       can address without large-file support?  */
23356a424ccSmp 	    errno = E2BIG;
23456a424ccSmp 	    return NULL;
23556a424ccSmp 	}
2367c478bd9Sstevel@tonic-gate 	if (lseek(mp->fd, off, SEEK_SET) != off)
2377c478bd9Sstevel@tonic-gate 		return (NULL);
2387c478bd9Sstevel@tonic-gate 
2397c478bd9Sstevel@tonic-gate 	if ((nr = read(mp->fd, bp->page, mp->pagesize)) != mp->pagesize) {
2407c478bd9Sstevel@tonic-gate 		if (nr > 0) {
2417c478bd9Sstevel@tonic-gate 			/* A partial read is definitely bad. */
2427c478bd9Sstevel@tonic-gate 			errno = EINVAL;
2437c478bd9Sstevel@tonic-gate 			return (NULL);
2447c478bd9Sstevel@tonic-gate 		} else {
2457c478bd9Sstevel@tonic-gate 			/*
2467c478bd9Sstevel@tonic-gate 			 * A zero-length reads, means you need to create a
2477c478bd9Sstevel@tonic-gate 			 * new page.
2487c478bd9Sstevel@tonic-gate 			 */
2497c478bd9Sstevel@tonic-gate 			memset(bp->page, 0, mp->pagesize);
2507c478bd9Sstevel@tonic-gate 		}
2517c478bd9Sstevel@tonic-gate 	}
2527c478bd9Sstevel@tonic-gate 
2537c478bd9Sstevel@tonic-gate 	/* Set the page number, pin the page. */
2547c478bd9Sstevel@tonic-gate 	bp->pgno = pgno;
2557c478bd9Sstevel@tonic-gate 	if (!(flags & MPOOL_IGNOREPIN))
2567c478bd9Sstevel@tonic-gate 		bp->flags = MPOOL_PINNED;
2577c478bd9Sstevel@tonic-gate 	bp->flags |= MPOOL_INUSE;
2587c478bd9Sstevel@tonic-gate 
2597c478bd9Sstevel@tonic-gate 	/*
2607c478bd9Sstevel@tonic-gate 	 * Add the page to the head of the hash chain and the tail
2617c478bd9Sstevel@tonic-gate 	 * of the lru chain.
2627c478bd9Sstevel@tonic-gate 	 */
2637c478bd9Sstevel@tonic-gate 	head = &mp->hqh[HASHKEY(bp->pgno)];
2647c478bd9Sstevel@tonic-gate 	CIRCLEQ_INSERT_HEAD(head, bp, hq);
2657c478bd9Sstevel@tonic-gate 	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
2667c478bd9Sstevel@tonic-gate 
2677c478bd9Sstevel@tonic-gate 	/* Run through the user's filter. */
2687c478bd9Sstevel@tonic-gate 	if (mp->pgin != NULL)
2697c478bd9Sstevel@tonic-gate 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
2707c478bd9Sstevel@tonic-gate 
2717c478bd9Sstevel@tonic-gate 	return (bp->page);
2727c478bd9Sstevel@tonic-gate }
2737c478bd9Sstevel@tonic-gate 
2747c478bd9Sstevel@tonic-gate /*
2757c478bd9Sstevel@tonic-gate  * mpool_put
2767c478bd9Sstevel@tonic-gate  *	Return a page.
2777c478bd9Sstevel@tonic-gate  */
2787c478bd9Sstevel@tonic-gate int
mpool_put(mp,page,flags)2797c478bd9Sstevel@tonic-gate mpool_put(mp, page, flags)
2807c478bd9Sstevel@tonic-gate 	MPOOL *mp;
2817c478bd9Sstevel@tonic-gate 	void *page;
2827c478bd9Sstevel@tonic-gate 	u_int flags;
2837c478bd9Sstevel@tonic-gate {
2847c478bd9Sstevel@tonic-gate 	BKT *bp;
2857c478bd9Sstevel@tonic-gate 
2867c478bd9Sstevel@tonic-gate #ifdef STATISTICS
2877c478bd9Sstevel@tonic-gate 	++mp->pageput;
2887c478bd9Sstevel@tonic-gate #endif
2897c478bd9Sstevel@tonic-gate 	bp = (BKT *)((char *)page - sizeof(BKT));
29056a424ccSmp #ifdef DEBUG
2917c478bd9Sstevel@tonic-gate 	if (!(bp->flags & MPOOL_PINNED)) {
29256a424ccSmp 		(void)fprintf(stderr,
29356a424ccSmp 		    "mpool_put: page %d not pinned\n", bp->pgno);
2947c478bd9Sstevel@tonic-gate 		abort();
2957c478bd9Sstevel@tonic-gate 	}
2967c478bd9Sstevel@tonic-gate #endif
2977c478bd9Sstevel@tonic-gate 	bp->flags &= ~MPOOL_PINNED;
2987c478bd9Sstevel@tonic-gate 	if (flags & MPOOL_DIRTY)
2997c478bd9Sstevel@tonic-gate 		bp->flags |= flags & MPOOL_DIRTY;
3007c478bd9Sstevel@tonic-gate 	return (RET_SUCCESS);
3017c478bd9Sstevel@tonic-gate }
3027c478bd9Sstevel@tonic-gate 
3037c478bd9Sstevel@tonic-gate /*
3047c478bd9Sstevel@tonic-gate  * mpool_close
3057c478bd9Sstevel@tonic-gate  *	Close the buffer pool.
3067c478bd9Sstevel@tonic-gate  */
3077c478bd9Sstevel@tonic-gate int
mpool_close(mp)3087c478bd9Sstevel@tonic-gate mpool_close(mp)
3097c478bd9Sstevel@tonic-gate 	MPOOL *mp;
3107c478bd9Sstevel@tonic-gate {
3117c478bd9Sstevel@tonic-gate 	BKT *bp;
3127c478bd9Sstevel@tonic-gate 
3137c478bd9Sstevel@tonic-gate 	/* Free up any space allocated to the lru pages. */
3147c478bd9Sstevel@tonic-gate 	while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
3157c478bd9Sstevel@tonic-gate 		CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
3167c478bd9Sstevel@tonic-gate 		free(bp);
3177c478bd9Sstevel@tonic-gate 	}
3187c478bd9Sstevel@tonic-gate 
3197c478bd9Sstevel@tonic-gate 	/* Free the MPOOL cookie. */
3207c478bd9Sstevel@tonic-gate 	free(mp);
3217c478bd9Sstevel@tonic-gate 	return (RET_SUCCESS);
3227c478bd9Sstevel@tonic-gate }
3237c478bd9Sstevel@tonic-gate 
3247c478bd9Sstevel@tonic-gate /*
3257c478bd9Sstevel@tonic-gate  * mpool_sync
3267c478bd9Sstevel@tonic-gate  *	Sync the pool to disk.
3277c478bd9Sstevel@tonic-gate  */
3287c478bd9Sstevel@tonic-gate int
mpool_sync(mp)3297c478bd9Sstevel@tonic-gate mpool_sync(mp)
3307c478bd9Sstevel@tonic-gate 	MPOOL *mp;
3317c478bd9Sstevel@tonic-gate {
3327c478bd9Sstevel@tonic-gate 	BKT *bp;
3337c478bd9Sstevel@tonic-gate 
3347c478bd9Sstevel@tonic-gate 	/* Walk the lru chain, flushing any dirty pages to disk. */
3357c478bd9Sstevel@tonic-gate 	for (bp = mp->lqh.cqh_first;
3367c478bd9Sstevel@tonic-gate 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
3377c478bd9Sstevel@tonic-gate 		if (bp->flags & MPOOL_DIRTY &&
3387c478bd9Sstevel@tonic-gate 		    mpool_write(mp, bp) == RET_ERROR)
3397c478bd9Sstevel@tonic-gate 			return (RET_ERROR);
3407c478bd9Sstevel@tonic-gate 
3417c478bd9Sstevel@tonic-gate 	/* Sync the file descriptor. */
3427c478bd9Sstevel@tonic-gate 	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
3437c478bd9Sstevel@tonic-gate }
3447c478bd9Sstevel@tonic-gate 
3457c478bd9Sstevel@tonic-gate /*
3467c478bd9Sstevel@tonic-gate  * mpool_bkt
3477c478bd9Sstevel@tonic-gate  *	Get a page from the cache (or create one).
3487c478bd9Sstevel@tonic-gate  */
3497c478bd9Sstevel@tonic-gate static BKT *
mpool_bkt(mp)3507c478bd9Sstevel@tonic-gate mpool_bkt(mp)
3517c478bd9Sstevel@tonic-gate 	MPOOL *mp;
3527c478bd9Sstevel@tonic-gate {
3537c478bd9Sstevel@tonic-gate 	struct _hqh *head;
3547c478bd9Sstevel@tonic-gate 	BKT *bp;
3557c478bd9Sstevel@tonic-gate 
3567c478bd9Sstevel@tonic-gate 	/* If under the max cached, always create a new page. */
3577c478bd9Sstevel@tonic-gate 	if (mp->curcache < mp->maxcache)
3587c478bd9Sstevel@tonic-gate 		goto new;
3597c478bd9Sstevel@tonic-gate 
3607c478bd9Sstevel@tonic-gate 	/*
3617c478bd9Sstevel@tonic-gate 	 * If the cache is max'd out, walk the lru list for a buffer we
3627c478bd9Sstevel@tonic-gate 	 * can flush.  If we find one, write it (if necessary) and take it
3637c478bd9Sstevel@tonic-gate 	 * off any lists.  If we don't find anything we grow the cache anyway.
3647c478bd9Sstevel@tonic-gate 	 * The cache never shrinks.
3657c478bd9Sstevel@tonic-gate 	 */
3667c478bd9Sstevel@tonic-gate 	for (bp = mp->lqh.cqh_first;
3677c478bd9Sstevel@tonic-gate 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
3687c478bd9Sstevel@tonic-gate 		if (!(bp->flags & MPOOL_PINNED)) {
3697c478bd9Sstevel@tonic-gate 			/* Flush if dirty. */
3707c478bd9Sstevel@tonic-gate 			if (bp->flags & MPOOL_DIRTY &&
3717c478bd9Sstevel@tonic-gate 			    mpool_write(mp, bp) == RET_ERROR)
3727c478bd9Sstevel@tonic-gate 				return (NULL);
3737c478bd9Sstevel@tonic-gate #ifdef STATISTICS
3747c478bd9Sstevel@tonic-gate 			++mp->pageflush;
3757c478bd9Sstevel@tonic-gate #endif
3767c478bd9Sstevel@tonic-gate 			/* Remove from the hash and lru queues. */
3777c478bd9Sstevel@tonic-gate 			head = &mp->hqh[HASHKEY(bp->pgno)];
3787c478bd9Sstevel@tonic-gate 			CIRCLEQ_REMOVE(head, bp, hq);
3797c478bd9Sstevel@tonic-gate 			CIRCLEQ_REMOVE(&mp->lqh, bp, q);
38056a424ccSmp #ifdef DEBUG
3817c478bd9Sstevel@tonic-gate 			{ void *spage;
3827c478bd9Sstevel@tonic-gate 				spage = bp->page;
3837c478bd9Sstevel@tonic-gate 				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
3847c478bd9Sstevel@tonic-gate 				bp->page = spage;
3857c478bd9Sstevel@tonic-gate 			}
3867c478bd9Sstevel@tonic-gate #endif
3877c478bd9Sstevel@tonic-gate 			bp->flags = 0;
3887c478bd9Sstevel@tonic-gate 			return (bp);
3897c478bd9Sstevel@tonic-gate 		}
3907c478bd9Sstevel@tonic-gate 
3917c478bd9Sstevel@tonic-gate new:	if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
3927c478bd9Sstevel@tonic-gate 		return (NULL);
3937c478bd9Sstevel@tonic-gate #ifdef STATISTICS
3947c478bd9Sstevel@tonic-gate 	++mp->pagealloc;
3957c478bd9Sstevel@tonic-gate #endif
396159d09a2SMark Phalan #if defined(DEBUG) || defined(PURIFY) || 1
3977c478bd9Sstevel@tonic-gate 	memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
3987c478bd9Sstevel@tonic-gate #endif
3997c478bd9Sstevel@tonic-gate 	bp->page = (char *)bp + sizeof(BKT);
4007c478bd9Sstevel@tonic-gate 	bp->flags = 0;
4017c478bd9Sstevel@tonic-gate 	++mp->curcache;
4027c478bd9Sstevel@tonic-gate 	return (bp);
4037c478bd9Sstevel@tonic-gate }
4047c478bd9Sstevel@tonic-gate 
4057c478bd9Sstevel@tonic-gate /*
4067c478bd9Sstevel@tonic-gate  * mpool_write
4077c478bd9Sstevel@tonic-gate  *	Write a page to disk.
4087c478bd9Sstevel@tonic-gate  */
4097c478bd9Sstevel@tonic-gate static int
mpool_write(mp,bp)4107c478bd9Sstevel@tonic-gate mpool_write(mp, bp)
4117c478bd9Sstevel@tonic-gate 	MPOOL *mp;
4127c478bd9Sstevel@tonic-gate 	BKT *bp;
4137c478bd9Sstevel@tonic-gate {
4147c478bd9Sstevel@tonic-gate 	off_t off;
4157c478bd9Sstevel@tonic-gate 
4167c478bd9Sstevel@tonic-gate #ifdef STATISTICS
4177c478bd9Sstevel@tonic-gate 	++mp->pagewrite;
4187c478bd9Sstevel@tonic-gate #endif
4197c478bd9Sstevel@tonic-gate 
4207c478bd9Sstevel@tonic-gate 	/* Run through the user's filter. */
4217c478bd9Sstevel@tonic-gate 	if (mp->pgout)
4227c478bd9Sstevel@tonic-gate 		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
4237c478bd9Sstevel@tonic-gate 
4247c478bd9Sstevel@tonic-gate 	off = mp->pagesize * bp->pgno;
42556a424ccSmp 	if (off / mp->pagesize != bp->pgno) {
42656a424ccSmp 	    /* Run past the end of the file, or at least the part we
42756a424ccSmp 	       can address without large-file support?  */
42856a424ccSmp 	    errno = E2BIG;
42956a424ccSmp 	    return RET_ERROR;
43056a424ccSmp 	}
4317c478bd9Sstevel@tonic-gate 	if (lseek(mp->fd, off, SEEK_SET) != off)
4327c478bd9Sstevel@tonic-gate 		return (RET_ERROR);
4337c478bd9Sstevel@tonic-gate 	if (write(mp->fd, bp->page, mp->pagesize) != mp->pagesize)
4347c478bd9Sstevel@tonic-gate 		return (RET_ERROR);
4357c478bd9Sstevel@tonic-gate 
4367c478bd9Sstevel@tonic-gate 	bp->flags &= ~MPOOL_DIRTY;
4377c478bd9Sstevel@tonic-gate 	return (RET_SUCCESS);
4387c478bd9Sstevel@tonic-gate }
4397c478bd9Sstevel@tonic-gate 
4407c478bd9Sstevel@tonic-gate /*
4417c478bd9Sstevel@tonic-gate  * mpool_look
4427c478bd9Sstevel@tonic-gate  *	Lookup a page in the cache.
4437c478bd9Sstevel@tonic-gate  */
4447c478bd9Sstevel@tonic-gate static BKT *
mpool_look(mp,pgno)4457c478bd9Sstevel@tonic-gate mpool_look(mp, pgno)
4467c478bd9Sstevel@tonic-gate 	MPOOL *mp;
4477c478bd9Sstevel@tonic-gate 	db_pgno_t pgno;
4487c478bd9Sstevel@tonic-gate {
4497c478bd9Sstevel@tonic-gate 	struct _hqh *head;
4507c478bd9Sstevel@tonic-gate 	BKT *bp;
4517c478bd9Sstevel@tonic-gate 
4527c478bd9Sstevel@tonic-gate 	head = &mp->hqh[HASHKEY(pgno)];
4537c478bd9Sstevel@tonic-gate 	for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
454159d09a2SMark Phalan 		if ((bp->pgno == pgno) && (bp->flags & MPOOL_INUSE)) {
4557c478bd9Sstevel@tonic-gate #ifdef STATISTICS
4567c478bd9Sstevel@tonic-gate 			++mp->cachehit;
4577c478bd9Sstevel@tonic-gate #endif
4587c478bd9Sstevel@tonic-gate 			return (bp);
4597c478bd9Sstevel@tonic-gate 		}
4607c478bd9Sstevel@tonic-gate #ifdef STATISTICS
4617c478bd9Sstevel@tonic-gate 	++mp->cachemiss;
4627c478bd9Sstevel@tonic-gate #endif
4637c478bd9Sstevel@tonic-gate 	return (NULL);
4647c478bd9Sstevel@tonic-gate }
4657c478bd9Sstevel@tonic-gate 
4667c478bd9Sstevel@tonic-gate #ifdef STATISTICS
4677c478bd9Sstevel@tonic-gate /*
4687c478bd9Sstevel@tonic-gate  * mpool_stat
4697c478bd9Sstevel@tonic-gate  *	Print out cache statistics.
4707c478bd9Sstevel@tonic-gate  */
4717c478bd9Sstevel@tonic-gate void
mpool_stat(mp)4727c478bd9Sstevel@tonic-gate mpool_stat(mp)
4737c478bd9Sstevel@tonic-gate 	MPOOL *mp;
4747c478bd9Sstevel@tonic-gate {
4757c478bd9Sstevel@tonic-gate 	BKT *bp;
4767c478bd9Sstevel@tonic-gate 	int cnt;
4777c478bd9Sstevel@tonic-gate 	char *sep;
4787c478bd9Sstevel@tonic-gate 
4797c478bd9Sstevel@tonic-gate 	(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
4807c478bd9Sstevel@tonic-gate 	(void)fprintf(stderr,
4817c478bd9Sstevel@tonic-gate 	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
4827c478bd9Sstevel@tonic-gate 	    mp->pagesize, mp->curcache, mp->maxcache);
4837c478bd9Sstevel@tonic-gate 	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
4847c478bd9Sstevel@tonic-gate 	    mp->pageput, mp->pageget, mp->pagenew);
4857c478bd9Sstevel@tonic-gate 	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
4867c478bd9Sstevel@tonic-gate 	    mp->pagealloc, mp->pageflush);
4877c478bd9Sstevel@tonic-gate 	if (mp->cachehit + mp->cachemiss)
4887c478bd9Sstevel@tonic-gate 		(void)fprintf(stderr,
489*55fea89dSDan Cross 		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
4907c478bd9Sstevel@tonic-gate 		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
4917c478bd9Sstevel@tonic-gate 		    * 100, mp->cachehit, mp->cachemiss);
4927c478bd9Sstevel@tonic-gate 	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
4937c478bd9Sstevel@tonic-gate 	    mp->pageread, mp->pagewrite);
4947c478bd9Sstevel@tonic-gate 
4957c478bd9Sstevel@tonic-gate 	sep = "";
4967c478bd9Sstevel@tonic-gate 	cnt = 0;
4977c478bd9Sstevel@tonic-gate 	for (bp = mp->lqh.cqh_first;
4987c478bd9Sstevel@tonic-gate 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next) {
4997c478bd9Sstevel@tonic-gate 		(void)fprintf(stderr, "%s%d", sep, bp->pgno);
5007c478bd9Sstevel@tonic-gate 		if (bp->flags & MPOOL_DIRTY)
5017c478bd9Sstevel@tonic-gate 			(void)fprintf(stderr, "d");
5027c478bd9Sstevel@tonic-gate 		if (bp->flags & MPOOL_PINNED)
5037c478bd9Sstevel@tonic-gate 			(void)fprintf(stderr, "P");
5047c478bd9Sstevel@tonic-gate 		if (++cnt == 10) {
5057c478bd9Sstevel@tonic-gate 			sep = "\n";
5067c478bd9Sstevel@tonic-gate 			cnt = 0;
5077c478bd9Sstevel@tonic-gate 		} else
5087c478bd9Sstevel@tonic-gate 			sep = ", ";
509*55fea89dSDan Cross 
5107c478bd9Sstevel@tonic-gate 	}
5117c478bd9Sstevel@tonic-gate 	(void)fprintf(stderr, "\n");
5127c478bd9Sstevel@tonic-gate }
5137c478bd9Sstevel@tonic-gate #endif
514