xref: /illumos-gate/usr/src/cmd/sendmail/db/mp/mp_fopen.c (revision 7c478bd9)
1*7c478bd9Sstevel@tonic-gate /*-
2*7c478bd9Sstevel@tonic-gate  * See the file LICENSE for redistribution information.
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * Copyright (c) 1996, 1997, 1998
5*7c478bd9Sstevel@tonic-gate  *	Sleepycat Software.  All rights reserved.
6*7c478bd9Sstevel@tonic-gate  */
7*7c478bd9Sstevel@tonic-gate #include "config.h"
8*7c478bd9Sstevel@tonic-gate 
9*7c478bd9Sstevel@tonic-gate #ifndef lint
10*7c478bd9Sstevel@tonic-gate static const char sccsid[] = "@(#)mp_fopen.c	10.60 (Sleepycat) 1/1/99";
11*7c478bd9Sstevel@tonic-gate #endif /* not lint */
12*7c478bd9Sstevel@tonic-gate 
13*7c478bd9Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES
14*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
15*7c478bd9Sstevel@tonic-gate 
16*7c478bd9Sstevel@tonic-gate #include <errno.h>
17*7c478bd9Sstevel@tonic-gate #include <string.h>
18*7c478bd9Sstevel@tonic-gate #endif
19*7c478bd9Sstevel@tonic-gate 
20*7c478bd9Sstevel@tonic-gate #include "db_int.h"
21*7c478bd9Sstevel@tonic-gate #include "shqueue.h"
22*7c478bd9Sstevel@tonic-gate #include "db_shash.h"
23*7c478bd9Sstevel@tonic-gate #include "mp.h"
24*7c478bd9Sstevel@tonic-gate #include "common_ext.h"
25*7c478bd9Sstevel@tonic-gate 
26*7c478bd9Sstevel@tonic-gate static int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *));
27*7c478bd9Sstevel@tonic-gate static int __memp_mf_open __P((DB_MPOOL *,
28*7c478bd9Sstevel@tonic-gate     const char *, size_t, db_pgno_t, DB_MPOOL_FINFO *, MPOOLFILE **));
29*7c478bd9Sstevel@tonic-gate 
30*7c478bd9Sstevel@tonic-gate /*
31*7c478bd9Sstevel@tonic-gate  * memp_fopen --
32*7c478bd9Sstevel@tonic-gate  *	Open a backing file for the memory pool.
33*7c478bd9Sstevel@tonic-gate  */
34*7c478bd9Sstevel@tonic-gate int
memp_fopen(dbmp,path,flags,mode,pagesize,finfop,retp)35*7c478bd9Sstevel@tonic-gate memp_fopen(dbmp, path, flags, mode, pagesize, finfop, retp)
36*7c478bd9Sstevel@tonic-gate 	DB_MPOOL *dbmp;
37*7c478bd9Sstevel@tonic-gate 	const char *path;
38*7c478bd9Sstevel@tonic-gate 	u_int32_t flags;
39*7c478bd9Sstevel@tonic-gate 	int mode;
40*7c478bd9Sstevel@tonic-gate 	size_t pagesize;
41*7c478bd9Sstevel@tonic-gate 	DB_MPOOL_FINFO *finfop;
42*7c478bd9Sstevel@tonic-gate 	DB_MPOOLFILE **retp;
43*7c478bd9Sstevel@tonic-gate {
44*7c478bd9Sstevel@tonic-gate 	int ret;
45*7c478bd9Sstevel@tonic-gate 
46*7c478bd9Sstevel@tonic-gate 	MP_PANIC_CHECK(dbmp);
47*7c478bd9Sstevel@tonic-gate 
48*7c478bd9Sstevel@tonic-gate 	/* Validate arguments. */
49*7c478bd9Sstevel@tonic-gate 	if ((ret = __db_fchk(dbmp->dbenv,
50*7c478bd9Sstevel@tonic-gate 	    "memp_fopen", flags, DB_CREATE | DB_NOMMAP | DB_RDONLY)) != 0)
51*7c478bd9Sstevel@tonic-gate 		return (ret);
52*7c478bd9Sstevel@tonic-gate 
53*7c478bd9Sstevel@tonic-gate 	/* Require a non-zero pagesize. */
54*7c478bd9Sstevel@tonic-gate 	if (pagesize == 0) {
55*7c478bd9Sstevel@tonic-gate 		__db_err(dbmp->dbenv, "memp_fopen: pagesize not specified");
56*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
57*7c478bd9Sstevel@tonic-gate 	}
58*7c478bd9Sstevel@tonic-gate 	if (finfop != NULL && finfop->clear_len > pagesize)
59*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
60*7c478bd9Sstevel@tonic-gate 
61*7c478bd9Sstevel@tonic-gate 	return (__memp_fopen(dbmp,
62*7c478bd9Sstevel@tonic-gate 	    NULL, path, flags, mode, pagesize, 1, finfop, retp));
63*7c478bd9Sstevel@tonic-gate }
64*7c478bd9Sstevel@tonic-gate 
65*7c478bd9Sstevel@tonic-gate /*
66*7c478bd9Sstevel@tonic-gate  * __memp_fopen --
67*7c478bd9Sstevel@tonic-gate  *	Open a backing file for the memory pool; internal version.
68*7c478bd9Sstevel@tonic-gate  *
69*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *,
70*7c478bd9Sstevel@tonic-gate  * PUBLIC:    u_int32_t, int, size_t, int, DB_MPOOL_FINFO *, DB_MPOOLFILE **));
71*7c478bd9Sstevel@tonic-gate  */
72*7c478bd9Sstevel@tonic-gate int
__memp_fopen(dbmp,mfp,path,flags,mode,pagesize,needlock,finfop,retp)73*7c478bd9Sstevel@tonic-gate __memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp)
74*7c478bd9Sstevel@tonic-gate 	DB_MPOOL *dbmp;
75*7c478bd9Sstevel@tonic-gate 	MPOOLFILE *mfp;
76*7c478bd9Sstevel@tonic-gate 	const char *path;
77*7c478bd9Sstevel@tonic-gate 	u_int32_t flags;
78*7c478bd9Sstevel@tonic-gate 	int mode, needlock;
79*7c478bd9Sstevel@tonic-gate 	size_t pagesize;
80*7c478bd9Sstevel@tonic-gate 	DB_MPOOL_FINFO *finfop;
81*7c478bd9Sstevel@tonic-gate 	DB_MPOOLFILE **retp;
82*7c478bd9Sstevel@tonic-gate {
83*7c478bd9Sstevel@tonic-gate 	DB_ENV *dbenv;
84*7c478bd9Sstevel@tonic-gate 	DB_MPOOLFILE *dbmfp;
85*7c478bd9Sstevel@tonic-gate 	DB_MPOOL_FINFO finfo;
86*7c478bd9Sstevel@tonic-gate 	db_pgno_t last_pgno;
87*7c478bd9Sstevel@tonic-gate 	size_t maxmap;
88*7c478bd9Sstevel@tonic-gate 	u_int32_t mbytes, bytes;
89*7c478bd9Sstevel@tonic-gate 	int ret;
90*7c478bd9Sstevel@tonic-gate 	u_int8_t idbuf[DB_FILE_ID_LEN];
91*7c478bd9Sstevel@tonic-gate 	char *rpath;
92*7c478bd9Sstevel@tonic-gate 
93*7c478bd9Sstevel@tonic-gate 	dbenv = dbmp->dbenv;
94*7c478bd9Sstevel@tonic-gate 	ret = 0;
95*7c478bd9Sstevel@tonic-gate 	rpath = NULL;
96*7c478bd9Sstevel@tonic-gate 
97*7c478bd9Sstevel@tonic-gate 	/*
98*7c478bd9Sstevel@tonic-gate 	 * If mfp is provided, we take the DB_MPOOL_FINFO information from
99*7c478bd9Sstevel@tonic-gate 	 * the mfp.  We don't bother initializing everything, because some
100*7c478bd9Sstevel@tonic-gate 	 * of them are expensive to acquire.  If no mfp is provided and the
101*7c478bd9Sstevel@tonic-gate 	 * finfop argument is NULL, we default the values.
102*7c478bd9Sstevel@tonic-gate 	 */
103*7c478bd9Sstevel@tonic-gate 	if (finfop == NULL) {
104*7c478bd9Sstevel@tonic-gate 		memset(&finfo, 0, sizeof(finfo));
105*7c478bd9Sstevel@tonic-gate 		if (mfp != NULL) {
106*7c478bd9Sstevel@tonic-gate 			finfo.ftype = mfp->ftype;
107*7c478bd9Sstevel@tonic-gate 			finfo.pgcookie = NULL;
108*7c478bd9Sstevel@tonic-gate 			finfo.fileid = NULL;
109*7c478bd9Sstevel@tonic-gate 			finfo.lsn_offset = mfp->lsn_off;
110*7c478bd9Sstevel@tonic-gate 			finfo.clear_len = mfp->clear_len;
111*7c478bd9Sstevel@tonic-gate 		} else {
112*7c478bd9Sstevel@tonic-gate 			finfo.ftype = 0;
113*7c478bd9Sstevel@tonic-gate 			finfo.pgcookie = NULL;
114*7c478bd9Sstevel@tonic-gate 			finfo.fileid = NULL;
115*7c478bd9Sstevel@tonic-gate 			finfo.lsn_offset = -1;
116*7c478bd9Sstevel@tonic-gate 			finfo.clear_len = 0;
117*7c478bd9Sstevel@tonic-gate 		}
118*7c478bd9Sstevel@tonic-gate 		finfop = &finfo;
119*7c478bd9Sstevel@tonic-gate 	}
120*7c478bd9Sstevel@tonic-gate 
121*7c478bd9Sstevel@tonic-gate 	/* Allocate and initialize the per-process structure. */
122*7c478bd9Sstevel@tonic-gate 	if ((ret = __os_calloc(1, sizeof(DB_MPOOLFILE), &dbmfp)) != 0)
123*7c478bd9Sstevel@tonic-gate 		return (ret);
124*7c478bd9Sstevel@tonic-gate 	dbmfp->dbmp = dbmp;
125*7c478bd9Sstevel@tonic-gate 	dbmfp->fd = -1;
126*7c478bd9Sstevel@tonic-gate 	dbmfp->ref = 1;
127*7c478bd9Sstevel@tonic-gate 	if (LF_ISSET(DB_RDONLY))
128*7c478bd9Sstevel@tonic-gate 		F_SET(dbmfp, MP_READONLY);
129*7c478bd9Sstevel@tonic-gate 
130*7c478bd9Sstevel@tonic-gate 	if (path == NULL) {
131*7c478bd9Sstevel@tonic-gate 		if (LF_ISSET(DB_RDONLY)) {
132*7c478bd9Sstevel@tonic-gate 			__db_err(dbenv,
133*7c478bd9Sstevel@tonic-gate 			    "memp_fopen: temporary files can't be readonly");
134*7c478bd9Sstevel@tonic-gate 			ret = EINVAL;
135*7c478bd9Sstevel@tonic-gate 			goto err;
136*7c478bd9Sstevel@tonic-gate 		}
137*7c478bd9Sstevel@tonic-gate 		last_pgno = 0;
138*7c478bd9Sstevel@tonic-gate 	} else {
139*7c478bd9Sstevel@tonic-gate 		/* Get the real name for this file and open it. */
140*7c478bd9Sstevel@tonic-gate 		if ((ret = __db_appname(dbenv,
141*7c478bd9Sstevel@tonic-gate 		    DB_APP_DATA, NULL, path, 0, NULL, &rpath)) != 0)
142*7c478bd9Sstevel@tonic-gate 			goto err;
143*7c478bd9Sstevel@tonic-gate 		if ((ret = __db_open(rpath,
144*7c478bd9Sstevel@tonic-gate 		   LF_ISSET(DB_CREATE | DB_RDONLY),
145*7c478bd9Sstevel@tonic-gate 		   DB_CREATE | DB_RDONLY, mode, &dbmfp->fd)) != 0) {
146*7c478bd9Sstevel@tonic-gate 			__db_err(dbenv, "%s: %s", rpath, strerror(ret));
147*7c478bd9Sstevel@tonic-gate 			goto err;
148*7c478bd9Sstevel@tonic-gate 		}
149*7c478bd9Sstevel@tonic-gate 
150*7c478bd9Sstevel@tonic-gate 		/*
151*7c478bd9Sstevel@tonic-gate 		 * Don't permit files that aren't a multiple of the pagesize,
152*7c478bd9Sstevel@tonic-gate 		 * and find the number of the last page in the file, all the
153*7c478bd9Sstevel@tonic-gate 		 * time being careful not to overflow 32 bits.
154*7c478bd9Sstevel@tonic-gate 		 *
155*7c478bd9Sstevel@tonic-gate 		 * !!!
156*7c478bd9Sstevel@tonic-gate 		 * We can't use off_t's here, or in any code in the mainline
157*7c478bd9Sstevel@tonic-gate 		 * library for that matter.  (We have to use them in the os
158*7c478bd9Sstevel@tonic-gate 		 * stubs, of course, as there are system calls that take them
159*7c478bd9Sstevel@tonic-gate 		 * as arguments.)  The reason is that some customers build in
160*7c478bd9Sstevel@tonic-gate 		 * environments where an off_t is 32-bits, but still run where
161*7c478bd9Sstevel@tonic-gate 		 * offsets are 64-bits, and they pay us a lot of money.
162*7c478bd9Sstevel@tonic-gate 		 */
163*7c478bd9Sstevel@tonic-gate 		if ((ret = __os_ioinfo(rpath,
164*7c478bd9Sstevel@tonic-gate 		    dbmfp->fd, &mbytes, &bytes, NULL)) != 0) {
165*7c478bd9Sstevel@tonic-gate 			__db_err(dbenv, "%s: %s", rpath, strerror(ret));
166*7c478bd9Sstevel@tonic-gate 			goto err;
167*7c478bd9Sstevel@tonic-gate 		}
168*7c478bd9Sstevel@tonic-gate 
169*7c478bd9Sstevel@tonic-gate 		/* Page sizes have to be a power-of-two, ignore mbytes. */
170*7c478bd9Sstevel@tonic-gate 		if (bytes % pagesize != 0) {
171*7c478bd9Sstevel@tonic-gate 			__db_err(dbenv,
172*7c478bd9Sstevel@tonic-gate 			    "%s: file size not a multiple of the pagesize",
173*7c478bd9Sstevel@tonic-gate 			    rpath);
174*7c478bd9Sstevel@tonic-gate 			ret = EINVAL;
175*7c478bd9Sstevel@tonic-gate 			goto err;
176*7c478bd9Sstevel@tonic-gate 		}
177*7c478bd9Sstevel@tonic-gate 
178*7c478bd9Sstevel@tonic-gate 		last_pgno = mbytes * (MEGABYTE / pagesize);
179*7c478bd9Sstevel@tonic-gate 		last_pgno += bytes / pagesize;
180*7c478bd9Sstevel@tonic-gate 
181*7c478bd9Sstevel@tonic-gate 		/* Correction: page numbers are zero-based, not 1-based. */
182*7c478bd9Sstevel@tonic-gate 		if (last_pgno != 0)
183*7c478bd9Sstevel@tonic-gate 			--last_pgno;
184*7c478bd9Sstevel@tonic-gate 
185*7c478bd9Sstevel@tonic-gate 		/*
186*7c478bd9Sstevel@tonic-gate 		 * Get the file id if we weren't given one.  Generated file id's
187*7c478bd9Sstevel@tonic-gate 		 * don't use timestamps, otherwise there'd be no chance of any
188*7c478bd9Sstevel@tonic-gate 		 * other process joining the party.
189*7c478bd9Sstevel@tonic-gate 		 */
190*7c478bd9Sstevel@tonic-gate 		if (finfop->fileid == NULL) {
191*7c478bd9Sstevel@tonic-gate 			if ((ret = __os_fileid(dbenv, rpath, 0, idbuf)) != 0)
192*7c478bd9Sstevel@tonic-gate 				goto err;
193*7c478bd9Sstevel@tonic-gate 			finfop->fileid = idbuf;
194*7c478bd9Sstevel@tonic-gate 		}
195*7c478bd9Sstevel@tonic-gate 	}
196*7c478bd9Sstevel@tonic-gate 
197*7c478bd9Sstevel@tonic-gate 	/*
198*7c478bd9Sstevel@tonic-gate 	 * If we weren't provided an underlying shared object to join with,
199*7c478bd9Sstevel@tonic-gate 	 * find/allocate the shared file objects.  Also allocate space for
200*7c478bd9Sstevel@tonic-gate 	 * for the per-process thread lock.
201*7c478bd9Sstevel@tonic-gate 	 */
202*7c478bd9Sstevel@tonic-gate 	if (needlock)
203*7c478bd9Sstevel@tonic-gate 		LOCKREGION(dbmp);
204*7c478bd9Sstevel@tonic-gate 
205*7c478bd9Sstevel@tonic-gate 	if (mfp == NULL)
206*7c478bd9Sstevel@tonic-gate 		ret = __memp_mf_open(dbmp,
207*7c478bd9Sstevel@tonic-gate 		    path, pagesize, last_pgno, finfop, &mfp);
208*7c478bd9Sstevel@tonic-gate 	else {
209*7c478bd9Sstevel@tonic-gate 		++mfp->ref;
210*7c478bd9Sstevel@tonic-gate 		ret = 0;
211*7c478bd9Sstevel@tonic-gate 	}
212*7c478bd9Sstevel@tonic-gate 	if (ret == 0 &&
213*7c478bd9Sstevel@tonic-gate 	    F_ISSET(dbmp, MP_LOCKHANDLE) && (ret =
214*7c478bd9Sstevel@tonic-gate 	    __memp_alloc(dbmp, sizeof(db_mutex_t), NULL, &dbmfp->mutexp)) == 0)
215*7c478bd9Sstevel@tonic-gate 		LOCKINIT(dbmp, dbmfp->mutexp);
216*7c478bd9Sstevel@tonic-gate 
217*7c478bd9Sstevel@tonic-gate 	if (needlock)
218*7c478bd9Sstevel@tonic-gate 		UNLOCKREGION(dbmp);
219*7c478bd9Sstevel@tonic-gate 	if (ret != 0)
220*7c478bd9Sstevel@tonic-gate 		goto err;
221*7c478bd9Sstevel@tonic-gate 
222*7c478bd9Sstevel@tonic-gate 	dbmfp->mfp = mfp;
223*7c478bd9Sstevel@tonic-gate 
224*7c478bd9Sstevel@tonic-gate 	/*
225*7c478bd9Sstevel@tonic-gate 	 * If a file:
226*7c478bd9Sstevel@tonic-gate 	 *	+ is read-only
227*7c478bd9Sstevel@tonic-gate 	 *	+ isn't temporary
228*7c478bd9Sstevel@tonic-gate 	 *	+ doesn't require any pgin/pgout support
229*7c478bd9Sstevel@tonic-gate 	 *	+ the DB_NOMMAP flag wasn't set
230*7c478bd9Sstevel@tonic-gate 	 *	+ and is less than mp_mmapsize bytes in size
231*7c478bd9Sstevel@tonic-gate 	 *
232*7c478bd9Sstevel@tonic-gate 	 * we can mmap it instead of reading/writing buffers.  Don't do error
233*7c478bd9Sstevel@tonic-gate 	 * checking based on the mmap call failure.  We want to do normal I/O
234*7c478bd9Sstevel@tonic-gate 	 * on the file if the reason we failed was because the file was on an
235*7c478bd9Sstevel@tonic-gate 	 * NFS mounted partition, and we can fail in buffer I/O just as easily
236*7c478bd9Sstevel@tonic-gate 	 * as here.
237*7c478bd9Sstevel@tonic-gate 	 *
238*7c478bd9Sstevel@tonic-gate 	 * XXX
239*7c478bd9Sstevel@tonic-gate 	 * We'd like to test to see if the file is too big to mmap.  Since we
240*7c478bd9Sstevel@tonic-gate 	 * don't know what size or type off_t's or size_t's are, or the largest
241*7c478bd9Sstevel@tonic-gate 	 * unsigned integral type is, or what random insanity the local C
242*7c478bd9Sstevel@tonic-gate 	 * compiler will perpetrate, doing the comparison in a portable way is
243*7c478bd9Sstevel@tonic-gate 	 * flatly impossible.  Hope that mmap fails if the file is too large.
244*7c478bd9Sstevel@tonic-gate 	 */
245*7c478bd9Sstevel@tonic-gate #define	DB_MAXMMAPSIZE	(10 * 1024 * 1024)	/* 10 Mb. */
246*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(mfp, MP_CAN_MMAP)) {
247*7c478bd9Sstevel@tonic-gate 		if (!F_ISSET(dbmfp, MP_READONLY))
248*7c478bd9Sstevel@tonic-gate 			F_CLR(mfp, MP_CAN_MMAP);
249*7c478bd9Sstevel@tonic-gate 		if (path == NULL)
250*7c478bd9Sstevel@tonic-gate 			F_CLR(mfp, MP_CAN_MMAP);
251*7c478bd9Sstevel@tonic-gate 		if (finfop->ftype != 0)
252*7c478bd9Sstevel@tonic-gate 			F_CLR(mfp, MP_CAN_MMAP);
253*7c478bd9Sstevel@tonic-gate 		if (LF_ISSET(DB_NOMMAP))
254*7c478bd9Sstevel@tonic-gate 			F_CLR(mfp, MP_CAN_MMAP);
255*7c478bd9Sstevel@tonic-gate 		maxmap = dbenv == NULL || dbenv->mp_mmapsize == 0 ?
256*7c478bd9Sstevel@tonic-gate 		    DB_MAXMMAPSIZE : dbenv->mp_mmapsize;
257*7c478bd9Sstevel@tonic-gate 		if (mbytes > maxmap / MEGABYTE ||
258*7c478bd9Sstevel@tonic-gate 		    (mbytes == maxmap / MEGABYTE && bytes >= maxmap % MEGABYTE))
259*7c478bd9Sstevel@tonic-gate 			F_CLR(mfp, MP_CAN_MMAP);
260*7c478bd9Sstevel@tonic-gate 	}
261*7c478bd9Sstevel@tonic-gate 	dbmfp->addr = NULL;
262*7c478bd9Sstevel@tonic-gate 	if (F_ISSET(mfp, MP_CAN_MMAP)) {
263*7c478bd9Sstevel@tonic-gate 		dbmfp->len = (size_t)mbytes * MEGABYTE + bytes;
264*7c478bd9Sstevel@tonic-gate 		if (__db_mapfile(rpath,
265*7c478bd9Sstevel@tonic-gate 		    dbmfp->fd, dbmfp->len, 1, &dbmfp->addr) != 0) {
266*7c478bd9Sstevel@tonic-gate 			dbmfp->addr = NULL;
267*7c478bd9Sstevel@tonic-gate 			F_CLR(mfp, MP_CAN_MMAP);
268*7c478bd9Sstevel@tonic-gate 		}
269*7c478bd9Sstevel@tonic-gate 	}
270*7c478bd9Sstevel@tonic-gate 	if (rpath != NULL)
271*7c478bd9Sstevel@tonic-gate 		__os_freestr(rpath);
272*7c478bd9Sstevel@tonic-gate 
273*7c478bd9Sstevel@tonic-gate 	LOCKHANDLE(dbmp, dbmp->mutexp);
274*7c478bd9Sstevel@tonic-gate 	TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q);
275*7c478bd9Sstevel@tonic-gate 	UNLOCKHANDLE(dbmp, dbmp->mutexp);
276*7c478bd9Sstevel@tonic-gate 
277*7c478bd9Sstevel@tonic-gate 	*retp = dbmfp;
278*7c478bd9Sstevel@tonic-gate 	return (0);
279*7c478bd9Sstevel@tonic-gate 
280*7c478bd9Sstevel@tonic-gate err:	/*
281*7c478bd9Sstevel@tonic-gate 	 * Note that we do not have to free the thread mutex, because we
282*7c478bd9Sstevel@tonic-gate 	 * never get to here after we have successfully allocated it.
283*7c478bd9Sstevel@tonic-gate 	 */
284*7c478bd9Sstevel@tonic-gate 	if (rpath != NULL)
285*7c478bd9Sstevel@tonic-gate 		__os_freestr(rpath);
286*7c478bd9Sstevel@tonic-gate 	if (dbmfp->fd != -1)
287*7c478bd9Sstevel@tonic-gate 		(void)__os_close(dbmfp->fd);
288*7c478bd9Sstevel@tonic-gate 	if (dbmfp != NULL)
289*7c478bd9Sstevel@tonic-gate 		__os_free(dbmfp, sizeof(DB_MPOOLFILE));
290*7c478bd9Sstevel@tonic-gate 	return (ret);
291*7c478bd9Sstevel@tonic-gate }
292*7c478bd9Sstevel@tonic-gate 
293*7c478bd9Sstevel@tonic-gate /*
294*7c478bd9Sstevel@tonic-gate  * __memp_mf_open --
295*7c478bd9Sstevel@tonic-gate  *	Open an MPOOLFILE.
296*7c478bd9Sstevel@tonic-gate  */
297*7c478bd9Sstevel@tonic-gate static int
__memp_mf_open(dbmp,path,pagesize,last_pgno,finfop,retp)298*7c478bd9Sstevel@tonic-gate __memp_mf_open(dbmp, path, pagesize, last_pgno, finfop, retp)
299*7c478bd9Sstevel@tonic-gate 	DB_MPOOL *dbmp;
300*7c478bd9Sstevel@tonic-gate 	const char *path;
301*7c478bd9Sstevel@tonic-gate 	size_t pagesize;
302*7c478bd9Sstevel@tonic-gate 	db_pgno_t last_pgno;
303*7c478bd9Sstevel@tonic-gate 	DB_MPOOL_FINFO *finfop;
304*7c478bd9Sstevel@tonic-gate 	MPOOLFILE **retp;
305*7c478bd9Sstevel@tonic-gate {
306*7c478bd9Sstevel@tonic-gate 	MPOOLFILE *mfp;
307*7c478bd9Sstevel@tonic-gate 	int ret;
308*7c478bd9Sstevel@tonic-gate 	void *p;
309*7c478bd9Sstevel@tonic-gate 
310*7c478bd9Sstevel@tonic-gate #define	ISTEMPORARY	(path == NULL)
311*7c478bd9Sstevel@tonic-gate 
312*7c478bd9Sstevel@tonic-gate 	/*
313*7c478bd9Sstevel@tonic-gate 	 * Walk the list of MPOOLFILE's, looking for a matching file.
314*7c478bd9Sstevel@tonic-gate 	 * Temporary files can't match previous files.
315*7c478bd9Sstevel@tonic-gate 	 */
316*7c478bd9Sstevel@tonic-gate 	if (!ISTEMPORARY)
317*7c478bd9Sstevel@tonic-gate 		for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
318*7c478bd9Sstevel@tonic-gate 		    mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
319*7c478bd9Sstevel@tonic-gate 			if (F_ISSET(mfp, MP_TEMP))
320*7c478bd9Sstevel@tonic-gate 				continue;
321*7c478bd9Sstevel@tonic-gate 			if (!memcmp(finfop->fileid,
322*7c478bd9Sstevel@tonic-gate 			    R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) {
323*7c478bd9Sstevel@tonic-gate 				if (finfop->clear_len != mfp->clear_len ||
324*7c478bd9Sstevel@tonic-gate 				    finfop->ftype != mfp->ftype ||
325*7c478bd9Sstevel@tonic-gate 				    pagesize != mfp->stat.st_pagesize) {
326*7c478bd9Sstevel@tonic-gate 					__db_err(dbmp->dbenv,
327*7c478bd9Sstevel@tonic-gate 			    "%s: ftype, clear length or pagesize changed",
328*7c478bd9Sstevel@tonic-gate 					    path);
329*7c478bd9Sstevel@tonic-gate 					return (EINVAL);
330*7c478bd9Sstevel@tonic-gate 				}
331*7c478bd9Sstevel@tonic-gate 
332*7c478bd9Sstevel@tonic-gate 				/* Found it: increment the reference count. */
333*7c478bd9Sstevel@tonic-gate 				++mfp->ref;
334*7c478bd9Sstevel@tonic-gate 				*retp = mfp;
335*7c478bd9Sstevel@tonic-gate 				return (0);
336*7c478bd9Sstevel@tonic-gate 			}
337*7c478bd9Sstevel@tonic-gate 		}
338*7c478bd9Sstevel@tonic-gate 
339*7c478bd9Sstevel@tonic-gate 	/* Allocate a new MPOOLFILE. */
340*7c478bd9Sstevel@tonic-gate 	if ((ret = __memp_alloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0)
341*7c478bd9Sstevel@tonic-gate 		return (ret);
342*7c478bd9Sstevel@tonic-gate 	*retp = mfp;
343*7c478bd9Sstevel@tonic-gate 
344*7c478bd9Sstevel@tonic-gate 	/* Initialize the structure. */
345*7c478bd9Sstevel@tonic-gate 	memset(mfp, 0, sizeof(MPOOLFILE));
346*7c478bd9Sstevel@tonic-gate 	mfp->ref = 1;
347*7c478bd9Sstevel@tonic-gate 	mfp->ftype = finfop->ftype;
348*7c478bd9Sstevel@tonic-gate 	mfp->lsn_off = finfop->lsn_offset;
349*7c478bd9Sstevel@tonic-gate 	mfp->clear_len = finfop->clear_len;
350*7c478bd9Sstevel@tonic-gate 
351*7c478bd9Sstevel@tonic-gate 	/*
352*7c478bd9Sstevel@tonic-gate 	 * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a memp_fget,
353*7c478bd9Sstevel@tonic-gate 	 * we have to know the last page in the file.  Figure it out and save
354*7c478bd9Sstevel@tonic-gate 	 * it away.
355*7c478bd9Sstevel@tonic-gate 	 */
356*7c478bd9Sstevel@tonic-gate 	mfp->stat.st_pagesize = pagesize;
357*7c478bd9Sstevel@tonic-gate 	mfp->orig_last_pgno = mfp->last_pgno = last_pgno;
358*7c478bd9Sstevel@tonic-gate 
359*7c478bd9Sstevel@tonic-gate 	if (ISTEMPORARY)
360*7c478bd9Sstevel@tonic-gate 		F_SET(mfp, MP_TEMP);
361*7c478bd9Sstevel@tonic-gate 	else {
362*7c478bd9Sstevel@tonic-gate 		/* Copy the file path into shared memory. */
363*7c478bd9Sstevel@tonic-gate 		if ((ret = __memp_alloc(dbmp,
364*7c478bd9Sstevel@tonic-gate 		    strlen(path) + 1, &mfp->path_off, &p)) != 0)
365*7c478bd9Sstevel@tonic-gate 			goto err;
366*7c478bd9Sstevel@tonic-gate 		memcpy(p, path, strlen(path) + 1);
367*7c478bd9Sstevel@tonic-gate 
368*7c478bd9Sstevel@tonic-gate 		/* Copy the file identification string into shared memory. */
369*7c478bd9Sstevel@tonic-gate 		if ((ret = __memp_alloc(dbmp,
370*7c478bd9Sstevel@tonic-gate 		    DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0)
371*7c478bd9Sstevel@tonic-gate 			goto err;
372*7c478bd9Sstevel@tonic-gate 		memcpy(p, finfop->fileid, DB_FILE_ID_LEN);
373*7c478bd9Sstevel@tonic-gate 
374*7c478bd9Sstevel@tonic-gate 		F_SET(mfp, MP_CAN_MMAP);
375*7c478bd9Sstevel@tonic-gate 	}
376*7c478bd9Sstevel@tonic-gate 
377*7c478bd9Sstevel@tonic-gate 	/* Copy the page cookie into shared memory. */
378*7c478bd9Sstevel@tonic-gate 	if (finfop->pgcookie == NULL || finfop->pgcookie->size == 0) {
379*7c478bd9Sstevel@tonic-gate 		mfp->pgcookie_len = 0;
380*7c478bd9Sstevel@tonic-gate 		mfp->pgcookie_off = 0;
381*7c478bd9Sstevel@tonic-gate 	} else {
382*7c478bd9Sstevel@tonic-gate 		if ((ret = __memp_alloc(dbmp,
383*7c478bd9Sstevel@tonic-gate 		    finfop->pgcookie->size, &mfp->pgcookie_off, &p)) != 0)
384*7c478bd9Sstevel@tonic-gate 			goto err;
385*7c478bd9Sstevel@tonic-gate 		memcpy(p, finfop->pgcookie->data, finfop->pgcookie->size);
386*7c478bd9Sstevel@tonic-gate 		mfp->pgcookie_len = finfop->pgcookie->size;
387*7c478bd9Sstevel@tonic-gate 	}
388*7c478bd9Sstevel@tonic-gate 
389*7c478bd9Sstevel@tonic-gate 	/* Prepend the MPOOLFILE to the list of MPOOLFILE's. */
390*7c478bd9Sstevel@tonic-gate 	SH_TAILQ_INSERT_HEAD(&dbmp->mp->mpfq, mfp, q, __mpoolfile);
391*7c478bd9Sstevel@tonic-gate 
392*7c478bd9Sstevel@tonic-gate 	if (0) {
393*7c478bd9Sstevel@tonic-gate err:		if (mfp->path_off != 0)
394*7c478bd9Sstevel@tonic-gate 			__db_shalloc_free(dbmp->addr,
395*7c478bd9Sstevel@tonic-gate 			    R_ADDR(dbmp, mfp->path_off));
396*7c478bd9Sstevel@tonic-gate 		if (mfp->fileid_off != 0)
397*7c478bd9Sstevel@tonic-gate 			__db_shalloc_free(dbmp->addr,
398*7c478bd9Sstevel@tonic-gate 			    R_ADDR(dbmp, mfp->fileid_off));
399*7c478bd9Sstevel@tonic-gate 		if (mfp != NULL)
400*7c478bd9Sstevel@tonic-gate 			__db_shalloc_free(dbmp->addr, mfp);
401*7c478bd9Sstevel@tonic-gate 		mfp = NULL;
402*7c478bd9Sstevel@tonic-gate 	}
403*7c478bd9Sstevel@tonic-gate 	return (0);
404*7c478bd9Sstevel@tonic-gate }
405*7c478bd9Sstevel@tonic-gate 
406*7c478bd9Sstevel@tonic-gate /*
407*7c478bd9Sstevel@tonic-gate  * memp_fclose --
408*7c478bd9Sstevel@tonic-gate  *	Close a backing file for the memory pool.
409*7c478bd9Sstevel@tonic-gate  */
410*7c478bd9Sstevel@tonic-gate int
memp_fclose(dbmfp)411*7c478bd9Sstevel@tonic-gate memp_fclose(dbmfp)
412*7c478bd9Sstevel@tonic-gate 	DB_MPOOLFILE *dbmfp;
413*7c478bd9Sstevel@tonic-gate {
414*7c478bd9Sstevel@tonic-gate 	DB_MPOOL *dbmp;
415*7c478bd9Sstevel@tonic-gate 	int ret, t_ret;
416*7c478bd9Sstevel@tonic-gate 
417*7c478bd9Sstevel@tonic-gate 	dbmp = dbmfp->dbmp;
418*7c478bd9Sstevel@tonic-gate 	ret = 0;
419*7c478bd9Sstevel@tonic-gate 
420*7c478bd9Sstevel@tonic-gate 	MP_PANIC_CHECK(dbmp);
421*7c478bd9Sstevel@tonic-gate 
422*7c478bd9Sstevel@tonic-gate 	for (;;) {
423*7c478bd9Sstevel@tonic-gate 		LOCKHANDLE(dbmp, dbmp->mutexp);
424*7c478bd9Sstevel@tonic-gate 
425*7c478bd9Sstevel@tonic-gate 		/*
426*7c478bd9Sstevel@tonic-gate 		 * We have to reference count DB_MPOOLFILE structures as other
427*7c478bd9Sstevel@tonic-gate 		 * threads may be using them.  The problem only happens if the
428*7c478bd9Sstevel@tonic-gate 		 * application makes a bad design choice.  Here's the path:
429*7c478bd9Sstevel@tonic-gate 		 *
430*7c478bd9Sstevel@tonic-gate 		 * Thread A opens a database.
431*7c478bd9Sstevel@tonic-gate 		 * Thread B uses thread A's DB_MPOOLFILE to write a buffer
432*7c478bd9Sstevel@tonic-gate 		 *    in order to free up memory in the mpool cache.
433*7c478bd9Sstevel@tonic-gate 		 * Thread A closes the database while thread B is using the
434*7c478bd9Sstevel@tonic-gate 		 *    DB_MPOOLFILE structure.
435*7c478bd9Sstevel@tonic-gate 		 *
436*7c478bd9Sstevel@tonic-gate 		 * By opening all databases before creating the threads, and
437*7c478bd9Sstevel@tonic-gate 		 * closing them after the threads have exited, applications
438*7c478bd9Sstevel@tonic-gate 		 * get better performance and avoid the problem path entirely.
439*7c478bd9Sstevel@tonic-gate 		 *
440*7c478bd9Sstevel@tonic-gate 		 * Regardless, holding the DB_MPOOLFILE to flush a dirty buffer
441*7c478bd9Sstevel@tonic-gate 		 * is a short-term lock, even in worst case, since we better be
442*7c478bd9Sstevel@tonic-gate 		 * the only thread of control using the DB_MPOOLFILE structure
443*7c478bd9Sstevel@tonic-gate 		 * to read pages *into* the cache.  Wait until we're the only
444*7c478bd9Sstevel@tonic-gate 		 * reference holder and remove the DB_MPOOLFILE structure from
445*7c478bd9Sstevel@tonic-gate 		 * the list, so nobody else can even find it.
446*7c478bd9Sstevel@tonic-gate 		 */
447*7c478bd9Sstevel@tonic-gate 		if (dbmfp->ref == 1) {
448*7c478bd9Sstevel@tonic-gate 			TAILQ_REMOVE(&dbmp->dbmfq, dbmfp, q);
449*7c478bd9Sstevel@tonic-gate 			break;
450*7c478bd9Sstevel@tonic-gate 		}
451*7c478bd9Sstevel@tonic-gate 		UNLOCKHANDLE(dbmp, dbmp->mutexp);
452*7c478bd9Sstevel@tonic-gate 
453*7c478bd9Sstevel@tonic-gate 		(void)__os_sleep(1, 0);
454*7c478bd9Sstevel@tonic-gate 	}
455*7c478bd9Sstevel@tonic-gate 	UNLOCKHANDLE(dbmp, dbmp->mutexp);
456*7c478bd9Sstevel@tonic-gate 
457*7c478bd9Sstevel@tonic-gate 	/* Complain if pinned blocks never returned. */
458*7c478bd9Sstevel@tonic-gate 	if (dbmfp->pinref != 0)
459*7c478bd9Sstevel@tonic-gate 		__db_err(dbmp->dbenv, "%s: close: %lu blocks left pinned",
460*7c478bd9Sstevel@tonic-gate 		    __memp_fn(dbmfp), (u_long)dbmfp->pinref);
461*7c478bd9Sstevel@tonic-gate 
462*7c478bd9Sstevel@tonic-gate 	/* Close the underlying MPOOLFILE. */
463*7c478bd9Sstevel@tonic-gate 	(void)__memp_mf_close(dbmp, dbmfp);
464*7c478bd9Sstevel@tonic-gate 
465*7c478bd9Sstevel@tonic-gate 	/* Discard any mmap information. */
466*7c478bd9Sstevel@tonic-gate 	if (dbmfp->addr != NULL &&
467*7c478bd9Sstevel@tonic-gate 	    (ret = __db_unmapfile(dbmfp->addr, dbmfp->len)) != 0)
468*7c478bd9Sstevel@tonic-gate 		__db_err(dbmp->dbenv,
469*7c478bd9Sstevel@tonic-gate 		    "%s: %s", __memp_fn(dbmfp), strerror(ret));
470*7c478bd9Sstevel@tonic-gate 
471*7c478bd9Sstevel@tonic-gate 	/* Close the file; temporary files may not yet have been created. */
472*7c478bd9Sstevel@tonic-gate 	if (dbmfp->fd != -1 && (t_ret = __os_close(dbmfp->fd)) != 0) {
473*7c478bd9Sstevel@tonic-gate 		__db_err(dbmp->dbenv,
474*7c478bd9Sstevel@tonic-gate 		    "%s: %s", __memp_fn(dbmfp), strerror(t_ret));
475*7c478bd9Sstevel@tonic-gate 		if (ret != 0)
476*7c478bd9Sstevel@tonic-gate 			t_ret = ret;
477*7c478bd9Sstevel@tonic-gate 	}
478*7c478bd9Sstevel@tonic-gate 
479*7c478bd9Sstevel@tonic-gate 	/* Free memory. */
480*7c478bd9Sstevel@tonic-gate 	if (dbmfp->mutexp != NULL) {
481*7c478bd9Sstevel@tonic-gate 		LOCKREGION(dbmp);
482*7c478bd9Sstevel@tonic-gate 		__db_shalloc_free(dbmp->addr, dbmfp->mutexp);
483*7c478bd9Sstevel@tonic-gate 		UNLOCKREGION(dbmp);
484*7c478bd9Sstevel@tonic-gate 	}
485*7c478bd9Sstevel@tonic-gate 
486*7c478bd9Sstevel@tonic-gate 	/* Discard the DB_MPOOLFILE structure. */
487*7c478bd9Sstevel@tonic-gate 	__os_free(dbmfp, sizeof(DB_MPOOLFILE));
488*7c478bd9Sstevel@tonic-gate 
489*7c478bd9Sstevel@tonic-gate 	return (ret);
490*7c478bd9Sstevel@tonic-gate }
491*7c478bd9Sstevel@tonic-gate 
492*7c478bd9Sstevel@tonic-gate /*
493*7c478bd9Sstevel@tonic-gate  * __memp_mf_close --
494*7c478bd9Sstevel@tonic-gate  *	Close down an MPOOLFILE.
495*7c478bd9Sstevel@tonic-gate  */
496*7c478bd9Sstevel@tonic-gate static int
__memp_mf_close(dbmp,dbmfp)497*7c478bd9Sstevel@tonic-gate __memp_mf_close(dbmp, dbmfp)
498*7c478bd9Sstevel@tonic-gate 	DB_MPOOL *dbmp;
499*7c478bd9Sstevel@tonic-gate 	DB_MPOOLFILE *dbmfp;
500*7c478bd9Sstevel@tonic-gate {
501*7c478bd9Sstevel@tonic-gate 	BH *bhp, *nbhp;
502*7c478bd9Sstevel@tonic-gate 	MPOOL *mp;
503*7c478bd9Sstevel@tonic-gate 	MPOOLFILE *mfp;
504*7c478bd9Sstevel@tonic-gate 	size_t mf_offset;
505*7c478bd9Sstevel@tonic-gate 
506*7c478bd9Sstevel@tonic-gate 	mp = dbmp->mp;
507*7c478bd9Sstevel@tonic-gate 	mfp = dbmfp->mfp;
508*7c478bd9Sstevel@tonic-gate 
509*7c478bd9Sstevel@tonic-gate 	LOCKREGION(dbmp);
510*7c478bd9Sstevel@tonic-gate 
511*7c478bd9Sstevel@tonic-gate 	/* If more than a single reference, simply decrement. */
512*7c478bd9Sstevel@tonic-gate 	if (mfp->ref > 1) {
513*7c478bd9Sstevel@tonic-gate 		--mfp->ref;
514*7c478bd9Sstevel@tonic-gate 		goto ret1;
515*7c478bd9Sstevel@tonic-gate 	}
516*7c478bd9Sstevel@tonic-gate 
517*7c478bd9Sstevel@tonic-gate 	/*
518*7c478bd9Sstevel@tonic-gate 	 * Move any BH's held by the file to the free list.  We don't free the
519*7c478bd9Sstevel@tonic-gate 	 * memory itself because we may be discarding the memory pool, and it's
520*7c478bd9Sstevel@tonic-gate 	 * fairly expensive to reintegrate the buffers back into the region for
521*7c478bd9Sstevel@tonic-gate 	 * no purpose.
522*7c478bd9Sstevel@tonic-gate 	 */
523*7c478bd9Sstevel@tonic-gate 	mf_offset = R_OFFSET(dbmp, mfp);
524*7c478bd9Sstevel@tonic-gate 	for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) {
525*7c478bd9Sstevel@tonic-gate 		nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
526*7c478bd9Sstevel@tonic-gate 
527*7c478bd9Sstevel@tonic-gate #ifdef DEBUG_NO_DIRTY
528*7c478bd9Sstevel@tonic-gate 		/* Complain if we find any blocks that were left dirty. */
529*7c478bd9Sstevel@tonic-gate 		if (F_ISSET(bhp, BH_DIRTY))
530*7c478bd9Sstevel@tonic-gate 			__db_err(dbmp->dbenv,
531*7c478bd9Sstevel@tonic-gate 			    "%s: close: pgno %lu left dirty; ref %lu",
532*7c478bd9Sstevel@tonic-gate 			    __memp_fn(dbmfp),
533*7c478bd9Sstevel@tonic-gate 			    (u_long)bhp->pgno, (u_long)bhp->ref);
534*7c478bd9Sstevel@tonic-gate #endif
535*7c478bd9Sstevel@tonic-gate 
536*7c478bd9Sstevel@tonic-gate 		if (bhp->mf_offset == mf_offset) {
537*7c478bd9Sstevel@tonic-gate 			if (F_ISSET(bhp, BH_DIRTY)) {
538*7c478bd9Sstevel@tonic-gate 				++mp->stat.st_page_clean;
539*7c478bd9Sstevel@tonic-gate 				--mp->stat.st_page_dirty;
540*7c478bd9Sstevel@tonic-gate 			}
541*7c478bd9Sstevel@tonic-gate 			__memp_bhfree(dbmp, mfp, bhp, 0);
542*7c478bd9Sstevel@tonic-gate 			SH_TAILQ_INSERT_HEAD(&mp->bhfq, bhp, q, __bh);
543*7c478bd9Sstevel@tonic-gate 		}
544*7c478bd9Sstevel@tonic-gate 	}
545*7c478bd9Sstevel@tonic-gate 
546*7c478bd9Sstevel@tonic-gate 	/* Delete from the list of MPOOLFILEs. */
547*7c478bd9Sstevel@tonic-gate 	SH_TAILQ_REMOVE(&mp->mpfq, mfp, q, __mpoolfile);
548*7c478bd9Sstevel@tonic-gate 
549*7c478bd9Sstevel@tonic-gate 	/* Free the space. */
550*7c478bd9Sstevel@tonic-gate 	if (mfp->path_off != 0)
551*7c478bd9Sstevel@tonic-gate 		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off));
552*7c478bd9Sstevel@tonic-gate 	if (mfp->fileid_off != 0)
553*7c478bd9Sstevel@tonic-gate 		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off));
554*7c478bd9Sstevel@tonic-gate 	if (mfp->pgcookie_off != 0)
555*7c478bd9Sstevel@tonic-gate 		__db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->pgcookie_off));
556*7c478bd9Sstevel@tonic-gate 	__db_shalloc_free(dbmp->addr, mfp);
557*7c478bd9Sstevel@tonic-gate 
558*7c478bd9Sstevel@tonic-gate ret1:	UNLOCKREGION(dbmp);
559*7c478bd9Sstevel@tonic-gate 	return (0);
560*7c478bd9Sstevel@tonic-gate }
561