1*7c478bd9Sstevel@tonic-gate /*-
2*7c478bd9Sstevel@tonic-gate * See the file LICENSE for redistribution information.
3*7c478bd9Sstevel@tonic-gate *
4*7c478bd9Sstevel@tonic-gate * Copyright (c) 1996, 1997, 1998
5*7c478bd9Sstevel@tonic-gate * Sleepycat Software. All rights reserved.
6*7c478bd9Sstevel@tonic-gate */
7*7c478bd9Sstevel@tonic-gate #include "config.h"
8*7c478bd9Sstevel@tonic-gate
9*7c478bd9Sstevel@tonic-gate #ifndef lint
10*7c478bd9Sstevel@tonic-gate static const char sccsid[] = "@(#)mp_fopen.c 10.60 (Sleepycat) 1/1/99";
11*7c478bd9Sstevel@tonic-gate #endif /* not lint */
12*7c478bd9Sstevel@tonic-gate
13*7c478bd9Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES
14*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
15*7c478bd9Sstevel@tonic-gate
16*7c478bd9Sstevel@tonic-gate #include <errno.h>
17*7c478bd9Sstevel@tonic-gate #include <string.h>
18*7c478bd9Sstevel@tonic-gate #endif
19*7c478bd9Sstevel@tonic-gate
20*7c478bd9Sstevel@tonic-gate #include "db_int.h"
21*7c478bd9Sstevel@tonic-gate #include "shqueue.h"
22*7c478bd9Sstevel@tonic-gate #include "db_shash.h"
23*7c478bd9Sstevel@tonic-gate #include "mp.h"
24*7c478bd9Sstevel@tonic-gate #include "common_ext.h"
25*7c478bd9Sstevel@tonic-gate
26*7c478bd9Sstevel@tonic-gate static int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *));
27*7c478bd9Sstevel@tonic-gate static int __memp_mf_open __P((DB_MPOOL *,
28*7c478bd9Sstevel@tonic-gate const char *, size_t, db_pgno_t, DB_MPOOL_FINFO *, MPOOLFILE **));
29*7c478bd9Sstevel@tonic-gate
30*7c478bd9Sstevel@tonic-gate /*
31*7c478bd9Sstevel@tonic-gate * memp_fopen --
32*7c478bd9Sstevel@tonic-gate * Open a backing file for the memory pool.
33*7c478bd9Sstevel@tonic-gate */
34*7c478bd9Sstevel@tonic-gate int
memp_fopen(dbmp,path,flags,mode,pagesize,finfop,retp)35*7c478bd9Sstevel@tonic-gate memp_fopen(dbmp, path, flags, mode, pagesize, finfop, retp)
36*7c478bd9Sstevel@tonic-gate DB_MPOOL *dbmp;
37*7c478bd9Sstevel@tonic-gate const char *path;
38*7c478bd9Sstevel@tonic-gate u_int32_t flags;
39*7c478bd9Sstevel@tonic-gate int mode;
40*7c478bd9Sstevel@tonic-gate size_t pagesize;
41*7c478bd9Sstevel@tonic-gate DB_MPOOL_FINFO *finfop;
42*7c478bd9Sstevel@tonic-gate DB_MPOOLFILE **retp;
43*7c478bd9Sstevel@tonic-gate {
44*7c478bd9Sstevel@tonic-gate int ret;
45*7c478bd9Sstevel@tonic-gate
46*7c478bd9Sstevel@tonic-gate MP_PANIC_CHECK(dbmp);
47*7c478bd9Sstevel@tonic-gate
48*7c478bd9Sstevel@tonic-gate /* Validate arguments. */
49*7c478bd9Sstevel@tonic-gate if ((ret = __db_fchk(dbmp->dbenv,
50*7c478bd9Sstevel@tonic-gate "memp_fopen", flags, DB_CREATE | DB_NOMMAP | DB_RDONLY)) != 0)
51*7c478bd9Sstevel@tonic-gate return (ret);
52*7c478bd9Sstevel@tonic-gate
53*7c478bd9Sstevel@tonic-gate /* Require a non-zero pagesize. */
54*7c478bd9Sstevel@tonic-gate if (pagesize == 0) {
55*7c478bd9Sstevel@tonic-gate __db_err(dbmp->dbenv, "memp_fopen: pagesize not specified");
56*7c478bd9Sstevel@tonic-gate return (EINVAL);
57*7c478bd9Sstevel@tonic-gate }
58*7c478bd9Sstevel@tonic-gate if (finfop != NULL && finfop->clear_len > pagesize)
59*7c478bd9Sstevel@tonic-gate return (EINVAL);
60*7c478bd9Sstevel@tonic-gate
61*7c478bd9Sstevel@tonic-gate return (__memp_fopen(dbmp,
62*7c478bd9Sstevel@tonic-gate NULL, path, flags, mode, pagesize, 1, finfop, retp));
63*7c478bd9Sstevel@tonic-gate }
64*7c478bd9Sstevel@tonic-gate
65*7c478bd9Sstevel@tonic-gate /*
66*7c478bd9Sstevel@tonic-gate * __memp_fopen --
67*7c478bd9Sstevel@tonic-gate * Open a backing file for the memory pool; internal version.
68*7c478bd9Sstevel@tonic-gate *
69*7c478bd9Sstevel@tonic-gate * PUBLIC: int __memp_fopen __P((DB_MPOOL *, MPOOLFILE *, const char *,
70*7c478bd9Sstevel@tonic-gate * PUBLIC: u_int32_t, int, size_t, int, DB_MPOOL_FINFO *, DB_MPOOLFILE **));
71*7c478bd9Sstevel@tonic-gate */
72*7c478bd9Sstevel@tonic-gate int
__memp_fopen(dbmp,mfp,path,flags,mode,pagesize,needlock,finfop,retp)73*7c478bd9Sstevel@tonic-gate __memp_fopen(dbmp, mfp, path, flags, mode, pagesize, needlock, finfop, retp)
74*7c478bd9Sstevel@tonic-gate DB_MPOOL *dbmp;
75*7c478bd9Sstevel@tonic-gate MPOOLFILE *mfp;
76*7c478bd9Sstevel@tonic-gate const char *path;
77*7c478bd9Sstevel@tonic-gate u_int32_t flags;
78*7c478bd9Sstevel@tonic-gate int mode, needlock;
79*7c478bd9Sstevel@tonic-gate size_t pagesize;
80*7c478bd9Sstevel@tonic-gate DB_MPOOL_FINFO *finfop;
81*7c478bd9Sstevel@tonic-gate DB_MPOOLFILE **retp;
82*7c478bd9Sstevel@tonic-gate {
83*7c478bd9Sstevel@tonic-gate DB_ENV *dbenv;
84*7c478bd9Sstevel@tonic-gate DB_MPOOLFILE *dbmfp;
85*7c478bd9Sstevel@tonic-gate DB_MPOOL_FINFO finfo;
86*7c478bd9Sstevel@tonic-gate db_pgno_t last_pgno;
87*7c478bd9Sstevel@tonic-gate size_t maxmap;
88*7c478bd9Sstevel@tonic-gate u_int32_t mbytes, bytes;
89*7c478bd9Sstevel@tonic-gate int ret;
90*7c478bd9Sstevel@tonic-gate u_int8_t idbuf[DB_FILE_ID_LEN];
91*7c478bd9Sstevel@tonic-gate char *rpath;
92*7c478bd9Sstevel@tonic-gate
93*7c478bd9Sstevel@tonic-gate dbenv = dbmp->dbenv;
94*7c478bd9Sstevel@tonic-gate ret = 0;
95*7c478bd9Sstevel@tonic-gate rpath = NULL;
96*7c478bd9Sstevel@tonic-gate
97*7c478bd9Sstevel@tonic-gate /*
98*7c478bd9Sstevel@tonic-gate * If mfp is provided, we take the DB_MPOOL_FINFO information from
99*7c478bd9Sstevel@tonic-gate * the mfp. We don't bother initializing everything, because some
100*7c478bd9Sstevel@tonic-gate * of them are expensive to acquire. If no mfp is provided and the
101*7c478bd9Sstevel@tonic-gate * finfop argument is NULL, we default the values.
102*7c478bd9Sstevel@tonic-gate */
103*7c478bd9Sstevel@tonic-gate if (finfop == NULL) {
104*7c478bd9Sstevel@tonic-gate memset(&finfo, 0, sizeof(finfo));
105*7c478bd9Sstevel@tonic-gate if (mfp != NULL) {
106*7c478bd9Sstevel@tonic-gate finfo.ftype = mfp->ftype;
107*7c478bd9Sstevel@tonic-gate finfo.pgcookie = NULL;
108*7c478bd9Sstevel@tonic-gate finfo.fileid = NULL;
109*7c478bd9Sstevel@tonic-gate finfo.lsn_offset = mfp->lsn_off;
110*7c478bd9Sstevel@tonic-gate finfo.clear_len = mfp->clear_len;
111*7c478bd9Sstevel@tonic-gate } else {
112*7c478bd9Sstevel@tonic-gate finfo.ftype = 0;
113*7c478bd9Sstevel@tonic-gate finfo.pgcookie = NULL;
114*7c478bd9Sstevel@tonic-gate finfo.fileid = NULL;
115*7c478bd9Sstevel@tonic-gate finfo.lsn_offset = -1;
116*7c478bd9Sstevel@tonic-gate finfo.clear_len = 0;
117*7c478bd9Sstevel@tonic-gate }
118*7c478bd9Sstevel@tonic-gate finfop = &finfo;
119*7c478bd9Sstevel@tonic-gate }
120*7c478bd9Sstevel@tonic-gate
121*7c478bd9Sstevel@tonic-gate /* Allocate and initialize the per-process structure. */
122*7c478bd9Sstevel@tonic-gate if ((ret = __os_calloc(1, sizeof(DB_MPOOLFILE), &dbmfp)) != 0)
123*7c478bd9Sstevel@tonic-gate return (ret);
124*7c478bd9Sstevel@tonic-gate dbmfp->dbmp = dbmp;
125*7c478bd9Sstevel@tonic-gate dbmfp->fd = -1;
126*7c478bd9Sstevel@tonic-gate dbmfp->ref = 1;
127*7c478bd9Sstevel@tonic-gate if (LF_ISSET(DB_RDONLY))
128*7c478bd9Sstevel@tonic-gate F_SET(dbmfp, MP_READONLY);
129*7c478bd9Sstevel@tonic-gate
130*7c478bd9Sstevel@tonic-gate if (path == NULL) {
131*7c478bd9Sstevel@tonic-gate if (LF_ISSET(DB_RDONLY)) {
132*7c478bd9Sstevel@tonic-gate __db_err(dbenv,
133*7c478bd9Sstevel@tonic-gate "memp_fopen: temporary files can't be readonly");
134*7c478bd9Sstevel@tonic-gate ret = EINVAL;
135*7c478bd9Sstevel@tonic-gate goto err;
136*7c478bd9Sstevel@tonic-gate }
137*7c478bd9Sstevel@tonic-gate last_pgno = 0;
138*7c478bd9Sstevel@tonic-gate } else {
139*7c478bd9Sstevel@tonic-gate /* Get the real name for this file and open it. */
140*7c478bd9Sstevel@tonic-gate if ((ret = __db_appname(dbenv,
141*7c478bd9Sstevel@tonic-gate DB_APP_DATA, NULL, path, 0, NULL, &rpath)) != 0)
142*7c478bd9Sstevel@tonic-gate goto err;
143*7c478bd9Sstevel@tonic-gate if ((ret = __db_open(rpath,
144*7c478bd9Sstevel@tonic-gate LF_ISSET(DB_CREATE | DB_RDONLY),
145*7c478bd9Sstevel@tonic-gate DB_CREATE | DB_RDONLY, mode, &dbmfp->fd)) != 0) {
146*7c478bd9Sstevel@tonic-gate __db_err(dbenv, "%s: %s", rpath, strerror(ret));
147*7c478bd9Sstevel@tonic-gate goto err;
148*7c478bd9Sstevel@tonic-gate }
149*7c478bd9Sstevel@tonic-gate
150*7c478bd9Sstevel@tonic-gate /*
151*7c478bd9Sstevel@tonic-gate * Don't permit files that aren't a multiple of the pagesize,
152*7c478bd9Sstevel@tonic-gate * and find the number of the last page in the file, all the
153*7c478bd9Sstevel@tonic-gate * time being careful not to overflow 32 bits.
154*7c478bd9Sstevel@tonic-gate *
155*7c478bd9Sstevel@tonic-gate * !!!
156*7c478bd9Sstevel@tonic-gate * We can't use off_t's here, or in any code in the mainline
157*7c478bd9Sstevel@tonic-gate * library for that matter. (We have to use them in the os
158*7c478bd9Sstevel@tonic-gate * stubs, of course, as there are system calls that take them
159*7c478bd9Sstevel@tonic-gate * as arguments.) The reason is that some customers build in
160*7c478bd9Sstevel@tonic-gate * environments where an off_t is 32-bits, but still run where
161*7c478bd9Sstevel@tonic-gate * offsets are 64-bits, and they pay us a lot of money.
162*7c478bd9Sstevel@tonic-gate */
163*7c478bd9Sstevel@tonic-gate if ((ret = __os_ioinfo(rpath,
164*7c478bd9Sstevel@tonic-gate dbmfp->fd, &mbytes, &bytes, NULL)) != 0) {
165*7c478bd9Sstevel@tonic-gate __db_err(dbenv, "%s: %s", rpath, strerror(ret));
166*7c478bd9Sstevel@tonic-gate goto err;
167*7c478bd9Sstevel@tonic-gate }
168*7c478bd9Sstevel@tonic-gate
169*7c478bd9Sstevel@tonic-gate /* Page sizes have to be a power-of-two, ignore mbytes. */
170*7c478bd9Sstevel@tonic-gate if (bytes % pagesize != 0) {
171*7c478bd9Sstevel@tonic-gate __db_err(dbenv,
172*7c478bd9Sstevel@tonic-gate "%s: file size not a multiple of the pagesize",
173*7c478bd9Sstevel@tonic-gate rpath);
174*7c478bd9Sstevel@tonic-gate ret = EINVAL;
175*7c478bd9Sstevel@tonic-gate goto err;
176*7c478bd9Sstevel@tonic-gate }
177*7c478bd9Sstevel@tonic-gate
178*7c478bd9Sstevel@tonic-gate last_pgno = mbytes * (MEGABYTE / pagesize);
179*7c478bd9Sstevel@tonic-gate last_pgno += bytes / pagesize;
180*7c478bd9Sstevel@tonic-gate
181*7c478bd9Sstevel@tonic-gate /* Correction: page numbers are zero-based, not 1-based. */
182*7c478bd9Sstevel@tonic-gate if (last_pgno != 0)
183*7c478bd9Sstevel@tonic-gate --last_pgno;
184*7c478bd9Sstevel@tonic-gate
185*7c478bd9Sstevel@tonic-gate /*
186*7c478bd9Sstevel@tonic-gate * Get the file id if we weren't given one. Generated file id's
187*7c478bd9Sstevel@tonic-gate * don't use timestamps, otherwise there'd be no chance of any
188*7c478bd9Sstevel@tonic-gate * other process joining the party.
189*7c478bd9Sstevel@tonic-gate */
190*7c478bd9Sstevel@tonic-gate if (finfop->fileid == NULL) {
191*7c478bd9Sstevel@tonic-gate if ((ret = __os_fileid(dbenv, rpath, 0, idbuf)) != 0)
192*7c478bd9Sstevel@tonic-gate goto err;
193*7c478bd9Sstevel@tonic-gate finfop->fileid = idbuf;
194*7c478bd9Sstevel@tonic-gate }
195*7c478bd9Sstevel@tonic-gate }
196*7c478bd9Sstevel@tonic-gate
197*7c478bd9Sstevel@tonic-gate /*
198*7c478bd9Sstevel@tonic-gate * If we weren't provided an underlying shared object to join with,
199*7c478bd9Sstevel@tonic-gate * find/allocate the shared file objects. Also allocate space for
200*7c478bd9Sstevel@tonic-gate * for the per-process thread lock.
201*7c478bd9Sstevel@tonic-gate */
202*7c478bd9Sstevel@tonic-gate if (needlock)
203*7c478bd9Sstevel@tonic-gate LOCKREGION(dbmp);
204*7c478bd9Sstevel@tonic-gate
205*7c478bd9Sstevel@tonic-gate if (mfp == NULL)
206*7c478bd9Sstevel@tonic-gate ret = __memp_mf_open(dbmp,
207*7c478bd9Sstevel@tonic-gate path, pagesize, last_pgno, finfop, &mfp);
208*7c478bd9Sstevel@tonic-gate else {
209*7c478bd9Sstevel@tonic-gate ++mfp->ref;
210*7c478bd9Sstevel@tonic-gate ret = 0;
211*7c478bd9Sstevel@tonic-gate }
212*7c478bd9Sstevel@tonic-gate if (ret == 0 &&
213*7c478bd9Sstevel@tonic-gate F_ISSET(dbmp, MP_LOCKHANDLE) && (ret =
214*7c478bd9Sstevel@tonic-gate __memp_alloc(dbmp, sizeof(db_mutex_t), NULL, &dbmfp->mutexp)) == 0)
215*7c478bd9Sstevel@tonic-gate LOCKINIT(dbmp, dbmfp->mutexp);
216*7c478bd9Sstevel@tonic-gate
217*7c478bd9Sstevel@tonic-gate if (needlock)
218*7c478bd9Sstevel@tonic-gate UNLOCKREGION(dbmp);
219*7c478bd9Sstevel@tonic-gate if (ret != 0)
220*7c478bd9Sstevel@tonic-gate goto err;
221*7c478bd9Sstevel@tonic-gate
222*7c478bd9Sstevel@tonic-gate dbmfp->mfp = mfp;
223*7c478bd9Sstevel@tonic-gate
224*7c478bd9Sstevel@tonic-gate /*
225*7c478bd9Sstevel@tonic-gate * If a file:
226*7c478bd9Sstevel@tonic-gate * + is read-only
227*7c478bd9Sstevel@tonic-gate * + isn't temporary
228*7c478bd9Sstevel@tonic-gate * + doesn't require any pgin/pgout support
229*7c478bd9Sstevel@tonic-gate * + the DB_NOMMAP flag wasn't set
230*7c478bd9Sstevel@tonic-gate * + and is less than mp_mmapsize bytes in size
231*7c478bd9Sstevel@tonic-gate *
232*7c478bd9Sstevel@tonic-gate * we can mmap it instead of reading/writing buffers. Don't do error
233*7c478bd9Sstevel@tonic-gate * checking based on the mmap call failure. We want to do normal I/O
234*7c478bd9Sstevel@tonic-gate * on the file if the reason we failed was because the file was on an
235*7c478bd9Sstevel@tonic-gate * NFS mounted partition, and we can fail in buffer I/O just as easily
236*7c478bd9Sstevel@tonic-gate * as here.
237*7c478bd9Sstevel@tonic-gate *
238*7c478bd9Sstevel@tonic-gate * XXX
239*7c478bd9Sstevel@tonic-gate * We'd like to test to see if the file is too big to mmap. Since we
240*7c478bd9Sstevel@tonic-gate * don't know what size or type off_t's or size_t's are, or the largest
241*7c478bd9Sstevel@tonic-gate * unsigned integral type is, or what random insanity the local C
242*7c478bd9Sstevel@tonic-gate * compiler will perpetrate, doing the comparison in a portable way is
243*7c478bd9Sstevel@tonic-gate * flatly impossible. Hope that mmap fails if the file is too large.
244*7c478bd9Sstevel@tonic-gate */
245*7c478bd9Sstevel@tonic-gate #define DB_MAXMMAPSIZE (10 * 1024 * 1024) /* 10 Mb. */
246*7c478bd9Sstevel@tonic-gate if (F_ISSET(mfp, MP_CAN_MMAP)) {
247*7c478bd9Sstevel@tonic-gate if (!F_ISSET(dbmfp, MP_READONLY))
248*7c478bd9Sstevel@tonic-gate F_CLR(mfp, MP_CAN_MMAP);
249*7c478bd9Sstevel@tonic-gate if (path == NULL)
250*7c478bd9Sstevel@tonic-gate F_CLR(mfp, MP_CAN_MMAP);
251*7c478bd9Sstevel@tonic-gate if (finfop->ftype != 0)
252*7c478bd9Sstevel@tonic-gate F_CLR(mfp, MP_CAN_MMAP);
253*7c478bd9Sstevel@tonic-gate if (LF_ISSET(DB_NOMMAP))
254*7c478bd9Sstevel@tonic-gate F_CLR(mfp, MP_CAN_MMAP);
255*7c478bd9Sstevel@tonic-gate maxmap = dbenv == NULL || dbenv->mp_mmapsize == 0 ?
256*7c478bd9Sstevel@tonic-gate DB_MAXMMAPSIZE : dbenv->mp_mmapsize;
257*7c478bd9Sstevel@tonic-gate if (mbytes > maxmap / MEGABYTE ||
258*7c478bd9Sstevel@tonic-gate (mbytes == maxmap / MEGABYTE && bytes >= maxmap % MEGABYTE))
259*7c478bd9Sstevel@tonic-gate F_CLR(mfp, MP_CAN_MMAP);
260*7c478bd9Sstevel@tonic-gate }
261*7c478bd9Sstevel@tonic-gate dbmfp->addr = NULL;
262*7c478bd9Sstevel@tonic-gate if (F_ISSET(mfp, MP_CAN_MMAP)) {
263*7c478bd9Sstevel@tonic-gate dbmfp->len = (size_t)mbytes * MEGABYTE + bytes;
264*7c478bd9Sstevel@tonic-gate if (__db_mapfile(rpath,
265*7c478bd9Sstevel@tonic-gate dbmfp->fd, dbmfp->len, 1, &dbmfp->addr) != 0) {
266*7c478bd9Sstevel@tonic-gate dbmfp->addr = NULL;
267*7c478bd9Sstevel@tonic-gate F_CLR(mfp, MP_CAN_MMAP);
268*7c478bd9Sstevel@tonic-gate }
269*7c478bd9Sstevel@tonic-gate }
270*7c478bd9Sstevel@tonic-gate if (rpath != NULL)
271*7c478bd9Sstevel@tonic-gate __os_freestr(rpath);
272*7c478bd9Sstevel@tonic-gate
273*7c478bd9Sstevel@tonic-gate LOCKHANDLE(dbmp, dbmp->mutexp);
274*7c478bd9Sstevel@tonic-gate TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q);
275*7c478bd9Sstevel@tonic-gate UNLOCKHANDLE(dbmp, dbmp->mutexp);
276*7c478bd9Sstevel@tonic-gate
277*7c478bd9Sstevel@tonic-gate *retp = dbmfp;
278*7c478bd9Sstevel@tonic-gate return (0);
279*7c478bd9Sstevel@tonic-gate
280*7c478bd9Sstevel@tonic-gate err: /*
281*7c478bd9Sstevel@tonic-gate * Note that we do not have to free the thread mutex, because we
282*7c478bd9Sstevel@tonic-gate * never get to here after we have successfully allocated it.
283*7c478bd9Sstevel@tonic-gate */
284*7c478bd9Sstevel@tonic-gate if (rpath != NULL)
285*7c478bd9Sstevel@tonic-gate __os_freestr(rpath);
286*7c478bd9Sstevel@tonic-gate if (dbmfp->fd != -1)
287*7c478bd9Sstevel@tonic-gate (void)__os_close(dbmfp->fd);
288*7c478bd9Sstevel@tonic-gate if (dbmfp != NULL)
289*7c478bd9Sstevel@tonic-gate __os_free(dbmfp, sizeof(DB_MPOOLFILE));
290*7c478bd9Sstevel@tonic-gate return (ret);
291*7c478bd9Sstevel@tonic-gate }
292*7c478bd9Sstevel@tonic-gate
293*7c478bd9Sstevel@tonic-gate /*
294*7c478bd9Sstevel@tonic-gate * __memp_mf_open --
295*7c478bd9Sstevel@tonic-gate * Open an MPOOLFILE.
296*7c478bd9Sstevel@tonic-gate */
297*7c478bd9Sstevel@tonic-gate static int
__memp_mf_open(dbmp,path,pagesize,last_pgno,finfop,retp)298*7c478bd9Sstevel@tonic-gate __memp_mf_open(dbmp, path, pagesize, last_pgno, finfop, retp)
299*7c478bd9Sstevel@tonic-gate DB_MPOOL *dbmp;
300*7c478bd9Sstevel@tonic-gate const char *path;
301*7c478bd9Sstevel@tonic-gate size_t pagesize;
302*7c478bd9Sstevel@tonic-gate db_pgno_t last_pgno;
303*7c478bd9Sstevel@tonic-gate DB_MPOOL_FINFO *finfop;
304*7c478bd9Sstevel@tonic-gate MPOOLFILE **retp;
305*7c478bd9Sstevel@tonic-gate {
306*7c478bd9Sstevel@tonic-gate MPOOLFILE *mfp;
307*7c478bd9Sstevel@tonic-gate int ret;
308*7c478bd9Sstevel@tonic-gate void *p;
309*7c478bd9Sstevel@tonic-gate
310*7c478bd9Sstevel@tonic-gate #define ISTEMPORARY (path == NULL)
311*7c478bd9Sstevel@tonic-gate
312*7c478bd9Sstevel@tonic-gate /*
313*7c478bd9Sstevel@tonic-gate * Walk the list of MPOOLFILE's, looking for a matching file.
314*7c478bd9Sstevel@tonic-gate * Temporary files can't match previous files.
315*7c478bd9Sstevel@tonic-gate */
316*7c478bd9Sstevel@tonic-gate if (!ISTEMPORARY)
317*7c478bd9Sstevel@tonic-gate for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
318*7c478bd9Sstevel@tonic-gate mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
319*7c478bd9Sstevel@tonic-gate if (F_ISSET(mfp, MP_TEMP))
320*7c478bd9Sstevel@tonic-gate continue;
321*7c478bd9Sstevel@tonic-gate if (!memcmp(finfop->fileid,
322*7c478bd9Sstevel@tonic-gate R_ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) {
323*7c478bd9Sstevel@tonic-gate if (finfop->clear_len != mfp->clear_len ||
324*7c478bd9Sstevel@tonic-gate finfop->ftype != mfp->ftype ||
325*7c478bd9Sstevel@tonic-gate pagesize != mfp->stat.st_pagesize) {
326*7c478bd9Sstevel@tonic-gate __db_err(dbmp->dbenv,
327*7c478bd9Sstevel@tonic-gate "%s: ftype, clear length or pagesize changed",
328*7c478bd9Sstevel@tonic-gate path);
329*7c478bd9Sstevel@tonic-gate return (EINVAL);
330*7c478bd9Sstevel@tonic-gate }
331*7c478bd9Sstevel@tonic-gate
332*7c478bd9Sstevel@tonic-gate /* Found it: increment the reference count. */
333*7c478bd9Sstevel@tonic-gate ++mfp->ref;
334*7c478bd9Sstevel@tonic-gate *retp = mfp;
335*7c478bd9Sstevel@tonic-gate return (0);
336*7c478bd9Sstevel@tonic-gate }
337*7c478bd9Sstevel@tonic-gate }
338*7c478bd9Sstevel@tonic-gate
339*7c478bd9Sstevel@tonic-gate /* Allocate a new MPOOLFILE. */
340*7c478bd9Sstevel@tonic-gate if ((ret = __memp_alloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0)
341*7c478bd9Sstevel@tonic-gate return (ret);
342*7c478bd9Sstevel@tonic-gate *retp = mfp;
343*7c478bd9Sstevel@tonic-gate
344*7c478bd9Sstevel@tonic-gate /* Initialize the structure. */
345*7c478bd9Sstevel@tonic-gate memset(mfp, 0, sizeof(MPOOLFILE));
346*7c478bd9Sstevel@tonic-gate mfp->ref = 1;
347*7c478bd9Sstevel@tonic-gate mfp->ftype = finfop->ftype;
348*7c478bd9Sstevel@tonic-gate mfp->lsn_off = finfop->lsn_offset;
349*7c478bd9Sstevel@tonic-gate mfp->clear_len = finfop->clear_len;
350*7c478bd9Sstevel@tonic-gate
351*7c478bd9Sstevel@tonic-gate /*
352*7c478bd9Sstevel@tonic-gate * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a memp_fget,
353*7c478bd9Sstevel@tonic-gate * we have to know the last page in the file. Figure it out and save
354*7c478bd9Sstevel@tonic-gate * it away.
355*7c478bd9Sstevel@tonic-gate */
356*7c478bd9Sstevel@tonic-gate mfp->stat.st_pagesize = pagesize;
357*7c478bd9Sstevel@tonic-gate mfp->orig_last_pgno = mfp->last_pgno = last_pgno;
358*7c478bd9Sstevel@tonic-gate
359*7c478bd9Sstevel@tonic-gate if (ISTEMPORARY)
360*7c478bd9Sstevel@tonic-gate F_SET(mfp, MP_TEMP);
361*7c478bd9Sstevel@tonic-gate else {
362*7c478bd9Sstevel@tonic-gate /* Copy the file path into shared memory. */
363*7c478bd9Sstevel@tonic-gate if ((ret = __memp_alloc(dbmp,
364*7c478bd9Sstevel@tonic-gate strlen(path) + 1, &mfp->path_off, &p)) != 0)
365*7c478bd9Sstevel@tonic-gate goto err;
366*7c478bd9Sstevel@tonic-gate memcpy(p, path, strlen(path) + 1);
367*7c478bd9Sstevel@tonic-gate
368*7c478bd9Sstevel@tonic-gate /* Copy the file identification string into shared memory. */
369*7c478bd9Sstevel@tonic-gate if ((ret = __memp_alloc(dbmp,
370*7c478bd9Sstevel@tonic-gate DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0)
371*7c478bd9Sstevel@tonic-gate goto err;
372*7c478bd9Sstevel@tonic-gate memcpy(p, finfop->fileid, DB_FILE_ID_LEN);
373*7c478bd9Sstevel@tonic-gate
374*7c478bd9Sstevel@tonic-gate F_SET(mfp, MP_CAN_MMAP);
375*7c478bd9Sstevel@tonic-gate }
376*7c478bd9Sstevel@tonic-gate
377*7c478bd9Sstevel@tonic-gate /* Copy the page cookie into shared memory. */
378*7c478bd9Sstevel@tonic-gate if (finfop->pgcookie == NULL || finfop->pgcookie->size == 0) {
379*7c478bd9Sstevel@tonic-gate mfp->pgcookie_len = 0;
380*7c478bd9Sstevel@tonic-gate mfp->pgcookie_off = 0;
381*7c478bd9Sstevel@tonic-gate } else {
382*7c478bd9Sstevel@tonic-gate if ((ret = __memp_alloc(dbmp,
383*7c478bd9Sstevel@tonic-gate finfop->pgcookie->size, &mfp->pgcookie_off, &p)) != 0)
384*7c478bd9Sstevel@tonic-gate goto err;
385*7c478bd9Sstevel@tonic-gate memcpy(p, finfop->pgcookie->data, finfop->pgcookie->size);
386*7c478bd9Sstevel@tonic-gate mfp->pgcookie_len = finfop->pgcookie->size;
387*7c478bd9Sstevel@tonic-gate }
388*7c478bd9Sstevel@tonic-gate
389*7c478bd9Sstevel@tonic-gate /* Prepend the MPOOLFILE to the list of MPOOLFILE's. */
390*7c478bd9Sstevel@tonic-gate SH_TAILQ_INSERT_HEAD(&dbmp->mp->mpfq, mfp, q, __mpoolfile);
391*7c478bd9Sstevel@tonic-gate
392*7c478bd9Sstevel@tonic-gate if (0) {
393*7c478bd9Sstevel@tonic-gate err: if (mfp->path_off != 0)
394*7c478bd9Sstevel@tonic-gate __db_shalloc_free(dbmp->addr,
395*7c478bd9Sstevel@tonic-gate R_ADDR(dbmp, mfp->path_off));
396*7c478bd9Sstevel@tonic-gate if (mfp->fileid_off != 0)
397*7c478bd9Sstevel@tonic-gate __db_shalloc_free(dbmp->addr,
398*7c478bd9Sstevel@tonic-gate R_ADDR(dbmp, mfp->fileid_off));
399*7c478bd9Sstevel@tonic-gate if (mfp != NULL)
400*7c478bd9Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, mfp);
401*7c478bd9Sstevel@tonic-gate mfp = NULL;
402*7c478bd9Sstevel@tonic-gate }
403*7c478bd9Sstevel@tonic-gate return (0);
404*7c478bd9Sstevel@tonic-gate }
405*7c478bd9Sstevel@tonic-gate
406*7c478bd9Sstevel@tonic-gate /*
407*7c478bd9Sstevel@tonic-gate * memp_fclose --
408*7c478bd9Sstevel@tonic-gate * Close a backing file for the memory pool.
409*7c478bd9Sstevel@tonic-gate */
410*7c478bd9Sstevel@tonic-gate int
memp_fclose(dbmfp)411*7c478bd9Sstevel@tonic-gate memp_fclose(dbmfp)
412*7c478bd9Sstevel@tonic-gate DB_MPOOLFILE *dbmfp;
413*7c478bd9Sstevel@tonic-gate {
414*7c478bd9Sstevel@tonic-gate DB_MPOOL *dbmp;
415*7c478bd9Sstevel@tonic-gate int ret, t_ret;
416*7c478bd9Sstevel@tonic-gate
417*7c478bd9Sstevel@tonic-gate dbmp = dbmfp->dbmp;
418*7c478bd9Sstevel@tonic-gate ret = 0;
419*7c478bd9Sstevel@tonic-gate
420*7c478bd9Sstevel@tonic-gate MP_PANIC_CHECK(dbmp);
421*7c478bd9Sstevel@tonic-gate
422*7c478bd9Sstevel@tonic-gate for (;;) {
423*7c478bd9Sstevel@tonic-gate LOCKHANDLE(dbmp, dbmp->mutexp);
424*7c478bd9Sstevel@tonic-gate
425*7c478bd9Sstevel@tonic-gate /*
426*7c478bd9Sstevel@tonic-gate * We have to reference count DB_MPOOLFILE structures as other
427*7c478bd9Sstevel@tonic-gate * threads may be using them. The problem only happens if the
428*7c478bd9Sstevel@tonic-gate * application makes a bad design choice. Here's the path:
429*7c478bd9Sstevel@tonic-gate *
430*7c478bd9Sstevel@tonic-gate * Thread A opens a database.
431*7c478bd9Sstevel@tonic-gate * Thread B uses thread A's DB_MPOOLFILE to write a buffer
432*7c478bd9Sstevel@tonic-gate * in order to free up memory in the mpool cache.
433*7c478bd9Sstevel@tonic-gate * Thread A closes the database while thread B is using the
434*7c478bd9Sstevel@tonic-gate * DB_MPOOLFILE structure.
435*7c478bd9Sstevel@tonic-gate *
436*7c478bd9Sstevel@tonic-gate * By opening all databases before creating the threads, and
437*7c478bd9Sstevel@tonic-gate * closing them after the threads have exited, applications
438*7c478bd9Sstevel@tonic-gate * get better performance and avoid the problem path entirely.
439*7c478bd9Sstevel@tonic-gate *
440*7c478bd9Sstevel@tonic-gate * Regardless, holding the DB_MPOOLFILE to flush a dirty buffer
441*7c478bd9Sstevel@tonic-gate * is a short-term lock, even in worst case, since we better be
442*7c478bd9Sstevel@tonic-gate * the only thread of control using the DB_MPOOLFILE structure
443*7c478bd9Sstevel@tonic-gate * to read pages *into* the cache. Wait until we're the only
444*7c478bd9Sstevel@tonic-gate * reference holder and remove the DB_MPOOLFILE structure from
445*7c478bd9Sstevel@tonic-gate * the list, so nobody else can even find it.
446*7c478bd9Sstevel@tonic-gate */
447*7c478bd9Sstevel@tonic-gate if (dbmfp->ref == 1) {
448*7c478bd9Sstevel@tonic-gate TAILQ_REMOVE(&dbmp->dbmfq, dbmfp, q);
449*7c478bd9Sstevel@tonic-gate break;
450*7c478bd9Sstevel@tonic-gate }
451*7c478bd9Sstevel@tonic-gate UNLOCKHANDLE(dbmp, dbmp->mutexp);
452*7c478bd9Sstevel@tonic-gate
453*7c478bd9Sstevel@tonic-gate (void)__os_sleep(1, 0);
454*7c478bd9Sstevel@tonic-gate }
455*7c478bd9Sstevel@tonic-gate UNLOCKHANDLE(dbmp, dbmp->mutexp);
456*7c478bd9Sstevel@tonic-gate
457*7c478bd9Sstevel@tonic-gate /* Complain if pinned blocks never returned. */
458*7c478bd9Sstevel@tonic-gate if (dbmfp->pinref != 0)
459*7c478bd9Sstevel@tonic-gate __db_err(dbmp->dbenv, "%s: close: %lu blocks left pinned",
460*7c478bd9Sstevel@tonic-gate __memp_fn(dbmfp), (u_long)dbmfp->pinref);
461*7c478bd9Sstevel@tonic-gate
462*7c478bd9Sstevel@tonic-gate /* Close the underlying MPOOLFILE. */
463*7c478bd9Sstevel@tonic-gate (void)__memp_mf_close(dbmp, dbmfp);
464*7c478bd9Sstevel@tonic-gate
465*7c478bd9Sstevel@tonic-gate /* Discard any mmap information. */
466*7c478bd9Sstevel@tonic-gate if (dbmfp->addr != NULL &&
467*7c478bd9Sstevel@tonic-gate (ret = __db_unmapfile(dbmfp->addr, dbmfp->len)) != 0)
468*7c478bd9Sstevel@tonic-gate __db_err(dbmp->dbenv,
469*7c478bd9Sstevel@tonic-gate "%s: %s", __memp_fn(dbmfp), strerror(ret));
470*7c478bd9Sstevel@tonic-gate
471*7c478bd9Sstevel@tonic-gate /* Close the file; temporary files may not yet have been created. */
472*7c478bd9Sstevel@tonic-gate if (dbmfp->fd != -1 && (t_ret = __os_close(dbmfp->fd)) != 0) {
473*7c478bd9Sstevel@tonic-gate __db_err(dbmp->dbenv,
474*7c478bd9Sstevel@tonic-gate "%s: %s", __memp_fn(dbmfp), strerror(t_ret));
475*7c478bd9Sstevel@tonic-gate if (ret != 0)
476*7c478bd9Sstevel@tonic-gate t_ret = ret;
477*7c478bd9Sstevel@tonic-gate }
478*7c478bd9Sstevel@tonic-gate
479*7c478bd9Sstevel@tonic-gate /* Free memory. */
480*7c478bd9Sstevel@tonic-gate if (dbmfp->mutexp != NULL) {
481*7c478bd9Sstevel@tonic-gate LOCKREGION(dbmp);
482*7c478bd9Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, dbmfp->mutexp);
483*7c478bd9Sstevel@tonic-gate UNLOCKREGION(dbmp);
484*7c478bd9Sstevel@tonic-gate }
485*7c478bd9Sstevel@tonic-gate
486*7c478bd9Sstevel@tonic-gate /* Discard the DB_MPOOLFILE structure. */
487*7c478bd9Sstevel@tonic-gate __os_free(dbmfp, sizeof(DB_MPOOLFILE));
488*7c478bd9Sstevel@tonic-gate
489*7c478bd9Sstevel@tonic-gate return (ret);
490*7c478bd9Sstevel@tonic-gate }
491*7c478bd9Sstevel@tonic-gate
492*7c478bd9Sstevel@tonic-gate /*
493*7c478bd9Sstevel@tonic-gate * __memp_mf_close --
494*7c478bd9Sstevel@tonic-gate * Close down an MPOOLFILE.
495*7c478bd9Sstevel@tonic-gate */
496*7c478bd9Sstevel@tonic-gate static int
__memp_mf_close(dbmp,dbmfp)497*7c478bd9Sstevel@tonic-gate __memp_mf_close(dbmp, dbmfp)
498*7c478bd9Sstevel@tonic-gate DB_MPOOL *dbmp;
499*7c478bd9Sstevel@tonic-gate DB_MPOOLFILE *dbmfp;
500*7c478bd9Sstevel@tonic-gate {
501*7c478bd9Sstevel@tonic-gate BH *bhp, *nbhp;
502*7c478bd9Sstevel@tonic-gate MPOOL *mp;
503*7c478bd9Sstevel@tonic-gate MPOOLFILE *mfp;
504*7c478bd9Sstevel@tonic-gate size_t mf_offset;
505*7c478bd9Sstevel@tonic-gate
506*7c478bd9Sstevel@tonic-gate mp = dbmp->mp;
507*7c478bd9Sstevel@tonic-gate mfp = dbmfp->mfp;
508*7c478bd9Sstevel@tonic-gate
509*7c478bd9Sstevel@tonic-gate LOCKREGION(dbmp);
510*7c478bd9Sstevel@tonic-gate
511*7c478bd9Sstevel@tonic-gate /* If more than a single reference, simply decrement. */
512*7c478bd9Sstevel@tonic-gate if (mfp->ref > 1) {
513*7c478bd9Sstevel@tonic-gate --mfp->ref;
514*7c478bd9Sstevel@tonic-gate goto ret1;
515*7c478bd9Sstevel@tonic-gate }
516*7c478bd9Sstevel@tonic-gate
517*7c478bd9Sstevel@tonic-gate /*
518*7c478bd9Sstevel@tonic-gate * Move any BH's held by the file to the free list. We don't free the
519*7c478bd9Sstevel@tonic-gate * memory itself because we may be discarding the memory pool, and it's
520*7c478bd9Sstevel@tonic-gate * fairly expensive to reintegrate the buffers back into the region for
521*7c478bd9Sstevel@tonic-gate * no purpose.
522*7c478bd9Sstevel@tonic-gate */
523*7c478bd9Sstevel@tonic-gate mf_offset = R_OFFSET(dbmp, mfp);
524*7c478bd9Sstevel@tonic-gate for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) {
525*7c478bd9Sstevel@tonic-gate nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
526*7c478bd9Sstevel@tonic-gate
527*7c478bd9Sstevel@tonic-gate #ifdef DEBUG_NO_DIRTY
528*7c478bd9Sstevel@tonic-gate /* Complain if we find any blocks that were left dirty. */
529*7c478bd9Sstevel@tonic-gate if (F_ISSET(bhp, BH_DIRTY))
530*7c478bd9Sstevel@tonic-gate __db_err(dbmp->dbenv,
531*7c478bd9Sstevel@tonic-gate "%s: close: pgno %lu left dirty; ref %lu",
532*7c478bd9Sstevel@tonic-gate __memp_fn(dbmfp),
533*7c478bd9Sstevel@tonic-gate (u_long)bhp->pgno, (u_long)bhp->ref);
534*7c478bd9Sstevel@tonic-gate #endif
535*7c478bd9Sstevel@tonic-gate
536*7c478bd9Sstevel@tonic-gate if (bhp->mf_offset == mf_offset) {
537*7c478bd9Sstevel@tonic-gate if (F_ISSET(bhp, BH_DIRTY)) {
538*7c478bd9Sstevel@tonic-gate ++mp->stat.st_page_clean;
539*7c478bd9Sstevel@tonic-gate --mp->stat.st_page_dirty;
540*7c478bd9Sstevel@tonic-gate }
541*7c478bd9Sstevel@tonic-gate __memp_bhfree(dbmp, mfp, bhp, 0);
542*7c478bd9Sstevel@tonic-gate SH_TAILQ_INSERT_HEAD(&mp->bhfq, bhp, q, __bh);
543*7c478bd9Sstevel@tonic-gate }
544*7c478bd9Sstevel@tonic-gate }
545*7c478bd9Sstevel@tonic-gate
546*7c478bd9Sstevel@tonic-gate /* Delete from the list of MPOOLFILEs. */
547*7c478bd9Sstevel@tonic-gate SH_TAILQ_REMOVE(&mp->mpfq, mfp, q, __mpoolfile);
548*7c478bd9Sstevel@tonic-gate
549*7c478bd9Sstevel@tonic-gate /* Free the space. */
550*7c478bd9Sstevel@tonic-gate if (mfp->path_off != 0)
551*7c478bd9Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->path_off));
552*7c478bd9Sstevel@tonic-gate if (mfp->fileid_off != 0)
553*7c478bd9Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->fileid_off));
554*7c478bd9Sstevel@tonic-gate if (mfp->pgcookie_off != 0)
555*7c478bd9Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, R_ADDR(dbmp, mfp->pgcookie_off));
556*7c478bd9Sstevel@tonic-gate __db_shalloc_free(dbmp->addr, mfp);
557*7c478bd9Sstevel@tonic-gate
558*7c478bd9Sstevel@tonic-gate ret1: UNLOCKREGION(dbmp);
559*7c478bd9Sstevel@tonic-gate return (0);
560*7c478bd9Sstevel@tonic-gate }
561