xref: /illumos-gate/usr/src/cmd/sendmail/db/log/log_put.c (revision 7c478bd9)
1*7c478bd9Sstevel@tonic-gate /*-
2*7c478bd9Sstevel@tonic-gate  * See the file LICENSE for redistribution information.
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * Copyright (c) 1996, 1997, 1998
5*7c478bd9Sstevel@tonic-gate  *	Sleepycat Software.  All rights reserved.
6*7c478bd9Sstevel@tonic-gate  */
7*7c478bd9Sstevel@tonic-gate #include "config.h"
8*7c478bd9Sstevel@tonic-gate 
9*7c478bd9Sstevel@tonic-gate #ifndef lint
10*7c478bd9Sstevel@tonic-gate static const char sccsid[] = "@(#)log_put.c	10.44 (Sleepycat) 11/3/98";
11*7c478bd9Sstevel@tonic-gate #endif /* not lint */
12*7c478bd9Sstevel@tonic-gate 
13*7c478bd9Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES
14*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
15*7c478bd9Sstevel@tonic-gate 
16*7c478bd9Sstevel@tonic-gate #include <errno.h>
17*7c478bd9Sstevel@tonic-gate #include <stdio.h>
18*7c478bd9Sstevel@tonic-gate #include <string.h>
19*7c478bd9Sstevel@tonic-gate #include <time.h>
20*7c478bd9Sstevel@tonic-gate #include <unistd.h>
21*7c478bd9Sstevel@tonic-gate #endif
22*7c478bd9Sstevel@tonic-gate 
23*7c478bd9Sstevel@tonic-gate #include "db_int.h"
24*7c478bd9Sstevel@tonic-gate #include "shqueue.h"
25*7c478bd9Sstevel@tonic-gate #include "db_page.h"
26*7c478bd9Sstevel@tonic-gate #include "log.h"
27*7c478bd9Sstevel@tonic-gate #include "hash.h"
28*7c478bd9Sstevel@tonic-gate #include "clib_ext.h"
29*7c478bd9Sstevel@tonic-gate #include "common_ext.h"
30*7c478bd9Sstevel@tonic-gate 
31*7c478bd9Sstevel@tonic-gate static int __log_fill __P((DB_LOG *, DB_LSN *, void *, u_int32_t));
32*7c478bd9Sstevel@tonic-gate static int __log_flush __P((DB_LOG *, const DB_LSN *));
33*7c478bd9Sstevel@tonic-gate static int __log_newfd __P((DB_LOG *));
34*7c478bd9Sstevel@tonic-gate static int __log_putr __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t));
35*7c478bd9Sstevel@tonic-gate static int __log_write __P((DB_LOG *, void *, u_int32_t));
36*7c478bd9Sstevel@tonic-gate 
37*7c478bd9Sstevel@tonic-gate /*
38*7c478bd9Sstevel@tonic-gate  * log_put --
39*7c478bd9Sstevel@tonic-gate  *	Write a log record.
40*7c478bd9Sstevel@tonic-gate  */
41*7c478bd9Sstevel@tonic-gate int
log_put(dblp,lsn,dbt,flags)42*7c478bd9Sstevel@tonic-gate log_put(dblp, lsn, dbt, flags)
43*7c478bd9Sstevel@tonic-gate 	DB_LOG *dblp;
44*7c478bd9Sstevel@tonic-gate 	DB_LSN *lsn;
45*7c478bd9Sstevel@tonic-gate 	const DBT *dbt;
46*7c478bd9Sstevel@tonic-gate 	u_int32_t flags;
47*7c478bd9Sstevel@tonic-gate {
48*7c478bd9Sstevel@tonic-gate 	int ret;
49*7c478bd9Sstevel@tonic-gate 
50*7c478bd9Sstevel@tonic-gate 	LOG_PANIC_CHECK(dblp);
51*7c478bd9Sstevel@tonic-gate 
52*7c478bd9Sstevel@tonic-gate 	/* Validate arguments. */
53*7c478bd9Sstevel@tonic-gate 	if (flags != 0 && flags != DB_CHECKPOINT &&
54*7c478bd9Sstevel@tonic-gate 	    flags != DB_CURLSN && flags != DB_FLUSH)
55*7c478bd9Sstevel@tonic-gate 		return (__db_ferr(dblp->dbenv, "log_put", 0));
56*7c478bd9Sstevel@tonic-gate 
57*7c478bd9Sstevel@tonic-gate 	LOCK_LOGREGION(dblp);
58*7c478bd9Sstevel@tonic-gate 	ret = __log_put(dblp, lsn, dbt, flags);
59*7c478bd9Sstevel@tonic-gate 	UNLOCK_LOGREGION(dblp);
60*7c478bd9Sstevel@tonic-gate 	return (ret);
61*7c478bd9Sstevel@tonic-gate }
62*7c478bd9Sstevel@tonic-gate 
63*7c478bd9Sstevel@tonic-gate /*
64*7c478bd9Sstevel@tonic-gate  * __log_put --
65*7c478bd9Sstevel@tonic-gate  *	Write a log record; internal version.
66*7c478bd9Sstevel@tonic-gate  *
67*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t));
68*7c478bd9Sstevel@tonic-gate  */
69*7c478bd9Sstevel@tonic-gate int
__log_put(dblp,lsn,dbt,flags)70*7c478bd9Sstevel@tonic-gate __log_put(dblp, lsn, dbt, flags)
71*7c478bd9Sstevel@tonic-gate 	DB_LOG *dblp;
72*7c478bd9Sstevel@tonic-gate 	DB_LSN *lsn;
73*7c478bd9Sstevel@tonic-gate 	const DBT *dbt;
74*7c478bd9Sstevel@tonic-gate 	u_int32_t flags;
75*7c478bd9Sstevel@tonic-gate {
76*7c478bd9Sstevel@tonic-gate 	DBT fid_dbt, t;
77*7c478bd9Sstevel@tonic-gate 	DB_LSN r_unused;
78*7c478bd9Sstevel@tonic-gate 	FNAME *fnp;
79*7c478bd9Sstevel@tonic-gate 	LOG *lp;
80*7c478bd9Sstevel@tonic-gate 	u_int32_t lastoff;
81*7c478bd9Sstevel@tonic-gate 	int ret;
82*7c478bd9Sstevel@tonic-gate 
83*7c478bd9Sstevel@tonic-gate 	lp = dblp->lp;
84*7c478bd9Sstevel@tonic-gate 
85*7c478bd9Sstevel@tonic-gate 	/*
86*7c478bd9Sstevel@tonic-gate 	 * If the application just wants to know where we are, fill in
87*7c478bd9Sstevel@tonic-gate 	 * the information.  Currently used by the transaction manager
88*7c478bd9Sstevel@tonic-gate 	 * to avoid writing TXN_begin records.
89*7c478bd9Sstevel@tonic-gate 	 */
90*7c478bd9Sstevel@tonic-gate 	if (flags == DB_CURLSN) {
91*7c478bd9Sstevel@tonic-gate 		lsn->file = lp->lsn.file;
92*7c478bd9Sstevel@tonic-gate 		lsn->offset = lp->lsn.offset;
93*7c478bd9Sstevel@tonic-gate 		return (0);
94*7c478bd9Sstevel@tonic-gate 	}
95*7c478bd9Sstevel@tonic-gate 
96*7c478bd9Sstevel@tonic-gate 	/* If this information won't fit in the file, swap files. */
97*7c478bd9Sstevel@tonic-gate 	if (lp->lsn.offset + sizeof(HDR) + dbt->size > lp->persist.lg_max) {
98*7c478bd9Sstevel@tonic-gate 		if (sizeof(HDR) +
99*7c478bd9Sstevel@tonic-gate 		    sizeof(LOGP) + dbt->size > lp->persist.lg_max) {
100*7c478bd9Sstevel@tonic-gate 			__db_err(dblp->dbenv,
101*7c478bd9Sstevel@tonic-gate 			    "log_put: record larger than maximum file size");
102*7c478bd9Sstevel@tonic-gate 			return (EINVAL);
103*7c478bd9Sstevel@tonic-gate 		}
104*7c478bd9Sstevel@tonic-gate 
105*7c478bd9Sstevel@tonic-gate 		/* Flush the log. */
106*7c478bd9Sstevel@tonic-gate 		if ((ret = __log_flush(dblp, NULL)) != 0)
107*7c478bd9Sstevel@tonic-gate 			return (ret);
108*7c478bd9Sstevel@tonic-gate 
109*7c478bd9Sstevel@tonic-gate 		/*
110*7c478bd9Sstevel@tonic-gate 		 * Save the last known offset from the previous file, we'll
111*7c478bd9Sstevel@tonic-gate 		 * need it to initialize the persistent header information.
112*7c478bd9Sstevel@tonic-gate 		 */
113*7c478bd9Sstevel@tonic-gate 		lastoff = lp->lsn.offset;
114*7c478bd9Sstevel@tonic-gate 
115*7c478bd9Sstevel@tonic-gate 		/* Point the current LSN to the new file. */
116*7c478bd9Sstevel@tonic-gate 		++lp->lsn.file;
117*7c478bd9Sstevel@tonic-gate 		lp->lsn.offset = 0;
118*7c478bd9Sstevel@tonic-gate 
119*7c478bd9Sstevel@tonic-gate 		/* Reset the file write offset. */
120*7c478bd9Sstevel@tonic-gate 		lp->w_off = 0;
121*7c478bd9Sstevel@tonic-gate 	} else
122*7c478bd9Sstevel@tonic-gate 		lastoff = 0;
123*7c478bd9Sstevel@tonic-gate 
124*7c478bd9Sstevel@tonic-gate 	/* Initialize the LSN information returned to the user. */
125*7c478bd9Sstevel@tonic-gate 	lsn->file = lp->lsn.file;
126*7c478bd9Sstevel@tonic-gate 	lsn->offset = lp->lsn.offset;
127*7c478bd9Sstevel@tonic-gate 
128*7c478bd9Sstevel@tonic-gate 	/*
129*7c478bd9Sstevel@tonic-gate 	 * Insert persistent information as the first record in every file.
130*7c478bd9Sstevel@tonic-gate 	 * Note that the previous length is wrong for the very first record
131*7c478bd9Sstevel@tonic-gate 	 * of the log, but that's okay, we check for it during retrieval.
132*7c478bd9Sstevel@tonic-gate 	 */
133*7c478bd9Sstevel@tonic-gate 	if (lp->lsn.offset == 0) {
134*7c478bd9Sstevel@tonic-gate 		t.data = &lp->persist;
135*7c478bd9Sstevel@tonic-gate 		t.size = sizeof(LOGP);
136*7c478bd9Sstevel@tonic-gate 		if ((ret = __log_putr(dblp, lsn,
137*7c478bd9Sstevel@tonic-gate 		    &t, lastoff == 0 ? 0 : lastoff - lp->len)) != 0)
138*7c478bd9Sstevel@tonic-gate 			return (ret);
139*7c478bd9Sstevel@tonic-gate 
140*7c478bd9Sstevel@tonic-gate 		/* Update the LSN information returned to the user. */
141*7c478bd9Sstevel@tonic-gate 		lsn->file = lp->lsn.file;
142*7c478bd9Sstevel@tonic-gate 		lsn->offset = lp->lsn.offset;
143*7c478bd9Sstevel@tonic-gate 	}
144*7c478bd9Sstevel@tonic-gate 
145*7c478bd9Sstevel@tonic-gate 	/* Write the application's log record. */
146*7c478bd9Sstevel@tonic-gate 	if ((ret = __log_putr(dblp, lsn, dbt, lp->lsn.offset - lp->len)) != 0)
147*7c478bd9Sstevel@tonic-gate 		return (ret);
148*7c478bd9Sstevel@tonic-gate 
149*7c478bd9Sstevel@tonic-gate 	/*
150*7c478bd9Sstevel@tonic-gate 	 * On a checkpoint, we:
151*7c478bd9Sstevel@tonic-gate 	 *	Put out the checkpoint record (above).
152*7c478bd9Sstevel@tonic-gate 	 *	Save the LSN of the checkpoint in the shared region.
153*7c478bd9Sstevel@tonic-gate 	 *	Append the set of file name information into the log.
154*7c478bd9Sstevel@tonic-gate 	 */
155*7c478bd9Sstevel@tonic-gate 	if (flags == DB_CHECKPOINT) {
156*7c478bd9Sstevel@tonic-gate 		lp->chkpt_lsn = *lsn;
157*7c478bd9Sstevel@tonic-gate 
158*7c478bd9Sstevel@tonic-gate 		for (fnp = SH_TAILQ_FIRST(&dblp->lp->fq, __fname);
159*7c478bd9Sstevel@tonic-gate 		    fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) {
160*7c478bd9Sstevel@tonic-gate 			if (fnp->ref == 0)	/* Entry not in use. */
161*7c478bd9Sstevel@tonic-gate 				continue;
162*7c478bd9Sstevel@tonic-gate 			memset(&t, 0, sizeof(t));
163*7c478bd9Sstevel@tonic-gate 			t.data = R_ADDR(dblp, fnp->name_off);
164*7c478bd9Sstevel@tonic-gate 			t.size = strlen(t.data) + 1;
165*7c478bd9Sstevel@tonic-gate 			memset(&fid_dbt, 0, sizeof(fid_dbt));
166*7c478bd9Sstevel@tonic-gate 			fid_dbt.data = fnp->ufid;
167*7c478bd9Sstevel@tonic-gate 			fid_dbt.size = DB_FILE_ID_LEN;
168*7c478bd9Sstevel@tonic-gate 			if ((ret = __log_register_log(dblp, NULL, &r_unused, 0,
169*7c478bd9Sstevel@tonic-gate 			    LOG_CHECKPOINT, &t, &fid_dbt, fnp->id, fnp->s_type))
170*7c478bd9Sstevel@tonic-gate 			    != 0)
171*7c478bd9Sstevel@tonic-gate 				return (ret);
172*7c478bd9Sstevel@tonic-gate 		}
173*7c478bd9Sstevel@tonic-gate 	}
174*7c478bd9Sstevel@tonic-gate 
175*7c478bd9Sstevel@tonic-gate 	/*
176*7c478bd9Sstevel@tonic-gate 	 * On a checkpoint or when flush is requested, we:
177*7c478bd9Sstevel@tonic-gate 	 *	Flush the current buffer contents to disk.
178*7c478bd9Sstevel@tonic-gate 	 *	Sync the log to disk.
179*7c478bd9Sstevel@tonic-gate 	 */
180*7c478bd9Sstevel@tonic-gate 	if (flags == DB_FLUSH || flags == DB_CHECKPOINT)
181*7c478bd9Sstevel@tonic-gate 		if ((ret = __log_flush(dblp, NULL)) != 0)
182*7c478bd9Sstevel@tonic-gate 			return (ret);
183*7c478bd9Sstevel@tonic-gate 
184*7c478bd9Sstevel@tonic-gate 	/*
185*7c478bd9Sstevel@tonic-gate 	 * On a checkpoint, we:
186*7c478bd9Sstevel@tonic-gate 	 *	Save the time the checkpoint was written.
187*7c478bd9Sstevel@tonic-gate 	 *	Reset the bytes written since the last checkpoint.
188*7c478bd9Sstevel@tonic-gate 	 */
189*7c478bd9Sstevel@tonic-gate 	if (flags == DB_CHECKPOINT) {
190*7c478bd9Sstevel@tonic-gate 		(void)time(&lp->chkpt);
191*7c478bd9Sstevel@tonic-gate 		lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0;
192*7c478bd9Sstevel@tonic-gate 	}
193*7c478bd9Sstevel@tonic-gate 	return (0);
194*7c478bd9Sstevel@tonic-gate }
195*7c478bd9Sstevel@tonic-gate 
196*7c478bd9Sstevel@tonic-gate /*
197*7c478bd9Sstevel@tonic-gate  * __log_putr --
198*7c478bd9Sstevel@tonic-gate  *	Actually put a record into the log.
199*7c478bd9Sstevel@tonic-gate  */
200*7c478bd9Sstevel@tonic-gate static int
__log_putr(dblp,lsn,dbt,prev)201*7c478bd9Sstevel@tonic-gate __log_putr(dblp, lsn, dbt, prev)
202*7c478bd9Sstevel@tonic-gate 	DB_LOG *dblp;
203*7c478bd9Sstevel@tonic-gate 	DB_LSN *lsn;
204*7c478bd9Sstevel@tonic-gate 	const DBT *dbt;
205*7c478bd9Sstevel@tonic-gate 	u_int32_t prev;
206*7c478bd9Sstevel@tonic-gate {
207*7c478bd9Sstevel@tonic-gate 	HDR hdr;
208*7c478bd9Sstevel@tonic-gate 	LOG *lp;
209*7c478bd9Sstevel@tonic-gate 	int ret;
210*7c478bd9Sstevel@tonic-gate 
211*7c478bd9Sstevel@tonic-gate 	lp = dblp->lp;
212*7c478bd9Sstevel@tonic-gate 
213*7c478bd9Sstevel@tonic-gate 	/*
214*7c478bd9Sstevel@tonic-gate 	 * Initialize the header.  If we just switched files, lsn.offset will
215*7c478bd9Sstevel@tonic-gate 	 * be 0, and what we really want is the offset of the previous record
216*7c478bd9Sstevel@tonic-gate 	 * in the previous file.  Fortunately, prev holds the value we want.
217*7c478bd9Sstevel@tonic-gate 	 */
218*7c478bd9Sstevel@tonic-gate 	hdr.prev = prev;
219*7c478bd9Sstevel@tonic-gate 	hdr.len = sizeof(HDR) + dbt->size;
220*7c478bd9Sstevel@tonic-gate 	hdr.cksum = __ham_func4(dbt->data, dbt->size);
221*7c478bd9Sstevel@tonic-gate 
222*7c478bd9Sstevel@tonic-gate 	if ((ret = __log_fill(dblp, lsn, &hdr, sizeof(HDR))) != 0)
223*7c478bd9Sstevel@tonic-gate 		return (ret);
224*7c478bd9Sstevel@tonic-gate 	lp->len = sizeof(HDR);
225*7c478bd9Sstevel@tonic-gate 	lp->lsn.offset += sizeof(HDR);
226*7c478bd9Sstevel@tonic-gate 
227*7c478bd9Sstevel@tonic-gate 	if ((ret = __log_fill(dblp, lsn, dbt->data, dbt->size)) != 0)
228*7c478bd9Sstevel@tonic-gate 		return (ret);
229*7c478bd9Sstevel@tonic-gate 	lp->len += dbt->size;
230*7c478bd9Sstevel@tonic-gate 	lp->lsn.offset += dbt->size;
231*7c478bd9Sstevel@tonic-gate 	return (0);
232*7c478bd9Sstevel@tonic-gate }
233*7c478bd9Sstevel@tonic-gate 
234*7c478bd9Sstevel@tonic-gate /*
235*7c478bd9Sstevel@tonic-gate  * log_flush --
236*7c478bd9Sstevel@tonic-gate  *	Write all records less than or equal to the specified LSN.
237*7c478bd9Sstevel@tonic-gate  */
238*7c478bd9Sstevel@tonic-gate int
log_flush(dblp,lsn)239*7c478bd9Sstevel@tonic-gate log_flush(dblp, lsn)
240*7c478bd9Sstevel@tonic-gate 	DB_LOG *dblp;
241*7c478bd9Sstevel@tonic-gate 	const DB_LSN *lsn;
242*7c478bd9Sstevel@tonic-gate {
243*7c478bd9Sstevel@tonic-gate 	int ret;
244*7c478bd9Sstevel@tonic-gate 
245*7c478bd9Sstevel@tonic-gate 	LOG_PANIC_CHECK(dblp);
246*7c478bd9Sstevel@tonic-gate 
247*7c478bd9Sstevel@tonic-gate 	LOCK_LOGREGION(dblp);
248*7c478bd9Sstevel@tonic-gate 	ret = __log_flush(dblp, lsn);
249*7c478bd9Sstevel@tonic-gate 	UNLOCK_LOGREGION(dblp);
250*7c478bd9Sstevel@tonic-gate 	return (ret);
251*7c478bd9Sstevel@tonic-gate }
252*7c478bd9Sstevel@tonic-gate 
253*7c478bd9Sstevel@tonic-gate /*
254*7c478bd9Sstevel@tonic-gate  * __log_flush --
255*7c478bd9Sstevel@tonic-gate  *	Write all records less than or equal to the specified LSN; internal
256*7c478bd9Sstevel@tonic-gate  *	version.
257*7c478bd9Sstevel@tonic-gate  */
258*7c478bd9Sstevel@tonic-gate static int
__log_flush(dblp,lsn)259*7c478bd9Sstevel@tonic-gate __log_flush(dblp, lsn)
260*7c478bd9Sstevel@tonic-gate 	DB_LOG *dblp;
261*7c478bd9Sstevel@tonic-gate 	const DB_LSN *lsn;
262*7c478bd9Sstevel@tonic-gate {
263*7c478bd9Sstevel@tonic-gate 	DB_LSN t_lsn;
264*7c478bd9Sstevel@tonic-gate 	LOG *lp;
265*7c478bd9Sstevel@tonic-gate 	int current, ret;
266*7c478bd9Sstevel@tonic-gate 
267*7c478bd9Sstevel@tonic-gate 	ret = 0;
268*7c478bd9Sstevel@tonic-gate 	lp = dblp->lp;
269*7c478bd9Sstevel@tonic-gate 
270*7c478bd9Sstevel@tonic-gate 	/*
271*7c478bd9Sstevel@tonic-gate 	 * If no LSN specified, flush the entire log by setting the flush LSN
272*7c478bd9Sstevel@tonic-gate 	 * to the last LSN written in the log.  Otherwise, check that the LSN
273*7c478bd9Sstevel@tonic-gate 	 * isn't a non-existent record for the log.
274*7c478bd9Sstevel@tonic-gate 	 */
275*7c478bd9Sstevel@tonic-gate 	if (lsn == NULL) {
276*7c478bd9Sstevel@tonic-gate 		t_lsn.file = lp->lsn.file;
277*7c478bd9Sstevel@tonic-gate 		t_lsn.offset = lp->lsn.offset - lp->len;
278*7c478bd9Sstevel@tonic-gate 		lsn = &t_lsn;
279*7c478bd9Sstevel@tonic-gate 	} else
280*7c478bd9Sstevel@tonic-gate 		if (lsn->file > lp->lsn.file ||
281*7c478bd9Sstevel@tonic-gate 		    (lsn->file == lp->lsn.file &&
282*7c478bd9Sstevel@tonic-gate 		    lsn->offset > lp->lsn.offset - lp->len)) {
283*7c478bd9Sstevel@tonic-gate 			__db_err(dblp->dbenv,
284*7c478bd9Sstevel@tonic-gate 			    "log_flush: LSN past current end-of-log");
285*7c478bd9Sstevel@tonic-gate 			return (EINVAL);
286*7c478bd9Sstevel@tonic-gate 		}
287*7c478bd9Sstevel@tonic-gate 
288*7c478bd9Sstevel@tonic-gate 	/*
289*7c478bd9Sstevel@tonic-gate 	 * If the LSN is less than the last-sync'd LSN, we're done.  Note,
290*7c478bd9Sstevel@tonic-gate 	 * the last-sync LSN saved in s_lsn is the LSN of the first byte
291*7c478bd9Sstevel@tonic-gate 	 * we absolutely know has been written to disk, so the test is <=.
292*7c478bd9Sstevel@tonic-gate 	 */
293*7c478bd9Sstevel@tonic-gate 	if (lsn->file < lp->s_lsn.file ||
294*7c478bd9Sstevel@tonic-gate 	    (lsn->file == lp->s_lsn.file && lsn->offset <= lp->s_lsn.offset))
295*7c478bd9Sstevel@tonic-gate 		return (0);
296*7c478bd9Sstevel@tonic-gate 
297*7c478bd9Sstevel@tonic-gate 	/*
298*7c478bd9Sstevel@tonic-gate 	 * We may need to write the current buffer.  We have to write the
299*7c478bd9Sstevel@tonic-gate 	 * current buffer if the flush LSN is greater than or equal to the
300*7c478bd9Sstevel@tonic-gate 	 * buffer's starting LSN.
301*7c478bd9Sstevel@tonic-gate 	 */
302*7c478bd9Sstevel@tonic-gate 	current = 0;
303*7c478bd9Sstevel@tonic-gate 	if (lp->b_off != 0 && log_compare(lsn, &lp->f_lsn) >= 0) {
304*7c478bd9Sstevel@tonic-gate 		if ((ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
305*7c478bd9Sstevel@tonic-gate 			return (ret);
306*7c478bd9Sstevel@tonic-gate 
307*7c478bd9Sstevel@tonic-gate 		lp->b_off = 0;
308*7c478bd9Sstevel@tonic-gate 		current = 1;
309*7c478bd9Sstevel@tonic-gate 	}
310*7c478bd9Sstevel@tonic-gate 
311*7c478bd9Sstevel@tonic-gate 	/*
312*7c478bd9Sstevel@tonic-gate 	 * It's possible that this thread may never have written to this log
313*7c478bd9Sstevel@tonic-gate 	 * file.  Acquire a file descriptor if we don't already have one.
314*7c478bd9Sstevel@tonic-gate 	 */
315*7c478bd9Sstevel@tonic-gate 	if (dblp->lfname != dblp->lp->lsn.file)
316*7c478bd9Sstevel@tonic-gate 		if ((ret = __log_newfd(dblp)) != 0)
317*7c478bd9Sstevel@tonic-gate 			return (ret);
318*7c478bd9Sstevel@tonic-gate 
319*7c478bd9Sstevel@tonic-gate 	/* Sync all writes to disk. */
320*7c478bd9Sstevel@tonic-gate 	if ((ret = __os_fsync(dblp->lfd)) != 0) {
321*7c478bd9Sstevel@tonic-gate 		__db_panic(dblp->dbenv, ret);
322*7c478bd9Sstevel@tonic-gate 		return (ret);
323*7c478bd9Sstevel@tonic-gate 	}
324*7c478bd9Sstevel@tonic-gate 	++lp->stat.st_scount;
325*7c478bd9Sstevel@tonic-gate 
326*7c478bd9Sstevel@tonic-gate 	/*
327*7c478bd9Sstevel@tonic-gate 	 * Set the last-synced LSN, using the LSN of the current buffer.  If
328*7c478bd9Sstevel@tonic-gate 	 * the current buffer was flushed, we know the LSN of the first byte
329*7c478bd9Sstevel@tonic-gate 	 * of the buffer is on disk, otherwise, we only know that the LSN of
330*7c478bd9Sstevel@tonic-gate 	 * the record before the one beginning the current buffer is on disk.
331*7c478bd9Sstevel@tonic-gate 	 *
332*7c478bd9Sstevel@tonic-gate 	 * XXX
333*7c478bd9Sstevel@tonic-gate 	 * Check to make sure that the saved lsn isn't 0 before we go making
334*7c478bd9Sstevel@tonic-gate 	 * this change.  If DB_CHECKPOINT was called before we actually wrote
335*7c478bd9Sstevel@tonic-gate 	 * something, you can end up here without ever having written anything
336*7c478bd9Sstevel@tonic-gate 	 * to a log file, and decrementing either s_lsn.file or s_lsn.offset
337*7c478bd9Sstevel@tonic-gate 	 * will cause much sadness later on.
338*7c478bd9Sstevel@tonic-gate 	 */
339*7c478bd9Sstevel@tonic-gate 	lp->s_lsn = lp->f_lsn;
340*7c478bd9Sstevel@tonic-gate 	if (!current && lp->s_lsn.file != 0)
341*7c478bd9Sstevel@tonic-gate 		if (lp->s_lsn.offset == 0) {
342*7c478bd9Sstevel@tonic-gate 			--lp->s_lsn.file;
343*7c478bd9Sstevel@tonic-gate 			lp->s_lsn.offset = lp->persist.lg_max;
344*7c478bd9Sstevel@tonic-gate 		} else
345*7c478bd9Sstevel@tonic-gate 			--lp->s_lsn.offset;
346*7c478bd9Sstevel@tonic-gate 
347*7c478bd9Sstevel@tonic-gate 	return (0);
348*7c478bd9Sstevel@tonic-gate }
349*7c478bd9Sstevel@tonic-gate 
350*7c478bd9Sstevel@tonic-gate /*
351*7c478bd9Sstevel@tonic-gate  * __log_fill --
352*7c478bd9Sstevel@tonic-gate  *	Write information into the log.
353*7c478bd9Sstevel@tonic-gate  */
354*7c478bd9Sstevel@tonic-gate static int
__log_fill(dblp,lsn,addr,len)355*7c478bd9Sstevel@tonic-gate __log_fill(dblp, lsn, addr, len)
356*7c478bd9Sstevel@tonic-gate 	DB_LOG *dblp;
357*7c478bd9Sstevel@tonic-gate 	DB_LSN *lsn;
358*7c478bd9Sstevel@tonic-gate 	void *addr;
359*7c478bd9Sstevel@tonic-gate 	u_int32_t len;
360*7c478bd9Sstevel@tonic-gate {
361*7c478bd9Sstevel@tonic-gate 	LOG *lp;
362*7c478bd9Sstevel@tonic-gate 	u_int32_t nrec;
363*7c478bd9Sstevel@tonic-gate 	size_t nw, remain;
364*7c478bd9Sstevel@tonic-gate 	int ret;
365*7c478bd9Sstevel@tonic-gate 
366*7c478bd9Sstevel@tonic-gate 	/* Copy out the data. */
367*7c478bd9Sstevel@tonic-gate 	for (lp = dblp->lp; len > 0;) {
368*7c478bd9Sstevel@tonic-gate 		/*
369*7c478bd9Sstevel@tonic-gate 		 * If we're beginning a new buffer, note the user LSN to which
370*7c478bd9Sstevel@tonic-gate 		 * the first byte of the buffer belongs.  We have to know this
371*7c478bd9Sstevel@tonic-gate 		 * when flushing the buffer so that we know if the in-memory
372*7c478bd9Sstevel@tonic-gate 		 * buffer needs to be flushed.
373*7c478bd9Sstevel@tonic-gate 		 */
374*7c478bd9Sstevel@tonic-gate 		if (lp->b_off == 0)
375*7c478bd9Sstevel@tonic-gate 			lp->f_lsn = *lsn;
376*7c478bd9Sstevel@tonic-gate 
377*7c478bd9Sstevel@tonic-gate 		/*
378*7c478bd9Sstevel@tonic-gate 		 * If we're on a buffer boundary and the data is big enough,
379*7c478bd9Sstevel@tonic-gate 		 * copy as many records as we can directly from the data.
380*7c478bd9Sstevel@tonic-gate 		 */
381*7c478bd9Sstevel@tonic-gate 		if (lp->b_off == 0 && len >= sizeof(lp->buf)) {
382*7c478bd9Sstevel@tonic-gate 			nrec = len / sizeof(lp->buf);
383*7c478bd9Sstevel@tonic-gate 			if ((ret = __log_write(dblp,
384*7c478bd9Sstevel@tonic-gate 			    addr, nrec * sizeof(lp->buf))) != 0)
385*7c478bd9Sstevel@tonic-gate 				return (ret);
386*7c478bd9Sstevel@tonic-gate 			addr = (u_int8_t *)addr + nrec * sizeof(lp->buf);
387*7c478bd9Sstevel@tonic-gate 			len -= nrec * sizeof(lp->buf);
388*7c478bd9Sstevel@tonic-gate 			continue;
389*7c478bd9Sstevel@tonic-gate 		}
390*7c478bd9Sstevel@tonic-gate 
391*7c478bd9Sstevel@tonic-gate 		/* Figure out how many bytes we can copy this time. */
392*7c478bd9Sstevel@tonic-gate 		remain = sizeof(lp->buf) - lp->b_off;
393*7c478bd9Sstevel@tonic-gate 		nw = remain > len ? len : remain;
394*7c478bd9Sstevel@tonic-gate 		memcpy(lp->buf + lp->b_off, addr, nw);
395*7c478bd9Sstevel@tonic-gate 		addr = (u_int8_t *)addr + nw;
396*7c478bd9Sstevel@tonic-gate 		len -= nw;
397*7c478bd9Sstevel@tonic-gate 		lp->b_off += nw;
398*7c478bd9Sstevel@tonic-gate 
399*7c478bd9Sstevel@tonic-gate 		/* If we fill the buffer, flush it. */
400*7c478bd9Sstevel@tonic-gate 		if (lp->b_off == sizeof(lp->buf)) {
401*7c478bd9Sstevel@tonic-gate 			if ((ret =
402*7c478bd9Sstevel@tonic-gate 			    __log_write(dblp, lp->buf, sizeof(lp->buf))) != 0)
403*7c478bd9Sstevel@tonic-gate 				return (ret);
404*7c478bd9Sstevel@tonic-gate 			lp->b_off = 0;
405*7c478bd9Sstevel@tonic-gate 		}
406*7c478bd9Sstevel@tonic-gate 	}
407*7c478bd9Sstevel@tonic-gate 	return (0);
408*7c478bd9Sstevel@tonic-gate }
409*7c478bd9Sstevel@tonic-gate 
410*7c478bd9Sstevel@tonic-gate /*
411*7c478bd9Sstevel@tonic-gate  * __log_write --
412*7c478bd9Sstevel@tonic-gate  *	Write the log buffer to disk.
413*7c478bd9Sstevel@tonic-gate  */
414*7c478bd9Sstevel@tonic-gate static int
__log_write(dblp,addr,len)415*7c478bd9Sstevel@tonic-gate __log_write(dblp, addr, len)
416*7c478bd9Sstevel@tonic-gate 	DB_LOG *dblp;
417*7c478bd9Sstevel@tonic-gate 	void *addr;
418*7c478bd9Sstevel@tonic-gate 	u_int32_t len;
419*7c478bd9Sstevel@tonic-gate {
420*7c478bd9Sstevel@tonic-gate 	LOG *lp;
421*7c478bd9Sstevel@tonic-gate 	ssize_t nw;
422*7c478bd9Sstevel@tonic-gate 	int ret;
423*7c478bd9Sstevel@tonic-gate 
424*7c478bd9Sstevel@tonic-gate 	/*
425*7c478bd9Sstevel@tonic-gate 	 * If we haven't opened the log file yet or the current one
426*7c478bd9Sstevel@tonic-gate 	 * has changed, acquire a new log file.
427*7c478bd9Sstevel@tonic-gate 	 */
428*7c478bd9Sstevel@tonic-gate 	lp = dblp->lp;
429*7c478bd9Sstevel@tonic-gate 	if (dblp->lfd == -1 || dblp->lfname != lp->lsn.file)
430*7c478bd9Sstevel@tonic-gate 		if ((ret = __log_newfd(dblp)) != 0)
431*7c478bd9Sstevel@tonic-gate 			return (ret);
432*7c478bd9Sstevel@tonic-gate 
433*7c478bd9Sstevel@tonic-gate 	/*
434*7c478bd9Sstevel@tonic-gate 	 * Seek to the offset in the file (someone may have written it
435*7c478bd9Sstevel@tonic-gate 	 * since we last did).
436*7c478bd9Sstevel@tonic-gate 	 */
437*7c478bd9Sstevel@tonic-gate 	if ((ret = __os_seek(dblp->lfd, 0, 0, lp->w_off, 0, SEEK_SET)) != 0 ||
438*7c478bd9Sstevel@tonic-gate 	    (ret = __os_write(dblp->lfd, addr, len, &nw)) != 0) {
439*7c478bd9Sstevel@tonic-gate 		__db_panic(dblp->dbenv, ret);
440*7c478bd9Sstevel@tonic-gate 		return (ret);
441*7c478bd9Sstevel@tonic-gate 	}
442*7c478bd9Sstevel@tonic-gate 	if (nw != (int32_t)len)
443*7c478bd9Sstevel@tonic-gate 		return (EIO);
444*7c478bd9Sstevel@tonic-gate 
445*7c478bd9Sstevel@tonic-gate 	/* Reset the buffer offset and update the seek offset. */
446*7c478bd9Sstevel@tonic-gate 	lp->w_off += len;
447*7c478bd9Sstevel@tonic-gate 
448*7c478bd9Sstevel@tonic-gate 	/* Update written statistics. */
449*7c478bd9Sstevel@tonic-gate 	if ((lp->stat.st_w_bytes += len) >= MEGABYTE) {
450*7c478bd9Sstevel@tonic-gate 		lp->stat.st_w_bytes -= MEGABYTE;
451*7c478bd9Sstevel@tonic-gate 		++lp->stat.st_w_mbytes;
452*7c478bd9Sstevel@tonic-gate 	}
453*7c478bd9Sstevel@tonic-gate 	if ((lp->stat.st_wc_bytes += len) >= MEGABYTE) {
454*7c478bd9Sstevel@tonic-gate 		lp->stat.st_wc_bytes -= MEGABYTE;
455*7c478bd9Sstevel@tonic-gate 		++lp->stat.st_wc_mbytes;
456*7c478bd9Sstevel@tonic-gate 	}
457*7c478bd9Sstevel@tonic-gate 	++lp->stat.st_wcount;
458*7c478bd9Sstevel@tonic-gate 
459*7c478bd9Sstevel@tonic-gate 	return (0);
460*7c478bd9Sstevel@tonic-gate }
461*7c478bd9Sstevel@tonic-gate 
462*7c478bd9Sstevel@tonic-gate /*
463*7c478bd9Sstevel@tonic-gate  * log_file --
464*7c478bd9Sstevel@tonic-gate  *	Map a DB_LSN to a file name.
465*7c478bd9Sstevel@tonic-gate  */
466*7c478bd9Sstevel@tonic-gate int
log_file(dblp,lsn,namep,len)467*7c478bd9Sstevel@tonic-gate log_file(dblp, lsn, namep, len)
468*7c478bd9Sstevel@tonic-gate 	DB_LOG *dblp;
469*7c478bd9Sstevel@tonic-gate 	const DB_LSN *lsn;
470*7c478bd9Sstevel@tonic-gate 	char *namep;
471*7c478bd9Sstevel@tonic-gate 	size_t len;
472*7c478bd9Sstevel@tonic-gate {
473*7c478bd9Sstevel@tonic-gate 	int ret;
474*7c478bd9Sstevel@tonic-gate 	char *name;
475*7c478bd9Sstevel@tonic-gate 
476*7c478bd9Sstevel@tonic-gate 	LOG_PANIC_CHECK(dblp);
477*7c478bd9Sstevel@tonic-gate 
478*7c478bd9Sstevel@tonic-gate 	LOCK_LOGREGION(dblp);
479*7c478bd9Sstevel@tonic-gate 	ret = __log_name(dblp, lsn->file, &name, NULL, 0);
480*7c478bd9Sstevel@tonic-gate 	UNLOCK_LOGREGION(dblp);
481*7c478bd9Sstevel@tonic-gate 	if (ret != 0)
482*7c478bd9Sstevel@tonic-gate 		return (ret);
483*7c478bd9Sstevel@tonic-gate 
484*7c478bd9Sstevel@tonic-gate 	/* Check to make sure there's enough room and copy the name. */
485*7c478bd9Sstevel@tonic-gate 	if (len < strlen(name) + 1) {
486*7c478bd9Sstevel@tonic-gate 		*namep = '\0';
487*7c478bd9Sstevel@tonic-gate 		return (ENOMEM);
488*7c478bd9Sstevel@tonic-gate 	}
489*7c478bd9Sstevel@tonic-gate 	(void)strcpy(namep, name);
490*7c478bd9Sstevel@tonic-gate 	__os_freestr(name);
491*7c478bd9Sstevel@tonic-gate 
492*7c478bd9Sstevel@tonic-gate 	return (0);
493*7c478bd9Sstevel@tonic-gate }
494*7c478bd9Sstevel@tonic-gate 
495*7c478bd9Sstevel@tonic-gate /*
496*7c478bd9Sstevel@tonic-gate  * __log_newfd --
497*7c478bd9Sstevel@tonic-gate  *	Acquire a file descriptor for the current log file.
498*7c478bd9Sstevel@tonic-gate  */
499*7c478bd9Sstevel@tonic-gate static int
__log_newfd(dblp)500*7c478bd9Sstevel@tonic-gate __log_newfd(dblp)
501*7c478bd9Sstevel@tonic-gate 	DB_LOG *dblp;
502*7c478bd9Sstevel@tonic-gate {
503*7c478bd9Sstevel@tonic-gate 	int ret;
504*7c478bd9Sstevel@tonic-gate 	char *name;
505*7c478bd9Sstevel@tonic-gate 
506*7c478bd9Sstevel@tonic-gate 	/* Close any previous file descriptor. */
507*7c478bd9Sstevel@tonic-gate 	if (dblp->lfd != -1) {
508*7c478bd9Sstevel@tonic-gate 		(void)__os_close(dblp->lfd);
509*7c478bd9Sstevel@tonic-gate 		dblp->lfd = -1;
510*7c478bd9Sstevel@tonic-gate 	}
511*7c478bd9Sstevel@tonic-gate 
512*7c478bd9Sstevel@tonic-gate 	/* Get the path of the new file and open it. */
513*7c478bd9Sstevel@tonic-gate 	dblp->lfname = dblp->lp->lsn.file;
514*7c478bd9Sstevel@tonic-gate 	if ((ret = __log_name(dblp,
515*7c478bd9Sstevel@tonic-gate 	    dblp->lfname, &name, &dblp->lfd, DB_CREATE | DB_SEQUENTIAL)) != 0)
516*7c478bd9Sstevel@tonic-gate 		__db_err(dblp->dbenv, "log_put: %s: %s", name, strerror(ret));
517*7c478bd9Sstevel@tonic-gate 
518*7c478bd9Sstevel@tonic-gate 	__os_freestr(name);
519*7c478bd9Sstevel@tonic-gate 	return (ret);
520*7c478bd9Sstevel@tonic-gate }
521*7c478bd9Sstevel@tonic-gate 
522*7c478bd9Sstevel@tonic-gate /*
523*7c478bd9Sstevel@tonic-gate  * __log_name --
524*7c478bd9Sstevel@tonic-gate  *	Return the log name for a particular file, and optionally open it.
525*7c478bd9Sstevel@tonic-gate  *
526*7c478bd9Sstevel@tonic-gate  * PUBLIC: int __log_name __P((DB_LOG *, u_int32_t, char **, int *, u_int32_t));
527*7c478bd9Sstevel@tonic-gate  */
528*7c478bd9Sstevel@tonic-gate int
__log_name(dblp,filenumber,namep,fdp,flags)529*7c478bd9Sstevel@tonic-gate __log_name(dblp, filenumber, namep, fdp, flags)
530*7c478bd9Sstevel@tonic-gate 	DB_LOG *dblp;
531*7c478bd9Sstevel@tonic-gate 	u_int32_t filenumber, flags;
532*7c478bd9Sstevel@tonic-gate 	char **namep;
533*7c478bd9Sstevel@tonic-gate 	int *fdp;
534*7c478bd9Sstevel@tonic-gate {
535*7c478bd9Sstevel@tonic-gate 	int ret;
536*7c478bd9Sstevel@tonic-gate 	char *oname;
537*7c478bd9Sstevel@tonic-gate 	char old[sizeof(LFPREFIX) + 5 + 20], new[sizeof(LFPREFIX) + 10 + 20];
538*7c478bd9Sstevel@tonic-gate 
539*7c478bd9Sstevel@tonic-gate 	/*
540*7c478bd9Sstevel@tonic-gate 	 * !!!
541*7c478bd9Sstevel@tonic-gate 	 * The semantics of this routine are bizarre.
542*7c478bd9Sstevel@tonic-gate 	 *
543*7c478bd9Sstevel@tonic-gate 	 * The reason for all of this is that we need a place where we can
544*7c478bd9Sstevel@tonic-gate 	 * intercept requests for log files, and, if appropriate, check for
545*7c478bd9Sstevel@tonic-gate 	 * both the old-style and new-style log file names.  The trick is
546*7c478bd9Sstevel@tonic-gate 	 * that all callers of this routine that are opening the log file
547*7c478bd9Sstevel@tonic-gate 	 * read-only want to use an old-style file name if they can't find
548*7c478bd9Sstevel@tonic-gate 	 * a match using a new-style name.  The only down-side is that some
549*7c478bd9Sstevel@tonic-gate 	 * callers may check for the old-style when they really don't need
550*7c478bd9Sstevel@tonic-gate 	 * to, but that shouldn't mess up anything, and we only check for
551*7c478bd9Sstevel@tonic-gate 	 * the old-style name when we've already failed to find a new-style
552*7c478bd9Sstevel@tonic-gate 	 * one.
553*7c478bd9Sstevel@tonic-gate 	 *
554*7c478bd9Sstevel@tonic-gate 	 * Create a new-style file name, and if we're not going to open the
555*7c478bd9Sstevel@tonic-gate 	 * file, return regardless.
556*7c478bd9Sstevel@tonic-gate 	 */
557*7c478bd9Sstevel@tonic-gate 	(void)snprintf(new, sizeof(new), LFNAME, filenumber);
558*7c478bd9Sstevel@tonic-gate 	if ((ret = __db_appname(dblp->dbenv,
559*7c478bd9Sstevel@tonic-gate 	    DB_APP_LOG, dblp->dir, new, 0, NULL, namep)) != 0 || fdp == NULL)
560*7c478bd9Sstevel@tonic-gate 		return (ret);
561*7c478bd9Sstevel@tonic-gate 
562*7c478bd9Sstevel@tonic-gate 	/* Open the new-style file -- if we succeed, we're done. */
563*7c478bd9Sstevel@tonic-gate 	if ((ret = __db_open(*namep,
564*7c478bd9Sstevel@tonic-gate 	    flags, flags, dblp->lp->persist.mode, fdp)) == 0)
565*7c478bd9Sstevel@tonic-gate 		return (0);
566*7c478bd9Sstevel@tonic-gate 
567*7c478bd9Sstevel@tonic-gate 	/*
568*7c478bd9Sstevel@tonic-gate 	 * The open failed... if the DB_RDONLY flag isn't set, we're done,
569*7c478bd9Sstevel@tonic-gate 	 * the caller isn't interested in old-style files.
570*7c478bd9Sstevel@tonic-gate 	 */
571*7c478bd9Sstevel@tonic-gate 	if (!LF_ISSET(DB_RDONLY))
572*7c478bd9Sstevel@tonic-gate 		return (ret);
573*7c478bd9Sstevel@tonic-gate 
574*7c478bd9Sstevel@tonic-gate 	/* Create an old-style file name. */
575*7c478bd9Sstevel@tonic-gate 	(void)snprintf(old, sizeof(old), LFNAME_V1, filenumber);
576*7c478bd9Sstevel@tonic-gate 	if ((ret = __db_appname(dblp->dbenv,
577*7c478bd9Sstevel@tonic-gate 	    DB_APP_LOG, dblp->dir, old, 0, NULL, &oname)) != 0)
578*7c478bd9Sstevel@tonic-gate 		goto err;
579*7c478bd9Sstevel@tonic-gate 
580*7c478bd9Sstevel@tonic-gate 	/*
581*7c478bd9Sstevel@tonic-gate 	 * Open the old-style file -- if we succeed, we're done.  Free the
582*7c478bd9Sstevel@tonic-gate 	 * space allocated for the new-style name and return the old-style
583*7c478bd9Sstevel@tonic-gate 	 * name to the caller.
584*7c478bd9Sstevel@tonic-gate 	 */
585*7c478bd9Sstevel@tonic-gate 	if ((ret = __db_open(oname,
586*7c478bd9Sstevel@tonic-gate 	    flags, flags, dblp->lp->persist.mode, fdp)) == 0) {
587*7c478bd9Sstevel@tonic-gate 		__os_freestr(*namep);
588*7c478bd9Sstevel@tonic-gate 		*namep = oname;
589*7c478bd9Sstevel@tonic-gate 		return (0);
590*7c478bd9Sstevel@tonic-gate 	}
591*7c478bd9Sstevel@tonic-gate 
592*7c478bd9Sstevel@tonic-gate 	/*
593*7c478bd9Sstevel@tonic-gate 	 * Couldn't find either style of name -- return the new-style name
594*7c478bd9Sstevel@tonic-gate 	 * for the caller's error message.  If it's an old-style name that's
595*7c478bd9Sstevel@tonic-gate 	 * actually missing we're going to confuse the user with the error
596*7c478bd9Sstevel@tonic-gate 	 * message, but that implies that not only were we looking for an
597*7c478bd9Sstevel@tonic-gate 	 * old-style name, but we expected it to exist and we weren't just
598*7c478bd9Sstevel@tonic-gate 	 * looking for any log file.  That's not a likely error.
599*7c478bd9Sstevel@tonic-gate 	 */
600*7c478bd9Sstevel@tonic-gate err:	__os_freestr(oname);
601*7c478bd9Sstevel@tonic-gate 	return (ret);
602*7c478bd9Sstevel@tonic-gate }
603