1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996, 1997, 1998
5 *	Sleepycat Software.  All rights reserved.
6 */
7#include "config.h"
8
#ifndef lint
/* SCCS-style "@(#)" identification string for this file; omitted under lint. */
static const char sccsid[] = "@(#)log_get.c	10.38 (Sleepycat) 10/3/98";
#endif /* not lint */
12
13#ifndef NO_SYSTEM_INCLUDES
14#include <sys/types.h>
15
16#include <errno.h>
17#include <string.h>
18#include <unistd.h>
19#endif
20
21#include "db_int.h"
22#include "shqueue.h"
23#include "db_page.h"
24#include "log.h"
25#include "hash.h"
26#include "common_ext.h"
27
28/*
29 * log_get --
30 *	Get a log record.
31 */
32int
33log_get(dblp, alsn, dbt, flags)
34	DB_LOG *dblp;
35	DB_LSN *alsn;
36	DBT *dbt;
37	u_int32_t flags;
38{
39	int ret;
40
41	LOG_PANIC_CHECK(dblp);
42
43	/* Validate arguments. */
44	if (flags != DB_CHECKPOINT && flags != DB_CURRENT &&
45	    flags != DB_FIRST && flags != DB_LAST &&
46	    flags != DB_NEXT && flags != DB_PREV && flags != DB_SET)
47		return (__db_ferr(dblp->dbenv, "log_get", 1));
48
49	if (F_ISSET(dblp, DB_AM_THREAD)) {
50		if (flags == DB_NEXT || flags == DB_PREV || flags == DB_CURRENT)
51			return (__db_ferr(dblp->dbenv, "log_get", 1));
52		if (!F_ISSET(dbt, DB_DBT_USERMEM | DB_DBT_MALLOC))
53			return (__db_ferr(dblp->dbenv, "threaded data", 1));
54	}
55
56	LOCK_LOGREGION(dblp);
57
58	/*
59	 * If we get one of the log's header records, repeat the operation.
60	 * This assumes that applications don't ever request the log header
61	 * records by LSN, but that seems reasonable to me.
62	 */
63	ret = __log_get(dblp, alsn, dbt, flags, 0);
64	if (ret == 0 && alsn->offset == 0) {
65		switch (flags) {
66		case DB_FIRST:
67			flags = DB_NEXT;
68			break;
69		case DB_LAST:
70			flags = DB_PREV;
71			break;
72		}
73		ret = __log_get(dblp, alsn, dbt, flags, 0);
74	}
75
76	UNLOCK_LOGREGION(dblp);
77
78	return (ret);
79}
80
81/*
82 * __log_get --
83 *	Get a log record; internal version.
84 *
85 * PUBLIC: int __log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t, int));
86 */
87int
88__log_get(dblp, alsn, dbt, flags, silent)
89	DB_LOG *dblp;
90	DB_LSN *alsn;
91	DBT *dbt;
92	u_int32_t flags;
93	int silent;
94{
95	DB_LSN nlsn;
96	HDR hdr;
97	LOG *lp;
98	size_t len;
99	ssize_t nr;
100	int cnt, ret;
101	char *np, *tbuf;
102	const char *fail;
103	void *p, *shortp;
104
105	lp = dblp->lp;
106	fail = np = tbuf = NULL;
107
108	nlsn = dblp->c_lsn;
109	switch (flags) {
110	case DB_CHECKPOINT:
111		nlsn = lp->chkpt_lsn;
112		if (IS_ZERO_LSN(nlsn)) {
113			__db_err(dblp->dbenv,
114	"log_get: unable to find checkpoint record: no checkpoint set.");
115			ret = ENOENT;
116			goto err2;
117		}
118		break;
119	case DB_NEXT:				/* Next log record. */
120		if (!IS_ZERO_LSN(nlsn)) {
121			/* Increment the cursor by the cursor record size. */
122			nlsn.offset += dblp->c_len;
123			break;
124		}
125		/* FALLTHROUGH */
126	case DB_FIRST:				/* Find the first log record. */
127		/* Find the first log file. */
128		if ((ret = __log_find(dblp, 1, &cnt)) != 0)
129			goto err2;
130
131		/*
132		 * We may have only entered records in the buffer, and not
133		 * yet written a log file.  If no log files were found and
134		 * there's anything in the buffer, it belongs to file 1.
135		 */
136		if (cnt == 0)
137			cnt = 1;
138
139		nlsn.file = cnt;
140		nlsn.offset = 0;
141		break;
142	case DB_CURRENT:			/* Current log record. */
143		break;
144	case DB_PREV:				/* Previous log record. */
145		if (!IS_ZERO_LSN(nlsn)) {
146			/* If at start-of-file, move to the previous file. */
147			if (nlsn.offset == 0) {
148				if (nlsn.file == 1 ||
149				    __log_valid(dblp, nlsn.file - 1, 0) != 0)
150					return (DB_NOTFOUND);
151
152				--nlsn.file;
153				nlsn.offset = dblp->c_off;
154			} else
155				nlsn.offset = dblp->c_off;
156			break;
157		}
158		/* FALLTHROUGH */
159	case DB_LAST:				/* Last log record. */
160		nlsn.file = lp->lsn.file;
161		nlsn.offset = lp->lsn.offset - lp->len;
162		break;
163	case DB_SET:				/* Set log record. */
164		nlsn = *alsn;
165		break;
166	}
167
168retry:
169	/* Return 1 if the request is past end-of-file. */
170	if (nlsn.file > lp->lsn.file ||
171	    (nlsn.file == lp->lsn.file && nlsn.offset >= lp->lsn.offset))
172		return (DB_NOTFOUND);
173
174	/* If we've switched files, discard the current fd. */
175	if (dblp->c_lsn.file != nlsn.file && dblp->c_fd != -1) {
176		(void)__os_close(dblp->c_fd);
177		dblp->c_fd = -1;
178	}
179
180	/* If the entire record is in the in-memory buffer, copy it out. */
181	if (nlsn.file == lp->lsn.file && nlsn.offset >= lp->w_off) {
182		/* Copy the header. */
183		p = lp->buf + (nlsn.offset - lp->w_off);
184		memcpy(&hdr, p, sizeof(HDR));
185
186		/* Copy the record. */
187		len = hdr.len - sizeof(HDR);
188		if ((ret = __db_retcopy(dbt, (u_int8_t *)p + sizeof(HDR),
189		    len, &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
190			goto err1;
191		goto cksum;
192	}
193
194	/* Acquire a file descriptor. */
195	if (dblp->c_fd == -1) {
196		if ((ret = __log_name(dblp, nlsn.file,
197		    &np, &dblp->c_fd, DB_RDONLY | DB_SEQUENTIAL)) != 0) {
198			fail = np;
199			goto err1;
200		}
201		__os_freestr(np);
202		np = NULL;
203	}
204
205	/* Seek to the header offset and read the header. */
206	if ((ret =
207	    __os_seek(dblp->c_fd, 0, 0, nlsn.offset, 0, SEEK_SET)) != 0) {
208		fail = "seek";
209		goto err1;
210	}
211	if ((ret = __os_read(dblp->c_fd, &hdr, sizeof(HDR), &nr)) != 0) {
212		fail = "read";
213		goto err1;
214	}
215	if (nr == sizeof(HDR))
216		shortp = NULL;
217	else {
218		/* If read returns EOF, try the next file. */
219		if (nr == 0) {
220			if (flags != DB_NEXT || nlsn.file == lp->lsn.file)
221				goto corrupt;
222
223			/* Move to the next file. */
224			++nlsn.file;
225			nlsn.offset = 0;
226			goto retry;
227		}
228
229		/*
230		 * If read returns a short count the rest of the record has
231		 * to be in the in-memory buffer.
232		 */
233		if (lp->b_off < sizeof(HDR) - nr)
234			goto corrupt;
235
236		/* Get the rest of the header from the in-memory buffer. */
237		memcpy((u_int8_t *)&hdr + nr, lp->buf, sizeof(HDR) - nr);
238		shortp = lp->buf + (sizeof(HDR) - nr);
239	}
240
241	/*
242	 * Check for buffers of 0's, that's what we usually see during
243	 * recovery, although it's certainly not something on which we
244	 * can depend.
245	 */
246	if (hdr.len <= sizeof(HDR))
247		goto corrupt;
248	len = hdr.len - sizeof(HDR);
249
250	/* If we've already moved to the in-memory buffer, fill from there. */
251	if (shortp != NULL) {
252		if (lp->b_off < ((u_int8_t *)shortp - lp->buf) + len)
253			goto corrupt;
254		if ((ret = __db_retcopy(dbt, shortp, len,
255		    &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
256			goto err1;
257		goto cksum;
258	}
259
260	/*
261	 * Allocate temporary memory to hold the record.
262	 *
263	 * XXX
264	 * We're calling malloc(3) with a region locked.  This isn't
265	 * a good idea.
266	 */
267	if ((ret = __os_malloc(len, NULL, &tbuf)) != 0)
268		goto err1;
269
270	/*
271	 * Read the record into the buffer.  If read returns a short count,
272	 * there was an error or the rest of the record is in the in-memory
273	 * buffer.  Note, the information may be garbage if we're in recovery,
274	 * so don't read past the end of the buffer's memory.
275	 */
276	if ((ret = __os_read(dblp->c_fd, tbuf, len, &nr)) != 0) {
277		fail = "read";
278		goto err1;
279	}
280	if (len - nr > sizeof(lp->buf))
281		goto corrupt;
282	if (nr != (ssize_t)len) {
283		if (lp->b_off < len - nr)
284			goto corrupt;
285
286		/* Get the rest of the record from the in-memory buffer. */
287		memcpy((u_int8_t *)tbuf + nr, lp->buf, len - nr);
288	}
289
290	/* Copy the record into the user's DBT. */
291	if ((ret = __db_retcopy(dbt, tbuf, len,
292	    &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
293		goto err1;
294	__os_free(tbuf, 0);
295	tbuf = NULL;
296
297cksum:	if (hdr.cksum != __ham_func4(dbt->data, dbt->size)) {
298		if (!silent)
299			__db_err(dblp->dbenv, "log_get: checksum mismatch");
300		goto corrupt;
301	}
302
303	/* Update the cursor and the return lsn. */
304	dblp->c_off = hdr.prev;
305	dblp->c_len = hdr.len;
306	dblp->c_lsn = *alsn = nlsn;
307
308	return (0);
309
310corrupt:/*
311	 * This is the catchall -- for some reason we didn't find enough
312	 * information or it wasn't reasonable information, and it wasn't
313	 * because a system call failed.
314	 */
315	ret = EIO;
316	fail = "read";
317
318err1:	if (!silent)
319		if (fail == NULL)
320			__db_err(dblp->dbenv, "log_get: %s", strerror(ret));
321		else
322			__db_err(dblp->dbenv,
323			    "log_get: %s: %s", fail, strerror(ret));
324err2:	if (np != NULL)
325		__os_freestr(np);
326	if (tbuf != NULL)
327		__os_free(tbuf, 0);
328	return (ret);
329}
330