1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29/*
30 * nfs log - read buffer file and return structs in usable form
31 */
32
33#include <ctype.h>
34#include <stdio.h>
35#include <stdlib.h>
36#include <stddef.h>
37#include <string.h>
38#include <fcntl.h>
39#include <unistd.h>
40#include <signal.h>
41#include <sys/types.h>
42#include <sys/param.h>
43#include <sys/stat.h>
44#include <sys/utsname.h>
45#include <sys/mman.h>
46#include <strings.h>
47#include <errno.h>
48#include <syslog.h>
49#include <time.h>
50#include <limits.h>
51#include <libintl.h>
52#include <values.h>
53#include <search.h>
54#include <pwd.h>
55#include <netdb.h>
56#include <rpc/rpc.h>
57#include <netconfig.h>
58#include <netdir.h>
59#include <nfs/nfs_sec.h>
60#include <nfs/export.h>
61#include <rpc/auth.h>
62#include <rpc/svc.h>
63#include <rpc/xdr.h>
64#include <rpc/clnt.h>
65#include <nfs/nfs.h>
66#include <nfs/nfs_log.h>
67#include "nfslogd.h"
68
/*
 * Upper bound on the number of out-of-sequence log records that
 * nfslog_get_logrecord() keeps queued on one buffer before it gives up
 * waiting for the expected id and hands back the head of the queue.
 */
#define	MAX_LRS_READ_AHEAD 2048
/*
 * Once more than this many processed records are pending on a buffer,
 * nfslog_ins_last_rec_processed() tries to fold the leading processed
 * range into the on-disk buffer header.
 */
#define	MAX_RECS_TO_DELAY 32768

/* Forward declarations for the file-local helpers defined below. */
static int 		nfslog_init_buf(char *, struct nfslog_buf *, int *);
static void		nfslog_free_buf(struct nfslog_buf *, int);
static struct nfslog_lr *nfslog_read_buffer(struct nfslog_buf *);
static void		free_lrp(struct nfslog_lr *);
static struct nfslog_lr *remove_lrp_from_lb(struct nfslog_buf *,
			struct nfslog_lr *);
static void		insert_lrp_to_lb(struct nfslog_buf *,
			struct nfslog_lr *);
static void		nfslog_rewrite_bufheader(struct nfslog_buf *);
81
82/*
83 * Treat the provided path name as an NFS log buffer file.
84 * Allocate a data structure for its handling and initialize it.
85 * *error contains the previous error condition encountered for
86 * this object. This value can be used to avoid printing the last
87 * error endlessly.
88 * It will set *error appropriately after processing.
89 */
90struct nfslog_buf *
91nfslog_open_buf(char *bufpath, int *error)
92{
93	struct nfslog_buf	*lbp = NULL;
94
95	if (bufpath == NULL) {
96		*error = EINVAL;
97		return (NULL);
98	}
99
100	if ((lbp = malloc(sizeof (struct nfslog_buf))) == NULL) {
101		*error = ENOMEM;
102		return (NULL);
103	}
104	bzero(lbp, sizeof (struct nfslog_buf));
105
106	if (nfslog_init_buf(bufpath, lbp, error)) {
107		free(lbp);
108		return (NULL);
109	}
110	return (lbp);
111}
112
/*
 * Release everything hanging off the log buffer handle and then the
 * handle itself.  'close_quick' is passed straight through to
 * nfslog_free_buf().
 *
 * A NULL handle is ignored, mirroring free(NULL), so that callers on
 * error paths do not have to test before closing.
 */
void
nfslog_close_buf(struct nfslog_buf *lbp, int close_quick)
{
	if (lbp == NULL)
		return;

	nfslog_free_buf(lbp, close_quick);
	free(lbp);
}
122
123/*
124 * Set up the log buffer struct; simple things are opening and locking
125 * the buffer file and then on to mmap()ing it for later use by the
126 * XDR decode path.  Make sure to read the buffer header before
127 * returning so that we will be at the first true log record.
128 *
129 * *error contains the last error encountered on this object. It can
130 * be used to avoid reporting the same error endlessly. It is reset
131 * to the current error code on return.
132 */
133static int
134nfslog_init_buf(char *bufpath, struct nfslog_buf *lbp, int *error)
135{
136	struct stat sb;
137	int preverror = *error;
138
139	lbp->next = lbp;
140	lbp->prev = lbp;
141	/*
142	 * set these values so that the free routine will know what to do
143	 */
144	lbp->mmap_addr = (intptr_t)MAP_FAILED;
145	lbp->last_rec_id = MAXINT - 1;
146	lbp->bh.bh_length = 0;
147	lbp->bh_lrp = NULL;
148	lbp->num_lrps = 0;
149	lbp->lrps = NULL;
150	lbp->last_record_offset = 0;
151	lbp->prp = NULL;
152	lbp->num_pr_queued = 0;
153
154	lbp->bufpath = strdup(bufpath);
155	if (lbp->bufpath == NULL) {
156		*error = ENOMEM;
157		if (preverror != *error) {
158			syslog(LOG_ERR, gettext("Cannot strdup '%s': %s"),
159				bufpath, strerror(*error));
160		}
161		nfslog_free_buf(lbp, FALSE);
162		return (*error);
163	}
164
165	if ((lbp->fd = open(bufpath, O_RDWR)) < 0) {
166		*error = errno;
167		if (preverror != *error) {
168			syslog(LOG_ERR, gettext("Cannot open '%s': %s"),
169				bufpath, strerror(*error));
170		}
171		nfslog_free_buf(lbp, FALSE);
172		return (*error);
173	}
174
175	/*
176	 * Lock the entire buffer file to prevent conflicting access.
177	 * We get a write lock because we want only 1 process to be
178	 * generating records from it.
179	 */
180	lbp->fl.l_type = F_WRLCK;
181	lbp->fl.l_whence = SEEK_SET;		/* beginning of file */
182	lbp->fl.l_start = (offset_t)0;
183	lbp->fl.l_len = 0;			/* entire file */
184	lbp->fl.l_sysid = 0;
185	lbp->fl.l_pid = 0;
186	if (fcntl(lbp->fd, F_SETLKW, &lbp->fl) == -1) {
187		*error = errno;
188		if (preverror != *error) {
189			syslog(LOG_ERR, gettext("Cannot lock (%s): %s"),
190				bufpath, strerror(*error));
191		}
192		nfslog_free_buf(lbp, FALSE);
193		return (*error);
194	}
195
196	if (fstat(lbp->fd, &sb)) {
197		*error = errno;
198		if (preverror != *error) {
199			syslog(LOG_ERR, gettext("Cannot stat (%s): %s"),
200				bufpath, strerror(*error));
201		}
202		nfslog_free_buf(lbp, FALSE);
203		return (*error);
204	}
205	lbp->filesize = sb.st_size;
206
207	lbp->mmap_addr = (intptr_t)mmap(0, lbp->filesize, PROT_READ|PROT_WRITE,
208		MAP_SHARED|MAP_NORESERVE, lbp->fd, 0);
209
210	/* This is part of the duality of the use of either mmap()|read() */
211	if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
212		lbp->next_rec = 0;
213	} else {
214		lbp->next_rec = lbp->mmap_addr;
215	}
216
217	/* Read the header */
218	if ((lbp->bh_lrp = nfslog_read_buffer(lbp)) == NULL) {
219		*error = EIO;
220		if (preverror != *error) {
221			syslog(LOG_ERR, gettext(
222				"error in reading file '%s': %s"),
223				bufpath, strerror(EIO));
224		}
225		nfslog_free_buf(lbp, FALSE);
226		return (*error);
227	}
228
229	if (!xdr_nfslog_buffer_header(&lbp->bh_lrp->xdrs, &lbp->bh)) {
230		*error = EIO;
231		if (preverror != *error) {
232			syslog(LOG_ERR, gettext(
233				"error in reading file '%s': %s"),
234				bufpath, strerror(*error));
235		}
236		nfslog_free_buf(lbp, FALSE);
237		return (*error);
238	}
239
240	/*
241	 * Set the pointer to the next record based on the buffer header.
242	 * 'lbp->bh.bh_offset' contains the offset of where to begin
243	 * processing relative to the buffer header.
244	 */
245	lbp->next_rec += lbp->bh.bh_offset;
246
247	/*
248	 * If we are going to be using read() for file data, then we may
249	 * have to adjust the current file pointer to take into account
250	 * a starting point other than the beginning of the file.
251	 * If mmap is being used, this is taken care of as a side effect of
252	 * setting up the value of next_rec.
253	 */
254	if (lbp->mmap_addr == (intptr_t)MAP_FAILED && lbp->next_rec != 0) {
255		(void) lseek(lbp->fd, lbp->next_rec, SEEK_SET);
256		/* This is a special case of setting the last_record_offset */
257		lbp->last_record_offset = lbp->next_rec;
258	} else {
259		lbp->last_record_offset = lbp->next_rec - lbp->mmap_addr;
260	}
261
262	return (*error = 0);
263}
264
265/*
266 * Free the nfslog buffer and its associated allocations
267 */
268static void
269nfslog_free_buf(struct nfslog_buf *lbp, int close_quick)
270{
271	XDR	xdrs;
272	int	error;
273	caddr_t buffer;
274	struct nfslog_lr *lrp, *lrp_next;
275	struct processed_records *prp, *tprp;
276
277	/* work to free the offset records and rewrite header */
278	if (lbp->prp) {
279		if (lbp->last_record_offset == lbp->prp->start_offset) {
280
281			/* adjust the offset for the entire buffer */
282			lbp->last_record_offset =
283				lbp->prp->start_offset + lbp->prp->len;
284
285			nfslog_rewrite_bufheader(lbp);
286		}
287		if (close_quick)
288			return;
289		prp = lbp->prp;
290		do {
291			tprp = prp->next;
292			free(prp);
293			prp = tprp;
294		} while (lbp->prp != prp);
295	}
296
297	if (close_quick)
298		return;
299
300	/* Take care of the queue log records first */
301	if (lbp->lrps != NULL) {
302		lrp = lbp->lrps;
303		do {
304			lrp_next = lrp->next;
305			nfslog_free_logrecord(lrp, FALSE);
306			lrp = lrp_next;
307		} while (lrp != lbp->lrps);
308		lbp->lrps = NULL;
309	}
310
311	/* The buffer header was decoded and needs to be freed */
312	if (lbp->bh.bh_length != 0) {
313		buffer = (lbp->bh_lrp->buffer != NULL ?
314			lbp->bh_lrp->buffer : (caddr_t)lbp->mmap_addr);
315		xdrmem_create(&xdrs, buffer, lbp->bh_lrp->recsize, XDR_FREE);
316		(void) xdr_nfslog_buffer_header(&xdrs, &lbp->bh);
317		lbp->bh.bh_length = 0;
318	}
319
320	/* get rid of the bufheader lrp */
321	if (lbp->bh_lrp != NULL) {
322		free_lrp(lbp->bh_lrp);
323		lbp->bh_lrp = NULL;
324	}
325
326	/* Clean up for mmap() usage */
327	if (lbp->mmap_addr != (intptr_t)MAP_FAILED) {
328		if (munmap((void *)lbp->mmap_addr, lbp->filesize)) {
329			error = errno;
330			syslog(LOG_ERR, gettext("munmap failed: %s: %s"),
331				(lbp->bufpath != NULL ? lbp->bufpath : ""),
332				strerror(error));
333		}
334		lbp->mmap_addr = (intptr_t)MAP_FAILED;
335	}
336
337	/* Finally close the buffer file */
338	if (lbp->fd >= 0) {
339		lbp->fl.l_type = F_UNLCK;
340		if (fcntl(lbp->fd, F_SETLK, &lbp->fl) == -1) {
341			error = errno;
342			syslog(LOG_ERR,
343				gettext("Cannot unlock file %s: %s"),
344				(lbp->bufpath != NULL ? lbp->bufpath : ""),
345				strerror(error));
346		}
347		(void) close(lbp->fd);
348		lbp->fd = -1;
349	}
350	if (lbp->bufpath != NULL)
351		free(lbp->bufpath);
352}
353
354/*
355 * We are reading a record from the log buffer file.  Since we are reading
356 * an XDR stream, we first have to read the first integer to determine
357 * how much to read in whole for this record.  Our preference is to use
358 * mmap() but if failed initially we will be using read().  Need to be
359 * careful about proper initialization of the log record both from a field
360 * perspective and for XDR decoding.
361 */
362static struct nfslog_lr *
363nfslog_read_buffer(struct nfslog_buf *lbp)
364{
365	XDR xdrs;
366	unsigned int	record_size;
367	struct nfslog_lr *lrp;
368	char		*sizebuf, tbuf[16];
369	caddr_t		buffer;
370	offset_t	next_rec;
371
372	lrp = (struct nfslog_lr *)malloc(sizeof (*lrp));
373	bzero(lrp, sizeof (*lrp));
374
375	/* Check to see if mmap worked */
376	if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
377		/*
378		 * EOF or other failure; we don't try to recover, just return
379		 */
380		if (read(lbp->fd, tbuf, BYTES_PER_XDR_UNIT) <= 0) {
381			free_lrp(lrp);
382			return (NULL);
383		}
384		sizebuf = tbuf;
385	} else {
386		/* EOF check for the mmap() case */
387		if (lbp->filesize <= lbp->next_rec - lbp->mmap_addr) {
388			free_lrp(lrp);
389			return (NULL);
390		}
391		sizebuf = (char *)(uintptr_t)lbp->next_rec;
392	}
393
394	/* We have to XDR the first int so we know how much is in this record */
395	xdrmem_create(&xdrs, sizebuf, sizeof (unsigned int), XDR_DECODE);
396
397	if (!xdr_u_int(&xdrs, &record_size)) {
398		free_lrp(lrp);
399		return (NULL);
400	}
401
402	lrp->recsize = record_size;
403	next_rec = lbp->next_rec + lrp->recsize;
404
405	if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
406		/*
407		 * Read() case - shouldn't be used very much.
408		 * Note: The 'buffer' field is used later on
409		 * to determine which method is being used mmap()|read()
410		 */
411		if (lbp->filesize < next_rec) {
412			/* partial record from buffer */
413			syslog(LOG_ERR, gettext(
414				"Last partial record in work buffer %s "
415				"discarded\n"), lbp->bufpath);
416			free_lrp(lrp);
417			return (NULL);
418		}
419
420		if ((lrp->buffer = malloc(lrp->recsize)) == NULL) {
421			free_lrp(lrp);
422			return (NULL);
423		}
424		bcopy(sizebuf, lrp->buffer, BYTES_PER_XDR_UNIT);
425		if (read(lbp->fd, &lrp->buffer[BYTES_PER_XDR_UNIT],
426			lrp->recsize - BYTES_PER_XDR_UNIT) <= 0) {
427			free_lrp(lrp);
428			return (NULL);
429		}
430	} else if (lbp->filesize < next_rec - lbp->mmap_addr) {
431			/* partial record from buffer */
432			syslog(LOG_ERR, gettext(
433				"Last partial record in work buffer %s "
434				"discarded\n"), lbp->bufpath);
435			free_lrp(lrp);
436			return (NULL);
437	}
438
439
440	/* other initializations */
441	lrp->next = lrp->prev = lrp;
442	/* Keep track of the offset at which this record was read */
443	if (lbp->mmap_addr == (intptr_t)MAP_FAILED)
444		lrp->f_offset = lbp->next_rec;
445	else
446		lrp->f_offset = lbp->next_rec - lbp->mmap_addr;
447	/* This is the true address of the record */
448	lrp->record = lbp->next_rec;
449	lrp->xdrargs = lrp->xdrres = NULL;
450	lrp->lbp = lbp;
451
452	/* Here is the logic for mmap() vs. read() */
453	buffer = (lrp->buffer != NULL ? lrp->buffer : (caddr_t)lrp->record);
454
455	/* Setup for the 'real' XDR decode of the entire record */
456	xdrmem_create(&lrp->xdrs, buffer, lrp->recsize, XDR_DECODE);
457
458	/* calculate the offset for the next record */
459	lbp->next_rec = next_rec;
460
461	return (lrp);
462}
463
464/*
465 * Simple removal of the log record from the log buffer queue.
466 * Make sure to manage the count of records queued.
467 */
468static struct nfslog_lr *
469remove_lrp_from_lb(struct nfslog_buf *lbp, struct nfslog_lr *lrp)
470{
471	if (lbp->lrps == lrp) {
472		if (lbp->lrps == lbp->lrps->next) {
473			lbp->lrps = NULL;
474		} else {
475			lbp->lrps = lrp->next;
476			remque(lrp);
477		}
478	} else {
479		remque(lrp);
480	}
481	lbp->num_lrps--;
482	return (lrp);
483}
484
485/*
486 * Insert a log record struct on the log buffer struct.  The log buffer
487 * has a pointer to the head of a queue of log records that have been
488 * read from the buffer file but have not been processed yet because
489 * the record id did not match the sequence desired for processing.
490 * The insertion must be in the 'correct'/sorted order which adds
491 * to the complexity of this function.
492 */
493static void
494insert_lrp_to_lb(struct nfslog_buf *lbp, struct nfslog_lr *lrp)
495{
496	int ins_rec_id = lrp->log_record.re_header.rh_rec_id;
497	struct nfslog_lr *curlrp;
498
499	if (lbp->lrps == NULL) {
500		/* that was easy */
501		lbp->lrps = lrp;
502	} else {
503		/*
504		 * Does this lrp go before the first on the list?
505		 * If so, do the insertion by hand since insque is not
506		 * as flexible when queueing an element to the head of
507		 * a list.
508		 */
509		if (ins_rec_id < lbp->lrps->log_record.re_header.rh_rec_id) {
510			lrp->next = lbp->lrps;
511			lrp->prev = lbp->lrps->prev;
512			lbp->lrps->prev->next = lrp;
513			lbp->lrps->prev = lrp;
514			lbp->lrps = lrp;
515		} else {
516			/*
517			 * Search the queue for the correct insertion point.
518			 * Be careful about the insque so that the record
519			 * ends up in the right place.
520			 */
521			curlrp = lbp->lrps;
522			do {
523				if (ins_rec_id <
524				curlrp->next->log_record.re_header.rh_rec_id)
525					break;
526				curlrp = curlrp->next;
527			} while (curlrp != lbp->lrps);
528			if (curlrp == lbp->lrps)
529				insque(lrp, lbp->lrps->prev);
530			else
531				insque(lrp, curlrp);
532		}
533	}
534	/* always keep track of how many we have */
535	lbp->num_lrps++;
536}
537
538/*
539 * We are rewriting the buffer header at the start of the log buffer
540 * for the sole purpose of resetting the bh_offset field.  This is
541 * supposed to represent the progress that the nfslogd daemon has made
542 * in its processing of the log buffer file.
543 * 'lbp->last_record_offset' contains the absolute offset of the end
544 * of the last element processed. The on-disk buffer offset is relative
545 * to the buffer header, therefore we subtract the length of the buffer
546 * header from the absolute offset.
547 */
548static void
549nfslog_rewrite_bufheader(struct nfslog_buf *lbp)
550{
551	XDR xdrs;
552	nfslog_buffer_header bh;
553	/* size big enough for buffer header encode */
554#define	XBUFSIZE 128
555	char buffer[XBUFSIZE];
556	unsigned int wsize;
557
558	/*
559	 * if version 1 buffer is large and the current offset cannot be
560	 * represented, then don't update the offset in the buffer.
561	 */
562	if (lbp->bh.bh_flags & NFSLOG_BH_OFFSET_OVERFLOW) {
563		/* No need to update the header - offset too big */
564		return;
565	}
566	/*
567	 * build the buffer header from the original that was saved
568	 * on initialization; note that the offset is taken from the
569	 * last record processed (the last offset that represents
570	 * all records processed without any holes in the processing)
571	 */
572	bh = lbp->bh;
573
574	/*
575	 * if version 1 buffer is large and the current offset cannot be
576	 * represented in 32 bits, then save only the last valid offset
577	 * in the buffer and mark the flags to indicate that.
578	 */
579	if ((bh.bh_version > 1) ||
580		(lbp->last_record_offset - bh.bh_length < UINT32_MAX)) {
581		bh.bh_offset = lbp->last_record_offset - bh.bh_length;
582	} else {
583		/* don't update the offset in the buffer */
584		bh.bh_flags |= NFSLOG_BH_OFFSET_OVERFLOW;
585		lbp->bh.bh_flags = bh.bh_flags;
586		syslog(LOG_ERR, gettext(
587			"nfslog_rewrite_bufheader: %s: offset does not fit "
588			"in a 32 bit field\n"), lbp->bufpath);
589	}
590
591	xdrmem_create(&xdrs, buffer, XBUFSIZE, XDR_ENCODE);
592
593	if (!xdr_nfslog_buffer_header(&xdrs, &bh)) {
594		syslog(LOG_ERR, gettext(
595			"error in re-writing buffer file %s header\n"),
596			lbp->bufpath);
597		return;
598	}
599
600	wsize = xdr_getpos(&xdrs);
601
602	if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
603		/* go to the beginning of the file */
604		(void) lseek(lbp->fd, 0, SEEK_SET);
605		(void) write(lbp->fd, buffer, wsize);
606		(void) lseek(lbp->fd, lbp->next_rec, SEEK_SET);
607		(void) fsync(lbp->fd);
608	} else {
609		bcopy(buffer, (void *)lbp->mmap_addr, wsize);
610		(void) msync((void *)lbp->mmap_addr, wsize, MS_SYNC);
611	}
612}
613
614/*
615 * With the provided lrp, we will take and 'insert' the range that the
616 * record covered in the buffer file into a list of processed ranges
617 * for the buffer file.  These ranges represent the records processed
618 * but not 'marked' in the buffer header as being processed.
619 * This insertion process is being done for two reasons.  The first is that
620 * we do not want to pay the performance penalty of re-writing the buffer header
621 * for each record that we process.  The second reason is that the records
622 * may be processed out of order because of the unique ids.  This will occur
623 * if the kernel has written the records to the buffer file out of order.
624 * The read routine will 'sort' them as the records are read.
625 *
626 * We do not want to re-write the buffer header such that a record is
627 * represented and being processed when it has not been.  In the case
628 * that the nfslogd daemon restarts processing and the buffer header
629 * has been re-written improperly, some records could be skipped.
630 * We will be taking the conservative approach and only writing buffer
631 * header offsets when the entire offset range has been processed.
632 */
633static void
634nfslog_ins_last_rec_processed(struct nfslog_lr *lrp)
635{
636	struct processed_records *prp, *tp;
637
638	/* init the data struct as if it were the only one */
639	prp = malloc(sizeof (*prp));
640	prp->next = prp->prev = prp;
641	prp->start_offset = lrp->f_offset;
642	prp->len = lrp->recsize;
643	prp->num_recs = 1;
644
645	/* always add since we know we are going to insert */
646	lrp->lbp->num_pr_queued++;
647
648	/* Is this the first one?  If so, take the easy way out */
649	if (lrp->lbp->prp == NULL) {
650		lrp->lbp->prp = prp;
651	} else {
652		/* sort on insertion... */
653		tp = lrp->lbp->prp;
654		do {
655			if (prp->start_offset < tp->start_offset)
656				break;
657			tp = tp->next;
658		} while (tp != lrp->lbp->prp);
659		/* insert where appropriate (before the one we found */
660		insque(prp, tp->prev);
661		/*
662		 * special case where the insertion was done at the
663		 * head of the list
664		 */
665		if (tp == lrp->lbp->prp && prp->start_offset < tp->start_offset)
666			lrp->lbp->prp = prp;
667
668		/*
669		 * now that the entry is in place, we need to see if it can
670		 * be combined with the previous or following entries.
671		 * combination is done by adding to the length.
672		 */
673		if (prp->start_offset ==
674			(prp->prev->start_offset + prp->prev->len)) {
675			tp = prp->prev;
676			remque(prp);
677			tp->len += prp->len;
678			tp->num_recs += prp->num_recs;
679			free(prp);
680			prp = tp;
681		}
682		if (prp->next->start_offset ==
683			(prp->start_offset + prp->len)) {
684			prp->len += prp->next->len;
685			prp->num_recs += prp->next->num_recs;
686			tp = prp->next;
687			remque(tp);
688			free(tp);
689		}
690	}
691
692	if (lrp->lbp->num_pr_queued > MAX_RECS_TO_DELAY) {
693		prp = lrp->lbp->prp;
694		if (lrp->lbp->last_record_offset ==
695			prp->start_offset) {
696
697			/* adjust the offset for the entire buffer */
698			lrp->lbp->last_record_offset =
699				prp->start_offset + prp->len;
700
701			nfslog_rewrite_bufheader(lrp->lbp);
702
703			tp = prp->next;
704			if (tp != prp)
705				remque(prp);
706			else
707				tp = NULL;
708			lrp->lbp->prp = tp;
709			lrp->lbp->num_pr_queued -= prp->num_recs;
710			free(prp);
711		}
712	}
713}
714
715/*
716 * nfslog_get_logrecord is responsible for retrieving the next log record
717 * from the buffer file. This would normally be very straightforward but there
718 * is the added complexity of attempting to order the requests coming out of
719 * the buffer file.  The fundamental problems is that the kernel nfs logging
720 * functionality does not guarantee that the records were written to the file
721 * in the order that the NFS server processed them.  This can cause a problem
722 * in the fh -> pathname mapping in the case were a lookup for a file comes
723 * later in the buffer file than other operations on the lookup's target.
724 * The fh mapping database will not have an entry and will therefore not
725 * be able to map the fh to a name.
726 *
727 * So to solve this problem, the kernel nfs logging code tags each record
728 * with a monotonically increasing id and is guaranteed to be allocated
729 * in the order that the requests were processed.  Realize however that
730 * this processing guarantee is essentially for one thread on one client.
731 * This id mechanism does not order all requests since it is only the
732 * single client/single thread case that is most concerning to us here.
733 *
734 * This function will do the 'sorting' of the requests as they are
735 * read from the buffer file.  The sorting needs to take into account
736 * that some ids may be missing (operations not logged but ids allocated)
737 * and that the id field will eventually wrap over MAXINT.
738 *
739 * Complexity to solve the fh -> pathname mapping issue.
740 */
741struct nfslog_lr *
742nfslog_get_logrecord(struct nfslog_buf *lbp)
743{
744	/* figure out what the next should be if the world were perfect */
745	unsigned int next_rec_id = lbp->last_rec_id + 1;
746	struct nfslog_lr *lrp = NULL;
747
748	/*
749	 * First we check the queued records on the log buffer struct
750	 * to see if the one we want is there.  The records are sorted
751	 * on the record id during the insertions to the queue so that
752	 * this check is easy.
753	 */
754	if (lbp->lrps != NULL) {
755		/* Does the first record match ? */
756		if (lbp->lrps->log_record.re_header.rh_rec_id == next_rec_id) {
757			lrp = remove_lrp_from_lb(lbp, lbp->lrps);
758			lbp->last_rec_id = lrp->log_record.re_header.rh_rec_id;
759		} else {
760			/*
761			 * Here we are checking for wrap of the record id
762			 * since it is an unsigned in.  The idea is that
763			 * if there is a huge span between what we expect
764			 * and what is queued then we need to flush/empty
765			 * the queued records first.
766			 */
767			if (next_rec_id <
768				lbp->lrps->log_record.re_header.rh_rec_id &&
769				((lbp->lrps->log_record.re_header.rh_rec_id -
770					next_rec_id) > (MAXINT / 2))) {
771
772				lrp = remove_lrp_from_lb(lbp, lbp->lrps);
773				lbp->last_rec_id =
774					lrp->log_record.re_header.rh_rec_id;
775			}
776		}
777	}
778	/*
779	 * So the first queued record didn't match (or there were no queued
780	 * records to look at).  Now we go to the buffer file looking for
781	 * the expected log record based on its id.  We loop looking for
782	 * a matching records and save/queue the records that don't match.
783	 * Note that we will queue a maximum number to handle the case
784	 * of a missing record id or a queue that is very confused.  We don't
785	 * want to consume too much memory.
786	 */
787	while (lrp == NULL) {
788		/* Have we queued too many for this buffer? */
789		if (lbp->num_lrps >= MAX_LRS_READ_AHEAD) {
790			lrp = remove_lrp_from_lb(lbp, lbp->lrps);
791			lbp->last_rec_id = lrp->log_record.re_header.rh_rec_id;
792			break;
793		}
794		/*
795		 * Get a record from the buffer file.  If none are available,
796		 * this is probably and EOF condition (could be a read error
797		 * as well but that is masked. :-().  No records in the
798		 * file means that we need to pull any queued records
799		 * so that we don't miss any in the processing.
800		 */
801		if ((lrp = nfslog_read_buffer(lbp)) == NULL) {
802			if (lbp->lrps != NULL) {
803				lrp = remove_lrp_from_lb(lbp, lbp->lrps);
804				lbp->last_rec_id =
805					lrp->log_record.re_header.rh_rec_id;
806			} else {
807				return (NULL);  /* it was really and EOF */
808			}
809		} else {
810			/*
811			 * Just read a record from the buffer file and now we
812			 * need to XDR the record header so that we can take
813			 * a look at the record id.
814			 */
815			if (!xdr_nfslog_request_record(&lrp->xdrs,
816				&lrp->log_record)) {
817				/* Free and return EOF/NULL on error */
818				nfslog_free_logrecord(lrp, FALSE);
819				return (NULL);
820			}
821			/*
822			 * If the new record is less than or matches the
823			 * expected record id, then we return this record
824			 */
825			if (lrp->log_record.re_header.rh_rec_id <=
826				next_rec_id) {
827
828				lbp->last_rec_id =
829					lrp->log_record.re_header.rh_rec_id;
830			} else {
831				/*
832				 * This is not the one we were looking
833				 * for; queue it for later processing
834				 * (queueing sorts on record id)
835				 */
836				insert_lrp_to_lb(lbp, lrp);
837				lrp = NULL;
838			}
839		}
840	}
841	return (lrp);
842}
843
844/*
845 * Free the log record provided.
846 * This is complex because the associated XDR streams also need to be freed
847 * since allocation could have occured during the DECODE phase.  The record
848 * header, args and results need to be XDR_FREEd.  The xdr funtions will
849 * be provided if a free needs to be done.
850 *
851 * Note that caller tells us if the record being freed was processed.
852 * If so, then the buffer header should be updated.  Updating the buffer
853 * header keeps track of where the nfslogd daemon left off in its processing
854 * if it is unable to complete the entire file.
855 */
856void
857nfslog_free_logrecord(struct nfslog_lr *lrp, bool_t processing_complete)
858{
859	caddr_t			buffer;
860	nfslog_request_record 	*reqrec;
861
862	if (processing_complete) {
863		nfslog_ins_last_rec_processed(lrp);
864	}
865
866	reqrec = &lrp->log_record;
867
868	buffer = (lrp->buffer != NULL ? lrp->buffer : (caddr_t)lrp->record);
869
870	xdrmem_create(&lrp->xdrs, buffer, lrp->recsize, XDR_FREE);
871
872	(void) xdr_nfslog_request_record(&lrp->xdrs, reqrec);
873
874	if (lrp->xdrargs != NULL && reqrec->re_rpc_arg)
875		(*lrp->xdrargs)(&lrp->xdrs, reqrec->re_rpc_arg);
876
877	if (reqrec->re_rpc_arg)
878		free(reqrec->re_rpc_arg);
879
880	if (lrp->xdrres != NULL && reqrec->re_rpc_res)
881		(*lrp->xdrres)(&lrp->xdrs, reqrec->re_rpc_res);
882
883	if (reqrec->re_rpc_res)
884		free(reqrec->re_rpc_res);
885
886	free_lrp(lrp);
887}
888
889static void
890free_lrp(struct nfslog_lr *lrp)
891{
892	if (lrp->buffer != NULL)
893		free(lrp->buffer);
894	free(lrp);
895}
896
/*
 * Utility function used elsewhere: append a quoted hex dump of an
 * opaque byte buffer to 'outbuf'.
 *
 * buf           - bytes to dump
 * len           - number of bytes in buf; values <= 0 produce ""
 * outbuf        - destination string buffer
 * outbufoffsetp - in: offset in outbuf at which to start writing;
 *                 out: offset just past what was written
 * maxoffset     - no further item is started once the offset has
 *                 reached this value
 *
 * The output is a blank, an opening quote, the hex digits and a
 * closing quote.  Up to sizeof (int) bytes are printed as one run of
 * two-digit bytes.  Longer buffers are printed with blanks at integer
 * boundaries: leading bytes up to the first int-aligned address, then
 * whole words as " %08x", then any trailing bytes.
 *
 * NOTE(review): whole words are formatted from the host's unsigned int
 * value, so the digit order inside a word follows host byte order.
 * NOTE(review): maxoffset is only tested before each item is written;
 * outbuf must have room beyond it for one item (at most 9 characters)
 * and the terminating NUL - confirm against the callers.
 */
void
nfslog_opaque_print_buf(void *buf, int len, char *outbuf, int *outbufoffsetp,
	int maxoffset)
{
	const int	wordsz = (int)sizeof (unsigned int);
	int		i, j;
	unsigned int	*ip;
	unsigned char	*u_buf = (unsigned char *)buf;
	int		outbufoffset = *outbufoffsetp;

	/* a negative length would turn into a huge unsigned bound below */
	if (len < 0)
		len = 0;

	outbufoffset += sprintf(&outbuf[outbufoffset], " \"");
	if (len <= wordsz) {
		/*
		 * Short buffer: no early return here, so that the closing
		 * quote is added and the caller sees the advanced offset.
		 */
		for (j = 0; (j < len) && (outbufoffset < maxoffset);
		    j++, u_buf++)
			outbufoffset += sprintf(&outbuf[outbufoffset],
			    "%02x", *u_buf);
	} else {
		/* More than 4 bytes, print with spaces in integer offsets */
		j = (int)((uintptr_t)buf % sizeof (int));
		i = 0;
		if (j > 0) {
			/* bytes in front of the first aligned word */
			i = wordsz - j;
			for (; (j < wordsz) && (outbufoffset < maxoffset);
			    j++, u_buf++)
				outbufoffset += sprintf(&outbuf[outbufoffset],
				    "%02x", *u_buf);
		}
		/* LINTED */
		ip = (unsigned int *)u_buf;
		for (; ((i + wordsz) <= len) && (outbufoffset < maxoffset);
		    i += wordsz, ip++) {
			outbufoffset += sprintf(&outbuf[outbufoffset],
			    " %08x", *ip);
		}
		if (i < len) {
			/* Last element not int */
			u_buf = (unsigned char *)ip;
			if ((i > j) && (outbufoffset < maxoffset)) {
				/* not first element */
				outbufoffset += sprintf(&outbuf[outbufoffset],
				    " ");
			}
			for (; (i < len) && (outbufoffset < maxoffset);
			    i++, u_buf++) {
				outbufoffset += sprintf(&outbuf[outbufoffset],
				    "%02x", *u_buf);
			}
		}
	}
	if (outbufoffset < maxoffset)
		outbufoffset += sprintf(&outbuf[outbufoffset], "\"");
	*outbufoffsetp = outbufoffset;
}
947