1/*
2 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2016 by Delphix. All rights reserved.
4 */
5
6/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
7/*	  All Rights Reserved  	*/
8
9/*
10 * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
11 * All rights reserved.
12 *
13 * Redistribution and use in source and binary forms are permitted
14 * provided that: (1) source distributions retain this entire copyright
15 * notice and comment, and (2) distributions including binaries display
16 * the following acknowledgement:  ``This product includes software
17 * developed by the University of California, Berkeley and its contributors''
18 * in the documentation or other materials provided with the distribution
19 * and in all advertising materials mentioning features or use of this
20 * software. Neither the name of the University nor the names of its
21 * contributors may be used to endorse or promote products derived
22 * from this software without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
24 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
25 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
26 */
27
28#include <stdio.h>
29#include <stdlib.h>
30#include <unistd.h>
31#include <stdarg.h>
32#include <libadm.h>
33#include <note.h>
34#include <sys/param.h>
35#include <sys/types.h>
36#include <sys/mntent.h>
37#include <sys/filio.h>
38#include <sys/fs/ufs_fs.h>
39#include <sys/vnode.h>
40#include <sys/fs/ufs_acl.h>
41#include <sys/fs/ufs_inode.h>
42#include <sys/fs/ufs_log.h>
43#define	_KERNEL
44#include <sys/fs/ufs_fsdir.h>
45#undef _KERNEL
46#include <sys/mnttab.h>
47#include <sys/types.h>
48#include <sys/stat.h>
49#include <fcntl.h>
50#include <signal.h>
51#include <string.h>
52#include <ctype.h>
53#include <sys/vfstab.h>
54#include <sys/lockfs.h>
55#include <errno.h>
56#include <sys/cmn_err.h>
57#include <sys/dkio.h>
58#include <sys/vtoc.h>
59#include <sys/efi_partition.h>
60#include <fslib.h>
61#include <inttypes.h>
62#include "fsck.h"
63
/*
 * Mount point of the file system being checked.  Stays NULL until
 * mounted() finds a matching mnttab entry and stores a strdup()'d
 * copy of that entry's mount point here.
 */
caddr_t mount_point = NULL;

/*
 * Disk cache statistics.  getdatablk() bumps totalreads on every
 * request and diskreads each time the block had to be fetched;
 * ckfini() reports the ratio when debugging.
 */
static int64_t diskreads, totalreads;

/* Prototypes for the functions that are private to this file. */
static int log_checksum(int32_t *, int32_t *, int);
static void vdirerror(fsck_ino_t, caddr_t, va_list);
static struct mnttab *search_mnttab(caddr_t, caddr_t, caddr_t, size_t);
static struct vfstab *search_vfstab(caddr_t, caddr_t, caddr_t, size_t);
static void vpwarn(caddr_t, va_list);
static int getaline(FILE *, caddr_t, int);
static struct bufarea *alloc_bufarea(void);
static void rwerror(caddr_t, diskaddr_t, int rval);
static void debugclean(void);
static void report_io_prob(caddr_t, diskaddr_t, size_t, ssize_t);
static void freelogblk(daddr32_t);
static void verrexit(caddr_t, va_list);
static void vpfatal(caddr_t, va_list);
static diskaddr_t get_device_size(int, caddr_t);
static diskaddr_t brute_force_get_device_size(int);
static void cg_constants(int, daddr32_t *, daddr32_t *, daddr32_t *,
	    daddr32_t *, daddr32_t *, daddr32_t *);
85
86int
87ftypeok(struct dinode *dp)
88{
89	switch (dp->di_mode & IFMT) {
90
91	case IFDIR:
92	case IFREG:
93	case IFBLK:
94	case IFCHR:
95	case IFLNK:
96	case IFSOCK:
97	case IFIFO:
98	case IFSHAD:
99	case IFATTRDIR:
100		return (1);
101
102	default:
103		if (debug)
104			(void) printf("bad file type 0%o\n", dp->di_mode);
105		return (0);
106	}
107}
108
109int
110acltypeok(struct dinode *dp)
111{
112	if (CHECK_ACL_ALLOWED(dp->di_mode & IFMT))
113		return (1);
114
115	if (debug)
116		(void) printf("bad file type for acl I=%d: 0%o\n",
117		    dp->di_shadow, dp->di_mode);
118	return (0);
119}
120
121NOTE(PRINTFLIKE(1))
122int
123reply(caddr_t fmt, ...)
124{
125	va_list ap;
126	char line[80];
127
128	if (preen)
129		pfatal("INTERNAL ERROR: GOT TO reply() in preen mode");
130
131	if (mflag) {
132		/*
133		 * We don't know what's going on, so don't potentially
134		 * make things worse by having errexit() write stuff
135		 * out to disk.
136		 */
137		(void) printf(
138		    "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
139		    devname);
140		exit(EXERRFATAL);
141	}
142
143	va_start(ap, fmt);
144	(void) putchar('\n');
145	(void) vprintf(fmt, ap);
146	(void) putchar('?');
147	(void) putchar(' ');
148	va_end(ap);
149
150	if (nflag || fswritefd < 0) {
151		(void) printf(" no\n\n");
152		return (0);
153	}
154	if (yflag) {
155		(void) printf(" yes\n\n");
156		return (1);
157	}
158	(void) fflush(stdout);
159	if (getaline(stdin, line, sizeof (line)) == EOF)
160		errexit("\n");
161	(void) printf("\n");
162	if (line[0] == 'y' || line[0] == 'Y') {
163		return (1);
164	} else {
165		return (0);
166	}
167}
168
/*
 * Read one line from FP into LOC, discarding every whitespace
 * character and storing at most MAXLEN - 1 others; the remainder of
 * an over-long line is consumed and dropped.  LOC is '\0'-terminated.
 *
 * Returns the number of characters stored, or EOF if end-of-file is
 * reached before a newline (LOC is not terminated in that case).
 */
int
getaline(FILE *fp, caddr_t loc, int maxlen)
{
	caddr_t out = loc;
	caddr_t limit = loc + (maxlen - 1);
	int ch;

	for (;;) {
		ch = getc(fp);
		if (ch == '\n')
			break;
		if (ch == EOF)
			return (EOF);
		if (isspace(ch) || out >= limit)
			continue;
		*out++ = (char)ch;
	}
	*out = '\0';

	/* LINTED pointer difference won't overflow */
	return (out - loc);
}
187
188/*
189 * Malloc buffers and set up cache.
190 */
191void
192bufinit(void)
193{
194	struct bufarea *bp;
195	int bufcnt, i;
196	caddr_t bufp;
197
198	bufp = malloc((size_t)sblock.fs_bsize);
199	if (bufp == NULL)
200		goto nomem;
201	initbarea(&cgblk);
202	cgblk.b_un.b_buf = bufp;
203	bufhead.b_next = bufhead.b_prev = &bufhead;
204	bufcnt = MAXBUFSPACE / sblock.fs_bsize;
205	if (bufcnt < MINBUFS)
206		bufcnt = MINBUFS;
207	for (i = 0; i < bufcnt; i++) {
208		bp = (struct bufarea *)malloc(sizeof (struct bufarea));
209		if (bp == NULL) {
210			if (i >= MINBUFS)
211				goto noalloc;
212			goto nomem;
213		}
214
215		bufp = malloc((size_t)sblock.fs_bsize);
216		if (bufp == NULL) {
217			free((void *)bp);
218			if (i >= MINBUFS)
219				goto noalloc;
220			goto nomem;
221		}
222		initbarea(bp);
223		bp->b_un.b_buf = bufp;
224		bp->b_prev = &bufhead;
225		bp->b_next = bufhead.b_next;
226		bufhead.b_next->b_prev = bp;
227		bufhead.b_next = bp;
228	}
229noalloc:
230	bufhead.b_size = i;	/* save number of buffers */
231	pbp = pdirbp = NULL;
232	return;
233
234nomem:
235	errexit("cannot allocate buffer pool\n");
236	/* NOTREACHED */
237}
238
239/*
240 * Undo a bufinit().
241 */
242void
243unbufinit(void)
244{
245	int cnt;
246	struct bufarea *bp, *nbp;
247
248	cnt = 0;
249	for (bp = bufhead.b_prev; bp != NULL && bp != &bufhead; bp = nbp) {
250		cnt++;
251		flush(fswritefd, bp);
252		nbp = bp->b_prev;
253		/*
254		 * We're discarding the entire chain, so this isn't
255		 * technically necessary.  However, it doesn't hurt
256		 * and lint's data flow analysis is much happier
257		 * (this prevents it from thinking there's a chance
258		 * of our using memory elsewhere after it's been released).
259		 */
260		nbp->b_next = bp->b_next;
261		bp->b_next->b_prev = nbp;
262		free((void *)bp->b_un.b_buf);
263		free((void *)bp);
264	}
265
266	if (bufhead.b_size != cnt)
267		errexit("Panic: cache lost %d buffers\n",
268		    bufhead.b_size - cnt);
269}
270
271/*
272 * Manage a cache of directory blocks.
273 */
274struct bufarea *
275getdatablk(daddr32_t blkno, size_t size)
276{
277	struct bufarea *bp;
278
279	for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next)
280		if (bp->b_bno == fsbtodb(&sblock, blkno)) {
281			goto foundit;
282		}
283	for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev)
284		if ((bp->b_flags & B_INUSE) == 0)
285			break;
286	if (bp == &bufhead) {
287		bp = alloc_bufarea();
288		if (bp == NULL) {
289			errexit("deadlocked buffer pool\n");
290			/* NOTREACHED */
291		}
292	}
293	/*
294	 * We're at the same logical level as getblk(), so if there
295	 * are any errors, we'll let our caller handle them.
296	 */
297	diskreads++;
298	(void) getblk(bp, blkno, size);
299
300foundit:
301	totalreads++;
302	bp->b_cnt++;
303	/*
304	 * Move the buffer to head of linked list if it isn't
305	 * already there.
306	 */
307	if (bufhead.b_next != bp) {
308		bp->b_prev->b_next = bp->b_next;
309		bp->b_next->b_prev = bp->b_prev;
310		bp->b_prev = &bufhead;
311		bp->b_next = bufhead.b_next;
312		bufhead.b_next->b_prev = bp;
313		bufhead.b_next = bp;
314	}
315	bp->b_flags |= B_INUSE;
316	return (bp);
317}
318
319void
320brelse(struct bufarea *bp)
321{
322	bp->b_cnt--;
323	if (bp->b_cnt == 0) {
324		bp->b_flags &= ~B_INUSE;
325	}
326}
327
328struct bufarea *
329getblk(struct bufarea *bp, daddr32_t blk, size_t size)
330{
331	diskaddr_t dblk;
332
333	dblk = fsbtodb(&sblock, blk);
334	if (bp->b_bno == dblk)
335		return (bp);
336	flush(fswritefd, bp);
337	bp->b_errs = fsck_bread(fsreadfd, bp->b_un.b_buf, dblk, size);
338	bp->b_bno = dblk;
339	bp->b_size = size;
340	return (bp);
341}
342
343void
344flush(int fd, struct bufarea *bp)
345{
346	int i, j;
347	caddr_t sip;
348	long size;
349
350	if (!bp->b_dirty)
351		return;
352
353	/*
354	 * It's not our buf, so if there are errors, let whoever
355	 * acquired it deal with the actual problem.
356	 */
357	if (bp->b_errs != 0)
358		pfatal("WRITING ZERO'ED BLOCK %lld TO DISK\n", bp->b_bno);
359	bp->b_dirty = 0;
360	bp->b_errs = 0;
361	bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size);
362	if (bp != &sblk) {
363		return;
364	}
365
366	/*
367	 * We're flushing the superblock, so make sure all the
368	 * ancillary bits go out as well.
369	 */
370	sip = (caddr_t)sblock.fs_u.fs_csp;
371	for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) {
372		size = sblock.fs_cssize - i < sblock.fs_bsize ?
373		    sblock.fs_cssize - i : sblock.fs_bsize;
374		bwrite(fswritefd, sip,
375		    fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag),
376		    size);
377		sip += size;
378	}
379}
380
381static void
382rwerror(caddr_t mesg, diskaddr_t blk, int rval)
383{
384	int olderr = errno;
385
386	if (!preen)
387		(void) printf("\n");
388
389	if (rval == -1)
390		pfatal("CANNOT %s: DISK BLOCK %lld: %s",
391		    mesg, blk, strerror(olderr));
392	else
393		pfatal("CANNOT %s: DISK BLOCK %lld", mesg, blk);
394
395	if (reply("CONTINUE") == 0) {
396		exitstat = EXERRFATAL;
397		errexit("Program terminated\n");
398	}
399}
400
401void
402ckfini(void)
403{
404	int64_t percentage;
405
406	if (fswritefd < 0)
407		return;
408
409	flush(fswritefd, &sblk);
410	/*
411	 * Were we using a backup superblock?
412	 */
413	if (havesb && sblk.b_bno != SBOFF / dev_bsize) {
414		if (preen || reply("UPDATE STANDARD SUPERBLOCK") == 1) {
415			sblk.b_bno = SBOFF / dev_bsize;
416			sbdirty();
417			flush(fswritefd, &sblk);
418		}
419	}
420	flush(fswritefd, &cgblk);
421	if (cgblk.b_un.b_buf != NULL) {
422		free((void *)cgblk.b_un.b_buf);
423		cgblk.b_un.b_buf = NULL;
424	}
425	unbufinit();
426	pbp = NULL;
427	pdirbp = NULL;
428	if (debug) {
429		/*
430		 * Note that we only count cache-related reads.
431		 * Anything that called fsck_bread() or getblk()
432		 * directly are explicitly not cached, so they're not
433		 * included here.
434		 */
435		if (totalreads != 0)
436			percentage = diskreads * 100 / totalreads;
437		else
438			percentage = 0;
439
440		(void) printf("cache missed %lld of %lld reads (%lld%%)\n",
441		    (longlong_t)diskreads, (longlong_t)totalreads,
442		    (longlong_t)percentage);
443	}
444
445	(void) close(fsreadfd);
446	(void) close(fswritefd);
447	fsreadfd = -1;
448	fswritefd = -1;
449}
450
451int
452fsck_bread(int fd, caddr_t buf, diskaddr_t blk, size_t size)
453{
454	caddr_t cp;
455	int i;
456	int errs;
457	offset_t offset = ldbtob(blk);
458	offset_t addr;
459
460	/*
461	 * In our universe, nothing exists before the superblock, so
462	 * just pretend it's always zeros.  This is the complement of
463	 * bwrite()'s ignoring write requests into that space.
464	 */
465	if (blk < SBLOCK) {
466		if (debug)
467			(void) printf(
468			    "WARNING: fsck_bread() passed blkno < %d (%lld)\n",
469			    SBLOCK, (longlong_t)blk);
470		(void) memset(buf, 0, (size_t)size);
471		return (1);
472	}
473
474	if (llseek(fd, offset, SEEK_SET) < 0) {
475		rwerror("SEEK", blk, -1);
476	}
477
478	if ((i = read(fd, buf, size)) == size) {
479		return (0);
480	}
481	rwerror("READ", blk, i);
482	if (llseek(fd, offset, SEEK_SET) < 0) {
483		rwerror("SEEK", blk, -1);
484	}
485	errs = 0;
486	(void) memset(buf, 0, (size_t)size);
487	pwarn("THE FOLLOWING SECTORS COULD NOT BE READ:");
488	for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
489		addr = ldbtob(blk + i);
490		if (llseek(fd, addr, SEEK_SET) < 0 ||
491		    read(fd, cp, (int)secsize) < 0) {
492			iscorrupt = 1;
493			(void) printf(" %llu", blk + (u_longlong_t)i);
494			errs++;
495		}
496	}
497	(void) printf("\n");
498	return (errs);
499}
500
501void
502bwrite(int fd, caddr_t buf, diskaddr_t blk, int64_t size)
503{
504	int i;
505	int n;
506	caddr_t cp;
507	offset_t offset = ldbtob(blk);
508	offset_t addr;
509
510	if (fd < 0)
511		return;
512	if (blk < SBLOCK) {
513		if (debug)
514			(void) printf(
515		    "WARNING: Attempt to write illegal blkno %lld on %s\n",
516			    (longlong_t)blk, devname);
517		return;
518	}
519	if (llseek(fd, offset, SEEK_SET) < 0) {
520		rwerror("SEEK", blk, -1);
521	}
522	if ((i = write(fd, buf, (int)size)) == size) {
523		fsmodified = 1;
524		return;
525	}
526	rwerror("WRITE", blk, i);
527	if (llseek(fd, offset, SEEK_SET) < 0) {
528		rwerror("SEEK", blk, -1);
529	}
530	pwarn("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:");
531	for (cp = buf, i = 0; i < btodb(size); i++, cp += DEV_BSIZE) {
532		n = 0;
533		addr = ldbtob(blk + i);
534		if (llseek(fd, addr, SEEK_SET) < 0 ||
535		    (n = write(fd, cp, DEV_BSIZE)) < 0) {
536			iscorrupt = 1;
537			(void) printf(" %llu", blk + (u_longlong_t)i);
538		} else if (n > 0) {
539			fsmodified = 1;
540		}
541
542	}
543	(void) printf("\n");
544}
545
546/*
547 * Allocates the specified number of contiguous fragments.
548 */
549daddr32_t
550allocblk(int wantedfrags)
551{
552	int block, leadfrag, tailfrag;
553	daddr32_t selected;
554	size_t size;
555	struct bufarea *bp;
556
557	/*
558	 * It's arguable whether we should just fail, or instead
559	 * error out here.  Since we should only ever be asked for
560	 * a single fragment or an entire block (i.e., sblock.fs_frag),
561	 * we'll fail out because anything else means somebody
562	 * changed code without considering all of the ramifications.
563	 */
564	if (wantedfrags <= 0 || wantedfrags > sblock.fs_frag) {
565		exitstat = EXERRFATAL;
566		errexit("allocblk() asked for %d frags.  "
567		    "Legal range is 1 to %d",
568		    wantedfrags, sblock.fs_frag);
569	}
570
571	/*
572	 * For each filesystem block, look at every possible starting
573	 * offset within the block such that we can get the number of
574	 * contiguous fragments that we need.  This is a drastically
575	 * simplified version of the kernel's mapsearch() and alloc*().
576	 * It's also correspondingly slower.
577	 */
578	for (block = 0; block < maxfsblock - sblock.fs_frag;
579	    block += sblock.fs_frag) {
580		for (leadfrag = 0; leadfrag <= sblock.fs_frag - wantedfrags;
581		    leadfrag++) {
582			/*
583			 * Is first fragment of candidate run available?
584			 */
585			if (testbmap(block + leadfrag))
586				continue;
587			/*
588			 * Are the rest of them available?
589			 */
590			for (tailfrag = 1; tailfrag < wantedfrags; tailfrag++)
591				if (testbmap(block + leadfrag + tailfrag))
592					break;
593			if (tailfrag < wantedfrags) {
594				/*
595				 * No, skip the known-unusable run.
596				 */
597				leadfrag += tailfrag;
598				continue;
599			}
600			/*
601			 * Found what we need, so claim them.
602			 */
603			for (tailfrag = 0; tailfrag < wantedfrags; tailfrag++)
604				setbmap(block + leadfrag + tailfrag);
605			n_blks += wantedfrags;
606			size = wantedfrags * sblock.fs_fsize;
607			selected = block + leadfrag;
608			bp = getdatablk(selected, size);
609			(void) memset((void *)bp->b_un.b_buf, 0, size);
610			dirty(bp);
611			brelse(bp);
612			if (debug)
613				(void) printf(
614		    "allocblk: selected %d (in block %d), frags %d, size %d\n",
615				    selected, selected % sblock.fs_bsize,
616				    wantedfrags, (int)size);
617			return (selected);
618		}
619	}
620	return (0);
621}
622
623/*
624 * Free a previously allocated block
625 */
626void
627freeblk(fsck_ino_t ino, daddr32_t blkno, int frags)
628{
629	struct inodesc idesc;
630
631	if (debug)
632		(void) printf("debug: freeing %d fragments starting at %d\n",
633		    frags, blkno);
634
635	init_inodesc(&idesc);
636
637	idesc.id_number = ino;
638	idesc.id_blkno = blkno;
639	idesc.id_numfrags = frags;
640	idesc.id_truncto = -1;
641
642	/*
643	 * Nothing in the return status has any relevance to how
644	 * we're using pass4check(), so just ignore it.
645	 */
646	(void) pass4check(&idesc);
647}
648
/*
 * Fill NAMEBUF with a path starting in CURDIR for INO.  Assumes
 * that the given buffer is at least MAXPATHLEN + 1 characters.
 *
 * The path is assembled right-to-left at the tail of NAMEBUF (CP
 * marks the start of what has been built so far), while the front
 * of NAMEBUF is used as scratch space for each name that is looked
 * up.  The finished string is slid to the front of the buffer on the
 * way out.
 *
 * Special results: "?" if CURDIR is zero or fails INO_IS_DVALID(),
 * and "/" if both CURDIR and INO are UFSROOTINO.  A path that could
 * not be traced all the way to UFSROOTINO is prefixed with `?'.
 */
void
getpathname(caddr_t namebuf, fsck_ino_t curdir, fsck_ino_t ino)
{
	int len;
	caddr_t cp;
	struct dinode *dp;
	struct inodesc idesc;
	struct inoinfo *inp;

	if (debug)
		(void) printf("debug: getpathname(curdir %d, ino %d)\n",
		    curdir, ino);

	if ((curdir == 0) || (!INO_IS_DVALID(curdir))) {
		(void) strcpy(namebuf, "?");
		return;
	}

	if ((curdir == UFSROOTINO) && (ino == UFSROOTINO)) {
		(void) strcpy(namebuf, "/");
		return;
	}

	init_inodesc(&idesc);
	idesc.id_type = DATA;
	/* Start with an empty string parked at the tail of the buffer. */
	cp = &namebuf[MAXPATHLEN - 1];
	*cp = '\0';

	/*
	 * In the case of extended attributes, our
	 * parent won't necessarily be a directory, so just
	 * return what we've found with a prefix indicating
	 * that it's an XATTR.  Presumably our caller will
	 * know what's going on and do something useful, like
	 * work out the path of the parent and then combine
	 * the two names.
	 *
	 * Can't use strcpy(), etc, because we've probably
	 * already got some name information in the buffer and
	 * the usual trailing \0 would lose it.
	 */
	dp = ginode(curdir);
	if ((dp->di_mode & IFMT) == IFATTRDIR) {
		idesc.id_number = curdir;
		idesc.id_parent = ino;
		idesc.id_func = findname;
		idesc.id_name = namebuf;
		idesc.id_fix = NOFIX;
		if ((ckinode(dp, &idesc, CKI_TRAVERSE) & FOUND) == 0) {
			/*
			 * NOTE(review): cp still points at the '\0'
			 * stored above, so this replaces the terminator
			 * with '?' -- confirm the result is still
			 * properly terminated on this path.
			 */
			*cp-- = '?';
		}

		len = sizeof (XATTR_DIR_NAME) - 1;
		cp -= len;
		(void) memmove(cp, XATTR_DIR_NAME, len);
		goto attrname;
	}

	/*
	 * If curdir == ino, need to get a handle on .. so we
	 * can search it for ino's name.  Otherwise, just search
	 * the given directory for ino.  Repeat until out of space
	 * or a full path has been built.
	 */
	if (curdir != ino) {
		idesc.id_parent = curdir;
		/* Enters the loop below part-way through its body. */
		goto namelookup;
	}
	while (ino != UFSROOTINO && ino != 0) {
		idesc.id_number = ino;
		idesc.id_func = findino;
		idesc.id_name = "..";
		idesc.id_fix = NOFIX;
		if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) == 0) {
			/*
			 * No `..' entry was found on disk; fall back to
			 * the parent recorded in the inoinfo table.
			 */
			inp = getinoinfo(ino);
			if ((inp == NULL) || (inp->i_parent == 0)) {
				break;
			}
			idesc.id_parent = inp->i_parent;
		}

		/*
		 * To get this far, id_parent must have the inode
		 * number for `..' in it.  By definition, that's got
		 * to be a directory, so search it for the inode of
		 * interest.
		 */
namelookup:
		idesc.id_number = idesc.id_parent;
		idesc.id_parent = ino;
		idesc.id_func = findname;
		idesc.id_name = namebuf;
		idesc.id_fix = NOFIX;
		if ((ckinode(ginode(idesc.id_number),
		    &idesc, CKI_TRAVERSE) & FOUND) == 0) {
			break;
		}
		/*
		 * Prepend to what we've accumulated so far.  If
		 * there's not enough room for even one more path element
		 * (of the worst-case length), then bail out.
		 */
		len = strlen(namebuf);
		cp -= len;
		if (cp < &namebuf[MAXNAMLEN])
			break;
		(void) memmove(cp, namebuf, len);
		*--cp = '/';

		/*
		 * Corner case for a looped-to-itself directory.
		 */
		if (ino == idesc.id_number)
			break;

		/*
		 * Climb one level of the hierarchy.  In other words,
		 * the current .. becomes the inode to search for and
		 * its parent becomes the directory to search in.
		 */
		ino = idesc.id_number;
	}

	/*
	 * If we hit a discontinuity in the hierarchy, indicate it by
	 * prefixing the path so far with `?'.  Otherwise, the first
	 * character will be `/' as a side-effect of the *--cp above.
	 *
	 * The special case is to handle the situation where we're
	 * trying to look something up in UFSROOTINO, but didn't find
	 * it.
	 */
	if (ino != UFSROOTINO || cp == &namebuf[MAXPATHLEN - 1]) {
		if (cp > namebuf)
			cp--;
		*cp = '?';
	}

	/*
	 * The invariants being used for buffer integrity are:
	 * - namebuf[] is terminated with \0 before anything else
	 * - cp is always <= the last element of namebuf[]
	 * - the new path element is always stored at the
	 *   beginning of namebuf[], and is no more than MAXNAMLEN-1
	 *   characters
	 * - cp is decremented by the number of characters in
	 *   the new path element
	 * - if, after the above accounting for the new element's
	 *   size, there is no longer enough room at the beginning of
	 *   namebuf[] for a full-sized path element and a slash,
	 *   terminate the loop.  cp is in the range
	 *   &namebuf[0]..&namebuf[MAXNAMLEN - 1]
	 */
attrname:
	/* LINTED per the above discussion */
	(void) memmove(namebuf, cp, &namebuf[MAXPATHLEN] - cp);
}
810
/*
 * Signal handler: run the ckfini() shutdown sequence and leave with
 * exit status EXSIGNAL.  The signal number is not used.
 */
/* ARGSUSED */
void
catch(int dummy)
{
	ckfini();
	exit(EXSIGNAL);
}
818
/*
 * When preening, allow a single quit to signal
 * a special exit after filesystem checks complete
 * so that reboot sequence may be interrupted.
 *
 * The handler only announces the request and sets `interrupted';
 * it then restores the default SIGQUIT disposition, so a second
 * quit is no longer caught here.  The signal number is not used.
 */
/* ARGSUSED */
void
catchquit(int dummy)
{
	(void) printf("returning to single-user after filesystem check\n");
	interrupted = 1;
	/* From here on SIGQUIT gets its default action. */
	(void) signal(SIGQUIT, SIG_DFL);
}
832
833
834/*
835 * determine whether an inode should be fixed.
836 */
837NOTE(PRINTFLIKE(2))
838int
839dofix(struct inodesc *idesc, caddr_t msg, ...)
840{
841	int rval = 0;
842	va_list ap;
843
844	va_start(ap, msg);
845
846	switch (idesc->id_fix) {
847
848	case DONTKNOW:
849		if (idesc->id_type == DATA)
850			vdirerror(idesc->id_number, msg, ap);
851		else
852			vpwarn(msg, ap);
853		if (preen) {
854			idesc->id_fix = FIX;
855			rval = ALTERED;
856			break;
857		}
858		if (reply("SALVAGE") == 0) {
859			idesc->id_fix = NOFIX;
860			break;
861		}
862		idesc->id_fix = FIX;
863		rval = ALTERED;
864		break;
865
866	case FIX:
867		rval = ALTERED;
868		break;
869
870	case NOFIX:
871		break;
872
873	default:
874		errexit("UNKNOWN INODESC FIX MODE %d\n", (int)idesc->id_fix);
875	}
876
877	va_end(ap);
878	return (rval);
879}
880
/*
 * Print a printf-style message and terminate the program.  This is
 * the variadic front end for verrexit(), which does the cleanup and
 * calls exit().
 */
NOTE(PRINTFLIKE(1))
void
errexit(caddr_t fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	/* No va_end(): verrexit() exits instead of returning. */
	verrexit(fmt, ap);
	/* NOTREACHED */
}
891
892NOTE(PRINTFLIKE(1))
893static void
894verrexit(caddr_t fmt, va_list ap)
895{
896	static int recursing = 0;
897
898	if (!recursing) {
899		recursing = 1;
900		if (errorlocked || iscorrupt) {
901			if (havesb && fswritefd >= 0) {
902				sblock.fs_clean = FSBAD;
903				sblock.fs_state = FSOKAY - (long)sblock.fs_time;
904				sblock.fs_state = -sblock.fs_state;
905				sbdirty();
906				write_altsb(fswritefd);
907				flush(fswritefd, &sblk);
908			}
909		}
910		ckfini();
911		recursing = 0;
912	}
913	(void) vprintf(fmt, ap);
914	if (fmt[strlen(fmt) - 1] != '\n')
915		(void) putchar('\n');
916	exit((exitstat != 0) ? exitstat : EXERRFATAL);
917}
918
/*
 * An unexpected inconsistency occurred.
 * Die if preening, otherwise just print message and continue.
 *
 * This is the variadic front end for vpfatal(), which does the work.
 */
NOTE(PRINTFLIKE(1))
void
pfatal(caddr_t fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vpfatal(fmt, ap);
	/* Only reached when not preening; vpfatal() exits otherwise. */
	va_end(ap);
}
933
934NOTE(PRINTFLIKE(1))
935static void
936vpfatal(caddr_t fmt, va_list ap)
937{
938	if (preen) {
939		if (*fmt != '\0') {
940			(void) printf("%s: ", devname);
941			(void) vprintf(fmt, ap);
942			(void) printf("\n");
943		}
944		(void) printf(
945		    "%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n",
946		    devname);
947		if (havesb && fswritefd >= 0) {
948			sblock.fs_clean = FSBAD;
949			sblock.fs_state = -(FSOKAY - (long)sblock.fs_time);
950			sbdirty();
951			flush(fswritefd, &sblk);
952		}
953		/*
954		 * We're exiting, it doesn't really matter that our
955		 * caller doesn't get to call va_end().
956		 */
957		if (exitstat == 0)
958			exitstat = EXFNDERRS;
959		exit(exitstat);
960	}
961	if (*fmt != '\0') {
962		(void) vprintf(fmt, ap);
963	}
964}
965
/*
 * Pwarn just prints a message when not preening,
 * or a warning (preceded by filename) when preening.
 *
 * This is the variadic front end for vpwarn(), which does the work.
 */
NOTE(PRINTFLIKE(1))
void
pwarn(caddr_t fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vpwarn(fmt, ap);
	va_end(ap);
}
980
981NOTE(PRINTFLIKE(1))
982static void
983vpwarn(caddr_t fmt, va_list ap)
984{
985	if (*fmt != '\0') {
986		if (preen)
987			(void) printf("%s: ", devname);
988		(void) vprintf(fmt, ap);
989	}
990}
991
992/*
993 * Like sprintf(), except the buffer is dynamically allocated
994 * and returned, instead of being passed in.  A pointer to the
995 * buffer is stored in *RET, and FMT is the usual format string.
996 * The number of characters in *RET (excluding the trailing \0,
997 * to be consistent with the other *printf() routines) is returned.
998 *
999 * Solaris doesn't have asprintf(3C) yet, unfortunately.
1000 */
1001NOTE(PRINTFLIKE(2))
1002int
1003fsck_asprintf(caddr_t *ret, caddr_t fmt, ...)
1004{
1005	int len;
1006	caddr_t buffer;
1007	va_list ap;
1008
1009	va_start(ap, fmt);
1010	len = vsnprintf(NULL, 0, fmt, ap);
1011	va_end(ap);
1012
1013	buffer = malloc((len + 1) * sizeof (char));
1014	if (buffer == NULL) {
1015		errexit("Out of memory in asprintf\n");
1016		/* NOTREACHED */
1017	}
1018
1019	va_start(ap, fmt);
1020	(void) vsnprintf(buffer, len + 1, fmt, ap);
1021	va_end(ap);
1022
1023	*ret = buffer;
1024	return (len);
1025}
1026
1027/*
1028 * So we can take advantage of kernel routines in ufs_subr.c.
1029 */
1030/* PRINTFLIKE2 */
1031void
1032cmn_err(int level, caddr_t fmt, ...)
1033{
1034	va_list ap;
1035
1036	va_start(ap, fmt);
1037	if (level == CE_PANIC) {
1038		(void) printf("INTERNAL INCONSISTENCY:");
1039		verrexit(fmt, ap);
1040	} else {
1041		(void) vprintf(fmt, ap);
1042	}
1043	va_end(ap);
1044}
1045
1046/*
1047 * Check to see if unraw version of name is already mounted.
1048 * Updates devstr with the device name if devstr is not NULL
1049 * and str_size is positive.
1050 */
1051int
1052mounted(caddr_t name, caddr_t devstr, size_t str_size)
1053{
1054	int found;
1055	struct mnttab *mntent;
1056
1057	mntent = search_mnttab(NULL, unrawname(name), devstr, str_size);
1058	if (mntent == NULL)
1059		return (M_NOMNT);
1060
1061	/*
1062	 * It's mounted.  With or without write access?
1063	 */
1064	if (hasmntopt(mntent, MNTOPT_RO) != 0)
1065		found = M_RO;	/* mounted as RO */
1066	else
1067		found = M_RW; 	/* mounted as R/W */
1068
1069	if (mount_point == NULL) {
1070		mount_point = strdup(mntent->mnt_mountp);
1071		if (mount_point == NULL) {
1072			errexit("fsck: memory allocation failure: %s",
1073			    strerror(errno));
1074			/* NOTREACHED */
1075		}
1076
1077		if (devstr != NULL && str_size > 0)
1078			(void) strlcpy(devstr, mntent->mnt_special, str_size);
1079	}
1080
1081	return (found);
1082}
1083
1084/*
1085 * Check to see if name corresponds to an entry in vfstab, and that the entry
1086 * does not have option ro.
1087 */
1088int
1089writable(caddr_t name)
1090{
1091	int rw = 1;
1092	struct vfstab vfsbuf, vfskey;
1093	FILE *vfstab;
1094
1095	vfstab = fopen(VFSTAB, "r");
1096	if (vfstab == NULL) {
1097		(void) printf("can't open %s\n", VFSTAB);
1098		return (1);
1099	}
1100	(void) memset((void *)&vfskey, 0, sizeof (vfskey));
1101	vfsnull(&vfskey);
1102	vfskey.vfs_special = unrawname(name);
1103	vfskey.vfs_fstype = MNTTYPE_UFS;
1104	if ((getvfsany(vfstab, &vfsbuf, &vfskey) == 0) &&
1105	    (hasvfsopt(&vfsbuf, MNTOPT_RO))) {
1106		rw = 0;
1107	}
1108	(void) fclose(vfstab);
1109	return (rw);
1110}
1111
1112/*
1113 * debugclean
1114 */
1115static void
1116debugclean(void)
1117{
1118	if (!debug)
1119		return;
1120
1121	if ((iscorrupt == 0) && (isdirty == 0))
1122		return;
1123
1124	if ((sblock.fs_clean == FSSTABLE) || (sblock.fs_clean == FSCLEAN) ||
1125	    (sblock.fs_clean == FSLOG && islog && islogok) ||
1126	    ((FSOKAY == (sblock.fs_state + sblock.fs_time)) && !errorlocked))
1127		return;
1128
1129	(void) printf("WARNING: inconsistencies detected on %s filesystem %s\n",
1130	    sblock.fs_clean == FSSTABLE ? "stable" :
1131	    sblock.fs_clean == FSLOG ? "logging" :
1132	    sblock.fs_clean == FSFIX ? "being fixed" : "clean",
1133	    devname);
1134}
1135
1136/*
1137 * updateclean
1138 *	Carefully and transparently update the clean flag.
1139 *
1140 * `iscorrupt' has to be in its final state before this is called.
1141 */
1142int
1143updateclean(void)
1144{
1145	int freedlog = 0;
1146	struct bufarea cleanbuf;
1147	size_t size;
1148	ssize_t io_res;
1149	diskaddr_t bno;
1150	char fsclean;
1151	int fsreclaim;
1152	char fsflags;
1153	int flags_ok = 1;
1154	daddr32_t fslogbno;
1155	offset_t sblkoff;
1156	time_t t;
1157
1158	/*
1159	 * debug stuff
1160	 */
1161	debugclean();
1162
1163	/*
1164	 * set fsclean to its appropriate value
1165	 */
1166	fslogbno = sblock.fs_logbno;
1167	fsclean = sblock.fs_clean;
1168	fsreclaim = sblock.fs_reclaim;
1169	fsflags = sblock.fs_flags;
1170	if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked) {
1171		fsclean = FSACTIVE;
1172	}
1173	/*
1174	 * If ufs log is not okay, note that we need to clear it.
1175	 */
1176	examinelog(NULL);
1177	if (fslogbno && !(islog && islogok)) {
1178		fsclean = FSACTIVE;
1179		fslogbno = 0;
1180	}
1181
1182	/*
1183	 * if necessary, update fs_clean and fs_state
1184	 */
1185	switch (fsclean) {
1186
1187	case FSACTIVE:
1188		if (!iscorrupt) {
1189			fsclean = FSSTABLE;
1190			fsreclaim = 0;
1191		}
1192		break;
1193
1194	case FSCLEAN:
1195	case FSSTABLE:
1196		if (iscorrupt) {
1197			fsclean = FSACTIVE;
1198		} else {
1199			fsreclaim = 0;
1200		}
1201		break;
1202
1203	case FSLOG:
1204		if (iscorrupt) {
1205			fsclean = FSACTIVE;
1206		} else if (!islog || fslogbno == 0) {
1207			fsclean = FSSTABLE;
1208			fsreclaim = 0;
1209		} else if (fflag) {
1210			fsreclaim = 0;
1211		}
1212		break;
1213
1214	case FSFIX:
1215		fsclean = FSBAD;
1216		if (errorlocked && !iscorrupt) {
1217			fsclean = islog ? FSLOG : FSCLEAN;
1218		}
1219		break;
1220
1221	default:
1222		if (iscorrupt) {
1223			fsclean = FSACTIVE;
1224		} else {
1225			fsclean = FSSTABLE;
1226			fsreclaim = 0;
1227		}
1228	}
1229
1230	if (largefile_count > 0)
1231		fsflags |= FSLARGEFILES;
1232	else
1233		fsflags &= ~FSLARGEFILES;
1234
1235	/*
	 * There can be two discrepancies here.  A) The superblock
1237	 * shows no largefiles but we found some while scanning.
1238	 * B) The superblock indicates the presence of largefiles,
1239	 * but none are present.  Note that if preening, the superblock
1240	 * is silently corrected.
1241	 */
1242	if ((fsflags == FSLARGEFILES && sblock.fs_flags != FSLARGEFILES) ||
1243	    (fsflags != FSLARGEFILES && sblock.fs_flags == FSLARGEFILES))
1244		flags_ok = 0;
1245
1246	if (debug)
1247		(void) printf(
1248		    "** largefile count=%d, fs.fs_flags=%x, flags_ok %d\n",
1249		    largefile_count, sblock.fs_flags, flags_ok);
1250
1251	/*
1252	 * If fs is unchanged, do nothing.
1253	 */
1254	if ((!isdirty) && (flags_ok) &&
1255	    (fslogbno == sblock.fs_logbno) &&
1256	    (sblock.fs_clean == fsclean) &&
1257	    (sblock.fs_reclaim == fsreclaim) &&
1258	    (FSOKAY == (sblock.fs_state + sblock.fs_time))) {
1259		if (errorlocked) {
1260			if (!do_errorlock(LOCKFS_ULOCK))
1261				pwarn(
1262		    "updateclean(unchanged): unlock(LOCKFS_ULOCK) failed\n");
1263		}
1264		return (freedlog);
1265	}
1266
1267	/*
1268	 * if user allows, update superblock state
1269	 */
1270	if (debug) {
1271		(void) printf(
1272	    "superblock: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
1273		    sblock.fs_flags, sblock.fs_logbno,
1274		    sblock.fs_clean, sblock.fs_reclaim,
1275		    sblock.fs_state + sblock.fs_time);
1276		(void) printf(
1277	    "calculated: flags 0x%x logbno %d clean %d reclaim %d state 0x%x\n",
1278		    fsflags, fslogbno, fsclean, fsreclaim, FSOKAY);
1279	}
1280	if (!isdirty && !preen && !rerun &&
1281	    (reply("FILE SYSTEM STATE IN SUPERBLOCK IS WRONG; FIX") == 0))
1282		return (freedlog);
1283
1284	(void) time(&t);
1285	sblock.fs_time = (time32_t)t;
1286	if (debug)
1287		printclean();
1288
1289	if (sblock.fs_logbno != fslogbno) {
1290		examinelog(&freelogblk);
1291		freedlog++;
1292	}
1293
1294	sblock.fs_logbno = fslogbno;
1295	sblock.fs_clean = fsclean;
1296	sblock.fs_state = FSOKAY - (long)sblock.fs_time;
1297	sblock.fs_reclaim = fsreclaim;
1298	sblock.fs_flags = fsflags;
1299
1300	/*
1301	 * if superblock can't be written, return
1302	 */
1303	if (fswritefd < 0)
1304		return (freedlog);
1305
1306	/*
1307	 * Read private copy of superblock, update clean flag, and write it.
1308	 */
1309	bno  = sblk.b_bno;
1310	size = sblk.b_size;
1311
1312	sblkoff = ldbtob(bno);
1313
1314	if ((cleanbuf.b_un.b_buf = malloc(size)) == NULL)
1315		errexit("out of memory");
1316	if (llseek(fsreadfd, sblkoff, SEEK_SET) == -1) {
1317		(void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
1318		    (longlong_t)bno, strerror(errno));
1319		goto out;
1320	}
1321
1322	if ((io_res = read(fsreadfd, cleanbuf.b_un.b_buf, size)) != size) {
1323		report_io_prob("READ FROM", bno, size, io_res);
1324		goto out;
1325	}
1326
1327	cleanbuf.b_un.b_fs->fs_logbno  = sblock.fs_logbno;
1328	cleanbuf.b_un.b_fs->fs_clean   = sblock.fs_clean;
1329	cleanbuf.b_un.b_fs->fs_state   = sblock.fs_state;
1330	cleanbuf.b_un.b_fs->fs_time    = sblock.fs_time;
1331	cleanbuf.b_un.b_fs->fs_reclaim = sblock.fs_reclaim;
1332	cleanbuf.b_un.b_fs->fs_flags   = sblock.fs_flags;
1333
1334	if (llseek(fswritefd, sblkoff, SEEK_SET) == -1) {
1335		(void) printf("COULD NOT SEEK TO SUPERBLOCK AT %lld: %s\n",
1336		    (longlong_t)bno, strerror(errno));
1337		goto out;
1338	}
1339
1340	if ((io_res = write(fswritefd, cleanbuf.b_un.b_buf, size)) != size) {
1341		report_io_prob("WRITE TO", bno, size, io_res);
1342		goto out;
1343	}
1344
1345	/*
1346	 * 1208040
1347	 * If we had to use -b to grab an alternate superblock, then we
1348	 * likely had to do so because of unacceptable differences between
1349	 * the main and alternate superblocks.  So, we had better update
1350	 * the alternate superblock as well, or we'll just fail again
1351	 * the next time we attempt to run fsck!
1352	 */
1353	if (bflag != 0) {
1354		write_altsb(fswritefd);
1355	}
1356
1357	if (errorlocked) {
1358		if (!do_errorlock(LOCKFS_ULOCK))
1359			pwarn(
1360		    "updateclean(changed): unlock(LOCKFS_ULOCK) failed\n");
1361	}
1362
1363out:
1364	if (cleanbuf.b_un.b_buf != NULL) {
1365		free((void *)cleanbuf.b_un.b_buf);
1366	}
1367
1368	return (freedlog);
1369}
1370
1371static void
1372report_io_prob(caddr_t what, diskaddr_t bno, size_t expected, ssize_t failure)
1373{
1374	if (failure < 0)
1375		(void) printf("COULD NOT %s SUPERBLOCK AT %d: %s\n",
1376		    what, (int)bno, strerror(errno));
1377	else if (failure == 0)
1378		(void) printf("COULD NOT %s SUPERBLOCK AT %d: EOF\n",
1379		    what, (int)bno);
1380	else
1381		(void) printf("SHORT %s SUPERBLOCK AT %d: %u out of %u bytes\n",
1382		    what, (int)bno, (unsigned)failure, (unsigned)expected);
1383}
1384
1385/*
1386 * print out clean info
1387 */
1388void
1389printclean(void)
1390{
1391	caddr_t s;
1392
1393	if (FSOKAY != (sblock.fs_state + sblock.fs_time) && !errorlocked)
1394		s = "unknown";
1395	else
1396		switch (sblock.fs_clean) {
1397
1398		case FSACTIVE:
1399			s = "active";
1400			break;
1401
1402		case FSCLEAN:
1403			s = "clean";
1404			break;
1405
1406		case FSSTABLE:
1407			s = "stable";
1408			break;
1409
1410		case FSLOG:
1411			s = "logging";
1412			break;
1413
1414		case FSBAD:
1415			s = "is bad";
1416			break;
1417
1418		case FSFIX:
1419			s = "being fixed";
1420			break;
1421
1422		default:
1423			s = "unknown";
1424		}
1425
1426	if (preen)
1427		pwarn("is %s.\n", s);
1428	else
1429		(void) printf("** %s is %s.\n", devname, s);
1430}
1431
1432int
1433is_errorlocked(caddr_t fs)
1434{
1435	int		retval;
1436	struct stat64	statb;
1437	caddr_t		mountp;
1438	struct mnttab	*mntent;
1439
1440	retval = 0;
1441
1442	if (!fs)
1443		return (0);
1444
1445	if (stat64(fs, &statb) < 0)
1446		return (0);
1447
1448	if (S_ISDIR(statb.st_mode)) {
1449		mountp = fs;
1450	} else if (S_ISBLK(statb.st_mode) || S_ISCHR(statb.st_mode)) {
1451		mntent = search_mnttab(NULL, fs, NULL, 0);
1452		if (mntent == NULL)
1453			return (0);
1454		mountp = mntent->mnt_mountp;
1455		if (mountp == NULL) /* theoretically a can't-happen */
1456			return (0);
1457	} else {
1458		return (0);
1459	}
1460
1461	/*
1462	 * From here on, must `goto out' to avoid memory leakage.
1463	 */
1464
1465	if (elock_combuf == NULL)
1466		elock_combuf =
1467		    (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char));
1468	else
1469		elock_combuf =
1470		    (caddr_t)realloc(elock_combuf, LOCKFS_MAXCOMMENTLEN);
1471
1472	if (elock_combuf == NULL)
1473		goto out;
1474
1475	(void) memset((void *)elock_combuf, 0, LOCKFS_MAXCOMMENTLEN);
1476
1477	if (elock_mountp != NULL) {
1478		free(elock_mountp);
1479	}
1480
1481	elock_mountp = strdup(mountp);
1482	if (elock_mountp == NULL)
1483		goto out;
1484
1485	if (mountfd < 0) {
1486		if ((mountfd = open64(mountp, O_RDONLY)) == -1)
1487			goto out;
1488	}
1489
1490	if (lfp == NULL) {
1491		lfp = (struct lockfs *)malloc(sizeof (struct lockfs));
1492		if (lfp == NULL)
1493			goto out;
1494		(void) memset((void *)lfp, 0, sizeof (struct lockfs));
1495	}
1496
1497	lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1498	lfp->lf_comment = elock_combuf;
1499
1500	if (ioctl(mountfd, _FIOLFSS, lfp) == -1)
1501		goto out;
1502
1503	/*
1504	 * lint believes that the ioctl() (or any other function
1505	 * taking lfp as an arg) could free lfp.  This is not the
1506	 * case, however.
1507	 */
1508	retval = LOCKFS_IS_ELOCK(lfp);
1509
1510out:
1511	return (retval);
1512}
1513
1514/*
1515 * Given a name which is known to be a directory, see if it appears
1516 * in the vfstab.  If so, return the entry's block (special) device
1517 * field via devstr.
1518 */
1519int
1520check_vfstab(caddr_t name, caddr_t devstr, size_t str_size)
1521{
1522	return (NULL != search_vfstab(name, NULL, devstr, str_size));
1523}
1524
1525/*
1526 * Given a name which is known to be a directory, see if it appears
1527 * in the mnttab.  If so, return the entry's block (special) device
1528 * field via devstr.
1529 */
1530int
1531check_mnttab(caddr_t name, caddr_t devstr, size_t str_size)
1532{
1533	return (NULL != search_mnttab(name, NULL, devstr, str_size));
1534}
1535
/*
 * Common function body for the table search routines.
 *
 * The expansion is a complete compound statement, so it is used in
 * place of the braces of a function whose parameters are named mountp,
 * special, devstr and str_size.  It searches st_file for the first
 * entry matching the mount point and/or special device and returns a
 * pointer to it, or NULL if the file can't be opened or nothing matches.
 * The entry lives in a static buffer, so each search overwrites the
 * result of the previous one made through the same function.
 *
 * If an entry is found and str_size is greater than zero, then up to
 * str_size bytes of the entry's special device name are copied to
 * devstr (when devstr is not NULL).
 *
 * st_init is an extra statement run after the search key has been
 * filled in; it may refer to the locals `key' and `retval'.
 */

#define	SEARCH_TAB_BODY(st_type, st_file, st_mount, st_special, \
			st_nuller, st_init, st_searcher) \
	{ \
		static struct st_type entry; \
		struct st_type key; \
		struct st_type *retval = NULL; \
		FILE *tabfp; \
		\
		/* LINTED ``assigned value never used'' */ \
		st_nuller(&key); \
		key.st_mount = mountp; \
		key.st_special = special; \
		st_init; \
		\
		tabfp = fopen(st_file, "r"); \
		if (tabfp == NULL) \
			return (NULL); \
		\
		if (st_searcher(tabfp, &entry, &key) == 0) { \
			retval = &entry; \
			if (devstr != NULL && str_size > 0 && \
			    entry.st_special != NULL) { \
				(void) strlcpy(devstr, entry.st_special, \
				    str_size); \
			} \
		} \
		(void) fclose(tabfp); \
		return (retval); \
	}
1573
1574static struct vfstab *
1575search_vfstab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
1576SEARCH_TAB_BODY(vfstab, VFSTAB, vfs_mountp, vfs_special, vfsnull,
1577		(retval = retval), getvfsany)
1578
1579static struct mnttab *
1580search_mnttab(caddr_t mountp, caddr_t special, caddr_t devstr, size_t str_size)
1581SEARCH_TAB_BODY(mnttab, MNTTAB, mnt_mountp, mnt_special, mntnull,
1582		(key.mnt_fstype = MNTTYPE_UFS), getmntany)
1583
1584int
1585do_errorlock(int lock_type)
1586{
1587	caddr_t	   buf;
1588	time_t	   now;
1589	struct tm *local;
1590	int	   rc;
1591
1592	if (elock_combuf == NULL)
1593		errexit("do_errorlock(%s, %d): unallocated elock_combuf\n",
1594		    elock_mountp ? elock_mountp : "<null>",
1595		    lock_type);
1596
1597	if ((buf = (caddr_t)calloc(LOCKFS_MAXCOMMENTLEN, sizeof (char))) ==
1598	    NULL) {
1599		errexit("Couldn't alloc memory for temp. lock status buffer\n");
1600	}
1601	if (lfp == NULL) {
1602		errexit("do_errorlock(%s, %d): lockfs status unallocated\n",
1603		    elock_mountp, lock_type);
1604	}
1605
1606	(void) memmove((void *)buf, (void *)elock_combuf,
1607	    LOCKFS_MAXCOMMENTLEN-1);
1608
1609	switch (lock_type) {
1610	case LOCKFS_ELOCK:
1611		/*
1612		 * Note that if it is error-locked, we won't get an
1613		 * error back if we try to error-lock it again.
1614		 */
1615		if (time(&now) != (time_t)-1) {
1616			if ((local = localtime(&now)) != NULL)
1617				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1618		    "%s [pid:%d fsck start:%02d/%02d/%02d %02d:%02d:%02d",
1619				    elock_combuf, (int)pid,
1620				    local->tm_mon + 1, local->tm_mday,
1621				    (local->tm_year % 100), local->tm_hour,
1622				    local->tm_min, local->tm_sec);
1623			else
1624				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1625				    "%s [fsck pid %d", elock_combuf, pid);
1626
1627		} else {
1628			(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1629			    "%s [fsck pid %d", elock_combuf, pid);
1630		}
1631		break;
1632
1633	case LOCKFS_ULOCK:
1634		if (time(&now) != (time_t)-1) {
1635			if ((local = localtime(&now)) != NULL) {
1636				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1637				    "%s, done:%02d/%02d/%02d %02d:%02d:%02d]",
1638				    elock_combuf,
1639				    local->tm_mon + 1, local->tm_mday,
1640				    (local->tm_year % 100), local->tm_hour,
1641				    local->tm_min, local->tm_sec);
1642			} else {
1643				(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1644				    "%s]", elock_combuf);
1645			}
1646		} else {
1647			(void) snprintf(buf, LOCKFS_MAXCOMMENTLEN,
1648			    "%s]", elock_combuf);
1649		}
1650		if ((rc = ioctl(mountfd, _FIOLFSS, lfp)) == -1) {
1651			pwarn("do_errorlock: unlock failed: %s\n",
1652			    strerror(errno));
1653			goto out;
1654		}
1655		break;
1656
1657	default:
1658		break;
1659	}
1660
1661	(void) memmove((void *)elock_combuf, (void *)buf,
1662	    LOCKFS_MAXCOMMENTLEN - 1);
1663
1664	lfp->lf_lock = lock_type;
1665	lfp->lf_comlen = LOCKFS_MAXCOMMENTLEN;
1666	lfp->lf_comment = elock_combuf;
1667	lfp->lf_flags = 0;
1668	errno = 0;
1669
1670	if ((rc = ioctl(mountfd, _FIOLFS, lfp)) == -1) {
1671		if (errno == EINVAL) {
1672			pwarn("Another fsck active?\n");
1673			iscorrupt = 0;	/* don't go away mad, just go away */
1674		} else {
1675			pwarn("do_errorlock(lock_type:%d, %s) failed: %s\n",
1676			    lock_type, elock_combuf, strerror(errno));
1677		}
1678	}
1679out:
1680	if (buf != NULL) {
1681		free((void *)buf);
1682	}
1683
1684	return (rc != -1);
1685}
1686
1687/*
1688 * Shadow inode support.  To register a shadow with a client is to note
1689 * that an inode (the client) refers to the shadow.
1690 */
1691
1692static struct shadowclients *
1693newshadowclient(struct shadowclients *prev)
1694{
1695	struct shadowclients *rc;
1696
1697	rc = (struct shadowclients *)malloc(sizeof (*rc));
1698	if (rc == NULL)
1699		errexit("newshadowclient: cannot malloc shadow client");
1700	rc->next = prev;
1701	rc->nclients = 0;
1702
1703	rc->client = (fsck_ino_t *)malloc(sizeof (fsck_ino_t) *
1704	    maxshadowclients);
1705	if (rc->client == NULL)
1706		errexit("newshadowclient: cannot malloc client array");
1707	return (rc);
1708}
1709
1710void
1711registershadowclient(fsck_ino_t shadow, fsck_ino_t client,
1712	struct shadowclientinfo **info)
1713{
1714	struct shadowclientinfo *sci;
1715	struct shadowclients *scc;
1716
1717	/*
1718	 * Already have a record for this shadow?
1719	 */
1720	for (sci = *info; sci != NULL; sci = sci->next)
1721		if (sci->shadow == shadow)
1722			break;
1723	if (sci == NULL) {
1724		/*
1725		 * It's a new shadow, add it to the list
1726		 */
1727		sci = (struct shadowclientinfo *)malloc(sizeof (*sci));
1728		if (sci == NULL)
1729			errexit("registershadowclient: cannot malloc");
1730		sci->next = *info;
1731		*info = sci;
1732		sci->shadow = shadow;
1733		sci->totalClients = 0;
1734		sci->clients = newshadowclient(NULL);
1735	}
1736
1737	sci->totalClients++;
1738	scc = sci->clients;
1739	if (scc->nclients >= maxshadowclients) {
1740		scc = newshadowclient(sci->clients);
1741		sci->clients = scc;
1742	}
1743
1744	scc->client[scc->nclients++] = client;
1745}
1746
1747/*
1748 * Locate and discard a shadow.
1749 */
1750void
1751clearshadow(fsck_ino_t shadow, struct shadowclientinfo **info)
1752{
1753	struct shadowclientinfo *sci, *prev;
1754
1755	/*
1756	 * Do we have a record for this shadow?
1757	 */
1758	prev = NULL;
1759	for (sci = *info; sci != NULL; sci = sci->next) {
1760		if (sci->shadow == shadow)
1761			break;
1762		prev = sci;
1763	}
1764
1765	if (sci != NULL) {
1766		/*
1767		 * First, pull it off the list, since we know there
1768		 * shouldn't be any future references to this one.
1769		 */
1770		if (prev == NULL)
1771			*info = sci->next;
1772		else
1773			prev->next = sci->next;
1774		deshadow(sci, clearattrref);
1775	}
1776}
1777
1778/*
1779 * Discard all memory used to track clients of a shadow.
1780 */
1781void
1782deshadow(struct shadowclientinfo *sci, void (*cb)(fsck_ino_t))
1783{
1784	struct shadowclients *clients, *discard;
1785	int idx;
1786
1787	clients = sci->clients;
1788	while (clients != NULL) {
1789		discard = clients;
1790		clients = clients->next;
1791		if (discard->client != NULL) {
1792			if (cb != NULL) {
1793				for (idx = 0; idx < discard->nclients; idx++)
1794					(*cb)(discard->client[idx]);
1795			}
1796			free((void *)discard->client);
1797		}
1798		free((void *)discard);
1799	}
1800
1801	free((void *)sci);
1802}
1803
1804/*
1805 * Allocate more buffer as need arises but allocate one at a time.
1806 * This is done to make sure that fsck does not exit with error if it
1807 * needs more buffer to complete its task.
1808 */
1809static struct bufarea *
1810alloc_bufarea(void)
1811{
1812	struct bufarea *newbp;
1813	caddr_t bufp;
1814
1815	bufp = malloc((unsigned int)sblock.fs_bsize);
1816	if (bufp == NULL)
1817		return (NULL);
1818
1819	newbp = (struct bufarea *)malloc(sizeof (struct bufarea));
1820	if (newbp == NULL) {
1821		free((void *)bufp);
1822		return (NULL);
1823	}
1824
1825	initbarea(newbp);
1826	newbp->b_un.b_buf = bufp;
1827	newbp->b_prev = &bufhead;
1828	newbp->b_next = bufhead.b_next;
1829	bufhead.b_next->b_prev = newbp;
1830	bufhead.b_next = newbp;
1831	bufhead.b_size++;
1832	return (newbp);
1833}
1834
1835/*
1836 * We length-limit in both unrawname() and rawname() to avoid
1837 * overflowing our arrays or those of our naive, trusting callers.
1838 */
1839
1840caddr_t
1841unrawname(caddr_t name)
1842{
1843	caddr_t dp;
1844	static char fullname[MAXPATHLEN + 1];
1845
1846	if ((dp = getfullblkname(name)) == NULL)
1847		return ("");
1848
1849	(void) strlcpy(fullname, dp, sizeof (fullname));
1850	/*
1851	 * Not reporting under debug, as the allocation isn't
1852	 * reported by getfullblkname.  The idea is that we
1853	 * produce balanced alloc/free instances.
1854	 */
1855	free(dp);
1856
1857	return (fullname);
1858}
1859
1860caddr_t
1861rawname(caddr_t name)
1862{
1863	caddr_t dp;
1864	static char fullname[MAXPATHLEN + 1];
1865
1866	if ((dp = getfullrawname(name)) == NULL)
1867		return ("");
1868
1869	(void) strlcpy(fullname, dp, sizeof (fullname));
1870	/*
1871	 * Not reporting under debug, as the allocation isn't
1872	 * reported by getfullblkname.  The idea is that we
1873	 * produce balanced alloc/free instances.
1874	 */
1875	free(dp);
1876
1877	return (fullname);
1878}
1879
/*
 * Make sure that a cg header looks at least moderately reasonable.
 * We want to be able to trust the contents enough to be able to use
 * the standard accessor macros.  So, besides looking at the obvious
 * such as the magic number, we verify that the offset field values
 * are properly aligned and not too big or small.
 *
 * Returns a NULL pointer if the cg is sane enough for our needs, else
 * a dynamically-allocated string describing all of its faults.
 */

/*
 * Append_Error(full, full_len, addition, addition_len)
 *
 * Add the heap-allocated string `addition' (addition_len characters,
 * not counting the terminator) to the accumulated string `full'
 * (full_len characters).  If nothing has been accumulated yet, `full'
 * simply takes over `addition'; otherwise `full' is grown, `addition'
 * is copied onto its end and then freed.  `full' and `full_len' must
 * be modifiable lvalues.  Running out of memory is fatal.
 *
 * The body is wrapped in do { } while (0) so that an invocation
 * followed by a semicolon is a single statement wherever it appears,
 * including as the unbraced body of an if with an else.
 */
#define	Append_Error(full, full_len, addition, addition_len) \
	do { \
		if ((full) == NULL) { \
			(full) = (addition); \
			(full_len) = (addition_len); \
		} else { \
			/* lint doesn't think realloc() understands NULLs */ \
			(full) = realloc((full), \
			    (full_len) + (addition_len) + 1); \
			if ((full) == NULL) { \
				errexit("Out of memory in cg_sanity"); \
				/* NOTREACHED */ \
			} \
			(void) strcpy((full) + (full_len), (addition)); \
			(full_len) += (addition_len); \
			free(addition); \
		} \
	} while (0)
1905
1906caddr_t
1907cg_sanity(struct cg *cgp, int cgno)
1908{
1909	caddr_t full_err;
1910	caddr_t this_err = NULL;
1911	int full_len, this_len;
1912	daddr32_t ndblk;
1913	daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
1914	daddr32_t exp_freeoff, exp_nextfreeoff;
1915
1916	cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
1917	    &exp_freeoff, &exp_nextfreeoff, &ndblk);
1918
1919	full_err = NULL;
1920	full_len = 0;
1921
1922	if (!cg_chkmagic(cgp)) {
1923		this_len = fsck_asprintf(&this_err,
1924		    "BAD CG MAGIC NUMBER (0x%x should be 0x%x)\n",
1925		    cgp->cg_magic, CG_MAGIC);
1926		Append_Error(full_err, full_len, this_err, this_len);
1927	}
1928
1929	if (cgp->cg_cgx != cgno) {
1930		this_len = fsck_asprintf(&this_err,
1931		    "WRONG CG NUMBER (%d should be %d)\n",
1932		    cgp->cg_cgx, cgno);
1933		Append_Error(full_err, full_len, this_err, this_len);
1934	}
1935
1936	if ((cgp->cg_btotoff & 3) != 0) {
1937		this_len = fsck_asprintf(&this_err,
1938		    "BLOCK TOTALS OFFSET %d NOT FOUR-BYTE ALIGNED\n",
1939		    cgp->cg_btotoff);
1940		Append_Error(full_err, full_len, this_err, this_len);
1941	}
1942
1943	if ((cgp->cg_boff & 1) != 0) {
1944		this_len = fsck_asprintf(&this_err,
1945	    "FREE BLOCK POSITIONS TABLE OFFSET %d NOT TWO-BYTE ALIGNED\n",
1946		    cgp->cg_boff);
1947		Append_Error(full_err, full_len, this_err, this_len);
1948	}
1949
1950	if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
1951		if (cgp->cg_ncyl < 1) {
1952			this_len = fsck_asprintf(&this_err,
1953	    "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is less than 1)\n",
1954			    cgp->cg_ncyl);
1955		} else {
1956			this_len = fsck_asprintf(&this_err,
1957	    "IMPOSSIBLE NUMBER OF CYLINDERS IN GROUP (%d is greater than %d)\n",
1958			    cgp->cg_ncyl, sblock.fs_cpg);
1959		}
1960		Append_Error(full_err, full_len, this_err, this_len);
1961	}
1962
1963	if (cgp->cg_niblk != sblock.fs_ipg) {
1964		this_len = fsck_asprintf(&this_err,
1965		    "INCORRECT NUMBER OF INODES IN GROUP (%d should be %d)\n",
1966		    cgp->cg_niblk, sblock.fs_ipg);
1967		Append_Error(full_err, full_len, this_err, this_len);
1968	}
1969
1970	if (cgp->cg_ndblk != ndblk) {
1971		this_len = fsck_asprintf(&this_err,
1972	    "INCORRECT NUMBER OF DATA BLOCKS IN GROUP (%d should be %d)\n",
1973		    cgp->cg_ndblk, ndblk);
1974		Append_Error(full_err, full_len, this_err, this_len);
1975	}
1976
1977	if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= ndblk)) {
1978		this_len = fsck_asprintf(&this_err,
1979		    "IMPOSSIBLE BLOCK ALLOCATION ROTOR POSITION "
1980		    "(%d should be at least 0 and less than %d)\n",
1981		    cgp->cg_rotor, ndblk);
1982		Append_Error(full_err, full_len, this_err, this_len);
1983	}
1984
1985	if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= ndblk)) {
1986		this_len = fsck_asprintf(&this_err,
1987		    "IMPOSSIBLE FRAGMENT ALLOCATION ROTOR POSITION "
1988		    "(%d should be at least 0 and less than %d)\n",
1989		    cgp->cg_frotor, ndblk);
1990		Append_Error(full_err, full_len, this_err, this_len);
1991	}
1992
1993	if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
1994		this_len = fsck_asprintf(&this_err,
1995		    "IMPOSSIBLE INODE ALLOCATION ROTOR POSITION "
1996		    "(%d should be at least 0 and less than %d)\n",
1997		    cgp->cg_irotor, sblock.fs_ipg);
1998		Append_Error(full_err, full_len, this_err, this_len);
1999	}
2000
2001	if (cgp->cg_btotoff != exp_btotoff) {
2002		this_len = fsck_asprintf(&this_err,
2003		    "INCORRECT BLOCK TOTALS OFFSET (%d should be %d)\n",
2004		    cgp->cg_btotoff, exp_btotoff);
2005		Append_Error(full_err, full_len, this_err, this_len);
2006	}
2007
2008	if (cgp->cg_boff != exp_boff) {
2009		this_len = fsck_asprintf(&this_err,
2010		    "BAD FREE BLOCK POSITIONS TABLE OFFSET (%d should %d)\n",
2011		    cgp->cg_boff, exp_boff);
2012		Append_Error(full_err, full_len, this_err, this_len);
2013	}
2014
2015	if (cgp->cg_iusedoff != exp_iusedoff) {
2016		this_len = fsck_asprintf(&this_err,
2017		    "INCORRECT USED INODE MAP OFFSET (%d should be %d)\n",
2018		    cgp->cg_iusedoff, exp_iusedoff);
2019		Append_Error(full_err, full_len, this_err, this_len);
2020	}
2021
2022	if (cgp->cg_freeoff != exp_freeoff) {
2023		this_len = fsck_asprintf(&this_err,
2024		    "INCORRECT FREE FRAGMENT MAP OFFSET (%d should be %d)\n",
2025		    cgp->cg_freeoff, exp_freeoff);
2026		Append_Error(full_err, full_len, this_err, this_len);
2027	}
2028
2029	if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2030		this_len = fsck_asprintf(&this_err,
2031		    "END OF HEADER POSITION INCORRECT (%d should be %d)\n",
2032		    cgp->cg_nextfreeoff, exp_nextfreeoff);
2033		Append_Error(full_err, full_len, this_err, this_len);
2034	}
2035
2036	return (full_err);
2037}
2038
2039#undef	Append_Error
2040
2041/*
2042 * This is taken from mkfs, and is what is used to come up with the
2043 * original values for a struct cg.  This implies that, since these
2044 * are all constants, recalculating them now should give us the same
2045 * thing as what's on disk.
2046 */
2047static void
2048cg_constants(int cgno, daddr32_t *btotoff, daddr32_t *boff,
2049	daddr32_t *iusedoff, daddr32_t *freeoff, daddr32_t *nextfreeoff,
2050	daddr32_t *ndblk)
2051{
2052	daddr32_t cbase, dmax;
2053	struct cg *cgp;
2054
2055	(void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cgno),
2056	    (size_t)sblock.fs_cgsize);
2057	cgp = cgblk.b_un.b_cg;
2058
2059	cbase = cgbase(&sblock, cgno);
2060	dmax = cbase + sblock.fs_fpg;
2061	if (dmax > sblock.fs_size)
2062		dmax = sblock.fs_size;
2063
2064	/* LINTED pointer difference won't overflow */
2065	*btotoff = &cgp->cg_space[0] - (uchar_t *)(&cgp->cg_link);
2066	*boff = *btotoff + sblock.fs_cpg * sizeof (daddr32_t);
2067	*iusedoff = *boff + sblock.fs_cpg * sblock.fs_nrpos * sizeof (int16_t);
2068	*freeoff = *iusedoff + howmany(sblock.fs_ipg, NBBY);
2069	*nextfreeoff = *freeoff +
2070	    howmany(sblock.fs_cpg * sblock.fs_spc / NSPF(&sblock), NBBY);
2071	*ndblk = dmax - cbase;
2072}
2073
2074/*
2075 * Corrects all fields in the cg that can be done with the available
2076 * redundant data.
2077 */
2078void
2079fix_cg(struct cg *cgp, int cgno)
2080{
2081	daddr32_t exp_btotoff, exp_boff, exp_iusedoff;
2082	daddr32_t exp_freeoff, exp_nextfreeoff;
2083	daddr32_t ndblk;
2084
2085	cg_constants(cgno, &exp_btotoff, &exp_boff, &exp_iusedoff,
2086	    &exp_freeoff, &exp_nextfreeoff, &ndblk);
2087
2088	if (cgp->cg_cgx != cgno) {
2089		cgp->cg_cgx = cgno;
2090	}
2091
2092	if ((cgp->cg_ncyl < 1) || (cgp->cg_ncyl > sblock.fs_cpg)) {
2093		if (cgno == (sblock.fs_ncg - 1)) {
2094			cgp->cg_ncyl = sblock.fs_ncyl -
2095			    (sblock.fs_cpg * cgno);
2096		} else {
2097			cgp->cg_ncyl = sblock.fs_cpg;
2098		}
2099	}
2100
2101	if (cgp->cg_niblk != sblock.fs_ipg) {
2102		/*
2103		 * This is not used by the kernel, so it's pretty
2104		 * harmless if it's wrong.
2105		 */
2106		cgp->cg_niblk = sblock.fs_ipg;
2107	}
2108
2109	if (cgp->cg_ndblk != ndblk) {
2110		cgp->cg_ndblk = ndblk;
2111	}
2112
2113	/*
2114	 * For the rotors, any position's valid, so pick the one we know
2115	 * will always exist.
2116	 */
2117	if ((cgp->cg_rotor < 0) || (cgp->cg_rotor >= cgp->cg_ndblk)) {
2118		cgp->cg_rotor = 0;
2119	}
2120
2121	if ((cgp->cg_frotor < 0) || (cgp->cg_frotor >= cgp->cg_ndblk)) {
2122		cgp->cg_frotor = 0;
2123	}
2124
2125	if ((cgp->cg_irotor < 0) || (cgp->cg_irotor >= sblock.fs_ipg)) {
2126		cgp->cg_irotor = 0;
2127	}
2128
2129	/*
2130	 * For btotoff and boff, if they're misaligned they won't
2131	 * match the expected values, so we're catching both cases
2132	 * here.  Of course, if any of these are off, it seems likely
2133	 * that the tables really won't be where we calculate they
2134	 * should be anyway.
2135	 */
2136	if (cgp->cg_btotoff != exp_btotoff) {
2137		cgp->cg_btotoff = exp_btotoff;
2138	}
2139
2140	if (cgp->cg_boff != exp_boff) {
2141		cgp->cg_boff = exp_boff;
2142	}
2143
2144	if (cgp->cg_iusedoff != exp_iusedoff) {
2145		cgp->cg_iusedoff = exp_iusedoff;
2146	}
2147
2148	if (cgp->cg_freeoff != exp_freeoff) {
2149		cgp->cg_freeoff = exp_freeoff;
2150	}
2151
2152	if (cgp->cg_nextfreeoff != exp_nextfreeoff) {
2153		cgp->cg_nextfreeoff = exp_nextfreeoff;
2154	}
2155
2156	/*
2157	 * Reset the magic, as we've recreated this cg, also
2158	 * update the cg_time, as we're writing out the cg
2159	 */
2160	cgp->cg_magic = CG_MAGIC;
2161	cgp->cg_time = time(NULL);
2162
2163	/*
2164	 * We know there was at least one correctable problem,
2165	 * or else we wouldn't have been called.  So instead of
2166	 * marking the buffer dirty N times above, just do it
2167	 * once here.
2168	 */
2169	cgdirty();
2170}
2171
2172void
2173examinelog(void (*cb)(daddr32_t))
2174{
2175	struct bufarea *bp;
2176	extent_block_t *ebp;
2177	extent_t *ep;
2178	daddr32_t nfno, fno;
2179	int i;
2180	int j;
2181
2182	/*
2183	 * Since ufs stores fs_logbno as blocks and MTBufs stores it as frags
2184	 * we need to translate accordingly using logbtodb()
2185	 */
2186
2187	if (logbtodb(&sblock, sblock.fs_logbno) < SBLOCK) {
2188		if (debug) {
2189			(void) printf("fs_logbno < SBLOCK: %ld < %ld\n" \
2190			    "Aborting log examination\n", \
2191			    logbtodb(&sblock, sblock.fs_logbno), SBLOCK);
2192		}
2193		return;
2194	}
2195
2196	/*
2197	 * Read errors will return zeros, which will cause us
2198	 * to do nothing harmful, so don't need to handle it.
2199	 */
2200	bp = getdatablk(logbtofrag(&sblock, sblock.fs_logbno),
2201	    (size_t)sblock.fs_bsize);
2202	ebp = (void *)bp->b_un.b_buf;
2203
2204	/*
2205	 * Does it look like a log allocation table?
2206	 */
2207	/* LINTED pointer cast is aligned */
2208	if (!log_checksum(&ebp->chksum, (int32_t *)bp->b_un.b_buf,
2209	    sblock.fs_bsize))
2210		return;
2211	if (ebp->type != LUFS_EXTENTS || ebp->nextents == 0)
2212		return;
2213
2214	ep = &ebp->extents[0];
2215	for (i = 0; i < ebp->nextents; ++i, ++ep) {
2216		fno = logbtofrag(&sblock, ep->pbno);
2217		nfno = dbtofsb(&sblock, ep->nbno);
2218		for (j = 0; j < nfno; ++j, ++fno) {
2219			/*
2220			 * Invoke the callback first, so that pass1 can
2221			 * mark the log blocks in-use.  Then, if any
2222			 * subsequent pass over the log shows us that a
2223			 * block got freed (say, it was also claimed by
2224			 * an inode that we cleared), we can safely declare
2225			 * the log bad.
2226			 */
2227			if (cb != NULL)
2228				(*cb)(fno);
2229			if (!testbmap(fno))
2230				islogok = 0;
2231		}
2232	}
2233	brelse(bp);
2234
2235	if (cb != NULL) {
2236		fno = logbtofrag(&sblock, sblock.fs_logbno);
2237		for (j = 0; j < sblock.fs_frag; ++j, ++fno)
2238			(*cb)(fno);
2239	}
2240}
2241
2242static void
2243freelogblk(daddr32_t frag)
2244{
2245	freeblk(sblock.fs_logbno, frag, 1);
2246}
2247
2248caddr_t
2249file_id(fsck_ino_t inum, mode_t mode)
2250{
2251	static char name[MAXPATHLEN + 1];
2252
2253	if (lfdir == inum) {
2254		return (lfname);
2255	}
2256
2257	if ((mode & IFMT) == IFDIR) {
2258		(void) strcpy(name, "DIR");
2259	} else if ((mode & IFMT) == IFATTRDIR) {
2260		(void) strcpy(name, "ATTR DIR");
2261	} else if ((mode & IFMT) == IFSHAD) {
2262		(void) strcpy(name, "ACL");
2263	} else {
2264		(void) strcpy(name, "FILE");
2265	}
2266
2267	return (name);
2268}
2269
2270/*
2271 * Simple initializer for inodesc structures, so users of only a few
2272 * fields don't have to worry about getting the right defaults for
2273 * everything out.
2274 */
2275void
2276init_inodesc(struct inodesc *idesc)
2277{
2278	/*
2279	 * Most fields should be zero, just hit the special cases.
2280	 */
2281	(void) memset((void *)idesc, 0, sizeof (struct inodesc));
2282	idesc->id_fix = DONTKNOW;
2283	idesc->id_lbn = -1;
2284	idesc->id_truncto = -1;
2285	idesc->id_firsthole = -1;
2286}
2287
2288/*
2289 * Compare routine for tsearch(C) to use on ino_t instances.
2290 */
2291int
2292ino_t_cmp(const void *left, const void *right)
2293{
2294	const fsck_ino_t lino = (const fsck_ino_t)left;
2295	const fsck_ino_t rino = (const fsck_ino_t)right;
2296
2297	return (lino - rino);
2298}
2299
2300int
2301cgisdirty(void)
2302{
2303	return (cgblk.b_dirty);
2304}
2305
2306void
2307cgflush(void)
2308{
2309	flush(fswritefd, &cgblk);
2310}
2311
2312void
2313dirty(struct bufarea *bp)
2314{
2315	if (fswritefd < 0) {
2316		/*
2317		 * No one should call dirty() in read only mode.
2318		 * But if one does, it's not fatal issue. Just warn them.
2319		 */
2320		pwarn("WON'T SET DIRTY FLAG IN READ_ONLY MODE\n");
2321	} else {
2322		(bp)->b_dirty = 1;
2323		isdirty = 1;
2324	}
2325}
2326
2327void
2328initbarea(struct bufarea *bp)
2329{
2330	(bp)->b_dirty = 0;
2331	(bp)->b_bno = (diskaddr_t)-1LL;
2332	(bp)->b_flags = 0;
2333	(bp)->b_cnt = 0;
2334	(bp)->b_errs = 0;
2335}
2336
2337/*
2338 * Partition-sizing routines adapted from ../newfs/newfs.c.
2339 * Needed because calcsb() needs to use mkfs to work out what the
2340 * superblock should be, and mkfs insists on being told how many
2341 * sectors to use.
2342 *
2343 * Error handling assumes we're never called while preening.
2344 *
 * XXX This should be extracted into a ../ufslib.{c,h},
 *     in the same spirit as ../../fslib.{c,h}.  Once that is
 *     done, both fsck and newfs should be modified to link
 *     against it.
2349 */
2350
/*
 * Values for label_type.
 */
#define	LABEL_TYPE_VTOC		1
#define	LABEL_TYPE_EFI		2
#define	LABEL_TYPE_OTHER	3

/*
 * Kind of disk label found by get_device_size().  getdisksize() only
 * asks the device for its geometry when this is LABEL_TYPE_VTOC.
 */
static int label_type;

/*
 * Size constants.
 */
#define	MB			(1024 * 1024)	/* one megabyte, in bytes */
/* 2^31 sectors; presumably assumes 512-byte sectors -- TODO confirm */
#define	SECTORS_PER_TERABYTE	(1LL << 31)
#define	FS_SIZE_UPPER_LIMIT	0x100000000000LL
2360
2361diskaddr_t
2362getdisksize(caddr_t disk, int fd)
2363{
2364	int rpm;
2365	struct dk_geom g;
2366	struct dk_cinfo ci;
2367	diskaddr_t actual_size;
2368
2369	/*
2370	 * get_device_size() determines the actual size of the
2371	 * device, and also the disk's attributes, such as geometry.
2372	 */
2373	actual_size = get_device_size(fd, disk);
2374
2375	if (label_type == LABEL_TYPE_VTOC) {
2376		if (ioctl(fd, DKIOCGGEOM, &g)) {
2377			pwarn("%s: Unable to read Disk geometry", disk);
2378			return (0);
2379		}
2380		if (sblock.fs_nsect == 0)
2381			sblock.fs_nsect = g.dkg_nsect;
2382		if (sblock.fs_ntrak == 0)
2383			sblock.fs_ntrak = g.dkg_nhead;
2384		if (sblock.fs_rps == 0) {
2385			rpm = ((int)g.dkg_rpm <= 0) ? 3600: g.dkg_rpm;
2386			sblock.fs_rps = rpm / 60;
2387		}
2388	}
2389
2390	if (sblock.fs_bsize == 0)
2391		sblock.fs_bsize = MAXBSIZE;
2392
2393	/*
2394	 * Adjust maxcontig by the device's maxtransfer. If maxtransfer
2395	 * information is not available, default to the min of a MB and
2396	 * maxphys.
2397	 */
2398	if (sblock.fs_maxcontig == -1 && ioctl(fd, DKIOCINFO, &ci) == 0) {
2399		sblock.fs_maxcontig = ci.dki_maxtransfer * DEV_BSIZE;
2400		if (sblock.fs_maxcontig < 0) {
2401			int gotit, maxphys;
2402
2403			gotit = fsgetmaxphys(&maxphys, NULL);
2404
2405			/*
2406			 * If we cannot get the maxphys value, default
2407			 * to ufs_maxmaxphys (MB).
2408			 */
2409			if (gotit) {
2410				sblock.fs_maxcontig = MIN(maxphys, MB);
2411			} else {
2412				sblock.fs_maxcontig = MB;
2413			}
2414		}
2415		sblock.fs_maxcontig /= sblock.fs_bsize;
2416	}
2417
2418	return (actual_size);
2419}
2420
2421/*
2422 * Figure out how big the partition we're dealing with is.
2423 */
2424static diskaddr_t
2425get_device_size(int fd, caddr_t name)
2426{
2427	struct extvtoc vtoc;
2428	struct dk_gpt *efi_vtoc;
2429	diskaddr_t slicesize = 0;
2430
2431	int index = read_extvtoc(fd, &vtoc);
2432
2433	if (index >= 0) {
2434		label_type = LABEL_TYPE_VTOC;
2435	} else {
2436		if (index == VT_ENOTSUP || index == VT_ERROR) {
2437			/* it might be an EFI label */
2438			index = efi_alloc_and_read(fd, &efi_vtoc);
2439			if (index >= 0)
2440				label_type = LABEL_TYPE_EFI;
2441		}
2442	}
2443
2444	if (index < 0) {
2445		/*
2446		 * Since both attempts to read the label failed, we're
2447		 * going to fall back to a brute force approach to
2448		 * determining the device's size:  see how far out we can
2449		 * perform reads on the device.
2450		 */
2451
2452		slicesize = brute_force_get_device_size(fd);
2453		if (slicesize == 0) {
2454			switch (index) {
2455			case VT_ERROR:
2456				pwarn("%s: %s\n", name, strerror(errno));
2457				break;
2458			case VT_EIO:
2459				pwarn("%s: I/O error accessing VTOC", name);
2460				break;
2461			case VT_EINVAL:
2462				pwarn("%s: Invalid field in VTOC", name);
2463				break;
2464			default:
2465				pwarn("%s: unknown error %d accessing VTOC",
2466				    name, index);
2467				break;
2468			}
2469			return (0);
2470		} else {
2471			label_type = LABEL_TYPE_OTHER;
2472		}
2473	}
2474
2475	if (label_type == LABEL_TYPE_EFI) {
2476		slicesize = efi_vtoc->efi_parts[index].p_size;
2477		efi_free(efi_vtoc);
2478	} else if (label_type == LABEL_TYPE_VTOC) {
2479		slicesize = vtoc.v_part[index].p_size;
2480	}
2481
2482	return (slicesize);
2483}
2484
2485/*
2486 * brute_force_get_device_size
2487 *
2488 * Determine the size of the device by seeing how far we can
2489 * read.  Doing an llseek( , , SEEK_END) would probably work
2490 * in most cases, but we've seen at least one third-party driver
2491 * which doesn't correctly support the SEEK_END option when the
2492 * the device is greater than a terabyte.
2493 */
2494
2495static diskaddr_t
2496brute_force_get_device_size(int fd)
2497{
2498	diskaddr_t	min_fail = 0;
2499	diskaddr_t	max_succeed = 0;
2500	diskaddr_t	cur_db_off;
2501	char 		buf[DEV_BSIZE];
2502
2503	/*
2504	 * First, see if we can read the device at all, just to
2505	 * eliminate errors that have nothing to do with the
2506	 * device's size.
2507	 */
2508
2509	if (((llseek(fd, (offset_t)0, SEEK_SET)) == -1) ||
2510	    ((read(fd, buf, DEV_BSIZE)) == -1))
2511		return (0);  /* can't determine size */
2512
2513	/*
2514	 * Now, go sequentially through the multiples of 4TB
2515	 * to find the first read that fails (this isn't strictly
2516	 * the most efficient way to find the actual size if the
2517	 * size really could be anything between 0 and 2**64 bytes.
2518	 * We expect the sizes to be less than 16 TB for some time,
2519	 * so why do a bunch of reads that are larger than that?
2520	 * However, this algorithm *will* work for sizes of greater
2521	 * than 16 TB.  We're just not optimizing for those sizes.)
2522	 */
2523
2524	/*
2525	 * XXX lint uses 32-bit arithmetic for doing flow analysis.
2526	 * We're using > 32-bit constants here.  Therefore, its flow
2527	 * analysis is wrong.  For the time being, ignore complaints
2528	 * from it about the body of the for() being unreached.
2529	 */
2530	for (cur_db_off = SECTORS_PER_TERABYTE * 4;
2531	    (min_fail == 0) && (cur_db_off < FS_SIZE_UPPER_LIMIT);
2532	    cur_db_off += 4 * SECTORS_PER_TERABYTE) {
2533		if ((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2534		    SEEK_SET) == -1) ||
2535		    (read(fd, buf, DEV_BSIZE) != DEV_BSIZE))
2536			min_fail = cur_db_off;
2537		else
2538			max_succeed = cur_db_off;
2539	}
2540
2541	/*
2542	 * XXX Same lint flow analysis problem as above.
2543	 */
2544	if (min_fail == 0)
2545		return (0);
2546
2547	/*
2548	 * We now know that the size of the device is less than
2549	 * min_fail and greater than or equal to max_succeed.  Now
2550	 * keep splitting the difference until the actual size in
2551	 * sectors in known.  We also know that the difference
2552	 * between max_succeed and min_fail at this time is
2553	 * 4 * SECTORS_PER_TERABYTE, which is a power of two, which
2554	 * simplifies the math below.
2555	 */
2556
2557	while (min_fail - max_succeed > 1) {
2558		cur_db_off = max_succeed + (min_fail - max_succeed)/2;
2559		if (((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
2560		    SEEK_SET)) == -1) ||
2561		    ((read(fd, buf, DEV_BSIZE)) != DEV_BSIZE))
2562			min_fail = cur_db_off;
2563		else
2564			max_succeed = cur_db_off;
2565	}
2566
2567	/* the size is the last successfully read sector offset plus one */
2568	return (max_succeed + 1);
2569}
2570
2571static void
2572vfileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, va_list ap)
2573{
2574	struct dinode *dp;
2575	char pathbuf[MAXPATHLEN + 1];
2576
2577	vpwarn(fmt, ap);
2578	(void) putchar(' ');
2579	pinode(ino);
2580	(void) printf("\n");
2581	getpathname(pathbuf, cwd, ino);
2582	if (ino < UFSROOTINO || ino > maxino) {
2583		pfatal("NAME=%s\n", pathbuf);
2584		return;
2585	}
2586	dp = ginode(ino);
2587	if (ftypeok(dp))
2588		pfatal("%s=%s\n", file_id(ino, dp->di_mode), pathbuf);
2589	else
2590		pfatal("NAME=%s\n", pathbuf);
2591}
2592
2593void
2594direrror(fsck_ino_t ino, caddr_t fmt, ...)
2595{
2596	va_list ap;
2597
2598	va_start(ap, fmt);
2599	vfileerror(ino, ino, fmt, ap);
2600	va_end(ap);
2601}
2602
2603static void
2604vdirerror(fsck_ino_t ino, caddr_t fmt, va_list ap)
2605{
2606	vfileerror(ino, ino, fmt, ap);
2607}
2608
2609void
2610fileerror(fsck_ino_t cwd, fsck_ino_t ino, caddr_t fmt, ...)
2611{
2612	va_list ap;
2613
2614	va_start(ap, fmt);
2615	vfileerror(cwd, ino, fmt, ap);
2616	va_end(ap);
2617}
2618
2619/*
2620 * Adds the given inode to the orphaned-directories list, limbo_dirs.
2621 * Assumes that the caller has set INCLEAR in the inode's statemap[]
2622 * entry.
2623 *
2624 * With INCLEAR set, the inode will get ignored by passes 2 and 3,
2625 * meaning it's effectively an orphan.  It needs to be noted now, so
2626 * it will be remembered in pass 4.
2627 */
2628
2629void
2630add_orphan_dir(fsck_ino_t ino)
2631{
2632	if (tsearch((void *)ino, &limbo_dirs, ino_t_cmp) == NULL)
2633		errexit("add_orphan_dir: out of memory");
2634}
2635
2636/*
2637 * Remove an inode from the orphaned-directories list, presumably
2638 * because it's been cleared.
2639 */
2640void
2641remove_orphan_dir(fsck_ino_t ino)
2642{
2643	(void) tdelete((void *)ino, &limbo_dirs, ino_t_cmp);
2644}
2645
/*
 * log_setsum() and log_checksum() are equivalent to lufs.c:setsum()
 * and lufs.c:checksum().
 *
 * log_setsum() stores in *sp the sum of the nb / sizeof (int32_t)
 * whole 32-bit words starting at lp; any trailing partial word is
 * ignored.  *sp is zeroed before the summing starts, so sp may point
 * at a word inside the summed region, in which case that word
 * contributes zero.
 *
 * The sum wraps modulo 2^32.  It is accumulated as an unsigned value
 * because overflow of a signed accumulator is undefined behavior.
 * A zero or negative nb sums nothing and leaves *sp set to zero.
 */
static void
log_setsum(int32_t *sp, int32_t *lp, int nb)
{
	uint32_t csum = 0;
	size_t nwords = 0;

	if (nb > 0)
		nwords = (size_t)nb / sizeof (int32_t);

	*sp = 0;
	while (nwords-- != 0)
		csum += (uint32_t)*lp++;
	*sp = (int32_t)csum;
}
2661
/*
 * Verify the checksum held in *sp against the nb bytes at lp, as
 * summed by log_setsum().  Returns 1 when the stored and recomputed
 * sums agree and 0 when they do not.  Either way *sp ends up holding
 * the value it had on entry.
 */
static int
log_checksum(int32_t *sp, int32_t *lp, int nb)
{
	const int32_t stored = *sp;

	/* log_setsum() overwrites *sp with the freshly computed sum. */
	log_setsum(sp, lp, nb);
	if (*sp == stored)
		return (1);

	/* Mismatch: put the caller's original value back. */
	*sp = stored;
	return (0);
}
2674