1/*
2 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
3 */
4
5/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
6/*	  All Rights Reserved  	*/
7
8/*
9 * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms are permitted
13 * provided that: (1) source distributions retain this entire copyright
14 * notice and comment, and (2) distributions including binaries display
15 * the following acknowledgement:  ``This product includes software
16 * developed by the University of California, Berkeley and its contributors''
17 * in the documentation or other materials provided with the distribution
18 * and in all advertising materials mentioning features or use of this
19 * software. Neither the name of the University nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
23 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
24 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
25 */
26
27
28#include <stdio.h>
29#include <string.h>
30#include <stdlib.h>
31#include <unistd.h>
32#include <time.h>
33#include <limits.h>
34#include <sys/param.h>
35#include <sys/types.h>
36#include <sys/sysmacros.h>
37#include <sys/mntent.h>
38#include <sys/vnode.h>
39#include <sys/fs/ufs_inode.h>
40#include <sys/fs/ufs_fs.h>
41#define	_KERNEL
42#include <sys/fs/ufs_fsdir.h>
43#undef _KERNEL
44#include <pwd.h>
45#include "fsck.h"
46
/*
 * Prototypes for the file-scope (static) helper functions.
 */
static int get_indir_offsets(int, daddr_t, int *, int *);
static int clearanentry(struct inodesc *);
static void pdinode(struct dinode *);
static void inoflush(void);
static void mark_delayed_inodes(fsck_ino_t, daddr32_t);
static int iblock(struct inodesc *, int, u_offset_t, enum cki_action);
static struct inoinfo *search_cache(struct inoinfo *, fsck_ino_t);
static int ckinode_common(struct dinode *, struct inodesc *, enum cki_action);
static int lookup_dotdot_ino(fsck_ino_t);
56
57/*
58 * ckinode() essentially traverses the blocklist of the provided
59 * inode.  For each block either the caller-supplied callback (id_func
60 * in the provided struct inodesc) or dirscan() is invoked.  Which is
61 * chosen is controlled by what type of traversal was requested
62 * (id_type) - if it was for an ADDR or ACL, use the callback,
63 * otherwise it is assumed to be DATA (i.e., a directory) whose
64 * contents need to be scanned.
65 *
66 * Note that a directory inode can get passed in with a type of ADDR;
67 * the type field is orthogonal to the IFMT value.  This is so that
68 * the file aspects (no duplicate blocks, etc) of a directory can be
69 * verified just like is done for any other file, or the actual
70 * contents can be scanned so that connectivity and such can be
71 * investigated.
72 *
73 * The traversal is controlled by flags in the return value of
74 * dirscan() or the callback.  Five flags are defined, STOP, SKIP,
75 * KEEPON, ALTERED, and FOUND.  Their semantics are:
76 *
77 *     STOP -    no further processing of this inode is desired/possible/
78 *               feasible/etc.  This can mean that whatever the scan
79 *               was searching for was found, or a serious
80 *               inconsistency was encountered, or anything else
81 *               appropriate.
82 *
83 *     SKIP -    something that made it impossible to continue was
84 *               encountered, and the caller should go on to the next
85 *               inode.  This is more for i/o failures than for
86 *               logical inconsistencies.  Nothing actually looks for
87 *               this.
88 *
89 *     KEEPON -  no more blocks of this inode need to be scanned, but
90 *               nothing's wrong, so keep on going with the next
91 *               inode.  It is similar to STOP, except that
92 *               ckinode()'s caller will typically advance to the next
93 *               inode for KEEPON, whereas it ceases scanning through
94 *               the inodes completely for STOP.
95 *
96 *     ALTERED - a change was made to the inode.  If the caller sees
97 *               this set, it should make sure to flush out the
98 *               changes.  Note that any data blocks read in by the
99 *               function need to be marked dirty by it directly;
100 *               flushing of those will happen automatically later.
101 *
102 *     FOUND -   whatever was being searched for was located.
103 *               Typically combined with STOP to avoid wasting time
104 *               doing additional looking.
105 *
106 * During a traversal, some state needs to be carried around.  At the
107 * least, the callback functions need to know what inode they're
108 * working on, which logical block, and whether or not fixing problems
109 * when they're encountered is desired.  Rather than try to guess what
110 * else might be needed (and thus end up passing way more arguments
111 * than is reasonable), all the possibilities have been bundled in
112 * struct inodesc.  About half of the fields are specific to directory
113 * traversals, and the rest are pretty much generic to any traversal.
114 *
115 * The general fields are:
116 *
117 *     id_fix        What to do when an error is found.  Generally, this
118 *                   is set to DONTKNOW before a traversal.  If a
119 *                   problem is encountered, it is changed to either FIX
120 *                   or NOFIX by the dofix() query function.  If id_fix
121 *                   has already been set to FIX when dofix() is called, then
122 *                   it includes the ALTERED flag (see above) in its return
123 *                   value; the net effect is that the inode's buffer
124 *                   will get marked dirty and written to disk at some
125 *                   point.  If id_fix is DONTKNOW, then dofix() will
126 *                   query the user.  If it is NOFIX, then dofix()
127 *                   essentially does nothing.  A few routines set NOFIX
128 *                   as the initial value, as they are performing a best-
129 *                   effort informational task, rather than an actual
130 *                   repair operation.
131 *
132 *     id_func       This is the function that will be called for every
133 *                   logical block in the file (assuming id_type is not
134 *                   DATA).  The logical block may represent a hole, so
135 *                   the callback needs to be prepared to handle that
136 *                   case.  Its return value is a combination of the flags
137 *                   described above (SKIP, ALTERED, etc).
138 *
139 *     id_number     The inode number whose block list or data is being
140 *                   scanned.
141 *
142 *     id_parent     When id_type is DATA, this is the inode number for
143 *                   the parent of id_number.  Otherwise, it is
144 *                   available for use as an extra parameter or return
145 *                   value between the callback and ckinode()'s caller.
146 *                   Which, if either, of those is left completely up to
147 *                   the two routines involved, so nothing can generally
148 *                   be assumed about the id_parent value for non-DATA
149 *                   traversals.
150 *
151 *     id_lbn        This is the current logical block (not fragment)
152 *                   number being visited by the traversal.
153 *
154 *     id_blkno      This is the physical block corresponding to id_lbn.
155 *
156 *     id_numfrags   This defines how large a block is being processed in
157 *                   this particular invocation of the callback.
158 *                   Usually, it will be the same as sblock.fs_frag.
159 *                   However, if a direct block is being processed and
160 *                   it is less than a full filesystem block,
161 *                   id_numfrags will indicate just how many fragments
162 *                   (starting from id_lbn) are actually part of the
163 *                   file.
164 *
165 *     id_truncto    The pass 4 callback is used in several places to
166 *                   free the blocks of a file (the `FILE HAS PROBLEM
167 *                   FOO; CLEAR?' scenario).  This has been generalized
168 *                   to allow truncating a file to a particular length
169 *                   rather than always completely discarding it.  If
170 *                   id_truncto is -1, then the entire file is released,
 *                   otherwise it is the logical block number to truncate
172 *                   to.  This generalized interface was motivated by a
173 *                   desire to be able to discard everything after a
174 *                   hole in a directory, rather than the entire
175 *                   directory.
176 *
177 *     id_type       Selects the type of traversal.  DATA for dirscan(),
178 *                   ADDR or ACL for using the provided callback.
179 *
180 * There are several more fields used just for dirscan() traversals:
181 *
182 *     id_filesize   The number of bytes in the overall directory left to
183 *                   process.
184 *
185 *     id_loc        Byte position within the directory block.  Should always
186 *                   point to the start of a directory entry.
187 *
188 *     id_entryno    Which logical directory entry is being processed (0
189 *                   is `.', 1 is `..', 2 and on are normal entries).
190 *                   This field is primarily used to enable special
191 *                   checks when looking at the first two entries.
192 *
193 *                   The exception (there's always an exception in fsck)
194 *                   is that in pass 1, it tracks how many fragments are
195 *                   being used by a particular inode.
196 *
197 *     id_firsthole  The first logical block number that was found to
198 *                   be zero.  As directories are not supposed to have
199 *                   holes, this marks where a directory should be
200 *                   truncated down to.  A value of -1 indicates that
201 *                   no holes were found.
202 *
203 *     id_dirp       A pointer to the in-memory copy of the current
204 *                   directory entry (as identified by id_loc).
205 *
206 *     id_name       This is a directory entry name to either create
207 *                   (callback is mkentry) or locate (callback is
208 *                   chgino, findino, or findname).
209 */
210int
211ckinode(struct dinode *dp, struct inodesc *idesc, enum cki_action action)
212{
213	struct inodesc cleardesc;
214	mode_t	mode;
215
216	if (idesc->id_filesize == 0)
217		idesc->id_filesize = (offset_t)dp->di_size;
218
219	/*
220	 * Our caller should be filtering out completely-free inodes
221	 * (mode == zero), so we'll work on the assumption that what
222	 * we're given has some basic validity.
223	 *
224	 * The kernel is inconsistent about MAXPATHLEN including the
225	 * trailing \0, so allow the more-generous length for symlinks.
226	 */
227	mode = dp->di_mode & IFMT;
228	if (mode == IFBLK || mode == IFCHR)
229		return (KEEPON);
230	if (mode == IFLNK && dp->di_size > MAXPATHLEN) {
231		pwarn("I=%d  Symlink longer than supported maximum\n",
232		    idesc->id_number);
233		init_inodesc(&cleardesc);
234		cleardesc.id_type = ADDR;
235		cleardesc.id_number = idesc->id_number;
236		cleardesc.id_fix = DONTKNOW;
237		clri(&cleardesc, "BAD", CLRI_VERBOSE, CLRI_NOP_CORRUPT);
238		return (STOP);
239	}
240	return (ckinode_common(dp, idesc, action));
241}
242
/*
 * This was split out from ckinode() to allow it to be used
 * without having to pass in kludge flags to suppress the
 * wrong-for-deletion initialization and irrelevant checks.
 * This feature is no longer needed, but is being kept in case
 * the need comes back.
 *
 * The traversal has two phases.  First the NDADDR direct block
 * pointers (di_db[]) are visited, handing each non-zero one to either
 * the id_func callback (ADDR and ACL traversals) or to dirscan().
 * Then each of the NIADDR indirect pointers (di_ib[]) is handed to
 * iblock().  When action is CKI_TRUNCATE, blocks at or beyond
 * id_truncto are released with freeblk() along the way.
 *
 * Returns the callback's flags as soon as they include STOP,
 * otherwise KEEPON once both phases are complete.
 */
static int
ckinode_common(struct dinode *dp, struct inodesc *idesc,
	enum cki_action action)
{
	offset_t offset;
	struct dinode dino;
	daddr_t ndb;
	int indir_data_blks, last_indir_blk;
	int ret, i, frags;

	/*
	 * Work from a private copy of the inode; dp itself gets
	 * re-pointed by the ginode() calls further down.
	 */
	(void) memmove(&dino, dp, sizeof (struct dinode));
	/* Number of filesystem blocks needed to hold di_size bytes. */
	ndb = howmany(dino.di_size, (u_offset_t)sblock.fs_bsize);

	for (i = 0; i < NDADDR; i++) {
		idesc->id_lbn++;
		offset = blkoff(&sblock, dino.di_size);
		/*
		 * ndb reaching zero marks the file's final block; if the
		 * size leaves a non-zero offset into that block, only
		 * the fragments needed to cover it belong to the file.
		 */
		if ((--ndb == 0) && (offset != 0)) {
			idesc->id_numfrags =
			    numfrags(&sblock, fragroundup(&sblock, offset));
		} else {
			idesc->id_numfrags = sblock.fs_frag;
		}
		if (dino.di_db[i] == 0) {
			/*
			 * Only remember the first hole, and only when
			 * the size says more blocks should follow it.
			 */
			if ((ndb > 0) && (idesc->id_firsthole < 0)) {
				idesc->id_firsthole = i;
			}
			continue;
		}
		idesc->id_blkno = dino.di_db[i];
		if (idesc->id_type == ADDR || idesc->id_type == ACL)
			ret = (*idesc->id_func)(idesc);
		else
			ret = dirscan(idesc);

		/*
		 * Need to clear the entry, now that we're done with
		 * it.  We depend on freeblk() ignoring a request to
		 * free already-free fragments to handle the problem of
		 * a partial block.
		 */
		if ((action == CKI_TRUNCATE) &&
		    (idesc->id_truncto >= 0) &&
		    (idesc->id_lbn >= idesc->id_truncto)) {
			dp = ginode(idesc->id_number);
			/*
			 * The (int) cast is safe, in that if di_size won't
			 * fit, it'll be a multiple of any legal fs_frag,
			 * thus giving a zero result.  That value, in turn
			 * means we're doing an entire block.
			 */
			frags = howmany((int)dp->di_size, sblock.fs_fsize) %
			    sblock.fs_frag;
			if (frags == 0)
				frags = sblock.fs_frag;
			freeblk(idesc->id_number, dp->di_db[i],
			    frags);
			/*
			 * Fetch the inode again before modifying it, since
			 * the pointer obtained above may no longer be valid
			 * after freeblk().
			 */
			dp = ginode(idesc->id_number);
			dp->di_db[i] = 0;
			inodirty();
			ret |= ALTERED;
		}

		if (ret & STOP)
			return (ret);
	}

#ifdef lint
	/*
	 * Cure a lint complaint of ``possible use before set''.
	 * Apparently it can't quite figure out the switch statement.
	 */
	indir_data_blks = 0;
#endif
	/*
	 * indir_data_blks contains the number of data blocks in all
	 * the previous levels for this iteration.  E.g., for the
	 * single indirect case (i = 0, di_ib[i] != 0), NDADDR's worth
	 * of blocks have already been covered by the direct blocks
	 * (di_db[]).  At the triple indirect level (i = NIADDR - 1),
	 * it is all of the number of data blocks that were covered
	 * by the second indirect, single indirect, and direct block
	 * levels.
	 */
	idesc->id_numfrags = sblock.fs_frag;
	/* ndb was counted down by the loop above, so compute it afresh. */
	ndb = howmany(dino.di_size, (u_offset_t)sblock.fs_bsize);
	for (i = 0; i < NIADDR; i++) {
		/* Only the two out-parameters are wanted here. */
		(void) get_indir_offsets(i, ndb, &indir_data_blks,
		    &last_indir_blk);
		if (dino.di_ib[i] != 0) {
			/*
			 * We'll only clear di_ib[i] if the first entry (and
			 * therefore all of them) is to be cleared, since we
			 * only go through this code on the first entry of
			 * each level of indirection.  The +1 is to account
			 * for the fact that we don't modify id_lbn until
			 * we actually start processing on a data block.
			 *
			 * NOTE(review): the code below releases the block
			 * with freeblk() but does not itself zero di_ib[i];
			 * confirm that is done elsewhere.
			 */
			idesc->id_blkno = dino.di_ib[i];
			/*
			 * The third argument is the number of the file's
			 * blocks not already covered by earlier levels.
			 */
			ret = iblock(idesc, i + 1,
			    (u_offset_t)howmany(dino.di_size,
			    (u_offset_t)sblock.fs_bsize) - indir_data_blks,
			    action);
			if ((action == CKI_TRUNCATE) &&
			    (idesc->id_truncto <= indir_data_blks) &&
			    ((idesc->id_lbn + 1) >= indir_data_blks) &&
			    ((idesc->id_lbn + 1) <= last_indir_blk)) {
				dp = ginode(idesc->id_number);
				if (dp->di_ib[i] != 0) {
					freeblk(idesc->id_number, dp->di_ib[i],
					    sblock.fs_frag);
				}
			}
			if (ret & STOP)
				return (ret);
		} else {
			/*
			 * Need to know which of the file's logical blocks
			 * reside in the missing indirect block.  However, the
			 * precise location is only needed for truncating
			 * directories, and level-of-indirection precision is
			 * sufficient for that.
			 */
			if ((indir_data_blks < ndb) &&
			    (idesc->id_firsthole < 0)) {
				idesc->id_firsthole = indir_data_blks;
			}
		}
	}
	return (KEEPON);
}
380
381static int
382get_indir_offsets(int ilevel_wanted, daddr_t ndb, int *data_blks,
383	int *last_blk)
384{
385	int ndb_ilevel = -1;
386	int ilevel;
387	int dblks, lblk;
388
389	for (ilevel = 0; ilevel < NIADDR; ilevel++) {
390		switch (ilevel) {
391		case 0:	/* SINGLE */
392			dblks = NDADDR;
393			lblk = dblks + NINDIR(&sblock) - 1;
394			break;
395		case 1:	/* DOUBLE */
396			dblks = NDADDR + NINDIR(&sblock);
397			lblk = dblks + (NINDIR(&sblock) * NINDIR(&sblock)) - 1;
398			break;
399		case 2:	/* TRIPLE */
400			dblks = NDADDR + NINDIR(&sblock) +
401			    (NINDIR(&sblock) * NINDIR(&sblock));
402			lblk = dblks + (NINDIR(&sblock) * NINDIR(&sblock) *
403			    NINDIR(&sblock)) - 1;
404			break;
405		default:
406			exitstat = EXERRFATAL;
407			/*
408			 * Translate from zero-based array to
409			 * one-based human-style counting.
410			 */
411			errexit("panic: indirection level %d not 1, 2, or 3",
412			    ilevel + 1);
413			/* NOTREACHED */
414		}
415
416		if (dblks < ndb && ndb <= lblk)
417			ndb_ilevel = ilevel;
418
419		if (ilevel == ilevel_wanted) {
420			if (data_blks != NULL)
421				*data_blks = dblks;
422			if (last_blk != NULL)
423				*last_blk = lblk;
424		}
425	}
426
427	return (ndb_ilevel);
428}
429
/*
 * Process one indirect block of an inode's block list, recursing
 * through any further levels of indirection beneath it.
 *
 *     idesc    Traversal state.  On entry id_blkno is the indirect
 *              block to examine; id_blkno and id_lbn are updated as
 *              the entries are visited.
 *     ilevel   Level of indirection of this block.  With a value of 1
 *              the entries are handed to the callback as data blocks;
 *              with larger values each entry is itself processed by a
 *              recursive call.
 *     iblks    Number of the file's blocks, according to its size,
 *              still to be accounted for at this point.
 *     action   When CKI_TRUNCATE, entries at or beyond id_truncto are
 *              released with freeblk(), and a STOP from a callback
 *              does not end the sweep.
 *
 * Returns the callback's flags if an ADDR callback on the indirect
 * block itself did not ask to KEEPON; STOP (ACL) or SKIP (otherwise)
 * if the block number fails chkrange(); SKIP if the block could not
 * be read; the flags of whatever asked to STOP when not truncating;
 * STOP combined with the latest flags if a hole is found in a DATA
 * traversal; and KEEPON otherwise.
 */
static int
iblock(struct inodesc *idesc, int ilevel, u_offset_t iblks,
	enum cki_action action)
{
	struct bufarea *bp;
	int i, n;
	int (*func)(struct inodesc *) = NULL;
	u_offset_t fsbperindirb;
	daddr32_t last_lbn;
	int nif;
	char buf[BUFSIZ];

	n = KEEPON;

	/*
	 * Select the per-entry callback.  For ADDR traversals the
	 * callback is also run once on the indirect block itself,
	 * before any of its contents are looked at.
	 */
	switch (idesc->id_type) {
	case ADDR:
		func = idesc->id_func;
		if (((n = (*func)(idesc)) & KEEPON) == 0)
				return (n);
		break;
	case ACL:
		func = idesc->id_func;
		break;
	case DATA:
		func = dirscan;
		break;
	default:
		errexit("unknown inodesc type %d in iblock()", idesc->id_type);
		/* NOTREACHED */
	}
	/* Don't try to read an indirect block with a bad address. */
	if (chkrange(idesc->id_blkno, idesc->id_numfrags)) {
		return ((idesc->id_type == ACL) ? STOP : SKIP);
	}

	bp = getdatablk(idesc->id_blkno, (size_t)sblock.fs_bsize);
	if (bp->b_errs != 0) {
		brelse(bp);
		return (SKIP);
	}

	/* From here on, ilevel is the level of the entries in this block. */
	ilevel--;
	/*
	 * Trivia note: the BSD fsck has the number of bytes remaining
	 * as the third argument to iblock(), so the equivalent of
	 * fsbperindirb starts at fs_bsize instead of one.  We're
	 * working in units of filesystem blocks here, not bytes or
	 * fragments.
	 */
	for (fsbperindirb = 1, i = 0; i < ilevel; i++) {
		fsbperindirb *= (u_offset_t)NINDIR(&sblock);
	}
	/*
	 * nif indicates the next "free" pointer (as an array index) in this
	 * indirect block, based on counting the blocks remaining in the
	 * file after subtracting all previously processed blocks.
	 * This figure is based on the size field of the inode.
	 *
	 * Note that in normal operation, nif may initially be calculated
	 * as larger than the number of pointers in this block (as when
	 * there are more indirect blocks following); if that is
	 * the case, nif is limited to the max number of pointers per
	 * indirect block.
	 *
	 * Also note that if an inode is inconsistent (has more blocks
	 * allocated to it than the size field would indicate), the sweep
	 * through any indirect blocks directly pointed at by the inode
	 * continues. Since the block offset of any data blocks referenced
	 * by these indirect blocks is greater than the size of the file,
	 * the index nif may be computed as a negative value.
	 * In this case, we reset nif to indicate that all pointers in
	 * this retrieval block should be zeroed and the resulting
	 * unreferenced data and/or retrieval blocks will be recovered
	 * through garbage collection later.
	 */
	nif = (offset_t)howmany(iblks, fsbperindirb);
	if (nif > NINDIR(&sblock))
		nif = NINDIR(&sblock);
	else if (nif < 0)
		nif = 0;
	/*
	 * first pass: all "free" retrieval pointers (from [nif] thru
	 *	the end of the indirect block) should be zero. (This
	 *	assertion does not hold for directories, which may be
	 *	truncated without releasing their allocated space)
	 *
	 *	This pass is only made when the callback is pass1check
	 *	or pass3bcheck.
	 */
	if (nif < NINDIR(&sblock) && (idesc->id_func == pass1check ||
	    idesc->id_func == pass3bcheck)) {
		for (i = nif; i < NINDIR(&sblock); i++) {
			if (bp->b_un.b_indir[i] == 0)
				continue;
			/*
			 * NOTE(review): buf is handed to pfatal() and
			 * dofix() as the message itself; it holds only
			 * fixed text and a decimal number.
			 */
			(void) sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu",
			    (ulong_t)idesc->id_number);
			if (preen) {
				pfatal(buf);
			} else if (dofix(idesc, buf)) {
				freeblk(idesc->id_number,
				    bp->b_un.b_indir[i],
				    sblock.fs_frag);
				bp->b_un.b_indir[i] = 0;
				dirty(bp);
			}
		}
		flush(fswritefd, bp);
	}
	/*
	 * second pass: all retrieval pointers referring to blocks within
	 *	a valid range [0..filesize] (both indirect and data blocks)
	 *	are examined in the same manner as ckinode() checks the
	 *	direct blocks in the inode.  Sweep through from
	 *	the first pointer in this retrieval block to [nif-1].
	 */
	last_lbn = howmany(idesc->id_filesize, sblock.fs_bsize);
	for (i = 0; i < nif; i++) {
		/* id_lbn only advances when the entries are data blocks. */
		if (ilevel == 0)
			idesc->id_lbn++;
		if (bp->b_un.b_indir[i] != 0) {
			idesc->id_blkno = bp->b_un.b_indir[i];
			if (ilevel > 0) {
				n = iblock(idesc, ilevel, iblks, action);
				/*
				 * Each iteration decreases "remaining block
				 * count" by the number of blocks accessible
				 * by a pointer at this indirect block level.
				 */
				iblks -= fsbperindirb;
			} else {
				/*
				 * If we're truncating, func will discard
				 * the data block for us.
				 */
				n = (*func)(idesc);
			}

			if ((action == CKI_TRUNCATE) &&
			    (idesc->id_truncto >= 0) &&
			    (idesc->id_lbn >= idesc->id_truncto)) {
				freeblk(idesc->id_number,  bp->b_un.b_indir[i],
				    sblock.fs_frag);
			}

			/*
			 * Note that truncation never gets STOP back
			 * under normal circumstances.  Abnormal would
			 * be a bad acl short-circuit in iblock() or
			 * an out-of-range failure in pass4check().
			 * We still want to keep going when truncating
			 * under those circumstances, since the whole
			 * point of truncating is to get rid of all
			 * that.
			 */
			if ((n & STOP) && (action != CKI_TRUNCATE)) {
				brelse(bp);
				return (n);
			}
		} else {
			/*
			 * A zero entry is a hole.  Remember the first one
			 * that lies before the end of the file.
			 */
			if ((idesc->id_lbn < last_lbn) &&
			    (idesc->id_firsthole < 0)) {
				idesc->id_firsthole = idesc->id_lbn;
			}
			if (idesc->id_type == DATA) {
				/*
				 * No point in continuing in the indirect
				 * blocks of a directory, since they'll just
				 * get freed anyway.
				 */
				brelse(bp);
				return ((n & ~KEEPON) | STOP);
			}
		}
	}

	brelse(bp);
	return (KEEPON);
}
604
605/*
606 * Check that a block is a legal block number.
607 * Return 0 if in range, 1 if out of range.
608 */
609int
610chkrange(daddr32_t blk, int cnt)
611{
612	int c;
613
614	if (cnt <= 0 || blk <= 0 || ((unsigned)blk >= (unsigned)maxfsblock) ||
615	    ((cnt - 1) > (maxfsblock - blk))) {
616		if (debug)
617			(void) printf(
618			    "Bad fragment range: should be 1 <= %d..%d < %d\n",
619			    blk, blk + cnt, maxfsblock);
620		return (1);
621	}
622	if ((cnt > sblock.fs_frag) ||
623	    ((fragnum(&sblock, blk) + cnt) > sblock.fs_frag)) {
624		if (debug)
625			(void) printf("Bad fragment size: size %d\n", cnt);
626		return (1);
627	}
628	c = dtog(&sblock, blk);
629	if (blk < cgdmin(&sblock, c)) {
630		if ((unsigned)(blk + cnt) > (unsigned)cgsblock(&sblock, c)) {
631			if (debug)
632				(void) printf(
633	    "Bad fragment position: %d..%d spans start of cg metadata\n",
634				    blk, blk + cnt);
635			return (1);
636		}
637	} else {
638		if ((unsigned)(blk + cnt) > (unsigned)cgbase(&sblock, c+1)) {
639			if (debug)
640				(void) printf(
641				    "Bad frag pos: %d..%d crosses end of cg\n",
642				    blk, blk + cnt);
643			return (1);
644		}
645	}
646	return (0);
647}
648
649/*
650 * General purpose interface for reading inodes.
651 */
652
653/*
654 * Note that any call to ginode() can potentially invalidate any
655 * dinode pointers previously acquired from it.  To avoid pain,
656 * make sure to always call inodirty() immediately after modifying
657 * an inode, if there's any chance of ginode() being called after
658 * that.  Also, always call ginode() right before you need to access
659 * an inode, so that there won't be any surprises from functions
660 * called between the previous ginode() invocation and the dinode
661 * use.
662 *
663 * Despite all that, we aren't doing the amount of i/o that's implied,
664 * as we use the buffer cache that getdatablk() and friends maintain.
665 */
666static fsck_ino_t startinum = -1;
667
668struct dinode *
669ginode(fsck_ino_t inum)
670{
671	daddr32_t iblk;
672	struct dinode *dp;
673
674	if (inum < UFSROOTINO || inum > maxino) {
675		errexit("bad inode number %d to ginode\n", inum);
676	}
677	if (startinum == -1 ||
678	    pbp == NULL ||
679	    inum < startinum ||
680	    inum >= (fsck_ino_t)(startinum + (fsck_ino_t)INOPB(&sblock))) {
681		iblk = itod(&sblock, inum);
682		if (pbp != NULL) {
683			brelse(pbp);
684		}
685		/*
686		 * We don't check for errors here, because we can't
687		 * tell our caller about it, and the zeros that will
688		 * be in the buffer are just as good as anything we
689		 * could fake.
690		 */
691		pbp = getdatablk(iblk, (size_t)sblock.fs_bsize);
692		startinum =
693		    (fsck_ino_t)((inum / INOPB(&sblock)) * INOPB(&sblock));
694	}
695	dp = &pbp->b_un.b_dinode[inum % INOPB(&sblock)];
696	if (dp->di_suid != UID_LONG)
697		dp->di_uid = dp->di_suid;
698	if (dp->di_sgid != GID_LONG)
699		dp->di_gid = dp->di_sgid;
700	return (dp);
701}
702
703/*
704 * Special purpose version of ginode used to optimize first pass
705 * over all the inodes in numerical order.  It bypasses the buffer
706 * system used by ginode(), etc in favour of reading the bulk of a
707 * cg's inodes at one time.
708 */
709static fsck_ino_t nextino, lastinum;
710static int64_t readcnt, readpercg, fullcnt, inobufsize;
711static int64_t partialcnt, partialsize;
712static size_t lastsize;
713static struct dinode *inodebuf;
714static diskaddr_t currentdblk;
715static struct dinode *currentinode;
716
717struct dinode *
718getnextinode(fsck_ino_t inum)
719{
720	size_t size;
721	diskaddr_t dblk;
722	static struct dinode *dp;
723
724	if (inum != nextino++ || inum > maxino)
725		errexit("bad inode number %d to nextinode\n", inum);
726
727	/*
728	 * Will always go into the if() the first time we're called,
729	 * so dp will always be valid.
730	 */
731	if (inum >= lastinum) {
732		readcnt++;
733		dblk = fsbtodb(&sblock, itod(&sblock, lastinum));
734		currentdblk = dblk;
735		if (readcnt % readpercg == 0) {
736			if (partialsize > SIZE_MAX)
737				errexit(
738				    "Internal error: partialsize overflow");
739			size = (size_t)partialsize;
740			lastinum += partialcnt;
741		} else {
742			if (inobufsize > SIZE_MAX)
743				errexit("Internal error: inobufsize overflow");
744			size = (size_t)inobufsize;
745			lastinum += fullcnt;
746		}
747		/*
748		 * If fsck_bread() returns an error, it will already have
749		 * zeroed out the buffer, so we do not need to do so here.
750		 */
751		(void) fsck_bread(fsreadfd, (caddr_t)inodebuf, dblk, size);
752		lastsize = size;
753		dp = inodebuf;
754	}
755	currentinode = dp;
756	return (dp++);
757}
758
759/*
760 * Reread the current getnext() buffer.  This allows for changing inodes
761 * other than the current one via ginode()/inodirty()/inoflush().
762 *
763 * Just reuses all the interesting variables that getnextinode() set up
764 * last time it was called.  This shouldn't get called often, so we don't
765 * try to figure out if the caller's actually touched an inode in the
766 * range we have cached.  There could have been an arbitrary number of
767 * them, after all.
768 */
769struct dinode *
770getnextrefresh(void)
771{
772	if (inodebuf == NULL) {
773		return (NULL);
774	}
775
776	inoflush();
777	(void) fsck_bread(fsreadfd, (caddr_t)inodebuf, currentdblk, lastsize);
778	return (currentinode);
779}
780
781void
782resetinodebuf(void)
783{
784	startinum = 0;
785	nextino = 0;
786	lastinum = 0;
787	readcnt = 0;
788	inobufsize = blkroundup(&sblock, INOBUFSIZE);
789	fullcnt = inobufsize / sizeof (struct dinode);
790	readpercg = sblock.fs_ipg / fullcnt;
791	partialcnt = sblock.fs_ipg % fullcnt;
792	partialsize = partialcnt * sizeof (struct dinode);
793	if (partialcnt != 0) {
794		readpercg++;
795	} else {
796		partialcnt = fullcnt;
797		partialsize = inobufsize;
798	}
799	if (inodebuf == NULL &&
800	    (inodebuf = (struct dinode *)malloc((unsigned)inobufsize)) == NULL)
801		errexit("Cannot allocate space for inode buffer\n");
802	while (nextino < UFSROOTINO)
803		(void) getnextinode(nextino);
804}
805
806void
807freeinodebuf(void)
808{
809	if (inodebuf != NULL) {
810		free((void *)inodebuf);
811	}
812	inodebuf = NULL;
813}
814
815/*
816 * Routines to maintain information about directory inodes.
817 * This is built during the first pass and used during the
818 * second and third passes.
819 *
820 * Enter inodes into the cache.
821 */
822void
823cacheino(struct dinode *dp, fsck_ino_t inum)
824{
825	struct inoinfo *inp;
826	struct inoinfo **inpp;
827	uint_t blks;
828
829	blks = NDADDR + NIADDR;
830	inp = (struct inoinfo *)
831	    malloc(sizeof (*inp) + (blks - 1) * sizeof (daddr32_t));
832	if (inp == NULL)
833		errexit("Cannot increase directory list\n");
834	init_inoinfo(inp, dp, inum); /* doesn't touch i_nextlist or i_number */
835	inpp = &inphead[inum % numdirs];
836	inp->i_nextlist = *inpp;
837	*inpp = inp;
838	inp->i_number = inum;
839	if (inplast == listmax) {
840		listmax += 100;
841		inpsort = (struct inoinfo **)realloc((void *)inpsort,
842		    (unsigned)listmax * sizeof (struct inoinfo *));
843		if (inpsort == NULL)
844			errexit("cannot increase directory list");
845	}
846	inpsort[inplast++] = inp;
847}
848
849/*
850 * Look up an inode cache structure.
851 */
852struct inoinfo *
853getinoinfo(fsck_ino_t inum)
854{
855	struct inoinfo *inp;
856
857	inp = search_cache(inphead[inum % numdirs], inum);
858	return (inp);
859}
860
861/*
862 * Determine whether inode is in cache.
863 */
864int
865inocached(fsck_ino_t inum)
866{
867	return (search_cache(inphead[inum % numdirs], inum) != NULL);
868}
869
870/*
871 * Clean up all the inode cache structure.
872 */
873void
874inocleanup(void)
875{
876	struct inoinfo **inpp;
877
878	if (inphead == NULL)
879		return;
880	for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) {
881		free((void *)(*inpp));
882	}
883	free((void *)inphead);
884	free((void *)inpsort);
885	inphead = inpsort = NULL;
886}
887
888/*
889 * Routines to maintain information about acl inodes.
890 * This is built during the first pass and used during the
891 * second and third passes.
892 *
893 * Enter acl inodes into the cache.
894 */
895void
896cacheacl(struct dinode *dp, fsck_ino_t inum)
897{
898	struct inoinfo *aclp;
899	struct inoinfo **aclpp;
900	uint_t blks;
901
902	blks = NDADDR + NIADDR;
903	aclp = (struct inoinfo *)
904	    malloc(sizeof (*aclp) + (blks - 1) * sizeof (daddr32_t));
905	if (aclp == NULL)
906		return;
907	aclpp = &aclphead[inum % numacls];
908	aclp->i_nextlist = *aclpp;
909	*aclpp = aclp;
910	aclp->i_number = inum;
911	aclp->i_isize = (offset_t)dp->di_size;
912	aclp->i_blkssize = (size_t)(blks * sizeof (daddr32_t));
913	(void) memmove(&aclp->i_blks[0], &dp->di_db[0], aclp->i_blkssize);
914	if (aclplast == aclmax) {
915		aclmax += 100;
916		aclpsort = (struct inoinfo **)realloc((char *)aclpsort,
917		    (unsigned)aclmax * sizeof (struct inoinfo *));
918		if (aclpsort == NULL)
919			errexit("cannot increase acl list");
920	}
921	aclpsort[aclplast++] = aclp;
922}
923
924
925/*
926 * Generic cache search function.
927 * ROOT is the first entry in a hash chain (the caller is expected
928 * to have done the initial bucket lookup).  KEY is what's being
929 * searched for.
930 *
931 * Returns a pointer to the entry if it is found, NULL otherwise.
932 */
933static struct inoinfo *
934search_cache(struct inoinfo *element, fsck_ino_t key)
935{
936	while (element != NULL) {
937		if (element->i_number == key)
938			break;
939		element = element->i_nextlist;
940	}
941
942	return (element);
943}
944
/*
 * Flag the buffer that pbp refers to as modified, by way of dirty().
 *
 * NOTE(review): pbp is presumably the buffer backing the inode most
 * recently returned by ginode() -- confirm.  Unlike inoflush(), no
 * check for a NULL pbp is made here.
 */
void
inodirty(void)
{
	dirty(pbp);
}
950
951static void
952inoflush(void)
953{
954	if (pbp != NULL)
955		flush(fswritefd, pbp);
956}
957
958/*
959 * Interactive wrapper for freeino(), for those times when we're
960 * not sure if we should throw something away.
961 */
962void
963clri(struct inodesc *idesc, char *type, int verbose, int corrupting)
964{
965	int need_parent;
966	struct dinode *dp;
967
968	if (statemap[idesc->id_number] == USTATE)
969		return;
970
971	dp = ginode(idesc->id_number);
972	if (verbose == CLRI_VERBOSE) {
973		pwarn("%s %s", type, file_id(idesc->id_number, dp->di_mode));
974		pinode(idesc->id_number);
975	}
976	if (preen || (reply("CLEAR") == 1)) {
977		need_parent = (corrupting == CLRI_NOP_OK) ?
978		    TI_NOPARENT : TI_PARENT;
979		freeino(idesc->id_number, need_parent);
980		if (preen)
981			(void) printf(" (CLEARED)\n");
982		remove_orphan_dir(idesc->id_number);
983	} else if (corrupting == CLRI_NOP_CORRUPT) {
984		iscorrupt = 1;
985	}
986	(void) printf("\n");
987}
988
989/*
990 * Find the directory entry for the inode noted in id_parent (which is
991 * not necessarily the parent of anything, we're just using a convenient
992 * field.
993 */
994int
995findname(struct inodesc *idesc)
996{
997	struct direct *dirp = idesc->id_dirp;
998
999	if (dirp->d_ino != idesc->id_parent)
1000		return (KEEPON);
1001	(void) memmove(idesc->id_name, dirp->d_name,
1002	    MIN(dirp->d_namlen, MAXNAMLEN) + 1);
1003	return (STOP|FOUND);
1004}
1005
1006/*
1007 * Find the inode number associated with the given name.
1008 */
1009int
1010findino(struct inodesc *idesc)
1011{
1012	struct direct *dirp = idesc->id_dirp;
1013
1014	if (dirp->d_ino == 0)
1015		return (KEEPON);
1016	if (strcmp(dirp->d_name, idesc->id_name) == 0 &&
1017	    dirp->d_ino >= UFSROOTINO && dirp->d_ino <= maxino) {
1018		idesc->id_parent = dirp->d_ino;
1019		return (STOP|FOUND);
1020	}
1021	return (KEEPON);
1022}
1023
1024int
1025cleardirentry(fsck_ino_t parentdir, fsck_ino_t target)
1026{
1027	struct inodesc idesc;
1028	struct dinode *dp;
1029
1030	dp = ginode(parentdir);
1031	init_inodesc(&idesc);
1032	idesc.id_func = clearanentry;
1033	idesc.id_parent = target;
1034	idesc.id_type = DATA;
1035	idesc.id_fix = NOFIX;
1036	return (ckinode(dp, &idesc, CKI_TRAVERSE));
1037}
1038
1039static int
1040clearanentry(struct inodesc *idesc)
1041{
1042	struct direct *dirp = idesc->id_dirp;
1043
1044	if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1045		idesc->id_entryno++;
1046		return (KEEPON);
1047	}
1048	dirp->d_ino = 0;
1049	return (STOP|FOUND|ALTERED);
1050}
1051
1052void
1053pinode(fsck_ino_t ino)
1054{
1055	struct dinode *dp;
1056
1057	(void) printf(" I=%lu ", (ulong_t)ino);
1058	if (ino < UFSROOTINO || ino > maxino)
1059		return;
1060	dp = ginode(ino);
1061	pdinode(dp);
1062}
1063
1064static void
1065pdinode(struct dinode *dp)
1066{
1067	char *p;
1068	struct passwd *pw;
1069	time_t t;
1070
1071	(void) printf(" OWNER=");
1072	if ((pw = getpwuid((int)dp->di_uid)) != 0)
1073		(void) printf("%s ", pw->pw_name);
1074	else
1075		(void) printf("%lu ", (ulong_t)dp->di_uid);
1076	(void) printf("MODE=%o\n", dp->di_mode);
1077	if (preen)
1078		(void) printf("%s: ", devname);
1079	(void) printf("SIZE=%lld ", (longlong_t)dp->di_size);
1080
1081	/* ctime() ignores LOCALE, so this is safe */
1082	t = (time_t)dp->di_mtime;
1083	p = ctime(&t);
1084	(void) printf("MTIME=%12.12s %4.4s ", p + 4, p + 20);
1085}
1086
1087void
1088blkerror(fsck_ino_t ino, char *type, daddr32_t blk, daddr32_t lbn)
1089{
1090	pfatal("FRAGMENT %d %s I=%u LFN %d", blk, type, ino, lbn);
1091	(void) printf("\n");
1092
1093	switch (statemap[ino] & ~INDELAYD) {
1094
1095	case FSTATE:
1096	case FZLINK:
1097		statemap[ino] = FCLEAR;
1098		return;
1099
1100	case DFOUND:
1101	case DSTATE:
1102	case DZLINK:
1103		statemap[ino] = DCLEAR;
1104		add_orphan_dir(ino);
1105		return;
1106
1107	case SSTATE:
1108		statemap[ino] = SCLEAR;
1109		return;
1110
1111	case FCLEAR:
1112	case DCLEAR:
1113	case SCLEAR:
1114		return;
1115
1116	default:
1117		errexit("BAD STATE 0x%x TO BLKERR\n", statemap[ino]);
1118		/* NOTREACHED */
1119	}
1120}
1121
/*
 * allocate an unused inode
 *
 * REQUEST is either UFSROOTINO (allocate exactly that inode) or 0
 * (allocate the lowest-numbered inode, starting at UFSROOTINO, whose
 * statemap entry is USTATE).  TYPE is the mode for the new inode;
 * only the IFDIR, IFREG and IFLNK file types are accepted.
 *
 * On success the inode is marked in-use in its cylinder group's
 * bitmap and counts, its statemap entry is set, and the on-disk inode
 * is zeroed and then given the requested mode, the current time for
 * all three timestamps, one allocation obtained from allocblk(1) in
 * di_db[0], and a size of one fragment (fs_fsize).
 *
 * Returns the inode number allocated, or 0 if the requested inode is
 * in use, no unused inode exists below maxino, TYPE is unsupported,
 * or allocblk() fails.
 */
fsck_ino_t
allocino(fsck_ino_t request, int type)
{
	fsck_ino_t ino;
	struct dinode *dp;
	struct cg *cgp = &cgrp;
	int cg;
	time_t t;
	caddr_t err;

	/* Only enforced when debugging is enabled. */
	if (debug && (request != 0) && (request != UFSROOTINO))
		errexit("assertion failed: allocino() asked for "
		    "inode %d instead of 0 or %d",
		    (int)request, (int)UFSROOTINO);

	/*
	 * We know that we're only going to get requests for UFSROOTINO
	 * or 0.  If UFSROOTINO is wanted, then it better be available
	 * because our caller is trying to recreate the root directory.
	 * If we're asked for 0, then which one we return doesn't matter.
	 * We know that inodes 0 and 1 are never valid to return, so we
	 * start at the lowest-legal inode number.
	 *
	 * If we got a request for UFSROOTINO, then request != 0, and
	 * this pair of conditionals is the only place that treats
	 * UFSROOTINO specially.
	 */
	if (request == 0)
		request = UFSROOTINO;
	else if (statemap[request] != USTATE)
		return (0);

	/*
	 * Doesn't do wrapping, since we know we started at
	 * the smallest inode.
	 */
	for (ino = request; ino < maxino; ino++)
		if (statemap[ino] == USTATE)
			break;
	/* Ran off the end without finding an unused inode. */
	if (ino == maxino)
		return (0);

	/*
	 * In pass5, we'll calculate the bitmaps and counts all again from
	 * scratch and do a comparison, but for that to work the cg has
	 * to know what in-memory changes we've made to it.  If we have
	 * trouble reading the cg, cg_sanity() should kick it out so
	 * we can skip explicit i/o error checking here.
	 */
	cg = itog(&sblock, ino);
	(void) getblk(&cgblk, cgtod(&sblock, cg), (size_t)sblock.fs_cgsize);
	err = cg_sanity(cgp, cg);
	if (err != NULL) {
		/* The message returned by cg_sanity() is ours to free. */
		pfatal("CG %d: %s\n", cg, err);
		free((void *)err);
		if (reply("REPAIR") == 0)
			errexit("Program terminated.");
		fix_cg(cgp, cg);
	}
	/* Record the inode as in use within its cylinder group. */
	setbit(cg_inosused(cgp), ino % sblock.fs_ipg);
	cgp->cg_cs.cs_nifree--;
	cgdirty();

	/* Keep lastino at the highest inode number handed out so far. */
	if (lastino < ino)
		lastino = ino;

	/*
	 * Don't currently support IFATTRDIR or any of the other
	 * types, as they aren't needed.
	 */
	switch (type & IFMT) {
	case IFDIR:
		statemap[ino] = DSTATE;
		cgp->cg_cs.cs_ndir++;
		break;
	case IFREG:
	case IFLNK:
		statemap[ino] = FSTATE;
		break;
	default:
		/*
		 * Pretend nothing ever happened.  This clears the
		 * dirty flag, among other things.
		 */
		initbarea(&cgblk);
		if (debug)
			(void) printf("allocino: unknown type 0%o\n",
			    type & IFMT);
		return (0);
	}

	/*
	 * We're allocating what should be a completely-unused inode,
	 * so make sure we don't inherit anything from any previous
	 * incarnations.
	 */
	dp = ginode(ino);
	(void) memset((void *)dp, 0, sizeof (struct dinode));
	dp->di_db[0] = allocblk(1);
	if (dp->di_db[0] == 0) {
		/*
		 * NOTE(review): only the statemap entry is reset here;
		 * the cylinder group changes made above are not visibly
		 * undone on this path -- confirm that is intended.
		 */
		statemap[ino] = USTATE;
		return (0);
	}
	dp->di_mode = (mode_t)type;
	(void) time(&t);
	dp->di_atime = (time32_t)t;
	dp->di_ctime = dp->di_atime;
	dp->di_mtime = dp->di_ctime;
	dp->di_size = (u_offset_t)sblock.fs_fsize;
	dp->di_blocks = btodb(sblock.fs_fsize);
	n_files++;
	inodirty();
	return (ino);
}
1239
1240/*
1241 * Release some or all of the blocks of an inode.
1242 * Only truncates down.  Assumes new_length is appropriately aligned
1243 * to a block boundary (or a directory block boundary, if it's a
1244 * directory).
1245 *
1246 * If this is a directory, discard all of its contents first, so
1247 * we don't create a bunch of orphans that would need another fsck
1248 * run to clean up.
1249 *
1250 * Even if truncating to zero length, the inode remains allocated.
1251 */
1252void
1253truncino(fsck_ino_t ino, offset_t new_length, int update)
1254{
1255	struct inodesc idesc;
1256	struct inoinfo *iip;
1257	struct dinode *dp;
1258	fsck_ino_t parent;
1259	mode_t mode;
1260	caddr_t message;
1261	int isdir, islink;
1262	int ilevel, dblk;
1263
1264	dp = ginode(ino);
1265	mode = (dp->di_mode & IFMT);
1266	isdir = (mode == IFDIR) || (mode == IFATTRDIR);
1267	islink = (mode == IFLNK);
1268
1269	if (isdir) {
1270		/*
1271		 * Go with the parent we found by chasing references,
1272		 * if we've gotten that far.  Otherwise, use what the
1273		 * directory itself claims.  If there's no ``..'' entry
1274		 * in it, give up trying to get the link counts right.
1275		 */
1276		if (update == TI_NOPARENT) {
1277			parent = -1;
1278		} else {
1279			iip = getinoinfo(ino);
1280			if (iip != NULL) {
1281				parent = iip->i_parent;
1282			} else {
1283				parent = lookup_dotdot_ino(ino);
1284				if (parent != 0) {
1285					/*
1286					 * Make sure that the claimed
1287					 * parent actually has a
1288					 * reference to us.
1289					 */
1290					dp = ginode(parent);
1291					idesc.id_name = lfname;
1292					idesc.id_type = DATA;
1293					idesc.id_func = findino;
1294					idesc.id_number = ino;
1295					idesc.id_fix = DONTKNOW;
1296					if ((ckinode(dp, &idesc,
1297					    CKI_TRAVERSE) & FOUND) == 0)
1298						parent = 0;
1299				}
1300			}
1301		}
1302
1303		mark_delayed_inodes(ino, numfrags(&sblock, new_length));
1304		if (parent > 0) {
1305			dp = ginode(parent);
1306			LINK_RANGE(message, dp->di_nlink, -1);
1307			if (message != NULL) {
1308				LINK_CLEAR(message, parent, dp->di_mode,
1309				    &idesc);
1310				if (statemap[parent] == USTATE)
1311					goto no_parent_update;
1312			}
1313			TRACK_LNCNTP(parent, lncntp[parent]--);
1314		} else if ((mode == IFDIR) && (parent == 0)) {
1315			/*
1316			 * Currently don't have a good way to
1317			 * handle this, so throw up our hands.
1318			 * However, we know that we can still
1319			 * do some good if we continue, so
1320			 * don't actually exit yet.
1321			 *
1322			 * We don't do it for attrdirs,
1323			 * because there aren't link counts
1324			 * between them and their parents.
1325			 */
1326			pwarn("Could not determine former parent of "
1327			    "inode %d, link counts are possibly\n"
1328			    "incorrect.  Please rerun fsck(1M) to "
1329			    "correct this.\n",
1330			    ino);
1331			iscorrupt = 1;
1332		}
1333		/*
1334		 * ...else if it's a directory with parent == -1, then
1335		 * we've not gotten far enough to know connectivity,
1336		 * and it'll get handled automatically later.
1337		 */
1338	}
1339
1340no_parent_update:
1341	init_inodesc(&idesc);
1342	idesc.id_type = ADDR;
1343	idesc.id_func = pass4check;
1344	idesc.id_number = ino;
1345	idesc.id_fix = DONTKNOW;
1346	idesc.id_truncto = howmany(new_length, sblock.fs_bsize);
1347	dp = ginode(ino);
1348	if (!islink && ckinode(dp, &idesc, CKI_TRUNCATE) & ALTERED)
1349		inodirty();
1350
1351	/*
1352	 * This has to be done after ckinode(), so that all of
1353	 * the fragments get visited.  Note that we assume we're
1354	 * always truncating to a block boundary, rather than a
1355	 * fragment boundary.
1356	 */
1357	dp = ginode(ino);
1358	dp->di_size = new_length;
1359
1360	/*
1361	 * Clear now-obsolete pointers.
1362	 */
1363	for (dblk = idesc.id_truncto + 1; dblk < NDADDR; dblk++) {
1364		dp->di_db[dblk] = 0;
1365	}
1366
1367	ilevel = get_indir_offsets(-1, idesc.id_truncto, NULL, NULL);
1368	for (ilevel++; ilevel < NIADDR; ilevel++) {
1369		dp->di_ib[ilevel] = 0;
1370	}
1371
1372	inodirty();
1373}
1374
1375/*
1376 * Release an inode's resources, then release the inode itself.
1377 */
1378void
1379freeino(fsck_ino_t ino, int update_parent)
1380{
1381	int cg;
1382	struct dinode *dp;
1383	struct cg *cgp;
1384
1385	n_files--;
1386	dp = ginode(ino);
1387	/*
1388	 * We need to make sure that the file is really a large file.
1389	 * Everything bigger than UFS_MAXOFFSET_T is treated as a file with
1390	 * negative size, which shall be cleared. (see verify_inode() in
1391	 * pass1.c)
1392	 */
1393	if (dp->di_size > (u_offset_t)MAXOFF_T &&
1394	    dp->di_size <= (u_offset_t)UFS_MAXOFFSET_T &&
1395	    ftypeok(dp) &&
1396	    (dp->di_mode & IFMT) != IFBLK &&
1397	    (dp->di_mode & IFMT) != IFCHR) {
1398		largefile_count--;
1399	}
1400	truncino(ino, 0, update_parent);
1401
1402	dp = ginode(ino);
1403	if ((dp->di_mode & IFMT) == IFATTRDIR) {
1404		clearshadow(ino, &attrclientinfo);
1405		dp = ginode(ino);
1406	}
1407
1408	clearinode(dp);
1409	inodirty();
1410	statemap[ino] = USTATE;
1411
1412	/*
1413	 * Keep the disk in sync with us so that pass5 doesn't get
1414	 * upset about spurious inconsistencies.
1415	 */
1416	cg = itog(&sblock, ino);
1417	(void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cg),
1418	    (size_t)sblock.fs_cgsize);
1419	cgp = cgblk.b_un.b_cg;
1420	clrbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1421	cgp->cg_cs.cs_nifree += 1;
1422	cgdirty();
1423	sblock.fs_cstotal.cs_nifree += 1;
1424	sbdirty();
1425}
1426
1427void
1428init_inoinfo(struct inoinfo *inp, struct dinode *dp, fsck_ino_t inum)
1429{
1430	inp->i_parent = ((inum == UFSROOTINO) ? UFSROOTINO : (fsck_ino_t)0);
1431	inp->i_dotdot = (fsck_ino_t)0;
1432	inp->i_isize = (offset_t)dp->di_size;
1433	inp->i_blkssize = (NDADDR + NIADDR) * sizeof (daddr32_t);
1434	inp->i_extattr = dp->di_oeftflag;
1435	(void) memmove((void *)&inp->i_blks[0], (void *)&dp->di_db[0],
1436	    inp->i_blkssize);
1437}
1438
1439/*
1440 * Return the inode number in the ".." entry of the provided
1441 * directory inode.
1442 */
1443static int
1444lookup_dotdot_ino(fsck_ino_t ino)
1445{
1446	struct inodesc idesc;
1447
1448	init_inodesc(&idesc);
1449	idesc.id_type = DATA;
1450	idesc.id_func = findino;
1451	idesc.id_name = "..";
1452	idesc.id_number = ino;
1453	idesc.id_fix = NOFIX;
1454
1455	if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) != 0) {
1456		return (idesc.id_parent);
1457	}
1458
1459	return (0);
1460}
1461
1462/*
1463 * Convenience wrapper around ckinode(findino()).
1464 */
1465int
1466lookup_named_ino(fsck_ino_t dir, caddr_t name)
1467{
1468	struct inodesc idesc;
1469
1470	init_inodesc(&idesc);
1471	idesc.id_type = DATA;
1472	idesc.id_func = findino;
1473	idesc.id_name = name;
1474	idesc.id_number = dir;
1475	idesc.id_fix = NOFIX;
1476
1477	if ((ckinode(ginode(dir), &idesc, CKI_TRAVERSE) & FOUND) != 0) {
1478		return (idesc.id_parent);
1479	}
1480
1481	return (0);
1482}
1483
1484/*
1485 * Marks inodes that are being orphaned and might need to be reconnected
1486 * by pass4().  The inode we're traversing is the directory whose
1487 * contents will be reconnected later.  id_parent is the lfn at which
1488 * to start looking at said contents.
1489 */
1490static int
1491mark_a_delayed_inode(struct inodesc *idesc)
1492{
1493	struct direct *dirp = idesc->id_dirp;
1494
1495	if (idesc->id_lbn < idesc->id_parent) {
1496		return (KEEPON);
1497	}
1498
1499	if (dirp->d_ino != 0 &&
1500	    strcmp(dirp->d_name, ".") != 0 &&
1501	    strcmp(dirp->d_name, "..") != 0) {
1502		statemap[dirp->d_ino] &= ~INFOUND;
1503		statemap[dirp->d_ino] |= INDELAYD;
1504	}
1505
1506	return (KEEPON);
1507}
1508
1509static void
1510mark_delayed_inodes(fsck_ino_t ino, daddr32_t first_lfn)
1511{
1512	struct dinode *dp;
1513	struct inodesc idelayed;
1514
1515	init_inodesc(&idelayed);
1516	idelayed.id_number = ino;
1517	idelayed.id_type = DATA;
1518	idelayed.id_fix = NOFIX;
1519	idelayed.id_func = mark_a_delayed_inode;
1520	idelayed.id_parent = first_lfn;
1521	idelayed.id_entryno = 2;
1522
1523	dp = ginode(ino);
1524	(void) ckinode(dp, &idelayed, CKI_TRAVERSE);
1525}
1526
1527/*
1528 * Clear the i_oeftflag/extended attribute pointer from INO.
1529 */
1530void
1531clearattrref(fsck_ino_t ino)
1532{
1533	struct dinode *dp;
1534
1535	dp = ginode(ino);
1536	if (debug) {
1537		if (dp->di_oeftflag == 0)
1538			(void) printf("clearattref: no attr to clear on %d\n",
1539			    ino);
1540	}
1541
1542	dp->di_oeftflag = 0;
1543	inodirty();
1544}
1545