xref: /illumos-gate/usr/src/cmd/fs.d/ufs/fsck/inode.c (revision 355d6bb5)
17c478bd9Sstevel@tonic-gate /*
2*355d6bb5Sswilcox  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
37c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
47c478bd9Sstevel@tonic-gate  */
57c478bd9Sstevel@tonic-gate 
67c478bd9Sstevel@tonic-gate /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
77c478bd9Sstevel@tonic-gate /*	  All Rights Reserved  	*/
87c478bd9Sstevel@tonic-gate 
97c478bd9Sstevel@tonic-gate /*
107c478bd9Sstevel@tonic-gate  * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
117c478bd9Sstevel@tonic-gate  * All rights reserved.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * Redistribution and use in source and binary forms are permitted
147c478bd9Sstevel@tonic-gate  * provided that: (1) source distributions retain this entire copyright
157c478bd9Sstevel@tonic-gate  * notice and comment, and (2) distributions including binaries display
167c478bd9Sstevel@tonic-gate  * the following acknowledgement:  ``This product includes software
177c478bd9Sstevel@tonic-gate  * developed by the University of California, Berkeley and its contributors''
187c478bd9Sstevel@tonic-gate  * in the documentation or other materials provided with the distribution
197c478bd9Sstevel@tonic-gate  * and in all advertising materials mentioning features or use of this
207c478bd9Sstevel@tonic-gate  * software. Neither the name of the University nor the names of its
217c478bd9Sstevel@tonic-gate  * contributors may be used to endorse or promote products derived
227c478bd9Sstevel@tonic-gate  * from this software without specific prior written permission.
237c478bd9Sstevel@tonic-gate  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
247c478bd9Sstevel@tonic-gate  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
257c478bd9Sstevel@tonic-gate  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
267c478bd9Sstevel@tonic-gate  */
277c478bd9Sstevel@tonic-gate 
287c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
297c478bd9Sstevel@tonic-gate 
307c478bd9Sstevel@tonic-gate #include <stdio.h>
317c478bd9Sstevel@tonic-gate #include <string.h>
327c478bd9Sstevel@tonic-gate #include <stdlib.h>
337c478bd9Sstevel@tonic-gate #include <unistd.h>
347c478bd9Sstevel@tonic-gate #include <time.h>
35*355d6bb5Sswilcox #include <limits.h>
367c478bd9Sstevel@tonic-gate #include <sys/param.h>
377c478bd9Sstevel@tonic-gate #include <sys/types.h>
387c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
397c478bd9Sstevel@tonic-gate #include <sys/mntent.h>
407c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
417c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
427c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_fs.h>
43*355d6bb5Sswilcox #define	_KERNEL
447c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_fsdir.h>
45*355d6bb5Sswilcox #undef _KERNEL
467c478bd9Sstevel@tonic-gate #include <pwd.h>
477c478bd9Sstevel@tonic-gate #include "fsck.h"
487c478bd9Sstevel@tonic-gate 
49*355d6bb5Sswilcox static int get_indir_offsets(int, daddr_t, int *, int *);
50*355d6bb5Sswilcox static int clearanentry(struct inodesc *);
51*355d6bb5Sswilcox static void pdinode(struct dinode *);
52*355d6bb5Sswilcox static void inoflush(void);
53*355d6bb5Sswilcox static void mark_delayed_inodes(fsck_ino_t, daddr32_t);
54*355d6bb5Sswilcox static int iblock(struct inodesc *, int, u_offset_t, enum cki_action);
55*355d6bb5Sswilcox static struct inoinfo *search_cache(struct inoinfo *, fsck_ino_t);
56*355d6bb5Sswilcox static int ckinode_common(struct dinode *, struct inodesc *, enum cki_action);
57*355d6bb5Sswilcox static int lookup_dotdot_ino(fsck_ino_t);
58*355d6bb5Sswilcox 
59*355d6bb5Sswilcox /*
60*355d6bb5Sswilcox  * ckinode() essentially traverses the blocklist of the provided
61*355d6bb5Sswilcox  * inode.  For each block either the caller-supplied callback (id_func
62*355d6bb5Sswilcox  * in the provided struct inodesc) or dirscan() is invoked.  Which is
63*355d6bb5Sswilcox  * chosen is controlled by what type of traversal was requested
64*355d6bb5Sswilcox  * (id_type) - if it was for an ADDR or ACL, use the callback,
65*355d6bb5Sswilcox  * otherwise it is assumed to be DATA (i.e., a directory) whose
66*355d6bb5Sswilcox  * contents need to be scanned.
67*355d6bb5Sswilcox  *
68*355d6bb5Sswilcox  * Note that a directory inode can get passed in with a type of ADDR;
69*355d6bb5Sswilcox  * the type field is orthogonal to the IFMT value.  This is so that
70*355d6bb5Sswilcox  * the file aspects (no duplicate blocks, etc) of a directory can be
71*355d6bb5Sswilcox  * verified just like is done for any other file, or the actual
72*355d6bb5Sswilcox  * contents can be scanned so that connectivity and such can be
73*355d6bb5Sswilcox  * investigated.
74*355d6bb5Sswilcox  *
75*355d6bb5Sswilcox  * The traversal is controlled by flags in the return value of
76*355d6bb5Sswilcox  * dirscan() or the callback.  Five flags are defined, STOP, SKIP,
77*355d6bb5Sswilcox  * KEEPON, ALTERED, and FOUND.  Their semantics are:
78*355d6bb5Sswilcox  *
79*355d6bb5Sswilcox  *     STOP -    no further processing of this inode is desired/possible/
80*355d6bb5Sswilcox  *               feasible/etc.  This can mean that whatever the scan
81*355d6bb5Sswilcox  *               was searching for was found, or a serious
82*355d6bb5Sswilcox  *               inconsistency was encountered, or anything else
83*355d6bb5Sswilcox  *               appropriate.
84*355d6bb5Sswilcox  *
85*355d6bb5Sswilcox  *     SKIP -    something that made it impossible to continue was
86*355d6bb5Sswilcox  *               encountered, and the caller should go on to the next
87*355d6bb5Sswilcox  *               inode.  This is more for i/o failures than for
88*355d6bb5Sswilcox  *               logical inconsistencies.  Nothing actually looks for
89*355d6bb5Sswilcox  *               this.
90*355d6bb5Sswilcox  *
91*355d6bb5Sswilcox  *     KEEPON -  no more blocks of this inode need to be scanned, but
92*355d6bb5Sswilcox  *               nothing's wrong, so keep on going with the next
93*355d6bb5Sswilcox  *               inode.  It is similar to STOP, except that
94*355d6bb5Sswilcox  *               ckinode()'s caller will typically advance to the next
95*355d6bb5Sswilcox  *               inode for KEEPON, whereas it ceases scanning through
96*355d6bb5Sswilcox  *               the inodes completely for STOP.
97*355d6bb5Sswilcox  *
98*355d6bb5Sswilcox  *     ALTERED - a change was made to the inode.  If the caller sees
99*355d6bb5Sswilcox  *               this set, it should make sure to flush out the
100*355d6bb5Sswilcox  *               changes.  Note that any data blocks read in by the
101*355d6bb5Sswilcox  *               function need to be marked dirty by it directly;
102*355d6bb5Sswilcox  *               flushing of those will happen automatically later.
103*355d6bb5Sswilcox  *
104*355d6bb5Sswilcox  *     FOUND -   whatever was being searched for was located.
105*355d6bb5Sswilcox  *               Typically combined with STOP to avoid wasting time
106*355d6bb5Sswilcox  *               doing additional looking.
107*355d6bb5Sswilcox  *
108*355d6bb5Sswilcox  * During a traversal, some state needs to be carried around.  At the
109*355d6bb5Sswilcox  * least, the callback functions need to know what inode they're
110*355d6bb5Sswilcox  * working on, which logical block, and whether or not fixing problems
111*355d6bb5Sswilcox  * when they're encountered is desired.  Rather than try to guess what
112*355d6bb5Sswilcox  * else might be needed (and thus end up passing way more arguments
113*355d6bb5Sswilcox  * than is reasonable), all the possibilities have been bundled in
114*355d6bb5Sswilcox  * struct inodesc.  About half of the fields are specific to directory
115*355d6bb5Sswilcox  * traversals, and the rest are pretty much generic to any traversal.
116*355d6bb5Sswilcox  *
117*355d6bb5Sswilcox  * The general fields are:
118*355d6bb5Sswilcox  *
119*355d6bb5Sswilcox  *     id_fix        What to do when an error is found.  Generally, this
120*355d6bb5Sswilcox  *                   is set to DONTKNOW before a traversal.  If a
121*355d6bb5Sswilcox  *                   problem is encountered, it is changed to either FIX
122*355d6bb5Sswilcox  *                   or NOFIX by the dofix() query function.  If id_fix
123*355d6bb5Sswilcox  *                   has already been set to FIX when dofix() is called, then
124*355d6bb5Sswilcox  *                   it includes the ALTERED flag (see above) in its return
125*355d6bb5Sswilcox  *                   value; the net effect is that the inode's buffer
126*355d6bb5Sswilcox  *                   will get marked dirty and written to disk at some
127*355d6bb5Sswilcox  *                   point.  If id_fix is DONTKNOW, then dofix() will
128*355d6bb5Sswilcox  *                   query the user.  If it is NOFIX, then dofix()
129*355d6bb5Sswilcox  *                   essentially does nothing.  A few routines set NOFIX
130*355d6bb5Sswilcox  *                   as the initial value, as they are performing a best-
131*355d6bb5Sswilcox  *                   effort informational task, rather than an actual
132*355d6bb5Sswilcox  *                   repair operation.
133*355d6bb5Sswilcox  *
134*355d6bb5Sswilcox  *     id_func       This is the function that will be called for every
135*355d6bb5Sswilcox  *                   logical block in the file (assuming id_type is not
136*355d6bb5Sswilcox  *                   DATA).  The logical block may represent a hole, so
137*355d6bb5Sswilcox  *                   the callback needs to be prepared to handle that
138*355d6bb5Sswilcox  *                   case.  Its return value is a combination of the flags
139*355d6bb5Sswilcox  *                   described above (SKIP, ALTERED, etc).
140*355d6bb5Sswilcox  *
141*355d6bb5Sswilcox  *     id_number     The inode number whose block list or data is being
142*355d6bb5Sswilcox  *                   scanned.
143*355d6bb5Sswilcox  *
144*355d6bb5Sswilcox  *     id_parent     When id_type is DATA, this is the inode number for
145*355d6bb5Sswilcox  *                   the parent of id_number.  Otherwise, it is
146*355d6bb5Sswilcox  *                   available for use as an extra parameter or return
147*355d6bb5Sswilcox  *                   value between the callback and ckinode()'s caller.
148*355d6bb5Sswilcox  *                   Which, if either, of those is left completely up to
149*355d6bb5Sswilcox  *                   the two routines involved, so nothing can generally
150*355d6bb5Sswilcox  *                   be assumed about the id_parent value for non-DATA
151*355d6bb5Sswilcox  *                   traversals.
152*355d6bb5Sswilcox  *
153*355d6bb5Sswilcox  *     id_lbn        This is the current logical block (not fragment)
154*355d6bb5Sswilcox  *                   number being visited by the traversal.
155*355d6bb5Sswilcox  *
156*355d6bb5Sswilcox  *     id_blkno      This is the physical block corresponding to id_lbn.
157*355d6bb5Sswilcox  *
158*355d6bb5Sswilcox  *     id_numfrags   This defines how large a block is being processed in
159*355d6bb5Sswilcox  *                   this particular invocation of the callback.
160*355d6bb5Sswilcox  *                   Usually, it will be the same as sblock.fs_frag.
161*355d6bb5Sswilcox  *                   However, if a direct block is being processed and
162*355d6bb5Sswilcox  *                   it is less than a full filesystem block,
163*355d6bb5Sswilcox  *                   id_numfrags will indicate just how many fragments
164*355d6bb5Sswilcox  *                   (starting from id_lbn) are actually part of the
165*355d6bb5Sswilcox  *                   file.
166*355d6bb5Sswilcox  *
167*355d6bb5Sswilcox  *     id_truncto    The pass 4 callback is used in several places to
168*355d6bb5Sswilcox  *                   free the blocks of a file (the `FILE HAS PROBLEM
169*355d6bb5Sswilcox  *                   FOO; CLEAR?' scenario).  This has been generalized
170*355d6bb5Sswilcox  *                   to allow truncating a file to a particular length
171*355d6bb5Sswilcox  *                   rather than always completely discarding it.  If
172*355d6bb5Sswilcox  *                   id_truncto is -1, then the entire file is released,
173*355d6bb5Sswilcox  *                   otherwise it is the logical block number to truncate
174*355d6bb5Sswilcox  *                   to.  This generalized interface was motivated by a
175*355d6bb5Sswilcox  *                   desire to be able to discard everything after a
176*355d6bb5Sswilcox  *                   hole in a directory, rather than the entire
177*355d6bb5Sswilcox  *                   directory.
178*355d6bb5Sswilcox  *
179*355d6bb5Sswilcox  *     id_type       Selects the type of traversal.  DATA for dirscan(),
180*355d6bb5Sswilcox  *                   ADDR or ACL for using the provided callback.
181*355d6bb5Sswilcox  *
182*355d6bb5Sswilcox  * There are several more fields used just for dirscan() traversals:
183*355d6bb5Sswilcox  *
184*355d6bb5Sswilcox  *     id_filesize   The number of bytes in the overall directory left to
185*355d6bb5Sswilcox  *                   process.
186*355d6bb5Sswilcox  *
187*355d6bb5Sswilcox  *     id_loc        Byte position within the directory block.  Should always
188*355d6bb5Sswilcox  *                   point to the start of a directory entry.
189*355d6bb5Sswilcox  *
190*355d6bb5Sswilcox  *     id_entryno    Which logical directory entry is being processed (0
191*355d6bb5Sswilcox  *                   is `.', 1 is `..', 2 and on are normal entries).
192*355d6bb5Sswilcox  *                   This field is primarily used to enable special
193*355d6bb5Sswilcox  *                   checks when looking at the first two entries.
194*355d6bb5Sswilcox  *
195*355d6bb5Sswilcox  *                   The exception (there's always an exception in fsck)
196*355d6bb5Sswilcox  *                   is that in pass 1, it tracks how many fragments are
197*355d6bb5Sswilcox  *                   being used by a particular inode.
198*355d6bb5Sswilcox  *
199*355d6bb5Sswilcox  *     id_firsthole  The first logical block number that was found to
200*355d6bb5Sswilcox  *                   be zero.  As directories are not supposed to have
201*355d6bb5Sswilcox  *                   holes, this marks where a directory should be
202*355d6bb5Sswilcox  *                   truncated down to.  A value of -1 indicates that
203*355d6bb5Sswilcox  *                   no holes were found.
204*355d6bb5Sswilcox  *
205*355d6bb5Sswilcox  *     id_dirp       A pointer to the in-memory copy of the current
206*355d6bb5Sswilcox  *                   directory entry (as identified by id_loc).
207*355d6bb5Sswilcox  *
208*355d6bb5Sswilcox  *     id_name       This is a directory entry name to either create
209*355d6bb5Sswilcox  *                   (callback is mkentry) or locate (callback is
210*355d6bb5Sswilcox  *                   chgino, findino, or findname).
211*355d6bb5Sswilcox  */
212*355d6bb5Sswilcox int
213*355d6bb5Sswilcox ckinode(struct dinode *dp, struct inodesc *idesc, enum cki_action action)
214*355d6bb5Sswilcox {
215*355d6bb5Sswilcox 	struct inodesc cleardesc;
216*355d6bb5Sswilcox 	mode_t	mode;
217*355d6bb5Sswilcox 
218*355d6bb5Sswilcox 	if (idesc->id_filesize == 0)
219*355d6bb5Sswilcox 		idesc->id_filesize = (offset_t)dp->di_size;
2207c478bd9Sstevel@tonic-gate 
221*355d6bb5Sswilcox 	/*
222*355d6bb5Sswilcox 	 * Our caller should be filtering out completely-free inodes
223*355d6bb5Sswilcox 	 * (mode == zero), so we'll work on the assumption that what
224*355d6bb5Sswilcox 	 * we're given has some basic validity.
225*355d6bb5Sswilcox 	 *
226*355d6bb5Sswilcox 	 * The kernel is inconsistent about MAXPATHLEN including the
227*355d6bb5Sswilcox 	 * trailing \0, so allow the more-generous length for symlinks.
228*355d6bb5Sswilcox 	 */
229*355d6bb5Sswilcox 	mode = dp->di_mode & IFMT;
230*355d6bb5Sswilcox 	if (mode == IFBLK || mode == IFCHR)
231*355d6bb5Sswilcox 		return (KEEPON);
232*355d6bb5Sswilcox 	if (mode == IFLNK && dp->di_size > MAXPATHLEN) {
233*355d6bb5Sswilcox 		pwarn("I=%d  Symlink longer than supported maximum",
234*355d6bb5Sswilcox 		    idesc->id_number);
235*355d6bb5Sswilcox 		init_inodesc(&cleardesc);
236*355d6bb5Sswilcox 		cleardesc.id_type = ADDR;
237*355d6bb5Sswilcox 		cleardesc.id_number = idesc->id_number;
238*355d6bb5Sswilcox 		cleardesc.id_fix = DONTKNOW;
239*355d6bb5Sswilcox 		clri(&cleardesc, "BAD", CLRI_VERBOSE, CLRI_NOP_CORRUPT);
240*355d6bb5Sswilcox 		return (STOP);
241*355d6bb5Sswilcox 	}
242*355d6bb5Sswilcox 	return (ckinode_common(dp, idesc, action));
243*355d6bb5Sswilcox }
2447c478bd9Sstevel@tonic-gate 
245*355d6bb5Sswilcox /*
246*355d6bb5Sswilcox  * This was split out from ckinode() to allow it to be used
247*355d6bb5Sswilcox  * without having to pass in kludge flags to suppress the
248*355d6bb5Sswilcox  * wrong-for-deletion initialization and irrelevant checks.
249*355d6bb5Sswilcox  * This feature is no longer needed, but is being kept in case
250*355d6bb5Sswilcox  * the need comes back.
251*355d6bb5Sswilcox  */
252*355d6bb5Sswilcox static int
253*355d6bb5Sswilcox ckinode_common(struct dinode *dp, struct inodesc *idesc,
254*355d6bb5Sswilcox 	enum cki_action action)
2557c478bd9Sstevel@tonic-gate {
256*355d6bb5Sswilcox 	offset_t offset;
2577c478bd9Sstevel@tonic-gate 	struct dinode dino;
258*355d6bb5Sswilcox 	daddr_t ndb;
259*355d6bb5Sswilcox 	int indir_data_blks, last_indir_blk;
260*355d6bb5Sswilcox 	int ret, i, frags;
2617c478bd9Sstevel@tonic-gate 
262*355d6bb5Sswilcox 	(void) memmove(&dino, dp, sizeof (struct dinode));
2637c478bd9Sstevel@tonic-gate 	ndb = howmany(dino.di_size, (u_offset_t)sblock.fs_bsize);
264*355d6bb5Sswilcox 
265*355d6bb5Sswilcox 	for (i = 0; i < NDADDR; i++) {
266*355d6bb5Sswilcox 		idesc->id_lbn++;
267*355d6bb5Sswilcox 		offset = blkoff(&sblock, dino.di_size);
268*355d6bb5Sswilcox 		if ((--ndb == 0) && (offset != 0)) {
2697c478bd9Sstevel@tonic-gate 			idesc->id_numfrags =
2707c478bd9Sstevel@tonic-gate 			    numfrags(&sblock, fragroundup(&sblock, offset));
271*355d6bb5Sswilcox 		} else {
2727c478bd9Sstevel@tonic-gate 			idesc->id_numfrags = sblock.fs_frag;
273*355d6bb5Sswilcox 		}
274*355d6bb5Sswilcox 		if (dino.di_db[i] == 0) {
275*355d6bb5Sswilcox 			if ((ndb > 0) && (idesc->id_firsthole < 0)) {
276*355d6bb5Sswilcox 				idesc->id_firsthole = i;
277*355d6bb5Sswilcox 			}
2787c478bd9Sstevel@tonic-gate 			continue;
279*355d6bb5Sswilcox 		}
280*355d6bb5Sswilcox 		idesc->id_blkno = dino.di_db[i];
2817c478bd9Sstevel@tonic-gate 		if (idesc->id_type == ADDR || idesc->id_type == ACL)
2827c478bd9Sstevel@tonic-gate 			ret = (*idesc->id_func)(idesc);
2837c478bd9Sstevel@tonic-gate 		else
2847c478bd9Sstevel@tonic-gate 			ret = dirscan(idesc);
285*355d6bb5Sswilcox 
286*355d6bb5Sswilcox 		/*
287*355d6bb5Sswilcox 		 * Need to clear the entry, now that we're done with
288*355d6bb5Sswilcox 		 * it.  We depend on freeblk() ignoring a request to
289*355d6bb5Sswilcox 		 * free already-free fragments to handle the problem of
290*355d6bb5Sswilcox 		 * a partial block.
291*355d6bb5Sswilcox 		 */
292*355d6bb5Sswilcox 		if ((action == CKI_TRUNCATE) &&
293*355d6bb5Sswilcox 		    (idesc->id_truncto >= 0) &&
294*355d6bb5Sswilcox 		    (idesc->id_lbn >= idesc->id_truncto)) {
295*355d6bb5Sswilcox 			dp = ginode(idesc->id_number);
296*355d6bb5Sswilcox 			/*
297*355d6bb5Sswilcox 			 * The (int) cast is safe, in that if di_size won't
298*355d6bb5Sswilcox 			 * fit, it'll be a multiple of any legal fs_frag,
299*355d6bb5Sswilcox 			 * thus giving a zero result.  That value, in turn
300*355d6bb5Sswilcox 			 * means we're doing an entire block.
301*355d6bb5Sswilcox 			 */
302*355d6bb5Sswilcox 			frags = howmany((int)dp->di_size, sblock.fs_fsize) %
303*355d6bb5Sswilcox 			    sblock.fs_frag;
304*355d6bb5Sswilcox 			if (frags == 0)
305*355d6bb5Sswilcox 				frags = sblock.fs_frag;
306*355d6bb5Sswilcox 			freeblk(idesc->id_number, dp->di_db[i],
307*355d6bb5Sswilcox 			    frags);
308*355d6bb5Sswilcox 			dp = ginode(idesc->id_number);
309*355d6bb5Sswilcox 			dp->di_db[i] = 0;
310*355d6bb5Sswilcox 			inodirty();
311*355d6bb5Sswilcox 			ret |= ALTERED;
312*355d6bb5Sswilcox 		}
313*355d6bb5Sswilcox 
3147c478bd9Sstevel@tonic-gate 		if (ret & STOP)
3157c478bd9Sstevel@tonic-gate 			return (ret);
3167c478bd9Sstevel@tonic-gate 	}
3177c478bd9Sstevel@tonic-gate 
318*355d6bb5Sswilcox #ifdef lint
3197c478bd9Sstevel@tonic-gate 	/*
320*355d6bb5Sswilcox 	 * Cure a lint complaint of ``possible use before set''.
321*355d6bb5Sswilcox 	 * Apparently it can't quite figure out the switch statement.
3227c478bd9Sstevel@tonic-gate 	 */
323*355d6bb5Sswilcox 	indir_data_blks = 0;
324*355d6bb5Sswilcox #endif
325*355d6bb5Sswilcox 	/*
326*355d6bb5Sswilcox 	 * indir_data_blks contains the number of data blocks in all
327*355d6bb5Sswilcox 	 * the previous levels for this iteration.  E.g., for the
328*355d6bb5Sswilcox 	 * single indirect case (i = 0, di_ib[i] != 0), NDADDR's worth
329*355d6bb5Sswilcox 	 * of blocks have already been covered by the direct blocks
330*355d6bb5Sswilcox 	 * (di_db[]).  At the triple indirect level (i = NIADDR - 1),
331*355d6bb5Sswilcox 	 * it is all of the number of data blocks that were covered
332*355d6bb5Sswilcox 	 * by the second indirect, single indirect, and direct block
333*355d6bb5Sswilcox 	 * levels.
334*355d6bb5Sswilcox 	 */
335*355d6bb5Sswilcox 	idesc->id_numfrags = sblock.fs_frag;
336*355d6bb5Sswilcox 	ndb = howmany(dino.di_size, (u_offset_t)sblock.fs_bsize);
337*355d6bb5Sswilcox 	for (i = 0; i < NIADDR; i++) {
338*355d6bb5Sswilcox 		(void) get_indir_offsets(i, ndb, &indir_data_blks,
339*355d6bb5Sswilcox 		    &last_indir_blk);
340*355d6bb5Sswilcox 		if (dino.di_ib[i] != 0) {
341*355d6bb5Sswilcox 			/*
342*355d6bb5Sswilcox 			 * We'll only clear di_ib[i] if the first entry (and
343*355d6bb5Sswilcox 			 * therefore all of them) is to be cleared, since we
344*355d6bb5Sswilcox 			 * only go through this code on the first entry of
345*355d6bb5Sswilcox 			 * each level of indirection.  The +1 is to account
346*355d6bb5Sswilcox 			 * for the fact that we don't modify id_lbn until
347*355d6bb5Sswilcox 			 * we actually start processing on a data block.
348*355d6bb5Sswilcox 			 */
349*355d6bb5Sswilcox 			idesc->id_blkno = dino.di_ib[i];
350*355d6bb5Sswilcox 			ret = iblock(idesc, i + 1,
3517c478bd9Sstevel@tonic-gate 			    (u_offset_t)howmany(dino.di_size,
352*355d6bb5Sswilcox 						(u_offset_t)sblock.fs_bsize) -
353*355d6bb5Sswilcox 						    indir_data_blks,
354*355d6bb5Sswilcox 						action);
355*355d6bb5Sswilcox 			if ((action == CKI_TRUNCATE) &&
356*355d6bb5Sswilcox 			    (idesc->id_truncto <= indir_data_blks) &&
357*355d6bb5Sswilcox 			    ((idesc->id_lbn + 1) >= indir_data_blks) &&
358*355d6bb5Sswilcox 			    ((idesc->id_lbn + 1) <= last_indir_blk)) {
359*355d6bb5Sswilcox 				dp = ginode(idesc->id_number);
360*355d6bb5Sswilcox 				if (dp->di_ib[i] != 0) {
361*355d6bb5Sswilcox 					freeblk(idesc->id_number, dp->di_ib[i],
362*355d6bb5Sswilcox 					    sblock.fs_frag);
363*355d6bb5Sswilcox 				}
364*355d6bb5Sswilcox 			}
3657c478bd9Sstevel@tonic-gate 			if (ret & STOP)
3667c478bd9Sstevel@tonic-gate 				return (ret);
3677c478bd9Sstevel@tonic-gate 		} else {
368*355d6bb5Sswilcox 			/*
369*355d6bb5Sswilcox 			 * Need to know which of the file's logical blocks
370*355d6bb5Sswilcox 			 * reside in the missing indirect block.  However, the
371*355d6bb5Sswilcox 			 * precise location is only needed for truncating
372*355d6bb5Sswilcox 			 * directories, and level-of-indirection precision is
373*355d6bb5Sswilcox 			 * sufficient for that.
374*355d6bb5Sswilcox 			 */
375*355d6bb5Sswilcox 			if ((indir_data_blks < ndb) &&
376*355d6bb5Sswilcox 			    (idesc->id_firsthole < 0)) {
377*355d6bb5Sswilcox 				idesc->id_firsthole = indir_data_blks;
378*355d6bb5Sswilcox 			}
3797c478bd9Sstevel@tonic-gate 		}
3807c478bd9Sstevel@tonic-gate 	}
3817c478bd9Sstevel@tonic-gate 	return (KEEPON);
3827c478bd9Sstevel@tonic-gate }
3837c478bd9Sstevel@tonic-gate 
384*355d6bb5Sswilcox static int
385*355d6bb5Sswilcox get_indir_offsets(int ilevel_wanted, daddr_t ndb, int *data_blks,
386*355d6bb5Sswilcox 	int *last_blk)
387*355d6bb5Sswilcox {
388*355d6bb5Sswilcox 	int ndb_ilevel = -1;
3897c478bd9Sstevel@tonic-gate 	int ilevel;
390*355d6bb5Sswilcox 	int dblks, lblk;
391*355d6bb5Sswilcox 
392*355d6bb5Sswilcox 	for (ilevel = 0; ilevel < NIADDR; ilevel++) {
393*355d6bb5Sswilcox 		switch (ilevel) {
394*355d6bb5Sswilcox 		case 0:	/* SINGLE */
395*355d6bb5Sswilcox 			dblks = NDADDR;
396*355d6bb5Sswilcox 			lblk = dblks + NINDIR(&sblock) - 1;
397*355d6bb5Sswilcox 			break;
398*355d6bb5Sswilcox 		case 1:	/* DOUBLE */
399*355d6bb5Sswilcox 			dblks = NDADDR + NINDIR(&sblock);
400*355d6bb5Sswilcox 			lblk = dblks + (NINDIR(&sblock) * NINDIR(&sblock)) - 1;
401*355d6bb5Sswilcox 			break;
402*355d6bb5Sswilcox 		case 2:	/* TRIPLE */
403*355d6bb5Sswilcox 			dblks = NDADDR + NINDIR(&sblock) +
404*355d6bb5Sswilcox 			    (NINDIR(&sblock) * NINDIR(&sblock));
405*355d6bb5Sswilcox 			lblk = dblks + (NINDIR(&sblock) * NINDIR(&sblock) *
406*355d6bb5Sswilcox 			    NINDIR(&sblock)) - 1;
407*355d6bb5Sswilcox 			break;
408*355d6bb5Sswilcox 		default:
409*355d6bb5Sswilcox 			exitstat = EXERRFATAL;
410*355d6bb5Sswilcox 			/*
411*355d6bb5Sswilcox 			 * Translate from zero-based array to
412*355d6bb5Sswilcox 			 * one-based human-style counting.
413*355d6bb5Sswilcox 			 */
414*355d6bb5Sswilcox 			errexit("panic: indirection level %d not 1, 2, or 3",
415*355d6bb5Sswilcox 			    ilevel + 1);
416*355d6bb5Sswilcox 			/* NOTREACHED */
417*355d6bb5Sswilcox 		}
418*355d6bb5Sswilcox 
419*355d6bb5Sswilcox 		if (dblks < ndb && ndb <= lblk)
420*355d6bb5Sswilcox 			ndb_ilevel = ilevel;
421*355d6bb5Sswilcox 
422*355d6bb5Sswilcox 		if (ilevel == ilevel_wanted) {
423*355d6bb5Sswilcox 			if (data_blks != NULL)
424*355d6bb5Sswilcox 				*data_blks = dblks;
425*355d6bb5Sswilcox 			if (last_blk != NULL)
426*355d6bb5Sswilcox 				*last_blk = lblk;
427*355d6bb5Sswilcox 		}
428*355d6bb5Sswilcox 	}
429*355d6bb5Sswilcox 
430*355d6bb5Sswilcox 	return (ndb_ilevel);
431*355d6bb5Sswilcox }
432*355d6bb5Sswilcox 
433*355d6bb5Sswilcox static int
434*355d6bb5Sswilcox iblock(struct inodesc *idesc, int ilevel, u_offset_t iblks,
435*355d6bb5Sswilcox 	enum cki_action action)
4367c478bd9Sstevel@tonic-gate {
4377c478bd9Sstevel@tonic-gate 	struct bufarea *bp;
438*355d6bb5Sswilcox 	int i, n;
439*355d6bb5Sswilcox 	int (*func)(struct inodesc *) = NULL;
440*355d6bb5Sswilcox 	u_offset_t fsbperindirb;
441*355d6bb5Sswilcox 	daddr32_t last_lbn;
442*355d6bb5Sswilcox 	int nif;
4437c478bd9Sstevel@tonic-gate 	char buf[BUFSIZ];
4447c478bd9Sstevel@tonic-gate 
445*355d6bb5Sswilcox 	n = KEEPON;
446*355d6bb5Sswilcox 
447*355d6bb5Sswilcox 	switch (idesc->id_type) {
448*355d6bb5Sswilcox 	case ADDR:
4497c478bd9Sstevel@tonic-gate 		func = idesc->id_func;
4507c478bd9Sstevel@tonic-gate 		if (((n = (*func)(idesc)) & KEEPON) == 0)
451*355d6bb5Sswilcox 				return (n);
452*355d6bb5Sswilcox 		break;
453*355d6bb5Sswilcox 	case ACL:
4547c478bd9Sstevel@tonic-gate 		func = idesc->id_func;
455*355d6bb5Sswilcox 		break;
456*355d6bb5Sswilcox 	case DATA:
4577c478bd9Sstevel@tonic-gate 		func = dirscan;
458*355d6bb5Sswilcox 		break;
459*355d6bb5Sswilcox 	default:
460*355d6bb5Sswilcox 		errexit("unknown inodesc type %d in iblock()", idesc->id_type);
461*355d6bb5Sswilcox 		/* NOTREACHED */
4627c478bd9Sstevel@tonic-gate 	}
463*355d6bb5Sswilcox 	if (chkrange(idesc->id_blkno, idesc->id_numfrags)) {
464*355d6bb5Sswilcox 		return ((idesc->id_type == ACL) ? STOP : SKIP);
465*355d6bb5Sswilcox 	}
466*355d6bb5Sswilcox 
467*355d6bb5Sswilcox 	bp = getdatablk(idesc->id_blkno, (size_t)sblock.fs_bsize);
468*355d6bb5Sswilcox 	if (bp->b_errs != 0) {
469*355d6bb5Sswilcox 		brelse(bp);
470*355d6bb5Sswilcox 		return (SKIP);
471*355d6bb5Sswilcox 	}
472*355d6bb5Sswilcox 
4737c478bd9Sstevel@tonic-gate 	ilevel--;
474*355d6bb5Sswilcox 	/*
475*355d6bb5Sswilcox 	 * Trivia note: the BSD fsck has the number of bytes remaining
476*355d6bb5Sswilcox 	 * as the third argument to iblock(), so the equivalent of
477*355d6bb5Sswilcox 	 * fsbperindirb starts at fs_bsize instead of one.  We're
478*355d6bb5Sswilcox 	 * working in units of filesystem blocks here, not bytes or
479*355d6bb5Sswilcox 	 * fragments.
480*355d6bb5Sswilcox 	 */
4817c478bd9Sstevel@tonic-gate 	for (fsbperindirb = 1, i = 0; i < ilevel; i++) {
4827c478bd9Sstevel@tonic-gate 		fsbperindirb *= (u_offset_t)NINDIR(&sblock);
4837c478bd9Sstevel@tonic-gate 	}
4847c478bd9Sstevel@tonic-gate 	/*
4857c478bd9Sstevel@tonic-gate 	 * nif indicates the next "free" pointer (as an array index) in this
4867c478bd9Sstevel@tonic-gate 	 * indirect block, based on counting the blocks remaining in the
4877c478bd9Sstevel@tonic-gate 	 * file after subtracting all previously processed blocks.
4887c478bd9Sstevel@tonic-gate 	 * This figure is based on the size field of the inode.
4897c478bd9Sstevel@tonic-gate 	 *
490*355d6bb5Sswilcox 	 * Note that in normal operation, nif may initially be calculated
491*355d6bb5Sswilcox 	 * as larger than the number of pointers in this block (as when
492*355d6bb5Sswilcox 	 * there are more indirect blocks following); if that is
4937c478bd9Sstevel@tonic-gate 	 * the case, nif is limited to the max number of pointers per
4947c478bd9Sstevel@tonic-gate 	 * indirect block.
4957c478bd9Sstevel@tonic-gate 	 *
496*355d6bb5Sswilcox 	 * Also note that if an inode is inconsistent (has more blocks
4977c478bd9Sstevel@tonic-gate 	 * allocated to it than the size field would indicate), the sweep
4987c478bd9Sstevel@tonic-gate 	 * through any indirect blocks directly pointed at by the inode
4997c478bd9Sstevel@tonic-gate 	 * continues. Since the block offset of any data blocks referenced
5007c478bd9Sstevel@tonic-gate 	 * by these indirect blocks is greater than the size of the file,
5017c478bd9Sstevel@tonic-gate 	 * the index nif may be computed as a negative value.
5027c478bd9Sstevel@tonic-gate 	 * In this case, we reset nif to indicate that all pointers in
5037c478bd9Sstevel@tonic-gate 	 * this retrieval block should be zeroed and the resulting
504*355d6bb5Sswilcox 	 * unreferenced data and/or retrieval blocks will be recovered
5057c478bd9Sstevel@tonic-gate 	 * through garbage collection later.
5067c478bd9Sstevel@tonic-gate 	 */
5077c478bd9Sstevel@tonic-gate 	nif = (offset_t)howmany(iblks, fsbperindirb);
5087c478bd9Sstevel@tonic-gate 	if (nif > NINDIR(&sblock))
5097c478bd9Sstevel@tonic-gate 		nif = NINDIR(&sblock);
5107c478bd9Sstevel@tonic-gate 	else if (nif < 0)
5117c478bd9Sstevel@tonic-gate 		nif = 0;
5127c478bd9Sstevel@tonic-gate 	/*
5137c478bd9Sstevel@tonic-gate 	 * first pass: all "free" retrieval pointers (from [nif] thru
5147c478bd9Sstevel@tonic-gate 	 * 	the end of the indirect block) should be zero. (This
5157c478bd9Sstevel@tonic-gate 	 *	assertion does not hold for directories, which may be
5167c478bd9Sstevel@tonic-gate 	 *	truncated without releasing their allocated space)
5177c478bd9Sstevel@tonic-gate 	 */
518*355d6bb5Sswilcox 	if (nif < NINDIR(&sblock) && (idesc->id_func == pass1check ||
519*355d6bb5Sswilcox 	    idesc->id_func == pass3bcheck)) {
520*355d6bb5Sswilcox 		for (i = nif; i < NINDIR(&sblock); i++) {
521*355d6bb5Sswilcox 			if (bp->b_un.b_indir[i] == 0)
5227c478bd9Sstevel@tonic-gate 				continue;
523*355d6bb5Sswilcox 			(void) sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu",
524*355d6bb5Sswilcox 			    (ulong_t)idesc->id_number);
525*355d6bb5Sswilcox 			if (preen) {
526*355d6bb5Sswilcox 				pfatal(buf);
527*355d6bb5Sswilcox 			} else if (dofix(idesc, buf)) {
528*355d6bb5Sswilcox 				freeblk(idesc->id_number,
529*355d6bb5Sswilcox 				    bp->b_un.b_indir[i],
530*355d6bb5Sswilcox 				    sblock.fs_frag);
531*355d6bb5Sswilcox 				bp->b_un.b_indir[i] = 0;
5327c478bd9Sstevel@tonic-gate 				dirty(bp);
5337c478bd9Sstevel@tonic-gate 			}
5347c478bd9Sstevel@tonic-gate 		}
5357c478bd9Sstevel@tonic-gate 		flush(fswritefd, bp);
5367c478bd9Sstevel@tonic-gate 	}
5377c478bd9Sstevel@tonic-gate 	/*
538*355d6bb5Sswilcox 	 * second pass: all retrieval pointers referring to blocks within
5397c478bd9Sstevel@tonic-gate 	 *	a valid range [0..filesize] (both indirect and data blocks)
540*355d6bb5Sswilcox 	 *	are examined in the same manner as ckinode() checks the
541*355d6bb5Sswilcox 	 *	direct blocks in the inode.  Sweep through from
5427c478bd9Sstevel@tonic-gate 	 *	the first pointer in this retrieval block to [nif-1].
5437c478bd9Sstevel@tonic-gate 	 */
544*355d6bb5Sswilcox 	last_lbn = howmany(idesc->id_filesize, sblock.fs_bsize);
545*355d6bb5Sswilcox 	for (i = 0; i < nif; i++) {
546*355d6bb5Sswilcox 		if (ilevel == 0)
547*355d6bb5Sswilcox 			idesc->id_lbn++;
548*355d6bb5Sswilcox 		if (bp->b_un.b_indir[i] != 0) {
549*355d6bb5Sswilcox 			idesc->id_blkno = bp->b_un.b_indir[i];
5507c478bd9Sstevel@tonic-gate 			if (ilevel > 0) {
551*355d6bb5Sswilcox 				n = iblock(idesc, ilevel, iblks, action);
5527c478bd9Sstevel@tonic-gate 				/*
553*355d6bb5Sswilcox 				 * Each iteration decreases "remaining block
554*355d6bb5Sswilcox 				 * count" by the number of blocks accessible
5557c478bd9Sstevel@tonic-gate 				 * by a pointer at this indirect block level.
5567c478bd9Sstevel@tonic-gate 				 */
5577c478bd9Sstevel@tonic-gate 				iblks -= fsbperindirb;
5587c478bd9Sstevel@tonic-gate 			} else {
559*355d6bb5Sswilcox 				/*
560*355d6bb5Sswilcox 				 * If we're truncating, func will discard
561*355d6bb5Sswilcox 				 * the data block for us.
562*355d6bb5Sswilcox 				 */
5637c478bd9Sstevel@tonic-gate 				n = (*func)(idesc);
5647c478bd9Sstevel@tonic-gate 			}
565*355d6bb5Sswilcox 
566*355d6bb5Sswilcox 			if ((action == CKI_TRUNCATE) &&
567*355d6bb5Sswilcox 			    (idesc->id_truncto >= 0) &&
568*355d6bb5Sswilcox 			    (idesc->id_lbn >= idesc->id_truncto)) {
569*355d6bb5Sswilcox 				freeblk(idesc->id_number,  bp->b_un.b_indir[i],
570*355d6bb5Sswilcox 				    sblock.fs_frag);
571*355d6bb5Sswilcox 			}
572*355d6bb5Sswilcox 
573*355d6bb5Sswilcox 			/*
574*355d6bb5Sswilcox 			 * Note that truncation never gets STOP back
575*355d6bb5Sswilcox 			 * under normal circumstances.  Abnormal would
576*355d6bb5Sswilcox 			 * be a bad acl short-circuit in iblock() or
577*355d6bb5Sswilcox 			 * an out-of-range failure in pass4check().
578*355d6bb5Sswilcox 			 * We still want to keep going when truncating
579*355d6bb5Sswilcox 			 * under those circumstances, since the whole
580*355d6bb5Sswilcox 			 * point of truncating is to get rid of all
581*355d6bb5Sswilcox 			 * that.
582*355d6bb5Sswilcox 			 */
583*355d6bb5Sswilcox 			if ((n & STOP) && (action != CKI_TRUNCATE)) {
5847c478bd9Sstevel@tonic-gate 				brelse(bp);
5857c478bd9Sstevel@tonic-gate 				return (n);
5867c478bd9Sstevel@tonic-gate 			}
5877c478bd9Sstevel@tonic-gate 		} else {
588*355d6bb5Sswilcox 			if ((idesc->id_lbn < last_lbn) &&
589*355d6bb5Sswilcox 			    (idesc->id_firsthole < 0)) {
590*355d6bb5Sswilcox 				idesc->id_firsthole = idesc->id_lbn;
591*355d6bb5Sswilcox 			}
592*355d6bb5Sswilcox 			if (idesc->id_type == DATA) {
593*355d6bb5Sswilcox 				/*
594*355d6bb5Sswilcox 				 * No point in continuing in the indirect
595*355d6bb5Sswilcox 				 * blocks of a directory, since they'll just
596*355d6bb5Sswilcox 				 * get freed anyway.
597*355d6bb5Sswilcox 				 */
598*355d6bb5Sswilcox 				brelse(bp);
599*355d6bb5Sswilcox 				return ((n & ~KEEPON) | STOP);
600*355d6bb5Sswilcox 			}
6017c478bd9Sstevel@tonic-gate 		}
6027c478bd9Sstevel@tonic-gate 	}
603*355d6bb5Sswilcox 
6047c478bd9Sstevel@tonic-gate 	brelse(bp);
6057c478bd9Sstevel@tonic-gate 	return (KEEPON);
6067c478bd9Sstevel@tonic-gate }
6077c478bd9Sstevel@tonic-gate 
6087c478bd9Sstevel@tonic-gate /*
6097c478bd9Sstevel@tonic-gate  * Check that a block is a legal block number.
6107c478bd9Sstevel@tonic-gate  * Return 0 if in range, 1 if out of range.
6117c478bd9Sstevel@tonic-gate  */
612*355d6bb5Sswilcox int
613*355d6bb5Sswilcox chkrange(daddr32_t blk, int cnt)
6147c478bd9Sstevel@tonic-gate {
6157c478bd9Sstevel@tonic-gate 	int c;
6167c478bd9Sstevel@tonic-gate 
617*355d6bb5Sswilcox 	if (cnt <= 0 || blk <= 0 || ((unsigned)blk >= (unsigned)maxfsblock) ||
618*355d6bb5Sswilcox 	    ((cnt - 1) > (maxfsblock - blk))) {
619*355d6bb5Sswilcox 		if (debug)
620*355d6bb5Sswilcox 			(void) printf(
621*355d6bb5Sswilcox 			    "Bad fragment range: should be 1 <= %d..%d < %d\n",
622*355d6bb5Sswilcox 			    blk, blk + cnt, maxfsblock);
6237c478bd9Sstevel@tonic-gate 		return (1);
624*355d6bb5Sswilcox 	}
625*355d6bb5Sswilcox 	if ((cnt > sblock.fs_frag) ||
626*355d6bb5Sswilcox 	    ((fragnum(&sblock, blk) + cnt) > sblock.fs_frag)) {
627*355d6bb5Sswilcox 		if (debug)
628*355d6bb5Sswilcox 			(void) printf("Bad fragment size: size %d\n", cnt);
629*355d6bb5Sswilcox 		return (1);
630*355d6bb5Sswilcox 	}
6317c478bd9Sstevel@tonic-gate 	c = dtog(&sblock, blk);
6327c478bd9Sstevel@tonic-gate 	if (blk < cgdmin(&sblock, c)) {
6337c478bd9Sstevel@tonic-gate 		if ((unsigned)(blk + cnt) > (unsigned)cgsblock(&sblock, c)) {
634*355d6bb5Sswilcox 			if (debug)
635*355d6bb5Sswilcox 				(void) printf(
636*355d6bb5Sswilcox 	    "Bad fragment position: %d..%d spans start of cg metadata\n",
637*355d6bb5Sswilcox 				    blk, blk + cnt);
6387c478bd9Sstevel@tonic-gate 			return (1);
6397c478bd9Sstevel@tonic-gate 		}
6407c478bd9Sstevel@tonic-gate 	} else {
6417c478bd9Sstevel@tonic-gate 		if ((unsigned)(blk + cnt) > (unsigned)cgbase(&sblock, c+1)) {
642*355d6bb5Sswilcox 			if (debug)
643*355d6bb5Sswilcox 				(void) printf(
644*355d6bb5Sswilcox 				    "Bad frag pos: %d..%d crosses end of cg\n",
645*355d6bb5Sswilcox 				    blk, blk + cnt);
6467c478bd9Sstevel@tonic-gate 			return (1);
6477c478bd9Sstevel@tonic-gate 		}
6487c478bd9Sstevel@tonic-gate 	}
6497c478bd9Sstevel@tonic-gate 	return (0);
6507c478bd9Sstevel@tonic-gate }
6517c478bd9Sstevel@tonic-gate 
6527c478bd9Sstevel@tonic-gate /*
6537c478bd9Sstevel@tonic-gate  * General purpose interface for reading inodes.
6547c478bd9Sstevel@tonic-gate  */
655*355d6bb5Sswilcox 
656*355d6bb5Sswilcox /*
657*355d6bb5Sswilcox  * Note that any call to ginode() can potentially invalidate any
658*355d6bb5Sswilcox  * dinode pointers previously acquired from it.  To avoid pain,
659*355d6bb5Sswilcox  * make sure to always call inodirty() immediately after modifying
660*355d6bb5Sswilcox  * an inode, if there's any chance of ginode() being called after
661*355d6bb5Sswilcox  * that.  Also, always call ginode() right before you need to access
662*355d6bb5Sswilcox  * an inode, so that there won't be any surprises from functions
663*355d6bb5Sswilcox  * called between the previous ginode() invocation and the dinode
664*355d6bb5Sswilcox  * use.
665*355d6bb5Sswilcox  *
666*355d6bb5Sswilcox  * Despite all that, we aren't doing the amount of i/o that's implied,
667*355d6bb5Sswilcox  * as we use the buffer cache that getdatablk() and friends maintain.
668*355d6bb5Sswilcox  */
669*355d6bb5Sswilcox static fsck_ino_t startinum = -1;
670*355d6bb5Sswilcox 
6717c478bd9Sstevel@tonic-gate struct dinode *
672*355d6bb5Sswilcox ginode(fsck_ino_t inum)
6737c478bd9Sstevel@tonic-gate {
6747c478bd9Sstevel@tonic-gate 	daddr32_t iblk;
6757c478bd9Sstevel@tonic-gate 	struct dinode *dp;
6767c478bd9Sstevel@tonic-gate 
677*355d6bb5Sswilcox 	if (inum < UFSROOTINO || inum > maxino) {
678*355d6bb5Sswilcox 		errexit("bad inode number %d to ginode\n", inum);
679*355d6bb5Sswilcox 	}
680*355d6bb5Sswilcox 	if (startinum == -1 ||
681*355d6bb5Sswilcox 	    pbp == NULL ||
682*355d6bb5Sswilcox 	    inum < startinum ||
683*355d6bb5Sswilcox 	    inum >= (fsck_ino_t)(startinum + (fsck_ino_t)INOPB(&sblock))) {
684*355d6bb5Sswilcox 		iblk = itod(&sblock, inum);
685*355d6bb5Sswilcox 		if (pbp != NULL) {
6867c478bd9Sstevel@tonic-gate 			brelse(pbp);
6877c478bd9Sstevel@tonic-gate 		}
688*355d6bb5Sswilcox 		/*
689*355d6bb5Sswilcox 		 * We don't check for errors here, because we can't
690*355d6bb5Sswilcox 		 * tell our caller about it, and the zeros that will
691*355d6bb5Sswilcox 		 * be in the buffer are just as good as anything we
692*355d6bb5Sswilcox 		 * could fake.
693*355d6bb5Sswilcox 		 */
694*355d6bb5Sswilcox 		pbp = getdatablk(iblk, (size_t)sblock.fs_bsize);
6957c478bd9Sstevel@tonic-gate 		startinum =
696*355d6bb5Sswilcox 		    (fsck_ino_t)((inum / INOPB(&sblock)) * INOPB(&sblock));
6977c478bd9Sstevel@tonic-gate 	}
698*355d6bb5Sswilcox 	dp = &pbp->b_un.b_dinode[inum % INOPB(&sblock)];
699*355d6bb5Sswilcox 	if (dp->di_suid != UID_LONG)
700*355d6bb5Sswilcox 		dp->di_uid = dp->di_suid;
701*355d6bb5Sswilcox 	if (dp->di_sgid != GID_LONG)
702*355d6bb5Sswilcox 		dp->di_gid = dp->di_sgid;
7037c478bd9Sstevel@tonic-gate 	return (dp);
7047c478bd9Sstevel@tonic-gate }
7057c478bd9Sstevel@tonic-gate 
7067c478bd9Sstevel@tonic-gate /*
7077c478bd9Sstevel@tonic-gate  * Special purpose version of ginode used to optimize first pass
708*355d6bb5Sswilcox  * over all the inodes in numerical order.  It bypasses the buffer
709*355d6bb5Sswilcox  * system used by ginode(), etc in favour of reading the bulk of a
710*355d6bb5Sswilcox  * cg's inodes at one time.
7117c478bd9Sstevel@tonic-gate  */
712*355d6bb5Sswilcox static fsck_ino_t nextino, lastinum;
713*355d6bb5Sswilcox static int64_t readcnt, readpercg, fullcnt, inobufsize;
714*355d6bb5Sswilcox static int64_t partialcnt, partialsize;
715*355d6bb5Sswilcox static size_t lastsize;
716*355d6bb5Sswilcox static struct dinode *inodebuf;
717*355d6bb5Sswilcox static diskaddr_t currentdblk;
718*355d6bb5Sswilcox static struct dinode *currentinode;
7197c478bd9Sstevel@tonic-gate 
7207c478bd9Sstevel@tonic-gate struct dinode *
721*355d6bb5Sswilcox getnextinode(fsck_ino_t inum)
7227c478bd9Sstevel@tonic-gate {
723*355d6bb5Sswilcox 	size_t size;
7247c478bd9Sstevel@tonic-gate 	diskaddr_t dblk;
7257c478bd9Sstevel@tonic-gate 	static struct dinode *dp;
7267c478bd9Sstevel@tonic-gate 
727*355d6bb5Sswilcox 	if (inum != nextino++ || inum > maxino)
728*355d6bb5Sswilcox 		errexit("bad inode number %d to nextinode\n", inum);
729*355d6bb5Sswilcox 
730*355d6bb5Sswilcox 	/*
731*355d6bb5Sswilcox 	 * Will always go into the if() the first time we're called,
732*355d6bb5Sswilcox 	 * so dp will always be valid.
733*355d6bb5Sswilcox 	 */
734*355d6bb5Sswilcox 	if (inum >= lastinum) {
7357c478bd9Sstevel@tonic-gate 		readcnt++;
7367c478bd9Sstevel@tonic-gate 		dblk = fsbtodb(&sblock, itod(&sblock, lastinum));
737*355d6bb5Sswilcox 		currentdblk = dblk;
7387c478bd9Sstevel@tonic-gate 		if (readcnt % readpercg == 0) {
739*355d6bb5Sswilcox 			if (partialsize > SIZE_MAX)
740*355d6bb5Sswilcox 				errexit(
741*355d6bb5Sswilcox 				    "Internal error: partialsize overflow");
742*355d6bb5Sswilcox 			size = (size_t)partialsize;
7437c478bd9Sstevel@tonic-gate 			lastinum += partialcnt;
7447c478bd9Sstevel@tonic-gate 		} else {
745*355d6bb5Sswilcox 			if (inobufsize > SIZE_MAX)
746*355d6bb5Sswilcox 				errexit("Internal error: inobufsize overflow");
747*355d6bb5Sswilcox 			size = (size_t)inobufsize;
7487c478bd9Sstevel@tonic-gate 			lastinum += fullcnt;
7497c478bd9Sstevel@tonic-gate 		}
750*355d6bb5Sswilcox 		/*
751*355d6bb5Sswilcox 		 * If fsck_bread() returns an error, it will already have
752*355d6bb5Sswilcox 		 * zeroed out the buffer, so we do not need to do so here.
753*355d6bb5Sswilcox 		 */
754*355d6bb5Sswilcox 		(void) fsck_bread(fsreadfd, (caddr_t)inodebuf, dblk, size);
755*355d6bb5Sswilcox 		lastsize = size;
7567c478bd9Sstevel@tonic-gate 		dp = inodebuf;
7577c478bd9Sstevel@tonic-gate 	}
758*355d6bb5Sswilcox 	currentinode = dp;
7597c478bd9Sstevel@tonic-gate 	return (dp++);
7607c478bd9Sstevel@tonic-gate }
7617c478bd9Sstevel@tonic-gate 
762*355d6bb5Sswilcox /*
763*355d6bb5Sswilcox  * Reread the current getnext() buffer.  This allows for changing inodes
764*355d6bb5Sswilcox  * other than the current one via ginode()/inodirty()/inoflush().
765*355d6bb5Sswilcox  *
766*355d6bb5Sswilcox  * Just reuses all the interesting variables that getnextinode() set up
767*355d6bb5Sswilcox  * last time it was called.  This shouldn't get called often, so we don't
768*355d6bb5Sswilcox  * try to figure out if the caller's actually touched an inode in the
769*355d6bb5Sswilcox  * range we have cached.  There could have been an arbitrary number of
770*355d6bb5Sswilcox  * them, after all.
771*355d6bb5Sswilcox  */
772*355d6bb5Sswilcox struct dinode *
773*355d6bb5Sswilcox getnextrefresh(void)
7747c478bd9Sstevel@tonic-gate {
775*355d6bb5Sswilcox 	if (inodebuf == NULL) {
776*355d6bb5Sswilcox 		return (NULL);
777*355d6bb5Sswilcox 	}
778*355d6bb5Sswilcox 
779*355d6bb5Sswilcox 	inoflush();
780*355d6bb5Sswilcox 	(void) fsck_bread(fsreadfd, (caddr_t)inodebuf, currentdblk, lastsize);
781*355d6bb5Sswilcox 	return (currentinode);
782*355d6bb5Sswilcox }
7837c478bd9Sstevel@tonic-gate 
784*355d6bb5Sswilcox void
785*355d6bb5Sswilcox resetinodebuf(void)
786*355d6bb5Sswilcox {
7877c478bd9Sstevel@tonic-gate 	startinum = 0;
7887c478bd9Sstevel@tonic-gate 	nextino = 0;
7897c478bd9Sstevel@tonic-gate 	lastinum = 0;
7907c478bd9Sstevel@tonic-gate 	readcnt = 0;
7917c478bd9Sstevel@tonic-gate 	inobufsize = blkroundup(&sblock, INOBUFSIZE);
7927c478bd9Sstevel@tonic-gate 	fullcnt = inobufsize / sizeof (struct dinode);
7937c478bd9Sstevel@tonic-gate 	readpercg = sblock.fs_ipg / fullcnt;
7947c478bd9Sstevel@tonic-gate 	partialcnt = sblock.fs_ipg % fullcnt;
7957c478bd9Sstevel@tonic-gate 	partialsize = partialcnt * sizeof (struct dinode);
7967c478bd9Sstevel@tonic-gate 	if (partialcnt != 0) {
7977c478bd9Sstevel@tonic-gate 		readpercg++;
7987c478bd9Sstevel@tonic-gate 	} else {
7997c478bd9Sstevel@tonic-gate 		partialcnt = fullcnt;
8007c478bd9Sstevel@tonic-gate 		partialsize = inobufsize;
8017c478bd9Sstevel@tonic-gate 	}
8027c478bd9Sstevel@tonic-gate 	if (inodebuf == NULL &&
8037c478bd9Sstevel@tonic-gate 	    (inodebuf = (struct dinode *)malloc((unsigned)inobufsize)) == NULL)
8047c478bd9Sstevel@tonic-gate 		errexit("Cannot allocate space for inode buffer\n");
8057c478bd9Sstevel@tonic-gate 	while (nextino < UFSROOTINO)
8067c478bd9Sstevel@tonic-gate 		(void) getnextinode(nextino);
8077c478bd9Sstevel@tonic-gate }
8087c478bd9Sstevel@tonic-gate 
809*355d6bb5Sswilcox void
810*355d6bb5Sswilcox freeinodebuf(void)
8117c478bd9Sstevel@tonic-gate {
812*355d6bb5Sswilcox 	if (inodebuf != NULL) {
813*355d6bb5Sswilcox 		free((void *)inodebuf);
814*355d6bb5Sswilcox 	}
8157c478bd9Sstevel@tonic-gate 	inodebuf = NULL;
8167c478bd9Sstevel@tonic-gate }
8177c478bd9Sstevel@tonic-gate 
8187c478bd9Sstevel@tonic-gate /*
8197c478bd9Sstevel@tonic-gate  * Routines to maintain information about directory inodes.
8207c478bd9Sstevel@tonic-gate  * This is built during the first pass and used during the
8217c478bd9Sstevel@tonic-gate  * second and third passes.
8227c478bd9Sstevel@tonic-gate  *
8237c478bd9Sstevel@tonic-gate  * Enter inodes into the cache.
8247c478bd9Sstevel@tonic-gate  */
825*355d6bb5Sswilcox void
826*355d6bb5Sswilcox cacheino(struct dinode *dp, fsck_ino_t inum)
8277c478bd9Sstevel@tonic-gate {
8287c478bd9Sstevel@tonic-gate 	struct inoinfo *inp;
8297c478bd9Sstevel@tonic-gate 	struct inoinfo **inpp;
8307c478bd9Sstevel@tonic-gate 	uint_t blks;
8317c478bd9Sstevel@tonic-gate 
8327c478bd9Sstevel@tonic-gate 	blks = NDADDR + NIADDR;
8337c478bd9Sstevel@tonic-gate 	inp = (struct inoinfo *)
8347c478bd9Sstevel@tonic-gate 		malloc(sizeof (*inp) + (blks - 1) * sizeof (daddr32_t));
8357c478bd9Sstevel@tonic-gate 	if (inp == NULL)
836*355d6bb5Sswilcox 		errexit("Cannot increase directory list\n");
837*355d6bb5Sswilcox 	init_inoinfo(inp, dp, inum); /* doesn't touch i_nextlist or i_number */
838*355d6bb5Sswilcox 	inpp = &inphead[inum % numdirs];
839*355d6bb5Sswilcox 	inp->i_nextlist = *inpp;
8407c478bd9Sstevel@tonic-gate 	*inpp = inp;
841*355d6bb5Sswilcox 	inp->i_number = inum;
8427c478bd9Sstevel@tonic-gate 	if (inplast == listmax) {
8437c478bd9Sstevel@tonic-gate 		listmax += 100;
844*355d6bb5Sswilcox 		inpsort = (struct inoinfo **)realloc((void *)inpsort,
8457c478bd9Sstevel@tonic-gate 		    (unsigned)listmax * sizeof (struct inoinfo *));
8467c478bd9Sstevel@tonic-gate 		if (inpsort == NULL)
8477c478bd9Sstevel@tonic-gate 			errexit("cannot increase directory list");
8487c478bd9Sstevel@tonic-gate 	}
8497c478bd9Sstevel@tonic-gate 	inpsort[inplast++] = inp;
8507c478bd9Sstevel@tonic-gate }
8517c478bd9Sstevel@tonic-gate 
8527c478bd9Sstevel@tonic-gate /*
8537c478bd9Sstevel@tonic-gate  * Look up an inode cache structure.
8547c478bd9Sstevel@tonic-gate  */
8557c478bd9Sstevel@tonic-gate struct inoinfo *
856*355d6bb5Sswilcox getinoinfo(fsck_ino_t inum)
8577c478bd9Sstevel@tonic-gate {
8587c478bd9Sstevel@tonic-gate 	struct inoinfo *inp;
8597c478bd9Sstevel@tonic-gate 
860*355d6bb5Sswilcox 	inp = search_cache(inphead[inum % numdirs], inum);
861*355d6bb5Sswilcox 	return (inp);
8627c478bd9Sstevel@tonic-gate }
8637c478bd9Sstevel@tonic-gate 
8647c478bd9Sstevel@tonic-gate /*
8657c478bd9Sstevel@tonic-gate  * Determine whether inode is in cache.
8667c478bd9Sstevel@tonic-gate  */
867*355d6bb5Sswilcox int
868*355d6bb5Sswilcox inocached(fsck_ino_t inum)
8697c478bd9Sstevel@tonic-gate {
870*355d6bb5Sswilcox 	return (search_cache(inphead[inum % numdirs], inum) != NULL);
8717c478bd9Sstevel@tonic-gate }
8727c478bd9Sstevel@tonic-gate 
8737c478bd9Sstevel@tonic-gate /*
8747c478bd9Sstevel@tonic-gate  * Clean up all the inode cache structure.
8757c478bd9Sstevel@tonic-gate  */
876*355d6bb5Sswilcox void
877*355d6bb5Sswilcox inocleanup(void)
8787c478bd9Sstevel@tonic-gate {
8797c478bd9Sstevel@tonic-gate 	struct inoinfo **inpp;
8807c478bd9Sstevel@tonic-gate 
8817c478bd9Sstevel@tonic-gate 	if (inphead == NULL)
8827c478bd9Sstevel@tonic-gate 		return;
883*355d6bb5Sswilcox 	for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) {
884*355d6bb5Sswilcox 		free((void *)(*inpp));
885*355d6bb5Sswilcox 	}
886*355d6bb5Sswilcox 	free((void *)inphead);
887*355d6bb5Sswilcox 	free((void *)inpsort);
8887c478bd9Sstevel@tonic-gate 	inphead = inpsort = NULL;
8897c478bd9Sstevel@tonic-gate }
8907c478bd9Sstevel@tonic-gate 
8917c478bd9Sstevel@tonic-gate /*
8927c478bd9Sstevel@tonic-gate  * Routines to maintain information about acl inodes.
8937c478bd9Sstevel@tonic-gate  * This is built during the first pass and used during the
8947c478bd9Sstevel@tonic-gate  * second and third passes.
8957c478bd9Sstevel@tonic-gate  *
8967c478bd9Sstevel@tonic-gate  * Enter acl inodes into the cache.
8977c478bd9Sstevel@tonic-gate  */
898*355d6bb5Sswilcox void
899*355d6bb5Sswilcox cacheacl(struct dinode *dp, fsck_ino_t inum)
9007c478bd9Sstevel@tonic-gate {
901*355d6bb5Sswilcox 	struct inoinfo *aclp;
902*355d6bb5Sswilcox 	struct inoinfo **aclpp;
9037c478bd9Sstevel@tonic-gate 	uint_t blks;
9047c478bd9Sstevel@tonic-gate 
9057c478bd9Sstevel@tonic-gate 	blks = NDADDR + NIADDR;
906*355d6bb5Sswilcox 	aclp = (struct inoinfo *)
9077c478bd9Sstevel@tonic-gate 		malloc(sizeof (*aclp) + (blks - 1) * sizeof (daddr32_t));
9087c478bd9Sstevel@tonic-gate 	if (aclp == NULL)
9097c478bd9Sstevel@tonic-gate 		return;
910*355d6bb5Sswilcox 	aclpp = &aclphead[inum % numacls];
911*355d6bb5Sswilcox 	aclp->i_nextlist = *aclpp;
9127c478bd9Sstevel@tonic-gate 	*aclpp = aclp;
913*355d6bb5Sswilcox 	aclp->i_number = inum;
9147c478bd9Sstevel@tonic-gate 	aclp->i_isize = (offset_t)dp->di_size;
915*355d6bb5Sswilcox 	aclp->i_blkssize = (size_t)(blks * sizeof (daddr32_t));
916*355d6bb5Sswilcox 	(void) memmove(&aclp->i_blks[0], &dp->di_db[0], aclp->i_blkssize);
9177c478bd9Sstevel@tonic-gate 	if (aclplast == aclmax) {
9187c478bd9Sstevel@tonic-gate 		aclmax += 100;
919*355d6bb5Sswilcox 		aclpsort = (struct inoinfo **)realloc((char *)aclpsort,
920*355d6bb5Sswilcox 		    (unsigned)aclmax * sizeof (struct inoinfo *));
9217c478bd9Sstevel@tonic-gate 		if (aclpsort == NULL)
9227c478bd9Sstevel@tonic-gate 			errexit("cannot increase acl list");
9237c478bd9Sstevel@tonic-gate 	}
9247c478bd9Sstevel@tonic-gate 	aclpsort[aclplast++] = aclp;
9257c478bd9Sstevel@tonic-gate }
9267c478bd9Sstevel@tonic-gate 
927*355d6bb5Sswilcox 
9287c478bd9Sstevel@tonic-gate /*
929*355d6bb5Sswilcox  * Generic cache search function.
930*355d6bb5Sswilcox  * ROOT is the first entry in a hash chain (the caller is expected
931*355d6bb5Sswilcox  * to have done the initial bucket lookup).  KEY is what's being
932*355d6bb5Sswilcox  * searched for.
933*355d6bb5Sswilcox  *
934*355d6bb5Sswilcox  * Returns a pointer to the entry if it is found, NULL otherwise.
9357c478bd9Sstevel@tonic-gate  */
936*355d6bb5Sswilcox static struct inoinfo *
937*355d6bb5Sswilcox search_cache(struct inoinfo *element, fsck_ino_t key)
9387c478bd9Sstevel@tonic-gate {
939*355d6bb5Sswilcox 	while (element != NULL) {
940*355d6bb5Sswilcox 		if (element->i_number == key)
941*355d6bb5Sswilcox 			break;
942*355d6bb5Sswilcox 		element = element->i_nextlist;
9437c478bd9Sstevel@tonic-gate 	}
944*355d6bb5Sswilcox 
945*355d6bb5Sswilcox 	return (element);
9467c478bd9Sstevel@tonic-gate }
9477c478bd9Sstevel@tonic-gate 
948*355d6bb5Sswilcox void
949*355d6bb5Sswilcox inodirty(void)
9507c478bd9Sstevel@tonic-gate {
951*355d6bb5Sswilcox 	dirty(pbp);
9527c478bd9Sstevel@tonic-gate }
9537c478bd9Sstevel@tonic-gate 
954*355d6bb5Sswilcox static void
955*355d6bb5Sswilcox inoflush(void)
9567c478bd9Sstevel@tonic-gate {
957*355d6bb5Sswilcox 	if (pbp != NULL)
958*355d6bb5Sswilcox 		flush(fswritefd, pbp);
9597c478bd9Sstevel@tonic-gate }
9607c478bd9Sstevel@tonic-gate 
961*355d6bb5Sswilcox /*
962*355d6bb5Sswilcox  * Interactive wrapper for freeino(), for those times when we're
963*355d6bb5Sswilcox  * not sure if we should throw something away.
964*355d6bb5Sswilcox  */
965*355d6bb5Sswilcox void
966*355d6bb5Sswilcox clri(struct inodesc *idesc, char *type, int verbose, int corrupting)
9677c478bd9Sstevel@tonic-gate {
968*355d6bb5Sswilcox 	int need_parent;
9697c478bd9Sstevel@tonic-gate 	struct dinode *dp;
9707c478bd9Sstevel@tonic-gate 
971*355d6bb5Sswilcox 	if (statemap[idesc->id_number] == USTATE)
972*355d6bb5Sswilcox 		return;
973*355d6bb5Sswilcox 
9747c478bd9Sstevel@tonic-gate 	dp = ginode(idesc->id_number);
975*355d6bb5Sswilcox 	if (verbose == CLRI_VERBOSE) {
976*355d6bb5Sswilcox 		pwarn("%s %s", type, file_id(idesc->id_number, dp->di_mode));
9777c478bd9Sstevel@tonic-gate 		pinode(idesc->id_number);
9787c478bd9Sstevel@tonic-gate 	}
979*355d6bb5Sswilcox 	if (preen || (reply("CLEAR") == 1)) {
980*355d6bb5Sswilcox 		need_parent = (corrupting == CLRI_NOP_OK) ?
981*355d6bb5Sswilcox 			TI_NOPARENT : TI_PARENT;
982*355d6bb5Sswilcox 		freeino(idesc->id_number, need_parent);
9837c478bd9Sstevel@tonic-gate 		if (preen)
984*355d6bb5Sswilcox 			(void) printf(" (CLEARED)\n");
985*355d6bb5Sswilcox 		remove_orphan_dir(idesc->id_number);
986*355d6bb5Sswilcox 	} else if (corrupting == CLRI_NOP_CORRUPT) {
987*355d6bb5Sswilcox 		iscorrupt = 1;
9887c478bd9Sstevel@tonic-gate 	}
989*355d6bb5Sswilcox 	(void) printf("\n");
9907c478bd9Sstevel@tonic-gate }
9917c478bd9Sstevel@tonic-gate 
992*355d6bb5Sswilcox /*
993*355d6bb5Sswilcox  * Find the directory entry for the inode noted in id_parent (which is
994*355d6bb5Sswilcox  * not necessarily the parent of anything, we're just using a convenient
995*355d6bb5Sswilcox  * field).
996*355d6bb5Sswilcox  */
997*355d6bb5Sswilcox int
998*355d6bb5Sswilcox findname(struct inodesc *idesc)
9997c478bd9Sstevel@tonic-gate {
10007c478bd9Sstevel@tonic-gate 	struct direct *dirp = idesc->id_dirp;
10017c478bd9Sstevel@tonic-gate 
10027c478bd9Sstevel@tonic-gate 	if (dirp->d_ino != idesc->id_parent)
10037c478bd9Sstevel@tonic-gate 		return (KEEPON);
1004*355d6bb5Sswilcox 	(void) memmove(idesc->id_name, dirp->d_name,
10057c478bd9Sstevel@tonic-gate 	    MIN(dirp->d_namlen, MAXNAMLEN) + 1);
10067c478bd9Sstevel@tonic-gate 	return (STOP|FOUND);
10077c478bd9Sstevel@tonic-gate }
10087c478bd9Sstevel@tonic-gate 
1009*355d6bb5Sswilcox /*
1010*355d6bb5Sswilcox  * Find the inode number associated with the given name.
1011*355d6bb5Sswilcox  */
1012*355d6bb5Sswilcox int
1013*355d6bb5Sswilcox findino(struct inodesc *idesc)
10147c478bd9Sstevel@tonic-gate {
10157c478bd9Sstevel@tonic-gate 	struct direct *dirp = idesc->id_dirp;
10167c478bd9Sstevel@tonic-gate 
10177c478bd9Sstevel@tonic-gate 	if (dirp->d_ino == 0)
10187c478bd9Sstevel@tonic-gate 		return (KEEPON);
10197c478bd9Sstevel@tonic-gate 	if (strcmp(dirp->d_name, idesc->id_name) == 0 &&
10207c478bd9Sstevel@tonic-gate 	    dirp->d_ino >= UFSROOTINO && dirp->d_ino <= maxino) {
10217c478bd9Sstevel@tonic-gate 		idesc->id_parent = dirp->d_ino;
10227c478bd9Sstevel@tonic-gate 		return (STOP|FOUND);
10237c478bd9Sstevel@tonic-gate 	}
10247c478bd9Sstevel@tonic-gate 	return (KEEPON);
10257c478bd9Sstevel@tonic-gate }
10267c478bd9Sstevel@tonic-gate 
1027*355d6bb5Sswilcox int
1028*355d6bb5Sswilcox cleardirentry(fsck_ino_t parentdir, fsck_ino_t target)
10297c478bd9Sstevel@tonic-gate {
1030*355d6bb5Sswilcox 	struct inodesc idesc;
10317c478bd9Sstevel@tonic-gate 	struct dinode *dp;
10327c478bd9Sstevel@tonic-gate 
1033*355d6bb5Sswilcox 	dp = ginode(parentdir);
1034*355d6bb5Sswilcox 	init_inodesc(&idesc);
1035*355d6bb5Sswilcox 	idesc.id_func = clearanentry;
1036*355d6bb5Sswilcox 	idesc.id_parent = target;
1037*355d6bb5Sswilcox 	idesc.id_type = DATA;
1038*355d6bb5Sswilcox 	idesc.id_fix = NOFIX;
1039*355d6bb5Sswilcox 	return (ckinode(dp, &idesc, CKI_TRAVERSE));
1040*355d6bb5Sswilcox }
1041*355d6bb5Sswilcox 
1042*355d6bb5Sswilcox static int
1043*355d6bb5Sswilcox clearanentry(struct inodesc *idesc)
1044*355d6bb5Sswilcox {
1045*355d6bb5Sswilcox 	struct direct *dirp = idesc->id_dirp;
1046*355d6bb5Sswilcox 
1047*355d6bb5Sswilcox 	if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1048*355d6bb5Sswilcox 		idesc->id_entryno++;
1049*355d6bb5Sswilcox 		return (KEEPON);
1050*355d6bb5Sswilcox 	}
1051*355d6bb5Sswilcox 	dirp->d_ino = 0;
1052*355d6bb5Sswilcox 	return (STOP|FOUND|ALTERED);
1053*355d6bb5Sswilcox }
1054*355d6bb5Sswilcox 
1055*355d6bb5Sswilcox void
1056*355d6bb5Sswilcox pinode(fsck_ino_t ino)
1057*355d6bb5Sswilcox {
1058*355d6bb5Sswilcox 	struct dinode *dp;
1059*355d6bb5Sswilcox 
1060*355d6bb5Sswilcox 	(void) printf(" I=%lu ", (ulong_t)ino);
10617c478bd9Sstevel@tonic-gate 	if (ino < UFSROOTINO || ino > maxino)
10627c478bd9Sstevel@tonic-gate 		return;
10637c478bd9Sstevel@tonic-gate 	dp = ginode(ino);
1064*355d6bb5Sswilcox 	pdinode(dp);
1065*355d6bb5Sswilcox }
1066*355d6bb5Sswilcox 
1067*355d6bb5Sswilcox static void
1068*355d6bb5Sswilcox pdinode(struct dinode *dp)
1069*355d6bb5Sswilcox {
1070*355d6bb5Sswilcox 	char *p;
1071*355d6bb5Sswilcox 	struct passwd *pw;
1072*355d6bb5Sswilcox 	time_t t;
1073*355d6bb5Sswilcox 
1074*355d6bb5Sswilcox 	(void) printf(" OWNER=");
10757c478bd9Sstevel@tonic-gate 	if ((pw = getpwuid((int)dp->di_uid)) != 0)
1076*355d6bb5Sswilcox 		(void) printf("%s ", pw->pw_name);
10777c478bd9Sstevel@tonic-gate 	else
1078*355d6bb5Sswilcox 		(void) printf("%lu ", (ulong_t)dp->di_uid);
1079*355d6bb5Sswilcox 	(void) printf("MODE=%o\n", dp->di_mode);
10807c478bd9Sstevel@tonic-gate 	if (preen)
1081*355d6bb5Sswilcox 		(void) printf("%s: ", devname);
1082*355d6bb5Sswilcox 	(void) printf("SIZE=%lld ", (longlong_t)dp->di_size);
1083*355d6bb5Sswilcox 
1084*355d6bb5Sswilcox 	/* ctime() ignores LOCALE, so this is safe */
10857c478bd9Sstevel@tonic-gate 	t = (time_t)dp->di_mtime;
10867c478bd9Sstevel@tonic-gate 	p = ctime(&t);
1087*355d6bb5Sswilcox 	(void) printf("MTIME=%12.12s %4.4s ", p + 4, p + 20);
10887c478bd9Sstevel@tonic-gate }
10897c478bd9Sstevel@tonic-gate 
1090*355d6bb5Sswilcox void
1091*355d6bb5Sswilcox blkerror(fsck_ino_t ino, char *type, daddr32_t blk, daddr32_t lbn)
10927c478bd9Sstevel@tonic-gate {
1093*355d6bb5Sswilcox 	pfatal("FRAGMENT %d %s I=%u LFN %d", blk, type, ino, lbn);
1094*355d6bb5Sswilcox 	(void) printf("\n");
10957c478bd9Sstevel@tonic-gate 
1096*355d6bb5Sswilcox 	switch (statemap[ino] & ~INDELAYD) {
10977c478bd9Sstevel@tonic-gate 
10987c478bd9Sstevel@tonic-gate 	case FSTATE:
1099*355d6bb5Sswilcox 	case FZLINK:
11007c478bd9Sstevel@tonic-gate 		statemap[ino] = FCLEAR;
11017c478bd9Sstevel@tonic-gate 		return;
11027c478bd9Sstevel@tonic-gate 
1103*355d6bb5Sswilcox 	case DFOUND:
11047c478bd9Sstevel@tonic-gate 	case DSTATE:
1105*355d6bb5Sswilcox 	case DZLINK:
11067c478bd9Sstevel@tonic-gate 		statemap[ino] = DCLEAR;
1107*355d6bb5Sswilcox 		add_orphan_dir(ino);
11087c478bd9Sstevel@tonic-gate 		return;
11097c478bd9Sstevel@tonic-gate 
11107c478bd9Sstevel@tonic-gate 	case SSTATE:
11117c478bd9Sstevel@tonic-gate 		statemap[ino] = SCLEAR;
11127c478bd9Sstevel@tonic-gate 		return;
11137c478bd9Sstevel@tonic-gate 
11147c478bd9Sstevel@tonic-gate 	case FCLEAR:
11157c478bd9Sstevel@tonic-gate 	case DCLEAR:
11167c478bd9Sstevel@tonic-gate 	case SCLEAR:
11177c478bd9Sstevel@tonic-gate 		return;
11187c478bd9Sstevel@tonic-gate 
11197c478bd9Sstevel@tonic-gate 	default:
1120*355d6bb5Sswilcox 		errexit("BAD STATE 0x%x TO BLKERR\n", statemap[ino]);
11217c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
11227c478bd9Sstevel@tonic-gate 	}
11237c478bd9Sstevel@tonic-gate }
11247c478bd9Sstevel@tonic-gate 
11257c478bd9Sstevel@tonic-gate /*
11267c478bd9Sstevel@tonic-gate  * allocate an unused inode
11277c478bd9Sstevel@tonic-gate  */
1128*355d6bb5Sswilcox fsck_ino_t
1129*355d6bb5Sswilcox allocino(fsck_ino_t request, int type)
11307c478bd9Sstevel@tonic-gate {
1131*355d6bb5Sswilcox 	fsck_ino_t ino;
11327c478bd9Sstevel@tonic-gate 	struct dinode *dp;
1133*355d6bb5Sswilcox 	struct cg *cgp = &cgrp;
1134*355d6bb5Sswilcox 	int cg, cg_fatal;
11357c478bd9Sstevel@tonic-gate 	time_t t;
1136*355d6bb5Sswilcox 	caddr_t err;
1137*355d6bb5Sswilcox 
1138*355d6bb5Sswilcox 	if (debug && (request != 0) && (request != UFSROOTINO))
1139*355d6bb5Sswilcox 		errexit("assertion failed: allocino() asked for "
1140*355d6bb5Sswilcox 			"inode %d instead of 0 or %d",
1141*355d6bb5Sswilcox 			(int)request, (int)UFSROOTINO);
11427c478bd9Sstevel@tonic-gate 
1143*355d6bb5Sswilcox 	/*
1144*355d6bb5Sswilcox 	 * We know that we're only going to get requests for UFSROOTINO
1145*355d6bb5Sswilcox 	 * or 0.  If UFSROOTINO is wanted, then it better be available
1146*355d6bb5Sswilcox 	 * because our caller is trying to recreate the root directory.
1147*355d6bb5Sswilcox 	 * If we're asked for 0, then which one we return doesn't matter.
1148*355d6bb5Sswilcox 	 * We know that inodes 0 and 1 are never valid to return, so we
1149*355d6bb5Sswilcox 	 * start at the lowest-legal inode number.
1150*355d6bb5Sswilcox 	 *
1151*355d6bb5Sswilcox 	 * If we got a request for UFSROOTINO, then request != 0, and
1152*355d6bb5Sswilcox 	 * this pair of conditionals is the only place that treats
1153*355d6bb5Sswilcox 	 * UFSROOTINO specially.
1154*355d6bb5Sswilcox 	 */
11557c478bd9Sstevel@tonic-gate 	if (request == 0)
11567c478bd9Sstevel@tonic-gate 		request = UFSROOTINO;
11577c478bd9Sstevel@tonic-gate 	else if (statemap[request] != USTATE)
11587c478bd9Sstevel@tonic-gate 		return (0);
1159*355d6bb5Sswilcox 
1160*355d6bb5Sswilcox 	/*
1161*355d6bb5Sswilcox 	 * Doesn't do wrapping, since we know we started at
1162*355d6bb5Sswilcox 	 * the smallest inode.
1163*355d6bb5Sswilcox 	 */
11647c478bd9Sstevel@tonic-gate 	for (ino = request; ino < maxino; ino++)
11657c478bd9Sstevel@tonic-gate 		if (statemap[ino] == USTATE)
11667c478bd9Sstevel@tonic-gate 			break;
11677c478bd9Sstevel@tonic-gate 	if (ino == maxino)
11687c478bd9Sstevel@tonic-gate 		return (0);
1169*355d6bb5Sswilcox 
1170*355d6bb5Sswilcox 	/*
1171*355d6bb5Sswilcox 	 * In pass5, we'll calculate the bitmaps and counts all again from
1172*355d6bb5Sswilcox 	 * scratch and do a comparison, but for that to work the cg has
1173*355d6bb5Sswilcox 	 * to know what in-memory changes we've made to it.  If we have
1174*355d6bb5Sswilcox 	 * trouble reading the cg, cg_sanity() should kick it out so
1175*355d6bb5Sswilcox 	 * we can skip explicit i/o error checking here.
1176*355d6bb5Sswilcox 	 */
1177*355d6bb5Sswilcox 	cg = itog(&sblock, ino);
1178*355d6bb5Sswilcox 	(void) getblk(&cgblk, cgtod(&sblock, cg), (size_t)sblock.fs_cgsize);
1179*355d6bb5Sswilcox 	err = cg_sanity(cgp, cg, &cg_fatal);
1180*355d6bb5Sswilcox 	if (err != NULL) {
1181*355d6bb5Sswilcox 		pfatal("CG %d: %s\n", cg, err);
1182*355d6bb5Sswilcox 		free((void *)err);
1183*355d6bb5Sswilcox 		if (cg_fatal)
1184*355d6bb5Sswilcox 			errexit(
1185*355d6bb5Sswilcox 	    "Irreparable cylinder group header problem.  Program terminated.");
1186*355d6bb5Sswilcox 		if (reply("REPAIR") == 0)
1187*355d6bb5Sswilcox 			errexit("Program terminated.");
1188*355d6bb5Sswilcox 		fix_cg(cgp, cg);
1189*355d6bb5Sswilcox 	}
1190*355d6bb5Sswilcox 	setbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1191*355d6bb5Sswilcox 	cgp->cg_cs.cs_nifree--;
1192*355d6bb5Sswilcox 	cgdirty();
1193*355d6bb5Sswilcox 
1194*355d6bb5Sswilcox 	if (lastino < ino)
1195*355d6bb5Sswilcox 		lastino = ino;
1196*355d6bb5Sswilcox 
1197*355d6bb5Sswilcox 	/*
1198*355d6bb5Sswilcox 	 * Don't currently support IFATTRDIR or any of the other
1199*355d6bb5Sswilcox 	 * types, as they aren't needed.
1200*355d6bb5Sswilcox 	 */
12017c478bd9Sstevel@tonic-gate 	switch (type & IFMT) {
12027c478bd9Sstevel@tonic-gate 	case IFDIR:
12037c478bd9Sstevel@tonic-gate 		statemap[ino] = DSTATE;
1204*355d6bb5Sswilcox 		cgp->cg_cs.cs_ndir++;
12057c478bd9Sstevel@tonic-gate 		break;
12067c478bd9Sstevel@tonic-gate 	case IFREG:
12077c478bd9Sstevel@tonic-gate 	case IFLNK:
12087c478bd9Sstevel@tonic-gate 		statemap[ino] = FSTATE;
12097c478bd9Sstevel@tonic-gate 		break;
12107c478bd9Sstevel@tonic-gate 	default:
1211*355d6bb5Sswilcox 		/*
1212*355d6bb5Sswilcox 		 * Pretend nothing ever happened.  This clears the
1213*355d6bb5Sswilcox 		 * dirty flag, among other things.
1214*355d6bb5Sswilcox 		 */
1215*355d6bb5Sswilcox 		initbarea(&cgblk);
1216*355d6bb5Sswilcox 		if (debug)
1217*355d6bb5Sswilcox 			(void) printf("allocino: unknown type 0%o\n",
1218*355d6bb5Sswilcox 			    type & IFMT);
12197c478bd9Sstevel@tonic-gate 		return (0);
12207c478bd9Sstevel@tonic-gate 	}
1221*355d6bb5Sswilcox 
1222*355d6bb5Sswilcox 	/*
1223*355d6bb5Sswilcox 	 * We're allocating what should be a completely-unused inode,
1224*355d6bb5Sswilcox 	 * so make sure we don't inherit anything from any previous
1225*355d6bb5Sswilcox 	 * incarnations.
1226*355d6bb5Sswilcox 	 */
12277c478bd9Sstevel@tonic-gate 	dp = ginode(ino);
1228*355d6bb5Sswilcox 	(void) memset((void *)dp, 0, sizeof (struct dinode));
12297c478bd9Sstevel@tonic-gate 	dp->di_db[0] = allocblk(1);
12307c478bd9Sstevel@tonic-gate 	if (dp->di_db[0] == 0) {
12317c478bd9Sstevel@tonic-gate 		statemap[ino] = USTATE;
12327c478bd9Sstevel@tonic-gate 		return (0);
12337c478bd9Sstevel@tonic-gate 	}
1234*355d6bb5Sswilcox 	dp->di_mode = (mode_t)type;
1235*355d6bb5Sswilcox 	(void) time(&t);
12367c478bd9Sstevel@tonic-gate 	dp->di_atime = (time32_t)t;
1237*355d6bb5Sswilcox 	dp->di_ctime = dp->di_atime;
1238*355d6bb5Sswilcox 	dp->di_mtime = dp->di_ctime;
12397c478bd9Sstevel@tonic-gate 	dp->di_size = (u_offset_t)sblock.fs_fsize;
12407c478bd9Sstevel@tonic-gate 	dp->di_blocks = btodb(sblock.fs_fsize);
12417c478bd9Sstevel@tonic-gate 	n_files++;
12427c478bd9Sstevel@tonic-gate 	inodirty();
12437c478bd9Sstevel@tonic-gate 	return (ino);
12447c478bd9Sstevel@tonic-gate }
12457c478bd9Sstevel@tonic-gate 
12467c478bd9Sstevel@tonic-gate /*
1247*355d6bb5Sswilcox  * Release some or all of the blocks of an inode.
1248*355d6bb5Sswilcox  * Only truncates down.  Assumes new_length is appropriately aligned
1249*355d6bb5Sswilcox  * to a block boundary (or a directory block boundary, if it's a
1250*355d6bb5Sswilcox  * directory).
1251*355d6bb5Sswilcox  *
1252*355d6bb5Sswilcox  * If this is a directory, discard all of its contents first, so
1253*355d6bb5Sswilcox  * we don't create a bunch of orphans that would need another fsck
1254*355d6bb5Sswilcox  * run to clean up.
1255*355d6bb5Sswilcox  *
1256*355d6bb5Sswilcox  * Even if truncating to zero length, the inode remains allocated.
12577c478bd9Sstevel@tonic-gate  */
1258*355d6bb5Sswilcox void
1259*355d6bb5Sswilcox truncino(fsck_ino_t ino, offset_t new_length, int update)
12607c478bd9Sstevel@tonic-gate {
12617c478bd9Sstevel@tonic-gate 	struct inodesc idesc;
1262*355d6bb5Sswilcox 	struct inoinfo *iip;
12637c478bd9Sstevel@tonic-gate 	struct dinode *dp;
1264*355d6bb5Sswilcox 	fsck_ino_t parent;
1265*355d6bb5Sswilcox 	mode_t mode;
1266*355d6bb5Sswilcox 	caddr_t message;
1267*355d6bb5Sswilcox 	int isdir;
1268*355d6bb5Sswilcox 	int ilevel, dblk;
1269*355d6bb5Sswilcox 
1270*355d6bb5Sswilcox 	dp = ginode(ino);
1271*355d6bb5Sswilcox 	mode = (dp->di_mode & IFMT);
1272*355d6bb5Sswilcox 	isdir = (mode == IFDIR) || (mode == IFATTRDIR);
1273*355d6bb5Sswilcox 
1274*355d6bb5Sswilcox 	if (isdir) {
1275*355d6bb5Sswilcox 		/*
1276*355d6bb5Sswilcox 		 * Go with the parent we found by chasing references,
1277*355d6bb5Sswilcox 		 * if we've gotten that far.  Otherwise, use what the
1278*355d6bb5Sswilcox 		 * directory itself claims.  If there's no ``..'' entry
1279*355d6bb5Sswilcox 		 * in it, give up trying to get the link counts right.
1280*355d6bb5Sswilcox 		 */
1281*355d6bb5Sswilcox 		if (update == TI_NOPARENT) {
1282*355d6bb5Sswilcox 			parent = -1;
1283*355d6bb5Sswilcox 		} else {
1284*355d6bb5Sswilcox 			iip = getinoinfo(ino);
1285*355d6bb5Sswilcox 			if (iip != NULL) {
1286*355d6bb5Sswilcox 				parent = iip->i_parent;
1287*355d6bb5Sswilcox 			} else {
1288*355d6bb5Sswilcox 				parent = lookup_dotdot_ino(ino);
1289*355d6bb5Sswilcox 				if (parent != 0) {
1290*355d6bb5Sswilcox 					/*
1291*355d6bb5Sswilcox 					 * Make sure that the claimed
1292*355d6bb5Sswilcox 					 * parent actually has a
1293*355d6bb5Sswilcox 					 * reference to us.
1294*355d6bb5Sswilcox 					 */
1295*355d6bb5Sswilcox 					dp = ginode(parent);
1296*355d6bb5Sswilcox 					idesc.id_name = lfname;
1297*355d6bb5Sswilcox 					idesc.id_type = DATA;
1298*355d6bb5Sswilcox 					idesc.id_func = findino;
1299*355d6bb5Sswilcox 					idesc.id_number = ino;
1300*355d6bb5Sswilcox 					idesc.id_fix = DONTKNOW;
1301*355d6bb5Sswilcox 					if ((ckinode(dp, &idesc,
1302*355d6bb5Sswilcox 					    CKI_TRAVERSE) & FOUND) == 0)
1303*355d6bb5Sswilcox 						parent = 0;
1304*355d6bb5Sswilcox 				}
1305*355d6bb5Sswilcox 			}
1306*355d6bb5Sswilcox 		}
1307*355d6bb5Sswilcox 
1308*355d6bb5Sswilcox 		mark_delayed_inodes(ino, numfrags(&sblock, new_length));
1309*355d6bb5Sswilcox 		if (parent > 0) {
1310*355d6bb5Sswilcox 			dp = ginode(parent);
1311*355d6bb5Sswilcox 			LINK_RANGE(message, dp->di_nlink, -1);
1312*355d6bb5Sswilcox 			if (message != NULL) {
1313*355d6bb5Sswilcox 				LINK_CLEAR(message, parent, dp->di_mode,
1314*355d6bb5Sswilcox 				    &idesc);
1315*355d6bb5Sswilcox 				if (statemap[parent] == USTATE)
1316*355d6bb5Sswilcox 					goto no_parent_update;
1317*355d6bb5Sswilcox 			}
1318*355d6bb5Sswilcox 			TRACK_LNCNTP(parent, lncntp[parent]--);
1319*355d6bb5Sswilcox 		} else if ((mode == IFDIR) && (parent == 0)) {
1320*355d6bb5Sswilcox 			/*
1321*355d6bb5Sswilcox 			 * Currently don't have a good way to
1322*355d6bb5Sswilcox 			 * handle this, so throw up our hands.
1323*355d6bb5Sswilcox 			 * However, we know that we can still
1324*355d6bb5Sswilcox 			 * do some good if we continue, so
1325*355d6bb5Sswilcox 			 * don't actually exit yet.
1326*355d6bb5Sswilcox 			 *
1327*355d6bb5Sswilcox 			 * We don't do it for attrdirs,
1328*355d6bb5Sswilcox 			 * because there aren't link counts
1329*355d6bb5Sswilcox 			 * between them and their parents.
1330*355d6bb5Sswilcox 			 */
1331*355d6bb5Sswilcox 			pwarn("Could not determine former parent of "
1332*355d6bb5Sswilcox 			    "inode %d, link counts are possibly\n"
1333*355d6bb5Sswilcox 			    "incorrect.  Please rerun fsck(1M) to "
1334*355d6bb5Sswilcox 			    "correct this.\n",
1335*355d6bb5Sswilcox 			    ino);
1336*355d6bb5Sswilcox 			iscorrupt = 1;
1337*355d6bb5Sswilcox 		}
1338*355d6bb5Sswilcox 		/*
1339*355d6bb5Sswilcox 		 * ...else if it's a directory with parent == -1, then
1340*355d6bb5Sswilcox 		 * we've not gotten far enough to know connectivity,
1341*355d6bb5Sswilcox 		 * and it'll get handled automatically later.
1342*355d6bb5Sswilcox 		 */
1343*355d6bb5Sswilcox 	}
13447c478bd9Sstevel@tonic-gate 
1345*355d6bb5Sswilcox no_parent_update:
1346*355d6bb5Sswilcox 	init_inodesc(&idesc);
13477c478bd9Sstevel@tonic-gate 	idesc.id_type = ADDR;
13487c478bd9Sstevel@tonic-gate 	idesc.id_func = pass4check;
13497c478bd9Sstevel@tonic-gate 	idesc.id_number = ino;
13507c478bd9Sstevel@tonic-gate 	idesc.id_fix = DONTKNOW;
1351*355d6bb5Sswilcox 	idesc.id_truncto = howmany(new_length, sblock.fs_bsize);
1352*355d6bb5Sswilcox 	dp = ginode(ino);
1353*355d6bb5Sswilcox 	if (ckinode(dp, &idesc, CKI_TRUNCATE) & ALTERED)
1354*355d6bb5Sswilcox 		inodirty();
1355*355d6bb5Sswilcox 
1356*355d6bb5Sswilcox 	/*
1357*355d6bb5Sswilcox 	 * This has to be done after ckinode(), so that all of
1358*355d6bb5Sswilcox 	 * the fragments get visited.  Note that we assume we're
1359*355d6bb5Sswilcox 	 * always truncating to a block boundary, rather than a
1360*355d6bb5Sswilcox 	 * fragment boundary.
1361*355d6bb5Sswilcox 	 */
1362*355d6bb5Sswilcox 	dp = ginode(ino);
1363*355d6bb5Sswilcox 	dp->di_size = new_length;
1364*355d6bb5Sswilcox 
1365*355d6bb5Sswilcox 	/*
1366*355d6bb5Sswilcox 	 * Clear now-obsolete pointers.
1367*355d6bb5Sswilcox 	 */
1368*355d6bb5Sswilcox 	for (dblk = idesc.id_truncto + 1; dblk < NDADDR; dblk++) {
1369*355d6bb5Sswilcox 		dp->di_db[dblk] = 0;
1370*355d6bb5Sswilcox 	}
1371*355d6bb5Sswilcox 
1372*355d6bb5Sswilcox 	ilevel = get_indir_offsets(-1, idesc.id_truncto, NULL, NULL);
1373*355d6bb5Sswilcox 	for (ilevel++; ilevel < NIADDR; ilevel++) {
1374*355d6bb5Sswilcox 		dp->di_ib[ilevel] = 0;
1375*355d6bb5Sswilcox 	}
1376*355d6bb5Sswilcox 
1377*355d6bb5Sswilcox 	inodirty();
1378*355d6bb5Sswilcox }
1379*355d6bb5Sswilcox 
1380*355d6bb5Sswilcox /*
1381*355d6bb5Sswilcox  * Release an inode's resources, then release the inode itself.
1382*355d6bb5Sswilcox  */
1383*355d6bb5Sswilcox void
1384*355d6bb5Sswilcox freeino(fsck_ino_t ino, int update_parent)
1385*355d6bb5Sswilcox {
1386*355d6bb5Sswilcox 	int cg;
1387*355d6bb5Sswilcox 	struct dinode *dp;
1388*355d6bb5Sswilcox 	struct cg *cgp;
1389*355d6bb5Sswilcox 
1390*355d6bb5Sswilcox 	n_files--;
1391*355d6bb5Sswilcox 	dp = ginode(ino);
1392*355d6bb5Sswilcox 	if (dp->di_size > (u_offset_t)MAXOFF_T) {
1393*355d6bb5Sswilcox 		largefile_count--;
1394*355d6bb5Sswilcox 	}
1395*355d6bb5Sswilcox 	truncino(ino, 0, update_parent);
1396*355d6bb5Sswilcox 
13977c478bd9Sstevel@tonic-gate 	dp = ginode(ino);
1398*355d6bb5Sswilcox 	if ((dp->di_mode & IFMT) == IFATTRDIR) {
1399*355d6bb5Sswilcox 		clearshadow(ino, &attrclientinfo);
1400*355d6bb5Sswilcox 		dp = ginode(ino);
1401*355d6bb5Sswilcox 	}
1402*355d6bb5Sswilcox 
14037c478bd9Sstevel@tonic-gate 	clearinode(dp);
14047c478bd9Sstevel@tonic-gate 	inodirty();
14057c478bd9Sstevel@tonic-gate 	statemap[ino] = USTATE;
1406*355d6bb5Sswilcox 
1407*355d6bb5Sswilcox 	/*
1408*355d6bb5Sswilcox 	 * Keep the disk in sync with us so that pass5 doesn't get
1409*355d6bb5Sswilcox 	 * upset about spurious inconsistencies.
1410*355d6bb5Sswilcox 	 */
1411*355d6bb5Sswilcox 	cg = itog(&sblock, ino);
1412*355d6bb5Sswilcox 	(void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cg),
1413*355d6bb5Sswilcox 	    (size_t)sblock.fs_cgsize);
1414*355d6bb5Sswilcox 	cgp = cgblk.b_un.b_cg;
1415*355d6bb5Sswilcox 	clrbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1416*355d6bb5Sswilcox 	cgp->cg_cs.cs_nifree += 1;
1417*355d6bb5Sswilcox 	cgdirty();
1418*355d6bb5Sswilcox 	sblock.fs_cstotal.cs_nifree += 1;
1419*355d6bb5Sswilcox 	sbdirty();
1420*355d6bb5Sswilcox }
1421*355d6bb5Sswilcox 
1422*355d6bb5Sswilcox void
1423*355d6bb5Sswilcox init_inoinfo(struct inoinfo *inp, struct dinode *dp, fsck_ino_t inum)
1424*355d6bb5Sswilcox {
1425*355d6bb5Sswilcox 	inp->i_parent = ((inum == UFSROOTINO) ? UFSROOTINO : (fsck_ino_t)0);
1426*355d6bb5Sswilcox 	inp->i_dotdot = (fsck_ino_t)0;
1427*355d6bb5Sswilcox 	inp->i_isize = (offset_t)dp->di_size;
1428*355d6bb5Sswilcox 	inp->i_blkssize = (NDADDR + NIADDR) * sizeof (daddr32_t);
1429*355d6bb5Sswilcox 	inp->i_extattr = dp->di_oeftflag;
1430*355d6bb5Sswilcox 	(void) memmove((void *)&inp->i_blks[0], (void *)&dp->di_db[0],
1431*355d6bb5Sswilcox 	    inp->i_blkssize);
1432*355d6bb5Sswilcox }
1433*355d6bb5Sswilcox 
1434*355d6bb5Sswilcox /*
1435*355d6bb5Sswilcox  * Return the inode number in the ".." entry of the provided
1436*355d6bb5Sswilcox  * directory inode.
1437*355d6bb5Sswilcox  */
1438*355d6bb5Sswilcox static int
1439*355d6bb5Sswilcox lookup_dotdot_ino(fsck_ino_t ino)
1440*355d6bb5Sswilcox {
1441*355d6bb5Sswilcox 	struct inodesc idesc;
1442*355d6bb5Sswilcox 
1443*355d6bb5Sswilcox 	init_inodesc(&idesc);
1444*355d6bb5Sswilcox 	idesc.id_type = DATA;
1445*355d6bb5Sswilcox 	idesc.id_func = findino;
1446*355d6bb5Sswilcox 	idesc.id_name = "..";
1447*355d6bb5Sswilcox 	idesc.id_number = ino;
1448*355d6bb5Sswilcox 	idesc.id_fix = NOFIX;
1449*355d6bb5Sswilcox 
1450*355d6bb5Sswilcox 	if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) != 0) {
1451*355d6bb5Sswilcox 		return (idesc.id_parent);
1452*355d6bb5Sswilcox 	}
1453*355d6bb5Sswilcox 
1454*355d6bb5Sswilcox 	return (0);
1455*355d6bb5Sswilcox }
1456*355d6bb5Sswilcox 
1457*355d6bb5Sswilcox /*
1458*355d6bb5Sswilcox  * Convenience wrapper around ckinode(findino()).
1459*355d6bb5Sswilcox  */
1460*355d6bb5Sswilcox int
1461*355d6bb5Sswilcox lookup_named_ino(fsck_ino_t dir, caddr_t name)
1462*355d6bb5Sswilcox {
1463*355d6bb5Sswilcox 	struct inodesc idesc;
1464*355d6bb5Sswilcox 
1465*355d6bb5Sswilcox 	init_inodesc(&idesc);
1466*355d6bb5Sswilcox 	idesc.id_type = DATA;
1467*355d6bb5Sswilcox 	idesc.id_func = findino;
1468*355d6bb5Sswilcox 	idesc.id_name = name;
1469*355d6bb5Sswilcox 	idesc.id_number = dir;
1470*355d6bb5Sswilcox 	idesc.id_fix = NOFIX;
1471*355d6bb5Sswilcox 
1472*355d6bb5Sswilcox 	if ((ckinode(ginode(dir), &idesc, CKI_TRAVERSE) & FOUND) != 0) {
1473*355d6bb5Sswilcox 		return (idesc.id_parent);
1474*355d6bb5Sswilcox 	}
1475*355d6bb5Sswilcox 
1476*355d6bb5Sswilcox 	return (0);
1477*355d6bb5Sswilcox }
1478*355d6bb5Sswilcox 
1479*355d6bb5Sswilcox /*
1480*355d6bb5Sswilcox  * Marks inodes that are being orphaned and might need to be reconnected
1481*355d6bb5Sswilcox  * by pass4().  The inode we're traversing is the directory whose
1482*355d6bb5Sswilcox  * contents will be reconnected later.  id_parent is the lfn at which
1483*355d6bb5Sswilcox  * to start looking at said contents.
1484*355d6bb5Sswilcox  */
1485*355d6bb5Sswilcox static int
1486*355d6bb5Sswilcox mark_a_delayed_inode(struct inodesc *idesc)
1487*355d6bb5Sswilcox {
1488*355d6bb5Sswilcox 	struct direct *dirp = idesc->id_dirp;
1489*355d6bb5Sswilcox 
1490*355d6bb5Sswilcox 	if (idesc->id_lbn < idesc->id_parent) {
1491*355d6bb5Sswilcox 		return (KEEPON);
1492*355d6bb5Sswilcox 	}
1493*355d6bb5Sswilcox 
1494*355d6bb5Sswilcox 	if (dirp->d_ino != 0 &&
1495*355d6bb5Sswilcox 	    strcmp(dirp->d_name, ".") != 0 &&
1496*355d6bb5Sswilcox 	    strcmp(dirp->d_name, "..") != 0) {
1497*355d6bb5Sswilcox 		statemap[dirp->d_ino] &= ~INFOUND;
1498*355d6bb5Sswilcox 		statemap[dirp->d_ino] |= INDELAYD;
1499*355d6bb5Sswilcox 	}
1500*355d6bb5Sswilcox 
1501*355d6bb5Sswilcox 	return (KEEPON);
1502*355d6bb5Sswilcox }
1503*355d6bb5Sswilcox 
1504*355d6bb5Sswilcox static void
1505*355d6bb5Sswilcox mark_delayed_inodes(fsck_ino_t ino, daddr32_t first_lfn)
1506*355d6bb5Sswilcox {
1507*355d6bb5Sswilcox 	struct dinode *dp;
1508*355d6bb5Sswilcox 	struct inodesc idelayed;
1509*355d6bb5Sswilcox 
1510*355d6bb5Sswilcox 	init_inodesc(&idelayed);
1511*355d6bb5Sswilcox 	idelayed.id_number = ino;
1512*355d6bb5Sswilcox 	idelayed.id_type = DATA;
1513*355d6bb5Sswilcox 	idelayed.id_fix = NOFIX;
1514*355d6bb5Sswilcox 	idelayed.id_func = mark_a_delayed_inode;
1515*355d6bb5Sswilcox 	idelayed.id_parent = first_lfn;
1516*355d6bb5Sswilcox 	idelayed.id_entryno = 2;
1517*355d6bb5Sswilcox 
1518*355d6bb5Sswilcox 	dp = ginode(ino);
1519*355d6bb5Sswilcox 	(void) ckinode(dp, &idelayed, CKI_TRAVERSE);
1520*355d6bb5Sswilcox }
1521*355d6bb5Sswilcox 
1522*355d6bb5Sswilcox /*
1523*355d6bb5Sswilcox  * Clear the i_oeftflag/extended attribute pointer from INO.
1524*355d6bb5Sswilcox  */
1525*355d6bb5Sswilcox void
1526*355d6bb5Sswilcox clearattrref(fsck_ino_t ino)
1527*355d6bb5Sswilcox {
1528*355d6bb5Sswilcox 	struct dinode *dp;
1529*355d6bb5Sswilcox 
1530*355d6bb5Sswilcox 	dp = ginode(ino);
1531*355d6bb5Sswilcox 	if (debug) {
1532*355d6bb5Sswilcox 		if (dp->di_oeftflag == 0)
1533*355d6bb5Sswilcox 			(void) printf("clearattref: no attr to clear on %d\n",
1534*355d6bb5Sswilcox 			    ino);
1535*355d6bb5Sswilcox 	}
1536*355d6bb5Sswilcox 
1537*355d6bb5Sswilcox 	dp->di_oeftflag = 0;
1538*355d6bb5Sswilcox 	inodirty();
15397c478bd9Sstevel@tonic-gate }
1540