xref: /illumos-gate/usr/src/cmd/fs.d/ufs/fsck/pass5.c (revision 70e93bcc)
1 /*
2  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
7 /*	  All Rights Reserved  	*/
8 
9 /*
10  * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
11  * All rights reserved.
12  *
13  * Redistribution and use in source and binary forms are permitted
14  * provided that: (1) source distributions retain this entire copyright
15  * notice and comment, and (2) distributions including binaries display
16  * the following acknowledgement:  ``This product includes software
17  * developed by the University of California, Berkeley and its contributors''
18  * in the documentation or other materials provided with the distribution
19  * and in all advertising materials mentioning features or use of this
20  * software. Neither the name of the University nor the names of its
21  * contributors may be used to endorse or promote products derived
22  * from this software without specific prior written permission.
23  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
25  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
26  */
27 
28 #pragma ident	"%Z%%M%	%I%	%E% SMI"
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <string.h>
34 #include <sys/param.h>
35 #include <sys/mntent.h>
36 #include <sys/fs/ufs_fs.h>
37 #include <sys/vnode.h>
38 #include <sys/fs/ufs_inode.h>
39 #include "fsck.h"
40 
/* Bitmap comparison helper used by pass5(); its definition follows pass5(). */
41 static int check_maps(uchar_t *, uchar_t *, int, int, char *, int, int);
42 
/*
 * Pass 5: for every cylinder group, rebuild the summary counts and the
 * inode/fragment bitmaps in a scratch cg (newcg) from statemap[] and the
 * block map consulted through testbmap(), then compare the result with
 * the cg obtained through getblk() and with the in-core superblock
 * summaries (fs_cs()).  Differences are reported and, subject to preen
 * mode or the user's replies, corrected.  After the last cg the
 * accumulated totals are checked against fs->fs_cstotal.
 *
 * Takes no arguments and returns nothing; results are communicated via
 * sbdirty()/cgdirty() and the global iscorrupt.
 */
43 void
44 pass5(void)
45 {
46 	caddr_t err;
47 	int32_t c, blk, frags;
48 	size_t	basesize, sumsize, mapsize;
49 	int excessdirs;
50 	int inomapsize, blkmapsize;
51 	int update_csums, bad_csum_sb, bad_csum_cg, update_bitmaps;
52 	struct fs *fs = &sblock;
53 	struct cg *cg = &cgrp;
54 	diskaddr_t dbase, dmax;
55 	diskaddr_t d;
56 	uint64_t i, j;
57 	struct csum *cs;
58 	struct csum backup_cs;
59 	time_t now;
60 	struct csum cstotal;
61 	struct inodesc idesc;
62 	union {				/* keep lint happy about alignment */
63 		struct cg cg;		/* the rest of buf has the bitmaps */
64 		char buf[MAXBSIZE];
65 	} u;
66 	caddr_t buf = u.buf;
67 	struct cg *newcg = &u.cg;
68 
69 	(void) memset((void *)buf, 0, sizeof (u.buf));
70 	newcg->cg_niblk = fs->fs_ipg;
71 
72 	if (fs->fs_postblformat != FS_DYNAMICPOSTBLFMT) {
73 		pfatal("UNSUPPORTED ROTATIONAL TABLE FORMAT %d\n",
74 			fs->fs_postblformat);
75 		errexit("Program terminated.");
76 		/* NOTREACHED */
77 	}
78 
	/* basesize is the byte offset of cg_space[] within struct cg. */
79 	/* LINTED this subtraction can't overflow and is int32-aligned */
80 	basesize = &newcg->cg_space[0] - (uchar_t *)newcg;
81 
82 	/*
83 	 * We reserve the space for the old rotation summary
84 	 * tables for the benefit of old kernels, but do not
85 	 * maintain them in modern kernels. In time, they could
86 	 * theoretically go away, if we wanted to deal with
87 	 * changing the on-disk format.
88 	 */
89 
90 	/*
91 	 * Note that we don't use any of the cg_*() macros until
92 	 * after cg_sanity() has approved of what we've got.
93 	 */
	/*
	 * Lay out newcg's variable part in this order: block totals,
	 * per-position block counts, inode bitmap, fragment bitmap.
	 */
94 	newcg->cg_btotoff = basesize;
95 	newcg->cg_boff = newcg->cg_btotoff + fs->fs_cpg * sizeof (daddr32_t);
96 	newcg->cg_iusedoff = newcg->cg_boff +
97 		fs->fs_cpg * fs->fs_nrpos * sizeof (uint16_t);
98 	(void) memset(&newcg->cg_space[0], 0, newcg->cg_iusedoff - basesize);
99 
100 	inomapsize = howmany(fs->fs_ipg, NBBY);
101 	newcg->cg_freeoff = newcg->cg_iusedoff + inomapsize;
102 	blkmapsize = howmany(fs->fs_fpg, NBBY);
103 	newcg->cg_nextfreeoff = newcg->cg_freeoff + blkmapsize;
104 	newcg->cg_magic = CG_MAGIC;
105 
	/*
	 * sumsize spans the two summary tables, mapsize the two bitmaps;
	 * both are byte counts derived from the offsets set above.
	 */
106 	sumsize = newcg->cg_iusedoff - newcg->cg_btotoff;
107 	mapsize = newcg->cg_nextfreeoff - newcg->cg_iusedoff;
108 
109 	init_inodesc(&idesc);
110 	idesc.id_type = ADDR;
111 	(void) memset((void *)&cstotal, 0, sizeof (struct csum));
112 	now = time(NULL);
113 
114 	/*
115 	 * If the last fragments in the file system don't make up a
116 	 * full file system block, mark the bits in the blockmap
117 	 * that correspond to those missing fragments as "allocated",
118 	 * so that the last block doesn't get counted as a free block
119 	 * and those missing fragments don't get counted as free frags.
120 	 */
121 	j = blknum(fs, (uint64_t)fs->fs_size + fs->fs_frag - 1);
122 	for (i = fs->fs_size; i < j; i++)
123 		setbmap(i);
124 
125 	/*
126 	 * The cg summaries are not always updated when using
127 	 * logging.  Since we're really concerned with getting a
128 	 * sane filesystem, rather than in trying to debug UFS
129 	 * corner cases, logically we would just always recompute
130 	 * them.  However, it is disconcerting to users to be asked
131 	 * about updating the summaries when, from their point of
132 	 * view, there's been no indication of a problem up to this
133 	 * point.  So, only do it if we find a discrepancy.
134 	 */
	/*
	 * update_csums is tri-state: -1 = not decided yet, 0 = leave the
	 * summaries alone, 1 = correct them.  update_bitmaps becomes 1
	 * once any bitmap has been fixed, after which later bitmap
	 * fixes are applied without asking again.
	 */
135 	update_csums = -1;
136 	update_bitmaps = 0;
137 	for (c = 0; c < fs->fs_ncg; c++) {
		/*
		 * Remember the running totals so that this cg's
		 * contribution can be backed out if the cg has to be
		 * examined again (see the end of the loop body).
		 */
138 		backup_cs = cstotal;
139 
140 		/*
141 		 * cg_sanity() will catch i/o errors for us.
142 		 */
143 		(void) getblk(&cgblk, (diskaddr_t)cgtod(fs, c),
144 		    (size_t)fs->fs_cgsize);
145 		err = cg_sanity(cg, c);
146 		if (err != NULL) {
147 			pfatal("CG %d: %s\n", c, err);
148 			free((void *)err);
149 			if (reply("REPAIR") == 0)
150 				errexit("Program terminated.");
151 			fix_cg(cg, c);
152 		}
153 		/*
154 		 * If the on-disk timestamp is in the future, then it
155 		 * by definition is wrong.  Otherwise, if it's in
156 		 * the past, then use that value so that we don't
157 		 * declare a spurious mismatch.
158 		 */
159 		if (now > cg->cg_time)
160 			newcg->cg_time = cg->cg_time;
161 		else
162 			newcg->cg_time = now;
163 		newcg->cg_cgx = c;
		/*
		 * [dbase, dmax) is this cg's fragment range, clipped to
		 * fs_size; the last cg gets whatever cylinders remain.
		 */
164 		dbase = cgbase(fs, c);
165 		dmax = dbase + fs->fs_fpg;
166 		if (dmax > fs->fs_size)
167 			dmax = fs->fs_size;
168 		newcg->cg_ndblk = dmax - dbase;
169 		if (c == fs->fs_ncg - 1)
170 			newcg->cg_ncyl = fs->fs_ncyl - (fs->fs_cpg * c);
171 		else
172 			newcg->cg_ncyl = fs->fs_cpg;
173 		newcg->cg_niblk = sblock.fs_ipg;
174 		newcg->cg_cs.cs_ndir = 0;
175 		newcg->cg_cs.cs_nffree = 0;
176 		newcg->cg_cs.cs_nbfree = 0;
177 		newcg->cg_cs.cs_nifree = fs->fs_ipg;
		/*
		 * Keep each on-disk rotor value when it lies within
		 * range; otherwise reset it to 0.
		 */
178 		if ((cg->cg_rotor >= 0) && (cg->cg_rotor < newcg->cg_ndblk))
179 			newcg->cg_rotor = cg->cg_rotor;
180 		else
181 			newcg->cg_rotor = 0;
182 		if ((cg->cg_frotor >= 0) && (cg->cg_frotor < newcg->cg_ndblk))
183 			newcg->cg_frotor = cg->cg_frotor;
184 		else
185 			newcg->cg_frotor = 0;
186 		if ((cg->cg_irotor >= 0) && (cg->cg_irotor < newcg->cg_niblk))
187 			newcg->cg_irotor = cg->cg_irotor;
188 		else
189 			newcg->cg_irotor = 0;
190 		(void) memset((void *)&newcg->cg_frsum[0], 0,
191 		    sizeof (newcg->cg_frsum));
192 		(void) memset((void *)cg_inosused(newcg), 0, (size_t)mapsize);
193 		/* LINTED macro is int32-aligned per newcg->cg_btotoff above */
194 		(void) memset((void *)&cg_blktot(newcg)[0], 0,
195 		    sumsize + mapsize);
		/*
		 * Walk this cg's slice of statemap[] (j is the inode
		 * number, i the index within the cg).  Every listed state
		 * other than USTATE marks the inode as in use; the
		 * directory states also bump cs_ndir.
		 */
196 		j = fs->fs_ipg * c;
197 		for (i = 0; i < fs->fs_ipg; j++, i++) {
198 			switch (statemap[j] & ~(INORPHAN | INDELAYD)) {
199 
200 			case USTATE:
201 				break;
202 
203 			case DSTATE:
204 			case DCLEAR:
205 			case DFOUND:
206 			case DZLINK:
207 				newcg->cg_cs.cs_ndir++;
208 				/* FALLTHROUGH */
209 
210 			case FSTATE:
211 			case FCLEAR:
212 			case FZLINK:
213 			case SSTATE:
214 			case SCLEAR:
215 				newcg->cg_cs.cs_nifree--;
216 				setbit(cg_inosused(newcg), i);
217 				break;
218 
219 			default:
220 				if (j < UFSROOTINO)
221 					break;
222 				errexit("BAD STATE 0x%x FOR INODE I=%d",
223 				    statemap[j], (int)j);
224 			}
225 		}
		/*
		 * In the first cg, the inodes numbered below UFSROOTINO
		 * are unconditionally marked in use.
		 */
226 		if (c == 0) {
227 			for (i = 0; i < UFSROOTINO; i++) {
228 				setbit(cg_inosused(newcg), i);
229 				newcg->cg_cs.cs_nifree--;
230 			}
231 		}
232 		/*
233 		 * Count up what fragments and blocks are free, and
234 		 * reflect the relevant section of blockmap[] into
235 		 * newcg's map.
236 		 */
237 		for (i = 0, d = dbase;
238 		    d < dmax;
239 		    d += fs->fs_frag, i += fs->fs_frag) {
240 			frags = 0;
241 			for (j = 0; j < fs->fs_frag; j++) {
242 				if (testbmap(d + j))
243 					continue;
244 				setbit(cg_blksfree(newcg), i + j);
245 				frags++;
246 			}
			/*
			 * A block whose fragments are all free counts as
			 * one free block; a partly free block adds to
			 * cs_nffree and is handed to fragacct() together
			 * with cg_frsum.
			 */
247 			if (frags == fs->fs_frag) {
248 				newcg->cg_cs.cs_nbfree++;
249 				j = cbtocylno(fs, i);
250 				/* LINTED macro is int32-aligned per above */
251 				cg_blktot(newcg)[j]++;
252 				/* LINTED cg_blks(newcg) is aligned */
253 				cg_blks(fs, newcg, j)[cbtorpos(fs, i)]++;
254 			} else if (frags > 0) {
255 				newcg->cg_cs.cs_nffree += frags;
256 				blk = blkmap(fs, cg_blksfree(newcg), i);
257 				fragacct(fs, blk, newcg->cg_frsum, 1);
258 			}
259 		}
260 		cstotal.cs_nffree += newcg->cg_cs.cs_nffree;
261 		cstotal.cs_nbfree += newcg->cg_cs.cs_nbfree;
262 		cstotal.cs_nifree += newcg->cg_cs.cs_nifree;
263 		cstotal.cs_ndir += newcg->cg_cs.cs_ndir;
264 
265 		/*
266 		 * Note that, just like the kernel, we dynamically
267 		 * allocated an array to hold the csums and stuffed
268 		 * the pointer into the in-core superblock's fs_u.fs_csp
269 		 * field.  This means that the fs_u field contains a
270 		 * random value when the disk version is examined, but
271 		 * fs_cs() gives us a valid pointer nonetheless.
272 		 * We need to compare the recalculated summaries to
273 		 * both the superblock version and the on disk version.
274 		 * If either is bad, copy the calculated version over
275 		 * the corrupt values.
276 		 */
277 
278 		cs = &fs->fs_cs(fs, c);
279 		bad_csum_sb = (memcmp((void *)cs, (void *)&newcg->cg_cs,
280 		    sizeof (*cs)) != 0);
281 
282 		bad_csum_cg = (memcmp((void *)&cg->cg_cs, (void *)&newcg->cg_cs,
283 		    sizeof (struct csum)) != 0);
284 
285 		/*
286 		 * Has the user told us what to do yet?  If not, find out.
287 		 */
288 		if ((bad_csum_sb || bad_csum_cg) && (update_csums == -1)) {
289 			if (preen) {
290 				update_csums = 1;
291 				(void) printf("CORRECTING BAD CG SUMMARIES"
292 					" FOR CG %d\n", c);
293 			} else if (update_csums == -1) {
294 				update_csums = (reply(
295 				    "CORRECT BAD CG SUMMARIES FOR CG %d",
296 				    c) == 1);
297 			}
298 		}
299 
300 		if (bad_csum_sb && (update_csums == 1)) {
301 			(void) memmove((void *)cs, (void *)&newcg->cg_cs,
302 			    sizeof (*cs));
303 			sbdirty();
304 			(void) printf("CORRECTED SUPERBLOCK SUMMARIES FOR"
305 				    " CG %d\n", c);
306 		}
307 
		/*
		 * Copy the recomputed fixed-size header (basesize bytes)
		 * and the summary tables (sumsize bytes starting at
		 * cg_blktot()) over the cg buffer.  The bitmaps are not
		 * copied here; they are compared separately below.
		 */
308 		if (bad_csum_cg && (update_csums == 1)) {
309 			(void) memmove((void *)cg, (void *)newcg,
310 			    (size_t)basesize);
311 			/* LINTED per cg_sanity() */
312 			(void) memmove((void *)&cg_blktot(cg)[0],
313 			    /* LINTED macro aligned as above */
314 			    (void *)&cg_blktot(newcg)[0], sumsize);
315 			cgdirty();
316 			(void) printf("CORRECTED SUMMARIES FOR CG %d\n", c);
317 		}
318 
		/*
		 * excessdirs is how many more directories the cg buffer
		 * claims than were counted above.  A negative value is
		 * reported and then treated as zero.
		 */
319 		excessdirs = cg->cg_cs.cs_ndir - newcg->cg_cs.cs_ndir;
320 		if (excessdirs < 0) {
321 			pfatal("LOST %d DIRECTORIES IN CG %d\n",
322 			    -excessdirs, c);
323 			excessdirs = 0;
324 		}
		/*
		 * Report at most excessdirs stale inode-map entries under
		 * the name "DIR" (skip 0, limit excessdirs).
		 */
325 		if (excessdirs > 0) {
326 			if (check_maps((uchar_t *)cg_inosused(newcg),
327 			    (uchar_t *)cg_inosused(cg), inomapsize,
328 			    cg->cg_cgx * fs->fs_ipg, "DIR", 0, excessdirs)) {
329 				if (!verbose)
330 					(void) printf("DIR BITMAP WRONG ");
331 				if (preen || update_bitmaps ||
332 				    reply("FIX") == 1) {
333 					(void) memmove((void *)cg_inosused(cg),
334 					    (void *)cg_inosused(newcg),
335 					    inomapsize);
336 					cgdirty();
337 					if (preen ||
338 					    (!verbose && update_bitmaps))
339 						(void) printf("(CORRECTED)\n");
340 					update_bitmaps = 1;
341 				}
342 			}
343 		}
344 
		/*
		 * Same inode-map comparison under the name "FILE", this
		 * time skipping the first excessdirs stale entries.
		 */
345 		if (check_maps((uchar_t *)cg_inosused(newcg),
346 		    (uchar_t *)cg_inosused(cg), inomapsize,
347 		    cg->cg_cgx * fs->fs_ipg, "FILE", excessdirs, fs->fs_ipg)) {
348 			if (!verbose)
349 				(void) printf("FILE BITMAP WRONG ");
350 			if (preen || update_bitmaps || reply("FIX") == 1) {
351 				(void) memmove((void *)cg_inosused(cg),
352 				    (void *)cg_inosused(newcg), inomapsize);
353 				cgdirty();
354 				if (preen ||
355 				    (!verbose && update_bitmaps))
356 					(void) printf("(CORRECTED)\n");
357 				update_bitmaps = 1;
358 			}
359 		}
360 
		/*
		 * In the fragment map a set bit means "free" (see the
		 * setbit() on cg_blksfree(newcg) above), so here the
		 * cg buffer's map is passed first and the recomputed
		 * map second -- the reverse of the inode-map calls.
		 */
361 		if (check_maps((uchar_t *)cg_blksfree(cg),
362 		    (uchar_t *)cg_blksfree(newcg), blkmapsize,
363 		    cg->cg_cgx * fs->fs_fpg, "FRAG", 0, fs->fs_fpg)) {
364 			if (!verbose)
365 				(void) printf("FRAG BITMAP WRONG ");
366 			if (preen || update_bitmaps || reply("FIX") == 1) {
367 				(void) memmove((void *)cg_blksfree(cg),
368 				    (void *)cg_blksfree(newcg), blkmapsize);
369 				cgdirty();
370 				if (preen ||
371 				    (!verbose && update_bitmaps))
372 					(void) printf("(CORRECTED)\n");
373 				update_bitmaps = 1;
374 			}
375 		}
376 
377 		/*
378 		 * Fixing one set of problems often shows up more in the
379 		 * same cg.  Just to make sure, go back and check it
380 		 * again if we found something this time through.
381 		 */
		/*
		 * Restoring cstotal and decrementing c makes the next
		 * loop iteration process this same cg again.
		 */
382 		if (cgisdirty()) {
383 			cgflush();
384 			cstotal = backup_cs;
385 			c--;
386 		}
387 	}
388 
	/*
	 * Compare the accumulated totals with the superblock's
	 * fs_cstotal.  The comparison is only made when fflag is set or
	 * when (islog && islogok) does not hold.  Declining the fix
	 * sets iscorrupt.
	 */
389 	if ((fflag || !(islog && islogok)) &&
390 	    (memcmp((void *)&cstotal, (void *)&fs->fs_cstotal,
391 	    sizeof (struct csum)) != 0)) {
392 		if (dofix(&idesc, "CORRECT GLOBAL SUMMARY")) {
393 			(void) memmove((void *)&fs->fs_cstotal,
394 			    (void *)&cstotal, sizeof (struct csum));
395 			fs->fs_ronly = 0;
396 			fs->fs_fmod = 0;
397 			sbdirty();
398 		} else {
399 			iscorrupt = 1;
400 		}
401 	}
402 }
403 
404 /*
405  * Compare two allocation bitmaps, reporting any discrepancies.
406  *
407  * If a mismatch is found, if the bit is set in map1, it's considered
408  * to be an indication that the corresponding resource is supposed
409  * to be free, but isn't.  Otherwise, it's considered marked as allocated
410  * but not found to be so.  In other words, if the two maps being compared
411  * use a set bit to indicate something is free, pass the on-disk map
412  * first.  Otherwise, pass the calculated map first.
413  */
414 static int
415 check_maps(
416 	uchar_t *map1,	/* map of claimed allocations */
417 	uchar_t *map2,	/* map of determined allocations */
418 	int mapsize,	/* size of above two maps */
419 	int startvalue,	/* resource value for first element in map */
420 	char *name,	/* name of resource found in maps */
421 	int skip,	/* number of entries to skip before starting to free */
422 	int limit)	/* limit on number of entries to free */
423 {
424 	long i, j, k, l, m, n, size;
425 	int astart, aend, ustart, uend;
426 	int mismatch;
427 
428 	mismatch = 0;
429 	astart = ustart = aend = uend = -1;
430 	for (i = 0; i < mapsize; i++) {
431 		j = *map1++;
432 		k = *map2++;
433 		if (j == k)
434 			continue;
435 		for (m = 0, l = 1; m < NBBY; m++, l <<= 1) {
436 			if ((j & l) == (k & l))
437 				continue;
438 			n = startvalue + i * NBBY + m;
439 			if ((j & l) != 0) {
440 				if (astart == -1) {
441 					astart = aend = n;
442 					continue;
443 				}
444 				if (aend + 1 == n) {
445 					aend = n;
446 					continue;
447 				}
448 				if (verbose) {
449 					if (astart == aend)
450 						pwarn(
451 			    "ALLOCATED %s %d WAS MARKED FREE ON DISK\n",
452 						    name, astart);
453 					else
454 						pwarn(
455 			    "ALLOCATED %sS %d-%d WERE MARKED FREE ON DISK\n",
456 						    name, astart, aend);
457 				}
458 				mismatch = 1;
459 				astart = aend = n;
460 			} else {
461 				if (ustart == -1) {
462 					ustart = uend = n;
463 					continue;
464 				}
465 				if (uend + 1 == n) {
466 					uend = n;
467 					continue;
468 				}
469 				size = uend - ustart + 1;
470 				if (size <= skip) {
471 					skip -= size;
472 					ustart = uend = n;
473 					continue;
474 				}
475 				if (skip > 0) {
476 					ustart += skip;
477 					size -= skip;
478 					skip = 0;
479 				}
480 				if (size > limit)
481 					size = limit;
482 				if (verbose) {
483 					if (size == 1)
484 						pwarn(
485 			    "UNALLOCATED %s %d WAS MARKED USED ON DISK\n",
486 						    name, ustart);
487 					else
488 						pwarn(
489 			    "UNALLOCATED %sS %d-%ld WERE MARKED USED ON DISK\n",
490 						    name, ustart,
491 						    ustart + size - 1);
492 				}
493 				mismatch = 1;
494 				limit -= size;
495 				if (limit <= 0)
496 					return (mismatch);
497 				ustart = uend = n;
498 			}
499 		}
500 	}
501 	if (astart != -1) {
502 		if (verbose) {
503 			if (astart == aend)
504 				pwarn(
505 			    "ALLOCATED %s %d WAS MARKED FREE ON DISK\n",
506 				    name, astart);
507 			else
508 				pwarn(
509 			    "ALLOCATED %sS %d-%d WERE MARKED FREE ON DISK\n",
510 				    name, astart, aend);
511 		}
512 		mismatch = 1;
513 	}
514 	if (ustart != -1) {
515 		size = uend - ustart + 1;
516 		if (size <= skip)
517 			return (mismatch);
518 		if (skip > 0) {
519 			ustart += skip;
520 			size -= skip;
521 		}
522 		if (size > limit)
523 			size = limit;
524 		if (verbose) {
525 			if (size == 1)
526 				pwarn(
527 			    "UNALLOCATED %s %d WAS MARKED USED ON DISK\n",
528 				    name, ustart);
529 			else
530 				pwarn(
531 		    "UNALLOCATED %sS %d-%ld WERE MARKED USED ON DISK\n",
532 				    name, ustart, ustart + size - 1);
533 		}
534 		mismatch = 1;
535 	}
536 	return (mismatch);
537 }
538