xref: /illumos-gate/usr/src/uts/common/fs/ufs/ufs_panic.c (revision 1d48f750)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/errno.h>
30 #include <sys/mode.h>
31 #include <sys/sysmacros.h>
32 #include <sys/cmn_err.h>
33 #include <sys/varargs.h>
34 #include <sys/time.h>
35 #include <sys/buf.h>
36 #include <sys/kmem.h>
37 #include <sys/t_lock.h>
38 #include <sys/poll.h>
39 #include <sys/debug.h>
40 #include <sys/cred.h>
41 #include <sys/lockfs.h>
42 #include <sys/fs/ufs_fs.h>
43 #include <sys/fs/ufs_inode.h>
44 #include <sys/fs/ufs_panic.h>
45 #include <sys/fs/ufs_lockfs.h>
46 #include <sys/fs/ufs_trans.h>
47 #include <sys/fs/ufs_mount.h>
48 #include <sys/fs/ufs_prot.h>
49 #include <sys/fs/ufs_bio.h>
50 #include <sys/pathname.h>
51 #include <sys/utsname.h>
52 #include <sys/conf.h>
53 
/*
 * handy: absolute value of x.
 * This is a macro, so the argument is evaluated more than once; do not
 * pass an expression with side effects.
 */
#define	abs(x)		((x) >= 0 ? (x) : -(x))
56 
#if defined(DEBUG)

/*
 * Debug verbosity levels.  These occupy the bits of ufs_fix_failure_dbg
 * above DBGFLG_FLAGMASK; a larger value means chattier output.
 */
#define	DBGLVL_NONE	0x00000000
#define	DBGLVL_MAJOR	0x00000100
#define	DBGLVL_MINOR	0x00000200
#define	DBGLVL_MINUTE	0x00000400
#define	DBGLVL_TRIVIA	0x00000800
#define	DBGLVL_HIDEOUS	0x00001000

/*
 * Debug behaviour flags.  These occupy the low nibble of
 * ufs_fix_failure_dbg.
 */
#define	DBGFLG_NONE		0x00000000
#define	DBGFLG_NOPANIC		0x00000001
#define	DBGFLG_LVLONLY		0x00000002
#define	DBGFLG_FIXWOULDPANIC	0x00000004

#define	DBGFLG_FLAGMASK		0x0000000F
/* parenthesized so the complement cannot rebind inside a larger expression */
#define	DBGFLG_LEVELMASK	(~DBGFLG_FLAGMASK)

#define	DEBUG_FLAGS	(ufs_fix_failure_dbg & DBGFLG_FLAGMASK)
#define	DEBUG_LEVEL	(ufs_fix_failure_dbg & DBGFLG_LEVELMASK)

/* patchable debug control word: one level plus any flags */
unsigned int ufs_fix_failure_dbg =	DBGLVL_NONE | DBGFLG_NONE;

/*
 * DCALL(dbg_level, call)
 *
 * Execute `call' when debugging is enabled for dbg_level:
 *   - with DBGFLG_LVLONLY set, only when dbg_level's bit is set in the
 *     current level;
 *   - otherwise, whenever dbg_level is at or below the current level.
 *
 * The expansion is deliberately a bare brace block rather than
 * do { } while (0), so that call sites compile whether or not they
 * supply a trailing semicolon.  As a consequence it must not be used
 * as the unbraced body of an if that has an else.
 */
#define	DCALL(dbg_level, call)						\
	{								\
		if (DEBUG_LEVEL != DBGLVL_NONE) {			\
			if (DEBUG_FLAGS & DBGFLG_LVLONLY) {		\
				if (DEBUG_LEVEL & (dbg_level)) {	\
					call;				\
				}					\
			} else {					\
				if ((dbg_level) <= DEBUG_LEVEL) {	\
					call;				\
				}					\
			}						\
		}							\
	}

/* msg is a fully parenthesized printf argument list: ("fmt", args...) */
#define	DPRINTF(dbg_level, msg)		DCALL(dbg_level, printf msg)

#define	MAJOR(msg)			DPRINTF(DBGLVL_MAJOR, msg)
#define	MINOR(msg)			DPRINTF(DBGLVL_MINOR, msg)
#define	MINUTE(msg)			DPRINTF(DBGLVL_MINUTE, msg)
#define	TRIVIA(msg)			DPRINTF(DBGLVL_TRIVIA, msg)
#define	HIDEOUS(msg)			DPRINTF(DBGLVL_HIDEOUS, msg)

#else	/* !DEBUG */

/* non-DEBUG builds: every debug macro expands to nothing */
#define	DCALL(ignored_dbg_level, ignored_routine)
#define	MAJOR(ignored)
#define	MINOR(ignored)
#define	MINUTE(ignored)
#define	TRIVIA(ignored)
#define	HIDEOUS(ignored)

#endif /* DEBUG */
112 
/*
 * NULLSTR(str): yields "<null>" when str is a NULL pointer or an empty
 * string, otherwise str itself.  str is evaluated more than once.
 */
#define	NULLSTR(str)	\
	(((str) == NULL || *(str) == '\0') ? "<null>" : (str))

/* the empty string */
#define	NULSTRING	""
115 
/*
 * Default timing values, all in seconds.  The limits are somewhat
 * arbitrary; they probably ought to differ from one another, but these
 * values are convenient for debugging.  ufsfx_init() copies them into
 * the patchable ufsfx_tune structure.
 */

/* longest we will wait for fsck to start */
const time_t	UF_TOO_LONG		= 128;

/* retry periods used while ... */
const time_t	UF_FIXSTART_PERIOD	= 16;	/* ... waiting for fsck start */
const time_t	UF_FIXPOLL_PERIOD	= 256;	/* ... waiting for fsck finish */
const time_t	UF_SHORT_ERROR_PERIOD	= 4;	/* ... after a (lockfs) error */
const time_t	UF_LONG_ERROR_PERIOD	= 512;	/* ... after a (lockfs) error */
125 
/* "no error" value used in the errno description table */
#define	NO_ERROR		0
/*
 * Lock type one past the largest regular lockfs lock type.  The
 * expansion is parenthesized so that it keeps its value when the macro
 * is used inside a larger expression.
 */
#define	LOCKFS_OLOCK		(LOCKFS_MAXLOCK + 1)
128 
129 const ulong_t	GB			= 1024 * 1024 * 1024;
130 const ulong_t	SecondsPerGig		= 1024;	/* ~17 minutes (overestimate) */
131 
132 /*
133  * per filesystem flags
134  */
135 const int	UFSFX_PANIC		= (UFSMNT_ONERROR_PANIC >> 4);
136 const int	UFSFX_LCKONLY		= (UFSMNT_ONERROR_LOCK >> 4);
137 const int	UFSFX_LCKUMOUNT		= (UFSMNT_ONERROR_UMOUNT >> 4);
138 const int	UFSFX_DEFAULT		= (UFSMNT_ONERROR_DEFAULT >> 4);
139 const int	UFSFX_REPAIR_START	= 0x10000000;
140 
/* return protocols */

/*
 * Verdict returned by triage() on the failing file system.
 */
typedef enum triage_return_code {
	TRIAGE_DEAD		= -1,	/* cannot be attended to; panic */
	TRIAGE_NO_SPIRIT	= 0,	/* fs is marked to panic on error */
	TRIAGE_ATTEND_TO	= 1	/* a fix attempt may proceed */
} triage_t;
148 
/*
 * Result of a state function; usable directly as a truth value
 * (failure is zero, success is non-zero).
 */
typedef enum statefunc_return_code {
	SFRC_FAIL	= 0,
	SFRC_SUCCESS	= 1
} sfrc_t;
153 
154 /* external references */
155 /* in ufs_thread.c */
156 extern int	ufs_thread_run(struct ufs_q *, callb_cpr_t *cprinfop);
157 extern int	ufs_checkaccton(vnode_t *);		/* in ufs_lockfs.c */
158 extern int	ufs_checkswapon(vnode_t *);		/* in ufs_lockfs.c */
159 
160 extern struct pollhead		ufs_pollhd;		/* in ufs_vnops.c */
161 
162 /* globals */
163 struct	ufs_q	 ufs_fix;
164 
/*
 * patchable constants:
 *   ufsfx_init() [called at modload] loads these from the UF_* defaults.
 *   All values are in seconds.
 */
struct ufs_failure_tunable {
	long	 uft_too_long;		/* limit on repair startup time */
	long	 uft_fixstart_period;	/* period before repair starts */
	long	 uft_fixpoll_period;	/* period after fsck has started */
	long	 uft_short_err_period;	/* short period after an error */
	long	 uft_long_err_period;	/* long period after an error */
};

struct ufs_failure_tunable ufsfx_tune;
176 
177 /* internal statistics of events */
178 struct uf_statistics {
179 	ulong_t		ufst_lock_violations;
180 	ulong_t		ufst_current_races;
181 	ulong_t		ufst_unmount_failures;
182 	ulong_t		ufst_num_fixed;
183 	ulong_t		ufst_num_failed;
184 	ulong_t		ufst_cpu_waste;
185 	time_t		ufst_last_start_tm;
186 	kmutex_t	ufst_mutex;
187 } uf_stats;
188 
/*
 * Action handed to a per-state function.
 */
typedef enum state_action {
	UFA_ERROR	= -1,	/* internal error */
	UFA_FOUND	= 0,	/* failure record was found in this state */
	UFA_SET		= 1	/* failure record is being put in this state */
} ufsa_t;
194 
195 /* state definition */
196 typedef struct uf_state_desc {
197 	int	  ud_v;					/* value */
198 	char	 *ud_name;				/* name */
199 	sfrc_t	(*ud_sfp)(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
200 							/* per-state actions */
201 	ufs_failure_states_t	  ud_prev;		/* valid prev. states */
202 
203 	struct uf_state_desc_attr {
204 		unsigned	terminal:1;	/* no action req. if found */
205 		unsigned	at_fail:1;	/* state set by thread */
206 						/* encountering the error */
207 		unsigned	unused;
208 	} ud_attr;
209 } ufsd_t;
210 
211 /*
212  * forward references
213  */
214 
215 /* thread to watch for failures */
216 static void	ufsfx_thread_fix_failures(void *);
217 static int	ufsfx_do_failure_q(void);
218 static void	ufsfx_kill_fix_failure_thread(void *);
219 
220 /* routines called when failure occurs */
221 static int		 ufs_fault_v(vnode_t *, char *, va_list)
222 	__KVPRINTFLIKE(2);
223 static ufs_failure_t	*init_failure(vnode_t *, char *, va_list)
224 	__KVPRINTFLIKE(2);
225 static void		 queue_failure(ufs_failure_t *);
226 /*PRINTFLIKE2*/
227 static void		 real_panic(ufs_failure_t *, const char *, ...)
228 	__KPRINTFLIKE(2);
229 static void		 real_panic_v(ufs_failure_t *, const char *, va_list)
230 	__KVPRINTFLIKE(2);
231 static triage_t		 triage(vnode_t *);
232 
233 /* routines called when failure record is acted upon */
234 static sfrc_t	set_state(ufs_failure_t *, ufs_failure_states_t);
235 static int	state_trans_valid(ufs_failure_states_t, ufs_failure_states_t);
236 static int	terminal_state(ufs_failure_states_t);
237 
238 /* routines called when states entered/found */
239 static sfrc_t	sf_minimum(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
240 static sfrc_t	sf_undef(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
241 static sfrc_t	sf_init(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
242 static sfrc_t	sf_queue(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
243 static sfrc_t	sf_found_queue(ufs_failure_t *);
244 static sfrc_t	sf_nonterm_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
245 static sfrc_t	sf_term_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
246 static sfrc_t	sf_panic(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
247 static sfrc_t	sf_set_trylck(ufs_failure_t *);
248 static sfrc_t	sf_set_locked(ufs_failure_t *);
249 static sfrc_t	sf_found_trylck(ufs_failure_t *);
250 static sfrc_t	sf_found_lock_fix_cmn(ufs_failure_t *, ufs_failure_states_t);
251 static sfrc_t	sf_found_umount(ufs_failure_t *);
252 
253 /* support routines, called by sf_nonterm_cmn and sf_term_cmn */
254 static time_t	trylock_time_exceeded(ufs_failure_t *);
255 static void	pester_msg(ufs_failure_t *, int);
256 static int	get_lockfs_status(ufs_failure_t *, struct lockfs *);
257 static void	alloc_lockfs_comment(ufs_failure_t *, struct lockfs *);
258 static int	set_lockfs(ufs_failure_t *, struct lockfs *);
259 static int	lockfs_failure(ufs_failure_t *);
260 static int	lockfs_success(ufs_failure_t *);
261 static int	fsck_active(ufs_failure_t *);
262 
263 /* low-level support routines */
264 static ufsd_t	*get_state_desc(ufs_failure_states_t);
265 static char	*fs_name(ufs_failure_t *);
266 
267 #if defined(DEBUG)
268 static char	*state_name(ufs_failure_states_t);
269 static char	*lock_name(struct lockfs *);
270 static char	*err_name(int);
271 static char	*act_name(ufsa_t);
272 static void	 dump_uf_list(char *msg);
273 static void	 dump_uf(ufs_failure_t *, int i);
274 #endif /* DEBUG */
275 /*
276  *
277  * State Transitions:
278  *
279  * normally:
280  * if flagged to be locked but not unmounted:	(UFSMNT_ONERROR_LOCK)
281  *	UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> FIXING -> FIXED
282  *
283  * The only difference between these two is that the fsck must be started
284  * manually.
285  *
286  * if flagged to be unmounted:			(UFSMNT_ONERROR_UMOUNT)
287  *	UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> UMOUNT -> NOTFIX
288  *
289  * if flagged to panic:				(UFSMNT_ONERROR_PANIC)
290  *	UNDEF -> INIT -> PANIC
291  *
292  * if a secondary panic on a file system which has an active failure
293  * record:
294  *	UNDEF -> INIT -> QUEUE -> REPLICA
295  *
 * UNDEF, INIT and QUEUE are all set in the context of the failing thread.
 * All other states (except possibly PANIC) are set by the monitor
 * (lock) thread.
299  *
300  */
301 
302 ufsd_t	state_desc[] =
303 {
304 	{ UF_ILLEGAL,	"in an unknown state",	sf_minimum,	UF_ILLEGAL,
305 								{ 0, 1, 0 } },
306 	{ UF_UNDEF,	"undefined",		sf_undef,	UF_UNDEF,
307 								{ 0, 1, 0 } },
308 	{ UF_INIT,	"being initialized",	sf_init,	UF_UNDEF,
309 								{ 0, 1, 0 } },
310 	{ UF_QUEUE,	"queued",		sf_queue,	UF_INIT,
311 								{ 0, 1, 0 } },
312 	{ UF_TRYLCK,	"trying to be locked",	sf_nonterm_cmn,
313 						UF_QUEUE,	{ 0, 0, 0 } },
314 	{ UF_LOCKED,	"locked",		sf_nonterm_cmn,
315 					UF_TRYLCK | UF_FIXING,	{ 0, 0, 0 } },
316 	{ UF_UMOUNT,	"being unmounted",	sf_nonterm_cmn,
317 
318 #if defined(DEBUG)
319 					UF_PANIC |
320 #endif /* DEBUG */
321 					UF_TRYLCK | UF_LOCKED,	{ 0, 0, 0 } },
322 	{ UF_FIXING,	"being fixed",		sf_nonterm_cmn,
323 						UF_LOCKED,	{ 0, 0, 0 } },
324 	{ UF_FIXED,	"fixed",		sf_term_cmn,
325 						UF_FIXING,	{ 1, 0, 0 } },
326 	{ UF_NOTFIX,	"not fixed",		sf_term_cmn,
327 
328 #if defined(DEBUG)
329 							UF_PANIC |
330 #endif /* DEBUG */
331 
332 	    UF_QUEUE | UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING,
333 								{ 1, 0, 0 } },
334 	{ UF_REPLICA,	"a replica",		sf_term_cmn,
335 						UF_QUEUE,	{ 1, 0, 0 } },
336 	{ UF_PANIC,	"panicking",		sf_panic,
337 		/* XXX make this narrower */	UF_ALLSTATES,	{ 0, 0, 0 } },
338 	{ UF_UNDEF,	NULL,			((sfrc_t (*)()) NULL),
339 						UF_UNDEF,	{ 0, 0, 0 } }
340 };
341 
342 /* unified collection */
343 struct ufsfx_info {
344 	struct uf_statistics		*ufi_statp;
345 	struct ufs_failure_tunable	*ufi_tunep;
346 	ufsd_t				*ufi_statetab;
347 } uffsinfo;
348 
#if defined(DEBUG)
/*
 * Value-to-name tables used only for debug output.  In each table the
 * first entry carries the "unexpected" name and the last entry has a
 * NULL name.
 */

/* action value and its printable name */
struct action_description {
	ufsa_t	 ad_v;
	char	*ad_name;
};

/* placeholder errno for "not in the table" */
#define	EUNK		(-1)

/* errno value and its printable name */
struct error_description {
	int	 ed_errno;
	char	*ed_name;
};

struct error_description err_desc[] =
{
	{ EUNK,		"<unexpected errno?>"	},
	{ EINVAL,	"EINVAL"		},
	{ EACCES,	"EACCES"		},
	{ EPERM,	"EPERM"			},
	{ EIO,		"EIO"			},
	{ EDEADLK,	"EDEADLK"		},
	{ EBUSY,	"EBUSY"			},
	{ EAGAIN,	"EAGAIN"		},
	{ ERESTART,	"ERESTART"		},
	{ ETIMEDOUT,	"ETIMEDOUT"		},
	{ NO_ERROR,	"Ok"			},
	{ EUNK,		NULL			}
};

struct action_description act_desc[] =
{
	{ UFA_ERROR,	"<unexpected action?>"	},
	{ UFA_FOUND,	"\"found\""	},
	{ UFA_SET,	"\"set\""	},
	{ UFA_ERROR,	NULL			},
};

/* placeholder lock type for "not in the table" */
#define	LOCKFS_BADLOCK	(-1)

/* lockfs lock type and its printable name */
struct lock_description {
	int	 ld_type;
	char	*ld_name;
};

struct lock_description lock_desc[] =
{
	{ LOCKFS_BADLOCK,	"<unexpected lock?>"	},
	{ LOCKFS_ULOCK,		"Unlock"		},
	{ LOCKFS_ELOCK,		"Error Lock"		},
	{ LOCKFS_HLOCK,		"Hard Lock"		},
	{ LOCKFS_OLOCK,		"Old Lock"		},
	{ LOCKFS_BADLOCK,	NULL			}
};

#endif /* DEBUG */
400 
401 /*
402  * ufs_fault, ufs_fault_v
403  *
404  *  called instead of cmn_err(CE_PANIC, ...) by ufs routines
405  *  when a failure is detected to put the file system into an
406  *  error state (if possible) or to devolve to a panic otherwise
407  *
408  * vnode is some vnode in this file system, used to find the way
409  * to ufsvfs, vfsp etc.  Since a panic can be called from many
410  * levels, the vnode is the most convenient hook to pass through.
411  *
412  */
413 
414 /*PRINTFLIKE2*/
415 int
ufs_fault(vnode_t * vp,char * fmt,...)416 ufs_fault(vnode_t *vp, char *fmt, ...)
417 {
418 	va_list	adx;
419 	int	error;
420 
421 	MINOR(("[ufs_fault"));
422 
423 	va_start(adx, fmt);
424 	error = ufs_fault_v(vp, fmt, adx);
425 	va_end(adx);
426 
427 	MINOR((": %s (%d)]\n", err_name(error), error));
428 	return (error);
429 }
430 
431 const char *nullfmt = "<null format?>";
432 
433 static int
ufs_fault_v(vnode_t * vp,char * fmt,va_list adx)434 ufs_fault_v(vnode_t *vp, char *fmt, va_list adx)
435 {
436 	ufs_failure_t		*new = NULL;
437 	ufsvfs_t		*ufsvfsp;
438 	triage_t		 fix;
439 	int			 err = ERESTART;
440 	int			need_vfslock;
441 
442 	MINOR(("[ufs_fault_v"));
443 
444 	if (fmt == NULL)
445 		fmt = (char *)nullfmt;
446 
447 	fix = triage(vp);
448 
449 	if (vp) {
450 		ufsvfsp = (struct ufsvfs *)vp->v_vfsp->vfs_data;
451 
452 		/*
453 		 * Something bad has happened. That is why we are here.
454 		 *
455 		 * In order for the bad thing to be recorded in the superblock
456 		 * we need to write to the superblock directly.
457 		 * In the case that logging is enabled the logging code
458 		 * would normally intercept our write as a delta to the log,
459 		 * thus we mark the filesystem FSBAD in any case.
460 		 */
461 		need_vfslock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
462 
463 		if (need_vfslock) {
464 			mutex_enter(&ufsvfsp->vfs_lock);
465 		}
466 
467 		ufsvfsp->vfs_fs->fs_clean = FSBAD;
468 		ASSERT(SEMA_HELD(&ufsvfsp->vfs_bufp->b_sem));
469 		ufsvfsp->vfs_bufp->b_flags &=
470 		    ~(B_ASYNC | B_READ | B_DONE | B_ERROR | B_DELWRI);
471 
472 		(void) bdev_strategy(ufsvfsp->vfs_bufp);
473 		(void) biowait(ufsvfsp->vfs_bufp);
474 
475 		if (need_vfslock) {
476 			mutex_exit(&ufsvfsp->vfs_lock);
477 		}
478 	}
479 
480 	switch (fix) {
481 
482 	default:
483 	case TRIAGE_DEAD:
484 	case TRIAGE_NO_SPIRIT:
485 
486 		real_panic_v(new, fmt, adx);
487 		/* LINTED: warning: logical expression always true: op "||" */
488 		ASSERT(DEBUG);
489 		err = EAGAIN;
490 
491 #if defined(DEBUG)
492 		if (!(DEBUG_FLAGS & DBGFLG_FIXWOULDPANIC)) {
493 			break;
494 		}
495 #else
496 		break;
497 
498 #endif /* DEBUG */
499 		/* FALLTHROUGH */
500 
501 	case TRIAGE_ATTEND_TO:
502 
503 		/* q thread not running yet? */
504 		if (mutex_tryenter(&ufs_fix.uq_mutex)) {
505 			if (!ufs_fix.uq_threadp) {
506 				mutex_exit(&ufs_fix.uq_mutex);
507 				ufs_thread_start(&ufs_fix,
508 				    ufsfx_thread_fix_failures, NULL);
509 				ufs_fix.uq_threadp->t_flag |= T_DONTBLOCK;
510 				mutex_enter(&ufs_fix.uq_mutex);
511 			} else {
512 				/*
513 				 * We got the lock but we are not the current
514 				 * threadp so we have to release the lock.
515 				 */
516 				mutex_exit(&ufs_fix.uq_mutex);
517 			}
518 		} else {
519 			MINOR((": fix failure thread already running "));
520 			/*
521 			 * No need to log another failure as one is already
522 			 * being logged.
523 			 */
524 			break;
525 		}
526 
527 		if (ufs_fix.uq_threadp && ufs_fix.uq_threadp == curthread) {
528 			mutex_exit(&ufs_fix.uq_mutex);
529 			cmn_err(CE_WARN, "ufs_fault_v: recursive ufs_fault");
530 		} else {
531 			/*
532 			 * Must check if we actually still own the lock and
533 			 * if so then release the lock and move on with life.
534 			 */
535 			if (mutex_owner(&ufs_fix.uq_mutex) == curthread)
536 				mutex_exit(&ufs_fix.uq_mutex);
537 		}
538 
539 		new = init_failure(vp, fmt, adx);
540 		if (new != NULL) {
541 			queue_failure(new);
542 			break;
543 		}
544 		real_panic_v(new, fmt, adx);
545 		break;
546 
547 	}
548 	MINOR(("] "));
549 	return (err);
550 }
551 
552 /*
553  * triage()
554  *
555  *  Attempt to fix iff:
556  *    - the system is not already panicking
557  *    - this file system isn't explicitly marked not to be fixed
558  *    - we can connect to the user-level daemon
559  * These conditions are detectable later, but if we can determine
560  * them in the failing threads context the core dump may be more
561  * useful.
562  *
563  */
564 
565 static triage_t
triage(vnode_t * vp)566 triage(vnode_t *vp)
567 {
568 	struct inode	 *ip;
569 	int		  need_unlock_vfs;
570 	int		  fs_flags;
571 
572 	MINUTE(("[triage"));
573 
574 	if (panicstr) {
575 		MINUTE((
576 		": already panicking: \"%s\" => TRIAGE_DEAD]\n", panicstr));
577 		return (TRIAGE_DEAD);
578 	}
579 
580 	if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs) {
581 		MINUTE((
582 	": vp, ip or ufsvfs is NULL; can't determine fs => TRIAGE_DEAD]\n"));
583 		return (TRIAGE_DEAD);
584 	}
585 
586 	/* use tryenter and continue no matter what since we're panicky */
587 	need_unlock_vfs = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock);
588 	if (need_unlock_vfs)
589 		need_unlock_vfs = mutex_tryenter(&ip->i_ufsvfs->vfs_lock);
590 
591 	fs_flags = ip->i_ufsvfs->vfs_fsfx.fx_flags;
592 	if (need_unlock_vfs)
593 		mutex_exit(&ip->i_ufsvfs->vfs_lock);
594 
595 	if (fs_flags & UFSFX_PANIC) {
596 		MINUTE((
597 		": filesystem marked \"panic\" => TRIAGE_NO_SPIRIT]\n"));
598 		return (TRIAGE_NO_SPIRIT);
599 	}
600 
601 	if (ufs_checkaccton(vp) != 0) {
602 		MINUTE((
603 		": filesystem would deadlock (accounting) => TRIAGE_DEAD]\n"));
604 		return (TRIAGE_DEAD);
605 	}
606 
607 	if (ufs_checkswapon(vp) != 0) {
608 		MINUTE((
609 		": filesystem would deadlock (swapping) => TRIAGE_DEAD]\n"));
610 		return (TRIAGE_DEAD);
611 	}
612 
613 	MINUTE((": return TRIAGE_ATTEND_TO] "));
614 	return (TRIAGE_ATTEND_TO);
615 }
616 
617 /*
618  * init failure
619  *
620  * This routine allocates a failure struct and initializes
621  * it's member elements.
622  * Space is allocated for copies of dynamic identifying fs structures
623  * passed in.  Without a much more segmented kernel architecture
624  * this is as protected as we can make it (for now.)
625  */
626 static ufs_failure_t *
init_failure(vnode_t * vp,char * fmt,va_list adx)627 init_failure(vnode_t *vp, char *fmt, va_list adx)
628 {
629 	ufs_failure_t	*new;
630 	struct inode	*ip;
631 	int		 initialization_worked = 0;
632 	int		 need_vfs_unlock;
633 
634 	MINOR(("[init_failure"));
635 
636 	new = kmem_zalloc(sizeof (ufs_failure_t), KM_NOSLEEP);
637 	if (!new) {
638 		MINOR((": kmem_zalloc failed]\n"));
639 		return (NULL);
640 	}
641 
642 	/*
643 	 * enough information to make a fix attempt possible?
644 	 */
645 	if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs || !vp->v_vfsp ||
646 	    !ip->i_ufsvfs->vfs_bufp || !ITOF(ip) || !fmt)
647 		goto errout;
648 
649 	if (vp->v_type != VREG && vp->v_type != VDIR &&
650 	    vp->v_type != VBLK && vp->v_type != VCHR &&
651 	    vp->v_type != VLNK && vp->v_type != VFIFO &&
652 	    vp->v_type != VSOCK)
653 		goto errout;
654 
655 	if (ip->i_ufsvfs->vfs_root->v_type != VREG &&
656 	    ip->i_ufsvfs->vfs_root->v_type != VDIR &&
657 	    ip->i_ufsvfs->vfs_root->v_type != VBLK &&
658 	    ip->i_ufsvfs->vfs_root->v_type != VCHR &&
659 	    ip->i_ufsvfs->vfs_root->v_type != VLNK &&
660 	    ip->i_ufsvfs->vfs_root->v_type != VFIFO &&
661 	    ip->i_ufsvfs->vfs_root->v_type != VSOCK)
662 		goto errout;
663 
664 	if ((ITOF(ip)->fs_magic != FS_MAGIC) &&
665 	    (ITOF(ip)->fs_magic != MTB_UFS_MAGIC))
666 		goto errout;
667 
668 	/* intialize values */
669 
670 	(void) vsnprintf(new->uf_panic_str, LOCKFS_MAXCOMMENTLEN - 1, fmt, adx);
671 
672 	new->uf_ufsvfsp = ip->i_ufsvfs;
673 	new->uf_vfsp    = ip->i_vfs;
674 
675 	mutex_init(&new->uf_mutex, NULL, MUTEX_DEFAULT, NULL);
676 	need_vfs_unlock = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock);
677 
678 	if (need_vfs_unlock) {
679 		if (!mutex_tryenter(&ip->i_ufsvfs->vfs_lock)) {
680 			/*
681 			 * not much alternative here, but we're panicking
682 			 * already, it couldn't be worse - so just
683 			 * proceed optimistically and take note.
684 			 */
685 			mutex_enter(&uf_stats.ufst_mutex);
686 			uf_stats.ufst_lock_violations++;
687 			mutex_exit(&uf_stats.ufst_mutex);
688 			MINOR((": couldn't get vfs lock"))
689 			need_vfs_unlock = 0;
690 		}
691 	}
692 
693 	if (mutex_tryenter(&new->uf_mutex)) {
694 		initialization_worked = set_state(new, UF_INIT);
695 		mutex_exit(&new->uf_mutex);
696 	}
697 
698 	if (need_vfs_unlock)
699 		mutex_exit(&ip->i_ufsvfs->vfs_lock);
700 
701 	if (initialization_worked) {
702 		MINOR(("] "));
703 		return (new);
704 	}
705 	/* FALLTHROUGH */
706 
707 errout:
708 	if (new)
709 		kmem_free(new, sizeof (ufs_failure_t));
710 	MINOR((": failed]\n"));
711 	return (NULL);
712 }
713 
714 static void
queue_failure(ufs_failure_t * new)715 queue_failure(ufs_failure_t *new)
716 {
717 	MINOR(("[queue_failure"));
718 
719 	mutex_enter(&ufs_fix.uq_mutex);
720 
721 	if (ufs_fix.uq_ufhead)
722 		insque(new, &ufs_fix.uq_ufhead);
723 	else
724 		ufs_fix.uq_ufhead = new;
725 
726 	if (mutex_tryenter(&new->uf_mutex)) {
727 		(void) set_state(new, UF_QUEUE);
728 		mutex_exit(&new->uf_mutex);
729 	}
730 
731 	mutex_enter(&uf_stats.ufst_mutex);		/* force wakeup */
732 	ufs_fix.uq_ne = ufs_fix.uq_lowat = uf_stats.ufst_num_failed;
733 	mutex_exit(&uf_stats.ufst_mutex);
734 
735 	cv_broadcast(&ufs_fix.uq_cv);
736 
737 	DCALL(DBGLVL_MAJOR, cmn_err(CE_WARN, new->uf_panic_str ?
738 	    new->uf_panic_str : "queue_failure: NULL panic str?"));
739 	mutex_exit(&ufs_fix.uq_mutex);
740 
741 	MINOR(("] "));
742 }
743 
744 /*PRINTFLIKE2*/
745 static void
real_panic(ufs_failure_t * f,const char * fmt,...)746 real_panic(ufs_failure_t *f, const char *fmt, ...)
747 {
748 	va_list	adx;
749 
750 	MINUTE(("[real_panic "));
751 
752 	va_start(adx, fmt);
753 	real_panic_v(f, fmt, adx);
754 	va_end(adx);
755 
756 	MINUTE((": return?!]\n"));
757 }
758 
759 static void
real_panic_v(ufs_failure_t * f,const char * fmt,va_list adx)760 real_panic_v(ufs_failure_t *f, const char *fmt, va_list adx)
761 {
762 	int seriousness = CE_PANIC;
763 	int need_unlock;
764 
765 	MINUTE(("[real_panic_v "));
766 
767 	if (f && f->uf_ufsvfsp)
768 		TRANS_SETERROR(f->uf_ufsvfsp);
769 
770 #if defined(DEBUG)
771 	if (DEBUG_FLAGS & DBGFLG_NOPANIC) {
772 		seriousness = CE_WARN;
773 		cmn_err(CE_WARN, "real_panic: EWOULDPANIC\n");
774 	}
775 #endif /* DEBUG */
776 
777 	delay(hz >> 1);			/* allow previous warnings to get out */
778 
779 	if (!f && fmt)
780 		vcmn_err(seriousness, fmt, adx);
781 	else
782 		cmn_err(seriousness, f != NULL && f->uf_panic_str[0] != '\0' ?
783 		    f->uf_panic_str: "real_panic: <unknown panic?>");
784 
785 	if (f) {
786 		need_unlock = !MUTEX_HELD(&f->uf_mutex);
787 		if (need_unlock) {
788 			mutex_enter(&f->uf_mutex);
789 		}
790 
791 		f->uf_retry = -1;
792 		(void) set_state(f, UF_PANIC);
793 
794 		if (need_unlock) {
795 			mutex_exit(&f->uf_mutex);
796 		}
797 	}
798 	MINUTE((": return?!]\n"));
799 }
800 
801 /*
802  * initializes ufs panic structs, locks, etc
803  */
804 void
ufsfx_init(void)805 ufsfx_init(void)
806 {
807 
808 	MINUTE(("[ufsfx_init"));
809 
810 	/* patchable; unchanged while running, so no lock is needed */
811 	ufsfx_tune.uft_too_long		= UF_TOO_LONG;
812 	ufsfx_tune.uft_fixstart_period	= UF_FIXSTART_PERIOD;
813 	ufsfx_tune.uft_fixpoll_period	= UF_FIXPOLL_PERIOD;
814 	ufsfx_tune.uft_short_err_period	= UF_SHORT_ERROR_PERIOD;
815 	ufsfx_tune.uft_long_err_period	= UF_LONG_ERROR_PERIOD;
816 
817 	uffsinfo.ufi_statp	= &uf_stats;
818 	uffsinfo.ufi_tunep	= &ufsfx_tune;
819 	uffsinfo.ufi_statetab	= &state_desc[0];
820 
821 	mutex_init(&uf_stats.ufst_mutex, NULL, MUTEX_DEFAULT, NULL);
822 	ufs_thread_init(&ufs_fix, /* maxne */ 1);
823 
824 	MINUTE(("] "));
825 }
826 
827 /*
828  * initializes per-ufs values
829  * returns 0 (ok) or errno
830  */
831 int
ufsfx_mount(struct ufsvfs * ufsvfsp,int flags)832 ufsfx_mount(struct ufsvfs *ufsvfsp, int flags)
833 {
834 	MINUTE(("[ufsfx_mount (%d)", flags));
835 	/* don't check/need vfs_lock because it's still being initialized */
836 
837 	ufsvfsp->vfs_fsfx.fx_flags = (flags & UFSMNT_ONERROR_FLGMASK) >> 4;
838 
839 	MINUTE((": %s: fx_flags:%ld,",
840 	    ufsvfsp->vfs_fs->fs_fsmnt, ufsvfsp->vfs_fsfx.fx_flags));
841 	/*
842 	 *	onerror={panic ^ lock only ^ unmount}
843 	 */
844 
845 	if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_PANIC) {
846 		MINUTE((" PANIC"));
847 
848 	} else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKONLY) {
849 		MINUTE((" LCKONLY"));
850 
851 	} else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKUMOUNT) {
852 		MINUTE((" LCKUMOUNT"));
853 
854 	} else {
855 		ufsvfsp->vfs_fsfx.fx_flags = UFSFX_DEFAULT;
856 		ASSERT(ufsvfsp->vfs_fsfx.fx_flags &
857 		    (UFSMNT_ONERROR_FLGMASK >> 4));
858 		MINUTE((" DEFAULT"));
859 	}
860 
861 	pollwakeup(&ufs_pollhd, POLLPRI);
862 	MINUTE(("]\n"));
863 	return (0);
864 }
865 
866 /*
867  * ufsfx_unmount
868  *
869  * called during unmount
870  */
871 void
ufsfx_unmount(struct ufsvfs * ufsvfsp)872 ufsfx_unmount(struct ufsvfs *ufsvfsp)
873 {
874 	ufs_failure_t	*f;
875 	int		 must_unlock_list;
876 
877 	MINUTE(("[ufsfx_unmount"));
878 
879 	if (!ufsvfsp) {
880 		MINUTE((": no ufsvfsp]"));
881 		return;
882 	}
883 
884 	if ((must_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex)) != 0)
885 		mutex_enter(&ufs_fix.uq_mutex);
886 
887 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
888 		int must_unlock_failure;
889 
890 		must_unlock_failure = !MUTEX_HELD(&f->uf_mutex);
891 		if (must_unlock_failure) {
892 			mutex_enter(&f->uf_mutex);
893 		}
894 
895 		if (f->uf_ufsvfsp == ufsvfsp) {
896 
897 			/*
898 			 * if we owned the failure record lock, then this
899 			 * is probably a fix failure-triggered unmount, so
900 			 * the warning is not appropriate or needed
901 			 */
902 
903 			/* XXX if rebooting don't print this? */
904 			if (!terminal_state(f->uf_s) && must_unlock_failure) {
905 				cmn_err(CE_WARN,
906 				    "Unmounting %s while error-locked",
907 				    fs_name(f));
908 			}
909 
910 			f->uf_ufsvfsp		= NULL;
911 			f->uf_vfs_ufsfxp	= NULL;
912 			f->uf_vfs_lockp		= NULL;
913 			f->uf_bp		= NULL;
914 			f->uf_vfsp		= NULL;
915 			f->uf_retry		= -1;
916 		}
917 
918 		if (must_unlock_failure)
919 			mutex_exit(&f->uf_mutex);
920 	}
921 	if (must_unlock_list)
922 		mutex_exit(&ufs_fix.uq_mutex);
923 
924 	pollwakeup(&ufs_pollhd, POLLPRI | POLLHUP);
925 	MINUTE(("] "));
926 }
927 
928 /*
929  * ufsfx_(un)lockfs
930  *
931  * provides hook from lockfs code so we can recognize unlock/relock
932  *  This is called after it is certain that the (un)lock will succeed.
933  */
934 void
ufsfx_unlockfs(struct ufsvfs * ufsvfsp)935 ufsfx_unlockfs(struct ufsvfs *ufsvfsp)
936 {
937 	ufs_failure_t	*f;
938 	int		 need_unlock;
939 	int		 need_unlock_list;
940 	int		 informed = 0;
941 
942 	MINUTE(("[ufsfx_unlockfs"));
943 
944 	if (!ufsvfsp)
945 		return;
946 
947 	need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex);
948 
949 	if (need_unlock_list)
950 		mutex_enter(&ufs_fix.uq_mutex);
951 
952 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
953 
954 		need_unlock = !MUTEX_HELD(&f->uf_mutex);
955 		if (need_unlock)
956 			mutex_enter(&f->uf_mutex);
957 
958 		if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s)) {
959 			if (!(f->uf_s & UF_FIXING)) {
960 				/*
961 				 * This might happen if we don't notice that
962 				 * the fs gets marked FSFIX before it is
963 				 * marked FSCLEAN, as might occur if the
964 				 * the superblock was hammered directly.
965 				 */
966 				if (!informed) {
967 					informed = 1;
968 					cmn_err(CE_NOTE,
969 					    "Unlock of %s succeeded before "
970 					    "fs_clean marked FSFIX?",
971 					    fs_name(f));
972 				}
973 
974 				/*
975 				 * pass through fixing state so
976 				 * transition protocol is satisfied
977 				 */
978 				if (!set_state(f, UF_FIXING)) {
979 					MINUTE((": failed] "));
980 				}
981 			}
982 
983 			if (!set_state(f, UF_FIXED)) {
984 				/* it's already fixed, so don't panic now */
985 				MINUTE((": failed] "));
986 			}
987 		}
988 
989 		if (need_unlock)
990 			mutex_exit(&f->uf_mutex);
991 	}
992 	if (need_unlock_list)
993 		mutex_exit(&ufs_fix.uq_mutex);
994 	MINUTE(("] "));
995 }
996 
997 void
ufsfx_lockfs(struct ufsvfs * ufsvfsp)998 ufsfx_lockfs(struct ufsvfs *ufsvfsp)
999 {
1000 	ufs_failure_t	*f;
1001 	int		 need_unlock;
1002 	int		 need_unlock_list;
1003 
1004 	MINUTE(("[ufsfx_lockfs"));
1005 
1006 	if (!ufsvfsp)
1007 		return;
1008 
1009 	need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex);
1010 
1011 	if (need_unlock_list)
1012 		mutex_enter(&ufs_fix.uq_mutex);
1013 
1014 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
1015 
1016 		need_unlock = !MUTEX_HELD(&f->uf_mutex);
1017 		if (need_unlock)
1018 			mutex_enter(&f->uf_mutex);
1019 
1020 		if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s) &&
1021 		    f->uf_s != UF_PANIC) {
1022 			switch (f->uf_s) {
1023 
1024 			default:
1025 				cmn_err(CE_WARN,
1026 				    "fs %s not in state "
1027 				    "UF_TRYLCK, UF_LOCKED or UF_FIXING",
1028 				    fs_name(f));
1029 				break;
1030 
1031 			case UF_TRYLCK:
1032 				if (!set_state(f, UF_LOCKED)) {
1033 					MINUTE((": failed] "));
1034 				}
1035 				break;
1036 
1037 			case UF_LOCKED:
1038 				if (!set_state(f, UF_FIXING)) {
1039 					MINUTE((": failed] "));
1040 				}
1041 				break;
1042 
1043 			case UF_FIXING:
1044 				break;
1045 
1046 			}
1047 		}
1048 
1049 		if (need_unlock)
1050 			mutex_exit(&f->uf_mutex);
1051 	}
1052 	if (need_unlock_list)
1053 		mutex_exit(&ufs_fix.uq_mutex);
1054 
1055 	MINUTE(("] "));
1056 }
1057 
1058 /*
1059  * error lock, trigger fsck and unlock those fs with failures
1060  * blatantly copied from the hlock routine, although this routine
1061  * triggers differently in order to use uq_ne as meaningful data.
1062  */
1063 /* ARGSUSED */
1064 void
ufsfx_thread_fix_failures(void * ignored)1065 ufsfx_thread_fix_failures(void *ignored)
1066 {
1067 	int		retry;
1068 	callb_cpr_t	cprinfo;
1069 
1070 	CALLB_CPR_INIT(&cprinfo, &ufs_fix.uq_mutex, callb_generic_cpr,
1071 	    "ufsfixfail");
1072 
1073 	MINUTE(("[ufsfx_thread_fix_failures] "));
1074 
1075 	for (;;) {
1076 		/* sleep until there is work to do */
1077 
1078 		mutex_enter(&ufs_fix.uq_mutex);
1079 		(void) ufs_thread_run(&ufs_fix, &cprinfo);
1080 		ufs_fix.uq_ne = 0;
1081 		mutex_exit(&ufs_fix.uq_mutex);
1082 
1083 		/* process failures on our q */
1084 		do {
1085 			retry = ufsfx_do_failure_q();
1086 			if (retry) {
1087 				mutex_enter(&ufs_fix.uq_mutex);
1088 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
1089 				(void) cv_reltimedwait(&ufs_fix.uq_cv,
1090 				    &ufs_fix.uq_mutex, (hz * retry),
1091 				    TR_CLOCK_TICK);
1092 				CALLB_CPR_SAFE_END(&cprinfo,
1093 				    &ufs_fix.uq_mutex);
1094 				mutex_exit(&ufs_fix.uq_mutex);
1095 			}
1096 		} while (retry);
1097 	}
1098 	/* NOTREACHED */
1099 }
1100 
1101 
1102 /*
1103  * watch for fix-on-panic work
1104  *
1105  * returns # of seconds to sleep before trying again
1106  * and zero if no retry is needed
1107  */
1108 
1109 int
ufsfx_do_failure_q(void)1110 ufsfx_do_failure_q(void)
1111 {
1112 	ufs_failure_t	*f;
1113 	long		 retry = 1;
1114 	ufsd_t		*s;
1115 
1116 	MAJOR(("[ufsfx_do_failure_q"));
1117 	DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL));
1118 
1119 	if (!mutex_tryenter(&ufs_fix.uq_mutex))
1120 		return (retry);
1121 
1122 	retry = 0;
1123 rescan_q:
1124 
1125 	/*
1126 	 * walk down failure list
1127 	 *  depending on state of each failure, do whatever
1128 	 *  is appropriate to move it to the next state
1129 	 *  taking note of whether retry gets set
1130 	 *
1131 	 * retry protocol:
1132 	 * wakeup in shortest required time for any failure
1133 	 *   retry == 0; nothing more to do (terminal state)
1134 	 *   retry < 0; reprocess queue immediately, retry will
1135 	 *		be abs(retry) for the next cycle
1136 	 *   retry > 0; schedule wakeup for retry seconds
1137 	 */
1138 
1139 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
1140 
1141 		if (!mutex_tryenter(&f->uf_mutex)) {
1142 			retry = 1;
1143 			continue;
1144 		}
1145 		s = get_state_desc(f->uf_s);
1146 
1147 		MINOR((": found%s: %s, \"%s: %s\"\n",
1148 		    s->ud_attr.terminal ? " old" : "",
1149 		    fs_name(f), state_name(f->uf_s), f->uf_panic_str));
1150 
1151 		if (s->ud_attr.terminal) {
1152 			mutex_exit(&f->uf_mutex);
1153 			continue;
1154 		}
1155 
1156 		if (s->ud_sfp)
1157 			(*s->ud_sfp)(f, UFA_FOUND, f->uf_s);
1158 
1159 		ASSERT(terminal_state(f->uf_s) || f->uf_retry != 0);
1160 
1161 		if (f->uf_retry != 0) {
1162 			if (retry > f->uf_retry || retry == 0)
1163 				retry = f->uf_retry;
1164 			if (f->uf_retry < 0)
1165 				f->uf_retry = abs(f->uf_retry);
1166 		}
1167 		mutex_exit(&f->uf_mutex);
1168 	}
1169 
1170 
1171 	if (retry < 0) {
1172 		retry = abs(retry);
1173 		goto rescan_q;
1174 	}
1175 
1176 	mutex_exit(&ufs_fix.uq_mutex);
1177 
1178 	DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL));
1179 	MAJOR((": retry=%ld, good night]\n\n", retry));
1180 
1181 	return (retry);
1182 }
1183 
1184 static void
pester_msg(ufs_failure_t * f,int seriousness)1185 pester_msg(ufs_failure_t *f, int seriousness)
1186 {
1187 	MINUTE(("[pester_msg"));
1188 	ASSERT(f->uf_s & (UF_LOCKED | UF_FIXING));
1189 
1190 	/*
1191 	 * XXX if seems too long for this fs, poke administrator
1192 	 * XXX to run fsck manually (and change retry time?)
1193 	 */
1194 	cmn_err(seriousness, "Waiting for repair of %s to %s",
1195 	    fs_name(f), f->uf_s & UF_LOCKED ? "start" : "finish");
1196 	MINUTE(("]"));
1197 }
1198 
1199 static time_t
trylock_time_exceeded(ufs_failure_t * f)1200 trylock_time_exceeded(ufs_failure_t *f)
1201 {
1202 	time_t		toolong;
1203 	extern time_t	time;
1204 
1205 	MINUTE(("[trylock_time_exceeded"));
1206 	ASSERT(MUTEX_HELD(&f->uf_mutex));
1207 
1208 	toolong = (time_t)ufsfx_tune.uft_too_long + f->uf_entered_tm;
1209 	if (time > toolong)
1210 		cmn_err(CE_WARN, "error-lock timeout exceeded: %s", fs_name(f));
1211 
1212 	MINUTE(("] "));
1213 	return (time <= toolong? 0: time - toolong);
1214 }
1215 
1216 static int
get_lockfs_status(ufs_failure_t * f,struct lockfs * lfp)1217 get_lockfs_status(ufs_failure_t *f, struct lockfs *lfp)
1218 {
1219 	MINUTE(("[get_lockfs_status"));
1220 
1221 	if (!f->uf_ufsvfsp) {
1222 		MINUTE((": ufsvfsp is NULL]\n"));
1223 		return (0);
1224 	}
1225 
1226 	ASSERT(MUTEX_HELD(&f->uf_mutex));
1227 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
1228 	ASSERT(!vfs_lock_held(f->uf_vfsp));
1229 	ASSERT(f->uf_ufsvfsp->vfs_root != NULL);
1230 
1231 	f->uf_lf_err = ufs_fiolfss(f->uf_ufsvfsp->vfs_root, lfp);
1232 
1233 	if (f->uf_lf_err) {
1234 		f->uf_retry = ufsfx_tune.uft_short_err_period;
1235 	}
1236 
1237 	MINUTE(("] "));
1238 	return (1);
1239 }
1240 
1241 static sfrc_t
set_state(ufs_failure_t * f,ufs_failure_states_t new_state)1242 set_state(ufs_failure_t *f, ufs_failure_states_t new_state)
1243 {
1244 	ufsd_t		*s;
1245 	sfrc_t		 sfrc = SFRC_FAIL;
1246 	int		 need_unlock;
1247 	extern time_t	 time;
1248 
1249 	HIDEOUS(("[set_state: new state:%s", state_name(new_state)));
1250 	ASSERT(f);
1251 	ASSERT(MUTEX_HELD(&f->uf_mutex));
1252 
1253 	/*
1254 	 * if someone else is panicking, just let panic sync proceed
1255 	 */
1256 	if (panicstr) {
1257 		(void) set_state(f, UF_NOTFIX);
1258 		HIDEOUS((": state reset: not fixed] "));
1259 		return (sfrc);
1260 	}
1261 
1262 	/*
1263 	 * bad state transition, an internal error
1264 	 */
1265 	if (!state_trans_valid(f->uf_s, new_state)) {
1266 		/* recursion */
1267 		if (!(f->uf_s & UF_PANIC) && !(new_state & UF_PANIC))
1268 			(void) set_state(f, UF_PANIC);
1269 		MINOR((": state reset: transition failure (\"%s\"->\"%s\")] ",
1270 		    state_name(f->uf_s), state_name(new_state)));
1271 		return (sfrc);
1272 	}
1273 
1274 	s = get_state_desc(new_state);
1275 
1276 	need_unlock = !MUTEX_HELD(&ufs_fix.uq_mutex);
1277 	if (need_unlock)
1278 		mutex_enter(&ufs_fix.uq_mutex);
1279 
1280 	if (s->ud_attr.at_fail && ufs_fix.uq_threadp &&
1281 	    curthread == ufs_fix.uq_threadp) {
1282 		cmn_err(CE_WARN, "set_state: probable recursive panic of %s",
1283 		    fs_name(f));
1284 	}
1285 	if (need_unlock)
1286 		mutex_exit(&ufs_fix.uq_mutex);
1287 
1288 	/* NULL state functions always succeed */
1289 	sfrc = !s->ud_sfp? SFRC_SUCCESS: (*s->ud_sfp)(f, UFA_SET, new_state);
1290 
1291 	if (sfrc == SFRC_SUCCESS && f->uf_s != new_state) {
1292 		f->uf_s = new_state;
1293 		f->uf_entered_tm = time;
1294 		f->uf_counter = 0;
1295 	}
1296 
1297 	HIDEOUS(("]\n"));
1298 	return (sfrc);
1299 }
1300 
1301 static ufsd_t *
get_state_desc(ufs_failure_states_t state)1302 get_state_desc(ufs_failure_states_t state)
1303 {
1304 	ufsd_t *s;
1305 
1306 	HIDEOUS(("[get_state_desc"));
1307 
1308 	for (s = &state_desc[1]; s->ud_name != NULL; s++) {
1309 		if (s->ud_v == state) {
1310 			HIDEOUS(("] "));
1311 			return (s);
1312 		}
1313 	}
1314 
1315 	HIDEOUS(("] "));
1316 	return (&state_desc[0]);	/* default */
1317 }
1318 
1319 static sfrc_t
sf_undef(ufs_failure_t * f,ufsa_t a,ufs_failure_states_t s)1320 sf_undef(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
1321 {
1322 	sfrc_t rc;
1323 
1324 	TRIVIA(("[sf_undef, action is %s, state is %s\n",
1325 	    act_name(a), state_name(s)));
1326 	ASSERT(s == UF_UNDEF);
1327 
1328 	/* shouldn't find null failure records or ever set one */
1329 	rc = set_state(f, UF_NOTFIX);
1330 
1331 	TRIVIA(("] "));
1332 	return (rc);
1333 }
1334 
1335 
1336 static sfrc_t
sf_init(ufs_failure_t * f,ufsa_t a,ufs_failure_states_t s)1337 sf_init(
1338 	ufs_failure_t	*f,
1339 	ufsa_t	 a,
1340 	ufs_failure_states_t	 s)
1341 {
1342 	sfrc_t		rc = SFRC_FAIL;
1343 	extern time_t	time;
1344 
1345 	TRIVIA(("[sf_init, action is %s", act_name(a)));
1346 	ASSERT(s & UF_INIT);
1347 
1348 	switch (a) {
1349 	case UFA_SET:
1350 		f->uf_begin_tm = time;
1351 		f->uf_retry = 1;
1352 		if (!f->uf_ufsvfsp) {
1353 			(void) set_state(f, UF_PANIC);
1354 			TRIVIA((": NULL ufsvfsp]\n"));
1355 			return (rc);
1356 		}
1357 		/*
1358 		 * because we can call panic from many different levels,
1359 		 * we can't be sure that we've got the vfs_lock at this
1360 		 * point.  However, there's not much alternative and if
1361 		 * we don't (have the lock) the worst case is we'll just
1362 		 * panic again
1363 		 */
1364 		f->uf_vfs_lockp		= &f->uf_ufsvfsp->vfs_lock;
1365 		f->uf_vfs_ufsfxp	= &f->uf_ufsvfsp->vfs_fsfx;
1366 
1367 		if (!f->uf_ufsvfsp->vfs_bufp) {
1368 			(void) set_state(f, UF_PANIC);
1369 			TRIVIA((": NULL vfs_bufp]\n"));
1370 			return (rc);
1371 		}
1372 		f->uf_bp = f->uf_ufsvfsp->vfs_bufp;
1373 
1374 		if (!f->uf_ufsvfsp->vfs_bufp->b_un.b_fs) {
1375 			(void) set_state(f, UF_PANIC);
1376 			TRIVIA((": NULL vfs_fs]\n"));
1377 			return (rc);
1378 		}
1379 
1380 		/* vfs_fs = vfs_bufp->b_un.b_fs */
1381 		bcopy(f->uf_ufsvfsp->vfs_fs->fs_fsmnt, f->uf_fsname, MAXMNTLEN);
1382 
1383 		f->uf_lf.lf_lock  = LOCKFS_ELOCK;	/* primer */
1384 
1385 		if (!f->uf_vfsp || f->uf_vfsp->vfs_dev == NODEV) {
1386 			(void) set_state(f, UF_PANIC);
1387 			TRIVIA((": NULL vfsp or vfs_dev == NODEV"));
1388 			return (rc);
1389 		}
1390 		f->uf_dev = f->uf_vfsp->vfs_dev;
1391 
1392 		rc = SFRC_SUCCESS;
1393 		break;
1394 
1395 	case UFA_FOUND:
1396 	default:
1397 		/* failures marked init shouldn't even be on the queue yet */
1398 		rc = set_state(f, UF_QUEUE);
1399 		TRIVIA((": found failure with state init]\n"));
1400 	}
1401 
1402 	TRIVIA(("] "));
1403 	return (rc);
1404 }
1405 
1406 static sfrc_t
sf_queue(ufs_failure_t * f,ufsa_t a,ufs_failure_states_t s)1407 sf_queue(
1408 	ufs_failure_t	*f,
1409 	ufsa_t	 a,
1410 	ufs_failure_states_t	 s)
1411 {
1412 	sfrc_t		rc = SFRC_FAIL;
1413 
1414 	TRIVIA(("[sf_queue, action is %s", act_name(a)));
1415 	ASSERT(s & UF_QUEUE);
1416 
1417 	if (!f->uf_ufsvfsp) {
1418 		TRIVIA((": NULL ufsvfsp]\n"));
1419 		return (rc);
1420 	}
1421 
1422 	switch (a) {
1423 	case UFA_FOUND:
1424 		rc = sf_found_queue(f);
1425 		break;
1426 
1427 	case UFA_SET:
1428 
1429 		ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex));
1430 
1431 		mutex_enter(&uf_stats.ufst_mutex);
1432 		uf_stats.ufst_num_failed++;
1433 		mutex_exit(&uf_stats.ufst_mutex);
1434 
1435 		/*
1436 		 * if can't get the vfs lock, just wait until
1437 		 * UF_TRYLCK to set fx_current
1438 		 */
1439 		if (mutex_tryenter(f->uf_vfs_lockp)) {
1440 			f->uf_vfs_ufsfxp->fx_current = f;
1441 			mutex_exit(f->uf_vfs_lockp);
1442 		} else {
1443 			mutex_enter(&uf_stats.ufst_mutex);
1444 			uf_stats.ufst_current_races++;
1445 			mutex_exit(&uf_stats.ufst_mutex);
1446 		}
1447 
1448 		f->uf_retry = 1;
1449 		rc = SFRC_SUCCESS;
1450 		TRIVIA(("] "));
1451 		break;
1452 
1453 	default:
1454 		(void) set_state(f, UF_PANIC);
1455 		TRIVIA((": failed] "));
1456 	}
1457 
1458 	return (rc);
1459 }
1460 
1461 static sfrc_t
sf_found_queue(ufs_failure_t * f)1462 sf_found_queue(ufs_failure_t *f)
1463 {
1464 	int		replica;
1465 	sfrc_t		rc = SFRC_FAIL;
1466 
1467 	TRIVIA(("[sf_found_queue"));
1468 
1469 	/*
1470 	 * don't need to check for null ufsvfsp because
1471 	 * unmount must own list's ufs_fix.uq_mutex
1472 	 * to mark it null and we own that lock since
1473 	 * we got here.
1474 	 */
1475 
1476 	ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex));
1477 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
1478 
1479 	if (!mutex_tryenter(f->uf_vfs_lockp)) {
1480 		TRIVIA((": tryenter(vfslockp) failed; retry]\n"));
1481 		f->uf_retry = 1;
1482 		return (rc);
1483 	}
1484 
1485 	replica = f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current != NULL &&
1486 	    f->uf_vfs_ufsfxp->fx_current != f &&
1487 	    !terminal_state(f->uf_vfs_ufsfxp->fx_current->uf_s);
1488 
1489 	/*
1490 	 * copy general flags to this ufs_failure so we don't
1491 	 * need to refer back to the ufsvfs, or, more importantly,
1492 	 * don't need to keep acquiring (trying to acquire) vfs_lockp
1493 	 *
1494 	 * The most restrictive option wins:
1495 	 *  panic > errlock only > errlock+unmount > repair
1496 	 * XXX panic > elock > elock > elock+umount
1497 	 */
1498 	if (f->uf_vfs_ufsfxp->fx_flags & UFSFX_PANIC) {
1499 		if (!set_state(f, UF_PANIC)) {
1500 			TRIVIA((": marked panic but was queued?"));
1501 			real_panic(f, " ");
1502 			/*NOTREACHED*/
1503 		}
1504 		mutex_exit(f->uf_vfs_lockp);
1505 		return (rc);
1506 	}
1507 	f->uf_flags = f->uf_vfs_ufsfxp->fx_flags;
1508 
1509 	if (replica) {
1510 		if (!set_state(f, UF_REPLICA)) {
1511 			f->uf_retry = 1;
1512 			TRIVIA((": set to replica failed] "));
1513 		} else {
1514 			TRIVIA(("] "));
1515 		}
1516 		mutex_exit(f->uf_vfs_lockp);
1517 		return (rc);
1518 	}
1519 	mutex_exit(f->uf_vfs_lockp);
1520 
1521 	if (!set_state(f, UF_TRYLCK)) {
1522 		TRIVIA((": failed] "));
1523 	} else {
1524 		rc = SFRC_SUCCESS;
1525 	}
1526 	return (rc);
1527 }
1528 
1529 static sfrc_t
sf_nonterm_cmn(ufs_failure_t * f,ufsa_t a,ufs_failure_states_t s)1530 sf_nonterm_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
1531 {
1532 	sfrc_t	rc = SFRC_FAIL;
1533 
1534 	TRIVIA(("[sf_nonterm_cmn, action: %s, %s", act_name(a), state_name(s)));
1535 	ASSERT(s & (UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING));
1536 	ASSERT(!terminal_state(s));
1537 
1538 	if (!f->uf_ufsvfsp && !(f->uf_s & UF_UMOUNT)) {
1539 		TRIVIA((": NULL ufsvfsp (state != UMOUNT)]\n"));
1540 		(void) set_state(f, UF_NOTFIX);
1541 		return (rc);
1542 	}
1543 
1544 	switch (a) {
1545 	case UFA_SET:
1546 		switch (s) {
1547 		case UF_TRYLCK:
1548 			ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
1549 			rc = sf_set_trylck(f);
1550 			break;
1551 
1552 		case UF_LOCKED:
1553 			rc = sf_set_locked(f);
1554 			break;
1555 
1556 		case UF_FIXING:
1557 			f->uf_flags |= UFSFX_REPAIR_START;
1558 			f->uf_retry  = ufsfx_tune.uft_fixpoll_period;
1559 			rc = SFRC_SUCCESS;
1560 			break;
1561 
1562 		case UF_UMOUNT:
1563 			f->uf_retry = -ufsfx_tune.uft_short_err_period;
1564 			rc = SFRC_SUCCESS;
1565 			break;
1566 
1567 		default:
1568 			(void) set_state(f, UF_PANIC);
1569 			TRIVIA((": failed] "));
1570 		}
1571 		break;
1572 
1573 	case UFA_FOUND:
1574 
1575 		switch (s) {
1576 		case UF_TRYLCK:
1577 			rc = sf_found_trylck(f);
1578 			break;
1579 
1580 		case UF_LOCKED:
1581 		case UF_FIXING:
1582 			rc = sf_found_lock_fix_cmn(f, s);
1583 			break;
1584 
1585 		case UF_UMOUNT:
1586 			rc = sf_found_umount(f);
1587 			break;
1588 
1589 		default:
1590 			(void) set_state(f, UF_PANIC);
1591 			TRIVIA((": failed] "));
1592 			break;
1593 		}
1594 		break;
1595 	default:
1596 		(void) set_state(f, UF_PANIC);
1597 		TRIVIA((": failed] "));
1598 		break;
1599 	}
1600 
1601 	TRIVIA(("] "));
1602 	return (rc);
1603 }
1604 
1605 static sfrc_t
sf_set_trylck(ufs_failure_t * f)1606 sf_set_trylck(ufs_failure_t *f)
1607 {
1608 	TRIVIA(("[sf_set_trylck"));
1609 
1610 	if (!mutex_tryenter(f->uf_vfs_lockp)) {
1611 		TRIVIA((": tryenter(vfslockp) failed; retry]\n"));
1612 		f->uf_retry = 1;
1613 		return (SFRC_FAIL);
1614 	}
1615 
1616 	if (!f->uf_vfs_ufsfxp->fx_current)
1617 		f->uf_vfs_ufsfxp->fx_current = f;
1618 
1619 	mutex_exit(f->uf_vfs_lockp);
1620 
1621 	f->uf_lf.lf_flags = 0;
1622 	f->uf_lf.lf_lock  = LOCKFS_ELOCK;
1623 	f->uf_retry = -ufsfx_tune.uft_fixstart_period;
1624 	TRIVIA(("] "));
1625 	return (SFRC_SUCCESS);
1626 }
1627 
1628 static sfrc_t
sf_found_trylck(ufs_failure_t * f)1629 sf_found_trylck(ufs_failure_t *f)
1630 {
1631 	struct lockfs lockfs_status;
1632 
1633 	TRIVIA(("[sf_found_trylck"));
1634 
1635 	if (trylock_time_exceeded(f) > 0) {
1636 		(void) set_state(f, UF_PANIC);
1637 		TRIVIA((": failed] "));
1638 		return (SFRC_FAIL);
1639 	}
1640 
1641 	if (!get_lockfs_status(f, &lockfs_status)) {
1642 		(void) set_state(f, UF_PANIC);
1643 		TRIVIA((": failed] "));
1644 		return (SFRC_FAIL);
1645 	}
1646 
1647 	if (f->uf_lf_err == NO_ERROR)
1648 		f->uf_lf.lf_key = lockfs_status.lf_key;
1649 
1650 	if (!set_lockfs(f, &lockfs_status)) {
1651 		(void) set_state(f, UF_PANIC);
1652 		TRIVIA((": failed] "));
1653 		return (SFRC_FAIL);
1654 	}
1655 	TRIVIA(("] "));
1656 	return (SFRC_SUCCESS);
1657 }
1658 
1659 static sfrc_t
sf_set_locked(ufs_failure_t * f)1660 sf_set_locked(ufs_failure_t *f)
1661 {
1662 	TRIVIA(("[sf_set_locked"));
1663 
1664 	f->uf_retry = -ufsfx_tune.uft_fixstart_period;
1665 
1666 #if defined(DEBUG)
1667 	if (f->uf_flags & UFSFX_REPAIR_START)
1668 		TRIVIA(("clearing UFSFX_REPAIR_START "));
1669 #endif /* DEBUG */
1670 
1671 	f->uf_flags &= ~UFSFX_REPAIR_START;
1672 
1673 	if (f->uf_s & UF_TRYLCK) {
1674 		cmn_err(CE_WARN, "Error-locked %s: \"%s\"",
1675 		    fs_name(f), f->uf_panic_str);
1676 
1677 		if (f->uf_flags & UFSFX_LCKONLY)
1678 			cmn_err(CE_WARN, "Manual repair of %s required",
1679 			    fs_name(f));
1680 	}
1681 
1682 	/*
1683 	 * just reset to current state
1684 	 */
1685 #if defined(DEBUG)
1686 	TRIVIA(("locked->locked "));
1687 #endif /* DEBUG */
1688 
1689 	TRIVIA(("] "));
1690 	return (SFRC_SUCCESS);
1691 }
1692 
1693 static sfrc_t
sf_found_lock_fix_cmn(ufs_failure_t * f,ufs_failure_states_t s)1694 sf_found_lock_fix_cmn(ufs_failure_t *f, ufs_failure_states_t s)
1695 {
1696 	time_t		toolong;
1697 	extern time_t	time;
1698 	struct buf	*bp			= NULL;
1699 	struct fs	*dfs;
1700 	time_t		 concerned, anxious;
1701 	sfrc_t		 rc			= SFRC_FAIL;
1702 	ulong_t		 gb_size;
1703 
1704 	TRIVIA(("[sf_found_lock_fix_cmn (\"%s\")", state_name(s)));
1705 
1706 	if (s & UF_LOCKED) {
1707 		ASSERT(MUTEX_HELD(&f->uf_mutex));
1708 
1709 		toolong =
1710 		    time > (ufsfx_tune.uft_too_long + f->uf_entered_tm);
1711 		TRIVIA(("%stoolong", !toolong? "not": ""));
1712 		HIDEOUS((": time:%ld, too long:%ld, entered_tm:%ld ",
1713 		    time, ufsfx_tune.uft_too_long, f->uf_entered_tm));
1714 
1715 		if (f->uf_flags & UFSFX_LCKUMOUNT) {
1716 			if (set_state(f, UF_UMOUNT)) {
1717 				TRIVIA(("] "));
1718 				rc = SFRC_SUCCESS;
1719 			} else {
1720 				TRIVIA((": failed] "));
1721 				f->uf_retry = 1;
1722 			}
1723 			return (rc);
1724 		}
1725 		if (!toolong) {
1726 			rc = SFRC_SUCCESS;
1727 		} else {
1728 			if (!(f->uf_flags & UFSFX_REPAIR_START)) {
1729 				cmn_err(CE_WARN, "%s repair of %s not started.",
1730 				    (f->uf_flags & UFSFX_LCKONLY) ?
1731 				    "Manual" : "Automatic", fs_name(f));
1732 
1733 				f->uf_retry = ufsfx_tune.uft_long_err_period;
1734 			} else {
1735 				f->uf_retry = ufsfx_tune.uft_long_err_period;
1736 				cmn_err(CE_WARN, "Repair of %s is not timely; "
1737 				    "operator attention is required.",
1738 				    fs_name(f));
1739 			}
1740 			TRIVIA(("] "));
1741 			return (rc);
1742 		}
1743 	}
1744 
1745 #if defined(DEBUG)
1746 	else {
1747 		ASSERT(s & UF_FIXING);
1748 	}
1749 #endif /* DEBUG */
1750 
1751 	/*
1752 	 * get on disk superblock; force it to really
1753 	 * come from the disk
1754 	 */
1755 	(void) bfinval(f->uf_dev, 0);
1756 	bp = UFS_BREAD(f->uf_ufsvfsp, f->uf_dev, SBLOCK, SBSIZE);
1757 	if (bp) {
1758 		bp->b_flags |= (B_STALE | B_AGE);
1759 		dfs = bp->b_un.b_fs;
1760 	}
1761 
1762 	if (!bp || (bp->b_flags & B_ERROR) || ((dfs->fs_magic != FS_MAGIC) &&
1763 	    (dfs->fs_magic != MTB_UFS_MAGIC))) {
1764 		TRIVIA((": UFS_BREAD(SBLOCK) failed]\n"));
1765 		f->uf_retry = 1;
1766 		goto out;
1767 	}
1768 
1769 	/* fsck started but we haven't noticed yet? */
1770 	if (!(s & UF_FIXING) && dfs->fs_clean == FSFIX) {
1771 		if (!set_state(f, UF_FIXING)) {
1772 			TRIVIA((": failed]\n"));
1773 			f->uf_retry = 1;
1774 			goto out;
1775 		}
1776 	}
1777 
1778 	/* fsck started but didn't succeed? */
1779 	if ((s & UF_FIXING) && ((dfs->fs_clean == FSBAD) || !fsck_active(f))) {
1780 		TRIVIA((": fs_clean: %d", (int)dfs->fs_clean));
1781 		(void) set_state(f, UF_LOCKED);
1782 		cmn_err(CE_WARN, "%s: Manual repair is necessary.", fs_name(f));
1783 		f->uf_retry = ufsfx_tune.uft_long_err_period;
1784 		goto out;
1785 	}
1786 
1787 	gb_size = (dfs->fs_size * dfs->fs_bshift) / GB;
1788 	toolong = (time_t)((gb_size == 0? 1: gb_size) * SecondsPerGig);
1789 
1790 	/* fsck started but doesn't seem to be proceeding? */
1791 	if ((s & UF_FIXING) && dfs->fs_clean == FSFIX) {
1792 		if (time > f->uf_entered_tm + toolong) {
1793 
1794 			cmn_err(CE_WARN,
1795 			    "Repair completion timeout exceeded on %s; "
1796 			    "manual fsck may be required", fs_name(f));
1797 			f->uf_retry = ufsfx_tune.uft_long_err_period;
1798 		}
1799 	}
1800 
1801 	concerned = f->uf_entered_tm + (toolong / 3);
1802 	anxious = f->uf_entered_tm + ((2 * toolong) / 3);
1803 
1804 	if (time > concerned)
1805 		pester_msg(f, time > anxious? CE_WARN: CE_NOTE);
1806 
1807 	TRIVIA(("] "));
1808 
1809 out:
1810 	if (bp)
1811 		brelse(bp);
1812 
1813 	return (rc);
1814 }
1815 
1816 static sfrc_t
sf_found_umount(ufs_failure_t * f)1817 sf_found_umount(ufs_failure_t *f)
1818 {
1819 	extern time_t	 time;
1820 	sfrc_t		 rc			= SFRC_FAIL;
1821 	struct vfs	*vfsp			= f->uf_vfsp;
1822 	struct ufsvfs	*ufsvfsp		= f->uf_ufsvfsp;
1823 	int		 toolong		= 0;
1824 	int		 err			= 0;
1825 
1826 	TRIVIA(("[sf_found_umount"));
1827 
1828 	toolong = time > ufsfx_tune.uft_too_long + f->uf_entered_tm;
1829 	if (toolong) {
1830 		TRIVIA((": unmount time limit exceeded] "));
1831 		goto out;
1832 	}
1833 
1834 	if (!vfsp || !ufsvfsp) {	/* trivial case */
1835 		TRIVIA((": NULL vfsp and/or ufsvfsp, already unmounted?] "));
1836 		goto out;
1837 	}
1838 
1839 	if (!ULOCKFS_IS_ELOCK(&ufsvfsp->vfs_ulockfs)) {
1840 		TRIVIA((": !not error locked?"));
1841 		err = EINVAL;
1842 		goto out;
1843 	}
1844 
1845 	/* The vn_vfsunlock will be done in dounmount() [.../common/fs/vfs.c] */
1846 	if (vn_vfswlock(vfsp->vfs_vnodecovered)) {
1847 		TRIVIA((": couldn't lock coveredvp"));
1848 		err = EBUSY;
1849 		goto out;
1850 	}
1851 
1852 	if ((err = dounmount(vfsp, 0, kcred)) != 0) {
1853 
1854 		/* take note, but not many alternatives here */
1855 		mutex_enter(&uf_stats.ufst_mutex);
1856 		uf_stats.ufst_unmount_failures++;
1857 		mutex_exit(&uf_stats.ufst_mutex);
1858 
1859 		TRIVIA((": unmount failed] "));
1860 	} else {
1861 		cmn_err(CE_NOTE, "unmounted error-locked %s", fs_name(f));
1862 	}
1863 
1864 out:
1865 	if (toolong || (err != EBUSY && err != EAGAIN))
1866 		rc = set_state(f, UF_NOTFIX);
1867 
1868 	TRIVIA(("] "));
1869 	return (rc);
1870 }
1871 
1872 static sfrc_t
sf_term_cmn(ufs_failure_t * f,ufsa_t a,ufs_failure_states_t s)1873 sf_term_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
1874 {
1875 	extern time_t	time;
1876 	sfrc_t		rc = SFRC_FAIL;
1877 
1878 	TRIVIA(("[sf_term_cmn, action is %s, state is %s",
1879 	    act_name(a), state_name(s)));
1880 	ASSERT(s & (UF_FIXED | UF_NOTFIX | UF_REPLICA));
1881 	ASSERT(terminal_state(s));
1882 
1883 	if (!f->uf_ufsvfsp && !(f->uf_s & (UF_UMOUNT | UF_NOTFIX))) {
1884 		TRIVIA((": NULL ufsvfsp (state != UMOUNT | NOTFIX)]\n"));
1885 		return (rc);
1886 	}
1887 
1888 	switch (a) {
1889 	case UFA_SET:
1890 		switch (s) {
1891 		case UF_NOTFIX:
1892 		case UF_FIXED:
1893 		{
1894 			int need_lock_vfs;
1895 
1896 			if (f->uf_ufsvfsp && f->uf_vfs_lockp)
1897 				need_lock_vfs = !MUTEX_HELD(f->uf_vfs_lockp);
1898 			else
1899 				need_lock_vfs = 0;
1900 
1901 			if (need_lock_vfs && !mutex_tryenter(f->uf_vfs_lockp)) {
1902 				TRIVIA((": tryenter(vfslockp) fail; retry]\n"));
1903 				f->uf_retry = 1;
1904 				break;
1905 			}
1906 
1907 			f->uf_end_tm = time;
1908 			f->uf_lf.lf_lock = LOCKFS_OLOCK;
1909 			f->uf_retry = 0;
1910 
1911 			if (f->uf_vfs_ufsfxp)
1912 				f->uf_vfs_ufsfxp->fx_current = NULL;
1913 
1914 			if (need_lock_vfs)
1915 				mutex_exit(f->uf_vfs_lockp);
1916 
1917 			cmn_err(CE_NOTE, (s & UF_NOTFIX)? "Could not fix %s":
1918 			    "%s is now accessible", fs_name(f));
1919 
1920 			if (s & UF_FIXED) {
1921 				mutex_enter(&uf_stats.ufst_mutex);
1922 				uf_stats.ufst_num_fixed++;
1923 				mutex_exit(&uf_stats.ufst_mutex);
1924 			}
1925 			(void) timeout(ufsfx_kill_fix_failure_thread,
1926 			    (void *)(ufsfx_tune.uft_short_err_period * hz),
1927 			    ufsfx_tune.uft_short_err_period * hz);
1928 			rc = SFRC_SUCCESS;
1929 			break;
1930 		}
1931 		case UF_REPLICA:
1932 
1933 			ASSERT(MUTEX_HELD(f->uf_vfs_lockp));
1934 
1935 			/* not actually a replica? */
1936 			if (f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current &&
1937 			    f->uf_vfs_ufsfxp->fx_current != f &&
1938 			    !terminal_state(
1939 			    f->uf_vfs_ufsfxp->fx_current->uf_s)) {
1940 
1941 				f->uf_orig = f->uf_vfs_ufsfxp->fx_current;
1942 				f->uf_retry = 0;
1943 				rc = SFRC_SUCCESS;
1944 			} else {
1945 				TRIVIA((": NULL fx_current]\n"));
1946 				f->uf_retry = 1;
1947 			}
1948 
1949 			break;
1950 
1951 		default:
1952 			rc = set_state(f, UF_PANIC);
1953 			TRIVIA((": failed] "));
1954 			break;
1955 		}
1956 		break;
1957 
1958 	case UFA_FOUND:
1959 		/*
1960 		 * XXX de-allocate these after some period?
1961 		 * XXX or move to an historical list?
1962 		 * XXX or have an ioctl which reaps them?
1963 		 */
1964 		/*
1965 		 * For now, since we don't expect lots of failures
1966 		 * to occur (to the point of memory shortages),
1967 		 * just punt
1968 		 */
1969 
1970 		/* be sure we're not wasting cpu on old failures */
1971 		if (f->uf_retry != 0) {
1972 			mutex_enter(&uf_stats.ufst_mutex);
1973 			uf_stats.ufst_cpu_waste++;
1974 			mutex_exit(&uf_stats.ufst_mutex);
1975 			f->uf_retry = 0;
1976 		}
1977 		rc = SFRC_SUCCESS;
1978 		break;
1979 
1980 	default:
1981 		(void) set_state(f, UF_PANIC);
1982 		TRIVIA((": failed] "));
1983 		break;
1984 	}
1985 
1986 	TRIVIA(("] "));
1987 	return (rc);
1988 }
1989 
1990 static sfrc_t
sf_panic(ufs_failure_t * f,ufsa_t a,ufs_failure_states_t s)1991 sf_panic(
1992 	ufs_failure_t	*f,
1993 	ufsa_t	 a,
1994 	ufs_failure_states_t	 s)
1995 {
1996 	sfrc_t	rc = SFRC_FAIL;
1997 
1998 	TRIVIA(("[sf_panic, action is %s, prev. state is %s",
1999 	    act_name(a), state_name(f->uf_s)));
2000 	ASSERT(s & UF_PANIC);
2001 
2002 	switch (a) {
2003 	case UFA_SET:
2004 		f->uf_retry = -ufsfx_tune.uft_short_err_period;
2005 		rc = SFRC_SUCCESS;
2006 		break;
2007 
2008 	case UFA_FOUND:
2009 	default:
2010 		real_panic(f, " ");
2011 
2012 		/* LINTED: warning: logical expression always true: op "||" */
2013 		ASSERT(DEBUG);
2014 
2015 		(void) set_state(f, UF_UMOUNT);	/* XXX UF_NOTFIX? */
2016 
2017 		break;
2018 	}
2019 
2020 	TRIVIA(("] "));
2021 	return (rc);
2022 }
2023 
2024 /*
2025  * minimum state function
2026  */
2027 static sfrc_t
sf_minimum(ufs_failure_t * f,ufsa_t a,ufs_failure_states_t ignored)2028 sf_minimum(
2029 	ufs_failure_t	*f,
2030 	ufsa_t	 a, /* LINTED argument unused in function: ignored */
2031 	ufs_failure_states_t	 ignored)
2032 {
2033 	sfrc_t rc = SFRC_FAIL;
2034 
2035 	TRIVIA(("[sf_minimum, action is %s", act_name(a)));
2036 
2037 	switch (a) {
2038 	case UFA_SET:
2039 		f->uf_retry = 0;
2040 		/* FALLTHROUGH */
2041 
2042 	case UFA_FOUND:
2043 		rc = SFRC_SUCCESS;
2044 		break;
2045 
2046 	default:
2047 		(void) set_state(f, UF_PANIC);
2048 		TRIVIA((": failed] "));
2049 		break;
2050 	}
2051 
2052 	TRIVIA(("] "));
2053 	return (rc);
2054 }
2055 
2056 static int
state_trans_valid(ufs_failure_states_t from,ufs_failure_states_t to)2057 state_trans_valid(ufs_failure_states_t from, ufs_failure_states_t to)
2058 {
2059 	ufsd_t	*s;
2060 	int	 valid;
2061 
2062 	HIDEOUS(("[state_trans_valid"));
2063 
2064 	if (from & to)
2065 		return (1);
2066 
2067 	s = get_state_desc(to);
2068 
2069 	/*
2070 	 * extra test is necessary since we want UF_UNDEF = 0,
2071 	 * (to detect freshly allocated memory)
2072 	 * but can't check for that value with a bit test
2073 	 */
2074 	valid = (to & UF_INIT)? from == s->ud_prev: from & s->ud_prev;
2075 
2076 	HIDEOUS((": %svalid] ", valid? "": "in"));
2077 	return (valid);
2078 }
2079 
2080 static int
terminal_state(ufs_failure_states_t state)2081 terminal_state(ufs_failure_states_t state)
2082 {
2083 	ufsd_t	*s;
2084 
2085 	HIDEOUS(("[terminal_state"));
2086 
2087 	s = get_state_desc(state);
2088 
2089 	HIDEOUS((": %sterminal] ", s->ud_attr.terminal? "": "not "));
2090 	return ((int)s->ud_attr.terminal);
2091 }
2092 
2093 static void
alloc_lockfs_comment(ufs_failure_t * f,struct lockfs * lfp)2094 alloc_lockfs_comment(ufs_failure_t *f, struct lockfs *lfp)
2095 {
2096 	MINUTE(("[alloc_lockfs_comment"));
2097 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2098 
2099 	/*
2100 	 * ufs_fiolfs expects a kmem_alloc'ed comment;
2101 	 * it frees the comment if the lock fails
2102 	 * or else when the lock is unlocked.
2103 	 */
2104 
2105 	f->uf_lf.lf_comment = kmem_zalloc(LOCKFS_MAXCOMMENTLEN, KM_NOSLEEP);
2106 	if (f->uf_lf.lf_comment) {
2107 		char	*from;
2108 		size_t	 len;
2109 
2110 		/*
2111 		 * use panic string if there's no previous comment
2112 		 * or if we're setting the error lock
2113 		 */
2114 		if ((LOCKFS_IS_ELOCK(&f->uf_lf) || !lfp->lf_comment ||
2115 		    lfp->lf_comlen <= 0)) {
2116 			from = f->uf_panic_str;
2117 			len = LOCKFS_MAXCOMMENTLEN;
2118 		} else {
2119 			from = lfp->lf_comment;
2120 			len = lfp->lf_comlen;
2121 		}
2122 
2123 		bcopy(from, f->uf_lf.lf_comment, len);
2124 		f->uf_lf.lf_comlen = len;
2125 
2126 	} else {
2127 		f->uf_lf.lf_comlen = 0;
2128 	}
2129 	MINUTE(("] "));
2130 }
2131 
2132 static int
set_lockfs(ufs_failure_t * f,struct lockfs * lfp)2133 set_lockfs(ufs_failure_t *f, struct lockfs *lfp)
2134 {
2135 	int	(*handle_lockfs_rc)(ufs_failure_t *);
2136 	int	  rc;
2137 
2138 	MINUTE(("[set_lockfs"));
2139 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2140 	ASSERT(!vfs_lock_held(f->uf_vfsp));
2141 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
2142 
2143 	if (!f->uf_ufsvfsp) {
2144 		MINUTE((": ufsvfsp is NULL]\n"));
2145 		return (0);
2146 	}
2147 
2148 	ASSERT(MUTEX_NOT_HELD(&f->uf_ufsvfsp->vfs_ulockfs.ul_lock));
2149 
2150 	if (!f->uf_ufsvfsp->vfs_root) {
2151 		MINUTE((": vfs_root is NULL]\n"));
2152 		return (0);
2153 	}
2154 
2155 	alloc_lockfs_comment(f, lfp);
2156 	f->uf_lf_err = 0;
2157 
2158 	if (!LOCKFS_IS_ELOCK(lfp)) {
2159 		lfp->lf_lock = f->uf_lf.lf_lock = LOCKFS_ELOCK;
2160 		VN_HOLD(f->uf_ufsvfsp->vfs_root);
2161 		f->uf_lf_err =
2162 		    ufs__fiolfs(f->uf_ufsvfsp->vfs_root,
2163 		    &f->uf_lf, /* from_user */ 0, /* from_log */ 0);
2164 		VN_RELE(f->uf_ufsvfsp->vfs_root);
2165 	}
2166 
2167 	handle_lockfs_rc = f->uf_lf_err != 0? lockfs_failure: lockfs_success;
2168 	rc = handle_lockfs_rc(f);
2169 
2170 	MINUTE(("] "));
2171 	return (rc);
2172 }
2173 
2174 static int
lockfs_failure(ufs_failure_t * f)2175 lockfs_failure(ufs_failure_t *f)
2176 {
2177 	int	error;
2178 	ufs_failure_states_t	s;
2179 
2180 	TRIVIA(("[lockfs_failure"));
2181 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2182 
2183 	if (!f->uf_ufsvfsp) {
2184 		TRIVIA((": ufsvfsp is NULL]\n"));
2185 		return (0);
2186 	}
2187 
2188 	error = f->uf_lf_err;
2189 	switch (error) {
2190 			/* non-transient errors: */
2191 	case EACCES:	/* disk/in-core metadata reconciliation failed  */
2192 	case EPERM:	/* inode reconciliation failed; incore inode changed? */
2193 	case EIO:	/* device is hard-locked or not responding */
2194 	case EROFS:	/* device is write-locked */
2195 	case EDEADLK:	/* can't lockfs; deadlock would result; */
2196 			/* Swapping or saving accounting records */
2197 			/* onto this fs can cause this errno. */
2198 
2199 		MINOR(("ufs_fiolfs(\"%s\") of %s failed: %s (%d)",
2200 		    fs_name(f), lock_name(&f->uf_lf),
2201 		    err_name(error), error));
2202 
2203 		/*
2204 		 * if can't get lock, then fallback to panic, unless
2205 		 * unless unmount was requested (although unmount will
2206 		 * probably fail if the lock failed, so we'll panic
2207 		 * anyway
2208 		 */
2209 
2210 		s = ((f->uf_flags & UFSFX_LCKUMOUNT) && error != EDEADLK) ?
2211 		    UF_UMOUNT: UF_PANIC;
2212 
2213 		if (!set_state(f, s)) {
2214 			real_panic(f, " ");
2215 			/*NOTREACHED*/
2216 			break;
2217 		}
2218 		break;
2219 
2220 
2221 	case EBUSY:
2222 	case EAGAIN:
2223 
2224 		f->uf_retry = ufsfx_tune.uft_short_err_period;
2225 		if (curthread->t_flag & T_DONTPEND) {
2226 			curthread->t_flag &= ~T_DONTPEND;
2227 
2228 		} else if (!(f->uf_s & (UF_LOCKED | UF_FIXING))) {
2229 			ufs_failure_states_t state;
2230 			/*
2231 			 * if we didn't know that the fix had started,
2232 			 * take note
2233 			 */
2234 			state = error == EBUSY? UF_LOCKED: UF_FIXING;
2235 			if (!set_state(f, state)) {
2236 				TRIVIA((": failed] "));
2237 				return (0);
2238 			}
2239 		}
2240 		break;
2241 
2242 	default:	/* some other non-fatal error */
2243 		MINOR(("lockfs(\"%s\") of %s returned %s (%d)",
2244 		    lock_name(&f->uf_lf), fs_name(f),
2245 		    err_name(f->uf_lf_err), f->uf_lf_err));
2246 
2247 		f->uf_retry = ufsfx_tune.uft_short_err_period;
2248 		break;
2249 
2250 	case EINVAL:	/* unmounted? */
2251 		(void) set_state(f, UF_NOTFIX);
2252 		break;
2253 	}
2254 	TRIVIA(("] "));
2255 	return (1);
2256 }
2257 
2258 static int
lockfs_success(ufs_failure_t * f)2259 lockfs_success(ufs_failure_t *f)
2260 {
2261 	TRIVIA(("[lockfs_success"));
2262 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2263 
2264 	if (!f->uf_ufsvfsp) {
2265 		TRIVIA((": ufsvfsp is NULL]\n"));
2266 		return (0);
2267 	}
2268 
2269 	switch (f->uf_lf.lf_lock) {
2270 	case LOCKFS_ELOCK:	/* error lock worked */
2271 
2272 		if (!set_state(f, UF_LOCKED)) {
2273 			TRIVIA((": failed] "));
2274 			return (0);
2275 		}
2276 		break;
2277 
2278 	case LOCKFS_ULOCK:	/* unlock worked */
2279 		/*
2280 		 * how'd we get here?
2281 		 * This should be done from fsck's unlock,
2282 		 * not from this thread's context.
2283 		 */
2284 		cmn_err(CE_WARN, "Unlocked error-lock of %s", fs_name(f));
2285 		ufsfx_unlockfs(f->uf_ufsvfsp);
2286 		break;
2287 
2288 	default:
2289 		if (!set_state(f, UF_NOTFIX)) {
2290 			TRIVIA((": failed] "));
2291 			return (0);
2292 		}
2293 		break;
2294 	}
2295 	TRIVIA(("] "));
2296 	return (1);
2297 }
2298 
2299 /*
2300  * when fsck is running it puts its pid into the lockfs
2301  * comment structure, prefaced by PIDSTR
2302  */
2303 const char *PIDSTR = "[pid:";
2304 static int
fsck_active(ufs_failure_t * f)2305 fsck_active(ufs_failure_t *f)
2306 {
2307 	char		*cp;
2308 	int		 i, found, errlocked;
2309 	size_t		 comlen;
2310 	const int	 PIDSTRLEN = (int)strlen(PIDSTR);
2311 	struct ulockfs	*ulp = &f->uf_ufsvfsp->vfs_ulockfs;
2312 
2313 	TRIVIA(("[fsck_active"));
2314 
2315 	ASSERT(f);
2316 	ASSERT(f->uf_s & UF_FIXING);
2317 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2318 	ASSERT(f->uf_ufsvfsp);
2319 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
2320 	ASSERT(MUTEX_NOT_HELD(&ulp->ul_lock));
2321 
2322 	mutex_enter(&ulp->ul_lock);
2323 	cp = ulp->ul_lockfs.lf_comment;
2324 	comlen = ulp->ul_lockfs.lf_comlen;
2325 	errlocked = (int)ULOCKFS_IS_ELOCK(ulp);
2326 	mutex_exit(&ulp->ul_lock);
2327 
2328 	if (!cp || comlen == 0) {
2329 		TRIVIA((": null comment or comlen <= 0, found:0]"));
2330 		return (0);
2331 	}
2332 
2333 	for (found = i = 0; !found && i < (comlen - PIDSTRLEN); i++, cp++)
2334 		found = strncmp(cp, PIDSTR, PIDSTRLEN) == 0;
2335 
2336 	TRIVIA(("found:%d, is_elock:%d]", found, errlocked));
2337 	return (errlocked & found);
2338 }
2339 
2340 static const char unknown_fs[]		= "<unknown fs>";
2341 static const char null_failure[] = "<NULL ufs failure record; unknown fs>";
2342 static const char mutated_vfs_bufp[]	= "<mutated vfs_bufp, unknown fs>";
2343 static const char mutated_vfs_fs[]	= "<mutated vfs_fs, unknown fs>";
2344 
2345 static char *
fs_name(ufs_failure_t * f)2346 fs_name(ufs_failure_t *f)
2347 {
2348 	HIDEOUS(("[fs_name"));
2349 	ASSERT(MUTEX_HELD(&f->uf_mutex));
2350 
2351 	if (!f) {
2352 		HIDEOUS((": failure ptr is NULL]\n"));
2353 		return ((char *)null_failure);
2354 	}
2355 
2356 	if (f->uf_fsname[0] != '\0') {
2357 		HIDEOUS((": return (uf_fsname)]\n"));
2358 		return (f->uf_fsname);
2359 	}
2360 
2361 	if (MUTEX_HELD(f->uf_vfs_lockp)) {
2362 		if (f->uf_bp != f->uf_ufsvfsp->vfs_bufp) {
2363 			HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n",
2364 			    (void *)f->uf_bp, (void *)f->uf_ufsvfsp->vfs_bufp));
2365 			return ((char *)mutated_vfs_bufp);
2366 		}
2367 		if (f->uf_fs != f->uf_ufsvfsp->vfs_fs) {
2368 			HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n",
2369 			    (void *)f->uf_fs, (void *)f->uf_ufsvfsp->vfs_fs));
2370 			return ((char *)mutated_vfs_fs);
2371 		}
2372 		if (f->uf_ufsvfsp && f->uf_bp && f->uf_fs &&
2373 		    *f->uf_fs->fs_fsmnt != '\0') {
2374 			HIDEOUS((": return (fs_fsmnt)]\n"));
2375 			return (f->uf_fs->fs_fsmnt);
2376 		}
2377 	}
2378 
2379 	HIDEOUS((": unknown file system]\n"));
2380 	return ((char *)unknown_fs);
2381 }
2382 
2383 #if defined(DEBUG)
2384 static char *
lock_name(struct lockfs * lfp)2385 lock_name(struct lockfs *lfp)
2386 {
2387 	struct lock_description	*l;
2388 	char			*lname;
2389 
2390 	HIDEOUS(("[lock_name"));
2391 
2392 	lname = lock_desc[0].ld_name;
2393 	for (l = &lock_desc[1]; l->ld_name != NULL; l++) {
2394 		if (lfp && lfp->lf_lock == l->ld_type) {
2395 			lname = l->ld_name;
2396 			break;
2397 		}
2398 	}
2399 	HIDEOUS(("]"));
2400 	return (lname);
2401 }
2402 
2403 static char *
state_name(ufs_failure_states_t state)2404 state_name(ufs_failure_states_t state)
2405 {
2406 	ufsd_t	*s;
2407 
2408 	HIDEOUS(("[state_name"));
2409 
2410 	s = get_state_desc(state);
2411 
2412 	HIDEOUS(("]"));
2413 	return (s->ud_name);
2414 }
2415 
2416 static char *
err_name(int error)2417 err_name(int error)
2418 {
2419 	struct error_description *e;
2420 
2421 	HIDEOUS(("[err_name"));
2422 
2423 	for (e = &err_desc[1]; e->ed_name != NULL; e++) {
2424 		if (error == e->ed_errno) {
2425 			HIDEOUS(("]"));
2426 			return (e->ed_name);
2427 		}
2428 	}
2429 	HIDEOUS(("]"));
2430 	return (err_desc[0].ed_name);
2431 }
2432 
2433 static char *
act_name(ufsa_t action)2434 act_name(ufsa_t action)
2435 {
2436 	struct action_description *a;
2437 
2438 	HIDEOUS(("[act_name"));
2439 
2440 	for (a = &act_desc[1]; a->ad_name != NULL; a++) {
2441 		if (action == a->ad_v) {
2442 			HIDEOUS(("]"));
2443 			return (a->ad_name);
2444 		}
2445 	}
2446 	HIDEOUS(("]"));
2447 	return (act_desc[0].ad_name);
2448 }
2449 
2450 /*
2451  * dump failure list
2452  */
2453 static void
dump_uf_list(char * msg)2454 dump_uf_list(char *msg)
2455 {
2456 	ufs_failure_t	*f;
2457 	int		 i;
2458 	int		 list_was_locked = MUTEX_HELD(&ufs_fix.uq_mutex);
2459 
2460 	if (!list_was_locked && !mutex_tryenter(&ufs_fix.uq_mutex)) {
2461 		printf("dump_uf_list: couldn't get list lock\n");
2462 		return;
2463 	}
2464 
2465 	if (msg) {
2466 		printf("\n%s", msg);
2467 	}
2468 	printf("\ndump_uf_list:\n\tuq_lowat: %d, uq_ne: %d\n",
2469 	    ufs_fix.uq_lowat, ufs_fix.uq_ne);
2470 
2471 	mutex_enter(&uf_stats.ufst_mutex);
2472 	printf("\tuf_stats.current_races: %ld\n", uf_stats.ufst_current_races);
2473 	printf("\tuf_stats.num_failed: %ld\n", uf_stats.ufst_num_failed);
2474 	printf("\tuf_stats.num_fixed: %ld\n", uf_stats.ufst_num_fixed);
2475 	printf("\tuf_stats.cpu_waste: %ld\n", uf_stats.ufst_cpu_waste);
2476 	printf("\tuf_stats.lock_violations: %ld, unmount_failures: %ld\n",
2477 	    uf_stats.ufst_lock_violations, uf_stats.ufst_unmount_failures);
2478 	mutex_exit(&uf_stats.ufst_mutex);
2479 
2480 	for (f = ufs_fix.uq_ufhead, i = 1; f; f = f->uf_next, i++) {
2481 
2482 		if (!mutex_tryenter(&f->uf_mutex)) {
2483 			printf("%d.\t\"skipped - try enter failed\"\n", i);
2484 			continue;
2485 		}
2486 
2487 		dump_uf(f, i);
2488 
2489 		mutex_exit(&f->uf_mutex);
2490 	}
2491 
2492 	printf("\n");
2493 
2494 	if (!list_was_locked)
2495 		mutex_exit(&ufs_fix.uq_mutex);
2496 }
2497 
2498 static void
dump_uf(ufs_failure_t * f,int i)2499 dump_uf(ufs_failure_t *f, int i)
2500 {
2501 	if (!f) {
2502 		printf("dump_uf: NULL failure record\n");
2503 		return;
2504 	}
2505 
2506 	printf("%d.\t\"%s\" is %s.\n",
2507 	    i, fs_name(f), state_name(f->uf_s));
2508 	printf("\t\"%s\"\tAddr: 0x%p\n", f->uf_panic_str, (void *)f);
2509 	printf("\tNext: 0x%p\t\tPrev: 0x%p\n",
2510 	    (void *)f->uf_next, (void *)f->uf_prev);
2511 
2512 	if (f->uf_orig)
2513 		printf("\tOriginal failure: 0x%p \"%s\"\n",
2514 		    (void *)f->uf_orig, f->uf_orig->uf_panic_str);
2515 
2516 	printf("\tUfsvfs: 0x%p\t\tVfs_lockp: 0x%p\n",
2517 	    (void *)f->uf_ufsvfsp, (void *)f->uf_vfs_lockp);
2518 	printf("\tVfs_fsfxp: 0x%p\n", (void *)f->uf_vfs_ufsfxp);
2519 	printf("\tVfs_bufp: 0x%p", (void *)f->uf_bp);
2520 
2521 	if (f->uf_bp)
2522 		printf("\t\tVfs_fs: 0x%p\n", (void *)f->uf_fs);
2523 	else
2524 		printf("\n");
2525 
2526 	printf("\tBegin: 0x%lx\tEntered: 0x%lx\tEnd: 0x%lx\n",
2527 	    f->uf_begin_tm, f->uf_entered_tm, f->uf_end_tm);
2528 
2529 	printf("\tFlags: (%d) %s%s%s%s", f->uf_flags,
2530 	    f->uf_flags & UFSFX_LCKONLY?	 "\"lock only\" "	: "",
2531 	    f->uf_flags & UFSFX_LCKUMOUNT?	 "\"lock+unmount\" "	: "",
2532 	    f->uf_flags & UFSFX_REPAIR_START? "\"started repair\" "	: "",
2533 	    f->uf_flags == 0?                "<none>"               : "");
2534 
2535 	printf("\tRetry: %ld seconds\n", f->uf_retry);
2536 
2537 	printf("\tLockfs:\ttype: %s\terror: %s (%d)\n",
2538 	    lock_name(&f->uf_lf), err_name(f->uf_lf_err), f->uf_lf_err);
2539 
2540 }
2541 #endif /* DEBUG */
2542 
2543 /*
2544  * returns # of ufs_failures in a non-terminal state on queue
2545  * used to coordinate with hlock thread (see ufs_thread.c)
2546  * and to determine when the error lock thread may exit
2547  */
2548 
2549 int
ufsfx_get_failure_qlen(void)2550 ufsfx_get_failure_qlen(void)
2551 {
2552 	ufs_failure_t	*f;
2553 	ufsd_t		*s;
2554 	int		 qlen = 0;
2555 
2556 	MINUTE(("[ufsfx_get_failure_qlen"));
2557 
2558 	if (!mutex_tryenter(&ufs_fix.uq_mutex))
2559 		return (-1);
2560 
2561 	/*
2562 	 * walk down failure list
2563 	 */
2564 
2565 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
2566 
2567 		if (!mutex_tryenter(&f->uf_mutex))
2568 			continue;
2569 
2570 		s = get_state_desc(f->uf_s);
2571 
2572 		if (s->ud_attr.terminal) {
2573 			mutex_exit(&f->uf_mutex);
2574 			continue;
2575 		}
2576 
2577 		MINUTE((": found: %s, \"%s: %s\"\n",
2578 		    fs_name(f), state_name(f->uf_s), f->uf_panic_str));
2579 
2580 		qlen++;
2581 		mutex_exit(&f->uf_mutex);
2582 	}
2583 
2584 	mutex_exit(&ufs_fix.uq_mutex);
2585 
2586 	MINUTE((": qlen=%d]\n", qlen));
2587 
2588 	return (qlen);
2589 }
2590 
2591 /*
2592  * timeout routine
2593  *  called to shutdown fix failure thread and server daemon
2594  */
2595 static void
ufsfx_kill_fix_failure_thread(void * arg)2596 ufsfx_kill_fix_failure_thread(void *arg)
2597 {
2598 	clock_t odelta = (clock_t)arg;
2599 	int	qlen;
2600 
2601 	MAJOR(("[ufsfx_kill_fix_failure_thread"));
2602 
2603 	qlen = ufsfx_get_failure_qlen();
2604 
2605 	if (qlen < 0) {
2606 		clock_t delta;
2607 
2608 		delta = odelta << 1;
2609 		if (delta <= 0)
2610 			delta = INT_MAX;
2611 
2612 		(void) timeout(ufsfx_kill_fix_failure_thread,
2613 		    (void *)delta, delta);
2614 		MAJOR((": rescheduled"));
2615 
2616 	} else if (qlen == 0) {
2617 		ufs_thread_exit(&ufs_fix);
2618 		MAJOR((": killed"));
2619 	}
2620 	/*
2621 	 * else
2622 	 *  let timeout expire
2623 	 */
2624 	MAJOR(("]\n"));
2625 }
2626