xref: /illumos-gate/usr/src/cmd/fm/fmd/common/fmd_case.c (revision 705e9f42)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5749f21d3Swesolows  * Common Development and Distribution License (the "License").
6749f21d3Swesolows  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
21d9638e54Smws 
227c478bd9Sstevel@tonic-gate /*
23*705e9f42SStephen Hanson  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
27d9638e54Smws /*
28d9638e54Smws  * FMD Case Subsystem
29d9638e54Smws  *
30d9638e54Smws  * Diagnosis engines are expected to group telemetry events related to the
31d9638e54Smws  * diagnosis of a particular problem on the system into a set of cases.  The
32d9638e54Smws  * diagnosis engine may have any number of cases open at a given point in time.
33d9638e54Smws  * Some cases may eventually be *solved* by associating a suspect list of one
34d9638e54Smws  * or more problems with the case, at which point fmd publishes a list.suspect
35d9638e54Smws  * event for the case and it becomes visible to administrators and agents.
36d9638e54Smws  *
37d9638e54Smws  * Every case is named using a UUID, and is globally visible in the case hash.
38d9638e54Smws  * Cases are reference-counted, except for the reference from the case hash
39d9638e54Smws  * itself.  Consumers of case references include modules, which store active
40d9638e54Smws  * cases on the mod_cases list, ASRUs in the resource cache, and the RPC code.
41d9638e54Smws  *
42d9638e54Smws  * Cases obey the following state machine.  In states UNSOLVED, SOLVED, and
43d9638e54Smws  * CLOSE_WAIT, a case's module refers to the owning module (a diagnosis engine
44d9638e54Smws  * or transport) and the case is referenced by the mod_cases list.  Once the
45d9638e54Smws  * case reaches the CLOSED or REPAIRED states, a case's module changes to refer
46d9638e54Smws  * to the root module (fmd.d_rmod) and is deleted from the owner's mod_cases.
47d9638e54Smws  *
48d9638e54Smws  *			+------------+
49d9638e54Smws  *	     +----------|  UNSOLVED  |
50d9638e54Smws  *	     |		+------------+
5125c6ff4bSstephh  *	     |		      1 |
5225c6ff4bSstephh  *	     |			|
5325c6ff4bSstephh  *	     |		+-------v----+
5425c6ff4bSstephh  *	   2 |		|    SOLVED  |
5525c6ff4bSstephh  *	     |		+------------+
5625c6ff4bSstephh  *	     |		    3 |  5 |
5725c6ff4bSstephh  *	     +------------+   |    |
5825c6ff4bSstephh  *			  |   |    |
5925c6ff4bSstephh  *			+-v---v----v-+
6025c6ff4bSstephh  *			| CLOSE_WAIT |
6125c6ff4bSstephh  *			+------------+
6225c6ff4bSstephh  *			  |   |    |
6325c6ff4bSstephh  *	      +-----------+   |    +------------+
6425c6ff4bSstephh  *	      |		    4 |			|
6525c6ff4bSstephh  *	      v		+-----v------+		|
6625c6ff4bSstephh  *	   discard      |   CLOSED   |	      6	|
6725c6ff4bSstephh  *			+------------+		|
6825c6ff4bSstephh  *			      |			|
6925c6ff4bSstephh  *			      |	   +------------+
7025c6ff4bSstephh  *			    7 |	   |
7125c6ff4bSstephh  *			+-----v----v-+
7225c6ff4bSstephh  *			|  REPAIRED  |
7325c6ff4bSstephh  *			+------------+
7425c6ff4bSstephh  *			      |
7525c6ff4bSstephh  *			    8 |
7625c6ff4bSstephh  *			+-----v------+
7725c6ff4bSstephh  *			|  RESOLVED  |
78d9638e54Smws  *			+------------+
7925c6ff4bSstephh  *			      |
8025c6ff4bSstephh  *			      v
8125c6ff4bSstephh  *			   discard
82d9638e54Smws  *
83d9638e54Smws  * The state machine changes are triggered by calls to fmd_case_transition()
84d9638e54Smws  * from various locations inside of fmd, as described below:
85d9638e54Smws  *
86d9638e54Smws  * [1] Called by: fmd_case_solve()
87d9638e54Smws  *       Actions: FMD_CF_SOLVED flag is set in ci_flags
88d9638e54Smws  *                conviction policy is applied to suspect list
89d9638e54Smws  *                suspects convicted are marked faulty (F) in R$
90d9638e54Smws  *                list.suspect event logged and dispatched
91d9638e54Smws  *
9225c6ff4bSstephh  * [2] Called by: fmd_case_close(), fmd_case_uuclose()
9325c6ff4bSstephh  *       Actions: diagnosis engine fmdo_close() entry point scheduled
9425c6ff4bSstephh  *                case discarded upon exit from CLOSE_WAIT
9525c6ff4bSstephh  *
9625c6ff4bSstephh  * [3] Called by: fmd_case_close(), fmd_case_uuclose(), fmd_xprt_event_uuclose()
97d9638e54Smws  *       Actions: FMD_CF_ISOLATED flag is set in ci_flags
98d9638e54Smws  *                suspects convicted (F) are marked unusable (U) in R$
99d9638e54Smws  *                diagnosis engine fmdo_close() entry point scheduled
10025c6ff4bSstephh  *                case transitions to CLOSED [4] upon exit from CLOSE_WAIT
101d9638e54Smws  *
10225c6ff4bSstephh  * [4] Called by: fmd_case_delete() (after fmdo_close() entry point returns)
103d9638e54Smws  *       Actions: list.isolated event dispatched
104d9638e54Smws  *                case deleted from module's list of open cases
105d9638e54Smws  *
106d9638e54Smws  * [5] Called by: fmd_case_repair(), fmd_case_update()
107d9638e54Smws  *       Actions: FMD_CF_REPAIR flag is set in ci_flags
108d9638e54Smws  *                diagnosis engine fmdo_close() entry point scheduled
109d9638e54Smws  *                case transitions to REPAIRED [6] upon exit from CLOSE_WAIT
110d9638e54Smws  *
11125c6ff4bSstephh  * [6] Called by: fmd_case_delete() (after fmdo_close() entry point returns)
11225c6ff4bSstephh  *       Actions: suspects convicted are marked non faulty (!F) in R$
11325c6ff4bSstephh  *                list.repaired or list.updated event dispatched
114d9638e54Smws  *
115d9638e54Smws  * [7] Called by: fmd_case_repair(), fmd_case_update()
116d9638e54Smws  *       Actions: FMD_CF_REPAIR flag is set in ci_flags
117d9638e54Smws  *                suspects convicted are marked non faulty (!F) in R$
11825c6ff4bSstephh  *                list.repaired or list.updated event dispatched
11925c6ff4bSstephh  *
12025c6ff4bSstephh  * [8] Called by: fmd_case_uuresolve()
12125c6ff4bSstephh  *       Actions: list.resolved event dispatched
12225c6ff4bSstephh  *		  case is discarded
123d9638e54Smws  */
124d9638e54Smws 
1257c478bd9Sstevel@tonic-gate #include <sys/fm/protocol.h>
1267c478bd9Sstevel@tonic-gate #include <uuid/uuid.h>
1277c478bd9Sstevel@tonic-gate #include <alloca.h>
1287c478bd9Sstevel@tonic-gate 
1297c478bd9Sstevel@tonic-gate #include <fmd_alloc.h>
1307c478bd9Sstevel@tonic-gate #include <fmd_module.h>
1317c478bd9Sstevel@tonic-gate #include <fmd_error.h>
1327c478bd9Sstevel@tonic-gate #include <fmd_conf.h>
1337c478bd9Sstevel@tonic-gate #include <fmd_case.h>
1347c478bd9Sstevel@tonic-gate #include <fmd_string.h>
1357c478bd9Sstevel@tonic-gate #include <fmd_subr.h>
1367c478bd9Sstevel@tonic-gate #include <fmd_protocol.h>
1377c478bd9Sstevel@tonic-gate #include <fmd_event.h>
1387c478bd9Sstevel@tonic-gate #include <fmd_eventq.h>
1397c478bd9Sstevel@tonic-gate #include <fmd_dispq.h>
1407c478bd9Sstevel@tonic-gate #include <fmd_buf.h>
1417c478bd9Sstevel@tonic-gate #include <fmd_log.h>
1427c478bd9Sstevel@tonic-gate #include <fmd_asru.h>
1430b9e3e76Smws #include <fmd_fmri.h>
144d9638e54Smws #include <fmd_xprt.h>
1457c478bd9Sstevel@tonic-gate 
1467c478bd9Sstevel@tonic-gate #include <fmd.h>
1477c478bd9Sstevel@tonic-gate 
1487c478bd9Sstevel@tonic-gate static const char *const _fmd_case_snames[] = {
1497c478bd9Sstevel@tonic-gate 	"UNSOLVED",	/* FMD_CASE_UNSOLVED */
1507c478bd9Sstevel@tonic-gate 	"SOLVED",	/* FMD_CASE_SOLVED */
151d9638e54Smws 	"CLOSE_WAIT",	/* FMD_CASE_CLOSE_WAIT */
1527c478bd9Sstevel@tonic-gate 	"CLOSED",	/* FMD_CASE_CLOSED */
15325c6ff4bSstephh 	"REPAIRED",	/* FMD_CASE_REPAIRED */
15425c6ff4bSstephh 	"RESOLVED"	/* FMD_CASE_RESOLVED */
1557c478bd9Sstevel@tonic-gate };
1567c478bd9Sstevel@tonic-gate 
15797c04605Scy static fmd_case_impl_t *fmd_case_tryhold(fmd_case_impl_t *);
15897c04605Scy 
1597c478bd9Sstevel@tonic-gate fmd_case_hash_t *
1607c478bd9Sstevel@tonic-gate fmd_case_hash_create(void)
1617c478bd9Sstevel@tonic-gate {
1627c478bd9Sstevel@tonic-gate 	fmd_case_hash_t *chp = fmd_alloc(sizeof (fmd_case_hash_t), FMD_SLEEP);
1637c478bd9Sstevel@tonic-gate 
1647c478bd9Sstevel@tonic-gate 	(void) pthread_rwlock_init(&chp->ch_lock, NULL);
1657c478bd9Sstevel@tonic-gate 	chp->ch_hashlen = fmd.d_str_buckets;
1667c478bd9Sstevel@tonic-gate 	chp->ch_hash = fmd_zalloc(sizeof (void *) * chp->ch_hashlen, FMD_SLEEP);
167567cc2e6Sstephh 	chp->ch_code_hash = fmd_zalloc(sizeof (void *) * chp->ch_hashlen,
168567cc2e6Sstephh 	    FMD_SLEEP);
169d9638e54Smws 	chp->ch_count = 0;
1707c478bd9Sstevel@tonic-gate 
1717c478bd9Sstevel@tonic-gate 	return (chp);
1727c478bd9Sstevel@tonic-gate }
1737c478bd9Sstevel@tonic-gate 
1747c478bd9Sstevel@tonic-gate /*
1757c478bd9Sstevel@tonic-gate  * Destroy the case hash.  Unlike most of our hash tables, no active references
176d9638e54Smws  * are kept by the case hash itself; all references come from other subsystems.
1777c478bd9Sstevel@tonic-gate  * The hash must be destroyed after all modules are unloaded; if anything was
1787c478bd9Sstevel@tonic-gate  * present in the hash it would be by definition a reference count leak.
1797c478bd9Sstevel@tonic-gate  */
1807c478bd9Sstevel@tonic-gate void
1817c478bd9Sstevel@tonic-gate fmd_case_hash_destroy(fmd_case_hash_t *chp)
1827c478bd9Sstevel@tonic-gate {
1837c478bd9Sstevel@tonic-gate 	fmd_free(chp->ch_hash, sizeof (void *) * chp->ch_hashlen);
184567cc2e6Sstephh 	fmd_free(chp->ch_code_hash, sizeof (void *) * chp->ch_hashlen);
1857c478bd9Sstevel@tonic-gate 	fmd_free(chp, sizeof (fmd_case_hash_t));
1867c478bd9Sstevel@tonic-gate }
1877c478bd9Sstevel@tonic-gate 
188d9638e54Smws /*
189d9638e54Smws  * Take a snapshot of the case hash by placing an additional hold on each
190d9638e54Smws  * member in an auxiliary array, and then call 'func' for each case.
191d9638e54Smws  */
192d9638e54Smws void
193d9638e54Smws fmd_case_hash_apply(fmd_case_hash_t *chp,
194d9638e54Smws     void (*func)(fmd_case_t *, void *), void *arg)
195d9638e54Smws {
196d9638e54Smws 	fmd_case_impl_t *cp, **cps, **cpp;
197d9638e54Smws 	uint_t cpc, i;
198d9638e54Smws 
199d9638e54Smws 	(void) pthread_rwlock_rdlock(&chp->ch_lock);
200d9638e54Smws 
201d9638e54Smws 	cps = cpp = fmd_alloc(chp->ch_count * sizeof (fmd_case_t *), FMD_SLEEP);
202d9638e54Smws 	cpc = chp->ch_count;
203d9638e54Smws 
204d9638e54Smws 	for (i = 0; i < chp->ch_hashlen; i++) {
205c297654fSCheng Sean Ye 		for (cp = chp->ch_hash[i]; cp != NULL; cp = cp->ci_next)
206c297654fSCheng Sean Ye 			*cpp++ = fmd_case_tryhold(cp);
207d9638e54Smws 	}
208d9638e54Smws 
209d9638e54Smws 	ASSERT(cpp == cps + cpc);
210d9638e54Smws 	(void) pthread_rwlock_unlock(&chp->ch_lock);
211d9638e54Smws 
212d9638e54Smws 	for (i = 0; i < cpc; i++) {
213c297654fSCheng Sean Ye 		if (cps[i] != NULL) {
214c297654fSCheng Sean Ye 			func((fmd_case_t *)cps[i], arg);
215c297654fSCheng Sean Ye 			fmd_case_rele((fmd_case_t *)cps[i]);
216c297654fSCheng Sean Ye 		}
217d9638e54Smws 	}
218d9638e54Smws 
219d9638e54Smws 	fmd_free(cps, cpc * sizeof (fmd_case_t *));
220d9638e54Smws }
221d9638e54Smws 
222567cc2e6Sstephh static void
223567cc2e6Sstephh fmd_case_code_hash_insert(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
224567cc2e6Sstephh {
225567cc2e6Sstephh 	uint_t h = fmd_strhash(cip->ci_code) % chp->ch_hashlen;
226567cc2e6Sstephh 
227567cc2e6Sstephh 	cip->ci_code_next = chp->ch_code_hash[h];
228567cc2e6Sstephh 	chp->ch_code_hash[h] = cip;
229567cc2e6Sstephh }
230567cc2e6Sstephh 
231567cc2e6Sstephh static void
232567cc2e6Sstephh fmd_case_code_hash_delete(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
233567cc2e6Sstephh {
234567cc2e6Sstephh 	fmd_case_impl_t **pp, *cp;
235567cc2e6Sstephh 
236567cc2e6Sstephh 	if (cip->ci_code) {
237567cc2e6Sstephh 		uint_t h = fmd_strhash(cip->ci_code) % chp->ch_hashlen;
238567cc2e6Sstephh 
239567cc2e6Sstephh 		pp = &chp->ch_code_hash[h];
240567cc2e6Sstephh 		for (cp = *pp; cp != NULL; cp = cp->ci_code_next) {
241567cc2e6Sstephh 			if (cp != cip)
242567cc2e6Sstephh 				pp = &cp->ci_code_next;
243567cc2e6Sstephh 			else
244567cc2e6Sstephh 				break;
245567cc2e6Sstephh 		}
246567cc2e6Sstephh 		if (cp != NULL) {
247567cc2e6Sstephh 			*pp = cp->ci_code_next;
248567cc2e6Sstephh 			cp->ci_code_next = NULL;
249567cc2e6Sstephh 		}
250567cc2e6Sstephh 	}
251567cc2e6Sstephh }
252567cc2e6Sstephh 
253d9638e54Smws /*
254d9638e54Smws  * Look up the diagcode for this case and cache it in ci_code.  If no suspects
255d9638e54Smws  * were defined for this case or if the lookup fails, the event dictionary or
256d9638e54Smws  * module code is broken, and we set the event code to a precomputed default.
257d9638e54Smws  */
258d9638e54Smws static const char *
259d9638e54Smws fmd_case_mkcode(fmd_case_t *cp)
2607c478bd9Sstevel@tonic-gate {
2617c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2627c478bd9Sstevel@tonic-gate 	fmd_case_susp_t *cis;
263567cc2e6Sstephh 	fmd_case_hash_t *chp = fmd.d_cases;
2647c478bd9Sstevel@tonic-gate 
265d9638e54Smws 	char **keys, **keyp;
2667c478bd9Sstevel@tonic-gate 	const char *s;
2677c478bd9Sstevel@tonic-gate 
268d9638e54Smws 	ASSERT(MUTEX_HELD(&cip->ci_lock));
269d9638e54Smws 	ASSERT(cip->ci_state >= FMD_CASE_SOLVED);
270d9638e54Smws 
271567cc2e6Sstephh 	/*
272567cc2e6Sstephh 	 * delete any existing entry from code hash if it is on it
273567cc2e6Sstephh 	 */
274567cc2e6Sstephh 	fmd_case_code_hash_delete(chp, cip);
275567cc2e6Sstephh 
276d9638e54Smws 	fmd_free(cip->ci_code, cip->ci_codelen);
277d9638e54Smws 	cip->ci_codelen = cip->ci_mod->mod_codelen;
278d9638e54Smws 	cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP);
279d9638e54Smws 	keys = keyp = alloca(sizeof (char *) * (cip->ci_nsuspects + 1));
280d9638e54Smws 
281d9638e54Smws 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
282d9638e54Smws 		if (nvlist_lookup_string(cis->cis_nvl, FM_CLASS, keyp) == 0)
283d9638e54Smws 			keyp++;
284d9638e54Smws 	}
285d9638e54Smws 
286d9638e54Smws 	*keyp = NULL; /* mark end of keys[] array for libdiagcode */
287d9638e54Smws 
288d9638e54Smws 	if (cip->ci_nsuspects == 0 || fmd_module_dc_key2code(
289d9638e54Smws 	    cip->ci_mod, keys, cip->ci_code, cip->ci_codelen) != 0) {
290d9638e54Smws 		(void) fmd_conf_getprop(fmd.d_conf, "nodiagcode", &s);
291d9638e54Smws 		fmd_free(cip->ci_code, cip->ci_codelen);
292d9638e54Smws 		cip->ci_codelen = strlen(s) + 1;
293d9638e54Smws 		cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP);
294d9638e54Smws 		(void) strcpy(cip->ci_code, s);
295d9638e54Smws 	}
296d9638e54Smws 
297567cc2e6Sstephh 	/*
298567cc2e6Sstephh 	 * add into hash of solved cases
299567cc2e6Sstephh 	 */
300567cc2e6Sstephh 	fmd_case_code_hash_insert(chp, cip);
301567cc2e6Sstephh 
302d9638e54Smws 	return (cip->ci_code);
303d9638e54Smws }
304d9638e54Smws 
305567cc2e6Sstephh typedef struct {
306567cc2e6Sstephh 	int	*fcl_countp;
307c7d6cfd6SStephen Hanson 	int	fcl_maxcount;
308567cc2e6Sstephh 	uint8_t *fcl_ba;
309567cc2e6Sstephh 	nvlist_t **fcl_nva;
310567cc2e6Sstephh 	int	*fcl_msgp;
311567cc2e6Sstephh } fmd_case_lst_t;
312567cc2e6Sstephh 
313567cc2e6Sstephh static void
314567cc2e6Sstephh fmd_case_set_lst(fmd_asru_link_t *alp, void *arg)
315d9638e54Smws {
316567cc2e6Sstephh 	fmd_case_lst_t *entryp = (fmd_case_lst_t *)arg;
317567cc2e6Sstephh 	boolean_t b;
318567cc2e6Sstephh 	int state;
319567cc2e6Sstephh 
320c7d6cfd6SStephen Hanson 	if (*entryp->fcl_countp >= entryp->fcl_maxcount)
321c7d6cfd6SStephen Hanson 		return;
322567cc2e6Sstephh 	if (nvlist_lookup_boolean_value(alp->al_event, FM_SUSPECT_MESSAGE,
323567cc2e6Sstephh 	    &b) == 0 && b == B_FALSE)
324567cc2e6Sstephh 		*entryp->fcl_msgp = B_FALSE;
325567cc2e6Sstephh 	entryp->fcl_ba[*entryp->fcl_countp] = 0;
326567cc2e6Sstephh 	state = fmd_asru_al_getstate(alp);
32725c6ff4bSstephh 	if (state & FMD_ASRU_DEGRADED)
32825c6ff4bSstephh 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_DEGRADED;
329567cc2e6Sstephh 	if (state & FMD_ASRU_UNUSABLE)
330567cc2e6Sstephh 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_UNUSABLE;
331567cc2e6Sstephh 	if (state & FMD_ASRU_FAULTY)
332567cc2e6Sstephh 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_FAULTY;
333567cc2e6Sstephh 	if (!(state & FMD_ASRU_PRESENT))
334567cc2e6Sstephh 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_NOT_PRESENT;
33525c6ff4bSstephh 	if (alp->al_reason == FMD_ASRU_REPAIRED)
33625c6ff4bSstephh 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_REPAIRED;
33725c6ff4bSstephh 	else if (alp->al_reason == FMD_ASRU_REPLACED)
33825c6ff4bSstephh 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_REPLACED;
33925c6ff4bSstephh 	else if (alp->al_reason == FMD_ASRU_ACQUITTED)
34025c6ff4bSstephh 		entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_ACQUITTED;
341567cc2e6Sstephh 	entryp->fcl_nva[*entryp->fcl_countp] = alp->al_event;
342567cc2e6Sstephh 	(*entryp->fcl_countp)++;
343567cc2e6Sstephh }
344d9638e54Smws 
345567cc2e6Sstephh static void
346567cc2e6Sstephh fmd_case_faulty(fmd_asru_link_t *alp, void *arg)
347567cc2e6Sstephh {
348567cc2e6Sstephh 	int *faultyp = (int *)arg;
349567cc2e6Sstephh 
350567cc2e6Sstephh 	*faultyp |= (alp->al_flags & FMD_ASRU_FAULTY);
351567cc2e6Sstephh }
352d9638e54Smws 
353567cc2e6Sstephh static void
354567cc2e6Sstephh fmd_case_usable(fmd_asru_link_t *alp, void *arg)
355567cc2e6Sstephh {
356567cc2e6Sstephh 	int *usablep = (int *)arg;
357d9638e54Smws 
358567cc2e6Sstephh 	*usablep |= !(fmd_asru_al_getstate(alp) & FMD_ASRU_UNUSABLE);
359567cc2e6Sstephh }
360567cc2e6Sstephh 
36125c6ff4bSstephh static void
36225c6ff4bSstephh fmd_case_not_faulty(fmd_asru_link_t *alp, void *arg)
36325c6ff4bSstephh {
36425c6ff4bSstephh 	int *not_faultyp = (int *)arg;
36525c6ff4bSstephh 
36625c6ff4bSstephh 	*not_faultyp |= !(alp->al_flags & FMD_ASRU_FAULTY);
36725c6ff4bSstephh }
36825c6ff4bSstephh 
36925c6ff4bSstephh /*
37025c6ff4bSstephh  * Have we got any suspects with an asru that are still unusable and present?
37125c6ff4bSstephh  */
37225c6ff4bSstephh static void
37325c6ff4bSstephh fmd_case_unusable_and_present(fmd_asru_link_t *alp, void *arg)
37425c6ff4bSstephh {
37525c6ff4bSstephh 	int *rvalp = (int *)arg;
376cbf75e67SStephen Hanson 	int state;
37725c6ff4bSstephh 	nvlist_t *asru;
37825c6ff4bSstephh 
379cbf75e67SStephen Hanson 	/*
380cbf75e67SStephen Hanson 	 * if this a proxy case and this suspect doesn't have an local asru
381cbf75e67SStephen Hanson 	 * then state is unknown so we must assume it may still be unusable.
382cbf75e67SStephen Hanson 	 */
383cbf75e67SStephen Hanson 	if ((alp->al_flags & FMD_ASRU_PROXY) &&
384cbf75e67SStephen Hanson 	    !(alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU)) {
385cbf75e67SStephen Hanson 		*rvalp |= B_TRUE;
386cbf75e67SStephen Hanson 		return;
387cbf75e67SStephen Hanson 	}
388cbf75e67SStephen Hanson 
389cbf75e67SStephen Hanson 	state = fmd_asru_al_getstate(alp);
39025c6ff4bSstephh 	if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) != 0)
39125c6ff4bSstephh 		return;
39225c6ff4bSstephh 	*rvalp |= ((state & FMD_ASRU_UNUSABLE) && (state & FMD_ASRU_PRESENT));
39325c6ff4bSstephh }
39425c6ff4bSstephh 
395567cc2e6Sstephh nvlist_t *
396567cc2e6Sstephh fmd_case_mkevent(fmd_case_t *cp, const char *class)
397567cc2e6Sstephh {
398567cc2e6Sstephh 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
399567cc2e6Sstephh 	nvlist_t **nva, *nvl;
400567cc2e6Sstephh 	uint8_t *ba;
4017c478bd9Sstevel@tonic-gate 	int msg = B_TRUE;
402627351e3Scy 	const char *code;
403567cc2e6Sstephh 	fmd_case_lst_t fcl;
404567cc2e6Sstephh 	int count = 0;
4057c478bd9Sstevel@tonic-gate 
406d9638e54Smws 	(void) pthread_mutex_lock(&cip->ci_lock);
4077c478bd9Sstevel@tonic-gate 	ASSERT(cip->ci_state >= FMD_CASE_SOLVED);
4087c478bd9Sstevel@tonic-gate 
409567cc2e6Sstephh 	nva = alloca(sizeof (nvlist_t *) * cip->ci_nsuspects);
410567cc2e6Sstephh 	ba = alloca(sizeof (uint8_t) * cip->ci_nsuspects);
4117c478bd9Sstevel@tonic-gate 
4127c478bd9Sstevel@tonic-gate 	/*
4137c478bd9Sstevel@tonic-gate 	 * For each suspect associated with the case, store its fault event
414d9638e54Smws 	 * nvlist in 'nva'.  We also look to see if any of the suspect faults
415d9638e54Smws 	 * have asked not to be messaged.  If any of them have made such a
416d9638e54Smws 	 * request, propagate that attribute to the composite list.* event.
417d9638e54Smws 	 * Finally, store each suspect's faulty status into the bitmap 'ba'.
4187c478bd9Sstevel@tonic-gate 	 */
419567cc2e6Sstephh 	fcl.fcl_countp = &count;
420c7d6cfd6SStephen Hanson 	fcl.fcl_maxcount = cip->ci_nsuspects;
421567cc2e6Sstephh 	fcl.fcl_msgp = &msg;
422567cc2e6Sstephh 	fcl.fcl_ba = ba;
423567cc2e6Sstephh 	fcl.fcl_nva = nva;
424567cc2e6Sstephh 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_set_lst, &fcl);
4257c478bd9Sstevel@tonic-gate 
426d9638e54Smws 	if (cip->ci_code == NULL)
427d9638e54Smws 		(void) fmd_case_mkcode(cp);
428627351e3Scy 	/*
42925c6ff4bSstephh 	 * For repair and updated event, we lookup diagcode from dict using key
43025c6ff4bSstephh 	 * "list.repaired" or "list.updated" or "list.resolved".
431627351e3Scy 	 */
432627351e3Scy 	if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0)
433627351e3Scy 		(void) fmd_conf_getprop(fmd.d_conf, "repaircode", &code);
43425c6ff4bSstephh 	else if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0)
43525c6ff4bSstephh 		(void) fmd_conf_getprop(fmd.d_conf, "resolvecode", &code);
43625c6ff4bSstephh 	else if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0)
43725c6ff4bSstephh 		(void) fmd_conf_getprop(fmd.d_conf, "updatecode", &code);
438627351e3Scy 	else
439627351e3Scy 		code = cip->ci_code;
440d9638e54Smws 
44144743693Sstephh 	if (msg == B_FALSE)
44244743693Sstephh 		cip->ci_flags |= FMD_CF_INVISIBLE;
44344743693Sstephh 
444cbf75e67SStephen Hanson 	/*
445cbf75e67SStephen Hanson 	 * Use the ci_diag_de if one has been saved (eg for an injected fault).
446cbf75e67SStephen Hanson 	 * Otherwise use the authority for the current module.
447cbf75e67SStephen Hanson 	 */
448cbf75e67SStephen Hanson 	nvl = fmd_protocol_list(class, cip->ci_diag_de == NULL ?
449cbf75e67SStephen Hanson 	    cip->ci_mod->mod_fmri : cip->ci_diag_de, cip->ci_uuid, code, count,
450540db9a9SStephen Hanson 	    nva, ba, msg, &cip->ci_tv, cip->ci_injected);
451d9638e54Smws 
452d9638e54Smws 	(void) pthread_mutex_unlock(&cip->ci_lock);
453d9638e54Smws 	return (nvl);
4547c478bd9Sstevel@tonic-gate }
4557c478bd9Sstevel@tonic-gate 
/*
 * File-private tunables.  Their consumers are not visible in this part of
 * the file; judging by the names they presumably steer how a newly solved
 * case is matched against existing cases and when suspects are
 * automatically acquitted -- confirm against the code that reads them.
 */
static int fmd_case_match_on_faulty_overlap = 1;
static int fmd_case_match_on_acquit_overlap = 1;
static int fmd_case_auto_acquit_isolated = 1;
static int fmd_case_auto_acquit_non_acquitted = 1;

/* time in seconds */
static int fmd_case_too_recent = 10;
4615750ef5cSStephen Hanson 
462567cc2e6Sstephh static boolean_t
463567cc2e6Sstephh fmd_case_compare_elem(nvlist_t *nvl, nvlist_t *xnvl, const char *elem)
464567cc2e6Sstephh {
465567cc2e6Sstephh 	nvlist_t *new_rsrc;
466567cc2e6Sstephh 	nvlist_t *rsrc;
467567cc2e6Sstephh 	char *new_name = NULL;
468567cc2e6Sstephh 	char *name = NULL;
469567cc2e6Sstephh 	ssize_t new_namelen;
470567cc2e6Sstephh 	ssize_t namelen;
471567cc2e6Sstephh 	int fmri_present = 1;
472567cc2e6Sstephh 	int new_fmri_present = 1;
473567cc2e6Sstephh 	int match = B_FALSE;
474940d71d2Seschrock 	fmd_topo_t *ftp = fmd_topo_hold();
475567cc2e6Sstephh 
476567cc2e6Sstephh 	if (nvlist_lookup_nvlist(xnvl, elem, &rsrc) != 0)
477567cc2e6Sstephh 		fmri_present = 0;
478567cc2e6Sstephh 	else {
479567cc2e6Sstephh 		if ((namelen = fmd_fmri_nvl2str(rsrc, NULL, 0)) == -1)
480567cc2e6Sstephh 			goto done;
481567cc2e6Sstephh 		name = fmd_alloc(namelen + 1, FMD_SLEEP);
482567cc2e6Sstephh 		if (fmd_fmri_nvl2str(rsrc, name, namelen + 1) == -1)
483567cc2e6Sstephh 			goto done;
484567cc2e6Sstephh 	}
485567cc2e6Sstephh 	if (nvlist_lookup_nvlist(nvl, elem, &new_rsrc) != 0)
486567cc2e6Sstephh 		new_fmri_present = 0;
487567cc2e6Sstephh 	else {
488567cc2e6Sstephh 		if ((new_namelen = fmd_fmri_nvl2str(new_rsrc, NULL, 0)) == -1)
489567cc2e6Sstephh 			goto done;
490567cc2e6Sstephh 		new_name = fmd_alloc(new_namelen + 1, FMD_SLEEP);
491567cc2e6Sstephh 		if (fmd_fmri_nvl2str(new_rsrc, new_name, new_namelen + 1) == -1)
492567cc2e6Sstephh 			goto done;
493567cc2e6Sstephh 	}
494567cc2e6Sstephh 	match = (fmri_present == new_fmri_present &&
495940d71d2Seschrock 	    (fmri_present == 0 ||
496940d71d2Seschrock 	    topo_fmri_strcmp(ftp->ft_hdl, name, new_name)));
497567cc2e6Sstephh done:
498567cc2e6Sstephh 	if (name != NULL)
499567cc2e6Sstephh 		fmd_free(name, namelen + 1);
500567cc2e6Sstephh 	if (new_name != NULL)
501567cc2e6Sstephh 		fmd_free(new_name, new_namelen + 1);
502940d71d2Seschrock 	fmd_topo_rele(ftp);
503567cc2e6Sstephh 	return (match);
504567cc2e6Sstephh }
505567cc2e6Sstephh 
506567cc2e6Sstephh static int
5075750ef5cSStephen Hanson fmd_case_match_suspect(nvlist_t *nvl1, nvlist_t *nvl2)
508567cc2e6Sstephh {
509567cc2e6Sstephh 	char *class, *new_class;
510567cc2e6Sstephh 
5115750ef5cSStephen Hanson 	if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_ASRU))
512567cc2e6Sstephh 		return (0);
5135750ef5cSStephen Hanson 	if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_RESOURCE))
514567cc2e6Sstephh 		return (0);
5155750ef5cSStephen Hanson 	if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_FRU))
516567cc2e6Sstephh 		return (0);
5175750ef5cSStephen Hanson 	(void) nvlist_lookup_string(nvl2, FM_CLASS, &class);
5185750ef5cSStephen Hanson 	(void) nvlist_lookup_string(nvl1, FM_CLASS, &new_class);
519567cc2e6Sstephh 	return (strcmp(class, new_class) == 0);
520567cc2e6Sstephh }
521567cc2e6Sstephh 
5225750ef5cSStephen Hanson typedef struct {
5235750ef5cSStephen Hanson 	int	*fcms_countp;
5245750ef5cSStephen Hanson 	int	fcms_maxcount;
5255750ef5cSStephen Hanson 	fmd_case_impl_t *fcms_cip;
5265750ef5cSStephen Hanson 	uint8_t *fcms_new_susp_state;
5275750ef5cSStephen Hanson 	uint8_t *fcms_old_susp_state;
5285750ef5cSStephen Hanson 	uint8_t *fcms_old_match_state;
5295750ef5cSStephen Hanson } fcms_t;
5305750ef5cSStephen Hanson #define	SUSPECT_STATE_FAULTY				0x1
5315750ef5cSStephen Hanson #define	SUSPECT_STATE_ISOLATED				0x2
5325750ef5cSStephen Hanson #define	SUSPECT_STATE_REMOVED				0x4
5335750ef5cSStephen Hanson #define	SUSPECT_STATE_ACQUITED				0x8
5345750ef5cSStephen Hanson #define	SUSPECT_STATE_REPAIRED				0x10
5355750ef5cSStephen Hanson #define	SUSPECT_STATE_REPLACED				0x20
5365750ef5cSStephen Hanson #define	SUSPECT_STATE_NO_MATCH				0x1
5375750ef5cSStephen Hanson 
538567cc2e6Sstephh /*
5395750ef5cSStephen Hanson  * This is called for each suspect in the old case. Compare it against each
5405750ef5cSStephen Hanson  * suspect in the new case, setting fcms_old_susp_state and fcms_new_susp_state
5415750ef5cSStephen Hanson  * as appropriate. fcms_new_susp_state will left as 0 if the suspect is not
5425750ef5cSStephen Hanson  * found in the old case.
543567cc2e6Sstephh  */
5445750ef5cSStephen Hanson static void
5455750ef5cSStephen Hanson fmd_case_match_suspects(fmd_asru_link_t *alp, void *arg)
546567cc2e6Sstephh {
5475750ef5cSStephen Hanson 	fcms_t *fcmsp = (fcms_t *)arg;
5485750ef5cSStephen Hanson 	fmd_case_impl_t *cip = fcmsp->fcms_cip;
5495750ef5cSStephen Hanson 	fmd_case_susp_t *cis;
5505750ef5cSStephen Hanson 	int i = 0;
5515750ef5cSStephen Hanson 	int state = fmd_asru_al_getstate(alp);
552567cc2e6Sstephh 
5535750ef5cSStephen Hanson 	if (*fcmsp->fcms_countp >= fcmsp->fcms_maxcount)
5545750ef5cSStephen Hanson 		return;
5555750ef5cSStephen Hanson 
5565750ef5cSStephen Hanson 	if (!(state & FMD_ASRU_PRESENT) || (!(state & FMD_ASRU_FAULTY) &&
5575750ef5cSStephen Hanson 	    alp->al_reason == FMD_ASRU_REMOVED))
5585750ef5cSStephen Hanson 		fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
5595750ef5cSStephen Hanson 		    SUSPECT_STATE_REMOVED;
5605750ef5cSStephen Hanson 	else if ((state & FMD_ASRU_UNUSABLE) && (state & FMD_ASRU_FAULTY))
5615750ef5cSStephen Hanson 		fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
5625750ef5cSStephen Hanson 		    SUSPECT_STATE_ISOLATED;
5635750ef5cSStephen Hanson 	else if (state & FMD_ASRU_FAULTY)
5645750ef5cSStephen Hanson 		fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
5655750ef5cSStephen Hanson 		    SUSPECT_STATE_FAULTY;
5665750ef5cSStephen Hanson 	else if (alp->al_reason == FMD_ASRU_REPLACED)
5675750ef5cSStephen Hanson 		fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
5685750ef5cSStephen Hanson 		    SUSPECT_STATE_REPLACED;
5695750ef5cSStephen Hanson 	else if (alp->al_reason == FMD_ASRU_ACQUITTED)
5705750ef5cSStephen Hanson 		fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
5715750ef5cSStephen Hanson 		    SUSPECT_STATE_ACQUITED;
5725750ef5cSStephen Hanson 	else
5735750ef5cSStephen Hanson 		fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
5745750ef5cSStephen Hanson 		    SUSPECT_STATE_REPAIRED;
5755750ef5cSStephen Hanson 
5765750ef5cSStephen Hanson 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next, i++)
5775750ef5cSStephen Hanson 		if (fmd_case_match_suspect(cis->cis_nvl, alp->al_event) == 1)
5785750ef5cSStephen Hanson 			break;
5795750ef5cSStephen Hanson 	if (cis != NULL)
5805750ef5cSStephen Hanson 		fcmsp->fcms_new_susp_state[i] =
5815750ef5cSStephen Hanson 		    fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp];
5825750ef5cSStephen Hanson 	else
5835750ef5cSStephen Hanson 		fcmsp->fcms_old_match_state[*fcmsp->fcms_countp] |=
5845750ef5cSStephen Hanson 		    SUSPECT_STATE_NO_MATCH;
5855750ef5cSStephen Hanson 	(*fcmsp->fcms_countp)++;
5865750ef5cSStephen Hanson }
5875750ef5cSStephen Hanson 
5885750ef5cSStephen Hanson typedef struct {
5895750ef5cSStephen Hanson 	int	*fca_do_update;
5905750ef5cSStephen Hanson 	fmd_case_impl_t *fca_cip;
5915750ef5cSStephen Hanson } fca_t;
5925750ef5cSStephen Hanson 
5935750ef5cSStephen Hanson /*
5945750ef5cSStephen Hanson  * Re-fault all acquitted suspects that are still present in the new list.
5955750ef5cSStephen Hanson  */
5965750ef5cSStephen Hanson static void
5975750ef5cSStephen Hanson fmd_case_fault_acquitted_matching(fmd_asru_link_t *alp, void *arg)
5985750ef5cSStephen Hanson {
5995750ef5cSStephen Hanson 	fca_t *fcap = (fca_t *)arg;
6005750ef5cSStephen Hanson 	fmd_case_impl_t *cip = fcap->fca_cip;
6015750ef5cSStephen Hanson 	fmd_case_susp_t *cis;
6025750ef5cSStephen Hanson 	int state = fmd_asru_al_getstate(alp);
6035750ef5cSStephen Hanson 
6045750ef5cSStephen Hanson 	if (!(state & FMD_ASRU_FAULTY) &&
6055750ef5cSStephen Hanson 	    alp->al_reason == FMD_ASRU_ACQUITTED) {
6065750ef5cSStephen Hanson 		for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
6075750ef5cSStephen Hanson 			if (fmd_case_match_suspect(cis->cis_nvl,
6085750ef5cSStephen Hanson 			    alp->al_event) == 1)
6095750ef5cSStephen Hanson 				break;
6105750ef5cSStephen Hanson 		if (cis != NULL) {
6115750ef5cSStephen Hanson 			(void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
6125750ef5cSStephen Hanson 			*fcap->fca_do_update = 1;
6135750ef5cSStephen Hanson 		}
6145750ef5cSStephen Hanson 	}
6155750ef5cSStephen Hanson }
6165750ef5cSStephen Hanson 
6175750ef5cSStephen Hanson /*
6185750ef5cSStephen Hanson  * Re-fault all suspects that are still present in the new list.
6195750ef5cSStephen Hanson  */
6205750ef5cSStephen Hanson static void
6215750ef5cSStephen Hanson fmd_case_fault_all_matching(fmd_asru_link_t *alp, void *arg)
6225750ef5cSStephen Hanson {
6235750ef5cSStephen Hanson 	fca_t *fcap = (fca_t *)arg;
6245750ef5cSStephen Hanson 	fmd_case_impl_t *cip = fcap->fca_cip;
6255750ef5cSStephen Hanson 	fmd_case_susp_t *cis;
6265750ef5cSStephen Hanson 	int state = fmd_asru_al_getstate(alp);
6275750ef5cSStephen Hanson 
6285750ef5cSStephen Hanson 	if (!(state & FMD_ASRU_FAULTY)) {
6295750ef5cSStephen Hanson 		for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
6305750ef5cSStephen Hanson 			if (fmd_case_match_suspect(cis->cis_nvl,
6315750ef5cSStephen Hanson 			    alp->al_event) == 1)
6325750ef5cSStephen Hanson 				break;
6335750ef5cSStephen Hanson 		if (cis != NULL) {
6345750ef5cSStephen Hanson 			(void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
6355750ef5cSStephen Hanson 			*fcap->fca_do_update = 1;
6365750ef5cSStephen Hanson 		}
6375750ef5cSStephen Hanson 	}
6385750ef5cSStephen Hanson }
6395750ef5cSStephen Hanson 
6405750ef5cSStephen Hanson /*
6415750ef5cSStephen Hanson  * Acquit all suspects that are no longer present in the new list.
6425750ef5cSStephen Hanson  */
6435750ef5cSStephen Hanson static void
6445750ef5cSStephen Hanson fmd_case_acquit_no_match(fmd_asru_link_t *alp, void *arg)
6455750ef5cSStephen Hanson {
6465750ef5cSStephen Hanson 	fca_t *fcap = (fca_t *)arg;
6475750ef5cSStephen Hanson 	fmd_case_impl_t *cip = fcap->fca_cip;
6485750ef5cSStephen Hanson 	fmd_case_susp_t *cis;
6495750ef5cSStephen Hanson 	int state = fmd_asru_al_getstate(alp);
6505750ef5cSStephen Hanson 
6515750ef5cSStephen Hanson 	if (state & FMD_ASRU_FAULTY) {
6525750ef5cSStephen Hanson 		for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
6535750ef5cSStephen Hanson 			if (fmd_case_match_suspect(cis->cis_nvl,
6545750ef5cSStephen Hanson 			    alp->al_event) == 1)
6555750ef5cSStephen Hanson 				break;
6565750ef5cSStephen Hanson 		if (cis == NULL) {
6575750ef5cSStephen Hanson 			(void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
6585750ef5cSStephen Hanson 			    FMD_ASRU_ACQUITTED);
6595750ef5cSStephen Hanson 			*fcap->fca_do_update = 1;
6605750ef5cSStephen Hanson 		}
6615750ef5cSStephen Hanson 	}
6625750ef5cSStephen Hanson }
6635750ef5cSStephen Hanson 
6645750ef5cSStephen Hanson /*
6655750ef5cSStephen Hanson  * Acquit all isolated suspects.
6665750ef5cSStephen Hanson  */
6675750ef5cSStephen Hanson static void
6685750ef5cSStephen Hanson fmd_case_acquit_isolated(fmd_asru_link_t *alp, void *arg)
6695750ef5cSStephen Hanson {
6705750ef5cSStephen Hanson 	int *do_update = (int *)arg;
6715750ef5cSStephen Hanson 	int state = fmd_asru_al_getstate(alp);
6725750ef5cSStephen Hanson 
6735750ef5cSStephen Hanson 	if ((state & FMD_ASRU_PRESENT) && (state & FMD_ASRU_UNUSABLE) &&
6745750ef5cSStephen Hanson 	    (state & FMD_ASRU_FAULTY)) {
6755750ef5cSStephen Hanson 		(void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
6765750ef5cSStephen Hanson 		    FMD_ASRU_ACQUITTED);
6775750ef5cSStephen Hanson 		*do_update = 1;
6785750ef5cSStephen Hanson 	}
6795750ef5cSStephen Hanson }
6805750ef5cSStephen Hanson 
6815750ef5cSStephen Hanson /*
6825750ef5cSStephen Hanson  * Acquit suspect which matches specified nvlist
6835750ef5cSStephen Hanson  */
6845750ef5cSStephen Hanson static void
6855750ef5cSStephen Hanson fmd_case_acquit_suspect(fmd_asru_link_t *alp, void *arg)
6865750ef5cSStephen Hanson {
6875750ef5cSStephen Hanson 	nvlist_t *nvl = (nvlist_t *)arg;
6885750ef5cSStephen Hanson 	int state = fmd_asru_al_getstate(alp);
6895750ef5cSStephen Hanson 
6905750ef5cSStephen Hanson 	if ((state & FMD_ASRU_FAULTY) &&
6915750ef5cSStephen Hanson 	    fmd_case_match_suspect(nvl, alp->al_event) == 1)
6925750ef5cSStephen Hanson 		(void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
6935750ef5cSStephen Hanson 		    FMD_ASRU_ACQUITTED);
6945750ef5cSStephen Hanson }
6955750ef5cSStephen Hanson 
6965750ef5cSStephen Hanson typedef struct {
6975750ef5cSStephen Hanson 	fmd_case_impl_t *fccd_cip;
6985750ef5cSStephen Hanson 	uint8_t *fccd_new_susp_state;
6995750ef5cSStephen Hanson 	uint8_t *fccd_new_match_state;
7005750ef5cSStephen Hanson 	int *fccd_discard_new;
7015750ef5cSStephen Hanson 	int *fccd_adjust_new;
7025750ef5cSStephen Hanson } fccd_t;
7035750ef5cSStephen Hanson 
7045750ef5cSStephen Hanson /*
7055750ef5cSStephen Hanson  * see if a matching suspect list already exists in the cache
7065750ef5cSStephen Hanson  */
7075750ef5cSStephen Hanson static void
7085750ef5cSStephen Hanson fmd_case_check_for_dups(fmd_case_t *old_cp, void *arg)
7095750ef5cSStephen Hanson {
7105750ef5cSStephen Hanson 	fccd_t *fccdp = (fccd_t *)arg;
7115750ef5cSStephen Hanson 	fmd_case_impl_t *new_cip = fccdp->fccd_cip;
7125750ef5cSStephen Hanson 	fmd_case_impl_t *old_cip = (fmd_case_impl_t *)old_cp;
7135750ef5cSStephen Hanson 	int i, count = 0, do_update = 0, got_isolated_overlap = 0;
7145750ef5cSStephen Hanson 	int got_faulty_overlap = 0;
7155750ef5cSStephen Hanson 	int got_acquit_overlap = 0;
7165750ef5cSStephen Hanson 	boolean_t too_recent;
7175750ef5cSStephen Hanson 	uint64_t most_recent = 0;
7185750ef5cSStephen Hanson 	fcms_t fcms;
7195750ef5cSStephen Hanson 	fca_t fca;
7205750ef5cSStephen Hanson 	uint8_t *new_susp_state;
7215750ef5cSStephen Hanson 	uint8_t *old_susp_state;
7225750ef5cSStephen Hanson 	uint8_t *old_match_state;
7235750ef5cSStephen Hanson 
7245750ef5cSStephen Hanson 	new_susp_state = alloca(new_cip->ci_nsuspects * sizeof (uint8_t));
7255750ef5cSStephen Hanson 	for (i = 0; i < new_cip->ci_nsuspects; i++)
7265750ef5cSStephen Hanson 		new_susp_state[i] = 0;
7275750ef5cSStephen Hanson 	old_susp_state = alloca(old_cip->ci_nsuspects * sizeof (uint8_t));
7285750ef5cSStephen Hanson 	for (i = 0; i < old_cip->ci_nsuspects; i++)
7295750ef5cSStephen Hanson 		old_susp_state[i] = 0;
7305750ef5cSStephen Hanson 	old_match_state = alloca(old_cip->ci_nsuspects * sizeof (uint8_t));
7315750ef5cSStephen Hanson 	for (i = 0; i < old_cip->ci_nsuspects; i++)
7325750ef5cSStephen Hanson 		old_match_state[i] = 0;
733567cc2e6Sstephh 
734567cc2e6Sstephh 	/*
7355750ef5cSStephen Hanson 	 * Compare with each suspect in the existing case.
736567cc2e6Sstephh 	 */
7375750ef5cSStephen Hanson 	fcms.fcms_countp = &count;
7385750ef5cSStephen Hanson 	fcms.fcms_maxcount = old_cip->ci_nsuspects;
7395750ef5cSStephen Hanson 	fcms.fcms_cip = new_cip;
7405750ef5cSStephen Hanson 	fcms.fcms_new_susp_state = new_susp_state;
7415750ef5cSStephen Hanson 	fcms.fcms_old_susp_state = old_susp_state;
7425750ef5cSStephen Hanson 	fcms.fcms_old_match_state = old_match_state;
7435750ef5cSStephen Hanson 	fmd_asru_hash_apply_by_case(fmd.d_asrus, (fmd_case_t *)old_cip,
7445750ef5cSStephen Hanson 	    fmd_case_match_suspects, &fcms);
7455750ef5cSStephen Hanson 
7465750ef5cSStephen Hanson 	/*
7475750ef5cSStephen Hanson 	 * If we have some faulty, non-isolated suspects that overlap, then most
7485750ef5cSStephen Hanson 	 * likely it is the suspects that overlap in the suspect lists that are
7495750ef5cSStephen Hanson 	 * to blame. So we can consider this to be a match.
7505750ef5cSStephen Hanson 	 */
7515750ef5cSStephen Hanson 	for (i = 0; i < new_cip->ci_nsuspects; i++)
7525750ef5cSStephen Hanson 		if (new_susp_state[i] == SUSPECT_STATE_FAULTY)
7535750ef5cSStephen Hanson 			got_faulty_overlap = 1;
7545750ef5cSStephen Hanson 	if (got_faulty_overlap && fmd_case_match_on_faulty_overlap)
7555750ef5cSStephen Hanson 		goto got_match;
7565750ef5cSStephen Hanson 
7575750ef5cSStephen Hanson 	/*
7585750ef5cSStephen Hanson 	 * If we have no faulty, non-isolated suspects in the old case, but we
7595750ef5cSStephen Hanson 	 * do have some acquitted suspects that overlap, then most likely it is
7605750ef5cSStephen Hanson 	 * the acquitted suspects that overlap in the suspect lists that are
7615750ef5cSStephen Hanson 	 * to blame. So we can consider this to be a match.
7625750ef5cSStephen Hanson 	 */
7635750ef5cSStephen Hanson 	for (i = 0; i < new_cip->ci_nsuspects; i++)
7645750ef5cSStephen Hanson 		if (new_susp_state[i] == SUSPECT_STATE_ACQUITED)
7655750ef5cSStephen Hanson 			got_acquit_overlap = 1;
7665750ef5cSStephen Hanson 	for (i = 0; i < old_cip->ci_nsuspects; i++)
7675750ef5cSStephen Hanson 		if (old_susp_state[i] == SUSPECT_STATE_FAULTY)
7685750ef5cSStephen Hanson 			got_acquit_overlap = 0;
7695750ef5cSStephen Hanson 	if (got_acquit_overlap && fmd_case_match_on_acquit_overlap)
7705750ef5cSStephen Hanson 		goto got_match;
7715750ef5cSStephen Hanson 
7725750ef5cSStephen Hanson 	/*
7735750ef5cSStephen Hanson 	 * Check that all suspects in the new list are present in the old list.
7745750ef5cSStephen Hanson 	 * Return if we find one that isn't.
7755750ef5cSStephen Hanson 	 */
7765750ef5cSStephen Hanson 	for (i = 0; i < new_cip->ci_nsuspects; i++)
7775750ef5cSStephen Hanson 		if (new_susp_state[i] == 0)
7785750ef5cSStephen Hanson 			return;
7795750ef5cSStephen Hanson 
7805750ef5cSStephen Hanson 	/*
7815750ef5cSStephen Hanson 	 * Check that all suspects in the old list are present in the new list
7825750ef5cSStephen Hanson 	 * *or* they are isolated or removed/replaced (which would explain why
7835750ef5cSStephen Hanson 	 * they are not present in the new list). Return if we find one that is
7845750ef5cSStephen Hanson 	 * faulty and unisolated or repaired or acquitted, and that is not
7855750ef5cSStephen Hanson 	 * present in the new case.
7865750ef5cSStephen Hanson 	 */
7875750ef5cSStephen Hanson 	for (i = 0; i < old_cip->ci_nsuspects; i++)
7885750ef5cSStephen Hanson 		if (old_match_state[i] == SUSPECT_STATE_NO_MATCH &&
7895750ef5cSStephen Hanson 		    (old_susp_state[i] == SUSPECT_STATE_FAULTY ||
7905750ef5cSStephen Hanson 		    old_susp_state[i] == SUSPECT_STATE_ACQUITED ||
7915750ef5cSStephen Hanson 		    old_susp_state[i] == SUSPECT_STATE_REPAIRED))
7925750ef5cSStephen Hanson 			return;
7935750ef5cSStephen Hanson 
7945750ef5cSStephen Hanson got_match:
7955750ef5cSStephen Hanson 	/*
7965750ef5cSStephen Hanson 	 * If the old case is already in repaired/resolved state, we can't
7975750ef5cSStephen Hanson 	 * do anything more with it, so keep the new case, but acquit some
7985750ef5cSStephen Hanson 	 * of the suspects if appropriate.
7995750ef5cSStephen Hanson 	 */
8005750ef5cSStephen Hanson 	if (old_cip->ci_state >= FMD_CASE_REPAIRED) {
8015750ef5cSStephen Hanson 		if (fmd_case_auto_acquit_non_acquitted) {
8025750ef5cSStephen Hanson 			*fccdp->fccd_adjust_new = 1;
8035750ef5cSStephen Hanson 			for (i = 0; i < new_cip->ci_nsuspects; i++) {
8045750ef5cSStephen Hanson 				fccdp->fccd_new_susp_state[i] |=
8055750ef5cSStephen Hanson 				    new_susp_state[i];
8065750ef5cSStephen Hanson 				if (new_susp_state[i] == 0)
8075750ef5cSStephen Hanson 					fccdp->fccd_new_susp_state[i] =
8085750ef5cSStephen Hanson 					    SUSPECT_STATE_NO_MATCH;
8095750ef5cSStephen Hanson 			}
81019e1255fScy 		}
8115750ef5cSStephen Hanson 		return;
8125750ef5cSStephen Hanson 	}
813567cc2e6Sstephh 
8145750ef5cSStephen Hanson 	/*
8155750ef5cSStephen Hanson 	 * Otherwise discard the new case and keep the old, again updating the
8165750ef5cSStephen Hanson 	 * state of the suspects as appropriate
8175750ef5cSStephen Hanson 	 */
8185750ef5cSStephen Hanson 	*fccdp->fccd_discard_new = 1;
8195750ef5cSStephen Hanson 	fca.fca_cip = new_cip;
8205750ef5cSStephen Hanson 	fca.fca_do_update = &do_update;
8215750ef5cSStephen Hanson 
8225750ef5cSStephen Hanson 	/*
8235750ef5cSStephen Hanson 	 * See if new case occurred within fmd_case_too_recent seconds of the
8245750ef5cSStephen Hanson 	 * most recent modification to the old case and if so don't do
8255750ef5cSStephen Hanson 	 * auto-acquit. This avoids problems if a flood of ereports come in and
8265750ef5cSStephen Hanson 	 * they don't all get diagnosed before the first case causes some of
8275750ef5cSStephen Hanson 	 * the devices to be isolated making it appear that an isolated device
8285750ef5cSStephen Hanson 	 * was in the suspect list.
8295750ef5cSStephen Hanson 	 */
8305750ef5cSStephen Hanson 	fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
8315750ef5cSStephen Hanson 	    fmd_asru_most_recent, &most_recent);
8325750ef5cSStephen Hanson 	too_recent = (new_cip->ci_tv.tv_sec - most_recent <
8335750ef5cSStephen Hanson 	    fmd_case_too_recent);
8345750ef5cSStephen Hanson 
8355750ef5cSStephen Hanson 	if (got_faulty_overlap) {
836567cc2e6Sstephh 		/*
8375750ef5cSStephen Hanson 		 * Acquit any suspects not present in the new list, plus
8385750ef5cSStephen Hanson 		 * any that are are present but are isolated.
839567cc2e6Sstephh 		 */
8405750ef5cSStephen Hanson 		fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
8415750ef5cSStephen Hanson 		    fmd_case_acquit_no_match, &fca);
8425750ef5cSStephen Hanson 		if (fmd_case_auto_acquit_isolated && !too_recent)
8435750ef5cSStephen Hanson 			fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
8445750ef5cSStephen Hanson 			    fmd_case_acquit_isolated, &do_update);
8455750ef5cSStephen Hanson 	} else if (got_acquit_overlap) {
8465750ef5cSStephen Hanson 		/*
8475750ef5cSStephen Hanson 		 * Re-fault the acquitted matching suspects and acquit all
8485750ef5cSStephen Hanson 		 * isolated suspects.
8495750ef5cSStephen Hanson 		 */
8505750ef5cSStephen Hanson 		if (fmd_case_auto_acquit_isolated && !too_recent) {
8515750ef5cSStephen Hanson 			fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
8525750ef5cSStephen Hanson 			    fmd_case_fault_acquitted_matching, &fca);
8535750ef5cSStephen Hanson 			fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
8545750ef5cSStephen Hanson 			    fmd_case_acquit_isolated, &do_update);
855567cc2e6Sstephh 		}
8565750ef5cSStephen Hanson 	} else if (fmd_case_auto_acquit_isolated) {
8575750ef5cSStephen Hanson 		/*
8585750ef5cSStephen Hanson 		 * To get here, there must be no faulty or acquitted suspects,
8595750ef5cSStephen Hanson 		 * but there must be at least one isolated suspect. Just acquit
8605750ef5cSStephen Hanson 		 * non-matching isolated suspects. If there are no matching
8615750ef5cSStephen Hanson 		 * isolated suspects, then re-fault all matching suspects.
8625750ef5cSStephen Hanson 		 */
8635750ef5cSStephen Hanson 		for (i = 0; i < new_cip->ci_nsuspects; i++)
8645750ef5cSStephen Hanson 			if (new_susp_state[i] == SUSPECT_STATE_ISOLATED)
8655750ef5cSStephen Hanson 				got_isolated_overlap = 1;
8665750ef5cSStephen Hanson 		if (!got_isolated_overlap)
8675750ef5cSStephen Hanson 			fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
8685750ef5cSStephen Hanson 			    fmd_case_fault_all_matching, &fca);
8695750ef5cSStephen Hanson 		fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
8705750ef5cSStephen Hanson 		    fmd_case_acquit_no_match, &fca);
871567cc2e6Sstephh 	}
8725750ef5cSStephen Hanson 
8735750ef5cSStephen Hanson 	/*
8745750ef5cSStephen Hanson 	 * If we've updated anything in the old case, call fmd_case_update()
8755750ef5cSStephen Hanson 	 */
8765750ef5cSStephen Hanson 	if (do_update)
8775750ef5cSStephen Hanson 		fmd_case_update(old_cp);
878567cc2e6Sstephh }
879567cc2e6Sstephh 
8807c478bd9Sstevel@tonic-gate /*
881d9638e54Smws  * Convict suspects in a case by applying a conviction policy and updating the
882d9638e54Smws  * resource cache prior to emitting the list.suspect event for the given case.
883d9638e54Smws  * At present, our policy is very simple: convict every suspect in the case.
884d9638e54Smws  * In the future, this policy can be extended and made configurable to permit:
885d9638e54Smws  *
886d9638e54Smws  * - convicting the suspect with the highest FIT rate
887d9638e54Smws  * - convicting the suspect with the cheapest FRU
888d9638e54Smws  * - convicting the suspect with the FRU that is in a depot's inventory
889d9638e54Smws  * - convicting the suspect with the longest lifetime
890d9638e54Smws  *
891d9638e54Smws  * and so forth.  A word to the wise: this problem is significantly harder that
892d9638e54Smws  * it seems at first glance.  Future work should heed the following advice:
893d9638e54Smws  *
894d9638e54Smws  * Hacking the policy into C code here is a very bad idea.  The policy needs to
895d9638e54Smws  * be decided upon very carefully and fundamentally encodes knowledge of what
896d9638e54Smws  * suspect list combinations can be emitted by what diagnosis engines.  As such
897d9638e54Smws  * fmd's code is the wrong location, because that would require fmd itself to
898d9638e54Smws  * be updated for every diagnosis engine change, defeating the entire design.
899d9638e54Smws  * The FMA Event Registry knows the suspect list combinations: policy inputs
900d9638e54Smws  * can be derived from it and used to produce per-module policy configuration.
901d9638e54Smws  *
902d9638e54Smws  * If the policy needs to be dynamic and not statically fixed at either fmd
903d9638e54Smws  * startup or module load time, any implementation of dynamic policy retrieval
904d9638e54Smws  * must employ some kind of caching mechanism or be part of a built-in module.
905d9638e54Smws  * The fmd_case_convict() function is called with locks held inside of fmd and
906d9638e54Smws  * is not a place where unbounded blocking on some inter-process or inter-
907d9638e54Smws  * system communication to another service (e.g. another daemon) can occur.
9087c478bd9Sstevel@tonic-gate  */
909567cc2e6Sstephh static int
910d9638e54Smws fmd_case_convict(fmd_case_t *cp)
911d9638e54Smws {
912d9638e54Smws 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
913d9638e54Smws 	fmd_asru_hash_t *ahp = fmd.d_asrus;
9145750ef5cSStephen Hanson 	int discard_new = 0, i;
915d9638e54Smws 	fmd_case_susp_t *cis;
916567cc2e6Sstephh 	fmd_asru_link_t *alp;
9175750ef5cSStephen Hanson 	uint8_t *new_susp_state;
9185750ef5cSStephen Hanson 	uint8_t *new_match_state;
9195750ef5cSStephen Hanson 	int adjust_new = 0;
9205750ef5cSStephen Hanson 	fccd_t fccd;
921b0daa853SStephen Hanson 	fmd_case_impl_t *ncp, **cps, **cpp;
922b0daa853SStephen Hanson 	uint_t cpc;
923b0daa853SStephen Hanson 	fmd_case_hash_t *chp;
9245750ef5cSStephen Hanson 
9255750ef5cSStephen Hanson 	/*
9265750ef5cSStephen Hanson 	 * First we must see if any matching cases already exist.
9275750ef5cSStephen Hanson 	 */
9285750ef5cSStephen Hanson 	new_susp_state = alloca(cip->ci_nsuspects * sizeof (uint8_t));
9295750ef5cSStephen Hanson 	for (i = 0; i < cip->ci_nsuspects; i++)
9305750ef5cSStephen Hanson 		new_susp_state[i] = 0;
9315750ef5cSStephen Hanson 	new_match_state = alloca(cip->ci_nsuspects * sizeof (uint8_t));
9325750ef5cSStephen Hanson 	for (i = 0; i < cip->ci_nsuspects; i++)
9335750ef5cSStephen Hanson 		new_match_state[i] = 0;
9345750ef5cSStephen Hanson 	fccd.fccd_cip = cip;
9355750ef5cSStephen Hanson 	fccd.fccd_adjust_new = &adjust_new;
9365750ef5cSStephen Hanson 	fccd.fccd_new_susp_state = new_susp_state;
9375750ef5cSStephen Hanson 	fccd.fccd_new_match_state = new_match_state;
9385750ef5cSStephen Hanson 	fccd.fccd_discard_new = &discard_new;
939b0daa853SStephen Hanson 
940b0daa853SStephen Hanson 	/*
941b0daa853SStephen Hanson 	 * Hold all cases
942b0daa853SStephen Hanson 	 */
943b0daa853SStephen Hanson 	chp = fmd.d_cases;
944b0daa853SStephen Hanson 	(void) pthread_rwlock_rdlock(&chp->ch_lock);
945b0daa853SStephen Hanson 	cps = cpp = fmd_alloc(chp->ch_count * sizeof (fmd_case_t *), FMD_SLEEP);
946b0daa853SStephen Hanson 	cpc = chp->ch_count;
947b0daa853SStephen Hanson 	for (i = 0; i < chp->ch_hashlen; i++)
948b0daa853SStephen Hanson 		for (ncp = chp->ch_hash[i]; ncp != NULL; ncp = ncp->ci_next)
949b0daa853SStephen Hanson 			*cpp++ = fmd_case_tryhold(ncp);
950b0daa853SStephen Hanson 	ASSERT(cpp == cps + cpc);
951b0daa853SStephen Hanson 	(void) pthread_rwlock_unlock(&chp->ch_lock);
952b0daa853SStephen Hanson 
953b0daa853SStephen Hanson 	/*
954b0daa853SStephen Hanson 	 * Run fmd_case_check_for_dups() on all cases except the current one.
955b0daa853SStephen Hanson 	 */
956b0daa853SStephen Hanson 	for (i = 0; i < cpc; i++) {
957b0daa853SStephen Hanson 		if (cps[i] != NULL) {
958b0daa853SStephen Hanson 			if (cps[i] != (fmd_case_impl_t *)cp)
959b0daa853SStephen Hanson 				fmd_case_check_for_dups((fmd_case_t *)cps[i],
960b0daa853SStephen Hanson 				    &fccd);
961b0daa853SStephen Hanson 			fmd_case_rele((fmd_case_t *)cps[i]);
962b0daa853SStephen Hanson 		}
963b0daa853SStephen Hanson 	}
964b0daa853SStephen Hanson 	fmd_free(cps, cpc * sizeof (fmd_case_t *));
965b0daa853SStephen Hanson 
966b0daa853SStephen Hanson 	(void) pthread_mutex_lock(&cip->ci_lock);
967b0daa853SStephen Hanson 	if (cip->ci_code == NULL)
968b0daa853SStephen Hanson 		(void) fmd_case_mkcode(cp);
969b0daa853SStephen Hanson 	else if (cip->ci_precanned)
970b0daa853SStephen Hanson 		fmd_case_code_hash_insert(fmd.d_cases, cip);
9715750ef5cSStephen Hanson 
9725750ef5cSStephen Hanson 	if (discard_new) {
9735750ef5cSStephen Hanson 		/*
9745750ef5cSStephen Hanson 		 * We've found an existing case that is a match and it is not
9755750ef5cSStephen Hanson 		 * already in repaired or resolved state. So we can close this
9765750ef5cSStephen Hanson 		 * one as a duplicate.
9775750ef5cSStephen Hanson 		 */
978567cc2e6Sstephh 		(void) pthread_mutex_unlock(&cip->ci_lock);
979567cc2e6Sstephh 		return (1);
980567cc2e6Sstephh 	}
981d9638e54Smws 
982567cc2e6Sstephh 	/*
9835750ef5cSStephen Hanson 	 * Allocate new cache entries
984567cc2e6Sstephh 	 */
985d9638e54Smws 	for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
986567cc2e6Sstephh 		if ((alp = fmd_asru_hash_create_entry(ahp,
987567cc2e6Sstephh 		    cp, cis->cis_nvl)) == NULL) {
988d9638e54Smws 			fmd_error(EFMD_CASE_EVENT, "cannot convict suspect in "
989d9638e54Smws 			    "%s: %s\n", cip->ci_uuid, fmd_strerror(errno));
990d9638e54Smws 			continue;
991d9638e54Smws 		}
992cbf75e67SStephen Hanson 		alp->al_flags |= FMD_ASRU_PRESENT;
993cbf75e67SStephen Hanson 		alp->al_asru->asru_flags |= FMD_ASRU_PRESENT;
99425c6ff4bSstephh 		(void) fmd_asru_clrflags(alp, FMD_ASRU_UNUSABLE, 0);
995567cc2e6Sstephh 		(void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
996d9638e54Smws 	}
997d9638e54Smws 
9985750ef5cSStephen Hanson 	if (adjust_new) {
9995750ef5cSStephen Hanson 		int some_suspect = 0, some_not_suspect = 0;
10005750ef5cSStephen Hanson 
10015750ef5cSStephen Hanson 		/*
10025750ef5cSStephen Hanson 		 * There is one or more matching case but they are already in
10035750ef5cSStephen Hanson 		 * repaired or resolved state. So we need to keep the new
10045750ef5cSStephen Hanson 		 * case, but we can adjust it. Repaired/removed/replaced
10055750ef5cSStephen Hanson 		 * suspects are unlikely to be to blame (unless there are
10065750ef5cSStephen Hanson 		 * actually two separate faults). So if we have a combination of
10075750ef5cSStephen Hanson 		 * repaired/replaced/removed suspects and acquitted suspects in
10085750ef5cSStephen Hanson 		 * the old lists, then we should acquit in the new list those
10095750ef5cSStephen Hanson 		 * that were repaired/replaced/removed in the old.
10105750ef5cSStephen Hanson 		 */
10115750ef5cSStephen Hanson 		for (i = 0; i < cip->ci_nsuspects; i++) {
10125750ef5cSStephen Hanson 			if ((new_susp_state[i] & SUSPECT_STATE_REPLACED) ||
10135750ef5cSStephen Hanson 			    (new_susp_state[i] & SUSPECT_STATE_REPAIRED) ||
10145750ef5cSStephen Hanson 			    (new_susp_state[i] & SUSPECT_STATE_REMOVED) ||
10155750ef5cSStephen Hanson 			    (new_match_state[i] & SUSPECT_STATE_NO_MATCH))
10165750ef5cSStephen Hanson 				some_not_suspect = 1;
10175750ef5cSStephen Hanson 			else
10185750ef5cSStephen Hanson 				some_suspect = 1;
10195750ef5cSStephen Hanson 		}
10205750ef5cSStephen Hanson 		if (some_suspect && some_not_suspect) {
10215750ef5cSStephen Hanson 			for (cis = cip->ci_suspects, i = 0; cis != NULL;
10225750ef5cSStephen Hanson 			    cis = cis->cis_next, i++)
10235750ef5cSStephen Hanson 				if ((new_susp_state[i] &
10245750ef5cSStephen Hanson 				    SUSPECT_STATE_REPLACED) ||
10255750ef5cSStephen Hanson 				    (new_susp_state[i] &
10265750ef5cSStephen Hanson 				    SUSPECT_STATE_REPAIRED) ||
10275750ef5cSStephen Hanson 				    (new_susp_state[i] &
10285750ef5cSStephen Hanson 				    SUSPECT_STATE_REMOVED) ||
10295750ef5cSStephen Hanson 				    (new_match_state[i] &
10305750ef5cSStephen Hanson 				    SUSPECT_STATE_NO_MATCH))
10315750ef5cSStephen Hanson 					fmd_asru_hash_apply_by_case(fmd.d_asrus,
10325750ef5cSStephen Hanson 					    cp, fmd_case_acquit_suspect,
10335750ef5cSStephen Hanson 					    cis->cis_nvl);
10345750ef5cSStephen Hanson 		}
10355750ef5cSStephen Hanson 	}
10365750ef5cSStephen Hanson 
1037d9638e54Smws 	(void) pthread_mutex_unlock(&cip->ci_lock);
1038567cc2e6Sstephh 	return (0);
1039d9638e54Smws }
1040d9638e54Smws 
1041d9638e54Smws void
10427c478bd9Sstevel@tonic-gate fmd_case_publish(fmd_case_t *cp, uint_t state)
10437c478bd9Sstevel@tonic-gate {
10447c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
10457c478bd9Sstevel@tonic-gate 	fmd_event_t *e;
10467c478bd9Sstevel@tonic-gate 	nvlist_t *nvl;
10477c478bd9Sstevel@tonic-gate 	char *class;
10487c478bd9Sstevel@tonic-gate 
1049d9638e54Smws 	if (state == FMD_CASE_CURRENT)
1050d9638e54Smws 		state = cip->ci_state; /* use current state */
1051d9638e54Smws 
10527c478bd9Sstevel@tonic-gate 	switch (state) {
10537c478bd9Sstevel@tonic-gate 	case FMD_CASE_SOLVED:
1054749f21d3Swesolows 		(void) pthread_mutex_lock(&cip->ci_lock);
1055c7d6cfd6SStephen Hanson 
1056c7d6cfd6SStephen Hanson 		/*
1057c7d6cfd6SStephen Hanson 		 * If we already have a code, then case is already solved.
1058c7d6cfd6SStephen Hanson 		 */
1059cbf75e67SStephen Hanson 		if (cip->ci_precanned == 0 && cip->ci_xprt == NULL &&
1060cbf75e67SStephen Hanson 		    cip->ci_code != NULL) {
1061c7d6cfd6SStephen Hanson 			(void) pthread_mutex_unlock(&cip->ci_lock);
1062c7d6cfd6SStephen Hanson 			break;
1063c7d6cfd6SStephen Hanson 		}
1064c7d6cfd6SStephen Hanson 
106544743693Sstephh 		if (cip->ci_tv_valid == 0) {
106644743693Sstephh 			fmd_time_gettimeofday(&cip->ci_tv);
106744743693Sstephh 			cip->ci_tv_valid = 1;
106844743693Sstephh 		}
1069749f21d3Swesolows 		(void) pthread_mutex_unlock(&cip->ci_lock);
1070567cc2e6Sstephh 
1071567cc2e6Sstephh 		if (fmd_case_convict(cp) == 1) { /* dupclose */
1072567cc2e6Sstephh 			cip->ci_flags &= ~FMD_CF_SOLVED;
1073567cc2e6Sstephh 			fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, 0);
1074567cc2e6Sstephh 			break;
1075567cc2e6Sstephh 		}
1076cbf75e67SStephen Hanson 		if (cip->ci_xprt != NULL) {
1077cbf75e67SStephen Hanson 			/*
1078cbf75e67SStephen Hanson 			 * For proxy, save some information about the transport
1079cbf75e67SStephen Hanson 			 * in the resource cache.
1080cbf75e67SStephen Hanson 			 */
1081cbf75e67SStephen Hanson 			int count = 0;
1082cbf75e67SStephen Hanson 			fmd_asru_set_on_proxy_t fasp;
1083cbf75e67SStephen Hanson 			fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)cip->ci_xprt;
1084cbf75e67SStephen Hanson 
1085cbf75e67SStephen Hanson 			fasp.fasp_countp = &count;
1086cbf75e67SStephen Hanson 			fasp.fasp_maxcount = cip->ci_nsuspects;
1087cbf75e67SStephen Hanson 			fasp.fasp_proxy_asru = cip->ci_proxy_asru;
1088cbf75e67SStephen Hanson 			fasp.fasp_proxy_external = xip->xi_flags &
1089cbf75e67SStephen Hanson 			    FMD_XPRT_EXTERNAL;
1090cbf75e67SStephen Hanson 			fasp.fasp_proxy_rdonly = ((xip->xi_flags &
1091cbf75e67SStephen Hanson 			    FMD_XPRT_RDWR) == FMD_XPRT_RDONLY);
1092cbf75e67SStephen Hanson 			fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
1093cbf75e67SStephen Hanson 			    fmd_asru_set_on_proxy, &fasp);
1094cbf75e67SStephen Hanson 		}
109544743693Sstephh 		nvl = fmd_case_mkevent(cp, FM_LIST_SUSPECT_CLASS);
10967c478bd9Sstevel@tonic-gate 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
10977c478bd9Sstevel@tonic-gate 
10987c478bd9Sstevel@tonic-gate 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
10997c478bd9Sstevel@tonic-gate 		(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
11007c478bd9Sstevel@tonic-gate 		fmd_log_append(fmd.d_fltlog, e, cp);
11017c478bd9Sstevel@tonic-gate 		(void) pthread_rwlock_unlock(&fmd.d_log_lock);
11027c478bd9Sstevel@tonic-gate 		fmd_dispq_dispatch(fmd.d_disp, e, class);
11037c478bd9Sstevel@tonic-gate 
11047c478bd9Sstevel@tonic-gate 		(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
11057c478bd9Sstevel@tonic-gate 		cip->ci_mod->mod_stats->ms_casesolved.fmds_value.ui64++;
11067c478bd9Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
11077c478bd9Sstevel@tonic-gate 
11087c478bd9Sstevel@tonic-gate 		break;
11097c478bd9Sstevel@tonic-gate 
1110d9638e54Smws 	case FMD_CASE_CLOSE_WAIT:
11117c478bd9Sstevel@tonic-gate 		fmd_case_hold(cp);
11127c478bd9Sstevel@tonic-gate 		e = fmd_event_create(FMD_EVT_CLOSE, FMD_HRT_NOW, NULL, cp);
11137c478bd9Sstevel@tonic-gate 		fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e);
11147c478bd9Sstevel@tonic-gate 
11157c478bd9Sstevel@tonic-gate 		(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
11167c478bd9Sstevel@tonic-gate 		cip->ci_mod->mod_stats->ms_caseclosed.fmds_value.ui64++;
11177c478bd9Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
11187c478bd9Sstevel@tonic-gate 
11197c478bd9Sstevel@tonic-gate 		break;
11207c478bd9Sstevel@tonic-gate 
1121d9638e54Smws 	case FMD_CASE_CLOSED:
1122d9638e54Smws 		nvl = fmd_case_mkevent(cp, FM_LIST_ISOLATED_CLASS);
1123d9638e54Smws 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
1124d9638e54Smws 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
1125d9638e54Smws 		fmd_dispq_dispatch(fmd.d_disp, e, class);
1126d9638e54Smws 		break;
11277c478bd9Sstevel@tonic-gate 
1128d9638e54Smws 	case FMD_CASE_REPAIRED:
1129d9638e54Smws 		nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
1130d9638e54Smws 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
1131d9638e54Smws 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
1132627351e3Scy 		(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
1133627351e3Scy 		fmd_log_append(fmd.d_fltlog, e, cp);
1134627351e3Scy 		(void) pthread_rwlock_unlock(&fmd.d_log_lock);
1135d9638e54Smws 		fmd_dispq_dispatch(fmd.d_disp, e, class);
1136d9638e54Smws 		break;
113725c6ff4bSstephh 
113825c6ff4bSstephh 	case FMD_CASE_RESOLVED:
113925c6ff4bSstephh 		nvl = fmd_case_mkevent(cp, FM_LIST_RESOLVED_CLASS);
114025c6ff4bSstephh 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
114125c6ff4bSstephh 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
114225c6ff4bSstephh 		(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
114325c6ff4bSstephh 		fmd_log_append(fmd.d_fltlog, e, cp);
114425c6ff4bSstephh 		(void) pthread_rwlock_unlock(&fmd.d_log_lock);
114525c6ff4bSstephh 		fmd_dispq_dispatch(fmd.d_disp, e, class);
114625c6ff4bSstephh 		break;
11477c478bd9Sstevel@tonic-gate 	}
11487c478bd9Sstevel@tonic-gate }
11497c478bd9Sstevel@tonic-gate 
11507c478bd9Sstevel@tonic-gate fmd_case_t *
11517c478bd9Sstevel@tonic-gate fmd_case_hash_lookup(fmd_case_hash_t *chp, const char *uuid)
11527c478bd9Sstevel@tonic-gate {
11537c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *cip;
11547c478bd9Sstevel@tonic-gate 	uint_t h;
11557c478bd9Sstevel@tonic-gate 
11567c478bd9Sstevel@tonic-gate 	(void) pthread_rwlock_rdlock(&chp->ch_lock);
11577c478bd9Sstevel@tonic-gate 	h = fmd_strhash(uuid) % chp->ch_hashlen;
11587c478bd9Sstevel@tonic-gate 
11597c478bd9Sstevel@tonic-gate 	for (cip = chp->ch_hash[h]; cip != NULL; cip = cip->ci_next) {
11607c478bd9Sstevel@tonic-gate 		if (strcmp(cip->ci_uuid, uuid) == 0)
11617c478bd9Sstevel@tonic-gate 			break;
11627c478bd9Sstevel@tonic-gate 	}
11637c478bd9Sstevel@tonic-gate 
116497c04605Scy 	/*
116597c04605Scy 	 * If deleting bit is set, treat the case as if it doesn't exist.
116697c04605Scy 	 */
11677c478bd9Sstevel@tonic-gate 	if (cip != NULL)
116897c04605Scy 		cip = fmd_case_tryhold(cip);
116997c04605Scy 
117097c04605Scy 	if (cip == NULL)
11717c478bd9Sstevel@tonic-gate 		(void) fmd_set_errno(EFMD_CASE_INVAL);
11727c478bd9Sstevel@tonic-gate 
11737c478bd9Sstevel@tonic-gate 	(void) pthread_rwlock_unlock(&chp->ch_lock);
11747c478bd9Sstevel@tonic-gate 	return ((fmd_case_t *)cip);
11757c478bd9Sstevel@tonic-gate }
11767c478bd9Sstevel@tonic-gate 
11777c478bd9Sstevel@tonic-gate static fmd_case_impl_t *
11787c478bd9Sstevel@tonic-gate fmd_case_hash_insert(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
11797c478bd9Sstevel@tonic-gate {
11807c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *eip;
11817c478bd9Sstevel@tonic-gate 	uint_t h;
11827c478bd9Sstevel@tonic-gate 
11837c478bd9Sstevel@tonic-gate 	(void) pthread_rwlock_wrlock(&chp->ch_lock);
11847c478bd9Sstevel@tonic-gate 	h = fmd_strhash(cip->ci_uuid) % chp->ch_hashlen;
11857c478bd9Sstevel@tonic-gate 
11867c478bd9Sstevel@tonic-gate 	for (eip = chp->ch_hash[h]; eip != NULL; eip = eip->ci_next) {
118797c04605Scy 		if (strcmp(cip->ci_uuid, eip->ci_uuid) == 0 &&
118897c04605Scy 		    fmd_case_tryhold(eip) != NULL) {
11897c478bd9Sstevel@tonic-gate 			(void) pthread_rwlock_unlock(&chp->ch_lock);
1190d9638e54Smws 			return (eip); /* uuid already present */
11917c478bd9Sstevel@tonic-gate 		}
11927c478bd9Sstevel@tonic-gate 	}
11937c478bd9Sstevel@tonic-gate 
11947c478bd9Sstevel@tonic-gate 	cip->ci_next = chp->ch_hash[h];
11957c478bd9Sstevel@tonic-gate 	chp->ch_hash[h] = cip;
11967c478bd9Sstevel@tonic-gate 
1197d9638e54Smws 	chp->ch_count++;
1198d9638e54Smws 	ASSERT(chp->ch_count != 0);
1199d9638e54Smws 
12007c478bd9Sstevel@tonic-gate 	(void) pthread_rwlock_unlock(&chp->ch_lock);
12017c478bd9Sstevel@tonic-gate 	return (cip);
12027c478bd9Sstevel@tonic-gate }
12037c478bd9Sstevel@tonic-gate 
12047c478bd9Sstevel@tonic-gate static void
12057c478bd9Sstevel@tonic-gate fmd_case_hash_delete(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
12067c478bd9Sstevel@tonic-gate {
12077c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *cp, **pp;
12087c478bd9Sstevel@tonic-gate 	uint_t h;
12097c478bd9Sstevel@tonic-gate 
121097c04605Scy 	ASSERT(MUTEX_HELD(&cip->ci_lock));
121197c04605Scy 
121297c04605Scy 	cip->ci_flags |= FMD_CF_DELETING;
121397c04605Scy 	(void) pthread_mutex_unlock(&cip->ci_lock);
121497c04605Scy 
12157c478bd9Sstevel@tonic-gate 	(void) pthread_rwlock_wrlock(&chp->ch_lock);
12167c478bd9Sstevel@tonic-gate 
12177c478bd9Sstevel@tonic-gate 	h = fmd_strhash(cip->ci_uuid) % chp->ch_hashlen;
12187c478bd9Sstevel@tonic-gate 	pp = &chp->ch_hash[h];
12197c478bd9Sstevel@tonic-gate 
12207c478bd9Sstevel@tonic-gate 	for (cp = *pp; cp != NULL; cp = cp->ci_next) {
12217c478bd9Sstevel@tonic-gate 		if (cp != cip)
12227c478bd9Sstevel@tonic-gate 			pp = &cp->ci_next;
12237c478bd9Sstevel@tonic-gate 		else
12247c478bd9Sstevel@tonic-gate 			break;
12257c478bd9Sstevel@tonic-gate 	}
12267c478bd9Sstevel@tonic-gate 
12277c478bd9Sstevel@tonic-gate 	if (cp == NULL) {
12287c478bd9Sstevel@tonic-gate 		fmd_panic("case %p (%s) not found on hash chain %u\n",
12297c478bd9Sstevel@tonic-gate 		    (void *)cip, cip->ci_uuid, h);
12307c478bd9Sstevel@tonic-gate 	}
12317c478bd9Sstevel@tonic-gate 
12327c478bd9Sstevel@tonic-gate 	*pp = cp->ci_next;
12337c478bd9Sstevel@tonic-gate 	cp->ci_next = NULL;
12347c478bd9Sstevel@tonic-gate 
1235567cc2e6Sstephh 	/*
1236567cc2e6Sstephh 	 * delete from code hash if it is on it
1237567cc2e6Sstephh 	 */
1238567cc2e6Sstephh 	fmd_case_code_hash_delete(chp, cip);
1239567cc2e6Sstephh 
1240d9638e54Smws 	ASSERT(chp->ch_count != 0);
1241d9638e54Smws 	chp->ch_count--;
1242d9638e54Smws 
12437c478bd9Sstevel@tonic-gate 	(void) pthread_rwlock_unlock(&chp->ch_lock);
124497c04605Scy 
124597c04605Scy 	(void) pthread_mutex_lock(&cip->ci_lock);
124697c04605Scy 	ASSERT(cip->ci_flags & FMD_CF_DELETING);
12477c478bd9Sstevel@tonic-gate }
12487c478bd9Sstevel@tonic-gate 
12497c478bd9Sstevel@tonic-gate fmd_case_t *
12507c478bd9Sstevel@tonic-gate fmd_case_create(fmd_module_t *mp, void *data)
12517c478bd9Sstevel@tonic-gate {
12527c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP);
1253d9638e54Smws 	fmd_case_impl_t *eip = NULL;
12547c478bd9Sstevel@tonic-gate 	uuid_t uuid;
12557c478bd9Sstevel@tonic-gate 
12567c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_init(&cip->ci_lock, NULL);
12577c478bd9Sstevel@tonic-gate 	fmd_buf_hash_create(&cip->ci_bufs);
12587c478bd9Sstevel@tonic-gate 
12597c478bd9Sstevel@tonic-gate 	fmd_module_hold(mp);
12607c478bd9Sstevel@tonic-gate 	cip->ci_mod = mp;
12617c478bd9Sstevel@tonic-gate 	cip->ci_refs = 1;
12627c478bd9Sstevel@tonic-gate 	cip->ci_state = FMD_CASE_UNSOLVED;
12637c478bd9Sstevel@tonic-gate 	cip->ci_flags = FMD_CF_DIRTY;
12647c478bd9Sstevel@tonic-gate 	cip->ci_data = data;
12657c478bd9Sstevel@tonic-gate 
12667c478bd9Sstevel@tonic-gate 	/*
12677c478bd9Sstevel@tonic-gate 	 * Calling libuuid: get a clue.  The library interfaces cleverly do not
12687c478bd9Sstevel@tonic-gate 	 * define any constant for the length of an unparse string, and do not
12697c478bd9Sstevel@tonic-gate 	 * permit the caller to specify a buffer length for safety.  The spec
12707c478bd9Sstevel@tonic-gate 	 * says it will be 36 bytes, but we make it tunable just in case.
12717c478bd9Sstevel@tonic-gate 	 */
12727c478bd9Sstevel@tonic-gate 	(void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &cip->ci_uuidlen);
12737c478bd9Sstevel@tonic-gate 	cip->ci_uuid = fmd_zalloc(cip->ci_uuidlen + 1, FMD_SLEEP);
12747c478bd9Sstevel@tonic-gate 
12757c478bd9Sstevel@tonic-gate 	/*
12767c478bd9Sstevel@tonic-gate 	 * We expect this loop to execute only once, but code it defensively
12777c478bd9Sstevel@tonic-gate 	 * against the possibility of libuuid bugs.  Keep generating uuids and
12787c478bd9Sstevel@tonic-gate 	 * attempting to do a hash insert until we get a unique one.
12797c478bd9Sstevel@tonic-gate 	 */
12807c478bd9Sstevel@tonic-gate 	do {
1281d9638e54Smws 		if (eip != NULL)
1282d9638e54Smws 			fmd_case_rele((fmd_case_t *)eip);
12837c478bd9Sstevel@tonic-gate 		uuid_generate(uuid);
12847c478bd9Sstevel@tonic-gate 		uuid_unparse(uuid, cip->ci_uuid);
1285d9638e54Smws 	} while ((eip = fmd_case_hash_insert(fmd.d_cases, cip)) != cip);
12867c478bd9Sstevel@tonic-gate 
12877c478bd9Sstevel@tonic-gate 	ASSERT(fmd_module_locked(mp));
12887c478bd9Sstevel@tonic-gate 	fmd_list_append(&mp->mod_cases, cip);
12897c478bd9Sstevel@tonic-gate 	fmd_module_setcdirty(mp);
12907c478bd9Sstevel@tonic-gate 
12917c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
12927c478bd9Sstevel@tonic-gate 	cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64++;
12937c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
12947c478bd9Sstevel@tonic-gate 
12957c478bd9Sstevel@tonic-gate 	return ((fmd_case_t *)cip);
12967c478bd9Sstevel@tonic-gate }
12977c478bd9Sstevel@tonic-gate 
1298162ba6eaSmws static void
1299162ba6eaSmws fmd_case_destroy_suspects(fmd_case_impl_t *cip)
1300162ba6eaSmws {
1301162ba6eaSmws 	fmd_case_susp_t *cis, *ncis;
1302162ba6eaSmws 
1303162ba6eaSmws 	ASSERT(MUTEX_HELD(&cip->ci_lock));
1304162ba6eaSmws 
1305cbf75e67SStephen Hanson 	if (cip->ci_proxy_asru)
1306cbf75e67SStephen Hanson 		fmd_free(cip->ci_proxy_asru, sizeof (uint8_t) *
1307cbf75e67SStephen Hanson 		    cip->ci_nsuspects);
1308cbf75e67SStephen Hanson 	if (cip->ci_diag_de)
1309cbf75e67SStephen Hanson 		nvlist_free(cip->ci_diag_de);
1310cbf75e67SStephen Hanson 	if (cip->ci_diag_asru)
1311cbf75e67SStephen Hanson 		fmd_free(cip->ci_diag_asru, sizeof (uint8_t) *
1312cbf75e67SStephen Hanson 		    cip->ci_nsuspects);
1313cbf75e67SStephen Hanson 
1314162ba6eaSmws 	for (cis = cip->ci_suspects; cis != NULL; cis = ncis) {
1315162ba6eaSmws 		ncis = cis->cis_next;
1316162ba6eaSmws 		nvlist_free(cis->cis_nvl);
1317162ba6eaSmws 		fmd_free(cis, sizeof (fmd_case_susp_t));
1318162ba6eaSmws 	}
1319162ba6eaSmws 
1320162ba6eaSmws 	cip->ci_suspects = NULL;
1321162ba6eaSmws 	cip->ci_nsuspects = 0;
1322162ba6eaSmws }
1323162ba6eaSmws 
13247c478bd9Sstevel@tonic-gate fmd_case_t *
1325d9638e54Smws fmd_case_recreate(fmd_module_t *mp, fmd_xprt_t *xp,
1326d9638e54Smws     uint_t state, const char *uuid, const char *code)
13277c478bd9Sstevel@tonic-gate {
13287c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP);
1329d9638e54Smws 	fmd_case_impl_t *eip;
1330d9638e54Smws 
13317c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_init(&cip->ci_lock, NULL);
13327c478bd9Sstevel@tonic-gate 	fmd_buf_hash_create(&cip->ci_bufs);
13337c478bd9Sstevel@tonic-gate 
13347c478bd9Sstevel@tonic-gate 	fmd_module_hold(mp);
13357c478bd9Sstevel@tonic-gate 	cip->ci_mod = mp;
1336d9638e54Smws 	cip->ci_xprt = xp;
13377c478bd9Sstevel@tonic-gate 	cip->ci_refs = 1;
1338d9638e54Smws 	cip->ci_state = state;
13397c478bd9Sstevel@tonic-gate 	cip->ci_uuid = fmd_strdup(uuid, FMD_SLEEP);
13407c478bd9Sstevel@tonic-gate 	cip->ci_uuidlen = strlen(cip->ci_uuid);
1341d9638e54Smws 	cip->ci_code = fmd_strdup(code, FMD_SLEEP);
1342d9638e54Smws 	cip->ci_codelen = cip->ci_code ? strlen(cip->ci_code) + 1 : 0;
1343d9638e54Smws 
1344d9638e54Smws 	if (state > FMD_CASE_CLOSE_WAIT)
1345d9638e54Smws 		cip->ci_flags |= FMD_CF_SOLVED;
1346d9638e54Smws 
1347d9638e54Smws 	/*
1348d9638e54Smws 	 * Insert the case into the global case hash.  If the specified UUID is
1349d9638e54Smws 	 * already present, check to see if it is an orphan: if so, reclaim it;
1350d9638e54Smws 	 * otherwise if it is owned by a different module then return NULL.
1351d9638e54Smws 	 */
1352d9638e54Smws 	if ((eip = fmd_case_hash_insert(fmd.d_cases, cip)) != cip) {
1353d9638e54Smws 		(void) pthread_mutex_lock(&cip->ci_lock);
1354d9638e54Smws 		cip->ci_refs--; /* decrement to zero */
1355d9638e54Smws 		fmd_case_destroy((fmd_case_t *)cip, B_FALSE);
1356d9638e54Smws 
1357d9638e54Smws 		cip = eip; /* switch 'cip' to the existing case */
1358d9638e54Smws 		(void) pthread_mutex_lock(&cip->ci_lock);
1359d9638e54Smws 
1360d9638e54Smws 		/*
1361d9638e54Smws 		 * If the ASRU cache is trying to recreate an orphan, then just
1362d9638e54Smws 		 * return the existing case that we found without changing it.
1363d9638e54Smws 		 */
1364d9638e54Smws 		if (mp == fmd.d_rmod) {
1365cbf75e67SStephen Hanson 			/*
1366cbf75e67SStephen Hanson 			 * In case the case has already been created from
1367cbf75e67SStephen Hanson 			 * a checkpoint file we need to set up code now.
1368cbf75e67SStephen Hanson 			 */
1369cbf75e67SStephen Hanson 			if (cip->ci_state < FMD_CASE_CLOSED) {
1370cbf75e67SStephen Hanson 				if (code != NULL && cip->ci_code == NULL) {
1371cbf75e67SStephen Hanson 					cip->ci_code = fmd_strdup(code,
1372cbf75e67SStephen Hanson 					    FMD_SLEEP);
1373cbf75e67SStephen Hanson 					cip->ci_codelen = cip->ci_code ?
1374cbf75e67SStephen Hanson 					    strlen(cip->ci_code) + 1 : 0;
1375cbf75e67SStephen Hanson 					fmd_case_code_hash_insert(fmd.d_cases,
1376cbf75e67SStephen Hanson 					    cip);
1377cbf75e67SStephen Hanson 				}
1378cbf75e67SStephen Hanson 			}
1379cbf75e67SStephen Hanson 
138025c6ff4bSstephh 			/*
138125c6ff4bSstephh 			 * When recreating an orphan case, state passed in may
13825750ef5cSStephen Hanson 			 * be CLOSED (faulty) or REPAIRED/RESOLVED (!faulty). If
138325c6ff4bSstephh 			 * any suspects are still CLOSED (faulty) then the
138425c6ff4bSstephh 			 * overall state needs to be CLOSED.
138525c6ff4bSstephh 			 */
13865750ef5cSStephen Hanson 			if ((cip->ci_state == FMD_CASE_REPAIRED ||
13875750ef5cSStephen Hanson 			    cip->ci_state == FMD_CASE_RESOLVED) &&
1388cbf75e67SStephen Hanson 			    state == FMD_CASE_CLOSED)
138925c6ff4bSstephh 				cip->ci_state = FMD_CASE_CLOSED;
1390d9638e54Smws 			(void) pthread_mutex_unlock(&cip->ci_lock);
1391d9638e54Smws 			fmd_case_rele((fmd_case_t *)cip);
1392d9638e54Smws 			return ((fmd_case_t *)cip);
1393d9638e54Smws 		}
1394d9638e54Smws 
1395d9638e54Smws 		/*
1396d9638e54Smws 		 * If the existing case isn't an orphan or is being proxied,
1397d9638e54Smws 		 * then we have a UUID conflict: return failure to the caller.
1398d9638e54Smws 		 */
1399d9638e54Smws 		if (cip->ci_mod != fmd.d_rmod || xp != NULL) {
1400d9638e54Smws 			(void) pthread_mutex_unlock(&cip->ci_lock);
1401d9638e54Smws 			fmd_case_rele((fmd_case_t *)cip);
1402d9638e54Smws 			return (NULL);
1403d9638e54Smws 		}
1404d9638e54Smws 
1405d9638e54Smws 		/*
1406d9638e54Smws 		 * If the new module is reclaiming an orphaned case, remove
1407d9638e54Smws 		 * the case from the root module, switch ci_mod, and then fall
1408d9638e54Smws 		 * through to adding the case to the new owner module 'mp'.
1409d9638e54Smws 		 */
1410d9638e54Smws 		fmd_module_lock(cip->ci_mod);
1411d9638e54Smws 		fmd_list_delete(&cip->ci_mod->mod_cases, cip);
1412d9638e54Smws 		fmd_module_unlock(cip->ci_mod);
1413d9638e54Smws 
1414d9638e54Smws 		fmd_module_rele(cip->ci_mod);
1415d9638e54Smws 		cip->ci_mod = mp;
1416d9638e54Smws 		fmd_module_hold(mp);
1417d9638e54Smws 
1418c7d6cfd6SStephen Hanson 		/*
1419c7d6cfd6SStephen Hanson 		 * It's possible that fmd crashed or was restarted during a
1420c7d6cfd6SStephen Hanson 		 * previous solve operation between the asru cache being created
1421c7d6cfd6SStephen Hanson 		 * and the ckpt file being updated to SOLVED. Thus when the DE
1422c7d6cfd6SStephen Hanson 		 * recreates the case here from the checkpoint file, the state
1423c7d6cfd6SStephen Hanson 		 * will be UNSOLVED and yet we are having to reclaim because
1424c7d6cfd6SStephen Hanson 		 * the case was in the asru cache. If this happens, revert the
1425c7d6cfd6SStephen Hanson 		 * case back to the UNSOLVED state and let the DE solve it again
1426c7d6cfd6SStephen Hanson 		 */
1427c7d6cfd6SStephen Hanson 		if (state == FMD_CASE_UNSOLVED) {
1428c7d6cfd6SStephen Hanson 			fmd_asru_hash_delete_case(fmd.d_asrus,
1429c7d6cfd6SStephen Hanson 			    (fmd_case_t *)cip);
1430c7d6cfd6SStephen Hanson 			fmd_case_destroy_suspects(cip);
1431c7d6cfd6SStephen Hanson 			fmd_case_code_hash_delete(fmd.d_cases, cip);
1432c7d6cfd6SStephen Hanson 			fmd_free(cip->ci_code, cip->ci_codelen);
1433c7d6cfd6SStephen Hanson 			cip->ci_code = NULL;
1434c7d6cfd6SStephen Hanson 			cip->ci_codelen = 0;
1435c7d6cfd6SStephen Hanson 			cip->ci_tv_valid = 0;
1436c7d6cfd6SStephen Hanson 		}
1437c7d6cfd6SStephen Hanson 
1438162ba6eaSmws 		cip->ci_state = state;
1439162ba6eaSmws 
1440d9638e54Smws 		(void) pthread_mutex_unlock(&cip->ci_lock);
1441d9638e54Smws 		fmd_case_rele((fmd_case_t *)cip);
1442567cc2e6Sstephh 	} else {
1443567cc2e6Sstephh 		/*
1444567cc2e6Sstephh 		 * add into hash of solved cases
1445567cc2e6Sstephh 		 */
1446567cc2e6Sstephh 		if (cip->ci_code)
1447567cc2e6Sstephh 			fmd_case_code_hash_insert(fmd.d_cases, cip);
1448d9638e54Smws 	}
14497c478bd9Sstevel@tonic-gate 
14507c478bd9Sstevel@tonic-gate 	ASSERT(fmd_module_locked(mp));
14517c478bd9Sstevel@tonic-gate 	fmd_list_append(&mp->mod_cases, cip);
14527c478bd9Sstevel@tonic-gate 
14537c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
14547c478bd9Sstevel@tonic-gate 	cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64++;
14557c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
14567c478bd9Sstevel@tonic-gate 
14577c478bd9Sstevel@tonic-gate 	return ((fmd_case_t *)cip);
14587c478bd9Sstevel@tonic-gate }
14597c478bd9Sstevel@tonic-gate 
14607c478bd9Sstevel@tonic-gate void
1461d9638e54Smws fmd_case_destroy(fmd_case_t *cp, int visible)
14627c478bd9Sstevel@tonic-gate {
14637c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
14647c478bd9Sstevel@tonic-gate 	fmd_case_item_t *cit, *ncit;
14657c478bd9Sstevel@tonic-gate 
14667c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cip->ci_lock));
14677c478bd9Sstevel@tonic-gate 	ASSERT(cip->ci_refs == 0);
14687c478bd9Sstevel@tonic-gate 
1469d9638e54Smws 	if (visible) {
1470d9638e54Smws 		TRACE((FMD_DBG_CASE, "deleting case %s", cip->ci_uuid));
1471d9638e54Smws 		fmd_case_hash_delete(fmd.d_cases, cip);
1472d9638e54Smws 	}
14737c478bd9Sstevel@tonic-gate 
14747c478bd9Sstevel@tonic-gate 	for (cit = cip->ci_items; cit != NULL; cit = ncit) {
14757c478bd9Sstevel@tonic-gate 		ncit = cit->cit_next;
14767c478bd9Sstevel@tonic-gate 		fmd_event_rele(cit->cit_event);
14777c478bd9Sstevel@tonic-gate 		fmd_free(cit, sizeof (fmd_case_item_t));
14787c478bd9Sstevel@tonic-gate 	}
14797c478bd9Sstevel@tonic-gate 
1480162ba6eaSmws 	fmd_case_destroy_suspects(cip);
14817c478bd9Sstevel@tonic-gate 
14827c478bd9Sstevel@tonic-gate 	if (cip->ci_principal != NULL)
14837c478bd9Sstevel@tonic-gate 		fmd_event_rele(cip->ci_principal);
14847c478bd9Sstevel@tonic-gate 
14857c478bd9Sstevel@tonic-gate 	fmd_free(cip->ci_uuid, cip->ci_uuidlen + 1);
1486d9638e54Smws 	fmd_free(cip->ci_code, cip->ci_codelen);
14870b9e3e76Smws 	(void) fmd_buf_hash_destroy(&cip->ci_bufs);
14887c478bd9Sstevel@tonic-gate 
14897c478bd9Sstevel@tonic-gate 	fmd_module_rele(cip->ci_mod);
14907c478bd9Sstevel@tonic-gate 	fmd_free(cip, sizeof (fmd_case_impl_t));
14917c478bd9Sstevel@tonic-gate }
14927c478bd9Sstevel@tonic-gate 
14937c478bd9Sstevel@tonic-gate void
14947c478bd9Sstevel@tonic-gate fmd_case_hold(fmd_case_t *cp)
14957c478bd9Sstevel@tonic-gate {
14967c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
14977c478bd9Sstevel@tonic-gate 
14987c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
149997c04605Scy 	fmd_case_hold_locked(cp);
15007c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
15017c478bd9Sstevel@tonic-gate }
15027c478bd9Sstevel@tonic-gate 
1503d9638e54Smws void
1504d9638e54Smws fmd_case_hold_locked(fmd_case_t *cp)
1505d9638e54Smws {
1506d9638e54Smws 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1507d9638e54Smws 
1508d9638e54Smws 	ASSERT(MUTEX_HELD(&cip->ci_lock));
150997c04605Scy 	if (cip->ci_flags & FMD_CF_DELETING)
151097c04605Scy 		fmd_panic("attempt to hold a deleting case %p (%s)\n",
151197c04605Scy 		    (void *)cip, cip->ci_uuid);
1512d9638e54Smws 	cip->ci_refs++;
1513d9638e54Smws 	ASSERT(cip->ci_refs != 0);
1514d9638e54Smws }
1515d9638e54Smws 
151697c04605Scy static fmd_case_impl_t *
151797c04605Scy fmd_case_tryhold(fmd_case_impl_t *cip)
151897c04605Scy {
151997c04605Scy 	/*
152097c04605Scy 	 * If the case's "deleting" bit is unset, hold and return case,
152197c04605Scy 	 * otherwise, return NULL.
152297c04605Scy 	 */
152397c04605Scy 	(void) pthread_mutex_lock(&cip->ci_lock);
152497c04605Scy 	if (cip->ci_flags & FMD_CF_DELETING) {
152597c04605Scy 		(void) pthread_mutex_unlock(&cip->ci_lock);
152697c04605Scy 		cip = NULL;
152797c04605Scy 	} else {
152897c04605Scy 		fmd_case_hold_locked((fmd_case_t *)cip);
152997c04605Scy 		(void) pthread_mutex_unlock(&cip->ci_lock);
153097c04605Scy 	}
153197c04605Scy 	return (cip);
153297c04605Scy }
153397c04605Scy 
15347c478bd9Sstevel@tonic-gate void
15357c478bd9Sstevel@tonic-gate fmd_case_rele(fmd_case_t *cp)
15367c478bd9Sstevel@tonic-gate {
15377c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
15387c478bd9Sstevel@tonic-gate 
15397c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
15407c478bd9Sstevel@tonic-gate 	ASSERT(cip->ci_refs != 0);
15417c478bd9Sstevel@tonic-gate 
15427c478bd9Sstevel@tonic-gate 	if (--cip->ci_refs == 0)
1543d9638e54Smws 		fmd_case_destroy((fmd_case_t *)cip, B_TRUE);
15447c478bd9Sstevel@tonic-gate 	else
15457c478bd9Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&cip->ci_lock);
15467c478bd9Sstevel@tonic-gate }
15477c478bd9Sstevel@tonic-gate 
1548567cc2e6Sstephh void
1549567cc2e6Sstephh fmd_case_rele_locked(fmd_case_t *cp)
1550567cc2e6Sstephh {
1551567cc2e6Sstephh 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1552567cc2e6Sstephh 
1553567cc2e6Sstephh 	ASSERT(MUTEX_HELD(&cip->ci_lock));
1554567cc2e6Sstephh 	--cip->ci_refs;
1555567cc2e6Sstephh 	ASSERT(cip->ci_refs != 0);
1556567cc2e6Sstephh }
1557567cc2e6Sstephh 
15587aec1d6eScindi int
15597c478bd9Sstevel@tonic-gate fmd_case_insert_principal(fmd_case_t *cp, fmd_event_t *ep)
15607c478bd9Sstevel@tonic-gate {
15617c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
15627aec1d6eScindi 	fmd_case_item_t *cit;
15637c478bd9Sstevel@tonic-gate 	fmd_event_t *oep;
15647c478bd9Sstevel@tonic-gate 	uint_t state;
15657aec1d6eScindi 	int new;
15667c478bd9Sstevel@tonic-gate 
15677c478bd9Sstevel@tonic-gate 	fmd_event_hold(ep);
15687c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
15697c478bd9Sstevel@tonic-gate 
1570d9638e54Smws 	if (cip->ci_flags & FMD_CF_SOLVED)
15717c478bd9Sstevel@tonic-gate 		state = FMD_EVS_DIAGNOSED;
15727c478bd9Sstevel@tonic-gate 	else
15737c478bd9Sstevel@tonic-gate 		state = FMD_EVS_ACCEPTED;
15747c478bd9Sstevel@tonic-gate 
15757c478bd9Sstevel@tonic-gate 	oep = cip->ci_principal;
15767c478bd9Sstevel@tonic-gate 	cip->ci_principal = ep;
15777c478bd9Sstevel@tonic-gate 
15787aec1d6eScindi 	for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
15797aec1d6eScindi 		if (cit->cit_event == ep)
15807aec1d6eScindi 			break;
15817aec1d6eScindi 	}
15827aec1d6eScindi 
15837c478bd9Sstevel@tonic-gate 	cip->ci_flags |= FMD_CF_DIRTY;
15847aec1d6eScindi 	new = cit == NULL && ep != oep;
15857aec1d6eScindi 
15867c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
15877c478bd9Sstevel@tonic-gate 
15887c478bd9Sstevel@tonic-gate 	fmd_module_setcdirty(cip->ci_mod);
15897c478bd9Sstevel@tonic-gate 	fmd_event_transition(ep, state);
15907c478bd9Sstevel@tonic-gate 
15917c478bd9Sstevel@tonic-gate 	if (oep != NULL)
15927c478bd9Sstevel@tonic-gate 		fmd_event_rele(oep);
15937aec1d6eScindi 
15947aec1d6eScindi 	return (new);
15957c478bd9Sstevel@tonic-gate }
15967c478bd9Sstevel@tonic-gate 
15977aec1d6eScindi int
15987c478bd9Sstevel@tonic-gate fmd_case_insert_event(fmd_case_t *cp, fmd_event_t *ep)
15997c478bd9Sstevel@tonic-gate {
16007c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
16017aec1d6eScindi 	fmd_case_item_t *cit;
16027c478bd9Sstevel@tonic-gate 	uint_t state;
16037aec1d6eScindi 	int new;
1604540db9a9SStephen Hanson 	boolean_t injected;
16057c478bd9Sstevel@tonic-gate 
16067c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
16077c478bd9Sstevel@tonic-gate 
16087aec1d6eScindi 	if (cip->ci_flags & FMD_CF_SOLVED)
16097aec1d6eScindi 		state = FMD_EVS_DIAGNOSED;
16107aec1d6eScindi 	else
16117aec1d6eScindi 		state = FMD_EVS_ACCEPTED;
16127aec1d6eScindi 
16137aec1d6eScindi 	for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
16147aec1d6eScindi 		if (cit->cit_event == ep)
16157aec1d6eScindi 			break;
16167aec1d6eScindi 	}
16177aec1d6eScindi 
16187aec1d6eScindi 	new = cit == NULL && ep != cip->ci_principal;
16197aec1d6eScindi 
16207aec1d6eScindi 	/*
16217aec1d6eScindi 	 * If the event is already in the case or the case is already solved,
16227aec1d6eScindi 	 * there is no reason to save it: just transition it appropriately.
16237aec1d6eScindi 	 */
16247aec1d6eScindi 	if (cit != NULL || (cip->ci_flags & FMD_CF_SOLVED)) {
16257aec1d6eScindi 		(void) pthread_mutex_unlock(&cip->ci_lock);
16267aec1d6eScindi 		fmd_event_transition(ep, state);
16277aec1d6eScindi 		return (new);
16287aec1d6eScindi 	}
16297aec1d6eScindi 
16307aec1d6eScindi 	cit = fmd_alloc(sizeof (fmd_case_item_t), FMD_SLEEP);
16317aec1d6eScindi 	fmd_event_hold(ep);
16327aec1d6eScindi 
1633540db9a9SStephen Hanson 	if (nvlist_lookup_boolean_value(((fmd_event_impl_t *)ep)->ev_nvl,
1634540db9a9SStephen Hanson 	    "__injected", &injected) == 0 && injected)
1635540db9a9SStephen Hanson 		fmd_case_set_injected(cp);
1636540db9a9SStephen Hanson 
16377c478bd9Sstevel@tonic-gate 	cit->cit_next = cip->ci_items;
16387c478bd9Sstevel@tonic-gate 	cit->cit_event = ep;
16397c478bd9Sstevel@tonic-gate 
16407c478bd9Sstevel@tonic-gate 	cip->ci_items = cit;
16417c478bd9Sstevel@tonic-gate 	cip->ci_nitems++;
16427c478bd9Sstevel@tonic-gate 
16437c478bd9Sstevel@tonic-gate 	cip->ci_flags |= FMD_CF_DIRTY;
16447c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
16457c478bd9Sstevel@tonic-gate 
16467c478bd9Sstevel@tonic-gate 	fmd_module_setcdirty(cip->ci_mod);
16477c478bd9Sstevel@tonic-gate 	fmd_event_transition(ep, state);
16487aec1d6eScindi 
16497aec1d6eScindi 	return (new);
16507c478bd9Sstevel@tonic-gate }
16517c478bd9Sstevel@tonic-gate 
16527c478bd9Sstevel@tonic-gate void
16537c478bd9Sstevel@tonic-gate fmd_case_insert_suspect(fmd_case_t *cp, nvlist_t *nvl)
16547c478bd9Sstevel@tonic-gate {
16557c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
16567c478bd9Sstevel@tonic-gate 	fmd_case_susp_t *cis = fmd_alloc(sizeof (fmd_case_susp_t), FMD_SLEEP);
16577c478bd9Sstevel@tonic-gate 
16587c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
1659567cc2e6Sstephh 	ASSERT(cip->ci_state < FMD_CASE_CLOSE_WAIT);
16607c478bd9Sstevel@tonic-gate 	cip->ci_flags |= FMD_CF_DIRTY;
16617c478bd9Sstevel@tonic-gate 
16627c478bd9Sstevel@tonic-gate 	cis->cis_next = cip->ci_suspects;
16637c478bd9Sstevel@tonic-gate 	cis->cis_nvl = nvl;
16647c478bd9Sstevel@tonic-gate 
16657c478bd9Sstevel@tonic-gate 	cip->ci_suspects = cis;
16667c478bd9Sstevel@tonic-gate 	cip->ci_nsuspects++;
16677c478bd9Sstevel@tonic-gate 
16687c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
1669cbf75e67SStephen Hanson 	if (cip->ci_xprt == NULL)
1670cbf75e67SStephen Hanson 		fmd_module_setcdirty(cip->ci_mod);
16717c478bd9Sstevel@tonic-gate }
16727c478bd9Sstevel@tonic-gate 
1673d9638e54Smws void
1674d9638e54Smws fmd_case_recreate_suspect(fmd_case_t *cp, nvlist_t *nvl)
1675d9638e54Smws {
1676d9638e54Smws 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1677d9638e54Smws 	fmd_case_susp_t *cis = fmd_alloc(sizeof (fmd_case_susp_t), FMD_SLEEP);
167844743693Sstephh 	boolean_t b;
1679d9638e54Smws 
1680d9638e54Smws 	(void) pthread_mutex_lock(&cip->ci_lock);
1681d9638e54Smws 
1682d9638e54Smws 	cis->cis_next = cip->ci_suspects;
1683d9638e54Smws 	cis->cis_nvl = nvl;
1684d9638e54Smws 
168544743693Sstephh 	if (nvlist_lookup_boolean_value(nvl,
168644743693Sstephh 	    FM_SUSPECT_MESSAGE, &b) == 0 && b == B_FALSE)
168744743693Sstephh 		cip->ci_flags |= FMD_CF_INVISIBLE;
168844743693Sstephh 
1689d9638e54Smws 	cip->ci_suspects = cis;
1690d9638e54Smws 	cip->ci_nsuspects++;
1691d9638e54Smws 
1692d9638e54Smws 	(void) pthread_mutex_unlock(&cip->ci_lock);
1693d9638e54Smws }
1694d9638e54Smws 
16957c478bd9Sstevel@tonic-gate void
16967c478bd9Sstevel@tonic-gate fmd_case_reset_suspects(fmd_case_t *cp)
16977c478bd9Sstevel@tonic-gate {
16987c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
16997c478bd9Sstevel@tonic-gate 
17007c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
17017c478bd9Sstevel@tonic-gate 	ASSERT(cip->ci_state < FMD_CASE_SOLVED);
17027c478bd9Sstevel@tonic-gate 
1703162ba6eaSmws 	fmd_case_destroy_suspects(cip);
17047c478bd9Sstevel@tonic-gate 	cip->ci_flags |= FMD_CF_DIRTY;
17057c478bd9Sstevel@tonic-gate 
17067c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
17077c478bd9Sstevel@tonic-gate 	fmd_module_setcdirty(cip->ci_mod);
17087c478bd9Sstevel@tonic-gate }
17097c478bd9Sstevel@tonic-gate 
1710567cc2e6Sstephh /*ARGSUSED*/
1711567cc2e6Sstephh static void
1712567cc2e6Sstephh fmd_case_unusable(fmd_asru_link_t *alp, void *arg)
1713567cc2e6Sstephh {
1714567cc2e6Sstephh 	(void) fmd_asru_setflags(alp, FMD_ASRU_UNUSABLE);
1715567cc2e6Sstephh }
1716567cc2e6Sstephh 
1717d9638e54Smws /*
1718d9638e54Smws  * Grab ci_lock and update the case state and set the dirty bit.  Then perform
1719d9638e54Smws  * whatever actions and emit whatever events are appropriate for the state.
1720d9638e54Smws  * Refer to the topmost block comment explaining the state machine for details.
1721d9638e54Smws  */
17227c478bd9Sstevel@tonic-gate void
1723d9638e54Smws fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags)
17247c478bd9Sstevel@tonic-gate {
17257c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1726d9638e54Smws 	fmd_case_item_t *cit;
1727d9638e54Smws 	fmd_event_t *e;
172825c6ff4bSstephh 	int resolved = 0;
172925c6ff4bSstephh 	int any_unusable_and_present = 0;
17307c478bd9Sstevel@tonic-gate 
173125c6ff4bSstephh 	ASSERT(state <= FMD_CASE_RESOLVED);
17327c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
1733162ba6eaSmws 
173444743693Sstephh 	if (!(cip->ci_flags & FMD_CF_SOLVED) && !(flags & FMD_CF_SOLVED))
1735cbf75e67SStephen Hanson 		flags &= ~(FMD_CF_ISOLATED | FMD_CF_REPAIRED | FMD_CF_RESOLVED);
1736162ba6eaSmws 
1737d9638e54Smws 	cip->ci_flags |= flags;
17387c478bd9Sstevel@tonic-gate 
17397c478bd9Sstevel@tonic-gate 	if (cip->ci_state >= state) {
17407c478bd9Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&cip->ci_lock);
17417c478bd9Sstevel@tonic-gate 		return; /* already in specified state */
17427c478bd9Sstevel@tonic-gate 	}
17437c478bd9Sstevel@tonic-gate 
17447c478bd9Sstevel@tonic-gate 	TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid,
17457c478bd9Sstevel@tonic-gate 	    _fmd_case_snames[cip->ci_state], _fmd_case_snames[state]));
17467c478bd9Sstevel@tonic-gate 
17477c478bd9Sstevel@tonic-gate 	cip->ci_state = state;
17487c478bd9Sstevel@tonic-gate 	cip->ci_flags |= FMD_CF_DIRTY;
17497c478bd9Sstevel@tonic-gate 
1750d9638e54Smws 	if (cip->ci_xprt == NULL && cip->ci_mod != fmd.d_rmod)
1751d9638e54Smws 		fmd_module_setcdirty(cip->ci_mod);
17527c478bd9Sstevel@tonic-gate 
1753d9638e54Smws 	switch (state) {
1754d9638e54Smws 	case FMD_CASE_SOLVED:
17557c478bd9Sstevel@tonic-gate 		for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
17567c478bd9Sstevel@tonic-gate 			fmd_event_transition(cit->cit_event, FMD_EVS_DIAGNOSED);
17577c478bd9Sstevel@tonic-gate 
17587c478bd9Sstevel@tonic-gate 		if (cip->ci_principal != NULL) {
17597c478bd9Sstevel@tonic-gate 			fmd_event_transition(cip->ci_principal,
17607c478bd9Sstevel@tonic-gate 			    FMD_EVS_DIAGNOSED);
17617c478bd9Sstevel@tonic-gate 		}
17627c478bd9Sstevel@tonic-gate 		break;
17637c478bd9Sstevel@tonic-gate 
1764d9638e54Smws 	case FMD_CASE_CLOSE_WAIT:
1765d9638e54Smws 		/*
1766d9638e54Smws 		 * If the case was never solved, do not change ASRUs.
1767d9638e54Smws 		 * If the case was never fmd_case_closed, do not change ASRUs.
1768d9638e54Smws 		 * If the case was repaired, do not change ASRUs.
1769d9638e54Smws 		 */
1770d9638e54Smws 		if ((cip->ci_flags & (FMD_CF_SOLVED | FMD_CF_ISOLATED |
1771567cc2e6Sstephh 		    FMD_CF_REPAIRED)) == (FMD_CF_SOLVED | FMD_CF_ISOLATED))
1772567cc2e6Sstephh 			fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
1773567cc2e6Sstephh 			    fmd_case_unusable, NULL);
1774d9638e54Smws 
1775d9638e54Smws 		/*
1776d9638e54Smws 		 * If an orphaned case transitions to CLOSE_WAIT, the owning
1777*705e9f42SStephen Hanson 		 * module is no longer loaded: continue on to CASE_CLOSED or
1778*705e9f42SStephen Hanson 		 * CASE_REPAIRED as appropriate.
1779d9638e54Smws 		 */
1780*705e9f42SStephen Hanson 		if (fmd_case_orphaned(cp)) {
1781*705e9f42SStephen Hanson 			if (cip->ci_flags & FMD_CF_REPAIRED) {
1782*705e9f42SStephen Hanson 				state = cip->ci_state = FMD_CASE_REPAIRED;
1783*705e9f42SStephen Hanson 				TRACE((FMD_DBG_CASE, "case %s %s->%s",
1784*705e9f42SStephen Hanson 				    cip->ci_uuid,
1785*705e9f42SStephen Hanson 				    _fmd_case_snames[FMD_CASE_CLOSE_WAIT],
1786*705e9f42SStephen Hanson 				    _fmd_case_snames[FMD_CASE_REPAIRED]));
1787*705e9f42SStephen Hanson 				goto do_repair;
1788*705e9f42SStephen Hanson 			} else {
1789*705e9f42SStephen Hanson 				state = cip->ci_state = FMD_CASE_CLOSED;
1790*705e9f42SStephen Hanson 				TRACE((FMD_DBG_CASE, "case %s %s->%s",
1791*705e9f42SStephen Hanson 				    cip->ci_uuid,
1792*705e9f42SStephen Hanson 				    _fmd_case_snames[FMD_CASE_CLOSE_WAIT],
1793*705e9f42SStephen Hanson 				    _fmd_case_snames[FMD_CASE_CLOSED]));
1794*705e9f42SStephen Hanson 			}
1795*705e9f42SStephen Hanson 		}
1796d9638e54Smws 		break;
1797d9638e54Smws 
1798d9638e54Smws 	case FMD_CASE_REPAIRED:
1799*705e9f42SStephen Hanson do_repair:
1800cbf75e67SStephen Hanson 		ASSERT(cip->ci_xprt != NULL || fmd_case_orphaned(cp));
180125c6ff4bSstephh 
180225c6ff4bSstephh 		/*
1803cbf75e67SStephen Hanson 		 * If we've been requested to transition straight on to the
1804cbf75e67SStephen Hanson 		 * RESOLVED state (which can happen with fault proxying where a
1805cbf75e67SStephen Hanson 		 * list.resolved or a uuresolved is received from the other
1806cbf75e67SStephen Hanson 		 * side), or if all suspects are already either usable or not
1807cbf75e67SStephen Hanson 		 * present then transition straight to RESOLVED state,
1808cbf75e67SStephen Hanson 		 * publishing both the list.repaired and list.resolved. For a
1809cbf75e67SStephen Hanson 		 * proxy, if we discover here that all suspects are already
1810cbf75e67SStephen Hanson 		 * either usable or not present, notify the diag side instead
1811cbf75e67SStephen Hanson 		 * using fmd_xprt_uuresolved().
181225c6ff4bSstephh 		 */
1813cbf75e67SStephen Hanson 		if (flags & FMD_CF_RESOLVED) {
18145750ef5cSStephen Hanson 			if (cip->ci_xprt != NULL)
1815cbf75e67SStephen Hanson 				fmd_list_delete(&cip->ci_mod->mod_cases, cip);
1816cbf75e67SStephen Hanson 		} else {
1817cbf75e67SStephen Hanson 			fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
1818cbf75e67SStephen Hanson 			    fmd_case_unusable_and_present,
1819cbf75e67SStephen Hanson 			    &any_unusable_and_present);
1820cbf75e67SStephen Hanson 			if (any_unusable_and_present)
1821cbf75e67SStephen Hanson 				break;
1822cbf75e67SStephen Hanson 			if (cip->ci_xprt != NULL) {
1823cbf75e67SStephen Hanson 				fmd_xprt_uuresolved(cip->ci_xprt, cip->ci_uuid);
1824cbf75e67SStephen Hanson 				break;
1825cbf75e67SStephen Hanson 			}
1826cbf75e67SStephen Hanson 		}
182725c6ff4bSstephh 
182825c6ff4bSstephh 		cip->ci_state = FMD_CASE_RESOLVED;
182925c6ff4bSstephh 		(void) pthread_mutex_unlock(&cip->ci_lock);
183025c6ff4bSstephh 		fmd_case_publish(cp, state);
183125c6ff4bSstephh 		TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid,
183225c6ff4bSstephh 		    _fmd_case_snames[FMD_CASE_REPAIRED],
183325c6ff4bSstephh 		    _fmd_case_snames[FMD_CASE_RESOLVED]));
183425c6ff4bSstephh 		state = FMD_CASE_RESOLVED;
183525c6ff4bSstephh 		resolved = 1;
183625c6ff4bSstephh 		(void) pthread_mutex_lock(&cip->ci_lock);
183725c6ff4bSstephh 		break;
183825c6ff4bSstephh 
183925c6ff4bSstephh 	case FMD_CASE_RESOLVED:
1840cbf75e67SStephen Hanson 		/*
1841cbf75e67SStephen Hanson 		 * For a proxy, no need to check that all suspects are already
1842cbf75e67SStephen Hanson 		 * either usable or not present - this request has come from
1843cbf75e67SStephen Hanson 		 * the diagnosing side which makes the final decision on this.
1844cbf75e67SStephen Hanson 		 */
1845cbf75e67SStephen Hanson 		if (cip->ci_xprt != NULL) {
1846cbf75e67SStephen Hanson 			fmd_list_delete(&cip->ci_mod->mod_cases, cip);
1847cbf75e67SStephen Hanson 			resolved = 1;
1848cbf75e67SStephen Hanson 			break;
1849cbf75e67SStephen Hanson 		}
1850cbf75e67SStephen Hanson 
185125c6ff4bSstephh 		ASSERT(fmd_case_orphaned(cp));
185225c6ff4bSstephh 
185325c6ff4bSstephh 		/*
185425c6ff4bSstephh 		 * If all suspects are already either usable or not present then
185525c6ff4bSstephh 		 * carry on, publish list.resolved and discard the case.
185625c6ff4bSstephh 		 */
185725c6ff4bSstephh 		fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
185825c6ff4bSstephh 		    fmd_case_unusable_and_present, &any_unusable_and_present);
185925c6ff4bSstephh 		if (any_unusable_and_present) {
186025c6ff4bSstephh 			(void) pthread_mutex_unlock(&cip->ci_lock);
186125c6ff4bSstephh 			return;
186225c6ff4bSstephh 		}
186325c6ff4bSstephh 
186425c6ff4bSstephh 		resolved = 1;
18657c478bd9Sstevel@tonic-gate 		break;
18667c478bd9Sstevel@tonic-gate 	}
18677c478bd9Sstevel@tonic-gate 
18687c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
18697c478bd9Sstevel@tonic-gate 
18707c478bd9Sstevel@tonic-gate 	/*
1871d9638e54Smws 	 * If the module has initialized, then publish the appropriate event
1872d9638e54Smws 	 * for the new case state.  If not, we are being called from the
1873d9638e54Smws 	 * checkpoint code during module load, in which case the module's
1874d9638e54Smws 	 * _fmd_init() routine hasn't finished yet, and our event dictionaries
1875d9638e54Smws 	 * may not be open yet, which will prevent us from computing the event
1876d9638e54Smws 	 * code.  Defer the call to fmd_case_publish() by enqueuing a PUBLISH
1877d9638e54Smws 	 * event in our queue: this won't be processed until _fmd_init is done.
18787c478bd9Sstevel@tonic-gate 	 */
18797c478bd9Sstevel@tonic-gate 	if (cip->ci_mod->mod_flags & FMD_MOD_INIT)
18807c478bd9Sstevel@tonic-gate 		fmd_case_publish(cp, state);
1881d9638e54Smws 	else {
1882d9638e54Smws 		fmd_case_hold(cp);
1883d9638e54Smws 		e = fmd_event_create(FMD_EVT_PUBLISH, FMD_HRT_NOW, NULL, cp);
1884d9638e54Smws 		fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e);
1885d9638e54Smws 	}
1886d9638e54Smws 
188725c6ff4bSstephh 	if (resolved) {
18885750ef5cSStephen Hanson 		if (cip->ci_xprt != NULL) {
18895750ef5cSStephen Hanson 			/*
18905750ef5cSStephen Hanson 			 * If we transitioned to RESOLVED, adjust the reference
18915750ef5cSStephen Hanson 			 * count to reflect our removal from
18925750ef5cSStephen Hanson 			 * fmd.d_rmod->mod_cases above.  If the caller has not
18935750ef5cSStephen Hanson 			 * placed an additional hold on the case, it will now
18945750ef5cSStephen Hanson 			 * be freed.
18955750ef5cSStephen Hanson 			 */
18965750ef5cSStephen Hanson 			(void) pthread_mutex_lock(&cip->ci_lock);
18975750ef5cSStephen Hanson 			fmd_asru_hash_delete_case(fmd.d_asrus, cp);
18985750ef5cSStephen Hanson 			(void) pthread_mutex_unlock(&cip->ci_lock);
18995750ef5cSStephen Hanson 			fmd_case_rele(cp);
19005750ef5cSStephen Hanson 		} else {
19015750ef5cSStephen Hanson 			fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
19025750ef5cSStephen Hanson 			    fmd_asru_log_resolved, NULL);
19035750ef5cSStephen Hanson 			(void) pthread_mutex_lock(&cip->ci_lock);
19045750ef5cSStephen Hanson 			/* mark as "ready to be discarded */
19055750ef5cSStephen Hanson 			cip->ci_flags |= FMD_CF_RES_CMPL;
19065750ef5cSStephen Hanson 			(void) pthread_mutex_unlock(&cip->ci_lock);
19075750ef5cSStephen Hanson 		}
19085750ef5cSStephen Hanson 	}
19095750ef5cSStephen Hanson }
19105750ef5cSStephen Hanson 
19115750ef5cSStephen Hanson /*
19125750ef5cSStephen Hanson  * Discard any case if it is in RESOLVED state (and if check_if_aged argument
19135750ef5cSStephen Hanson  * is set if all suspects have passed the rsrc.aged time).
19145750ef5cSStephen Hanson  */
19155750ef5cSStephen Hanson void
19165750ef5cSStephen Hanson fmd_case_discard_resolved(fmd_case_t *cp, void *arg)
19175750ef5cSStephen Hanson {
19185750ef5cSStephen Hanson 	int check_if_aged = *(int *)arg;
19195750ef5cSStephen Hanson 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
19205750ef5cSStephen Hanson 
19215750ef5cSStephen Hanson 	/*
19225750ef5cSStephen Hanson 	 * First check if case has completed transition to resolved.
19235750ef5cSStephen Hanson 	 */
19245750ef5cSStephen Hanson 	(void) pthread_mutex_lock(&cip->ci_lock);
19255750ef5cSStephen Hanson 	if (!(cip->ci_flags & FMD_CF_RES_CMPL)) {
1926567cc2e6Sstephh 		(void) pthread_mutex_unlock(&cip->ci_lock);
19275750ef5cSStephen Hanson 		return;
19285750ef5cSStephen Hanson 	}
19295750ef5cSStephen Hanson 
19305750ef5cSStephen Hanson 	/*
19315750ef5cSStephen Hanson 	 * Now if check_is_aged is set, see if all suspects have aged.
19325750ef5cSStephen Hanson 	 */
19335750ef5cSStephen Hanson 	if (check_if_aged) {
19345750ef5cSStephen Hanson 		int aged = 1;
19355750ef5cSStephen Hanson 
19365750ef5cSStephen Hanson 		fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
19375750ef5cSStephen Hanson 		    fmd_asru_check_if_aged, &aged);
19385750ef5cSStephen Hanson 		if (!aged) {
19395750ef5cSStephen Hanson 			(void) pthread_mutex_unlock(&cip->ci_lock);
19405750ef5cSStephen Hanson 			return;
19415750ef5cSStephen Hanson 		}
1942567cc2e6Sstephh 	}
19435750ef5cSStephen Hanson 
19445750ef5cSStephen Hanson 	/*
19455750ef5cSStephen Hanson 	 * Finally discard the case, clearing FMD_CF_RES_CMPL so we don't
19465750ef5cSStephen Hanson 	 * do it twice.
19475750ef5cSStephen Hanson 	 */
19485750ef5cSStephen Hanson 	fmd_module_lock(cip->ci_mod);
19495750ef5cSStephen Hanson 	fmd_list_delete(&cip->ci_mod->mod_cases, cip);
19505750ef5cSStephen Hanson 	fmd_module_unlock(cip->ci_mod);
19515750ef5cSStephen Hanson 	fmd_asru_hash_delete_case(fmd.d_asrus, cp);
19525750ef5cSStephen Hanson 	cip->ci_flags &= ~FMD_CF_RES_CMPL;
19535750ef5cSStephen Hanson 	(void) pthread_mutex_unlock(&cip->ci_lock);
19545750ef5cSStephen Hanson 	fmd_case_rele(cp);
19557c478bd9Sstevel@tonic-gate }
19567c478bd9Sstevel@tonic-gate 
19570b9e3e76Smws /*
19580b9e3e76Smws  * Transition the specified case to *at least* the specified state by first
19590b9e3e76Smws  * re-validating the suspect list using the resource cache.  This function is
19600b9e3e76Smws  * employed by the checkpoint code when restoring a saved, solved case to see
19610b9e3e76Smws  * if the state of the case has effectively changed while fmd was not running
196225c6ff4bSstephh  * or the module was not loaded.
19630b9e3e76Smws  */
19640b9e3e76Smws void
19650b9e3e76Smws fmd_case_transition_update(fmd_case_t *cp, uint_t state, uint_t flags)
19660b9e3e76Smws {
19670b9e3e76Smws 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
19680b9e3e76Smws 
19690b9e3e76Smws 	int usable = 0;		/* are any suspects usable? */
19700b9e3e76Smws 
19710b9e3e76Smws 	ASSERT(state >= FMD_CASE_SOLVED);
19720b9e3e76Smws 	(void) pthread_mutex_lock(&cip->ci_lock);
19730b9e3e76Smws 
1974567cc2e6Sstephh 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_usable, &usable);
19750b9e3e76Smws 
19760b9e3e76Smws 	(void) pthread_mutex_unlock(&cip->ci_lock);
19770b9e3e76Smws 
197825c6ff4bSstephh 	if (!usable) {
19790b9e3e76Smws 		state = MAX(state, FMD_CASE_CLOSE_WAIT);
19800b9e3e76Smws 		flags |= FMD_CF_ISOLATED;
19810b9e3e76Smws 	}
19820b9e3e76Smws 
19830b9e3e76Smws 	fmd_case_transition(cp, state, flags);
19840b9e3e76Smws }
19850b9e3e76Smws 
19867c478bd9Sstevel@tonic-gate void
19877c478bd9Sstevel@tonic-gate fmd_case_setdirty(fmd_case_t *cp)
19887c478bd9Sstevel@tonic-gate {
19897c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
19907c478bd9Sstevel@tonic-gate 
19917c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
19927c478bd9Sstevel@tonic-gate 	cip->ci_flags |= FMD_CF_DIRTY;
19937c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
19947c478bd9Sstevel@tonic-gate 
19957c478bd9Sstevel@tonic-gate 	fmd_module_setcdirty(cip->ci_mod);
19967c478bd9Sstevel@tonic-gate }
19977c478bd9Sstevel@tonic-gate 
19987c478bd9Sstevel@tonic-gate void
19997c478bd9Sstevel@tonic-gate fmd_case_clrdirty(fmd_case_t *cp)
20007c478bd9Sstevel@tonic-gate {
20017c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
20027c478bd9Sstevel@tonic-gate 
20037c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
20047c478bd9Sstevel@tonic-gate 	cip->ci_flags &= ~FMD_CF_DIRTY;
20057c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
20067c478bd9Sstevel@tonic-gate }
20077c478bd9Sstevel@tonic-gate 
20087c478bd9Sstevel@tonic-gate void
20097c478bd9Sstevel@tonic-gate fmd_case_commit(fmd_case_t *cp)
20107c478bd9Sstevel@tonic-gate {
20117c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
20127c478bd9Sstevel@tonic-gate 	fmd_case_item_t *cit;
20137c478bd9Sstevel@tonic-gate 
20147c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
20157c478bd9Sstevel@tonic-gate 
20167c478bd9Sstevel@tonic-gate 	if (cip->ci_flags & FMD_CF_DIRTY) {
20177c478bd9Sstevel@tonic-gate 		for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
20187c478bd9Sstevel@tonic-gate 			fmd_event_commit(cit->cit_event);
20197c478bd9Sstevel@tonic-gate 
20207c478bd9Sstevel@tonic-gate 		if (cip->ci_principal != NULL)
20217c478bd9Sstevel@tonic-gate 			fmd_event_commit(cip->ci_principal);
20227c478bd9Sstevel@tonic-gate 
20237c478bd9Sstevel@tonic-gate 		fmd_buf_hash_commit(&cip->ci_bufs);
20247c478bd9Sstevel@tonic-gate 		cip->ci_flags &= ~FMD_CF_DIRTY;
20257c478bd9Sstevel@tonic-gate 	}
20267c478bd9Sstevel@tonic-gate 
20277c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
20287c478bd9Sstevel@tonic-gate }
20297c478bd9Sstevel@tonic-gate 
2030cbf75e67SStephen Hanson /*
2031cbf75e67SStephen Hanson  * On proxy side, send back repair/acquit/etc request to diagnosing side
2032cbf75e67SStephen Hanson  */
2033cbf75e67SStephen Hanson void
2034cbf75e67SStephen Hanson fmd_case_xprt_updated(fmd_case_t *cp)
2035cbf75e67SStephen Hanson {
2036cbf75e67SStephen Hanson 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2037cbf75e67SStephen Hanson 	nvlist_t **nva;
2038cbf75e67SStephen Hanson 	uint8_t *ba;
2039cbf75e67SStephen Hanson 	int msg = B_TRUE;
2040cbf75e67SStephen Hanson 	int count = 0;
2041cbf75e67SStephen Hanson 	fmd_case_lst_t fcl;
2042cbf75e67SStephen Hanson 
2043cbf75e67SStephen Hanson 	ASSERT(cip->ci_xprt != NULL);
2044cbf75e67SStephen Hanson 	(void) pthread_mutex_lock(&cip->ci_lock);
2045cbf75e67SStephen Hanson 	ba = alloca(sizeof (uint8_t) * cip->ci_nsuspects);
2046cbf75e67SStephen Hanson 	nva = alloca(sizeof (nvlist_t *) * cip->ci_nsuspects);
2047cbf75e67SStephen Hanson 	fcl.fcl_countp = &count;
2048cbf75e67SStephen Hanson 	fcl.fcl_maxcount = cip->ci_nsuspects;
2049cbf75e67SStephen Hanson 	fcl.fcl_msgp = &msg;
2050cbf75e67SStephen Hanson 	fcl.fcl_ba = ba;
2051cbf75e67SStephen Hanson 	fcl.fcl_nva = nva;
2052cbf75e67SStephen Hanson 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_set_lst, &fcl);
2053cbf75e67SStephen Hanson 	(void) pthread_mutex_unlock(&cip->ci_lock);
2054cbf75e67SStephen Hanson 	fmd_xprt_updated(cip->ci_xprt, cip->ci_uuid, ba, cip->ci_proxy_asru,
2055cbf75e67SStephen Hanson 	    count);
2056cbf75e67SStephen Hanson }
2057cbf75e67SStephen Hanson 
2058cbf75e67SStephen Hanson /*
2059cbf75e67SStephen Hanson  * fmd_case_update_status() can be called on either the proxy side when a
2060cbf75e67SStephen Hanson  * list.suspect is received, or on the diagnosing side when an update request
2061cbf75e67SStephen Hanson  * is received from the proxy. It updates the status in the resource cache.
2062cbf75e67SStephen Hanson  */
2063cbf75e67SStephen Hanson void
2064cbf75e67SStephen Hanson fmd_case_update_status(fmd_case_t *cp, uint8_t *statusp, uint8_t *proxy_asrup,
2065cbf75e67SStephen Hanson     uint8_t *diag_asrup)
2066cbf75e67SStephen Hanson {
2067cbf75e67SStephen Hanson 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2068cbf75e67SStephen Hanson 	int count = 0;
2069cbf75e67SStephen Hanson 	fmd_asru_update_status_t faus;
2070cbf75e67SStephen Hanson 
2071cbf75e67SStephen Hanson 	/*
2072cbf75e67SStephen Hanson 	 * update status of resource cache entries
2073cbf75e67SStephen Hanson 	 */
2074cbf75e67SStephen Hanson 	faus.faus_countp = &count;
2075cbf75e67SStephen Hanson 	faus.faus_maxcount = cip->ci_nsuspects;
2076cbf75e67SStephen Hanson 	faus.faus_ba = statusp;
2077cbf75e67SStephen Hanson 	faus.faus_proxy_asru = proxy_asrup;
2078cbf75e67SStephen Hanson 	faus.faus_diag_asru = diag_asrup;
2079cbf75e67SStephen Hanson 	faus.faus_is_proxy = (cip->ci_xprt != NULL);
2080cbf75e67SStephen Hanson 	(void) pthread_mutex_lock(&cip->ci_lock);
2081cbf75e67SStephen Hanson 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_update_status,
2082cbf75e67SStephen Hanson 	    &faus);
2083cbf75e67SStephen Hanson 	(void) pthread_mutex_unlock(&cip->ci_lock);
2084cbf75e67SStephen Hanson }
2085cbf75e67SStephen Hanson 
2086cbf75e67SStephen Hanson /*
2087cbf75e67SStephen Hanson  * Called on either the proxy side or the diag side when a repair has taken
2088cbf75e67SStephen Hanson  * place on the other side but this side may know the asru "contains"
2089cbf75e67SStephen Hanson  * relationships.
2090cbf75e67SStephen Hanson  */
2091cbf75e67SStephen Hanson void
2092cbf75e67SStephen Hanson fmd_case_update_containees(fmd_case_t *cp)
2093cbf75e67SStephen Hanson {
2094cbf75e67SStephen Hanson 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2095cbf75e67SStephen Hanson 
2096cbf75e67SStephen Hanson 	(void) pthread_mutex_lock(&cip->ci_lock);
2097cbf75e67SStephen Hanson 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
2098cbf75e67SStephen Hanson 	    fmd_asru_update_containees, NULL);
2099cbf75e67SStephen Hanson 	(void) pthread_mutex_unlock(&cip->ci_lock);
2100cbf75e67SStephen Hanson }
2101cbf75e67SStephen Hanson 
2102cbf75e67SStephen Hanson /*
2103cbf75e67SStephen Hanson  * fmd_case_close_status() is called on diagnosing side when proxy side
2104cbf75e67SStephen Hanson  * has had a uuclose. It updates the status in the resource cache.
2105cbf75e67SStephen Hanson  */
2106cbf75e67SStephen Hanson void
2107cbf75e67SStephen Hanson fmd_case_close_status(fmd_case_t *cp)
2108cbf75e67SStephen Hanson {
2109cbf75e67SStephen Hanson 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2110cbf75e67SStephen Hanson 	int count = 0;
2111cbf75e67SStephen Hanson 	fmd_asru_close_status_t facs;
2112cbf75e67SStephen Hanson 
2113cbf75e67SStephen Hanson 	/*
2114cbf75e67SStephen Hanson 	 * update status of resource cache entries
2115cbf75e67SStephen Hanson 	 */
2116cbf75e67SStephen Hanson 	facs.facs_countp = &count;
2117cbf75e67SStephen Hanson 	facs.facs_maxcount = cip->ci_nsuspects;
2118cbf75e67SStephen Hanson 	(void) pthread_mutex_lock(&cip->ci_lock);
2119cbf75e67SStephen Hanson 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_close_status,
2120cbf75e67SStephen Hanson 	    &facs);
2121cbf75e67SStephen Hanson 	(void) pthread_mutex_unlock(&cip->ci_lock);
2122cbf75e67SStephen Hanson }
2123cbf75e67SStephen Hanson 
21247c478bd9Sstevel@tonic-gate /*
21257c478bd9Sstevel@tonic-gate  * Indicate that the case may need to change state because one or more of the
21267c478bd9Sstevel@tonic-gate  * ASRUs named as a suspect has changed state.  We examine all the suspects
21277c478bd9Sstevel@tonic-gate  * and if none are still faulty, we initiate a case close transition.
21287c478bd9Sstevel@tonic-gate  */
21297c478bd9Sstevel@tonic-gate void
21307c478bd9Sstevel@tonic-gate fmd_case_update(fmd_case_t *cp)
21317c478bd9Sstevel@tonic-gate {
21327c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2133d9638e54Smws 	uint_t cstate;
2134567cc2e6Sstephh 	int faulty = 0;
21357c478bd9Sstevel@tonic-gate 
21367c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
2137d9638e54Smws 	cstate = cip->ci_state;
21387c478bd9Sstevel@tonic-gate 
2139cbf75e67SStephen Hanson 	if (cip->ci_state < FMD_CASE_SOLVED) {
21407c478bd9Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&cip->ci_lock);
2141d9638e54Smws 		return; /* update is not appropriate */
21427c478bd9Sstevel@tonic-gate 	}
21437c478bd9Sstevel@tonic-gate 
2144567cc2e6Sstephh 	if (cip->ci_flags & FMD_CF_REPAIRED) {
2145567cc2e6Sstephh 		(void) pthread_mutex_unlock(&cip->ci_lock);
2146567cc2e6Sstephh 		return; /* already repaired */
21477c478bd9Sstevel@tonic-gate 	}
21487c478bd9Sstevel@tonic-gate 
2149cbf75e67SStephen Hanson 	TRACE((FMD_DBG_CASE, "case update %s", cip->ci_uuid));
2150567cc2e6Sstephh 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty);
21517c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
21527c478bd9Sstevel@tonic-gate 
215325c6ff4bSstephh 	if (faulty) {
215425c6ff4bSstephh 		nvlist_t *nvl;
215525c6ff4bSstephh 		fmd_event_t *e;
215625c6ff4bSstephh 		char *class;
215725c6ff4bSstephh 
2158cbf75e67SStephen Hanson 		TRACE((FMD_DBG_CASE, "sending list.updated %s", cip->ci_uuid));
215925c6ff4bSstephh 		nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS);
216025c6ff4bSstephh 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
216125c6ff4bSstephh 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
216225c6ff4bSstephh 		(void) pthread_rwlock_rdlock(&fmd.d_log_lock);
216325c6ff4bSstephh 		fmd_log_append(fmd.d_fltlog, e, cp);
216425c6ff4bSstephh 		(void) pthread_rwlock_unlock(&fmd.d_log_lock);
216525c6ff4bSstephh 		fmd_dispq_dispatch(fmd.d_disp, e, class);
2166d9638e54Smws 		return; /* one or more suspects are still marked faulty */
216725c6ff4bSstephh 	}
2168d9638e54Smws 
2169d9638e54Smws 	if (cstate == FMD_CASE_CLOSED)
2170d9638e54Smws 		fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
2171d9638e54Smws 	else
2172d9638e54Smws 		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
2173d9638e54Smws }
2174d9638e54Smws 
2175d9638e54Smws /*
2176d9638e54Smws  * Delete a closed case from the module's case list once the fmdo_close() entry
2177d9638e54Smws  * point has run to completion.  If the case is owned by a transport module,
2178d9638e54Smws  * tell the transport to proxy a case close on the other end of the transport.
2179cbf75e67SStephen Hanson  * Transition to the appropriate next state based on ci_flags.  This
2180d9638e54Smws  * function represents the end of CLOSE_WAIT and transitions the case to either
2181d9638e54Smws  * CLOSED or REPAIRED or discards it entirely because it was never solved;
2182d9638e54Smws  * refer to the topmost block comment explaining the state machine for details.
2183d9638e54Smws  */
2184d9638e54Smws void
2185d9638e54Smws fmd_case_delete(fmd_case_t *cp)
2186d9638e54Smws {
2187d9638e54Smws 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
21880b9e3e76Smws 	fmd_modstat_t *msp;
21890b9e3e76Smws 	size_t buftotal;
2190d9638e54Smws 
2191cbf75e67SStephen Hanson 	TRACE((FMD_DBG_CASE, "case delete %s", cip->ci_uuid));
2192d9638e54Smws 	ASSERT(fmd_module_locked(cip->ci_mod));
2193d9638e54Smws 	fmd_list_delete(&cip->ci_mod->mod_cases, cip);
21940b9e3e76Smws 	buftotal = fmd_buf_hash_destroy(&cip->ci_bufs);
21950b9e3e76Smws 
21960b9e3e76Smws 	(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
21970b9e3e76Smws 	msp = cip->ci_mod->mod_stats;
21980b9e3e76Smws 
21990b9e3e76Smws 	ASSERT(msp->ms_caseopen.fmds_value.ui64 != 0);
22000b9e3e76Smws 	msp->ms_caseopen.fmds_value.ui64--;
22010b9e3e76Smws 
22020b9e3e76Smws 	ASSERT(msp->ms_buftotal.fmds_value.ui64 >= buftotal);
22030b9e3e76Smws 	msp->ms_buftotal.fmds_value.ui64 -= buftotal;
22040b9e3e76Smws 
22050b9e3e76Smws 	(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
2206d9638e54Smws 
2207d9638e54Smws 	if (cip->ci_xprt == NULL)
2208d9638e54Smws 		fmd_module_setcdirty(cip->ci_mod);
2209d9638e54Smws 
2210d9638e54Smws 	fmd_module_rele(cip->ci_mod);
2211d9638e54Smws 	cip->ci_mod = fmd.d_rmod;
2212d9638e54Smws 	fmd_module_hold(cip->ci_mod);
2213d9638e54Smws 
2214162ba6eaSmws 	/*
2215cbf75e67SStephen Hanson 	 * If the case has been solved, then retain it
2216162ba6eaSmws 	 * on the root module's case list at least until we're transitioned.
2217162ba6eaSmws 	 * Otherwise free the case with our final fmd_case_rele() below.
2218162ba6eaSmws 	 */
2219cbf75e67SStephen Hanson 	if (cip->ci_flags & FMD_CF_SOLVED) {
2220162ba6eaSmws 		fmd_module_lock(cip->ci_mod);
2221162ba6eaSmws 		fmd_list_append(&cip->ci_mod->mod_cases, cip);
2222162ba6eaSmws 		fmd_module_unlock(cip->ci_mod);
2223162ba6eaSmws 		fmd_case_hold(cp);
2224162ba6eaSmws 	}
2225162ba6eaSmws 
2226d9638e54Smws 	/*
2227cbf75e67SStephen Hanson 	 * Transition onwards to REPAIRED or CLOSED as originally requested.
2228cbf75e67SStephen Hanson 	 * Note that for proxy case if we're transitioning to CLOSED it means
2229cbf75e67SStephen Hanson 	 * the case was isolated locally, so call fmd_xprt_uuclose() to notify
2230cbf75e67SStephen Hanson 	 * the diagnosing side. No need to notify the diagnosing side if we are
2231cbf75e67SStephen Hanson 	 * transitioning to REPAIRED as we only do this when requested to do
2232cbf75e67SStephen Hanson 	 * so by the diagnosing side anyway.
2233d9638e54Smws 	 */
2234cbf75e67SStephen Hanson 	if (cip->ci_flags & FMD_CF_REPAIRED)
2235d9638e54Smws 		fmd_case_transition(cp, FMD_CASE_REPAIRED, 0);
2236cbf75e67SStephen Hanson 	else if (cip->ci_flags & FMD_CF_ISOLATED) {
2237d9638e54Smws 		fmd_case_transition(cp, FMD_CASE_CLOSED, 0);
2238cbf75e67SStephen Hanson 		if (cip->ci_xprt != NULL)
2239cbf75e67SStephen Hanson 			fmd_xprt_uuclose(cip->ci_xprt, cip->ci_uuid);
2240cbf75e67SStephen Hanson 	}
2241d9638e54Smws 
2242d9638e54Smws 	fmd_case_rele(cp);
2243d9638e54Smws }
2244d9638e54Smws 
2245d9638e54Smws void
2246cbf75e67SStephen Hanson fmd_case_discard(fmd_case_t *cp, boolean_t delete_from_asru_cache)
2247d9638e54Smws {
2248d9638e54Smws 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2249d9638e54Smws 
2250d9638e54Smws 	(void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
2251d9638e54Smws 	cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64--;
2252d9638e54Smws 	(void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
2253d9638e54Smws 
2254d9638e54Smws 	ASSERT(fmd_module_locked(cip->ci_mod));
2255d9638e54Smws 	fmd_list_delete(&cip->ci_mod->mod_cases, cip);
2256cbf75e67SStephen Hanson 	if (delete_from_asru_cache) {
2257cbf75e67SStephen Hanson 		(void) pthread_mutex_lock(&cip->ci_lock);
2258cbf75e67SStephen Hanson 		fmd_asru_hash_delete_case(fmd.d_asrus, cp);
2259cbf75e67SStephen Hanson 		(void) pthread_mutex_unlock(&cip->ci_lock);
2260cbf75e67SStephen Hanson 	}
2261d9638e54Smws 	fmd_case_rele(cp);
22627c478bd9Sstevel@tonic-gate }
22637c478bd9Sstevel@tonic-gate 
22647c478bd9Sstevel@tonic-gate /*
22657c478bd9Sstevel@tonic-gate  * Indicate that the problem corresponding to a case has been repaired by
2266d9638e54Smws  * clearing the faulty bit on each ASRU named as a suspect.  If the case hasn't
2267d9638e54Smws  * already been closed, this function initiates the transition to CLOSE_WAIT.
2268d9638e54Smws  * The caller must have the case held from fmd_case_hash_lookup(), so we can
2269d9638e54Smws  * grab and drop ci_lock without the case being able to be freed in between.
22707c478bd9Sstevel@tonic-gate  */
22717c478bd9Sstevel@tonic-gate int
22727c478bd9Sstevel@tonic-gate fmd_case_repair(fmd_case_t *cp)
22737c478bd9Sstevel@tonic-gate {
22747c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2275d9638e54Smws 	uint_t cstate;
2276cbf75e67SStephen Hanson 	fmd_asru_rep_arg_t fara;
2277d9638e54Smws 
22787c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
2279d9638e54Smws 	cstate = cip->ci_state;
22807c478bd9Sstevel@tonic-gate 
2281567cc2e6Sstephh 	if (cstate < FMD_CASE_SOLVED) {
22827c478bd9Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&cip->ci_lock);
22837c478bd9Sstevel@tonic-gate 		return (fmd_set_errno(EFMD_CASE_STATE));
22847c478bd9Sstevel@tonic-gate 	}
22857c478bd9Sstevel@tonic-gate 
2286567cc2e6Sstephh 	if (cip->ci_flags & FMD_CF_REPAIRED) {
2287567cc2e6Sstephh 		(void) pthread_mutex_unlock(&cip->ci_lock);
2288567cc2e6Sstephh 		return (0); /* already repaired */
2289d9638e54Smws 	}
2290d9638e54Smws 
2291cbf75e67SStephen Hanson 	TRACE((FMD_DBG_CASE, "case repair %s", cip->ci_uuid));
2292cbf75e67SStephen Hanson 	fara.fara_reason = FMD_ASRU_REPAIRED;
2293cbf75e67SStephen Hanson 	fara.fara_bywhat = FARA_BY_CASE;
2294cbf75e67SStephen Hanson 	fara.fara_rval = NULL;
2295cbf75e67SStephen Hanson 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repaired, &fara);
229625c6ff4bSstephh 	(void) pthread_mutex_unlock(&cip->ci_lock);
229725c6ff4bSstephh 
2298cbf75e67SStephen Hanson 	/*
2299cbf75e67SStephen Hanson 	 * if this is a proxied case, send the repair across the transport.
2300cbf75e67SStephen Hanson 	 * The remote side will then do the repair and send a list.repaired back
2301cbf75e67SStephen Hanson 	 * again such that we can finally repair the case on this side.
2302cbf75e67SStephen Hanson 	 */
2303cbf75e67SStephen Hanson 	if (cip->ci_xprt != NULL) {
2304cbf75e67SStephen Hanson 		fmd_case_xprt_updated(cp);
2305cbf75e67SStephen Hanson 		return (0);
2306cbf75e67SStephen Hanson 	}
2307cbf75e67SStephen Hanson 
230825c6ff4bSstephh 	if (cstate == FMD_CASE_CLOSED)
230925c6ff4bSstephh 		fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
231025c6ff4bSstephh 	else
231125c6ff4bSstephh 		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
231225c6ff4bSstephh 
231325c6ff4bSstephh 	return (0);
231425c6ff4bSstephh }
231525c6ff4bSstephh 
231625c6ff4bSstephh int
231725c6ff4bSstephh fmd_case_acquit(fmd_case_t *cp)
231825c6ff4bSstephh {
231925c6ff4bSstephh 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
232025c6ff4bSstephh 	uint_t cstate;
2321cbf75e67SStephen Hanson 	fmd_asru_rep_arg_t fara;
232225c6ff4bSstephh 
232325c6ff4bSstephh 	(void) pthread_mutex_lock(&cip->ci_lock);
232425c6ff4bSstephh 	cstate = cip->ci_state;
232525c6ff4bSstephh 
232625c6ff4bSstephh 	if (cstate < FMD_CASE_SOLVED) {
232725c6ff4bSstephh 		(void) pthread_mutex_unlock(&cip->ci_lock);
232825c6ff4bSstephh 		return (fmd_set_errno(EFMD_CASE_STATE));
232925c6ff4bSstephh 	}
233025c6ff4bSstephh 
233125c6ff4bSstephh 	if (cip->ci_flags & FMD_CF_REPAIRED) {
233225c6ff4bSstephh 		(void) pthread_mutex_unlock(&cip->ci_lock);
233325c6ff4bSstephh 		return (0); /* already repaired */
233425c6ff4bSstephh 	}
233525c6ff4bSstephh 
2336cbf75e67SStephen Hanson 	TRACE((FMD_DBG_CASE, "case acquit %s", cip->ci_uuid));
2337cbf75e67SStephen Hanson 	fara.fara_reason = FMD_ASRU_ACQUITTED;
2338cbf75e67SStephen Hanson 	fara.fara_bywhat = FARA_BY_CASE;
2339cbf75e67SStephen Hanson 	fara.fara_rval = NULL;
2340cbf75e67SStephen Hanson 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repaired, &fara);
2341162ba6eaSmws 	(void) pthread_mutex_unlock(&cip->ci_lock);
2342162ba6eaSmws 
2343cbf75e67SStephen Hanson 	/*
2344cbf75e67SStephen Hanson 	 * if this is a proxied case, send the repair across the transport.
2345cbf75e67SStephen Hanson 	 * The remote side will then do the repair and send a list.repaired back
2346cbf75e67SStephen Hanson 	 * again such that we can finally repair the case on this side.
2347cbf75e67SStephen Hanson 	 */
2348cbf75e67SStephen Hanson 	if (cip->ci_xprt != NULL) {
2349cbf75e67SStephen Hanson 		fmd_case_xprt_updated(cp);
2350cbf75e67SStephen Hanson 		return (0);
2351cbf75e67SStephen Hanson 	}
2352cbf75e67SStephen Hanson 
2353d9638e54Smws 	if (cstate == FMD_CASE_CLOSED)
2354d9638e54Smws 		fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
2355d9638e54Smws 	else
2356d9638e54Smws 		fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
2357d9638e54Smws 
23587c478bd9Sstevel@tonic-gate 	return (0);
23597c478bd9Sstevel@tonic-gate }
23607c478bd9Sstevel@tonic-gate 
23617c478bd9Sstevel@tonic-gate int
23627c478bd9Sstevel@tonic-gate fmd_case_contains(fmd_case_t *cp, fmd_event_t *ep)
23637c478bd9Sstevel@tonic-gate {
23647c478bd9Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
23657c478bd9Sstevel@tonic-gate 	fmd_case_item_t *cit;
23667c478bd9Sstevel@tonic-gate 	uint_t state;
23677c478bd9Sstevel@tonic-gate 	int rv = 0;
23687c478bd9Sstevel@tonic-gate 
23697c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_lock(&cip->ci_lock);
23707c478bd9Sstevel@tonic-gate 
23717c478bd9Sstevel@tonic-gate 	if (cip->ci_state >= FMD_CASE_SOLVED)
23727c478bd9Sstevel@tonic-gate 		state = FMD_EVS_DIAGNOSED;
23737c478bd9Sstevel@tonic-gate 	else
23747c478bd9Sstevel@tonic-gate 		state = FMD_EVS_ACCEPTED;
23757c478bd9Sstevel@tonic-gate 
23767c478bd9Sstevel@tonic-gate 	for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
23777c478bd9Sstevel@tonic-gate 		if ((rv = fmd_event_equal(ep, cit->cit_event)) != 0)
23787c478bd9Sstevel@tonic-gate 			break;
23797c478bd9Sstevel@tonic-gate 	}
23807c478bd9Sstevel@tonic-gate 
23817c478bd9Sstevel@tonic-gate 	if (rv == 0 && cip->ci_principal != NULL)
23827c478bd9Sstevel@tonic-gate 		rv = fmd_event_equal(ep, cip->ci_principal);
23837c478bd9Sstevel@tonic-gate 
23847c478bd9Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&cip->ci_lock);
23857c478bd9Sstevel@tonic-gate 
23867c478bd9Sstevel@tonic-gate 	if (rv != 0)
23877c478bd9Sstevel@tonic-gate 		fmd_event_transition(ep, state);
23887c478bd9Sstevel@tonic-gate 
23897c478bd9Sstevel@tonic-gate 	return (rv);
23907c478bd9Sstevel@tonic-gate }
2391d9638e54Smws 
2392d9638e54Smws int
2393d9638e54Smws fmd_case_orphaned(fmd_case_t *cp)
2394d9638e54Smws {
2395d9638e54Smws 	return (((fmd_case_impl_t *)cp)->ci_mod == fmd.d_rmod);
2396d9638e54Smws }
239744743693Sstephh 
239844743693Sstephh void
239944743693Sstephh fmd_case_settime(fmd_case_t *cp, time_t tv_sec, suseconds_t tv_usec)
240044743693Sstephh {
240144743693Sstephh 	((fmd_case_impl_t *)cp)->ci_tv.tv_sec = tv_sec;
240244743693Sstephh 	((fmd_case_impl_t *)cp)->ci_tv.tv_usec = tv_usec;
240344743693Sstephh 	((fmd_case_impl_t *)cp)->ci_tv_valid = 1;
240444743693Sstephh }
240525c6ff4bSstephh 
2406540db9a9SStephen Hanson void
2407540db9a9SStephen Hanson fmd_case_set_injected(fmd_case_t *cp)
2408540db9a9SStephen Hanson {
2409540db9a9SStephen Hanson 	((fmd_case_impl_t *)cp)->ci_injected = 1;
2410540db9a9SStephen Hanson }
2411540db9a9SStephen Hanson 
2412cbf75e67SStephen Hanson void
2413cbf75e67SStephen Hanson fmd_case_set_de_fmri(fmd_case_t *cp, nvlist_t *nvl)
2414cbf75e67SStephen Hanson {
2415cbf75e67SStephen Hanson 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2416cbf75e67SStephen Hanson 
2417cbf75e67SStephen Hanson 	if (cip->ci_diag_de)
2418cbf75e67SStephen Hanson 		nvlist_free(cip->ci_diag_de);
2419cbf75e67SStephen Hanson 	cip->ci_diag_de = nvl;
2420cbf75e67SStephen Hanson }
2421cbf75e67SStephen Hanson 
2422cbf75e67SStephen Hanson void
2423cbf75e67SStephen Hanson fmd_case_setcode(fmd_case_t *cp, char *code)
2424cbf75e67SStephen Hanson {
2425cbf75e67SStephen Hanson 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2426cbf75e67SStephen Hanson 
2427cbf75e67SStephen Hanson 	cip->ci_code = fmd_strdup(code, FMD_SLEEP);
2428cbf75e67SStephen Hanson 	cip->ci_codelen = cip->ci_code ? strlen(cip->ci_code) + 1 : 0;
2429cbf75e67SStephen Hanson }
2430cbf75e67SStephen Hanson 
243125c6ff4bSstephh /*ARGSUSED*/
24325750ef5cSStephen Hanson static void
243325c6ff4bSstephh fmd_case_repair_replay_case(fmd_case_t *cp, void *arg)
243425c6ff4bSstephh {
243525c6ff4bSstephh 	int not_faulty = 0;
243625c6ff4bSstephh 	int faulty = 0;
243725c6ff4bSstephh 	nvlist_t *nvl;
243825c6ff4bSstephh 	fmd_event_t *e;
243925c6ff4bSstephh 	char *class;
244025c6ff4bSstephh 	int any_unusable_and_present = 0;
244125c6ff4bSstephh 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
244225c6ff4bSstephh 
2443cbf75e67SStephen Hanson 	if (cip->ci_state < FMD_CASE_SOLVED || cip->ci_xprt != NULL)
244425c6ff4bSstephh 		return;
244525c6ff4bSstephh 
24465750ef5cSStephen Hanson 	if (cip->ci_state == FMD_CASE_RESOLVED) {
24475750ef5cSStephen Hanson 		cip->ci_flags |= FMD_CF_RES_CMPL;
24485750ef5cSStephen Hanson 		return;
24495750ef5cSStephen Hanson 	}
24505750ef5cSStephen Hanson 
245125c6ff4bSstephh 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty);
245225c6ff4bSstephh 	fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_not_faulty,
245325c6ff4bSstephh 	    &not_faulty);
245425c6ff4bSstephh 
2455c7d6cfd6SStephen Hanson 	if (cip->ci_state >= FMD_CASE_REPAIRED && !faulty) {
245625c6ff4bSstephh 		/*
245725c6ff4bSstephh 		 * If none of the suspects is faulty, replay the list.repaired.
245825c6ff4bSstephh 		 * If all suspects are already either usable or not present then
245925c6ff4bSstephh 		 * also transition straight to RESOLVED state.
246025c6ff4bSstephh 		 */
246125c6ff4bSstephh 		fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
246225c6ff4bSstephh 		    fmd_case_unusable_and_present, &any_unusable_and_present);
246325c6ff4bSstephh 		if (!any_unusable_and_present) {
246425c6ff4bSstephh 			cip->ci_state = FMD_CASE_RESOLVED;
246525c6ff4bSstephh 
2466cbf75e67SStephen Hanson 			TRACE((FMD_DBG_CASE, "replay sending list.repaired %s",
2467cbf75e67SStephen Hanson 			    cip->ci_uuid));
246825c6ff4bSstephh 			nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
246925c6ff4bSstephh 			(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
247025c6ff4bSstephh 			e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl,
247125c6ff4bSstephh 			    class);
247225c6ff4bSstephh 			fmd_dispq_dispatch(fmd.d_disp, e, class);
247325c6ff4bSstephh 
2474cbf75e67SStephen Hanson 			TRACE((FMD_DBG_CASE, "replay sending list.resolved %s",
2475cbf75e67SStephen Hanson 			    cip->ci_uuid));
247625c6ff4bSstephh 			fmd_case_publish(cp, FMD_CASE_RESOLVED);
2477540db9a9SStephen Hanson 			fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
2478540db9a9SStephen Hanson 			    fmd_asru_log_resolved, NULL);
24795750ef5cSStephen Hanson 			cip->ci_flags |= FMD_CF_RES_CMPL;
248025c6ff4bSstephh 		} else {
2481cbf75e67SStephen Hanson 			TRACE((FMD_DBG_CASE, "replay sending list.repaired %s",
2482cbf75e67SStephen Hanson 			    cip->ci_uuid));
248325c6ff4bSstephh 			nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
248425c6ff4bSstephh 			(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
248525c6ff4bSstephh 			e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl,
248625c6ff4bSstephh 			    class);
248725c6ff4bSstephh 			fmd_dispq_dispatch(fmd.d_disp, e, class);
248825c6ff4bSstephh 		}
2489c7d6cfd6SStephen Hanson 	} else if (faulty && not_faulty) {
249025c6ff4bSstephh 		/*
249125c6ff4bSstephh 		 * if some but not all of the suspects are not faulty, replay
249225c6ff4bSstephh 		 * the list.updated.
249325c6ff4bSstephh 		 */
2494cbf75e67SStephen Hanson 		TRACE((FMD_DBG_CASE, "replay sending list.updated %s",
2495cbf75e67SStephen Hanson 		    cip->ci_uuid));
249625c6ff4bSstephh 		nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS);
249725c6ff4bSstephh 		(void) nvlist_lookup_string(nvl, FM_CLASS, &class);
249825c6ff4bSstephh 		e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
249925c6ff4bSstephh 		fmd_dispq_dispatch(fmd.d_disp, e, class);
250025c6ff4bSstephh 	}
250125c6ff4bSstephh }
250225c6ff4bSstephh 
250325c6ff4bSstephh void
250425c6ff4bSstephh fmd_case_repair_replay()
250525c6ff4bSstephh {
250625c6ff4bSstephh 	fmd_case_hash_apply(fmd.d_cases, fmd_case_repair_replay_case, NULL);
250725c6ff4bSstephh }
2508