/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * SMF software-response subsidiary
 */

#include <strings.h>
#include <fm/libtopo.h>
#include <libscf.h>
#include <sys/fm/protocol.h>
#include <fm/fmd_fmri.h>

#include "../../common/sw.h"
#include "smf.h"

static struct {
	fmd_stat_t swrp_smf_repairs;
	fmd_stat_t swrp_smf_clears;
	fmd_stat_t swrp_smf_closed;
	fmd_stat_t swrp_smf_wrongclass;
	fmd_stat_t swrp_smf_badlist;
	fmd_stat_t swrp_smf_badresource;
	fmd_stat_t swrp_smf_badclrevent;
	fmd_stat_t swrp_smf_noloop;
	fmd_stat_t swrp_smf_suppressed;
	fmd_stat_t swrp_smf_cachefull;
} swrp_smf_stats = {
	{ "swrp_smf_repairs", FMD_TYPE_UINT64,
	    "repair events received for propagation to SMF" },
	{ "swrp_smf_clears", FMD_TYPE_UINT64,
	    "notifications from SMF of exiting maint state" },
	{ "swrp_smf_closed", FMD_TYPE_UINT64,
	    "cases closed" },
	{ "swrp_smf_wrongclass", FMD_TYPE_UINT64,
	    "unexpected event class received" },
	{ "swrp_smf_badlist", FMD_TYPE_UINT64,
	    "list event with invalid structure" },
	{ "swrp_smf_badresource", FMD_TYPE_UINT64,
	    "list.repaired with smf fault but bad svc fmri" },
	{ "swrp_smf_badclrevent", FMD_TYPE_UINT64,
	    "maint clear event from SMF malformed" },
	{ "swrp_smf_noloop", FMD_TYPE_UINT64,
	    "avoidance of smf->fmd->smf repair propagations" },
	{ "swrp_smf_suppressed", FMD_TYPE_UINT64,
	    "not propagated to smf because no longer in maint" },
	{ "swrp_smf_cachefull", FMD_TYPE_UINT64,
	    "uuid cache full" },
};

#define	BUMPSTAT(stat)		swrp_smf_stats.stat.fmds_value.ui64++

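/*
 * The uuid cache grows in increments of CACHE_NENT_INC entries, up to
 * a hard limit of CACHE_NENT_MAX entries.  Once the cache is full, new
 * cases are not cached (see swrp_smf_cache_add) and the
 * swrp_smf_cachefull statistic is bumped.
 */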
#define	CACHE_NENT_INC		16
#define	CACHE_NENT_MAX		128

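/*
 * A cache entry pairs the UUID of an fmd case (36 characters plus NUL)
 * with the string form of the faulted service FMRI; mark is set once a
 * repair for the case has been seen, so it is not propagated again.
 */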
struct smf_uuid_cache_ent {
	char uuid[37];
	char fmristr[90];
	uint8_t mark;
};

#define	CACHE_VERSION		1

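/*
 * The entry[1] array uses the pre-C99 flexible-array idiom:
 * allocations are sized to hold nentries entries.  The structure is
 * persisted as-is in an fmd buffer, hence the leading version field.
 */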
struct smf_uuid_cache {
	uint32_t version;			/* Version */
	uint32_t nentries;			/* Real size of array below */
	struct smf_uuid_cache_ent entry[1];	/* Cache entries */
};

static struct smf_uuid_cache *uuid_cache;

#define	UUID_CACHE_BUFNAME	"uuid_cache"

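/*
 * Grow the uuid cache (creating it if necessary) by CACHE_NENT_INC
 * entries, preserving any existing entries, and re-create the
 * persistent buffer at the new size.
 */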
static void
uuid_cache_grow(fmd_hdl_t *hdl)
{
	struct smf_uuid_cache *newcache;
	size_t newsz;
	uint32_t n;

	n = (uuid_cache == NULL ? 0 : uuid_cache->nentries) + CACHE_NENT_INC;
	newsz = sizeof (struct smf_uuid_cache) + (n - 1) *
	    sizeof (struct smf_uuid_cache_ent);

	newcache = fmd_hdl_zalloc(hdl, newsz, FMD_SLEEP);
	newcache->version = CACHE_VERSION;
	newcache->nentries = n;

	if (uuid_cache != NULL) {
		uint32_t oldn = uuid_cache->nentries;
		size_t oldsz = sizeof (struct smf_uuid_cache) +
		    (oldn - 1) * sizeof (struct smf_uuid_cache_ent);

		/*
		 * Copy across only the entry array; oldsz also counts
		 * the structure header, and copying that many bytes
		 * from entry[0] would read past the old allocation.
		 */
		bcopy(&uuid_cache->entry[0], &newcache->entry[0],
		    oldn * sizeof (struct smf_uuid_cache_ent));
		fmd_hdl_free(hdl, uuid_cache, oldsz);
		fmd_buf_destroy(hdl, NULL, UUID_CACHE_BUFNAME);
	}

	uuid_cache = newcache;
	fmd_buf_create(hdl, NULL, UUID_CACHE_BUFNAME, newsz);
}

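/*
 * Write the cache out to the persistent fmd buffer so that it
 * survives fmd restarts and module reloads.
 */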
static void
uuid_cache_persist(fmd_hdl_t *hdl)
{
	size_t sz = sizeof (struct smf_uuid_cache) +
	    (uuid_cache->nentries - 1) * sizeof (struct smf_uuid_cache_ent);

	fmd_buf_write(hdl, NULL, UUID_CACHE_BUFNAME, uuid_cache, sz);
}

/*
 * Garbage-collect the uuid cache.  We do not need entries for cases
 * that are already resolved.  If a case is not resolved but the
 * service involved in it is no longer in maintenance state then we
 * have lost sync somehow, so repair the asru (which will also
 * resolve the case).
 */
static void
uuid_cache_gc(fmd_hdl_t *hdl)
{
	struct smf_uuid_cache_ent *entp;
	topo_hdl_t *thp = NULL;
	nvlist_t *svcfmri;
	char *svcname;
	int err, i;

	for (i = 0; i < uuid_cache->nentries; i++) {
		entp = &uuid_cache->entry[i];

		if (entp->uuid[0] == '\0')
			continue;

		if (fmd_case_uuisresolved(hdl, entp->uuid)) {
			bzero(entp->uuid, sizeof (entp->uuid));
			bzero(entp->fmristr, sizeof (entp->fmristr));
			entp->mark = 0;
		} else {
			if (thp == NULL)
				thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION);

			if (topo_fmri_str2nvl(thp, entp->fmristr, &svcfmri,
			    &err) != 0) {
				fmd_hdl_error(hdl, "str2nvl failed for %s\n",
				    entp->fmristr);
				continue;
			}

			if (fmd_nvl_fmri_service_state(hdl, svcfmri) !=
			    FMD_SERVICE_STATE_UNUSABLE) {
				svcname = sw_smf_svcfmri2shortstr(hdl, svcfmri);
				fmd_hdl_debug(hdl, "gc: %s out of "
				    "maintenance; repairing asru\n", svcname);
				(void) fmd_repair_asru(hdl, entp->fmristr);
				fmd_hdl_strfree(hdl, svcname);
			}

			nvlist_free(svcfmri);
		}
	}

	if (thp)
		fmd_hdl_topo_rele(hdl, thp);

	uuid_cache_persist(hdl);
}

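/*
 * Restore the cache from the persistent buffer at module load, then
 * garbage-collect immediately to resync with current case and
 * service states.
 */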
static void
uuid_cache_restore(fmd_hdl_t *hdl)
{
	size_t sz = fmd_buf_size(hdl, NULL, UUID_CACHE_BUFNAME);

	if (sz == 0)
		return;

	uuid_cache = fmd_hdl_alloc(hdl, sz, FMD_SLEEP);
	fmd_buf_read(hdl, NULL, UUID_CACHE_BUFNAME, uuid_cache, sz);

	/*
	 * Garbage collect now, not just for tidiness but also to help
	 * fmd and smf state stay in sync at module startup.
	 */
	uuid_cache_gc(hdl);
}

/*
 * Add the UUID of an SMF maintenance defect case to our cache and
 * record the associated full svc FMRI string for the case.
 */
static void
swrp_smf_cache_add(fmd_hdl_t *hdl, char *uuid, char *fmristr)
{
	struct smf_uuid_cache_ent *entp = NULL;
	int gced = 0;
	int i;

	if (uuid_cache == NULL)
		uuid_cache_grow(hdl);

	/*
	 * If we somehow already have an entry for this uuid then
	 * return, leaving it undisturbed.
	 */
	for (i = 0; i < uuid_cache->nentries; i++) {
		if (strcmp(uuid, uuid_cache->entry[i].uuid) == 0)
			return;
	}

scan:
	for (i = 0; i < uuid_cache->nentries; i++) {
		if (uuid_cache->entry[i].uuid[0] == '\0') {
			entp = &uuid_cache->entry[i];
			break;
		}
	}

	if (entp == NULL) {
		uint32_t oldn = uuid_cache->nentries;

		/*
		 * Before growing the cache we try again after first
		 * garbage-collecting the existing cache for any cases
		 * that are confirmed as resolved.
		 */
		if (!gced) {
			uuid_cache_gc(hdl);
			gced = 1;
			goto scan;
		}

		if (oldn < CACHE_NENT_MAX) {
			uuid_cache_grow(hdl);
			entp = &uuid_cache->entry[oldn];
		} else {
			BUMPSTAT(swrp_smf_cachefull);
			return;
		}
	}

	/*
	 * strncpy does not NUL-terminate on truncation, so terminate
	 * both buffers explicitly.
	 */
	(void) strncpy(entp->uuid, uuid, sizeof (entp->uuid));
	entp->uuid[sizeof (entp->uuid) - 1] = '\0';
	(void) strncpy(entp->fmristr, fmristr, sizeof (entp->fmristr));
	entp->fmristr[sizeof (entp->fmristr) - 1] = '\0';
	uuid_cache_persist(hdl);
}

/*
 * Mark cache entries as resolved - an entry is marked if it matches
 * either the given uuid (if not NULL) or the given fmristr (if not
 * NULL).  Return 1 iff an entry that matched on uuid was already
 * marked; return 0 otherwise (entry matched on fmristr only, matched
 * on uuid but was not yet marked, or no match was found).
 */
static int
swrp_smf_cache_mark(fmd_hdl_t *hdl, char *uuid, char *fmristr)
{
	int dirty = 0;
	int rv = 0;
	int i;

	if (uuid_cache == NULL)
		return (0);

	for (i = 0; i < uuid_cache->nentries; i++) {
		struct smf_uuid_cache_ent *entp = &uuid_cache->entry[i];

		if (entp->uuid[0] == '\0')
			continue;

		if (uuid && strcmp(uuid, entp->uuid) == 0) {
			if (entp->mark)
				rv = 1;
			entp->mark = 1;
			dirty++;
		} else if (fmristr && strcmp(fmristr, entp->fmristr) == 0) {
			entp->mark = 1;
			dirty++;
		}
	}

	if (dirty)
		uuid_cache_persist(hdl);

	return (rv);
}

/*
 * We will receive list events for cases we are not interested in.  Test
 * that this list has exactly one suspect and that it matches the maintenance
 * defect.  Return the defect to the caller in the second argument,
 * and the defect resource element in the third.
 */
static int
suspect_is_maint_defect(fmd_hdl_t *hdl, nvlist_t *nvl,
    nvlist_t **defectnvl, nvlist_t **rsrcnvl)
{
	nvlist_t **faults;
	uint_t nfaults;

	if (nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
	    &faults, &nfaults) != 0) {
		BUMPSTAT(swrp_smf_badlist);
		return (0);
	}

	if (nfaults != 1 ||
	    !fmd_nvl_class_match(hdl, faults[0], SW_SMF_MAINT_DEFECT))
		return (0);

	if (nvlist_lookup_nvlist(faults[0], FM_FAULT_RESOURCE, rsrcnvl) != 0) {
		BUMPSTAT(swrp_smf_badlist);
		return (0);
	}

	*defectnvl = faults[0];

	return (1);
}

/*
 * Receive newly-diagnosed list.suspect events that are for the
 * maintenance defect we diagnose.  Close the case (the resource was
 * already isolated by SMF) after caching the case UUID.
 */
/*ARGSUSED*/
static void
swrp_smf_cacheuuid(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
    const char *class, void *arg)
{
	nvlist_t *defect, *rsrc;
	char *fmristr, *uuid;

	if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0) {
		BUMPSTAT(swrp_smf_badlist);
		return;
	}

	if (!suspect_is_maint_defect(hdl, nvl, &defect, &rsrc))
		return;

	if ((fmristr = sw_smf_svcfmri2str(hdl, rsrc)) == NULL) {
		BUMPSTAT(swrp_smf_badlist);
		return;
	}

	swrp_smf_cache_add(hdl, uuid, fmristr);
	fmd_hdl_strfree(hdl, fmristr);

	if (!fmd_case_uuclosed(hdl, uuid)) {
		fmd_case_uuclose(hdl, uuid);
		BUMPSTAT(swrp_smf_closed);
	}
}

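/*
 * Handle service state-transition ireports from SMF (the TRANCLASS
 * events subscribed to below).  If the transition was out of
 * maintenance state then the repair was initiated on the SMF side
 * (e.g., via svcadm clear), so mark any cached case for this service
 * and repair the ASRU in fmd to match.
 */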
/*ARGSUSED*/
static void
swrp_smf2fmd(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
    const char *class, void *arg)
{
	nvlist_t *attr, *fmri;
	char *fromstate;
	char *fmristr;

	if (!fmd_nvl_class_match(hdl, nvl, TRANCLASS("*"))) {
		BUMPSTAT(swrp_smf_wrongclass);
		return;
	}

	if (nvlist_lookup_nvlist(nvl, FM_IREPORT_ATTRIBUTES, &attr) != 0 ||
	    nvlist_lookup_string(attr, "from-state", &fromstate) != 0) {
		BUMPSTAT(swrp_smf_badclrevent);
		return;
	}

	/*
	 * Filter those not describing a transition out of maintenance.
	 */
	if (strcmp(fromstate, "maintenance") != 0)
		return;

	if (nvlist_lookup_nvlist(attr, "svc", &fmri) != 0) {
		BUMPSTAT(swrp_smf_badclrevent);
		return;
	}

	if ((fmristr = sw_smf_svcfmri2str(hdl, fmri)) == NULL) {
		BUMPSTAT(swrp_smf_badclrevent);
		return;
	}

	/*
	 * Mark any UUID for a case against this service as resolved
	 * in our cache.  When we call fmd_repair_asru below, fmd will
	 * emit a list.repaired as a result, and our handling of that
	 * event must not propagate the repair towards SMF (since the
	 * repair was initiated via SMF itself and not via fmadm).
	 */
	(void) swrp_smf_cache_mark(hdl, NULL, fmristr);

	(void) fmd_repair_asru(hdl, fmristr);
	fmd_hdl_strfree(hdl, fmristr);
	BUMPSTAT(swrp_smf_clears);
}

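/*
 * Handle list.repaired events from fmd.  If the repair of a
 * maintenance defect case originated within fmd (e.g., fmadm repair)
 * then propagate it to SMF by clearing the service's maintenance
 * state; if it originated from SMF itself (cache entry already
 * marked) then do not propagate it back.
 */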
/*ARGSUSED*/
static void
swrp_fmd2smf(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
    const char *class, void *arg)
{
	char *fmristr, *shrtfmristr;
	nvlist_t *defect, *rsrc;
	char *uuid;
	int already;

	if (strcmp(class, FM_LIST_REPAIRED_CLASS) != 0) {
		BUMPSTAT(swrp_smf_wrongclass);
		return;
	}

	if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) != 0) {
		BUMPSTAT(swrp_smf_badlist);
		return;
	}

	if (!suspect_is_maint_defect(hdl, nvl, &defect, &rsrc))
		return;

	if ((fmristr = sw_smf_svcfmri2str(hdl, rsrc)) == NULL) {
		BUMPSTAT(swrp_smf_badresource);
		return;
	}

	already = swrp_smf_cache_mark(hdl, uuid, fmristr);
	fmd_hdl_strfree(hdl, fmristr);

	/*
	 * If the cache already had a marked entry for this UUID then
	 * this is a list.repaired arising from an SMF-initiated
	 * maintenance clear (propagated with fmd_repair_asru above,
	 * which then produces a list.repaired), and so we should not
	 * propagate the repair back towards SMF.  But do still force
	 * the case to RESOLVED state, in case fmd cannot confirm that
	 * the service is no longer in maintenance state (it may have
	 * failed again), so that a new case can be opened.
	 */
	fmd_case_uuresolved(hdl, uuid);
	if (already) {
		BUMPSTAT(swrp_smf_noloop);
		return;
	}

	/*
	 * Only propagate to SMF if we can see that the service is
	 * still in maintenance state.  We're not synchronized with
	 * SMF and this state could change at any time, but if we can
	 * see it's not in maintenance state then things are obviously
	 * moving (e.g., external svcadm activity), so we don't poke
	 * at SMF lest we confuse things or duplicate operations.
	 */

	if (fmd_nvl_fmri_service_state(hdl, rsrc) ==
	    FMD_SERVICE_STATE_UNUSABLE) {
		shrtfmristr = sw_smf_svcfmri2shortstr(hdl, rsrc);

		if (shrtfmristr != NULL) {
			(void) smf_restore_instance(shrtfmristr);
			fmd_hdl_strfree(hdl, shrtfmristr);
			BUMPSTAT(swrp_smf_repairs);
		} else {
			BUMPSTAT(swrp_smf_badresource);
		}
	} else {
		BUMPSTAT(swrp_smf_suppressed);
	}
}

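/*
 * Dispatch table handed back to the common sw module from
 * swrp_smf_init below: received events are routed by class to the
 * matching handler above.
 */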
const struct sw_disp swrp_smf_disp[] = {
	{ TRANCLASS("*"), swrp_smf2fmd, NULL },
	{ FM_LIST_SUSPECT_CLASS, swrp_smf_cacheuuid, NULL },
	{ FM_LIST_REPAIRED_CLASS, swrp_fmd2smf, NULL },
	{ NULL, NULL, NULL }
};

/*ARGSUSED*/
int
swrp_smf_init(fmd_hdl_t *hdl, id_t id, const struct sw_disp **dpp, int *nelemp)
{
	(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC, sizeof (swrp_smf_stats) /
	    sizeof (fmd_stat_t), (fmd_stat_t *)&swrp_smf_stats);

	uuid_cache_restore(hdl);

	/*
	 * We subscribe to all SMF transition class events because we
	 * must look inside the payload to see which events indicate a
	 * transition out of maintenance state.
	 */
	fmd_hdl_subscribe(hdl, TRANCLASS("*"));

	/*
	 * Subscribe to the defect class diagnosed for maintenance events.
	 * The module will then receive list.suspect events including
	 * these defects, and our dispatch table above lists the routing
	 * for list.suspect.
	 */
	fmd_hdl_subscribe(hdl, SW_SMF_MAINT_DEFECT);

	*dpp = &swrp_smf_disp[0];
	*nelemp = sizeof (swrp_smf_disp) / sizeof (swrp_smf_disp[0]);
	return (SW_SUB_INIT_SUCCESS);
}

/*ARGSUSED*/
void
swrp_smf_fini(fmd_hdl_t *hdl)
{
}

const struct sw_subinfo smf_response_info = {
	"smf repair",			/* swsub_name */
	SW_CASE_NONE,			/* swsub_casetype */
	swrp_smf_init,			/* swsub_init */
	swrp_smf_fini,			/* swsub_fini */
	NULL,				/* swsub_timeout */
	NULL,				/* swsub_case_close */
	NULL,				/* swsub_case_vrfy */
};