124db4641Seschrock /*
224db4641Seschrock  * CDDL HEADER START
324db4641Seschrock  *
424db4641Seschrock  * The contents of this file are subject to the terms of the
524db4641Seschrock  * Common Development and Distribution License (the "License").
624db4641Seschrock  * You may not use this file except in compliance with the License.
724db4641Seschrock  *
824db4641Seschrock  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
924db4641Seschrock  * or http://www.opensolaris.org/os/licensing.
1024db4641Seschrock  * See the License for the specific language governing permissions
1124db4641Seschrock  * and limitations under the License.
1224db4641Seschrock  *
1324db4641Seschrock  * When distributing Covered Code, include this CDDL HEADER in each
1424db4641Seschrock  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1524db4641Seschrock  * If applicable, add the following below this CDDL HEADER, with the
1624db4641Seschrock  * fields enclosed by brackets "[]" replaced with your own identifying
1724db4641Seschrock  * information: Portions Copyright [yyyy] [name of copyright owner]
1824db4641Seschrock  *
1924db4641Seschrock  * CDDL HEADER END
2024db4641Seschrock  */
2124db4641Seschrock /*
2224db4641Seschrock  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
2324db4641Seschrock  * Use is subject to license terms.
24*0244979bSAlek Pinchuk  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
2524db4641Seschrock  */
2624db4641Seschrock 
/*
 * Disk error transport module
 *
 * This transport module is responsible for translating between disk errors
 * and FMA ereports.  It is a read-only transport module, and checks for the
 * following failures:
 *
 *	- overtemp
 *	- predictive failure
 *	- self-test failure
 *	- solid state media wearout
 *
 * These failures are detected via the TOPO_METH_DISK_STATUS method, which
 * leverages libdiskstatus to do the actual analysis.  This transport module is
 * in charge of the following tasks:
 *
 *	- discovering available devices
 *	- periodically checking devices
 *	- managing device addition/removal
 */
4724db4641Seschrock 
4824db4641Seschrock #include <ctype.h>
4924db4641Seschrock #include <fm/fmd_api.h>
5024db4641Seschrock #include <fm/libdiskstatus.h>
5124db4641Seschrock #include <fm/libtopo.h>
5224db4641Seschrock #include <fm/topo_hc.h>
5324db4641Seschrock #include <fm/topo_mod.h>
5424db4641Seschrock #include <limits.h>
5524db4641Seschrock #include <string.h>
5624db4641Seschrock #include <sys/fm/io/scsi.h>
5724db4641Seschrock #include <sys/fm/protocol.h>
5824db4641Seschrock 
5924db4641Seschrock static struct dt_stat {
6024db4641Seschrock 	fmd_stat_t dropped;
6124db4641Seschrock } dt_stats = {
6224db4641Seschrock 	{ "dropped", FMD_TYPE_UINT64, "number of dropped ereports" }
6324db4641Seschrock };
6424db4641Seschrock 
6524db4641Seschrock typedef struct disk_monitor {
6624db4641Seschrock 	fmd_hdl_t	*dm_hdl;
6724db4641Seschrock 	fmd_xprt_t	*dm_xprt;
6824db4641Seschrock 	id_t		dm_timer;
6924db4641Seschrock 	hrtime_t	dm_interval;
7024db4641Seschrock 	char		*dm_sim_search;
7124db4641Seschrock 	char		*dm_sim_file;
7224db4641Seschrock 	boolean_t	dm_timer_istopo;
7324db4641Seschrock } disk_monitor_t;
7424db4641Seschrock 
7524db4641Seschrock static void
dt_post_ereport(fmd_hdl_t * hdl,fmd_xprt_t * xprt,const char * protocol,const char * faultname,uint64_t ena,nvlist_t * detector,nvlist_t * payload)7624db4641Seschrock dt_post_ereport(fmd_hdl_t *hdl, fmd_xprt_t *xprt, const char *protocol,
7724db4641Seschrock     const char *faultname, uint64_t ena, nvlist_t *detector, nvlist_t *payload)
7824db4641Seschrock {
7924db4641Seschrock 	nvlist_t *nvl;
8024db4641Seschrock 	int e = 0;
8124db4641Seschrock 	char fullclass[PATH_MAX];
8224db4641Seschrock 
8324db4641Seschrock 	(void) snprintf(fullclass, sizeof (fullclass), "%s.io.%s.disk.%s",
8424db4641Seschrock 	    FM_EREPORT_CLASS, protocol, faultname);
8524db4641Seschrock 
8624db4641Seschrock 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) == 0) {
8724db4641Seschrock 		e |= nvlist_add_string(nvl, FM_CLASS, fullclass);
8824db4641Seschrock 		e |= nvlist_add_uint8(nvl, FM_VERSION, FM_EREPORT_VERSION);
8924db4641Seschrock 		e |= nvlist_add_uint64(nvl, FM_EREPORT_ENA, ena);
9024db4641Seschrock 		e |= nvlist_add_nvlist(nvl, FM_EREPORT_DETECTOR, detector);
9124db4641Seschrock 		e |= nvlist_merge(nvl, payload, 0);
9224db4641Seschrock 
9324db4641Seschrock 		if (e == 0) {
9424db4641Seschrock 			fmd_xprt_post(hdl, xprt, nvl, 0);
9524db4641Seschrock 		} else {
9624db4641Seschrock 			nvlist_free(nvl);
9724db4641Seschrock 			dt_stats.dropped.fmds_value.ui64++;
9824db4641Seschrock 		}
9924db4641Seschrock 	} else {
10024db4641Seschrock 		dt_stats.dropped.fmds_value.ui64++;
10124db4641Seschrock 	}
10224db4641Seschrock }
10324db4641Seschrock 
10424db4641Seschrock /*
10524db4641Seschrock  * Check a single topo node for failure.  This simply invokes the disk status
10624db4641Seschrock  * method, and generates any ereports as necessary.
10724db4641Seschrock  */
10824db4641Seschrock static int
dt_analyze_disk(topo_hdl_t * thp,tnode_t * node,void * arg)10924db4641Seschrock dt_analyze_disk(topo_hdl_t *thp, tnode_t *node, void *arg)
11024db4641Seschrock {
11124db4641Seschrock 	nvlist_t *result;
11224db4641Seschrock 	nvlist_t *fmri, *faults;
11324db4641Seschrock 	char *protocol;
11424db4641Seschrock 	int err;
11524db4641Seschrock 	disk_monitor_t *dmp = arg;
11624db4641Seschrock 	nvpair_t *elem;
11724db4641Seschrock 	boolean_t fault;
11824db4641Seschrock 	nvlist_t *details;
11924db4641Seschrock 	char *fmristr;
12024db4641Seschrock 	nvlist_t *in = NULL;
12124db4641Seschrock 
12224db4641Seschrock 	if (topo_node_resource(node, &fmri, &err) != 0) {
12324db4641Seschrock 		fmd_hdl_error(dmp->dm_hdl, "failed to get fmri: %s\n",
12424db4641Seschrock 		    topo_strerror(err));
12524db4641Seschrock 		return (TOPO_WALK_ERR);
12624db4641Seschrock 	}
12724db4641Seschrock 
12824db4641Seschrock 	if (topo_hdl_nvalloc(thp, &in, NV_UNIQUE_NAME) != 0) {
12924db4641Seschrock 		nvlist_free(fmri);
13024db4641Seschrock 		return (TOPO_WALK_ERR);
13124db4641Seschrock 	}
13224db4641Seschrock 
13324db4641Seschrock 	if (dmp->dm_sim_search) {
13424db4641Seschrock 		fmristr = NULL;
13524db4641Seschrock 		if (topo_fmri_nvl2str(thp, fmri, &fmristr, &err) == 0 &&
13624db4641Seschrock 		    strstr(fmristr, dmp->dm_sim_search) != 0)
13724db4641Seschrock 			(void) nvlist_add_string(in, "path", dmp->dm_sim_file);
13824db4641Seschrock 		topo_hdl_strfree(thp, fmristr);
13924db4641Seschrock 	}
14024db4641Seschrock 
14124db4641Seschrock 	/*
14224db4641Seschrock 	 * Try to invoke the method.  If this fails (most likely because the
14324db4641Seschrock 	 * method is not supported), then ignore this node.
14424db4641Seschrock 	 */
14524db4641Seschrock 	if (topo_method_invoke(node, TOPO_METH_DISK_STATUS,
14624db4641Seschrock 	    TOPO_METH_DISK_STATUS_VERSION, in, &result, &err) != 0) {
14724db4641Seschrock 		nvlist_free(fmri);
14824db4641Seschrock 		nvlist_free(in);
14924db4641Seschrock 		return (TOPO_WALK_NEXT);
15024db4641Seschrock 	}
15124db4641Seschrock 
15224db4641Seschrock 	nvlist_free(in);
15324db4641Seschrock 
15424db4641Seschrock 	/*
155*0244979bSAlek Pinchuk 	 * Check for faults and post ereport(s) if needed
15624db4641Seschrock 	 */
15724db4641Seschrock 	if (nvlist_lookup_nvlist(result, "faults", &faults) == 0 &&
15824db4641Seschrock 	    nvlist_lookup_string(result, "protocol", &protocol) == 0) {
15924db4641Seschrock 		elem = NULL;
16024db4641Seschrock 		while ((elem = nvlist_next_nvpair(faults, elem)) != NULL) {
16124db4641Seschrock 			if (nvpair_type(elem) != DATA_TYPE_BOOLEAN_VALUE)
16224db4641Seschrock 				continue;
16324db4641Seschrock 
16424db4641Seschrock 			(void) nvpair_value_boolean_value(elem, &fault);
16524db4641Seschrock 			if (!fault ||
16624db4641Seschrock 			    nvlist_lookup_nvlist(result, nvpair_name(elem),
16724db4641Seschrock 			    &details) != 0)
16824db4641Seschrock 				continue;
16924db4641Seschrock 
170*0244979bSAlek Pinchuk 			if (strcmp(nvpair_name(elem),
171*0244979bSAlek Pinchuk 			    FM_EREPORT_SCSI_SSMWEAROUT) == 0 &&
172*0244979bSAlek Pinchuk 			    fmd_prop_get_int32(dmp->dm_hdl,
173*0244979bSAlek Pinchuk 			    "ignore-ssm-wearout") == FMD_B_TRUE)
174*0244979bSAlek Pinchuk 				continue;
175*0244979bSAlek Pinchuk 
17624db4641Seschrock 			dt_post_ereport(dmp->dm_hdl, dmp->dm_xprt, protocol,
177*0244979bSAlek Pinchuk 			    nvpair_name(elem),
178*0244979bSAlek Pinchuk 			    fmd_event_ena_create(dmp->dm_hdl), fmri, details);
17924db4641Seschrock 		}
18024db4641Seschrock 	}
18124db4641Seschrock 
18224db4641Seschrock 	nvlist_free(result);
18324db4641Seschrock 	nvlist_free(fmri);
18424db4641Seschrock 
18524db4641Seschrock 	return (TOPO_WALK_NEXT);
18624db4641Seschrock }
18724db4641Seschrock 
18824db4641Seschrock /*
18924db4641Seschrock  * Periodic timeout.  Iterates over all hc:// topo nodes, calling
19024db4641Seschrock  * dt_analyze_disk() for each one.
19124db4641Seschrock  */
19224db4641Seschrock /*ARGSUSED*/
19324db4641Seschrock static void
dt_timeout(fmd_hdl_t * hdl,id_t id,void * data)19424db4641Seschrock dt_timeout(fmd_hdl_t *hdl, id_t id, void *data)
19524db4641Seschrock {
19624db4641Seschrock 	topo_hdl_t *thp;
19724db4641Seschrock 	topo_walk_t *twp;
19824db4641Seschrock 	int err;
19924db4641Seschrock 	disk_monitor_t *dmp = fmd_hdl_getspecific(hdl);
20024db4641Seschrock 
20124db4641Seschrock 	dmp->dm_hdl = hdl;
20224db4641Seschrock 
20324db4641Seschrock 	thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION);
20424db4641Seschrock 	if ((twp = topo_walk_init(thp, FM_FMRI_SCHEME_HC, dt_analyze_disk,
20524db4641Seschrock 	    dmp, &err)) == NULL) {
20624db4641Seschrock 		fmd_hdl_topo_rele(hdl, thp);
20724db4641Seschrock 		fmd_hdl_error(hdl, "failed to get topology: %s\n",
20824db4641Seschrock 		    topo_strerror(err));
20924db4641Seschrock 		return;
21024db4641Seschrock 	}
21124db4641Seschrock 
21224db4641Seschrock 	if (topo_walk_step(twp, TOPO_WALK_CHILD) == TOPO_WALK_ERR) {
21324db4641Seschrock 		topo_walk_fini(twp);
21424db4641Seschrock 		fmd_hdl_topo_rele(hdl, thp);
21524db4641Seschrock 		fmd_hdl_error(hdl, "failed to walk topology\n");
21624db4641Seschrock 		return;
21724db4641Seschrock 	}
21824db4641Seschrock 
21924db4641Seschrock 	topo_walk_fini(twp);
22024db4641Seschrock 	fmd_hdl_topo_rele(hdl, thp);
22124db4641Seschrock 
22224db4641Seschrock 	dmp->dm_timer = fmd_timer_install(hdl, NULL, NULL, dmp->dm_interval);
22324db4641Seschrock 	dmp->dm_timer_istopo = B_FALSE;
22424db4641Seschrock }
22524db4641Seschrock 
22624db4641Seschrock /*
22724db4641Seschrock  * Called when the topology may have changed.  We want to examine all disks in
22824db4641Seschrock  * case a new one has been inserted, but we don't want to overwhelm the system
22924db4641Seschrock  * in the event of a flurry of topology changes, as most likely only a small
23024db4641Seschrock  * number of disks are changing.  To avoid this, we set the timer for a small
23124db4641Seschrock  * but non-trivial interval (by default 1 minute), and ignore intervening
23224db4641Seschrock  * changes during this period.  This still gives us a reasonable response time
23324db4641Seschrock  * to newly inserted devices without overwhelming the system if lots of hotplug
23424db4641Seschrock  * activity is going on.
23524db4641Seschrock  */
23624db4641Seschrock /*ARGSUSED*/
23724db4641Seschrock static void
dt_topo_change(fmd_hdl_t * hdl,topo_hdl_t * thp)23824db4641Seschrock dt_topo_change(fmd_hdl_t *hdl, topo_hdl_t *thp)
23924db4641Seschrock {
24024db4641Seschrock 	disk_monitor_t *dmp = fmd_hdl_getspecific(hdl);
24124db4641Seschrock 
24224db4641Seschrock 	if (dmp->dm_timer_istopo)
24324db4641Seschrock 		return;
24424db4641Seschrock 
24524db4641Seschrock 	fmd_timer_remove(hdl, dmp->dm_timer);
24624db4641Seschrock 	dmp->dm_timer = fmd_timer_install(hdl, NULL, NULL,
24724db4641Seschrock 	    fmd_prop_get_int64(hdl, "min-interval"));
24824db4641Seschrock 	dmp->dm_timer_istopo = B_TRUE;
24924db4641Seschrock }
25024db4641Seschrock 
25124db4641Seschrock static const fmd_prop_t fmd_props[] = {
25224db4641Seschrock 	{ "interval", FMD_TYPE_TIME, "1h" },
25324db4641Seschrock 	{ "min-interval", FMD_TYPE_TIME, "1min" },
25424db4641Seschrock 	{ "simulate", FMD_TYPE_STRING, "" },
255*0244979bSAlek Pinchuk 	{ "ignore-ssm-wearout", FMD_TYPE_BOOL, "false"},
25624db4641Seschrock 	{ NULL, 0, NULL }
25724db4641Seschrock };
25824db4641Seschrock 
25924db4641Seschrock static const fmd_hdl_ops_t fmd_ops = {
26024db4641Seschrock 	NULL,			/* fmdo_recv */
26124db4641Seschrock 	dt_timeout,		/* fmdo_timeout */
26224db4641Seschrock 	NULL, 			/* fmdo_close */
26324db4641Seschrock 	NULL,			/* fmdo_stats */
26424db4641Seschrock 	NULL,			/* fmdo_gc */
26524db4641Seschrock 	NULL,			/* fmdo_send */
26624db4641Seschrock 	dt_topo_change,		/* fmdo_topo_change */
26724db4641Seschrock };
26824db4641Seschrock 
26924db4641Seschrock static const fmd_hdl_info_t fmd_info = {
270*0244979bSAlek Pinchuk 	"Disk Transport Agent", "1.1", &fmd_ops, fmd_props
27124db4641Seschrock };
27224db4641Seschrock 
27324db4641Seschrock void
_fmd_init(fmd_hdl_t * hdl)27424db4641Seschrock _fmd_init(fmd_hdl_t *hdl)
27524db4641Seschrock {
27624db4641Seschrock 	disk_monitor_t *dmp;
27724db4641Seschrock 	char *simulate;
27824db4641Seschrock 
27924db4641Seschrock 	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
28024db4641Seschrock 		return;
28124db4641Seschrock 
28224db4641Seschrock 	(void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
28324db4641Seschrock 	    sizeof (dt_stats) / sizeof (fmd_stat_t),
28424db4641Seschrock 	    (fmd_stat_t *)&dt_stats);
28524db4641Seschrock 
28624db4641Seschrock 	dmp = fmd_hdl_zalloc(hdl, sizeof (disk_monitor_t), FMD_SLEEP);
28724db4641Seschrock 	fmd_hdl_setspecific(hdl, dmp);
28824db4641Seschrock 
28924db4641Seschrock 	dmp->dm_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL);
29024db4641Seschrock 	dmp->dm_interval = fmd_prop_get_int64(hdl, "interval");
29124db4641Seschrock 
29224db4641Seschrock 	/*
29324db4641Seschrock 	 * Determine if we have the simulate property set.  This property allows
29424db4641Seschrock 	 * the developer to substitute a faulty device based off all or part of
29524db4641Seschrock 	 * an FMRI string.  For example, one could do:
29624db4641Seschrock 	 *
297*0244979bSAlek Pinchuk 	 *	setprop simulate "bay=4/disk=4	/path/to/sim.so"
29824db4641Seschrock 	 *
29924db4641Seschrock 	 * When the transport module encounters an FMRI containing the given
30024db4641Seschrock 	 * string, then it will open the simulator file instead of the
30124db4641Seschrock 	 * corresponding device.  This can be any file, but is intended to be a
30224db4641Seschrock 	 * libdiskstatus simulator shared object, capable of faking up SCSI
30324db4641Seschrock 	 * responses.
30424db4641Seschrock 	 *
30524db4641Seschrock 	 * The property consists of two strings, an FMRI fragment and an
30624db4641Seschrock 	 * absolute path, separated by whitespace.
30724db4641Seschrock 	 */
30824db4641Seschrock 	simulate = fmd_prop_get_string(hdl, "simulate");
30924db4641Seschrock 	if (simulate[0] != '\0') {
31024db4641Seschrock 		const char *sep;
31124db4641Seschrock 		size_t len;
31224db4641Seschrock 
31324db4641Seschrock 		for (sep = simulate; *sep != '\0'; sep++) {
31424db4641Seschrock 			if (isspace(*sep))
31524db4641Seschrock 				break;
31624db4641Seschrock 		}
31724db4641Seschrock 
31824db4641Seschrock 		if (*sep != '\0') {
31924db4641Seschrock 			len = sep - simulate;
32024db4641Seschrock 
32124db4641Seschrock 			dmp->dm_sim_search = fmd_hdl_alloc(hdl,
32224db4641Seschrock 			    len + 1, FMD_SLEEP);
32324db4641Seschrock 			(void) memcpy(dmp->dm_sim_search, simulate, len);
32424db4641Seschrock 			dmp->dm_sim_search[len] = '\0';
32524db4641Seschrock 		}
32624db4641Seschrock 
32724db4641Seschrock 		for (; *sep != '\0'; sep++) {
32824db4641Seschrock 			if (!isspace(*sep))
32924db4641Seschrock 				break;
33024db4641Seschrock 		}
33124db4641Seschrock 
33224db4641Seschrock 		if (*sep != '\0') {
33324db4641Seschrock 			dmp->dm_sim_file = fmd_hdl_strdup(hdl, sep, FMD_SLEEP);
33424db4641Seschrock 		} else if (dmp->dm_sim_search) {
33524db4641Seschrock 			fmd_hdl_strfree(hdl, dmp->dm_sim_search);
33624db4641Seschrock 			dmp->dm_sim_search = NULL;
33724db4641Seschrock 		}
33824db4641Seschrock 	}
33924db4641Seschrock 	fmd_prop_free_string(hdl, simulate);
34024db4641Seschrock 
34124db4641Seschrock 	/*
34224db4641Seschrock 	 * Call our initial timer routine.  This will do an initial check of all
34324db4641Seschrock 	 * the disks, and then start the periodic timeout.
34424db4641Seschrock 	 */
34524db4641Seschrock 	dmp->dm_timer = fmd_timer_install(hdl, NULL, NULL, 0);
34624db4641Seschrock }
34724db4641Seschrock 
34824db4641Seschrock void
_fmd_fini(fmd_hdl_t * hdl)34924db4641Seschrock _fmd_fini(fmd_hdl_t *hdl)
35024db4641Seschrock {
35124db4641Seschrock 	disk_monitor_t *dmp;
35224db4641Seschrock 
35324db4641Seschrock 	dmp = fmd_hdl_getspecific(hdl);
35424db4641Seschrock 	if (dmp) {
35524db4641Seschrock 		fmd_xprt_close(hdl, dmp->dm_xprt);
35624db4641Seschrock 		fmd_hdl_strfree(hdl, dmp->dm_sim_search);
35724db4641Seschrock 		fmd_hdl_strfree(hdl, dmp->dm_sim_file);
35824db4641Seschrock 		fmd_hdl_free(hdl, dmp, sizeof (*dmp));
35924db4641Seschrock 	}
36024db4641Seschrock }
361