124db4641Seschrock /*
224db4641Seschrock * CDDL HEADER START
324db4641Seschrock *
424db4641Seschrock * The contents of this file are subject to the terms of the
524db4641Seschrock * Common Development and Distribution License (the "License").
624db4641Seschrock * You may not use this file except in compliance with the License.
724db4641Seschrock *
824db4641Seschrock * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
924db4641Seschrock * or http://www.opensolaris.org/os/licensing.
1024db4641Seschrock * See the License for the specific language governing permissions
1124db4641Seschrock * and limitations under the License.
1224db4641Seschrock *
1324db4641Seschrock * When distributing Covered Code, include this CDDL HEADER in each
1424db4641Seschrock * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1524db4641Seschrock * If applicable, add the following below this CDDL HEADER, with the
1624db4641Seschrock * fields enclosed by brackets "[]" replaced with your own identifying
1724db4641Seschrock * information: Portions Copyright [yyyy] [name of copyright owner]
1824db4641Seschrock *
1924db4641Seschrock * CDDL HEADER END
2024db4641Seschrock */
2124db4641Seschrock /*
2224db4641Seschrock * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
2324db4641Seschrock * Use is subject to license terms.
24*0244979bSAlek Pinchuk * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
2524db4641Seschrock */
2624db4641Seschrock
/*
 * Disk error transport module
 *
 * This transport module is responsible for translating between disk errors
 * and FMA ereports.  It is a read-only transport module, and checks for the
 * following failures:
 *
 *	- overtemp
 *	- predictive failure
 *	- self-test failure
 *	- solid state media wearout
 *
 * These failures are detected via the TOPO_METH_DISK_STATUS method, which
 * leverages libdiskstatus to do the actual analysis.  This transport module is
 * in charge of the following tasks:
 *
 *	- discovering available devices
 *	- periodically checking devices
 *	- managing device addition/removal
 */
4724db4641Seschrock
4824db4641Seschrock #include <ctype.h>
4924db4641Seschrock #include <fm/fmd_api.h>
5024db4641Seschrock #include <fm/libdiskstatus.h>
5124db4641Seschrock #include <fm/libtopo.h>
5224db4641Seschrock #include <fm/topo_hc.h>
5324db4641Seschrock #include <fm/topo_mod.h>
5424db4641Seschrock #include <limits.h>
5524db4641Seschrock #include <string.h>
5624db4641Seschrock #include <sys/fm/io/scsi.h>
5724db4641Seschrock #include <sys/fm/protocol.h>
5824db4641Seschrock
5924db4641Seschrock static struct dt_stat {
6024db4641Seschrock fmd_stat_t dropped;
6124db4641Seschrock } dt_stats = {
6224db4641Seschrock { "dropped", FMD_TYPE_UINT64, "number of dropped ereports" }
6324db4641Seschrock };
6424db4641Seschrock
6524db4641Seschrock typedef struct disk_monitor {
6624db4641Seschrock fmd_hdl_t *dm_hdl;
6724db4641Seschrock fmd_xprt_t *dm_xprt;
6824db4641Seschrock id_t dm_timer;
6924db4641Seschrock hrtime_t dm_interval;
7024db4641Seschrock char *dm_sim_search;
7124db4641Seschrock char *dm_sim_file;
7224db4641Seschrock boolean_t dm_timer_istopo;
7324db4641Seschrock } disk_monitor_t;
7424db4641Seschrock
7524db4641Seschrock static void
dt_post_ereport(fmd_hdl_t * hdl,fmd_xprt_t * xprt,const char * protocol,const char * faultname,uint64_t ena,nvlist_t * detector,nvlist_t * payload)7624db4641Seschrock dt_post_ereport(fmd_hdl_t *hdl, fmd_xprt_t *xprt, const char *protocol,
7724db4641Seschrock const char *faultname, uint64_t ena, nvlist_t *detector, nvlist_t *payload)
7824db4641Seschrock {
7924db4641Seschrock nvlist_t *nvl;
8024db4641Seschrock int e = 0;
8124db4641Seschrock char fullclass[PATH_MAX];
8224db4641Seschrock
8324db4641Seschrock (void) snprintf(fullclass, sizeof (fullclass), "%s.io.%s.disk.%s",
8424db4641Seschrock FM_EREPORT_CLASS, protocol, faultname);
8524db4641Seschrock
8624db4641Seschrock if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) == 0) {
8724db4641Seschrock e |= nvlist_add_string(nvl, FM_CLASS, fullclass);
8824db4641Seschrock e |= nvlist_add_uint8(nvl, FM_VERSION, FM_EREPORT_VERSION);
8924db4641Seschrock e |= nvlist_add_uint64(nvl, FM_EREPORT_ENA, ena);
9024db4641Seschrock e |= nvlist_add_nvlist(nvl, FM_EREPORT_DETECTOR, detector);
9124db4641Seschrock e |= nvlist_merge(nvl, payload, 0);
9224db4641Seschrock
9324db4641Seschrock if (e == 0) {
9424db4641Seschrock fmd_xprt_post(hdl, xprt, nvl, 0);
9524db4641Seschrock } else {
9624db4641Seschrock nvlist_free(nvl);
9724db4641Seschrock dt_stats.dropped.fmds_value.ui64++;
9824db4641Seschrock }
9924db4641Seschrock } else {
10024db4641Seschrock dt_stats.dropped.fmds_value.ui64++;
10124db4641Seschrock }
10224db4641Seschrock }
10324db4641Seschrock
10424db4641Seschrock /*
10524db4641Seschrock * Check a single topo node for failure. This simply invokes the disk status
10624db4641Seschrock * method, and generates any ereports as necessary.
10724db4641Seschrock */
10824db4641Seschrock static int
dt_analyze_disk(topo_hdl_t * thp,tnode_t * node,void * arg)10924db4641Seschrock dt_analyze_disk(topo_hdl_t *thp, tnode_t *node, void *arg)
11024db4641Seschrock {
11124db4641Seschrock nvlist_t *result;
11224db4641Seschrock nvlist_t *fmri, *faults;
11324db4641Seschrock char *protocol;
11424db4641Seschrock int err;
11524db4641Seschrock disk_monitor_t *dmp = arg;
11624db4641Seschrock nvpair_t *elem;
11724db4641Seschrock boolean_t fault;
11824db4641Seschrock nvlist_t *details;
11924db4641Seschrock char *fmristr;
12024db4641Seschrock nvlist_t *in = NULL;
12124db4641Seschrock
12224db4641Seschrock if (topo_node_resource(node, &fmri, &err) != 0) {
12324db4641Seschrock fmd_hdl_error(dmp->dm_hdl, "failed to get fmri: %s\n",
12424db4641Seschrock topo_strerror(err));
12524db4641Seschrock return (TOPO_WALK_ERR);
12624db4641Seschrock }
12724db4641Seschrock
12824db4641Seschrock if (topo_hdl_nvalloc(thp, &in, NV_UNIQUE_NAME) != 0) {
12924db4641Seschrock nvlist_free(fmri);
13024db4641Seschrock return (TOPO_WALK_ERR);
13124db4641Seschrock }
13224db4641Seschrock
13324db4641Seschrock if (dmp->dm_sim_search) {
13424db4641Seschrock fmristr = NULL;
13524db4641Seschrock if (topo_fmri_nvl2str(thp, fmri, &fmristr, &err) == 0 &&
13624db4641Seschrock strstr(fmristr, dmp->dm_sim_search) != 0)
13724db4641Seschrock (void) nvlist_add_string(in, "path", dmp->dm_sim_file);
13824db4641Seschrock topo_hdl_strfree(thp, fmristr);
13924db4641Seschrock }
14024db4641Seschrock
14124db4641Seschrock /*
14224db4641Seschrock * Try to invoke the method. If this fails (most likely because the
14324db4641Seschrock * method is not supported), then ignore this node.
14424db4641Seschrock */
14524db4641Seschrock if (topo_method_invoke(node, TOPO_METH_DISK_STATUS,
14624db4641Seschrock TOPO_METH_DISK_STATUS_VERSION, in, &result, &err) != 0) {
14724db4641Seschrock nvlist_free(fmri);
14824db4641Seschrock nvlist_free(in);
14924db4641Seschrock return (TOPO_WALK_NEXT);
15024db4641Seschrock }
15124db4641Seschrock
15224db4641Seschrock nvlist_free(in);
15324db4641Seschrock
15424db4641Seschrock /*
155*0244979bSAlek Pinchuk * Check for faults and post ereport(s) if needed
15624db4641Seschrock */
15724db4641Seschrock if (nvlist_lookup_nvlist(result, "faults", &faults) == 0 &&
15824db4641Seschrock nvlist_lookup_string(result, "protocol", &protocol) == 0) {
15924db4641Seschrock elem = NULL;
16024db4641Seschrock while ((elem = nvlist_next_nvpair(faults, elem)) != NULL) {
16124db4641Seschrock if (nvpair_type(elem) != DATA_TYPE_BOOLEAN_VALUE)
16224db4641Seschrock continue;
16324db4641Seschrock
16424db4641Seschrock (void) nvpair_value_boolean_value(elem, &fault);
16524db4641Seschrock if (!fault ||
16624db4641Seschrock nvlist_lookup_nvlist(result, nvpair_name(elem),
16724db4641Seschrock &details) != 0)
16824db4641Seschrock continue;
16924db4641Seschrock
170*0244979bSAlek Pinchuk if (strcmp(nvpair_name(elem),
171*0244979bSAlek Pinchuk FM_EREPORT_SCSI_SSMWEAROUT) == 0 &&
172*0244979bSAlek Pinchuk fmd_prop_get_int32(dmp->dm_hdl,
173*0244979bSAlek Pinchuk "ignore-ssm-wearout") == FMD_B_TRUE)
174*0244979bSAlek Pinchuk continue;
175*0244979bSAlek Pinchuk
17624db4641Seschrock dt_post_ereport(dmp->dm_hdl, dmp->dm_xprt, protocol,
177*0244979bSAlek Pinchuk nvpair_name(elem),
178*0244979bSAlek Pinchuk fmd_event_ena_create(dmp->dm_hdl), fmri, details);
17924db4641Seschrock }
18024db4641Seschrock }
18124db4641Seschrock
18224db4641Seschrock nvlist_free(result);
18324db4641Seschrock nvlist_free(fmri);
18424db4641Seschrock
18524db4641Seschrock return (TOPO_WALK_NEXT);
18624db4641Seschrock }
18724db4641Seschrock
18824db4641Seschrock /*
18924db4641Seschrock * Periodic timeout. Iterates over all hc:// topo nodes, calling
19024db4641Seschrock * dt_analyze_disk() for each one.
19124db4641Seschrock */
19224db4641Seschrock /*ARGSUSED*/
19324db4641Seschrock static void
dt_timeout(fmd_hdl_t * hdl,id_t id,void * data)19424db4641Seschrock dt_timeout(fmd_hdl_t *hdl, id_t id, void *data)
19524db4641Seschrock {
19624db4641Seschrock topo_hdl_t *thp;
19724db4641Seschrock topo_walk_t *twp;
19824db4641Seschrock int err;
19924db4641Seschrock disk_monitor_t *dmp = fmd_hdl_getspecific(hdl);
20024db4641Seschrock
20124db4641Seschrock dmp->dm_hdl = hdl;
20224db4641Seschrock
20324db4641Seschrock thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION);
20424db4641Seschrock if ((twp = topo_walk_init(thp, FM_FMRI_SCHEME_HC, dt_analyze_disk,
20524db4641Seschrock dmp, &err)) == NULL) {
20624db4641Seschrock fmd_hdl_topo_rele(hdl, thp);
20724db4641Seschrock fmd_hdl_error(hdl, "failed to get topology: %s\n",
20824db4641Seschrock topo_strerror(err));
20924db4641Seschrock return;
21024db4641Seschrock }
21124db4641Seschrock
21224db4641Seschrock if (topo_walk_step(twp, TOPO_WALK_CHILD) == TOPO_WALK_ERR) {
21324db4641Seschrock topo_walk_fini(twp);
21424db4641Seschrock fmd_hdl_topo_rele(hdl, thp);
21524db4641Seschrock fmd_hdl_error(hdl, "failed to walk topology\n");
21624db4641Seschrock return;
21724db4641Seschrock }
21824db4641Seschrock
21924db4641Seschrock topo_walk_fini(twp);
22024db4641Seschrock fmd_hdl_topo_rele(hdl, thp);
22124db4641Seschrock
22224db4641Seschrock dmp->dm_timer = fmd_timer_install(hdl, NULL, NULL, dmp->dm_interval);
22324db4641Seschrock dmp->dm_timer_istopo = B_FALSE;
22424db4641Seschrock }
22524db4641Seschrock
22624db4641Seschrock /*
22724db4641Seschrock * Called when the topology may have changed. We want to examine all disks in
22824db4641Seschrock * case a new one has been inserted, but we don't want to overwhelm the system
22924db4641Seschrock * in the event of a flurry of topology changes, as most likely only a small
23024db4641Seschrock * number of disks are changing. To avoid this, we set the timer for a small
23124db4641Seschrock * but non-trivial interval (by default 1 minute), and ignore intervening
23224db4641Seschrock * changes during this period. This still gives us a reasonable response time
23324db4641Seschrock * to newly inserted devices without overwhelming the system if lots of hotplug
23424db4641Seschrock * activity is going on.
23524db4641Seschrock */
23624db4641Seschrock /*ARGSUSED*/
23724db4641Seschrock static void
dt_topo_change(fmd_hdl_t * hdl,topo_hdl_t * thp)23824db4641Seschrock dt_topo_change(fmd_hdl_t *hdl, topo_hdl_t *thp)
23924db4641Seschrock {
24024db4641Seschrock disk_monitor_t *dmp = fmd_hdl_getspecific(hdl);
24124db4641Seschrock
24224db4641Seschrock if (dmp->dm_timer_istopo)
24324db4641Seschrock return;
24424db4641Seschrock
24524db4641Seschrock fmd_timer_remove(hdl, dmp->dm_timer);
24624db4641Seschrock dmp->dm_timer = fmd_timer_install(hdl, NULL, NULL,
24724db4641Seschrock fmd_prop_get_int64(hdl, "min-interval"));
24824db4641Seschrock dmp->dm_timer_istopo = B_TRUE;
24924db4641Seschrock }
25024db4641Seschrock
25124db4641Seschrock static const fmd_prop_t fmd_props[] = {
25224db4641Seschrock { "interval", FMD_TYPE_TIME, "1h" },
25324db4641Seschrock { "min-interval", FMD_TYPE_TIME, "1min" },
25424db4641Seschrock { "simulate", FMD_TYPE_STRING, "" },
255*0244979bSAlek Pinchuk { "ignore-ssm-wearout", FMD_TYPE_BOOL, "false"},
25624db4641Seschrock { NULL, 0, NULL }
25724db4641Seschrock };
25824db4641Seschrock
25924db4641Seschrock static const fmd_hdl_ops_t fmd_ops = {
26024db4641Seschrock NULL, /* fmdo_recv */
26124db4641Seschrock dt_timeout, /* fmdo_timeout */
26224db4641Seschrock NULL, /* fmdo_close */
26324db4641Seschrock NULL, /* fmdo_stats */
26424db4641Seschrock NULL, /* fmdo_gc */
26524db4641Seschrock NULL, /* fmdo_send */
26624db4641Seschrock dt_topo_change, /* fmdo_topo_change */
26724db4641Seschrock };
26824db4641Seschrock
26924db4641Seschrock static const fmd_hdl_info_t fmd_info = {
270*0244979bSAlek Pinchuk "Disk Transport Agent", "1.1", &fmd_ops, fmd_props
27124db4641Seschrock };
27224db4641Seschrock
27324db4641Seschrock void
_fmd_init(fmd_hdl_t * hdl)27424db4641Seschrock _fmd_init(fmd_hdl_t *hdl)
27524db4641Seschrock {
27624db4641Seschrock disk_monitor_t *dmp;
27724db4641Seschrock char *simulate;
27824db4641Seschrock
27924db4641Seschrock if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0)
28024db4641Seschrock return;
28124db4641Seschrock
28224db4641Seschrock (void) fmd_stat_create(hdl, FMD_STAT_NOALLOC,
28324db4641Seschrock sizeof (dt_stats) / sizeof (fmd_stat_t),
28424db4641Seschrock (fmd_stat_t *)&dt_stats);
28524db4641Seschrock
28624db4641Seschrock dmp = fmd_hdl_zalloc(hdl, sizeof (disk_monitor_t), FMD_SLEEP);
28724db4641Seschrock fmd_hdl_setspecific(hdl, dmp);
28824db4641Seschrock
28924db4641Seschrock dmp->dm_xprt = fmd_xprt_open(hdl, FMD_XPRT_RDONLY, NULL, NULL);
29024db4641Seschrock dmp->dm_interval = fmd_prop_get_int64(hdl, "interval");
29124db4641Seschrock
29224db4641Seschrock /*
29324db4641Seschrock * Determine if we have the simulate property set. This property allows
29424db4641Seschrock * the developer to substitute a faulty device based off all or part of
29524db4641Seschrock * an FMRI string. For example, one could do:
29624db4641Seschrock *
297*0244979bSAlek Pinchuk * setprop simulate "bay=4/disk=4 /path/to/sim.so"
29824db4641Seschrock *
29924db4641Seschrock * When the transport module encounters an FMRI containing the given
30024db4641Seschrock * string, then it will open the simulator file instead of the
30124db4641Seschrock * corresponding device. This can be any file, but is intended to be a
30224db4641Seschrock * libdiskstatus simulator shared object, capable of faking up SCSI
30324db4641Seschrock * responses.
30424db4641Seschrock *
30524db4641Seschrock * The property consists of two strings, an FMRI fragment and an
30624db4641Seschrock * absolute path, separated by whitespace.
30724db4641Seschrock */
30824db4641Seschrock simulate = fmd_prop_get_string(hdl, "simulate");
30924db4641Seschrock if (simulate[0] != '\0') {
31024db4641Seschrock const char *sep;
31124db4641Seschrock size_t len;
31224db4641Seschrock
31324db4641Seschrock for (sep = simulate; *sep != '\0'; sep++) {
31424db4641Seschrock if (isspace(*sep))
31524db4641Seschrock break;
31624db4641Seschrock }
31724db4641Seschrock
31824db4641Seschrock if (*sep != '\0') {
31924db4641Seschrock len = sep - simulate;
32024db4641Seschrock
32124db4641Seschrock dmp->dm_sim_search = fmd_hdl_alloc(hdl,
32224db4641Seschrock len + 1, FMD_SLEEP);
32324db4641Seschrock (void) memcpy(dmp->dm_sim_search, simulate, len);
32424db4641Seschrock dmp->dm_sim_search[len] = '\0';
32524db4641Seschrock }
32624db4641Seschrock
32724db4641Seschrock for (; *sep != '\0'; sep++) {
32824db4641Seschrock if (!isspace(*sep))
32924db4641Seschrock break;
33024db4641Seschrock }
33124db4641Seschrock
33224db4641Seschrock if (*sep != '\0') {
33324db4641Seschrock dmp->dm_sim_file = fmd_hdl_strdup(hdl, sep, FMD_SLEEP);
33424db4641Seschrock } else if (dmp->dm_sim_search) {
33524db4641Seschrock fmd_hdl_strfree(hdl, dmp->dm_sim_search);
33624db4641Seschrock dmp->dm_sim_search = NULL;
33724db4641Seschrock }
33824db4641Seschrock }
33924db4641Seschrock fmd_prop_free_string(hdl, simulate);
34024db4641Seschrock
34124db4641Seschrock /*
34224db4641Seschrock * Call our initial timer routine. This will do an initial check of all
34324db4641Seschrock * the disks, and then start the periodic timeout.
34424db4641Seschrock */
34524db4641Seschrock dmp->dm_timer = fmd_timer_install(hdl, NULL, NULL, 0);
34624db4641Seschrock }
34724db4641Seschrock
34824db4641Seschrock void
_fmd_fini(fmd_hdl_t * hdl)34924db4641Seschrock _fmd_fini(fmd_hdl_t *hdl)
35024db4641Seschrock {
35124db4641Seschrock disk_monitor_t *dmp;
35224db4641Seschrock
35324db4641Seschrock dmp = fmd_hdl_getspecific(hdl);
35424db4641Seschrock if (dmp) {
35524db4641Seschrock fmd_xprt_close(hdl, dmp->dm_xprt);
35624db4641Seschrock fmd_hdl_strfree(hdl, dmp->dm_sim_search);
35724db4641Seschrock fmd_hdl_strfree(hdl, dmp->dm_sim_file);
35824db4641Seschrock fmd_hdl_free(hdl, dmp, sizeof (*dmp));
35924db4641Seschrock }
36024db4641Seschrock }
361