/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
 */

/* Select the "DISK" dictionary for this file. */
#pragma dictionary "DISK"

/* P abbreviates the "disk" path component named by every event below. */
#define	P	disk

/* P is declared as both the ASRU and the FRU. */
asru P;
fru P;

/*
 * Overall comments for this file:
 * <disk-as-detector> The disk-as-detector DE provides the mapping between
 * ereports generated by a kernel disk driver sd(4D) and resulting faults.
 */

/*
 * SERD engine for media error (merr) fault propagation.
 *
 * The intent is to let a file system such as ZFS attempt data
 * recovery/relocation without the disk being faulted.  The scheme relies
 * on the file system retrying the same LBA to trigger a fault when
 * recovery/relocation is not possible.
 *
 * The engine is allowed to propagate an error only once per minute; if
 * 2 or more errors are still seen for the same LBA within 24 hours,
 * there is a fault.
 */
engine serd.io.scsi.cmd.disk.dev.rqs.merr@P,
    N=1, T=24h;

/*
 * disk-as-detector: fault events.
 * The merr fault names the merr SERD engine; the derr fault names no engine.
 */
event fault.io.scsi.cmd.disk.dev.rqs.merr@P,
    engine=serd.io.scsi.cmd.disk.dev.rqs.merr@P;
event fault.io.scsi.cmd.disk.dev.rqs.derr@P;

/*
 * The uderr fault will be defined at some future time.
 * event fault.io.scsi.cmd.disk.dev.uderr@P;
 */

/*
 * disk-as-detector: upset events.
 * NOTE: for now, defining an upset is how a discard is implemented.
 */
event upset.io.scsi.cmd.disk.recovered@P;
event upset.io.scsi.cmd.disk.tran@P;
event upset.io.scsi.cmd.disk.dev.serr@P;
event upset.io.scsi.cmd.disk.dev.uderr@P;
event upset.io.scsi.cmd.disk.dev.rqs.derr@P;
event upset.io.scsi.cmd.disk.dev.rqs.merr@P;

/*
 * disk-as-detector: ereports from the kernel.
 *
 * The topology is not known for every scsi disk, yet the kernel always
 * generates ereport telemetry as though it were.  Each ereport here is
 * therefore declared with 'discard_if_config_unknown=1', so that an ereport
 * against something with unknown topology is silently discarded.  The
 * ereport data is logged either way and can be viewed with 'fmdump -eV'.
 */
event ereport.io.scsi.cmd.disk.recovered@P, discard_if_config_unknown=1;
event ereport.io.scsi.cmd.disk.tran@P, discard_if_config_unknown=1;
event ereport.io.scsi.cmd.disk.dev.serr@P, discard_if_config_unknown=1;
event ereport.io.scsi.cmd.disk.dev.uderr@P, discard_if_config_unknown=1;
event ereport.io.scsi.cmd.disk.dev.rqs.derr@P, discard_if_config_unknown=1;
event ereport.io.scsi.cmd.disk.dev.rqs.merr@P, discard_if_config_unknown=1;

/*
 * For some ereports the 'driver-assessment' member of the ereport payload
 * decides fault vs. upset.  The propagation constraints test it through
 * these two macros: FATAL holds when the member contains "fatal", and
 * NONFATAL is its negation.
 */
#define	DRIVER_ASSESSMENT_FATAL \
	(payloadprop_contains("driver-assessment", "fatal"))
#define	DRIVER_ASSESSMENT_NONFATAL \
	(!DRIVER_ASSESSMENT_FATAL)

/*
 * disk-as-detector: propagations from faults (based on
 * DRIVER_ASSESSMENT_FATAL).
 * Additional fault payload members are set to carry the fault details.
 * The members that may be needed are:
 * fault.io.scsi.cmd.disk.dev.rqs.derr
 *     op_code, key, asc, ascq
 * fault.io.scsi.cmd.disk.dev.rqs.merr
 *     op_code, key, asc, ascq, lba
 *
 * NOTE(review): op_code is listed above but is not copied by the
 * constraint below -- confirm whether that is intended.
 */
prop fault.io.scsi.cmd.disk.dev.rqs.derr@P ->
    ereport.io.scsi.cmd.disk.dev.rqs.derr@P {
	DRIVER_ASSESSMENT_FATAL &&
	setpayloadprop("key", payloadprop("key")) &&
	setpayloadprop("asc", payloadprop("asc")) &&
	setpayloadprop("ascq", payloadprop("ascq"))
    };

/*
 * The merr fault explains a merr ereport whose driver-assessment is fatal.
 * setserdsuffix() is given the ereport's LBA so that the SERD engine only
 * triggers if the fault recurs on the same LBA.  key, asc, ascq and lba
 * are then copied from the ereport payload into the fault payload.
 *
 * NOTE(review): op_code is not copied here -- confirm whether it should be.
 */
prop fault.io.scsi.cmd.disk.dev.rqs.merr@P ->
    ereport.io.scsi.cmd.disk.dev.rqs.merr@P {
	DRIVER_ASSESSMENT_FATAL &&
	setserdsuffix(payloadprop("lba")) &&
	setpayloadprop("key", payloadprop("key")) &&
	setpayloadprop("asc", payloadprop("asc")) &&
	setpayloadprop("ascq", payloadprop("ascq")) &&
	setpayloadprop("lba", payloadprop("lba"))
    };

/*
 * NOTE: this is a "may" propagation in eversholt terms.
 * The dummy ereport never has to exist.  It is only a device for making
 * the diagnosis wait until the 'within' time on that ereport (60s) has
 * run out.  After that the diagnosis carries on even though the dummy
 * ereport did not occur.
 */
event ereport.io.scsi.cmd.disk.dev.rqs.merr.dummy@P { within(60s) };

prop fault.io.scsi.cmd.disk.dev.rqs.merr@P (0) ->
    ereport.io.scsi.cmd.disk.dev.rqs.merr.dummy@P;

/*
 * The uderr fault will be propagated at some future time.
 * prop fault.io.scsi.cmd.disk.dev.uderr@P->
 *     ereport.io.scsi.cmd.disk.dev.uderr@P{ DRIVER_ASSESSMENT_FATAL };
 */

/*
 * disk-as-detector: propagations from upsets (based on
 * DRIVER_ASSESSMENT_NONFATAL).
 * A derr or merr ereport whose driver-assessment is not fatal is
 * explained by the matching upset.
 */
prop upset.io.scsi.cmd.disk.dev.rqs.merr@P ->
    ereport.io.scsi.cmd.disk.dev.rqs.merr@P {
	DRIVER_ASSESSMENT_NONFATAL
    };

prop upset.io.scsi.cmd.disk.dev.rqs.derr@P ->
    ereport.io.scsi.cmd.disk.dev.rqs.derr@P {
	DRIVER_ASSESSMENT_NONFATAL
    };

/*
 * disk-as-detector: propagations from upsets (independent of
 * driver-assessment).
 * These ereports are explained by an upset with no constraint attached.
 */
prop upset.io.scsi.cmd.disk.recovered@P -> ereport.io.scsi.cmd.disk.recovered@P;

prop upset.io.scsi.cmd.disk.tran@P -> ereport.io.scsi.cmd.disk.tran@P;

prop upset.io.scsi.cmd.disk.dev.serr@P -> ereport.io.scsi.cmd.disk.dev.serr@P;

prop upset.io.scsi.cmd.disk.dev.uderr@P -> ereport.io.scsi.cmd.disk.dev.uderr@P;

/*
 * --------------------------------------
 * The remainder of this file contains rules associated with the operation of
 * cmd/fm/modules/common/disk-monitor/disk_monitor.c code.
 *
 * The disk DE provides a very simple 1-to-1 mapping between SCSI disk events
 * generated by the disk-transport fmd module, and the resulting faults.
 */

/*
 * Fault events.
 *
 * over-temperature, predictive-failure and self-test-failure each carry
 * FITrate=10 with P as both FRU and ASRU.  FITrate is given exactly once
 * per event; the earlier text repeated it on two of the declarations.
 *
 * NOTE(review): ssm-wearout is declared with no FITrate/FRU/ASRU
 * properties, unlike the three events above -- confirm that is intended.
 */
event fault.io.disk.over-temperature@P,
    FITrate=10, FRU=P, ASRU=P;
event fault.io.disk.predictive-failure@P,
    FITrate=10, FRU=P, ASRU=P;
event fault.io.disk.self-test-failure@P,
    FITrate=10, FRU=P, ASRU=P;
event fault.io.disk.ssm-wearout@P;

/*
 * ereports, one for each fault.io.disk.* event of the same leaf name.
 */
event ereport.io.scsi.disk.ssm-wearout@P;
event ereport.io.scsi.disk.self-test-failure@P;
event ereport.io.scsi.disk.predictive-failure@P;
event ereport.io.scsi.disk.over-temperature@P;

/*
 * Propagations.
 *
 * Each fault.io.disk.* fault explains the ereport.io.scsi.disk.* ereport
 * of the same leaf name.  Two of them also copy ereport payload members
 * into the fault payload under different names:
 *   predictive-failure: additional-sense-code -> asc,
 *                       additional-sense-code-qualifier -> ascq
 *   ssm-wearout:        current-ssm-wearout -> current-wearout-percentage,
 *                       threshold-ssm-wearout -> threshold-wearout-percentage
 */
prop fault.io.disk.over-temperature@P ->
    ereport.io.scsi.disk.over-temperature@P;

prop fault.io.disk.self-test-failure@P ->
    ereport.io.scsi.disk.self-test-failure@P;

prop fault.io.disk.predictive-failure@P ->
    ereport.io.scsi.disk.predictive-failure@P {
	setpayloadprop("asc",
	    payloadprop("additional-sense-code")) &&
	setpayloadprop("ascq",
	    payloadprop("additional-sense-code-qualifier"))
    };

prop fault.io.disk.ssm-wearout@P ->
    ereport.io.scsi.disk.ssm-wearout@P {
	setpayloadprop("current-wearout-percentage",
	    payloadprop("current-ssm-wearout")) &&
	setpayloadprop("threshold-wearout-percentage",
	    payloadprop("threshold-ssm-wearout"))
    };