/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev.h>
#include <sys/vdev_impl.h>
#include <sys/zio.h>

#include <sys/fm/fs/zfs.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/sysevent.h>

/*
 * This general routine is responsible for generating all the different ZFS
 * ereports.  The payload is dependent on the class, and which arguments are
 * supplied to the function:
 *
 *      EREPORT                 POOL    VDEV    IO
 *      block                   X       X       X
 *      data                    X               X
 *      device                  X       X
 *      pool                    X
 *
 * If we are in a loading state, all errors are chained together by the same
 * SPA-wide ENA.
 *
 * For isolated I/O requests, we get the ENA from the zio_t.  The propagation
 * gets very complicated due to RAID-Z, gang blocks, and vdev caching.  We want
 * to chain together all ereports associated with a logical piece of data.  For
 * read I/Os, there are basically three 'types' of I/O, which form a roughly
 * layered diagram:
 *
 *      +---------------+
 *      | Aggregate I/O |       No associated logical data or device
 *      +---------------+
 *              |
 *              V
 *      +---------------+       Reads associated with a piece of logical data.
 *      |   Read I/O    |       This includes reads on behalf of RAID-Z,
 *      +---------------+       mirrors, gang blocks, retries, etc.
 *              |
 *              V
 *      +---------------+       Reads associated with a particular device, but
 *      | Physical I/O  |       no logical data.  Issued as part of vdev caching
 *      +---------------+       and I/O aggregation.
 *
 * Note that 'physical I/O' here is not the same terminology as used in the rest
 * of ZIO.  Typically, 'physical I/O' simply means that there is no attached
 * block pointer.  But I/O with no associated block pointer can still be related
 * to a logical piece of data (i.e. RAID-Z requests).
 *
 * Purely physical I/Os always have unique ENAs.  They are not related to a
 * particular piece of logical data, and therefore cannot be chained together.
 * We still generate an ereport, but the DE doesn't correlate it with any
 * logical piece of data.  When such an I/O fails, the delegated I/O requests
 * will issue a retry, which will trigger the 'real' ereport with the correct
 * ENA.
 *
 * We keep track of the ENA for a ZIO chain through the 'io_logical' member.
 * When a new logical I/O is issued, we set this to point to itself.  Child I/Os
 * then inherit this pointer, so that when it is first set subsequent failures
 * will use the same ENA.  If a physical I/O is issued (by passing the
 * ZIO_FLAG_NOBOOKMARK flag), then this pointer is reset, guaranteeing that a
 * unique ENA will be generated.
For an aggregate I/O, this pointer is set to 91ea8dc4b6Seschrock * NULL, and no ereport will be generated (since it doesn't actually correspond 92ea8dc4b6Seschrock * to any particular device or piece of data). 93ea8dc4b6Seschrock */ 94ea8dc4b6Seschrock void 95ea8dc4b6Seschrock zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, 96ea8dc4b6Seschrock uint64_t stateoroffset, uint64_t size) 97ea8dc4b6Seschrock { 98ea8dc4b6Seschrock #ifdef _KERNEL 99ea8dc4b6Seschrock nvlist_t *ereport, *detector; 100ea8dc4b6Seschrock uint64_t ena; 101ea8dc4b6Seschrock char class[64]; 102ea8dc4b6Seschrock 103ea8dc4b6Seschrock /* 104ea8dc4b6Seschrock * If we are doing a spa_tryimport(), ignore errors. 105ea8dc4b6Seschrock */ 106ea8dc4b6Seschrock if (spa->spa_load_state == SPA_LOAD_TRYIMPORT) 107ea8dc4b6Seschrock return; 108ea8dc4b6Seschrock 109ea8dc4b6Seschrock /* 110ea8dc4b6Seschrock * If we are in the middle of opening a pool, and the previous attempt 111ea8dc4b6Seschrock * failed, don't bother logging any new ereports - we're just going to 112ea8dc4b6Seschrock * get the same diagnosis anyway. 113ea8dc4b6Seschrock */ 114ea8dc4b6Seschrock if (spa->spa_load_state != SPA_LOAD_NONE && 115ea8dc4b6Seschrock spa->spa_last_open_failed) 116ea8dc4b6Seschrock return; 117ea8dc4b6Seschrock 118ea8dc4b6Seschrock /* 119ea8dc4b6Seschrock * Ignore any errors from I/Os that we are going to retry anyway - we 120ea8dc4b6Seschrock * only generate errors from the final failure. 121ea8dc4b6Seschrock */ 122ea8dc4b6Seschrock if (zio && zio_should_retry(zio)) 123ea8dc4b6Seschrock return; 124ea8dc4b6Seschrock 125*b468a217Seschrock /* 126*b468a217Seschrock * If this is not a read or write zio, ignore the error. This can occur 127*b468a217Seschrock * if the DKIOCFLUSHWRITECACHE ioctl fails. 
128*b468a217Seschrock */ 129*b468a217Seschrock if (zio && zio->io_type != ZIO_TYPE_READ && 130*b468a217Seschrock zio->io_type != ZIO_TYPE_WRITE) 131*b468a217Seschrock return; 132*b468a217Seschrock 133ea8dc4b6Seschrock if ((ereport = fm_nvlist_create(NULL)) == NULL) 134ea8dc4b6Seschrock return; 135ea8dc4b6Seschrock 136ea8dc4b6Seschrock if ((detector = fm_nvlist_create(NULL)) == NULL) { 137ea8dc4b6Seschrock fm_nvlist_destroy(ereport, FM_NVA_FREE); 138ea8dc4b6Seschrock return; 139ea8dc4b6Seschrock } 140ea8dc4b6Seschrock 141ea8dc4b6Seschrock /* 142ea8dc4b6Seschrock * Serialize ereport generation 143ea8dc4b6Seschrock */ 144ea8dc4b6Seschrock mutex_enter(&spa->spa_errlist_lock); 145ea8dc4b6Seschrock 146ea8dc4b6Seschrock /* 147ea8dc4b6Seschrock * Determine the ENA to use for this event. If we are in a loading 148ea8dc4b6Seschrock * state, use a SPA-wide ENA. Otherwise, if we are in an I/O state, use 149ea8dc4b6Seschrock * a root zio-wide ENA. Otherwise, simply use a unique ENA. 150ea8dc4b6Seschrock */ 151ea8dc4b6Seschrock if (spa->spa_load_state != SPA_LOAD_NONE) { 152ea8dc4b6Seschrock if (spa->spa_ena == 0) 153ea8dc4b6Seschrock spa->spa_ena = fm_ena_generate(0, FM_ENA_FMT1); 154ea8dc4b6Seschrock ena = spa->spa_ena; 155ea8dc4b6Seschrock } else if (zio != NULL && zio->io_logical != NULL) { 156ea8dc4b6Seschrock if (zio->io_logical->io_ena == 0) 157ea8dc4b6Seschrock zio->io_logical->io_ena = 158ea8dc4b6Seschrock fm_ena_generate(0, FM_ENA_FMT1); 159ea8dc4b6Seschrock ena = zio->io_logical->io_ena; 160ea8dc4b6Seschrock } else { 161ea8dc4b6Seschrock ena = fm_ena_generate(0, FM_ENA_FMT1); 162ea8dc4b6Seschrock } 163ea8dc4b6Seschrock 164ea8dc4b6Seschrock /* 165ea8dc4b6Seschrock * Construct the full class, detector, and other standard FMA fields. 
166ea8dc4b6Seschrock */ 167ea8dc4b6Seschrock (void) snprintf(class, sizeof (class), "%s.%s", 168ea8dc4b6Seschrock ZFS_ERROR_CLASS, subclass); 169ea8dc4b6Seschrock 170ea8dc4b6Seschrock fm_fmri_zfs_set(detector, FM_ZFS_SCHEME_VERSION, spa_guid(spa), 171ea8dc4b6Seschrock vd != NULL ? vd->vdev_guid : 0); 172ea8dc4b6Seschrock 173ea8dc4b6Seschrock fm_ereport_set(ereport, FM_EREPORT_VERSION, class, ena, detector, NULL); 174ea8dc4b6Seschrock 175ea8dc4b6Seschrock /* 176ea8dc4b6Seschrock * Construct the per-ereport payload, depending on which parameters are 177ea8dc4b6Seschrock * passed in. 178ea8dc4b6Seschrock */ 179ea8dc4b6Seschrock 180ea8dc4b6Seschrock /* 181ea8dc4b6Seschrock * Generic payload members common to all ereports. 182ea8dc4b6Seschrock * 183ea8dc4b6Seschrock * The direct reference to spa_name is used rather than spa_name() 184ea8dc4b6Seschrock * because of the asynchronous nature of the zio pipeline. spa_name() 185ea8dc4b6Seschrock * asserts that the config lock is held in some form. This is always 186ea8dc4b6Seschrock * the case in I/O context, but because the check for RW_WRITER compares 187ea8dc4b6Seschrock * against 'curthread', we may be in an asynchronous context and blow 188ea8dc4b6Seschrock * this assert. Rather than loosen this assert, we acknowledge that all 189ea8dc4b6Seschrock * contexts in which this function is called (pool open, I/O) are safe, 190ea8dc4b6Seschrock * and dereference the name directly. 
191ea8dc4b6Seschrock */ 192ea8dc4b6Seschrock fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL, 193ea8dc4b6Seschrock DATA_TYPE_STRING, spa->spa_name, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, 194ea8dc4b6Seschrock DATA_TYPE_UINT64, spa_guid(spa), 195ea8dc4b6Seschrock FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, DATA_TYPE_INT32, 196ea8dc4b6Seschrock spa->spa_load_state, NULL); 197ea8dc4b6Seschrock 198ea8dc4b6Seschrock if (vd != NULL) { 199ea8dc4b6Seschrock vdev_t *pvd = vd->vdev_parent; 200ea8dc4b6Seschrock 201ea8dc4b6Seschrock fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, 202ea8dc4b6Seschrock DATA_TYPE_UINT64, vd->vdev_guid, 203ea8dc4b6Seschrock FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, 204ea8dc4b6Seschrock DATA_TYPE_STRING, vd->vdev_ops->vdev_op_type, NULL); 205ea8dc4b6Seschrock if (vd->vdev_path) 206ea8dc4b6Seschrock fm_payload_set(ereport, 207ea8dc4b6Seschrock FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH, 208ea8dc4b6Seschrock DATA_TYPE_STRING, vd->vdev_path, NULL); 209ea8dc4b6Seschrock if (vd->vdev_devid) 210ea8dc4b6Seschrock fm_payload_set(ereport, 211ea8dc4b6Seschrock FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID, 212ea8dc4b6Seschrock DATA_TYPE_STRING, vd->vdev_devid, NULL); 213ea8dc4b6Seschrock 214ea8dc4b6Seschrock if (pvd != NULL) { 215ea8dc4b6Seschrock fm_payload_set(ereport, 216ea8dc4b6Seschrock FM_EREPORT_PAYLOAD_ZFS_PARENT_GUID, 217ea8dc4b6Seschrock DATA_TYPE_UINT64, pvd->vdev_guid, 218ea8dc4b6Seschrock FM_EREPORT_PAYLOAD_ZFS_PARENT_TYPE, 219ea8dc4b6Seschrock DATA_TYPE_STRING, pvd->vdev_ops->vdev_op_type, 220ea8dc4b6Seschrock NULL); 221ea8dc4b6Seschrock if (pvd->vdev_path) 222ea8dc4b6Seschrock fm_payload_set(ereport, 223ea8dc4b6Seschrock FM_EREPORT_PAYLOAD_ZFS_PARENT_PATH, 224ea8dc4b6Seschrock DATA_TYPE_STRING, vd->vdev_path, NULL); 225ea8dc4b6Seschrock if (pvd->vdev_devid) 226ea8dc4b6Seschrock fm_payload_set(ereport, 227ea8dc4b6Seschrock FM_EREPORT_PAYLOAD_ZFS_PARENT_DEVID, 228ea8dc4b6Seschrock DATA_TYPE_STRING, pvd->vdev_devid, NULL); 229ea8dc4b6Seschrock } 230ea8dc4b6Seschrock } 
231ea8dc4b6Seschrock 232ea8dc4b6Seschrock if (zio != NULL) { 233ea8dc4b6Seschrock /* 234ea8dc4b6Seschrock * Payload common to all I/Os. 235ea8dc4b6Seschrock */ 236ea8dc4b6Seschrock fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_ERR, 237ea8dc4b6Seschrock DATA_TYPE_INT32, zio->io_error, NULL); 238ea8dc4b6Seschrock 239ea8dc4b6Seschrock /* 240ea8dc4b6Seschrock * If the 'size' parameter is non-zero, it indicates this is a 241ea8dc4b6Seschrock * RAID-Z or other I/O where the physical offset and length are 242ea8dc4b6Seschrock * provided for us, instead of within the zio_t. 243ea8dc4b6Seschrock */ 244ea8dc4b6Seschrock if (vd != NULL) { 245ea8dc4b6Seschrock if (size) 246ea8dc4b6Seschrock fm_payload_set(ereport, 247ea8dc4b6Seschrock FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET, 248ea8dc4b6Seschrock DATA_TYPE_UINT64, stateoroffset, 249ea8dc4b6Seschrock FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE, 250ea8dc4b6Seschrock DATA_TYPE_UINT64, size); 251ea8dc4b6Seschrock else 252ea8dc4b6Seschrock fm_payload_set(ereport, 253ea8dc4b6Seschrock FM_EREPORT_PAYLOAD_ZFS_ZIO_OFFSET, 254ea8dc4b6Seschrock DATA_TYPE_UINT64, zio->io_offset, 255ea8dc4b6Seschrock FM_EREPORT_PAYLOAD_ZFS_ZIO_SIZE, 256ea8dc4b6Seschrock DATA_TYPE_UINT64, zio->io_size); 257ea8dc4b6Seschrock } 258ea8dc4b6Seschrock 259ea8dc4b6Seschrock /* 260ea8dc4b6Seschrock * Payload for I/Os with corresponding logical information. 
261ea8dc4b6Seschrock */ 262ea8dc4b6Seschrock if (zio->io_logical != NULL) 263ea8dc4b6Seschrock fm_payload_set(ereport, 264ea8dc4b6Seschrock FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJSET, 265ea8dc4b6Seschrock DATA_TYPE_UINT64, 266ea8dc4b6Seschrock zio->io_logical->io_bookmark.zb_objset, 267ea8dc4b6Seschrock FM_EREPORT_PAYLOAD_ZFS_ZIO_OBJECT, 268ea8dc4b6Seschrock DATA_TYPE_UINT64, 269ea8dc4b6Seschrock zio->io_logical->io_bookmark.zb_object, 270ea8dc4b6Seschrock FM_EREPORT_PAYLOAD_ZFS_ZIO_LEVEL, 271ea8dc4b6Seschrock DATA_TYPE_INT32, 272ea8dc4b6Seschrock zio->io_logical->io_bookmark.zb_level, 273ea8dc4b6Seschrock FM_EREPORT_PAYLOAD_ZFS_ZIO_BLKID, 274ea8dc4b6Seschrock DATA_TYPE_UINT64, 275ea8dc4b6Seschrock zio->io_logical->io_bookmark.zb_blkid); 276ea8dc4b6Seschrock } else if (vd != NULL) { 277ea8dc4b6Seschrock /* 278ea8dc4b6Seschrock * If we have a vdev but no zio, this is a device fault, and the 279ea8dc4b6Seschrock * 'stateoroffset' parameter indicates the previous state of the 280ea8dc4b6Seschrock * vdev. 281ea8dc4b6Seschrock */ 282ea8dc4b6Seschrock fm_payload_set(ereport, 283ea8dc4b6Seschrock FM_EREPORT_PAYLOAD_ZFS_PREV_STATE, 284ea8dc4b6Seschrock DATA_TYPE_UINT64, stateoroffset, NULL); 285ea8dc4b6Seschrock } 286ea8dc4b6Seschrock mutex_exit(&spa->spa_errlist_lock); 287ea8dc4b6Seschrock 288ea8dc4b6Seschrock fm_ereport_post(ereport, EVCH_SLEEP); 289ea8dc4b6Seschrock 290ea8dc4b6Seschrock fm_nvlist_destroy(ereport, FM_NVA_FREE); 291ea8dc4b6Seschrock fm_nvlist_destroy(detector, FM_NVA_FREE); 292ea8dc4b6Seschrock #endif 293ea8dc4b6Seschrock } 294ea8dc4b6Seschrock 295ea8dc4b6Seschrock /* 296ea8dc4b6Seschrock * The 'resource.fs.zfs.ok' event is an internal signal that the associated 297ea8dc4b6Seschrock * resource (pool or disk) has been identified by ZFS as healthy. This will 298ea8dc4b6Seschrock * then trigger the DE to close the associated case, if any. 
299ea8dc4b6Seschrock */ 300ea8dc4b6Seschrock void 301ea8dc4b6Seschrock zfs_post_ok(spa_t *spa, vdev_t *vd) 302ea8dc4b6Seschrock { 303ea8dc4b6Seschrock #ifdef _KERNEL 304ea8dc4b6Seschrock nvlist_t *resource; 305ea8dc4b6Seschrock char class[64]; 306ea8dc4b6Seschrock 307ea8dc4b6Seschrock if ((resource = fm_nvlist_create(NULL)) == NULL) 308ea8dc4b6Seschrock return; 309ea8dc4b6Seschrock 310ea8dc4b6Seschrock (void) snprintf(class, sizeof (class), "%s.%s.%s", FM_RSRC_RESOURCE, 311ea8dc4b6Seschrock ZFS_ERROR_CLASS, FM_RESOURCE_OK); 312ea8dc4b6Seschrock VERIFY(nvlist_add_uint8(resource, FM_VERSION, FM_RSRC_VERSION) == 0); 313ea8dc4b6Seschrock VERIFY(nvlist_add_string(resource, FM_CLASS, class) == 0); 314ea8dc4b6Seschrock VERIFY(nvlist_add_uint64(resource, 315ea8dc4b6Seschrock FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, spa_guid(spa)) == 0); 316ea8dc4b6Seschrock if (vd) 317ea8dc4b6Seschrock VERIFY(nvlist_add_uint64(resource, 318ea8dc4b6Seschrock FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vd->vdev_guid) == 0); 319ea8dc4b6Seschrock 320ea8dc4b6Seschrock fm_ereport_post(resource, EVCH_SLEEP); 321ea8dc4b6Seschrock 322ea8dc4b6Seschrock fm_nvlist_destroy(resource, FM_NVA_FREE); 323ea8dc4b6Seschrock #endif 324ea8dc4b6Seschrock } 325