xref: /illumos-gate/usr/src/uts/common/fs/zfs/vdev_disk.c (revision dcba9f3f)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
25fa9e4066Sahrens 
26fa9e4066Sahrens #include <sys/zfs_context.h>
27*dcba9f3fSGeorge Wilson #include <sys/spa_impl.h>
28e7cbe64fSgw #include <sys/refcount.h>
29fa9e4066Sahrens #include <sys/vdev_disk.h>
30fa9e4066Sahrens #include <sys/vdev_impl.h>
31fa9e4066Sahrens #include <sys/fs/zfs.h>
32fa9e4066Sahrens #include <sys/zio.h>
33afefbcddSeschrock #include <sys/sunldi.h>
3451ece835Seschrock #include <sys/fm/fs/zfs.h>
35fa9e4066Sahrens 
/*
 * Virtual device vector for disks.
 */
39fa9e4066Sahrens 
40fa9e4066Sahrens extern ldi_ident_t zfs_li;
41fa9e4066Sahrens 
42fa9e4066Sahrens typedef struct vdev_disk_buf {
43fa9e4066Sahrens 	buf_t	vdb_buf;
44fa9e4066Sahrens 	zio_t	*vdb_io;
45fa9e4066Sahrens } vdev_disk_buf_t;
46fa9e4066Sahrens 
47*dcba9f3fSGeorge Wilson static void
48*dcba9f3fSGeorge Wilson vdev_disk_hold(vdev_t *vd)
49*dcba9f3fSGeorge Wilson {
50*dcba9f3fSGeorge Wilson 	ddi_devid_t devid;
51*dcba9f3fSGeorge Wilson 	char *minor;
52*dcba9f3fSGeorge Wilson 
53*dcba9f3fSGeorge Wilson 	ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));
54*dcba9f3fSGeorge Wilson 
55*dcba9f3fSGeorge Wilson 	/*
56*dcba9f3fSGeorge Wilson 	 * We must have a pathname, and it must be absolute.
57*dcba9f3fSGeorge Wilson 	 */
58*dcba9f3fSGeorge Wilson 	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/')
59*dcba9f3fSGeorge Wilson 		return;
60*dcba9f3fSGeorge Wilson 
61*dcba9f3fSGeorge Wilson 	/*
62*dcba9f3fSGeorge Wilson 	 * Only prefetch path and devid info if the device has
63*dcba9f3fSGeorge Wilson 	 * never been opened.
64*dcba9f3fSGeorge Wilson 	 */
65*dcba9f3fSGeorge Wilson 	if (vd->vdev_tsd != NULL)
66*dcba9f3fSGeorge Wilson 		return;
67*dcba9f3fSGeorge Wilson 
68*dcba9f3fSGeorge Wilson 	if (vd->vdev_wholedisk == -1ULL) {
69*dcba9f3fSGeorge Wilson 		size_t len = strlen(vd->vdev_path) + 3;
70*dcba9f3fSGeorge Wilson 		char *buf = kmem_alloc(len, KM_SLEEP);
71*dcba9f3fSGeorge Wilson 
72*dcba9f3fSGeorge Wilson 		(void) snprintf(buf, len, "%ss0", vd->vdev_path);
73*dcba9f3fSGeorge Wilson 
74*dcba9f3fSGeorge Wilson 		(void) ldi_vp_from_name(buf, &vd->vdev_name_vp);
75*dcba9f3fSGeorge Wilson 		kmem_free(buf, len);
76*dcba9f3fSGeorge Wilson 	}
77*dcba9f3fSGeorge Wilson 
78*dcba9f3fSGeorge Wilson 	if (vd->vdev_name_vp == NULL)
79*dcba9f3fSGeorge Wilson 		(void) ldi_vp_from_name(vd->vdev_path, &vd->vdev_name_vp);
80*dcba9f3fSGeorge Wilson 
81*dcba9f3fSGeorge Wilson 	if (vd->vdev_devid != NULL &&
82*dcba9f3fSGeorge Wilson 	    ddi_devid_str_decode(vd->vdev_devid, &devid, &minor) == 0) {
83*dcba9f3fSGeorge Wilson 		(void) ldi_vp_from_devid(devid, minor, &vd->vdev_devid_vp);
84*dcba9f3fSGeorge Wilson 		ddi_devid_str_free(minor);
85*dcba9f3fSGeorge Wilson 		ddi_devid_free(devid);
86*dcba9f3fSGeorge Wilson 	}
87*dcba9f3fSGeorge Wilson }
88*dcba9f3fSGeorge Wilson 
89*dcba9f3fSGeorge Wilson static void
90*dcba9f3fSGeorge Wilson vdev_disk_rele(vdev_t *vd)
91*dcba9f3fSGeorge Wilson {
92*dcba9f3fSGeorge Wilson 	ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));
93*dcba9f3fSGeorge Wilson 
94*dcba9f3fSGeorge Wilson 	if (vd->vdev_name_vp) {
95*dcba9f3fSGeorge Wilson 		VN_RELE_ASYNC(vd->vdev_name_vp,
96*dcba9f3fSGeorge Wilson 		    dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool));
97*dcba9f3fSGeorge Wilson 		vd->vdev_name_vp = NULL;
98*dcba9f3fSGeorge Wilson 	}
99*dcba9f3fSGeorge Wilson 	if (vd->vdev_devid_vp) {
100*dcba9f3fSGeorge Wilson 		VN_RELE_ASYNC(vd->vdev_devid_vp,
101*dcba9f3fSGeorge Wilson 		    dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool));
102*dcba9f3fSGeorge Wilson 		vd->vdev_devid_vp = NULL;
103*dcba9f3fSGeorge Wilson 	}
104*dcba9f3fSGeorge Wilson }
105*dcba9f3fSGeorge Wilson 
106fa9e4066Sahrens static int
107e14bb325SJeff Bonwick vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
108fa9e4066Sahrens {
1098ad4d6ddSJeff Bonwick 	spa_t *spa = vd->vdev_spa;
110fa9e4066Sahrens 	vdev_disk_t *dvd;
111e14bb325SJeff Bonwick 	struct dk_minfo dkm;
1120a4e9518Sgw 	int error;
113e14bb325SJeff Bonwick 	dev_t dev;
114e14bb325SJeff Bonwick 	int otyp;
115fa9e4066Sahrens 
116fa9e4066Sahrens 	/*
117fa9e4066Sahrens 	 * We must have a pathname, and it must be absolute.
118fa9e4066Sahrens 	 */
119fa9e4066Sahrens 	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
120fa9e4066Sahrens 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
121fa9e4066Sahrens 		return (EINVAL);
122fa9e4066Sahrens 	}
123fa9e4066Sahrens 
124095bcd66SGeorge Wilson 	/*
125095bcd66SGeorge Wilson 	 * Reopen the device if it's not currently open. Otherwise,
126095bcd66SGeorge Wilson 	 * just update the physical size of the device.
127095bcd66SGeorge Wilson 	 */
128095bcd66SGeorge Wilson 	if (vd->vdev_tsd != NULL) {
129095bcd66SGeorge Wilson 		ASSERT(vd->vdev_reopening);
130095bcd66SGeorge Wilson 		dvd = vd->vdev_tsd;
131095bcd66SGeorge Wilson 		goto skip_open;
132095bcd66SGeorge Wilson 	}
133095bcd66SGeorge Wilson 
134fa9e4066Sahrens 	dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
135fa9e4066Sahrens 
136fa9e4066Sahrens 	/*
137fa9e4066Sahrens 	 * When opening a disk device, we want to preserve the user's original
138fa9e4066Sahrens 	 * intent.  We always want to open the device by the path the user gave
139fa9e4066Sahrens 	 * us, even if it is one of multiple paths to the save device.  But we
140fa9e4066Sahrens 	 * also want to be able to survive disks being removed/recabled.
141fa9e4066Sahrens 	 * Therefore the sequence of opening devices is:
142fa9e4066Sahrens 	 *
143afefbcddSeschrock 	 * 1. Try opening the device by path.  For legacy pools without the
144afefbcddSeschrock 	 *    'whole_disk' property, attempt to fix the path by appending 's0'.
145fa9e4066Sahrens 	 *
146fa9e4066Sahrens 	 * 2. If the devid of the device matches the stored value, return
147fa9e4066Sahrens 	 *    success.
148fa9e4066Sahrens 	 *
149fa9e4066Sahrens 	 * 3. Otherwise, the device may have moved.  Try opening the device
150fa9e4066Sahrens 	 *    by the devid instead.
151fa9e4066Sahrens 	 */
152fa9e4066Sahrens 	if (vd->vdev_devid != NULL) {
153fa9e4066Sahrens 		if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid,
154fa9e4066Sahrens 		    &dvd->vd_minor) != 0) {
155fa9e4066Sahrens 			vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
156fa9e4066Sahrens 			return (EINVAL);
157fa9e4066Sahrens 		}
158fa9e4066Sahrens 	}
159fa9e4066Sahrens 
160fa9e4066Sahrens 	error = EINVAL;		/* presume failure */
161fa9e4066Sahrens 
162095bcd66SGeorge Wilson 	if (vd->vdev_path != NULL) {
163fa9e4066Sahrens 		ddi_devid_t devid;
164fa9e4066Sahrens 
165afefbcddSeschrock 		if (vd->vdev_wholedisk == -1ULL) {
166afefbcddSeschrock 			size_t len = strlen(vd->vdev_path) + 3;
167afefbcddSeschrock 			char *buf = kmem_alloc(len, KM_SLEEP);
168afefbcddSeschrock 			ldi_handle_t lh;
169afefbcddSeschrock 
170afefbcddSeschrock 			(void) snprintf(buf, len, "%ss0", vd->vdev_path);
171afefbcddSeschrock 
1728ad4d6ddSJeff Bonwick 			if (ldi_open_by_name(buf, spa_mode(spa), kcred,
173afefbcddSeschrock 			    &lh, zfs_li) == 0) {
174afefbcddSeschrock 				spa_strfree(vd->vdev_path);
175afefbcddSeschrock 				vd->vdev_path = buf;
176afefbcddSeschrock 				vd->vdev_wholedisk = 1ULL;
1778ad4d6ddSJeff Bonwick 				(void) ldi_close(lh, spa_mode(spa), kcred);
178afefbcddSeschrock 			} else {
179afefbcddSeschrock 				kmem_free(buf, len);
180afefbcddSeschrock 			}
181afefbcddSeschrock 		}
182fa9e4066Sahrens 
1838ad4d6ddSJeff Bonwick 		error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred,
184afefbcddSeschrock 		    &dvd->vd_lh, zfs_li);
185fa9e4066Sahrens 
186fa9e4066Sahrens 		/*
187fa9e4066Sahrens 		 * Compare the devid to the stored value.
188fa9e4066Sahrens 		 */
189fa9e4066Sahrens 		if (error == 0 && vd->vdev_devid != NULL &&
190fa9e4066Sahrens 		    ldi_get_devid(dvd->vd_lh, &devid) == 0) {
191fa9e4066Sahrens 			if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
192fa9e4066Sahrens 				error = EINVAL;
1938ad4d6ddSJeff Bonwick 				(void) ldi_close(dvd->vd_lh, spa_mode(spa),
1948ad4d6ddSJeff Bonwick 				    kcred);
195fa9e4066Sahrens 				dvd->vd_lh = NULL;
196fa9e4066Sahrens 			}
197fa9e4066Sahrens 			ddi_devid_free(devid);
198fa9e4066Sahrens 		}
199afefbcddSeschrock 
200afefbcddSeschrock 		/*
201afefbcddSeschrock 		 * If we succeeded in opening the device, but 'vdev_wholedisk'
202afefbcddSeschrock 		 * is not yet set, then this must be a slice.
203afefbcddSeschrock 		 */
204afefbcddSeschrock 		if (error == 0 && vd->vdev_wholedisk == -1ULL)
205afefbcddSeschrock 			vd->vdev_wholedisk = 0;
206fa9e4066Sahrens 	}
207fa9e4066Sahrens 
208fa9e4066Sahrens 	/*
209fa9e4066Sahrens 	 * If we were unable to open by path, or the devid check fails, open by
210fa9e4066Sahrens 	 * devid instead.
211fa9e4066Sahrens 	 */
212fa9e4066Sahrens 	if (error != 0 && vd->vdev_devid != NULL)
213fa9e4066Sahrens 		error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor,
2148ad4d6ddSJeff Bonwick 		    spa_mode(spa), kcred, &dvd->vd_lh, zfs_li);
215fa9e4066Sahrens 
2163d7072f8Seschrock 	/*
2173d7072f8Seschrock 	 * If all else fails, then try opening by physical path (if available)
2183d7072f8Seschrock 	 * or the logical path (if we failed due to the devid check).  While not
2193d7072f8Seschrock 	 * as reliable as the devid, this will give us something, and the higher
2203d7072f8Seschrock 	 * level vdev validation will prevent us from opening the wrong device.
2213d7072f8Seschrock 	 */
2223d7072f8Seschrock 	if (error) {
2233d7072f8Seschrock 		if (vd->vdev_physpath != NULL &&
224deb8317bSMark J Musante 		    (dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != NODEV)
2258ad4d6ddSJeff Bonwick 			error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode(spa),
2263d7072f8Seschrock 			    kcred, &dvd->vd_lh, zfs_li);
2273d7072f8Seschrock 
2283d7072f8Seschrock 		/*
2293d7072f8Seschrock 		 * Note that we don't support the legacy auto-wholedisk support
2303d7072f8Seschrock 		 * as above.  This hasn't been used in a very long time and we
2313d7072f8Seschrock 		 * don't need to propagate its oddities to this edge condition.
2323d7072f8Seschrock 		 */
233095bcd66SGeorge Wilson 		if (error && vd->vdev_path != NULL)
2348ad4d6ddSJeff Bonwick 			error = ldi_open_by_name(vd->vdev_path, spa_mode(spa),
2358ad4d6ddSJeff Bonwick 			    kcred, &dvd->vd_lh, zfs_li);
2363d7072f8Seschrock 	}
2373d7072f8Seschrock 
238e14bb325SJeff Bonwick 	if (error) {
239fa9e4066Sahrens 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
240fa9e4066Sahrens 		return (error);
241e14bb325SJeff Bonwick 	}
242fa9e4066Sahrens 
2433d7072f8Seschrock 	/*
2443d7072f8Seschrock 	 * Once a device is opened, verify that the physical device path (if
2453d7072f8Seschrock 	 * available) is up to date.
2463d7072f8Seschrock 	 */
2473d7072f8Seschrock 	if (ldi_get_dev(dvd->vd_lh, &dev) == 0 &&
2483d7072f8Seschrock 	    ldi_get_otyp(dvd->vd_lh, &otyp) == 0) {
2490a4e9518Sgw 		char *physpath, *minorname;
2500a4e9518Sgw 
2513d7072f8Seschrock 		physpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2523d7072f8Seschrock 		minorname = NULL;
2533d7072f8Seschrock 		if (ddi_dev_pathname(dev, otyp, physpath) == 0 &&
2543d7072f8Seschrock 		    ldi_get_minor_name(dvd->vd_lh, &minorname) == 0 &&
2553d7072f8Seschrock 		    (vd->vdev_physpath == NULL ||
2563d7072f8Seschrock 		    strcmp(vd->vdev_physpath, physpath) != 0)) {
2573d7072f8Seschrock 			if (vd->vdev_physpath)
2583d7072f8Seschrock 				spa_strfree(vd->vdev_physpath);
2593d7072f8Seschrock 			(void) strlcat(physpath, ":", MAXPATHLEN);
2603d7072f8Seschrock 			(void) strlcat(physpath, minorname, MAXPATHLEN);
2613d7072f8Seschrock 			vd->vdev_physpath = spa_strdup(physpath);
2623d7072f8Seschrock 		}
2633d7072f8Seschrock 		if (minorname)
2643d7072f8Seschrock 			kmem_free(minorname, strlen(minorname) + 1);
2653d7072f8Seschrock 		kmem_free(physpath, MAXPATHLEN);
2663d7072f8Seschrock 	}
2673d7072f8Seschrock 
268095bcd66SGeorge Wilson skip_open:
269fa9e4066Sahrens 	/*
270fa9e4066Sahrens 	 * Determine the actual size of the device.
271fa9e4066Sahrens 	 */
272fa9e4066Sahrens 	if (ldi_get_size(dvd->vd_lh, psize) != 0) {
273fa9e4066Sahrens 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
274fa9e4066Sahrens 		return (EINVAL);
275fa9e4066Sahrens 	}
276fa9e4066Sahrens 
277ecc2d604Sbonwick 	/*
278ecc2d604Sbonwick 	 * If we own the whole disk, try to enable disk write caching.
279ecc2d604Sbonwick 	 * We ignore errors because it's OK if we can't do it.
280ecc2d604Sbonwick 	 */
281bef6b7d2Swebaker 	if (vd->vdev_wholedisk == 1) {
282ecc2d604Sbonwick 		int wce = 1;
283ecc2d604Sbonwick 		(void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce,
284ecc2d604Sbonwick 		    FKIOCTL, kcred, NULL);
285ecc2d604Sbonwick 	}
286bef6b7d2Swebaker 
287ecc2d604Sbonwick 	/*
288ecc2d604Sbonwick 	 * Determine the device's minimum transfer size.
289ecc2d604Sbonwick 	 * If the ioctl isn't supported, assume DEV_BSIZE.
290ecc2d604Sbonwick 	 */
291ecc2d604Sbonwick 	if (ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFO, (intptr_t)&dkm,
292ecc2d604Sbonwick 	    FKIOCTL, kcred, NULL) != 0)
293ecc2d604Sbonwick 		dkm.dki_lbsize = DEV_BSIZE;
294bef6b7d2Swebaker 
295ecc2d604Sbonwick 	*ashift = highbit(MAX(dkm.dki_lbsize, SPA_MINBLOCKSIZE)) - 1;
296bef6b7d2Swebaker 
297b468a217Seschrock 	/*
298b468a217Seschrock 	 * Clear the nowritecache bit, so that on a vdev_reopen() we will
299b468a217Seschrock 	 * try again.
300b468a217Seschrock 	 */
301b468a217Seschrock 	vd->vdev_nowritecache = B_FALSE;
302b468a217Seschrock 
303fa9e4066Sahrens 	return (0);
304fa9e4066Sahrens }
305fa9e4066Sahrens 
306fa9e4066Sahrens static void
307fa9e4066Sahrens vdev_disk_close(vdev_t *vd)
308fa9e4066Sahrens {
309fa9e4066Sahrens 	vdev_disk_t *dvd = vd->vdev_tsd;
310fa9e4066Sahrens 
311095bcd66SGeorge Wilson 	if (vd->vdev_reopening || dvd == NULL)
312fa9e4066Sahrens 		return;
313fa9e4066Sahrens 
314fa9e4066Sahrens 	if (dvd->vd_minor != NULL)
315fa9e4066Sahrens 		ddi_devid_str_free(dvd->vd_minor);
316fa9e4066Sahrens 
317fa9e4066Sahrens 	if (dvd->vd_devid != NULL)
318fa9e4066Sahrens 		ddi_devid_free(dvd->vd_devid);
319fa9e4066Sahrens 
320fa9e4066Sahrens 	if (dvd->vd_lh != NULL)
3218ad4d6ddSJeff Bonwick 		(void) ldi_close(dvd->vd_lh, spa_mode(vd->vdev_spa), kcred);
322fa9e4066Sahrens 
323fa9e4066Sahrens 	kmem_free(dvd, sizeof (vdev_disk_t));
324fa9e4066Sahrens 	vd->vdev_tsd = NULL;
325fa9e4066Sahrens }
326fa9e4066Sahrens 
327e7cbe64fSgw int
328e7cbe64fSgw vdev_disk_physio(ldi_handle_t vd_lh, caddr_t data, size_t size,
329e7cbe64fSgw     uint64_t offset, int flags)
330e7cbe64fSgw {
331e7cbe64fSgw 	buf_t *bp;
332e7cbe64fSgw 	int error = 0;
333e7cbe64fSgw 
334e7cbe64fSgw 	if (vd_lh == NULL)
335e7cbe64fSgw 		return (EINVAL);
336e7cbe64fSgw 
337e7cbe64fSgw 	ASSERT(flags & B_READ || flags & B_WRITE);
338e7cbe64fSgw 
339e7cbe64fSgw 	bp = getrbuf(KM_SLEEP);
340e7cbe64fSgw 	bp->b_flags = flags | B_BUSY | B_NOCACHE | B_FAILFAST;
341e7cbe64fSgw 	bp->b_bcount = size;
342e7cbe64fSgw 	bp->b_un.b_addr = (void *)data;
343e7cbe64fSgw 	bp->b_lblkno = lbtodb(offset);
344e7cbe64fSgw 	bp->b_bufsize = size;
345e7cbe64fSgw 
346e7cbe64fSgw 	error = ldi_strategy(vd_lh, bp);
347e7cbe64fSgw 	ASSERT(error == 0);
348e7cbe64fSgw 	if ((error = biowait(bp)) == 0 && bp->b_resid != 0)
349e7cbe64fSgw 		error = EIO;
350e7cbe64fSgw 	freerbuf(bp);
351e7cbe64fSgw 
352e7cbe64fSgw 	return (error);
353e7cbe64fSgw }
354e7cbe64fSgw 
355fa9e4066Sahrens static void
356fa9e4066Sahrens vdev_disk_io_intr(buf_t *bp)
357fa9e4066Sahrens {
358fa9e4066Sahrens 	vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp;
359fa9e4066Sahrens 	zio_t *zio = vdb->vdb_io;
360fa9e4066Sahrens 
36151ece835Seschrock 	/*
36251ece835Seschrock 	 * The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO.
36351ece835Seschrock 	 * Rather than teach the rest of the stack about other error
36451ece835Seschrock 	 * possibilities (EFAULT, etc), we normalize the error value here.
36551ece835Seschrock 	 */
36651ece835Seschrock 	zio->io_error = (geterror(bp) != 0 ? EIO : 0);
36751ece835Seschrock 
36851ece835Seschrock 	if (zio->io_error == 0 && bp->b_resid != 0)
369fa9e4066Sahrens 		zio->io_error = EIO;
370fa9e4066Sahrens 
371fa9e4066Sahrens 	kmem_free(vdb, sizeof (vdev_disk_buf_t));
372fa9e4066Sahrens 
373e05725b1Sbonwick 	zio_interrupt(zio);
374fa9e4066Sahrens }
375fa9e4066Sahrens 
376f4a72450SJeff Bonwick static void
377f4a72450SJeff Bonwick vdev_disk_ioctl_free(zio_t *zio)
378f4a72450SJeff Bonwick {
379f4a72450SJeff Bonwick 	kmem_free(zio->io_vsd, sizeof (struct dk_callback));
380f4a72450SJeff Bonwick }
381f4a72450SJeff Bonwick 
38222fe2c88SJonathan Adams static const zio_vsd_ops_t vdev_disk_vsd_ops = {
38322fe2c88SJonathan Adams 	vdev_disk_ioctl_free,
38422fe2c88SJonathan Adams 	zio_vsd_default_cksum_report
38522fe2c88SJonathan Adams };
38622fe2c88SJonathan Adams 
387fa9e4066Sahrens static void
388fa9e4066Sahrens vdev_disk_ioctl_done(void *zio_arg, int error)
389fa9e4066Sahrens {
390fa9e4066Sahrens 	zio_t *zio = zio_arg;
391fa9e4066Sahrens 
392fa9e4066Sahrens 	zio->io_error = error;
393fa9e4066Sahrens 
394e05725b1Sbonwick 	zio_interrupt(zio);
395fa9e4066Sahrens }
396fa9e4066Sahrens 
397e05725b1Sbonwick static int
398fa9e4066Sahrens vdev_disk_io_start(zio_t *zio)
399fa9e4066Sahrens {
400fa9e4066Sahrens 	vdev_t *vd = zio->io_vd;
401fa9e4066Sahrens 	vdev_disk_t *dvd = vd->vdev_tsd;
402fa9e4066Sahrens 	vdev_disk_buf_t *vdb;
403e14bb325SJeff Bonwick 	struct dk_callback *dkc;
404fa9e4066Sahrens 	buf_t *bp;
405e14bb325SJeff Bonwick 	int error;
406fa9e4066Sahrens 
407fa9e4066Sahrens 	if (zio->io_type == ZIO_TYPE_IOCTL) {
408fa9e4066Sahrens 		/* XXPOLICY */
4090a4e9518Sgw 		if (!vdev_readable(vd)) {
410fa9e4066Sahrens 			zio->io_error = ENXIO;
411e05725b1Sbonwick 			return (ZIO_PIPELINE_CONTINUE);
412fa9e4066Sahrens 		}
413fa9e4066Sahrens 
414fa9e4066Sahrens 		switch (zio->io_cmd) {
415fa9e4066Sahrens 
416fa9e4066Sahrens 		case DKIOCFLUSHWRITECACHE:
417fa9e4066Sahrens 
418a2eea2e1Sahrens 			if (zfs_nocacheflush)
419a2eea2e1Sahrens 				break;
420a2eea2e1Sahrens 
421b468a217Seschrock 			if (vd->vdev_nowritecache) {
422b468a217Seschrock 				zio->io_error = ENOTSUP;
423b468a217Seschrock 				break;
424b468a217Seschrock 			}
425b468a217Seschrock 
426e14bb325SJeff Bonwick 			zio->io_vsd = dkc = kmem_alloc(sizeof (*dkc), KM_SLEEP);
42722fe2c88SJonathan Adams 			zio->io_vsd_ops = &vdev_disk_vsd_ops;
428e14bb325SJeff Bonwick 
429e14bb325SJeff Bonwick 			dkc->dkc_callback = vdev_disk_ioctl_done;
430e14bb325SJeff Bonwick 			dkc->dkc_flag = FLUSH_VOLATILE;
431e14bb325SJeff Bonwick 			dkc->dkc_cookie = zio;
432fa9e4066Sahrens 
433fa9e4066Sahrens 			error = ldi_ioctl(dvd->vd_lh, zio->io_cmd,
434e14bb325SJeff Bonwick 			    (uintptr_t)dkc, FKIOCTL, kcred, NULL);
435fa9e4066Sahrens 
436fa9e4066Sahrens 			if (error == 0) {
437fa9e4066Sahrens 				/*
438fa9e4066Sahrens 				 * The ioctl will be done asychronously,
439fa9e4066Sahrens 				 * and will call vdev_disk_ioctl_done()
440fa9e4066Sahrens 				 * upon completion.
441fa9e4066Sahrens 				 */
442e05725b1Sbonwick 				return (ZIO_PIPELINE_STOP);
443e05725b1Sbonwick 			}
444e05725b1Sbonwick 
445e05725b1Sbonwick 			if (error == ENOTSUP || error == ENOTTY) {
446b468a217Seschrock 				/*
447d5782879Smishra 				 * If we get ENOTSUP or ENOTTY, we know that
448d5782879Smishra 				 * no future attempts will ever succeed.
449d5782879Smishra 				 * In this case we set a persistent bit so
450d5782879Smishra 				 * that we don't bother with the ioctl in the
451d5782879Smishra 				 * future.
452b468a217Seschrock 				 */
453b468a217Seschrock 				vd->vdev_nowritecache = B_TRUE;
454fa9e4066Sahrens 			}
455fa9e4066Sahrens 			zio->io_error = error;
456b468a217Seschrock 
457fa9e4066Sahrens 			break;
458fa9e4066Sahrens 
459fa9e4066Sahrens 		default:
460fa9e4066Sahrens 			zio->io_error = ENOTSUP;
461fa9e4066Sahrens 		}
462fa9e4066Sahrens 
463e05725b1Sbonwick 		return (ZIO_PIPELINE_CONTINUE);
464fa9e4066Sahrens 	}
465fa9e4066Sahrens 
466fa9e4066Sahrens 	vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP);
467fa9e4066Sahrens 
468fa9e4066Sahrens 	vdb->vdb_io = zio;
469fa9e4066Sahrens 	bp = &vdb->vdb_buf;
470fa9e4066Sahrens 
471fa9e4066Sahrens 	bioinit(bp);
472e14bb325SJeff Bonwick 	bp->b_flags = B_BUSY | B_NOCACHE |
4738956713aSEric Schrock 	    (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
4748956713aSEric Schrock 	if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
4758956713aSEric Schrock 		bp->b_flags |= B_FAILFAST;
476fa9e4066Sahrens 	bp->b_bcount = zio->io_size;
477fa9e4066Sahrens 	bp->b_un.b_addr = zio->io_data;
478fa9e4066Sahrens 	bp->b_lblkno = lbtodb(zio->io_offset);
479fa9e4066Sahrens 	bp->b_bufsize = zio->io_size;
480fa9e4066Sahrens 	bp->b_iodone = (int (*)())vdev_disk_io_intr;
481fa9e4066Sahrens 
482fa9e4066Sahrens 	/* ldi_strategy() will return non-zero only on programming errors */
483e14bb325SJeff Bonwick 	VERIFY(ldi_strategy(dvd->vd_lh, bp) == 0);
484e05725b1Sbonwick 
485e05725b1Sbonwick 	return (ZIO_PIPELINE_STOP);
486fa9e4066Sahrens }
487fa9e4066Sahrens 
488e14bb325SJeff Bonwick static void
489fa9e4066Sahrens vdev_disk_io_done(zio_t *zio)
490fa9e4066Sahrens {
491e14bb325SJeff Bonwick 	vdev_t *vd = zio->io_vd;
492ea8dc4b6Seschrock 
4933d7072f8Seschrock 	/*
4943d7072f8Seschrock 	 * If the device returned EIO, then attempt a DKIOCSTATE ioctl to see if
4953d7072f8Seschrock 	 * the device has been removed.  If this is the case, then we trigger an
4960a4e9518Sgw 	 * asynchronous removal of the device. Otherwise, probe the device and
4971f7ad2e1Sgw 	 * make sure it's still accessible.
4983d7072f8Seschrock 	 */
4991d713200SEric Schrock 	if (zio->io_error == EIO && !vd->vdev_remove_wanted) {
5000a4e9518Sgw 		vdev_disk_t *dvd = vd->vdev_tsd;
501e14bb325SJeff Bonwick 		int state = DKIO_NONE;
5020a4e9518Sgw 
503e14bb325SJeff Bonwick 		if (ldi_ioctl(dvd->vd_lh, DKIOCSTATE, (intptr_t)&state,
504e14bb325SJeff Bonwick 		    FKIOCTL, kcred, NULL) == 0 && state != DKIO_INSERTED) {
5051d713200SEric Schrock 			/*
5061d713200SEric Schrock 			 * We post the resource as soon as possible, instead of
5071d713200SEric Schrock 			 * when the async removal actually happens, because the
5081d713200SEric Schrock 			 * DE is using this information to discard previous I/O
5091d713200SEric Schrock 			 * errors.
5101d713200SEric Schrock 			 */
5111d713200SEric Schrock 			zfs_post_remove(zio->io_spa, vd);
5123d7072f8Seschrock 			vd->vdev_remove_wanted = B_TRUE;
5133d7072f8Seschrock 			spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
5143d7072f8Seschrock 		}
5153d7072f8Seschrock 	}
516fa9e4066Sahrens }
517fa9e4066Sahrens 
518fa9e4066Sahrens vdev_ops_t vdev_disk_ops = {
519fa9e4066Sahrens 	vdev_disk_open,
520fa9e4066Sahrens 	vdev_disk_close,
521fa9e4066Sahrens 	vdev_default_asize,
522fa9e4066Sahrens 	vdev_disk_io_start,
523fa9e4066Sahrens 	vdev_disk_io_done,
524fa9e4066Sahrens 	NULL,
525*dcba9f3fSGeorge Wilson 	vdev_disk_hold,
526*dcba9f3fSGeorge Wilson 	vdev_disk_rele,
527fa9e4066Sahrens 	VDEV_TYPE_DISK,		/* name of this vdev type */
528fa9e4066Sahrens 	B_TRUE			/* leaf vdev */
529fa9e4066Sahrens };
530e7cbe64fSgw 
531e7cbe64fSgw /*
532051aabe6Staylor  * Given the root disk device devid or pathname, read the label from
533051aabe6Staylor  * the device, and construct a configuration nvlist.
534e7cbe64fSgw  */
535f940fbb1SLin Ling int
536f940fbb1SLin Ling vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config)
537e7cbe64fSgw {
538e7cbe64fSgw 	ldi_handle_t vd_lh;
539e7cbe64fSgw 	vdev_label_t *label;
540e7cbe64fSgw 	uint64_t s, size;
541e7cbe64fSgw 	int l;
542051aabe6Staylor 	ddi_devid_t tmpdevid;
543f4565e39SLin Ling 	int error = -1;
544051aabe6Staylor 	char *minor_name;
545e7cbe64fSgw 
546e7cbe64fSgw 	/*
547e7cbe64fSgw 	 * Read the device label and build the nvlist.
548e7cbe64fSgw 	 */
549f4565e39SLin Ling 	if (devid != NULL && ddi_devid_str_decode(devid, &tmpdevid,
550051aabe6Staylor 	    &minor_name) == 0) {
551051aabe6Staylor 		error = ldi_open_by_devid(tmpdevid, minor_name,
5528ad4d6ddSJeff Bonwick 		    FREAD, kcred, &vd_lh, zfs_li);
553051aabe6Staylor 		ddi_devid_free(tmpdevid);
554051aabe6Staylor 		ddi_devid_str_free(minor_name);
555051aabe6Staylor 	}
556051aabe6Staylor 
557f4565e39SLin Ling 	if (error && (error = ldi_open_by_name(devpath, FREAD, kcred, &vd_lh,
558f4565e39SLin Ling 	    zfs_li)))
559f940fbb1SLin Ling 		return (error);
560e7cbe64fSgw 
561bf82a41bSeschrock 	if (ldi_get_size(vd_lh, &s)) {
562bf82a41bSeschrock 		(void) ldi_close(vd_lh, FREAD, kcred);
563f940fbb1SLin Ling 		return (EIO);
564bf82a41bSeschrock 	}
565e7cbe64fSgw 
566e7cbe64fSgw 	size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), uint64_t);
567e7cbe64fSgw 	label = kmem_alloc(sizeof (vdev_label_t), KM_SLEEP);
568e7cbe64fSgw 
56917f1e64aSEric Taylor 	*config = NULL;
570e7cbe64fSgw 	for (l = 0; l < VDEV_LABELS; l++) {
571e7cbe64fSgw 		uint64_t offset, state, txg = 0;
572e7cbe64fSgw 
573e7cbe64fSgw 		/* read vdev label */
574e7cbe64fSgw 		offset = vdev_label_offset(size, l, 0);
575e7cbe64fSgw 		if (vdev_disk_physio(vd_lh, (caddr_t)label,
5762264ca7fSLin Ling 		    VDEV_SKIP_SIZE + VDEV_PHYS_SIZE, offset, B_READ) != 0)
577e7cbe64fSgw 			continue;
578e7cbe64fSgw 
579e7cbe64fSgw 		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
580f940fbb1SLin Ling 		    sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) {
581f940fbb1SLin Ling 			*config = NULL;
582e7cbe64fSgw 			continue;
583e7cbe64fSgw 		}
584e7cbe64fSgw 
585f940fbb1SLin Ling 		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
586e7cbe64fSgw 		    &state) != 0 || state >= POOL_STATE_DESTROYED) {
587f940fbb1SLin Ling 			nvlist_free(*config);
588f940fbb1SLin Ling 			*config = NULL;
589e7cbe64fSgw 			continue;
590e7cbe64fSgw 		}
591e7cbe64fSgw 
592f940fbb1SLin Ling 		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
593e7cbe64fSgw 		    &txg) != 0 || txg == 0) {
594f940fbb1SLin Ling 			nvlist_free(*config);
595f940fbb1SLin Ling 			*config = NULL;
596e7cbe64fSgw 			continue;
597e7cbe64fSgw 		}
598e7cbe64fSgw 
599e7cbe64fSgw 		break;
600e7cbe64fSgw 	}
601e7cbe64fSgw 
602e7cbe64fSgw 	kmem_free(label, sizeof (vdev_label_t));
603bf82a41bSeschrock 	(void) ldi_close(vd_lh, FREAD, kcred);
60417f1e64aSEric Taylor 	if (*config == NULL)
60517f1e64aSEric Taylor 		error = EIDRM;
606bf82a41bSeschrock 
607f940fbb1SLin Ling 	return (error);
608e7cbe64fSgw }
609