xref: /illumos-gate/usr/src/uts/common/fs/zfs/vdev_disk.c (revision 8ad4d6dd)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
25fa9e4066Sahrens 
26fa9e4066Sahrens #include <sys/zfs_context.h>
27fa9e4066Sahrens #include <sys/spa.h>
28e7cbe64fSgw #include <sys/refcount.h>
29fa9e4066Sahrens #include <sys/vdev_disk.h>
30fa9e4066Sahrens #include <sys/vdev_impl.h>
31fa9e4066Sahrens #include <sys/fs/zfs.h>
32fa9e4066Sahrens #include <sys/zio.h>
33afefbcddSeschrock #include <sys/sunldi.h>
3451ece835Seschrock #include <sys/fm/fs/zfs.h>
35fa9e4066Sahrens 
36fa9e4066Sahrens /*
37fa9e4066Sahrens  * Virtual device vector for disks.
38fa9e4066Sahrens  */
39fa9e4066Sahrens 
40fa9e4066Sahrens extern ldi_ident_t zfs_li;
41fa9e4066Sahrens 
42fa9e4066Sahrens typedef struct vdev_disk_buf {
43fa9e4066Sahrens 	buf_t	vdb_buf;
44fa9e4066Sahrens 	zio_t	*vdb_io;
45fa9e4066Sahrens } vdev_disk_buf_t;
46fa9e4066Sahrens 
47fa9e4066Sahrens static int
48e14bb325SJeff Bonwick vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
49fa9e4066Sahrens {
50*8ad4d6ddSJeff Bonwick 	spa_t *spa = vd->vdev_spa;
51fa9e4066Sahrens 	vdev_disk_t *dvd;
52e14bb325SJeff Bonwick 	struct dk_minfo dkm;
530a4e9518Sgw 	int error;
54e14bb325SJeff Bonwick 	dev_t dev;
55e14bb325SJeff Bonwick 	int otyp;
56fa9e4066Sahrens 
57fa9e4066Sahrens 	/*
58fa9e4066Sahrens 	 * We must have a pathname, and it must be absolute.
59fa9e4066Sahrens 	 */
60fa9e4066Sahrens 	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
61fa9e4066Sahrens 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
62fa9e4066Sahrens 		return (EINVAL);
63fa9e4066Sahrens 	}
64fa9e4066Sahrens 
65fa9e4066Sahrens 	dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
66fa9e4066Sahrens 
67fa9e4066Sahrens 	/*
68fa9e4066Sahrens 	 * When opening a disk device, we want to preserve the user's original
69fa9e4066Sahrens 	 * intent.  We always want to open the device by the path the user gave
70fa9e4066Sahrens 	 * us, even if it is one of multiple paths to the save device.  But we
71fa9e4066Sahrens 	 * also want to be able to survive disks being removed/recabled.
72fa9e4066Sahrens 	 * Therefore the sequence of opening devices is:
73fa9e4066Sahrens 	 *
74afefbcddSeschrock 	 * 1. Try opening the device by path.  For legacy pools without the
75afefbcddSeschrock 	 *    'whole_disk' property, attempt to fix the path by appending 's0'.
76fa9e4066Sahrens 	 *
77fa9e4066Sahrens 	 * 2. If the devid of the device matches the stored value, return
78fa9e4066Sahrens 	 *    success.
79fa9e4066Sahrens 	 *
80fa9e4066Sahrens 	 * 3. Otherwise, the device may have moved.  Try opening the device
81fa9e4066Sahrens 	 *    by the devid instead.
82fa9e4066Sahrens 	 *
83bf82a41bSeschrock 	 * If the vdev is part of the root pool, we avoid opening it by path.
84bf82a41bSeschrock 	 * We do this because there is no /dev path available early in boot,
85bf82a41bSeschrock 	 * and if we try to open the device by path at a later point, we can
86bf82a41bSeschrock 	 * deadlock when devfsadm attempts to open the underlying backing store
87bf82a41bSeschrock 	 * file.
88fa9e4066Sahrens 	 */
89fa9e4066Sahrens 	if (vd->vdev_devid != NULL) {
90fa9e4066Sahrens 		if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid,
91fa9e4066Sahrens 		    &dvd->vd_minor) != 0) {
92fa9e4066Sahrens 			vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
93fa9e4066Sahrens 			return (EINVAL);
94fa9e4066Sahrens 		}
95fa9e4066Sahrens 	}
96fa9e4066Sahrens 
97fa9e4066Sahrens 	error = EINVAL;		/* presume failure */
98fa9e4066Sahrens 
99*8ad4d6ddSJeff Bonwick 	if (vd->vdev_path != NULL && !spa_is_root(spa)) {
100fa9e4066Sahrens 		ddi_devid_t devid;
101fa9e4066Sahrens 
102afefbcddSeschrock 		if (vd->vdev_wholedisk == -1ULL) {
103afefbcddSeschrock 			size_t len = strlen(vd->vdev_path) + 3;
104afefbcddSeschrock 			char *buf = kmem_alloc(len, KM_SLEEP);
105afefbcddSeschrock 			ldi_handle_t lh;
106afefbcddSeschrock 
107afefbcddSeschrock 			(void) snprintf(buf, len, "%ss0", vd->vdev_path);
108afefbcddSeschrock 
109*8ad4d6ddSJeff Bonwick 			if (ldi_open_by_name(buf, spa_mode(spa), kcred,
110afefbcddSeschrock 			    &lh, zfs_li) == 0) {
111afefbcddSeschrock 				spa_strfree(vd->vdev_path);
112afefbcddSeschrock 				vd->vdev_path = buf;
113afefbcddSeschrock 				vd->vdev_wholedisk = 1ULL;
114*8ad4d6ddSJeff Bonwick 				(void) ldi_close(lh, spa_mode(spa), kcred);
115afefbcddSeschrock 			} else {
116afefbcddSeschrock 				kmem_free(buf, len);
117afefbcddSeschrock 			}
118afefbcddSeschrock 		}
119fa9e4066Sahrens 
120*8ad4d6ddSJeff Bonwick 		error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred,
121afefbcddSeschrock 		    &dvd->vd_lh, zfs_li);
122fa9e4066Sahrens 
123fa9e4066Sahrens 		/*
124fa9e4066Sahrens 		 * Compare the devid to the stored value.
125fa9e4066Sahrens 		 */
126fa9e4066Sahrens 		if (error == 0 && vd->vdev_devid != NULL &&
127fa9e4066Sahrens 		    ldi_get_devid(dvd->vd_lh, &devid) == 0) {
128fa9e4066Sahrens 			if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
129fa9e4066Sahrens 				error = EINVAL;
130*8ad4d6ddSJeff Bonwick 				(void) ldi_close(dvd->vd_lh, spa_mode(spa),
131*8ad4d6ddSJeff Bonwick 				    kcred);
132fa9e4066Sahrens 				dvd->vd_lh = NULL;
133fa9e4066Sahrens 			}
134fa9e4066Sahrens 			ddi_devid_free(devid);
135fa9e4066Sahrens 		}
136afefbcddSeschrock 
137afefbcddSeschrock 		/*
138afefbcddSeschrock 		 * If we succeeded in opening the device, but 'vdev_wholedisk'
139afefbcddSeschrock 		 * is not yet set, then this must be a slice.
140afefbcddSeschrock 		 */
141afefbcddSeschrock 		if (error == 0 && vd->vdev_wholedisk == -1ULL)
142afefbcddSeschrock 			vd->vdev_wholedisk = 0;
143fa9e4066Sahrens 	}
144fa9e4066Sahrens 
145fa9e4066Sahrens 	/*
146fa9e4066Sahrens 	 * If we were unable to open by path, or the devid check fails, open by
147fa9e4066Sahrens 	 * devid instead.
148fa9e4066Sahrens 	 */
149fa9e4066Sahrens 	if (error != 0 && vd->vdev_devid != NULL)
150fa9e4066Sahrens 		error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor,
151*8ad4d6ddSJeff Bonwick 		    spa_mode(spa), kcred, &dvd->vd_lh, zfs_li);
152fa9e4066Sahrens 
1533d7072f8Seschrock 	/*
1543d7072f8Seschrock 	 * If all else fails, then try opening by physical path (if available)
1553d7072f8Seschrock 	 * or the logical path (if we failed due to the devid check).  While not
1563d7072f8Seschrock 	 * as reliable as the devid, this will give us something, and the higher
1573d7072f8Seschrock 	 * level vdev validation will prevent us from opening the wrong device.
1583d7072f8Seschrock 	 */
1593d7072f8Seschrock 	if (error) {
1603d7072f8Seschrock 		if (vd->vdev_physpath != NULL &&
1613d7072f8Seschrock 		    (dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != ENODEV)
162*8ad4d6ddSJeff Bonwick 			error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode(spa),
1633d7072f8Seschrock 			    kcred, &dvd->vd_lh, zfs_li);
1643d7072f8Seschrock 
1653d7072f8Seschrock 		/*
1663d7072f8Seschrock 		 * Note that we don't support the legacy auto-wholedisk support
1673d7072f8Seschrock 		 * as above.  This hasn't been used in a very long time and we
1683d7072f8Seschrock 		 * don't need to propagate its oddities to this edge condition.
1693d7072f8Seschrock 		 */
170*8ad4d6ddSJeff Bonwick 		if (error && vd->vdev_path != NULL && !spa_is_root(spa))
171*8ad4d6ddSJeff Bonwick 			error = ldi_open_by_name(vd->vdev_path, spa_mode(spa),
172*8ad4d6ddSJeff Bonwick 			    kcred, &dvd->vd_lh, zfs_li);
1733d7072f8Seschrock 	}
1743d7072f8Seschrock 
175e14bb325SJeff Bonwick 	if (error) {
176fa9e4066Sahrens 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
177fa9e4066Sahrens 		return (error);
178e14bb325SJeff Bonwick 	}
179fa9e4066Sahrens 
1803d7072f8Seschrock 	/*
1813d7072f8Seschrock 	 * Once a device is opened, verify that the physical device path (if
1823d7072f8Seschrock 	 * available) is up to date.
1833d7072f8Seschrock 	 */
1843d7072f8Seschrock 	if (ldi_get_dev(dvd->vd_lh, &dev) == 0 &&
1853d7072f8Seschrock 	    ldi_get_otyp(dvd->vd_lh, &otyp) == 0) {
1860a4e9518Sgw 		char *physpath, *minorname;
1870a4e9518Sgw 
1883d7072f8Seschrock 		physpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1893d7072f8Seschrock 		minorname = NULL;
1903d7072f8Seschrock 		if (ddi_dev_pathname(dev, otyp, physpath) == 0 &&
1913d7072f8Seschrock 		    ldi_get_minor_name(dvd->vd_lh, &minorname) == 0 &&
1923d7072f8Seschrock 		    (vd->vdev_physpath == NULL ||
1933d7072f8Seschrock 		    strcmp(vd->vdev_physpath, physpath) != 0)) {
1943d7072f8Seschrock 			if (vd->vdev_physpath)
1953d7072f8Seschrock 				spa_strfree(vd->vdev_physpath);
1963d7072f8Seschrock 			(void) strlcat(physpath, ":", MAXPATHLEN);
1973d7072f8Seschrock 			(void) strlcat(physpath, minorname, MAXPATHLEN);
1983d7072f8Seschrock 			vd->vdev_physpath = spa_strdup(physpath);
1993d7072f8Seschrock 		}
2003d7072f8Seschrock 		if (minorname)
2013d7072f8Seschrock 			kmem_free(minorname, strlen(minorname) + 1);
2023d7072f8Seschrock 		kmem_free(physpath, MAXPATHLEN);
2033d7072f8Seschrock 	}
2043d7072f8Seschrock 
205fa9e4066Sahrens 	/*
206fa9e4066Sahrens 	 * Determine the actual size of the device.
207fa9e4066Sahrens 	 */
208fa9e4066Sahrens 	if (ldi_get_size(dvd->vd_lh, psize) != 0) {
209fa9e4066Sahrens 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
210fa9e4066Sahrens 		return (EINVAL);
211fa9e4066Sahrens 	}
212fa9e4066Sahrens 
213ecc2d604Sbonwick 	/*
214ecc2d604Sbonwick 	 * If we own the whole disk, try to enable disk write caching.
215ecc2d604Sbonwick 	 * We ignore errors because it's OK if we can't do it.
216ecc2d604Sbonwick 	 */
217bef6b7d2Swebaker 	if (vd->vdev_wholedisk == 1) {
218ecc2d604Sbonwick 		int wce = 1;
219ecc2d604Sbonwick 		(void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce,
220ecc2d604Sbonwick 		    FKIOCTL, kcred, NULL);
221ecc2d604Sbonwick 	}
222bef6b7d2Swebaker 
223ecc2d604Sbonwick 	/*
224ecc2d604Sbonwick 	 * Determine the device's minimum transfer size.
225ecc2d604Sbonwick 	 * If the ioctl isn't supported, assume DEV_BSIZE.
226ecc2d604Sbonwick 	 */
227ecc2d604Sbonwick 	if (ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFO, (intptr_t)&dkm,
228ecc2d604Sbonwick 	    FKIOCTL, kcred, NULL) != 0)
229ecc2d604Sbonwick 		dkm.dki_lbsize = DEV_BSIZE;
230bef6b7d2Swebaker 
231ecc2d604Sbonwick 	*ashift = highbit(MAX(dkm.dki_lbsize, SPA_MINBLOCKSIZE)) - 1;
232bef6b7d2Swebaker 
233b468a217Seschrock 	/*
234b468a217Seschrock 	 * Clear the nowritecache bit, so that on a vdev_reopen() we will
235b468a217Seschrock 	 * try again.
236b468a217Seschrock 	 */
237b468a217Seschrock 	vd->vdev_nowritecache = B_FALSE;
238b468a217Seschrock 
239fa9e4066Sahrens 	return (0);
240fa9e4066Sahrens }
241fa9e4066Sahrens 
242fa9e4066Sahrens static void
243fa9e4066Sahrens vdev_disk_close(vdev_t *vd)
244fa9e4066Sahrens {
245fa9e4066Sahrens 	vdev_disk_t *dvd = vd->vdev_tsd;
246fa9e4066Sahrens 
247fa9e4066Sahrens 	if (dvd == NULL)
248fa9e4066Sahrens 		return;
249fa9e4066Sahrens 
250fa9e4066Sahrens 	if (dvd->vd_minor != NULL)
251fa9e4066Sahrens 		ddi_devid_str_free(dvd->vd_minor);
252fa9e4066Sahrens 
253fa9e4066Sahrens 	if (dvd->vd_devid != NULL)
254fa9e4066Sahrens 		ddi_devid_free(dvd->vd_devid);
255fa9e4066Sahrens 
256fa9e4066Sahrens 	if (dvd->vd_lh != NULL)
257*8ad4d6ddSJeff Bonwick 		(void) ldi_close(dvd->vd_lh, spa_mode(vd->vdev_spa), kcred);
258fa9e4066Sahrens 
259fa9e4066Sahrens 	kmem_free(dvd, sizeof (vdev_disk_t));
260fa9e4066Sahrens 	vd->vdev_tsd = NULL;
261fa9e4066Sahrens }
262fa9e4066Sahrens 
263e7cbe64fSgw int
264e7cbe64fSgw vdev_disk_physio(ldi_handle_t vd_lh, caddr_t data, size_t size,
265e7cbe64fSgw     uint64_t offset, int flags)
266e7cbe64fSgw {
267e7cbe64fSgw 	buf_t *bp;
268e7cbe64fSgw 	int error = 0;
269e7cbe64fSgw 
270e7cbe64fSgw 	if (vd_lh == NULL)
271e7cbe64fSgw 		return (EINVAL);
272e7cbe64fSgw 
273e7cbe64fSgw 	ASSERT(flags & B_READ || flags & B_WRITE);
274e7cbe64fSgw 
275e7cbe64fSgw 	bp = getrbuf(KM_SLEEP);
276e7cbe64fSgw 	bp->b_flags = flags | B_BUSY | B_NOCACHE | B_FAILFAST;
277e7cbe64fSgw 	bp->b_bcount = size;
278e7cbe64fSgw 	bp->b_un.b_addr = (void *)data;
279e7cbe64fSgw 	bp->b_lblkno = lbtodb(offset);
280e7cbe64fSgw 	bp->b_bufsize = size;
281e7cbe64fSgw 
282e7cbe64fSgw 	error = ldi_strategy(vd_lh, bp);
283e7cbe64fSgw 	ASSERT(error == 0);
284e7cbe64fSgw 	if ((error = biowait(bp)) == 0 && bp->b_resid != 0)
285e7cbe64fSgw 		error = EIO;
286e7cbe64fSgw 	freerbuf(bp);
287e7cbe64fSgw 
288e7cbe64fSgw 	return (error);
289e7cbe64fSgw }
290e7cbe64fSgw 
291fa9e4066Sahrens static void
292fa9e4066Sahrens vdev_disk_io_intr(buf_t *bp)
293fa9e4066Sahrens {
294fa9e4066Sahrens 	vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp;
295fa9e4066Sahrens 	zio_t *zio = vdb->vdb_io;
296fa9e4066Sahrens 
29751ece835Seschrock 	/*
29851ece835Seschrock 	 * The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO.
29951ece835Seschrock 	 * Rather than teach the rest of the stack about other error
30051ece835Seschrock 	 * possibilities (EFAULT, etc), we normalize the error value here.
30151ece835Seschrock 	 */
30251ece835Seschrock 	zio->io_error = (geterror(bp) != 0 ? EIO : 0);
30351ece835Seschrock 
30451ece835Seschrock 	if (zio->io_error == 0 && bp->b_resid != 0)
305fa9e4066Sahrens 		zio->io_error = EIO;
306fa9e4066Sahrens 
307fa9e4066Sahrens 	kmem_free(vdb, sizeof (vdev_disk_buf_t));
308fa9e4066Sahrens 
309e05725b1Sbonwick 	zio_interrupt(zio);
310fa9e4066Sahrens }
311fa9e4066Sahrens 
312f4a72450SJeff Bonwick static void
313f4a72450SJeff Bonwick vdev_disk_ioctl_free(zio_t *zio)
314f4a72450SJeff Bonwick {
315f4a72450SJeff Bonwick 	kmem_free(zio->io_vsd, sizeof (struct dk_callback));
316f4a72450SJeff Bonwick }
317f4a72450SJeff Bonwick 
318fa9e4066Sahrens static void
319fa9e4066Sahrens vdev_disk_ioctl_done(void *zio_arg, int error)
320fa9e4066Sahrens {
321fa9e4066Sahrens 	zio_t *zio = zio_arg;
322fa9e4066Sahrens 
323fa9e4066Sahrens 	zio->io_error = error;
324fa9e4066Sahrens 
325e05725b1Sbonwick 	zio_interrupt(zio);
326fa9e4066Sahrens }
327fa9e4066Sahrens 
328e05725b1Sbonwick static int
329fa9e4066Sahrens vdev_disk_io_start(zio_t *zio)
330fa9e4066Sahrens {
331fa9e4066Sahrens 	vdev_t *vd = zio->io_vd;
332fa9e4066Sahrens 	vdev_disk_t *dvd = vd->vdev_tsd;
333fa9e4066Sahrens 	vdev_disk_buf_t *vdb;
334e14bb325SJeff Bonwick 	struct dk_callback *dkc;
335fa9e4066Sahrens 	buf_t *bp;
336e14bb325SJeff Bonwick 	int error;
337fa9e4066Sahrens 
338fa9e4066Sahrens 	if (zio->io_type == ZIO_TYPE_IOCTL) {
339fa9e4066Sahrens 		/* XXPOLICY */
3400a4e9518Sgw 		if (!vdev_readable(vd)) {
341fa9e4066Sahrens 			zio->io_error = ENXIO;
342e05725b1Sbonwick 			return (ZIO_PIPELINE_CONTINUE);
343fa9e4066Sahrens 		}
344fa9e4066Sahrens 
345fa9e4066Sahrens 		switch (zio->io_cmd) {
346fa9e4066Sahrens 
347fa9e4066Sahrens 		case DKIOCFLUSHWRITECACHE:
348fa9e4066Sahrens 
349a2eea2e1Sahrens 			if (zfs_nocacheflush)
350a2eea2e1Sahrens 				break;
351a2eea2e1Sahrens 
352b468a217Seschrock 			if (vd->vdev_nowritecache) {
353b468a217Seschrock 				zio->io_error = ENOTSUP;
354b468a217Seschrock 				break;
355b468a217Seschrock 			}
356b468a217Seschrock 
357e14bb325SJeff Bonwick 			zio->io_vsd = dkc = kmem_alloc(sizeof (*dkc), KM_SLEEP);
358f4a72450SJeff Bonwick 			zio->io_vsd_free = vdev_disk_ioctl_free;
359e14bb325SJeff Bonwick 
360e14bb325SJeff Bonwick 			dkc->dkc_callback = vdev_disk_ioctl_done;
361e14bb325SJeff Bonwick 			dkc->dkc_flag = FLUSH_VOLATILE;
362e14bb325SJeff Bonwick 			dkc->dkc_cookie = zio;
363fa9e4066Sahrens 
364fa9e4066Sahrens 			error = ldi_ioctl(dvd->vd_lh, zio->io_cmd,
365e14bb325SJeff Bonwick 			    (uintptr_t)dkc, FKIOCTL, kcred, NULL);
366fa9e4066Sahrens 
367fa9e4066Sahrens 			if (error == 0) {
368fa9e4066Sahrens 				/*
369fa9e4066Sahrens 				 * The ioctl will be done asychronously,
370fa9e4066Sahrens 				 * and will call vdev_disk_ioctl_done()
371fa9e4066Sahrens 				 * upon completion.
372fa9e4066Sahrens 				 */
373e05725b1Sbonwick 				return (ZIO_PIPELINE_STOP);
374e05725b1Sbonwick 			}
375e05725b1Sbonwick 
376e05725b1Sbonwick 			if (error == ENOTSUP || error == ENOTTY) {
377b468a217Seschrock 				/*
378d5782879Smishra 				 * If we get ENOTSUP or ENOTTY, we know that
379d5782879Smishra 				 * no future attempts will ever succeed.
380d5782879Smishra 				 * In this case we set a persistent bit so
381d5782879Smishra 				 * that we don't bother with the ioctl in the
382d5782879Smishra 				 * future.
383b468a217Seschrock 				 */
384b468a217Seschrock 				vd->vdev_nowritecache = B_TRUE;
385fa9e4066Sahrens 			}
386fa9e4066Sahrens 			zio->io_error = error;
387b468a217Seschrock 
388fa9e4066Sahrens 			break;
389fa9e4066Sahrens 
390fa9e4066Sahrens 		default:
391fa9e4066Sahrens 			zio->io_error = ENOTSUP;
392fa9e4066Sahrens 		}
393fa9e4066Sahrens 
394e05725b1Sbonwick 		return (ZIO_PIPELINE_CONTINUE);
395fa9e4066Sahrens 	}
396fa9e4066Sahrens 
397fa9e4066Sahrens 	vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP);
398fa9e4066Sahrens 
399fa9e4066Sahrens 	vdb->vdb_io = zio;
400fa9e4066Sahrens 	bp = &vdb->vdb_buf;
401fa9e4066Sahrens 
402fa9e4066Sahrens 	bioinit(bp);
403e14bb325SJeff Bonwick 	bp->b_flags = B_BUSY | B_NOCACHE |
404e14bb325SJeff Bonwick 	    (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE) |
405e14bb325SJeff Bonwick 	    ((zio->io_flags & ZIO_FLAG_IO_RETRY) ? 0 : B_FAILFAST);
406fa9e4066Sahrens 	bp->b_bcount = zio->io_size;
407fa9e4066Sahrens 	bp->b_un.b_addr = zio->io_data;
408fa9e4066Sahrens 	bp->b_lblkno = lbtodb(zio->io_offset);
409fa9e4066Sahrens 	bp->b_bufsize = zio->io_size;
410fa9e4066Sahrens 	bp->b_iodone = (int (*)())vdev_disk_io_intr;
411fa9e4066Sahrens 
412fa9e4066Sahrens 	/* ldi_strategy() will return non-zero only on programming errors */
413e14bb325SJeff Bonwick 	VERIFY(ldi_strategy(dvd->vd_lh, bp) == 0);
414e05725b1Sbonwick 
415e05725b1Sbonwick 	return (ZIO_PIPELINE_STOP);
416fa9e4066Sahrens }
417fa9e4066Sahrens 
418e14bb325SJeff Bonwick static void
419fa9e4066Sahrens vdev_disk_io_done(zio_t *zio)
420fa9e4066Sahrens {
421e14bb325SJeff Bonwick 	vdev_t *vd = zio->io_vd;
422ea8dc4b6Seschrock 
4233d7072f8Seschrock 	/*
4243d7072f8Seschrock 	 * If the device returned EIO, then attempt a DKIOCSTATE ioctl to see if
4253d7072f8Seschrock 	 * the device has been removed.  If this is the case, then we trigger an
4260a4e9518Sgw 	 * asynchronous removal of the device. Otherwise, probe the device and
4271f7ad2e1Sgw 	 * make sure it's still accessible.
4283d7072f8Seschrock 	 */
4293d7072f8Seschrock 	if (zio->io_error == EIO) {
4300a4e9518Sgw 		vdev_disk_t *dvd = vd->vdev_tsd;
431e14bb325SJeff Bonwick 		int state = DKIO_NONE;
4320a4e9518Sgw 
433e14bb325SJeff Bonwick 		if (ldi_ioctl(dvd->vd_lh, DKIOCSTATE, (intptr_t)&state,
434e14bb325SJeff Bonwick 		    FKIOCTL, kcred, NULL) == 0 && state != DKIO_INSERTED) {
4353d7072f8Seschrock 			vd->vdev_remove_wanted = B_TRUE;
4363d7072f8Seschrock 			spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
4373d7072f8Seschrock 		}
4383d7072f8Seschrock 	}
439fa9e4066Sahrens }
440fa9e4066Sahrens 
441fa9e4066Sahrens vdev_ops_t vdev_disk_ops = {
442fa9e4066Sahrens 	vdev_disk_open,
443fa9e4066Sahrens 	vdev_disk_close,
444fa9e4066Sahrens 	vdev_default_asize,
445fa9e4066Sahrens 	vdev_disk_io_start,
446fa9e4066Sahrens 	vdev_disk_io_done,
447fa9e4066Sahrens 	NULL,
448fa9e4066Sahrens 	VDEV_TYPE_DISK,		/* name of this vdev type */
449fa9e4066Sahrens 	B_TRUE			/* leaf vdev */
450fa9e4066Sahrens };
451e7cbe64fSgw 
452e7cbe64fSgw /*
453051aabe6Staylor  * Given the root disk device devid or pathname, read the label from
454051aabe6Staylor  * the device, and construct a configuration nvlist.
455e7cbe64fSgw  */
456f940fbb1SLin Ling int
457f940fbb1SLin Ling vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config)
458e7cbe64fSgw {
459e7cbe64fSgw 	ldi_handle_t vd_lh;
460e7cbe64fSgw 	vdev_label_t *label;
461e7cbe64fSgw 	uint64_t s, size;
462e7cbe64fSgw 	int l;
463051aabe6Staylor 	ddi_devid_t tmpdevid;
464f4565e39SLin Ling 	int error = -1;
465051aabe6Staylor 	char *minor_name;
466e7cbe64fSgw 
467e7cbe64fSgw 	/*
468e7cbe64fSgw 	 * Read the device label and build the nvlist.
469e7cbe64fSgw 	 */
470f4565e39SLin Ling 	if (devid != NULL && ddi_devid_str_decode(devid, &tmpdevid,
471051aabe6Staylor 	    &minor_name) == 0) {
472051aabe6Staylor 		error = ldi_open_by_devid(tmpdevid, minor_name,
473*8ad4d6ddSJeff Bonwick 		    FREAD, kcred, &vd_lh, zfs_li);
474051aabe6Staylor 		ddi_devid_free(tmpdevid);
475051aabe6Staylor 		ddi_devid_str_free(minor_name);
476051aabe6Staylor 	}
477051aabe6Staylor 
478f4565e39SLin Ling 	if (error && (error = ldi_open_by_name(devpath, FREAD, kcred, &vd_lh,
479f4565e39SLin Ling 	    zfs_li)))
480f940fbb1SLin Ling 		return (error);
481e7cbe64fSgw 
482bf82a41bSeschrock 	if (ldi_get_size(vd_lh, &s)) {
483bf82a41bSeschrock 		(void) ldi_close(vd_lh, FREAD, kcred);
484f940fbb1SLin Ling 		return (EIO);
485bf82a41bSeschrock 	}
486e7cbe64fSgw 
487e7cbe64fSgw 	size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), uint64_t);
488e7cbe64fSgw 	label = kmem_alloc(sizeof (vdev_label_t), KM_SLEEP);
489e7cbe64fSgw 
490e7cbe64fSgw 	for (l = 0; l < VDEV_LABELS; l++) {
491e7cbe64fSgw 		uint64_t offset, state, txg = 0;
492e7cbe64fSgw 
493e7cbe64fSgw 		/* read vdev label */
494e7cbe64fSgw 		offset = vdev_label_offset(size, l, 0);
495e7cbe64fSgw 		if (vdev_disk_physio(vd_lh, (caddr_t)label,
496e7cbe64fSgw 		    VDEV_SKIP_SIZE + VDEV_BOOT_HEADER_SIZE +
497e7cbe64fSgw 		    VDEV_PHYS_SIZE, offset, B_READ) != 0)
498e7cbe64fSgw 			continue;
499e7cbe64fSgw 
500e7cbe64fSgw 		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
501f940fbb1SLin Ling 		    sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) {
502f940fbb1SLin Ling 			*config = NULL;
503e7cbe64fSgw 			continue;
504e7cbe64fSgw 		}
505e7cbe64fSgw 
506f940fbb1SLin Ling 		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
507e7cbe64fSgw 		    &state) != 0 || state >= POOL_STATE_DESTROYED) {
508f940fbb1SLin Ling 			nvlist_free(*config);
509f940fbb1SLin Ling 			*config = NULL;
510e7cbe64fSgw 			continue;
511e7cbe64fSgw 		}
512e7cbe64fSgw 
513f940fbb1SLin Ling 		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
514e7cbe64fSgw 		    &txg) != 0 || txg == 0) {
515f940fbb1SLin Ling 			nvlist_free(*config);
516f940fbb1SLin Ling 			*config = NULL;
517e7cbe64fSgw 			continue;
518e7cbe64fSgw 		}
519e7cbe64fSgw 
520e7cbe64fSgw 		break;
521e7cbe64fSgw 	}
522e7cbe64fSgw 
523e7cbe64fSgw 	kmem_free(label, sizeof (vdev_label_t));
524bf82a41bSeschrock 	(void) ldi_close(vd_lh, FREAD, kcred);
525bf82a41bSeschrock 
526f940fbb1SLin Ling 	return (error);
527e7cbe64fSgw }
528