xref: /illumos-gate/usr/src/uts/common/fs/zfs/vdev_disk.c (revision 095bcd66)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
25fa9e4066Sahrens 
26fa9e4066Sahrens #include <sys/zfs_context.h>
27fa9e4066Sahrens #include <sys/spa.h>
28e7cbe64fSgw #include <sys/refcount.h>
29fa9e4066Sahrens #include <sys/vdev_disk.h>
30fa9e4066Sahrens #include <sys/vdev_impl.h>
31fa9e4066Sahrens #include <sys/fs/zfs.h>
32fa9e4066Sahrens #include <sys/zio.h>
33afefbcddSeschrock #include <sys/sunldi.h>
3451ece835Seschrock #include <sys/fm/fs/zfs.h>
35fa9e4066Sahrens 
/*
 * Virtual device vector for disks.
 */
39fa9e4066Sahrens 
40fa9e4066Sahrens extern ldi_ident_t zfs_li;
41fa9e4066Sahrens 
42fa9e4066Sahrens typedef struct vdev_disk_buf {
43fa9e4066Sahrens 	buf_t	vdb_buf;
44fa9e4066Sahrens 	zio_t	*vdb_io;
45fa9e4066Sahrens } vdev_disk_buf_t;
46fa9e4066Sahrens 
47fa9e4066Sahrens static int
48e14bb325SJeff Bonwick vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
49fa9e4066Sahrens {
508ad4d6ddSJeff Bonwick 	spa_t *spa = vd->vdev_spa;
51fa9e4066Sahrens 	vdev_disk_t *dvd;
52e14bb325SJeff Bonwick 	struct dk_minfo dkm;
530a4e9518Sgw 	int error;
54e14bb325SJeff Bonwick 	dev_t dev;
55e14bb325SJeff Bonwick 	int otyp;
56fa9e4066Sahrens 
57fa9e4066Sahrens 	/*
58fa9e4066Sahrens 	 * We must have a pathname, and it must be absolute.
59fa9e4066Sahrens 	 */
60fa9e4066Sahrens 	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
61fa9e4066Sahrens 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
62fa9e4066Sahrens 		return (EINVAL);
63fa9e4066Sahrens 	}
64fa9e4066Sahrens 
65*095bcd66SGeorge Wilson 	/*
66*095bcd66SGeorge Wilson 	 * Reopen the device if it's not currently open. Otherwise,
67*095bcd66SGeorge Wilson 	 * just update the physical size of the device.
68*095bcd66SGeorge Wilson 	 */
69*095bcd66SGeorge Wilson 	if (vd->vdev_tsd != NULL) {
70*095bcd66SGeorge Wilson 		ASSERT(vd->vdev_reopening);
71*095bcd66SGeorge Wilson 		dvd = vd->vdev_tsd;
72*095bcd66SGeorge Wilson 		goto skip_open;
73*095bcd66SGeorge Wilson 	}
74*095bcd66SGeorge Wilson 
75fa9e4066Sahrens 	dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
76fa9e4066Sahrens 
77fa9e4066Sahrens 	/*
78fa9e4066Sahrens 	 * When opening a disk device, we want to preserve the user's original
79fa9e4066Sahrens 	 * intent.  We always want to open the device by the path the user gave
80fa9e4066Sahrens 	 * us, even if it is one of multiple paths to the save device.  But we
81fa9e4066Sahrens 	 * also want to be able to survive disks being removed/recabled.
82fa9e4066Sahrens 	 * Therefore the sequence of opening devices is:
83fa9e4066Sahrens 	 *
84afefbcddSeschrock 	 * 1. Try opening the device by path.  For legacy pools without the
85afefbcddSeschrock 	 *    'whole_disk' property, attempt to fix the path by appending 's0'.
86fa9e4066Sahrens 	 *
87fa9e4066Sahrens 	 * 2. If the devid of the device matches the stored value, return
88fa9e4066Sahrens 	 *    success.
89fa9e4066Sahrens 	 *
90fa9e4066Sahrens 	 * 3. Otherwise, the device may have moved.  Try opening the device
91fa9e4066Sahrens 	 *    by the devid instead.
92fa9e4066Sahrens 	 */
93fa9e4066Sahrens 	if (vd->vdev_devid != NULL) {
94fa9e4066Sahrens 		if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid,
95fa9e4066Sahrens 		    &dvd->vd_minor) != 0) {
96fa9e4066Sahrens 			vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
97fa9e4066Sahrens 			return (EINVAL);
98fa9e4066Sahrens 		}
99fa9e4066Sahrens 	}
100fa9e4066Sahrens 
101fa9e4066Sahrens 	error = EINVAL;		/* presume failure */
102fa9e4066Sahrens 
103*095bcd66SGeorge Wilson 	if (vd->vdev_path != NULL) {
104fa9e4066Sahrens 		ddi_devid_t devid;
105fa9e4066Sahrens 
106afefbcddSeschrock 		if (vd->vdev_wholedisk == -1ULL) {
107afefbcddSeschrock 			size_t len = strlen(vd->vdev_path) + 3;
108afefbcddSeschrock 			char *buf = kmem_alloc(len, KM_SLEEP);
109afefbcddSeschrock 			ldi_handle_t lh;
110afefbcddSeschrock 
111afefbcddSeschrock 			(void) snprintf(buf, len, "%ss0", vd->vdev_path);
112afefbcddSeschrock 
1138ad4d6ddSJeff Bonwick 			if (ldi_open_by_name(buf, spa_mode(spa), kcred,
114afefbcddSeschrock 			    &lh, zfs_li) == 0) {
115afefbcddSeschrock 				spa_strfree(vd->vdev_path);
116afefbcddSeschrock 				vd->vdev_path = buf;
117afefbcddSeschrock 				vd->vdev_wholedisk = 1ULL;
1188ad4d6ddSJeff Bonwick 				(void) ldi_close(lh, spa_mode(spa), kcred);
119afefbcddSeschrock 			} else {
120afefbcddSeschrock 				kmem_free(buf, len);
121afefbcddSeschrock 			}
122afefbcddSeschrock 		}
123fa9e4066Sahrens 
1248ad4d6ddSJeff Bonwick 		error = ldi_open_by_name(vd->vdev_path, spa_mode(spa), kcred,
125afefbcddSeschrock 		    &dvd->vd_lh, zfs_li);
126fa9e4066Sahrens 
127fa9e4066Sahrens 		/*
128fa9e4066Sahrens 		 * Compare the devid to the stored value.
129fa9e4066Sahrens 		 */
130fa9e4066Sahrens 		if (error == 0 && vd->vdev_devid != NULL &&
131fa9e4066Sahrens 		    ldi_get_devid(dvd->vd_lh, &devid) == 0) {
132fa9e4066Sahrens 			if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
133fa9e4066Sahrens 				error = EINVAL;
1348ad4d6ddSJeff Bonwick 				(void) ldi_close(dvd->vd_lh, spa_mode(spa),
1358ad4d6ddSJeff Bonwick 				    kcred);
136fa9e4066Sahrens 				dvd->vd_lh = NULL;
137fa9e4066Sahrens 			}
138fa9e4066Sahrens 			ddi_devid_free(devid);
139fa9e4066Sahrens 		}
140afefbcddSeschrock 
141afefbcddSeschrock 		/*
142afefbcddSeschrock 		 * If we succeeded in opening the device, but 'vdev_wholedisk'
143afefbcddSeschrock 		 * is not yet set, then this must be a slice.
144afefbcddSeschrock 		 */
145afefbcddSeschrock 		if (error == 0 && vd->vdev_wholedisk == -1ULL)
146afefbcddSeschrock 			vd->vdev_wholedisk = 0;
147fa9e4066Sahrens 	}
148fa9e4066Sahrens 
149fa9e4066Sahrens 	/*
150fa9e4066Sahrens 	 * If we were unable to open by path, or the devid check fails, open by
151fa9e4066Sahrens 	 * devid instead.
152fa9e4066Sahrens 	 */
153fa9e4066Sahrens 	if (error != 0 && vd->vdev_devid != NULL)
154fa9e4066Sahrens 		error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor,
1558ad4d6ddSJeff Bonwick 		    spa_mode(spa), kcred, &dvd->vd_lh, zfs_li);
156fa9e4066Sahrens 
1573d7072f8Seschrock 	/*
1583d7072f8Seschrock 	 * If all else fails, then try opening by physical path (if available)
1593d7072f8Seschrock 	 * or the logical path (if we failed due to the devid check).  While not
1603d7072f8Seschrock 	 * as reliable as the devid, this will give us something, and the higher
1613d7072f8Seschrock 	 * level vdev validation will prevent us from opening the wrong device.
1623d7072f8Seschrock 	 */
1633d7072f8Seschrock 	if (error) {
1643d7072f8Seschrock 		if (vd->vdev_physpath != NULL &&
165deb8317bSMark J Musante 		    (dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != NODEV)
1668ad4d6ddSJeff Bonwick 			error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode(spa),
1673d7072f8Seschrock 			    kcred, &dvd->vd_lh, zfs_li);
1683d7072f8Seschrock 
1693d7072f8Seschrock 		/*
1703d7072f8Seschrock 		 * Note that we don't support the legacy auto-wholedisk support
1713d7072f8Seschrock 		 * as above.  This hasn't been used in a very long time and we
1723d7072f8Seschrock 		 * don't need to propagate its oddities to this edge condition.
1733d7072f8Seschrock 		 */
174*095bcd66SGeorge Wilson 		if (error && vd->vdev_path != NULL)
1758ad4d6ddSJeff Bonwick 			error = ldi_open_by_name(vd->vdev_path, spa_mode(spa),
1768ad4d6ddSJeff Bonwick 			    kcred, &dvd->vd_lh, zfs_li);
1773d7072f8Seschrock 	}
1783d7072f8Seschrock 
179e14bb325SJeff Bonwick 	if (error) {
180fa9e4066Sahrens 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
181fa9e4066Sahrens 		return (error);
182e14bb325SJeff Bonwick 	}
183fa9e4066Sahrens 
1843d7072f8Seschrock 	/*
1853d7072f8Seschrock 	 * Once a device is opened, verify that the physical device path (if
1863d7072f8Seschrock 	 * available) is up to date.
1873d7072f8Seschrock 	 */
1883d7072f8Seschrock 	if (ldi_get_dev(dvd->vd_lh, &dev) == 0 &&
1893d7072f8Seschrock 	    ldi_get_otyp(dvd->vd_lh, &otyp) == 0) {
1900a4e9518Sgw 		char *physpath, *minorname;
1910a4e9518Sgw 
1923d7072f8Seschrock 		physpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1933d7072f8Seschrock 		minorname = NULL;
1943d7072f8Seschrock 		if (ddi_dev_pathname(dev, otyp, physpath) == 0 &&
1953d7072f8Seschrock 		    ldi_get_minor_name(dvd->vd_lh, &minorname) == 0 &&
1963d7072f8Seschrock 		    (vd->vdev_physpath == NULL ||
1973d7072f8Seschrock 		    strcmp(vd->vdev_physpath, physpath) != 0)) {
1983d7072f8Seschrock 			if (vd->vdev_physpath)
1993d7072f8Seschrock 				spa_strfree(vd->vdev_physpath);
2003d7072f8Seschrock 			(void) strlcat(physpath, ":", MAXPATHLEN);
2013d7072f8Seschrock 			(void) strlcat(physpath, minorname, MAXPATHLEN);
2023d7072f8Seschrock 			vd->vdev_physpath = spa_strdup(physpath);
2033d7072f8Seschrock 		}
2043d7072f8Seschrock 		if (minorname)
2053d7072f8Seschrock 			kmem_free(minorname, strlen(minorname) + 1);
2063d7072f8Seschrock 		kmem_free(physpath, MAXPATHLEN);
2073d7072f8Seschrock 	}
2083d7072f8Seschrock 
209*095bcd66SGeorge Wilson skip_open:
210fa9e4066Sahrens 	/*
211fa9e4066Sahrens 	 * Determine the actual size of the device.
212fa9e4066Sahrens 	 */
213fa9e4066Sahrens 	if (ldi_get_size(dvd->vd_lh, psize) != 0) {
214fa9e4066Sahrens 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
215fa9e4066Sahrens 		return (EINVAL);
216fa9e4066Sahrens 	}
217fa9e4066Sahrens 
218ecc2d604Sbonwick 	/*
219ecc2d604Sbonwick 	 * If we own the whole disk, try to enable disk write caching.
220ecc2d604Sbonwick 	 * We ignore errors because it's OK if we can't do it.
221ecc2d604Sbonwick 	 */
222bef6b7d2Swebaker 	if (vd->vdev_wholedisk == 1) {
223ecc2d604Sbonwick 		int wce = 1;
224ecc2d604Sbonwick 		(void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce,
225ecc2d604Sbonwick 		    FKIOCTL, kcred, NULL);
226ecc2d604Sbonwick 	}
227bef6b7d2Swebaker 
228ecc2d604Sbonwick 	/*
229ecc2d604Sbonwick 	 * Determine the device's minimum transfer size.
230ecc2d604Sbonwick 	 * If the ioctl isn't supported, assume DEV_BSIZE.
231ecc2d604Sbonwick 	 */
232ecc2d604Sbonwick 	if (ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFO, (intptr_t)&dkm,
233ecc2d604Sbonwick 	    FKIOCTL, kcred, NULL) != 0)
234ecc2d604Sbonwick 		dkm.dki_lbsize = DEV_BSIZE;
235bef6b7d2Swebaker 
236ecc2d604Sbonwick 	*ashift = highbit(MAX(dkm.dki_lbsize, SPA_MINBLOCKSIZE)) - 1;
237bef6b7d2Swebaker 
238b468a217Seschrock 	/*
239b468a217Seschrock 	 * Clear the nowritecache bit, so that on a vdev_reopen() we will
240b468a217Seschrock 	 * try again.
241b468a217Seschrock 	 */
242b468a217Seschrock 	vd->vdev_nowritecache = B_FALSE;
243b468a217Seschrock 
244fa9e4066Sahrens 	return (0);
245fa9e4066Sahrens }
246fa9e4066Sahrens 
247fa9e4066Sahrens static void
248fa9e4066Sahrens vdev_disk_close(vdev_t *vd)
249fa9e4066Sahrens {
250fa9e4066Sahrens 	vdev_disk_t *dvd = vd->vdev_tsd;
251fa9e4066Sahrens 
252*095bcd66SGeorge Wilson 	if (vd->vdev_reopening || dvd == NULL)
253fa9e4066Sahrens 		return;
254fa9e4066Sahrens 
255fa9e4066Sahrens 	if (dvd->vd_minor != NULL)
256fa9e4066Sahrens 		ddi_devid_str_free(dvd->vd_minor);
257fa9e4066Sahrens 
258fa9e4066Sahrens 	if (dvd->vd_devid != NULL)
259fa9e4066Sahrens 		ddi_devid_free(dvd->vd_devid);
260fa9e4066Sahrens 
261fa9e4066Sahrens 	if (dvd->vd_lh != NULL)
2628ad4d6ddSJeff Bonwick 		(void) ldi_close(dvd->vd_lh, spa_mode(vd->vdev_spa), kcred);
263fa9e4066Sahrens 
264fa9e4066Sahrens 	kmem_free(dvd, sizeof (vdev_disk_t));
265fa9e4066Sahrens 	vd->vdev_tsd = NULL;
266fa9e4066Sahrens }
267fa9e4066Sahrens 
268e7cbe64fSgw int
269e7cbe64fSgw vdev_disk_physio(ldi_handle_t vd_lh, caddr_t data, size_t size,
270e7cbe64fSgw     uint64_t offset, int flags)
271e7cbe64fSgw {
272e7cbe64fSgw 	buf_t *bp;
273e7cbe64fSgw 	int error = 0;
274e7cbe64fSgw 
275e7cbe64fSgw 	if (vd_lh == NULL)
276e7cbe64fSgw 		return (EINVAL);
277e7cbe64fSgw 
278e7cbe64fSgw 	ASSERT(flags & B_READ || flags & B_WRITE);
279e7cbe64fSgw 
280e7cbe64fSgw 	bp = getrbuf(KM_SLEEP);
281e7cbe64fSgw 	bp->b_flags = flags | B_BUSY | B_NOCACHE | B_FAILFAST;
282e7cbe64fSgw 	bp->b_bcount = size;
283e7cbe64fSgw 	bp->b_un.b_addr = (void *)data;
284e7cbe64fSgw 	bp->b_lblkno = lbtodb(offset);
285e7cbe64fSgw 	bp->b_bufsize = size;
286e7cbe64fSgw 
287e7cbe64fSgw 	error = ldi_strategy(vd_lh, bp);
288e7cbe64fSgw 	ASSERT(error == 0);
289e7cbe64fSgw 	if ((error = biowait(bp)) == 0 && bp->b_resid != 0)
290e7cbe64fSgw 		error = EIO;
291e7cbe64fSgw 	freerbuf(bp);
292e7cbe64fSgw 
293e7cbe64fSgw 	return (error);
294e7cbe64fSgw }
295e7cbe64fSgw 
296fa9e4066Sahrens static void
297fa9e4066Sahrens vdev_disk_io_intr(buf_t *bp)
298fa9e4066Sahrens {
299fa9e4066Sahrens 	vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp;
300fa9e4066Sahrens 	zio_t *zio = vdb->vdb_io;
301fa9e4066Sahrens 
30251ece835Seschrock 	/*
30351ece835Seschrock 	 * The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO.
30451ece835Seschrock 	 * Rather than teach the rest of the stack about other error
30551ece835Seschrock 	 * possibilities (EFAULT, etc), we normalize the error value here.
30651ece835Seschrock 	 */
30751ece835Seschrock 	zio->io_error = (geterror(bp) != 0 ? EIO : 0);
30851ece835Seschrock 
30951ece835Seschrock 	if (zio->io_error == 0 && bp->b_resid != 0)
310fa9e4066Sahrens 		zio->io_error = EIO;
311fa9e4066Sahrens 
312fa9e4066Sahrens 	kmem_free(vdb, sizeof (vdev_disk_buf_t));
313fa9e4066Sahrens 
314e05725b1Sbonwick 	zio_interrupt(zio);
315fa9e4066Sahrens }
316fa9e4066Sahrens 
317f4a72450SJeff Bonwick static void
318f4a72450SJeff Bonwick vdev_disk_ioctl_free(zio_t *zio)
319f4a72450SJeff Bonwick {
320f4a72450SJeff Bonwick 	kmem_free(zio->io_vsd, sizeof (struct dk_callback));
321f4a72450SJeff Bonwick }
322f4a72450SJeff Bonwick 
32322fe2c88SJonathan Adams static const zio_vsd_ops_t vdev_disk_vsd_ops = {
32422fe2c88SJonathan Adams 	vdev_disk_ioctl_free,
32522fe2c88SJonathan Adams 	zio_vsd_default_cksum_report
32622fe2c88SJonathan Adams };
32722fe2c88SJonathan Adams 
328fa9e4066Sahrens static void
329fa9e4066Sahrens vdev_disk_ioctl_done(void *zio_arg, int error)
330fa9e4066Sahrens {
331fa9e4066Sahrens 	zio_t *zio = zio_arg;
332fa9e4066Sahrens 
333fa9e4066Sahrens 	zio->io_error = error;
334fa9e4066Sahrens 
335e05725b1Sbonwick 	zio_interrupt(zio);
336fa9e4066Sahrens }
337fa9e4066Sahrens 
338e05725b1Sbonwick static int
339fa9e4066Sahrens vdev_disk_io_start(zio_t *zio)
340fa9e4066Sahrens {
341fa9e4066Sahrens 	vdev_t *vd = zio->io_vd;
342fa9e4066Sahrens 	vdev_disk_t *dvd = vd->vdev_tsd;
343fa9e4066Sahrens 	vdev_disk_buf_t *vdb;
344e14bb325SJeff Bonwick 	struct dk_callback *dkc;
345fa9e4066Sahrens 	buf_t *bp;
346e14bb325SJeff Bonwick 	int error;
347fa9e4066Sahrens 
348fa9e4066Sahrens 	if (zio->io_type == ZIO_TYPE_IOCTL) {
349fa9e4066Sahrens 		/* XXPOLICY */
3500a4e9518Sgw 		if (!vdev_readable(vd)) {
351fa9e4066Sahrens 			zio->io_error = ENXIO;
352e05725b1Sbonwick 			return (ZIO_PIPELINE_CONTINUE);
353fa9e4066Sahrens 		}
354fa9e4066Sahrens 
355fa9e4066Sahrens 		switch (zio->io_cmd) {
356fa9e4066Sahrens 
357fa9e4066Sahrens 		case DKIOCFLUSHWRITECACHE:
358fa9e4066Sahrens 
359a2eea2e1Sahrens 			if (zfs_nocacheflush)
360a2eea2e1Sahrens 				break;
361a2eea2e1Sahrens 
362b468a217Seschrock 			if (vd->vdev_nowritecache) {
363b468a217Seschrock 				zio->io_error = ENOTSUP;
364b468a217Seschrock 				break;
365b468a217Seschrock 			}
366b468a217Seschrock 
367e14bb325SJeff Bonwick 			zio->io_vsd = dkc = kmem_alloc(sizeof (*dkc), KM_SLEEP);
36822fe2c88SJonathan Adams 			zio->io_vsd_ops = &vdev_disk_vsd_ops;
369e14bb325SJeff Bonwick 
370e14bb325SJeff Bonwick 			dkc->dkc_callback = vdev_disk_ioctl_done;
371e14bb325SJeff Bonwick 			dkc->dkc_flag = FLUSH_VOLATILE;
372e14bb325SJeff Bonwick 			dkc->dkc_cookie = zio;
373fa9e4066Sahrens 
374fa9e4066Sahrens 			error = ldi_ioctl(dvd->vd_lh, zio->io_cmd,
375e14bb325SJeff Bonwick 			    (uintptr_t)dkc, FKIOCTL, kcred, NULL);
376fa9e4066Sahrens 
377fa9e4066Sahrens 			if (error == 0) {
378fa9e4066Sahrens 				/*
379fa9e4066Sahrens 				 * The ioctl will be done asychronously,
380fa9e4066Sahrens 				 * and will call vdev_disk_ioctl_done()
381fa9e4066Sahrens 				 * upon completion.
382fa9e4066Sahrens 				 */
383e05725b1Sbonwick 				return (ZIO_PIPELINE_STOP);
384e05725b1Sbonwick 			}
385e05725b1Sbonwick 
386e05725b1Sbonwick 			if (error == ENOTSUP || error == ENOTTY) {
387b468a217Seschrock 				/*
388d5782879Smishra 				 * If we get ENOTSUP or ENOTTY, we know that
389d5782879Smishra 				 * no future attempts will ever succeed.
390d5782879Smishra 				 * In this case we set a persistent bit so
391d5782879Smishra 				 * that we don't bother with the ioctl in the
392d5782879Smishra 				 * future.
393b468a217Seschrock 				 */
394b468a217Seschrock 				vd->vdev_nowritecache = B_TRUE;
395fa9e4066Sahrens 			}
396fa9e4066Sahrens 			zio->io_error = error;
397b468a217Seschrock 
398fa9e4066Sahrens 			break;
399fa9e4066Sahrens 
400fa9e4066Sahrens 		default:
401fa9e4066Sahrens 			zio->io_error = ENOTSUP;
402fa9e4066Sahrens 		}
403fa9e4066Sahrens 
404e05725b1Sbonwick 		return (ZIO_PIPELINE_CONTINUE);
405fa9e4066Sahrens 	}
406fa9e4066Sahrens 
407fa9e4066Sahrens 	vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP);
408fa9e4066Sahrens 
409fa9e4066Sahrens 	vdb->vdb_io = zio;
410fa9e4066Sahrens 	bp = &vdb->vdb_buf;
411fa9e4066Sahrens 
412fa9e4066Sahrens 	bioinit(bp);
413e14bb325SJeff Bonwick 	bp->b_flags = B_BUSY | B_NOCACHE |
4148956713aSEric Schrock 	    (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
4158956713aSEric Schrock 	if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
4168956713aSEric Schrock 		bp->b_flags |= B_FAILFAST;
417fa9e4066Sahrens 	bp->b_bcount = zio->io_size;
418fa9e4066Sahrens 	bp->b_un.b_addr = zio->io_data;
419fa9e4066Sahrens 	bp->b_lblkno = lbtodb(zio->io_offset);
420fa9e4066Sahrens 	bp->b_bufsize = zio->io_size;
421fa9e4066Sahrens 	bp->b_iodone = (int (*)())vdev_disk_io_intr;
422fa9e4066Sahrens 
423fa9e4066Sahrens 	/* ldi_strategy() will return non-zero only on programming errors */
424e14bb325SJeff Bonwick 	VERIFY(ldi_strategy(dvd->vd_lh, bp) == 0);
425e05725b1Sbonwick 
426e05725b1Sbonwick 	return (ZIO_PIPELINE_STOP);
427fa9e4066Sahrens }
428fa9e4066Sahrens 
429e14bb325SJeff Bonwick static void
430fa9e4066Sahrens vdev_disk_io_done(zio_t *zio)
431fa9e4066Sahrens {
432e14bb325SJeff Bonwick 	vdev_t *vd = zio->io_vd;
433ea8dc4b6Seschrock 
4343d7072f8Seschrock 	/*
4353d7072f8Seschrock 	 * If the device returned EIO, then attempt a DKIOCSTATE ioctl to see if
4363d7072f8Seschrock 	 * the device has been removed.  If this is the case, then we trigger an
4370a4e9518Sgw 	 * asynchronous removal of the device. Otherwise, probe the device and
4381f7ad2e1Sgw 	 * make sure it's still accessible.
4393d7072f8Seschrock 	 */
4401d713200SEric Schrock 	if (zio->io_error == EIO && !vd->vdev_remove_wanted) {
4410a4e9518Sgw 		vdev_disk_t *dvd = vd->vdev_tsd;
442e14bb325SJeff Bonwick 		int state = DKIO_NONE;
4430a4e9518Sgw 
444e14bb325SJeff Bonwick 		if (ldi_ioctl(dvd->vd_lh, DKIOCSTATE, (intptr_t)&state,
445e14bb325SJeff Bonwick 		    FKIOCTL, kcred, NULL) == 0 && state != DKIO_INSERTED) {
4461d713200SEric Schrock 			/*
4471d713200SEric Schrock 			 * We post the resource as soon as possible, instead of
4481d713200SEric Schrock 			 * when the async removal actually happens, because the
4491d713200SEric Schrock 			 * DE is using this information to discard previous I/O
4501d713200SEric Schrock 			 * errors.
4511d713200SEric Schrock 			 */
4521d713200SEric Schrock 			zfs_post_remove(zio->io_spa, vd);
4533d7072f8Seschrock 			vd->vdev_remove_wanted = B_TRUE;
4543d7072f8Seschrock 			spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
4553d7072f8Seschrock 		}
4563d7072f8Seschrock 	}
457fa9e4066Sahrens }
458fa9e4066Sahrens 
459fa9e4066Sahrens vdev_ops_t vdev_disk_ops = {
460fa9e4066Sahrens 	vdev_disk_open,
461fa9e4066Sahrens 	vdev_disk_close,
462fa9e4066Sahrens 	vdev_default_asize,
463fa9e4066Sahrens 	vdev_disk_io_start,
464fa9e4066Sahrens 	vdev_disk_io_done,
465fa9e4066Sahrens 	NULL,
466fa9e4066Sahrens 	VDEV_TYPE_DISK,		/* name of this vdev type */
467fa9e4066Sahrens 	B_TRUE			/* leaf vdev */
468fa9e4066Sahrens };
469e7cbe64fSgw 
470e7cbe64fSgw /*
471051aabe6Staylor  * Given the root disk device devid or pathname, read the label from
472051aabe6Staylor  * the device, and construct a configuration nvlist.
473e7cbe64fSgw  */
474f940fbb1SLin Ling int
475f940fbb1SLin Ling vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config)
476e7cbe64fSgw {
477e7cbe64fSgw 	ldi_handle_t vd_lh;
478e7cbe64fSgw 	vdev_label_t *label;
479e7cbe64fSgw 	uint64_t s, size;
480e7cbe64fSgw 	int l;
481051aabe6Staylor 	ddi_devid_t tmpdevid;
482f4565e39SLin Ling 	int error = -1;
483051aabe6Staylor 	char *minor_name;
484e7cbe64fSgw 
485e7cbe64fSgw 	/*
486e7cbe64fSgw 	 * Read the device label and build the nvlist.
487e7cbe64fSgw 	 */
488f4565e39SLin Ling 	if (devid != NULL && ddi_devid_str_decode(devid, &tmpdevid,
489051aabe6Staylor 	    &minor_name) == 0) {
490051aabe6Staylor 		error = ldi_open_by_devid(tmpdevid, minor_name,
4918ad4d6ddSJeff Bonwick 		    FREAD, kcred, &vd_lh, zfs_li);
492051aabe6Staylor 		ddi_devid_free(tmpdevid);
493051aabe6Staylor 		ddi_devid_str_free(minor_name);
494051aabe6Staylor 	}
495051aabe6Staylor 
496f4565e39SLin Ling 	if (error && (error = ldi_open_by_name(devpath, FREAD, kcred, &vd_lh,
497f4565e39SLin Ling 	    zfs_li)))
498f940fbb1SLin Ling 		return (error);
499e7cbe64fSgw 
500bf82a41bSeschrock 	if (ldi_get_size(vd_lh, &s)) {
501bf82a41bSeschrock 		(void) ldi_close(vd_lh, FREAD, kcred);
502f940fbb1SLin Ling 		return (EIO);
503bf82a41bSeschrock 	}
504e7cbe64fSgw 
505e7cbe64fSgw 	size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), uint64_t);
506e7cbe64fSgw 	label = kmem_alloc(sizeof (vdev_label_t), KM_SLEEP);
507e7cbe64fSgw 
50817f1e64aSEric Taylor 	*config = NULL;
509e7cbe64fSgw 	for (l = 0; l < VDEV_LABELS; l++) {
510e7cbe64fSgw 		uint64_t offset, state, txg = 0;
511e7cbe64fSgw 
512e7cbe64fSgw 		/* read vdev label */
513e7cbe64fSgw 		offset = vdev_label_offset(size, l, 0);
514e7cbe64fSgw 		if (vdev_disk_physio(vd_lh, (caddr_t)label,
5152264ca7fSLin Ling 		    VDEV_SKIP_SIZE + VDEV_PHYS_SIZE, offset, B_READ) != 0)
516e7cbe64fSgw 			continue;
517e7cbe64fSgw 
518e7cbe64fSgw 		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
519f940fbb1SLin Ling 		    sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) {
520f940fbb1SLin Ling 			*config = NULL;
521e7cbe64fSgw 			continue;
522e7cbe64fSgw 		}
523e7cbe64fSgw 
524f940fbb1SLin Ling 		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
525e7cbe64fSgw 		    &state) != 0 || state >= POOL_STATE_DESTROYED) {
526f940fbb1SLin Ling 			nvlist_free(*config);
527f940fbb1SLin Ling 			*config = NULL;
528e7cbe64fSgw 			continue;
529e7cbe64fSgw 		}
530e7cbe64fSgw 
531f940fbb1SLin Ling 		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
532e7cbe64fSgw 		    &txg) != 0 || txg == 0) {
533f940fbb1SLin Ling 			nvlist_free(*config);
534f940fbb1SLin Ling 			*config = NULL;
535e7cbe64fSgw 			continue;
536e7cbe64fSgw 		}
537e7cbe64fSgw 
538e7cbe64fSgw 		break;
539e7cbe64fSgw 	}
540e7cbe64fSgw 
541e7cbe64fSgw 	kmem_free(label, sizeof (vdev_label_t));
542bf82a41bSeschrock 	(void) ldi_close(vd_lh, FREAD, kcred);
54317f1e64aSEric Taylor 	if (*config == NULL)
54417f1e64aSEric Taylor 		error = EIDRM;
545bf82a41bSeschrock 
546f940fbb1SLin Ling 	return (error);
547e7cbe64fSgw }
548