xref: /illumos-gate/usr/src/uts/common/fs/zfs/vdev_disk.c (revision fa9e4066)
1*fa9e4066Sahrens /*
2*fa9e4066Sahrens  * CDDL HEADER START
3*fa9e4066Sahrens  *
4*fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5*fa9e4066Sahrens  * Common Development and Distribution License, Version 1.0 only
6*fa9e4066Sahrens  * (the "License").  You may not use this file except in compliance
7*fa9e4066Sahrens  * with the License.
8*fa9e4066Sahrens  *
9*fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
11*fa9e4066Sahrens  * See the License for the specific language governing permissions
12*fa9e4066Sahrens  * and limitations under the License.
13*fa9e4066Sahrens  *
14*fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
15*fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
17*fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
18*fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
19*fa9e4066Sahrens  *
20*fa9e4066Sahrens  * CDDL HEADER END
21*fa9e4066Sahrens  */
22*fa9e4066Sahrens /*
23*fa9e4066Sahrens  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*fa9e4066Sahrens  * Use is subject to license terms.
25*fa9e4066Sahrens  */
26*fa9e4066Sahrens 
27*fa9e4066Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*fa9e4066Sahrens 
29*fa9e4066Sahrens #include <sys/zfs_context.h>
30*fa9e4066Sahrens #include <sys/spa.h>
31*fa9e4066Sahrens #include <sys/vdev_disk.h>
32*fa9e4066Sahrens #include <sys/vdev_impl.h>
33*fa9e4066Sahrens #include <sys/fs/zfs.h>
34*fa9e4066Sahrens #include <sys/zio.h>
35*fa9e4066Sahrens #include <sys/sunddi.h>
36*fa9e4066Sahrens 
37*fa9e4066Sahrens /*
38*fa9e4066Sahrens  * Virtual device vector for disks.
39*fa9e4066Sahrens  */
40*fa9e4066Sahrens 
41*fa9e4066Sahrens extern ldi_ident_t zfs_li;
42*fa9e4066Sahrens 
43*fa9e4066Sahrens typedef struct vdev_disk_buf {
44*fa9e4066Sahrens 	buf_t	vdb_buf;
45*fa9e4066Sahrens 	zio_t	*vdb_io;
46*fa9e4066Sahrens } vdev_disk_buf_t;
47*fa9e4066Sahrens 
48*fa9e4066Sahrens static int
49*fa9e4066Sahrens vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
50*fa9e4066Sahrens {
51*fa9e4066Sahrens 	vdev_disk_t *dvd;
52*fa9e4066Sahrens 	int error;
53*fa9e4066Sahrens 
54*fa9e4066Sahrens 	/*
55*fa9e4066Sahrens 	 * We must have a pathname, and it must be absolute.
56*fa9e4066Sahrens 	 */
57*fa9e4066Sahrens 	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
58*fa9e4066Sahrens 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
59*fa9e4066Sahrens 		return (EINVAL);
60*fa9e4066Sahrens 	}
61*fa9e4066Sahrens 
62*fa9e4066Sahrens 	dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
63*fa9e4066Sahrens 
64*fa9e4066Sahrens 	/*
65*fa9e4066Sahrens 	 * When opening a disk device, we want to preserve the user's original
66*fa9e4066Sahrens 	 * intent.  We always want to open the device by the path the user gave
67*fa9e4066Sahrens 	 * us, even if it is one of multiple paths to the save device.  But we
68*fa9e4066Sahrens 	 * also want to be able to survive disks being removed/recabled.
69*fa9e4066Sahrens 	 * Therefore the sequence of opening devices is:
70*fa9e4066Sahrens 	 *
71*fa9e4066Sahrens 	 * 1. Try opening the device by path.
72*fa9e4066Sahrens 	 *
73*fa9e4066Sahrens 	 * 	a. First append "s0" to see if this is a whole disk
74*fa9e4066Sahrens 	 * 	b. Fall back to path otherwise
75*fa9e4066Sahrens 	 *
76*fa9e4066Sahrens 	 * 2. If the devid of the device matches the stored value, return
77*fa9e4066Sahrens 	 *    success.
78*fa9e4066Sahrens 	 *
79*fa9e4066Sahrens 	 * 3. Otherwise, the device may have moved.  Try opening the device
80*fa9e4066Sahrens 	 *    by the devid instead.
81*fa9e4066Sahrens 	 *
82*fa9e4066Sahrens 	 */
83*fa9e4066Sahrens 	if (vd->vdev_devid != NULL) {
84*fa9e4066Sahrens 		if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid,
85*fa9e4066Sahrens 		    &dvd->vd_minor) != 0) {
86*fa9e4066Sahrens 			vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
87*fa9e4066Sahrens 			return (EINVAL);
88*fa9e4066Sahrens 		}
89*fa9e4066Sahrens 	}
90*fa9e4066Sahrens 
91*fa9e4066Sahrens 	error = EINVAL;		/* presume failure */
92*fa9e4066Sahrens 
93*fa9e4066Sahrens 	if (vd->vdev_path != NULL) {
94*fa9e4066Sahrens 		size_t len = strlen(vd->vdev_path) + 3;
95*fa9e4066Sahrens 		char *buf = kmem_alloc(len, KM_SLEEP);
96*fa9e4066Sahrens 		ddi_devid_t devid;
97*fa9e4066Sahrens 
98*fa9e4066Sahrens 		(void) snprintf(buf, len, "%ss0", vd->vdev_path);
99*fa9e4066Sahrens 
100*fa9e4066Sahrens 		/*
101*fa9e4066Sahrens 		 * Try whole disk first, then slice name.
102*fa9e4066Sahrens 		 */
103*fa9e4066Sahrens 		if ((error = ldi_open_by_name(buf, spa_mode, kcred,
104*fa9e4066Sahrens 		    &dvd->vd_lh, zfs_li)) != 0)
105*fa9e4066Sahrens 			error = ldi_open_by_name(vd->vdev_path,
106*fa9e4066Sahrens 			    spa_mode, kcred, &dvd->vd_lh, zfs_li);
107*fa9e4066Sahrens 
108*fa9e4066Sahrens 		kmem_free(buf, len);
109*fa9e4066Sahrens 
110*fa9e4066Sahrens 		/*
111*fa9e4066Sahrens 		 * Compare the devid to the stored value.
112*fa9e4066Sahrens 		 */
113*fa9e4066Sahrens 		if (error == 0 && vd->vdev_devid != NULL &&
114*fa9e4066Sahrens 		    ldi_get_devid(dvd->vd_lh, &devid) == 0) {
115*fa9e4066Sahrens 			if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
116*fa9e4066Sahrens 				error = EINVAL;
117*fa9e4066Sahrens 				(void) ldi_close(dvd->vd_lh, spa_mode, kcred);
118*fa9e4066Sahrens 				dvd->vd_lh = NULL;
119*fa9e4066Sahrens 			}
120*fa9e4066Sahrens 			ddi_devid_free(devid);
121*fa9e4066Sahrens 		}
122*fa9e4066Sahrens 	}
123*fa9e4066Sahrens 
124*fa9e4066Sahrens 	/*
125*fa9e4066Sahrens 	 * If we were unable to open by path, or the devid check fails, open by
126*fa9e4066Sahrens 	 * devid instead.
127*fa9e4066Sahrens 	 */
128*fa9e4066Sahrens 	if (error != 0 && vd->vdev_devid != NULL)
129*fa9e4066Sahrens 		error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor,
130*fa9e4066Sahrens 		    spa_mode, kcred, &dvd->vd_lh, zfs_li);
131*fa9e4066Sahrens 
132*fa9e4066Sahrens 	if (error) {
133*fa9e4066Sahrens 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
134*fa9e4066Sahrens 		return (error);
135*fa9e4066Sahrens 	}
136*fa9e4066Sahrens 
137*fa9e4066Sahrens 	/*
138*fa9e4066Sahrens 	 * Determine the actual size of the device.
139*fa9e4066Sahrens 	 */
140*fa9e4066Sahrens 	if (ldi_get_size(dvd->vd_lh, psize) != 0) {
141*fa9e4066Sahrens 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
142*fa9e4066Sahrens 		return (EINVAL);
143*fa9e4066Sahrens 	}
144*fa9e4066Sahrens 
145*fa9e4066Sahrens 	*ashift = SPA_MINBLOCKSHIFT;
146*fa9e4066Sahrens 
147*fa9e4066Sahrens 	return (0);
148*fa9e4066Sahrens }
149*fa9e4066Sahrens 
150*fa9e4066Sahrens static void
151*fa9e4066Sahrens vdev_disk_close(vdev_t *vd)
152*fa9e4066Sahrens {
153*fa9e4066Sahrens 	vdev_disk_t *dvd = vd->vdev_tsd;
154*fa9e4066Sahrens 
155*fa9e4066Sahrens 	if (dvd == NULL)
156*fa9e4066Sahrens 		return;
157*fa9e4066Sahrens 
158*fa9e4066Sahrens 	dprintf("removing disk %s, devid %s\n",
159*fa9e4066Sahrens 	    vd->vdev_path ? vd->vdev_path : "<none>",
160*fa9e4066Sahrens 	    vd->vdev_devid ? vd->vdev_devid : "<none>");
161*fa9e4066Sahrens 
162*fa9e4066Sahrens 	if (dvd->vd_minor != NULL)
163*fa9e4066Sahrens 		ddi_devid_str_free(dvd->vd_minor);
164*fa9e4066Sahrens 
165*fa9e4066Sahrens 	if (dvd->vd_devid != NULL)
166*fa9e4066Sahrens 		ddi_devid_free(dvd->vd_devid);
167*fa9e4066Sahrens 
168*fa9e4066Sahrens 	if (dvd->vd_lh != NULL)
169*fa9e4066Sahrens 		(void) ldi_close(dvd->vd_lh, spa_mode, kcred);
170*fa9e4066Sahrens 
171*fa9e4066Sahrens 	kmem_free(dvd, sizeof (vdev_disk_t));
172*fa9e4066Sahrens 	vd->vdev_tsd = NULL;
173*fa9e4066Sahrens }
174*fa9e4066Sahrens 
175*fa9e4066Sahrens static void
176*fa9e4066Sahrens vdev_disk_io_intr(buf_t *bp)
177*fa9e4066Sahrens {
178*fa9e4066Sahrens 	vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp;
179*fa9e4066Sahrens 	zio_t *zio = vdb->vdb_io;
180*fa9e4066Sahrens 
181*fa9e4066Sahrens 	if ((zio->io_error = geterror(bp)) == 0 && bp->b_resid != 0)
182*fa9e4066Sahrens 		zio->io_error = EIO;
183*fa9e4066Sahrens 
184*fa9e4066Sahrens 	kmem_free(vdb, sizeof (vdev_disk_buf_t));
185*fa9e4066Sahrens 
186*fa9e4066Sahrens 	zio_next_stage_async(zio);
187*fa9e4066Sahrens }
188*fa9e4066Sahrens 
189*fa9e4066Sahrens static void
190*fa9e4066Sahrens vdev_disk_ioctl_done(void *zio_arg, int error)
191*fa9e4066Sahrens {
192*fa9e4066Sahrens 	zio_t *zio = zio_arg;
193*fa9e4066Sahrens 
194*fa9e4066Sahrens 	zio->io_error = error;
195*fa9e4066Sahrens 
196*fa9e4066Sahrens 	zio_next_stage_async(zio);
197*fa9e4066Sahrens }
198*fa9e4066Sahrens 
199*fa9e4066Sahrens static void
200*fa9e4066Sahrens vdev_disk_io_start(zio_t *zio)
201*fa9e4066Sahrens {
202*fa9e4066Sahrens 	vdev_t *vd = zio->io_vd;
203*fa9e4066Sahrens 	vdev_disk_t *dvd = vd->vdev_tsd;
204*fa9e4066Sahrens 	vdev_disk_buf_t *vdb;
205*fa9e4066Sahrens 	buf_t *bp;
206*fa9e4066Sahrens 	int flags, error;
207*fa9e4066Sahrens 
208*fa9e4066Sahrens 	if (zio->io_type == ZIO_TYPE_IOCTL) {
209*fa9e4066Sahrens 		zio_vdev_io_bypass(zio);
210*fa9e4066Sahrens 
211*fa9e4066Sahrens 		/* XXPOLICY */
212*fa9e4066Sahrens 		if (vdev_is_dead(vd)) {
213*fa9e4066Sahrens 			zio->io_error = ENXIO;
214*fa9e4066Sahrens 			zio_next_stage_async(zio);
215*fa9e4066Sahrens 			return;
216*fa9e4066Sahrens 		}
217*fa9e4066Sahrens 
218*fa9e4066Sahrens 		switch (zio->io_cmd) {
219*fa9e4066Sahrens 
220*fa9e4066Sahrens 		case DKIOCFLUSHWRITECACHE:
221*fa9e4066Sahrens 
222*fa9e4066Sahrens 			zio->io_dk_callback.dkc_callback = vdev_disk_ioctl_done;
223*fa9e4066Sahrens 			zio->io_dk_callback.dkc_cookie = zio;
224*fa9e4066Sahrens 
225*fa9e4066Sahrens 			error = ldi_ioctl(dvd->vd_lh, zio->io_cmd,
226*fa9e4066Sahrens 			    (uintptr_t)&zio->io_dk_callback,
227*fa9e4066Sahrens 			    FKIOCTL, kcred, NULL);
228*fa9e4066Sahrens 
229*fa9e4066Sahrens 			if (error == 0) {
230*fa9e4066Sahrens 				/*
231*fa9e4066Sahrens 				 * The ioctl will be done asychronously,
232*fa9e4066Sahrens 				 * and will call vdev_disk_ioctl_done()
233*fa9e4066Sahrens 				 * upon completion.
234*fa9e4066Sahrens 				 */
235*fa9e4066Sahrens 				return;
236*fa9e4066Sahrens 			}
237*fa9e4066Sahrens 			zio->io_error = error;
238*fa9e4066Sahrens 			break;
239*fa9e4066Sahrens 
240*fa9e4066Sahrens 		default:
241*fa9e4066Sahrens 			zio->io_error = ENOTSUP;
242*fa9e4066Sahrens 		}
243*fa9e4066Sahrens 
244*fa9e4066Sahrens 		zio_next_stage_async(zio);
245*fa9e4066Sahrens 		return;
246*fa9e4066Sahrens 	}
247*fa9e4066Sahrens 
248*fa9e4066Sahrens 	if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0)
249*fa9e4066Sahrens 		return;
250*fa9e4066Sahrens 
251*fa9e4066Sahrens 	if ((zio = vdev_queue_io(zio)) == NULL)
252*fa9e4066Sahrens 		return;
253*fa9e4066Sahrens 
254*fa9e4066Sahrens 	flags = (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
255*fa9e4066Sahrens 	flags |= B_BUSY | B_NOCACHE;
256*fa9e4066Sahrens 	if (zio->io_flags & ZIO_FLAG_FAILFAST)
257*fa9e4066Sahrens 		flags |= B_FAILFAST;
258*fa9e4066Sahrens 
259*fa9e4066Sahrens 	vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP);
260*fa9e4066Sahrens 
261*fa9e4066Sahrens 	vdb->vdb_io = zio;
262*fa9e4066Sahrens 	bp = &vdb->vdb_buf;
263*fa9e4066Sahrens 
264*fa9e4066Sahrens 	bioinit(bp);
265*fa9e4066Sahrens 	bp->b_flags = flags;
266*fa9e4066Sahrens 	bp->b_bcount = zio->io_size;
267*fa9e4066Sahrens 	bp->b_un.b_addr = zio->io_data;
268*fa9e4066Sahrens 	bp->b_lblkno = lbtodb(zio->io_offset);
269*fa9e4066Sahrens 	bp->b_bufsize = zio->io_size;
270*fa9e4066Sahrens 	bp->b_iodone = (int (*)())vdev_disk_io_intr;
271*fa9e4066Sahrens 
272*fa9e4066Sahrens 	/* XXPOLICY */
273*fa9e4066Sahrens 	error = vdev_is_dead(vd) ? ENXIO : vdev_error_inject(vd, zio);
274*fa9e4066Sahrens 	if (error) {
275*fa9e4066Sahrens 		zio->io_error = error;
276*fa9e4066Sahrens 		bioerror(bp, error);
277*fa9e4066Sahrens 		bp->b_resid = bp->b_bcount;
278*fa9e4066Sahrens 		bp->b_iodone(bp);
279*fa9e4066Sahrens 		return;
280*fa9e4066Sahrens 	}
281*fa9e4066Sahrens 
282*fa9e4066Sahrens 	error = ldi_strategy(dvd->vd_lh, bp);
283*fa9e4066Sahrens 	/* ldi_strategy() will return non-zero only on programming errors */
284*fa9e4066Sahrens 	ASSERT(error == 0);
285*fa9e4066Sahrens }
286*fa9e4066Sahrens 
287*fa9e4066Sahrens static void
288*fa9e4066Sahrens vdev_disk_io_done(zio_t *zio)
289*fa9e4066Sahrens {
290*fa9e4066Sahrens 	vdev_queue_io_done(zio);
291*fa9e4066Sahrens 
292*fa9e4066Sahrens 	if (zio->io_type == ZIO_TYPE_WRITE)
293*fa9e4066Sahrens 		vdev_cache_write(zio);
294*fa9e4066Sahrens 
295*fa9e4066Sahrens 	zio_next_stage(zio);
296*fa9e4066Sahrens }
297*fa9e4066Sahrens 
298*fa9e4066Sahrens vdev_ops_t vdev_disk_ops = {
299*fa9e4066Sahrens 	vdev_disk_open,
300*fa9e4066Sahrens 	vdev_disk_close,
301*fa9e4066Sahrens 	vdev_default_asize,
302*fa9e4066Sahrens 	vdev_disk_io_start,
303*fa9e4066Sahrens 	vdev_disk_io_done,
304*fa9e4066Sahrens 	NULL,
305*fa9e4066Sahrens 	VDEV_TYPE_DISK,		/* name of this vdev type */
306*fa9e4066Sahrens 	B_TRUE			/* leaf vdev */
307*fa9e4066Sahrens };
308