xref: /illumos-gate/usr/src/uts/common/io/blkdev/blkdev.c (revision d2c5b266)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
24  * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
25  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
26  * Copyright 2017 The MathWorks, Inc.  All rights reserved.
27  */
28 
29 #include <sys/types.h>
30 #include <sys/ksynch.h>
31 #include <sys/kmem.h>
32 #include <sys/file.h>
33 #include <sys/errno.h>
34 #include <sys/open.h>
35 #include <sys/buf.h>
36 #include <sys/uio.h>
37 #include <sys/aio_req.h>
38 #include <sys/cred.h>
39 #include <sys/modctl.h>
40 #include <sys/cmlb.h>
41 #include <sys/conf.h>
42 #include <sys/devops.h>
43 #include <sys/list.h>
44 #include <sys/sysmacros.h>
45 #include <sys/dkio.h>
46 #include <sys/vtoc.h>
47 #include <sys/scsi/scsi.h>	/* for DTYPE_DIRECT */
48 #include <sys/kstat.h>
49 #include <sys/fs/dv_node.h>
50 #include <sys/ddi.h>
51 #include <sys/sunddi.h>
52 #include <sys/note.h>
53 #include <sys/blkdev.h>
54 #include <sys/scsi/impl/inquiry.h>
55 
56 #define	BD_MAXPART	64
57 #define	BDINST(dev)	(getminor(dev) / BD_MAXPART)
58 #define	BDPART(dev)	(getminor(dev) % BD_MAXPART)
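/*
 * Each instance thus owns BD_MAXPART consecutive minors: e.g. minor
 * 130 decodes to instance 2 (130 / 64) and partition 2 (130 % 64).
 */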
59 
60 typedef struct bd bd_t;
61 typedef struct bd_xfer_impl bd_xfer_impl_t;
62 
63 struct bd {
64 	void		*d_private;
65 	dev_info_t	*d_dip;
66 	kmutex_t	d_ocmutex;
67 	kmutex_t	d_iomutex;
68 	kmutex_t	*d_errmutex;
69 	kmutex_t	d_statemutex;
70 	kcondvar_t	d_statecv;
71 	enum dkio_state	d_state;
72 	cmlb_handle_t	d_cmlbh;
73 	unsigned	d_open_lyr[BD_MAXPART];	/* open count */
74 	uint64_t	d_open_excl;	/* bit mask indexed by partition */
75 	uint64_t	d_open_reg[OTYPCNT];		/* bit mask */
76 
77 	uint32_t	d_qsize;
78 	uint32_t	d_qactive;
79 	uint32_t	d_maxxfer;
80 	uint32_t	d_blkshift;
81 	uint32_t	d_pblkshift;
82 	uint64_t	d_numblks;
83 	ddi_devid_t	d_devid;
84 
85 	kmem_cache_t	*d_cache;
86 	list_t		d_runq;
87 	list_t		d_waitq;
88 	kstat_t		*d_ksp;
89 	kstat_io_t	*d_kiop;
90 	kstat_t		*d_errstats;
91 	struct bd_errstats *d_kerr;
92 
93 	boolean_t	d_rdonly;
94 	boolean_t	d_ssd;
95 	boolean_t	d_removable;
96 	boolean_t	d_hotpluggable;
97 	boolean_t	d_use_dma;
98 
99 	ddi_dma_attr_t	d_dma;
100 	bd_ops_t	d_ops;
101 	bd_handle_t	d_handle;
102 };
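
/*
 * Lock roles, as used throughout this file: d_ocmutex serializes
 * open/close accounting, d_iomutex protects the wait/run queues and
 * the I/O kstat, d_statemutex (with d_statecv) guards d_state, and
 * d_errmutex covers the named error kstat.
 */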
103 
104 struct bd_handle {
105 	bd_ops_t	h_ops;
106 	ddi_dma_attr_t	*h_dma;
107 	dev_info_t	*h_parent;
108 	dev_info_t	*h_child;
109 	void		*h_private;
110 	bd_t		*h_bd;
111 	char		*h_name;
112 	char		h_addr[30];	/* enough for w%0.16x,%X */
113 };
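
/*
 * h_addr is the child's unit address as formatted by
 * bd_attach_handle(): "w<EUI64>[,<lun>]" when the drive supplies an
 * EUI-64, otherwise "<target>[,<lun>]".
 */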
114 
115 struct bd_xfer_impl {
116 	bd_xfer_t	i_public;
117 	list_node_t	i_linkage;
118 	bd_t		*i_bd;
119 	buf_t		*i_bp;
120 	uint_t		i_num_win;
121 	uint_t		i_cur_win;
122 	off_t		i_offset;
123 	int		(*i_func)(void *, bd_xfer_t *);
124 	uint32_t	i_blkshift;
125 	size_t		i_len;
126 	size_t		i_resid;
127 };
128 
129 #define	i_dmah		i_public.x_dmah
130 #define	i_dmac		i_public.x_dmac
131 #define	i_ndmac		i_public.x_ndmac
132 #define	i_kaddr		i_public.x_kaddr
133 #define	i_nblks		i_public.x_nblks
134 #define	i_blkno		i_public.x_blkno
135 #define	i_flags		i_public.x_flags
136 
137 
138 /*
139  * Private prototypes.
140  */
141 
142 static void bd_prop_update_inqstring(dev_info_t *, char *, char *, size_t);
143 static void bd_create_inquiry_props(dev_info_t *, bd_drive_t *);
144 static void bd_create_errstats(bd_t *, int, bd_drive_t *);
145 static void bd_errstats_setstr(kstat_named_t *, char *, size_t, char *);
146 static void bd_init_errstats(bd_t *, bd_drive_t *);
147 
148 static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
149 static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
150 static int bd_detach(dev_info_t *, ddi_detach_cmd_t);
151 
152 static int bd_open(dev_t *, int, int, cred_t *);
153 static int bd_close(dev_t, int, int, cred_t *);
154 static int bd_strategy(struct buf *);
155 static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
156 static int bd_dump(dev_t, caddr_t, daddr_t, int);
157 static int bd_read(dev_t, struct uio *, cred_t *);
158 static int bd_write(dev_t, struct uio *, cred_t *);
159 static int bd_aread(dev_t, struct aio_req *, cred_t *);
160 static int bd_awrite(dev_t, struct aio_req *, cred_t *);
161 static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
162     caddr_t, int *);
163 
164 static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
165     void *);
166 static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
167 static int bd_xfer_ctor(void *, void *, int);
168 static void bd_xfer_dtor(void *, void *);
169 static void bd_sched(bd_t *);
170 static void bd_submit(bd_t *, bd_xfer_impl_t *);
171 static void bd_runq_exit(bd_xfer_impl_t *, int);
172 static void bd_update_state(bd_t *);
173 static int bd_check_state(bd_t *, enum dkio_state *);
174 static int bd_flush_write_cache(bd_t *, struct dk_callback *);
175 
176 struct cmlb_tg_ops bd_tg_ops = {
177 	TG_DK_OPS_VERSION_1,
178 	bd_tg_rdwr,
179 	bd_tg_getinfo,
180 };
181 
182 static struct cb_ops bd_cb_ops = {
183 	bd_open, 		/* open */
184 	bd_close, 		/* close */
185 	bd_strategy, 		/* strategy */
186 	nodev, 			/* print */
187 	bd_dump,		/* dump */
188 	bd_read, 		/* read */
189 	bd_write, 		/* write */
190 	bd_ioctl, 		/* ioctl */
191 	nodev, 			/* devmap */
192 	nodev, 			/* mmap */
193 	nodev, 			/* segmap */
194 	nochpoll, 		/* poll */
195 	bd_prop_op, 		/* cb_prop_op */
196 	0, 			/* streamtab  */
197 	D_64BIT | D_MP,		/* Driver compatibility flag */
198 	CB_REV,			/* cb_rev */
199 	bd_aread,		/* async read */
200 	bd_awrite		/* async write */
201 };
202 
203 struct dev_ops bd_dev_ops = {
204 	DEVO_REV, 		/* devo_rev, */
205 	0, 			/* refcnt  */
206 	bd_getinfo,		/* getinfo */
207 	nulldev, 		/* identify */
208 	nulldev, 		/* probe */
209 	bd_attach, 		/* attach */
210 	bd_detach,		/* detach */
211 	nodev, 			/* reset */
212 	&bd_cb_ops, 		/* driver operations */
213 	NULL,			/* bus operations */
214 	NULL,			/* power */
215 	ddi_quiesce_not_needed,	/* quiesce */
216 };
217 
218 static struct modldrv modldrv = {
219 	&mod_driverops,
220 	"Generic Block Device",
221 	&bd_dev_ops,
222 };
223 
224 static struct modlinkage modlinkage = {
225 	MODREV_1, { &modldrv, NULL }
226 };
227 
228 static void *bd_state;
229 static krwlock_t bd_lock;
230 
231 int
232 _init(void)
233 {
234 	int	rv;
235 
236 	rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
237 	if (rv != DDI_SUCCESS) {
238 		return (rv);
239 	}
240 	rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
241 	rv = mod_install(&modlinkage);
242 	if (rv != DDI_SUCCESS) {
243 		rw_destroy(&bd_lock);
244 		ddi_soft_state_fini(&bd_state);
245 	}
246 	return (rv);
247 }
248 
249 int
250 _fini(void)
251 {
252 	int	rv;
253 
254 	rv = mod_remove(&modlinkage);
255 	if (rv == DDI_SUCCESS) {
256 		rw_destroy(&bd_lock);
257 		ddi_soft_state_fini(&bd_state);
258 	}
259 	return (rv);
260 }
261 
262 int
263 _info(struct modinfo *modinfop)
264 {
265 	return (mod_info(&modlinkage, modinfop));
266 }
267 
268 static int
269 bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
270 {
271 	bd_t	*bd;
272 	minor_t	inst;
273 
274 	_NOTE(ARGUNUSED(dip));
275 
276 	inst = BDINST((dev_t)arg);
277 
278 	switch (cmd) {
279 	case DDI_INFO_DEVT2DEVINFO:
280 		bd = ddi_get_soft_state(bd_state, inst);
281 		if (bd == NULL) {
282 			return (DDI_FAILURE);
283 		}
284 		*resultp = (void *)bd->d_dip;
285 		break;
286 
287 	case DDI_INFO_DEVT2INSTANCE:
288 		*resultp = (void *)(intptr_t)inst;
289 		break;
290 
291 	default:
292 		return (DDI_FAILURE);
293 	}
294 	return (DDI_SUCCESS);
295 }
296 
297 static void
298 bd_prop_update_inqstring(dev_info_t *dip, char *name, char *data, size_t len)
299 {
300 	int	ilen;
301 	char	*data_string;
302 
303 	ilen = scsi_ascii_inquiry_len(data, len);
304 	ASSERT3U(ilen, <=, len);
305 	if (ilen <= 0)
306 		return;
307 	/* ensure null termination */
308 	data_string = kmem_zalloc(ilen + 1, KM_SLEEP);
309 	bcopy(data, data_string, ilen);
310 	(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, name, data_string);
311 	kmem_free(data_string, ilen + 1);
312 }
313 
314 static void
315 bd_create_inquiry_props(dev_info_t *dip, bd_drive_t *drive)
316 {
317 	if (drive->d_vendor_len > 0)
318 		bd_prop_update_inqstring(dip, INQUIRY_VENDOR_ID,
319 		    drive->d_vendor, drive->d_vendor_len);
320 
321 	if (drive->d_product_len > 0)
322 		bd_prop_update_inqstring(dip, INQUIRY_PRODUCT_ID,
323 		    drive->d_product, drive->d_product_len);
324 
325 	if (drive->d_serial_len > 0)
326 		bd_prop_update_inqstring(dip, INQUIRY_SERIAL_NO,
327 		    drive->d_serial, drive->d_serial_len);
328 
329 	if (drive->d_revision_len > 0)
330 		bd_prop_update_inqstring(dip, INQUIRY_REVISION_ID,
331 		    drive->d_revision, drive->d_revision_len);
332 }
333 
334 static void
335 bd_create_errstats(bd_t *bd, int inst, bd_drive_t *drive)
336 {
337 	char	ks_module[KSTAT_STRLEN];
338 	char	ks_name[KSTAT_STRLEN];
339 	int	ndata = sizeof (struct bd_errstats) / sizeof (kstat_named_t);
340 
341 	if (bd->d_errstats != NULL)
342 		return;
343 
344 	(void) snprintf(ks_module, sizeof (ks_module), "%serr",
345 	    ddi_driver_name(bd->d_dip));
346 	(void) snprintf(ks_name, sizeof (ks_name), "%s%d,err",
347 	    ddi_driver_name(bd->d_dip), inst);
348 
349 	bd->d_errstats = kstat_create(ks_module, inst, ks_name, "device_error",
350 	    KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
351 
352 	if (bd->d_errstats == NULL) {
353 		/*
354 		 * Even if we cannot create the kstat, we create a
355 		 * scratch kstat.  The reason for this is to ensure
356 		 * that we can update the kstat all of the time,
357 		 * without adding an extra branch instruction.
358 		 */
359 		bd->d_kerr = kmem_zalloc(sizeof (struct bd_errstats),
360 		    KM_SLEEP);
361 		bd->d_errmutex = kmem_zalloc(sizeof (kmutex_t), KM_SLEEP);
362 		mutex_init(bd->d_errmutex, NULL, MUTEX_DRIVER, NULL);
363 	} else {
364 		if (bd->d_errstats->ks_lock == NULL) {
365 			bd->d_errstats->ks_lock = kmem_zalloc(sizeof (kmutex_t),
366 			    KM_SLEEP);
367 			mutex_init(bd->d_errstats->ks_lock, NULL, MUTEX_DRIVER,
368 			    NULL);
369 		}
370 
371 		bd->d_errmutex = bd->d_errstats->ks_lock;
372 		bd->d_kerr = (struct bd_errstats *)bd->d_errstats->ks_data;
373 	}
374 
375 	kstat_named_init(&bd->d_kerr->bd_softerrs,	"Soft Errors",
376 	    KSTAT_DATA_UINT32);
377 	kstat_named_init(&bd->d_kerr->bd_harderrs,	"Hard Errors",
378 	    KSTAT_DATA_UINT32);
379 	kstat_named_init(&bd->d_kerr->bd_transerrs,	"Transport Errors",
380 	    KSTAT_DATA_UINT32);
381 
382 	if (drive->d_model_len > 0) {
383 		kstat_named_init(&bd->d_kerr->bd_model,	"Model",
384 		    KSTAT_DATA_STRING);
385 	} else {
386 		kstat_named_init(&bd->d_kerr->bd_vid,	"Vendor",
387 		    KSTAT_DATA_STRING);
388 		kstat_named_init(&bd->d_kerr->bd_pid,	"Product",
389 		    KSTAT_DATA_STRING);
390 	}
391 
392 	kstat_named_init(&bd->d_kerr->bd_revision,	"Revision",
393 	    KSTAT_DATA_STRING);
394 	kstat_named_init(&bd->d_kerr->bd_serial,	"Serial No",
395 	    KSTAT_DATA_STRING);
396 	kstat_named_init(&bd->d_kerr->bd_capacity,	"Size",
397 	    KSTAT_DATA_ULONGLONG);
398 	kstat_named_init(&bd->d_kerr->bd_rq_media_err,	"Media Error",
399 	    KSTAT_DATA_UINT32);
400 	kstat_named_init(&bd->d_kerr->bd_rq_ntrdy_err,	"Device Not Ready",
401 	    KSTAT_DATA_UINT32);
402 	kstat_named_init(&bd->d_kerr->bd_rq_nodev_err,	"No Device",
403 	    KSTAT_DATA_UINT32);
404 	kstat_named_init(&bd->d_kerr->bd_rq_recov_err,	"Recoverable",
405 	    KSTAT_DATA_UINT32);
406 	kstat_named_init(&bd->d_kerr->bd_rq_illrq_err,	"Illegal Request",
407 	    KSTAT_DATA_UINT32);
408 	kstat_named_init(&bd->d_kerr->bd_rq_pfa_err,
409 	    "Predictive Failure Analysis", KSTAT_DATA_UINT32);
410 
411 	bd->d_errstats->ks_private = bd;
412 
413 	kstat_install(bd->d_errstats);
414 }
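
/*
 * Since d_kerr always points at valid storage (named kstat data or
 * the scratch copy above), error counters can be bumped without a
 * NULL check, e.g.:
 *
 *	atomic_inc_32(&bd->d_kerr->bd_harderrs.value.ui32);
 */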
415 
416 static void
417 bd_errstats_setstr(kstat_named_t *k, char *str, size_t len, char *alt)
418 {
419 	char	*tmp;
420 
421 	if (KSTAT_NAMED_STR_PTR(k) == NULL) {
422 		if (len > 0) {
423 			tmp = kmem_alloc(len + 1, KM_SLEEP);
424 			(void) strlcpy(tmp, str, len + 1);
425 		} else {
426 			tmp = alt;
427 		}
428 
429 		kstat_named_setstr(k, tmp);
430 	}
431 }
432 
433 static void
434 bd_init_errstats(bd_t *bd, bd_drive_t *drive)
435 {
436 	struct bd_errstats	*est = bd->d_kerr;
437 
438 	mutex_enter(bd->d_errmutex);
439 
440 	if (drive->d_model_len > 0 &&
441 	    KSTAT_NAMED_STR_PTR(&est->bd_model) == NULL) {
442 		bd_errstats_setstr(&est->bd_model, drive->d_model,
443 		    drive->d_model_len, NULL);
444 	} else {
445 		bd_errstats_setstr(&est->bd_vid, drive->d_vendor,
446 		    drive->d_vendor_len, "Unknown ");
447 		bd_errstats_setstr(&est->bd_pid, drive->d_product,
448 		    drive->d_product_len, "Unknown         ");
449 	}
450 
451 	bd_errstats_setstr(&est->bd_revision, drive->d_revision,
452 	    drive->d_revision_len, "0001");
453 	bd_errstats_setstr(&est->bd_serial, drive->d_serial,
454 	    drive->d_serial_len, "0               ");
455 
456 	mutex_exit(bd->d_errmutex);
457 }
458 
459 static int
460 bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
461 {
462 	int		inst;
463 	bd_handle_t	hdl;
464 	bd_t		*bd;
465 	bd_drive_t	drive;
466 	int		rv;
467 	char		name[16];
468 	char		kcache[32];
469 
470 	switch (cmd) {
471 	case DDI_ATTACH:
472 		break;
473 	case DDI_RESUME:
474 		/* We don't do anything native for suspend/resume */
475 		return (DDI_SUCCESS);
476 	default:
477 		return (DDI_FAILURE);
478 	}
479 
480 	inst = ddi_get_instance(dip);
481 	hdl = ddi_get_parent_data(dip);
482 
483 	(void) snprintf(name, sizeof (name), "%s%d",
484 	    ddi_driver_name(dip), ddi_get_instance(dip));
485 	(void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);
486 
487 	if (hdl == NULL) {
488 		cmn_err(CE_WARN, "%s: missing parent data!", name);
489 		return (DDI_FAILURE);
490 	}
491 
492 	if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
493 		cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
494 		return (DDI_FAILURE);
495 	}
496 	bd = ddi_get_soft_state(bd_state, inst);
497 
498 	if (hdl->h_dma) {
499 		bd->d_dma = *(hdl->h_dma);
500 		bd->d_dma.dma_attr_granular =
501 		    max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
502 		bd->d_use_dma = B_TRUE;
503 
504 		if (bd->d_maxxfer &&
505 		    (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
506 			cmn_err(CE_WARN,
507 			    "%s: inconsistent maximum transfer size!",
508 			    name);
509 			/* We force it */
510 			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
511 		} else {
512 			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
513 		}
514 	} else {
515 		bd->d_use_dma = B_FALSE;
516 		if (bd->d_maxxfer == 0) {
517 			bd->d_maxxfer = 1024 * 1024;
518 		}
519 	}
520 	bd->d_ops = hdl->h_ops;
521 	bd->d_private = hdl->h_private;
522 	bd->d_blkshift = 9;	/* 512 bytes, to start */
523 
524 	if (bd->d_maxxfer % DEV_BSIZE) {
525 		cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
526 		bd->d_maxxfer &= ~(DEV_BSIZE - 1);
527 	}
528 	if (bd->d_maxxfer < DEV_BSIZE) {
529 		cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
530 		ddi_soft_state_free(bd_state, inst);
531 		return (DDI_FAILURE);
532 	}
533 
534 	bd->d_dip = dip;
535 	bd->d_handle = hdl;
536 	hdl->h_bd = bd;
537 	ddi_set_driver_private(dip, bd);
538 
539 	mutex_init(&bd->d_iomutex, NULL, MUTEX_DRIVER, NULL);
540 	mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
541 	mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
542 	cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);
543 
544 	list_create(&bd->d_waitq, sizeof (bd_xfer_impl_t),
545 	    offsetof(struct bd_xfer_impl, i_linkage));
546 	list_create(&bd->d_runq, sizeof (bd_xfer_impl_t),
547 	    offsetof(struct bd_xfer_impl, i_linkage));
548 
549 	bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
550 	    bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);
551 
552 	bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
553 	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
554 	if (bd->d_ksp != NULL) {
555 		bd->d_ksp->ks_lock = &bd->d_iomutex;
556 		kstat_install(bd->d_ksp);
557 		bd->d_kiop = bd->d_ksp->ks_data;
558 	} else {
559 		/*
560 		 * Even if we cannot create the kstat, we create a
561 		 * scratch kstat.  The reason for this is to ensure
562 		 * that we can update the kstat all of the time,
563 		 * without adding an extra branch instruction.
564 		 */
565 		bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
566 	}
567 
568 	cmlb_alloc_handle(&bd->d_cmlbh);
569 
570 	bd->d_state = DKIO_NONE;
571 
572 	bzero(&drive, sizeof (drive));
573 	bd->d_ops.o_drive_info(bd->d_private, &drive);
574 	bd->d_qsize = drive.d_qsize;
575 	bd->d_removable = drive.d_removable;
576 	bd->d_hotpluggable = drive.d_hotpluggable;
577 
578 	if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
579 		bd->d_maxxfer = drive.d_maxxfer;
580 
581 	bd_create_inquiry_props(dip, &drive);
582 
583 	bd_create_errstats(bd, inst, &drive);
584 	bd_init_errstats(bd, &drive);
585 	bd_update_state(bd);
586 
587 	rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
588 	    bd->d_removable, bd->d_hotpluggable,
589 	    /*LINTED: E_BAD_PTR_CAST_ALIGN*/
590 	    *(uint64_t *)drive.d_eui64 != 0 ? DDI_NT_BLOCK_BLKDEV :
591 	    drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
592 	    CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
593 	if (rv != 0) {
594 		cmlb_free_handle(&bd->d_cmlbh);
595 		kmem_cache_destroy(bd->d_cache);
596 		mutex_destroy(&bd->d_iomutex);
597 		mutex_destroy(&bd->d_ocmutex);
598 		mutex_destroy(&bd->d_statemutex);
599 		cv_destroy(&bd->d_statecv);
600 		list_destroy(&bd->d_waitq);
601 		list_destroy(&bd->d_runq);
602 		if (bd->d_ksp != NULL) {
603 			kstat_delete(bd->d_ksp);
604 			bd->d_ksp = NULL;
605 		} else {
606 			kmem_free(bd->d_kiop, sizeof (kstat_io_t));
607 		}
608 		ddi_soft_state_free(bd_state, inst);
609 		return (DDI_FAILURE);
610 	}
611 
612 	if (bd->d_ops.o_devid_init != NULL) {
613 		rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
614 		if (rv == DDI_SUCCESS) {
615 			if (ddi_devid_register(dip, bd->d_devid) !=
616 			    DDI_SUCCESS) {
617 				cmn_err(CE_WARN,
618 				    "%s: unable to register devid", name);
619 			}
620 		}
621 	}
622 
623 	/*
624 	 * Add a zero-length attribute to tell the world we support
625 	 * kernel ioctls (for layered drivers).  Also set up properties
626 	 * used by HAL to identify removable media.
627 	 */
628 	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
629 	    DDI_KERNEL_IOCTL, NULL, 0);
630 	if (bd->d_removable) {
631 		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
632 		    "removable-media", NULL, 0);
633 	}
634 	if (bd->d_hotpluggable) {
635 		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
636 		    "hotpluggable", NULL, 0);
637 	}
638 
639 	ddi_report_dev(dip);
640 
641 	return (DDI_SUCCESS);
642 }
643 
644 static int
645 bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
646 {
647 	bd_t	*bd;
648 
649 	bd = ddi_get_driver_private(dip);
650 
651 	switch (cmd) {
652 	case DDI_DETACH:
653 		break;
654 	case DDI_SUSPEND:
655 		/* We don't suspend, but our parent does */
656 		return (DDI_SUCCESS);
657 	default:
658 		return (DDI_FAILURE);
659 	}
660 	if (bd->d_ksp != NULL) {
661 		kstat_delete(bd->d_ksp);
662 		bd->d_ksp = NULL;
663 	} else {
664 		kmem_free(bd->d_kiop, sizeof (kstat_io_t));
665 	}
666 
667 	if (bd->d_errstats != NULL) {
668 		kstat_delete(bd->d_errstats);
669 		bd->d_errstats = NULL;
670 	} else {
671 		kmem_free(bd->d_kerr, sizeof (struct bd_errstats));
672 		mutex_destroy(bd->d_errmutex);
673 	}
674 
675 	cmlb_detach(bd->d_cmlbh, 0);
676 	cmlb_free_handle(&bd->d_cmlbh);
677 	if (bd->d_devid)
678 		ddi_devid_free(bd->d_devid);
679 	kmem_cache_destroy(bd->d_cache);
680 	mutex_destroy(&bd->d_iomutex);
681 	mutex_destroy(&bd->d_ocmutex);
682 	mutex_destroy(&bd->d_statemutex);
683 	cv_destroy(&bd->d_statecv);
684 	list_destroy(&bd->d_waitq);
685 	list_destroy(&bd->d_runq);
686 	ddi_soft_state_free(bd_state, ddi_get_instance(dip));
687 	return (DDI_SUCCESS);
688 }
689 
690 static int
691 bd_xfer_ctor(void *buf, void *arg, int kmflag)
692 {
693 	bd_xfer_impl_t	*xi;
694 	bd_t		*bd = arg;
695 	int		(*dcb)(caddr_t);
696 
697 	if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) {
698 		dcb = DDI_DMA_SLEEP;
699 	} else {
700 		dcb = DDI_DMA_DONTWAIT;
701 	}
702 
703 	xi = buf;
704 	bzero(xi, sizeof (*xi));
705 	xi->i_bd = bd;
706 
707 	if (bd->d_use_dma) {
708 		if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
709 		    &xi->i_dmah) != DDI_SUCCESS) {
710 			return (-1);
711 		}
712 	}
713 
714 	return (0);
715 }
716 
717 static void
718 bd_xfer_dtor(void *buf, void *arg)
719 {
720 	bd_xfer_impl_t	*xi = buf;
721 
722 	_NOTE(ARGUNUSED(arg));
723 
724 	if (xi->i_dmah)
725 		ddi_dma_free_handle(&xi->i_dmah);
726 	xi->i_dmah = NULL;
727 }
728 
729 static bd_xfer_impl_t *
730 bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
731     int kmflag)
732 {
733 	bd_xfer_impl_t		*xi;
734 	int			rv = 0;
735 	int			status;
736 	unsigned		dir;
737 	int			(*cb)(caddr_t);
738 	size_t			len;
739 	uint32_t		shift;
740 
741 	if (kmflag == KM_SLEEP) {
742 		cb = DDI_DMA_SLEEP;
743 	} else {
744 		cb = DDI_DMA_DONTWAIT;
745 	}
746 
747 	xi = kmem_cache_alloc(bd->d_cache, kmflag);
748 	if (xi == NULL) {
749 		bioerror(bp, ENOMEM);
750 		return (NULL);
751 	}
752 
753 	ASSERT(bp);
754 
755 	xi->i_bp = bp;
756 	xi->i_func = func;
757 	xi->i_blkno = bp->b_lblkno >> (bd->d_blkshift - DEV_BSHIFT);
758 
759 	if (bp->b_bcount == 0) {
760 		xi->i_len = 0;
761 		xi->i_nblks = 0;
762 		xi->i_kaddr = NULL;
763 		xi->i_resid = 0;
764 		xi->i_num_win = 0;
765 		goto done;
766 	}
767 
768 	if (bp->b_flags & B_READ) {
769 		dir = DDI_DMA_READ;
770 		xi->i_func = bd->d_ops.o_read;
771 	} else {
772 		dir = DDI_DMA_WRITE;
773 		xi->i_func = bd->d_ops.o_write;
774 	}
775 
776 	shift = bd->d_blkshift;
777 	xi->i_blkshift = shift;
778 
779 	if (!bd->d_use_dma) {
780 		bp_mapin(bp);
781 		rv = 0;
782 		xi->i_offset = 0;
783 		xi->i_num_win =
784 		    (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
785 		xi->i_cur_win = 0;
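		/*
		 * e.g. a 3 MB request against a 1 MB d_maxxfer is
		 * carved into three windows, each completed by its
		 * own i_func() round trip (see bd_xfer_done()).
		 */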
786 		xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
787 		xi->i_nblks = howmany(xi->i_len, (1U << shift));
788 		xi->i_kaddr = bp->b_un.b_addr;
789 		xi->i_resid = bp->b_bcount;
790 	} else {
791 
792 		/*
793 		 * We have to use consistent DMA if the address is misaligned.
794 		 */
795 		if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
796 		    ((uintptr_t)bp->b_un.b_addr & 0x7)) {
797 			dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
798 		} else {
799 			dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
800 		}
801 
802 		status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
803 		    NULL, &xi->i_dmac, &xi->i_ndmac);
804 		switch (status) {
805 		case DDI_DMA_MAPPED:
806 			xi->i_num_win = 1;
807 			xi->i_cur_win = 0;
808 			xi->i_offset = 0;
809 			xi->i_len = bp->b_bcount;
810 			xi->i_nblks = howmany(xi->i_len, (1U << shift));
811 			xi->i_resid = bp->b_bcount;
812 			rv = 0;
813 			break;
814 		case DDI_DMA_PARTIAL_MAP:
815 			xi->i_cur_win = 0;
816 
817 			if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
818 			    DDI_SUCCESS) ||
819 			    (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
820 			    &len, &xi->i_dmac, &xi->i_ndmac) !=
821 			    DDI_SUCCESS) ||
822 			    (P2PHASE(len, (1U << DEV_BSHIFT)) != 0)) {
823 				(void) ddi_dma_unbind_handle(xi->i_dmah);
824 				rv = EFAULT;
825 				goto done;
826 			}
827 			xi->i_len = len;
828 			xi->i_nblks = howmany(xi->i_len, (1U << shift));
829 			xi->i_resid = bp->b_bcount;
830 			rv = 0;
831 			break;
832 		case DDI_DMA_NORESOURCES:
833 			rv = EAGAIN;
834 			goto done;
835 		case DDI_DMA_TOOBIG:
836 			rv = EINVAL;
837 			goto done;
838 		case DDI_DMA_NOMAPPING:
839 		case DDI_DMA_INUSE:
840 		default:
841 			rv = EFAULT;
842 			goto done;
843 		}
844 	}
845 
846 done:
847 	if (rv != 0) {
848 		kmem_cache_free(bd->d_cache, xi);
849 		bioerror(bp, rv);
850 		return (NULL);
851 	}
852 
853 	return (xi);
854 }
855 
856 static void
857 bd_xfer_free(bd_xfer_impl_t *xi)
858 {
859 	if (xi->i_dmah) {
860 		(void) ddi_dma_unbind_handle(xi->i_dmah);
861 	}
862 	kmem_cache_free(xi->i_bd->d_cache, xi);
863 }
864 
865 static int
866 bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
867 {
868 	dev_t		dev = *devp;
869 	bd_t		*bd;
870 	minor_t		part;
871 	minor_t		inst;
872 	uint64_t	mask;
873 	boolean_t	ndelay;
874 	int		rv;
875 	diskaddr_t	nblks;
876 	diskaddr_t	lba;
877 
878 	_NOTE(ARGUNUSED(credp));
879 
880 	part = BDPART(dev);
881 	inst = BDINST(dev);
882 
883 	if (otyp >= OTYPCNT)
884 		return (EINVAL);
885 
886 	ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;
887 
888 	/*
889 	 * Block any DR events from changing the set of registered
890 	 * devices while we function.
891 	 */
892 	rw_enter(&bd_lock, RW_READER);
893 	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
894 		rw_exit(&bd_lock);
895 		return (ENXIO);
896 	}
897 
898 	mutex_enter(&bd->d_ocmutex);
899 
900 	ASSERT(part < 64);
901 	mask = (1ULL << part);
902 
903 	bd_update_state(bd);
904 
905 	if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {
906 
907 		/* non-blocking opens are allowed to succeed */
908 		if (!ndelay) {
909 			rv = ENXIO;
910 			goto done;
911 		}
912 	} else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
913 	    NULL, NULL, 0) == 0) {
914 
915 		/*
916 		 * We read the partinfo, verify valid ranges.  If the
917 		 * partition is invalid, and we aren't blocking or
918 		 * doing a raw access, then fail. (Non-blocking and
919 		 * raw accesses can still succeed to allow a disk with
920 		 * bad partition data to opened by format and fdisk.)
921 		 * bad partition data to be opened by format and fdisk.)
922 		if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
923 			rv = ENXIO;
924 			goto done;
925 		}
926 	} else if (!ndelay) {
927 		/*
928 		 * cmlb_partinfo failed -- invalid partition or no
929 		 * disk label.
930 		 */
931 		rv = ENXIO;
932 		goto done;
933 	}
934 
935 	if ((flag & FWRITE) && bd->d_rdonly) {
936 		rv = EROFS;
937 		goto done;
938 	}
939 
940 	if ((bd->d_open_excl) & (mask)) {
941 		rv = EBUSY;
942 		goto done;
943 	}
944 	if (flag & FEXCL) {
945 		if (bd->d_open_lyr[part]) {
946 			rv = EBUSY;
947 			goto done;
948 		}
949 		for (int i = 0; i < OTYP_LYR; i++) {
950 			if (bd->d_open_reg[i] & mask) {
951 				rv = EBUSY;
952 				goto done;
953 			}
954 		}
955 	}
956 
957 	if (otyp == OTYP_LYR) {
958 		bd->d_open_lyr[part]++;
959 	} else {
960 		bd->d_open_reg[otyp] |= mask;
961 	}
962 	if (flag & FEXCL) {
963 		bd->d_open_excl |= mask;
964 	}
965 
966 	rv = 0;
967 done:
968 	mutex_exit(&bd->d_ocmutex);
969 	rw_exit(&bd_lock);
970 
971 	return (rv);
972 }
973 
974 static int
975 bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
976 {
977 	bd_t		*bd;
978 	minor_t		inst;
979 	minor_t		part;
980 	uint64_t	mask;
981 	boolean_t	last = B_TRUE;
982 
983 	_NOTE(ARGUNUSED(flag));
984 	_NOTE(ARGUNUSED(credp));
985 
986 	part = BDPART(dev);
987 	inst = BDINST(dev);
988 
989 	ASSERT(part < 64);
990 	mask = (1ULL << part);
991 
992 	rw_enter(&bd_lock, RW_READER);
993 
994 	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
995 		rw_exit(&bd_lock);
996 		return (ENXIO);
997 	}
998 
999 	mutex_enter(&bd->d_ocmutex);
1000 	if (bd->d_open_excl & mask) {
1001 		bd->d_open_excl &= ~mask;
1002 	}
1003 	if (otyp == OTYP_LYR) {
1004 		bd->d_open_lyr[part]--;
1005 	} else {
1006 		bd->d_open_reg[otyp] &= ~mask;
1007 	}
1008 	for (int i = 0; i < 64; i++) {
1009 		if (bd->d_open_lyr[i]) {
1010 			last = B_FALSE;
1011 		}
1012 	}
1013 	for (int i = 0; last && (i < OTYP_LYR); i++) {
1014 		if (bd->d_open_reg[i]) {
1015 			last = B_FALSE;
1016 		}
1017 	}
1018 	mutex_exit(&bd->d_ocmutex);
1019 
1020 	if (last) {
1021 		cmlb_invalidate(bd->d_cmlbh, 0);
1022 	}
1023 	rw_exit(&bd_lock);
1024 
1025 	return (0);
1026 }
1027 
1028 static int
1029 bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
1030 {
1031 	minor_t		inst;
1032 	minor_t		part;
1033 	diskaddr_t	pstart;
1034 	diskaddr_t	psize;
1035 	bd_t		*bd;
1036 	bd_xfer_impl_t	*xi;
1037 	buf_t		*bp;
1038 	int		rv;
1039 	uint32_t	shift;
1040 	daddr_t		d_blkno;
1041 	int	d_nblk;
1042 
1043 	rw_enter(&bd_lock, RW_READER);
1044 
1045 	part = BDPART(dev);
1046 	inst = BDINST(dev);
1047 
1048 	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1049 		rw_exit(&bd_lock);
1050 		return (ENXIO);
1051 	}
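	/*
	 * blkno and nblk are in DEV_BSIZE (512-byte) units; convert to
	 * device blocks, e.g. with 4 KB device blocks (shift 12) blkno
	 * 64 becomes device block 8.
	 */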
1052 	shift = bd->d_blkshift;
1053 	d_blkno = blkno >> (shift - DEV_BSHIFT);
1054 	d_nblk = howmany((nblk << DEV_BSHIFT), (1U << shift));
1055 	/*
1056 	 * Ask cmlb for the partition info; the non-NULL cookie forces
1057 	 * any label read to run polled, as required in dump context.
1058 	 */
1059 	if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
1060 	    (void *)1)) {
1061 		rw_exit(&bd_lock);
1062 		return (ENXIO);
1063 	}
1064 
1065 	if ((d_blkno + d_nblk) > psize) {
1066 		rw_exit(&bd_lock);
1067 		return (EINVAL);
1068 	}
1069 	bp = getrbuf(KM_NOSLEEP);
1070 	if (bp == NULL) {
1071 		rw_exit(&bd_lock);
1072 		return (ENOMEM);
1073 	}
1074 
1075 	bp->b_bcount = nblk << DEV_BSHIFT;
1076 	bp->b_resid = bp->b_bcount;
1077 	bp->b_lblkno = blkno;
1078 	bp->b_un.b_addr = caddr;
1079 
1080 	xi = bd_xfer_alloc(bd, bp,  bd->d_ops.o_write, KM_NOSLEEP);
1081 	if (xi == NULL) {
1082 		rw_exit(&bd_lock);
1083 		freerbuf(bp);
1084 		return (ENOMEM);
1085 	}
1086 	xi->i_blkno = d_blkno + pstart;
1087 	xi->i_flags = BD_XFER_POLL;
1088 	bd_submit(bd, xi);
1089 	rw_exit(&bd_lock);
1090 
1091 	/*
1092 	 * Generally, we should have run this entirely synchronously
1093 	 * at this point and the biowait call should be a no-op.  If
1094 	 * it didn't happen this way, it's a bug in the underlying
1095 	 * driver not honoring BD_XFER_POLL.
1096 	 */
1097 	(void) biowait(bp);
1098 	rv = geterror(bp);
1099 	freerbuf(bp);
1100 	return (rv);
1101 }
1102 
1103 void
1104 bd_minphys(struct buf *bp)
1105 {
1106 	minor_t inst;
1107 	bd_t	*bd;
1108 	inst = BDINST(bp->b_edev);
1109 
1110 	bd = ddi_get_soft_state(bd_state, inst);
1111 
1112 	/*
1113 	 * In a non-debug kernel, bd_strategy will catch !bd as
1114 	 * well, and will fail nicely.
1115 	 */
1116 	ASSERT(bd);
1117 
1118 	if (bp->b_bcount > bd->d_maxxfer)
1119 		bp->b_bcount = bd->d_maxxfer;
1120 }
1121 
1122 static int
1123 bd_read(dev_t dev, struct uio *uio, cred_t *credp)
1124 {
1125 	_NOTE(ARGUNUSED(credp));
1126 	return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
1127 }
1128 
1129 static int
1130 bd_write(dev_t dev, struct uio *uio, cred_t *credp)
1131 {
1132 	_NOTE(ARGUNUSED(credp));
1133 	return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
1134 }
1135 
1136 static int
1137 bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
1138 {
1139 	_NOTE(ARGUNUSED(credp));
1140 	return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
1141 }
1142 
1143 static int
1144 bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
1145 {
1146 	_NOTE(ARGUNUSED(credp));
1147 	return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
1148 }
1149 
1150 static int
1151 bd_strategy(struct buf *bp)
1152 {
1153 	minor_t		inst;
1154 	minor_t		part;
1155 	bd_t		*bd;
1156 	diskaddr_t	p_lba;
1157 	diskaddr_t	p_nblks;
1158 	diskaddr_t	b_nblks;
1159 	bd_xfer_impl_t	*xi;
1160 	uint32_t	shift;
1161 	int		(*func)(void *, bd_xfer_t *);
1162 	diskaddr_t 	lblkno;
1163 
1164 	part = BDPART(bp->b_edev);
1165 	inst = BDINST(bp->b_edev);
1166 
1167 	ASSERT(bp);
1168 
1169 	bp->b_resid = bp->b_bcount;
1170 
1171 	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1172 		bioerror(bp, ENXIO);
1173 		biodone(bp);
1174 		return (0);
1175 	}
1176 
1177 	if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
1178 	    NULL, NULL, 0)) {
1179 		bioerror(bp, ENXIO);
1180 		biodone(bp);
1181 		return (0);
1182 	}
1183 
1184 	shift = bd->d_blkshift;
1185 	lblkno = bp->b_lblkno >> (shift - DEV_BSHIFT);
1186 	if ((P2PHASE(bp->b_bcount, (1U << DEV_BSHIFT)) != 0) ||
1187 	    (lblkno > p_nblks)) {
1188 		bioerror(bp, ENXIO);
1189 		biodone(bp);
1190 		return (0);
1191 	}
1192 	b_nblks = howmany(bp->b_bcount, (1U << shift));
1193 	if ((lblkno == p_nblks) || (bp->b_bcount == 0)) {
1194 		biodone(bp);
1195 		return (0);
1196 	}
1197 
1198 	if ((b_nblks + lblkno) > p_nblks) {
1199 		bp->b_resid = ((lblkno + b_nblks - p_nblks) << shift);
1200 		bp->b_bcount -= bp->b_resid;
1201 	} else {
1202 		bp->b_resid = 0;
1203 	}
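	/*
	 * e.g. a 16-block read starting 8 blocks before the end of the
	 * partition is trimmed to 8 blocks, with the overrun reported
	 * in b_resid.
	 */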
1204 	func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;
1205 
1206 	xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
1207 	if (xi == NULL) {
1208 		xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
1209 	}
1210 	if (xi == NULL) {
1211 		/* bd_xfer_alloc will have done bioerror */
1212 		biodone(bp);
1213 		return (0);
1214 	}
1215 	xi->i_blkno = lblkno + p_lba;
1216 
1217 	bd_submit(bd, xi);
1218 
1219 	return (0);
1220 }
1221 
1222 static int
1223 bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
1224 {
1225 	minor_t		inst;
1226 	uint16_t	part;
1227 	bd_t		*bd;
1228 	void		*ptr = (void *)arg;
1229 	int		rv;
1230 
1231 	part = BDPART(dev);
1232 	inst = BDINST(dev);
1233 
1234 	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1235 		return (ENXIO);
1236 	}
1237 
1238 	rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
1239 	if (rv != ENOTTY)
1240 		return (rv);
1241 
1242 	if (rvalp != NULL) {
1243 		/* the return value of the ioctl is 0 by default */
1244 		*rvalp = 0;
1245 	}
1246 
1247 	switch (cmd) {
1248 	case DKIOCGMEDIAINFO: {
1249 		struct dk_minfo minfo;
1250 
1251 		/* make sure our state information is current */
1252 		bd_update_state(bd);
1253 		bzero(&minfo, sizeof (minfo));
1254 		minfo.dki_media_type = DK_FIXED_DISK;
1255 		minfo.dki_lbsize = (1U << bd->d_blkshift);
1256 		minfo.dki_capacity = bd->d_numblks;
1257 		if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
1258 			return (EFAULT);
1259 		}
1260 		return (0);
1261 	}
1262 	case DKIOCGMEDIAINFOEXT: {
1263 		struct dk_minfo_ext miext;
1264 
1265 		/* make sure our state information is current */
1266 		bd_update_state(bd);
1267 		bzero(&miext, sizeof (miext));
1268 		miext.dki_media_type = DK_FIXED_DISK;
1269 		miext.dki_lbsize = (1U << bd->d_blkshift);
1270 		miext.dki_pbsize = (1U << bd->d_pblkshift);
1271 		miext.dki_capacity = bd->d_numblks;
1272 		if (ddi_copyout(&miext, ptr, sizeof (miext), flag)) {
1273 			return (EFAULT);
1274 		}
1275 		return (0);
1276 	}
1277 	case DKIOCINFO: {
1278 		struct dk_cinfo cinfo;
1279 		bzero(&cinfo, sizeof (cinfo));
1280 		cinfo.dki_ctype = DKC_BLKDEV;
1281 		cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
1282 		(void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
1283 		    "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
1284 		(void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
1285 		    "%s", ddi_driver_name(bd->d_dip));
1286 		cinfo.dki_unit = inst;
1287 		cinfo.dki_flags = DKI_FMTVOL;
1288 		cinfo.dki_partition = part;
1289 		cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
1290 		cinfo.dki_addr = 0;
1291 		cinfo.dki_slave = 0;
1292 		cinfo.dki_space = 0;
1293 		cinfo.dki_prio = 0;
1294 		cinfo.dki_vec = 0;
1295 		if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
1296 			return (EFAULT);
1297 		}
1298 		return (0);
1299 	}
1300 	case DKIOCREMOVABLE: {
1301 		int i;
1302 		i = bd->d_removable ? 1 : 0;
1303 		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1304 			return (EFAULT);
1305 		}
1306 		return (0);
1307 	}
1308 	case DKIOCHOTPLUGGABLE: {
1309 		int i;
1310 		i = bd->d_hotpluggable ? 1 : 0;
1311 		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1312 			return (EFAULT);
1313 		}
1314 		return (0);
1315 	}
1316 	case DKIOCREADONLY: {
1317 		int i;
1318 		i = bd->d_rdonly ? 1 : 0;
1319 		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1320 			return (EFAULT);
1321 		}
1322 		return (0);
1323 	}
1324 	case DKIOCSOLIDSTATE: {
1325 		int i;
1326 		i = bd->d_ssd ? 1 : 0;
1327 		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1328 			return (EFAULT);
1329 		}
1330 		return (0);
1331 	}
1332 	case DKIOCSTATE: {
1333 		enum dkio_state	state;
1334 		if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
1335 			return (EFAULT);
1336 		}
1337 		if ((rv = bd_check_state(bd, &state)) != 0) {
1338 			return (rv);
1339 		}
1340 		if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
1341 			return (EFAULT);
1342 		}
1343 		return (0);
1344 	}
1345 	case DKIOCFLUSHWRITECACHE: {
1346 		struct dk_callback *dkc = NULL;
1347 
1348 		if (flag & FKIOCTL)
1349 			dkc = (void *)arg;
1350 
1351 		rv = bd_flush_write_cache(bd, dkc);
1352 		return (rv);
1353 	}
1354 
1355 	default:
1356 		break;
1357 
1358 	}
1359 	return (ENOTTY);
1360 }
1361 
1362 static int
1363 bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
1364     char *name, caddr_t valuep, int *lengthp)
1365 {
1366 	bd_t	*bd;
1367 
1368 	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1369 	if (bd == NULL)
1370 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
1371 		    name, valuep, lengthp));
1372 
1373 	return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
1374 	    valuep, lengthp, BDPART(dev), 0));
1375 }
1376 
1377 
1378 static int
1379 bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
1380     size_t length, void *tg_cookie)
1381 {
1382 	bd_t		*bd;
1383 	buf_t		*bp;
1384 	bd_xfer_impl_t	*xi;
1385 	int		rv;
1386 	int		(*func)(void *, bd_xfer_t *);
1387 	int		kmflag;
1388 
1389 	/*
1390 	 * If we are running in polled mode (such as during dump(9e)
1391 	 * execution), then we cannot sleep for kernel allocations.
1392 	 */
1393 	kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;
1394 
1395 	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1396 
1397 	if (P2PHASE(length, (1U << DEV_BSHIFT)) != 0) {
1398 		/* We can only transfer whole blocks at a time! */
1399 		return (EINVAL);
1400 	}
1401 
1402 	if ((bp = getrbuf(kmflag)) == NULL) {
1403 		return (ENOMEM);
1404 	}
1405 
1406 	switch (cmd) {
1407 	case TG_READ:
1408 		bp->b_flags = B_READ;
1409 		func = bd->d_ops.o_read;
1410 		break;
1411 	case TG_WRITE:
1412 		bp->b_flags = B_WRITE;
1413 		func = bd->d_ops.o_write;
1414 		break;
1415 	default:
1416 		freerbuf(bp);
1417 		return (EINVAL);
1418 	}
1419 
1420 	bp->b_un.b_addr = bufaddr;
1421 	bp->b_bcount = length;
1422 	xi = bd_xfer_alloc(bd, bp, func, kmflag);
1423 	if (xi == NULL) {
1424 		rv = geterror(bp);
1425 		freerbuf(bp);
1426 		return (rv);
1427 	}
1428 	xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
1429 	xi->i_blkno = start;
1430 	bd_submit(bd, xi);
1431 	(void) biowait(bp);
1432 	rv = geterror(bp);
1433 	freerbuf(bp);
1434 
1435 	return (rv);
1436 }
1437 
1438 static int
1439 bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
1440 {
1441 	bd_t		*bd;
1442 
1443 	_NOTE(ARGUNUSED(tg_cookie));
1444 	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1445 
1446 	switch (cmd) {
1447 	case TG_GETPHYGEOM:
1448 	case TG_GETVIRTGEOM:
1449 		/*
1450 		 * We don't have any "geometry" as such, let cmlb
1451 		 * We don't have any "geometry" as such, so let cmlb
1452 		 */
1453 		return (ENOTTY);
1454 
1455 	case TG_GETCAPACITY:
1456 		bd_update_state(bd);
1457 		*(diskaddr_t *)arg = bd->d_numblks;
1458 		return (0);
1459 
1460 	case TG_GETBLOCKSIZE:
1461 		*(uint32_t *)arg = (1U << bd->d_blkshift);
1462 		return (0);
1463 
1464 	case TG_GETATTR:
1465 		/*
1466 		 * It turns out that cmlb really doesn't do much for
1467 		 * non-writable media, but let's make the information
1468 		 * available for it in case it does more in the
1469 		 * future.  (The value is currently used for
1470 		 * triggering special behavior for CD-ROMs.)
1471 		 */
1472 		bd_update_state(bd);
1473 		((tg_attribute_t *)arg)->media_is_writable =
1474 		    bd->d_rdonly ? B_FALSE : B_TRUE;
1475 		((tg_attribute_t *)arg)->media_is_solid_state = bd->d_ssd;
1476 		return (0);
1477 
1478 	default:
1479 		return (EINVAL);
1480 	}
1481 }
1482 
1483 
1484 static void
1485 bd_sched(bd_t *bd)
1486 {
1487 	bd_xfer_impl_t	*xi;
1488 	struct buf	*bp;
1489 	int		rv;
1490 
1491 	mutex_enter(&bd->d_iomutex);
1492 
1493 	while ((bd->d_qactive < bd->d_qsize) &&
1494 	    ((xi = list_remove_head(&bd->d_waitq)) != NULL)) {
1495 		bd->d_qactive++;
1496 		kstat_waitq_to_runq(bd->d_kiop);
1497 		list_insert_tail(&bd->d_runq, xi);
1498 
1499 		/*
1500 		 * Submit the job to the driver.  We drop the I/O mutex
1501 		 * so that we can deal with the case where the driver
1502 		 * completion routine calls back into us synchronously.
1503 		 */
1504 
1505 		mutex_exit(&bd->d_iomutex);
1506 
1507 		rv = xi->i_func(bd->d_private, &xi->i_public);
1508 		if (rv != 0) {
1509 			bp = xi->i_bp;
1510 			bioerror(bp, rv);
1511 			biodone(bp);
1512 
1513 			atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
1514 
1515 			mutex_enter(&bd->d_iomutex);
1516 			bd->d_qactive--;
1517 			kstat_runq_exit(bd->d_kiop);
1518 			list_remove(&bd->d_runq, xi);
1519 			bd_xfer_free(xi);
1520 		} else {
1521 			mutex_enter(&bd->d_iomutex);
1522 		}
1523 	}
1524 
1525 	mutex_exit(&bd->d_iomutex);
1526 }
1527 
1528 static void
1529 bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
1530 {
1531 	mutex_enter(&bd->d_iomutex);
1532 	list_insert_tail(&bd->d_waitq, xi);
1533 	kstat_waitq_enter(bd->d_kiop);
1534 	mutex_exit(&bd->d_iomutex);
1535 
1536 	bd_sched(bd);
1537 }
1538 
1539 static void
1540 bd_runq_exit(bd_xfer_impl_t *xi, int err)
1541 {
1542 	bd_t	*bd = xi->i_bd;
1543 	buf_t	*bp = xi->i_bp;
1544 
1545 	mutex_enter(&bd->d_iomutex);
1546 	bd->d_qactive--;
1547 	kstat_runq_exit(bd->d_kiop);
1548 	list_remove(&bd->d_runq, xi);
1549 	mutex_exit(&bd->d_iomutex);
1550 
1551 	if (err == 0) {
1552 		if (bp->b_flags & B_READ) {
1553 			bd->d_kiop->reads++;
1554 			bd->d_kiop->nread += (bp->b_bcount - xi->i_resid);
1555 		} else {
1556 			bd->d_kiop->writes++;
1557 			bd->d_kiop->nwritten += (bp->b_bcount - xi->i_resid);
1558 		}
1559 	}
1560 	bd_sched(bd);
1561 }
1562 
1563 static void
1564 bd_update_state(bd_t *bd)
1565 {
1566 	enum	dkio_state	state = DKIO_INSERTED;
1567 	boolean_t		docmlb = B_FALSE;
1568 	bd_media_t		media;
1569 
1570 	bzero(&media, sizeof (media));
1571 
1572 	mutex_enter(&bd->d_statemutex);
1573 	if (bd->d_ops.o_media_info(bd->d_private, &media) != 0) {
1574 		bd->d_numblks = 0;
1575 		state = DKIO_EJECTED;
1576 		goto done;
1577 	}
1578 
1579 	if ((media.m_blksize < 512) ||
1580 	    (!ISP2(media.m_blksize)) ||
1581 	    (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
1582 		cmn_err(CE_WARN, "%s%d: Invalid media block size (%d)",
1583 		    ddi_driver_name(bd->d_dip), ddi_get_instance(bd->d_dip),
1584 		    media.m_blksize);
1585 		/*
1586 		 * We can't use the media, treat it as not present.
1587 		 */
1588 		state = DKIO_EJECTED;
1589 		bd->d_numblks = 0;
1590 		goto done;
1591 	}
1592 
1593 	if (((1U << bd->d_blkshift) != media.m_blksize) ||
1594 	    (bd->d_numblks != media.m_nblks)) {
1595 		/* Device size changed */
1596 		docmlb = B_TRUE;
1597 	}
1598 
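	/*
	 * m_blksize is a verified power of two, so ddi_ffs() gives its
	 * 1-based bit position; e.g. 512 -> ffs of 10 -> shift of 9.
	 */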
1599 	bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
1600 	bd->d_pblkshift = bd->d_blkshift;
1601 	bd->d_numblks = media.m_nblks;
1602 	bd->d_rdonly = media.m_readonly;
1603 	bd->d_ssd = media.m_solidstate;
1604 
1605 	/*
1606 	 * Only use the supplied physical block size if it is non-zero,
1607 	 * greater or equal to the block size, and a power of 2. Ignore it
1608 	 * if not, it's just informational and we can still use the media.
1609 	 */
1610 	if ((media.m_pblksize != 0) &&
1611 	    (media.m_pblksize >= media.m_blksize) &&
1612 	    (ISP2(media.m_pblksize)))
1613 		bd->d_pblkshift = ddi_ffs(media.m_pblksize) - 1;
1614 
1615 done:
1616 	if (state != bd->d_state) {
1617 		bd->d_state = state;
1618 		cv_broadcast(&bd->d_statecv);
1619 		docmlb = B_TRUE;
1620 	}
1621 	mutex_exit(&bd->d_statemutex);
1622 
1623 	bd->d_kerr->bd_capacity.value.ui64 = bd->d_numblks << bd->d_blkshift;
1624 
1625 	if (docmlb) {
1626 		if (state == DKIO_INSERTED) {
1627 			(void) cmlb_validate(bd->d_cmlbh, 0, 0);
1628 		} else {
1629 			cmlb_invalidate(bd->d_cmlbh, 0);
1630 		}
1631 	}
1632 }
1633 
1634 static int
1635 bd_check_state(bd_t *bd, enum dkio_state *state)
1636 {
1637 	clock_t		when;
1638 
1639 	for (;;) {
1640 
1641 		bd_update_state(bd);
1642 
1643 		mutex_enter(&bd->d_statemutex);
1644 
1645 		if (bd->d_state != *state) {
1646 			*state = bd->d_state;
1647 			mutex_exit(&bd->d_statemutex);
1648 			break;
1649 		}
1650 
1651 		when = drv_usectohz(1000000);
1652 		if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
1653 		    when, TR_CLOCK_TICK) == 0) {
1654 			mutex_exit(&bd->d_statemutex);
1655 			return (EINTR);
1656 		}
1657 
1658 		mutex_exit(&bd->d_statemutex);
1659 	}
1660 
1661 	return (0);
1662 }
1663 
1664 static int
1665 bd_flush_write_cache_done(struct buf *bp)
1666 {
1667 	struct dk_callback *dc = (void *)bp->b_private;
1668 
1669 	(*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
1670 	kmem_free(dc, sizeof (*dc));
1671 	freerbuf(bp);
1672 	return (0);
1673 }
1674 
1675 static int
1676 bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
1677 {
1678 	buf_t			*bp;
1679 	struct dk_callback	*dc;
1680 	bd_xfer_impl_t		*xi;
1681 	int			rv;
1682 
1683 	if (bd->d_ops.o_sync_cache == NULL) {
1684 		return (ENOTSUP);
1685 	}
1686 	if ((bp = getrbuf(KM_SLEEP)) == NULL) {
1687 		return (ENOMEM);
1688 	}
1689 	bp->b_resid = 0;
1690 	bp->b_bcount = 0;
1691 
1692 	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
1693 	if (xi == NULL) {
1694 		rv = geterror(bp);
1695 		freerbuf(bp);
1696 		return (rv);
1697 	}
1698 
1699 	/* Make an asynchronous flush, but only if there is a callback */
1700 	if (dkc != NULL && dkc->dkc_callback != NULL) {
1701 		/* Make a private copy of the callback structure */
1702 		dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
1703 		*dc = *dkc;
1704 		bp->b_private = dc;
1705 		bp->b_iodone = bd_flush_write_cache_done;
1706 
1707 		bd_submit(bd, xi);
1708 		return (0);
1709 	}
1710 
1711 	/* In case there is no callback, perform a synchronous flush */
1712 	bd_submit(bd, xi);
1713 	(void) biowait(bp);
1714 	rv = geterror(bp);
1715 	freerbuf(bp);
1716 
1717 	return (rv);
1718 }
1719 
1720 /*
1721  * Nexus support.
1722  */
1723 int
1724 bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
1725     void *arg, void *result)
1726 {
1727 	bd_handle_t	hdl;
1728 
1729 	switch (ctlop) {
1730 	case DDI_CTLOPS_REPORTDEV:
1731 		cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
1732 		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
1733 		    ddi_driver_name(rdip), ddi_get_instance(rdip));
1734 		return (DDI_SUCCESS);
1735 
1736 	case DDI_CTLOPS_INITCHILD:
1737 		hdl = ddi_get_parent_data((dev_info_t *)arg);
1738 		if (hdl == NULL) {
1739 			return (DDI_NOT_WELL_FORMED);
1740 		}
1741 		ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
1742 		return (DDI_SUCCESS);
1743 
1744 	case DDI_CTLOPS_UNINITCHILD:
1745 		ddi_set_name_addr((dev_info_t *)arg, NULL);
1746 		ndi_prop_remove_all((dev_info_t *)arg);
1747 		return (DDI_SUCCESS);
1748 
1749 	default:
1750 		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
1751 	}
1752 }
1753 
1754 /*
1755  * Functions for device drivers.
1756  */
1757 bd_handle_t
1758 bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
1759 {
1760 	bd_handle_t	hdl;
1761 
1762 	hdl = kmem_zalloc(sizeof (*hdl), kmflag);
1763 	if (hdl != NULL) {
1764 		hdl->h_ops = *ops;
1765 		hdl->h_dma = dma;
1766 		hdl->h_private = private;
1767 	}
1768 
1769 	return (hdl);
1770 }
1771 
1772 void
1773 bd_free_handle(bd_handle_t hdl)
1774 {
1775 	kmem_free(hdl, sizeof (*hdl));
1776 }
1777 
1778 int
1779 bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
1780 {
1781 	dev_info_t	*child;
1782 	bd_drive_t	drive = { 0 };
1783 
1784 	/* if the driver doesn't supply a LUN, assume there is none */
1785 	drive.d_lun = -1;
1786 	hdl->h_ops.o_drive_info(hdl->h_private, &drive);
1787 
1788 	hdl->h_parent = dip;
1789 	hdl->h_name = "blkdev";
1790 
1791 	/*LINTED: E_BAD_PTR_CAST_ALIGN*/
1792 	if (*(uint64_t *)drive.d_eui64 != 0) {
1793 		if (drive.d_lun >= 0) {
1794 			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1795 			    "w%02X%02X%02X%02X%02X%02X%02X%02X,%X",
1796 			    drive.d_eui64[0], drive.d_eui64[1],
1797 			    drive.d_eui64[2], drive.d_eui64[3],
1798 			    drive.d_eui64[4], drive.d_eui64[5],
1799 			    drive.d_eui64[6], drive.d_eui64[7], drive.d_lun);
1800 		} else {
1801 			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1802 			    "w%02X%02X%02X%02X%02X%02X%02X%02X",
1803 			    drive.d_eui64[0], drive.d_eui64[1],
1804 			    drive.d_eui64[2], drive.d_eui64[3],
1805 			    drive.d_eui64[4], drive.d_eui64[5],
1806 			    drive.d_eui64[6], drive.d_eui64[7]);
1807 		}
1808 	} else {
1809 		if (drive.d_lun >= 0) {
1810 			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1811 			    "%X,%X", drive.d_target, drive.d_lun);
1812 		} else {
1813 			(void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1814 			    "%X", drive.d_target);
1815 		}
1816 	}
1817 
1818 	if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
1819 	    &child) != NDI_SUCCESS) {
1820 		cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
1821 		    ddi_driver_name(dip), ddi_get_instance(dip),
1822 		    "blkdev", hdl->h_addr);
1823 		return (DDI_FAILURE);
1824 	}
1825 
1826 	ddi_set_parent_data(child, hdl);
1827 	hdl->h_child = child;
1828 
1829 	if (ndi_devi_online(child, 0) == NDI_FAILURE) {
1830 		cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
1831 		    ddi_driver_name(dip), ddi_get_instance(dip),
1832 		    hdl->h_name, hdl->h_addr);
1833 		(void) ndi_devi_free(child);
1834 		return (DDI_FAILURE);
1835 	}
1836 
1837 	return (DDI_SUCCESS);
1838 }
1839 
1840 int
1841 bd_detach_handle(bd_handle_t hdl)
1842 {
1843 	int	circ;
1844 	int	rv;
1845 	char	*devnm;
1846 
1847 	if (hdl->h_child == NULL) {
1848 		return (DDI_SUCCESS);
1849 	}
1850 	ndi_devi_enter(hdl->h_parent, &circ);
1851 	if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
1852 		rv = ddi_remove_child(hdl->h_child, 0);
1853 	} else {
1854 		devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
1855 		(void) ddi_deviname(hdl->h_child, devnm);
1856 		(void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
1857 		rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
1858 		    NDI_DEVI_REMOVE | NDI_UNCONFIG);
1859 		kmem_free(devnm, MAXNAMELEN + 1);
1860 	}
1861 	if (rv == 0) {
1862 		hdl->h_child = NULL;
1863 	}
1864 
1865 	ndi_devi_exit(hdl->h_parent, circ);
1866 	return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
1867 }
1868 
1869 void
1870 bd_xfer_done(bd_xfer_t *xfer, int err)
1871 {
1872 	bd_xfer_impl_t	*xi = (void *)xfer;
1873 	buf_t		*bp = xi->i_bp;
1874 	int		rv = DDI_SUCCESS;
1875 	bd_t		*bd = xi->i_bd;
1876 	size_t		len;
1877 
1878 	if (err != 0) {
1879 		bd_runq_exit(xi, err);
1880 		atomic_inc_32(&bd->d_kerr->bd_harderrs.value.ui32);
1881 
1882 		bp->b_resid += xi->i_resid;
1883 		bd_xfer_free(xi);
1884 		bioerror(bp, err);
1885 		biodone(bp);
1886 		return;
1887 	}
1888 
1889 	xi->i_cur_win++;
1890 	xi->i_resid -= xi->i_len;
1891 
1892 	if (xi->i_resid == 0) {
1893 		/* Job completed successfully! */
1894 		bd_runq_exit(xi, 0);
1895 
1896 		bd_xfer_free(xi);
1897 		biodone(bp);
1898 		return;
1899 	}
1900 
1901 	xi->i_blkno += xi->i_nblks;
1902 
1903 	if (bd->d_use_dma) {
1904 		/* More transfer still pending... advance to next DMA window. */
1905 		rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
1906 		    &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
1907 	} else {
1908 		/* Advance memory window. */
1909 		xi->i_kaddr += xi->i_len;
1910 		xi->i_offset += xi->i_len;
1911 		len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
1912 	}
1913 
1914 
1915 	if ((rv != DDI_SUCCESS) ||
1916 	    (P2PHASE(len, (1U << DEV_BSHIFT)) != 0)) {
1917 		bd_runq_exit(xi, EFAULT);
1918 
1919 		bp->b_resid += xi->i_resid;
1920 		bd_xfer_free(xi);
1921 		bioerror(bp, EFAULT);
1922 		biodone(bp);
1923 		return;
1924 	}
1925 	xi->i_len = len;
1926 	xi->i_nblks = howmany(len, (1U << xi->i_blkshift));
1927 
1928 	/* Submit next window to hardware. */
1929 	rv = xi->i_func(bd->d_private, &xi->i_public);
1930 	if (rv != 0) {
1931 		bd_runq_exit(xi, rv);
1932 
1933 		atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
1934 
1935 		bp->b_resid += xi->i_resid;
1936 		bd_xfer_free(xi);
1937 		bioerror(bp, rv);
1938 		biodone(bp);
1939 	}
1940 }
1941 
1942 void
1943 bd_error(bd_xfer_t *xfer, int error)
1944 {
1945 	bd_xfer_impl_t	*xi = (void *)xfer;
1946 	bd_t		*bd = xi->i_bd;
1947 
1948 	switch (error) {
1949 	case BD_ERR_MEDIA:
1950 		atomic_inc_32(&bd->d_kerr->bd_rq_media_err.value.ui32);
1951 		break;
1952 	case BD_ERR_NTRDY:
1953 		atomic_inc_32(&bd->d_kerr->bd_rq_ntrdy_err.value.ui32);
1954 		break;
1955 	case BD_ERR_NODEV:
1956 		atomic_inc_32(&bd->d_kerr->bd_rq_nodev_err.value.ui32);
1957 		break;
1958 	case BD_ERR_RECOV:
1959 		atomic_inc_32(&bd->d_kerr->bd_rq_recov_err.value.ui32);
1960 		break;
1961 	case BD_ERR_ILLRQ:
1962 		atomic_inc_32(&bd->d_kerr->bd_rq_illrq_err.value.ui32);
1963 		break;
1964 	case BD_ERR_PFA:
1965 		atomic_inc_32(&bd->d_kerr->bd_rq_pfa_err.value.ui32);
1966 		break;
1967 	default:
1968 		cmn_err(CE_PANIC, "bd_error: unknown error type %d", error);
1969 		break;
1970 	}
1971 }
1972 
1973 void
1974 bd_state_change(bd_handle_t hdl)
1975 {
1976 	bd_t		*bd;
1977 
1978 	if ((bd = hdl->h_bd) != NULL) {
1979 		bd_update_state(bd);
1980 	}
1981 }
1982 
1983 void
1984 bd_mod_init(struct dev_ops *devops)
1985 {
1986 	static struct bus_ops bd_bus_ops = {
1987 		BUSO_REV,		/* busops_rev */
1988 		nullbusmap,		/* bus_map */
1989 		NULL,			/* bus_get_intrspec (OBSOLETE) */
1990 		NULL,			/* bus_add_intrspec (OBSOLETE) */
1991 		NULL,			/* bus_remove_intrspec (OBSOLETE) */
1992 		i_ddi_map_fault,	/* bus_map_fault */
1993 		NULL,			/* bus_dma_map (OBSOLETE) */
1994 		ddi_dma_allochdl,	/* bus_dma_allochdl */
1995 		ddi_dma_freehdl,	/* bus_dma_freehdl */
1996 		ddi_dma_bindhdl,	/* bus_dma_bindhdl */
1997 		ddi_dma_unbindhdl,	/* bus_dma_unbindhdl */
1998 		ddi_dma_flush,		/* bus_dma_flush */
1999 		ddi_dma_win,		/* bus_dma_win */
2000 		ddi_dma_mctl,		/* bus_dma_ctl */
2001 		bd_bus_ctl,		/* bus_ctl */
2002 		ddi_bus_prop_op,	/* bus_prop_op */
2003 		NULL,			/* bus_get_eventcookie */
2004 		NULL,			/* bus_add_eventcall */
2005 		NULL,			/* bus_remove_eventcall */
2006 		NULL,			/* bus_post_event */
2007 		NULL,			/* bus_intr_ctl (OBSOLETE) */
2008 		NULL,			/* bus_config */
2009 		NULL,			/* bus_unconfig */
2010 		NULL,			/* bus_fm_init */
2011 		NULL,			/* bus_fm_fini */
2012 		NULL,			/* bus_fm_access_enter */
2013 		NULL,			/* bus_fm_access_exit */
2014 		NULL,			/* bus_power */
2015 		NULL,			/* bus_intr_op */
2016 	};
2017 
2018 	devops->devo_bus_ops = &bd_bus_ops;
2019 
2020 	/*
2021 	 * NB: The device driver is free to supply its own
2022 	 * character entry device support.
2023 	 */
2024 }
2025 
2026 void
2027 bd_mod_fini(struct dev_ops *devops)
2028 {
2029 	devops->devo_bus_ops = NULL;
2030 }
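
/*
 * A minimal sketch (not from the original file) of how a parent
 * driver publishes a disk through the bd_alloc_handle() and
 * bd_attach_handle() entry points above.  The xx_* names and
 * xx_dma_attr are hypothetical stand-ins for the client driver's
 * own callbacks and DMA attributes.
 */
#if 0
static bd_ops_t xx_bd_ops = {
	.o_drive_info	= xx_drive_info,	/* fills in bd_drive_t */
	.o_media_info	= xx_media_info,	/* fills in bd_media_t */
	.o_devid_init	= NULL,			/* optional */
	.o_sync_cache	= xx_sync_cache,	/* write cache flush */
	.o_read		= xx_read,
	.o_write	= xx_write,
};

static int
xx_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	xx_state_t	*xx;

	/* ... allocate and initialize per-instance state ... */

	xx->xx_bdh = bd_alloc_handle(xx, &xx_bd_ops, &xx_dma_attr,
	    KM_SLEEP);
	if (xx->xx_bdh == NULL)
		return (DDI_FAILURE);

	/* Creates the blkdev child node and brings it online. */
	if (bd_attach_handle(dip, xx->xx_bdh) != DDI_SUCCESS) {
		bd_free_handle(xx->xx_bdh);
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}
#endif

/*
 * The client's o_read/o_write callbacks later report completion via
 * bd_xfer_done(), classify failures via bd_error(), and signal media
 * changes via bd_state_change(); its _init() must also call
 * bd_mod_init() on its dev_ops so the bus_ops above are wired in.
 */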
2031