1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2012 Garrett D'Amore <garrett@damore.org>. All rights reserved.
24 * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
25 * Copyright 2017 The MathWorks, Inc. All rights reserved.
26 * Copyright 2020 Joyent, Inc.
27 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
28 * Copyright 2022 Tintri by DDN, Inc. All rights reserved.
29 * Copyright 2023 Oxide Computer Company
30 */
31
32 #include <sys/types.h>
33 #include <sys/ksynch.h>
34 #include <sys/kmem.h>
35 #include <sys/file.h>
36 #include <sys/errno.h>
37 #include <sys/open.h>
38 #include <sys/buf.h>
39 #include <sys/uio.h>
40 #include <sys/aio_req.h>
41 #include <sys/cred.h>
42 #include <sys/modctl.h>
43 #include <sys/cmlb.h>
44 #include <sys/conf.h>
45 #include <sys/devops.h>
46 #include <sys/list.h>
47 #include <sys/sysmacros.h>
48 #include <sys/dkio.h>
49 #include <sys/dkioc_free_util.h>
50 #include <sys/vtoc.h>
51 #include <sys/scsi/scsi.h> /* for DTYPE_DIRECT */
52 #include <sys/kstat.h>
53 #include <sys/fs/dv_node.h>
54 #include <sys/ddi.h>
55 #include <sys/sunddi.h>
56 #include <sys/note.h>
57 #include <sys/blkdev.h>
58 #include <sys/scsi/impl/inquiry.h>
59 #include <sys/taskq.h>
60 #include <sys/taskq_impl.h>
61 #include <sys/disp.h>
62 #include <sys/sysevent/eventdefs.h>
63 #include <sys/sysevent/dev.h>
64
65 /*
66 * blkdev is a driver which provides a lot of the common functionality
67 * a block device driver may need and helps by removing code which
68 * is frequently duplicated in block device drivers.
69 *
70 * Within this driver all the struct cb_ops functions required for a
71 * block device driver are written with appropriate call back functions
72 * to be provided by the parent driver.
73 *
74 * To use blkdev, a driver needs to:
75 * 1. Create a bd_ops_t structure which has the call back operations
76 * blkdev will use.
77 * 2. Create a handle by calling bd_alloc_handle(). One of the
78 * arguments to this function is the bd_ops_t.
79 * 3. Call bd_attach_handle(). This will instantiate a blkdev device
80 * as a child device node of the calling driver.
81 *
82 * A parent driver is not restricted to just allocating and attaching a
83 * single instance, it may attach as many as it wishes. For each handle
84 * attached, appropriate entries in /dev/[r]dsk are created.
85 *
 * The bd_ops_t routines that a parent of blkdev needs to provide are:
87 *
88 * o_drive_info: Provide information to blkdev such as how many I/O queues
89 * to create and the size of those queues. Also some device
90 * specifics such as EUI, vendor, product, model, serial
91 * number ....
92 *
93 * o_media_info: Provide information about the media. Eg size and block size.
94 *
95 * o_devid_init: Creates and initializes the device id. Typically calls
96 * ddi_devid_init().
97 *
98 * o_sync_cache: Issues a device appropriate command to flush any write
99 * caches.
100 *
101 * o_read: Read data as described by bd_xfer_t argument.
102 *
103 * o_write: Write data as described by bd_xfer_t argument.
104 *
105 * o_free_space: Free the space described by bd_xfer_t argument (optional).
106 *
107 * Queues
108 * ------
109 * Part of the drive_info data is a queue count. blkdev will create
110 * "queue count" number of waitq/runq pairs. Each waitq/runq pair
111 * operates independently. As an I/O is scheduled up to the parent
112 * driver via o_read or o_write its queue number is given. If the
113 * parent driver supports multiple hardware queues it can then select
114 * where to submit the I/O request.
115 *
116 * Currently blkdev uses a simplistic round-robin queue selection method.
117 * It has the advantage that it is lockless. In the future it will be
118 * worthwhile reviewing this strategy for something which prioritizes queues
119 * depending on how busy they are.
120 *
121 * Each waitq/runq pair is protected by its mutex (q_iomutex). Incoming
122 * I/O requests are initially added to the waitq. They are taken off the
123 * waitq, added to the runq and submitted, providing the runq is less
124 * than the qsize as specified in the drive_info. As an I/O request
125 * completes, the parent driver is required to call bd_xfer_done(), which
126 * will remove the I/O request from the runq and pass I/O completion
127 * status up the stack.
128 *
129 * Locks
130 * -----
 * There are 5 instance global locks: d_ocmutex, d_ksmutex, d_errmutex,
 * d_statemutex and d_dle_mutex, as well as a q_iomutex per waitq/runq pair.
133 *
134 * Lock Hierarchy
135 * --------------
136 * The only two locks which may be held simultaneously are q_iomutex and
137 * d_ksmutex. In all cases q_iomutex must be acquired before d_ksmutex.
138 */
139
/*
 * Minor number layout: every instance owns BD_MAXPART consecutive minor
 * numbers.  BDINST() recovers the instance a dev_t belongs to and BDPART()
 * the partition index within that instance.
 */
#define	BD_MAXPART	64
#define	BDINST(dev)	(getminor(dev) / BD_MAXPART)
#define	BDPART(dev)	(getminor(dev) % BD_MAXPART)
143
/* Short names for the driver's private structures. */
typedef struct bd		bd_t;
typedef struct bd_xfer_impl	bd_xfer_impl_t;
typedef struct bd_queue		bd_queue_t;
147
/*
 * Flag bits stored in the d_dle_state member of struct bd.  Each value
 * occupies its own bit.
 */
typedef enum {
	BD_DLE_PENDING	= 0x1,
	BD_DLE_RUNNING	= 0x2
} bd_dle_state_t;
152
153 struct bd {
154 void *d_private;
155 dev_info_t *d_dip;
156 kmutex_t d_ocmutex; /* open/close */
157 kmutex_t d_ksmutex; /* kstat */
158 kmutex_t d_errmutex;
159 kmutex_t d_statemutex;
160 kcondvar_t d_statecv;
161 enum dkio_state d_state;
162 cmlb_handle_t d_cmlbh;
163 unsigned d_open_lyr[BD_MAXPART]; /* open count */
164 uint64_t d_open_excl; /* bit mask indexed by partition */
165 uint64_t d_open_reg[OTYPCNT]; /* bit mask */
166 uint64_t d_io_counter;
167
168 uint32_t d_qcount;
169 uint32_t d_qactive;
170 uint32_t d_maxxfer;
171 uint32_t d_blkshift;
172 uint32_t d_pblkshift;
173 uint64_t d_numblks;
174 ddi_devid_t d_devid;
175
176 uint64_t d_max_free_seg;
177 uint64_t d_max_free_blks;
178 uint64_t d_max_free_seg_blks;
179 uint64_t d_free_align;
180
181 kmem_cache_t *d_cache;
182 bd_queue_t *d_queues;
183 kstat_t *d_ksp;
184 kstat_io_t *d_kiop;
185 kstat_t *d_errstats;
186 struct bd_errstats *d_kerr;
187
188 boolean_t d_rdonly;
189 boolean_t d_ssd;
190 boolean_t d_removable;
191 boolean_t d_hotpluggable;
192 boolean_t d_use_dma;
193
194 ddi_dma_attr_t d_dma;
195 bd_ops_t d_ops;
196 bd_handle_t d_handle;
197
198 kmutex_t d_dle_mutex;
199 taskq_ent_t d_dle_ent;
200 bd_dle_state_t d_dle_state;
201 };
202
203 struct bd_handle {
204 bd_ops_t h_ops;
205 ddi_dma_attr_t *h_dma;
206 dev_info_t *h_parent;
207 dev_info_t *h_child;
208 void *h_private;
209 bd_t *h_bd;
210 char *h_name;
211 char h_addr[50]; /* enough for w%0.32x,%X */
212 };
213
214 struct bd_xfer_impl {
215 bd_xfer_t i_public;
216 list_node_t i_linkage;
217 bd_t *i_bd;
218 buf_t *i_bp;
219 bd_queue_t *i_bq;
220 uint_t i_num_win;
221 uint_t i_cur_win;
222 off_t i_offset;
223 int (*i_func)(void *, bd_xfer_t *);
224 uint32_t i_blkshift;
225 size_t i_len;
226 size_t i_resid;
227 };
228
229 struct bd_queue {
230 kmutex_t q_iomutex;
231 uint32_t q_qsize;
232 uint32_t q_qactive;
233 list_t q_runq;
234 list_t q_waitq;
235 };
236
/*
 * Shorthand for reaching the fields of the embedded public bd_xfer_t
 * (i_public) directly from a bd_xfer_impl_t.
 */
#define	i_dmah		i_public.x_dmah
#define	i_dmac		i_public.x_dmac
#define	i_ndmac		i_public.x_ndmac
#define	i_kaddr		i_public.x_kaddr
#define	i_nblks		i_public.x_nblks
#define	i_blkno		i_public.x_blkno
#define	i_flags		i_public.x_flags
#define	i_qnum		i_public.x_qnum
#define	i_dfl		i_public.x_dfl
246
/* B_TRUE when the parent driver supplied an o_free_space operation. */
#define	CAN_FREESPACE(bd)	\
	(((bd)->d_ops.o_free_space != NULL) ? B_TRUE : B_FALSE)
249
250 /*
251 * Private prototypes.
252 */
253
254 static void bd_prop_update_inqstring(dev_info_t *, char *, char *, size_t);
255 static void bd_create_inquiry_props(dev_info_t *, bd_drive_t *);
256 static void bd_create_errstats(bd_t *, int, bd_drive_t *);
257 static void bd_destroy_errstats(bd_t *);
258 static void bd_errstats_setstr(kstat_named_t *, char *, size_t, char *);
259 static void bd_init_errstats(bd_t *, bd_drive_t *);
260 static void bd_fini_errstats(bd_t *);
261
262 static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
263 static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
264 static int bd_detach(dev_info_t *, ddi_detach_cmd_t);
265
266 static int bd_open(dev_t *, int, int, cred_t *);
267 static int bd_close(dev_t, int, int, cred_t *);
268 static int bd_strategy(struct buf *);
269 static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
270 static int bd_dump(dev_t, caddr_t, daddr_t, int);
271 static int bd_read(dev_t, struct uio *, cred_t *);
272 static int bd_write(dev_t, struct uio *, cred_t *);
273 static int bd_aread(dev_t, struct aio_req *, cred_t *);
274 static int bd_awrite(dev_t, struct aio_req *, cred_t *);
275 static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
276 caddr_t, int *);
277
278 static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
279 void *);
280 static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
281 static int bd_xfer_ctor(void *, void *, int);
282 static void bd_xfer_dtor(void *, void *);
283 static void bd_sched(bd_t *, bd_queue_t *);
284 static void bd_submit(bd_t *, bd_xfer_impl_t *);
285 static void bd_runq_exit(bd_xfer_impl_t *, int);
286 static void bd_update_state(bd_t *);
287 static int bd_check_state(bd_t *, enum dkio_state *);
288 static int bd_flush_write_cache(bd_t *, struct dk_callback *);
289 static int bd_check_uio(dev_t, struct uio *);
290 static int bd_free_space(dev_t, bd_t *, dkioc_free_list_t *);
291
292 struct cmlb_tg_ops bd_tg_ops = {
293 TG_DK_OPS_VERSION_1,
294 bd_tg_rdwr,
295 bd_tg_getinfo,
296 };
297
298 static struct cb_ops bd_cb_ops = {
299 bd_open, /* open */
300 bd_close, /* close */
301 bd_strategy, /* strategy */
302 nodev, /* print */
303 bd_dump, /* dump */
304 bd_read, /* read */
305 bd_write, /* write */
306 bd_ioctl, /* ioctl */
307 nodev, /* devmap */
308 nodev, /* mmap */
309 nodev, /* segmap */
310 nochpoll, /* poll */
311 bd_prop_op, /* cb_prop_op */
312 0, /* streamtab */
313 D_64BIT | D_MP, /* Driver comaptibility flag */
314 CB_REV, /* cb_rev */
315 bd_aread, /* async read */
316 bd_awrite /* async write */
317 };
318
319 struct dev_ops bd_dev_ops = {
320 DEVO_REV, /* devo_rev, */
321 0, /* refcnt */
322 bd_getinfo, /* getinfo */
323 nulldev, /* identify */
324 nulldev, /* probe */
325 bd_attach, /* attach */
326 bd_detach, /* detach */
327 nodev, /* reset */
328 &bd_cb_ops, /* driver operations */
329 NULL, /* bus operations */
330 NULL, /* power */
331 ddi_quiesce_not_needed, /* quiesce */
332 };
333
334 static struct modldrv modldrv = {
335 &mod_driverops,
336 "Generic Block Device",
337 &bd_dev_ops,
338 };
339
340 static struct modlinkage modlinkage = {
341 MODREV_1, { &modldrv, NULL }
342 };
343
344 static void *bd_state;
345 static krwlock_t bd_lock;
346 static taskq_t *bd_taskq;
347
348 int
_init(void)349 _init(void)
350 {
351 char taskq_name[TASKQ_NAMELEN];
352 const char *name;
353 int rv;
354
355 rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
356 if (rv != DDI_SUCCESS)
357 return (rv);
358
359 name = mod_modname(&modlinkage);
360 (void) snprintf(taskq_name, sizeof (taskq_name), "%s_taskq", name);
361 bd_taskq = taskq_create(taskq_name, 1, minclsyspri, 0, 0, 0);
362 if (bd_taskq == NULL) {
363 cmn_err(CE_WARN, "%s: unable to create %s", name, taskq_name);
364 ddi_soft_state_fini(&bd_state);
365 return (DDI_FAILURE);
366 }
367
368 rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
369
370 rv = mod_install(&modlinkage);
371 if (rv != DDI_SUCCESS) {
372 rw_destroy(&bd_lock);
373 taskq_destroy(bd_taskq);
374 ddi_soft_state_fini(&bd_state);
375 }
376 return (rv);
377 }
378
379 int
_fini(void)380 _fini(void)
381 {
382 int rv;
383
384 rv = mod_remove(&modlinkage);
385 if (rv == DDI_SUCCESS) {
386 rw_destroy(&bd_lock);
387 taskq_destroy(bd_taskq);
388 ddi_soft_state_fini(&bd_state);
389 }
390 return (rv);
391 }
392
393 int
_info(struct modinfo * modinfop)394 _info(struct modinfo *modinfop)
395 {
396 return (mod_info(&modlinkage, modinfop));
397 }
398
399 static int
bd_getinfo(dev_info_t * dip,ddi_info_cmd_t cmd,void * arg,void ** resultp)400 bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
401 {
402 bd_t *bd;
403 minor_t inst;
404
405 _NOTE(ARGUNUSED(dip));
406
407 inst = BDINST((dev_t)arg);
408
409 switch (cmd) {
410 case DDI_INFO_DEVT2DEVINFO:
411 bd = ddi_get_soft_state(bd_state, inst);
412 if (bd == NULL) {
413 return (DDI_FAILURE);
414 }
415 *resultp = (void *)bd->d_dip;
416 break;
417
418 case DDI_INFO_DEVT2INSTANCE:
419 *resultp = (void *)(intptr_t)inst;
420 break;
421
422 default:
423 return (DDI_FAILURE);
424 }
425 return (DDI_SUCCESS);
426 }
427
428 static void
bd_prop_update_inqstring(dev_info_t * dip,char * name,char * data,size_t len)429 bd_prop_update_inqstring(dev_info_t *dip, char *name, char *data, size_t len)
430 {
431 int ilen;
432 char *data_string;
433
434 ilen = scsi_ascii_inquiry_len(data, len);
435 ASSERT3U(ilen, <=, len);
436 if (ilen <= 0)
437 return;
438 /* ensure null termination */
439 data_string = kmem_zalloc(ilen + 1, KM_SLEEP);
440 bcopy(data, data_string, ilen);
441 (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, name, data_string);
442 kmem_free(data_string, ilen + 1);
443 }
444
445 static void
bd_create_inquiry_props(dev_info_t * dip,bd_drive_t * drive)446 bd_create_inquiry_props(dev_info_t *dip, bd_drive_t *drive)
447 {
448 if (drive->d_vendor_len > 0)
449 bd_prop_update_inqstring(dip, INQUIRY_VENDOR_ID,
450 drive->d_vendor, drive->d_vendor_len);
451
452 if (drive->d_product_len > 0)
453 bd_prop_update_inqstring(dip, INQUIRY_PRODUCT_ID,
454 drive->d_product, drive->d_product_len);
455
456 if (drive->d_serial_len > 0)
457 bd_prop_update_inqstring(dip, INQUIRY_SERIAL_NO,
458 drive->d_serial, drive->d_serial_len);
459
460 if (drive->d_revision_len > 0)
461 bd_prop_update_inqstring(dip, INQUIRY_REVISION_ID,
462 drive->d_revision, drive->d_revision_len);
463 }
464
465 static void
bd_create_errstats(bd_t * bd,int inst,bd_drive_t * drive)466 bd_create_errstats(bd_t *bd, int inst, bd_drive_t *drive)
467 {
468 char ks_module[KSTAT_STRLEN];
469 char ks_name[KSTAT_STRLEN];
470 int ndata = sizeof (struct bd_errstats) / sizeof (kstat_named_t);
471
472 if (bd->d_errstats != NULL)
473 return;
474
475 (void) snprintf(ks_module, sizeof (ks_module), "%serr",
476 ddi_driver_name(bd->d_dip));
477 (void) snprintf(ks_name, sizeof (ks_name), "%s%d,err",
478 ddi_driver_name(bd->d_dip), inst);
479
480 bd->d_errstats = kstat_create(ks_module, inst, ks_name, "device_error",
481 KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
482
483 mutex_init(&bd->d_errmutex, NULL, MUTEX_DRIVER, NULL);
484 if (bd->d_errstats == NULL) {
485 /*
486 * Even if we cannot create the kstat, we create a
487 * scratch kstat. The reason for this is to ensure
488 * that we can update the kstat all of the time,
489 * without adding an extra branch instruction.
490 */
491 bd->d_kerr = kmem_zalloc(sizeof (struct bd_errstats),
492 KM_SLEEP);
493 } else {
494 bd->d_errstats->ks_lock = &bd->d_errmutex;
495 bd->d_kerr = (struct bd_errstats *)bd->d_errstats->ks_data;
496 }
497
498 kstat_named_init(&bd->d_kerr->bd_softerrs, "Soft Errors",
499 KSTAT_DATA_UINT32);
500 kstat_named_init(&bd->d_kerr->bd_harderrs, "Hard Errors",
501 KSTAT_DATA_UINT32);
502 kstat_named_init(&bd->d_kerr->bd_transerrs, "Transport Errors",
503 KSTAT_DATA_UINT32);
504
505 if (drive->d_model_len > 0) {
506 kstat_named_init(&bd->d_kerr->bd_model, "Model",
507 KSTAT_DATA_STRING);
508 } else {
509 kstat_named_init(&bd->d_kerr->bd_vid, "Vendor",
510 KSTAT_DATA_STRING);
511 kstat_named_init(&bd->d_kerr->bd_pid, "Product",
512 KSTAT_DATA_STRING);
513 }
514
515 kstat_named_init(&bd->d_kerr->bd_revision, "Revision",
516 KSTAT_DATA_STRING);
517 kstat_named_init(&bd->d_kerr->bd_serial, "Serial No",
518 KSTAT_DATA_STRING);
519 kstat_named_init(&bd->d_kerr->bd_capacity, "Size",
520 KSTAT_DATA_ULONGLONG);
521 kstat_named_init(&bd->d_kerr->bd_rq_media_err, "Media Error",
522 KSTAT_DATA_UINT32);
523 kstat_named_init(&bd->d_kerr->bd_rq_ntrdy_err, "Device Not Ready",
524 KSTAT_DATA_UINT32);
525 kstat_named_init(&bd->d_kerr->bd_rq_nodev_err, "No Device",
526 KSTAT_DATA_UINT32);
527 kstat_named_init(&bd->d_kerr->bd_rq_recov_err, "Recoverable",
528 KSTAT_DATA_UINT32);
529 kstat_named_init(&bd->d_kerr->bd_rq_illrq_err, "Illegal Request",
530 KSTAT_DATA_UINT32);
531 kstat_named_init(&bd->d_kerr->bd_rq_pfa_err,
532 "Predictive Failure Analysis", KSTAT_DATA_UINT32);
533
534 bd->d_errstats->ks_private = bd;
535
536 kstat_install(bd->d_errstats);
537 bd_init_errstats(bd, drive);
538 }
539
540 static void
bd_destroy_errstats(bd_t * bd)541 bd_destroy_errstats(bd_t *bd)
542 {
543 if (bd->d_errstats != NULL) {
544 bd_fini_errstats(bd);
545 kstat_delete(bd->d_errstats);
546 bd->d_errstats = NULL;
547 } else {
548 kmem_free(bd->d_kerr, sizeof (struct bd_errstats));
549 bd->d_kerr = NULL;
550 mutex_destroy(&bd->d_errmutex);
551 }
552 }
553
554 static void
bd_errstats_setstr(kstat_named_t * k,char * str,size_t len,char * alt)555 bd_errstats_setstr(kstat_named_t *k, char *str, size_t len, char *alt)
556 {
557 char *tmp;
558 size_t km_len;
559
560 if (KSTAT_NAMED_STR_PTR(k) == NULL) {
561 if (len > 0)
562 km_len = strnlen(str, len);
563 else if (alt != NULL)
564 km_len = strlen(alt);
565 else
566 return;
567
568 tmp = kmem_alloc(km_len + 1, KM_SLEEP);
569 bcopy(len > 0 ? str : alt, tmp, km_len);
570 tmp[km_len] = '\0';
571
572 kstat_named_setstr(k, tmp);
573 }
574 }
575
576 static void
bd_errstats_clrstr(kstat_named_t * k)577 bd_errstats_clrstr(kstat_named_t *k)
578 {
579 if (KSTAT_NAMED_STR_PTR(k) == NULL)
580 return;
581
582 kmem_free(KSTAT_NAMED_STR_PTR(k), KSTAT_NAMED_STR_BUFLEN(k));
583 kstat_named_setstr(k, NULL);
584 }
585
586 static void
bd_init_errstats(bd_t * bd,bd_drive_t * drive)587 bd_init_errstats(bd_t *bd, bd_drive_t *drive)
588 {
589 struct bd_errstats *est = bd->d_kerr;
590
591 mutex_enter(&bd->d_errmutex);
592
593 if (drive->d_model_len > 0 &&
594 KSTAT_NAMED_STR_PTR(&est->bd_model) == NULL) {
595 bd_errstats_setstr(&est->bd_model, drive->d_model,
596 drive->d_model_len, NULL);
597 } else {
598 bd_errstats_setstr(&est->bd_vid, drive->d_vendor,
599 drive->d_vendor_len, "Unknown ");
600 bd_errstats_setstr(&est->bd_pid, drive->d_product,
601 drive->d_product_len, "Unknown ");
602 }
603
604 bd_errstats_setstr(&est->bd_revision, drive->d_revision,
605 drive->d_revision_len, "0001");
606 bd_errstats_setstr(&est->bd_serial, drive->d_serial,
607 drive->d_serial_len, "0 ");
608
609 mutex_exit(&bd->d_errmutex);
610 }
611
612 static void
bd_fini_errstats(bd_t * bd)613 bd_fini_errstats(bd_t *bd)
614 {
615 struct bd_errstats *est = bd->d_kerr;
616
617 mutex_enter(&bd->d_errmutex);
618
619 bd_errstats_clrstr(&est->bd_model);
620 bd_errstats_clrstr(&est->bd_vid);
621 bd_errstats_clrstr(&est->bd_pid);
622 bd_errstats_clrstr(&est->bd_revision);
623 bd_errstats_clrstr(&est->bd_serial);
624
625 mutex_exit(&bd->d_errmutex);
626 }
627
628 static void
bd_queues_free(bd_t * bd)629 bd_queues_free(bd_t *bd)
630 {
631 uint32_t i;
632
633 for (i = 0; i < bd->d_qcount; i++) {
634 bd_queue_t *bq = &bd->d_queues[i];
635
636 mutex_destroy(&bq->q_iomutex);
637 list_destroy(&bq->q_waitq);
638 list_destroy(&bq->q_runq);
639 }
640
641 kmem_free(bd->d_queues, sizeof (*bd->d_queues) * bd->d_qcount);
642 }
643
644 static int
bd_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)645 bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
646 {
647 int inst;
648 bd_handle_t hdl;
649 bd_t *bd;
650 bd_drive_t drive;
651 uint32_t i;
652 int rv;
653 char name[16];
654 char kcache[32];
655 char *node_type;
656
657 switch (cmd) {
658 case DDI_ATTACH:
659 break;
660 case DDI_RESUME:
661 /* We don't do anything native for suspend/resume */
662 return (DDI_SUCCESS);
663 default:
664 return (DDI_FAILURE);
665 }
666
667 inst = ddi_get_instance(dip);
668 hdl = ddi_get_parent_data(dip);
669
670 (void) snprintf(name, sizeof (name), "%s%d",
671 ddi_driver_name(dip), ddi_get_instance(dip));
672 (void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);
673
674 if (hdl == NULL) {
675 cmn_err(CE_WARN, "%s: missing parent data!", name);
676 return (DDI_FAILURE);
677 }
678
679 if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
680 cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
681 return (DDI_FAILURE);
682 }
683 bd = ddi_get_soft_state(bd_state, inst);
684
685 if (hdl->h_dma) {
686 bd->d_dma = *(hdl->h_dma);
687 bd->d_dma.dma_attr_granular =
688 max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
689 bd->d_use_dma = B_TRUE;
690
691 if (bd->d_maxxfer &&
692 (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
693 cmn_err(CE_WARN,
694 "%s: inconsistent maximum transfer size!",
695 name);
696 /* We force it */
697 bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
698 } else {
699 bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
700 }
701 } else {
702 bd->d_use_dma = B_FALSE;
703 if (bd->d_maxxfer == 0) {
704 bd->d_maxxfer = 1024 * 1024;
705 }
706 }
707 bd->d_ops = hdl->h_ops;
708 bd->d_private = hdl->h_private;
709 bd->d_blkshift = DEV_BSHIFT; /* 512 bytes, to start */
710
711 if (bd->d_maxxfer % DEV_BSIZE) {
712 cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
713 bd->d_maxxfer &= ~(DEV_BSIZE - 1);
714 }
715 if (bd->d_maxxfer < DEV_BSIZE) {
716 cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
717 ddi_soft_state_free(bd_state, inst);
718 return (DDI_FAILURE);
719 }
720
721 bd->d_dip = dip;
722 bd->d_handle = hdl;
723 ddi_set_driver_private(dip, bd);
724
725 mutex_init(&bd->d_ksmutex, NULL, MUTEX_DRIVER, NULL);
726 mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
727 mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
728 cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);
729 mutex_init(&bd->d_dle_mutex, NULL, MUTEX_DRIVER, NULL);
730 bd->d_dle_state = 0;
731
732 bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
733 bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);
734
735 bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
736 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
737 if (bd->d_ksp != NULL) {
738 bd->d_ksp->ks_lock = &bd->d_ksmutex;
739 kstat_install(bd->d_ksp);
740 bd->d_kiop = bd->d_ksp->ks_data;
741 } else {
742 /*
743 * Even if we cannot create the kstat, we create a
744 * scratch kstat. The reason for this is to ensure
745 * that we can update the kstat all of the time,
746 * without adding an extra branch instruction.
747 */
748 bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
749 }
750
751 cmlb_alloc_handle(&bd->d_cmlbh);
752
753 bd->d_state = DKIO_NONE;
754
755 bzero(&drive, sizeof (drive));
756 /*
757 * Default to one queue, and no restrictions on free space requests
758 * (if driver provides method) parent driver can override.
759 */
760 drive.d_qcount = 1;
761 drive.d_free_align = 1;
762 bd->d_ops.o_drive_info(bd->d_private, &drive);
763
764 /*
765 * Several checks to make sure o_drive_info() didn't return bad
766 * values:
767 *
768 * There must be at least one queue
769 */
770 if (drive.d_qcount == 0)
771 goto fail_drive_info;
772
773 /* FREE/UNMAP/TRIM alignment needs to be at least 1 block */
774 if (drive.d_free_align == 0)
775 goto fail_drive_info;
776
777 /*
778 * If d_max_free_blks is not unlimited (not 0), then we cannot allow
779 * an unlimited segment size. It is however permissible to not impose
780 * a limit on the total number of blocks freed while limiting the
781 * amount allowed in an individual segment.
782 */
783 if ((drive.d_max_free_blks > 0 && drive.d_max_free_seg_blks == 0))
784 goto fail_drive_info;
785
786 /*
787 * If a limit is set on d_max_free_blks (by the above check, we know
788 * if there's a limit on d_max_free_blks, d_max_free_seg_blks cannot
789 * be unlimited), it cannot be smaller than the limit on an individual
790 * segment.
791 */
792 if ((drive.d_max_free_blks > 0 &&
793 drive.d_max_free_seg_blks > drive.d_max_free_blks)) {
794 goto fail_drive_info;
795 }
796
797 bd->d_qcount = drive.d_qcount;
798 bd->d_removable = drive.d_removable;
799 bd->d_hotpluggable = drive.d_hotpluggable;
800
801 if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
802 bd->d_maxxfer = drive.d_maxxfer;
803
804 bd->d_free_align = drive.d_free_align;
805 bd->d_max_free_seg = drive.d_max_free_seg;
806 bd->d_max_free_blks = drive.d_max_free_blks;
807 bd->d_max_free_seg_blks = drive.d_max_free_seg_blks;
808
809 bd_create_inquiry_props(dip, &drive);
810 bd_create_errstats(bd, inst, &drive);
811 bd_update_state(bd);
812
813 bd->d_queues = kmem_alloc(sizeof (*bd->d_queues) * bd->d_qcount,
814 KM_SLEEP);
815 for (i = 0; i < bd->d_qcount; i++) {
816 bd_queue_t *bq = &bd->d_queues[i];
817
818 bq->q_qsize = drive.d_qsize;
819 bq->q_qactive = 0;
820 mutex_init(&bq->q_iomutex, NULL, MUTEX_DRIVER, NULL);
821
822 list_create(&bq->q_waitq, sizeof (bd_xfer_impl_t),
823 offsetof(struct bd_xfer_impl, i_linkage));
824 list_create(&bq->q_runq, sizeof (bd_xfer_impl_t),
825 offsetof(struct bd_xfer_impl, i_linkage));
826 }
827
828 if (*(uint64_t *)drive.d_eui64 != 0 ||
829 *(uint64_t *)drive.d_guid != 0 ||
830 *((uint64_t *)drive.d_guid + 1) != 0)
831 node_type = DDI_NT_BLOCK_BLKDEV;
832 else if (drive.d_lun >= 0)
833 node_type = DDI_NT_BLOCK_CHAN;
834 else
835 node_type = DDI_NT_BLOCK;
836
837 rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
838 bd->d_removable, bd->d_hotpluggable, node_type,
839 CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
840 if (rv != 0) {
841 goto fail_cmlb_attach;
842 }
843
844 if (bd->d_ops.o_devid_init != NULL) {
845 rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
846 if (rv == DDI_SUCCESS) {
847 if (ddi_devid_register(dip, bd->d_devid) !=
848 DDI_SUCCESS) {
849 cmn_err(CE_WARN,
850 "%s: unable to register devid", name);
851 }
852 }
853 }
854
855 /*
856 * Add a zero-length attribute to tell the world we support
857 * kernel ioctls (for layered drivers). Also set up properties
858 * used by HAL to identify removable media.
859 */
860 (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
861 DDI_KERNEL_IOCTL, NULL, 0);
862 if (bd->d_removable) {
863 (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
864 "removable-media", NULL, 0);
865 }
866 if (bd->d_hotpluggable) {
867 (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
868 "hotpluggable", NULL, 0);
869 }
870
871 /*
872 * Before we proceed, we need to ensure that the geometry and labels on
873 * the cmlb disk are reasonable. When cmlb first attaches, it does not
874 * perform label validation and creates minor nodes based on the
875 * assumption of the size. This may not be correct and the rest of the
876 * system assumes that this will have been done before we allow opens
877 * to proceed. Otherwise, on first open, this'll all end up changing
878 * around on users. We do not care if it succeeds or not. It is totally
879 * acceptable for this device to be unlabeled or not to have anything on
880 * it.
881 */
882 (void) cmlb_validate(bd->d_cmlbh, 0, 0);
883
884 hdl->h_bd = bd;
885 ddi_report_dev(dip);
886
887 return (DDI_SUCCESS);
888
889 fail_cmlb_attach:
890 bd_queues_free(bd);
891 bd_destroy_errstats(bd);
892
893 fail_drive_info:
894 cmlb_free_handle(&bd->d_cmlbh);
895
896 if (bd->d_ksp != NULL) {
897 kstat_delete(bd->d_ksp);
898 bd->d_ksp = NULL;
899 } else {
900 kmem_free(bd->d_kiop, sizeof (kstat_io_t));
901 }
902
903 kmem_cache_destroy(bd->d_cache);
904 cv_destroy(&bd->d_statecv);
905 mutex_destroy(&bd->d_statemutex);
906 mutex_destroy(&bd->d_ocmutex);
907 mutex_destroy(&bd->d_ksmutex);
908 mutex_destroy(&bd->d_dle_mutex);
909 ddi_soft_state_free(bd_state, inst);
910 return (DDI_FAILURE);
911 }
912
913 static int
bd_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)914 bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
915 {
916 bd_handle_t hdl;
917 bd_t *bd;
918
919 bd = ddi_get_driver_private(dip);
920 hdl = ddi_get_parent_data(dip);
921
922 switch (cmd) {
923 case DDI_DETACH:
924 break;
925 case DDI_SUSPEND:
926 /* We don't suspend, but our parent does */
927 return (DDI_SUCCESS);
928 default:
929 return (DDI_FAILURE);
930 }
931
932 hdl->h_bd = NULL;
933
934 if (bd->d_ksp != NULL) {
935 kstat_delete(bd->d_ksp);
936 bd->d_ksp = NULL;
937 } else {
938 kmem_free(bd->d_kiop, sizeof (kstat_io_t));
939 }
940
941 bd_destroy_errstats(bd);
942 cmlb_detach(bd->d_cmlbh, 0);
943 cmlb_free_handle(&bd->d_cmlbh);
944 if (bd->d_devid)
945 ddi_devid_free(bd->d_devid);
946 kmem_cache_destroy(bd->d_cache);
947 mutex_destroy(&bd->d_ksmutex);
948 mutex_destroy(&bd->d_ocmutex);
949 mutex_destroy(&bd->d_statemutex);
950 cv_destroy(&bd->d_statecv);
951 mutex_destroy(&bd->d_dle_mutex);
952 bd_queues_free(bd);
953 ddi_soft_state_free(bd_state, ddi_get_instance(dip));
954 return (DDI_SUCCESS);
955 }
956
957 static int
bd_xfer_ctor(void * buf,void * arg,int kmflag)958 bd_xfer_ctor(void *buf, void *arg, int kmflag)
959 {
960 bd_xfer_impl_t *xi;
961 bd_t *bd = arg;
962 int (*dcb)(caddr_t);
963
964 if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) {
965 dcb = DDI_DMA_SLEEP;
966 } else {
967 dcb = DDI_DMA_DONTWAIT;
968 }
969
970 xi = buf;
971 bzero(xi, sizeof (*xi));
972 xi->i_bd = bd;
973
974 if (bd->d_use_dma) {
975 if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
976 &xi->i_dmah) != DDI_SUCCESS) {
977 return (-1);
978 }
979 }
980
981 return (0);
982 }
983
984 static void
bd_xfer_dtor(void * buf,void * arg)985 bd_xfer_dtor(void *buf, void *arg)
986 {
987 bd_xfer_impl_t *xi = buf;
988
989 _NOTE(ARGUNUSED(arg));
990
991 if (xi->i_dmah)
992 ddi_dma_free_handle(&xi->i_dmah);
993 xi->i_dmah = NULL;
994 }
995
996 static bd_xfer_impl_t *
bd_xfer_alloc(bd_t * bd,struct buf * bp,int (* func)(void *,bd_xfer_t *),int kmflag)997 bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
998 int kmflag)
999 {
1000 bd_xfer_impl_t *xi;
1001 int rv = 0;
1002 int status;
1003 unsigned dir;
1004 int (*cb)(caddr_t);
1005 size_t len;
1006 uint32_t shift;
1007
1008 if (kmflag == KM_SLEEP) {
1009 cb = DDI_DMA_SLEEP;
1010 } else {
1011 cb = DDI_DMA_DONTWAIT;
1012 }
1013
1014 xi = kmem_cache_alloc(bd->d_cache, kmflag);
1015 if (xi == NULL) {
1016 bioerror(bp, ENOMEM);
1017 return (NULL);
1018 }
1019
1020 ASSERT(bp);
1021
1022 xi->i_bp = bp;
1023 xi->i_func = func;
1024 xi->i_blkno = bp->b_lblkno >> (bd->d_blkshift - DEV_BSHIFT);
1025
1026 if (bp->b_bcount == 0) {
1027 xi->i_len = 0;
1028 xi->i_nblks = 0;
1029 xi->i_kaddr = NULL;
1030 xi->i_resid = 0;
1031 xi->i_num_win = 0;
1032 goto done;
1033 }
1034
1035 if (bp->b_flags & B_READ) {
1036 dir = DDI_DMA_READ;
1037 xi->i_func = bd->d_ops.o_read;
1038 } else {
1039 dir = DDI_DMA_WRITE;
1040 xi->i_func = bd->d_ops.o_write;
1041 }
1042
1043 shift = bd->d_blkshift;
1044 xi->i_blkshift = shift;
1045
1046 if (!bd->d_use_dma) {
1047 bp_mapin(bp);
1048 rv = 0;
1049 xi->i_offset = 0;
1050 xi->i_num_win =
1051 (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
1052 xi->i_cur_win = 0;
1053 xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
1054 xi->i_nblks = xi->i_len >> shift;
1055 xi->i_kaddr = bp->b_un.b_addr;
1056 xi->i_resid = bp->b_bcount;
1057 } else {
1058
1059 /*
1060 * We have to use consistent DMA if the address is misaligned.
1061 */
1062 if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
1063 ((uintptr_t)bp->b_un.b_addr & 0x7)) {
1064 dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
1065 } else {
1066 dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
1067 }
1068
1069 status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
1070 NULL, &xi->i_dmac, &xi->i_ndmac);
1071 switch (status) {
1072 case DDI_DMA_MAPPED:
1073 xi->i_num_win = 1;
1074 xi->i_cur_win = 0;
1075 xi->i_offset = 0;
1076 xi->i_len = bp->b_bcount;
1077 xi->i_nblks = xi->i_len >> shift;
1078 xi->i_resid = bp->b_bcount;
1079 rv = 0;
1080 break;
1081 case DDI_DMA_PARTIAL_MAP:
1082 xi->i_cur_win = 0;
1083
1084 if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
1085 DDI_SUCCESS) ||
1086 (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
1087 &len, &xi->i_dmac, &xi->i_ndmac) !=
1088 DDI_SUCCESS) ||
1089 (P2PHASE(len, (1U << shift)) != 0)) {
1090 (void) ddi_dma_unbind_handle(xi->i_dmah);
1091 rv = EFAULT;
1092 goto done;
1093 }
1094 xi->i_len = len;
1095 xi->i_nblks = xi->i_len >> shift;
1096 xi->i_resid = bp->b_bcount;
1097 rv = 0;
1098 break;
1099 case DDI_DMA_NORESOURCES:
1100 rv = EAGAIN;
1101 goto done;
1102 case DDI_DMA_TOOBIG:
1103 rv = EINVAL;
1104 goto done;
1105 case DDI_DMA_NOMAPPING:
1106 case DDI_DMA_INUSE:
1107 default:
1108 rv = EFAULT;
1109 goto done;
1110 }
1111 }
1112
1113 done:
1114 if (rv != 0) {
1115 kmem_cache_free(bd->d_cache, xi);
1116 bioerror(bp, rv);
1117 return (NULL);
1118 }
1119
1120 return (xi);
1121 }
1122
1123 static void
bd_xfer_free(bd_xfer_impl_t * xi)1124 bd_xfer_free(bd_xfer_impl_t *xi)
1125 {
1126 if (xi->i_dmah) {
1127 (void) ddi_dma_unbind_handle(xi->i_dmah);
1128 }
1129 if (xi->i_dfl != NULL) {
1130 dfl_free((dkioc_free_list_t *)xi->i_dfl);
1131 xi->i_dfl = NULL;
1132 }
1133 kmem_cache_free(xi->i_bd->d_cache, xi);
1134 }
1135
1136 static int
bd_open(dev_t * devp,int flag,int otyp,cred_t * credp)1137 bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
1138 {
1139 dev_t dev = *devp;
1140 bd_t *bd;
1141 minor_t part;
1142 minor_t inst;
1143 uint64_t mask;
1144 boolean_t ndelay;
1145 int rv;
1146 diskaddr_t nblks;
1147 diskaddr_t lba;
1148
1149 _NOTE(ARGUNUSED(credp));
1150
1151 part = BDPART(dev);
1152 inst = BDINST(dev);
1153
1154 if (otyp >= OTYPCNT)
1155 return (EINVAL);
1156
1157 ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;
1158
1159 /*
1160 * Block any DR events from changing the set of registered
1161 * devices while we function.
1162 */
1163 rw_enter(&bd_lock, RW_READER);
1164 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1165 rw_exit(&bd_lock);
1166 return (ENXIO);
1167 }
1168
1169 mutex_enter(&bd->d_ocmutex);
1170
1171 ASSERT(part < 64);
1172 mask = (1U << part);
1173
1174 bd_update_state(bd);
1175
1176 if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {
1177
1178 /* non-blocking opens are allowed to succeed */
1179 if (!ndelay) {
1180 rv = ENXIO;
1181 goto done;
1182 }
1183 } else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
1184 NULL, NULL, 0) == 0) {
1185
1186 /*
1187 * We read the partinfo, verify valid ranges. If the
1188 * partition is invalid, and we aren't blocking or
1189 * doing a raw access, then fail. (Non-blocking and
1190 * raw accesses can still succeed to allow a disk with
1191 * bad partition data to opened by format and fdisk.)
1192 */
1193 if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
1194 rv = ENXIO;
1195 goto done;
1196 }
1197 } else if (!ndelay) {
1198 /*
1199 * cmlb_partinfo failed -- invalid partition or no
1200 * disk label.
1201 */
1202 rv = ENXIO;
1203 goto done;
1204 }
1205
1206 if ((flag & FWRITE) && bd->d_rdonly) {
1207 rv = EROFS;
1208 goto done;
1209 }
1210
1211 if ((bd->d_open_excl) & (mask)) {
1212 rv = EBUSY;
1213 goto done;
1214 }
1215 if (flag & FEXCL) {
1216 if (bd->d_open_lyr[part]) {
1217 rv = EBUSY;
1218 goto done;
1219 }
1220 for (int i = 0; i < OTYP_LYR; i++) {
1221 if (bd->d_open_reg[i] & mask) {
1222 rv = EBUSY;
1223 goto done;
1224 }
1225 }
1226 }
1227
1228 if (otyp == OTYP_LYR) {
1229 bd->d_open_lyr[part]++;
1230 } else {
1231 bd->d_open_reg[otyp] |= mask;
1232 }
1233 if (flag & FEXCL) {
1234 bd->d_open_excl |= mask;
1235 }
1236
1237 rv = 0;
1238 done:
1239 mutex_exit(&bd->d_ocmutex);
1240 rw_exit(&bd_lock);
1241
1242 return (rv);
1243 }
1244
1245 static int
bd_close(dev_t dev,int flag,int otyp,cred_t * credp)1246 bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
1247 {
1248 bd_t *bd;
1249 minor_t inst;
1250 minor_t part;
1251 uint64_t mask;
1252 boolean_t last = B_TRUE;
1253
1254 _NOTE(ARGUNUSED(flag));
1255 _NOTE(ARGUNUSED(credp));
1256
1257 part = BDPART(dev);
1258 inst = BDINST(dev);
1259
1260 ASSERT(part < 64);
1261 mask = (1U << part);
1262
1263 rw_enter(&bd_lock, RW_READER);
1264
1265 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1266 rw_exit(&bd_lock);
1267 return (ENXIO);
1268 }
1269
1270 mutex_enter(&bd->d_ocmutex);
1271 if (bd->d_open_excl & mask) {
1272 bd->d_open_excl &= ~mask;
1273 }
1274 if (otyp == OTYP_LYR) {
1275 bd->d_open_lyr[part]--;
1276 } else {
1277 bd->d_open_reg[otyp] &= ~mask;
1278 }
1279 for (int i = 0; i < 64; i++) {
1280 if (bd->d_open_lyr[part]) {
1281 last = B_FALSE;
1282 }
1283 }
1284 for (int i = 0; last && (i < OTYP_LYR); i++) {
1285 if (bd->d_open_reg[i]) {
1286 last = B_FALSE;
1287 }
1288 }
1289 mutex_exit(&bd->d_ocmutex);
1290
1291 if (last) {
1292 cmlb_invalidate(bd->d_cmlbh, 0);
1293 }
1294 rw_exit(&bd_lock);
1295
1296 return (0);
1297 }
1298
1299 static int
bd_dump(dev_t dev,caddr_t caddr,daddr_t blkno,int nblk)1300 bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
1301 {
1302 minor_t inst;
1303 minor_t part;
1304 diskaddr_t pstart;
1305 diskaddr_t psize;
1306 bd_t *bd;
1307 bd_xfer_impl_t *xi;
1308 buf_t *bp;
1309 int rv;
1310 uint32_t shift;
1311 daddr_t d_blkno;
1312 int d_nblk;
1313
1314 rw_enter(&bd_lock, RW_READER);
1315
1316 part = BDPART(dev);
1317 inst = BDINST(dev);
1318
1319 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1320 rw_exit(&bd_lock);
1321 return (ENXIO);
1322 }
1323 shift = bd->d_blkshift;
1324 d_blkno = blkno >> (shift - DEV_BSHIFT);
1325 d_nblk = nblk >> (shift - DEV_BSHIFT);
1326 /*
1327 * do cmlb, but do it synchronously unless we already have the
1328 * partition (which we probably should.)
1329 */
1330 if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
1331 (void *)1)) {
1332 rw_exit(&bd_lock);
1333 return (ENXIO);
1334 }
1335
1336 if ((d_blkno + d_nblk) > psize) {
1337 rw_exit(&bd_lock);
1338 return (EINVAL);
1339 }
1340 bp = getrbuf(KM_NOSLEEP);
1341 if (bp == NULL) {
1342 rw_exit(&bd_lock);
1343 return (ENOMEM);
1344 }
1345
1346 bp->b_bcount = nblk << DEV_BSHIFT;
1347 bp->b_resid = bp->b_bcount;
1348 bp->b_lblkno = blkno;
1349 bp->b_un.b_addr = caddr;
1350
1351 xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_write, KM_NOSLEEP);
1352 if (xi == NULL) {
1353 rw_exit(&bd_lock);
1354 freerbuf(bp);
1355 return (ENOMEM);
1356 }
1357 xi->i_blkno = d_blkno + pstart;
1358 xi->i_flags = BD_XFER_POLL;
1359 bd_submit(bd, xi);
1360 rw_exit(&bd_lock);
1361
1362 /*
1363 * Generally, we should have run this entirely synchronously
1364 * at this point and the biowait call should be a no-op. If
1365 * it didn't happen this way, it's a bug in the underlying
1366 * driver not honoring BD_XFER_POLL.
1367 */
1368 (void) biowait(bp);
1369 rv = geterror(bp);
1370 freerbuf(bp);
1371 return (rv);
1372 }
1373
1374 void
bd_minphys(struct buf * bp)1375 bd_minphys(struct buf *bp)
1376 {
1377 minor_t inst;
1378 bd_t *bd;
1379 inst = BDINST(bp->b_edev);
1380
1381 bd = ddi_get_soft_state(bd_state, inst);
1382
1383 /*
1384 * In a non-debug kernel, bd_strategy will catch !bd as
1385 * well, and will fail nicely.
1386 */
1387 ASSERT(bd);
1388
1389 if (bp->b_bcount > bd->d_maxxfer)
1390 bp->b_bcount = bd->d_maxxfer;
1391 }
1392
1393 static int
bd_check_uio(dev_t dev,struct uio * uio)1394 bd_check_uio(dev_t dev, struct uio *uio)
1395 {
1396 bd_t *bd;
1397 uint32_t shift;
1398
1399 if ((bd = ddi_get_soft_state(bd_state, BDINST(dev))) == NULL) {
1400 return (ENXIO);
1401 }
1402
1403 shift = bd->d_blkshift;
1404 if ((P2PHASE(uio->uio_loffset, (1U << shift)) != 0) ||
1405 (P2PHASE(uio->uio_iov->iov_len, (1U << shift)) != 0)) {
1406 return (EINVAL);
1407 }
1408
1409 return (0);
1410 }
1411
1412 static int
bd_read(dev_t dev,struct uio * uio,cred_t * credp)1413 bd_read(dev_t dev, struct uio *uio, cred_t *credp)
1414 {
1415 _NOTE(ARGUNUSED(credp));
1416 int ret = bd_check_uio(dev, uio);
1417 if (ret != 0) {
1418 return (ret);
1419 }
1420 return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
1421 }
1422
1423 static int
bd_write(dev_t dev,struct uio * uio,cred_t * credp)1424 bd_write(dev_t dev, struct uio *uio, cred_t *credp)
1425 {
1426 _NOTE(ARGUNUSED(credp));
1427 int ret = bd_check_uio(dev, uio);
1428 if (ret != 0) {
1429 return (ret);
1430 }
1431 return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
1432 }
1433
1434 static int
bd_aread(dev_t dev,struct aio_req * aio,cred_t * credp)1435 bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
1436 {
1437 _NOTE(ARGUNUSED(credp));
1438 int ret = bd_check_uio(dev, aio->aio_uio);
1439 if (ret != 0) {
1440 return (ret);
1441 }
1442 return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
1443 }
1444
1445 static int
bd_awrite(dev_t dev,struct aio_req * aio,cred_t * credp)1446 bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
1447 {
1448 _NOTE(ARGUNUSED(credp));
1449 int ret = bd_check_uio(dev, aio->aio_uio);
1450 if (ret != 0) {
1451 return (ret);
1452 }
1453 return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
1454 }
1455
1456 static int
bd_strategy(struct buf * bp)1457 bd_strategy(struct buf *bp)
1458 {
1459 minor_t inst;
1460 minor_t part;
1461 bd_t *bd;
1462 diskaddr_t p_lba;
1463 diskaddr_t p_nblks;
1464 diskaddr_t b_nblks;
1465 bd_xfer_impl_t *xi;
1466 uint32_t shift;
1467 int (*func)(void *, bd_xfer_t *);
1468 diskaddr_t lblkno;
1469
1470 part = BDPART(bp->b_edev);
1471 inst = BDINST(bp->b_edev);
1472
1473 ASSERT(bp);
1474
1475 bp->b_resid = bp->b_bcount;
1476
1477 if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1478 bioerror(bp, ENXIO);
1479 biodone(bp);
1480 return (0);
1481 }
1482
1483 if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
1484 NULL, NULL, 0)) {
1485 bioerror(bp, ENXIO);
1486 biodone(bp);
1487 return (0);
1488 }
1489
1490 shift = bd->d_blkshift;
1491 lblkno = bp->b_lblkno >> (shift - DEV_BSHIFT);
1492 if ((P2PHASE(bp->b_lblkno, (1U << (shift - DEV_BSHIFT))) != 0) ||
1493 (P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
1494 (lblkno > p_nblks)) {
1495 bioerror(bp, EINVAL);
1496 biodone(bp);
1497 return (0);
1498 }
1499 b_nblks = bp->b_bcount >> shift;
1500 if ((lblkno == p_nblks) || (bp->b_bcount == 0)) {
1501 biodone(bp);
1502 return (0);
1503 }
1504
1505 if ((b_nblks + lblkno) > p_nblks) {
1506 bp->b_resid = ((lblkno + b_nblks - p_nblks) << shift);
1507 bp->b_bcount -= bp->b_resid;
1508 } else {
1509 bp->b_resid = 0;
1510 }
1511 func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;
1512
1513 xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
1514 if (xi == NULL) {
1515 xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
1516 }
1517 if (xi == NULL) {
1518 /* bd_request_alloc will have done bioerror */
1519 biodone(bp);
1520 return (0);
1521 }
1522 xi->i_blkno = lblkno + p_lba;
1523
1524 bd_submit(bd, xi);
1525
1526 return (0);
1527 }
1528
/*
 * ioctl(9E) entry point.
 *
 * The request is first offered to cmlb_ioctl(); any result other than
 * ENOTTY is returned as-is.  The remaining commands are handled by the
 * switch below.  Each case either copies a small result out to the
 * caller (EFAULT if the copy fails) or starts a cache flush / free-space
 * operation and returns its result.
 *
 * Returns ENXIO if the instance has no soft state, and ENOTTY for
 * commands that are not handled here.
 */
static int
bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
{
	minor_t		inst;
	uint16_t	part;
	bd_t		*bd;
	void		*ptr = (void *)arg;
	int		rv;

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		return (ENXIO);
	}

	/* Let cmlb have the first go; ENOTTY means it is not cmlb's. */
	rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
	if (rv != ENOTTY)
		return (rv);

	if (rvalp != NULL) {
		/* the return value of the ioctl is 0 by default */
		*rvalp = 0;
	}

	switch (cmd) {
	case DKIOCGMEDIAINFO: {
		struct dk_minfo minfo;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&minfo, sizeof (minfo));
		minfo.dki_media_type = DK_FIXED_DISK;
		minfo.dki_lbsize = (1U << bd->d_blkshift);
		minfo.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCGMEDIAINFOEXT: {
		struct dk_minfo_ext miext;
		size_t len;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&miext, sizeof (miext));
		miext.dki_media_type = DK_FIXED_DISK;
		miext.dki_lbsize = (1U << bd->d_blkshift);
		miext.dki_pbsize = (1U << bd->d_pblkshift);
		miext.dki_capacity = bd->d_numblks;

		/*
		 * ILP32 callers get only sizeof (struct dk_minfo_ext32)
		 * bytes copied out; everyone else gets the full structure.
		 */
		switch (ddi_model_convert_from(flag & FMODELS)) {
		case DDI_MODEL_ILP32:
			len = sizeof (struct dk_minfo_ext32);
			break;
		default:
			len = sizeof (struct dk_minfo_ext);
			break;
		}

		if (ddi_copyout(&miext, ptr, len, flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCINFO: {
		struct dk_cinfo cinfo;
		bzero(&cinfo, sizeof (cinfo));
		cinfo.dki_ctype = DKC_BLKDEV;
		/* Controller number and name are taken from the parent node. */
		cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
		(void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
		    "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
		(void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
		    "%s", ddi_driver_name(bd->d_dip));
		cinfo.dki_unit = inst;
		cinfo.dki_flags = DKI_FMTVOL;
		cinfo.dki_partition = part;
		/* d_maxxfer is in bytes; report it in DEV_BSIZE units. */
		cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
		cinfo.dki_addr = 0;
		cinfo.dki_slave = 0;
		cinfo.dki_space = 0;
		cinfo.dki_prio = 0;
		cinfo.dki_vec = 0;
		if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	/* The next four commands each report one flag as an int (0 or 1). */
	case DKIOCREMOVABLE: {
		int i;
		i = bd->d_removable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCHOTPLUGGABLE: {
		int i;
		i = bd->d_hotpluggable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREADONLY: {
		int i;
		i = bd->d_rdonly ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSOLIDSTATE: {
		int i;
		i = bd->d_ssd ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSTATE: {
		/*
		 * The caller passes in the state it last saw;
		 * bd_check_state() returns once the current state differs
		 * from it (or fails), and the new state is copied back out.
		 */
		enum dkio_state state;
		if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
			return (EFAULT);
		}
		if ((rv = bd_check_state(bd, &state)) != 0) {
			return (rv);
		}
		if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCFLUSHWRITECACHE: {
		struct dk_callback *dkc = NULL;

		/*
		 * The argument is used as a dk_callback pointer only when
		 * FKIOCTL is set (presumably an in-kernel caller); without
		 * it the argument is ignored and NULL is passed on.
		 */
		if (flag & FKIOCTL)
			dkc = (void *)arg;

		rv = bd_flush_write_cache(bd, dkc);
		return (rv);
	}
	case DKIOCFREE: {
		dkioc_free_list_t *dfl = NULL;

		/*
		 * Check free space support early to avoid copyin/allocation
		 * when unnecessary.
		 */
		if (!CAN_FREESPACE(bd))
			return (ENOTSUP);

		rv = dfl_copyin(ptr, &dfl, flag, KM_SLEEP);
		if (rv != 0)
			return (rv);

		/*
		 * bd_free_space() consumes 'dfl'. bd_free_space() will
		 * call dfl_iter() which will normally try to pass dfl through
		 * to bd_free_space_cb() which attaches dfl to the bd_xfer_t
		 * that is then queued for the underlying driver. Once the
		 * driver processes the request, the bd_xfer_t instance is
		 * disposed of, including any attached dkioc_free_list_t.
		 *
		 * If dfl cannot be processed by the underlying driver due to
		 * size or alignment requirements of the driver, dfl_iter()
		 * will replace dfl with one or more new dkioc_free_list_t
		 * instances with the correct alignment and sizes for the driver
		 * (and free the original dkioc_free_list_t).
		 */
		rv = bd_free_space(dev, bd, dfl);
		return (rv);
	}

	case DKIOC_CANFREE: {
		/* Report whether free-space requests are supported. */
		boolean_t supported = CAN_FREESPACE(bd);

		if (ddi_copyout(&supported, (void *)arg, sizeof (supported),
		    flag) != 0) {
			return (EFAULT);
		}

		return (0);
	}

	default:
		break;

	}
	/* Not a command handled here. */
	return (ENOTTY);
}
1721
1722 static int
bd_prop_op(dev_t dev,dev_info_t * dip,ddi_prop_op_t prop_op,int mod_flags,char * name,caddr_t valuep,int * lengthp)1723 bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
1724 char *name, caddr_t valuep, int *lengthp)
1725 {
1726 bd_t *bd;
1727
1728 bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1729 if (bd == NULL)
1730 return (ddi_prop_op(dev, dip, prop_op, mod_flags,
1731 name, valuep, lengthp));
1732
1733 return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
1734 valuep, lengthp, BDPART(dev), 0));
1735 }
1736
1737
1738 static int
bd_tg_rdwr(dev_info_t * dip,uchar_t cmd,void * bufaddr,diskaddr_t start,size_t length,void * tg_cookie)1739 bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
1740 size_t length, void *tg_cookie)
1741 {
1742 bd_t *bd;
1743 buf_t *bp;
1744 bd_xfer_impl_t *xi;
1745 int rv;
1746 int (*func)(void *, bd_xfer_t *);
1747 int kmflag;
1748
1749 /*
1750 * If we are running in polled mode (such as during dump(9e)
1751 * execution), then we cannot sleep for kernel allocations.
1752 */
1753 kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;
1754
1755 bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1756
1757 if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
1758 /* We can only transfer whole blocks at a time! */
1759 return (EINVAL);
1760 }
1761
1762 if ((bp = getrbuf(kmflag)) == NULL) {
1763 return (ENOMEM);
1764 }
1765
1766 switch (cmd) {
1767 case TG_READ:
1768 bp->b_flags = B_READ;
1769 func = bd->d_ops.o_read;
1770 break;
1771 case TG_WRITE:
1772 bp->b_flags = B_WRITE;
1773 func = bd->d_ops.o_write;
1774 break;
1775 default:
1776 freerbuf(bp);
1777 return (EINVAL);
1778 }
1779
1780 bp->b_un.b_addr = bufaddr;
1781 bp->b_bcount = length;
1782 xi = bd_xfer_alloc(bd, bp, func, kmflag);
1783 if (xi == NULL) {
1784 rv = geterror(bp);
1785 freerbuf(bp);
1786 return (rv);
1787 }
1788 xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
1789 xi->i_blkno = start;
1790 bd_submit(bd, xi);
1791 (void) biowait(bp);
1792 rv = geterror(bp);
1793 freerbuf(bp);
1794
1795 return (rv);
1796 }
1797
1798 static int
bd_tg_getinfo(dev_info_t * dip,int cmd,void * arg,void * tg_cookie)1799 bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
1800 {
1801 bd_t *bd;
1802
1803 _NOTE(ARGUNUSED(tg_cookie));
1804 bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1805
1806 switch (cmd) {
1807 case TG_GETPHYGEOM:
1808 case TG_GETVIRTGEOM:
1809 /*
1810 * We don't have any "geometry" as such, let cmlb
1811 * fabricate something.
1812 */
1813 return (ENOTTY);
1814
1815 case TG_GETCAPACITY:
1816 bd_update_state(bd);
1817 *(diskaddr_t *)arg = bd->d_numblks;
1818 return (0);
1819
1820 case TG_GETBLOCKSIZE:
1821 *(uint32_t *)arg = (1U << bd->d_blkshift);
1822 return (0);
1823
1824 case TG_GETATTR:
1825 /*
1826 * It turns out that cmlb really doesn't do much for
1827 * non-writable media, but lets make the information
1828 * available for it in case it does more in the
1829 * future. (The value is currently used for
1830 * triggering special behavior for CD-ROMs.)
1831 */
1832 bd_update_state(bd);
1833 ((tg_attribute_t *)arg)->media_is_writable =
1834 bd->d_rdonly ? B_FALSE : B_TRUE;
1835 ((tg_attribute_t *)arg)->media_is_solid_state = bd->d_ssd;
1836 ((tg_attribute_t *)arg)->media_is_rotational = B_FALSE;
1837 return (0);
1838
1839 default:
1840 return (EINVAL);
1841 }
1842 }
1843
1844
/*
 * Move transfers from the queue's wait list to its run list and hand them
 * to the driver, for as long as the queue has spare capacity
 * (q_qactive < q_qsize) and work is waiting.
 *
 * If the driver's handler rejects a transfer (non-zero return), the buf
 * is completed with that error, the transport error kstat is bumped, and
 * the transfer is taken off the run list and freed.
 */
static void
bd_sched(bd_t *bd, bd_queue_t *bq)
{
	bd_xfer_impl_t	*xi;
	struct buf	*bp;
	int		rv;

	mutex_enter(&bq->q_iomutex);

	while ((bq->q_qactive < bq->q_qsize) &&
	    ((xi = list_remove_head(&bq->q_waitq)) != NULL)) {
		/* Account for the move from wait queue to run queue. */
		mutex_enter(&bd->d_ksmutex);
		kstat_waitq_to_runq(bd->d_kiop);
		mutex_exit(&bd->d_ksmutex);

		bq->q_qactive++;
		list_insert_tail(&bq->q_runq, xi);

		/*
		 * Submit the job to the driver.  We drop the I/O mutex
		 * so that we can deal with the case where the driver
		 * completion routine calls back into us synchronously.
		 */

		mutex_exit(&bq->q_iomutex);

		rv = xi->i_func(bd->d_private, &xi->i_public);
		if (rv != 0) {
			/* The driver refused the job: fail the buf now. */
			bp = xi->i_bp;
			bioerror(bp, rv);
			biodone(bp);

			atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);

			/* Re-take the I/O mutex to undo the run-queue entry. */
			mutex_enter(&bq->q_iomutex);

			mutex_enter(&bd->d_ksmutex);
			kstat_runq_exit(bd->d_kiop);
			mutex_exit(&bd->d_ksmutex);

			bq->q_qactive--;
			list_remove(&bq->q_runq, xi);
			bd_xfer_free(xi);
		} else {
			/* Re-take the I/O mutex for the next loop test. */
			mutex_enter(&bq->q_iomutex);
		}
	}

	mutex_exit(&bq->q_iomutex);
}
1895
1896 static void
bd_submit(bd_t * bd,bd_xfer_impl_t * xi)1897 bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
1898 {
1899 uint64_t nv = atomic_inc_64_nv(&bd->d_io_counter);
1900 unsigned q = nv % bd->d_qcount;
1901 bd_queue_t *bq = &bd->d_queues[q];
1902
1903 xi->i_bq = bq;
1904 xi->i_qnum = q;
1905
1906 mutex_enter(&bq->q_iomutex);
1907
1908 list_insert_tail(&bq->q_waitq, xi);
1909
1910 mutex_enter(&bd->d_ksmutex);
1911 kstat_waitq_enter(bd->d_kiop);
1912 mutex_exit(&bd->d_ksmutex);
1913
1914 mutex_exit(&bq->q_iomutex);
1915
1916 bd_sched(bd, bq);
1917 }
1918
1919 static void
bd_runq_exit(bd_xfer_impl_t * xi,int err)1920 bd_runq_exit(bd_xfer_impl_t *xi, int err)
1921 {
1922 bd_t *bd = xi->i_bd;
1923 buf_t *bp = xi->i_bp;
1924 bd_queue_t *bq = xi->i_bq;
1925
1926 mutex_enter(&bq->q_iomutex);
1927 bq->q_qactive--;
1928
1929 mutex_enter(&bd->d_ksmutex);
1930 kstat_runq_exit(bd->d_kiop);
1931 mutex_exit(&bd->d_ksmutex);
1932
1933 list_remove(&bq->q_runq, xi);
1934 mutex_exit(&bq->q_iomutex);
1935
1936 if (err == 0) {
1937 if (bp->b_flags & B_READ) {
1938 atomic_inc_uint(&bd->d_kiop->reads);
1939 atomic_add_64((uint64_t *)&bd->d_kiop->nread,
1940 bp->b_bcount - xi->i_resid);
1941 } else {
1942 atomic_inc_uint(&bd->d_kiop->writes);
1943 atomic_add_64((uint64_t *)&bd->d_kiop->nwritten,
1944 bp->b_bcount - xi->i_resid);
1945 }
1946 }
1947 bd_sched(bd, bq);
1948 }
1949
/*
 * Taskq callback that posts "dynamic LUN expansion" (ESC_DEV_DLE)
 * sysevents for the device passed in arg (a bd_t *).
 *
 * d_dle_state tracks the task: BD_DLE_PENDING is exchanged for
 * BD_DLE_RUNNING on entry.  After each round of events the task checks,
 * under d_dle_mutex, whether PENDING was set again in the meantime; if
 * so it clears the flag and posts another round, otherwise it clears
 * RUNNING and finishes.
 */
static void
bd_dle_sysevent_task(void *arg)
{
	nvlist_t	*attr = NULL;
	char		*path = NULL;
	bd_t		*bd = arg;
	dev_info_t	*dip = bd->d_dip;
	size_t		n;

	mutex_enter(&bd->d_dle_mutex);
	bd->d_dle_state &= ~BD_DLE_PENDING;
	bd->d_dle_state |= BD_DLE_RUNNING;
	mutex_exit(&bd->d_dle_mutex);

	dev_err(dip, CE_NOTE, "!dynamic LUN expansion");

	if (nvlist_alloc(&attr, NV_UNIQUE_NAME_TYPE, KM_SLEEP) != 0) {
		/* Nothing can be posted; drop both state flags and give up. */
		mutex_enter(&bd->d_dle_mutex);
		bd->d_dle_state &= ~(BD_DLE_RUNNING|BD_DLE_PENDING);
		mutex_exit(&bd->d_dle_mutex);
		return;
	}

	path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);

	/*
	 * Build "/devices<pathname>:x".  The trailing 'x' is a placeholder
	 * that is overwritten with the minor node letter below; n ends up
	 * as the length of the string, so path[n - 1] is that placeholder.
	 */
	n = snprintf(path, MAXPATHLEN, "/devices");
	(void) ddi_pathname(dip, path + n);
	n = strlen(path);
	n += snprintf(path + n, MAXPATHLEN - n, ":x");

	for (;;) {
		/*
		 * On receipt of this event, the ZFS sysevent module will scan
		 * active zpools for child vdevs matching this physical path.
		 * In order to catch both whole disk pools and those with an
		 * EFI boot partition, generate separate sysevents for minor
		 * node 'a' and 'b'.
		 */
		for (char c = 'a'; c < 'c'; c++) {
			path[n - 1] = c;

			/* On failure only this round of events is cut short. */
			if (nvlist_add_string(attr, DEV_PHYS_PATH, path) != 0)
				break;

			(void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW,
			    EC_DEV_STATUS, ESC_DEV_DLE, attr, NULL, DDI_SLEEP);
		}

		/* Finish unless another request arrived while we worked. */
		mutex_enter(&bd->d_dle_mutex);
		if ((bd->d_dle_state & BD_DLE_PENDING) == 0) {
			bd->d_dle_state &= ~BD_DLE_RUNNING;
			mutex_exit(&bd->d_dle_mutex);
			break;
		}
		bd->d_dle_state &= ~BD_DLE_PENDING;
		mutex_exit(&bd->d_dle_mutex);
	}

	nvlist_free(attr);
	kmem_free(path, MAXPATHLEN);
}
2011
/*
 * Refresh the cached media information for a device.
 *
 * Asks the driver for its media info (o_media_info) and, under
 * d_statemutex, updates the block shifts, block count and the read-only
 * and solid-state flags.  If the query fails or the reported block size
 * is unusable, the media is treated as ejected and the block count is
 * zeroed.  A change of state wakes everyone waiting on d_statecv.
 *
 * When the block size, block count or state changed, the cmlb label is
 * revalidated (media present) or invalidated (media absent); in the
 * former case a dynamic-LUN-expansion task is also scheduled unless one
 * is already pending.
 */
static void
bd_update_state(bd_t *bd)
{
	enum dkio_state	state = DKIO_INSERTED;
	boolean_t	docmlb = B_FALSE;
	bd_media_t	media;

	bzero(&media, sizeof (media));

	mutex_enter(&bd->d_statemutex);
	if (bd->d_ops.o_media_info(bd->d_private, &media) != 0) {
		bd->d_numblks = 0;
		state = DKIO_EJECTED;
		goto done;
	}

	/*
	 * The block size must be at least 512, a power of two, and must
	 * divide the maximum transfer size evenly.
	 */
	if ((media.m_blksize < 512) ||
	    (!ISP2(media.m_blksize)) ||
	    (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
		dev_err(bd->d_dip, CE_WARN, "Invalid media block size (%d)",
		    media.m_blksize);
		/*
		 * We can't use the media, treat it as not present.
		 */
		state = DKIO_EJECTED;
		bd->d_numblks = 0;
		goto done;
	}

	if (((1U << bd->d_blkshift) != media.m_blksize) ||
	    (bd->d_numblks != media.m_nblks)) {
		/* Device size changed */
		docmlb = B_TRUE;
	}

	/* m_blksize is a power of two, so ffs - 1 is its log2. */
	bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
	bd->d_pblkshift = bd->d_blkshift;
	bd->d_numblks = media.m_nblks;
	bd->d_rdonly = media.m_readonly;
	bd->d_ssd = media.m_solidstate;

	/*
	 * Only use the supplied physical block size if it is non-zero,
	 * greater or equal to the block size, and a power of 2. Ignore it
	 * if not, it's just informational and we can still use the media.
	 */
	if ((media.m_pblksize != 0) &&
	    (media.m_pblksize >= media.m_blksize) &&
	    (ISP2(media.m_pblksize)))
		bd->d_pblkshift = ddi_ffs(media.m_pblksize) - 1;

done:
	if (state != bd->d_state) {
		bd->d_state = state;
		cv_broadcast(&bd->d_statecv);
		docmlb = B_TRUE;
	}
	mutex_exit(&bd->d_statemutex);

	/* Publish the capacity in bytes through the error kstat. */
	bd->d_kerr->bd_capacity.value.ui64 = bd->d_numblks << bd->d_blkshift;

	if (docmlb) {
		if (state == DKIO_INSERTED) {
			(void) cmlb_validate(bd->d_cmlbh, 0, 0);

			mutex_enter(&bd->d_dle_mutex);
			/*
			 * If there is already an event pending, there's
			 * nothing to do; we coalesce multiple events.
			 */
			if ((bd->d_dle_state & BD_DLE_PENDING) == 0) {
				/*
				 * A running task notices the PENDING flag
				 * and loops again, so a new dispatch is
				 * needed only when none is running.
				 */
				if ((bd->d_dle_state & BD_DLE_RUNNING) == 0) {
					taskq_dispatch_ent(bd_taskq,
					    bd_dle_sysevent_task, bd, 0,
					    &bd->d_dle_ent);
				}
				bd->d_dle_state |= BD_DLE_PENDING;
			}
			mutex_exit(&bd->d_dle_mutex);
		} else {
			cmlb_invalidate(bd->d_cmlbh, 0);
		}
	}
}
2096
2097 static int
bd_check_state(bd_t * bd,enum dkio_state * state)2098 bd_check_state(bd_t *bd, enum dkio_state *state)
2099 {
2100 clock_t when;
2101
2102 for (;;) {
2103
2104 bd_update_state(bd);
2105
2106 mutex_enter(&bd->d_statemutex);
2107
2108 if (bd->d_state != *state) {
2109 *state = bd->d_state;
2110 mutex_exit(&bd->d_statemutex);
2111 break;
2112 }
2113
2114 when = drv_usectohz(1000000);
2115 if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
2116 when, TR_CLOCK_TICK) == 0) {
2117 mutex_exit(&bd->d_statemutex);
2118 return (EINTR);
2119 }
2120
2121 mutex_exit(&bd->d_statemutex);
2122 }
2123
2124 return (0);
2125 }
2126
2127 static int
bd_flush_write_cache_done(struct buf * bp)2128 bd_flush_write_cache_done(struct buf *bp)
2129 {
2130 struct dk_callback *dc = (void *)bp->b_private;
2131
2132 (*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
2133 kmem_free(dc, sizeof (*dc));
2134 freerbuf(bp);
2135 return (0);
2136 }
2137
2138 static int
bd_flush_write_cache(bd_t * bd,struct dk_callback * dkc)2139 bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
2140 {
2141 buf_t *bp;
2142 struct dk_callback *dc;
2143 bd_xfer_impl_t *xi;
2144 int rv;
2145
2146 if (bd->d_ops.o_sync_cache == NULL) {
2147 return (ENOTSUP);
2148 }
2149 if ((bp = getrbuf(KM_SLEEP)) == NULL) {
2150 return (ENOMEM);
2151 }
2152 bp->b_resid = 0;
2153 bp->b_bcount = 0;
2154
2155 xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
2156 if (xi == NULL) {
2157 rv = geterror(bp);
2158 freerbuf(bp);
2159 return (rv);
2160 }
2161
2162 /* Make an asynchronous flush, but only if there is a callback */
2163 if (dkc != NULL && dkc->dkc_callback != NULL) {
2164 /* Make a private copy of the callback structure */
2165 dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
2166 *dc = *dkc;
2167 bp->b_private = dc;
2168 bp->b_iodone = bd_flush_write_cache_done;
2169
2170 bd_submit(bd, xi);
2171 return (0);
2172 }
2173
2174 /* In case there is no callback, perform a synchronous flush */
2175 bd_submit(bd, xi);
2176 (void) biowait(bp);
2177 rv = geterror(bp);
2178 freerbuf(bp);
2179
2180 return (rv);
2181 }
2182
/*
 * b_iodone handler for asynchronous free-space requests: the buf is all
 * that is left to clean up, so free it.
 */
static int
bd_free_space_done(struct buf *bp)
{
	freerbuf(bp);

	return (0);
}
2189
2190 static int
bd_free_space_cb(dkioc_free_list_t * dfl,void * arg,int kmflag)2191 bd_free_space_cb(dkioc_free_list_t *dfl, void *arg, int kmflag)
2192 {
2193 bd_t *bd = arg;
2194 buf_t *bp = NULL;
2195 bd_xfer_impl_t *xi = NULL;
2196 boolean_t sync = DFL_ISSYNC(dfl) ? B_TRUE : B_FALSE;
2197 int rv = 0;
2198
2199 bp = getrbuf(KM_SLEEP);
2200 bp->b_resid = 0;
2201 bp->b_bcount = 0;
2202 bp->b_lblkno = 0;
2203
2204 xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_free_space, kmflag);
2205 xi->i_dfl = dfl;
2206
2207 if (!sync) {
2208 bp->b_iodone = bd_free_space_done;
2209 bd_submit(bd, xi);
2210 return (0);
2211 }
2212
2213 xi->i_flags |= BD_XFER_POLL;
2214 bd_submit(bd, xi);
2215
2216 (void) biowait(bp);
2217 rv = geterror(bp);
2218 freerbuf(bp);
2219
2220 return (rv);
2221 }
2222
2223 static int
bd_free_space(dev_t dev,bd_t * bd,dkioc_free_list_t * dfl)2224 bd_free_space(dev_t dev, bd_t *bd, dkioc_free_list_t *dfl)
2225 {
2226 diskaddr_t p_len, p_offset;
2227 uint64_t offset_bytes, len_bytes;
2228 minor_t part = BDPART(dev);
2229 const uint_t bshift = bd->d_blkshift;
2230 dkioc_free_info_t dfi = {
2231 .dfi_bshift = bshift,
2232 .dfi_align = bd->d_free_align << bshift,
2233 .dfi_max_bytes = bd->d_max_free_blks << bshift,
2234 .dfi_max_ext = bd->d_max_free_seg,
2235 .dfi_max_ext_bytes = bd->d_max_free_seg_blks << bshift,
2236 };
2237
2238 if (cmlb_partinfo(bd->d_cmlbh, part, &p_len, &p_offset, NULL,
2239 NULL, 0) != 0) {
2240 dfl_free(dfl);
2241 return (ENXIO);
2242 }
2243
2244 /*
2245 * bd_ioctl created our own copy of dfl, so we can modify as
2246 * necessary
2247 */
2248 offset_bytes = (uint64_t)p_offset << bshift;
2249 len_bytes = (uint64_t)p_len << bshift;
2250
2251 dfl->dfl_offset += offset_bytes;
2252 if (dfl->dfl_offset < offset_bytes) {
2253 dfl_free(dfl);
2254 return (EOVERFLOW);
2255 }
2256
2257 return (dfl_iter(dfl, &dfi, offset_bytes + len_bytes, bd_free_space_cb,
2258 bd, KM_SLEEP));
2259 }
2260
2261 /*
2262 * Nexus support.
2263 */
2264 int
bd_bus_ctl(dev_info_t * dip,dev_info_t * rdip,ddi_ctl_enum_t ctlop,void * arg,void * result)2265 bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
2266 void *arg, void *result)
2267 {
2268 bd_handle_t hdl;
2269
2270 switch (ctlop) {
2271 case DDI_CTLOPS_REPORTDEV:
2272 cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
2273 ddi_node_name(rdip), ddi_get_name_addr(rdip),
2274 ddi_driver_name(rdip), ddi_get_instance(rdip));
2275 return (DDI_SUCCESS);
2276
2277 case DDI_CTLOPS_INITCHILD:
2278 hdl = ddi_get_parent_data((dev_info_t *)arg);
2279 if (hdl == NULL) {
2280 return (DDI_NOT_WELL_FORMED);
2281 }
2282 ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
2283 return (DDI_SUCCESS);
2284
2285 case DDI_CTLOPS_UNINITCHILD:
2286 ddi_set_name_addr((dev_info_t *)arg, NULL);
2287 ndi_prop_remove_all((dev_info_t *)arg);
2288 return (DDI_SUCCESS);
2289
2290 default:
2291 return (ddi_ctlops(dip, rdip, ctlop, arg, result));
2292 }
2293 }
2294
2295 /*
2296 * Functions for device drivers.
2297 */
2298 bd_handle_t
bd_alloc_handle(void * private,bd_ops_t * ops,ddi_dma_attr_t * dma,int kmflag)2299 bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
2300 {
2301 bd_handle_t hdl;
2302
2303 switch (ops->o_version) {
2304 case BD_OPS_VERSION_0:
2305 case BD_OPS_VERSION_1:
2306 case BD_OPS_VERSION_2:
2307 break;
2308
2309 default:
2310 /* Unsupported version */
2311 return (NULL);
2312 }
2313
2314 hdl = kmem_zalloc(sizeof (*hdl), kmflag);
2315 if (hdl == NULL) {
2316 return (NULL);
2317 }
2318
2319 switch (ops->o_version) {
2320 case BD_OPS_VERSION_2:
2321 hdl->h_ops.o_free_space = ops->o_free_space;
2322 /*FALLTHRU*/
2323 case BD_OPS_VERSION_1:
2324 case BD_OPS_VERSION_0:
2325 hdl->h_ops.o_drive_info = ops->o_drive_info;
2326 hdl->h_ops.o_media_info = ops->o_media_info;
2327 hdl->h_ops.o_devid_init = ops->o_devid_init;
2328 hdl->h_ops.o_sync_cache = ops->o_sync_cache;
2329 hdl->h_ops.o_read = ops->o_read;
2330 hdl->h_ops.o_write = ops->o_write;
2331 break;
2332 }
2333
2334 hdl->h_dma = dma;
2335 hdl->h_private = private;
2336
2337 return (hdl);
2338 }
2339
2340 void
bd_free_handle(bd_handle_t hdl)2341 bd_free_handle(bd_handle_t hdl)
2342 {
2343 kmem_free(hdl, sizeof (*hdl));
2344 }
2345
2346 int
bd_attach_handle(dev_info_t * dip,bd_handle_t hdl)2347 bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
2348 {
2349 bd_drive_t drive = { 0 };
2350 dev_info_t *child;
2351 size_t len;
2352
2353 /*
2354 * It's not an error if bd_attach_handle() is called on a handle that
2355 * already is attached. We just ignore the request to attach and return.
2356 * This way drivers using blkdev don't have to keep track about blkdev
2357 * state, they can just call this function to make sure it attached.
2358 */
2359 if (hdl->h_child != NULL) {
2360 return (DDI_SUCCESS);
2361 }
2362
2363 /* if drivers don't override this, make it assume none */
2364 drive.d_lun = -1;
2365 hdl->h_ops.o_drive_info(hdl->h_private, &drive);
2366
2367 hdl->h_parent = dip;
2368 hdl->h_name = "blkdev";
2369
2370 /*
2371 * Prefer the GUID over the EUI64.
2372 */
2373 if (*(uint64_t *)drive.d_guid != 0 ||
2374 *((uint64_t *)drive.d_guid + 1) != 0) {
2375 len = snprintf(hdl->h_addr, sizeof (hdl->h_addr),
2376 "w%02X%02X%02X%02X%02X%02X%02X%02X"
2377 "%02X%02X%02X%02X%02X%02X%02X%02X",
2378 drive.d_guid[0], drive.d_guid[1], drive.d_guid[2],
2379 drive.d_guid[3], drive.d_guid[4], drive.d_guid[5],
2380 drive.d_guid[6], drive.d_guid[7], drive.d_guid[8],
2381 drive.d_guid[9], drive.d_guid[10], drive.d_guid[11],
2382 drive.d_guid[12], drive.d_guid[13], drive.d_guid[14],
2383 drive.d_guid[15]);
2384 } else if (*(uint64_t *)drive.d_eui64 != 0) {
2385 len = snprintf(hdl->h_addr, sizeof (hdl->h_addr),
2386 "w%02X%02X%02X%02X%02X%02X%02X%02X",
2387 drive.d_eui64[0], drive.d_eui64[1],
2388 drive.d_eui64[2], drive.d_eui64[3],
2389 drive.d_eui64[4], drive.d_eui64[5],
2390 drive.d_eui64[6], drive.d_eui64[7]);
2391 } else {
2392 len = snprintf(hdl->h_addr, sizeof (hdl->h_addr),
2393 "%X", drive.d_target);
2394 }
2395
2396 VERIFY(len <= sizeof (hdl->h_addr));
2397
2398 if (drive.d_lun >= 0) {
2399 (void) snprintf(hdl->h_addr + len, sizeof (hdl->h_addr) - len,
2400 ",%X", drive.d_lun);
2401 }
2402
2403 if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
2404 &child) != NDI_SUCCESS) {
2405 cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
2406 ddi_driver_name(dip), ddi_get_instance(dip),
2407 "blkdev", hdl->h_addr);
2408 return (DDI_FAILURE);
2409 }
2410
2411 ddi_set_parent_data(child, hdl);
2412 hdl->h_child = child;
2413
2414 if (ndi_devi_online(child, 0) != NDI_SUCCESS) {
2415 cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
2416 ddi_driver_name(dip), ddi_get_instance(dip),
2417 hdl->h_name, hdl->h_addr);
2418 (void) ndi_devi_free(child);
2419 hdl->h_child = NULL;
2420 return (DDI_FAILURE);
2421 }
2422
2423 return (DDI_SUCCESS);
2424 }
2425
2426 int
bd_detach_handle(bd_handle_t hdl)2427 bd_detach_handle(bd_handle_t hdl)
2428 {
2429 int rv;
2430 char *devnm;
2431
2432 /*
2433 * It's not an error if bd_detach_handle() is called on a handle that
2434 * already is detached. We just ignore the request to detach and return.
2435 * This way drivers using blkdev don't have to keep track about blkdev
2436 * state, they can just call this function to make sure it detached.
2437 */
2438 if (hdl->h_child == NULL) {
2439 return (DDI_SUCCESS);
2440 }
2441 ndi_devi_enter(hdl->h_parent);
2442 if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
2443 rv = ddi_remove_child(hdl->h_child, 0);
2444 } else {
2445 devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
2446 (void) ddi_deviname(hdl->h_child, devnm);
2447 (void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
2448 rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
2449 NDI_DEVI_REMOVE | NDI_UNCONFIG);
2450 kmem_free(devnm, MAXNAMELEN + 1);
2451 }
2452 if (rv == 0) {
2453 hdl->h_child = NULL;
2454 }
2455
2456 ndi_devi_exit(hdl->h_parent);
2457 return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
2458 }
2459
2460 void
bd_xfer_done(bd_xfer_t * xfer,int err)2461 bd_xfer_done(bd_xfer_t *xfer, int err)
2462 {
2463 bd_xfer_impl_t *xi = (void *)xfer;
2464 buf_t *bp = xi->i_bp;
2465 int rv = DDI_SUCCESS;
2466 bd_t *bd = xi->i_bd;
2467 size_t len;
2468
2469 if (err != 0) {
2470 bd_runq_exit(xi, err);
2471 atomic_inc_32(&bd->d_kerr->bd_harderrs.value.ui32);
2472
2473 bp->b_resid += xi->i_resid;
2474 bd_xfer_free(xi);
2475 bioerror(bp, err);
2476 biodone(bp);
2477 return;
2478 }
2479
2480 xi->i_cur_win++;
2481 xi->i_resid -= xi->i_len;
2482
2483 if (xi->i_resid == 0) {
2484 /* Job completed succcessfully! */
2485 bd_runq_exit(xi, 0);
2486
2487 bd_xfer_free(xi);
2488 biodone(bp);
2489 return;
2490 }
2491
2492 xi->i_blkno += xi->i_nblks;
2493
2494 if (bd->d_use_dma) {
2495 /* More transfer still pending... advance to next DMA window. */
2496 rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
2497 &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
2498 } else {
2499 /* Advance memory window. */
2500 xi->i_kaddr += xi->i_len;
2501 xi->i_offset += xi->i_len;
2502 len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
2503 }
2504
2505
2506 if ((rv != DDI_SUCCESS) ||
2507 (P2PHASE(len, (1U << xi->i_blkshift)) != 0)) {
2508 bd_runq_exit(xi, EFAULT);
2509
2510 bp->b_resid += xi->i_resid;
2511 bd_xfer_free(xi);
2512 bioerror(bp, EFAULT);
2513 biodone(bp);
2514 return;
2515 }
2516 xi->i_len = len;
2517 xi->i_nblks = len >> xi->i_blkshift;
2518
2519 /* Submit next window to hardware. */
2520 rv = xi->i_func(bd->d_private, &xi->i_public);
2521 if (rv != 0) {
2522 bd_runq_exit(xi, rv);
2523
2524 atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
2525
2526 bp->b_resid += xi->i_resid;
2527 bd_xfer_free(xi);
2528 bioerror(bp, rv);
2529 biodone(bp);
2530 }
2531 }
2532
2533 void
bd_error(bd_xfer_t * xfer,int error)2534 bd_error(bd_xfer_t *xfer, int error)
2535 {
2536 bd_xfer_impl_t *xi = (void *)xfer;
2537 bd_t *bd = xi->i_bd;
2538
2539 switch (error) {
2540 case BD_ERR_MEDIA:
2541 atomic_inc_32(&bd->d_kerr->bd_rq_media_err.value.ui32);
2542 break;
2543 case BD_ERR_NTRDY:
2544 atomic_inc_32(&bd->d_kerr->bd_rq_ntrdy_err.value.ui32);
2545 break;
2546 case BD_ERR_NODEV:
2547 atomic_inc_32(&bd->d_kerr->bd_rq_nodev_err.value.ui32);
2548 break;
2549 case BD_ERR_RECOV:
2550 atomic_inc_32(&bd->d_kerr->bd_rq_recov_err.value.ui32);
2551 break;
2552 case BD_ERR_ILLRQ:
2553 atomic_inc_32(&bd->d_kerr->bd_rq_illrq_err.value.ui32);
2554 break;
2555 case BD_ERR_PFA:
2556 atomic_inc_32(&bd->d_kerr->bd_rq_pfa_err.value.ui32);
2557 break;
2558 default:
2559 cmn_err(CE_PANIC, "bd_error: unknown error type %d", error);
2560 break;
2561 }
2562 }
2563
2564 void
bd_state_change(bd_handle_t hdl)2565 bd_state_change(bd_handle_t hdl)
2566 {
2567 bd_t *bd;
2568
2569 if ((bd = hdl->h_bd) != NULL) {
2570 bd_update_state(bd);
2571 }
2572 }
2573
2574 const char *
bd_address(bd_handle_t hdl)2575 bd_address(bd_handle_t hdl)
2576 {
2577 return (hdl->h_addr);
2578 }
2579
2580 void
bd_mod_init(struct dev_ops * devops)2581 bd_mod_init(struct dev_ops *devops)
2582 {
2583 static struct bus_ops bd_bus_ops = {
2584 BUSO_REV, /* busops_rev */
2585 nullbusmap, /* bus_map */
2586 NULL, /* bus_get_intrspec (OBSOLETE) */
2587 NULL, /* bus_add_intrspec (OBSOLETE) */
2588 NULL, /* bus_remove_intrspec (OBSOLETE) */
2589 i_ddi_map_fault, /* bus_map_fault */
2590 NULL, /* bus_dma_map (OBSOLETE) */
2591 ddi_dma_allochdl, /* bus_dma_allochdl */
2592 ddi_dma_freehdl, /* bus_dma_freehdl */
2593 ddi_dma_bindhdl, /* bus_dma_bindhdl */
2594 ddi_dma_unbindhdl, /* bus_dma_unbindhdl */
2595 ddi_dma_flush, /* bus_dma_flush */
2596 ddi_dma_win, /* bus_dma_win */
2597 ddi_dma_mctl, /* bus_dma_ctl */
2598 bd_bus_ctl, /* bus_ctl */
2599 ddi_bus_prop_op, /* bus_prop_op */
2600 NULL, /* bus_get_eventcookie */
2601 NULL, /* bus_add_eventcall */
2602 NULL, /* bus_remove_eventcall */
2603 NULL, /* bus_post_event */
2604 NULL, /* bus_intr_ctl (OBSOLETE) */
2605 NULL, /* bus_config */
2606 NULL, /* bus_unconfig */
2607 NULL, /* bus_fm_init */
2608 NULL, /* bus_fm_fini */
2609 NULL, /* bus_fm_access_enter */
2610 NULL, /* bus_fm_access_exit */
2611 NULL, /* bus_power */
2612 NULL, /* bus_intr_op */
2613 };
2614
2615 devops->devo_bus_ops = &bd_bus_ops;
2616
2617 /*
2618 * NB: The device driver is free to supply its own
2619 * character entry device support.
2620 */
2621 }
2622
2623 void
bd_mod_fini(struct dev_ops * devops)2624 bd_mod_fini(struct dev_ops *devops)
2625 {
2626 devops->devo_bus_ops = NULL;
2627 }
2628