xref: /illumos-gate/usr/src/uts/common/io/scsi/targets/sd.c (revision 86ef0a63)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 /*
26  * Copyright (c) 2011 Bayard G. Bell.  All rights reserved.
27  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
28  * Copyright 2012 DEY Storage Systems, Inc.  All rights reserved.
29  * Copyright 2019 Joyent, Inc.
30  * Copyright 2017 Nexenta Systems, Inc.
31  * Copyright 2019 Racktop Systems
32  */
33 /*
34  * Copyright 2011 cyril.galibern@opensvc.com
35  */
36 
37 /*
38  * SCSI disk target driver.
39  */
40 #include <sys/scsi/scsi.h>
41 #include <sys/dkbad.h>
42 #include <sys/dklabel.h>
43 #include <sys/dkio.h>
44 #include <sys/fdio.h>
45 #include <sys/cdio.h>
46 #include <sys/mhd.h>
47 #include <sys/vtoc.h>
48 #include <sys/dktp/fdisk.h>
49 #include <sys/kstat.h>
50 #include <sys/vtrace.h>
51 #include <sys/note.h>
52 #include <sys/thread.h>
53 #include <sys/proc.h>
54 #include <sys/efi_partition.h>
55 #include <sys/var.h>
56 #include <sys/aio_req.h>
57 #include <sys/dkioc_free_util.h>
58 
59 #ifdef __lock_lint
60 #define	_LP64
61 #define	__amd64
62 #endif
63 
64 #if (defined(__fibre))
65 /* Note: is there a leadville version of the following? */
66 #include <sys/fc4/fcal_linkapp.h>
67 #endif
68 #include <sys/taskq.h>
69 #include <sys/uuid.h>
70 #include <sys/byteorder.h>
71 #include <sys/sdt.h>
72 
73 #include "sd_xbuf.h"
74 
75 #include <sys/scsi/targets/sddef.h>
76 #include <sys/cmlb.h>
77 #include <sys/sysevent/eventdefs.h>
78 #include <sys/sysevent/dev.h>
79 
80 #include <sys/fm/protocol.h>
81 
82 /*
83  * Loadable module info.
84  */
85 #if (defined(__fibre))
86 #define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver"
87 #else /* !__fibre */
88 #define	SD_MODULE_NAME	"SCSI Disk Driver"
89 #endif /* !__fibre */
90 
91 /*
92  * Define the interconnect type, to allow the driver to distinguish
93  * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
94  *
95  * This is really for backward compatibility. In the future, the driver
96  * should actually check the "interconnect-type" property as reported by
97  * the HBA; however at present this property is not defined by all HBAs,
98  * so we will use this #define (1) to permit the driver to run in
99  * backward-compatibility mode; and (2) to print a notification message
100  * if an FC HBA does not support the "interconnect-type" property.  The
101  * behavior of the driver will be to assume parallel SCSI behaviors unless
102  * the "interconnect-type" property is defined by the HBA **AND** has a
103  * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
104  * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
105  * Channel behaviors (as per the old ssd).  (Note that the
106  * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
107  * will result in the driver assuming parallel SCSI behaviors.)
108  *
109  * (see common/sys/scsi/impl/services.h)
110  *
111  * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
112  * since some FC HBAs may already support that, and there is some code in
113  * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
114  * default would confuse that code, and besides things should work fine
115  * anyways if the FC HBA already reports INTERCONNECT_FABRIC for the
116  * "interconnect-type" property.
117  *
118  */
119 #if (defined(__fibre))
120 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
121 #else
122 #define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
123 #endif
124 
125 /*
126  * The name of the driver, established from the module name in _init.
127  */
128 static	char *sd_label			= NULL;
129 
130 /*
131  * Driver name is unfortunately prefixed on some driver.conf properties.
132  */
133 #if (defined(__fibre))
134 #define	sd_max_xfer_size		ssd_max_xfer_size
135 #define	sd_config_list			ssd_config_list
136 static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
137 static	char *sd_config_list		= "ssd-config-list";
138 #else
139 static	char *sd_max_xfer_size		= "sd_max_xfer_size";
140 static	char *sd_config_list		= "sd-config-list";
141 #endif
142 
143 /*
144  * Driver global variables
145  */
146 
147 #if (defined(__fibre))
148 /*
149  * These #defines are to avoid namespace collisions that occur because this
150  * code is currently used to compile two separate driver modules: sd and ssd.
151  * All global variables need to be treated this way (even if declared static)
152  * in order to allow the debugger to resolve the names properly.
153  * It is anticipated that in the near future the ssd module will be obsoleted,
154  * at which time this namespace issue should go away.
155  */
156 #define	sd_state			ssd_state
157 #define	sd_io_time			ssd_io_time
158 #define	sd_failfast_enable		ssd_failfast_enable
159 #define	sd_ua_retry_count		ssd_ua_retry_count
160 #define	sd_report_pfa			ssd_report_pfa
161 #define	sd_max_throttle			ssd_max_throttle
162 #define	sd_min_throttle			ssd_min_throttle
163 #define	sd_rot_delay			ssd_rot_delay
164 
165 #define	sd_retry_on_reservation_conflict	\
166 					ssd_retry_on_reservation_conflict
167 #define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
168 #define	sd_resv_conflict_name		ssd_resv_conflict_name
169 
170 #define	sd_component_mask		ssd_component_mask
171 #define	sd_level_mask			ssd_level_mask
172 #define	sd_debug_un			ssd_debug_un
173 #define	sd_error_level			ssd_error_level
174 
175 #define	sd_xbuf_active_limit		ssd_xbuf_active_limit
176 #define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit
177 
178 #define	sd_tr				ssd_tr
179 #define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
180 #define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
181 #define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
182 #define	sd_check_media_time		ssd_check_media_time
183 #define	sd_wait_cmds_complete		ssd_wait_cmds_complete
184 #define	sd_label_mutex			ssd_label_mutex
185 #define	sd_detach_mutex			ssd_detach_mutex
186 #define	sd_log_buf			ssd_log_buf
187 #define	sd_log_mutex			ssd_log_mutex
188 
189 #define	sd_disk_table			ssd_disk_table
190 #define	sd_disk_table_size		ssd_disk_table_size
191 #define	sd_sense_mutex			ssd_sense_mutex
192 #define	sd_cdbtab			ssd_cdbtab
193 
194 #define	sd_cb_ops			ssd_cb_ops
195 #define	sd_ops				ssd_ops
196 #define	sd_additional_codes		ssd_additional_codes
197 #define	sd_tgops			ssd_tgops
198 
199 #define	sd_minor_data			ssd_minor_data
200 #define	sd_minor_data_efi		ssd_minor_data_efi
201 
202 #define	sd_tq				ssd_tq
203 #define	sd_wmr_tq			ssd_wmr_tq
204 #define	sd_taskq_name			ssd_taskq_name
205 #define	sd_wmr_taskq_name		ssd_wmr_taskq_name
206 #define	sd_taskq_minalloc		ssd_taskq_minalloc
207 #define	sd_taskq_maxalloc		ssd_taskq_maxalloc
208 
209 #define	sd_dump_format_string		ssd_dump_format_string
210 
211 #define	sd_iostart_chain		ssd_iostart_chain
212 #define	sd_iodone_chain			ssd_iodone_chain
213 
214 #define	sd_pm_idletime			ssd_pm_idletime
215 
216 #define	sd_force_pm_supported		ssd_force_pm_supported
217 
218 #define	sd_dtype_optical_bind		ssd_dtype_optical_bind
219 
220 #define	sd_ssc_init			ssd_ssc_init
221 #define	sd_ssc_send			ssd_ssc_send
222 #define	sd_ssc_fini			ssd_ssc_fini
223 #define	sd_ssc_assessment		ssd_ssc_assessment
224 #define	sd_ssc_post			ssd_ssc_post
225 #define	sd_ssc_print			ssd_ssc_print
226 #define	sd_ssc_ereport_post		ssd_ssc_ereport_post
227 #define	sd_ssc_set_info			ssd_ssc_set_info
228 #define	sd_ssc_extract_info		ssd_ssc_extract_info
229 
230 #endif
231 
232 #ifdef	SDDEBUG
233 int	sd_force_pm_supported		= 0;
234 #endif	/* SDDEBUG */
235 
236 void *sd_state				= NULL;
237 int sd_io_time				= SD_IO_TIME;
238 int sd_failfast_enable			= 1;
239 int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
240 int sd_report_pfa			= 1;
241 int sd_max_throttle			= SD_MAX_THROTTLE;
242 int sd_min_throttle			= SD_MIN_THROTTLE;
243 int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
244 int sd_qfull_throttle_enable		= TRUE;
245 
246 int sd_retry_on_reservation_conflict	= 1;
247 int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
248 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))
249 
250 static int sd_dtype_optical_bind	= -1;
251 
252 /* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
253 static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";
254 
255 /*
256  * Global data for debug logging. To enable debug printing, sd_component_mask
257  * and sd_level_mask should be set to the desired bit patterns as outlined in
258  * sddef.h.
259  */
260 uint_t	sd_component_mask		= 0x0;
261 uint_t	sd_level_mask			= 0x0;
262 struct	sd_lun *sd_debug_un		= NULL;
263 uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;
264 
265 /* Note: these may go away in the future... */
266 static uint32_t	sd_xbuf_active_limit	= 512;
267 static uint32_t sd_xbuf_reserve_limit	= 16;
268 
269 static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };
270 
271 /*
272  * Timer value used to reset the throttle after it has been reduced
273  * (typically in response to TRAN_BUSY or STATUS_QFULL)
274  */
275 static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
276 static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;
277 
278 /*
279  * Interval value associated with the media change scsi watch.
280  */
281 static int sd_check_media_time		= 3000000;
282 
283 /*
284  * Wait value used for in progress operations during a DDI_SUSPEND
285  */
286 static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;
287 
288 /*
289  * sd_label_mutex protects a static buffer used in the disk label
290  * component of the driver
291  */
292 static kmutex_t sd_label_mutex;
293 
294 /*
295  * sd_detach_mutex protects un_layer_count, un_detach_count, and
296  * un_opens_in_progress in the sd_lun structure.
297  */
298 static kmutex_t sd_detach_mutex;
299 
300 _NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
301 	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))
302 
303 /*
304  * Global buffer and mutex for debug logging
305  */
306 static char	sd_log_buf[1024];
307 static kmutex_t	sd_log_mutex;
308 
309 /*
310  * Structs and globals for recording attached lun information.
311  * This maintains a chain. Each node in the chain represents a SCSI controller.
312  * The structure records the number of luns attached to each target connected
313  * with the controller.
314  * For parallel scsi device only.
315  */
316 struct sd_scsi_hba_tgt_lun {
317 	struct sd_scsi_hba_tgt_lun	*next;	/* next controller node in the chain */
318 	dev_info_t			*pdip;	/* devinfo of the parent (HBA) node */
319 	int				nlun[NTARGETS_WIDE];	/* luns attached, per target */
320 };
321 
322 /*
323  * Flag to indicate the lun is attached or detached
324  */
325 #define	SD_SCSI_LUN_ATTACH	0
326 #define	SD_SCSI_LUN_DETACH	1
327 
328 static kmutex_t	sd_scsi_target_lun_mutex;
329 static struct sd_scsi_hba_tgt_lun	*sd_scsi_target_lun_head = NULL;
330 
331 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
332     sd_scsi_hba_tgt_lun::next sd_scsi_hba_tgt_lun::pdip))
333 
334 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
335     sd_scsi_target_lun_head))
336 
337 /*
338  * "Smart" Probe Caching structs, globals, #defines, etc.
339  * For parallel scsi and non-self-identify device only.
340  */
341 
342 /*
343  * The following resources and routines are implemented to support
344  * "smart" probing, which caches the scsi_probe() results in an array,
345  * in order to help avoid long probe times.
346  */
347 struct sd_scsi_probe_cache {
348 	struct	sd_scsi_probe_cache	*next;	/* next controller node in the chain */
349 	dev_info_t	*pdip;			/* devinfo of the parent (HBA) node */
350 	int		cache[NTARGETS_WIDE];	/* cached scsi_probe() result, per target */
351 };
352 
353 static kmutex_t	sd_scsi_probe_cache_mutex;
354 static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;
355 
356 /*
357  * Really we only need protection on the head of the linked list, but
358  * better safe than sorry.
359  */
360 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
361     sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))
362 
363 _NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
364     sd_scsi_probe_cache_head))
365 
366 /*
367  * Power attribute table
368  */
369 static sd_power_attr_ss sd_pwr_ss = {	/* start/stop (2-level) power model */
370 	{ "NAME=spindle-motor", "0=off", "1=on", NULL },	/* pm-components strings */
371 	{0, 100},	/* per-level values indexed 0=off, 1=on; */
372 	{30, 0},	/* NOTE(review): row semantics (perf/threshold/latency?) are */
373 	{20000, 0}	/* defined by sd_power_attr_ss in sddef.h -- confirm there */
374 };
375 
376 static sd_power_attr_pc sd_pwr_pc = {	/* power-condition (4-level) model; levels match sd_pl2pc[] */
377 	{ "NAME=spindle-motor", "0=stopped", "1=standby", "2=idle",
378 		"3=active", NULL },	/* pm-components strings */
379 	{0, 0, 0, 100},		/* per-level values indexed stopped/standby/idle/active; */
380 	{90, 90, 20, 0},	/* NOTE(review): row semantics are defined by */
381 	{15000, 15000, 1000, 0}	/* sd_power_attr_pc in sddef.h -- confirm there */
382 };
383 
384 /*
385  * Power level to power condition
386  */
387 static int sd_pl2pc[] = {
388 	SD_TARGET_START_VALID,	/* power level 0 */
389 	SD_TARGET_STANDBY,	/* power level 1 */
390 	SD_TARGET_IDLE,		/* power level 2 */
391 	SD_TARGET_ACTIVE	/* power level 3 */
392 };
393 
394 /*
395  * Vendor specific data name property declarations
396  */
397 
398 #if defined(__fibre) || defined(__x86)
399 
400 static sd_tunables seagate_properties = {
401 	SEAGATE_THROTTLE_VALUE,
402 	0,
403 	0,
404 	0,
405 	0,
406 	0,
407 	0,
408 	0,
409 	0
410 };
411 
412 
413 static sd_tunables fujitsu_properties = {
414 	FUJITSU_THROTTLE_VALUE,
415 	0,
416 	0,
417 	0,
418 	0,
419 	0,
420 	0,
421 	0,
422 	0
423 };
424 
425 static sd_tunables ibm_properties = {
426 	IBM_THROTTLE_VALUE,
427 	0,
428 	0,
429 	0,
430 	0,
431 	0,
432 	0,
433 	0,
434 	0
435 };
436 
437 static sd_tunables sve_properties = {	/* field order per sd_tunables in sddef.h */
438 	SVE_THROTTLE_VALUE,		/* throttle */
439 	0,				/* ctype */
440 	0,				/* not-ready retry count */
441 	SVE_BUSY_RETRIES,		/* busy retry count */
442 	SVE_RESET_RETRY_COUNT,		/* reset retry count */
443 	SVE_RESERVE_RELEASE_TIME,	/* reserve/release time */
444 	SVE_MIN_THROTTLE_VALUE,		/* min throttle */
445 	SVE_DISKSORT_DISABLED_FLAG,	/* disksort disabled */
446 	0				/* lun reset enabled */
447 };
448 
449 static sd_tunables maserati_properties = {
450 	0,
451 	0,
452 	0,
453 	0,
454 	0,
455 	0,
456 	0,
457 	MASERATI_DISKSORT_DISABLED_FLAG,
458 	MASERATI_LUN_RESET_ENABLED_FLAG
459 };
460 
461 static sd_tunables pirus_properties = {
462 	PIRUS_THROTTLE_VALUE,
463 	0,
464 	PIRUS_NRR_COUNT,
465 	PIRUS_BUSY_RETRIES,
466 	PIRUS_RESET_RETRY_COUNT,
467 	0,
468 	PIRUS_MIN_THROTTLE_VALUE,
469 	PIRUS_DISKSORT_DISABLED_FLAG,
470 	PIRUS_LUN_RESET_ENABLED_FLAG
471 };
472 
473 #endif
474 
475 #if (defined(__sparc) && !defined(__fibre)) || \
476 	(defined(__x86))
477 
478 
479 static sd_tunables elite_properties = {
480 	ELITE_THROTTLE_VALUE,
481 	0,
482 	0,
483 	0,
484 	0,
485 	0,
486 	0,
487 	0,
488 	0
489 };
490 
491 static sd_tunables st31200n_properties = {
492 	ST31200N_THROTTLE_VALUE,
493 	0,
494 	0,
495 	0,
496 	0,
497 	0,
498 	0,
499 	0,
500 	0
501 };
502 
503 #endif /* Fibre or not */
504 
505 static sd_tunables lsi_properties_scsi = {
506 	LSI_THROTTLE_VALUE,
507 	0,
508 	LSI_NOTREADY_RETRIES,
509 	0,
510 	0,
511 	0,
512 	0,
513 	0,
514 	0
515 };
516 
517 static sd_tunables symbios_properties = {
518 	SYMBIOS_THROTTLE_VALUE,
519 	0,
520 	SYMBIOS_NOTREADY_RETRIES,
521 	0,
522 	0,
523 	0,
524 	0,
525 	0,
526 	0
527 };
528 
529 static sd_tunables lsi_properties = {
530 	0,
531 	0,
532 	LSI_NOTREADY_RETRIES,
533 	0,
534 	0,
535 	0,
536 	0,
537 	0,
538 	0
539 };
540 
541 static sd_tunables lsi_oem_properties = {	/* field order per sd_tunables in sddef.h */
542 	0,				/* throttle */
543 	0,				/* ctype */
544 	LSI_OEM_NOTREADY_RETRIES,	/* not-ready retry count */
545 	0,				/* busy retry count */
546 	0,				/* reset retry count */
547 	0,				/* reserve/release time */
548 	0,				/* min throttle */
549 	0,				/* disksort disabled */
550 	0,				/* lun reset enabled */
551 	1	/* NOTE(review): only table with a 10th initializer; presumably */
552 };		/* sdt_suppress_cache_flush -- confirm against sddef.h */
553 
554 
555 
556 #if (defined(SD_PROP_TST))
557 
558 #define	SD_TST_CTYPE_VAL	CTYPE_CDROM
559 #define	SD_TST_THROTTLE_VAL	16
560 #define	SD_TST_NOTREADY_VAL	12
561 #define	SD_TST_BUSY_VAL		60
562 #define	SD_TST_RST_RETRY_VAL	36
563 #define	SD_TST_RSV_REL_TIME	60
564 
565 static sd_tunables tst_properties = {
566 	SD_TST_THROTTLE_VAL,
567 	SD_TST_CTYPE_VAL,
568 	SD_TST_NOTREADY_VAL,
569 	SD_TST_BUSY_VAL,
570 	SD_TST_RST_RETRY_VAL,
571 	SD_TST_RSV_REL_TIME,
572 	0,
573 	0,
574 	0
575 };
576 #endif
577 
578 /* Similar to ANSI toupper; NB: evaluates (C) more than once -- no side-effecting args */
579 #define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))
580 
581 /*
582  * Static Driver Configuration Table
583  *
584  * This is the table of disks which need throttle adjustment (or, perhaps
585  * something else as defined by the flags at a future time.)  device_id
586  * is a string consisting of concatenated vid (vendor), pid (product/model)
587  * and revision strings as defined in the scsi_inquiry structure.  Offsets of
588  * the parts of the string are as defined by the sizes in the scsi_inquiry
589  * structure.  Device type is searched as far as the device_id string is
590  * defined.  Flags defines which values are to be set in the driver from the
591  * properties list.
592  *
593  * Entries below which begin and end with a "*" are a special case.
594  * These do not have a specific vendor, and the string which follows
595  * can appear anywhere in the 16 byte PID portion of the inquiry data.
596  *
597  * Entries below which begin and end with a " " (blank) are a special
598  * case. The comparison function will treat multiple consecutive blanks
599  * as equivalent to a single blank. For example, this causes a
600  * sd_disk_table entry of " NEC CDROM " to match a device's id string
601  * of  "NEC       CDROM".
602  *
603  * Note: The MD21 controller type has been obsoleted.
604  *	 ST318202F is a Legacy device
605  *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
606  *	 made with an FC connection. The entries here are a legacy.
607  */
608 static sd_disk_config_t sd_disk_table[] = {
609 #if defined(__fibre) || defined(__x86)
610 	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
611 	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
612 	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
613 	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
614 	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
615 	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
616 	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
617 	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
618 	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
619 	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
620 	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
621 	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
622 	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
623 	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
624 	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
625 	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
626 	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
627 	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
628 	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
629 	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
630 	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
631 	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
632 	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
633 	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
634 	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
635 	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
636 	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
637 	{ "IBM     1724-100",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
638 	{ "IBM     1726-2xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
639 	{ "IBM     1726-22x",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
640 	{ "IBM     1726-4xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
641 	{ "IBM     1726-42x",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
642 	{ "IBM     1726-3xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
643 	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
644 	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
645 	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
646 	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
647 	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
648 	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
649 	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
650 	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
651 	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
652 	{ "IBM     1818",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
653 	{ "DELL    MD3000",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
654 	{ "DELL    MD3000i",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
655 	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
656 	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
657 	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
658 	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
659 	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT |
660 			SD_CONF_BSET_CACHE_IS_NV, &lsi_oem_properties },
661 	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT |
662 			SD_CONF_BSET_CACHE_IS_NV, &lsi_oem_properties },
663 	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },
664 	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
665 	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
666 		SD_CONF_BSET_BSY_RETRY_COUNT|
667 		SD_CONF_BSET_RST_RETRIES|
668 		SD_CONF_BSET_RSV_REL_TIME|
669 		SD_CONF_BSET_MIN_THROTTLE|
670 		SD_CONF_BSET_DISKSORT_DISABLED,
671 		&sve_properties },
672 	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
673 		SD_CONF_BSET_LUN_RESET_ENABLED,
674 		&maserati_properties },
675 	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
676 		SD_CONF_BSET_NRR_COUNT|
677 		SD_CONF_BSET_BSY_RETRY_COUNT|
678 		SD_CONF_BSET_RST_RETRIES|
679 		SD_CONF_BSET_MIN_THROTTLE|
680 		SD_CONF_BSET_DISKSORT_DISABLED|
681 		SD_CONF_BSET_LUN_RESET_ENABLED,
682 		&pirus_properties },
683 	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
684 		SD_CONF_BSET_NRR_COUNT|
685 		SD_CONF_BSET_BSY_RETRY_COUNT|
686 		SD_CONF_BSET_RST_RETRIES|
687 		SD_CONF_BSET_MIN_THROTTLE|
688 		SD_CONF_BSET_DISKSORT_DISABLED|
689 		SD_CONF_BSET_LUN_RESET_ENABLED,
690 		&pirus_properties },
691 	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
692 		SD_CONF_BSET_NRR_COUNT|
693 		SD_CONF_BSET_BSY_RETRY_COUNT|
694 		SD_CONF_BSET_RST_RETRIES|
695 		SD_CONF_BSET_MIN_THROTTLE|
696 		SD_CONF_BSET_DISKSORT_DISABLED|
697 		SD_CONF_BSET_LUN_RESET_ENABLED,
698 		&pirus_properties },
699 	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
700 		SD_CONF_BSET_NRR_COUNT|
701 		SD_CONF_BSET_BSY_RETRY_COUNT|
702 		SD_CONF_BSET_RST_RETRIES|
703 		SD_CONF_BSET_MIN_THROTTLE|
704 		SD_CONF_BSET_DISKSORT_DISABLED|
705 		SD_CONF_BSET_LUN_RESET_ENABLED,
706 		&pirus_properties },
707 	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
708 		SD_CONF_BSET_NRR_COUNT|
709 		SD_CONF_BSET_BSY_RETRY_COUNT|
710 		SD_CONF_BSET_RST_RETRIES|
711 		SD_CONF_BSET_MIN_THROTTLE|
712 		SD_CONF_BSET_DISKSORT_DISABLED|
713 		SD_CONF_BSET_LUN_RESET_ENABLED,
714 		&pirus_properties },
715 	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
716 		SD_CONF_BSET_NRR_COUNT|
717 		SD_CONF_BSET_BSY_RETRY_COUNT|
718 		SD_CONF_BSET_RST_RETRIES|
719 		SD_CONF_BSET_MIN_THROTTLE|
720 		SD_CONF_BSET_DISKSORT_DISABLED|
721 		SD_CONF_BSET_LUN_RESET_ENABLED,
722 		&pirus_properties },
723 	{ "SUN     STK6580_6780", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
724 	{ "SUN     SUN_6180", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
725 	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
726 	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
727 	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
728 	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
729 	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
730 #endif /* fibre or NON-sparc platforms */
731 #if ((defined(__sparc) && !defined(__fibre)) ||\
732 	(defined(__x86)))
733 	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
734 	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
735 	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
736 	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
737 	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
738 	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
739 	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
740 	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
741 	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
742 	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
743 	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
744 	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
745 	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
746 	    &symbios_properties },
747 	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
748 	    &lsi_properties_scsi },
749 #if defined(__x86)
750 	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
751 				    | SD_CONF_BSET_READSUB_BCD
752 				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
753 				    | SD_CONF_BSET_NO_READ_HEADER
754 				    | SD_CONF_BSET_READ_CD_XD4), NULL },
755 
756 	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
757 				    | SD_CONF_BSET_READSUB_BCD
758 				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
759 				    | SD_CONF_BSET_NO_READ_HEADER
760 				    | SD_CONF_BSET_READ_CD_XD4), NULL },
761 #endif /* __x86 */
762 #endif /* sparc NON-fibre or NON-sparc platforms */
763 
764 #if (defined(SD_PROP_TST))
765 	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
766 				| SD_CONF_BSET_CTYPE
767 				| SD_CONF_BSET_NRR_COUNT
768 				| SD_CONF_BSET_FAB_DEVID
769 				| SD_CONF_BSET_NOCACHE
770 				| SD_CONF_BSET_BSY_RETRY_COUNT
771 				| SD_CONF_BSET_PLAYMSF_BCD
772 				| SD_CONF_BSET_READSUB_BCD
773 				| SD_CONF_BSET_READ_TOC_TRK_BCD
774 				| SD_CONF_BSET_READ_TOC_ADDR_BCD
775 				| SD_CONF_BSET_NO_READ_HEADER
776 				| SD_CONF_BSET_READ_CD_XD4
777 				| SD_CONF_BSET_RST_RETRIES
778 				| SD_CONF_BSET_RSV_REL_TIME
779 				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
780 #endif
781 };
782 
783 static const int sd_disk_table_size =
784 	sizeof (sd_disk_table)/ sizeof (sd_disk_config_t);
785 
786 /*
787  * Emulation mode disk drive VID/PID table
788  */
789 static char sd_flash_dev_table[][25] = {
790 	"ATA     MARVELL SD88SA02",
791 	"MARVELL SD88SA02",
792 	"TOSHIBA THNSNV05",
793 };
794 
795 static const int sd_flash_dev_table_size =
796 	sizeof (sd_flash_dev_table) / sizeof (sd_flash_dev_table[0]);
797 
798 #define	SD_INTERCONNECT_PARALLEL	0
799 #define	SD_INTERCONNECT_FABRIC		1
800 #define	SD_INTERCONNECT_FIBRE		2
801 #define	SD_INTERCONNECT_SSA		3
802 #define	SD_INTERCONNECT_SATA		4
803 #define	SD_INTERCONNECT_SAS		5
804 
805 #define	SD_IS_PARALLEL_SCSI(un)		\
806 	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)
807 #define	SD_IS_SERIAL(un)		\
808 	(((un)->un_interconnect_type == SD_INTERCONNECT_SATA) ||\
809 	((un)->un_interconnect_type == SD_INTERCONNECT_SAS))
810 
811 /*
812  * Definitions used by device id registration routines
813  */
814 #define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
815 #define	VPD_PAGE_LENGTH		3	/* offset for page length data */
816 #define	VPD_MODE_PAGE		1	/* offset into vpd pg for "page code" */
817 
818 static kmutex_t sd_sense_mutex = {0};
819 
820 /*
821  * Macros for updates of the driver state: New_state() saves un_state into un_last_state before installing the new state; Restore_state() switches back to the saved state.  Both evaluate (un) more than once.
822  */
823 #define	New_state(un, s)        \
824 	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
825 #define	Restore_state(un)	\
826 	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
827 
828 static struct sd_cdbinfo sd_cdbtab[] = {	/* columns presumably: CDB size, group code, max LBA, max count -- confirm sd_cdbinfo in sddef.h */
829 	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },	/* 6-byte: 21-bit LBA, 8-bit count */
830 	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },	/* 10-byte: 32-bit LBA, 16-bit count */
831 	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },	/* 12-byte: 32-bit LBA, 32-bit count */
832 	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },	/* 16-byte: 64-bit LBA, 32-bit count */
833 };
834 
835 /*
836  * Specifies the number of seconds that must have elapsed since the last
837  * cmd. has completed for a device to be declared idle to the PM framework.
838  */
839 static int sd_pm_idletime = 1;
840 
841 /*
842  * Internal function prototypes
843  */
844 
845 #if (defined(__fibre))
846 /*
847  * These #defines are to avoid namespace collisions that occur because this
848  * code is currently used to compile two separate driver modules: sd and ssd.
849  * All function names need to be treated this way (even if declared static)
850  * in order to allow the debugger to resolve the names properly.
851  * It is anticipated that in the near future the ssd module will be obsoleted,
852  * at which time this ugliness should go away.
853  */
854 #define	sd_log_trace			ssd_log_trace
855 #define	sd_log_info			ssd_log_info
856 #define	sd_log_err			ssd_log_err
857 #define	sdprobe				ssdprobe
858 #define	sdinfo				ssdinfo
859 #define	sd_prop_op			ssd_prop_op
860 #define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
861 #define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
862 #define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
863 #define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
864 #define	sd_scsi_target_lun_init		ssd_scsi_target_lun_init
865 #define	sd_scsi_target_lun_fini		ssd_scsi_target_lun_fini
866 #define	sd_scsi_get_target_lun_count	ssd_scsi_get_target_lun_count
867 #define	sd_scsi_update_lun_on_target	ssd_scsi_update_lun_on_target
868 #define	sd_spin_up_unit			ssd_spin_up_unit
869 #define	sd_enable_descr_sense		ssd_enable_descr_sense
870 #define	sd_reenable_dsense_task		ssd_reenable_dsense_task
871 #define	sd_set_mmc_caps			ssd_set_mmc_caps
872 #define	sd_read_unit_properties		ssd_read_unit_properties
873 #define	sd_process_sdconf_file		ssd_process_sdconf_file
874 #define	sd_process_sdconf_table		ssd_process_sdconf_table
875 #define	sd_sdconf_id_match		ssd_sdconf_id_match
876 #define	sd_blank_cmp			ssd_blank_cmp
877 #define	sd_chk_vers1_data		ssd_chk_vers1_data
878 #define	sd_set_vers1_properties		ssd_set_vers1_properties
879 #define	sd_check_bdc_vpd		ssd_check_bdc_vpd
880 #define	sd_check_emulation_mode		ssd_check_emulation_mode
881 
882 #define	sd_get_physical_geometry	ssd_get_physical_geometry
883 #define	sd_get_virtual_geometry		ssd_get_virtual_geometry
884 #define	sd_update_block_info		ssd_update_block_info
885 #define	sd_register_devid		ssd_register_devid
886 #define	sd_get_devid			ssd_get_devid
887 #define	sd_create_devid			ssd_create_devid
888 #define	sd_write_deviceid		ssd_write_deviceid
889 #define	sd_check_vpd_page_support	ssd_check_vpd_page_support
890 #define	sd_setup_pm			ssd_setup_pm
891 #define	sd_create_pm_components		ssd_create_pm_components
892 #define	sd_ddi_suspend			ssd_ddi_suspend
893 #define	sd_ddi_resume			ssd_ddi_resume
894 #define	sd_pm_state_change		ssd_pm_state_change
895 #define	sdpower				ssdpower
896 #define	sdattach			ssdattach
897 #define	sddetach			ssddetach
898 #define	sd_unit_attach			ssd_unit_attach
899 #define	sd_unit_detach			ssd_unit_detach
900 #define	sd_set_unit_attributes		ssd_set_unit_attributes
901 #define	sd_create_errstats		ssd_create_errstats
902 #define	sd_set_errstats			ssd_set_errstats
903 #define	sd_set_pstats			ssd_set_pstats
904 #define	sddump				ssddump
905 #define	sd_scsi_poll			ssd_scsi_poll
906 #define	sd_send_polled_RQS		ssd_send_polled_RQS
907 #define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
908 #define	sd_init_event_callbacks		ssd_init_event_callbacks
909 #define	sd_event_callback		ssd_event_callback
910 #define	sd_cache_control		ssd_cache_control
911 #define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
912 #define	sd_get_write_cache_changeable	ssd_get_write_cache_changeable
913 #define	sd_get_nv_sup			ssd_get_nv_sup
914 #define	sd_make_device			ssd_make_device
915 #define	sdopen				ssdopen
916 #define	sdclose				ssdclose
917 #define	sd_ready_and_valid		ssd_ready_and_valid
918 #define	sdmin				ssdmin
919 #define	sdread				ssdread
920 #define	sdwrite				ssdwrite
921 #define	sdaread				ssdaread
922 #define	sdawrite			ssdawrite
923 #define	sdstrategy			ssdstrategy
924 #define	sdioctl				ssdioctl
925 #define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
926 #define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
927 #define	sd_checksum_iostart		ssd_checksum_iostart
928 #define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
929 #define	sd_pm_iostart			ssd_pm_iostart
930 #define	sd_core_iostart			ssd_core_iostart
931 #define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
932 #define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
933 #define	sd_checksum_iodone		ssd_checksum_iodone
934 #define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
935 #define	sd_pm_iodone			ssd_pm_iodone
936 #define	sd_initpkt_for_buf		ssd_initpkt_for_buf
937 #define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
938 #define	sd_setup_rw_pkt			ssd_setup_rw_pkt
939 #define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
940 #define	sd_buf_iodone			ssd_buf_iodone
941 #define	sd_uscsi_strategy		ssd_uscsi_strategy
942 #define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
943 #define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
944 #define	sd_uscsi_iodone			ssd_uscsi_iodone
945 #define	sd_xbuf_strategy		ssd_xbuf_strategy
946 #define	sd_xbuf_init			ssd_xbuf_init
947 #define	sd_pm_entry			ssd_pm_entry
948 #define	sd_pm_exit			ssd_pm_exit
949 
950 #define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
951 #define	sd_pm_timeout_handler		ssd_pm_timeout_handler
952 
953 #define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
954 #define	sdintr				ssdintr
955 #define	sd_start_cmds			ssd_start_cmds
956 #define	sd_send_scsi_cmd		ssd_send_scsi_cmd
957 #define	sd_bioclone_alloc		ssd_bioclone_alloc
958 #define	sd_bioclone_free		ssd_bioclone_free
959 #define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
960 #define	sd_shadow_buf_free		ssd_shadow_buf_free
961 #define	sd_print_transport_rejected_message	\
962 					ssd_print_transport_rejected_message
963 #define	sd_retry_command		ssd_retry_command
964 #define	sd_set_retry_bp			ssd_set_retry_bp
965 #define	sd_send_request_sense_command	ssd_send_request_sense_command
966 #define	sd_start_retry_command		ssd_start_retry_command
967 #define	sd_start_direct_priority_command	\
968 					ssd_start_direct_priority_command
969 #define	sd_return_failed_command	ssd_return_failed_command
970 #define	sd_return_failed_command_no_restart	\
971 					ssd_return_failed_command_no_restart
972 #define	sd_return_command		ssd_return_command
973 #define	sd_sync_with_callback		ssd_sync_with_callback
974 #define	sdrunout			ssdrunout
975 #define	sd_mark_rqs_busy		ssd_mark_rqs_busy
976 #define	sd_mark_rqs_idle		ssd_mark_rqs_idle
977 #define	sd_reduce_throttle		ssd_reduce_throttle
978 #define	sd_restore_throttle		ssd_restore_throttle
979 #define	sd_print_incomplete_msg		ssd_print_incomplete_msg
980 #define	sd_init_cdb_limits		ssd_init_cdb_limits
981 #define	sd_pkt_status_good		ssd_pkt_status_good
982 #define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
983 #define	sd_pkt_status_busy		ssd_pkt_status_busy
984 #define	sd_pkt_status_reservation_conflict	\
985 					ssd_pkt_status_reservation_conflict
986 #define	sd_pkt_status_qfull		ssd_pkt_status_qfull
987 #define	sd_handle_request_sense		ssd_handle_request_sense
988 #define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
989 #define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
990 #define	sd_validate_sense_data		ssd_validate_sense_data
991 #define	sd_decode_sense			ssd_decode_sense
992 #define	sd_print_sense_msg		ssd_print_sense_msg
993 #define	sd_sense_key_no_sense		ssd_sense_key_no_sense
994 #define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
995 #define	sd_sense_key_not_ready		ssd_sense_key_not_ready
996 #define	sd_sense_key_medium_or_hardware_error	\
997 					ssd_sense_key_medium_or_hardware_error
998 #define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
999 #define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
1000 #define	sd_sense_key_fail_command	ssd_sense_key_fail_command
1001 #define	sd_sense_key_blank_check	ssd_sense_key_blank_check
1002 #define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
1003 #define	sd_sense_key_default		ssd_sense_key_default
1004 #define	sd_print_retry_msg		ssd_print_retry_msg
1005 #define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
1006 #define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
1007 #define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
1008 #define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
1009 #define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
1010 #define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
1011 #define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
1012 #define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
1013 #define	sd_pkt_reason_default		ssd_pkt_reason_default
1014 #define	sd_reset_target			ssd_reset_target
1015 #define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
1016 #define	sd_start_stop_unit_task		ssd_start_stop_unit_task
1017 #define	sd_taskq_create			ssd_taskq_create
1018 #define	sd_taskq_delete			ssd_taskq_delete
1019 #define	sd_target_change_task		ssd_target_change_task
1020 #define	sd_log_dev_status_event		ssd_log_dev_status_event
1021 #define	sd_log_lun_expansion_event	ssd_log_lun_expansion_event
1022 #define	sd_log_eject_request_event	ssd_log_eject_request_event
1023 #define	sd_media_change_task		ssd_media_change_task
1024 #define	sd_handle_mchange		ssd_handle_mchange
1025 #define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
1026 #define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
1027 #define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
1028 #define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
/*
 * Fix: every entry in this rename table maps sd_* to the ssd_* namespace,
 * but this one previously expanded to itself (sd_ -> sd_), which the
 * preprocessor treats as a no-op, so the function escaped the __fibre
 * rename.  Map it to the ssd_ prefix like its siblings.
 */
#define	sd_send_scsi_feature_GET_CONFIGURATION	\
					ssd_send_scsi_feature_GET_CONFIGURATION
1031 #define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
1032 #define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
1033 #define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
1034 #define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
1035 					ssd_send_scsi_PERSISTENT_RESERVE_IN
1036 #define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
1037 					ssd_send_scsi_PERSISTENT_RESERVE_OUT
1038 #define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
1039 #define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
1040 					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
1041 #define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
1042 #define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
1043 #define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
1044 #define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
1045 #define	sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION	\
1046 				ssd_send_scsi_GET_EVENT_STATUS_NOTIFICATION
1047 #define	sd_gesn_media_data_valid	ssd_gesn_media_data_valid
1048 #define	sd_alloc_rqs			ssd_alloc_rqs
1049 #define	sd_free_rqs			ssd_free_rqs
1050 #define	sd_dump_memory			ssd_dump_memory
1051 #define	sd_get_media_info_com		ssd_get_media_info_com
1052 #define	sd_get_media_info		ssd_get_media_info
1053 #define	sd_get_media_info_ext		ssd_get_media_info_ext
1054 #define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
1055 #define	sd_nvpair_str_decode		ssd_nvpair_str_decode
1056 #define	sd_strtok_r			ssd_strtok_r
1057 #define	sd_set_properties		ssd_set_properties
1058 #define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
1059 #define	sd_setup_next_xfer		ssd_setup_next_xfer
1060 #define	sd_dkio_get_temp		ssd_dkio_get_temp
1061 #define	sd_check_mhd			ssd_check_mhd
1062 #define	sd_mhd_watch_cb			ssd_mhd_watch_cb
1063 #define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
1064 #define	sd_sname			ssd_sname
1065 #define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
1066 #define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
1067 #define	sd_take_ownership		ssd_take_ownership
1068 #define	sd_reserve_release		ssd_reserve_release
1069 #define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
1070 #define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
1071 #define	sd_persistent_reservation_in_read_keys	\
1072 					ssd_persistent_reservation_in_read_keys
1073 #define	sd_persistent_reservation_in_read_resv	\
1074 					ssd_persistent_reservation_in_read_resv
1075 #define	sd_mhdioc_takeown		ssd_mhdioc_takeown
1076 #define	sd_mhdioc_failfast		ssd_mhdioc_failfast
1077 #define	sd_mhdioc_release		ssd_mhdioc_release
1078 #define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
1079 #define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
1080 #define	sd_mhdioc_inresv		ssd_mhdioc_inresv
1081 #define	sr_change_blkmode		ssr_change_blkmode
1082 #define	sr_change_speed			ssr_change_speed
1083 #define	sr_atapi_change_speed		ssr_atapi_change_speed
1084 #define	sr_pause_resume			ssr_pause_resume
1085 #define	sr_play_msf			ssr_play_msf
1086 #define	sr_play_trkind			ssr_play_trkind
1087 #define	sr_read_all_subcodes		ssr_read_all_subcodes
1088 #define	sr_read_subchannel		ssr_read_subchannel
1089 #define	sr_read_tocentry		ssr_read_tocentry
1090 #define	sr_read_tochdr			ssr_read_tochdr
1091 #define	sr_read_cdda			ssr_read_cdda
1092 #define	sr_read_cdxa			ssr_read_cdxa
1093 #define	sr_read_mode1			ssr_read_mode1
1094 #define	sr_read_mode2			ssr_read_mode2
1095 #define	sr_read_cd_mode2		ssr_read_cd_mode2
1096 #define	sr_sector_mode			ssr_sector_mode
1097 #define	sr_eject			ssr_eject
1098 #define	sr_ejected			ssr_ejected
1099 #define	sr_check_wp			ssr_check_wp
1100 #define	sd_watch_request_submit		ssd_watch_request_submit
1101 #define	sd_check_media			ssd_check_media
1102 #define	sd_media_watch_cb		ssd_media_watch_cb
1103 #define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
1104 #define	sr_volume_ctrl			ssr_volume_ctrl
1105 #define	sr_read_sony_session_offset	ssr_read_sony_session_offset
1106 #define	sd_log_page_supported		ssd_log_page_supported
1107 #define	sd_check_for_writable_cd	ssd_check_for_writable_cd
1108 #define	sd_wm_cache_constructor		ssd_wm_cache_constructor
1109 #define	sd_wm_cache_destructor		ssd_wm_cache_destructor
1110 #define	sd_range_lock			ssd_range_lock
1111 #define	sd_get_range			ssd_get_range
1112 #define	sd_free_inlist_wmap		ssd_free_inlist_wmap
1113 #define	sd_range_unlock			ssd_range_unlock
1114 #define	sd_read_modify_write_task	ssd_read_modify_write_task
1115 #define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw
1116 
1117 #define	sd_iostart_chain		ssd_iostart_chain
1118 #define	sd_iodone_chain			ssd_iodone_chain
1119 #define	sd_initpkt_map			ssd_initpkt_map
1120 #define	sd_destroypkt_map		ssd_destroypkt_map
1121 #define	sd_chain_type_map		ssd_chain_type_map
1122 #define	sd_chain_index_map		ssd_chain_index_map
1123 
1124 #define	sd_failfast_flushctl		ssd_failfast_flushctl
1125 #define	sd_failfast_flushq		ssd_failfast_flushq
1126 #define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback
1127 
1128 #define	sd_is_lsi			ssd_is_lsi
1129 #define	sd_tg_rdwr			ssd_tg_rdwr
1130 #define	sd_tg_getinfo			ssd_tg_getinfo
1131 #define	sd_rmw_msg_print_handler	ssd_rmw_msg_print_handler
1132 
1133 #endif	/* #if (defined(__fibre)) */
1134 
/*
 * Header of the UNMAP parameter list sent to the device with the SCSI
 * UNMAP command.  The 16-bit length fields bound the total parameter
 * list size (see SD_UNMAP_MAX_DESCR below).
 * NOTE(review): fields are presumably stored big-endian on the wire per
 * SCSI convention — confirm against the code that fills this structure.
 */
typedef struct unmap_param_hdr_s {
	uint16_t	uph_data_len;		/* length of data following this field */
	uint16_t	uph_descr_data_len;	/* length of the block descriptors */
	uint32_t	uph_reserved;
} unmap_param_hdr_t;
1140 
/*
 * A single UNMAP block descriptor: one contiguous LBA extent to unmap.
 * The parameter list header above is followed by an array of these.
 * NOTE(review): presumably big-endian on the wire per SCSI convention —
 * confirm against the code that fills this structure.
 */
typedef struct unmap_blk_descr_s {
	uint64_t	ubd_lba;	/* starting logical block address */
	uint32_t	ubd_lba_cnt;	/* number of blocks in the extent */
	uint32_t	ubd_reserved;
} unmap_blk_descr_t;
1146 
/*
 * Max number of block descriptors in UNMAP command.  The bound follows
 * from uph_data_len being a uint16_t: the whole parameter list (header
 * plus descriptors) must be describable in 16 bits, so at most
 * (UINT16_MAX - header) / descriptor-size descriptors fit.
 */
#define	SD_UNMAP_MAX_DESCR \
	((UINT16_MAX - sizeof (unmap_param_hdr_t)) / sizeof (unmap_blk_descr_t))
/* Max size of the UNMAP parameter list in bytes (header + full descriptor array) */
#define	SD_UNMAP_PARAM_LIST_MAXSZ	(sizeof (unmap_param_hdr_t) + \
	SD_UNMAP_MAX_DESCR * sizeof (unmap_blk_descr_t))
1153 
1154 int _init(void);
1155 int _fini(void);
1156 int _info(struct modinfo *modinfop);
1157 
1158 /*PRINTFLIKE3*/
1159 static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1160 /*PRINTFLIKE3*/
1161 static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1162 /*PRINTFLIKE3*/
1163 static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1164 
1165 static int sdprobe(dev_info_t *devi);
1166 static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
1167     void **result);
1168 static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
1169     int mod_flags, char *name, caddr_t valuep, int *lengthp);
1170 
1171 /*
1172  * Smart probe for parallel scsi
1173  */
1174 static void sd_scsi_probe_cache_init(void);
1175 static void sd_scsi_probe_cache_fini(void);
1176 static void sd_scsi_clear_probe_cache(void);
1177 static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());
1178 
1179 /*
1180  * Attached luns on target for parallel scsi
1181  */
1182 static void sd_scsi_target_lun_init(void);
1183 static void sd_scsi_target_lun_fini(void);
1184 static int  sd_scsi_get_target_lun_count(dev_info_t *dip, int target);
1185 static void sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag);
1186 
1187 static int sd_spin_up_unit(sd_ssc_t *ssc);
1188 
1189 /*
1190  * Using sd_ssc_init to establish sd_ssc_t struct
1191  * Using sd_ssc_send to send uscsi internal command
1192  * Using sd_ssc_fini to free sd_ssc_t struct
1193  */
1194 static sd_ssc_t *sd_ssc_init(struct sd_lun *un);
1195 static int sd_ssc_send(sd_ssc_t *ssc, struct uscsi_cmd *incmd,
1196     int flag, enum uio_seg dataspace, int path_flag);
1197 static void sd_ssc_fini(sd_ssc_t *ssc);
1198 
1199 /*
1200  * Using sd_ssc_assessment to set correct type-of-assessment
1201  * Using sd_ssc_post to post ereport & system log
1202  *       sd_ssc_post will call sd_ssc_print to print system log
1203  *       sd_ssc_post will call sd_ssd_ereport_post to post ereport
1204  */
1205 static void sd_ssc_assessment(sd_ssc_t *ssc,
1206     enum sd_type_assessment tp_assess);
1207 
1208 static void sd_ssc_post(sd_ssc_t *ssc, enum sd_driver_assessment sd_assess);
1209 static void sd_ssc_print(sd_ssc_t *ssc, int sd_severity);
1210 static void sd_ssc_ereport_post(sd_ssc_t *ssc,
1211     enum sd_driver_assessment drv_assess);
1212 
1213 /*
1214  * Using sd_ssc_set_info to mark an un-decodable-data error.
1215  * Using sd_ssc_extract_info to transfer information from internal
1216  *       data structures to sd_ssc_t.
1217  */
1218 static void sd_ssc_set_info(sd_ssc_t *ssc, int ssc_flags, uint_t comp,
1219     const char *fmt, ...);
1220 static void sd_ssc_extract_info(sd_ssc_t *ssc, struct sd_lun *un,
1221     struct scsi_pkt *pktp, struct buf *bp, struct sd_xbuf *xp);
1222 
1223 static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
1224     enum uio_seg dataspace, int path_flag);
1225 
1226 #ifdef _LP64
1227 static void	sd_enable_descr_sense(sd_ssc_t *ssc);
1228 static void	sd_reenable_dsense_task(void *arg);
1229 #endif /* _LP64 */
1230 
1231 static void	sd_set_mmc_caps(sd_ssc_t *ssc);
1232 
1233 static void sd_read_unit_properties(struct sd_lun *un);
1234 static int  sd_process_sdconf_file(struct sd_lun *un);
1235 static void sd_nvpair_str_decode(struct sd_lun *un, char *nvpair_str);
1236 static char *sd_strtok_r(char *string, const char *sepset, char **lasts);
1237 static void sd_set_properties(struct sd_lun *un, char *name, char *value);
1238 static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
1239     int *data_list, sd_tunables *values);
1240 static void sd_process_sdconf_table(struct sd_lun *un);
1241 static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
1242 static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
1243 static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
1244     int list_len, char *dataname_ptr);
1245 static void sd_set_vers1_properties(struct sd_lun *un, int flags,
1246     sd_tunables *prop_list);
1247 
1248 static void sd_register_devid(sd_ssc_t *ssc, dev_info_t *devi,
1249     int reservation_flag);
1250 static int  sd_get_devid(sd_ssc_t *ssc);
1251 static ddi_devid_t sd_create_devid(sd_ssc_t *ssc);
1252 static int  sd_write_deviceid(sd_ssc_t *ssc);
1253 static int  sd_check_vpd_page_support(sd_ssc_t *ssc);
1254 
1255 static void sd_setup_pm(sd_ssc_t *ssc, dev_info_t *devi);
1256 static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);
1257 
1258 static int  sd_ddi_suspend(dev_info_t *devi);
1259 static int  sd_ddi_resume(dev_info_t *devi);
1260 static int  sd_pm_state_change(struct sd_lun *un, int level, int flag);
1261 static int  sdpower(dev_info_t *devi, int component, int level);
1262 
1263 static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
1264 static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
1265 static int  sd_unit_attach(dev_info_t *devi);
1266 static int  sd_unit_detach(dev_info_t *devi);
1267 
1268 static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
1269 static void sd_create_errstats(struct sd_lun *un, int instance);
1270 static void sd_set_errstats(struct sd_lun *un);
1271 static void sd_set_pstats(struct sd_lun *un);
1272 
1273 static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
1274 static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
1275 static int  sd_send_polled_RQS(struct sd_lun *un);
1276 static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);
1277 
1278 #if (defined(__fibre))
1279 /*
1280  * Event callbacks (photon)
1281  */
1282 static void sd_init_event_callbacks(struct sd_lun *un);
1283 static void  sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
1284 #endif
1285 
1286 /*
1287  * Defines for sd_cache_control
1288  */
1289 
1290 #define	SD_CACHE_ENABLE		1
1291 #define	SD_CACHE_DISABLE	0
1292 #define	SD_CACHE_NOCHANGE	-1
1293 
1294 static int   sd_cache_control(sd_ssc_t *ssc, int rcd_flag, int wce_flag);
1295 static int   sd_get_write_cache_enabled(sd_ssc_t *ssc, int *is_enabled);
1296 static void  sd_get_write_cache_changeable(sd_ssc_t *ssc, int *is_changeable);
1297 static void  sd_get_nv_sup(sd_ssc_t *ssc);
1298 static dev_t sd_make_device(dev_info_t *devi);
1299 static void  sd_check_bdc_vpd(sd_ssc_t *ssc);
1300 static void  sd_check_emulation_mode(sd_ssc_t *ssc);
1301 static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
1302     uint64_t capacity);
1303 
1304 /*
1305  * Driver entry point functions.
1306  */
1307 static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
1308 static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
1309 static int  sd_ready_and_valid(sd_ssc_t *ssc, int part);
1310 
1311 static void sdmin(struct buf *bp);
1312 static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
1313 static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
1314 static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1315 static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1316 
1317 static int sdstrategy(struct buf *bp);
1318 static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);
1319 
1320 /*
1321  * Function prototypes for layering functions in the iostart chain.
1322  */
1323 static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
1324     struct buf *bp);
1325 static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
1326     struct buf *bp);
1327 static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
1328 static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
1329     struct buf *bp);
1330 static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
1331 static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);
1332 
1333 /*
1334  * Function prototypes for layering functions in the iodone chain.
1335  */
1336 static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
1337 static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
1338 static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
1339     struct buf *bp);
1340 static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
1341     struct buf *bp);
1342 static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
1343 static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
1344     struct buf *bp);
1345 static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);
1346 
1347 /*
1348  * Prototypes for functions to support buf(9S) based IO.
1349  */
1350 static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
1351 static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
1352 static void sd_destroypkt_for_buf(struct buf *);
1353 static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
1354     struct buf *bp, int flags,
1355     int (*callback)(caddr_t), caddr_t callback_arg,
1356     diskaddr_t lba, uint32_t blockcount);
1357 static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
1358     struct buf *bp, diskaddr_t lba, uint32_t blockcount);
1359 
1360 /*
1361  * Prototypes for functions to support USCSI IO.
1362  */
1363 static int sd_uscsi_strategy(struct buf *bp);
1364 static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
1365 static void sd_destroypkt_for_uscsi(struct buf *);
1366 
1367 static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
1368     uchar_t chain_type, void *pktinfop);
1369 
1370 static int  sd_pm_entry(struct sd_lun *un);
1371 static void sd_pm_exit(struct sd_lun *un);
1372 
1373 static void sd_pm_idletimeout_handler(void *arg);
1374 
1375 /*
1376  * sd_core internal functions (used at the sd_core_io layer).
1377  */
1378 static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
1379 static void sdintr(struct scsi_pkt *pktp);
1380 static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);
1381 
1382 static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
1383     enum uio_seg dataspace, int path_flag);
1384 
1385 static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
1386     daddr_t blkno, int (*func)(struct buf *));
1387 static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
1388     uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
1389 static void sd_bioclone_free(struct buf *bp);
1390 static void sd_shadow_buf_free(struct buf *bp);
1391 
1392 static void sd_print_transport_rejected_message(struct sd_lun *un,
1393     struct sd_xbuf *xp, int code);
1394 static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
1395     void *arg, int code);
1396 static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
1397     void *arg, int code);
1398 static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
1399     void *arg, int code);
1400 
1401 static void sd_retry_command(struct sd_lun *un, struct buf *bp,
1402     int retry_check_flag,
1403     void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int c),
1404     void *user_arg, int failure_code,  clock_t retry_delay,
1405     void (*statp)(kstat_io_t *));
1406 
1407 static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
1408     clock_t retry_delay, void (*statp)(kstat_io_t *));
1409 
1410 static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
1411     struct scsi_pkt *pktp);
1412 static void sd_start_retry_command(void *arg);
1413 static void sd_start_direct_priority_command(void *arg);
1414 static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
1415     int errcode);
1416 static void sd_return_failed_command_no_restart(struct sd_lun *un,
1417     struct buf *bp, int errcode);
1418 static void sd_return_command(struct sd_lun *un, struct buf *bp);
1419 static void sd_sync_with_callback(struct sd_lun *un);
1420 static int sdrunout(caddr_t arg);
1421 
1422 static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
1423 static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);
1424 
1425 static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
1426 static void sd_restore_throttle(void *arg);
1427 
1428 static void sd_init_cdb_limits(struct sd_lun *un);
1429 
1430 static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
1431     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1432 
1433 /*
1434  * Error handling functions
1435  */
1436 static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
1437     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1438 static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
1439     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1440 static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
1441     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1442 static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
1443     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1444 
1445 static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
1446     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1447 static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
1448     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1449 static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
1450     struct sd_xbuf *xp, size_t actual_len);
1451 static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
1452     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1453 
1454 static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
1455     void *arg, int code);
1456 
1457 static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
1458     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1459 static void sd_sense_key_recoverable_error(struct sd_lun *un,
1460     uint8_t *sense_datap,
1461     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1462 static void sd_sense_key_not_ready(struct sd_lun *un,
1463     uint8_t *sense_datap,
1464     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1465 static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
1466     uint8_t *sense_datap,
1467     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1468 static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
1469     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1470 static void sd_sense_key_unit_attention(struct sd_lun *un,
1471     uint8_t *sense_datap,
1472     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1473 static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
1474     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1475 static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
1476     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1477 static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
1478     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1479 static void sd_sense_key_default(struct sd_lun *un,
1480     uint8_t *sense_datap,
1481     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1482 
1483 static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
1484     void *arg, int flag);
1485 
1486 static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
1487     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1488 static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
1489     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1490 static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
1491     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1492 static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
1493     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1494 static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
1495     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1496 static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
1497     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1498 static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
1499     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1500 static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
1501     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1502 
1503 static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);
1504 
1505 static void sd_start_stop_unit_callback(void *arg);
1506 static void sd_start_stop_unit_task(void *arg);
1507 
1508 static void sd_taskq_create(void);
1509 static void sd_taskq_delete(void);
1510 static void sd_target_change_task(void *arg);
1511 static void sd_log_dev_status_event(struct sd_lun *un, char *esc, int km_flag);
1512 static void sd_log_lun_expansion_event(struct sd_lun *un, int km_flag);
1513 static void sd_log_eject_request_event(struct sd_lun *un, int km_flag);
1514 static void sd_media_change_task(void *arg);
1515 
1516 static int sd_handle_mchange(struct sd_lun *un);
1517 static int sd_send_scsi_DOORLOCK(sd_ssc_t *ssc, int flag, int path_flag);
1518 static int sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp,
1519     uint32_t *lbap, int path_flag);
1520 static int sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp,
1521     uint32_t *lbap, uint32_t *psp, int path_flag);
1522 static int sd_send_scsi_START_STOP_UNIT(sd_ssc_t *ssc, int pc_flag,
1523     int flag, int path_flag);
1524 static int sd_send_scsi_INQUIRY(sd_ssc_t *ssc, uchar_t *bufaddr,
1525     size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
1526 static int sd_send_scsi_TEST_UNIT_READY(sd_ssc_t *ssc, int flag);
1527 static int sd_send_scsi_PERSISTENT_RESERVE_IN(sd_ssc_t *ssc,
1528     uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
1529 static int sd_send_scsi_PERSISTENT_RESERVE_OUT(sd_ssc_t *ssc,
1530     uchar_t usr_cmd, uchar_t *usr_bufp);
1531 static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
1532     struct dk_callback *dkc);
1533 static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
1534 static int sd_send_scsi_UNMAP(dev_t dev, sd_ssc_t *ssc, dkioc_free_list_t *dfl,
1535     int flag);
1536 static int sd_send_scsi_GET_CONFIGURATION(sd_ssc_t *ssc,
1537     struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1538     uchar_t *bufaddr, uint_t buflen, int path_flag);
1539 static int sd_send_scsi_feature_GET_CONFIGURATION(sd_ssc_t *ssc,
1540     struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1541     uchar_t *bufaddr, uint_t buflen, char feature, int path_flag);
1542 static int sd_send_scsi_MODE_SENSE(sd_ssc_t *ssc, int cdbsize,
1543     uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
1544 static int sd_send_scsi_MODE_SELECT(sd_ssc_t *ssc, int cdbsize,
1545     uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
1546 static int sd_send_scsi_RDWR(sd_ssc_t *ssc, uchar_t cmd, void *bufaddr,
1547     size_t buflen, daddr_t start_block, int path_flag);
1548 #define	sd_send_scsi_READ(ssc, bufaddr, buflen, start_block, path_flag)	\
1549     sd_send_scsi_RDWR(ssc, SCMD_READ, bufaddr, buflen, start_block, \
1550     path_flag)
1551 #define	sd_send_scsi_WRITE(ssc, bufaddr, buflen, start_block, path_flag)\
1552     sd_send_scsi_RDWR(ssc, SCMD_WRITE, bufaddr, buflen, start_block,\
1553     path_flag)
1554 
1555 static int sd_send_scsi_LOG_SENSE(sd_ssc_t *ssc, uchar_t *bufaddr,
1556     uint16_t buflen, uchar_t page_code, uchar_t page_control,
1557     uint16_t param_ptr, int path_flag);
1558 static int sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION(sd_ssc_t *ssc,
1559     uchar_t *bufaddr, size_t buflen, uchar_t class_req);
1560 static boolean_t sd_gesn_media_data_valid(uchar_t *data);
1561 
1562 static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1563 static void sd_free_rqs(struct sd_lun *un);
1564 
1565 static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1566     uchar_t *data, int len, int fmt);
1567 static void sd_panic_for_res_conflict(struct sd_lun *un);
1568 
1569 /*
1570  * Disk Ioctl Function Prototypes
1571  */
1572 static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1573 static int sd_get_media_info_ext(dev_t dev, caddr_t arg, int flag);
1574 static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1575 static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1576 
1577 /*
1578  * Multi-host Ioctl Prototypes
1579  */
1580 static int sd_check_mhd(dev_t dev, int interval);
1581 static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1582 static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1583 static char *sd_sname(uchar_t status);
1584 static void sd_mhd_resvd_recover(void *arg);
1585 static void sd_resv_reclaim_thread();
1586 static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1587 static int sd_reserve_release(dev_t dev, int cmd);
1588 static void sd_rmv_resv_reclaim_req(dev_t dev);
1589 static void sd_mhd_reset_notify_cb(caddr_t arg);
1590 static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1591     mhioc_inkeys_t *usrp, int flag);
1592 static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1593     mhioc_inresvs_t *usrp, int flag);
1594 static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1595 static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1596 static int sd_mhdioc_release(dev_t dev);
1597 static int sd_mhdioc_register_devid(dev_t dev);
1598 static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1599 static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1600 
1601 /*
1602  * SCSI removable prototypes
1603  */
1604 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1605 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1606 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1607 static int sr_pause_resume(dev_t dev, int mode);
1608 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1609 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1610 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1611 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1612 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1613 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1614 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1615 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1616 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1617 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1618 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1619 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1620 static int sr_eject(dev_t dev);
1621 static void sr_ejected(register struct sd_lun *un);
1622 static int sr_check_wp(dev_t dev);
1623 static opaque_t sd_watch_request_submit(struct sd_lun *un);
1624 static int sd_check_media(dev_t dev, enum dkio_state state);
1625 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1626 static void sd_delayed_cv_broadcast(void *arg);
1627 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1628 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1629 
1630 static int sd_log_page_supported(sd_ssc_t *ssc, int log_page);
1631 
1632 /*
1633  * Function Prototype for the non-512 support (DVDRAM, MO etc.) functions.
1634  */
1635 static void sd_check_for_writable_cd(sd_ssc_t *ssc, int path_flag);
1636 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1637 static void sd_wm_cache_destructor(void *wm, void *un);
1638 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1639     daddr_t endb, ushort_t typ);
1640 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1641     daddr_t endb);
1642 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1643 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1644 static void sd_read_modify_write_task(void * arg);
1645 static int
1646 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
1647     struct buf **bpp);
1648 
1649 
1650 /*
1651  * Function prototypes for failfast support.
1652  */
1653 static void sd_failfast_flushq(struct sd_lun *un);
1654 static int sd_failfast_flushq_callback(struct buf *bp);
1655 
1656 /*
1657  * Function prototypes to check for lsi devices
1658  */
1659 static void sd_is_lsi(struct sd_lun *un);
1660 
1661 /*
1662  * Function prototypes for partial DMA support
1663  */
1664 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1665 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1666 
1667 
1668 /* Function prototypes for cmlb */
1669 static int sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
1670     diskaddr_t start_block, size_t reqlength, void *tg_cookie);
1671 
1672 static int sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie);
1673 
1674 /*
1675  * For printing RMW warning message timely
1676  */
1677 static void sd_rmw_msg_print_handler(void *arg);
1678 
1679 /*
1680  * Constants for failfast support:
1681  *
1682  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1683  * failfast processing being performed.
1684  *
1685  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1686  * failfast processing on all bufs with B_FAILFAST set.
1687  */
1688 
1689 #define	SD_FAILFAST_INACTIVE		0
1690 #define	SD_FAILFAST_ACTIVE		1
1691 
1692 /*
1693  * Bitmask to control behavior of buf(9S) flushes when a transition to
1694  * the failfast state occurs. Optional bits include:
1695  *
1696  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1697  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1698  * be flushed.
1699  *
1700  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1701  * driver, in addition to the regular wait queue. This includes the xbuf
1702  * queues. When clear, only the driver's wait queue will be flushed.
1703  */
1704 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1705 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1706 
/*
 * The default behavior is to only flush bufs that have B_FAILFAST set, but
 * to flush all queues within the driver.
 *
 * NOTE(review): kept as a plain (non-const) int, presumably so the flush
 * policy can be patched at runtime for debugging -- confirm before
 * constifying.
 */
static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
1712 
1713 
1714 /*
1715  * SD Testing Fault Injection
1716  */
1717 #ifdef SD_FAULT_INJECTION
1718 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1719 static void sd_faultinjection(struct scsi_pkt *pktp);
1720 static void sd_injection_log(char *buf, struct sd_lun *un);
1721 #endif
1722 
/*
 * Device driver ops vector: character/block device entry points (cb_ops(9S)).
 * NOTE: positional initialization -- entry order must match the cb_ops
 * structure layout; keep the per-field comments in sync when editing.
 */
static struct cb_ops sd_cb_ops = {
	sdopen,			/* open */
	sdclose,		/* close */
	sdstrategy,		/* strategy */
	nodev,			/* print */
	sddump,			/* dump */
	sdread,			/* read */
	sdwrite,		/* write */
	sdioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	sd_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
	CB_REV,			/* cb_rev */
	sdaread,		/* async I/O read entry point */
	sdawrite		/* async I/O write entry point */
};
1746 
/*
 * Driver operations vector (dev_ops(9S)).  Not static: referenced by the
 * modldrv linkage below.  Positional initialization -- entry order must
 * match the dev_ops structure layout.
 */
struct dev_ops sd_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	sdinfo,			/* info */
	nulldev,		/* identify */
	sdprobe,		/* probe */
	sdattach,		/* attach */
	sddetach,		/* detach */
	nodev,			/* reset */
	&sd_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	sdpower,		/* power */
	ddi_quiesce_not_needed,		/* quiesce */
};
1761 
1762 /*
1763  * This is the loadable module wrapper.
1764  */
1765 #include <sys/modctl.h>
1766 
/* Linkage structure describing this module to the kernel: a device driver. */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module. This one is a driver */
	SD_MODULE_NAME,		/* Module name. */
	&sd_ops			/* driver ops */
};
1772 
/* Module linkage: a single modldrv element, NULL-terminated. */
static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};
1776 
/* Target-disk ops vector handed to cmlb (the common disk label module). */
static cmlb_tg_ops_t sd_tgops = {
	TG_DK_OPS_VERSION_1,
	sd_tg_rdwr,		/* block read/write callback */
	sd_tg_getinfo		/* disk info/geometry callback */
};
1782 
1783 static struct scsi_asq_key_strings sd_additional_codes[] = {
1784 	0x81, 0, "Logical Unit is Reserved",
1785 	0x85, 0, "Audio Address Not Valid",
1786 	0xb6, 0, "Media Load Mechanism Failed",
1787 	0xB9, 0, "Audio Play Operation Aborted",
1788 	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1789 	0x53, 2, "Medium removal prevented",
1790 	0x6f, 0, "Authentication failed during key exchange",
1791 	0x6f, 1, "Key not present",
1792 	0x6f, 2, "Key not established",
1793 	0x6f, 3, "Read without proper authentication",
1794 	0x6f, 4, "Mismatched region to this logical unit",
1795 	0x6f, 5, "Region reset count error",
1796 	0xffff, 0x0, NULL
1797 };
1798 
1799 
/*
 * Struct for passing printing information for sense data messages
 */
struct sd_sense_info {
	int	ssi_severity;	/* message severity level */
	int	ssi_pfa_flag;	/* nonzero: predictive-failure (PFA) related
				 * message -- see the sense handlers */
};
1807 
1808 /*
1809  * Table of function pointers for iostart-side routines. Separate "chains"
1810  * of layered function calls are formed by placing the function pointers
1811  * sequentially in the desired order. Functions are called according to an
1812  * incrementing table index ordering. The last function in each chain must
1813  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1814  * in the sd_iodone_chain[] array.
1815  *
1816  * Note: It may seem more natural to organize both the iostart and iodone
1817  * functions together, into an array of structures (or some similar
1818  * organization) with a common index, rather than two separate arrays which
1819  * must be maintained in synchronization. The purpose of this division is
1820  * to achieve improved performance: individual arrays allows for more
1821  * effective cache line utilization on certain platforms.
1822  */
1823 
1824 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1825 
1826 
/*
 * NOTE: table is index-order-critical; entry points into each chain are
 * named by the SD_CHAIN_*_IOSTART macros defined below.  Keep the
 * "Index: N" comments accurate when editing.
 */
static sd_chain_t sd_iostart_chain[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_mapblockaddr_iostart,	/* Index: 0 */
	sd_pm_iostart,			/* Index: 1 */
	sd_core_iostart,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_mapblockaddr_iostart,	/* Index: 3 */
	sd_core_iostart,		/* Index: 4 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets with RMW needed (PM enabled)
	 */
	sd_mapblockaddr_iostart,	/* Index: 5 */
	sd_mapblocksize_iostart,	/* Index: 6 */
	sd_pm_iostart,			/* Index: 7 */
	sd_core_iostart,		/* Index: 8 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets with RMW needed (PM disabled)
	 */
	sd_mapblockaddr_iostart,	/* Index: 9 */
	sd_mapblocksize_iostart,	/* Index: 10 */
	sd_core_iostart,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_mapblockaddr_iostart,	/* Index: 12 */
	sd_checksum_iostart,		/* Index: 13 */
	sd_pm_iostart,			/* Index: 14 */
	sd_core_iostart,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_mapblockaddr_iostart,	/* Index: 16 */
	sd_checksum_iostart,		/* Index: 17 */
	sd_core_iostart,		/* Index: 18 */

	/* Chain for USCSI commands (all targets) */
	sd_pm_iostart,			/* Index: 19 */
	sd_core_iostart,		/* Index: 20 */

	/* Chain for checksumming USCSI commands (all targets) */
	sd_checksum_uscsi_iostart,	/* Index: 21 */
	sd_pm_iostart,			/* Index: 22 */
	sd_core_iostart,		/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_core_iostart,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_core_iostart,		/* Index: 25 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with RMW needed with checksumming (PM enabled)
	 */
	sd_mapblockaddr_iostart,	/* Index: 26 */
	sd_mapblocksize_iostart,	/* Index: 27 */
	sd_checksum_iostart,		/* Index: 28 */
	sd_pm_iostart,			/* Index: 29 */
	sd_core_iostart,		/* Index: 30 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with RMW needed with checksumming (PM disabled)
	 */
	sd_mapblockaddr_iostart,	/* Index: 31 */
	sd_mapblocksize_iostart,	/* Index: 32 */
	sd_checksum_iostart,		/* Index: 33 */
	sd_core_iostart,		/* Index: 34 */

};
1901 
1902 /*
1903  * Macros to locate the first function of each iostart chain in the
1904  * sd_iostart_chain[] array. These are located by the index in the array.
1905  */
1906 #define	SD_CHAIN_DISK_IOSTART			0
1907 #define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1908 #define	SD_CHAIN_MSS_DISK_IOSTART		5
1909 #define	SD_CHAIN_RMMEDIA_IOSTART		5
1910 #define	SD_CHAIN_MSS_DISK_IOSTART_NO_PM		9
1911 #define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1912 #define	SD_CHAIN_CHKSUM_IOSTART			12
1913 #define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1914 #define	SD_CHAIN_USCSI_CMD_IOSTART		19
1915 #define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1916 #define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1917 #define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
1918 #define	SD_CHAIN_MSS_CHKSUM_IOSTART		26
1919 #define	SD_CHAIN_MSS_CHKSUM_IOSTART_NO_PM	31
1920 
1921 
1922 /*
1923  * Table of function pointers for the iodone-side routines for the driver-
1924  * internal layering mechanism.  The calling sequence for iodone routines
1925  * uses a decrementing table index, so the last routine called in a chain
1926  * must be at the lowest array index location for that chain.  The last
1927  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1928  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1929  * of the functions in an iodone side chain must correspond to the ordering
1930  * of the iostart routines for that chain.  Note that there is no iodone
1931  * side routine that corresponds to sd_core_iostart(), so there is no
1932  * entry in the table for this.
1933  */
1934 
/*
 * NOTE: table is index-order-critical; iodone chains are walked with a
 * DECREMENTING index, so the SD_CHAIN_*_IODONE macros below name the
 * highest index of each chain.  (Comment fix: the sd_pm_iodone entry in
 * the checksum-USCSI chain is index 23, not a duplicate 22.)
 */
static sd_chain_t sd_iodone_chain[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_buf_iodone,			/* Index: 0 */
	sd_mapblockaddr_iodone,		/* Index: 1 */
	sd_pm_iodone,			/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_buf_iodone,			/* Index: 3 */
	sd_mapblockaddr_iodone,		/* Index: 4 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets with RMW needed (PM enabled)
	 */
	sd_buf_iodone,			/* Index: 5 */
	sd_mapblockaddr_iodone,		/* Index: 6 */
	sd_mapblocksize_iodone,		/* Index: 7 */
	sd_pm_iodone,			/* Index: 8 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets with RMW needed (PM disabled)
	 */
	sd_buf_iodone,			/* Index: 9 */
	sd_mapblockaddr_iodone,		/* Index: 10 */
	sd_mapblocksize_iodone,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_buf_iodone,			/* Index: 12 */
	sd_mapblockaddr_iodone,		/* Index: 13 */
	sd_checksum_iodone,		/* Index: 14 */
	sd_pm_iodone,			/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_buf_iodone,			/* Index: 16 */
	sd_mapblockaddr_iodone,		/* Index: 17 */
	sd_checksum_iodone,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_uscsi_iodone,		/* Index: 19 */
	sd_pm_iodone,			/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_uscsi_iodone,		/* Index: 21 */
	sd_checksum_uscsi_iodone,	/* Index: 22 */
	sd_pm_iodone,			/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_uscsi_iodone,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_uscsi_iodone,		/* Index: 25 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM enabled)
	 */
	sd_buf_iodone,			/* Index: 26 */
	sd_mapblockaddr_iodone,		/* Index: 27 */
	sd_mapblocksize_iodone,		/* Index: 28 */
	sd_checksum_iodone,		/* Index: 29 */
	sd_pm_iodone,			/* Index: 30 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM disabled)
	 */
	sd_buf_iodone,			/* Index: 31 */
	sd_mapblockaddr_iodone,		/* Index: 32 */
	sd_mapblocksize_iodone,		/* Index: 33 */
	sd_checksum_iodone,		/* Index: 34 */
};
2008 
2009 
2010 /*
2011  * Macros to locate the "first" function in the sd_iodone_chain[] array for
2012  * each iodone-side chain. These are located by the array index, but as the
2013  * iodone side functions are called in a decrementing-index order, the
2014  * highest index number in each chain must be specified (as these correspond
2015  * to the first function in the iodone chain that will be called by the core
2016  * at IO completion time).
2017  */
2018 
2019 #define	SD_CHAIN_DISK_IODONE			2
2020 #define	SD_CHAIN_DISK_IODONE_NO_PM		4
2021 #define	SD_CHAIN_RMMEDIA_IODONE			8
2022 #define	SD_CHAIN_MSS_DISK_IODONE		8
2023 #define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
2024 #define	SD_CHAIN_MSS_DISK_IODONE_NO_PM		11
2025 #define	SD_CHAIN_CHKSUM_IODONE			15
2026 #define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
2027 #define	SD_CHAIN_USCSI_CMD_IODONE		20
2028 #define	SD_CHAIN_USCSI_CHKSUM_IODONE		22
2029 #define	SD_CHAIN_DIRECT_CMD_IODONE		24
2030 #define	SD_CHAIN_PRIORITY_CMD_IODONE		25
2031 #define	SD_CHAIN_MSS_CHKSUM_IODONE		30
2032 #define	SD_CHAIN_MSS_CHKSUM_IODONE_NO_PM	34
2033 
2034 
2035 
2036 /*
2037  * Array to map a layering chain index to the appropriate initpkt routine.
2038  * The redundant entries are present so that the index used for accessing
2039  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2040  * with this table as well.
2041  */
2042 typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
2043 
/*
 * NOTE: must stay index-parallel with sd_iostart_chain[]/sd_iodone_chain[].
 * (Comment fix: the third checksum-USCSI entry is index 23, not a
 * duplicate 22.)
 */
static sd_initpkt_t	sd_initpkt_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 0 */
	sd_initpkt_for_buf,		/* Index: 1 */
	sd_initpkt_for_buf,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 3 */
	sd_initpkt_for_buf,		/* Index: 4 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets (PM enabled)
	 */
	sd_initpkt_for_buf,		/* Index: 5 */
	sd_initpkt_for_buf,		/* Index: 6 */
	sd_initpkt_for_buf,		/* Index: 7 */
	sd_initpkt_for_buf,		/* Index: 8 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets (PM disabled)
	 */
	sd_initpkt_for_buf,		/* Index: 9 */
	sd_initpkt_for_buf,		/* Index: 10 */
	sd_initpkt_for_buf,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_initpkt_for_buf,		/* Index: 12 */
	sd_initpkt_for_buf,		/* Index: 13 */
	sd_initpkt_for_buf,		/* Index: 14 */
	sd_initpkt_for_buf,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_initpkt_for_buf,		/* Index: 16 */
	sd_initpkt_for_buf,		/* Index: 17 */
	sd_initpkt_for_buf,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_initpkt_for_uscsi,		/* Index: 19 */
	sd_initpkt_for_uscsi,		/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_initpkt_for_uscsi,		/* Index: 21 */
	sd_initpkt_for_uscsi,		/* Index: 22 */
	sd_initpkt_for_uscsi,		/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_initpkt_for_uscsi,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_initpkt_for_uscsi,		/* Index: 25 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM enabled)
	 */
	sd_initpkt_for_buf,		/* Index: 26 */
	sd_initpkt_for_buf,		/* Index: 27 */
	sd_initpkt_for_buf,		/* Index: 28 */
	sd_initpkt_for_buf,		/* Index: 29 */
	sd_initpkt_for_buf,		/* Index: 30 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM disabled)
	 */
	sd_initpkt_for_buf,		/* Index: 31 */
	sd_initpkt_for_buf,		/* Index: 32 */
	sd_initpkt_for_buf,		/* Index: 33 */
	sd_initpkt_for_buf,		/* Index: 34 */
};
2117 
2118 
2119 /*
2120  * Array to map a layering chain index to the appropriate destroypktpkt routine.
2121  * The redundant entries are present so that the index used for accessing
2122  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2123  * with this table as well.
2124  */
2125 typedef void (*sd_destroypkt_t)(struct buf *);
2126 
/*
 * NOTE: must stay index-parallel with sd_iostart_chain[]/sd_iodone_chain[].
 * (Comment fixes: the third checksum-USCSI entry is index 23, not a
 * duplicate 22; and the "(PM enabled)"/"(PM disabled)" labels on the two
 * large-sector-size chains were swapped relative to every other chain
 * table -- indexes 26-30 are the PM-enabled chain, 31-34 PM-disabled.)
 */
static sd_destroypkt_t	sd_destroypkt_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 0 */
	sd_destroypkt_for_buf,		/* Index: 1 */
	sd_destroypkt_for_buf,		/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 3 */
	sd_destroypkt_for_buf,		/* Index: 4 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets (PM enabled)
	 */
	sd_destroypkt_for_buf,		/* Index: 5 */
	sd_destroypkt_for_buf,		/* Index: 6 */
	sd_destroypkt_for_buf,		/* Index: 7 */
	sd_destroypkt_for_buf,		/* Index: 8 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets (PM disabled)
	 */
	sd_destroypkt_for_buf,		/* Index: 9 */
	sd_destroypkt_for_buf,		/* Index: 10 */
	sd_destroypkt_for_buf,		/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	sd_destroypkt_for_buf,		/* Index: 12 */
	sd_destroypkt_for_buf,		/* Index: 13 */
	sd_destroypkt_for_buf,		/* Index: 14 */
	sd_destroypkt_for_buf,		/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	sd_destroypkt_for_buf,		/* Index: 16 */
	sd_destroypkt_for_buf,		/* Index: 17 */
	sd_destroypkt_for_buf,		/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	sd_destroypkt_for_uscsi,	/* Index: 19 */
	sd_destroypkt_for_uscsi,	/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	sd_destroypkt_for_uscsi,	/* Index: 21 */
	sd_destroypkt_for_uscsi,	/* Index: 22 */
	sd_destroypkt_for_uscsi,	/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	sd_destroypkt_for_uscsi,	/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	sd_destroypkt_for_uscsi,	/* Index: 25 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM enabled)
	 */
	sd_destroypkt_for_buf,		/* Index: 26 */
	sd_destroypkt_for_buf,		/* Index: 27 */
	sd_destroypkt_for_buf,		/* Index: 28 */
	sd_destroypkt_for_buf,		/* Index: 29 */
	sd_destroypkt_for_buf,		/* Index: 30 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM disabled)
	 */
	sd_destroypkt_for_buf,		/* Index: 31 */
	sd_destroypkt_for_buf,		/* Index: 32 */
	sd_destroypkt_for_buf,		/* Index: 33 */
	sd_destroypkt_for_buf,		/* Index: 34 */
};
2200 
2201 
2202 
2203 /*
2204  * Array to map a layering chain index to the appropriate chain "type".
2205  * The chain type indicates a specific property/usage of the chain.
2206  * The redundant entries are present so that the index used for accessing
2207  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2208  * with this table as well.
2209  */
2210 
2211 #define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
2212 #define	SD_CHAIN_BUFIO			1	/* regular buf IO */
2213 #define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
2214 #define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
2215 #define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
2216 						/* (for error recovery) */
2217 
/*
 * NOTE: must stay index-parallel with sd_iostart_chain[]; consumed via the
 * SD_IS_BUFIO()/SD_IS_DIRECT_PRIORITY() macros below.
 */
static int sd_chain_type_map[] = {

	/* Chain for buf IO for disk drive targets (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 0 */
	SD_CHAIN_BUFIO,			/* Index: 1 */
	SD_CHAIN_BUFIO,			/* Index: 2 */

	/* Chain for buf IO for disk drive targets (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 3 */
	SD_CHAIN_BUFIO,			/* Index: 4 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets (PM enabled)
	 */
	SD_CHAIN_BUFIO,			/* Index: 5 */
	SD_CHAIN_BUFIO,			/* Index: 6 */
	SD_CHAIN_BUFIO,			/* Index: 7 */
	SD_CHAIN_BUFIO,			/* Index: 8 */

	/*
	 * Chain for buf IO for removable-media or large sector size
	 * disk drive targets (PM disabled)
	 */
	SD_CHAIN_BUFIO,			/* Index: 9 */
	SD_CHAIN_BUFIO,			/* Index: 10 */
	SD_CHAIN_BUFIO,			/* Index: 11 */

	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
	SD_CHAIN_BUFIO,			/* Index: 12 */
	SD_CHAIN_BUFIO,			/* Index: 13 */
	SD_CHAIN_BUFIO,			/* Index: 14 */
	SD_CHAIN_BUFIO,			/* Index: 15 */

	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
	SD_CHAIN_BUFIO,			/* Index: 16 */
	SD_CHAIN_BUFIO,			/* Index: 17 */
	SD_CHAIN_BUFIO,			/* Index: 18 */

	/* Chain for USCSI commands (non-checksum targets) */
	SD_CHAIN_USCSI,			/* Index: 19 */
	SD_CHAIN_USCSI,			/* Index: 20 */

	/* Chain for USCSI commands (checksum targets) */
	SD_CHAIN_USCSI,			/* Index: 21 */
	SD_CHAIN_USCSI,			/* Index: 22 */
	SD_CHAIN_USCSI,			/* Index: 23 */

	/* Chain for "direct" USCSI commands (all targets) */
	SD_CHAIN_DIRECT,		/* Index: 24 */

	/* Chain for "direct priority" USCSI commands (all targets) */
	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM enabled)
	 */
	SD_CHAIN_BUFIO,			/* Index: 26 */
	SD_CHAIN_BUFIO,			/* Index: 27 */
	SD_CHAIN_BUFIO,			/* Index: 28 */
	SD_CHAIN_BUFIO,			/* Index: 29 */
	SD_CHAIN_BUFIO,			/* Index: 30 */

	/*
	 * Chain for buf IO for large sector size disk drive targets
	 * with checksumming (PM disabled)
	 */
	SD_CHAIN_BUFIO,			/* Index: 31 */
	SD_CHAIN_BUFIO,			/* Index: 32 */
	SD_CHAIN_BUFIO,			/* Index: 33 */
	SD_CHAIN_BUFIO,			/* Index: 34 */
};
2291 
2292 
2293 /* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
2294 #define	SD_IS_BUFIO(xp)			\
2295 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2296 
2297 /* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2298 #define	SD_IS_DIRECT_PRIORITY(xp)	\
2299 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
2300 
2301 
2302 
2303 /*
2304  * Struct, array, and macros to map a specific chain to the appropriate
2305  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2306  *
2307  * The sd_chain_index_map[] array is used at attach time to set the various
2308  * un_xxx_chain type members of the sd_lun softstate to the specific layering
2309  * chain to be used with the instance. This allows different instances to use
2310  * different chain for buf IO, uscsi IO, etc.. Also, since the xb_chain_iostart
2311  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2312  * values at sd_xbuf init time, this allows (1) layering chains may be changed
2313  * dynamically & without the use of locking; and (2) a layer may update the
2314  * xb_chain_io[start|done] member in a given xbuf with its current index value,
2315  * to allow for deferred processing of an IO within the same chain from a
2316  * different execution context.
2317  */
2318 
struct sd_chain_index {
	int	sci_iostart_index;	/* entry index into sd_iostart_chain[] */
	int	sci_iodone_index;	/* entry index into sd_iodone_chain[]
					 * (highest index: iodone chains are
					 * walked with a decrementing index) */
};
2323 
/* Indexed by the SD_CHAIN_INFO_* values defined below. */
static struct sd_chain_index	sd_chain_index_map[] = {
	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
	{ SD_CHAIN_MSS_CHKSUM_IOSTART,		SD_CHAIN_MSS_CHKSUM_IODONE },
	{ SD_CHAIN_MSS_CHKSUM_IOSTART_NO_PM, SD_CHAIN_MSS_CHKSUM_IODONE_NO_PM },

};
2339 
2340 
2341 /*
2342  * The following are indexes into the sd_chain_index_map[] array.
2343  */
2344 
2345 /* un->un_buf_chain_type must be set to one of these */
2346 #define	SD_CHAIN_INFO_DISK		0
2347 #define	SD_CHAIN_INFO_DISK_NO_PM	1
2348 #define	SD_CHAIN_INFO_RMMEDIA		2
2349 #define	SD_CHAIN_INFO_MSS_DISK		2
2350 #define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2351 #define	SD_CHAIN_INFO_MSS_DSK_NO_PM	3
2352 #define	SD_CHAIN_INFO_CHKSUM		4
2353 #define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2354 #define	SD_CHAIN_INFO_MSS_DISK_CHKSUM	10
2355 #define	SD_CHAIN_INFO_MSS_DISK_CHKSUM_NO_PM	11
2356 
2357 /* un->un_uscsi_chain_type must be set to one of these */
2358 #define	SD_CHAIN_INFO_USCSI_CMD		6
2359 /* USCSI with PM disabled is the same as DIRECT */
2360 #define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2361 #define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2362 
2363 /* un->un_direct_chain_type must be set to one of these */
2364 #define	SD_CHAIN_INFO_DIRECT_CMD	8
2365 
2366 /* un->un_priority_chain_type must be set to one of these */
2367 #define	SD_CHAIN_INFO_PRIORITY_CMD	9
2368 
2369 /* size for devid inquiries */
2370 #define	MAX_INQUIRY_SIZE		0xF0
2371 
2372 /*
2373  * Macros used by functions to pass a given buf(9S) struct along to the
2374  * next function in the layering chain for further processing.
2375  *
2376  * In the following macros, passing more than three arguments to the called
2377  * routines causes the optimizer for the SPARC compiler to stop doing tail
2378  * call elimination which results in significant performance degradation.
2379  */
2380 #define	SD_BEGIN_IOSTART(index, un, bp)	\
2381 	((*(sd_iostart_chain[index]))(index, un, bp))
2382 
2383 #define	SD_BEGIN_IODONE(index, un, bp)	\
2384 	((*(sd_iodone_chain[index]))(index, un, bp))
2385 
2386 #define	SD_NEXT_IOSTART(index, un, bp)				\
2387 	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2388 
2389 #define	SD_NEXT_IODONE(index, un, bp)				\
2390 	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
2391 
2392 /*
2393  *    Function: _init
2394  *
2395  * Description: This is the driver _init(9E) entry point.
2396  *
2397  * Return Code: Returns the value from mod_install(9F) or
2398  *		ddi_soft_state_init(9F) as appropriate.
2399  *
2400  *     Context: Called when driver module loaded.
2401  */
2402 
2403 int
2404 _init(void)
2405 {
2406 	int	err;
2407 
2408 	/* establish driver name from module name */
2409 	sd_label = (char *)mod_modname(&modlinkage);
2410 
2411 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2412 	    SD_MAXUNIT);
2413 	if (err != 0) {
2414 		return (err);
2415 	}
2416 
2417 	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2418 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2419 	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2420 
2421 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2422 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2423 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2424 
2425 	/*
2426 	 * it's ok to init here even for fibre device
2427 	 */
2428 	sd_scsi_probe_cache_init();
2429 
2430 	sd_scsi_target_lun_init();
2431 
2432 	/*
2433 	 * Creating taskq before mod_install ensures that all callers (threads)
2434 	 * that enter the module after a successful mod_install encounter
2435 	 * a valid taskq.
2436 	 */
2437 	sd_taskq_create();
2438 
2439 	err = mod_install(&modlinkage);
2440 	if (err != 0) {
2441 		/* delete taskq if install fails */
2442 		sd_taskq_delete();
2443 
2444 		mutex_destroy(&sd_detach_mutex);
2445 		mutex_destroy(&sd_log_mutex);
2446 		mutex_destroy(&sd_label_mutex);
2447 
2448 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2449 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2450 		cv_destroy(&sd_tr.srq_inprocess_cv);
2451 
2452 		sd_scsi_probe_cache_fini();
2453 
2454 		sd_scsi_target_lun_fini();
2455 
2456 		ddi_soft_state_fini(&sd_state);
2457 
2458 		return (err);
2459 	}
2460 
2461 	return (err);
2462 }
2463 
2464 
2465 /*
2466  *    Function: _fini
2467  *
2468  * Description: This is the driver _fini(9E) entry point.
2469  *
2470  * Return Code: Returns the value from mod_remove(9F)
2471  *
2472  *     Context: Called when driver module is unloaded.
2473  */
2474 
2475 int
2476 _fini(void)
2477 {
2478 	int err;
2479 
2480 	if ((err = mod_remove(&modlinkage)) != 0) {
2481 		return (err);
2482 	}
2483 
2484 	sd_taskq_delete();
2485 
2486 	mutex_destroy(&sd_detach_mutex);
2487 	mutex_destroy(&sd_log_mutex);
2488 	mutex_destroy(&sd_label_mutex);
2489 	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2490 
2491 	sd_scsi_probe_cache_fini();
2492 
2493 	sd_scsi_target_lun_fini();
2494 
2495 	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2496 	cv_destroy(&sd_tr.srq_inprocess_cv);
2497 
2498 	ddi_soft_state_fini(&sd_state);
2499 
2500 	return (err);
2501 }
2502 
2503 
2504 /*
2505  *    Function: _info
2506  *
2507  * Description: This is the driver _info(9E) entry point.
2508  *
2509  *   Arguments: modinfop - pointer to the driver modinfo structure
2510  *
2511  * Return Code: Returns the value from mod_info(9F).
2512  *
2513  *     Context: Kernel thread context
2514  */
2515 
int
_info(struct modinfo *modinfop)
{
	/* Thin wrapper: the module framework answers from modlinkage. */
	return (mod_info(&modlinkage, modinfop));
}
2521 
2522 
2523 /*
2524  * The following routines implement the driver message logging facility.
2525  * They provide component- and level- based debug output filtering.
2526  * Output may also be restricted to messages for a single instance by
2527  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2528  * to NULL, then messages for all instances are printed.
2529  *
2530  * These routines have been cloned from each other due to the language
2531  * constraints of macros and variable argument list processing.
2532  */
2533 
2534 
2535 /*
2536  *    Function: sd_log_err
2537  *
2538  * Description: This routine is called by the SD_ERROR macro for debug
2539  *		logging of error conditions.
2540  *
2541  *   Arguments: comp - driver component being logged
2542  *		dev  - pointer to driver info structure
2543  *		fmt  - error string and format to be logged
2544  */
2545 
static void
sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
{
	va_list		ap;
	dev_info_t	*dev;

	ASSERT(un != NULL);
	dev = SD_DEVINFO(un);
	ASSERT(dev != NULL);

	/*
	 * Filter messages based on the global component and level masks.
	 * Also print if un matches the value of sd_debug_un, or if
	 * sd_debug_un is set to NULL.
	 */
	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		/*
		 * NOTE(review): vsprintf() is unbounded; this relies on
		 * sd_log_buf being large enough for any message -- confirm
		 * against the sd_log_buf definition.
		 */
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
		mutex_exit(&sd_log_mutex);
	}
#ifdef SD_FAULT_INJECTION
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
	/* Fault injection: also capture the message in the injection log. */
	if (un->sd_injection_mask & comp) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		sd_injection_log(sd_log_buf, un);
		mutex_exit(&sd_log_mutex);
	}
#endif
}
2582 
2583 
2584 /*
2585  *    Function: sd_log_info
2586  *
2587  * Description: This routine is called by the SD_INFO macro for debug
2588  *		logging of general purpose informational conditions.
2589  *
2590  *   Arguments: comp - driver component being logged
2591  *		dev  - pointer to driver info structure
2592  *		fmt  - info string and format to be logged
2593  */
2594 
static void
sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
{
	va_list		ap;
	dev_info_t	*dev;

	ASSERT(un != NULL);
	dev = SD_DEVINFO(un);
	ASSERT(dev != NULL);

	/*
	 * Filter messages based on the global component and level masks.
	 * Also print if un matches the value of sd_debug_un, or if
	 * sd_debug_un is set to NULL.
	 */
	if ((sd_component_mask & component) &&
	    (sd_level_mask & SD_LOGMASK_INFO) &&
	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		/*
		 * NOTE(review): vsprintf() is unbounded; this relies on
		 * sd_log_buf being large enough for any message.
		 */
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
		mutex_exit(&sd_log_mutex);
	}
#ifdef SD_FAULT_INJECTION
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
	/* Fault injection: also capture the message in the injection log. */
	if (un->sd_injection_mask & component) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		sd_injection_log(sd_log_buf, un);
		mutex_exit(&sd_log_mutex);
	}
#endif
}
2632 
2633 
2634 /*
2635  *    Function: sd_log_trace
2636  *
2637  * Description: This routine is called by the SD_TRACE macro for debug
2638  *		logging of trace conditions (i.e. function entry/exit).
2639  *
2640  *   Arguments: comp - driver component being logged
2641  *		dev  - pointer to driver info structure
2642  *		fmt  - trace string and format to be logged
2643  */
2644 
static void
sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
{
	va_list		ap;
	dev_info_t	*dev;

	ASSERT(un != NULL);
	dev = SD_DEVINFO(un);
	ASSERT(dev != NULL);

	/*
	 * Filter messages based on the global component and level masks.
	 * Also print if un matches the value of sd_debug_un, or if
	 * sd_debug_un is set to NULL.
	 */
	if ((sd_component_mask & component) &&
	    (sd_level_mask & SD_LOGMASK_TRACE) &&
	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		/*
		 * NOTE(review): vsprintf() is unbounded; this relies on
		 * sd_log_buf being large enough for any message.
		 */
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
		mutex_exit(&sd_log_mutex);
	}
#ifdef SD_FAULT_INJECTION
	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
	/* Fault injection: also capture the message in the injection log. */
	if (un->sd_injection_mask & component) {
		mutex_enter(&sd_log_mutex);
		va_start(ap, fmt);
		(void) vsprintf(sd_log_buf, fmt, ap);
		va_end(ap);
		sd_injection_log(sd_log_buf, un);
		mutex_exit(&sd_log_mutex);
	}
#endif
}
2682 
2683 
2684 /*
2685  *    Function: sdprobe
2686  *
2687  * Description: This is the driver probe(9e) entry point function.
2688  *
2689  *   Arguments: devi - opaque device info handle
2690  *
2691  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2692  *              DDI_PROBE_FAILURE: If the probe failed.
2693  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2694  *				   but may be present in the future.
2695  */
2696 
2697 static int
2698 sdprobe(dev_info_t *devi)
2699 {
2700 	struct scsi_device	*devp;
2701 	int			rval;
2702 	int			instance = ddi_get_instance(devi);
2703 
2704 	/*
2705 	 * if it wasn't for pln, sdprobe could actually be nulldev
2706 	 * in the "__fibre" case.
2707 	 */
2708 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2709 		return (DDI_PROBE_DONTCARE);
2710 	}
2711 
2712 	devp = ddi_get_driver_private(devi);
2713 
2714 	if (devp == NULL) {
2715 		/* Ooops... nexus driver is mis-configured... */
2716 		return (DDI_PROBE_FAILURE);
2717 	}
2718 
2719 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2720 		return (DDI_PROBE_PARTIAL);
2721 	}
2722 
2723 	/*
2724 	 * Call the SCSA utility probe routine to see if we actually
2725 	 * have a target at this SCSI nexus.
2726 	 */
2727 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2728 	case SCSIPROBE_EXISTS:
2729 		switch (devp->sd_inq->inq_dtype) {
2730 		case DTYPE_DIRECT:
2731 			rval = DDI_PROBE_SUCCESS;
2732 			break;
2733 		case DTYPE_RODIRECT:
2734 			/* CDs etc. Can be removable media */
2735 			rval = DDI_PROBE_SUCCESS;
2736 			break;
2737 		case DTYPE_OPTICAL:
2738 			/*
2739 			 * Rewritable optical driver HP115AA
2740 			 * Can also be removable media
2741 			 */
2742 
2743 			/*
2744 			 * Do not attempt to bind to  DTYPE_OPTICAL if
2745 			 * pre solaris 9 sparc sd behavior is required
2746 			 *
2747 			 * If first time through and sd_dtype_optical_bind
2748 			 * has not been set in /etc/system check properties
2749 			 */
2750 
2751 			if (sd_dtype_optical_bind  < 0) {
2752 				sd_dtype_optical_bind = ddi_prop_get_int
2753 				    (DDI_DEV_T_ANY, devi, 0,
2754 				    "optical-device-bind", 1);
2755 			}
2756 
2757 			if (sd_dtype_optical_bind == 0) {
2758 				rval = DDI_PROBE_FAILURE;
2759 			} else {
2760 				rval = DDI_PROBE_SUCCESS;
2761 			}
2762 			break;
2763 
2764 		case DTYPE_NOTPRESENT:
2765 		default:
2766 			rval = DDI_PROBE_FAILURE;
2767 			break;
2768 		}
2769 		break;
2770 	default:
2771 		rval = DDI_PROBE_PARTIAL;
2772 		break;
2773 	}
2774 
2775 	/*
2776 	 * This routine checks for resource allocation prior to freeing,
2777 	 * so it will take care of the "smart probing" case where a
2778 	 * scsi_probe() may or may not have been issued and will *not*
2779 	 * free previously-freed resources.
2780 	 */
2781 	scsi_unprobe(devp);
2782 	return (rval);
2783 }
2784 
2785 
2786 /*
2787  *    Function: sdinfo
2788  *
2789  * Description: This is the driver getinfo(9e) entry point function.
2790  *		Given the device number, return the devinfo pointer from
2791  *		the scsi_device structure or the instance number
2792  *		associated with the dev_t.
2793  *
2794  *   Arguments: dip     - pointer to device info structure
2795  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2796  *			  DDI_INFO_DEVT2INSTANCE)
2797  *		arg     - driver dev_t
2798  *		resultp - user buffer for request response
2799  *
2800  * Return Code: DDI_SUCCESS
2801  *              DDI_FAILURE
2802  */
2803 /* ARGSUSED */
2804 static int
2805 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2806 {
2807 	struct sd_lun	*un;
2808 	dev_t		dev;
2809 	int		instance;
2810 	int		error;
2811 
2812 	switch (infocmd) {
2813 	case DDI_INFO_DEVT2DEVINFO:
2814 		dev = (dev_t)arg;
2815 		instance = SDUNIT(dev);
2816 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2817 			return (DDI_FAILURE);
2818 		}
2819 		*result = (void *) SD_DEVINFO(un);
2820 		error = DDI_SUCCESS;
2821 		break;
2822 	case DDI_INFO_DEVT2INSTANCE:
2823 		dev = (dev_t)arg;
2824 		instance = SDUNIT(dev);
2825 		*result = (void *)(uintptr_t)instance;
2826 		error = DDI_SUCCESS;
2827 		break;
2828 	default:
2829 		error = DDI_FAILURE;
2830 	}
2831 	return (error);
2832 }
2833 
2834 /*
2835  *    Function: sd_prop_op
2836  *
2837  * Description: This is the driver prop_op(9e) entry point function.
2838  *		Return the number of blocks for the partition in question
2839  *		or forward the request to the property facilities.
2840  *
2841  *   Arguments: dev       - device number
2842  *		dip       - pointer to device info structure
2843  *		prop_op   - property operator
2844  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2845  *		name      - pointer to property name
2846  *		valuep    - pointer or address of the user buffer
2847  *		lengthp   - property length
2848  *
2849  * Return Code: DDI_PROP_SUCCESS
2850  *              DDI_PROP_NOT_FOUND
2851  *              DDI_PROP_UNDEFINED
2852  *              DDI_PROP_NO_MEMORY
2853  *              DDI_PROP_BUF_TOO_SMALL
2854  */
2855 
2856 static int
2857 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2858     char *name, caddr_t valuep, int *lengthp)
2859 {
2860 	struct sd_lun	*un;
2861 
2862 	if ((un = ddi_get_soft_state(sd_state, ddi_get_instance(dip))) == NULL)
2863 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2864 		    name, valuep, lengthp));
2865 
2866 	return (cmlb_prop_op(un->un_cmlbhandle,
2867 	    dev, dip, prop_op, mod_flags, name, valuep, lengthp,
2868 	    SDPART(dev), (void *)SD_PATH_DIRECT));
2869 }
2870 
2871 /*
2872  * The following functions are for smart probing:
2873  * sd_scsi_probe_cache_init()
2874  * sd_scsi_probe_cache_fini()
2875  * sd_scsi_clear_probe_cache()
2876  * sd_scsi_probe_with_cache()
2877  */
2878 
2879 /*
2880  *    Function: sd_scsi_probe_cache_init
2881  *
2882  * Description: Initializes the probe response cache mutex and head pointer.
2883  *
2884  *     Context: Kernel thread context
2885  */
2886 
2887 static void
2888 sd_scsi_probe_cache_init(void)
2889 {
2890 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2891 	sd_scsi_probe_cache_head = NULL;
2892 }
2893 
2894 
2895 /*
2896  *    Function: sd_scsi_probe_cache_fini
2897  *
2898  * Description: Frees all resources associated with the probe response cache.
2899  *
2900  *     Context: Kernel thread context
2901  */
2902 
2903 static void
2904 sd_scsi_probe_cache_fini(void)
2905 {
2906 	struct sd_scsi_probe_cache *cp;
2907 	struct sd_scsi_probe_cache *ncp;
2908 
2909 	/* Clean up our smart probing linked list */
2910 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2911 		ncp = cp->next;
2912 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2913 	}
2914 	sd_scsi_probe_cache_head = NULL;
2915 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2916 }
2917 
2918 
2919 /*
2920  *    Function: sd_scsi_clear_probe_cache
2921  *
2922  * Description: This routine clears the probe response cache. This is
2923  *		done when open() returns ENXIO so that when deferred
2924  *		attach is attempted (possibly after a device has been
2925  *		turned on) we will retry the probe. Since we don't know
2926  *		which target we failed to open, we just clear the
2927  *		entire cache.
2928  *
2929  *     Context: Kernel thread context
2930  */
2931 
2932 static void
2933 sd_scsi_clear_probe_cache(void)
2934 {
2935 	struct sd_scsi_probe_cache	*cp;
2936 	int				i;
2937 
2938 	mutex_enter(&sd_scsi_probe_cache_mutex);
2939 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2940 		/*
2941 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2942 		 * force probing to be performed the next time
2943 		 * sd_scsi_probe_with_cache is called.
2944 		 */
2945 		for (i = 0; i < NTARGETS_WIDE; i++) {
2946 			cp->cache[i] = SCSIPROBE_EXISTS;
2947 		}
2948 	}
2949 	mutex_exit(&sd_scsi_probe_cache_mutex);
2950 }
2951 
2952 
2953 /*
2954  *    Function: sd_scsi_probe_with_cache
2955  *
2956  * Description: This routine implements support for a scsi device probe
2957  *		with cache. The driver maintains a cache of the target
2958  *		responses to scsi probes. If we get no response from a
2959  *		target during a probe inquiry, we remember that, and we
2960  *		avoid additional calls to scsi_probe on non-zero LUNs
2961  *		on the same target until the cache is cleared. By doing
2962  *		so we avoid the 1/4 sec selection timeout for nonzero
2963  *		LUNs. lun0 of a target is always probed.
2964  *
2965  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2966  *              waitfunc - indicates what the allocator routines should
2967  *			   do when resources are not available. This value
2968  *			   is passed on to scsi_probe() when that routine
2969  *			   is called.
2970  *
2971  * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2972  *		otherwise the value returned by scsi_probe(9F).
2973  *
2974  *     Context: Kernel thread context
2975  */
2976 
static int
sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
{
	struct sd_scsi_probe_cache	*cp;
	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
	int		lun, tgt;

	/* Fetch this device's lun/target address properties. */
	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
	    SCSI_ADDR_PROP_LUN, 0);
	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
	    SCSI_ADDR_PROP_TARGET, -1);

	/* Make sure caching enabled and target in range */
	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
		/* do it the old way (no cache) */
		return (scsi_probe(devp, waitfn));
	}

	mutex_enter(&sd_scsi_probe_cache_mutex);

	/* Find the cache for this scsi bus instance */
	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
		if (cp->pdip == pdip) {
			break;
		}
	}

	/* If we can't find a cache for this pdip, create one */
	if (cp == NULL) {
		int i;

		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
		    KM_SLEEP);
		cp->pdip = pdip;
		cp->next = sd_scsi_probe_cache_head;
		sd_scsi_probe_cache_head = cp;
		for (i = 0; i < NTARGETS_WIDE; i++) {
			cp->cache[i] = SCSIPROBE_EXISTS;
		}
	}

	mutex_exit(&sd_scsi_probe_cache_mutex);

	/*
	 * NOTE(review): cp->cache[tgt] is read and written below without
	 * holding sd_scsi_probe_cache_mutex; this appears to assume that
	 * probes for a given target are serialized by the framework --
	 * confirm before relying on it.
	 */
	/* Recompute the cache for this target if LUN zero */
	if (lun == 0) {
		cp->cache[tgt] = SCSIPROBE_EXISTS;
	}

	/* Don't probe if cache remembers a NORESP from a previous LUN. */
	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
		return (SCSIPROBE_NORESP);
	}

	/* Do the actual probe; save & return the result */
	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
}
3033 
3034 
3035 /*
3036  *    Function: sd_scsi_target_lun_init
3037  *
3038  * Description: Initializes the attached lun chain mutex and head pointer.
3039  *
3040  *     Context: Kernel thread context
3041  */
3042 
3043 static void
3044 sd_scsi_target_lun_init(void)
3045 {
3046 	mutex_init(&sd_scsi_target_lun_mutex, NULL, MUTEX_DRIVER, NULL);
3047 	sd_scsi_target_lun_head = NULL;
3048 }
3049 
3050 
3051 /*
3052  *    Function: sd_scsi_target_lun_fini
3053  *
3054  * Description: Frees all resources associated with the attached lun
3055  *              chain
3056  *
3057  *     Context: Kernel thread context
3058  */
3059 
3060 static void
3061 sd_scsi_target_lun_fini(void)
3062 {
3063 	struct sd_scsi_hba_tgt_lun	*cp;
3064 	struct sd_scsi_hba_tgt_lun	*ncp;
3065 
3066 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = ncp) {
3067 		ncp = cp->next;
3068 		kmem_free(cp, sizeof (struct sd_scsi_hba_tgt_lun));
3069 	}
3070 	sd_scsi_target_lun_head = NULL;
3071 	mutex_destroy(&sd_scsi_target_lun_mutex);
3072 }
3073 
3074 
3075 /*
3076  *    Function: sd_scsi_get_target_lun_count
3077  *
3078  * Description: This routine will check in the attached lun chain to see
3079  *		how many luns are attached on the required SCSI controller
3080  *		and target. Currently, some capabilities like tagged queue
3081  *		are supported per target based by HBA. So all luns in a
3082  *		target have the same capabilities. Based on this assumption,
3083  *		sd should only set these capabilities once per target. This
3084  *		function is called when sd needs to decide how many luns
3085  *		already attached on a target.
3086  *
3087  *   Arguments: dip	- Pointer to the system's dev_info_t for the SCSI
3088  *			  controller device.
3089  *              target	- The target ID on the controller's SCSI bus.
3090  *
3091  * Return Code: The number of luns attached on the required target and
3092  *		controller.
3093  *		-1 if target ID is not in parallel SCSI scope or the given
3094  *		dip is not in the chain.
3095  *
3096  *     Context: Kernel thread context
3097  */
3098 
3099 static int
3100 sd_scsi_get_target_lun_count(dev_info_t *dip, int target)
3101 {
3102 	struct sd_scsi_hba_tgt_lun	*cp;
3103 
3104 	if ((target < 0) || (target >= NTARGETS_WIDE)) {
3105 		return (-1);
3106 	}
3107 
3108 	mutex_enter(&sd_scsi_target_lun_mutex);
3109 
3110 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
3111 		if (cp->pdip == dip) {
3112 			break;
3113 		}
3114 	}
3115 
3116 	mutex_exit(&sd_scsi_target_lun_mutex);
3117 
3118 	if (cp == NULL) {
3119 		return (-1);
3120 	}
3121 
3122 	return (cp->nlun[target]);
3123 }
3124 
3125 
3126 /*
3127  *    Function: sd_scsi_update_lun_on_target
3128  *
3129  * Description: This routine is used to update the attached lun chain when a
3130  *		lun is attached or detached on a target.
3131  *
3132  *   Arguments: dip     - Pointer to the system's dev_info_t for the SCSI
3133  *                        controller device.
3134  *              target  - The target ID on the controller's SCSI bus.
3135  *		flag	- Indicate the lun is attached or detached.
3136  *
3137  *     Context: Kernel thread context
3138  */
3139 
3140 static void
3141 sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag)
3142 {
3143 	struct sd_scsi_hba_tgt_lun	*cp;
3144 
3145 	mutex_enter(&sd_scsi_target_lun_mutex);
3146 
3147 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
3148 		if (cp->pdip == dip) {
3149 			break;
3150 		}
3151 	}
3152 
3153 	if ((cp == NULL) && (flag == SD_SCSI_LUN_ATTACH)) {
3154 		cp = kmem_zalloc(sizeof (struct sd_scsi_hba_tgt_lun),
3155 		    KM_SLEEP);
3156 		cp->pdip = dip;
3157 		cp->next = sd_scsi_target_lun_head;
3158 		sd_scsi_target_lun_head = cp;
3159 	}
3160 
3161 	mutex_exit(&sd_scsi_target_lun_mutex);
3162 
3163 	if (cp != NULL) {
3164 		if (flag == SD_SCSI_LUN_ATTACH) {
3165 			cp->nlun[target] ++;
3166 		} else {
3167 			cp->nlun[target] --;
3168 		}
3169 	}
3170 }
3171 
3172 
3173 /*
3174  *    Function: sd_spin_up_unit
3175  *
3176  * Description: Issues the following commands to spin-up the device:
3177  *		START STOP UNIT, and INQUIRY.
3178  *
3179  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
3180  *                      structure for this target.
3181  *
3182  * Return Code: 0 - success
3183  *		EIO - failure
3184  *		EACCES - reservation conflict
3185  *
3186  *     Context: Kernel thread context
3187  */
3188 
static int
sd_spin_up_unit(sd_ssc_t *ssc)
{
	size_t	resid		= 0;
	int	has_conflict	= FALSE;
	uchar_t *bufaddr;
	int	status;
	struct sd_lun	*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);

	/*
	 * Send a throwaway START UNIT command.
	 *
	 * If we fail on this, we don't care presently what precisely
	 * is wrong.  EMC's arrays will also fail this with a check
	 * condition (0x2/0x4/0x3) if the device is "inactive," but
	 * we don't want to fail the attach because it may become
	 * "active" later.
	 * We don't know if power condition is supported or not at
	 * this stage, use START STOP bit.
	 */
	status = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
	    SD_TARGET_START, SD_PATH_DIRECT);

	if (status != 0) {
		/*
		 * A reservation conflict is remembered and reported to the
		 * caller only after the INQUIRY below has been attempted.
		 */
		if (status == EACCES)
			has_conflict = TRUE;
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}

	/*
	 * Send another INQUIRY command to the target. This is necessary for
	 * non-removable media direct access devices because their INQUIRY data
	 * may not be fully qualified until they are spun up (perhaps via the
	 * START command above).  Note: This seems to be needed for some
	 * legacy devices only.  The INQUIRY command should succeed even if a
	 * Reservation Conflict is present.
	 */
	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);

	if (sd_send_scsi_INQUIRY(ssc, bufaddr, SUN_INQSIZE, 0, 0, &resid)
	    != 0) {
		kmem_free(bufaddr, SUN_INQSIZE);
		sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
		return (EIO);
	}

	/*
	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
	 * Note that this routine does not return a failure here even if the
	 * INQUIRY command did not return any data.  This is a legacy behavior.
	 */
	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
	}

	kmem_free(bufaddr, SUN_INQSIZE);

	/* If we hit a reservation conflict above, tell the caller. */
	if (has_conflict == TRUE) {
		return (EACCES);
	}

	return (0);
}
3257 
3258 #ifdef _LP64
3259 /*
3260  *    Function: sd_enable_descr_sense
3261  *
3262  * Description: This routine attempts to select descriptor sense format
3263  *		using the Control mode page.  Devices that support 64 bit
3264  *		LBAs (for >2TB luns) should also implement descriptor
3265  *		sense data so we will call this function whenever we see
3266  *		a lun larger than 2TB.  If for some reason the device
3267  *		supports 64 bit LBAs but doesn't support descriptor sense
3268  *		presumably the mode select will fail.  Everything will
3269  *		continue to work normally except that we will not get
3270  *		complete sense data for commands that fail with an LBA
3271  *		larger than 32 bits.
3272  *
3273  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
3274  *                      structure for this target.
3275  *
3276  *     Context: Kernel thread context only
3277  */
3278 
static void
sd_enable_descr_sense(sd_ssc_t *ssc)
{
	uchar_t			*header;
	struct mode_control_scsi3 *ctrl_bufp;
	size_t			buflen;
	size_t			bd_len;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);

	/*
	 * Read MODE SENSE page 0xA, Control Mode Page
	 */
	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
	    sizeof (struct mode_control_scsi3);
	header = kmem_zalloc(buflen, KM_SLEEP);

	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, header, buflen,
	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT);

	if (status != 0) {
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
		goto eds_exit;
	}

	/*
	 * Determine size of Block Descriptors in order to locate
	 * the mode page data. ATAPI devices return 0, SCSI devices
	 * should return MODE_BLK_DESC_LENGTH.
	 */
	bd_len  = ((struct mode_header *)header)->bdesc_length;

	/* Clear the mode data length field for MODE SELECT */
	((struct mode_header *)header)->length = 0;

	/* The control page follows the header and block descriptors. */
	ctrl_bufp = (struct mode_control_scsi3 *)
	    (header + MODE_HEADER_LENGTH + bd_len);

	/*
	 * If the page length is smaller than the expected value,
	 * the target device doesn't support D_SENSE. Bail out here.
	 */
	if (ctrl_bufp->mode_page.length <
	    sizeof (struct mode_control_scsi3) - 2) {
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_enable_descr_sense: enable D_SENSE failed\n");
		goto eds_exit;
	}

	/*
	 * Clear PS bit for MODE SELECT
	 */
	ctrl_bufp->mode_page.ps = 0;

	/*
	 * Set D_SENSE to enable descriptor sense format.
	 */
	ctrl_bufp->d_sense = 1;

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	/*
	 * Use MODE SELECT to commit the change to the D_SENSE bit
	 */
	status = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, header,
	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT);

	if (status != 0) {
		SD_INFO(SD_LOG_COMMON, un,
		    "sd_enable_descr_sense: mode select ctrl page failed\n");
	} else {
		/* Success: free the buffer and skip the extra assessment. */
		kmem_free(header, buflen);
		return;
	}

	/* Failure path: record the assessment, then free the buffer. */
eds_exit:
	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	kmem_free(header, buflen);
}
3363 
3364 /*
3365  *    Function: sd_reenable_dsense_task
3366  *
3367  * Description: Re-enable descriptor sense after device or bus reset
3368  *
3369  *     Context: Executes in a taskq() thread context
3370  */
3371 static void
3372 sd_reenable_dsense_task(void *arg)
3373 {
3374 	struct	sd_lun	*un = arg;
3375 	sd_ssc_t	*ssc;
3376 
3377 	ASSERT(un != NULL);
3378 
3379 	ssc = sd_ssc_init(un);
3380 	sd_enable_descr_sense(ssc);
3381 	sd_ssc_fini(ssc);
3382 }
3383 #endif /* _LP64 */
3384 
3385 /*
3386  *    Function: sd_set_mmc_caps
3387  *
3388  * Description: This routine determines if the device is MMC compliant and if
3389  *		the device supports CDDA via a mode sense of the CDVD
3390  *		capabilities mode page. Also checks if the device is a
3391  *		dvdram writable device.
3392  *
3393  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
3394  *                      structure for this target.
3395  *
3396  *     Context: Kernel thread context only
3397  */
3398 
static void
sd_set_mmc_caps(sd_ssc_t *ssc)
{
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	caddr_t				buf;
	int				bd_len;
	int				status;
	struct uscsi_cmd		com;
	int				rtn;
	uchar_t				*out_data_rw, *out_data_hd;
	uchar_t				*rqbuf_rw, *rqbuf_hd;
	uchar_t				*out_data_gesn;
	int				gesn_len;
	struct sd_lun			*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);

	/*
	 * The flags which will be set in this function are - mmc compliant,
	 * dvdram writable device, cdda support. Initialize them to FALSE
	 * and if a capability is detected - it will be set to TRUE.
	 */
	un->un_f_mmc_cap = FALSE;
	un->un_f_dvdram_writable_device = FALSE;
	un->un_f_cfg_cdda = FALSE;

	/*
	 * Probe for the CD/DVD capabilities mode page (0x2A).  A device
	 * that services this request is treated as MMC compliant below;
	 * the SD_FMT_IGNORE assessment discards any fault telemetry the
	 * probe may have generated.
	 */
	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, (uchar_t *)buf,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	if (status != 0) {
		/* command failed; just return */
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	/*
	 * If the mode sense request for the CDROM CAPABILITIES
	 * page (0x2A) succeeds the device is assumed to be MMC.
	 */
	un->un_f_mmc_cap = TRUE;

	/* See if GET STATUS EVENT NOTIFICATION is supported */
	if (un->un_f_mmc_gesn_polling) {
		gesn_len = SD_GESN_HEADER_LEN + SD_GESN_MEDIA_DATA_LEN;
		out_data_gesn = kmem_zalloc(gesn_len, KM_SLEEP);

		/* Request notification for the media event class only. */
		rtn = sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION(ssc,
		    out_data_gesn, gesn_len, 1 << SD_GESN_MEDIA_CLASS);

		sd_ssc_assessment(ssc, SD_FMT_IGNORE);

		/*
		 * Turn GESN polling back off if the command failed or the
		 * returned data does not describe media events.
		 */
		if ((rtn != 0) || !sd_gesn_media_data_valid(out_data_gesn)) {
			un->un_f_mmc_gesn_polling = FALSE;
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_set_mmc_caps: gesn not supported "
			    "%d %x %x %x %x\n", rtn,
			    out_data_gesn[0], out_data_gesn[1],
			    out_data_gesn[2], out_data_gesn[3]);
		}

		kmem_free(out_data_gesn, gesn_len);
	}

	/* Get to the page data */
	sense_mhp = (struct mode_header_grp2 *)buf;
	/* Block descriptor length: 16-bit big-endian field in the header. */
	bd_len = (sense_mhp->bdesc_length_hi << 8) |
	    sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		/*
		 * We did not get back the expected block descriptor
		 * length so we cannot determine if the device supports
		 * CDDA. However, we still indicate the device is MMC
		 * according to the successful response to the page
		 * 0x2A mode sense request.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_set_mmc_caps: Mode Sense returned "
		    "invalid block descriptor length\n");
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/* See if read CDDA is supported */
	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
	    bd_len);
	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;

	/* See if writing DVD RAM is supported. */
	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
	if (un->un_f_dvdram_writable_device == TRUE) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/*
	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since a RRD will not have
	 * these capabilities.
	 */
	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);

	/*
	 * If un->un_f_dvdram_writable_device is still FALSE,
	 * check for a Removable Rigid Disk (RRD).  A RRD
	 * device is identified by the features RANDOM_WRITABLE and
	 * HARDWARE_DEFECT_MANAGEMENT.
	 */
	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_rw,
	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
	    RANDOM_WRITABLE, SD_PATH_STANDARD);

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	if (rtn != 0) {
		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
		kmem_free(rqbuf_rw, SENSE_LENGTH);
		return;
	}

	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_hd,
	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
	    HARDWARE_DEFECT_MANAGEMENT, SD_PATH_STANDARD);

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	if (rtn == 0) {
		/*
		 * We have good information, check for random writable
		 * and hardware defect features.
		 *
		 * NOTE(review): unlike sd_check_for_writable_cd(), the
		 * "current" bit (byte 10) of each feature descriptor is
		 * not checked here -- confirm whether that is intended.
		 */
		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
			un->un_f_dvdram_writable_device = TRUE;
		}
	}

	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_rw, SENSE_LENGTH);
	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_hd, SENSE_LENGTH);
}
3555 
3556 /*
3557  *    Function: sd_check_for_writable_cd
3558  *
3559  * Description: This routine determines if the media in the device is
3560  *		writable or not. It uses the get configuration command (0x46)
3561  *		to determine if the media is writable
3562  *
3563  *   Arguments: un - driver soft state (unit) structure
3564  *              path_flag - SD_PATH_DIRECT to use the USCSI "direct"
3565  *                           chain and the normal command waitq, or
3566  *                           SD_PATH_DIRECT_PRIORITY to use the USCSI
3567  *                           "direct" chain and bypass the normal command
3568  *                           waitq.
3569  *
3570  *     Context: Never called at interrupt context.
3571  */
3572 
static void
sd_check_for_writable_cd(sd_ssc_t *ssc, int path_flag)
{
	struct uscsi_cmd		com;
	uchar_t				*out_data;
	uchar_t				*rqbuf;
	int				rtn;
	uchar_t				*out_data_rw, *out_data_hd;
	uchar_t				*rqbuf_rw, *rqbuf_hd;
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	caddr_t				buf;
	int				bd_len;
	int				status;
	struct sd_lun			*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	/*
	 * The caller holds the unit mutex.  It is dropped around each
	 * blocking SCSI command below and reacquired before un_f_* state
	 * is updated and before this function returns.
	 */
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/*
	 * Initialize the writable media to false, if configuration info.
	 * tells us otherwise then only we will set it.
	 */
	un->un_f_mmc_writable_media = FALSE;
	mutex_exit(SD_MUTEX(un));

	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_GET_CONFIGURATION(ssc, &com, rqbuf, SENSE_LENGTH,
	    out_data, SD_PROFILE_HEADER_LEN, path_flag);

	if (rtn != 0)
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	mutex_enter(SD_MUTEX(un));
	if (rtn == 0) {
		/*
		 * We have good information, check for writable DVD.
		 */
		/* Bytes 6-7 are the current profile; 0x0012 is DVD-RAM. */
		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
			un->un_f_mmc_writable_media = TRUE;
			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
			kmem_free(rqbuf, SENSE_LENGTH);
			return;
		}
	}

	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
	kmem_free(rqbuf, SENSE_LENGTH);

	/*
	 * Determine if this is a RRD type device.
	 */
	mutex_exit(SD_MUTEX(un));
	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, (uchar_t *)buf,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, path_flag);

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	mutex_enter(SD_MUTEX(un));
	if (status != 0) {
		/* command failed; just return */
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/* Get to the page data */
	sense_mhp = (struct mode_header_grp2 *)buf;
	/* Block descriptor length: 16-bit big-endian field in the header. */
	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		/*
		 * We did not get back the expected block descriptor length so
		 * we cannot check the mode page.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_check_for_writable_cd: Mode Sense returned "
		    "invalid block descriptor length\n");
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/*
	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since a RRD device will not have
	 * these capabilities.
	 */
	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);

	/*
	 * If un->un_f_mmc_writable_media is still FALSE,
	 * check for RRD type media.  A RRD device is identified
	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
	 */
	mutex_exit(SD_MUTEX(un));
	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_rw,
	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
	    RANDOM_WRITABLE, path_flag);

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	if (rtn != 0) {
		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
		kmem_free(rqbuf_rw, SENSE_LENGTH);
		mutex_enter(SD_MUTEX(un));
		return;
	}

	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_hd,
	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
	    HARDWARE_DEFECT_MANAGEMENT, path_flag);

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	mutex_enter(SD_MUTEX(un));
	if (rtn == 0) {
		/*
		 * We have good information, check for random writable
		 * and hardware defect features as current.
		 */
		/* Byte 9 holds the feature flag, byte 10 bit 0 "current". */
		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
		    (out_data_rw[10] & 0x1) &&
		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
		    (out_data_hd[10] & 0x1)) {
			un->un_f_mmc_writable_media = TRUE;
		}
	}

	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_rw, SENSE_LENGTH);
	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_hd, SENSE_LENGTH);
}
3718 
3719 /*
3720  *    Function: sd_read_unit_properties
3721  *
3722  * Description: The following implements a property lookup mechanism.
3723  *		Properties for particular disks (keyed on vendor, model
3724  *		and rev numbers) are sought in the sd.conf file via
3725  *		sd_process_sdconf_file(), and if not found there, are
3726  *		looked for in a list hardcoded in this driver via
3727  *		sd_process_sdconf_table() Once located the properties
3728  *		are used to update the driver unit structure.
3729  *
3730  *   Arguments: un - driver soft state (unit) structure
3731  */
3732 
3733 static void
3734 sd_read_unit_properties(struct sd_lun *un)
3735 {
3736 	/*
3737 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3738 	 * the "sd-config-list" property (from the sd.conf file) or if
3739 	 * there was not a match for the inquiry vid/pid. If this event
3740 	 * occurs the static driver configuration table is searched for
3741 	 * a match.
3742 	 */
3743 	ASSERT(un != NULL);
3744 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3745 		sd_process_sdconf_table(un);
3746 	}
3747 
3748 	/* check for LSI device */
3749 	sd_is_lsi(un);
3750 
3751 
3752 }
3753 
3754 
3755 /*
3756  *    Function: sd_process_sdconf_file
3757  *
3758  * Description: Use ddi_prop_lookup(9F) to obtain the properties from the
3759  *		driver's config file (ie, sd.conf) and update the driver
3760  *		soft state structure accordingly.
3761  *
3762  *   Arguments: un - driver soft state (unit) structure
3763  *
3764  * Return Code: SD_SUCCESS - The properties were successfully set according
3765  *			     to the driver configuration file.
3766  *		SD_FAILURE - The driver config list was not obtained or
3767  *			     there was no vid/pid match. This indicates that
3768  *			     the static config table should be used.
3769  *
3770  * The config file has a property, "sd-config-list". Currently we support
3771  * two kinds of formats. For both formats, the value of this property
3772  * is a list of duplets:
3773  *
3774  *  sd-config-list=
3775  *	<duplet>,
3776  *	[,<duplet>]*;
3777  *
3778  * For the improved format, where
3779  *
3780  *     <duplet>:= "<vid+pid>","<tunable-list>"
3781  *
3782  * and
3783  *
3784  *     <tunable-list>:=   <tunable> [, <tunable> ]*;
3785  *     <tunable> =        <name> : <value>
3786  *
3787  * The <vid+pid> is the string that is returned by the target device on a
3788  * SCSI inquiry command, the <tunable-list> contains one or more tunables
3789  * to apply to all target devices with the specified <vid+pid>.
3790  *
3791  * Each <tunable> is a "<name> : <value>" pair.
3792  *
3793  * For the old format, the structure of each duplet is as follows:
3794  *
3795  *  <duplet>:= "<vid+pid>","<data-property-name_list>"
3796  *
3797  * The first entry of the duplet is the device ID string (the concatenated
3798  * vid & pid; not to be confused with a device_id).  This is defined in
3799  * the same way as in the sd_disk_table.
3800  *
3801  * The second part of the duplet is a string that identifies a
3802  * data-property-name-list. The data-property-name-list is defined as
3803  * follows:
3804  *
3805  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3806  *
3807  * The syntax of <data-property-name> depends on the <version> field.
3808  *
3809  * If version = SD_CONF_VERSION_1 we have the following syntax:
3810  *
3811  *	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3812  *
3813  * where the prop0 value will be used to set prop0 if bit0 set in the
3814  * flags, prop1 if bit1 set, etc. and N = SD_CONF_MAX_ITEMS -1
3815  *
3816  */
3817 
static int
sd_process_sdconf_file(struct sd_lun *un)
{
	char	**config_list = NULL;
	uint_t	nelements;
	char	*vidptr;
	int	vidlen;
	char	*dnlist_ptr;
	char	*dataname_ptr;
	char	*dataname_lasts;
	int	*data_list = NULL;
	uint_t	data_list_len;
	int	rval = SD_FAILURE;
	int	i;

	ASSERT(un != NULL);

	/* Obtain the configuration list associated with the .conf file */
	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, SD_DEVINFO(un),
	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, sd_config_list,
	    &config_list, &nelements) != DDI_PROP_SUCCESS) {
		return (SD_FAILURE);
	}

	/*
	 * Compare vids in each duplet to the inquiry vid - if a match is
	 * made, get the data value and update the soft state structure
	 * accordingly.
	 *
	 * Each duplet should show as a pair of strings, return SD_FAILURE
	 * otherwise.
	 */
	if (nelements & 1) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd-config-list should show as pairs of strings.\n");
		if (config_list)
			ddi_prop_free(config_list);
		return (SD_FAILURE);
	}

	/* Walk the list two strings at a time: <vid+pid>, <data>. */
	for (i = 0; i < nelements; i += 2) {
		/*
		 * Note: The assumption here is that each vid entry is on
		 * a unique line from its associated duplet.
		 */
		vidptr = config_list[i];
		vidlen = (int)strlen(vidptr);
		if (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS) {
			continue;
		}

		/*
		 * dnlist contains 1 or more blank separated
		 * data-property-name entries
		 */
		dnlist_ptr = config_list[i + 1];

		if (strchr(dnlist_ptr, ':') != NULL) {
			/*
			 * Decode the improved format sd-config-list.
			 */
			sd_nvpair_str_decode(un, dnlist_ptr);
		} else {
			/*
			 * The old format sd-config-list, loop through all
			 * data-property-name entries in the
			 * data-property-name-list
			 * setting the properties for each.
			 */
			for (dataname_ptr = sd_strtok_r(dnlist_ptr, " \t",
			    &dataname_lasts); dataname_ptr != NULL;
			    dataname_ptr = sd_strtok_r(NULL, " \t",
			    &dataname_lasts)) {
				int version;

				SD_INFO(SD_LOG_ATTACH_DETACH, un,
				    "sd_process_sdconf_file: disk:%s, "
				    "data:%s\n", vidptr, dataname_ptr);

				/* Get the data list */
				if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY,
				    SD_DEVINFO(un), 0, dataname_ptr, &data_list,
				    &data_list_len) != DDI_PROP_SUCCESS) {
					SD_INFO(SD_LOG_ATTACH_DETACH, un,
					    "sd_process_sdconf_file: data "
					    "property (%s) has no value\n",
					    dataname_ptr);
					continue;
				}

				/* First word is the property format version. */
				version = data_list[0];

				if (version == SD_CONF_VERSION_1) {
					sd_tunables values;

					/* Set the properties */
					if (sd_chk_vers1_data(un, data_list[1],
					    &data_list[2], data_list_len,
					    dataname_ptr) == SD_SUCCESS) {
						sd_get_tunables_from_conf(un,
						    data_list[1], &data_list[2],
						    &values);
						sd_set_vers1_properties(un,
						    data_list[1], &values);
						rval = SD_SUCCESS;
					} else {
						rval = SD_FAILURE;
					}
				} else {
					scsi_log(SD_DEVINFO(un), sd_label,
					    CE_WARN, "data property %s version "
					    "0x%x is invalid.",
					    dataname_ptr, version);
					rval = SD_FAILURE;
				}
				/* Release this entry's data list. */
				if (data_list)
					ddi_prop_free(data_list);
			}
		}
	}

	/* free up the memory allocated by ddi_prop_lookup_string_array(). */
	if (config_list) {
		ddi_prop_free(config_list);
	}

	return (rval);
}
3946 
3947 /*
3948  *    Function: sd_nvpair_str_decode()
3949  *
3950  * Description: Parse the improved format sd-config-list to get
3951  *    each entry of tunable, which includes a name-value pair.
3952  *    Then call sd_set_properties() to set the property.
3953  *
3954  *   Arguments: un - driver soft state (unit) structure
3955  *    nvpair_str - the tunable list
3956  */
3957 static void
3958 sd_nvpair_str_decode(struct sd_lun *un, char *nvpair_str)
3959 {
3960 	char	*nv, *name, *value, *token;
3961 	char	*nv_lasts, *v_lasts, *x_lasts;
3962 
3963 	for (nv = sd_strtok_r(nvpair_str, ",", &nv_lasts); nv != NULL;
3964 	    nv = sd_strtok_r(NULL, ",", &nv_lasts)) {
3965 		token = sd_strtok_r(nv, ":", &v_lasts);
3966 		name  = sd_strtok_r(token, " \t", &x_lasts);
3967 		token = sd_strtok_r(NULL, ":", &v_lasts);
3968 		value = sd_strtok_r(token, " \t", &x_lasts);
3969 		if (name == NULL || value == NULL) {
3970 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3971 			    "sd_nvpair_str_decode: "
3972 			    "name or value is not valid!\n");
3973 		} else {
3974 			sd_set_properties(un, name, value);
3975 		}
3976 	}
3977 }
3978 
3979 /*
3980  *    Function: sd_strtok_r()
3981  *
3982  * Description: This function uses strpbrk and strspn to break
3983  *    string into tokens on sequentially subsequent calls. Return
3984  *    NULL when no non-separator characters remain. The first
3985  *    argument is NULL for subsequent calls.
3986  */
static char *
sd_strtok_r(char *string, const char *sepset, char **lasts)
{
	char	*tok;
	char	*sep;

	/*
	 * On subsequent calls (string == NULL) resume from the position
	 * saved by the previous invocation; a saved NULL means the input
	 * was exhausted on an earlier call.
	 */
	if (string == NULL) {
		if ((string = *lasts) == NULL) {
			return (NULL);
		}
	}

	/* Advance past any leading separator characters. */
	tok = string + strspn(string, sepset);
	if (*tok == '\0') {
		return (NULL);
	}

	/*
	 * Terminate the token at the next separator (if any) and record
	 * where the next call should resume scanning.
	 */
	sep = strpbrk(tok, sepset);
	if (sep == NULL) {
		*lasts = NULL;
	} else {
		*sep = '\0';
		*lasts = sep + 1;
	}
	return (tok);
}
4013 
4014 /*
4015  *    Function: sd_set_properties()
4016  *
4017  * Description: Set device properties based on the improved
4018  *    format sd-config-list.
4019  *
4020  *   Arguments: un - driver soft state (unit) structure
4021  *    name  - supported tunable name
4022  *    value - tunable value
4023  */
static void
sd_set_properties(struct sd_lun *un, char *name, char *value)
{
	char	*endptr = NULL;
	long	val = 0;

	/*
	 * Property names are matched case-insensitively.  Most handlers
	 * return as soon as the property is applied; the throttle-max,
	 * throttle-min, rmw-type, physical-block-size and mmc-gesn-polling
	 * handlers do NOT return, so control falls through to the throttle
	 * range validation near the bottom of this function.
	 */
	if (strcasecmp(name, "cache-nonvolatile") == 0) {
		if (strcasecmp(value, "true") == 0) {
			un->un_f_suppress_cache_flush = TRUE;
		} else if (strcasecmp(value, "false") == 0) {
			un->un_f_suppress_cache_flush = FALSE;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "suppress_cache_flush flag set to %d\n",
		    un->un_f_suppress_cache_flush);
		return;
	}

	if (strcasecmp(name, "controller-type") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_ctype = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "ctype set to %d\n", un->un_ctype);
		return;
	}

	if (strcasecmp(name, "delay-busy") == 0) {
		/*
		 * The value is divided by 1000 before drv_usectohz(), which
		 * takes microseconds -- i.e. the property appears to be
		 * specified in nanoseconds.  TODO confirm against sd.conf
		 * documentation.
		 */
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_busy_timeout = drv_usectohz(val / 1000);
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "busy_timeout set to %d\n", un->un_busy_timeout);
		return;
	}

	if (strcasecmp(name, "disksort") == 0) {
		/* Note inverted sense: "true" clears the disabled flag. */
		if (strcasecmp(value, "true") == 0) {
			un->un_f_disksort_disabled = FALSE;
		} else if (strcasecmp(value, "false") == 0) {
			un->un_f_disksort_disabled = TRUE;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "disksort disabled flag set to %d\n",
		    un->un_f_disksort_disabled);
		return;
	}

	if (strcasecmp(name, "power-condition") == 0) {
		/* Note inverted sense: "true" clears the disabled flag. */
		if (strcasecmp(value, "true") == 0) {
			un->un_f_power_condition_disabled = FALSE;
		} else if (strcasecmp(value, "false") == 0) {
			un->un_f_power_condition_disabled = TRUE;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "power condition disabled flag set to %d\n",
		    un->un_f_power_condition_disabled);
		return;
	}

	if (strcasecmp(name, "timeout-releasereservation") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_reserve_release_time = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "reservation release timeout set to %d\n",
		    un->un_reserve_release_time);
		return;
	}

	if (strcasecmp(name, "reset-lun") == 0) {
		if (strcasecmp(value, "true") == 0) {
			un->un_f_lun_reset_enabled = TRUE;
		} else if (strcasecmp(value, "false") == 0) {
			un->un_f_lun_reset_enabled = FALSE;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "lun reset enabled flag set to %d\n",
		    un->un_f_lun_reset_enabled);
		return;
	}

	if (strcasecmp(name, "retries-busy") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_busy_retry_count = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "busy retry count set to %d\n", un->un_busy_retry_count);
		return;
	}

	if (strcasecmp(name, "retries-timeout") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_retry_count = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "timeout retry count set to %d\n", un->un_retry_count);
		return;
	}

	if (strcasecmp(name, "retries-notready") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_notready_retry_count = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "notready retry count set to %d\n",
		    un->un_notready_retry_count);
		return;
	}

	if (strcasecmp(name, "retries-reset") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_reset_retry_count = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "reset retry count set to %d\n",
		    un->un_reset_retry_count);
		return;
	}

	if (strcasecmp(name, "throttle-max") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_saved_throttle = un->un_throttle = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "throttle set to %d\n", un->un_throttle);
		/* No return: fall through to the throttle validation below. */
	}

	if (strcasecmp(name, "throttle-min") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_min_throttle = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "min throttle set to %d\n", un->un_min_throttle);
		/* No return: fall through to the throttle validation below. */
	}

	if (strcasecmp(name, "rmw-type") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_f_rmw_type = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "RMW type set to %d\n", un->un_f_rmw_type);
		/* No return here; the throttle validation below still runs. */
	}

	if (strcasecmp(name, "physical-block-size") == 0) {
		/*
		 * Accept only a power of two that is at least as large as
		 * both the target and the system block size.
		 */
		if (ddi_strtol(value, &endptr, 0, &val) == 0 &&
		    ISP2(val) && val >= un->un_tgt_blocksize &&
		    val >= un->un_sys_blocksize) {
			un->un_phy_blocksize = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "physical block size set to %d\n", un->un_phy_blocksize);
		/* No return here; the throttle validation below still runs. */
	}

	if (strcasecmp(name, "retries-victim") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_victim_retry_count = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "victim retry count set to %d\n",
		    un->un_victim_retry_count);
		return;
	}

	/*
	 * Validate the throttle values.
	 * If any of the numbers are invalid, set everything to defaults.
	 * Note that this also executes for property names not recognized
	 * by any handler above (they fall through the whole chain).
	 */
	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
	    (un->un_min_throttle > un->un_throttle)) {
		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
		un->un_min_throttle = sd_min_throttle;
	}

	if (strcasecmp(name, "mmc-gesn-polling") == 0) {
		if (strcasecmp(value, "true") == 0) {
			un->un_f_mmc_gesn_polling = TRUE;
		} else if (strcasecmp(value, "false") == 0) {
			un->un_f_mmc_gesn_polling = FALSE;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "mmc-gesn-polling set to %d\n",
		    un->un_f_mmc_gesn_polling);
	}

	return;

value_invalid:
	/* The value string failed to parse or was out of range. */
	SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
	    "value of prop %s is invalid\n", name);
}
4250 
4251 /*
4252  *    Function: sd_get_tunables_from_conf()
4253  *
4254  *
4255  *    This function reads the data list from the sd.conf file and pulls
4256  *    the values that can have numeric values as arguments and places
4257  *    the values in the appropriate sd_tunables member.
4258  *    Since the order of the data list members varies across platforms
4259  *    This function reads them from the data list in a platform specific
4260  *    order and places them into the correct sd_tunable member that is
4261  *    consistent across all platforms.
4262  */
4263 static void
4264 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
4265     sd_tunables *values)
4266 {
4267 	int i;
4268 	int mask;
4269 
4270 	bzero(values, sizeof (sd_tunables));
4271 
4272 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
4273 
4274 		mask = 1 << i;
4275 		if (mask > flags) {
4276 			break;
4277 		}
4278 
4279 		switch (mask & flags) {
4280 		case 0:	/* This mask bit not set in flags */
4281 			continue;
4282 		case SD_CONF_BSET_THROTTLE:
4283 			values->sdt_throttle = data_list[i];
4284 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4285 			    "sd_get_tunables_from_conf: throttle = %d\n",
4286 			    values->sdt_throttle);
4287 			break;
4288 		case SD_CONF_BSET_CTYPE:
4289 			values->sdt_ctype = data_list[i];
4290 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4291 			    "sd_get_tunables_from_conf: ctype = %d\n",
4292 			    values->sdt_ctype);
4293 			break;
4294 		case SD_CONF_BSET_NRR_COUNT:
4295 			values->sdt_not_rdy_retries = data_list[i];
4296 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4297 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
4298 			    values->sdt_not_rdy_retries);
4299 			break;
4300 		case SD_CONF_BSET_BSY_RETRY_COUNT:
4301 			values->sdt_busy_retries = data_list[i];
4302 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4303 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
4304 			    values->sdt_busy_retries);
4305 			break;
4306 		case SD_CONF_BSET_RST_RETRIES:
4307 			values->sdt_reset_retries = data_list[i];
4308 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4309 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
4310 			    values->sdt_reset_retries);
4311 			break;
4312 		case SD_CONF_BSET_RSV_REL_TIME:
4313 			values->sdt_reserv_rel_time = data_list[i];
4314 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4315 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
4316 			    values->sdt_reserv_rel_time);
4317 			break;
4318 		case SD_CONF_BSET_MIN_THROTTLE:
4319 			values->sdt_min_throttle = data_list[i];
4320 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4321 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
4322 			    values->sdt_min_throttle);
4323 			break;
4324 		case SD_CONF_BSET_DISKSORT_DISABLED:
4325 			values->sdt_disk_sort_dis = data_list[i];
4326 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4327 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
4328 			    values->sdt_disk_sort_dis);
4329 			break;
4330 		case SD_CONF_BSET_LUN_RESET_ENABLED:
4331 			values->sdt_lun_reset_enable = data_list[i];
4332 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4333 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
4334 			    "\n", values->sdt_lun_reset_enable);
4335 			break;
4336 		case SD_CONF_BSET_CACHE_IS_NV:
4337 			values->sdt_suppress_cache_flush = data_list[i];
4338 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4339 			    "sd_get_tunables_from_conf: \
4340 			    suppress_cache_flush = %d"
4341 			    "\n", values->sdt_suppress_cache_flush);
4342 			break;
4343 		case SD_CONF_BSET_PC_DISABLED:
4344 			values->sdt_disk_sort_dis = data_list[i];
4345 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4346 			    "sd_get_tunables_from_conf: power_condition_dis = "
4347 			    "%d\n", values->sdt_power_condition_dis);
4348 			break;
4349 		}
4350 	}
4351 }
4352 
4353 /*
4354  *    Function: sd_process_sdconf_table
4355  *
4356  * Description: Search the static configuration table for a match on the
4357  *		inquiry vid/pid and update the driver soft state structure
4358  *		according to the table property values for the device.
4359  *
4360  *		The form of a configuration table entry is:
4361  *		  <vid+pid>,<flags>,<property-data>
4362  *		  "SEAGATE ST42400N",1,0x40000,
4363  *		  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1;
4364  *
4365  *   Arguments: un - driver soft state (unit) structure
4366  */
4367 
4368 static void
4369 sd_process_sdconf_table(struct sd_lun *un)
4370 {
4371 	char	*id = NULL;
4372 	int	table_index;
4373 	int	idlen;
4374 
4375 	ASSERT(un != NULL);
4376 	for (table_index = 0; table_index < sd_disk_table_size;
4377 	    table_index++) {
4378 		id = sd_disk_table[table_index].device_id;
4379 		idlen = strlen(id);
4380 
4381 		/*
4382 		 * The static configuration table currently does not
4383 		 * implement version 10 properties. Additionally,
4384 		 * multiple data-property-name entries are not
4385 		 * implemented in the static configuration table.
4386 		 */
4387 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4388 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4389 			    "sd_process_sdconf_table: disk %s\n", id);
4390 			sd_set_vers1_properties(un,
4391 			    sd_disk_table[table_index].flags,
4392 			    sd_disk_table[table_index].properties);
4393 			break;
4394 		}
4395 	}
4396 }
4397 
4398 
4399 /*
4400  *    Function: sd_sdconf_id_match
4401  *
4402  * Description: This local function implements a case sensitive vid/pid
4403  *		comparison as well as the boundary cases of wild card and
4404  *		multiple blanks.
4405  *
4406  *		Note: An implicit assumption made here is that the scsi
4407  *		inquiry structure will always keep the vid, pid and
4408  *		revision strings in consecutive sequence, so they can be
4409  *		read as a single string. If this assumption is not the
4410  *		case, a separate string, to be used for the check, needs
4411  *		to be built with these strings concatenated.
4412  *
4413  *   Arguments: un - driver soft state (unit) structure
4414  *		id - table or config file vid/pid
4415  *		idlen  - length of the vid/pid (bytes)
4416  *
4417  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
4418  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
4419  */
4420 
4421 static int
4422 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
4423 {
4424 	struct scsi_inquiry	*sd_inq;
4425 	int			rval = SD_SUCCESS;
4426 
4427 	ASSERT(un != NULL);
4428 	sd_inq = un->un_sd->sd_inq;
4429 	ASSERT(id != NULL);
4430 
4431 	/*
4432 	 * We use the inq_vid as a pointer to a buffer containing the
4433 	 * vid and pid and use the entire vid/pid length of the table
4434 	 * entry for the comparison. This works because the inq_pid
4435 	 * data member follows inq_vid in the scsi_inquiry structure.
4436 	 */
4437 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
4438 		/*
4439 		 * The user id string is compared to the inquiry vid/pid
4440 		 * using a case insensitive comparison and ignoring
4441 		 * multiple spaces.
4442 		 */
4443 		rval = sd_blank_cmp(un, id, idlen);
4444 		if (rval != SD_SUCCESS) {
4445 			/*
4446 			 * User id strings that start and end with a "*"
4447 			 * are a special case. These do not have a
4448 			 * specific vendor, and the product string can
4449 			 * appear anywhere in the 16 byte PID portion of
4450 			 * the inquiry data. This is a simple strstr()
4451 			 * type search for the user id in the inquiry data.
4452 			 */
4453 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
4454 				char	*pidptr = &id[1];
4455 				int	i;
4456 				int	j;
4457 				int	pidstrlen = idlen - 2;
4458 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
4459 				    pidstrlen;
4460 
4461 				if (j < 0) {
4462 					return (SD_FAILURE);
4463 				}
4464 				for (i = 0; i < j; i++) {
4465 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
4466 					    pidptr, pidstrlen) == 0) {
4467 						rval = SD_SUCCESS;
4468 						break;
4469 					}
4470 				}
4471 			}
4472 		}
4473 	}
4474 	return (rval);
4475 }
4476 
4477 
4478 /*
4479  *    Function: sd_blank_cmp
4480  *
4481  * Description: If the id string starts and ends with a space, treat
4482  *		multiple consecutive spaces as equivalent to a single
4483  *		space. For example, this causes a sd_disk_table entry
4484  *		of " NEC CDROM " to match a device's id string of
4485  *		"NEC       CDROM".
4486  *
4487  *		Note: The success exit condition for this routine is if
4488  *		the pointer to the table entry is '\0' and the cnt of
4489  *		the inquiry length is zero. This will happen if the inquiry
4490  *		string returned by the device is padded with spaces to be
4491  *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
4492  *		SCSI spec states that the inquiry string is to be padded with
4493  *		spaces.
4494  *
4495  *   Arguments: un - driver soft state (unit) structure
4496  *		id - table or config file vid/pid
4497  *		idlen  - length of the vid/pid (bytes)
4498  *
4499  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
4500  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
4501  */
4502 
4503 static int
4504 sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
4505 {
4506 	char		*p1;
4507 	char		*p2;
4508 	int		cnt;
4509 	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
4510 	    sizeof (SD_INQUIRY(un)->inq_pid);
4511 
4512 	ASSERT(un != NULL);
4513 	p2 = un->un_sd->sd_inq->inq_vid;
4514 	ASSERT(id != NULL);
4515 	p1 = id;
4516 
4517 	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
4518 		/*
4519 		 * Note: string p1 is terminated by a NUL but string p2
4520 		 * isn't.  The end of p2 is determined by cnt.
4521 		 */
4522 		for (;;) {
4523 			/* skip over any extra blanks in both strings */
4524 			while ((*p1 != '\0') && (*p1 == ' ')) {
4525 				p1++;
4526 			}
4527 			while ((cnt != 0) && (*p2 == ' ')) {
4528 				p2++;
4529 				cnt--;
4530 			}
4531 
4532 			/* compare the two strings */
4533 			if ((cnt == 0) ||
4534 			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
4535 				break;
4536 			}
4537 			while ((cnt > 0) &&
4538 			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
4539 				p1++;
4540 				p2++;
4541 				cnt--;
4542 			}
4543 		}
4544 	}
4545 
4546 	/* return SD_SUCCESS if both strings match */
4547 	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
4548 }
4549 
4550 
4551 /*
4552  *    Function: sd_chk_vers1_data
4553  *
4554  * Description: Verify the version 1 device properties provided by the
4555  *		user via the configuration file
4556  *
4557  *   Arguments: un	     - driver soft state (unit) structure
4558  *		flags	     - integer mask indicating properties to be set
4559  *		prop_list    - integer list of property values
4560  *		list_len     - number of the elements
4561  *
4562  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
4563  *		SD_FAILURE - Indicates the user provided data is invalid
4564  */
4565 
4566 static int
4567 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
4568     int list_len, char *dataname_ptr)
4569 {
4570 	int i;
4571 	int mask = 1;
4572 	int index = 0;
4573 
4574 	ASSERT(un != NULL);
4575 
4576 	/* Check for a NULL property name and list */
4577 	if (dataname_ptr == NULL) {
4578 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4579 		    "sd_chk_vers1_data: NULL data property name.");
4580 		return (SD_FAILURE);
4581 	}
4582 	if (prop_list == NULL) {
4583 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4584 		    "sd_chk_vers1_data: %s NULL data property list.",
4585 		    dataname_ptr);
4586 		return (SD_FAILURE);
4587 	}
4588 
4589 	/* Display a warning if undefined bits are set in the flags */
4590 	if (flags & ~SD_CONF_BIT_MASK) {
4591 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4592 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
4593 		    "Properties not set.",
4594 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
4595 		return (SD_FAILURE);
4596 	}
4597 
4598 	/*
4599 	 * Verify the length of the list by identifying the highest bit set
4600 	 * in the flags and validating that the property list has a length
4601 	 * up to the index of this bit.
4602 	 */
4603 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
4604 		if (flags & mask) {
4605 			index++;
4606 		}
4607 		mask = 1 << i;
4608 	}
4609 	if (list_len < (index + 2)) {
4610 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4611 		    "sd_chk_vers1_data: "
4612 		    "Data property list %s size is incorrect. "
4613 		    "Properties not set.", dataname_ptr);
4614 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
4615 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
4616 		return (SD_FAILURE);
4617 	}
4618 	return (SD_SUCCESS);
4619 }
4620 
4621 
4622 /*
4623  *    Function: sd_set_vers1_properties
4624  *
4625  * Description: Set version 1 device properties based on a property list
4626  *		retrieved from the driver configuration file or static
4627  *		configuration table. Version 1 properties have the format:
4628  *
4629  *	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
4630  *
4631  *		where the prop0 value will be used to set prop0 if bit0
4632  *		is set in the flags
4633  *
4634  *   Arguments: un	     - driver soft state (unit) structure
4635  *		flags	     - integer mask indicating properties to be set
4636  *		prop_list    - integer list of property values
4637  */
4638 
4639 static void
4640 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
4641 {
4642 	ASSERT(un != NULL);
4643 
4644 	/*
4645 	 * Set the flag to indicate cache is to be disabled. An attempt
4646 	 * to disable the cache via sd_cache_control() will be made
4647 	 * later during attach once the basic initialization is complete.
4648 	 */
4649 	if (flags & SD_CONF_BSET_NOCACHE) {
4650 		un->un_f_opt_disable_cache = TRUE;
4651 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4652 		    "sd_set_vers1_properties: caching disabled flag set\n");
4653 	}
4654 
4655 	/* CD-specific configuration parameters */
4656 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
4657 		un->un_f_cfg_playmsf_bcd = TRUE;
4658 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4659 		    "sd_set_vers1_properties: playmsf_bcd set\n");
4660 	}
4661 	if (flags & SD_CONF_BSET_READSUB_BCD) {
4662 		un->un_f_cfg_readsub_bcd = TRUE;
4663 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4664 		    "sd_set_vers1_properties: readsub_bcd set\n");
4665 	}
4666 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
4667 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
4668 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4669 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
4670 	}
4671 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
4672 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
4673 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4674 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
4675 	}
4676 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
4677 		un->un_f_cfg_no_read_header = TRUE;
4678 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4679 		    "sd_set_vers1_properties: no_read_header set\n");
4680 	}
4681 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
4682 		un->un_f_cfg_read_cd_xd4 = TRUE;
4683 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4684 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
4685 	}
4686 
4687 	/* Support for devices which do not have valid/unique serial numbers */
4688 	if (flags & SD_CONF_BSET_FAB_DEVID) {
4689 		un->un_f_opt_fab_devid = TRUE;
4690 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4691 		    "sd_set_vers1_properties: fab_devid bit set\n");
4692 	}
4693 
4694 	/* Support for user throttle configuration */
4695 	if (flags & SD_CONF_BSET_THROTTLE) {
4696 		ASSERT(prop_list != NULL);
4697 		un->un_saved_throttle = un->un_throttle =
4698 		    prop_list->sdt_throttle;
4699 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4700 		    "sd_set_vers1_properties: throttle set to %d\n",
4701 		    prop_list->sdt_throttle);
4702 	}
4703 
4704 	/* Set the per disk retry count according to the conf file or table. */
4705 	if (flags & SD_CONF_BSET_NRR_COUNT) {
4706 		ASSERT(prop_list != NULL);
4707 		if (prop_list->sdt_not_rdy_retries) {
4708 			un->un_notready_retry_count =
4709 			    prop_list->sdt_not_rdy_retries;
4710 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4711 			    "sd_set_vers1_properties: not ready retry count"
4712 			    " set to %d\n", un->un_notready_retry_count);
4713 		}
4714 	}
4715 
4716 	/* The controller type is reported for generic disk driver ioctls */
4717 	if (flags & SD_CONF_BSET_CTYPE) {
4718 		ASSERT(prop_list != NULL);
4719 		switch (prop_list->sdt_ctype) {
4720 		case CTYPE_CDROM:
4721 			un->un_ctype = prop_list->sdt_ctype;
4722 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4723 			    "sd_set_vers1_properties: ctype set to "
4724 			    "CTYPE_CDROM\n");
4725 			break;
4726 		case CTYPE_CCS:
4727 			un->un_ctype = prop_list->sdt_ctype;
4728 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4729 			    "sd_set_vers1_properties: ctype set to "
4730 			    "CTYPE_CCS\n");
4731 			break;
4732 		case CTYPE_ROD:		/* RW optical */
4733 			un->un_ctype = prop_list->sdt_ctype;
4734 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4735 			    "sd_set_vers1_properties: ctype set to "
4736 			    "CTYPE_ROD\n");
4737 			break;
4738 		default:
4739 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4740 			    "sd_set_vers1_properties: Could not set "
4741 			    "invalid ctype value (%d)",
4742 			    prop_list->sdt_ctype);
4743 		}
4744 	}
4745 
4746 	/* Purple failover timeout */
4747 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
4748 		ASSERT(prop_list != NULL);
4749 		un->un_busy_retry_count =
4750 		    prop_list->sdt_busy_retries;
4751 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4752 		    "sd_set_vers1_properties: "
4753 		    "busy retry count set to %d\n",
4754 		    un->un_busy_retry_count);
4755 	}
4756 
4757 	/* Purple reset retry count */
4758 	if (flags & SD_CONF_BSET_RST_RETRIES) {
4759 		ASSERT(prop_list != NULL);
4760 		un->un_reset_retry_count =
4761 		    prop_list->sdt_reset_retries;
4762 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4763 		    "sd_set_vers1_properties: "
4764 		    "reset retry count set to %d\n",
4765 		    un->un_reset_retry_count);
4766 	}
4767 
4768 	/* Purple reservation release timeout */
4769 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
4770 		ASSERT(prop_list != NULL);
4771 		un->un_reserve_release_time =
4772 		    prop_list->sdt_reserv_rel_time;
4773 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4774 		    "sd_set_vers1_properties: "
4775 		    "reservation release timeout set to %d\n",
4776 		    un->un_reserve_release_time);
4777 	}
4778 
4779 	/*
4780 	 * Driver flag telling the driver to verify that no commands are pending
4781 	 * for a device before issuing a Test Unit Ready. This is a workaround
4782 	 * for a firmware bug in some Seagate eliteI drives.
4783 	 */
4784 	if (flags & SD_CONF_BSET_TUR_CHECK) {
4785 		un->un_f_cfg_tur_check = TRUE;
4786 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4787 		    "sd_set_vers1_properties: tur queue check set\n");
4788 	}
4789 
4790 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
4791 		un->un_min_throttle = prop_list->sdt_min_throttle;
4792 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4793 		    "sd_set_vers1_properties: min throttle set to %d\n",
4794 		    un->un_min_throttle);
4795 	}
4796 
4797 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4798 		un->un_f_disksort_disabled =
4799 		    (prop_list->sdt_disk_sort_dis != 0) ?
4800 		    TRUE : FALSE;
4801 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4802 		    "sd_set_vers1_properties: disksort disabled "
4803 		    "flag set to %d\n",
4804 		    prop_list->sdt_disk_sort_dis);
4805 	}
4806 
4807 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4808 		un->un_f_lun_reset_enabled =
4809 		    (prop_list->sdt_lun_reset_enable != 0) ?
4810 		    TRUE : FALSE;
4811 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4812 		    "sd_set_vers1_properties: lun reset enabled "
4813 		    "flag set to %d\n",
4814 		    prop_list->sdt_lun_reset_enable);
4815 	}
4816 
4817 	if (flags & SD_CONF_BSET_CACHE_IS_NV) {
4818 		un->un_f_suppress_cache_flush =
4819 		    (prop_list->sdt_suppress_cache_flush != 0) ?
4820 		    TRUE : FALSE;
4821 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4822 		    "sd_set_vers1_properties: suppress_cache_flush "
4823 		    "flag set to %d\n",
4824 		    prop_list->sdt_suppress_cache_flush);
4825 	}
4826 
4827 	if (flags & SD_CONF_BSET_PC_DISABLED) {
4828 		un->un_f_power_condition_disabled =
4829 		    (prop_list->sdt_power_condition_dis != 0) ?
4830 		    TRUE : FALSE;
4831 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4832 		    "sd_set_vers1_properties: power_condition_disabled "
4833 		    "flag set to %d\n",
4834 		    prop_list->sdt_power_condition_dis);
4835 	}
4836 
4837 	/*
4838 	 * Validate the throttle values.
4839 	 * If any of the numbers are invalid, set everything to defaults.
4840 	 */
4841 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4842 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4843 	    (un->un_min_throttle > un->un_throttle)) {
4844 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4845 		un->un_min_throttle = sd_min_throttle;
4846 	}
4847 }
4848 
4849 /*
4850  *   Function: sd_is_lsi()
4851  *
4852  *   Description: Check for lsi devices, step through the static device
4853  *	table to match vid/pid.
4854  *
4855  *   Args: un - ptr to sd_lun
4856  *
4857  *   Notes:  When creating new LSI property, need to add the new LSI property
4858  *		to this function.
4859  */
4860 static void
4861 sd_is_lsi(struct sd_lun *un)
4862 {
4863 	char	*id = NULL;
4864 	int	table_index;
4865 	int	idlen;
4866 	void	*prop;
4867 
4868 	ASSERT(un != NULL);
4869 	for (table_index = 0; table_index < sd_disk_table_size;
4870 	    table_index++) {
4871 		id = sd_disk_table[table_index].device_id;
4872 		idlen = strlen(id);
4873 		if (idlen == 0) {
4874 			continue;
4875 		}
4876 
4877 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4878 			prop = sd_disk_table[table_index].properties;
4879 			if (prop == &lsi_properties ||
4880 			    prop == &lsi_oem_properties ||
4881 			    prop == &lsi_properties_scsi ||
4882 			    prop == &symbios_properties) {
4883 				un->un_f_cfg_is_lsi = TRUE;
4884 			}
4885 			break;
4886 		}
4887 	}
4888 }
4889 
4890 /*
4891  *    Function: sd_get_physical_geometry
4892  *
4893  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4894  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4895  *		target, and use this information to initialize the physical
4896  *		geometry cache specified by pgeom_p.
4897  *
4898  *		MODE SENSE is an optional command, so failure in this case
4899  *		does not necessarily denote an error. We want to use the
4900  *		MODE SENSE commands to derive the physical geometry of the
4901  *		device, but if either command fails, the logical geometry is
4902  *		used as the fallback for disk label geometry in cmlb.
4903  *
4904  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4905  *		have already been initialized for the current target and
4906  *		that the current values be passed as args so that we don't
4907  *		end up ever trying to use -1 as a valid value. This could
4908  *		happen if either value is reset while we're not holding
4909  *		the mutex.
4910  *
4911  *   Arguments: un - driver soft state (unit) structure
4912  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4913  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4914  *			to use the USCSI "direct" chain and bypass the normal
4915  *			command waitq.
4916  *
4917  *     Context: Kernel thread only (can sleep).
4918  */
4919 
static int
sd_get_physical_geometry(struct sd_lun *un, cmlb_geom_t *pgeom_p,
    diskaddr_t capacity, int lbasize, int path_flag)
{
	struct	mode_format	*page3p;
	struct	mode_geometry	*page4p;
	struct	mode_header	*headerp;
	int	sector_size;
	int	nsect;
	int	nhead;
	int	ncyl;
	int	intrlv;
	int	spc;
	diskaddr_t	modesense_capacity;
	int	rpm;
	int	bd_len;
	int	mode_header_length;
	uchar_t	*p3bufp;
	uchar_t	*p4bufp;
	int	cdbsize;
	/* ret stays EIO unless BOTH mode pages parse successfully. */
	int	ret = EIO;
	sd_ssc_t *ssc;
	int	status;

	ASSERT(un != NULL);

	/* Default the sector size when the caller passed none. */
	if (lbasize == 0) {
		if (ISCD(un)) {
			lbasize = 2048;
		} else {
			lbasize = un->un_sys_blocksize;
		}
	}
	pgeom_p->g_secsize = (unsigned short)lbasize;

	/*
	 * If the unit is a cd/dvd drive MODE SENSE page three
	 * and MODE SENSE page four are reserved (see SBC spec
	 * and MMC spec). To prevent soft errors just return
	 * using the default LBA size.
	 *
	 * Since SATA MODE SENSE function (sata_txlt_mode_sense()) does not
	 * implement support for mode pages 3 and 4 return here to prevent
	 * illegal requests on SATA drives.
	 *
	 * These pages are also reserved in SBC-2 and later.  We assume SBC-2
	 * or later for a direct-attached block device if the SCSI version is
	 * at least SPC-3.
	 */

	/*
	 * Early return of EIO: the caller treats failure here as "use the
	 * logical geometry fallback" (see function header comment), so
	 * this is not reported as an error.
	 */
	if (ISCD(un) ||
	    un->un_interconnect_type == SD_INTERCONNECT_SATA ||
	    (un->un_ctype == CTYPE_CCS && SD_INQUIRY(un)->inq_ansi >= 5))
		return (ret);

	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;

	/*
	 * Retrieve MODE SENSE page 3 - Format Device Page
	 */
	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
	ssc = sd_ssc_init(un);
	status = sd_send_scsi_MODE_SENSE(ssc, cdbsize, p3bufp,
	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag);
	if (status != 0) {
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_get_physical_geometry: mode sense page 3 failed\n");
		goto page3_exit;
	}

	/*
	 * Determine size of Block Descriptors in order to locate the mode
	 * page data.  ATAPI devices return 0, SCSI devices should return
	 * MODE_BLK_DESC_LENGTH.
	 */
	headerp = (struct mode_header *)p3bufp;
	if (un->un_f_cfg_is_atapi == TRUE) {
		struct mode_header_grp2 *mhp =
		    (struct mode_header_grp2 *)headerp;
		mode_header_length = MODE_HEADER_LENGTH_GRP2;
		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
	} else {
		mode_header_length = MODE_HEADER_LENGTH;
		bd_len = ((struct mode_header *)headerp)->bdesc_length;
	}

	if (bd_len > MODE_BLK_DESC_LENGTH) {
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
		    "sd_get_physical_geometry: received unexpected bd_len "
		    "of %d, page3\n", bd_len);
		status = EIO;
		goto page3_exit;
	}

	page3p = (struct mode_format *)
	    ((caddr_t)headerp + mode_header_length + bd_len);

	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
		    "sd_get_physical_geometry: mode sense pg3 code mismatch "
		    "%d\n", page3p->mode_page.code);
		status = EIO;
		goto page3_exit;
	}

	/*
	 * Use this physical geometry data only if BOTH MODE SENSE commands
	 * complete successfully; otherwise, revert to the logical geometry.
	 * So, we need to save everything in temporary variables.
	 */
	sector_size = BE_16(page3p->data_bytes_sect);

	/*
	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
	 */
	if (sector_size == 0) {
		sector_size = un->un_sys_blocksize;
	} else {
		/*
		 * Round down to a multiple of the system block size.
		 * NOTE(review): this mask arithmetic assumes
		 * un_sys_blocksize is a power of two.
		 */
		sector_size &= ~(un->un_sys_blocksize - 1);
	}

	nsect  = BE_16(page3p->sect_track);
	intrlv = BE_16(page3p->interleave);

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   mode page: %d; nsect: %d; sector size: %d;\n",
	    page3p->mode_page.code, nsect, sector_size);
	SD_INFO(SD_LOG_COMMON, un,
	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
	    BE_16(page3p->track_skew),
	    BE_16(page3p->cylinder_skew));

	sd_ssc_assessment(ssc, SD_FMT_STANDARD);

	/*
	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
	 */
	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
	status = sd_send_scsi_MODE_SENSE(ssc, cdbsize, p4bufp,
	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag);
	if (status != 0) {
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_get_physical_geometry: mode sense page 4 failed\n");
		goto page4_exit;
	}

	/*
	 * Determine size of Block Descriptors in order to locate the mode
	 * page data.  ATAPI devices return 0, SCSI devices should return
	 * MODE_BLK_DESC_LENGTH.
	 *
	 * mode_header_length is intentionally not reassigned here: it keeps
	 * the value computed during page 3 parsing, which is correct because
	 * both assignments are selected by the same un_f_cfg_is_atapi test.
	 */
	headerp = (struct mode_header *)p4bufp;
	if (un->un_f_cfg_is_atapi == TRUE) {
		struct mode_header_grp2 *mhp =
		    (struct mode_header_grp2 *)headerp;
		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
	} else {
		bd_len = ((struct mode_header *)headerp)->bdesc_length;
	}

	if (bd_len > MODE_BLK_DESC_LENGTH) {
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
		    "sd_get_physical_geometry: received unexpected bd_len of "
		    "%d, page4\n", bd_len);
		status = EIO;
		goto page4_exit;
	}

	page4p = (struct mode_geometry *)
	    ((caddr_t)headerp + mode_header_length + bd_len);

	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
		    "sd_get_physical_geometry: mode sense pg4 code mismatch "
		    "%d\n", page4p->mode_page.code);
		status = EIO;
		goto page4_exit;
	}

	/*
	 * Stash the data now, after we know that both commands completed.
	 */


	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
	spc   = nhead * nsect;
	/* Cylinder count is a 24-bit big-endian field split across 3 bytes. */
	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
	rpm   = BE_16(page4p->rpm);

	modesense_capacity = spc * ncyl;

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
	SD_INFO(SD_LOG_COMMON, un,
	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
	    (void *)pgeom_p, capacity);

	/*
	 * Compensate if the drive's geometry is not rectangular, i.e.,
	 * the product of C * H * S returned by MODE SENSE >= that returned
	 * by read capacity. This is an idiosyncrasy of the original x86
	 * disk subsystem.
	 */
	if (modesense_capacity >= capacity) {
		SD_INFO(SD_LOG_COMMON, un,
		    "sd_get_physical_geometry: adjusting acyl; "
		    "old: %d; new: %d\n", pgeom_p->g_acyl,
		    (modesense_capacity - capacity + spc - 1) / spc);
		if (sector_size != 0) {
			/* 1243403: NEC D38x7 drives don't support sec size */
			pgeom_p->g_secsize = (unsigned short)sector_size;
		}
		pgeom_p->g_nsect    = (unsigned short)nsect;
		pgeom_p->g_nhead    = (unsigned short)nhead;
		pgeom_p->g_capacity = capacity;
		/* Alternate cylinders absorb the excess mode-sense capacity. */
		pgeom_p->g_acyl	    =
		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
	}

	pgeom_p->g_rpm    = (unsigned short)rpm;
	pgeom_p->g_intrlv = (unsigned short)intrlv;
	/* Both pages parsed successfully; report success to the caller. */
	ret = 0;

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: mode sense geometry:\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   nsect: %d; sector size: %d; interlv: %d\n",
	    nsect, sector_size, intrlv);
	SD_INFO(SD_LOG_COMMON, un,
	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
	    nhead, ncyl, rpm, modesense_capacity);
	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: (cached)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
	    pgeom_p->g_ncyl,  pgeom_p->g_acyl,
	    pgeom_p->g_nhead, pgeom_p->g_nsect);
	SD_INFO(SD_LOG_COMMON, un,
	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
	    pgeom_p->g_secsize, pgeom_p->g_capacity,
	    pgeom_p->g_intrlv, pgeom_p->g_rpm);
	sd_ssc_assessment(ssc, SD_FMT_STANDARD);

page4_exit:
	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);

page3_exit:
	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);

	if (status != 0) {
		if (status == EIO) {
			/*
			 * Some disks do not support mode sense(6), we
			 * should ignore this kind of error(sense key is
			 * 0x5 - illegal request).
			 */
			uint8_t *sensep;
			int senlen;

			sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
			senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
			    ssc->ssc_uscsi_cmd->uscsi_rqresid);

			if (senlen > 0 &&
			    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
				sd_ssc_assessment(ssc,
				    SD_FMT_IGNORE_COMPROMISE);
			} else {
				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
			}
		} else {
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		}
	}
	sd_ssc_fini(ssc);
	return (ret);
}
5203 
5204 /*
5205  *    Function: sd_get_virtual_geometry
5206  *
5207  * Description: Ask the controller to tell us about the target device.
5208  *
5209  *   Arguments: un - pointer to softstate
5210  *		capacity - disk capacity in #blocks
5211  *		lbasize - disk block size in bytes
5212  *
5213  *     Context: Kernel thread only
5214  */
5215 
5216 static int
5217 sd_get_virtual_geometry(struct sd_lun *un, cmlb_geom_t *lgeom_p,
5218     diskaddr_t capacity, int lbasize)
5219 {
5220 	uint_t	geombuf;
5221 	int	spc;
5222 
5223 	ASSERT(un != NULL);
5224 
5225 	/* Set sector size, and total number of sectors */
5226 	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
5227 	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
5228 
5229 	/* Let the HBA tell us its geometry */
5230 	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
5231 
5232 	/* A value of -1 indicates an undefined "geometry" property */
5233 	if (geombuf == (-1)) {
5234 		return (EINVAL);
5235 	}
5236 
5237 	/* Initialize the logical geometry cache. */
5238 	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
5239 	lgeom_p->g_nsect   = geombuf & 0xffff;
5240 	lgeom_p->g_secsize = un->un_sys_blocksize;
5241 
5242 	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
5243 
5244 	/*
5245 	 * Note: The driver originally converted the capacity value from
5246 	 * target blocks to system blocks. However, the capacity value passed
5247 	 * to this routine is already in terms of system blocks (this scaling
5248 	 * is done when the READ CAPACITY command is issued and processed).
5249 	 * This 'error' may have gone undetected because the usage of g_ncyl
5250 	 * (which is based upon g_capacity) is very limited within the driver
5251 	 */
5252 	lgeom_p->g_capacity = capacity;
5253 
5254 	/*
5255 	 * Set ncyl to zero if the hba returned a zero nhead or nsect value. The
5256 	 * hba may return zero values if the device has been removed.
5257 	 */
5258 	if (spc == 0) {
5259 		lgeom_p->g_ncyl = 0;
5260 	} else {
5261 		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
5262 	}
5263 	lgeom_p->g_acyl = 0;
5264 
5265 	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
5266 	return (0);
5267 
5268 }
5269 /*
5270  *    Function: sd_update_block_info
5271  *
5272  * Description: Calculate a byte count to sector count bitshift value
5273  *		from sector size.
5274  *
5275  *   Arguments: un: unit struct.
5276  *		lbasize: new target sector size
5277  *		capacity: new target capacity, ie. block count
5278  *
5279  *     Context: Kernel thread context
5280  */
5281 
5282 static void
5283 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
5284 {
5285 	if (lbasize != 0) {
5286 		un->un_tgt_blocksize = lbasize;
5287 		un->un_f_tgt_blocksize_is_valid = TRUE;
5288 		if (!un->un_f_has_removable_media) {
5289 			un->un_sys_blocksize = lbasize;
5290 		}
5291 	}
5292 
5293 	if (capacity != 0) {
5294 		un->un_blockcount		= capacity;
5295 		un->un_f_blockcount_is_valid	= TRUE;
5296 
5297 		/*
5298 		 * The capacity has changed so update the errstats.
5299 		 */
5300 		if (un->un_errstats != NULL) {
5301 			struct sd_errstats *stp;
5302 
5303 			capacity *= un->un_sys_blocksize;
5304 			stp = (struct sd_errstats *)un->un_errstats->ks_data;
5305 			if (stp->sd_capacity.value.ui64 < capacity)
5306 				stp->sd_capacity.value.ui64 = capacity;
5307 		}
5308 	}
5309 }
5310 
5311 /*
5312  * Parses the SCSI Block Limits VPD page (0xB0). It's legal to pass NULL for
5313  * vpd_pg, in which case all the block limits will be reset to the defaults.
5314  */
5315 static void
5316 sd_parse_blk_limits_vpd(struct sd_lun *un, uchar_t *vpd_pg)
5317 {
5318 	sd_blk_limits_t *lim = &un->un_blk_lim;
5319 	unsigned pg_len;
5320 
5321 	if (vpd_pg != NULL)
5322 		pg_len = BE_IN16(&vpd_pg[2]);
5323 	else
5324 		pg_len = 0;
5325 
5326 	/* Block Limits VPD can be 16 bytes or 64 bytes long - support both */
5327 	if (pg_len >= 0x10) {
5328 		lim->lim_opt_xfer_len_gran = BE_IN16(&vpd_pg[6]);
5329 		lim->lim_max_xfer_len = BE_IN32(&vpd_pg[8]);
5330 		lim->lim_opt_xfer_len = BE_IN32(&vpd_pg[12]);
5331 
5332 		/* Zero means not reported, so use "unlimited" */
5333 		if (lim->lim_max_xfer_len == 0)
5334 			lim->lim_max_xfer_len = UINT32_MAX;
5335 		if (lim->lim_opt_xfer_len == 0)
5336 			lim->lim_opt_xfer_len = UINT32_MAX;
5337 	} else {
5338 		lim->lim_opt_xfer_len_gran = 0;
5339 		lim->lim_max_xfer_len = UINT32_MAX;
5340 		lim->lim_opt_xfer_len = UINT32_MAX;
5341 	}
5342 	if (pg_len >= 0x3c) {
5343 		lim->lim_max_pfetch_len = BE_IN32(&vpd_pg[16]);
5344 		/*
5345 		 * A zero in either of the following two fields indicates lack
5346 		 * of UNMAP support.
5347 		 */
5348 		lim->lim_max_unmap_lba_cnt = BE_IN32(&vpd_pg[20]);
5349 		lim->lim_max_unmap_descr_cnt = BE_IN32(&vpd_pg[24]);
5350 		lim->lim_opt_unmap_gran = BE_IN32(&vpd_pg[28]);
5351 		if ((vpd_pg[32] >> 7) == 1) {
5352 			lim->lim_unmap_gran_align =
5353 			    ((vpd_pg[32] & 0x7f) << 24) | (vpd_pg[33] << 16) |
5354 			    (vpd_pg[34] << 8) | vpd_pg[35];
5355 		} else {
5356 			lim->lim_unmap_gran_align = 0;
5357 		}
5358 		lim->lim_max_write_same_len = BE_IN64(&vpd_pg[36]);
5359 	} else {
5360 		lim->lim_max_pfetch_len = UINT32_MAX;
5361 		lim->lim_max_unmap_lba_cnt = UINT32_MAX;
5362 		lim->lim_max_unmap_descr_cnt = SD_UNMAP_MAX_DESCR;
5363 		lim->lim_opt_unmap_gran = 0;
5364 		lim->lim_unmap_gran_align = 0;
5365 		lim->lim_max_write_same_len = UINT64_MAX;
5366 	}
5367 }
5368 
5369 /*
5370  * Collects VPD page B0 data if available (block limits). If the data is
5371  * not available or querying the device failed, we revert to the defaults.
5372  */
5373 static void
5374 sd_setup_blk_limits(sd_ssc_t *ssc)
5375 {
5376 	struct sd_lun	*un		= ssc->ssc_un;
5377 	uchar_t		*inqB0		= NULL;
5378 	size_t		inqB0_resid	= 0;
5379 	int		rval;
5380 
5381 	if (un->un_vpd_page_mask & SD_VPD_BLK_LIMITS_PG) {
5382 		inqB0 = kmem_zalloc(MAX_INQUIRY_SIZE, KM_SLEEP);
5383 		rval = sd_send_scsi_INQUIRY(ssc, inqB0, MAX_INQUIRY_SIZE, 0x01,
5384 		    0xB0, &inqB0_resid);
5385 		if (rval != 0) {
5386 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5387 			kmem_free(inqB0, MAX_INQUIRY_SIZE);
5388 			inqB0 = NULL;
5389 		}
5390 	}
5391 	/* passing NULL inqB0 will reset to defaults */
5392 	sd_parse_blk_limits_vpd(ssc->ssc_un, inqB0);
5393 	if (inqB0)
5394 		kmem_free(inqB0, MAX_INQUIRY_SIZE);
5395 }
5396 
5397 /*
5398  *    Function: sd_register_devid
5399  *
5400  * Description: This routine will obtain the device id information from the
5401  *		target, obtain the serial number, and register the device
5402  *		id with the ddi framework.
5403  *
5404  *   Arguments: devi - the system's dev_info_t for the device.
5405  *		un - driver soft state (unit) structure
5406  *		reservation_flag - indicates if a reservation conflict
5407  *		occurred during attach
5408  *
5409  *     Context: Kernel Thread
5410  */
5411 static void
5412 sd_register_devid(sd_ssc_t *ssc, dev_info_t *devi, int reservation_flag)
5413 {
5414 	int		rval		= 0;
5415 	uchar_t		*inq80		= NULL;
5416 	size_t		inq80_len	= MAX_INQUIRY_SIZE;
5417 	size_t		inq80_resid	= 0;
5418 	uchar_t		*inq83		= NULL;
5419 	size_t		inq83_len	= MAX_INQUIRY_SIZE;
5420 	size_t		inq83_resid	= 0;
5421 	int		dlen, len;
5422 	char		*sn;
5423 	struct sd_lun	*un;
5424 
5425 	ASSERT(ssc != NULL);
5426 	un = ssc->ssc_un;
5427 	ASSERT(un != NULL);
5428 	ASSERT(mutex_owned(SD_MUTEX(un)));
5429 	ASSERT((SD_DEVINFO(un)) == devi);
5430 
5431 
5432 	/*
5433 	 * We check the availability of the World Wide Name (0x83) and Unit
5434 	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
5435 	 * un_vpd_page_mask from them, we decide which way to get the WWN.  If
5436 	 * 0x83 is available, that is the best choice.  Our next choice is
5437 	 * 0x80.  If neither are available, we munge the devid from the device
5438 	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
5439 	 * to fabricate a devid for non-Sun qualified disks.
5440 	 */
5441 	if (sd_check_vpd_page_support(ssc) == 0) {
5442 		/* collect page 80 data if available */
5443 		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {
5444 
5445 			mutex_exit(SD_MUTEX(un));
5446 			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
5447 
5448 			rval = sd_send_scsi_INQUIRY(ssc, inq80, inq80_len,
5449 			    0x01, 0x80, &inq80_resid);
5450 
5451 			if (rval != 0) {
5452 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5453 				kmem_free(inq80, inq80_len);
5454 				inq80 = NULL;
5455 				inq80_len = 0;
5456 			} else if (ddi_prop_exists(
5457 			    DDI_DEV_T_NONE, SD_DEVINFO(un),
5458 			    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
5459 			    INQUIRY_SERIAL_NO) == 0) {
5460 				/*
5461 				 * If we don't already have a serial number
5462 				 * property, do quick verify of data returned
5463 				 * and define property.
5464 				 */
5465 				dlen = inq80_len - inq80_resid;
5466 				len = (size_t)inq80[3];
5467 				if ((dlen >= 4) && ((len + 4) <= dlen)) {
5468 					/*
5469 					 * Ensure sn termination, skip leading
5470 					 * blanks, and create property
5471 					 * 'inquiry-serial-no'.
5472 					 */
5473 					sn = (char *)&inq80[4];
5474 					sn[len] = 0;
5475 					while (*sn && (*sn == ' '))
5476 						sn++;
5477 					if (*sn) {
5478 						(void) ddi_prop_update_string(
5479 						    DDI_DEV_T_NONE,
5480 						    SD_DEVINFO(un),
5481 						    INQUIRY_SERIAL_NO, sn);
5482 					}
5483 				}
5484 			}
5485 			mutex_enter(SD_MUTEX(un));
5486 		}
5487 
5488 		/* collect page 83 data if available */
5489 		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
5490 			mutex_exit(SD_MUTEX(un));
5491 			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
5492 
5493 			rval = sd_send_scsi_INQUIRY(ssc, inq83, inq83_len,
5494 			    0x01, 0x83, &inq83_resid);
5495 
5496 			if (rval != 0) {
5497 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5498 				kmem_free(inq83, inq83_len);
5499 				inq83 = NULL;
5500 				inq83_len = 0;
5501 			}
5502 			mutex_enter(SD_MUTEX(un));
5503 		}
5504 	}
5505 
5506 	/*
5507 	 * If transport has already registered a devid for this target
5508 	 * then that takes precedence over the driver's determination
5509 	 * of the devid.
5510 	 *
5511 	 * NOTE: The reason this check is done here instead of at the beginning
5512 	 * of the function is to allow the code above to create the
5513 	 * 'inquiry-serial-no' property.
5514 	 */
5515 	if (ddi_devid_get(SD_DEVINFO(un), &un->un_devid) == DDI_SUCCESS) {
5516 		ASSERT(un->un_devid);
5517 		un->un_f_devid_transport_defined = TRUE;
5518 		goto cleanup; /* use devid registered by the transport */
5519 	}
5520 
5521 	/*
5522 	 * This is the case of antiquated Sun disk drives that have the
5523 	 * FAB_DEVID property set in the disk_table.  These drives
5524 	 * manage the devid's by storing them in last 2 available sectors
5525 	 * on the drive and have them fabricated by the ddi layer by calling
5526 	 * ddi_devid_init and passing the DEVID_FAB flag.
5527 	 */
5528 	if (un->un_f_opt_fab_devid == TRUE) {
5529 		/*
5530 		 * Depending on EINVAL isn't reliable, since a reserved disk
5531 		 * may result in invalid geometry, so check to make sure a
5532 		 * reservation conflict did not occur during attach.
5533 		 */
5534 		if ((sd_get_devid(ssc) == EINVAL) &&
5535 		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
5536 			/*
5537 			 * The devid is invalid AND there is no reservation
5538 			 * conflict.  Fabricate a new devid.
5539 			 */
5540 			(void) sd_create_devid(ssc);
5541 		}
5542 
5543 		/* Register the devid if it exists */
5544 		if (un->un_devid != NULL) {
5545 			(void) ddi_devid_register(SD_DEVINFO(un),
5546 			    un->un_devid);
5547 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
5548 			    "sd_register_devid: Devid Fabricated\n");
5549 		}
5550 		goto cleanup;
5551 	}
5552 
5553 	/* encode best devid possible based on data available */
5554 	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
5555 	    (char *)ddi_driver_name(SD_DEVINFO(un)),
5556 	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
5557 	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
5558 	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {
5559 
5560 		/* devid successfully encoded, register devid */
5561 		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
5562 
5563 	} else {
5564 		/*
5565 		 * Unable to encode a devid based on data available.
5566 		 * This is not a Sun qualified disk.  Older Sun disk
5567 		 * drives that have the SD_FAB_DEVID property
5568 		 * set in the disk_table and non Sun qualified
5569 		 * disks are treated in the same manner.  These
5570 		 * drives manage the devid's by storing them in
5571 		 * last 2 available sectors on the drive and
5572 		 * have them fabricated by the ddi layer by
5573 		 * calling ddi_devid_init and passing the
5574 		 * DEVID_FAB flag.
5575 		 * Create a fabricate devid only if there's no
5576 		 * fabricate devid existed.
5577 		 */
5578 		if (sd_get_devid(ssc) == EINVAL) {
5579 			(void) sd_create_devid(ssc);
5580 		}
5581 		un->un_f_opt_fab_devid = TRUE;
5582 
5583 		/* Register the devid if it exists */
5584 		if (un->un_devid != NULL) {
5585 			(void) ddi_devid_register(SD_DEVINFO(un),
5586 			    un->un_devid);
5587 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
5588 			    "sd_register_devid: devid fabricated using "
5589 			    "ddi framework\n");
5590 		}
5591 	}
5592 
5593 cleanup:
5594 	/* clean up resources */
5595 	if (inq80 != NULL) {
5596 		kmem_free(inq80, inq80_len);
5597 	}
5598 	if (inq83 != NULL) {
5599 		kmem_free(inq83, inq83_len);
5600 	}
5601 }
5602 
5603 
5604 
5605 /*
5606  *    Function: sd_get_devid
5607  *
5608  * Description: This routine will return 0 if a valid device id has been
5609  *		obtained from the target and stored in the soft state. If a
5610  *		valid device id has not been previously read and stored, a
5611  *		read attempt will be made.
5612  *
5613  *   Arguments: un - driver soft state (unit) structure
5614  *
5615  * Return Code: 0 if we successfully get the device id
5616  *
5617  *     Context: Kernel Thread
5618  */
5619 
5620 static int
5621 sd_get_devid(sd_ssc_t *ssc)
5622 {
5623 	struct dk_devid		*dkdevid;
5624 	ddi_devid_t		tmpid;
5625 	uint_t			*ip;
5626 	size_t			sz;
5627 	diskaddr_t		blk;
5628 	int			status;
5629 	int			chksum;
5630 	int			i;
5631 	size_t			buffer_size;
5632 	struct sd_lun		*un;
5633 
5634 	ASSERT(ssc != NULL);
5635 	un = ssc->ssc_un;
5636 	ASSERT(un != NULL);
5637 	ASSERT(mutex_owned(SD_MUTEX(un)));
5638 
5639 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
5640 	    un);
5641 
5642 	if (un->un_devid != NULL) {
5643 		return (0);
5644 	}
5645 
5646 	mutex_exit(SD_MUTEX(un));
5647 	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
5648 	    (void *)SD_PATH_DIRECT) != 0) {
5649 		mutex_enter(SD_MUTEX(un));
5650 		return (EINVAL);
5651 	}
5652 
5653 	/*
5654 	 * Read and verify device id, stored in the reserved cylinders at the
5655 	 * end of the disk. Backup label is on the odd sectors of the last
5656 	 * track of the last cylinder. Device id will be on track of the next
5657 	 * to last cylinder.
5658 	 */
5659 	mutex_enter(SD_MUTEX(un));
5660 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
5661 	mutex_exit(SD_MUTEX(un));
5662 	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
5663 	status = sd_send_scsi_READ(ssc, dkdevid, buffer_size, blk,
5664 	    SD_PATH_DIRECT);
5665 
5666 	if (status != 0) {
5667 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5668 		goto error;
5669 	}
5670 
5671 	/* Validate the revision */
5672 	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
5673 	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
5674 		status = EINVAL;
5675 		goto error;
5676 	}
5677 
5678 	/* Calculate the checksum */
5679 	chksum = 0;
5680 	ip = (uint_t *)dkdevid;
5681 	for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int));
5682 	    i++) {
5683 		chksum ^= ip[i];
5684 	}
5685 
5686 	/* Compare the checksums */
5687 	if (DKD_GETCHKSUM(dkdevid) != chksum) {
5688 		status = EINVAL;
5689 		goto error;
5690 	}
5691 
5692 	/* Validate the device id */
5693 	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
5694 		status = EINVAL;
5695 		goto error;
5696 	}
5697 
5698 	/*
5699 	 * Store the device id in the driver soft state
5700 	 */
5701 	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
5702 	tmpid = kmem_alloc(sz, KM_SLEEP);
5703 
5704 	mutex_enter(SD_MUTEX(un));
5705 
5706 	un->un_devid = tmpid;
5707 	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
5708 
5709 	kmem_free(dkdevid, buffer_size);
5710 
5711 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
5712 
5713 	return (status);
5714 error:
5715 	mutex_enter(SD_MUTEX(un));
5716 	kmem_free(dkdevid, buffer_size);
5717 	return (status);
5718 }
5719 
5720 
5721 /*
5722  *    Function: sd_create_devid
5723  *
5724  * Description: This routine will fabricate the device id and write it
5725  *		to the disk.
5726  *
5727  *   Arguments: un - driver soft state (unit) structure
5728  *
5729  * Return Code: value of the fabricated device id
5730  *
5731  *     Context: Kernel Thread
5732  */
5733 
5734 static ddi_devid_t
5735 sd_create_devid(sd_ssc_t *ssc)
5736 {
5737 	struct sd_lun	*un;
5738 
5739 	ASSERT(ssc != NULL);
5740 	un = ssc->ssc_un;
5741 	ASSERT(un != NULL);
5742 
5743 	/* Fabricate the devid */
5744 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
5745 	    == DDI_FAILURE) {
5746 		return (NULL);
5747 	}
5748 
5749 	/* Write the devid to disk */
5750 	if (sd_write_deviceid(ssc) != 0) {
5751 		ddi_devid_free(un->un_devid);
5752 		un->un_devid = NULL;
5753 	}
5754 
5755 	return (un->un_devid);
5756 }
5757 
5758 
5759 /*
5760  *    Function: sd_write_deviceid
5761  *
5762  * Description: This routine will write the device id to the disk
5763  *		reserved sector.
5764  *
5765  *   Arguments: un - driver soft state (unit) structure
5766  *
5767  * Return Code: EINVAL
5768  *		value returned by sd_send_scsi_cmd
5769  *
5770  *     Context: Kernel Thread
5771  */
5772 
5773 static int
5774 sd_write_deviceid(sd_ssc_t *ssc)
5775 {
5776 	struct dk_devid		*dkdevid;
5777 	uchar_t			*buf;
5778 	diskaddr_t		blk;
5779 	uint_t			*ip, chksum;
5780 	int			status;
5781 	int			i;
5782 	struct sd_lun		*un;
5783 
5784 	ASSERT(ssc != NULL);
5785 	un = ssc->ssc_un;
5786 	ASSERT(un != NULL);
5787 	ASSERT(mutex_owned(SD_MUTEX(un)));
5788 
5789 	mutex_exit(SD_MUTEX(un));
5790 	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
5791 	    (void *)SD_PATH_DIRECT) != 0) {
5792 		mutex_enter(SD_MUTEX(un));
5793 		return (-1);
5794 	}
5795 
5796 
5797 	/* Allocate the buffer */
5798 	buf = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
5799 	dkdevid = (struct dk_devid *)buf;
5800 
5801 	/* Fill in the revision */
5802 	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
5803 	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
5804 
5805 	/* Copy in the device id */
5806 	mutex_enter(SD_MUTEX(un));
5807 	bcopy(un->un_devid, &dkdevid->dkd_devid,
5808 	    ddi_devid_sizeof(un->un_devid));
5809 	mutex_exit(SD_MUTEX(un));
5810 
5811 	/* Calculate the checksum */
5812 	chksum = 0;
5813 	ip = (uint_t *)dkdevid;
5814 	for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int));
5815 	    i++) {
5816 		chksum ^= ip[i];
5817 	}
5818 
5819 	/* Fill-in checksum */
5820 	DKD_FORMCHKSUM(chksum, dkdevid);
5821 
5822 	/* Write the reserved sector */
5823 	status = sd_send_scsi_WRITE(ssc, buf, un->un_sys_blocksize, blk,
5824 	    SD_PATH_DIRECT);
5825 	if (status != 0)
5826 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5827 
5828 	kmem_free(buf, un->un_sys_blocksize);
5829 
5830 	mutex_enter(SD_MUTEX(un));
5831 	return (status);
5832 }
5833 
5834 
5835 /*
5836  *    Function: sd_check_vpd_page_support
5837  *
5838  * Description: This routine sends an inquiry command with the EVPD bit set and
5839  *		a page code of 0x00 to the device. It is used to determine which
5840  *		vital product pages are available to find the devid. We are
5841  *		looking for pages 0x83 0x80 or 0xB1.  If we return a negative 1,
5842  *		the device does not support that command.
5843  *
5844  *   Arguments: un  - driver soft state (unit) structure
5845  *
5846  * Return Code: 0 - success
5847  *		1 - check condition
5848  *
5849  *     Context: This routine can sleep.
5850  */
5851 
5852 static int
5853 sd_check_vpd_page_support(sd_ssc_t *ssc)
5854 {
5855 	uchar_t	*page_list	= NULL;
5856 	uchar_t	page_length	= 0xff;	/* Use max possible length */
5857 	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
5858 	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
5859 	int	rval		= 0;
5860 	int	counter;
5861 	struct sd_lun		*un;
5862 
5863 	ASSERT(ssc != NULL);
5864 	un = ssc->ssc_un;
5865 	ASSERT(un != NULL);
5866 	ASSERT(mutex_owned(SD_MUTEX(un)));
5867 
5868 	mutex_exit(SD_MUTEX(un));
5869 
5870 	/*
5871 	 * We'll set the page length to the maximum to save figuring it out
5872 	 * with an additional call.
5873 	 */
5874 	page_list =  kmem_zalloc(page_length, KM_SLEEP);
5875 
5876 	rval = sd_send_scsi_INQUIRY(ssc, page_list, page_length, evpd,
5877 	    page_code, NULL);
5878 
5879 	if (rval != 0)
5880 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5881 
5882 	mutex_enter(SD_MUTEX(un));
5883 
5884 	/*
5885 	 * Now we must validate that the device accepted the command, as some
5886 	 * drives do not support it.  If the drive does support it, we will
5887 	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
5888 	 * not, we return -1.
5889 	 */
5890 	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
5891 		/* Loop to find one of the 2 pages we need */
5892 		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
5893 
5894 		/*
5895 		 * Pages are returned in ascending order, and 0x83 is what we
5896 		 * are hoping for.
5897 		 */
5898 		while ((page_list[counter] <= 0xB1) &&
5899 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
5900 		    VPD_HEAD_OFFSET))) {
5901 			/*
5902 			 * Add 3 because page_list[3] is the number of
5903 			 * pages minus 3
5904 			 */
5905 
5906 			switch (page_list[counter]) {
5907 			case 0x00:
5908 				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
5909 				break;
5910 			case 0x80:
5911 				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
5912 				break;
5913 			case 0x81:
5914 				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
5915 				break;
5916 			case 0x82:
5917 				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
5918 				break;
5919 			case 0x83:
5920 				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
5921 				break;
5922 			case 0x86:
5923 				un->un_vpd_page_mask |= SD_VPD_EXTENDED_DATA_PG;
5924 				break;
5925 			case 0xB0:
5926 				un->un_vpd_page_mask |= SD_VPD_BLK_LIMITS_PG;
5927 				break;
5928 			case 0xB1:
5929 				un->un_vpd_page_mask |= SD_VPD_DEV_CHARACTER_PG;
5930 				break;
5931 			}
5932 			counter++;
5933 		}
5934 
5935 	} else {
5936 		rval = -1;
5937 
5938 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
5939 		    "sd_check_vpd_page_support: This drive does not implement "
5940 		    "VPD pages.\n");
5941 	}
5942 
5943 	kmem_free(page_list, page_length);
5944 
5945 	return (rval);
5946 }
5947 
5948 
5949 /*
5950  *    Function: sd_setup_pm
5951  *
5952  * Description: Initialize Power Management on the device
5953  *
5954  *     Context: Kernel Thread
5955  */
5956 
5957 static void
5958 sd_setup_pm(sd_ssc_t *ssc, dev_info_t *devi)
5959 {
5960 	uint_t		log_page_size;
5961 	uchar_t		*log_page_data;
5962 	int		rval = 0;
5963 	struct sd_lun	*un;
5964 
5965 	ASSERT(ssc != NULL);
5966 	un = ssc->ssc_un;
5967 	ASSERT(un != NULL);
5968 
5969 	/*
5970 	 * Since we are called from attach, holding a mutex for
5971 	 * un is unnecessary. Because some of the routines called
5972 	 * from here require SD_MUTEX to not be held, assert this
5973 	 * right up front.
5974 	 */
5975 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5976 	/*
5977 	 * Since the sd device does not have the 'reg' property,
5978 	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
5979 	 * The following code is to tell cpr that this device
5980 	 * DOES need to be suspended and resumed.
5981 	 */
5982 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
5983 	    "pm-hardware-state", "needs-suspend-resume");
5984 
5985 	/*
5986 	 * This complies with the new power management framework
5987 	 * for certain desktop machines. Create the pm_components
5988 	 * property as a string array property.
5989 	 * If un_f_pm_supported is TRUE, that means the disk
5990 	 * attached HBA has set the "pm-capable" property and
5991 	 * the value of this property is bigger than 0.
5992 	 */
5993 	if (un->un_f_pm_supported) {
5994 		/*
5995 		 * not all devices have a motor, try it first.
5996 		 * some devices may return ILLEGAL REQUEST, some
5997 		 * will hang
5998 		 * The following START_STOP_UNIT is used to check if target
5999 		 * device has a motor.
6000 		 */
6001 		un->un_f_start_stop_supported = TRUE;
6002 
6003 		if (un->un_f_power_condition_supported) {
6004 			rval = sd_send_scsi_START_STOP_UNIT(ssc,
6005 			    SD_POWER_CONDITION, SD_TARGET_ACTIVE,
6006 			    SD_PATH_DIRECT);
6007 			if (rval != 0) {
6008 				un->un_f_power_condition_supported = FALSE;
6009 			}
6010 		}
6011 		if (!un->un_f_power_condition_supported) {
6012 			rval = sd_send_scsi_START_STOP_UNIT(ssc,
6013 			    SD_START_STOP, SD_TARGET_START, SD_PATH_DIRECT);
6014 		}
6015 		if (rval != 0) {
6016 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6017 			un->un_f_start_stop_supported = FALSE;
6018 		}
6019 
6020 		/*
6021 		 * create pm properties anyways otherwise the parent can't
6022 		 * go to sleep
6023 		 */
6024 		un->un_f_pm_is_enabled = TRUE;
6025 		(void) sd_create_pm_components(devi, un);
6026 
6027 		/*
6028 		 * If it claims that log sense is supported, check it out.
6029 		 */
6030 		if (un->un_f_log_sense_supported) {
6031 			rval = sd_log_page_supported(ssc,
6032 			    START_STOP_CYCLE_PAGE);
6033 			if (rval == 1) {
6034 				/* Page found, use it. */
6035 				un->un_start_stop_cycle_page =
6036 				    START_STOP_CYCLE_PAGE;
6037 			} else {
6038 				/*
6039 				 * Page not found or log sense is not
6040 				 * supported.
6041 				 * Notice we do not check the old style
6042 				 * START_STOP_CYCLE_VU_PAGE because this
6043 				 * code path does not apply to old disks.
6044 				 */
6045 				un->un_f_log_sense_supported = FALSE;
6046 				un->un_f_pm_log_sense_smart = FALSE;
6047 			}
6048 		}
6049 
6050 		return;
6051 	}
6052 
6053 	/*
6054 	 * For the disk whose attached HBA has not set the "pm-capable"
6055 	 * property, check if it supports the power management.
6056 	 */
6057 	if (!un->un_f_log_sense_supported) {
6058 		un->un_power_level = SD_SPINDLE_ON;
6059 		un->un_f_pm_is_enabled = FALSE;
6060 		return;
6061 	}
6062 
6063 	rval = sd_log_page_supported(ssc, START_STOP_CYCLE_PAGE);
6064 
6065 #ifdef	SDDEBUG
6066 	if (sd_force_pm_supported) {
6067 		/* Force a successful result */
6068 		rval = 1;
6069 	}
6070 #endif
6071 
6072 	/*
6073 	 * If the start-stop cycle counter log page is not supported
6074 	 * or if the pm-capable property is set to be false (0),
6075 	 * then we should not create the pm_components property.
6076 	 */
6077 	if (rval == -1) {
6078 		/*
6079 		 * Error.
6080 		 * Reading log sense failed, most likely this is
6081 		 * an older drive that does not support log sense.
6082 		 * If this fails auto-pm is not supported.
6083 		 */
6084 		un->un_power_level = SD_SPINDLE_ON;
6085 		un->un_f_pm_is_enabled = FALSE;
6086 
6087 	} else if (rval == 0) {
6088 		/*
6089 		 * Page not found.
6090 		 * The start stop cycle counter is implemented as page
6091 		 * START_STOP_CYCLE_PAGE_VU_PAGE (0x31) in older disks. For
6092 		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
6093 		 */
6094 		if (sd_log_page_supported(ssc, START_STOP_CYCLE_VU_PAGE) == 1) {
6095 			/*
6096 			 * Page found, use this one.
6097 			 */
6098 			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
6099 			un->un_f_pm_is_enabled = TRUE;
6100 		} else {
6101 			/*
6102 			 * Error or page not found.
6103 			 * auto-pm is not supported for this device.
6104 			 */
6105 			un->un_power_level = SD_SPINDLE_ON;
6106 			un->un_f_pm_is_enabled = FALSE;
6107 		}
6108 	} else {
6109 		/*
6110 		 * Page found, use it.
6111 		 */
6112 		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
6113 		un->un_f_pm_is_enabled = TRUE;
6114 	}
6115 
6116 
6117 	if (un->un_f_pm_is_enabled == TRUE) {
6118 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
6119 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
6120 
6121 		rval = sd_send_scsi_LOG_SENSE(ssc, log_page_data,
6122 		    log_page_size, un->un_start_stop_cycle_page,
6123 		    0x01, 0, SD_PATH_DIRECT);
6124 
6125 		if (rval != 0) {
6126 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6127 		}
6128 
6129 #ifdef	SDDEBUG
6130 		if (sd_force_pm_supported) {
6131 			/* Force a successful result */
6132 			rval = 0;
6133 		}
6134 #endif
6135 
6136 		/*
6137 		 * If the Log sense for Page( Start/stop cycle counter page)
6138 		 * succeeds, then power management is supported and we can
6139 		 * enable auto-pm.
6140 		 */
6141 		if (rval == 0)  {
6142 			(void) sd_create_pm_components(devi, un);
6143 		} else {
6144 			un->un_power_level = SD_SPINDLE_ON;
6145 			un->un_f_pm_is_enabled = FALSE;
6146 		}
6147 
6148 		kmem_free(log_page_data, log_page_size);
6149 	}
6150 }
6151 
6152 
6153 /*
6154  *    Function: sd_create_pm_components
6155  *
6156  * Description: Initialize PM property.
6157  *
6158  *     Context: Kernel thread context
6159  */
6160 
6161 static void
6162 sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
6163 {
6164 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6165 
6166 	if (un->un_f_power_condition_supported) {
6167 		if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
6168 		    "pm-components", sd_pwr_pc.pm_comp, 5)
6169 		    != DDI_PROP_SUCCESS) {
6170 			un->un_power_level = SD_SPINDLE_ACTIVE;
6171 			un->un_f_pm_is_enabled = FALSE;
6172 			return;
6173 		}
6174 	} else {
6175 		if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
6176 		    "pm-components", sd_pwr_ss.pm_comp, 3)
6177 		    != DDI_PROP_SUCCESS) {
6178 			un->un_power_level = SD_SPINDLE_ON;
6179 			un->un_f_pm_is_enabled = FALSE;
6180 			return;
6181 		}
6182 	}
6183 	/*
6184 	 * When components are initially created they are idle,
6185 	 * power up any non-removables.
6186 	 * Note: the return value of pm_raise_power can't be used
6187 	 * for determining if PM should be enabled for this device.
6188 	 * Even if you check the return values and remove this
6189 	 * property created above, the PM framework will not honor the
6190 	 * change after the first call to pm_raise_power. Hence,
6191 	 * removal of that property does not help if pm_raise_power
6192 	 * fails. In the case of removable media, the start/stop
6193 	 * will fail if the media is not present.
6194 	 */
6195 	if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
6196 	    SD_PM_STATE_ACTIVE(un)) == DDI_SUCCESS)) {
6197 		mutex_enter(SD_MUTEX(un));
6198 		un->un_power_level = SD_PM_STATE_ACTIVE(un);
6199 		mutex_enter(&un->un_pm_mutex);
6200 		/* Set to on and not busy. */
6201 		un->un_pm_count = 0;
6202 	} else {
6203 		mutex_enter(SD_MUTEX(un));
6204 		un->un_power_level = SD_PM_STATE_STOPPED(un);
6205 		mutex_enter(&un->un_pm_mutex);
6206 		/* Set to off. */
6207 		un->un_pm_count = -1;
6208 	}
6209 	mutex_exit(&un->un_pm_mutex);
6210 	mutex_exit(SD_MUTEX(un));
6211 }
6212 
6213 
6214 /*
6215  *    Function: sd_ddi_suspend
6216  *
6217  * Description: Performs system power-down operations. This includes
6218  *		setting the drive state to indicate its suspended so
6219  *		that no new commands will be accepted. Also, wait for
6220  *		all commands that are in transport or queued to a timer
6221  *		for retry to complete. All timeout threads are cancelled.
6222  *
6223  * Return Code: DDI_FAILURE or DDI_SUCCESS
6224  *
6225  *     Context: Kernel thread context
6226  */
6227 
6228 static int
6229 sd_ddi_suspend(dev_info_t *devi)
6230 {
6231 	struct	sd_lun	*un;
6232 	clock_t		wait_cmds_complete;
6233 
6234 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6235 	if (un == NULL) {
6236 		return (DDI_FAILURE);
6237 	}
6238 
6239 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
6240 
6241 	mutex_enter(SD_MUTEX(un));
6242 
6243 	/* Return success if the device is already suspended. */
6244 	if (un->un_state == SD_STATE_SUSPENDED) {
6245 		mutex_exit(SD_MUTEX(un));
6246 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6247 		    "device already suspended, exiting\n");
6248 		return (DDI_SUCCESS);
6249 	}
6250 
6251 	/* Return failure if the device is being used by HA */
6252 	if (un->un_resvd_status &
6253 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
6254 		mutex_exit(SD_MUTEX(un));
6255 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6256 		    "device in use by HA, exiting\n");
6257 		return (DDI_FAILURE);
6258 	}
6259 
6260 	/*
6261 	 * Return failure if the device is in a resource wait
6262 	 * or power changing state.
6263 	 */
6264 	if ((un->un_state == SD_STATE_RWAIT) ||
6265 	    (un->un_state == SD_STATE_PM_CHANGING)) {
6266 		mutex_exit(SD_MUTEX(un));
6267 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6268 		    "device in resource wait state, exiting\n");
6269 		return (DDI_FAILURE);
6270 	}
6271 
6272 
6273 	un->un_save_state = un->un_last_state;
6274 	New_state(un, SD_STATE_SUSPENDED);
6275 
6276 	/*
6277 	 * Wait for all commands that are in transport or queued to a timer
6278 	 * for retry to complete.
6279 	 *
6280 	 * While waiting, no new commands will be accepted or sent because of
6281 	 * the new state we set above.
6282 	 *
6283 	 * Wait till current operation has completed. If we are in the resource
6284 	 * wait state (with an intr outstanding) then we need to wait till the
6285 	 * intr completes and starts the next cmd. We want to wait for
6286 	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
6287 	 */
6288 	wait_cmds_complete = ddi_get_lbolt() +
6289 	    (sd_wait_cmds_complete * drv_usectohz(1000000));
6290 
6291 	while (un->un_ncmds_in_transport != 0) {
6292 		/*
6293 		 * Fail if commands do not finish in the specified time.
6294 		 */
6295 		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
6296 		    wait_cmds_complete) == -1) {
6297 			/*
6298 			 * Undo the state changes made above. Everything
6299 			 * must go back to it's original value.
6300 			 */
6301 			Restore_state(un);
6302 			un->un_last_state = un->un_save_state;
6303 			/* Wake up any threads that might be waiting. */
6304 			cv_broadcast(&un->un_suspend_cv);
6305 			mutex_exit(SD_MUTEX(un));
6306 			SD_ERROR(SD_LOG_IO_PM, un,
6307 			    "sd_ddi_suspend: failed due to outstanding cmds\n");
6308 			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
6309 			return (DDI_FAILURE);
6310 		}
6311 	}
6312 
6313 	/*
6314 	 * Cancel SCSI watch thread and timeouts, if any are active
6315 	 */
6316 
6317 	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
6318 		opaque_t temp_token = un->un_swr_token;
6319 		mutex_exit(SD_MUTEX(un));
6320 		scsi_watch_suspend(temp_token);
6321 		mutex_enter(SD_MUTEX(un));
6322 	}
6323 
6324 	if (un->un_reset_throttle_timeid != NULL) {
6325 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
6326 		un->un_reset_throttle_timeid = NULL;
6327 		mutex_exit(SD_MUTEX(un));
6328 		(void) untimeout(temp_id);
6329 		mutex_enter(SD_MUTEX(un));
6330 	}
6331 
6332 	if (un->un_dcvb_timeid != NULL) {
6333 		timeout_id_t temp_id = un->un_dcvb_timeid;
6334 		un->un_dcvb_timeid = NULL;
6335 		mutex_exit(SD_MUTEX(un));
6336 		(void) untimeout(temp_id);
6337 		mutex_enter(SD_MUTEX(un));
6338 	}
6339 
6340 	mutex_enter(&un->un_pm_mutex);
6341 	if (un->un_pm_timeid != NULL) {
6342 		timeout_id_t temp_id = un->un_pm_timeid;
6343 		un->un_pm_timeid = NULL;
6344 		mutex_exit(&un->un_pm_mutex);
6345 		mutex_exit(SD_MUTEX(un));
6346 		(void) untimeout(temp_id);
6347 		mutex_enter(SD_MUTEX(un));
6348 	} else {
6349 		mutex_exit(&un->un_pm_mutex);
6350 	}
6351 
6352 	if (un->un_rmw_msg_timeid != NULL) {
6353 		timeout_id_t temp_id = un->un_rmw_msg_timeid;
6354 		un->un_rmw_msg_timeid = NULL;
6355 		mutex_exit(SD_MUTEX(un));
6356 		(void) untimeout(temp_id);
6357 		mutex_enter(SD_MUTEX(un));
6358 	}
6359 
6360 	if (un->un_retry_timeid != NULL) {
6361 		timeout_id_t temp_id = un->un_retry_timeid;
6362 		un->un_retry_timeid = NULL;
6363 		mutex_exit(SD_MUTEX(un));
6364 		(void) untimeout(temp_id);
6365 		mutex_enter(SD_MUTEX(un));
6366 
6367 		if (un->un_retry_bp != NULL) {
6368 			un->un_retry_bp->av_forw = un->un_waitq_headp;
6369 			un->un_waitq_headp = un->un_retry_bp;
6370 			if (un->un_waitq_tailp == NULL) {
6371 				un->un_waitq_tailp = un->un_retry_bp;
6372 			}
6373 			un->un_retry_bp = NULL;
6374 			un->un_retry_statp = NULL;
6375 		}
6376 	}
6377 
6378 	if (un->un_direct_priority_timeid != NULL) {
6379 		timeout_id_t temp_id = un->un_direct_priority_timeid;
6380 		un->un_direct_priority_timeid = NULL;
6381 		mutex_exit(SD_MUTEX(un));
6382 		(void) untimeout(temp_id);
6383 		mutex_enter(SD_MUTEX(un));
6384 	}
6385 
6386 	if (un->un_f_is_fibre == TRUE) {
6387 		/*
6388 		 * Remove callbacks for insert and remove events
6389 		 */
6390 		if (un->un_insert_event != NULL) {
6391 			mutex_exit(SD_MUTEX(un));
6392 			(void) ddi_remove_event_handler(un->un_insert_cb_id);
6393 			mutex_enter(SD_MUTEX(un));
6394 			un->un_insert_event = NULL;
6395 		}
6396 
6397 		if (un->un_remove_event != NULL) {
6398 			mutex_exit(SD_MUTEX(un));
6399 			(void) ddi_remove_event_handler(un->un_remove_cb_id);
6400 			mutex_enter(SD_MUTEX(un));
6401 			un->un_remove_event = NULL;
6402 		}
6403 	}
6404 
6405 	mutex_exit(SD_MUTEX(un));
6406 
6407 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
6408 
6409 	return (DDI_SUCCESS);
6410 }
6411 
6412 
6413 /*
6414  *    Function: sd_ddi_resume
6415  *
6416  * Description: Performs system power-up operations..
6417  *
6418  * Return Code: DDI_SUCCESS
6419  *		DDI_FAILURE
6420  *
6421  *     Context: Kernel thread context
6422  */
6423 
6424 static int
6425 sd_ddi_resume(dev_info_t *devi)
6426 {
6427 	struct	sd_lun	*un;
6428 
6429 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6430 	if (un == NULL) {
6431 		return (DDI_FAILURE);
6432 	}
6433 
6434 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
6435 
6436 	mutex_enter(SD_MUTEX(un));
6437 	Restore_state(un);
6438 
6439 	/*
6440 	 * Restore the state which was saved to give the
6441 	 * the right state in un_last_state
6442 	 */
6443 	un->un_last_state = un->un_save_state;
6444 	/*
6445 	 * Note: throttle comes back at full.
6446 	 * Also note: this MUST be done before calling pm_raise_power
6447 	 * otherwise the system can get hung in biowait. The scenario where
6448 	 * this'll happen is under cpr suspend. Writing of the system
6449 	 * state goes through sddump, which writes 0 to un_throttle. If
6450 	 * writing the system state then fails, example if the partition is
6451 	 * too small, then cpr attempts a resume. If throttle isn't restored
6452 	 * from the saved value until after calling pm_raise_power then
6453 	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
6454 	 * in biowait.
6455 	 */
6456 	un->un_throttle = un->un_saved_throttle;
6457 
6458 	/*
6459 	 * The chance of failure is very rare as the only command done in power
6460 	 * entry point is START command when you transition from 0->1 or
6461 	 * unknown->1. Put it to SPINDLE ON state irrespective of the state at
6462 	 * which suspend was done. Ignore the return value as the resume should
6463 	 * not be failed. In the case of removable media the media need not be
6464 	 * inserted and hence there is a chance that raise power will fail with
6465 	 * media not present.
6466 	 */
6467 	if (un->un_f_attach_spinup) {
6468 		mutex_exit(SD_MUTEX(un));
6469 		(void) pm_raise_power(SD_DEVINFO(un), 0,
6470 		    SD_PM_STATE_ACTIVE(un));
6471 		mutex_enter(SD_MUTEX(un));
6472 	}
6473 
6474 	/*
6475 	 * Don't broadcast to the suspend cv and therefore possibly
6476 	 * start I/O until after power has been restored.
6477 	 */
6478 	cv_broadcast(&un->un_suspend_cv);
6479 	cv_broadcast(&un->un_state_cv);
6480 
6481 	/* restart thread */
6482 	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
6483 		scsi_watch_resume(un->un_swr_token);
6484 	}
6485 
6486 #if (defined(__fibre))
6487 	if (un->un_f_is_fibre == TRUE) {
6488 		/*
6489 		 * Add callbacks for insert and remove events
6490 		 */
6491 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
6492 			sd_init_event_callbacks(un);
6493 		}
6494 	}
6495 #endif
6496 
6497 	/*
6498 	 * Transport any pending commands to the target.
6499 	 *
6500 	 * If this is a low-activity device commands in queue will have to wait
6501 	 * until new commands come in, which may take awhile. Also, we
6502 	 * specifically don't check un_ncmds_in_transport because we know that
6503 	 * there really are no commands in progress after the unit was
6504 	 * suspended and we could have reached the throttle level, been
6505 	 * suspended, and have no new commands coming in for awhile. Highly
6506 	 * unlikely, but so is the low-activity disk scenario.
6507 	 */
6508 	ddi_xbuf_dispatch(un->un_xbuf_attr);
6509 
6510 	sd_start_cmds(un, NULL);
6511 	mutex_exit(SD_MUTEX(un));
6512 
6513 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
6514 
6515 	return (DDI_SUCCESS);
6516 }
6517 
6518 
6519 /*
6520  *    Function: sd_pm_state_change
6521  *
6522  * Description: Change the driver power state.
6523  *		Someone else is required to actually change the driver
6524  *		power level.
6525  *
6526  *   Arguments: un - driver soft state (unit) structure
6527  *              level - the power level that is changed to
6528  *              flag - to decide how to change the power state
6529  *
6530  * Return Code: DDI_SUCCESS
6531  *
6532  *     Context: Kernel thread context
6533  */
6534 static int
6535 sd_pm_state_change(struct sd_lun *un, int level, int flag)
6536 {
6537 	ASSERT(un != NULL);
6538 	SD_TRACE(SD_LOG_POWER, un, "sd_pm_state_change: entry\n");
6539 
6540 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6541 	mutex_enter(SD_MUTEX(un));
6542 
6543 	if (flag == SD_PM_STATE_ROLLBACK || SD_PM_IS_IO_CAPABLE(un, level)) {
6544 		un->un_power_level = level;
6545 		ASSERT(!mutex_owned(&un->un_pm_mutex));
6546 		mutex_enter(&un->un_pm_mutex);
6547 		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
6548 			un->un_pm_count++;
6549 			ASSERT(un->un_pm_count == 0);
6550 		}
6551 		mutex_exit(&un->un_pm_mutex);
6552 	} else {
6553 		/*
6554 		 * Exit if power management is not enabled for this device,
6555 		 * or if the device is being used by HA.
6556 		 */
6557 		if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
6558 		    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
6559 			mutex_exit(SD_MUTEX(un));
6560 			SD_TRACE(SD_LOG_POWER, un,
6561 			    "sd_pm_state_change: exiting\n");
6562 			return (DDI_FAILURE);
6563 		}
6564 
6565 		SD_INFO(SD_LOG_POWER, un, "sd_pm_state_change: "
6566 		    "un_ncmds_in_driver=%ld\n", un->un_ncmds_in_driver);
6567 
6568 		/*
6569 		 * See if the device is not busy, ie.:
6570 		 *    - we have no commands in the driver for this device
6571 		 *    - not waiting for resources
6572 		 */
6573 		if ((un->un_ncmds_in_driver == 0) &&
6574 		    (un->un_state != SD_STATE_RWAIT)) {
6575 			/*
6576 			 * The device is not busy, so it is OK to go to low
6577 			 * power state. Indicate low power, but rely on someone
6578 			 * else to actually change it.
6579 			 */
6580 			mutex_enter(&un->un_pm_mutex);
6581 			un->un_pm_count = -1;
6582 			mutex_exit(&un->un_pm_mutex);
6583 			un->un_power_level = level;
6584 		}
6585 	}
6586 
6587 	mutex_exit(SD_MUTEX(un));
6588 
6589 	SD_TRACE(SD_LOG_POWER, un, "sd_pm_state_change: exit\n");
6590 
6591 	return (DDI_SUCCESS);
6592 }
6593 
6594 
6595 /*
6596  *    Function: sd_pm_idletimeout_handler
6597  *
6598  * Description: A timer routine that's active only while a device is busy.
6599  *		The purpose is to extend slightly the pm framework's busy
6600  *		view of the device to prevent busy/idle thrashing for
6601  *		back-to-back commands. Do this by comparing the current time
6602  *		to the time at which the last command completed and when the
6603  *		difference is greater than sd_pm_idletime, call
6604  *		pm_idle_component. In addition to indicating idle to the pm
6605  *		framework, update the chain type to again use the internal pm
6606  *		layers of the driver.
6607  *
6608  *   Arguments: arg - driver soft state (unit) structure
6609  *
6610  *     Context: Executes in a timeout(9F) thread context
6611  */
6612 
6613 static void
6614 sd_pm_idletimeout_handler(void *arg)
6615 {
6616 	const hrtime_t idletime = sd_pm_idletime * NANOSEC;
6617 	struct sd_lun *un = arg;
6618 
6619 	mutex_enter(&sd_detach_mutex);
6620 	if (un->un_detach_count != 0) {
6621 		/* Abort if the instance is detaching */
6622 		mutex_exit(&sd_detach_mutex);
6623 		return;
6624 	}
6625 	mutex_exit(&sd_detach_mutex);
6626 
6627 	/*
6628 	 * Grab both mutexes, in the proper order, since we're accessing
6629 	 * both PM and softstate variables.
6630 	 */
6631 	mutex_enter(SD_MUTEX(un));
6632 	mutex_enter(&un->un_pm_mutex);
6633 	if (((gethrtime() - un->un_pm_idle_time) > idletime) &&
6634 	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
6635 		/*
6636 		 * Update the chain types.
6637 		 * This takes affect on the next new command received.
6638 		 */
6639 		if (un->un_f_non_devbsize_supported) {
6640 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
6641 		} else {
6642 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
6643 		}
6644 		un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD;
6645 
6646 		SD_TRACE(SD_LOG_IO_PM, un,
6647 		    "sd_pm_idletimeout_handler: idling device\n");
6648 		(void) pm_idle_component(SD_DEVINFO(un), 0);
6649 		un->un_pm_idle_timeid = NULL;
6650 	} else {
6651 		un->un_pm_idle_timeid =
6652 		    timeout(sd_pm_idletimeout_handler, un,
6653 		    (drv_usectohz((clock_t)300000))); /* 300 ms. */
6654 	}
6655 	mutex_exit(&un->un_pm_mutex);
6656 	mutex_exit(SD_MUTEX(un));
6657 }
6658 
6659 
6660 /*
6661  *    Function: sd_pm_timeout_handler
6662  *
6663  * Description: Callback to tell framework we are idle.
6664  *
6665  *     Context: timeout(9f) thread context.
6666  */
6667 
6668 static void
6669 sd_pm_timeout_handler(void *arg)
6670 {
6671 	struct sd_lun *un = arg;
6672 
6673 	(void) pm_idle_component(SD_DEVINFO(un), 0);
6674 	mutex_enter(&un->un_pm_mutex);
6675 	un->un_pm_timeid = NULL;
6676 	mutex_exit(&un->un_pm_mutex);
6677 }
6678 
6679 
6680 /*
6681  *    Function: sdpower
6682  *
6683  * Description: PM entry point.
6684  *
6685  * Return Code: DDI_SUCCESS
6686  *		DDI_FAILURE
6687  *
6688  *     Context: Kernel thread context
6689  */
6690 
6691 static int
6692 sdpower(dev_info_t *devi, int component, int level)
6693 {
6694 	struct sd_lun	*un;
6695 	int		instance;
6696 	int		rval = DDI_SUCCESS;
6697 	uint_t		i, log_page_size, maxcycles, ncycles;
6698 	uchar_t		*log_page_data;
6699 	int		log_sense_page;
6700 	int		medium_present;
6701 	time_t		intvlp;
6702 	struct pm_trans_data	sd_pm_tran_data;
6703 	uchar_t		save_state = SD_STATE_NORMAL;
6704 	int		sval;
6705 	uchar_t		state_before_pm;
6706 	int		got_semaphore_here;
6707 	sd_ssc_t	*ssc;
6708 	int	last_power_level = SD_SPINDLE_UNINIT;
6709 
6710 	instance = ddi_get_instance(devi);
6711 
6712 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
6713 	    !SD_PM_IS_LEVEL_VALID(un, level) || component != 0) {
6714 		return (DDI_FAILURE);
6715 	}
6716 
6717 	ssc = sd_ssc_init(un);
6718 
6719 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
6720 
6721 	/*
6722 	 * Must synchronize power down with close.
6723 	 * Attempt to decrement/acquire the open/close semaphore,
6724 	 * but do NOT wait on it. If it's not greater than zero,
6725 	 * ie. it can't be decremented without waiting, then
6726 	 * someone else, either open or close, already has it
6727 	 * and the try returns 0. Use that knowledge here to determine
6728 	 * if it's OK to change the device power level.
6729 	 * Also, only increment it on exit if it was decremented, ie. gotten,
6730 	 * here.
6731 	 */
6732 	got_semaphore_here = sema_tryp(&un->un_semoclose);
6733 
6734 	mutex_enter(SD_MUTEX(un));
6735 
6736 	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
6737 	    un->un_ncmds_in_driver);
6738 
6739 	/*
6740 	 * If un_ncmds_in_driver is non-zero it indicates commands are
6741 	 * already being processed in the driver, or if the semaphore was
6742 	 * not gotten here it indicates an open or close is being processed.
6743 	 * At the same time somebody is requesting to go to a lower power
6744 	 * that can't perform I/O, which can't happen, therefore we need to
6745 	 * return failure.
6746 	 */
6747 	if ((!SD_PM_IS_IO_CAPABLE(un, level)) &&
6748 	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
6749 		mutex_exit(SD_MUTEX(un));
6750 
6751 		if (got_semaphore_here != 0) {
6752 			sema_v(&un->un_semoclose);
6753 		}
6754 		SD_TRACE(SD_LOG_IO_PM, un,
6755 		    "sdpower: exit, device has queued cmds.\n");
6756 
6757 		goto sdpower_failed;
6758 	}
6759 
6760 	/*
6761 	 * if it is OFFLINE that means the disk is completely dead
6762 	 * in our case we have to put the disk in on or off by sending commands
6763 	 * Of course that will fail anyway so return back here.
6764 	 *
6765 	 * Power changes to a device that's OFFLINE or SUSPENDED
6766 	 * are not allowed.
6767 	 */
6768 	if ((un->un_state == SD_STATE_OFFLINE) ||
6769 	    (un->un_state == SD_STATE_SUSPENDED)) {
6770 		mutex_exit(SD_MUTEX(un));
6771 
6772 		if (got_semaphore_here != 0) {
6773 			sema_v(&un->un_semoclose);
6774 		}
6775 		SD_TRACE(SD_LOG_IO_PM, un,
6776 		    "sdpower: exit, device is off-line.\n");
6777 
6778 		goto sdpower_failed;
6779 	}
6780 
6781 	/*
6782 	 * Change the device's state to indicate it's power level
6783 	 * is being changed. Do this to prevent a power off in the
6784 	 * middle of commands, which is especially bad on devices
6785 	 * that are really powered off instead of just spun down.
6786 	 */
6787 	state_before_pm = un->un_state;
6788 	un->un_state = SD_STATE_PM_CHANGING;
6789 
6790 	mutex_exit(SD_MUTEX(un));
6791 
6792 	/*
6793 	 * If log sense command is not supported, bypass the
6794 	 * following checking, otherwise, check the log sense
6795 	 * information for this device.
6796 	 */
6797 	if (SD_PM_STOP_MOTOR_NEEDED(un, level) &&
6798 	    un->un_f_log_sense_supported) {
6799 		/*
6800 		 * Get the log sense information to understand whether the
6801 		 * the powercycle counts have gone beyond the threshhold.
6802 		 */
6803 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
6804 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
6805 
6806 		mutex_enter(SD_MUTEX(un));
6807 		log_sense_page = un->un_start_stop_cycle_page;
6808 		mutex_exit(SD_MUTEX(un));
6809 
6810 		rval = sd_send_scsi_LOG_SENSE(ssc, log_page_data,
6811 		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
6812 
6813 		if (rval != 0) {
6814 			if (rval == EIO)
6815 				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
6816 			else
6817 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6818 		}
6819 
6820 #ifdef	SDDEBUG
6821 		if (sd_force_pm_supported) {
6822 			/* Force a successful result */
6823 			rval = 0;
6824 		}
6825 #endif
6826 		if (rval != 0) {
6827 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
6828 			    "Log Sense Failed\n");
6829 
6830 			kmem_free(log_page_data, log_page_size);
6831 			/* Cannot support power management on those drives */
6832 
6833 			if (got_semaphore_here != 0) {
6834 				sema_v(&un->un_semoclose);
6835 			}
6836 			/*
6837 			 * On exit put the state back to it's original value
6838 			 * and broadcast to anyone waiting for the power
6839 			 * change completion.
6840 			 */
6841 			mutex_enter(SD_MUTEX(un));
6842 			un->un_state = state_before_pm;
6843 			cv_broadcast(&un->un_suspend_cv);
6844 			mutex_exit(SD_MUTEX(un));
6845 			SD_TRACE(SD_LOG_IO_PM, un,
6846 			    "sdpower: exit, Log Sense Failed.\n");
6847 
6848 			goto sdpower_failed;
6849 		}
6850 
6851 		/*
6852 		 * From the page data - Convert the essential information to
6853 		 * pm_trans_data
6854 		 */
6855 		maxcycles =
6856 		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
6857 		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
6858 
6859 		ncycles =
6860 		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
6861 		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
6862 
6863 		if (un->un_f_pm_log_sense_smart) {
6864 			sd_pm_tran_data.un.smart_count.allowed = maxcycles;
6865 			sd_pm_tran_data.un.smart_count.consumed = ncycles;
6866 			sd_pm_tran_data.un.smart_count.flag = 0;
6867 			sd_pm_tran_data.format = DC_SMART_FORMAT;
6868 		} else {
6869 			sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
6870 			sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
6871 			for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
6872 				sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
6873 				    log_page_data[8+i];
6874 			}
6875 			sd_pm_tran_data.un.scsi_cycles.flag = 0;
6876 			sd_pm_tran_data.format = DC_SCSI_FORMAT;
6877 		}
6878 
6879 		kmem_free(log_page_data, log_page_size);
6880 
6881 		/*
6882 		 * Call pm_trans_check routine to get the Ok from
6883 		 * the global policy
6884 		 */
6885 		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
6886 #ifdef	SDDEBUG
6887 		if (sd_force_pm_supported) {
6888 			/* Force a successful result */
6889 			rval = 1;
6890 		}
6891 #endif
6892 		switch (rval) {
6893 		case 0:
6894 			/*
6895 			 * Not Ok to Power cycle or error in parameters passed
6896 			 * Would have given the advised time to consider power
6897 			 * cycle. Based on the new intvlp parameter we are
6898 			 * supposed to pretend we are busy so that pm framework
6899 			 * will never call our power entry point. Because of
6900 			 * that install a timeout handler and wait for the
6901 			 * recommended time to elapse so that power management
6902 			 * can be effective again.
6903 			 *
6904 			 * To effect this behavior, call pm_busy_component to
6905 			 * indicate to the framework this device is busy.
6906 			 * By not adjusting un_pm_count the rest of PM in
6907 			 * the driver will function normally, and independent
6908 			 * of this but because the framework is told the device
6909 			 * is busy it won't attempt powering down until it gets
6910 			 * a matching idle. The timeout handler sends this.
6911 			 * Note: sd_pm_entry can't be called here to do this
6912 			 * because sdpower may have been called as a result
6913 			 * of a call to pm_raise_power from within sd_pm_entry.
6914 			 *
6915 			 * If a timeout handler is already active then
6916 			 * don't install another.
6917 			 */
6918 			mutex_enter(&un->un_pm_mutex);
6919 			if (un->un_pm_timeid == NULL) {
6920 				un->un_pm_timeid =
6921 				    timeout(sd_pm_timeout_handler,
6922 				    un, intvlp * drv_usectohz(1000000));
6923 				mutex_exit(&un->un_pm_mutex);
6924 				(void) pm_busy_component(SD_DEVINFO(un), 0);
6925 			} else {
6926 				mutex_exit(&un->un_pm_mutex);
6927 			}
6928 			if (got_semaphore_here != 0) {
6929 				sema_v(&un->un_semoclose);
6930 			}
6931 			/*
6932 			 * On exit put the state back to it's original value
6933 			 * and broadcast to anyone waiting for the power
6934 			 * change completion.
6935 			 */
6936 			mutex_enter(SD_MUTEX(un));
6937 			un->un_state = state_before_pm;
6938 			cv_broadcast(&un->un_suspend_cv);
6939 			mutex_exit(SD_MUTEX(un));
6940 
6941 			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
6942 			    "trans check Failed, not ok to power cycle.\n");
6943 
6944 			goto sdpower_failed;
6945 		case -1:
6946 			if (got_semaphore_here != 0) {
6947 				sema_v(&un->un_semoclose);
6948 			}
6949 			/*
6950 			 * On exit put the state back to it's original value
6951 			 * and broadcast to anyone waiting for the power
6952 			 * change completion.
6953 			 */
6954 			mutex_enter(SD_MUTEX(un));
6955 			un->un_state = state_before_pm;
6956 			cv_broadcast(&un->un_suspend_cv);
6957 			mutex_exit(SD_MUTEX(un));
6958 			SD_TRACE(SD_LOG_IO_PM, un,
6959 			    "sdpower: exit, trans check command Failed.\n");
6960 
6961 			goto sdpower_failed;
6962 		}
6963 	}
6964 
6965 	if (!SD_PM_IS_IO_CAPABLE(un, level)) {
6966 		/*
6967 		 * Save the last state... if the STOP FAILS we need it
6968 		 * for restoring
6969 		 */
6970 		mutex_enter(SD_MUTEX(un));
6971 		save_state = un->un_last_state;
6972 		last_power_level = un->un_power_level;
6973 		/*
6974 		 * There must not be any cmds. getting processed
6975 		 * in the driver when we get here. Power to the
6976 		 * device is potentially going off.
6977 		 */
6978 		ASSERT(un->un_ncmds_in_driver == 0);
6979 		mutex_exit(SD_MUTEX(un));
6980 
6981 		/*
6982 		 * For now PM suspend the device completely before spindle is
6983 		 * turned off
6984 		 */
6985 		if ((rval = sd_pm_state_change(un, level, SD_PM_STATE_CHANGE))
6986 		    == DDI_FAILURE) {
6987 			if (got_semaphore_here != 0) {
6988 				sema_v(&un->un_semoclose);
6989 			}
6990 			/*
6991 			 * On exit put the state back to it's original value
6992 			 * and broadcast to anyone waiting for the power
6993 			 * change completion.
6994 			 */
6995 			mutex_enter(SD_MUTEX(un));
6996 			un->un_state = state_before_pm;
6997 			un->un_power_level = last_power_level;
6998 			cv_broadcast(&un->un_suspend_cv);
6999 			mutex_exit(SD_MUTEX(un));
7000 			SD_TRACE(SD_LOG_IO_PM, un,
7001 			    "sdpower: exit, PM suspend Failed.\n");
7002 
7003 			goto sdpower_failed;
7004 		}
7005 	}
7006 
7007 	/*
7008 	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
7009 	 * close, or strategy. Dump no long uses this routine, it uses it's
7010 	 * own code so it can be done in polled mode.
7011 	 */
7012 
7013 	medium_present = TRUE;
7014 
7015 	/*
7016 	 * When powering up, issue a TUR in case the device is at unit
7017 	 * attention.  Don't do retries. Bypass the PM layer, otherwise
7018 	 * a deadlock on un_pm_busy_cv will occur.
7019 	 */
7020 	if (SD_PM_IS_IO_CAPABLE(un, level)) {
7021 		sval = sd_send_scsi_TEST_UNIT_READY(ssc,
7022 		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
7023 		if (sval != 0)
7024 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7025 	}
7026 
7027 	if (un->un_f_power_condition_supported) {
7028 		char *pm_condition_name[] = {"STOPPED", "STANDBY",
7029 		    "IDLE", "ACTIVE"};
7030 		SD_TRACE(SD_LOG_IO_PM, un,
7031 		    "sdpower: sending \'%s\' power condition",
7032 		    pm_condition_name[level]);
7033 		sval = sd_send_scsi_START_STOP_UNIT(ssc, SD_POWER_CONDITION,
7034 		    sd_pl2pc[level], SD_PATH_DIRECT);
7035 	} else {
7036 		SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
7037 		    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
7038 		sval = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
7039 		    ((level == SD_SPINDLE_ON) ? SD_TARGET_START :
7040 		    SD_TARGET_STOP), SD_PATH_DIRECT);
7041 	}
7042 	if (sval != 0) {
7043 		if (sval == EIO)
7044 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
7045 		else
7046 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7047 	}
7048 
7049 	/* Command failed, check for media present. */
7050 	if ((sval == ENXIO) && un->un_f_has_removable_media) {
7051 		medium_present = FALSE;
7052 	}
7053 
7054 	/*
7055 	 * The conditions of interest here are:
7056 	 *   if a spindle off with media present fails,
7057 	 *	then restore the state and return an error.
7058 	 *   else if a spindle on fails,
7059 	 *	then return an error (there's no state to restore).
7060 	 * In all other cases we setup for the new state
7061 	 * and return success.
7062 	 */
7063 	if (!SD_PM_IS_IO_CAPABLE(un, level)) {
7064 		if ((medium_present == TRUE) && (sval != 0)) {
7065 			/* The stop command from above failed */
7066 			rval = DDI_FAILURE;
7067 			/*
7068 			 * The stop command failed, and we have media
7069 			 * present. Put the level back by calling the
7070 			 * sd_pm_resume() and set the state back to
7071 			 * it's previous value.
7072 			 */
7073 			(void) sd_pm_state_change(un, last_power_level,
7074 			    SD_PM_STATE_ROLLBACK);
7075 			mutex_enter(SD_MUTEX(un));
7076 			un->un_last_state = save_state;
7077 			mutex_exit(SD_MUTEX(un));
7078 		} else if (un->un_f_monitor_media_state) {
7079 			/*
7080 			 * The stop command from above succeeded.
7081 			 * Terminate watch thread in case of removable media
7082 			 * devices going into low power state. This is as per
7083 			 * the requirements of pm framework, otherwise commands
7084 			 * will be generated for the device (through watch
7085 			 * thread), even when the device is in low power state.
7086 			 */
7087 			mutex_enter(SD_MUTEX(un));
7088 			un->un_f_watcht_stopped = FALSE;
7089 			if (un->un_swr_token != NULL) {
7090 				opaque_t temp_token = un->un_swr_token;
7091 				un->un_f_watcht_stopped = TRUE;
7092 				un->un_swr_token = NULL;
7093 				mutex_exit(SD_MUTEX(un));
7094 				(void) scsi_watch_request_terminate(temp_token,
7095 				    SCSI_WATCH_TERMINATE_ALL_WAIT);
7096 			} else {
7097 				mutex_exit(SD_MUTEX(un));
7098 			}
7099 		}
7100 	} else {
7101 		/*
7102 		 * The level requested is I/O capable.
7103 		 * Legacy behavior: return success on a failed spinup
7104 		 * if there is no media in the drive.
7105 		 * Do this by looking at medium_present here.
7106 		 */
7107 		if ((sval != 0) && medium_present) {
7108 			/* The start command from above failed */
7109 			rval = DDI_FAILURE;
7110 		} else {
7111 			/*
7112 			 * The start command from above succeeded
7113 			 * PM resume the devices now that we have
7114 			 * started the disks
7115 			 */
7116 			(void) sd_pm_state_change(un, level,
7117 			    SD_PM_STATE_CHANGE);
7118 
7119 			/*
7120 			 * Resume the watch thread since it was suspended
7121 			 * when the device went into low power mode.
7122 			 */
7123 			if (un->un_f_monitor_media_state) {
7124 				mutex_enter(SD_MUTEX(un));
7125 				if (un->un_f_watcht_stopped == TRUE) {
7126 					opaque_t temp_token;
7127 
7128 					un->un_f_watcht_stopped = FALSE;
7129 					mutex_exit(SD_MUTEX(un));
7130 					temp_token =
7131 					    sd_watch_request_submit(un);
7132 					mutex_enter(SD_MUTEX(un));
7133 					un->un_swr_token = temp_token;
7134 				}
7135 				mutex_exit(SD_MUTEX(un));
7136 			}
7137 		}
7138 	}
7139 
7140 	if (got_semaphore_here != 0) {
7141 		sema_v(&un->un_semoclose);
7142 	}
7143 	/*
7144 	 * On exit put the state back to it's original value
7145 	 * and broadcast to anyone waiting for the power
7146 	 * change completion.
7147 	 */
7148 	mutex_enter(SD_MUTEX(un));
7149 	un->un_state = state_before_pm;
7150 	cv_broadcast(&un->un_suspend_cv);
7151 	mutex_exit(SD_MUTEX(un));
7152 
7153 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
7154 
7155 	sd_ssc_fini(ssc);
7156 	return (rval);
7157 
7158 sdpower_failed:
7159 
7160 	sd_ssc_fini(ssc);
7161 	return (DDI_FAILURE);
7162 }
7163 
7164 
7165 
7166 /*
7167  *    Function: sdattach
7168  *
7169  * Description: Driver's attach(9e) entry point function.
7170  *
7171  *   Arguments: devi - opaque device info handle
7172  *		cmd  - attach  type
7173  *
7174  * Return Code: DDI_SUCCESS
7175  *		DDI_FAILURE
7176  *
7177  *     Context: Kernel thread context
7178  */
7179 
7180 static int
7181 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
7182 {
7183 	switch (cmd) {
7184 	case DDI_ATTACH:
7185 		return (sd_unit_attach(devi));
7186 	case DDI_RESUME:
7187 		return (sd_ddi_resume(devi));
7188 	default:
7189 		break;
7190 	}
7191 	return (DDI_FAILURE);
7192 }
7193 
7194 
7195 /*
7196  *    Function: sddetach
7197  *
7198  * Description: Driver's detach(9E) entry point function.
7199  *
7200  *   Arguments: devi - opaque device info handle
7201  *		cmd  - detach  type
7202  *
7203  * Return Code: DDI_SUCCESS
7204  *		DDI_FAILURE
7205  *
7206  *     Context: Kernel thread context
7207  */
7208 
7209 static int
7210 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
7211 {
7212 	switch (cmd) {
7213 	case DDI_DETACH:
7214 		return (sd_unit_detach(devi));
7215 	case DDI_SUSPEND:
7216 		return (sd_ddi_suspend(devi));
7217 	default:
7218 		break;
7219 	}
7220 	return (DDI_FAILURE);
7221 }
7222 
7223 
7224 /*
7225  *     Function: sd_sync_with_callback
7226  *
7227  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
7228  *		 state while the callback routine is active.
7229  *
7230  *    Arguments: un: softstate structure for the instance
7231  *
7232  *	Context: Kernel thread context
7233  */
7234 
7235 static void
7236 sd_sync_with_callback(struct sd_lun *un)
7237 {
7238 	ASSERT(un != NULL);
7239 
7240 	mutex_enter(SD_MUTEX(un));
7241 
7242 	ASSERT(un->un_in_callback >= 0);
7243 
7244 	while (un->un_in_callback > 0) {
7245 		mutex_exit(SD_MUTEX(un));
7246 		delay(2);
7247 		mutex_enter(SD_MUTEX(un));
7248 	}
7249 
7250 	mutex_exit(SD_MUTEX(un));
7251 }
7252 
7253 /*
7254  *    Function: sd_unit_attach
7255  *
7256  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
7257  *		the soft state structure for the device and performs
7258  *		all necessary structure and device initializations.
7259  *
7260  *   Arguments: devi: the system's dev_info_t for the device.
7261  *
7262  * Return Code: DDI_SUCCESS if attach is successful.
7263  *		DDI_FAILURE if any part of the attach fails.
7264  *
7265  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
7266  *		Kernel thread context only.  Can sleep.
7267  */
7268 
7269 static int
7270 sd_unit_attach(dev_info_t *devi)
7271 {
7272 	struct	scsi_device	*devp;
7273 	struct	sd_lun		*un;
7274 	char			*variantp;
7275 	char			name_str[48];
7276 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
7277 	int	instance;
7278 	int	rval;
7279 	int	wc_enabled;
7280 	int	wc_changeable;
7281 	int	tgt;
7282 	uint64_t	capacity;
7283 	uint_t		lbasize = 0;
7284 	dev_info_t	*pdip = ddi_get_parent(devi);
7285 	int		offbyone = 0;
7286 	int		geom_label_valid = 0;
7287 	sd_ssc_t	*ssc;
7288 	int		status;
7289 	struct sd_fm_internal	*sfip = NULL;
7290 	int		max_xfer_size;
7291 
7292 	/*
7293 	 * Retrieve the target driver's private data area. This was set
7294 	 * up by the HBA.
7295 	 */
7296 	devp = ddi_get_driver_private(devi);
7297 
7298 	/*
7299 	 * Retrieve the target ID of the device.
7300 	 */
7301 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7302 	    SCSI_ADDR_PROP_TARGET, -1);
7303 
7304 	/*
7305 	 * Since we have no idea what state things were left in by the last
7306 	 * user of the device, set up some 'default' settings, ie. turn 'em
7307 	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
7308 	 * Do this before the scsi_probe, which sends an inquiry.
7309 	 * This is a fix for bug (4430280).
7310 	 * Of special importance is wide-xfer. The drive could have been left
7311 	 * in wide transfer mode by the last driver to communicate with it,
7312 	 * this includes us. If that's the case, and if the following is not
7313 	 * setup properly or we don't re-negotiate with the drive prior to
7314 	 * transferring data to/from the drive, it causes bus parity errors,
7315 	 * data overruns, and unexpected interrupts. This first occurred when
7316 	 * the fix for bug (4378686) was made.
7317 	 */
7318 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
7319 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
7320 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
7321 
7322 	/*
7323 	 * Currently, scsi_ifsetcap sets tagged-qing capability for all LUNs
7324 	 * on a target. Setting it per lun instance actually sets the
7325 	 * capability of this target, which affects those luns already
7326 	 * attached on the same target. So during attach, we can only disable
7327 	 * this capability only when no other lun has been attached on this
7328 	 * target. By doing this, we assume a target has the same tagged-qing
7329 	 * capability for every lun. The condition can be removed when HBA
7330 	 * is changed to support per lun based tagged-qing capability.
7331 	 */
7332 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
7333 		(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
7334 	}
7335 
7336 	/*
7337 	 * Use scsi_probe() to issue an INQUIRY command to the device.
7338 	 * This call will allocate and fill in the scsi_inquiry structure
7339 	 * and point the sd_inq member of the scsi_device structure to it.
7340 	 * If the attach succeeds, then this memory will not be de-allocated
7341 	 * (via scsi_unprobe()) until the instance is detached.
7342 	 */
7343 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
7344 		goto probe_failed;
7345 	}
7346 
7347 	/*
7348 	 * Check the device type as specified in the inquiry data and
7349 	 * claim it if it is of a type that we support.
7350 	 */
7351 	switch (devp->sd_inq->inq_dtype) {
7352 	case DTYPE_DIRECT:
7353 		break;
7354 	case DTYPE_RODIRECT:
7355 		break;
7356 	case DTYPE_OPTICAL:
7357 		break;
7358 	case DTYPE_NOTPRESENT:
7359 	default:
7360 		/* Unsupported device type; fail the attach. */
7361 		goto probe_failed;
7362 	}
7363 
7364 	/*
7365 	 * Allocate the soft state structure for this unit.
7366 	 *
7367 	 * We rely upon this memory being set to all zeroes by
7368 	 * ddi_soft_state_zalloc().  We assume that any member of the
7369 	 * soft state structure that is not explicitly initialized by
7370 	 * this routine will have a value of zero.
7371 	 */
7372 	instance = ddi_get_instance(devp->sd_dev);
7373 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
7374 		goto probe_failed;
7375 	}
7376 
7377 	/*
7378 	 * Retrieve a pointer to the newly-allocated soft state.
7379 	 *
7380 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
7381 	 * was successful, unless something has gone horribly wrong and the
7382 	 * ddi's soft state internals are corrupt (in which case it is
7383 	 * probably better to halt here than just fail the attach....)
7384 	 */
7385 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
7386 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
7387 		    instance);
7388 		/*NOTREACHED*/
7389 	}
7390 
7391 	/*
7392 	 * Link the back ptr of the driver soft state to the scsi_device
7393 	 * struct for this lun.
7394 	 * Save a pointer to the softstate in the driver-private area of
7395 	 * the scsi_device struct.
7396 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
7397 	 * we first set un->un_sd below.
7398 	 */
7399 	un->un_sd = devp;
7400 	devp->sd_private = (opaque_t)un;
7401 
7402 	/*
7403 	 * The following must be after devp is stored in the soft state struct.
7404 	 */
7405 #ifdef SDDEBUG
7406 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7407 	    "%s_unit_attach: un:0x%p instance:%d\n",
7408 	    ddi_driver_name(devi), un, instance);
7409 #endif
7410 
7411 	/*
7412 	 * Set up the device type and node type (for the minor nodes).
7413 	 * By default we assume that the device can at least support the
7414 	 * Common Command Set. Call it a CD-ROM if it reports itself
7415 	 * as a RODIRECT device.
7416 	 */
7417 	switch (devp->sd_inq->inq_dtype) {
7418 	case DTYPE_RODIRECT:
7419 		un->un_node_type = DDI_NT_CD_CHAN;
7420 		un->un_ctype	 = CTYPE_CDROM;
7421 		break;
7422 	case DTYPE_OPTICAL:
7423 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7424 		un->un_ctype	 = CTYPE_ROD;
7425 		break;
7426 	default:
7427 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7428 		un->un_ctype	 = CTYPE_CCS;
7429 		break;
7430 	}
7431 
7432 	/*
7433 	 * Try to read the interconnect type from the HBA.
7434 	 *
7435 	 * Note: This driver is currently compiled as two binaries, a parallel
7436 	 * scsi version (sd) and a fibre channel version (ssd). All functional
7437 	 * differences are determined at compile time. In the future a single
7438 	 * binary will be provided and the interconnect type will be used to
7439 	 * differentiate between fibre and parallel scsi behaviors. At that time
7440 	 * it will be necessary for all fibre channel HBAs to support this
7441 	 * property.
7442 	 *
7443 	 * set un_f_is_fiber to TRUE ( default fiber )
7444 	 */
7445 	un->un_f_is_fibre = TRUE;
7446 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
7447 	case INTERCONNECT_SSA:
7448 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
7449 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7450 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
7451 		break;
7452 	case INTERCONNECT_PARALLEL:
7453 		un->un_f_is_fibre = FALSE;
7454 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7455 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7456 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
7457 		break;
7458 	case INTERCONNECT_SAS:
7459 		un->un_f_is_fibre = FALSE;
7460 		un->un_interconnect_type = SD_INTERCONNECT_SAS;
7461 		un->un_node_type = DDI_NT_BLOCK_SAS;
7462 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7463 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SAS\n", un);
7464 		break;
7465 	case INTERCONNECT_SATA:
7466 		un->un_f_is_fibre = FALSE;
7467 		un->un_interconnect_type = SD_INTERCONNECT_SATA;
7468 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7469 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SATA\n", un);
7470 		break;
7471 	case INTERCONNECT_FIBRE:
7472 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
7473 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7474 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
7475 		break;
7476 	case INTERCONNECT_FABRIC:
7477 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
7478 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
7479 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7480 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
7481 		break;
7482 	default:
7483 #ifdef SD_DEFAULT_INTERCONNECT_TYPE
7484 		/*
7485 		 * The HBA does not support the "interconnect-type" property
7486 		 * (or did not provide a recognized type).
7487 		 *
7488 		 * Note: This will be obsoleted when a single fibre channel
7489 		 * and parallel scsi driver is delivered. In the meantime the
7490 		 * interconnect type will be set to the platform default.If that
7491 		 * type is not parallel SCSI, it means that we should be
7492 		 * assuming "ssd" semantics. However, here this also means that
7493 		 * the FC HBA is not supporting the "interconnect-type" property
7494 		 * like we expect it to, so log this occurrence.
7495 		 */
7496 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
7497 		if (!SD_IS_PARALLEL_SCSI(un)) {
7498 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7499 			    "sd_unit_attach: un:0x%p Assuming "
7500 			    "INTERCONNECT_FIBRE\n", un);
7501 		} else {
7502 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7503 			    "sd_unit_attach: un:0x%p Assuming "
7504 			    "INTERCONNECT_PARALLEL\n", un);
7505 			un->un_f_is_fibre = FALSE;
7506 		}
7507 #else
7508 		/*
7509 		 * Note: This source will be implemented when a single fibre
7510 		 * channel and parallel scsi driver is delivered. The default
7511 		 * will be to assume that if a device does not support the
7512 		 * "interconnect-type" property it is a parallel SCSI HBA and
7513 		 * we will set the interconnect type for parallel scsi.
7514 		 */
7515 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7516 		un->un_f_is_fibre = FALSE;
7517 #endif
7518 		break;
7519 	}
7520 
7521 	if (un->un_f_is_fibre == TRUE) {
7522 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
7523 		    SCSI_VERSION_3) {
7524 			switch (un->un_interconnect_type) {
7525 			case SD_INTERCONNECT_FIBRE:
7526 			case SD_INTERCONNECT_SSA:
7527 				un->un_node_type = DDI_NT_BLOCK_WWN;
7528 				break;
7529 			default:
7530 				break;
7531 			}
7532 		}
7533 	}
7534 
7535 	/*
7536 	 * Initialize the Request Sense command for the target
7537 	 */
7538 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
7539 		goto alloc_rqs_failed;
7540 	}
7541 
7542 	/*
7543 	 * Set un_retry_count with SD_RETRY_COUNT, this is ok for Sparc
7544 	 * with separate binary for sd and ssd.
7545 	 *
7546 	 * x86 has 1 binary, un_retry_count is set base on connection type.
7547 	 * The hardcoded values will go away when Sparc uses 1 binary
7548 	 * for sd and ssd.  This hardcoded values need to match
7549 	 * SD_RETRY_COUNT in sddef.h
7550 	 * The value used is base on interconnect type.
7551 	 * fibre = 3, parallel = 5
7552 	 */
7553 #if defined(__x86)
7554 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
7555 #else
7556 	un->un_retry_count = SD_RETRY_COUNT;
7557 #endif
7558 
7559 	/*
7560 	 * Set the per disk retry count to the default number of retries
7561 	 * for disks and CDROMs. This value can be overridden by the
7562 	 * disk property list or an entry in sd.conf.
7563 	 */
7564 	un->un_notready_retry_count =
7565 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
7566 	    : DISK_NOT_READY_RETRY_COUNT(un);
7567 
7568 	/*
7569 	 * Set the busy retry count to the default value of un_retry_count.
7570 	 * This can be overridden by entries in sd.conf or the device
7571 	 * config table.
7572 	 */
7573 	un->un_busy_retry_count = un->un_retry_count;
7574 
7575 	/*
7576 	 * Init the reset threshold for retries.  This number determines
7577 	 * how many retries must be performed before a reset can be issued
7578 	 * (for certain error conditions). This can be overridden by entries
7579 	 * in sd.conf or the device config table.
7580 	 */
7581 	un->un_reset_retry_count = (un->un_retry_count / 2);
7582 
7583 	/*
7584 	 * Set the victim_retry_count to the default un_retry_count
7585 	 */
7586 	un->un_victim_retry_count = (2 * un->un_retry_count);
7587 
7588 	/*
7589 	 * Set the reservation release timeout to the default value of
7590 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
7591 	 * device config table.
7592 	 */
7593 	un->un_reserve_release_time = 5;
7594 
7595 	/*
7596 	 * Set up the default maximum transfer size. Note that this may
7597 	 * get updated later in the attach, when setting up default wide
7598 	 * operations for disks.
7599 	 */
7600 #if defined(__x86)
7601 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
7602 	un->un_partial_dma_supported = 1;
7603 #else
7604 	un->un_max_xfer_size = (uint_t)maxphys;
7605 #endif
7606 
7607 	/*
7608 	 * Get "allow bus device reset" property (defaults to "enabled" if
7609 	 * the property was not defined). This is to disable bus resets for
7610 	 * certain kinds of error recovery. Note: In the future when a run-time
7611 	 * fibre check is available the soft state flag should default to
7612 	 * enabled.
7613 	 */
7614 	if (un->un_f_is_fibre == TRUE) {
7615 		un->un_f_allow_bus_device_reset = TRUE;
7616 	} else {
7617 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7618 		    "allow-bus-device-reset", 1) != 0) {
7619 			un->un_f_allow_bus_device_reset = TRUE;
7620 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7621 			    "sd_unit_attach: un:0x%p Bus device reset "
7622 			    "enabled\n", un);
7623 		} else {
7624 			un->un_f_allow_bus_device_reset = FALSE;
7625 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7626 			    "sd_unit_attach: un:0x%p Bus device reset "
7627 			    "disabled\n", un);
7628 		}
7629 	}
7630 
7631 	/*
7632 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
7633 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
7634 	 *
7635 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
7636 	 * property. The new "variant" property with a value of "atapi" has been
7637 	 * introduced so that future 'variants' of standard SCSI behavior (like
7638 	 * atapi) could be specified by the underlying HBA drivers by supplying
7639 	 * a new value for the "variant" property, instead of having to define a
7640 	 * new property.
7641 	 */
7642 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
7643 		un->un_f_cfg_is_atapi = TRUE;
7644 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7645 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
7646 	}
7647 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
7648 	    &variantp) == DDI_PROP_SUCCESS) {
7649 		if (strcmp(variantp, "atapi") == 0) {
7650 			un->un_f_cfg_is_atapi = TRUE;
7651 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7652 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
7653 		}
7654 		ddi_prop_free(variantp);
7655 	}
7656 
7657 	un->un_cmd_timeout	= SD_IO_TIME;
7658 
7659 	un->un_busy_timeout  = SD_BSY_TIMEOUT;
7660 
7661 	/* Info on current states, statuses, etc. (Updated frequently) */
7662 	un->un_state		= SD_STATE_NORMAL;
7663 	un->un_last_state	= SD_STATE_NORMAL;
7664 
7665 	/* Control & status info for command throttling */
7666 	un->un_throttle		= sd_max_throttle;
7667 	un->un_saved_throttle	= sd_max_throttle;
7668 	un->un_min_throttle	= sd_min_throttle;
7669 
7670 	if (un->un_f_is_fibre == TRUE) {
7671 		un->un_f_use_adaptive_throttle = TRUE;
7672 	} else {
7673 		un->un_f_use_adaptive_throttle = FALSE;
7674 	}
7675 
7676 	/* Removable media support. */
7677 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
7678 	un->un_mediastate		= DKIO_NONE;
7679 	un->un_specified_mediastate	= DKIO_NONE;
7680 
7681 	/* CVs for suspend/resume (PM or DR) */
7682 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
7683 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
7684 
7685 	/* Power management support. */
7686 	un->un_power_level = SD_SPINDLE_UNINIT;
7687 
7688 	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
7689 	un->un_f_wcc_inprog = 0;
7690 
7691 	/*
7692 	 * The open/close semaphore is used to serialize threads executing
7693 	 * in the driver's open & close entry point routines for a given
7694 	 * instance.
7695 	 */
7696 	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
7697 
7698 	/*
7699 	 * The conf file entry and softstate variable is a forceful override,
7700 	 * meaning a non-zero value must be entered to change the default.
7701 	 */
7702 	un->un_f_disksort_disabled = FALSE;
7703 	un->un_f_rmw_type = SD_RMW_TYPE_DEFAULT;
7704 	un->un_f_enable_rmw = FALSE;
7705 
7706 	/*
7707 	 * GET EVENT STATUS NOTIFICATION media polling enabled by default, but
7708 	 * can be overridden via [s]sd-config-list "mmc-gesn-polling" property.
7709 	 */
7710 	un->un_f_mmc_gesn_polling = TRUE;
7711 
7712 	/*
7713 	 * physical sector size defaults to DEV_BSIZE currently. We can
7714 	 * override this value via the driver configuration file so we must
7715 	 * set it before calling sd_read_unit_properties().
7716 	 */
7717 	un->un_phy_blocksize = DEV_BSIZE;
7718 
7719 	/*
7720 	 * Retrieve the properties from the static driver table or the driver
7721 	 * configuration file (.conf) for this unit and update the soft state
7722 	 * for the device as needed for the indicated properties.
7723 	 * Note: the property configuration needs to occur here as some of the
7724 	 * following routines may have dependencies on soft state flags set
7725 	 * as part of the driver property configuration.
7726 	 */
7727 	sd_read_unit_properties(un);
7728 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7729 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
7730 
7731 	/*
7732 	 * Only if a device has "hotpluggable" property, it is
7733 	 * treated as hotpluggable device. Otherwise, it is
7734 	 * regarded as non-hotpluggable one.
7735 	 */
7736 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
7737 	    -1) != -1) {
7738 		un->un_f_is_hotpluggable = TRUE;
7739 	}
7740 
7741 	/*
7742 	 * set unit's attributes(flags) according to "hotpluggable" and
7743 	 * RMB bit in INQUIRY data.
7744 	 */
7745 	sd_set_unit_attributes(un, devi);
7746 
7747 	/*
7748 	 * By default, we mark the capacity, lbasize, and geometry
7749 	 * as invalid. Only if we successfully read a valid capacity
7750 	 * will we update the un_blockcount and un_tgt_blocksize with the
7751 	 * valid values (the geometry will be validated later).
7752 	 */
7753 	un->un_f_blockcount_is_valid	= FALSE;
7754 	un->un_f_tgt_blocksize_is_valid	= FALSE;
7755 
7756 	/*
7757 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
7758 	 * otherwise.
7759 	 */
7760 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
7761 	un->un_blockcount = 0;
7762 
7763 	/*
7764 	 * Set up the per-instance info needed to determine the correct
7765 	 * CDBs and other info for issuing commands to the target.
7766 	 */
7767 	sd_init_cdb_limits(un);
7768 
7769 	/*
7770 	 * Set up the IO chains to use, based upon the target type.
7771 	 */
7772 	if (un->un_f_non_devbsize_supported) {
7773 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
7774 	} else {
7775 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
7776 	}
7777 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
7778 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
7779 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
7780 
7781 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
7782 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
7783 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
7784 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
7785 
7786 
7787 	if (ISCD(un)) {
7788 		un->un_additional_codes = sd_additional_codes;
7789 	} else {
7790 		un->un_additional_codes = NULL;
7791 	}
7792 
7793 	/*
7794 	 * Create the kstats here so they can be available for attach-time
7795 	 * routines that send commands to the unit (either polled or via
7796 	 * sd_send_scsi_cmd).
7797 	 *
7798 	 * Note: This is a critical sequence that needs to be maintained:
7799 	 *	1) Instantiate the kstats here, before any routines using the
7800 	 *	   iopath (i.e. sd_send_scsi_cmd).
7801 	 *	2) Instantiate and initialize the partition stats
7802 	 *	   (sd_set_pstats).
7803 	 *	3) Initialize the error stats (sd_set_errstats), following
7804 	 *	   sd_validate_geometry(),sd_register_devid(),
7805 	 *	   and sd_cache_control().
7806 	 */
7807 
7808 	un->un_stats = kstat_create(sd_label, instance,
7809 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
7810 	if (un->un_stats != NULL) {
7811 		un->un_stats->ks_lock = SD_MUTEX(un);
7812 		kstat_install(un->un_stats);
7813 	}
7814 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7815 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
7816 
7817 	un->un_unmapstats_ks = kstat_create(sd_label, instance, "unmapstats",
7818 	    "misc", KSTAT_TYPE_NAMED, sizeof (*un->un_unmapstats) /
7819 	    sizeof (kstat_named_t), 0);
7820 	if (un->un_unmapstats_ks) {
7821 		un->un_unmapstats = un->un_unmapstats_ks->ks_data;
7822 
7823 		kstat_named_init(&un->un_unmapstats->us_cmds,
7824 		    "commands", KSTAT_DATA_UINT64);
7825 		kstat_named_init(&un->un_unmapstats->us_errs,
7826 		    "errors", KSTAT_DATA_UINT64);
7827 		kstat_named_init(&un->un_unmapstats->us_extents,
7828 		    "extents", KSTAT_DATA_UINT64);
7829 		kstat_named_init(&un->un_unmapstats->us_bytes,
7830 		    "bytes", KSTAT_DATA_UINT64);
7831 
7832 		kstat_install(un->un_unmapstats_ks);
7833 	} else {
7834 		cmn_err(CE_NOTE, "!Cannot create unmap kstats for disk %d",
7835 		    instance);
7836 	}
7837 
7838 	sd_create_errstats(un, instance);
7839 	if (un->un_errstats == NULL) {
7840 		goto create_errstats_failed;
7841 	}
7842 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7843 	    "sd_unit_attach: un:0x%p errstats created\n", un);
7844 
7845 	/*
7846 	 * The following if/else code was relocated here from below as part
7847 	 * of the fix for bug (4430280). However with the default setup added
7848 	 * on entry to this routine, it's no longer absolutely necessary for
7849 	 * this to be before the call to sd_spin_up_unit.
7850 	 */
7851 	if (SD_IS_PARALLEL_SCSI(un) || SD_IS_SERIAL(un)) {
7852 		int tq_trigger_flag = (((devp->sd_inq->inq_ansi == 4) ||
7853 		    (devp->sd_inq->inq_ansi == 5)) &&
7854 		    devp->sd_inq->inq_bque) || devp->sd_inq->inq_cmdque;
7855 
7856 		/*
7857 		 * If tagged queueing is supported by the target
7858 		 * and by the host adapter then we will enable it
7859 		 */
7860 		un->un_tagflags = 0;
7861 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) && tq_trigger_flag &&
7862 		    (un->un_f_arq_enabled == TRUE)) {
7863 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
7864 			    1, 1) == 1) {
7865 				un->un_tagflags = FLAG_STAG;
7866 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7867 				    "sd_unit_attach: un:0x%p tag queueing "
7868 				    "enabled\n", un);
7869 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
7870 			    "untagged-qing", 0) == 1) {
7871 				un->un_f_opt_queueing = TRUE;
7872 				un->un_saved_throttle = un->un_throttle =
7873 				    min(un->un_throttle, 3);
7874 			} else {
7875 				un->un_f_opt_queueing = FALSE;
7876 				un->un_saved_throttle = un->un_throttle = 1;
7877 			}
7878 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
7879 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
7880 			/* The Host Adapter supports internal queueing. */
7881 			un->un_f_opt_queueing = TRUE;
7882 			un->un_saved_throttle = un->un_throttle =
7883 			    min(un->un_throttle, 3);
7884 		} else {
7885 			un->un_f_opt_queueing = FALSE;
7886 			un->un_saved_throttle = un->un_throttle = 1;
7887 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7888 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
7889 		}
7890 
7891 		/*
7892 		 * Enable large transfers for SATA/SAS drives
7893 		 */
7894 		if (SD_IS_SERIAL(un)) {
7895 			un->un_max_xfer_size =
7896 			    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7897 			    sd_max_xfer_size, SD_MAX_XFER_SIZE);
7898 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7899 			    "sd_unit_attach: un:0x%p max transfer "
7900 			    "size=0x%x\n", un, un->un_max_xfer_size);
7901 
7902 		}
7903 
7904 		/* Setup or tear down default wide operations for disks */
7905 
7906 		/*
7907 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
7908 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
7909 		 * system and be set to different values. In the future this
7910 		 * code may need to be updated when the ssd module is
7911 		 * obsoleted and removed from the system. (4299588)
7912 		 */
7913 		if (SD_IS_PARALLEL_SCSI(un) &&
7914 		    (devp->sd_inq->inq_rdf == RDF_SCSI2) &&
7915 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
7916 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
7917 			    1, 1) == 1) {
7918 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7919 				    "sd_unit_attach: un:0x%p Wide Transfer "
7920 				    "enabled\n", un);
7921 			}
7922 
7923 			/*
7924 			 * If tagged queuing has also been enabled, then
7925 			 * enable large xfers
7926 			 */
7927 			if (un->un_saved_throttle == sd_max_throttle) {
7928 				un->un_max_xfer_size =
7929 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7930 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
7931 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7932 				    "sd_unit_attach: un:0x%p max transfer "
7933 				    "size=0x%x\n", un, un->un_max_xfer_size);
7934 			}
7935 		} else {
7936 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
7937 			    0, 1) == 1) {
7938 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7939 				    "sd_unit_attach: un:0x%p "
7940 				    "Wide Transfer disabled\n", un);
7941 			}
7942 		}
7943 	} else {
7944 		un->un_tagflags = FLAG_STAG;
7945 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
7946 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
7947 	}
7948 
7949 	/*
7950 	 * If this target supports LUN reset, try to enable it.
7951 	 */
7952 	if (un->un_f_lun_reset_enabled) {
7953 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
7954 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7955 			    "un:0x%p lun_reset capability set\n", un);
7956 		} else {
7957 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7958 			    "un:0x%p lun-reset capability not set\n", un);
7959 		}
7960 	}
7961 
7962 	/*
7963 	 * Adjust the maximum transfer size. This is to fix
7964 	 * the problem of partial DMA support on SPARC. Some
7965 	 * HBA driver, like aac, has very small dma_attr_maxxfer
7966 	 * size, which requires partial DMA support on SPARC.
7967 	 * In the future the SPARC pci nexus driver may solve
7968 	 * the problem instead of this fix.
7969 	 */
7970 	max_xfer_size = scsi_ifgetcap(SD_ADDRESS(un), "dma-max", 1);
7971 	if ((max_xfer_size > 0) && (max_xfer_size < un->un_max_xfer_size)) {
7972 		/* We need DMA partial even on sparc to ensure sddump() works */
7973 		un->un_max_xfer_size = max_xfer_size;
7974 		if (un->un_partial_dma_supported == 0)
7975 			un->un_partial_dma_supported = 1;
7976 	}
7977 	if (ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
7978 	    DDI_PROP_DONTPASS, "buf_break", 0) == 1) {
7979 		if (ddi_xbuf_attr_setup_brk(un->un_xbuf_attr,
7980 		    un->un_max_xfer_size) == 1) {
7981 			un->un_buf_breakup_supported = 1;
7982 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7983 			    "un:0x%p Buf breakup enabled\n", un);
7984 		}
7985 	}
7986 
7987 	/*
7988 	 * Set PKT_DMA_PARTIAL flag.
7989 	 */
7990 	if (un->un_partial_dma_supported == 1) {
7991 		un->un_pkt_flags = PKT_DMA_PARTIAL;
7992 	} else {
7993 		un->un_pkt_flags = 0;
7994 	}
7995 
7996 	/* Initialize sd_ssc_t for internal uscsi commands */
7997 	ssc = sd_ssc_init(un);
7998 	scsi_fm_init(devp);
7999 
8000 	/*
8001 	 * Allocate memory for SCSI FMA stuffs.
8002 	 */
8003 	un->un_fm_private =
8004 	    kmem_zalloc(sizeof (struct sd_fm_internal), KM_SLEEP);
8005 	sfip = (struct sd_fm_internal *)un->un_fm_private;
8006 	sfip->fm_ssc.ssc_uscsi_cmd = &sfip->fm_ucmd;
8007 	sfip->fm_ssc.ssc_uscsi_info = &sfip->fm_uinfo;
8008 	sfip->fm_ssc.ssc_un = un;
8009 
8010 	if (ISCD(un) ||
8011 	    un->un_f_has_removable_media ||
8012 	    devp->sd_fm_capable == DDI_FM_NOT_CAPABLE) {
8013 		/*
8014 		 * We don't touch CDROM or the DDI_FM_NOT_CAPABLE device.
8015 		 * Their log are unchanged.
8016 		 */
8017 		sfip->fm_log_level = SD_FM_LOG_NSUP;
8018 	} else {
8019 		/*
8020 		 * If enter here, it should be non-CDROM and FM-capable
8021 		 * device, and it will not keep the old scsi_log as before
8022 		 * in /var/adm/messages. However, the property
8023 		 * "fm-scsi-log" will control whether the FM telemetry will
8024 		 * be logged in /var/adm/messages.
8025 		 */
8026 		int fm_scsi_log;
8027 		fm_scsi_log = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
8028 		    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "fm-scsi-log", 0);
8029 
8030 		if (fm_scsi_log)
8031 			sfip->fm_log_level = SD_FM_LOG_EREPORT;
8032 		else
8033 			sfip->fm_log_level = SD_FM_LOG_SILENT;
8034 	}
8035 
8036 	/*
8037 	 * At this point in the attach, we have enough info in the
8038 	 * soft state to be able to issue commands to the target.
8039 	 *
8040 	 * All command paths used below MUST issue their commands as
8041 	 * SD_PATH_DIRECT. This is important as intermediate layers
8042 	 * are not all initialized yet (such as PM).
8043 	 */
8044 
8045 	/*
8046 	 * Send a TEST UNIT READY command to the device. This should clear
8047 	 * any outstanding UNIT ATTENTION that may be present.
8048 	 *
8049 	 * Note: Don't check for success, just track if there is a reservation,
8050 	 * this is a throw away command to clear any unit attentions.
8051 	 *
8052 	 * Note: This MUST be the first command issued to the target during
8053 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
8054 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
8055 	 * with attempts at spinning up a device with no media.
8056 	 */
8057 	status = sd_send_scsi_TEST_UNIT_READY(ssc, SD_DONT_RETRY_TUR);
8058 	if (status != 0) {
8059 		if (status == EACCES)
8060 			reservation_flag = SD_TARGET_IS_RESERVED;
8061 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8062 	}
8063 
8064 	/*
8065 	 * If the device is NOT a removable media device, attempt to spin
8066 	 * it up (using the START_STOP_UNIT command) and read its capacity
8067 	 * (using the READ CAPACITY command).  Note, however, that either
8068 	 * of these could fail and in some cases we would continue with
8069 	 * the attach despite the failure (see below).
8070 	 */
8071 	if (un->un_f_descr_format_supported) {
8072 
8073 		switch (sd_spin_up_unit(ssc)) {
8074 		case 0:
8075 			/*
8076 			 * Spin-up was successful; now try to read the
8077 			 * capacity.  If successful then save the results
8078 			 * and mark the capacity & lbasize as valid.
8079 			 */
8080 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8081 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
8082 
8083 			status = sd_send_scsi_READ_CAPACITY(ssc, &capacity,
8084 			    &lbasize, SD_PATH_DIRECT);
8085 
8086 			switch (status) {
8087 			case 0: {
8088 				if (capacity > DK_MAX_BLOCKS) {
8089 #ifdef _LP64
8090 					if ((capacity + 1) >
8091 					    SD_GROUP1_MAX_ADDRESS) {
8092 						/*
8093 						 * Enable descriptor format
8094 						 * sense data so that we can
8095 						 * get 64 bit sense data
8096 						 * fields.
8097 						 */
8098 						sd_enable_descr_sense(ssc);
8099 					}
8100 #else
8101 					/* 32-bit kernels can't handle this */
8102 					scsi_log(SD_DEVINFO(un),
8103 					    sd_label, CE_WARN,
8104 					    "disk has %llu blocks, which "
8105 					    "is too large for a 32-bit "
8106 					    "kernel", capacity);
8107 
8108 #if defined(__x86)
8109 					/*
8110 					 * 1TB disk was treated as (1T - 512)B
8111 					 * in the past, so that it might have
8112 					 * valid VTOC and solaris partitions,
8113 					 * we have to allow it to continue to
8114 					 * work.
8115 					 */
8116 					if (capacity - 1 > DK_MAX_BLOCKS)
8117 #endif
8118 					goto spinup_failed;
8119 #endif
8120 				}
8121 
8122 				/*
8123 				 * Here it's not necessary to check the case:
8124 				 * the capacity of the device is bigger than
8125 				 * what the max hba cdb can support. Because
8126 				 * sd_send_scsi_READ_CAPACITY will retrieve
8127 				 * the capacity by sending USCSI command, which
8128 				 * is constrained by the max hba cdb. Actually,
8129 				 * sd_send_scsi_READ_CAPACITY will return
8130 				 * EINVAL when using bigger cdb than required
8131 				 * cdb length. Will handle this case in
8132 				 * "case EINVAL".
8133 				 */
8134 
8135 				/*
8136 				 * The following relies on
8137 				 * sd_send_scsi_READ_CAPACITY never
8138 				 * returning 0 for capacity and/or lbasize.
8139 				 */
8140 				sd_update_block_info(un, lbasize, capacity);
8141 
8142 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8143 				    "sd_unit_attach: un:0x%p capacity = %ld "
8144 				    "blocks; lbasize= %ld.\n", un,
8145 				    un->un_blockcount, un->un_tgt_blocksize);
8146 
8147 				break;
8148 			}
8149 			case EINVAL:
8150 				/*
8151 				 * In the case where the max-cdb-length property
8152 				 * is smaller than the required CDB length for
8153 				 * a SCSI device, a target driver can fail to
8154 				 * attach to that device.
8155 				 */
8156 				scsi_log(SD_DEVINFO(un),
8157 				    sd_label, CE_WARN,
8158 				    "disk capacity is too large "
8159 				    "for current cdb length");
8160 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8161 
8162 				goto spinup_failed;
8163 			case EACCES:
8164 				/*
8165 				 * Should never get here if the spin-up
8166 				 * succeeded, but code it in anyway.
8167 				 * From here, just continue with the attach...
8168 				 */
8169 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8170 				    "sd_unit_attach: un:0x%p "
8171 				    "sd_send_scsi_READ_CAPACITY "
8172 				    "returned reservation conflict\n", un);
8173 				reservation_flag = SD_TARGET_IS_RESERVED;
8174 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8175 				break;
8176 			default:
8177 				/*
8178 				 * Likewise, should never get here if the
8179 				 * spin-up succeeded. Just continue with
8180 				 * the attach...
8181 				 */
8182 				if (status == EIO)
8183 					sd_ssc_assessment(ssc,
8184 					    SD_FMT_STATUS_CHECK);
8185 				else
8186 					sd_ssc_assessment(ssc,
8187 					    SD_FMT_IGNORE);
8188 				break;
8189 			}
8190 			break;
8191 		case EACCES:
8192 			/*
8193 			 * Device is reserved by another host.  In this case
8194 			 * we could not spin it up or read the capacity, but
8195 			 * we continue with the attach anyway.
8196 			 */
8197 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8198 			    "sd_unit_attach: un:0x%p spin-up reservation "
8199 			    "conflict.\n", un);
8200 			reservation_flag = SD_TARGET_IS_RESERVED;
8201 			break;
8202 		default:
8203 			/* Fail the attach if the spin-up failed. */
8204 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8205 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
8206 			goto spinup_failed;
8207 		}
8208 
8209 	}
8210 
8211 	/*
8212 	 * Check to see if this is a MMC drive
8213 	 */
8214 	if (ISCD(un)) {
8215 		sd_set_mmc_caps(ssc);
8216 	}
8217 
8218 	/*
8219 	 * Add a zero-length attribute to tell the world we support
8220 	 * kernel ioctls (for layered drivers)
8221 	 */
8222 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8223 	    DDI_KERNEL_IOCTL, NULL, 0);
8224 
8225 	/*
8226 	 * Add a boolean property to tell the world we support
8227 	 * the B_FAILFAST flag (for layered drivers)
8228 	 */
8229 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8230 	    "ddi-failfast-supported", NULL, 0);
8231 
8232 	/*
8233 	 * Initialize power management
8234 	 */
8235 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
8236 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
8237 	sd_setup_pm(ssc, devi);
8238 	if (un->un_f_pm_is_enabled == FALSE) {
8239 		/*
8240 		 * For performance, point to a jump table that does
8241 		 * not include pm.
8242 		 * The direct and priority chains don't change with PM.
8243 		 *
8244 		 * Note: this is currently done based on individual device
8245 		 * capabilities. When an interface for determining system
8246 		 * power enabled state becomes available, or when additional
8247 		 * layers are added to the command chain, these values will
8248 		 * have to be re-evaluated for correctness.
8249 		 */
8250 		if (un->un_f_non_devbsize_supported) {
8251 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
8252 		} else {
8253 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
8254 		}
8255 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8256 	}
8257 
8258 	/*
8259 	 * This property is set to 0 by HA software to avoid retries
8260 	 * on a reserved disk. (The preferred property name is
8261 	 * "retry-on-reservation-conflict") (1189689)
8262 	 *
8263 	 * Note: The use of a global here can have unintended consequences. A
8264 	 * per instance variable is preferable to match the capabilities of
8265 	 * different underlying hba's (4402600)
8266 	 */
8267 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
8268 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
8269 	    sd_retry_on_reservation_conflict);
8270 	if (sd_retry_on_reservation_conflict != 0) {
8271 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
8272 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
8273 		    sd_retry_on_reservation_conflict);
8274 	}
8275 
8276 	/* Set up options for QFULL handling. */
8277 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8278 	    "qfull-retries", -1)) != -1) {
8279 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
8280 		    rval, 1);
8281 	}
8282 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8283 	    "qfull-retry-interval", -1)) != -1) {
8284 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
8285 		    rval, 1);
8286 	}
8287 
8288 	/*
8289 	 * This just prints a message that announces the existence of the
8290 	 * device. The message is always printed in the system logfile, but
8291 	 * only appears on the console if the system is booted with the
8292 	 * -v (verbose) argument.
8293 	 */
8294 	ddi_report_dev(devi);
8295 
8296 	un->un_mediastate = DKIO_NONE;
8297 
8298 	/*
8299 	 * Check Block Device Characteristics VPD.
8300 	 */
8301 	sd_check_bdc_vpd(ssc);
8302 
8303 	/*
8304 	 * Check whether the drive is in emulation mode.
8305 	 */
8306 	sd_check_emulation_mode(ssc);
8307 
8308 	cmlb_alloc_handle(&un->un_cmlbhandle);
8309 
8310 #if defined(__x86)
8311 	/*
8312 	 * On x86, compensate for off-by-1 legacy error
8313 	 */
8314 	if (!un->un_f_has_removable_media && !un->un_f_is_hotpluggable &&
8315 	    (lbasize == un->un_sys_blocksize))
8316 		offbyone = CMLB_OFF_BY_ONE;
8317 #endif
8318 
8319 	if (cmlb_attach(devi, &sd_tgops, (int)devp->sd_inq->inq_dtype,
8320 	    VOID2BOOLEAN(un->un_f_has_removable_media != 0),
8321 	    VOID2BOOLEAN(un->un_f_is_hotpluggable != 0),
8322 	    un->un_node_type, offbyone, un->un_cmlbhandle,
8323 	    (void *)SD_PATH_DIRECT) != 0) {
8324 		goto cmlb_attach_failed;
8325 	}
8326 
8327 
8328 	/*
8329 	 * Read and validate the device's geometry (ie, disk label)
8330 	 * A new unformatted drive will not have a valid geometry, but
8331 	 * the driver needs to successfully attach to this device so
8332 	 * the drive can be formatted via ioctls.
8333 	 */
8334 	geom_label_valid = (cmlb_validate(un->un_cmlbhandle, 0,
8335 	    (void *)SD_PATH_DIRECT) == 0) ? 1: 0;
8336 
8337 	mutex_enter(SD_MUTEX(un));
8338 
8339 	/*
8340 	 * Read and initialize the devid for the unit.
8341 	 */
8342 	if (un->un_f_devid_supported) {
8343 		sd_register_devid(ssc, devi, reservation_flag);
8344 	}
8345 	mutex_exit(SD_MUTEX(un));
8346 
8347 #if (defined(__fibre))
8348 	/*
8349 	 * Register callbacks for fibre only.  You can't do this solely
8350 	 * on the basis of the devid_type because this is hba specific.
8351 	 * We need to query our hba capabilities to find out whether to
8352 	 * register or not.
8353 	 */
8354 	if (un->un_f_is_fibre) {
8355 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
8356 			sd_init_event_callbacks(un);
8357 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8358 			    "sd_unit_attach: un:0x%p event callbacks inserted",
8359 			    un);
8360 		}
8361 	}
8362 #endif
8363 
8364 	if (un->un_f_opt_disable_cache == TRUE) {
8365 		/*
8366 		 * Disable both read cache and write cache.  This is
8367 		 * the historic behavior of the keywords in the config file.
8368 		 */
8369 		if (sd_cache_control(ssc, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
8370 		    0) {
8371 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8372 			    "sd_unit_attach: un:0x%p Could not disable "
8373 			    "caching", un);
8374 			goto devid_failed;
8375 		}
8376 	}
8377 
8378 	/*
8379 	 * Check the value of the WCE bit and if it's allowed to be changed,
8380 	 * set un_f_write_cache_enabled and un_f_cache_mode_changeable
8381 	 * accordingly.
8382 	 */
8383 	(void) sd_get_write_cache_enabled(ssc, &wc_enabled);
8384 	sd_get_write_cache_changeable(ssc, &wc_changeable);
8385 	mutex_enter(SD_MUTEX(un));
8386 	un->un_f_write_cache_enabled = (wc_enabled != 0);
8387 	un->un_f_cache_mode_changeable = (wc_changeable != 0);
8388 	mutex_exit(SD_MUTEX(un));
8389 
8390 	if ((un->un_f_rmw_type != SD_RMW_TYPE_RETURN_ERROR &&
8391 	    un->un_tgt_blocksize != DEV_BSIZE) ||
8392 	    un->un_f_enable_rmw) {
8393 		if (!(un->un_wm_cache)) {
8394 			(void) snprintf(name_str, sizeof (name_str),
8395 			    "%s%d_cache",
8396 			    ddi_driver_name(SD_DEVINFO(un)),
8397 			    ddi_get_instance(SD_DEVINFO(un)));
8398 			un->un_wm_cache = kmem_cache_create(
8399 			    name_str, sizeof (struct sd_w_map),
8400 			    8, sd_wm_cache_constructor,
8401 			    sd_wm_cache_destructor, NULL,
8402 			    (void *)un, NULL, 0);
8403 			if (!(un->un_wm_cache)) {
8404 				goto wm_cache_failed;
8405 			}
8406 		}
8407 	}
8408 
8409 	/*
8410 	 * Check the value of the NV_SUP bit and set
8411 	 * un_f_suppress_cache_flush accordingly.
8412 	 */
8413 	sd_get_nv_sup(ssc);
8414 
8415 	/*
8416 	 * Find out what type of reservation this disk supports.
8417 	 */
8418 	status = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_KEYS, 0, NULL);
8419 
8420 	switch (status) {
8421 	case 0:
8422 		/*
8423 		 * SCSI-3 reservations are supported.
8424 		 */
8425 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8426 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8427 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
8428 		break;
8429 	case ENOTSUP:
8430 		/*
8431 		 * The PERSISTENT RESERVE IN command would not be recognized by
8432 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
8433 		 */
8434 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8435 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
8436 		un->un_reservation_type = SD_SCSI2_RESERVATION;
8437 
8438 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8439 		break;
8440 	default:
8441 		/*
8442 		 * default to SCSI-3 reservations
8443 		 */
8444 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8445 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
8446 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8447 
8448 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8449 		break;
8450 	}
8451 
8452 	/*
8453 	 * Set the pstat and error stat values here, so data obtained during the
8454 	 * previous attach-time routines is available.
8455 	 *
8456 	 * Note: This is a critical sequence that needs to be maintained:
8457 	 *	1) Instantiate the kstats before any routines using the iopath
8458 	 *	   (i.e. sd_send_scsi_cmd).
8459 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8460 	 *	   stats (sd_set_pstats) here, following
8461 	 *	   cmlb_validate_geometry(), sd_register_devid(), and
8462 	 *	   sd_cache_control().
8463 	 */
8464 
8465 	if (un->un_f_pkstats_enabled && geom_label_valid) {
8466 		sd_set_pstats(un);
8467 		SD_TRACE(SD_LOG_IO_PARTITION, un,
8468 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
8469 	}
8470 
8471 	sd_set_errstats(un);
8472 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8473 	    "sd_unit_attach: un:0x%p errstats set\n", un);
8474 
8475 	sd_setup_blk_limits(ssc);
8476 
8477 	/*
8478 	 * After successfully attaching an instance, we record the information
8479 	 * of how many luns have been attached on the relative target and
8480 	 * controller for parallel SCSI. This information is used when sd tries
8481 	 * to set the tagged queuing capability in HBA.
8482 	 */
8483 	if (SD_IS_PARALLEL_SCSI(un) && (tgt >= 0) && (tgt < NTARGETS_WIDE)) {
8484 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_ATTACH);
8485 	}
8486 
8487 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8488 	    "sd_unit_attach: un:0x%p exit success\n", un);
8489 
8490 	/* Uninitialize sd_ssc_t pointer */
8491 	sd_ssc_fini(ssc);
8492 
8493 	return (DDI_SUCCESS);
8494 
8495 	/*
8496 	 * An error occurred during the attach; clean up & return failure.
8497 	 */
8498 wm_cache_failed:
8499 devid_failed:
8500 	ddi_remove_minor_node(devi, NULL);
8501 
8502 cmlb_attach_failed:
8503 	/*
8504 	 * Cleanup from the scsi_ifsetcap() calls (437868)
8505 	 */
8506 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8507 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8508 
8509 	/*
8510 	 * Refer to the comments of setting tagged-qing in the beginning of
8511 	 * sd_unit_attach. We can only disable tagged queuing when there is
8512 	 * no lun attached on the target.
8513 	 */
8514 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
8515 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8516 	}
8517 
8518 	if (un->un_f_is_fibre == FALSE) {
8519 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8520 	}
8521 
8522 spinup_failed:
8523 
8524 	/* Uninitialize sd_ssc_t pointer */
8525 	sd_ssc_fini(ssc);
8526 
8527 	mutex_enter(SD_MUTEX(un));
8528 
8529 	/* Deallocate SCSI FMA memory spaces */
8530 	kmem_free(un->un_fm_private, sizeof (struct sd_fm_internal));
8531 
8532 	/* Cancel callback for SD_PATH_DIRECT_PRIORITY cmd. restart */
8533 	if (un->un_direct_priority_timeid != NULL) {
8534 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8535 		un->un_direct_priority_timeid = NULL;
8536 		mutex_exit(SD_MUTEX(un));
8537 		(void) untimeout(temp_id);
8538 		mutex_enter(SD_MUTEX(un));
8539 	}
8540 
8541 	/* Cancel any pending start/stop timeouts */
8542 	if (un->un_startstop_timeid != NULL) {
8543 		timeout_id_t temp_id = un->un_startstop_timeid;
8544 		un->un_startstop_timeid = NULL;
8545 		mutex_exit(SD_MUTEX(un));
8546 		(void) untimeout(temp_id);
8547 		mutex_enter(SD_MUTEX(un));
8548 	}
8549 
8550 	/* Cancel any pending reset-throttle timeouts */
8551 	if (un->un_reset_throttle_timeid != NULL) {
8552 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8553 		un->un_reset_throttle_timeid = NULL;
8554 		mutex_exit(SD_MUTEX(un));
8555 		(void) untimeout(temp_id);
8556 		mutex_enter(SD_MUTEX(un));
8557 	}
8558 
8559 	/* Cancel rmw warning message timeouts */
8560 	if (un->un_rmw_msg_timeid != NULL) {
8561 		timeout_id_t temp_id = un->un_rmw_msg_timeid;
8562 		un->un_rmw_msg_timeid = NULL;
8563 		mutex_exit(SD_MUTEX(un));
8564 		(void) untimeout(temp_id);
8565 		mutex_enter(SD_MUTEX(un));
8566 	}
8567 
8568 	/* Cancel any pending retry timeouts */
8569 	if (un->un_retry_timeid != NULL) {
8570 		timeout_id_t temp_id = un->un_retry_timeid;
8571 		un->un_retry_timeid = NULL;
8572 		mutex_exit(SD_MUTEX(un));
8573 		(void) untimeout(temp_id);
8574 		mutex_enter(SD_MUTEX(un));
8575 	}
8576 
8577 	/* Cancel any pending delayed cv broadcast timeouts */
8578 	if (un->un_dcvb_timeid != NULL) {
8579 		timeout_id_t temp_id = un->un_dcvb_timeid;
8580 		un->un_dcvb_timeid = NULL;
8581 		mutex_exit(SD_MUTEX(un));
8582 		(void) untimeout(temp_id);
8583 		mutex_enter(SD_MUTEX(un));
8584 	}
8585 
8586 	mutex_exit(SD_MUTEX(un));
8587 
8588 	/* There should not be any in-progress I/O so ASSERT this check */
8589 	ASSERT(un->un_ncmds_in_transport == 0);
8590 	ASSERT(un->un_ncmds_in_driver == 0);
8591 
8592 	/* Do not free the softstate if the callback routine is active */
8593 	sd_sync_with_callback(un);
8594 
8595 	/*
8596 	 * Partition stats apparently are not used with removables. These would
8597 	 * not have been created during attach, so no need to clean them up...
8598 	 */
8599 	if (un->un_errstats != NULL) {
8600 		kstat_delete(un->un_errstats);
8601 		un->un_errstats = NULL;
8602 	}
8603 
8604 create_errstats_failed:
8605 
8606 	if (un->un_stats != NULL) {
8607 		kstat_delete(un->un_stats);
8608 		un->un_stats = NULL;
8609 	}
8610 
8611 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8612 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8613 
8614 	ddi_prop_remove_all(devi);
8615 	sema_destroy(&un->un_semoclose);
8616 	cv_destroy(&un->un_state_cv);
8617 
8618 	sd_free_rqs(un);
8619 
8620 alloc_rqs_failed:
8621 
8622 	devp->sd_private = NULL;
8623 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
8624 
8625 	/*
8626 	 * Note: the man pages are unclear as to whether or not doing a
8627 	 * ddi_soft_state_free(sd_state, instance) is the right way to
8628 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
8629 	 * ddi_get_soft_state() fails.  The implication seems to be
8630 	 * that the get_soft_state cannot fail if the zalloc succeeds.
8631 	 */
8632 #ifndef XPV_HVM_DRIVER
8633 	ddi_soft_state_free(sd_state, instance);
8634 #endif /* !XPV_HVM_DRIVER */
8635 
8636 probe_failed:
8637 	scsi_unprobe(devp);
8638 
8639 	return (DDI_FAILURE);
8640 }
8641 
8642 
8643 /*
8644  *    Function: sd_unit_detach
8645  *
8646  * Description: Performs DDI_DETACH processing for sddetach().
8647  *
8648  * Return Code: DDI_SUCCESS
8649  *		DDI_FAILURE
8650  *
8651  *     Context: Kernel thread context
8652  */
8653 
static int
sd_unit_detach(dev_info_t *devi)
{
	struct scsi_device	*devp;
	struct sd_lun		*un;
	int			i;
	int			tgt;
	dev_t			dev;
	/*
	 * The parent devinfo and instance number are captured up front
	 * because both (along with 'tgt' below) are still needed after
	 * the soft state ('un') has been bzero'ed and freed at the
	 * bottom of this routine.
	 */
	dev_info_t		*pdip = ddi_get_parent(devi);
	int			instance = ddi_get_instance(devi);

	mutex_enter(&sd_detach_mutex);

	/*
	 * Fail the detach for any of the following:
	 *  - Unable to get the sd_lun struct for the instance
	 *  - A layered driver has an outstanding open on the instance
	 *  - Another thread is already detaching this instance
	 *  - Another thread is currently performing an open
	 */
	devp = ddi_get_driver_private(devi);
	if ((devp == NULL) ||
	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
		mutex_exit(&sd_detach_mutex);
		return (DDI_FAILURE);
	}

	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);

	/*
	 * Mark this instance as currently in a detach, to inhibit any
	 * opens from a layered driver.
	 */
	un->un_detach_count++;
	mutex_exit(&sd_detach_mutex);

	/*
	 * Save the target number now; it is used at the end of this routine
	 * to update the per-target lun count, after 'un' is gone.
	 */
	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
	    SCSI_ADDR_PROP_TARGET, -1);

	/*
	 * dev_t for this unit; used below for reservation release and for
	 * removing pending reservation-reclaim requests.
	 */
	dev = sd_make_device(SD_DEVINFO(un));

#ifndef lint
	_NOTE(COMPETING_THREADS_NOW);
#endif

	mutex_enter(SD_MUTEX(un));

	/*
	 * Fail the detach if there are any outstanding layered
	 * opens on this device.
	 */
	for (i = 0; i < NDKMAP; i++) {
		if (un->un_ocmap.lyropen[i] != 0) {
			goto err_notclosed;
		}
	}

	/*
	 * Verify there are NO outstanding commands issued to this device.
	 * ie, un_ncmds_in_transport == 0.
	 * It's possible to have outstanding commands through the physio
	 * code path, even though everything's closed.
	 */
	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
	    (un->un_direct_priority_timeid != NULL) ||
	    (un->un_state == SD_STATE_RWAIT)) {
		mutex_exit(SD_MUTEX(un));
		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
		goto err_stillbusy;
	}

	/*
	 * If we have the device reserved, release the reservation.
	 */
	if ((un->un_resvd_status & SD_RESERVE) &&
	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
		mutex_exit(SD_MUTEX(un));
		/*
		 * Note: sd_reserve_release sends a command to the device
		 * via the sd_ioctlcmd() path, and can sleep.
		 */
		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
			    "sd_dr_detach: Cannot release reservation \n");
		}
	} else {
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * Untimeout any reserve recover, throttle reset, restart unit
	 * and delayed broadcast timeout threads. Protect the timeout pointer
	 * from getting nulled by their callback functions.
	 *
	 * The pattern below (clear the id under SD_MUTEX, drop the mutex,
	 * untimeout, then reacquire) is required because untimeout() can
	 * block on a callback that itself takes SD_MUTEX.
	 */
	mutex_enter(SD_MUTEX(un));
	if (un->un_resvd_timeid != NULL) {
		timeout_id_t temp_id = un->un_resvd_timeid;
		un->un_resvd_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	if (un->un_reset_throttle_timeid != NULL) {
		timeout_id_t temp_id = un->un_reset_throttle_timeid;
		un->un_reset_throttle_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	if (un->un_startstop_timeid != NULL) {
		timeout_id_t temp_id = un->un_startstop_timeid;
		un->un_startstop_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	if (un->un_rmw_msg_timeid != NULL) {
		timeout_id_t temp_id = un->un_rmw_msg_timeid;
		un->un_rmw_msg_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	/*
	 * Unlike the cancellations above, SD_MUTEX is deliberately left
	 * dropped on exit from this block: sd_rmv_resv_reclaim_req() below
	 * is called without it held.
	 */
	if (un->un_dcvb_timeid != NULL) {
		timeout_id_t temp_id = un->un_dcvb_timeid;
		un->un_dcvb_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
	} else {
		mutex_exit(SD_MUTEX(un));
	}

	/* Remove any pending reservation reclaim requests for this device */
	sd_rmv_resv_reclaim_req(dev);

	mutex_enter(SD_MUTEX(un));

	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
	if (un->un_direct_priority_timeid != NULL) {
		timeout_id_t temp_id = un->un_direct_priority_timeid;
		un->un_direct_priority_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	/* Cancel any active multi-host disk watch thread requests */
	if (un->un_mhd_token != NULL) {
		mutex_exit(SD_MUTEX(un));
		 _NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
		if (scsi_watch_request_terminate(un->un_mhd_token,
		    SCSI_WATCH_TERMINATE_NOWAIT)) {
			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
			    "sd_dr_detach: Cannot cancel mhd watch request\n");
			/*
			 * Note: We are returning here after having removed
			 * some driver timeouts above. This is consistent with
			 * the legacy implementation but perhaps the watch
			 * terminate call should be made with the wait flag set.
			 */
			goto err_stillbusy;
		}
		mutex_enter(SD_MUTEX(un));
		un->un_mhd_token = NULL;
	}

	if (un->un_swr_token != NULL) {
		mutex_exit(SD_MUTEX(un));
		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
		if (scsi_watch_request_terminate(un->un_swr_token,
		    SCSI_WATCH_TERMINATE_NOWAIT)) {
			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
			    "sd_dr_detach: Cannot cancel swr watch request\n");
			/*
			 * Note: We are returning here after having removed
			 * some driver timeouts above. This is consistent with
			 * the legacy implementation but perhaps the watch
			 * terminate call should be made with the wait flag set.
			 */
			goto err_stillbusy;
		}
		mutex_enter(SD_MUTEX(un));
		un->un_swr_token = NULL;
	}

	mutex_exit(SD_MUTEX(un));

	/*
	 * Clear any scsi_reset_notifies. We clear the reset notifies
	 * if we have not registered one.
	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
	 */
	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
	    sd_mhd_reset_notify_cb, (caddr_t)un);

	/*
	 * protect the timeout pointers from getting nulled by
	 * their callback functions during the cancellation process.
	 * In such a scenario untimeout can be invoked with a null value.
	 */
	_NOTE(NO_COMPETING_THREADS_NOW);

	mutex_enter(&un->un_pm_mutex);
	if (un->un_pm_idle_timeid != NULL) {
		timeout_id_t temp_id = un->un_pm_idle_timeid;
		un->un_pm_idle_timeid = NULL;
		mutex_exit(&un->un_pm_mutex);

		/*
		 * Timeout is active; cancel it.
		 * Note that it'll never be active on a device
		 * that does not support PM therefore we don't
		 * have to check before calling pm_idle_component.
		 */
		(void) untimeout(temp_id);
		(void) pm_idle_component(SD_DEVINFO(un), 0);
		mutex_enter(&un->un_pm_mutex);
	}

	/*
	 * Check whether there is already a timeout scheduled for power
	 * management. If yes then don't lower the power here, that's
	 * the timeout handler's job.
	 */
	if (un->un_pm_timeid != NULL) {
		timeout_id_t temp_id = un->un_pm_timeid;
		un->un_pm_timeid = NULL;
		mutex_exit(&un->un_pm_mutex);
		/*
		 * Timeout is active; cancel it.
		 * Note that it'll never be active on a device
		 * that does not support PM therefore we don't
		 * have to check before calling pm_idle_component.
		 */
		(void) untimeout(temp_id);
		(void) pm_idle_component(SD_DEVINFO(un), 0);

	} else {
		mutex_exit(&un->un_pm_mutex);
		if ((un->un_f_pm_is_enabled == TRUE) &&
		    (pm_lower_power(SD_DEVINFO(un), 0, SD_PM_STATE_STOPPED(un))
		    != DDI_SUCCESS)) {
			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
		    "sd_dr_detach: Lower power request failed, ignoring.\n");
			/*
			 * Fix for bug: 4297749, item # 13
			 * The above test now includes a check to see if PM is
			 * supported by this device before call
			 * pm_lower_power().
			 * Note, the following is not dead code. The call to
			 * pm_lower_power above will generate a call back into
			 * our sdpower routine which might result in a timeout
			 * handler getting activated. Therefore the following
			 * code is valid and necessary.
			 */
			mutex_enter(&un->un_pm_mutex);
			if (un->un_pm_timeid != NULL) {
				timeout_id_t temp_id = un->un_pm_timeid;
				un->un_pm_timeid = NULL;
				mutex_exit(&un->un_pm_mutex);
				(void) untimeout(temp_id);
				(void) pm_idle_component(SD_DEVINFO(un), 0);
			} else {
				mutex_exit(&un->un_pm_mutex);
			}
		}
	}

	/*
	 * Cleanup from the scsi_ifsetcap() calls (437868)
	 * Relocated here from above to be after the call to
	 * pm_lower_power, which was getting errors.
	 */
	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);

	/*
	 * Currently, tagged queuing is supported per target based by HBA.
	 * Setting this per lun instance actually sets the capability of this
	 * target in HBA, which affects those luns already attached on the
	 * same target. So during detach, we can only disable this capability
	 * only when this is the only lun left on this target. By doing
	 * this, we assume a target has the same tagged queuing capability
	 * for every lun. The condition can be removed when HBA is changed to
	 * support per lun based tagged queuing capability.
	 */
	if (sd_scsi_get_target_lun_count(pdip, tgt) <= 1) {
		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
	}

	if (un->un_f_is_fibre == FALSE) {
		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
	}

	/*
	 * Remove any event callbacks, fibre only
	 */
	if (un->un_f_is_fibre == TRUE) {
		if ((un->un_insert_event != NULL) &&
		    (ddi_remove_event_handler(un->un_insert_cb_id) !=
		    DDI_SUCCESS)) {
			/*
			 * Note: We are returning here after having done
			 * substantial cleanup above. This is consistent
			 * with the legacy implementation but this may not
			 * be the right thing to do.
			 */
			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
			    "sd_dr_detach: Cannot cancel insert event\n");
			goto err_remove_event;
		}
		un->un_insert_event = NULL;

		if ((un->un_remove_event != NULL) &&
		    (ddi_remove_event_handler(un->un_remove_cb_id) !=
		    DDI_SUCCESS)) {
			/*
			 * Note: We are returning here after having done
			 * substantial cleanup above. This is consistent
			 * with the legacy implementation but this may not
			 * be the right thing to do.
			 */
			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
			    "sd_dr_detach: Cannot cancel remove event\n");
			goto err_remove_event;
		}
		un->un_remove_event = NULL;
	}

	/* Do not free the softstate if the callback routine is active */
	sd_sync_with_callback(un);

	cmlb_detach(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
	cmlb_free_handle(&un->un_cmlbhandle);

	/*
	 * Hold the detach mutex here, to make sure that no other threads ever
	 * can access a (partially) freed soft state structure.
	 */
	mutex_enter(&sd_detach_mutex);

	/*
	 * Clean up the soft state struct.
	 * Cleanup is done in reverse order of allocs/inits.
	 * At this point there should be no competing threads anymore.
	 */

	scsi_fm_fini(devp);

	/*
	 * Deallocate memory for SCSI FMA.
	 */
	kmem_free(un->un_fm_private, sizeof (struct sd_fm_internal));

	/*
	 * Unregister and free device id if it was not registered
	 * by the transport.
	 */
	if (un->un_f_devid_transport_defined == FALSE)
		ddi_devid_unregister(devi);

	/*
	 * free the devid structure if allocated before (by ddi_devid_init()
	 * or ddi_devid_get()).
	 */
	if (un->un_devid) {
		ddi_devid_free(un->un_devid);
		un->un_devid = NULL;
	}

	/*
	 * Destroy wmap cache if it exists.
	 */
	if (un->un_wm_cache != NULL) {
		kmem_cache_destroy(un->un_wm_cache);
		un->un_wm_cache = NULL;
	}

	/*
	 * kstat cleanup is done in detach for all device types (4363169).
	 * We do not want to fail detach if the device kstats are not deleted
	 * since there is a confusion about the devo_refcnt for the device.
	 * We just delete the kstats and let detach complete successfully.
	 */
	if (un->un_stats != NULL) {
		kstat_delete(un->un_stats);
		un->un_stats = NULL;
	}
	if (un->un_unmapstats != NULL) {
		/*
		 * NOTE(review): the guard tests un_unmapstats (the data
		 * pointer) while the kstat deleted is un_unmapstats_ks;
		 * presumably the two are allocated together -- confirm.
		 */
		kstat_delete(un->un_unmapstats_ks);
		un->un_unmapstats_ks = NULL;
		un->un_unmapstats = NULL;
	}
	if (un->un_errstats != NULL) {
		kstat_delete(un->un_errstats);
		un->un_errstats = NULL;
	}

	/* Remove partition stats */
	if (un->un_f_pkstats_enabled) {
		for (i = 0; i < NSDMAP; i++) {
			if (un->un_pstats[i] != NULL) {
				kstat_delete(un->un_pstats[i]);
				un->un_pstats[i] = NULL;
			}
		}
	}

	/* Remove xbuf registration */
	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
	ddi_xbuf_attr_destroy(un->un_xbuf_attr);

	/* Remove driver properties */
	ddi_prop_remove_all(devi);

	mutex_destroy(&un->un_pm_mutex);
	cv_destroy(&un->un_pm_busy_cv);

	cv_destroy(&un->un_wcc_cv);

	/* Open/close semaphore */
	sema_destroy(&un->un_semoclose);

	/* Removable media condvar. */
	cv_destroy(&un->un_state_cv);

	/* Suspend/resume condvar. */
	cv_destroy(&un->un_suspend_cv);
	cv_destroy(&un->un_disk_busy_cv);

	sd_free_rqs(un);

	/* Free up soft state */
	devp->sd_private = NULL;

	/* Clear any stale data before the soft state is released. */
	bzero(un, sizeof (struct sd_lun));

	ddi_soft_state_free(sd_state, instance);

	mutex_exit(&sd_detach_mutex);

	/* This frees up the INQUIRY data associated with the device. */
	scsi_unprobe(devp);

	/*
	 * After successfully detaching an instance, we update the information
	 * of how many luns have been attached in the relative target and
	 * controller for parallel SCSI. This information is used when sd tries
	 * to set the tagged queuing capability in HBA.
	 * Since un has been released, we can't use SD_IS_PARALLEL_SCSI(un) to
	 * check if the device is parallel SCSI. However, we don't need to
	 * check here because we've already checked during attach. No device
	 * that is not parallel SCSI is in the chain.
	 */
	if ((tgt >= 0) && (tgt < NTARGETS_WIDE)) {
		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_DETACH);
	}

	return (DDI_SUCCESS);

err_notclosed:
	/* A layered open is still outstanding; drop SD_MUTEX and fail. */
	mutex_exit(SD_MUTEX(un));

err_stillbusy:
	_NOTE(NO_COMPETING_THREADS_NOW);

err_remove_event:
	/* Undo the detach marker so a later detach attempt can proceed. */
	mutex_enter(&sd_detach_mutex);
	un->un_detach_count--;
	mutex_exit(&sd_detach_mutex);

	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
	return (DDI_FAILURE);
}
9135 
9136 
9137 /*
9138  *    Function: sd_create_errstats
9139  *
9140  * Description: This routine instantiates the device error stats.
9141  *
9142  *		Note: During attach the stats are instantiated first so they are
9143  *		available for attach-time routines that utilize the driver
9144  *		iopath to send commands to the device. The stats are initialized
9145  *		separately so data obtained during some attach-time routines is
9146  *		available. (4362483)
9147  *
9148  *   Arguments: un - driver soft state (unit) structure
9149  *		instance - driver instance
9150  *
9151  *     Context: Kernel thread context
9152  */
9153 
9154 static void
9155 sd_create_errstats(struct sd_lun *un, int instance)
9156 {
9157 	struct	sd_errstats	*stp;
9158 	char	kstatmodule_err[KSTAT_STRLEN];
9159 	char	kstatname[KSTAT_STRLEN];
9160 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
9161 
9162 	ASSERT(un != NULL);
9163 
9164 	if (un->un_errstats != NULL) {
9165 		return;
9166 	}
9167 
9168 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
9169 	    "%serr", sd_label);
9170 	(void) snprintf(kstatname, sizeof (kstatname),
9171 	    "%s%d,err", sd_label, instance);
9172 
9173 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
9174 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
9175 
9176 	if (un->un_errstats == NULL) {
9177 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9178 		    "sd_create_errstats: Failed kstat_create\n");
9179 		return;
9180 	}
9181 
9182 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9183 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
9184 	    KSTAT_DATA_UINT32);
9185 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
9186 	    KSTAT_DATA_UINT32);
9187 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
9188 	    KSTAT_DATA_UINT32);
9189 	kstat_named_init(&stp->sd_vid,		"Vendor",
9190 	    KSTAT_DATA_CHAR);
9191 	kstat_named_init(&stp->sd_pid,		"Product",
9192 	    KSTAT_DATA_CHAR);
9193 	kstat_named_init(&stp->sd_revision,	"Revision",
9194 	    KSTAT_DATA_CHAR);
9195 	kstat_named_init(&stp->sd_serial,	"Serial No",
9196 	    KSTAT_DATA_CHAR);
9197 	kstat_named_init(&stp->sd_capacity,	"Size",
9198 	    KSTAT_DATA_ULONGLONG);
9199 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
9200 	    KSTAT_DATA_UINT32);
9201 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
9202 	    KSTAT_DATA_UINT32);
9203 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
9204 	    KSTAT_DATA_UINT32);
9205 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
9206 	    KSTAT_DATA_UINT32);
9207 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
9208 	    KSTAT_DATA_UINT32);
9209 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
9210 	    KSTAT_DATA_UINT32);
9211 
9212 	un->un_errstats->ks_private = un;
9213 	un->un_errstats->ks_update  = nulldev;
9214 
9215 	kstat_install(un->un_errstats);
9216 }
9217 
9218 
9219 /*
9220  *    Function: sd_set_errstats
9221  *
9222  * Description: This routine sets the value of the vendor id, product id,
9223  *		revision, serial number, and capacity device error stats.
9224  *
9225  *		Note: During attach the stats are instantiated first so they are
9226  *		available for attach-time routines that utilize the driver
9227  *		iopath to send commands to the device. The stats are initialized
9228  *		separately so data obtained during some attach-time routines is
9229  *		available. (4362483)
9230  *
9231  *   Arguments: un - driver soft state (unit) structure
9232  *
9233  *     Context: Kernel thread context
9234  */
9235 
9236 static void
9237 sd_set_errstats(struct sd_lun *un)
9238 {
9239 	struct	sd_errstats	*stp;
9240 	char			*sn;
9241 
9242 	ASSERT(un != NULL);
9243 	ASSERT(un->un_errstats != NULL);
9244 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9245 	ASSERT(stp != NULL);
9246 	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
9247 	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
9248 	(void) strncpy(stp->sd_revision.value.c,
9249 	    un->un_sd->sd_inq->inq_revision, 4);
9250 
9251 	/*
9252 	 * All the errstats are persistent across detach/attach,
9253 	 * so reset all the errstats here in case of the hot
9254 	 * replacement of disk drives, except for not changed
9255 	 * Sun qualified drives.
9256 	 */
9257 	if ((bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) != 0) ||
9258 	    (bcmp(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9259 	    sizeof (SD_INQUIRY(un)->inq_serial)) != 0)) {
9260 		stp->sd_softerrs.value.ui32 = 0;
9261 		stp->sd_harderrs.value.ui32 = 0;
9262 		stp->sd_transerrs.value.ui32 = 0;
9263 		stp->sd_rq_media_err.value.ui32 = 0;
9264 		stp->sd_rq_ntrdy_err.value.ui32 = 0;
9265 		stp->sd_rq_nodev_err.value.ui32 = 0;
9266 		stp->sd_rq_recov_err.value.ui32 = 0;
9267 		stp->sd_rq_illrq_err.value.ui32 = 0;
9268 		stp->sd_rq_pfa_err.value.ui32 = 0;
9269 	}
9270 
9271 	/*
9272 	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
9273 	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
9274 	 * (4376302))
9275 	 */
9276 	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
9277 		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9278 		    sizeof (SD_INQUIRY(un)->inq_serial));
9279 	} else {
9280 		/*
9281 		 * Set the "Serial No" kstat for non-Sun qualified drives
9282 		 */
9283 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, SD_DEVINFO(un),
9284 		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
9285 		    INQUIRY_SERIAL_NO, &sn) == DDI_SUCCESS) {
9286 			(void) strlcpy(stp->sd_serial.value.c, sn,
9287 			    sizeof (stp->sd_serial.value.c));
9288 			ddi_prop_free(sn);
9289 		}
9290 	}
9291 
9292 	if (un->un_f_blockcount_is_valid != TRUE) {
9293 		/*
9294 		 * Set capacity error stat to 0 for no media. This ensures
9295 		 * a valid capacity is displayed in response to 'iostat -E'
9296 		 * when no media is present in the device.
9297 		 */
9298 		stp->sd_capacity.value.ui64 = 0;
9299 	} else {
9300 		/*
9301 		 * Multiply un_blockcount by un->un_sys_blocksize to get
9302 		 * capacity.
9303 		 *
9304 		 * Note: for non-512 blocksize devices "un_blockcount" has been
9305 		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
9306 		 * (un_tgt_blocksize / un->un_sys_blocksize).
9307 		 */
9308 		stp->sd_capacity.value.ui64 = (uint64_t)
9309 		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
9310 	}
9311 }
9312 
9313 
9314 /*
9315  *    Function: sd_set_pstats
9316  *
9317  * Description: This routine instantiates and initializes the partition
9318  *              stats for each partition with more than zero blocks.
9319  *		(4363169)
9320  *
9321  *   Arguments: un - driver soft state (unit) structure
9322  *
9323  *     Context: Kernel thread context
9324  */
9325 
9326 static void
9327 sd_set_pstats(struct sd_lun *un)
9328 {
9329 	char	kstatname[KSTAT_STRLEN];
9330 	int	instance;
9331 	int	i;
9332 	diskaddr_t	nblks = 0;
9333 	char	*partname = NULL;
9334 
9335 	ASSERT(un != NULL);
9336 
9337 	instance = ddi_get_instance(SD_DEVINFO(un));
9338 
9339 	/* Note:x86: is this a VTOC8/VTOC16 difference? */
9340 	for (i = 0; i < NSDMAP; i++) {
9341 
9342 		if (cmlb_partinfo(un->un_cmlbhandle, i,
9343 		    &nblks, NULL, &partname, NULL, (void *)SD_PATH_DIRECT) != 0)
9344 			continue;
9345 		mutex_enter(SD_MUTEX(un));
9346 
9347 		if ((un->un_pstats[i] == NULL) &&
9348 		    (nblks != 0)) {
9349 
9350 			(void) snprintf(kstatname, sizeof (kstatname),
9351 			    "%s%d,%s", sd_label, instance,
9352 			    partname);
9353 
9354 			un->un_pstats[i] = kstat_create(sd_label,
9355 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
9356 			    1, KSTAT_FLAG_PERSISTENT);
9357 			if (un->un_pstats[i] != NULL) {
9358 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
9359 				kstat_install(un->un_pstats[i]);
9360 			}
9361 		}
9362 		mutex_exit(SD_MUTEX(un));
9363 	}
9364 }
9365 
9366 
9367 #if (defined(__fibre))
9368 /*
9369  *    Function: sd_init_event_callbacks
9370  *
9371  * Description: This routine initializes the insertion and removal event
9372  *		callbacks. (fibre only)
9373  *
9374  *   Arguments: un - driver soft state (unit) structure
9375  *
9376  *     Context: Kernel thread context
9377  */
9378 
9379 static void
9380 sd_init_event_callbacks(struct sd_lun *un)
9381 {
9382 	ASSERT(un != NULL);
9383 
9384 	if ((un->un_insert_event == NULL) &&
9385 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
9386 	    &un->un_insert_event) == DDI_SUCCESS)) {
9387 		/*
9388 		 * Add the callback for an insertion event
9389 		 */
9390 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9391 		    un->un_insert_event, sd_event_callback, (void *)un,
9392 		    &(un->un_insert_cb_id));
9393 	}
9394 
9395 	if ((un->un_remove_event == NULL) &&
9396 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
9397 	    &un->un_remove_event) == DDI_SUCCESS)) {
9398 		/*
9399 		 * Add the callback for a removal event
9400 		 */
9401 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9402 		    un->un_remove_event, sd_event_callback, (void *)un,
9403 		    &(un->un_remove_cb_id));
9404 	}
9405 }
9406 
9407 
9408 /*
9409  *    Function: sd_event_callback
9410  *
9411  * Description: This routine handles insert/remove events (photon). The
9412  *		state is changed to OFFLINE which can be used to supress
9413  *		error msgs. (fibre only)
9414  *
9415  *   Arguments: un - driver soft state (unit) structure
9416  *
9417  *     Context: Callout thread context
9418  */
9419 /* ARGSUSED */
9420 static void
9421 sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
9422     void *bus_impldata)
9423 {
9424 	struct sd_lun *un = (struct sd_lun *)arg;
9425 
9426 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
9427 	if (event == un->un_insert_event) {
9428 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
9429 		mutex_enter(SD_MUTEX(un));
9430 		if (un->un_state == SD_STATE_OFFLINE) {
9431 			if (un->un_last_state != SD_STATE_SUSPENDED) {
9432 				un->un_state = un->un_last_state;
9433 			} else {
9434 				/*
9435 				 * We have gone through SUSPEND/RESUME while
9436 				 * we were offline. Restore the last state
9437 				 */
9438 				un->un_state = un->un_save_state;
9439 			}
9440 		}
9441 		mutex_exit(SD_MUTEX(un));
9442 
9443 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
9444 	} else if (event == un->un_remove_event) {
9445 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
9446 		mutex_enter(SD_MUTEX(un));
9447 		/*
9448 		 * We need to handle an event callback that occurs during
9449 		 * the suspend operation, since we don't prevent it.
9450 		 */
9451 		if (un->un_state != SD_STATE_OFFLINE) {
9452 			if (un->un_state != SD_STATE_SUSPENDED) {
9453 				New_state(un, SD_STATE_OFFLINE);
9454 			} else {
9455 				un->un_last_state = SD_STATE_OFFLINE;
9456 			}
9457 		}
9458 		mutex_exit(SD_MUTEX(un));
9459 	} else {
9460 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
9461 		    "!Unknown event\n");
9462 	}
9463 
9464 }
9465 #endif
9466 
9467 /*
9468  * Values related to caching mode page depending on whether the unit is ATAPI.
9469  */
9470 #define	SDC_CDB_GROUP(un) ((un->un_f_cfg_is_atapi == TRUE) ? \
9471 	CDB_GROUP1 : CDB_GROUP0)
9472 #define	SDC_HDRLEN(un) ((un->un_f_cfg_is_atapi == TRUE) ? \
9473 	MODE_HEADER_LENGTH_GRP2 : MODE_HEADER_LENGTH)
9474 /*
9475  * Use mode_cache_scsi3 to ensure we get all of the mode sense data, otherwise
9476  * the mode select will fail (mode_cache_scsi3 is a superset of mode_caching).
9477  */
9478 #define	SDC_BUFLEN(un) (SDC_HDRLEN(un) + MODE_BLK_DESC_LENGTH + \
9479 	sizeof (struct mode_cache_scsi3))
9480 
/*
 *    Function: sd_get_caching_mode_page
 *
 * Description: Issue a MODE SENSE for the caching mode page
 *		(MODEPAGE_CACHING) and hand back the raw response so the
 *		caller can locate and examine the page.
 *
 *   Arguments: ssc		- ssc contains pointer to driver soft state
 *				  (unit) structure for this target.
 *		page_control	- page control bits for the CDB (e.g.
 *				  MODEPAGE_CURRENT, MODEPAGE_CHANGEABLE).
 *		header		- on success, set to a kmem_zalloc'ed buffer
 *				  of SDC_BUFLEN(un) bytes that the caller must
 *				  free with kmem_free(); NULL on failure.
 *		bdlen		- on success, set to the block descriptor
 *				  length from the response; 0 on failure.
 *
 * Return Code: 0 on success, otherwise the mode sense error code or EIO
 *		when the response data is inconsistent.
 *
 *     Context: Kernel Thread
 */
static int
sd_get_caching_mode_page(sd_ssc_t *ssc, uchar_t page_control, uchar_t **header,
    int *bdlen)
{
	struct sd_lun	*un = ssc->ssc_un;
	struct mode_caching *mode_caching_page;
	size_t		buflen = SDC_BUFLEN(un);
	int		hdrlen = SDC_HDRLEN(un);
	int		rval;

	/*
	 * Do a test unit ready, otherwise a mode sense may not work if this
	 * is the first command sent to the device after boot.
	 * A TUR failure is deliberately ignored (best effort).
	 */
	if (sd_send_scsi_TEST_UNIT_READY(ssc, 0) != 0)
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	/*
	 * Allocate memory for the retrieved mode page and its headers.  Set
	 * a pointer to the page itself.
	 */
	*header = kmem_zalloc(buflen, KM_SLEEP);

	/* Get the information from the device */
	rval = sd_send_scsi_MODE_SENSE(ssc, SDC_CDB_GROUP(un), *header, buflen,
	    page_control | MODEPAGE_CACHING, SD_PATH_DIRECT);
	if (rval != 0) {
		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un, "%s: Mode Sense Failed\n",
		    __func__);
		goto mode_sense_failed;
	}

	/*
	 * Determine size of Block Descriptors in order to locate
	 * the mode page data. ATAPI devices return 0, SCSI devices
	 * should return MODE_BLK_DESC_LENGTH.
	 */
	if (un->un_f_cfg_is_atapi == TRUE) {
		struct mode_header_grp2 *mhp =
		    (struct mode_header_grp2 *)(*header);
		*bdlen = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
	} else {
		*bdlen = ((struct mode_header *)(*header))->bdesc_length;
	}

	/* A larger bdlen would place the page outside the buffer we sized */
	if (*bdlen > MODE_BLK_DESC_LENGTH) {
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, 0,
		    "%s: Mode Sense returned invalid block descriptor length\n",
		    __func__);
		rval = EIO;
		goto mode_sense_failed;
	}

	/* Sanity-check that the device actually returned the caching page */
	mode_caching_page = (struct mode_caching *)(*header + hdrlen + *bdlen);
	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
		    "%s: Mode Sense caching page code mismatch %d\n",
		    __func__, mode_caching_page->mode_page.code);
		rval = EIO;
		/* falls through to the cleanup below since rval != 0 */
	}

mode_sense_failed:
	/* On any failure, release the buffer and zero the out-parameters */
	if (rval != 0) {
		kmem_free(*header, buflen);
		*header = NULL;
		*bdlen = 0;
	}
	return (rval);
}
9550 
9551 /*
9552  *    Function: sd_cache_control()
9553  *
9554  * Description: This routine is the driver entry point for setting
9555  *		read and write caching by modifying the WCE (write cache
9556  *		enable) and RCD (read cache disable) bits of mode
9557  *		page 8 (MODEPAGE_CACHING).
9558  *
9559  *   Arguments: ssc		- ssc contains pointer to driver soft state
9560  *				  (unit) structure for this target.
9561  *		rcd_flag	- flag for controlling the read cache
9562  *		wce_flag	- flag for controlling the write cache
9563  *
9564  * Return Code: EIO
9565  *		code returned by sd_send_scsi_MODE_SENSE and
9566  *		sd_send_scsi_MODE_SELECT
9567  *
9568  *     Context: Kernel Thread
9569  */
9570 
9571 static int
9572 sd_cache_control(sd_ssc_t *ssc, int rcd_flag, int wce_flag)
9573 {
9574 	struct sd_lun	*un = ssc->ssc_un;
9575 	struct mode_caching *mode_caching_page;
9576 	uchar_t		*header;
9577 	size_t		buflen = SDC_BUFLEN(un);
9578 	int		hdrlen = SDC_HDRLEN(un);
9579 	int		bdlen;
9580 	int		rval;
9581 
9582 	rval = sd_get_caching_mode_page(ssc, MODEPAGE_CURRENT, &header, &bdlen);
9583 	switch (rval) {
9584 	case 0:
9585 		/* Check the relevant bits on successful mode sense */
9586 		mode_caching_page = (struct mode_caching *)(header + hdrlen +
9587 		    bdlen);
9588 		if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
9589 		    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
9590 		    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
9591 		    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
9592 			size_t sbuflen;
9593 			uchar_t save_pg;
9594 
9595 			/*
9596 			 * Construct select buffer length based on the
9597 			 * length of the sense data returned.
9598 			 */
9599 			sbuflen = hdrlen + bdlen + sizeof (struct mode_page) +
9600 			    (int)mode_caching_page->mode_page.length;
9601 
9602 			/* Set the caching bits as requested */
9603 			if (rcd_flag == SD_CACHE_ENABLE)
9604 				mode_caching_page->rcd = 0;
9605 			else if (rcd_flag == SD_CACHE_DISABLE)
9606 				mode_caching_page->rcd = 1;
9607 
9608 			if (wce_flag == SD_CACHE_ENABLE)
9609 				mode_caching_page->wce = 1;
9610 			else if (wce_flag == SD_CACHE_DISABLE)
9611 				mode_caching_page->wce = 0;
9612 
9613 			/*
9614 			 * Save the page if the mode sense says the
9615 			 * drive supports it.
9616 			 */
9617 			save_pg = mode_caching_page->mode_page.ps ?
9618 			    SD_SAVE_PAGE : SD_DONTSAVE_PAGE;
9619 
9620 			/* Clear reserved bits before mode select */
9621 			mode_caching_page->mode_page.ps = 0;
9622 
9623 			/*
9624 			 * Clear out mode header for mode select.
9625 			 * The rest of the retrieved page will be reused.
9626 			 */
9627 			bzero(header, hdrlen);
9628 
9629 			if (un->un_f_cfg_is_atapi == TRUE) {
9630 				struct mode_header_grp2 *mhp =
9631 				    (struct mode_header_grp2 *)header;
9632 				mhp->bdesc_length_hi = bdlen >> 8;
9633 				mhp->bdesc_length_lo = (uchar_t)bdlen & 0xff;
9634 			} else {
9635 				((struct mode_header *)header)->bdesc_length =
9636 				    bdlen;
9637 			}
9638 
9639 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9640 
9641 			/* Issue mode select to change the cache settings */
9642 			rval = sd_send_scsi_MODE_SELECT(ssc, SDC_CDB_GROUP(un),
9643 			    header, sbuflen, save_pg, SD_PATH_DIRECT);
9644 		}
9645 		kmem_free(header, buflen);
9646 		break;
9647 	case EIO:
9648 		sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
9649 		break;
9650 	default:
9651 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9652 		break;
9653 	}
9654 
9655 	return (rval);
9656 }
9657 
9658 
9659 /*
9660  *    Function: sd_get_write_cache_enabled()
9661  *
9662  * Description: This routine is the driver entry point for determining if write
9663  *		caching is enabled.  It examines the WCE (write cache enable)
9664  *		bits of mode page 8 (MODEPAGE_CACHING) with Page Control field
9665  *		bits set to MODEPAGE_CURRENT.
9666  *
9667  *   Arguments: ssc		- ssc contains pointer to driver soft state
9668  *				  (unit) structure for this target.
9669  *		is_enabled	- pointer to int where write cache enabled state
9670  *				  is returned (non-zero -> write cache enabled)
9671  *
9672  * Return Code: EIO
9673  *		code returned by sd_send_scsi_MODE_SENSE
9674  *
9675  *     Context: Kernel Thread
9676  *
9677  * NOTE: If ioctl is added to disable write cache, this sequence should
9678  * be followed so that no locking is required for accesses to
9679  * un->un_f_write_cache_enabled:
9680  *	do mode select to clear wce
9681  *	do synchronize cache to flush cache
9682  *	set un->un_f_write_cache_enabled = FALSE
9683  *
9684  * Conversely, an ioctl to enable the write cache should be done
9685  * in this order:
9686  *	set un->un_f_write_cache_enabled = TRUE
9687  *	do mode select to set wce
9688  */
9689 
9690 static int
9691 sd_get_write_cache_enabled(sd_ssc_t *ssc, int *is_enabled)
9692 {
9693 	struct sd_lun	*un = ssc->ssc_un;
9694 	struct mode_caching *mode_caching_page;
9695 	uchar_t		*header;
9696 	size_t		buflen = SDC_BUFLEN(un);
9697 	int		hdrlen = SDC_HDRLEN(un);
9698 	int		bdlen;
9699 	int		rval;
9700 
9701 	/* In case of error, flag as enabled */
9702 	*is_enabled = TRUE;
9703 
9704 	rval = sd_get_caching_mode_page(ssc, MODEPAGE_CURRENT, &header, &bdlen);
9705 	switch (rval) {
9706 	case 0:
9707 		mode_caching_page = (struct mode_caching *)(header + hdrlen +
9708 		    bdlen);
9709 		*is_enabled = mode_caching_page->wce;
9710 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
9711 		kmem_free(header, buflen);
9712 		break;
9713 	case EIO: {
9714 		/*
9715 		 * Some disks do not support Mode Sense(6), we
9716 		 * should ignore this kind of error (sense key is
9717 		 * 0x5 - illegal request).
9718 		 */
9719 		uint8_t *sensep;
9720 		int senlen;
9721 
9722 		sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
9723 		senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
9724 		    ssc->ssc_uscsi_cmd->uscsi_rqresid);
9725 
9726 		if (senlen > 0 &&
9727 		    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
9728 			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
9729 		} else {
9730 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
9731 		}
9732 		break;
9733 	}
9734 	default:
9735 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9736 		break;
9737 	}
9738 
9739 	return (rval);
9740 }
9741 
9742 /*
9743  *    Function: sd_get_write_cache_changeable()
9744  *
9745  * Description: This routine is the driver entry point for determining if write
9746  *		caching is changeable.  It examines the WCE (write cache enable)
9747  *		bits of mode page 8 (MODEPAGE_CACHING) with Page Control field
9748  *		bits set to MODEPAGE_CHANGEABLE.
9749  *
9750  *   Arguments: ssc		- ssc contains pointer to driver soft state
9751  *				  (unit) structure for this target.
9752  *		is_changeable	- pointer to int where write cache changeable
9753  *				  state is returned (non-zero -> write cache
9754  *				  changeable)
9755  *
9756  *     Context: Kernel Thread
9757  */
9758 
9759 static void
9760 sd_get_write_cache_changeable(sd_ssc_t *ssc, int *is_changeable)
9761 {
9762 	struct sd_lun	*un = ssc->ssc_un;
9763 	struct mode_caching *mode_caching_page;
9764 	uchar_t		*header;
9765 	size_t		buflen = SDC_BUFLEN(un);
9766 	int		hdrlen = SDC_HDRLEN(un);
9767 	int		bdlen;
9768 	int		rval;
9769 
9770 	/* In case of error, flag as enabled */
9771 	*is_changeable = TRUE;
9772 
9773 	rval = sd_get_caching_mode_page(ssc, MODEPAGE_CHANGEABLE, &header,
9774 	    &bdlen);
9775 	switch (rval) {
9776 	case 0:
9777 		mode_caching_page = (struct mode_caching *)(header + hdrlen +
9778 		    bdlen);
9779 		*is_changeable = mode_caching_page->wce;
9780 		kmem_free(header, buflen);
9781 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
9782 		break;
9783 	case EIO:
9784 		sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
9785 		break;
9786 	default:
9787 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9788 		break;
9789 	}
9790 }
9791 
9792 /*
9793  *    Function: sd_get_nv_sup()
9794  *
9795  * Description: This routine is the driver entry point for
9796  * determining whether non-volatile cache is supported. This
9797  * determination process works as follows:
9798  *
9799  * 1. sd first queries sd.conf on whether
9800  * suppress_cache_flush bit is set for this device.
9801  *
9802  * 2. if not there, then queries the internal disk table.
9803  *
9804  * 3. if either sd.conf or internal disk table specifies
9805  * cache flush be suppressed, we don't bother checking
9806  * NV_SUP bit.
9807  *
9808  * If SUPPRESS_CACHE_FLUSH bit is not set to 1, sd queries
9809  * the optional INQUIRY VPD page 0x86. If the device
9810  * supports VPD page 0x86, sd examines the NV_SUP
9811  * (non-volatile cache support) bit in the INQUIRY VPD page
9812  * 0x86:
9813  *   o If NV_SUP bit is set, sd assumes the device has a
9814  *   non-volatile cache and set the
9815  *   un_f_sync_nv_supported to TRUE.
9816  *   o Otherwise cache is not non-volatile,
9817  *   un_f_sync_nv_supported is set to FALSE.
9818  *
9819  * Arguments: un - driver soft state (unit) structure
9820  *
9821  * Return Code:
9822  *
9823  *     Context: Kernel Thread
9824  */
9825 
9826 static void
9827 sd_get_nv_sup(sd_ssc_t *ssc)
9828 {
9829 	int		rval		= 0;
9830 	uchar_t		*inq86		= NULL;
9831 	size_t		inq86_len	= MAX_INQUIRY_SIZE;
9832 	size_t		inq86_resid	= 0;
9833 	struct		dk_callback *dkc;
9834 	struct sd_lun	*un;
9835 
9836 	ASSERT(ssc != NULL);
9837 	un = ssc->ssc_un;
9838 	ASSERT(un != NULL);
9839 
9840 	mutex_enter(SD_MUTEX(un));
9841 
9842 	/*
9843 	 * Be conservative on the device's support of
9844 	 * SYNC_NV bit: un_f_sync_nv_supported is
9845 	 * initialized to be false.
9846 	 */
9847 	un->un_f_sync_nv_supported = FALSE;
9848 
9849 	/*
9850 	 * If either sd.conf or internal disk table
9851 	 * specifies cache flush be suppressed, then
9852 	 * we don't bother checking NV_SUP bit.
9853 	 */
9854 	if (un->un_f_suppress_cache_flush == TRUE) {
9855 		mutex_exit(SD_MUTEX(un));
9856 		return;
9857 	}
9858 
9859 	if (sd_check_vpd_page_support(ssc) == 0 &&
9860 	    un->un_vpd_page_mask & SD_VPD_EXTENDED_DATA_PG) {
9861 		mutex_exit(SD_MUTEX(un));
9862 		/* collect page 86 data if available */
9863 		inq86 = kmem_zalloc(inq86_len, KM_SLEEP);
9864 
9865 		rval = sd_send_scsi_INQUIRY(ssc, inq86, inq86_len,
9866 		    0x01, 0x86, &inq86_resid);
9867 
9868 		if (rval == 0 && (inq86_len - inq86_resid > 6)) {
9869 			SD_TRACE(SD_LOG_COMMON, un,
9870 			    "sd_get_nv_sup: \
9871 			    successfully get VPD page: %x \
9872 			    PAGE LENGTH: %x BYTE 6: %x\n",
9873 			    inq86[1], inq86[3], inq86[6]);
9874 
9875 			mutex_enter(SD_MUTEX(un));
9876 			/*
9877 			 * check the value of NV_SUP bit: only if the device
9878 			 * reports NV_SUP bit to be 1, the
9879 			 * un_f_sync_nv_supported bit will be set to true.
9880 			 */
9881 			if (inq86[6] & SD_VPD_NV_SUP) {
9882 				un->un_f_sync_nv_supported = TRUE;
9883 			}
9884 			mutex_exit(SD_MUTEX(un));
9885 		} else if (rval != 0) {
9886 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9887 		}
9888 
9889 		kmem_free(inq86, inq86_len);
9890 	} else {
9891 		mutex_exit(SD_MUTEX(un));
9892 	}
9893 
9894 	/*
9895 	 * Send a SYNC CACHE command to check whether
9896 	 * SYNC_NV bit is supported. This command should have
9897 	 * un_f_sync_nv_supported set to correct value.
9898 	 */
9899 	mutex_enter(SD_MUTEX(un));
9900 	if (un->un_f_sync_nv_supported) {
9901 		mutex_exit(SD_MUTEX(un));
9902 		dkc = kmem_zalloc(sizeof (struct dk_callback), KM_SLEEP);
9903 		dkc->dkc_flag = FLUSH_VOLATILE;
9904 		(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
9905 
9906 		/*
9907 		 * Send a TEST UNIT READY command to the device. This should
9908 		 * clear any outstanding UNIT ATTENTION that may be present.
9909 		 */
9910 		rval = sd_send_scsi_TEST_UNIT_READY(ssc, SD_DONT_RETRY_TUR);
9911 		if (rval != 0)
9912 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9913 
9914 		kmem_free(dkc, sizeof (struct dk_callback));
9915 	} else {
9916 		mutex_exit(SD_MUTEX(un));
9917 	}
9918 
9919 	SD_TRACE(SD_LOG_COMMON, un, "sd_get_nv_sup: \
9920 	    un_f_suppress_cache_flush is set to %d\n",
9921 	    un->un_f_suppress_cache_flush);
9922 }
9923 
9924 /*
9925  *    Function: sd_make_device
9926  *
9927  * Description: Utility routine to return the Solaris device number from
9928  *		the data in the device's dev_info structure.
9929  *
9930  * Return Code: The Solaris device number
9931  *
9932  *     Context: Any
9933  */
9934 
9935 static dev_t
9936 sd_make_device(dev_info_t *devi)
9937 {
9938 	return (makedevice(ddi_driver_major(devi),
9939 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
9940 }
9941 
9942 
9943 /*
9944  *    Function: sd_pm_entry
9945  *
9946  * Description: Called at the start of a new command to manage power
9947  *		and busy status of a device. This includes determining whether
9948  *		the current power state of the device is sufficient for
9949  *		performing the command or whether it must be changed.
9950  *		The PM framework is notified appropriately.
9951  *		Only with a return status of DDI_SUCCESS will the
9952  *		component be busy to the framework.
9953  *
9954  *		All callers of sd_pm_entry must check the return status
9955  *		and only call sd_pm_exit it it was DDI_SUCCESS. A status
9956  *		of DDI_FAILURE indicates the device failed to power up.
9957  *		In this case un_pm_count has been adjusted so the result
9958  *		on exit is still powered down, ie. count is less than 0.
9959  *		Calling sd_pm_exit with this count value hits an ASSERT.
9960  *
9961  * Return Code: DDI_SUCCESS or DDI_FAILURE
9962  *
9963  *     Context: Kernel thread context.
9964  */
9965 
9966 static int
9967 sd_pm_entry(struct sd_lun *un)
9968 {
9969 	int return_status = DDI_SUCCESS;
9970 
9971 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9972 	ASSERT(!mutex_owned(&un->un_pm_mutex));
9973 
9974 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
9975 
9976 	if (un->un_f_pm_is_enabled == FALSE) {
9977 		SD_TRACE(SD_LOG_IO_PM, un,
9978 		    "sd_pm_entry: exiting, PM not enabled\n");
9979 		return (return_status);
9980 	}
9981 
9982 	/*
9983 	 * Just increment a counter if PM is enabled. On the transition from
9984 	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
9985 	 * the count with each IO and mark the device as idle when the count
9986 	 * hits 0.
9987 	 *
9988 	 * If the count is less than 0 the device is powered down. If a powered
9989 	 * down device is successfully powered up then the count must be
9990 	 * incremented to reflect the power up. Note that it'll get incremented
9991 	 * a second time to become busy.
9992 	 *
9993 	 * Because the following has the potential to change the device state
9994 	 * and must release the un_pm_mutex to do so, only one thread can be
9995 	 * allowed through at a time.
9996 	 */
9997 
9998 	mutex_enter(&un->un_pm_mutex);
9999 	while (un->un_pm_busy == TRUE) {
10000 		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
10001 	}
10002 	un->un_pm_busy = TRUE;
10003 
10004 	if (un->un_pm_count < 1) {
10005 
10006 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
10007 
10008 		/*
10009 		 * Indicate we are now busy so the framework won't attempt to
10010 		 * power down the device. This call will only fail if either
10011 		 * we passed a bad component number or the device has no
10012 		 * components. Neither of these should ever happen.
10013 		 */
10014 		mutex_exit(&un->un_pm_mutex);
10015 		return_status = pm_busy_component(SD_DEVINFO(un), 0);
10016 		ASSERT(return_status == DDI_SUCCESS);
10017 
10018 		mutex_enter(&un->un_pm_mutex);
10019 
10020 		if (un->un_pm_count < 0) {
10021 			mutex_exit(&un->un_pm_mutex);
10022 
10023 			SD_TRACE(SD_LOG_IO_PM, un,
10024 			    "sd_pm_entry: power up component\n");
10025 
10026 			/*
10027 			 * pm_raise_power will cause sdpower to be called
10028 			 * which brings the device power level to the
10029 			 * desired state, If successful, un_pm_count and
10030 			 * un_power_level will be updated appropriately.
10031 			 */
10032 			return_status = pm_raise_power(SD_DEVINFO(un), 0,
10033 			    SD_PM_STATE_ACTIVE(un));
10034 
10035 			mutex_enter(&un->un_pm_mutex);
10036 
10037 			if (return_status != DDI_SUCCESS) {
10038 				/*
10039 				 * Power up failed.
10040 				 * Idle the device and adjust the count
10041 				 * so the result on exit is that we're
10042 				 * still powered down, ie. count is less than 0.
10043 				 */
10044 				SD_TRACE(SD_LOG_IO_PM, un,
10045 				    "sd_pm_entry: power up failed,"
10046 				    " idle the component\n");
10047 
10048 				(void) pm_idle_component(SD_DEVINFO(un), 0);
10049 				un->un_pm_count--;
10050 			} else {
10051 				/*
10052 				 * Device is powered up, verify the
10053 				 * count is non-negative.
10054 				 * This is debug only.
10055 				 */
10056 				ASSERT(un->un_pm_count == 0);
10057 			}
10058 		}
10059 
10060 		if (return_status == DDI_SUCCESS) {
10061 			/*
10062 			 * For performance, now that the device has been tagged
10063 			 * as busy, and it's known to be powered up, update the
10064 			 * chain types to use jump tables that do not include
10065 			 * pm. This significantly lowers the overhead and
10066 			 * therefore improves performance.
10067 			 */
10068 
10069 			mutex_exit(&un->un_pm_mutex);
10070 			mutex_enter(SD_MUTEX(un));
10071 			SD_TRACE(SD_LOG_IO_PM, un,
10072 			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
10073 			    un->un_uscsi_chain_type);
10074 
10075 			if (un->un_f_non_devbsize_supported) {
10076 				un->un_buf_chain_type =
10077 				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
10078 			} else {
10079 				un->un_buf_chain_type =
10080 				    SD_CHAIN_INFO_DISK_NO_PM;
10081 			}
10082 			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
10083 
10084 			SD_TRACE(SD_LOG_IO_PM, un,
10085 			    "             changed  uscsi_chain_type to   %d\n",
10086 			    un->un_uscsi_chain_type);
10087 			mutex_exit(SD_MUTEX(un));
10088 			mutex_enter(&un->un_pm_mutex);
10089 
10090 			if (un->un_pm_idle_timeid == NULL) {
10091 				/* 300 ms. */
10092 				un->un_pm_idle_timeid =
10093 				    timeout(sd_pm_idletimeout_handler, un,
10094 				    (drv_usectohz((clock_t)300000)));
10095 				/*
10096 				 * Include an extra call to busy which keeps the
10097 				 * device busy with-respect-to the PM layer
10098 				 * until the timer fires, at which time it'll
10099 				 * get the extra idle call.
10100 				 */
10101 				(void) pm_busy_component(SD_DEVINFO(un), 0);
10102 			}
10103 		}
10104 	}
10105 	un->un_pm_busy = FALSE;
10106 	/* Next... */
10107 	cv_signal(&un->un_pm_busy_cv);
10108 
10109 	un->un_pm_count++;
10110 
10111 	SD_TRACE(SD_LOG_IO_PM, un,
10112 	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
10113 
10114 	mutex_exit(&un->un_pm_mutex);
10115 
10116 	return (return_status);
10117 }
10118 
10119 
10120 /*
10121  *    Function: sd_pm_exit
10122  *
10123  * Description: Called at the completion of a command to manage busy
10124  *		status for the device. If the device becomes idle the
10125  *		PM framework is notified.
10126  *
10127  *     Context: Kernel thread context
10128  */
10129 
10130 static void
10131 sd_pm_exit(struct sd_lun *un)
10132 {
10133 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10134 	ASSERT(!mutex_owned(&un->un_pm_mutex));
10135 
10136 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
10137 
10138 	/*
10139 	 * After attach the following flag is only read, so don't
10140 	 * take the penalty of acquiring a mutex for it.
10141 	 */
10142 	if (un->un_f_pm_is_enabled == TRUE) {
10143 
10144 		mutex_enter(&un->un_pm_mutex);
10145 		un->un_pm_count--;
10146 
10147 		SD_TRACE(SD_LOG_IO_PM, un,
10148 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
10149 
10150 		ASSERT(un->un_pm_count >= 0);
10151 		if (un->un_pm_count == 0) {
10152 			mutex_exit(&un->un_pm_mutex);
10153 
10154 			SD_TRACE(SD_LOG_IO_PM, un,
10155 			    "sd_pm_exit: idle component\n");
10156 
10157 			(void) pm_idle_component(SD_DEVINFO(un), 0);
10158 
10159 		} else {
10160 			mutex_exit(&un->un_pm_mutex);
10161 		}
10162 	}
10163 
10164 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
10165 }
10166 
10167 
10168 /*
10169  *    Function: sdopen
10170  *
10171  * Description: Driver's open(9e) entry point function.
10172  *
 *   Arguments: dev_p   - pointer to device number
10174  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
10175  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10176  *		cred_p  - user credential pointer
10177  *
10178  * Return Code: EINVAL
10179  *		ENXIO
10180  *		EIO
10181  *		EROFS
10182  *		EBUSY
10183  *
10184  *     Context: Kernel thread context
10185  */
10186 /* ARGSUSED */
static int
sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
{
	struct sd_lun	*un;
	int		nodelay;
	int		part;
	uint64_t	partmask;
	int		instance;
	dev_t		dev;
	int		rval = EIO;
	diskaddr_t	nblks = 0;
	diskaddr_t	label_cap;

	/* Validate the open type */
	if (otyp >= OTYPCNT) {
		return (EINVAL);
	}

	dev = *dev_p;
	instance = SDUNIT(dev);
	mutex_enter(&sd_detach_mutex);

	/*
	 * Fail the open if there is no softstate for the instance, or
	 * if another thread somewhere is trying to detach the instance.
	 */
	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
	    (un->un_detach_count != 0)) {
		mutex_exit(&sd_detach_mutex);
		/*
		 * The probe cache only needs to be cleared when open (9e) fails
		 * with ENXIO (4238046).
		 */
		/*
		 * un-conditionally clearing probe cache is ok with
		 * separate sd/ssd binaries
		 * x86 platform can be an issue with both parallel
		 * and fibre in 1 binary
		 */
		sd_scsi_clear_probe_cache();
		return (ENXIO);
	}

	/*
	 * The un_layer_count is to prevent another thread in specfs from
	 * trying to detach the instance, which can happen when we are
	 * called from a higher-layer driver instead of thru specfs.
	 * This will not be needed when DDI provides a layered driver
	 * interface that allows specfs to know that an instance is in
	 * use by a layered driver & should not be detached.
	 *
	 * Note: the semantics for layered driver opens are exactly one
	 * close for every open.
	 */
	if (otyp == OTYP_LYR) {
		un->un_layer_count++;
	}

	/*
	 * Keep a count of the current # of opens in progress. This is because
	 * some layered drivers try to call us as a regular open. This can
	 * cause problems that we cannot prevent, however by keeping this count
	 * we can at least keep our open and detach routines from racing against
	 * each other under such conditions.
	 */
	un->un_opens_in_progress++;
	mutex_exit(&sd_detach_mutex);

	/*
	 * NDELAY/NONBLOCK opens skip the power-up and ready/valid checks
	 * below; partmask identifies this partition in the open maps.
	 * NOTE(review): "1 << part" is an int shift assigned to a uint64_t;
	 * if part can exceed 30 this should be 1ULL << part -- confirm the
	 * maximum partition number cmlb can hand out here.
	 */
	nodelay  = (flag & (FNDELAY | FNONBLOCK));
	part	 = SDPART(dev);
	partmask = 1 << part;

	/*
	 * We use a semaphore here in order to serialize
	 * open and close requests on the device.
	 */
	sema_p(&un->un_semoclose);

	mutex_enter(SD_MUTEX(un));

	/*
	 * All device accesses go thru sdstrategy() where we check
	 * on suspend status but there could be a scsi_poll command,
	 * which bypasses sdstrategy(), so we need to check pm
	 * status.
	 */

	if (!nodelay) {
		while ((un->un_state == SD_STATE_SUSPENDED) ||
		    (un->un_state == SD_STATE_PM_CHANGING)) {
			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
		}

		mutex_exit(SD_MUTEX(un));
		if (sd_pm_entry(un) != DDI_SUCCESS) {
			rval = EIO;
			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
			    "sdopen: sd_pm_entry failed\n");
			goto open_failed_with_pm;
		}
		mutex_enter(SD_MUTEX(un));
	}

	/* check for previous exclusive open */
	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);

	if (un->un_exclopen & (partmask)) {
		goto excl_open_fail;
	}

	/*
	 * An exclusive open fails if any layered open or any regular
	 * open type already has this partition open.
	 */
	if (flag & FEXCL) {
		int i;
		if (un->un_ocmap.lyropen[part]) {
			goto excl_open_fail;
		}
		for (i = 0; i < (OTYPCNT - 1); i++) {
			if (un->un_ocmap.regopen[i] & (partmask)) {
				goto excl_open_fail;
			}
		}
	}

	/*
	 * Check the write permission if this is a removable media device,
	 * NDELAY has not been set, and writable permission is requested.
	 *
	 * Note: If NDELAY was set and this is write-protected media the WRITE
	 * attempt will fail with EIO as part of the I/O processing. This is a
	 * more permissive implementation that allows the open to succeed and
	 * WRITE attempts to fail when appropriate.
	 */
	if (un->un_f_chk_wp_open) {
		if ((flag & FWRITE) && (!nodelay)) {
			mutex_exit(SD_MUTEX(un));
			/*
			 * Defer the check for write permission on writable
			 * DVD drive till sdstrategy and will not fail open even
			 * if FWRITE is set as the device can be writable
			 * depending upon the media and the media can change
			 * after the call to open().
			 */
			if (un->un_f_dvdram_writable_device == FALSE) {
				if (ISCD(un) || sr_check_wp(dev)) {
				rval = EROFS;
				mutex_enter(SD_MUTEX(un));
				SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
				    "write to cd or write protected media\n");
				goto open_fail;
				}
			}
			mutex_enter(SD_MUTEX(un));
		}
	}

	/*
	 * If opening in NDELAY/NONBLOCK mode, just return.
	 * Check if disk is ready and has a valid geometry later.
	 */
	if (!nodelay) {
		sd_ssc_t	*ssc;

		mutex_exit(SD_MUTEX(un));
		ssc = sd_ssc_init(un);
		rval = sd_ready_and_valid(ssc, part);
		sd_ssc_fini(ssc);
		mutex_enter(SD_MUTEX(un));
		/*
		 * Fail if device is not ready or if the number of disk
		 * blocks is zero or negative for non CD devices.
		 */

		nblks = 0;

		if (rval == SD_READY_VALID && (!ISCD(un))) {
			/* if cmlb_partinfo fails, nblks remains 0 */
			mutex_exit(SD_MUTEX(un));
			(void) cmlb_partinfo(un->un_cmlbhandle, part, &nblks,
			    NULL, NULL, NULL, (void *)SD_PATH_DIRECT);
			mutex_enter(SD_MUTEX(un));
		}

		if ((rval != SD_READY_VALID) ||
		    (!ISCD(un) && nblks <= 0)) {
			rval = un->un_f_has_removable_media ? ENXIO : EIO;
			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
			    "device not ready or invalid disk block value\n");
			goto open_fail;
		}
#if defined(__x86)
	} else {
		uchar_t *cp;
		/*
		 * x86 requires special nodelay handling, so that p0 is
		 * always defined and accessible.
		 * Invalidate geometry only if device is not already open.
		 */
		cp = &un->un_ocmap.chkd[0];
		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
			if (*cp != (uchar_t)0) {
				break;
			}
			cp++;
		}
		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
			mutex_exit(SD_MUTEX(un));
			cmlb_invalidate(un->un_cmlbhandle,
			    (void *)SD_PATH_DIRECT);
			mutex_enter(SD_MUTEX(un));
		}

#endif
	}

	/* Record this open in the per-type open map. */
	if (otyp == OTYP_LYR) {
		un->un_ocmap.lyropen[part]++;
	} else {
		un->un_ocmap.regopen[otyp] |= partmask;
	}

	/* Set up open and exclusive open flags */
	if (flag & FEXCL) {
		un->un_exclopen |= (partmask);
	}

	/*
	 * If the lun is EFI labeled and lun capacity is greater than the
	 * capacity contained in the label, log a sys-event to notify the
	 * interested module.
	 * To avoid an infinite loop of logging sys-event, we only log the
	 * event when the lun is not opened in NDELAY mode. The event handler
	 * should open the lun in NDELAY mode.
	 */
	if (!nodelay) {
		mutex_exit(SD_MUTEX(un));
		if (cmlb_efi_label_capacity(un->un_cmlbhandle, &label_cap,
		    (void*)SD_PATH_DIRECT) == 0) {
			mutex_enter(SD_MUTEX(un));
			if (un->un_f_blockcount_is_valid &&
			    un->un_blockcount > label_cap &&
			    un->un_f_expnevent == B_FALSE) {
				un->un_f_expnevent = B_TRUE;
				mutex_exit(SD_MUTEX(un));
				sd_log_lun_expansion_event(un,
				    (nodelay ? KM_NOSLEEP : KM_SLEEP));
				mutex_enter(SD_MUTEX(un));
			}
		} else {
			mutex_enter(SD_MUTEX(un));
		}
	}

	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
	    "open of part %d type %d\n", part, otyp);

	mutex_exit(SD_MUTEX(un));
	if (!nodelay) {
		sd_pm_exit(un);
	}

	sema_v(&un->un_semoclose);

	mutex_enter(&sd_detach_mutex);
	un->un_opens_in_progress--;
	mutex_exit(&sd_detach_mutex);

	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
	/*
	 * NOTE(review): DDI_SUCCESS doubles as the open(9e) success value
	 * here while the failure paths return errnos; this relies on
	 * DDI_SUCCESS being 0.
	 */
	return (DDI_SUCCESS);

	/*
	 * Error exit paths. open_failed_with_pm skips sd_pm_exit()
	 * because sd_pm_entry() did not succeed (or was not attempted).
	 */
excl_open_fail:
	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
	rval = EBUSY;

open_fail:
	mutex_exit(SD_MUTEX(un));

	/*
	 * On a failed open we must exit the pm management.
	 */
	if (!nodelay) {
		sd_pm_exit(un);
	}
open_failed_with_pm:
	sema_v(&un->un_semoclose);

	mutex_enter(&sd_detach_mutex);
	un->un_opens_in_progress--;
	if (otyp == OTYP_LYR) {
		un->un_layer_count--;
	}
	mutex_exit(&sd_detach_mutex);

	return (rval);
}
10483 
10484 
10485 /*
10486  *    Function: sdclose
10487  *
10488  * Description: Driver's close(9e) entry point function.
10489  *
10490  *   Arguments: dev    - device number
10491  *		flag   - file status flag, informational only
10492  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10493  *		cred_p - user credential pointer
10494  *
10495  * Return Code: ENXIO
10496  *
10497  *     Context: Kernel thread context
10498  */
10499 /* ARGSUSED */
static int
sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
	struct sd_lun	*un;
	uchar_t		*cp;
	int		part;
	int		nodelay;
	int		rval = 0;

	/* Validate the open type */
	if (otyp >= OTYPCNT) {
		return (ENXIO);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	part = SDPART(dev);
	nodelay = flag & (FNDELAY | FNONBLOCK);

	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
	    "sdclose: close of part %d type %d\n", part, otyp);

	/*
	 * We use a semaphore here in order to serialize
	 * open and close requests on the device.
	 */
	sema_p(&un->un_semoclose);

	mutex_enter(SD_MUTEX(un));

	/* Don't proceed if power is being changed. */
	while (un->un_state == SD_STATE_PM_CHANGING) {
		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
	}

	/* Drop any exclusive-open claim held on this partition. */
	if (un->un_exclopen & (1 << part)) {
		un->un_exclopen &= ~(1 << part);
	}

	/* Update the open partition map */
	if (otyp == OTYP_LYR) {
		un->un_ocmap.lyropen[part] -= 1;
	} else {
		un->un_ocmap.regopen[otyp] &= ~(1 << part);
	}

	/*
	 * Scan the open map; if every byte is clear, no partition is
	 * open by any type and this was the last close.
	 */
	cp = &un->un_ocmap.chkd[0];
	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
		if (*cp != '\0') {
			break;
		}
		cp++;
	}

	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");

		/*
		 * We avoid persistence upon the last close, and set
		 * the throttle back to the maximum.
		 */
		un->un_throttle = un->un_saved_throttle;

		if (un->un_state == SD_STATE_OFFLINE) {
			if (un->un_f_is_fibre == FALSE) {
				scsi_log(SD_DEVINFO(un), sd_label,
				    CE_WARN, "offline\n");
			}
			mutex_exit(SD_MUTEX(un));
			cmlb_invalidate(un->un_cmlbhandle,
			    (void *)SD_PATH_DIRECT);
			mutex_enter(SD_MUTEX(un));

		} else {
			/*
			 * Flush any outstanding writes in NVRAM cache.
			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
			 * cmd, it may not work for non-Pluto devices.
			 * SYNCHRONIZE CACHE is not required for removables,
			 * except DVD-RAM drives.
			 *
			 * Also note: because SYNCHRONIZE CACHE is currently
			 * the only command issued here that requires the
			 * drive be powered up, only do the power up before
			 * sending the Sync Cache command. If additional
			 * commands are added which require a powered up
			 * drive, the following sequence may have to change.
			 *
			 * And finally, note that parallel SCSI on SPARC
			 * only issues a Sync Cache to DVD-RAM, a newly
			 * supported device.
			 */
#if defined(__x86)
			if ((un->un_f_sync_cache_supported &&
			    un->un_f_sync_cache_required) ||
			    un->un_f_dvdram_writable_device == TRUE) {
#else
			if (un->un_f_dvdram_writable_device == TRUE) {
#endif
				mutex_exit(SD_MUTEX(un));
				if (sd_pm_entry(un) == DDI_SUCCESS) {
					rval =
					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
					    NULL);
					/* ignore error if not supported */
					if (rval == ENOTSUP) {
						rval = 0;
					} else if (rval != 0) {
						rval = EIO;
					}
					sd_pm_exit(un);
				} else {
					rval = EIO;
				}
				mutex_enter(SD_MUTEX(un));
			}

			/*
			 * For devices which supports DOOR_LOCK, send an ALLOW
			 * MEDIA REMOVAL command, but don't get upset if it
			 * fails. We need to raise the power of the drive before
			 * we can call sd_send_scsi_DOORLOCK()
			 */
			if (un->un_f_doorlock_supported) {
				mutex_exit(SD_MUTEX(un));
				if (sd_pm_entry(un) == DDI_SUCCESS) {
					sd_ssc_t	*ssc;

					ssc = sd_ssc_init(un);
					rval = sd_send_scsi_DOORLOCK(ssc,
					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
					if (rval != 0)
						sd_ssc_assessment(ssc,
						    SD_FMT_IGNORE);
					sd_ssc_fini(ssc);

					sd_pm_exit(un);
					if (ISCD(un) && (rval != 0) &&
					    (nodelay != 0)) {
						rval = ENXIO;
					}
				} else {
					rval = EIO;
				}
				mutex_enter(SD_MUTEX(un));
			}

			/*
			 * If a device has removable media, invalidate all
			 * parameters related to media, such as geometry,
			 * blocksize, and blockcount.
			 */
			if (un->un_f_has_removable_media) {
				sr_ejected(un);
			}

			/*
			 * Destroy the cache (if it exists) which was
			 * allocated for the write maps since this is
			 * the last close for this media.
			 */
			if (un->un_wm_cache) {
				/*
				 * Check if there are pending commands.
				 * and if there are give a warning and
				 * do not destroy the cache.
				 */
				if (un->un_ncmds_in_driver > 0) {
					scsi_log(SD_DEVINFO(un),
					    sd_label, CE_WARN,
					    "Unable to clean up memory "
					    "because of pending I/O\n");
				} else {
					kmem_cache_destroy(
					    un->un_wm_cache);
					un->un_wm_cache = NULL;
				}
			}
		}
	}

	mutex_exit(SD_MUTEX(un));
	sema_v(&un->un_semoclose);

	if (otyp == OTYP_LYR) {
		mutex_enter(&sd_detach_mutex);
		/*
		 * The detach routine may run when the layer count
		 * drops to zero.
		 */
		un->un_layer_count--;
		mutex_exit(&sd_detach_mutex);
	}

	return (rval);
}
10698 
10699 
10700 /*
10701  *    Function: sd_ready_and_valid
10702  *
10703  * Description: Test if device is ready and has a valid geometry.
10704  *
 *   Arguments: ssc  - sd_ssc_t, which contains the driver soft state
 *		       (unit) structure for the target
 *		part - partition number to validate
 *
 * Return Code: SD_READY_VALID		ready and valid label
 *		SD_NOT_READY_VALID	not ready, no label
 *		SD_RESERVED_BY_OTHERS	reservation conflict
 *		ENOMEM			wmap cache allocation failed
10711  *
10712  *     Context: Never called at interrupt context.
10713  */
10714 
static int
sd_ready_and_valid(sd_ssc_t *ssc, int part)
{
	struct sd_errstats	*stp;
	uint64_t		capacity;
	uint_t			lbasize;
	int			rval = SD_READY_VALID;
	char			name_str[48];
	boolean_t		is_valid;
	struct sd_lun		*un;
	int			status;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	mutex_enter(SD_MUTEX(un));
	/*
	 * If a device has removable media, we must check if media is
	 * ready when checking if this device is ready and valid.
	 */
	if (un->un_f_has_removable_media) {
		mutex_exit(SD_MUTEX(un));
		status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);

		if (status != 0) {
			rval = SD_NOT_READY_VALID;
			mutex_enter(SD_MUTEX(un));

			/* Ignore all failed status for removable media */
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);

			goto done;
		}

		/*
		 * NOTE(review): the label-valid state is sampled here
		 * without SD_MUTEX held; the mutex is re-taken just below.
		 */
		is_valid = SD_IS_VALID_LABEL(un);
		mutex_enter(SD_MUTEX(un));
		if (!is_valid ||
		    (un->un_f_blockcount_is_valid == FALSE) ||
		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {

			/* capacity has to be read every open. */
			mutex_exit(SD_MUTEX(un));
			status = sd_send_scsi_READ_CAPACITY(ssc, &capacity,
			    &lbasize, SD_PATH_DIRECT);

			if (status != 0) {
				sd_ssc_assessment(ssc, SD_FMT_IGNORE);

				cmlb_invalidate(un->un_cmlbhandle,
				    (void *)SD_PATH_DIRECT);
				mutex_enter(SD_MUTEX(un));
				rval = SD_NOT_READY_VALID;

				goto done;
			} else {
				mutex_enter(SD_MUTEX(un));
				sd_update_block_info(un, lbasize, capacity);
			}
		}

		/*
		 * Check if the media in the device is writable or not.
		 */
		if (!is_valid && ISCD(un)) {
			sd_check_for_writable_cd(ssc, SD_PATH_DIRECT);
		}

	} else {
		/*
		 * Do a test unit ready to clear any unit attention from non-cd
		 * devices.
		 */
		mutex_exit(SD_MUTEX(un));

		status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
		if (status != 0) {
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		}

		mutex_enter(SD_MUTEX(un));
	}


	/*
	 * If this is a non 512 block device, allocate space for
	 * the wmap cache. This is being done here since every time
	 * a media is changed this routine will be called and the
	 * block size is a function of media rather than device.
	 */
	if (((un->un_f_rmw_type != SD_RMW_TYPE_RETURN_ERROR ||
	    un->un_f_non_devbsize_supported) &&
	    un->un_tgt_blocksize != DEV_BSIZE) ||
	    un->un_f_enable_rmw) {
		if (!(un->un_wm_cache)) {
			(void) snprintf(name_str, sizeof (name_str),
			    "%s%d_cache",
			    ddi_driver_name(SD_DEVINFO(un)),
			    ddi_get_instance(SD_DEVINFO(un)));
			un->un_wm_cache = kmem_cache_create(
			    name_str, sizeof (struct sd_w_map),
			    8, sd_wm_cache_constructor,
			    sd_wm_cache_destructor, NULL,
			    (void *)un, NULL, 0);
			if (!(un->un_wm_cache)) {
				/*
				 * ENOMEM escapes the SD_* return-code set;
				 * callers treat any value other than
				 * SD_READY_VALID as a failure.
				 */
				rval = ENOMEM;
				goto done;
			}
		}
	}

	if (un->un_state == SD_STATE_NORMAL) {
		/*
		 * If the target is not yet ready here (defined by a TUR
		 * failure), invalidate the geometry and print an 'offline'
		 * message. This is a legacy message, as the state of the
		 * target is not actually changed to SD_STATE_OFFLINE.
		 *
		 * If the TUR fails for EACCES (Reservation Conflict),
		 * SD_RESERVED_BY_OTHERS will be returned to indicate
		 * reservation conflict. If the TUR fails for other
		 * reasons, SD_NOT_READY_VALID will be returned.
		 */
		int err;

		mutex_exit(SD_MUTEX(un));
		err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
		mutex_enter(SD_MUTEX(un));

		if (err != 0) {
			mutex_exit(SD_MUTEX(un));
			cmlb_invalidate(un->un_cmlbhandle,
			    (void *)SD_PATH_DIRECT);
			mutex_enter(SD_MUTEX(un));
			if (err == EACCES) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "reservation conflict\n");
				rval = SD_RESERVED_BY_OTHERS;
				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
			} else {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "drive offline\n");
				rval = SD_NOT_READY_VALID;
				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
			}
			goto done;
		}
	}

	/*
	 * Validate the label and the requested partition, but skip both
	 * while a format is in progress.
	 */
	if (un->un_f_format_in_progress == FALSE) {
		mutex_exit(SD_MUTEX(un));

		(void) cmlb_validate(un->un_cmlbhandle, 0,
		    (void *)SD_PATH_DIRECT);
		if (cmlb_partinfo(un->un_cmlbhandle, part, NULL, NULL, NULL,
		    NULL, (void *) SD_PATH_DIRECT) != 0) {
			rval = SD_NOT_READY_VALID;
			mutex_enter(SD_MUTEX(un));

			goto done;
		}
		if (un->un_f_pkstats_enabled) {
			sd_set_pstats(un);
			SD_TRACE(SD_LOG_IO_PARTITION, un,
			    "sd_ready_and_valid: un:0x%p pstats created and "
			    "set\n", un);
		}
		mutex_enter(SD_MUTEX(un));
	}

	/*
	 * If this device supports DOOR_LOCK command, try and send
	 * this command to PREVENT MEDIA REMOVAL, but don't get upset
	 * if it fails. For a CD, however, it is an error
	 */
	if (un->un_f_doorlock_supported) {
		mutex_exit(SD_MUTEX(un));
		status = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
		    SD_PATH_DIRECT);

		if ((status != 0) && ISCD(un)) {
			rval = SD_NOT_READY_VALID;
			mutex_enter(SD_MUTEX(un));

			sd_ssc_assessment(ssc, SD_FMT_IGNORE);

			goto done;
		} else if (status != 0)
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		mutex_enter(SD_MUTEX(un));
	}

	/* The state has changed, inform the media watch routines */
	un->un_mediastate = DKIO_INSERTED;
	cv_broadcast(&un->un_state_cv);
	rval = SD_READY_VALID;

done:

	/*
	 * Initialize the capacity kstat value, if no media previously
	 * (capacity kstat is 0) and a media has been inserted
	 * (un_blockcount > 0).
	 */
	if (un->un_errstats != NULL) {
		stp = (struct sd_errstats *)un->un_errstats->ks_data;
		if ((stp->sd_capacity.value.ui64 == 0) &&
		    (un->un_f_blockcount_is_valid == TRUE)) {
			stp->sd_capacity.value.ui64 =
			    (uint64_t)((uint64_t)un->un_blockcount *
			    un->un_sys_blocksize);
		}
	}

	mutex_exit(SD_MUTEX(un));
	return (rval);
}
10933 
10934 
10935 /*
10936  *    Function: sdmin
10937  *
10938  * Description: Routine to limit the size of a data transfer. Used in
10939  *		conjunction with physio(9F).
10940  *
10941  *   Arguments: bp - pointer to the indicated buf(9S) struct.
10942  *
10943  *     Context: Kernel thread context.
10944  */
10945 
10946 static void
10947 sdmin(struct buf *bp)
10948 {
10949 	struct sd_lun	*un;
10950 	int		instance;
10951 
10952 	instance = SDUNIT(bp->b_edev);
10953 
10954 	un = ddi_get_soft_state(sd_state, instance);
10955 	ASSERT(un != NULL);
10956 
10957 	/*
10958 	 * We depend on buf breakup to restrict
10959 	 * IO size if it is enabled.
10960 	 */
10961 	if (un->un_buf_breakup_supported) {
10962 		return;
10963 	}
10964 
10965 	if (bp->b_bcount > un->un_max_xfer_size) {
10966 		bp->b_bcount = un->un_max_xfer_size;
10967 	}
10968 }
10969 
10970 
10971 /*
10972  *    Function: sdread
10973  *
10974  * Description: Driver's read(9e) entry point function.
10975  *
10976  *   Arguments: dev   - device number
10977  *		uio   - structure pointer describing where data is to be stored
10978  *			in user's space
10979  *		cred_p  - user credential pointer
10980  *
10981  * Return Code: ENXIO
10982  *		EIO
10983  *		EINVAL
10984  *		value returned by physio
10985  *
10986  *     Context: Kernel thread context.
10987  */
10988 /* ARGSUSED */
static int
sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
{
	struct sd_lun	*un = NULL;
	int		secmask;
	int		err = 0;
	sd_ssc_t	*ssc;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));


	/*
	 * If the label has not yet been validated (and this is not a CD),
	 * run the ready-and-valid check before allowing any transfer.
	 */
	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
		mutex_enter(SD_MUTEX(un));
		/*
		 * Because the call to sd_ready_and_valid will issue I/O we
		 * must wait here if either the device is suspended or
		 * if it's power level is changing.
		 */
		while ((un->un_state == SD_STATE_SUSPENDED) ||
		    (un->un_state == SD_STATE_PM_CHANGING)) {
			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
		}
		/* Account for the internal command so detach cannot race us. */
		un->un_ncmds_in_driver++;
		mutex_exit(SD_MUTEX(un));

		/* Initialize sd_ssc_t for internal uscsi commands */
		ssc = sd_ssc_init(un);
		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
			err = EIO;
		} else {
			err = 0;
		}
		sd_ssc_fini(ssc);

		mutex_enter(SD_MUTEX(un));
		un->un_ncmds_in_driver--;
		ASSERT(un->un_ncmds_in_driver >= 0);
		mutex_exit(SD_MUTEX(un));
		if (err != 0)
			return (err);
	}

	/*
	 * Read requests are restricted to multiples of the system block size.
	 */
	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
	    !un->un_f_enable_rmw)
		secmask = un->un_tgt_blocksize - 1;
	else
		secmask = DEV_BSIZE - 1;

	if (uio->uio_loffset & ((offset_t)(secmask))) {
		SD_ERROR(SD_LOG_READ_WRITE, un,
		    "sdread: file offset not modulo %d\n",
		    secmask + 1);
		err = EINVAL;
	/*
	 * NOTE(review): only the first iovec's length is checked for
	 * alignment here; physio() performs the actual transfer.
	 */
	} else if (uio->uio_iov->iov_len & (secmask)) {
		SD_ERROR(SD_LOG_READ_WRITE, un,
		    "sdread: transfer length not modulo %d\n",
		    secmask + 1);
		err = EINVAL;
	} else {
		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
	}

	return (err);
}
11060 
11061 
11062 /*
11063  *    Function: sdwrite
11064  *
11065  * Description: Driver's write(9e) entry point function.
11066  *
11067  *   Arguments: dev   - device number
11068  *		uio   - structure pointer describing where data is stored in
11069  *			user's space
11070  *		cred_p  - user credential pointer
11071  *
11072  * Return Code: ENXIO
11073  *		EIO
11074  *		EINVAL
11075  *		value returned by physio
11076  *
11077  *     Context: Kernel thread context.
11078  */
11079 /* ARGSUSED */
11080 static int
11081 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
11082 {
11083 	struct sd_lun	*un = NULL;
11084 	int		secmask;
11085 	int		err = 0;
11086 	sd_ssc_t	*ssc;
11087 
11088 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11089 		return (ENXIO);
11090 	}
11091 
11092 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11093 
11094 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
11095 		mutex_enter(SD_MUTEX(un));
11096 		/*
11097 		 * Because the call to sd_ready_and_valid will issue I/O we
11098 		 * must wait here if either the device is suspended or
11099 		 * if it's power level is changing.
11100 		 */
11101 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11102 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11103 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11104 		}
11105 		un->un_ncmds_in_driver++;
11106 		mutex_exit(SD_MUTEX(un));
11107 
11108 		/* Initialize sd_ssc_t for internal uscsi commands */
11109 		ssc = sd_ssc_init(un);
11110 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
11111 			err = EIO;
11112 		} else {
11113 			err = 0;
11114 		}
11115 		sd_ssc_fini(ssc);
11116 
11117 		mutex_enter(SD_MUTEX(un));
11118 		un->un_ncmds_in_driver--;
11119 		ASSERT(un->un_ncmds_in_driver >= 0);
11120 		mutex_exit(SD_MUTEX(un));
11121 		if (err != 0)
11122 			return (err);
11123 	}
11124 
11125 	/*
11126 	 * Write requests are restricted to multiples of the system block size.
11127 	 */
11128 	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
11129 	    !un->un_f_enable_rmw)
11130 		secmask = un->un_tgt_blocksize - 1;
11131 	else
11132 		secmask = DEV_BSIZE - 1;
11133 
11134 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11135 		SD_ERROR(SD_LOG_READ_WRITE, un,
11136 		    "sdwrite: file offset not modulo %d\n",
11137 		    secmask + 1);
11138 		err = EINVAL;
11139 	} else if (uio->uio_iov->iov_len & (secmask)) {
11140 		SD_ERROR(SD_LOG_READ_WRITE, un,
11141 		    "sdwrite: transfer length not modulo %d\n",
11142 		    secmask + 1);
11143 		err = EINVAL;
11144 	} else {
11145 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
11146 	}
11147 
11148 	return (err);
11149 }
11150 
11151 
11152 /*
11153  *    Function: sdaread
11154  *
11155  * Description: Driver's aread(9e) entry point function.
11156  *
11157  *   Arguments: dev   - device number
11158  *		aio   - structure pointer describing where data is to be stored
11159  *		cred_p  - user credential pointer
11160  *
11161  * Return Code: ENXIO
11162  *		EIO
11163  *		EINVAL
11164  *		value returned by aphysio
11165  *
11166  *     Context: Kernel thread context.
11167  */
11168 /* ARGSUSED */
11169 static int
11170 sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11171 {
11172 	struct sd_lun	*un = NULL;
11173 	struct uio	*uio = aio->aio_uio;
11174 	int		secmask;
11175 	int		err = 0;
11176 	sd_ssc_t	*ssc;
11177 
11178 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11179 		return (ENXIO);
11180 	}
11181 
11182 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11183 
11184 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
11185 		mutex_enter(SD_MUTEX(un));
11186 		/*
11187 		 * Because the call to sd_ready_and_valid will issue I/O we
11188 		 * must wait here if either the device is suspended or
11189 		 * if it's power level is changing.
11190 		 */
11191 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11192 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11193 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11194 		}
11195 		un->un_ncmds_in_driver++;
11196 		mutex_exit(SD_MUTEX(un));
11197 
11198 		/* Initialize sd_ssc_t for internal uscsi commands */
11199 		ssc = sd_ssc_init(un);
11200 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
11201 			err = EIO;
11202 		} else {
11203 			err = 0;
11204 		}
11205 		sd_ssc_fini(ssc);
11206 
11207 		mutex_enter(SD_MUTEX(un));
11208 		un->un_ncmds_in_driver--;
11209 		ASSERT(un->un_ncmds_in_driver >= 0);
11210 		mutex_exit(SD_MUTEX(un));
11211 		if (err != 0)
11212 			return (err);
11213 	}
11214 
11215 	/*
11216 	 * Read requests are restricted to multiples of the system block size.
11217 	 */
11218 	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
11219 	    !un->un_f_enable_rmw)
11220 		secmask = un->un_tgt_blocksize - 1;
11221 	else
11222 		secmask = DEV_BSIZE - 1;
11223 
11224 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11225 		SD_ERROR(SD_LOG_READ_WRITE, un,
11226 		    "sdaread: file offset not modulo %d\n",
11227 		    secmask + 1);
11228 		err = EINVAL;
11229 	} else if (uio->uio_iov->iov_len & (secmask)) {
11230 		SD_ERROR(SD_LOG_READ_WRITE, un,
11231 		    "sdaread: transfer length not modulo %d\n",
11232 		    secmask + 1);
11233 		err = EINVAL;
11234 	} else {
11235 		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
11236 	}
11237 
11238 	return (err);
11239 }
11240 
11241 
11242 /*
11243  *    Function: sdawrite
11244  *
11245  * Description: Driver's awrite(9e) entry point function.
11246  *
11247  *   Arguments: dev   - device number
11248  *		aio   - structure pointer describing where data is stored
11249  *		cred_p  - user credential pointer
11250  *
11251  * Return Code: ENXIO
11252  *		EIO
11253  *		EINVAL
11254  *		value returned by aphysio
11255  *
11256  *     Context: Kernel thread context.
11257  */
11258 /* ARGSUSED */
11259 static int
11260 sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11261 {
11262 	struct sd_lun	*un = NULL;
11263 	struct uio	*uio = aio->aio_uio;
11264 	int		secmask;
11265 	int		err = 0;
11266 	sd_ssc_t	*ssc;
11267 
11268 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11269 		return (ENXIO);
11270 	}
11271 
11272 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11273 
11274 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
11275 		mutex_enter(SD_MUTEX(un));
11276 		/*
11277 		 * Because the call to sd_ready_and_valid will issue I/O we
11278 		 * must wait here if either the device is suspended or
11279 		 * if it's power level is changing.
11280 		 */
11281 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11282 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11283 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11284 		}
11285 		un->un_ncmds_in_driver++;
11286 		mutex_exit(SD_MUTEX(un));
11287 
11288 		/* Initialize sd_ssc_t for internal uscsi commands */
11289 		ssc = sd_ssc_init(un);
11290 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
11291 			err = EIO;
11292 		} else {
11293 			err = 0;
11294 		}
11295 		sd_ssc_fini(ssc);
11296 
11297 		mutex_enter(SD_MUTEX(un));
11298 		un->un_ncmds_in_driver--;
11299 		ASSERT(un->un_ncmds_in_driver >= 0);
11300 		mutex_exit(SD_MUTEX(un));
11301 		if (err != 0)
11302 			return (err);
11303 	}
11304 
11305 	/*
11306 	 * Write requests are restricted to multiples of the system block size.
11307 	 */
11308 	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
11309 	    !un->un_f_enable_rmw)
11310 		secmask = un->un_tgt_blocksize - 1;
11311 	else
11312 		secmask = DEV_BSIZE - 1;
11313 
11314 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11315 		SD_ERROR(SD_LOG_READ_WRITE, un,
11316 		    "sdawrite: file offset not modulo %d\n",
11317 		    secmask + 1);
11318 		err = EINVAL;
11319 	} else if (uio->uio_iov->iov_len & (secmask)) {
11320 		SD_ERROR(SD_LOG_READ_WRITE, un,
11321 		    "sdawrite: transfer length not modulo %d\n",
11322 		    secmask + 1);
11323 		err = EINVAL;
11324 	} else {
11325 		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
11326 	}
11327 
11328 	return (err);
11329 }
11330 
11331 
11332 
11333 
11334 
11335 /*
11336  * Driver IO processing follows the following sequence:
11337  *
11338  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
11339  *         |                |                     ^
11340  *         v                v                     |
11341  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
11342  *         |                |                     |                   |
11343  *         v                |                     |                   |
11344  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
11345  *         |                |                     ^                   ^
11346  *         v                v                     |                   |
11347  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
11348  *         |                |                     |                   |
11349  *     +---+                |                     +------------+      +-------+
11350  *     |                    |                                  |              |
11351  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11352  *     |                    v                                  |              |
11353  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
11354  *     |                    |                                  ^              |
11355  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11356  *     |                    v                                  |              |
11357  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
11358  *     |                    |                                  ^              |
11359  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11360  *     |                    v                                  |              |
11361  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
11362  *     |                    |                                  ^              |
11363  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
11364  *     |                    v                                  |              |
11365  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
11366  *     |                    |                                  ^              |
11367  *     |                    |                                  |              |
11368  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
11369  *                          |                           ^
11370  *                          v                           |
11371  *                   sd_core_iostart()                  |
11372  *                          |                           |
11373  *                          |                           +------>(*destroypkt)()
11374  *                          +-> sd_start_cmds() <-+     |           |
11375  *                          |                     |     |           v
11376  *                          |                     |     |  scsi_destroy_pkt(9F)
11377  *                          |                     |     |
11378  *                          +->(*initpkt)()       +- sdintr()
11379  *                          |  |                        |  |
11380  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
11381  *                          |  +-> scsi_setup_cdb(9F)   |
11382  *                          |                           |
11383  *                          +--> scsi_transport(9F)     |
11384  *                                     |                |
11385  *                                     +----> SCSA ---->+
11386  *
11387  *
11388  * This code is based upon the following presumptions:
11389  *
11390  *   - iostart and iodone functions operate on buf(9S) structures. These
11391  *     functions perform the necessary operations on the buf(9S) and pass
11392  *     them along to the next function in the chain by using the macros
11393  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
11394  *     (for iodone side functions).
11395  *
11396  *   - The iostart side functions may sleep. The iodone side functions
11397  *     are called under interrupt context and may NOT sleep. Therefore
11398  *     iodone side functions also may not call iostart side functions.
11399  *     (NOTE: iostart side functions should NOT sleep for memory, as
11400  *     this could result in deadlock.)
11401  *
11402  *   - An iostart side function may call its corresponding iodone side
11403  *     function directly (if necessary).
11404  *
11405  *   - In the event of an error, an iostart side function can return a buf(9S)
11406  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
11407  *     b_error in the usual way of course).
11408  *
11409  *   - The taskq mechanism may be used by the iodone side functions to dispatch
11410  *     requests to the iostart side functions.  The iostart side functions in
11411  *     this case would be called under the context of a taskq thread, so it's
11412  *     OK for them to block/sleep/spin in this case.
11413  *
11414  *   - iostart side functions may allocate "shadow" buf(9S) structs and
11415  *     pass them along to the next function in the chain.  The corresponding
11416  *     iodone side functions must coalesce the "shadow" bufs and return
11417  *     the "original" buf to the next higher layer.
11418  *
11419  *   - The b_private field of the buf(9S) struct holds a pointer to
11420  *     an sd_xbuf struct, which contains information needed to
11421  *     construct the scsi_pkt for the command.
11422  *
11423  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
11424  *     layer must acquire & release the SD_MUTEX(un) as needed.
11425  */
11426 
11427 
11428 /*
11429  * Create taskq for all targets in the system. This is created at
11430  * _init(9E) and destroyed at _fini(9E).
11431  *
11432  * Note: here we set the minalloc to a reasonably high number to ensure that
11433  * we will have an adequate supply of task entries available at interrupt time.
11434  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
11435  * sd_create_taskq().  Since we do not want to sleep for allocations at
11436  * interrupt time, set maxalloc equal to minalloc. That way we will just fail
11437  * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
11438  * requests any one instant in time.
11439  */
11440 #define	SD_TASKQ_NUMTHREADS	8
11441 #define	SD_TASKQ_MINALLOC	256
11442 #define	SD_TASKQ_MAXALLOC	256
11443 
11444 static taskq_t	*sd_tq = NULL;
11445 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
11446 
11447 static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
11448 static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
11449 
11450 /*
11451  * The following task queue is being created for the write part of
11452  * read-modify-write of non-512 block size devices.
11453  * Limit the number of threads to 1 for now. This number has been chosen
11454  * considering the fact that it applies only to dvd ram drives/MO drives
11455  * currently. Performance for which is not main criteria at this stage.
11456  * Note: It needs to be explored if we can use a single taskq in future
11457  */
11458 #define	SD_WMR_TASKQ_NUMTHREADS	1
11459 static taskq_t	*sd_wmr_tq = NULL;
11460 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
11461 
11462 /*
11463  *    Function: sd_taskq_create
11464  *
11465  * Description: Create taskq thread(s) and preallocate task entries
11466  *
11467  * Return Code: Returns a pointer to the allocated taskq_t.
11468  *
11469  *     Context: Can sleep. Requires blockable context.
11470  *
11471  *       Notes: - The taskq() facility currently is NOT part of the DDI.
 *		  (definitely NOT recommended for 3rd-party drivers!) :-)
11473  *		- taskq_create() will block for memory, also it will panic
11474  *		  if it cannot create the requested number of threads.
11475  *		- Currently taskq_create() creates threads that cannot be
11476  *		  swapped.
11477  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
11478  *		  supply of taskq entries at interrupt time (ie, so that we
11479  *		  do not have to sleep for memory)
11480  */
11481 
11482 static void
11483 sd_taskq_create(void)
11484 {
11485 	char	taskq_name[TASKQ_NAMELEN];
11486 
11487 	ASSERT(sd_tq == NULL);
11488 	ASSERT(sd_wmr_tq == NULL);
11489 
11490 	(void) snprintf(taskq_name, sizeof (taskq_name),
11491 	    "%s_drv_taskq", sd_label);
11492 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
11493 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11494 	    TASKQ_PREPOPULATE));
11495 
11496 	(void) snprintf(taskq_name, sizeof (taskq_name),
11497 	    "%s_rmw_taskq", sd_label);
11498 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
11499 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11500 	    TASKQ_PREPOPULATE));
11501 }
11502 
11503 
11504 /*
11505  *    Function: sd_taskq_delete
11506  *
11507  * Description: Complementary cleanup routine for sd_taskq_create().
11508  *
11509  *     Context: Kernel thread context.
11510  */
11511 
11512 static void
11513 sd_taskq_delete(void)
11514 {
11515 	ASSERT(sd_tq != NULL);
11516 	ASSERT(sd_wmr_tq != NULL);
11517 	taskq_destroy(sd_tq);
11518 	taskq_destroy(sd_wmr_tq);
11519 	sd_tq = NULL;
11520 	sd_wmr_tq = NULL;
11521 }
11522 
11523 
11524 /*
11525  *    Function: sdstrategy
11526  *
11527  * Description: Driver's strategy (9E) entry point function.
11528  *
11529  *   Arguments: bp - pointer to buf(9S)
11530  *
11531  * Return Code: Always returns zero
11532  *
11533  *     Context: Kernel thread context.
11534  */
11535 
static int
sdstrategy(struct buf *bp)
{
	struct sd_lun *un;

	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
	if (un == NULL) {
		/* No soft state for this instance: fail the buf with EIO. */
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/* As was done in the past, fail new cmds. if state is dumping. */
	if (un->un_state == SD_STATE_DUMPING) {
		bioerror(bp, ENXIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/*
	 * Commands may sneak in while we released the mutex in
	 * DDI_SUSPEND, we should block new commands. However, old
	 * commands that are still in the driver at this point should
	 * still be allowed to drain.
	 */
	mutex_enter(SD_MUTEX(un));
	/*
	 * Must wait here if either the device is suspended or
	 * if its power level is changing.
	 */
	while ((un->un_state == SD_STATE_SUSPENDED) ||
	    (un->un_state == SD_STATE_PM_CHANGING)) {
		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
	}

	/* Count this command as in the driver; decremented at iodone. */
	un->un_ncmds_in_driver++;

	/*
	 * atapi: Since we are running the CD for now in PIO mode we need to
	 * call bp_mapin here to avoid bp_mapin called interrupt context under
	 * the HBA's init_pkt routine.
	 * Note: the mutex is dropped across bp_mapin() because it may sleep.
	 */
	if (un->un_f_cfg_is_atapi == TRUE) {
		mutex_exit(SD_MUTEX(un));
		bp_mapin(bp);
		mutex_enter(SD_MUTEX(un));
	}
	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
	    un->un_ncmds_in_driver);

	/* A write makes the cache dirty; remember to sync it later. */
	if (bp->b_flags & B_WRITE)
		un->un_f_sync_cache_required = TRUE;

	mutex_exit(SD_MUTEX(un));

	/*
	 * This will (eventually) allocate the sd_xbuf area and
	 * call sd_xbuf_strategy().  We just want to return the
	 * result of ddi_xbuf_qstrategy so that we have an opt-
	 * imized tail call which saves us a stack frame.
	 */
	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
}
11603 
11604 
11605 /*
11606  *    Function: sd_xbuf_strategy
11607  *
11608  * Description: Function for initiating IO operations via the
11609  *		ddi_xbuf_qstrategy() mechanism.
11610  *
11611  *     Context: Kernel thread context.
11612  */
11613 
11614 static void
11615 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
11616 {
11617 	struct sd_lun *un = arg;
11618 
11619 	ASSERT(bp != NULL);
11620 	ASSERT(xp != NULL);
11621 	ASSERT(un != NULL);
11622 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11623 
11624 	/*
11625 	 * Initialize the fields in the xbuf and save a pointer to the
11626 	 * xbuf in bp->b_private.
11627 	 */
11628 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
11629 
11630 	/* Send the buf down the iostart chain */
11631 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
11632 }
11633 
11634 
11635 /*
11636  *    Function: sd_xbuf_init
11637  *
11638  * Description: Prepare the given sd_xbuf struct for use.
11639  *
11640  *   Arguments: un - ptr to softstate
11641  *		bp - ptr to associated buf(9S)
11642  *		xp - ptr to associated sd_xbuf
11643  *		chain_type - IO chain type to use:
11644  *			SD_CHAIN_NULL
11645  *			SD_CHAIN_BUFIO
11646  *			SD_CHAIN_USCSI
11647  *			SD_CHAIN_DIRECT
11648  *			SD_CHAIN_DIRECT_PRIORITY
11649  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
11650  *			initialization; may be NULL if none.
11651  *
11652  *     Context: Kernel thread context
11653  */
11654 
static void
sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
    uchar_t chain_type, void *pktinfop)
{
	int index;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);

	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
	    bp, chain_type);

	xp->xb_un	= un;
	xp->xb_pktp	= NULL;
	xp->xb_pktinfo	= pktinfop;
	/* Preserve the caller's b_private; it is restored at iodone time. */
	xp->xb_private	= bp->b_private;
	xp->xb_blkno	= (daddr_t)bp->b_blkno;

	/*
	 * Set up the iostart and iodone chain indexes in the xbuf, based
	 * upon the specified chain type to use.
	 */
	switch (chain_type) {
	case SD_CHAIN_NULL:
		/*
		 * Fall thru to just use the values for the buf type, even
		 * tho for the NULL chain these values will never be used.
		 */
		/* FALLTHRU */
	case SD_CHAIN_BUFIO:
		index = un->un_buf_chain_type;
		/*
		 * For fixed (non-removable) media whose target block size
		 * differs from DEV_BSIZE (or when read-modify-write is
		 * forced on), check whether this request is misaligned.
		 */
		if ((!un->un_f_has_removable_media) &&
		    (un->un_tgt_blocksize != 0) &&
		    (un->un_tgt_blocksize != DEV_BSIZE ||
		    un->un_f_enable_rmw)) {
			int secmask = 0, blknomask = 0;
			if (un->un_f_enable_rmw) {
				blknomask =
				    (un->un_phy_blocksize / DEV_BSIZE) - 1;
				secmask = un->un_phy_blocksize - 1;
			} else {
				blknomask =
				    (un->un_tgt_blocksize / DEV_BSIZE) - 1;
				secmask = un->un_tgt_blocksize - 1;
			}

			/*
			 * Misaligned offset or byte count: route the buf
			 * through the multi-sector-size (RMW) chain unless
			 * the policy is to return an error instead.
			 */
			if ((bp->b_lblkno & (blknomask)) ||
			    (bp->b_bcount & (secmask))) {
				if ((un->un_f_rmw_type !=
				    SD_RMW_TYPE_RETURN_ERROR) ||
				    un->un_f_enable_rmw) {
					if (un->un_f_pm_is_enabled == FALSE)
						index =
						    SD_CHAIN_INFO_MSS_DSK_NO_PM;
					else
						index =
						    SD_CHAIN_INFO_MSS_DISK;
				}
			}
		}
		break;
	case SD_CHAIN_USCSI:
		index = un->un_uscsi_chain_type;
		break;
	case SD_CHAIN_DIRECT:
		index = un->un_direct_chain_type;
		break;
	case SD_CHAIN_DIRECT_PRIORITY:
		index = un->un_priority_chain_type;
		break;
	default:
		/* We're really broken if we ever get here... */
		panic("sd_xbuf_init: illegal chain type!");
		/*NOTREACHED*/
	}

	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;

	/*
	 * It might be a bit easier to simply bzero the entire xbuf above,
	 * but it turns out that since we init a fair number of members anyway,
	 * we save a fair number cycles by doing explicit assignment of zero.
	 */
	xp->xb_pkt_flags	= 0;
	xp->xb_dma_resid	= 0;
	xp->xb_retry_count	= 0;
	xp->xb_victim_retry_count = 0;
	xp->xb_ua_retry_count	= 0;
	xp->xb_nr_retry_count	= 0;
	xp->xb_sense_bp		= NULL;
	xp->xb_sense_status	= 0;
	xp->xb_sense_state	= 0;
	xp->xb_sense_resid	= 0;
	xp->xb_ena		= 0;

	/* Hook the xbuf onto the buf and reset the buf's completion state. */
	bp->b_private	= xp;
	bp->b_flags	&= ~(B_DONE | B_ERROR);
	bp->b_resid	= 0;
	bp->av_forw	= NULL;
	bp->av_back	= NULL;
	bioerror(bp, 0);

	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
}
11761 
11762 
11763 /*
11764  *    Function: sd_uscsi_strategy
11765  *
11766  * Description: Wrapper for calling into the USCSI chain via physio(9F)
11767  *
11768  *   Arguments: bp - buf struct ptr
11769  *
11770  * Return Code: Always returns 0
11771  *
11772  *     Context: Kernel thread context
11773  */
11774 
static int
sd_uscsi_strategy(struct buf *bp)
{
	struct sd_lun		*un;
	struct sd_uscsi_info	*uip;
	struct sd_xbuf		*xp;
	uchar_t			chain_type;
	uchar_t			cmd;

	ASSERT(bp != NULL);

	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
	if (un == NULL) {
		/* No soft state for this instance: fail the buf with EIO. */
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);

	/*
	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
	 */
	ASSERT(bp->b_private != NULL);
	uip = (struct sd_uscsi_info *)bp->b_private;
	cmd = ((struct uscsi_cmd *)(uip->ui_cmdp))->uscsi_cdb[0];

	mutex_enter(SD_MUTEX(un));
	/*
	 * atapi: Since we are running the CD for now in PIO mode we need to
	 * call bp_mapin here to avoid bp_mapin called interrupt context under
	 * the HBA's init_pkt routine.
	 * Note: the mutex is dropped across bp_mapin() because it may sleep.
	 */
	if (un->un_f_cfg_is_atapi == TRUE) {
		mutex_exit(SD_MUTEX(un));
		bp_mapin(bp);
		mutex_enter(SD_MUTEX(un));
	}
	un->un_ncmds_in_driver++;
	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
	    un->un_ncmds_in_driver);

	/*
	 * A uscsi write (other than MODE SELECT) may dirty the cache;
	 * remember to sync it later.
	 */
	if ((bp->b_flags & B_WRITE) && (bp->b_bcount != 0) &&
	    (cmd != SCMD_MODE_SELECT) && (cmd != SCMD_MODE_SELECT_G1))
		un->un_f_sync_cache_required = TRUE;

	mutex_exit(SD_MUTEX(un));

	/* Select the IO chain based on the path flag given by the caller. */
	switch (uip->ui_flags) {
	case SD_PATH_DIRECT:
		chain_type = SD_CHAIN_DIRECT;
		break;
	case SD_PATH_DIRECT_PRIORITY:
		chain_type = SD_CHAIN_DIRECT_PRIORITY;
		break;
	default:
		chain_type = SD_CHAIN_USCSI;
		break;
	}

	/*
	 * We may allocate extra buf for external USCSI commands. If the
	 * application asks for bigger than 20-byte sense data via USCSI,
	 * SCSA layer will allocate 252 bytes sense buf for that command.
	 * The xbuf's trailing sense area is grown accordingly.
	 */
	if (((struct uscsi_cmd *)(uip->ui_cmdp))->uscsi_rqlen >
	    SENSE_LENGTH) {
		xp = kmem_zalloc(sizeof (struct sd_xbuf) - SENSE_LENGTH +
		    MAX_SENSE_LENGTH, KM_SLEEP);
	} else {
		xp = kmem_zalloc(sizeof (struct sd_xbuf), KM_SLEEP);
	}

	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);

	/* Use the index obtained within xbuf_init */
	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);

	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);

	return (0);
}
11860 
11861 /*
11862  *    Function: sd_send_scsi_cmd
11863  *
11864  * Description: Runs a USCSI command for user (when called thru sdioctl),
11865  *		or for the driver
11866  *
11867  *   Arguments: dev - the dev_t for the device
11868  *		incmd - ptr to a valid uscsi_cmd struct
11869  *		flag - bit flag, indicating open settings, 32/64 bit type
11870  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11871  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11872  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11873  *			to use the USCSI "direct" chain and bypass the normal
11874  *			command waitq.
11875  *
11876  * Return Code: 0 -  successful completion of the given command
11877  *		EIO - scsi_uscsi_handle_command() failed
11878  *		ENXIO  - soft state not found for specified dev
11879  *		EINVAL
11880  *		EFAULT - copyin/copyout error
11881  *		return code of scsi_uscsi_handle_command():
11882  *			EIO
11883  *			ENXIO
11884  *			EACCES
11885  *
11886  *     Context: Waits for command to complete. Can sleep.
11887  */
11888 
11889 static int
11890 sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
11891     enum uio_seg dataspace, int path_flag)
11892 {
11893 	struct sd_lun	*un;
11894 	sd_ssc_t	*ssc;
11895 	int		rval;
11896 
11897 	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
11898 	if (un == NULL) {
11899 		return (ENXIO);
11900 	}
11901 
11902 	/*
11903 	 * Using sd_ssc_send to handle uscsi cmd
11904 	 */
11905 	ssc = sd_ssc_init(un);
11906 	rval = sd_ssc_send(ssc, incmd, flag, dataspace, path_flag);
11907 	sd_ssc_fini(ssc);
11908 
11909 	return (rval);
11910 }
11911 
11912 /*
11913  *    Function: sd_ssc_init
11914  *
11915  * Description: Uscsi end-user call this function to initialize necessary
11916  *              fields, such as uscsi_cmd and sd_uscsi_info struct.
11917  *
11918  *              The return value of sd_send_scsi_cmd will be treated as a
 *              fault in various conditions. Even if it is not zero, some
11920  *              callers may ignore the return value. That is to say, we can
11921  *              not make an accurate assessment in sdintr, since if a
11922  *              command is failed in sdintr it does not mean the caller of
11923  *              sd_send_scsi_cmd will treat it as a real failure.
11924  *
11925  *              To avoid printing too many error logs for a failed uscsi
11926  *              packet that the caller may not treat it as a failure, the
11927  *              sd will keep silent for handling all uscsi commands.
11928  *
11929  *              During detach->attach and attach-open, for some types of
11930  *              problems, the driver should be providing information about
11931  *              the problem encountered. Device use USCSI_SILENT, which
11932  *              suppresses all driver information. The result is that no
11933  *              information about the problem is available. Being
11934  *              completely silent during this time is inappropriate. The
11935  *              driver needs a more selective filter than USCSI_SILENT, so
11936  *              that information related to faults is provided.
11937  *
 *              To make an accurate assessment, the caller of
11939  *              sd_send_scsi_USCSI_CMD should take the ownership and
11940  *              get necessary information to print error messages.
11941  *
11942  *              If we want to print necessary info of uscsi command, we need to
11943  *              keep the uscsi_cmd and sd_uscsi_info till we can make the
11944  *              assessment. We use sd_ssc_init to alloc necessary
11945  *              structs for sending an uscsi command and we are also
11946  *              responsible for free the memory by calling
11947  *              sd_ssc_fini.
11948  *
 *              The calling sequences will look like:
11950  *              sd_ssc_init->
11951  *
11952  *                  ...
11953  *
11954  *                  sd_send_scsi_USCSI_CMD->
11955  *                      sd_ssc_send-> - - - sdintr
11956  *                  ...
11957  *
11958  *                  if we think the return value should be treated as a
 *                  failure, we make the assessment here and print out the
 *                  necessary information by retrieving uscsi_cmd and
 *                  sd_uscsi_info
11961  *
11962  *                  ...
11963  *
11964  *              sd_ssc_fini
11965  *
11966  *
11967  *   Arguments: un - pointer to driver soft state (unit) structure for this
11968  *                   target.
11969  *
11970  * Return code: sd_ssc_t - pointer to allocated sd_ssc_t struct, it contains
11971  *                         uscsi_cmd and sd_uscsi_info.
11972  *                  NULL - if can not alloc memory for sd_ssc_t struct
11973  *
11974  *     Context: Kernel Thread.
11975  */
11976 static sd_ssc_t *
11977 sd_ssc_init(struct sd_lun *un)
11978 {
11979 	sd_ssc_t		*ssc;
11980 	struct uscsi_cmd	*ucmdp;
11981 	struct sd_uscsi_info	*uip;
11982 
11983 	ASSERT(un != NULL);
11984 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11985 
11986 	/*
11987 	 * Allocate sd_ssc_t structure
11988 	 */
11989 	ssc = kmem_zalloc(sizeof (sd_ssc_t), KM_SLEEP);
11990 
11991 	/*
11992 	 * Allocate uscsi_cmd by calling scsi_uscsi_alloc common routine
11993 	 */
11994 	ucmdp = scsi_uscsi_alloc();
11995 
11996 	/*
11997 	 * Allocate sd_uscsi_info structure
11998 	 */
11999 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
12000 
12001 	ssc->ssc_uscsi_cmd = ucmdp;
12002 	ssc->ssc_uscsi_info = uip;
12003 	ssc->ssc_un = un;
12004 
12005 	return (ssc);
12006 }
12007 
12008 /*
12009  * Function: sd_ssc_fini
12010  *
12011  * Description: To free sd_ssc_t and it's hanging off
12012  *
12013  * Arguments: ssc - struct pointer of sd_ssc_t.
12014  */
12015 static void
12016 sd_ssc_fini(sd_ssc_t *ssc)
12017 {
12018 	scsi_uscsi_free(ssc->ssc_uscsi_cmd);
12019 
12020 	if (ssc->ssc_uscsi_info != NULL) {
12021 		kmem_free(ssc->ssc_uscsi_info, sizeof (struct sd_uscsi_info));
12022 		ssc->ssc_uscsi_info = NULL;
12023 	}
12024 
12025 	kmem_free(ssc, sizeof (sd_ssc_t));
12026 	ssc = NULL;
12027 }
12028 
12029 /*
12030  * Function: sd_ssc_send
12031  *
12032  * Description: Runs a USCSI command for user when called through sdioctl,
12033  *              or for the driver.
12034  *
12035  *   Arguments: ssc - the struct of sd_ssc_t will bring uscsi_cmd and
12036  *                    sd_uscsi_info in.
12037  *		incmd - ptr to a valid uscsi_cmd struct
12038  *		flag - bit flag, indicating open settings, 32/64 bit type
12039  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
12040  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
12041  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
12042  *			to use the USCSI "direct" chain and bypass the normal
12043  *			command waitq.
12044  *
12045  * Return Code: 0 -  successful completion of the given command
12046  *		EIO - scsi_uscsi_handle_command() failed
12047  *		ENXIO  - soft state not found for specified dev
12048  *		ECANCELED - command cancelled due to low power
12049  *		EINVAL
12050  *		EFAULT - copyin/copyout error
12051  *		return code of scsi_uscsi_handle_command():
12052  *			EIO
12053  *			ENXIO
12054  *			EACCES
12055  *
12056  *     Context: Kernel Thread;
12057  *              Waits for command to complete. Can sleep.
12058  */
12059 static int
12060 sd_ssc_send(sd_ssc_t *ssc, struct uscsi_cmd *incmd, int flag,
12061     enum uio_seg dataspace, int path_flag)
12062 {
12063 	struct sd_uscsi_info	*uip;
12064 	struct uscsi_cmd	*uscmd;
12065 	struct sd_lun		*un;
12066 	dev_t			dev;
12067 
12068 	int	format = 0;
12069 	int	rval;
12070 
12071 	ASSERT(ssc != NULL);
12072 	un = ssc->ssc_un;
12073 	ASSERT(un != NULL);
12074 	uscmd = ssc->ssc_uscsi_cmd;
12075 	ASSERT(uscmd != NULL);
12076 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12077 	if (ssc->ssc_flags & SSC_FLAGS_NEED_ASSESSMENT) {
12078 		/*
12079 		 * If enter here, it indicates that the previous uscsi
12080 		 * command has not been processed by sd_ssc_assessment.
12081 		 * This is violating our rules of FMA telemetry processing.
12082 		 * We should print out this message and the last undisposed
12083 		 * uscsi command.
12084 		 */
12085 		if (uscmd->uscsi_cdb != NULL) {
12086 			SD_INFO(SD_LOG_SDTEST, un,
12087 			    "sd_ssc_send is missing the alternative "
12088 			    "sd_ssc_assessment when running command 0x%x.\n",
12089 			    uscmd->uscsi_cdb[0]);
12090 		}
12091 		/*
12092 		 * Set the ssc_flags to SSC_FLAGS_UNKNOWN, which should be
12093 		 * the initial status.
12094 		 */
12095 		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
12096 	}
12097 
12098 	/*
12099 	 * We need to make sure sd_ssc_send will have sd_ssc_assessment
12100 	 * followed to avoid missing FMA telemetries.
12101 	 */
12102 	ssc->ssc_flags |= SSC_FLAGS_NEED_ASSESSMENT;
12103 
12104 	/*
12105 	 * if USCSI_PMFAILFAST is set and un is in low power, fail the
12106 	 * command immediately.
12107 	 */
12108 	mutex_enter(SD_MUTEX(un));
12109 	mutex_enter(&un->un_pm_mutex);
12110 	if ((uscmd->uscsi_flags & USCSI_PMFAILFAST) &&
12111 	    SD_DEVICE_IS_IN_LOW_POWER(un)) {
12112 		SD_TRACE(SD_LOG_IO, un, "sd_ssc_send:"
12113 		    "un:0x%p is in low power\n", un);
12114 		mutex_exit(&un->un_pm_mutex);
12115 		mutex_exit(SD_MUTEX(un));
12116 		return (ECANCELED);
12117 	}
12118 	mutex_exit(&un->un_pm_mutex);
12119 	mutex_exit(SD_MUTEX(un));
12120 
12121 #ifdef SDDEBUG
12122 	switch (dataspace) {
12123 	case UIO_USERSPACE:
12124 		SD_TRACE(SD_LOG_IO, un,
12125 		    "sd_ssc_send: entry: un:0x%p UIO_USERSPACE\n", un);
12126 		break;
12127 	case UIO_SYSSPACE:
12128 		SD_TRACE(SD_LOG_IO, un,
12129 		    "sd_ssc_send: entry: un:0x%p UIO_SYSSPACE\n", un);
12130 		break;
12131 	default:
12132 		SD_TRACE(SD_LOG_IO, un,
12133 		    "sd_ssc_send: entry: un:0x%p UNEXPECTED SPACE\n", un);
12134 		break;
12135 	}
12136 #endif
12137 
12138 	rval = scsi_uscsi_copyin((intptr_t)incmd, flag,
12139 	    SD_ADDRESS(un), &uscmd);
12140 	if (rval != 0) {
12141 		SD_TRACE(SD_LOG_IO, un, "sd_sense_scsi_cmd: "
12142 		    "scsi_uscsi_alloc_and_copyin failed\n", un);
12143 		return (rval);
12144 	}
12145 
12146 	if ((uscmd->uscsi_cdb != NULL) &&
12147 	    (uscmd->uscsi_cdb[0] == SCMD_FORMAT)) {
12148 		mutex_enter(SD_MUTEX(un));
12149 		un->un_f_format_in_progress = TRUE;
12150 		mutex_exit(SD_MUTEX(un));
12151 		format = 1;
12152 	}
12153 
12154 	/*
12155 	 * Allocate an sd_uscsi_info struct and fill it with the info
12156 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
12157 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
12158 	 * since we allocate the buf here in this function, we do not
12159 	 * need to preserve the prior contents of b_private.
12160 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
12161 	 */
12162 	uip = ssc->ssc_uscsi_info;
12163 	uip->ui_flags = path_flag;
12164 	uip->ui_cmdp = uscmd;
12165 
12166 	/*
12167 	 * Commands sent with priority are intended for error recovery
12168 	 * situations, and do not have retries performed.
12169 	 */
12170 	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
12171 		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
12172 	}
12173 	uscmd->uscsi_flags &= ~USCSI_NOINTR;
12174 
12175 	dev = SD_GET_DEV(un);
12176 	rval = scsi_uscsi_handle_cmd(dev, dataspace, uscmd,
12177 	    sd_uscsi_strategy, NULL, uip);
12178 
12179 	/*
12180 	 * mark ssc_flags right after handle_cmd to make sure
12181 	 * the uscsi has been sent
12182 	 */
12183 	ssc->ssc_flags |= SSC_FLAGS_CMD_ISSUED;
12184 
12185 #ifdef SDDEBUG
12186 	SD_INFO(SD_LOG_IO, un, "sd_ssc_send: "
12187 	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
12188 	    uscmd->uscsi_status, uscmd->uscsi_resid);
12189 	if (uscmd->uscsi_bufaddr != NULL) {
12190 		SD_INFO(SD_LOG_IO, un, "sd_ssc_send: "
12191 		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
12192 		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
12193 		if (dataspace == UIO_SYSSPACE) {
12194 			SD_DUMP_MEMORY(un, SD_LOG_IO,
12195 			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
12196 			    uscmd->uscsi_buflen, SD_LOG_HEX);
12197 		}
12198 	}
12199 #endif
12200 
12201 	if (format == 1) {
12202 		mutex_enter(SD_MUTEX(un));
12203 		un->un_f_format_in_progress = FALSE;
12204 		mutex_exit(SD_MUTEX(un));
12205 	}
12206 
12207 	(void) scsi_uscsi_copyout((intptr_t)incmd, uscmd);
12208 
12209 	return (rval);
12210 }
12211 
12212 /*
12213  *     Function: sd_ssc_print
12214  *
12215  * Description: Print information available to the console.
12216  *
12217  * Arguments: ssc - the struct of sd_ssc_t will bring uscsi_cmd and
12218  *                    sd_uscsi_info in.
12219  *            sd_severity - log level.
12220  *     Context: Kernel thread or interrupt context.
12221  */
12222 static void
12223 sd_ssc_print(sd_ssc_t *ssc, int sd_severity)
12224 {
12225 	struct uscsi_cmd	*ucmdp;
12226 	struct scsi_device	*devp;
12227 	dev_info_t		*devinfo;
12228 	uchar_t			*sensep;
12229 	int			senlen;
12230 	union scsi_cdb		*cdbp;
12231 	uchar_t			com;
12232 	extern struct scsi_key_strings scsi_cmds[];
12233 
12234 	ASSERT(ssc != NULL);
12235 	ASSERT(ssc->ssc_un != NULL);
12236 
12237 	if (SD_FM_LOG(ssc->ssc_un) != SD_FM_LOG_EREPORT)
12238 		return;
12239 	ucmdp = ssc->ssc_uscsi_cmd;
12240 	devp = SD_SCSI_DEVP(ssc->ssc_un);
12241 	devinfo = SD_DEVINFO(ssc->ssc_un);
12242 	ASSERT(ucmdp != NULL);
12243 	ASSERT(devp != NULL);
12244 	ASSERT(devinfo != NULL);
12245 	sensep = (uint8_t *)ucmdp->uscsi_rqbuf;
12246 	senlen = ucmdp->uscsi_rqlen - ucmdp->uscsi_rqresid;
12247 	cdbp = (union scsi_cdb *)ucmdp->uscsi_cdb;
12248 
12249 	/* In certain case (like DOORLOCK), the cdb could be NULL. */
12250 	if (cdbp == NULL)
12251 		return;
12252 	/* We don't print log if no sense data available. */
12253 	if (senlen == 0)
12254 		sensep = NULL;
12255 	com = cdbp->scc_cmd;
12256 	scsi_generic_errmsg(devp, sd_label, sd_severity, 0, 0, com,
12257 	    scsi_cmds, sensep, ssc->ssc_un->un_additional_codes, NULL);
12258 }
12259 
/*
 *     Function: sd_ssc_assessment
 *
 * Description: We use this function to make an assessment at the point
 *              where SD driver may encounter a potential error.
 *              Must be called (exactly once) after each sd_ssc_send to
 *              dispose of that command's FMA telemetry; on every path
 *              out of this function ssc_flags is reset to
 *              SSC_FLAGS_UNKNOWN.
 *
 * Arguments: ssc - the struct of sd_ssc_t will bring uscsi_cmd and
 *                  sd_uscsi_info in.
 *            tp_assess - a hint of strategy for ereport posting.
 *            Possible values of tp_assess include:
 *                SD_FMT_IGNORE - we don't post any ereport because we're
 *                sure that it is ok to ignore the underlying problems.
 *                SD_FMT_IGNORE_COMPROMISE - we don't post any ereport for now
 *                but it might be not correct to ignore the underlying hardware
 *                error.
 *                SD_FMT_STATUS_CHECK - we will post an ereport with the
 *                payload driver-assessment of value "fail" or
 *                "fatal"(depending on what information we have here). This
 *                assessment value is usually set when SD driver think there
 *                is a potential error occurred(Typically, when return value
 *                of the SCSI command is EIO).
 *                SD_FMT_STANDARD - we will post an ereport with the payload
 *                driver-assessment of value "info". This assessment value is
 *                set when the SCSI command returned successfully and with
 *                sense data sent back.
 *
 *     Context: Kernel thread.
 */
static void
sd_ssc_assessment(sd_ssc_t *ssc, enum sd_type_assessment tp_assess)
{
	int senlen = 0;
	struct uscsi_cmd *ucmdp = NULL;
	struct sd_lun *un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ucmdp = ssc->ssc_uscsi_cmd;
	ASSERT(ucmdp != NULL);

	/*
	 * NEED_ASSESSMENT is set by sd_ssc_send; its presence proves the
	 * expected send/assessment pairing, so consume it here.
	 */
	if (ssc->ssc_flags & SSC_FLAGS_NEED_ASSESSMENT) {
		ssc->ssc_flags &= ~SSC_FLAGS_NEED_ASSESSMENT;
	} else {
		/*
		 * If enter here, it indicates that we have a wrong
		 * calling sequence of sd_ssc_send and sd_ssc_assessment,
		 * both of which should be called in a pair in case of
		 * loss of FMA telemetries.
		 */
		if (ucmdp->uscsi_cdb != NULL) {
			SD_INFO(SD_LOG_SDTEST, un,
			    "sd_ssc_assessment is missing the "
			    "alternative sd_ssc_send when running 0x%x, "
			    "or there are superfluous sd_ssc_assessment for "
			    "the same sd_ssc_send.\n",
			    ucmdp->uscsi_cdb[0]);
		}
		/*
		 * Set the ssc_flags to the initial value to avoid passing
		 * down dirty flags to the following sd_ssc_send function.
		 */
		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
		return;
	}

	/*
	 * Only handle an issued command which is waiting for assessment.
	 * A command which is not issued will not have
	 * SSC_FLAGS_INVALID_DATA set, so it's ok we just return here.
	 */
	if (!(ssc->ssc_flags & SSC_FLAGS_CMD_ISSUED)) {
		sd_ssc_print(ssc, SCSI_ERR_INFO);
		return;
	} else {
		/*
		 * For an issued command, we should clear this flag in
		 * order to make the sd_ssc_t structure be used off
		 * multiple uscsi commands.
		 */
		ssc->ssc_flags &= ~SSC_FLAGS_CMD_ISSUED;
	}

	/*
	 * We will not deal with non-retryable(flag USCSI_DIAGNOSE set)
	 * commands here. And we should clear the ssc_flags before return.
	 */
	if (ucmdp->uscsi_flags & USCSI_DIAGNOSE) {
		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
		return;
	}

	switch (tp_assess) {
	case SD_FMT_IGNORE:
	case SD_FMT_IGNORE_COMPROMISE:
		/* Deliberately no ereport for either "ignore" hint. */
		break;
	case SD_FMT_STATUS_CHECK:
		/*
		 * For a failed command(including the succeeded command
		 * with invalid data sent back).
		 */
		sd_ssc_post(ssc, SD_FM_DRV_FATAL);
		break;
	case SD_FMT_STANDARD:
		/*
		 * Always for the succeeded commands probably with sense
		 * data sent back.
		 * Limitation:
		 *	We can only handle a succeeded command with sense
		 *	data sent back when auto-request-sense is enabled.
		 */
		senlen = ssc->ssc_uscsi_cmd->uscsi_rqlen -
		    ssc->ssc_uscsi_cmd->uscsi_rqresid;
		/* Post only when ARQ actually delivered sense data. */
		if ((ssc->ssc_uscsi_info->ui_pkt_state & STATE_ARQ_DONE) &&
		    (un->un_f_arq_enabled == TRUE) &&
		    senlen > 0 &&
		    ssc->ssc_uscsi_cmd->uscsi_rqbuf != NULL) {
			sd_ssc_post(ssc, SD_FM_DRV_NOTICE);
		}
		break;
	default:
		/*
		 * Should not have other type of assessment.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
		    "sd_ssc_assessment got wrong "
		    "sd_type_assessment %d.\n", tp_assess);
		break;
	}
	/*
	 * Clear up the ssc_flags before return.
	 */
	ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
}
12394 
12395 /*
12396  *    Function: sd_ssc_post
12397  *
12398  * Description: 1. read the driver property to get fm-scsi-log flag.
12399  *              2. print log if fm_log_capable is non-zero.
12400  *              3. call sd_ssc_ereport_post to post ereport if possible.
12401  *
12402  *    Context: May be called from kernel thread or interrupt context.
12403  */
12404 static void
12405 sd_ssc_post(sd_ssc_t *ssc, enum sd_driver_assessment sd_assess)
12406 {
12407 	struct sd_lun	*un;
12408 	int		sd_severity;
12409 
12410 	ASSERT(ssc != NULL);
12411 	un = ssc->ssc_un;
12412 	ASSERT(un != NULL);
12413 
12414 	/*
12415 	 * We may enter here from sd_ssc_assessment(for USCSI command) or
12416 	 * by directly called from sdintr context.
12417 	 * We don't handle a non-disk drive(CD-ROM, removable media).
12418 	 * Clear the ssc_flags before return in case we've set
12419 	 * SSC_FLAGS_INVALID_XXX which should be skipped for a non-disk
12420 	 * driver.
12421 	 */
12422 	if (ISCD(un) || un->un_f_has_removable_media) {
12423 		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
12424 		return;
12425 	}
12426 
12427 	switch (sd_assess) {
12428 		case SD_FM_DRV_FATAL:
12429 			sd_severity = SCSI_ERR_FATAL;
12430 			break;
12431 		case SD_FM_DRV_RECOVERY:
12432 			sd_severity = SCSI_ERR_RECOVERED;
12433 			break;
12434 		case SD_FM_DRV_RETRY:
12435 			sd_severity = SCSI_ERR_RETRYABLE;
12436 			break;
12437 		case SD_FM_DRV_NOTICE:
12438 			sd_severity = SCSI_ERR_INFO;
12439 			break;
12440 		default:
12441 			sd_severity = SCSI_ERR_UNKNOWN;
12442 	}
12443 	/* print log */
12444 	sd_ssc_print(ssc, sd_severity);
12445 
12446 	/* always post ereport */
12447 	sd_ssc_ereport_post(ssc, sd_assess);
12448 }
12449 
12450 /*
12451  *    Function: sd_ssc_set_info
12452  *
12453  * Description: Mark ssc_flags and set ssc_info which would be the
12454  *              payload of uderr ereport. This function will cause
12455  *              sd_ssc_ereport_post to post uderr ereport only.
12456  *              Besides, when ssc_flags == SSC_FLAGS_INVALID_DATA(USCSI),
12457  *              the function will also call SD_ERROR or scsi_log for a
12458  *              CDROM/removable-media/DDI_FM_NOT_CAPABLE device.
12459  *
12460  * Arguments: ssc - the struct of sd_ssc_t will bring uscsi_cmd and
12461  *                  sd_uscsi_info in.
12462  *            ssc_flags - indicate the sub-category of a uderr.
12463  *            comp - this argument is meaningful only when
12464  *                   ssc_flags == SSC_FLAGS_INVALID_DATA, and its possible
12465  *                   values include:
12466  *                   > 0, SD_ERROR is used with comp as the driver logging
12467  *                   component;
12468  *                   = 0, scsi-log is used to log error telemetries;
12469  *                   < 0, no log available for this telemetry.
12470  *
12471  *    Context: Kernel thread or interrupt context
12472  */
12473 static void
12474 sd_ssc_set_info(sd_ssc_t *ssc, int ssc_flags, uint_t comp, const char *fmt, ...)
12475 {
12476 	va_list	ap;
12477 
12478 	ASSERT(ssc != NULL);
12479 	ASSERT(ssc->ssc_un != NULL);
12480 
12481 	ssc->ssc_flags |= ssc_flags;
12482 	va_start(ap, fmt);
12483 	(void) vsnprintf(ssc->ssc_info, sizeof (ssc->ssc_info), fmt, ap);
12484 	va_end(ap);
12485 
12486 	/*
12487 	 * If SSC_FLAGS_INVALID_DATA is set, it should be a uscsi command
12488 	 * with invalid data sent back. For non-uscsi command, the
12489 	 * following code will be bypassed.
12490 	 */
12491 	if (ssc_flags & SSC_FLAGS_INVALID_DATA) {
12492 		if (SD_FM_LOG(ssc->ssc_un) == SD_FM_LOG_NSUP) {
12493 			/*
12494 			 * If the error belong to certain component and we
12495 			 * do not want it to show up on the console, we
12496 			 * will use SD_ERROR, otherwise scsi_log is
12497 			 * preferred.
12498 			 */
12499 			if (comp > 0) {
12500 				SD_ERROR(comp, ssc->ssc_un, ssc->ssc_info);
12501 			} else if (comp == 0) {
12502 				scsi_log(SD_DEVINFO(ssc->ssc_un), sd_label,
12503 				    CE_WARN, ssc->ssc_info);
12504 			}
12505 		}
12506 	}
12507 }
12508 
/*
 *    Function: sd_buf_iodone
 *
 * Description: Frees the sd_xbuf & returns the buf to its originator.
 *              Completion-side bookkeeping for buf(9S)-based I/O: when
 *              ddi_xbuf_done() releases the xbuf, the per-unit command
 *              count and PM idle timestamp are updated under SD_MUTEX
 *              before biodone() hands the buf back.
 *
 *     Context: May be called from interrupt context.
 */
/* ARGSUSED */
static void
sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
{
	struct sd_xbuf *xp;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");

	/* xp is only validated here; the xbuf is torn down below. */
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	/* xbuf is gone after this */
	if (ddi_xbuf_done(bp, un->un_xbuf_attr)) {
		mutex_enter(SD_MUTEX(un));

		/*
		 * Grab time when the cmd completed.
		 * This is used for determining if the system has been
		 * idle long enough to make it idle to the PM framework.
		 * This is for lowering the overhead, and therefore improving
		 * performance per I/O operation.
		 */
		un->un_pm_idle_time = gethrtime();

		un->un_ncmds_in_driver--;
		ASSERT(un->un_ncmds_in_driver >= 0);
		SD_INFO(SD_LOG_IO, un,
		    "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
		    un->un_ncmds_in_driver);

		mutex_exit(SD_MUTEX(un));
	}

	biodone(bp);				/* bp is gone after this */

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
}
12557 
12558 
12559 /*
12560  *    Function: sd_uscsi_iodone
12561  *
12562  * Description: Frees the sd_xbuf & returns the buf to its originator.
12563  *
12564  *     Context: May be called from interrupt context.
12565  */
12566 /* ARGSUSED */
12567 static void
12568 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12569 {
12570 	struct sd_xbuf *xp;
12571 
12572 	ASSERT(un != NULL);
12573 	ASSERT(bp != NULL);
12574 
12575 	xp = SD_GET_XBUF(bp);
12576 	ASSERT(xp != NULL);
12577 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12578 
12579 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
12580 
12581 	bp->b_private = xp->xb_private;
12582 
12583 	mutex_enter(SD_MUTEX(un));
12584 
12585 	/*
12586 	 * Grab time when the cmd completed.
12587 	 * This is used for determining if the system has been
12588 	 * idle long enough to make it idle to the PM framework.
12589 	 * This is for lowering the overhead, and therefore improving
12590 	 * performance per I/O operation.
12591 	 */
12592 	un->un_pm_idle_time = gethrtime();
12593 
12594 	un->un_ncmds_in_driver--;
12595 	ASSERT(un->un_ncmds_in_driver >= 0);
12596 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
12597 	    un->un_ncmds_in_driver);
12598 
12599 	mutex_exit(SD_MUTEX(un));
12600 
12601 	if (((struct uscsi_cmd *)(xp->xb_pktinfo))->uscsi_rqlen >
12602 	    SENSE_LENGTH) {
12603 		kmem_free(xp, sizeof (struct sd_xbuf) - SENSE_LENGTH +
12604 		    MAX_SENSE_LENGTH);
12605 	} else {
12606 		kmem_free(xp, sizeof (struct sd_xbuf));
12607 	}
12608 
12609 	biodone(bp);
12610 
12611 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
12612 }
12613 
12614 
/*
 *    Function: sd_mapblockaddr_iostart
 *
 * Description: Verify request lies within the partition limits for
 *		the indicated minor device.  Issue "overrun" buf if
 *		request would exceed partition range.  Converts
 *		partition-relative block address to absolute.
 *
 *              Upon exit of this function:
 *              1.I/O is aligned
 *                 xp->xb_blkno represents the absolute sector address
 *              2.I/O is misaligned
 *                 xp->xb_blkno represents the absolute logical block address
 *                 based on DEV_BSIZE. The logical block address will be
 *                 converted to physical sector address in sd_mapblocksize_\
 *                 iostart.
 *              3.I/O is misaligned but is aligned in "overrun" buf
 *                 xp->xb_blkno represents the absolute logical block address
 *                 based on DEV_BSIZE. The logical block address will be
 *                 converted to physical sector address in sd_mapblocksize_\
 *                 iostart. But no RMW will be issued in this case.
 *
 *     Context: Can sleep
 *
 *      Issues: This follows what the old code did, in terms of accessing
 *		some of the partition info in the unit struct without holding
 *		the mutex.  This is a general issue, if the partition info
 *		can be altered while IO is in progress... as soon as we send
 *		a buf, its partitioning can be invalid before it gets to the
 *		device.  Probably the right fix is to move partitioning out
 *		of the driver entirely.
 */

static void
sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
{
	diskaddr_t	nblocks;	/* #blocks in the given partition */
	daddr_t	blocknum;	/* Block number specified by the buf */
	size_t	requested_nblocks;
	size_t	available_nblocks;
	int	partition;
	diskaddr_t	partition_offset;
	struct sd_xbuf *xp;
	/* Alignment masks; left zero until the blocksize is known below. */
	int secmask = 0, blknomask = 0;
	ushort_t is_aligned = TRUE;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	/*
	 * If the geometry is not indicated as valid, attempt to access
	 * the unit & verify the geometry/label. This can be the case for
	 * removable-media devices, or if the device was opened in
	 * NDELAY/NONBLOCK mode.
	 */
	partition = SDPART(bp->b_edev);

	if (!SD_IS_VALID_LABEL(un)) {
		sd_ssc_t *ssc;
		/*
		 * Initialize sd_ssc_t for internal uscsi commands.
		 * To avoid a potential performance issue, we only
		 * allocate this memory when the label is invalid.
		 */
		ssc = sd_ssc_init(un);

		if (sd_ready_and_valid(ssc, partition) != SD_READY_VALID) {
			/*
			 * For removable devices it is possible to start an
			 * I/O without a media by opening the device in nodelay
			 * mode. Also for writable CDs there can be many
			 * scenarios where there is no geometry yet but volume
			 * manager is trying to issue a read() just because
			 * it can see TOC on the CD. So do not print a message
			 * for removables.
			 */
			if (!un->un_f_has_removable_media) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "i/o to invalid geometry\n");
			}
			bioerror(bp, EIO);
			bp->b_resid = bp->b_bcount;
			SD_BEGIN_IODONE(index, un, bp);

			sd_ssc_fini(ssc);
			return;
		}
		sd_ssc_fini(ssc);
	}

	nblocks = 0;
	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
	    &nblocks, &partition_offset, NULL, NULL, (void *)SD_PATH_DIRECT);

	/*
	 * Build the alignment masks from the physical blocksize when RMW
	 * is enabled, otherwise from the target blocksize.
	 */
	if (un->un_f_enable_rmw) {
		blknomask = (un->un_phy_blocksize / DEV_BSIZE) - 1;
		secmask = un->un_phy_blocksize - 1;
	} else {
		blknomask = (un->un_tgt_blocksize / DEV_BSIZE) - 1;
		secmask = un->un_tgt_blocksize - 1;
	}

	/* Misaligned if either the start block or the byte count is off. */
	if ((bp->b_lblkno & (blknomask)) || (bp->b_bcount & (secmask))) {
		is_aligned = FALSE;
	}

	if (!(NOT_DEVBSIZE(un)) || un->un_f_enable_rmw) {
		/*
		 * If I/O is aligned, no need to involve RMW(Read Modify Write)
		 * Convert the logical block number to target's physical sector
		 * number.
		 */
		if (is_aligned) {
			xp->xb_blkno = SD_SYS2TGTBLOCK(un, xp->xb_blkno);
		} else {
			/*
			 * There is no RMW if we're just reading, so don't
			 * warn or error out because of it.
			 */
			if (bp->b_flags & B_READ) {
				/*EMPTY*/
			} else if (!un->un_f_enable_rmw &&
			    un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR) {
				/* Configured to reject misaligned writes. */
				bp->b_flags |= B_ERROR;
				goto error_exit;
			} else if (un->un_f_rmw_type == SD_RMW_TYPE_DEFAULT) {
				/*
				 * Warn about the RMW slow path, but only
				 * once per SD_RMW_MSG_PRINT_TIMEOUT; the
				 * timeout handler reports the count of
				 * suppressed occurrences.
				 */
				mutex_enter(SD_MUTEX(un));
				if (!un->un_f_enable_rmw &&
				    un->un_rmw_msg_timeid == NULL) {
					scsi_log(SD_DEVINFO(un), sd_label,
					    CE_WARN, "I/O request is not "
					    "aligned with %d disk sector size. "
					    "It is handled through Read Modify "
					    "Write but the performance is "
					    "very low.\n",
					    un->un_tgt_blocksize);
					un->un_rmw_msg_timeid =
					    timeout(sd_rmw_msg_print_handler,
					    un, SD_RMW_MSG_PRINT_TIMEOUT);
				} else {
					un->un_rmw_incre_count ++;
				}
				mutex_exit(SD_MUTEX(un));
			}

			/*
			 * Keep partition bounds in DEV_BSIZE units to match
			 * the unconverted xb_blkno for the misaligned case.
			 */
			nblocks = SD_TGT2SYSBLOCK(un, nblocks);
			partition_offset = SD_TGT2SYSBLOCK(un,
			    partition_offset);
		}
	}

	/*
	 * blocknum is the starting block number of the request. At this
	 * point it is still relative to the start of the minor device.
	 */
	blocknum = xp->xb_blkno;

	/*
	 * Legacy: If the starting block number is one past the last block
	 * in the partition, do not set B_ERROR in the buf.
	 */
	if (blocknum == nblocks)  {
		goto error_exit;
	}

	/*
	 * Confirm that the first block of the request lies within the
	 * partition limits. Also the requested number of bytes must be
	 * a multiple of the system block size.
	 */
	if ((blocknum < 0) || (blocknum >= nblocks) ||
	    ((bp->b_bcount & (DEV_BSIZE - 1)) != 0)) {
		bp->b_flags |= B_ERROR;
		goto error_exit;
	}

	/*
	 * If the requested # blocks exceeds the available # blocks, that
	 * is an overrun of the partition.
	 */
	if ((!NOT_DEVBSIZE(un)) && is_aligned) {
		requested_nblocks = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
	} else {
		requested_nblocks = SD_BYTES2SYSBLOCKS(bp->b_bcount);
	}

	available_nblocks = (size_t)(nblocks - blocknum);
	ASSERT(nblocks >= blocknum);

	if (requested_nblocks > available_nblocks) {
		size_t resid;

		/*
		 * Allocate an "overrun" buf to allow the request to proceed
		 * for the amount of space available in the partition. The
		 * amount not transferred will be added into the b_resid
		 * when the operation is complete. The overrun buf
		 * replaces the original buf here, and the original buf
		 * is saved inside the overrun buf, for later use.
		 */
		if ((!NOT_DEVBSIZE(un)) && is_aligned) {
			resid = SD_TGTBLOCKS2BYTES(un,
			    (offset_t)(requested_nblocks - available_nblocks));
		} else {
			resid = SD_SYSBLOCKS2BYTES(
			    (offset_t)(requested_nblocks - available_nblocks));
		}

		size_t count = bp->b_bcount - resid;
		/*
		 * Note: count is an unsigned entity thus it'll NEVER
		 * be less than 0 so ASSERT the original values are
		 * correct.
		 */
		ASSERT(bp->b_bcount >= resid);

		bp = sd_bioclone_alloc(bp, count, blocknum,
		    (int (*)(struct buf *))(uintptr_t)sd_mapblockaddr_iodone);
		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
		ASSERT(xp != NULL);
	}

	/* At this point there should be no residual for this buf. */
	ASSERT(bp->b_resid == 0);

	/* Convert the block number to an absolute address. */
	xp->xb_blkno += partition_offset;

	SD_NEXT_IOSTART(index, un, bp);

	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);

	return;

error_exit:
	bp->b_resid = bp->b_bcount;
	SD_BEGIN_IODONE(index, un, bp);
	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
}
12863 
12864 
/*
 *    Function: sd_mapblockaddr_iodone
 *
 * Description: Completion-side processing for partition management.
 *              If the buf is an "overrun" clone created by
 *              sd_mapblockaddr_iostart, fold its result back into the
 *              original buf and free the clone before passing completion
 *              up the chain.
 *
 *     Context: May be called under interrupt context
 */

static void
sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
{
	/* int	partition; */	/* Not used, see below. */
	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);

	/*
	 * An overrun clone is identified by its b_iodone pointing back at
	 * this function (set by sd_bioclone_alloc in the iostart side).
	 */
	if ((uintptr_t)bp->b_iodone == (uintptr_t)sd_mapblockaddr_iodone) {
		/*
		 * We have an "overrun" buf to deal with...
		 */
		struct sd_xbuf	*xp;
		struct buf	*obp;	/* ptr to the original buf */

		xp = SD_GET_XBUF(bp);
		ASSERT(xp != NULL);

		/* Retrieve the pointer to the original buf */
		obp = (struct buf *)xp->xb_private;
		ASSERT(obp != NULL);

		/*
		 * The original residual is the untransferred tail: its
		 * byte count minus what the clone actually moved.
		 */
		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
		bioerror(obp, bp->b_error);

		sd_bioclone_free(bp);

		/*
		 * Get back the original buf.
		 * Note that since the restoration of xb_blkno below
		 * was removed, the sd_xbuf is not needed.
		 */
		bp = obp;
		/*
		 * xp = SD_GET_XBUF(bp);
		 * ASSERT(xp != NULL);
		 */
	}

	/*
	 * Convert sd->xb_blkno back to a minor-device relative value.
	 * Note: this has been commented out, as it is not needed in the
	 * current implementation of the driver (ie, since this function
	 * is at the top of the layering chains, so the info will be
	 * discarded) and it is in the "hot" IO path.
	 *
	 * partition = getminor(bp->b_edev) & SDPART_MASK;
	 * xp->xb_blkno -= un->un_offset[partition];
	 */

	SD_NEXT_IODONE(index, un, bp);

	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
}
12931 
12932 
12933 /*
12934  *    Function: sd_mapblocksize_iostart
12935  *
12936  * Description: Convert between system block size (un->un_sys_blocksize)
12937  *		and target block size (un->un_tgt_blocksize).
12938  *
12939  *     Context: Can sleep to allocate resources.
12940  *
12941  * Assumptions: A higher layer has already performed any partition validation,
12942  *		and converted the xp->xb_blkno to an absolute value relative
12943  *		to the start of the device.
12944  *
12945  *		It is also assumed that the higher layer has implemented
12946  *		an "overrun" mechanism for the case where the request would
12947  *		read/write beyond the end of a partition.  In this case we
12948  *		assume (and ASSERT) that bp->b_resid == 0.
12949  *
12950  *		Note: The implementation for this routine assumes the target
12951  *		block size remains constant between allocation and transport.
12952  */
12953 
static void
sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
{
	struct sd_mapblocksize_info	*bsp;
	struct sd_xbuf			*xp;
	offset_t first_byte;
	daddr_t	start_block, end_block;
	daddr_t	request_bytes;	/* byte count (note: held in a daddr_t) */
	ushort_t is_aligned = FALSE;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(bp->b_resid == 0);

	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);

	/*
	 * For a non-writable CD, a write request is an error
	 */
	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
	    (un->un_f_mmc_writable_media == FALSE)) {
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		SD_BEGIN_IODONE(index, un, bp);
		return;
	}

	/*
	 * We do not need a shadow buf if the device is using
	 * un->un_sys_blocksize as its block size or if bcount == 0.
	 * In this case there is no layer-private data block allocated.
	 */
	if ((un->un_tgt_blocksize == DEV_BSIZE && !un->un_f_enable_rmw) ||
	    (bp->b_bcount == 0)) {
		goto done;
	}

#if defined(__x86)
	/* We do not support non-block-aligned transfers for ROD devices */
	ASSERT(!ISROD(un));
#endif

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
	    un->un_tgt_blocksize, DEV_BSIZE);
	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
	    "request start block:0x%x\n", xp->xb_blkno);
	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
	    "request len:0x%x\n", bp->b_bcount);

	/*
	 * Allocate the layer-private data area for the mapblocksize layer.
	 * Layers are allowed to use the xp_private member of the sd_xbuf
	 * struct to store the pointer to their layer-private data block, but
	 * each layer also has the responsibility of restoring the prior
	 * contents of xb_private before returning the buf/xbuf to the
	 * higher layer that sent it.
	 *
	 * Here we save the prior contents of xp->xb_private into the
	 * bsp->mbs_oprivate field of our layer-private data area. This value
	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
	 * the layer-private area and returning the buf/xbuf to the layer
	 * that sent it.
	 *
	 * Note that here we use kmem_zalloc for the allocation as there are
	 * parts of the mapblocksize code that expect certain fields to be
	 * zero unless explicitly set to a required value.
	 */
	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
	bsp->mbs_oprivate = xp->xb_private;
	xp->xb_private = bsp;

	/*
	 * This treats the data on the disk (target) as an array of bytes.
	 * first_byte is the byte offset, from the beginning of the device,
	 * to the location of the request. This is converted from a
	 * un->un_sys_blocksize block address to a byte offset, and then back
	 * to a block address based upon a un->un_tgt_blocksize block size.
	 *
	 * xp->xb_blkno should be absolute upon entry into this function,
	 * but it is based upon partitions that use the "system"
	 * block size. It must be adjusted to reflect the block size of
	 * the target.
	 *
	 * Note that end_block is actually the block that follows the last
	 * block of the request, but that's what is needed for the computation.
	 */
	first_byte  = SD_SYSBLOCKS2BYTES((offset_t)xp->xb_blkno);
	if (un->un_f_enable_rmw) {
		/*
		 * RMW: round xb_blkno down to a physical-block boundary;
		 * the result stays expressed in DEV_BSIZE units.
		 */
		start_block = xp->xb_blkno =
		    (first_byte / un->un_phy_blocksize) *
		    (un->un_phy_blocksize / DEV_BSIZE);
		end_block   = ((first_byte + bp->b_bcount +
		    un->un_phy_blocksize - 1) / un->un_phy_blocksize) *
		    (un->un_phy_blocksize / DEV_BSIZE);
	} else {
		start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
		end_block   = (first_byte + bp->b_bcount +
		    un->un_tgt_blocksize - 1) / un->un_tgt_blocksize;
	}

	/* request_bytes is rounded up to a multiple of the target block size */
	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;

	/*
	 * See if the starting address of the request and the request
	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
	 * then we do not need to allocate a shadow buf to handle the request.
	 */
	if (un->un_f_enable_rmw) {
		if (((first_byte % un->un_phy_blocksize) == 0) &&
		    ((bp->b_bcount % un->un_phy_blocksize) == 0)) {
			is_aligned = TRUE;
		}
	} else {
		if (((first_byte % un->un_tgt_blocksize) == 0) &&
		    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
			is_aligned = TRUE;
		}
	}

	if ((bp->b_flags & B_READ) == 0) {
		/*
		 * Lock the range for a write operation. An aligned request is
		 * considered a simple write; otherwise the request must be a
		 * read-modify-write.  The lock is released by
		 * sd_mapblocksize_iodone().
		 */
		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
	}

	/*
	 * Alloc a shadow buf if the request is not aligned. Also, this is
	 * where the READ command is generated for a read-modify-write. (The
	 * write phase is deferred until after the read completes.)
	 */
	if (is_aligned == FALSE) {

		struct sd_mapblocksize_info	*shadow_bsp;
		struct sd_xbuf	*shadow_xp;
		struct buf	*shadow_bp;

		/*
		 * Allocate the shadow buf and it associated xbuf. Note that
		 * after this call the xb_blkno value in both the original
		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
		 * same: absolute relative to the start of the device, and
		 * adjusted for the target block size. The b_blkno in the
		 * shadow buf will also be set to this value. We should never
		 * change b_blkno in the original bp however.
		 *
		 * Note also that the shadow buf will always need to be a
		 * READ command, regardless of whether the incoming command
		 * is a READ or a WRITE.
		 */
		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
		    xp->xb_blkno,
		    (int (*)(struct buf *))(uintptr_t)sd_mapblocksize_iodone);

		shadow_xp = SD_GET_XBUF(shadow_bp);

		/*
		 * Allocate the layer-private data for the shadow buf.
		 * (No need to preserve xb_private in the shadow xbuf.)
		 */
		shadow_xp->xb_private = shadow_bsp =
		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);

		/*
		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
		 * to figure out where the start of the user data is (based upon
		 * the system block size) in the data returned by the READ
		 * command (which will be based upon the target blocksize). Note
		 * that this is only really used if the request is unaligned.
		 */
		if (un->un_f_enable_rmw) {
			bsp->mbs_copy_offset = (ssize_t)(first_byte -
			    ((offset_t)xp->xb_blkno * un->un_sys_blocksize));
			ASSERT((bsp->mbs_copy_offset >= 0) &&
			    (bsp->mbs_copy_offset < un->un_phy_blocksize));
		} else {
			bsp->mbs_copy_offset = (ssize_t)(first_byte -
			    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
			ASSERT((bsp->mbs_copy_offset >= 0) &&
			    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
		}

		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;

		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;

		/* Transfer the wmap (if any) to the shadow buf */
		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
		bsp->mbs_wmp = NULL;

		/*
		 * The shadow buf goes on from here in place of the
		 * original buf.
		 */
		shadow_bsp->mbs_orig_bp = bp;
		bp = shadow_bp;
	}

	SD_INFO(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
	SD_INFO(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
	    request_bytes);
	SD_INFO(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: shadow buf:0x%x\n", bp);

done:
	SD_NEXT_IOSTART(index, un, bp);

	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
}
13176 
13177 
13178 /*
13179  *    Function: sd_mapblocksize_iodone
13180  *
13181  * Description: Completion side processing for block-size mapping.
13182  *
13183  *     Context: May be called under interrupt context
13184  */
13185 
static void
sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
{
	struct sd_mapblocksize_info	*bsp;
	struct sd_xbuf	*xp;
	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
	struct buf	*orig_bp;	/* ptr to the original buf */
	offset_t	shadow_end;
	offset_t	request_end;
	offset_t	shadow_start;
	ssize_t		copy_offset;
	size_t		copy_length;
	size_t		shortfall;
	uint_t		is_write;	/* TRUE if this bp is a WRITE */
	uint_t		has_wmap;	/* TRUE if this bp has a wmap */

	ASSERT(un != NULL);
	ASSERT(bp != NULL);

	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);

	/*
	 * There is no shadow buf or layer-private data if the target is
	 * using un->un_sys_blocksize as its block size or if bcount == 0.
	 */
	if ((un->un_tgt_blocksize == DEV_BSIZE && !un->un_f_enable_rmw) ||
	    (bp->b_bcount == 0)) {
		goto exit;
	}

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	/* Retrieve the pointer to the layer-private data area from the xbuf. */
	bsp = xp->xb_private;

	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;

	if (is_write) {
		/*
		 * For a WRITE request we must free up the block range that
		 * we have locked up.  This holds regardless of whether this is
		 * an aligned write request or a read-modify-write request.
		 */
		sd_range_unlock(un, bsp->mbs_wmp);
		bsp->mbs_wmp = NULL;
	}

	if ((uintptr_t)bp->b_iodone != (uintptr_t)sd_mapblocksize_iodone) {
		/*
		 * An aligned read or write command will have no shadow buf;
		 * there is not much else to do with it.
		 */
		goto done;
	}

	orig_bp = bsp->mbs_orig_bp;
	ASSERT(orig_bp != NULL);
	orig_xp = SD_GET_XBUF(orig_bp);
	ASSERT(orig_xp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	if (!is_write && has_wmap) {
		/*
		 * A READ with a wmap means this is the READ phase of a
		 * read-modify-write. If an error occurred on the READ then
		 * we do not proceed with the WRITE phase or copy any data.
		 * Just release the write maps and return with an error.
		 */
		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
			orig_bp->b_resid = orig_bp->b_bcount;
			bioerror(orig_bp, bp->b_error);
			sd_range_unlock(un, bsp->mbs_wmp);
			goto freebuf_done;
		}
	}

	/*
	 * Here is where we set up to copy the data from the shadow buf
	 * into the space associated with the original buf.
	 *
	 * To deal with the conversion between block sizes, these
	 * computations treat the data as an array of bytes, with the
	 * first byte (byte 0) corresponding to the first byte in the
	 * first block on the disk.
	 */

	/*
	 * shadow_start and shadow_len indicate the location and size of
	 * the data returned with the shadow IO request.
	 */
	if (un->un_f_enable_rmw) {
		shadow_start  = SD_SYSBLOCKS2BYTES((offset_t)xp->xb_blkno);
	} else {
		shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
	}
	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;

	/*
	 * copy_offset gives the offset (in bytes) from the start of the first
	 * block of the READ request to the beginning of the data.  We retrieve
	 * this value from xb_pktp in the ORIGINAL xbuf, as it has been saved
	 * there by sd_mapblocksize_iostart(). copy_length gives the amount of
	 * data to be copied (in bytes).
	 */
	copy_offset  = bsp->mbs_copy_offset;
	if (un->un_f_enable_rmw) {
		ASSERT((copy_offset >= 0) &&
		    (copy_offset < un->un_phy_blocksize));
	} else {
		ASSERT((copy_offset >= 0) &&
		    (copy_offset < un->un_tgt_blocksize));
	}

	copy_length  = orig_bp->b_bcount;
	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;

	/*
	 * Set up the resid and error fields of orig_bp as appropriate.
	 */
	if (shadow_end >= request_end) {
		/* We got all the requested data; set resid to zero */
		orig_bp->b_resid = 0;
	} else {
		/*
		 * We failed to get enough data to fully satisfy the original
		 * request. Just copy back whatever data we got and set
		 * up the residual and error code as required.
		 *
		 * 'shortfall' is the amount by which the data received with the
		 * shadow buf has "fallen short" of the requested amount.
		 */
		shortfall = (size_t)(request_end - shadow_end);

		if (shortfall > orig_bp->b_bcount) {
			/*
			 * We did not get enough data to even partially
			 * fulfill the original request.  The residual is
			 * equal to the amount requested.
			 */
			orig_bp->b_resid = orig_bp->b_bcount;
		} else {
			/*
			 * We did not get all the data that we requested
			 * from the device, but we will try to return what
			 * portion we did get.
			 */
			orig_bp->b_resid = shortfall;
		}
		ASSERT(copy_length >= orig_bp->b_resid);
		copy_length  -= orig_bp->b_resid;
	}

	/* Propagate the error code from the shadow buf to the original buf */
	bioerror(orig_bp, bp->b_error);

	if (is_write) {
		goto freebuf_done;	/* No data copying for a WRITE */
	}

	if (has_wmap) {
		/*
		 * This is a READ command from the READ phase of a
		 * read-modify-write request. We have to copy the data given
		 * by the user OVER the data returned by the READ command,
		 * then convert the command from a READ to a WRITE and send
		 * it back to the target.
		 */
		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
		    copy_length);

		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */

		/*
		 * Dispatch the WRITE command to the taskq thread, which
		 * will in turn send the command to the target. When the
		 * WRITE command completes, we (sd_mapblocksize_iodone())
		 * will get called again as part of the iodone chain
		 * processing for it. Note that we will still be dealing
		 * with the shadow buf at that point.
		 */
		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
		    KM_NOSLEEP) != TASKQID_INVALID) {
			/*
			 * Dispatch was successful so we are done. Return
			 * without going any higher up the iodone chain. Do
			 * not free up any layer-private data until after the
			 * WRITE completes.
			 */
			return;
		}

		/*
		 * Dispatch of the WRITE command failed; set up the error
		 * condition and send this IO back up the iodone chain.
		 */
		bioerror(orig_bp, EIO);
		orig_bp->b_resid = orig_bp->b_bcount;

	} else {
		/*
		 * This is a regular READ request (ie, not a RMW). Copy the
		 * data from the shadow buf into the original buf. The
		 * copy_offset compensates for any "misalignment" between the
		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
		 * original buf (with its un->un_sys_blocksize blocks).
		 */
		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
		    copy_length);
	}

freebuf_done:

	/*
	 * At this point we still have both the shadow buf AND the original
	 * buf to deal with, as well as the layer-private data area in each.
	 * Local variables are as follows:
	 *
	 * bp -- points to shadow buf
	 * xp -- points to xbuf of shadow buf
	 * bsp -- points to layer-private data area of shadow buf
	 * orig_bp -- points to original buf
	 *
	 * First free the shadow buf and its associated xbuf, then free the
	 * layer-private data area from the shadow buf. There is no need to
	 * restore xb_private in the shadow xbuf.
	 */
	sd_shadow_buf_free(bp);
	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));

	/*
	 * Now update the local variables to point to the original buf, xbuf,
	 * and layer-private area.
	 */
	bp = orig_bp;
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	ASSERT(xp == orig_xp);
	bsp = xp->xb_private;
	ASSERT(bsp != NULL);

done:
	/*
	 * Restore xb_private to whatever it was set to by the next higher
	 * layer in the chain, then free the layer-private data area.
	 */
	xp->xb_private = bsp->mbs_oprivate;
	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));

exit:
	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);

	SD_NEXT_IODONE(index, un, bp);
}
13443 
13444 
13445 /*
13446  *    Function: sd_checksum_iostart
13447  *
13448  * Description: A stub function for a layer that's currently not used.
13449  *		For now just a placeholder.
13450  *
13451  *     Context: Kernel thread context
13452  */
13453 
13454 static void
13455 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
13456 {
13457 	ASSERT(un != NULL);
13458 	ASSERT(bp != NULL);
13459 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13460 	SD_NEXT_IOSTART(index, un, bp);
13461 }
13462 
13463 
13464 /*
13465  *    Function: sd_checksum_iodone
13466  *
13467  * Description: A stub function for a layer that's currently not used.
13468  *		For now just a placeholder.
13469  *
13470  *     Context: May be called under interrupt context
13471  */
13472 
13473 static void
13474 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
13475 {
13476 	ASSERT(un != NULL);
13477 	ASSERT(bp != NULL);
13478 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13479 	SD_NEXT_IODONE(index, un, bp);
13480 }
13481 
13482 
13483 /*
13484  *    Function: sd_checksum_uscsi_iostart
13485  *
13486  * Description: A stub function for a layer that's currently not used.
13487  *		For now just a placeholder.
13488  *
13489  *     Context: Kernel thread context
13490  */
13491 
13492 static void
13493 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
13494 {
13495 	ASSERT(un != NULL);
13496 	ASSERT(bp != NULL);
13497 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13498 	SD_NEXT_IOSTART(index, un, bp);
13499 }
13500 
13501 
13502 /*
13503  *    Function: sd_checksum_uscsi_iodone
13504  *
13505  * Description: A stub function for a layer that's currently not used.
13506  *		For now just a placeholder.
13507  *
13508  *     Context: May be called under interrupt context
13509  */
13510 
13511 static void
13512 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
13513 {
13514 	ASSERT(un != NULL);
13515 	ASSERT(bp != NULL);
13516 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13517 	SD_NEXT_IODONE(index, un, bp);
13518 }
13519 
13520 
13521 /*
13522  *    Function: sd_pm_iostart
13523  *
 * Description: iostart-side routine for Power management.
13525  *
13526  *     Context: Kernel thread context
13527  */
13528 
13529 static void
13530 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
13531 {
13532 	ASSERT(un != NULL);
13533 	ASSERT(bp != NULL);
13534 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13535 	ASSERT(!mutex_owned(&un->un_pm_mutex));
13536 
13537 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
13538 
13539 	if (sd_pm_entry(un) != DDI_SUCCESS) {
13540 		/*
13541 		 * Set up to return the failed buf back up the 'iodone'
13542 		 * side of the calling chain.
13543 		 */
13544 		bioerror(bp, EIO);
13545 		bp->b_resid = bp->b_bcount;
13546 
13547 		SD_BEGIN_IODONE(index, un, bp);
13548 
13549 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
13550 		return;
13551 	}
13552 
13553 	SD_NEXT_IOSTART(index, un, bp);
13554 
13555 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
13556 }
13557 
13558 
13559 /*
13560  *    Function: sd_pm_iodone
13561  *
 * Description: iodone-side routine for power management.
13563  *
13564  *     Context: may be called from interrupt context
13565  */
13566 
13567 static void
13568 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
13569 {
13570 	ASSERT(un != NULL);
13571 	ASSERT(bp != NULL);
13572 	ASSERT(!mutex_owned(&un->un_pm_mutex));
13573 
13574 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
13575 
13576 	/*
13577 	 * After attach the following flag is only read, so don't
13578 	 * take the penalty of acquiring a mutex for it.
13579 	 */
13580 	if (un->un_f_pm_is_enabled == TRUE) {
13581 		sd_pm_exit(un);
13582 	}
13583 
13584 	SD_NEXT_IODONE(index, un, bp);
13585 
13586 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
13587 }
13588 
13589 
13590 /*
13591  *    Function: sd_core_iostart
13592  *
13593  * Description: Primary driver function for enqueuing buf(9S) structs from
13594  *		the system and initiating IO to the target device
13595  *
13596  *     Context: Kernel thread context. Can sleep.
13597  *
13598  * Assumptions:  - The given xp->xb_blkno is absolute
13599  *		   (ie, relative to the start of the device).
13600  *		 - The IO is to be done using the native blocksize of
13601  *		   the device, as specified in un->un_tgt_blocksize.
13602  */
13603 /* ARGSUSED */
13604 static void
13605 sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
13606 {
13607 	struct sd_xbuf *xp;
13608 
13609 	ASSERT(un != NULL);
13610 	ASSERT(bp != NULL);
13611 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13612 	ASSERT(bp->b_resid == 0);
13613 
13614 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
13615 
13616 	xp = SD_GET_XBUF(bp);
13617 	ASSERT(xp != NULL);
13618 
13619 	mutex_enter(SD_MUTEX(un));
13620 
13621 	/*
13622 	 * If we are currently in the failfast state, fail any new IO
13623 	 * that has B_FAILFAST set, then return.
13624 	 */
13625 	if ((bp->b_flags & B_FAILFAST) &&
13626 	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
13627 		mutex_exit(SD_MUTEX(un));
13628 		bioerror(bp, EIO);
13629 		bp->b_resid = bp->b_bcount;
13630 		SD_BEGIN_IODONE(index, un, bp);
13631 		return;
13632 	}
13633 
13634 	if (SD_IS_DIRECT_PRIORITY(xp)) {
13635 		/*
13636 		 * Priority command -- transport it immediately.
13637 		 *
13638 		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
13639 		 * because all direct priority commands should be associated
13640 		 * with error recovery actions which we don't want to retry.
13641 		 */
13642 		sd_start_cmds(un, bp);
13643 	} else {
13644 		/*
13645 		 * Normal command -- add it to the wait queue, then start
13646 		 * transporting commands from the wait queue.
13647 		 */
13648 		sd_add_buf_to_waitq(un, bp);
13649 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
13650 		sd_start_cmds(un, NULL);
13651 	}
13652 
13653 	mutex_exit(SD_MUTEX(un));
13654 
13655 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
13656 }
13657 
13658 
13659 /*
13660  *    Function: sd_init_cdb_limits
13661  *
13662  * Description: This is to handle scsi_pkt initialization differences
13663  *		between the driver platforms.
13664  *
13665  *		Legacy behaviors:
13666  *
13667  *		If the block number or the sector count exceeds the
13668  *		capabilities of a Group 0 command, shift over to a
13669  *		Group 1 command. We don't blindly use Group 1
13670  *		commands because a) some drives (CDC Wren IVs) get a
13671  *		bit confused, and b) there is probably a fair amount
13672  *		of speed difference for a target to receive and decode
13673  *		a 10 byte command instead of a 6 byte command.
13674  *
13675  *		The xfer time difference of 6 vs 10 byte CDBs is
13676  *		still significant so this code is still worthwhile.
13677  *		10 byte CDBs are very inefficient with the fas HBA driver
13678  *		and older disks. Each CDB byte took 1 usec with some
13679  *		popular disks.
13680  *
13681  *     Context: Must be called at attach time
13682  */
13683 
13684 static void
13685 sd_init_cdb_limits(struct sd_lun *un)
13686 {
13687 	int hba_cdb_limit;
13688 
13689 	/*
13690 	 * Use CDB_GROUP1 commands for most devices except for
13691 	 * parallel SCSI fixed drives in which case we get better
13692 	 * performance using CDB_GROUP0 commands (where applicable).
13693 	 */
13694 	un->un_mincdb = SD_CDB_GROUP1;
13695 #if !defined(__fibre)
13696 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
13697 	    !un->un_f_has_removable_media) {
13698 		un->un_mincdb = SD_CDB_GROUP0;
13699 	}
13700 #endif
13701 
13702 	/*
13703 	 * Try to read the max-cdb-length supported by HBA.
13704 	 */
13705 	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
13706 	if (0 >= un->un_max_hba_cdb) {
13707 		un->un_max_hba_cdb = CDB_GROUP4;
13708 		hba_cdb_limit = SD_CDB_GROUP4;
13709 	} else if (0 < un->un_max_hba_cdb &&
13710 	    un->un_max_hba_cdb < CDB_GROUP1) {
13711 		hba_cdb_limit = SD_CDB_GROUP0;
13712 	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
13713 	    un->un_max_hba_cdb < CDB_GROUP5) {
13714 		hba_cdb_limit = SD_CDB_GROUP1;
13715 	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
13716 	    un->un_max_hba_cdb < CDB_GROUP4) {
13717 		hba_cdb_limit = SD_CDB_GROUP5;
13718 	} else {
13719 		hba_cdb_limit = SD_CDB_GROUP4;
13720 	}
13721 
13722 	/*
13723 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
13724 	 * commands for fixed disks unless we are building for a 32 bit
13725 	 * kernel.
13726 	 */
13727 #ifdef _LP64
13728 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13729 	    min(hba_cdb_limit, SD_CDB_GROUP4);
13730 #else
13731 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13732 	    min(hba_cdb_limit, SD_CDB_GROUP1);
13733 #endif
13734 
13735 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
13736 	    ? sizeof (struct scsi_arq_status) : 1);
13737 	if (!ISCD(un))
13738 		un->un_cmd_timeout = (ushort_t)sd_io_time;
13739 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
13740 }
13741 
13742 
13743 /*
13744  *    Function: sd_initpkt_for_buf
13745  *
13746  * Description: Allocate and initialize for transport a scsi_pkt struct,
13747  *		based upon the info specified in the given buf struct.
13748  *
13749  *		Assumes the xb_blkno in the request is absolute (ie,
13750  *		relative to the start of the device (NOT partition!).
13751  *		Also assumes that the request is using the native block
13752  *		size of the device (as returned by the READ CAPACITY
13753  *		command).
13754  *
13755  * Return Code: SD_PKT_ALLOC_SUCCESS
13756  *		SD_PKT_ALLOC_FAILURE
13757  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13758  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13759  *
13760  *     Context: Kernel thread and may be called from software interrupt context
13761  *		as part of a sdrunout callback. This function may not block or
13762  *		call routines that block
13763  */
13764 
static int
sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
{
	struct sd_xbuf	*xp;
	struct scsi_pkt *pktp = NULL;
	struct sd_lun	*un;
	size_t		blockcount;
	daddr_t		startblock;
	int		rval;
	int		cmd_flags;

	ASSERT(bp != NULL);
	ASSERT(pktpp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp->b_resid == 0);

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);

	/*
	 * Packet allocation below may need to wait for resources, so the
	 * per-instance mutex cannot be held across it.  It is re-acquired
	 * on every return path out of this function.
	 */
	mutex_exit(SD_MUTEX(un));

#if defined(__x86)	/* DMAFREE for x86 only */
	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
		/*
		 * Already have a scsi_pkt -- just need DMA resources.
		 * We must recompute the CDB in case the mapping returns
		 * a nonzero pkt_resid.
		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
		 * that is being retried, the unmap/remap of the DMA resources
		 * will result in the entire transfer starting over again
		 * from the very first block.
		 */
		ASSERT(xp->xb_pktp != NULL);
		pktp = xp->xb_pktp;
	} else {
		pktp = NULL;
	}
#endif /* __x86 */

	startblock = xp->xb_blkno;	/* Absolute block num. */
	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);

	/* Merge per-instance pkt flags with the per-request init flags. */
	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);

	/*
	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
	 * call scsi_init_pkt, and build the CDB.
	 */
	rval = sd_setup_rw_pkt(un, &pktp, bp,
	    cmd_flags, sdrunout, (caddr_t)un,
	    startblock, blockcount);

	if (rval == 0) {
		/*
		 * Success.
		 *
		 * If partial DMA is being used and required for this transfer.
		 * set it up here.
		 */
		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
		    (pktp->pkt_resid != 0)) {

			/*
			 * Save the CDB length and pkt_resid for the
			 * next xfer
			 */
			xp->xb_dma_resid = pktp->pkt_resid;

			/* rezero resid */
			pktp->pkt_resid = 0;

		} else {
			xp->xb_dma_resid = 0;
		}

		/* Finish initializing the packet from per-instance state. */
		pktp->pkt_flags = un->un_tagflags;
		pktp->pkt_time  = un->un_cmd_timeout;
		pktp->pkt_comp  = sdintr;

		/* Completion routine (sdintr) recovers the buf from here. */
		pktp->pkt_private = bp;
		*pktpp = pktp;

		SD_TRACE(SD_LOG_IO_CORE, un,
		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);

#if defined(__x86)	/* DMAFREE for x86 only */
		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
#endif

		mutex_enter(SD_MUTEX(un));
		return (SD_PKT_ALLOC_SUCCESS);

	}

	/*
	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
	 * from sd_setup_rw_pkt.
	 */
	ASSERT(rval == SD_PKT_ALLOC_FAILURE);

	if (rval == SD_PKT_ALLOC_FAILURE) {
		*pktpp = NULL;
		/*
		 * Set the driver state to RWAIT to indicate the driver
		 * is waiting on resource allocations. The driver will not
		 * suspend, pm_suspend, or detach while the state is RWAIT.
		 */
		mutex_enter(SD_MUTEX(un));
		New_state(un, SD_STATE_RWAIT);

		SD_ERROR(SD_LOG_IO_CORE, un,
		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);

		/*
		 * B_ERROR on the buf at this point maps to the NO_DMA
		 * failure code; a plain allocation shortage does not.
		 */
		if ((bp->b_flags & B_ERROR) != 0) {
			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
		}
		return (SD_PKT_ALLOC_FAILURE);
	} else {
		/*
		 * PKT_ALLOC_FAILURE_CDB_TOO_SMALL
		 *
		 * This should never happen.  Maybe someone messed with the
		 * kernel's minphys?
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "Request rejected: too large for CDB: "
		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
		SD_ERROR(SD_LOG_IO_CORE, un,
		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
		mutex_enter(SD_MUTEX(un));
		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);

	}
}
13903 
13904 
13905 /*
13906  *    Function: sd_destroypkt_for_buf
13907  *
13908  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
13909  *
13910  *     Context: Kernel thread or interrupt context
13911  */
13912 
13913 static void
13914 sd_destroypkt_for_buf(struct buf *bp)
13915 {
13916 	ASSERT(bp != NULL);
13917 	ASSERT(SD_GET_UN(bp) != NULL);
13918 
13919 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13920 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
13921 
13922 	ASSERT(SD_GET_PKTP(bp) != NULL);
13923 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13924 
13925 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13926 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
13927 }
13928 
13929 /*
13930  *    Function: sd_setup_rw_pkt
13931  *
13932  * Description: Determines appropriate CDB group for the requested LBA
13933  *		and transfer length, calls scsi_init_pkt, and builds
13934  *		the CDB.  Do not use for partial DMA transfers except
13935  *		for the initial transfer since the CDB size must
13936  *		remain constant.
13937  *
13938  *     Context: Kernel thread and may be called from software interrupt
13939  *		context as part of a sdrunout callback. This function may not
13940  *		block or call routines that block
13941  */
13942 
13943 
int
sd_setup_rw_pkt(struct sd_lun *un,
    struct scsi_pkt **pktpp, struct buf *bp, int flags,
    int (*callback)(caddr_t), caddr_t callback_arg,
    diskaddr_t lba, uint32_t blockcount)
{
	struct scsi_pkt *return_pktp;
	union scsi_cdb *cdbp;
	struct sd_cdbinfo *cp = NULL;
	int i;

	/*
	 * See which size CDB to use, based upon the request.
	 * Scan from the smallest supported CDB group to the largest.
	 */
	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {

		/*
		 * Check lba and block count against sd_cdbtab limits.
		 * In the partial DMA case, we have to use the same size
		 * CDB for all the transfers.  Check lba + blockcount
		 * against the max LBA so we know that segment of the
		 * transfer can use the CDB we select.
		 */
		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {

			/*
			 * The command will fit into the CDB type
			 * specified by sd_cdbtab[i].
			 */
			cp = sd_cdbtab + i;

			/*
			 * Call scsi_init_pkt so we can fill in the
			 * CDB.
			 */
			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
			    bp, cp->sc_grpcode, un->un_status_len, 0,
			    flags, callback, callback_arg);

			if (return_pktp != NULL) {

				/*
				 * Return new value of pkt
				 */
				*pktpp = return_pktp;

				/*
				 * To be safe, zero the CDB insuring there is
				 * no leftover data from a previous command.
				 */
				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);

				/*
				 * Handle partial DMA mapping
				 */
				if (return_pktp->pkt_resid != 0) {

					/*
					 * Not going to xfer as many blocks as
					 * originally expected
					 */
					blockcount -=
					    SD_BYTES2TGTBLOCKS(un,
					    return_pktp->pkt_resid);
				}

				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;

				/*
				 * Set command byte based on the CDB
				 * type we matched.
				 */
				cdbp->scc_cmd = cp->sc_grpmask |
				    ((bp->b_flags & B_READ) ?
				    SCMD_READ : SCMD_WRITE);

				SD_FILL_SCSI1_LUN(un, return_pktp);

				/*
				 * Fill in LBA and length
				 */
				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
				    (cp->sc_grpcode == CDB_GROUP4) ||
				    (cp->sc_grpcode == CDB_GROUP0) ||
				    (cp->sc_grpcode == CDB_GROUP5));

				if (cp->sc_grpcode == CDB_GROUP1) {
					FORMG1ADDR(cdbp, lba);
					FORMG1COUNT(cdbp, blockcount);
					return (0);
				} else if (cp->sc_grpcode == CDB_GROUP4) {
					FORMG4LONGADDR(cdbp, lba);
					FORMG4COUNT(cdbp, blockcount);
					return (0);
				} else if (cp->sc_grpcode == CDB_GROUP0) {
					FORMG0ADDR(cdbp, lba);
					FORMG0COUNT(cdbp, blockcount);
					return (0);
				} else if (cp->sc_grpcode == CDB_GROUP5) {
					FORMG5ADDR(cdbp, lba);
					FORMG5COUNT(cdbp, blockcount);
					return (0);
				}

				/*
				 * It should be impossible to not match one
				 * of the CDB types above, so we should never
				 * reach this point.  Set the CDB command byte
				 * to test-unit-ready to avoid writing
				 * to somewhere we don't intend.
				 */
				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
			} else {
				/*
				 * Couldn't get scsi_pkt
				 */
				return (SD_PKT_ALLOC_FAILURE);
			}
		}
	}

	/*
	 * None of the available CDB types were suitable.  This really
	 * should never happen:  on a 64 bit system we support
	 * READ16/WRITE16 which will hold an entire 64 bit disk address
	 * and on a 32 bit system we will refuse to bind to a device
	 * larger than 2TB so addresses will never be larger than 32 bits.
	 */
	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
}
14076 
14077 /*
14078  *    Function: sd_setup_next_rw_pkt
14079  *
14080  * Description: Setup packet for partial DMA transfers, except for the
14081  *		initial transfer.  sd_setup_rw_pkt should be used for
14082  *		the initial transfer.
14083  *
14084  *     Context: Kernel thread and may be called from interrupt context.
14085  */
14086 
int
sd_setup_next_rw_pkt(struct sd_lun *un,
    struct scsi_pkt *pktp, struct buf *bp,
    diskaddr_t lba, uint32_t blockcount)
{
	uchar_t com;
	union scsi_cdb *cdbp;
	uchar_t cdb_group_id;

	ASSERT(pktp != NULL);
	ASSERT(pktp->pkt_cdbp != NULL);

	/*
	 * Preserve the existing command byte: the CDB group (and hence
	 * the CDB size) must not change between partial-DMA segments.
	 */
	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
	com = cdbp->scc_cmd;
	cdb_group_id = CDB_GROUPID(com);

	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
	    (cdb_group_id == CDB_GROUPID_1) ||
	    (cdb_group_id == CDB_GROUPID_4) ||
	    (cdb_group_id == CDB_GROUPID_5));

	/*
	 * Move pkt to the next portion of the xfer.
	 * func is NULL_FUNC so we do not have to release
	 * the disk mutex here.
	 */
	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
	    NULL_FUNC, NULL) == pktp) {
		/* Success.  Handle partial DMA */
		if (pktp->pkt_resid != 0) {
			/* This segment maps fewer blocks than requested. */
			blockcount -=
			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
		}

		/* Rebuild the CDB for the new LBA/length window. */
		cdbp->scc_cmd = com;
		SD_FILL_SCSI1_LUN(un, pktp);
		if (cdb_group_id == CDB_GROUPID_1) {
			FORMG1ADDR(cdbp, lba);
			FORMG1COUNT(cdbp, blockcount);
			return (0);
		} else if (cdb_group_id == CDB_GROUPID_4) {
			FORMG4LONGADDR(cdbp, lba);
			FORMG4COUNT(cdbp, blockcount);
			return (0);
		} else if (cdb_group_id == CDB_GROUPID_0) {
			FORMG0ADDR(cdbp, lba);
			FORMG0COUNT(cdbp, blockcount);
			return (0);
		} else if (cdb_group_id == CDB_GROUPID_5) {
			FORMG5ADDR(cdbp, lba);
			FORMG5COUNT(cdbp, blockcount);
			return (0);
		}

		/* Unreachable */
		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
	}

	/*
	 * Error setting up next portion of cmd transfer.
	 * Something is definitely very wrong and this
	 * should not happen.
	 */
	return (SD_PKT_ALLOC_FAILURE);
}
14152 
14153 /*
14154  *    Function: sd_initpkt_for_uscsi
14155  *
14156  * Description: Allocate and initialize for transport a scsi_pkt struct,
14157  *		based upon the info specified in the given uscsi_cmd struct.
14158  *
14159  * Return Code: SD_PKT_ALLOC_SUCCESS
14160  *		SD_PKT_ALLOC_FAILURE
14161  *		SD_PKT_ALLOC_FAILURE_NO_DMA
14162  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
14163  *
14164  *     Context: Kernel thread and may be called from software interrupt context
14165  *		as part of a sdrunout callback. This function may not block or
14166  *		call routines that block
14167  */
14168 
static int
sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
{
	struct uscsi_cmd *uscmd;
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;
	struct sd_lun	*un;
	uint32_t	flags = 0;

	ASSERT(bp != NULL);
	ASSERT(pktpp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
	ASSERT(uscmd != NULL);

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);

	/*
	 * Allocate the scsi_pkt for the command.
	 *
	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
	 *	 during scsi_init_pkt time and will continue to use the
	 *	 same path as long as the same scsi_pkt is used without
	 *	 intervening scsi_dmafree(). Since uscsi command does
	 *	 not call scsi_dmafree() before retry failed command, it
	 *	 is necessary to make sure PKT_DMA_PARTIAL flag is NOT
	 *	 set such that scsi_vhci can use other available path for
	 *	 retry. Besides, uscsi command does not allow DMA breakup,
	 *	 so there is no need to set PKT_DMA_PARTIAL flag.
	 *
	 *	 More fundamentally, we can't support breaking up this DMA into
	 *	 multiple windows on x86. There is, in general, no guarantee
	 *	 that arbitrary SCSI commands are idempotent, which is required
	 *	 if we want to use multiple windows for a given command.
	 */
	if (uscmd->uscsi_rqlen > SENSE_LENGTH) {
		/*
		 * Caller wants more sense data than the default; use
		 * extended ARQ (PKT_XARQ) and size the status area for
		 * the larger request-sense payload.
		 */
		pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
		    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
		    ((int)(uscmd->uscsi_rqlen) + sizeof (struct scsi_arq_status)
		    - sizeof (struct scsi_extended_sense)), 0,
		    (un->un_pkt_flags & ~PKT_DMA_PARTIAL) | PKT_XARQ,
		    sdrunout, (caddr_t)un);
	} else {
		pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
		    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
		    sizeof (struct scsi_arq_status), 0,
		    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
		    sdrunout, (caddr_t)un);
	}

	if (pktp == NULL) {
		*pktpp = NULL;
		/*
		 * Set the driver state to RWAIT to indicate the driver
		 * is waiting on resource allocations. The driver will not
		 * suspend, pm_suspend, or detach while the state is RWAIT.
		 */
		New_state(un, SD_STATE_RWAIT);

		SD_ERROR(SD_LOG_IO_CORE, un,
		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);

		if ((bp->b_flags & B_ERROR) != 0) {
			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
		}
		return (SD_PKT_ALLOC_FAILURE);
	}

	/*
	 * We do not do DMA breakup for USCSI commands, so return failure
	 * here if all the needed DMA resources were not allocated.
	 */
	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
		scsi_destroy_pkt(pktp);
		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
	}

	/* Init the cdb from the given uscsi struct */
	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
	    uscmd->uscsi_cdb[0], 0, 0, 0);

	SD_FILL_SCSI1_LUN(un, pktp);

	/*
	 * Set up the optional USCSI flags. See the uscsi (7I) man page
	 * for listing of the supported flags.
	 */

	if (uscmd->uscsi_flags & USCSI_SILENT) {
		flags |= FLAG_SILENT;
	}

	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
		flags |= FLAG_DIAGNOSE;
	}

	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
		flags |= FLAG_ISOLATE;
	}

	if (un->un_f_is_fibre == FALSE) {
		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
		}
	}

	/*
	 * Set the pkt flags here so we save time later.
	 * Note: These flags are NOT in the uscsi man page!!!
	 */
	if (uscmd->uscsi_flags & USCSI_HEAD) {
		flags |= FLAG_HEAD;
	}

	if (uscmd->uscsi_flags & USCSI_NOINTR) {
		flags |= FLAG_NOINTR;
	}

	/*
	 * For tagged queueing, things get a bit complicated.
	 * Check first for head of queue and last for ordered queue.
	 * If neither head nor order, use the default driver tag flags.
	 */
	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
		if (uscmd->uscsi_flags & USCSI_HTAG) {
			flags |= FLAG_HTAG;
		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
			flags |= FLAG_OTAG;
		} else {
			flags |= un->un_tagflags & FLAG_TAGMASK;
		}
	}

	/* USCSI_NODISCON overrides any tag flags set above. */
	if (uscmd->uscsi_flags & USCSI_NODISCON) {
		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
	}

	pktp->pkt_flags = flags;

	/* Transfer uscsi information to scsi_pkt */
	(void) scsi_uscsi_pktinit(uscmd, pktp);

	/* Copy the caller's CDB into the pkt... */
	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);

	/* A zero uscsi timeout selects the driver's default. */
	if (uscmd->uscsi_timeout == 0) {
		pktp->pkt_time = un->un_uscsi_timeout;
	} else {
		pktp->pkt_time = uscmd->uscsi_timeout;
	}

	/* need it later to identify USCSI request in sdintr */
	xp->xb_pkt_flags |= SD_XB_USCSICMD;

	xp->xb_sense_resid = uscmd->uscsi_rqresid;

	pktp->pkt_private = bp;
	pktp->pkt_comp = sdintr;
	*pktpp = pktp;

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);

	return (SD_PKT_ALLOC_SUCCESS);
}
14344 
14345 
14346 /*
14347  *    Function: sd_destroypkt_for_uscsi
14348  *
14349  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
14350  *		IOs.. Also saves relevant info into the associated uscsi_cmd
14351  *		struct.
14352  *
14353  *     Context: May be called under interrupt context
14354  */
14355 
static void
sd_destroypkt_for_uscsi(struct buf *bp)
{
	struct uscsi_cmd *uscmd;
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;
	struct sd_lun	*un;
	struct sd_uscsi_info *suip;

	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);

	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
	ASSERT(uscmd != NULL);

	/* Save the status and the residual into the uscsi_cmd struct */
	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
	uscmd->uscsi_resid  = bp->b_resid;

	/* Transfer scsi_pkt information to uscsi */
	(void) scsi_uscsi_pktfini(pktp, uscmd);

	/*
	 * If enabled, copy any saved sense data into the area specified
	 * by the uscsi command.
	 */
	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
		/*
		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
		 */
		uscmd->uscsi_rqstatus = xp->xb_sense_status;
		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
		/*
		 * Copy the larger (extended ARQ) sense size only when the
		 * caller asked for more than the default SENSE_LENGTH.
		 */
		if (uscmd->uscsi_rqlen > SENSE_LENGTH) {
			bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf,
			    MAX_SENSE_LENGTH);
		} else {
			bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf,
			    SENSE_LENGTH);
		}
	}
	/*
	 * The following assignments are for SCSI FMA.
	 */
	ASSERT(xp->xb_private != NULL);
	suip = (struct sd_uscsi_info *)xp->xb_private;
	suip->ui_pkt_reason = pktp->pkt_reason;
	suip->ui_pkt_state = pktp->pkt_state;
	suip->ui_pkt_statistics = pktp->pkt_statistics;
	suip->ui_lba = (uint64_t)SD_GET_BLKNO(bp);

	/* We are done with the scsi_pkt; free it now */
	ASSERT(SD_GET_PKTP(bp) != NULL);
	scsi_destroy_pkt(SD_GET_PKTP(bp));

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
}
14425 
14426 
14427 /*
14428  *    Function: sd_bioclone_alloc
14429  *
14430  * Description: Allocate a buf(9S) and init it as per the given buf
14431  *		and the various arguments.  The associated sd_xbuf
14432  *		struct is (nearly) duplicated.  The struct buf *bp
14433  *		argument is saved in new_xp->xb_private.
14434  *
 14435  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
14436  *		datalen - size of data area for the shadow bp
14437  *		blkno - starting LBA
14438  *		func - function pointer for b_iodone in the shadow buf. (May
14439  *			be NULL if none.)
14440  *
 14441  * Return Code: Pointer to allocated buf(9S) struct
14442  *
14443  *     Context: Can sleep.
14444  */
14445 
14446 static struct buf *
14447 sd_bioclone_alloc(struct buf *bp, size_t datalen, daddr_t blkno,
14448     int (*func)(struct buf *))
14449 {
14450 	struct	sd_lun	*un;
14451 	struct	sd_xbuf	*xp;
14452 	struct	sd_xbuf	*new_xp;
14453 	struct	buf	*new_bp;
14454 
14455 	ASSERT(bp != NULL);
14456 	xp = SD_GET_XBUF(bp);
14457 	ASSERT(xp != NULL);
14458 	un = SD_GET_UN(bp);
14459 	ASSERT(un != NULL);
14460 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14461 
14462 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
14463 	    NULL, KM_SLEEP);
14464 
14465 	new_bp->b_lblkno	= blkno;
14466 
14467 	/*
14468 	 * Allocate an xbuf for the shadow bp and copy the contents of the
14469 	 * original xbuf into it.
14470 	 */
14471 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14472 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
14473 
14474 	/*
14475 	 * The given bp is automatically saved in the xb_private member
14476 	 * of the new xbuf.  Callers are allowed to depend on this.
14477 	 */
14478 	new_xp->xb_private = bp;
14479 
14480 	new_bp->b_private  = new_xp;
14481 
14482 	return (new_bp);
14483 }
14484 
14485 /*
14486  *    Function: sd_shadow_buf_alloc
14487  *
14488  * Description: Allocate a buf(9S) and init it as per the given buf
14489  *		and the various arguments.  The associated sd_xbuf
14490  *		struct is (nearly) duplicated.  The struct buf *bp
14491  *		argument is saved in new_xp->xb_private.
14492  *
 14493  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
14494  *		datalen - size of data area for the shadow bp
14495  *		bflags - B_READ or B_WRITE (pseudo flag)
14496  *		blkno - starting LBA
14497  *		func - function pointer for b_iodone in the shadow buf. (May
14498  *			be NULL if none.)
14499  *
 14500  * Return Code: Pointer to allocated buf(9S) struct
14501  *
14502  *     Context: Can sleep.
14503  */
14504 
static struct buf *
sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
    daddr_t blkno, int (*func)(struct buf *))
{
	struct	sd_lun	*un;
	struct	sd_xbuf	*xp;
	struct	sd_xbuf	*new_xp;
	struct	buf	*new_bp;

	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/* Map in the original data so it can be copied to/from the shadow. */
	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
		bp_mapin(bp);
	}

	/* Only the transfer-direction bits are honored from bflags. */
	bflags &= (B_READ | B_WRITE);
#if defined(__x86)
	/* x86: plain kernel buffer; zeroed so stale data never reaches disk. */
	new_bp = getrbuf(KM_SLEEP);
	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
	new_bp->b_bcount = datalen;
	new_bp->b_flags = bflags |
	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
#else
	/* Other platforms: DMA-consistent buffer from the SCSI framework. */
	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
	    datalen, bflags, SLEEP_FUNC, NULL);
#endif
	new_bp->av_forw	= NULL;
	new_bp->av_back	= NULL;
	new_bp->b_dev	= bp->b_dev;
	new_bp->b_blkno	= blkno;
	new_bp->b_iodone = func;
	new_bp->b_edev	= bp->b_edev;
	new_bp->b_resid	= 0;

	/* We need to preserve the B_FAILFAST flag */
	if (bp->b_flags & B_FAILFAST) {
		new_bp->b_flags |= B_FAILFAST;
	}

	/*
	 * Allocate an xbuf for the shadow bp and copy the contents of the
	 * original xbuf into it.
	 */
	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
	bcopy(xp, new_xp, sizeof (struct sd_xbuf));

	/* Need later to copy data between the shadow buf & original buf! */
	new_xp->xb_pkt_flags |= PKT_CONSISTENT;

	/*
	 * The given bp is automatically saved in the xb_private member
	 * of the new xbuf.  Callers are allowed to depend on this.
	 */
	new_xp->xb_private = bp;

	new_bp->b_private  = new_xp;

	return (new_bp);
}
14569 
14570 /*
14571  *    Function: sd_bioclone_free
14572  *
14573  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
14574  *		in the larger than partition operation.
14575  *
14576  *     Context: May be called under interrupt context
14577  */
14578 
14579 static void
14580 sd_bioclone_free(struct buf *bp)
14581 {
14582 	struct sd_xbuf	*xp;
14583 
14584 	ASSERT(bp != NULL);
14585 	xp = SD_GET_XBUF(bp);
14586 	ASSERT(xp != NULL);
14587 
14588 	/*
14589 	 * Call bp_mapout() before freeing the buf,  in case a lower
14590 	 * layer or HBA  had done a bp_mapin().  we must do this here
14591 	 * as we are the "originator" of the shadow buf.
14592 	 */
14593 	bp_mapout(bp);
14594 
14595 	/*
14596 	 * Null out b_iodone before freeing the bp, to ensure that the driver
14597 	 * never gets confused by a stale value in this field. (Just a little
14598 	 * extra defensiveness here.)
14599 	 */
14600 	bp->b_iodone = NULL;
14601 
14602 	freerbuf(bp);
14603 
14604 	kmem_free(xp, sizeof (struct sd_xbuf));
14605 }
14606 
14607 /*
14608  *    Function: sd_shadow_buf_free
14609  *
14610  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
14611  *
14612  *     Context: May be called under interrupt context
14613  */
14614 
static void
sd_shadow_buf_free(struct buf *bp)
{
	struct sd_xbuf	*xp;

	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

#if defined(__sparc)
	/*
	 * Call bp_mapout() before freeing the buf,  in case a lower
	 * layer or HBA  had done a bp_mapin().  we must do this here
	 * as we are the "originator" of the shadow buf.
	 */
	bp_mapout(bp);
#endif

	/*
	 * Null out b_iodone before freeing the bp, to ensure that the driver
	 * never gets confused by a stale value in this field. (Just a little
	 * extra defensiveness here.)
	 */
	bp->b_iodone = NULL;

#if defined(__x86)
	/* Release the buffer pair allocated by sd_shadow_buf_alloc (x86). */
	kmem_free(bp->b_un.b_addr, bp->b_bcount);
	freerbuf(bp);
#else
	/* Non-x86: buffer came from scsi_alloc_consistent_buf(). */
	scsi_free_consistent_buf(bp);
#endif

	kmem_free(xp, sizeof (struct sd_xbuf));
}
14649 
14650 
14651 /*
14652  *    Function: sd_print_transport_rejected_message
14653  *
14654  * Description: This implements the ludicrously complex rules for printing
14655  *		a "transport rejected" message.  This is to address the
14656  *		specific problem of having a flood of this error message
14657  *		produced when a failover occurs.
14658  *
14659  *     Context: Any.
14660  */
14661 
14662 static void
14663 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
14664     int code)
14665 {
14666 	ASSERT(un != NULL);
14667 	ASSERT(mutex_owned(SD_MUTEX(un)));
14668 	ASSERT(xp != NULL);
14669 
14670 	/*
14671 	 * Print the "transport rejected" message under the following
14672 	 * conditions:
14673 	 *
14674 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
14675 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
14676 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
14677 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
14678 	 *   scsi_transport(9F) (which indicates that the target might have
14679 	 *   gone off-line).  This uses the un->un_tran_fatal_count
14680 	 *   count, which is incremented whenever a TRAN_FATAL_ERROR is
14681 	 *   received, and reset to zero whenver a TRAN_ACCEPT is returned
14682 	 *   from scsi_transport().
14683 	 *
14684 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
14685 	 * the preceeding cases in order for the message to be printed.
14686 	 */
14687 	if (((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) &&
14688 	    (SD_FM_LOG(un) == SD_FM_LOG_NSUP)) {
14689 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
14690 		    (code != TRAN_FATAL_ERROR) ||
14691 		    (un->un_tran_fatal_count == 1)) {
14692 			switch (code) {
14693 			case TRAN_BADPKT:
14694 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14695 				    "transport rejected bad packet\n");
14696 				break;
14697 			case TRAN_FATAL_ERROR:
14698 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14699 				    "transport rejected fatal error\n");
14700 				break;
14701 			default:
14702 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14703 				    "transport rejected (%d)\n", code);
14704 				break;
14705 			}
14706 		}
14707 	}
14708 }
14709 
14710 
14711 /*
14712  *    Function: sd_add_buf_to_waitq
14713  *
14714  * Description: Add the given buf(9S) struct to the wait queue for the
14715  *		instance.  If sorting is enabled, then the buf is added
14716  *		to the queue via an elevator sort algorithm (a la
14717  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
14718  *		If sorting is not enabled, then the buf is just added
14719  *		to the end of the wait queue.
14720  *
14721  * Return Code: void
14722  *
14723  *     Context: Does not sleep/block, therefore technically can be called
14724  *		from any context.  However if sorting is enabled then the
14725  *		execution time is indeterminate, and may take long if
14726  *		the wait queue grows large.
14727  */
14728 
static void
sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
{
	struct buf *ap;

	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* If the queue is empty, add the buf as the only entry & return. */
	if (un->un_waitq_headp == NULL) {
		ASSERT(un->un_waitq_tailp == NULL);
		un->un_waitq_headp = un->un_waitq_tailp = bp;
		bp->av_forw = NULL;
		return;
	}

	ASSERT(un->un_waitq_tailp != NULL);

	/*
	 * If sorting is disabled, just add the buf to the tail end of
	 * the wait queue and return.
	 */
	if (un->un_f_disksort_disabled || un->un_f_enable_rmw) {
		un->un_waitq_tailp->av_forw = bp;
		un->un_waitq_tailp = bp;
		bp->av_forw = NULL;
		return;
	}

	/*
	 * Sort thru the list of requests currently on the wait queue
	 * and add the new buf request at the appropriate position.
	 *
	 * The un->un_waitq_headp is an activity chain pointer on which
	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
	 * first queue holds those requests which are positioned after
	 * the current SD_GET_BLKNO() (in the first request); the second holds
	 * requests which came in after their SD_GET_BLKNO() number was passed.
	 * Thus we implement a one way scan, retracting after reaching
	 * the end of the drive to the first request on the second
	 * queue, at which time it becomes the first queue.
	 * A one-way scan is natural because of the way UNIX read-ahead
	 * blocks are allocated.
	 *
	 * If we lie after the first request, then we must locate the
	 * second request list and add ourselves to it.
	 */
	ap = un->un_waitq_headp;
	/* bp sorts before the current head: it belongs on the second list. */
	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
		while (ap->av_forw != NULL) {
			/*
			 * Look for an "inversion" in the (normally
			 * ascending) block numbers. This indicates
			 * the start of the second request list.
			 */
			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
				/*
				 * Search the second request list for the
				 * first request at a larger block number.
				 * We go before that; however if there is
				 * no such request, we go at the end.
				 */
				do {
					if (SD_GET_BLKNO(bp) <
					    SD_GET_BLKNO(ap->av_forw)) {
						goto insert;
					}
					ap = ap->av_forw;
				} while (ap->av_forw != NULL);
				goto insert;		/* after last */
			}
			ap = ap->av_forw;
		}

		/*
		 * No inversions... we will go after the last, and
		 * be the first request in the second request list.
		 */
		goto insert;
	}

	/*
	 * Request is at/after the current request...
	 * sort in the first request list.
	 */
	while (ap->av_forw != NULL) {
		/*
		 * We want to go after the current request (1) if
		 * there is an inversion after it (i.e. it is the end
		 * of the first request list), or (2) if the next
		 * request is a larger block no. than our request.
		 */
		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
			goto insert;
		}
		ap = ap->av_forw;
	}

	/*
	 * Neither a second list nor a larger request, therefore
	 * we go at the end of the first list (which is the same
	 * as the end of the whole schebang).
	 */
insert:
	/* Link bp into the chain immediately after ap. */
	bp->av_forw = ap->av_forw;
	ap->av_forw = bp;

	/*
	 * If we inserted onto the tail end of the waitq, make sure the
	 * tail pointer is updated.
	 */
	if (ap == un->un_waitq_tailp) {
		un->un_waitq_tailp = bp;
	}
}
14846 
14847 
14848 /*
14849  *    Function: sd_start_cmds
14850  *
14851  * Description: Remove and transport cmds from the driver queues.
14852  *
14853  *   Arguments: un - pointer to the unit (soft state) struct for the target.
14854  *
14855  *		immed_bp - ptr to a buf to be transported immediately. Only
14856  *		the immed_bp is transported; bufs on the waitq are not
14857  *		processed and the un_retry_bp is not checked.  If immed_bp is
14858  *		NULL, then normal queue processing is performed.
14859  *
14860  *     Context: May be called from kernel thread context, interrupt context,
14861  *		or runout callback context. This function may not block or
14862  *		call routines that block.
14863  */
14864 
static void
sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
{
	struct	sd_xbuf	*xp;
	struct	buf	*bp;
	void	(*statp)(kstat_io_t *);
#if defined(__x86)	/* DMAFREE for x86 only */
	void	(*saved_statp)(kstat_io_t *);
#endif
	int	rval;
	struct sd_fm_internal *sfip = NULL;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_ncmds_in_transport >= 0);
	ASSERT(un->un_throttle >= 0);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");

	/*
	 * Main dispatch loop: select either the immediate bp or the head
	 * of the waitq, ensure it has a scsi_pkt, and hand it to the HBA.
	 * The loop runs until the queues drain, a stall condition is hit,
	 * or (when immed_bp != NULL) the single immediate bp is dispatched.
	 */
	do {
#if defined(__x86)	/* DMAFREE for x86 only */
		saved_statp = NULL;
#endif

		/*
		 * If we are syncing or dumping, fail the command to
		 * avoid recursively calling back into scsi_transport().
		 * The dump I/O itself uses a separate code path so this
		 * only prevents non-dump I/O from being sent while dumping.
		 * File system sync takes place before dumping begins.
		 * During panic, filesystem I/O is allowed provided
		 * un_in_callback is <= 1.  This is to prevent recursion
		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
		 * sd_start_cmds and so on.  See panic.c for more information
		 * about the states the system can be in during panic.
		 */
		if ((un->un_state == SD_STATE_DUMPING) ||
		    (ddi_in_panic() && (un->un_in_callback > 1))) {
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_start_cmds: panicking\n");
			goto exit;
		}

		if ((bp = immed_bp) != NULL) {
			/*
			 * We have a bp that must be transported immediately.
			 * It's OK to transport the immed_bp here without doing
			 * the throttle limit check because the immed_bp is
			 * always used in a retry/recovery case. This means
			 * that we know we are not at the throttle limit by
			 * virtue of the fact that to get here we must have
			 * already gotten a command back via sdintr(). This also
			 * relies on (1) the command on un_retry_bp preventing
			 * further commands from the waitq from being issued;
			 * and (2) the code in sd_retry_command checking the
			 * throttle limit before issuing a delayed or immediate
			 * retry. This holds even if the throttle limit is
			 * currently ratcheted down from its maximum value.
			 */
			statp = kstat_runq_enter;
			if (bp == un->un_retry_bp) {
				ASSERT((un->un_retry_statp == NULL) ||
				    (un->un_retry_statp == kstat_waitq_enter) ||
				    (un->un_retry_statp ==
				    kstat_runq_back_to_waitq));
				/*
				 * If the waitq kstat was incremented when
				 * sd_set_retry_bp() queued this bp for a retry,
				 * then we must set up statp so that the waitq
				 * count will get decremented correctly below.
				 * Also we must clear un->un_retry_statp to
				 * ensure that we do not act on a stale value
				 * in this field.
				 */
				if ((un->un_retry_statp == kstat_waitq_enter) ||
				    (un->un_retry_statp ==
				    kstat_runq_back_to_waitq)) {
					statp = kstat_waitq_to_runq;
				}
#if defined(__x86)	/* DMAFREE for x86 only */
				saved_statp = un->un_retry_statp;
#endif
				un->un_retry_statp = NULL;

				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
				    "un_throttle:%d un_ncmds_in_transport:%d\n",
				    un, un->un_retry_bp, un->un_throttle,
				    un->un_ncmds_in_transport);
			} else {
				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
				    "processing priority bp:0x%p\n", bp);
			}

		} else if ((bp = un->un_waitq_headp) != NULL) {
			/*
			 * A command on the waitq is ready to go, but do not
			 * send it if:
			 *
			 * (1) the throttle limit has been reached, or
			 * (2) a retry is pending, or
			 * (3) a START_STOP_UNIT callback pending, or
			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
			 *	command is pending.
			 *
			 * For all of these conditions, IO processing will
			 * restart after the condition is cleared.
			 */
			if (un->un_ncmds_in_transport >= un->un_throttle) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: exiting, "
				    "throttle limit reached!\n");
				goto exit;
			}
			if (un->un_retry_bp != NULL) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: exiting, retry pending!\n");
				goto exit;
			}
			if (un->un_startstop_timeid != NULL) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: exiting, "
				    "START_STOP pending!\n");
				goto exit;
			}
			if (un->un_direct_priority_timeid != NULL) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: exiting, "
				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
				goto exit;
			}

			/* Dequeue the command */
			un->un_waitq_headp = bp->av_forw;
			if (un->un_waitq_headp == NULL) {
				un->un_waitq_tailp = NULL;
			}
			bp->av_forw = NULL;
			statp = kstat_waitq_to_runq;
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);

		} else {
			/* No work to do so bail out now */
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_start_cmds: no more work, exiting!\n");
			goto exit;
		}

		/*
		 * Reset the state to normal. This is the mechanism by which
		 * the state transitions from either SD_STATE_RWAIT or
		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
		 * If state is SD_STATE_PM_CHANGING then this command is
		 * part of the device power control and the state must
		 * not be put back to normal. Doing so would
		 * allow new commands to proceed when they shouldn't,
		 * the device may be going off.
		 */
		if ((un->un_state != SD_STATE_SUSPENDED) &&
		    (un->un_state != SD_STATE_PM_CHANGING)) {
			New_state(un, SD_STATE_NORMAL);
		}

		xp = SD_GET_XBUF(bp);
		ASSERT(xp != NULL);

#if defined(__x86)	/* DMAFREE for x86 only */
		/*
		 * Allocate the scsi_pkt if we need one, or attach DMA
		 * resources if we have a scsi_pkt that needs them. The
		 * latter should only occur for commands that are being
		 * retried.
		 */
		if ((xp->xb_pktp == NULL) ||
		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
#else
		if (xp->xb_pktp == NULL) {
#endif
			/*
			 * There is no scsi_pkt allocated for this buf. Call
			 * the initpkt function to allocate & init one.
			 *
			 * The scsi_init_pkt runout callback functionality is
			 * implemented as follows:
			 *
			 * 1) The initpkt function always calls
			 *    scsi_init_pkt(9F) with sdrunout specified as the
			 *    callback routine.
			 * 2) A successful packet allocation is initialized and
			 *    the I/O is transported.
			 * 3) The I/O associated with an allocation resource
			 *    failure is left on its queue to be retried via
			 *    runout or the next I/O.
			 * 4) The I/O associated with a DMA error is removed
			 *    from the queue and failed with EIO. Processing of
			 *    the transport queues is also halted to be
			 *    restarted via runout or the next I/O.
			 * 5) The I/O associated with a CDB size or packet
			 *    size error is removed from the queue and failed
			 *    with EIO. Processing of the transport queues is
			 *    continued.
			 *
			 * Note: there is no interface for canceling a runout
			 * callback. To prevent the driver from detaching or
			 * suspending while a runout is pending the driver
			 * state is set to SD_STATE_RWAIT
			 *
			 * Note: using the scsi_init_pkt callback facility can
			 * result in an I/O request persisting at the head of
			 * the list which cannot be satisfied even after
			 * multiple retries. In the future the driver may
			 * implement some kind of maximum runout count before
			 * failing an I/O.
			 *
			 * Note: the use of funcp below may seem superfluous,
			 * but it helps warlock figure out the correct
			 * initpkt function calls (see [s]sd.wlcmd).
			 */
			struct scsi_pkt	*pktp;
			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);

			ASSERT(bp != un->un_rqs_bp);

			funcp = sd_initpkt_map[xp->xb_chain_iostart];
			switch ((*funcp)(bp, &pktp)) {
			case  SD_PKT_ALLOC_SUCCESS:
				xp->xb_pktp = pktp;
				SD_TRACE(SD_LOG_IO_CORE, un,
				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
				    pktp);
				goto got_pkt;

			case SD_PKT_ALLOC_FAILURE:
				/*
				 * Temporary (hopefully) resource depletion.
				 * Since retries and RQS commands always have a
				 * scsi_pkt allocated, these cases should never
				 * get here. So the only cases this needs to
				 * handle is a bp from the waitq (which we put
				 * back onto the waitq for sdrunout), or a bp
				 * sent as an immed_bp (which we just fail).
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");

#if defined(__x86)	/* DMAFREE for x86 only */

				if (bp == immed_bp) {
					/*
					 * If SD_XB_DMA_FREED is clear, then
					 * this is a failure to allocate a
					 * scsi_pkt, and we must fail the
					 * command.
					 */
					if ((xp->xb_pkt_flags &
					    SD_XB_DMA_FREED) == 0) {
						break;
					}

					/*
					 * If this immediate command is NOT our
					 * un_retry_bp, then we must fail it.
					 */
					if (bp != un->un_retry_bp) {
						break;
					}

					/*
					 * We get here if this cmd is our
					 * un_retry_bp that was DMAFREED, but
					 * scsi_init_pkt() failed to reallocate
					 * DMA resources when we attempted to
					 * retry it. This can happen when an
					 * mpxio failover is in progress, but
					 * we don't want to just fail the
					 * command in this case.
					 *
					 * Use timeout(9F) to restart it after
					 * a 100ms delay.  We don't want to
					 * let sdrunout() restart it, because
					 * sdrunout() is just supposed to start
					 * commands that are sitting on the
					 * wait queue.  The un_retry_bp stays
					 * set until the command completes, but
					 * sdrunout can be called many times
					 * before that happens.  Since sdrunout
					 * cannot tell if the un_retry_bp is
					 * already in the transport, it could
					 * end up calling scsi_transport() for
					 * the un_retry_bp multiple times.
					 *
					 * Also: don't schedule the callback
					 * if some other callback is already
					 * pending.
					 */
					if (un->un_retry_statp == NULL) {
						/*
						 * restore the kstat pointer to
						 * keep kstat counts coherent
						 * when we do retry the command.
						 */
						un->un_retry_statp =
						    saved_statp;
					}

					if ((un->un_startstop_timeid == NULL) &&
					    (un->un_retry_timeid == NULL) &&
					    (un->un_direct_priority_timeid ==
					    NULL)) {

						un->un_retry_timeid =
						    timeout(
						    sd_start_retry_command,
						    un, SD_RESTART_TIMEOUT);
					}
					goto exit;
				}

#else
				if (bp == immed_bp) {
					break;	/* Just fail the command */
				}
#endif

				/* Add the buf back to the head of the waitq */
				bp->av_forw = un->un_waitq_headp;
				un->un_waitq_headp = bp;
				if (un->un_waitq_tailp == NULL) {
					un->un_waitq_tailp = bp;
				}
				goto exit;

			case SD_PKT_ALLOC_FAILURE_NO_DMA:
				/*
				 * HBA DMA resource failure. Fail the command
				 * and continue processing of the queues.
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: "
				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
				break;

			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
				/*
				 * Note:x86: Partial DMA mapping not supported
				 * for USCSI commands, and all the needed DMA
				 * resources were not allocated.
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: "
				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
				break;

			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
				/*
				 * Note:x86: Request cannot fit into CDB based
				 * on lba and len.
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: "
				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
				break;

			default:
				/* Should NEVER get here! */
				panic("scsi_initpkt error");
				/*NOTREACHED*/
			}

			/*
			 * Fatal error in allocating a scsi_pkt for this buf.
			 * Update kstats & return the buf with an error code.
			 * We must use sd_return_failed_command_no_restart() to
			 * avoid a recursive call back into sd_start_cmds().
			 * However this also means that we must keep processing
			 * the waitq here in order to avoid stalling.
			 */
			if (statp == kstat_waitq_to_runq) {
				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
			}
			sd_return_failed_command_no_restart(un, bp, EIO);
			if (bp == immed_bp) {
				/* immed_bp is gone by now, so clear this */
				immed_bp = NULL;
			}
			continue;
		}
got_pkt:
		if (bp == immed_bp) {
			/* goto the head of the class.... */
			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
		}

		un->un_ncmds_in_transport++;
		SD_UPDATE_KSTATS(un, statp, bp);

		/*
		 * Call scsi_transport() to send the command to the target.
		 * According to SCSA architecture, we must drop the mutex here
		 * before calling scsi_transport() in order to avoid deadlock.
		 * Note that the scsi_pkt's completion routine can be executed
		 * (from interrupt context) even before the call to
		 * scsi_transport() returns.
		 */
		SD_TRACE(SD_LOG_IO_CORE, un,
		    "sd_start_cmds: calling scsi_transport()\n");
		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);

		mutex_exit(SD_MUTEX(un));
		rval = scsi_transport(xp->xb_pktp);
		mutex_enter(SD_MUTEX(un));

		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_start_cmds: scsi_transport() returned %d\n", rval);

		switch (rval) {
		case TRAN_ACCEPT:
			/* Clear this with every pkt accepted by the HBA */
			un->un_tran_fatal_count = 0;
			break;	/* Success; try the next cmd (if any) */

		case TRAN_BUSY:
			un->un_ncmds_in_transport--;
			ASSERT(un->un_ncmds_in_transport >= 0);

			/*
			 * Don't retry request sense, the sense data
			 * is lost when another request is sent.
			 * Free up the rqs buf and retry
			 * the original failed cmd.  Update kstat.
			 */
			if (bp == un->un_rqs_bp) {
				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
				bp = sd_mark_rqs_idle(un, xp);
				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
				    NULL, NULL, EIO, un->un_busy_timeout / 500,
				    kstat_waitq_enter);
				goto exit;
			}

#if defined(__x86)	/* DMAFREE for x86 only */
			/*
			 * Free the DMA resources for the  scsi_pkt. This will
			 * allow mpxio to select another path the next time
			 * we call scsi_transport() with this scsi_pkt.
			 * See sdintr() for the rationalization behind this.
			 */
			if ((un->un_f_is_fibre == TRUE) &&
			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
				scsi_dmafree(xp->xb_pktp);
				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
			}
#endif

			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
				/*
				 * Commands that are SD_PATH_DIRECT_PRIORITY
				 * are for error recovery situations. These do
				 * not use the normal command waitq, so if they
				 * get a TRAN_BUSY we cannot put them back onto
				 * the waitq for later retry. One possible
				 * problem is that there could already be some
				 * other command on un_retry_bp that is waiting
				 * for this one to complete, so we would be
				 * deadlocked if we put this command back onto
				 * the waitq for later retry (since un_retry_bp
				 * must complete before the driver gets back to
				 * commands on the waitq).
				 *
				 * To avoid deadlock we must schedule a callback
				 * that will restart this command after a set
				 * interval.  This should keep retrying for as
				 * long as the underlying transport keeps
				 * returning TRAN_BUSY (just like for other
				 * commands).  Use the same timeout interval as
				 * for the ordinary TRAN_BUSY retry.
				 */
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_start_cmds: scsi_transport() returned "
				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");

				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
				un->un_direct_priority_timeid =
				    timeout(sd_start_direct_priority_command,
				    bp, un->un_busy_timeout / 500);

				goto exit;
			}

			/*
			 * For TRAN_BUSY, we want to reduce the throttle value,
			 * unless we are retrying a command.
			 */
			if (bp != un->un_retry_bp) {
				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
			}

			/*
			 * Set up the bp to be tried again 10 ms later.
			 * Note:x86: Is there a timeout value in the sd_lun
			 * for this condition?
			 */
			sd_set_retry_bp(un, bp, un->un_busy_timeout / 500,
			    kstat_runq_back_to_waitq);
			goto exit;

		case TRAN_FATAL_ERROR:
			un->un_tran_fatal_count++;
			/* FALLTHRU */

		case TRAN_BADPKT:
		default:
			un->un_ncmds_in_transport--;
			ASSERT(un->un_ncmds_in_transport >= 0);

			/*
			 * If this is our REQUEST SENSE command with a
			 * transport error, we must get back the pointers
			 * to the original buf, and mark the REQUEST
			 * SENSE command as "available".
			 */
			if (bp == un->un_rqs_bp) {
				bp = sd_mark_rqs_idle(un, xp);
				xp = SD_GET_XBUF(bp);
			} else {
				/*
				 * Legacy behavior: do not update transport
				 * error count for request sense commands.
				 */
				SD_UPDATE_ERRSTATS(un, sd_transerrs);
			}

			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
			sd_print_transport_rejected_message(un, xp, rval);

			/*
			 * This command will be terminated by SD driver due
			 * to a fatal transport error. We should post
			 * ereport.io.scsi.cmd.disk.tran with driver-assessment
			 * of "fail" for any command to indicate this
			 * situation.
			 */
			if (xp->xb_ena > 0) {
				ASSERT(un->un_fm_private != NULL);
				sfip = un->un_fm_private;
				sfip->fm_ssc.ssc_flags |= SSC_FLAGS_TRAN_ABORT;
				sd_ssc_extract_info(&sfip->fm_ssc, un,
				    xp->xb_pktp, bp, xp);
				sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_FATAL);
			}

			/*
			 * We must use sd_return_failed_command_no_restart() to
			 * avoid a recursive call back into sd_start_cmds().
			 * However this also means that we must keep processing
			 * the waitq here in order to avoid stalling.
			 */
			sd_return_failed_command_no_restart(un, bp, EIO);

			/*
			 * Notify any threads waiting in sd_ddi_suspend() that
			 * a command completion has occurred.
			 */
			if (un->un_state == SD_STATE_SUSPENDED) {
				cv_broadcast(&un->un_disk_busy_cv);
			}

			if (bp == immed_bp) {
				/* immed_bp is gone by now, so clear this */
				immed_bp = NULL;
			}
			break;
		}

	} while (immed_bp == NULL);

exit:
	ASSERT(mutex_owned(SD_MUTEX(un)));
	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
}
15447 
15448 
15449 /*
15450  *    Function: sd_return_command
15451  *
15452  * Description: Returns a command to its originator (with or without an
15453  *		error).  Also starts commands waiting to be transported
15454  *		to the target.
15455  *
15456  *     Context: May be called from interrupt, kernel, or timeout context
15457  */
15458 
static void
sd_return_command(struct sd_lun *un, struct buf *bp)
{
	struct sd_xbuf *xp;
	struct scsi_pkt *pktp;
	struct sd_fm_internal *sfip;

	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != un->un_rqs_bp);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	pktp = SD_GET_PKTP(bp);
	sfip = (struct sd_fm_internal *)un->un_fm_private;
	ASSERT(sfip != NULL);

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");

	/*
	 * Note: check for the "sdrestart failed" case: if a partial DMA
	 * transfer completed with no error but data remains to be moved
	 * (xb_dma_resid != 0), set up and issue the next portion of the
	 * transfer instead of completing the buf.
	 */
	if ((un->un_partial_dma_supported == 1) &&
	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
	    (geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
	    (xp->xb_pktp->pkt_resid == 0)) {

		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
			/*
			 * Successfully set up next portion of cmd
			 * transfer, try sending it
			 */
			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
			    NULL, NULL, 0, (clock_t)0, NULL);
			sd_start_cmds(un, NULL);
			return;	/* Note:x86: need a return here? */
		}
	}

	/*
	 * If this is the failfast bp, clear it from un_failfast_bp. This
	 * can happen if upon being re-tried the failfast bp either
	 * succeeded or encountered another error (possibly even a different
	 * error than the one that precipitated the failfast state, but in
	 * that case it would have had to exhaust retries as well). Regardless,
	 * this should not occur whenever the instance is in the active
	 * failfast state.
	 */
	if (bp == un->un_failfast_bp) {
		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
		un->un_failfast_bp = NULL;
	}

	/*
	 * Clear the failfast state upon successful completion of ANY cmd.
	 */
	if (bp->b_error == 0) {
		un->un_failfast_state = SD_FAILFAST_INACTIVE;
		/*
		 * If this is a successful command, but used to be retried,
		 * we will take it as a recovered command and post an
		 * ereport with driver-assessment of "recovered".
		 */
		if (xp->xb_ena > 0) {
			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_RECOVERY);
		}
	} else {
		/*
		 * If this is a failed non-USCSI command we will post an
		 * ereport with driver-assessment set accordingly("fail" or
		 * "fatal").
		 */
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_FATAL);
		}
	}

	/*
	 * This is used if the command was retried one or more times. Show that
	 * we are done with it, and allow processing of the waitq to resume.
	 */
	if (bp == un->un_retry_bp) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_return_command: un:0x%p: "
		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
		un->un_retry_bp = NULL;
		un->un_retry_statp = NULL;
	}

	SD_UPDATE_RDWR_STATS(un, bp);
	SD_UPDATE_PARTITION_STATS(un, bp);

	switch (un->un_state) {
	case SD_STATE_SUSPENDED:
		/*
		 * Notify any threads waiting in sd_ddi_suspend() that
		 * a command completion has occurred.
		 */
		cv_broadcast(&un->un_disk_busy_cv);
		break;
	default:
		sd_start_cmds(un, NULL);
		break;
	}

	/*
	 * Return this command up the iodone chain to its originator.
	 * The mutex must be dropped here: the destroypkt and iodone
	 * functions may block, and the iodone chain runs caller code.
	 */
	mutex_exit(SD_MUTEX(un));

	/* Destroy the scsi_pkt before handing the buf back. */
	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
	xp->xb_pktp = NULL;

	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);

	ASSERT(!mutex_owned(SD_MUTEX(un)));
	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
}
15580 
15581 
15582 /*
15583  *    Function: sd_return_failed_command
15584  *
15585  * Description: Command completion when an error occurred.
15586  *
15587  *     Context: May be called from interrupt context
15588  */
15589 
15590 static void
15591 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
15592 {
15593 	ASSERT(bp != NULL);
15594 	ASSERT(un != NULL);
15595 	ASSERT(mutex_owned(SD_MUTEX(un)));
15596 
15597 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15598 	    "sd_return_failed_command: entry\n");
15599 
15600 	/*
15601 	 * b_resid could already be nonzero due to a partial data
15602 	 * transfer, so do not change it here.
15603 	 */
15604 	SD_BIOERROR(bp, errcode);
15605 
15606 	sd_return_command(un, bp);
15607 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15608 	    "sd_return_failed_command: exit\n");
15609 }
15610 
15611 
15612 /*
15613  *    Function: sd_return_failed_command_no_restart
15614  *
15615  * Description: Same as sd_return_failed_command, but ensures that no
15616  *		call back into sd_start_cmds will be issued.
15617  *
15618  *     Context: May be called from interrupt context
15619  */
15620 
static void
sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
    int errcode)
{
	struct sd_xbuf *xp;

	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	ASSERT(errcode != 0);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_return_failed_command_no_restart: entry\n");

	/*
	 * b_resid could already be nonzero due to a partial data
	 * transfer, so do not change it here.
	 */
	SD_BIOERROR(bp, errcode);

	/*
	 * If this is the failfast bp, clear it. This can happen if the
	 * failfast bp encountered a fatal error when we attempted to
	 * re-try it (such as a scsi_transport(9F) failure).  However
	 * we should NOT be in an active failfast state if the failfast
	 * bp is not NULL.
	 */
	if (bp == un->un_failfast_bp) {
		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
		un->un_failfast_bp = NULL;
	}

	if (bp == un->un_retry_bp) {
		/*
		 * This command was retried one or more times. Show that we are
		 * done with it, and allow processing of the waitq to resume.
		 */
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_return_failed_command_no_restart: "
		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
		un->un_retry_bp = NULL;
		un->un_retry_statp = NULL;
	}

	SD_UPDATE_RDWR_STATS(un, bp);
	SD_UPDATE_PARTITION_STATS(un, bp);

	/*
	 * Drop the mutex across the destroypkt and iodone chain; both may
	 * block, and the iodone chain runs originator code. Unlike
	 * sd_return_command(), no call back into sd_start_cmds() is made.
	 */
	mutex_exit(SD_MUTEX(un));

	if (xp->xb_pktp != NULL) {
		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
		xp->xb_pktp = NULL;
	}

	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_return_failed_command_no_restart: exit\n");
}
15684 
15685 
15686 /*
15687  *    Function: sd_retry_command
15688  *
15689  * Description: queue up a command for retry, or (optionally) fail it
15690  *		if retry counts are exhausted.
15691  *
15692  *   Arguments: un - Pointer to the sd_lun struct for the target.
15693  *
15694  *		bp - Pointer to the buf for the command to be retried.
15695  *
15696  *		retry_check_flag - Flag to see which (if any) of the retry
15697  *		   counts should be decremented/checked. If the indicated
15698  *		   retry count is exhausted, then the command will not be
15699  *		   retried; it will be failed instead. This should use a
15700  *		   value equal to one of the following:
15701  *
15702  *			SD_RETRIES_NOCHECK
15703  *			SD_RESD_RETRIES_STANDARD
15704  *			SD_RETRIES_VICTIM
15705  *
15706  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
15707  *		   if the check should be made to see of FLAG_ISOLATE is set
15708  *		   in the pkt. If FLAG_ISOLATE is set, then the command is
15709  *		   not retried, it is simply failed.
15710  *
15711  *		user_funcp - Ptr to function to call before dispatching the
15712  *		   command. May be NULL if no action needs to be performed.
15713  *		   (Primarily intended for printing messages.)
15714  *
15715  *		user_arg - Optional argument to be passed along to
15716  *		   the user_funcp call.
15717  *
15718  *		failure_code - errno return code to set in the bp if the
15719  *		   command is going to be failed.
15720  *
15721  *		retry_delay - Retry delay interval in (clock_t) units. May
15722  *		   be zero which indicates that the retry should be retried
15723  *		   immediately (ie, without an intervening delay).
15724  *
15725  *		statp - Ptr to kstat function to be updated if the command
15726  *		   is queued for a delayed retry. May be NULL if no kstat
15727  *		   update is desired.
15728  *
15729  *     Context: May be called from interrupt context.
15730  */
15731 
static void
sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
    void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int code),
    void *user_arg, int failure_code, clock_t retry_delay,
    void (*statp)(kstat_io_t *))
{
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;
	struct sd_fm_internal *sfip;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);

	sfip = (struct sd_fm_internal *)un->un_fm_private;
	ASSERT(sfip != NULL);

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);

	/*
	 * If we are syncing or dumping, fail the command to avoid
	 * recursively calling back into scsi_transport().
	 */
	if (ddi_in_panic()) {
		goto fail_command_no_log;
	}

	/*
	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
	 * log an error and fail the command.
	 */
	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
		sd_dump_memory(un, SD_LOG_IO, "CDB",
		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
		goto fail_command;
	}

	/*
	 * If we are suspended (or dumping), then put the command onto the
	 * head of the wait queue since we don't want to start more
	 * commands, and clear un_retry_bp. When we are resumed, we will
	 * handle the commands in the wait queue.
	 */
	switch (un->un_state) {
	case SD_STATE_SUSPENDED:
	case SD_STATE_DUMPING:
		bp->av_forw = un->un_waitq_headp;
		un->un_waitq_headp = bp;
		if (un->un_waitq_tailp == NULL) {
			un->un_waitq_tailp = bp;
		}
		if (bp == un->un_retry_bp) {
			un->un_retry_bp = NULL;
			un->un_retry_statp = NULL;
		}
		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
		return;
	default:
		break;
	}

	/*
	 * If the caller wants us to check FLAG_ISOLATE, then see if that
	 * is set; if it is then we do not want to retry the command.
	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
	 */
	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
			goto fail_command;
		}
	}


	/*
	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
	 * command timeout or a selection timeout has occurred. This means
	 * that we were unable to establish any kind of communication with
	 * the target, and subsequent retries and/or commands are likely
	 * to encounter similar results and take a long time to complete.
	 *
	 * If this is a failfast error condition, we need to update the
	 * failfast state, even if this bp does not have B_FAILFAST set.
	 */
	if (retry_check_flag & SD_RETRIES_FAILFAST) {
		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
			ASSERT(un->un_failfast_bp == NULL);
			/*
			 * If we are already in the active failfast state, and
			 * another failfast error condition has been detected,
			 * then fail this command if it has B_FAILFAST set.
			 * If B_FAILFAST is clear, then maintain the legacy
			 * behavior of retrying heroically, even though this
			 * will take a lot more time to fail the command.
			 */
			if (bp->b_flags & B_FAILFAST) {
				goto fail_command;
			}
		} else {
			/*
			 * We're not in the active failfast state, but we
			 * have a failfast error condition, so we must begin
			 * transition to the next state. We do this regardless
			 * of whether or not this bp has B_FAILFAST set.
			 */
			if (un->un_failfast_bp == NULL) {
				/*
				 * This is the first bp to meet a failfast
				 * condition so save it on un_failfast_bp &
				 * do normal retry processing. Do not enter
				 * active failfast state yet. This marks
				 * entry into the "failfast pending" state.
				 */
				un->un_failfast_bp = bp;

			} else if (un->un_failfast_bp == bp) {
				/*
				 * This is the second time *this* bp has
				 * encountered a failfast error condition,
				 * so enter active failfast state & flush
				 * queues as appropriate.
				 */
				un->un_failfast_state = SD_FAILFAST_ACTIVE;
				un->un_failfast_bp = NULL;
				sd_failfast_flushq(un);

				/*
				 * Fail this bp now if B_FAILFAST set;
				 * otherwise continue with retries. (It would
				 * be pretty ironic if this bp succeeded on a
				 * subsequent retry after we just flushed all
				 * the queues).
				 */
				if (bp->b_flags & B_FAILFAST) {
					goto fail_command;
				}

#if !defined(lint) && !defined(__lint)
			} else {
				/*
				 * If neither of the preceding conditionals
				 * was true, it means that there is some
				 * *other* bp that has met an initial failfast
				 * condition and is currently either being
				 * retried or is waiting to be retried. In
				 * that case we should perform normal retry
				 * processing on *this* bp, since there is a
				 * chance that the current failfast condition
				 * is transient and recoverable. If that does
				 * not turn out to be the case, then retries
				 * will be cleared when the wait queue is
				 * flushed anyway.
				 */
#endif
			}
		}
	} else {
		/*
		 * SD_RETRIES_FAILFAST is clear, which indicates that we
		 * likely were able to at least establish some level of
		 * communication with the target and subsequent commands
		 * and/or retries are likely to get through to the target,
		 * In this case we want to be aggressive about clearing
		 * the failfast state. Note that this does not affect
		 * the "failfast pending" condition.
		 */
		un->un_failfast_state = SD_FAILFAST_INACTIVE;
	}


	/*
	 * Check the specified retry count to see if we can still do
	 * any retries with this pkt before we should fail it.
	 */
	switch (retry_check_flag & SD_RETRIES_MASK) {
	case SD_RETRIES_VICTIM:
		/*
		 * Check the victim retry count. If exhausted, then fall
		 * thru & check against the standard retry count.
		 */
		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
			/* Increment count & proceed with the retry */
			xp->xb_victim_retry_count++;
			break;
		}
		/* Victim retries exhausted, fall back to std. retries... */
		/* FALLTHRU */

	case SD_RETRIES_STANDARD:
		if (xp->xb_retry_count >= un->un_retry_count) {
			/* Retries exhausted, fail the command */
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_retry_command: retries exhausted!\n");
			/*
			 * update b_resid for failed SCMD_READ & SCMD_WRITE
			 * commands with nonzero pkt_resid.
			 */
			if ((pktp->pkt_reason == CMD_CMPLT) &&
			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
			    (pktp->pkt_resid != 0)) {
				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
					SD_UPDATE_B_RESID(bp, pktp);
				}
			}
			goto fail_command;
		}
		xp->xb_retry_count++;
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
		break;

	case SD_RETRIES_UA:
		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
			/* Retries exhausted, fail the command */
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Unit Attention retries exhausted. "
			    "Check the target.\n");
			goto fail_command;
		}
		xp->xb_ua_retry_count++;
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_retry_command: retry count:%d\n",
		    xp->xb_ua_retry_count);
		break;

	case SD_RETRIES_BUSY:
		if (xp->xb_retry_count >= un->un_busy_retry_count) {
			/* Retries exhausted, fail the command */
			SD_TRACE(SD_LOG_IO_CORE, un,
			    "sd_retry_command: retries exhausted!\n");
			goto fail_command;
		}
		xp->xb_retry_count++;
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
		break;

	case SD_RETRIES_NOCHECK:
	default:
		/* No retry count to check. Just proceed with the retry */
		break;
	}

	/* Ensure the retry is placed at the head of the HBA queue. */
	xp->xb_pktp->pkt_flags |= FLAG_HEAD;

	/*
	 * If this is a non-USCSI command being retried
	 * during execution last time, we should post an ereport with
	 * driver-assessment of the value "retry".
	 * For partial DMA, request sense and STATUS_QFULL, there are no
	 * hardware errors, we bypass ereport posting.
	 */
	if (failure_code != 0) {
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_RETRY);
		}
	}

	/*
	 * If we were given a zero timeout, we must attempt to retry the
	 * command immediately (ie, without a delay).
	 */
	if (retry_delay == 0) {
		/*
		 * Check some limiting conditions to see if we can actually
		 * do the immediate retry.  If we cannot, then we must
		 * fall back to queueing up a delayed retry.
		 */
		if (un->un_ncmds_in_transport >= un->un_throttle) {
			/*
			 * We are at the throttle limit for the target,
			 * fall back to delayed retry.
			 */
			retry_delay = un->un_busy_timeout;
			statp = kstat_waitq_enter;
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_retry_command: immed. retry hit "
			    "throttle!\n");
		} else {
			/*
			 * We're clear to proceed with the immediate retry.
			 * First call the user-provided function (if any)
			 */
			if (user_funcp != NULL) {
				(*user_funcp)(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
#ifdef __lock_lint
				sd_print_incomplete_msg(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
				sd_print_cmd_incomplete_msg(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
				sd_print_sense_failed_msg(un, bp, user_arg,
				    SD_IMMEDIATE_RETRY_ISSUED);
#endif
			}

			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_retry_command: issuing immediate retry\n");

			/*
			 * Call sd_start_cmds() to transport the command to
			 * the target.
			 */
			sd_start_cmds(un, bp);

			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_retry_command exit\n");
			return;
		}
	}

	/*
	 * Set up to retry the command after a delay.
	 * First call the user-provided function (if any)
	 */
	if (user_funcp != NULL) {
		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
	}

	sd_set_retry_bp(un, bp, retry_delay, statp);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
	return;

fail_command:

	if (user_funcp != NULL) {
		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
	}

fail_command_no_log:

	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_retry_command: returning failed command\n");

	sd_return_failed_command(un, bp, failure_code);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
}
16083 
16084 
16085 /*
16086  *    Function: sd_set_retry_bp
16087  *
16088  * Description: Set up the given bp for retry.
16089  *
16090  *   Arguments: un - ptr to associated softstate
16091  *		bp - ptr to buf(9S) for the command
16092  *		retry_delay - time interval before issuing retry (may be 0)
16093  *		statp - optional pointer to kstat function
16094  *
16095  *     Context: May be called under interrupt context
16096  */
16097 
static void
sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
    void (*statp)(kstat_io_t *))
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);

	/*
	 * Indicate that the command is being retried. This will not allow any
	 * other commands on the wait queue to be transported to the target
	 * until this command has been completed (success or failure). The
	 * "retry command" is not transported to the target until the given
	 * time delay expires, unless the user specified a 0 retry_delay.
	 *
	 * Note: the timeout(9F) callback routine is what actually calls
	 * sd_start_cmds() to transport the command, with the exception of a
	 * zero retry_delay. The only current implementor of a zero retry delay
	 * is the case where a START_STOP_UNIT is sent to spin-up a device.
	 */
	if (un->un_retry_bp == NULL) {
		ASSERT(un->un_retry_statp == NULL);
		/* No retry currently pending; this bp becomes the retry bp. */
		un->un_retry_bp = bp;

		/*
		 * If the user has not specified a delay the command should
		 * be queued and no timeout should be scheduled.
		 */
		if (retry_delay == 0) {
			/*
			 * Save the kstat pointer that will be used in the
			 * call to SD_UPDATE_KSTATS() below, so that
			 * sd_start_cmds() can correctly decrement the waitq
			 * count when it is time to transport this command.
			 */
			un->un_retry_statp = statp;
			goto done;
		}
	}

	if (un->un_retry_bp == bp) {
		/*
		 * Save the kstat pointer that will be used in the call to
		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
		 * correctly decrement the waitq count when it is time to
		 * transport this command.
		 */
		un->un_retry_statp = statp;

		/*
		 * Schedule a timeout if:
		 *   1) The user has specified a delay.
		 *   2) There is not a START_STOP_UNIT callback pending.
		 *
		 * If no delay has been specified, then it is up to the caller
		 * to ensure that IO processing continues without stalling.
		 * Effectively, this means that the caller will issue the
		 * required call to sd_start_cmds(). The START_STOP_UNIT
		 * callback does this after the START STOP UNIT command has
		 * completed. In either of these cases we should not schedule
		 * a timeout callback here.  Also don't schedule the timeout if
		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
		 */
		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
		    (un->un_direct_priority_timeid == NULL)) {
			un->un_retry_timeid =
			    timeout(sd_start_retry_command, un, retry_delay);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_set_retry_bp: setting timeout: un: 0x%p"
			    " bp:0x%p un_retry_timeid:0x%p\n",
			    un, bp, un->un_retry_timeid);
		}
	} else {
		/*
		 * We only get in here if there is already another command
		 * waiting to be retried.  In this case, we just put the
		 * given command onto the wait queue, so it can be transported
		 * after the current retry command has completed.
		 *
		 * Also we have to make sure that if the command at the head
		 * of the wait queue is the un_failfast_bp, that we do not
		 * put ahead of it any other commands that are to be retried.
		 */
		if ((un->un_failfast_bp != NULL) &&
		    (un->un_failfast_bp == un->un_waitq_headp)) {
			/*
			 * Enqueue this command AFTER the first command on
			 * the wait queue (which is also un_failfast_bp).
			 */
			bp->av_forw = un->un_waitq_headp->av_forw;
			un->un_waitq_headp->av_forw = bp;
			if (un->un_waitq_headp == un->un_waitq_tailp) {
				un->un_waitq_tailp = bp;
			}
		} else {
			/* Enqueue this command at the head of the waitq. */
			bp->av_forw = un->un_waitq_headp;
			un->un_waitq_headp = bp;
			if (un->un_waitq_tailp == NULL) {
				un->un_waitq_tailp = bp;
			}
		}

		/* Default to a waitq-enter kstat update if none was given. */
		if (statp == NULL) {
			statp = kstat_waitq_enter;
		}
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
	}

done:
	if (statp != NULL) {
		SD_UPDATE_KSTATS(un, statp, bp);
	}

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_set_retry_bp: exit un:0x%p\n", un);
}
16219 
16220 
16221 /*
16222  *    Function: sd_start_retry_command
16223  *
16224  * Description: Start the command that has been waiting on the target's
16225  *		retry queue.  Called from timeout(9F) context after the
16226  *		retry delay interval has expired.
16227  *
16228  *   Arguments: arg - pointer to associated softstate for the device.
16229  *
16230  *     Context: timeout(9F) thread context.  May not sleep.
16231  */
16232 
16233 static void
16234 sd_start_retry_command(void *arg)
16235 {
16236 	struct sd_lun *un = arg;
16237 
16238 	ASSERT(un != NULL);
16239 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16240 
16241 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16242 	    "sd_start_retry_command: entry\n");
16243 
16244 	mutex_enter(SD_MUTEX(un));
16245 
16246 	un->un_retry_timeid = NULL;
16247 
16248 	if (un->un_retry_bp != NULL) {
16249 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16250 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
16251 		    un, un->un_retry_bp);
16252 		sd_start_cmds(un, un->un_retry_bp);
16253 	}
16254 
16255 	mutex_exit(SD_MUTEX(un));
16256 
16257 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16258 	    "sd_start_retry_command: exit\n");
16259 }
16260 
16261 /*
16262  *    Function: sd_rmw_msg_print_handler
16263  *
16264  * Description: If RMW mode is enabled and warning message is triggered
16265  *              print I/O count during a fixed interval.
16266  *
16267  *   Arguments: arg - pointer to associated softstate for the device.
16268  *
16269  *     Context: timeout(9F) thread context. May not sleep.
16270  */
16271 static void
16272 sd_rmw_msg_print_handler(void *arg)
16273 {
16274 	struct sd_lun *un = arg;
16275 
16276 	ASSERT(un != NULL);
16277 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16278 
16279 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16280 	    "sd_rmw_msg_print_handler: entry\n");
16281 
16282 	mutex_enter(SD_MUTEX(un));
16283 
16284 	if (un->un_rmw_incre_count > 0) {
16285 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16286 		    "%"PRIu64" I/O requests are not aligned with %d disk "
16287 		    "sector size in %ld seconds. They are handled through "
16288 		    "Read Modify Write but the performance is very low!\n",
16289 		    un->un_rmw_incre_count, un->un_tgt_blocksize,
16290 		    drv_hztousec(SD_RMW_MSG_PRINT_TIMEOUT) / 1000000);
16291 		un->un_rmw_incre_count = 0;
16292 		un->un_rmw_msg_timeid = timeout(sd_rmw_msg_print_handler,
16293 		    un, SD_RMW_MSG_PRINT_TIMEOUT);
16294 	} else {
16295 		un->un_rmw_msg_timeid = NULL;
16296 	}
16297 
16298 	mutex_exit(SD_MUTEX(un));
16299 
16300 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16301 	    "sd_rmw_msg_print_handler: exit\n");
16302 }
16303 
16304 /*
16305  *    Function: sd_start_direct_priority_command
16306  *
16307  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
16308  *		received TRAN_BUSY when we called scsi_transport() to send it
16309  *		to the underlying HBA. This function is called from timeout(9F)
16310  *		context after the delay interval has expired.
16311  *
16312  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
16313  *
16314  *     Context: timeout(9F) thread context.  May not sleep.
16315  */
16316 
16317 static void
16318 sd_start_direct_priority_command(void *arg)
16319 {
16320 	struct buf	*priority_bp = arg;
16321 	struct sd_lun	*un;
16322 
16323 	ASSERT(priority_bp != NULL);
16324 	un = SD_GET_UN(priority_bp);
16325 	ASSERT(un != NULL);
16326 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16327 
16328 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16329 	    "sd_start_direct_priority_command: entry\n");
16330 
16331 	mutex_enter(SD_MUTEX(un));
16332 	un->un_direct_priority_timeid = NULL;
16333 	sd_start_cmds(un, priority_bp);
16334 	mutex_exit(SD_MUTEX(un));
16335 
16336 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16337 	    "sd_start_direct_priority_command: exit\n");
16338 }
16339 
16340 
16341 /*
16342  *    Function: sd_send_request_sense_command
16343  *
16344  * Description: Sends a REQUEST SENSE command to the target
16345  *
16346  *     Context: May be called from interrupt context.
16347  */
16348 
static void
sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
    struct scsi_pkt *pktp)
{
	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
	    "entry: buf:0x%p\n", bp);

	/*
	 * If we are syncing or dumping, then fail the command to avoid a
	 * recursive callback into scsi_transport(). Also fail the command
	 * if we are suspended (legacy behavior).
	 */
	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
	    (un->un_state == SD_STATE_DUMPING)) {
		sd_return_failed_command(un, bp, EIO);
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_send_request_sense_command: syncing/dumping, exit\n");
		return;
	}

	/*
	 * Retry the failed command and don't issue the request sense if:
	 *    1) the sense buf is busy
	 *    2) we have 1 or more outstanding commands on the target
	 *    (the sense data will be cleared or invalidated any way)
	 *
	 * Note: There could be an issue with not checking a retry limit here,
	 * the problem is determining which retry limit to check.
	 */
	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
		/* Don't retry if the command is flagged as non-retryable */
		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
			    NULL, NULL, 0, un->un_busy_timeout,
			    kstat_waitq_enter);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_send_request_sense_command: "
			    "at full throttle, retrying exit\n");
		} else {
			sd_return_failed_command(un, bp, EIO);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_send_request_sense_command: "
			    "at full throttle, non-retryable exit\n");
		}
		return;
	}

	/*
	 * Mark the pre-allocated RQS resources busy (linking them back to
	 * the originating bp) and transport the REQUEST SENSE command.
	 */
	sd_mark_rqs_busy(un, bp);
	sd_start_cmds(un, un->un_rqs_bp);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_send_request_sense_command: exit\n");
}
16406 
16407 
16408 /*
16409  *    Function: sd_mark_rqs_busy
16410  *
16411  * Description: Indicate that the request sense bp for this instance is
16412  *		in use.
16413  *
16414  *     Context: May be called under interrupt context
16415  */
16416 
static void
sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
{
	struct sd_xbuf	*sense_xp;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_sense_isbusy == 0);

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);

	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
	ASSERT(sense_xp != NULL);

	SD_INFO(SD_LOG_IO, un,
	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);

	ASSERT(sense_xp->xb_pktp != NULL);
	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
	    == (FLAG_SENSING | FLAG_HEAD));

	/* Claim the RQS resources and reset their completion state. */
	un->un_sense_isbusy = 1;
	un->un_rqs_bp->b_resid = 0;
	sense_xp->xb_pktp->pkt_resid  = 0;
	sense_xp->xb_pktp->pkt_reason = 0;

	/* So we can get back the bp at interrupt time! */
	sense_xp->xb_sense_bp = bp;

	/* Clear any stale sense data in the RQS data buffer. */
	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);

	/*
	 * Mark this buf as awaiting sense data. (This is already set in
	 * the pkt_flags for the RQS packet.)
	 */
	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;

	/* Request sense down same path */
	if (scsi_pkt_allocated_correctly((SD_GET_XBUF(bp))->xb_pktp) &&
	    ((SD_GET_XBUF(bp))->xb_pktp)->pkt_path_instance)
		sense_xp->xb_pktp->pkt_path_instance =
		    ((SD_GET_XBUF(bp))->xb_pktp)->pkt_path_instance;

	/* Start the RQS command with fresh retry counts and residuals. */
	sense_xp->xb_retry_count = 0;
	sense_xp->xb_victim_retry_count = 0;
	sense_xp->xb_ua_retry_count = 0;
	sense_xp->xb_nr_retry_count = 0;
	sense_xp->xb_dma_resid  = 0;

	/* Clean up the fields for auto-request sense */
	sense_xp->xb_sense_status = 0;
	sense_xp->xb_sense_state = 0;
	sense_xp->xb_sense_resid = 0;
	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
}
16476 
16477 
16478 /*
16479  *    Function: sd_mark_rqs_idle
16480  *
16481  * Description: SD_MUTEX must be held continuously through this routine
16482  *		to prevent reuse of the rqs struct before the caller can
16483  *		complete it's processing.
16484  *
16485  * Return Code: Pointer to the RQS buf
16486  *
16487  *     Context: May be called under interrupt context
16488  */
16489 
16490 static struct buf *
16491 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
16492 {
16493 	struct buf *bp;
16494 	ASSERT(un != NULL);
16495 	ASSERT(sense_xp != NULL);
16496 	ASSERT(mutex_owned(SD_MUTEX(un)));
16497 	ASSERT(un->un_sense_isbusy != 0);
16498 
16499 	un->un_sense_isbusy = 0;
16500 	bp = sense_xp->xb_sense_bp;
16501 	sense_xp->xb_sense_bp = NULL;
16502 
16503 	/* This pkt is no longer interested in getting sense data */
16504 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
16505 
16506 	return (bp);
16507 }
16508 
16509 
16510 
16511 /*
16512  *    Function: sd_alloc_rqs
16513  *
16514  * Description: Set up the unit to receive auto request sense data
16515  *
16516  * Return Code: DDI_SUCCESS or DDI_FAILURE
16517  *
16518  *     Context: Called under attach(9E) context
16519  */
16520 
static int
sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
{
	struct sd_xbuf *xp;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	/* Must not already have RQS resources; this is a one-time setup. */
	ASSERT(un->un_rqs_bp == NULL);
	ASSERT(un->un_rqs_pktp == NULL);

	/*
	 * First allocate the required buf and scsi_pkt structs, then set up
	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
	 */
	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
	    MAX_SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
	if (un->un_rqs_bp == NULL) {
		return (DDI_FAILURE);
	}

	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);

	if (un->un_rqs_pktp == NULL) {
		/* Releases un_rqs_bp allocated above */
		sd_free_rqs(un);
		return (DDI_FAILURE);
	}

	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
	    SCMD_REQUEST_SENSE, 0, MAX_SENSE_LENGTH, 0);

	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);

	/* Set up the other needed members in the ARQ scsi_pkt. */
	un->un_rqs_pktp->pkt_comp   = sdintr;
	un->un_rqs_pktp->pkt_time   = sd_io_time;
	un->un_rqs_pktp->pkt_flags |=
	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */

	/*
	 * Allocate  & init the sd_xbuf struct for the RQS command. Do not
	 * provide any initpkt, destroypkt routines as we take care of
	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
	 */
	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
	xp->xb_pktp = un->un_rqs_pktp;
	SD_INFO(SD_LOG_ATTACH_DETACH, un,
	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);

	/*
	 * Save the pointer to the request sense private bp so it can
	 * be retrieved in sdintr.
	 */
	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
	ASSERT(un->un_rqs_bp->b_private == xp);

	/*
	 * See if the HBA supports auto-request sense for the specified
	 * target/lun. If it does, then try to enable it (if not already
	 * enabled).
	 *
	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
	 * return success.  However, in both of these cases ARQ is always
	 * enabled and scsi_ifgetcap will always return true. The best approach
	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
	 *
	 * The 3rd case is the HBA (adp) always return enabled on
	 * scsi_ifgetcap even when it's not enabled, the best approach
	 * is issue a scsi_ifsetcap then a scsi_ifgetcap
	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
	 */

	if (un->un_f_is_fibre == TRUE) {
		/* Fibre channel HBAs always have ARQ available and enabled. */
		un->un_f_arq_enabled = TRUE;
	} else {
#if defined(__x86)
		/*
		 * Circumvent the Adaptec bug, remove this code when
		 * the bug is fixed
		 */
		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
#endif
		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
		case 0:
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_alloc_rqs: HBA supports ARQ\n");
			/*
			 * ARQ is supported by this HBA but currently is not
			 * enabled. Attempt to enable it and if successful then
			 * mark this instance as ARQ enabled.
			 */
			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
			    == 1) {
				/* Successfully enabled ARQ in the HBA */
				SD_INFO(SD_LOG_ATTACH_DETACH, un,
				    "sd_alloc_rqs: ARQ enabled\n");
				un->un_f_arq_enabled = TRUE;
			} else {
				/* Could not enable ARQ in the HBA */
				SD_INFO(SD_LOG_ATTACH_DETACH, un,
				    "sd_alloc_rqs: failed ARQ enable\n");
				un->un_f_arq_enabled = FALSE;
			}
			break;
		case 1:
			/*
			 * ARQ is supported by this HBA and is already enabled.
			 * Just mark ARQ as enabled for this instance.
			 */
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_alloc_rqs: ARQ already enabled\n");
			un->un_f_arq_enabled = TRUE;
			break;
		default:
			/*
			 * ARQ is not supported by this HBA; disable it for this
			 * instance.
			 */
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_alloc_rqs: HBA does not support ARQ\n");
			un->un_f_arq_enabled = FALSE;
			break;
		}
	}

	return (DDI_SUCCESS);
}
16652 
16653 
16654 /*
16655  *    Function: sd_free_rqs
16656  *
16657  * Description: Cleanup for the pre-instance RQS command.
16658  *
16659  *     Context: Kernel thread context
16660  */
16661 
16662 static void
16663 sd_free_rqs(struct sd_lun *un)
16664 {
16665 	ASSERT(un != NULL);
16666 
16667 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
16668 
16669 	/*
16670 	 * If consistent memory is bound to a scsi_pkt, the pkt
16671 	 * has to be destroyed *before* freeing the consistent memory.
16672 	 * Don't change the sequence of this operations.
16673 	 * scsi_destroy_pkt() might access memory, which isn't allowed,
16674 	 * after it was freed in scsi_free_consistent_buf().
16675 	 */
16676 	if (un->un_rqs_pktp != NULL) {
16677 		scsi_destroy_pkt(un->un_rqs_pktp);
16678 		un->un_rqs_pktp = NULL;
16679 	}
16680 
16681 	if (un->un_rqs_bp != NULL) {
16682 		struct sd_xbuf *xp = SD_GET_XBUF(un->un_rqs_bp);
16683 		if (xp != NULL) {
16684 			kmem_free(xp, sizeof (struct sd_xbuf));
16685 		}
16686 		scsi_free_consistent_buf(un->un_rqs_bp);
16687 		un->un_rqs_bp = NULL;
16688 	}
16689 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
16690 }
16691 
16692 
16693 
16694 /*
16695  *    Function: sd_reduce_throttle
16696  *
16697  * Description: Reduces the maximum # of outstanding commands on a
16698  *		target to the current number of outstanding commands.
 *		Queues a timeout(9F) callback to restore the limit
16700  *		after a specified interval has elapsed.
16701  *		Typically used when we get a TRAN_BUSY return code
16702  *		back from scsi_transport().
16703  *
16704  *   Arguments: un - ptr to the sd_lun softstate struct
16705  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
16706  *
16707  *     Context: May be called from interrupt context
16708  */
16709 
16710 static void
16711 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
16712 {
16713 	ASSERT(un != NULL);
16714 	ASSERT(mutex_owned(SD_MUTEX(un)));
16715 	ASSERT(un->un_ncmds_in_transport >= 0);
16716 
16717 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16718 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
16719 	    un, un->un_throttle, un->un_ncmds_in_transport);
16720 
16721 	if (un->un_throttle > 1) {
16722 		if (un->un_f_use_adaptive_throttle == TRUE) {
16723 			switch (throttle_type) {
16724 			case SD_THROTTLE_TRAN_BUSY:
16725 				if (un->un_busy_throttle == 0) {
16726 					un->un_busy_throttle = un->un_throttle;
16727 				}
16728 				break;
16729 			case SD_THROTTLE_QFULL:
16730 				un->un_busy_throttle = 0;
16731 				break;
16732 			default:
16733 				ASSERT(FALSE);
16734 			}
16735 
16736 			if (un->un_ncmds_in_transport > 0) {
16737 				un->un_throttle = un->un_ncmds_in_transport;
16738 			}
16739 
16740 		} else {
16741 			if (un->un_ncmds_in_transport == 0) {
16742 				un->un_throttle = 1;
16743 			} else {
16744 				un->un_throttle = un->un_ncmds_in_transport;
16745 			}
16746 		}
16747 	}
16748 
16749 	/* Reschedule the timeout if none is currently active */
16750 	if (un->un_reset_throttle_timeid == NULL) {
16751 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
16752 		    un, SD_THROTTLE_RESET_INTERVAL);
16753 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16754 		    "sd_reduce_throttle: timeout scheduled!\n");
16755 	}
16756 
16757 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16758 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
16759 }
16760 
16761 
16762 
16763 /*
16764  *    Function: sd_restore_throttle
16765  *
16766  * Description: Callback function for timeout(9F).  Resets the current
16767  *		value of un->un_throttle to its default.
16768  *
16769  *   Arguments: arg - pointer to associated softstate for the device.
16770  *
16771  *     Context: May be called from interrupt context
16772  */
16773 
static void
sd_restore_throttle(void *arg)
{
	struct sd_lun	*un = arg;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);

	/* This timeout has fired; clear the id so it can be rescheduled. */
	un->un_reset_throttle_timeid = NULL;

	if (un->un_f_use_adaptive_throttle == TRUE) {
		/*
		 * If un_busy_throttle is nonzero, then it contains the
		 * value that un_throttle was when we got a TRAN_BUSY back
		 * from scsi_transport(). We want to revert back to this
		 * value.
		 *
		 * In the QFULL case, the throttle limit will incrementally
		 * increase until it reaches max throttle.
		 */
		if (un->un_busy_throttle > 0) {
			un->un_throttle = un->un_busy_throttle;
			un->un_busy_throttle = 0;
		} else {
			/*
			 * QFULL recovery: increase the throttle by 10%
			 * (at least 1) to open the gate slowly, and schedule
			 * another restore callback if the saved throttle has
			 * not yet been reached.
			 */
			short throttle;
			if (sd_qfull_throttle_enable) {
				throttle = un->un_throttle +
				    max((un->un_throttle / 10), 1);
				un->un_throttle =
				    (throttle < un->un_saved_throttle) ?
				    throttle : un->un_saved_throttle;
				if (un->un_throttle < un->un_saved_throttle) {
					un->un_reset_throttle_timeid =
					    timeout(sd_restore_throttle,
					    un,
					    SD_QFULL_THROTTLE_RESET_INTERVAL);
				}
			}
		}

		/*
		 * If un_throttle has fallen below the low-water mark, we
		 * restore the maximum value here (and allow it to ratchet
		 * down again if necessary).
		 */
		if (un->un_throttle < un->un_min_throttle) {
			un->un_throttle = un->un_saved_throttle;
		}
	} else {
		/* Non-adaptive mode: restore the full limit immediately. */
		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
		    "restoring limit from 0x%x to 0x%x\n",
		    un->un_throttle, un->un_saved_throttle);
		un->un_throttle = un->un_saved_throttle;
	}

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
	    "sd_restore_throttle: calling sd_start_cmds!\n");

	/* Kick off any commands that were waiting on the throttle. */
	sd_start_cmds(un, NULL);

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
	    un, un->un_throttle);

	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
}
16852 
16853 /*
16854  *    Function: sdrunout
16855  *
16856  * Description: Callback routine for scsi_init_pkt when a resource allocation
16857  *		fails.
16858  *
16859  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
16860  *		soft state instance.
16861  *
16862  * Return Code: The scsi_init_pkt routine allows for the callback function to
16863  *		return a 0 indicating the callback should be rescheduled or a 1
16864  *		indicating not to reschedule. This routine always returns 1
16865  *		because the driver always provides a callback function to
16866  *		scsi_init_pkt. This results in a callback always being scheduled
16867  *		(via the scsi_init_pkt callback implementation) if a resource
16868  *		failure occurs.
16869  *
16870  *     Context: This callback function may not block or call routines that block
16871  *
16872  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
16873  *		request persisting at the head of the list which cannot be
16874  *		satisfied even after multiple retries. In the future the driver
 *		may implement some type of maximum runout count before failing
16876  *		an I/O.
16877  */
16878 
16879 static int
16880 sdrunout(caddr_t arg)
16881 {
16882 	struct sd_lun	*un = (struct sd_lun *)arg;
16883 
16884 	ASSERT(un != NULL);
16885 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16886 
16887 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
16888 
16889 	mutex_enter(SD_MUTEX(un));
16890 	sd_start_cmds(un, NULL);
16891 	mutex_exit(SD_MUTEX(un));
16892 	/*
16893 	 * This callback routine always returns 1 (i.e. do not reschedule)
16894 	 * because we always specify sdrunout as the callback handler for
16895 	 * scsi_init_pkt inside the call to sd_start_cmds.
16896 	 */
16897 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
16898 	return (1);
16899 }
16900 
16901 
16902 /*
16903  *    Function: sdintr
16904  *
16905  * Description: Completion callback routine for scsi_pkt(9S) structs
16906  *		sent to the HBA driver via scsi_transport(9F).
16907  *
16908  *     Context: Interrupt context
16909  */
16910 
static void
sdintr(struct scsi_pkt *pktp)
{
	struct buf	*bp;
	struct sd_xbuf	*xp;
	struct sd_lun	*un;
	size_t		actual_len;
	sd_ssc_t	*sscp;

	ASSERT(pktp != NULL);
	/* pkt_private was set to the buf when the pkt was initialized. */
	bp = (struct buf *)pktp->pkt_private;
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	ASSERT(xp->xb_pktp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

#ifdef SD_FAULT_INJECTION

	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
	/* SD FaultInjection: may alter the pkt to simulate errors. */
	sd_faultinjection(pktp);

#endif /* SD_FAULT_INJECTION */

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
	    " xp:0x%p, un:0x%p\n", bp, xp, un);

	mutex_enter(SD_MUTEX(un));

	ASSERT(un->un_fm_private != NULL);
	sscp = &((struct sd_fm_internal *)(un->un_fm_private))->fm_ssc;
	ASSERT(sscp != NULL);

	/* Reduce the count of the #commands currently in transport */
	un->un_ncmds_in_transport--;
	ASSERT(un->un_ncmds_in_transport >= 0);

	/* Increment counter to indicate that the callback routine is active */
	un->un_in_callback++;

	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);

#ifdef	SDDEBUG
	if (bp == un->un_retry_bp) {
		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
		    un, un->un_retry_bp, un->un_ncmds_in_transport);
	}
#endif

	/*
	 * If pkt_reason is CMD_DEV_GONE, fail the command, and update the media
	 * state if needed.
	 */
	if (pktp->pkt_reason == CMD_DEV_GONE) {
		/* Prevent multiple console messages for the same failure. */
		if (un->un_last_pkt_reason != CMD_DEV_GONE) {
			un->un_last_pkt_reason = CMD_DEV_GONE;
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Command failed to complete...Device is gone\n");
		}
		if (un->un_mediastate != DKIO_DEV_GONE) {
			un->un_mediastate = DKIO_DEV_GONE;
			/* Wake any threads waiting on a media state change. */
			cv_broadcast(&un->un_state_cv);
		}
		/*
		 * If the command happens to be the REQUEST SENSE command,
		 * free up the rqs buf and fail the original command.
		 */
		if (bp == un->un_rqs_bp) {
			bp = sd_mark_rqs_idle(un, xp);
		}
		sd_return_failed_command(un, bp, EIO);
		goto exit;
	}

	if (pktp->pkt_state & STATE_XARQ_DONE) {
		SD_TRACE(SD_LOG_COMMON, un,
		    "sdintr: extra sense data received. pkt=%p\n", pktp);
	}

	/*
	 * First see if the pkt has auto-request sense data with it....
	 * Look at the packet state first so we don't take a performance
	 * hit looking at the arq enabled flag unless absolutely necessary.
	 */
	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
	    (un->un_f_arq_enabled == TRUE)) {
		/*
		 * The HBA did an auto request sense for this command so check
		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
		 * driver command that should not be retried.
		 */
		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
			/*
			 * Save the relevant sense info into the xp for the
			 * original cmd.
			 */
			struct scsi_arq_status *asp;
			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
			xp->xb_sense_status =
			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
			xp->xb_sense_state  = asp->sts_rqpkt_state;
			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
			if (pktp->pkt_state & STATE_XARQ_DONE) {
				/* Extended sense: up to MAX_SENSE_LENGTH. */
				actual_len = MAX_SENSE_LENGTH -
				    xp->xb_sense_resid;
				bcopy(&asp->sts_sensedata, xp->xb_sense_data,
				    MAX_SENSE_LENGTH);
			} else {
				if (xp->xb_sense_resid > SENSE_LENGTH) {
					actual_len = MAX_SENSE_LENGTH -
					    xp->xb_sense_resid;
				} else {
					actual_len = SENSE_LENGTH -
					    xp->xb_sense_resid;
				}
				if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
					/*
					 * Recompute the resid against the
					 * caller-requested sense length.
					 */
					if ((((struct uscsi_cmd *)
					    (xp->xb_pktinfo))->uscsi_rqlen) >
					    actual_len) {
						xp->xb_sense_resid =
						    (((struct uscsi_cmd *)
						    (xp->xb_pktinfo))->
						    uscsi_rqlen) - actual_len;
					} else {
						xp->xb_sense_resid = 0;
					}
				}
				bcopy(&asp->sts_sensedata, xp->xb_sense_data,
				    SENSE_LENGTH);
			}

			/* fail the command */
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
			sd_return_failed_command(un, bp, EIO);
			goto exit;
		}

#if (defined(__x86))	/* DMAFREE for x86 only */
		/*
		 * We want to either retry or fail this command, so free
		 * the DMA resources here.  If we retry the command then
		 * the DMA resources will be reallocated in sd_start_cmds().
		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
		 * causes the *entire* transfer to start over again from the
		 * beginning of the request, even for PARTIAL chunks that
		 * have already transferred successfully.
		 */
		if ((un->un_f_is_fibre == TRUE) &&
		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
			scsi_dmafree(pktp);
			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
		}
#endif

		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: arq done, sd_handle_auto_request_sense\n");

		sd_handle_auto_request_sense(un, bp, xp, pktp);
		goto exit;
	}

	/* Next see if this is the REQUEST SENSE pkt for the instance */
	if (pktp->pkt_flags & FLAG_SENSING)  {
		/* This pktp is from the unit's REQUEST_SENSE command */
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: sd_handle_request_sense\n");
		sd_handle_request_sense(un, bp, xp, pktp);
		goto exit;
	}

	/*
	 * Check to see if the command successfully completed as requested;
	 * this is the most common case (and also the hot performance path).
	 *
	 * Requirements for successful completion are:
	 * pkt_reason is CMD_CMPLT and packet status is status good.
	 * In addition:
	 * - A residual of zero indicates successful completion no matter what
	 *   the command is.
	 * - If the residual is not zero and the command is not a read or
	 *   write, then it's still defined as successful completion. In other
	 *   words, if the command is a read or write the residual must be
	 *   zero for successful completion.
	 * - If the residual is not zero and the command is a read or
	 *   write, and it's a USCSICMD, then it's still defined as
	 *   successful completion.
	 */
	if ((pktp->pkt_reason == CMD_CMPLT) &&
	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {

		/*
		 * Since this command is returned with a good status, we
		 * can reset the count for Sonoma failover.
		 */
		un->un_sonoma_failure_count = 0;

		/*
		 * Return all USCSI commands on good status
		 */
		if (pktp->pkt_resid == 0) {
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: returning command for resid == 0\n");
		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
			SD_UPDATE_B_RESID(bp, pktp);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: returning command for resid != 0\n");
		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
			SD_UPDATE_B_RESID(bp, pktp);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: returning uscsi command\n");
		} else {
			/* Short read/write on a non-uscsi command: error. */
			goto not_successful;
		}
		sd_return_command(un, bp);

		/*
		 * Decrement counter to indicate that the callback routine
		 * is done.
		 */
		un->un_in_callback--;
		ASSERT(un->un_in_callback >= 0);
		mutex_exit(SD_MUTEX(un));

		return;
	}

not_successful:

#if (defined(__x86))	/* DMAFREE for x86 only */
	/*
	 * The following is based upon knowledge of the underlying transport
	 * and its use of DMA resources.  This code should be removed when
	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
	 * and sd_start_cmds().
	 *
	 * Free any DMA resources associated with this command if there
	 * is a chance it could be retried or enqueued for later retry.
	 * If we keep the DMA binding then mpxio cannot reissue the
	 * command on another path whenever a path failure occurs.
	 *
	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
	 * causes the *entire* transfer to start over again from the
	 * beginning of the request, even for PARTIAL chunks that
	 * have already transferred successfully.
	 *
	 * This is only done for non-uscsi commands (and also skipped for the
	 * driver's internal RQS command). Also just do this for Fibre Channel
	 * devices as these are the only ones that support mpxio.
	 */
	if ((un->un_f_is_fibre == TRUE) &&
	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
		scsi_dmafree(pktp);
		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
	}
#endif

	/*
	 * The command did not successfully complete as requested so check
	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
	 * driver command that should not be retried so just return. If
	 * FLAG_DIAGNOSE is not set the error will be processed below.
	 */
	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
		/*
		 * Issue a request sense if a check condition caused the error
		 * (we handle the auto request sense case above), otherwise
		 * just fail the command.
		 */
		if ((pktp->pkt_reason == CMD_CMPLT) &&
		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
			sd_send_request_sense_command(un, bp, pktp);
		} else {
			sd_return_failed_command(un, bp, EIO);
		}
		goto exit;
	}

	/*
	 * The command did not successfully complete as requested so process
	 * the error, retry, and/or attempt recovery.
	 */
	switch (pktp->pkt_reason) {
	case CMD_CMPLT:
		switch (SD_GET_PKT_STATUS(pktp)) {
		case STATUS_GOOD:
			/*
			 * The command completed successfully with a non-zero
			 * residual
			 */
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_GOOD \n");
			sd_pkt_status_good(un, bp, xp, pktp);
			break;

		case STATUS_CHECK:
		case STATUS_TERMINATED:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
			sd_pkt_status_check_condition(un, bp, xp, pktp);
			break;

		case STATUS_BUSY:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_BUSY\n");
			sd_pkt_status_busy(un, bp, xp, pktp);
			break;

		case STATUS_RESERVATION_CONFLICT:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
			break;

		case STATUS_QFULL:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_QFULL\n");
			sd_pkt_status_qfull(un, bp, xp, pktp);
			break;

		case STATUS_MET:
		case STATUS_INTERMEDIATE:
		case STATUS_SCSI2:
		case STATUS_INTERMEDIATE_MET:
		case STATUS_ACA_ACTIVE:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Unexpected SCSI status received: 0x%x\n",
			    SD_GET_PKT_STATUS(pktp));
			/*
			 * Mark the ssc_flags when detected invalid status
			 * code for non-USCSI command.
			 */
			if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
				sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_STATUS,
				    0, "stat-code");
			}
			sd_return_failed_command(un, bp, EIO);
			break;

		default:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Invalid SCSI status received: 0x%x\n",
			    SD_GET_PKT_STATUS(pktp));
			if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
				sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_STATUS,
				    0, "stat-code");
			}
			sd_return_failed_command(un, bp, EIO);
			break;

		}
		break;

	case CMD_INCOMPLETE:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr:  CMD_INCOMPLETE\n");
		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
		break;
	case CMD_TRAN_ERR:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TRAN_ERR\n");
		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
		break;
	case CMD_RESET:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_RESET \n");
		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
		break;
	case CMD_ABORTED:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_ABORTED \n");
		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
		break;
	case CMD_TIMEOUT:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TIMEOUT\n");
		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
		break;
	case CMD_UNX_BUS_FREE:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_UNX_BUS_FREE \n");
		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
		break;
	case CMD_TAG_REJECT:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TAG_REJECT\n");
		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
		break;
	default:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: default\n");
		/*
		 * Mark the ssc_flags for detecting invalid pkt_reason.
		 */
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_PKT_REASON,
			    0, "pkt-reason");
		}
		sd_pkt_reason_default(un, bp, xp, pktp);
		break;
	}

exit:
	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");

	/* Decrement counter to indicate that the callback routine is done. */
	un->un_in_callback--;
	ASSERT(un->un_in_callback >= 0);

	/*
	 * At this point, the pkt has been dispatched, ie, it is either
	 * being re-tried or has been returned to its caller and should
	 * not be referenced.
	 */

	mutex_exit(SD_MUTEX(un));
}
17339 
17340 
17341 /*
17342  *    Function: sd_print_incomplete_msg
17343  *
17344  * Description: Prints the error message for a CMD_INCOMPLETE error.
17345  *
17346  *   Arguments: un - ptr to associated softstate for the device.
17347  *		bp - ptr to the buf(9S) for the command.
17348  *		arg - message string ptr
17349  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
17350  *			or SD_NO_RETRY_ISSUED.
17351  *
17352  *     Context: May be called under interrupt context
17353  */
17354 
17355 static void
17356 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
17357 {
17358 	struct scsi_pkt	*pktp;
17359 	char	*msgp;
17360 	char	*cmdp = arg;
17361 
17362 	ASSERT(un != NULL);
17363 	ASSERT(mutex_owned(SD_MUTEX(un)));
17364 	ASSERT(bp != NULL);
17365 	ASSERT(arg != NULL);
17366 	pktp = SD_GET_PKTP(bp);
17367 	ASSERT(pktp != NULL);
17368 
17369 	switch (code) {
17370 	case SD_DELAYED_RETRY_ISSUED:
17371 	case SD_IMMEDIATE_RETRY_ISSUED:
17372 		msgp = "retrying";
17373 		break;
17374 	case SD_NO_RETRY_ISSUED:
17375 	default:
17376 		msgp = "giving up";
17377 		break;
17378 	}
17379 
17380 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
17381 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17382 		    "incomplete %s- %s\n", cmdp, msgp);
17383 	}
17384 }
17385 
17386 
17387 
17388 /*
17389  *    Function: sd_pkt_status_good
17390  *
17391  * Description: Processing for a STATUS_GOOD code in pkt_status.
17392  *
17393  *     Context: May be called under interrupt context
17394  */
17395 
17396 static void
17397 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
17398     struct sd_xbuf *xp, struct scsi_pkt *pktp)
17399 {
17400 	char	*cmdp;
17401 
17402 	ASSERT(un != NULL);
17403 	ASSERT(mutex_owned(SD_MUTEX(un)));
17404 	ASSERT(bp != NULL);
17405 	ASSERT(xp != NULL);
17406 	ASSERT(pktp != NULL);
17407 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
17408 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
17409 	ASSERT(pktp->pkt_resid != 0);
17410 
17411 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
17412 
17413 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17414 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
17415 	case SCMD_READ:
17416 		cmdp = "read";
17417 		break;
17418 	case SCMD_WRITE:
17419 		cmdp = "write";
17420 		break;
17421 	default:
17422 		SD_UPDATE_B_RESID(bp, pktp);
17423 		sd_return_command(un, bp);
17424 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
17425 		return;
17426 	}
17427 
17428 	/*
17429 	 * See if we can retry the read/write, preferrably immediately.
17430 	 * If retries are exhaused, then sd_retry_command() will update
17431 	 * the b_resid count.
17432 	 */
17433 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
17434 	    cmdp, EIO, (clock_t)0, NULL);
17435 
17436 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
17437 }
17438 
17439 
17440 
17441 
17442 
17443 /*
17444  *    Function: sd_handle_request_sense
17445  *
17446  * Description: Processing for non-auto Request Sense command.
17447  *
17448  *   Arguments: un - ptr to associated softstate
17449  *		sense_bp - ptr to buf(9S) for the RQS command
17450  *		sense_xp - ptr to the sd_xbuf for the RQS command
17451  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
17452  *
17453  *     Context: May be called under interrupt context
17454  */
17455 
17456 static void
17457 sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
17458     struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
17459 {
17460 	struct buf	*cmd_bp;	/* buf for the original command */
17461 	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
17462 	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
17463 	size_t		actual_len;	/* actual sense data length */
17464 
17465 	ASSERT(un != NULL);
17466 	ASSERT(mutex_owned(SD_MUTEX(un)));
17467 	ASSERT(sense_bp != NULL);
17468 	ASSERT(sense_xp != NULL);
17469 	ASSERT(sense_pktp != NULL);
17470 
17471 	/*
17472 	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
17473 	 * RQS command and not the original command.
17474 	 */
17475 	ASSERT(sense_pktp == un->un_rqs_pktp);
17476 	ASSERT(sense_bp   == un->un_rqs_bp);
17477 	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
17478 	    (FLAG_SENSING | FLAG_HEAD));
17479 	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
17480 	    FLAG_SENSING) == FLAG_SENSING);
17481 
17482 	/* These are the bp, xp, and pktp for the original command */
17483 	cmd_bp = sense_xp->xb_sense_bp;
17484 	cmd_xp = SD_GET_XBUF(cmd_bp);
17485 	cmd_pktp = SD_GET_PKTP(cmd_bp);
17486 
17487 	if (sense_pktp->pkt_reason != CMD_CMPLT) {
17488 		/*
17489 		 * The REQUEST SENSE command failed.  Release the REQUEST
17490 		 * SENSE command for re-use, get back the bp for the original
17491 		 * command, and attempt to re-try the original command if
17492 		 * FLAG_DIAGNOSE is not set in the original packet.
17493 		 */
17494 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17495 		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
17496 			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
17497 			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
17498 			    NULL, NULL, EIO, (clock_t)0, NULL);
17499 			return;
17500 		}
17501 	}
17502 
17503 	/*
17504 	 * Save the relevant sense info into the xp for the original cmd.
17505 	 *
17506 	 * Note: if the request sense failed the state info will be zero
17507 	 * as set in sd_mark_rqs_busy()
17508 	 */
17509 	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
17510 	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
17511 	actual_len = MAX_SENSE_LENGTH - sense_pktp->pkt_resid;
17512 	if ((cmd_xp->xb_pkt_flags & SD_XB_USCSICMD) &&
17513 	    (((struct uscsi_cmd *)cmd_xp->xb_pktinfo)->uscsi_rqlen >
17514 	    SENSE_LENGTH)) {
17515 		bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data,
17516 		    MAX_SENSE_LENGTH);
17517 		cmd_xp->xb_sense_resid = sense_pktp->pkt_resid;
17518 	} else {
17519 		bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data,
17520 		    SENSE_LENGTH);
17521 		if (actual_len < SENSE_LENGTH) {
17522 			cmd_xp->xb_sense_resid = SENSE_LENGTH - actual_len;
17523 		} else {
17524 			cmd_xp->xb_sense_resid = 0;
17525 		}
17526 	}
17527 
17528 	/*
17529 	 *  Free up the RQS command....
17530 	 *  NOTE:
17531 	 *	Must do this BEFORE calling sd_validate_sense_data!
17532 	 *	sd_validate_sense_data may return the original command in
17533 	 *	which case the pkt will be freed and the flags can no
17534 	 *	longer be touched.
17535 	 *	SD_MUTEX is held through this process until the command
17536 	 *	is dispatched based upon the sense data, so there are
17537 	 *	no race conditions.
17538 	 */
17539 	(void) sd_mark_rqs_idle(un, sense_xp);
17540 
17541 	/*
17542 	 * For a retryable command see if we have valid sense data, if so then
17543 	 * turn it over to sd_decode_sense() to figure out the right course of
17544 	 * action. Just fail a non-retryable command.
17545 	 */
17546 	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
17547 		if (sd_validate_sense_data(un, cmd_bp, cmd_xp, actual_len) ==
17548 		    SD_SENSE_DATA_IS_VALID) {
17549 			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
17550 		}
17551 	} else {
17552 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
17553 		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
17554 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
17555 		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
17556 		sd_return_failed_command(un, cmd_bp, EIO);
17557 	}
17558 }
17559 
17560 
17561 
17562 
17563 /*
17564  *    Function: sd_handle_auto_request_sense
17565  *
17566  * Description: Processing for auto-request sense information.
17567  *
17568  *   Arguments: un - ptr to associated softstate
17569  *		bp - ptr to buf(9S) for the command
17570  *		xp - ptr to the sd_xbuf for the command
17571  *		pktp - ptr to the scsi_pkt(9S) for the command
17572  *
17573  *     Context: May be called under interrupt context
17574  */
17575 
17576 static void
17577 sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
17578     struct sd_xbuf *xp, struct scsi_pkt *pktp)
17579 {
17580 	struct scsi_arq_status *asp;
17581 	size_t actual_len;
17582 
17583 	ASSERT(un != NULL);
17584 	ASSERT(mutex_owned(SD_MUTEX(un)));
17585 	ASSERT(bp != NULL);
17586 	ASSERT(xp != NULL);
17587 	ASSERT(pktp != NULL);
17588 	ASSERT(pktp != un->un_rqs_pktp);
17589 	ASSERT(bp   != un->un_rqs_bp);
17590 
17591 	/*
17592 	 * For auto-request sense, we get a scsi_arq_status back from
17593 	 * the HBA, with the sense data in the sts_sensedata member.
17594 	 * The pkt_scbp of the packet points to this scsi_arq_status.
17595 	 */
17596 	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
17597 
17598 	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
17599 		/*
17600 		 * The auto REQUEST SENSE failed; see if we can re-try
17601 		 * the original command.
17602 		 */
17603 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17604 		    "auto request sense failed (reason=%s)\n",
17605 		    scsi_rname(asp->sts_rqpkt_reason));
17606 
17607 		sd_reset_target(un, pktp);
17608 
17609 		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
17610 		    NULL, NULL, EIO, (clock_t)0, NULL);
17611 		return;
17612 	}
17613 
17614 	/* Save the relevant sense info into the xp for the original cmd. */
17615 	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
17616 	xp->xb_sense_state  = asp->sts_rqpkt_state;
17617 	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
17618 	if (xp->xb_sense_state & STATE_XARQ_DONE) {
17619 		actual_len = MAX_SENSE_LENGTH - xp->xb_sense_resid;
17620 		bcopy(&asp->sts_sensedata, xp->xb_sense_data,
17621 		    MAX_SENSE_LENGTH);
17622 	} else {
17623 		if (xp->xb_sense_resid > SENSE_LENGTH) {
17624 			actual_len = MAX_SENSE_LENGTH - xp->xb_sense_resid;
17625 		} else {
17626 			actual_len = SENSE_LENGTH - xp->xb_sense_resid;
17627 		}
17628 		if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
17629 			if ((((struct uscsi_cmd *)
17630 			    (xp->xb_pktinfo))->uscsi_rqlen) > actual_len) {
17631 				xp->xb_sense_resid = (((struct uscsi_cmd *)
17632 				    (xp->xb_pktinfo))->uscsi_rqlen) -
17633 				    actual_len;
17634 			} else {
17635 				xp->xb_sense_resid = 0;
17636 			}
17637 		}
17638 		bcopy(&asp->sts_sensedata, xp->xb_sense_data, SENSE_LENGTH);
17639 	}
17640 
17641 	/*
17642 	 * See if we have valid sense data, if so then turn it over to
17643 	 * sd_decode_sense() to figure out the right course of action.
17644 	 */
17645 	if (sd_validate_sense_data(un, bp, xp, actual_len) ==
17646 	    SD_SENSE_DATA_IS_VALID) {
17647 		sd_decode_sense(un, bp, xp, pktp);
17648 	}
17649 }
17650 
17651 
17652 /*
17653  *    Function: sd_print_sense_failed_msg
17654  *
17655  * Description: Print log message when RQS has failed.
17656  *
17657  *   Arguments: un - ptr to associated softstate
17658  *		bp - ptr to buf(9S) for the command
17659  *		arg - generic message string ptr
17660  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17661  *			or SD_NO_RETRY_ISSUED
17662  *
17663  *     Context: May be called from interrupt context
17664  */
17665 
17666 static void
17667 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
17668     int code)
17669 {
17670 	char	*msgp = arg;
17671 
17672 	ASSERT(un != NULL);
17673 	ASSERT(mutex_owned(SD_MUTEX(un)));
17674 	ASSERT(bp != NULL);
17675 
17676 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
17677 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
17678 	}
17679 }
17680 
17681 
17682 /*
17683  *    Function: sd_validate_sense_data
17684  *
17685  * Description: Check the given sense data for validity.
17686  *		If the sense data is not valid, the command will
17687  *		be either failed or retried!
17688  *
17689  * Return Code: SD_SENSE_DATA_IS_INVALID
17690  *		SD_SENSE_DATA_IS_VALID
17691  *
17692  *     Context: May be called from interrupt context
17693  */
17694 
17695 static int
17696 sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
17697     size_t actual_len)
17698 {
17699 	struct scsi_extended_sense *esp;
17700 	struct	scsi_pkt *pktp;
17701 	char	*msgp = NULL;
17702 	sd_ssc_t *sscp;
17703 
17704 	ASSERT(un != NULL);
17705 	ASSERT(mutex_owned(SD_MUTEX(un)));
17706 	ASSERT(bp != NULL);
17707 	ASSERT(bp != un->un_rqs_bp);
17708 	ASSERT(xp != NULL);
17709 	ASSERT(un->un_fm_private != NULL);
17710 
17711 	pktp = SD_GET_PKTP(bp);
17712 	ASSERT(pktp != NULL);
17713 
17714 	sscp = &((struct sd_fm_internal *)(un->un_fm_private))->fm_ssc;
17715 	ASSERT(sscp != NULL);
17716 
17717 	/*
17718 	 * Check the status of the RQS command (auto or manual).
17719 	 */
17720 	switch (xp->xb_sense_status & STATUS_MASK) {
17721 	case STATUS_GOOD:
17722 		break;
17723 
17724 	case STATUS_RESERVATION_CONFLICT:
17725 		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
17726 		return (SD_SENSE_DATA_IS_INVALID);
17727 
17728 	case STATUS_BUSY:
17729 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17730 		    "Busy Status on REQUEST SENSE\n");
17731 		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
17732 		    NULL, EIO, un->un_busy_timeout / 500, kstat_waitq_enter);
17733 		return (SD_SENSE_DATA_IS_INVALID);
17734 
17735 	case STATUS_QFULL:
17736 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17737 		    "QFULL Status on REQUEST SENSE\n");
17738 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
17739 		    NULL, EIO, un->un_busy_timeout / 500, kstat_waitq_enter);
17740 		return (SD_SENSE_DATA_IS_INVALID);
17741 
17742 	case STATUS_CHECK:
17743 	case STATUS_TERMINATED:
17744 		msgp = "Check Condition on REQUEST SENSE\n";
17745 		goto sense_failed;
17746 
17747 	default:
17748 		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
17749 		goto sense_failed;
17750 	}
17751 
17752 	/*
17753 	 * See if we got the minimum required amount of sense data.
17754 	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
17755 	 * or less.
17756 	 */
17757 	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
17758 	    (actual_len == 0)) {
17759 		msgp = "Request Sense couldn't get sense data\n";
17760 		goto sense_failed;
17761 	}
17762 
17763 	if (actual_len < SUN_MIN_SENSE_LENGTH) {
17764 		msgp = "Not enough sense information\n";
17765 		/* Mark the ssc_flags for detecting invalid sense data */
17766 		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
17767 			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
17768 			    "sense-data");
17769 		}
17770 		goto sense_failed;
17771 	}
17772 
17773 	/*
17774 	 * We require the extended sense data
17775 	 */
17776 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
17777 	if (esp->es_class != CLASS_EXTENDED_SENSE) {
17778 		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
17779 			static char tmp[8];
17780 			static char buf[148];
17781 			char *p = (char *)(xp->xb_sense_data);
17782 			int i;
17783 
17784 			mutex_enter(&sd_sense_mutex);
17785 			(void) strcpy(buf, "undecodable sense information:");
17786 			for (i = 0; i < actual_len; i++) {
17787 				(void) sprintf(tmp, " 0x%x", *(p++) & 0xff);
17788 				(void) strcpy(&buf[strlen(buf)], tmp);
17789 			}
17790 			i = strlen(buf);
17791 			(void) strcpy(&buf[i], "-(assumed fatal)\n");
17792 
17793 			if (SD_FM_LOG(un) == SD_FM_LOG_NSUP) {
17794 				scsi_log(SD_DEVINFO(un), sd_label,
17795 				    CE_WARN, buf);
17796 			}
17797 			mutex_exit(&sd_sense_mutex);
17798 		}
17799 
17800 		/* Mark the ssc_flags for detecting invalid sense data */
17801 		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
17802 			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
17803 			    "sense-data");
17804 		}
17805 
17806 		/* Note: Legacy behavior, fail the command with no retry */
17807 		sd_return_failed_command(un, bp, EIO);
17808 		return (SD_SENSE_DATA_IS_INVALID);
17809 	}
17810 
17811 	/*
17812 	 * Check that es_code is valid (es_class concatenated with es_code
17813 	 * make up the "response code" field.  es_class will always be 7, so
17814 	 * make sure es_code is 0, 1, 2, 3 or 0xf.  es_code will indicate the
17815 	 * format.
17816 	 */
17817 	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
17818 	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
17819 	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
17820 	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
17821 	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
17822 		/* Mark the ssc_flags for detecting invalid sense data */
17823 		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
17824 			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
17825 			    "sense-data");
17826 		}
17827 		goto sense_failed;
17828 	}
17829 
17830 	return (SD_SENSE_DATA_IS_VALID);
17831 
17832 sense_failed:
17833 	/*
17834 	 * If the request sense failed (for whatever reason), attempt
17835 	 * to retry the original command.
17836 	 */
17837 #if defined(__x86)
17838 	/*
17839 	 * SD_RETRY_DELAY is conditionally compile (#if fibre) in
17840 	 * sddef.h for Sparc platform, and x86 uses 1 binary
17841 	 * for both SCSI/FC.
17842 	 * The SD_RETRY_DELAY value need to be adjusted here
17843 	 * when SD_RETRY_DELAY change in sddef.h
17844 	 */
17845 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
17846 	    sd_print_sense_failed_msg, msgp, EIO,
17847 	    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0, NULL);
17848 #else
17849 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
17850 	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
17851 #endif
17852 
17853 	return (SD_SENSE_DATA_IS_INVALID);
17854 }
17855 
17856 /*
17857  *    Function: sd_decode_sense
17858  *
17859  * Description: Take recovery action(s) when SCSI Sense Data is received.
17860  *
17861  *     Context: Interrupt context.
17862  */
17863 
17864 static void
17865 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
17866     struct scsi_pkt *pktp)
17867 {
17868 	uint8_t sense_key;
17869 
17870 	ASSERT(un != NULL);
17871 	ASSERT(mutex_owned(SD_MUTEX(un)));
17872 	ASSERT(bp != NULL);
17873 	ASSERT(bp != un->un_rqs_bp);
17874 	ASSERT(xp != NULL);
17875 	ASSERT(pktp != NULL);
17876 
17877 	sense_key = scsi_sense_key(xp->xb_sense_data);
17878 
17879 	switch (sense_key) {
17880 	case KEY_NO_SENSE:
17881 		sd_sense_key_no_sense(un, bp, xp, pktp);
17882 		break;
17883 	case KEY_RECOVERABLE_ERROR:
17884 		sd_sense_key_recoverable_error(un, xp->xb_sense_data,
17885 		    bp, xp, pktp);
17886 		break;
17887 	case KEY_NOT_READY:
17888 		sd_sense_key_not_ready(un, xp->xb_sense_data,
17889 		    bp, xp, pktp);
17890 		break;
17891 	case KEY_MEDIUM_ERROR:
17892 	case KEY_HARDWARE_ERROR:
17893 		sd_sense_key_medium_or_hardware_error(un,
17894 		    xp->xb_sense_data, bp, xp, pktp);
17895 		break;
17896 	case KEY_ILLEGAL_REQUEST:
17897 		sd_sense_key_illegal_request(un, bp, xp, pktp);
17898 		break;
17899 	case KEY_UNIT_ATTENTION:
17900 		sd_sense_key_unit_attention(un, xp->xb_sense_data,
17901 		    bp, xp, pktp);
17902 		break;
17903 	case KEY_WRITE_PROTECT:
17904 	case KEY_VOLUME_OVERFLOW:
17905 	case KEY_MISCOMPARE:
17906 		sd_sense_key_fail_command(un, bp, xp, pktp);
17907 		break;
17908 	case KEY_BLANK_CHECK:
17909 		sd_sense_key_blank_check(un, bp, xp, pktp);
17910 		break;
17911 	case KEY_ABORTED_COMMAND:
17912 		sd_sense_key_aborted_command(un, bp, xp, pktp);
17913 		break;
17914 	case KEY_VENDOR_UNIQUE:
17915 	case KEY_COPY_ABORTED:
17916 	case KEY_EQUAL:
17917 	case KEY_RESERVED:
17918 	default:
17919 		sd_sense_key_default(un, xp->xb_sense_data,
17920 		    bp, xp, pktp);
17921 		break;
17922 	}
17923 }
17924 
17925 
17926 /*
17927  *    Function: sd_dump_memory
17928  *
17929  * Description: Debug logging routine to print the contents of a user provided
17930  *		buffer. The output of the buffer is broken up into 256 byte
17931  *		segments due to a size constraint of the scsi_log.
17932  *		implementation.
17933  *
17934  *   Arguments: un - ptr to softstate
17935  *		comp - component mask
17936  *		title - "title" string to preceed data when printed
17937  *		data - ptr to data block to be printed
17938  *		len - size of data block to be printed
17939  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
17940  *
17941  *     Context: May be called from interrupt context
17942  */
17943 
17944 #define	SD_DUMP_MEMORY_BUF_SIZE	256
17945 
17946 static char *sd_dump_format_string[] = {
17947 		" 0x%02x",
17948 		" %c"
17949 };
17950 
17951 static void
17952 sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
17953     int len, int fmt)
17954 {
17955 	int	i, j;
17956 	int	avail_count;
17957 	int	start_offset;
17958 	int	end_offset;
17959 	size_t	entry_len;
17960 	char	*bufp;
17961 	char	*local_buf;
17962 	char	*format_string;
17963 
17964 	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
17965 
17966 	/*
17967 	 * In the debug version of the driver, this function is called from a
17968 	 * number of places which are NOPs in the release driver.
17969 	 * The debug driver therefore has additional methods of filtering
17970 	 * debug output.
17971 	 */
17972 #ifdef SDDEBUG
17973 	/*
17974 	 * In the debug version of the driver we can reduce the amount of debug
17975 	 * messages by setting sd_error_level to something other than
17976 	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
17977 	 * sd_component_mask.
17978 	 */
17979 	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
17980 	    (sd_error_level != SCSI_ERR_ALL)) {
17981 		return;
17982 	}
17983 	if (((sd_component_mask & comp) == 0) ||
17984 	    (sd_error_level != SCSI_ERR_ALL)) {
17985 		return;
17986 	}
17987 #else
17988 	if (sd_error_level != SCSI_ERR_ALL) {
17989 		return;
17990 	}
17991 #endif
17992 
17993 	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
17994 	bufp = local_buf;
17995 	/*
17996 	 * Available length is the length of local_buf[], minus the
17997 	 * length of the title string, minus one for the ":", minus
17998 	 * one for the newline, minus one for the NULL terminator.
17999 	 * This gives the #bytes available for holding the printed
18000 	 * values from the given data buffer.
18001 	 */
18002 	if (fmt == SD_LOG_HEX) {
18003 		format_string = sd_dump_format_string[0];
18004 	} else /* SD_LOG_CHAR */ {
18005 		format_string = sd_dump_format_string[1];
18006 	}
18007 	/*
18008 	 * Available count is the number of elements from the given
18009 	 * data buffer that we can fit into the available length.
18010 	 * This is based upon the size of the format string used.
18011 	 * Make one entry and find it's size.
18012 	 */
18013 	(void) sprintf(bufp, format_string, data[0]);
18014 	entry_len = strlen(bufp);
18015 	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
18016 
18017 	j = 0;
18018 	while (j < len) {
18019 		bufp = local_buf;
18020 		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
18021 		start_offset = j;
18022 
18023 		end_offset = start_offset + avail_count;
18024 
18025 		(void) sprintf(bufp, "%s:", title);
18026 		bufp += strlen(bufp);
18027 		for (i = start_offset; ((i < end_offset) && (j < len));
18028 		    i++, j++) {
18029 			(void) sprintf(bufp, format_string, data[i]);
18030 			bufp += entry_len;
18031 		}
18032 		(void) sprintf(bufp, "\n");
18033 
18034 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
18035 	}
18036 	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
18037 }
18038 
18039 /*
18040  *    Function: sd_print_sense_msg
18041  *
18042  * Description: Log a message based upon the given sense data.
18043  *
18044  *   Arguments: un - ptr to associated softstate
18045  *		bp - ptr to buf(9S) for the command
18046  *		arg - ptr to associate sd_sense_info struct
18047  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18048  *			or SD_NO_RETRY_ISSUED
18049  *
18050  *     Context: May be called from interrupt context
18051  */
18052 
18053 static void
18054 sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
18055 {
18056 	struct sd_xbuf	*xp;
18057 	struct scsi_pkt	*pktp;
18058 	uint8_t *sensep;
18059 	daddr_t request_blkno;
18060 	diskaddr_t err_blkno;
18061 	int severity;
18062 	int pfa_flag;
18063 	extern struct scsi_key_strings scsi_cmds[];
18064 
18065 	ASSERT(un != NULL);
18066 	ASSERT(mutex_owned(SD_MUTEX(un)));
18067 	ASSERT(bp != NULL);
18068 	xp = SD_GET_XBUF(bp);
18069 	ASSERT(xp != NULL);
18070 	pktp = SD_GET_PKTP(bp);
18071 	ASSERT(pktp != NULL);
18072 	ASSERT(arg != NULL);
18073 
18074 	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
18075 	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;
18076 
18077 	if ((code == SD_DELAYED_RETRY_ISSUED) ||
18078 	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
18079 		severity = SCSI_ERR_RETRYABLE;
18080 	}
18081 
18082 	/* Use absolute block number for the request block number */
18083 	request_blkno = xp->xb_blkno;
18084 
18085 	/*
18086 	 * Now try to get the error block number from the sense data
18087 	 */
18088 	sensep = xp->xb_sense_data;
18089 
18090 	if (scsi_sense_info_uint64(sensep, SENSE_LENGTH,
18091 	    (uint64_t *)&err_blkno)) {
18092 		/*
18093 		 * We retrieved the error block number from the information
18094 		 * portion of the sense data.
18095 		 *
18096 		 * For USCSI commands we are better off using the error
18097 		 * block no. as the requested block no. (This is the best
18098 		 * we can estimate.)
18099 		 */
18100 		if ((SD_IS_BUFIO(xp) == FALSE) &&
18101 		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
18102 			request_blkno = err_blkno;
18103 		}
18104 	} else {
18105 		/*
18106 		 * Without the es_valid bit set (for fixed format) or an
18107 		 * information descriptor (for descriptor format) we cannot
18108 		 * be certain of the error blkno, so just use the
18109 		 * request_blkno.
18110 		 */
18111 		err_blkno = (diskaddr_t)request_blkno;
18112 	}
18113 
18114 	/*
18115 	 * The following will log the buffer contents for the release driver
18116 	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
18117 	 * level is set to verbose.
18118 	 */
18119 	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
18120 	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
18121 	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
18122 	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);
18123 
18124 	if (pfa_flag == FALSE) {
18125 		/* This is normally only set for USCSI */
18126 		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
18127 			return;
18128 		}
18129 
18130 		if ((SD_IS_BUFIO(xp) == TRUE) &&
18131 		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
18132 		    (severity < sd_error_level))) {
18133 			return;
18134 		}
18135 	}
18136 	/*
18137 	 * Check for Sonoma Failover and keep a count of how many failed I/O's
18138 	 */
18139 	if ((SD_IS_LSI(un)) &&
18140 	    (scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) &&
18141 	    (scsi_sense_asc(sensep) == 0x94) &&
18142 	    (scsi_sense_ascq(sensep) == 0x01)) {
18143 		un->un_sonoma_failure_count++;
18144 		if (un->un_sonoma_failure_count > 1) {
18145 			return;
18146 		}
18147 	}
18148 
18149 	if (SD_FM_LOG(un) == SD_FM_LOG_NSUP ||
18150 	    ((scsi_sense_key(sensep) == KEY_RECOVERABLE_ERROR) &&
18151 	    (pktp->pkt_resid == 0))) {
18152 		scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
18153 		    request_blkno, err_blkno, scsi_cmds,
18154 		    (struct scsi_extended_sense *)sensep,
18155 		    un->un_additional_codes, NULL);
18156 	}
18157 }
18158 
18159 /*
18160  *    Function: sd_sense_key_no_sense
18161  *
18162  * Description: Recovery action when sense data was not received.
18163  *
18164  *     Context: May be called from interrupt context
18165  */
18166 
18167 static void
18168 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18169     struct scsi_pkt *pktp)
18170 {
18171 	struct sd_sense_info	si;
18172 
18173 	ASSERT(un != NULL);
18174 	ASSERT(mutex_owned(SD_MUTEX(un)));
18175 	ASSERT(bp != NULL);
18176 	ASSERT(xp != NULL);
18177 	ASSERT(pktp != NULL);
18178 
18179 	si.ssi_severity = SCSI_ERR_FATAL;
18180 	si.ssi_pfa_flag = FALSE;
18181 
18182 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
18183 
18184 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18185 	    &si, EIO, (clock_t)0, NULL);
18186 }
18187 
18188 
18189 /*
18190  *    Function: sd_sense_key_recoverable_error
18191  *
18192  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
18193  *
18194  *     Context: May be called from interrupt context
18195  */
18196 
18197 static void
18198 sd_sense_key_recoverable_error(struct sd_lun *un, uint8_t *sense_datap,
18199     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
18200 {
18201 	struct sd_sense_info	si;
18202 	uint8_t asc = scsi_sense_asc(sense_datap);
18203 	uint8_t ascq = scsi_sense_ascq(sense_datap);
18204 
18205 	ASSERT(un != NULL);
18206 	ASSERT(mutex_owned(SD_MUTEX(un)));
18207 	ASSERT(bp != NULL);
18208 	ASSERT(xp != NULL);
18209 	ASSERT(pktp != NULL);
18210 
18211 	/*
18212 	 * 0x00, 0x1D: ATA PASSTHROUGH INFORMATION AVAILABLE
18213 	 */
18214 	if (asc == 0x00 && ascq == 0x1D) {
18215 		sd_return_command(un, bp);
18216 		return;
18217 	}
18218 
18219 	/*
18220 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
18221 	 */
18222 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
18223 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
18224 		si.ssi_severity = SCSI_ERR_INFO;
18225 		si.ssi_pfa_flag = TRUE;
18226 	} else {
18227 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
18228 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
18229 		si.ssi_severity = SCSI_ERR_RECOVERED;
18230 		si.ssi_pfa_flag = FALSE;
18231 	}
18232 
18233 	if (pktp->pkt_resid == 0) {
18234 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18235 		sd_return_command(un, bp);
18236 		return;
18237 	}
18238 
18239 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18240 	    &si, EIO, (clock_t)0, NULL);
18241 }
18242 
18243 
18244 
18245 
18246 /*
18247  *    Function: sd_sense_key_not_ready
18248  *
18249  * Description: Recovery actions for a SCSI "Not Ready" sense key.
18250  *
18251  *     Context: May be called from interrupt context
18252  */
18253 
18254 static void
18255 sd_sense_key_not_ready(struct sd_lun *un, uint8_t *sense_datap, struct buf *bp,
18256     struct sd_xbuf *xp, struct scsi_pkt *pktp)
18257 {
18258 	struct sd_sense_info	si;
18259 	uint8_t asc = scsi_sense_asc(sense_datap);
18260 	uint8_t ascq = scsi_sense_ascq(sense_datap);
18261 
18262 	ASSERT(un != NULL);
18263 	ASSERT(mutex_owned(SD_MUTEX(un)));
18264 	ASSERT(bp != NULL);
18265 	ASSERT(xp != NULL);
18266 	ASSERT(pktp != NULL);
18267 
18268 	si.ssi_severity = SCSI_ERR_FATAL;
18269 	si.ssi_pfa_flag = FALSE;
18270 
18271 	/*
18272 	 * Update error stats after first NOT READY error. Disks may have
18273 	 * been powered down and may need to be restarted.  For CDROMs,
18274 	 * report NOT READY errors only if media is present.
18275 	 */
18276 	if ((ISCD(un) && (asc == 0x3A)) ||
18277 	    (xp->xb_nr_retry_count > 0)) {
18278 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
18279 		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
18280 	}
18281 
18282 	/*
18283 	 * Just fail if the "not ready" retry limit has been reached.
18284 	 */
18285 	if (xp->xb_nr_retry_count >= un->un_notready_retry_count) {
18286 		/* Special check for error message printing for removables. */
18287 		if (un->un_f_has_removable_media && (asc == 0x04) &&
18288 		    (ascq >= 0x04)) {
18289 			si.ssi_severity = SCSI_ERR_ALL;
18290 		}
18291 		goto fail_command;
18292 	}
18293 
18294 	/*
18295 	 * Check the ASC and ASCQ in the sense data as needed, to determine
18296 	 * what to do.
18297 	 */
18298 	switch (asc) {
18299 	case 0x04:	/* LOGICAL UNIT NOT READY */
18300 		/*
18301 		 * disk drives that don't spin up result in a very long delay
18302 		 * in format without warning messages. We will log a message
18303 		 * if the error level is set to verbose.
18304 		 */
18305 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
18306 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18307 			    "logical unit not ready, resetting disk\n");
18308 		}
18309 
18310 		/*
18311 		 * There are different requirements for CDROMs and disks for
18312 		 * the number of retries.  If a CD-ROM is giving this, it is
18313 		 * probably reading TOC and is in the process of getting
18314 		 * ready, so we should keep on trying for a long time to make
18315 		 * sure that all types of media are taken in account (for
18316 		 * some media the drive takes a long time to read TOC).  For
18317 		 * disks we do not want to retry this too many times as this
18318 		 * can cause a long hang in format when the drive refuses to
18319 		 * spin up (a very common failure).
18320 		 */
18321 		switch (ascq) {
18322 		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
18323 			/*
18324 			 * Disk drives frequently refuse to spin up which
18325 			 * results in a very long hang in format without
18326 			 * warning messages.
18327 			 *
18328 			 * Note: This code preserves the legacy behavior of
18329 			 * comparing xb_nr_retry_count against zero for fibre
18330 			 * channel targets instead of comparing against the
18331 			 * un_reset_retry_count value.  The reason for this
18332 			 * discrepancy has been so utterly lost beneath the
18333 			 * Sands of Time that even Indiana Jones could not
18334 			 * find it.
18335 			 */
18336 			if (un->un_f_is_fibre == TRUE) {
18337 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
18338 				    (xp->xb_nr_retry_count > 0)) &&
18339 				    (un->un_startstop_timeid == NULL)) {
18340 					scsi_log(SD_DEVINFO(un), sd_label,
18341 					    CE_WARN, "logical unit not ready, "
18342 					    "resetting disk\n");
18343 					sd_reset_target(un, pktp);
18344 				}
18345 			} else {
18346 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
18347 				    (xp->xb_nr_retry_count >
18348 				    un->un_reset_retry_count)) &&
18349 				    (un->un_startstop_timeid == NULL)) {
18350 					scsi_log(SD_DEVINFO(un), sd_label,
18351 					    CE_WARN, "logical unit not ready, "
18352 					    "resetting disk\n");
18353 					sd_reset_target(un, pktp);
18354 				}
18355 			}
18356 			break;
18357 
18358 		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
18359 			/*
18360 			 * If the target is in the process of becoming
18361 			 * ready, just proceed with the retry. This can
18362 			 * happen with CD-ROMs that take a long time to
18363 			 * read TOC after a power cycle or reset.
18364 			 */
18365 			goto do_retry;
18366 
18367 		case 0x02:  /* LUN NOT READY, INITITIALIZING CMD REQUIRED */
18368 			break;
18369 
18370 		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
18371 			/*
18372 			 * Retries cannot help here so just fail right away.
18373 			 */
18374 			goto fail_command;
18375 
18376 		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
18377 		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
18378 		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
18379 		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
18380 		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
18381 		default:    /* Possible future codes in SCSI spec? */
18382 			/*
18383 			 * For removable-media devices, do not retry if
18384 			 * ASCQ > 2 as these result mostly from USCSI commands
18385 			 * on MMC devices issued to check status of an
18386 			 * operation initiated in immediate mode.  Also for
18387 			 * ASCQ >= 4 do not print console messages as these
18388 			 * mainly represent a user-initiated operation
18389 			 * instead of a system failure.
18390 			 */
18391 			if (un->un_f_has_removable_media) {
18392 				si.ssi_severity = SCSI_ERR_ALL;
18393 				goto fail_command;
18394 			}
18395 			break;
18396 		}
18397 
18398 		/*
18399 		 * As part of our recovery attempt for the NOT READY
18400 		 * condition, we issue a START STOP UNIT command. However
18401 		 * we want to wait for a short delay before attempting this
18402 		 * as there may still be more commands coming back from the
18403 		 * target with the check condition. To do this we use
18404 		 * timeout(9F) to call sd_start_stop_unit_callback() after
18405 		 * the delay interval expires. (sd_start_stop_unit_callback()
18406 		 * dispatches sd_start_stop_unit_task(), which will issue
18407 		 * the actual START STOP UNIT command. The delay interval
18408 		 * is one-half of the delay that we will use to retry the
18409 		 * command that generated the NOT READY condition.
18410 		 *
18411 		 * Note that we could just dispatch sd_start_stop_unit_task()
18412 		 * from here and allow it to sleep for the delay interval,
18413 		 * but then we would be tying up the taskq thread
		 * unnecessarily for the duration of the delay.
18415 		 *
18416 		 * Do not issue the START STOP UNIT if the current command
18417 		 * is already a START STOP UNIT.
18418 		 */
18419 		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
18420 			break;
18421 		}
18422 
18423 		/*
18424 		 * Do not schedule the timeout if one is already pending.
18425 		 */
18426 		if (un->un_startstop_timeid != NULL) {
18427 			SD_INFO(SD_LOG_ERROR, un,
18428 			    "sd_sense_key_not_ready: restart already issued to"
18429 			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
18430 			    ddi_get_instance(SD_DEVINFO(un)));
18431 			break;
18432 		}
18433 
18434 		/*
18435 		 * Schedule the START STOP UNIT command, then queue the command
18436 		 * for a retry.
18437 		 *
18438 		 * Note: A timeout is not scheduled for this retry because we
18439 		 * want the retry to be serial with the START_STOP_UNIT. The
18440 		 * retry will be started when the START_STOP_UNIT is completed
18441 		 * in sd_start_stop_unit_task.
18442 		 */
18443 		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
18444 		    un, un->un_busy_timeout / 2);
18445 		xp->xb_nr_retry_count++;
18446 		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
18447 		return;
18448 
18449 	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
18450 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
18451 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18452 			    "unit does not respond to selection\n");
18453 		}
18454 		break;
18455 
18456 	case 0x3A:	/* MEDIUM NOT PRESENT */
18457 		if (sd_error_level >= SCSI_ERR_FATAL) {
18458 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18459 			    "Caddy not inserted in drive\n");
18460 		}
18461 
18462 		sr_ejected(un);
18463 		un->un_mediastate = DKIO_EJECTED;
18464 		/* The state has changed, inform the media watch routines */
18465 		cv_broadcast(&un->un_state_cv);
18466 		/* Just fail if no media is present in the drive. */
18467 		goto fail_command;
18468 
18469 	default:
18470 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
18471 			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
18472 			    "Unit not Ready. Additional sense code 0x%x\n",
18473 			    asc);
18474 		}
18475 		break;
18476 	}
18477 
18478 do_retry:
18479 
18480 	/*
18481 	 * Retry the command, as some targets may report NOT READY for
18482 	 * several seconds after being reset.
18483 	 */
18484 	xp->xb_nr_retry_count++;
18485 	si.ssi_severity = SCSI_ERR_RETRYABLE;
18486 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
18487 	    &si, EIO, un->un_busy_timeout, NULL);
18488 
18489 	return;
18490 
18491 fail_command:
18492 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18493 	sd_return_failed_command(un, bp, EIO);
18494 }
18495 
18496 
18497 
18498 /*
18499  *    Function: sd_sense_key_medium_or_hardware_error
18500  *
18501  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
18502  *		sense key.
18503  *
18504  *     Context: May be called from interrupt context
18505  */
18506 
static void
sd_sense_key_medium_or_hardware_error(struct sd_lun *un, uint8_t *sense_datap,
    struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	struct sd_sense_info	si;
	uint8_t sense_key = scsi_sense_key(sense_datap);
	uint8_t asc = scsi_sense_asc(sense_datap);

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	si.ssi_severity = SCSI_ERR_FATAL;
	si.ssi_pfa_flag = FALSE;

	/* Medium errors get their own kstat in addition to sd_harderrs. */
	if (sense_key == KEY_MEDIUM_ERROR) {
		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
	}

	SD_UPDATE_ERRSTATS(un, sd_harderrs);

	/*
	 * When this command's retry count reaches the configured reset
	 * threshold, attempt recovery by resetting the device: a LUN
	 * reset first (if enabled), falling back to a target reset.
	 * SD_MUTEX is dropped across the reset calls and reacquired
	 * afterwards.
	 */
	if ((un->un_reset_retry_count != 0) &&
	    (xp->xb_retry_count == un->un_reset_retry_count)) {
		mutex_exit(SD_MUTEX(un));
		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
		if (un->un_f_allow_bus_device_reset == TRUE) {

			boolean_t try_resetting_target = B_TRUE;

			/*
			 * We need to be able to handle specific ASC when we are
			 * handling a KEY_HARDWARE_ERROR. In particular
			 * taking the default action of resetting the target may
			 * not be the appropriate way to attempt recovery.
			 * Resetting a target because of a single LUN failure
			 * victimizes all LUNs on that target.
			 *
			 * This is true for the LSI arrays, if an LSI
			 * array controller returns an ASC of 0x84 (LUN Dead) we
			 * should trust it.
			 */

			if (sense_key == KEY_HARDWARE_ERROR) {
				switch (asc) {
				case 0x84:
					if (SD_IS_LSI(un)) {
						try_resetting_target = B_FALSE;
					}
					break;
				default:
					break;
				}
			}

			if (try_resetting_target == B_TRUE) {
				int reset_retval = 0;
				if (un->un_f_lun_reset_enabled == TRUE) {
					SD_TRACE(SD_LOG_IO_CORE, un,
					    "sd_sense_key_medium_or_hardware_"
					    "error: issuing RESET_LUN\n");
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_LUN);
				}
				/* Fall back to a target reset on failure. */
				if (reset_retval == 0) {
					SD_TRACE(SD_LOG_IO_CORE, un,
					    "sd_sense_key_medium_or_hardware_"
					    "error: issuing RESET_TARGET\n");
					(void) scsi_reset(SD_ADDRESS(un),
					    RESET_TARGET);
				}
			}
		}
		mutex_enter(SD_MUTEX(un));
	}

	/*
	 * This really ought to be a fatal error, but we will retry anyway
	 * as some drives report this as a spurious error.
	 */
	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
	    &si, EIO, (clock_t)0, NULL);
}
18592 
18593 
18594 
18595 /*
18596  *    Function: sd_sense_key_illegal_request
18597  *
18598  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
18599  *
18600  *     Context: May be called from interrupt context
18601  */
18602 
18603 static void
18604 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
18605     struct sd_xbuf *xp, struct scsi_pkt *pktp)
18606 {
18607 	struct sd_sense_info	si;
18608 
18609 	ASSERT(un != NULL);
18610 	ASSERT(mutex_owned(SD_MUTEX(un)));
18611 	ASSERT(bp != NULL);
18612 	ASSERT(xp != NULL);
18613 	ASSERT(pktp != NULL);
18614 
18615 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
18616 
18617 	si.ssi_severity = SCSI_ERR_INFO;
18618 	si.ssi_pfa_flag = FALSE;
18619 
18620 	/* Pointless to retry if the target thinks it's an illegal request */
18621 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18622 	sd_return_failed_command(un, bp, EIO);
18623 }
18624 
18625 
18626 
18627 
18628 /*
18629  *    Function: sd_sense_key_unit_attention
18630  *
18631  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
18632  *
18633  *     Context: May be called from interrupt context
18634  */
18635 
static void
sd_sense_key_unit_attention(struct sd_lun *un, uint8_t *sense_datap,
    struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	/*
	 * For UNIT ATTENTION we allow retries for one minute. Devices
	 * like Sonoma can return UNIT ATTENTION close to a minute
	 * under certain conditions.
	 */
	int	retry_check_flag = SD_RETRIES_UA;
	boolean_t	kstat_updated = B_FALSE;
	struct	sd_sense_info		si;
	uint8_t asc = scsi_sense_asc(sense_datap);
	uint8_t	ascq = scsi_sense_ascq(sense_datap);

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	si.ssi_severity = SCSI_ERR_INFO;
	si.ssi_pfa_flag = FALSE;


	switch (asc) {
	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
		/*
		 * Predictive failure: when PFA reporting is enabled, mark
		 * the message as a PFA notification and retry with the
		 * standard (non-UA) policy; otherwise fall through to the
		 * common UA retry at the bottom.
		 */
		if (sd_report_pfa != 0) {
			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
			si.ssi_pfa_flag = TRUE;
			retry_check_flag = SD_RETRIES_STANDARD;
			goto do_retry;
		}

		break;

	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
		/*
		 * A reset invalidates any reservation we held; remember
		 * that it was lost and that we want it back.
		 */
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			un->un_resvd_status |=
			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
		}
#ifdef _LP64
		/*
		 * Devices too large for GROUP1 addressing had descriptor
		 * sense enabled; a reset may have reverted that setting,
		 * so re-enable it from taskq context.
		 */
		if (un->un_blockcount + 1 > SD_GROUP1_MAX_ADDRESS) {
			if (taskq_dispatch(sd_tq, sd_reenable_dsense_task,
			    un, KM_NOSLEEP) == TASKQID_INVALID) {
				/*
				 * If we can't dispatch the task we'll just
				 * live without descriptor sense.  We can
				 * try again on the next "unit attention"
				 */
				SD_ERROR(SD_LOG_ERROR, un,
				    "sd_sense_key_unit_attention: "
				    "Could not dispatch "
				    "sd_reenable_dsense_task\n");
			}
		}
#endif /* _LP64 */
		/* FALLTHRU */

	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
		if (!un->un_f_has_removable_media) {
			break;
		}

		/*
		 * When we get a unit attention from a removable-media device,
		 * it may be in a state that will take a long time to recover
		 * (e.g., from a reset).  Since we are executing in interrupt
		 * context here, we cannot wait around for the device to come
		 * back. So hand this command off to sd_media_change_task()
		 * for deferred processing under taskq thread context. (Note
		 * that the command still may be failed if a problem is
		 * encountered at a later time.)
		 */
		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
		    KM_NOSLEEP) == TASKQID_INVALID) {
			/*
			 * Cannot dispatch the request so fail the command.
			 */
			SD_UPDATE_ERRSTATS(un, sd_harderrs);
			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
			si.ssi_severity = SCSI_ERR_FATAL;
			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
			sd_return_failed_command(un, bp, EIO);
		}

		/*
		 * If failed to dispatch sd_media_change_task(), we already
		 * updated kstat. If succeed to dispatch sd_media_change_task(),
		 * we should update kstat later if it encounters an error. So,
		 * we update kstat_updated flag here.
		 */
		kstat_updated = B_TRUE;

		/*
		 * Either the command has been successfully dispatched to a
		 * task Q for retrying, or the dispatch failed. In either case
		 * do NOT retry again by calling sd_retry_command. This sets up
		 * two retries of the same command and when one completes and
		 * frees the resources the other will access freed memory,
		 * a bad thing.
		 */
		return;

	default:
		break;
	}

	/*
	 * ASC  ASCQ
	 *  2A   09	Capacity data has changed
	 *  2A   01	Mode parameters changed
	 *  3F   0E	Reported luns data has changed
	 * Arrays that support logical unit expansion should report
	 * capacity changes(2Ah/09). Mode parameters changed and
	 * reported luns data has changed are the approximation.
	 */
	if (((asc == 0x2a) && (ascq == 0x09)) ||
	    ((asc == 0x2a) && (ascq == 0x01)) ||
	    ((asc == 0x3f) && (ascq == 0x0e))) {
		if (taskq_dispatch(sd_tq, sd_target_change_task, un,
		    KM_NOSLEEP) == TASKQID_INVALID) {
			SD_ERROR(SD_LOG_ERROR, un,
			    "sd_sense_key_unit_attention: "
			    "Could not dispatch sd_target_change_task\n");
		}
	}

	/*
	 * Update kstat if we haven't done that.
	 */
	if (!kstat_updated) {
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
	}

do_retry:
	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
	    EIO, SD_UA_RETRY_DELAY, NULL);
}
18776 
18777 
18778 
18779 /*
18780  *    Function: sd_sense_key_fail_command
18781  *
18782  * Description: Use to fail a command when we don't like the sense key that
18783  *		was returned.
18784  *
18785  *     Context: May be called from interrupt context
18786  */
18787 
18788 static void
18789 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18790     struct scsi_pkt *pktp)
18791 {
18792 	struct sd_sense_info	si;
18793 
18794 	ASSERT(un != NULL);
18795 	ASSERT(mutex_owned(SD_MUTEX(un)));
18796 	ASSERT(bp != NULL);
18797 	ASSERT(xp != NULL);
18798 	ASSERT(pktp != NULL);
18799 
18800 	si.ssi_severity = SCSI_ERR_FATAL;
18801 	si.ssi_pfa_flag = FALSE;
18802 
18803 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18804 	sd_return_failed_command(un, bp, EIO);
18805 }
18806 
18807 
18808 
18809 /*
18810  *    Function: sd_sense_key_blank_check
18811  *
18812  * Description: Recovery actions for a SCSI "Blank Check" sense key.
18813  *		Has no monetary connotation.
18814  *
18815  *     Context: May be called from interrupt context
18816  */
18817 
18818 static void
18819 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18820     struct scsi_pkt *pktp)
18821 {
18822 	struct sd_sense_info	si;
18823 
18824 	ASSERT(un != NULL);
18825 	ASSERT(mutex_owned(SD_MUTEX(un)));
18826 	ASSERT(bp != NULL);
18827 	ASSERT(xp != NULL);
18828 	ASSERT(pktp != NULL);
18829 
18830 	/*
18831 	 * Blank check is not fatal for removable devices, therefore
18832 	 * it does not require a console message.
18833 	 */
18834 	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
18835 	    SCSI_ERR_FATAL;
18836 	si.ssi_pfa_flag = FALSE;
18837 
18838 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18839 	sd_return_failed_command(un, bp, EIO);
18840 }
18841 
18842 
18843 
18844 
18845 /*
18846  *    Function: sd_sense_key_aborted_command
18847  *
18848  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
18849  *
18850  *     Context: May be called from interrupt context
18851  */
18852 
18853 static void
18854 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
18855     struct sd_xbuf *xp, struct scsi_pkt *pktp)
18856 {
18857 	struct sd_sense_info	si;
18858 
18859 	ASSERT(un != NULL);
18860 	ASSERT(mutex_owned(SD_MUTEX(un)));
18861 	ASSERT(bp != NULL);
18862 	ASSERT(xp != NULL);
18863 	ASSERT(pktp != NULL);
18864 
18865 	si.ssi_severity = SCSI_ERR_FATAL;
18866 	si.ssi_pfa_flag = FALSE;
18867 
18868 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18869 
18870 	/*
18871 	 * This really ought to be a fatal error, but we will retry anyway
18872 	 * as some drives report this as a spurious error.
18873 	 */
18874 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18875 	    &si, EIO, drv_usectohz(100000), NULL);
18876 }
18877 
18878 
18879 
18880 /*
18881  *    Function: sd_sense_key_default
18882  *
18883  * Description: Default recovery action for several SCSI sense keys (basically
18884  *		attempts a retry).
18885  *
18886  *     Context: May be called from interrupt context
18887  */
18888 
18889 static void
18890 sd_sense_key_default(struct sd_lun *un, uint8_t *sense_datap, struct buf *bp,
18891     struct sd_xbuf *xp, struct scsi_pkt *pktp)
18892 {
18893 	struct sd_sense_info	si;
18894 	uint8_t sense_key = scsi_sense_key(sense_datap);
18895 
18896 	ASSERT(un != NULL);
18897 	ASSERT(mutex_owned(SD_MUTEX(un)));
18898 	ASSERT(bp != NULL);
18899 	ASSERT(xp != NULL);
18900 	ASSERT(pktp != NULL);
18901 
18902 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18903 
18904 	/*
18905 	 * Undecoded sense key.	Attempt retries and hope that will fix
18906 	 * the problem.  Otherwise, we're dead.
18907 	 */
18908 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
18909 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18910 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
18911 	}
18912 
18913 	si.ssi_severity = SCSI_ERR_FATAL;
18914 	si.ssi_pfa_flag = FALSE;
18915 
18916 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18917 	    &si, EIO, (clock_t)0, NULL);
18918 }
18919 
18920 
18921 
18922 /*
18923  *    Function: sd_print_retry_msg
18924  *
18925  * Description: Print a message indicating the retry action being taken.
18926  *
18927  *   Arguments: un - ptr to associated softstate
18928  *		bp - ptr to buf(9S) for the command
18929  *		arg - not used.
18930  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18931  *			or SD_NO_RETRY_ISSUED
18932  *
18933  *     Context: May be called from interrupt context
18934  */
/* ARGSUSED */
static void
sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
{
	struct sd_xbuf	*xp;
	struct scsi_pkt *pktp;
	char *reasonp;
	char *msgp;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	/*
	 * Stay silent while the device is suspended or in low power,
	 * and for commands explicitly marked FLAG_SILENT; still record
	 * the pkt_reason below.
	 */
	ASSERT(!mutex_owned(&un->un_pm_mutex));
	mutex_enter(&un->un_pm_mutex);
	if ((un->un_state == SD_STATE_SUSPENDED) ||
	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
	    (pktp->pkt_flags & FLAG_SILENT)) {
		mutex_exit(&un->un_pm_mutex);
		goto update_pkt_reason;
	}
	mutex_exit(&un->un_pm_mutex);

	/*
	 * Suppress messages if they are all the same pkt_reason; with
	 * TQ, many (up to 256) are returned with the same pkt_reason.
	 * If we are in panic, then suppress the retry messages.
	 */
	switch (flag) {
	case SD_NO_RETRY_ISSUED:
		msgp = "giving up";
		break;
	case SD_IMMEDIATE_RETRY_ISSUED:
	case SD_DELAYED_RETRY_ISSUED:
		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
		    (sd_error_level != SCSI_ERR_ALL))) {
			return;
		}
		msgp = "retrying command";
		break;
	default:
		/* Unknown flag value: log nothing, just record the reason. */
		goto update_pkt_reason;
	}

	/* A recorded parity error takes precedence over pkt_reason text. */
	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
	    scsi_rname(pktp->pkt_reason));

	if (SD_FM_LOG(un) == SD_FM_LOG_NSUP) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
	}

update_pkt_reason:
	/*
	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
	 * This is to prevent multiple console messages for the same failure
	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
	 * when the command is retried successfully because there still may be
	 * more commands coming back with the same value of pktp->pkt_reason.
	 */
	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
		un->un_last_pkt_reason = pktp->pkt_reason;
	}
}
19004 
19005 
19006 /*
19007  *    Function: sd_print_cmd_incomplete_msg
19008  *
19009  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
19010  *
19011  *   Arguments: un - ptr to associated softstate
19012  *		bp - ptr to buf(9S) for the command
19013  *		arg - passed to sd_print_retry_msg()
19014  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
19015  *			or SD_NO_RETRY_ISSUED
19016  *
19017  *     Context: May be called from interrupt context
19018  */
19019 
19020 static void
19021 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
19022     int code)
19023 {
19024 	dev_info_t	*dip;
19025 
19026 	ASSERT(un != NULL);
19027 	ASSERT(mutex_owned(SD_MUTEX(un)));
19028 	ASSERT(bp != NULL);
19029 
19030 	switch (code) {
19031 	case SD_NO_RETRY_ISSUED:
19032 		/* Command was failed. Someone turned off this target? */
19033 		if (un->un_state != SD_STATE_OFFLINE) {
19034 			/*
19035 			 * Suppress message if we are detaching and
19036 			 * device has been disconnected
19037 			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation
19038 			 * private interface and not part of the DDI
19039 			 */
19040 			dip = un->un_sd->sd_dev;
19041 			if (!(DEVI_IS_DETACHING(dip) &&
19042 			    DEVI_IS_DEVICE_REMOVED(dip))) {
19043 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
19044 				"disk not responding to selection\n");
19045 			}
19046 			New_state(un, SD_STATE_OFFLINE);
19047 		}
19048 		break;
19049 
19050 	case SD_DELAYED_RETRY_ISSUED:
19051 	case SD_IMMEDIATE_RETRY_ISSUED:
19052 	default:
19053 		/* Command was successfully queued for retry */
19054 		sd_print_retry_msg(un, bp, arg, code);
19055 		break;
19056 	}
19057 }
19058 
19059 
19060 /*
19061  *    Function: sd_pkt_reason_cmd_incomplete
19062  *
19063  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
19064  *
19065  *     Context: May be called from interrupt context
19066  */
19067 
19068 static void
19069 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
19070     struct sd_xbuf *xp, struct scsi_pkt *pktp)
19071 {
19072 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
19073 
19074 	ASSERT(un != NULL);
19075 	ASSERT(mutex_owned(SD_MUTEX(un)));
19076 	ASSERT(bp != NULL);
19077 	ASSERT(xp != NULL);
19078 	ASSERT(pktp != NULL);
19079 
19080 	/* Do not do a reset if selection did not complete */
19081 	/* Note: Should this not just check the bit? */
19082 	if (pktp->pkt_state != STATE_GOT_BUS) {
19083 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
19084 		sd_reset_target(un, pktp);
19085 	}
19086 
19087 	/*
19088 	 * If the target was not successfully selected, then set
19089 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
19090 	 * with the target, and further retries and/or commands are
19091 	 * likely to take a long time.
19092 	 */
19093 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
19094 		flag |= SD_RETRIES_FAILFAST;
19095 	}
19096 
19097 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19098 
19099 	sd_retry_command(un, bp, flag,
19100 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19101 }
19102 
19103 
19104 
19105 /*
19106  *    Function: sd_pkt_reason_cmd_tran_err
19107  *
19108  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
19109  *
19110  *     Context: May be called from interrupt context
19111  */
19112 
19113 static void
19114 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
19115     struct sd_xbuf *xp, struct scsi_pkt *pktp)
19116 {
19117 	ASSERT(un != NULL);
19118 	ASSERT(mutex_owned(SD_MUTEX(un)));
19119 	ASSERT(bp != NULL);
19120 	ASSERT(xp != NULL);
19121 	ASSERT(pktp != NULL);
19122 
19123 	/*
19124 	 * Do not reset if we got a parity error, or if
19125 	 * selection did not complete.
19126 	 */
19127 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
19128 	/* Note: Should this not just check the bit for pkt_state? */
19129 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
19130 	    (pktp->pkt_state != STATE_GOT_BUS)) {
19131 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
19132 		sd_reset_target(un, pktp);
19133 	}
19134 
19135 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19136 
19137 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
19138 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19139 }
19140 
19141 
19142 
19143 /*
19144  *    Function: sd_pkt_reason_cmd_reset
19145  *
19146  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
19147  *
19148  *     Context: May be called from interrupt context
19149  */
19150 
19151 static void
19152 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
19153     struct scsi_pkt *pktp)
19154 {
19155 	ASSERT(un != NULL);
19156 	ASSERT(mutex_owned(SD_MUTEX(un)));
19157 	ASSERT(bp != NULL);
19158 	ASSERT(xp != NULL);
19159 	ASSERT(pktp != NULL);
19160 
19161 	/* The target may still be running the command, so try to reset. */
19162 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
19163 	sd_reset_target(un, pktp);
19164 
19165 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19166 
19167 	/*
19168 	 * If pkt_reason is CMD_RESET chances are that this pkt got
19169 	 * reset because another target on this bus caused it. The target
19170 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
19171 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
19172 	 */
19173 
19174 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
19175 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19176 }
19177 
19178 
19179 
19180 
19181 /*
19182  *    Function: sd_pkt_reason_cmd_aborted
19183  *
19184  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
19185  *
19186  *     Context: May be called from interrupt context
19187  */
19188 
19189 static void
19190 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
19191     struct scsi_pkt *pktp)
19192 {
19193 	ASSERT(un != NULL);
19194 	ASSERT(mutex_owned(SD_MUTEX(un)));
19195 	ASSERT(bp != NULL);
19196 	ASSERT(xp != NULL);
19197 	ASSERT(pktp != NULL);
19198 
19199 	/* The target may still be running the command, so try to reset. */
19200 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
19201 	sd_reset_target(un, pktp);
19202 
19203 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19204 
19205 	/*
19206 	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
19207 	 * aborted because another target on this bus caused it. The target
19208 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
19209 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
19210 	 */
19211 
19212 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
19213 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19214 }
19215 
19216 
19217 
19218 /*
19219  *    Function: sd_pkt_reason_cmd_timeout
19220  *
19221  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
19222  *
19223  *     Context: May be called from interrupt context
19224  */
19225 
19226 static void
19227 sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
19228     struct scsi_pkt *pktp)
19229 {
19230 	ASSERT(un != NULL);
19231 	ASSERT(mutex_owned(SD_MUTEX(un)));
19232 	ASSERT(bp != NULL);
19233 	ASSERT(xp != NULL);
19234 	ASSERT(pktp != NULL);
19235 
19236 
19237 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
19238 	sd_reset_target(un, pktp);
19239 
19240 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19241 
19242 	/*
19243 	 * A command timeout indicates that we could not establish
19244 	 * communication with the target, so set SD_RETRIES_FAILFAST
19245 	 * as further retries/commands are likely to take a long time.
19246 	 */
19247 	sd_retry_command(un, bp,
19248 	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
19249 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19250 }
19251 
19252 
19253 
19254 /*
19255  *    Function: sd_pkt_reason_cmd_unx_bus_free
19256  *
19257  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
19258  *
19259  *     Context: May be called from interrupt context
19260  */
19261 
19262 static void
19263 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
19264     struct sd_xbuf *xp, struct scsi_pkt *pktp)
19265 {
19266 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
19267 
19268 	ASSERT(un != NULL);
19269 	ASSERT(mutex_owned(SD_MUTEX(un)));
19270 	ASSERT(bp != NULL);
19271 	ASSERT(xp != NULL);
19272 	ASSERT(pktp != NULL);
19273 
19274 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
19275 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19276 
19277 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
19278 	    sd_print_retry_msg : NULL;
19279 
19280 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
19281 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19282 }
19283 
19284 
19285 /*
19286  *    Function: sd_pkt_reason_cmd_tag_reject
19287  *
19288  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
19289  *
19290  *     Context: May be called from interrupt context
19291  */
19292 
static void
sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	/*
	 * The target rejected the tagged command: clear the tag flags and
	 * lower the throttle so outstanding commands drain, then disable
	 * tagged queueing on the device.
	 */
	SD_UPDATE_ERRSTATS(un, sd_harderrs);
	pktp->pkt_flags = 0;
	un->un_tagflags = 0;
	if (un->un_f_opt_queueing == TRUE) {
		un->un_throttle = min(un->un_throttle, 3);
	} else {
		un->un_throttle = 1;
	}
	/* Drop SD_MUTEX across the scsi_ifsetcap() call. */
	mutex_exit(SD_MUTEX(un));
	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
	mutex_enter(SD_MUTEX(un));

	SD_UPDATE_RESERVATION_STATUS(un, pktp);

	/* Legacy behavior not to check retry counts here. */
	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
}
19321 
19322 
19323 /*
19324  *    Function: sd_pkt_reason_default
19325  *
19326  * Description: Default recovery actions for SCSA pkt_reason values that
19327  *		do not have more explicit recovery actions.
19328  *
19329  *     Context: May be called from interrupt context
19330  */
19331 
19332 static void
19333 sd_pkt_reason_default(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
19334     struct scsi_pkt *pktp)
19335 {
19336 	ASSERT(un != NULL);
19337 	ASSERT(mutex_owned(SD_MUTEX(un)));
19338 	ASSERT(bp != NULL);
19339 	ASSERT(xp != NULL);
19340 	ASSERT(pktp != NULL);
19341 
19342 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
19343 	sd_reset_target(un, pktp);
19344 
19345 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19346 
19347 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
19348 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19349 }
19350 
19351 
19352 
19353 /*
19354  *    Function: sd_pkt_status_check_condition
19355  *
19356  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
19357  *
19358  *     Context: May be called from interrupt context
19359  */
19360 
static void
sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
	    "entry: buf:0x%p xp:0x%p\n", bp, xp);

	/*
	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
	 * command will be retried after the request sense). Otherwise, retry
	 * the command. Note: we are issuing the request sense even though the
	 * retry limit may have been reached for the failed command.
	 */
	if (un->un_f_arq_enabled == FALSE) {
		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
		    "no ARQ, sending request sense command\n");
		sd_send_request_sense_command(un, bp, pktp);
	} else {
		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
		    "ARQ,retrying request sense command\n");
#if defined(__x86)
		/*
		 * The SD_RETRY_DELAY value need to be adjusted here
		 * when SD_RETRY_DELAY change in sddef.h
		 */
		/* Fibre channel targets get a 100ms delay; others none. */
		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
		    un->un_f_is_fibre?drv_usectohz(100000):(clock_t)0,
		    NULL);
#else
		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
		    EIO, SD_RETRY_DELAY, NULL);
#endif
	}

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
}
19403 
19404 
19405 /*
19406  *    Function: sd_pkt_status_busy
19407  *
19408  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
19409  *
19410  *     Context: May be called from interrupt context
19411  */
19412 
static void
sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
    struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_pkt_status_busy: entry\n");

	/* If retries are exhausted, just fail the command. */
	if (xp->xb_retry_count >= un->un_busy_retry_count) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "device busy too long\n");
		sd_return_failed_command(un, bp, EIO);
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_pkt_status_busy: exit\n");
		return;
	}
	xp->xb_retry_count++;

	/*
	 * Try to reset the target. However, we do not want to perform
	 * more than one reset if the device continues to fail. The reset
	 * will be performed when the retry count reaches the reset
	 * threshold.  This threshold should be set such that at least
	 * one retry is issued before the reset is performed.
	 */
	if (xp->xb_retry_count ==
	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
		int rval = 0;
		/*
		 * scsi_reset() may block; drop the per-lun mutex for the
		 * duration of the reset attempts and reacquire it below.
		 */
		mutex_exit(SD_MUTEX(un));
		if (un->un_f_allow_bus_device_reset == TRUE) {
			/*
			 * First try to reset the LUN; if we cannot then
			 * try to reset the target.
			 */
			if (un->un_f_lun_reset_enabled == TRUE) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_pkt_status_busy: RESET_LUN\n");
				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
			}
			if (rval == 0) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_pkt_status_busy: RESET_TARGET\n");
				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
			}
		}
		if (rval == 0) {
			/*
			 * If the RESET_LUN and/or RESET_TARGET failed,
			 * try RESET_ALL
			 */
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_pkt_status_busy: RESET_ALL\n");
			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
		}
		mutex_enter(SD_MUTEX(un));
		if (rval == 0) {
			/*
			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
			 * At this point we give up & fail the command.
			 */
			sd_return_failed_command(un, bp, EIO);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_pkt_status_busy: exit (failed cmd)\n");
			return;
		}
	}

	/*
	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
	 * we have already checked the retry counts above.
	 * The retry is delayed by un_busy_timeout to give the busy
	 * device time to recover.
	 */
	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
	    EIO, un->un_busy_timeout, NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_pkt_status_busy: exit\n");
}
19496 
19497 
19498 /*
19499  *    Function: sd_pkt_status_reservation_conflict
19500  *
19501  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
19502  *		command status.
19503  *
19504  *     Context: May be called from interrupt context
19505  */
19506 
static void
sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	/*
	 * If the command was PERSISTENT_RESERVATION_[IN|OUT] then reservation
	 * conflict could be due to various reasons like incorrect keys, not
	 * registered or not reserved etc. So, we return EACCES to the caller.
	 */
	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
		int cmd = SD_GET_PKT_OPCODE(pktp);
		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
			sd_return_failed_command(un, bp, EACCES);
			return;
		}
	}

	/* Record that this lun has seen a reservation conflict. */
	un->un_resvd_status |= SD_RESERVATION_CONFLICT;

	/*
	 * If failfast is armed for this lun and globally enabled, a
	 * reservation conflict means another node owns the disk; panic
	 * rather than risk data corruption.  Otherwise fail the command
	 * with EACCES.
	 */
	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
		if (sd_failfast_enable != 0) {
			/* By definition, we must panic here.... */
			sd_panic_for_res_conflict(un);
			/*NOTREACHED*/
		}
		SD_ERROR(SD_LOG_IO, un,
		    "sd_handle_resv_conflict: Disk Reserved\n");
		sd_return_failed_command(un, bp, EACCES);
		return;
	}

	/*
	 * 1147670: retry only if sd_retry_on_reservation_conflict
	 * property is set (default is 1). Retries will not succeed
	 * on a disk reserved by another initiator. HA systems
	 * may reset this via sd.conf to avoid these retries.
	 *
	 * Note: The legacy return code for this failure is EIO, however EACCES
	 * seems more appropriate for a reservation conflict.
	 */
	if (sd_retry_on_reservation_conflict == 0) {
		SD_ERROR(SD_LOG_IO, un,
		    "sd_handle_resv_conflict: Device Reserved\n");
		sd_return_failed_command(un, bp, EIO);
		return;
	}

	/*
	 * Retry the command if we can.
	 *
	 * Note: The legacy return code for this failure is EIO, however EACCES
	 * seems more appropriate for a reservation conflict.
	 */
	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
	    (clock_t)2, NULL);
}
19570 
19571 
19572 
19573 /*
19574  *    Function: sd_pkt_status_qfull
19575  *
19576  * Description: Handle a QUEUE FULL condition from the target.  This can
19577  *		occur if the HBA does not handle the queue full condition.
19578  *		(Basically this means third-party HBAs as Sun HBAs will
19579  *		handle the queue full condition.)  Note that if there are
19580  *		some commands already in the transport, then the queue full
19581  *		has occurred because the queue for this nexus is actually
19582  *		full. If there are no commands in the transport, then the
19583  *		queue full is resulting from some other initiator or lun
19584  *		consuming all the resources at the target.
19585  *
19586  *     Context: May be called from interrupt context
19587  */
19588 
static void
sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
    struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_pkt_status_qfull: entry\n");

	/*
	 * Just lower the QFULL throttle and retry the command.  Note that
	 * we do not limit the number of retries here.
	 * (SD_RETRIES_NOCHECK bypasses the retry-count check; the retry
	 * is delayed by SD_RESTART_TIMEOUT to let the queue drain.)
	 */
	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
	    SD_RESTART_TIMEOUT, NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_pkt_status_qfull: exit\n");
}
19613 
19614 
19615 /*
19616  *    Function: sd_reset_target
19617  *
19618  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
19619  *		RESET_TARGET, or RESET_ALL.
19620  *
19621  *     Context: May be called under interrupt context.
19622  */
19623 
static void
sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
{
	int rval = 0;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");

	/*
	 * No need to reset if the transport layer has already done so.
	 */
	if ((pktp->pkt_statistics &
	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_reset_target: no reset\n");
		return;
	}

	/* scsi_reset() may block; drop the per-lun mutex across the calls. */
	mutex_exit(SD_MUTEX(un));

	/*
	 * Escalate: try RESET_LUN first (if enabled), then RESET_TARGET,
	 * and finally RESET_ALL if the narrower resets failed or were
	 * not permitted.
	 */
	if (un->un_f_allow_bus_device_reset == TRUE) {
		if (un->un_f_lun_reset_enabled == TRUE) {
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_reset_target: RESET_LUN\n");
			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
		}
		if (rval == 0) {
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_reset_target: RESET_TARGET\n");
			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
		}
	}

	if (rval == 0) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_reset_target: RESET_ALL\n");
		/* Last resort; the result is deliberately ignored. */
		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
	}

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
}
19670 
19671 /*
19672  *    Function: sd_target_change_task
19673  *
19674  * Description: Handle dynamic target change
19675  *
19676  *     Context: Executes in a taskq() thread context
19677  */
static void
sd_target_change_task(void *arg)
{
	struct sd_lun		*un = arg;
	uint64_t		capacity;
	diskaddr_t		label_cap;
	uint_t			lbasize;
	sd_ssc_t		*ssc;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/* Nothing to compare against if we never had valid geometry. */
	if ((un->un_f_blockcount_is_valid == FALSE) ||
	    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
		return;
	}

	ssc = sd_ssc_init(un);

	if (sd_send_scsi_READ_CAPACITY(ssc, &capacity,
	    &lbasize, SD_PATH_DIRECT) != 0) {
		SD_ERROR(SD_LOG_ERROR, un,
		    "sd_target_change_task: fail to read capacity\n");
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		goto task_exit;
	}

	/* Only a capacity increase is acted upon; shrink/no-change is a no-op. */
	mutex_enter(SD_MUTEX(un));
	if (capacity <= un->un_blockcount) {
		mutex_exit(SD_MUTEX(un));
		goto task_exit;
	}

	sd_update_block_info(un, lbasize, capacity);
	mutex_exit(SD_MUTEX(un));

	/*
	 * If lun is EFI labeled and lun capacity is greater than the
	 * capacity contained in the label, log a sys event.
	 */
	if (cmlb_efi_label_capacity(un->un_cmlbhandle, &label_cap,
	    (void*)SD_PATH_DIRECT) == 0) {
		mutex_enter(SD_MUTEX(un));
		if (un->un_f_blockcount_is_valid &&
		    un->un_blockcount > label_cap) {
			mutex_exit(SD_MUTEX(un));
			sd_log_lun_expansion_event(un, KM_SLEEP);
		} else {
			mutex_exit(SD_MUTEX(un));
		}
	}

task_exit:
	sd_ssc_fini(ssc);
}
19733 
19734 
19735 /*
19736  *    Function: sd_log_dev_status_event
19737  *
19738  * Description: Log EC_dev_status sysevent
19739  *
19740  *     Context: Never called from interrupt context
19741  */
19742 static void
19743 sd_log_dev_status_event(struct sd_lun *un, char *esc, int km_flag)
19744 {
19745 	int err;
19746 	char			*path;
19747 	nvlist_t		*attr_list;
19748 
19749 	/* Allocate and build sysevent attribute list */
19750 	err = nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, km_flag);
19751 	if (err != 0) {
19752 		SD_ERROR(SD_LOG_ERROR, un,
19753 		    "sd_log_dev_status_event: fail to allocate space\n");
19754 		return;
19755 	}
19756 
19757 	path = kmem_alloc(MAXPATHLEN, km_flag);
19758 	if (path == NULL) {
19759 		nvlist_free(attr_list);
19760 		SD_ERROR(SD_LOG_ERROR, un,
19761 		    "sd_log_dev_status_event: fail to allocate space\n");
19762 		return;
19763 	}
19764 	/*
19765 	 * Add path attribute to identify the lun.
19766 	 * We are using minor node 'a' as the sysevent attribute.
19767 	 */
19768 	(void) snprintf(path, MAXPATHLEN, "/devices");
19769 	(void) ddi_pathname(SD_DEVINFO(un), path + strlen(path));
19770 	(void) snprintf(path + strlen(path), MAXPATHLEN - strlen(path),
19771 	    ":a");
19772 
19773 	err = nvlist_add_string(attr_list, DEV_PHYS_PATH, path);
19774 	if (err != 0) {
19775 		nvlist_free(attr_list);
19776 		kmem_free(path, MAXPATHLEN);
19777 		SD_ERROR(SD_LOG_ERROR, un,
19778 		    "sd_log_dev_status_event: fail to add attribute\n");
19779 		return;
19780 	}
19781 
19782 	/* Log dynamic lun expansion sysevent */
19783 	err = ddi_log_sysevent(SD_DEVINFO(un), SUNW_VENDOR, EC_DEV_STATUS,
19784 	    esc, attr_list, NULL, km_flag);
19785 	if (err != DDI_SUCCESS) {
19786 		SD_ERROR(SD_LOG_ERROR, un,
19787 		    "sd_log_dev_status_event: fail to log sysevent\n");
19788 	}
19789 
19790 	nvlist_free(attr_list);
19791 	kmem_free(path, MAXPATHLEN);
19792 }
19793 
19794 
19795 /*
19796  *    Function: sd_log_lun_expansion_event
19797  *
19798  * Description: Log lun expansion sys event
19799  *
19800  *     Context: Never called from interrupt context
19801  */
static void
sd_log_lun_expansion_event(struct sd_lun *un, int km_flag)
{
	/* Thin wrapper: post an EC_dev_status event with ESC_DEV_DLE. */
	sd_log_dev_status_event(un, ESC_DEV_DLE, km_flag);
}
19807 
19808 
19809 /*
19810  *    Function: sd_log_eject_request_event
19811  *
19812  * Description: Log eject request sysevent
19813  *
19814  *     Context: Never called from interrupt context
19815  */
static void
sd_log_eject_request_event(struct sd_lun *un, int km_flag)
{
	/* Thin wrapper: post an EC_dev_status event for an eject request. */
	sd_log_dev_status_event(un, ESC_DEV_EJECT_REQUEST, km_flag);
}
19821 
19822 
19823 /*
19824  *    Function: sd_media_change_task
19825  *
19826  * Description: Recovery action for CDROM to become available.
19827  *
19828  *     Context: Executes in a taskq() thread context
19829  */
19830 
static void
sd_media_change_task(void *arg)
{
	struct	scsi_pkt	*pktp = arg;
	struct	sd_lun		*un;
	struct	buf		*bp;
	struct	sd_xbuf		*xp;
	int	err		= 0;
	int	retry_count	= 0;
	int	retry_limit	= SD_UNIT_ATTENTION_RETRY/10;
	struct	sd_sense_info	si;

	/* Recover the original buf/xbuf/lun from the packet's private data. */
	ASSERT(pktp != NULL);
	bp = (struct buf *)pktp->pkt_private;
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_f_monitor_media_state);

	si.ssi_severity = SCSI_ERR_INFO;
	si.ssi_pfa_flag = FALSE;

	/*
	 * When a reset is issued on a CDROM, it takes a long time to
	 * recover. First few attempts to read capacity and other things
	 * related to handling unit attention fail (with a ASC 0x4 and
	 * ASCQ 0x1). In that case we want to do enough retries and we want
	 * to limit the retries in other cases of genuine failures like
	 * no media in drive.
	 */
	while (retry_count++ < retry_limit) {
		if ((err = sd_handle_mchange(un)) == 0) {
			break;
		}
		if (err == EAGAIN) {
			/* Device still becoming ready: allow the full limit. */
			retry_limit = SD_UNIT_ATTENTION_RETRY;
		}
		/* Sleep for 0.5 sec. & try again */
		delay(drv_usectohz(500000));
	}

	/*
	 * Dispatch (retry or fail) the original command here,
	 * along with appropriate console messages....
	 *
	 * Must grab the mutex before calling sd_retry_command,
	 * sd_print_sense_msg and sd_return_failed_command.
	 */
	mutex_enter(SD_MUTEX(un));
	if (err != SD_CMD_SUCCESS) {
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
		si.ssi_severity = SCSI_ERR_FATAL;
		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
		sd_return_failed_command(un, bp, EIO);
	} else {
		sd_retry_command(un, bp, SD_RETRIES_UA, sd_print_sense_msg,
		    &si, EIO, (clock_t)0, NULL);
	}
	mutex_exit(SD_MUTEX(un));
}
19895 
19896 
19897 
19898 /*
19899  *    Function: sd_handle_mchange
19900  *
19901  * Description: Perform geometry validation & other recovery when CDROM
19902  *		has been removed from drive.
19903  *
19904  * Return Code: 0 for success
19905  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
19906  *		sd_send_scsi_READ_CAPACITY()
19907  *
19908  *     Context: Executes in a taskq() thread context
19909  */
19910 
static int
sd_handle_mchange(struct sd_lun *un)
{
	uint64_t	capacity;
	uint32_t	lbasize;
	int		rval;
	sd_ssc_t	*ssc;

	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_f_monitor_media_state);

	/* Re-read the capacity of the (possibly new) media. */
	ssc = sd_ssc_init(un);
	rval = sd_send_scsi_READ_CAPACITY(ssc, &capacity, &lbasize,
	    SD_PATH_DIRECT_PRIORITY);

	if (rval != 0)
		goto failed;

	mutex_enter(SD_MUTEX(un));
	sd_update_block_info(un, lbasize, capacity);

	/* Keep the kstat capacity in sync with the new media. */
	if (un->un_errstats != NULL) {
		struct	sd_errstats *stp =
		    (struct sd_errstats *)un->un_errstats->ks_data;
		stp->sd_capacity.value.ui64 = (uint64_t)
		    ((uint64_t)un->un_blockcount *
		    (uint64_t)un->un_tgt_blocksize);
	}

	/*
	 * Check if the media in the device is writable or not
	 */
	if (ISCD(un)) {
		sd_check_for_writable_cd(ssc, SD_PATH_DIRECT_PRIORITY);
	}

	/*
	 * Note: Maybe let the strategy/partitioning chain worry about getting
	 * valid geometry.
	 */
	mutex_exit(SD_MUTEX(un));
	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);


	if (cmlb_validate(un->un_cmlbhandle, 0,
	    (void *)SD_PATH_DIRECT_PRIORITY) != 0) {
		sd_ssc_fini(ssc);
		return (EIO);
	} else {
		if (un->un_f_pkstats_enabled) {
			sd_set_pstats(un);
			SD_TRACE(SD_LOG_IO_PARTITION, un,
			    "sd_handle_mchange: un:0x%p pstats created and "
			    "set\n", un);
		}
	}

	/*
	 * Try to lock the door
	 */
	rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
	    SD_PATH_DIRECT_PRIORITY);
failed:
	/* On any failure, tell the FMA assessment machinery to ignore it. */
	if (rval != 0)
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	sd_ssc_fini(ssc);
	return (rval);
}
19979 
19980 
19981 /*
19982  *    Function: sd_send_scsi_DOORLOCK
19983  *
19984  * Description: Issue the scsi DOOR LOCK command
19985  *
19986  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
19987  *                      structure for this target.
19988  *		flag  - SD_REMOVAL_ALLOW
19989  *			SD_REMOVAL_PREVENT
19990  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19991  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19992  *			to use the USCSI "direct" chain and bypass the normal
19993  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19994  *			command is issued as part of an error recovery action.
19995  *
19996  * Return Code: 0   - Success
19997  *		errno return code from sd_ssc_send()
19998  *
19999  *     Context: Can sleep.
20000  */
20001 
static int
sd_send_scsi_DOORLOCK(sd_ssc_t *ssc, int flag, int path_flag)
{
	struct scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);

	/* already determined doorlock is not supported, fake success */
	if (un->un_f_doorlock_supported == FALSE) {
		return (0);
	}

	/*
	 * If we are ejecting and see an SD_REMOVAL_PREVENT
	 * ignore the command so we can complete the eject
	 * operation.
	 */
	if (flag == SD_REMOVAL_PREVENT) {
		mutex_enter(SD_MUTEX(un));
		if (un->un_f_ejecting == TRUE) {
			mutex_exit(SD_MUTEX(un));
			return (EAGAIN);
		}
		mutex_exit(SD_MUTEX(un));
	}

	/* Build a Group 0 PREVENT/ALLOW MEDIUM REMOVAL command. */
	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));

	cdb.scc_cmd = SCMD_DOORLOCK;
	cdb.cdb_opaque[4] = (uchar_t)flag;	/* prevent/allow bit */

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
	ucmd_buf.uscsi_bufaddr	= NULL;		/* no data phase */
	ucmd_buf.uscsi_buflen	= 0;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 15;

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_DOORLOCK: returning sd_ssc_send\n");

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, path_flag);

	if (status == 0)
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);

	/*
	 * ILLEGAL REQUEST means the device does not implement DOORLOCK;
	 * remember that so future calls short-circuit, and report success.
	 */
	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
	    (scsi_sense_key((uint8_t *)&sense_buf) == KEY_ILLEGAL_REQUEST)) {
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);

		/* fake success and skip subsequent doorlock commands */
		un->un_f_doorlock_supported = FALSE;
		return (0);
	}

	return (status);
}
20073 
20074 /*
20075  *    Function: sd_send_scsi_READ_CAPACITY
20076  *
20077  * Description: This routine uses the scsi READ CAPACITY command to determine
20078  *		the device capacity in number of blocks and the device native
20079  *		block size. If this function returns a failure, then the
20080  *		values in *capp and *lbap are undefined.  If the capacity
20081  *		returned is 0xffffffff then the lun is too large for a
20082  *		normal READ CAPACITY command and the results of a
20083  *		READ CAPACITY 16 will be used instead.
20084  *
20085  *   Arguments: ssc   - ssc contains ptr to soft state struct for the target
20086  *		capp - ptr to unsigned 64-bit variable to receive the
20087  *			capacity value from the command.
20088  *		lbap - ptr to unsigned 32-bit varaible to receive the
20089  *			block size value from the command
20090  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20091  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20092  *			to use the USCSI "direct" chain and bypass the normal
20093  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
20094  *			command is issued as part of an error recovery action.
20095  *
20096  * Return Code: 0   - Success
20097  *		EIO - IO error
20098  *		EACCES - Reservation conflict detected
20099  *		EAGAIN - Device is becoming ready
20100  *		errno return code from sd_ssc_send()
20101  *
20102  *     Context: Can sleep.  Blocks until command completes.
20103  */
20104 
20105 #define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
20106 
static int
sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap,
    int path_flag)
{
	struct	scsi_extended_sense	sense_buf;
	struct	uscsi_cmd	ucmd_buf;
	union	scsi_cdb	cdb;
	uint32_t		*capacity_buf;
	uint64_t		capacity;
	uint32_t		lbasize;
	uint32_t		pbsize;	/* only used by the RC16 fallback */
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);

	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(capp != NULL);
	ASSERT(lbap != NULL);

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);

	/*
	 * First send a READ_CAPACITY command to the target.
	 * (This command is mandatory under SCSI-2.)
	 *
	 * Set up the CDB for the READ_CAPACITY command.  The Partial
	 * Medium Indicator bit is cleared.  The address field must be
	 * zero if the PMI bit is zero.
	 */
	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));

	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);

	cdb.scc_cmd = SCMD_READ_CAPACITY;

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, path_flag);

	switch (status) {
	case 0:
		/* Return failure if we did not get valid capacity data. */
		if (ucmd_buf.uscsi_resid != 0) {
			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
			    "sd_send_scsi_READ_CAPACITY received invalid "
			    "capacity data");
			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
			return (EIO);
		}
		/*
		 * Read capacity and block size from the READ CAPACITY 10 data.
		 * This data may be adjusted later due to device specific
		 * issues.
		 *
		 * According to the SCSI spec, the READ CAPACITY 10
		 * command returns the following:
		 *
		 *  bytes 0-3: Maximum logical block address available.
		 *		(MSB in byte:0 & LSB in byte:3)
		 *
		 *  bytes 4-7: Block length in bytes
		 *		(MSB in byte:4 & LSB in byte:7)
		 *
		 */
		capacity = BE_32(capacity_buf[0]);
		lbasize = BE_32(capacity_buf[1]);

		/*
		 * Done with capacity_buf
		 */
		kmem_free(capacity_buf, SD_CAPACITY_SIZE);

		/*
		 * if the reported capacity is set to all 0xf's, then
		 * this disk is too large and requires SBC-2 commands.
		 * Reissue the request using READ CAPACITY 16.
		 */
		if (capacity == 0xffffffff) {
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
			status = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity,
			    &lbasize, &pbsize, path_flag);
			if (status != 0) {
				return (status);
			} else {
				/* RC16 values are used as-is; skip the
				 * RC10-specific adjustments below. */
				goto rc16_done;
			}
		}
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			/*
			 * Check condition; look for ASC/ASCQ of 0x04/0x01
			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
			 */
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
				return (EAGAIN);
			}
			break;
		default:
			break;
		}
		/* FALLTHRU */
	default:
		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
		return (status);
	}

	/*
	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
	 * (2352 and 0 are common) so for these devices always force the value
	 * to 2048 as required by the ATAPI specs.
	 */
	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
		lbasize = 2048;
	}

	/*
	 * Get the maximum LBA value from the READ CAPACITY data.
	 * Here we assume that the Partial Medium Indicator (PMI) bit
	 * was cleared when issuing the command. This means that the LBA
	 * returned from the device is the LBA of the last logical block
	 * on the logical unit.  The actual logical block count will be
	 * this value plus one.
	 */
	capacity += 1;

	/*
	 * Currently, for removable media, the capacity is saved in terms
	 * of un->un_sys_blocksize, so scale the capacity value to reflect this.
	 */
	if (un->un_f_has_removable_media)
		capacity *= (lbasize / un->un_sys_blocksize);

rc16_done:

	/*
	 * Copy the values from the READ CAPACITY command into the space
	 * provided by the caller.
	 */
	*capp = capacity;
	*lbap = lbasize;

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);

	/*
	 * Both the lbasize and capacity from the device must be nonzero,
	 * otherwise we assume that the values are not valid and return
	 * failure to the caller. (4203735)
	 */
	if ((capacity == 0) || (lbasize == 0)) {
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
		    "sd_send_scsi_READ_CAPACITY received invalid value "
		    "capacity %llu lbasize %d", capacity, lbasize);
		return (EIO);
	}
	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
	return (0);
}
20286 
20287 /*
20288  *    Function: sd_send_scsi_READ_CAPACITY_16
20289  *
20290  * Description: This routine uses the scsi READ CAPACITY 16 command to
20291  *		determine the device capacity in number of blocks and the
20292  *		device native block size.  If this function returns a failure,
20293  *		then the values in *capp and *lbap are undefined.
20294  *		This routine should be called by sd_send_scsi_READ_CAPACITY
20295  *              which will apply any device specific adjustments to capacity
20296  *              and lbasize. One exception is it is also called by
20297  *              sd_get_media_info_ext. In that function, there is no need to
20298  *              adjust the capacity and lbasize.
20299  *
20300  *   Arguments: ssc   - ssc contains ptr to soft state struct for the target
20301  *		capp - ptr to unsigned 64-bit variable to receive the
20302  *			capacity value from the command.
20303  *		lbap - ptr to unsigned 32-bit varaible to receive the
20304  *			block size value from the command
20305  *              psp  - ptr to unsigned 32-bit variable to receive the
20306  *                      physical block size value from the command
20307  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20308  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20309  *			to use the USCSI "direct" chain and bypass the normal
20310  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
20311  *			this command is issued as part of an error recovery
20312  *			action.
20313  *
20314  * Return Code: 0   - Success
20315  *		EIO - IO error
20316  *		EACCES - Reservation conflict detected
20317  *		EAGAIN - Device is becoming ready
20318  *		errno return code from sd_ssc_send()
20319  *
20320  *     Context: Can sleep.  Blocks until command completes.
20321  */
20322 
20323 #define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
20324 
20325 static int
20326 sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap,
20327     uint32_t *psp, int path_flag)
20328 {
20329 	struct	scsi_extended_sense	sense_buf;
20330 	struct	uscsi_cmd	ucmd_buf;
20331 	union	scsi_cdb	cdb;
20332 	uint64_t		*capacity16_buf;
20333 	uint64_t		capacity;
20334 	uint32_t		lbasize;
20335 	uint32_t		pbsize;
20336 	uint32_t		lbpb_exp;
20337 	int			status;
20338 	struct sd_lun		*un;
20339 
20340 	ASSERT(ssc != NULL);
20341 
20342 	un = ssc->ssc_un;
20343 	ASSERT(un != NULL);
20344 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20345 	ASSERT(capp != NULL);
20346 	ASSERT(lbap != NULL);
20347 
20348 	SD_TRACE(SD_LOG_IO, un,
20349 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
20350 
20351 	/*
20352 	 * First send a READ_CAPACITY_16 command to the target.
20353 	 *
20354 	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
20355 	 * Medium Indicator bit is cleared.  The address field must be
20356 	 * zero if the PMI bit is zero.
20357 	 */
20358 	bzero(&cdb, sizeof (cdb));
20359 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20360 
20361 	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
20362 
20363 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20364 	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
20365 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
20366 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
20367 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20368 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
20369 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20370 	ucmd_buf.uscsi_timeout	= 60;
20371 
20372 	/*
20373 	 * Read Capacity (16) is a Service Action In command.  One
20374 	 * command byte (0x9E) is overloaded for multiple operations,
20375 	 * with the second CDB byte specifying the desired operation
20376 	 */
20377 	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
20378 	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
20379 
20380 	/*
20381 	 * Fill in allocation length field
20382 	 */
20383 	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
20384 
20385 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20386 	    UIO_SYSSPACE, path_flag);
20387 
20388 	switch (status) {
20389 	case 0:
20390 		/* Return failure if we did not get valid capacity data. */
20391 		if (ucmd_buf.uscsi_resid > 20) {
20392 			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
20393 			    "sd_send_scsi_READ_CAPACITY_16 received invalid "
20394 			    "capacity data");
20395 			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
20396 			return (EIO);
20397 		}
20398 
20399 		/*
20400 		 * Read capacity and block size from the READ CAPACITY 16 data.
20401 		 * This data may be adjusted later due to device specific
20402 		 * issues.
20403 		 *
20404 		 * According to the SCSI spec, the READ CAPACITY 16
20405 		 * command returns the following:
20406 		 *
20407 		 *  bytes 0-7: Maximum logical block address available.
20408 		 *		(MSB in byte:0 & LSB in byte:7)
20409 		 *
20410 		 *  bytes 8-11: Block length in bytes
20411 		 *		(MSB in byte:8 & LSB in byte:11)
20412 		 *
20413 		 *  byte 13: LOGICAL BLOCKS PER PHYSICAL BLOCK EXPONENT
20414 		 *
20415 		 *  byte 14:
20416 		 *	bit 7: Thin-Provisioning Enabled
20417 		 *	bit 6: Thin-Provisioning Read Zeros
20418 		 */
20419 		capacity = BE_64(capacity16_buf[0]);
20420 		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
20421 		lbpb_exp = (BE_64(capacity16_buf[1]) >> 16) & 0x0f;
20422 
20423 		un->un_thin_flags = 0;
20424 		if (((uint8_t *)capacity16_buf)[14] & (1 << 7))
20425 			un->un_thin_flags |= SD_THIN_PROV_ENABLED;
20426 		if (((uint8_t *)capacity16_buf)[14] & (1 << 6))
20427 			un->un_thin_flags |= SD_THIN_PROV_READ_ZEROS;
20428 
20429 		pbsize = lbasize << lbpb_exp;
20430 
20431 		/*
20432 		 * Done with capacity16_buf
20433 		 */
20434 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
20435 
20436 		/*
20437 		 * if the reported capacity is set to all 0xf's, then
20438 		 * this disk is too large.  This could only happen with
20439 		 * a device that supports LBAs larger than 64 bits which
20440 		 * are not defined by any current T10 standards.
20441 		 */
20442 		if (capacity == 0xffffffffffffffff) {
20443 			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
20444 			    "disk is too large");
20445 			return (EIO);
20446 		}
20447 		break;	/* Success! */
20448 	case EIO:
20449 		switch (ucmd_buf.uscsi_status) {
20450 		case STATUS_RESERVATION_CONFLICT:
20451 			status = EACCES;
20452 			break;
20453 		case STATUS_CHECK:
20454 			/*
20455 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
20456 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
20457 			 */
20458 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20459 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
20460 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
20461 				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
20462 				return (EAGAIN);
20463 			}
20464 			break;
20465 		default:
20466 			break;
20467 		}
20468 		/* FALLTHRU */
20469 	default:
20470 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
20471 		return (status);
20472 	}
20473 
20474 	/*
20475 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
20476 	 * (2352 and 0 are common) so for these devices always force the value
20477 	 * to 2048 as required by the ATAPI specs.
20478 	 */
20479 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
20480 		lbasize = 2048;
20481 	}
20482 
20483 	/*
20484 	 * Get the maximum LBA value from the READ CAPACITY 16 data.
20485 	 * Here we assume that the Partial Medium Indicator (PMI) bit
20486 	 * was cleared when issuing the command. This means that the LBA
20487 	 * returned from the device is the LBA of the last logical block
20488 	 * on the logical unit.  The actual logical block count will be
20489 	 * this value plus one.
20490 	 */
20491 	capacity += 1;
20492 
20493 	/*
20494 	 * Currently, for removable media, the capacity is saved in terms
20495 	 * of un->un_sys_blocksize, so scale the capacity value to reflect this.
20496 	 */
20497 	if (un->un_f_has_removable_media)
20498 		capacity *= (lbasize / un->un_sys_blocksize);
20499 
20500 	*capp = capacity;
20501 	*lbap = lbasize;
20502 	*psp = pbsize;
20503 
20504 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
20505 	    "capacity:0x%llx  lbasize:0x%x, pbsize: 0x%x\n",
20506 	    capacity, lbasize, pbsize);
20507 
20508 	if ((capacity == 0) || (lbasize == 0) || (pbsize == 0)) {
20509 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
20510 		    "sd_send_scsi_READ_CAPACITY_16 received invalid value "
20511 		    "capacity %llu lbasize %d pbsize %d", capacity, lbasize);
20512 		return (EIO);
20513 	}
20514 
20515 	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20516 	return (0);
20517 }
20518 
20519 
20520 /*
20521  *    Function: sd_send_scsi_START_STOP_UNIT
20522  *
20523  * Description: Issue a scsi START STOP UNIT command to the target.
20524  *
20525  *   Arguments: ssc    - ssc contains pointer to driver soft state (unit)
20526  *                       structure for this target.
20527  *      pc_flag - SD_POWER_CONDITION
20528  *                SD_START_STOP
20529  *		flag  - SD_TARGET_START
20530  *			SD_TARGET_STOP
20531  *			SD_TARGET_EJECT
20532  *			SD_TARGET_CLOSE
20533  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20534  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20535  *			to use the USCSI "direct" chain and bypass the normal
20536  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
20537  *			command is issued as part of an error recovery action.
20538  *
20539  * Return Code: 0   - Success
20540  *		EIO - IO error
20541  *		EACCES - Reservation conflict detected
20542  *		ENXIO  - Not Ready, medium not present
20543  *		errno return code from sd_ssc_send()
20544  *
20545  *     Context: Can sleep.
20546  */
20547 
static int
sd_send_scsi_START_STOP_UNIT(sd_ssc_t *ssc, int pc_flag, int flag,
    int path_flag)
{
	struct	scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);

	/*
	 * If this is a plain start/stop request but the drive is flagged
	 * as not supporting START STOP UNIT, skip the command and claim
	 * success.  Power-condition and eject/close requests still go out.
	 */
	if (un->un_f_check_start_stop &&
	    (pc_flag == SD_START_STOP) &&
	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
	    (un->un_f_start_stop_supported != TRUE)) {
		return (0);
	}

	/*
	 * If we are performing an eject operation and
	 * we receive any command other than SD_TARGET_EJECT
	 * we should immediately return.
	 */
	if (flag != SD_TARGET_EJECT) {
		mutex_enter(SD_MUTEX(un));
		if (un->un_f_ejecting == TRUE) {
			mutex_exit(SD_MUTEX(un));
			return (EAGAIN);
		}
		mutex_exit(SD_MUTEX(un));
	}

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	cdb.scc_cmd = SCMD_START_STOP;
	/*
	 * CDB byte 4: for SD_POWER_CONDITION the value goes in the upper
	 * nibble (hence the << 4); otherwise the start/stop/eject/close
	 * bits occupy the lower nibble.
	 */
	cdb.cdb_opaque[4] = (pc_flag == SD_POWER_CONDITION) ?
	    (uchar_t)(flag << 4) : (uchar_t)flag;

	/* START STOP UNIT has no data phase; only sense data comes back. */
	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
	ucmd_buf.uscsi_bufaddr	= NULL;
	ucmd_buf.uscsi_buflen	= 0;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 200;

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, path_flag);

	/* Map selected SCSI failure modes onto the errnos documented above. */
	switch (status) {
	case 0:
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
				switch (scsi_sense_key(
				    (uint8_t *)&sense_buf)) {
				case KEY_ILLEGAL_REQUEST:
					status = ENOTSUP;
					break;
				case KEY_NOT_READY:
					/* ASC 0x3A: medium not present */
					if (scsi_sense_asc(
					    (uint8_t *)&sense_buf)
					    == 0x3A) {
						status = ENXIO;
					}
					break;
				default:
					break;
				}
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");

	return (status);
}
20647 
20648 
20649 /*
20650  *    Function: sd_start_stop_unit_callback
20651  *
20652  * Description: timeout(9F) callback to begin recovery process for a
20653  *		device that has spun down.
20654  *
20655  *   Arguments: arg - pointer to associated softstate struct.
20656  *
20657  *     Context: Executes in a timeout(9F) thread context
20658  */
20659 
20660 static void
20661 sd_start_stop_unit_callback(void *arg)
20662 {
20663 	struct sd_lun	*un = arg;
20664 	ASSERT(un != NULL);
20665 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20666 
20667 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
20668 
20669 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
20670 }
20671 
20672 
20673 /*
20674  *    Function: sd_start_stop_unit_task
20675  *
20676  * Description: Recovery procedure when a drive is spun down.
20677  *
20678  *   Arguments: arg - pointer to associated softstate struct.
20679  *
20680  *     Context: Executes in a taskq() thread context
20681  */
20682 
static void
sd_start_stop_unit_task(void *arg)
{
	struct sd_lun	*un = arg;
	sd_ssc_t	*ssc;
	int		power_level;
	int		rval;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");

	/*
	 * Some unformatted drives report not ready error, no need to
	 * restart if format has been initiated.
	 */
	mutex_enter(SD_MUTEX(un));
	if (un->un_f_format_in_progress == TRUE) {
		mutex_exit(SD_MUTEX(un));
		return;
	}
	mutex_exit(SD_MUTEX(un));

	ssc = sd_ssc_init(un);
	/*
	 * When a START STOP command is issued from here, it is part of a
	 * failure recovery operation and must be issued before any other
	 * commands, including any pending retries. Thus it must be sent
	 * using SD_PATH_DIRECT_PRIORITY. It doesn't matter if the spin up
	 * succeeds or not, we will start I/O after the attempt.
	 * If power condition is supported and the current power level
	 * is capable of performing I/O, we should set the power condition
	 * to that level. Otherwise, set the power condition to ACTIVE.
	 */
	if (un->un_f_power_condition_supported) {
		mutex_enter(SD_MUTEX(un));
		ASSERT(SD_PM_IS_LEVEL_VALID(un, un->un_power_level));
		/* Keep the current level only if its ran_perf shows I/O. */
		power_level = sd_pwr_pc.ran_perf[un->un_power_level]
		    > 0 ? un->un_power_level : SD_SPINDLE_ACTIVE;
		mutex_exit(SD_MUTEX(un));
		rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_POWER_CONDITION,
		    sd_pl2pc[power_level], SD_PATH_DIRECT_PRIORITY);
	} else {
		rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
		    SD_TARGET_START, SD_PATH_DIRECT_PRIORITY);
	}

	/* A failed spin-up attempt is tolerated; ignore its assessment. */
	if (rval != 0)
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	sd_ssc_fini(ssc);
	/*
	 * The above call blocks until the START_STOP_UNIT command completes.
	 * Now that it has completed, we must re-try the original IO that
	 * received the NOT READY condition in the first place. There are
	 * three possible conditions here:
	 *
	 *  (1) The original IO is on un_retry_bp.
	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
	 *	is NULL.
	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
	 *	points to some other, unrelated bp.
	 *
	 * For each case, we must call sd_start_cmds() with un_retry_bp
	 * as the argument. If un_retry_bp is NULL, this will initiate
	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
	 * then this will process the bp on un_retry_bp. That may or may not
	 * be the original IO, but that does not matter: the important thing
	 * is to keep the IO processing going at this point.
	 *
	 * Note: This is a very specific error recovery sequence associated
	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
	 * serialize the I/O with completion of the spin-up.
	 */
	mutex_enter(SD_MUTEX(un));
	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
	    un, un->un_retry_bp);
	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
	sd_start_cmds(un, un->un_retry_bp);
	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
}
20767 
20768 
20769 /*
20770  *    Function: sd_send_scsi_INQUIRY
20771  *
20772  * Description: Issue the scsi INQUIRY command.
20773  *
20774  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
20775  *                      structure for this target.
20776  *		bufaddr
20777  *		buflen
20778  *		evpd
20779  *		page_code
20780  *		page_length
20781  *
20782  * Return Code: 0   - Success
20783  *		errno return code from sd_ssc_send()
20784  *
20785  *     Context: Can sleep. Does not return until command is completed.
20786  */
20787 
20788 static int
20789 sd_send_scsi_INQUIRY(sd_ssc_t *ssc, uchar_t *bufaddr, size_t buflen,
20790     uchar_t evpd, uchar_t page_code, size_t *residp)
20791 {
20792 	union scsi_cdb		cdb;
20793 	struct uscsi_cmd	ucmd_buf;
20794 	int			status;
20795 	struct sd_lun		*un;
20796 
20797 	ASSERT(ssc != NULL);
20798 	un = ssc->ssc_un;
20799 	ASSERT(un != NULL);
20800 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20801 	ASSERT(bufaddr != NULL);
20802 
20803 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
20804 
20805 	bzero(&cdb, sizeof (cdb));
20806 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20807 	bzero(bufaddr, buflen);
20808 
20809 	cdb.scc_cmd = SCMD_INQUIRY;
20810 	cdb.cdb_opaque[1] = evpd;
20811 	cdb.cdb_opaque[2] = page_code;
20812 	FORMG0COUNT(&cdb, buflen);
20813 
20814 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20815 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
20816 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20817 	ucmd_buf.uscsi_buflen	= buflen;
20818 	ucmd_buf.uscsi_rqbuf	= NULL;
20819 	ucmd_buf.uscsi_rqlen	= 0;
20820 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
20821 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
20822 
20823 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20824 	    UIO_SYSSPACE, SD_PATH_DIRECT);
20825 
20826 	/*
20827 	 * Only handle status == 0, the upper-level caller
20828 	 * will put different assessment based on the context.
20829 	 */
20830 	if (status == 0)
20831 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20832 
20833 	if ((status == 0) && (residp != NULL)) {
20834 		*residp = ucmd_buf.uscsi_resid;
20835 	}
20836 
20837 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
20838 
20839 	return (status);
20840 }
20841 
20842 
20843 /*
20844  *    Function: sd_send_scsi_TEST_UNIT_READY
20845  *
20846  * Description: Issue the scsi TEST UNIT READY command.
20847  *		This routine can be told to set the flag USCSI_DIAGNOSE to
20848  *		prevent retrying failed commands. Use this when the intent
20849  *		is either to check for device readiness, to clear a Unit
20850  *		Attention, or to clear any outstanding sense data.
20851  *		However under specific conditions the expected behavior
20852  *		is for retries to bring a device ready, so use the flag
20853  *		with caution.
20854  *
20855  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
20856  *                      structure for this target.
20857  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
20858  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
20859  *			0: don't check for media present, do retries on cmd.
20860  *
20861  * Return Code: 0   - Success
20862  *		EIO - IO error
20863  *		EACCES - Reservation conflict detected
20864  *		ENXIO  - Not Ready, medium not present
20865  *		errno return code from sd_ssc_send()
20866  *
20867  *     Context: Can sleep. Does not return until command is completed.
20868  */
20869 
static int
sd_send_scsi_TEST_UNIT_READY(sd_ssc_t *ssc, int flag)
{
	struct	scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);

	/*
	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
	 * timeouts when they receive a TUR and the queue is not empty. Check
	 * the configuration flag set during attach (indicating the drive has
	 * this firmware bug) and un_ncmds_in_transport before issuing the
	 * TUR. If there are
	 * pending commands return success, this is a bit arbitrary but is ok
	 * for non-removables (i.e. the eliteI disks) and non-clustering
	 * configurations.
	 */
	if (un->un_f_cfg_tur_check == TRUE) {
		mutex_enter(SD_MUTEX(un));
		if (un->un_ncmds_in_transport != 0) {
			mutex_exit(SD_MUTEX(un));
			return (0);
		}
		mutex_exit(SD_MUTEX(un));
	}

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	/* TEST UNIT READY has no data phase; only sense data comes back. */
	cdb.scc_cmd = SCMD_TEST_UNIT_READY;

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
	ucmd_buf.uscsi_bufaddr	= NULL;
	ucmd_buf.uscsi_buflen	= 0;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;

	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
	if ((flag & SD_DONT_RETRY_TUR) != 0) {
		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
	}
	ucmd_buf.uscsi_timeout	= 60;

	/*
	 * SD_BYPASS_PM selects SD_PATH_DIRECT (presumably to avoid
	 * triggering power management — confirm against callers).
	 */
	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT :
	    SD_PATH_STANDARD));

	switch (status) {
	case 0:
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			/* Media-presence mapping only if caller asked. */
			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
				break;
			}
			/* NOT READY + ASC 0x3A => medium not present. */
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_key((uint8_t *)&sense_buf) ==
			    KEY_NOT_READY) &&
			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x3A)) {
				status = ENXIO;
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");

	return (status);
}
20962 
20963 /*
20964  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
20965  *
20966  * Description: Issue the scsi PERSISTENT RESERVE IN command.
20967  *
20968  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
20969  *                      structure for this target.
20970  *
20971  * Return Code: 0   - Success
20972  *		EACCES
20973  *		ENOTSUP
20974  *		errno return code from sd_ssc_send()
20975  *
20976  *     Context: Can sleep. Does not return until command is completed.
20977  */
20978 
20979 static int
20980 sd_send_scsi_PERSISTENT_RESERVE_IN(sd_ssc_t *ssc, uchar_t usr_cmd,
20981     uint16_t data_len, uchar_t *data_bufp)
20982 {
20983 	struct scsi_extended_sense	sense_buf;
20984 	union scsi_cdb		cdb;
20985 	struct uscsi_cmd	ucmd_buf;
20986 	int			status;
20987 	int			no_caller_buf = FALSE;
20988 	struct sd_lun		*un;
20989 
20990 	ASSERT(ssc != NULL);
20991 	un = ssc->ssc_un;
20992 	ASSERT(un != NULL);
20993 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20994 	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
20995 
20996 	SD_TRACE(SD_LOG_IO, un,
20997 	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
20998 
20999 	bzero(&cdb, sizeof (cdb));
21000 	bzero(&ucmd_buf, sizeof (ucmd_buf));
21001 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21002 	if (data_bufp == NULL) {
21003 		/* Allocate a default buf if the caller did not give one */
21004 		ASSERT(data_len == 0);
21005 		data_len  = MHIOC_RESV_KEY_SIZE;
21006 		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
21007 		no_caller_buf = TRUE;
21008 	}
21009 
21010 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
21011 	cdb.cdb_opaque[1] = usr_cmd;
21012 	FORMG1COUNT(&cdb, data_len);
21013 
21014 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21015 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
21016 	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
21017 	ucmd_buf.uscsi_buflen	= data_len;
21018 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21019 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21020 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
21021 	ucmd_buf.uscsi_timeout	= 60;
21022 
21023 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21024 	    UIO_SYSSPACE, SD_PATH_STANDARD);
21025 
21026 	switch (status) {
21027 	case 0:
21028 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21029 
21030 		break;	/* Success! */
21031 	case EIO:
21032 		switch (ucmd_buf.uscsi_status) {
21033 		case STATUS_RESERVATION_CONFLICT:
21034 			status = EACCES;
21035 			break;
21036 		case STATUS_CHECK:
21037 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
21038 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
21039 			    KEY_ILLEGAL_REQUEST)) {
21040 				status = ENOTSUP;
21041 			}
21042 			break;
21043 		default:
21044 			break;
21045 		}
21046 		break;
21047 	default:
21048 		break;
21049 	}
21050 
21051 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
21052 
21053 	if (no_caller_buf == TRUE) {
21054 		kmem_free(data_bufp, data_len);
21055 	}
21056 
21057 	return (status);
21058 }
21059 
21060 
21061 /*
21062  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
21063  *
21064  * Description: This routine is the driver entry point for handling CD-ROM
21065  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS,
21066  *		MHIOCGRP_INRESV) by sending the SCSI-3 PROUT commands to the
21067  *		device.
21068  *
21069  *   Arguments: ssc  -  ssc contains un - pointer to soft state struct
21070  *                      for the target.
21071  *		usr_cmd SCSI-3 reservation facility command (one of
21072  *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
21073  *			SD_SCSI3_PREEMPTANDABORT, SD_SCSI3_CLEAR)
21074  *		usr_bufp - user provided pointer register, reserve descriptor or
21075  *			preempt and abort structure (mhioc_register_t,
21076  *                      mhioc_resv_desc_t, mhioc_preemptandabort_t)
21077  *
21078  * Return Code: 0   - Success
21079  *		EACCES
21080  *		ENOTSUP
21081  *		errno return code from sd_ssc_send()
21082  *
21083  *     Context: Can sleep. Does not return until command is completed.
21084  */
21085 
static int
sd_send_scsi_PERSISTENT_RESERVE_OUT(sd_ssc_t *ssc, uchar_t usr_cmd,
    uchar_t *usr_bufp)
{
	struct scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	uchar_t			data_len = sizeof (sd_prout_t);
	sd_prout_t		*prp;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(data_len == 24);	/* required by scsi spec */

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);

	if (usr_bufp == NULL) {
		return (EINVAL);
	}

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
	prp = kmem_zalloc(data_len, KM_SLEEP);

	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
	cdb.cdb_opaque[1] = usr_cmd;	/* PROUT service action */
	FORMG1COUNT(&cdb, data_len);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
	ucmd_buf.uscsi_buflen	= data_len;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	/*
	 * Populate the PROUT parameter list ('prp') from the caller's
	 * mhioc_* structure; the layout used depends on the service action.
	 */
	switch (usr_cmd) {
	case SD_SCSI3_REGISTER: {
		/* Register: both the old and the new key are supplied. */
		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;

		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
		bcopy(ptr->newkey.key, prp->service_key,
		    MHIOC_RESV_KEY_SIZE);
		prp->aptpl = ptr->aptpl;
		break;
	}
	case SD_SCSI3_CLEAR: {
		/* Clear: only the reservation key is needed. */
		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;

		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
		break;
	}
	case SD_SCSI3_RESERVE:
	case SD_SCSI3_RELEASE: {
		/* Reserve/release: key, scope address, and type in the CDB. */
		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;

		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
		prp->scope_address = BE_32(ptr->scope_specific_addr);
		cdb.cdb_opaque[2] = ptr->type;
		break;
	}
	case SD_SCSI3_PREEMPTANDABORT: {
		/*
		 * Preempt-and-abort: our key plus the victim's key; sent
		 * with USCSI_HEAD so it is placed at the head of the queue.
		 */
		mhioc_preemptandabort_t *ptr =
		    (mhioc_preemptandabort_t *)usr_bufp;

		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
		bcopy(ptr->victim_key.key, prp->service_key,
		    MHIOC_RESV_KEY_SIZE);
		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
		cdb.cdb_opaque[2] = ptr->resvdesc.type;
		ucmd_buf.uscsi_flags |= USCSI_HEAD;
		break;
	}
	case SD_SCSI3_REGISTERANDIGNOREKEY:
	{
		/* Register-and-ignore-existing-key: only the new key. */
		mhioc_registerandignorekey_t *ptr;
		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
		bcopy(ptr->newkey.key,
		    prp->service_key, MHIOC_RESV_KEY_SIZE);
		prp->aptpl = ptr->aptpl;
		break;
	}
	default:
		ASSERT(FALSE);	/* unknown service action from caller */
		break;
	}

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, SD_PATH_STANDARD);

	switch (status) {
	case 0:
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			/* ILLEGAL REQUEST: target does not support PROUT. */
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_key((uint8_t *)&sense_buf) ==
			    KEY_ILLEGAL_REQUEST)) {
				status = ENOTSUP;
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	kmem_free(prp, data_len);
	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
	return (status);
}
21211 
21212 
21213 /*
21214  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
21215  *
21216  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
21217  *
21218  *   Arguments: un - pointer to the target's soft state struct
21219  *              dkc - pointer to the callback structure
21220  *
21221  * Return Code: 0 - success
21222  *		errno-type error code
21223  *
21224  *     Context: kernel thread context only.
21225  *
21226  *  _______________________________________________________________
21227  * | dkc_flag &   | dkc_callback | DKIOCFLUSHWRITECACHE            |
21228  * |FLUSH_VOLATILE|              | operation                       |
21229  * |______________|______________|_________________________________|
21230  * | 0            | NULL         | Synchronous flush on both       |
21231  * |              |              | volatile and non-volatile cache |
21232  * |______________|______________|_________________________________|
21233  * | 1            | NULL         | Synchronous flush on volatile   |
21234  * |              |              | cache; disk drivers may suppress|
21235  * |              |              | flush if disk table indicates   |
21236  * |              |              | non-volatile cache              |
21237  * |______________|______________|_________________________________|
21238  * | 0            | !NULL        | Asynchronous flush on both      |
21239  * |              |              | volatile and non-volatile cache;|
21240  * |______________|______________|_________________________________|
21241  * | 1            | !NULL        | Asynchronous flush on volatile  |
21242  * |              |              | cache; disk drivers may suppress|
21243  * |              |              | flush if disk table indicates   |
21244  * |              |              | non-volatile cache              |
21245  * |______________|______________|_________________________________|
21246  *
21247  */
21248 
/*
 * Issue a SYNCHRONIZE CACHE command to the target, either synchronously
 * (no callback supplied: wait via biowait() and return the command's
 * errno) or asynchronously (callback supplied: return 0 immediately and
 * deliver the result to dkc->dkc_callback from the b_iodone handler).
 * The flush is skipped entirely when un_f_suppress_cache_flush is set.
 */
static int
sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
{
	struct sd_uscsi_info	*uip;
	struct uscsi_cmd	*uscmd;
	union scsi_cdb		*cdb;
	struct buf		*bp;
	int			rval = 0;
	int			is_async;

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/*
	 * A caller-supplied completion callback selects the asynchronous
	 * flavor; otherwise we wait for the command below.
	 */
	if (dkc == NULL || dkc->dkc_callback == NULL) {
		is_async = FALSE;
	} else {
		is_async = TRUE;
	}

	mutex_enter(SD_MUTEX(un));
	/* check whether cache flush should be suppressed */
	if (un->un_f_suppress_cache_flush == TRUE) {
		mutex_exit(SD_MUTEX(un));
		/*
		 * suppress the cache flush if the device is told to do
		 * so by sd.conf or disk table
		 */
		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_SYNCHRONIZE_CACHE: \
		    skip the cache flush since suppress_cache_flush is %d!\n",
		    un->un_f_suppress_cache_flush);

		if (is_async == TRUE) {
			/* invoke callback for asynchronous flush */
			(*dkc->dkc_callback)(dkc->dkc_cookie, 0);
		}
		/* rval is still 0 here: a suppressed flush is a success */
		return (rval);
	}
	mutex_exit(SD_MUTEX(un));

	/*
	 * check dkc_flag & FLUSH_VOLATILE so SYNC_NV bit can be
	 * set properly
	 */
	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;

	mutex_enter(SD_MUTEX(un));
	if (dkc != NULL && un->un_f_sync_nv_supported &&
	    (dkc->dkc_flag & FLUSH_VOLATILE)) {
		/*
		 * if the device supports SYNC_NV bit, turn on
		 * the SYNC_NV bit to only flush volatile cache
		 */
		cdb->cdb_un.tag |= SD_SYNC_NV_BIT;
	}
	mutex_exit(SD_MUTEX(un));

	/*
	 * First get some memory for the uscsi_cmd struct and cdb
	 * and initialize for SYNCHRONIZE_CACHE cmd.
	 */
	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
	uscmd->uscsi_cdblen = CDB_GROUP1;
	uscmd->uscsi_cdb = (caddr_t)cdb;
	uscmd->uscsi_bufaddr = NULL;
	uscmd->uscsi_buflen = 0;
	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
	uscmd->uscsi_rqlen = SENSE_LENGTH;
	uscmd->uscsi_rqresid = SENSE_LENGTH;
	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
	uscmd->uscsi_timeout = sd_io_time;

	/*
	 * Allocate an sd_uscsi_info struct and fill it with the info
	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
	 * since we allocate the buf here in this function, we do not
	 * need to preserve the prior contents of b_private.
	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
	 */
	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
	uip->ui_flags = SD_PATH_DIRECT;
	uip->ui_cmdp  = uscmd;

	bp = getrbuf(KM_SLEEP);
	bp->b_private = uip;

	/*
	 * Setup buffer to carry uscsi request.
	 */
	bp->b_flags  = B_BUSY;
	bp->b_bcount = 0;
	bp->b_blkno  = 0;

	if (is_async == TRUE) {
		/*
		 * Async: the biodone handler performs all cleanup and
		 * invokes the caller's callback, so save a copy of it.
		 */
		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
		uip->ui_dkc = *dkc;
	}

	bp->b_edev = SD_GET_DEV(un);
	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */

	/*
	 * Unset un_f_sync_cache_required flag
	 */
	mutex_enter(SD_MUTEX(un));
	un->un_f_sync_cache_required = FALSE;
	mutex_exit(SD_MUTEX(un));

	(void) sd_uscsi_strategy(bp);

	/*
	 * If synchronous request, wait for completion
	 * If async just return and let b_iodone callback
	 * cleanup.
	 * NOTE: On return, u_ncmds_in_driver will be decremented,
	 * but it was also incremented in sd_uscsi_strategy(), so
	 * we should be ok.
	 */
	if (is_async == FALSE) {
		(void) biowait(bp);
		/* biodone decodes status and frees bp/uscmd/uip/cdb */
		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
	}

	return (rval);
}
21378 
21379 
/*
 * Completion handler for sd_send_scsi_SYNCHRONIZE_CACHE(): decodes the
 * command outcome, updates the un_f_sync_* feature flags, invokes the
 * caller's dk_callback if one was saved (async case), and frees every
 * resource allocated for the request (buf, sd_uscsi_info, sense buffer,
 * CDB and uscsi_cmd).  Returns the final status (0, ENOTSUP, or the
 * errno from the transfer).
 */
static int
sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
{
	struct sd_uscsi_info *uip;
	struct uscsi_cmd *uscmd;
	uint8_t *sense_buf;
	struct sd_lun *un;
	int status;
	union scsi_cdb *cdb;

	uip = (struct sd_uscsi_info *)(bp->b_private);
	ASSERT(uip != NULL);

	uscmd = uip->ui_cmdp;
	ASSERT(uscmd != NULL);

	sense_buf = (uint8_t *)uscmd->uscsi_rqbuf;
	ASSERT(sense_buf != NULL);

	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
	ASSERT(un != NULL);

	cdb = (union scsi_cdb *)uscmd->uscsi_cdb;

	status = geterror(bp);
	switch (status) {
	case 0:
		break;	/* Success! */
	case EIO:
		switch (uscmd->uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			/* Ignore reservation conflict */
			status = 0;
			goto done;

		case STATUS_CHECK:
			/*
			 * ILLEGAL REQUEST means the device rejected the
			 * command form we sent, not that the flush failed;
			 * record the lack of support rather than an error.
			 */
			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_key(sense_buf) ==
			    KEY_ILLEGAL_REQUEST)) {
				/* Ignore Illegal Request error */
				if (cdb->cdb_un.tag&SD_SYNC_NV_BIT) {
					/*
					 * SYNC_NV was set: only that bit is
					 * unsupported; retry-worthy success.
					 */
					mutex_enter(SD_MUTEX(un));
					un->un_f_sync_nv_supported = FALSE;
					mutex_exit(SD_MUTEX(un));
					status = 0;
					SD_TRACE(SD_LOG_IO, un,
					    "un_f_sync_nv_supported \
					    is set to false.\n");
					goto done;
				}

				/* Plain SYNCHRONIZE CACHE is unsupported */
				mutex_enter(SD_MUTEX(un));
				un->un_f_sync_cache_supported = FALSE;
				mutex_exit(SD_MUTEX(un));
				SD_TRACE(SD_LOG_IO, un,
				    "sd_send_scsi_SYNCHRONIZE_CACHE_biodone: \
				    un_f_sync_cache_supported set to false \
				    with asc = %x, ascq = %x\n",
				    scsi_sense_asc(sense_buf),
				    scsi_sense_ascq(sense_buf));
				status = ENOTSUP;
				goto done;
			}
			break;
		default:
			break;
		}
		/* FALLTHRU */
	default:
		/*
		 * Turn on the un_f_sync_cache_required flag
		 * since the SYNC CACHE command failed
		 */
		mutex_enter(SD_MUTEX(un));
		un->un_f_sync_cache_required = TRUE;
		mutex_exit(SD_MUTEX(un));

		/*
		 * Don't log an error message if this device
		 * has removable media.
		 */
		if (!un->un_f_has_removable_media) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "SYNCHRONIZE CACHE command failed (%d)\n", status);
		}
		break;
	}

done:
	/* ui_dkc is zero-filled unless the async path saved a callback */
	if (uip->ui_dkc.dkc_callback != NULL) {
		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
	}

	/* Release everything allocated by sd_send_scsi_SYNCHRONIZE_CACHE() */
	ASSERT((bp->b_flags & B_REMAPPED) == 0);
	freerbuf(bp);
	kmem_free(uip, sizeof (struct sd_uscsi_info));
	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
	kmem_free(uscmd, sizeof (struct uscsi_cmd));

	return (status);
}
21482 
21483 /*
21484  * Issues a single SCSI UNMAP command with a prepared UNMAP parameter list.
21485  * Returns zero on success, or the non-zero command error code on failure.
21486  */
static int
sd_send_scsi_UNMAP_issue_one(sd_ssc_t *ssc, unmap_param_hdr_t *uph,
    uint64_t num_descr, uint64_t bytes)
{
	struct sd_lun		*un = ssc->ssc_un;
	struct scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	/* Total parameter list size: header plus num_descr descriptors */
	const uint64_t		param_size = sizeof (unmap_param_hdr_t) +
	    num_descr * sizeof (unmap_blk_descr_t);

	/*
	 * The header's big-endian length fields exclude the leading bytes
	 * of the header itself (2 for the data length, 8 for the
	 * descriptor data length).
	 */
	ASSERT3U(param_size - 2, <=, UINT16_MAX);
	uph->uph_data_len = BE_16(param_size - 2);
	uph->uph_descr_data_len = BE_16(param_size - 8);

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	cdb.scc_cmd = SCMD_UNMAP;
	FORMG1COUNT(&cdb, param_size);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= (uchar_t)CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)uph;
	ucmd_buf.uscsi_buflen	= param_size;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_WRITE | USCSI_RQENABLE | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= un->un_cmd_timeout;

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL, UIO_SYSSPACE,
	    SD_PATH_STANDARD);

	switch (status) {
	case 0:
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);

		/* Account the successful command in the UNMAP kstats */
		if (un->un_unmapstats) {
			atomic_inc_64(&un->un_unmapstats->us_cmds.value.ui64);
			atomic_add_64(&un->un_unmapstats->us_extents.value.ui64,
			    num_descr);
			atomic_add_64(&un->un_unmapstats->us_bytes.value.ui64,
			    bytes);
		}
		break;	/* Success! */
	case EIO:
		if (un->un_unmapstats)
			atomic_inc_64(&un->un_unmapstats->us_errs.value.ui64);
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			/* Another initiator holds a reservation */
			status = EACCES;
			break;
		default:
			break;
		}
		break;
	default:
		if (un->un_unmapstats)
			atomic_inc_64(&un->un_unmapstats->us_errs.value.ui64);
		break;
	}

	return (status);
}
21553 
21554 /*
21555  * Returns a pointer to the i'th block descriptor inside an UNMAP param list.
21556  */
21557 static inline unmap_blk_descr_t *
21558 UNMAP_blk_descr_i(void *buf, size_t i)
21559 {
21560 	return ((unmap_blk_descr_t *)((uintptr_t)buf +
21561 	    sizeof (unmap_param_hdr_t) + (i * sizeof (unmap_blk_descr_t))));
21562 }
21563 
21564 /*
21565  * Takes the list of extents from sd_send_scsi_UNMAP, chops it up, prepares
21566  * UNMAP block descriptors and issues individual SCSI UNMAP commands. While
21567  * doing so we consult the block limits to determine at most how many
21568  * extents and LBAs we can UNMAP in one command.
21569  * If a command fails for whatever reason, extent list processing is aborted
21570  * and the failed command's status is returned. Otherwise returns 0 on
21571  * success.
21572  */
static int
sd_send_scsi_UNMAP_issue(dev_t dev, sd_ssc_t *ssc, const dkioc_free_list_t *dfl)
{
	struct sd_lun		*un = ssc->ssc_un;
	unmap_param_hdr_t	*uph;
	sd_blk_limits_t		*lim = &un->un_blk_lim;
	int			rval = 0;
	int			partition;
	/* partition offset & length in system blocks */
	diskaddr_t		part_off_sysblks = 0, part_len_sysblks = 0;
	uint64_t		part_off, part_len;
	uint64_t		descr_cnt_lim, byte_cnt_lim;
	/* running totals for the UNMAP command currently being built */
	uint64_t		descr_issued = 0, bytes_issued = 0;

	uph = kmem_zalloc(SD_UNMAP_PARAM_LIST_MAXSZ, KM_SLEEP);

	/* Extents are partition-relative; fetch the partition geometry */
	partition = SDPART(dev);
	rval = cmlb_partinfo(un->un_cmlbhandle, partition, &part_len_sysblks,
	    &part_off_sysblks, NULL, NULL, (void *)SD_PATH_DIRECT);
	if (rval != 0)
		goto out;
	part_off = SD_SYSBLOCKS2BYTES(part_off_sysblks);
	part_len = SD_SYSBLOCKS2BYTES(part_len_sysblks);

	ASSERT(un->un_blk_lim.lim_max_unmap_lba_cnt != 0);
	ASSERT(un->un_blk_lim.lim_max_unmap_descr_cnt != 0);
	/* Spec says 0xffffffff are special values, so compute maximums. */
	byte_cnt_lim = lim->lim_max_unmap_lba_cnt < UINT32_MAX ?
	    (uint64_t)lim->lim_max_unmap_lba_cnt * un->un_tgt_blocksize :
	    UINT64_MAX;
	descr_cnt_lim = MIN(lim->lim_max_unmap_descr_cnt, SD_UNMAP_MAX_DESCR);

	if (dfl->dfl_offset >= part_len) {
		rval = SET_ERROR(EINVAL);
		goto out;
	}

	for (size_t i = 0; i < dfl->dfl_num_exts; i++) {
		const dkioc_free_list_ext_t *ext = &dfl->dfl_exts[i];
		uint64_t ext_start = ext->dfle_start;
		uint64_t ext_length = ext->dfle_length;

		while (ext_length > 0) {
			unmap_blk_descr_t *ubd;
			/* Respect device limit on LBA count per command */
			uint64_t len = MIN(MIN(ext_length, byte_cnt_lim -
			    bytes_issued), SD_TGTBLOCKS2BYTES(un, UINT32_MAX));

			/*
			 * check partition limits; the middle two clauses
			 * also catch uint64_t overflow of the additions
			 */
			if (ext_start >= part_len ||
			    ext_start + len < ext_start ||
			    dfl->dfl_offset + ext_start + len <
			    dfl->dfl_offset ||
			    dfl->dfl_offset + ext_start + len > part_len) {
				rval = SET_ERROR(EINVAL);
				goto out;
			}

			ASSERT3U(descr_issued, <, descr_cnt_lim);
			ASSERT3U(bytes_issued, <, byte_cnt_lim);
			ubd = UNMAP_blk_descr_i(uph, descr_issued);

			/* adjust in-partition addresses to be device-global */
			ubd->ubd_lba = BE_64(SD_BYTES2TGTBLOCKS(un,
			    dfl->dfl_offset + ext_start + part_off));
			ubd->ubd_lba_cnt = BE_32(SD_BYTES2TGTBLOCKS(un, len));

			descr_issued++;
			bytes_issued += len;

			/* Issue command when device limits reached */
			if (descr_issued == descr_cnt_lim ||
			    bytes_issued == byte_cnt_lim) {
				rval = sd_send_scsi_UNMAP_issue_one(ssc, uph,
				    descr_issued, bytes_issued);
				if (rval != 0)
					goto out;
				/* start accumulating the next command */
				descr_issued = 0;
				bytes_issued = 0;
			}

			ext_start += len;
			ext_length -= len;
		}
	}

	if (descr_issued > 0) {
		/* issue last command */
		rval = sd_send_scsi_UNMAP_issue_one(ssc, uph, descr_issued,
		    bytes_issued);
	}

out:
	kmem_free(uph, SD_UNMAP_PARAM_LIST_MAXSZ);
	return (rval);
}
21669 
21670 /*
21671  * Issues one or several UNMAP commands based on a list of extents to be
21672  * unmapped. The internal multi-command processing is hidden, as the exact
21673  * number of commands and extents per command is limited by both SCSI
21674  * command syntax and device limits (as expressed in the SCSI Block Limits
21675  * VPD page and un_blk_lim in struct sd_lun).
21676  * Returns zero on success, or the error code of the first failed SCSI UNMAP
21677  * command.
21678  */
static int
sd_send_scsi_UNMAP(dev_t dev, sd_ssc_t *ssc, dkioc_free_list_t *dfl, int flag)
{
	struct sd_lun		*un = ssc->ssc_un;
	int			rval = 0;

	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(dfl != NULL);

	/* Per spec, any of these conditions signals lack of UNMAP support. */
	if (!(un->un_thin_flags & SD_THIN_PROV_ENABLED) ||
	    un->un_blk_lim.lim_max_unmap_descr_cnt == 0 ||
	    un->un_blk_lim.lim_max_unmap_lba_cnt == 0) {
		return (SET_ERROR(ENOTSUP));
	}

	/*
	 * For userspace calls we must copy in; dfl_copyin() replaces the
	 * user pointer with a kernel copy that we own (and free below).
	 */
	if (!(flag & FKIOCTL)) {
		int err = dfl_copyin(dfl, &dfl, flag, KM_SLEEP);
		if (err != 0)
			return (err);
	} else if (dfl->dfl_num_exts > DFL_COPYIN_MAX_EXTS) {
		/* Kernel callers are expected to honor the extent limit */
		ASSERT3U(dfl->dfl_num_exts, <=, DFL_COPYIN_MAX_EXTS);
		return (SET_ERROR(EINVAL));
	}

	rval = sd_send_scsi_UNMAP_issue(dev, ssc, dfl);

	/* Release the kernel copy made by dfl_copyin() above */
	if (!(flag & FKIOCTL)) {
		dfl_free(dfl);
		dfl = NULL;
	}

	return (rval);
}
21714 
21715 /*
21716  *    Function: sd_send_scsi_GET_CONFIGURATION
21717  *
21718  * Description: Issues the get configuration command to the device.
21719  *		Called from sd_check_for_writable_cd & sd_get_media_info
21720  *		caller needs to ensure that buflen = SD_PROFILE_HEADER_LEN
21721  *   Arguments: ssc
21722  *		ucmdbuf
21723  *		rqbuf
21724  *		rqbuflen
21725  *		bufaddr
21726  *		buflen
21727  *		path_flag
21728  *
21729  * Return Code: 0   - Success
21730  *		errno return code from sd_ssc_send()
21731  *
21732  *     Context: Can sleep. Does not return until command is completed.
21733  *
21734  */
21735 
21736 static int
21737 sd_send_scsi_GET_CONFIGURATION(sd_ssc_t *ssc, struct uscsi_cmd *ucmdbuf,
21738     uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen,
21739     int path_flag)
21740 {
21741 	char	cdb[CDB_GROUP1];
21742 	int	status;
21743 	struct sd_lun	*un;
21744 
21745 	ASSERT(ssc != NULL);
21746 	un = ssc->ssc_un;
21747 	ASSERT(un != NULL);
21748 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21749 	ASSERT(bufaddr != NULL);
21750 	ASSERT(ucmdbuf != NULL);
21751 	ASSERT(rqbuf != NULL);
21752 
21753 	SD_TRACE(SD_LOG_IO, un,
21754 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
21755 
21756 	bzero(cdb, sizeof (cdb));
21757 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
21758 	bzero(rqbuf, rqbuflen);
21759 	bzero(bufaddr, buflen);
21760 
21761 	/*
21762 	 * Set up cdb field for the get configuration command.
21763 	 */
21764 	cdb[0] = SCMD_GET_CONFIGURATION;
21765 	cdb[1] = 0x02;  /* Requested Type */
21766 	cdb[8] = SD_PROFILE_HEADER_LEN;
21767 	ucmdbuf->uscsi_cdb = cdb;
21768 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
21769 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
21770 	ucmdbuf->uscsi_buflen = buflen;
21771 	ucmdbuf->uscsi_timeout = sd_io_time;
21772 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
21773 	ucmdbuf->uscsi_rqlen = rqbuflen;
21774 	ucmdbuf->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT | USCSI_READ;
21775 
21776 	status = sd_ssc_send(ssc, ucmdbuf, FKIOCTL,
21777 	    UIO_SYSSPACE, path_flag);
21778 
21779 	switch (status) {
21780 	case 0:
21781 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21782 		break;  /* Success! */
21783 	case EIO:
21784 		switch (ucmdbuf->uscsi_status) {
21785 		case STATUS_RESERVATION_CONFLICT:
21786 			status = EACCES;
21787 			break;
21788 		default:
21789 			break;
21790 		}
21791 		break;
21792 	default:
21793 		break;
21794 	}
21795 
21796 	if (status == 0) {
21797 		SD_DUMP_MEMORY(un, SD_LOG_IO,
21798 		    "sd_send_scsi_GET_CONFIGURATION: data",
21799 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
21800 	}
21801 
21802 	SD_TRACE(SD_LOG_IO, un,
21803 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
21804 
21805 	return (status);
21806 }
21807 
21808 /*
21809  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
21810  *
21811  * Description: Issues the get configuration command to the device to
21812  *              retrieve a specific feature. Called from
21813  *		sd_check_for_writable_cd & sd_set_mmc_caps.
21814  *   Arguments: ssc
21815  *              ucmdbuf
21816  *              rqbuf
21817  *              rqbuflen
21818  *              bufaddr
21819  *              buflen
21820  *		feature
21821  *
21822  * Return Code: 0   - Success
21823  *              errno return code from sd_ssc_send()
21824  *
21825  *     Context: Can sleep. Does not return until command is completed.
21826  *
21827  */
21828 static int
21829 sd_send_scsi_feature_GET_CONFIGURATION(sd_ssc_t *ssc, struct uscsi_cmd *ucmdbuf,
21830     uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen,
21831     char feature, int path_flag)
21832 {
21833 	char    cdb[CDB_GROUP1];
21834 	int	status;
21835 	struct sd_lun	*un;
21836 
21837 	ASSERT(ssc != NULL);
21838 	un = ssc->ssc_un;
21839 	ASSERT(un != NULL);
21840 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21841 	ASSERT(bufaddr != NULL);
21842 	ASSERT(ucmdbuf != NULL);
21843 	ASSERT(rqbuf != NULL);
21844 
21845 	SD_TRACE(SD_LOG_IO, un,
21846 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
21847 
21848 	bzero(cdb, sizeof (cdb));
21849 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
21850 	bzero(rqbuf, rqbuflen);
21851 	bzero(bufaddr, buflen);
21852 
21853 	/*
21854 	 * Set up cdb field for the get configuration command.
21855 	 */
21856 	cdb[0] = SCMD_GET_CONFIGURATION;
21857 	cdb[1] = 0x02;  /* Requested Type */
21858 	cdb[3] = feature;
21859 	cdb[8] = buflen;
21860 	ucmdbuf->uscsi_cdb = cdb;
21861 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
21862 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
21863 	ucmdbuf->uscsi_buflen = buflen;
21864 	ucmdbuf->uscsi_timeout = sd_io_time;
21865 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
21866 	ucmdbuf->uscsi_rqlen = rqbuflen;
21867 	ucmdbuf->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT | USCSI_READ;
21868 
21869 	status = sd_ssc_send(ssc, ucmdbuf, FKIOCTL,
21870 	    UIO_SYSSPACE, path_flag);
21871 
21872 	switch (status) {
21873 	case 0:
21874 
21875 		break;  /* Success! */
21876 	case EIO:
21877 		switch (ucmdbuf->uscsi_status) {
21878 		case STATUS_RESERVATION_CONFLICT:
21879 			status = EACCES;
21880 			break;
21881 		default:
21882 			break;
21883 		}
21884 		break;
21885 	default:
21886 		break;
21887 	}
21888 
21889 	if (status == 0) {
21890 		SD_DUMP_MEMORY(un, SD_LOG_IO,
21891 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
21892 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
21893 	}
21894 
21895 	SD_TRACE(SD_LOG_IO, un,
21896 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
21897 
21898 	return (status);
21899 }
21900 
21901 
21902 /*
21903  *    Function: sd_send_scsi_MODE_SENSE
21904  *
21905  * Description: Utility function for issuing a scsi MODE SENSE command.
21906  *		Note: This routine uses a consistent implementation for Group0,
21907  *		Group1, and Group2 commands across all platforms. ATAPI devices
21908  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
21909  *
21910  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21911  *                      structure for this target.
21912  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
21913  *			  CDB_GROUP[1|2] (10 byte).
21914  *		bufaddr - buffer for page data retrieved from the target.
21915  *		buflen - size of page to be retrieved.
21916  *		page_code - page code of data to be retrieved from the target.
21917  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
21918  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
21919  *			to use the USCSI "direct" chain and bypass the normal
21920  *			command waitq.
21921  *
21922  * Return Code: 0   - Success
21923  *		errno return code from sd_ssc_send()
21924  *
21925  *     Context: Can sleep. Does not return until command is completed.
21926  */
21927 
static int
sd_send_scsi_MODE_SENSE(sd_ssc_t *ssc, int cdbsize, uchar_t *bufaddr,
    size_t buflen,  uchar_t page_code, int path_flag)
{
	struct	scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	int			headlen;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(bufaddr != NULL);
	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
	    (cdbsize == CDB_GROUP2));

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
	bzero(bufaddr, buflen);

	/*
	 * Group 0 uses the 6-byte MODE SENSE; anything else uses the
	 * 10-byte variant.  The expected mode header length differs
	 * between the two and is used below to sanity-check the reply.
	 */
	if (cdbsize == CDB_GROUP0) {
		cdb.scc_cmd = SCMD_MODE_SENSE;
		cdb.cdb_opaque[2] = page_code;
		FORMG0COUNT(&cdb, buflen);
		headlen = MODE_HEADER_LENGTH;
	} else {
		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
		cdb.cdb_opaque[2] = page_code;
		FORMG1COUNT(&cdb, buflen);
		headlen = MODE_HEADER_LENGTH_GRP2;
	}

	ASSERT(headlen <= buflen);
	SD_FILL_SCSI1_LUN_CDB(un, &cdb);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
	ucmd_buf.uscsi_buflen	= buflen;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, path_flag);

	switch (status) {
	case 0:
		/*
		 * sr_check_wp() uses 0x3f page code and check the header of
		 * mode page to determine if target device is write-protected.
		 * But some USB devices return 0 bytes for 0x3f page code. For
		 * this case, make sure that mode page header is returned at
		 * least.
		 */
		if (buflen - ucmd_buf.uscsi_resid <  headlen) {
			status = EIO;
			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
			    "mode page header is not returned");
		}
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			/* Another initiator holds a reservation */
			status = EACCES;
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	if (status == 0) {
		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
	}
	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");

	return (status);
}
22018 
22019 
22020 /*
22021  *    Function: sd_send_scsi_MODE_SELECT
22022  *
22023  * Description: Utility function for issuing a scsi MODE SELECT command.
22024  *		Note: This routine uses a consistent implementation for Group0,
22025  *		Group1, and Group2 commands across all platforms. ATAPI devices
22026  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
22027  *
22028  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
22029  *                      structure for this target.
22030  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
22031  *			  CDB_GROUP[1|2] (10 byte).
22032  *		bufaddr - buffer for page data retrieved from the target.
22033  *		buflen - size of page to be retrieved.
22034  *		save_page - boolean to determine if SP bit should be set.
22035  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
22036  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
22037  *			to use the USCSI "direct" chain and bypass the normal
22038  *			command waitq.
22039  *
22040  * Return Code: 0   - Success
22041  *		errno return code from sd_ssc_send()
22042  *
22043  *     Context: Can sleep. Does not return until command is completed.
22044  */
22045 
static int
sd_send_scsi_MODE_SELECT(sd_ssc_t *ssc, int cdbsize, uchar_t *bufaddr,
    size_t buflen,  uchar_t save_page, int path_flag)
{
	struct	scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(bufaddr != NULL);
	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
	    (cdbsize == CDB_GROUP2));

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	/* Set the PF bit for many third party drives */
	cdb.cdb_opaque[1] = 0x10;

	/* Set the savepage(SP) bit if given */
	if (save_page == SD_SAVE_PAGE) {
		cdb.cdb_opaque[1] |= 0x01;
	}

	/* 6-byte MODE SELECT for group 0, 10-byte otherwise */
	if (cdbsize == CDB_GROUP0) {
		cdb.scc_cmd = SCMD_MODE_SELECT;
		FORMG0COUNT(&cdb, buflen);
	} else {
		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
		FORMG1COUNT(&cdb, buflen);
	}

	SD_FILL_SCSI1_LUN_CDB(un, &cdb);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
	ucmd_buf.uscsi_buflen	= buflen;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, path_flag);

	switch (status) {
	case 0:
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			/* Another initiator holds a reservation */
			status = EACCES;
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	if (status == 0) {
		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
	}
	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");

	return (status);
}
22126 
22127 
22128 /*
22129  *    Function: sd_send_scsi_RDWR
22130  *
22131  * Description: Issue a scsi READ or WRITE command with the given parameters.
22132  *
22133  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
22134  *                      structure for this target.
22135  *		cmd:	 SCMD_READ or SCMD_WRITE
22136  *		bufaddr: Address of caller's buffer to receive the RDWR data
22137  *		buflen:  Length of caller's buffer receive the RDWR data.
22138  *		start_block: Block number for the start of the RDWR operation.
22139  *			 (Assumes target-native block size.)
22140  *		residp:  Pointer to variable to receive the residual of the
22141  *			 RDWR operation (may be NULL if no residual requested).
22142  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
22143  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
22144  *			to use the USCSI "direct" chain and bypass the normal
22145  *			command waitq.
22146  *
22147  * Return Code: 0   - Success
22148  *		errno return code from sd_ssc_send()
22149  *
22150  *     Context: Can sleep. Does not return until command is completed.
22151  */
22152 
static int
sd_send_scsi_RDWR(sd_ssc_t *ssc, uchar_t cmd, void *bufaddr,
    size_t buflen, daddr_t start_block, int path_flag)
{
	struct	scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	uint32_t		block_count;
	int			status;
	int			cdbsize;
	uchar_t			flag;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(bufaddr != NULL);
	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);

	/* buflen cannot be converted to blocks without a valid blocksize */
	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
		return (EINVAL);
	}

	mutex_enter(SD_MUTEX(un));
	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
	mutex_exit(SD_MUTEX(un));

	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;

	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
	    bufaddr, buflen, start_block, block_count);

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	/*
	 * Compute CDB size to use: 16-byte for LBAs above 32 bits,
	 * 10-byte when the LBA needs more than the 21 bits a 6-byte
	 * CDB can address (or the device is ATAPI), 6-byte otherwise.
	 */
	if (start_block > 0xffffffff)
		cdbsize = CDB_GROUP4;
	else if ((start_block & 0xFFE00000) ||
	    (un->un_f_cfg_is_atapi == TRUE))
		cdbsize = CDB_GROUP1;
	else
		cdbsize = CDB_GROUP0;

	switch (cdbsize) {
	case CDB_GROUP0:	/* 6-byte CDBs */
		cdb.scc_cmd = cmd;
		FORMG0ADDR(&cdb, start_block);
		FORMG0COUNT(&cdb, block_count);
		break;
	case CDB_GROUP1:	/* 10-byte CDBs */
		cdb.scc_cmd = cmd | SCMD_GROUP1;
		FORMG1ADDR(&cdb, start_block);
		FORMG1COUNT(&cdb, block_count);
		break;
	case CDB_GROUP4:	/* 16-byte CDBs */
		cdb.scc_cmd = cmd | SCMD_GROUP4;
		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
		FORMG4COUNT(&cdb, block_count);
		break;
	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
	default:
		/* All others reserved */
		return (EINVAL);
	}

	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
	SD_FILL_SCSI1_LUN_CDB(un, &cdb);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
	ucmd_buf.uscsi_bufaddr	= bufaddr;
	ucmd_buf.uscsi_buflen	= buflen;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;
	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, path_flag);

	switch (status) {
	case 0:
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			/* Another initiator holds a reservation */
			status = EACCES;
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	if (status == 0) {
		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");

	return (status);
}
22264 
22265 
22266 /*
22267  *    Function: sd_send_scsi_LOG_SENSE
22268  *
22269  * Description: Issue a scsi LOG_SENSE command with the given parameters.
22270  *
 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
 *                      structure for this target.
 *		bufaddr - buffer to receive the log page data
 *		buflen  - size of the buffer at bufaddr, in bytes
 *		page_code - log page to retrieve
 *		page_control - page control field of the CDB
 *		param_ptr - parameter pointer: first log parameter to return
 *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
 *			    not to wait; SD_PATH_STANDARD to wait
22273  *
22274  * Return Code: 0   - Success
22275  *		errno return code from sd_ssc_send()
22276  *
22277  *     Context: Can sleep. Does not return until command is completed.
22278  */
22279 
static int
sd_send_scsi_LOG_SENSE(sd_ssc_t *ssc, uchar_t *bufaddr, uint16_t buflen,
    uchar_t page_code, uchar_t page_control, uint16_t param_ptr, int path_flag)
{
	struct scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	/*
	 * Build the 10-byte LOG SENSE CDB: byte 2 carries the page control
	 * field in its top two bits and the page code in the low six bits;
	 * bytes 5-6 carry the parameter pointer (MSB first); FORMG1COUNT
	 * fills in the allocation length.
	 */
	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
	FORMG1COUNT(&cdb, buflen);

	/*
	 * Issue the command via uscsi with request sense enabled so that
	 * a CHECK CONDITION result can be examined below.
	 */
	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
	ucmd_buf.uscsi_buflen	= buflen;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, path_flag);

	switch (status) {
	case 0:
		break;
	case EIO:
		/*
		 * Refine the generic EIO using the SCSI status byte:
		 * a reservation conflict is reported to the caller as
		 * EACCES, and a CHECK CONDITION is examined further via
		 * the sense data gathered above.
		 */
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_key((uint8_t *)&sense_buf) ==
			    KEY_ILLEGAL_REQUEST) &&
			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x24)) {
				/*
				 * ASC 0x24: INVALID FIELD IN CDB
				 */
				switch (page_code) {
				case START_STOP_CYCLE_PAGE:
					/*
					 * The start stop cycle counter is
					 * implemented as page 0x31 in earlier
					 * generation disks. In new generation
					 * disks the start stop cycle counter is
					 * implemented as page 0xE. To properly
					 * handle this case if an attempt for
					 * log page 0xE is made and fails we
					 * will try again using page 0x31.
					 *
					 * Network storage BU committed to
					 * maintain the page 0x31 for this
					 * purpose and will not have any other
					 * page implemented with page code 0x31
					 * until all disks transition to the
					 * standard page.
					 */
					mutex_enter(SD_MUTEX(un));
					un->un_start_stop_cycle_page =
					    START_STOP_CYCLE_VU_PAGE;
					/*
					 * Patch the page code in the CDB and
					 * reissue the same uscsi command with
					 * the vendor-unique page. The prior
					 * failure is explicitly ignored for
					 * FMA assessment purposes.
					 */
					cdb.cdb_opaque[2] =
					    (char)(page_control << 6) |
					    un->un_start_stop_cycle_page;
					mutex_exit(SD_MUTEX(un));
					sd_ssc_assessment(ssc, SD_FMT_IGNORE);
					status = sd_ssc_send(
					    ssc, &ucmd_buf, FKIOCTL,
					    UIO_SYSSPACE, path_flag);

					break;
				case TEMPERATURE_PAGE:
					/* Temperature page unsupported. */
					status = ENOTTY;
					break;
				default:
					break;
				}
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	if (status == 0) {
		/* Success (possibly after the 0x31 retry above). */
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");

	return (status);
}
22393 
22394 
22395 /*
22396  *    Function: sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION
22397  *
22398  * Description: Issue the scsi GET EVENT STATUS NOTIFICATION command.
22399  *
22400  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
22401  *                      structure for this target.
 *		bufaddr - buffer into which the event status data is returned
 *		buflen  - size of the buffer at bufaddr, in bytes
 *		class_req - notification class request mask: the event
 *			  classes the device is asked to report on
22405  *
22406  * Return Code: 0   - Success
22407  *		errno return code from sd_ssc_send()
22408  *
22409  *     Context: Can sleep. Does not return until command is completed.
22410  */
22411 
22412 static int
22413 sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION(sd_ssc_t *ssc, uchar_t *bufaddr,
22414     size_t buflen, uchar_t class_req)
22415 {
22416 	union scsi_cdb		cdb;
22417 	struct uscsi_cmd	ucmd_buf;
22418 	int			status;
22419 	struct sd_lun		*un;
22420 
22421 	ASSERT(ssc != NULL);
22422 	un = ssc->ssc_un;
22423 	ASSERT(un != NULL);
22424 	ASSERT(!mutex_owned(SD_MUTEX(un)));
22425 	ASSERT(bufaddr != NULL);
22426 
22427 	SD_TRACE(SD_LOG_IO, un,
22428 	    "sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION: entry: un:0x%p\n", un);
22429 
22430 	bzero(&cdb, sizeof (cdb));
22431 	bzero(&ucmd_buf, sizeof (ucmd_buf));
22432 	bzero(bufaddr, buflen);
22433 
22434 	cdb.scc_cmd = SCMD_GET_EVENT_STATUS_NOTIFICATION;
22435 	cdb.cdb_opaque[1] = 1; /* polled */
22436 	cdb.cdb_opaque[4] = class_req;
22437 	FORMG1COUNT(&cdb, buflen);
22438 
22439 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
22440 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
22441 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
22442 	ucmd_buf.uscsi_buflen	= buflen;
22443 	ucmd_buf.uscsi_rqbuf	= NULL;
22444 	ucmd_buf.uscsi_rqlen	= 0;
22445 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
22446 	ucmd_buf.uscsi_timeout	= 60;
22447 
22448 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
22449 	    UIO_SYSSPACE, SD_PATH_DIRECT);
22450 
22451 	/*
22452 	 * Only handle status == 0, the upper-level caller
22453 	 * will put different assessment based on the context.
22454 	 */
22455 	if (status == 0) {
22456 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
22457 
22458 		if (ucmd_buf.uscsi_resid != 0) {
22459 			status = EIO;
22460 		}
22461 	}
22462 
22463 	SD_TRACE(SD_LOG_IO, un,
22464 	    "sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION: exit\n");
22465 
22466 	return (status);
22467 }
22468 
22469 
22470 static boolean_t
22471 sd_gesn_media_data_valid(uchar_t *data)
22472 {
22473 	uint16_t			len;
22474 
22475 	len = (data[1] << 8) | data[0];
22476 	return ((len >= 6) &&
22477 	    ((data[2] & SD_GESN_HEADER_NEA) == 0) &&
22478 	    ((data[2] & SD_GESN_HEADER_CLASS) == SD_GESN_MEDIA_CLASS) &&
22479 	    ((data[3] & (1 << SD_GESN_MEDIA_CLASS)) != 0));
22480 }
22481 
22482 
22483 /*
22484  *    Function: sdioctl
22485  *
22486  * Description: Driver's ioctl(9e) entry point function.
22487  *
22488  *   Arguments: dev     - device number
22489  *		cmd     - ioctl operation to be performed
22490  *		arg     - user argument, contains data to be set or reference
22491  *			  parameter for get
22492  *		flag    - bit flag, indicating open settings, 32/64 bit type
22493  *		cred_p  - user credential pointer
22494  *		rval_p  - calling process return value (OPT)
22495  *
22496  * Return Code: EINVAL
22497  *		ENOTTY
22498  *		ENXIO
22499  *		EIO
22500  *		EFAULT
22501  *		ENOTSUP
22502  *		EPERM
22503  *
22504  *     Context: Called from the device switch at normal priority.
22505  */
22506 
22507 static int
22508 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
22509 {
22510 	struct sd_lun	*un = NULL;
22511 	int		err = 0;
22512 	int		i = 0;
22513 	cred_t		*cr;
22514 	int		tmprval = EINVAL;
22515 	boolean_t	is_valid;
22516 	sd_ssc_t	*ssc;
22517 
22518 	/*
22519 	 * All device accesses go thru sdstrategy where we check on suspend
22520 	 * status
22521 	 */
22522 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22523 		return (ENXIO);
22524 	}
22525 
22526 	ASSERT(!mutex_owned(SD_MUTEX(un)));
22527 
22528 	/* Initialize sd_ssc_t for internal uscsi commands */
22529 	ssc = sd_ssc_init(un);
22530 
22531 	is_valid = SD_IS_VALID_LABEL(un);
22532 
22533 	/*
22534 	 * Moved this wait from sd_uscsi_strategy to here for
22535 	 * reasons of deadlock prevention. Internal driver commands,
22536 	 * specifically those to change a devices power level, result
22537 	 * in a call to sd_uscsi_strategy.
22538 	 */
22539 	mutex_enter(SD_MUTEX(un));
22540 	while ((un->un_state == SD_STATE_SUSPENDED) ||
22541 	    (un->un_state == SD_STATE_PM_CHANGING)) {
22542 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
22543 	}
22544 	/*
22545 	 * Twiddling the counter here protects commands from now
22546 	 * through to the top of sd_uscsi_strategy. Without the
22547 	 * counter inc. a power down, for example, could get in
22548 	 * after the above check for state is made and before
22549 	 * execution gets to the top of sd_uscsi_strategy.
22550 	 * That would cause problems.
22551 	 */
22552 	un->un_ncmds_in_driver++;
22553 
22554 	if (!is_valid &&
22555 	    (flag & (FNDELAY | FNONBLOCK))) {
22556 		switch (cmd) {
22557 		case DKIOCGGEOM:	/* SD_PATH_DIRECT */
22558 		case DKIOCGVTOC:
22559 		case DKIOCGEXTVTOC:
22560 		case DKIOCGAPART:
22561 		case DKIOCPARTINFO:
22562 		case DKIOCEXTPARTINFO:
22563 		case DKIOCSGEOM:
22564 		case DKIOCSAPART:
22565 		case DKIOCGETEFI:
22566 		case DKIOCPARTITION:
22567 		case DKIOCSVTOC:
22568 		case DKIOCSEXTVTOC:
22569 		case DKIOCSETEFI:
22570 		case DKIOCGMBOOT:
22571 		case DKIOCSMBOOT:
22572 		case DKIOCG_PHYGEOM:
22573 		case DKIOCG_VIRTGEOM:
22574 #if defined(__x86)
22575 		case DKIOCSETEXTPART:
22576 #endif
22577 			/* let cmlb handle it */
22578 			goto skip_ready_valid;
22579 
22580 		case CDROMPAUSE:
22581 		case CDROMRESUME:
22582 		case CDROMPLAYMSF:
22583 		case CDROMPLAYTRKIND:
22584 		case CDROMREADTOCHDR:
22585 		case CDROMREADTOCENTRY:
22586 		case CDROMSTOP:
22587 		case CDROMSTART:
22588 		case CDROMVOLCTRL:
22589 		case CDROMSUBCHNL:
22590 		case CDROMREADMODE2:
22591 		case CDROMREADMODE1:
22592 		case CDROMREADOFFSET:
22593 		case CDROMSBLKMODE:
22594 		case CDROMGBLKMODE:
22595 		case CDROMGDRVSPEED:
22596 		case CDROMSDRVSPEED:
22597 		case CDROMCDDA:
22598 		case CDROMCDXA:
22599 		case CDROMSUBCODE:
22600 			if (!ISCD(un)) {
22601 				un->un_ncmds_in_driver--;
22602 				ASSERT(un->un_ncmds_in_driver >= 0);
22603 				mutex_exit(SD_MUTEX(un));
22604 				err = ENOTTY;
22605 				goto done_without_assess;
22606 			}
22607 			break;
22608 		case FDEJECT:
22609 		case DKIOCEJECT:
22610 		case CDROMEJECT:
22611 			if (!un->un_f_eject_media_supported) {
22612 				un->un_ncmds_in_driver--;
22613 				ASSERT(un->un_ncmds_in_driver >= 0);
22614 				mutex_exit(SD_MUTEX(un));
22615 				err = ENOTTY;
22616 				goto done_without_assess;
22617 			}
22618 			break;
22619 		case DKIOCFLUSHWRITECACHE:
22620 			mutex_exit(SD_MUTEX(un));
22621 			err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
22622 			if (err != 0) {
22623 				mutex_enter(SD_MUTEX(un));
22624 				un->un_ncmds_in_driver--;
22625 				ASSERT(un->un_ncmds_in_driver >= 0);
22626 				mutex_exit(SD_MUTEX(un));
22627 				err = EIO;
22628 				goto done_quick_assess;
22629 			}
22630 			mutex_enter(SD_MUTEX(un));
22631 			/* FALLTHROUGH */
22632 		case DKIOCREMOVABLE:
22633 		case DKIOCHOTPLUGGABLE:
22634 		case DKIOCINFO:
22635 		case DKIOCGMEDIAINFO:
22636 		case DKIOCGMEDIAINFOEXT:
22637 		case DKIOCSOLIDSTATE:
22638 		case DKIOC_CANFREE:
22639 		case MHIOCENFAILFAST:
22640 		case MHIOCSTATUS:
22641 		case MHIOCTKOWN:
22642 		case MHIOCRELEASE:
22643 		case MHIOCGRP_INKEYS:
22644 		case MHIOCGRP_INRESV:
22645 		case MHIOCGRP_REGISTER:
22646 		case MHIOCGRP_CLEAR:
22647 		case MHIOCGRP_RESERVE:
22648 		case MHIOCGRP_PREEMPTANDABORT:
22649 		case MHIOCGRP_REGISTERANDIGNOREKEY:
22650 		case CDROMCLOSETRAY:
22651 		case USCSICMD:
22652 		case USCSIMAXXFER:
22653 			goto skip_ready_valid;
22654 		default:
22655 			break;
22656 		}
22657 
22658 		mutex_exit(SD_MUTEX(un));
22659 		err = sd_ready_and_valid(ssc, SDPART(dev));
22660 		mutex_enter(SD_MUTEX(un));
22661 
22662 		if (err != SD_READY_VALID) {
22663 			switch (cmd) {
22664 			case DKIOCSTATE:
22665 			case CDROMGDRVSPEED:
22666 			case CDROMSDRVSPEED:
22667 			case FDEJECT:	/* for eject command */
22668 			case DKIOCEJECT:
22669 			case CDROMEJECT:
22670 			case DKIOCREMOVABLE:
22671 			case DKIOCHOTPLUGGABLE:
22672 				break;
22673 			default:
22674 				if (un->un_f_has_removable_media) {
22675 					err = ENXIO;
22676 				} else {
22677 				/* Do not map SD_RESERVED_BY_OTHERS to EIO */
22678 					if (err == SD_RESERVED_BY_OTHERS) {
22679 						err = EACCES;
22680 					} else {
22681 						err = EIO;
22682 					}
22683 				}
22684 				un->un_ncmds_in_driver--;
22685 				ASSERT(un->un_ncmds_in_driver >= 0);
22686 				mutex_exit(SD_MUTEX(un));
22687 
22688 				goto done_without_assess;
22689 			}
22690 		}
22691 	}
22692 
22693 skip_ready_valid:
22694 	mutex_exit(SD_MUTEX(un));
22695 
22696 	switch (cmd) {
22697 	case DKIOCINFO:
22698 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
22699 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
22700 		break;
22701 
22702 	case DKIOCGMEDIAINFO:
22703 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
22704 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
22705 		break;
22706 
22707 	case DKIOCGMEDIAINFOEXT:
22708 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFOEXT\n");
22709 		err = sd_get_media_info_ext(dev, (caddr_t)arg, flag);
22710 		break;
22711 
22712 	case DKIOCGGEOM:
22713 	case DKIOCGVTOC:
22714 	case DKIOCGEXTVTOC:
22715 	case DKIOCGAPART:
22716 	case DKIOCPARTINFO:
22717 	case DKIOCEXTPARTINFO:
22718 	case DKIOCSGEOM:
22719 	case DKIOCSAPART:
22720 	case DKIOCGETEFI:
22721 	case DKIOCPARTITION:
22722 	case DKIOCSVTOC:
22723 	case DKIOCSEXTVTOC:
22724 	case DKIOCSETEFI:
22725 	case DKIOCGMBOOT:
22726 	case DKIOCSMBOOT:
22727 	case DKIOCG_PHYGEOM:
22728 	case DKIOCG_VIRTGEOM:
22729 #if defined(__x86)
22730 	case DKIOCSETEXTPART:
22731 #endif
22732 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOC %d\n", cmd);
22733 
22734 		/* TUR should spin up */
22735 
22736 		if (un->un_f_has_removable_media)
22737 			err = sd_send_scsi_TEST_UNIT_READY(ssc,
22738 			    SD_CHECK_FOR_MEDIA);
22739 
22740 		else
22741 			err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
22742 
22743 		if (err != 0)
22744 			goto done_with_assess;
22745 
22746 		err = cmlb_ioctl(un->un_cmlbhandle, dev,
22747 		    cmd, arg, flag, cred_p, rval_p, (void *)SD_PATH_DIRECT);
22748 
22749 		if ((err == 0) &&
22750 		    ((cmd == DKIOCSETEFI) ||
22751 		    ((un->un_f_pkstats_enabled) &&
22752 		    (cmd == DKIOCSAPART || cmd == DKIOCSVTOC ||
22753 		    cmd == DKIOCSEXTVTOC)))) {
22754 
22755 			tmprval = cmlb_validate(un->un_cmlbhandle, CMLB_SILENT,
22756 			    (void *)SD_PATH_DIRECT);
22757 			if ((tmprval == 0) && un->un_f_pkstats_enabled) {
22758 				sd_set_pstats(un);
22759 				SD_TRACE(SD_LOG_IO_PARTITION, un,
22760 				    "sd_ioctl: un:0x%p pstats created and "
22761 				    "set\n", un);
22762 			}
22763 		}
22764 
22765 		if ((cmd == DKIOCSVTOC || cmd == DKIOCSEXTVTOC) ||
22766 		    ((cmd == DKIOCSETEFI) && (tmprval == 0))) {
22767 
22768 			mutex_enter(SD_MUTEX(un));
22769 			if (un->un_f_devid_supported &&
22770 			    (un->un_f_opt_fab_devid == TRUE)) {
22771 				if (un->un_devid == NULL) {
22772 					sd_register_devid(ssc, SD_DEVINFO(un),
22773 					    SD_TARGET_IS_UNRESERVED);
22774 				} else {
22775 					/*
22776 					 * The device id for this disk
22777 					 * has been fabricated. The
22778 					 * device id must be preserved
22779 					 * by writing it back out to
22780 					 * disk.
22781 					 */
22782 					if (sd_write_deviceid(ssc) != 0) {
22783 						ddi_devid_free(un->un_devid);
22784 						un->un_devid = NULL;
22785 					}
22786 				}
22787 			}
22788 			mutex_exit(SD_MUTEX(un));
22789 		}
22790 
22791 		break;
22792 
22793 	case DKIOCLOCK:
22794 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
22795 		err = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
22796 		    SD_PATH_STANDARD);
22797 		goto done_with_assess;
22798 
22799 	case DKIOCUNLOCK:
22800 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
22801 		err = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_ALLOW,
22802 		    SD_PATH_STANDARD);
22803 		goto done_with_assess;
22804 
22805 	case DKIOCSTATE: {
22806 		enum dkio_state		state;
22807 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
22808 
22809 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
22810 			err = EFAULT;
22811 		} else {
22812 			err = sd_check_media(dev, state);
22813 			if (err == 0) {
22814 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
22815 				    sizeof (int), flag) != 0)
22816 					err = EFAULT;
22817 			}
22818 		}
22819 		break;
22820 	}
22821 
22822 	case DKIOCREMOVABLE:
22823 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
22824 		i = un->un_f_has_removable_media ? 1 : 0;
22825 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
22826 			err = EFAULT;
22827 		} else {
22828 			err = 0;
22829 		}
22830 		break;
22831 
22832 	case DKIOCSOLIDSTATE:
22833 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSOLIDSTATE\n");
22834 		i = un->un_f_is_solid_state ? 1 : 0;
22835 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
22836 			err = EFAULT;
22837 		} else {
22838 			err = 0;
22839 		}
22840 		break;
22841 
22842 	case DKIOCHOTPLUGGABLE:
22843 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
22844 		i = un->un_f_is_hotpluggable ? 1 : 0;
22845 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
22846 			err = EFAULT;
22847 		} else {
22848 			err = 0;
22849 		}
22850 		break;
22851 
22852 	case DKIOCREADONLY:
22853 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREADONLY\n");
22854 		i = 0;
22855 		if ((ISCD(un) && !un->un_f_mmc_writable_media) ||
22856 		    (sr_check_wp(dev) != 0)) {
22857 			i = 1;
22858 		}
22859 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
22860 			err = EFAULT;
22861 		} else {
22862 			err = 0;
22863 		}
22864 		break;
22865 
22866 	case DKIOCGTEMPERATURE:
22867 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
22868 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
22869 		break;
22870 
22871 	case MHIOCENFAILFAST:
22872 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
22873 		if ((err = drv_priv(cred_p)) == 0) {
22874 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
22875 		}
22876 		break;
22877 
22878 	case MHIOCTKOWN:
22879 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
22880 		if ((err = drv_priv(cred_p)) == 0) {
22881 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
22882 		}
22883 		break;
22884 
22885 	case MHIOCRELEASE:
22886 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
22887 		if ((err = drv_priv(cred_p)) == 0) {
22888 			err = sd_mhdioc_release(dev);
22889 		}
22890 		break;
22891 
22892 	case MHIOCSTATUS:
22893 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
22894 		if ((err = drv_priv(cred_p)) == 0) {
22895 			switch (sd_send_scsi_TEST_UNIT_READY(ssc, 0)) {
22896 			case 0:
22897 				err = 0;
22898 				break;
22899 			case EACCES:
22900 				*rval_p = 1;
22901 				err = 0;
22902 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
22903 				break;
22904 			default:
22905 				err = EIO;
22906 				goto done_with_assess;
22907 			}
22908 		}
22909 		break;
22910 
22911 	case MHIOCQRESERVE:
22912 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
22913 		if ((err = drv_priv(cred_p)) == 0) {
22914 			err = sd_reserve_release(dev, SD_RESERVE);
22915 		}
22916 		break;
22917 
22918 	case MHIOCREREGISTERDEVID:
22919 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
22920 		if (drv_priv(cred_p) == EPERM) {
22921 			err = EPERM;
22922 		} else if (!un->un_f_devid_supported) {
22923 			err = ENOTTY;
22924 		} else {
22925 			err = sd_mhdioc_register_devid(dev);
22926 		}
22927 		break;
22928 
22929 	case MHIOCGRP_INKEYS:
22930 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
22931 		if (((err = drv_priv(cred_p)) != EPERM) &&
22932 		    arg != (intptr_t)NULL) {
22933 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22934 				err = ENOTSUP;
22935 			} else {
22936 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
22937 				    flag);
22938 			}
22939 		}
22940 		break;
22941 
22942 	case MHIOCGRP_INRESV:
22943 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
22944 		if (((err = drv_priv(cred_p)) != EPERM) &&
22945 		    arg != (intptr_t)NULL) {
22946 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22947 				err = ENOTSUP;
22948 			} else {
22949 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
22950 			}
22951 		}
22952 		break;
22953 
22954 	case MHIOCGRP_REGISTER:
22955 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
22956 		if ((err = drv_priv(cred_p)) != EPERM) {
22957 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22958 				err = ENOTSUP;
22959 			} else if (arg != (intptr_t)NULL) {
22960 				mhioc_register_t reg;
22961 				if (ddi_copyin((void *)arg, &reg,
22962 				    sizeof (mhioc_register_t), flag) != 0) {
22963 					err = EFAULT;
22964 				} else {
22965 					err =
22966 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22967 					    ssc, SD_SCSI3_REGISTER,
22968 					    (uchar_t *)&reg);
22969 					if (err != 0)
22970 						goto done_with_assess;
22971 				}
22972 			}
22973 		}
22974 		break;
22975 
22976 	case MHIOCGRP_CLEAR:
22977 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_CLEAR\n");
22978 		if ((err = drv_priv(cred_p)) != EPERM) {
22979 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22980 				err = ENOTSUP;
22981 			} else if (arg != (intptr_t)NULL) {
22982 				mhioc_register_t reg;
22983 				if (ddi_copyin((void *)arg, &reg,
22984 				    sizeof (mhioc_register_t), flag) != 0) {
22985 					err = EFAULT;
22986 				} else {
22987 					err =
22988 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22989 					    ssc, SD_SCSI3_CLEAR,
22990 					    (uchar_t *)&reg);
22991 					if (err != 0)
22992 						goto done_with_assess;
22993 				}
22994 			}
22995 		}
22996 		break;
22997 
22998 	case MHIOCGRP_RESERVE:
22999 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
23000 		if ((err = drv_priv(cred_p)) != EPERM) {
23001 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
23002 				err = ENOTSUP;
23003 			} else if (arg != (intptr_t)NULL) {
23004 				mhioc_resv_desc_t resv_desc;
23005 				if (ddi_copyin((void *)arg, &resv_desc,
23006 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
23007 					err = EFAULT;
23008 				} else {
23009 					err =
23010 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
23011 					    ssc, SD_SCSI3_RESERVE,
23012 					    (uchar_t *)&resv_desc);
23013 					if (err != 0)
23014 						goto done_with_assess;
23015 				}
23016 			}
23017 		}
23018 		break;
23019 
23020 	case MHIOCGRP_PREEMPTANDABORT:
23021 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
23022 		if ((err = drv_priv(cred_p)) != EPERM) {
23023 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
23024 				err = ENOTSUP;
23025 			} else if (arg != (intptr_t)NULL) {
23026 				mhioc_preemptandabort_t preempt_abort;
23027 				if (ddi_copyin((void *)arg, &preempt_abort,
23028 				    sizeof (mhioc_preemptandabort_t),
23029 				    flag) != 0) {
23030 					err = EFAULT;
23031 				} else {
23032 					err =
23033 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
23034 					    ssc, SD_SCSI3_PREEMPTANDABORT,
23035 					    (uchar_t *)&preempt_abort);
23036 					if (err != 0)
23037 						goto done_with_assess;
23038 				}
23039 			}
23040 		}
23041 		break;
23042 
23043 	case MHIOCGRP_REGISTERANDIGNOREKEY:
23044 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
23045 		if ((err = drv_priv(cred_p)) != EPERM) {
23046 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
23047 				err = ENOTSUP;
23048 			} else if (arg != (intptr_t)NULL) {
23049 				mhioc_registerandignorekey_t r_and_i;
23050 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
23051 				    sizeof (mhioc_registerandignorekey_t),
23052 				    flag) != 0) {
23053 					err = EFAULT;
23054 				} else {
23055 					err =
23056 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
23057 					    ssc, SD_SCSI3_REGISTERANDIGNOREKEY,
23058 					    (uchar_t *)&r_and_i);
23059 					if (err != 0)
23060 						goto done_with_assess;
23061 				}
23062 			}
23063 		}
23064 		break;
23065 
23066 	case USCSICMD:
23067 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
23068 		cr = ddi_get_cred();
23069 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
23070 			err = EPERM;
23071 		} else {
23072 			enum uio_seg	uioseg;
23073 
23074 			uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE :
23075 			    UIO_USERSPACE;
23076 			if (un->un_f_format_in_progress == TRUE) {
23077 				err = EAGAIN;
23078 				break;
23079 			}
23080 
23081 			err = sd_ssc_send(ssc,
23082 			    (struct uscsi_cmd *)arg,
23083 			    flag, uioseg, SD_PATH_STANDARD);
23084 			if (err != 0)
23085 				goto done_with_assess;
23086 			else
23087 				sd_ssc_assessment(ssc, SD_FMT_STANDARD);
23088 		}
23089 		break;
23090 
23091 	case USCSIMAXXFER:
23092 		SD_TRACE(SD_LOG_IOCTL, un, "USCSIMAXXFER\n");
23093 		cr = ddi_get_cred();
23094 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
23095 			err = EPERM;
23096 		} else {
23097 			const uscsi_xfer_t xfer = un->un_max_xfer_size;
23098 
23099 			if (ddi_copyout(&xfer, (void *)arg, sizeof (xfer),
23100 			    flag) != 0) {
23101 				err = EFAULT;
23102 			} else {
23103 				err = 0;
23104 			}
23105 		}
23106 		break;
23107 
23108 	case CDROMPAUSE:
23109 	case CDROMRESUME:
23110 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
23111 		if (!ISCD(un)) {
23112 			err = ENOTTY;
23113 		} else {
23114 			err = sr_pause_resume(dev, cmd);
23115 		}
23116 		break;
23117 
23118 	case CDROMPLAYMSF:
23119 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
23120 		if (!ISCD(un)) {
23121 			err = ENOTTY;
23122 		} else {
23123 			err = sr_play_msf(dev, (caddr_t)arg, flag);
23124 		}
23125 		break;
23126 
23127 	case CDROMPLAYTRKIND:
23128 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
23129 #if defined(__x86)
23130 		/*
23131 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
23132 		 */
23133 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
23134 #else
23135 		if (!ISCD(un)) {
23136 #endif
23137 			err = ENOTTY;
23138 		} else {
23139 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
23140 		}
23141 		break;
23142 
23143 	case CDROMREADTOCHDR:
23144 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
23145 		if (!ISCD(un)) {
23146 			err = ENOTTY;
23147 		} else {
23148 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
23149 		}
23150 		break;
23151 
23152 	case CDROMREADTOCENTRY:
23153 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
23154 		if (!ISCD(un)) {
23155 			err = ENOTTY;
23156 		} else {
23157 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
23158 		}
23159 		break;
23160 
23161 	case CDROMSTOP:
23162 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
23163 		if (!ISCD(un)) {
23164 			err = ENOTTY;
23165 		} else {
23166 			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
23167 			    SD_TARGET_STOP, SD_PATH_STANDARD);
23168 			goto done_with_assess;
23169 		}
23170 		break;
23171 
23172 	case CDROMSTART:
23173 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
23174 		if (!ISCD(un)) {
23175 			err = ENOTTY;
23176 		} else {
23177 			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
23178 			    SD_TARGET_START, SD_PATH_STANDARD);
23179 			goto done_with_assess;
23180 		}
23181 		break;
23182 
23183 	case CDROMCLOSETRAY:
23184 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
23185 		if (!ISCD(un)) {
23186 			err = ENOTTY;
23187 		} else {
23188 			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
23189 			    SD_TARGET_CLOSE, SD_PATH_STANDARD);
23190 			goto done_with_assess;
23191 		}
23192 		break;
23193 
23194 	case FDEJECT:	/* for eject command */
23195 	case DKIOCEJECT:
23196 	case CDROMEJECT:
23197 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
23198 		if (!un->un_f_eject_media_supported) {
23199 			err = ENOTTY;
23200 		} else {
23201 			err = sr_eject(dev);
23202 		}
23203 		break;
23204 
23205 	case CDROMVOLCTRL:
23206 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
23207 		if (!ISCD(un)) {
23208 			err = ENOTTY;
23209 		} else {
23210 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
23211 		}
23212 		break;
23213 
23214 	case CDROMSUBCHNL:
23215 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
23216 		if (!ISCD(un)) {
23217 			err = ENOTTY;
23218 		} else {
23219 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
23220 		}
23221 		break;
23222 
23223 	case CDROMREADMODE2:
23224 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
23225 		if (!ISCD(un)) {
23226 			err = ENOTTY;
23227 		} else if (un->un_f_cfg_is_atapi == TRUE) {
23228 			/*
23229 			 * If the drive supports READ CD, use that instead of
23230 			 * switching the LBA size via a MODE SELECT
23231 			 * Block Descriptor
23232 			 */
23233 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
23234 		} else {
23235 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
23236 		}
23237 		break;
23238 
23239 	case CDROMREADMODE1:
23240 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
23241 		if (!ISCD(un)) {
23242 			err = ENOTTY;
23243 		} else {
23244 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
23245 		}
23246 		break;
23247 
23248 	case CDROMREADOFFSET:
23249 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
23250 		if (!ISCD(un)) {
23251 			err = ENOTTY;
23252 		} else {
23253 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
23254 			    flag);
23255 		}
23256 		break;
23257 
23258 	case CDROMSBLKMODE:
23259 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
23260 		/*
23261 		 * There is no means of changing block size in case of atapi
23262 		 * drives, thus return ENOTTY if drive type is atapi
23263 		 */
23264 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
23265 			err = ENOTTY;
23266 		} else if (un->un_f_mmc_cap == TRUE) {
23267 
23268 			/*
23269 			 * MMC Devices do not support changing the
23270 			 * logical block size
23271 			 *
23272 			 * Note: EINVAL is being returned instead of ENOTTY to
23273 			 * maintain consistancy with the original mmc
23274 			 * driver update.
23275 			 */
23276 			err = EINVAL;
23277 		} else {
23278 			mutex_enter(SD_MUTEX(un));
23279 			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
23280 			    (un->un_ncmds_in_transport > 0)) {
23281 				mutex_exit(SD_MUTEX(un));
23282 				err = EINVAL;
23283 			} else {
23284 				mutex_exit(SD_MUTEX(un));
23285 				err = sr_change_blkmode(dev, cmd, arg, flag);
23286 			}
23287 		}
23288 		break;
23289 
23290 	case CDROMGBLKMODE:
23291 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
23292 		if (!ISCD(un)) {
23293 			err = ENOTTY;
23294 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
23295 		    (un->un_f_blockcount_is_valid != FALSE)) {
23296 			/*
23297 			 * Drive is an ATAPI drive so return target block
23298 			 * size for ATAPI drives since we cannot change the
23299 			 * blocksize on ATAPI drives. Used primarily to detect
23300 			 * if an ATAPI cdrom is present.
23301 			 */
23302 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
23303 			    sizeof (int), flag) != 0) {
23304 				err = EFAULT;
23305 			} else {
23306 				err = 0;
23307 			}
23308 
23309 		} else {
23310 			/*
23311 			 * Drive supports changing block sizes via a Mode
23312 			 * Select.
23313 			 */
23314 			err = sr_change_blkmode(dev, cmd, arg, flag);
23315 		}
23316 		break;
23317 
23318 	case CDROMGDRVSPEED:
23319 	case CDROMSDRVSPEED:
23320 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
23321 		if (!ISCD(un)) {
23322 			err = ENOTTY;
23323 		} else if (un->un_f_mmc_cap == TRUE) {
23324 			/*
23325 			 * Note: In the future the driver implementation
23326 			 * for getting and
23327 			 * setting cd speed should entail:
23328 			 * 1) If non-mmc try the Toshiba mode page
23329 			 *    (sr_change_speed)
23330 			 * 2) If mmc but no support for Real Time Streaming try
23331 			 *    the SET CD SPEED (0xBB) command
23332 			 *   (sr_atapi_change_speed)
23333 			 * 3) If mmc and support for Real Time Streaming
23334 			 *    try the GET PERFORMANCE and SET STREAMING
23335 			 *    commands (not yet implemented, 4380808)
23336 			 */
23337 			/*
23338 			 * As per recent MMC spec, CD-ROM speed is variable
23339 			 * and changes with LBA. Since there is no such
23340 			 * things as drive speed now, fail this ioctl.
23341 			 *
23342 			 * Note: EINVAL is returned for consistancy of original
23343 			 * implementation which included support for getting
23344 			 * the drive speed of mmc devices but not setting
23345 			 * the drive speed. Thus EINVAL would be returned
23346 			 * if a set request was made for an mmc device.
23347 			 * We no longer support get or set speed for
23348 			 * mmc but need to remain consistent with regard
23349 			 * to the error code returned.
23350 			 */
23351 			err = EINVAL;
23352 		} else if (un->un_f_cfg_is_atapi == TRUE) {
23353 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
23354 		} else {
23355 			err = sr_change_speed(dev, cmd, arg, flag);
23356 		}
23357 		break;
23358 
23359 	case CDROMCDDA:
23360 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
23361 		if (!ISCD(un)) {
23362 			err = ENOTTY;
23363 		} else {
23364 			err = sr_read_cdda(dev, (void *)arg, flag);
23365 		}
23366 		break;
23367 
23368 	case CDROMCDXA:
23369 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
23370 		if (!ISCD(un)) {
23371 			err = ENOTTY;
23372 		} else {
23373 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
23374 		}
23375 		break;
23376 
23377 	case CDROMSUBCODE:
23378 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
23379 		if (!ISCD(un)) {
23380 			err = ENOTTY;
23381 		} else {
23382 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
23383 		}
23384 		break;
23385 
23386 
23387 #ifdef SDDEBUG
23388 /* RESET/ABORTS testing ioctls */
23389 	case DKIOCRESET: {
23390 		int	reset_level;
23391 
23392 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
23393 			err = EFAULT;
23394 		} else {
23395 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
23396 			    "reset_level = 0x%lx\n", reset_level);
23397 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
23398 				err = 0;
23399 			} else {
23400 				err = EIO;
23401 			}
23402 		}
23403 		break;
23404 	}
23405 
23406 	case DKIOCABORT:
23407 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
23408 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
23409 			err = 0;
23410 		} else {
23411 			err = EIO;
23412 		}
23413 		break;
23414 #endif
23415 
23416 #ifdef SD_FAULT_INJECTION
23417 /* SDIOC FaultInjection testing ioctls */
23418 	case SDIOCSTART:
23419 	case SDIOCSTOP:
23420 	case SDIOCINSERTPKT:
23421 	case SDIOCINSERTXB:
23422 	case SDIOCINSERTUN:
23423 	case SDIOCINSERTARQ:
23424 	case SDIOCPUSH:
23425 	case SDIOCRETRIEVE:
23426 	case SDIOCRUN:
23427 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl:"
23428 		    "SDIOC detected cmd:0x%X:\n", cmd);
23429 		/* call error generator */
23430 		sd_faultinjection_ioctl(cmd, arg, un);
23431 		err = 0;
23432 		break;
23433 
23434 #endif /* SD_FAULT_INJECTION */
23435 
23436 	case DKIOCFLUSHWRITECACHE:
23437 		{
23438 			struct dk_callback *dkc = (struct dk_callback *)arg;
23439 
23440 			mutex_enter(SD_MUTEX(un));
23441 			if (!un->un_f_sync_cache_supported ||
23442 			    !un->un_f_write_cache_enabled) {
23443 				err = un->un_f_sync_cache_supported ?
23444 				    0 : ENOTSUP;
23445 				mutex_exit(SD_MUTEX(un));
23446 				if ((flag & FKIOCTL) && dkc != NULL &&
23447 				    dkc->dkc_callback != NULL) {
23448 					(*dkc->dkc_callback)(dkc->dkc_cookie,
23449 					    err);
23450 					/*
23451 					 * Did callback and reported error.
23452 					 * Since we did a callback, ioctl
23453 					 * should return 0.
23454 					 */
23455 					err = 0;
23456 				}
23457 				break;
23458 			}
23459 			mutex_exit(SD_MUTEX(un));
23460 
23461 			if ((flag & FKIOCTL) && dkc != NULL &&
23462 			    dkc->dkc_callback != NULL) {
23463 				/* async SYNC CACHE request */
23464 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
23465 			} else {
23466 				/* synchronous SYNC CACHE request */
23467 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
23468 			}
23469 		}
23470 		break;
23471 
23472 	case DKIOCFREE:
23473 		{
23474 			dkioc_free_list_t *dfl = (dkioc_free_list_t *)arg;
23475 
23476 			/* bad ioctls shouldn't panic */
23477 			if (dfl == NULL) {
23478 				/* check kernel callers strictly in debug */
23479 				ASSERT0(flag & FKIOCTL);
23480 				err = SET_ERROR(EINVAL);
23481 				break;
23482 			}
23483 			/* synchronous UNMAP request */
23484 			err = sd_send_scsi_UNMAP(dev, ssc, dfl, flag);
23485 		}
23486 		break;
23487 
23488 	case DKIOC_CANFREE:
23489 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOC_CANFREE\n");
23490 		i = (un->un_thin_flags & SD_THIN_PROV_ENABLED) ? 1 : 0;
23491 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
23492 			err = EFAULT;
23493 		} else {
23494 			err = 0;
23495 		}
23496 		break;
23497 
23498 	case DKIOCGETWCE: {
23499 
23500 		int wce;
23501 
23502 		if ((err = sd_get_write_cache_enabled(ssc, &wce)) != 0) {
23503 			break;
23504 		}
23505 
23506 		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
23507 			err = EFAULT;
23508 		}
23509 		break;
23510 	}
23511 
23512 	case DKIOCSETWCE: {
23513 
23514 		int wce, sync_supported;
23515 		int cur_wce = 0;
23516 
23517 		if (!un->un_f_cache_mode_changeable) {
23518 			err = EINVAL;
23519 			break;
23520 		}
23521 
23522 		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
23523 			err = EFAULT;
23524 			break;
23525 		}
23526 
23527 		/*
23528 		 * Synchronize multiple threads trying to enable
23529 		 * or disable the cache via the un_f_wcc_cv
23530 		 * condition variable.
23531 		 */
23532 		mutex_enter(SD_MUTEX(un));
23533 
23534 		/*
23535 		 * Don't allow the cache to be enabled if the
23536 		 * config file has it disabled.
23537 		 */
23538 		if (un->un_f_opt_disable_cache && wce) {
23539 			mutex_exit(SD_MUTEX(un));
23540 			err = EINVAL;
23541 			break;
23542 		}
23543 
23544 		/*
23545 		 * Wait for write cache change in progress
23546 		 * bit to be clear before proceeding.
23547 		 */
23548 		while (un->un_f_wcc_inprog)
23549 			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
23550 
23551 		un->un_f_wcc_inprog = 1;
23552 
23553 		mutex_exit(SD_MUTEX(un));
23554 
23555 		/*
23556 		 * Get the current write cache state
23557 		 */
23558 		if ((err = sd_get_write_cache_enabled(ssc, &cur_wce)) != 0) {
23559 			mutex_enter(SD_MUTEX(un));
23560 			un->un_f_wcc_inprog = 0;
23561 			cv_broadcast(&un->un_wcc_cv);
23562 			mutex_exit(SD_MUTEX(un));
23563 			break;
23564 		}
23565 
23566 		mutex_enter(SD_MUTEX(un));
23567 		un->un_f_write_cache_enabled = (cur_wce != 0);
23568 
23569 		if (un->un_f_write_cache_enabled && wce == 0) {
23570 			/*
23571 			 * Disable the write cache.  Don't clear
23572 			 * un_f_write_cache_enabled until after
23573 			 * the mode select and flush are complete.
23574 			 */
23575 			sync_supported = un->un_f_sync_cache_supported;
23576 
23577 			/*
23578 			 * If cache flush is suppressed, we assume that the
23579 			 * controller firmware will take care of managing the
23580 			 * write cache for us: no need to explicitly
23581 			 * disable it.
23582 			 */
23583 			if (!un->un_f_suppress_cache_flush) {
23584 				mutex_exit(SD_MUTEX(un));
23585 				if ((err = sd_cache_control(ssc,
23586 				    SD_CACHE_NOCHANGE,
23587 				    SD_CACHE_DISABLE)) == 0 &&
23588 				    sync_supported) {
23589 					err = sd_send_scsi_SYNCHRONIZE_CACHE(un,
23590 					    NULL);
23591 				}
23592 			} else {
23593 				mutex_exit(SD_MUTEX(un));
23594 			}
23595 
23596 			mutex_enter(SD_MUTEX(un));
23597 			if (err == 0) {
23598 				un->un_f_write_cache_enabled = 0;
23599 			}
23600 
23601 		} else if (!un->un_f_write_cache_enabled && wce != 0) {
23602 			/*
23603 			 * Set un_f_write_cache_enabled first, so there is
23604 			 * no window where the cache is enabled, but the
23605 			 * bit says it isn't.
23606 			 */
23607 			un->un_f_write_cache_enabled = 1;
23608 
23609 			/*
23610 			 * If cache flush is suppressed, we assume that the
23611 			 * controller firmware will take care of managing the
23612 			 * write cache for us: no need to explicitly
23613 			 * enable it.
23614 			 */
23615 			if (!un->un_f_suppress_cache_flush) {
23616 				mutex_exit(SD_MUTEX(un));
23617 				err = sd_cache_control(ssc, SD_CACHE_NOCHANGE,
23618 				    SD_CACHE_ENABLE);
23619 			} else {
23620 				mutex_exit(SD_MUTEX(un));
23621 			}
23622 
23623 			mutex_enter(SD_MUTEX(un));
23624 
23625 			if (err) {
23626 				un->un_f_write_cache_enabled = 0;
23627 			}
23628 		}
23629 
23630 		un->un_f_wcc_inprog = 0;
23631 		cv_broadcast(&un->un_wcc_cv);
23632 		mutex_exit(SD_MUTEX(un));
23633 		break;
23634 	}
23635 
23636 	default:
23637 		err = ENOTTY;
23638 		break;
23639 	}
23640 	mutex_enter(SD_MUTEX(un));
23641 	un->un_ncmds_in_driver--;
23642 	ASSERT(un->un_ncmds_in_driver >= 0);
23643 	mutex_exit(SD_MUTEX(un));
23644 
23645 
23646 done_without_assess:
23647 	sd_ssc_fini(ssc);
23648 
23649 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
23650 	return (err);
23651 
23652 done_with_assess:
23653 	mutex_enter(SD_MUTEX(un));
23654 	un->un_ncmds_in_driver--;
23655 	ASSERT(un->un_ncmds_in_driver >= 0);
23656 	mutex_exit(SD_MUTEX(un));
23657 
23658 done_quick_assess:
23659 	if (err != 0)
23660 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23661 	/* Uninitialize sd_ssc_t pointer */
23662 	sd_ssc_fini(ssc);
23663 
23664 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
23665 	return (err);
23666 }
23667 
23668 
23669 /*
23670  *    Function: sd_dkio_ctrl_info
23671  *
23672  * Description: This routine is the driver entry point for handling controller
23673  *		information ioctl requests (DKIOCINFO).
23674  *
23675  *   Arguments: dev  - the device number
23676  *		arg  - pointer to user provided dk_cinfo structure
23677  *		       specifying the controller type and attributes.
23678  *		flag - this argument is a pass through to ddi_copyxxx()
23679  *		       directly from the mode argument of ioctl().
23680  *
23681  * Return Code: 0
23682  *		EFAULT
23683  *		ENXIO
23684  */
23685 
23686 static int
23687 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
23688 {
23689 	struct sd_lun	*un = NULL;
23690 	struct dk_cinfo	*info;
23691 	dev_info_t	*pdip;
23692 	int		lun, tgt;
23693 
23694 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23695 		return (ENXIO);
23696 	}
23697 
23698 	info = (struct dk_cinfo *)
23699 	    kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
23700 
23701 	switch (un->un_ctype) {
23702 	case CTYPE_CDROM:
23703 		info->dki_ctype = DKC_CDROM;
23704 		break;
23705 	default:
23706 		info->dki_ctype = DKC_SCSI_CCS;
23707 		break;
23708 	}
23709 	pdip = ddi_get_parent(SD_DEVINFO(un));
23710 	info->dki_cnum = ddi_get_instance(pdip);
23711 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
23712 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
23713 	} else {
23714 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
23715 		    DK_DEVLEN - 1);
23716 	}
23717 
23718 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
23719 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
23720 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
23721 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
23722 
23723 	/* Unit Information */
23724 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
23725 	info->dki_slave = ((tgt << 3) | lun);
23726 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
23727 	    DK_DEVLEN - 1);
23728 	info->dki_flags = DKI_FMTVOL;
23729 	info->dki_partition = SDPART(dev);
23730 
23731 	/* Max Transfer size of this device in blocks */
23732 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
23733 	info->dki_addr = 0;
23734 	info->dki_space = 0;
23735 	info->dki_prio = 0;
23736 	info->dki_vec = 0;
23737 
23738 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
23739 		kmem_free(info, sizeof (struct dk_cinfo));
23740 		return (EFAULT);
23741 	} else {
23742 		kmem_free(info, sizeof (struct dk_cinfo));
23743 		return (0);
23744 	}
23745 }
23746 
23747 /*
23748  *    Function: sd_get_media_info_com
23749  *
23750  * Description: This routine returns the information required to populate
23751  *		the fields for the dk_minfo/dk_minfo_ext structures.
23752  *
23753  *   Arguments: dev		- the device number
23754  *		dki_media_type	- media_type
23755  *		dki_lbsize	- logical block size
23756  *		dki_capacity	- capacity in blocks
23757  *		dki_pbsize	- physical block size (if requested)
23758  *
23759  * Return Code: 0
 *		EACCES
23761  *		EFAULT
23762  *		ENXIO
23763  *		EIO
23764  */
static int
sd_get_media_info_com(dev_t dev, uint_t *dki_media_type, uint_t *dki_lbsize,
    diskaddr_t *dki_capacity, uint_t *dki_pbsize)
{
	struct sd_lun		*un = NULL;
	struct uscsi_cmd	com;
	struct scsi_inquiry	*sinq;
	u_longlong_t		media_capacity;
	uint64_t		capacity;
	uint_t			lbasize;
	uint_t			pbsize;
	uchar_t			*out_data;
	uchar_t			*rqbuf;
	int			rval = 0;
	int			rtn;
	sd_ssc_t		*ssc;

	/* Reject devices we can't find or that are offline. */
	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info_com: entry\n");

	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
	ssc = sd_ssc_init(un);

	/* Issue a TUR to determine if the drive is ready with media present */
	rval = sd_send_scsi_TEST_UNIT_READY(ssc, SD_CHECK_FOR_MEDIA);
	if (rval == ENXIO) {
		goto done;
	} else if (rval != 0) {
		/* TUR failures other than ENXIO are tolerated; keep going. */
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}

	/* Now get configuration data */
	if (ISCD(un)) {
		*dki_media_type = DK_CDROM;

		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
		if (un->un_f_mmc_cap == TRUE) {
			rtn = sd_send_scsi_GET_CONFIGURATION(ssc, &com, rqbuf,
			    SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN,
			    SD_PATH_STANDARD);

			if (rtn) {
				/*
				 * We ignore all failures for CD and need to
				 * put the assessment before processing code
				 * to avoid missing assessment for FMA.
				 */
				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
				/*
				 * Failed for other than an illegal request
				 * or command not supported
				 */
				if ((com.uscsi_status == STATUS_CHECK) &&
				    (com.uscsi_rqstatus == STATUS_GOOD)) {
					/*
					 * Fixed-format sense: byte 2 holds the
					 * sense key, byte 12 the ASC; ASC 0x20
					 * is "invalid command operation code".
					 * Any other failure is a hard EIO.
					 */
					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
					    (rqbuf[12] != 0x20)) {
						rval = EIO;
						goto no_assessment;
					}
				}
			} else {
				/*
				 * The GET CONFIGURATION command succeeded
				 * so set the media type according to the
				 * returned data
				 */
				*dki_media_type = out_data[6];
				*dki_media_type <<= 8;
				*dki_media_type |= out_data[7];
			}
		}
	} else {
		/*
		 * The profile list is not available, so we attempt to identify
		 * the media type based on the inquiry data
		 */
		sinq = un->un_sd->sd_inq;
		if ((sinq->inq_dtype == DTYPE_DIRECT) ||
		    (sinq->inq_dtype == DTYPE_OPTICAL)) {
			/* This is a direct access device  or optical disk */
			*dki_media_type = DK_FIXED_DISK;

			/* Special-case Iomega ZIP and JAZ removable media. */
			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
					*dki_media_type = DK_ZIP;
				} else if (
				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
					*dki_media_type = DK_JAZ;
				}
			}
		} else {
			/*
			 * Not a CD, direct access or optical disk so return
			 * unknown media
			 */
			*dki_media_type = DK_UNKNOWN;
		}
	}

	/*
	 * Now read the capacity so we can provide the lbasize,
	 * pbsize and capacity.
	 */
	if (dki_pbsize && un->un_f_descr_format_supported) {
		/* READ CAPACITY (16) reports the physical block size too. */
		rval = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity, &lbasize,
		    &pbsize, SD_PATH_DIRECT);

		/*
		 * Override the physical blocksize if the instance already
		 * has a larger value.
		 */
		pbsize = MAX(pbsize, un->un_phy_blocksize);
	}

	/* Fall back to 10-byte READ CAPACITY if 16-byte wasn't used/failed. */
	if (dki_pbsize == NULL || rval != 0 ||
	    !un->un_f_descr_format_supported) {
		rval = sd_send_scsi_READ_CAPACITY(ssc, &capacity, &lbasize,
		    SD_PATH_DIRECT);

		switch (rval) {
		case 0:
			if (un->un_f_enable_rmw &&
			    un->un_phy_blocksize != 0) {
				pbsize = un->un_phy_blocksize;
			} else {
				pbsize = lbasize;
			}
			media_capacity = capacity;

			/*
			 * sd_send_scsi_READ_CAPACITY() reports capacity in
			 * un->un_sys_blocksize chunks. So we need to convert
			 * it into cap.lbsize chunks.
			 */
			if (un->un_f_has_removable_media) {
				media_capacity *= un->un_sys_blocksize;
				media_capacity /= lbasize;
			}
			break;
		case EACCES:
			/* Typically: media reserved by another initiator. */
			rval = EACCES;
			goto done;
		default:
			rval = EIO;
			goto done;
		}
	} else {
		/*
		 * Sanitize odd block sizes: sizes that are not a power-of-2
		 * multiple of DEV_BSIZE are replaced with safe defaults.
		 */
		if (un->un_f_enable_rmw &&
		    !ISP2(pbsize % DEV_BSIZE)) {
			pbsize = SSD_SECSIZE;
		} else if (!ISP2(lbasize % DEV_BSIZE) ||
		    !ISP2(pbsize % DEV_BSIZE)) {
			pbsize = lbasize = DEV_BSIZE;
		}
		media_capacity = capacity;
	}

	/*
	 * If lun is expanded dynamically, update the un structure.
	 */
	mutex_enter(SD_MUTEX(un));
	if ((un->un_f_blockcount_is_valid == TRUE) &&
	    (un->un_f_tgt_blocksize_is_valid == TRUE) &&
	    (capacity > un->un_blockcount)) {
		un->un_f_expnevent = B_FALSE;
		sd_update_block_info(un, lbasize, capacity);
	}
	mutex_exit(SD_MUTEX(un));

	*dki_lbsize = lbasize;
	*dki_capacity = media_capacity;
	if (dki_pbsize)
		*dki_pbsize = pbsize;

done:
	if (rval != 0) {
		if (rval == EIO)
			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
		else
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}
no_assessment:
	sd_ssc_fini(ssc);
	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
	kmem_free(rqbuf, SENSE_LENGTH);
	return (rval);
}
23958 
23959 /*
23960  *    Function: sd_get_media_info
23961  *
23962  * Description: This routine is the driver entry point for handling ioctl
23963  *		requests for the media type or command set profile used by the
23964  *		drive to operate on the media (DKIOCGMEDIAINFO).
23965  *
23966  *   Arguments: dev	- the device number
23967  *		arg	- pointer to user provided dk_minfo structure
23968  *			  specifying the media type, logical block size and
23969  *			  drive capacity.
23970  *		flag	- this argument is a pass through to ddi_copyxxx()
23971  *			  directly from the mode argument of ioctl().
23972  *
23973  * Return Code: returns the value from sd_get_media_info_com
23974  */
23975 static int
23976 sd_get_media_info(dev_t dev, caddr_t arg, int flag)
23977 {
23978 	struct dk_minfo		mi;
23979 	int			rval;
23980 
23981 	rval = sd_get_media_info_com(dev, &mi.dki_media_type,
23982 	    &mi.dki_lbsize, &mi.dki_capacity, NULL);
23983 
23984 	if (rval)
23985 		return (rval);
23986 	if (ddi_copyout(&mi, arg, sizeof (struct dk_minfo), flag))
23987 		rval = EFAULT;
23988 	return (rval);
23989 }
23990 
23991 /*
23992  *    Function: sd_get_media_info_ext
23993  *
23994  * Description: This routine is the driver entry point for handling ioctl
23995  *		requests for the media type or command set profile used by the
 *		drive to operate on the media (DKIOCGMEDIAINFOEXT). The
 *		difference between this ioctl and DKIOCGMEDIAINFO is that the
 *		return value of this ioctl contains both the logical block
 *		size and the physical block size.
24000  *
24001  *
24002  *   Arguments: dev	- the device number
24003  *		arg	- pointer to user provided dk_minfo_ext structure
24004  *			  specifying the media type, logical block size,
24005  *			  physical block size and disk capacity.
24006  *		flag	- this argument is a pass through to ddi_copyxxx()
24007  *			  directly from the mode argument of ioctl().
24008  *
24009  * Return Code: returns the value from sd_get_media_info_com
24010  */
24011 static int
24012 sd_get_media_info_ext(dev_t dev, caddr_t arg, int flag)
24013 {
24014 	struct dk_minfo_ext	mie;
24015 	int			rval = 0;
24016 	size_t			len;
24017 
24018 	rval = sd_get_media_info_com(dev, &mie.dki_media_type,
24019 	    &mie.dki_lbsize, &mie.dki_capacity, &mie.dki_pbsize);
24020 
24021 	if (rval)
24022 		return (rval);
24023 
24024 	switch (ddi_model_convert_from(flag & FMODELS)) {
24025 	case DDI_MODEL_ILP32:
24026 		len = sizeof (struct dk_minfo_ext32);
24027 		break;
24028 	default:
24029 		len = sizeof (struct dk_minfo_ext);
24030 		break;
24031 	}
24032 
24033 	if (ddi_copyout(&mie, arg, len, flag))
24034 		rval = EFAULT;
24035 	return (rval);
24036 
24037 }
24038 
24039 /*
24040  *    Function: sd_watch_request_submit
24041  *
24042  * Description: Call scsi_watch_request_submit or scsi_mmc_watch_request_submit
24043  *		depending on which is supported by device.
24044  */
24045 static opaque_t
24046 sd_watch_request_submit(struct sd_lun *un)
24047 {
24048 	dev_t			dev;
24049 
24050 	/* All submissions are unified to use same device number */
24051 	dev = sd_make_device(SD_DEVINFO(un));
24052 
24053 	if (un->un_f_mmc_cap && un->un_f_mmc_gesn_polling) {
24054 		return (scsi_mmc_watch_request_submit(SD_SCSI_DEVP(un),
24055 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
24056 		    (caddr_t)dev));
24057 	} else {
24058 		return (scsi_watch_request_submit(SD_SCSI_DEVP(un),
24059 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
24060 		    (caddr_t)dev));
24061 	}
24062 }
24063 
24064 
24065 /*
24066  *    Function: sd_check_media
24067  *
24068  * Description: This utility routine implements the functionality for the
24069  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
24070  *		driver state changes from that specified by the user
24071  *		(inserted or ejected). For example, if the user specifies
24072  *		DKIO_EJECTED and the current media state is inserted this
24073  *		routine will immediately return DKIO_INSERTED. However, if the
24074  *		current media state is not inserted the user thread will be
24075  *		blocked until the drive state changes. If DKIO_NONE is specified
24076  *		the user thread will block until a drive state change occurs.
24077  *
24078  *   Arguments: dev  - the device number
24079  *		state  - user pointer to a dkio_state, updated with the current
24080  *			drive state at return.
24081  *
24082  * Return Code: ENXIO
24083  *		EIO
24084  *		EAGAIN
24085  *		EINTR
24086  */
24087 
static int
sd_check_media(dev_t dev, enum dkio_state state)
{
	struct sd_lun		*un = NULL;
	enum dkio_state		prev_state;
	opaque_t		token = NULL;
	int			rval = 0;
	sd_ssc_t		*ssc;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");

	ssc = sd_ssc_init(un);

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
	    "state=%x, mediastate=%x\n", state, un->un_mediastate);

	prev_state = un->un_mediastate;

	/* is there anything to do? */
	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
		/*
		 * submit the request to the scsi_watch service;
		 * scsi_media_watch_cb() does the real work
		 */
		mutex_exit(SD_MUTEX(un));

		/*
		 * This change handles the case where a scsi watch request is
		 * added to a device that is powered down. To accomplish this
		 * we power up the device before adding the scsi watch request,
		 * since the scsi watch sends a TUR directly to the device
		 * which the device cannot handle if it is powered down.
		 */
		if (sd_pm_entry(un) != DDI_SUCCESS) {
			/* Power-up failed; bail with rval still 0. */
			mutex_enter(SD_MUTEX(un));
			goto done;
		}

		token = sd_watch_request_submit(un);

		sd_pm_exit(un);

		mutex_enter(SD_MUTEX(un));
		if (token == NULL) {
			rval = EAGAIN;
			goto done;
		}

		/*
		 * This is a special case IOCTL that doesn't return
		 * until the media state changes. Routine sdpower
		 * knows about and handles this so don't count it
		 * as an active cmd in the driver, which would
		 * keep the device busy to the pm framework.
		 * If the count isn't decremented the device can't
		 * be powered down.
		 */
		un->un_ncmds_in_driver--;
		ASSERT(un->un_ncmds_in_driver >= 0);

		/*
		 * if a prior request had been made, this will be the same
		 * token, as scsi_watch was designed that way.
		 */
		un->un_swr_token = token;
		un->un_specified_mediastate = state;

		/*
		 * now wait for media change
		 * we will not be signalled unless mediastate == state but it is
		 * still better to test for this condition, since there is a
		 * 2 sec cv_broadcast delay when mediastate == DKIO_INSERTED
		 */
		SD_TRACE(SD_LOG_COMMON, un,
		    "sd_check_media: waiting for media state change\n");
		while (un->un_mediastate == state) {
			/*
			 * cv_wait_sig() returns 0 when interrupted by a
			 * signal; restore the command count before leaving.
			 */
			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
				SD_TRACE(SD_LOG_COMMON, un,
				    "sd_check_media: waiting for media state "
				    "was interrupted\n");
				un->un_ncmds_in_driver++;
				rval = EINTR;
				goto done;
			}
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_check_media: received signal, state=%x\n",
			    un->un_mediastate);
		}
		/*
		 * Inc the counter to indicate the device once again
		 * has an active outstanding cmd.
		 */
		un->un_ncmds_in_driver++;
	}

	/* invalidate geometry */
	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
		sr_ejected(un);
	}

	/*
	 * Media just transitioned to inserted: re-read capacity, revalidate
	 * the label and lock the door.
	 */
	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
		uint64_t	capacity;
		uint_t		lbasize;

		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
		mutex_exit(SD_MUTEX(un));
		/*
		 * Since the following routines use SD_PATH_DIRECT, we must
		 * call PM directly before the upcoming disk accesses. This
		 * may cause the disk to be power/spin up.
		 */

		if (sd_pm_entry(un) == DDI_SUCCESS) {
			rval = sd_send_scsi_READ_CAPACITY(ssc,
			    &capacity, &lbasize, SD_PATH_DIRECT);
			if (rval != 0) {
				sd_pm_exit(un);
				if (rval == EIO)
					sd_ssc_assessment(ssc,
					    SD_FMT_STATUS_CHECK);
				else
					sd_ssc_assessment(ssc, SD_FMT_IGNORE);
				mutex_enter(SD_MUTEX(un));
				goto done;
			}
		} else {
			rval = EIO;
			mutex_enter(SD_MUTEX(un));
			goto done;
		}
		mutex_enter(SD_MUTEX(un));

		sd_update_block_info(un, lbasize, capacity);

		/*
		 *  Check if the media in the device is writable or not
		 */
		if (ISCD(un)) {
			sd_check_for_writable_cd(ssc, SD_PATH_DIRECT);
		}

		mutex_exit(SD_MUTEX(un));
		cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
		if ((cmlb_validate(un->un_cmlbhandle, 0,
		    (void *)SD_PATH_DIRECT) == 0) && un->un_f_pkstats_enabled) {
			sd_set_pstats(un);
			SD_TRACE(SD_LOG_IO_PARTITION, un,
			    "sd_check_media: un:0x%p pstats created and "
			    "set\n", un);
		}

		/* Lock the tray so the freshly-inserted media stays put. */
		rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
		    SD_PATH_DIRECT);

		sd_pm_exit(un);

		if (rval != 0) {
			if (rval == EIO)
				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
			else
				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		}

		mutex_enter(SD_MUTEX(un));
	}
done:
	sd_ssc_fini(ssc);
	un->un_f_watcht_stopped = FALSE;
	if (token != NULL && un->un_swr_token != NULL) {
		/*
		 * Use of this local token and the mutex ensures that we avoid
		 * some race conditions associated with terminating the
		 * scsi watch.
		 */
		token = un->un_swr_token;
		mutex_exit(SD_MUTEX(un));
		(void) scsi_watch_request_terminate(token,
		    SCSI_WATCH_TERMINATE_WAIT);
		if (scsi_watch_get_ref_count(token) == 0) {
			/* Last reference gone; forget the watch token. */
			mutex_enter(SD_MUTEX(un));
			un->un_swr_token = (opaque_t)NULL;
		} else {
			mutex_enter(SD_MUTEX(un));
		}
	}

	/*
	 * Update the capacity kstat value, if no media previously
	 * (capacity kstat is 0) and a media has been inserted
	 * (un_f_blockcount_is_valid == TRUE)
	 */
	if (un->un_errstats) {
		struct sd_errstats	*stp = NULL;

		stp = (struct sd_errstats *)un->un_errstats->ks_data;
		if ((stp->sd_capacity.value.ui64 == 0) &&
		    (un->un_f_blockcount_is_valid == TRUE)) {
			stp->sd_capacity.value.ui64 =
			    (uint64_t)((uint64_t)un->un_blockcount *
			    un->un_sys_blocksize);
		}
	}
	mutex_exit(SD_MUTEX(un));
	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
	return (rval);
}
24300 
24301 
24302 /*
24303  *    Function: sd_delayed_cv_broadcast
24304  *
24305  * Description: Delayed cv_broadcast to allow for target to recover from media
24306  *		insertion.
24307  *
24308  *   Arguments: arg - driver soft state (unit) structure
24309  */
24310 
24311 static void
24312 sd_delayed_cv_broadcast(void *arg)
24313 {
24314 	struct sd_lun *un = arg;
24315 
24316 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
24317 
24318 	mutex_enter(SD_MUTEX(un));
24319 	un->un_dcvb_timeid = NULL;
24320 	cv_broadcast(&un->un_state_cv);
24321 	mutex_exit(SD_MUTEX(un));
24322 }
24323 
24324 
24325 /*
24326  *    Function: sd_media_watch_cb
24327  *
24328  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
24329  *		routine processes the TUR sense data and updates the driver
24330  *		state if a transition has occurred. The user thread
24331  *		(sd_check_media) is then signalled.
24332  *
24333  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24334  *			among multiple watches that share this callback function
24335  *		resultp - scsi watch facility result packet containing scsi
24336  *			  packet, status byte and sense data
24337  *
24338  * Return Code: 0 for success, -1 for failure
24339  */
24340 
24341 static int
24342 sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
24343 {
24344 	struct sd_lun			*un;
24345 	struct scsi_status		*statusp = resultp->statusp;
24346 	uint8_t				*sensep = (uint8_t *)resultp->sensep;
24347 	enum dkio_state			state = DKIO_NONE;
24348 	dev_t				dev = (dev_t)arg;
24349 	uchar_t				actual_sense_length;
24350 	uint8_t				skey, asc, ascq;
24351 
24352 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24353 		return (-1);
24354 	}
24355 	actual_sense_length = resultp->actual_sense_length;
24356 
24357 	mutex_enter(SD_MUTEX(un));
24358 	SD_TRACE(SD_LOG_COMMON, un,
24359 	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
24360 	    *((char *)statusp), (void *)sensep, actual_sense_length);
24361 
24362 	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
24363 		un->un_mediastate = DKIO_DEV_GONE;
24364 		cv_broadcast(&un->un_state_cv);
24365 		mutex_exit(SD_MUTEX(un));
24366 
24367 		return (0);
24368 	}
24369 
24370 	if (un->un_f_mmc_cap && un->un_f_mmc_gesn_polling) {
24371 		if (sd_gesn_media_data_valid(resultp->mmc_data)) {
24372 			if ((resultp->mmc_data[5] &
24373 			    SD_GESN_MEDIA_EVENT_STATUS_PRESENT) != 0) {
24374 				state = DKIO_INSERTED;
24375 			} else {
24376 				state = DKIO_EJECTED;
24377 			}
24378 			if ((resultp->mmc_data[4] & SD_GESN_MEDIA_EVENT_CODE) ==
24379 			    SD_GESN_MEDIA_EVENT_EJECTREQUEST) {
24380 				sd_log_eject_request_event(un, KM_NOSLEEP);
24381 			}
24382 		}
24383 	} else if (sensep != NULL) {
24384 		/*
24385 		 * If there was a check condition then sensep points to valid
24386 		 * sense data. If status was not a check condition but a
24387 		 * reservation or busy status then the new state is DKIO_NONE.
24388 		 */
24389 		skey = scsi_sense_key(sensep);
24390 		asc = scsi_sense_asc(sensep);
24391 		ascq = scsi_sense_ascq(sensep);
24392 
24393 		SD_INFO(SD_LOG_COMMON, un,
24394 		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
24395 		    skey, asc, ascq);
24396 		/* This routine only uses up to 13 bytes of sense data. */
24397 		if (actual_sense_length >= 13) {
24398 			if (skey == KEY_UNIT_ATTENTION) {
24399 				if (asc == 0x28) {
24400 					state = DKIO_INSERTED;
24401 				}
24402 			} else if (skey == KEY_NOT_READY) {
24403 				/*
24404 				 * Sense data of 02/06/00 means that the
24405 				 * drive could not read the media (No
24406 				 * reference position found). In this case
24407 				 * to prevent a hang on the DKIOCSTATE IOCTL
24408 				 * we set the media state to DKIO_INSERTED.
24409 				 */
24410 				if (asc == 0x06 && ascq == 0x00)
24411 					state = DKIO_INSERTED;
24412 
24413 				/*
24414 				 * if 02/04/02  means that the host
24415 				 * should send start command. Explicitly
24416 				 * leave the media state as is
24417 				 * (inserted) as the media is inserted
24418 				 * and host has stopped device for PM
24419 				 * reasons. Upon next true read/write
24420 				 * to this media will bring the
24421 				 * device to the right state good for
24422 				 * media access.
24423 				 */
24424 				if (asc == 0x3a) {
24425 					state = DKIO_EJECTED;
24426 				} else {
24427 					/*
24428 					 * If the drive is busy with an
24429 					 * operation or long write, keep the
24430 					 * media in an inserted state.
24431 					 */
24432 
24433 					if ((asc == 0x04) &&
24434 					    ((ascq == 0x02) ||
24435 					    (ascq == 0x07) ||
24436 					    (ascq == 0x08))) {
24437 						state = DKIO_INSERTED;
24438 					}
24439 				}
24440 			} else if (skey == KEY_NO_SENSE) {
24441 				if ((asc == 0x00) && (ascq == 0x00)) {
24442 					/*
24443 					 * Sense Data 00/00/00 does not provide
24444 					 * any information about the state of
24445 					 * the media. Ignore it.
24446 					 */
24447 					mutex_exit(SD_MUTEX(un));
24448 					return (0);
24449 				}
24450 			}
24451 		}
24452 	} else if ((*((char *)statusp) == STATUS_GOOD) &&
24453 	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
24454 		state = DKIO_INSERTED;
24455 	}
24456 
24457 	SD_TRACE(SD_LOG_COMMON, un,
24458 	    "sd_media_watch_cb: state=%x, specified=%x\n",
24459 	    state, un->un_specified_mediastate);
24460 
24461 	/*
24462 	 * now signal the waiting thread if this is *not* the specified state;
24463 	 * delay the signal if the state is DKIO_INSERTED to allow the target
24464 	 * to recover
24465 	 */
24466 	if (state != un->un_specified_mediastate) {
24467 		un->un_mediastate = state;
24468 		if (state == DKIO_INSERTED) {
24469 			/*
24470 			 * delay the signal to give the drive a chance
24471 			 * to do what it apparently needs to do
24472 			 */
24473 			SD_TRACE(SD_LOG_COMMON, un,
24474 			    "sd_media_watch_cb: delayed cv_broadcast\n");
24475 			if (un->un_dcvb_timeid == NULL) {
24476 				un->un_dcvb_timeid =
24477 				    timeout(sd_delayed_cv_broadcast, un,
24478 				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
24479 			}
24480 		} else {
24481 			SD_TRACE(SD_LOG_COMMON, un,
24482 			    "sd_media_watch_cb: immediate cv_broadcast\n");
24483 			cv_broadcast(&un->un_state_cv);
24484 		}
24485 	}
24486 	mutex_exit(SD_MUTEX(un));
24487 	return (0);
24488 }
24489 
24490 
24491 /*
24492  *    Function: sd_dkio_get_temp
24493  *
24494  * Description: This routine is the driver entry point for handling ioctl
24495  *		requests to get the disk temperature.
24496  *
24497  *   Arguments: dev  - the device number
24498  *		arg  - pointer to user provided dk_temperature structure.
24499  *		flag - this argument is a pass through to ddi_copyxxx()
24500  *		       directly from the mode argument of ioctl().
24501  *
24502  * Return Code: 0
24503  *		EFAULT
24504  *		ENXIO
24505  *		EAGAIN
24506  */
24507 
24508 static int
24509 sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
24510 {
24511 	struct sd_lun		*un = NULL;
24512 	struct dk_temperature	*dktemp = NULL;
24513 	uchar_t			*temperature_page;
24514 	int			rval = 0;
24515 	int			path_flag = SD_PATH_STANDARD;
24516 	sd_ssc_t		*ssc;
24517 
24518 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24519 		return (ENXIO);
24520 	}
24521 
24522 	ssc = sd_ssc_init(un);
24523 	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);
24524 
24525 	/* copyin the disk temp argument to get the user flags */
24526 	if (ddi_copyin((void *)arg, dktemp,
24527 	    sizeof (struct dk_temperature), flag) != 0) {
24528 		rval = EFAULT;
24529 		goto done;
24530 	}
24531 
24532 	/* Initialize the temperature to invalid. */
24533 	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24534 	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
24535 
24536 	/*
24537 	 * Note: Investigate removing the "bypass pm" semantic.
24538 	 * Can we just bypass PM always?
24539 	 */
24540 	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
24541 		path_flag = SD_PATH_DIRECT;
24542 		ASSERT(!mutex_owned(&un->un_pm_mutex));
24543 		mutex_enter(&un->un_pm_mutex);
24544 		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
24545 			/*
24546 			 * If DKT_BYPASS_PM is set, and the drive happens to be
24547 			 * in low power mode, we can not wake it up, Need to
24548 			 * return EAGAIN.
24549 			 */
24550 			mutex_exit(&un->un_pm_mutex);
24551 			rval = EAGAIN;
24552 			goto done;
24553 		} else {
24554 			/*
24555 			 * Indicate to PM the device is busy. This is required
24556 			 * to avoid a race - i.e. the ioctl is issuing a
24557 			 * command and the pm framework brings down the device
24558 			 * to low power mode (possible power cut-off on some
24559 			 * platforms).
24560 			 */
24561 			mutex_exit(&un->un_pm_mutex);
24562 			if (sd_pm_entry(un) != DDI_SUCCESS) {
24563 				rval = EAGAIN;
24564 				goto done;
24565 			}
24566 		}
24567 	}
24568 
24569 	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);
24570 
24571 	rval = sd_send_scsi_LOG_SENSE(ssc, temperature_page,
24572 	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag);
24573 	if (rval != 0)
24574 		goto done2;
24575 
24576 	/*
24577 	 * For the current temperature verify that the parameter length is 0x02
24578 	 * and the parameter code is 0x00
24579 	 */
24580 	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
24581 	    (temperature_page[5] == 0x00)) {
24582 		if (temperature_page[9] == 0xFF) {
24583 			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24584 		} else {
24585 			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
24586 		}
24587 	}
24588 
24589 	/*
24590 	 * For the reference temperature verify that the parameter
24591 	 * length is 0x02 and the parameter code is 0x01
24592 	 */
24593 	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
24594 	    (temperature_page[11] == 0x01)) {
24595 		if (temperature_page[15] == 0xFF) {
24596 			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
24597 		} else {
24598 			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
24599 		}
24600 	}
24601 
24602 	/* Do the copyout regardless of the temperature commands status. */
24603 	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
24604 	    flag) != 0) {
24605 		rval = EFAULT;
24606 		goto done1;
24607 	}
24608 
24609 done2:
24610 	if (rval != 0) {
24611 		if (rval == EIO)
24612 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
24613 		else
24614 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
24615 	}
24616 done1:
24617 	if (path_flag == SD_PATH_DIRECT) {
24618 		sd_pm_exit(un);
24619 	}
24620 
24621 	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
24622 done:
24623 	sd_ssc_fini(ssc);
24624 	if (dktemp != NULL) {
24625 		kmem_free(dktemp, sizeof (struct dk_temperature));
24626 	}
24627 
24628 	return (rval);
24629 }
24630 
24631 
24632 /*
24633  *    Function: sd_log_page_supported
24634  *
24635  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
24636  *		supported log pages.
24637  *
24638  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
24639  *                      structure for this target.
24640  *		log_page -
24641  *
24642  * Return Code: -1 - on error (log sense is optional and may not be supported).
24643  *		0  - log page not found.
24644  *		1  - log page found.
24645  */
24646 
24647 static int
24648 sd_log_page_supported(sd_ssc_t *ssc, int log_page)
24649 {
24650 	uchar_t *log_page_data;
24651 	int	i;
24652 	int	match = 0;
24653 	int	log_size;
24654 	int	status = 0;
24655 	struct sd_lun	*un;
24656 
24657 	ASSERT(ssc != NULL);
24658 	un = ssc->ssc_un;
24659 	ASSERT(un != NULL);
24660 
24661 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
24662 
24663 	status = sd_send_scsi_LOG_SENSE(ssc, log_page_data, 0xFF, 0, 0x01, 0,
24664 	    SD_PATH_DIRECT);
24665 
24666 	if (status != 0) {
24667 		if (status == EIO) {
24668 			/*
24669 			 * Some disks do not support log sense, we
24670 			 * should ignore this kind of error(sense key is
24671 			 * 0x5 - illegal request).
24672 			 */
24673 			uint8_t *sensep;
24674 			int senlen;
24675 
24676 			sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
24677 			senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
24678 			    ssc->ssc_uscsi_cmd->uscsi_rqresid);
24679 
24680 			if (senlen > 0 &&
24681 			    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
24682 				sd_ssc_assessment(ssc,
24683 				    SD_FMT_IGNORE_COMPROMISE);
24684 			} else {
24685 				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
24686 			}
24687 		} else {
24688 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
24689 		}
24690 
24691 		SD_ERROR(SD_LOG_COMMON, un,
24692 		    "sd_log_page_supported: failed log page retrieval\n");
24693 		kmem_free(log_page_data, 0xFF);
24694 		return (-1);
24695 	}
24696 
24697 	log_size = log_page_data[3];
24698 
24699 	/*
24700 	 * The list of supported log pages start from the fourth byte. Check
24701 	 * until we run out of log pages or a match is found.
24702 	 */
24703 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
24704 		if (log_page_data[i] == log_page) {
24705 			match++;
24706 		}
24707 	}
24708 	kmem_free(log_page_data, 0xFF);
24709 	return (match);
24710 }
24711 
24712 
24713 /*
24714  *    Function: sd_mhdioc_failfast
24715  *
24716  * Description: This routine is the driver entry point for handling ioctl
24717  *		requests to enable/disable the multihost failfast option.
24718  *		(MHIOCENFAILFAST)
24719  *
24720  *   Arguments: dev	- the device number
24721  *		arg	- user specified probing interval.
24722  *		flag	- this argument is a pass through to ddi_copyxxx()
24723  *			  directly from the mode argument of ioctl().
24724  *
24725  * Return Code: 0
24726  *		EFAULT
24727  *		ENXIO
24728  */
24729 
24730 static int
24731 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
24732 {
24733 	struct sd_lun	*un = NULL;
24734 	int		mh_time;
24735 	int		rval = 0;
24736 
24737 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24738 		return (ENXIO);
24739 	}
24740 
24741 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
24742 		return (EFAULT);
24743 
24744 	if (mh_time) {
24745 		mutex_enter(SD_MUTEX(un));
24746 		un->un_resvd_status |= SD_FAILFAST;
24747 		mutex_exit(SD_MUTEX(un));
24748 		/*
24749 		 * If mh_time is INT_MAX, then this ioctl is being used for
24750 		 * SCSI-3 PGR purposes, and we don't need to spawn watch thread.
24751 		 */
24752 		if (mh_time != INT_MAX) {
24753 			rval = sd_check_mhd(dev, mh_time);
24754 		}
24755 	} else {
24756 		(void) sd_check_mhd(dev, 0);
24757 		mutex_enter(SD_MUTEX(un));
24758 		un->un_resvd_status &= ~SD_FAILFAST;
24759 		mutex_exit(SD_MUTEX(un));
24760 	}
24761 	return (rval);
24762 }
24763 
24764 
24765 /*
24766  *    Function: sd_mhdioc_takeown
24767  *
24768  * Description: This routine is the driver entry point for handling ioctl
24769  *		requests to forcefully acquire exclusive access rights to the
24770  *		multihost disk (MHIOCTKOWN).
24771  *
24772  *   Arguments: dev	- the device number
24773  *		arg	- user provided structure specifying the delay
24774  *			  parameters in milliseconds
24775  *		flag	- this argument is a pass through to ddi_copyxxx()
24776  *			  directly from the mode argument of ioctl().
24777  *
24778  * Return Code: 0
24779  *		EFAULT
24780  *		ENXIO
24781  */
24782 
24783 static int
24784 sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
24785 {
24786 	struct sd_lun		*un = NULL;
24787 	struct mhioctkown	*tkown = NULL;
24788 	int			rval = 0;
24789 
24790 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24791 		return (ENXIO);
24792 	}
24793 
24794 	if (arg != NULL) {
24795 		tkown = (struct mhioctkown *)
24796 		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
24797 		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
24798 		if (rval != 0) {
24799 			rval = EFAULT;
24800 			goto error;
24801 		}
24802 	}
24803 
24804 	rval = sd_take_ownership(dev, tkown);
24805 	mutex_enter(SD_MUTEX(un));
24806 	if (rval == 0) {
24807 		un->un_resvd_status |= SD_RESERVE;
24808 		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
24809 			sd_reinstate_resv_delay =
24810 			    tkown->reinstate_resv_delay * 1000;
24811 		} else {
24812 			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
24813 		}
24814 		/*
24815 		 * Give the scsi_watch routine interval set by
24816 		 * the MHIOCENFAILFAST ioctl precedence here.
24817 		 */
24818 		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
24819 			mutex_exit(SD_MUTEX(un));
24820 			(void) sd_check_mhd(dev,
24821 			    sd_reinstate_resv_delay / 1000);
24822 			SD_TRACE(SD_LOG_IOCTL_MHD, un,
24823 			    "sd_mhdioc_takeown : %d\n",
24824 			    sd_reinstate_resv_delay);
24825 		} else {
24826 			mutex_exit(SD_MUTEX(un));
24827 		}
24828 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
24829 		    sd_mhd_reset_notify_cb, (caddr_t)un);
24830 	} else {
24831 		un->un_resvd_status &= ~SD_RESERVE;
24832 		mutex_exit(SD_MUTEX(un));
24833 	}
24834 
24835 error:
24836 	if (tkown != NULL) {
24837 		kmem_free(tkown, sizeof (struct mhioctkown));
24838 	}
24839 	return (rval);
24840 }
24841 
24842 
24843 /*
24844  *    Function: sd_mhdioc_release
24845  *
24846  * Description: This routine is the driver entry point for handling ioctl
24847  *		requests to release exclusive access rights to the multihost
24848  *		disk (MHIOCRELEASE).
24849  *
24850  *   Arguments: dev	- the device number
24851  *
24852  * Return Code: 0
24853  *		ENXIO
24854  */
24855 
24856 static int
24857 sd_mhdioc_release(dev_t dev)
24858 {
24859 	struct sd_lun		*un = NULL;
24860 	timeout_id_t		resvd_timeid_save;
24861 	int			resvd_status_save;
24862 	int			rval = 0;
24863 
24864 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24865 		return (ENXIO);
24866 	}
24867 
24868 	mutex_enter(SD_MUTEX(un));
24869 	resvd_status_save = un->un_resvd_status;
24870 	un->un_resvd_status &=
24871 	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
24872 	if (un->un_resvd_timeid) {
24873 		resvd_timeid_save = un->un_resvd_timeid;
24874 		un->un_resvd_timeid = NULL;
24875 		mutex_exit(SD_MUTEX(un));
24876 		(void) untimeout(resvd_timeid_save);
24877 	} else {
24878 		mutex_exit(SD_MUTEX(un));
24879 	}
24880 
24881 	/*
24882 	 * destroy any pending timeout thread that may be attempting to
24883 	 * reinstate reservation on this device.
24884 	 */
24885 	sd_rmv_resv_reclaim_req(dev);
24886 
24887 	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
24888 		mutex_enter(SD_MUTEX(un));
24889 		if ((un->un_mhd_token) &&
24890 		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
24891 			mutex_exit(SD_MUTEX(un));
24892 			(void) sd_check_mhd(dev, 0);
24893 		} else {
24894 			mutex_exit(SD_MUTEX(un));
24895 		}
24896 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
24897 		    sd_mhd_reset_notify_cb, (caddr_t)un);
24898 	} else {
24899 		/*
24900 		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
24901 		 */
24902 		mutex_enter(SD_MUTEX(un));
24903 		un->un_resvd_status = resvd_status_save;
24904 		mutex_exit(SD_MUTEX(un));
24905 	}
24906 	return (rval);
24907 }
24908 
24909 
24910 /*
24911  *    Function: sd_mhdioc_register_devid
24912  *
24913  * Description: This routine is the driver entry point for handling ioctl
24914  *		requests to register the device id (MHIOCREREGISTERDEVID).
24915  *
24916  *		Note: The implementation for this ioctl has been updated to
24917  *		be consistent with the original PSARC case (1999/357)
24918  *		(4375899, 4241671, 4220005)
24919  *
24920  *   Arguments: dev	- the device number
24921  *
24922  * Return Code: 0
24923  *		ENXIO
24924  */
24925 
24926 static int
24927 sd_mhdioc_register_devid(dev_t dev)
24928 {
24929 	struct sd_lun	*un = NULL;
24930 	int		rval = 0;
24931 	sd_ssc_t	*ssc;
24932 
24933 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24934 		return (ENXIO);
24935 	}
24936 
24937 	ASSERT(!mutex_owned(SD_MUTEX(un)));
24938 
24939 	mutex_enter(SD_MUTEX(un));
24940 
24941 	/* If a devid already exists, de-register it */
24942 	if (un->un_devid != NULL) {
24943 		ddi_devid_unregister(SD_DEVINFO(un));
24944 		/*
24945 		 * After unregister devid, needs to free devid memory
24946 		 */
24947 		ddi_devid_free(un->un_devid);
24948 		un->un_devid = NULL;
24949 	}
24950 
24951 	/* Check for reservation conflict */
24952 	mutex_exit(SD_MUTEX(un));
24953 	ssc = sd_ssc_init(un);
24954 	rval = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
24955 	mutex_enter(SD_MUTEX(un));
24956 
24957 	switch (rval) {
24958 	case 0:
24959 		sd_register_devid(ssc, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
24960 		break;
24961 	case EACCES:
24962 		break;
24963 	default:
24964 		rval = EIO;
24965 	}
24966 
24967 	mutex_exit(SD_MUTEX(un));
24968 	if (rval != 0) {
24969 		if (rval == EIO)
24970 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
24971 		else
24972 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
24973 	}
24974 	sd_ssc_fini(ssc);
24975 	return (rval);
24976 }
24977 
24978 
24979 /*
24980  *    Function: sd_mhdioc_inkeys
24981  *
24982  * Description: This routine is the driver entry point for handling ioctl
24983  *		requests to issue the SCSI-3 Persistent In Read Keys command
24984  *		to the device (MHIOCGRP_INKEYS).
24985  *
24986  *   Arguments: dev	- the device number
24987  *		arg	- user provided in_keys structure
24988  *		flag	- this argument is a pass through to ddi_copyxxx()
24989  *			  directly from the mode argument of ioctl().
24990  *
24991  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
24992  *		ENXIO
24993  *		EFAULT
24994  */
24995 
24996 static int
24997 sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
24998 {
24999 	struct sd_lun		*un;
25000 	mhioc_inkeys_t		inkeys;
25001 	int			rval = 0;
25002 
25003 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25004 		return (ENXIO);
25005 	}
25006 
25007 #ifdef _MULTI_DATAMODEL
25008 	switch (ddi_model_convert_from(flag & FMODELS)) {
25009 	case DDI_MODEL_ILP32: {
25010 		struct mhioc_inkeys32	inkeys32;
25011 
25012 		if (ddi_copyin(arg, &inkeys32,
25013 		    sizeof (struct mhioc_inkeys32), flag) != 0) {
25014 			return (EFAULT);
25015 		}
25016 		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
25017 		if ((rval = sd_persistent_reservation_in_read_keys(un,
25018 		    &inkeys, flag)) != 0) {
25019 			return (rval);
25020 		}
25021 		inkeys32.generation = inkeys.generation;
25022 		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
25023 		    flag) != 0) {
25024 			return (EFAULT);
25025 		}
25026 		break;
25027 	}
25028 	case DDI_MODEL_NONE:
25029 		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
25030 		    flag) != 0) {
25031 			return (EFAULT);
25032 		}
25033 		if ((rval = sd_persistent_reservation_in_read_keys(un,
25034 		    &inkeys, flag)) != 0) {
25035 			return (rval);
25036 		}
25037 		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
25038 		    flag) != 0) {
25039 			return (EFAULT);
25040 		}
25041 		break;
25042 	}
25043 
25044 #else /* ! _MULTI_DATAMODEL */
25045 
25046 	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
25047 		return (EFAULT);
25048 	}
25049 	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
25050 	if (rval != 0) {
25051 		return (rval);
25052 	}
25053 	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
25054 		return (EFAULT);
25055 	}
25056 
25057 #endif /* _MULTI_DATAMODEL */
25058 
25059 	return (rval);
25060 }
25061 
25062 
25063 /*
25064  *    Function: sd_mhdioc_inresv
25065  *
25066  * Description: This routine is the driver entry point for handling ioctl
25067  *		requests to issue the SCSI-3 Persistent In Read Reservations
25068  *		command to the device (MHIOCGRP_INKEYS).
25069  *
25070  *   Arguments: dev	- the device number
25071  *		arg	- user provided in_resv structure
25072  *		flag	- this argument is a pass through to ddi_copyxxx()
25073  *			  directly from the mode argument of ioctl().
25074  *
25075  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
25076  *		ENXIO
25077  *		EFAULT
25078  */
25079 
25080 static int
25081 sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
25082 {
25083 	struct sd_lun		*un;
25084 	mhioc_inresvs_t		inresvs;
25085 	int			rval = 0;
25086 
25087 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25088 		return (ENXIO);
25089 	}
25090 
25091 #ifdef _MULTI_DATAMODEL
25092 
25093 	switch (ddi_model_convert_from(flag & FMODELS)) {
25094 	case DDI_MODEL_ILP32: {
25095 		struct mhioc_inresvs32	inresvs32;
25096 
25097 		if (ddi_copyin(arg, &inresvs32,
25098 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
25099 			return (EFAULT);
25100 		}
25101 		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
25102 		if ((rval = sd_persistent_reservation_in_read_resv(un,
25103 		    &inresvs, flag)) != 0) {
25104 			return (rval);
25105 		}
25106 		inresvs32.generation = inresvs.generation;
25107 		if (ddi_copyout(&inresvs32, arg,
25108 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
25109 			return (EFAULT);
25110 		}
25111 		break;
25112 	}
25113 	case DDI_MODEL_NONE:
25114 		if (ddi_copyin(arg, &inresvs,
25115 		    sizeof (mhioc_inresvs_t), flag) != 0) {
25116 			return (EFAULT);
25117 		}
25118 		if ((rval = sd_persistent_reservation_in_read_resv(un,
25119 		    &inresvs, flag)) != 0) {
25120 			return (rval);
25121 		}
25122 		if (ddi_copyout(&inresvs, arg,
25123 		    sizeof (mhioc_inresvs_t), flag) != 0) {
25124 			return (EFAULT);
25125 		}
25126 		break;
25127 	}
25128 
25129 #else /* ! _MULTI_DATAMODEL */
25130 
25131 	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
25132 		return (EFAULT);
25133 	}
25134 	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
25135 	if (rval != 0) {
25136 		return (rval);
25137 	}
25138 	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag)) {
25139 		return (EFAULT);
25140 	}
25141 
25142 #endif /* ! _MULTI_DATAMODEL */
25143 
25144 	return (rval);
25145 }
25146 
25147 
25148 /*
25149  * The following routines support the clustering functionality described below
25150  * and implement lost reservation reclaim functionality.
25151  *
25152  * Clustering
25153  * ----------
25154  * The clustering code uses two different, independent forms of SCSI
25155  * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
25156  * Persistent Group Reservations. For any particular disk, it will use either
25157  * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
25158  *
25159  * SCSI-2
25160  * The cluster software takes ownership of a multi-hosted disk by issuing the
25161  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
25162  * MHIOCRELEASE ioctl.  Closely related is the MHIOCENFAILFAST ioctl -- a
25163  * cluster, just after taking ownership of the disk with the MHIOCTKOWN ioctl
25164  * then issues the MHIOCENFAILFAST ioctl.  This ioctl "enables failfast" in the
25165  * driver. The meaning of failfast is that if the driver (on this host) ever
25166  * encounters the scsi error return code RESERVATION_CONFLICT from the device,
25167  * it should immediately panic the host. The motivation for this ioctl is that
25168  * if this host does encounter reservation conflict, the underlying cause is
25169  * that some other host of the cluster has decided that this host is no longer
25170  * in the cluster and has seized control of the disks for itself. Since this
25171  * host is no longer in the cluster, it ought to panic itself. The
25172  * MHIOCENFAILFAST ioctl does two things:
25173  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
25174  *      error to panic the host
25175  *      (b) it sets up a periodic timer to test whether this host still has
25176  *      "access" (in that no other host has reserved the device):  if the
25177  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
25178  *      purpose of that periodic timer is to handle scenarios where the host is
25179  *      otherwise temporarily quiescent, temporarily doing no real i/o.
25180  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
25181  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
25182  * the device itself.
25183  *
25184  * SCSI-3 PGR
25185  * A direct semantic implementation of the SCSI-3 Persistent Reservation
25186  * facility is supported through the shared multihost disk ioctls
25187  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
25188  * MHIOCGRP_PREEMPTANDABORT, MHIOCGRP_CLEAR)
25189  *
25190  * Reservation Reclaim:
25191  * --------------------
25192  * To support the lost reservation reclaim operations this driver creates a
 * single thread to handle reinstating reservations on all devices that have
 * lost reservations. sd_resv_reclaim_requests are logged for all devices that
 * have LOST RESERVATIONS when the scsi watch facility calls back
 * sd_mhd_watch_cb, and the reservation reclaim thread loops through the
 * requests to regain the lost reservations.
25198  */
25199 
25200 /*
25201  *    Function: sd_check_mhd()
25202  *
25203  * Description: This function sets up and submits a scsi watch request or
25204  *		terminates an existing watch request. This routine is used in
25205  *		support of reservation reclaim.
25206  *
25207  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
25208  *			 among multiple watches that share the callback function
25209  *		interval - the number of microseconds specifying the watch
25210  *			   interval for issuing TEST UNIT READY commands. If
25211  *			   set to 0 the watch should be terminated. If the
25212  *			   interval is set to 0 and if the device is required
25213  *			   to hold reservation while disabling failfast, the
25214  *			   watch is restarted with an interval of
25215  *			   reinstate_resv_delay.
25216  *
25217  * Return Code: 0	   - Successful submit/terminate of scsi watch request
25218  *		ENXIO      - Indicates an invalid device was specified
25219  *		EAGAIN     - Unable to submit the scsi watch request
25220  */
25221 
25222 static int
25223 sd_check_mhd(dev_t dev, int interval)
25224 {
25225 	struct sd_lun	*un;
25226 	opaque_t	token;
25227 
25228 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25229 		return (ENXIO);
25230 	}
25231 
25232 	/* is this a watch termination request? */
25233 	if (interval == 0) {
25234 		mutex_enter(SD_MUTEX(un));
25235 		/* if there is an existing watch task then terminate it */
25236 		if (un->un_mhd_token) {
25237 			token = un->un_mhd_token;
25238 			un->un_mhd_token = NULL;
25239 			mutex_exit(SD_MUTEX(un));
25240 			(void) scsi_watch_request_terminate(token,
25241 			    SCSI_WATCH_TERMINATE_ALL_WAIT);
25242 			mutex_enter(SD_MUTEX(un));
25243 		} else {
25244 			mutex_exit(SD_MUTEX(un));
25245 			/*
25246 			 * Note: If we return here we don't check for the
25247 			 * failfast case. This is the original legacy
25248 			 * implementation but perhaps we should be checking
25249 			 * the failfast case.
25250 			 */
25251 			return (0);
25252 		}
25253 		/*
25254 		 * If the device is required to hold reservation while
25255 		 * disabling failfast, we need to restart the scsi_watch
25256 		 * routine with an interval of reinstate_resv_delay.
25257 		 */
25258 		if (un->un_resvd_status & SD_RESERVE) {
25259 			interval = sd_reinstate_resv_delay / 1000;
25260 		} else {
25261 			/* no failfast so bail */
25262 			mutex_exit(SD_MUTEX(un));
25263 			return (0);
25264 		}
25265 		mutex_exit(SD_MUTEX(un));
25266 	}
25267 
25268 	/*
25269 	 * adjust minimum time interval to 1 second,
25270 	 * and convert from msecs to usecs
25271 	 */
25272 	if (interval > 0 && interval < 1000) {
25273 		interval = 1000;
25274 	}
25275 	interval *= 1000;
25276 
25277 	/*
25278 	 * submit the request to the scsi_watch service
25279 	 */
25280 	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
25281 	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
25282 	if (token == NULL) {
25283 		return (EAGAIN);
25284 	}
25285 
25286 	/*
25287 	 * save token for termination later on
25288 	 */
25289 	mutex_enter(SD_MUTEX(un));
25290 	un->un_mhd_token = token;
25291 	mutex_exit(SD_MUTEX(un));
25292 	return (0);
25293 }
25294 
25295 
25296 /*
25297  *    Function: sd_mhd_watch_cb()
25298  *
25299  * Description: This function is the call back function used by the scsi watch
25300  *		facility. The scsi watch facility sends the "Test Unit Ready"
25301  *		and processes the status. If applicable (i.e. a "Unit Attention"
25302  *		status and automatic "Request Sense" not used) the scsi watch
25303  *		facility will send a "Request Sense" and retrieve the sense data
25304  *		to be passed to this callback function. In either case the
25305  *		automatic "Request Sense" or the facility submitting one, this
25306  *		callback is passed the status and sense data.
25307  *
25308  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25309  *			among multiple watches that share this callback function
25310  *		resultp - scsi watch facility result packet containing scsi
25311  *			  packet, status byte and sense data
25312  *
25313  * Return Code: 0 - continue the watch task
25314  *		non-zero - terminate the watch task
25315  */
25316 
static int
sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
{
	struct sd_lun			*un;
	struct scsi_status		*statusp;
	uint8_t				*sensep;
	struct scsi_pkt			*pkt;
	uchar_t				actual_sense_length;
	dev_t				dev = (dev_t)arg;

	ASSERT(resultp != NULL);
	/* Unpack the scsi watch result packet. */
	statusp			= resultp->statusp;
	sensep			= (uint8_t *)resultp->sensep;
	pkt			= resultp->pkt;
	actual_sense_length	= resultp->actual_sense_length;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	SD_TRACE(SD_LOG_IOCTL_MHD, un,
	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));

	/* Begin processing of the status and/or sense data */
	if (pkt->pkt_reason != CMD_CMPLT) {
		/* Handle the incomplete packet */
		sd_mhd_watch_incomplete(un, pkt);
		return (0);
	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
		if (*((unsigned char *)statusp)
		    == STATUS_RESERVATION_CONFLICT) {
			/*
			 * Handle a reservation conflict by panicking if
			 * configured for failfast or by logging the conflict
			 * and updating the reservation status
			 */
			mutex_enter(SD_MUTEX(un));
			if ((un->un_resvd_status & SD_FAILFAST) &&
			    (sd_failfast_enable)) {
				sd_panic_for_res_conflict(un);
				/*NOTREACHED*/
			}
			SD_INFO(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_cb: Reservation Conflict\n");
			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
			mutex_exit(SD_MUTEX(un));
		}
	}

	/*
	 * Lock protocol for the remainder of this function: every path
	 * below that falls through to the lost-reservation handling first
	 * acquires SD_MUTEX and holds it until the mutex_exit at the
	 * bottom. The truncated-sense branch is the only early return and
	 * it returns without the mutex held.
	 */
	if (sensep != NULL) {
		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
			mutex_enter(SD_MUTEX(un));
			if ((scsi_sense_asc(sensep) ==
			    SD_SCSI_RESET_SENSE_CODE) &&
			    (un->un_resvd_status & SD_RESERVE)) {
				/*
				 * The additional sense code indicates a power
				 * on or bus device reset has occurred; update
				 * the reservation status.
				 */
				un->un_resvd_status |=
				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_mhd_watch_cb: Lost Reservation\n");
			}
		} else {
			/* Sense data was truncated; ignore this callback. */
			return (0);
		}
	} else {
		mutex_enter(SD_MUTEX(un));
	}

	if ((un->un_resvd_status & SD_RESERVE) &&
	    (un->un_resvd_status & SD_LOST_RESERVE)) {
		if (un->un_resvd_status & SD_WANT_RESERVE) {
			/*
			 * A reset occurred in between the last probe and this
			 * one so if a timeout is pending cancel it.
			 */
			if (un->un_resvd_timeid) {
				timeout_id_t temp_id = un->un_resvd_timeid;
				un->un_resvd_timeid = NULL;
				/*
				 * Drop SD_MUTEX across untimeout(); it may
				 * have to wait for a running timeout handler
				 * that could itself want the mutex.
				 */
				mutex_exit(SD_MUTEX(un));
				(void) untimeout(temp_id);
				mutex_enter(SD_MUTEX(un));
			}
			un->un_resvd_status &= ~SD_WANT_RESERVE;
		}
		if (un->un_resvd_timeid == 0) {
			/* Schedule a timeout to handle the lost reservation */
			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
			    (void *)dev,
			    drv_usectohz(sd_reinstate_resv_delay));
		}
	}
	mutex_exit(SD_MUTEX(un));
	return (0);
}
25416 
25417 
25418 /*
25419  *    Function: sd_mhd_watch_incomplete()
25420  *
25421  * Description: This function is used to find out why a scsi pkt sent by the
25422  *		scsi watch facility was not completed. Under some scenarios this
25423  *		routine will return. Otherwise it will send a bus reset to see
25424  *		if the drive is still online.
25425  *
25426  *   Arguments: un  - driver soft state (unit) structure
25427  *		pkt - incomplete scsi pkt
25428  */
25429 
static void
sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
{
	int	be_chatty;	/* log transport failures unless FLAG_SILENT */
	int	perr;		/* a parity error was recorded for this pkt */

	ASSERT(pkt != NULL);
	ASSERT(un != NULL);
	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
	perr		= (pkt->pkt_statistics & STAT_PERR);

	/* Take no action at all while a crash dump is in progress. */
	mutex_enter(SD_MUTEX(un));
	if (un->un_state == SD_STATE_DUMPING) {
		mutex_exit(SD_MUTEX(un));
		return;
	}

	switch (pkt->pkt_reason) {
	case CMD_UNX_BUS_FREE:
		/*
		 * If we had a parity error that caused the target to drop BSY*,
		 * don't be chatty about it.
		 */
		if (perr && be_chatty) {
			be_chatty = 0;
		}
		break;
	case CMD_TAG_REJECT:
		/*
		 * The SCSI-2 spec states that a tag reject will be sent by the
		 * target if tagged queuing is not supported. A tag reject may
		 * also be sent during certain initialization periods or to
		 * control internal resources. For the latter case the target
		 * may also return Queue Full.
		 *
		 * If this driver receives a tag reject from a target that is
		 * going through an init period or controlling internal
		 * resources tagged queuing will be disabled. This is a less
		 * than optimal behavior but the driver is unable to determine
		 * the target state and assumes tagged queueing is not supported
		 */
		pkt->pkt_flags = 0;
		un->un_tagflags = 0;

		/* Throttle down since commands will now be untagged. */
		if (un->un_f_opt_queueing == TRUE) {
			un->un_throttle = min(un->un_throttle, 3);
		} else {
			un->un_throttle = 1;
		}
		/* Drop SD_MUTEX for the HBA capability call. */
		mutex_exit(SD_MUTEX(un));
		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
		mutex_enter(SD_MUTEX(un));
		break;
	case CMD_INCOMPLETE:
		/*
		 * The transport stopped with an abnormal state, fallthrough and
		 * reset the target and/or bus unless selection did not complete
		 * (indicated by STATE_GOT_BUS) in which case we don't want to
		 * go through a target/bus reset
		 */
		if (pkt->pkt_state == STATE_GOT_BUS) {
			break;
		}
		/*FALLTHROUGH*/

	case CMD_TIMEOUT:
	default:
		/*
		 * The lun may still be running the command, so a lun reset
		 * should be attempted. If the lun reset fails or cannot be
		 * issued, than try a target reset. Lastly try a bus reset.
		 */
		if ((pkt->pkt_statistics &
		    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) == 0) {
			int reset_retval = 0;
			/*
			 * NOTE(review): the un_f_allow_bus_device_reset and
			 * un_f_lun_reset_enabled flags are read below after
			 * SD_MUTEX has been dropped — confirm these flags are
			 * stable after attach so the unlocked reads are safe.
			 */
			mutex_exit(SD_MUTEX(un));
			if (un->un_f_allow_bus_device_reset == TRUE) {
				if (un->un_f_lun_reset_enabled == TRUE) {
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_LUN);
				}
				if (reset_retval == 0) {
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_TARGET);
				}
			}
			if (reset_retval == 0) {
				/* Last resort: reset the whole bus. */
				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
			}
			mutex_enter(SD_MUTEX(un));
		}
		break;
	}

	/* A device/bus reset has occurred; update the reservation status. */
	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			un->un_resvd_status |=
			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
			SD_INFO(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_incomplete: Lost Reservation\n");
		}
	}

	/*
	 * The disk has been turned off; Update the device state.
	 *
	 * Note: Should we be offlining the disk here?
	 */
	if (pkt->pkt_state == STATE_GOT_BUS) {
		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
		    "Disk not responding to selection\n");
		if (un->un_state != SD_STATE_OFFLINE) {
			New_state(un, SD_STATE_OFFLINE);
		}
	} else if (be_chatty) {
		/*
		 * suppress messages if they are all the same pkt reason;
		 * with TQ, many (up to 256) are returned with the same
		 * pkt_reason
		 */
		if (pkt->pkt_reason != un->un_last_pkt_reason) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_incomplete: "
			    "SCSI transport failed: reason '%s'\n",
			    scsi_rname(pkt->pkt_reason));
		}
	}
	un->un_last_pkt_reason = pkt->pkt_reason;
	mutex_exit(SD_MUTEX(un));
}
25564 
25565 
25566 /*
25567  *    Function: sd_sname()
25568  *
25569  * Description: This is a simple little routine to return a string containing
25570  *		a printable description of command status byte for use in
25571  *		logging.
25572  *
25573  *   Arguments: status - pointer to a status byte
25574  *
25575  * Return Code: char * - string containing status description.
25576  */
25577 
25578 static char *
25579 sd_sname(uchar_t status)
25580 {
25581 	switch (status & STATUS_MASK) {
25582 	case STATUS_GOOD:
25583 		return ("good status");
25584 	case STATUS_CHECK:
25585 		return ("check condition");
25586 	case STATUS_MET:
25587 		return ("condition met");
25588 	case STATUS_BUSY:
25589 		return ("busy");
25590 	case STATUS_INTERMEDIATE:
25591 		return ("intermediate");
25592 	case STATUS_INTERMEDIATE_MET:
25593 		return ("intermediate - condition met");
25594 	case STATUS_RESERVATION_CONFLICT:
25595 		return ("reservation_conflict");
25596 	case STATUS_TERMINATED:
25597 		return ("command terminated");
25598 	case STATUS_QFULL:
25599 		return ("queue full");
25600 	default:
25601 		return ("<unknown status>");
25602 	}
25603 }
25604 
25605 
25606 /*
25607  *    Function: sd_mhd_resvd_recover()
25608  *
25609  * Description: This function adds a reservation entry to the
25610  *		sd_resv_reclaim_request list and signals the reservation
25611  *		reclaim thread that there is work pending. If the reservation
25612  *		reclaim thread has not been previously created this function
25613  *		will kick it off.
25614  *
25615  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25616  *			among multiple watches that share this callback function
25617  *
25618  *     Context: This routine is called by timeout() and is run in interrupt
25619  *		context. It must not sleep or call other functions which may
25620  *		sleep.
25621  */
25622 
static void
sd_mhd_resvd_recover(void *arg)
{
	dev_t			dev = (dev_t)arg;
	struct sd_lun		*un;
	struct sd_thr_request	*sd_treq = NULL;
	struct sd_thr_request	*sd_cur = NULL;
	struct sd_thr_request	*sd_prev = NULL;
	int			already_there = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return;
	}

	/* This timeout has now fired; clear the pending timeout id. */
	mutex_enter(SD_MUTEX(un));
	un->un_resvd_timeid = NULL;
	if (un->un_resvd_status & SD_WANT_RESERVE) {
		/*
		 * There was a reset so don't issue the reserve, allow the
		 * sd_mhd_watch_cb callback function to notice this and
		 * reschedule the timeout for reservation.
		 */
		mutex_exit(SD_MUTEX(un));
		return;
	}
	mutex_exit(SD_MUTEX(un));

	/*
	 * Add this device to the sd_resv_reclaim_request list and the
	 * sd_resv_reclaim_thread should take care of the rest.
	 *
	 * Note: We can't sleep in this context so if the memory allocation
	 * fails allow the sd_mhd_watch_cb callback function to notice this and
	 * reschedule the timeout for reservation.  (4378460)
	 */
	sd_treq = (struct sd_thr_request *)
	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
	if (sd_treq == NULL) {
		return;
	}

	sd_treq->sd_thr_req_next = NULL;
	sd_treq->dev = dev;
	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
	if (sd_tr.srq_thr_req_head == NULL) {
		/* Empty queue: this request becomes the head. */
		sd_tr.srq_thr_req_head = sd_treq;
	} else {
		/*
		 * Walk the queue looking for an existing entry for this
		 * device; after the loop sd_prev points either at the
		 * matched node's predecessor or at the tail.
		 */
		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
			if (sd_cur->dev == dev) {
				/*
				 * already in Queue so don't log
				 * another request for the device
				 */
				already_there = 1;
				break;
			}
			sd_prev = sd_cur;
		}
		if (!already_there) {
			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
			    "logging request for %lx\n", dev);
			/* Append the new request at the tail. */
			sd_prev->sd_thr_req_next = sd_treq;
		} else {
			/* Duplicate request; discard the new allocation. */
			kmem_free(sd_treq, sizeof (struct sd_thr_request));
		}
	}

	/*
	 * Create a kernel thread to do the reservation reclaim and free up this
	 * thread. We cannot block this thread while we go away to do the
	 * reservation reclaim
	 */
	if (sd_tr.srq_resv_reclaim_thread == NULL)
		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
		    sd_resv_reclaim_thread, NULL,
		    0, &p0, TS_RUN, v.v_maxsyspri - 2);

	/* Tell the reservation reclaim thread that it has work to do */
	cv_signal(&sd_tr.srq_resv_reclaim_cv);
	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
}
25705 
25706 /*
25707  *    Function: sd_resv_reclaim_thread()
25708  *
25709  * Description: This function implements the reservation reclaim operations
25710  *
25711  *   Arguments: arg - the device 'dev_t' is used for context to discriminate
25712  *		      among multiple watches that share this callback function
25713  */
25714 
static void
sd_resv_reclaim_thread()
{
	struct sd_lun		*un;
	struct sd_thr_request	*sd_mhreq;

	/*
	 * Wait for work. Note that a single cv_wait is used (not a loop);
	 * if the wait returns with an empty queue, the while loop below is
	 * skipped and the thread cleans up and exits — it is recreated by
	 * sd_mhd_resvd_recover() when the next request is queued.
	 */
	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
	if (sd_tr.srq_thr_req_head == NULL) {
		cv_wait(&sd_tr.srq_resv_reclaim_cv,
		    &sd_tr.srq_resv_reclaim_mutex);
	}

	/* Loop while we have work */
	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
		un = ddi_get_soft_state(sd_state,
		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
		if (un == NULL) {
			/*
			 * softstate structure is NULL so just
			 * dequeue the request and continue
			 */
			sd_tr.srq_thr_req_head =
			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
			kmem_free(sd_tr.srq_thr_cur_req,
			    sizeof (struct sd_thr_request));
			continue;
		}

		/* dequeue the request */
		sd_mhreq = sd_tr.srq_thr_cur_req;
		sd_tr.srq_thr_req_head =
		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
		/*
		 * Drop the global mutex while issuing the (blocking)
		 * reserve; srq_thr_cur_req remains set so that
		 * sd_rmv_resv_reclaim_req() can see an in-progress reclaim.
		 */
		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);

		/*
		 * Reclaim reservation only if SD_RESERVE is still set. There
		 * may have been a call to MHIOCRELEASE before we got here.
		 */
		mutex_enter(SD_MUTEX(un));
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			/*
			 * Note: The SD_LOST_RESERVE flag is cleared before
			 * reclaiming the reservation. If this is done after the
			 * call to sd_reserve_release a reservation loss in the
			 * window between pkt completion of reserve cmd and
			 * mutex_enter below may not be recognized
			 */
			un->un_resvd_status &= ~SD_LOST_RESERVE;
			mutex_exit(SD_MUTEX(un));

			if (sd_reserve_release(sd_mhreq->dev,
			    SD_RESERVE) == 0) {
				mutex_enter(SD_MUTEX(un));
				un->un_resvd_status |= SD_RESERVE;
				mutex_exit(SD_MUTEX(un));
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_resv_reclaim_thread: "
				    "Reservation Recovered\n");
			} else {
				/* Reclaim failed; mark the reserve lost. */
				mutex_enter(SD_MUTEX(un));
				un->un_resvd_status |= SD_LOST_RESERVE;
				mutex_exit(SD_MUTEX(un));
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_resv_reclaim_thread: Failed "
				    "Reservation Recovery\n");
			}
		} else {
			mutex_exit(SD_MUTEX(un));
		}
		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
		/*
		 * wakeup the destroy thread if anyone is waiting on
		 * us to complete.
		 */
		cv_signal(&sd_tr.srq_inprocess_cv);
		SD_TRACE(SD_LOG_IOCTL_MHD, un,
		    "sd_resv_reclaim_thread: cv_signalling current request \n");
	}

	/*
	 * cleanup the sd_tr structure now that this thread will not exist
	 */
	ASSERT(sd_tr.srq_thr_req_head == NULL);
	ASSERT(sd_tr.srq_thr_cur_req == NULL);
	sd_tr.srq_resv_reclaim_thread = NULL;
	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
	thread_exit();
}
25807 
25808 
25809 /*
25810  *    Function: sd_rmv_resv_reclaim_req()
25811  *
25812  * Description: This function removes any pending reservation reclaim requests
25813  *		for the specified device.
25814  *
25815  *   Arguments: dev - the device 'dev_t'
25816  */
25817 
25818 static void
25819 sd_rmv_resv_reclaim_req(dev_t dev)
25820 {
25821 	struct sd_thr_request *sd_mhreq;
25822 	struct sd_thr_request *sd_prev;
25823 
25824 	/* Remove a reservation reclaim request from the list */
25825 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25826 	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
25827 		/*
25828 		 * We are attempting to reinstate reservation for
25829 		 * this device. We wait for sd_reserve_release()
25830 		 * to return before we return.
25831 		 */
25832 		cv_wait(&sd_tr.srq_inprocess_cv,
25833 		    &sd_tr.srq_resv_reclaim_mutex);
25834 	} else {
25835 		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
25836 		if (sd_mhreq && sd_mhreq->dev == dev) {
25837 			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
25838 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25839 			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25840 			return;
25841 		}
25842 		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
25843 			if (sd_mhreq && sd_mhreq->dev == dev) {
25844 				break;
25845 			}
25846 			sd_prev = sd_mhreq;
25847 		}
25848 		if (sd_mhreq != NULL) {
25849 			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
25850 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25851 		}
25852 	}
25853 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25854 }
25855 
25856 
25857 /*
25858  *    Function: sd_mhd_reset_notify_cb()
25859  *
25860  * Description: This is a call back function for scsi_reset_notify. This
25861  *		function updates the softstate reserved status and logs the
25862  *		reset. The driver scsi watch facility callback function
25863  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
25864  *		will reclaim the reservation.
25865  *
25866  *   Arguments: arg  - driver soft state (unit) structure
25867  */
25868 
25869 static void
25870 sd_mhd_reset_notify_cb(caddr_t arg)
25871 {
25872 	struct sd_lun *un = (struct sd_lun *)arg;
25873 
25874 	mutex_enter(SD_MUTEX(un));
25875 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25876 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
25877 		SD_INFO(SD_LOG_IOCTL_MHD, un,
25878 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
25879 	}
25880 	mutex_exit(SD_MUTEX(un));
25881 }
25882 
25883 
25884 /*
25885  *    Function: sd_take_ownership()
25886  *
25887  * Description: This routine implements an algorithm to achieve a stable
25888  *		reservation on disks which don't implement priority reserve,
25889  *		and makes sure that other host lose re-reservation attempts.
25890  *		This algorithm contains of a loop that keeps issuing the RESERVE
25891  *		for some period of time (min_ownership_delay, default 6 seconds)
25892  *		During that loop, it looks to see if there has been a bus device
25893  *		reset or bus reset (both of which cause an existing reservation
25894  *		to be lost). If the reservation is lost issue RESERVE until a
25895  *		period of min_ownership_delay with no resets has gone by, or
25896  *		until max_ownership_delay has expired. This loop ensures that
25897  *		the host really did manage to reserve the device, in spite of
25898  *		resets. The looping for min_ownership_delay (default six
25899  *		seconds) is important to early generation clustering products,
25900  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
25901  *		MHIOCENFAILFAST periodic timer of two seconds. By having
25902  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
25903  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
25904  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
25905  *		have already noticed, via the MHIOCENFAILFAST polling, that it
25906  *		no longer "owns" the disk and will have panicked itself.  Thus,
25907  *		the host issuing the MHIOCTKOWN is assured (with timing
25908  *		dependencies) that by the time it actually starts to use the
25909  *		disk for real work, the old owner is no longer accessing it.
25910  *
25911  *		min_ownership_delay is the minimum amount of time for which the
25912  *		disk must be reserved continuously devoid of resets before the
25913  *		MHIOCTKOWN ioctl will return success.
25914  *
25915  *		max_ownership_delay indicates the amount of time by which the
25916  *		take ownership should succeed or timeout with an error.
25917  *
25918  *   Arguments: dev - the device 'dev_t'
25919  *		*p  - struct containing timing info.
25920  *
25921  * Return Code: 0 for success or error code
25922  */
25923 
static int
sd_take_ownership(dev_t dev, struct mhioctkown *p)
{
	struct sd_lun	*un;
	int		rval;
	int		err;
	int		reservation_count   = 0;
	int		min_ownership_delay =  6000000; /* in usec */
	int		max_ownership_delay = 30000000; /* in usec */
	clock_t		start_time;	/* starting time of this algorithm */
	clock_t		end_time;	/* time limit for giving up */
	clock_t		ownership_time;	/* time limit for stable ownership */
	clock_t		current_time;
	clock_t		previous_current_time;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * Attempt a device reservation. A priority reservation is requested.
	 */
	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
	    != SD_SUCCESS) {
		SD_ERROR(SD_LOG_IOCTL_MHD, un,
		    "sd_take_ownership: return(1)=%d\n", rval);
		return (rval);
	}

	/* Update the softstate reserved status to indicate the reservation */
	mutex_enter(SD_MUTEX(un));
	un->un_resvd_status |= SD_RESERVE;
	un->un_resvd_status &=
	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
	mutex_exit(SD_MUTEX(un));

	/* Caller-supplied delays (in msec) override the defaults. */
	if (p != NULL) {
		if (p->min_ownership_delay != 0) {
			min_ownership_delay = p->min_ownership_delay * 1000;
		}
		if (p->max_ownership_delay != 0) {
			max_ownership_delay = p->max_ownership_delay * 1000;
		}
	}
	SD_INFO(SD_LOG_IOCTL_MHD, un,
	    "sd_take_ownership: min, max delays: %d, %d\n",
	    min_ownership_delay, max_ownership_delay);

	/*
	 * Compute the deadlines in lbolt ticks. ownership_time slides
	 * forward every time the reservation is lost; end_time is fixed.
	 */
	start_time = ddi_get_lbolt();
	current_time	= start_time;
	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
	end_time	= start_time + drv_usectohz(max_ownership_delay);

	while (current_time - end_time < 0) {
		/* Poll every half second. */
		delay(drv_usectohz(500000));

		/* Re-issue the reserve; on failure retry exactly once. */
		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
				mutex_enter(SD_MUTEX(un));
				rval = (un->un_resvd_status &
				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
				mutex_exit(SD_MUTEX(un));
				break;
			}
		}
		previous_current_time = current_time;
		current_time = ddi_get_lbolt();
		mutex_enter(SD_MUTEX(un));
		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
			/*
			 * The reserve failed or a reset stole it; restart
			 * the stability window and the iteration count.
			 */
			ownership_time = ddi_get_lbolt() +
			    drv_usectohz(min_ownership_delay);
			reservation_count = 0;
		} else {
			reservation_count++;
		}
		un->un_resvd_status |= SD_RESERVE;
		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
		mutex_exit(SD_MUTEX(un));

		SD_INFO(SD_LOG_IOCTL_MHD, un,
		    "sd_take_ownership: ticks for loop iteration=%ld, "
		    "reservation=%s\n", (current_time - previous_current_time),
		    reservation_count ? "ok" : "reclaimed");

		/*
		 * Success requires both the min_ownership_delay window to
		 * have elapsed without a reset and at least 4 consecutive
		 * successful reserve polls.
		 */
		if (current_time - ownership_time >= 0 &&
		    reservation_count >= 4) {
			rval = 0; /* Achieved a stable ownership */
			break;
		}
		if (current_time - end_time >= 0) {
			rval = EACCES; /* No ownership in max possible time */
			break;
		}
	}
	SD_TRACE(SD_LOG_IOCTL_MHD, un,
	    "sd_take_ownership: return(2)=%d\n", rval);
	return (rval);
}
26022 
26023 
26024 /*
26025  *    Function: sd_reserve_release()
26026  *
26027  * Description: This function builds and sends scsi RESERVE, RELEASE, and
26028  *		PRIORITY RESERVE commands based on a user specified command type
26029  *
26030  *   Arguments: dev - the device 'dev_t'
26031  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
26032  *		      SD_RESERVE, SD_RELEASE
26033  *
26034  * Return Code: 0 or Error Code
26035  */
26036 
static int
sd_reserve_release(dev_t dev, int cmd)
{
	struct uscsi_cmd	*com = NULL;
	struct sd_lun		*un = NULL;
	char			cdb[CDB_GROUP0];
	int			rval;

	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
	    (cmd == SD_PRIORITY_RESERVE));

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/* instantiate and initialize the command and cdb */
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP0);
	com->uscsi_flags   = USCSI_SILENT;
	com->uscsi_timeout = un->un_reserve_release_time;
	com->uscsi_cdblen  = CDB_GROUP0;
	com->uscsi_cdb	   = cdb;
	/*
	 * Both SD_RESERVE and SD_PRIORITY_RESERVE issue a plain RESERVE;
	 * "priority" only changes the recovery behavior below.
	 */
	if (cmd == SD_RELEASE) {
		cdb[0] = SCMD_RELEASE;
	} else {
		cdb[0] = SCMD_RESERVE;
	}

	/* Send the command. */
	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
	    SD_PATH_STANDARD);

	/*
	 * "break" a reservation that is held by another host, by issuing a
	 * reset if priority reserve is desired, and we could not get the
	 * device.
	 */
	if ((cmd == SD_PRIORITY_RESERVE) &&
	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
		/*
		 * First try to reset the LUN. If we cannot, then try a target
		 * reset, followed by a bus reset if the target reset fails.
		 */
		int reset_retval = 0;
		if (un->un_f_lun_reset_enabled == TRUE) {
			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
		}
		if (reset_retval == 0) {
			/* The LUN reset either failed or was not issued */
			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
		}
		/* If every reset level failed, give up with EIO. */
		if ((reset_retval == 0) &&
		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
			rval = EIO;
			kmem_free(com, sizeof (*com));
			return (rval);
		}

		/* Rebuild the uscsi command from scratch for the retry. */
		bzero(com, sizeof (struct uscsi_cmd));
		com->uscsi_flags   = USCSI_SILENT;
		com->uscsi_cdb	   = cdb;
		com->uscsi_cdblen  = CDB_GROUP0;
		com->uscsi_timeout = 5;

		/*
		 * Reissue the last reserve command, this time without request
		 * sense.  Assume that it is just a regular reserve command.
		 */
		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
		    SD_PATH_STANDARD);
	}

	/* Return an error if still getting a reservation conflict. */
	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
		rval = EACCES;
	}

	kmem_free(com, sizeof (*com));
	return (rval);
}
26117 
26118 
26119 #define	SD_NDUMP_RETRIES	12
26120 /*
26121  *	System Crash Dump routine
26122  */
26123 
/*
 *    Function: sddump
 *
 * Description: System crash dump entry point for the sd driver.  Writes
 *		nblk DEV_BSIZE blocks from the kernel address addr to the
 *		partition-relative block blkno, using polled (FLAG_NOINTR)
 *		SCSI commands, since interrupt delivery cannot be relied
 *		upon at dump time.  Handles spinning up a powered-down
 *		device, resetting the bus on the first entry, performing
 *		read-modify-write for targets whose physical block size
 *		differs from DEV_BSIZE, and retrying/partial-DMA looping
 *		on the write itself.
 *
 *   Arguments: dev   - the device to dump to
 *		addr  - kernel virtual address of the data to be written
 *		blkno - starting block number, relative to the partition,
 *			in DEV_BSIZE units
 *		nblk  - number of DEV_BSIZE blocks to write
 *
 * Return Code: 0 on success; ENXIO if the unit is unattached, unlabeled,
 *		or a CD; EINVAL for misaligned/oversized requests; EIO on
 *		transport or device failure.
 */
static int
sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	int		instance;
	int		partition;
	int		i;
	int		err;
	struct sd_lun	*un;
	struct scsi_pkt *wr_pktp;
	struct buf	*wr_bp;
	struct buf	wr_buf;
	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
	daddr_t		tgt_blkno;	/* rmw - blkno for target */
	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
	size_t		io_start_offset;
	int		doing_rmw = FALSE;
	int		rval;
	ssize_t		dma_resid;
	daddr_t		oblkno;
	diskaddr_t	nblks = 0;
	diskaddr_t	start_block;

	instance = SDUNIT(dev);
	/* Refuse to dump to an unattached, unlabeled, or CD-ROM device. */
	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
	    !SD_IS_VALID_LABEL(un) || ISCD(un)) {
		return (ENXIO);
	}

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))

	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");

	partition = SDPART(dev);
	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);

	/*
	 * For devices addressed in DEV_BSIZE units, both the start block
	 * and the total length must be aligned to the target's physical
	 * block size; there is no read-modify-write path in this case.
	 */
	if (!(NOT_DEVBSIZE(un))) {
		int secmask = 0;
		int blknomask = 0;

		blknomask = (un->un_tgt_blocksize / DEV_BSIZE) - 1;
		secmask = un->un_tgt_blocksize - 1;

		if (blkno & blknomask) {
			SD_TRACE(SD_LOG_DUMP, un,
			    "sddump: dump start block not modulo %d\n",
			    un->un_tgt_blocksize);
			return (EINVAL);
		}

		if ((nblk * DEV_BSIZE) & secmask) {
			SD_TRACE(SD_LOG_DUMP, un,
			    "sddump: dump length not modulo %d\n",
			    un->un_tgt_blocksize);
			return (EINVAL);
		}

	}

	/* Validate blocks to dump at against partition size. */

	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
	    &nblks, &start_block, NULL, NULL, (void *)SD_PATH_DIRECT);

	if (NOT_DEVBSIZE(un)) {
		if ((blkno + nblk) > nblks) {
			SD_TRACE(SD_LOG_DUMP, un,
			    "sddump: dump range larger than partition: "
			    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
			    blkno, nblk, nblks);
			return (EINVAL);
		}
	} else {
		/* Compare in units of target blocks for DEV_BSIZE devices. */
		if (((blkno / (un->un_tgt_blocksize / DEV_BSIZE)) +
		    (nblk / (un->un_tgt_blocksize / DEV_BSIZE))) > nblks) {
			SD_TRACE(SD_LOG_DUMP, un,
			    "sddump: dump range larger than partition: "
			    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
			    blkno, nblk, nblks);
			return (EINVAL);
		}
	}

	/*
	 * If the device is powered down, raise power and start the unit
	 * in-line with a polled START STOP UNIT command; the normal
	 * sdpower() path cannot be used at dump time.
	 */
	mutex_enter(&un->un_pm_mutex);
	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
		struct scsi_pkt *start_pktp;

		mutex_exit(&un->un_pm_mutex);

		/*
		 * use pm framework to power on HBA 1st
		 */
		(void) pm_raise_power(SD_DEVINFO(un), 0,
		    SD_PM_STATE_ACTIVE(un));

		/*
		 * Dump no long uses sdpower to power on a device, it's
		 * in-line here so it can be done in polled mode.
		 */

		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");

		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);

		if (start_pktp == NULL) {
			/* We were not given a SCSI packet, fail. */
			return (EIO);
		}
		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
		start_pktp->pkt_flags = FLAG_NOINTR;

		mutex_enter(SD_MUTEX(un));
		SD_FILL_SCSI1_LUN(un, start_pktp);
		mutex_exit(SD_MUTEX(un));
		/*
		 * Scsi_poll returns 0 (success) if the command completes and
		 * the status block is STATUS_GOOD.
		 */
		if (sd_scsi_poll(un, start_pktp) != 0) {
			scsi_destroy_pkt(start_pktp);
			return (EIO);
		}
		scsi_destroy_pkt(start_pktp);
		(void) sd_pm_state_change(un, SD_PM_STATE_ACTIVE(un),
		    SD_PM_STATE_CHANGE);
	} else {
		mutex_exit(&un->un_pm_mutex);
	}

	mutex_enter(SD_MUTEX(un));
	/* Dump I/O is single-threaded and polled; disable throttling. */
	un->un_throttle = 0;

	/*
	 * The first time through, reset the specific target device.
	 * However, when cpr calls sddump we know that sd is in a
	 * a good state so no bus reset is required.
	 * Clear sense data via Request Sense cmd.
	 * In sddump we don't care about allow_bus_device_reset anymore
	 */

	if ((un->un_state != SD_STATE_SUSPENDED) &&
	    (un->un_state != SD_STATE_DUMPING)) {

		New_state(un, SD_STATE_DUMPING);

		if (un->un_f_is_fibre == FALSE) {
			mutex_exit(SD_MUTEX(un));
			/*
			 * Attempt a bus reset for parallel scsi.
			 *
			 * Note: A bus reset is required because on some host
			 * systems (i.e. E420R) a bus device reset is
			 * insufficient to reset the state of the target.
			 *
			 * Note: Don't issue the reset for fibre-channel,
			 * because this tends to hang the bus (loop) for
			 * too long while everyone is logging out and in
			 * and the deadman timer for dumping will fire
			 * before the dump is complete.
			 */
			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
				mutex_enter(SD_MUTEX(un));
				Restore_state(un);
				mutex_exit(SD_MUTEX(un));
				return (EIO);
			}

			/* Delay to give the device some recovery time. */
			drv_usecwait(10000);

			if (sd_send_polled_RQS(un) == SD_FAILURE) {
				SD_INFO(SD_LOG_DUMP, un,
				    "sddump: sd_send_polled_RQS failed\n");
			}
			mutex_enter(SD_MUTEX(un));
		}
	}

	/*
	 * Convert the partition-relative block number to a
	 * disk physical block number.
	 */
	if (NOT_DEVBSIZE(un)) {
		blkno += start_block;
	} else {
		blkno = blkno / (un->un_tgt_blocksize / DEV_BSIZE);
		blkno += start_block;
	}

	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);


	/*
	 * Check if the device has a non-512 block size.
	 */
	wr_bp = NULL;
	if (NOT_DEVBSIZE(un)) {
		tgt_byte_offset = blkno * un->un_sys_blocksize;
		tgt_byte_count = nblk * un->un_sys_blocksize;
		/*
		 * If the request is not aligned to the target block size,
		 * read the containing target blocks, merge in the dump
		 * data, and write the merged buffer back (RMW).
		 */
		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
		    (tgt_byte_count % un->un_tgt_blocksize)) {
			doing_rmw = TRUE;
			/*
			 * Calculate the block number and number of block
			 * in terms of the media block size.
			 */
			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
			tgt_nblk =
			    ((tgt_byte_offset + tgt_byte_count +
			    (un->un_tgt_blocksize - 1)) /
			    un->un_tgt_blocksize) - tgt_blkno;

			/*
			 * Invoke the routine which is going to do read part
			 * of read-modify-write.
			 * Note that this routine returns a pointer to
			 * a valid bp in wr_bp.
			 */
			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
			    &wr_bp);
			if (err) {
				mutex_exit(SD_MUTEX(un));
				return (err);
			}
			/*
			 * Offset is being calculated as -
			 * (original block # * system block size) -
			 * (new block # * target block size)
			 */
			io_start_offset =
			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));

			ASSERT(io_start_offset < un->un_tgt_blocksize);
			/*
			 * Do the modify portion of read modify write.
			 */
			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
			    (size_t)nblk * un->un_sys_blocksize);
		} else {
			doing_rmw = FALSE;
			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
		}

		/* Convert blkno and nblk to target blocks */
		blkno = tgt_blkno;
		nblk = tgt_nblk;
	} else {
		/* Aligned case: build a stack buf describing addr directly. */
		wr_bp = &wr_buf;
		bzero(wr_bp, sizeof (struct buf));
		wr_bp->b_flags		= B_BUSY;
		wr_bp->b_un.b_addr	= addr;
		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
		wr_bp->b_resid		= 0;
	}

	mutex_exit(SD_MUTEX(un));

	/*
	 * Obtain a SCSI packet for the write command.
	 * It should be safe to call the allocator here without
	 * worrying about being locked for DVMA mapping because
	 * the address we're passed is already a DVMA mapping
	 *
	 * We are also not going to worry about semaphore ownership
	 * in the dump buffer. Dumping is single threaded at present.
	 */

	wr_pktp = NULL;

	dma_resid = wr_bp->b_bcount;
	oblkno = blkno;

	if (!(NOT_DEVBSIZE(un))) {
		nblk = nblk / (un->un_tgt_blocksize / DEV_BSIZE);
	}

	/*
	 * Outer loop: one iteration per partial-DMA window (a single
	 * iteration if the HBA can map the whole transfer).  Note the
	 * inner blocks are intentionally not indented an extra level.
	 */
	while (dma_resid != 0) {

	/* Retry packet allocation/setup up to SD_NDUMP_RETRIES times. */
	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
		wr_bp->b_flags &= ~B_ERROR;

		if (un->un_partial_dma_supported == 1) {
			/* Advance blkno/nblk to the unwritten remainder. */
			blkno = oblkno +
			    ((wr_bp->b_bcount - dma_resid) /
			    un->un_tgt_blocksize);
			nblk = dma_resid / un->un_tgt_blocksize;

			if (wr_pktp) {
				/*
				 * Partial DMA transfers after initial transfer
				 */
				rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
				    blkno, nblk);
			} else {
				/* Initial transfer */
				rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
				    un->un_pkt_flags, NULL_FUNC, NULL,
				    blkno, nblk);
			}
		} else {
			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
			    0, NULL_FUNC, NULL, blkno, nblk);
		}

		if (rval == 0) {
			/* We were given a SCSI packet, continue. */
			break;
		}

		if (i == 0) {
			if (wr_bp->b_flags & B_ERROR) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "no resources for dumping; "
				    "error code: 0x%x, retrying",
				    geterror(wr_bp));
			} else {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "no resources for dumping; retrying");
			}
		} else if (i != (SD_NDUMP_RETRIES - 1)) {
			if (wr_bp->b_flags & B_ERROR) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
				    "no resources for dumping; error code: "
				    "0x%x, retrying\n", geterror(wr_bp));
			}
		} else {
			/* Final retry failed: log, restore state, bail out. */
			if (wr_bp->b_flags & B_ERROR) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
				    "no resources for dumping; "
				    "error code: 0x%x, retries failed, "
				    "giving up.\n", geterror(wr_bp));
			} else {
				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
				    "no resources for dumping; "
				    "retries failed, giving up.\n");
			}
			mutex_enter(SD_MUTEX(un));
			Restore_state(un);
			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
				mutex_exit(SD_MUTEX(un));
				/* Release the RMW buffer we allocated. */
				scsi_free_consistent_buf(wr_bp);
			} else {
				mutex_exit(SD_MUTEX(un));
			}
			return (EIO);
		}
		drv_usecwait(10000);
	}

	if (un->un_partial_dma_supported == 1) {
		/*
		 * save the resid from PARTIAL_DMA
		 */
		dma_resid = wr_pktp->pkt_resid;
		if (dma_resid != 0)
			nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
		wr_pktp->pkt_resid = 0;
	} else {
		dma_resid = 0;
	}

	/* SunBug 1222170 */
	wr_pktp->pkt_flags = FLAG_NOINTR;

	err = EIO;
	for (i = 0; i < SD_NDUMP_RETRIES; i++) {

		/*
		 * Scsi_poll returns 0 (success) if the command completes and
		 * the status block is STATUS_GOOD.  We should only check
		 * errors if this condition is not true.  Even then we should
		 * send our own request sense packet only if we have a check
		 * condition and auto request sense has not been performed by
		 * the hba.
		 */
		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");

		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
		    (wr_pktp->pkt_resid == 0)) {
			err = SD_SUCCESS;
			break;
		}

		/*
		 * Check CMD_DEV_GONE 1st, give up if device is gone.
		 */
		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Error while dumping state...Device is gone\n");
			break;
		}

		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: write failed with CHECK, try # %d\n", i);
			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
				(void) sd_send_polled_RQS(un);
			}

			continue;
		}

		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
			int reset_retval = 0;

			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: write failed with BUSY, try # %d\n", i);

			/* Prefer a LUN reset; fall back to a target reset. */
			if (un->un_f_lun_reset_enabled == TRUE) {
				reset_retval = scsi_reset(SD_ADDRESS(un),
				    RESET_LUN);
			}
			if (reset_retval == 0) {
				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
			}
			(void) sd_send_polled_RQS(un);

		} else {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: write failed with 0x%x, try # %d\n",
			    SD_GET_PKT_STATUS(wr_pktp), i);
			mutex_enter(SD_MUTEX(un));
			sd_reset_target(un, wr_pktp);
			mutex_exit(SD_MUTEX(un));
		}

		/*
		 * If we are not getting anywhere with lun/target resets,
		 * let's reset the bus.
		 */
		if (i == SD_NDUMP_RETRIES / 2) {
			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
			(void) sd_send_polled_RQS(un);
		}
	}
	}

	scsi_destroy_pkt(wr_pktp);
	mutex_enter(SD_MUTEX(un));
	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
		mutex_exit(SD_MUTEX(un));
		/* Release the consistent buffer allocated for RMW. */
		scsi_free_consistent_buf(wr_bp);
	} else {
		mutex_exit(SD_MUTEX(un));
	}
	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
	return (err);
}
26577 
26578 /*
26579  *    Function: sd_scsi_poll()
26580  *
26581  * Description: This is a wrapper for the scsi_poll call.
26582  *
26583  *   Arguments: sd_lun - The unit structure
26584  *              scsi_pkt - The scsi packet being sent to the device.
26585  *
26586  * Return Code: 0 - Command completed successfully with good status
26587  *             -1 - Command failed.  This could indicate a check condition
26588  *                  or other status value requiring recovery action.
26589  *
26590  * NOTE: This code is only called off sddump().
26591  */
26592 
26593 static int
26594 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
26595 {
26596 	int status;
26597 
26598 	ASSERT(un != NULL);
26599 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26600 	ASSERT(pktp != NULL);
26601 
26602 	status = SD_SUCCESS;
26603 
26604 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
26605 		pktp->pkt_flags |= un->un_tagflags;
26606 		pktp->pkt_flags &= ~FLAG_NODISCON;
26607 	}
26608 
26609 	status = sd_ddi_scsi_poll(pktp);
26610 	/*
26611 	 * Scsi_poll returns 0 (success) if the command completes and the
26612 	 * status block is STATUS_GOOD.  We should only check errors if this
26613 	 * condition is not true.  Even then we should send our own request
26614 	 * sense packet only if we have a check condition and auto
26615 	 * request sense has not been performed by the hba.
26616 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
26617 	 */
26618 	if ((status != SD_SUCCESS) &&
26619 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
26620 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
26621 	    (pktp->pkt_reason != CMD_DEV_GONE))
26622 		(void) sd_send_polled_RQS(un);
26623 
26624 	return (status);
26625 }
26626 
26627 /*
26628  *    Function: sd_send_polled_RQS()
26629  *
26630  * Description: This sends the request sense command to a device.
26631  *
26632  *   Arguments: sd_lun - The unit structure
26633  *
26634  * Return Code: 0 - Command completed successfully with good status
26635  *             -1 - Command failed.
26636  *
26637  */
26638 
26639 static int
26640 sd_send_polled_RQS(struct sd_lun *un)
26641 {
26642 	int	ret_val;
26643 	struct	scsi_pkt	*rqs_pktp;
26644 	struct	buf		*rqs_bp;
26645 
26646 	ASSERT(un != NULL);
26647 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26648 
26649 	ret_val = SD_SUCCESS;
26650 
26651 	rqs_pktp = un->un_rqs_pktp;
26652 	rqs_bp	 = un->un_rqs_bp;
26653 
26654 	mutex_enter(SD_MUTEX(un));
26655 
26656 	if (un->un_sense_isbusy) {
26657 		ret_val = SD_FAILURE;
26658 		mutex_exit(SD_MUTEX(un));
26659 		return (ret_val);
26660 	}
26661 
26662 	/*
26663 	 * If the request sense buffer (and packet) is not in use,
26664 	 * let's set the un_sense_isbusy and send our packet
26665 	 */
26666 	un->un_sense_isbusy = 1;
26667 	rqs_pktp->pkt_resid = 0;
26668 	rqs_pktp->pkt_reason = 0;
26669 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
26670 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
26671 
26672 	mutex_exit(SD_MUTEX(un));
26673 
26674 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
26675 	    " 0x%p\n", rqs_bp->b_un.b_addr);
26676 
26677 	/*
26678 	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
26679 	 * axle - it has a call into us!
26680 	 */
26681 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
26682 		SD_INFO(SD_LOG_COMMON, un,
26683 		    "sd_send_polled_RQS: RQS failed\n");
26684 	}
26685 
26686 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
26687 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
26688 
26689 	mutex_enter(SD_MUTEX(un));
26690 	un->un_sense_isbusy = 0;
26691 	mutex_exit(SD_MUTEX(un));
26692 
26693 	return (ret_val);
26694 }
26695 
26696 /*
26697  * Defines needed for localized version of the scsi_poll routine.
26698  */
26699 #define	CSEC		10000			/* usecs */
26700 #define	SEC_TO_CSEC	(1000000 / CSEC)
26701 
26702 /*
26703  *    Function: sd_ddi_scsi_poll()
26704  *
26705  * Description: Localized version of the scsi_poll routine.  The purpose is to
26706  *		send a scsi_pkt to a device as a polled command.  This version
26707  *		is to ensure more robust handling of transport errors.
26708  *		Specifically this routine cures not ready, coming ready
26709  *		transition for power up and reset of sonoma's.  This can take
26710  *		up to 45 seconds for power-on and 20 seconds for reset of a
26711  *		sonoma lun.
26712  *
26713  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
26714  *
26715  * Return Code: 0 - Command completed successfully with good status
26716  *             -1 - Command failed.
26717  *
26718  * NOTE: This code is almost identical to scsi_poll, however before 6668774 can
26719  * be fixed (removing this code), we need to determine how to handle the
26720  * KEY_UNIT_ATTENTION condition below in conditions not as limited as sddump().
26721  *
26722  * NOTE: This code is only called off sddump().
26723  */
static int
sd_ddi_scsi_poll(struct scsi_pkt *pkt)
{
	int			rval = -1;	/* assume failure */
	int			savef;
	long			savet;
	void			(*savec)();
	int			timeout;
	int			busy_count;
	int			poll_delay;
	int			rc;
	uint8_t			*sensep;
	struct scsi_arq_status	*arqstat;
	extern int		do_polled_io;

	ASSERT(pkt->pkt_scbp);

	/*
	 * save old flags..
	 */
	savef = pkt->pkt_flags;
	savec = pkt->pkt_comp;
	savet = pkt->pkt_time;

	pkt->pkt_flags |= FLAG_NOINTR;

	/*
	 * XXX there is nothing in the SCSA spec that states that we should not
	 * do a callback for polled cmds; however, removing this will break sd
	 * and probably other target drivers
	 */
	pkt->pkt_comp = NULL;

	/*
	 * we don't like a polled command without timeout.
	 * 60 seconds seems long enough.
	 */
	if (pkt->pkt_time == 0)
		pkt->pkt_time = SCSI_POLL_TIMEOUT;

	/*
	 * Send polled cmd.
	 *
	 * We do some error recovery for various errors.  Tran_busy,
	 * queue full, and non-dispatched commands are retried every 10 msec.
	 * as they are typically transient failures.  Busy status and Not
	 * Ready are retried every second as this status takes a while to
	 * change.
	 */
	/* busy_count ticks in CSEC (10 msec) units up to the pkt timeout. */
	timeout = pkt->pkt_time * SEC_TO_CSEC;

	for (busy_count = 0; busy_count < timeout; busy_count++) {
		/*
		 * Initialize pkt status variables.
		 */
		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;

		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
			if (rc != TRAN_BUSY) {
				/* Transport failed - give up. */
				break;
			} else {
				/* Transport busy - try again. */
				poll_delay = 1 * CSEC;		/* 10 msec. */
			}
		} else {
			/*
			 * Transport accepted - check pkt status.
			 */
			rc = (*pkt->pkt_scbp) & STATUS_MASK;
			/* Use auto request sense data if the HBA captured it. */
			if ((pkt->pkt_reason == CMD_CMPLT) &&
			    (rc == STATUS_CHECK) &&
			    (pkt->pkt_state & STATE_ARQ_DONE)) {
				arqstat =
				    (struct scsi_arq_status *)(pkt->pkt_scbp);
				sensep = (uint8_t *)&arqstat->sts_sensedata;
			} else {
				sensep = NULL;
			}

			if ((pkt->pkt_reason == CMD_CMPLT) &&
			    (rc == STATUS_GOOD)) {
				/* No error - we're done */
				rval = 0;
				break;

			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
				/* Lost connection - give up */
				break;

			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
			    (pkt->pkt_state == 0)) {
				/* Pkt not dispatched - try again. */
				poll_delay = 1 * CSEC;		/* 10 msec. */

			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
			    (rc == STATUS_QFULL)) {
				/* Queue full - try again. */
				poll_delay = 1 * CSEC;		/* 10 msec. */

			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
			    (rc == STATUS_BUSY)) {
				/* Busy - try again. */
				poll_delay = 100 * CSEC;	/* 1 sec. */
				/* charge a full second against the timeout */
				busy_count += (SEC_TO_CSEC - 1);

			} else if ((sensep != NULL) &&
			    (scsi_sense_key(sensep) == KEY_UNIT_ATTENTION)) {
				/*
				 * Unit Attention - try again.
				 * Pretend it took 1 sec.
				 * NOTE: 'continue' avoids poll_delay
				 */
				busy_count += (SEC_TO_CSEC - 1);
				continue;

			} else if ((sensep != NULL) &&
			    (scsi_sense_key(sensep) == KEY_NOT_READY) &&
			    (scsi_sense_asc(sensep) == 0x04) &&
			    (scsi_sense_ascq(sensep) == 0x01)) {
				/*
				 * Not ready -> ready - try again.
				 * 04h/01h: LUN IS IN PROCESS OF BECOMING READY
				 * ...same as STATUS_BUSY
				 */
				poll_delay = 100 * CSEC;	/* 1 sec. */
				busy_count += (SEC_TO_CSEC - 1);

			} else {
				/* BAD status - give up. */
				break;
			}
		}

		if (((curthread->t_flag & T_INTR_THREAD) == 0) &&
		    !do_polled_io) {
			delay(drv_usectohz(poll_delay));
		} else {
			/* we busy wait during cpr_dump or interrupt threads */
			drv_usecwait(poll_delay);
		}
	}

	/* Restore the caller's packet settings before returning. */
	pkt->pkt_flags = savef;
	pkt->pkt_comp = savec;
	pkt->pkt_time = savet;

	/* return on error */
	if (rval)
		return (rval);

	/*
	 * This is not a performance critical code path.
	 *
	 * As an accommodation for scsi_poll callers, to avoid ddi_dma_sync()
	 * issues associated with looking at DMA memory prior to
	 * scsi_pkt_destroy(), we scsi_sync_pkt() prior to return.
	 */
	scsi_sync_pkt(pkt);
	return (0);
}
26885 
26886 
26887 
26888 /*
26889  *    Function: sd_persistent_reservation_in_read_keys
26890  *
26891  * Description: This routine is the driver entry point for handling CD-ROM
26892  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
26893  *		by sending the SCSI-3 PRIN commands to the device.
26894  *		Processes the read keys command response by copying the
26895  *		reservation key information into the user provided buffer.
26896  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
26897  *
26898  *   Arguments: un   -  Pointer to soft state struct for the target.
26899  *		usrp -	user provided pointer to multihost Persistent In Read
26900  *			Keys structure (mhioc_inkeys_t)
26901  *		flag -	this argument is a pass through to ddi_copyxxx()
26902  *			directly from the mode argument of ioctl().
26903  *
26904  * Return Code: 0   - Success
26905  *		EACCES
26906  *		ENOTSUP
26907  *		errno return code from sd_send_scsi_cmd()
26908  *
26909  *     Context: Can sleep. Does not return until command is completed.
26910  */
26911 
static int
sd_persistent_reservation_in_read_keys(struct sd_lun *un,
    mhioc_inkeys_t *usrp, int flag)
{
#ifdef _MULTI_DATAMODEL
	struct mhioc_key_list32	li32;
#endif
	sd_prin_readkeys_t	*in;
	mhioc_inkeys_t		*ptr;
	mhioc_key_list_t	li;
	uchar_t			*data_bufp = NULL;
	int			data_len = 0;
	int			rval = 0;
	size_t			copysz = 0;
	sd_ssc_t		*ssc;

	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
		return (EINVAL);
	}
	bzero(&li, sizeof (mhioc_key_list_t));

	ssc = sd_ssc_init(un);

	/*
	 * Get the listsize from user
	 */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		/* 32-bit caller: copy in the ILP32 layout and convert. */
		copysz = sizeof (struct mhioc_key_list32);
		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_keys: "
			    "failed ddi_copyin: mhioc_key_list32_t\n");
			rval = EFAULT;
			goto done;
		}
		li.listsize = li32.listsize;
		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
		break;

	case DDI_MODEL_NONE:
		copysz = sizeof (mhioc_key_list_t);
		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_keys: "
			    "failed ddi_copyin: mhioc_key_list_t\n");
			rval = EFAULT;
			goto done;
		}
		break;
	}

#else /* ! _MULTI_DATAMODEL */
	copysz = sizeof (mhioc_key_list_t);
	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
		SD_ERROR(SD_LOG_IOCTL_MHD, un,
		    "sd_persistent_reservation_in_read_keys: "
		    "failed ddi_copyin: mhioc_key_list_t\n");
		rval = EFAULT;
		goto done;
	}
#endif

	/*
	 * Size the PRIN data buffer to hold listsize keys plus the
	 * fixed readkeys header (minus the flexible trailing member).
	 * NOTE(review): data_len scales with the user-supplied listsize
	 * with no explicit upper bound here — presumably bounded by the
	 * caller/ioctl layer; verify before relying on it.
	 */
	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
	data_bufp = kmem_zalloc(data_len, KM_SLEEP);

	rval = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_KEYS,
	    data_len, data_bufp);
	if (rval != 0) {
		if (rval == EIO)
			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
		else
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		goto done;
	}
	/* PRIN data is big-endian on the wire; convert as we unpack. */
	in = (sd_prin_readkeys_t *)data_bufp;
	ptr->generation = BE_32(in->generation);
	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;

	/*
	 * Return the min(listsize, listlen) keys
	 */
#ifdef _MULTI_DATAMODEL

	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		/* copysz still holds the ILP32 struct size from above */
		li32.listlen = li.listlen;
		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_keys: "
			    "failed ddi_copyout: mhioc_key_list32_t\n");
			rval = EFAULT;
			goto done;
		}
		break;

	case DDI_MODEL_NONE:
		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_keys: "
			    "failed ddi_copyout: mhioc_key_list_t\n");
			rval = EFAULT;
			goto done;
		}
		break;
	}

#else /* ! _MULTI_DATAMODEL */

	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
		SD_ERROR(SD_LOG_IOCTL_MHD, un,
		    "sd_persistent_reservation_in_read_keys: "
		    "failed ddi_copyout: mhioc_key_list_t\n");
		rval = EFAULT;
		goto done;
	}

#endif /* _MULTI_DATAMODEL */

	/* Copy out at most as many keys as the user's list can hold. */
	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
	    li.listsize * MHIOC_RESV_KEY_SIZE);
	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
		SD_ERROR(SD_LOG_IOCTL_MHD, un,
		    "sd_persistent_reservation_in_read_keys: "
		    "failed ddi_copyout: keylist\n");
		rval = EFAULT;
	}
done:
	sd_ssc_fini(ssc);
	kmem_free(data_bufp, data_len);
	return (rval);
}
27046 
27047 
27048 /*
27049  *    Function: sd_persistent_reservation_in_read_resv
27050  *
27051  * Description: This routine is the driver entry point for handling CD-ROM
27052  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
27053  *		by sending the SCSI-3 PRIN commands to the device.
27054  *		Process the read persistent reservations command response by
27055  *		copying the reservation information into the user provided
27056  *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
27057  *
27058  *   Arguments: un   -  Pointer to soft state struct for the target.
27059  *		usrp -	user provided pointer to multihost Persistent In Read
27060  *			Keys structure (mhioc_inkeys_t)
27061  *		flag -	this argument is a pass through to ddi_copyxxx()
27062  *			directly from the mode argument of ioctl().
27063  *
27064  * Return Code: 0   - Success
27065  *		EACCES
27066  *		ENOTSUP
27067  *		errno return code from sd_send_scsi_cmd()
27068  *
27069  *     Context: Can sleep. Does not return until command is completed.
27070  */
27071 
27072 static int
27073 sd_persistent_reservation_in_read_resv(struct sd_lun *un,
27074     mhioc_inresvs_t *usrp, int flag)
27075 {
27076 #ifdef _MULTI_DATAMODEL
27077 	struct mhioc_resv_desc_list32 resvlist32;
27078 #endif
27079 	sd_prin_readresv_t	*in;
27080 	mhioc_inresvs_t		*ptr;
27081 	sd_readresv_desc_t	*readresv_ptr;
27082 	mhioc_resv_desc_list_t	resvlist;
27083 	mhioc_resv_desc_t	resvdesc;
27084 	uchar_t			*data_bufp = NULL;
27085 	int			data_len;
27086 	int			rval = 0;
27087 	int			i;
27088 	size_t			copysz = 0;
27089 	mhioc_resv_desc_t	*bufp;
27090 	sd_ssc_t		*ssc;
27091 
27092 	if ((ptr = usrp) == NULL) {
27093 		return (EINVAL);
27094 	}
27095 
27096 	ssc = sd_ssc_init(un);
27097 
27098 	/*
27099 	 * Get the listsize from user
27100 	 */
27101 #ifdef _MULTI_DATAMODEL
27102 	switch (ddi_model_convert_from(flag & FMODELS)) {
27103 	case DDI_MODEL_ILP32:
27104 		copysz = sizeof (struct mhioc_resv_desc_list32);
27105 		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
27106 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27107 			    "sd_persistent_reservation_in_read_resv: "
27108 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27109 			rval = EFAULT;
27110 			goto done;
27111 		}
27112 		resvlist.listsize = resvlist32.listsize;
27113 		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
27114 		break;
27115 
27116 	case DDI_MODEL_NONE:
27117 		copysz = sizeof (mhioc_resv_desc_list_t);
27118 		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
27119 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27120 			    "sd_persistent_reservation_in_read_resv: "
27121 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27122 			rval = EFAULT;
27123 			goto done;
27124 		}
27125 		break;
27126 	}
27127 #else /* ! _MULTI_DATAMODEL */
27128 	copysz = sizeof (mhioc_resv_desc_list_t);
27129 	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
27130 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27131 		    "sd_persistent_reservation_in_read_resv: "
27132 		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27133 		rval = EFAULT;
27134 		goto done;
27135 	}
27136 #endif /* ! _MULTI_DATAMODEL */
27137 
27138 	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
27139 	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
27140 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
27141 
27142 	rval = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_RESV,
27143 	    data_len, data_bufp);
27144 	if (rval != 0) {
27145 		if (rval == EIO)
27146 			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
27147 		else
27148 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
27149 		goto done;
27150 	}
27151 	in = (sd_prin_readresv_t *)data_bufp;
27152 	ptr->generation = BE_32(in->generation);
27153 	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
27154 
27155 	/*
27156 	 * Return the min(listsize, listlen( keys
27157 	 */
27158 #ifdef _MULTI_DATAMODEL
27159 
27160 	switch (ddi_model_convert_from(flag & FMODELS)) {
27161 	case DDI_MODEL_ILP32:
27162 		resvlist32.listlen = resvlist.listlen;
27163 		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
27164 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27165 			    "sd_persistent_reservation_in_read_resv: "
27166 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27167 			rval = EFAULT;
27168 			goto done;
27169 		}
27170 		break;
27171 
27172 	case DDI_MODEL_NONE:
27173 		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
27174 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27175 			    "sd_persistent_reservation_in_read_resv: "
27176 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27177 			rval = EFAULT;
27178 			goto done;
27179 		}
27180 		break;
27181 	}
27182 
27183 #else /* ! _MULTI_DATAMODEL */
27184 
27185 	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
27186 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27187 		    "sd_persistent_reservation_in_read_resv: "
27188 		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27189 		rval = EFAULT;
27190 		goto done;
27191 	}
27192 
27193 #endif /* ! _MULTI_DATAMODEL */
27194 
27195 	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
27196 	bufp = resvlist.list;
27197 	copysz = sizeof (mhioc_resv_desc_t);
27198 	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
27199 	    i++, readresv_ptr++, bufp++) {
27200 
27201 		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
27202 		    MHIOC_RESV_KEY_SIZE);
27203 		resvdesc.type  = readresv_ptr->type;
27204 		resvdesc.scope = readresv_ptr->scope;
27205 		resvdesc.scope_specific_addr =
27206 		    BE_32(readresv_ptr->scope_specific_addr);
27207 
27208 		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
27209 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27210 			    "sd_persistent_reservation_in_read_resv: "
27211 			    "failed ddi_copyout: resvlist\n");
27212 			rval = EFAULT;
27213 			goto done;
27214 		}
27215 	}
27216 done:
27217 	sd_ssc_fini(ssc);
27218 	/* only if data_bufp is allocated, we need to free it */
27219 	if (data_bufp) {
27220 		kmem_free(data_bufp, data_len);
27221 	}
27222 	return (rval);
27223 }
27224 
27225 
27226 /*
27227  *    Function: sr_change_blkmode()
27228  *
27229  * Description: This routine is the driver entry point for handling CD-ROM
27230  *		block mode ioctl requests. Support for returning and changing
27231  *		the current block size in use by the device is implemented. The
27232  *		LBA size is changed via a MODE SELECT Block Descriptor.
27233  *
27234  *		This routine issues a mode sense with an allocation length of
27235  *		12 bytes for the mode page header and a single block descriptor.
27236  *
27237  *   Arguments: dev - the device 'dev_t'
27238  *		cmd - the request type; one of CDROMGBLKMODE (get) or
27239  *		      CDROMSBLKMODE (set)
27240  *		data - current block size or requested block size
27241  *		flag - this argument is a pass through to ddi_copyxxx() directly
27242  *		       from the mode argument of ioctl().
27243  *
27244  * Return Code: the code returned by sd_send_scsi_cmd()
27245  *		EINVAL if invalid arguments are provided
27246  *		EFAULT if ddi_copyxxx() fails
27247  *		ENXIO if fail ddi_get_soft_state
27248  *		EIO if invalid mode sense block descriptor length
27249  *
27250  */
27251 
static int
sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
{
	struct sd_lun			*un = NULL;
	struct mode_header		*sense_mhp, *select_mhp;
	struct block_descriptor		*sense_desc, *select_desc;
	int				current_bsize;
	int				rval = EINVAL;
	uchar_t				*sense = NULL;
	uchar_t				*select = NULL;
	sd_ssc_t			*ssc;

	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * The block length is changed via the Mode Select block descriptor, the
	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
	 * required as part of this routine. Therefore the mode sense allocation
	 * length is specified to be the length of a mode page header and a
	 * block descriptor.
	 */
	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);

	ssc = sd_ssc_init(un);
	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD);
	sd_ssc_fini(ssc);
	if (rval != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_change_blkmode: Mode Sense Failed\n");
		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
		return (rval);
	}

	/* Check the block descriptor len to handle only 1 block descriptor */
	sense_mhp = (struct mode_header *)sense;
	if ((sense_mhp->bdesc_length == 0) ||
	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_change_blkmode: Mode Sense returned invalid block"
		    " descriptor length\n");
		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
		return (EIO);
	}
	/* The single block descriptor immediately follows the mode header. */
	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
	/* Assemble the 24-bit big-endian LBA size from the descriptor. */
	current_bsize = ((sense_desc->blksize_hi << 16) |
	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);

	/* Process command */
	switch (cmd) {
	case CDROMGBLKMODE:
		/* Return the block size obtained during the mode sense */
		if (ddi_copyout(&current_bsize, (void *)data,
		    sizeof (int), flag) != 0)
			rval = EFAULT;
		break;
	case CDROMSBLKMODE:
		/* Validate the requested block size */
		switch (data) {
		case CDROM_BLK_512:
		case CDROM_BLK_1024:
		case CDROM_BLK_2048:
		case CDROM_BLK_2056:
		case CDROM_BLK_2336:
		case CDROM_BLK_2340:
		case CDROM_BLK_2352:
		case CDROM_BLK_2368:
		case CDROM_BLK_2448:
		case CDROM_BLK_2646:
		case CDROM_BLK_2647:
			break;
		default:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_change_blkmode: "
			    "Block Size '%ld' Not Supported\n", data);
			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
			return (EINVAL);
		}

		/*
		 * The current block size matches the requested block size so
		 * there is no need to send the mode select to change the size
		 */
		if (current_bsize == data) {
			break;
		}

		/* Build the select data for the requested block size */
		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
		select_mhp = (struct mode_header *)select;
		select_desc =
		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
		/*
		 * The LBA size is changed via the block descriptor, so the
		 * descriptor is built according to the user data
		 */
		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
		/* Split the requested size into the descriptor's 3 bytes. */
		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
		select_desc->blksize_lo  = (char)((data) & 0x000000ff);

		/* Send the mode select for the requested block size */
		ssc = sd_ssc_init(un);
		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0,
		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
		    SD_PATH_STANDARD);
		sd_ssc_fini(ssc);
		if (rval != 0) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_change_blkmode: Mode Select Failed\n");
			/*
			 * The mode select failed for the requested block size,
			 * so reset the data for the original block size and
			 * send it to the target. The error is indicated by the
			 * return value for the failed mode select.
			 */
			select_desc->blksize_hi  = sense_desc->blksize_hi;
			select_desc->blksize_mid = sense_desc->blksize_mid;
			select_desc->blksize_lo  = sense_desc->blksize_lo;
			ssc = sd_ssc_init(un);
			/* Best-effort rollback; original rval is preserved. */
			(void) sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0,
			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
			    SD_PATH_STANDARD);
			sd_ssc_fini(ssc);
		} else {
			/* Record the new block size in the driver soft state */
			ASSERT(!mutex_owned(SD_MUTEX(un)));
			mutex_enter(SD_MUTEX(un));
			sd_update_block_info(un, (uint32_t)data, 0);
			mutex_exit(SD_MUTEX(un));
		}
		break;
	default:
		/* should not reach here, but check anyway */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
		rval = EINVAL;
		break;
	}

	if (select) {
		kmem_free(select, BUFLEN_CHG_BLK_MODE);
	}
	if (sense) {
		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
	}
	return (rval);
}
27403 
27404 
27405 /*
27406  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
27407  * implement driver support for getting and setting the CD speed. The command
27408  * set used will be based on the device type. If the device has not been
27409  * identified as MMC the Toshiba vendor specific mode page will be used. If
27410  * the device is MMC but does not support the Real Time Streaming feature
27411  * the SET CD SPEED command will be used to set speed and mode page 0x2A will
27412  * be used to read the speed.
27413  */
27414 
27415 /*
27416  *    Function: sr_change_speed()
27417  *
27418  * Description: This routine is the driver entry point for handling CD-ROM
27419  *		drive speed ioctl requests for devices supporting the Toshiba
27420  *		vendor specific drive speed mode page. Support for returning
27421  *		and changing the current drive speed in use by the device is
27422  *		implemented.
27423  *
27424  *   Arguments: dev - the device 'dev_t'
27425  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
27426  *		      CDROMSDRVSPEED (set)
27427  *		data - current drive speed or requested drive speed
27428  *		flag - this argument is a pass through to ddi_copyxxx() directly
27429  *		       from the mode argument of ioctl().
27430  *
27431  * Return Code: the code returned by sd_send_scsi_cmd()
27432  *		EINVAL if invalid arguments are provided
27433  *		EFAULT if ddi_copyxxx() fails
27434  *		ENXIO if fail ddi_get_soft_state
27435  *		EIO if invalid mode sense block descriptor length
27436  */
27437 
27438 static int
27439 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27440 {
27441 	struct sd_lun			*un = NULL;
27442 	struct mode_header		*sense_mhp, *select_mhp;
27443 	struct mode_speed		*sense_page, *select_page;
27444 	int				current_speed;
27445 	int				rval = EINVAL;
27446 	int				bd_len;
27447 	uchar_t				*sense = NULL;
27448 	uchar_t				*select = NULL;
27449 	sd_ssc_t			*ssc;
27450 
27451 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27452 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27453 		return (ENXIO);
27454 	}
27455 
27456 	/*
27457 	 * Note: The drive speed is being modified here according to a Toshiba
27458 	 * vendor specific mode page (0x31).
27459 	 */
27460 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27461 
27462 	ssc = sd_ssc_init(un);
27463 	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
27464 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
27465 	    SD_PATH_STANDARD);
27466 	sd_ssc_fini(ssc);
27467 	if (rval != 0) {
27468 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27469 		    "sr_change_speed: Mode Sense Failed\n");
27470 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27471 		return (rval);
27472 	}
27473 	sense_mhp  = (struct mode_header *)sense;
27474 
27475 	/* Check the block descriptor len to handle only 1 block descriptor */
27476 	bd_len = sense_mhp->bdesc_length;
27477 	if (bd_len > MODE_BLK_DESC_LENGTH) {
27478 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27479 		    "sr_change_speed: Mode Sense returned invalid block "
27480 		    "descriptor length\n");
27481 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27482 		return (EIO);
27483 	}
27484 
27485 	sense_page = (struct mode_speed *)
27486 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
27487 	current_speed = sense_page->speed;
27488 
27489 	/* Process command */
27490 	switch (cmd) {
27491 	case CDROMGDRVSPEED:
27492 		/* Return the drive speed obtained during the mode sense */
27493 		if (current_speed == 0x2) {
27494 			current_speed = CDROM_TWELVE_SPEED;
27495 		}
27496 		if (ddi_copyout(&current_speed, (void *)data,
27497 		    sizeof (int), flag) != 0) {
27498 			rval = EFAULT;
27499 		}
27500 		break;
27501 	case CDROMSDRVSPEED:
27502 		/* Validate the requested drive speed */
27503 		switch ((uchar_t)data) {
27504 		case CDROM_TWELVE_SPEED:
27505 			data = 0x2;
27506 			/*FALLTHROUGH*/
27507 		case CDROM_NORMAL_SPEED:
27508 		case CDROM_DOUBLE_SPEED:
27509 		case CDROM_QUAD_SPEED:
27510 		case CDROM_MAXIMUM_SPEED:
27511 			break;
27512 		default:
27513 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27514 			    "sr_change_speed: "
27515 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
27516 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27517 			return (EINVAL);
27518 		}
27519 
27520 		/*
27521 		 * The current drive speed matches the requested drive speed so
27522 		 * there is no need to send the mode select to change the speed
27523 		 */
27524 		if (current_speed == data) {
27525 			break;
27526 		}
27527 
27528 		/* Build the select data for the requested drive speed */
27529 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27530 		select_mhp = (struct mode_header *)select;
27531 		select_mhp->bdesc_length = 0;
27532 		select_page =
27533 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
27534 		select_page =
27535 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
27536 		select_page->mode_page.code = CDROM_MODE_SPEED;
27537 		select_page->mode_page.length = 2;
27538 		select_page->speed = (uchar_t)data;
27539 
27540 		/* Send the mode select for the requested block size */
27541 		ssc = sd_ssc_init(un);
27542 		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
27543 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27544 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27545 		sd_ssc_fini(ssc);
27546 		if (rval != 0) {
27547 			/*
27548 			 * The mode select failed for the requested drive speed,
27549 			 * so reset the data for the original drive speed and
27550 			 * send it to the target. The error is indicated by the
27551 			 * return value for the failed mode select.
27552 			 */
27553 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27554 			    "sr_drive_speed: Mode Select Failed\n");
27555 			select_page->speed = sense_page->speed;
27556 			ssc = sd_ssc_init(un);
27557 			(void) sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
27558 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27559 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27560 			sd_ssc_fini(ssc);
27561 		}
27562 		break;
27563 	default:
27564 		/* should not reach here, but check anyway */
27565 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27566 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
27567 		rval = EINVAL;
27568 		break;
27569 	}
27570 
27571 	if (select) {
27572 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
27573 	}
27574 	if (sense) {
27575 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27576 	}
27577 
27578 	return (rval);
27579 }
27580 
27581 
27582 /*
27583  *    Function: sr_atapi_change_speed()
27584  *
27585  * Description: This routine is the driver entry point for handling CD-ROM
27586  *		drive speed ioctl requests for MMC devices that do not support
27587  *		the Real Time Streaming feature (0x107).
27588  *
27589  *		Note: This routine will use the SET SPEED command which may not
27590  *		be supported by all devices.
27591  *
27592  *   Arguments: dev- the device 'dev_t'
27593  *		cmd- the request type; one of CDROMGDRVSPEED (get) or
27594  *		     CDROMSDRVSPEED (set)
27595  *		data- current drive speed or requested drive speed
27596  *		flag- this argument is a pass through to ddi_copyxxx() directly
27597  *		      from the mode argument of ioctl().
27598  *
27599  * Return Code: the code returned by sd_send_scsi_cmd()
27600  *		EINVAL if invalid arguments are provided
27601  *		EFAULT if ddi_copyxxx() fails
27602  *		ENXIO if fail ddi_get_soft_state
27603  *		EIO if invalid mode sense block descriptor length
27604  */
27605 
static int
sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
{
	struct sd_lun			*un;
	struct uscsi_cmd		*com = NULL;
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	uchar_t				*sense = NULL;
	char				cdb[CDB_GROUP5];
	int				bd_len;
	int				current_speed = 0;
	int				max_speed = 0;
	int				rval;
	sd_ssc_t			*ssc;

	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/* Fetch the CD Capabilities mode page to learn the drive's speeds. */
	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);

	ssc = sd_ssc_init(un);
	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
	    SD_PATH_STANDARD);
	sd_ssc_fini(ssc);
	if (rval != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_atapi_change_speed: Mode Sense Failed\n");
		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
		return (rval);
	}

	/* Check the block descriptor len to handle only 1 block descriptor */
	sense_mhp = (struct mode_header_grp2 *)sense;
	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_atapi_change_speed: Mode Sense returned invalid "
		    "block descriptor length\n");
		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
		return (EIO);
	}

	/*
	 * Calculate the current and maximum drive speeds.
	 * Bytes 8-9 of the page hold the maximum read speed and bytes 14-15
	 * the current read speed, both big-endian, in KB/sec (MMC mode page
	 * 2Ah layout).
	 */
	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
	current_speed = (sense_page[14] << 8) | sense_page[15];
	max_speed = (sense_page[8] << 8) | sense_page[9];

	/* Process the command */
	switch (cmd) {
	case CDROMGDRVSPEED:
		/* Convert KB/sec to an "NX" speed multiple for the caller. */
		current_speed /= SD_SPEED_1X;
		if (ddi_copyout(&current_speed, (void *)data,
		    sizeof (int), flag) != 0)
			rval = EFAULT;
		break;
	case CDROMSDRVSPEED:
		/* Convert the speed code to KB/sec */
		switch ((uchar_t)data) {
		case CDROM_NORMAL_SPEED:
			current_speed = SD_SPEED_1X;
			break;
		case CDROM_DOUBLE_SPEED:
			current_speed = 2 * SD_SPEED_1X;
			break;
		case CDROM_QUAD_SPEED:
			current_speed = 4 * SD_SPEED_1X;
			break;
		case CDROM_TWELVE_SPEED:
			current_speed = 12 * SD_SPEED_1X;
			break;
		case CDROM_MAXIMUM_SPEED:
			/* 0xffff asks the drive for its maximum speed. */
			current_speed = 0xffff;
			break;
		default:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_atapi_change_speed: invalid drive speed %d\n",
			    (uchar_t)data);
			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
			return (EINVAL);
		}

		/* Check the request against the drive's max speed. */
		if (current_speed != 0xffff) {
			if (current_speed > max_speed) {
				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
				return (EINVAL);
			}
		}

		/*
		 * Build and send the SET SPEED command
		 *
		 * Note: The SET SPEED (0xBB) command used in this routine is
		 * obsolete per the SCSI MMC spec but still supported in the
		 * MT FUJI vendor spec. Most equipment is adhering to MT FUJI
		 * therefore the command is still implemented in this routine.
		 */
		bzero(cdb, sizeof (cdb));
		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
		/* Bytes 2-3: requested read speed, big-endian, in KB/sec. */
		cdb[2] = (uchar_t)(current_speed >> 8);
		cdb[3] = (uchar_t)current_speed;
		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
		com->uscsi_cdb	   = (caddr_t)cdb;
		com->uscsi_cdblen  = CDB_GROUP5;
		com->uscsi_bufaddr = NULL;
		com->uscsi_buflen  = 0;
		com->uscsi_flags   = USCSI_DIAGNOSE | USCSI_SILENT;
		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, 0, SD_PATH_STANDARD);
		break;
	default:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
		rval = EINVAL;
	}

	if (sense) {
		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
	}
	if (com) {
		kmem_free(com, sizeof (*com));
	}
	return (rval);
}
27733 
27734 
27735 /*
27736  *    Function: sr_pause_resume()
27737  *
27738  * Description: This routine is the driver entry point for handling CD-ROM
27739  *		pause/resume ioctl requests. This only affects the audio play
27740  *		operation.
27741  *
27742  *   Arguments: dev - the device 'dev_t'
27743  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
27744  *		      for setting the resume bit of the cdb.
27745  *
27746  * Return Code: the code returned by sd_send_scsi_cmd()
27747  *		EINVAL if invalid mode specified
27748  *
27749  */
27750 
27751 static int
27752 sr_pause_resume(dev_t dev, int cmd)
27753 {
27754 	struct sd_lun		*un;
27755 	struct uscsi_cmd	*com;
27756 	char			cdb[CDB_GROUP1];
27757 	int			rval;
27758 
27759 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27760 		return (ENXIO);
27761 	}
27762 
27763 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27764 	bzero(cdb, CDB_GROUP1);
27765 	cdb[0] = SCMD_PAUSE_RESUME;
27766 	switch (cmd) {
27767 	case CDROMRESUME:
27768 		cdb[8] = 1;
27769 		break;
27770 	case CDROMPAUSE:
27771 		cdb[8] = 0;
27772 		break;
27773 	default:
27774 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
27775 		    " Command '%x' Not Supported\n", cmd);
27776 		rval = EINVAL;
27777 		goto done;
27778 	}
27779 
27780 	com->uscsi_cdb    = cdb;
27781 	com->uscsi_cdblen = CDB_GROUP1;
27782 	com->uscsi_flags  = USCSI_DIAGNOSE | USCSI_SILENT;
27783 
27784 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27785 	    SD_PATH_STANDARD);
27786 
27787 done:
27788 	kmem_free(com, sizeof (*com));
27789 	return (rval);
27790 }
27791 
27792 
27793 /*
27794  *    Function: sr_play_msf()
27795  *
27796  * Description: This routine is the driver entry point for handling CD-ROM
27797  *		ioctl requests to output the audio signals at the specified
27798  *		starting address and continue the audio play until the specified
27799  *		ending address (CDROMPLAYMSF) The address is in Minute Second
27800  *		Frame (MSF) format.
27801  *
27802  *   Arguments: dev	- the device 'dev_t'
27803  *		data	- pointer to user provided audio msf structure,
27804  *		          specifying start/end addresses.
27805  *		flag	- this argument is a pass through to ddi_copyxxx()
27806  *		          directly from the mode argument of ioctl().
27807  *
27808  * Return Code: the code returned by sd_send_scsi_cmd()
27809  *		EFAULT if ddi_copyxxx() fails
27810  *		ENXIO if fail ddi_get_soft_state
27811  *		EINVAL if data pointer is NULL
27812  */
27813 
27814 static int
27815 sr_play_msf(dev_t dev, caddr_t data, int flag)
27816 {
27817 	struct sd_lun		*un;
27818 	struct uscsi_cmd	*com;
27819 	struct cdrom_msf	msf_struct;
27820 	struct cdrom_msf	*msf = &msf_struct;
27821 	char			cdb[CDB_GROUP1];
27822 	int			rval;
27823 
27824 	if (data == NULL) {
27825 		return (EINVAL);
27826 	}
27827 
27828 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27829 		return (ENXIO);
27830 	}
27831 
27832 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
27833 		return (EFAULT);
27834 	}
27835 
27836 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27837 	bzero(cdb, CDB_GROUP1);
27838 	cdb[0] = SCMD_PLAYAUDIO_MSF;
27839 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
27840 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
27841 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
27842 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
27843 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
27844 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
27845 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
27846 	} else {
27847 		cdb[3] = msf->cdmsf_min0;
27848 		cdb[4] = msf->cdmsf_sec0;
27849 		cdb[5] = msf->cdmsf_frame0;
27850 		cdb[6] = msf->cdmsf_min1;
27851 		cdb[7] = msf->cdmsf_sec1;
27852 		cdb[8] = msf->cdmsf_frame1;
27853 	}
27854 	com->uscsi_cdb    = cdb;
27855 	com->uscsi_cdblen = CDB_GROUP1;
27856 	com->uscsi_flags  = USCSI_DIAGNOSE | USCSI_SILENT;
27857 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27858 	    SD_PATH_STANDARD);
27859 	kmem_free(com, sizeof (*com));
27860 	return (rval);
27861 }
27862 
27863 
27864 /*
27865  *    Function: sr_play_trkind()
27866  *
27867  * Description: This routine is the driver entry point for handling CD-ROM
27868  *		ioctl requests to output the audio signals at the specified
27869  *		starting address and continue the audio play until the specified
27870  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
27871  *		format.
27872  *
27873  *   Arguments: dev	- the device 'dev_t'
27874  *		data	- pointer to user provided audio track/index structure,
27875  *		          specifying start/end addresses.
27876  *		flag	- this argument is a pass through to ddi_copyxxx()
27877  *		          directly from the mode argument of ioctl().
27878  *
27879  * Return Code: the code returned by sd_send_scsi_cmd()
27880  *		EFAULT if ddi_copyxxx() fails
27881  *		ENXIO if fail ddi_get_soft_state
27882  *		EINVAL if data pointer is NULL
27883  */
27884 
27885 static int
27886 sr_play_trkind(dev_t dev, caddr_t data, int flag)
27887 {
27888 	struct cdrom_ti		ti_struct;
27889 	struct cdrom_ti		*ti = &ti_struct;
27890 	struct uscsi_cmd	*com = NULL;
27891 	char			cdb[CDB_GROUP1];
27892 	int			rval;
27893 
27894 	if (data == NULL) {
27895 		return (EINVAL);
27896 	}
27897 
27898 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
27899 		return (EFAULT);
27900 	}
27901 
27902 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27903 	bzero(cdb, CDB_GROUP1);
27904 	cdb[0] = SCMD_PLAYAUDIO_TI;
27905 	cdb[4] = ti->cdti_trk0;
27906 	cdb[5] = ti->cdti_ind0;
27907 	cdb[7] = ti->cdti_trk1;
27908 	cdb[8] = ti->cdti_ind1;
27909 	com->uscsi_cdb    = cdb;
27910 	com->uscsi_cdblen = CDB_GROUP1;
27911 	com->uscsi_flags  = USCSI_DIAGNOSE | USCSI_SILENT;
27912 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27913 	    SD_PATH_STANDARD);
27914 	kmem_free(com, sizeof (*com));
27915 	return (rval);
27916 }
27917 
27918 
27919 /*
27920  *    Function: sr_read_all_subcodes()
27921  *
27922  * Description: This routine is the driver entry point for handling CD-ROM
27923  *		ioctl requests to return raw subcode data while the target is
27924  *		playing audio (CDROMSUBCODE).
27925  *
27926  *   Arguments: dev	- the device 'dev_t'
27927  *		data	- pointer to user provided cdrom subcode structure,
27928  *		          specifying the transfer length and address.
27929  *		flag	- this argument is a pass through to ddi_copyxxx()
27930  *		          directly from the mode argument of ioctl().
27931  *
27932  * Return Code: the code returned by sd_send_scsi_cmd()
27933  *		EFAULT if ddi_copyxxx() fails
27934  *		ENXIO if fail ddi_get_soft_state
27935  *		EINVAL if data pointer is NULL
27936  */
27937 
static int
sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un = NULL;
	struct uscsi_cmd	*com = NULL;
	struct cdrom_subcode	*subcode = NULL;
	int			rval;
	size_t			buflen;
	char			cdb[CDB_GROUP5];

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_subcode32		cdrom_subcode32;
	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
#endif
	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);

#ifdef _MULTI_DATAMODEL
	/* Copy in the request per the caller's data model. */
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_all_subcodes: ddi_copyin Failed\n");
			kmem_free(subcode, sizeof (struct cdrom_subcode));
			return (EFAULT);
		}
		/* Convert the ILP32 uscsi data from the application to LP64 */
		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, subcode,
		    sizeof (struct cdrom_subcode), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_all_subcodes: ddi_copyin Failed\n");
			kmem_free(subcode, sizeof (struct cdrom_subcode));
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_all_subcodes: ddi_copyin Failed\n");
		kmem_free(subcode, sizeof (struct cdrom_subcode));
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/*
	 * Since MMC-2 expects max 3 bytes for length, check if the
	 * length input is greater than 3 bytes
	 */
	if ((subcode->cdsc_length & 0xFF000000) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_all_subcodes: "
		    "cdrom transfer length too large: %d (limit %d)\n",
		    subcode->cdsc_length, 0xFFFFFF);
		kmem_free(subcode, sizeof (struct cdrom_subcode));
		return (EINVAL);
	}

	/* Transfer size is cdsc_length blocks of raw subcode data. */
	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP5);

	if (un->un_f_mmc_cap == TRUE) {
		/* MMC device: use READ CD with a 3-byte transfer length. */
		cdb[0] = (char)SCMD_READ_CD;
		cdb[2] = (char)0xff;
		cdb[3] = (char)0xff;
		cdb[4] = (char)0xff;
		cdb[5] = (char)0xff;
		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
		cdb[10] = 1;
	} else {
		/*
		 * Note: A vendor specific command (0xDF) is being used here to
		 * request a read of all subcodes.
		 */
		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
		/* Vendor command takes a full 4-byte transfer length. */
		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
	}
	com->uscsi_cdb	   = cdb;
	com->uscsi_cdblen  = CDB_GROUP5;
	/* The data lands directly in the caller's buffer (UIO_USERSPACE). */
	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
	com->uscsi_buflen  = buflen;
	com->uscsi_flags   = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
	    SD_PATH_STANDARD);
	kmem_free(subcode, sizeof (struct cdrom_subcode));
	kmem_free(com, sizeof (*com));
	return (rval);
}
28043 
28044 
28045 /*
28046  *    Function: sr_read_subchannel()
28047  *
28048  * Description: This routine is the driver entry point for handling CD-ROM
28049  *		ioctl requests to return the Q sub-channel data of the CD
28050  *		current position block. (CDROMSUBCHNL) The data includes the
28051  *		track number, index number, absolute CD-ROM address (LBA or MSF
28052  *		format per the user) , track relative CD-ROM address (LBA or MSF
28053  *		format per the user), control data and audio status.
28054  *
28055  *   Arguments: dev	- the device 'dev_t'
28056  *		data	- pointer to user provided cdrom sub-channel structure
28057  *		flag	- this argument is a pass through to ddi_copyxxx()
28058  *		          directly from the mode argument of ioctl().
28059  *
28060  * Return Code: the code returned by sd_send_scsi_cmd()
28061  *		EFAULT if ddi_copyxxx() fails
28062  *		ENXIO if fail ddi_get_soft_state
28063  *		EINVAL if data pointer is NULL
28064  */
28065 
28066 static int
28067 sr_read_subchannel(dev_t dev, caddr_t data, int flag)
28068 {
28069 	struct sd_lun		*un;
28070 	struct uscsi_cmd	*com;
28071 	struct cdrom_subchnl	subchanel;
28072 	struct cdrom_subchnl	*subchnl = &subchanel;
28073 	char			cdb[CDB_GROUP1];
28074 	caddr_t			buffer;
28075 	int			rval;
28076 
28077 	if (data == NULL) {
28078 		return (EINVAL);
28079 	}
28080 
28081 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28082 	    (un->un_state == SD_STATE_OFFLINE)) {
28083 		return (ENXIO);
28084 	}
28085 
28086 	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
28087 		return (EFAULT);
28088 	}
28089 
28090 	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
28091 	bzero(cdb, CDB_GROUP1);
28092 	cdb[0] = SCMD_READ_SUBCHANNEL;
28093 	/* Set the MSF bit based on the user requested address format */
28094 	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
28095 	/*
28096 	 * Set the Q bit in byte 2 to indicate that Q sub-channel data be
28097 	 * returned
28098 	 */
28099 	cdb[2] = 0x40;
28100 	/*
28101 	 * Set byte 3 to specify the return data format. A value of 0x01
28102 	 * indicates that the CD-ROM current position should be returned.
28103 	 */
28104 	cdb[3] = 0x01;
28105 	cdb[8] = 0x10;
28106 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28107 	com->uscsi_cdb	   = cdb;
28108 	com->uscsi_cdblen  = CDB_GROUP1;
28109 	com->uscsi_bufaddr = buffer;
28110 	com->uscsi_buflen  = 16;
28111 	com->uscsi_flags   = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
28112 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
28113 	    SD_PATH_STANDARD);
28114 	if (rval != 0) {
28115 		kmem_free(buffer, 16);
28116 		kmem_free(com, sizeof (*com));
28117 		return (rval);
28118 	}
28119 
28120 	/* Process the returned Q sub-channel data */
28121 	subchnl->cdsc_audiostatus = buffer[1];
28122 	subchnl->cdsc_adr	= (buffer[5] & 0xF0) >> 4;
28123 	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
28124 	subchnl->cdsc_trk	= buffer[6];
28125 	subchnl->cdsc_ind	= buffer[7];
28126 	if (subchnl->cdsc_format & CDROM_LBA) {
28127 		subchnl->cdsc_absaddr.lba =
28128 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28129 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
28130 		subchnl->cdsc_reladdr.lba =
28131 		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
28132 		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
28133 	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
28134 		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
28135 		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
28136 		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
28137 		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
28138 		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
28139 		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
28140 	} else {
28141 		subchnl->cdsc_absaddr.msf.minute = buffer[9];
28142 		subchnl->cdsc_absaddr.msf.second = buffer[10];
28143 		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
28144 		subchnl->cdsc_reladdr.msf.minute = buffer[13];
28145 		subchnl->cdsc_reladdr.msf.second = buffer[14];
28146 		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
28147 	}
28148 	kmem_free(buffer, 16);
28149 	kmem_free(com, sizeof (*com));
28150 	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
28151 	    != 0) {
28152 		return (EFAULT);
28153 	}
28154 	return (rval);
28155 }
28156 
28157 
28158 /*
28159  *    Function: sr_read_tocentry()
28160  *
28161  * Description: This routine is the driver entry point for handling CD-ROM
28162  *		ioctl requests to read from the Table of Contents (TOC)
28163  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
28164  *		fields, the starting address (LBA or MSF format per the user)
28165  *		and the data mode if the user specified track is a data track.
28166  *
28167  *		Note: The READ HEADER (0x44) command used in this routine is
28168  *		obsolete per the SCSI MMC spec but still supported in the
 *		MT FUJI vendor spec. Most equipment is adhering to MT FUJI
28170  *		therefore the command is still implemented in this routine.
28171  *
28172  *   Arguments: dev	- the device 'dev_t'
28173  *		data	- pointer to user provided toc entry structure,
28174  *			  specifying the track # and the address format
28175  *			  (LBA or MSF).
28176  *		flag	- this argument is a pass through to ddi_copyxxx()
28177  *		          directly from the mode argument of ioctl().
28178  *
28179  * Return Code: the code returned by sd_send_scsi_cmd()
28180  *		EFAULT if ddi_copyxxx() fails
28181  *		ENXIO if fail ddi_get_soft_state
28182  *		EINVAL if data pointer is NULL
28183  */
28184 
28185 static int
28186 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
28187 {
28188 	struct sd_lun		*un = NULL;
28189 	struct uscsi_cmd	*com;
28190 	struct cdrom_tocentry	toc_entry;
28191 	struct cdrom_tocentry	*entry = &toc_entry;
28192 	caddr_t			buffer;
28193 	int			rval;
28194 	char			cdb[CDB_GROUP1];
28195 
28196 	if (data == NULL) {
28197 		return (EINVAL);
28198 	}
28199 
28200 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28201 	    (un->un_state == SD_STATE_OFFLINE)) {
28202 		return (ENXIO);
28203 	}
28204 
28205 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
28206 		return (EFAULT);
28207 	}
28208 
28209 	/* Validate the requested track and address format */
28210 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
28211 		return (EINVAL);
28212 	}
28213 
28214 	if (entry->cdte_track == 0) {
28215 		return (EINVAL);
28216 	}
28217 
28218 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
28219 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28220 	bzero(cdb, CDB_GROUP1);
28221 
28222 	cdb[0] = SCMD_READ_TOC;
28223 	/* Set the MSF bit based on the user requested address format  */
28224 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
28225 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28226 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
28227 	} else {
28228 		cdb[6] = entry->cdte_track;
28229 	}
28230 
28231 	/*
28232 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
28233 	 * (4 byte TOC response header + 8 byte track descriptor)
28234 	 */
28235 	cdb[8] = 12;
28236 	com->uscsi_cdb	   = cdb;
28237 	com->uscsi_cdblen  = CDB_GROUP1;
28238 	com->uscsi_bufaddr = buffer;
28239 	com->uscsi_buflen  = 0x0C;
28240 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
28241 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
28242 	    SD_PATH_STANDARD);
28243 	if (rval != 0) {
28244 		kmem_free(buffer, 12);
28245 		kmem_free(com, sizeof (*com));
28246 		return (rval);
28247 	}
28248 
28249 	/* Process the toc entry */
28250 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
28251 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
28252 	if (entry->cdte_format & CDROM_LBA) {
28253 		entry->cdte_addr.lba =
28254 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28255 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
28256 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
28257 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
28258 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
28259 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
28260 		/*
28261 		 * Send a READ TOC command using the LBA address format to get
28262 		 * the LBA for the track requested so it can be used in the
28263 		 * READ HEADER request
28264 		 *
28265 		 * Note: The MSF bit of the READ HEADER command specifies the
28266 		 * output format. The block address specified in that command
28267 		 * must be in LBA format.
28268 		 */
28269 		cdb[1] = 0;
28270 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
28271 		    SD_PATH_STANDARD);
28272 		if (rval != 0) {
28273 			kmem_free(buffer, 12);
28274 			kmem_free(com, sizeof (*com));
28275 			return (rval);
28276 		}
28277 	} else {
28278 		entry->cdte_addr.msf.minute	= buffer[9];
28279 		entry->cdte_addr.msf.second	= buffer[10];
28280 		entry->cdte_addr.msf.frame	= buffer[11];
28281 		/*
28282 		 * Send a READ TOC command using the LBA address format to get
28283 		 * the LBA for the track requested so it can be used in the
28284 		 * READ HEADER request
28285 		 *
28286 		 * Note: The MSF bit of the READ HEADER command specifies the
28287 		 * output format. The block address specified in that command
28288 		 * must be in LBA format.
28289 		 */
28290 		cdb[1] = 0;
28291 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
28292 		    SD_PATH_STANDARD);
28293 		if (rval != 0) {
28294 			kmem_free(buffer, 12);
28295 			kmem_free(com, sizeof (*com));
28296 			return (rval);
28297 		}
28298 	}
28299 
28300 	/*
28301 	 * Build and send the READ HEADER command to determine the data mode of
28302 	 * the user specified track.
28303 	 */
28304 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
28305 	    (entry->cdte_track != CDROM_LEADOUT)) {
28306 		bzero(cdb, CDB_GROUP1);
28307 		cdb[0] = SCMD_READ_HEADER;
28308 		cdb[2] = buffer[8];
28309 		cdb[3] = buffer[9];
28310 		cdb[4] = buffer[10];
28311 		cdb[5] = buffer[11];
28312 		cdb[8] = 0x08;
28313 		com->uscsi_buflen = 0x08;
28314 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
28315 		    SD_PATH_STANDARD);
28316 		if (rval == 0) {
28317 			entry->cdte_datamode = buffer[0];
28318 		} else {
28319 			/*
28320 			 * READ HEADER command failed, since this is
28321 			 * obsoleted in one spec, its better to return
28322 			 * -1 for an invlid track so that we can still
28323 			 * receive the rest of the TOC data.
28324 			 */
28325 			entry->cdte_datamode = (uchar_t)-1;
28326 		}
28327 	} else {
28328 		entry->cdte_datamode = (uchar_t)-1;
28329 	}
28330 
28331 	kmem_free(buffer, 12);
28332 	kmem_free(com, sizeof (*com));
28333 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
28334 		return (EFAULT);
28335 
28336 	return (rval);
28337 }
28338 
28339 
28340 /*
28341  *    Function: sr_read_tochdr()
28342  *
28343  * Description: This routine is the driver entry point for handling CD-ROM
28344  *		ioctl requests to read the Table of Contents (TOC) header
 *		(CDROMREADTOCHDR). The TOC header consists of the disk starting
28346  *		and ending track numbers
28347  *
28348  *   Arguments: dev	- the device 'dev_t'
28349  *		data	- pointer to user provided toc header structure,
28350  *			  specifying the starting and ending track numbers.
28351  *		flag	- this argument is a pass through to ddi_copyxxx()
28352  *			  directly from the mode argument of ioctl().
28353  *
28354  * Return Code: the code returned by sd_send_scsi_cmd()
28355  *		EFAULT if ddi_copyxxx() fails
28356  *		ENXIO if fail ddi_get_soft_state
28357  *		EINVAL if data pointer is NULL
28358  */
28359 
28360 static int
28361 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
28362 {
28363 	struct sd_lun		*un;
28364 	struct uscsi_cmd	*com;
28365 	struct cdrom_tochdr	toc_header;
28366 	struct cdrom_tochdr	*hdr = &toc_header;
28367 	char			cdb[CDB_GROUP1];
28368 	int			rval;
28369 	caddr_t			buffer;
28370 
28371 	if (data == NULL) {
28372 		return (EINVAL);
28373 	}
28374 
28375 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28376 	    (un->un_state == SD_STATE_OFFLINE)) {
28377 		return (ENXIO);
28378 	}
28379 
28380 	buffer = kmem_zalloc(4, KM_SLEEP);
28381 	bzero(cdb, CDB_GROUP1);
28382 	cdb[0] = SCMD_READ_TOC;
28383 	/*
28384 	 * Specifying a track number of 0x00 in the READ TOC command indicates
28385 	 * that the TOC header should be returned
28386 	 */
28387 	cdb[6] = 0x00;
28388 	/*
28389 	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
28390 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
28391 	 */
28392 	cdb[8] = 0x04;
28393 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28394 	com->uscsi_cdb	   = cdb;
28395 	com->uscsi_cdblen  = CDB_GROUP1;
28396 	com->uscsi_bufaddr = buffer;
28397 	com->uscsi_buflen  = 0x04;
28398 	com->uscsi_timeout = 300;
28399 	com->uscsi_flags   = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
28400 
28401 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
28402 	    SD_PATH_STANDARD);
28403 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28404 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
28405 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
28406 	} else {
28407 		hdr->cdth_trk0 = buffer[2];
28408 		hdr->cdth_trk1 = buffer[3];
28409 	}
28410 	kmem_free(buffer, 4);
28411 	kmem_free(com, sizeof (*com));
28412 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
28413 		return (EFAULT);
28414 	}
28415 	return (rval);
28416 }
28417 
28418 
28419 /*
28420  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
28421  * sr_read_cdda(), sr_read_cdxa(), routines implement driver support for
28422  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
28423  * digital audio and extended architecture digital audio. These modes are
28424  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
28425  * MMC specs.
28426  *
28427  * In addition to support for the various data formats these routines also
28428  * include support for devices that implement only the direct access READ
28429  * commands (0x08, 0x28), devices that implement the READ_CD commands
28430  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
28431  * READ CDXA commands (0xD8, 0xDB)
28432  */
28433 
28434 /*
28435  *    Function: sr_read_mode1()
28436  *
28437  * Description: This routine is the driver entry point for handling CD-ROM
28438  *		ioctl read mode1 requests (CDROMREADMODE1).
28439  *
28440  *   Arguments: dev	- the device 'dev_t'
28441  *		data	- pointer to user provided cd read structure specifying
28442  *			  the lba buffer address and length.
28443  *		flag	- this argument is a pass through to ddi_copyxxx()
28444  *			  directly from the mode argument of ioctl().
28445  *
28446  * Return Code: the code returned by sd_send_scsi_cmd()
28447  *		EFAULT if ddi_copyxxx() fails
28448  *		ENXIO if fail ddi_get_soft_state
28449  *		EINVAL if data pointer is NULL
28450  */
28451 
28452 static int
28453 sr_read_mode1(dev_t dev, caddr_t data, int flag)
28454 {
28455 	struct sd_lun		*un;
28456 	struct cdrom_read	mode1_struct;
28457 	struct cdrom_read	*mode1 = &mode1_struct;
28458 	int			rval;
28459 	sd_ssc_t		*ssc;
28460 
28461 #ifdef _MULTI_DATAMODEL
28462 	/* To support ILP32 applications in an LP64 world */
28463 	struct cdrom_read32	cdrom_read32;
28464 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28465 #endif /* _MULTI_DATAMODEL */
28466 
28467 	if (data == NULL) {
28468 		return (EINVAL);
28469 	}
28470 
28471 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28472 	    (un->un_state == SD_STATE_OFFLINE)) {
28473 		return (ENXIO);
28474 	}
28475 
28476 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28477 	    "sd_read_mode1: entry: un:0x%p\n", un);
28478 
28479 #ifdef _MULTI_DATAMODEL
28480 	switch (ddi_model_convert_from(flag & FMODELS)) {
28481 	case DDI_MODEL_ILP32:
28482 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28483 			return (EFAULT);
28484 		}
28485 		/* Convert the ILP32 uscsi data from the application to LP64 */
28486 		cdrom_read32tocdrom_read(cdrd32, mode1);
28487 		break;
28488 	case DDI_MODEL_NONE:
28489 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28490 			return (EFAULT);
28491 		}
28492 	}
28493 #else /* ! _MULTI_DATAMODEL */
28494 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28495 		return (EFAULT);
28496 	}
28497 #endif /* _MULTI_DATAMODEL */
28498 
28499 	ssc = sd_ssc_init(un);
28500 	rval = sd_send_scsi_READ(ssc, mode1->cdread_bufaddr,
28501 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
28502 	sd_ssc_fini(ssc);
28503 
28504 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28505 	    "sd_read_mode1: exit: un:0x%p\n", un);
28506 
28507 	return (rval);
28508 }
28509 
28510 
28511 /*
28512  *    Function: sr_read_cd_mode2()
28513  *
28514  * Description: This routine is the driver entry point for handling CD-ROM
28515  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28516  *		support the READ CD (0xBE) command or the 1st generation
28517  *		READ CD (0xD4) command.
28518  *
28519  *   Arguments: dev	- the device 'dev_t'
28520  *		data	- pointer to user provided cd read structure specifying
28521  *			  the lba buffer address and length.
28522  *		flag	- this argument is a pass through to ddi_copyxxx()
28523  *			  directly from the mode argument of ioctl().
28524  *
28525  * Return Code: the code returned by sd_send_scsi_cmd()
28526  *		EFAULT if ddi_copyxxx() fails
28527  *		ENXIO if fail ddi_get_soft_state
28528  *		EINVAL if data pointer is NULL
28529  */
28530 
28531 static int
28532 sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
28533 {
28534 	struct sd_lun		*un;
28535 	struct uscsi_cmd	*com;
28536 	struct cdrom_read	mode2_struct;
28537 	struct cdrom_read	*mode2 = &mode2_struct;
28538 	uchar_t			cdb[CDB_GROUP5];
28539 	int			nblocks;
28540 	int			rval;
28541 #ifdef _MULTI_DATAMODEL
28542 	/*  To support ILP32 applications in an LP64 world */
28543 	struct cdrom_read32	cdrom_read32;
28544 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28545 #endif /* _MULTI_DATAMODEL */
28546 
28547 	if (data == NULL) {
28548 		return (EINVAL);
28549 	}
28550 
28551 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28552 	    (un->un_state == SD_STATE_OFFLINE)) {
28553 		return (ENXIO);
28554 	}
28555 
28556 #ifdef _MULTI_DATAMODEL
28557 	switch (ddi_model_convert_from(flag & FMODELS)) {
28558 	case DDI_MODEL_ILP32:
28559 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28560 			return (EFAULT);
28561 		}
28562 		/* Convert the ILP32 uscsi data from the application to LP64 */
28563 		cdrom_read32tocdrom_read(cdrd32, mode2);
28564 		break;
28565 	case DDI_MODEL_NONE:
28566 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28567 			return (EFAULT);
28568 		}
28569 		break;
28570 	}
28571 
28572 #else /* ! _MULTI_DATAMODEL */
28573 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28574 		return (EFAULT);
28575 	}
28576 #endif /* _MULTI_DATAMODEL */
28577 
28578 	bzero(cdb, sizeof (cdb));
28579 	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
28580 		/* Read command supported by 1st generation atapi drives */
28581 		cdb[0] = SCMD_READ_CDD4;
28582 	} else {
28583 		/* Universal CD Access Command */
28584 		cdb[0] = SCMD_READ_CD;
28585 	}
28586 
28587 	/*
28588 	 * Set expected sector type to: 2336s byte, Mode 2 Yellow Book
28589 	 */
28590 	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
28591 
28592 	/* set the start address */
28593 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0XFF);
28594 	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0XFF);
28595 	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28596 	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
28597 
28598 	/* set the transfer length */
28599 	nblocks = mode2->cdread_buflen / 2336;
28600 	cdb[6] = (uchar_t)(nblocks >> 16);
28601 	cdb[7] = (uchar_t)(nblocks >> 8);
28602 	cdb[8] = (uchar_t)nblocks;
28603 
28604 	/* set the filter bits */
28605 	cdb[9] = CDROM_READ_CD_USERDATA;
28606 
28607 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28608 	com->uscsi_cdb = (caddr_t)cdb;
28609 	com->uscsi_cdblen = sizeof (cdb);
28610 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28611 	com->uscsi_buflen = mode2->cdread_buflen;
28612 	com->uscsi_flags = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
28613 
28614 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
28615 	    SD_PATH_STANDARD);
28616 	kmem_free(com, sizeof (*com));
28617 	return (rval);
28618 }
28619 
28620 
28621 /*
28622  *    Function: sr_read_mode2()
28623  *
28624  * Description: This routine is the driver entry point for handling CD-ROM
28625  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28626  *		do not support the READ CD (0xBE) command.
28627  *
28628  *   Arguments: dev	- the device 'dev_t'
28629  *		data	- pointer to user provided cd read structure specifying
28630  *			  the lba buffer address and length.
28631  *		flag	- this argument is a pass through to ddi_copyxxx()
28632  *			  directly from the mode argument of ioctl().
28633  *
28634  * Return Code: the code returned by sd_send_scsi_cmd()
28635  *		EFAULT if ddi_copyxxx() fails
28636  *		ENXIO if fail ddi_get_soft_state
28637  *		EINVAL if data pointer is NULL
28638  *		EIO if fail to reset block size
28639  *		EAGAIN if commands are in progress in the driver
28640  */
28641 
static int
sr_read_mode2(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct cdrom_read	mode2_struct;
	struct cdrom_read	*mode2 = &mode2_struct;
	int			rval;
	uint32_t		restore_blksize;
	struct uscsi_cmd	*com;
	uchar_t			cdb[CDB_GROUP0];
	int			nblocks;

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_read32	cdrom_read32;
	struct cdrom_read32	*cdrd32 = &cdrom_read32;
#endif /* _MULTI_DATAMODEL */

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	/*
	 * Because this routine will update the device and driver block size
	 * being used we want to make sure there are no commands in progress.
	 * If commands are in progress the user will have to try again.
	 *
	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
	 * in sdioctl to protect commands from sdioctl through to the top of
	 * sd_uscsi_strategy. See sdioctl for details.
	 */
	mutex_enter(SD_MUTEX(un));
	if (un->un_ncmds_in_driver != 1) {
		mutex_exit(SD_MUTEX(un));
		return (EAGAIN);
	}
	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
	    "sd_read_mode2: entry: un:0x%p\n", un);

	/* Copy in the user request, handling the ILP32-on-LP64 case */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
			return (EFAULT);
		}
		/* Convert the ILP32 uscsi data from the application to LP64 */
		cdrom_read32tocdrom_read(cdrd32, mode2);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/* Store the current target block size for restoration later */
	restore_blksize = un->un_tgt_blocksize;

	/* Change the device and soft state target block size to 2336 */
	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
		rval = EIO;
		goto done;
	}


	bzero(cdb, sizeof (cdb));

	/* set READ operation */
	cdb[0] = SCMD_READ;

	/* adjust lba for 2kbyte blocks from 512 byte blocks */
	mode2->cdread_lba >>= 2;

	/*
	 * set the start address
	 * (Group 0 READ carries a 21-bit LBA split across bytes 1-3)
	 */
	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0X1F);
	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);

	/* set the transfer length (in 2336-byte mode 2 sectors) */
	nblocks = mode2->cdread_buflen / 2336;
	cdb[4] = (uchar_t)nblocks & 0xFF;

	/* build command */
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	com->uscsi_cdb = (caddr_t)cdb;
	com->uscsi_cdblen = sizeof (cdb);
	com->uscsi_bufaddr = mode2->cdread_bufaddr;
	com->uscsi_buflen = mode2->cdread_buflen;
	com->uscsi_flags = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;

	/*
	 * Issue SCSI command with user space address for read buffer.
	 *
	 * This sends the command through main channel in the driver.
	 *
	 * Since this is accessed via an IOCTL call, we go through the
	 * standard path, so that if the device was powered down, then
	 * it would be 'awakened' to handle the command.
	 */
	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
	    SD_PATH_STANDARD);

	kmem_free(com, sizeof (*com));

	/* Restore the device and soft state target block size */
	if (sr_sector_mode(dev, restore_blksize) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "can't do switch back to mode 1\n");
		/*
		 * If sd_send_scsi_READ succeeded we still need to report
		 * an error because we failed to reset the block size
		 */
		if (rval == 0) {
			rval = EIO;
		}
	}

done:
	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
	    "sd_read_mode2: exit: un:0x%p\n", un);

	return (rval);
}
28777 
28778 
28779 /*
28780  *    Function: sr_sector_mode()
28781  *
28782  * Description: This utility function is used by sr_read_mode2 to set the target
28783  *		block size based on the user specified size. This is a legacy
28784  *		implementation based upon a vendor specific mode page
28785  *
28786  *   Arguments: dev	- the device 'dev_t'
28787  *		data	- flag indicating if block size is being set to 2336 or
28788  *			  512.
28789  *
28790  * Return Code: the code returned by sd_send_scsi_cmd()
28791  *		EFAULT if ddi_copyxxx() fails
28792  *		ENXIO if fail ddi_get_soft_state
28793  *		EINVAL if data pointer is NULL
28794  */
28795 
28796 static int
28797 sr_sector_mode(dev_t dev, uint32_t blksize)
28798 {
28799 	struct sd_lun	*un;
28800 	uchar_t		*sense;
28801 	uchar_t		*select;
28802 	int		rval;
28803 	sd_ssc_t	*ssc;
28804 
28805 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28806 	    (un->un_state == SD_STATE_OFFLINE)) {
28807 		return (ENXIO);
28808 	}
28809 
28810 	sense = kmem_zalloc(20, KM_SLEEP);
28811 
28812 	/* Note: This is a vendor specific mode page (0x81) */
28813 	ssc = sd_ssc_init(un);
28814 	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense, 20, 0x81,
28815 	    SD_PATH_STANDARD);
28816 	sd_ssc_fini(ssc);
28817 	if (rval != 0) {
28818 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28819 		    "sr_sector_mode: Mode Sense failed\n");
28820 		kmem_free(sense, 20);
28821 		return (rval);
28822 	}
28823 	select = kmem_zalloc(20, KM_SLEEP);
28824 	select[3] = 0x08;
28825 	select[10] = ((blksize >> 8) & 0xff);
28826 	select[11] = (blksize & 0xff);
28827 	select[12] = 0x01;
28828 	select[13] = 0x06;
28829 	select[14] = sense[14];
28830 	select[15] = sense[15];
28831 	if (blksize == SD_MODE2_BLKSIZE) {
28832 		select[14] |= 0x01;
28833 	}
28834 
28835 	ssc = sd_ssc_init(un);
28836 	rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select, 20,
28837 	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
28838 	sd_ssc_fini(ssc);
28839 	if (rval != 0) {
28840 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28841 		    "sr_sector_mode: Mode Select failed\n");
28842 	} else {
28843 		/*
28844 		 * Only update the softstate block size if we successfully
28845 		 * changed the device block mode.
28846 		 */
28847 		mutex_enter(SD_MUTEX(un));
28848 		sd_update_block_info(un, blksize, 0);
28849 		mutex_exit(SD_MUTEX(un));
28850 	}
28851 	kmem_free(sense, 20);
28852 	kmem_free(select, 20);
28853 	return (rval);
28854 }
28855 
28856 
28857 /*
28858  *    Function: sr_read_cdda()
28859  *
28860  * Description: This routine is the driver entry point for handling CD-ROM
28861  *		ioctl requests to return CD-DA or subcode data. (CDROMCDDA) If
28862  *		the target supports CDDA these requests are handled via a vendor
28863  *		specific command (0xD8) If the target does not support CDDA
28864  *		these requests are handled via the READ CD command (0xBE).
28865  *
28866  *   Arguments: dev	- the device 'dev_t'
28867  *		data	- pointer to user provided CD-DA structure specifying
28868  *			  the track starting address, transfer length, and
28869  *			  subcode options.
28870  *		flag	- this argument is a pass through to ddi_copyxxx()
28871  *			  directly from the mode argument of ioctl().
28872  *
28873  * Return Code: the code returned by sd_send_scsi_cmd()
28874  *		EFAULT if ddi_copyxxx() fails
28875  *		ENXIO if fail ddi_get_soft_state
28876  *		EINVAL if invalid arguments are provided
28877  *		ENOTTY
28878  */
28879 
28880 static int
28881 sr_read_cdda(dev_t dev, caddr_t data, int flag)
28882 {
28883 	struct sd_lun			*un;
28884 	struct uscsi_cmd		*com;
28885 	struct cdrom_cdda		*cdda;
28886 	int				rval;
28887 	size_t				buflen;
28888 	char				cdb[CDB_GROUP5];
28889 
28890 #ifdef _MULTI_DATAMODEL
28891 	/* To support ILP32 applications in an LP64 world */
28892 	struct cdrom_cdda32	cdrom_cdda32;
28893 	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
28894 #endif /* _MULTI_DATAMODEL */
28895 
28896 	if (data == NULL) {
28897 		return (EINVAL);
28898 	}
28899 
28900 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28901 		return (ENXIO);
28902 	}
28903 
28904 	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);
28905 
28906 #ifdef _MULTI_DATAMODEL
28907 	switch (ddi_model_convert_from(flag & FMODELS)) {
28908 	case DDI_MODEL_ILP32:
28909 		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
28910 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28911 			    "sr_read_cdda: ddi_copyin Failed\n");
28912 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28913 			return (EFAULT);
28914 		}
28915 		/* Convert the ILP32 uscsi data from the application to LP64 */
28916 		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
28917 		break;
28918 	case DDI_MODEL_NONE:
28919 		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28920 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28921 			    "sr_read_cdda: ddi_copyin Failed\n");
28922 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28923 			return (EFAULT);
28924 		}
28925 		break;
28926 	}
28927 #else /* ! _MULTI_DATAMODEL */
28928 	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28929 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28930 		    "sr_read_cdda: ddi_copyin Failed\n");
28931 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28932 		return (EFAULT);
28933 	}
28934 #endif /* _MULTI_DATAMODEL */
28935 
28936 	/*
28937 	 * Since MMC-2 expects max 3 bytes for length, check if the
28938 	 * length input is greater than 3 bytes
28939 	 */
28940 	if ((cdda->cdda_length & 0xFF000000) != 0) {
28941 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
28942 		    "cdrom transfer length too large: %d (limit %d)\n",
28943 		    cdda->cdda_length, 0xFFFFFF);
28944 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28945 		return (EINVAL);
28946 	}
28947 
28948 	switch (cdda->cdda_subcode) {
28949 	case CDROM_DA_NO_SUBCODE:
28950 		buflen = CDROM_BLK_2352 * cdda->cdda_length;
28951 		break;
28952 	case CDROM_DA_SUBQ:
28953 		buflen = CDROM_BLK_2368 * cdda->cdda_length;
28954 		break;
28955 	case CDROM_DA_ALL_SUBCODE:
28956 		buflen = CDROM_BLK_2448 * cdda->cdda_length;
28957 		break;
28958 	case CDROM_DA_SUBCODE_ONLY:
28959 		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
28960 		break;
28961 	default:
28962 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28963 		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
28964 		    cdda->cdda_subcode);
28965 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28966 		return (EINVAL);
28967 	}
28968 
28969 	/* Build and send the command */
28970 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28971 	bzero(cdb, CDB_GROUP5);
28972 
28973 	if (un->un_f_cfg_cdda == TRUE) {
28974 		cdb[0] = (char)SCMD_READ_CD;
28975 		cdb[1] = 0x04;
28976 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28977 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28978 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28979 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28980 		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
28981 		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
28982 		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
28983 		cdb[9] = 0x10;
28984 		switch (cdda->cdda_subcode) {
28985 		case CDROM_DA_NO_SUBCODE :
28986 			cdb[10] = 0x0;
28987 			break;
28988 		case CDROM_DA_SUBQ :
28989 			cdb[10] = 0x2;
28990 			break;
28991 		case CDROM_DA_ALL_SUBCODE :
28992 			cdb[10] = 0x1;
28993 			break;
28994 		case CDROM_DA_SUBCODE_ONLY :
28995 			/* FALLTHROUGH */
28996 		default :
28997 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28998 			kmem_free(com, sizeof (*com));
28999 			return (ENOTTY);
29000 		}
29001 	} else {
29002 		cdb[0] = (char)SCMD_READ_CDDA;
29003 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
29004 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
29005 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
29006 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
29007 		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
29008 		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
29009 		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
29010 		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
29011 		cdb[10] = cdda->cdda_subcode;
29012 	}
29013 
29014 	com->uscsi_cdb = cdb;
29015 	com->uscsi_cdblen = CDB_GROUP5;
29016 	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
29017 	com->uscsi_buflen = buflen;
29018 	com->uscsi_flags = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
29019 
29020 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
29021 	    SD_PATH_STANDARD);
29022 
29023 	kmem_free(cdda, sizeof (struct cdrom_cdda));
29024 	kmem_free(com, sizeof (*com));
29025 	return (rval);
29026 }
29027 
29028 
29029 /*
29030  *    Function: sr_read_cdxa()
29031  *
29032  * Description: This routine is the driver entry point for handling CD-ROM
29033  *		ioctl requests to return CD-XA (Extended Architecture) data.
29034  *		(CDROMCDXA).
29035  *
29036  *   Arguments: dev	- the device 'dev_t'
29037  *		data	- pointer to user provided CD-XA structure specifying
29038  *			  the data starting address, transfer length, and format
29039  *		flag	- this argument is a pass through to ddi_copyxxx()
29040  *			  directly from the mode argument of ioctl().
29041  *
29042  * Return Code: the code returned by sd_send_scsi_cmd()
29043  *		EFAULT if ddi_copyxxx() fails
29044  *		ENXIO if fail ddi_get_soft_state
29045  *		EINVAL if data pointer is NULL
29046  */
29047 
static int
sr_read_cdxa(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct uscsi_cmd	*com;
	struct cdrom_cdxa	*cdxa;
	int			rval;
	size_t			buflen;
	char			cdb[CDB_GROUP5];
	uchar_t			read_flags;

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_cdxa32		cdrom_cdxa32;
	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
#endif /* _MULTI_DATAMODEL */

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);

#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
			return (EFAULT);
		}
		/*
		 * Convert the ILP32 uscsi data from the
		 * application to LP64 for internal use.
		 */
		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/*
	 * Since MMC-2 expects max 3 bytes for length, check if the
	 * length input is greater than 3 bytes
	 */
	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
		    "cdrom transfer length too large: %d (limit %d)\n",
		    cdxa->cdxa_length, 0xFFFFFF);
		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
		return (EINVAL);
	}

	/*
	 * Size the transfer buffer from the requested format; each format
	 * yields a different number of bytes per block.  read_flags is
	 * only used for the MMC READ CD form of the command below.
	 */
	switch (cdxa->cdxa_format) {
	case CDROM_XA_DATA:
		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
		read_flags = 0x10;
		break;
	case CDROM_XA_SECTOR_DATA:
		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
		read_flags = 0xf8;
		break;
	case CDROM_XA_DATA_W_ERROR:
		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
		read_flags = 0xfc;
		break;
	default:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
		    cdxa->cdxa_format);
		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
		return (EINVAL);
	}

	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP5);
	if (un->un_f_mmc_cap == TRUE) {
		/* MMC-capable device: use the standard READ CD command. */
		cdb[0] = (char)SCMD_READ_CD;
		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
		cdb[9] = (char)read_flags;
	} else {
		/*
		 * Note: A vendor specific command (0xDB) is being used here to
		 * request a read of all subcodes.
		 */
		cdb[0] = (char)SCMD_READ_CDXA;
		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
		cdb[10] = cdxa->cdxa_format;
	}
	com->uscsi_cdb	   = cdb;
	com->uscsi_cdblen  = CDB_GROUP5;
	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
	com->uscsi_buflen  = buflen;
	com->uscsi_flags   = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
	    SD_PATH_STANDARD);
	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
	kmem_free(com, sizeof (*com));
	return (rval);
}
29174 
29175 
29176 /*
29177  *    Function: sr_eject()
29178  *
29179  * Description: This routine is the driver entry point for handling CD-ROM
29180  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
29181  *
29182  *   Arguments: dev	- the device 'dev_t'
29183  *
29184  * Return Code: the code returned by sd_send_scsi_cmd()
29185  */
29186 
29187 static int
29188 sr_eject(dev_t dev)
29189 {
29190 	struct sd_lun	*un;
29191 	int		rval;
29192 	sd_ssc_t	*ssc;
29193 
29194 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29195 	    (un->un_state == SD_STATE_OFFLINE)) {
29196 		return (ENXIO);
29197 	}
29198 
29199 	/*
29200 	 * To prevent race conditions with the eject
29201 	 * command, keep track of an eject command as
29202 	 * it progresses. If we are already handling
29203 	 * an eject command in the driver for the given
29204 	 * unit and another request to eject is received
29205 	 * immediately return EAGAIN so we don't lose
29206 	 * the command if the current eject command fails.
29207 	 */
29208 	mutex_enter(SD_MUTEX(un));
29209 	if (un->un_f_ejecting == TRUE) {
29210 		mutex_exit(SD_MUTEX(un));
29211 		return (EAGAIN);
29212 	}
29213 	un->un_f_ejecting = TRUE;
29214 	mutex_exit(SD_MUTEX(un));
29215 
29216 	ssc = sd_ssc_init(un);
29217 	rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_ALLOW,
29218 	    SD_PATH_STANDARD);
29219 	sd_ssc_fini(ssc);
29220 
29221 	if (rval != 0) {
29222 		mutex_enter(SD_MUTEX(un));
29223 		un->un_f_ejecting = FALSE;
29224 		mutex_exit(SD_MUTEX(un));
29225 		return (rval);
29226 	}
29227 
29228 	ssc = sd_ssc_init(un);
29229 	rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
29230 	    SD_TARGET_EJECT, SD_PATH_STANDARD);
29231 	sd_ssc_fini(ssc);
29232 
29233 	if (rval == 0) {
29234 		mutex_enter(SD_MUTEX(un));
29235 		sr_ejected(un);
29236 		un->un_mediastate = DKIO_EJECTED;
29237 		un->un_f_ejecting = FALSE;
29238 		cv_broadcast(&un->un_state_cv);
29239 		mutex_exit(SD_MUTEX(un));
29240 	} else {
29241 		mutex_enter(SD_MUTEX(un));
29242 		un->un_f_ejecting = FALSE;
29243 		mutex_exit(SD_MUTEX(un));
29244 	}
29245 	return (rval);
29246 }
29247 
29248 
29249 /*
29250  *    Function: sr_ejected()
29251  *
29252  * Description: This routine updates the soft state structure to invalidate the
29253  *		geometry information after the media has been ejected or a
29254  *		media eject has been detected.
29255  *
29256  *   Arguments: un - driver soft state (unit) structure
29257  */
29258 
29259 static void
29260 sr_ejected(struct sd_lun *un)
29261 {
29262 	struct sd_errstats *stp;
29263 
29264 	ASSERT(un != NULL);
29265 	ASSERT(mutex_owned(SD_MUTEX(un)));
29266 
29267 	un->un_f_blockcount_is_valid	= FALSE;
29268 	un->un_f_tgt_blocksize_is_valid	= FALSE;
29269 	mutex_exit(SD_MUTEX(un));
29270 	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
29271 	mutex_enter(SD_MUTEX(un));
29272 
29273 	if (un->un_errstats != NULL) {
29274 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
29275 		stp->sd_capacity.value.ui64 = 0;
29276 	}
29277 }
29278 
29279 
29280 /*
29281  *    Function: sr_check_wp()
29282  *
29283  * Description: This routine checks the write protection of a removable
29284  *      media disk and hotpluggable devices via the write protect bit of
29285  *      the Mode Page Header device specific field. Some devices choke
29286  *      on unsupported mode page. In order to workaround this issue,
29287  *      this routine has been implemented to use 0x3f mode page(request
29288  *      for all pages) for all device types.
29289  *
29290  *   Arguments: dev             - the device 'dev_t'
29291  *
29292  * Return Code: int indicating if the device is write protected (1) or not (0)
29293  *
29294  *     Context: Kernel thread.
29295  *
29296  */
29297 
29298 static int
29299 sr_check_wp(dev_t dev)
29300 {
29301 	struct sd_lun	*un;
29302 	uchar_t		device_specific;
29303 	uchar_t		*sense;
29304 	int		hdrlen;
29305 	int		rval = FALSE;
29306 	int		status;
29307 	sd_ssc_t	*ssc;
29308 
29309 	/*
29310 	 * Note: The return codes for this routine should be reworked to
29311 	 * properly handle the case of a NULL softstate.
29312 	 */
29313 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
29314 		return (FALSE);
29315 	}
29316 
29317 	if (un->un_f_cfg_is_atapi == TRUE) {
29318 		/*
29319 		 * The mode page contents are not required; set the allocation
29320 		 * length for the mode page header only
29321 		 */
29322 		hdrlen = MODE_HEADER_LENGTH_GRP2;
29323 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
29324 		ssc = sd_ssc_init(un);
29325 		status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense, hdrlen,
29326 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD);
29327 		sd_ssc_fini(ssc);
29328 		if (status != 0)
29329 			goto err_exit;
29330 		device_specific =
29331 		    ((struct mode_header_grp2 *)sense)->device_specific;
29332 	} else {
29333 		hdrlen = MODE_HEADER_LENGTH;
29334 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
29335 		ssc = sd_ssc_init(un);
29336 		status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense, hdrlen,
29337 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD);
29338 		sd_ssc_fini(ssc);
29339 		if (status != 0)
29340 			goto err_exit;
29341 		device_specific =
29342 		    ((struct mode_header *)sense)->device_specific;
29343 	}
29344 
29345 
29346 	/*
29347 	 * Write protect mode sense failed; not all disks
29348 	 * understand this query. Return FALSE assuming that
29349 	 * these devices are not writable.
29350 	 */
29351 	if (device_specific & WRITE_PROTECT) {
29352 		rval = TRUE;
29353 	}
29354 
29355 err_exit:
29356 	kmem_free(sense, hdrlen);
29357 	return (rval);
29358 }
29359 
29360 /*
29361  *    Function: sr_volume_ctrl()
29362  *
29363  * Description: This routine is the driver entry point for handling CD-ROM
29364  *		audio output volume ioctl requests. (CDROMVOLCTRL)
29365  *
29366  *   Arguments: dev	- the device 'dev_t'
29367  *		data	- pointer to user audio volume control structure
29368  *		flag	- this argument is a pass through to ddi_copyxxx()
29369  *			  directly from the mode argument of ioctl().
29370  *
29371  * Return Code: the code returned by sd_send_scsi_cmd()
29372  *		EFAULT if ddi_copyxxx() fails
29373  *		ENXIO if fail ddi_get_soft_state
29374  *		EINVAL if data pointer is NULL
29375  *
29376  */
29377 
static int
sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct cdrom_volctrl    volume;
	struct cdrom_volctrl    *vol = &volume;
	uchar_t			*sense_page;
	uchar_t			*select_page;
	uchar_t			*sense;
	uchar_t			*select;
	int			sense_buflen;
	int			select_buflen;
	int			rval;
	sd_ssc_t		*ssc;

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	/* Copy in the user-requested volume levels. */
	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
		return (EFAULT);
	}

	/*
	 * Fetch the current audio control mode page so unmodified fields
	 * can be echoed back in the subsequent MODE SELECT.  ATAPI/MMC
	 * devices use the group 1 (10-byte) commands with the grp2 mode
	 * header; legacy devices use the group 0 (6-byte) form.
	 */
	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
		struct mode_header_grp2		*sense_mhp;
		struct mode_header_grp2		*select_mhp;
		int				bd_len;

		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
		select_buflen = MODE_HEADER_LENGTH_GRP2 +
		    MODEPAGE_AUDIO_CTRL_LEN;
		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
		select = kmem_zalloc(select_buflen, KM_SLEEP);
		ssc = sd_ssc_init(un);
		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense,
		    sense_buflen, MODEPAGE_AUDIO_CTRL,
		    SD_PATH_STANDARD);
		sd_ssc_fini(ssc);

		if (rval != 0) {
			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
			    "sr_volume_ctrl: Mode Sense Failed\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (rval);
		}
		sense_mhp = (struct mode_header_grp2 *)sense;
		select_mhp = (struct mode_header_grp2 *)select;
		/* Block descriptor length is a 16-bit big-endian field. */
		bd_len = (sense_mhp->bdesc_length_hi << 8) |
		    sense_mhp->bdesc_length_lo;
		if (bd_len > MODE_BLK_DESC_LENGTH) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense returned invalid "
			    "block descriptor length\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (EIO);
		}
		/* The page data follows the header and block descriptor. */
		sense_page = (uchar_t *)
		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
		/* Length/descriptor fields are zeroed for MODE SELECT. */
		select_mhp->length_msb = 0;
		select_mhp->length_lsb = 0;
		select_mhp->bdesc_length_hi = 0;
		select_mhp->bdesc_length_lo = 0;
	} else {
		struct mode_header		*sense_mhp, *select_mhp;

		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
		select = kmem_zalloc(select_buflen, KM_SLEEP);
		ssc = sd_ssc_init(un);
		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
		    sense_buflen, MODEPAGE_AUDIO_CTRL,
		    SD_PATH_STANDARD);
		sd_ssc_fini(ssc);

		if (rval != 0) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense Failed\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (rval);
		}
		sense_mhp  = (struct mode_header *)sense;
		select_mhp = (struct mode_header *)select;
		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense returned invalid "
			    "block descriptor length\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (EIO);
		}
		/* The page data follows the header and block descriptor. */
		sense_page = (uchar_t *)
		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
		/* Length/descriptor fields are zeroed for MODE SELECT. */
		select_mhp->length = 0;
		select_mhp->bdesc_length = 0;
	}
	/*
	 * Note: An audio control data structure could be created and overlayed
	 * on the following in place of the array indexing method implemented.
	 */

	/* Build the select data for the user volume data */
	select_page[0] = MODEPAGE_AUDIO_CTRL;
	select_page[1] = 0xE;
	/* Set the immediate bit */
	select_page[2] = 0x04;
	/* Zero out reserved fields */
	select_page[3] = 0x00;
	select_page[4] = 0x00;
	/* Return sense data for fields not to be modified */
	select_page[5] = sense_page[5];
	select_page[6] = sense_page[6];
	select_page[7] = sense_page[7];
	/* Set the user specified volume levels for channel 0 and 1 */
	select_page[8] = 0x01;
	select_page[9] = vol->channel0;
	select_page[10] = 0x02;
	select_page[11] = vol->channel1;
	/* Channel 2 and 3 are currently unsupported so return the sense data */
	select_page[12] = sense_page[12];
	select_page[13] = sense_page[13];
	select_page[14] = sense_page[14];
	select_page[15] = sense_page[15];

	/* Send the updated page back with the matching command group. */
	ssc = sd_ssc_init(un);
	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP1, select,
		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
	} else {
		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
	}
	sd_ssc_fini(ssc);

	kmem_free(sense, sense_buflen);
	kmem_free(select, select_buflen);
	return (rval);
}
29526 
29527 
29528 /*
29529  *    Function: sr_read_sony_session_offset()
29530  *
29531  * Description: This routine is the driver entry point for handling CD-ROM
29532  *		ioctl requests for session offset information. (CDROMREADOFFSET)
29533  *		The address of the first track in the last session of a
29534  *		multi-session CD-ROM is returned
29535  *
29536  *		Note: This routine uses a vendor specific key value in the
29537  *		command control field without implementing any vendor check here
29538  *		or in the ioctl routine.
29539  *
29540  *   Arguments: dev	- the device 'dev_t'
29541  *		data	- pointer to an int to hold the requested address
29542  *		flag	- this argument is a pass through to ddi_copyxxx()
29543  *			  directly from the mode argument of ioctl().
29544  *
29545  * Return Code: the code returned by sd_send_scsi_cmd()
29546  *		EFAULT if ddi_copyxxx() fails
29547  *		ENXIO if fail ddi_get_soft_state
29548  *		EINVAL if data pointer is NULL
29549  */
29550 
29551 static int
29552 sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
29553 {
29554 	struct sd_lun		*un;
29555 	struct uscsi_cmd	*com;
29556 	caddr_t			buffer;
29557 	char			cdb[CDB_GROUP1];
29558 	int			session_offset = 0;
29559 	int			rval;
29560 
29561 	if (data == NULL) {
29562 		return (EINVAL);
29563 	}
29564 
29565 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29566 	    (un->un_state == SD_STATE_OFFLINE)) {
29567 		return (ENXIO);
29568 	}
29569 
29570 	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
29571 	bzero(cdb, CDB_GROUP1);
29572 	cdb[0] = SCMD_READ_TOC;
29573 	/*
29574 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
29575 	 * (4 byte TOC response header + 8 byte response data)
29576 	 */
29577 	cdb[8] = SONY_SESSION_OFFSET_LEN;
29578 	/* Byte 9 is the control byte. A vendor specific value is used */
29579 	cdb[9] = SONY_SESSION_OFFSET_KEY;
29580 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
29581 	com->uscsi_cdb = cdb;
29582 	com->uscsi_cdblen = CDB_GROUP1;
29583 	com->uscsi_bufaddr = buffer;
29584 	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
29585 	com->uscsi_flags = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
29586 
29587 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
29588 	    SD_PATH_STANDARD);
29589 	if (rval != 0) {
29590 		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29591 		kmem_free(com, sizeof (*com));
29592 		return (rval);
29593 	}
29594 	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
29595 		session_offset =
29596 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
29597 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
29598 		/*
29599 		 * Offset returned offset in current lbasize block's. Convert to
29600 		 * 2k block's to return to the user
29601 		 */
29602 		if (un->un_tgt_blocksize == CDROM_BLK_512) {
29603 			session_offset >>= 2;
29604 		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
29605 			session_offset >>= 1;
29606 		}
29607 	}
29608 
29609 	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
29610 		rval = EFAULT;
29611 	}
29612 
29613 	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29614 	kmem_free(com, sizeof (*com));
29615 	return (rval);
29616 }
29617 
29618 
29619 /*
29620  *    Function: sd_wm_cache_constructor()
29621  *
29622  * Description: Cache Constructor for the wmap cache for the read/modify/write
29623  *		devices.
29624  *
29625  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
29626  *		un	- sd_lun structure for the device.
29627  *		flag	- the km flags passed to constructor
29628  *
29629  * Return Code: 0 on success.
29630  *		-1 on failure.
29631  */
29632 
29633 /*ARGSUSED*/
29634 static int
29635 sd_wm_cache_constructor(void *wm, void *un, int flags)
29636 {
29637 	bzero(wm, sizeof (struct sd_w_map));
29638 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
29639 	return (0);
29640 }
29641 
29642 
29643 /*
29644  *    Function: sd_wm_cache_destructor()
29645  *
29646  * Description: Cache destructor for the wmap cache for the read/modify/write
29647  *		devices.
29648  *
29649  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
29650  *		un	- sd_lun structure for the device.
29651  */
29652 /*ARGSUSED*/
29653 static void
29654 sd_wm_cache_destructor(void *wm, void *un)
29655 {
29656 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
29657 }
29658 
29659 
29660 /*
29661  *    Function: sd_range_lock()
29662  *
29663  * Description: Lock the range of blocks specified as parameter to ensure
29664  *		that read, modify write is atomic and no other i/o writes
29665  *		to the same location. The range is specified in terms
29666  *		of start and end blocks. Block numbers are the actual
29667  *		media block numbers and not system.
29668  *
29669  *   Arguments: un	- sd_lun structure for the device.
29670  *		startb - The starting block number
29671  *		endb - The end block number
29672  *		typ - type of i/o - simple/read_modify_write
29673  *
29674  * Return Code: wm  - pointer to the wmap structure.
29675  *
29676  *     Context: This routine can sleep.
29677  */
29678 
static struct sd_w_map *
sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
{
	struct sd_w_map *wmp = NULL;
	struct sd_w_map *sl_wmp = NULL;
	struct sd_w_map *tmp_wmp;
	wm_state state = SD_WM_CHK_LIST;


	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	mutex_enter(SD_MUTEX(un));

	/*
	 * Small state machine, run under SD_MUTEX: check the list for a
	 * conflicting range, allocate and link a wmap for the new range,
	 * or sleep on a busy wmap until the range becomes available.
	 */
	while (state != SD_WM_DONE) {

		switch (state) {
		case SD_WM_CHK_LIST:
			/*
			 * This is the starting state. Check the wmap list
			 * to see if the range is currently available.
			 */
			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
				/*
				 * If this is a simple write and no rmw
				 * i/o is pending then try to lock the
				 * range as the range should be available.
				 */
				state = SD_WM_LOCK_RANGE;
			} else {
				tmp_wmp = sd_get_range(un, startb, endb);
				if (tmp_wmp != NULL) {
					if ((wmp != NULL) && ONLIST(un, wmp)) {
						/*
						 * Should not keep onlist wmps
						 * while waiting this macro
						 * will also do wmp = NULL;
						 */
						FREE_ONLIST_WMAP(un, wmp);
					}
					/*
					 * sl_wmp is the wmap on which wait
					 * is done, since the tmp_wmp points
					 * to the inuse wmap, set sl_wmp to
					 * tmp_wmp and change the state to sleep
					 */
					sl_wmp = tmp_wmp;
					state = SD_WM_WAIT_MAP;
				} else {
					state = SD_WM_LOCK_RANGE;
				}

			}
			break;

		case SD_WM_LOCK_RANGE:
			ASSERT(un->un_wm_cache);
			/*
			 * The range need to be locked, try to get a wmap.
			 * First attempt it with NO_SLEEP, want to avoid a sleep
			 * if possible as we will have to release the sd mutex
			 * if we have to sleep.
			 */
			if (wmp == NULL)
				wmp = kmem_cache_alloc(un->un_wm_cache,
				    KM_NOSLEEP);
			if (wmp == NULL) {
				mutex_exit(SD_MUTEX(un));
				_NOTE(DATA_READABLE_WITHOUT_LOCK
				    (sd_lun::un_wm_cache))
				wmp = kmem_cache_alloc(un->un_wm_cache,
				    KM_SLEEP);
				mutex_enter(SD_MUTEX(un));
				/*
				 * we released the mutex so recheck and go to
				 * check list state.
				 */
				state = SD_WM_CHK_LIST;
			} else {
				/*
				 * We exit out of state machine since we
				 * have the wmap. Do the housekeeping first.
				 * place the wmap on the wmap list if it is not
				 * on it already and then set the state to done.
				 */
				wmp->wm_start = startb;
				wmp->wm_end = endb;
				wmp->wm_flags = typ | SD_WM_BUSY;
				if (typ & SD_WTYPE_RMW) {
					un->un_rmw_count++;
				}
				/*
				 * If not already on the list then link
				 */
				if (!ONLIST(un, wmp)) {
					wmp->wm_next = un->un_wm;
					wmp->wm_prev = NULL;
					if (wmp->wm_next)
						wmp->wm_next->wm_prev = wmp;
					un->un_wm = wmp;
				}
				state = SD_WM_DONE;
			}
			break;

		case SD_WM_WAIT_MAP:
			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
			/*
			 * Wait is done on sl_wmp, which is set in the
			 * check_list state.
			 */
			sl_wmp->wm_wanted_count++;
			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
			sl_wmp->wm_wanted_count--;
			/*
			 * We can reuse the memory from the completed sl_wmp
			 * lock range for our new lock, but only if no one is
			 * waiting for it.
			 */
			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
			if (sl_wmp->wm_wanted_count == 0) {
				if (wmp != NULL) {
					CHK_N_FREEWMP(un, wmp);
				}
				wmp = sl_wmp;
			}
			sl_wmp = NULL;
			/*
			 * After waking up, need to recheck for availability of
			 * range.
			 */
			state = SD_WM_CHK_LIST;
			break;

		default:
			panic("sd_range_lock: "
			    "Unknown state %d in sd_range_lock", state);
			/*NOTREACHED*/
		} /* switch(state) */

	} /* while(state != SD_WM_DONE) */

	mutex_exit(SD_MUTEX(un));

	ASSERT(wmp != NULL);

	return (wmp);
}
29827 
29828 
29829 /*
29830  *    Function: sd_get_range()
29831  *
29832  * Description: Find if there any overlapping I/O to this one
29833  *		Returns the write-map of 1st such I/O, NULL otherwise.
29834  *
29835  *   Arguments: un	- sd_lun structure for the device.
29836  *		startb - The starting block number
29837  *		endb - The end block number
29838  *
29839  * Return Code: wm  - pointer to the wmap structure.
29840  */
29841 
29842 static struct sd_w_map *
29843 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
29844 {
29845 	struct sd_w_map *wmp;
29846 
29847 	ASSERT(un != NULL);
29848 
29849 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
29850 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
29851 			continue;
29852 		}
29853 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
29854 			break;
29855 		}
29856 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
29857 			break;
29858 		}
29859 	}
29860 
29861 	return (wmp);
29862 }
29863 
29864 
29865 /*
29866  *    Function: sd_free_inlist_wmap()
29867  *
29868  * Description: Unlink and free a write map struct.
29869  *
29870  *   Arguments: un      - sd_lun structure for the device.
29871  *		wmp	- sd_w_map which needs to be unlinked.
29872  */
29873 
29874 static void
29875 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
29876 {
29877 	ASSERT(un != NULL);
29878 
29879 	if (un->un_wm == wmp) {
29880 		un->un_wm = wmp->wm_next;
29881 	} else {
29882 		wmp->wm_prev->wm_next = wmp->wm_next;
29883 	}
29884 
29885 	if (wmp->wm_next) {
29886 		wmp->wm_next->wm_prev = wmp->wm_prev;
29887 	}
29888 
29889 	wmp->wm_next = wmp->wm_prev = NULL;
29890 
29891 	kmem_cache_free(un->un_wm_cache, wmp);
29892 }
29893 
29894 
29895 /*
29896  *    Function: sd_range_unlock()
29897  *
29898  * Description: Unlock the range locked by wm.
29899  *		Free write map if nobody else is waiting on it.
29900  *
29901  *   Arguments: un      - sd_lun structure for the device.
29902  *              wmp     - sd_w_map which needs to be unlinked.
29903  */
29904 
29905 static void
29906 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
29907 {
29908 	ASSERT(un != NULL);
29909 	ASSERT(wm != NULL);
29910 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29911 
29912 	mutex_enter(SD_MUTEX(un));
29913 
29914 	if (wm->wm_flags & SD_WTYPE_RMW) {
29915 		un->un_rmw_count--;
29916 	}
29917 
29918 	if (wm->wm_wanted_count) {
29919 		wm->wm_flags = 0;
29920 		/*
29921 		 * Broadcast that the wmap is available now.
29922 		 */
29923 		cv_broadcast(&wm->wm_avail);
29924 	} else {
29925 		/*
29926 		 * If no one is waiting on the map, it should be free'ed.
29927 		 */
29928 		sd_free_inlist_wmap(un, wm);
29929 	}
29930 
29931 	mutex_exit(SD_MUTEX(un));
29932 }
29933 
29934 
29935 /*
29936  *    Function: sd_read_modify_write_task
29937  *
29938  * Description: Called from a taskq thread to initiate the write phase of
29939  *		a read-modify-write request.  This is used for targets where
29940  *		un->un_sys_blocksize != un->un_tgt_blocksize.
29941  *
29942  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
29943  *
29944  *     Context: Called under taskq thread context.
29945  */
29946 
29947 static void
29948 sd_read_modify_write_task(void *arg)
29949 {
29950 	struct sd_mapblocksize_info	*bsp;
29951 	struct buf	*bp;
29952 	struct sd_xbuf	*xp;
29953 	struct sd_lun	*un;
29954 
29955 	bp = arg;	/* The bp is given in arg */
29956 	ASSERT(bp != NULL);
29957 
29958 	/* Get the pointer to the layer-private data struct */
29959 	xp = SD_GET_XBUF(bp);
29960 	ASSERT(xp != NULL);
29961 	bsp = xp->xb_private;
29962 	ASSERT(bsp != NULL);
29963 
29964 	un = SD_GET_UN(bp);
29965 	ASSERT(un != NULL);
29966 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29967 
29968 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29969 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
29970 
29971 	/*
29972 	 * This is the write phase of a read-modify-write request, called
29973 	 * under the context of a taskq thread in response to the completion
29974 	 * of the read portion of the rmw request completing under interrupt
29975 	 * context. The write request must be sent from here down the iostart
29976 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
29977 	 * we use the layer index saved in the layer-private data area.
29978 	 */
29979 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
29980 
29981 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29982 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
29983 }
29984 
29985 
29986 /*
29987  *    Function: sddump_do_read_of_rmw()
29988  *
29989  * Description: This routine will be called from sddump, If sddump is called
29990  *		with an I/O which not aligned on device blocksize boundary
29991  *		then the write has to be converted to read-modify-write.
29992  *		Do the read part here in order to keep sddump simple.
29993  *		Note - That the sd_mutex is held across the call to this
29994  *		routine.
29995  *
29996  *   Arguments: un	- sd_lun
29997  *		blkno	- block number in terms of media block size.
29998  *		nblk	- number of blocks.
29999  *		bpp	- pointer to pointer to the buf structure. On return
30000  *			from this function, *bpp points to the valid buffer
30001  *			to which the write has to be done.
30002  *
30003  * Return Code: 0 for success or errno-type return code
30004  */
30005 
static int
sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
    struct buf **bpp)
{
	int err;
	int i;
	int rval;
	struct buf *bp;
	struct scsi_pkt *pkt = NULL;
	uint32_t target_blocksize;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	target_blocksize = un->un_tgt_blocksize;

	/*
	 * Drop the mutex across buffer/packet allocation and the polled
	 * I/O below; it is re-acquired at the 'done' label before return.
	 */
	mutex_exit(SD_MUTEX(un));

	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
	if (bp == NULL) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "no resources for dumping; giving up");
		err = ENOMEM;
		goto done;
	}

	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
	    blkno, nblk);
	if (rval != 0) {
		scsi_free_consistent_buf(bp);
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "no resources for dumping; giving up");
		err = ENOMEM;
		goto done;
	}

	/* Dump context is polled: the command must not rely on interrupts. */
	pkt->pkt_flags |= FLAG_NOINTR;

	/* Assume failure until one of the retries below succeeds. */
	err = EIO;
	for (i = 0; i < SD_NDUMP_RETRIES; i++) {

		/*
		 * Scsi_poll returns 0 (success) if the command completes and
		 * the status block is STATUS_GOOD.  We should only check
		 * errors if this condition is not true.  Even then we should
		 * send our own request sense packet only if we have a check
		 * condition and auto request sense has not been performed by
		 * the hba.
		 */
		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");

		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
			err = 0;
			break;
		}

		/*
		 * Check CMD_DEV_GONE 1st, give up if device is gone,
		 * no need to read RQS data.
		 */
		if (pkt->pkt_reason == CMD_DEV_GONE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Error while dumping state with rmw..."
			    "Device is gone\n");
			break;
		}

		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: read failed with CHECK, try # %d\n", i);
			/* Fetch sense ourselves only if the HBA did not. */
			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
				(void) sd_send_polled_RQS(un);
			}

			continue;
		}

		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
			int reset_retval = 0;

			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: read failed with BUSY, try # %d\n", i);

			/* Try a LUN reset first (if enabled), then target. */
			if (un->un_f_lun_reset_enabled == TRUE) {
				reset_retval = scsi_reset(SD_ADDRESS(un),
				    RESET_LUN);
			}
			if (reset_retval == 0) {
				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
			}
			(void) sd_send_polled_RQS(un);

		} else {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: read failed with 0x%x, try # %d\n",
			    SD_GET_PKT_STATUS(pkt), i);
			mutex_enter(SD_MUTEX(un));
			sd_reset_target(un, pkt);
			mutex_exit(SD_MUTEX(un));
		}

		/*
		 * If we are not getting anywhere with lun/target resets,
		 * let's reset the bus.
		 */
		if (i > SD_NDUMP_RETRIES / 2) {
			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
			(void) sd_send_polled_RQS(un);
		}

	}
	scsi_destroy_pkt(pkt);

	/* On failure, release the buffer; *bpp is valid only when err == 0. */
	if (err != 0) {
		scsi_free_consistent_buf(bp);
		*bpp = NULL;
	} else {
		*bpp = bp;
	}

done:
	mutex_enter(SD_MUTEX(un));
	return (err);
}
30131 
30132 
30133 /*
30134  *    Function: sd_failfast_flushq
30135  *
30136  * Description: Take all bp's on the wait queue that have B_FAILFAST set
30137  *		in b_flags and move them onto the failfast queue, then kick
30138  *		off a thread to return all bp's on the failfast queue to
30139  *		their owners with an error set.
30140  *
30141  *   Arguments: un - pointer to the soft state struct for the instance.
30142  *
30143  *     Context: may execute in interrupt context.
30144  */
30145 
static void
sd_failfast_flushq(struct sd_lun *un)
{
	struct buf *bp;
	struct buf *next_waitq_bp;
	struct buf *prev_waitq_bp = NULL;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
	ASSERT(un->un_failfast_bp == NULL);

	SD_TRACE(SD_LOG_IO_FAILFAST, un,
	    "sd_failfast_flushq: entry: un:0x%p\n", un);

	/*
	 * Check if we should flush all bufs when entering failfast state, or
	 * just those with B_FAILFAST set.
	 */
	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
		/*
		 * Move *all* bp's on the wait queue to the failfast flush
		 * queue, including those that do NOT have B_FAILFAST set.
		 * The whole waitq is appended to the failfast queue in one
		 * splice operation.
		 */
		if (un->un_failfast_headp == NULL) {
			ASSERT(un->un_failfast_tailp == NULL);
			un->un_failfast_headp = un->un_waitq_headp;
		} else {
			ASSERT(un->un_failfast_tailp != NULL);
			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
		}

		un->un_failfast_tailp = un->un_waitq_tailp;

		/* update kstat for each bp moved out of the waitq */
		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
		}

		/* empty the waitq */
		un->un_waitq_headp = un->un_waitq_tailp = NULL;

	} else {
		/*
		 * Go thru the wait queue, pick off all entries with
		 * B_FAILFAST set, and move these onto the failfast queue.
		 */
		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
			/*
			 * Save the pointer to the next bp on the wait queue,
			 * so we get to it on the next iteration of this loop.
			 */
			next_waitq_bp = bp->av_forw;

			/*
			 * If this bp from the wait queue does NOT have
			 * B_FAILFAST set, just move on to the next element
			 * in the wait queue. Note, this is the only place
			 * where it is correct to set prev_waitq_bp.
			 */
			if ((bp->b_flags & B_FAILFAST) == 0) {
				prev_waitq_bp = bp;
				continue;
			}

			/*
			 * Remove the bp from the wait queue.
			 */
			if (bp == un->un_waitq_headp) {
				/* The bp is the first element of the waitq. */
				un->un_waitq_headp = next_waitq_bp;
				if (un->un_waitq_headp == NULL) {
					/* The wait queue is now empty */
					un->un_waitq_tailp = NULL;
				}
			} else {
				/*
				 * The bp is either somewhere in the middle
				 * or at the end of the wait queue.
				 * prev_waitq_bp is valid here: it is the
				 * last non-B_FAILFAST bp left on the waitq.
				 */
				ASSERT(un->un_waitq_headp != NULL);
				ASSERT(prev_waitq_bp != NULL);
				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
				    == 0);
				if (bp == un->un_waitq_tailp) {
					/* bp is the last entry on the waitq. */
					ASSERT(next_waitq_bp == NULL);
					un->un_waitq_tailp = prev_waitq_bp;
				}
				prev_waitq_bp->av_forw = next_waitq_bp;
			}
			bp->av_forw = NULL;

			/*
			 * update kstat since the bp is moved out of
			 * the waitq
			 */
			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);

			/*
			 * Now put the bp onto the failfast queue.
			 */
			if (un->un_failfast_headp == NULL) {
				/* failfast queue is currently empty */
				ASSERT(un->un_failfast_tailp == NULL);
				un->un_failfast_headp =
				    un->un_failfast_tailp = bp;
			} else {
				/* Add the bp to the end of the failfast q */
				ASSERT(un->un_failfast_tailp != NULL);
				ASSERT(un->un_failfast_tailp->b_flags &
				    B_FAILFAST);
				un->un_failfast_tailp->av_forw = bp;
				un->un_failfast_tailp = bp;
			}
		}
	}

	/*
	 * Now return all bp's on the failfast queue to their owners.
	 */
	while ((bp = un->un_failfast_headp) != NULL) {

		un->un_failfast_headp = bp->av_forw;
		if (un->un_failfast_headp == NULL) {
			un->un_failfast_tailp = NULL;
		}

		/*
		 * We want to return the bp with a failure error code, but
		 * we do not want a call to sd_start_cmds() to occur here,
		 * so use sd_return_failed_command_no_restart() instead of
		 * sd_return_failed_command().
		 */
		sd_return_failed_command_no_restart(un, bp, EIO);
	}

	/* Flush the xbuf queues if required. */
	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
	}

	SD_TRACE(SD_LOG_IO_FAILFAST, un,
	    "sd_failfast_flushq: exit: un:0x%p\n", un);
}
30291 
30292 
30293 /*
30294  *    Function: sd_failfast_flushq_callback
30295  *
30296  * Description: Return TRUE if the given bp meets the criteria for failfast
30297  *		flushing. Used with ddi_xbuf_flushq(9F).
30298  *
30299  *   Arguments: bp - ptr to buf struct to be examined.
30300  *
30301  *     Context: Any
30302  */
30303 
30304 static int
30305 sd_failfast_flushq_callback(struct buf *bp)
30306 {
30307 	/*
30308 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
30309 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
30310 	 */
30311 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
30312 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
30313 }
30314 
30315 
30316 
30317 /*
30318  * Function: sd_setup_next_xfer
30319  *
30320  * Description: Prepare next I/O operation using DMA_PARTIAL
30321  *
30322  */
30323 
30324 static int
30325 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
30326     struct scsi_pkt *pkt, struct sd_xbuf *xp)
30327 {
30328 	ssize_t	num_blks_not_xfered;
30329 	daddr_t	strt_blk_num;
30330 	ssize_t	bytes_not_xfered;
30331 	int	rval;
30332 
30333 	ASSERT(pkt->pkt_resid == 0);
30334 
30335 	/*
30336 	 * Calculate next block number and amount to be transferred.
30337 	 *
30338 	 * How much data NOT transfered to the HBA yet.
30339 	 */
30340 	bytes_not_xfered = xp->xb_dma_resid;
30341 
30342 	/*
30343 	 * figure how many blocks NOT transfered to the HBA yet.
30344 	 */
30345 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
30346 
30347 	/*
30348 	 * set starting block number to the end of what WAS transfered.
30349 	 */
30350 	strt_blk_num = xp->xb_blkno +
30351 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
30352 
30353 	/*
30354 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
30355 	 * will call scsi_initpkt with NULL_FUNC so we do not have to release
30356 	 * the disk mutex here.
30357 	 */
30358 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
30359 	    strt_blk_num, num_blks_not_xfered);
30360 
30361 	if (rval == 0) {
30362 
30363 		/*
30364 		 * Success.
30365 		 *
30366 		 * Adjust things if there are still more blocks to be
30367 		 * transfered.
30368 		 */
30369 		xp->xb_dma_resid = pkt->pkt_resid;
30370 		pkt->pkt_resid = 0;
30371 
30372 		return (1);
30373 	}
30374 
30375 	/*
30376 	 * There's really only one possible return value from
30377 	 * sd_setup_next_rw_pkt which occurs when scsi_init_pkt
30378 	 * returns NULL.
30379 	 */
30380 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
30381 
30382 	bp->b_resid = bp->b_bcount;
30383 	bp->b_flags |= B_ERROR;
30384 
30385 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
30386 	    "Error setting up next portion of DMA transfer\n");
30387 
30388 	return (0);
30389 }
30390 
30391 /*
30392  *    Function: sd_panic_for_res_conflict
30393  *
30394  * Description: Call panic with a string formatted with "Reservation Conflict"
30395  *		and a human readable identifier indicating the SD instance
30396  *		that experienced the reservation conflict.
30397  *
30398  *   Arguments: un - pointer to the soft state struct for the instance.
30399  *
30400  *     Context: may execute in interrupt context.
30401  */
30402 
30403 #define	SD_RESV_CONFLICT_FMT_LEN 40
30404 void
30405 sd_panic_for_res_conflict(struct sd_lun *un)
30406 {
30407 	char panic_str[SD_RESV_CONFLICT_FMT_LEN + MAXPATHLEN];
30408 	char path_str[MAXPATHLEN];
30409 
30410 	(void) snprintf(panic_str, sizeof (panic_str),
30411 	    "Reservation Conflict\nDisk: %s",
30412 	    ddi_pathname(SD_DEVINFO(un), path_str));
30413 
30414 	panic(panic_str);
30415 }
30416 
30417 /*
30418  * Note: The following sd_faultinjection_ioctl( ) routines implement
30419  * driver support for handling fault injection for error analysis
30420  * causing faults in multiple layers of the driver.
30421  *
30422  */
30423 
30424 #ifdef SD_FAULT_INJECTION
30425 static uint_t   sd_fault_injection_on = 0;
30426 
30427 /*
30428  *    Function: sd_faultinjection_ioctl()
30429  *
30430  * Description: This routine is the driver entry point for handling
30431  *              faultinjection ioctls to inject errors into the
30432  *              layer model
30433  *
30434  *   Arguments: cmd	- the ioctl cmd received
30435  *		arg	- the arguments from user and returns
30436  */
30437 
30438 static void
30439 sd_faultinjection_ioctl(int cmd, intptr_t arg,  struct sd_lun *un)
30440 {
30441 	uint_t i = 0;
30442 	uint_t rval;
30443 
30444 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
30445 
30446 	mutex_enter(SD_MUTEX(un));
30447 
30448 	switch (cmd) {
30449 	case SDIOCRUN:
30450 		/* Allow pushed faults to be injected */
30451 		SD_INFO(SD_LOG_SDTEST, un,
30452 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
30453 
30454 		sd_fault_injection_on = 1;
30455 
30456 		SD_INFO(SD_LOG_IOERR, un,
30457 		    "sd_faultinjection_ioctl: run finished\n");
30458 		break;
30459 
30460 	case SDIOCSTART:
30461 		/* Start Injection Session */
30462 		SD_INFO(SD_LOG_SDTEST, un,
30463 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
30464 
30465 		sd_fault_injection_on = 0;
30466 		un->sd_injection_mask = 0xFFFFFFFF;
30467 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30468 			un->sd_fi_fifo_pkt[i] = NULL;
30469 			un->sd_fi_fifo_xb[i] = NULL;
30470 			un->sd_fi_fifo_un[i] = NULL;
30471 			un->sd_fi_fifo_arq[i] = NULL;
30472 		}
30473 		un->sd_fi_fifo_start = 0;
30474 		un->sd_fi_fifo_end = 0;
30475 
30476 		mutex_enter(&(un->un_fi_mutex));
30477 		un->sd_fi_log[0] = '\0';
30478 		un->sd_fi_buf_len = 0;
30479 		mutex_exit(&(un->un_fi_mutex));
30480 
30481 		SD_INFO(SD_LOG_IOERR, un,
30482 		    "sd_faultinjection_ioctl: start finished\n");
30483 		break;
30484 
30485 	case SDIOCSTOP:
30486 		/* Stop Injection Session */
30487 		SD_INFO(SD_LOG_SDTEST, un,
30488 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
30489 		sd_fault_injection_on = 0;
30490 		un->sd_injection_mask = 0x0;
30491 
30492 		/* Empty stray or unuseds structs from fifo */
30493 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30494 			if (un->sd_fi_fifo_pkt[i] != NULL) {
30495 				kmem_free(un->sd_fi_fifo_pkt[i],
30496 				    sizeof (struct sd_fi_pkt));
30497 			}
30498 			if (un->sd_fi_fifo_xb[i] != NULL) {
30499 				kmem_free(un->sd_fi_fifo_xb[i],
30500 				    sizeof (struct sd_fi_xb));
30501 			}
30502 			if (un->sd_fi_fifo_un[i] != NULL) {
30503 				kmem_free(un->sd_fi_fifo_un[i],
30504 				    sizeof (struct sd_fi_un));
30505 			}
30506 			if (un->sd_fi_fifo_arq[i] != NULL) {
30507 				kmem_free(un->sd_fi_fifo_arq[i],
30508 				    sizeof (struct sd_fi_arq));
30509 			}
30510 			un->sd_fi_fifo_pkt[i] = NULL;
30511 			un->sd_fi_fifo_un[i] = NULL;
30512 			un->sd_fi_fifo_xb[i] = NULL;
30513 			un->sd_fi_fifo_arq[i] = NULL;
30514 		}
30515 		un->sd_fi_fifo_start = 0;
30516 		un->sd_fi_fifo_end = 0;
30517 
30518 		SD_INFO(SD_LOG_IOERR, un,
30519 		    "sd_faultinjection_ioctl: stop finished\n");
30520 		break;
30521 
30522 	case SDIOCINSERTPKT:
30523 		/* Store a packet struct to be pushed onto fifo */
30524 		SD_INFO(SD_LOG_SDTEST, un,
30525 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
30526 
30527 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30528 
30529 		sd_fault_injection_on = 0;
30530 
30531 		/* No more that SD_FI_MAX_ERROR allowed in Queue */
30532 		if (un->sd_fi_fifo_pkt[i] != NULL) {
30533 			kmem_free(un->sd_fi_fifo_pkt[i],
30534 			    sizeof (struct sd_fi_pkt));
30535 		}
30536 		if (arg != (uintptr_t)NULL) {
30537 			un->sd_fi_fifo_pkt[i] =
30538 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
30539 			if (un->sd_fi_fifo_pkt[i] == NULL) {
30540 				/* Alloc failed don't store anything */
30541 				break;
30542 			}
30543 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
30544 			    sizeof (struct sd_fi_pkt), 0);
30545 			if (rval == -1) {
30546 				kmem_free(un->sd_fi_fifo_pkt[i],
30547 				    sizeof (struct sd_fi_pkt));
30548 				un->sd_fi_fifo_pkt[i] = NULL;
30549 			}
30550 		} else {
30551 			SD_INFO(SD_LOG_IOERR, un,
30552 			    "sd_faultinjection_ioctl: pkt null\n");
30553 		}
30554 		break;
30555 
30556 	case SDIOCINSERTXB:
30557 		/* Store a xb struct to be pushed onto fifo */
30558 		SD_INFO(SD_LOG_SDTEST, un,
30559 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
30560 
30561 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30562 
30563 		sd_fault_injection_on = 0;
30564 
30565 		if (un->sd_fi_fifo_xb[i] != NULL) {
30566 			kmem_free(un->sd_fi_fifo_xb[i],
30567 			    sizeof (struct sd_fi_xb));
30568 			un->sd_fi_fifo_xb[i] = NULL;
30569 		}
30570 		if (arg != (uintptr_t)NULL) {
30571 			un->sd_fi_fifo_xb[i] =
30572 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
30573 			if (un->sd_fi_fifo_xb[i] == NULL) {
30574 				/* Alloc failed don't store anything */
30575 				break;
30576 			}
30577 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
30578 			    sizeof (struct sd_fi_xb), 0);
30579 
30580 			if (rval == -1) {
30581 				kmem_free(un->sd_fi_fifo_xb[i],
30582 				    sizeof (struct sd_fi_xb));
30583 				un->sd_fi_fifo_xb[i] = NULL;
30584 			}
30585 		} else {
30586 			SD_INFO(SD_LOG_IOERR, un,
30587 			    "sd_faultinjection_ioctl: xb null\n");
30588 		}
30589 		break;
30590 
30591 	case SDIOCINSERTUN:
30592 		/* Store a un struct to be pushed onto fifo */
30593 		SD_INFO(SD_LOG_SDTEST, un,
30594 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
30595 
30596 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30597 
30598 		sd_fault_injection_on = 0;
30599 
30600 		if (un->sd_fi_fifo_un[i] != NULL) {
30601 			kmem_free(un->sd_fi_fifo_un[i],
30602 			    sizeof (struct sd_fi_un));
30603 			un->sd_fi_fifo_un[i] = NULL;
30604 		}
30605 		if (arg != (uintptr_t)NULL) {
30606 			un->sd_fi_fifo_un[i] =
30607 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
30608 			if (un->sd_fi_fifo_un[i] == NULL) {
30609 				/* Alloc failed don't store anything */
30610 				break;
30611 			}
30612 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
30613 			    sizeof (struct sd_fi_un), 0);
30614 			if (rval == -1) {
30615 				kmem_free(un->sd_fi_fifo_un[i],
30616 				    sizeof (struct sd_fi_un));
30617 				un->sd_fi_fifo_un[i] = NULL;
30618 			}
30619 
30620 		} else {
30621 			SD_INFO(SD_LOG_IOERR, un,
30622 			    "sd_faultinjection_ioctl: un null\n");
30623 		}
30624 
30625 		break;
30626 
30627 	case SDIOCINSERTARQ:
30628 		/* Store a arq struct to be pushed onto fifo */
30629 		SD_INFO(SD_LOG_SDTEST, un,
30630 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
30631 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30632 
30633 		sd_fault_injection_on = 0;
30634 
30635 		if (un->sd_fi_fifo_arq[i] != NULL) {
30636 			kmem_free(un->sd_fi_fifo_arq[i],
30637 			    sizeof (struct sd_fi_arq));
30638 			un->sd_fi_fifo_arq[i] = NULL;
30639 		}
30640 		if (arg != (uintptr_t)NULL) {
30641 			un->sd_fi_fifo_arq[i] =
30642 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
30643 			if (un->sd_fi_fifo_arq[i] == NULL) {
30644 				/* Alloc failed don't store anything */
30645 				break;
30646 			}
30647 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
30648 			    sizeof (struct sd_fi_arq), 0);
30649 			if (rval == -1) {
30650 				kmem_free(un->sd_fi_fifo_arq[i],
30651 				    sizeof (struct sd_fi_arq));
30652 				un->sd_fi_fifo_arq[i] = NULL;
30653 			}
30654 
30655 		} else {
30656 			SD_INFO(SD_LOG_IOERR, un,
30657 			    "sd_faultinjection_ioctl: arq null\n");
30658 		}
30659 
30660 		break;
30661 
30662 	case SDIOCPUSH:
30663 		/* Push stored xb, pkt, un, and arq onto fifo */
30664 		sd_fault_injection_on = 0;
30665 
30666 		if (arg != (uintptr_t)NULL) {
30667 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
30668 			if (rval != -1 &&
30669 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
30670 				un->sd_fi_fifo_end += i;
30671 			}
30672 		} else {
30673 			SD_INFO(SD_LOG_IOERR, un,
30674 			    "sd_faultinjection_ioctl: push arg null\n");
30675 			if (un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
30676 				un->sd_fi_fifo_end++;
30677 			}
30678 		}
30679 		SD_INFO(SD_LOG_IOERR, un,
30680 		    "sd_faultinjection_ioctl: push to end=%d\n",
30681 		    un->sd_fi_fifo_end);
30682 		break;
30683 
30684 	case SDIOCRETRIEVE:
30685 		/* Return buffer of log from Injection session */
30686 		SD_INFO(SD_LOG_SDTEST, un,
30687 		    "sd_faultinjection_ioctl: Injecting Fault Retreive");
30688 
30689 		sd_fault_injection_on = 0;
30690 
30691 		mutex_enter(&(un->un_fi_mutex));
30692 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
30693 		    un->sd_fi_buf_len+1, 0);
30694 		mutex_exit(&(un->un_fi_mutex));
30695 
30696 		if (rval == -1) {
30697 			/*
30698 			 * arg is possibly invalid setting
30699 			 * it to NULL for return
30700 			 */
30701 			arg = (uintptr_t)NULL;
30702 		}
30703 		break;
30704 	}
30705 
30706 	mutex_exit(SD_MUTEX(un));
30707 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: exit\n");
30708 }
30709 
30710 
30711 /*
30712  *    Function: sd_injection_log()
30713  *
30714  * Description: This routine adds buff to the already existing injection log
30715  *              for retrieval via faultinjection_ioctl for use in fault
30716  *              detection and recovery
30717  *
30718  *   Arguments: buf - the string to add to the log
30719  */
30720 
30721 static void
30722 sd_injection_log(char *buf, struct sd_lun *un)
30723 {
30724 	uint_t len;
30725 
30726 	ASSERT(un != NULL);
30727 	ASSERT(buf != NULL);
30728 
30729 	mutex_enter(&(un->un_fi_mutex));
30730 
30731 	len = min(strlen(buf), 255);
30732 	/* Add logged value to Injection log to be returned later */
30733 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
30734 		uint_t	offset = strlen((char *)un->sd_fi_log);
30735 		char *destp = (char *)un->sd_fi_log + offset;
30736 		int i;
30737 		for (i = 0; i < len; i++) {
30738 			*destp++ = *buf++;
30739 		}
30740 		un->sd_fi_buf_len += len;
30741 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
30742 	}
30743 
30744 	mutex_exit(&(un->un_fi_mutex));
30745 }
30746 
30747 
30748 /*
30749  *    Function: sd_faultinjection()
30750  *
30751  * Description: This routine takes the pkt and changes its
30752  *		content based on error injection scenerio.
30753  *
30754  *   Arguments: pktp	- packet to be changed
30755  */
30756 
static void
sd_faultinjection(struct scsi_pkt *pktp)
{
	uint_t i;
	struct sd_fi_pkt *fi_pkt;
	struct sd_fi_xb *fi_xb;
	struct sd_fi_un *fi_un;
	struct sd_fi_arq *fi_arq;
	struct buf *bp;
	struct sd_xbuf *xb;
	struct sd_lun *un;

	ASSERT(pktp != NULL);

	/* pull bp xb and un from pktp */
	bp = (struct buf *)pktp->pkt_private;
	xb = SD_GET_XBUF(bp);
	un = SD_GET_UN(bp);

	ASSERT(un != NULL);

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_SDTEST, un,
	    "sd_faultinjection: entry Injection from sdintr\n");

	/* if injection is off, or the fifo is empty, return */
	if (sd_fault_injection_on == 0 ||
	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
		mutex_exit(SD_MUTEX(un));
		return;
	}

	SD_INFO(SD_LOG_SDTEST, un,
	    "sd_faultinjection: is working for copying\n");

	/* take next set off fifo */
	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;

	fi_pkt = un->sd_fi_fifo_pkt[i];
	fi_xb = un->sd_fi_fifo_xb[i];
	fi_un = un->sd_fi_fifo_un[i];
	fi_arq = un->sd_fi_fifo_arq[i];


	/*
	 * set variables accordingly
	 *
	 * NOTE(review): SD_CONDSET appears to copy a field from the fifo
	 * entry into the destination only when an injected value was set —
	 * confirm against the macro definition elsewhere in this file.
	 */
	/* set pkt if it was on fifo */
	if (fi_pkt != NULL) {
		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
		if (fi_pkt->pkt_cdbp != 0xff)
			SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");

	}
	/* set xb if it was on fifo */
	if (fi_xb != NULL) {
		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
		if (fi_xb->xb_retry_count != 0)
			SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
		SD_CONDSET(xb, xb, xb_victim_retry_count,
		    "xb_victim_retry_count");
		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");

		/* copy in block data from sense */
		/*
		 * if (fi_xb->xb_sense_data[0] != -1) {
		 *	bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
		 *	SENSE_LENGTH);
		 * }
		 */
		bcopy(fi_xb->xb_sense_data, xb->xb_sense_data, SENSE_LENGTH);

		/* copy in extended sense codes */
		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
		    xb, es_code, "es_code");
		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
		    xb, es_key, "es_key");
		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
		    xb, es_add_code, "es_add_code");
		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
		    xb, es_qual_code, "es_qual_code");
		/* force a valid sense class so the injected sense parses */
		struct scsi_extended_sense *esp;
		esp = (struct scsi_extended_sense *)xb->xb_sense_data;
		esp->es_class = CLASS_EXTENDED_SENSE;
	}

	/* set un if it was on fifo */
	if (fi_un != NULL) {
		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
		SD_CONDSET(un, un, un_ctype, "un_ctype");
		SD_CONDSET(un, un, un_reset_retry_count,
		    "un_reset_retry_count");
		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
		    "un_f_allow_bus_device_reset");
		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");

	}

	/* copy in auto request sense if it was on fifo */
	if (fi_arq != NULL) {
		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
	}

	/* free structs */
	if (un->sd_fi_fifo_pkt[i] != NULL) {
		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
	}
	if (un->sd_fi_fifo_xb[i] != NULL) {
		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
	}
	if (un->sd_fi_fifo_un[i] != NULL) {
		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
	}
	if (un->sd_fi_fifo_arq[i] != NULL) {
		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
	}

	/*
	 * kmem_free does not guarantee that the pointer is set to NULL.
	 * Since these slots are used to determine whether values were
	 * set, explicitly confirm they are always NULL after the free.
	 */
	un->sd_fi_fifo_pkt[i] = NULL;
	un->sd_fi_fifo_un[i] = NULL;
	un->sd_fi_fifo_xb[i] = NULL;
	un->sd_fi_fifo_arq[i] = NULL;

	/* consume this fifo entry */
	un->sd_fi_fifo_start++;

	mutex_exit(SD_MUTEX(un));

	SD_INFO(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
}
30900 
30901 #endif /* SD_FAULT_INJECTION */
30902 
30903 /*
30904  * This routine is invoked in sd_unit_attach(). Before calling it, the
30905  * properties in conf file should be processed already, and "hotpluggable"
30906  * property was processed also.
30907  *
30908  * The sd driver distinguishes 3 different type of devices: removable media,
30909  * non-removable media, and hotpluggable. Below the differences are defined:
30910  *
30911  * 1. Device ID
30912  *
30913  *     The device ID of a device is used to identify this device. Refer to
30914  *     ddi_devid_register(9F).
30915  *
30916  *     For a non-removable media disk device which can provide 0x80 or 0x83
30917  *     VPD page (refer to INQUIRY command of SCSI SPC specification), a unique
30918  *     device ID is created to identify this device. For other non-removable
30919  *     media devices, a default device ID is created only if this device has
 *     at least 2 alternate cylinders. Otherwise, this device has no devid.
30921  *
30922  *     -------------------------------------------------------
30923  *     removable media   hotpluggable  | Can Have Device ID
30924  *     -------------------------------------------------------
30925  *         false             false     |     Yes
30926  *         false             true      |     Yes
30927  *         true                x       |     No
30928  *     ------------------------------------------------------
30929  *
30930  *
30931  * 2. SCSI group 4 commands
30932  *
30933  *     In SCSI specs, only some commands in group 4 command set can use
30934  *     8-byte addresses that can be used to access >2TB storage spaces.
30935  *     Other commands have no such capability. Without supporting group4,
30936  *     it is impossible to make full use of storage spaces of a disk with
30937  *     capacity larger than 2TB.
30938  *
30939  *     -----------------------------------------------
30940  *     removable media   hotpluggable   LP64  |  Group
30941  *     -----------------------------------------------
30942  *           false          false       false |   1
30943  *           false          false       true  |   4
30944  *           false          true        false |   1
30945  *           false          true        true  |   4
30946  *           true             x           x   |   5
30947  *     -----------------------------------------------
30948  *
30949  *
30950  * 3. Check for VTOC Label
30951  *
30952  *     If a direct-access disk has no EFI label, sd will check if it has a
30953  *     valid VTOC label. Now, sd also does that check for removable media
30954  *     and hotpluggable devices.
30955  *
30956  *     --------------------------------------------------------------
30957  *     Direct-Access   removable media    hotpluggable |  Check Label
30958  *     -------------------------------------------------------------
30959  *         false          false           false        |   No
30960  *         false          false           true         |   No
30961  *         false          true            false        |   Yes
30962  *         false          true            true         |   Yes
30963  *         true            x                x          |   Yes
30964  *     --------------------------------------------------------------
30965  *
30966  *
30967  * 4. Building default VTOC label
30968  *
30969  *     As section 3 says, sd checks if some kinds of devices have VTOC label.
30970  *     If those devices have no valid VTOC label, sd(7d) will attempt to
30971  *     create default VTOC for them. Currently sd creates default VTOC label
30972  *     for all devices on x86 platform (VTOC_16), but only for removable
30973  *     media devices on SPARC (VTOC_8).
30974  *
30975  *     -----------------------------------------------------------
30976  *       removable media hotpluggable platform   |   Default Label
30977  *     -----------------------------------------------------------
30978  *             false          false    sparc     |     No
30979  *             false          true      x86      |     Yes
30980  *             false          true     sparc     |     Yes
30981  *             true             x        x       |     Yes
30982  *     ----------------------------------------------------------
30983  *
30984  *
30985  * 5. Supported blocksizes of target devices
30986  *
30987  *     Sd supports non-512-byte blocksize for removable media devices only.
30988  *     For other devices, only 512-byte blocksize is supported. This may be
30989  *     changed in near future because some RAID devices require non-512-byte
30990  *     blocksize
30991  *
30992  *     -----------------------------------------------------------
30993  *     removable media    hotpluggable    | non-512-byte blocksize
30994  *     -----------------------------------------------------------
30995  *           false          false         |   No
30996  *           false          true          |   No
30997  *           true             x           |   Yes
30998  *     -----------------------------------------------------------
30999  *
31000  *
31001  * 6. Automatic mount & unmount
31002  *
 *     Sd(7d) driver provides DKIOCREMOVABLE ioctl. This ioctl is used to query
 *     whether a device is a removable media device. It returns 1 for removable
 *     media devices, and 0 for others.
31006  *
31007  *     The automatic mounting subsystem should distinguish between the types
31008  *     of devices and apply automounting policies to each.
31009  *
31010  *
31011  * 7. fdisk partition management
31012  *
31013  *     Fdisk is traditional partition method on x86 platform. Sd(7d) driver
31014  *     just supports fdisk partitions on x86 platform. On sparc platform, sd
31015  *     doesn't support fdisk partitions at all. Note: pcfs(7fs) can recognize
31016  *     fdisk partitions on both x86 and SPARC platform.
31017  *
31018  *     -----------------------------------------------------------
31019  *       platform   removable media  USB/1394  |  fdisk supported
31020  *     -----------------------------------------------------------
31021  *        x86         X               X        |       true
31022  *     ------------------------------------------------------------
31023  *        sparc       X               X        |       false
31024  *     ------------------------------------------------------------
31025  *
31026  *
31027  * 8. MBOOT/MBR
31028  *
31029  *     Although sd(7d) doesn't support fdisk on SPARC platform, it does support
31030  *     read/write mboot for removable media devices on sparc platform.
31031  *
31032  *     -----------------------------------------------------------
31033  *       platform   removable media  USB/1394  |  mboot supported
31034  *     -----------------------------------------------------------
31035  *        x86         X               X        |       true
31036  *     ------------------------------------------------------------
31037  *        sparc      false           false     |       false
31038  *        sparc      false           true      |       true
31039  *        sparc      true            false     |       true
31040  *        sparc      true            true      |       true
31041  *     ------------------------------------------------------------
31042  *
31043  *
31044  * 9.  error handling during opening device
31045  *
31046  *     If failed to open a disk device, an errno is returned. For some kinds
31047  *     of errors, different errno is returned depending on if this device is
31048  *     a removable media device. This brings USB/1394 hard disks in line with
31049  *     expected hard disk behavior. It is not expected that this breaks any
31050  *     application.
31051  *
31052  *     ------------------------------------------------------
31053  *       removable media    hotpluggable   |  errno
31054  *     ------------------------------------------------------
31055  *             false          false        |   EIO
31056  *             false          true         |   EIO
31057  *             true             x          |   ENXIO
31058  *     ------------------------------------------------------
31059  *
31060  *
31061  * 11. ioctls: DKIOCEJECT, CDROMEJECT
31062  *
31063  *     These IOCTLs are applicable only to removable media devices.
31064  *
31065  *     -----------------------------------------------------------
31066  *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
31067  *     -----------------------------------------------------------
31068  *             false          false        |     No
31069  *             false          true         |     No
31070  *             true            x           |     Yes
31071  *     -----------------------------------------------------------
31072  *
31073  *
31074  * 12. Kstats for partitions
31075  *
31076  *     sd creates partition kstat for non-removable media devices. USB and
31077  *     Firewire hard disks now have partition kstats
31078  *
31079  *      ------------------------------------------------------
31080  *       removable media    hotpluggable   |   kstat
31081  *      ------------------------------------------------------
31082  *             false          false        |    Yes
31083  *             false          true         |    Yes
31084  *             true             x          |    No
31085  *       ------------------------------------------------------
31086  *
31087  *
31088  * 13. Removable media & hotpluggable properties
31089  *
31090  *     Sd driver creates a "removable-media" property for removable media
31091  *     devices. Parent nexus drivers create a "hotpluggable" property if
31092  *     it supports hotplugging.
31093  *
31094  *     ---------------------------------------------------------------------
31095  *     removable media   hotpluggable |  "removable-media"   " hotpluggable"
31096  *     ---------------------------------------------------------------------
31097  *       false            false       |    No                   No
31098  *       false            true        |    No                   Yes
31099  *       true             false       |    Yes                  No
31100  *       true             true        |    Yes                  Yes
31101  *     ---------------------------------------------------------------------
31102  *
31103  *
31104  * 14. Power Management
31105  *
31106  *     sd only power manages removable media devices or devices that support
31107  *     LOG_SENSE or have a "pm-capable" property  (PSARC/2002/250)
31108  *
31109  *     A parent nexus that supports hotplugging can also set "pm-capable"
31110  *     if the disk can be power managed.
31111  *
31112  *     ------------------------------------------------------------
31113  *       removable media hotpluggable pm-capable  |   power manage
31114  *     ------------------------------------------------------------
31115  *             false          false     false     |     No
31116  *             false          false     true      |     Yes
31117  *             false          true      false     |     No
31118  *             false          true      true      |     Yes
31119  *             true             x        x        |     Yes
31120  *     ------------------------------------------------------------
31121  *
31122  *      USB and firewire hard disks can now be power managed independently
31123  *      of the framebuffer
31124  *
31125  *
31126  * 15. Support for USB disks with capacity larger than 1TB
31127  *
31128  *     Currently, sd doesn't permit a fixed disk device with capacity
31129  *     larger than 1TB to be used in a 32-bit operating system environment.
31130  *     However, sd doesn't do that for removable media devices. Instead, it
31131  *     assumes that removable media devices cannot have a capacity larger
31132  *     than 1TB. Therefore, using those devices on 32-bit system is partially
31133  *     supported, which can cause some unexpected results.
31134  *
31135  *     ---------------------------------------------------------------------
31136  *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
31137  *     ---------------------------------------------------------------------
31138  *             false          false  |   true         |     no
31139  *             false          true   |   true         |     no
31140  *             true           false  |   true         |     Yes
31141  *             true           true   |   true         |     Yes
31142  *     ---------------------------------------------------------------------
31143  *
31144  *
31145  * 16. Check write-protection at open time
31146  *
31147  *     When a removable media device is being opened for writing without NDELAY
31148  *     flag, sd will check if this device is writable. If attempting to open
31149  *     without NDELAY flag a write-protected device, this operation will abort.
31150  *
31151  *     ------------------------------------------------------------
31152  *       removable media    USB/1394   |   WP Check
31153  *     ------------------------------------------------------------
31154  *             false          false    |     No
31155  *             false          true     |     No
31156  *             true           false    |     Yes
31157  *             true           true     |     Yes
31158  *     ------------------------------------------------------------
31159  *
31160  *
31161  * 17. syslog when corrupted VTOC is encountered
31162  *
 *      Currently, if an invalid VTOC is encountered, sd only prints a syslog
 *      message for fixed SCSI disks.
31165  *     ------------------------------------------------------------
31166  *       removable media    USB/1394   |   print syslog
31167  *     ------------------------------------------------------------
31168  *             false          false    |     Yes
31169  *             false          true     |     No
31170  *             true           false    |     No
31171  *             true           true     |     No
31172  *     ------------------------------------------------------------
31173  */
/*
 * Set the capability/behavior flags in the per-LUN soft state according to
 * whether the device reports removable media (INQUIRY RMB bit) and whether
 * the parent nexus marked it hotpluggable.  The behavior matrix is described
 * in the block comment above.
 *
 *   Arguments: un   - per-LUN soft state of the target device
 *              devi - dev_info node of the target device
 *
 *     Context: Called from sd_unit_attach() after the conf-file properties
 *              (including "hotpluggable") have been processed.
 */
static void
sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
{
	int	pm_cap;

	ASSERT(un->un_sd);
	ASSERT(un->un_sd->sd_inq);

	/*
	 * Enable SYNC CACHE support for all devices.
	 */
	un->un_f_sync_cache_supported = TRUE;

	/*
	 * Set the sync cache required flag to false.
	 * This would ensure that there is no SYNC CACHE
	 * sent when there are no writes.
	 */
	un->un_f_sync_cache_required = FALSE;

	if (un->un_sd->sd_inq->inq_rmb) {
		/*
		 * The media of this device is removable. And for this kind
		 * of devices, it is possible to change medium after opening
		 * devices. Thus we should support this operation.
		 */
		un->un_f_has_removable_media = TRUE;

		/*
		 * support non-512-byte blocksize of removable media devices
		 */
		un->un_f_non_devbsize_supported = TRUE;

		/*
		 * Assume that all removable media devices support DOOR_LOCK
		 */
		un->un_f_doorlock_supported = TRUE;

		/*
		 * For a removable media device, it is possible to be opened
		 * with NDELAY flag when there is no media in drive, in this
		 * case we don't care if device is writable. But if without
		 * NDELAY flag, we need to check if media is write-protected.
		 */
		un->un_f_chk_wp_open = TRUE;

		/*
		 * need to start a SCSI watch thread to monitor media state,
		 * when media is being inserted or ejected, notify syseventd.
		 */
		un->un_f_monitor_media_state = TRUE;

		/*
		 * Some devices don't support START_STOP_UNIT command.
		 * Therefore, we'd better check if a device supports it
		 * before sending it.
		 */
		un->un_f_check_start_stop = TRUE;

		/*
		 * support eject media ioctl:
		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
		 */
		un->un_f_eject_media_supported = TRUE;

		/*
		 * Because many removable-media devices don't support
		 * LOG_SENSE, we couldn't use this command to check if
		 * a removable media device supports power-management.
		 * We assume that they support power-management via
		 * START_STOP_UNIT command and can be spun up and down
		 * without limitations.
		 */
		un->un_f_pm_supported = TRUE;

		/*
		 * Need to create a zero length (Boolean) property
		 * removable-media for the removable media devices.
		 * Note that the return value of the property is not being
		 * checked, since if unable to create the property
		 * then do not want the attach to fail altogether. Consistent
		 * with other property creation in attach.
		 */
		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);

	} else {
		/*
		 * create device ID for non-removable devices only (see the
		 * "Device ID" table in the block comment above)
		 */
		un->un_f_devid_supported = TRUE;

		/*
		 * Spin up non-removable-media devices once attached
		 */
		un->un_f_attach_spinup = TRUE;

		/*
		 * According to SCSI specification, Sense data has two kinds of
		 * format: fixed format, and descriptor format. At present, we
		 * don't support descriptor format sense data for removable
		 * media.
		 */
		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
			un->un_f_descr_format_supported = TRUE;
		}

		/*
		 * kstats are created only for non-removable media devices.
		 *
		 * Set this in sd.conf to 0 in order to disable kstats.  The
		 * default is 1, so they are enabled by default.
		 */
		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
		    "enable-partition-kstats", 1));

		/*
		 * Check if HBA has set the "pm-capable" property.
		 * If "pm-capable" exists and is non-zero then we can
		 * power manage the device without checking the start/stop
		 * cycle count log sense page.
		 *
		 * If "pm-capable" exists and is set to be false (0),
		 * then we should not power manage the device.
		 *
		 * If "pm-capable" doesn't exist then pm_cap will
		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
		 * sd will check the start/stop cycle count log sense page
		 * and power manage the device if the cycle count limit has
		 * not been exceeded.
		 */
		pm_cap = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
		if (SD_PM_CAPABLE_IS_UNDEFINED(pm_cap)) {
			un->un_f_log_sense_supported = TRUE;
			/*
			 * SPC-4 (inq_ansi == 6) devices may also support
			 * the POWER CONDITION form of START STOP UNIT.
			 */
			if (!un->un_f_power_condition_disabled &&
			    SD_INQUIRY(un)->inq_ansi == 6) {
				un->un_f_power_condition_supported = TRUE;
			}
		} else {
			/*
			 * pm-capable property exists.
			 *
			 * Convert "TRUE" values for pm_cap to
			 * SD_PM_CAPABLE_IS_TRUE to make it easier to check
			 * later. "TRUE" values are any values defined in
			 * inquiry.h.
			 */
			if (SD_PM_CAPABLE_IS_FALSE(pm_cap)) {
				un->un_f_log_sense_supported = FALSE;
			} else {
				/* SD_PM_CAPABLE_IS_TRUE case */
				un->un_f_pm_supported = TRUE;
				if (!un->un_f_power_condition_disabled &&
				    SD_PM_CAPABLE_IS_SPC_4(pm_cap)) {
					un->un_f_power_condition_supported =
					    TRUE;
				}
				if (SD_PM_CAP_LOG_SUPPORTED(pm_cap)) {
					un->un_f_log_sense_supported = TRUE;
					un->un_f_pm_log_sense_smart =
					    SD_PM_CAP_SMART_LOG(pm_cap);
				}
			}

			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_unit_attach: un:0x%p pm-capable "
			    "property set to %d.\n", un, un->un_f_pm_supported);
		}
	}

	if (un->un_f_is_hotpluggable) {

		/*
		 * Have to watch hotpluggable devices as well, since
		 * that's the only way for userland applications to
		 * detect hot removal while device is busy/mounted.
		 */
		un->un_f_monitor_media_state = TRUE;

		un->un_f_check_start_stop = TRUE;

	}
}
31359 
31360 /*
31361  * sd_tg_rdwr:
31362  * Provides rdwr access for cmlb via sd_tgops. The start_block is
31363  * in sys block size, req_length in bytes.
31364  *
31365  */
31366 static int
31367 sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
31368     diskaddr_t start_block, size_t reqlength, void *tg_cookie)
31369 {
31370 	struct sd_lun *un;
31371 	int path_flag = (int)(uintptr_t)tg_cookie;
31372 	char *dkl = NULL;
31373 	diskaddr_t real_addr = start_block;
31374 	diskaddr_t first_byte, end_block;
31375 
31376 	size_t	buffer_size = reqlength;
31377 	int rval = 0;
31378 	diskaddr_t	cap;
31379 	uint32_t	lbasize;
31380 	sd_ssc_t	*ssc;
31381 
31382 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
31383 	if (un == NULL)
31384 		return (ENXIO);
31385 
31386 	if (cmd != TG_READ && cmd != TG_WRITE)
31387 		return (EINVAL);
31388 
31389 	ssc = sd_ssc_init(un);
31390 	mutex_enter(SD_MUTEX(un));
31391 	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
31392 		mutex_exit(SD_MUTEX(un));
31393 		rval = sd_send_scsi_READ_CAPACITY(ssc, (uint64_t *)&cap,
31394 		    &lbasize, path_flag);
31395 		if (rval != 0)
31396 			goto done1;
31397 		mutex_enter(SD_MUTEX(un));
31398 		sd_update_block_info(un, lbasize, cap);
31399 		if ((un->un_f_tgt_blocksize_is_valid == FALSE)) {
31400 			mutex_exit(SD_MUTEX(un));
31401 			rval = EIO;
31402 			goto done;
31403 		}
31404 	}
31405 
31406 	if (NOT_DEVBSIZE(un)) {
31407 		/*
31408 		 * sys_blocksize != tgt_blocksize, need to re-adjust
31409 		 * blkno and save the index to beginning of dk_label
31410 		 */
31411 		first_byte  = SD_SYSBLOCKS2BYTES(start_block);
31412 		real_addr = first_byte / un->un_tgt_blocksize;
31413 
31414 		end_block = (first_byte + reqlength +
31415 		    un->un_tgt_blocksize - 1) / un->un_tgt_blocksize;
31416 
31417 		/* round up buffer size to multiple of target block size */
31418 		buffer_size = (end_block - real_addr) * un->un_tgt_blocksize;
31419 
31420 		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_tg_rdwr",
31421 		    "label_addr: 0x%x allocation size: 0x%x\n",
31422 		    real_addr, buffer_size);
31423 
31424 		if (((first_byte % un->un_tgt_blocksize) != 0) ||
31425 		    (reqlength % un->un_tgt_blocksize) != 0)
31426 			/* the request is not aligned */
31427 			dkl = kmem_zalloc(buffer_size, KM_SLEEP);
31428 	}
31429 
31430 	/*
31431 	 * The MMC standard allows READ CAPACITY to be
31432 	 * inaccurate by a bounded amount (in the interest of
31433 	 * response latency).  As a result, failed READs are
31434 	 * commonplace (due to the reading of metadata and not
31435 	 * data). Depending on the per-Vendor/drive Sense data,
31436 	 * the failed READ can cause many (unnecessary) retries.
31437 	 */
31438 
31439 	if (ISCD(un) && (cmd == TG_READ) &&
31440 	    (un->un_f_blockcount_is_valid == TRUE) &&
31441 	    ((start_block == (un->un_blockcount - 1)) ||
31442 	    (start_block == (un->un_blockcount - 2)))) {
31443 			path_flag = SD_PATH_DIRECT_PRIORITY;
31444 	}
31445 
31446 	mutex_exit(SD_MUTEX(un));
31447 	if (cmd == TG_READ) {
31448 		rval = sd_send_scsi_READ(ssc, (dkl != NULL) ? dkl : bufaddr,
31449 		    buffer_size, real_addr, path_flag);
31450 		if (dkl != NULL)
31451 			bcopy(dkl + SD_TGTBYTEOFFSET(un, start_block,
31452 			    real_addr), bufaddr, reqlength);
31453 	} else {
31454 		if (dkl) {
31455 			rval = sd_send_scsi_READ(ssc, dkl, buffer_size,
31456 			    real_addr, path_flag);
31457 			if (rval) {
31458 				goto done1;
31459 			}
31460 			bcopy(bufaddr, dkl + SD_TGTBYTEOFFSET(un, start_block,
31461 			    real_addr), reqlength);
31462 		}
31463 		rval = sd_send_scsi_WRITE(ssc, (dkl != NULL) ? dkl : bufaddr,
31464 		    buffer_size, real_addr, path_flag);
31465 	}
31466 
31467 done1:
31468 	if (dkl != NULL)
31469 		kmem_free(dkl, buffer_size);
31470 
31471 	if (rval != 0) {
31472 		if (rval == EIO)
31473 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
31474 		else
31475 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
31476 	}
31477 done:
31478 	sd_ssc_fini(ssc);
31479 	return (rval);
31480 }
31481 
31482 
31483 static int
31484 sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie)
31485 {
31486 
31487 	struct sd_lun *un;
31488 	diskaddr_t	cap;
31489 	uint32_t	lbasize;
31490 	int		path_flag = (int)(uintptr_t)tg_cookie;
31491 	int		ret = 0;
31492 
31493 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
31494 	if (un == NULL)
31495 		return (ENXIO);
31496 
31497 	switch (cmd) {
31498 	case TG_GETPHYGEOM:
31499 	case TG_GETVIRTGEOM:
31500 	case TG_GETCAPACITY:
31501 	case TG_GETBLOCKSIZE:
31502 		mutex_enter(SD_MUTEX(un));
31503 
31504 		if ((un->un_f_blockcount_is_valid == TRUE) &&
31505 		    (un->un_f_tgt_blocksize_is_valid == TRUE)) {
31506 			cap = un->un_blockcount;
31507 			lbasize = un->un_tgt_blocksize;
31508 			mutex_exit(SD_MUTEX(un));
31509 		} else {
31510 			sd_ssc_t	*ssc;
31511 			mutex_exit(SD_MUTEX(un));
31512 			ssc = sd_ssc_init(un);
31513 			ret = sd_send_scsi_READ_CAPACITY(ssc, (uint64_t *)&cap,
31514 			    &lbasize, path_flag);
31515 			if (ret != 0) {
31516 				if (ret == EIO)
31517 					sd_ssc_assessment(ssc,
31518 					    SD_FMT_STATUS_CHECK);
31519 				else
31520 					sd_ssc_assessment(ssc,
31521 					    SD_FMT_IGNORE);
31522 				sd_ssc_fini(ssc);
31523 				return (ret);
31524 			}
31525 			sd_ssc_fini(ssc);
31526 			mutex_enter(SD_MUTEX(un));
31527 			sd_update_block_info(un, lbasize, cap);
31528 			if ((un->un_f_blockcount_is_valid == FALSE) ||
31529 			    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
31530 				mutex_exit(SD_MUTEX(un));
31531 				return (EIO);
31532 			}
31533 			mutex_exit(SD_MUTEX(un));
31534 		}
31535 
31536 		if (cmd == TG_GETCAPACITY) {
31537 			*(diskaddr_t *)arg = cap;
31538 			return (0);
31539 		}
31540 
31541 		if (cmd == TG_GETBLOCKSIZE) {
31542 			*(uint32_t *)arg = lbasize;
31543 			return (0);
31544 		}
31545 
31546 		if (cmd == TG_GETPHYGEOM)
31547 			ret = sd_get_physical_geometry(un, (cmlb_geom_t *)arg,
31548 			    cap, lbasize, path_flag);
31549 		else
31550 			/* TG_GETVIRTGEOM */
31551 			ret = sd_get_virtual_geometry(un,
31552 			    (cmlb_geom_t *)arg, cap, lbasize);
31553 
31554 		return (ret);
31555 
31556 	case TG_GETATTR:
31557 		mutex_enter(SD_MUTEX(un));
31558 		((tg_attribute_t *)arg)->media_is_writable =
31559 		    un->un_f_mmc_writable_media;
31560 		((tg_attribute_t *)arg)->media_is_solid_state =
31561 		    un->un_f_is_solid_state;
31562 		((tg_attribute_t *)arg)->media_is_rotational =
31563 		    un->un_f_is_rotational;
31564 		mutex_exit(SD_MUTEX(un));
31565 		return (0);
31566 	default:
31567 		return (ENOTTY);
31568 
31569 	}
31570 }
31571 
31572 /*
31573  *    Function: sd_ssc_ereport_post
31574  *
31575  * Description: Will be called when SD driver need to post an ereport.
31576  *
31577  *    Context: Kernel thread or interrupt context.
31578  */
31579 
31580 #define	DEVID_IF_KNOWN(d) "devid", DATA_TYPE_STRING, (d) ? (d) : "unknown"
31581 
31582 static void
31583 sd_ssc_ereport_post(sd_ssc_t *ssc, enum sd_driver_assessment drv_assess)
31584 {
31585 	int uscsi_path_instance = 0;
31586 	uchar_t	uscsi_pkt_reason;
31587 	uint32_t uscsi_pkt_state;
31588 	uint32_t uscsi_pkt_statistics;
31589 	uint64_t uscsi_ena;
31590 	uchar_t op_code;
31591 	uint8_t *sensep;
31592 	union scsi_cdb *cdbp;
31593 	uint_t cdblen = 0;
31594 	uint_t senlen = 0;
31595 	struct sd_lun *un;
31596 	dev_info_t *dip;
31597 	char *devid;
31598 	int ssc_invalid_flags = SSC_FLAGS_INVALID_PKT_REASON |
31599 	    SSC_FLAGS_INVALID_STATUS |
31600 	    SSC_FLAGS_INVALID_SENSE |
31601 	    SSC_FLAGS_INVALID_DATA;
31602 	char assessment[16];
31603 
31604 	ASSERT(ssc != NULL);
31605 	ASSERT(ssc->ssc_uscsi_cmd != NULL);
31606 	ASSERT(ssc->ssc_uscsi_info != NULL);
31607 
31608 	un = ssc->ssc_un;
31609 	ASSERT(un != NULL);
31610 
31611 	dip = un->un_sd->sd_dev;
31612 
31613 	/*
31614 	 * Get the devid:
31615 	 *	devid will only be passed to non-transport error reports.
31616 	 */
31617 	devid = DEVI(dip)->devi_devid_str;
31618 
31619 	/*
31620 	 * If we are syncing or dumping, the command will not be executed
31621 	 * so we bypass this situation.
31622 	 */
31623 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
31624 	    (un->un_state == SD_STATE_DUMPING))
31625 		return;
31626 
31627 	uscsi_pkt_reason = ssc->ssc_uscsi_info->ui_pkt_reason;
31628 	uscsi_path_instance = ssc->ssc_uscsi_cmd->uscsi_path_instance;
31629 	uscsi_pkt_state = ssc->ssc_uscsi_info->ui_pkt_state;
31630 	uscsi_pkt_statistics = ssc->ssc_uscsi_info->ui_pkt_statistics;
31631 	uscsi_ena = ssc->ssc_uscsi_info->ui_ena;
31632 
31633 	sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
31634 	cdbp = (union scsi_cdb *)ssc->ssc_uscsi_cmd->uscsi_cdb;
31635 
31636 	/* In rare cases, EG:DOORLOCK, the cdb could be NULL */
31637 	if (cdbp == NULL) {
31638 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
31639 		    "sd_ssc_ereport_post meet empty cdb\n");
31640 		return;
31641 	}
31642 
31643 	op_code = cdbp->scc_cmd;
31644 
31645 	cdblen = (int)ssc->ssc_uscsi_cmd->uscsi_cdblen;
31646 	senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
31647 	    ssc->ssc_uscsi_cmd->uscsi_rqresid);
31648 
31649 	if (senlen > 0)
31650 		ASSERT(sensep != NULL);
31651 
31652 	/*
31653 	 * Initialize drv_assess to corresponding values.
31654 	 * SD_FM_DRV_FATAL will be mapped to "fail" or "fatal" depending
31655 	 * on the sense-key returned back.
31656 	 */
31657 	switch (drv_assess) {
31658 		case SD_FM_DRV_RECOVERY:
31659 			(void) sprintf(assessment, "%s", "recovered");
31660 			break;
31661 		case SD_FM_DRV_RETRY:
31662 			(void) sprintf(assessment, "%s", "retry");
31663 			break;
31664 		case SD_FM_DRV_NOTICE:
31665 			(void) sprintf(assessment, "%s", "info");
31666 			break;
31667 		case SD_FM_DRV_FATAL:
31668 		default:
31669 			(void) sprintf(assessment, "%s", "unknown");
31670 	}
31671 	/*
31672 	 * If drv_assess == SD_FM_DRV_RECOVERY, this should be a recovered
31673 	 * command, we will post ereport.io.scsi.cmd.disk.recovered.
31674 	 * driver-assessment will always be "recovered" here.
31675 	 */
31676 	if (drv_assess == SD_FM_DRV_RECOVERY) {
31677 		scsi_fm_ereport_post(un->un_sd, uscsi_path_instance, NULL,
31678 		    "cmd.disk.recovered", uscsi_ena, devid, NULL,
31679 		    DDI_NOSLEEP, NULL,
31680 		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
31681 		    DEVID_IF_KNOWN(devid),
31682 		    "driver-assessment", DATA_TYPE_STRING, assessment,
31683 		    "op-code", DATA_TYPE_UINT8, op_code,
31684 		    "cdb", DATA_TYPE_UINT8_ARRAY,
31685 		    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
31686 		    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
31687 		    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
31688 		    "pkt-stats", DATA_TYPE_UINT32, uscsi_pkt_statistics,
31689 		    NULL);
31690 		return;
31691 	}
31692 
31693 	/*
31694 	 * If there is un-expected/un-decodable data, we should post
31695 	 * ereport.io.scsi.cmd.disk.dev.uderr.
31696 	 * driver-assessment will be set based on parameter drv_assess.
31697 	 * SSC_FLAGS_INVALID_SENSE - invalid sense data sent back.
31698 	 * SSC_FLAGS_INVALID_PKT_REASON - invalid pkt-reason encountered.
31699 	 * SSC_FLAGS_INVALID_STATUS - invalid stat-code encountered.
31700 	 * SSC_FLAGS_INVALID_DATA - invalid data sent back.
31701 	 */
31702 	if (ssc->ssc_flags & ssc_invalid_flags) {
31703 		if (ssc->ssc_flags & SSC_FLAGS_INVALID_SENSE) {
31704 			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
31705 			    NULL, "cmd.disk.dev.uderr", uscsi_ena, devid,
31706 			    NULL, DDI_NOSLEEP, NULL,
31707 			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
31708 			    DEVID_IF_KNOWN(devid),
31709 			    "driver-assessment", DATA_TYPE_STRING,
31710 			    drv_assess == SD_FM_DRV_FATAL ?
31711 			    "fail" : assessment,
31712 			    "op-code", DATA_TYPE_UINT8, op_code,
31713 			    "cdb", DATA_TYPE_UINT8_ARRAY,
31714 			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
31715 			    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
31716 			    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
31717 			    "pkt-stats", DATA_TYPE_UINT32,
31718 			    uscsi_pkt_statistics,
31719 			    "stat-code", DATA_TYPE_UINT8,
31720 			    ssc->ssc_uscsi_cmd->uscsi_status,
31721 			    "un-decode-info", DATA_TYPE_STRING,
31722 			    ssc->ssc_info,
31723 			    "un-decode-value", DATA_TYPE_UINT8_ARRAY,
31724 			    senlen, sensep,
31725 			    NULL);
31726 		} else {
31727 			/*
31728 			 * For other type of invalid data, the
31729 			 * un-decode-value field would be empty because the
31730 			 * un-decodable content could be seen from upper
31731 			 * level payload or inside un-decode-info.
31732 			 */
31733 			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
31734 			    NULL,
31735 			    "cmd.disk.dev.uderr", uscsi_ena, devid,
31736 			    NULL, DDI_NOSLEEP, NULL,
31737 			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
31738 			    DEVID_IF_KNOWN(devid),
31739 			    "driver-assessment", DATA_TYPE_STRING,
31740 			    drv_assess == SD_FM_DRV_FATAL ?
31741 			    "fail" : assessment,
31742 			    "op-code", DATA_TYPE_UINT8, op_code,
31743 			    "cdb", DATA_TYPE_UINT8_ARRAY,
31744 			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
31745 			    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
31746 			    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
31747 			    "pkt-stats", DATA_TYPE_UINT32,
31748 			    uscsi_pkt_statistics,
31749 			    "stat-code", DATA_TYPE_UINT8,
31750 			    ssc->ssc_uscsi_cmd->uscsi_status,
31751 			    "un-decode-info", DATA_TYPE_STRING,
31752 			    ssc->ssc_info,
31753 			    "un-decode-value", DATA_TYPE_UINT8_ARRAY,
31754 			    0, NULL,
31755 			    NULL);
31756 		}
31757 		ssc->ssc_flags &= ~ssc_invalid_flags;
31758 		return;
31759 	}
31760 
31761 	if (uscsi_pkt_reason != CMD_CMPLT ||
31762 	    (ssc->ssc_flags & SSC_FLAGS_TRAN_ABORT)) {
31763 		/*
31764 		 * pkt-reason != CMD_CMPLT or SSC_FLAGS_TRAN_ABORT was
31765 		 * set inside sd_start_cmds due to errors(bad packet or
31766 		 * fatal transport error), we should take it as a
31767 		 * transport error, so we post ereport.io.scsi.cmd.disk.tran.
31768 		 * driver-assessment will be set based on drv_assess.
31769 		 * We will set devid to NULL because it is a transport
31770 		 * error.
31771 		 */
31772 		if (ssc->ssc_flags & SSC_FLAGS_TRAN_ABORT)
31773 			ssc->ssc_flags &= ~SSC_FLAGS_TRAN_ABORT;
31774 
31775 		scsi_fm_ereport_post(un->un_sd, uscsi_path_instance, NULL,
31776 		    "cmd.disk.tran", uscsi_ena, NULL, NULL, DDI_NOSLEEP, NULL,
31777 		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
31778 		    DEVID_IF_KNOWN(devid),
31779 		    "driver-assessment", DATA_TYPE_STRING,
31780 		    drv_assess == SD_FM_DRV_FATAL ? "fail" : assessment,
31781 		    "op-code", DATA_TYPE_UINT8, op_code,
31782 		    "cdb", DATA_TYPE_UINT8_ARRAY,
31783 		    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
31784 		    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
31785 		    "pkt-state", DATA_TYPE_UINT8, uscsi_pkt_state,
31786 		    "pkt-stats", DATA_TYPE_UINT32, uscsi_pkt_statistics,
31787 		    NULL);
31788 	} else {
31789 		/*
31790 		 * If we got here, we have a completed command, and we need
31791 		 * to further investigate the sense data to see what kind
31792 		 * of ereport we should post.
31793 		 * No ereport is needed if sense-key is KEY_RECOVERABLE_ERROR
31794 		 * and asc/ascq is "ATA PASS-THROUGH INFORMATION AVAILABLE".
31795 		 * Post ereport.io.scsi.cmd.disk.dev.rqs.merr if sense-key is
31796 		 * KEY_MEDIUM_ERROR.
31797 		 * Post ereport.io.scsi.cmd.disk.dev.rqs.derr otherwise.
31798 		 * driver-assessment will be set based on the parameter
31799 		 * drv_assess.
31800 		 */
31801 		if (senlen > 0) {
31802 			/*
31803 			 * Here we have sense data available.
31804 			 */
31805 			uint8_t sense_key = scsi_sense_key(sensep);
31806 			uint8_t sense_asc = scsi_sense_asc(sensep);
31807 			uint8_t sense_ascq = scsi_sense_ascq(sensep);
31808 
31809 			if (sense_key == KEY_RECOVERABLE_ERROR &&
31810 			    sense_asc == 0x00 && sense_ascq == 0x1d)
31811 				return;
31812 
31813 			if (sense_key == KEY_MEDIUM_ERROR) {
31814 				/*
31815 				 * driver-assessment should be "fatal" if
31816 				 * drv_assess is SD_FM_DRV_FATAL.
31817 				 */
31818 				scsi_fm_ereport_post(un->un_sd,
31819 				    uscsi_path_instance, NULL,
31820 				    "cmd.disk.dev.rqs.merr",
31821 				    uscsi_ena, devid, NULL, DDI_NOSLEEP, NULL,
31822 				    FM_VERSION, DATA_TYPE_UINT8,
31823 				    FM_EREPORT_VERS0,
31824 				    DEVID_IF_KNOWN(devid),
31825 				    "driver-assessment",
31826 				    DATA_TYPE_STRING,
31827 				    drv_assess == SD_FM_DRV_FATAL ?
31828 				    "fatal" : assessment,
31829 				    "op-code",
31830 				    DATA_TYPE_UINT8, op_code,
31831 				    "cdb",
31832 				    DATA_TYPE_UINT8_ARRAY, cdblen,
31833 				    ssc->ssc_uscsi_cmd->uscsi_cdb,
31834 				    "pkt-reason",
31835 				    DATA_TYPE_UINT8, uscsi_pkt_reason,
31836 				    "pkt-state",
31837 				    DATA_TYPE_UINT8, uscsi_pkt_state,
31838 				    "pkt-stats",
31839 				    DATA_TYPE_UINT32,
31840 				    uscsi_pkt_statistics,
31841 				    "stat-code",
31842 				    DATA_TYPE_UINT8,
31843 				    ssc->ssc_uscsi_cmd->uscsi_status,
31844 				    "key",
31845 				    DATA_TYPE_UINT8,
31846 				    scsi_sense_key(sensep),
31847 				    "asc",
31848 				    DATA_TYPE_UINT8,
31849 				    scsi_sense_asc(sensep),
31850 				    "ascq",
31851 				    DATA_TYPE_UINT8,
31852 				    scsi_sense_ascq(sensep),
31853 				    "sense-data",
31854 				    DATA_TYPE_UINT8_ARRAY,
31855 				    senlen, sensep,
31856 				    "lba",
31857 				    DATA_TYPE_UINT64,
31858 				    ssc->ssc_uscsi_info->ui_lba,
31859 				    NULL);
31860 			} else {
31861 				/*
31862 				 * if sense-key == 0x4(hardware
31863 				 * error), driver-assessment should
31864 				 * be "fatal" if drv_assess is
31865 				 * SD_FM_DRV_FATAL.
31866 				 */
31867 				scsi_fm_ereport_post(un->un_sd,
31868 				    uscsi_path_instance, NULL,
31869 				    "cmd.disk.dev.rqs.derr",
31870 				    uscsi_ena, devid,
31871 				    NULL, DDI_NOSLEEP, NULL,
31872 				    FM_VERSION,
31873 				    DATA_TYPE_UINT8, FM_EREPORT_VERS0,
31874 				    DEVID_IF_KNOWN(devid),
31875 				    "driver-assessment",
31876 				    DATA_TYPE_STRING,
31877 				    drv_assess == SD_FM_DRV_FATAL ?
31878 				    (sense_key == 0x4 ?
31879 				    "fatal" : "fail") : assessment,
31880 				    "op-code",
31881 				    DATA_TYPE_UINT8, op_code,
31882 				    "cdb",
31883 				    DATA_TYPE_UINT8_ARRAY, cdblen,
31884 				    ssc->ssc_uscsi_cmd->uscsi_cdb,
31885 				    "pkt-reason",
31886 				    DATA_TYPE_UINT8, uscsi_pkt_reason,
31887 				    "pkt-state",
31888 				    DATA_TYPE_UINT8, uscsi_pkt_state,
31889 				    "pkt-stats",
31890 				    DATA_TYPE_UINT32,
31891 				    uscsi_pkt_statistics,
31892 				    "stat-code",
31893 				    DATA_TYPE_UINT8,
31894 				    ssc->ssc_uscsi_cmd->uscsi_status,
31895 				    "key",
31896 				    DATA_TYPE_UINT8,
31897 				    scsi_sense_key(sensep),
31898 				    "asc",
31899 				    DATA_TYPE_UINT8,
31900 				    scsi_sense_asc(sensep),
31901 				    "ascq",
31902 				    DATA_TYPE_UINT8,
31903 				    scsi_sense_ascq(sensep),
31904 				    "sense-data",
31905 				    DATA_TYPE_UINT8_ARRAY,
31906 				    senlen, sensep,
31907 				    NULL);
31908 			}
31909 		} else {
31910 			/*
31911 			 * For stat_code == STATUS_GOOD, this is not a
31912 			 * hardware error.
31913 			 */
31914 			if (ssc->ssc_uscsi_cmd->uscsi_status == STATUS_GOOD)
31915 				return;
31916 
31917 			/*
31918 			 * Post ereport.io.scsi.cmd.disk.dev.serr if we got the
31919 			 * stat-code but with sense data unavailable.
31920 			 * driver-assessment will be set based on parameter
31921 			 * drv_assess.
31922 			 */
31923 			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
31924 			    NULL,
31925 			    "cmd.disk.dev.serr", uscsi_ena,
31926 			    devid, NULL, DDI_NOSLEEP, NULL,
31927 			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
31928 			    DEVID_IF_KNOWN(devid),
31929 			    "driver-assessment", DATA_TYPE_STRING,
31930 			    drv_assess == SD_FM_DRV_FATAL ? "fail" : assessment,
31931 			    "op-code", DATA_TYPE_UINT8, op_code,
31932 			    "cdb",
31933 			    DATA_TYPE_UINT8_ARRAY,
31934 			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
31935 			    "pkt-reason",
31936 			    DATA_TYPE_UINT8, uscsi_pkt_reason,
31937 			    "pkt-state",
31938 			    DATA_TYPE_UINT8, uscsi_pkt_state,
31939 			    "pkt-stats",
31940 			    DATA_TYPE_UINT32, uscsi_pkt_statistics,
31941 			    "stat-code",
31942 			    DATA_TYPE_UINT8,
31943 			    ssc->ssc_uscsi_cmd->uscsi_status,
31944 			    NULL);
31945 		}
31946 	}
31947 }
31948 
31949 /*
31950  *     Function: sd_ssc_extract_info
31951  *
31952  * Description: Extract information available to help generate ereport.
31953  *
31954  *     Context: Kernel thread or interrupt context.
31955  */
31956 static void
31957 sd_ssc_extract_info(sd_ssc_t *ssc, struct sd_lun *un, struct scsi_pkt *pktp,
31958     struct buf *bp, struct sd_xbuf *xp)
31959 {
31960 	size_t senlen = 0;
31961 	union scsi_cdb *cdbp;
31962 	int path_instance;
31963 	/*
31964 	 * Need scsi_cdb_size array to determine the cdb length.
31965 	 */
31966 	extern uchar_t	scsi_cdb_size[];
31967 
31968 	ASSERT(un != NULL);
31969 	ASSERT(pktp != NULL);
31970 	ASSERT(bp != NULL);
31971 	ASSERT(xp != NULL);
31972 	ASSERT(ssc != NULL);
31973 	ASSERT(mutex_owned(SD_MUTEX(un)));
31974 
31975 	/*
31976 	 * Transfer the cdb buffer pointer here.
31977 	 */
31978 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
31979 
31980 	ssc->ssc_uscsi_cmd->uscsi_cdblen = scsi_cdb_size[GETGROUP(cdbp)];
31981 	ssc->ssc_uscsi_cmd->uscsi_cdb = (caddr_t)cdbp;
31982 
31983 	/*
31984 	 * Transfer the sense data buffer pointer if sense data is available,
31985 	 * calculate the sense data length first.
31986 	 */
31987 	if ((xp->xb_sense_state & STATE_XARQ_DONE) ||
31988 	    (xp->xb_sense_state & STATE_ARQ_DONE)) {
31989 		/*
31990 		 * For arq case, we will enter here.
31991 		 */
31992 		if (xp->xb_sense_state & STATE_XARQ_DONE) {
31993 			senlen = MAX_SENSE_LENGTH - xp->xb_sense_resid;
31994 		} else {
31995 			senlen = SENSE_LENGTH;
31996 		}
31997 	} else {
31998 		/*
31999 		 * For non-arq case, we will enter this branch.
32000 		 */
32001 		if (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK &&
32002 		    (xp->xb_sense_state & STATE_XFERRED_DATA)) {
32003 			senlen = SENSE_LENGTH - xp->xb_sense_resid;
32004 		}
32005 
32006 	}
32007 
32008 	ssc->ssc_uscsi_cmd->uscsi_rqlen = (senlen & 0xff);
32009 	ssc->ssc_uscsi_cmd->uscsi_rqresid = 0;
32010 	ssc->ssc_uscsi_cmd->uscsi_rqbuf = (caddr_t)xp->xb_sense_data;
32011 
32012 	ssc->ssc_uscsi_cmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
32013 
32014 	/*
32015 	 * Only transfer path_instance when scsi_pkt was properly allocated.
32016 	 */
32017 	path_instance = pktp->pkt_path_instance;
32018 	if (scsi_pkt_allocated_correctly(pktp) && path_instance)
32019 		ssc->ssc_uscsi_cmd->uscsi_path_instance = path_instance;
32020 	else
32021 		ssc->ssc_uscsi_cmd->uscsi_path_instance = 0;
32022 
32023 	/*
32024 	 * Copy in the other fields we may need when posting ereport.
32025 	 */
32026 	ssc->ssc_uscsi_info->ui_pkt_reason = pktp->pkt_reason;
32027 	ssc->ssc_uscsi_info->ui_pkt_state = pktp->pkt_state;
32028 	ssc->ssc_uscsi_info->ui_pkt_statistics = pktp->pkt_statistics;
32029 	ssc->ssc_uscsi_info->ui_lba = (uint64_t)SD_GET_BLKNO(bp);
32030 
32031 	/*
32032 	 * For partially read/write command, we will not create ena
32033 	 * in case of a successful command be reconized as recovered.
32034 	 */
32035 	if ((pktp->pkt_reason == CMD_CMPLT) &&
32036 	    (ssc->ssc_uscsi_cmd->uscsi_status == STATUS_GOOD) &&
32037 	    (senlen == 0)) {
32038 		return;
32039 	}
32040 
32041 	/*
32042 	 * To associate ereports of a single command execution flow, we
32043 	 * need a shared ena for a specific command.
32044 	 */
32045 	if (xp->xb_ena == 0)
32046 		xp->xb_ena = fm_ena_generate(0, FM_ENA_FMT1);
32047 	ssc->ssc_uscsi_info->ui_ena = xp->xb_ena;
32048 }
32049 
32050 
32051 /*
32052  *     Function: sd_check_bdc_vpd
32053  *
32054  * Description: Query the optional INQUIRY VPD page 0xb1. If the device
32055  *              supports VPD page 0xb1, sd examines the MEDIUM ROTATION
32056  *              RATE.
32057  *
32058  *		Set the following based on RPM value:
32059  *		= 0	device is not solid state, non-rotational
32060  *		= 1	device is solid state, non-rotational
32061  *		> 1	device is not solid state, rotational
32062  *
32063  *     Context: Kernel thread or interrupt context.
32064  */
32065 
32066 static void
32067 sd_check_bdc_vpd(sd_ssc_t *ssc)
32068 {
32069 	int		rval		= 0;
32070 	uchar_t		*inqb1		= NULL;
32071 	size_t		inqb1_len	= MAX_INQUIRY_SIZE;
32072 	size_t		inqb1_resid	= 0;
32073 	struct sd_lun	*un;
32074 
32075 	ASSERT(ssc != NULL);
32076 	un = ssc->ssc_un;
32077 	ASSERT(un != NULL);
32078 	ASSERT(!mutex_owned(SD_MUTEX(un)));
32079 
32080 	mutex_enter(SD_MUTEX(un));
32081 	un->un_f_is_rotational = TRUE;
32082 	un->un_f_is_solid_state = FALSE;
32083 
32084 	if (ISCD(un)) {
32085 		mutex_exit(SD_MUTEX(un));
32086 		return;
32087 	}
32088 
32089 	if (sd_check_vpd_page_support(ssc) == 0 &&
32090 	    un->un_vpd_page_mask & SD_VPD_DEV_CHARACTER_PG) {
32091 		mutex_exit(SD_MUTEX(un));
32092 		/* collect page b1 data */
32093 		inqb1 = kmem_zalloc(inqb1_len, KM_SLEEP);
32094 
32095 		rval = sd_send_scsi_INQUIRY(ssc, inqb1, inqb1_len,
32096 		    0x01, 0xB1, &inqb1_resid);
32097 
32098 		if (rval == 0 && (inqb1_len - inqb1_resid > 5)) {
32099 			SD_TRACE(SD_LOG_COMMON, un,
32100 			    "sd_check_bdc_vpd: \
32101 			    successfully get VPD page: %x \
32102 			    PAGE LENGTH: %x BYTE 4: %x \
32103 			    BYTE 5: %x", inqb1[1], inqb1[3], inqb1[4],
32104 			    inqb1[5]);
32105 
32106 			mutex_enter(SD_MUTEX(un));
32107 			/*
32108 			 * Check the MEDIUM ROTATION RATE.
32109 			 */
32110 			if (inqb1[4] == 0) {
32111 				if (inqb1[5] == 0) {
32112 					un->un_f_is_rotational = FALSE;
32113 				} else if (inqb1[5] == 1) {
32114 					un->un_f_is_rotational = FALSE;
32115 					un->un_f_is_solid_state = TRUE;
32116 					/*
32117 					 * Solid state drives don't need
32118 					 * disksort.
32119 					 */
32120 					un->un_f_disksort_disabled = TRUE;
32121 				}
32122 			}
32123 			mutex_exit(SD_MUTEX(un));
32124 		} else if (rval != 0) {
32125 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
32126 		}
32127 
32128 		kmem_free(inqb1, inqb1_len);
32129 	} else {
32130 		mutex_exit(SD_MUTEX(un));
32131 	}
32132 }
32133 
32134 /*
32135  *	Function: sd_check_emulation_mode
32136  *
32137  *   Description: Check whether the SSD is at emulation mode
32138  *		  by issuing READ_CAPACITY_16 to see whether
32139  *		  we can get physical block size of the drive.
32140  *
32141  *	 Context: Kernel thread or interrupt context.
32142  */
32143 
32144 static void
32145 sd_check_emulation_mode(sd_ssc_t *ssc)
32146 {
32147 	int		rval = 0;
32148 	uint64_t	capacity;
32149 	uint_t		lbasize;
32150 	uint_t		pbsize;
32151 	int		i;
32152 	int		devid_len;
32153 	struct sd_lun	*un;
32154 
32155 	ASSERT(ssc != NULL);
32156 	un = ssc->ssc_un;
32157 	ASSERT(un != NULL);
32158 	ASSERT(!mutex_owned(SD_MUTEX(un)));
32159 
32160 	mutex_enter(SD_MUTEX(un));
32161 	if (ISCD(un)) {
32162 		mutex_exit(SD_MUTEX(un));
32163 		return;
32164 	}
32165 
32166 	if (un->un_f_descr_format_supported) {
32167 		mutex_exit(SD_MUTEX(un));
32168 		rval = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity, &lbasize,
32169 		    &pbsize, SD_PATH_DIRECT);
32170 		mutex_enter(SD_MUTEX(un));
32171 
32172 		if (rval != 0) {
32173 			un->un_phy_blocksize = DEV_BSIZE;
32174 		} else {
32175 			if (!ISP2(pbsize % DEV_BSIZE) || pbsize == 0) {
32176 				un->un_phy_blocksize = DEV_BSIZE;
32177 			} else if (pbsize > un->un_phy_blocksize) {
32178 				/*
32179 				 * Don't reset the physical blocksize
32180 				 * unless we've detected a larger value.
32181 				 */
32182 				un->un_phy_blocksize = pbsize;
32183 			}
32184 		}
32185 	}
32186 
32187 	for (i = 0; i < sd_flash_dev_table_size; i++) {
32188 		devid_len = (int)strlen(sd_flash_dev_table[i]);
32189 		if (sd_sdconf_id_match(un, sd_flash_dev_table[i], devid_len)
32190 		    == SD_SUCCESS) {
32191 			un->un_phy_blocksize = SSD_SECSIZE;
32192 			if (un->un_f_is_solid_state &&
32193 			    un->un_phy_blocksize != un->un_tgt_blocksize)
32194 				un->un_f_enable_rmw = TRUE;
32195 		}
32196 	}
32197 
32198 	mutex_exit(SD_MUTEX(un));
32199 }
32200