xref: /illumos-gate/usr/src/uts/common/io/scsi/targets/sd.c (revision 706ba8e2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * SCSI disk target driver.
28  */
29 #include <sys/scsi/scsi.h>
30 #include <sys/dkbad.h>
31 #include <sys/dklabel.h>
32 #include <sys/dkio.h>
33 #include <sys/fdio.h>
34 #include <sys/cdio.h>
35 #include <sys/mhd.h>
36 #include <sys/vtoc.h>
37 #include <sys/dktp/fdisk.h>
38 #include <sys/kstat.h>
39 #include <sys/vtrace.h>
40 #include <sys/note.h>
41 #include <sys/thread.h>
42 #include <sys/proc.h>
43 #include <sys/efi_partition.h>
44 #include <sys/var.h>
45 #include <sys/aio_req.h>
46 
47 #ifdef __lock_lint
48 #define	_LP64
49 #define	__amd64
50 #endif
51 
52 #if (defined(__fibre))
53 /* Note: is there a leadville version of the following? */
54 #include <sys/fc4/fcal_linkapp.h>
55 #endif
56 #include <sys/taskq.h>
57 #include <sys/uuid.h>
58 #include <sys/byteorder.h>
59 #include <sys/sdt.h>
60 
61 #include "sd_xbuf.h"
62 
63 #include <sys/scsi/targets/sddef.h>
64 #include <sys/cmlb.h>
65 #include <sys/sysevent/eventdefs.h>
66 #include <sys/sysevent/dev.h>
67 
68 #include <sys/fm/protocol.h>
69 
70 /*
71  * Loadable module info.
72  */
#if (defined(__fibre))
/* ssd (fibre) build: module name and dependency list also pull in fcp */
#define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver"
char _depends_on[]	= "misc/scsi misc/cmlb drv/fcp";
#else /* !__fibre */
#define	SD_MODULE_NAME	"SCSI Disk Driver"
char _depends_on[]	= "misc/scsi misc/cmlb";
#endif /* !__fibre */
80 
81 /*
82  * Define the interconnect type, to allow the driver to distinguish
83  * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
84  *
85  * This is really for backward compatibility. In the future, the driver
86  * should actually check the "interconnect-type" property as reported by
87  * the HBA; however at present this property is not defined by all HBAs,
88  * so we will use this #define (1) to permit the driver to run in
89  * backward-compatibility mode; and (2) to print a notification message
90  * if an FC HBA does not support the "interconnect-type" property.  The
91  * behavior of the driver will be to assume parallel SCSI behaviors unless
92  * the "interconnect-type" property is defined by the HBA **AND** has a
93  * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
94  * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
95  * Channel behaviors (as per the old ssd).  (Note that the
96  * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
97  * will result in the driver assuming parallel SCSI behaviors.)
98  *
99  * (see common/sys/scsi/impl/services.h)
100  *
101  * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
102  * since some FC HBAs may already support that, and there is some code in
103  * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
104  * default would confuse that code, and besides things should work fine
105  * anyways if the FC HBA already reports INTERCONNECT_FABRIC for the
106  * "interconnect_type" property.
107  *
108  */
/*
 * Default interconnect assumed when the HBA does not report an
 * "interconnect-type" property (rationale in the block comment above).
 */
#if (defined(__fibre))
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
#else
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
#endif
114 
/*
 * The name of the driver, established from the module name in _init.
 */
static	char *sd_label			= NULL;

/*
 * Driver name is unfortunately prefixed on some driver.conf properties.
 * The ssd (fibre) build therefore looks up "ssd"-prefixed property names;
 * the plain sd build uses the "sd"-prefixed names.
 */
#if (defined(__fibre))
#define	sd_max_xfer_size		ssd_max_xfer_size
#define	sd_config_list			ssd_config_list
static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
static	char *sd_config_list		= "ssd-config-list";
#else
static	char *sd_max_xfer_size		= "sd_max_xfer_size";
static	char *sd_config_list		= "sd-config-list";
#endif
132 
133 /*
134  * Driver global variables
135  */
136 
#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All global variables need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this namespace issue should go away.
 *
 * NOTE: any new global added to this file must also get an ssd_ rename here.
 */
#define	sd_state			ssd_state
#define	sd_io_time			ssd_io_time
#define	sd_failfast_enable		ssd_failfast_enable
#define	sd_ua_retry_count		ssd_ua_retry_count
#define	sd_report_pfa			ssd_report_pfa
#define	sd_max_throttle			ssd_max_throttle
#define	sd_min_throttle			ssd_min_throttle
#define	sd_rot_delay			ssd_rot_delay

#define	sd_retry_on_reservation_conflict	\
					ssd_retry_on_reservation_conflict
#define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
#define	sd_resv_conflict_name		ssd_resv_conflict_name

#define	sd_component_mask		ssd_component_mask
#define	sd_level_mask			ssd_level_mask
#define	sd_debug_un			ssd_debug_un
#define	sd_error_level			ssd_error_level

#define	sd_xbuf_active_limit		ssd_xbuf_active_limit
#define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit

#define	sd_tr				ssd_tr
#define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
#define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
#define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
#define	sd_check_media_time		ssd_check_media_time
#define	sd_wait_cmds_complete		ssd_wait_cmds_complete
#define	sd_label_mutex			ssd_label_mutex
#define	sd_detach_mutex			ssd_detach_mutex
#define	sd_log_buf			ssd_log_buf
#define	sd_log_mutex			ssd_log_mutex

#define	sd_disk_table			ssd_disk_table
#define	sd_disk_table_size		ssd_disk_table_size
#define	sd_sense_mutex			ssd_sense_mutex
#define	sd_cdbtab			ssd_cdbtab

#define	sd_cb_ops			ssd_cb_ops
#define	sd_ops				ssd_ops
#define	sd_additional_codes		ssd_additional_codes
#define	sd_tgops			ssd_tgops

#define	sd_minor_data			ssd_minor_data
#define	sd_minor_data_efi		ssd_minor_data_efi

#define	sd_tq				ssd_tq
#define	sd_wmr_tq			ssd_wmr_tq
#define	sd_taskq_name			ssd_taskq_name
#define	sd_wmr_taskq_name		ssd_wmr_taskq_name
#define	sd_taskq_minalloc		ssd_taskq_minalloc
#define	sd_taskq_maxalloc		ssd_taskq_maxalloc

#define	sd_dump_format_string		ssd_dump_format_string

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain

#define	sd_pm_idletime			ssd_pm_idletime

#define	sd_force_pm_supported		ssd_force_pm_supported

#define	sd_dtype_optical_bind		ssd_dtype_optical_bind

#define	sd_ssc_init			ssd_ssc_init
#define	sd_ssc_send			ssd_ssc_send
#define	sd_ssc_fini			ssd_ssc_fini
#define	sd_ssc_assessment		ssd_ssc_assessment
#define	sd_ssc_post			ssd_ssc_post
#define	sd_ssc_print			ssd_ssc_print
#define	sd_ssc_ereport_post		ssd_ssc_ereport_post
#define	sd_ssc_set_info			ssd_ssc_set_info
#define	sd_ssc_extract_info		ssd_ssc_extract_info

#endif
221 
#ifdef	SDDEBUG
/* Debug-only override forcing PM support regardless of device report */
int	sd_force_pm_supported		= 0;
#endif	/* SDDEBUG */

/* Soft-state anchor for all units -- presumably set up in _init (not visible here) */
void *sd_state				= NULL;
int sd_io_time				= SD_IO_TIME;
int sd_failfast_enable			= 1;
int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
int sd_report_pfa			= 1;
int sd_max_throttle			= SD_MAX_THROTTLE;
int sd_min_throttle			= SD_MIN_THROTTLE;
int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
int sd_qfull_throttle_enable		= TRUE;

int sd_retry_on_reservation_conflict	= 1;
int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
_NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))

static int sd_dtype_optical_bind	= -1;

/* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";

/*
 * Global data for debug logging. To enable debug printing, sd_component_mask
 * and sd_level_mask should be set to the desired bit patterns as outlined in
 * sddef.h.
 */
uint_t	sd_component_mask		= 0x0;
uint_t	sd_level_mask			= 0x0;
struct	sd_lun *sd_debug_un		= NULL;	/* restrict debug output to one unit */
uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;

/* Note: these may go away in the future... */
static uint32_t	sd_xbuf_active_limit	= 512;
static uint32_t sd_xbuf_reserve_limit	= 16;

/* Reservation-reclaim request state, zero-initialized */
static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };

/*
 * Timer value used to reset the throttle after it has been reduced
 * (typically in response to TRAN_BUSY or STATUS_QFULL)
 */
static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;

/*
 * Interval value associated with the media change scsi watch.
 * 3000000 -- presumably microseconds (3 s); confirm against the
 * scsi_watch submission call elsewhere in this file.
 */
static int sd_check_media_time		= 3000000;

/*
 * Wait value used for in progress operations during a DDI_SUSPEND
 */
static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;

/*
 * sd_label_mutex protects a static buffer used in the disk label
 * component of the driver
 */
static kmutex_t sd_label_mutex;

/*
 * sd_detach_mutex protects un_layer_count, un_detach_count, and
 * un_opens_in_progress in the sd_lun structure.
 */
static kmutex_t sd_detach_mutex;

_NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))

/*
 * Global buffer and mutex for debug logging
 */
static char	sd_log_buf[1024];
static kmutex_t	sd_log_mutex;
298 
299 /*
300  * Structs and globals for recording attached lun information.
301  * This maintains a chain. Each node in the chain represents a SCSI controller.
302  * The structure records the number of luns attached to each target connected
303  * with the controller.
304  * For parallel scsi device only.
305  */
struct sd_scsi_hba_tgt_lun {
	struct sd_scsi_hba_tgt_lun	*next;	/* next controller in chain */
	dev_info_t			*pdip;	/* controller's devinfo node */
	int				nlun[NTARGETS_WIDE]; /* luns attached per target */
};

/*
 * Flag to indicate the lun is attached or detached
 */
#define	SD_SCSI_LUN_ATTACH	0
#define	SD_SCSI_LUN_DETACH	1

static kmutex_t	sd_scsi_target_lun_mutex;
static struct sd_scsi_hba_tgt_lun	*sd_scsi_target_lun_head = NULL;

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
    sd_scsi_hba_tgt_lun::next sd_scsi_hba_tgt_lun::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
    sd_scsi_target_lun_head))
326 
327 /*
328  * "Smart" Probe Caching structs, globals, #defines, etc.
329  * For parallel scsi and non-self-identify device only.
330  */
331 
332 /*
333  * The following resources and routines are implemented to support
334  * "smart" probing, which caches the scsi_probe() results in an array,
335  * in order to help avoid long probe times.
336  */
struct sd_scsi_probe_cache {
	struct	sd_scsi_probe_cache	*next;	/* next HBA's cache in chain */
	dev_info_t	*pdip;			/* HBA's devinfo node */
	int		cache[NTARGETS_WIDE];	/* cached scsi_probe() result per target */
};

static kmutex_t	sd_scsi_probe_cache_mutex;
static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;

/*
 * Really we only need protection on the head of the linked list, but
 * better safe than sorry.
 */
_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache_head))
355 
356 /*
357  * Power attribute table
358  */
/*
 * Start/stop (two-level) power attribute table.  The first row is the
 * pm-components property strings ("NAME=spindle-motor" plus one string
 * per power level); the remaining three rows are per-level numeric
 * tables -- NOTE(review): presumably ratings, thresholds, and latencies;
 * confirm field names against sd_power_attr_ss in sddef.h.
 */
static sd_power_attr_ss sd_pwr_ss = {
	{ "NAME=spindle-motor", "0=off", "1=on", NULL },
	{0, 100},
	{30, 0},
	{20000, 0}
};

/*
 * Power-condition (four-level) variant of the table above; levels are
 * stopped / standby / idle / active.
 */
static sd_power_attr_pc sd_pwr_pc = {
	{ "NAME=spindle-motor", "0=stopped", "1=standby", "2=idle",
		"3=active", NULL },
	{0, 0, 0, 100},
	{90, 90, 20, 0},
	{15000, 15000, 1000, 0}
};

/*
 * Power level to power condition
 * (indexed by PM power level; values are SD_TARGET_* power conditions)
 */
static int sd_pl2pc[] = {
	SD_TARGET_START_VALID,
	SD_TARGET_STANDBY,
	SD_TARGET_IDLE,
	SD_TARGET_ACTIVE
};
383 
384 /*
385  * Vendor specific data name property declarations
386  */
387 
388 #if defined(__fibre) || defined(__i386) ||defined(__amd64)
389 
/*
 * sd_tunables initializer order, inferred from the macro names used in
 * the initializers below and in tst_properties:
 *   1. throttle          2. ctype              3. not-ready retries
 *   4. busy retries      5. reset retry count  6. reserve release time
 *   7. min throttle      8. disksort disabled  9. LUN reset enabled
 * Confirm against the sd_tunables declaration in sddef.h.
 */
static sd_tunables seagate_properties = {
	SEAGATE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};


static sd_tunables fujitsu_properties = {
	FUJITSU_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables ibm_properties = {
	IBM_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables purple_properties = {
	PURPLE_THROTTLE_VALUE,
	0,
	0,
	PURPLE_BUSY_RETRIES,
	PURPLE_RESET_RETRY_COUNT,
	PURPLE_RESERVE_RELEASE_TIME,
	0,
	0,
	0
};

static sd_tunables sve_properties = {
	SVE_THROTTLE_VALUE,
	0,
	0,
	SVE_BUSY_RETRIES,
	SVE_RESET_RETRY_COUNT,
	SVE_RESERVE_RELEASE_TIME,
	SVE_MIN_THROTTLE_VALUE,
	SVE_DISKSORT_DISABLED_FLAG,
	0
};

static sd_tunables maserati_properties = {
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	MASERATI_DISKSORT_DISABLED_FLAG,
	MASERATI_LUN_RESET_ENABLED_FLAG
};

static sd_tunables pirus_properties = {
	PIRUS_THROTTLE_VALUE,
	0,
	PIRUS_NRR_COUNT,
	PIRUS_BUSY_RETRIES,
	PIRUS_RESET_RETRY_COUNT,
	0,
	PIRUS_MIN_THROTTLE_VALUE,
	PIRUS_DISKSORT_DISABLED_FLAG,
	PIRUS_LUN_RESET_ENABLED_FLAG
};
474 
475 #endif
476 
477 #if (defined(__sparc) && !defined(__fibre)) || \
478 	(defined(__i386) || defined(__amd64))
479 
480 
/* Throttle-only tunables for two parallel-SCSI Seagate models */
static sd_tunables elite_properties = {
	ELITE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables st31200n_properties = {
	ST31200N_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};
504 
505 #endif /* Fibre or not */
506 
/* LSI/Symbios tunables: throttle and/or not-ready retry counts */
static sd_tunables lsi_properties_scsi = {
	LSI_THROTTLE_VALUE,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables symbios_properties = {
	SYMBIOS_THROTTLE_VALUE,
	0,
	SYMBIOS_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_properties = {
	0,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

/*
 * NOTE(review): this initializer has a 10th member (the trailing 1),
 * unlike the 9-member initializers above -- presumably a later-added
 * sd_tunables field; verify against the struct definition in sddef.h.
 */
static sd_tunables lsi_oem_properties = {
	0,
	0,
	LSI_OEM_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0,
	1
};
555 
556 
557 
#if (defined(SD_PROP_TST))

/* Test-only tunable values, compiled in only when SD_PROP_TST is defined */
#define	SD_TST_CTYPE_VAL	CTYPE_CDROM
#define	SD_TST_THROTTLE_VAL	16
#define	SD_TST_NOTREADY_VAL	12
#define	SD_TST_BUSY_VAL		60
#define	SD_TST_RST_RETRY_VAL	36
#define	SD_TST_RSV_REL_TIME	60

static sd_tunables tst_properties = {
	SD_TST_THROTTLE_VAL,
	SD_TST_CTYPE_VAL,
	SD_TST_NOTREADY_VAL,
	SD_TST_BUSY_VAL,
	SD_TST_RST_RETRY_VAL,
	SD_TST_RSV_REL_TIME,
	0,
	0,
	0
};
#endif
579 
/*
 * This is similar to the ANSI toupper implementation.
 * NOTE: the argument is evaluated multiple times, so it must not have
 * side effects (do not pass expressions like c++).
 */
#define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))
582 
583 /*
584  * Static Driver Configuration Table
585  *
586  * This is the table of disks which need throttle adjustment (or, perhaps
587  * something else as defined by the flags at a future time.)  device_id
588  * is a string consisting of concatenated vid (vendor), pid (product/model)
589  * and revision strings as defined in the scsi_inquiry structure.  Offsets of
590  * the parts of the string are as defined by the sizes in the scsi_inquiry
591  * structure.  Device type is searched as far as the device_id string is
592  * defined.  Flags defines which values are to be set in the driver from the
593  * properties list.
594  *
595  * Entries below which begin and end with a "*" are a special case.
596  * These do not have a specific vendor, and the string which follows
597  * can appear anywhere in the 16 byte PID portion of the inquiry data.
598  *
599  * Entries below which begin and end with a " " (blank) are a special
600  * case. The comparison function will treat multiple consecutive blanks
601  * as equivalent to a single blank. For example, this causes a
602  * sd_disk_table entry of " NEC CDROM " to match a device's id string
603  * of  "NEC       CDROM".
604  *
605  * Note: The MD21 controller type has been obsoleted.
606  *	 ST318202F is a Legacy device
607  *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
608  *	 made with an FC connection. The entries here are a legacy.
609  */
static sd_disk_config_t sd_disk_table[] = {
#if defined(__fibre) || defined(__i386) || defined(__amd64)
	/* Throttle-only entries for legacy FC Seagate/Fujitsu/IBM disks */
	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	/* LSI-based OEM arrays (IBM/DELL/SGI/...): not-ready retry count */
	{ "IBM     1724-100",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-2xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-22x",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-4xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-42x",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-3xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1818",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "DELL    MD3000",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "DELL    MD3000i",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT |
			SD_CONF_BSET_CACHE_IS_NV, &lsi_oem_properties },
	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT |
			SD_CONF_BSET_CACHE_IS_NV, &lsi_oem_properties },
	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },
	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
	/* Sun/StorageTek arrays: set several retry/throttle tunables at once */
	{ "SUN     T3", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_RSV_REL_TIME|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED,
		&sve_properties },
	{ "SUN     T4", SD_CONF_BSET_THROTTLE |
			SD_CONF_BSET_BSY_RETRY_COUNT|
			SD_CONF_BSET_RST_RETRIES|
			SD_CONF_BSET_RSV_REL_TIME,
		&purple_properties },
	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&maserati_properties },
	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     STK6580_6780", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SUN     SUN_6180", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
#endif /* fibre or NON-sparc platforms */
#if ((defined(__sparc) && !defined(__fibre)) ||\
	(defined(__i386) || defined(__amd64)))
	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
	/* "*...*" entries match anywhere in the PID (see block comment above) */
	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
	    &symbios_properties },
	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
	    &lsi_properties_scsi },
#if defined(__i386) || defined(__amd64)
	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },

	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },
#endif /* __i386 || __amd64 */
#endif /* sparc NON-fibre or NON-sparc platforms */

#if (defined(SD_PROP_TST))
	/* Test entry exercising (nearly) every settable flag at once */
	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
				| SD_CONF_BSET_CTYPE
				| SD_CONF_BSET_NRR_COUNT
				| SD_CONF_BSET_FAB_DEVID
				| SD_CONF_BSET_NOCACHE
				| SD_CONF_BSET_BSY_RETRY_COUNT
				| SD_CONF_BSET_PLAYMSF_BCD
				| SD_CONF_BSET_READSUB_BCD
				| SD_CONF_BSET_READ_TOC_TRK_BCD
				| SD_CONF_BSET_READ_TOC_ADDR_BCD
				| SD_CONF_BSET_NO_READ_HEADER
				| SD_CONF_BSET_READ_CD_XD4
				| SD_CONF_BSET_RST_RETRIES
				| SD_CONF_BSET_RSV_REL_TIME
				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
#endif
};
794 
795 static const int sd_disk_table_size =
796 	sizeof (sd_disk_table)/ sizeof (sd_disk_config_t);
797 
798 
799 
/* Interconnect types recorded in un_interconnect_type */
#define	SD_INTERCONNECT_PARALLEL	0
#define	SD_INTERCONNECT_FABRIC		1
#define	SD_INTERCONNECT_FIBRE		2
#define	SD_INTERCONNECT_SSA		3
#define	SD_INTERCONNECT_SATA		4
#define	SD_INTERCONNECT_SAS		5

/* True if the unit is attached via parallel SCSI */
#define	SD_IS_PARALLEL_SCSI(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)
/* True if the unit is attached via a serial interconnect (SATA or SAS) */
#define	SD_IS_SERIAL(un)		\
	(((un)->un_interconnect_type == SD_INTERCONNECT_SATA) ||\
	((un)->un_interconnect_type == SD_INTERCONNECT_SAS))
812 
/*
 * Definitions used by device id registration routines
 */
#define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
#define	VPD_PAGE_LENGTH		3	/* offset for page length data */
#define	VPD_MODE_PAGE		1	/* offset into vpd pg for "page code" */

static kmutex_t sd_sense_mutex = {0};

/*
 * Macros for updates of the driver state
 */
/*
 * New_state: save the current state in un_last_state, then switch to s.
 * (Written with the comma operator so it is usable as a single expression.)
 */
#define	New_state(un, s)        \
	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
/*
 * Restore_state: swap un_state and un_last_state (New_state stores the
 * outgoing state into un_last_state before installing the saved one).
 */
#define	Restore_state(un)	\
	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
829 
/*
 * CDB selection table, one row per CDB group.  The numeric columns are
 * consistent with {group code, maximum LBA, maximum block count}: e.g.
 * Group 0 allows a 21-bit LBA (0x1FFFFF) and an 8-bit count (0xFF),
 * Group 1 a 32-bit LBA and 16-bit count, Group 4 a 64-bit LBA.
 * NOTE(review): confirm exact field names against struct sd_cdbinfo
 * in sddef.h.
 */
static struct sd_cdbinfo sd_cdbtab[] = {
	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
};

/*
 * Specifies the number of seconds that must have elapsed since the last
 * cmd. has completed for a device to be declared idle to the PM framework.
 */
static int sd_pm_idletime = 1;
842 
843 /*
844  * Internal function prototypes
845  */
846 
847 #if (defined(__fibre))
848 /*
849  * These #defines are to avoid namespace collisions that occur because this
850  * code is currently used to compile two separate driver modules: sd and ssd.
851  * All function names need to be treated this way (even if declared static)
852  * in order to allow the debugger to resolve the names properly.
853  * It is anticipated that in the near future the ssd module will be obsoleted,
854  * at which time this ugliness should go away.
855  */
856 #define	sd_log_trace			ssd_log_trace
857 #define	sd_log_info			ssd_log_info
858 #define	sd_log_err			ssd_log_err
859 #define	sdprobe				ssdprobe
860 #define	sdinfo				ssdinfo
861 #define	sd_prop_op			ssd_prop_op
862 #define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
863 #define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
864 #define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
865 #define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
866 #define	sd_scsi_target_lun_init		ssd_scsi_target_lun_init
867 #define	sd_scsi_target_lun_fini		ssd_scsi_target_lun_fini
868 #define	sd_scsi_get_target_lun_count	ssd_scsi_get_target_lun_count
869 #define	sd_scsi_update_lun_on_target	ssd_scsi_update_lun_on_target
870 #define	sd_spin_up_unit			ssd_spin_up_unit
871 #define	sd_enable_descr_sense		ssd_enable_descr_sense
872 #define	sd_reenable_dsense_task		ssd_reenable_dsense_task
873 #define	sd_set_mmc_caps			ssd_set_mmc_caps
874 #define	sd_read_unit_properties		ssd_read_unit_properties
875 #define	sd_process_sdconf_file		ssd_process_sdconf_file
876 #define	sd_process_sdconf_table		ssd_process_sdconf_table
877 #define	sd_sdconf_id_match		ssd_sdconf_id_match
878 #define	sd_blank_cmp			ssd_blank_cmp
879 #define	sd_chk_vers1_data		ssd_chk_vers1_data
880 #define	sd_set_vers1_properties		ssd_set_vers1_properties
881 #define	sd_check_solid_state		ssd_check_solid_state
882 
883 #define	sd_get_physical_geometry	ssd_get_physical_geometry
884 #define	sd_get_virtual_geometry		ssd_get_virtual_geometry
885 #define	sd_update_block_info		ssd_update_block_info
886 #define	sd_register_devid		ssd_register_devid
887 #define	sd_get_devid			ssd_get_devid
888 #define	sd_create_devid			ssd_create_devid
889 #define	sd_write_deviceid		ssd_write_deviceid
890 #define	sd_check_vpd_page_support	ssd_check_vpd_page_support
891 #define	sd_setup_pm			ssd_setup_pm
892 #define	sd_create_pm_components		ssd_create_pm_components
893 #define	sd_ddi_suspend			ssd_ddi_suspend
894 #define	sd_ddi_resume			ssd_ddi_resume
895 #define	sd_pm_state_change		ssd_pm_state_change
896 #define	sdpower				ssdpower
897 #define	sdattach			ssdattach
898 #define	sddetach			ssddetach
899 #define	sd_unit_attach			ssd_unit_attach
900 #define	sd_unit_detach			ssd_unit_detach
901 #define	sd_set_unit_attributes		ssd_set_unit_attributes
902 #define	sd_create_errstats		ssd_create_errstats
903 #define	sd_set_errstats			ssd_set_errstats
904 #define	sd_set_pstats			ssd_set_pstats
905 #define	sddump				ssddump
906 #define	sd_scsi_poll			ssd_scsi_poll
907 #define	sd_send_polled_RQS		ssd_send_polled_RQS
908 #define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
909 #define	sd_init_event_callbacks		ssd_init_event_callbacks
910 #define	sd_event_callback		ssd_event_callback
911 #define	sd_cache_control		ssd_cache_control
912 #define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
913 #define	sd_get_nv_sup			ssd_get_nv_sup
914 #define	sd_make_device			ssd_make_device
915 #define	sdopen				ssdopen
916 #define	sdclose				ssdclose
917 #define	sd_ready_and_valid		ssd_ready_and_valid
918 #define	sdmin				ssdmin
919 #define	sdread				ssdread
920 #define	sdwrite				ssdwrite
921 #define	sdaread				ssdaread
922 #define	sdawrite			ssdawrite
923 #define	sdstrategy			ssdstrategy
924 #define	sdioctl				ssdioctl
925 #define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
926 #define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
927 #define	sd_checksum_iostart		ssd_checksum_iostart
928 #define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
929 #define	sd_pm_iostart			ssd_pm_iostart
930 #define	sd_core_iostart			ssd_core_iostart
931 #define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
932 #define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
933 #define	sd_checksum_iodone		ssd_checksum_iodone
934 #define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
935 #define	sd_pm_iodone			ssd_pm_iodone
936 #define	sd_initpkt_for_buf		ssd_initpkt_for_buf
937 #define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
938 #define	sd_setup_rw_pkt			ssd_setup_rw_pkt
939 #define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
940 #define	sd_buf_iodone			ssd_buf_iodone
941 #define	sd_uscsi_strategy		ssd_uscsi_strategy
942 #define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
943 #define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
944 #define	sd_uscsi_iodone			ssd_uscsi_iodone
945 #define	sd_xbuf_strategy		ssd_xbuf_strategy
946 #define	sd_xbuf_init			ssd_xbuf_init
947 #define	sd_pm_entry			ssd_pm_entry
948 #define	sd_pm_exit			ssd_pm_exit
949 
950 #define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
951 #define	sd_pm_timeout_handler		ssd_pm_timeout_handler
952 
953 #define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
954 #define	sdintr				ssdintr
955 #define	sd_start_cmds			ssd_start_cmds
956 #define	sd_send_scsi_cmd		ssd_send_scsi_cmd
957 #define	sd_bioclone_alloc		ssd_bioclone_alloc
958 #define	sd_bioclone_free		ssd_bioclone_free
959 #define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
960 #define	sd_shadow_buf_free		ssd_shadow_buf_free
961 #define	sd_print_transport_rejected_message	\
962 					ssd_print_transport_rejected_message
963 #define	sd_retry_command		ssd_retry_command
964 #define	sd_set_retry_bp			ssd_set_retry_bp
965 #define	sd_send_request_sense_command	ssd_send_request_sense_command
966 #define	sd_start_retry_command		ssd_start_retry_command
967 #define	sd_start_direct_priority_command	\
968 					ssd_start_direct_priority_command
969 #define	sd_return_failed_command	ssd_return_failed_command
970 #define	sd_return_failed_command_no_restart	\
971 					ssd_return_failed_command_no_restart
972 #define	sd_return_command		ssd_return_command
973 #define	sd_sync_with_callback		ssd_sync_with_callback
974 #define	sdrunout			ssdrunout
975 #define	sd_mark_rqs_busy		ssd_mark_rqs_busy
976 #define	sd_mark_rqs_idle		ssd_mark_rqs_idle
977 #define	sd_reduce_throttle		ssd_reduce_throttle
978 #define	sd_restore_throttle		ssd_restore_throttle
979 #define	sd_print_incomplete_msg		ssd_print_incomplete_msg
980 #define	sd_init_cdb_limits		ssd_init_cdb_limits
981 #define	sd_pkt_status_good		ssd_pkt_status_good
982 #define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
983 #define	sd_pkt_status_busy		ssd_pkt_status_busy
984 #define	sd_pkt_status_reservation_conflict	\
985 					ssd_pkt_status_reservation_conflict
986 #define	sd_pkt_status_qfull		ssd_pkt_status_qfull
987 #define	sd_handle_request_sense		ssd_handle_request_sense
988 #define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
989 #define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
990 #define	sd_validate_sense_data		ssd_validate_sense_data
991 #define	sd_decode_sense			ssd_decode_sense
992 #define	sd_print_sense_msg		ssd_print_sense_msg
993 #define	sd_sense_key_no_sense		ssd_sense_key_no_sense
994 #define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
995 #define	sd_sense_key_not_ready		ssd_sense_key_not_ready
996 #define	sd_sense_key_medium_or_hardware_error	\
997 					ssd_sense_key_medium_or_hardware_error
998 #define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
999 #define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
1000 #define	sd_sense_key_fail_command	ssd_sense_key_fail_command
1001 #define	sd_sense_key_blank_check	ssd_sense_key_blank_check
1002 #define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
1003 #define	sd_sense_key_default		ssd_sense_key_default
1004 #define	sd_print_retry_msg		ssd_print_retry_msg
1005 #define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
1006 #define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
1007 #define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
1008 #define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
1009 #define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
1010 #define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
1011 #define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
1012 #define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
1013 #define	sd_pkt_reason_default		ssd_pkt_reason_default
1014 #define	sd_reset_target			ssd_reset_target
1015 #define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
1016 #define	sd_start_stop_unit_task		ssd_start_stop_unit_task
1017 #define	sd_taskq_create			ssd_taskq_create
1018 #define	sd_taskq_delete			ssd_taskq_delete
1019 #define	sd_target_change_task		ssd_target_change_task
1020 #define	sd_log_dev_status_event		ssd_log_dev_status_event
1021 #define	sd_log_lun_expansion_event	ssd_log_lun_expansion_event
1022 #define	sd_log_eject_request_event	ssd_log_eject_request_event
1023 #define	sd_media_change_task		ssd_media_change_task
1024 #define	sd_handle_mchange		ssd_handle_mchange
1025 #define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
1026 #define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
1027 #define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
1028 #define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
/*
 * Fix: this macro previously expanded to itself (missing the leading 's'),
 * which is legal (self-referential macros do not recurse) but silently
 * defeated the sd_ -> ssd_ rename for the fibre build.  Map it to the
 * ssd_-prefixed symbol like every other entry in this section.
 */
#define	sd_send_scsi_feature_GET_CONFIGURATION	\
					ssd_send_scsi_feature_GET_CONFIGURATION
1031 #define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
1032 #define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
1033 #define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
1034 #define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
1035 					ssd_send_scsi_PERSISTENT_RESERVE_IN
1036 #define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
1037 					ssd_send_scsi_PERSISTENT_RESERVE_OUT
1038 #define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
1039 #define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
1040 					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
1041 #define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
1042 #define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
1043 #define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
1044 #define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
1045 #define	sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION	\
1046 				ssd_send_scsi_GET_EVENT_STATUS_NOTIFICATION
1047 #define	sd_gesn_media_data_valid	ssd_gesn_media_data_valid
1048 #define	sd_alloc_rqs			ssd_alloc_rqs
1049 #define	sd_free_rqs			ssd_free_rqs
1050 #define	sd_dump_memory			ssd_dump_memory
1051 #define	sd_get_media_info		ssd_get_media_info
1052 #define	sd_get_media_info_ext		ssd_get_media_info_ext
1053 #define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
1054 #define	sd_nvpair_str_decode		ssd_nvpair_str_decode
1055 #define	sd_strtok_r			ssd_strtok_r
1056 #define	sd_set_properties		ssd_set_properties
1057 #define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
1058 #define	sd_setup_next_xfer		ssd_setup_next_xfer
1059 #define	sd_dkio_get_temp		ssd_dkio_get_temp
1060 #define	sd_check_mhd			ssd_check_mhd
1061 #define	sd_mhd_watch_cb			ssd_mhd_watch_cb
1062 #define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
1063 #define	sd_sname			ssd_sname
1064 #define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
1065 #define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
1066 #define	sd_take_ownership		ssd_take_ownership
1067 #define	sd_reserve_release		ssd_reserve_release
1068 #define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
1069 #define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
1070 #define	sd_persistent_reservation_in_read_keys	\
1071 					ssd_persistent_reservation_in_read_keys
1072 #define	sd_persistent_reservation_in_read_resv	\
1073 					ssd_persistent_reservation_in_read_resv
1074 #define	sd_mhdioc_takeown		ssd_mhdioc_takeown
1075 #define	sd_mhdioc_failfast		ssd_mhdioc_failfast
1076 #define	sd_mhdioc_release		ssd_mhdioc_release
1077 #define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
1078 #define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
1079 #define	sd_mhdioc_inresv		ssd_mhdioc_inresv
1080 #define	sr_change_blkmode		ssr_change_blkmode
1081 #define	sr_change_speed			ssr_change_speed
1082 #define	sr_atapi_change_speed		ssr_atapi_change_speed
1083 #define	sr_pause_resume			ssr_pause_resume
1084 #define	sr_play_msf			ssr_play_msf
1085 #define	sr_play_trkind			ssr_play_trkind
1086 #define	sr_read_all_subcodes		ssr_read_all_subcodes
1087 #define	sr_read_subchannel		ssr_read_subchannel
1088 #define	sr_read_tocentry		ssr_read_tocentry
1089 #define	sr_read_tochdr			ssr_read_tochdr
1090 #define	sr_read_cdda			ssr_read_cdda
1091 #define	sr_read_cdxa			ssr_read_cdxa
1092 #define	sr_read_mode1			ssr_read_mode1
1093 #define	sr_read_mode2			ssr_read_mode2
1094 #define	sr_read_cd_mode2		ssr_read_cd_mode2
1095 #define	sr_sector_mode			ssr_sector_mode
1096 #define	sr_eject			ssr_eject
1097 #define	sr_ejected			ssr_ejected
1098 #define	sr_check_wp			ssr_check_wp
1099 #define	sd_watch_request_submit		ssd_watch_request_submit
1100 #define	sd_check_media			ssd_check_media
1101 #define	sd_media_watch_cb		ssd_media_watch_cb
1102 #define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
1103 #define	sr_volume_ctrl			ssr_volume_ctrl
1104 #define	sr_read_sony_session_offset	ssr_read_sony_session_offset
1105 #define	sd_log_page_supported		ssd_log_page_supported
1106 #define	sd_check_for_writable_cd	ssd_check_for_writable_cd
1107 #define	sd_wm_cache_constructor		ssd_wm_cache_constructor
1108 #define	sd_wm_cache_destructor		ssd_wm_cache_destructor
1109 #define	sd_range_lock			ssd_range_lock
1110 #define	sd_get_range			ssd_get_range
1111 #define	sd_free_inlist_wmap		ssd_free_inlist_wmap
1112 #define	sd_range_unlock			ssd_range_unlock
1113 #define	sd_read_modify_write_task	ssd_read_modify_write_task
1114 #define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw
1115 
1116 #define	sd_iostart_chain		ssd_iostart_chain
1117 #define	sd_iodone_chain			ssd_iodone_chain
1118 #define	sd_initpkt_map			ssd_initpkt_map
1119 #define	sd_destroypkt_map		ssd_destroypkt_map
1120 #define	sd_chain_type_map		ssd_chain_type_map
1121 #define	sd_chain_index_map		ssd_chain_index_map
1122 
1123 #define	sd_failfast_flushctl		ssd_failfast_flushctl
1124 #define	sd_failfast_flushq		ssd_failfast_flushq
1125 #define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback
1126 
1127 #define	sd_is_lsi			ssd_is_lsi
1128 #define	sd_tg_rdwr			ssd_tg_rdwr
1129 #define	sd_tg_getinfo			ssd_tg_getinfo
1130 #define	sd_rmw_msg_print_handler	ssd_rmw_msg_print_handler
1131 
1132 #endif	/* #if (defined(__fibre)) */
1133 
1134 
1135 int _init(void);
1136 int _fini(void);
1137 int _info(struct modinfo *modinfop);
1138 
1139 /*PRINTFLIKE3*/
1140 static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1141 /*PRINTFLIKE3*/
1142 static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1143 /*PRINTFLIKE3*/
1144 static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);
1145 
1146 static int sdprobe(dev_info_t *devi);
1147 static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
1148     void **result);
1149 static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
1150     int mod_flags, char *name, caddr_t valuep, int *lengthp);
1151 
1152 /*
1153  * Smart probe for parallel scsi
1154  */
1155 static void sd_scsi_probe_cache_init(void);
1156 static void sd_scsi_probe_cache_fini(void);
1157 static void sd_scsi_clear_probe_cache(void);
1158 static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());
1159 
1160 /*
1161  * Attached luns on target for parallel scsi
1162  */
1163 static void sd_scsi_target_lun_init(void);
1164 static void sd_scsi_target_lun_fini(void);
1165 static int  sd_scsi_get_target_lun_count(dev_info_t *dip, int target);
1166 static void sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag);
1167 
1168 static int	sd_spin_up_unit(sd_ssc_t *ssc);
1169 
1170 /*
1171  * Using sd_ssc_init to establish sd_ssc_t struct
1172  * Using sd_ssc_send to send uscsi internal command
1173  * Using sd_ssc_fini to free sd_ssc_t struct
1174  */
1175 static sd_ssc_t *sd_ssc_init(struct sd_lun *un);
1176 static int sd_ssc_send(sd_ssc_t *ssc, struct uscsi_cmd *incmd,
1177     int flag, enum uio_seg dataspace, int path_flag);
1178 static void sd_ssc_fini(sd_ssc_t *ssc);
1179 
1180 /*
1181  * Using sd_ssc_assessment to set correct type-of-assessment
1182  * Using sd_ssc_post to post ereport & system log
1183  *       sd_ssc_post will call sd_ssc_print to print system log
1184  *       sd_ssc_post will call sd_ssd_ereport_post to post ereport
1185  */
1186 static void sd_ssc_assessment(sd_ssc_t *ssc,
1187     enum sd_type_assessment tp_assess);
1188 
1189 static void sd_ssc_post(sd_ssc_t *ssc, enum sd_driver_assessment sd_assess);
1190 static void sd_ssc_print(sd_ssc_t *ssc, int sd_severity);
1191 static void sd_ssc_ereport_post(sd_ssc_t *ssc,
1192     enum sd_driver_assessment drv_assess);
1193 
1194 /*
1195  * Using sd_ssc_set_info to mark an un-decodable-data error.
1196  * Using sd_ssc_extract_info to transfer information from internal
1197  *       data structures to sd_ssc_t.
1198  */
1199 static void sd_ssc_set_info(sd_ssc_t *ssc, int ssc_flags, uint_t comp,
1200     const char *fmt, ...);
1201 static void sd_ssc_extract_info(sd_ssc_t *ssc, struct sd_lun *un,
1202     struct scsi_pkt *pktp, struct buf *bp, struct sd_xbuf *xp);
1203 
1204 static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
1205     enum uio_seg dataspace, int path_flag);
1206 
1207 #ifdef _LP64
1208 static void	sd_enable_descr_sense(sd_ssc_t *ssc);
1209 static void	sd_reenable_dsense_task(void *arg);
1210 #endif /* _LP64 */
1211 
1212 static void	sd_set_mmc_caps(sd_ssc_t *ssc);
1213 
1214 static void sd_read_unit_properties(struct sd_lun *un);
1215 static int  sd_process_sdconf_file(struct sd_lun *un);
1216 static void sd_nvpair_str_decode(struct sd_lun *un, char *nvpair_str);
1217 static char *sd_strtok_r(char *string, const char *sepset, char **lasts);
1218 static void sd_set_properties(struct sd_lun *un, char *name, char *value);
1219 static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
1220     int *data_list, sd_tunables *values);
1221 static void sd_process_sdconf_table(struct sd_lun *un);
1222 static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
1223 static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
1224 static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
1225 	int list_len, char *dataname_ptr);
1226 static void sd_set_vers1_properties(struct sd_lun *un, int flags,
1227     sd_tunables *prop_list);
1228 
1229 static void sd_register_devid(sd_ssc_t *ssc, dev_info_t *devi,
1230     int reservation_flag);
1231 static int  sd_get_devid(sd_ssc_t *ssc);
1232 static ddi_devid_t sd_create_devid(sd_ssc_t *ssc);
1233 static int  sd_write_deviceid(sd_ssc_t *ssc);
1234 static int  sd_get_devid_page(struct sd_lun *un, uchar_t *wwn, int *len);
1235 static int  sd_check_vpd_page_support(sd_ssc_t *ssc);
1236 
1237 static void sd_setup_pm(sd_ssc_t *ssc, dev_info_t *devi);
1238 static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);
1239 
1240 static int  sd_ddi_suspend(dev_info_t *devi);
1241 static int  sd_ddi_resume(dev_info_t *devi);
1242 static int  sd_pm_state_change(struct sd_lun *un, int level, int flag);
1243 static int  sdpower(dev_info_t *devi, int component, int level);
1244 
1245 static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
1246 static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
1247 static int  sd_unit_attach(dev_info_t *devi);
1248 static int  sd_unit_detach(dev_info_t *devi);
1249 
1250 static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
1251 static void sd_create_errstats(struct sd_lun *un, int instance);
1252 static void sd_set_errstats(struct sd_lun *un);
1253 static void sd_set_pstats(struct sd_lun *un);
1254 
1255 static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
1256 static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
1257 static int  sd_send_polled_RQS(struct sd_lun *un);
1258 static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);
1259 
1260 #if (defined(__fibre))
1261 /*
1262  * Event callbacks (photon)
1263  */
1264 static void sd_init_event_callbacks(struct sd_lun *un);
1265 static void  sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
1266 #endif
1267 
1268 /*
1269  * Defines for sd_cache_control
1270  */
1271 
1272 #define	SD_CACHE_ENABLE		1
1273 #define	SD_CACHE_DISABLE	0
1274 #define	SD_CACHE_NOCHANGE	-1
1275 
1276 static int   sd_cache_control(sd_ssc_t *ssc, int rcd_flag, int wce_flag);
1277 static int   sd_get_write_cache_enabled(sd_ssc_t *ssc, int *is_enabled);
1278 static void  sd_get_nv_sup(sd_ssc_t *ssc);
1279 static dev_t sd_make_device(dev_info_t *devi);
1280 static void  sd_check_solid_state(sd_ssc_t *ssc);
1281 
1282 static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
1283 	uint64_t capacity);
1284 
1285 /*
1286  * Driver entry point functions.
1287  */
1288 static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
1289 static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
1290 static int  sd_ready_and_valid(sd_ssc_t *ssc, int part);
1291 
1292 static void sdmin(struct buf *bp);
1293 static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
1294 static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
1295 static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1296 static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);
1297 
1298 static int sdstrategy(struct buf *bp);
1299 static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);
1300 
1301 /*
1302  * Function prototypes for layering functions in the iostart chain.
1303  */
1304 static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
1305 	struct buf *bp);
1306 static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
1307 	struct buf *bp);
1308 static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
1309 static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
1310 	struct buf *bp);
1311 static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
1312 static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);
1313 
1314 /*
1315  * Function prototypes for layering functions in the iodone chain.
1316  */
1317 static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
1318 static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
1319 static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
1320 	struct buf *bp);
1321 static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
1322 	struct buf *bp);
1323 static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
1324 static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
1325 	struct buf *bp);
1326 static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);
1327 
1328 /*
1329  * Prototypes for functions to support buf(9S) based IO.
1330  */
1331 static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
1332 static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
1333 static void sd_destroypkt_for_buf(struct buf *);
1334 static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
1335 	struct buf *bp, int flags,
1336 	int (*callback)(caddr_t), caddr_t callback_arg,
1337 	diskaddr_t lba, uint32_t blockcount);
1338 static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
1339 	struct buf *bp, diskaddr_t lba, uint32_t blockcount);
1340 
1341 /*
1342  * Prototypes for functions to support USCSI IO.
1343  */
1344 static int sd_uscsi_strategy(struct buf *bp);
1345 static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
1346 static void sd_destroypkt_for_uscsi(struct buf *);
1347 
1348 static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
1349 	uchar_t chain_type, void *pktinfop);
1350 
1351 static int  sd_pm_entry(struct sd_lun *un);
1352 static void sd_pm_exit(struct sd_lun *un);
1353 
1354 static void sd_pm_idletimeout_handler(void *arg);
1355 
1356 /*
1357  * sd_core internal functions (used at the sd_core_io layer).
1358  */
1359 static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
1360 static void sdintr(struct scsi_pkt *pktp);
1361 static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);
1362 
1363 static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
1364 	enum uio_seg dataspace, int path_flag);
1365 
1366 static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
1367 	daddr_t blkno, int (*func)(struct buf *));
1368 static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
1369 	uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
1370 static void sd_bioclone_free(struct buf *bp);
1371 static void sd_shadow_buf_free(struct buf *bp);
1372 
1373 static void sd_print_transport_rejected_message(struct sd_lun *un,
1374 	struct sd_xbuf *xp, int code);
1375 static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
1376     void *arg, int code);
1377 static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
1378     void *arg, int code);
1379 static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
1380     void *arg, int code);
1381 
1382 static void sd_retry_command(struct sd_lun *un, struct buf *bp,
1383 	int retry_check_flag,
1384 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp,
1385 		int c),
1386 	void *user_arg, int failure_code,  clock_t retry_delay,
1387 	void (*statp)(kstat_io_t *));
1388 
1389 static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
1390 	clock_t retry_delay, void (*statp)(kstat_io_t *));
1391 
1392 static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
1393 	struct scsi_pkt *pktp);
1394 static void sd_start_retry_command(void *arg);
1395 static void sd_start_direct_priority_command(void *arg);
1396 static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
1397 	int errcode);
1398 static void sd_return_failed_command_no_restart(struct sd_lun *un,
1399 	struct buf *bp, int errcode);
1400 static void sd_return_command(struct sd_lun *un, struct buf *bp);
1401 static void sd_sync_with_callback(struct sd_lun *un);
1402 static int sdrunout(caddr_t arg);
1403 
1404 static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
1405 static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);
1406 
1407 static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
1408 static void sd_restore_throttle(void *arg);
1409 
1410 static void sd_init_cdb_limits(struct sd_lun *un);
1411 
1412 static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
1413 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1414 
1415 /*
1416  * Error handling functions
1417  */
1418 static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
1419 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1420 static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
1421 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1422 static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
1423 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1424 static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
1425 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1426 
1427 static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
1428 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1429 static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
1430 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1431 static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
1432 	struct sd_xbuf *xp, size_t actual_len);
1433 static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
1434 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1435 
1436 static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
1437 	void *arg, int code);
1438 
1439 static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
1440 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1441 static void sd_sense_key_recoverable_error(struct sd_lun *un,
1442 	uint8_t *sense_datap,
1443 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1444 static void sd_sense_key_not_ready(struct sd_lun *un,
1445 	uint8_t *sense_datap,
1446 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1447 static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
1448 	uint8_t *sense_datap,
1449 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1450 static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
1451 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1452 static void sd_sense_key_unit_attention(struct sd_lun *un,
1453 	uint8_t *sense_datap,
1454 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1455 static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
1456 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1457 static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
1458 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1459 static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
1460 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1461 static void sd_sense_key_default(struct sd_lun *un,
1462 	uint8_t *sense_datap,
1463 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
1464 
1465 static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
1466 	void *arg, int flag);
1467 
1468 static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
1469 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1470 static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
1471 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1472 static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
1473 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1474 static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
1475 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1476 static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
1477 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1478 static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
1479 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1480 static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
1481 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1482 static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
1483 	struct sd_xbuf *xp, struct scsi_pkt *pktp);
1484 
1485 static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);
1486 
1487 static void sd_start_stop_unit_callback(void *arg);
1488 static void sd_start_stop_unit_task(void *arg);
1489 
1490 static void sd_taskq_create(void);
1491 static void sd_taskq_delete(void);
1492 static void sd_target_change_task(void *arg);
1493 static void sd_log_dev_status_event(struct sd_lun *un, char *esc, int km_flag);
1494 static void sd_log_lun_expansion_event(struct sd_lun *un, int km_flag);
1495 static void sd_log_eject_request_event(struct sd_lun *un, int km_flag);
1496 static void sd_media_change_task(void *arg);
1497 
1498 static int sd_handle_mchange(struct sd_lun *un);
1499 static int sd_send_scsi_DOORLOCK(sd_ssc_t *ssc, int flag, int path_flag);
1500 static int sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp,
1501 	uint32_t *lbap, int path_flag);
1502 static int sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp,
1503 	uint32_t *lbap, uint32_t *psp, int path_flag);
1504 static int sd_send_scsi_START_STOP_UNIT(sd_ssc_t *ssc, int pc_flag,
1505 	int flag, int path_flag);
1506 static int sd_send_scsi_INQUIRY(sd_ssc_t *ssc, uchar_t *bufaddr,
1507 	size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
1508 static int sd_send_scsi_TEST_UNIT_READY(sd_ssc_t *ssc, int flag);
1509 static int sd_send_scsi_PERSISTENT_RESERVE_IN(sd_ssc_t *ssc,
1510 	uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
1511 static int sd_send_scsi_PERSISTENT_RESERVE_OUT(sd_ssc_t *ssc,
1512 	uchar_t usr_cmd, uchar_t *usr_bufp);
1513 static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
1514 	struct dk_callback *dkc);
1515 static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
1516 static int sd_send_scsi_GET_CONFIGURATION(sd_ssc_t *ssc,
1517 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1518 	uchar_t *bufaddr, uint_t buflen, int path_flag);
1519 static int sd_send_scsi_feature_GET_CONFIGURATION(sd_ssc_t *ssc,
1520 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1521 	uchar_t *bufaddr, uint_t buflen, char feature, int path_flag);
1522 static int sd_send_scsi_MODE_SENSE(sd_ssc_t *ssc, int cdbsize,
1523 	uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
1524 static int sd_send_scsi_MODE_SELECT(sd_ssc_t *ssc, int cdbsize,
1525 	uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
1526 static int sd_send_scsi_RDWR(sd_ssc_t *ssc, uchar_t cmd, void *bufaddr,
1527 	size_t buflen, daddr_t start_block, int path_flag);
1528 #define	sd_send_scsi_READ(ssc, bufaddr, buflen, start_block, path_flag)	\
1529 	sd_send_scsi_RDWR(ssc, SCMD_READ, bufaddr, buflen, start_block, \
1530 	path_flag)
1531 #define	sd_send_scsi_WRITE(ssc, bufaddr, buflen, start_block, path_flag)\
1532 	sd_send_scsi_RDWR(ssc, SCMD_WRITE, bufaddr, buflen, start_block,\
1533 	path_flag)
1534 
1535 static int sd_send_scsi_LOG_SENSE(sd_ssc_t *ssc, uchar_t *bufaddr,
1536 	uint16_t buflen, uchar_t page_code, uchar_t page_control,
1537 	uint16_t param_ptr, int path_flag);
1538 static int sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION(sd_ssc_t *ssc,
1539 	uchar_t *bufaddr, size_t buflen, uchar_t class_req);
1540 static boolean_t sd_gesn_media_data_valid(uchar_t *data);
1541 
1542 static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1543 static void sd_free_rqs(struct sd_lun *un);
1544 
1545 static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1546 	uchar_t *data, int len, int fmt);
1547 static void sd_panic_for_res_conflict(struct sd_lun *un);
1548 
1549 /*
1550  * Disk Ioctl Function Prototypes
1551  */
1552 static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1553 static int sd_get_media_info_ext(dev_t dev, caddr_t arg, int flag);
1554 static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1555 static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1556 
1557 /*
1558  * Multi-host Ioctl Prototypes
 *
 * Note: sd_resv_reclaim_thread takes no arguments and is declared with an
 * explicit (void) parameter list; an empty parameter list would declare an
 * unprototyped function in C and defeat compile-time argument checking.
1559  */
1560 static int sd_check_mhd(dev_t dev, int interval);
1561 static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1562 static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1563 static char *sd_sname(uchar_t status);
1564 static void sd_mhd_resvd_recover(void *arg);
1565 static void sd_resv_reclaim_thread(void);
1566 static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1567 static int sd_reserve_release(dev_t dev, int cmd);
1568 static void sd_rmv_resv_reclaim_req(dev_t dev);
1569 static void sd_mhd_reset_notify_cb(caddr_t arg);
1570 static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1571 	mhioc_inkeys_t *usrp, int flag);
1572 static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1573 	mhioc_inresvs_t *usrp, int flag);
1574 static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1575 static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1576 static int sd_mhdioc_release(dev_t dev);
1577 static int sd_mhdioc_register_devid(dev_t dev);
1578 static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1579 static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1580 
1581 /*
1582  * SCSI removable prototypes
1583  */
1584 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1585 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1586 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1587 static int sr_pause_resume(dev_t dev, int mode);
1588 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1589 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1590 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1591 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1592 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1593 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1594 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1595 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1596 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1597 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1598 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1599 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1600 static int sr_eject(dev_t dev);
1601 static void sr_ejected(register struct sd_lun *un);
1602 static int sr_check_wp(dev_t dev);
1603 static opaque_t sd_watch_request_submit(struct sd_lun *un);
1604 static int sd_check_media(dev_t dev, enum dkio_state state);
1605 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1606 static void sd_delayed_cv_broadcast(void *arg);
1607 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1608 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1609 
1610 static int sd_log_page_supported(sd_ssc_t *ssc, int log_page);
1611 
1612 /*
1613  * Function Prototype for the non-512 support (DVDRAM, MO etc.) functions.
1614  */
1615 static void sd_check_for_writable_cd(sd_ssc_t *ssc, int path_flag);
1616 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1617 static void sd_wm_cache_destructor(void *wm, void *un);
1618 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1619 	daddr_t endb, ushort_t typ);
1620 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1621 	daddr_t endb);
1622 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1623 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1624 static void sd_read_modify_write_task(void * arg);
1625 static int
1626 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
1627 	struct buf **bpp);
1628 
1629 
1630 /*
1631  * Function prototypes for failfast support.
1632  */
1633 static void sd_failfast_flushq(struct sd_lun *un);
1634 static int sd_failfast_flushq_callback(struct buf *bp);
1635 
1636 /*
1637  * Function prototypes to check for lsi devices
1638  */
1639 static void sd_is_lsi(struct sd_lun *un);
1640 
1641 /*
1642  * Function prototypes for partial DMA support
1643  */
1644 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1645 		struct scsi_pkt *pkt, struct sd_xbuf *xp);
1646 
1647 
1648 /* Function prototypes for cmlb */
1649 static int sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
1650     diskaddr_t start_block, size_t reqlength, void *tg_cookie);
1651 
1652 static int sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie);
1653 
1654 /*
1655  * For printing RMW warning message timely
1656  */
1657 static void sd_rmw_msg_print_handler(void *arg);
1658 
1659 /*
1660  * Constants for failfast support:
1661  *
1662  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1663  * failfast processing being performed.
1664  *
1665  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1666  * failfast processing on all bufs with B_FAILFAST set.
1667  */
1668 
1669 #define	SD_FAILFAST_INACTIVE		0
1670 #define	SD_FAILFAST_ACTIVE		1
1671 
1672 /*
1673  * Bitmask to control behavior of buf(9S) flushes when a transition to
1674  * the failfast state occurs. Optional bits include:
1675  *
1676  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1677  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1678  * be flushed.
1679  *
1680  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1681  * driver, in addition to the regular wait queue. This includes the xbuf
1682  * queues. When clear, only the driver's wait queue will be flushed.
1683  */
1684 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1685 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1686 
1687 /*
1688  * The default behavior is to only flush bufs that have B_FAILFAST set, but
1689  * to flush all queues within the driver.
 *
 * sd_failfast_flushctl is a bitmask built from the SD_FAILFAST_FLUSH_*
 * values above.
1690  */
1691 static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
1692 
1693 
1694 /*
1695  * SD Testing Fault Injection
1696  */
1697 #ifdef SD_FAULT_INJECTION
1698 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1699 static void sd_faultinjection(struct scsi_pkt *pktp);
1700 static void sd_injection_log(char *buf, struct sd_lun *un);
1701 #endif
1702 
1703 /*
1704  * Device driver ops vector
1705  */
/*
 * Character/block device entry points for this driver; handed to the
 * framework via sd_ops below.  See cb_ops(9S) for the field layout.
 */
1706 static struct cb_ops sd_cb_ops = {
1707 	sdopen,			/* open */
1708 	sdclose,		/* close */
1709 	sdstrategy,		/* strategy */
1710 	nodev,			/* print */
1711 	sddump,			/* dump */
1712 	sdread,			/* read */
1713 	sdwrite,		/* write */
1714 	sdioctl,		/* ioctl */
1715 	nodev,			/* devmap */
1716 	nodev,			/* mmap */
1717 	nodev,			/* segmap */
1718 	nochpoll,		/* poll */
1719 	sd_prop_op,		/* cb_prop_op */
1720 	0,			/* streamtab  */
1721 	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
1722 	CB_REV,			/* cb_rev */
1723 	sdaread, 		/* async I/O read entry point */
1724 	sdawrite		/* async I/O write entry point */
1725 };
1726 
/*
 * Device operations vector for sd; referenced by the modldrv below.
 * See dev_ops(9S) for the field layout.
 */
1727 struct dev_ops sd_ops = {
1728 	DEVO_REV,		/* devo_rev, */
1729 	0,			/* refcnt  */
1730 	sdinfo,			/* info */
1731 	nulldev,		/* identify */
1732 	sdprobe,		/* probe */
1733 	sdattach,		/* attach */
1734 	sddetach,		/* detach */
1735 	nodev,			/* reset */
1736 	&sd_cb_ops,		/* driver operations */
1737 	NULL,			/* bus operations */
1738 	sdpower,		/* power */
1739 	ddi_quiesce_not_needed,		/* quiesce */
1740 };
1741 
1742 /*
1743  * This is the loadable module wrapper.
1744  */
1745 #include <sys/modctl.h>
1746 
/*
 * Module linkage: the normal build registers sd as a driver module
 * (mod_driverops + sd_ops).  The XPV_HVM_DRIVER build instead registers a
 * misc module (mod_miscops) under the "HVM "-prefixed name and supplies no
 * dev_ops.
 */
1747 #ifndef XPV_HVM_DRIVER
1748 static struct modldrv modldrv = {
1749 	&mod_driverops,		/* Type of module. This one is a driver */
1750 	SD_MODULE_NAME,		/* Module name. */
1751 	&sd_ops			/* driver ops */
1752 };
1753 
1754 static struct modlinkage modlinkage = {
1755 	MODREV_1, &modldrv, NULL
1756 };
1757 
1758 #else /* XPV_HVM_DRIVER */
1759 static struct modlmisc modlmisc = {
1760 	&mod_miscops,		/* Type of module. This one is a misc */
1761 	"HVM " SD_MODULE_NAME,		/* Module name. */
1762 };
1763 
1764 static struct modlinkage modlinkage = {
1765 	MODREV_1, &modlmisc, NULL
1766 };
1767 
1768 #endif /* XPV_HVM_DRIVER */
1769 
/*
 * Target-geometry ops vector registered with cmlb: media read/write
 * (sd_tg_rdwr) and geometry/capacity info (sd_tg_getinfo) callbacks.
 */
1770 static cmlb_tg_ops_t sd_tgops = {
1771 	TG_DK_OPS_VERSION_1,
1772 	sd_tg_rdwr,
1773 	sd_tg_getinfo
1774 };
1775 
/*
 * Driver-specific additions to the standard ASC/ASCQ sense-key message
 * table (asc, ascq, message).  The list is terminated by the 0xffff
 * sentinel entry.  Hex literals use lowercase consistently.
 */
1776 static struct scsi_asq_key_strings sd_additional_codes[] = {
1777 	0x81, 0, "Logical Unit is Reserved",
1778 	0x85, 0, "Audio Address Not Valid",
1779 	0xb6, 0, "Media Load Mechanism Failed",
1780 	0xb9, 0, "Audio Play Operation Aborted",
1781 	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1782 	0x53, 2, "Medium removal prevented",
1783 	0x6f, 0, "Authentication failed during key exchange",
1784 	0x6f, 1, "Key not present",
1785 	0x6f, 2, "Key not established",
1786 	0x6f, 3, "Read without proper authentication",
1787 	0x6f, 4, "Mismatched region to this logical unit",
1788 	0x6f, 5, "Region reset count error",
1789 	0xffff, 0x0, NULL
1790 };
1791 
1792 
1793 /*
1794  * Struct for passing printing information for sense data messages
1795  */
1796 struct sd_sense_info {
	/* severity level used when printing the sense message */
1797 	int	ssi_severity;
	/*
	 * non-zero when the sense data indicates a predictive failure
	 * (PFA) condition — presumably consumed by the sense-message
	 * printing code; confirm against users of this struct.
	 */
1798 	int	ssi_pfa_flag;
1799 };
1800 
1801 /*
1802  * Table of function pointers for iostart-side routines. Separate "chains"
1803  * of layered function calls are formed by placing the function pointers
1804  * sequentially in the desired order. Functions are called according to an
1805  * incrementing table index ordering. The last function in each chain must
1806  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1807  * in the sd_iodone_chain[] array.
1808  *
1809  * Note: It may seem more natural to organize both the iostart and iodone
1810  * functions together, into an array of structures (or some similar
1811  * organization) with a common index, rather than two separate arrays which
1812  * must be maintained in synchronization. The purpose of this division is
1813  * to achieve improved performance: individual arrays allows for more
1814  * effective cache line utilization on certain platforms.
1815  */
1816 
/* Signature shared by every iostart- and iodone-side chain routine. */
1817 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1818 
1819 
1820 static sd_chain_t sd_iostart_chain[] = {
1821 
1822 	/* Chain for buf IO for disk drive targets (PM enabled) */
1823 	sd_mapblockaddr_iostart,	/* Index: 0 */
1824 	sd_pm_iostart,			/* Index: 1 */
1825 	sd_core_iostart,		/* Index: 2 */
1826 
1827 	/* Chain for buf IO for disk drive targets (PM disabled) */
1828 	sd_mapblockaddr_iostart,	/* Index: 3 */
1829 	sd_core_iostart,		/* Index: 4 */
1830 
1831 	/*
1832 	 * Chain for buf IO for removable-media or large sector size
1833 	 * disk drive targets with RMW needed (PM enabled)
1834 	 */
1835 	sd_mapblockaddr_iostart,	/* Index: 5 */
1836 	sd_mapblocksize_iostart,	/* Index: 6 */
1837 	sd_pm_iostart,			/* Index: 7 */
1838 	sd_core_iostart,		/* Index: 8 */
1839 
1840 	/*
1841 	 * Chain for buf IO for removable-media or large sector size
1842 	 * disk drive targets with RMW needed (PM disabled)
1843 	 */
1844 	sd_mapblockaddr_iostart,	/* Index: 9 */
1845 	sd_mapblocksize_iostart,	/* Index: 10 */
1846 	sd_core_iostart,		/* Index: 11 */
1847 
1848 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1849 	sd_mapblockaddr_iostart,	/* Index: 12 */
1850 	sd_checksum_iostart,		/* Index: 13 */
1851 	sd_pm_iostart,			/* Index: 14 */
1852 	sd_core_iostart,		/* Index: 15 */
1853 
1854 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1855 	sd_mapblockaddr_iostart,	/* Index: 16 */
1856 	sd_checksum_iostart,		/* Index: 17 */
1857 	sd_core_iostart,		/* Index: 18 */
1858 
1859 	/* Chain for USCSI commands (all targets) */
1860 	sd_pm_iostart,			/* Index: 19 */
1861 	sd_core_iostart,		/* Index: 20 */
1862 
1863 	/* Chain for checksumming USCSI commands (all targets) */
1864 	sd_checksum_uscsi_iostart,	/* Index: 21 */
1865 	sd_pm_iostart,			/* Index: 22 */
1866 	sd_core_iostart,		/* Index: 23 */
1867 
1868 	/* Chain for "direct" USCSI commands (all targets) */
1869 	sd_core_iostart,		/* Index: 24 */
1870 
1871 	/* Chain for "direct priority" USCSI commands (all targets) */
1872 	sd_core_iostart,		/* Index: 25 */
1873 
1874 	/*
1875 	 * Chain for buf IO for large sector size disk drive targets
1876 	 * with RMW needed with checksumming (PM enabled)
1877 	 */
1878 	sd_mapblockaddr_iostart,	/* Index: 26 */
1879 	sd_mapblocksize_iostart,	/* Index: 27 */
1880 	sd_checksum_iostart,		/* Index: 28 */
1881 	sd_pm_iostart,			/* Index: 29 */
1882 	sd_core_iostart,		/* Index: 30 */
1883 
1884 	/*
1885 	 * Chain for buf IO for large sector size disk drive targets
1886 	 * with RMW needed with checksumming (PM disabled)
1887 	 */
1888 	sd_mapblockaddr_iostart,	/* Index: 31 */
1889 	sd_mapblocksize_iostart,	/* Index: 32 */
1890 	sd_checksum_iostart,		/* Index: 33 */
1891 	sd_core_iostart,		/* Index: 34 */
1892 
1893 };
1894 
1895 /*
1896  * Macros to locate the first function of each iostart chain in the
1897  * sd_iostart_chain[] array. These are located by the index in the array.
1898  */
1899 #define	SD_CHAIN_DISK_IOSTART			0
1900 #define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1901 #define	SD_CHAIN_MSS_DISK_IOSTART		5
1902 #define	SD_CHAIN_RMMEDIA_IOSTART		5
1903 #define	SD_CHAIN_MSS_DISK_IOSTART_NO_PM		9
1904 #define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1905 #define	SD_CHAIN_CHKSUM_IOSTART			12
1906 #define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1907 #define	SD_CHAIN_USCSI_CMD_IOSTART		19
1908 #define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1909 #define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1910 #define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
1911 #define	SD_CHAIN_MSS_CHKSUM_IOSTART		26
1912 #define	SD_CHAIN_MSS_CHKSUM_IOSTART_NO_PM	31
1913 
1914 
1915 /*
1916  * Table of function pointers for the iodone-side routines for the driver-
1917  * internal layering mechanism.  The calling sequence for iodone routines
1918  * uses a decrementing table index, so the last routine called in a chain
1919  * must be at the lowest array index location for that chain.  The last
1920  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1921  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1922  * of the functions in an iodone side chain must correspond to the ordering
1923  * of the iostart routines for that chain.  Note that there is no iodone
1924  * side routine that corresponds to sd_core_iostart(), so there is no
1925  * entry in the table for this.
1926  */
1927 
1928 static sd_chain_t sd_iodone_chain[] = {
1929 
1930 	/* Chain for buf IO for disk drive targets (PM enabled) */
1931 	sd_buf_iodone,			/* Index: 0 */
1932 	sd_mapblockaddr_iodone,		/* Index: 1 */
1933 	sd_pm_iodone,			/* Index: 2 */
1934 
1935 	/* Chain for buf IO for disk drive targets (PM disabled) */
1936 	sd_buf_iodone,			/* Index: 3 */
1937 	sd_mapblockaddr_iodone,		/* Index: 4 */
1938 
1939 	/*
1940 	 * Chain for buf IO for removable-media or large sector size
1941 	 * disk drive targets with RMW needed (PM enabled)
1942 	 */
1943 	sd_buf_iodone,			/* Index: 5 */
1944 	sd_mapblockaddr_iodone,		/* Index: 6 */
1945 	sd_mapblocksize_iodone,		/* Index: 7 */
1946 	sd_pm_iodone,			/* Index: 8 */
1947 
1948 	/*
1949 	 * Chain for buf IO for removable-media or large sector size
1950 	 * disk drive targets with RMW needed (PM disabled)
1951 	 */
1952 	sd_buf_iodone,			/* Index: 9 */
1953 	sd_mapblockaddr_iodone,		/* Index: 10 */
1954 	sd_mapblocksize_iodone,		/* Index: 11 */
1955 
1956 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1957 	sd_buf_iodone,			/* Index: 12 */
1958 	sd_mapblockaddr_iodone,		/* Index: 13 */
1959 	sd_checksum_iodone,		/* Index: 14 */
1960 	sd_pm_iodone,			/* Index: 15 */
1961 
1962 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1963 	sd_buf_iodone,			/* Index: 16 */
1964 	sd_mapblockaddr_iodone,		/* Index: 17 */
1965 	sd_checksum_iodone,		/* Index: 18 */
1966 
1967 	/* Chain for USCSI commands (non-checksum targets) */
1968 	sd_uscsi_iodone,		/* Index: 19 */
1969 	sd_pm_iodone,			/* Index: 20 */
1970 
1971 	/* Chain for USCSI commands (checksum targets) */
1972 	sd_uscsi_iodone,		/* Index: 21 */
1973 	sd_checksum_uscsi_iodone,	/* Index: 22 */
1974 	sd_pm_iodone,			/* Index: 23 */
1975 
1976 	/* Chain for "direct" USCSI commands (all targets) */
1977 	sd_uscsi_iodone,		/* Index: 24 */
1978 
1979 	/* Chain for "direct priority" USCSI commands (all targets) */
1980 	sd_uscsi_iodone,		/* Index: 25 */
1981 
1982 	/*
1983 	 * Chain for buf IO for large sector size disk drive targets
1984 	 * with checksumming (PM enabled)
1985 	 */
1986 	sd_buf_iodone,			/* Index: 26 */
1987 	sd_mapblockaddr_iodone,		/* Index: 27 */
1988 	sd_mapblocksize_iodone,		/* Index: 28 */
1989 	sd_checksum_iodone,		/* Index: 29 */
1990 	sd_pm_iodone,			/* Index: 30 */
1991 
1992 	/*
1993 	 * Chain for buf IO for large sector size disk drive targets
1994 	 * with checksumming (PM disabled)
1995 	 */
1996 	sd_buf_iodone,			/* Index: 31 */
1997 	sd_mapblockaddr_iodone,		/* Index: 32 */
1998 	sd_mapblocksize_iodone,		/* Index: 33 */
1999 	sd_checksum_iodone,		/* Index: 34 */
2000 };
2001 
2002 
2003 /*
2004  * Macros to locate the "first" function in the sd_iodone_chain[] array for
2005  * each iodone-side chain. These are located by the array index, but as the
2006  * iodone side functions are called in a decrementing-index order, the
2007  * highest index number in each chain must be specified (as these correspond
2008  * to the first function in the iodone chain that will be called by the core
2009  * at IO completion time).
2010  */
2011 
2012 #define	SD_CHAIN_DISK_IODONE			2
2013 #define	SD_CHAIN_DISK_IODONE_NO_PM		4
2014 #define	SD_CHAIN_RMMEDIA_IODONE			8
2015 #define	SD_CHAIN_MSS_DISK_IODONE		8
2016 #define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
2017 #define	SD_CHAIN_MSS_DISK_IODONE_NO_PM		11
2018 #define	SD_CHAIN_CHKSUM_IODONE			15
2019 #define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
2020 #define	SD_CHAIN_USCSI_CMD_IODONE		20
2021 #define	SD_CHAIN_USCSI_CHKSUM_IODONE		22
2022 #define	SD_CHAIN_DIRECT_CMD_IODONE		24
2023 #define	SD_CHAIN_PRIORITY_CMD_IODONE		25
2024 #define	SD_CHAIN_MSS_CHKSUM_IODONE		30
2025 #define	SD_CHAIN_MSS_CHKSUM_IODONE_NO_PM	34
2026 
2027 
2028 
2029 /*
2030  * Array to map a layering chain index to the appropriate initpkt routine.
2031  * The redundant entries are present so that the index used for accessing
2032  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2033  * with this table as well.
2034  */
2035 typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
2036 
2037 static sd_initpkt_t	sd_initpkt_map[] = {
2038 
2039 	/* Chain for buf IO for disk drive targets (PM enabled) */
2040 	sd_initpkt_for_buf,		/* Index: 0 */
2041 	sd_initpkt_for_buf,		/* Index: 1 */
2042 	sd_initpkt_for_buf,		/* Index: 2 */
2043 
2044 	/* Chain for buf IO for disk drive targets (PM disabled) */
2045 	sd_initpkt_for_buf,		/* Index: 3 */
2046 	sd_initpkt_for_buf,		/* Index: 4 */
2047 
2048 	/*
2049 	 * Chain for buf IO for removable-media or large sector size
2050 	 * disk drive targets (PM enabled)
2051 	 */
2052 	sd_initpkt_for_buf,		/* Index: 5 */
2053 	sd_initpkt_for_buf,		/* Index: 6 */
2054 	sd_initpkt_for_buf,		/* Index: 7 */
2055 	sd_initpkt_for_buf,		/* Index: 8 */
2056 
2057 	/*
2058 	 * Chain for buf IO for removable-media or large sector size
2059 	 * disk drive targets (PM disabled)
2060 	 */
2061 	sd_initpkt_for_buf,		/* Index: 9 */
2062 	sd_initpkt_for_buf,		/* Index: 10 */
2063 	sd_initpkt_for_buf,		/* Index: 11 */
2064 
2065 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2066 	sd_initpkt_for_buf,		/* Index: 12 */
2067 	sd_initpkt_for_buf,		/* Index: 13 */
2068 	sd_initpkt_for_buf,		/* Index: 14 */
2069 	sd_initpkt_for_buf,		/* Index: 15 */
2070 
2071 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2072 	sd_initpkt_for_buf,		/* Index: 16 */
2073 	sd_initpkt_for_buf,		/* Index: 17 */
2074 	sd_initpkt_for_buf,		/* Index: 18 */
2075 
2076 	/* Chain for USCSI commands (non-checksum targets) */
2077 	sd_initpkt_for_uscsi,		/* Index: 19 */
2078 	sd_initpkt_for_uscsi,		/* Index: 20 */
2079 
2080 	/* Chain for USCSI commands (checksum targets) */
2081 	sd_initpkt_for_uscsi,		/* Index: 21 */
2082 	sd_initpkt_for_uscsi,		/* Index: 22 */
2083 	sd_initpkt_for_uscsi,		/* Index: 23 */
2084 
2085 	/* Chain for "direct" USCSI commands (all targets) */
2086 	sd_initpkt_for_uscsi,		/* Index: 24 */
2087 
2088 	/* Chain for "direct priority" USCSI commands (all targets) */
2089 	sd_initpkt_for_uscsi,		/* Index: 25 */
2090 
2091 	/*
2092 	 * Chain for buf IO for large sector size disk drive targets
2093 	 * with checksumming (PM enabled)
2094 	 */
2095 	sd_initpkt_for_buf,		/* Index: 26 */
2096 	sd_initpkt_for_buf,		/* Index: 27 */
2097 	sd_initpkt_for_buf,		/* Index: 28 */
2098 	sd_initpkt_for_buf,		/* Index: 29 */
2099 	sd_initpkt_for_buf,		/* Index: 30 */
2100 
2101 	/*
2102 	 * Chain for buf IO for large sector size disk drive targets
2103 	 * with checksumming (PM disabled)
2104 	 */
2105 	sd_initpkt_for_buf,		/* Index: 31 */
2106 	sd_initpkt_for_buf,		/* Index: 32 */
2107 	sd_initpkt_for_buf,		/* Index: 33 */
2108 	sd_initpkt_for_buf,		/* Index: 34 */
2109 };
2110 
2111 
2112 /*
2113  * Array to map a layering chain index to the appropriate destroypkt routine.
2114  * The redundant entries are present so that the index used for accessing
2115  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2116  * with this table as well.
2117  */
2118 typedef void (*sd_destroypkt_t)(struct buf *);
2119 
2120 static sd_destroypkt_t	sd_destroypkt_map[] = {
2121 
2122 	/* Chain for buf IO for disk drive targets (PM enabled) */
2123 	sd_destroypkt_for_buf,		/* Index: 0 */
2124 	sd_destroypkt_for_buf,		/* Index: 1 */
2125 	sd_destroypkt_for_buf,		/* Index: 2 */
2126 
2127 	/* Chain for buf IO for disk drive targets (PM disabled) */
2128 	sd_destroypkt_for_buf,		/* Index: 3 */
2129 	sd_destroypkt_for_buf,		/* Index: 4 */
2130 
2131 	/*
2132 	 * Chain for buf IO for removable-media or large sector size
2133 	 * disk drive targets (PM enabled)
2134 	 */
2135 	sd_destroypkt_for_buf,		/* Index: 5 */
2136 	sd_destroypkt_for_buf,		/* Index: 6 */
2137 	sd_destroypkt_for_buf,		/* Index: 7 */
2138 	sd_destroypkt_for_buf,		/* Index: 8 */
2139 
2140 	/*
2141 	 * Chain for buf IO for removable-media or large sector size
2142 	 * disk drive targets (PM disabled)
2143 	 */
2144 	sd_destroypkt_for_buf,		/* Index: 9 */
2145 	sd_destroypkt_for_buf,		/* Index: 10 */
2146 	sd_destroypkt_for_buf,		/* Index: 11 */
2147 
2148 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2149 	sd_destroypkt_for_buf,		/* Index: 12 */
2150 	sd_destroypkt_for_buf,		/* Index: 13 */
2151 	sd_destroypkt_for_buf,		/* Index: 14 */
2152 	sd_destroypkt_for_buf,		/* Index: 15 */
2153 
2154 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2155 	sd_destroypkt_for_buf,		/* Index: 16 */
2156 	sd_destroypkt_for_buf,		/* Index: 17 */
2157 	sd_destroypkt_for_buf,		/* Index: 18 */
2158 
2159 	/* Chain for USCSI commands (non-checksum targets) */
2160 	sd_destroypkt_for_uscsi,	/* Index: 19 */
2161 	sd_destroypkt_for_uscsi,	/* Index: 20 */
2162 
2163 	/* Chain for USCSI commands (checksum targets) */
2164 	sd_destroypkt_for_uscsi,	/* Index: 21 */
2165 	sd_destroypkt_for_uscsi,	/* Index: 22 */
2166 	sd_destroypkt_for_uscsi,	/* Index: 23 */
2167 
2168 	/* Chain for "direct" USCSI commands (all targets) */
2169 	sd_destroypkt_for_uscsi,	/* Index: 24 */
2170 
2171 	/* Chain for "direct priority" USCSI commands (all targets) */
2172 	sd_destroypkt_for_uscsi,	/* Index: 25 */
2173 
2174 	/*
2175 	 * Chain for buf IO for large sector size disk drive targets
2176 	 * with checksumming (PM enabled)
2177 	 */
2178 	sd_destroypkt_for_buf,		/* Index: 26 */
2179 	sd_destroypkt_for_buf,		/* Index: 27 */
2180 	sd_destroypkt_for_buf,		/* Index: 28 */
2181 	sd_destroypkt_for_buf,		/* Index: 29 */
2182 	sd_destroypkt_for_buf,		/* Index: 30 */
2183 
2184 	/*
2185 	 * Chain for buf IO for large sector size disk drive targets
2186 	 * with checksumming (PM disabled)
2187 	 */
2188 	sd_destroypkt_for_buf,		/* Index: 31 */
2189 	sd_destroypkt_for_buf,		/* Index: 32 */
2190 	sd_destroypkt_for_buf,		/* Index: 33 */
2191 	sd_destroypkt_for_buf,		/* Index: 34 */
2192 };
2193 
2194 
2195 
2196 /*
2197  * Array to map a layering chain index to the appropriate chain "type".
2198  * The chain type indicates a specific property/usage of the chain.
2199  * The redundant entries are present so that the index used for accessing
2200  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2201  * with this table as well.
2202  */
2203 
2204 #define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
2205 #define	SD_CHAIN_BUFIO			1	/* regular buf IO */
2206 #define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
2207 #define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
2208 #define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
2209 						/* (for error recovery) */
2210 
/* Entries must stay in one-to-one index correspondence with sd_iostart_chain[]. */
2211 static int sd_chain_type_map[] = {
2212 
2213 	/* Chain for buf IO for disk drive targets (PM enabled) */
2214 	SD_CHAIN_BUFIO,			/* Index: 0 */
2215 	SD_CHAIN_BUFIO,			/* Index: 1 */
2216 	SD_CHAIN_BUFIO,			/* Index: 2 */
2217 
2218 	/* Chain for buf IO for disk drive targets (PM disabled) */
2219 	SD_CHAIN_BUFIO,			/* Index: 3 */
2220 	SD_CHAIN_BUFIO,			/* Index: 4 */
2221 
2222 	/*
2223 	 * Chain for buf IO for removable-media or large sector size
2224 	 * disk drive targets (PM enabled)
2225 	 */
2226 	SD_CHAIN_BUFIO,			/* Index: 5 */
2227 	SD_CHAIN_BUFIO,			/* Index: 6 */
2228 	SD_CHAIN_BUFIO,			/* Index: 7 */
2229 	SD_CHAIN_BUFIO,			/* Index: 8 */
2230 
2231 	/*
2232 	 * Chain for buf IO for removable-media or large sector size
2233 	 * disk drive targets (PM disabled)
2234 	 */
2235 	SD_CHAIN_BUFIO,			/* Index: 9 */
2236 	SD_CHAIN_BUFIO,			/* Index: 10 */
2237 	SD_CHAIN_BUFIO,			/* Index: 11 */
2238 
2239 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2240 	SD_CHAIN_BUFIO,			/* Index: 12 */
2241 	SD_CHAIN_BUFIO,			/* Index: 13 */
2242 	SD_CHAIN_BUFIO,			/* Index: 14 */
2243 	SD_CHAIN_BUFIO,			/* Index: 15 */
2244 
2245 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2246 	SD_CHAIN_BUFIO,			/* Index: 16 */
2247 	SD_CHAIN_BUFIO,			/* Index: 17 */
2248 	SD_CHAIN_BUFIO,			/* Index: 18 */
2249 
2250 	/* Chain for USCSI commands (non-checksum targets) */
2251 	SD_CHAIN_USCSI,			/* Index: 19 */
2252 	SD_CHAIN_USCSI,			/* Index: 20 */
2253 
2254 	/* Chain for USCSI commands (checksum targets) */
2255 	SD_CHAIN_USCSI,			/* Index: 21 */
2256 	SD_CHAIN_USCSI,			/* Index: 22 */
2257 	SD_CHAIN_USCSI,			/* Index: 23 */
2258 
2259 	/* Chain for "direct" USCSI commands (all targets) */
2260 	SD_CHAIN_DIRECT,		/* Index: 24 */
2261 
2262 	/* Chain for "direct priority" USCSI commands (all targets) */
2263 	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
2264 
2265 	/*
2266 	 * Chain for buf IO for large sector size disk drive targets
2267 	 * with checksumming (PM enabled)
2268 	 */
2269 	SD_CHAIN_BUFIO,			/* Index: 26 */
2270 	SD_CHAIN_BUFIO,			/* Index: 27 */
2271 	SD_CHAIN_BUFIO,			/* Index: 28 */
2272 	SD_CHAIN_BUFIO,			/* Index: 29 */
2273 	SD_CHAIN_BUFIO,			/* Index: 30 */
2274 
2275 	/*
2276 	 * Chain for buf IO for large sector size disk drive targets
2277 	 * with checksumming (PM disabled)
2278 	 */
2279 	SD_CHAIN_BUFIO,			/* Index: 31 */
2280 	SD_CHAIN_BUFIO,			/* Index: 32 */
2281 	SD_CHAIN_BUFIO,			/* Index: 33 */
2282 	SD_CHAIN_BUFIO,			/* Index: 34 */
2283 };
2284 
2285 
2286 /* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
2287 #define	SD_IS_BUFIO(xp)			\
2288 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2289 
2290 /* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2291 #define	SD_IS_DIRECT_PRIORITY(xp)	\
2292 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
2293 
2294 
2295 
2296 /*
2297  * Struct, array, and macros to map a specific chain to the appropriate
2298  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2299  *
2300  * The sd_chain_index_map[] array is used at attach time to set the various
2301  * un_xxx_chain type members of the sd_lun softstate to the specific layering
2302  * chain to be used with the instance. This allows different instances to use
2303  * different chain for buf IO, uscsi IO, etc.. Also, since the xb_chain_iostart
2304  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2305  * values at sd_xbuf init time, this allows (1) layering chains may be changed
2306  * dynamically & without the use of locking; and (2) a layer may update the
2307  * xb_chain_io[start|done] member in a given xbuf with its current index value,
2308  * to allow for deferred processing of an IO within the same chain from a
2309  * different execution context.
2310  */
2311 
2312 struct sd_chain_index {
	/* starting index into sd_iostart_chain[] for this chain */
2313 	int	sci_iostart_index;
	/* starting index into sd_iodone_chain[] for this chain */
2314 	int	sci_iodone_index;
2315 };
2316 
2317 static struct sd_chain_index	sd_chain_index_map[] = {
2318 	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
2319 	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
2320 	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
2321 	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
2322 	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
2323 	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
2324 	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
2325 	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
2326 	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
2327 	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
2328 	{ SD_CHAIN_MSS_CHKSUM_IOSTART,		SD_CHAIN_MSS_CHKSUM_IODONE },
2329 	{ SD_CHAIN_MSS_CHKSUM_IOSTART_NO_PM, SD_CHAIN_MSS_CHKSUM_IODONE_NO_PM },
2330 
2331 };
2332 
2333 
2334 /*
2335  * The following are indexes into the sd_chain_index_map[] array.
2336  */
2337 
2338 /* un->un_buf_chain_type must be set to one of these */
2339 #define	SD_CHAIN_INFO_DISK		0
2340 #define	SD_CHAIN_INFO_DISK_NO_PM	1
2341 #define	SD_CHAIN_INFO_RMMEDIA		2
2342 #define	SD_CHAIN_INFO_MSS_DISK		2
2343 #define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2344 #define	SD_CHAIN_INFO_MSS_DSK_NO_PM	3
2345 #define	SD_CHAIN_INFO_CHKSUM		4
2346 #define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2347 #define	SD_CHAIN_INFO_MSS_DISK_CHKSUM	10
2348 #define	SD_CHAIN_INFO_MSS_DISK_CHKSUM_NO_PM	11
2349 
2350 /* un->un_uscsi_chain_type must be set to one of these */
2351 #define	SD_CHAIN_INFO_USCSI_CMD		6
2352 /* USCSI with PM disabled is the same as DIRECT */
2353 #define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2354 #define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2355 
2356 /* un->un_direct_chain_type must be set to one of these */
2357 #define	SD_CHAIN_INFO_DIRECT_CMD	8
2358 
2359 /* un->un_priority_chain_type must be set to one of these */
2360 #define	SD_CHAIN_INFO_PRIORITY_CMD	9
2361 
2362 /* size for devid inquiries */
2363 #define	MAX_INQUIRY_SIZE		0xF0
2364 
2365 /*
2366  * Macros used by functions to pass a given buf(9S) struct along to the
2367  * next function in the layering chain for further processing.
2368  *
2369  * In the following macros, passing more than three arguments to the called
2370  * routines causes the optimizer for the SPARC compiler to stop doing tail
2371  * call elimination which results in significant performance degradation.
2372  */
2373 #define	SD_BEGIN_IOSTART(index, un, bp)	\
2374 	((*(sd_iostart_chain[index]))(index, un, bp))
2375 
2376 #define	SD_BEGIN_IODONE(index, un, bp)	\
2377 	((*(sd_iodone_chain[index]))(index, un, bp))
2378 
2379 #define	SD_NEXT_IOSTART(index, un, bp)				\
2380 	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2381 
2382 #define	SD_NEXT_IODONE(index, un, bp)				\
2383 	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
2384 
2385 /*
2386  *    Function: _init
2387  *
2388  * Description: This is the driver _init(9E) entry point.
2389  *
2390  * Return Code: Returns the value from mod_install(9F) or
2391  *		ddi_soft_state_init(9F) as appropriate.
2392  *
2393  *     Context: Called when driver module loaded.
2394  */
2395 
2396 int
2397 _init(void)
2398 {
2399 	int	err;
2400 
2401 	/* establish driver name from module name */
2402 	sd_label = (char *)mod_modname(&modlinkage);
2403 
2404 #ifndef XPV_HVM_DRIVER
2405 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2406 	    SD_MAXUNIT);
2407 	if (err != 0) {
2408 		return (err);
2409 	}
2410 
2411 #else /* XPV_HVM_DRIVER */
2412 	/* Remove the leading "hvm_" from the module name */
2413 	ASSERT(strncmp(sd_label, "hvm_", strlen("hvm_")) == 0);
2414 	sd_label += strlen("hvm_");
2415 
2416 #endif /* XPV_HVM_DRIVER */
2417 
2418 	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2419 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2420 	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2421 
2422 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2423 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2424 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2425 
2426 	/*
2427 	 * it's ok to init here even for fibre device
2428 	 */
2429 	sd_scsi_probe_cache_init();
2430 
2431 	sd_scsi_target_lun_init();
2432 
2433 	/*
2434 	 * Creating taskq before mod_install ensures that all callers (threads)
2435 	 * that enter the module after a successful mod_install encounter
2436 	 * a valid taskq.
2437 	 */
2438 	sd_taskq_create();
2439 
2440 	err = mod_install(&modlinkage);
2441 	if (err != 0) {
2442 		/* delete taskq if install fails */
2443 		sd_taskq_delete();
2444 
2445 		mutex_destroy(&sd_detach_mutex);
2446 		mutex_destroy(&sd_log_mutex);
2447 		mutex_destroy(&sd_label_mutex);
2448 
2449 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2450 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2451 		cv_destroy(&sd_tr.srq_inprocess_cv);
2452 
2453 		sd_scsi_probe_cache_fini();
2454 
2455 		sd_scsi_target_lun_fini();
2456 
2457 #ifndef XPV_HVM_DRIVER
2458 		ddi_soft_state_fini(&sd_state);
2459 #endif /* !XPV_HVM_DRIVER */
2460 		return (err);
2461 	}
2462 
2463 	return (err);
2464 }
2465 
2466 
2467 /*
2468  *    Function: _fini
2469  *
2470  * Description: This is the driver _fini(9E) entry point.
2471  *
2472  * Return Code: Returns the value from mod_remove(9F)
2473  *
2474  *     Context: Called when driver module is unloaded.
2475  */
2476 
2477 int
2478 _fini(void)
2479 {
2480 	int err;
2481 
2482 	if ((err = mod_remove(&modlinkage)) != 0) {
2483 		return (err);
2484 	}
2485 
2486 	sd_taskq_delete();
2487 
2488 	mutex_destroy(&sd_detach_mutex);
2489 	mutex_destroy(&sd_log_mutex);
2490 	mutex_destroy(&sd_label_mutex);
2491 	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2492 
2493 	sd_scsi_probe_cache_fini();
2494 
2495 	sd_scsi_target_lun_fini();
2496 
2497 	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2498 	cv_destroy(&sd_tr.srq_inprocess_cv);
2499 
2500 #ifndef XPV_HVM_DRIVER
2501 	ddi_soft_state_fini(&sd_state);
2502 #endif /* !XPV_HVM_DRIVER */
2503 
2504 	return (err);
2505 }
2506 
2507 
2508 /*
2509  *    Function: _info
2510  *
2511  * Description: This is the driver _info(9E) entry point.
2512  *
2513  *   Arguments: modinfop - pointer to the driver modinfo structure
2514  *
2515  * Return Code: Returns the value from mod_info(9F).
2516  *
2517  *     Context: Kernel thread context
2518  */
2519 
2520 int
2521 _info(struct modinfo *modinfop)
2522 {
2523 	return (mod_info(&modlinkage, modinfop));
2524 }
2525 
2526 
2527 /*
2528  * The following routines implement the driver message logging facility.
2529  * They provide component- and level- based debug output filtering.
2530  * Output may also be restricted to messages for a single instance by
2531  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2532  * to NULL, then messages for all instances are printed.
2533  *
2534  * These routines have been cloned from each other due to the language
2535  * constraints of macros and variable argument list processing.
2536  */
2537 
2538 
2539 /*
2540  *    Function: sd_log_err
2541  *
2542  * Description: This routine is called by the SD_ERROR macro for debug
2543  *		logging of error conditions.
2544  *
2545  *   Arguments: comp - driver component being logged
2546  *		dev  - pointer to driver info structure
2547  *		fmt  - error string and format to be logged
2548  */
2549 
2550 static void
2551 sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
2552 {
2553 	va_list		ap;
2554 	dev_info_t	*dev;
2555 
2556 	ASSERT(un != NULL);
2557 	dev = SD_DEVINFO(un);
2558 	ASSERT(dev != NULL);
2559 
2560 	/*
2561 	 * Filter messages based on the global component and level masks.
2562 	 * Also print if un matches the value of sd_debug_un, or if
2563 	 * sd_debug_un is set to NULL.
2564 	 */
2565 	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
2566 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2567 		mutex_enter(&sd_log_mutex);
2568 		va_start(ap, fmt);
2569 		(void) vsprintf(sd_log_buf, fmt, ap);
2570 		va_end(ap);
2571 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2572 		mutex_exit(&sd_log_mutex);
2573 	}
2574 #ifdef SD_FAULT_INJECTION
2575 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2576 	if (un->sd_injection_mask & comp) {
2577 		mutex_enter(&sd_log_mutex);
2578 		va_start(ap, fmt);
2579 		(void) vsprintf(sd_log_buf, fmt, ap);
2580 		va_end(ap);
2581 		sd_injection_log(sd_log_buf, un);
2582 		mutex_exit(&sd_log_mutex);
2583 	}
2584 #endif
2585 }
2586 
2587 
2588 /*
2589  *    Function: sd_log_info
2590  *
2591  * Description: This routine is called by the SD_INFO macro for debug
2592  *		logging of general purpose informational conditions.
2593  *
2594  *   Arguments: comp - driver component being logged
2595  *		dev  - pointer to driver info structure
2596  *		fmt  - info string and format to be logged
2597  */
2598 
2599 static void
2600 sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
2601 {
2602 	va_list		ap;
2603 	dev_info_t	*dev;
2604 
2605 	ASSERT(un != NULL);
2606 	dev = SD_DEVINFO(un);
2607 	ASSERT(dev != NULL);
2608 
2609 	/*
2610 	 * Filter messages based on the global component and level masks.
2611 	 * Also print if un matches the value of sd_debug_un, or if
2612 	 * sd_debug_un is set to NULL.
2613 	 */
2614 	if ((sd_component_mask & component) &&
2615 	    (sd_level_mask & SD_LOGMASK_INFO) &&
2616 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2617 		mutex_enter(&sd_log_mutex);
2618 		va_start(ap, fmt);
2619 		(void) vsprintf(sd_log_buf, fmt, ap);
2620 		va_end(ap);
2621 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2622 		mutex_exit(&sd_log_mutex);
2623 	}
2624 #ifdef SD_FAULT_INJECTION
2625 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2626 	if (un->sd_injection_mask & component) {
2627 		mutex_enter(&sd_log_mutex);
2628 		va_start(ap, fmt);
2629 		(void) vsprintf(sd_log_buf, fmt, ap);
2630 		va_end(ap);
2631 		sd_injection_log(sd_log_buf, un);
2632 		mutex_exit(&sd_log_mutex);
2633 	}
2634 #endif
2635 }
2636 
2637 
2638 /*
2639  *    Function: sd_log_trace
2640  *
2641  * Description: This routine is called by the SD_TRACE macro for debug
2642  *		logging of trace conditions (i.e. function entry/exit).
2643  *
2644  *   Arguments: comp - driver component being logged
2645  *		dev  - pointer to driver info structure
2646  *		fmt  - trace string and format to be logged
2647  */
2648 
2649 static void
2650 sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
2651 {
2652 	va_list		ap;
2653 	dev_info_t	*dev;
2654 
2655 	ASSERT(un != NULL);
2656 	dev = SD_DEVINFO(un);
2657 	ASSERT(dev != NULL);
2658 
2659 	/*
2660 	 * Filter messages based on the global component and level masks.
2661 	 * Also print if un matches the value of sd_debug_un, or if
2662 	 * sd_debug_un is set to NULL.
2663 	 */
2664 	if ((sd_component_mask & component) &&
2665 	    (sd_level_mask & SD_LOGMASK_TRACE) &&
2666 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2667 		mutex_enter(&sd_log_mutex);
2668 		va_start(ap, fmt);
2669 		(void) vsprintf(sd_log_buf, fmt, ap);
2670 		va_end(ap);
2671 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2672 		mutex_exit(&sd_log_mutex);
2673 	}
2674 #ifdef SD_FAULT_INJECTION
2675 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2676 	if (un->sd_injection_mask & component) {
2677 		mutex_enter(&sd_log_mutex);
2678 		va_start(ap, fmt);
2679 		(void) vsprintf(sd_log_buf, fmt, ap);
2680 		va_end(ap);
2681 		sd_injection_log(sd_log_buf, un);
2682 		mutex_exit(&sd_log_mutex);
2683 	}
2684 #endif
2685 }
2686 
2687 
2688 /*
2689  *    Function: sdprobe
2690  *
2691  * Description: This is the driver probe(9e) entry point function.
2692  *
2693  *   Arguments: devi - opaque device info handle
2694  *
2695  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2696  *              DDI_PROBE_FAILURE: If the probe failed.
2697  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2698  *				   but may be present in the future.
2699  */
2700 
2701 static int
2702 sdprobe(dev_info_t *devi)
2703 {
2704 	struct scsi_device	*devp;
2705 	int			rval;
2706 #ifndef XPV_HVM_DRIVER
2707 	int			instance = ddi_get_instance(devi);
2708 #endif /* !XPV_HVM_DRIVER */
2709 
2710 	/*
2711 	 * if it wasn't for pln, sdprobe could actually be nulldev
2712 	 * in the "__fibre" case.
2713 	 */
2714 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2715 		return (DDI_PROBE_DONTCARE);
2716 	}
2717 
2718 	devp = ddi_get_driver_private(devi);
2719 
2720 	if (devp == NULL) {
2721 		/* Ooops... nexus driver is mis-configured... */
2722 		return (DDI_PROBE_FAILURE);
2723 	}
2724 
2725 #ifndef XPV_HVM_DRIVER
2726 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2727 		return (DDI_PROBE_PARTIAL);
2728 	}
2729 #endif /* !XPV_HVM_DRIVER */
2730 
2731 	/*
2732 	 * Call the SCSA utility probe routine to see if we actually
2733 	 * have a target at this SCSI nexus.
2734 	 */
2735 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2736 	case SCSIPROBE_EXISTS:
2737 		switch (devp->sd_inq->inq_dtype) {
2738 		case DTYPE_DIRECT:
2739 			rval = DDI_PROBE_SUCCESS;
2740 			break;
2741 		case DTYPE_RODIRECT:
2742 			/* CDs etc. Can be removable media */
2743 			rval = DDI_PROBE_SUCCESS;
2744 			break;
2745 		case DTYPE_OPTICAL:
2746 			/*
2747 			 * Rewritable optical driver HP115AA
2748 			 * Can also be removable media
2749 			 */
2750 
2751 			/*
2752 			 * Do not attempt to bind to  DTYPE_OPTICAL if
2753 			 * pre solaris 9 sparc sd behavior is required
2754 			 *
2755 			 * If first time through and sd_dtype_optical_bind
2756 			 * has not been set in /etc/system check properties
2757 			 */
2758 
2759 			if (sd_dtype_optical_bind  < 0) {
2760 				sd_dtype_optical_bind = ddi_prop_get_int
2761 				    (DDI_DEV_T_ANY, devi, 0,
2762 				    "optical-device-bind", 1);
2763 			}
2764 
2765 			if (sd_dtype_optical_bind == 0) {
2766 				rval = DDI_PROBE_FAILURE;
2767 			} else {
2768 				rval = DDI_PROBE_SUCCESS;
2769 			}
2770 			break;
2771 
2772 		case DTYPE_NOTPRESENT:
2773 		default:
2774 			rval = DDI_PROBE_FAILURE;
2775 			break;
2776 		}
2777 		break;
2778 	default:
2779 		rval = DDI_PROBE_PARTIAL;
2780 		break;
2781 	}
2782 
2783 	/*
2784 	 * This routine checks for resource allocation prior to freeing,
2785 	 * so it will take care of the "smart probing" case where a
2786 	 * scsi_probe() may or may not have been issued and will *not*
2787 	 * free previously-freed resources.
2788 	 */
2789 	scsi_unprobe(devp);
2790 	return (rval);
2791 }
2792 
2793 
2794 /*
2795  *    Function: sdinfo
2796  *
2797  * Description: This is the driver getinfo(9e) entry point function.
2798  * 		Given the device number, return the devinfo pointer from
2799  *		the scsi_device structure or the instance number
2800  *		associated with the dev_t.
2801  *
2802  *   Arguments: dip     - pointer to device info structure
2803  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2804  *			  DDI_INFO_DEVT2INSTANCE)
2805  *		arg     - driver dev_t
2806  *		resultp - user buffer for request response
2807  *
2808  * Return Code: DDI_SUCCESS
2809  *              DDI_FAILURE
2810  */
2811 /* ARGSUSED */
2812 static int
2813 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2814 {
2815 	struct sd_lun	*un;
2816 	dev_t		dev;
2817 	int		instance;
2818 	int		error;
2819 
2820 	switch (infocmd) {
2821 	case DDI_INFO_DEVT2DEVINFO:
2822 		dev = (dev_t)arg;
2823 		instance = SDUNIT(dev);
2824 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2825 			return (DDI_FAILURE);
2826 		}
2827 		*result = (void *) SD_DEVINFO(un);
2828 		error = DDI_SUCCESS;
2829 		break;
2830 	case DDI_INFO_DEVT2INSTANCE:
2831 		dev = (dev_t)arg;
2832 		instance = SDUNIT(dev);
2833 		*result = (void *)(uintptr_t)instance;
2834 		error = DDI_SUCCESS;
2835 		break;
2836 	default:
2837 		error = DDI_FAILURE;
2838 	}
2839 	return (error);
2840 }
2841 
2842 /*
2843  *    Function: sd_prop_op
2844  *
2845  * Description: This is the driver prop_op(9e) entry point function.
2846  *		Return the number of blocks for the partition in question
2847  *		or forward the request to the property facilities.
2848  *
2849  *   Arguments: dev       - device number
2850  *		dip       - pointer to device info structure
2851  *		prop_op   - property operator
2852  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2853  *		name      - pointer to property name
2854  *		valuep    - pointer or address of the user buffer
2855  *		lengthp   - property length
2856  *
2857  * Return Code: DDI_PROP_SUCCESS
2858  *              DDI_PROP_NOT_FOUND
2859  *              DDI_PROP_UNDEFINED
2860  *              DDI_PROP_NO_MEMORY
2861  *              DDI_PROP_BUF_TOO_SMALL
2862  */
2863 
2864 static int
2865 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2866 	char *name, caddr_t valuep, int *lengthp)
2867 {
2868 	struct sd_lun	*un;
2869 
2870 	if ((un = ddi_get_soft_state(sd_state, ddi_get_instance(dip))) == NULL)
2871 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2872 		    name, valuep, lengthp));
2873 
2874 	return (cmlb_prop_op(un->un_cmlbhandle,
2875 	    dev, dip, prop_op, mod_flags, name, valuep, lengthp,
2876 	    SDPART(dev), (void *)SD_PATH_DIRECT));
2877 }
2878 
2879 /*
2880  * The following functions are for smart probing:
2881  * sd_scsi_probe_cache_init()
2882  * sd_scsi_probe_cache_fini()
2883  * sd_scsi_clear_probe_cache()
2884  * sd_scsi_probe_with_cache()
2885  */
2886 
2887 /*
2888  *    Function: sd_scsi_probe_cache_init
2889  *
2890  * Description: Initializes the probe response cache mutex and head pointer.
2891  *
2892  *     Context: Kernel thread context
2893  */
2894 
2895 static void
2896 sd_scsi_probe_cache_init(void)
2897 {
2898 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2899 	sd_scsi_probe_cache_head = NULL;
2900 }
2901 
2902 
2903 /*
2904  *    Function: sd_scsi_probe_cache_fini
2905  *
2906  * Description: Frees all resources associated with the probe response cache.
2907  *
2908  *     Context: Kernel thread context
2909  */
2910 
2911 static void
2912 sd_scsi_probe_cache_fini(void)
2913 {
2914 	struct sd_scsi_probe_cache *cp;
2915 	struct sd_scsi_probe_cache *ncp;
2916 
2917 	/* Clean up our smart probing linked list */
2918 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2919 		ncp = cp->next;
2920 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2921 	}
2922 	sd_scsi_probe_cache_head = NULL;
2923 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2924 }
2925 
2926 
2927 /*
2928  *    Function: sd_scsi_clear_probe_cache
2929  *
2930  * Description: This routine clears the probe response cache. This is
2931  *		done when open() returns ENXIO so that when deferred
2932  *		attach is attempted (possibly after a device has been
2933  *		turned on) we will retry the probe. Since we don't know
2934  *		which target we failed to open, we just clear the
2935  *		entire cache.
2936  *
2937  *     Context: Kernel thread context
2938  */
2939 
2940 static void
2941 sd_scsi_clear_probe_cache(void)
2942 {
2943 	struct sd_scsi_probe_cache	*cp;
2944 	int				i;
2945 
2946 	mutex_enter(&sd_scsi_probe_cache_mutex);
2947 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2948 		/*
2949 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2950 		 * force probing to be performed the next time
2951 		 * sd_scsi_probe_with_cache is called.
2952 		 */
2953 		for (i = 0; i < NTARGETS_WIDE; i++) {
2954 			cp->cache[i] = SCSIPROBE_EXISTS;
2955 		}
2956 	}
2957 	mutex_exit(&sd_scsi_probe_cache_mutex);
2958 }
2959 
2960 
2961 /*
2962  *    Function: sd_scsi_probe_with_cache
2963  *
2964  * Description: This routine implements support for a scsi device probe
2965  *		with cache. The driver maintains a cache of the target
2966  *		responses to scsi probes. If we get no response from a
2967  *		target during a probe inquiry, we remember that, and we
2968  *		avoid additional calls to scsi_probe on non-zero LUNs
2969  *		on the same target until the cache is cleared. By doing
2970  *		so we avoid the 1/4 sec selection timeout for nonzero
2971  *		LUNs. lun0 of a target is always probed.
2972  *
2973  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2974  *              waitfunc - indicates what the allocator routines should
2975  *			   do when resources are not available. This value
2976  *			   is passed on to scsi_probe() when that routine
2977  *			   is called.
2978  *
2979  * Return Code: SCSIPROBE_NORESP if a NORESP in probe response cache;
2980  *		otherwise the value returned by scsi_probe(9F).
2981  *
2982  *     Context: Kernel thread context
2983  */
2984 
2985 static int
2986 sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
2987 {
2988 	struct sd_scsi_probe_cache	*cp;
2989 	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
2990 	int		lun, tgt;
2991 
2992 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2993 	    SCSI_ADDR_PROP_LUN, 0);
2994 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2995 	    SCSI_ADDR_PROP_TARGET, -1);
2996 
2997 	/* Make sure caching enabled and target in range */
2998 	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
2999 		/* do it the old way (no cache) */
3000 		return (scsi_probe(devp, waitfn));
3001 	}
3002 
3003 	mutex_enter(&sd_scsi_probe_cache_mutex);
3004 
3005 	/* Find the cache for this scsi bus instance */
3006 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
3007 		if (cp->pdip == pdip) {
3008 			break;
3009 		}
3010 	}
3011 
3012 	/* If we can't find a cache for this pdip, create one */
3013 	if (cp == NULL) {
3014 		int i;
3015 
3016 		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
3017 		    KM_SLEEP);
3018 		cp->pdip = pdip;
3019 		cp->next = sd_scsi_probe_cache_head;
3020 		sd_scsi_probe_cache_head = cp;
3021 		for (i = 0; i < NTARGETS_WIDE; i++) {
3022 			cp->cache[i] = SCSIPROBE_EXISTS;
3023 		}
3024 	}
3025 
3026 	mutex_exit(&sd_scsi_probe_cache_mutex);
3027 
3028 	/* Recompute the cache for this target if LUN zero */
3029 	if (lun == 0) {
3030 		cp->cache[tgt] = SCSIPROBE_EXISTS;
3031 	}
3032 
3033 	/* Don't probe if cache remembers a NORESP from a previous LUN. */
3034 	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
3035 		return (SCSIPROBE_NORESP);
3036 	}
3037 
3038 	/* Do the actual probe; save & return the result */
3039 	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
3040 }
3041 
3042 
3043 /*
3044  *    Function: sd_scsi_target_lun_init
3045  *
3046  * Description: Initializes the attached lun chain mutex and head pointer.
3047  *
3048  *     Context: Kernel thread context
3049  */
3050 
3051 static void
3052 sd_scsi_target_lun_init(void)
3053 {
3054 	mutex_init(&sd_scsi_target_lun_mutex, NULL, MUTEX_DRIVER, NULL);
3055 	sd_scsi_target_lun_head = NULL;
3056 }
3057 
3058 
3059 /*
3060  *    Function: sd_scsi_target_lun_fini
3061  *
3062  * Description: Frees all resources associated with the attached lun
3063  *              chain
3064  *
3065  *     Context: Kernel thread context
3066  */
3067 
3068 static void
3069 sd_scsi_target_lun_fini(void)
3070 {
3071 	struct sd_scsi_hba_tgt_lun	*cp;
3072 	struct sd_scsi_hba_tgt_lun	*ncp;
3073 
3074 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = ncp) {
3075 		ncp = cp->next;
3076 		kmem_free(cp, sizeof (struct sd_scsi_hba_tgt_lun));
3077 	}
3078 	sd_scsi_target_lun_head = NULL;
3079 	mutex_destroy(&sd_scsi_target_lun_mutex);
3080 }
3081 
3082 
3083 /*
3084  *    Function: sd_scsi_get_target_lun_count
3085  *
3086  * Description: This routine will check in the attached lun chain to see
3087  * 		how many luns are attached on the required SCSI controller
3088  * 		and target. Currently, some capabilities like tagged queue
3089  *		are supported per target based by HBA. So all luns in a
3090  *		target have the same capabilities. Based on this assumption,
3091  * 		sd should only set these capabilities once per target. This
3092  *		function is called when sd needs to decide how many luns
3093  *		already attached on a target.
3094  *
3095  *   Arguments: dip	- Pointer to the system's dev_info_t for the SCSI
3096  *			  controller device.
3097  *              target	- The target ID on the controller's SCSI bus.
3098  *
3099  * Return Code: The number of luns attached on the required target and
3100  *		controller.
3101  *		-1 if target ID is not in parallel SCSI scope or the given
3102  * 		dip is not in the chain.
3103  *
3104  *     Context: Kernel thread context
3105  */
3106 
3107 static int
3108 sd_scsi_get_target_lun_count(dev_info_t *dip, int target)
3109 {
3110 	struct sd_scsi_hba_tgt_lun	*cp;
3111 
3112 	if ((target < 0) || (target >= NTARGETS_WIDE)) {
3113 		return (-1);
3114 	}
3115 
3116 	mutex_enter(&sd_scsi_target_lun_mutex);
3117 
3118 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
3119 		if (cp->pdip == dip) {
3120 			break;
3121 		}
3122 	}
3123 
3124 	mutex_exit(&sd_scsi_target_lun_mutex);
3125 
3126 	if (cp == NULL) {
3127 		return (-1);
3128 	}
3129 
3130 	return (cp->nlun[target]);
3131 }
3132 
3133 
3134 /*
3135  *    Function: sd_scsi_update_lun_on_target
3136  *
3137  * Description: This routine is used to update the attached lun chain when a
3138  *		lun is attached or detached on a target.
3139  *
3140  *   Arguments: dip     - Pointer to the system's dev_info_t for the SCSI
3141  *                        controller device.
3142  *              target  - The target ID on the controller's SCSI bus.
3143  *		flag	- Indicate the lun is attached or detached.
3144  *
3145  *     Context: Kernel thread context
3146  */
3147 
3148 static void
3149 sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag)
3150 {
3151 	struct sd_scsi_hba_tgt_lun	*cp;
3152 
3153 	mutex_enter(&sd_scsi_target_lun_mutex);
3154 
3155 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
3156 		if (cp->pdip == dip) {
3157 			break;
3158 		}
3159 	}
3160 
3161 	if ((cp == NULL) && (flag == SD_SCSI_LUN_ATTACH)) {
3162 		cp = kmem_zalloc(sizeof (struct sd_scsi_hba_tgt_lun),
3163 		    KM_SLEEP);
3164 		cp->pdip = dip;
3165 		cp->next = sd_scsi_target_lun_head;
3166 		sd_scsi_target_lun_head = cp;
3167 	}
3168 
3169 	mutex_exit(&sd_scsi_target_lun_mutex);
3170 
3171 	if (cp != NULL) {
3172 		if (flag == SD_SCSI_LUN_ATTACH) {
3173 			cp->nlun[target] ++;
3174 		} else {
3175 			cp->nlun[target] --;
3176 		}
3177 	}
3178 }
3179 
3180 
3181 /*
3182  *    Function: sd_spin_up_unit
3183  *
3184  * Description: Issues the following commands to spin-up the device:
3185  *		START STOP UNIT, and INQUIRY.
3186  *
3187  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
3188  *                      structure for this target.
3189  *
3190  * Return Code: 0 - success
3191  *		EIO - failure
3192  *		EACCES - reservation conflict
3193  *
3194  *     Context: Kernel thread context
3195  */
3196 
3197 static int
3198 sd_spin_up_unit(sd_ssc_t *ssc)
3199 {
3200 	size_t	resid		= 0;
3201 	int	has_conflict	= FALSE;
3202 	uchar_t *bufaddr;
3203 	int 	status;
3204 	struct sd_lun	*un;
3205 
3206 	ASSERT(ssc != NULL);
3207 	un = ssc->ssc_un;
3208 	ASSERT(un != NULL);
3209 
3210 	/*
3211 	 * Send a throwaway START UNIT command.
3212 	 *
3213 	 * If we fail on this, we don't care presently what precisely
3214 	 * is wrong.  EMC's arrays will also fail this with a check
3215 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
3216 	 * we don't want to fail the attach because it may become
3217 	 * "active" later.
3218 	 * We don't know if power condition is supported or not at
3219 	 * this stage, use START STOP bit.
3220 	 */
3221 	status = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
3222 	    SD_TARGET_START, SD_PATH_DIRECT);
3223 
3224 	if (status != 0) {
3225 		if (status == EACCES)
3226 			has_conflict = TRUE;
3227 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3228 	}
3229 
3230 	/*
3231 	 * Send another INQUIRY command to the target. This is necessary for
3232 	 * non-removable media direct access devices because their INQUIRY data
3233 	 * may not be fully qualified until they are spun up (perhaps via the
3234 	 * START command above).  Note: This seems to be needed for some
3235 	 * legacy devices only.) The INQUIRY command should succeed even if a
3236 	 * Reservation Conflict is present.
3237 	 */
3238 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
3239 
3240 	if (sd_send_scsi_INQUIRY(ssc, bufaddr, SUN_INQSIZE, 0, 0, &resid)
3241 	    != 0) {
3242 		kmem_free(bufaddr, SUN_INQSIZE);
3243 		sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
3244 		return (EIO);
3245 	}
3246 
3247 	/*
3248 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
3249 	 * Note that this routine does not return a failure here even if the
3250 	 * INQUIRY command did not return any data.  This is a legacy behavior.
3251 	 */
3252 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
3253 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
3254 	}
3255 
3256 	kmem_free(bufaddr, SUN_INQSIZE);
3257 
3258 	/* If we hit a reservation conflict above, tell the caller. */
3259 	if (has_conflict == TRUE) {
3260 		return (EACCES);
3261 	}
3262 
3263 	return (0);
3264 }
3265 
3266 #ifdef _LP64
3267 /*
3268  *    Function: sd_enable_descr_sense
3269  *
3270  * Description: This routine attempts to select descriptor sense format
3271  *		using the Control mode page.  Devices that support 64 bit
3272  *		LBAs (for >2TB luns) should also implement descriptor
3273  *		sense data so we will call this function whenever we see
3274  *		a lun larger than 2TB.  If for some reason the device
3275  *		supports 64 bit LBAs but doesn't support descriptor sense
3276  *		presumably the mode select will fail.  Everything will
3277  *		continue to work normally except that we will not get
3278  *		complete sense data for commands that fail with an LBA
3279  *		larger than 32 bits.
3280  *
3281  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
3282  *                      structure for this target.
3283  *
3284  *     Context: Kernel thread context only
3285  */
3286 
3287 static void
3288 sd_enable_descr_sense(sd_ssc_t *ssc)
3289 {
3290 	uchar_t			*header;
3291 	struct mode_control_scsi3 *ctrl_bufp;
3292 	size_t			buflen;
3293 	size_t			bd_len;
3294 	int			status;
3295 	struct sd_lun		*un;
3296 
3297 	ASSERT(ssc != NULL);
3298 	un = ssc->ssc_un;
3299 	ASSERT(un != NULL);
3300 
3301 	/*
3302 	 * Read MODE SENSE page 0xA, Control Mode Page
3303 	 */
3304 	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
3305 	    sizeof (struct mode_control_scsi3);
3306 	header = kmem_zalloc(buflen, KM_SLEEP);
3307 
3308 	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, header, buflen,
3309 	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT);
3310 
3311 	if (status != 0) {
3312 		SD_ERROR(SD_LOG_COMMON, un,
3313 		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
3314 		goto eds_exit;
3315 	}
3316 
3317 	/*
3318 	 * Determine size of Block Descriptors in order to locate
3319 	 * the mode page data. ATAPI devices return 0, SCSI devices
3320 	 * should return MODE_BLK_DESC_LENGTH.
3321 	 */
3322 	bd_len  = ((struct mode_header *)header)->bdesc_length;
3323 
3324 	/* Clear the mode data length field for MODE SELECT */
3325 	((struct mode_header *)header)->length = 0;
3326 
3327 	ctrl_bufp = (struct mode_control_scsi3 *)
3328 	    (header + MODE_HEADER_LENGTH + bd_len);
3329 
3330 	/*
3331 	 * If the page length is smaller than the expected value,
3332 	 * the target device doesn't support D_SENSE. Bail out here.
3333 	 */
3334 	if (ctrl_bufp->mode_page.length <
3335 	    sizeof (struct mode_control_scsi3) - 2) {
3336 		SD_ERROR(SD_LOG_COMMON, un,
3337 		    "sd_enable_descr_sense: enable D_SENSE failed\n");
3338 		goto eds_exit;
3339 	}
3340 
3341 	/*
3342 	 * Clear PS bit for MODE SELECT
3343 	 */
3344 	ctrl_bufp->mode_page.ps = 0;
3345 
3346 	/*
3347 	 * Set D_SENSE to enable descriptor sense format.
3348 	 */
3349 	ctrl_bufp->d_sense = 1;
3350 
3351 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3352 
3353 	/*
3354 	 * Use MODE SELECT to commit the change to the D_SENSE bit
3355 	 */
3356 	status = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, header,
3357 	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT);
3358 
3359 	if (status != 0) {
3360 		SD_INFO(SD_LOG_COMMON, un,
3361 		    "sd_enable_descr_sense: mode select ctrl page failed\n");
3362 	} else {
3363 		kmem_free(header, buflen);
3364 		return;
3365 	}
3366 
3367 eds_exit:
3368 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3369 	kmem_free(header, buflen);
3370 }
3371 
3372 /*
3373  *    Function: sd_reenable_dsense_task
3374  *
3375  * Description: Re-enable descriptor sense after device or bus reset
3376  *
3377  *     Context: Executes in a taskq() thread context
3378  */
3379 static void
3380 sd_reenable_dsense_task(void *arg)
3381 {
3382 	struct	sd_lun	*un = arg;
3383 	sd_ssc_t	*ssc;
3384 
3385 	ASSERT(un != NULL);
3386 
3387 	ssc = sd_ssc_init(un);
3388 	sd_enable_descr_sense(ssc);
3389 	sd_ssc_fini(ssc);
3390 }
3391 #endif /* _LP64 */
3392 
3393 /*
3394  *    Function: sd_set_mmc_caps
3395  *
3396  * Description: This routine determines if the device is MMC compliant and if
3397  *		the device supports CDDA via a mode sense of the CDVD
3398  *		capabilities mode page. Also checks if the device is a
3399  *		dvdram writable device.
3400  *
3401  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
3402  *                      structure for this target.
3403  *
3404  *     Context: Kernel thread context only
3405  */
3406 
static void
sd_set_mmc_caps(sd_ssc_t *ssc)
{
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	caddr_t				buf;
	int				bd_len;
	int				status;
	struct uscsi_cmd		com;
	int				rtn;
	uchar_t				*out_data_rw, *out_data_hd;
	uchar_t				*rqbuf_rw, *rqbuf_hd;
	uchar_t				*out_data_gesn;
	int				gesn_len;
	struct sd_lun			*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);

	/*
	 * The flags which will be set in this function are - mmc compliant,
	 * dvdram writable device, cdda support. Initialize them to FALSE
	 * and if a capability is detected - it will be set to TRUE.
	 */
	un->un_f_mmc_cap = FALSE;
	un->un_f_dvdram_writable_device = FALSE;
	un->un_f_cfg_cdda = FALSE;

	/* Probe the CD/DVD capabilities mode page (0x2A). */
	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, (uchar_t *)buf,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);

	/*
	 * This is a capability probe: failure simply means the device is
	 * not MMC, so discard the command assessment instead of treating
	 * it as a fault.
	 */
	sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	if (status != 0) {
		/* command failed; just return */
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	/*
	 * If the mode sense request for the CDROM CAPABILITIES
	 * page (0x2A) succeeds the device is assumed to be MMC.
	 */
	un->un_f_mmc_cap = TRUE;

	/* See if GET STATUS EVENT NOTIFICATION is supported */
	if (un->un_f_mmc_gesn_polling) {
		gesn_len = SD_GESN_HEADER_LEN + SD_GESN_MEDIA_DATA_LEN;
		out_data_gesn = kmem_zalloc(gesn_len, KM_SLEEP);

		rtn = sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION(ssc,
		    out_data_gesn, gesn_len, 1 << SD_GESN_MEDIA_CLASS);

		sd_ssc_assessment(ssc, SD_FMT_IGNORE);

		/*
		 * Disable GESN polling for this unit if the command failed
		 * or the returned media-class data did not validate.
		 */
		if ((rtn != 0) || !sd_gesn_media_data_valid(out_data_gesn)) {
			un->un_f_mmc_gesn_polling = FALSE;
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_set_mmc_caps: gesn not supported "
			    "%d %x %x %x %x\n", rtn,
			    out_data_gesn[0], out_data_gesn[1],
			    out_data_gesn[2], out_data_gesn[3]);
		}

		kmem_free(out_data_gesn, gesn_len);
	}

	/* Get to the page data */
	sense_mhp = (struct mode_header_grp2 *)buf;
	bd_len = (sense_mhp->bdesc_length_hi << 8) |
	    sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		/*
		 * We did not get back the expected block descriptor
		 * length so we cannot determine if the device supports
		 * CDDA. However, we still indicate the device is MMC
		 * according to the successful response to the page
		 * 0x2A mode sense request.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_set_mmc_caps: Mode Sense returned "
		    "invalid block descriptor length\n");
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/* See if read CDDA is supported */
	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
	    bd_len);
	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;

	/* See if writing DVD RAM is supported. */
	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
	if (un->un_f_dvdram_writable_device == TRUE) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/*
	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since a RRD will not have
	 * these capabilities.
	 */
	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);

	/*
	 * If un->un_f_dvdram_writable_device is still FALSE,
	 * check for a Removable Rigid Disk (RRD).  A RRD
	 * device is identified by the features RANDOM_WRITABLE and
	 * HARDWARE_DEFECT_MANAGEMENT.
	 */
	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_rw,
	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
	    RANDOM_WRITABLE, SD_PATH_STANDARD);

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	if (rtn != 0) {
		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
		kmem_free(rqbuf_rw, SENSE_LENGTH);
		return;
	}

	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_hd,
	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
	    HARDWARE_DEFECT_MANAGEMENT, SD_PATH_STANDARD);

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	if (rtn == 0) {
		/*
		 * We have good information, check for random writable
		 * and hardware defect features.
		 *
		 * NOTE(review): unlike sd_check_for_writable_cd(), this
		 * test does not also check the "current" bit in byte 10
		 * of each feature descriptor — confirm whether that
		 * asymmetry is intentional.
		 */
		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
			un->un_f_dvdram_writable_device = TRUE;
		}
	}

	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_rw, SENSE_LENGTH);
	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_hd, SENSE_LENGTH);
}
3563 
3564 /*
3565  *    Function: sd_check_for_writable_cd
3566  *
3567  * Description: This routine determines if the media in the device is
3568  *		writable or not. It uses the get configuration command (0x46)
3569  *		to determine if the media is writable
3570  *
 *   Arguments: ssc - ssc contains pointer to driver soft state (unit)
 *                    structure for this target.
3572  *              path_flag - SD_PATH_DIRECT to use the USCSI "direct"
3573  *                           chain and the normal command waitq, or
3574  *                           SD_PATH_DIRECT_PRIORITY to use the USCSI
3575  *                           "direct" chain and bypass the normal command
3576  *                           waitq.
3577  *
3578  *     Context: Never called at interrupt context.
3579  */
3580 
static void
sd_check_for_writable_cd(sd_ssc_t *ssc, int path_flag)
{
	struct uscsi_cmd		com;
	uchar_t				*out_data;
	uchar_t				*rqbuf;
	int				rtn;
	uchar_t				*out_data_rw, *out_data_hd;
	uchar_t				*rqbuf_rw, *rqbuf_hd;
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	caddr_t				buf;
	int				bd_len;
	int				status;
	struct sd_lun			*un;

	/*
	 * Caller must hold SD_MUTEX(un).  The mutex is dropped around
	 * each blocking SCSI command below and re-held on every return
	 * path, so the lock state on exit matches the lock state on
	 * entry.
	 */
	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/*
	 * Initialize the writable media to false, if configuration info.
	 * tells us otherwise then only we will set it.
	 */
	un->un_f_mmc_writable_media = FALSE;
	mutex_exit(SD_MUTEX(un));

	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_GET_CONFIGURATION(ssc, &com, rqbuf, SENSE_LENGTH,
	    out_data, SD_PROFILE_HEADER_LEN, path_flag);

	/* Probe failure is expected on non-writable media; discard it. */
	if (rtn != 0)
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	mutex_enter(SD_MUTEX(un));
	if (rtn == 0) {
		/*
		 * We have good information, check for writable DVD.
		 * Bytes 6-7 of the profile header are examined here;
		 * 0x12 appears to be the DVD-RAM profile — confirm
		 * against the MMC GET CONFIGURATION specification.
		 */
		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
			un->un_f_mmc_writable_media = TRUE;
			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
			kmem_free(rqbuf, SENSE_LENGTH);
			return;
		}
	}

	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
	kmem_free(rqbuf, SENSE_LENGTH);

	/*
	 * Determine if this is a RRD type device.
	 */
	mutex_exit(SD_MUTEX(un));
	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, (uchar_t *)buf,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, path_flag);

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);

	mutex_enter(SD_MUTEX(un));
	if (status != 0) {
		/* command failed; just return */
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/* Get to the page data */
	sense_mhp = (struct mode_header_grp2 *)buf;
	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		/*
		 * We did not get back the expected block descriptor length so
		 * we cannot check the mode page.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_check_for_writable_cd: Mode Sense returned "
		    "invalid block descriptor length\n");
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}

	/*
	 * If the device presents DVD or CD capabilities in the mode
	 * page, we can return here since a RRD device will not have
	 * these capabilities.
	 */
	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
		return;
	}
	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);

	/*
	 * If un->un_f_mmc_writable_media is still FALSE,
	 * check for RRD type media.  A RRD device is identified
	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
	 */
	mutex_exit(SD_MUTEX(un));
	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_rw,
	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
	    RANDOM_WRITABLE, path_flag);

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	if (rtn != 0) {
		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
		kmem_free(rqbuf_rw, SENSE_LENGTH);
		/* Re-acquire the mutex before returning to the caller. */
		mutex_enter(SD_MUTEX(un));
		return;
	}

	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_hd,
	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
	    HARDWARE_DEFECT_MANAGEMENT, path_flag);

	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	mutex_enter(SD_MUTEX(un));
	if (rtn == 0) {
		/*
		 * We have good information, check for random writable
		 * and hardware defect features as current.
		 */
		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
		    (out_data_rw[10] & 0x1) &&
		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
		    (out_data_hd[10] & 0x1)) {
			un->un_f_mmc_writable_media = TRUE;
		}
	}

	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_rw, SENSE_LENGTH);
	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
	kmem_free(rqbuf_hd, SENSE_LENGTH);
}
3726 
3727 /*
3728  *    Function: sd_read_unit_properties
3729  *
3730  * Description: The following implements a property lookup mechanism.
3731  *		Properties for particular disks (keyed on vendor, model
3732  *		and rev numbers) are sought in the sd.conf file via
3733  *		sd_process_sdconf_file(), and if not found there, are
3734  *		looked for in a list hardcoded in this driver via
3735  *		sd_process_sdconf_table() Once located the properties
3736  *		are used to update the driver unit structure.
3737  *
3738  *   Arguments: un - driver soft state (unit) structure
3739  */
3740 
3741 static void
3742 sd_read_unit_properties(struct sd_lun *un)
3743 {
3744 	/*
3745 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3746 	 * the "sd-config-list" property (from the sd.conf file) or if
3747 	 * there was not a match for the inquiry vid/pid. If this event
3748 	 * occurs the static driver configuration table is searched for
3749 	 * a match.
3750 	 */
3751 	ASSERT(un != NULL);
3752 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3753 		sd_process_sdconf_table(un);
3754 	}
3755 
3756 	/* check for LSI device */
3757 	sd_is_lsi(un);
3758 
3759 
3760 }
3761 
3762 
3763 /*
3764  *    Function: sd_process_sdconf_file
3765  *
3766  * Description: Use ddi_prop_lookup(9F) to obtain the properties from the
3767  *		driver's config file (ie, sd.conf) and update the driver
3768  *		soft state structure accordingly.
3769  *
3770  *   Arguments: un - driver soft state (unit) structure
3771  *
3772  * Return Code: SD_SUCCESS - The properties were successfully set according
3773  *			     to the driver configuration file.
3774  *		SD_FAILURE - The driver config list was not obtained or
3775  *			     there was no vid/pid match. This indicates that
3776  *			     the static config table should be used.
3777  *
3778  * The config file has a property, "sd-config-list". Currently we support
3779  * two kinds of formats. For both formats, the value of this property
3780  * is a list of duplets:
3781  *
3782  *  sd-config-list=
3783  *	<duplet>,
3784  *	[,<duplet>]*;
3785  *
3786  * For the improved format, where
3787  *
3788  *     <duplet>:= "<vid+pid>","<tunable-list>"
3789  *
3790  * and
3791  *
3792  *     <tunable-list>:=   <tunable> [, <tunable> ]*;
3793  *     <tunable> =        <name> : <value>
3794  *
3795  * The <vid+pid> is the string that is returned by the target device on a
3796  * SCSI inquiry command, the <tunable-list> contains one or more tunables
3797  * to apply to all target devices with the specified <vid+pid>.
3798  *
3799  * Each <tunable> is a "<name> : <value>" pair.
3800  *
3801  * For the old format, the structure of each duplet is as follows:
3802  *
3803  *  <duplet>:= "<vid+pid>","<data-property-name_list>"
3804  *
3805  * The first entry of the duplet is the device ID string (the concatenated
3806  * vid & pid; not to be confused with a device_id).  This is defined in
3807  * the same way as in the sd_disk_table.
3808  *
3809  * The second part of the duplet is a string that identifies a
3810  * data-property-name-list. The data-property-name-list is defined as
3811  * follows:
3812  *
3813  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3814  *
3815  * The syntax of <data-property-name> depends on the <version> field.
3816  *
3817  * If version = SD_CONF_VERSION_1 we have the following syntax:
3818  *
3819  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3820  *
3821  * where the prop0 value will be used to set prop0 if bit0 set in the
3822  * flags, prop1 if bit1 set, etc. and N = SD_CONF_MAX_ITEMS -1
3823  *
3824  */
3825 
static int
sd_process_sdconf_file(struct sd_lun *un)
{
	char	**config_list = NULL;
	uint_t	nelements;
	char	*vidptr;
	int	vidlen;
	char	*dnlist_ptr;
	char	*dataname_ptr;
	char	*dataname_lasts;
	int	*data_list = NULL;
	uint_t	data_list_len;
	int	rval = SD_FAILURE;
	int	i;

	ASSERT(un != NULL);

	/* Obtain the configuration list associated with the .conf file */
	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, SD_DEVINFO(un),
	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, sd_config_list,
	    &config_list, &nelements) != DDI_PROP_SUCCESS) {
		return (SD_FAILURE);
	}

	/*
	 * Compare vids in each duplet to the inquiry vid - if a match is
	 * made, get the data value and update the soft state structure
	 * accordingly.
	 *
	 * Each duplet should show as a pair of strings, return SD_FAILURE
	 * otherwise.
	 */
	if (nelements & 1) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd-config-list should show as pairs of strings.\n");
		if (config_list)
			ddi_prop_free(config_list);
		return (SD_FAILURE);
	}

	/* Walk the duplets: even index = vid/pid, odd index = data. */
	for (i = 0; i < nelements; i += 2) {
		/*
		 * Note: The assumption here is that each vid entry is on
		 * a unique line from its associated duplet.
		 */
		vidptr = config_list[i];
		vidlen = (int)strlen(vidptr);
		if ((vidlen == 0) ||
		    (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS)) {
			continue;
		}

		/*
		 * dnlist contains 1 or more blank separated
		 * data-property-name entries
		 */
		dnlist_ptr = config_list[i + 1];

		/* A ':' marks the improved "name:value" tunable format. */
		if (strchr(dnlist_ptr, ':') != NULL) {
			/*
			 * Decode the improved format sd-config-list.
			 */
			sd_nvpair_str_decode(un, dnlist_ptr);
		} else {
			/*
			 * The old format sd-config-list, loop through all
			 * data-property-name entries in the
			 * data-property-name-list
			 * setting the properties for each.
			 */
			for (dataname_ptr = sd_strtok_r(dnlist_ptr, " \t",
			    &dataname_lasts); dataname_ptr != NULL;
			    dataname_ptr = sd_strtok_r(NULL, " \t",
			    &dataname_lasts)) {
				int version;

				SD_INFO(SD_LOG_ATTACH_DETACH, un,
				    "sd_process_sdconf_file: disk:%s, "
				    "data:%s\n", vidptr, dataname_ptr);

				/* Get the data list */
				if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY,
				    SD_DEVINFO(un), 0, dataname_ptr, &data_list,
				    &data_list_len) != DDI_PROP_SUCCESS) {
					SD_INFO(SD_LOG_ATTACH_DETACH, un,
					    "sd_process_sdconf_file: data "
					    "property (%s) has no value\n",
					    dataname_ptr);
					continue;
				}

				/* data_list[0] carries the format version. */
				version = data_list[0];

				if (version == SD_CONF_VERSION_1) {
					sd_tunables values;

					/* Set the properties */
					if (sd_chk_vers1_data(un, data_list[1],
					    &data_list[2], data_list_len,
					    dataname_ptr) == SD_SUCCESS) {
						sd_get_tunables_from_conf(un,
						    data_list[1], &data_list[2],
						    &values);
						sd_set_vers1_properties(un,
						    data_list[1], &values);
						rval = SD_SUCCESS;
					} else {
						rval = SD_FAILURE;
					}
				} else {
					scsi_log(SD_DEVINFO(un), sd_label,
					    CE_WARN, "data property %s version "
					    "0x%x is invalid.",
					    dataname_ptr, version);
					rval = SD_FAILURE;
				}
				/* Each lookup allocates; free per iteration. */
				if (data_list)
					ddi_prop_free(data_list);
			}
		}
	}

	/* free up the memory allocated by ddi_prop_lookup_string_array(). */
	if (config_list) {
		ddi_prop_free(config_list);
	}

	return (rval);
}
3955 
3956 /*
3957  *    Function: sd_nvpair_str_decode()
3958  *
3959  * Description: Parse the improved format sd-config-list to get
3960  *    each entry of tunable, which includes a name-value pair.
3961  *    Then call sd_set_properties() to set the property.
3962  *
3963  *   Arguments: un - driver soft state (unit) structure
3964  *    nvpair_str - the tunable list
3965  */
3966 static void
3967 sd_nvpair_str_decode(struct sd_lun *un, char *nvpair_str)
3968 {
3969 	char	*nv, *name, *value, *token;
3970 	char	*nv_lasts, *v_lasts, *x_lasts;
3971 
3972 	for (nv = sd_strtok_r(nvpair_str, ",", &nv_lasts); nv != NULL;
3973 	    nv = sd_strtok_r(NULL, ",", &nv_lasts)) {
3974 		token = sd_strtok_r(nv, ":", &v_lasts);
3975 		name  = sd_strtok_r(token, " \t", &x_lasts);
3976 		token = sd_strtok_r(NULL, ":", &v_lasts);
3977 		value = sd_strtok_r(token, " \t", &x_lasts);
3978 		if (name == NULL || value == NULL) {
3979 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3980 			    "sd_nvpair_str_decode: "
3981 			    "name or value is not valid!\n");
3982 		} else {
3983 			sd_set_properties(un, name, value);
3984 		}
3985 	}
3986 }
3987 
3988 /*
3989  *    Function: sd_strtok_r()
3990  *
3991  * Description: This function uses strpbrk and strspn to break
3992  *    string into tokens on sequentially subsequent calls. Return
3993  *    NULL when no non-separator characters remain. The first
3994  *    argument is NULL for subsequent calls.
3995  */
static char *
sd_strtok_r(char *string, const char *sepset, char **lasts)
{
	char	*start, *end;

	/* A NULL first argument means "continue from the saved spot". */
	if (string == NULL)
		string = *lasts;
	if (string == NULL)
		return (NULL);

	/* Step over any run of leading separator characters. */
	start = string + strspn(string, sepset);
	if (*start == '\0')
		return (NULL);

	/*
	 * Terminate the token in place and record where the next scan
	 * should resume; NULL means the input is exhausted.
	 */
	end = strpbrk(start, sepset);
	if (end == NULL) {
		*lasts = NULL;
	} else {
		*end = '\0';
		*lasts = end + 1;
	}
	return (start);
}
4022 
4023 /*
4024  *    Function: sd_set_properties()
4025  *
4026  * Description: Set device properties based on the improved
4027  *    format sd-config-list.
4028  *
4029  *   Arguments: un - driver soft state (unit) structure
4030  *    name  - supported tunable name
4031  *    value - tunable value
4032  */
static void
sd_set_properties(struct sd_lun *un, char *name, char *value)
{
	char	*endptr = NULL;
	long	val = 0;

	/* Boolean tunable: treat the cache as non-volatile. */
	if (strcasecmp(name, "cache-nonvolatile") == 0) {
		if (strcasecmp(value, "true") == 0) {
			un->un_f_suppress_cache_flush = TRUE;
		} else if (strcasecmp(value, "false") == 0) {
			un->un_f_suppress_cache_flush = FALSE;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "suppress_cache_flush flag set to %d\n",
		    un->un_f_suppress_cache_flush);
		return;
	}

	if (strcasecmp(name, "controller-type") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_ctype = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "ctype set to %d\n", un->un_ctype);
		return;
	}

	if (strcasecmp(name, "delay-busy") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			/*
			 * val / 1000 is passed to drv_usectohz(), which
			 * takes microseconds — so the property value is
			 * presumably in nanoseconds; confirm against the
			 * sd-config-list documentation.
			 */
			un->un_busy_timeout = drv_usectohz(val / 1000);
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "busy_timeout set to %d\n", un->un_busy_timeout);
		return;
	}

	/* Note: "true" ENABLES disksort (clears the disabled flag). */
	if (strcasecmp(name, "disksort") == 0) {
		if (strcasecmp(value, "true") == 0) {
			un->un_f_disksort_disabled = FALSE;
		} else if (strcasecmp(value, "false") == 0) {
			un->un_f_disksort_disabled = TRUE;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "disksort disabled flag set to %d\n",
		    un->un_f_disksort_disabled);
		return;
	}

	/* Note: "true" ENABLES power condition (clears the disabled flag). */
	if (strcasecmp(name, "power-condition") == 0) {
		if (strcasecmp(value, "true") == 0) {
			un->un_f_power_condition_disabled = FALSE;
		} else if (strcasecmp(value, "false") == 0) {
			un->un_f_power_condition_disabled = TRUE;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "power condition disabled flag set to %d\n",
		    un->un_f_power_condition_disabled);
		return;
	}

	if (strcasecmp(name, "timeout-releasereservation") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_reserve_release_time = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "reservation release timeout set to %d\n",
		    un->un_reserve_release_time);
		return;
	}

	if (strcasecmp(name, "reset-lun") == 0) {
		if (strcasecmp(value, "true") == 0) {
			un->un_f_lun_reset_enabled = TRUE;
		} else if (strcasecmp(value, "false") == 0) {
			un->un_f_lun_reset_enabled = FALSE;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "lun reset enabled flag set to %d\n",
		    un->un_f_lun_reset_enabled);
		return;
	}

	if (strcasecmp(name, "retries-busy") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_busy_retry_count = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "busy retry count set to %d\n", un->un_busy_retry_count);
		return;
	}

	if (strcasecmp(name, "retries-timeout") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_retry_count = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "timeout retry count set to %d\n", un->un_retry_count);
		return;
	}

	if (strcasecmp(name, "retries-notready") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_notready_retry_count = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "notready retry count set to %d\n",
		    un->un_notready_retry_count);
		return;
	}

	if (strcasecmp(name, "retries-reset") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_reset_retry_count = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "reset retry count set to %d\n",
		    un->un_reset_retry_count);
		return;
	}

	/*
	 * The remaining branches intentionally do NOT return: the
	 * throttle-max/throttle-min settings must fall through to the
	 * throttle validation block below, and all of them fall
	 * through to the mmc-gesn-polling check at the end.
	 */
	if (strcasecmp(name, "throttle-max") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_saved_throttle = un->un_throttle = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "throttle set to %d\n", un->un_throttle);
	}

	if (strcasecmp(name, "throttle-min") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_min_throttle = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "min throttle set to %d\n", un->un_min_throttle);
	}

	if (strcasecmp(name, "rmw-type") == 0) {
		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
			un->un_f_rmw_type = val;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "RMW type set to %d\n", un->un_f_rmw_type);
	}

	/*
	 * Validate the throttle values.
	 * If any of the numbers are invalid, set everything to defaults.
	 */
	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
	    (un->un_min_throttle > un->un_throttle)) {
		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
		un->un_min_throttle = sd_min_throttle;
	}

	if (strcasecmp(name, "mmc-gesn-polling") == 0) {
		if (strcasecmp(value, "true") == 0) {
			un->un_f_mmc_gesn_polling = TRUE;
		} else if (strcasecmp(value, "false") == 0) {
			un->un_f_mmc_gesn_polling = FALSE;
		} else {
			goto value_invalid;
		}
		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
		    "mmc-gesn-polling set to %d\n",
		    un->un_f_mmc_gesn_polling);
	}

	return;

	/* Shared exit for any tunable whose value failed to parse. */
value_invalid:
	SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
	    "value of prop %s is invalid\n", name);
}
4235 
4236 /*
4237  *    Function: sd_get_tunables_from_conf()
4238  *
4239  *
4240  *    This function reads the data list from the sd.conf file and pulls
4241  *    the values that can have numeric values as arguments and places
4242  *    the values in the appropriate sd_tunables member.
4243  *    Since the order of the data list members varies across platforms
4244  *    This function reads them from the data list in a platform specific
4245  *    order and places them into the correct sd_tunable member that is
4246  *    consistent across all platforms.
4247  */
4248 static void
4249 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
4250     sd_tunables *values)
4251 {
4252 	int i;
4253 	int mask;
4254 
4255 	bzero(values, sizeof (sd_tunables));
4256 
4257 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
4258 
4259 		mask = 1 << i;
4260 		if (mask > flags) {
4261 			break;
4262 		}
4263 
4264 		switch (mask & flags) {
4265 		case 0:	/* This mask bit not set in flags */
4266 			continue;
4267 		case SD_CONF_BSET_THROTTLE:
4268 			values->sdt_throttle = data_list[i];
4269 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4270 			    "sd_get_tunables_from_conf: throttle = %d\n",
4271 			    values->sdt_throttle);
4272 			break;
4273 		case SD_CONF_BSET_CTYPE:
4274 			values->sdt_ctype = data_list[i];
4275 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4276 			    "sd_get_tunables_from_conf: ctype = %d\n",
4277 			    values->sdt_ctype);
4278 			break;
4279 		case SD_CONF_BSET_NRR_COUNT:
4280 			values->sdt_not_rdy_retries = data_list[i];
4281 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4282 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
4283 			    values->sdt_not_rdy_retries);
4284 			break;
4285 		case SD_CONF_BSET_BSY_RETRY_COUNT:
4286 			values->sdt_busy_retries = data_list[i];
4287 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4288 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
4289 			    values->sdt_busy_retries);
4290 			break;
4291 		case SD_CONF_BSET_RST_RETRIES:
4292 			values->sdt_reset_retries = data_list[i];
4293 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4294 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
4295 			    values->sdt_reset_retries);
4296 			break;
4297 		case SD_CONF_BSET_RSV_REL_TIME:
4298 			values->sdt_reserv_rel_time = data_list[i];
4299 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4300 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
4301 			    values->sdt_reserv_rel_time);
4302 			break;
4303 		case SD_CONF_BSET_MIN_THROTTLE:
4304 			values->sdt_min_throttle = data_list[i];
4305 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4306 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
4307 			    values->sdt_min_throttle);
4308 			break;
4309 		case SD_CONF_BSET_DISKSORT_DISABLED:
4310 			values->sdt_disk_sort_dis = data_list[i];
4311 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4312 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
4313 			    values->sdt_disk_sort_dis);
4314 			break;
4315 		case SD_CONF_BSET_LUN_RESET_ENABLED:
4316 			values->sdt_lun_reset_enable = data_list[i];
4317 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4318 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
4319 			    "\n", values->sdt_lun_reset_enable);
4320 			break;
4321 		case SD_CONF_BSET_CACHE_IS_NV:
4322 			values->sdt_suppress_cache_flush = data_list[i];
4323 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4324 			    "sd_get_tunables_from_conf: \
4325 			    suppress_cache_flush = %d"
4326 			    "\n", values->sdt_suppress_cache_flush);
4327 			break;
4328 		case SD_CONF_BSET_PC_DISABLED:
4329 			values->sdt_disk_sort_dis = data_list[i];
4330 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4331 			    "sd_get_tunables_from_conf: power_condition_dis = "
4332 			    "%d\n", values->sdt_power_condition_dis);
4333 			break;
4334 		}
4335 	}
4336 }
4337 
4338 /*
4339  *    Function: sd_process_sdconf_table
4340  *
4341  * Description: Search the static configuration table for a match on the
4342  *		inquiry vid/pid and update the driver soft state structure
4343  *		according to the table property values for the device.
4344  *
4345  *		The form of a configuration table entry is:
4346  *		  <vid+pid>,<flags>,<property-data>
4347  *		  "SEAGATE ST42400N",1,0x40000,
4348  *		  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1;
4349  *
4350  *   Arguments: un - driver soft state (unit) structure
4351  */
4352 
4353 static void
4354 sd_process_sdconf_table(struct sd_lun *un)
4355 {
4356 	char	*id = NULL;
4357 	int	table_index;
4358 	int	idlen;
4359 
4360 	ASSERT(un != NULL);
4361 	for (table_index = 0; table_index < sd_disk_table_size;
4362 	    table_index++) {
4363 		id = sd_disk_table[table_index].device_id;
4364 		idlen = strlen(id);
4365 		if (idlen == 0) {
4366 			continue;
4367 		}
4368 
4369 		/*
4370 		 * The static configuration table currently does not
4371 		 * implement version 10 properties. Additionally,
4372 		 * multiple data-property-name entries are not
4373 		 * implemented in the static configuration table.
4374 		 */
4375 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4376 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4377 			    "sd_process_sdconf_table: disk %s\n", id);
4378 			sd_set_vers1_properties(un,
4379 			    sd_disk_table[table_index].flags,
4380 			    sd_disk_table[table_index].properties);
4381 			break;
4382 		}
4383 	}
4384 }
4385 
4386 
4387 /*
4388  *    Function: sd_sdconf_id_match
4389  *
4390  * Description: This local function implements a case sensitive vid/pid
4391  *		comparison as well as the boundary cases of wild card and
4392  *		multiple blanks.
4393  *
4394  *		Note: An implicit assumption made here is that the scsi
4395  *		inquiry structure will always keep the vid, pid and
4396  *		revision strings in consecutive sequence, so they can be
4397  *		read as a single string. If this assumption is not the
4398  *		case, a separate string, to be used for the check, needs
4399  *		to be built with these strings concatenated.
4400  *
4401  *   Arguments: un - driver soft state (unit) structure
4402  *		id - table or config file vid/pid
4403  *		idlen  - length of the vid/pid (bytes)
4404  *
4405  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
4406  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
4407  */
4408 
4409 static int
4410 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
4411 {
4412 	struct scsi_inquiry	*sd_inq;
4413 	int 			rval = SD_SUCCESS;
4414 
4415 	ASSERT(un != NULL);
4416 	sd_inq = un->un_sd->sd_inq;
4417 	ASSERT(id != NULL);
4418 
4419 	/*
4420 	 * We use the inq_vid as a pointer to a buffer containing the
4421 	 * vid and pid and use the entire vid/pid length of the table
4422 	 * entry for the comparison. This works because the inq_pid
4423 	 * data member follows inq_vid in the scsi_inquiry structure.
4424 	 */
4425 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
4426 		/*
4427 		 * The user id string is compared to the inquiry vid/pid
4428 		 * using a case insensitive comparison and ignoring
4429 		 * multiple spaces.
4430 		 */
4431 		rval = sd_blank_cmp(un, id, idlen);
4432 		if (rval != SD_SUCCESS) {
4433 			/*
4434 			 * User id strings that start and end with a "*"
4435 			 * are a special case. These do not have a
4436 			 * specific vendor, and the product string can
4437 			 * appear anywhere in the 16 byte PID portion of
4438 			 * the inquiry data. This is a simple strstr()
4439 			 * type search for the user id in the inquiry data.
4440 			 */
4441 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
4442 				char	*pidptr = &id[1];
4443 				int	i;
4444 				int	j;
4445 				int	pidstrlen = idlen - 2;
4446 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
4447 				    pidstrlen;
4448 
4449 				if (j < 0) {
4450 					return (SD_FAILURE);
4451 				}
4452 				for (i = 0; i < j; i++) {
4453 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
4454 					    pidptr, pidstrlen) == 0) {
4455 						rval = SD_SUCCESS;
4456 						break;
4457 					}
4458 				}
4459 			}
4460 		}
4461 	}
4462 	return (rval);
4463 }
4464 
4465 
4466 /*
4467  *    Function: sd_blank_cmp
4468  *
4469  * Description: If the id string starts and ends with a space, treat
4470  *		multiple consecutive spaces as equivalent to a single
4471  *		space. For example, this causes a sd_disk_table entry
4472  *		of " NEC CDROM " to match a device's id string of
4473  *		"NEC       CDROM".
4474  *
4475  *		Note: The success exit condition for this routine is if
4476  *		the pointer to the table entry is '\0' and the cnt of
4477  *		the inquiry length is zero. This will happen if the inquiry
4478  *		string returned by the device is padded with spaces to be
4479  *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
4480  *		SCSI spec states that the inquiry string is to be padded with
4481  *		spaces.
4482  *
4483  *   Arguments: un - driver soft state (unit) structure
4484  *		id - table or config file vid/pid
4485  *		idlen  - length of the vid/pid (bytes)
4486  *
4487  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
4488  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
4489  */
4490 
static int
sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
{
	char		*p1;
	char		*p2;
	int		cnt;
	/*
	 * cnt counts the remaining unexamined bytes of the inquiry
	 * vid+pid buffer (8 byte vid + 16 byte pid = 24 bytes), since
	 * that buffer is not NUL-terminated.
	 */
	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
	    sizeof (SD_INQUIRY(un)->inq_pid);

	ASSERT(un != NULL);
	/* p2 walks the device's inquiry vid/pid data (bounded by cnt). */
	p2 = un->un_sd->sd_inq->inq_vid;
	ASSERT(id != NULL);
	/* p1 walks the NUL-terminated table/conf-file id string. */
	p1 = id;

	/*
	 * Blank-collapsing comparison only applies to ids that are
	 * explicitly wrapped in spaces (e.g. " NEC CDROM ").
	 */
	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
		/*
		 * Note: string p1 is terminated by a NUL but string p2
		 * isn't.  The end of p2 is determined by cnt.
		 */
		for (;;) {
			/* skip over any extra blanks in both strings */
			while ((*p1 != '\0') && (*p1 == ' ')) {
				p1++;
			}
			while ((cnt != 0) && (*p2 == ' ')) {
				p2++;
				cnt--;
			}

			/* compare the two strings */
			if ((cnt == 0) ||
			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
				break;
			}
			/* advance past the run of matching characters */
			while ((cnt > 0) &&
			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
				p1++;
				p2++;
				cnt--;
			}
		}
	}

	/*
	 * return SD_SUCCESS if both strings match: p1 must be fully
	 * consumed (at its NUL) and all cnt bytes of p2 examined.
	 */
	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
}
4537 
4538 
4539 /*
4540  *    Function: sd_chk_vers1_data
4541  *
4542  * Description: Verify the version 1 device properties provided by the
4543  *		user via the configuration file
4544  *
4545  *   Arguments: un	     - driver soft state (unit) structure
4546  *		flags	     - integer mask indicating properties to be set
4547  *		prop_list    - integer list of property values
4548  *		list_len     - number of the elements
4549  *
4550  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
4551  *		SD_FAILURE - Indicates the user provided data is invalid
4552  */
4553 
4554 static int
4555 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
4556     int list_len, char *dataname_ptr)
4557 {
4558 	int i;
4559 	int mask = 1;
4560 	int index = 0;
4561 
4562 	ASSERT(un != NULL);
4563 
4564 	/* Check for a NULL property name and list */
4565 	if (dataname_ptr == NULL) {
4566 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4567 		    "sd_chk_vers1_data: NULL data property name.");
4568 		return (SD_FAILURE);
4569 	}
4570 	if (prop_list == NULL) {
4571 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4572 		    "sd_chk_vers1_data: %s NULL data property list.",
4573 		    dataname_ptr);
4574 		return (SD_FAILURE);
4575 	}
4576 
4577 	/* Display a warning if undefined bits are set in the flags */
4578 	if (flags & ~SD_CONF_BIT_MASK) {
4579 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4580 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
4581 		    "Properties not set.",
4582 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
4583 		return (SD_FAILURE);
4584 	}
4585 
4586 	/*
4587 	 * Verify the length of the list by identifying the highest bit set
4588 	 * in the flags and validating that the property list has a length
4589 	 * up to the index of this bit.
4590 	 */
4591 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
4592 		if (flags & mask) {
4593 			index++;
4594 		}
4595 		mask = 1 << i;
4596 	}
4597 	if (list_len < (index + 2)) {
4598 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4599 		    "sd_chk_vers1_data: "
4600 		    "Data property list %s size is incorrect. "
4601 		    "Properties not set.", dataname_ptr);
4602 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
4603 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
4604 		return (SD_FAILURE);
4605 	}
4606 	return (SD_SUCCESS);
4607 }
4608 
4609 
4610 /*
4611  *    Function: sd_set_vers1_properties
4612  *
4613  * Description: Set version 1 device properties based on a property list
4614  *		retrieved from the driver configuration file or static
4615  *		configuration table. Version 1 properties have the format:
4616  *
4617  * 	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
4618  *
4619  *		where the prop0 value will be used to set prop0 if bit0
4620  *		is set in the flags
4621  *
4622  *   Arguments: un	     - driver soft state (unit) structure
4623  *		flags	     - integer mask indicating properties to be set
4624  *		prop_list    - integer list of property values
4625  */
4626 
4627 static void
4628 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
4629 {
4630 	ASSERT(un != NULL);
4631 
4632 	/*
4633 	 * Set the flag to indicate cache is to be disabled. An attempt
4634 	 * to disable the cache via sd_cache_control() will be made
4635 	 * later during attach once the basic initialization is complete.
4636 	 */
4637 	if (flags & SD_CONF_BSET_NOCACHE) {
4638 		un->un_f_opt_disable_cache = TRUE;
4639 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4640 		    "sd_set_vers1_properties: caching disabled flag set\n");
4641 	}
4642 
4643 	/* CD-specific configuration parameters */
4644 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
4645 		un->un_f_cfg_playmsf_bcd = TRUE;
4646 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4647 		    "sd_set_vers1_properties: playmsf_bcd set\n");
4648 	}
4649 	if (flags & SD_CONF_BSET_READSUB_BCD) {
4650 		un->un_f_cfg_readsub_bcd = TRUE;
4651 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4652 		    "sd_set_vers1_properties: readsub_bcd set\n");
4653 	}
4654 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
4655 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
4656 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4657 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
4658 	}
4659 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
4660 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
4661 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4662 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
4663 	}
4664 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
4665 		un->un_f_cfg_no_read_header = TRUE;
4666 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4667 		    "sd_set_vers1_properties: no_read_header set\n");
4668 	}
4669 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
4670 		un->un_f_cfg_read_cd_xd4 = TRUE;
4671 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4672 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
4673 	}
4674 
4675 	/* Support for devices which do not have valid/unique serial numbers */
4676 	if (flags & SD_CONF_BSET_FAB_DEVID) {
4677 		un->un_f_opt_fab_devid = TRUE;
4678 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4679 		    "sd_set_vers1_properties: fab_devid bit set\n");
4680 	}
4681 
4682 	/* Support for user throttle configuration */
4683 	if (flags & SD_CONF_BSET_THROTTLE) {
4684 		ASSERT(prop_list != NULL);
4685 		un->un_saved_throttle = un->un_throttle =
4686 		    prop_list->sdt_throttle;
4687 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4688 		    "sd_set_vers1_properties: throttle set to %d\n",
4689 		    prop_list->sdt_throttle);
4690 	}
4691 
4692 	/* Set the per disk retry count according to the conf file or table. */
4693 	if (flags & SD_CONF_BSET_NRR_COUNT) {
4694 		ASSERT(prop_list != NULL);
4695 		if (prop_list->sdt_not_rdy_retries) {
4696 			un->un_notready_retry_count =
4697 			    prop_list->sdt_not_rdy_retries;
4698 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4699 			    "sd_set_vers1_properties: not ready retry count"
4700 			    " set to %d\n", un->un_notready_retry_count);
4701 		}
4702 	}
4703 
4704 	/* The controller type is reported for generic disk driver ioctls */
4705 	if (flags & SD_CONF_BSET_CTYPE) {
4706 		ASSERT(prop_list != NULL);
4707 		switch (prop_list->sdt_ctype) {
4708 		case CTYPE_CDROM:
4709 			un->un_ctype = prop_list->sdt_ctype;
4710 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4711 			    "sd_set_vers1_properties: ctype set to "
4712 			    "CTYPE_CDROM\n");
4713 			break;
4714 		case CTYPE_CCS:
4715 			un->un_ctype = prop_list->sdt_ctype;
4716 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4717 			    "sd_set_vers1_properties: ctype set to "
4718 			    "CTYPE_CCS\n");
4719 			break;
4720 		case CTYPE_ROD:		/* RW optical */
4721 			un->un_ctype = prop_list->sdt_ctype;
4722 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4723 			    "sd_set_vers1_properties: ctype set to "
4724 			    "CTYPE_ROD\n");
4725 			break;
4726 		default:
4727 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4728 			    "sd_set_vers1_properties: Could not set "
4729 			    "invalid ctype value (%d)",
4730 			    prop_list->sdt_ctype);
4731 		}
4732 	}
4733 
4734 	/* Purple failover timeout */
4735 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
4736 		ASSERT(prop_list != NULL);
4737 		un->un_busy_retry_count =
4738 		    prop_list->sdt_busy_retries;
4739 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4740 		    "sd_set_vers1_properties: "
4741 		    "busy retry count set to %d\n",
4742 		    un->un_busy_retry_count);
4743 	}
4744 
4745 	/* Purple reset retry count */
4746 	if (flags & SD_CONF_BSET_RST_RETRIES) {
4747 		ASSERT(prop_list != NULL);
4748 		un->un_reset_retry_count =
4749 		    prop_list->sdt_reset_retries;
4750 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4751 		    "sd_set_vers1_properties: "
4752 		    "reset retry count set to %d\n",
4753 		    un->un_reset_retry_count);
4754 	}
4755 
4756 	/* Purple reservation release timeout */
4757 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
4758 		ASSERT(prop_list != NULL);
4759 		un->un_reserve_release_time =
4760 		    prop_list->sdt_reserv_rel_time;
4761 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4762 		    "sd_set_vers1_properties: "
4763 		    "reservation release timeout set to %d\n",
4764 		    un->un_reserve_release_time);
4765 	}
4766 
4767 	/*
4768 	 * Driver flag telling the driver to verify that no commands are pending
4769 	 * for a device before issuing a Test Unit Ready. This is a workaround
4770 	 * for a firmware bug in some Seagate eliteI drives.
4771 	 */
4772 	if (flags & SD_CONF_BSET_TUR_CHECK) {
4773 		un->un_f_cfg_tur_check = TRUE;
4774 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4775 		    "sd_set_vers1_properties: tur queue check set\n");
4776 	}
4777 
4778 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
4779 		un->un_min_throttle = prop_list->sdt_min_throttle;
4780 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4781 		    "sd_set_vers1_properties: min throttle set to %d\n",
4782 		    un->un_min_throttle);
4783 	}
4784 
4785 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4786 		un->un_f_disksort_disabled =
4787 		    (prop_list->sdt_disk_sort_dis != 0) ?
4788 		    TRUE : FALSE;
4789 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4790 		    "sd_set_vers1_properties: disksort disabled "
4791 		    "flag set to %d\n",
4792 		    prop_list->sdt_disk_sort_dis);
4793 	}
4794 
4795 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4796 		un->un_f_lun_reset_enabled =
4797 		    (prop_list->sdt_lun_reset_enable != 0) ?
4798 		    TRUE : FALSE;
4799 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4800 		    "sd_set_vers1_properties: lun reset enabled "
4801 		    "flag set to %d\n",
4802 		    prop_list->sdt_lun_reset_enable);
4803 	}
4804 
4805 	if (flags & SD_CONF_BSET_CACHE_IS_NV) {
4806 		un->un_f_suppress_cache_flush =
4807 		    (prop_list->sdt_suppress_cache_flush != 0) ?
4808 		    TRUE : FALSE;
4809 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4810 		    "sd_set_vers1_properties: suppress_cache_flush "
4811 		    "flag set to %d\n",
4812 		    prop_list->sdt_suppress_cache_flush);
4813 	}
4814 
4815 	if (flags & SD_CONF_BSET_PC_DISABLED) {
4816 		un->un_f_power_condition_disabled =
4817 		    (prop_list->sdt_power_condition_dis != 0) ?
4818 		    TRUE : FALSE;
4819 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4820 		    "sd_set_vers1_properties: power_condition_disabled "
4821 		    "flag set to %d\n",
4822 		    prop_list->sdt_power_condition_dis);
4823 	}
4824 
4825 	/*
4826 	 * Validate the throttle values.
4827 	 * If any of the numbers are invalid, set everything to defaults.
4828 	 */
4829 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4830 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4831 	    (un->un_min_throttle > un->un_throttle)) {
4832 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4833 		un->un_min_throttle = sd_min_throttle;
4834 	}
4835 }
4836 
4837 /*
4838  *   Function: sd_is_lsi()
4839  *
4840  *   Description: Check for lsi devices, step through the static device
4841  *	table to match vid/pid.
4842  *
4843  *   Args: un - ptr to sd_lun
4844  *
4845  *   Notes:  When creating new LSI property, need to add the new LSI property
4846  *		to this function.
4847  */
4848 static void
4849 sd_is_lsi(struct sd_lun *un)
4850 {
4851 	char	*id = NULL;
4852 	int	table_index;
4853 	int	idlen;
4854 	void	*prop;
4855 
4856 	ASSERT(un != NULL);
4857 	for (table_index = 0; table_index < sd_disk_table_size;
4858 	    table_index++) {
4859 		id = sd_disk_table[table_index].device_id;
4860 		idlen = strlen(id);
4861 		if (idlen == 0) {
4862 			continue;
4863 		}
4864 
4865 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4866 			prop = sd_disk_table[table_index].properties;
4867 			if (prop == &lsi_properties ||
4868 			    prop == &lsi_oem_properties ||
4869 			    prop == &lsi_properties_scsi ||
4870 			    prop == &symbios_properties) {
4871 				un->un_f_cfg_is_lsi = TRUE;
4872 			}
4873 			break;
4874 		}
4875 	}
4876 }
4877 
4878 /*
4879  *    Function: sd_get_physical_geometry
4880  *
4881  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4882  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4883  *		target, and use this information to initialize the physical
4884  *		geometry cache specified by pgeom_p.
4885  *
4886  *		MODE SENSE is an optional command, so failure in this case
4887  *		does not necessarily denote an error. We want to use the
4888  *		MODE SENSE commands to derive the physical geometry of the
4889  *		device, but if either command fails, the logical geometry is
4890  *		used as the fallback for disk label geometry in cmlb.
4891  *
4892  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4893  *		have already been initialized for the current target and
4894  *		that the current values be passed as args so that we don't
4895  *		end up ever trying to use -1 as a valid value. This could
4896  *		happen if either value is reset while we're not holding
4897  *		the mutex.
4898  *
4899  *   Arguments: un - driver soft state (unit) structure
4900  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4901  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4902  *			to use the USCSI "direct" chain and bypass the normal
4903  *			command waitq.
4904  *
4905  *     Context: Kernel thread only (can sleep).
4906  */
4907 
static int
sd_get_physical_geometry(struct sd_lun *un, cmlb_geom_t *pgeom_p,
	diskaddr_t capacity, int lbasize, int path_flag)
{
	struct	mode_format	*page3p;
	struct	mode_geometry	*page4p;
	struct	mode_header	*headerp;
	int	sector_size;
	int	nsect;
	int	nhead;
	int	ncyl;
	int	intrlv;
	int	spc;
	diskaddr_t	modesense_capacity;
	int	rpm;
	int	bd_len;
	int	mode_header_length;
	uchar_t	*p3bufp;
	uchar_t	*p4bufp;
	int	cdbsize;
	int 	ret = EIO;
	sd_ssc_t *ssc;
	int	status;

	ASSERT(un != NULL);

	/* Fall back to a default LBA size if the caller did not supply one. */
	if (lbasize == 0) {
		if (ISCD(un)) {
			lbasize = 2048;
		} else {
			lbasize = un->un_sys_blocksize;
		}
	}
	pgeom_p->g_secsize = (unsigned short)lbasize;

	/*
	 * If the unit is a cd/dvd drive MODE SENSE page three
	 * and MODE SENSE page four are reserved (see SBC spec
	 * and MMC spec). To prevent soft errors just return
	 * using the default LBA size.
	 */
	if (ISCD(un))
		return (ret);

	/* ATAPI uses the group 2 (10-byte) CDB; plain SCSI uses group 0. */
	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;

	/*
	 * Retrieve MODE SENSE page 3 - Format Device Page
	 */
	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
	/* ssc is released via sd_ssc_fini() on every exit path below. */
	ssc = sd_ssc_init(un);
	status = sd_send_scsi_MODE_SENSE(ssc, cdbsize, p3bufp,
	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag);
	if (status != 0) {
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_get_physical_geometry: mode sense page 3 failed\n");
		goto page3_exit;
	}

	/*
	 * Determine size of Block Descriptors in order to locate the mode
	 * page data.  ATAPI devices return 0, SCSI devices should return
	 * MODE_BLK_DESC_LENGTH.
	 */
	headerp = (struct mode_header *)p3bufp;
	if (un->un_f_cfg_is_atapi == TRUE) {
		struct mode_header_grp2 *mhp =
		    (struct mode_header_grp2 *)headerp;
		mode_header_length = MODE_HEADER_LENGTH_GRP2;
		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
	} else {
		mode_header_length = MODE_HEADER_LENGTH;
		bd_len = ((struct mode_header *)headerp)->bdesc_length;
	}

	if (bd_len > MODE_BLK_DESC_LENGTH) {
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
		    "sd_get_physical_geometry: received unexpected bd_len "
		    "of %d, page3\n", bd_len);
		status = EIO;
		goto page3_exit;
	}

	page3p = (struct mode_format *)
	    ((caddr_t)headerp + mode_header_length + bd_len);

	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
		    "sd_get_physical_geometry: mode sense pg3 code mismatch "
		    "%d\n", page3p->mode_page.code);
		status = EIO;
		goto page3_exit;
	}

	/*
	 * Use this physical geometry data only if BOTH MODE SENSE commands
	 * complete successfully; otherwise, revert to the logical geometry.
	 * So, we need to save everything in temporary variables.
	 */
	sector_size = BE_16(page3p->data_bytes_sect);

	/*
	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
	 */
	if (sector_size == 0) {
		sector_size = un->un_sys_blocksize;
	} else {
		/* round down to a multiple of the system block size */
		sector_size &= ~(un->un_sys_blocksize - 1);
	}

	nsect  = BE_16(page3p->sect_track);
	intrlv = BE_16(page3p->interleave);

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   mode page: %d; nsect: %d; sector size: %d;\n",
	    page3p->mode_page.code, nsect, sector_size);
	SD_INFO(SD_LOG_COMMON, un,
	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
	    BE_16(page3p->track_skew),
	    BE_16(page3p->cylinder_skew));

	sd_ssc_assessment(ssc, SD_FMT_STANDARD);

	/*
	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
	 */
	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
	status = sd_send_scsi_MODE_SENSE(ssc, cdbsize, p4bufp,
	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag);
	if (status != 0) {
		SD_ERROR(SD_LOG_COMMON, un,
		    "sd_get_physical_geometry: mode sense page 4 failed\n");
		goto page4_exit;
	}

	/*
	 * Determine size of Block Descriptors in order to locate the mode
	 * page data.  ATAPI devices return 0, SCSI devices should return
	 * MODE_BLK_DESC_LENGTH.
	 *
	 * Note: mode_header_length computed during the page 3 request is
	 * reused below; the header format (ATAPI vs. SCSI) is the same
	 * for both requests.
	 */
	headerp = (struct mode_header *)p4bufp;
	if (un->un_f_cfg_is_atapi == TRUE) {
		struct mode_header_grp2 *mhp =
		    (struct mode_header_grp2 *)headerp;
		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
	} else {
		bd_len = ((struct mode_header *)headerp)->bdesc_length;
	}

	if (bd_len > MODE_BLK_DESC_LENGTH) {
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
		    "sd_get_physical_geometry: received unexpected bd_len of "
		    "%d, page4\n", bd_len);
		status = EIO;
		goto page4_exit;
	}

	page4p = (struct mode_geometry *)
	    ((caddr_t)headerp + mode_header_length + bd_len);

	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
		    "sd_get_physical_geometry: mode sense pg4 code mismatch "
		    "%d\n", page4p->mode_page.code);
		status = EIO;
		goto page4_exit;
	}

	/*
	 * Stash the data now, after we know that both commands completed.
	 */


	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
	spc   = nhead * nsect;
	/* cylinder count is a 24-bit big-endian field split across 3 bytes */
	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
	rpm   = BE_16(page4p->rpm);

	modesense_capacity = spc * ncyl;

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
	SD_INFO(SD_LOG_COMMON, un,
	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
	    (void *)pgeom_p, capacity);

	/*
	 * Compensate if the drive's geometry is not rectangular, i.e.,
	 * the product of C * H * S returned by MODE SENSE >= that returned
	 * by read capacity. This is an idiosyncrasy of the original x86
	 * disk subsystem.
	 */
	if (modesense_capacity >= capacity) {
		SD_INFO(SD_LOG_COMMON, un,
		    "sd_get_physical_geometry: adjusting acyl; "
		    "old: %d; new: %d\n", pgeom_p->g_acyl,
		    (modesense_capacity - capacity + spc - 1) / spc);
		if (sector_size != 0) {
			/* 1243403: NEC D38x7 drives don't support sec size */
			pgeom_p->g_secsize = (unsigned short)sector_size;
		}
		pgeom_p->g_nsect    = (unsigned short)nsect;
		pgeom_p->g_nhead    = (unsigned short)nhead;
		pgeom_p->g_capacity = capacity;
		/* alternate cylinders absorb the excess mode-sense capacity */
		pgeom_p->g_acyl	    =
		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
	}

	pgeom_p->g_rpm    = (unsigned short)rpm;
	pgeom_p->g_intrlv = (unsigned short)intrlv;
	/* both pages parsed successfully; report success to the caller */
	ret = 0;

	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: mode sense geometry:\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   nsect: %d; sector size: %d; interlv: %d\n",
	    nsect, sector_size, intrlv);
	SD_INFO(SD_LOG_COMMON, un,
	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
	    nhead, ncyl, rpm, modesense_capacity);
	SD_INFO(SD_LOG_COMMON, un,
	    "sd_get_physical_geometry: (cached)\n");
	SD_INFO(SD_LOG_COMMON, un,
	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
	    pgeom_p->g_ncyl,  pgeom_p->g_acyl,
	    pgeom_p->g_nhead, pgeom_p->g_nsect);
	SD_INFO(SD_LOG_COMMON, un,
	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
	    pgeom_p->g_secsize, pgeom_p->g_capacity,
	    pgeom_p->g_intrlv, pgeom_p->g_rpm);
	sd_ssc_assessment(ssc, SD_FMT_STANDARD);

	/* Cleanup: free page 4 buffer, then fall through to page 3 buffer. */
page4_exit:
	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);

page3_exit:
	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);

	if (status != 0) {
		if (status == EIO) {
			/*
			 * Some disks do not support mode sense(6), we
			 * should ignore this kind of error(sense key is
			 * 0x5 - illegal request).
			 */
			uint8_t *sensep;
			int senlen;

			sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
			senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
			    ssc->ssc_uscsi_cmd->uscsi_rqresid);

			if (senlen > 0 &&
			    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
				sd_ssc_assessment(ssc,
				    SD_FMT_IGNORE_COMPROMISE);
			} else {
				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
			}
		} else {
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		}
	}
	sd_ssc_fini(ssc);
	return (ret);
}
5180 
5181 /*
5182  *    Function: sd_get_virtual_geometry
5183  *
5184  * Description: Ask the controller to tell us about the target device.
5185  *
5186  *   Arguments: un - pointer to softstate
5187  *		capacity - disk capacity in #blocks
5188  *		lbasize - disk block size in bytes
5189  *
5190  *     Context: Kernel thread only
5191  */
5192 
5193 static int
5194 sd_get_virtual_geometry(struct sd_lun *un, cmlb_geom_t *lgeom_p,
5195     diskaddr_t capacity, int lbasize)
5196 {
5197 	uint_t	geombuf;
5198 	int	spc;
5199 
5200 	ASSERT(un != NULL);
5201 
5202 	/* Set sector size, and total number of sectors */
5203 	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
5204 	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
5205 
5206 	/* Let the HBA tell us its geometry */
5207 	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
5208 
5209 	/* A value of -1 indicates an undefined "geometry" property */
5210 	if (geombuf == (-1)) {
5211 		return (EINVAL);
5212 	}
5213 
5214 	/* Initialize the logical geometry cache. */
5215 	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
5216 	lgeom_p->g_nsect   = geombuf & 0xffff;
5217 	lgeom_p->g_secsize = un->un_sys_blocksize;
5218 
5219 	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
5220 
5221 	/*
5222 	 * Note: The driver originally converted the capacity value from
5223 	 * target blocks to system blocks. However, the capacity value passed
5224 	 * to this routine is already in terms of system blocks (this scaling
5225 	 * is done when the READ CAPACITY command is issued and processed).
5226 	 * This 'error' may have gone undetected because the usage of g_ncyl
5227 	 * (which is based upon g_capacity) is very limited within the driver
5228 	 */
5229 	lgeom_p->g_capacity = capacity;
5230 
5231 	/*
5232 	 * Set ncyl to zero if the hba returned a zero nhead or nsect value. The
5233 	 * hba may return zero values if the device has been removed.
5234 	 */
5235 	if (spc == 0) {
5236 		lgeom_p->g_ncyl = 0;
5237 	} else {
5238 		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
5239 	}
5240 	lgeom_p->g_acyl = 0;
5241 
5242 	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
5243 	return (0);
5244 
5245 }
5246 /*
5247  *    Function: sd_update_block_info
5248  *
5249  * Description: Calculate a byte count to sector count bitshift value
5250  *		from sector size.
5251  *
5252  *   Arguments: un: unit struct.
5253  *		lbasize: new target sector size
5254  *		capacity: new target capacity, ie. block count
5255  *
5256  *     Context: Kernel thread context
5257  */
5258 
5259 static void
5260 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
5261 {
5262 	if (lbasize != 0) {
5263 		un->un_tgt_blocksize = lbasize;
5264 		un->un_f_tgt_blocksize_is_valid = TRUE;
5265 		if (!un->un_f_has_removable_media) {
5266 			un->un_sys_blocksize = lbasize;
5267 		}
5268 	}
5269 
5270 	if (capacity != 0) {
5271 		un->un_blockcount		= capacity;
5272 		un->un_f_blockcount_is_valid	= TRUE;
5273 	}
5274 }
5275 
5276 
5277 /*
5278  *    Function: sd_register_devid
5279  *
5280  * Description: This routine will obtain the device id information from the
5281  *		target, obtain the serial number, and register the device
5282  *		id with the ddi framework.
5283  *
5284  *   Arguments: devi - the system's dev_info_t for the device.
5285  *		un - driver soft state (unit) structure
5286  *		reservation_flag - indicates if a reservation conflict
5287  *		occurred during attach
5288  *
5289  *     Context: Kernel Thread
5290  */
5291 static void
5292 sd_register_devid(sd_ssc_t *ssc, dev_info_t *devi, int reservation_flag)
5293 {
5294 	int		rval		= 0;
5295 	uchar_t		*inq80		= NULL;
5296 	size_t		inq80_len	= MAX_INQUIRY_SIZE;
5297 	size_t		inq80_resid	= 0;
5298 	uchar_t		*inq83		= NULL;
5299 	size_t		inq83_len	= MAX_INQUIRY_SIZE;
5300 	size_t		inq83_resid	= 0;
5301 	int		dlen, len;
5302 	char		*sn;
5303 	struct sd_lun	*un;
5304 
5305 	ASSERT(ssc != NULL);
5306 	un = ssc->ssc_un;
5307 	ASSERT(un != NULL);
5308 	ASSERT(mutex_owned(SD_MUTEX(un)));
5309 	ASSERT((SD_DEVINFO(un)) == devi);
5310 
5311 
5312 	/*
5313 	 * We check the availability of the World Wide Name (0x83) and Unit
5314 	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), and using
5315 	 * un_vpd_page_mask from them, we decide which way to get the WWN.  If
5316 	 * 0x83 is available, that is the best choice.  Our next choice is
5317 	 * 0x80.  If neither are available, we munge the devid from the device
5318 	 * vid/pid/serial # for Sun qualified disks, or use the ddi framework
5319 	 * to fabricate a devid for non-Sun qualified disks.
5320 	 */
5321 	if (sd_check_vpd_page_support(ssc) == 0) {
5322 		/* collect page 80 data if available */
5323 		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {
5324 
5325 			mutex_exit(SD_MUTEX(un));
5326 			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
5327 
5328 			rval = sd_send_scsi_INQUIRY(ssc, inq80, inq80_len,
5329 			    0x01, 0x80, &inq80_resid);
5330 
5331 			if (rval != 0) {
5332 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5333 				kmem_free(inq80, inq80_len);
5334 				inq80 = NULL;
5335 				inq80_len = 0;
5336 			} else if (ddi_prop_exists(
5337 			    DDI_DEV_T_NONE, SD_DEVINFO(un),
5338 			    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
5339 			    INQUIRY_SERIAL_NO) == 0) {
5340 				/*
5341 				 * If we don't already have a serial number
5342 				 * property, do quick verify of data returned
5343 				 * and define property.
5344 				 */
5345 				dlen = inq80_len - inq80_resid;
5346 				len = (size_t)inq80[3];
5347 				if ((dlen >= 4) && ((len + 4) <= dlen)) {
5348 					/*
5349 					 * Ensure sn termination, skip leading
5350 					 * blanks, and create property
5351 					 * 'inquiry-serial-no'.
5352 					 */
5353 					sn = (char *)&inq80[4];
5354 					sn[len] = 0;
5355 					while (*sn && (*sn == ' '))
5356 						sn++;
5357 					if (*sn) {
5358 						(void) ddi_prop_update_string(
5359 						    DDI_DEV_T_NONE,
5360 						    SD_DEVINFO(un),
5361 						    INQUIRY_SERIAL_NO, sn);
5362 					}
5363 				}
5364 			}
5365 			mutex_enter(SD_MUTEX(un));
5366 		}
5367 
5368 		/* collect page 83 data if available */
5369 		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
5370 			mutex_exit(SD_MUTEX(un));
5371 			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
5372 
5373 			rval = sd_send_scsi_INQUIRY(ssc, inq83, inq83_len,
5374 			    0x01, 0x83, &inq83_resid);
5375 
5376 			if (rval != 0) {
5377 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5378 				kmem_free(inq83, inq83_len);
5379 				inq83 = NULL;
5380 				inq83_len = 0;
5381 			}
5382 			mutex_enter(SD_MUTEX(un));
5383 		}
5384 	}
5385 
5386 	/*
5387 	 * If transport has already registered a devid for this target
5388 	 * then that takes precedence over the driver's determination
5389 	 * of the devid.
5390 	 *
5391 	 * NOTE: The reason this check is done here instead of at the beginning
5392 	 * of the function is to allow the code above to create the
5393 	 * 'inquiry-serial-no' property.
5394 	 */
5395 	if (ddi_devid_get(SD_DEVINFO(un), &un->un_devid) == DDI_SUCCESS) {
5396 		ASSERT(un->un_devid);
5397 		un->un_f_devid_transport_defined = TRUE;
5398 		goto cleanup; /* use devid registered by the transport */
5399 	}
5400 
5401 	/*
5402 	 * This is the case of antiquated Sun disk drives that have the
5403 	 * FAB_DEVID property set in the disk_table.  These drives
5404 	 * manage the devid's by storing them in last 2 available sectors
5405 	 * on the drive and have them fabricated by the ddi layer by calling
5406 	 * ddi_devid_init and passing the DEVID_FAB flag.
5407 	 */
5408 	if (un->un_f_opt_fab_devid == TRUE) {
5409 		/*
5410 		 * Depending on EINVAL isn't reliable, since a reserved disk
5411 		 * may result in invalid geometry, so check to make sure a
5412 		 * reservation conflict did not occur during attach.
5413 		 */
5414 		if ((sd_get_devid(ssc) == EINVAL) &&
5415 		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
5416 			/*
5417 			 * The devid is invalid AND there is no reservation
5418 			 * conflict.  Fabricate a new devid.
5419 			 */
5420 			(void) sd_create_devid(ssc);
5421 		}
5422 
5423 		/* Register the devid if it exists */
5424 		if (un->un_devid != NULL) {
5425 			(void) ddi_devid_register(SD_DEVINFO(un),
5426 			    un->un_devid);
5427 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
5428 			    "sd_register_devid: Devid Fabricated\n");
5429 		}
5430 		goto cleanup;
5431 	}
5432 
5433 	/* encode best devid possible based on data available */
5434 	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
5435 	    (char *)ddi_driver_name(SD_DEVINFO(un)),
5436 	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
5437 	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
5438 	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {
5439 
5440 		/* devid successfully encoded, register devid */
5441 		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
5442 
5443 	} else {
5444 		/*
5445 		 * Unable to encode a devid based on data available.
5446 		 * This is not a Sun qualified disk.  Older Sun disk
5447 		 * drives that have the SD_FAB_DEVID property
5448 		 * set in the disk_table and non Sun qualified
5449 		 * disks are treated in the same manner.  These
5450 		 * drives manage the devid's by storing them in
5451 		 * last 2 available sectors on the drive and
5452 		 * have them fabricated by the ddi layer by
5453 		 * calling ddi_devid_init and passing the
5454 		 * DEVID_FAB flag.
5455 		 * Create a fabricate devid only if there's no
5456 		 * fabricate devid existed.
5457 		 */
5458 		if (sd_get_devid(ssc) == EINVAL) {
5459 			(void) sd_create_devid(ssc);
5460 		}
5461 		un->un_f_opt_fab_devid = TRUE;
5462 
5463 		/* Register the devid if it exists */
5464 		if (un->un_devid != NULL) {
5465 			(void) ddi_devid_register(SD_DEVINFO(un),
5466 			    un->un_devid);
5467 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
5468 			    "sd_register_devid: devid fabricated using "
5469 			    "ddi framework\n");
5470 		}
5471 	}
5472 
5473 cleanup:
5474 	/* clean up resources */
5475 	if (inq80 != NULL) {
5476 		kmem_free(inq80, inq80_len);
5477 	}
5478 	if (inq83 != NULL) {
5479 		kmem_free(inq83, inq83_len);
5480 	}
5481 }
5482 
5483 
5484 
5485 /*
5486  *    Function: sd_get_devid
5487  *
5488  * Description: This routine will return 0 if a valid device id has been
5489  *		obtained from the target and stored in the soft state. If a
5490  *		valid device id has not been previously read and stored, a
5491  *		read attempt will be made.
5492  *
5493  *   Arguments: un - driver soft state (unit) structure
5494  *
5495  * Return Code: 0 if we successfully get the device id
5496  *
5497  *     Context: Kernel Thread
5498  */
5499 
5500 static int
5501 sd_get_devid(sd_ssc_t *ssc)
5502 {
5503 	struct dk_devid		*dkdevid;
5504 	ddi_devid_t		tmpid;
5505 	uint_t			*ip;
5506 	size_t			sz;
5507 	diskaddr_t		blk;
5508 	int			status;
5509 	int			chksum;
5510 	int			i;
5511 	size_t			buffer_size;
5512 	struct sd_lun		*un;
5513 
5514 	ASSERT(ssc != NULL);
5515 	un = ssc->ssc_un;
5516 	ASSERT(un != NULL);
5517 	ASSERT(mutex_owned(SD_MUTEX(un)));
5518 
5519 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
5520 	    un);
5521 
5522 	if (un->un_devid != NULL) {
5523 		return (0);
5524 	}
5525 
5526 	mutex_exit(SD_MUTEX(un));
5527 	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
5528 	    (void *)SD_PATH_DIRECT) != 0) {
5529 		mutex_enter(SD_MUTEX(un));
5530 		return (EINVAL);
5531 	}
5532 
5533 	/*
5534 	 * Read and verify device id, stored in the reserved cylinders at the
5535 	 * end of the disk. Backup label is on the odd sectors of the last
5536 	 * track of the last cylinder. Device id will be on track of the next
5537 	 * to last cylinder.
5538 	 */
5539 	mutex_enter(SD_MUTEX(un));
5540 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
5541 	mutex_exit(SD_MUTEX(un));
5542 	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
5543 	status = sd_send_scsi_READ(ssc, dkdevid, buffer_size, blk,
5544 	    SD_PATH_DIRECT);
5545 
5546 	if (status != 0) {
5547 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5548 		goto error;
5549 	}
5550 
5551 	/* Validate the revision */
5552 	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
5553 	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
5554 		status = EINVAL;
5555 		goto error;
5556 	}
5557 
5558 	/* Calculate the checksum */
5559 	chksum = 0;
5560 	ip = (uint_t *)dkdevid;
5561 	for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int));
5562 	    i++) {
5563 		chksum ^= ip[i];
5564 	}
5565 
5566 	/* Compare the checksums */
5567 	if (DKD_GETCHKSUM(dkdevid) != chksum) {
5568 		status = EINVAL;
5569 		goto error;
5570 	}
5571 
5572 	/* Validate the device id */
5573 	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
5574 		status = EINVAL;
5575 		goto error;
5576 	}
5577 
5578 	/*
5579 	 * Store the device id in the driver soft state
5580 	 */
5581 	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
5582 	tmpid = kmem_alloc(sz, KM_SLEEP);
5583 
5584 	mutex_enter(SD_MUTEX(un));
5585 
5586 	un->un_devid = tmpid;
5587 	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
5588 
5589 	kmem_free(dkdevid, buffer_size);
5590 
5591 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
5592 
5593 	return (status);
5594 error:
5595 	mutex_enter(SD_MUTEX(un));
5596 	kmem_free(dkdevid, buffer_size);
5597 	return (status);
5598 }
5599 
5600 
5601 /*
5602  *    Function: sd_create_devid
5603  *
5604  * Description: This routine will fabricate the device id and write it
5605  *		to the disk.
5606  *
5607  *   Arguments: un - driver soft state (unit) structure
5608  *
5609  * Return Code: value of the fabricated device id
5610  *
5611  *     Context: Kernel Thread
5612  */
5613 
5614 static ddi_devid_t
5615 sd_create_devid(sd_ssc_t *ssc)
5616 {
5617 	struct sd_lun	*un;
5618 
5619 	ASSERT(ssc != NULL);
5620 	un = ssc->ssc_un;
5621 	ASSERT(un != NULL);
5622 
5623 	/* Fabricate the devid */
5624 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
5625 	    == DDI_FAILURE) {
5626 		return (NULL);
5627 	}
5628 
5629 	/* Write the devid to disk */
5630 	if (sd_write_deviceid(ssc) != 0) {
5631 		ddi_devid_free(un->un_devid);
5632 		un->un_devid = NULL;
5633 	}
5634 
5635 	return (un->un_devid);
5636 }
5637 
5638 
5639 /*
5640  *    Function: sd_write_deviceid
5641  *
5642  * Description: This routine will write the device id to the disk
5643  *		reserved sector.
5644  *
5645  *   Arguments: un - driver soft state (unit) structure
5646  *
5647  * Return Code: EINVAL
5648  *		value returned by sd_send_scsi_cmd
5649  *
5650  *     Context: Kernel Thread
5651  */
5652 
5653 static int
5654 sd_write_deviceid(sd_ssc_t *ssc)
5655 {
5656 	struct dk_devid		*dkdevid;
5657 	uchar_t			*buf;
5658 	diskaddr_t		blk;
5659 	uint_t			*ip, chksum;
5660 	int			status;
5661 	int			i;
5662 	struct sd_lun		*un;
5663 
5664 	ASSERT(ssc != NULL);
5665 	un = ssc->ssc_un;
5666 	ASSERT(un != NULL);
5667 	ASSERT(mutex_owned(SD_MUTEX(un)));
5668 
5669 	mutex_exit(SD_MUTEX(un));
5670 	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
5671 	    (void *)SD_PATH_DIRECT) != 0) {
5672 		mutex_enter(SD_MUTEX(un));
5673 		return (-1);
5674 	}
5675 
5676 
5677 	/* Allocate the buffer */
5678 	buf = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
5679 	dkdevid = (struct dk_devid *)buf;
5680 
5681 	/* Fill in the revision */
5682 	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
5683 	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
5684 
5685 	/* Copy in the device id */
5686 	mutex_enter(SD_MUTEX(un));
5687 	bcopy(un->un_devid, &dkdevid->dkd_devid,
5688 	    ddi_devid_sizeof(un->un_devid));
5689 	mutex_exit(SD_MUTEX(un));
5690 
5691 	/* Calculate the checksum */
5692 	chksum = 0;
5693 	ip = (uint_t *)dkdevid;
5694 	for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int));
5695 	    i++) {
5696 		chksum ^= ip[i];
5697 	}
5698 
5699 	/* Fill-in checksum */
5700 	DKD_FORMCHKSUM(chksum, dkdevid);
5701 
5702 	/* Write the reserved sector */
5703 	status = sd_send_scsi_WRITE(ssc, buf, un->un_sys_blocksize, blk,
5704 	    SD_PATH_DIRECT);
5705 	if (status != 0)
5706 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5707 
5708 	kmem_free(buf, un->un_sys_blocksize);
5709 
5710 	mutex_enter(SD_MUTEX(un));
5711 	return (status);
5712 }
5713 
5714 
5715 /*
5716  *    Function: sd_check_vpd_page_support
5717  *
5718  * Description: This routine sends an inquiry command with the EVPD bit set and
5719  *		a page code of 0x00 to the device. It is used to determine which
5720  *		vital product pages are available to find the devid. We are
5721  *		looking for pages 0x83 0x80 or 0xB1.  If we return a negative 1,
5722  *		the device does not support that command.
5723  *
5724  *   Arguments: un  - driver soft state (unit) structure
5725  *
5726  * Return Code: 0 - success
5727  *		1 - check condition
5728  *
5729  *     Context: This routine can sleep.
5730  */
5731 
5732 static int
5733 sd_check_vpd_page_support(sd_ssc_t *ssc)
5734 {
5735 	uchar_t	*page_list	= NULL;
5736 	uchar_t	page_length	= 0xff;	/* Use max possible length */
5737 	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
5738 	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
5739 	int    	rval		= 0;
5740 	int	counter;
5741 	struct sd_lun		*un;
5742 
5743 	ASSERT(ssc != NULL);
5744 	un = ssc->ssc_un;
5745 	ASSERT(un != NULL);
5746 	ASSERT(mutex_owned(SD_MUTEX(un)));
5747 
5748 	mutex_exit(SD_MUTEX(un));
5749 
5750 	/*
5751 	 * We'll set the page length to the maximum to save figuring it out
5752 	 * with an additional call.
5753 	 */
5754 	page_list =  kmem_zalloc(page_length, KM_SLEEP);
5755 
5756 	rval = sd_send_scsi_INQUIRY(ssc, page_list, page_length, evpd,
5757 	    page_code, NULL);
5758 
5759 	if (rval != 0)
5760 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5761 
5762 	mutex_enter(SD_MUTEX(un));
5763 
5764 	/*
5765 	 * Now we must validate that the device accepted the command, as some
5766 	 * drives do not support it.  If the drive does support it, we will
5767 	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
5768 	 * not, we return -1.
5769 	 */
5770 	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
5771 		/* Loop to find one of the 2 pages we need */
5772 		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
5773 
5774 		/*
5775 		 * Pages are returned in ascending order, and 0x83 is what we
5776 		 * are hoping for.
5777 		 */
5778 		while ((page_list[counter] <= 0xB1) &&
5779 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
5780 		    VPD_HEAD_OFFSET))) {
5781 			/*
5782 			 * Add 3 because page_list[3] is the number of
5783 			 * pages minus 3
5784 			 */
5785 
5786 			switch (page_list[counter]) {
5787 			case 0x00:
5788 				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
5789 				break;
5790 			case 0x80:
5791 				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
5792 				break;
5793 			case 0x81:
5794 				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
5795 				break;
5796 			case 0x82:
5797 				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
5798 				break;
5799 			case 0x83:
5800 				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
5801 				break;
5802 			case 0x86:
5803 				un->un_vpd_page_mask |= SD_VPD_EXTENDED_DATA_PG;
5804 				break;
5805 			case 0xB1:
5806 				un->un_vpd_page_mask |= SD_VPD_DEV_CHARACTER_PG;
5807 				break;
5808 			}
5809 			counter++;
5810 		}
5811 
5812 	} else {
5813 		rval = -1;
5814 
5815 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
5816 		    "sd_check_vpd_page_support: This drive does not implement "
5817 		    "VPD pages.\n");
5818 	}
5819 
5820 	kmem_free(page_list, page_length);
5821 
5822 	return (rval);
5823 }
5824 
5825 
5826 /*
5827  *    Function: sd_setup_pm
5828  *
5829  * Description: Initialize Power Management on the device
5830  *
5831  *     Context: Kernel Thread
5832  */
5833 
5834 static void
5835 sd_setup_pm(sd_ssc_t *ssc, dev_info_t *devi)
5836 {
5837 	uint_t		log_page_size;
5838 	uchar_t		*log_page_data;
5839 	int		rval = 0;
5840 	struct sd_lun	*un;
5841 
5842 	ASSERT(ssc != NULL);
5843 	un = ssc->ssc_un;
5844 	ASSERT(un != NULL);
5845 
5846 	/*
5847 	 * Since we are called from attach, holding a mutex for
5848 	 * un is unnecessary. Because some of the routines called
5849 	 * from here require SD_MUTEX to not be held, assert this
5850 	 * right up front.
5851 	 */
5852 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5853 	/*
5854 	 * Since the sd device does not have the 'reg' property,
5855 	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
5856 	 * The following code is to tell cpr that this device
5857 	 * DOES need to be suspended and resumed.
5858 	 */
5859 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
5860 	    "pm-hardware-state", "needs-suspend-resume");
5861 
5862 	/*
5863 	 * This complies with the new power management framework
5864 	 * for certain desktop machines. Create the pm_components
5865 	 * property as a string array property.
5866 	 * If un_f_pm_supported is TRUE, that means the disk
5867 	 * attached HBA has set the "pm-capable" property and
5868 	 * the value of this property is bigger than 0.
5869 	 */
5870 	if (un->un_f_pm_supported) {
5871 		/*
5872 		 * not all devices have a motor, try it first.
5873 		 * some devices may return ILLEGAL REQUEST, some
5874 		 * will hang
5875 		 * The following START_STOP_UNIT is used to check if target
5876 		 * device has a motor.
5877 		 */
5878 		un->un_f_start_stop_supported = TRUE;
5879 
5880 		if (un->un_f_power_condition_supported) {
5881 			rval = sd_send_scsi_START_STOP_UNIT(ssc,
5882 			    SD_POWER_CONDITION, SD_TARGET_ACTIVE,
5883 			    SD_PATH_DIRECT);
5884 			if (rval != 0) {
5885 				un->un_f_power_condition_supported = FALSE;
5886 			}
5887 		}
5888 		if (!un->un_f_power_condition_supported) {
5889 			rval = sd_send_scsi_START_STOP_UNIT(ssc,
5890 			    SD_START_STOP, SD_TARGET_START, SD_PATH_DIRECT);
5891 		}
5892 		if (rval != 0) {
5893 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5894 			un->un_f_start_stop_supported = FALSE;
5895 		}
5896 
5897 		/*
5898 		 * create pm properties anyways otherwise the parent can't
5899 		 * go to sleep
5900 		 */
5901 		un->un_f_pm_is_enabled = TRUE;
5902 		(void) sd_create_pm_components(devi, un);
5903 
5904 		/*
5905 		 * If it claims that log sense is supported, check it out.
5906 		 */
5907 		if (un->un_f_log_sense_supported) {
5908 			rval = sd_log_page_supported(ssc,
5909 			    START_STOP_CYCLE_PAGE);
5910 			if (rval == 1) {
5911 				/* Page found, use it. */
5912 				un->un_start_stop_cycle_page =
5913 				    START_STOP_CYCLE_PAGE;
5914 			} else {
5915 				/*
5916 				 * Page not found or log sense is not
5917 				 * supported.
5918 				 * Notice we do not check the old style
5919 				 * START_STOP_CYCLE_VU_PAGE because this
5920 				 * code path does not apply to old disks.
5921 				 */
5922 				un->un_f_log_sense_supported = FALSE;
5923 				un->un_f_pm_log_sense_smart = FALSE;
5924 			}
5925 		}
5926 
5927 		return;
5928 	}
5929 
5930 	/*
5931 	 * For the disk whose attached HBA has not set the "pm-capable"
5932 	 * property, check if it supports the power management.
5933 	 */
5934 	if (!un->un_f_log_sense_supported) {
5935 		un->un_power_level = SD_SPINDLE_ON;
5936 		un->un_f_pm_is_enabled = FALSE;
5937 		return;
5938 	}
5939 
5940 	rval = sd_log_page_supported(ssc, START_STOP_CYCLE_PAGE);
5941 
5942 #ifdef	SDDEBUG
5943 	if (sd_force_pm_supported) {
5944 		/* Force a successful result */
5945 		rval = 1;
5946 	}
5947 #endif
5948 
5949 	/*
5950 	 * If the start-stop cycle counter log page is not supported
5951 	 * or if the pm-capable property is set to be false (0),
5952 	 * then we should not create the pm_components property.
5953 	 */
5954 	if (rval == -1) {
5955 		/*
5956 		 * Error.
5957 		 * Reading log sense failed, most likely this is
5958 		 * an older drive that does not support log sense.
5959 		 * If this fails auto-pm is not supported.
5960 		 */
5961 		un->un_power_level = SD_SPINDLE_ON;
5962 		un->un_f_pm_is_enabled = FALSE;
5963 
5964 	} else if (rval == 0) {
5965 		/*
5966 		 * Page not found.
5967 		 * The start stop cycle counter is implemented as page
5968 		 * START_STOP_CYCLE_PAGE_VU_PAGE (0x31) in older disks. For
5969 		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
5970 		 */
5971 		if (sd_log_page_supported(ssc, START_STOP_CYCLE_VU_PAGE) == 1) {
5972 			/*
5973 			 * Page found, use this one.
5974 			 */
5975 			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
5976 			un->un_f_pm_is_enabled = TRUE;
5977 		} else {
5978 			/*
5979 			 * Error or page not found.
5980 			 * auto-pm is not supported for this device.
5981 			 */
5982 			un->un_power_level = SD_SPINDLE_ON;
5983 			un->un_f_pm_is_enabled = FALSE;
5984 		}
5985 	} else {
5986 		/*
5987 		 * Page found, use it.
5988 		 */
5989 		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
5990 		un->un_f_pm_is_enabled = TRUE;
5991 	}
5992 
5993 
5994 	if (un->un_f_pm_is_enabled == TRUE) {
5995 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
5996 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
5997 
5998 		rval = sd_send_scsi_LOG_SENSE(ssc, log_page_data,
5999 		    log_page_size, un->un_start_stop_cycle_page,
6000 		    0x01, 0, SD_PATH_DIRECT);
6001 
6002 		if (rval != 0) {
6003 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6004 		}
6005 
6006 #ifdef	SDDEBUG
6007 		if (sd_force_pm_supported) {
6008 			/* Force a successful result */
6009 			rval = 0;
6010 		}
6011 #endif
6012 
6013 		/*
6014 		 * If the Log sense for Page( Start/stop cycle counter page)
6015 		 * succeeds, then power management is supported and we can
6016 		 * enable auto-pm.
6017 		 */
6018 		if (rval == 0)  {
6019 			(void) sd_create_pm_components(devi, un);
6020 		} else {
6021 			un->un_power_level = SD_SPINDLE_ON;
6022 			un->un_f_pm_is_enabled = FALSE;
6023 		}
6024 
6025 		kmem_free(log_page_data, log_page_size);
6026 	}
6027 }
6028 
6029 
6030 /*
6031  *    Function: sd_create_pm_components
6032  *
6033  * Description: Initialize PM property.
6034  *
6035  *     Context: Kernel thread context
6036  */
6037 
6038 static void
6039 sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
6040 {
6041 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6042 
6043 	if (un->un_f_power_condition_supported) {
6044 		if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
6045 		    "pm-components", sd_pwr_pc.pm_comp, 5)
6046 		    != DDI_PROP_SUCCESS) {
6047 			un->un_power_level = SD_SPINDLE_ACTIVE;
6048 			un->un_f_pm_is_enabled = FALSE;
6049 			return;
6050 		}
6051 	} else {
6052 		if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
6053 		    "pm-components", sd_pwr_ss.pm_comp, 3)
6054 		    != DDI_PROP_SUCCESS) {
6055 			un->un_power_level = SD_SPINDLE_ON;
6056 			un->un_f_pm_is_enabled = FALSE;
6057 			return;
6058 		}
6059 	}
6060 	/*
6061 	 * When components are initially created they are idle,
6062 	 * power up any non-removables.
6063 	 * Note: the return value of pm_raise_power can't be used
6064 	 * for determining if PM should be enabled for this device.
6065 	 * Even if you check the return values and remove this
6066 	 * property created above, the PM framework will not honor the
6067 	 * change after the first call to pm_raise_power. Hence,
6068 	 * removal of that property does not help if pm_raise_power
6069 	 * fails. In the case of removable media, the start/stop
6070 	 * will fail if the media is not present.
6071 	 */
6072 	if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
6073 	    SD_PM_STATE_ACTIVE(un)) == DDI_SUCCESS)) {
6074 		mutex_enter(SD_MUTEX(un));
6075 		un->un_power_level = SD_PM_STATE_ACTIVE(un);
6076 		mutex_enter(&un->un_pm_mutex);
6077 		/* Set to on and not busy. */
6078 		un->un_pm_count = 0;
6079 	} else {
6080 		mutex_enter(SD_MUTEX(un));
6081 		un->un_power_level = SD_PM_STATE_STOPPED(un);
6082 		mutex_enter(&un->un_pm_mutex);
6083 		/* Set to off. */
6084 		un->un_pm_count = -1;
6085 	}
6086 	mutex_exit(&un->un_pm_mutex);
6087 	mutex_exit(SD_MUTEX(un));
6088 }
6089 
6090 
6091 /*
6092  *    Function: sd_ddi_suspend
6093  *
6094  * Description: Performs system power-down operations. This includes
6095  *		setting the drive state to indicate its suspended so
6096  *		that no new commands will be accepted. Also, wait for
6097  *		all commands that are in transport or queued to a timer
6098  *		for retry to complete. All timeout threads are cancelled.
6099  *
6100  * Return Code: DDI_FAILURE or DDI_SUCCESS
6101  *
6102  *     Context: Kernel thread context
6103  */
6104 
6105 static int
6106 sd_ddi_suspend(dev_info_t *devi)
6107 {
6108 	struct	sd_lun	*un;
6109 	clock_t		wait_cmds_complete;
6110 
6111 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6112 	if (un == NULL) {
6113 		return (DDI_FAILURE);
6114 	}
6115 
6116 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
6117 
6118 	mutex_enter(SD_MUTEX(un));
6119 
6120 	/* Return success if the device is already suspended. */
6121 	if (un->un_state == SD_STATE_SUSPENDED) {
6122 		mutex_exit(SD_MUTEX(un));
6123 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6124 		    "device already suspended, exiting\n");
6125 		return (DDI_SUCCESS);
6126 	}
6127 
6128 	/* Return failure if the device is being used by HA */
6129 	if (un->un_resvd_status &
6130 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
6131 		mutex_exit(SD_MUTEX(un));
6132 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6133 		    "device in use by HA, exiting\n");
6134 		return (DDI_FAILURE);
6135 	}
6136 
6137 	/*
6138 	 * Return failure if the device is in a resource wait
6139 	 * or power changing state.
6140 	 */
6141 	if ((un->un_state == SD_STATE_RWAIT) ||
6142 	    (un->un_state == SD_STATE_PM_CHANGING)) {
6143 		mutex_exit(SD_MUTEX(un));
6144 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6145 		    "device in resource wait state, exiting\n");
6146 		return (DDI_FAILURE);
6147 	}
6148 
6149 
6150 	un->un_save_state = un->un_last_state;
6151 	New_state(un, SD_STATE_SUSPENDED);
6152 
6153 	/*
6154 	 * Wait for all commands that are in transport or queued to a timer
6155 	 * for retry to complete.
6156 	 *
6157 	 * While waiting, no new commands will be accepted or sent because of
6158 	 * the new state we set above.
6159 	 *
6160 	 * Wait till current operation has completed. If we are in the resource
6161 	 * wait state (with an intr outstanding) then we need to wait till the
6162 	 * intr completes and starts the next cmd. We want to wait for
6163 	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
6164 	 */
6165 	wait_cmds_complete = ddi_get_lbolt() +
6166 	    (sd_wait_cmds_complete * drv_usectohz(1000000));
6167 
6168 	while (un->un_ncmds_in_transport != 0) {
6169 		/*
6170 		 * Fail if commands do not finish in the specified time.
6171 		 */
6172 		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
6173 		    wait_cmds_complete) == -1) {
6174 			/*
6175 			 * Undo the state changes made above. Everything
6176 			 * must go back to it's original value.
6177 			 */
6178 			Restore_state(un);
6179 			un->un_last_state = un->un_save_state;
6180 			/* Wake up any threads that might be waiting. */
6181 			cv_broadcast(&un->un_suspend_cv);
6182 			mutex_exit(SD_MUTEX(un));
6183 			SD_ERROR(SD_LOG_IO_PM, un,
6184 			    "sd_ddi_suspend: failed due to outstanding cmds\n");
6185 			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
6186 			return (DDI_FAILURE);
6187 		}
6188 	}
6189 
6190 	/*
6191 	 * Cancel SCSI watch thread and timeouts, if any are active
6192 	 */
6193 
6194 	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
6195 		opaque_t temp_token = un->un_swr_token;
6196 		mutex_exit(SD_MUTEX(un));
6197 		scsi_watch_suspend(temp_token);
6198 		mutex_enter(SD_MUTEX(un));
6199 	}
6200 
6201 	if (un->un_reset_throttle_timeid != NULL) {
6202 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
6203 		un->un_reset_throttle_timeid = NULL;
6204 		mutex_exit(SD_MUTEX(un));
6205 		(void) untimeout(temp_id);
6206 		mutex_enter(SD_MUTEX(un));
6207 	}
6208 
6209 	if (un->un_dcvb_timeid != NULL) {
6210 		timeout_id_t temp_id = un->un_dcvb_timeid;
6211 		un->un_dcvb_timeid = NULL;
6212 		mutex_exit(SD_MUTEX(un));
6213 		(void) untimeout(temp_id);
6214 		mutex_enter(SD_MUTEX(un));
6215 	}
6216 
6217 	mutex_enter(&un->un_pm_mutex);
6218 	if (un->un_pm_timeid != NULL) {
6219 		timeout_id_t temp_id = un->un_pm_timeid;
6220 		un->un_pm_timeid = NULL;
6221 		mutex_exit(&un->un_pm_mutex);
6222 		mutex_exit(SD_MUTEX(un));
6223 		(void) untimeout(temp_id);
6224 		mutex_enter(SD_MUTEX(un));
6225 	} else {
6226 		mutex_exit(&un->un_pm_mutex);
6227 	}
6228 
6229 	if (un->un_rmw_msg_timeid != NULL) {
6230 		timeout_id_t temp_id = un->un_rmw_msg_timeid;
6231 		un->un_rmw_msg_timeid = NULL;
6232 		mutex_exit(SD_MUTEX(un));
6233 		(void) untimeout(temp_id);
6234 		mutex_enter(SD_MUTEX(un));
6235 	}
6236 
6237 	if (un->un_retry_timeid != NULL) {
6238 		timeout_id_t temp_id = un->un_retry_timeid;
6239 		un->un_retry_timeid = NULL;
6240 		mutex_exit(SD_MUTEX(un));
6241 		(void) untimeout(temp_id);
6242 		mutex_enter(SD_MUTEX(un));
6243 
6244 		if (un->un_retry_bp != NULL) {
6245 			un->un_retry_bp->av_forw = un->un_waitq_headp;
6246 			un->un_waitq_headp = un->un_retry_bp;
6247 			if (un->un_waitq_tailp == NULL) {
6248 				un->un_waitq_tailp = un->un_retry_bp;
6249 			}
6250 			un->un_retry_bp = NULL;
6251 			un->un_retry_statp = NULL;
6252 		}
6253 	}
6254 
6255 	if (un->un_direct_priority_timeid != NULL) {
6256 		timeout_id_t temp_id = un->un_direct_priority_timeid;
6257 		un->un_direct_priority_timeid = NULL;
6258 		mutex_exit(SD_MUTEX(un));
6259 		(void) untimeout(temp_id);
6260 		mutex_enter(SD_MUTEX(un));
6261 	}
6262 
6263 	if (un->un_f_is_fibre == TRUE) {
6264 		/*
6265 		 * Remove callbacks for insert and remove events
6266 		 */
6267 		if (un->un_insert_event != NULL) {
6268 			mutex_exit(SD_MUTEX(un));
6269 			(void) ddi_remove_event_handler(un->un_insert_cb_id);
6270 			mutex_enter(SD_MUTEX(un));
6271 			un->un_insert_event = NULL;
6272 		}
6273 
6274 		if (un->un_remove_event != NULL) {
6275 			mutex_exit(SD_MUTEX(un));
6276 			(void) ddi_remove_event_handler(un->un_remove_cb_id);
6277 			mutex_enter(SD_MUTEX(un));
6278 			un->un_remove_event = NULL;
6279 		}
6280 	}
6281 
6282 	mutex_exit(SD_MUTEX(un));
6283 
6284 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
6285 
6286 	return (DDI_SUCCESS);
6287 }
6288 
6289 
6290 /*
6291  *    Function: sd_ddi_resume
6292  *
6293  * Description: Performs system power-up operations..
6294  *
6295  * Return Code: DDI_SUCCESS
6296  *		DDI_FAILURE
6297  *
6298  *     Context: Kernel thread context
6299  */
6300 
6301 static int
6302 sd_ddi_resume(dev_info_t *devi)
6303 {
6304 	struct	sd_lun	*un;
6305 
6306 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6307 	if (un == NULL) {
6308 		return (DDI_FAILURE);
6309 	}
6310 
6311 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
6312 
6313 	mutex_enter(SD_MUTEX(un));
6314 	Restore_state(un);
6315 
6316 	/*
6317 	 * Restore the state which was saved to give the
6318 	 * the right state in un_last_state
6319 	 */
6320 	un->un_last_state = un->un_save_state;
6321 	/*
6322 	 * Note: throttle comes back at full.
6323 	 * Also note: this MUST be done before calling pm_raise_power
6324 	 * otherwise the system can get hung in biowait. The scenario where
6325 	 * this'll happen is under cpr suspend. Writing of the system
6326 	 * state goes through sddump, which writes 0 to un_throttle. If
6327 	 * writing the system state then fails, example if the partition is
6328 	 * too small, then cpr attempts a resume. If throttle isn't restored
6329 	 * from the saved value until after calling pm_raise_power then
6330 	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
6331 	 * in biowait.
6332 	 */
6333 	un->un_throttle = un->un_saved_throttle;
6334 
6335 	/*
6336 	 * The chance of failure is very rare as the only command done in power
6337 	 * entry point is START command when you transition from 0->1 or
6338 	 * unknown->1. Put it to SPINDLE ON state irrespective of the state at
6339 	 * which suspend was done. Ignore the return value as the resume should
6340 	 * not be failed. In the case of removable media the media need not be
6341 	 * inserted and hence there is a chance that raise power will fail with
6342 	 * media not present.
6343 	 */
6344 	if (un->un_f_attach_spinup) {
6345 		mutex_exit(SD_MUTEX(un));
6346 		(void) pm_raise_power(SD_DEVINFO(un), 0,
6347 		    SD_PM_STATE_ACTIVE(un));
6348 		mutex_enter(SD_MUTEX(un));
6349 	}
6350 
6351 	/*
6352 	 * Don't broadcast to the suspend cv and therefore possibly
6353 	 * start I/O until after power has been restored.
6354 	 */
6355 	cv_broadcast(&un->un_suspend_cv);
6356 	cv_broadcast(&un->un_state_cv);
6357 
6358 	/* restart thread */
6359 	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
6360 		scsi_watch_resume(un->un_swr_token);
6361 	}
6362 
6363 #if (defined(__fibre))
6364 	if (un->un_f_is_fibre == TRUE) {
6365 		/*
6366 		 * Add callbacks for insert and remove events
6367 		 */
6368 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
6369 			sd_init_event_callbacks(un);
6370 		}
6371 	}
6372 #endif
6373 
6374 	/*
6375 	 * Transport any pending commands to the target.
6376 	 *
6377 	 * If this is a low-activity device commands in queue will have to wait
6378 	 * until new commands come in, which may take awhile. Also, we
6379 	 * specifically don't check un_ncmds_in_transport because we know that
6380 	 * there really are no commands in progress after the unit was
6381 	 * suspended and we could have reached the throttle level, been
6382 	 * suspended, and have no new commands coming in for awhile. Highly
6383 	 * unlikely, but so is the low-activity disk scenario.
6384 	 */
6385 	ddi_xbuf_dispatch(un->un_xbuf_attr);
6386 
6387 	sd_start_cmds(un, NULL);
6388 	mutex_exit(SD_MUTEX(un));
6389 
6390 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
6391 
6392 	return (DDI_SUCCESS);
6393 }
6394 
6395 
6396 /*
6397  *    Function: sd_pm_state_change
6398  *
6399  * Description: Change the driver power state.
6400  * 		Someone else is required to actually change the driver
6401  * 		power level.
6402  *
6403  *   Arguments: un - driver soft state (unit) structure
6404  *              level - the power level that is changed to
6405  *              flag - to decide how to change the power state
6406  *
6407  * Return Code: DDI_SUCCESS
6408  *
6409  *     Context: Kernel thread context
6410  */
static int
sd_pm_state_change(struct sd_lun *un, int level, int flag)
{
	ASSERT(un != NULL);
	SD_TRACE(SD_LOG_POWER, un, "sd_pm_state_change: entry\n");

	ASSERT(!mutex_owned(SD_MUTEX(un)));
	mutex_enter(SD_MUTEX(un));

	if (flag == SD_PM_STATE_ROLLBACK || SD_PM_IS_IO_CAPABLE(un, level)) {
		un->un_power_level = level;
		ASSERT(!mutex_owned(&un->un_pm_mutex));
		mutex_enter(&un->un_pm_mutex);
		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
			/*
			 * Leaving low power: un_pm_count is -1 while the
			 * device is in low power (set below), so the
			 * increment must land it back at exactly 0.
			 */
			un->un_pm_count++;
			ASSERT(un->un_pm_count == 0);
		}
		mutex_exit(&un->un_pm_mutex);
	} else {
		/*
		 * Exit if power management is not enabled for this device,
		 * or if the device is being used by HA.
		 */
		if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
		    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
			mutex_exit(SD_MUTEX(un));
			SD_TRACE(SD_LOG_POWER, un,
			    "sd_pm_state_change: exiting\n");
			return (DDI_FAILURE);
		}

		SD_INFO(SD_LOG_POWER, un, "sd_pm_state_change: "
		    "un_ncmds_in_driver=%ld\n", un->un_ncmds_in_driver);

		/*
		 * See if the device is not busy, ie.:
		 *    - we have no commands in the driver for this device
		 *    - not waiting for resources
		 */
		if ((un->un_ncmds_in_driver == 0) &&
		    (un->un_state != SD_STATE_RWAIT)) {
			/*
			 * The device is not busy, so it is OK to go to low
			 * power state. Indicate low power, but rely on someone
			 * else to actually change it.
			 */
			mutex_enter(&un->un_pm_mutex);
			/* -1 marks the device as being in low power. */
			un->un_pm_count = -1;
			mutex_exit(&un->un_pm_mutex);
			un->un_power_level = level;
		}
	}

	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_POWER, un, "sd_pm_state_change: exit\n");

	return (DDI_SUCCESS);
}
6470 
6471 
6472 /*
6473  *    Function: sd_pm_idletimeout_handler
6474  *
6475  * Description: A timer routine that's active only while a device is busy.
6476  *		The purpose is to extend slightly the pm framework's busy
6477  *		view of the device to prevent busy/idle thrashing for
6478  *		back-to-back commands. Do this by comparing the current time
6479  *		to the time at which the last command completed and when the
6480  *		difference is greater than sd_pm_idletime, call
6481  *		pm_idle_component. In addition to indicating idle to the pm
6482  *		framework, update the chain type to again use the internal pm
6483  *		layers of the driver.
6484  *
6485  *   Arguments: arg - driver soft state (unit) structure
6486  *
6487  *     Context: Executes in a timeout(9F) thread context
6488  */
6489 
static void
sd_pm_idletimeout_handler(void *arg)
{
	struct sd_lun *un = arg;

	time_t	now;

	/* Bail out early if the instance is in the middle of detaching. */
	mutex_enter(&sd_detach_mutex);
	if (un->un_detach_count != 0) {
		/* Abort if the instance is detaching */
		mutex_exit(&sd_detach_mutex);
		return;
	}
	mutex_exit(&sd_detach_mutex);

	/*
	 * NOTE(review): ddi_get_time() is wall-clock time, so a clock step
	 * could perturb the idle-interval computation below; a monotonic
	 * time source would be more robust -- confirm before changing.
	 */
	now = ddi_get_time();
	/*
	 * Grab both mutexes, in the proper order, since we're accessing
	 * both PM and softstate variables.
	 */
	mutex_enter(SD_MUTEX(un));
	mutex_enter(&un->un_pm_mutex);
	/*
	 * Only declare the device idle when it has been quiet for at least
	 * sd_pm_idletime seconds, no commands are active in the driver,
	 * and the PM reference count has drained to zero.
	 */
	if (((now - un->un_pm_idle_time) > sd_pm_idletime) &&
	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
		/*
		 * Update the chain types.
		 * This takes effect on the next new command received.
		 */
		if (un->un_f_non_devbsize_supported) {
			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
		} else {
			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
		}
		un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD;

		SD_TRACE(SD_LOG_IO_PM, un,
		    "sd_pm_idletimeout_handler: idling device\n");
		(void) pm_idle_component(SD_DEVINFO(un), 0);
		un->un_pm_idle_timeid = NULL;
	} else {
		/* Not idle yet; re-arm ourselves to check again shortly. */
		un->un_pm_idle_timeid =
		    timeout(sd_pm_idletimeout_handler, un,
		    (drv_usectohz((clock_t)300000))); /* 300 ms. */
	}
	mutex_exit(&un->un_pm_mutex);
	mutex_exit(SD_MUTEX(un));
}
6537 
6538 
6539 /*
6540  *    Function: sd_pm_timeout_handler
6541  *
6542  * Description: Callback to tell framework we are idle.
6543  *
6544  *     Context: timeout(9f) thread context.
6545  */
6546 
6547 static void
6548 sd_pm_timeout_handler(void *arg)
6549 {
6550 	struct sd_lun *un = arg;
6551 
6552 	(void) pm_idle_component(SD_DEVINFO(un), 0);
6553 	mutex_enter(&un->un_pm_mutex);
6554 	un->un_pm_timeid = NULL;
6555 	mutex_exit(&un->un_pm_mutex);
6556 }
6557 
6558 
6559 /*
6560  *    Function: sdpower
6561  *
6562  * Description: PM entry point.
6563  *
6564  * Return Code: DDI_SUCCESS
6565  *		DDI_FAILURE
6566  *
6567  *     Context: Kernel thread context
6568  */
6569 
static int
sdpower(dev_info_t *devi, int component, int level)
{
	struct sd_lun	*un;
	int		instance;
	int		rval = DDI_SUCCESS;
	uint_t		i, log_page_size, maxcycles, ncycles;
	uchar_t		*log_page_data;
	int		log_sense_page;
	int		medium_present;
	time_t		intvlp;
	struct pm_trans_data	sd_pm_tran_data;
	uchar_t		save_state;
	int		sval;
	uchar_t		state_before_pm;
	int		got_semaphore_here;
	sd_ssc_t	*ssc;
	int	last_power_level;

	instance = ddi_get_instance(devi);

	/* Only component 0 exists; the requested level must be valid. */
	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
	    !SD_PM_IS_LEVEL_VALID(un, level) || component != 0) {
		return (DDI_FAILURE);
	}

	ssc = sd_ssc_init(un);

	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);

	/*
	 * Must synchronize power down with close.
	 * Attempt to decrement/acquire the open/close semaphore,
	 * but do NOT wait on it. If it's not greater than zero,
	 * ie. it can't be decremented without waiting, then
	 * someone else, either open or close, already has it
	 * and the try returns 0. Use that knowledge here to determine
	 * if it's OK to change the device power level.
	 * Also, only increment it on exit if it was decremented, ie. gotten,
	 * here.
	 */
	got_semaphore_here = sema_tryp(&un->un_semoclose);

	mutex_enter(SD_MUTEX(un));

	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
	    un->un_ncmds_in_driver);

	/*
	 * If un_ncmds_in_driver is non-zero it indicates commands are
	 * already being processed in the driver, or if the semaphore was
	 * not gotten here it indicates an open or close is being processed.
	 * At the same time somebody is requesting to go to a lower power
	 * that can't perform I/O, which can't happen, therefore we need to
	 * return failure.
	 */
	if ((!SD_PM_IS_IO_CAPABLE(un, level)) &&
	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
		mutex_exit(SD_MUTEX(un));

		/* Only release the semaphore if we actually got it above. */
		if (got_semaphore_here != 0) {
			sema_v(&un->un_semoclose);
		}
		SD_TRACE(SD_LOG_IO_PM, un,
		    "sdpower: exit, device has queued cmds.\n");

		goto sdpower_failed;
	}

	/*
	 * if it is OFFLINE that means the disk is completely dead
	 * in our case we have to put the disk in on or off by sending commands
	 * Of course that will fail anyway so return back here.
	 *
	 * Power changes to a device that's OFFLINE or SUSPENDED
	 * are not allowed.
	 */
	if ((un->un_state == SD_STATE_OFFLINE) ||
	    (un->un_state == SD_STATE_SUSPENDED)) {
		mutex_exit(SD_MUTEX(un));

		if (got_semaphore_here != 0) {
			sema_v(&un->un_semoclose);
		}
		SD_TRACE(SD_LOG_IO_PM, un,
		    "sdpower: exit, device is off-line.\n");

		goto sdpower_failed;
	}

	/*
	 * Change the device's state to indicate its power level
	 * is being changed. Do this to prevent a power off in the
	 * middle of commands, which is especially bad on devices
	 * that are really powered off instead of just spun down.
	 */
	state_before_pm = un->un_state;
	un->un_state = SD_STATE_PM_CHANGING;

	mutex_exit(SD_MUTEX(un));

	/*
	 * If log sense command is not supported, bypass the
	 * following checking, otherwise, check the log sense
	 * information for this device.
	 */
	if (SD_PM_STOP_MOTOR_NEEDED(un, level) &&
	    un->un_f_log_sense_supported) {
		/*
		 * Get the log sense information to understand whether the
		 * power cycle counts have gone beyond the threshold.
		 */
		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);

		mutex_enter(SD_MUTEX(un));
		log_sense_page = un->un_start_stop_cycle_page;
		mutex_exit(SD_MUTEX(un));

		rval = sd_send_scsi_LOG_SENSE(ssc, log_page_data,
		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);

		/* Record an FMA assessment for a failed LOG SENSE. */
		if (rval != 0) {
			if (rval == EIO)
				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
			else
				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		}

#ifdef	SDDEBUG
		if (sd_force_pm_supported) {
			/* Force a successful result */
			rval = 0;
		}
#endif
		if (rval != 0) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Log Sense Failed\n");

			kmem_free(log_page_data, log_page_size);
			/* Cannot support power management on those drives */

			if (got_semaphore_here != 0) {
				sema_v(&un->un_semoclose);
			}
			/*
			 * On exit put the state back to its original value
			 * and broadcast to anyone waiting for the power
			 * change completion.
			 */
			mutex_enter(SD_MUTEX(un));
			un->un_state = state_before_pm;
			cv_broadcast(&un->un_suspend_cv);
			mutex_exit(SD_MUTEX(un));
			SD_TRACE(SD_LOG_IO_PM, un,
			    "sdpower: exit, Log Sense Failed.\n");

			goto sdpower_failed;
		}

		/*
		 * From the page data - Convert the essential information to
		 * pm_trans_data
		 */
		maxcycles =
		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];

		ncycles =
		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
		    (log_page_data[0x26] << 8)  | log_page_data[0x27];

		if (un->un_f_pm_log_sense_smart) {
			sd_pm_tran_data.un.smart_count.allowed = maxcycles;
			sd_pm_tran_data.un.smart_count.consumed = ncycles;
			sd_pm_tran_data.un.smart_count.flag = 0;
			sd_pm_tran_data.format = DC_SMART_FORMAT;
		} else {
			sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
			sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
			for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
				sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
				    log_page_data[8+i];
			}
			sd_pm_tran_data.un.scsi_cycles.flag = 0;
			sd_pm_tran_data.format = DC_SCSI_FORMAT;
		}

		kmem_free(log_page_data, log_page_size);

		/*
		 * Call pm_trans_check routine to get the Ok from
		 * the global policy
		 */
		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
#ifdef	SDDEBUG
		if (sd_force_pm_supported) {
			/* Force a successful result */
			rval = 1;
		}
#endif
		switch (rval) {
		case 0:
			/*
			 * Not Ok to Power cycle or error in parameters passed
			 * Would have given the advised time to consider power
			 * cycle. Based on the new intvlp parameter we are
			 * supposed to pretend we are busy so that pm framework
			 * will never call our power entry point. Because of
			 * that install a timeout handler and wait for the
			 * recommended time to elapse so that power management
			 * can be effective again.
			 *
			 * To effect this behavior, call pm_busy_component to
			 * indicate to the framework this device is busy.
			 * By not adjusting un_pm_count the rest of PM in
			 * the driver will function normally, and independent
			 * of this but because the framework is told the device
			 * is busy it won't attempt powering down until it gets
			 * a matching idle. The timeout handler sends this.
			 * Note: sd_pm_entry can't be called here to do this
			 * because sdpower may have been called as a result
			 * of a call to pm_raise_power from within sd_pm_entry.
			 *
			 * If a timeout handler is already active then
			 * don't install another.
			 */
			mutex_enter(&un->un_pm_mutex);
			if (un->un_pm_timeid == NULL) {
				un->un_pm_timeid =
				    timeout(sd_pm_timeout_handler,
				    un, intvlp * drv_usectohz(1000000));
				mutex_exit(&un->un_pm_mutex);
				(void) pm_busy_component(SD_DEVINFO(un), 0);
			} else {
				mutex_exit(&un->un_pm_mutex);
			}
			if (got_semaphore_here != 0) {
				sema_v(&un->un_semoclose);
			}
			/*
			 * On exit put the state back to its original value
			 * and broadcast to anyone waiting for the power
			 * change completion.
			 */
			mutex_enter(SD_MUTEX(un));
			un->un_state = state_before_pm;
			cv_broadcast(&un->un_suspend_cv);
			mutex_exit(SD_MUTEX(un));

			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
			    "trans check Failed, not ok to power cycle.\n");

			goto sdpower_failed;
		case -1:
			if (got_semaphore_here != 0) {
				sema_v(&un->un_semoclose);
			}
			/*
			 * On exit put the state back to its original value
			 * and broadcast to anyone waiting for the power
			 * change completion.
			 */
			mutex_enter(SD_MUTEX(un));
			un->un_state = state_before_pm;
			cv_broadcast(&un->un_suspend_cv);
			mutex_exit(SD_MUTEX(un));
			SD_TRACE(SD_LOG_IO_PM, un,
			    "sdpower: exit, trans check command Failed.\n");

			goto sdpower_failed;
		}
	}

	if (!SD_PM_IS_IO_CAPABLE(un, level)) {
		/*
		 * Save the last state... if the STOP FAILS we need it
		 * for restoring. Note that save_state and last_power_level
		 * are only assigned here, and only consumed later under
		 * this same !SD_PM_IS_IO_CAPABLE(un, level) condition.
		 */
		mutex_enter(SD_MUTEX(un));
		save_state = un->un_last_state;
		last_power_level = un->un_power_level;
		/*
		 * There must not be any cmds. getting processed
		 * in the driver when we get here. Power to the
		 * device is potentially going off.
		 */
		ASSERT(un->un_ncmds_in_driver == 0);
		mutex_exit(SD_MUTEX(un));

		/*
		 * For now PM suspend the device completely before spindle is
		 * turned off
		 */
		if ((rval = sd_pm_state_change(un, level, SD_PM_STATE_CHANGE))
		    == DDI_FAILURE) {
			if (got_semaphore_here != 0) {
				sema_v(&un->un_semoclose);
			}
			/*
			 * On exit put the state back to its original value
			 * and broadcast to anyone waiting for the power
			 * change completion.
			 */
			mutex_enter(SD_MUTEX(un));
			un->un_state = state_before_pm;
			un->un_power_level = last_power_level;
			cv_broadcast(&un->un_suspend_cv);
			mutex_exit(SD_MUTEX(un));
			SD_TRACE(SD_LOG_IO_PM, un,
			    "sdpower: exit, PM suspend Failed.\n");

			goto sdpower_failed;
		}
	}

	/*
	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
	 * close, or strategy. Dump no longer uses this routine; it uses its
	 * own code so it can be done in polled mode.
	 */

	medium_present = TRUE;

	/*
	 * When powering up, issue a TUR in case the device is at unit
	 * attention.  Don't do retries. Bypass the PM layer, otherwise
	 * a deadlock on un_pm_busy_cv will occur.
	 */
	if (SD_PM_IS_IO_CAPABLE(un, level)) {
		sval = sd_send_scsi_TEST_UNIT_READY(ssc,
		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
		if (sval != 0)
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}

	/*
	 * Issue the actual power-state command: either a POWER CONDITION
	 * form of START STOP UNIT, or a plain START/STOP, depending on
	 * what the device supports.
	 */
	if (un->un_f_power_condition_supported) {
		char *pm_condition_name[] = {"STOPPED", "STANDBY",
		    "IDLE", "ACTIVE"};
		SD_TRACE(SD_LOG_IO_PM, un,
		    "sdpower: sending \'%s\' power condition",
		    pm_condition_name[level]);
		sval = sd_send_scsi_START_STOP_UNIT(ssc, SD_POWER_CONDITION,
		    sd_pl2pc[level], SD_PATH_DIRECT);
	} else {
		SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
		    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
		sval = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
		    ((level == SD_SPINDLE_ON) ? SD_TARGET_START :
		    SD_TARGET_STOP), SD_PATH_DIRECT);
	}
	if (sval != 0) {
		if (sval == EIO)
			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
		else
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}

	/* Command failed, check for media present. */
	if ((sval == ENXIO) && un->un_f_has_removable_media) {
		medium_present = FALSE;
	}

	/*
	 * The conditions of interest here are:
	 *   if a spindle off with media present fails,
	 *	then restore the state and return an error.
	 *   else if a spindle on fails,
	 *	then return an error (there's no state to restore).
	 * In all other cases we setup for the new state
	 * and return success.
	 */
	if (!SD_PM_IS_IO_CAPABLE(un, level)) {
		if ((medium_present == TRUE) && (sval != 0)) {
			/* The stop command from above failed */
			rval = DDI_FAILURE;
			/*
			 * The stop command failed, and we have media
			 * present. Put the level back by calling
			 * sd_pm_state_change() with the rollback flag
			 * and set the state back to its previous value.
			 * (save_state/last_power_level were initialized
			 * above under this same condition.)
			 */
			(void) sd_pm_state_change(un, last_power_level,
			    SD_PM_STATE_ROLLBACK);
			mutex_enter(SD_MUTEX(un));
			un->un_last_state = save_state;
			mutex_exit(SD_MUTEX(un));
		} else if (un->un_f_monitor_media_state) {
			/*
			 * The stop command from above succeeded.
			 * Terminate watch thread in case of removable media
			 * devices going into low power state. This is as per
			 * the requirements of pm framework, otherwise commands
			 * will be generated for the device (through watch
			 * thread), even when the device is in low power state.
			 */
			mutex_enter(SD_MUTEX(un));
			un->un_f_watcht_stopped = FALSE;
			if (un->un_swr_token != NULL) {
				opaque_t temp_token = un->un_swr_token;
				un->un_f_watcht_stopped = TRUE;
				un->un_swr_token = NULL;
				mutex_exit(SD_MUTEX(un));
				(void) scsi_watch_request_terminate(temp_token,
				    SCSI_WATCH_TERMINATE_ALL_WAIT);
			} else {
				mutex_exit(SD_MUTEX(un));
			}
		}
	} else {
		/*
		 * The level requested is I/O capable.
		 * Legacy behavior: return success on a failed spinup
		 * if there is no media in the drive.
		 * Do this by looking at medium_present here.
		 */
		if ((sval != 0) && medium_present) {
			/* The start command from above failed */
			rval = DDI_FAILURE;
		} else {
			/*
			 * The start command from above succeeded
			 * PM resume the devices now that we have
			 * started the disks
			 */
			(void) sd_pm_state_change(un, level,
			    SD_PM_STATE_CHANGE);

			/*
			 * Resume the watch thread since it was suspended
			 * when the device went into low power mode.
			 */
			if (un->un_f_monitor_media_state) {
				mutex_enter(SD_MUTEX(un));
				if (un->un_f_watcht_stopped == TRUE) {
					opaque_t temp_token;

					un->un_f_watcht_stopped = FALSE;
					mutex_exit(SD_MUTEX(un));
					temp_token =
					    sd_watch_request_submit(un);
					mutex_enter(SD_MUTEX(un));
					un->un_swr_token = temp_token;
				}
				mutex_exit(SD_MUTEX(un));
			}
		}
	}

	if (got_semaphore_here != 0) {
		sema_v(&un->un_semoclose);
	}
	/*
	 * On exit put the state back to its original value
	 * and broadcast to anyone waiting for the power
	 * change completion.
	 */
	mutex_enter(SD_MUTEX(un));
	un->un_state = state_before_pm;
	cv_broadcast(&un->un_suspend_cv);
	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);

	sd_ssc_fini(ssc);
	return (rval);

sdpower_failed:

	sd_ssc_fini(ssc);
	return (DDI_FAILURE);
}
7042 
7043 
7044 
7045 /*
7046  *    Function: sdattach
7047  *
7048  * Description: Driver's attach(9e) entry point function.
7049  *
7050  *   Arguments: devi - opaque device info handle
7051  *		cmd  - attach  type
7052  *
7053  * Return Code: DDI_SUCCESS
7054  *		DDI_FAILURE
7055  *
7056  *     Context: Kernel thread context
7057  */
7058 
7059 static int
7060 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
7061 {
7062 	switch (cmd) {
7063 	case DDI_ATTACH:
7064 		return (sd_unit_attach(devi));
7065 	case DDI_RESUME:
7066 		return (sd_ddi_resume(devi));
7067 	default:
7068 		break;
7069 	}
7070 	return (DDI_FAILURE);
7071 }
7072 
7073 
7074 /*
7075  *    Function: sddetach
7076  *
7077  * Description: Driver's detach(9E) entry point function.
7078  *
7079  *   Arguments: devi - opaque device info handle
7080  *		cmd  - detach  type
7081  *
7082  * Return Code: DDI_SUCCESS
7083  *		DDI_FAILURE
7084  *
7085  *     Context: Kernel thread context
7086  */
7087 
7088 static int
7089 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
7090 {
7091 	switch (cmd) {
7092 	case DDI_DETACH:
7093 		return (sd_unit_detach(devi));
7094 	case DDI_SUSPEND:
7095 		return (sd_ddi_suspend(devi));
7096 	default:
7097 		break;
7098 	}
7099 	return (DDI_FAILURE);
7100 }
7101 
7102 
7103 /*
7104  *     Function: sd_sync_with_callback
7105  *
7106  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
7107  *		 state while the callback routine is active.
7108  *
7109  *    Arguments: un: softstate structure for the instance
7110  *
7111  *	Context: Kernel thread context
7112  */
7113 
7114 static void
7115 sd_sync_with_callback(struct sd_lun *un)
7116 {
7117 	ASSERT(un != NULL);
7118 
7119 	mutex_enter(SD_MUTEX(un));
7120 
7121 	ASSERT(un->un_in_callback >= 0);
7122 
7123 	while (un->un_in_callback > 0) {
7124 		mutex_exit(SD_MUTEX(un));
7125 		delay(2);
7126 		mutex_enter(SD_MUTEX(un));
7127 	}
7128 
7129 	mutex_exit(SD_MUTEX(un));
7130 }
7131 
7132 /*
7133  *    Function: sd_unit_attach
7134  *
7135  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
7136  *		the soft state structure for the device and performs
7137  *		all necessary structure and device initializations.
7138  *
7139  *   Arguments: devi: the system's dev_info_t for the device.
7140  *
7141  * Return Code: DDI_SUCCESS if attach is successful.
7142  *		DDI_FAILURE if any part of the attach fails.
7143  *
7144  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
7145  *		Kernel thread context only.  Can sleep.
7146  */
7147 
7148 static int
7149 sd_unit_attach(dev_info_t *devi)
7150 {
7151 	struct	scsi_device	*devp;
7152 	struct	sd_lun		*un;
7153 	char			*variantp;
7154 	char			name_str[48];
7155 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
7156 	int	instance;
7157 	int	rval;
7158 	int	wc_enabled;
7159 	int	tgt;
7160 	uint64_t	capacity;
7161 	uint_t		lbasize = 0;
7162 	dev_info_t	*pdip = ddi_get_parent(devi);
7163 	int		offbyone = 0;
7164 	int		geom_label_valid = 0;
7165 	sd_ssc_t	*ssc;
7166 	int		status;
7167 	struct sd_fm_internal	*sfip = NULL;
7168 	int		max_xfer_size;
7169 
7170 	/*
7171 	 * Retrieve the target driver's private data area. This was set
7172 	 * up by the HBA.
7173 	 */
7174 	devp = ddi_get_driver_private(devi);
7175 
7176 	/*
7177 	 * Retrieve the target ID of the device.
7178 	 */
7179 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7180 	    SCSI_ADDR_PROP_TARGET, -1);
7181 
7182 	/*
7183 	 * Since we have no idea what state things were left in by the last
7184 	 * user of the device, set up some 'default' settings, ie. turn 'em
7185 	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
7186 	 * Do this before the scsi_probe, which sends an inquiry.
7187 	 * This is a fix for bug (4430280).
7188 	 * Of special importance is wide-xfer. The drive could have been left
7189 	 * in wide transfer mode by the last driver to communicate with it,
7190 	 * this includes us. If that's the case, and if the following is not
7191 	 * setup properly or we don't re-negotiate with the drive prior to
7192 	 * transferring data to/from the drive, it causes bus parity errors,
7193 	 * data overruns, and unexpected interrupts. This first occurred when
7194 	 * the fix for bug (4378686) was made.
7195 	 */
7196 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
7197 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
7198 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
7199 
7200 	/*
7201 	 * Currently, scsi_ifsetcap sets tagged-qing capability for all LUNs
7202 	 * on a target. Setting it per lun instance actually sets the
7203 	 * capability of this target, which affects those luns already
7204 	 * attached on the same target. So during attach, we can only disable
7205 	 * this capability only when no other lun has been attached on this
7206 	 * target. By doing this, we assume a target has the same tagged-qing
7207 	 * capability for every lun. The condition can be removed when HBA
7208 	 * is changed to support per lun based tagged-qing capability.
7209 	 */
7210 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
7211 		(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
7212 	}
7213 
7214 	/*
7215 	 * Use scsi_probe() to issue an INQUIRY command to the device.
7216 	 * This call will allocate and fill in the scsi_inquiry structure
7217 	 * and point the sd_inq member of the scsi_device structure to it.
7218 	 * If the attach succeeds, then this memory will not be de-allocated
7219 	 * (via scsi_unprobe()) until the instance is detached.
7220 	 */
7221 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
7222 		goto probe_failed;
7223 	}
7224 
7225 	/*
7226 	 * Check the device type as specified in the inquiry data and
7227 	 * claim it if it is of a type that we support.
7228 	 */
7229 	switch (devp->sd_inq->inq_dtype) {
7230 	case DTYPE_DIRECT:
7231 		break;
7232 	case DTYPE_RODIRECT:
7233 		break;
7234 	case DTYPE_OPTICAL:
7235 		break;
7236 	case DTYPE_NOTPRESENT:
7237 	default:
7238 		/* Unsupported device type; fail the attach. */
7239 		goto probe_failed;
7240 	}
7241 
7242 	/*
7243 	 * Allocate the soft state structure for this unit.
7244 	 *
7245 	 * We rely upon this memory being set to all zeroes by
7246 	 * ddi_soft_state_zalloc().  We assume that any member of the
7247 	 * soft state structure that is not explicitly initialized by
7248 	 * this routine will have a value of zero.
7249 	 */
7250 	instance = ddi_get_instance(devp->sd_dev);
7251 #ifndef XPV_HVM_DRIVER
7252 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
7253 		goto probe_failed;
7254 	}
7255 #endif /* !XPV_HVM_DRIVER */
7256 
7257 	/*
7258 	 * Retrieve a pointer to the newly-allocated soft state.
7259 	 *
7260 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
7261 	 * was successful, unless something has gone horribly wrong and the
7262 	 * ddi's soft state internals are corrupt (in which case it is
7263 	 * probably better to halt here than just fail the attach....)
7264 	 */
7265 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
7266 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
7267 		    instance);
7268 		/*NOTREACHED*/
7269 	}
7270 
7271 	/*
7272 	 * Link the back ptr of the driver soft state to the scsi_device
7273 	 * struct for this lun.
7274 	 * Save a pointer to the softstate in the driver-private area of
7275 	 * the scsi_device struct.
7276 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
7277 	 * we first set un->un_sd below.
7278 	 */
7279 	un->un_sd = devp;
7280 	devp->sd_private = (opaque_t)un;
7281 
7282 	/*
7283 	 * The following must be after devp is stored in the soft state struct.
7284 	 */
7285 #ifdef SDDEBUG
7286 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7287 	    "%s_unit_attach: un:0x%p instance:%d\n",
7288 	    ddi_driver_name(devi), un, instance);
7289 #endif
7290 
7291 	/*
7292 	 * Set up the device type and node type (for the minor nodes).
7293 	 * By default we assume that the device can at least support the
7294 	 * Common Command Set. Call it a CD-ROM if it reports itself
7295 	 * as a RODIRECT device.
7296 	 */
7297 	switch (devp->sd_inq->inq_dtype) {
7298 	case DTYPE_RODIRECT:
7299 		un->un_node_type = DDI_NT_CD_CHAN;
7300 		un->un_ctype	 = CTYPE_CDROM;
7301 		break;
7302 	case DTYPE_OPTICAL:
7303 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7304 		un->un_ctype	 = CTYPE_ROD;
7305 		break;
7306 	default:
7307 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7308 		un->un_ctype	 = CTYPE_CCS;
7309 		break;
7310 	}
7311 
7312 	/*
7313 	 * Try to read the interconnect type from the HBA.
7314 	 *
7315 	 * Note: This driver is currently compiled as two binaries, a parallel
7316 	 * scsi version (sd) and a fibre channel version (ssd). All functional
7317 	 * differences are determined at compile time. In the future a single
7318 	 * binary will be provided and the interconnect type will be used to
7319 	 * differentiate between fibre and parallel scsi behaviors. At that time
7320 	 * it will be necessary for all fibre channel HBAs to support this
7321 	 * property.
7322 	 *
7323 	 * Set un_f_is_fibre to TRUE (default fibre).
7324 	 */
7325 	un->un_f_is_fibre = TRUE;
7326 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
7327 	case INTERCONNECT_SSA:
7328 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
7329 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7330 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
7331 		break;
7332 	case INTERCONNECT_PARALLEL:
7333 		un->un_f_is_fibre = FALSE;
7334 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7335 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7336 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
7337 		break;
7338 	case INTERCONNECT_SAS:
7339 		un->un_f_is_fibre = FALSE;
7340 		un->un_interconnect_type = SD_INTERCONNECT_SAS;
7341 		un->un_node_type = DDI_NT_BLOCK_SAS;
7342 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7343 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SAS\n", un);
7344 		break;
7345 	case INTERCONNECT_SATA:
7346 		un->un_f_is_fibre = FALSE;
7347 		un->un_interconnect_type = SD_INTERCONNECT_SATA;
7348 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7349 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SATA\n", un);
7350 		break;
7351 	case INTERCONNECT_FIBRE:
7352 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
7353 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7354 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
7355 		break;
7356 	case INTERCONNECT_FABRIC:
7357 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
7358 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
7359 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7360 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
7361 		break;
7362 	default:
7363 #ifdef SD_DEFAULT_INTERCONNECT_TYPE
7364 		/*
7365 		 * The HBA does not support the "interconnect-type" property
7366 		 * (or did not provide a recognized type).
7367 		 *
7368 		 * Note: This will be obsoleted when a single fibre channel
7369 		 * and parallel scsi driver is delivered. In the meantime the
7370 		 * interconnect type will be set to the platform default. If that
7371 		 * type is not parallel SCSI, it means that we should be
7372 		 * assuming "ssd" semantics. However, here this also means that
7373 		 * the FC HBA is not supporting the "interconnect-type" property
7374 		 * like we expect it to, so log this occurrence.
7375 		 */
7376 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
7377 		if (!SD_IS_PARALLEL_SCSI(un)) {
7378 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7379 			    "sd_unit_attach: un:0x%p Assuming "
7380 			    "INTERCONNECT_FIBRE\n", un);
7381 		} else {
7382 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7383 			    "sd_unit_attach: un:0x%p Assuming "
7384 			    "INTERCONNECT_PARALLEL\n", un);
7385 			un->un_f_is_fibre = FALSE;
7386 		}
7387 #else
7388 		/*
7389 		 * Note: This source will be implemented when a single fibre
7390 		 * channel and parallel scsi driver is delivered. The default
7391 		 * will be to assume that if a device does not support the
7392 		 * "interconnect-type" property it is a parallel SCSI HBA and
7393 		 * we will set the interconnect type for parallel scsi.
7394 		 */
7395 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7396 		un->un_f_is_fibre = FALSE;
7397 #endif
7398 		break;
7399 	}
7400 
7401 	if (un->un_f_is_fibre == TRUE) {
7402 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
7403 		    SCSI_VERSION_3) {
7404 			switch (un->un_interconnect_type) {
7405 			case SD_INTERCONNECT_FIBRE:
7406 			case SD_INTERCONNECT_SSA:
7407 				un->un_node_type = DDI_NT_BLOCK_WWN;
7408 				break;
7409 			default:
7410 				break;
7411 			}
7412 		}
7413 	}
7414 
7415 	/*
7416 	 * Initialize the Request Sense command for the target
7417 	 */
7418 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
7419 		goto alloc_rqs_failed;
7420 	}
7421 
7422 	/*
7423 	 * Set un_retry_count with SD_RETRY_COUNT, this is ok for Sparc
7424 	 * with separate binary for sd and ssd.
7425 	 *
7426 	 * x86 has 1 binary, un_retry_count is set based on connection type.
7427 	 * The hardcoded values will go away when Sparc uses 1 binary
7428 	 * for sd and ssd.  These hardcoded values need to match
7429 	 * SD_RETRY_COUNT in sddef.h.
7430 	 * The value used is based on interconnect type:
7431 	 * fibre = 3, parallel = 5
7432 	 */
7433 #if defined(__i386) || defined(__amd64)
7434 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
7435 #else
7436 	un->un_retry_count = SD_RETRY_COUNT;
7437 #endif
7438 
7439 	/*
7440 	 * Set the per disk retry count to the default number of retries
7441 	 * for disks and CDROMs. This value can be overridden by the
7442 	 * disk property list or an entry in sd.conf.
7443 	 */
7444 	un->un_notready_retry_count =
7445 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
7446 	    : DISK_NOT_READY_RETRY_COUNT(un);
7447 
7448 	/*
7449 	 * Set the busy retry count to the default value of un_retry_count.
7450 	 * This can be overridden by entries in sd.conf or the device
7451 	 * config table.
7452 	 */
7453 	un->un_busy_retry_count = un->un_retry_count;
7454 
7455 	/*
7456 	 * Init the reset threshold for retries.  This number determines
7457 	 * how many retries must be performed before a reset can be issued
7458 	 * (for certain error conditions). This can be overridden by entries
7459 	 * in sd.conf or the device config table.
7460 	 */
7461 	un->un_reset_retry_count = (un->un_retry_count / 2);
7462 
7463 	/*
7464 	 * Set the victim_retry_count to the default un_retry_count
7465 	 */
7466 	un->un_victim_retry_count = (2 * un->un_retry_count);
7467 
7468 	/*
7469 	 * Set the reservation release timeout to the default value of
7470 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
7471 	 * device config table.
7472 	 */
7473 	un->un_reserve_release_time = 5;
7474 
7475 	/*
7476 	 * Set up the default maximum transfer size. Note that this may
7477 	 * get updated later in the attach, when setting up default wide
7478 	 * operations for disks.
7479 	 */
7480 #if defined(__i386) || defined(__amd64)
7481 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
7482 	un->un_partial_dma_supported = 1;
7483 #else
7484 	un->un_max_xfer_size = (uint_t)maxphys;
7485 #endif
7486 
7487 	/*
7488 	 * Get "allow bus device reset" property (defaults to "enabled" if
7489 	 * the property was not defined). This is to disable bus resets for
7490 	 * certain kinds of error recovery. Note: In the future when a run-time
7491 	 * fibre check is available the soft state flag should default to
7492 	 * enabled.
7493 	 */
7494 	if (un->un_f_is_fibre == TRUE) {
7495 		un->un_f_allow_bus_device_reset = TRUE;
7496 	} else {
7497 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7498 		    "allow-bus-device-reset", 1) != 0) {
7499 			un->un_f_allow_bus_device_reset = TRUE;
7500 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7501 			    "sd_unit_attach: un:0x%p Bus device reset "
7502 			    "enabled\n", un);
7503 		} else {
7504 			un->un_f_allow_bus_device_reset = FALSE;
7505 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7506 			    "sd_unit_attach: un:0x%p Bus device reset "
7507 			    "disabled\n", un);
7508 		}
7509 	}
7510 
7511 	/*
7512 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
7513 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
7514 	 *
7515 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
7516 	 * property. The new "variant" property with a value of "atapi" has been
7517 	 * introduced so that future 'variants' of standard SCSI behavior (like
7518 	 * atapi) could be specified by the underlying HBA drivers by supplying
7519 	 * a new value for the "variant" property, instead of having to define a
7520 	 * new property.
7521 	 */
7522 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
7523 		un->un_f_cfg_is_atapi = TRUE;
7524 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7525 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
7526 	}
7527 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
7528 	    &variantp) == DDI_PROP_SUCCESS) {
7529 		if (strcmp(variantp, "atapi") == 0) {
7530 			un->un_f_cfg_is_atapi = TRUE;
7531 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7532 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
7533 		}
7534 		ddi_prop_free(variantp);
7535 	}
7536 
7537 	un->un_cmd_timeout	= SD_IO_TIME;
7538 
7539 	un->un_busy_timeout  = SD_BSY_TIMEOUT;
7540 
7541 	/* Info on current states, statuses, etc. (Updated frequently) */
7542 	un->un_state		= SD_STATE_NORMAL;
7543 	un->un_last_state	= SD_STATE_NORMAL;
7544 
7545 	/* Control & status info for command throttling */
7546 	un->un_throttle		= sd_max_throttle;
7547 	un->un_saved_throttle	= sd_max_throttle;
7548 	un->un_min_throttle	= sd_min_throttle;
7549 
7550 	if (un->un_f_is_fibre == TRUE) {
7551 		un->un_f_use_adaptive_throttle = TRUE;
7552 	} else {
7553 		un->un_f_use_adaptive_throttle = FALSE;
7554 	}
7555 
7556 	/* Removable media support. */
7557 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
7558 	un->un_mediastate		= DKIO_NONE;
7559 	un->un_specified_mediastate	= DKIO_NONE;
7560 
7561 	/* CVs for suspend/resume (PM or DR) */
7562 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
7563 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
7564 
7565 	/* Power management support. */
7566 	un->un_power_level = SD_SPINDLE_UNINIT;
7567 
7568 	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
7569 	un->un_f_wcc_inprog = 0;
7570 
7571 	/*
7572 	 * The open/close semaphore is used to serialize threads executing
7573 	 * in the driver's open & close entry point routines for a given
7574 	 * instance.
7575 	 */
7576 	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
7577 
7578 	/*
7579 	 * The conf file entry and softstate variable is a forceful override,
7580 	 * meaning a non-zero value must be entered to change the default.
7581 	 */
7582 	un->un_f_disksort_disabled = FALSE;
7583 	un->un_f_rmw_type = SD_RMW_TYPE_DEFAULT;
7584 
7585 	/*
7586 	 * GET EVENT STATUS NOTIFICATION media polling enabled by default, but
7587 	 * can be overridden via [s]sd-config-list "mmc-gesn-polling" property.
7588 	 */
7589 	un->un_f_mmc_gesn_polling = TRUE;
7590 
7591 	/*
7592 	 * Retrieve the properties from the static driver table or the driver
7593 	 * configuration file (.conf) for this unit and update the soft state
7594 	 * for the device as needed for the indicated properties.
7595 	 * Note: the property configuration needs to occur here as some of the
7596 	 * following routines may have dependencies on soft state flags set
7597 	 * as part of the driver property configuration.
7598 	 */
7599 	sd_read_unit_properties(un);
7600 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7601 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
7602 
7603 	/*
7604 	 * Only if a device has "hotpluggable" property, it is
7605 	 * treated as hotpluggable device. Otherwise, it is
7606 	 * regarded as non-hotpluggable one.
7607 	 */
7608 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
7609 	    -1) != -1) {
7610 		un->un_f_is_hotpluggable = TRUE;
7611 	}
7612 
7613 	/*
7614 	 * set unit's attributes(flags) according to "hotpluggable" and
7615 	 * RMB bit in INQUIRY data.
7616 	 */
7617 	sd_set_unit_attributes(un, devi);
7618 
7619 	/*
7620 	 * By default, we mark the capacity, lbasize, and geometry
7621 	 * as invalid. Only if we successfully read a valid capacity
7622 	 * will we update the un_blockcount and un_tgt_blocksize with the
7623 	 * valid values (the geometry will be validated later).
7624 	 */
7625 	un->un_f_blockcount_is_valid	= FALSE;
7626 	un->un_f_tgt_blocksize_is_valid	= FALSE;
7627 
7628 	/*
7629 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
7630 	 * otherwise.
7631 	 */
7632 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
7633 	un->un_blockcount = 0;
7634 
7635 	/*
7636 	 * Set up the per-instance info needed to determine the correct
7637 	 * CDBs and other info for issuing commands to the target.
7638 	 */
7639 	sd_init_cdb_limits(un);
7640 
7641 	/*
7642 	 * Set up the IO chains to use, based upon the target type.
7643 	 */
7644 	if (un->un_f_non_devbsize_supported) {
7645 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
7646 	} else {
7647 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
7648 	}
7649 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
7650 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
7651 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
7652 
7653 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
7654 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
7655 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
7656 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
7657 
7658 
7659 	if (ISCD(un)) {
7660 		un->un_additional_codes = sd_additional_codes;
7661 	} else {
7662 		un->un_additional_codes = NULL;
7663 	}
7664 
7665 	/*
7666 	 * Create the kstats here so they can be available for attach-time
7667 	 * routines that send commands to the unit (either polled or via
7668 	 * sd_send_scsi_cmd).
7669 	 *
7670 	 * Note: This is a critical sequence that needs to be maintained:
7671 	 *	1) Instantiate the kstats here, before any routines using the
7672 	 *	   iopath (i.e. sd_send_scsi_cmd).
7673 	 *	2) Instantiate and initialize the partition stats
7674 	 *	   (sd_set_pstats).
7675 	 *	3) Initialize the error stats (sd_set_errstats), following
7676 	 *	   sd_validate_geometry(),sd_register_devid(),
7677 	 *	   and sd_cache_control().
7678 	 */
7679 
7680 	un->un_stats = kstat_create(sd_label, instance,
7681 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
7682 	if (un->un_stats != NULL) {
7683 		un->un_stats->ks_lock = SD_MUTEX(un);
7684 		kstat_install(un->un_stats);
7685 	}
7686 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7687 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
7688 
7689 	sd_create_errstats(un, instance);
7690 	if (un->un_errstats == NULL) {
7691 		goto create_errstats_failed;
7692 	}
7693 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7694 	    "sd_unit_attach: un:0x%p errstats created\n", un);
7695 
7696 	/*
7697 	 * The following if/else code was relocated here from below as part
7698 	 * of the fix for bug (4430280). However with the default setup added
7699 	 * on entry to this routine, it's no longer absolutely necessary for
7700 	 * this to be before the call to sd_spin_up_unit.
7701 	 */
7702 	if (SD_IS_PARALLEL_SCSI(un) || SD_IS_SERIAL(un)) {
7703 		int tq_trigger_flag = (((devp->sd_inq->inq_ansi == 4) ||
7704 		    (devp->sd_inq->inq_ansi == 5)) &&
7705 		    devp->sd_inq->inq_bque) || devp->sd_inq->inq_cmdque;
7706 
7707 		/*
7708 		 * If tagged queueing is supported by the target
7709 		 * and by the host adapter then we will enable it
7710 		 */
7711 		un->un_tagflags = 0;
7712 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) && tq_trigger_flag &&
7713 		    (un->un_f_arq_enabled == TRUE)) {
7714 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
7715 			    1, 1) == 1) {
7716 				un->un_tagflags = FLAG_STAG;
7717 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7718 				    "sd_unit_attach: un:0x%p tag queueing "
7719 				    "enabled\n", un);
7720 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
7721 			    "untagged-qing", 0) == 1) {
7722 				un->un_f_opt_queueing = TRUE;
7723 				un->un_saved_throttle = un->un_throttle =
7724 				    min(un->un_throttle, 3);
7725 			} else {
7726 				un->un_f_opt_queueing = FALSE;
7727 				un->un_saved_throttle = un->un_throttle = 1;
7728 			}
7729 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
7730 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
7731 			/* The Host Adapter supports internal queueing. */
7732 			un->un_f_opt_queueing = TRUE;
7733 			un->un_saved_throttle = un->un_throttle =
7734 			    min(un->un_throttle, 3);
7735 		} else {
7736 			un->un_f_opt_queueing = FALSE;
7737 			un->un_saved_throttle = un->un_throttle = 1;
7738 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7739 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
7740 		}
7741 
7742 		/*
7743 		 * Enable large transfers for SATA/SAS drives
7744 		 */
7745 		if (SD_IS_SERIAL(un)) {
7746 			un->un_max_xfer_size =
7747 			    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7748 			    sd_max_xfer_size, SD_MAX_XFER_SIZE);
7749 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7750 			    "sd_unit_attach: un:0x%p max transfer "
7751 			    "size=0x%x\n", un, un->un_max_xfer_size);
7752 
7753 		}
7754 
7755 		/* Setup or tear down default wide operations for disks */
7756 
7757 		/*
7758 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
7759 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
7760 		 * system and be set to different values. In the future this
7761 		 * code may need to be updated when the ssd module is
7762 		 * obsoleted and removed from the system. (4299588)
7763 		 */
7764 		if (SD_IS_PARALLEL_SCSI(un) &&
7765 		    (devp->sd_inq->inq_rdf == RDF_SCSI2) &&
7766 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
7767 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
7768 			    1, 1) == 1) {
7769 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7770 				    "sd_unit_attach: un:0x%p Wide Transfer "
7771 				    "enabled\n", un);
7772 			}
7773 
7774 			/*
7775 			 * If tagged queuing has also been enabled, then
7776 			 * enable large xfers
7777 			 */
7778 			if (un->un_saved_throttle == sd_max_throttle) {
7779 				un->un_max_xfer_size =
7780 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7781 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
7782 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7783 				    "sd_unit_attach: un:0x%p max transfer "
7784 				    "size=0x%x\n", un, un->un_max_xfer_size);
7785 			}
7786 		} else {
7787 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
7788 			    0, 1) == 1) {
7789 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7790 				    "sd_unit_attach: un:0x%p "
7791 				    "Wide Transfer disabled\n", un);
7792 			}
7793 		}
7794 	} else {
7795 		un->un_tagflags = FLAG_STAG;
7796 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
7797 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
7798 	}
7799 
7800 	/*
7801 	 * If this target supports LUN reset, try to enable it.
7802 	 */
7803 	if (un->un_f_lun_reset_enabled) {
7804 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
7805 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7806 			    "un:0x%p lun_reset capability set\n", un);
7807 		} else {
7808 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7809 			    "un:0x%p lun-reset capability not set\n", un);
7810 		}
7811 	}
7812 
7813 	/*
7814 	 * Adjust the maximum transfer size. This is to fix
7815 	 * the problem of partial DMA support on SPARC. Some
7816 	 * HBA driver, like aac, has very small dma_attr_maxxfer
7817 	 * size, which requires partial DMA support on SPARC.
7818 	 * In the future the SPARC pci nexus driver may solve
7819 	 * the problem instead of this fix.
7820 	 */
7821 	max_xfer_size = scsi_ifgetcap(SD_ADDRESS(un), "dma-max", 1);
7822 	if ((max_xfer_size > 0) && (max_xfer_size < un->un_max_xfer_size)) {
7823 		/* We need DMA partial even on sparc to ensure sddump() works */
7824 		un->un_max_xfer_size = max_xfer_size;
7825 		if (un->un_partial_dma_supported == 0)
7826 			un->un_partial_dma_supported = 1;
7827 	}
7828 	if (ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
7829 	    DDI_PROP_DONTPASS, "buf_break", 0) == 1) {
7830 		if (ddi_xbuf_attr_setup_brk(un->un_xbuf_attr,
7831 		    un->un_max_xfer_size) == 1) {
7832 			un->un_buf_breakup_supported = 1;
7833 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7834 			    "un:0x%p Buf breakup enabled\n", un);
7835 		}
7836 	}
7837 
7838 	/*
7839 	 * Set PKT_DMA_PARTIAL flag.
7840 	 */
7841 	if (un->un_partial_dma_supported == 1) {
7842 		un->un_pkt_flags = PKT_DMA_PARTIAL;
7843 	} else {
7844 		un->un_pkt_flags = 0;
7845 	}
7846 
7847 	/* Initialize sd_ssc_t for internal uscsi commands */
7848 	ssc = sd_ssc_init(un);
7849 	scsi_fm_init(devp);
7850 
7851 	/*
7852 	 * Allocate memory for SCSI FMA stuffs.
7853 	 */
7854 	un->un_fm_private =
7855 	    kmem_zalloc(sizeof (struct sd_fm_internal), KM_SLEEP);
7856 	sfip = (struct sd_fm_internal *)un->un_fm_private;
7857 	sfip->fm_ssc.ssc_uscsi_cmd = &sfip->fm_ucmd;
7858 	sfip->fm_ssc.ssc_uscsi_info = &sfip->fm_uinfo;
7859 	sfip->fm_ssc.ssc_un = un;
7860 
7861 	if (ISCD(un) ||
7862 	    un->un_f_has_removable_media ||
7863 	    devp->sd_fm_capable == DDI_FM_NOT_CAPABLE) {
7864 		/*
7865 		 * We don't touch CDROM or the DDI_FM_NOT_CAPABLE device.
7866 		 * Their log are unchanged.
7867 		 */
7868 		sfip->fm_log_level = SD_FM_LOG_NSUP;
7869 	} else {
7870 		/*
7871 		 * If we get here, the device is a non-CDROM, FM-capable
7872 		 * device, and it will not keep the old scsi_log as before
7873 		 * in /var/adm/messages. However, the property
7874 		 * "fm-scsi-log" will control whether the FM telemetry will
7875 		 * be logged in /var/adm/messages.
7876 		 */
7877 		int fm_scsi_log;
7878 		fm_scsi_log = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
7879 		    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "fm-scsi-log", 0);
7880 
7881 		if (fm_scsi_log)
7882 			sfip->fm_log_level = SD_FM_LOG_EREPORT;
7883 		else
7884 			sfip->fm_log_level = SD_FM_LOG_SILENT;
7885 	}
7886 
7887 	/*
7888 	 * At this point in the attach, we have enough info in the
7889 	 * soft state to be able to issue commands to the target.
7890 	 *
7891 	 * All command paths used below MUST issue their commands as
7892 	 * SD_PATH_DIRECT. This is important as intermediate layers
7893 	 * are not all initialized yet (such as PM).
7894 	 */
7895 
7896 	/*
7897 	 * Send a TEST UNIT READY command to the device. This should clear
7898 	 * any outstanding UNIT ATTENTION that may be present.
7899 	 *
7900 	 * Note: Don't check for success, just track if there is a reservation,
7901 	 * this is a throw away command to clear any unit attentions.
7902 	 *
7903 	 * Note: This MUST be the first command issued to the target during
7904 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
7905 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
7906 	 * with attempts at spinning up a device with no media.
7907 	 */
7908 	status = sd_send_scsi_TEST_UNIT_READY(ssc, SD_DONT_RETRY_TUR);
7909 	if (status != 0) {
7910 		if (status == EACCES)
7911 			reservation_flag = SD_TARGET_IS_RESERVED;
7912 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7913 	}
7914 
7915 	/*
7916 	 * If the device is NOT a removable media device, attempt to spin
7917 	 * it up (using the START_STOP_UNIT command) and read its capacity
7918 	 * (using the READ CAPACITY command).  Note, however, that either
7919 	 * of these could fail and in some cases we would continue with
7920 	 * the attach despite the failure (see below).
7921 	 */
7922 	if (un->un_f_descr_format_supported) {
7923 
7924 		switch (sd_spin_up_unit(ssc)) {
7925 		case 0:
7926 			/*
7927 			 * Spin-up was successful; now try to read the
7928 			 * capacity.  If successful then save the results
7929 			 * and mark the capacity & lbasize as valid.
7930 			 */
7931 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7932 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
7933 
7934 			status = sd_send_scsi_READ_CAPACITY(ssc, &capacity,
7935 			    &lbasize, SD_PATH_DIRECT);
7936 
7937 			switch (status) {
7938 			case 0: {
7939 				if (capacity > DK_MAX_BLOCKS) {
7940 #ifdef _LP64
7941 					if ((capacity + 1) >
7942 					    SD_GROUP1_MAX_ADDRESS) {
7943 						/*
7944 						 * Enable descriptor format
7945 						 * sense data so that we can
7946 						 * get 64 bit sense data
7947 						 * fields.
7948 						 */
7949 						sd_enable_descr_sense(ssc);
7950 					}
7951 #else
7952 					/* 32-bit kernels can't handle this */
7953 					scsi_log(SD_DEVINFO(un),
7954 					    sd_label, CE_WARN,
7955 					    "disk has %llu blocks, which "
7956 					    "is too large for a 32-bit "
7957 					    "kernel", capacity);
7958 
7959 #if defined(__i386) || defined(__amd64)
7960 					/*
7961 					 * 1TB disk was treated as (1T - 512)B
7962 					 * in the past, so that it might have
7963 					 * valid VTOC and solaris partitions,
7964 					 * we have to allow it to continue to
7965 					 * work.
7966 					 */
7967 					if (capacity -1 > DK_MAX_BLOCKS)
7968 #endif
7969 					goto spinup_failed;
7970 #endif
7971 				}
7972 
7973 				/*
7974 				 * Here it's not necessary to check the case:
7975 				 * the capacity of the device is bigger than
7976 				 * what the max hba cdb can support. Because
7977 				 * sd_send_scsi_READ_CAPACITY will retrieve
7978 				 * the capacity by sending USCSI command, which
7979 				 * is constrained by the max hba cdb. Actually,
7980 				 * sd_send_scsi_READ_CAPACITY will return
7981 				 * EINVAL when using bigger cdb than required
7982 				 * cdb length. Will handle this case in
7983 				 * "case EINVAL".
7984 				 */
7985 
7986 				/*
7987 				 * The following relies on
7988 				 * sd_send_scsi_READ_CAPACITY never
7989 				 * returning 0 for capacity and/or lbasize.
7990 				 */
7991 				sd_update_block_info(un, lbasize, capacity);
7992 
7993 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7994 				    "sd_unit_attach: un:0x%p capacity = %ld "
7995 				    "blocks; lbasize= %ld.\n", un,
7996 				    un->un_blockcount, un->un_tgt_blocksize);
7997 
7998 				break;
7999 			}
8000 			case EINVAL:
8001 				/*
8002 				 * In the case where the max-cdb-length property
8003 				 * is smaller than the required CDB length for
8004 				 * a SCSI device, a target driver can fail to
8005 				 * attach to that device.
8006 				 */
8007 				scsi_log(SD_DEVINFO(un),
8008 				    sd_label, CE_WARN,
8009 				    "disk capacity is too large "
8010 				    "for current cdb length");
8011 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8012 
8013 				goto spinup_failed;
8014 			case EACCES:
8015 				/*
8016 				 * Should never get here if the spin-up
8017 				 * succeeded, but code it in anyway.
8018 				 * From here, just continue with the attach...
8019 				 */
8020 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8021 				    "sd_unit_attach: un:0x%p "
8022 				    "sd_send_scsi_READ_CAPACITY "
8023 				    "returned reservation conflict\n", un);
8024 				reservation_flag = SD_TARGET_IS_RESERVED;
8025 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8026 				break;
8027 			default:
8028 				/*
8029 				 * Likewise, should never get here if the
8030 				 * spin-up succeeded. Just continue with
8031 				 * the attach...
8032 				 */
8033 				if (status == EIO)
8034 					sd_ssc_assessment(ssc,
8035 					    SD_FMT_STATUS_CHECK);
8036 				else
8037 					sd_ssc_assessment(ssc,
8038 					    SD_FMT_IGNORE);
8039 				break;
8040 			}
8041 			break;
8042 		case EACCES:
8043 			/*
8044 			 * Device is reserved by another host.  In this case
8045 			 * we could not spin it up or read the capacity, but
8046 			 * we continue with the attach anyway.
8047 			 */
8048 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8049 			    "sd_unit_attach: un:0x%p spin-up reservation "
8050 			    "conflict.\n", un);
8051 			reservation_flag = SD_TARGET_IS_RESERVED;
8052 			break;
8053 		default:
8054 			/* Fail the attach if the spin-up failed. */
8055 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8056 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
8057 			goto spinup_failed;
8058 		}
8059 
8060 	}
8061 
8062 	/*
8063 	 * Check to see if this is a MMC drive
8064 	 */
8065 	if (ISCD(un)) {
8066 		sd_set_mmc_caps(ssc);
8067 	}
8068 
8069 	/*
8070 	 * Add a zero-length attribute to tell the world we support
8071 	 * kernel ioctls (for layered drivers)
8072 	 */
8073 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8074 	    DDI_KERNEL_IOCTL, NULL, 0);
8075 
8076 	/*
8077 	 * Add a boolean property to tell the world we support
8078 	 * the B_FAILFAST flag (for layered drivers)
8079 	 */
8080 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8081 	    "ddi-failfast-supported", NULL, 0);
8082 
8083 	/*
8084 	 * Initialize power management
8085 	 */
8086 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
8087 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
8088 	sd_setup_pm(ssc, devi);
8089 	if (un->un_f_pm_is_enabled == FALSE) {
8090 		/*
8091 		 * For performance, point to a jump table that does
8092 		 * not include pm.
8093 		 * The direct and priority chains don't change with PM.
8094 		 *
8095 		 * Note: this is currently done based on individual device
8096 		 * capabilities. When an interface for determining system
8097 		 * power enabled state becomes available, or when additional
8098 		 * layers are added to the command chain, these values will
8099 		 * have to be re-evaluated for correctness.
8100 		 */
8101 		if (un->un_f_non_devbsize_supported) {
8102 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
8103 		} else {
8104 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
8105 		}
8106 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8107 	}
8108 
8109 	/*
8110 	 * This property is set to 0 by HA software to avoid retries
8111 	 * on a reserved disk. (The preferred property name is
8112 	 * "retry-on-reservation-conflict") (1189689)
8113 	 *
8114 	 * Note: The use of a global here can have unintended consequences. A
8115 	 * per instance variable is preferable to match the capabilities of
8116 	 * different underlying hba's (4402600)
8117 	 */
8118 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
8119 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
8120 	    sd_retry_on_reservation_conflict);
8121 	if (sd_retry_on_reservation_conflict != 0) {
8122 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
8123 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
8124 		    sd_retry_on_reservation_conflict);
8125 	}
8126 
8127 	/* Set up options for QFULL handling. */
8128 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8129 	    "qfull-retries", -1)) != -1) {
8130 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
8131 		    rval, 1);
8132 	}
8133 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8134 	    "qfull-retry-interval", -1)) != -1) {
8135 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
8136 		    rval, 1);
8137 	}
8138 
8139 	/*
8140 	 * This just prints a message that announces the existence of the
8141 	 * device. The message is always printed in the system logfile, but
8142 	 * only appears on the console if the system is booted with the
8143 	 * -v (verbose) argument.
8144 	 */
8145 	ddi_report_dev(devi);
8146 
8147 	un->un_mediastate = DKIO_NONE;
8148 
8149 	/*
8150 	 * Check if this is a SSD(Solid State Drive).
8151 	 */
8152 	sd_check_solid_state(ssc);
8153 
8154 	cmlb_alloc_handle(&un->un_cmlbhandle);
8155 
8156 #if defined(__i386) || defined(__amd64)
8157 	/*
8158 	 * On x86, compensate for off-by-1 legacy error
8159 	 */
8160 	if (!un->un_f_has_removable_media && !un->un_f_is_hotpluggable &&
8161 	    (lbasize == un->un_sys_blocksize))
8162 		offbyone = CMLB_OFF_BY_ONE;
8163 #endif
8164 
8165 	if (cmlb_attach(devi, &sd_tgops, (int)devp->sd_inq->inq_dtype,
8166 	    VOID2BOOLEAN(un->un_f_has_removable_media != 0),
8167 	    VOID2BOOLEAN(un->un_f_is_hotpluggable != 0),
8168 	    un->un_node_type, offbyone, un->un_cmlbhandle,
8169 	    (void *)SD_PATH_DIRECT) != 0) {
8170 		goto cmlb_attach_failed;
8171 	}
8172 
8173 
8174 	/*
8175 	 * Read and validate the device's geometry (ie, disk label)
8176 	 * A new unformatted drive will not have a valid geometry, but
8177 	 * the driver needs to successfully attach to this device so
8178 	 * the drive can be formatted via ioctls.
8179 	 */
8180 	geom_label_valid = (cmlb_validate(un->un_cmlbhandle, 0,
8181 	    (void *)SD_PATH_DIRECT) == 0) ? 1: 0;
8182 
8183 	mutex_enter(SD_MUTEX(un));
8184 
8185 	/*
8186 	 * Read and initialize the devid for the unit.
8187 	 */
8188 	if (un->un_f_devid_supported) {
8189 		sd_register_devid(ssc, devi, reservation_flag);
8190 	}
8191 	mutex_exit(SD_MUTEX(un));
8192 
8193 #if (defined(__fibre))
8194 	/*
8195 	 * Register callbacks for fibre only.  You can't do this solely
8196 	 * on the basis of the devid_type because this is hba specific.
8197 	 * We need to query our hba capabilities to find out whether to
8198 	 * register or not.
8199 	 */
8200 	if (un->un_f_is_fibre) {
8201 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
8202 			sd_init_event_callbacks(un);
8203 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8204 			    "sd_unit_attach: un:0x%p event callbacks inserted",
8205 			    un);
8206 		}
8207 	}
8208 #endif
8209 
8210 	if (un->un_f_opt_disable_cache == TRUE) {
8211 		/*
8212 		 * Disable both read cache and write cache.  This is
8213 		 * the historic behavior of the keywords in the config file.
8214 		 */
8215 		if (sd_cache_control(ssc, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
8216 		    0) {
8217 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8218 			    "sd_unit_attach: un:0x%p Could not disable "
8219 			    "caching", un);
8220 			goto devid_failed;
8221 		}
8222 	}
8223 
8224 	/*
8225 	 * Check the value of the WCE bit now and
8226 	 * set un_f_write_cache_enabled accordingly.
8227 	 */
8228 	(void) sd_get_write_cache_enabled(ssc, &wc_enabled);
8229 	mutex_enter(SD_MUTEX(un));
8230 	un->un_f_write_cache_enabled = (wc_enabled != 0);
8231 	mutex_exit(SD_MUTEX(un));
8232 
8233 	if (un->un_f_rmw_type != SD_RMW_TYPE_RETURN_ERROR &&
8234 	    un->un_tgt_blocksize != DEV_BSIZE) {
8235 		if (!(un->un_wm_cache)) {
8236 			(void) snprintf(name_str, sizeof (name_str),
8237 			    "%s%d_cache",
8238 			    ddi_driver_name(SD_DEVINFO(un)),
8239 			    ddi_get_instance(SD_DEVINFO(un)));
8240 			un->un_wm_cache = kmem_cache_create(
8241 			    name_str, sizeof (struct sd_w_map),
8242 			    8, sd_wm_cache_constructor,
8243 			    sd_wm_cache_destructor, NULL,
8244 			    (void *)un, NULL, 0);
8245 			if (!(un->un_wm_cache)) {
8246 				goto wm_cache_failed;
8247 			}
8248 		}
8249 	}
8250 
8251 	/*
8252 	 * Check the value of the NV_SUP bit and set
8253 	 * un_f_suppress_cache_flush accordingly.
8254 	 */
8255 	sd_get_nv_sup(ssc);
8256 
8257 	/*
8258 	 * Find out what type of reservation this disk supports.
8259 	 */
8260 	status = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_KEYS, 0, NULL);
8261 
8262 	switch (status) {
8263 	case 0:
8264 		/*
8265 		 * SCSI-3 reservations are supported.
8266 		 */
8267 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8268 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8269 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
8270 		break;
8271 	case ENOTSUP:
8272 		/*
8273 		 * The PERSISTENT RESERVE IN command would not be recognized by
8274 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
8275 		 */
8276 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8277 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
8278 		un->un_reservation_type = SD_SCSI2_RESERVATION;
8279 
8280 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8281 		break;
8282 	default:
8283 		/*
8284 		 * default to SCSI-3 reservations
8285 		 */
8286 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8287 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
8288 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8289 
8290 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8291 		break;
8292 	}
8293 
8294 	/*
8295 	 * Set the pstat and error stat values here, so data obtained during the
8296 	 * previous attach-time routines is available.
8297 	 *
8298 	 * Note: This is a critical sequence that needs to be maintained:
8299 	 *	1) Instantiate the kstats before any routines using the iopath
8300 	 *	   (i.e. sd_send_scsi_cmd).
8301 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8302 	 *	   stats (sd_set_pstats)here, following
8303 	 *	   cmlb_validate_geometry(), sd_register_devid(), and
8304 	 *	   sd_cache_control().
8305 	 */
8306 
8307 	if (un->un_f_pkstats_enabled && geom_label_valid) {
8308 		sd_set_pstats(un);
8309 		SD_TRACE(SD_LOG_IO_PARTITION, un,
8310 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
8311 	}
8312 
8313 	sd_set_errstats(un);
8314 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8315 	    "sd_unit_attach: un:0x%p errstats set\n", un);
8316 
8317 
8318 	/*
8319 	 * After successfully attaching an instance, we record the information
8320 	 * of how many luns have been attached on the relative target and
8321 	 * controller for parallel SCSI. This information is used when sd tries
8322 	 * to set the tagged queuing capability in HBA.
8323 	 */
8324 	if (SD_IS_PARALLEL_SCSI(un) && (tgt >= 0) && (tgt < NTARGETS_WIDE)) {
8325 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_ATTACH);
8326 	}
8327 
8328 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8329 	    "sd_unit_attach: un:0x%p exit success\n", un);
8330 
8331 	/* Uninitialize sd_ssc_t pointer */
8332 	sd_ssc_fini(ssc);
8333 
8334 	return (DDI_SUCCESS);
8335 
8336 	/*
8337 	 * An error occurred during the attach; clean up & return failure.
8338 	 */
8339 wm_cache_failed:
8340 devid_failed:
8341 
8342 setup_pm_failed:
8343 	ddi_remove_minor_node(devi, NULL);
8344 
8345 cmlb_attach_failed:
8346 	/*
8347 	 * Cleanup from the scsi_ifsetcap() calls (437868)
8348 	 */
8349 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8350 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8351 
8352 	/*
8353 	 * Refer to the comments of setting tagged-qing in the beginning of
8354 	 * sd_unit_attach. We can only disable tagged queuing when there is
8355 	 * no lun attached on the target.
8356 	 */
8357 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
8358 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8359 	}
8360 
8361 	if (un->un_f_is_fibre == FALSE) {
8362 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8363 	}
8364 
8365 spinup_failed:
8366 
8367 	/* Uninitialize sd_ssc_t pointer */
8368 	sd_ssc_fini(ssc);
8369 
8370 	mutex_enter(SD_MUTEX(un));
8371 
8372 	/* Deallocate SCSI FMA memory spaces */
8373 	kmem_free(un->un_fm_private, sizeof (struct sd_fm_internal));
8374 
8375 	/* Cancel callback for SD_PATH_DIRECT_PRIORITY cmd. restart */
8376 	if (un->un_direct_priority_timeid != NULL) {
8377 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8378 		un->un_direct_priority_timeid = NULL;
8379 		mutex_exit(SD_MUTEX(un));
8380 		(void) untimeout(temp_id);
8381 		mutex_enter(SD_MUTEX(un));
8382 	}
8383 
8384 	/* Cancel any pending start/stop timeouts */
8385 	if (un->un_startstop_timeid != NULL) {
8386 		timeout_id_t temp_id = un->un_startstop_timeid;
8387 		un->un_startstop_timeid = NULL;
8388 		mutex_exit(SD_MUTEX(un));
8389 		(void) untimeout(temp_id);
8390 		mutex_enter(SD_MUTEX(un));
8391 	}
8392 
8393 	/* Cancel any pending reset-throttle timeouts */
8394 	if (un->un_reset_throttle_timeid != NULL) {
8395 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8396 		un->un_reset_throttle_timeid = NULL;
8397 		mutex_exit(SD_MUTEX(un));
8398 		(void) untimeout(temp_id);
8399 		mutex_enter(SD_MUTEX(un));
8400 	}
8401 
8402 	/* Cancel rmw warning message timeouts */
8403 	if (un->un_rmw_msg_timeid != NULL) {
8404 		timeout_id_t temp_id = un->un_rmw_msg_timeid;
8405 		un->un_rmw_msg_timeid = NULL;
8406 		mutex_exit(SD_MUTEX(un));
8407 		(void) untimeout(temp_id);
8408 		mutex_enter(SD_MUTEX(un));
8409 	}
8410 
8411 	/* Cancel any pending retry timeouts */
8412 	if (un->un_retry_timeid != NULL) {
8413 		timeout_id_t temp_id = un->un_retry_timeid;
8414 		un->un_retry_timeid = NULL;
8415 		mutex_exit(SD_MUTEX(un));
8416 		(void) untimeout(temp_id);
8417 		mutex_enter(SD_MUTEX(un));
8418 	}
8419 
8420 	/* Cancel any pending delayed cv broadcast timeouts */
8421 	if (un->un_dcvb_timeid != NULL) {
8422 		timeout_id_t temp_id = un->un_dcvb_timeid;
8423 		un->un_dcvb_timeid = NULL;
8424 		mutex_exit(SD_MUTEX(un));
8425 		(void) untimeout(temp_id);
8426 		mutex_enter(SD_MUTEX(un));
8427 	}
8428 
8429 	mutex_exit(SD_MUTEX(un));
8430 
8431 	/* There should not be any in-progress I/O so ASSERT this check */
8432 	ASSERT(un->un_ncmds_in_transport == 0);
8433 	ASSERT(un->un_ncmds_in_driver == 0);
8434 
8435 	/* Do not free the softstate if the callback routine is active */
8436 	sd_sync_with_callback(un);
8437 
8438 	/*
8439 	 * Partition stats apparently are not used with removables. These would
8440 	 * not have been created during attach, so no need to clean them up...
8441 	 */
8442 	if (un->un_errstats != NULL) {
8443 		kstat_delete(un->un_errstats);
8444 		un->un_errstats = NULL;
8445 	}
8446 
8447 create_errstats_failed:
8448 
8449 	if (un->un_stats != NULL) {
8450 		kstat_delete(un->un_stats);
8451 		un->un_stats = NULL;
8452 	}
8453 
8454 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8455 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8456 
8457 	ddi_prop_remove_all(devi);
8458 	sema_destroy(&un->un_semoclose);
8459 	cv_destroy(&un->un_state_cv);
8460 
8461 getrbuf_failed:
8462 
8463 	sd_free_rqs(un);
8464 
8465 alloc_rqs_failed:
8466 
8467 	devp->sd_private = NULL;
8468 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
8469 
8470 get_softstate_failed:
8471 	/*
8472 	 * Note: the man pages are unclear as to whether or not doing a
8473 	 * ddi_soft_state_free(sd_state, instance) is the right way to
8474 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
8475 	 * ddi_get_soft_state() fails.  The implication seems to be
8476 	 * that the get_soft_state cannot fail if the zalloc succeeds.
8477 	 */
8478 #ifndef XPV_HVM_DRIVER
8479 	ddi_soft_state_free(sd_state, instance);
8480 #endif /* !XPV_HVM_DRIVER */
8481 
8482 probe_failed:
8483 	scsi_unprobe(devp);
8484 
8485 	return (DDI_FAILURE);
8486 }
8487 
8488 
8489 /*
8490  *    Function: sd_unit_detach
8491  *
8492  * Description: Performs DDI_DETACH processing for sddetach().
8493  *
8494  * Return Code: DDI_SUCCESS
8495  *		DDI_FAILURE
8496  *
8497  *     Context: Kernel thread context
8498  */
8499 
static int
sd_unit_detach(dev_info_t *devi)
{
	struct scsi_device	*devp;
	struct sd_lun		*un;
	int			i;
	int			tgt;
	dev_t			dev;
	dev_info_t		*pdip = ddi_get_parent(devi);
#ifndef XPV_HVM_DRIVER
	int			instance = ddi_get_instance(devi);
#endif /* !XPV_HVM_DRIVER */

	mutex_enter(&sd_detach_mutex);

	/*
	 * Fail the detach for any of the following:
	 *  - Unable to get the sd_lun struct for the instance
	 *  - A layered driver has an outstanding open on the instance
	 *  - Another thread is already detaching this instance
	 *  - Another thread is currently performing an open
	 */
	devp = ddi_get_driver_private(devi);
	if ((devp == NULL) ||
	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
		mutex_exit(&sd_detach_mutex);
		return (DDI_FAILURE);
	}

	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);

	/*
	 * Mark this instance as currently in a detach, to inhibit any
	 * opens from a layered driver.
	 */
	un->un_detach_count++;
	mutex_exit(&sd_detach_mutex);

	/*
	 * Save the target number now; it is still needed near the end of
	 * this routine, after the soft state has been zeroed and freed, to
	 * update the per-target attached-lun count.
	 */
	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
	    SCSI_ADDR_PROP_TARGET, -1);

	/* dev_t used below for reservation release and reclaim removal. */
	dev = sd_make_device(SD_DEVINFO(un));

#ifndef lint
	_NOTE(COMPETING_THREADS_NOW);
#endif

	mutex_enter(SD_MUTEX(un));

	/*
	 * Fail the detach if there are any outstanding layered
	 * opens on this device.
	 */
	for (i = 0; i < NDKMAP; i++) {
		if (un->un_ocmap.lyropen[i] != 0) {
			goto err_notclosed;
		}
	}

	/*
	 * Verify there are NO outstanding commands issued to this device.
	 * ie, un_ncmds_in_transport == 0.
	 * It's possible to have outstanding commands through the physio
	 * code path, even though everything's closed.
	 */
	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
	    (un->un_direct_priority_timeid != NULL) ||
	    (un->un_state == SD_STATE_RWAIT)) {
		mutex_exit(SD_MUTEX(un));
		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
		goto err_stillbusy;
	}

	/*
	 * If we have the device reserved, release the reservation.
	 */
	if ((un->un_resvd_status & SD_RESERVE) &&
	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
		mutex_exit(SD_MUTEX(un));
		/*
		 * Note: sd_reserve_release sends a command to the device
		 * via the sd_ioctlcmd() path, and can sleep.
		 */
		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
			    "sd_dr_detach: Cannot release reservation \n");
		}
	} else {
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * Untimeout any reserve recover, throttle reset, restart unit
	 * and delayed broadcast timeout threads. Protect the timeout pointer
	 * from getting nulled by their callback functions by clearing the
	 * stored id under SD_MUTEX before dropping the mutex to call
	 * untimeout() (untimeout can block on a running callback, so it
	 * must not be called with SD_MUTEX held).
	 */
	mutex_enter(SD_MUTEX(un));
	if (un->un_resvd_timeid != NULL) {
		timeout_id_t temp_id = un->un_resvd_timeid;
		un->un_resvd_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	if (un->un_reset_throttle_timeid != NULL) {
		timeout_id_t temp_id = un->un_reset_throttle_timeid;
		un->un_reset_throttle_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	if (un->un_startstop_timeid != NULL) {
		timeout_id_t temp_id = un->un_startstop_timeid;
		un->un_startstop_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	if (un->un_rmw_msg_timeid != NULL) {
		timeout_id_t temp_id = un->un_rmw_msg_timeid;
		un->un_rmw_msg_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	if (un->un_dcvb_timeid != NULL) {
		timeout_id_t temp_id = un->un_dcvb_timeid;
		un->un_dcvb_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
	} else {
		mutex_exit(SD_MUTEX(un));
	}

	/* Remove any pending reservation reclaim requests for this device */
	sd_rmv_resv_reclaim_req(dev);

	mutex_enter(SD_MUTEX(un));

	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
	if (un->un_direct_priority_timeid != NULL) {
		timeout_id_t temp_id = un->un_direct_priority_timeid;
		un->un_direct_priority_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(temp_id);
		mutex_enter(SD_MUTEX(un));
	}

	/* Cancel any active multi-host disk watch thread requests */
	if (un->un_mhd_token != NULL) {
		mutex_exit(SD_MUTEX(un));
		 _NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
		if (scsi_watch_request_terminate(un->un_mhd_token,
		    SCSI_WATCH_TERMINATE_NOWAIT)) {
			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
			    "sd_dr_detach: Cannot cancel mhd watch request\n");
			/*
			 * Note: We are returning here after having removed
			 * some driver timeouts above. This is consistent with
			 * the legacy implementation but perhaps the watch
			 * terminate call should be made with the wait flag set.
			 */
			goto err_stillbusy;
		}
		mutex_enter(SD_MUTEX(un));
		un->un_mhd_token = NULL;
	}

	/* Cancel any active single-host (removable media) watch requests */
	if (un->un_swr_token != NULL) {
		mutex_exit(SD_MUTEX(un));
		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
		if (scsi_watch_request_terminate(un->un_swr_token,
		    SCSI_WATCH_TERMINATE_NOWAIT)) {
			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
			    "sd_dr_detach: Cannot cancel swr watch request\n");
			/*
			 * Note: We are returning here after having removed
			 * some driver timeouts above. This is consistent with
			 * the legacy implementation but perhaps the watch
			 * terminate call should be made with the wait flag set.
			 */
			goto err_stillbusy;
		}
		mutex_enter(SD_MUTEX(un));
		un->un_swr_token = NULL;
	}

	mutex_exit(SD_MUTEX(un));

	/*
	 * Clear any scsi_reset_notifies. We clear the reset notifies
	 * if we have not registered one.
	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
	 */
	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
	    sd_mhd_reset_notify_cb, (caddr_t)un);

	/*
	 * protect the timeout pointers from getting nulled by
	 * their callback functions during the cancellation process.
	 * In such a scenario untimeout can be invoked with a null value.
	 */
	_NOTE(NO_COMPETING_THREADS_NOW);

	/* Cancel a pending PM idle timeout, if any (same dance as above). */
	mutex_enter(&un->un_pm_mutex);
	if (un->un_pm_idle_timeid != NULL) {
		timeout_id_t temp_id = un->un_pm_idle_timeid;
		un->un_pm_idle_timeid = NULL;
		mutex_exit(&un->un_pm_mutex);

		/*
		 * Timeout is active; cancel it.
		 * Note that it'll never be active on a device
		 * that does not support PM therefore we don't
		 * have to check before calling pm_idle_component.
		 */
		(void) untimeout(temp_id);
		(void) pm_idle_component(SD_DEVINFO(un), 0);
		mutex_enter(&un->un_pm_mutex);
	}

	/*
	 * Check whether there is already a timeout scheduled for power
	 * management. If yes then don't lower the power here, that's
	 * the timeout handler's job.
	 */
	if (un->un_pm_timeid != NULL) {
		timeout_id_t temp_id = un->un_pm_timeid;
		un->un_pm_timeid = NULL;
		mutex_exit(&un->un_pm_mutex);
		/*
		 * Timeout is active; cancel it.
		 * Note that it'll never be active on a device
		 * that does not support PM therefore we don't
		 * have to check before calling pm_idle_component.
		 */
		(void) untimeout(temp_id);
		(void) pm_idle_component(SD_DEVINFO(un), 0);

	} else {
		mutex_exit(&un->un_pm_mutex);
		if ((un->un_f_pm_is_enabled == TRUE) &&
		    (pm_lower_power(SD_DEVINFO(un), 0, SD_PM_STATE_STOPPED(un))
		    != DDI_SUCCESS)) {
			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
		    "sd_dr_detach: Lower power request failed, ignoring.\n");
			/*
			 * Fix for bug: 4297749, item # 13
			 * The above test now includes a check to see if PM is
			 * supported by this device before call
			 * pm_lower_power().
			 * Note, the following is not dead code. The call to
			 * pm_lower_power above will generate a call back into
			 * our sdpower routine which might result in a timeout
			 * handler getting activated. Therefore the following
			 * code is valid and necessary.
			 */
			mutex_enter(&un->un_pm_mutex);
			if (un->un_pm_timeid != NULL) {
				timeout_id_t temp_id = un->un_pm_timeid;
				un->un_pm_timeid = NULL;
				mutex_exit(&un->un_pm_mutex);
				(void) untimeout(temp_id);
				(void) pm_idle_component(SD_DEVINFO(un), 0);
			} else {
				mutex_exit(&un->un_pm_mutex);
			}
		}
	}

	/*
	 * Cleanup from the scsi_ifsetcap() calls (437868)
	 * Relocated here from above to be after the call to
	 * pm_lower_power, which was getting errors.
	 */
	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);

	/*
	 * Currently, tagged queuing is supported per target based by HBA.
	 * Setting this per lun instance actually sets the capability of this
	 * target in HBA, which affects those luns already attached on the
	 * same target. So during detach, we can only disable this capability
	 * only when this is the only lun left on this target. By doing
	 * this, we assume a target has the same tagged queuing capability
	 * for every lun. The condition can be removed when HBA is changed to
	 * support per lun based tagged queuing capability.
	 */
	if (sd_scsi_get_target_lun_count(pdip, tgt) <= 1) {
		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
	}

	if (un->un_f_is_fibre == FALSE) {
		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
	}

	/*
	 * Remove any event callbacks, fibre only
	 */
	if (un->un_f_is_fibre == TRUE) {
		if ((un->un_insert_event != NULL) &&
		    (ddi_remove_event_handler(un->un_insert_cb_id) !=
		    DDI_SUCCESS)) {
			/*
			 * Note: We are returning here after having done
			 * substantial cleanup above. This is consistent
			 * with the legacy implementation but this may not
			 * be the right thing to do.
			 */
			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
			    "sd_dr_detach: Cannot cancel insert event\n");
			goto err_remove_event;
		}
		un->un_insert_event = NULL;

		if ((un->un_remove_event != NULL) &&
		    (ddi_remove_event_handler(un->un_remove_cb_id) !=
		    DDI_SUCCESS)) {
			/*
			 * Note: We are returning here after having done
			 * substantial cleanup above. This is consistent
			 * with the legacy implementation but this may not
			 * be the right thing to do.
			 */
			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
			    "sd_dr_detach: Cannot cancel remove event\n");
			goto err_remove_event;
		}
		un->un_remove_event = NULL;
	}

	/* Do not free the softstate if the callback routine is active */
	sd_sync_with_callback(un);

	cmlb_detach(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
	cmlb_free_handle(&un->un_cmlbhandle);

	/*
	 * Hold the detach mutex here, to make sure that no other threads ever
	 * can access a (partially) freed soft state structure.
	 */
	mutex_enter(&sd_detach_mutex);

	/*
	 * Clean up the soft state struct.
	 * Cleanup is done in reverse order of allocs/inits.
	 * At this point there should be no competing threads anymore.
	 */

	scsi_fm_fini(devp);

	/*
	 * Deallocate memory for SCSI FMA.
	 */
	kmem_free(un->un_fm_private, sizeof (struct sd_fm_internal));

	/*
	 * Unregister and free device id if it was not registered
	 * by the transport.
	 */
	if (un->un_f_devid_transport_defined == FALSE)
		ddi_devid_unregister(devi);

	/*
	 * free the devid structure if allocated before (by ddi_devid_init()
	 * or ddi_devid_get()).
	 */
	if (un->un_devid) {
		ddi_devid_free(un->un_devid);
		un->un_devid = NULL;
	}

	/*
	 * Destroy wmap cache if it exists.
	 */
	if (un->un_wm_cache != NULL) {
		kmem_cache_destroy(un->un_wm_cache);
		un->un_wm_cache = NULL;
	}

	/*
	 * kstat cleanup is done in detach for all device types (4363169).
	 * We do not want to fail detach if the device kstats are not deleted
	 * since there is a confusion about the devo_refcnt for the device.
	 * We just delete the kstats and let detach complete successfully.
	 */
	if (un->un_stats != NULL) {
		kstat_delete(un->un_stats);
		un->un_stats = NULL;
	}
	if (un->un_errstats != NULL) {
		kstat_delete(un->un_errstats);
		un->un_errstats = NULL;
	}

	/* Remove partition stats */
	if (un->un_f_pkstats_enabled) {
		for (i = 0; i < NSDMAP; i++) {
			if (un->un_pstats[i] != NULL) {
				kstat_delete(un->un_pstats[i]);
				un->un_pstats[i] = NULL;
			}
		}
	}

	/* Remove xbuf registration */
	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
	ddi_xbuf_attr_destroy(un->un_xbuf_attr);

	/* Remove driver properties */
	ddi_prop_remove_all(devi);

	mutex_destroy(&un->un_pm_mutex);
	cv_destroy(&un->un_pm_busy_cv);

	cv_destroy(&un->un_wcc_cv);

	/* Open/close semaphore */
	sema_destroy(&un->un_semoclose);

	/* Removable media condvar. */
	cv_destroy(&un->un_state_cv);

	/* Suspend/resume condvar. */
	cv_destroy(&un->un_suspend_cv);
	cv_destroy(&un->un_disk_busy_cv);

	sd_free_rqs(un);

	/* Free up soft state */
	devp->sd_private = NULL;

	/* Zero the soft state so stale data cannot be reused (cf. attach). */
	bzero(un, sizeof (struct sd_lun));
#ifndef XPV_HVM_DRIVER
	ddi_soft_state_free(sd_state, instance);
#endif /* !XPV_HVM_DRIVER */

	mutex_exit(&sd_detach_mutex);

	/* This frees up the INQUIRY data associated with the device. */
	scsi_unprobe(devp);

	/*
	 * After successfully detaching an instance, we update the information
	 * of how many luns have been attached in the relative target and
	 * controller for parallel SCSI. This information is used when sd tries
	 * to set the tagged queuing capability in HBA.
	 * Since un has been released, we can't use SD_IS_PARALLEL_SCSI(un) to
	 * check if the device is parallel SCSI. However, we don't need to
	 * check here because we've already checked during attach. No device
	 * that is not parallel SCSI is in the chain.
	 */
	if ((tgt >= 0) && (tgt < NTARGETS_WIDE)) {
		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_DETACH);
	}

	return (DDI_SUCCESS);

	/* Entered with SD_MUTEX held; drop it before the common exit path. */
err_notclosed:
	mutex_exit(SD_MUTEX(un));

	/* Entered with SD_MUTEX already released. */
err_stillbusy:
	_NOTE(NO_COMPETING_THREADS_NOW);

	/* Undo the detach-in-progress marking so future detaches may run. */
err_remove_event:
	mutex_enter(&sd_detach_mutex);
	un->un_detach_count--;
	mutex_exit(&sd_detach_mutex);

	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
	return (DDI_FAILURE);
}
8979 
8980 
8981 /*
8982  *    Function: sd_create_errstats
8983  *
8984  * Description: This routine instantiates the device error stats.
8985  *
8986  *		Note: During attach the stats are instantiated first so they are
8987  *		available for attach-time routines that utilize the driver
8988  *		iopath to send commands to the device. The stats are initialized
8989  *		separately so data obtained during some attach-time routines is
8990  *		available. (4362483)
8991  *
8992  *   Arguments: un - driver soft state (unit) structure
8993  *		instance - driver instance
8994  *
8995  *     Context: Kernel thread context
8996  */
8997 
8998 static void
8999 sd_create_errstats(struct sd_lun *un, int instance)
9000 {
9001 	struct	sd_errstats	*stp;
9002 	char	kstatmodule_err[KSTAT_STRLEN];
9003 	char	kstatname[KSTAT_STRLEN];
9004 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
9005 
9006 	ASSERT(un != NULL);
9007 
9008 	if (un->un_errstats != NULL) {
9009 		return;
9010 	}
9011 
9012 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
9013 	    "%serr", sd_label);
9014 	(void) snprintf(kstatname, sizeof (kstatname),
9015 	    "%s%d,err", sd_label, instance);
9016 
9017 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
9018 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
9019 
9020 	if (un->un_errstats == NULL) {
9021 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9022 		    "sd_create_errstats: Failed kstat_create\n");
9023 		return;
9024 	}
9025 
9026 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9027 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
9028 	    KSTAT_DATA_UINT32);
9029 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
9030 	    KSTAT_DATA_UINT32);
9031 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
9032 	    KSTAT_DATA_UINT32);
9033 	kstat_named_init(&stp->sd_vid,		"Vendor",
9034 	    KSTAT_DATA_CHAR);
9035 	kstat_named_init(&stp->sd_pid,		"Product",
9036 	    KSTAT_DATA_CHAR);
9037 	kstat_named_init(&stp->sd_revision,	"Revision",
9038 	    KSTAT_DATA_CHAR);
9039 	kstat_named_init(&stp->sd_serial,	"Serial No",
9040 	    KSTAT_DATA_CHAR);
9041 	kstat_named_init(&stp->sd_capacity,	"Size",
9042 	    KSTAT_DATA_ULONGLONG);
9043 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
9044 	    KSTAT_DATA_UINT32);
9045 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
9046 	    KSTAT_DATA_UINT32);
9047 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
9048 	    KSTAT_DATA_UINT32);
9049 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
9050 	    KSTAT_DATA_UINT32);
9051 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
9052 	    KSTAT_DATA_UINT32);
9053 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
9054 	    KSTAT_DATA_UINT32);
9055 
9056 	un->un_errstats->ks_private = un;
9057 	un->un_errstats->ks_update  = nulldev;
9058 
9059 	kstat_install(un->un_errstats);
9060 }
9061 
9062 
9063 /*
9064  *    Function: sd_set_errstats
9065  *
9066  * Description: This routine sets the value of the vendor id, product id,
9067  *		revision, serial number, and capacity device error stats.
9068  *
9069  *		Note: During attach the stats are instantiated first so they are
9070  *		available for attach-time routines that utilize the driver
9071  *		iopath to send commands to the device. The stats are initialized
9072  *		separately so data obtained during some attach-time routines is
9073  *		available. (4362483)
9074  *
9075  *   Arguments: un - driver soft state (unit) structure
9076  *
9077  *     Context: Kernel thread context
9078  */
9079 
9080 static void
9081 sd_set_errstats(struct sd_lun *un)
9082 {
9083 	struct	sd_errstats	*stp;
9084 
9085 	ASSERT(un != NULL);
9086 	ASSERT(un->un_errstats != NULL);
9087 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9088 	ASSERT(stp != NULL);
9089 	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
9090 	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
9091 	(void) strncpy(stp->sd_revision.value.c,
9092 	    un->un_sd->sd_inq->inq_revision, 4);
9093 
9094 	/*
9095 	 * All the errstats are persistent across detach/attach,
9096 	 * so reset all the errstats here in case of the hot
9097 	 * replacement of disk drives, except for not changed
9098 	 * Sun qualified drives.
9099 	 */
9100 	if ((bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) != 0) ||
9101 	    (bcmp(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9102 	    sizeof (SD_INQUIRY(un)->inq_serial)) != 0)) {
9103 		stp->sd_softerrs.value.ui32 = 0;
9104 		stp->sd_harderrs.value.ui32 = 0;
9105 		stp->sd_transerrs.value.ui32 = 0;
9106 		stp->sd_rq_media_err.value.ui32 = 0;
9107 		stp->sd_rq_ntrdy_err.value.ui32 = 0;
9108 		stp->sd_rq_nodev_err.value.ui32 = 0;
9109 		stp->sd_rq_recov_err.value.ui32 = 0;
9110 		stp->sd_rq_illrq_err.value.ui32 = 0;
9111 		stp->sd_rq_pfa_err.value.ui32 = 0;
9112 	}
9113 
9114 	/*
9115 	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
9116 	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
9117 	 * (4376302))
9118 	 */
9119 	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
9120 		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9121 		    sizeof (SD_INQUIRY(un)->inq_serial));
9122 	}
9123 
9124 	if (un->un_f_blockcount_is_valid != TRUE) {
9125 		/*
9126 		 * Set capacity error stat to 0 for no media. This ensures
9127 		 * a valid capacity is displayed in response to 'iostat -E'
9128 		 * when no media is present in the device.
9129 		 */
9130 		stp->sd_capacity.value.ui64 = 0;
9131 	} else {
9132 		/*
9133 		 * Multiply un_blockcount by un->un_sys_blocksize to get
9134 		 * capacity.
9135 		 *
9136 		 * Note: for non-512 blocksize devices "un_blockcount" has been
9137 		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
9138 		 * (un_tgt_blocksize / un->un_sys_blocksize).
9139 		 */
9140 		stp->sd_capacity.value.ui64 = (uint64_t)
9141 		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
9142 	}
9143 }
9144 
9145 
9146 /*
9147  *    Function: sd_set_pstats
9148  *
9149  * Description: This routine instantiates and initializes the partition
9150  *              stats for each partition with more than zero blocks.
9151  *		(4363169)
9152  *
9153  *   Arguments: un - driver soft state (unit) structure
9154  *
9155  *     Context: Kernel thread context
9156  */
9157 
9158 static void
9159 sd_set_pstats(struct sd_lun *un)
9160 {
9161 	char	kstatname[KSTAT_STRLEN];
9162 	int	instance;
9163 	int	i;
9164 	diskaddr_t	nblks = 0;
9165 	char	*partname = NULL;
9166 
9167 	ASSERT(un != NULL);
9168 
9169 	instance = ddi_get_instance(SD_DEVINFO(un));
9170 
9171 	/* Note:x86: is this a VTOC8/VTOC16 difference? */
9172 	for (i = 0; i < NSDMAP; i++) {
9173 
9174 		if (cmlb_partinfo(un->un_cmlbhandle, i,
9175 		    &nblks, NULL, &partname, NULL, (void *)SD_PATH_DIRECT) != 0)
9176 			continue;
9177 		mutex_enter(SD_MUTEX(un));
9178 
9179 		if ((un->un_pstats[i] == NULL) &&
9180 		    (nblks != 0)) {
9181 
9182 			(void) snprintf(kstatname, sizeof (kstatname),
9183 			    "%s%d,%s", sd_label, instance,
9184 			    partname);
9185 
9186 			un->un_pstats[i] = kstat_create(sd_label,
9187 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
9188 			    1, KSTAT_FLAG_PERSISTENT);
9189 			if (un->un_pstats[i] != NULL) {
9190 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
9191 				kstat_install(un->un_pstats[i]);
9192 			}
9193 		}
9194 		mutex_exit(SD_MUTEX(un));
9195 	}
9196 }
9197 
9198 
9199 #if (defined(__fibre))
9200 /*
9201  *    Function: sd_init_event_callbacks
9202  *
9203  * Description: This routine initializes the insertion and removal event
9204  *		callbacks. (fibre only)
9205  *
9206  *   Arguments: un - driver soft state (unit) structure
9207  *
9208  *     Context: Kernel thread context
9209  */
9210 
9211 static void
9212 sd_init_event_callbacks(struct sd_lun *un)
9213 {
9214 	ASSERT(un != NULL);
9215 
9216 	if ((un->un_insert_event == NULL) &&
9217 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
9218 	    &un->un_insert_event) == DDI_SUCCESS)) {
9219 		/*
9220 		 * Add the callback for an insertion event
9221 		 */
9222 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9223 		    un->un_insert_event, sd_event_callback, (void *)un,
9224 		    &(un->un_insert_cb_id));
9225 	}
9226 
9227 	if ((un->un_remove_event == NULL) &&
9228 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
9229 	    &un->un_remove_event) == DDI_SUCCESS)) {
9230 		/*
9231 		 * Add the callback for a removal event
9232 		 */
9233 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9234 		    un->un_remove_event, sd_event_callback, (void *)un,
9235 		    &(un->un_remove_cb_id));
9236 	}
9237 }
9238 
9239 
9240 /*
9241  *    Function: sd_event_callback
9242  *
9243  * Description: This routine handles insert/remove events (photon). The
9244  *		state is changed to OFFLINE which can be used to supress
9245  *		error msgs. (fibre only)
9246  *
9247  *   Arguments: un - driver soft state (unit) structure
9248  *
9249  *     Context: Callout thread context
9250  */
9251 /* ARGSUSED */
9252 static void
9253 sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
9254     void *bus_impldata)
9255 {
9256 	struct sd_lun *un = (struct sd_lun *)arg;
9257 
9258 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
9259 	if (event == un->un_insert_event) {
9260 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
9261 		mutex_enter(SD_MUTEX(un));
9262 		if (un->un_state == SD_STATE_OFFLINE) {
9263 			if (un->un_last_state != SD_STATE_SUSPENDED) {
9264 				un->un_state = un->un_last_state;
9265 			} else {
9266 				/*
9267 				 * We have gone through SUSPEND/RESUME while
9268 				 * we were offline. Restore the last state
9269 				 */
9270 				un->un_state = un->un_save_state;
9271 			}
9272 		}
9273 		mutex_exit(SD_MUTEX(un));
9274 
9275 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
9276 	} else if (event == un->un_remove_event) {
9277 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
9278 		mutex_enter(SD_MUTEX(un));
9279 		/*
9280 		 * We need to handle an event callback that occurs during
9281 		 * the suspend operation, since we don't prevent it.
9282 		 */
9283 		if (un->un_state != SD_STATE_OFFLINE) {
9284 			if (un->un_state != SD_STATE_SUSPENDED) {
9285 				New_state(un, SD_STATE_OFFLINE);
9286 			} else {
9287 				un->un_last_state = SD_STATE_OFFLINE;
9288 			}
9289 		}
9290 		mutex_exit(SD_MUTEX(un));
9291 	} else {
9292 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
9293 		    "!Unknown event\n");
9294 	}
9295 
9296 }
9297 #endif
9298 
9299 /*
9300  *    Function: sd_cache_control()
9301  *
9302  * Description: This routine is the driver entry point for setting
9303  *		read and write caching by modifying the WCE (write cache
9304  *		enable) and RCD (read cache disable) bits of mode
9305  *		page 8 (MODEPAGE_CACHING).
9306  *
9307  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
9308  *                      structure for this target.
9309  *		rcd_flag - flag for controlling the read cache
9310  *		wce_flag - flag for controlling the write cache
9311  *
9312  * Return Code: EIO
9313  *		code returned by sd_send_scsi_MODE_SENSE and
9314  *		sd_send_scsi_MODE_SELECT
9315  *
9316  *     Context: Kernel Thread
9317  */
9318 
9319 static int
9320 sd_cache_control(sd_ssc_t *ssc, int rcd_flag, int wce_flag)
9321 {
9322 	struct mode_caching	*mode_caching_page;
9323 	uchar_t			*header;
9324 	size_t			buflen;
9325 	int			hdrlen;
9326 	int			bd_len;
9327 	int			rval = 0;
9328 	struct mode_header_grp2	*mhp;
9329 	struct sd_lun		*un;
9330 	int			status;
9331 
9332 	ASSERT(ssc != NULL);
9333 	un = ssc->ssc_un;
9334 	ASSERT(un != NULL);
9335 
9336 	/*
9337 	 * Do a test unit ready, otherwise a mode sense may not work if this
9338 	 * is the first command sent to the device after boot.
9339 	 */
9340 	status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
9341 	if (status != 0)
9342 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9343 
9344 	if (un->un_f_cfg_is_atapi == TRUE) {
9345 		hdrlen = MODE_HEADER_LENGTH_GRP2;
9346 	} else {
9347 		hdrlen = MODE_HEADER_LENGTH;
9348 	}
9349 
9350 	/*
9351 	 * Allocate memory for the retrieved mode page and its headers.  Set
9352 	 * a pointer to the page itself.  Use mode_cache_scsi3 to insure
9353 	 * we get all of the mode sense data otherwise, the mode select
9354 	 * will fail.  mode_cache_scsi3 is a superset of mode_caching.
9355 	 */
9356 	buflen = hdrlen + MODE_BLK_DESC_LENGTH +
9357 	    sizeof (struct mode_cache_scsi3);
9358 
9359 	header = kmem_zalloc(buflen, KM_SLEEP);
9360 
9361 	/* Get the information from the device. */
9362 	if (un->un_f_cfg_is_atapi == TRUE) {
9363 		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, header, buflen,
9364 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9365 	} else {
9366 		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, header, buflen,
9367 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9368 	}
9369 
9370 	if (rval != 0) {
9371 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
9372 		    "sd_cache_control: Mode Sense Failed\n");
9373 		goto mode_sense_failed;
9374 	}
9375 
9376 	/*
9377 	 * Determine size of Block Descriptors in order to locate
9378 	 * the mode page data. ATAPI devices return 0, SCSI devices
9379 	 * should return MODE_BLK_DESC_LENGTH.
9380 	 */
9381 	if (un->un_f_cfg_is_atapi == TRUE) {
9382 		mhp	= (struct mode_header_grp2 *)header;
9383 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
9384 	} else {
9385 		bd_len  = ((struct mode_header *)header)->bdesc_length;
9386 	}
9387 
9388 	if (bd_len > MODE_BLK_DESC_LENGTH) {
9389 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, 0,
9390 		    "sd_cache_control: Mode Sense returned invalid block "
9391 		    "descriptor length\n");
9392 		rval = EIO;
9393 		goto mode_sense_failed;
9394 	}
9395 
9396 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
9397 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
9398 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
9399 		    "sd_cache_control: Mode Sense caching page code mismatch "
9400 		    "%d\n", mode_caching_page->mode_page.code);
9401 		rval = EIO;
9402 		goto mode_sense_failed;
9403 	}
9404 
9405 	/* Check the relevant bits on successful mode sense. */
9406 	if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
9407 	    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
9408 	    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
9409 	    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
9410 
9411 		size_t sbuflen;
9412 		uchar_t save_pg;
9413 
9414 		/*
9415 		 * Construct select buffer length based on the
9416 		 * length of the sense data returned.
9417 		 */
9418 		sbuflen =  hdrlen + bd_len +
9419 		    sizeof (struct mode_page) +
9420 		    (int)mode_caching_page->mode_page.length;
9421 
9422 		/*
9423 		 * Set the caching bits as requested.
9424 		 */
9425 		if (rcd_flag == SD_CACHE_ENABLE)
9426 			mode_caching_page->rcd = 0;
9427 		else if (rcd_flag == SD_CACHE_DISABLE)
9428 			mode_caching_page->rcd = 1;
9429 
9430 		if (wce_flag == SD_CACHE_ENABLE)
9431 			mode_caching_page->wce = 1;
9432 		else if (wce_flag == SD_CACHE_DISABLE)
9433 			mode_caching_page->wce = 0;
9434 
9435 		/*
9436 		 * Save the page if the mode sense says the
9437 		 * drive supports it.
9438 		 */
9439 		save_pg = mode_caching_page->mode_page.ps ?
9440 		    SD_SAVE_PAGE : SD_DONTSAVE_PAGE;
9441 
9442 		/* Clear reserved bits before mode select. */
9443 		mode_caching_page->mode_page.ps = 0;
9444 
9445 		/*
9446 		 * Clear out mode header for mode select.
9447 		 * The rest of the retrieved page will be reused.
9448 		 */
9449 		bzero(header, hdrlen);
9450 
9451 		if (un->un_f_cfg_is_atapi == TRUE) {
9452 			mhp = (struct mode_header_grp2 *)header;
9453 			mhp->bdesc_length_hi = bd_len >> 8;
9454 			mhp->bdesc_length_lo = (uchar_t)bd_len & 0xff;
9455 		} else {
9456 			((struct mode_header *)header)->bdesc_length = bd_len;
9457 		}
9458 
9459 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9460 
9461 		/* Issue mode select to change the cache settings */
9462 		if (un->un_f_cfg_is_atapi == TRUE) {
9463 			rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP1, header,
9464 			    sbuflen, save_pg, SD_PATH_DIRECT);
9465 		} else {
9466 			rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, header,
9467 			    sbuflen, save_pg, SD_PATH_DIRECT);
9468 		}
9469 
9470 	}
9471 
9472 
9473 mode_sense_failed:
9474 
9475 	kmem_free(header, buflen);
9476 
9477 	if (rval != 0) {
9478 		if (rval == EIO)
9479 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
9480 		else
9481 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9482 	}
9483 	return (rval);
9484 }
9485 
9486 
9487 /*
9488  *    Function: sd_get_write_cache_enabled()
9489  *
9490  * Description: This routine is the driver entry point for determining if
9491  *		write caching is enabled.  It examines the WCE (write cache
9492  *		enable) bits of mode page 8 (MODEPAGE_CACHING).
9493  *
9494  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
9495  *                      structure for this target.
9496  *		is_enabled - pointer to int where write cache enabled state
9497  *		is returned (non-zero -> write cache enabled)
9498  *
9499  *
9500  * Return Code: EIO
9501  *		code returned by sd_send_scsi_MODE_SENSE
9502  *
9503  *     Context: Kernel Thread
9504  *
9505  * NOTE: If ioctl is added to disable write cache, this sequence should
9506  * be followed so that no locking is required for accesses to
9507  * un->un_f_write_cache_enabled:
9508  * 	do mode select to clear wce
9509  * 	do synchronize cache to flush cache
9510  * 	set un->un_f_write_cache_enabled = FALSE
9511  *
9512  * Conversely, an ioctl to enable the write cache should be done
9513  * in this order:
9514  * 	set un->un_f_write_cache_enabled = TRUE
9515  * 	do mode select to set wce
9516  */
9517 
9518 static int
9519 sd_get_write_cache_enabled(sd_ssc_t *ssc, int *is_enabled)
9520 {
9521 	struct mode_caching	*mode_caching_page;
9522 	uchar_t			*header;
9523 	size_t			buflen;
9524 	int			hdrlen;
9525 	int			bd_len;
9526 	int			rval = 0;
9527 	struct sd_lun		*un;
9528 	int			status;
9529 
9530 	ASSERT(ssc != NULL);
9531 	un = ssc->ssc_un;
9532 	ASSERT(un != NULL);
9533 	ASSERT(is_enabled != NULL);
9534 
9535 	/* in case of error, flag as enabled */
9536 	*is_enabled = TRUE;
9537 
9538 	/*
9539 	 * Do a test unit ready, otherwise a mode sense may not work if this
9540 	 * is the first command sent to the device after boot.
9541 	 */
9542 	status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
9543 
9544 	if (status != 0)
9545 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9546 
9547 	if (un->un_f_cfg_is_atapi == TRUE) {
9548 		hdrlen = MODE_HEADER_LENGTH_GRP2;
9549 	} else {
9550 		hdrlen = MODE_HEADER_LENGTH;
9551 	}
9552 
9553 	/*
9554 	 * Allocate memory for the retrieved mode page and its headers.  Set
9555 	 * a pointer to the page itself.
9556 	 */
9557 	buflen = hdrlen + MODE_BLK_DESC_LENGTH + sizeof (struct mode_caching);
9558 	header = kmem_zalloc(buflen, KM_SLEEP);
9559 
9560 	/* Get the information from the device. */
9561 	if (un->un_f_cfg_is_atapi == TRUE) {
9562 		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, header, buflen,
9563 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9564 	} else {
9565 		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, header, buflen,
9566 		    MODEPAGE_CACHING, SD_PATH_DIRECT);
9567 	}
9568 
9569 	if (rval != 0) {
9570 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
9571 		    "sd_get_write_cache_enabled: Mode Sense Failed\n");
9572 		goto mode_sense_failed;
9573 	}
9574 
9575 	/*
9576 	 * Determine size of Block Descriptors in order to locate
9577 	 * the mode page data. ATAPI devices return 0, SCSI devices
9578 	 * should return MODE_BLK_DESC_LENGTH.
9579 	 */
9580 	if (un->un_f_cfg_is_atapi == TRUE) {
9581 		struct mode_header_grp2	*mhp;
9582 		mhp	= (struct mode_header_grp2 *)header;
9583 		bd_len  = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
9584 	} else {
9585 		bd_len  = ((struct mode_header *)header)->bdesc_length;
9586 	}
9587 
9588 	if (bd_len > MODE_BLK_DESC_LENGTH) {
9589 		/* FMA should make upset complain here */
9590 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, 0,
9591 		    "sd_get_write_cache_enabled: Mode Sense returned invalid "
9592 		    "block descriptor length\n");
9593 		rval = EIO;
9594 		goto mode_sense_failed;
9595 	}
9596 
9597 	mode_caching_page = (struct mode_caching *)(header + hdrlen + bd_len);
9598 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
9599 		/* FMA could make upset complain here */
9600 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
9601 		    "sd_get_write_cache_enabled: Mode Sense caching page "
9602 		    "code mismatch %d\n", mode_caching_page->mode_page.code);
9603 		rval = EIO;
9604 		goto mode_sense_failed;
9605 	}
9606 	*is_enabled = mode_caching_page->wce;
9607 
9608 mode_sense_failed:
9609 	if (rval == 0) {
9610 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
9611 	} else if (rval == EIO) {
9612 		/*
9613 		 * Some disks do not support mode sense(6), we
9614 		 * should ignore this kind of error(sense key is
9615 		 * 0x5 - illegal request).
9616 		 */
9617 		uint8_t *sensep;
9618 		int senlen;
9619 
9620 		sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
9621 		senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
9622 		    ssc->ssc_uscsi_cmd->uscsi_rqresid);
9623 
9624 		if (senlen > 0 &&
9625 		    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
9626 			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
9627 		} else {
9628 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
9629 		}
9630 	} else {
9631 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9632 	}
9633 	kmem_free(header, buflen);
9634 	return (rval);
9635 }
9636 
9637 /*
9638  *    Function: sd_get_nv_sup()
9639  *
9640  * Description: This routine is the driver entry point for
9641  * determining whether non-volatile cache is supported. This
9642  * determination process works as follows:
9643  *
9644  * 1. sd first queries sd.conf on whether
9645  * suppress_cache_flush bit is set for this device.
9646  *
9647  * 2. if not there, then queries the internal disk table.
9648  *
9649  * 3. if either sd.conf or internal disk table specifies
9650  * cache flush be suppressed, we don't bother checking
9651  * NV_SUP bit.
9652  *
9653  * If SUPPRESS_CACHE_FLUSH bit is not set to 1, sd queries
9654  * the optional INQUIRY VPD page 0x86. If the device
9655  * supports VPD page 0x86, sd examines the NV_SUP
9656  * (non-volatile cache support) bit in the INQUIRY VPD page
9657  * 0x86:
9658  *   o If NV_SUP bit is set, sd assumes the device has a
9659  *   non-volatile cache and set the
9660  *   un_f_sync_nv_supported to TRUE.
9661  *   o Otherwise cache is not non-volatile,
9662  *   un_f_sync_nv_supported is set to FALSE.
9663  *
9664  * Arguments: un - driver soft state (unit) structure
9665  *
9666  * Return Code:
9667  *
9668  *     Context: Kernel Thread
9669  */
9670 
9671 static void
9672 sd_get_nv_sup(sd_ssc_t *ssc)
9673 {
9674 	int		rval		= 0;
9675 	uchar_t		*inq86		= NULL;
9676 	size_t		inq86_len	= MAX_INQUIRY_SIZE;
9677 	size_t		inq86_resid	= 0;
9678 	struct		dk_callback *dkc;
9679 	struct sd_lun	*un;
9680 
9681 	ASSERT(ssc != NULL);
9682 	un = ssc->ssc_un;
9683 	ASSERT(un != NULL);
9684 
9685 	mutex_enter(SD_MUTEX(un));
9686 
9687 	/*
9688 	 * Be conservative on the device's support of
9689 	 * SYNC_NV bit: un_f_sync_nv_supported is
9690 	 * initialized to be false.
9691 	 */
9692 	un->un_f_sync_nv_supported = FALSE;
9693 
9694 	/*
9695 	 * If either sd.conf or internal disk table
9696 	 * specifies cache flush be suppressed, then
9697 	 * we don't bother checking NV_SUP bit.
9698 	 */
9699 	if (un->un_f_suppress_cache_flush == TRUE) {
9700 		mutex_exit(SD_MUTEX(un));
9701 		return;
9702 	}
9703 
9704 	if (sd_check_vpd_page_support(ssc) == 0 &&
9705 	    un->un_vpd_page_mask & SD_VPD_EXTENDED_DATA_PG) {
9706 		mutex_exit(SD_MUTEX(un));
9707 		/* collect page 86 data if available */
9708 		inq86 = kmem_zalloc(inq86_len, KM_SLEEP);
9709 
9710 		rval = sd_send_scsi_INQUIRY(ssc, inq86, inq86_len,
9711 		    0x01, 0x86, &inq86_resid);
9712 
9713 		if (rval == 0 && (inq86_len - inq86_resid > 6)) {
9714 			SD_TRACE(SD_LOG_COMMON, un,
9715 			    "sd_get_nv_sup: \
9716 			    successfully get VPD page: %x \
9717 			    PAGE LENGTH: %x BYTE 6: %x\n",
9718 			    inq86[1], inq86[3], inq86[6]);
9719 
9720 			mutex_enter(SD_MUTEX(un));
9721 			/*
9722 			 * check the value of NV_SUP bit: only if the device
9723 			 * reports NV_SUP bit to be 1, the
9724 			 * un_f_sync_nv_supported bit will be set to true.
9725 			 */
9726 			if (inq86[6] & SD_VPD_NV_SUP) {
9727 				un->un_f_sync_nv_supported = TRUE;
9728 			}
9729 			mutex_exit(SD_MUTEX(un));
9730 		} else if (rval != 0) {
9731 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9732 		}
9733 
9734 		kmem_free(inq86, inq86_len);
9735 	} else {
9736 		mutex_exit(SD_MUTEX(un));
9737 	}
9738 
9739 	/*
9740 	 * Send a SYNC CACHE command to check whether
9741 	 * SYNC_NV bit is supported. This command should have
9742 	 * un_f_sync_nv_supported set to correct value.
9743 	 */
9744 	mutex_enter(SD_MUTEX(un));
9745 	if (un->un_f_sync_nv_supported) {
9746 		mutex_exit(SD_MUTEX(un));
9747 		dkc = kmem_zalloc(sizeof (struct dk_callback), KM_SLEEP);
9748 		dkc->dkc_flag = FLUSH_VOLATILE;
9749 		(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
9750 
9751 		/*
9752 		 * Send a TEST UNIT READY command to the device. This should
9753 		 * clear any outstanding UNIT ATTENTION that may be present.
9754 		 */
9755 		rval = sd_send_scsi_TEST_UNIT_READY(ssc, SD_DONT_RETRY_TUR);
9756 		if (rval != 0)
9757 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9758 
9759 		kmem_free(dkc, sizeof (struct dk_callback));
9760 	} else {
9761 		mutex_exit(SD_MUTEX(un));
9762 	}
9763 
9764 	SD_TRACE(SD_LOG_COMMON, un, "sd_get_nv_sup: \
9765 	    un_f_suppress_cache_flush is set to %d\n",
9766 	    un->un_f_suppress_cache_flush);
9767 }
9768 
9769 /*
9770  *    Function: sd_make_device
9771  *
9772  * Description: Utility routine to return the Solaris device number from
9773  *		the data in the device's dev_info structure.
9774  *
9775  * Return Code: The Solaris device number
9776  *
9777  *     Context: Any
9778  */
9779 
9780 static dev_t
9781 sd_make_device(dev_info_t *devi)
9782 {
9783 	return (makedevice(ddi_driver_major(devi),
9784 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
9785 }
9786 
9787 
9788 /*
9789  *    Function: sd_pm_entry
9790  *
9791  * Description: Called at the start of a new command to manage power
9792  *		and busy status of a device. This includes determining whether
9793  *		the current power state of the device is sufficient for
9794  *		performing the command or whether it must be changed.
9795  *		The PM framework is notified appropriately.
9796  *		Only with a return status of DDI_SUCCESS will the
9797  *		component be busy to the framework.
9798  *
9799  *		All callers of sd_pm_entry must check the return status
9800  *		and only call sd_pm_exit it it was DDI_SUCCESS. A status
9801  *		of DDI_FAILURE indicates the device failed to power up.
9802  *		In this case un_pm_count has been adjusted so the result
9803  *		on exit is still powered down, ie. count is less than 0.
9804  *		Calling sd_pm_exit with this count value hits an ASSERT.
9805  *
9806  * Return Code: DDI_SUCCESS or DDI_FAILURE
9807  *
9808  *     Context: Kernel thread context.
9809  */
9810 
9811 static int
9812 sd_pm_entry(struct sd_lun *un)
9813 {
9814 	int return_status = DDI_SUCCESS;
9815 
9816 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9817 	ASSERT(!mutex_owned(&un->un_pm_mutex));
9818 
9819 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
9820 
9821 	if (un->un_f_pm_is_enabled == FALSE) {
9822 		SD_TRACE(SD_LOG_IO_PM, un,
9823 		    "sd_pm_entry: exiting, PM not enabled\n");
9824 		return (return_status);
9825 	}
9826 
9827 	/*
9828 	 * Just increment a counter if PM is enabled. On the transition from
9829 	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
9830 	 * the count with each IO and mark the device as idle when the count
9831 	 * hits 0.
9832 	 *
9833 	 * If the count is less than 0 the device is powered down. If a powered
9834 	 * down device is successfully powered up then the count must be
9835 	 * incremented to reflect the power up. Note that it'll get incremented
9836 	 * a second time to become busy.
9837 	 *
9838 	 * Because the following has the potential to change the device state
9839 	 * and must release the un_pm_mutex to do so, only one thread can be
9840 	 * allowed through at a time.
9841 	 */
9842 
9843 	mutex_enter(&un->un_pm_mutex);
9844 	while (un->un_pm_busy == TRUE) {
9845 		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
9846 	}
9847 	un->un_pm_busy = TRUE;
9848 
9849 	if (un->un_pm_count < 1) {
9850 
9851 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
9852 
9853 		/*
9854 		 * Indicate we are now busy so the framework won't attempt to
9855 		 * power down the device. This call will only fail if either
9856 		 * we passed a bad component number or the device has no
9857 		 * components. Neither of these should ever happen.
9858 		 */
9859 		mutex_exit(&un->un_pm_mutex);
9860 		return_status = pm_busy_component(SD_DEVINFO(un), 0);
9861 		ASSERT(return_status == DDI_SUCCESS);
9862 
9863 		mutex_enter(&un->un_pm_mutex);
9864 
9865 		if (un->un_pm_count < 0) {
9866 			mutex_exit(&un->un_pm_mutex);
9867 
9868 			SD_TRACE(SD_LOG_IO_PM, un,
9869 			    "sd_pm_entry: power up component\n");
9870 
9871 			/*
9872 			 * pm_raise_power will cause sdpower to be called
9873 			 * which brings the device power level to the
9874 			 * desired state, If successful, un_pm_count and
9875 			 * un_power_level will be updated appropriately.
9876 			 */
9877 			return_status = pm_raise_power(SD_DEVINFO(un), 0,
9878 			    SD_PM_STATE_ACTIVE(un));
9879 
9880 			mutex_enter(&un->un_pm_mutex);
9881 
9882 			if (return_status != DDI_SUCCESS) {
9883 				/*
9884 				 * Power up failed.
9885 				 * Idle the device and adjust the count
9886 				 * so the result on exit is that we're
9887 				 * still powered down, ie. count is less than 0.
9888 				 */
9889 				SD_TRACE(SD_LOG_IO_PM, un,
9890 				    "sd_pm_entry: power up failed,"
9891 				    " idle the component\n");
9892 
9893 				(void) pm_idle_component(SD_DEVINFO(un), 0);
9894 				un->un_pm_count--;
9895 			} else {
9896 				/*
9897 				 * Device is powered up, verify the
9898 				 * count is non-negative.
9899 				 * This is debug only.
9900 				 */
9901 				ASSERT(un->un_pm_count == 0);
9902 			}
9903 		}
9904 
9905 		if (return_status == DDI_SUCCESS) {
9906 			/*
9907 			 * For performance, now that the device has been tagged
9908 			 * as busy, and it's known to be powered up, update the
9909 			 * chain types to use jump tables that do not include
9910 			 * pm. This significantly lowers the overhead and
9911 			 * therefore improves performance.
9912 			 */
9913 
9914 			mutex_exit(&un->un_pm_mutex);
9915 			mutex_enter(SD_MUTEX(un));
9916 			SD_TRACE(SD_LOG_IO_PM, un,
9917 			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
9918 			    un->un_uscsi_chain_type);
9919 
9920 			if (un->un_f_non_devbsize_supported) {
9921 				un->un_buf_chain_type =
9922 				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
9923 			} else {
9924 				un->un_buf_chain_type =
9925 				    SD_CHAIN_INFO_DISK_NO_PM;
9926 			}
9927 			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
9928 
9929 			SD_TRACE(SD_LOG_IO_PM, un,
9930 			    "             changed  uscsi_chain_type to   %d\n",
9931 			    un->un_uscsi_chain_type);
9932 			mutex_exit(SD_MUTEX(un));
9933 			mutex_enter(&un->un_pm_mutex);
9934 
9935 			if (un->un_pm_idle_timeid == NULL) {
9936 				/* 300 ms. */
9937 				un->un_pm_idle_timeid =
9938 				    timeout(sd_pm_idletimeout_handler, un,
9939 				    (drv_usectohz((clock_t)300000)));
9940 				/*
9941 				 * Include an extra call to busy which keeps the
9942 				 * device busy with-respect-to the PM layer
9943 				 * until the timer fires, at which time it'll
9944 				 * get the extra idle call.
9945 				 */
9946 				(void) pm_busy_component(SD_DEVINFO(un), 0);
9947 			}
9948 		}
9949 	}
9950 	un->un_pm_busy = FALSE;
9951 	/* Next... */
9952 	cv_signal(&un->un_pm_busy_cv);
9953 
9954 	un->un_pm_count++;
9955 
9956 	SD_TRACE(SD_LOG_IO_PM, un,
9957 	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
9958 
9959 	mutex_exit(&un->un_pm_mutex);
9960 
9961 	return (return_status);
9962 }
9963 
9964 
9965 /*
9966  *    Function: sd_pm_exit
9967  *
9968  * Description: Called at the completion of a command to manage busy
9969  *		status for the device. If the device becomes idle the
9970  *		PM framework is notified.
9971  *
9972  *     Context: Kernel thread context
9973  */
9974 
9975 static void
9976 sd_pm_exit(struct sd_lun *un)
9977 {
9978 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9979 	ASSERT(!mutex_owned(&un->un_pm_mutex));
9980 
9981 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
9982 
9983 	/*
9984 	 * After attach the following flag is only read, so don't
9985 	 * take the penalty of acquiring a mutex for it.
9986 	 */
9987 	if (un->un_f_pm_is_enabled == TRUE) {
9988 
9989 		mutex_enter(&un->un_pm_mutex);
9990 		un->un_pm_count--;
9991 
9992 		SD_TRACE(SD_LOG_IO_PM, un,
9993 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
9994 
9995 		ASSERT(un->un_pm_count >= 0);
9996 		if (un->un_pm_count == 0) {
9997 			mutex_exit(&un->un_pm_mutex);
9998 
9999 			SD_TRACE(SD_LOG_IO_PM, un,
10000 			    "sd_pm_exit: idle component\n");
10001 
10002 			(void) pm_idle_component(SD_DEVINFO(un), 0);
10003 
10004 		} else {
10005 			mutex_exit(&un->un_pm_mutex);
10006 		}
10007 	}
10008 
10009 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
10010 }
10011 
10012 
10013 /*
10014  *    Function: sdopen
10015  *
10016  * Description: Driver's open(9e) entry point function.
10017  *
10018  *   Arguments: dev_i   - pointer to device number
10019  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
10020  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10021  *		cred_p  - user credential pointer
10022  *
10023  * Return Code: EINVAL
10024  *		ENXIO
10025  *		EIO
10026  *		EROFS
10027  *		EBUSY
10028  *
10029  *     Context: Kernel thread context
10030  */
10031 /* ARGSUSED */
10032 static int
10033 sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
10034 {
10035 	struct sd_lun	*un;
10036 	int		nodelay;
10037 	int		part;
10038 	uint64_t	partmask;
10039 	int		instance;
10040 	dev_t		dev;
10041 	int		rval = EIO;
10042 	diskaddr_t	nblks = 0;
10043 	diskaddr_t	label_cap;
10044 
10045 	/* Validate the open type */
10046 	if (otyp >= OTYPCNT) {
10047 		return (EINVAL);
10048 	}
10049 
10050 	dev = *dev_p;
10051 	instance = SDUNIT(dev);
10052 	mutex_enter(&sd_detach_mutex);
10053 
10054 	/*
10055 	 * Fail the open if there is no softstate for the instance, or
10056 	 * if another thread somewhere is trying to detach the instance.
10057 	 */
10058 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
10059 	    (un->un_detach_count != 0)) {
10060 		mutex_exit(&sd_detach_mutex);
10061 		/*
10062 		 * The probe cache only needs to be cleared when open (9e) fails
10063 		 * with ENXIO (4238046).
10064 		 */
10065 		/*
10066 		 * un-conditionally clearing probe cache is ok with
10067 		 * separate sd/ssd binaries
10068 		 * x86 platform can be an issue with both parallel
10069 		 * and fibre in 1 binary
10070 		 */
10071 		sd_scsi_clear_probe_cache();
10072 		return (ENXIO);
10073 	}
10074 
10075 	/*
10076 	 * The un_layer_count is to prevent another thread in specfs from
10077 	 * trying to detach the instance, which can happen when we are
10078 	 * called from a higher-layer driver instead of thru specfs.
10079 	 * This will not be needed when DDI provides a layered driver
10080 	 * interface that allows specfs to know that an instance is in
10081 	 * use by a layered driver & should not be detached.
10082 	 *
10083 	 * Note: the semantics for layered driver opens are exactly one
10084 	 * close for every open.
10085 	 */
10086 	if (otyp == OTYP_LYR) {
10087 		un->un_layer_count++;
10088 	}
10089 
10090 	/*
10091 	 * Keep a count of the current # of opens in progress. This is because
10092 	 * some layered drivers try to call us as a regular open. This can
10093 	 * cause problems that we cannot prevent, however by keeping this count
10094 	 * we can at least keep our open and detach routines from racing against
10095 	 * each other under such conditions.
10096 	 */
10097 	un->un_opens_in_progress++;
10098 	mutex_exit(&sd_detach_mutex);
10099 
10100 	nodelay  = (flag & (FNDELAY | FNONBLOCK));
10101 	part	 = SDPART(dev);
10102 	partmask = 1 << part;
10103 
10104 	/*
10105 	 * We use a semaphore here in order to serialize
10106 	 * open and close requests on the device.
10107 	 */
10108 	sema_p(&un->un_semoclose);
10109 
10110 	mutex_enter(SD_MUTEX(un));
10111 
10112 	/*
10113 	 * All device accesses go thru sdstrategy() where we check
10114 	 * on suspend status but there could be a scsi_poll command,
10115 	 * which bypasses sdstrategy(), so we need to check pm
10116 	 * status.
10117 	 */
10118 
10119 	if (!nodelay) {
10120 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10121 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10122 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10123 		}
10124 
10125 		mutex_exit(SD_MUTEX(un));
10126 		if (sd_pm_entry(un) != DDI_SUCCESS) {
10127 			rval = EIO;
10128 			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
10129 			    "sdopen: sd_pm_entry failed\n");
10130 			goto open_failed_with_pm;
10131 		}
10132 		mutex_enter(SD_MUTEX(un));
10133 	}
10134 
10135 	/* check for previous exclusive open */
10136 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
10137 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10138 	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
10139 	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
10140 
10141 	if (un->un_exclopen & (partmask)) {
10142 		goto excl_open_fail;
10143 	}
10144 
10145 	if (flag & FEXCL) {
10146 		int i;
10147 		if (un->un_ocmap.lyropen[part]) {
10148 			goto excl_open_fail;
10149 		}
10150 		for (i = 0; i < (OTYPCNT - 1); i++) {
10151 			if (un->un_ocmap.regopen[i] & (partmask)) {
10152 				goto excl_open_fail;
10153 			}
10154 		}
10155 	}
10156 
10157 	/*
10158 	 * Check the write permission if this is a removable media device,
10159 	 * NDELAY has not been set, and writable permission is requested.
10160 	 *
10161 	 * Note: If NDELAY was set and this is write-protected media the WRITE
10162 	 * attempt will fail with EIO as part of the I/O processing. This is a
10163 	 * more permissive implementation that allows the open to succeed and
10164 	 * WRITE attempts to fail when appropriate.
10165 	 */
10166 	if (un->un_f_chk_wp_open) {
10167 		if ((flag & FWRITE) && (!nodelay)) {
10168 			mutex_exit(SD_MUTEX(un));
10169 			/*
10170 			 * Defer the check for write permission on writable
10171 			 * DVD drive till sdstrategy and will not fail open even
10172 			 * if FWRITE is set as the device can be writable
10173 			 * depending upon the media and the media can change
10174 			 * after the call to open().
10175 			 */
10176 			if (un->un_f_dvdram_writable_device == FALSE) {
10177 				if (ISCD(un) || sr_check_wp(dev)) {
10178 				rval = EROFS;
10179 				mutex_enter(SD_MUTEX(un));
10180 				SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10181 				    "write to cd or write protected media\n");
10182 				goto open_fail;
10183 				}
10184 			}
10185 			mutex_enter(SD_MUTEX(un));
10186 		}
10187 	}
10188 
10189 	/*
10190 	 * If opening in NDELAY/NONBLOCK mode, just return.
10191 	 * Check if disk is ready and has a valid geometry later.
10192 	 */
10193 	if (!nodelay) {
10194 		sd_ssc_t	*ssc;
10195 
10196 		mutex_exit(SD_MUTEX(un));
10197 		ssc = sd_ssc_init(un);
10198 		rval = sd_ready_and_valid(ssc, part);
10199 		sd_ssc_fini(ssc);
10200 		mutex_enter(SD_MUTEX(un));
10201 		/*
10202 		 * Fail if device is not ready or if the number of disk
10203 		 * blocks is zero or negative for non CD devices.
10204 		 */
10205 
10206 		nblks = 0;
10207 
10208 		if (rval == SD_READY_VALID && (!ISCD(un))) {
10209 			/* if cmlb_partinfo fails, nblks remains 0 */
10210 			mutex_exit(SD_MUTEX(un));
10211 			(void) cmlb_partinfo(un->un_cmlbhandle, part, &nblks,
10212 			    NULL, NULL, NULL, (void *)SD_PATH_DIRECT);
10213 			mutex_enter(SD_MUTEX(un));
10214 		}
10215 
10216 		if ((rval != SD_READY_VALID) ||
10217 		    (!ISCD(un) && nblks <= 0)) {
10218 			rval = un->un_f_has_removable_media ? ENXIO : EIO;
10219 			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10220 			    "device not ready or invalid disk block value\n");
10221 			goto open_fail;
10222 		}
10223 #if defined(__i386) || defined(__amd64)
10224 	} else {
10225 		uchar_t *cp;
10226 		/*
10227 		 * x86 requires special nodelay handling, so that p0 is
10228 		 * always defined and accessible.
10229 		 * Invalidate geometry only if device is not already open.
10230 		 */
10231 		cp = &un->un_ocmap.chkd[0];
10232 		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10233 			if (*cp != (uchar_t)0) {
10234 				break;
10235 			}
10236 			cp++;
10237 		}
10238 		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10239 			mutex_exit(SD_MUTEX(un));
10240 			cmlb_invalidate(un->un_cmlbhandle,
10241 			    (void *)SD_PATH_DIRECT);
10242 			mutex_enter(SD_MUTEX(un));
10243 		}
10244 
10245 #endif
10246 	}
10247 
10248 	if (otyp == OTYP_LYR) {
10249 		un->un_ocmap.lyropen[part]++;
10250 	} else {
10251 		un->un_ocmap.regopen[otyp] |= partmask;
10252 	}
10253 
10254 	/* Set up open and exclusive open flags */
10255 	if (flag & FEXCL) {
10256 		un->un_exclopen |= (partmask);
10257 	}
10258 
10259 	/*
10260 	 * If the lun is EFI labeled and lun capacity is greater than the
10261 	 * capacity contained in the label, log a sys-event to notify the
10262 	 * interested module.
10263 	 * To avoid an infinite loop of logging sys-event, we only log the
10264 	 * event when the lun is not opened in NDELAY mode. The event handler
10265 	 * should open the lun in NDELAY mode.
10266 	 */
10267 	if (!(flag & FNDELAY)) {
10268 		mutex_exit(SD_MUTEX(un));
10269 		if (cmlb_efi_label_capacity(un->un_cmlbhandle, &label_cap,
10270 		    (void*)SD_PATH_DIRECT) == 0) {
10271 			mutex_enter(SD_MUTEX(un));
10272 			if (un->un_f_blockcount_is_valid &&
10273 			    un->un_blockcount > label_cap) {
10274 				mutex_exit(SD_MUTEX(un));
10275 				sd_log_lun_expansion_event(un,
10276 				    (nodelay ? KM_NOSLEEP : KM_SLEEP));
10277 				mutex_enter(SD_MUTEX(un));
10278 			}
10279 		} else {
10280 			mutex_enter(SD_MUTEX(un));
10281 		}
10282 	}
10283 
10284 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10285 	    "open of part %d type %d\n", part, otyp);
10286 
10287 	mutex_exit(SD_MUTEX(un));
10288 	if (!nodelay) {
10289 		sd_pm_exit(un);
10290 	}
10291 
10292 	sema_v(&un->un_semoclose);
10293 
10294 	mutex_enter(&sd_detach_mutex);
10295 	un->un_opens_in_progress--;
10296 	mutex_exit(&sd_detach_mutex);
10297 
10298 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
10299 	return (DDI_SUCCESS);
10300 
10301 excl_open_fail:
10302 	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
10303 	rval = EBUSY;
10304 
10305 open_fail:
10306 	mutex_exit(SD_MUTEX(un));
10307 
10308 	/*
10309 	 * On a failed open we must exit the pm management.
10310 	 */
10311 	if (!nodelay) {
10312 		sd_pm_exit(un);
10313 	}
10314 open_failed_with_pm:
10315 	sema_v(&un->un_semoclose);
10316 
10317 	mutex_enter(&sd_detach_mutex);
10318 	un->un_opens_in_progress--;
10319 	if (otyp == OTYP_LYR) {
10320 		un->un_layer_count--;
10321 	}
10322 	mutex_exit(&sd_detach_mutex);
10323 
10324 	return (rval);
10325 }
10326 
10327 
10328 /*
10329  *    Function: sdclose
10330  *
10331  * Description: Driver's close(9e) entry point function.
10332  *
10333  *   Arguments: dev    - device number
10334  *		flag   - file status flag, informational only
10335  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10336  *		cred_p - user credential pointer
10337  *
10338  * Return Code: ENXIO
10339  *
10340  *     Context: Kernel thread context
10341  */
10342 /* ARGSUSED */
10343 static int
10344 sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
10345 {
10346 	struct sd_lun	*un;
10347 	uchar_t		*cp;
10348 	int		part;
10349 	int		nodelay;
10350 	int		rval = 0;
10351 
10352 	/* Validate the open type */
10353 	if (otyp >= OTYPCNT) {
10354 		return (ENXIO);
10355 	}
10356 
10357 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10358 		return (ENXIO);
10359 	}
10360 
10361 	part = SDPART(dev);
10362 	nodelay = flag & (FNDELAY | FNONBLOCK);
10363 
10364 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10365 	    "sdclose: close of part %d type %d\n", part, otyp);
10366 
10367 	/*
10368 	 * We use a semaphore here in order to serialize
10369 	 * open and close requests on the device.
10370 	 */
10371 	sema_p(&un->un_semoclose);
10372 
10373 	mutex_enter(SD_MUTEX(un));
10374 
10375 	/* Don't proceed if power is being changed. */
10376 	while (un->un_state == SD_STATE_PM_CHANGING) {
10377 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10378 	}
10379 
10380 	if (un->un_exclopen & (1 << part)) {
10381 		un->un_exclopen &= ~(1 << part);
10382 	}
10383 
10384 	/* Update the open partition map */
10385 	if (otyp == OTYP_LYR) {
10386 		un->un_ocmap.lyropen[part] -= 1;
10387 	} else {
10388 		un->un_ocmap.regopen[otyp] &= ~(1 << part);
10389 	}
10390 
10391 	cp = &un->un_ocmap.chkd[0];
10392 	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10393 		if (*cp != NULL) {
10394 			break;
10395 		}
10396 		cp++;
10397 	}
10398 
10399 	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10400 		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
10401 
10402 		/*
10403 		 * We avoid persistance upon the last close, and set
10404 		 * the throttle back to the maximum.
10405 		 */
10406 		un->un_throttle = un->un_saved_throttle;
10407 
10408 		if (un->un_state == SD_STATE_OFFLINE) {
10409 			if (un->un_f_is_fibre == FALSE) {
10410 				scsi_log(SD_DEVINFO(un), sd_label,
10411 				    CE_WARN, "offline\n");
10412 			}
10413 			mutex_exit(SD_MUTEX(un));
10414 			cmlb_invalidate(un->un_cmlbhandle,
10415 			    (void *)SD_PATH_DIRECT);
10416 			mutex_enter(SD_MUTEX(un));
10417 
10418 		} else {
10419 			/*
10420 			 * Flush any outstanding writes in NVRAM cache.
10421 			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
10422 			 * cmd, it may not work for non-Pluto devices.
10423 			 * SYNCHRONIZE CACHE is not required for removables,
10424 			 * except DVD-RAM drives.
10425 			 *
10426 			 * Also note: because SYNCHRONIZE CACHE is currently
10427 			 * the only command issued here that requires the
10428 			 * drive be powered up, only do the power up before
10429 			 * sending the Sync Cache command. If additional
10430 			 * commands are added which require a powered up
10431 			 * drive, the following sequence may have to change.
10432 			 *
10433 			 * And finally, note that parallel SCSI on SPARC
10434 			 * only issues a Sync Cache to DVD-RAM, a newly
10435 			 * supported device.
10436 			 */
10437 #if defined(__i386) || defined(__amd64)
10438 			if ((un->un_f_sync_cache_supported &&
10439 			    un->un_f_sync_cache_required) ||
10440 			    un->un_f_dvdram_writable_device == TRUE) {
10441 #else
10442 			if (un->un_f_dvdram_writable_device == TRUE) {
10443 #endif
10444 				mutex_exit(SD_MUTEX(un));
10445 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10446 					rval =
10447 					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
10448 					    NULL);
10449 					/* ignore error if not supported */
10450 					if (rval == ENOTSUP) {
10451 						rval = 0;
10452 					} else if (rval != 0) {
10453 						rval = EIO;
10454 					}
10455 					sd_pm_exit(un);
10456 				} else {
10457 					rval = EIO;
10458 				}
10459 				mutex_enter(SD_MUTEX(un));
10460 			}
10461 
10462 			/*
10463 			 * For devices which supports DOOR_LOCK, send an ALLOW
10464 			 * MEDIA REMOVAL command, but don't get upset if it
10465 			 * fails. We need to raise the power of the drive before
10466 			 * we can call sd_send_scsi_DOORLOCK()
10467 			 */
10468 			if (un->un_f_doorlock_supported) {
10469 				mutex_exit(SD_MUTEX(un));
10470 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10471 					sd_ssc_t	*ssc;
10472 
10473 					ssc = sd_ssc_init(un);
10474 					rval = sd_send_scsi_DOORLOCK(ssc,
10475 					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
10476 					if (rval != 0)
10477 						sd_ssc_assessment(ssc,
10478 						    SD_FMT_IGNORE);
10479 					sd_ssc_fini(ssc);
10480 
10481 					sd_pm_exit(un);
10482 					if (ISCD(un) && (rval != 0) &&
10483 					    (nodelay != 0)) {
10484 						rval = ENXIO;
10485 					}
10486 				} else {
10487 					rval = EIO;
10488 				}
10489 				mutex_enter(SD_MUTEX(un));
10490 			}
10491 
10492 			/*
10493 			 * If a device has removable media, invalidate all
10494 			 * parameters related to media, such as geometry,
10495 			 * blocksize, and blockcount.
10496 			 */
10497 			if (un->un_f_has_removable_media) {
10498 				sr_ejected(un);
10499 			}
10500 
10501 			/*
10502 			 * Destroy the cache (if it exists) which was
10503 			 * allocated for the write maps since this is
10504 			 * the last close for this media.
10505 			 */
10506 			if (un->un_wm_cache) {
10507 				/*
10508 				 * Check if there are pending commands.
10509 				 * and if there are give a warning and
10510 				 * do not destroy the cache.
10511 				 */
10512 				if (un->un_ncmds_in_driver > 0) {
10513 					scsi_log(SD_DEVINFO(un),
10514 					    sd_label, CE_WARN,
10515 					    "Unable to clean up memory "
10516 					    "because of pending I/O\n");
10517 				} else {
10518 					kmem_cache_destroy(
10519 					    un->un_wm_cache);
10520 					un->un_wm_cache = NULL;
10521 				}
10522 			}
10523 		}
10524 	}
10525 
10526 	mutex_exit(SD_MUTEX(un));
10527 	sema_v(&un->un_semoclose);
10528 
10529 	if (otyp == OTYP_LYR) {
10530 		mutex_enter(&sd_detach_mutex);
10531 		/*
10532 		 * The detach routine may run when the layer count
10533 		 * drops to zero.
10534 		 */
10535 		un->un_layer_count--;
10536 		mutex_exit(&sd_detach_mutex);
10537 	}
10538 
10539 	return (rval);
10540 }
10541 
10542 
10543 /*
10544  *    Function: sd_ready_and_valid
10545  *
10546  * Description: Test if device is ready and has a valid geometry.
10547  *
10548  *   Arguments: ssc - sd_ssc_t will contain un
10549  *		un  - driver soft state (unit) structure
10550  *
10551  * Return Code: SD_READY_VALID		ready and valid label
10552  *		SD_NOT_READY_VALID	not ready, no label
10553  *		SD_RESERVED_BY_OTHERS	reservation conflict
10554  *
10555  *     Context: Never called at interrupt context.
10556  */
10557 
10558 static int
10559 sd_ready_and_valid(sd_ssc_t *ssc, int part)
10560 {
10561 	struct sd_errstats	*stp;
10562 	uint64_t		capacity;
10563 	uint_t			lbasize;
10564 	int			rval = SD_READY_VALID;
10565 	char			name_str[48];
10566 	boolean_t		is_valid;
10567 	struct sd_lun		*un;
10568 	int			status;
10569 
10570 	ASSERT(ssc != NULL);
10571 	un = ssc->ssc_un;
10572 	ASSERT(un != NULL);
10573 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10574 
10575 	mutex_enter(SD_MUTEX(un));
10576 	/*
10577 	 * If a device has removable media, we must check if media is
10578 	 * ready when checking if this device is ready and valid.
10579 	 */
10580 	if (un->un_f_has_removable_media) {
10581 		mutex_exit(SD_MUTEX(un));
10582 		status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
10583 
10584 		if (status != 0) {
10585 			rval = SD_NOT_READY_VALID;
10586 			mutex_enter(SD_MUTEX(un));
10587 
10588 			/* Ignore all failed status for removalbe media */
10589 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10590 
10591 			goto done;
10592 		}
10593 
10594 		is_valid = SD_IS_VALID_LABEL(un);
10595 		mutex_enter(SD_MUTEX(un));
10596 		if (!is_valid ||
10597 		    (un->un_f_blockcount_is_valid == FALSE) ||
10598 		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
10599 
10600 			/* capacity has to be read every open. */
10601 			mutex_exit(SD_MUTEX(un));
10602 			status = sd_send_scsi_READ_CAPACITY(ssc, &capacity,
10603 			    &lbasize, SD_PATH_DIRECT);
10604 
10605 			if (status != 0) {
10606 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10607 
10608 				cmlb_invalidate(un->un_cmlbhandle,
10609 				    (void *)SD_PATH_DIRECT);
10610 				mutex_enter(SD_MUTEX(un));
10611 				rval = SD_NOT_READY_VALID;
10612 
10613 				goto done;
10614 			} else {
10615 				mutex_enter(SD_MUTEX(un));
10616 				sd_update_block_info(un, lbasize, capacity);
10617 			}
10618 		}
10619 
10620 		/*
10621 		 * Check if the media in the device is writable or not.
10622 		 */
10623 		if (!is_valid && ISCD(un)) {
10624 			sd_check_for_writable_cd(ssc, SD_PATH_DIRECT);
10625 		}
10626 
10627 	} else {
10628 		/*
10629 		 * Do a test unit ready to clear any unit attention from non-cd
10630 		 * devices.
10631 		 */
10632 		mutex_exit(SD_MUTEX(un));
10633 
10634 		status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
10635 		if (status != 0) {
10636 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10637 		}
10638 
10639 		mutex_enter(SD_MUTEX(un));
10640 	}
10641 
10642 
10643 	/*
10644 	 * If this is a non 512 block device, allocate space for
10645 	 * the wmap cache. This is being done here since every time
10646 	 * a media is changed this routine will be called and the
10647 	 * block size is a function of media rather than device.
10648 	 */
10649 	if ((un->un_f_rmw_type != SD_RMW_TYPE_RETURN_ERROR ||
10650 	    un->un_f_non_devbsize_supported) &&
10651 	    un->un_tgt_blocksize != DEV_BSIZE) {
10652 		if (!(un->un_wm_cache)) {
10653 			(void) snprintf(name_str, sizeof (name_str),
10654 			    "%s%d_cache",
10655 			    ddi_driver_name(SD_DEVINFO(un)),
10656 			    ddi_get_instance(SD_DEVINFO(un)));
10657 			un->un_wm_cache = kmem_cache_create(
10658 			    name_str, sizeof (struct sd_w_map),
10659 			    8, sd_wm_cache_constructor,
10660 			    sd_wm_cache_destructor, NULL,
10661 			    (void *)un, NULL, 0);
10662 			if (!(un->un_wm_cache)) {
10663 				rval = ENOMEM;
10664 				goto done;
10665 			}
10666 		}
10667 	}
10668 
10669 	if (un->un_state == SD_STATE_NORMAL) {
10670 		/*
10671 		 * If the target is not yet ready here (defined by a TUR
10672 		 * failure), invalidate the geometry and print an 'offline'
10673 		 * message. This is a legacy message, as the state of the
10674 		 * target is not actually changed to SD_STATE_OFFLINE.
10675 		 *
10676 		 * If the TUR fails for EACCES (Reservation Conflict),
10677 		 * SD_RESERVED_BY_OTHERS will be returned to indicate
10678 		 * reservation conflict. If the TUR fails for other
10679 		 * reasons, SD_NOT_READY_VALID will be returned.
10680 		 */
10681 		int err;
10682 
10683 		mutex_exit(SD_MUTEX(un));
10684 		err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
10685 		mutex_enter(SD_MUTEX(un));
10686 
10687 		if (err != 0) {
10688 			mutex_exit(SD_MUTEX(un));
10689 			cmlb_invalidate(un->un_cmlbhandle,
10690 			    (void *)SD_PATH_DIRECT);
10691 			mutex_enter(SD_MUTEX(un));
10692 			if (err == EACCES) {
10693 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10694 				    "reservation conflict\n");
10695 				rval = SD_RESERVED_BY_OTHERS;
10696 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10697 			} else {
10698 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10699 				    "drive offline\n");
10700 				rval = SD_NOT_READY_VALID;
10701 				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
10702 			}
10703 			goto done;
10704 		}
10705 	}
10706 
10707 	if (un->un_f_format_in_progress == FALSE) {
10708 		mutex_exit(SD_MUTEX(un));
10709 
10710 		(void) cmlb_validate(un->un_cmlbhandle, 0,
10711 		    (void *)SD_PATH_DIRECT);
10712 		if (cmlb_partinfo(un->un_cmlbhandle, part, NULL, NULL, NULL,
10713 		    NULL, (void *) SD_PATH_DIRECT) != 0) {
10714 			rval = SD_NOT_READY_VALID;
10715 			mutex_enter(SD_MUTEX(un));
10716 
10717 			goto done;
10718 		}
10719 		if (un->un_f_pkstats_enabled) {
10720 			sd_set_pstats(un);
10721 			SD_TRACE(SD_LOG_IO_PARTITION, un,
10722 			    "sd_ready_and_valid: un:0x%p pstats created and "
10723 			    "set\n", un);
10724 		}
10725 		mutex_enter(SD_MUTEX(un));
10726 	}
10727 
10728 	/*
10729 	 * If this device supports DOOR_LOCK command, try and send
10730 	 * this command to PREVENT MEDIA REMOVAL, but don't get upset
10731 	 * if it fails. For a CD, however, it is an error
10732 	 */
10733 	if (un->un_f_doorlock_supported) {
10734 		mutex_exit(SD_MUTEX(un));
10735 		status = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
10736 		    SD_PATH_DIRECT);
10737 
10738 		if ((status != 0) && ISCD(un)) {
10739 			rval = SD_NOT_READY_VALID;
10740 			mutex_enter(SD_MUTEX(un));
10741 
10742 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10743 
10744 			goto done;
10745 		} else if (status != 0)
10746 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10747 		mutex_enter(SD_MUTEX(un));
10748 	}
10749 
10750 	/* The state has changed, inform the media watch routines */
10751 	un->un_mediastate = DKIO_INSERTED;
10752 	cv_broadcast(&un->un_state_cv);
10753 	rval = SD_READY_VALID;
10754 
10755 done:
10756 
10757 	/*
10758 	 * Initialize the capacity kstat value, if no media previously
10759 	 * (capacity kstat is 0) and a media has been inserted
10760 	 * (un_blockcount > 0).
10761 	 */
10762 	if (un->un_errstats != NULL) {
10763 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
10764 		if ((stp->sd_capacity.value.ui64 == 0) &&
10765 		    (un->un_f_blockcount_is_valid == TRUE)) {
10766 			stp->sd_capacity.value.ui64 =
10767 			    (uint64_t)((uint64_t)un->un_blockcount *
10768 			    un->un_sys_blocksize);
10769 		}
10770 	}
10771 
10772 	mutex_exit(SD_MUTEX(un));
10773 	return (rval);
10774 }
10775 
10776 
10777 /*
10778  *    Function: sdmin
10779  *
10780  * Description: Routine to limit the size of a data transfer. Used in
10781  *		conjunction with physio(9F).
10782  *
10783  *   Arguments: bp - pointer to the indicated buf(9S) struct.
10784  *
10785  *     Context: Kernel thread context.
10786  */
10787 
10788 static void
10789 sdmin(struct buf *bp)
10790 {
10791 	struct sd_lun	*un;
10792 	int		instance;
10793 
10794 	instance = SDUNIT(bp->b_edev);
10795 
10796 	un = ddi_get_soft_state(sd_state, instance);
10797 	ASSERT(un != NULL);
10798 
10799 	/*
10800 	 * We depend on buf breakup to restrict
10801 	 * IO size if it is enabled.
10802 	 */
10803 	if (un->un_buf_breakup_supported) {
10804 		return;
10805 	}
10806 
10807 	if (bp->b_bcount > un->un_max_xfer_size) {
10808 		bp->b_bcount = un->un_max_xfer_size;
10809 	}
10810 }
10811 
10812 
10813 /*
10814  *    Function: sdread
10815  *
10816  * Description: Driver's read(9e) entry point function.
10817  *
10818  *   Arguments: dev   - device number
10819  *		uio   - structure pointer describing where data is to be stored
10820  *			in user's space
10821  *		cred_p  - user credential pointer
10822  *
10823  * Return Code: ENXIO
10824  *		EIO
10825  *		EINVAL
10826  *		value returned by physio
10827  *
10828  *     Context: Kernel thread context.
10829  */
10830 /* ARGSUSED */
10831 static int
10832 sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
10833 {
10834 	struct sd_lun	*un = NULL;
10835 	int		secmask;
10836 	int		err = 0;
10837 	sd_ssc_t	*ssc;
10838 
10839 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10840 		return (ENXIO);
10841 	}
10842 
10843 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10844 
10845 
10846 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
10847 		mutex_enter(SD_MUTEX(un));
10848 		/*
10849 		 * Because the call to sd_ready_and_valid will issue I/O we
10850 		 * must wait here if either the device is suspended or
10851 		 * if it's power level is changing.
10852 		 */
10853 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10854 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10855 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10856 		}
10857 		un->un_ncmds_in_driver++;
10858 		mutex_exit(SD_MUTEX(un));
10859 
10860 		/* Initialize sd_ssc_t for internal uscsi commands */
10861 		ssc = sd_ssc_init(un);
10862 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
10863 			err = EIO;
10864 		} else {
10865 			err = 0;
10866 		}
10867 		sd_ssc_fini(ssc);
10868 
10869 		mutex_enter(SD_MUTEX(un));
10870 		un->un_ncmds_in_driver--;
10871 		ASSERT(un->un_ncmds_in_driver >= 0);
10872 		mutex_exit(SD_MUTEX(un));
10873 		if (err != 0)
10874 			return (err);
10875 	}
10876 
10877 	/*
10878 	 * Read requests are restricted to multiples of the system block size.
10879 	 */
10880 	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR)
10881 		secmask = un->un_tgt_blocksize - 1;
10882 	else
10883 		secmask = DEV_BSIZE - 1;
10884 
10885 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10886 		SD_ERROR(SD_LOG_READ_WRITE, un,
10887 		    "sdread: file offset not modulo %d\n",
10888 		    secmask + 1);
10889 		err = EINVAL;
10890 	} else if (uio->uio_iov->iov_len & (secmask)) {
10891 		SD_ERROR(SD_LOG_READ_WRITE, un,
10892 		    "sdread: transfer length not modulo %d\n",
10893 		    secmask + 1);
10894 		err = EINVAL;
10895 	} else {
10896 		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
10897 	}
10898 
10899 	return (err);
10900 }
10901 
10902 
10903 /*
10904  *    Function: sdwrite
10905  *
10906  * Description: Driver's write(9e) entry point function.
10907  *
10908  *   Arguments: dev   - device number
10909  *		uio   - structure pointer describing where data is stored in
10910  *			user's space
10911  *		cred_p  - user credential pointer
10912  *
10913  * Return Code: ENXIO
10914  *		EIO
10915  *		EINVAL
10916  *		value returned by physio
10917  *
10918  *     Context: Kernel thread context.
10919  */
10920 /* ARGSUSED */
10921 static int
10922 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
10923 {
10924 	struct sd_lun	*un = NULL;
10925 	int		secmask;
10926 	int		err = 0;
10927 	sd_ssc_t	*ssc;
10928 
10929 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10930 		return (ENXIO);
10931 	}
10932 
10933 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10934 
10935 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
10936 		mutex_enter(SD_MUTEX(un));
10937 		/*
10938 		 * Because the call to sd_ready_and_valid will issue I/O we
10939 		 * must wait here if either the device is suspended or
10940 		 * if it's power level is changing.
10941 		 */
10942 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10943 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10944 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10945 		}
10946 		un->un_ncmds_in_driver++;
10947 		mutex_exit(SD_MUTEX(un));
10948 
10949 		/* Initialize sd_ssc_t for internal uscsi commands */
10950 		ssc = sd_ssc_init(un);
10951 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
10952 			err = EIO;
10953 		} else {
10954 			err = 0;
10955 		}
10956 		sd_ssc_fini(ssc);
10957 
10958 		mutex_enter(SD_MUTEX(un));
10959 		un->un_ncmds_in_driver--;
10960 		ASSERT(un->un_ncmds_in_driver >= 0);
10961 		mutex_exit(SD_MUTEX(un));
10962 		if (err != 0)
10963 			return (err);
10964 	}
10965 
10966 	/*
10967 	 * Write requests are restricted to multiples of the system block size.
10968 	 */
10969 	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR)
10970 		secmask = un->un_tgt_blocksize - 1;
10971 	else
10972 		secmask = DEV_BSIZE - 1;
10973 
10974 	if (uio->uio_loffset & ((offset_t)(secmask))) {
10975 		SD_ERROR(SD_LOG_READ_WRITE, un,
10976 		    "sdwrite: file offset not modulo %d\n",
10977 		    secmask + 1);
10978 		err = EINVAL;
10979 	} else if (uio->uio_iov->iov_len & (secmask)) {
10980 		SD_ERROR(SD_LOG_READ_WRITE, un,
10981 		    "sdwrite: transfer length not modulo %d\n",
10982 		    secmask + 1);
10983 		err = EINVAL;
10984 	} else {
10985 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
10986 	}
10987 
10988 	return (err);
10989 }
10990 
10991 
10992 /*
10993  *    Function: sdaread
10994  *
10995  * Description: Driver's aread(9e) entry point function.
10996  *
10997  *   Arguments: dev   - device number
10998  *		aio   - structure pointer describing where data is to be stored
10999  *		cred_p  - user credential pointer
11000  *
11001  * Return Code: ENXIO
11002  *		EIO
11003  *		EINVAL
11004  *		value returned by aphysio
11005  *
11006  *     Context: Kernel thread context.
11007  */
11008 /* ARGSUSED */
11009 static int
11010 sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11011 {
11012 	struct sd_lun	*un = NULL;
11013 	struct uio	*uio = aio->aio_uio;
11014 	int		secmask;
11015 	int		err = 0;
11016 	sd_ssc_t	*ssc;
11017 
11018 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11019 		return (ENXIO);
11020 	}
11021 
11022 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11023 
11024 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
11025 		mutex_enter(SD_MUTEX(un));
11026 		/*
11027 		 * Because the call to sd_ready_and_valid will issue I/O we
11028 		 * must wait here if either the device is suspended or
11029 		 * if it's power level is changing.
11030 		 */
11031 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11032 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11033 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11034 		}
11035 		un->un_ncmds_in_driver++;
11036 		mutex_exit(SD_MUTEX(un));
11037 
11038 		/* Initialize sd_ssc_t for internal uscsi commands */
11039 		ssc = sd_ssc_init(un);
11040 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
11041 			err = EIO;
11042 		} else {
11043 			err = 0;
11044 		}
11045 		sd_ssc_fini(ssc);
11046 
11047 		mutex_enter(SD_MUTEX(un));
11048 		un->un_ncmds_in_driver--;
11049 		ASSERT(un->un_ncmds_in_driver >= 0);
11050 		mutex_exit(SD_MUTEX(un));
11051 		if (err != 0)
11052 			return (err);
11053 	}
11054 
11055 	/*
11056 	 * Read requests are restricted to multiples of the system block size.
11057 	 */
11058 	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR)
11059 		secmask = un->un_tgt_blocksize - 1;
11060 	else
11061 		secmask = DEV_BSIZE - 1;
11062 
11063 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11064 		SD_ERROR(SD_LOG_READ_WRITE, un,
11065 		    "sdaread: file offset not modulo %d\n",
11066 		    secmask + 1);
11067 		err = EINVAL;
11068 	} else if (uio->uio_iov->iov_len & (secmask)) {
11069 		SD_ERROR(SD_LOG_READ_WRITE, un,
11070 		    "sdaread: transfer length not modulo %d\n",
11071 		    secmask + 1);
11072 		err = EINVAL;
11073 	} else {
11074 		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
11075 	}
11076 
11077 	return (err);
11078 }
11079 
11080 
11081 /*
11082  *    Function: sdawrite
11083  *
11084  * Description: Driver's awrite(9e) entry point function.
11085  *
11086  *   Arguments: dev   - device number
11087  *		aio   - structure pointer describing where data is stored
11088  *		cred_p  - user credential pointer
11089  *
11090  * Return Code: ENXIO
11091  *		EIO
11092  *		EINVAL
11093  *		value returned by aphysio
11094  *
11095  *     Context: Kernel thread context.
11096  */
/* ARGSUSED */
static int
sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
{
	struct sd_lun	*un = NULL;
	struct uio	*uio = aio->aio_uio;
	int		secmask;
	int		err = 0;
	sd_ssc_t	*ssc;

	/* No soft state means the unit is not attached; nothing to write to. */
	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/*
	 * If the disk does not yet have a valid label (and is not a CD),
	 * verify that the device is ready before accepting the write.
	 */
	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
		mutex_enter(SD_MUTEX(un));
		/*
		 * Because the call to sd_ready_and_valid will issue I/O we
		 * must wait here if either the device is suspended or
		 * if it's power level is changing.
		 */
		while ((un->un_state == SD_STATE_SUSPENDED) ||
		    (un->un_state == SD_STATE_PM_CHANGING)) {
			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
		}
		/*
		 * un_ncmds_in_driver is held elevated for the duration of
		 * the ready-check I/O and dropped again below.
		 */
		un->un_ncmds_in_driver++;
		mutex_exit(SD_MUTEX(un));

		/* Initialize sd_ssc_t for internal uscsi commands */
		ssc = sd_ssc_init(un);
		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
			err = EIO;
		} else {
			err = 0;
		}
		sd_ssc_fini(ssc);

		mutex_enter(SD_MUTEX(un));
		un->un_ncmds_in_driver--;
		ASSERT(un->un_ncmds_in_driver >= 0);
		mutex_exit(SD_MUTEX(un));
		if (err != 0)
			return (err);
	}

	/*
	 * Write requests are restricted to multiples of the system block size.
	 * If read-modify-write is disabled by policy, the request must instead
	 * be aligned to the (possibly larger) target block size.
	 */
	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR)
		secmask = un->un_tgt_blocksize - 1;
	else
		secmask = DEV_BSIZE - 1;

	/* Both the starting offset and the transfer length must be aligned. */
	if (uio->uio_loffset & ((offset_t)(secmask))) {
		SD_ERROR(SD_LOG_READ_WRITE, un,
		    "sdawrite: file offset not modulo %d\n",
		    secmask + 1);
		err = EINVAL;
	} else if (uio->uio_iov->iov_len & (secmask)) {
		SD_ERROR(SD_LOG_READ_WRITE, un,
		    "sdawrite: transfer length not modulo %d\n",
		    secmask + 1);
		err = EINVAL;
	} else {
		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
	}

	return (err);
}
11168 
11169 
11170 
11171 
11172 
11173 /*
11174  * Driver IO processing follows the following sequence:
11175  *
11176  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
11177  *         |                |                     ^
11178  *         v                v                     |
11179  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
11180  *         |                |                     |                   |
11181  *         v                |                     |                   |
11182  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
11183  *         |                |                     ^                   ^
11184  *         v                v                     |                   |
11185  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
11186  *         |                |                     |                   |
11187  *     +---+                |                     +------------+      +-------+
11188  *     |                    |                                  |              |
11189  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11190  *     |                    v                                  |              |
11191  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
11192  *     |                    |                                  ^              |
11193  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11194  *     |                    v                                  |              |
11195  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
11196  *     |                    |                                  ^              |
11197  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11198  *     |                    v                                  |              |
11199  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
11200  *     |                    |                                  ^              |
11201  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
11202  *     |                    v                                  |              |
11203  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
11204  *     |                    |                                  ^              |
11205  *     |                    |                                  |              |
11206  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
11207  *                          |                           ^
11208  *                          v                           |
11209  *                   sd_core_iostart()                  |
11210  *                          |                           |
11211  *                          |                           +------>(*destroypkt)()
11212  *                          +-> sd_start_cmds() <-+     |           |
11213  *                          |                     |     |           v
11214  *                          |                     |     |  scsi_destroy_pkt(9F)
11215  *                          |                     |     |
11216  *                          +->(*initpkt)()       +- sdintr()
11217  *                          |  |                        |  |
11218  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
11219  *                          |  +-> scsi_setup_cdb(9F)   |
11220  *                          |                           |
11221  *                          +--> scsi_transport(9F)     |
11222  *                                     |                |
11223  *                                     +----> SCSA ---->+
11224  *
11225  *
11226  * This code is based upon the following presumptions:
11227  *
11228  *   - iostart and iodone functions operate on buf(9S) structures. These
11229  *     functions perform the necessary operations on the buf(9S) and pass
11230  *     them along to the next function in the chain by using the macros
11231  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
11232  *     (for iodone side functions).
11233  *
11234  *   - The iostart side functions may sleep. The iodone side functions
11235  *     are called under interrupt context and may NOT sleep. Therefore
11236  *     iodone side functions also may not call iostart side functions.
11237  *     (NOTE: iostart side functions should NOT sleep for memory, as
11238  *     this could result in deadlock.)
11239  *
11240  *   - An iostart side function may call its corresponding iodone side
11241  *     function directly (if necessary).
11242  *
11243  *   - In the event of an error, an iostart side function can return a buf(9S)
11244  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
11245  *     b_error in the usual way of course).
11246  *
11247  *   - The taskq mechanism may be used by the iodone side functions to dispatch
11248  *     requests to the iostart side functions.  The iostart side functions in
11249  *     this case would be called under the context of a taskq thread, so it's
11250  *     OK for them to block/sleep/spin in this case.
11251  *
11252  *   - iostart side functions may allocate "shadow" buf(9S) structs and
11253  *     pass them along to the next function in the chain.  The corresponding
11254  *     iodone side functions must coalesce the "shadow" bufs and return
11255  *     the "original" buf to the next higher layer.
11256  *
11257  *   - The b_private field of the buf(9S) struct holds a pointer to
11258  *     an sd_xbuf struct, which contains information needed to
11259  *     construct the scsi_pkt for the command.
11260  *
11261  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
11262  *     layer must acquire & release the SD_MUTEX(un) as needed.
11263  */
11264 
11265 
11266 /*
11267  * Create taskq for all targets in the system. This is created at
11268  * _init(9E) and destroyed at _fini(9E).
11269  *
11270  * Note: here we set the minalloc to a reasonably high number to ensure that
11271  * we will have an adequate supply of task entries available at interrupt time.
 * This is used in conjunction with the TASKQ_PREPOPULATE flag in
 * sd_taskq_create().  Since we do not want to sleep for allocations at
 * interrupt time, set maxalloc equal to minalloc. That way we will just fail
 * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
 * requests at any one instant in time.
11277  */
11278 #define	SD_TASKQ_NUMTHREADS	8
11279 #define	SD_TASKQ_MINALLOC	256
11280 #define	SD_TASKQ_MAXALLOC	256
11281 
11282 static taskq_t	*sd_tq = NULL;
11283 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
11284 
11285 static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
11286 static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
11287 
11288 /*
11289  * The following task queue is being created for the write part of
11290  * read-modify-write of non-512 block size devices.
11291  * Limit the number of threads to 1 for now. This number has been chosen
 * considering the fact that it applies only to DVD-RAM drives/MO drives
 * currently, for which performance is not the main criterion at this stage.
11294  * Note: It needs to be explored if we can use a single taskq in future
11295  */
11296 #define	SD_WMR_TASKQ_NUMTHREADS	1
11297 static taskq_t	*sd_wmr_tq = NULL;
11298 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
11299 
11300 /*
11301  *    Function: sd_taskq_create
11302  *
11303  * Description: Create taskq thread(s) and preallocate task entries
11304  *
11305  * Return Code: Returns a pointer to the allocated taskq_t.
11306  *
11307  *     Context: Can sleep. Requires blockable context.
11308  *
11309  *       Notes: - The taskq() facility currently is NOT part of the DDI.
 *		  (definitely NOT recommended for 3rd-party drivers!) :-)
11311  *		- taskq_create() will block for memory, also it will panic
11312  *		  if it cannot create the requested number of threads.
11313  *		- Currently taskq_create() creates threads that cannot be
11314  *		  swapped.
11315  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
11316  *		  supply of taskq entries at interrupt time (ie, so that we
11317  *		  do not have to sleep for memory)
11318  */
11319 
11320 static void
11321 sd_taskq_create(void)
11322 {
11323 	char	taskq_name[TASKQ_NAMELEN];
11324 
11325 	ASSERT(sd_tq == NULL);
11326 	ASSERT(sd_wmr_tq == NULL);
11327 
11328 	(void) snprintf(taskq_name, sizeof (taskq_name),
11329 	    "%s_drv_taskq", sd_label);
11330 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
11331 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11332 	    TASKQ_PREPOPULATE));
11333 
11334 	(void) snprintf(taskq_name, sizeof (taskq_name),
11335 	    "%s_rmw_taskq", sd_label);
11336 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
11337 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11338 	    TASKQ_PREPOPULATE));
11339 }
11340 
11341 
11342 /*
11343  *    Function: sd_taskq_delete
11344  *
11345  * Description: Complementary cleanup routine for sd_taskq_create().
11346  *
11347  *     Context: Kernel thread context.
11348  */
11349 
11350 static void
11351 sd_taskq_delete(void)
11352 {
11353 	ASSERT(sd_tq != NULL);
11354 	ASSERT(sd_wmr_tq != NULL);
11355 	taskq_destroy(sd_tq);
11356 	taskq_destroy(sd_wmr_tq);
11357 	sd_tq = NULL;
11358 	sd_wmr_tq = NULL;
11359 }
11360 
11361 
11362 /*
11363  *    Function: sdstrategy
11364  *
11365  * Description: Driver's strategy (9E) entry point function.
11366  *
11367  *   Arguments: bp - pointer to buf(9S)
11368  *
11369  * Return Code: Always returns zero
11370  *
11371  *     Context: Kernel thread context.
11372  */
11373 
static int
sdstrategy(struct buf *bp)
{
	struct sd_lun *un;

	/* No soft state means the unit is not attached; fail the buf. */
	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
	if (un == NULL) {
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	/* As was done in the past, fail new cmds. if state is dumping. */
	if (un->un_state == SD_STATE_DUMPING) {
		bioerror(bp, ENXIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/*
	 * Commands may sneak in while we released the mutex in
	 * DDI_SUSPEND, we should block new commands. However, old
	 * commands that are still in the driver at this point should
	 * still be allowed to drain.
	 */
	mutex_enter(SD_MUTEX(un));
	/*
	 * Must wait here if either the device is suspended or
	 * if it's power level is changing.
	 */
	while ((un->un_state == SD_STATE_SUSPENDED) ||
	    (un->un_state == SD_STATE_PM_CHANGING)) {
		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
	}

	/* Account for this command for the duration of its life in sd. */
	un->un_ncmds_in_driver++;

	/*
	 * atapi: Since we are running the CD for now in PIO mode we need to
	 * call bp_mapin here to avoid bp_mapin called interrupt context under
	 * the HBA's init_pkt routine.
	 */
	if (un->un_f_cfg_is_atapi == TRUE) {
		/* bp_mapin may sleep, so drop the mutex around the call. */
		mutex_exit(SD_MUTEX(un));
		bp_mapin(bp);
		mutex_enter(SD_MUTEX(un));
	}
	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
	    un->un_ncmds_in_driver);

	/* A write makes the cache dirty; remember that a flush is needed. */
	if (bp->b_flags & B_WRITE)
		un->un_f_sync_cache_required = TRUE;

	mutex_exit(SD_MUTEX(un));

	/*
	 * This will (eventually) allocate the sd_xbuf area and
	 * call sd_xbuf_strategy().  We just want to return the
	 * result of ddi_xbuf_qstrategy so that we have an opt-
	 * imized tail call which saves us a stack frame.
	 */
	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
}
11441 
11442 
11443 /*
11444  *    Function: sd_xbuf_strategy
11445  *
11446  * Description: Function for initiating IO operations via the
11447  *		ddi_xbuf_qstrategy() mechanism.
11448  *
11449  *     Context: Kernel thread context.
11450  */
11451 
11452 static void
11453 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
11454 {
11455 	struct sd_lun *un = arg;
11456 
11457 	ASSERT(bp != NULL);
11458 	ASSERT(xp != NULL);
11459 	ASSERT(un != NULL);
11460 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11461 
11462 	/*
11463 	 * Initialize the fields in the xbuf and save a pointer to the
11464 	 * xbuf in bp->b_private.
11465 	 */
11466 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
11467 
11468 	/* Send the buf down the iostart chain */
11469 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
11470 }
11471 
11472 
11473 /*
11474  *    Function: sd_xbuf_init
11475  *
11476  * Description: Prepare the given sd_xbuf struct for use.
11477  *
11478  *   Arguments: un - ptr to softstate
11479  *		bp - ptr to associated buf(9S)
11480  *		xp - ptr to associated sd_xbuf
11481  *		chain_type - IO chain type to use:
11482  *			SD_CHAIN_NULL
11483  *			SD_CHAIN_BUFIO
11484  *			SD_CHAIN_USCSI
11485  *			SD_CHAIN_DIRECT
11486  *			SD_CHAIN_DIRECT_PRIORITY
11487  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
11488  *			initialization; may be NULL if none.
11489  *
11490  *     Context: Kernel thread context
11491  */
11492 
static void
sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
	uchar_t chain_type, void *pktinfop)
{
	int index;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);

	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
	    bp, chain_type);

	/*
	 * Associate the xbuf with this unit and preserve the caller's
	 * b_private (it is restored from xb_private at iodone time).
	 */
	xp->xb_un	= un;
	xp->xb_pktp	= NULL;
	xp->xb_pktinfo	= pktinfop;
	xp->xb_private	= bp->b_private;
	xp->xb_blkno	= (daddr_t)bp->b_blkno;

	/*
	 * Set up the iostart and iodone chain indexes in the xbuf, based
	 * upon the specified chain type to use.
	 */
	switch (chain_type) {
	case SD_CHAIN_NULL:
		/*
		 * Fall thru to just use the values for the buf type, even
		 * tho for the NULL chain these values will never be used.
		 */
		/* FALLTHRU */
	case SD_CHAIN_BUFIO:
		index = un->un_buf_chain_type;
		if ((!un->un_f_has_removable_media) &&
		    (un->un_tgt_blocksize != 0) &&
		    (un->un_tgt_blocksize != DEV_BSIZE)) {
			/*
			 * Non-512 target block size: if the request is not
			 * aligned to the target block size (in either its
			 * starting block or its byte count), route it to a
			 * multi-sector-size (read-modify-write) chain --
			 * unless policy says to just return an error.
			 */
			int secmask = 0, blknomask = 0;
			blknomask =
			    (un->un_tgt_blocksize / DEV_BSIZE) - 1;
			secmask = un->un_tgt_blocksize - 1;

			if ((bp->b_lblkno & (blknomask)) ||
			    (bp->b_bcount & (secmask))) {
				if (un->un_f_rmw_type !=
				    SD_RMW_TYPE_RETURN_ERROR) {
					if (un->un_f_pm_is_enabled == FALSE)
						index =
						    SD_CHAIN_INFO_MSS_DSK_NO_PM;
					else
						index =
						    SD_CHAIN_INFO_MSS_DISK;
				}
			}
		}
		break;
	case SD_CHAIN_USCSI:
		index = un->un_uscsi_chain_type;
		break;
	case SD_CHAIN_DIRECT:
		index = un->un_direct_chain_type;
		break;
	case SD_CHAIN_DIRECT_PRIORITY:
		index = un->un_priority_chain_type;
		break;
	default:
		/* We're really broken if we ever get here... */
		panic("sd_xbuf_init: illegal chain type!");
		/*NOTREACHED*/
	}

	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;

	/*
	 * It might be a bit easier to simply bzero the entire xbuf above,
	 * but it turns out that since we init a fair number of members anyway,
	 * we save a fair number cycles by doing explicit assignment of zero.
	 */
	xp->xb_pkt_flags	= 0;
	xp->xb_dma_resid	= 0;
	xp->xb_retry_count	= 0;
	xp->xb_victim_retry_count = 0;
	xp->xb_ua_retry_count	= 0;
	xp->xb_nr_retry_count	= 0;
	xp->xb_sense_bp		= NULL;
	xp->xb_sense_status	= 0;
	xp->xb_sense_state	= 0;
	xp->xb_sense_resid	= 0;
	xp->xb_ena		= 0;

	/* Reset the buf for (re)use and clear any stale error state. */
	bp->b_private	= xp;
	bp->b_flags	&= ~(B_DONE | B_ERROR);
	bp->b_resid	= 0;
	bp->av_forw	= NULL;
	bp->av_back	= NULL;
	bioerror(bp, 0);

	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
}
11591 
11592 
11593 /*
11594  *    Function: sd_uscsi_strategy
11595  *
11596  * Description: Wrapper for calling into the USCSI chain via physio(9F)
11597  *
11598  *   Arguments: bp - buf struct ptr
11599  *
11600  * Return Code: Always returns 0
11601  *
11602  *     Context: Kernel thread context
11603  */
11604 
static int
sd_uscsi_strategy(struct buf *bp)
{
	struct sd_lun		*un;
	struct sd_uscsi_info	*uip;
	struct sd_xbuf		*xp;
	uchar_t			chain_type;
	uchar_t			cmd;

	ASSERT(bp != NULL);

	/* No soft state means the unit is not attached; fail the buf. */
	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
	if (un == NULL) {
		bioerror(bp, EIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return (0);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);

	/*
	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
	 */
	ASSERT(bp->b_private != NULL);
	uip = (struct sd_uscsi_info *)bp->b_private;
	cmd = ((struct uscsi_cmd *)(uip->ui_cmdp))->uscsi_cdb[0];

	mutex_enter(SD_MUTEX(un));
	/*
	 * atapi: Since we are running the CD for now in PIO mode we need to
	 * call bp_mapin here to avoid bp_mapin called interrupt context under
	 * the HBA's init_pkt routine.
	 */
	if (un->un_f_cfg_is_atapi == TRUE) {
		/* bp_mapin may sleep, so drop the mutex around the call. */
		mutex_exit(SD_MUTEX(un));
		bp_mapin(bp);
		mutex_enter(SD_MUTEX(un));
	}
	un->un_ncmds_in_driver++;
	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
	    un->un_ncmds_in_driver);

	/*
	 * A data-out transfer dirties the cache; MODE SELECT is excluded
	 * here, presumably because it carries parameter data rather than
	 * medium data -- NOTE(review): confirm against the sync-cache logic.
	 */
	if ((bp->b_flags & B_WRITE) && (bp->b_bcount != 0) &&
	    (cmd != SCMD_MODE_SELECT) && (cmd != SCMD_MODE_SELECT_G1))
		un->un_f_sync_cache_required = TRUE;

	mutex_exit(SD_MUTEX(un));

	/* Map the caller's path flag to the corresponding xbuf chain type. */
	switch (uip->ui_flags) {
	case SD_PATH_DIRECT:
		chain_type = SD_CHAIN_DIRECT;
		break;
	case SD_PATH_DIRECT_PRIORITY:
		chain_type = SD_CHAIN_DIRECT_PRIORITY;
		break;
	default:
		chain_type = SD_CHAIN_USCSI;
		break;
	}

	/*
	 * We may allocate extra buf for external USCSI commands. If the
	 * application asks for bigger than 20-byte sense data via USCSI,
	 * SCSA layer will allocate 252 bytes sense buf for that command.
	 */
	if (((struct uscsi_cmd *)(uip->ui_cmdp))->uscsi_rqlen >
	    SENSE_LENGTH) {
		xp = kmem_zalloc(sizeof (struct sd_xbuf) - SENSE_LENGTH +
		    MAX_SENSE_LENGTH, KM_SLEEP);
	} else {
		xp = kmem_zalloc(sizeof (struct sd_xbuf), KM_SLEEP);
	}

	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);

	/* Use the index obtained within xbuf_init */
	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);

	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);

	return (0);
}
11690 
11691 /*
11692  *    Function: sd_send_scsi_cmd
11693  *
11694  * Description: Runs a USCSI command for user (when called thru sdioctl),
11695  *		or for the driver
11696  *
11697  *   Arguments: dev - the dev_t for the device
11698  *		incmd - ptr to a valid uscsi_cmd struct
11699  *		flag - bit flag, indicating open settings, 32/64 bit type
11700  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11701  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11702  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11703  *			to use the USCSI "direct" chain and bypass the normal
11704  *			command waitq.
11705  *
11706  * Return Code: 0 -  successful completion of the given command
11707  *		EIO - scsi_uscsi_handle_command() failed
11708  *		ENXIO  - soft state not found for specified dev
11709  *		EINVAL
11710  *		EFAULT - copyin/copyout error
11711  *		return code of scsi_uscsi_handle_command():
11712  *			EIO
11713  *			ENXIO
11714  *			EACCES
11715  *
11716  *     Context: Waits for command to complete. Can sleep.
11717  */
11718 
11719 static int
11720 sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
11721 	enum uio_seg dataspace, int path_flag)
11722 {
11723 	struct sd_lun	*un;
11724 	sd_ssc_t	*ssc;
11725 	int		rval;
11726 
11727 	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
11728 	if (un == NULL) {
11729 		return (ENXIO);
11730 	}
11731 
11732 	/*
11733 	 * Using sd_ssc_send to handle uscsi cmd
11734 	 */
11735 	ssc = sd_ssc_init(un);
11736 	rval = sd_ssc_send(ssc, incmd, flag, dataspace, path_flag);
11737 	sd_ssc_fini(ssc);
11738 
11739 	return (rval);
11740 }
11741 
11742 /*
11743  *    Function: sd_ssc_init
11744  *
11745  * Description: Uscsi end-user call this function to initialize necessary
11746  *              fields, such as uscsi_cmd and sd_uscsi_info struct.
11747  *
11748  *              The return value of sd_send_scsi_cmd will be treated as a
 *              fault in various conditions. Even if it is not zero, some
11750  *              callers may ignore the return value. That is to say, we can
11751  *              not make an accurate assessment in sdintr, since if a
11752  *              command is failed in sdintr it does not mean the caller of
11753  *              sd_send_scsi_cmd will treat it as a real failure.
11754  *
11755  *              To avoid printing too many error logs for a failed uscsi
11756  *              packet that the caller may not treat it as a failure, the
11757  *              sd will keep silent for handling all uscsi commands.
11758  *
11759  *              During detach->attach and attach-open, for some types of
11760  *              problems, the driver should be providing information about
11761  *              the problem encountered. Device use USCSI_SILENT, which
11762  *              suppresses all driver information. The result is that no
11763  *              information about the problem is available. Being
11764  *              completely silent during this time is inappropriate. The
11765  *              driver needs a more selective filter than USCSI_SILENT, so
11766  *              that information related to faults is provided.
11767  *
 *              To make an accurate assessment, the caller of
11769  *              sd_send_scsi_USCSI_CMD should take the ownership and
11770  *              get necessary information to print error messages.
11771  *
11772  *              If we want to print necessary info of uscsi command, we need to
11773  *              keep the uscsi_cmd and sd_uscsi_info till we can make the
11774  *              assessment. We use sd_ssc_init to alloc necessary
 *              structs for sending a uscsi command and we are also
 *              responsible for freeing the memory by calling
11777  *              sd_ssc_fini.
11778  *
 *              The calling sequence will look like:
11780  *              sd_ssc_init->
11781  *
11782  *                  ...
11783  *
11784  *                  sd_send_scsi_USCSI_CMD->
11785  *                      sd_ssc_send-> - - - sdintr
11786  *                  ...
11787  *
11788  *                  if we think the return value should be treated as a
 *                  failure, we make the assessment here and print out
 *                  necessary information by retrieving uscsi_cmd and sd_uscsi_info
11791  *
11792  *                  ...
11793  *
11794  *              sd_ssc_fini
11795  *
11796  *
11797  *   Arguments: un - pointer to driver soft state (unit) structure for this
11798  *                   target.
11799  *
11800  * Return code: sd_ssc_t - pointer to allocated sd_ssc_t struct, it contains
11801  *                         uscsi_cmd and sd_uscsi_info.
11802  *                  NULL - if can not alloc memory for sd_ssc_t struct
11803  *
11804  *     Context: Kernel Thread.
11805  */
11806 static sd_ssc_t *
11807 sd_ssc_init(struct sd_lun *un)
11808 {
11809 	sd_ssc_t		*ssc;
11810 	struct uscsi_cmd	*ucmdp;
11811 	struct sd_uscsi_info	*uip;
11812 
11813 	ASSERT(un != NULL);
11814 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11815 
11816 	/*
11817 	 * Allocate sd_ssc_t structure
11818 	 */
11819 	ssc = kmem_zalloc(sizeof (sd_ssc_t), KM_SLEEP);
11820 
11821 	/*
11822 	 * Allocate uscsi_cmd by calling scsi_uscsi_alloc common routine
11823 	 */
11824 	ucmdp = scsi_uscsi_alloc();
11825 
11826 	/*
11827 	 * Allocate sd_uscsi_info structure
11828 	 */
11829 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
11830 
11831 	ssc->ssc_uscsi_cmd = ucmdp;
11832 	ssc->ssc_uscsi_info = uip;
11833 	ssc->ssc_un = un;
11834 
11835 	return (ssc);
11836 }
11837 
11838 /*
11839  * Function: sd_ssc_fini
11840  *
 * Description: Free the sd_ssc_t struct and the structures hanging off it.
11842  *
11843  * Arguments: ssc - struct pointer of sd_ssc_t.
11844  */
11845 static void
11846 sd_ssc_fini(sd_ssc_t *ssc)
11847 {
11848 	scsi_uscsi_free(ssc->ssc_uscsi_cmd);
11849 
11850 	if (ssc->ssc_uscsi_info != NULL) {
11851 		kmem_free(ssc->ssc_uscsi_info, sizeof (struct sd_uscsi_info));
11852 		ssc->ssc_uscsi_info = NULL;
11853 	}
11854 
11855 	kmem_free(ssc, sizeof (sd_ssc_t));
11856 	ssc = NULL;
11857 }
11858 
11859 /*
11860  * Function: sd_ssc_send
11861  *
11862  * Description: Runs a USCSI command for user when called through sdioctl,
11863  *              or for the driver.
11864  *
11865  *   Arguments: ssc - the struct of sd_ssc_t will bring uscsi_cmd and
11866  *                    sd_uscsi_info in.
11867  *		incmd - ptr to a valid uscsi_cmd struct
11868  *		flag - bit flag, indicating open settings, 32/64 bit type
11869  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11870  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11871  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11872  *			to use the USCSI "direct" chain and bypass the normal
11873  *			command waitq.
11874  *
11875  * Return Code: 0 -  successful completion of the given command
11876  *		EIO - scsi_uscsi_handle_command() failed
11877  *		ENXIO  - soft state not found for specified dev
11878  *		ECANCELED - command cancelled due to low power
11879  *		EINVAL
11880  *		EFAULT - copyin/copyout error
11881  *		return code of scsi_uscsi_handle_command():
11882  *			EIO
11883  *			ENXIO
11884  *			EACCES
11885  *
11886  *     Context: Kernel Thread;
11887  *              Waits for command to complete. Can sleep.
11888  */
11889 static int
11890 sd_ssc_send(sd_ssc_t *ssc, struct uscsi_cmd *incmd, int flag,
11891 	enum uio_seg dataspace, int path_flag)
11892 {
11893 	struct sd_uscsi_info	*uip;
11894 	struct uscsi_cmd	*uscmd;
11895 	struct sd_lun		*un;
11896 	dev_t			dev;
11897 
11898 	int	format = 0;
11899 	int	rval;
11900 
11901 	ASSERT(ssc != NULL);
11902 	un = ssc->ssc_un;
11903 	ASSERT(un != NULL);
11904 	uscmd = ssc->ssc_uscsi_cmd;
11905 	ASSERT(uscmd != NULL);
11906 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11907 	if (ssc->ssc_flags & SSC_FLAGS_NEED_ASSESSMENT) {
11908 		/*
11909 		 * If enter here, it indicates that the previous uscsi
11910 		 * command has not been processed by sd_ssc_assessment.
11911 		 * This is violating our rules of FMA telemetry processing.
11912 		 * We should print out this message and the last undisposed
11913 		 * uscsi command.
11914 		 */
11915 		if (uscmd->uscsi_cdb != NULL) {
11916 			SD_INFO(SD_LOG_SDTEST, un,
11917 			    "sd_ssc_send is missing the alternative "
11918 			    "sd_ssc_assessment when running command 0x%x.\n",
11919 			    uscmd->uscsi_cdb[0]);
11920 		}
11921 		/*
11922 		 * Set the ssc_flags to SSC_FLAGS_UNKNOWN, which should be
11923 		 * the initial status.
11924 		 */
11925 		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
11926 	}
11927 
11928 	/*
11929 	 * We need to make sure sd_ssc_send will have sd_ssc_assessment
11930 	 * followed to avoid missing FMA telemetries.
11931 	 */
11932 	ssc->ssc_flags |= SSC_FLAGS_NEED_ASSESSMENT;
11933 
11934 	/*
11935 	 * if USCSI_PMFAILFAST is set and un is in low power, fail the
11936 	 * command immediately.
11937 	 */
11938 	mutex_enter(SD_MUTEX(un));
11939 	mutex_enter(&un->un_pm_mutex);
11940 	if ((uscmd->uscsi_flags & USCSI_PMFAILFAST) &&
11941 	    SD_DEVICE_IS_IN_LOW_POWER(un)) {
11942 		SD_TRACE(SD_LOG_IO, un, "sd_ssc_send:"
11943 		    "un:0x%p is in low power\n", un);
11944 		mutex_exit(&un->un_pm_mutex);
11945 		mutex_exit(SD_MUTEX(un));
11946 		return (ECANCELED);
11947 	}
11948 	mutex_exit(&un->un_pm_mutex);
11949 	mutex_exit(SD_MUTEX(un));
11950 
11951 #ifdef SDDEBUG
11952 	switch (dataspace) {
11953 	case UIO_USERSPACE:
11954 		SD_TRACE(SD_LOG_IO, un,
11955 		    "sd_ssc_send: entry: un:0x%p UIO_USERSPACE\n", un);
11956 		break;
11957 	case UIO_SYSSPACE:
11958 		SD_TRACE(SD_LOG_IO, un,
11959 		    "sd_ssc_send: entry: un:0x%p UIO_SYSSPACE\n", un);
11960 		break;
11961 	default:
11962 		SD_TRACE(SD_LOG_IO, un,
11963 		    "sd_ssc_send: entry: un:0x%p UNEXPECTED SPACE\n", un);
11964 		break;
11965 	}
11966 #endif
11967 
11968 	rval = scsi_uscsi_copyin((intptr_t)incmd, flag,
11969 	    SD_ADDRESS(un), &uscmd);
11970 	if (rval != 0) {
11971 		SD_TRACE(SD_LOG_IO, un, "sd_sense_scsi_cmd: "
11972 		    "scsi_uscsi_alloc_and_copyin failed\n", un);
11973 		return (rval);
11974 	}
11975 
11976 	if ((uscmd->uscsi_cdb != NULL) &&
11977 	    (uscmd->uscsi_cdb[0] == SCMD_FORMAT)) {
11978 		mutex_enter(SD_MUTEX(un));
11979 		un->un_f_format_in_progress = TRUE;
11980 		mutex_exit(SD_MUTEX(un));
11981 		format = 1;
11982 	}
11983 
11984 	/*
11985 	 * Allocate an sd_uscsi_info struct and fill it with the info
11986 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
11987 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
11988 	 * since we allocate the buf here in this function, we do not
11989 	 * need to preserve the prior contents of b_private.
11990 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
11991 	 */
11992 	uip = ssc->ssc_uscsi_info;
11993 	uip->ui_flags = path_flag;
11994 	uip->ui_cmdp = uscmd;
11995 
11996 	/*
11997 	 * Commands sent with priority are intended for error recovery
11998 	 * situations, and do not have retries performed.
11999 	 */
12000 	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
12001 		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
12002 	}
12003 	uscmd->uscsi_flags &= ~USCSI_NOINTR;
12004 
12005 	dev = SD_GET_DEV(un);
12006 	rval = scsi_uscsi_handle_cmd(dev, dataspace, uscmd,
12007 	    sd_uscsi_strategy, NULL, uip);
12008 
12009 	/*
12010 	 * mark ssc_flags right after handle_cmd to make sure
12011 	 * the uscsi has been sent
12012 	 */
12013 	ssc->ssc_flags |= SSC_FLAGS_CMD_ISSUED;
12014 
12015 #ifdef SDDEBUG
12016 	SD_INFO(SD_LOG_IO, un, "sd_ssc_send: "
12017 	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
12018 	    uscmd->uscsi_status, uscmd->uscsi_resid);
12019 	if (uscmd->uscsi_bufaddr != NULL) {
12020 		SD_INFO(SD_LOG_IO, un, "sd_ssc_send: "
12021 		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
12022 		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
12023 		if (dataspace == UIO_SYSSPACE) {
12024 			SD_DUMP_MEMORY(un, SD_LOG_IO,
12025 			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
12026 			    uscmd->uscsi_buflen, SD_LOG_HEX);
12027 		}
12028 	}
12029 #endif
12030 
12031 	if (format == 1) {
12032 		mutex_enter(SD_MUTEX(un));
12033 		un->un_f_format_in_progress = FALSE;
12034 		mutex_exit(SD_MUTEX(un));
12035 	}
12036 
12037 	(void) scsi_uscsi_copyout((intptr_t)incmd, uscmd);
12038 
12039 	return (rval);
12040 }
12041 
12042 /*
12043  *     Function: sd_ssc_print
12044  *
12045  * Description: Print information available to the console.
12046  *
12047  * Arguments: ssc - the struct of sd_ssc_t will bring uscsi_cmd and
12048  *                    sd_uscsi_info in.
12049  *            sd_severity - log level.
12050  *     Context: Kernel thread or interrupt context.
12051  */
12052 static void
12053 sd_ssc_print(sd_ssc_t *ssc, int sd_severity)
12054 {
12055 	struct uscsi_cmd	*ucmdp;
12056 	struct scsi_device	*devp;
12057 	dev_info_t 		*devinfo;
12058 	uchar_t			*sensep;
12059 	int			senlen;
12060 	union scsi_cdb		*cdbp;
12061 	uchar_t			com;
12062 	extern struct scsi_key_strings scsi_cmds[];
12063 
12064 	ASSERT(ssc != NULL);
12065 	ASSERT(ssc->ssc_un != NULL);
12066 
12067 	if (SD_FM_LOG(ssc->ssc_un) != SD_FM_LOG_EREPORT)
12068 		return;
12069 	ucmdp = ssc->ssc_uscsi_cmd;
12070 	devp = SD_SCSI_DEVP(ssc->ssc_un);
12071 	devinfo = SD_DEVINFO(ssc->ssc_un);
12072 	ASSERT(ucmdp != NULL);
12073 	ASSERT(devp != NULL);
12074 	ASSERT(devinfo != NULL);
12075 	sensep = (uint8_t *)ucmdp->uscsi_rqbuf;
12076 	senlen = ucmdp->uscsi_rqlen - ucmdp->uscsi_rqresid;
12077 	cdbp = (union scsi_cdb *)ucmdp->uscsi_cdb;
12078 
12079 	/* In certain case (like DOORLOCK), the cdb could be NULL. */
12080 	if (cdbp == NULL)
12081 		return;
12082 	/* We don't print log if no sense data available. */
12083 	if (senlen == 0)
12084 		sensep = NULL;
12085 	com = cdbp->scc_cmd;
12086 	scsi_generic_errmsg(devp, sd_label, sd_severity, 0, 0, com,
12087 	    scsi_cmds, sensep, ssc->ssc_un->un_additional_codes, NULL);
12088 }
12089 
12090 /*
12091  *     Function: sd_ssc_assessment
12092  *
12093  * Description: We use this function to make an assessment at the point
12094  *              where SD driver may encounter a potential error.
12095  *
12096  * Arguments: ssc - the struct of sd_ssc_t will bring uscsi_cmd and
12097  *                  sd_uscsi_info in.
12098  *            tp_assess - a hint of strategy for ereport posting.
12099  *            Possible values of tp_assess include:
12100  *                SD_FMT_IGNORE - we don't post any ereport because we're
12101  *                sure that it is ok to ignore the underlying problems.
12102  *                SD_FMT_IGNORE_COMPROMISE - we don't post any ereport for now
12103  *                but it might be not correct to ignore the underlying hardware
12104  *                error.
12105  *                SD_FMT_STATUS_CHECK - we will post an ereport with the
12106  *                payload driver-assessment of value "fail" or
12107  *                "fatal"(depending on what information we have here). This
12108  *                assessment value is usually set when SD driver think there
12109  *                is a potential error occurred(Typically, when return value
12110  *                of the SCSI command is EIO).
12111  *                SD_FMT_STANDARD - we will post an ereport with the payload
12112  *                driver-assessment of value "info". This assessment value is
12113  *                set when the SCSI command returned successfully and with
12114  *                sense data sent back.
12115  *
12116  *     Context: Kernel thread.
12117  */
static void
sd_ssc_assessment(sd_ssc_t *ssc, enum sd_type_assessment tp_assess)
{
	int senlen = 0;
	struct uscsi_cmd *ucmdp = NULL;
	struct sd_lun *un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ucmdp = ssc->ssc_uscsi_cmd;
	ASSERT(ucmdp != NULL);

	/*
	 * sd_ssc_send sets SSC_FLAGS_NEED_ASSESSMENT; consuming it here
	 * enforces the one-to-one pairing of send and assessment calls.
	 */
	if (ssc->ssc_flags & SSC_FLAGS_NEED_ASSESSMENT) {
		ssc->ssc_flags &= ~SSC_FLAGS_NEED_ASSESSMENT;
	} else {
		/*
		 * If enter here, it indicates that we have a wrong
		 * calling sequence of sd_ssc_send and sd_ssc_assessment,
		 * both of which should be called in a pair in case of
		 * loss of FMA telemetries.
		 */
		if (ucmdp->uscsi_cdb != NULL) {
			SD_INFO(SD_LOG_SDTEST, un,
			    "sd_ssc_assessment is missing the "
			    "alternative sd_ssc_send when running 0x%x, "
			    "or there are superfluous sd_ssc_assessment for "
			    "the same sd_ssc_send.\n",
			    ucmdp->uscsi_cdb[0]);
		}
		/*
		 * Set the ssc_flags to the initial value to avoid passing
		 * down dirty flags to the following sd_ssc_send function.
		 */
		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
		return;
	}

	/*
	 * Only handle an issued command which is waiting for assessment.
	 * A command which was never issued will not have
	 * SSC_FLAGS_CMD_ISSUED set, so just log at info level and return.
	 */
	if (!(ssc->ssc_flags & SSC_FLAGS_CMD_ISSUED)) {
		sd_ssc_print(ssc, SCSI_ERR_INFO);
		return;
	} else {
		/*
		 * For an issued command, we should clear this flag in
		 * order to make the sd_ssc_t structure be used off
		 * multiple uscsi commands.
		 */
		ssc->ssc_flags &= ~SSC_FLAGS_CMD_ISSUED;
	}

	/*
	 * We will not deal with non-retryable(flag USCSI_DIAGNOSE set)
	 * commands here. And we should clear the ssc_flags before return.
	 */
	if (ucmdp->uscsi_flags & USCSI_DIAGNOSE) {
		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
		return;
	}

	switch (tp_assess) {
	case SD_FMT_IGNORE:
	case SD_FMT_IGNORE_COMPROMISE:
		/* Caller judged the condition safe to ignore: no ereport. */
		break;
	case SD_FMT_STATUS_CHECK:
		/*
		 * For a failed command(including the succeeded command
		 * with invalid data sent back).
		 */
		sd_ssc_post(ssc, SD_FM_DRV_FATAL);
		break;
	case SD_FMT_STANDARD:
		/*
		 * Always for the succeeded commands probably with sense
		 * data sent back.
		 * Limitation:
		 *	We can only handle a succeeded command with sense
		 *	data sent back when auto-request-sense is enabled.
		 */
		senlen = ssc->ssc_uscsi_cmd->uscsi_rqlen -
		    ssc->ssc_uscsi_cmd->uscsi_rqresid;
		if ((ssc->ssc_uscsi_info->ui_pkt_state & STATE_ARQ_DONE) &&
		    (un->un_f_arq_enabled == TRUE) &&
		    senlen > 0 &&
		    ssc->ssc_uscsi_cmd->uscsi_rqbuf != NULL) {
			sd_ssc_post(ssc, SD_FM_DRV_NOTICE);
		}
		break;
	default:
		/*
		 * Should not have other type of assessment.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
		    "sd_ssc_assessment got wrong "
		    "sd_type_assessment %d.\n", tp_assess);
		break;
	}
	/*
	 * Clear up the ssc_flags before return.
	 */
	ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
}
12224 
12225 /*
12226  *    Function: sd_ssc_post
12227  *
12228  * Description: 1. read the driver property to get fm-scsi-log flag.
12229  *              2. print log if fm_log_capable is non-zero.
12230  *              3. call sd_ssc_ereport_post to post ereport if possible.
12231  *
12232  *    Context: May be called from kernel thread or interrupt context.
12233  */
12234 static void
12235 sd_ssc_post(sd_ssc_t *ssc, enum sd_driver_assessment sd_assess)
12236 {
12237 	struct sd_lun	*un;
12238 	int		sd_severity;
12239 
12240 	ASSERT(ssc != NULL);
12241 	un = ssc->ssc_un;
12242 	ASSERT(un != NULL);
12243 
12244 	/*
12245 	 * We may enter here from sd_ssc_assessment(for USCSI command) or
12246 	 * by directly called from sdintr context.
12247 	 * We don't handle a non-disk drive(CD-ROM, removable media).
12248 	 * Clear the ssc_flags before return in case we've set
12249 	 * SSC_FLAGS_INVALID_XXX which should be skipped for a non-disk
12250 	 * driver.
12251 	 */
12252 	if (ISCD(un) || un->un_f_has_removable_media) {
12253 		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
12254 		return;
12255 	}
12256 
12257 	switch (sd_assess) {
12258 		case SD_FM_DRV_FATAL:
12259 			sd_severity = SCSI_ERR_FATAL;
12260 			break;
12261 		case SD_FM_DRV_RECOVERY:
12262 			sd_severity = SCSI_ERR_RECOVERED;
12263 			break;
12264 		case SD_FM_DRV_RETRY:
12265 			sd_severity = SCSI_ERR_RETRYABLE;
12266 			break;
12267 		case SD_FM_DRV_NOTICE:
12268 			sd_severity = SCSI_ERR_INFO;
12269 			break;
12270 		default:
12271 			sd_severity = SCSI_ERR_UNKNOWN;
12272 	}
12273 	/* print log */
12274 	sd_ssc_print(ssc, sd_severity);
12275 
12276 	/* always post ereport */
12277 	sd_ssc_ereport_post(ssc, sd_assess);
12278 }
12279 
12280 /*
12281  *    Function: sd_ssc_set_info
12282  *
12283  * Description: Mark ssc_flags and set ssc_info which would be the
12284  *              payload of uderr ereport. This function will cause
12285  *              sd_ssc_ereport_post to post uderr ereport only.
12286  *              Besides, when ssc_flags == SSC_FLAGS_INVALID_DATA(USCSI),
12287  *              the function will also call SD_ERROR or scsi_log for a
12288  *              CDROM/removable-media/DDI_FM_NOT_CAPABLE device.
12289  *
12290  * Arguments: ssc - the struct of sd_ssc_t will bring uscsi_cmd and
12291  *                  sd_uscsi_info in.
12292  *            ssc_flags - indicate the sub-category of a uderr.
12293  *            comp - this argument is meaningful only when
12294  *                   ssc_flags == SSC_FLAGS_INVALID_DATA, and its possible
12295  *                   values include:
12296  *                   > 0, SD_ERROR is used with comp as the driver logging
12297  *                   component;
12298  *                   = 0, scsi-log is used to log error telemetries;
12299  *                   < 0, no log available for this telemetry.
12300  *
12301  *    Context: Kernel thread or interrupt context
12302  */
12303 static void
12304 sd_ssc_set_info(sd_ssc_t *ssc, int ssc_flags, uint_t comp, const char *fmt, ...)
12305 {
12306 	va_list	ap;
12307 
12308 	ASSERT(ssc != NULL);
12309 	ASSERT(ssc->ssc_un != NULL);
12310 
12311 	ssc->ssc_flags |= ssc_flags;
12312 	va_start(ap, fmt);
12313 	(void) vsnprintf(ssc->ssc_info, sizeof (ssc->ssc_info), fmt, ap);
12314 	va_end(ap);
12315 
12316 	/*
12317 	 * If SSC_FLAGS_INVALID_DATA is set, it should be a uscsi command
12318 	 * with invalid data sent back. For non-uscsi command, the
12319 	 * following code will be bypassed.
12320 	 */
12321 	if (ssc_flags & SSC_FLAGS_INVALID_DATA) {
12322 		if (SD_FM_LOG(ssc->ssc_un) == SD_FM_LOG_NSUP) {
12323 			/*
12324 			 * If the error belong to certain component and we
12325 			 * do not want it to show up on the console, we
12326 			 * will use SD_ERROR, otherwise scsi_log is
12327 			 * preferred.
12328 			 */
12329 			if (comp > 0) {
12330 				SD_ERROR(comp, ssc->ssc_un, ssc->ssc_info);
12331 			} else if (comp == 0) {
12332 				scsi_log(SD_DEVINFO(ssc->ssc_un), sd_label,
12333 				    CE_WARN, ssc->ssc_info);
12334 			}
12335 		}
12336 	}
12337 }
12338 
12339 /*
12340  *    Function: sd_buf_iodone
12341  *
12342  * Description: Frees the sd_xbuf & returns the buf to its originator.
12343  *
12344  *     Context: May be called from interrupt context.
12345  */
12346 /* ARGSUSED */
12347 static void
12348 sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
12349 {
12350 	struct sd_xbuf *xp;
12351 
12352 	ASSERT(un != NULL);
12353 	ASSERT(bp != NULL);
12354 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12355 
12356 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
12357 
12358 	xp = SD_GET_XBUF(bp);
12359 	ASSERT(xp != NULL);
12360 
12361 	/* xbuf is gone after this */
12362 	if (ddi_xbuf_done(bp, un->un_xbuf_attr)) {
12363 		mutex_enter(SD_MUTEX(un));
12364 
12365 		/*
12366 		 * Grab time when the cmd completed.
12367 		 * This is used for determining if the system has been
12368 		 * idle long enough to make it idle to the PM framework.
12369 		 * This is for lowering the overhead, and therefore improving
12370 		 * performance per I/O operation.
12371 		 */
12372 		un->un_pm_idle_time = ddi_get_time();
12373 
12374 		un->un_ncmds_in_driver--;
12375 		ASSERT(un->un_ncmds_in_driver >= 0);
12376 		SD_INFO(SD_LOG_IO, un,
12377 		    "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
12378 		    un->un_ncmds_in_driver);
12379 
12380 		mutex_exit(SD_MUTEX(un));
12381 	}
12382 
12383 	biodone(bp);				/* bp is gone after this */
12384 
12385 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
12386 }
12387 
12388 
12389 /*
12390  *    Function: sd_uscsi_iodone
12391  *
12392  * Description: Frees the sd_xbuf & returns the buf to its originator.
12393  *
12394  *     Context: May be called from interrupt context.
12395  */
12396 /* ARGSUSED */
12397 static void
12398 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12399 {
12400 	struct sd_xbuf *xp;
12401 
12402 	ASSERT(un != NULL);
12403 	ASSERT(bp != NULL);
12404 
12405 	xp = SD_GET_XBUF(bp);
12406 	ASSERT(xp != NULL);
12407 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12408 
12409 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
12410 
12411 	bp->b_private = xp->xb_private;
12412 
12413 	mutex_enter(SD_MUTEX(un));
12414 
12415 	/*
12416 	 * Grab time when the cmd completed.
12417 	 * This is used for determining if the system has been
12418 	 * idle long enough to make it idle to the PM framework.
12419 	 * This is for lowering the overhead, and therefore improving
12420 	 * performance per I/O operation.
12421 	 */
12422 	un->un_pm_idle_time = ddi_get_time();
12423 
12424 	un->un_ncmds_in_driver--;
12425 	ASSERT(un->un_ncmds_in_driver >= 0);
12426 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
12427 	    un->un_ncmds_in_driver);
12428 
12429 	mutex_exit(SD_MUTEX(un));
12430 
12431 	if (((struct uscsi_cmd *)(xp->xb_pktinfo))->uscsi_rqlen >
12432 	    SENSE_LENGTH) {
12433 		kmem_free(xp, sizeof (struct sd_xbuf) - SENSE_LENGTH +
12434 		    MAX_SENSE_LENGTH);
12435 	} else {
12436 		kmem_free(xp, sizeof (struct sd_xbuf));
12437 	}
12438 
12439 	biodone(bp);
12440 
12441 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
12442 }
12443 
12444 
12445 /*
12446  *    Function: sd_mapblockaddr_iostart
12447  *
12448  * Description: Verify request lies within the partition limits for
12449  *		the indicated minor device.  Issue "overrun" buf if
12450  *		request would exceed partition range.  Converts
12451  *		partition-relative block address to absolute.
12452  *
12453  *              Upon exit of this function:
12454  *              1.I/O is aligned
12455  *                 xp->xb_blkno represents the absolute sector address
12456  *              2.I/O is misaligned
12457  *                 xp->xb_blkno represents the absolute logical block address
12458  *                 based on DEV_BSIZE. The logical block address will be
12459  *                 converted to physical sector address in sd_mapblocksize_\
12460  *                 iostart.
12461  *              3.I/O is misaligned but is aligned in "overrun" buf
12462  *                 xp->xb_blkno represents the absolute logical block address
12463  *                 based on DEV_BSIZE. The logical block address will be
12464  *                 converted to physical sector address in sd_mapblocksize_\
12465  *                 iostart. But no RMW will be issued in this case.
12466  *
12467  *     Context: Can sleep
12468  *
12469  *      Issues: This follows what the old code did, in terms of accessing
12470  *		some of the partition info in the unit struct without holding
 *		the mutex.  This is a general issue, if the partition info
12472  *		can be altered while IO is in progress... as soon as we send
12473  *		a buf, its partitioning can be invalid before it gets to the
12474  *		device.  Probably the right fix is to move partitioning out
12475  *		of the driver entirely.
12476  */
12477 
static void
sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
{
	diskaddr_t	nblocks;	/* #blocks in the given partition */
	daddr_t	blocknum;	/* Block number specified by the buf */
	size_t	requested_nblocks;
	size_t	available_nblocks;
	int	partition;
	diskaddr_t	partition_offset;
	struct sd_xbuf *xp;
	int secmask = 0, blknomask = 0;
	ushort_t is_aligned = TRUE;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	/*
	 * If the geometry is not indicated as valid, attempt to access
	 * the unit & verify the geometry/label. This can be the case for
	 * removable-media devices, or if the device was opened in
	 * NDELAY/NONBLOCK mode.
	 */
	partition = SDPART(bp->b_edev);

	if (!SD_IS_VALID_LABEL(un)) {
		sd_ssc_t *ssc;
		/*
		 * Initialize sd_ssc_t for internal uscsi commands.
		 * To avoid a potential performance issue, we only
		 * allocate this memory when the label is invalid.
		 */
		ssc = sd_ssc_init(un);

		if (sd_ready_and_valid(ssc, partition) != SD_READY_VALID) {
			/*
			 * For removable devices it is possible to start an
			 * I/O without a media by opening the device in nodelay
			 * mode. Also for writable CDs there can be many
			 * scenarios where there is no geometry yet but volume
			 * manager is trying to issue a read() just because
			 * it can see TOC on the CD. So do not print a message
			 * for removables.
			 */
			if (!un->un_f_has_removable_media) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "i/o to invalid geometry\n");
			}
			/* Fail the request and complete it immediately. */
			bioerror(bp, EIO);
			bp->b_resid = bp->b_bcount;
			SD_BEGIN_IODONE(index, un, bp);

			sd_ssc_fini(ssc);
			return;
		}
		sd_ssc_fini(ssc);
	}

	/* Look up the size and offset of the target partition. */
	nblocks = 0;
	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
	    &nblocks, &partition_offset, NULL, NULL, (void *)SD_PATH_DIRECT);

	/*
	 * The request is aligned when both its starting block and its
	 * byte count fall on target-block-size boundaries.
	 */
	blknomask = (un->un_tgt_blocksize / DEV_BSIZE) - 1;
	secmask = un->un_tgt_blocksize - 1;

	if ((bp->b_lblkno & (blknomask)) || (bp->b_bcount & (secmask))) {
		is_aligned = FALSE;
	}

	if (!(NOT_DEVBSIZE(un))) {
		/*
		 * If I/O is aligned, no need to involve RMW(Read Modify Write)
		 * Convert the logical block number to target's physical sector
		 * number.
		 */
		if (is_aligned) {
			xp->xb_blkno = SD_SYS2TGTBLOCK(un, xp->xb_blkno);
		} else {
			/* Misaligned: policy decides how RMW is handled. */
			switch (un->un_f_rmw_type) {
			case SD_RMW_TYPE_RETURN_ERROR:
				bp->b_flags |= B_ERROR;
				goto error_exit;

			case SD_RMW_TYPE_DEFAULT:
				mutex_enter(SD_MUTEX(un));
				/*
				 * Warn once, then batch further occurrences
				 * under a timeout to avoid flooding the log.
				 */
				if (un->un_rmw_msg_timeid == NULL) {
					scsi_log(SD_DEVINFO(un), sd_label,
					    CE_WARN, "I/O request is not "
					    "aligned with %d disk sector size. "
					    "It is handled through Read Modify "
					    "Write but the performance is "
					    "very low.\n",
					    un->un_tgt_blocksize);
					un->un_rmw_msg_timeid =
					    timeout(sd_rmw_msg_print_handler,
					    un, SD_RMW_MSG_PRINT_TIMEOUT);
				} else {
					un->un_rmw_incre_count ++;
				}
				mutex_exit(SD_MUTEX(un));
				break;

			case SD_RMW_TYPE_NO_WARNING:
			default:
				break;
			}

			/*
			 * For the misaligned path, keep the partition limits
			 * in system-block units to match xb_blkno.
			 */
			nblocks = SD_TGT2SYSBLOCK(un, nblocks);
			partition_offset = SD_TGT2SYSBLOCK(un,
			    partition_offset);
		}
	}

	/*
	 * blocknum is the starting block number of the request. At this
	 * point it is still relative to the start of the minor device.
	 */
	blocknum = xp->xb_blkno;

	/*
	 * Legacy: If the starting block number is one past the last block
	 * in the partition, do not set B_ERROR in the buf.
	 */
	if (blocknum == nblocks)  {
		goto error_exit;
	}

	/*
	 * Confirm that the first block of the request lies within the
	 * partition limits. Also the requested number of bytes must be
	 * a multiple of the system block size.
	 */
	if ((blocknum < 0) || (blocknum >= nblocks) ||
	    ((bp->b_bcount & (DEV_BSIZE - 1)) != 0)) {
		bp->b_flags |= B_ERROR;
		goto error_exit;
	}

	/*
	 * If the requested # blocks exceeds the available # blocks, that
	 * is an overrun of the partition.
	 */
	if ((!NOT_DEVBSIZE(un)) && is_aligned) {
		requested_nblocks = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
	} else {
		requested_nblocks = SD_BYTES2SYSBLOCKS(bp->b_bcount);
	}

	available_nblocks = (size_t)(nblocks - blocknum);
	ASSERT(nblocks >= blocknum);

	if (requested_nblocks > available_nblocks) {
		size_t resid;

		/*
		 * Allocate an "overrun" buf to allow the request to proceed
		 * for the amount of space available in the partition. The
		 * amount not transferred will be added into the b_resid
		 * when the operation is complete. The overrun buf
		 * replaces the original buf here, and the original buf
		 * is saved inside the overrun buf, for later use.
		 */
		if ((!NOT_DEVBSIZE(un)) && is_aligned) {
			resid = SD_TGTBLOCKS2BYTES(un,
			    (offset_t)(requested_nblocks - available_nblocks));
		} else {
			resid = SD_SYSBLOCKS2BYTES(
			    (offset_t)(requested_nblocks - available_nblocks));
		}

		size_t count = bp->b_bcount - resid;
		/*
		 * Note: count is an unsigned entity thus it'll NEVER
		 * be less than 0 so ASSERT the original values are
		 * correct.
		 */
		ASSERT(bp->b_bcount >= resid);

		bp = sd_bioclone_alloc(bp, count, blocknum,
		    (int (*)(struct buf *)) sd_mapblockaddr_iodone);
		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
		ASSERT(xp != NULL);
	}

	/* At this point there should be no residual for this buf. */
	ASSERT(bp->b_resid == 0);

	/* Convert the block number to an absolute address. */
	xp->xb_blkno += partition_offset;

	SD_NEXT_IOSTART(index, un, bp);

	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);

	return;

error_exit:
	/* Complete the request without transferring any data. */
	bp->b_resid = bp->b_bcount;
	SD_BEGIN_IODONE(index, un, bp);
	SD_TRACE(SD_LOG_IO_PARTITION, un,
	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
}
12687 
12688 
12689 /*
12690  *    Function: sd_mapblockaddr_iodone
12691  *
12692  * Description: Completion-side processing for partition management.
12693  *
12694  *     Context: May be called under interrupt context
12695  */
12696 
12697 static void
12698 sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
12699 {
12700 	/* int	partition; */	/* Not used, see below. */
12701 	ASSERT(un != NULL);
12702 	ASSERT(bp != NULL);
12703 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12704 
12705 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12706 	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
12707 
12708 	if (bp->b_iodone == (int (*)(struct buf *)) sd_mapblockaddr_iodone) {
12709 		/*
12710 		 * We have an "overrun" buf to deal with...
12711 		 */
12712 		struct sd_xbuf	*xp;
12713 		struct buf	*obp;	/* ptr to the original buf */
12714 
12715 		xp = SD_GET_XBUF(bp);
12716 		ASSERT(xp != NULL);
12717 
12718 		/* Retrieve the pointer to the original buf */
12719 		obp = (struct buf *)xp->xb_private;
12720 		ASSERT(obp != NULL);
12721 
12722 		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
12723 		bioerror(obp, bp->b_error);
12724 
12725 		sd_bioclone_free(bp);
12726 
12727 		/*
12728 		 * Get back the original buf.
12729 		 * Note that since the restoration of xb_blkno below
12730 		 * was removed, the sd_xbuf is not needed.
12731 		 */
12732 		bp = obp;
12733 		/*
12734 		 * xp = SD_GET_XBUF(bp);
12735 		 * ASSERT(xp != NULL);
12736 		 */
12737 	}
12738 
12739 	/*
12740 	 * Convert sd->xb_blkno back to a minor-device relative value.
12741 	 * Note: this has been commented out, as it is not needed in the
12742 	 * current implementation of the driver (ie, since this function
12743 	 * is at the top of the layering chains, so the info will be
12744 	 * discarded) and it is in the "hot" IO path.
12745 	 *
12746 	 * partition = getminor(bp->b_edev) & SDPART_MASK;
12747 	 * xp->xb_blkno -= un->un_offset[partition];
12748 	 */
12749 
12750 	SD_NEXT_IODONE(index, un, bp);
12751 
12752 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12753 	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
12754 }
12755 
12756 
12757 /*
12758  *    Function: sd_mapblocksize_iostart
12759  *
12760  * Description: Convert between system block size (un->un_sys_blocksize)
12761  *		and target block size (un->un_tgt_blocksize).
12762  *
12763  *     Context: Can sleep to allocate resources.
12764  *
12765  * Assumptions: A higher layer has already performed any partition validation,
12766  *		and converted the xp->xb_blkno to an absolute value relative
12767  *		to the start of the device.
12768  *
12769  *		It is also assumed that the higher layer has implemented
12770  *		an "overrun" mechanism for the case where the request would
12771  *		read/write beyond the end of a partition.  In this case we
12772  *		assume (and ASSERT) that bp->b_resid == 0.
12773  *
12774  *		Note: The implementation for this routine assumes the target
12775  *		block size remains constant between allocation and transport.
12776  */
12777 
12778 static void
12779 sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
12780 {
12781 	struct sd_mapblocksize_info	*bsp;
12782 	struct sd_xbuf			*xp;
12783 	offset_t first_byte;
12784 	daddr_t	start_block, end_block;
12785 	daddr_t	request_bytes;
12786 	ushort_t is_aligned = FALSE;
12787 
12788 	ASSERT(un != NULL);
12789 	ASSERT(bp != NULL);
12790 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12791 	ASSERT(bp->b_resid == 0);
12792 
12793 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12794 	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);
12795 
12796 	/*
12797 	 * For a non-writable CD, a write request is an error
12798 	 */
12799 	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
12800 	    (un->un_f_mmc_writable_media == FALSE)) {
12801 		bioerror(bp, EIO);
12802 		bp->b_resid = bp->b_bcount;
12803 		SD_BEGIN_IODONE(index, un, bp);
12804 		return;
12805 	}
12806 
12807 	/*
12808 	 * We do not need a shadow buf if the device is using
12809 	 * un->un_sys_blocksize as its block size or if bcount == 0.
12810 	 * In this case there is no layer-private data block allocated.
12811 	 */
12812 	if ((un->un_tgt_blocksize == DEV_BSIZE) ||
12813 	    (bp->b_bcount == 0)) {
12814 		goto done;
12815 	}
12816 
12817 #if defined(__i386) || defined(__amd64)
12818 	/* We do not support non-block-aligned transfers for ROD devices */
12819 	ASSERT(!ISROD(un));
12820 #endif
12821 
12822 	xp = SD_GET_XBUF(bp);
12823 	ASSERT(xp != NULL);
12824 
12825 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12826 	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
12827 	    un->un_tgt_blocksize, DEV_BSIZE);
12828 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12829 	    "request start block:0x%x\n", xp->xb_blkno);
12830 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
12831 	    "request len:0x%x\n", bp->b_bcount);
12832 
12833 	/*
12834 	 * Allocate the layer-private data area for the mapblocksize layer.
12835 	 * Layers are allowed to use the xp_private member of the sd_xbuf
12836 	 * struct to store the pointer to their layer-private data block, but
12837 	 * each layer also has the responsibility of restoring the prior
12838 	 * contents of xb_private before returning the buf/xbuf to the
12839 	 * higher layer that sent it.
12840 	 *
12841 	 * Here we save the prior contents of xp->xb_private into the
12842 	 * bsp->mbs_oprivate field of our layer-private data area. This value
12843 	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
12844 	 * the layer-private area and returning the buf/xbuf to the layer
12845 	 * that sent it.
12846 	 *
12847 	 * Note that here we use kmem_zalloc for the allocation as there are
12848 	 * parts of the mapblocksize code that expect certain fields to be
12849 	 * zero unless explicitly set to a required value.
12850 	 */
12851 	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12852 	bsp->mbs_oprivate = xp->xb_private;
12853 	xp->xb_private = bsp;
12854 
12855 	/*
12856 	 * This treats the data on the disk (target) as an array of bytes.
12857 	 * first_byte is the byte offset, from the beginning of the device,
12858 	 * to the location of the request. This is converted from a
12859 	 * un->un_sys_blocksize block address to a byte offset, and then back
12860 	 * to a block address based upon a un->un_tgt_blocksize block size.
12861 	 *
12862 	 * xp->xb_blkno should be absolute upon entry into this function,
12863 	 * but, but it is based upon partitions that use the "system"
12864 	 * block size. It must be adjusted to reflect the block size of
12865 	 * the target.
12866 	 *
12867 	 * Note that end_block is actually the block that follows the last
12868 	 * block of the request, but that's what is needed for the computation.
12869 	 */
12870 	first_byte  = SD_SYSBLOCKS2BYTES((offset_t)xp->xb_blkno);
12871 	start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
12872 	end_block   = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
12873 	    un->un_tgt_blocksize;
12874 
12875 	/* request_bytes is rounded up to a multiple of the target block size */
12876 	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
12877 
12878 	/*
12879 	 * See if the starting address of the request and the request
12880 	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
12881 	 * then we do not need to allocate a shadow buf to handle the request.
12882 	 */
12883 	if (((first_byte   % un->un_tgt_blocksize) == 0) &&
12884 	    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
12885 		is_aligned = TRUE;
12886 	}
12887 
12888 	if ((bp->b_flags & B_READ) == 0) {
12889 		/*
12890 		 * Lock the range for a write operation. An aligned request is
12891 		 * considered a simple write; otherwise the request must be a
12892 		 * read-modify-write.
12893 		 */
12894 		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
12895 		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
12896 	}
12897 
12898 	/*
12899 	 * Alloc a shadow buf if the request is not aligned. Also, this is
12900 	 * where the READ command is generated for a read-modify-write. (The
12901 	 * write phase is deferred until after the read completes.)
12902 	 */
12903 	if (is_aligned == FALSE) {
12904 
12905 		struct sd_mapblocksize_info	*shadow_bsp;
12906 		struct sd_xbuf	*shadow_xp;
12907 		struct buf	*shadow_bp;
12908 
12909 		/*
12910 		 * Allocate the shadow buf and it associated xbuf. Note that
12911 		 * after this call the xb_blkno value in both the original
12912 		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
12913 		 * same: absolute relative to the start of the device, and
12914 		 * adjusted for the target block size. The b_blkno in the
12915 		 * shadow buf will also be set to this value. We should never
12916 		 * change b_blkno in the original bp however.
12917 		 *
12918 		 * Note also that the shadow buf will always need to be a
12919 		 * READ command, regardless of whether the incoming command
12920 		 * is a READ or a WRITE.
12921 		 */
12922 		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
12923 		    xp->xb_blkno,
12924 		    (int (*)(struct buf *)) sd_mapblocksize_iodone);
12925 
12926 		shadow_xp = SD_GET_XBUF(shadow_bp);
12927 
12928 		/*
12929 		 * Allocate the layer-private data for the shadow buf.
12930 		 * (No need to preserve xb_private in the shadow xbuf.)
12931 		 */
12932 		shadow_xp->xb_private = shadow_bsp =
12933 		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
12934 
12935 		/*
12936 		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
12937 		 * to figure out where the start of the user data is (based upon
12938 		 * the system block size) in the data returned by the READ
12939 		 * command (which will be based upon the target blocksize). Note
12940 		 * that this is only really used if the request is unaligned.
12941 		 */
12942 		bsp->mbs_copy_offset = (ssize_t)(first_byte -
12943 		    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
12944 		ASSERT((bsp->mbs_copy_offset >= 0) &&
12945 		    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
12946 
12947 		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
12948 
12949 		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;
12950 
12951 		/* Transfer the wmap (if any) to the shadow buf */
12952 		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
12953 		bsp->mbs_wmp = NULL;
12954 
12955 		/*
12956 		 * The shadow buf goes on from here in place of the
12957 		 * original buf.
12958 		 */
12959 		shadow_bsp->mbs_orig_bp = bp;
12960 		bp = shadow_bp;
12961 	}
12962 
12963 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12964 	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
12965 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12966 	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
12967 	    request_bytes);
12968 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
12969 	    "sd_mapblocksize_iostart: shadow buf:0x%x\n", bp);
12970 
12971 done:
12972 	SD_NEXT_IOSTART(index, un, bp);
12973 
12974 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12975 	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
12976 }
12977 
12978 
12979 /*
12980  *    Function: sd_mapblocksize_iodone
12981  *
12982  * Description: Completion side processing for block-size mapping.
12983  *
12984  *     Context: May be called under interrupt context
12985  */
12986 
static void
sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
{
	struct sd_mapblocksize_info	*bsp;
	struct sd_xbuf	*xp;
	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
	struct buf	*orig_bp;	/* ptr to the original buf */
	offset_t	shadow_end;
	offset_t	request_end;
	offset_t	shadow_start;
	ssize_t		copy_offset;
	size_t		copy_length;
	size_t		shortfall;
	uint_t		is_write;	/* TRUE if this bp is a WRITE */
	uint_t		has_wmap;	/* TRUE is this bp has a wmap */

	ASSERT(un != NULL);
	ASSERT(bp != NULL);

	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);

	/*
	 * There is no shadow buf or layer-private data if the target is
	 * using un->un_sys_blocksize as its block size or if bcount == 0.
	 * NOTE(review): the code tests against DEV_BSIZE while the text
	 * above says un_sys_blocksize -- presumably the two are equal in
	 * this revision; confirm before changing either.
	 */
	if ((un->un_tgt_blocksize == DEV_BSIZE) ||
	    (bp->b_bcount == 0)) {
		goto exit;
	}

	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	/* Retrieve the pointer to the layer-private data area from the xbuf. */
	bsp = xp->xb_private;

	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;

	if (is_write) {
		/*
		 * For a WRITE request we must free up the block range that
		 * we have locked up.  This holds regardless of whether this is
		 * an aligned write request or a read-modify-write request.
		 */
		sd_range_unlock(un, bsp->mbs_wmp);
		bsp->mbs_wmp = NULL;
	}

	/*
	 * Only a shadow buf (allocated by sd_mapblocksize_iostart() for an
	 * unaligned request) has its b_iodone set to this function; an
	 * aligned request's buf keeps whatever iodone the upper layers set.
	 */
	if ((bp->b_iodone != (int(*)(struct buf *))sd_mapblocksize_iodone)) {
		/*
		 * An aligned read or write command will have no shadow buf;
		 * there is not much else to do with it.
		 */
		goto done;
	}

	orig_bp = bsp->mbs_orig_bp;
	ASSERT(orig_bp != NULL);
	orig_xp = SD_GET_XBUF(orig_bp);
	ASSERT(orig_xp != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	if (!is_write && has_wmap) {
		/*
		 * A READ with a wmap means this is the READ phase of a
		 * read-modify-write. If an error occurred on the READ then
		 * we do not proceed with the WRITE phase or copy any data.
		 * Just release the write maps and return with an error.
		 */
		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
			orig_bp->b_resid = orig_bp->b_bcount;
			bioerror(orig_bp, bp->b_error);
			sd_range_unlock(un, bsp->mbs_wmp);
			goto freebuf_done;
		}
	}

	/*
	 * Here is where we set up to copy the data from the shadow buf
	 * into the space associated with the original buf.
	 *
	 * To deal with the conversion between block sizes, these
	 * computations treat the data as an array of bytes, with the
	 * first byte (byte 0) corresponding to the first byte in the
	 * first block on the disk.
	 */

	/*
	 * shadow_start and shadow_len indicate the location and size of
	 * the data returned with the shadow IO request.
	 */
	shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;

	/*
	 * copy_offset gives the offset (in bytes) from the start of the first
	 * block of the READ request to the beginning of the data.  We retrieve
	 * this value from xb_pktp in the ORIGINAL xbuf, as it has been saved
	 * there by sd_mapblockize_iostart(). copy_length gives the amount of
	 * data to be copied (in bytes).
	 */
	copy_offset  = bsp->mbs_copy_offset;
	ASSERT((copy_offset >= 0) && (copy_offset < un->un_tgt_blocksize));
	copy_length  = orig_bp->b_bcount;
	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;

	/*
	 * Set up the resid and error fields of orig_bp as appropriate.
	 */
	if (shadow_end >= request_end) {
		/* We got all the requested data; set resid to zero */
		orig_bp->b_resid = 0;
	} else {
		/*
		 * We failed to get enough data to fully satisfy the original
		 * request. Just copy back whatever data we got and set
		 * up the residual and error code as required.
		 *
		 * 'shortfall' is the amount by which the data received with the
		 * shadow buf has "fallen short" of the requested amount.
		 */
		shortfall = (size_t)(request_end - shadow_end);

		if (shortfall > orig_bp->b_bcount) {
			/*
			 * We did not get enough data to even partially
			 * fulfill the original request.  The residual is
			 * equal to the amount requested.
			 */
			orig_bp->b_resid = orig_bp->b_bcount;
		} else {
			/*
			 * We did not get all the data that we requested
			 * from the device, but we will try to return what
			 * portion we did get.
			 */
			orig_bp->b_resid = shortfall;
		}
		ASSERT(copy_length >= orig_bp->b_resid);
		copy_length  -= orig_bp->b_resid;
	}

	/* Propagate the error code from the shadow buf to the original buf */
	bioerror(orig_bp, bp->b_error);

	if (is_write) {
		goto freebuf_done;	/* No data copying for a WRITE */
	}

	if (has_wmap) {
		/*
		 * This is a READ command from the READ phase of a
		 * read-modify-write request. We have to copy the data given
		 * by the user OVER the data returned by the READ command,
		 * then convert the command from a READ to a WRITE and send
		 * it back to the target.
		 */
		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
		    copy_length);

		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */

		/*
		 * Dispatch the WRITE command to the taskq thread, which
		 * will in turn send the command to the target. When the
		 * WRITE command completes, we (sd_mapblocksize_iodone())
		 * will get called again as part of the iodone chain
		 * processing for it. Note that we will still be dealing
		 * with the shadow buf at that point.
		 */
		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
		    KM_NOSLEEP) != 0) {
			/*
			 * Dispatch was successful so we are done. Return
			 * without going any higher up the iodone chain. Do
			 * not free up any layer-private data until after the
			 * WRITE completes.
			 */
			return;
		}

		/*
		 * Dispatch of the WRITE command failed; set up the error
		 * condition and send this IO back up the iodone chain.
		 */
		bioerror(orig_bp, EIO);
		orig_bp->b_resid = orig_bp->b_bcount;

	} else {
		/*
		 * This is a regular READ request (ie, not a RMW). Copy the
		 * data from the shadow buf into the original buf. The
		 * copy_offset compensates for any "misalignment" between the
		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
		 * original buf (with its un->un_sys_blocksize blocks).
		 */
		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
		    copy_length);
	}

freebuf_done:

	/*
	 * At this point we still have both the shadow buf AND the original
	 * buf to deal with, as well as the layer-private data area in each.
	 * Local variables are as follows:
	 *
	 * bp -- points to shadow buf
	 * xp -- points to xbuf of shadow buf
	 * bsp -- points to layer-private data area of shadow buf
	 * orig_bp -- points to original buf
	 *
	 * First free the shadow buf and its associated xbuf, then free the
	 * layer-private data area from the shadow buf. There is no need to
	 * restore xb_private in the shadow xbuf.
	 */
	sd_shadow_buf_free(bp);
	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));

	/*
	 * Now update the local variables to point to the original buf, xbuf,
	 * and layer-private area.
	 */
	bp = orig_bp;
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	ASSERT(xp == orig_xp);
	bsp = xp->xb_private;
	ASSERT(bsp != NULL);

done:
	/*
	 * Restore xb_private to whatever it was set to by the next higher
	 * layer in the chain, then free the layer-private data area.
	 */
	xp->xb_private = bsp->mbs_oprivate;
	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));

exit:
	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);

	SD_NEXT_IODONE(index, un, bp);
}
13233 
13234 
13235 /*
13236  *    Function: sd_checksum_iostart
13237  *
13238  * Description: A stub function for a layer that's currently not used.
13239  *		For now just a placeholder.
13240  *
13241  *     Context: Kernel thread context
13242  */
13243 
13244 static void
13245 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
13246 {
13247 	ASSERT(un != NULL);
13248 	ASSERT(bp != NULL);
13249 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13250 	SD_NEXT_IOSTART(index, un, bp);
13251 }
13252 
13253 
13254 /*
13255  *    Function: sd_checksum_iodone
13256  *
13257  * Description: A stub function for a layer that's currently not used.
13258  *		For now just a placeholder.
13259  *
13260  *     Context: May be called under interrupt context
13261  */
13262 
13263 static void
13264 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
13265 {
13266 	ASSERT(un != NULL);
13267 	ASSERT(bp != NULL);
13268 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13269 	SD_NEXT_IODONE(index, un, bp);
13270 }
13271 
13272 
13273 /*
13274  *    Function: sd_checksum_uscsi_iostart
13275  *
13276  * Description: A stub function for a layer that's currently not used.
13277  *		For now just a placeholder.
13278  *
13279  *     Context: Kernel thread context
13280  */
13281 
13282 static void
13283 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
13284 {
13285 	ASSERT(un != NULL);
13286 	ASSERT(bp != NULL);
13287 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13288 	SD_NEXT_IOSTART(index, un, bp);
13289 }
13290 
13291 
13292 /*
13293  *    Function: sd_checksum_uscsi_iodone
13294  *
13295  * Description: A stub function for a layer that's currently not used.
13296  *		For now just a placeholder.
13297  *
13298  *     Context: May be called under interrupt context
13299  */
13300 
13301 static void
13302 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
13303 {
13304 	ASSERT(un != NULL);
13305 	ASSERT(bp != NULL);
13306 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13307 	SD_NEXT_IODONE(index, un, bp);
13308 }
13309 
13310 
13311 /*
13312  *    Function: sd_pm_iostart
13313  *
13314  * Description: iostart-side routine for Power mangement.
13315  *
13316  *     Context: Kernel thread context
13317  */
13318 
13319 static void
13320 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
13321 {
13322 	ASSERT(un != NULL);
13323 	ASSERT(bp != NULL);
13324 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13325 	ASSERT(!mutex_owned(&un->un_pm_mutex));
13326 
13327 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
13328 
13329 	if (sd_pm_entry(un) != DDI_SUCCESS) {
13330 		/*
13331 		 * Set up to return the failed buf back up the 'iodone'
13332 		 * side of the calling chain.
13333 		 */
13334 		bioerror(bp, EIO);
13335 		bp->b_resid = bp->b_bcount;
13336 
13337 		SD_BEGIN_IODONE(index, un, bp);
13338 
13339 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
13340 		return;
13341 	}
13342 
13343 	SD_NEXT_IOSTART(index, un, bp);
13344 
13345 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
13346 }
13347 
13348 
13349 /*
13350  *    Function: sd_pm_iodone
13351  *
13352  * Description: iodone-side routine for power mangement.
13353  *
13354  *     Context: may be called from interrupt context
13355  */
13356 
13357 static void
13358 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
13359 {
13360 	ASSERT(un != NULL);
13361 	ASSERT(bp != NULL);
13362 	ASSERT(!mutex_owned(&un->un_pm_mutex));
13363 
13364 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
13365 
13366 	/*
13367 	 * After attach the following flag is only read, so don't
13368 	 * take the penalty of acquiring a mutex for it.
13369 	 */
13370 	if (un->un_f_pm_is_enabled == TRUE) {
13371 		sd_pm_exit(un);
13372 	}
13373 
13374 	SD_NEXT_IODONE(index, un, bp);
13375 
13376 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
13377 }
13378 
13379 
13380 /*
13381  *    Function: sd_core_iostart
13382  *
13383  * Description: Primary driver function for enqueuing buf(9S) structs from
13384  *		the system and initiating IO to the target device
13385  *
13386  *     Context: Kernel thread context. Can sleep.
13387  *
13388  * Assumptions:  - The given xp->xb_blkno is absolute
13389  *		   (ie, relative to the start of the device).
13390  *		 - The IO is to be done using the native blocksize of
13391  *		   the device, as specified in un->un_tgt_blocksize.
13392  */
13393 /* ARGSUSED */
13394 static void
13395 sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
13396 {
13397 	struct sd_xbuf *xp;
13398 
13399 	ASSERT(un != NULL);
13400 	ASSERT(bp != NULL);
13401 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13402 	ASSERT(bp->b_resid == 0);
13403 
13404 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
13405 
13406 	xp = SD_GET_XBUF(bp);
13407 	ASSERT(xp != NULL);
13408 
13409 	mutex_enter(SD_MUTEX(un));
13410 
13411 	/*
13412 	 * If we are currently in the failfast state, fail any new IO
13413 	 * that has B_FAILFAST set, then return.
13414 	 */
13415 	if ((bp->b_flags & B_FAILFAST) &&
13416 	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
13417 		mutex_exit(SD_MUTEX(un));
13418 		bioerror(bp, EIO);
13419 		bp->b_resid = bp->b_bcount;
13420 		SD_BEGIN_IODONE(index, un, bp);
13421 		return;
13422 	}
13423 
13424 	if (SD_IS_DIRECT_PRIORITY(xp)) {
13425 		/*
13426 		 * Priority command -- transport it immediately.
13427 		 *
13428 		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
13429 		 * because all direct priority commands should be associated
13430 		 * with error recovery actions which we don't want to retry.
13431 		 */
13432 		sd_start_cmds(un, bp);
13433 	} else {
13434 		/*
13435 		 * Normal command -- add it to the wait queue, then start
13436 		 * transporting commands from the wait queue.
13437 		 */
13438 		sd_add_buf_to_waitq(un, bp);
13439 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
13440 		sd_start_cmds(un, NULL);
13441 	}
13442 
13443 	mutex_exit(SD_MUTEX(un));
13444 
13445 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
13446 }
13447 
13448 
13449 /*
13450  *    Function: sd_init_cdb_limits
13451  *
13452  * Description: This is to handle scsi_pkt initialization differences
13453  *		between the driver platforms.
13454  *
13455  *		Legacy behaviors:
13456  *
13457  *		If the block number or the sector count exceeds the
13458  *		capabilities of a Group 0 command, shift over to a
13459  *		Group 1 command. We don't blindly use Group 1
13460  *		commands because a) some drives (CDC Wren IVs) get a
13461  *		bit confused, and b) there is probably a fair amount
13462  *		of speed difference for a target to receive and decode
13463  *		a 10 byte command instead of a 6 byte command.
13464  *
13465  *		The xfer time difference of 6 vs 10 byte CDBs is
13466  *		still significant so this code is still worthwhile.
13467  *		10 byte CDBs are very inefficient with the fas HBA driver
13468  *		and older disks. Each CDB byte took 1 usec with some
13469  *		popular disks.
13470  *
13471  *     Context: Must be called at attach time
13472  */
13473 
13474 static void
13475 sd_init_cdb_limits(struct sd_lun *un)
13476 {
13477 	int hba_cdb_limit;
13478 
13479 	/*
13480 	 * Use CDB_GROUP1 commands for most devices except for
13481 	 * parallel SCSI fixed drives in which case we get better
13482 	 * performance using CDB_GROUP0 commands (where applicable).
13483 	 */
13484 	un->un_mincdb = SD_CDB_GROUP1;
13485 #if !defined(__fibre)
13486 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
13487 	    !un->un_f_has_removable_media) {
13488 		un->un_mincdb = SD_CDB_GROUP0;
13489 	}
13490 #endif
13491 
13492 	/*
13493 	 * Try to read the max-cdb-length supported by HBA.
13494 	 */
13495 	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
13496 	if (0 >= un->un_max_hba_cdb) {
13497 		un->un_max_hba_cdb = CDB_GROUP4;
13498 		hba_cdb_limit = SD_CDB_GROUP4;
13499 	} else if (0 < un->un_max_hba_cdb &&
13500 	    un->un_max_hba_cdb < CDB_GROUP1) {
13501 		hba_cdb_limit = SD_CDB_GROUP0;
13502 	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
13503 	    un->un_max_hba_cdb < CDB_GROUP5) {
13504 		hba_cdb_limit = SD_CDB_GROUP1;
13505 	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
13506 	    un->un_max_hba_cdb < CDB_GROUP4) {
13507 		hba_cdb_limit = SD_CDB_GROUP5;
13508 	} else {
13509 		hba_cdb_limit = SD_CDB_GROUP4;
13510 	}
13511 
13512 	/*
13513 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
13514 	 * commands for fixed disks unless we are building for a 32 bit
13515 	 * kernel.
13516 	 */
13517 #ifdef _LP64
13518 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13519 	    min(hba_cdb_limit, SD_CDB_GROUP4);
13520 #else
13521 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13522 	    min(hba_cdb_limit, SD_CDB_GROUP1);
13523 #endif
13524 
13525 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
13526 	    ? sizeof (struct scsi_arq_status) : 1);
13527 	un->un_cmd_timeout = (ushort_t)sd_io_time;
13528 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
13529 }
13530 
13531 
13532 /*
13533  *    Function: sd_initpkt_for_buf
13534  *
13535  * Description: Allocate and initialize for transport a scsi_pkt struct,
13536  *		based upon the info specified in the given buf struct.
13537  *
13538  *		Assumes the xb_blkno in the request is absolute (ie,
13539  *		relative to the start of the device (NOT partition!).
13540  *		Also assumes that the request is using the native block
13541  *		size of the device (as returned by the READ CAPACITY
13542  *		command).
13543  *
13544  * Return Code: SD_PKT_ALLOC_SUCCESS
13545  *		SD_PKT_ALLOC_FAILURE
13546  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13547  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13548  *
13549  *     Context: Kernel thread and may be called from software interrupt context
13550  *		as part of a sdrunout callback. This function may not block or
13551  *		call routines that block
13552  */
13553 
static int
sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
{
	struct sd_xbuf	*xp;
	struct scsi_pkt *pktp = NULL;
	struct sd_lun	*un;
	size_t		blockcount;
	daddr_t		startblock;
	int		rval;
	int		cmd_flags;

	ASSERT(bp != NULL);
	ASSERT(pktpp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp->b_resid == 0);

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);

	/*
	 * Drop the softstate mutex across packet/DMA allocation; it is
	 * re-acquired on every return path below.
	 */
	mutex_exit(SD_MUTEX(un));

#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
		/*
		 * Already have a scsi_pkt -- just need DMA resources.
		 * We must recompute the CDB in case the mapping returns
		 * a nonzero pkt_resid.
		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
		 * that is being retried, the unmap/remap of the DMA resouces
		 * will result in the entire transfer starting over again
		 * from the very first block.
		 */
		ASSERT(xp->xb_pktp != NULL);
		pktp = xp->xb_pktp;
	} else {
		pktp = NULL;
	}
#endif /* __i386 || __amd64 */

	startblock = xp->xb_blkno;	/* Absolute block num. */
	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);

	/* Combine unit-wide pkt flags with per-request initpkt flags. */
	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);

	/*
	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
	 * call scsi_init_pkt, and build the CDB.
	 */
	rval = sd_setup_rw_pkt(un, &pktp, bp,
	    cmd_flags, sdrunout, (caddr_t)un,
	    startblock, blockcount);

	if (rval == 0) {
		/*
		 * Success.
		 *
		 * If partial DMA is being used and required for this transfer.
		 * set it up here.
		 */
		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
		    (pktp->pkt_resid != 0)) {

			/*
			 * Save the CDB length and pkt_resid for the
			 * next xfer
			 */
			xp->xb_dma_resid = pktp->pkt_resid;

			/* rezero resid */
			pktp->pkt_resid = 0;

		} else {
			xp->xb_dma_resid = 0;
		}

		/* Arm the packet: tag flags, timeout, completion routine. */
		pktp->pkt_flags = un->un_tagflags;
		pktp->pkt_time  = un->un_cmd_timeout;
		pktp->pkt_comp  = sdintr;

		pktp->pkt_private = bp;
		*pktpp = pktp;

		SD_TRACE(SD_LOG_IO_CORE, un,
		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);

#if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
#endif

		mutex_enter(SD_MUTEX(un));
		return (SD_PKT_ALLOC_SUCCESS);

	}

	/*
	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
	 * from sd_setup_rw_pkt.  (The CDB_TOO_SMALL branch below still
	 * exists as a defensive path for non-DEBUG kernels, where the
	 * ASSERT compiles away.)
	 */
	ASSERT(rval == SD_PKT_ALLOC_FAILURE);

	if (rval == SD_PKT_ALLOC_FAILURE) {
		*pktpp = NULL;
		/*
		 * Set the driver state to RWAIT to indicate the driver
		 * is waiting on resource allocations. The driver will not
		 * suspend, pm_suspend, or detatch while the state is RWAIT.
		 */
		mutex_enter(SD_MUTEX(un));
		New_state(un, SD_STATE_RWAIT);

		SD_ERROR(SD_LOG_IO_CORE, un,
		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);

		/* B_ERROR set by the allocation path indicates a DMA error. */
		if ((bp->b_flags & B_ERROR) != 0) {
			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
		}
		return (SD_PKT_ALLOC_FAILURE);
	} else {
		/*
		 * PKT_ALLOC_FAILURE_CDB_TOO_SMALL
		 *
		 * This should never happen.  Maybe someone messed with the
		 * kernel's minphys?
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "Request rejected: too large for CDB: "
		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
		SD_ERROR(SD_LOG_IO_CORE, un,
		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
		mutex_enter(SD_MUTEX(un));
		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);

	}
}
13692 
13693 
13694 /*
13695  *    Function: sd_destroypkt_for_buf
13696  *
13697  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
13698  *
13699  *     Context: Kernel thread or interrupt context
13700  */
13701 
13702 static void
13703 sd_destroypkt_for_buf(struct buf *bp)
13704 {
13705 	ASSERT(bp != NULL);
13706 	ASSERT(SD_GET_UN(bp) != NULL);
13707 
13708 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13709 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
13710 
13711 	ASSERT(SD_GET_PKTP(bp) != NULL);
13712 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13713 
13714 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13715 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
13716 }
13717 
13718 /*
13719  *    Function: sd_setup_rw_pkt
13720  *
13721  * Description: Determines appropriate CDB group for the requested LBA
13722  *		and transfer length, calls scsi_init_pkt, and builds
13723  *		the CDB.  Do not use for partial DMA transfers except
13724  *		for the initial transfer since the CDB size must
13725  *		remain constant.
13726  *
13727  *     Context: Kernel thread and may be called from software interrupt
13728  *		context as part of a sdrunout callback. This function may not
13729  *		block or call routines that block
13730  */
13731 
13732 
int
sd_setup_rw_pkt(struct sd_lun *un,
    struct scsi_pkt **pktpp, struct buf *bp, int flags,
    int (*callback)(caddr_t), caddr_t callback_arg,
    diskaddr_t lba, uint32_t blockcount)
{
	struct scsi_pkt *return_pktp;
	union scsi_cdb *cdbp;
	struct sd_cdbinfo *cp = NULL;
	int i;

	/*
	 * See which size CDB to use, based upon the request.
	 * Iterate from the smallest permitted CDB group (un_mincdb) up
	 * to the largest (un_maxcdb), picking the first that fits.
	 */
	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {

		/*
		 * Check lba and block count against sd_cdbtab limits.
		 * In the partial DMA case, we have to use the same size
		 * CDB for all the transfers.  Check lba + blockcount
		 * against the max LBA so we know that segment of the
		 * transfer can use the CDB we select.
		 */
		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {

			/*
			 * The command will fit into the CDB type
			 * specified by sd_cdbtab[i].
			 */
			cp = sd_cdbtab + i;

			/*
			 * Call scsi_init_pkt so we can fill in the
			 * CDB.
			 */
			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
			    bp, cp->sc_grpcode, un->un_status_len, 0,
			    flags, callback, callback_arg);

			if (return_pktp != NULL) {

				/*
				 * Return new value of pkt
				 */
				*pktpp = return_pktp;

				/*
				 * To be safe, zero the CDB insuring there is
				 * no leftover data from a previous command.
				 * (sc_grpcode doubles as the CDB length.)
				 */
				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);

				/*
				 * Handle partial DMA mapping
				 */
				if (return_pktp->pkt_resid != 0) {

					/*
					 * Not going to xfer as many blocks as
					 * originally expected
					 */
					blockcount -=
					    SD_BYTES2TGTBLOCKS(un,
					    return_pktp->pkt_resid);
				}

				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;

				/*
				 * Set command byte based on the CDB
				 * type we matched.
				 */
				cdbp->scc_cmd = cp->sc_grpmask |
				    ((bp->b_flags & B_READ) ?
				    SCMD_READ : SCMD_WRITE);

				SD_FILL_SCSI1_LUN(un, return_pktp);

				/*
				 * Fill in LBA and length
				 */
				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
				    (cp->sc_grpcode == CDB_GROUP4) ||
				    (cp->sc_grpcode == CDB_GROUP0) ||
				    (cp->sc_grpcode == CDB_GROUP5));

				if (cp->sc_grpcode == CDB_GROUP1) {
					FORMG1ADDR(cdbp, lba);
					FORMG1COUNT(cdbp, blockcount);
					return (0);
				} else if (cp->sc_grpcode == CDB_GROUP4) {
					FORMG4LONGADDR(cdbp, lba);
					FORMG4COUNT(cdbp, blockcount);
					return (0);
				} else if (cp->sc_grpcode == CDB_GROUP0) {
					FORMG0ADDR(cdbp, lba);
					FORMG0COUNT(cdbp, blockcount);
					return (0);
				} else if (cp->sc_grpcode == CDB_GROUP5) {
					FORMG5ADDR(cdbp, lba);
					FORMG5COUNT(cdbp, blockcount);
					return (0);
				}

				/*
				 * It should be impossible to not match one
				 * of the CDB types above, so we should never
				 * reach this point.  Set the CDB command byte
				 * to test-unit-ready to avoid writing
				 * to somewhere we don't intend.
				 */
				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
			} else {
				/*
				 * Couldn't get scsi_pkt
				 */
				return (SD_PKT_ALLOC_FAILURE);
			}
		}
	}

	/*
	 * None of the available CDB types were suitable.  This really
	 * should never happen:  on a 64 bit system we support
	 * READ16/WRITE16 which will hold an entire 64 bit disk address
	 * and on a 32 bit system we will refuse to bind to a device
	 * larger than 2TB so addresses will never be larger than 32 bits.
	 */
	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
}
13865 
13866 /*
13867  *    Function: sd_setup_next_rw_pkt
13868  *
13869  * Description: Setup packet for partial DMA transfers, except for the
13870  * 		initial transfer.  sd_setup_rw_pkt should be used for
13871  *		the initial transfer.
13872  *
13873  *     Context: Kernel thread and may be called from interrupt context.
13874  */
13875 
13876 int
13877 sd_setup_next_rw_pkt(struct sd_lun *un,
13878     struct scsi_pkt *pktp, struct buf *bp,
13879     diskaddr_t lba, uint32_t blockcount)
13880 {
13881 	uchar_t com;
13882 	union scsi_cdb *cdbp;
13883 	uchar_t cdb_group_id;
13884 
13885 	ASSERT(pktp != NULL);
13886 	ASSERT(pktp->pkt_cdbp != NULL);
13887 
13888 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
13889 	com = cdbp->scc_cmd;
13890 	cdb_group_id = CDB_GROUPID(com);
13891 
13892 	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
13893 	    (cdb_group_id == CDB_GROUPID_1) ||
13894 	    (cdb_group_id == CDB_GROUPID_4) ||
13895 	    (cdb_group_id == CDB_GROUPID_5));
13896 
13897 	/*
13898 	 * Move pkt to the next portion of the xfer.
13899 	 * func is NULL_FUNC so we do not have to release
13900 	 * the disk mutex here.
13901 	 */
13902 	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
13903 	    NULL_FUNC, NULL) == pktp) {
13904 		/* Success.  Handle partial DMA */
13905 		if (pktp->pkt_resid != 0) {
13906 			blockcount -=
13907 			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
13908 		}
13909 
13910 		cdbp->scc_cmd = com;
13911 		SD_FILL_SCSI1_LUN(un, pktp);
13912 		if (cdb_group_id == CDB_GROUPID_1) {
13913 			FORMG1ADDR(cdbp, lba);
13914 			FORMG1COUNT(cdbp, blockcount);
13915 			return (0);
13916 		} else if (cdb_group_id == CDB_GROUPID_4) {
13917 			FORMG4LONGADDR(cdbp, lba);
13918 			FORMG4COUNT(cdbp, blockcount);
13919 			return (0);
13920 		} else if (cdb_group_id == CDB_GROUPID_0) {
13921 			FORMG0ADDR(cdbp, lba);
13922 			FORMG0COUNT(cdbp, blockcount);
13923 			return (0);
13924 		} else if (cdb_group_id == CDB_GROUPID_5) {
13925 			FORMG5ADDR(cdbp, lba);
13926 			FORMG5COUNT(cdbp, blockcount);
13927 			return (0);
13928 		}
13929 
13930 		/* Unreachable */
13931 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13932 	}
13933 
13934 	/*
13935 	 * Error setting up next portion of cmd transfer.
13936 	 * Something is definitely very wrong and this
13937 	 * should not happen.
13938 	 */
13939 	return (SD_PKT_ALLOC_FAILURE);
13940 }
13941 
13942 /*
13943  *    Function: sd_initpkt_for_uscsi
13944  *
13945  * Description: Allocate and initialize for transport a scsi_pkt struct,
13946  *		based upon the info specified in the given uscsi_cmd struct.
13947  *
13948  * Return Code: SD_PKT_ALLOC_SUCCESS
13949  *		SD_PKT_ALLOC_FAILURE
13950  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13951  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13952  *
13953  *     Context: Kernel thread and may be called from software interrupt context
13954  *		as part of a sdrunout callback. This function may not block or
13955  *		call routines that block
13956  */
13957 
static int
sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
{
	struct uscsi_cmd *uscmd;
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;
	struct sd_lun	*un;
	uint32_t	flags = 0;

	ASSERT(bp != NULL);
	ASSERT(pktpp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
	ASSERT(uscmd != NULL);

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);

	/*
	 * Allocate the scsi_pkt for the command.
	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
	 *	 during scsi_init_pkt time and will continue to use the
	 *	 same path as long as the same scsi_pkt is used without
	 *	 intervening scsi_dma_free(). Since uscsi command does
	 *	 not call scsi_dmafree() before retry failed command, it
	 *	 is necessary to make sure PKT_DMA_PARTIAL flag is NOT
	 *	 set such that scsi_vhci can use other available path for
	 *	 retry. Besides, uscsi command does not allow DMA breakup,
	 *	 so there is no need to set PKT_DMA_PARTIAL flag.
	 *
	 * When the caller requested more sense data than SENSE_LENGTH,
	 * set PKT_XARQ and size the status area large enough to hold
	 * the extended auto-request-sense payload.
	 */
	if (uscmd->uscsi_rqlen > SENSE_LENGTH) {
		pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
		    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
		    ((int)(uscmd->uscsi_rqlen) + sizeof (struct scsi_arq_status)
		    - sizeof (struct scsi_extended_sense)), 0,
		    (un->un_pkt_flags & ~PKT_DMA_PARTIAL) | PKT_XARQ,
		    sdrunout, (caddr_t)un);
	} else {
		pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
		    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
		    sizeof (struct scsi_arq_status), 0,
		    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
		    sdrunout, (caddr_t)un);
	}

	if (pktp == NULL) {
		*pktpp = NULL;
		/*
		 * Set the driver state to RWAIT to indicate the driver
		 * is waiting on resource allocations. The driver will not
		 * suspend, pm_suspend, or detach while the state is RWAIT.
		 */
		New_state(un, SD_STATE_RWAIT);

		SD_ERROR(SD_LOG_IO_CORE, un,
		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);

		/* B_ERROR set by the HBA indicates a (fatal) DMA failure */
		if ((bp->b_flags & B_ERROR) != 0) {
			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
		}
		return (SD_PKT_ALLOC_FAILURE);
	}

	/*
	 * We do not do DMA breakup for USCSI commands, so return failure
	 * here if all the needed DMA resources were not allocated.
	 */
	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
		scsi_destroy_pkt(pktp);
		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
	}

	/* Init the cdb from the given uscsi struct */
	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
	    uscmd->uscsi_cdb[0], 0, 0, 0);

	SD_FILL_SCSI1_LUN(un, pktp);

	/*
	 * Set up the optional USCSI flags. See the uscsi (7I) man page
	 * for listing of the supported flags.
	 */

	if (uscmd->uscsi_flags & USCSI_SILENT) {
		flags |= FLAG_SILENT;
	}

	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
		flags |= FLAG_DIAGNOSE;
	}

	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
		flags |= FLAG_ISOLATE;
	}

	/* Wide/sync renegotiation only applies to parallel SCSI, not FC */
	if (un->un_f_is_fibre == FALSE) {
		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
		}
	}

	/*
	 * Set the pkt flags here so we save time later.
	 * Note: These flags are NOT in the uscsi man page!!!
	 */
	if (uscmd->uscsi_flags & USCSI_HEAD) {
		flags |= FLAG_HEAD;
	}

	if (uscmd->uscsi_flags & USCSI_NOINTR) {
		flags |= FLAG_NOINTR;
	}

	/*
	 * For tagged queueing, things get a bit complicated.
	 * Check first for head of queue and last for ordered queue.
	 * If neither head nor order, use the default driver tag flags.
	 */
	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
		if (uscmd->uscsi_flags & USCSI_HTAG) {
			flags |= FLAG_HTAG;
		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
			flags |= FLAG_OTAG;
		} else {
			flags |= un->un_tagflags & FLAG_TAGMASK;
		}
	}

	/* NODISCON is mutually exclusive with tagged queueing */
	if (uscmd->uscsi_flags & USCSI_NODISCON) {
		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
	}

	pktp->pkt_flags = flags;

	/* Transfer uscsi information to scsi_pkt */
	(void) scsi_uscsi_pktinit(uscmd, pktp);

	/* Copy the caller's CDB into the pkt... */
	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);

	/* A zero uscsi timeout means "use the driver default" */
	if (uscmd->uscsi_timeout == 0) {
		pktp->pkt_time = un->un_uscsi_timeout;
	} else {
		pktp->pkt_time = uscmd->uscsi_timeout;
	}

	/* need it later to identify USCSI request in sdintr */
	xp->xb_pkt_flags |= SD_XB_USCSICMD;

	xp->xb_sense_resid = uscmd->uscsi_rqresid;

	pktp->pkt_private = bp;
	pktp->pkt_comp = sdintr;
	*pktpp = pktp;

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);

	return (SD_PKT_ALLOC_SUCCESS);
}
14127 
14128 
14129 /*
14130  *    Function: sd_destroypkt_for_uscsi
14131  *
14132  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
14133  *		IOs.. Also saves relevant info into the associated uscsi_cmd
 *		IOs. Also saves relevant info into the associated uscsi_cmd
14135  *
14136  *     Context: May be called under interrupt context
14137  */
14138 
static void
sd_destroypkt_for_uscsi(struct buf *bp)
{
	struct uscsi_cmd *uscmd;
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;
	struct sd_lun	*un;
	struct sd_uscsi_info *suip;

	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);

	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
	ASSERT(uscmd != NULL);

	/* Save the status and the residual into the uscsi_cmd struct */
	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
	uscmd->uscsi_resid  = bp->b_resid;

	/* Transfer scsi_pkt information to uscsi */
	(void) scsi_uscsi_pktfini(pktp, uscmd);

	/*
	 * If enabled, copy any saved sense data into the area specified
	 * by the uscsi command.
	 */
	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
		/*
		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
		 */
		uscmd->uscsi_rqstatus = xp->xb_sense_status;
		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
		if (uscmd->uscsi_rqlen > SENSE_LENGTH) {
			/*
			 * NOTE(review): the XARQ path copies a fixed
			 * MAX_SENSE_LENGTH bytes regardless of the exact
			 * uscsi_rqlen; this assumes rqbuf is at least
			 * MAX_SENSE_LENGTH bytes whenever rqlen exceeds
			 * SENSE_LENGTH -- confirm in sd_send_scsi_cmd().
			 */
			bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf,
			    MAX_SENSE_LENGTH);
		} else {
			bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf,
			    SENSE_LENGTH);
		}
	}
	/*
	 * The following assignments are for SCSI FMA.
	 */
	ASSERT(xp->xb_private != NULL);
	suip = (struct sd_uscsi_info *)xp->xb_private;
	suip->ui_pkt_reason = pktp->pkt_reason;
	suip->ui_pkt_state = pktp->pkt_state;
	suip->ui_pkt_statistics = pktp->pkt_statistics;
	suip->ui_lba = (uint64_t)SD_GET_BLKNO(bp);

	/* We are done with the scsi_pkt; free it now */
	ASSERT(SD_GET_PKTP(bp) != NULL);
	scsi_destroy_pkt(SD_GET_PKTP(bp));

	SD_TRACE(SD_LOG_IO_CORE, un,
	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
}
14208 
14209 
14210 /*
14211  *    Function: sd_bioclone_alloc
14212  *
14213  * Description: Allocate a buf(9S) and init it as per the given buf
14214  *		and the various arguments.  The associated sd_xbuf
14215  *		struct is (nearly) duplicated.  The struct buf *bp
14216  *		argument is saved in new_xp->xb_private.
14217  *
 *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
14219  *		datalen - size of data area for the shadow bp
14220  *		blkno - starting LBA
14221  *		func - function pointer for b_iodone in the shadow buf. (May
14222  *			be NULL if none.)
14223  *
 * Return Code: Pointer to allocated buf(9S) struct
14225  *
14226  *     Context: Can sleep.
14227  */
14228 
14229 static struct buf *
14230 sd_bioclone_alloc(struct buf *bp, size_t datalen,
14231 	daddr_t blkno, int (*func)(struct buf *))
14232 {
14233 	struct	sd_lun	*un;
14234 	struct	sd_xbuf	*xp;
14235 	struct	sd_xbuf	*new_xp;
14236 	struct	buf	*new_bp;
14237 
14238 	ASSERT(bp != NULL);
14239 	xp = SD_GET_XBUF(bp);
14240 	ASSERT(xp != NULL);
14241 	un = SD_GET_UN(bp);
14242 	ASSERT(un != NULL);
14243 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14244 
14245 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
14246 	    NULL, KM_SLEEP);
14247 
14248 	new_bp->b_lblkno	= blkno;
14249 
14250 	/*
14251 	 * Allocate an xbuf for the shadow bp and copy the contents of the
14252 	 * original xbuf into it.
14253 	 */
14254 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14255 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
14256 
14257 	/*
14258 	 * The given bp is automatically saved in the xb_private member
14259 	 * of the new xbuf.  Callers are allowed to depend on this.
14260 	 */
14261 	new_xp->xb_private = bp;
14262 
14263 	new_bp->b_private  = new_xp;
14264 
14265 	return (new_bp);
14266 }
14267 
14268 /*
14269  *    Function: sd_shadow_buf_alloc
14270  *
14271  * Description: Allocate a buf(9S) and init it as per the given buf
14272  *		and the various arguments.  The associated sd_xbuf
14273  *		struct is (nearly) duplicated.  The struct buf *bp
14274  *		argument is saved in new_xp->xb_private.
14275  *
 *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
14277  *		datalen - size of data area for the shadow bp
14278  *		bflags - B_READ or B_WRITE (pseudo flag)
14279  *		blkno - starting LBA
14280  *		func - function pointer for b_iodone in the shadow buf. (May
14281  *			be NULL if none.)
14282  *
 * Return Code: Pointer to allocated buf(9S) struct
14284  *
14285  *     Context: Can sleep.
14286  */
14287 
static struct buf *
sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
	daddr_t blkno, int (*func)(struct buf *))
{
	struct	sd_lun	*un;
	struct	sd_xbuf	*xp;
	struct	sd_xbuf	*new_xp;
	struct	buf	*new_bp;

	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/*
	 * Map the original buf's data into kernel virtual address space
	 * if needed, since data is later copied between the original and
	 * shadow buffers.
	 */
	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
		bp_mapin(bp);
	}

	/* Only the transfer direction is honored from the passed bflags */
	bflags &= (B_READ | B_WRITE);
#if defined(__i386) || defined(__amd64)
	/*
	 * x86: use a plain zeroed kernel buffer for the shadow data area.
	 * NOTE(review): presumably ordinary kernel memory is DMA-able on
	 * these platforms -- confirm against the HBA's DMA attributes.
	 */
	new_bp = getrbuf(KM_SLEEP);
	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
	new_bp->b_bcount = datalen;
	new_bp->b_flags = bflags |
	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
#else
	/* Other platforms: get a DMA-consistent buffer from the SCSA layer */
	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
	    datalen, bflags, SLEEP_FUNC, NULL);
#endif
	new_bp->av_forw	= NULL;
	new_bp->av_back	= NULL;
	new_bp->b_dev	= bp->b_dev;
	new_bp->b_blkno	= blkno;
	new_bp->b_iodone = func;
	new_bp->b_edev	= bp->b_edev;
	new_bp->b_resid	= 0;

	/* We need to preserve the B_FAILFAST flag */
	if (bp->b_flags & B_FAILFAST) {
		new_bp->b_flags |= B_FAILFAST;
	}

	/*
	 * Allocate an xbuf for the shadow bp and copy the contents of the
	 * original xbuf into it.
	 */
	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
	bcopy(xp, new_xp, sizeof (struct sd_xbuf));

	/* Need later to copy data between the shadow buf & original buf! */
	new_xp->xb_pkt_flags |= PKT_CONSISTENT;

	/*
	 * The given bp is automatically saved in the xb_private member
	 * of the new xbuf.  Callers are allowed to depend on this.
	 */
	new_xp->xb_private = bp;

	new_bp->b_private  = new_xp;

	return (new_bp);
}
14352 
14353 /*
14354  *    Function: sd_bioclone_free
14355  *
14356  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
14357  *		in the larger than partition operation.
14358  *
14359  *     Context: May be called under interrupt context
14360  */
14361 
14362 static void
14363 sd_bioclone_free(struct buf *bp)
14364 {
14365 	struct sd_xbuf	*xp;
14366 
14367 	ASSERT(bp != NULL);
14368 	xp = SD_GET_XBUF(bp);
14369 	ASSERT(xp != NULL);
14370 
14371 	/*
14372 	 * Call bp_mapout() before freeing the buf,  in case a lower
14373 	 * layer or HBA  had done a bp_mapin().  we must do this here
14374 	 * as we are the "originator" of the shadow buf.
14375 	 */
14376 	bp_mapout(bp);
14377 
14378 	/*
14379 	 * Null out b_iodone before freeing the bp, to ensure that the driver
14380 	 * never gets confused by a stale value in this field. (Just a little
14381 	 * extra defensiveness here.)
14382 	 */
14383 	bp->b_iodone = NULL;
14384 
14385 	freerbuf(bp);
14386 
14387 	kmem_free(xp, sizeof (struct sd_xbuf));
14388 }
14389 
14390 /*
14391  *    Function: sd_shadow_buf_free
14392  *
14393  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
14394  *
14395  *     Context: May be called under interrupt context
14396  */
14397 
static void
sd_shadow_buf_free(struct buf *bp)
{
	struct sd_xbuf	*xp;

	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

#if defined(__sparc)
	/*
	 * Call bp_mapout() before freeing the buf,  in case a lower
	 * layer or HBA  had done a bp_mapin().  we must do this here
	 * as we are the "originator" of the shadow buf.
	 */
	bp_mapout(bp);
#endif

	/*
	 * Null out b_iodone before freeing the bp, to ensure that the driver
	 * never gets confused by a stale value in this field. (Just a little
	 * extra defensiveness here.)
	 */
	bp->b_iodone = NULL;

#if defined(__i386) || defined(__amd64)
	/* x86: release the buffer pair allocated in sd_shadow_buf_alloc() */
	kmem_free(bp->b_un.b_addr, bp->b_bcount);
	freerbuf(bp);
#else
	/* Other platforms: return the DMA-consistent buffer to SCSA */
	scsi_free_consistent_buf(bp);
#endif

	kmem_free(xp, sizeof (struct sd_xbuf));
}
14432 
14433 
14434 /*
14435  *    Function: sd_print_transport_rejected_message
14436  *
14437  * Description: This implements the ludicrously complex rules for printing
14438  *		a "transport rejected" message.  This is to address the
14439  *		specific problem of having a flood of this error message
14440  *		produced when a failover occurs.
14441  *
14442  *     Context: Any.
14443  */
14444 
14445 static void
14446 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
14447 	int code)
14448 {
14449 	ASSERT(un != NULL);
14450 	ASSERT(mutex_owned(SD_MUTEX(un)));
14451 	ASSERT(xp != NULL);
14452 
14453 	/*
14454 	 * Print the "transport rejected" message under the following
14455 	 * conditions:
14456 	 *
14457 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
14458 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
14459 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
14460 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
14461 	 *   scsi_transport(9F) (which indicates that the target might have
14462 	 *   gone off-line).  This uses the un->un_tran_fatal_count
14463 	 *   count, which is incremented whenever a TRAN_FATAL_ERROR is
14464 	 *   received, and reset to zero whenver a TRAN_ACCEPT is returned
14465 	 *   from scsi_transport().
14466 	 *
14467 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
14468 	 * the preceeding cases in order for the message to be printed.
14469 	 */
14470 	if (((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) &&
14471 	    (SD_FM_LOG(un) == SD_FM_LOG_NSUP)) {
14472 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
14473 		    (code != TRAN_FATAL_ERROR) ||
14474 		    (un->un_tran_fatal_count == 1)) {
14475 			switch (code) {
14476 			case TRAN_BADPKT:
14477 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14478 				    "transport rejected bad packet\n");
14479 				break;
14480 			case TRAN_FATAL_ERROR:
14481 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14482 				    "transport rejected fatal error\n");
14483 				break;
14484 			default:
14485 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14486 				    "transport rejected (%d)\n", code);
14487 				break;
14488 			}
14489 		}
14490 	}
14491 }
14492 
14493 
14494 /*
14495  *    Function: sd_add_buf_to_waitq
14496  *
14497  * Description: Add the given buf(9S) struct to the wait queue for the
14498  *		instance.  If sorting is enabled, then the buf is added
14499  *		to the queue via an elevator sort algorithm (a la
14500  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
14501  *		If sorting is not enabled, then the buf is just added
14502  *		to the end of the wait queue.
14503  *
14504  * Return Code: void
14505  *
14506  *     Context: Does not sleep/block, therefore technically can be called
14507  *		from any context.  However if sorting is enabled then the
14508  *		execution time is indeterminate, and may take long if
14509  *		the wait queue grows large.
14510  */
14511 
static void
sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
{
	struct buf *ap;

	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* If the queue is empty, add the buf as the only entry & return. */
	if (un->un_waitq_headp == NULL) {
		ASSERT(un->un_waitq_tailp == NULL);
		un->un_waitq_headp = un->un_waitq_tailp = bp;
		bp->av_forw = NULL;
		return;
	}

	ASSERT(un->un_waitq_tailp != NULL);

	/*
	 * If sorting is disabled, just add the buf to the tail end of
	 * the wait queue and return (simple FIFO behavior).
	 */
	if (un->un_f_disksort_disabled) {
		un->un_waitq_tailp->av_forw = bp;
		un->un_waitq_tailp = bp;
		bp->av_forw = NULL;
		return;
	}

	/*
	 * Sort thru the list of requests currently on the wait queue
	 * and add the new buf request at the appropriate position.
	 *
	 * The un->un_waitq_headp is an activity chain pointer on which
	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
	 * first queue holds those requests which are positioned after
	 * the current SD_GET_BLKNO() (in the first request); the second holds
	 * requests which came in after their SD_GET_BLKNO() number was passed.
	 * Thus we implement a one way scan, retracting after reaching
	 * the end of the drive to the first request on the second
	 * queue, at which time it becomes the first queue.
	 * A one-way scan is natural because of the way UNIX read-ahead
	 * blocks are allocated.
	 *
	 * If we lie after the first request, then we must locate the
	 * second request list and add ourselves to it.
	 */
	ap = un->un_waitq_headp;
	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
		/* New request lies before the head: belongs on second list */
		while (ap->av_forw != NULL) {
			/*
			 * Look for an "inversion" in the (normally
			 * ascending) block numbers. This indicates
			 * the start of the second request list.
			 */
			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
				/*
				 * Search the second request list for the
				 * first request at a larger block number.
				 * We go before that; however if there is
				 * no such request, we go at the end.
				 */
				do {
					if (SD_GET_BLKNO(bp) <
					    SD_GET_BLKNO(ap->av_forw)) {
						goto insert;
					}
					ap = ap->av_forw;
				} while (ap->av_forw != NULL);
				goto insert;		/* after last */
			}
			ap = ap->av_forw;
		}

		/*
		 * No inversions... we will go after the last, and
		 * be the first request in the second request list.
		 */
		goto insert;
	}

	/*
	 * Request is at/after the current request...
	 * sort in the first request list.
	 */
	while (ap->av_forw != NULL) {
		/*
		 * We want to go after the current request (1) if
		 * there is an inversion after it (i.e. it is the end
		 * of the first request list), or (2) if the next
		 * request is a larger block no. than our request.
		 */
		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
			goto insert;
		}
		ap = ap->av_forw;
	}

	/*
	 * Neither a second list nor a larger request, therefore
	 * we go at the end of the first list (which is the same
	 * as the end of the whole schebang).
	 */
insert:
	/* Common insertion point: link bp immediately after ap. */
	bp->av_forw = ap->av_forw;
	ap->av_forw = bp;

	/*
	 * If we inserted onto the tail end of the waitq, make sure the
	 * tail pointer is updated.
	 */
	if (ap == un->un_waitq_tailp) {
		un->un_waitq_tailp = bp;
	}
}
14629 
14630 
14631 /*
14632  *    Function: sd_start_cmds
14633  *
14634  * Description: Remove and transport cmds from the driver queues.
14635  *
14636  *   Arguments: un - pointer to the unit (soft state) struct for the target.
14637  *
14638  *		immed_bp - ptr to a buf to be transported immediately. Only
14639  *		the immed_bp is transported; bufs on the waitq are not
14640  *		processed and the un_retry_bp is not checked.  If immed_bp is
14641  *		NULL, then normal queue processing is performed.
14642  *
14643  *     Context: May be called from kernel thread context, interrupt context,
14644  *		or runout callback context. This function may not block or
14645  *		call routines that block.
14646  */
14647 
14648 static void
14649 sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
14650 {
14651 	struct	sd_xbuf	*xp;
14652 	struct	buf	*bp;
14653 	void	(*statp)(kstat_io_t *);
14654 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14655 	void	(*saved_statp)(kstat_io_t *);
14656 #endif
14657 	int	rval;
14658 	struct sd_fm_internal *sfip = NULL;
14659 
14660 	ASSERT(un != NULL);
14661 	ASSERT(mutex_owned(SD_MUTEX(un)));
14662 	ASSERT(un->un_ncmds_in_transport >= 0);
14663 	ASSERT(un->un_throttle >= 0);
14664 
14665 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");
14666 
14667 	do {
14668 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14669 		saved_statp = NULL;
14670 #endif
14671 
14672 		/*
14673 		 * If we are syncing or dumping, fail the command to
14674 		 * avoid recursively calling back into scsi_transport().
14675 		 * The dump I/O itself uses a separate code path so this
14676 		 * only prevents non-dump I/O from being sent while dumping.
14677 		 * File system sync takes place before dumping begins.
14678 		 * During panic, filesystem I/O is allowed provided
14679 		 * un_in_callback is <= 1.  This is to prevent recursion
14680 		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
14681 		 * sd_start_cmds and so on.  See panic.c for more information
14682 		 * about the states the system can be in during panic.
14683 		 */
14684 		if ((un->un_state == SD_STATE_DUMPING) ||
14685 		    (ddi_in_panic() && (un->un_in_callback > 1))) {
14686 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14687 			    "sd_start_cmds: panicking\n");
14688 			goto exit;
14689 		}
14690 
14691 		if ((bp = immed_bp) != NULL) {
14692 			/*
14693 			 * We have a bp that must be transported immediately.
14694 			 * It's OK to transport the immed_bp here without doing
14695 			 * the throttle limit check because the immed_bp is
14696 			 * always used in a retry/recovery case. This means
14697 			 * that we know we are not at the throttle limit by
14698 			 * virtue of the fact that to get here we must have
14699 			 * already gotten a command back via sdintr(). This also
14700 			 * relies on (1) the command on un_retry_bp preventing
14701 			 * further commands from the waitq from being issued;
14702 			 * and (2) the code in sd_retry_command checking the
14703 			 * throttle limit before issuing a delayed or immediate
14704 			 * retry. This holds even if the throttle limit is
14705 			 * currently ratcheted down from its maximum value.
14706 			 */
14707 			statp = kstat_runq_enter;
14708 			if (bp == un->un_retry_bp) {
14709 				ASSERT((un->un_retry_statp == NULL) ||
14710 				    (un->un_retry_statp == kstat_waitq_enter) ||
14711 				    (un->un_retry_statp ==
14712 				    kstat_runq_back_to_waitq));
14713 				/*
14714 				 * If the waitq kstat was incremented when
14715 				 * sd_set_retry_bp() queued this bp for a retry,
14716 				 * then we must set up statp so that the waitq
14717 				 * count will get decremented correctly below.
14718 				 * Also we must clear un->un_retry_statp to
14719 				 * ensure that we do not act on a stale value
14720 				 * in this field.
14721 				 */
14722 				if ((un->un_retry_statp == kstat_waitq_enter) ||
14723 				    (un->un_retry_statp ==
14724 				    kstat_runq_back_to_waitq)) {
14725 					statp = kstat_waitq_to_runq;
14726 				}
14727 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14728 				saved_statp = un->un_retry_statp;
14729 #endif
14730 				un->un_retry_statp = NULL;
14731 
14732 				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14733 				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
14734 				    "un_throttle:%d un_ncmds_in_transport:%d\n",
14735 				    un, un->un_retry_bp, un->un_throttle,
14736 				    un->un_ncmds_in_transport);
14737 			} else {
14738 				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
14739 				    "processing priority bp:0x%p\n", bp);
14740 			}
14741 
14742 		} else if ((bp = un->un_waitq_headp) != NULL) {
14743 			/*
14744 			 * A command on the waitq is ready to go, but do not
14745 			 * send it if:
14746 			 *
14747 			 * (1) the throttle limit has been reached, or
14748 			 * (2) a retry is pending, or
14749 			 * (3) a START_STOP_UNIT callback pending, or
14750 			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
14751 			 *	command is pending.
14752 			 *
14753 			 * For all of these conditions, IO processing will
14754 			 * restart after the condition is cleared.
14755 			 */
14756 			if (un->un_ncmds_in_transport >= un->un_throttle) {
14757 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14758 				    "sd_start_cmds: exiting, "
14759 				    "throttle limit reached!\n");
14760 				goto exit;
14761 			}
14762 			if (un->un_retry_bp != NULL) {
14763 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14764 				    "sd_start_cmds: exiting, retry pending!\n");
14765 				goto exit;
14766 			}
14767 			if (un->un_startstop_timeid != NULL) {
14768 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14769 				    "sd_start_cmds: exiting, "
14770 				    "START_STOP pending!\n");
14771 				goto exit;
14772 			}
14773 			if (un->un_direct_priority_timeid != NULL) {
14774 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14775 				    "sd_start_cmds: exiting, "
14776 				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
14777 				goto exit;
14778 			}
14779 
14780 			/* Dequeue the command */
14781 			un->un_waitq_headp = bp->av_forw;
14782 			if (un->un_waitq_headp == NULL) {
14783 				un->un_waitq_tailp = NULL;
14784 			}
14785 			bp->av_forw = NULL;
14786 			statp = kstat_waitq_to_runq;
14787 			SD_TRACE(SD_LOG_IO_CORE, un,
14788 			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);
14789 
14790 		} else {
14791 			/* No work to do so bail out now */
14792 			SD_TRACE(SD_LOG_IO_CORE, un,
14793 			    "sd_start_cmds: no more work, exiting!\n");
14794 			goto exit;
14795 		}
14796 
14797 		/*
14798 		 * Reset the state to normal. This is the mechanism by which
14799 		 * the state transitions from either SD_STATE_RWAIT or
14800 		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
14801 		 * If state is SD_STATE_PM_CHANGING then this command is
14802 		 * part of the device power control and the state must
		 * not be put back to normal. Doing so would
14804 		 * allow new commands to proceed when they shouldn't,
14805 		 * the device may be going off.
14806 		 */
14807 		if ((un->un_state != SD_STATE_SUSPENDED) &&
14808 		    (un->un_state != SD_STATE_PM_CHANGING)) {
14809 			New_state(un, SD_STATE_NORMAL);
14810 		}
14811 
14812 		xp = SD_GET_XBUF(bp);
14813 		ASSERT(xp != NULL);
14814 
14815 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14816 		/*
14817 		 * Allocate the scsi_pkt if we need one, or attach DMA
14818 		 * resources if we have a scsi_pkt that needs them. The
14819 		 * latter should only occur for commands that are being
14820 		 * retried.
14821 		 */
14822 		if ((xp->xb_pktp == NULL) ||
14823 		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
14824 #else
14825 		if (xp->xb_pktp == NULL) {
14826 #endif
14827 			/*
14828 			 * There is no scsi_pkt allocated for this buf. Call
14829 			 * the initpkt function to allocate & init one.
14830 			 *
14831 			 * The scsi_init_pkt runout callback functionality is
14832 			 * implemented as follows:
14833 			 *
14834 			 * 1) The initpkt function always calls
14835 			 *    scsi_init_pkt(9F) with sdrunout specified as the
14836 			 *    callback routine.
14837 			 * 2) A successful packet allocation is initialized and
14838 			 *    the I/O is transported.
14839 			 * 3) The I/O associated with an allocation resource
14840 			 *    failure is left on its queue to be retried via
14841 			 *    runout or the next I/O.
14842 			 * 4) The I/O associated with a DMA error is removed
14843 			 *    from the queue and failed with EIO. Processing of
14844 			 *    the transport queues is also halted to be
14845 			 *    restarted via runout or the next I/O.
14846 			 * 5) The I/O associated with a CDB size or packet
14847 			 *    size error is removed from the queue and failed
14848 			 *    with EIO. Processing of the transport queues is
14849 			 *    continued.
14850 			 *
14851 			 * Note: there is no interface for canceling a runout
14852 			 * callback. To prevent the driver from detaching or
14853 			 * suspending while a runout is pending the driver
14854 			 * state is set to SD_STATE_RWAIT
14855 			 *
14856 			 * Note: using the scsi_init_pkt callback facility can
14857 			 * result in an I/O request persisting at the head of
14858 			 * the list which cannot be satisfied even after
14859 			 * multiple retries. In the future the driver may
14860 			 * implement some kind of maximum runout count before
14861 			 * failing an I/O.
14862 			 *
14863 			 * Note: the use of funcp below may seem superfluous,
14864 			 * but it helps warlock figure out the correct
14865 			 * initpkt function calls (see [s]sd.wlcmd).
14866 			 */
14867 			struct scsi_pkt	*pktp;
14868 			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);
14869 
14870 			ASSERT(bp != un->un_rqs_bp);
14871 
14872 			funcp = sd_initpkt_map[xp->xb_chain_iostart];
14873 			switch ((*funcp)(bp, &pktp)) {
14874 			case  SD_PKT_ALLOC_SUCCESS:
14875 				xp->xb_pktp = pktp;
14876 				SD_TRACE(SD_LOG_IO_CORE, un,
14877 				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
14878 				    pktp);
14879 				goto got_pkt;
14880 
14881 			case SD_PKT_ALLOC_FAILURE:
14882 				/*
14883 				 * Temporary (hopefully) resource depletion.
14884 				 * Since retries and RQS commands always have a
14885 				 * scsi_pkt allocated, these cases should never
14886 				 * get here. So the only cases this needs to
14887 				 * handle is a bp from the waitq (which we put
14888 				 * back onto the waitq for sdrunout), or a bp
14889 				 * sent as an immed_bp (which we just fail).
14890 				 */
14891 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14892 				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");
14893 
14894 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
14895 
14896 				if (bp == immed_bp) {
14897 					/*
14898 					 * If SD_XB_DMA_FREED is clear, then
14899 					 * this is a failure to allocate a
14900 					 * scsi_pkt, and we must fail the
14901 					 * command.
14902 					 */
14903 					if ((xp->xb_pkt_flags &
14904 					    SD_XB_DMA_FREED) == 0) {
14905 						break;
14906 					}
14907 
14908 					/*
14909 					 * If this immediate command is NOT our
14910 					 * un_retry_bp, then we must fail it.
14911 					 */
14912 					if (bp != un->un_retry_bp) {
14913 						break;
14914 					}
14915 
14916 					/*
14917 					 * We get here if this cmd is our
14918 					 * un_retry_bp that was DMAFREED, but
14919 					 * scsi_init_pkt() failed to reallocate
14920 					 * DMA resources when we attempted to
14921 					 * retry it. This can happen when an
14922 					 * mpxio failover is in progress, but
14923 					 * we don't want to just fail the
14924 					 * command in this case.
14925 					 *
14926 					 * Use timeout(9F) to restart it after
14927 					 * a 100ms delay.  We don't want to
14928 					 * let sdrunout() restart it, because
14929 					 * sdrunout() is just supposed to start
14930 					 * commands that are sitting on the
14931 					 * wait queue.  The un_retry_bp stays
14932 					 * set until the command completes, but
14933 					 * sdrunout can be called many times
14934 					 * before that happens.  Since sdrunout
14935 					 * cannot tell if the un_retry_bp is
14936 					 * already in the transport, it could
14937 					 * end up calling scsi_transport() for
14938 					 * the un_retry_bp multiple times.
14939 					 *
14940 					 * Also: don't schedule the callback
14941 					 * if some other callback is already
14942 					 * pending.
14943 					 */
14944 					if (un->un_retry_statp == NULL) {
14945 						/*
14946 						 * restore the kstat pointer to
14947 						 * keep kstat counts coherent
14948 						 * when we do retry the command.
14949 						 */
14950 						un->un_retry_statp =
14951 						    saved_statp;
14952 					}
14953 
14954 					if ((un->un_startstop_timeid == NULL) &&
14955 					    (un->un_retry_timeid == NULL) &&
14956 					    (un->un_direct_priority_timeid ==
14957 					    NULL)) {
14958 
14959 						un->un_retry_timeid =
14960 						    timeout(
14961 						    sd_start_retry_command,
14962 						    un, SD_RESTART_TIMEOUT);
14963 					}
14964 					goto exit;
14965 				}
14966 
14967 #else
14968 				if (bp == immed_bp) {
14969 					break;	/* Just fail the command */
14970 				}
14971 #endif
14972 
14973 				/* Add the buf back to the head of the waitq */
14974 				bp->av_forw = un->un_waitq_headp;
14975 				un->un_waitq_headp = bp;
14976 				if (un->un_waitq_tailp == NULL) {
14977 					un->un_waitq_tailp = bp;
14978 				}
14979 				goto exit;
14980 
14981 			case SD_PKT_ALLOC_FAILURE_NO_DMA:
14982 				/*
14983 				 * HBA DMA resource failure. Fail the command
14984 				 * and continue processing of the queues.
14985 				 */
14986 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14987 				    "sd_start_cmds: "
14988 				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
14989 				break;
14990 
14991 			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
14992 				/*
14993 				 * Note:x86: Partial DMA mapping not supported
14994 				 * for USCSI commands, and all the needed DMA
14995 				 * resources were not allocated.
14996 				 */
14997 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14998 				    "sd_start_cmds: "
14999 				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
15000 				break;
15001 
15002 			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
15003 				/*
15004 				 * Note:x86: Request cannot fit into CDB based
15005 				 * on lba and len.
15006 				 */
15007 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15008 				    "sd_start_cmds: "
15009 				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
15010 				break;
15011 
15012 			default:
15013 				/* Should NEVER get here! */
15014 				panic("scsi_initpkt error");
15015 				/*NOTREACHED*/
15016 			}
15017 
15018 			/*
15019 			 * Fatal error in allocating a scsi_pkt for this buf.
15020 			 * Update kstats & return the buf with an error code.
15021 			 * We must use sd_return_failed_command_no_restart() to
15022 			 * avoid a recursive call back into sd_start_cmds().
15023 			 * However this also means that we must keep processing
15024 			 * the waitq here in order to avoid stalling.
15025 			 */
15026 			if (statp == kstat_waitq_to_runq) {
15027 				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
15028 			}
15029 			sd_return_failed_command_no_restart(un, bp, EIO);
15030 			if (bp == immed_bp) {
15031 				/* immed_bp is gone by now, so clear this */
15032 				immed_bp = NULL;
15033 			}
15034 			continue;
15035 		}
15036 got_pkt:
15037 		if (bp == immed_bp) {
15038 			/* goto the head of the class.... */
15039 			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
15040 		}
15041 
15042 		un->un_ncmds_in_transport++;
15043 		SD_UPDATE_KSTATS(un, statp, bp);
15044 
15045 		/*
15046 		 * Call scsi_transport() to send the command to the target.
15047 		 * According to SCSA architecture, we must drop the mutex here
15048 		 * before calling scsi_transport() in order to avoid deadlock.
15049 		 * Note that the scsi_pkt's completion routine can be executed
15050 		 * (from interrupt context) even before the call to
15051 		 * scsi_transport() returns.
15052 		 */
15053 		SD_TRACE(SD_LOG_IO_CORE, un,
15054 		    "sd_start_cmds: calling scsi_transport()\n");
15055 		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);
15056 
15057 		mutex_exit(SD_MUTEX(un));
15058 		rval = scsi_transport(xp->xb_pktp);
15059 		mutex_enter(SD_MUTEX(un));
15060 
15061 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15062 		    "sd_start_cmds: scsi_transport() returned %d\n", rval);
15063 
15064 		switch (rval) {
15065 		case TRAN_ACCEPT:
15066 			/* Clear this with every pkt accepted by the HBA */
15067 			un->un_tran_fatal_count = 0;
15068 			break;	/* Success; try the next cmd (if any) */
15069 
15070 		case TRAN_BUSY:
15071 			un->un_ncmds_in_transport--;
15072 			ASSERT(un->un_ncmds_in_transport >= 0);
15073 
15074 			/*
15075 			 * Don't retry request sense, the sense data
15076 			 * is lost when another request is sent.
15077 			 * Free up the rqs buf and retry
15078 			 * the original failed cmd.  Update kstat.
15079 			 */
15080 			if (bp == un->un_rqs_bp) {
15081 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
15082 				bp = sd_mark_rqs_idle(un, xp);
15083 				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15084 				    NULL, NULL, EIO, un->un_busy_timeout / 500,
15085 				    kstat_waitq_enter);
15086 				goto exit;
15087 			}
15088 
15089 #if defined(__i386) || defined(__amd64)	/* DMAFREE for x86 only */
15090 			/*
15091 			 * Free the DMA resources for the  scsi_pkt. This will
15092 			 * allow mpxio to select another path the next time
15093 			 * we call scsi_transport() with this scsi_pkt.
15094 			 * See sdintr() for the rationalization behind this.
15095 			 */
15096 			if ((un->un_f_is_fibre == TRUE) &&
15097 			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
15098 			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
15099 				scsi_dmafree(xp->xb_pktp);
15100 				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
15101 			}
15102 #endif
15103 
15104 			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
15105 				/*
15106 				 * Commands that are SD_PATH_DIRECT_PRIORITY
15107 				 * are for error recovery situations. These do
15108 				 * not use the normal command waitq, so if they
15109 				 * get a TRAN_BUSY we cannot put them back onto
15110 				 * the waitq for later retry. One possible
15111 				 * problem is that there could already be some
15112 				 * other command on un_retry_bp that is waiting
15113 				 * for this one to complete, so we would be
15114 				 * deadlocked if we put this command back onto
15115 				 * the waitq for later retry (since un_retry_bp
15116 				 * must complete before the driver gets back to
15117 				 * commands on the waitq).
15118 				 *
15119 				 * To avoid deadlock we must schedule a callback
15120 				 * that will restart this command after a set
15121 				 * interval.  This should keep retrying for as
15122 				 * long as the underlying transport keeps
15123 				 * returning TRAN_BUSY (just like for other
15124 				 * commands).  Use the same timeout interval as
15125 				 * for the ordinary TRAN_BUSY retry.
15126 				 */
15127 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15128 				    "sd_start_cmds: scsi_transport() returned "
15129 				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");
15130 
15131 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
15132 				un->un_direct_priority_timeid =
15133 				    timeout(sd_start_direct_priority_command,
15134 				    bp, un->un_busy_timeout / 500);
15135 
15136 				goto exit;
15137 			}
15138 
15139 			/*
15140 			 * For TRAN_BUSY, we want to reduce the throttle value,
15141 			 * unless we are retrying a command.
15142 			 */
15143 			if (bp != un->un_retry_bp) {
15144 				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
15145 			}
15146 
15147 			/*
15148 			 * Set up the bp to be tried again 10 ms later.
15149 			 * Note:x86: Is there a timeout value in the sd_lun
15150 			 * for this condition?
15151 			 */
15152 			sd_set_retry_bp(un, bp, un->un_busy_timeout / 500,
15153 			    kstat_runq_back_to_waitq);
15154 			goto exit;
15155 
15156 		case TRAN_FATAL_ERROR:
15157 			un->un_tran_fatal_count++;
15158 			/* FALLTHRU */
15159 
15160 		case TRAN_BADPKT:
15161 		default:
15162 			un->un_ncmds_in_transport--;
15163 			ASSERT(un->un_ncmds_in_transport >= 0);
15164 
15165 			/*
15166 			 * If this is our REQUEST SENSE command with a
15167 			 * transport error, we must get back the pointers
15168 			 * to the original buf, and mark the REQUEST
15169 			 * SENSE command as "available".
15170 			 */
15171 			if (bp == un->un_rqs_bp) {
15172 				bp = sd_mark_rqs_idle(un, xp);
15173 				xp = SD_GET_XBUF(bp);
15174 			} else {
15175 				/*
15176 				 * Legacy behavior: do not update transport
15177 				 * error count for request sense commands.
15178 				 */
15179 				SD_UPDATE_ERRSTATS(un, sd_transerrs);
15180 			}
15181 
15182 			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
15183 			sd_print_transport_rejected_message(un, xp, rval);
15184 
15185 			/*
15186 			 * This command will be terminated by SD driver due
15187 			 * to a fatal transport error. We should post
15188 			 * ereport.io.scsi.cmd.disk.tran with driver-assessment
15189 			 * of "fail" for any command to indicate this
15190 			 * situation.
15191 			 */
15192 			if (xp->xb_ena > 0) {
15193 				ASSERT(un->un_fm_private != NULL);
15194 				sfip = un->un_fm_private;
15195 				sfip->fm_ssc.ssc_flags |= SSC_FLAGS_TRAN_ABORT;
15196 				sd_ssc_extract_info(&sfip->fm_ssc, un,
15197 				    xp->xb_pktp, bp, xp);
15198 				sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_FATAL);
15199 			}
15200 
15201 			/*
15202 			 * We must use sd_return_failed_command_no_restart() to
15203 			 * avoid a recursive call back into sd_start_cmds().
15204 			 * However this also means that we must keep processing
15205 			 * the waitq here in order to avoid stalling.
15206 			 */
15207 			sd_return_failed_command_no_restart(un, bp, EIO);
15208 
15209 			/*
15210 			 * Notify any threads waiting in sd_ddi_suspend() that
15211 			 * a command completion has occurred.
15212 			 */
15213 			if (un->un_state == SD_STATE_SUSPENDED) {
15214 				cv_broadcast(&un->un_disk_busy_cv);
15215 			}
15216 
15217 			if (bp == immed_bp) {
15218 				/* immed_bp is gone by now, so clear this */
15219 				immed_bp = NULL;
15220 			}
15221 			break;
15222 		}
15223 
15224 	} while (immed_bp == NULL);
15225 
15226 exit:
15227 	ASSERT(mutex_owned(SD_MUTEX(un)));
15228 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
15229 }
15230 
15231 
15232 /*
15233  *    Function: sd_return_command
15234  *
15235  * Description: Returns a command to its originator (with or without an
15236  *		error).  Also starts commands waiting to be transported
15237  *		to the target.
15238  *
15239  *     Context: May be called from interrupt, kernel, or timeout context
15240  */
15241 
static void
sd_return_command(struct sd_lun *un, struct buf *bp)
{
	struct sd_xbuf *xp;		/* xbuf attached to bp */
	struct scsi_pkt *pktp;		/* scsi_pkt attached to bp */
	struct sd_fm_internal *sfip;	/* FMA state, for ereport posting */

	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	/* The internal REQUEST SENSE buf is never returned via this path. */
	ASSERT(bp != un->un_rqs_bp);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);

	pktp = SD_GET_PKTP(bp);
	sfip = (struct sd_fm_internal *)un->un_fm_private;
	ASSERT(sfip != NULL);

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");

	/*
	 * Note: check for the "sdrestart failed" case: when partial DMA
	 * mapping is in use and a non-USCSI command completed without
	 * error (pkt_resid == 0) but still has an untransferred DMA
	 * residual (xb_dma_resid != 0), set up and dispatch the next
	 * portion of the transfer instead of completing the command now.
	 */
	if ((un->un_partial_dma_supported == 1) &&
	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
	    (geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
	    (xp->xb_pktp->pkt_resid == 0)) {

		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
			/*
			 * Successfully set up next portion of cmd
			 * transfer, try sending it
			 */
			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
			    NULL, NULL, 0, (clock_t)0, NULL);
			sd_start_cmds(un, NULL);
			return;	/* Note:x86: need a return here? */
		}
	}

	/*
	 * If this is the failfast bp, clear it from un_failfast_bp. This
	 * can happen if upon being re-tried the failfast bp either
	 * succeeded or encountered another error (possibly even a different
	 * error than the one that precipitated the failfast state, but in
	 * that case it would have had to exhaust retries as well). Regardless,
	 * this should not occur whenever the instance is in the active
	 * failfast state.
	 */
	if (bp == un->un_failfast_bp) {
		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
		un->un_failfast_bp = NULL;
	}

	/*
	 * Clear the failfast state upon successful completion of ANY cmd.
	 */
	if (bp->b_error == 0) {
		un->un_failfast_state = SD_FAILFAST_INACTIVE;
		/*
		 * If this is a successful command, but used to be retried,
		 * we will take it as a recovered command and post an
		 * ereport with driver-assessment of "recovered".
		 * (xb_ena > 0 indicates an ereport was generated earlier
		 * for this command.)
		 */
		if (xp->xb_ena > 0) {
			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_RECOVERY);
		}
	} else {
		/*
		 * If this is a failed non-USCSI command we will post an
		 * ereport with driver-assessment set accordingly ("fail" or
		 * "fatal").
		 */
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_FATAL);
		}
	}

	/*
	 * This is used if the command was retried one or more times. Show that
	 * we are done with it, and allow processing of the waitq to resume.
	 */
	if (bp == un->un_retry_bp) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_return_command: un:0x%p: "
		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
		un->un_retry_bp = NULL;
		un->un_retry_statp = NULL;
	}

	SD_UPDATE_RDWR_STATS(un, bp);
	SD_UPDATE_PARTITION_STATS(un, bp);

	switch (un->un_state) {
	case SD_STATE_SUSPENDED:
		/*
		 * Notify any threads waiting in sd_ddi_suspend() that
		 * a command completion has occurred.  Do not start new
		 * commands while suspended.
		 */
		cv_broadcast(&un->un_disk_busy_cv);
		break;
	default:
		sd_start_cmds(un, NULL);
		break;
	}

	/*
	 * Return this command up the iodone chain to its originator.
	 * The soft-state mutex is dropped here; the destroypkt routine
	 * and the iodone chain both run unlocked (see the ASSERT below).
	 */
	mutex_exit(SD_MUTEX(un));

	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
	xp->xb_pktp = NULL;

	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);

	ASSERT(!mutex_owned(SD_MUTEX(un)));
	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
}
15363 
15364 
15365 /*
15366  *    Function: sd_return_failed_command
15367  *
15368  * Description: Command completion when an error occurred.
15369  *
15370  *     Context: May be called from interrupt context
15371  */
15372 
15373 static void
15374 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
15375 {
15376 	ASSERT(bp != NULL);
15377 	ASSERT(un != NULL);
15378 	ASSERT(mutex_owned(SD_MUTEX(un)));
15379 
15380 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15381 	    "sd_return_failed_command: entry\n");
15382 
15383 	/*
15384 	 * b_resid could already be nonzero due to a partial data
15385 	 * transfer, so do not change it here.
15386 	 */
15387 	SD_BIOERROR(bp, errcode);
15388 
15389 	sd_return_command(un, bp);
15390 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15391 	    "sd_return_failed_command: exit\n");
15392 }
15393 
15394 
15395 /*
15396  *    Function: sd_return_failed_command_no_restart
15397  *
15398  * Description: Same as sd_return_failed_command, but ensures that no
15399  *		call back into sd_start_cmds will be issued.
15400  *
15401  *     Context: May be called from interrupt context
15402  */
15403 
static void
sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
	int errcode)
{
	struct sd_xbuf *xp;	/* xbuf attached to bp */

	ASSERT(bp != NULL);
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	ASSERT(errcode != 0);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_return_failed_command_no_restart: entry\n");

	/*
	 * b_resid could already be nonzero due to a partial data
	 * transfer, so do not change it here.
	 */
	SD_BIOERROR(bp, errcode);

	/*
	 * If this is the failfast bp, clear it. This can happen if the
	 * failfast bp encountered a fatal error when we attempted to
	 * re-try it (such as a scsi_transport(9F) failure).  However
	 * we should NOT be in an active failfast state if the failfast
	 * bp is not NULL.
	 */
	if (bp == un->un_failfast_bp) {
		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
		un->un_failfast_bp = NULL;
	}

	if (bp == un->un_retry_bp) {
		/*
		 * This command was retried one or more times. Show that we are
		 * done with it, and allow processing of the waitq to resume.
		 */
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_return_failed_command_no_restart: "
		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
		un->un_retry_bp = NULL;
		un->un_retry_statp = NULL;
	}

	SD_UPDATE_RDWR_STATS(un, bp);
	SD_UPDATE_PARTITION_STATS(un, bp);

	/*
	 * Unlike sd_return_command(), do NOT call sd_start_cmds() here;
	 * this variant exists so callers inside sd_start_cmds() can fail
	 * a command without recursing.  Drop the soft-state mutex while
	 * destroying the pkt and running the iodone chain.
	 */
	mutex_exit(SD_MUTEX(un));

	/* The pkt may be absent (e.g. scsi_pkt allocation failed). */
	if (xp->xb_pktp != NULL) {
		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
		xp->xb_pktp = NULL;
	}

	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_return_failed_command_no_restart: exit\n");
}
15467 
15468 
15469 /*
15470  *    Function: sd_retry_command
15471  *
15472  * Description: queue up a command for retry, or (optionally) fail it
15473  *		if retry counts are exhausted.
15474  *
15475  *   Arguments: un - Pointer to the sd_lun struct for the target.
15476  *
15477  *		bp - Pointer to the buf for the command to be retried.
15478  *
15479  *		retry_check_flag - Flag to see which (if any) of the retry
15480  *		   counts should be decremented/checked. If the indicated
15481  *		   retry count is exhausted, then the command will not be
15482  *		   retried; it will be failed instead. This should use a
15483  *		   value equal to one of the following:
15484  *
15485  *			SD_RETRIES_NOCHECK
 *			SD_RETRIES_STANDARD
15487  *			SD_RETRIES_VICTIM
15488  *
15489  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
 *		   if the check should be made to see if FLAG_ISOLATE is set
15491  *		   in the pkt. If FLAG_ISOLATE is set, then the command is
15492  *		   not retried, it is simply failed.
15493  *
15494  *		user_funcp - Ptr to function to call before dispatching the
15495  *		   command. May be NULL if no action needs to be performed.
15496  *		   (Primarily intended for printing messages.)
15497  *
15498  *		user_arg - Optional argument to be passed along to
15499  *		   the user_funcp call.
15500  *
15501  *		failure_code - errno return code to set in the bp if the
15502  *		   command is going to be failed.
15503  *
15504  *		retry_delay - Retry delay interval in (clock_t) units. May
15505  *		   be zero which indicates that the retry should be retried
15506  *		   immediately (ie, without an intervening delay).
15507  *
15508  *		statp - Ptr to kstat function to be updated if the command
15509  *		   is queued for a delayed retry. May be NULL if no kstat
15510  *		   update is desired.
15511  *
15512  *     Context: May be called from interrupt context.
15513  */
15514 
15515 static void
15516 sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
15517 	void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int
15518 	code), void *user_arg, int failure_code,  clock_t retry_delay,
15519 	void (*statp)(kstat_io_t *))
15520 {
15521 	struct sd_xbuf	*xp;
15522 	struct scsi_pkt	*pktp;
15523 	struct sd_fm_internal *sfip;
15524 
15525 	ASSERT(un != NULL);
15526 	ASSERT(mutex_owned(SD_MUTEX(un)));
15527 	ASSERT(bp != NULL);
15528 	xp = SD_GET_XBUF(bp);
15529 	ASSERT(xp != NULL);
15530 	pktp = SD_GET_PKTP(bp);
15531 	ASSERT(pktp != NULL);
15532 
15533 	sfip = (struct sd_fm_internal *)un->un_fm_private;
15534 	ASSERT(sfip != NULL);
15535 
15536 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15537 	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);
15538 
15539 	/*
15540 	 * If we are syncing or dumping, fail the command to avoid
15541 	 * recursively calling back into scsi_transport().
15542 	 */
15543 	if (ddi_in_panic()) {
15544 		goto fail_command_no_log;
15545 	}
15546 
15547 	/*
	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
15549 	 * log an error and fail the command.
15550 	 */
15551 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
15552 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
15553 		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
15554 		sd_dump_memory(un, SD_LOG_IO, "CDB",
15555 		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15556 		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
15557 		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
15558 		goto fail_command;
15559 	}
15560 
15561 	/*
15562 	 * If we are suspended, then put the command onto head of the
15563 	 * wait queue since we don't want to start more commands, and
15564 	 * clear the un_retry_bp. Next time when we are resumed, will
15565 	 * handle the command in the wait queue.
15566 	 */
15567 	switch (un->un_state) {
15568 	case SD_STATE_SUSPENDED:
15569 	case SD_STATE_DUMPING:
15570 		bp->av_forw = un->un_waitq_headp;
15571 		un->un_waitq_headp = bp;
15572 		if (un->un_waitq_tailp == NULL) {
15573 			un->un_waitq_tailp = bp;
15574 		}
15575 		if (bp == un->un_retry_bp) {
15576 			un->un_retry_bp = NULL;
15577 			un->un_retry_statp = NULL;
15578 		}
15579 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
15580 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
15581 		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
15582 		return;
15583 	default:
15584 		break;
15585 	}
15586 
15587 	/*
15588 	 * If the caller wants us to check FLAG_ISOLATE, then see if that
15589 	 * is set; if it is then we do not want to retry the command.
15590 	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
15591 	 */
15592 	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
15593 		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
15594 			goto fail_command;
15595 		}
15596 	}
15597 
15598 
15599 	/*
15600 	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
15601 	 * command timeout or a selection timeout has occurred. This means
	 * that we were unable to establish any kind of communication with
15603 	 * the target, and subsequent retries and/or commands are likely
15604 	 * to encounter similar results and take a long time to complete.
15605 	 *
15606 	 * If this is a failfast error condition, we need to update the
15607 	 * failfast state, even if this bp does not have B_FAILFAST set.
15608 	 */
15609 	if (retry_check_flag & SD_RETRIES_FAILFAST) {
15610 		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
15611 			ASSERT(un->un_failfast_bp == NULL);
15612 			/*
15613 			 * If we are already in the active failfast state, and
15614 			 * another failfast error condition has been detected,
15615 			 * then fail this command if it has B_FAILFAST set.
15616 			 * If B_FAILFAST is clear, then maintain the legacy
15617 			 * behavior of retrying heroically, even tho this will
15618 			 * take a lot more time to fail the command.
15619 			 */
15620 			if (bp->b_flags & B_FAILFAST) {
15621 				goto fail_command;
15622 			}
15623 		} else {
15624 			/*
15625 			 * We're not in the active failfast state, but we
15626 			 * have a failfast error condition, so we must begin
15627 			 * transition to the next state. We do this regardless
15628 			 * of whether or not this bp has B_FAILFAST set.
15629 			 */
15630 			if (un->un_failfast_bp == NULL) {
15631 				/*
15632 				 * This is the first bp to meet a failfast
15633 				 * condition so save it on un_failfast_bp &
15634 				 * do normal retry processing. Do not enter
15635 				 * active failfast state yet. This marks
15636 				 * entry into the "failfast pending" state.
15637 				 */
15638 				un->un_failfast_bp = bp;
15639 
15640 			} else if (un->un_failfast_bp == bp) {
15641 				/*
15642 				 * This is the second time *this* bp has
15643 				 * encountered a failfast error condition,
15644 				 * so enter active failfast state & flush
15645 				 * queues as appropriate.
15646 				 */
15647 				un->un_failfast_state = SD_FAILFAST_ACTIVE;
15648 				un->un_failfast_bp = NULL;
15649 				sd_failfast_flushq(un);
15650 
15651 				/*
15652 				 * Fail this bp now if B_FAILFAST set;
15653 				 * otherwise continue with retries. (It would
15654 				 * be pretty ironic if this bp succeeded on a
15655 				 * subsequent retry after we just flushed all
15656 				 * the queues).
15657 				 */
15658 				if (bp->b_flags & B_FAILFAST) {
15659 					goto fail_command;
15660 				}
15661 
15662 #if !defined(lint) && !defined(__lint)
15663 			} else {
15664 				/*
15665 				 * If neither of the preceeding conditionals
15666 				 * was true, it means that there is some
15667 				 * *other* bp that has met an inital failfast
15668 				 * condition and is currently either being
15669 				 * retried or is waiting to be retried. In
15670 				 * that case we should perform normal retry
15671 				 * processing on *this* bp, since there is a
15672 				 * chance that the current failfast condition
15673 				 * is transient and recoverable. If that does
15674 				 * not turn out to be the case, then retries
15675 				 * will be cleared when the wait queue is
15676 				 * flushed anyway.
15677 				 */
15678 #endif
15679 			}
15680 		}
15681 	} else {
15682 		/*
15683 		 * SD_RETRIES_FAILFAST is clear, which indicates that we
15684 		 * likely were able to at least establish some level of
15685 		 * communication with the target and subsequent commands
15686 		 * and/or retries are likely to get through to the target,
15687 		 * In this case we want to be aggressive about clearing
15688 		 * the failfast state. Note that this does not affect
15689 		 * the "failfast pending" condition.
15690 		 */
15691 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
15692 	}
15693 
15694 
15695 	/*
15696 	 * Check the specified retry count to see if we can still do
15697 	 * any retries with this pkt before we should fail it.
15698 	 */
15699 	switch (retry_check_flag & SD_RETRIES_MASK) {
15700 	case SD_RETRIES_VICTIM:
15701 		/*
15702 		 * Check the victim retry count. If exhausted, then fall
15703 		 * thru & check against the standard retry count.
15704 		 */
15705 		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
15706 			/* Increment count & proceed with the retry */
15707 			xp->xb_victim_retry_count++;
15708 			break;
15709 		}
15710 		/* Victim retries exhausted, fall back to std. retries... */
15711 		/* FALLTHRU */
15712 
15713 	case SD_RETRIES_STANDARD:
15714 		if (xp->xb_retry_count >= un->un_retry_count) {
15715 			/* Retries exhausted, fail the command */
15716 			SD_TRACE(SD_LOG_IO_CORE, un,
15717 			    "sd_retry_command: retries exhausted!\n");
15718 			/*
15719 			 * update b_resid for failed SCMD_READ & SCMD_WRITE
15720 			 * commands with nonzero pkt_resid.
15721 			 */
15722 			if ((pktp->pkt_reason == CMD_CMPLT) &&
15723 			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
15724 			    (pktp->pkt_resid != 0)) {
15725 				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
15726 				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
15727 					SD_UPDATE_B_RESID(bp, pktp);
15728 				}
15729 			}
15730 			goto fail_command;
15731 		}
15732 		xp->xb_retry_count++;
15733 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15734 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15735 		break;
15736 
15737 	case SD_RETRIES_UA:
15738 		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
15739 			/* Retries exhausted, fail the command */
15740 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15741 			    "Unit Attention retries exhausted. "
15742 			    "Check the target.\n");
15743 			goto fail_command;
15744 		}
15745 		xp->xb_ua_retry_count++;
15746 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15747 		    "sd_retry_command: retry count:%d\n",
15748 		    xp->xb_ua_retry_count);
15749 		break;
15750 
15751 	case SD_RETRIES_BUSY:
15752 		if (xp->xb_retry_count >= un->un_busy_retry_count) {
15753 			/* Retries exhausted, fail the command */
15754 			SD_TRACE(SD_LOG_IO_CORE, un,
15755 			    "sd_retry_command: retries exhausted!\n");
15756 			goto fail_command;
15757 		}
15758 		xp->xb_retry_count++;
15759 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15760 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15761 		break;
15762 
15763 	case SD_RETRIES_NOCHECK:
15764 	default:
15765 		/* No retry count to check. Just proceed with the retry */
15766 		break;
15767 	}
15768 
15769 	xp->xb_pktp->pkt_flags |= FLAG_HEAD;
15770 
15771 	/*
15772 	 * If this is a non-USCSI command being retried
15773 	 * during execution last time, we should post an ereport with
15774 	 * driver-assessment of the value "retry".
15775 	 * For partial DMA, request sense and STATUS_QFULL, there are no
15776 	 * hardware errors, we bypass ereport posting.
15777 	 */
15778 	if (failure_code != 0) {
15779 		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
15780 			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
15781 			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_RETRY);
15782 		}
15783 	}
15784 
15785 	/*
15786 	 * If we were given a zero timeout, we must attempt to retry the
15787 	 * command immediately (ie, without a delay).
15788 	 */
15789 	if (retry_delay == 0) {
15790 		/*
15791 		 * Check some limiting conditions to see if we can actually
15792 		 * do the immediate retry.  If we cannot, then we must
15793 		 * fall back to queueing up a delayed retry.
15794 		 */
15795 		if (un->un_ncmds_in_transport >= un->un_throttle) {
15796 			/*
15797 			 * We are at the throttle limit for the target,
15798 			 * fall back to delayed retry.
15799 			 */
15800 			retry_delay = un->un_busy_timeout;
15801 			statp = kstat_waitq_enter;
15802 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15803 			    "sd_retry_command: immed. retry hit "
15804 			    "throttle!\n");
15805 		} else {
15806 			/*
15807 			 * We're clear to proceed with the immediate retry.
15808 			 * First call the user-provided function (if any)
15809 			 */
15810 			if (user_funcp != NULL) {
15811 				(*user_funcp)(un, bp, user_arg,
15812 				    SD_IMMEDIATE_RETRY_ISSUED);
15813 #ifdef __lock_lint
15814 				sd_print_incomplete_msg(un, bp, user_arg,
15815 				    SD_IMMEDIATE_RETRY_ISSUED);
15816 				sd_print_cmd_incomplete_msg(un, bp, user_arg,
15817 				    SD_IMMEDIATE_RETRY_ISSUED);
15818 				sd_print_sense_failed_msg(un, bp, user_arg,
15819 				    SD_IMMEDIATE_RETRY_ISSUED);
15820 #endif
15821 			}
15822 
15823 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15824 			    "sd_retry_command: issuing immediate retry\n");
15825 
15826 			/*
15827 			 * Call sd_start_cmds() to transport the command to
15828 			 * the target.
15829 			 */
15830 			sd_start_cmds(un, bp);
15831 
15832 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15833 			    "sd_retry_command exit\n");
15834 			return;
15835 		}
15836 	}
15837 
15838 	/*
15839 	 * Set up to retry the command after a delay.
15840 	 * First call the user-provided function (if any)
15841 	 */
15842 	if (user_funcp != NULL) {
15843 		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
15844 	}
15845 
15846 	sd_set_retry_bp(un, bp, retry_delay, statp);
15847 
15848 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15849 	return;
15850 
15851 fail_command:
15852 
15853 	if (user_funcp != NULL) {
15854 		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
15855 	}
15856 
15857 fail_command_no_log:
15858 
15859 	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15860 	    "sd_retry_command: returning failed command\n");
15861 
15862 	sd_return_failed_command(un, bp, failure_code);
15863 
15864 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
15865 }
15866 
15867 
15868 /*
15869  *    Function: sd_set_retry_bp
15870  *
15871  * Description: Set up the given bp for retry.
15872  *
15873  *   Arguments: un - ptr to associated softstate
15874  *		bp - ptr to buf(9S) for the command
15875  *		retry_delay - time interval before issuing retry (may be 0)
15876  *		statp - optional pointer to kstat function
15877  *
15878  *     Context: May be called under interrupt context
15879  */
15880 
15881 static void
15882 sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
15883 	void (*statp)(kstat_io_t *))
15884 {
15885 	ASSERT(un != NULL);
15886 	ASSERT(mutex_owned(SD_MUTEX(un)));
15887 	ASSERT(bp != NULL);
15888 
15889 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15890 	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);
15891 
15892 	/*
15893 	 * Indicate that the command is being retried. This will not allow any
15894 	 * other commands on the wait queue to be transported to the target
15895 	 * until this command has been completed (success or failure). The
15896 	 * "retry command" is not transported to the target until the given
15897 	 * time delay expires, unless the user specified a 0 retry_delay.
15898 	 *
15899 	 * Note: the timeout(9F) callback routine is what actually calls
15900 	 * sd_start_cmds() to transport the command, with the exception of a
15901 	 * zero retry_delay. The only current implementor of a zero retry delay
15902 	 * is the case where a START_STOP_UNIT is sent to spin-up a device.
15903 	 */
15904 	if (un->un_retry_bp == NULL) {
15905 		ASSERT(un->un_retry_statp == NULL);
15906 		un->un_retry_bp = bp;
15907 
15908 		/*
15909 		 * If the user has not specified a delay the command should
15910 		 * be queued and no timeout should be scheduled.
15911 		 */
15912 		if (retry_delay == 0) {
15913 			/*
15914 			 * Save the kstat pointer that will be used in the
15915 			 * call to SD_UPDATE_KSTATS() below, so that
15916 			 * sd_start_cmds() can correctly decrement the waitq
15917 			 * count when it is time to transport this command.
15918 			 */
15919 			un->un_retry_statp = statp;
15920 			goto done;
15921 		}
15922 	}
15923 
15924 	if (un->un_retry_bp == bp) {
15925 		/*
15926 		 * Save the kstat pointer that will be used in the call to
15927 		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
15928 		 * correctly decrement the waitq count when it is time to
15929 		 * transport this command.
15930 		 */
15931 		un->un_retry_statp = statp;
15932 
15933 		/*
15934 		 * Schedule a timeout if:
15935 		 *   1) The user has specified a delay.
15936 		 *   2) There is not a START_STOP_UNIT callback pending.
15937 		 *
15938 		 * If no delay has been specified, then it is up to the caller
15939 		 * to ensure that IO processing continues without stalling.
15940 		 * Effectively, this means that the caller will issue the
15941 		 * required call to sd_start_cmds(). The START_STOP_UNIT
15942 		 * callback does this after the START STOP UNIT command has
15943 		 * completed. In either of these cases we should not schedule
15944 		 * a timeout callback here.  Also don't schedule the timeout if
15945 		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
15946 		 */
15947 		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
15948 		    (un->un_direct_priority_timeid == NULL)) {
15949 			un->un_retry_timeid =
15950 			    timeout(sd_start_retry_command, un, retry_delay);
15951 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15952 			    "sd_set_retry_bp: setting timeout: un: 0x%p"
15953 			    " bp:0x%p un_retry_timeid:0x%p\n",
15954 			    un, bp, un->un_retry_timeid);
15955 		}
15956 	} else {
15957 		/*
15958 		 * We only get in here if there is already another command
15959 		 * waiting to be retried.  In this case, we just put the
15960 		 * given command onto the wait queue, so it can be transported
15961 		 * after the current retry command has completed.
15962 		 *
15963 		 * Also we have to make sure that if the command at the head
15964 		 * of the wait queue is the un_failfast_bp, that we do not
15965 		 * put ahead of it any other commands that are to be retried.
15966 		 */
15967 		if ((un->un_failfast_bp != NULL) &&
15968 		    (un->un_failfast_bp == un->un_waitq_headp)) {
15969 			/*
15970 			 * Enqueue this command AFTER the first command on
15971 			 * the wait queue (which is also un_failfast_bp).
15972 			 */
15973 			bp->av_forw = un->un_waitq_headp->av_forw;
15974 			un->un_waitq_headp->av_forw = bp;
15975 			if (un->un_waitq_headp == un->un_waitq_tailp) {
15976 				un->un_waitq_tailp = bp;
15977 			}
15978 		} else {
15979 			/* Enqueue this command at the head of the waitq. */
15980 			bp->av_forw = un->un_waitq_headp;
15981 			un->un_waitq_headp = bp;
15982 			if (un->un_waitq_tailp == NULL) {
15983 				un->un_waitq_tailp = bp;
15984 			}
15985 		}
15986 
15987 		if (statp == NULL) {
15988 			statp = kstat_waitq_enter;
15989 		}
15990 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15991 		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
15992 	}
15993 
15994 done:
15995 	if (statp != NULL) {
15996 		SD_UPDATE_KSTATS(un, statp, bp);
15997 	}
15998 
15999 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16000 	    "sd_set_retry_bp: exit un:0x%p\n", un);
16001 }
16002 
16003 
16004 /*
16005  *    Function: sd_start_retry_command
16006  *
16007  * Description: Start the command that has been waiting on the target's
16008  *		retry queue.  Called from timeout(9F) context after the
16009  *		retry delay interval has expired.
16010  *
16011  *   Arguments: arg - pointer to associated softstate for the device.
16012  *
16013  *     Context: timeout(9F) thread context.  May not sleep.
16014  */
16015 
16016 static void
16017 sd_start_retry_command(void *arg)
16018 {
16019 	struct sd_lun *un = arg;
16020 
16021 	ASSERT(un != NULL);
16022 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16023 
16024 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16025 	    "sd_start_retry_command: entry\n");
16026 
16027 	mutex_enter(SD_MUTEX(un));
16028 
16029 	un->un_retry_timeid = NULL;
16030 
16031 	if (un->un_retry_bp != NULL) {
16032 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16033 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
16034 		    un, un->un_retry_bp);
16035 		sd_start_cmds(un, un->un_retry_bp);
16036 	}
16037 
16038 	mutex_exit(SD_MUTEX(un));
16039 
16040 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16041 	    "sd_start_retry_command: exit\n");
16042 }
16043 
16044 /*
16045  *    Function: sd_rmw_msg_print_handler
16046  *
16047  * Description: If RMW mode is enabled and warning message is triggered
16048  *              print I/O count during a fixed interval.
16049  *
16050  *   Arguments: arg - pointer to associated softstate for the device.
16051  *
16052  *     Context: timeout(9F) thread context. May not sleep.
16053  */
16054 static void
16055 sd_rmw_msg_print_handler(void *arg)
16056 {
16057 	struct sd_lun *un = arg;
16058 
16059 	ASSERT(un != NULL);
16060 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16061 
16062 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16063 	    "sd_rmw_msg_print_handler: entry\n");
16064 
16065 	mutex_enter(SD_MUTEX(un));
16066 
16067 	if (un->un_rmw_incre_count > 0) {
16068 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16069 		    "%"PRIu64" I/O requests are not aligned with %d disk "
16070 		    "sector size in %ld seconds. They are handled through "
16071 		    "Read Modify Write but the performance is very low!\n",
16072 		    un->un_rmw_incre_count, un->un_tgt_blocksize,
16073 		    drv_hztousec(SD_RMW_MSG_PRINT_TIMEOUT) / 1000000);
16074 		un->un_rmw_incre_count = 0;
16075 		un->un_rmw_msg_timeid = timeout(sd_rmw_msg_print_handler,
16076 		    un, SD_RMW_MSG_PRINT_TIMEOUT);
16077 	} else {
16078 		un->un_rmw_msg_timeid = NULL;
16079 	}
16080 
16081 	mutex_exit(SD_MUTEX(un));
16082 
16083 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16084 	    "sd_rmw_msg_print_handler: exit\n");
16085 }
16086 
16087 /*
16088  *    Function: sd_start_direct_priority_command
16089  *
16090  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
16091  *		received TRAN_BUSY when we called scsi_transport() to send it
16092  *		to the underlying HBA. This function is called from timeout(9F)
16093  *		context after the delay interval has expired.
16094  *
16095  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
16096  *
16097  *     Context: timeout(9F) thread context.  May not sleep.
16098  */
16099 
16100 static void
16101 sd_start_direct_priority_command(void *arg)
16102 {
16103 	struct buf	*priority_bp = arg;
16104 	struct sd_lun	*un;
16105 
16106 	ASSERT(priority_bp != NULL);
16107 	un = SD_GET_UN(priority_bp);
16108 	ASSERT(un != NULL);
16109 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16110 
16111 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16112 	    "sd_start_direct_priority_command: entry\n");
16113 
16114 	mutex_enter(SD_MUTEX(un));
16115 	un->un_direct_priority_timeid = NULL;
16116 	sd_start_cmds(un, priority_bp);
16117 	mutex_exit(SD_MUTEX(un));
16118 
16119 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16120 	    "sd_start_direct_priority_command: exit\n");
16121 }
16122 
16123 
16124 /*
16125  *    Function: sd_send_request_sense_command
16126  *
16127  * Description: Sends a REQUEST SENSE command to the target
16128  *
16129  *     Context: May be called from interrupt context.
16130  */
16131 
16132 static void
16133 sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
16134 	struct scsi_pkt *pktp)
16135 {
16136 	ASSERT(bp != NULL);
16137 	ASSERT(un != NULL);
16138 	ASSERT(mutex_owned(SD_MUTEX(un)));
16139 
16140 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
16141 	    "entry: buf:0x%p\n", bp);
16142 
16143 	/*
16144 	 * If we are syncing or dumping, then fail the command to avoid a
16145 	 * recursive callback into scsi_transport(). Also fail the command
16146 	 * if we are suspended (legacy behavior).
16147 	 */
16148 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
16149 	    (un->un_state == SD_STATE_DUMPING)) {
16150 		sd_return_failed_command(un, bp, EIO);
16151 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16152 		    "sd_send_request_sense_command: syncing/dumping, exit\n");
16153 		return;
16154 	}
16155 
16156 	/*
16157 	 * Retry the failed command and don't issue the request sense if:
16158 	 *    1) the sense buf is busy
16159 	 *    2) we have 1 or more outstanding commands on the target
16160 	 *    (the sense data will be cleared or invalidated any way)
16161 	 *
16162 	 * Note: There could be an issue with not checking a retry limit here,
16163 	 * the problem is determining which retry limit to check.
16164 	 */
16165 	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
16166 		/* Don't retry if the command is flagged as non-retryable */
16167 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16168 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
16169 			    NULL, NULL, 0, un->un_busy_timeout,
16170 			    kstat_waitq_enter);
16171 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16172 			    "sd_send_request_sense_command: "
16173 			    "at full throttle, retrying exit\n");
16174 		} else {
16175 			sd_return_failed_command(un, bp, EIO);
16176 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16177 			    "sd_send_request_sense_command: "
16178 			    "at full throttle, non-retryable exit\n");
16179 		}
16180 		return;
16181 	}
16182 
16183 	sd_mark_rqs_busy(un, bp);
16184 	sd_start_cmds(un, un->un_rqs_bp);
16185 
16186 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16187 	    "sd_send_request_sense_command: exit\n");
16188 }
16189 
16190 
16191 /*
16192  *    Function: sd_mark_rqs_busy
16193  *
16194  * Description: Indicate that the request sense bp for this instance is
16195  *		in use.
16196  *
16197  *     Context: May be called under interrupt context
16198  */
16199 
static void
sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
{
	struct sd_xbuf	*sense_xp;

	ASSERT(un != NULL);
	ASSERT(bp != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_sense_isbusy == 0);

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);

	/* The per-instance RQS buf always carries an xbuf; see sd_alloc_rqs */
	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
	ASSERT(sense_xp != NULL);

	SD_INFO(SD_LOG_IO, un,
	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);

	ASSERT(sense_xp->xb_pktp != NULL);
	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
	    == (FLAG_SENSING | FLAG_HEAD));

	/* Claim the RQS resources and reset the packet completion state */
	un->un_sense_isbusy = 1;
	un->un_rqs_bp->b_resid = 0;
	sense_xp->xb_pktp->pkt_resid  = 0;
	sense_xp->xb_pktp->pkt_reason = 0;

	/* So we can get back the bp at interrupt time! */
	sense_xp->xb_sense_bp = bp;

	/* Clear any stale sense data from a previous use of the RQS buf */
	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);

	/*
	 * Mark this buf as awaiting sense data. (This is already set in
	 * the pkt_flags for the RQS packet.)
	 */
	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;

	/* Request sense down same path */
	if (scsi_pkt_allocated_correctly((SD_GET_XBUF(bp))->xb_pktp) &&
	    ((SD_GET_XBUF(bp))->xb_pktp)->pkt_path_instance)
		sense_xp->xb_pktp->pkt_path_instance =
		    ((SD_GET_XBUF(bp))->xb_pktp)->pkt_path_instance;

	/* Start the RQS command with a clean slate of retry counters */
	sense_xp->xb_retry_count	= 0;
	sense_xp->xb_victim_retry_count = 0;
	sense_xp->xb_ua_retry_count	= 0;
	sense_xp->xb_nr_retry_count 	= 0;
	sense_xp->xb_dma_resid  = 0;

	/* Clean up the fields for auto-request sense */
	sense_xp->xb_sense_status = 0;
	sense_xp->xb_sense_state  = 0;
	sense_xp->xb_sense_resid  = 0;
	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
}
16259 
16260 
16261 /*
16262  *    Function: sd_mark_rqs_idle
16263  *
16264  * Description: SD_MUTEX must be held continuously through this routine
16265  *		to prevent reuse of the rqs struct before the caller can
 *		complete its processing.
16267  *
16268  * Return Code: Pointer to the RQS buf
16269  *
16270  *     Context: May be called under interrupt context
16271  */
16272 
16273 static struct buf *
16274 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
16275 {
16276 	struct buf *bp;
16277 	ASSERT(un != NULL);
16278 	ASSERT(sense_xp != NULL);
16279 	ASSERT(mutex_owned(SD_MUTEX(un)));
16280 	ASSERT(un->un_sense_isbusy != 0);
16281 
16282 	un->un_sense_isbusy = 0;
16283 	bp = sense_xp->xb_sense_bp;
16284 	sense_xp->xb_sense_bp = NULL;
16285 
16286 	/* This pkt is no longer interested in getting sense data */
16287 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
16288 
16289 	return (bp);
16290 }
16291 
16292 
16293 
16294 /*
16295  *    Function: sd_alloc_rqs
16296  *
16297  * Description: Set up the unit to receive auto request sense data
16298  *
16299  * Return Code: DDI_SUCCESS or DDI_FAILURE
16300  *
16301  *     Context: Called under attach(9E) context
16302  */
16303 
static int
sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
{
	struct sd_xbuf *xp;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_rqs_bp == NULL);
	ASSERT(un->un_rqs_pktp == NULL);

	/*
	 * First allocate the required buf and scsi_pkt structs, then set up
	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
	 */
	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
	    MAX_SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
	if (un->un_rqs_bp == NULL) {
		return (DDI_FAILURE);
	}

	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);

	if (un->un_rqs_pktp == NULL) {
		/* Undo the buf allocation above before bailing out */
		sd_free_rqs(un);
		return (DDI_FAILURE);
	}

	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
	    SCMD_REQUEST_SENSE, 0, MAX_SENSE_LENGTH, 0);

	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);

	/* Set up the other needed members in the ARQ scsi_pkt. */
	un->un_rqs_pktp->pkt_comp   = sdintr;
	un->un_rqs_pktp->pkt_time   = sd_io_time;
	un->un_rqs_pktp->pkt_flags |=
	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */

	/*
	 * Allocate & init the sd_xbuf struct for the RQS command. Do not
	 * provide any intpkt, destroypkt routines as we take care of
	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
	 */
	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
	xp->xb_pktp = un->un_rqs_pktp;
	SD_INFO(SD_LOG_ATTACH_DETACH, un,
	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);

	/*
	 * Save the pointer to the request sense private bp so it can
	 * be retrieved in sdintr.
	 */
	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
	ASSERT(un->un_rqs_bp->b_private == xp);

	/*
	 * See if the HBA supports auto-request sense for the specified
	 * target/lun. If it does, then try to enable it (if not already
	 * enabled).
	 *
	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
	 * return success.  However, in both of these cases ARQ is always
	 * enabled and scsi_ifgetcap will always return true. The best approach
	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
	 *
	 * The 3rd case is the HBA (adp) always returning enabled on
	 * scsi_ifgetcap even when it's not enabled; the best approach
	 * is to issue a scsi_ifsetcap then a scsi_ifgetcap.
	 * Note: this case is to circumvent the Adaptec bug. (x86 only)
	 */

	if (un->un_f_is_fibre == TRUE) {
		/* Fibre channel HBAs always have ARQ enabled */
		un->un_f_arq_enabled = TRUE;
	} else {
#if defined(__i386) || defined(__amd64)
		/*
		 * Circumvent the Adaptec bug, remove this code when
		 * the bug is fixed
		 */
		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
#endif
		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
		case 0:
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_alloc_rqs: HBA supports ARQ\n");
			/*
			 * ARQ is supported by this HBA but currently is not
			 * enabled. Attempt to enable it and if successful then
			 * mark this instance as ARQ enabled.
			 */
			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
			    == 1) {
				/* Successfully enabled ARQ in the HBA */
				SD_INFO(SD_LOG_ATTACH_DETACH, un,
				    "sd_alloc_rqs: ARQ enabled\n");
				un->un_f_arq_enabled = TRUE;
			} else {
				/* Could not enable ARQ in the HBA */
				SD_INFO(SD_LOG_ATTACH_DETACH, un,
				    "sd_alloc_rqs: failed ARQ enable\n");
				un->un_f_arq_enabled = FALSE;
			}
			break;
		case 1:
			/*
			 * ARQ is supported by this HBA and is already enabled.
			 * Just mark ARQ as enabled for this instance.
			 */
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_alloc_rqs: ARQ already enabled\n");
			un->un_f_arq_enabled = TRUE;
			break;
		default:
			/*
			 * ARQ is not supported by this HBA; disable it for this
			 * instance.
			 */
			SD_INFO(SD_LOG_ATTACH_DETACH, un,
			    "sd_alloc_rqs: HBA does not support ARQ\n");
			un->un_f_arq_enabled = FALSE;
			break;
		}
	}

	return (DDI_SUCCESS);
}
16435 
16436 
16437 /*
16438  *    Function: sd_free_rqs
16439  *
16440  * Description: Cleanup for the pre-instance RQS command.
16441  *
16442  *     Context: Kernel thread context
16443  */
16444 
16445 static void
16446 sd_free_rqs(struct sd_lun *un)
16447 {
16448 	ASSERT(un != NULL);
16449 
16450 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
16451 
16452 	/*
16453 	 * If consistent memory is bound to a scsi_pkt, the pkt
16454 	 * has to be destroyed *before* freeing the consistent memory.
16455 	 * Don't change the sequence of this operations.
16456 	 * scsi_destroy_pkt() might access memory, which isn't allowed,
16457 	 * after it was freed in scsi_free_consistent_buf().
16458 	 */
16459 	if (un->un_rqs_pktp != NULL) {
16460 		scsi_destroy_pkt(un->un_rqs_pktp);
16461 		un->un_rqs_pktp = NULL;
16462 	}
16463 
16464 	if (un->un_rqs_bp != NULL) {
16465 		struct sd_xbuf *xp = SD_GET_XBUF(un->un_rqs_bp);
16466 		if (xp != NULL) {
16467 			kmem_free(xp, sizeof (struct sd_xbuf));
16468 		}
16469 		scsi_free_consistent_buf(un->un_rqs_bp);
16470 		un->un_rqs_bp = NULL;
16471 	}
16472 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
16473 }
16474 
16475 
16476 
16477 /*
16478  *    Function: sd_reduce_throttle
16479  *
16480  * Description: Reduces the maximum # of outstanding commands on a
16481  *		target to the current number of outstanding commands.
 *		Queues a timeout(9F) callback to restore the limit
16483  *		after a specified interval has elapsed.
16484  *		Typically used when we get a TRAN_BUSY return code
16485  *		back from scsi_transport().
16486  *
16487  *   Arguments: un - ptr to the sd_lun softstate struct
16488  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
16489  *
16490  *     Context: May be called from interrupt context
16491  */
16492 
16493 static void
16494 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
16495 {
16496 	ASSERT(un != NULL);
16497 	ASSERT(mutex_owned(SD_MUTEX(un)));
16498 	ASSERT(un->un_ncmds_in_transport >= 0);
16499 
16500 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16501 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
16502 	    un, un->un_throttle, un->un_ncmds_in_transport);
16503 
16504 	if (un->un_throttle > 1) {
16505 		if (un->un_f_use_adaptive_throttle == TRUE) {
16506 			switch (throttle_type) {
16507 			case SD_THROTTLE_TRAN_BUSY:
16508 				if (un->un_busy_throttle == 0) {
16509 					un->un_busy_throttle = un->un_throttle;
16510 				}
16511 				break;
16512 			case SD_THROTTLE_QFULL:
16513 				un->un_busy_throttle = 0;
16514 				break;
16515 			default:
16516 				ASSERT(FALSE);
16517 			}
16518 
16519 			if (un->un_ncmds_in_transport > 0) {
16520 				un->un_throttle = un->un_ncmds_in_transport;
16521 			}
16522 
16523 		} else {
16524 			if (un->un_ncmds_in_transport == 0) {
16525 				un->un_throttle = 1;
16526 			} else {
16527 				un->un_throttle = un->un_ncmds_in_transport;
16528 			}
16529 		}
16530 	}
16531 
16532 	/* Reschedule the timeout if none is currently active */
16533 	if (un->un_reset_throttle_timeid == NULL) {
16534 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
16535 		    un, SD_THROTTLE_RESET_INTERVAL);
16536 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16537 		    "sd_reduce_throttle: timeout scheduled!\n");
16538 	}
16539 
16540 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16541 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
16542 }
16543 
16544 
16545 
16546 /*
16547  *    Function: sd_restore_throttle
16548  *
16549  * Description: Callback function for timeout(9F).  Resets the current
16550  *		value of un->un_throttle to its default.
16551  *
16552  *   Arguments: arg - pointer to associated softstate for the device.
16553  *
16554  *     Context: May be called from interrupt context
16555  */
16556 
static void
sd_restore_throttle(void *arg)
{
	struct sd_lun	*un = arg;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);

	/* This callback has fired; its timeout id is no longer valid. */
	un->un_reset_throttle_timeid = NULL;

	if (un->un_f_use_adaptive_throttle == TRUE) {
		/*
		 * If un_busy_throttle is nonzero, then it contains the
		 * value that un_throttle was when we got a TRAN_BUSY back
		 * from scsi_transport(). We want to revert back to this
		 * value.
		 *
		 * In the QFULL case, the throttle limit will incrementally
		 * increase until it reaches max throttle.
		 */
		if (un->un_busy_throttle > 0) {
			un->un_throttle = un->un_busy_throttle;
			un->un_busy_throttle = 0;
		} else {
			/*
			 * increase throttle by 10% open gate slowly, schedule
			 * another restore if saved throttle has not been
			 * reached
			 */
			short throttle;
			if (sd_qfull_throttle_enable) {
				/* Raise by at least 1, capped at the saved limit */
				throttle = un->un_throttle +
				    max((un->un_throttle / 10), 1);
				un->un_throttle =
				    (throttle < un->un_saved_throttle) ?
				    throttle : un->un_saved_throttle;
				if (un->un_throttle < un->un_saved_throttle) {
					un->un_reset_throttle_timeid =
					    timeout(sd_restore_throttle,
					    un,
					    SD_QFULL_THROTTLE_RESET_INTERVAL);
				}
			}
		}

		/*
		 * If un_throttle has fallen below the low-water mark, we
		 * restore the maximum value here (and allow it to ratchet
		 * down again if necessary).
		 */
		if (un->un_throttle < un->un_min_throttle) {
			un->un_throttle = un->un_saved_throttle;
		}
	} else {
		/* Non-adaptive: restore the full saved limit in one step. */
		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
		    "restoring limit from 0x%x to 0x%x\n",
		    un->un_throttle, un->un_saved_throttle);
		un->un_throttle = un->un_saved_throttle;
	}

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
	    "sd_restore_throttle: calling sd_start_cmds!\n");

	/* Push out any commands held back while the throttle was reduced. */
	sd_start_cmds(un, NULL);

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
	    un, un->un_throttle);

	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
}
16635 
16636 /*
16637  *    Function: sdrunout
16638  *
16639  * Description: Callback routine for scsi_init_pkt when a resource allocation
16640  *		fails.
16641  *
16642  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
16643  *		soft state instance.
16644  *
16645  * Return Code: The scsi_init_pkt routine allows for the callback function to
16646  *		return a 0 indicating the callback should be rescheduled or a 1
16647  *		indicating not to reschedule. This routine always returns 1
16648  *		because the driver always provides a callback function to
16649  *		scsi_init_pkt. This results in a callback always being scheduled
16650  *		(via the scsi_init_pkt callback implementation) if a resource
16651  *		failure occurs.
16652  *
16653  *     Context: This callback function may not block or call routines that block
16654  *
16655  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
16656  *		request persisting at the head of the list which cannot be
16657  *		satisfied even after multiple retries. In the future the driver
 *		may implement some type of maximum runout count before failing
16659  *		an I/O.
16660  */
16661 
16662 static int
16663 sdrunout(caddr_t arg)
16664 {
16665 	struct sd_lun	*un = (struct sd_lun *)arg;
16666 
16667 	ASSERT(un != NULL);
16668 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16669 
16670 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
16671 
16672 	mutex_enter(SD_MUTEX(un));
16673 	sd_start_cmds(un, NULL);
16674 	mutex_exit(SD_MUTEX(un));
16675 	/*
16676 	 * This callback routine always returns 1 (i.e. do not reschedule)
16677 	 * because we always specify sdrunout as the callback handler for
16678 	 * scsi_init_pkt inside the call to sd_start_cmds.
16679 	 */
16680 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
16681 	return (1);
16682 }
16683 
16684 
16685 /*
16686  *    Function: sdintr
16687  *
16688  * Description: Completion callback routine for scsi_pkt(9S) structs
16689  *		sent to the HBA driver via scsi_transport(9F).
16690  *
16691  *     Context: Interrupt context
16692  */
16693 
static void
sdintr(struct scsi_pkt *pktp)
{
	struct buf	*bp;
	struct sd_xbuf	*xp;
	struct sd_lun	*un;
	size_t		actual_len;	/* valid sense byte count */
	sd_ssc_t	*sscp;

	ASSERT(pktp != NULL);
	/* Recover the buf/xbuf/softstate for the completed command. */
	bp = (struct buf *)pktp->pkt_private;
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	ASSERT(xp->xb_pktp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

#ifdef SD_FAULT_INJECTION

	SD_INFO(SD_LOG_IOERR, un, "sdintr: sdintr calling Fault injection\n");
	/* SD FaultInjection */
	sd_faultinjection(pktp);

#endif /* SD_FAULT_INJECTION */

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
	    " xp:0x%p, un:0x%p\n", bp, xp, un);

	mutex_enter(SD_MUTEX(un));

	ASSERT(un->un_fm_private != NULL);
	sscp = &((struct sd_fm_internal *)(un->un_fm_private))->fm_ssc;
	ASSERT(sscp != NULL);

	/* Reduce the count of the #commands currently in transport */
	un->un_ncmds_in_transport--;
	ASSERT(un->un_ncmds_in_transport >= 0);

	/* Increment counter to indicate that the callback routine is active */
	un->un_in_callback++;

	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);

#ifdef	SDDEBUG
	if (bp == un->un_retry_bp) {
		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
		    un, un->un_retry_bp, un->un_ncmds_in_transport);
	}
#endif

	/*
	 * If pkt_reason is CMD_DEV_GONE, fail the command, and update the media
	 * state if needed.
	 */
	if (pktp->pkt_reason == CMD_DEV_GONE) {
		/* Prevent multiple console messages for the same failure. */
		if (un->un_last_pkt_reason != CMD_DEV_GONE) {
			un->un_last_pkt_reason = CMD_DEV_GONE;
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Command failed to complete...Device is gone\n");
		}
		/* Wake any threads waiting on a media state change. */
		if (un->un_mediastate != DKIO_DEV_GONE) {
			un->un_mediastate = DKIO_DEV_GONE;
			cv_broadcast(&un->un_state_cv);
		}
		/*
		 * If the command happens to be the REQUEST SENSE command,
		 * free up the rqs buf and fail the original command.
		 */
		if (bp == un->un_rqs_bp) {
			bp = sd_mark_rqs_idle(un, xp);
		}
		sd_return_failed_command(un, bp, EIO);
		goto exit;
	}

	if (pktp->pkt_state & STATE_XARQ_DONE) {
		SD_TRACE(SD_LOG_COMMON, un,
		    "sdintr: extra sense data received. pkt=%p\n", pktp);
	}

	/*
	 * First see if the pkt has auto-request sense data with it....
	 * Look at the packet state first so we don't take a performance
	 * hit looking at the arq enabled flag unless absolutely necessary.
	 */
	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
	    (un->un_f_arq_enabled == TRUE)) {
		/*
		 * The HBA did an auto request sense for this command so check
		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
		 * driver command that should not be retried.
		 */
		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
			/*
			 * Save the relevant sense info into the xp for the
			 * original cmd.
			 */
			struct scsi_arq_status *asp;
			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
			xp->xb_sense_status =
			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
			xp->xb_sense_state  = asp->sts_rqpkt_state;
			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
			if (pktp->pkt_state & STATE_XARQ_DONE) {
				/* Extended sense: resid is vs MAX_SENSE_LENGTH */
				actual_len = MAX_SENSE_LENGTH -
				    xp->xb_sense_resid;
				bcopy(&asp->sts_sensedata, xp->xb_sense_data,
				    MAX_SENSE_LENGTH);
			} else {
				if (xp->xb_sense_resid > SENSE_LENGTH) {
					actual_len = MAX_SENSE_LENGTH -
					    xp->xb_sense_resid;
				} else {
					actual_len = SENSE_LENGTH -
					    xp->xb_sense_resid;
				}
				if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
					/*
					 * Express the resid relative to the
					 * caller-requested uscsi_rqlen.
					 */
					if ((((struct uscsi_cmd *)
					    (xp->xb_pktinfo))->uscsi_rqlen) >
					    actual_len) {
						xp->xb_sense_resid =
						    (((struct uscsi_cmd *)
						    (xp->xb_pktinfo))->
						    uscsi_rqlen) - actual_len;
					} else {
						xp->xb_sense_resid = 0;
					}
				}
				bcopy(&asp->sts_sensedata, xp->xb_sense_data,
				    SENSE_LENGTH);
			}

			/* fail the command */
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
			sd_return_failed_command(un, bp, EIO);
			goto exit;
		}

#if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
		/*
		 * We want to either retry or fail this command, so free
		 * the DMA resources here.  If we retry the command then
		 * the DMA resources will be reallocated in sd_start_cmds().
		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
		 * causes the *entire* transfer to start over again from the
		 * beginning of the request, even for PARTIAL chunks that
		 * have already transferred successfully.
		 */
		if ((un->un_f_is_fibre == TRUE) &&
		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
			scsi_dmafree(pktp);
			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
		}
#endif

		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: arq done, sd_handle_auto_request_sense\n");

		sd_handle_auto_request_sense(un, bp, xp, pktp);
		goto exit;
	}

	/* Next see if this is the REQUEST SENSE pkt for the instance */
	if (pktp->pkt_flags & FLAG_SENSING)  {
		/* This pktp is from the unit's REQUEST_SENSE command */
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: sd_handle_request_sense\n");
		sd_handle_request_sense(un, bp, xp, pktp);
		goto exit;
	}

	/*
	 * Check to see if the command successfully completed as requested;
	 * this is the most common case (and also the hot performance path).
	 *
	 * Requirements for successful completion are:
	 * pkt_reason is CMD_CMPLT and packet status is status good.
	 * In addition:
	 * - A residual of zero indicates successful completion no matter what
	 *   the command is.
	 * - If the residual is not zero and the command is not a read or
	 *   write, then it's still defined as successful completion. In other
	 *   words, if the command is a read or write the residual must be
	 *   zero for successful completion.
	 * - If the residual is not zero and the command is a read or
	 *   write, and it's a USCSICMD, then it's still defined as
	 *   successful completion.
	 */
	if ((pktp->pkt_reason == CMD_CMPLT) &&
	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {

		/*
		 * Since this command is returned with a good status, we
		 * can reset the count for Sonoma failover.
		 */
		un->un_sonoma_failure_count = 0;

		/*
		 * Return all USCSI commands on good status
		 */
		if (pktp->pkt_resid == 0) {
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: returning command for resid == 0\n");
		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
			SD_UPDATE_B_RESID(bp, pktp);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: returning command for resid != 0\n");
		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
			SD_UPDATE_B_RESID(bp, pktp);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: returning uscsi command\n");
		} else {
			/* read/write with nonzero resid: treat as error */
			goto not_successful;
		}
		sd_return_command(un, bp);

		/*
		 * Decrement counter to indicate that the callback routine
		 * is done.
		 */
		un->un_in_callback--;
		ASSERT(un->un_in_callback >= 0);
		mutex_exit(SD_MUTEX(un));

		return;
	}

not_successful:

#if (defined(__i386) || defined(__amd64))	/* DMAFREE for x86 only */
	/*
	 * The following is based upon knowledge of the underlying transport
	 * and its use of DMA resources.  This code should be removed when
	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
	 * and sd_start_cmds().
	 *
	 * Free any DMA resources associated with this command if there
	 * is a chance it could be retried or enqueued for later retry.
	 * If we keep the DMA binding then mpxio cannot reissue the
	 * command on another path whenever a path failure occurs.
	 *
	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
	 * causes the *entire* transfer to start over again from the
	 * beginning of the request, even for PARTIAL chunks that
	 * have already transferred successfully.
	 *
	 * This is only done for non-uscsi commands (and also skipped for the
	 * driver's internal RQS command). Also just do this for Fibre Channel
	 * devices as these are the only ones that support mpxio.
	 */
	if ((un->un_f_is_fibre == TRUE) &&
	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
		scsi_dmafree(pktp);
		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
	}
#endif

	/*
	 * The command did not successfully complete as requested so check
	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
	 * driver command that should not be retried so just return. If
	 * FLAG_DIAGNOSE is not set the error will be processed below.
	 */
	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
		/*
		 * Issue a request sense if a check condition caused the error
		 * (we handle the auto request sense case above), otherwise
		 * just fail the command.
		 */
		if ((pktp->pkt_reason == CMD_CMPLT) &&
		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
			sd_send_request_sense_command(un, bp, pktp);
		} else {
			sd_return_failed_command(un, bp, EIO);
		}
		goto exit;
	}

	/*
	 * The command did not successfully complete as requested so process
	 * the error, retry, and/or attempt recovery.
	 */
	switch (pktp->pkt_reason) {
	case CMD_CMPLT:
		switch (SD_GET_PKT_STATUS(pktp)) {
		case STATUS_GOOD:
			/*
			 * The command completed successfully with a non-zero
			 * residual
			 */
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_GOOD \n");
			sd_pkt_status_good(un, bp, xp, pktp);
			break;

		case STATUS_CHECK:
		case STATUS_TERMINATED:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
			sd_pkt_status_check_condition(un, bp, xp, pktp);
			break;

		case STATUS_BUSY:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_BUSY\n");
			sd_pkt_status_busy(un, bp, xp, pktp);
			break;

		case STATUS_RESERVATION_CONFLICT:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
			break;

		case STATUS_QFULL:
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sdintr: STATUS_QFULL\n");
			sd_pkt_status_qfull(un, bp, xp, pktp);
			break;

		case STATUS_MET:
		case STATUS_INTERMEDIATE:
		case STATUS_SCSI2:
		case STATUS_INTERMEDIATE_MET:
		case STATUS_ACA_ACTIVE:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Unexpected SCSI status received: 0x%x\n",
			    SD_GET_PKT_STATUS(pktp));
			/*
			 * Mark the ssc_flags when detected invalid status
			 * code for non-USCSI command.
			 */
			if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
				sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_STATUS,
				    0, "stat-code");
			}
			sd_return_failed_command(un, bp, EIO);
			break;

		default:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Invalid SCSI status received: 0x%x\n",
			    SD_GET_PKT_STATUS(pktp));
			if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
				sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_STATUS,
				    0, "stat-code");
			}
			sd_return_failed_command(un, bp, EIO);
			break;

		}
		break;

	case CMD_INCOMPLETE:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr:  CMD_INCOMPLETE\n");
		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
		break;
	case CMD_TRAN_ERR:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TRAN_ERR\n");
		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
		break;
	case CMD_RESET:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_RESET \n");
		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
		break;
	case CMD_ABORTED:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_ABORTED \n");
		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
		break;
	case CMD_TIMEOUT:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TIMEOUT\n");
		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
		break;
	case CMD_UNX_BUS_FREE:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_UNX_BUS_FREE \n");
		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
		break;
	case CMD_TAG_REJECT:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: CMD_TAG_REJECT\n");
		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
		break;
	default:
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sdintr: default\n");
		/*
		 * Mark the ssc_flags for detecting invalid pkt_reason.
		 */
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_PKT_REASON,
			    0, "pkt-reason");
		}
		sd_pkt_reason_default(un, bp, xp, pktp);
		break;
	}

exit:
	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");

	/* Decrement counter to indicate that the callback routine is done. */
	un->un_in_callback--;
	ASSERT(un->un_in_callback >= 0);

	/*
	 * At this point, the pkt has been dispatched, ie, it is either
	 * being re-tried or has been returned to its caller and should
	 * not be referenced.
	 */

	mutex_exit(SD_MUTEX(un));
}
17122 
17123 
17124 /*
17125  *    Function: sd_print_incomplete_msg
17126  *
17127  * Description: Prints the error message for a CMD_INCOMPLETE error.
17128  *
17129  *   Arguments: un - ptr to associated softstate for the device.
17130  *		bp - ptr to the buf(9S) for the command.
17131  *		arg - message string ptr
17132  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
17133  *			or SD_NO_RETRY_ISSUED.
17134  *
17135  *     Context: May be called under interrupt context
17136  */
17137 
17138 static void
17139 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
17140 {
17141 	struct scsi_pkt	*pktp;
17142 	char	*msgp;
17143 	char	*cmdp = arg;
17144 
17145 	ASSERT(un != NULL);
17146 	ASSERT(mutex_owned(SD_MUTEX(un)));
17147 	ASSERT(bp != NULL);
17148 	ASSERT(arg != NULL);
17149 	pktp = SD_GET_PKTP(bp);
17150 	ASSERT(pktp != NULL);
17151 
17152 	switch (code) {
17153 	case SD_DELAYED_RETRY_ISSUED:
17154 	case SD_IMMEDIATE_RETRY_ISSUED:
17155 		msgp = "retrying";
17156 		break;
17157 	case SD_NO_RETRY_ISSUED:
17158 	default:
17159 		msgp = "giving up";
17160 		break;
17161 	}
17162 
17163 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
17164 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17165 		    "incomplete %s- %s\n", cmdp, msgp);
17166 	}
17167 }
17168 
17169 
17170 
17171 /*
17172  *    Function: sd_pkt_status_good
17173  *
17174  * Description: Processing for a STATUS_GOOD code in pkt_status.
17175  *
17176  *     Context: May be called under interrupt context
17177  */
17178 
17179 static void
17180 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
17181 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17182 {
17183 	char	*cmdp;
17184 
17185 	ASSERT(un != NULL);
17186 	ASSERT(mutex_owned(SD_MUTEX(un)));
17187 	ASSERT(bp != NULL);
17188 	ASSERT(xp != NULL);
17189 	ASSERT(pktp != NULL);
17190 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
17191 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
17192 	ASSERT(pktp->pkt_resid != 0);
17193 
17194 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
17195 
17196 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17197 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
17198 	case SCMD_READ:
17199 		cmdp = "read";
17200 		break;
17201 	case SCMD_WRITE:
17202 		cmdp = "write";
17203 		break;
17204 	default:
17205 		SD_UPDATE_B_RESID(bp, pktp);
17206 		sd_return_command(un, bp);
17207 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
17208 		return;
17209 	}
17210 
17211 	/*
17212 	 * See if we can retry the read/write, preferrably immediately.
17213 	 * If retries are exhaused, then sd_retry_command() will update
17214 	 * the b_resid count.
17215 	 */
17216 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
17217 	    cmdp, EIO, (clock_t)0, NULL);
17218 
17219 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
17220 }
17221 
17222 
17223 
17224 
17225 
17226 /*
17227  *    Function: sd_handle_request_sense
17228  *
17229  * Description: Processing for non-auto Request Sense command.
17230  *
17231  *   Arguments: un - ptr to associated softstate
17232  *		sense_bp - ptr to buf(9S) for the RQS command
17233  *		sense_xp - ptr to the sd_xbuf for the RQS command
17234  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
17235  *
17236  *     Context: May be called under interrupt context
17237  */
17238 
static void
sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
	struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
{
	struct buf	*cmd_bp;	/* buf for the original command */
	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
	size_t		actual_len;	/* actual sense data length */

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(sense_bp != NULL);
	ASSERT(sense_xp != NULL);
	ASSERT(sense_pktp != NULL);

	/*
	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
	 * RQS command and not the original command.
	 */
	ASSERT(sense_pktp == un->un_rqs_pktp);
	ASSERT(sense_bp   == un->un_rqs_bp);
	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
	    (FLAG_SENSING | FLAG_HEAD));
	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
	    FLAG_SENSING) == FLAG_SENSING);

	/* These are the bp, xp, and pktp for the original command */
	cmd_bp = sense_xp->xb_sense_bp;
	cmd_xp = SD_GET_XBUF(cmd_bp);
	cmd_pktp = SD_GET_PKTP(cmd_bp);

	if (sense_pktp->pkt_reason != CMD_CMPLT) {
		/*
		 * The REQUEST SENSE command failed.  Release the REQUEST
		 * SENSE command for re-use, get back the bp for the original
		 * command, and attempt to re-try the original command if
		 * FLAG_DIAGNOSE is not set in the original packet.
		 */
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
			    NULL, NULL, EIO, (clock_t)0, NULL);
			return;
		}
		/*
		 * FLAG_DIAGNOSE is set: fall through and let the code
		 * below fail the original command using the (zeroed)
		 * sense state -- see the note below.
		 */
	}

	/*
	 * Save the relevant sense info into the xp for the original cmd.
	 *
	 * Note: if the request sense failed the state info will be zero
	 * as set in sd_mark_rqs_busy()
	 */
	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
	/* pkt_resid is relative to the MAX_SENSE_LENGTH allocation */
	actual_len = MAX_SENSE_LENGTH - sense_pktp->pkt_resid;
	if ((cmd_xp->xb_pkt_flags & SD_XB_USCSICMD) &&
	    (((struct uscsi_cmd *)cmd_xp->xb_pktinfo)->uscsi_rqlen >
	    SENSE_LENGTH)) {
		/* Caller asked for extended sense: copy the full buffer. */
		bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data,
		    MAX_SENSE_LENGTH);
		cmd_xp->xb_sense_resid = sense_pktp->pkt_resid;
	} else {
		/* Standard-length sense: resid is relative to SENSE_LENGTH. */
		bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data,
		    SENSE_LENGTH);
		if (actual_len < SENSE_LENGTH) {
			cmd_xp->xb_sense_resid = SENSE_LENGTH - actual_len;
		} else {
			cmd_xp->xb_sense_resid = 0;
		}
	}

	/*
	 *  Free up the RQS command....
	 *  NOTE:
	 *	Must do this BEFORE calling sd_validate_sense_data!
	 *	sd_validate_sense_data may return the original command in
	 *	which case the pkt will be freed and the flags can no
	 *	longer be touched.
	 *	SD_MUTEX is held through this process until the command
	 *	is dispatched based upon the sense data, so there are
	 *	no race conditions.
	 */
	(void) sd_mark_rqs_idle(un, sense_xp);

	/*
	 * For a retryable command see if we have valid sense data, if so then
	 * turn it over to sd_decode_sense() to figure out the right course of
	 * action. Just fail a non-retryable command.
	 */
	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
		if (sd_validate_sense_data(un, cmd_bp, cmd_xp, actual_len) ==
		    SD_SENSE_DATA_IS_VALID) {
			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
		}
	} else {
		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
		sd_return_failed_command(un, cmd_bp, EIO);
	}
}
17342 
17343 
17344 
17345 
17346 /*
17347  *    Function: sd_handle_auto_request_sense
17348  *
17349  * Description: Processing for auto-request sense information.
17350  *
17351  *   Arguments: un - ptr to associated softstate
17352  *		bp - ptr to buf(9S) for the command
17353  *		xp - ptr to the sd_xbuf for the command
17354  *		pktp - ptr to the scsi_pkt(9S) for the command
17355  *
17356  *     Context: May be called under interrupt context
17357  */
17358 
static void
sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	struct scsi_arq_status *asp;
	size_t actual_len;	/* number of valid sense bytes received */

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);
	/* This path is for the original command, never the internal RQS cmd. */
	ASSERT(pktp != un->un_rqs_pktp);
	ASSERT(bp   != un->un_rqs_bp);

	/*
	 * For auto-request sense, we get a scsi_arq_status back from
	 * the HBA, with the sense data in the sts_sensedata member.
	 * The pkt_scbp of the packet points to this scsi_arq_status.
	 */
	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);

	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
		/*
		 * The auto REQUEST SENSE failed; see if we can re-try
		 * the original command.
		 */
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "auto request sense failed (reason=%s)\n",
		    scsi_rname(asp->sts_rqpkt_reason));

		sd_reset_target(un, pktp);

		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
		    NULL, NULL, EIO, (clock_t)0, NULL);
		return;
	}

	/* Save the relevant sense info into the xp for the original cmd. */
	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
	xp->xb_sense_state  = asp->sts_rqpkt_state;
	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
	if (xp->xb_sense_state & STATE_XARQ_DONE) {
		/* Extended ARQ: the resid is relative to MAX_SENSE_LENGTH. */
		actual_len = MAX_SENSE_LENGTH - xp->xb_sense_resid;
		bcopy(&asp->sts_sensedata, xp->xb_sense_data,
		    MAX_SENSE_LENGTH);
	} else {
		if (xp->xb_sense_resid > SENSE_LENGTH) {
			actual_len = MAX_SENSE_LENGTH - xp->xb_sense_resid;
		} else {
			actual_len = SENSE_LENGTH - xp->xb_sense_resid;
		}
		if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
			/*
			 * For uscsi commands, re-express the residual
			 * relative to the caller's requested uscsi_rqlen.
			 */
			if ((((struct uscsi_cmd *)
			    (xp->xb_pktinfo))->uscsi_rqlen) > actual_len) {
				xp->xb_sense_resid = (((struct uscsi_cmd *)
				    (xp->xb_pktinfo))->uscsi_rqlen) -
				    actual_len;
			} else {
				xp->xb_sense_resid = 0;
			}
		}
		bcopy(&asp->sts_sensedata, xp->xb_sense_data, SENSE_LENGTH);
	}

	/*
	 * See if we have valid sense data, if so then turn it over to
	 * sd_decode_sense() to figure out the right course of action.
	 * (sd_validate_sense_data() retries or fails the command itself
	 * when the sense data is not valid.)
	 */
	if (sd_validate_sense_data(un, bp, xp, actual_len) ==
	    SD_SENSE_DATA_IS_VALID) {
		sd_decode_sense(un, bp, xp, pktp);
	}
}
17433 
17434 
17435 /*
17436  *    Function: sd_print_sense_failed_msg
17437  *
17438  * Description: Print log message when RQS has failed.
17439  *
17440  *   Arguments: un - ptr to associated softstate
17441  *		bp - ptr to buf(9S) for the command
17442  *		arg - generic message string ptr
17443  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17444  *			or SD_NO_RETRY_ISSUED
17445  *
17446  *     Context: May be called from interrupt context
17447  */
17448 
17449 static void
17450 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
17451 	int code)
17452 {
17453 	char	*msgp = arg;
17454 
17455 	ASSERT(un != NULL);
17456 	ASSERT(mutex_owned(SD_MUTEX(un)));
17457 	ASSERT(bp != NULL);
17458 
17459 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
17460 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, msgp);
17461 	}
17462 }
17463 
17464 
17465 /*
17466  *    Function: sd_validate_sense_data
17467  *
17468  * Description: Check the given sense data for validity.
17469  *		If the sense data is not valid, the command will
17470  *		be either failed or retried!
17471  *
17472  * Return Code: SD_SENSE_DATA_IS_INVALID
17473  *		SD_SENSE_DATA_IS_VALID
17474  *
17475  *     Context: May be called from interrupt context
17476  */
17477 
static int
sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
	size_t actual_len)
{
	struct scsi_extended_sense *esp;
	struct	scsi_pkt *pktp;
	char	*msgp = NULL;	/* diagnostic text for the sense_failed path */
	sd_ssc_t *sscp;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(bp != un->un_rqs_bp);
	ASSERT(xp != NULL);
	ASSERT(un->un_fm_private != NULL);

	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);

	/* FMA context used to flag invalid sense data for this LUN */
	sscp = &((struct sd_fm_internal *)(un->un_fm_private))->fm_ssc;
	ASSERT(sscp != NULL);

	/*
	 * Check the status of the RQS command (auto or manual).
	 */
	switch (xp->xb_sense_status & STATUS_MASK) {
	case STATUS_GOOD:
		break;

	case STATUS_RESERVATION_CONFLICT:
		/* Another initiator holds a reservation; dedicated handler. */
		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
		return (SD_SENSE_DATA_IS_INVALID);

	case STATUS_BUSY:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "Busy Status on REQUEST SENSE\n");
		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
		    NULL, EIO, un->un_busy_timeout / 500, kstat_waitq_enter);
		return (SD_SENSE_DATA_IS_INVALID);

	case STATUS_QFULL:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "QFULL Status on REQUEST SENSE\n");
		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
		    NULL, EIO, un->un_busy_timeout / 500, kstat_waitq_enter);
		return (SD_SENSE_DATA_IS_INVALID);

	case STATUS_CHECK:
	case STATUS_TERMINATED:
		msgp = "Check Condition on REQUEST SENSE\n";
		goto sense_failed;

	default:
		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
		goto sense_failed;
	}

	/*
	 * See if we got the minimum required amount of sense data.
	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
	 * or less.
	 */
	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
	    (actual_len == 0)) {
		msgp = "Request Sense couldn't get sense data\n";
		goto sense_failed;
	}

	if (actual_len < SUN_MIN_SENSE_LENGTH) {
		msgp = "Not enough sense information\n";
		/* Mark the ssc_flags for detecting invalid sense data */
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
			    "sense-data");
		}
		goto sense_failed;
	}

	/*
	 * We require the extended sense data
	 */
	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
	if (esp->es_class != CLASS_EXTENDED_SENSE) {
		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
			/*
			 * These buffers are static, so serialize access
			 * with sd_sense_mutex while they are in use.
			 */
			static char tmp[8];
			static char buf[148];
			char *p = (char *)(xp->xb_sense_data);
			int i;

			mutex_enter(&sd_sense_mutex);
			(void) strcpy(buf, "undecodable sense information:");
			/* Append each sense byte as " 0xNN" hex text */
			for (i = 0; i < actual_len; i++) {
				(void) sprintf(tmp, " 0x%x", *(p++)&0xff);
				(void) strcpy(&buf[strlen(buf)], tmp);
			}
			i = strlen(buf);
			(void) strcpy(&buf[i], "-(assumed fatal)\n");

			if (SD_FM_LOG(un) == SD_FM_LOG_NSUP) {
				scsi_log(SD_DEVINFO(un), sd_label,
				    CE_WARN, buf);
			}
			mutex_exit(&sd_sense_mutex);
		}

		/* Mark the ssc_flags for detecting invalid sense data */
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
			    "sense-data");
		}

		/* Note: Legacy behavior, fail the command with no retry */
		sd_return_failed_command(un, bp, EIO);
		return (SD_SENSE_DATA_IS_INVALID);
	}

	/*
	 * Check that es_code is valid (es_class concatenated with es_code
	 * make up the "response code" field.  es_class will always be 7, so
	 * make sure es_code is 0, 1, 2, 3 or 0xf.  es_code will indicate the
	 * format.
	 */
	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
		/* Mark the ssc_flags for detecting invalid sense data */
		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
			    "sense-data");
		}
		goto sense_failed;
	}

	return (SD_SENSE_DATA_IS_VALID);

sense_failed:
	/*
	 * If the request sense failed (for whatever reason), attempt
	 * to retry the original command.
	 */
#if defined(__i386) || defined(__amd64)
	/*
	 * SD_RETRY_DELAY is conditionally compile (#if fibre) in
	 * sddef.h for Sparc platform, and x86 uses 1 binary
	 * for both SCSI/FC.
	 * The SD_RETRY_DELAY value need to be adjusted here
	 * when SD_RETRY_DELAY change in sddef.h
	 */
	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
	    sd_print_sense_failed_msg, msgp, EIO,
	    un->un_f_is_fibre?drv_usectohz(100000):(clock_t)0, NULL);
#else
	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
#endif

	return (SD_SENSE_DATA_IS_INVALID);
}
17638 
17639 /*
17640  *    Function: sd_decode_sense
17641  *
17642  * Description: Take recovery action(s) when SCSI Sense Data is received.
17643  *
17644  *     Context: Interrupt context.
17645  */
17646 
17647 static void
17648 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
17649 	struct scsi_pkt *pktp)
17650 {
17651 	uint8_t sense_key;
17652 
17653 	ASSERT(un != NULL);
17654 	ASSERT(mutex_owned(SD_MUTEX(un)));
17655 	ASSERT(bp != NULL);
17656 	ASSERT(bp != un->un_rqs_bp);
17657 	ASSERT(xp != NULL);
17658 	ASSERT(pktp != NULL);
17659 
17660 	sense_key = scsi_sense_key(xp->xb_sense_data);
17661 
17662 	switch (sense_key) {
17663 	case KEY_NO_SENSE:
17664 		sd_sense_key_no_sense(un, bp, xp, pktp);
17665 		break;
17666 	case KEY_RECOVERABLE_ERROR:
17667 		sd_sense_key_recoverable_error(un, xp->xb_sense_data,
17668 		    bp, xp, pktp);
17669 		break;
17670 	case KEY_NOT_READY:
17671 		sd_sense_key_not_ready(un, xp->xb_sense_data,
17672 		    bp, xp, pktp);
17673 		break;
17674 	case KEY_MEDIUM_ERROR:
17675 	case KEY_HARDWARE_ERROR:
17676 		sd_sense_key_medium_or_hardware_error(un,
17677 		    xp->xb_sense_data, bp, xp, pktp);
17678 		break;
17679 	case KEY_ILLEGAL_REQUEST:
17680 		sd_sense_key_illegal_request(un, bp, xp, pktp);
17681 		break;
17682 	case KEY_UNIT_ATTENTION:
17683 		sd_sense_key_unit_attention(un, xp->xb_sense_data,
17684 		    bp, xp, pktp);
17685 		break;
17686 	case KEY_WRITE_PROTECT:
17687 	case KEY_VOLUME_OVERFLOW:
17688 	case KEY_MISCOMPARE:
17689 		sd_sense_key_fail_command(un, bp, xp, pktp);
17690 		break;
17691 	case KEY_BLANK_CHECK:
17692 		sd_sense_key_blank_check(un, bp, xp, pktp);
17693 		break;
17694 	case KEY_ABORTED_COMMAND:
17695 		sd_sense_key_aborted_command(un, bp, xp, pktp);
17696 		break;
17697 	case KEY_VENDOR_UNIQUE:
17698 	case KEY_COPY_ABORTED:
17699 	case KEY_EQUAL:
17700 	case KEY_RESERVED:
17701 	default:
17702 		sd_sense_key_default(un, xp->xb_sense_data,
17703 		    bp, xp, pktp);
17704 		break;
17705 	}
17706 }
17707 
17708 
17709 /*
17710  *    Function: sd_dump_memory
17711  *
17712  * Description: Debug logging routine to print the contents of a user provided
17713  *		buffer. The output of the buffer is broken up into 256 byte
17714  *		segments due to a size constraint of the scsi_log.
17715  *		implementation.
17716  *
17717  *   Arguments: un - ptr to softstate
17718  *		comp - component mask
17719  *		title - "title" string to preceed data when printed
17720  *		data - ptr to data block to be printed
17721  *		len - size of data block to be printed
17722  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
17723  *
17724  *     Context: May be called from interrupt context
17725  */
17726 
/* Size of the scratch buffer handed to scsi_log() per output line */
#define	SD_DUMP_MEMORY_BUF_SIZE	256

/* Per-element printf formats: index 0 = SD_LOG_HEX, index 1 = SD_LOG_CHAR */
static char *sd_dump_format_string[] = {
		" 0x%02x",
		" %c"
};
17733 
17734 static void
17735 sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
17736     int len, int fmt)
17737 {
17738 	int	i, j;
17739 	int	avail_count;
17740 	int	start_offset;
17741 	int	end_offset;
17742 	size_t	entry_len;
17743 	char	*bufp;
17744 	char	*local_buf;
17745 	char	*format_string;
17746 
17747 	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
17748 
17749 	/*
17750 	 * In the debug version of the driver, this function is called from a
17751 	 * number of places which are NOPs in the release driver.
17752 	 * The debug driver therefore has additional methods of filtering
17753 	 * debug output.
17754 	 */
17755 #ifdef SDDEBUG
17756 	/*
17757 	 * In the debug version of the driver we can reduce the amount of debug
17758 	 * messages by setting sd_error_level to something other than
17759 	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
17760 	 * sd_component_mask.
17761 	 */
17762 	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
17763 	    (sd_error_level != SCSI_ERR_ALL)) {
17764 		return;
17765 	}
17766 	if (((sd_component_mask & comp) == 0) ||
17767 	    (sd_error_level != SCSI_ERR_ALL)) {
17768 		return;
17769 	}
17770 #else
17771 	if (sd_error_level != SCSI_ERR_ALL) {
17772 		return;
17773 	}
17774 #endif
17775 
17776 	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
17777 	bufp = local_buf;
17778 	/*
17779 	 * Available length is the length of local_buf[], minus the
17780 	 * length of the title string, minus one for the ":", minus
17781 	 * one for the newline, minus one for the NULL terminator.
17782 	 * This gives the #bytes available for holding the printed
17783 	 * values from the given data buffer.
17784 	 */
17785 	if (fmt == SD_LOG_HEX) {
17786 		format_string = sd_dump_format_string[0];
17787 	} else /* SD_LOG_CHAR */ {
17788 		format_string = sd_dump_format_string[1];
17789 	}
17790 	/*
17791 	 * Available count is the number of elements from the given
17792 	 * data buffer that we can fit into the available length.
17793 	 * This is based upon the size of the format string used.
17794 	 * Make one entry and find it's size.
17795 	 */
17796 	(void) sprintf(bufp, format_string, data[0]);
17797 	entry_len = strlen(bufp);
17798 	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
17799 
17800 	j = 0;
17801 	while (j < len) {
17802 		bufp = local_buf;
17803 		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
17804 		start_offset = j;
17805 
17806 		end_offset = start_offset + avail_count;
17807 
17808 		(void) sprintf(bufp, "%s:", title);
17809 		bufp += strlen(bufp);
17810 		for (i = start_offset; ((i < end_offset) && (j < len));
17811 		    i++, j++) {
17812 			(void) sprintf(bufp, format_string, data[i]);
17813 			bufp += entry_len;
17814 		}
17815 		(void) sprintf(bufp, "\n");
17816 
17817 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
17818 	}
17819 	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
17820 }
17821 
17822 /*
17823  *    Function: sd_print_sense_msg
17824  *
17825  * Description: Log a message based upon the given sense data.
17826  *
17827  *   Arguments: un - ptr to associated softstate
17828  *		bp - ptr to buf(9S) for the command
17829  *		arg - ptr to associate sd_sense_info struct
17830  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17831  *			or SD_NO_RETRY_ISSUED
17832  *
17833  *     Context: May be called from interrupt context
17834  */
17835 
static void
sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
{
	struct sd_xbuf	*xp;
	struct scsi_pkt	*pktp;
	uint8_t *sensep;
	daddr_t request_blkno;
	diskaddr_t err_blkno;
	int severity;
	int pfa_flag;
	extern struct scsi_key_strings scsi_cmds[];

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	pktp = SD_GET_PKTP(bp);
	ASSERT(pktp != NULL);
	ASSERT(arg != NULL);

	/* Caller supplies severity/PFA hints via a sd_sense_info struct */
	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;

	/* A retry was issued: downgrade severity to "retryable" */
	if ((code == SD_DELAYED_RETRY_ISSUED) ||
	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
		severity = SCSI_ERR_RETRYABLE;
	}

	/* Use absolute block number for the request block number */
	request_blkno = xp->xb_blkno;

	/*
	 * Now try to get the error block number from the sense data
	 */
	sensep = xp->xb_sense_data;

	if (scsi_sense_info_uint64(sensep, SENSE_LENGTH,
	    (uint64_t *)&err_blkno)) {
		/*
		 * We retrieved the error block number from the information
		 * portion of the sense data.
		 *
		 * For USCSI commands we are better off using the error
		 * block no. as the requested block no. (This is the best
		 * we can estimate.)
		 */
		if ((SD_IS_BUFIO(xp) == FALSE) &&
		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
			request_blkno = err_blkno;
		}
	} else {
		/*
		 * Without the es_valid bit set (for fixed format) or an
		 * information descriptor (for descriptor format) we cannot
		 * be certain of the error blkno, so just use the
		 * request_blkno.
		 */
		err_blkno = (diskaddr_t)request_blkno;
	}

	/*
	 * The following will log the buffer contents for the release driver
	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
	 * level is set to verbose.
	 */
	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);

	/* PFA (failure-prediction) messages bypass the silencing below */
	if (pfa_flag == FALSE) {
		/* This is normally only set for USCSI */
		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
			return;
		}

		/* Suppress buf(9S) I/O messages below the configured level */
		if ((SD_IS_BUFIO(xp) == TRUE) &&
		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
		    (severity < sd_error_level))) {
			return;
		}
	}
	/*
	 * Check for Sonoma Failover and keep a count of how many failed I/O's
	 */
	if ((SD_IS_LSI(un)) &&
	    (scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) &&
	    (scsi_sense_asc(sensep) == 0x94) &&
	    (scsi_sense_ascq(sensep) == 0x01)) {
		un->un_sonoma_failure_count++;
		/* Only log the first such failure to avoid console spam */
		if (un->un_sonoma_failure_count > 1) {
			return;
		}
	}

	/* Log via scsi_vu_errmsg() when FMA is unsupported, or for fully
	 * recovered transfers (recoverable error with zero residue). */
	if (SD_FM_LOG(un) == SD_FM_LOG_NSUP ||
	    ((scsi_sense_key(sensep) == KEY_RECOVERABLE_ERROR) &&
	    (pktp->pkt_resid == 0))) {
		scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
		    request_blkno, err_blkno, scsi_cmds,
		    (struct scsi_extended_sense *)sensep,
		    un->un_additional_codes, NULL);
	}
}
17941 
17942 /*
17943  *    Function: sd_sense_key_no_sense
17944  *
17945  * Description: Recovery action when sense data was not received.
17946  *
17947  *     Context: May be called from interrupt context
17948  */
17949 
17950 static void
17951 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
17952 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
17953 {
17954 	struct sd_sense_info	si;
17955 
17956 	ASSERT(un != NULL);
17957 	ASSERT(mutex_owned(SD_MUTEX(un)));
17958 	ASSERT(bp != NULL);
17959 	ASSERT(xp != NULL);
17960 	ASSERT(pktp != NULL);
17961 
17962 	si.ssi_severity = SCSI_ERR_FATAL;
17963 	si.ssi_pfa_flag = FALSE;
17964 
17965 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
17966 
17967 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
17968 	    &si, EIO, (clock_t)0, NULL);
17969 }
17970 
17971 
17972 /*
17973  *    Function: sd_sense_key_recoverable_error
17974  *
17975  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
17976  *
17977  *     Context: May be called from interrupt context
17978  */
17979 
17980 static void
17981 sd_sense_key_recoverable_error(struct sd_lun *un,
17982 	uint8_t *sense_datap,
17983 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
17984 {
17985 	struct sd_sense_info	si;
17986 	uint8_t asc = scsi_sense_asc(sense_datap);
17987 
17988 	ASSERT(un != NULL);
17989 	ASSERT(mutex_owned(SD_MUTEX(un)));
17990 	ASSERT(bp != NULL);
17991 	ASSERT(xp != NULL);
17992 	ASSERT(pktp != NULL);
17993 
17994 	/*
17995 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
17996 	 */
17997 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
17998 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
17999 		si.ssi_severity = SCSI_ERR_INFO;
18000 		si.ssi_pfa_flag = TRUE;
18001 	} else {
18002 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
18003 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
18004 		si.ssi_severity = SCSI_ERR_RECOVERED;
18005 		si.ssi_pfa_flag = FALSE;
18006 	}
18007 
18008 	if (pktp->pkt_resid == 0) {
18009 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18010 		sd_return_command(un, bp);
18011 		return;
18012 	}
18013 
18014 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18015 	    &si, EIO, (clock_t)0, NULL);
18016 }
18017 
18018 
18019 
18020 
18021 /*
18022  *    Function: sd_sense_key_not_ready
18023  *
18024  * Description: Recovery actions for a SCSI "Not Ready" sense key.
18025  *
18026  *     Context: May be called from interrupt context
18027  */
18028 
static void
sd_sense_key_not_ready(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	struct sd_sense_info	si;
	uint8_t asc = scsi_sense_asc(sense_datap);
	uint8_t ascq = scsi_sense_ascq(sense_datap);

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	/* Default disposition; may be overridden below. */
	si.ssi_severity = SCSI_ERR_FATAL;
	si.ssi_pfa_flag = FALSE;

	/*
	 * Update error stats after first NOT READY error. Disks may have
	 * been powered down and may need to be restarted.  For CDROMs,
	 * report NOT READY errors only if media is present.
	 */
	if ((ISCD(un) && (asc == 0x3A)) ||
	    (xp->xb_nr_retry_count > 0)) {
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
	}

	/*
	 * Just fail if the "not ready" retry limit has been reached.
	 */
	if (xp->xb_nr_retry_count >= un->un_notready_retry_count) {
		/* Special check for error message printing for removables. */
		if (un->un_f_has_removable_media && (asc == 0x04) &&
		    (ascq >= 0x04)) {
			si.ssi_severity = SCSI_ERR_ALL;
		}
		goto fail_command;
	}

	/*
	 * Check the ASC and ASCQ in the sense data as needed, to determine
	 * what to do.
	 */
	switch (asc) {
	case 0x04:	/* LOGICAL UNIT NOT READY */
		/*
		 * disk drives that don't spin up result in a very long delay
		 * in format without warning messages. We will log a message
		 * if the error level is set to verbose.
		 */
		if (sd_error_level < SCSI_ERR_RETRYABLE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "logical unit not ready, resetting disk\n");
		}

		/*
		 * There are different requirements for CDROMs and disks for
		 * the number of retries.  If a CD-ROM is giving this, it is
		 * probably reading TOC and is in the process of getting
		 * ready, so we should keep on trying for a long time to make
		 * sure that all types of media are taken in account (for
		 * some media the drive takes a long time to read TOC).  For
		 * disks we do not want to retry this too many times as this
		 * can cause a long hang in format when the drive refuses to
		 * spin up (a very common failure).
		 */
		switch (ascq) {
		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
			/*
			 * Disk drives frequently refuse to spin up which
			 * results in a very long hang in format without
			 * warning messages.
			 *
			 * Note: This code preserves the legacy behavior of
			 * comparing xb_nr_retry_count against zero for fibre
			 * channel targets instead of comparing against the
			 * un_reset_retry_count value.  The reason for this
			 * discrepancy has been so utterly lost beneath the
			 * Sands of Time that even Indiana Jones could not
			 * find it.
			 */
			if (un->un_f_is_fibre == TRUE) {
				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
				    (xp->xb_nr_retry_count > 0)) &&
				    (un->un_startstop_timeid == NULL)) {
					scsi_log(SD_DEVINFO(un), sd_label,
					    CE_WARN, "logical unit not ready, "
					    "resetting disk\n");
					sd_reset_target(un, pktp);
				}
			} else {
				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
				    (xp->xb_nr_retry_count >
				    un->un_reset_retry_count)) &&
				    (un->un_startstop_timeid == NULL)) {
					scsi_log(SD_DEVINFO(un), sd_label,
					    CE_WARN, "logical unit not ready, "
					    "resetting disk\n");
					sd_reset_target(un, pktp);
				}
			}
			break;

		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
			/*
			 * If the target is in the process of becoming
			 * ready, just proceed with the retry. This can
			 * happen with CD-ROMs that take a long time to
			 * read TOC after a power cycle or reset.
			 */
			goto do_retry;

		case 0x02:  /* LUN NOT READY, INITITIALIZING CMD REQUIRED */
			/* Fall through to the START STOP UNIT recovery. */
			break;

		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
			/*
			 * Retries cannot help here so just fail right away.
			 */
			goto fail_command;

		case 0x88:
			/*
			 * Vendor-unique code for T3/T4: it indicates a
			 * path problem in a mutipathed config, but as far as
			 * the target driver is concerned it equates to a fatal
			 * error, so we should just fail the command right away
			 * (without printing anything to the console). If this
			 * is not a T3/T4, fall thru to the default recovery
			 * action.
			 * T3/T4 is FC only, don't need to check is_fibre
			 */
			if (SD_IS_T3(un) || SD_IS_T4(un)) {
				sd_return_failed_command(un, bp, EIO);
				return;
			}
			/* FALLTHRU */

		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
		default:    /* Possible future codes in SCSI spec? */
			/*
			 * For removable-media devices, do not retry if
			 * ASCQ > 2 as these result mostly from USCSI commands
			 * on MMC devices issued to check status of an
			 * operation initiated in immediate mode.  Also for
			 * ASCQ >= 4 do not print console messages as these
			 * mainly represent a user-initiated operation
			 * instead of a system failure.
			 */
			if (un->un_f_has_removable_media) {
				si.ssi_severity = SCSI_ERR_ALL;
				goto fail_command;
			}
			break;
		}

		/*
		 * As part of our recovery attempt for the NOT READY
		 * condition, we issue a START STOP UNIT command. However
		 * we want to wait for a short delay before attempting this
		 * as there may still be more commands coming back from the
		 * target with the check condition. To do this we use
		 * timeout(9F) to call sd_start_stop_unit_callback() after
		 * the delay interval expires. (sd_start_stop_unit_callback()
		 * dispatches sd_start_stop_unit_task(), which will issue
		 * the actual START STOP UNIT command. The delay interval
		 * is one-half of the delay that we will use to retry the
		 * command that generated the NOT READY condition.
		 *
		 * Note that we could just dispatch sd_start_stop_unit_task()
		 * from here and allow it to sleep for the delay interval,
		 * but then we would be tying up the taskq thread
		 * uncesessarily for the duration of the delay.
		 *
		 * Do not issue the START STOP UNIT if the current command
		 * is already a START STOP UNIT.
		 */
		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
			break;
		}

		/*
		 * Do not schedule the timeout if one is already pending.
		 */
		if (un->un_startstop_timeid != NULL) {
			SD_INFO(SD_LOG_ERROR, un,
			    "sd_sense_key_not_ready: restart already issued to"
			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
			    ddi_get_instance(SD_DEVINFO(un)));
			break;
		}

		/*
		 * Schedule the START STOP UNIT command, then queue the command
		 * for a retry.
		 *
		 * Note: A timeout is not scheduled for this retry because we
		 * want the retry to be serial with the START_STOP_UNIT. The
		 * retry will be started when the START_STOP_UNIT is completed
		 * in sd_start_stop_unit_task.
		 */
		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
		    un, un->un_busy_timeout / 2);
		xp->xb_nr_retry_count++;
		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
		return;

	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
		if (sd_error_level < SCSI_ERR_RETRYABLE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "unit does not respond to selection\n");
		}
		break;

	case 0x3A:	/* MEDIUM NOT PRESENT */
		if (sd_error_level >= SCSI_ERR_FATAL) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Caddy not inserted in drive\n");
		}

		sr_ejected(un);
		un->un_mediastate = DKIO_EJECTED;
		/* The state has changed, inform the media watch routines */
		cv_broadcast(&un->un_state_cv);
		/* Just fail if no media is present in the drive. */
		goto fail_command;

	default:
		if (sd_error_level < SCSI_ERR_RETRYABLE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
			    "Unit not Ready. Additional sense code 0x%x\n",
			    asc);
		}
		break;
	}

do_retry:

	/*
	 * Retry the command, as some targets may report NOT READY for
	 * several seconds after being reset.
	 */
	xp->xb_nr_retry_count++;
	si.ssi_severity = SCSI_ERR_RETRYABLE;
	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
	    &si, EIO, un->un_busy_timeout, NULL);

	return;

fail_command:
	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
	sd_return_failed_command(un, bp, EIO);
}
18288 
18289 
18290 
18291 /*
18292  *    Function: sd_sense_key_medium_or_hardware_error
18293  *
18294  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
18295  *		sense key.
18296  *
18297  *     Context: May be called from interrupt context
18298  */
18299 
static void
sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	struct sd_sense_info	si;
	uint8_t sense_key = scsi_sense_key(sense_datap);
	uint8_t asc = scsi_sense_asc(sense_datap);

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	si.ssi_severity = SCSI_ERR_FATAL;
	si.ssi_pfa_flag = FALSE;

	if (sense_key == KEY_MEDIUM_ERROR) {
		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
	}

	SD_UPDATE_ERRSTATS(un, sd_harderrs);

	/*
	 * When the retry count reaches the reset threshold, attempt a
	 * LUN/target reset before retrying again.  scsi_reset() may
	 * block, so SD_MUTEX is dropped across the reset attempt and
	 * reacquired afterwards.
	 */
	if ((un->un_reset_retry_count != 0) &&
	    (xp->xb_retry_count == un->un_reset_retry_count)) {
		mutex_exit(SD_MUTEX(un));
		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
		if (un->un_f_allow_bus_device_reset == TRUE) {

			boolean_t try_resetting_target = B_TRUE;

			/*
			 * We need to be able to handle specific ASC when we are
			 * handling a KEY_HARDWARE_ERROR. In particular
			 * taking the default action of resetting the target may
			 * not be the appropriate way to attempt recovery.
			 * Resetting a target because of a single LUN failure
			 * victimizes all LUNs on that target.
			 *
			 * This is true for the LSI arrays, if an LSI
			 * array controller returns an ASC of 0x84 (LUN Dead) we
			 * should trust it.
			 */

			if (sense_key == KEY_HARDWARE_ERROR) {
				switch (asc) {
				case 0x84:	/* LUN Dead (LSI) */
					if (SD_IS_LSI(un)) {
						try_resetting_target = B_FALSE;
					}
					break;
				default:
					break;
				}
			}

			if (try_resetting_target == B_TRUE) {
				int reset_retval = 0;
				/* Prefer a LUN reset; fall back to target. */
				if (un->un_f_lun_reset_enabled == TRUE) {
					SD_TRACE(SD_LOG_IO_CORE, un,
					    "sd_sense_key_medium_or_hardware_"
					    "error: issuing RESET_LUN\n");
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_LUN);
				}
				if (reset_retval == 0) {
					SD_TRACE(SD_LOG_IO_CORE, un,
					    "sd_sense_key_medium_or_hardware_"
					    "error: issuing RESET_TARGET\n");
					(void) scsi_reset(SD_ADDRESS(un),
					    RESET_TARGET);
				}
			}
		}
		mutex_enter(SD_MUTEX(un));
	}

	/*
	 * This really ought to be a fatal error, but we will retry anyway
	 * as some drives report this as a spurious error.
	 */
	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
	    &si, EIO, (clock_t)0, NULL);
}
18386 
18387 
18388 
18389 /*
18390  *    Function: sd_sense_key_illegal_request
18391  *
18392  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
18393  *
18394  *     Context: May be called from interrupt context
18395  */
18396 
18397 static void
18398 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
18399 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18400 {
18401 	struct sd_sense_info	si;
18402 
18403 	ASSERT(un != NULL);
18404 	ASSERT(mutex_owned(SD_MUTEX(un)));
18405 	ASSERT(bp != NULL);
18406 	ASSERT(xp != NULL);
18407 	ASSERT(pktp != NULL);
18408 
18409 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
18410 
18411 	si.ssi_severity = SCSI_ERR_INFO;
18412 	si.ssi_pfa_flag = FALSE;
18413 
18414 	/* Pointless to retry if the target thinks it's an illegal request */
18415 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18416 	sd_return_failed_command(un, bp, EIO);
18417 }
18418 
18419 
18420 
18421 
18422 /*
18423  *    Function: sd_sense_key_unit_attention
18424  *
18425  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
18426  *
18427  *     Context: May be called from interrupt context
18428  */
18429 
static void
sd_sense_key_unit_attention(struct sd_lun *un,
	uint8_t *sense_datap,
	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	/*
	 * For UNIT ATTENTION we allow retries for one minute. Devices
	 * like Sonoma can return UNIT ATTENTION close to a minute
	 * under certain conditions.
	 */
	int	retry_check_flag = SD_RETRIES_UA;
	boolean_t	kstat_updated = B_FALSE;
	struct	sd_sense_info		si;
	uint8_t asc = scsi_sense_asc(sense_datap);
	uint8_t	ascq = scsi_sense_ascq(sense_datap);

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	si.ssi_severity = SCSI_ERR_INFO;
	si.ssi_pfa_flag = FALSE;

	/* Dispatch on the additional sense code (ASC). */
	switch (asc) {
	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
		/*
		 * Predictive failure report.  If PFA reporting is
		 * enabled, count it, flag the message as a PFA warning,
		 * and retry under the standard (non-UA) retry policy.
		 */
		if (sd_report_pfa != 0) {
			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
			si.ssi_pfa_flag = TRUE;
			retry_check_flag = SD_RETRIES_STANDARD;
			goto do_retry;
		}

		break;

	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
		/*
		 * A reset implies any SCSI-2 reservation we held has
		 * been lost; note that so it can be reclaimed later.
		 */
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			un->un_resvd_status |=
			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
		}
#ifdef _LP64
		/*
		 * The reset may have reverted the device to fixed sense
		 * data; devices addressed beyond the GROUP1 limit need
		 * descriptor sense re-enabled (done in taskq context).
		 */
		if (un->un_blockcount + 1 > SD_GROUP1_MAX_ADDRESS) {
			if (taskq_dispatch(sd_tq, sd_reenable_dsense_task,
			    un, KM_NOSLEEP) == 0) {
				/*
				 * If we can't dispatch the task we'll just
				 * live without descriptor sense.  We can
				 * try again on the next "unit attention"
				 */
				SD_ERROR(SD_LOG_ERROR, un,
				    "sd_sense_key_unit_attention: "
				    "Could not dispatch "
				    "sd_reenable_dsense_task\n");
			}
		}
#endif /* _LP64 */
		/* FALLTHRU */

	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
		if (!un->un_f_has_removable_media) {
			break;
		}

		/*
		 * When we get a unit attention from a removable-media device,
		 * it may be in a state that will take a long time to recover
		 * (e.g., from a reset).  Since we are executing in interrupt
		 * context here, we cannot wait around for the device to come
		 * back. So hand this command off to sd_media_change_task()
		 * for deferred processing under taskq thread context. (Note
		 * that the command still may be failed if a problem is
		 * encountered at a later time.)
		 */
		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
		    KM_NOSLEEP) == 0) {
			/*
			 * Cannot dispatch the request so fail the command.
			 */
			SD_UPDATE_ERRSTATS(un, sd_harderrs);
			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
			si.ssi_severity = SCSI_ERR_FATAL;
			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
			sd_return_failed_command(un, bp, EIO);
		}

		/*
		 * If failed to dispatch sd_media_change_task(), we already
		 * updated kstat. If succeed to dispatch sd_media_change_task(),
		 * we should update kstat later if it encounters an error. So,
		 * we update kstat_updated flag here.
		 */
		kstat_updated = B_TRUE;

		/*
		 * Either the command has been successfully dispatched to a
		 * task Q for retrying, or the dispatch failed. In either case
		 * do NOT retry again by calling sd_retry_command. This sets up
		 * two retries of the same command and when one completes and
		 * frees the resources the other will access freed memory,
		 * a bad thing.
		 */
		return;

	default:
		break;
	}

	/*
	 * ASC  ASCQ
	 *  2A   09	Capacity data has changed
	 *  2A   01	Mode parameters changed
	 *  3F   0E	Reported luns data has changed
	 * Arrays that support logical unit expansion should report
	 * capacity changes(2Ah/09). Mode parameters changed and
	 * reported luns data has changed are the approximation.
	 */
	if (((asc == 0x2a) && (ascq == 0x09)) ||
	    ((asc == 0x2a) && (ascq == 0x01)) ||
	    ((asc == 0x3f) && (ascq == 0x0e))) {
		if (taskq_dispatch(sd_tq, sd_target_change_task, un,
		    KM_NOSLEEP) == 0) {
			SD_ERROR(SD_LOG_ERROR, un,
			    "sd_sense_key_unit_attention: "
			    "Could not dispatch sd_target_change_task\n");
		}
	}

	/*
	 * Update kstat if we haven't done that.
	 */
	if (!kstat_updated) {
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
	}

do_retry:
	/* Retry under whichever policy was selected above (UA default). */
	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
	    EIO, SD_UA_RETRY_DELAY, NULL);
}
18571 
18572 
18573 
18574 /*
18575  *    Function: sd_sense_key_fail_command
18576  *
18577  * Description: Use to fail a command when we don't like the sense key that
18578  *		was returned.
18579  *
18580  *     Context: May be called from interrupt context
18581  */
18582 
18583 static void
18584 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
18585 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18586 {
18587 	struct sd_sense_info	si;
18588 
18589 	ASSERT(un != NULL);
18590 	ASSERT(mutex_owned(SD_MUTEX(un)));
18591 	ASSERT(bp != NULL);
18592 	ASSERT(xp != NULL);
18593 	ASSERT(pktp != NULL);
18594 
18595 	si.ssi_severity = SCSI_ERR_FATAL;
18596 	si.ssi_pfa_flag = FALSE;
18597 
18598 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18599 	sd_return_failed_command(un, bp, EIO);
18600 }
18601 
18602 
18603 
18604 /*
18605  *    Function: sd_sense_key_blank_check
18606  *
18607  * Description: Recovery actions for a SCSI "Blank Check" sense key.
18608  *		Has no monetary connotation.
18609  *
18610  *     Context: May be called from interrupt context
18611  */
18612 
18613 static void
18614 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
18615 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18616 {
18617 	struct sd_sense_info	si;
18618 
18619 	ASSERT(un != NULL);
18620 	ASSERT(mutex_owned(SD_MUTEX(un)));
18621 	ASSERT(bp != NULL);
18622 	ASSERT(xp != NULL);
18623 	ASSERT(pktp != NULL);
18624 
18625 	/*
18626 	 * Blank check is not fatal for removable devices, therefore
18627 	 * it does not require a console message.
18628 	 */
18629 	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
18630 	    SCSI_ERR_FATAL;
18631 	si.ssi_pfa_flag = FALSE;
18632 
18633 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18634 	sd_return_failed_command(un, bp, EIO);
18635 }
18636 
18637 
18638 
18639 
18640 /*
18641  *    Function: sd_sense_key_aborted_command
18642  *
18643  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
18644  *
18645  *     Context: May be called from interrupt context
18646  */
18647 
18648 static void
18649 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
18650 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18651 {
18652 	struct sd_sense_info	si;
18653 
18654 	ASSERT(un != NULL);
18655 	ASSERT(mutex_owned(SD_MUTEX(un)));
18656 	ASSERT(bp != NULL);
18657 	ASSERT(xp != NULL);
18658 	ASSERT(pktp != NULL);
18659 
18660 	si.ssi_severity = SCSI_ERR_FATAL;
18661 	si.ssi_pfa_flag = FALSE;
18662 
18663 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18664 
18665 	/*
18666 	 * This really ought to be a fatal error, but we will retry anyway
18667 	 * as some drives report this as a spurious error.
18668 	 */
18669 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18670 	    &si, EIO, drv_usectohz(100000), NULL);
18671 }
18672 
18673 
18674 
18675 /*
18676  *    Function: sd_sense_key_default
18677  *
18678  * Description: Default recovery action for several SCSI sense keys (basically
18679  *		attempts a retry).
18680  *
18681  *     Context: May be called from interrupt context
18682  */
18683 
18684 static void
18685 sd_sense_key_default(struct sd_lun *un,
18686 	uint8_t *sense_datap,
18687 	struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
18688 {
18689 	struct sd_sense_info	si;
18690 	uint8_t sense_key = scsi_sense_key(sense_datap);
18691 
18692 	ASSERT(un != NULL);
18693 	ASSERT(mutex_owned(SD_MUTEX(un)));
18694 	ASSERT(bp != NULL);
18695 	ASSERT(xp != NULL);
18696 	ASSERT(pktp != NULL);
18697 
18698 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18699 
18700 	/*
18701 	 * Undecoded sense key.	Attempt retries and hope that will fix
18702 	 * the problem.  Otherwise, we're dead.
18703 	 */
18704 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
18705 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18706 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
18707 	}
18708 
18709 	si.ssi_severity = SCSI_ERR_FATAL;
18710 	si.ssi_pfa_flag = FALSE;
18711 
18712 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18713 	    &si, EIO, (clock_t)0, NULL);
18714 }
18715 
18716 
18717 
18718 /*
18719  *    Function: sd_print_retry_msg
18720  *
18721  * Description: Print a message indicating the retry action being taken.
18722  *
18723  *   Arguments: un - ptr to associated softstate
18724  *		bp - ptr to buf(9S) for the command
18725  *		arg - not used.
18726  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18727  *			or SD_NO_RETRY_ISSUED
18728  *
18729  *     Context: May be called from interrupt context
18730  */
18731 /* ARGSUSED */
18732 static void
18733 sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
18734 {
18735 	struct sd_xbuf	*xp;
18736 	struct scsi_pkt *pktp;
18737 	char *reasonp;
18738 	char *msgp;
18739 
18740 	ASSERT(un != NULL);
18741 	ASSERT(mutex_owned(SD_MUTEX(un)));
18742 	ASSERT(bp != NULL);
18743 	pktp = SD_GET_PKTP(bp);
18744 	ASSERT(pktp != NULL);
18745 	xp = SD_GET_XBUF(bp);
18746 	ASSERT(xp != NULL);
18747 
18748 	ASSERT(!mutex_owned(&un->un_pm_mutex));
18749 	mutex_enter(&un->un_pm_mutex);
18750 	if ((un->un_state == SD_STATE_SUSPENDED) ||
18751 	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
18752 	    (pktp->pkt_flags & FLAG_SILENT)) {
18753 		mutex_exit(&un->un_pm_mutex);
18754 		goto update_pkt_reason;
18755 	}
18756 	mutex_exit(&un->un_pm_mutex);
18757 
18758 	/*
18759 	 * Suppress messages if they are all the same pkt_reason; with
18760 	 * TQ, many (up to 256) are returned with the same pkt_reason.
18761 	 * If we are in panic, then suppress the retry messages.
18762 	 */
18763 	switch (flag) {
18764 	case SD_NO_RETRY_ISSUED:
18765 		msgp = "giving up";
18766 		break;
18767 	case SD_IMMEDIATE_RETRY_ISSUED:
18768 	case SD_DELAYED_RETRY_ISSUED:
18769 		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
18770 		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
18771 		    (sd_error_level != SCSI_ERR_ALL))) {
18772 			return;
18773 		}
18774 		msgp = "retrying command";
18775 		break;
18776 	default:
18777 		goto update_pkt_reason;
18778 	}
18779 
18780 	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
18781 	    scsi_rname(pktp->pkt_reason));
18782 
18783 	if (SD_FM_LOG(un) == SD_FM_LOG_NSUP) {
18784 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18785 		    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
18786 	}
18787 
18788 update_pkt_reason:
18789 	/*
18790 	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
18791 	 * This is to prevent multiple console messages for the same failure
18792 	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
18793 	 * when the command is retried successfully because there still may be
18794 	 * more commands coming back with the same value of pktp->pkt_reason.
18795 	 */
18796 	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
18797 		un->un_last_pkt_reason = pktp->pkt_reason;
18798 	}
18799 }
18800 
18801 
18802 /*
18803  *    Function: sd_print_cmd_incomplete_msg
18804  *
18805  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
18806  *
18807  *   Arguments: un - ptr to associated softstate
18808  *		bp - ptr to buf(9S) for the command
18809  *		arg - passed to sd_print_retry_msg()
18810  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18811  *			or SD_NO_RETRY_ISSUED
18812  *
18813  *     Context: May be called from interrupt context
18814  */
18815 
18816 static void
18817 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
18818 	int code)
18819 {
18820 	dev_info_t	*dip;
18821 
18822 	ASSERT(un != NULL);
18823 	ASSERT(mutex_owned(SD_MUTEX(un)));
18824 	ASSERT(bp != NULL);
18825 
18826 	switch (code) {
18827 	case SD_NO_RETRY_ISSUED:
18828 		/* Command was failed. Someone turned off this target? */
18829 		if (un->un_state != SD_STATE_OFFLINE) {
18830 			/*
18831 			 * Suppress message if we are detaching and
18832 			 * device has been disconnected
18833 			 * Note that DEVI_IS_DEVICE_REMOVED is a consolidation
18834 			 * private interface and not part of the DDI
18835 			 */
18836 			dip = un->un_sd->sd_dev;
18837 			if (!(DEVI_IS_DETACHING(dip) &&
18838 			    DEVI_IS_DEVICE_REMOVED(dip))) {
18839 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18840 				"disk not responding to selection\n");
18841 			}
18842 			New_state(un, SD_STATE_OFFLINE);
18843 		}
18844 		break;
18845 
18846 	case SD_DELAYED_RETRY_ISSUED:
18847 	case SD_IMMEDIATE_RETRY_ISSUED:
18848 	default:
18849 		/* Command was successfully queued for retry */
18850 		sd_print_retry_msg(un, bp, arg, code);
18851 		break;
18852 	}
18853 }
18854 
18855 
18856 /*
18857  *    Function: sd_pkt_reason_cmd_incomplete
18858  *
18859  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
18860  *
18861  *     Context: May be called from interrupt context
18862  */
18863 
18864 static void
18865 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
18866 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18867 {
18868 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
18869 
18870 	ASSERT(un != NULL);
18871 	ASSERT(mutex_owned(SD_MUTEX(un)));
18872 	ASSERT(bp != NULL);
18873 	ASSERT(xp != NULL);
18874 	ASSERT(pktp != NULL);
18875 
18876 	/* Do not do a reset if selection did not complete */
18877 	/* Note: Should this not just check the bit? */
18878 	if (pktp->pkt_state != STATE_GOT_BUS) {
18879 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18880 		sd_reset_target(un, pktp);
18881 	}
18882 
18883 	/*
18884 	 * If the target was not successfully selected, then set
18885 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
18886 	 * with the target, and further retries and/or commands are
18887 	 * likely to take a long time.
18888 	 */
18889 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
18890 		flag |= SD_RETRIES_FAILFAST;
18891 	}
18892 
18893 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18894 
18895 	sd_retry_command(un, bp, flag,
18896 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18897 }
18898 
18899 
18900 
18901 /*
18902  *    Function: sd_pkt_reason_cmd_tran_err
18903  *
18904  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
18905  *
18906  *     Context: May be called from interrupt context
18907  */
18908 
18909 static void
18910 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
18911 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18912 {
18913 	ASSERT(un != NULL);
18914 	ASSERT(mutex_owned(SD_MUTEX(un)));
18915 	ASSERT(bp != NULL);
18916 	ASSERT(xp != NULL);
18917 	ASSERT(pktp != NULL);
18918 
18919 	/*
18920 	 * Do not reset if we got a parity error, or if
18921 	 * selection did not complete.
18922 	 */
18923 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18924 	/* Note: Should this not just check the bit for pkt_state? */
18925 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
18926 	    (pktp->pkt_state != STATE_GOT_BUS)) {
18927 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
18928 		sd_reset_target(un, pktp);
18929 	}
18930 
18931 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18932 
18933 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
18934 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18935 }
18936 
18937 
18938 
18939 /*
18940  *    Function: sd_pkt_reason_cmd_reset
18941  *
18942  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
18943  *
18944  *     Context: May be called from interrupt context
18945  */
18946 
18947 static void
18948 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
18949 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18950 {
18951 	ASSERT(un != NULL);
18952 	ASSERT(mutex_owned(SD_MUTEX(un)));
18953 	ASSERT(bp != NULL);
18954 	ASSERT(xp != NULL);
18955 	ASSERT(pktp != NULL);
18956 
18957 	/* The target may still be running the command, so try to reset. */
18958 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18959 	sd_reset_target(un, pktp);
18960 
18961 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
18962 
18963 	/*
18964 	 * If pkt_reason is CMD_RESET chances are that this pkt got
18965 	 * reset because another target on this bus caused it. The target
18966 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
18967 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
18968 	 */
18969 
18970 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
18971 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
18972 }
18973 
18974 
18975 
18976 
18977 /*
18978  *    Function: sd_pkt_reason_cmd_aborted
18979  *
18980  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
18981  *
18982  *     Context: May be called from interrupt context
18983  */
18984 
18985 static void
18986 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
18987 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
18988 {
18989 	ASSERT(un != NULL);
18990 	ASSERT(mutex_owned(SD_MUTEX(un)));
18991 	ASSERT(bp != NULL);
18992 	ASSERT(xp != NULL);
18993 	ASSERT(pktp != NULL);
18994 
18995 	/* The target may still be running the command, so try to reset. */
18996 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
18997 	sd_reset_target(un, pktp);
18998 
18999 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19000 
19001 	/*
19002 	 * If pkt_reason is CMD_ABORTED chances are that this pkt got
19003 	 * aborted because another target on this bus caused it. The target
19004 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
19005 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
19006 	 */
19007 
19008 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
19009 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19010 }
19011 
19012 
19013 
19014 /*
19015  *    Function: sd_pkt_reason_cmd_timeout
19016  *
19017  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
19018  *
19019  *     Context: May be called from interrupt context
19020  */
19021 
static void
sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	/* The device did not respond in time; count it and reset. */
	SD_UPDATE_ERRSTATS(un, sd_transerrs);
	sd_reset_target(un, pktp);

	SD_UPDATE_RESERVATION_STATUS(un, pktp);

	/*
	 * A command timeout indicates that we could not establish
	 * communication with the target, so set SD_RETRIES_FAILFAST
	 * as further retries/commands are likely to take a long time.
	 */
	sd_retry_command(un, bp,
	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
}
19047 
19048 
19049 
19050 /*
19051  *    Function: sd_pkt_reason_cmd_unx_bus_free
19052  *
19053  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
19054  *
19055  *     Context: May be called from interrupt context
19056  */
19057 
19058 static void
19059 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
19060 	struct sd_xbuf *xp, struct scsi_pkt *pktp)
19061 {
19062 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
19063 
19064 	ASSERT(un != NULL);
19065 	ASSERT(mutex_owned(SD_MUTEX(un)));
19066 	ASSERT(bp != NULL);
19067 	ASSERT(xp != NULL);
19068 	ASSERT(pktp != NULL);
19069 
19070 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
19071 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19072 
19073 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
19074 	    sd_print_retry_msg : NULL;
19075 
19076 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
19077 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19078 }
19079 
19080 
19081 /*
19082  *    Function: sd_pkt_reason_cmd_tag_reject
19083  *
19084  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
19085  *
19086  *     Context: May be called from interrupt context
19087  */
19088 
static void
sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	SD_UPDATE_ERRSTATS(un, sd_harderrs);
	/*
	 * The target rejected the tag message: stop using tagged
	 * queueing for this device and shrink the throttle to match
	 * its reduced (untagged) queueing ability.
	 */
	pktp->pkt_flags = 0;
	un->un_tagflags = 0;
	if (un->un_f_opt_queueing == TRUE) {
		un->un_throttle = min(un->un_throttle, 3);
	} else {
		un->un_throttle = 1;
	}
	/*
	 * Drop SD_MUTEX around scsi_ifsetcap(); presumably because the
	 * HBA capability call may block -- TODO confirm lock ordering.
	 */
	mutex_exit(SD_MUTEX(un));
	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
	mutex_enter(SD_MUTEX(un));

	SD_UPDATE_RESERVATION_STATUS(un, pktp);

	/* Legacy behavior not to check retry counts here. */
	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
}
19117 
19118 
19119 /*
19120  *    Function: sd_pkt_reason_default
19121  *
19122  * Description: Default recovery actions for SCSA pkt_reason values that
19123  *		do not have more explicit recovery actions.
19124  *
19125  *     Context: May be called from interrupt context
19126  */
19127 
static void
sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	/* Unrecognized pkt_reason: count it, reset, and retry. */
	SD_UPDATE_ERRSTATS(un, sd_transerrs);
	sd_reset_target(un, pktp);

	SD_UPDATE_RESERVATION_STATUS(un, pktp);

	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
}
19146 
19147 
19148 
19149 /*
19150  *    Function: sd_pkt_status_check_condition
19151  *
19152  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
19153  *
19154  *     Context: May be called from interrupt context
19155  */
19156 
static void
sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
	    "entry: buf:0x%p xp:0x%p\n", bp, xp);

	/*
	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
	 * command will be retried after the request sense). Otherwise, retry
	 * the command. Note: we are issuing the request sense even though the
	 * retry limit may have been reached for the failed command.
	 */
	if (un->un_f_arq_enabled == FALSE) {
		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
		    "no ARQ, sending request sense command\n");
		sd_send_request_sense_command(un, bp, pktp);
	} else {
		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
		    "ARQ,retrying request sense command\n");
#if defined(__i386) || defined(__amd64)
		/*
		 * The SD_RETRY_DELAY value need to be adjusted here
		 * when SD_RETRY_DELAY change in sddef.h
		 */
		/* x86: fibre targets get a 100ms delay, others retry at once */
		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
		    un->un_f_is_fibre?drv_usectohz(100000):(clock_t)0,
		    NULL);
#else
		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
		    EIO, SD_RETRY_DELAY, NULL);
#endif
	}

	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
}
19199 
19200 
19201 /*
19202  *    Function: sd_pkt_status_busy
19203  *
19204  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
19205  *
19206  *     Context: May be called from interrupt context
19207  */
19208 
static void
sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
	struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_pkt_status_busy: entry\n");

	/* If retries are exhausted, just fail the command. */
	if (xp->xb_retry_count >= un->un_busy_retry_count) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "device busy too long\n");
		sd_return_failed_command(un, bp, EIO);
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_pkt_status_busy: exit\n");
		return;
	}
	xp->xb_retry_count++;

	/*
	 * Try to reset the target. However, we do not want to perform
	 * more than one reset if the device continues to fail. The reset
	 * will be performed when the retry count reaches the reset
	 * threshold.  This threshold should be set such that at least
	 * one retry is issued before the reset is performed.
	 */
	if (xp->xb_retry_count ==
	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
		int rval = 0;
		/*
		 * SD_MUTEX is dropped across the scsi_reset() calls;
		 * note un_state may change while it is released.
		 */
		mutex_exit(SD_MUTEX(un));
		if (un->un_f_allow_bus_device_reset == TRUE) {
			/*
			 * First try to reset the LUN; if we cannot then
			 * try to reset the target.
			 */
			if (un->un_f_lun_reset_enabled == TRUE) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_pkt_status_busy: RESET_LUN\n");
				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
			}
			if (rval == 0) {
				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
				    "sd_pkt_status_busy: RESET_TARGET\n");
				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
			}
		}
		if (rval == 0) {
			/*
			 * If the RESET_LUN and/or RESET_TARGET failed,
			 * try RESET_ALL
			 */
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_pkt_status_busy: RESET_ALL\n");
			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
		}
		mutex_enter(SD_MUTEX(un));
		if (rval == 0) {
			/*
			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
			 * At this point we give up & fail the command.
			 */
			sd_return_failed_command(un, bp, EIO);
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_pkt_status_busy: exit (failed cmd)\n");
			return;
		}
	}

	/*
	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
	 * we have already checked the retry counts above.
	 */
	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
	    EIO, un->un_busy_timeout, NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_pkt_status_busy: exit\n");
}
19292 
19293 
19294 /*
19295  *    Function: sd_pkt_status_reservation_conflict
19296  *
19297  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
19298  *		command status.
19299  *
19300  *     Context: May be called from interrupt context
19301  */
19302 
static void
sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	/*
	 * If the command was PERSISTENT_RESERVATION_[IN|OUT] then reservation
	 * conflict could be due to various reasons like incorrect keys, not
	 * registered or not reserved etc. So, we return EACCES to the caller.
	 */
	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
		int cmd = SD_GET_PKT_OPCODE(pktp);
		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
			sd_return_failed_command(un, bp, EACCES);
			return;
		}
	}

	/* Remember that this device reported a reservation conflict. */
	un->un_resvd_status |= SD_RESERVATION_CONFLICT;

	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
		if (sd_failfast_enable != 0) {
			/* By definition, we must panic here.... */
			sd_panic_for_res_conflict(un);
			/*NOTREACHED*/
		}
		SD_ERROR(SD_LOG_IO, un,
		    "sd_handle_resv_conflict: Disk Reserved\n");
		sd_return_failed_command(un, bp, EACCES);
		return;
	}

	/*
	 * 1147670: retry only if sd_retry_on_reservation_conflict
	 * property is set (default is 1). Retries will not succeed
	 * on a disk reserved by another initiator. HA systems
	 * may reset this via sd.conf to avoid these retries.
	 *
	 * Note: The legacy return code for this failure is EIO, however EACCES
	 * seems more appropriate for a reservation conflict.
	 */
	if (sd_retry_on_reservation_conflict == 0) {
		SD_ERROR(SD_LOG_IO, un,
		    "sd_handle_resv_conflict: Device Reserved\n");
		sd_return_failed_command(un, bp, EIO);
		return;
	}

	/*
	 * Retry the command if we can.
	 *
	 * Note: The legacy return code for this failure is EIO, however EACCES
	 * seems more appropriate for a reservation conflict.
	 */
	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
	    (clock_t)2, NULL);
}
19366 
19367 
19368 
19369 /*
19370  *    Function: sd_pkt_status_qfull
19371  *
19372  * Description: Handle a QUEUE FULL condition from the target.  This can
19373  *		occur if the HBA does not handle the queue full condition.
19374  *		(Basically this means third-party HBAs as Sun HBAs will
19375  *		handle the queue full condition.)  Note that if there are
19376  *		some commands already in the transport, then the queue full
19377  *		has occurred because the queue for this nexus is actually
19378  *		full. If there are no commands in the transport, then the
19379  *		queue full is resulting from some other initiator or lun
19380  *		consuming all the resources at the target.
19381  *
19382  *     Context: May be called from interrupt context
19383  */
19384 
static void
sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
	struct sd_xbuf *xp, struct scsi_pkt *pktp)
{
	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_pkt_status_qfull: entry\n");

	/*
	 * Just lower the QFULL throttle and retry the command.  Note that
	 * we do not limit the number of retries here.
	 */
	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
	/* errno 0: a QFULL retry must never fail the command outright */
	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
	    SD_RESTART_TIMEOUT, NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_pkt_status_qfull: exit\n");
}
19409 
19410 
19411 /*
19412  *    Function: sd_reset_target
19413  *
19414  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
19415  *		RESET_TARGET, or RESET_ALL.
19416  *
19417  *     Context: May be called under interrupt context.
19418  */
19419 
static void
sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
{
	int rval = 0;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(pktp != NULL);

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");

	/*
	 * No need to reset if the transport layer has already done so.
	 */
	if ((pktp->pkt_statistics &
	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_reset_target: no reset\n");
		return;
	}

	/* SD_MUTEX is dropped across the (potentially slow) reset calls. */
	mutex_exit(SD_MUTEX(un));

	/*
	 * Escalate: LUN reset first (if enabled), then target reset,
	 * and finally a full bus reset if the narrower resets failed.
	 */
	if (un->un_f_allow_bus_device_reset == TRUE) {
		if (un->un_f_lun_reset_enabled == TRUE) {
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_reset_target: RESET_LUN\n");
			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
		}
		if (rval == 0) {
			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
			    "sd_reset_target: RESET_TARGET\n");
			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
		}
	}

	if (rval == 0) {
		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
		    "sd_reset_target: RESET_ALL\n");
		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
	}

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
}
19466 
19467 /*
19468  *    Function: sd_target_change_task
19469  *
19470  * Description: Handle dynamic target change
19471  *
19472  *     Context: Executes in a taskq() thread context
19473  */
static void
sd_target_change_task(void *arg)
{
	struct sd_lun		*un = arg;
	uint64_t		capacity;
	diskaddr_t		label_cap;
	uint_t			lbasize;
	sd_ssc_t		*ssc;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/*
	 * Nothing to do until a valid capacity and target blocksize have
	 * been established for this lun.
	 */
	if ((un->un_f_blockcount_is_valid == FALSE) ||
	    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
		return;
	}

	ssc = sd_ssc_init(un);

	/* Re-read the capacity; on failure, skip the FMA assessment. */
	if (sd_send_scsi_READ_CAPACITY(ssc, &capacity,
	    &lbasize, SD_PATH_DIRECT) != 0) {
		SD_ERROR(SD_LOG_ERROR, un,
		    "sd_target_change_task: fail to read capacity\n");
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		goto task_exit;
	}

	/* Only a capacity increase (lun expansion) is of interest here. */
	mutex_enter(SD_MUTEX(un));
	if (capacity <= un->un_blockcount) {
		mutex_exit(SD_MUTEX(un));
		goto task_exit;
	}

	sd_update_block_info(un, lbasize, capacity);
	mutex_exit(SD_MUTEX(un));

	/*
	 * If lun is EFI labeled and lun capacity is greater than the
	 * capacity contained in the label, log a sys event.
	 */
	if (cmlb_efi_label_capacity(un->un_cmlbhandle, &label_cap,
	    (void*)SD_PATH_DIRECT) == 0) {
		mutex_enter(SD_MUTEX(un));
		if (un->un_f_blockcount_is_valid &&
		    un->un_blockcount > label_cap) {
			mutex_exit(SD_MUTEX(un));
			sd_log_lun_expansion_event(un, KM_SLEEP);
		} else {
			mutex_exit(SD_MUTEX(un));
		}
	}

task_exit:
	sd_ssc_fini(ssc);
}
19529 
19530 
19531 /*
19532  *    Function: sd_log_dev_status_event
19533  *
19534  * Description: Log EC_dev_status sysevent
19535  *
19536  *     Context: Never called from interrupt context
19537  */
19538 static void
19539 sd_log_dev_status_event(struct sd_lun *un, char *esc, int km_flag)
19540 {
19541 	int err;
19542 	char			*path;
19543 	nvlist_t		*attr_list;
19544 
19545 	/* Allocate and build sysevent attribute list */
19546 	err = nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, km_flag);
19547 	if (err != 0) {
19548 		SD_ERROR(SD_LOG_ERROR, un,
19549 		    "sd_log_dev_status_event: fail to allocate space\n");
19550 		return;
19551 	}
19552 
19553 	path = kmem_alloc(MAXPATHLEN, km_flag);
19554 	if (path == NULL) {
19555 		nvlist_free(attr_list);
19556 		SD_ERROR(SD_LOG_ERROR, un,
19557 		    "sd_log_dev_status_event: fail to allocate space\n");
19558 		return;
19559 	}
19560 	/*
19561 	 * Add path attribute to identify the lun.
19562 	 * We are using minor node 'a' as the sysevent attribute.
19563 	 */
19564 	(void) snprintf(path, MAXPATHLEN, "/devices");
19565 	(void) ddi_pathname(SD_DEVINFO(un), path + strlen(path));
19566 	(void) snprintf(path + strlen(path), MAXPATHLEN - strlen(path),
19567 	    ":a");
19568 
19569 	err = nvlist_add_string(attr_list, DEV_PHYS_PATH, path);
19570 	if (err != 0) {
19571 		nvlist_free(attr_list);
19572 		kmem_free(path, MAXPATHLEN);
19573 		SD_ERROR(SD_LOG_ERROR, un,
19574 		    "sd_log_dev_status_event: fail to add attribute\n");
19575 		return;
19576 	}
19577 
19578 	/* Log dynamic lun expansion sysevent */
19579 	err = ddi_log_sysevent(SD_DEVINFO(un), SUNW_VENDOR, EC_DEV_STATUS,
19580 	    esc, attr_list, NULL, km_flag);
19581 	if (err != DDI_SUCCESS) {
19582 		SD_ERROR(SD_LOG_ERROR, un,
19583 		    "sd_log_dev_status_event: fail to log sysevent\n");
19584 	}
19585 
19586 	nvlist_free(attr_list);
19587 	kmem_free(path, MAXPATHLEN);
19588 }
19589 
19590 
19591 /*
19592  *    Function: sd_log_lun_expansion_event
19593  *
19594  * Description: Log lun expansion sys event
19595  *
19596  *     Context: Never called from interrupt context
19597  */
19598 static void
19599 sd_log_lun_expansion_event(struct sd_lun *un, int km_flag)
19600 {
19601 	sd_log_dev_status_event(un, ESC_DEV_DLE, km_flag);
19602 }
19603 
19604 
19605 /*
19606  *    Function: sd_log_eject_request_event
19607  *
19608  * Description: Log eject request sysevent
19609  *
19610  *     Context: Never called from interrupt context
19611  */
19612 static void
19613 sd_log_eject_request_event(struct sd_lun *un, int km_flag)
19614 {
19615 	sd_log_dev_status_event(un, ESC_DEV_EJECT_REQUEST, km_flag);
19616 }
19617 
19618 
19619 /*
19620  *    Function: sd_media_change_task
19621  *
19622  * Description: Recovery action for CDROM to become available.
19623  *
19624  *     Context: Executes in a taskq() thread context
19625  */
19626 
static void
sd_media_change_task(void *arg)
{
	struct	scsi_pkt	*pktp = arg;
	struct	sd_lun		*un;
	struct	buf		*bp;
	struct	sd_xbuf		*xp;
	int	err		= 0;
	int	retry_count	= 0;
	int	retry_limit	= SD_UNIT_ATTENTION_RETRY/10;
	struct	sd_sense_info	si;

	ASSERT(pktp != NULL);
	bp = (struct buf *)pktp->pkt_private;
	ASSERT(bp != NULL);
	xp = SD_GET_XBUF(bp);
	ASSERT(xp != NULL);
	un = SD_GET_UN(bp);
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_f_monitor_media_state);

	si.ssi_severity = SCSI_ERR_INFO;
	si.ssi_pfa_flag = FALSE;

	/*
	 * When a reset is issued on a CDROM, it takes a long time to
	 * recover. First few attempts to read capacity and other things
	 * related to handling unit attention fail (with a ASC 0x4 and
	 * ASCQ 0x1). In that case we want to do enough retries and we want
	 * to limit the retries in other cases of genuine failures like
	 * no media in drive.
	 */
	while (retry_count++ < retry_limit) {
		if ((err = sd_handle_mchange(un)) == 0) {
			break;
		}
		if (err == EAGAIN) {
			/* "becoming ready" case: allow the full retry count */
			retry_limit = SD_UNIT_ATTENTION_RETRY;
		}
		/* Sleep for 0.5 sec. & try again */
		delay(drv_usectohz(500000));
	}

	/*
	 * Dispatch (retry or fail) the original command here,
	 * along with appropriate console messages....
	 *
	 * Must grab the mutex before calling sd_retry_command,
	 * sd_print_sense_msg and sd_return_failed_command.
	 */
	mutex_enter(SD_MUTEX(un));
	if (err != SD_CMD_SUCCESS) {
		/* Recovery failed: fail the original command with EIO. */
		SD_UPDATE_ERRSTATS(un, sd_harderrs);
		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
		si.ssi_severity = SCSI_ERR_FATAL;
		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
		sd_return_failed_command(un, bp, EIO);
	} else {
		/* Media is back: retry the original command immediately. */
		sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
		    &si, EIO, (clock_t)0, NULL);
	}
	mutex_exit(SD_MUTEX(un));
}
19691 
19692 
19693 
19694 /*
19695  *    Function: sd_handle_mchange
19696  *
19697  * Description: Perform geometry validation & other recovery when CDROM
19698  *		has been removed from drive.
19699  *
19700  * Return Code: 0 for success
19701  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
19702  *		sd_send_scsi_READ_CAPACITY()
19703  *
19704  *     Context: Executes in a taskq() thread context
19705  */
19706 
static int
sd_handle_mchange(struct sd_lun *un)
{
	uint64_t	capacity;
	uint32_t	lbasize;
	int		rval;
	sd_ssc_t	*ssc;

	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_f_monitor_media_state);

	/*
	 * Re-read the capacity via the priority path since this runs as
	 * part of error recovery, ahead of any queued commands.
	 */
	ssc = sd_ssc_init(un);
	rval = sd_send_scsi_READ_CAPACITY(ssc, &capacity, &lbasize,
	    SD_PATH_DIRECT_PRIORITY);

	if (rval != 0)
		goto failed;

	mutex_enter(SD_MUTEX(un));
	sd_update_block_info(un, lbasize, capacity);

	/* Refresh the capacity kstat to match the new block count. */
	if (un->un_errstats != NULL) {
		struct	sd_errstats *stp =
		    (struct sd_errstats *)un->un_errstats->ks_data;
		stp->sd_capacity.value.ui64 = (uint64_t)
		    ((uint64_t)un->un_blockcount *
		    (uint64_t)un->un_tgt_blocksize);
	}

	/*
	 * Check if the media in the device is writable or not
	 */
	if (ISCD(un)) {
		sd_check_for_writable_cd(ssc, SD_PATH_DIRECT_PRIORITY);
	}

	/*
	 * Note: Maybe let the strategy/partitioning chain worry about getting
	 * valid geometry.
	 */
	mutex_exit(SD_MUTEX(un));
	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);


	if (cmlb_validate(un->un_cmlbhandle, 0,
	    (void *)SD_PATH_DIRECT_PRIORITY) != 0) {
		sd_ssc_fini(ssc);
		return (EIO);
	} else {
		if (un->un_f_pkstats_enabled) {
			sd_set_pstats(un);
			SD_TRACE(SD_LOG_IO_PARTITION, un,
			    "sd_handle_mchange: un:0x%p pstats created and "
			    "set\n", un);
		}
	}

	/*
	 * Try to lock the door
	 */
	rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
	    SD_PATH_DIRECT_PRIORITY);
failed:
	/*
	 * Reached both when READ CAPACITY fails and after the DOORLOCK
	 * attempt; either way a nonzero rval suppresses the FMA assessment.
	 */
	if (rval != 0)
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	sd_ssc_fini(ssc);
	return (rval);
}
19775 
19776 
19777 /*
19778  *    Function: sd_send_scsi_DOORLOCK
19779  *
19780  * Description: Issue the scsi DOOR LOCK command
19781  *
19782  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
19783  *                      structure for this target.
19784  *		flag  - SD_REMOVAL_ALLOW
19785  *			SD_REMOVAL_PREVENT
19786  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19787  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19788  *			to use the USCSI "direct" chain and bypass the normal
19789  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19790  *			command is issued as part of an error recovery action.
19791  *
19792  * Return Code: 0   - Success
19793  *		errno return code from sd_ssc_send()
19794  *
19795  *     Context: Can sleep.
19796  */
19797 
static int
sd_send_scsi_DOORLOCK(sd_ssc_t *ssc, int flag, int path_flag)
{
	struct scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);

	/* already determined doorlock is not supported, fake success */
	if (un->un_f_doorlock_supported == FALSE) {
		return (0);
	}

	/*
	 * If we are ejecting and see an SD_REMOVAL_PREVENT
	 * ignore the command so we can complete the eject
	 * operation.
	 */
	if (flag == SD_REMOVAL_PREVENT) {
		mutex_enter(SD_MUTEX(un));
		if (un->un_f_ejecting == TRUE) {
			mutex_exit(SD_MUTEX(un));
			return (EAGAIN);
		}
		mutex_exit(SD_MUTEX(un));
	}

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));

	cdb.scc_cmd = SCMD_DOORLOCK;
	cdb.cdb_opaque[4] = (uchar_t)flag;	/* SD_REMOVAL_PREVENT/ALLOW */

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
	ucmd_buf.uscsi_bufaddr	= NULL;
	ucmd_buf.uscsi_buflen	= 0;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 15;

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_DOORLOCK: returning sd_ssc_send\n");

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, path_flag);

	if (status == 0)
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);

	/*
	 * An ILLEGAL REQUEST check condition means the target does not
	 * implement DOORLOCK; remember that so no further doorlock
	 * commands are attempted, and report success to the caller.
	 */
	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
	    (scsi_sense_key((uint8_t *)&sense_buf) == KEY_ILLEGAL_REQUEST)) {
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);

		/* fake success and skip subsequent doorlock commands */
		un->un_f_doorlock_supported = FALSE;
		return (0);
	}

	return (status);
}
19869 
19870 /*
19871  *    Function: sd_send_scsi_READ_CAPACITY
19872  *
19873  * Description: This routine uses the scsi READ CAPACITY command to determine
19874  *		the device capacity in number of blocks and the device native
19875  *		block size. If this function returns a failure, then the
19876  *		values in *capp and *lbap are undefined.  If the capacity
19877  *		returned is 0xffffffff then the lun is too large for a
19878  *		normal READ CAPACITY command and the results of a
19879  *		READ CAPACITY 16 will be used instead.
19880  *
19881  *   Arguments: ssc   - ssc contains ptr to soft state struct for the target
19882  *		capp - ptr to unsigned 64-bit variable to receive the
19883  *			capacity value from the command.
19884  *		lbap - ptr to unsigned 32-bit varaible to receive the
19885  *			block size value from the command
19886  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19887  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
19888  *			to use the USCSI "direct" chain and bypass the normal
19889  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
19890  *			command is issued as part of an error recovery action.
19891  *
19892  * Return Code: 0   - Success
19893  *		EIO - IO error
19894  *		EACCES - Reservation conflict detected
19895  *		EAGAIN - Device is becoming ready
19896  *		errno return code from sd_ssc_send()
19897  *
19898  *     Context: Can sleep.  Blocks until command completes.
19899  */
19900 
#define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)

static int
sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap,
	int path_flag)
{
	struct	scsi_extended_sense	sense_buf;
	struct	uscsi_cmd	ucmd_buf;
	union	scsi_cdb	cdb;
	uint32_t		*capacity_buf;
	uint64_t		capacity;
	uint32_t		lbasize;
	uint32_t		pbsize;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);

	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(capp != NULL);
	ASSERT(lbap != NULL);

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);

	/*
	 * First send a READ_CAPACITY command to the target.
	 * (This command is mandatory under SCSI-2.)
	 *
	 * Set up the CDB for the READ_CAPACITY command.  The Partial
	 * Medium Indicator bit is cleared.  The address field must be
	 * zero if the PMI bit is zero.
	 */
	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));

	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);

	cdb.scc_cmd = SCMD_READ_CAPACITY;

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, path_flag);

	switch (status) {
	case 0:
		/* Return failure if we did not get valid capacity data. */
		if (ucmd_buf.uscsi_resid != 0) {
			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
			    "sd_send_scsi_READ_CAPACITY received invalid "
			    "capacity data");
			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
			return (EIO);
		}
		/*
		 * Read capacity and block size from the READ CAPACITY 10 data.
		 * This data may be adjusted later due to device specific
		 * issues.
		 *
		 * According to the SCSI spec, the READ CAPACITY 10
		 * command returns the following:
		 *
		 *  bytes 0-3: Maximum logical block address available.
		 *		(MSB in byte:0 & LSB in byte:3)
		 *
		 *  bytes 4-7: Block length in bytes
		 *		(MSB in byte:4 & LSB in byte:7)
		 *
		 */
		capacity = BE_32(capacity_buf[0]);
		lbasize = BE_32(capacity_buf[1]);

		/*
		 * Done with capacity_buf
		 */
		kmem_free(capacity_buf, SD_CAPACITY_SIZE);

		/*
		 * if the reported capacity is set to all 0xf's, then
		 * this disk is too large and requires SBC-2 commands.
		 * Reissue the request using READ CAPACITY 16.
		 */
		if (capacity == 0xffffffff) {
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
			status = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity,
			    &lbasize, &pbsize, path_flag);
			if (status != 0) {
				/* capacity_buf was already freed above */
				return (status);
			}
		}
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			/*
			 * Check condition; look for ASC/ASCQ of 0x04/0x01
			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
			 */
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
				return (EAGAIN);
			}
			break;
		default:
			break;
		}
		/* FALLTHRU */
	default:
		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
		return (status);
	}

	/*
	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
	 * (2352 and 0 are common) so for these devices always force the value
	 * to 2048 as required by the ATAPI specs.
	 */
	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
		lbasize = 2048;
	}

	/*
	 * Get the maximum LBA value from the READ CAPACITY data.
	 * Here we assume that the Partial Medium Indicator (PMI) bit
	 * was cleared when issuing the command. This means that the LBA
	 * returned from the device is the LBA of the last logical block
	 * on the logical unit.  The actual logical block count will be
	 * this value plus one.
	 */
	capacity += 1;

	/*
	 * Currently, for removable media, the capacity is saved in terms
	 * of un->un_sys_blocksize, so scale the capacity value to reflect this.
	 */
	if (un->un_f_has_removable_media)
		capacity *= (lbasize / un->un_sys_blocksize);

	/*
	 * Copy the values from the READ CAPACITY command into the space
	 * provided by the caller.
	 */
	*capp = capacity;
	*lbap = lbasize;

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);

	/*
	 * Both the lbasize and capacity from the device must be nonzero,
	 * otherwise we assume that the values are not valid and return
	 * failure to the caller. (4203735)
	 */
	if ((capacity == 0) || (lbasize == 0)) {
		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
		    "sd_send_scsi_READ_CAPACITY received invalid value "
		    "capacity %llu lbasize %d", capacity, lbasize);
		return (EIO);
	}
	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
	return (0);
}
20078 
20079 /*
20080  *    Function: sd_send_scsi_READ_CAPACITY_16
20081  *
20082  * Description: This routine uses the scsi READ CAPACITY 16 command to
20083  *		determine the device capacity in number of blocks and the
20084  *		device native block size.  If this function returns a failure,
20085  *		then the values in *capp and *lbap are undefined.
20086  *		This routine should be called by sd_send_scsi_READ_CAPACITY
20087  *              which will apply any device specific adjustments to capacity
20088  *              and lbasize. One exception is it is also called by
20089  *              sd_get_media_info_ext. In that function, there is no need to
20090  *              adjust the capacity and lbasize.
20091  *
20092  *   Arguments: ssc   - ssc contains ptr to soft state struct for the target
20093  *		capp - ptr to unsigned 64-bit variable to receive the
20094  *			capacity value from the command.
20095  *		lbap - ptr to unsigned 32-bit varaible to receive the
20096  *			block size value from the command
20097  *              psp  - ptr to unsigned 32-bit variable to receive the
20098  *                      physical block size value from the command
20099  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20100  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20101  *			to use the USCSI "direct" chain and bypass the normal
20102  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
20103  *			this command is issued as part of an error recovery
20104  *			action.
20105  *
20106  * Return Code: 0   - Success
20107  *		EIO - IO error
20108  *		EACCES - Reservation conflict detected
20109  *		EAGAIN - Device is becoming ready
20110  *		errno return code from sd_ssc_send()
20111  *
20112  *     Context: Can sleep.  Blocks until command completes.
20113  */
20114 
#define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)

static int
sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp,
	uint32_t *lbap, uint32_t *psp, int path_flag)
{
	struct	scsi_extended_sense	sense_buf;
	struct	uscsi_cmd	ucmd_buf;
	union	scsi_cdb	cdb;
	uint64_t		*capacity16_buf;
	uint64_t		capacity;
	uint32_t		lbasize;
	uint32_t		pbsize;
	uint32_t		lbpb_exp;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);

	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	ASSERT(capp != NULL);
	ASSERT(lbap != NULL);

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);

	/*
	 * First send a READ_CAPACITY_16 command to the target.
	 *
	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
	 * Medium Indicator bit is cleared.  The address field must be
	 * zero if the PMI bit is zero.
	 */
	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));

	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	/*
	 * Read Capacity (16) is a Service Action In command.  One
	 * command byte (0x9E) is overloaded for multiple operations,
	 * with the second CDB byte specifying the desired operation
	 */
	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;

	/*
	 * Fill in allocation length field
	 */
	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, path_flag);

	switch (status) {
	case 0:
		/*
		 * Return failure if we did not get valid capacity data.
		 * A residual this large means the response was truncated
		 * before the fields decoded below (capacity, block length
		 * and the logical-per-physical-block exponent byte).
		 */
		if (ucmd_buf.uscsi_resid > 20) {
			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
			    "sd_send_scsi_READ_CAPACITY_16 received invalid "
			    "capacity data");
			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
			return (EIO);
		}

		/*
		 * Read capacity and block size from the READ CAPACITY 16 data.
		 * This data may be adjusted later due to device specific
		 * issues.
		 *
		 * According to the SCSI spec, the READ CAPACITY 16
		 * command returns the following:
		 *
		 *  bytes 0-7: Maximum logical block address available.
		 *		(MSB in byte:0 & LSB in byte:7)
		 *
		 *  bytes 8-11: Block length in bytes
		 *		(MSB in byte:8 & LSB in byte:11)
		 *
		 *  byte 13: LOGICAL BLOCKS PER PHYSICAL BLOCK EXPONENT
		 */
		capacity = BE_64(capacity16_buf[0]);
		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
		/* Low nibble of response byte 13 -- see layout above. */
		lbpb_exp = (BE_64(capacity16_buf[1]) >> 16) & 0x0f;

		pbsize = lbasize << lbpb_exp;

		/*
		 * Done with capacity16_buf
		 */
		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);

		/*
		 * if the reported capacity is set to all 0xf's, then
		 * this disk is too large.  This could only happen with
		 * a device that supports LBAs larger than 64 bits which
		 * are not defined by any current T10 standards.
		 */
		if (capacity == 0xffffffffffffffff) {
			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
			    "disk is too large");
			return (EIO);
		}
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			/*
			 * Check condition; look for ASC/ASCQ of 0x04/0x01
			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
			 */
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
				return (EAGAIN);
			}
			break;
		default:
			break;
		}
		/* FALLTHRU */
	default:
		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
		return (status);
	}

	/* Pass the unadjusted values back to the caller. */
	*capp = capacity;
	*lbap = lbasize;
	*psp = pbsize;

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
	    "capacity:0x%llx  lbasize:0x%x, pbsize: 0x%x\n",
	    capacity, lbasize, pbsize);

	return (0);
}
20266 
20267 
20268 /*
20269  *    Function: sd_send_scsi_START_STOP_UNIT
20270  *
20271  * Description: Issue a scsi START STOP UNIT command to the target.
20272  *
20273  *   Arguments: ssc    - ssc contatins pointer to driver soft state (unit)
20274  *                       structure for this target.
20275  *      pc_flag - SD_POWER_CONDITION
20276  *                SD_START_STOP
20277  *		flag  - SD_TARGET_START
20278  *			SD_TARGET_STOP
20279  *			SD_TARGET_EJECT
20280  *			SD_TARGET_CLOSE
20281  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20282  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20283  *			to use the USCSI "direct" chain and bypass the normal
20284  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
20285  *			command is issued as part of an error recovery action.
20286  *
20287  * Return Code: 0   - Success
20288  *		EIO - IO error
20289  *		EACCES - Reservation conflict detected
20290  *		ENXIO  - Not Ready, medium not present
20291  *		errno return code from sd_ssc_send()
20292  *
20293  *     Context: Can sleep.
20294  */
20295 
static int
sd_send_scsi_START_STOP_UNIT(sd_ssc_t *ssc, int pc_flag, int flag,
    int path_flag)
{
	struct	scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);

	/*
	 * Fake success for plain start/stop requests when the device is
	 * already known not to support them.
	 */
	if (un->un_f_check_start_stop &&
	    (pc_flag == SD_START_STOP) &&
	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
	    (un->un_f_start_stop_supported != TRUE)) {
		return (0);
	}

	/*
	 * If we are performing an eject operation and
	 * we receive any command other than SD_TARGET_EJECT
	 * we should immediately return.
	 */
	if (flag != SD_TARGET_EJECT) {
		mutex_enter(SD_MUTEX(un));
		if (un->un_f_ejecting == TRUE) {
			mutex_exit(SD_MUTEX(un));
			return (EAGAIN);
		}
		mutex_exit(SD_MUTEX(un));
	}

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	cdb.scc_cmd = SCMD_START_STOP;
	/* CDB byte 4: a power condition goes in the upper nibble. */
	cdb.cdb_opaque[4] = (pc_flag == SD_POWER_CONDITION) ?
	    (uchar_t)(flag << 4) : (uchar_t)flag;

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
	ucmd_buf.uscsi_bufaddr	= NULL;
	ucmd_buf.uscsi_buflen	= 0;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 200;

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, path_flag);

	switch (status) {
	case 0:
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
				switch (scsi_sense_key(
				    (uint8_t *)&sense_buf)) {
				case KEY_ILLEGAL_REQUEST:
					status = ENOTSUP;
					break;
				case KEY_NOT_READY:
					/* ASC 0x3A: medium not present */
					if (scsi_sense_asc(
					    (uint8_t *)&sense_buf)
					    == 0x3A) {
						status = ENXIO;
					}
					break;
				default:
					break;
				}
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");

	return (status);
}
20395 
20396 
20397 /*
20398  *    Function: sd_start_stop_unit_callback
20399  *
20400  * Description: timeout(9F) callback to begin recovery process for a
20401  *		device that has spun down.
20402  *
20403  *   Arguments: arg - pointer to associated softstate struct.
20404  *
20405  *     Context: Executes in a timeout(9F) thread context
20406  */
20407 
20408 static void
20409 sd_start_stop_unit_callback(void *arg)
20410 {
20411 	struct sd_lun	*un = arg;
20412 	ASSERT(un != NULL);
20413 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20414 
20415 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
20416 
20417 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
20418 }
20419 
20420 
20421 /*
20422  *    Function: sd_start_stop_unit_task
20423  *
20424  * Description: Recovery procedure when a drive is spun down.
20425  *
20426  *   Arguments: arg - pointer to associated softstate struct.
20427  *
20428  *     Context: Executes in a taskq() thread context
20429  */
20430 
static void
sd_start_stop_unit_task(void *arg)
{
	struct sd_lun	*un = arg;
	sd_ssc_t	*ssc;
	int		power_level;
	int		rval;

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");

	/*
	 * Some unformatted drives report not ready error, no need to
	 * restart if format has been initiated.
	 */
	mutex_enter(SD_MUTEX(un));
	if (un->un_f_format_in_progress == TRUE) {
		mutex_exit(SD_MUTEX(un));
		return;
	}
	mutex_exit(SD_MUTEX(un));

	ssc = sd_ssc_init(un);
	/*
	 * When a START STOP command is issued from here, it is part of a
	 * failure recovery operation and must be issued before any other
	 * commands, including any pending retries. Thus it must be sent
	 * using SD_PATH_DIRECT_PRIORITY. It doesn't matter if the spin up
	 * succeeds or not, we will start I/O after the attempt.
	 * If power condition is supported and the current power level
	 * is capable of performing I/O, we should set the power condition
	 * to that level. Otherwise, set the power condition to ACTIVE.
	 */
	if (un->un_f_power_condition_supported) {
		mutex_enter(SD_MUTEX(un));
		ASSERT(SD_PM_IS_LEVEL_VALID(un, un->un_power_level));
		/*
		 * Keep the current power level only if it can actually
		 * perform I/O (non-zero random performance entry in the
		 * power-condition table); otherwise force ACTIVE.
		 */
		power_level = sd_pwr_pc.ran_perf[un->un_power_level]
		    > 0 ? un->un_power_level : SD_SPINDLE_ACTIVE;
		mutex_exit(SD_MUTEX(un));
		rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_POWER_CONDITION,
		    sd_pl2pc[power_level], SD_PATH_DIRECT_PRIORITY);
	} else {
		rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
		    SD_TARGET_START, SD_PATH_DIRECT_PRIORITY);
	}

	/*
	 * A failed spin-up attempt is tolerated (see comment above); tell
	 * the ssc layer to ignore the assessment rather than report it.
	 */
	if (rval != 0)
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	sd_ssc_fini(ssc);
	/*
	 * The above call blocks until the START_STOP_UNIT command completes.
	 * Now that it has completed, we must re-try the original IO that
	 * received the NOT READY condition in the first place. There are
	 * three possible conditions here:
	 *
	 *  (1) The original IO is on un_retry_bp.
	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
	 *	is NULL.
	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
	 *	points to some other, unrelated bp.
	 *
	 * For each case, we must call sd_start_cmds() with un_retry_bp
	 * as the argument. If un_retry_bp is NULL, this will initiate
	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
	 * then this will process the bp on un_retry_bp. That may or may not
	 * be the original IO, but that does not matter: the important thing
	 * is to keep the IO processing going at this point.
	 *
	 * Note: This is a very specific error recovery sequence associated
	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
	 * serialize the I/O with completion of the spin-up.
	 */
	mutex_enter(SD_MUTEX(un));
	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
	    un, un->un_retry_bp);
	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
	sd_start_cmds(un, un->un_retry_bp);
	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
}
20515 
20516 
20517 /*
20518  *    Function: sd_send_scsi_INQUIRY
20519  *
20520  * Description: Issue the scsi INQUIRY command.
20521  *
 *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
 *                      structure for this target.
 *		bufaddr   - buffer to receive the INQUIRY data
 *		buflen    - size of the receive buffer, in bytes
 *		evpd      - EVPD bit value for CDB byte 1; non-zero requests
 *			    a vital product data page
 *		page_code - VPD page code to request when evpd is set
 *		residp    - if non-NULL, set to the command residual
 *			    (untransferred byte count) on success
20529  *
20530  * Return Code: 0   - Success
20531  *		errno return code from sd_ssc_send()
20532  *
20533  *     Context: Can sleep. Does not return until command is completed.
20534  */
20535 
20536 static int
20537 sd_send_scsi_INQUIRY(sd_ssc_t *ssc, uchar_t *bufaddr, size_t buflen,
20538 	uchar_t evpd, uchar_t page_code, size_t *residp)
20539 {
20540 	union scsi_cdb		cdb;
20541 	struct uscsi_cmd	ucmd_buf;
20542 	int			status;
20543 	struct sd_lun		*un;
20544 
20545 	ASSERT(ssc != NULL);
20546 	un = ssc->ssc_un;
20547 	ASSERT(un != NULL);
20548 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20549 	ASSERT(bufaddr != NULL);
20550 
20551 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
20552 
20553 	bzero(&cdb, sizeof (cdb));
20554 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20555 	bzero(bufaddr, buflen);
20556 
20557 	cdb.scc_cmd = SCMD_INQUIRY;
20558 	cdb.cdb_opaque[1] = evpd;
20559 	cdb.cdb_opaque[2] = page_code;
20560 	FORMG0COUNT(&cdb, buflen);
20561 
20562 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20563 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
20564 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20565 	ucmd_buf.uscsi_buflen	= buflen;
20566 	ucmd_buf.uscsi_rqbuf	= NULL;
20567 	ucmd_buf.uscsi_rqlen	= 0;
20568 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
20569 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
20570 
20571 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20572 	    UIO_SYSSPACE, SD_PATH_DIRECT);
20573 
20574 	/*
20575 	 * Only handle status == 0, the upper-level caller
20576 	 * will put different assessment based on the context.
20577 	 */
20578 	if (status == 0)
20579 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20580 
20581 	if ((status == 0) && (residp != NULL)) {
20582 		*residp = ucmd_buf.uscsi_resid;
20583 	}
20584 
20585 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
20586 
20587 	return (status);
20588 }
20589 
20590 
20591 /*
20592  *    Function: sd_send_scsi_TEST_UNIT_READY
20593  *
20594  * Description: Issue the scsi TEST UNIT READY command.
20595  *		This routine can be told to set the flag USCSI_DIAGNOSE to
20596  *		prevent retrying failed commands. Use this when the intent
20597  *		is either to check for device readiness, to clear a Unit
20598  *		Attention, or to clear any outstanding sense data.
20599  *		However under specific conditions the expected behavior
20600  *		is for retries to bring a device ready, so use the flag
20601  *		with caution.
20602  *
20603  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
20604  *                      structure for this target.
20605  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
20606  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
20607  *			0: dont check for media present, do retries on cmd.
20608  *
20609  * Return Code: 0   - Success
20610  *		EIO - IO error
20611  *		EACCES - Reservation conflict detected
20612  *		ENXIO  - Not Ready, medium not present
20613  *		errno return code from sd_ssc_send()
20614  *
20615  *     Context: Can sleep. Does not return until command is completed.
20616  */
20617 
static int
sd_send_scsi_TEST_UNIT_READY(sd_ssc_t *ssc, int flag)
{
	struct	scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);

	/*
	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
	 * timeouts when they receive a TUR and the queue is not empty. Check
	 * the configuration flag set during attach (indicating the drive has
	 * this firmware bug) and un_ncmds_in_transport before issuing the
	 * TUR. If there are
	 * pending commands return success, this is a bit arbitrary but is ok
	 * for non-removables (i.e. the eliteI disks) and non-clustering
	 * configurations.
	 */
	if (un->un_f_cfg_tur_check == TRUE) {
		mutex_enter(SD_MUTEX(un));
		if (un->un_ncmds_in_transport != 0) {
			mutex_exit(SD_MUTEX(un));
			return (0);
		}
		mutex_exit(SD_MUTEX(un));
	}

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	/* TEST UNIT READY is a Group 0 command with no data phase. */
	cdb.scc_cmd = SCMD_TEST_UNIT_READY;

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
	ucmd_buf.uscsi_bufaddr	= NULL;
	ucmd_buf.uscsi_buflen	= 0;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;

	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
	if ((flag & SD_DONT_RETRY_TUR) != 0) {
		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
	}
	ucmd_buf.uscsi_timeout	= 60;

	/*
	 * SD_BYPASS_PM selects the direct path so the TUR does not trigger
	 * power-management state changes; otherwise use the standard path.
	 */
	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT :
	    SD_PATH_STANDARD));

	/*
	 * Decode the transport status:
	 *   reservation conflict		-> EACCES
	 *   check condition with sense key	-> ENXIO, but only when the
	 *   NOT READY / ASC 0x3A (medium	   caller asked for a media
	 *   not present)			   check (SD_CHECK_FOR_MEDIA)
	 * All other errors pass through unchanged.
	 */
	switch (status) {
	case 0:
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
				break;
			}
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_key((uint8_t *)&sense_buf) ==
			    KEY_NOT_READY) &&
			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x3A)) {
				status = ENXIO;
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");

	return (status);
}
20710 
20711 /*
20712  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
20713  *
20714  * Description: Issue the scsi PERSISTENT RESERVE IN command.
20715  *
20716  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
20717  *                      structure for this target.
20718  *
20719  * Return Code: 0   - Success
20720  *		EACCES
20721  *		ENOTSUP
20722  *		errno return code from sd_ssc_send()
20723  *
20724  *     Context: Can sleep. Does not return until command is completed.
20725  */
20726 
static int
sd_send_scsi_PERSISTENT_RESERVE_IN(sd_ssc_t *ssc, uchar_t  usr_cmd,
	uint16_t data_len, uchar_t *data_bufp)
{
	struct scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	int			no_caller_buf = FALSE;	/* we own data_bufp */
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));
	/* Only the READ KEYS and READ RESERVATION service actions are used */
	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
	if (data_bufp == NULL) {
		/* Allocate a default buf if the caller did not give one */
		ASSERT(data_len == 0);
		data_len  = MHIOC_RESV_KEY_SIZE;
		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
		no_caller_buf = TRUE;	/* remember to free it on exit */
	}

	/* Group 1 CDB: service action in byte 1, allocation length via G1 */
	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
	cdb.cdb_opaque[1] = usr_cmd;
	FORMG1COUNT(&cdb, data_len);

	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
	ucmd_buf.uscsi_buflen	= data_len;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, SD_PATH_STANDARD);

	/*
	 * Map transport errors: reservation conflict -> EACCES; a check
	 * condition with ILLEGAL REQUEST means the device does not
	 * support persistent reservations -> ENOTSUP.
	 */
	switch (status) {
	case 0:
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);

		break;	/* Success! */
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			status = EACCES;
			break;
		case STATUS_CHECK:
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_key((uint8_t *)&sense_buf) ==
			    KEY_ILLEGAL_REQUEST)) {
				status = ENOTSUP;
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");

	/* Release the buffer only if we allocated it above */
	if (no_caller_buf == TRUE) {
		kmem_free(data_bufp, data_len);
	}

	return (status);
}
20807 
20808 
20809 /*
20810  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
20811  *
20812  * Description: This routine is the driver entry point for handling CD-ROM
20813  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS,
20814  *		MHIOCGRP_INRESV) by sending the SCSI-3 PROUT commands to the
20815  *		device.
20816  *
20817  *   Arguments: ssc  -  ssc contains un - pointer to soft state struct
20818  *                      for the target.
20819  *		usr_cmd SCSI-3 reservation facility command (one of
20820  *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
20821  *			SD_SCSI3_PREEMPTANDABORT)
20822  *		usr_bufp - user provided pointer register, reserve descriptor or
20823  *			preempt and abort structure (mhioc_register_t,
20824  *                      mhioc_resv_desc_t, mhioc_preemptandabort_t)
20825  *
20826  * Return Code: 0   - Success
20827  *		EACCES
20828  *		ENOTSUP
20829  *		errno return code from sd_ssc_send()
20830  *
20831  *     Context: Can sleep. Does not return until command is completed.
20832  */
20833 
20834 static int
20835 sd_send_scsi_PERSISTENT_RESERVE_OUT(sd_ssc_t *ssc, uchar_t usr_cmd,
20836 	uchar_t	*usr_bufp)
20837 {
20838 	struct scsi_extended_sense	sense_buf;
20839 	union scsi_cdb		cdb;
20840 	struct uscsi_cmd	ucmd_buf;
20841 	int			status;
20842 	uchar_t			data_len = sizeof (sd_prout_t);
20843 	sd_prout_t		*prp;
20844 	struct sd_lun		*un;
20845 
20846 	ASSERT(ssc != NULL);
20847 	un = ssc->ssc_un;
20848 	ASSERT(un != NULL);
20849 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20850 	ASSERT(data_len == 24);	/* required by scsi spec */
20851 
20852 	SD_TRACE(SD_LOG_IO, un,
20853 	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);
20854 
20855 	if (usr_bufp == NULL) {
20856 		return (EINVAL);
20857 	}
20858 
20859 	bzero(&cdb, sizeof (cdb));
20860 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20861 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20862 	prp = kmem_zalloc(data_len, KM_SLEEP);
20863 
20864 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
20865 	cdb.cdb_opaque[1] = usr_cmd;
20866 	FORMG1COUNT(&cdb, data_len);
20867 
20868 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20869 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20870 	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
20871 	ucmd_buf.uscsi_buflen	= data_len;
20872 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20873 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20874 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
20875 	ucmd_buf.uscsi_timeout	= 60;
20876 
20877 	switch (usr_cmd) {
20878 	case SD_SCSI3_REGISTER: {
20879 		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;
20880 
20881 		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20882 		bcopy(ptr->newkey.key, prp->service_key,
20883 		    MHIOC_RESV_KEY_SIZE);
20884 		prp->aptpl = ptr->aptpl;
20885 		break;
20886 	}
20887 	case SD_SCSI3_RESERVE:
20888 	case SD_SCSI3_RELEASE: {
20889 		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
20890 
20891 		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20892 		prp->scope_address = BE_32(ptr->scope_specific_addr);
20893 		cdb.cdb_opaque[2] = ptr->type;
20894 		break;
20895 	}
20896 	case SD_SCSI3_PREEMPTANDABORT: {
20897 		mhioc_preemptandabort_t *ptr =
20898 		    (mhioc_preemptandabort_t *)usr_bufp;
20899 
20900 		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
20901 		bcopy(ptr->victim_key.key, prp->service_key,
20902 		    MHIOC_RESV_KEY_SIZE);
20903 		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
20904 		cdb.cdb_opaque[2] = ptr->resvdesc.type;
20905 		ucmd_buf.uscsi_flags |= USCSI_HEAD;
20906 		break;
20907 	}
20908 	case SD_SCSI3_REGISTERANDIGNOREKEY:
20909 	{
20910 		mhioc_registerandignorekey_t *ptr;
20911 		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
20912 		bcopy(ptr->newkey.key,
20913 		    prp->service_key, MHIOC_RESV_KEY_SIZE);
20914 		prp->aptpl = ptr->aptpl;
20915 		break;
20916 	}
20917 	default:
20918 		ASSERT(FALSE);
20919 		break;
20920 	}
20921 
20922 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20923 	    UIO_SYSSPACE, SD_PATH_STANDARD);
20924 
20925 	switch (status) {
20926 	case 0:
20927 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20928 		break;	/* Success! */
20929 	case EIO:
20930 		switch (ucmd_buf.uscsi_status) {
20931 		case STATUS_RESERVATION_CONFLICT:
20932 			status = EACCES;
20933 			break;
20934 		case STATUS_CHECK:
20935 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20936 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20937 			    KEY_ILLEGAL_REQUEST)) {
20938 				status = ENOTSUP;
20939 			}
20940 			break;
20941 		default:
20942 			break;
20943 		}
20944 		break;
20945 	default:
20946 		break;
20947 	}
20948 
20949 	kmem_free(prp, data_len);
20950 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
20951 	return (status);
20952 }
20953 
20954 
20955 /*
20956  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
20957  *
20958  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
20959  *
20960  *   Arguments: un - pointer to the target's soft state struct
20961  *              dkc - pointer to the callback structure
20962  *
20963  * Return Code: 0 - success
20964  *		errno-type error code
20965  *
20966  *     Context: kernel thread context only.
20967  *
20968  *  _______________________________________________________________
20969  * | dkc_flag &   | dkc_callback | DKIOCFLUSHWRITECACHE            |
20970  * |FLUSH_VOLATILE|              | operation                       |
20971  * |______________|______________|_________________________________|
20972  * | 0            | NULL         | Synchronous flush on both       |
20973  * |              |              | volatile and non-volatile cache |
20974  * |______________|______________|_________________________________|
20975  * | 1            | NULL         | Synchronous flush on volatile   |
20976  * |              |              | cache; disk drivers may suppress|
20977  * |              |              | flush if disk table indicates   |
20978  * |              |              | non-volatile cache              |
20979  * |______________|______________|_________________________________|
20980  * | 0            | !NULL        | Asynchronous flush on both      |
20981  * |              |              | volatile and non-volatile cache;|
20982  * |______________|______________|_________________________________|
20983  * | 1            | !NULL        | Asynchronous flush on volatile  |
20984  * |              |              | cache; disk drivers may suppress|
20985  * |              |              | flush if disk table indicates   |
20986  * |              |              | non-volatile cache              |
20987  * |______________|______________|_________________________________|
20988  *
20989  */
20990 
static int
sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
{
	struct sd_uscsi_info	*uip;
	struct uscsi_cmd	*uscmd;
	union scsi_cdb		*cdb;
	struct buf		*bp;
	int			rval = 0;
	int			is_async;	/* caller supplied a callback */

	SD_TRACE(SD_LOG_IO, un,
	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);

	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	/* Async only when the caller provided a completion callback */
	if (dkc == NULL || dkc->dkc_callback == NULL) {
		is_async = FALSE;
	} else {
		is_async = TRUE;
	}

	mutex_enter(SD_MUTEX(un));
	/* check whether cache flush should be suppressed */
	if (un->un_f_suppress_cache_flush == TRUE) {
		mutex_exit(SD_MUTEX(un));
		/*
		 * suppress the cache flush if the device is told to do
		 * so by sd.conf or disk table
		 */
		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_SYNCHRONIZE_CACHE: \
		    skip the cache flush since suppress_cache_flush is %d!\n",
		    un->un_f_suppress_cache_flush);

		if (is_async == TRUE) {
			/* invoke callback for asynchronous flush */
			(*dkc->dkc_callback)(dkc->dkc_cookie, 0);
		}
		/* rval is still 0: a suppressed flush reports success */
		return (rval);
	}
	mutex_exit(SD_MUTEX(un));

	/*
	 * check dkc_flag & FLUSH_VOLATILE so SYNC_NV bit can be
	 * set properly
	 */
	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;

	mutex_enter(SD_MUTEX(un));
	if (dkc != NULL && un->un_f_sync_nv_supported &&
	    (dkc->dkc_flag & FLUSH_VOLATILE)) {
		/*
		 * if the device supports SYNC_NV bit, turn on
		 * the SYNC_NV bit to only flush volatile cache
		 */
		cdb->cdb_un.tag |= SD_SYNC_NV_BIT;
	}
	mutex_exit(SD_MUTEX(un));

	/*
	 * First get some memory for the uscsi_cmd struct and cdb
	 * and initialize for SYNCHRONIZE_CACHE cmd.
	 */
	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
	uscmd->uscsi_cdblen = CDB_GROUP1;
	uscmd->uscsi_cdb = (caddr_t)cdb;
	uscmd->uscsi_bufaddr = NULL;
	uscmd->uscsi_buflen = 0;
	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
	uscmd->uscsi_rqlen = SENSE_LENGTH;
	uscmd->uscsi_rqresid = SENSE_LENGTH;
	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
	uscmd->uscsi_timeout = sd_io_time;

	/*
	 * Allocate an sd_uscsi_info struct and fill it with the info
	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
	 * since we allocate the buf here in this function, we do not
	 * need to preserve the prior contents of b_private.
	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
	 */
	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
	uip->ui_flags = SD_PATH_DIRECT;
	uip->ui_cmdp  = uscmd;

	bp = getrbuf(KM_SLEEP);
	bp->b_private = uip;

	/*
	 * Setup buffer to carry uscsi request.
	 */
	bp->b_flags  = B_BUSY;
	bp->b_bcount = 0;
	bp->b_blkno  = 0;

	if (is_async == TRUE) {
		/*
		 * For async requests the biodone routine frees everything
		 * and fires the caller's callback; copy the dk_callback
		 * so it survives the caller's return.
		 */
		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
		uip->ui_dkc = *dkc;
	}

	bp->b_edev = SD_GET_DEV(un);
	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */

	/*
	 * Unset un_f_sync_cache_required flag
	 */
	mutex_enter(SD_MUTEX(un));
	un->un_f_sync_cache_required = FALSE;
	mutex_exit(SD_MUTEX(un));

	(void) sd_uscsi_strategy(bp);

	/*
	 * If synchronous request, wait for completion
	 * If async just return and let b_iodone callback
	 * cleanup.
	 * NOTE: On return, u_ncmds_in_driver will be decremented,
	 * but it was also incremented in sd_uscsi_strategy(), so
	 * we should be ok.
	 */
	if (is_async == FALSE) {
		(void) biowait(bp);
		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
	}

	return (rval);
}
21120 
21121 
/*
 * Completion routine for SYNCHRONIZE CACHE: decodes the command status,
 * updates the un_f_sync_* feature flags, invokes the caller's dk_callback
 * (async case), and frees all resources allocated by
 * sd_send_scsi_SYNCHRONIZE_CACHE().  Returns the final errno-style status.
 */
static int
sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
{
	struct sd_uscsi_info *uip;
	struct uscsi_cmd *uscmd;
	uint8_t *sense_buf;
	struct sd_lun *un;
	int status;
	union scsi_cdb *cdb;

	uip = (struct sd_uscsi_info *)(bp->b_private);
	ASSERT(uip != NULL);

	uscmd = uip->ui_cmdp;
	ASSERT(uscmd != NULL);

	sense_buf = (uint8_t *)uscmd->uscsi_rqbuf;
	ASSERT(sense_buf != NULL);

	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
	ASSERT(un != NULL);

	/* The CDB is re-examined below to see if SYNC_NV was attempted */
	cdb = (union scsi_cdb *)uscmd->uscsi_cdb;

	status = geterror(bp);
	switch (status) {
	case 0:
		break;	/* Success! */
	case EIO:
		switch (uscmd->uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			/* Ignore reservation conflict */
			status = 0;
			goto done;

		case STATUS_CHECK:
			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_key(sense_buf) ==
			    KEY_ILLEGAL_REQUEST)) {
				/* Ignore Illegal Request error */
				if (cdb->cdb_un.tag&SD_SYNC_NV_BIT) {
					/*
					 * The SYNC_NV bit was rejected:
					 * remember that and treat the
					 * flush itself as successful.
					 */
					mutex_enter(SD_MUTEX(un));
					un->un_f_sync_nv_supported = FALSE;
					mutex_exit(SD_MUTEX(un));
					status = 0;
					SD_TRACE(SD_LOG_IO, un,
					    "un_f_sync_nv_supported \
					    is set to false.\n");
					goto done;
				}

				/*
				 * Plain SYNCHRONIZE CACHE rejected: the
				 * device does not support it at all.
				 */
				mutex_enter(SD_MUTEX(un));
				un->un_f_sync_cache_supported = FALSE;
				mutex_exit(SD_MUTEX(un));
				SD_TRACE(SD_LOG_IO, un,
				    "sd_send_scsi_SYNCHRONIZE_CACHE_biodone: \
				    un_f_sync_cache_supported set to false \
				    with asc = %x, ascq = %x\n",
				    scsi_sense_asc(sense_buf),
				    scsi_sense_ascq(sense_buf));
				status = ENOTSUP;
				goto done;
			}
			break;
		default:
			break;
		}
		/* FALLTHRU */
	default:
		/*
		 * Turn on the un_f_sync_cache_required flag
		 * since the SYNC CACHE command failed
		 */
		mutex_enter(SD_MUTEX(un));
		un->un_f_sync_cache_required = TRUE;
		mutex_exit(SD_MUTEX(un));

		/*
		 * Don't log an error message if this device
		 * has removable media.
		 */
		if (!un->un_f_has_removable_media) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "SYNCHRONIZE CACHE command failed (%d)\n", status);
		}
		break;
	}

done:
	/* dkc_callback is non-NULL only for the async case */
	if (uip->ui_dkc.dkc_callback != NULL) {
		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
	}

	/* Free everything allocated by sd_send_scsi_SYNCHRONIZE_CACHE() */
	ASSERT((bp->b_flags & B_REMAPPED) == 0);
	freerbuf(bp);
	kmem_free(uip, sizeof (struct sd_uscsi_info));
	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
	kmem_free(uscmd, sizeof (struct uscsi_cmd));

	return (status);
}
21224 
21225 
21226 /*
21227  *    Function: sd_send_scsi_GET_CONFIGURATION
21228  *
21229  * Description: Issues the get configuration command to the device.
21230  *		Called from sd_check_for_writable_cd & sd_get_media_info
21231  *		caller needs to ensure that buflen = SD_PROFILE_HEADER_LEN
21232  *   Arguments: ssc
21233  *		ucmdbuf
21234  *		rqbuf
21235  *		rqbuflen
21236  *		bufaddr
21237  *		buflen
21238  *		path_flag
21239  *
21240  * Return Code: 0   - Success
21241  *		errno return code from sd_ssc_send()
21242  *
21243  *     Context: Can sleep. Does not return until command is completed.
21244  *
21245  */
21246 
21247 static int
21248 sd_send_scsi_GET_CONFIGURATION(sd_ssc_t *ssc, struct uscsi_cmd *ucmdbuf,
21249 	uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen,
21250 	int path_flag)
21251 {
21252 	char	cdb[CDB_GROUP1];
21253 	int	status;
21254 	struct sd_lun	*un;
21255 
21256 	ASSERT(ssc != NULL);
21257 	un = ssc->ssc_un;
21258 	ASSERT(un != NULL);
21259 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21260 	ASSERT(bufaddr != NULL);
21261 	ASSERT(ucmdbuf != NULL);
21262 	ASSERT(rqbuf != NULL);
21263 
21264 	SD_TRACE(SD_LOG_IO, un,
21265 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
21266 
21267 	bzero(cdb, sizeof (cdb));
21268 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
21269 	bzero(rqbuf, rqbuflen);
21270 	bzero(bufaddr, buflen);
21271 
21272 	/*
21273 	 * Set up cdb field for the get configuration command.
21274 	 */
21275 	cdb[0] = SCMD_GET_CONFIGURATION;
21276 	cdb[1] = 0x02;  /* Requested Type */
21277 	cdb[8] = SD_PROFILE_HEADER_LEN;
21278 	ucmdbuf->uscsi_cdb = cdb;
21279 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
21280 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
21281 	ucmdbuf->uscsi_buflen = buflen;
21282 	ucmdbuf->uscsi_timeout = sd_io_time;
21283 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
21284 	ucmdbuf->uscsi_rqlen = rqbuflen;
21285 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
21286 
21287 	status = sd_ssc_send(ssc, ucmdbuf, FKIOCTL,
21288 	    UIO_SYSSPACE, path_flag);
21289 
21290 	switch (status) {
21291 	case 0:
21292 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21293 		break;  /* Success! */
21294 	case EIO:
21295 		switch (ucmdbuf->uscsi_status) {
21296 		case STATUS_RESERVATION_CONFLICT:
21297 			status = EACCES;
21298 			break;
21299 		default:
21300 			break;
21301 		}
21302 		break;
21303 	default:
21304 		break;
21305 	}
21306 
21307 	if (status == 0) {
21308 		SD_DUMP_MEMORY(un, SD_LOG_IO,
21309 		    "sd_send_scsi_GET_CONFIGURATION: data",
21310 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
21311 	}
21312 
21313 	SD_TRACE(SD_LOG_IO, un,
21314 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
21315 
21316 	return (status);
21317 }
21318 
21319 /*
21320  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
21321  *
21322  * Description: Issues the get configuration command to the device to
21323  *              retrieve a specific feature. Called from
21324  *		sd_check_for_writable_cd & sd_set_mmc_caps.
21325  *   Arguments: ssc
21326  *              ucmdbuf
21327  *              rqbuf
21328  *              rqbuflen
21329  *              bufaddr
21330  *              buflen
21331  *		feature
21332  *
21333  * Return Code: 0   - Success
21334  *              errno return code from sd_ssc_send()
21335  *
21336  *     Context: Can sleep. Does not return until command is completed.
21337  *
21338  */
21339 static int
21340 sd_send_scsi_feature_GET_CONFIGURATION(sd_ssc_t *ssc,
21341 	struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
21342 	uchar_t *bufaddr, uint_t buflen, char feature, int path_flag)
21343 {
21344 	char    cdb[CDB_GROUP1];
21345 	int	status;
21346 	struct sd_lun	*un;
21347 
21348 	ASSERT(ssc != NULL);
21349 	un = ssc->ssc_un;
21350 	ASSERT(un != NULL);
21351 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21352 	ASSERT(bufaddr != NULL);
21353 	ASSERT(ucmdbuf != NULL);
21354 	ASSERT(rqbuf != NULL);
21355 
21356 	SD_TRACE(SD_LOG_IO, un,
21357 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
21358 
21359 	bzero(cdb, sizeof (cdb));
21360 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
21361 	bzero(rqbuf, rqbuflen);
21362 	bzero(bufaddr, buflen);
21363 
21364 	/*
21365 	 * Set up cdb field for the get configuration command.
21366 	 */
21367 	cdb[0] = SCMD_GET_CONFIGURATION;
21368 	cdb[1] = 0x02;  /* Requested Type */
21369 	cdb[3] = feature;
21370 	cdb[8] = buflen;
21371 	ucmdbuf->uscsi_cdb = cdb;
21372 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
21373 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
21374 	ucmdbuf->uscsi_buflen = buflen;
21375 	ucmdbuf->uscsi_timeout = sd_io_time;
21376 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
21377 	ucmdbuf->uscsi_rqlen = rqbuflen;
21378 	ucmdbuf->uscsi_flags = USCSI_RQENABLE|USCSI_SILENT|USCSI_READ;
21379 
21380 	status = sd_ssc_send(ssc, ucmdbuf, FKIOCTL,
21381 	    UIO_SYSSPACE, path_flag);
21382 
21383 	switch (status) {
21384 	case 0:
21385 
21386 		break;  /* Success! */
21387 	case EIO:
21388 		switch (ucmdbuf->uscsi_status) {
21389 		case STATUS_RESERVATION_CONFLICT:
21390 			status = EACCES;
21391 			break;
21392 		default:
21393 			break;
21394 		}
21395 		break;
21396 	default:
21397 		break;
21398 	}
21399 
21400 	if (status == 0) {
21401 		SD_DUMP_MEMORY(un, SD_LOG_IO,
21402 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
21403 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
21404 	}
21405 
21406 	SD_TRACE(SD_LOG_IO, un,
21407 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
21408 
21409 	return (status);
21410 }
21411 
21412 
21413 /*
21414  *    Function: sd_send_scsi_MODE_SENSE
21415  *
21416  * Description: Utility function for issuing a scsi MODE SENSE command.
21417  *		Note: This routine uses a consistent implementation for Group0,
21418  *		Group1, and Group2 commands across all platforms. ATAPI devices
21419  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
21420  *
21421  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21422  *                      structure for this target.
21423  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
21424  *			  CDB_GROUP[1|2] (10 byte).
21425  *		bufaddr - buffer for page data retrieved from the target.
21426  *		buflen - size of page to be retrieved.
21427  *		page_code - page code of data to be retrieved from the target.
21428  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
21429  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
21430  *			to use the USCSI "direct" chain and bypass the normal
21431  *			command waitq.
21432  *
21433  * Return Code: 0   - Success
21434  *		errno return code from sd_ssc_send()
21435  *
21436  *     Context: Can sleep. Does not return until command is completed.
21437  */
21438 
21439 static int
21440 sd_send_scsi_MODE_SENSE(sd_ssc_t *ssc, int cdbsize, uchar_t *bufaddr,
21441 	size_t buflen,  uchar_t page_code, int path_flag)
21442 {
21443 	struct	scsi_extended_sense	sense_buf;
21444 	union scsi_cdb		cdb;
21445 	struct uscsi_cmd	ucmd_buf;
21446 	int			status;
21447 	int			headlen;
21448 	struct sd_lun		*un;
21449 
21450 	ASSERT(ssc != NULL);
21451 	un = ssc->ssc_un;
21452 	ASSERT(un != NULL);
21453 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21454 	ASSERT(bufaddr != NULL);
21455 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
21456 	    (cdbsize == CDB_GROUP2));
21457 
21458 	SD_TRACE(SD_LOG_IO, un,
21459 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
21460 
21461 	bzero(&cdb, sizeof (cdb));
21462 	bzero(&ucmd_buf, sizeof (ucmd_buf));
21463 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21464 	bzero(bufaddr, buflen);
21465 
21466 	if (cdbsize == CDB_GROUP0) {
21467 		cdb.scc_cmd = SCMD_MODE_SENSE;
21468 		cdb.cdb_opaque[2] = page_code;
21469 		FORMG0COUNT(&cdb, buflen);
21470 		headlen = MODE_HEADER_LENGTH;
21471 	} else {
21472 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
21473 		cdb.cdb_opaque[2] = page_code;
21474 		FORMG1COUNT(&cdb, buflen);
21475 		headlen = MODE_HEADER_LENGTH_GRP2;
21476 	}
21477 
21478 	ASSERT(headlen <= buflen);
21479 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
21480 
21481 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21482 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
21483 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
21484 	ucmd_buf.uscsi_buflen	= buflen;
21485 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21486 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21487 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
21488 	ucmd_buf.uscsi_timeout	= 60;
21489 
21490 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21491 	    UIO_SYSSPACE, path_flag);
21492 
21493 	switch (status) {
21494 	case 0:
21495 		/*
21496 		 * sr_check_wp() uses 0x3f page code and check the header of
21497 		 * mode page to determine if target device is write-protected.
21498 		 * But some USB devices return 0 bytes for 0x3f page code. For
21499 		 * this case, make sure that mode page header is returned at
21500 		 * least.
21501 		 */
21502 		if (buflen - ucmd_buf.uscsi_resid <  headlen) {
21503 			status = EIO;
21504 			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
21505 			    "mode page header is not returned");
21506 		}
21507 		break;	/* Success! */
21508 	case EIO:
21509 		switch (ucmd_buf.uscsi_status) {
21510 		case STATUS_RESERVATION_CONFLICT:
21511 			status = EACCES;
21512 			break;
21513 		default:
21514 			break;
21515 		}
21516 		break;
21517 	default:
21518 		break;
21519 	}
21520 
21521 	if (status == 0) {
21522 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
21523 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
21524 	}
21525 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
21526 
21527 	return (status);
21528 }
21529 
21530 
21531 /*
21532  *    Function: sd_send_scsi_MODE_SELECT
21533  *
21534  * Description: Utility function for issuing a scsi MODE SELECT command.
21535  *		Note: This routine uses a consistent implementation for Group0,
21536  *		Group1, and Group2 commands across all platforms. ATAPI devices
21537  *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select
21538  *
21539  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21540  *                      structure for this target.
21541  *		cdbsize - size CDB to be used (CDB_GROUP0 (6 byte), or
21542  *			  CDB_GROUP[1|2] (10 byte).
21543  *		bufaddr - buffer for page data retrieved from the target.
21544  *		buflen - size of page to be retrieved.
 *		save_page - boolean to determine if SP bit should be set.
21546  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
21547  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
21548  *			to use the USCSI "direct" chain and bypass the normal
21549  *			command waitq.
21550  *
21551  * Return Code: 0   - Success
21552  *		errno return code from sd_ssc_send()
21553  *
21554  *     Context: Can sleep. Does not return until command is completed.
21555  */
21556 
21557 static int
21558 sd_send_scsi_MODE_SELECT(sd_ssc_t *ssc, int cdbsize, uchar_t *bufaddr,
21559 	size_t buflen,  uchar_t save_page, int path_flag)
21560 {
21561 	struct	scsi_extended_sense	sense_buf;
21562 	union scsi_cdb		cdb;
21563 	struct uscsi_cmd	ucmd_buf;
21564 	int			status;
21565 	struct sd_lun		*un;
21566 
21567 	ASSERT(ssc != NULL);
21568 	un = ssc->ssc_un;
21569 	ASSERT(un != NULL);
21570 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21571 	ASSERT(bufaddr != NULL);
21572 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
21573 	    (cdbsize == CDB_GROUP2));
21574 
21575 	SD_TRACE(SD_LOG_IO, un,
21576 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
21577 
21578 	bzero(&cdb, sizeof (cdb));
21579 	bzero(&ucmd_buf, sizeof (ucmd_buf));
21580 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21581 
21582 	/* Set the PF bit for many third party drives */
21583 	cdb.cdb_opaque[1] = 0x10;
21584 
21585 	/* Set the savepage(SP) bit if given */
21586 	if (save_page == SD_SAVE_PAGE) {
21587 		cdb.cdb_opaque[1] |= 0x01;
21588 	}
21589 
21590 	if (cdbsize == CDB_GROUP0) {
21591 		cdb.scc_cmd = SCMD_MODE_SELECT;
21592 		FORMG0COUNT(&cdb, buflen);
21593 	} else {
21594 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
21595 		FORMG1COUNT(&cdb, buflen);
21596 	}
21597 
21598 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
21599 
21600 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21601 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
21602 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
21603 	ucmd_buf.uscsi_buflen	= buflen;
21604 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21605 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21606 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
21607 	ucmd_buf.uscsi_timeout	= 60;
21608 
21609 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21610 	    UIO_SYSSPACE, path_flag);
21611 
21612 	switch (status) {
21613 	case 0:
21614 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21615 		break;	/* Success! */
21616 	case EIO:
21617 		switch (ucmd_buf.uscsi_status) {
21618 		case STATUS_RESERVATION_CONFLICT:
21619 			status = EACCES;
21620 			break;
21621 		default:
21622 			break;
21623 		}
21624 		break;
21625 	default:
21626 		break;
21627 	}
21628 
21629 	if (status == 0) {
21630 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
21631 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
21632 	}
21633 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
21634 
21635 	return (status);
21636 }
21637 
21638 
21639 /*
21640  *    Function: sd_send_scsi_RDWR
21641  *
21642  * Description: Issue a scsi READ or WRITE command with the given parameters.
21643  *
21644  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21645  *                      structure for this target.
21646  *		cmd:	 SCMD_READ or SCMD_WRITE
21647  *		bufaddr: Address of caller's buffer to receive the RDWR data
21648  *		buflen:  Length of caller's buffer receive the RDWR data.
21649  *		start_block: Block number for the start of the RDWR operation.
21650  *			 (Assumes target-native block size.)
 *		residp:  Pointer to variable to receive the residual of the
 *			 RDWR operation (may be NULL if no residual requested).
21653  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
21654  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
21655  *			to use the USCSI "direct" chain and bypass the normal
21656  *			command waitq.
21657  *
21658  * Return Code: 0   - Success
21659  *		errno return code from sd_ssc_send()
21660  *
21661  *     Context: Can sleep. Does not return until command is completed.
21662  */
21663 
21664 static int
21665 sd_send_scsi_RDWR(sd_ssc_t *ssc, uchar_t cmd, void *bufaddr,
21666 	size_t buflen, daddr_t start_block, int path_flag)
21667 {
21668 	struct	scsi_extended_sense	sense_buf;
21669 	union scsi_cdb		cdb;
21670 	struct uscsi_cmd	ucmd_buf;
21671 	uint32_t		block_count;
21672 	int			status;
21673 	int			cdbsize;
21674 	uchar_t			flag;
21675 	struct sd_lun		*un;
21676 
21677 	ASSERT(ssc != NULL);
21678 	un = ssc->ssc_un;
21679 	ASSERT(un != NULL);
21680 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21681 	ASSERT(bufaddr != NULL);
21682 	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
21683 
21684 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
21685 
21686 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
21687 		return (EINVAL);
21688 	}
21689 
21690 	mutex_enter(SD_MUTEX(un));
21691 	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
21692 	mutex_exit(SD_MUTEX(un));
21693 
21694 	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
21695 
21696 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
21697 	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
21698 	    bufaddr, buflen, start_block, block_count);
21699 
21700 	bzero(&cdb, sizeof (cdb));
21701 	bzero(&ucmd_buf, sizeof (ucmd_buf));
21702 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21703 
21704 	/* Compute CDB size to use */
21705 	if (start_block > 0xffffffff)
21706 		cdbsize = CDB_GROUP4;
21707 	else if ((start_block & 0xFFE00000) ||
21708 	    (un->un_f_cfg_is_atapi == TRUE))
21709 		cdbsize = CDB_GROUP1;
21710 	else
21711 		cdbsize = CDB_GROUP0;
21712 
21713 	switch (cdbsize) {
21714 	case CDB_GROUP0:	/* 6-byte CDBs */
21715 		cdb.scc_cmd = cmd;
21716 		FORMG0ADDR(&cdb, start_block);
21717 		FORMG0COUNT(&cdb, block_count);
21718 		break;
21719 	case CDB_GROUP1:	/* 10-byte CDBs */
21720 		cdb.scc_cmd = cmd | SCMD_GROUP1;
21721 		FORMG1ADDR(&cdb, start_block);
21722 		FORMG1COUNT(&cdb, block_count);
21723 		break;
21724 	case CDB_GROUP4:	/* 16-byte CDBs */
21725 		cdb.scc_cmd = cmd | SCMD_GROUP4;
21726 		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
21727 		FORMG4COUNT(&cdb, block_count);
21728 		break;
21729 	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
21730 	default:
21731 		/* All others reserved */
21732 		return (EINVAL);
21733 	}
21734 
21735 	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
21736 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
21737 
21738 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21739 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
21740 	ucmd_buf.uscsi_bufaddr	= bufaddr;
21741 	ucmd_buf.uscsi_buflen	= buflen;
21742 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21743 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21744 	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
21745 	ucmd_buf.uscsi_timeout	= 60;
21746 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21747 	    UIO_SYSSPACE, path_flag);
21748 
21749 	switch (status) {
21750 	case 0:
21751 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21752 		break;	/* Success! */
21753 	case EIO:
21754 		switch (ucmd_buf.uscsi_status) {
21755 		case STATUS_RESERVATION_CONFLICT:
21756 			status = EACCES;
21757 			break;
21758 		default:
21759 			break;
21760 		}
21761 		break;
21762 	default:
21763 		break;
21764 	}
21765 
21766 	if (status == 0) {
21767 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
21768 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
21769 	}
21770 
21771 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
21772 
21773 	return (status);
21774 }
21775 
21776 
21777 /*
21778  *    Function: sd_send_scsi_LOG_SENSE
21779  *
21780  * Description: Issue a scsi LOG_SENSE command with the given parameters.
21781  *
21782  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21783  *                      structure for this target.
21784  *
21785  * Return Code: 0   - Success
21786  *		errno return code from sd_ssc_send()
21787  *
21788  *     Context: Can sleep. Does not return until command is completed.
21789  */
21790 
static int
sd_send_scsi_LOG_SENSE(sd_ssc_t *ssc, uchar_t *bufaddr, uint16_t buflen,
	uchar_t page_code, uchar_t page_control, uint16_t param_ptr,
	int path_flag)

{
	struct scsi_extended_sense	sense_buf;
	union scsi_cdb		cdb;
	struct uscsi_cmd	ucmd_buf;
	int			status;
	struct sd_lun		*un;

	ASSERT(ssc != NULL);
	un = ssc->ssc_un;
	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);

	bzero(&cdb, sizeof (cdb));
	bzero(&ucmd_buf, sizeof (ucmd_buf));
	bzero(&sense_buf, sizeof (struct scsi_extended_sense));

	/*
	 * Build the 10-byte LOG SENSE CDB: byte 2 combines the page
	 * control field (bits 7-6) with the page code (bits 5-0), and
	 * bytes 5-6 carry the parameter pointer in big-endian order.
	 */
	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
	FORMG1COUNT(&cdb, buflen);

	/* Issue the command with auto request sense enabled. */
	ucmd_buf.uscsi_cdb	= (char *)&cdb;
	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
	ucmd_buf.uscsi_buflen	= buflen;
	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
	ucmd_buf.uscsi_timeout	= 60;

	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
	    UIO_SYSSPACE, path_flag);

	switch (status) {
	case 0:
		break;
	case EIO:
		switch (ucmd_buf.uscsi_status) {
		case STATUS_RESERVATION_CONFLICT:
			/* Another initiator holds a reservation: EACCES. */
			status = EACCES;
			break;
		case STATUS_CHECK:
			/*
			 * Look only at ILLEGAL REQUEST / ASC 0x24, i.e. the
			 * device rejected a field in the CDB (typically the
			 * requested log page is not implemented).
			 */
			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
			    (scsi_sense_key((uint8_t *)&sense_buf) ==
				KEY_ILLEGAL_REQUEST) &&
			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x24)) {
				/*
				 * ASC 0x24: INVALID FIELD IN CDB
				 */
				switch (page_code) {
				case START_STOP_CYCLE_PAGE:
					/*
					 * The start stop cycle counter is
					 * implemented as page 0x31 in earlier
					 * generation disks. In new generation
					 * disks the start stop cycle counter is
					 * implemented as page 0xE. To properly
					 * handle this case if an attempt for
					 * log page 0xE is made and fails we
					 * will try again using page 0x31.
					 *
					 * Network storage BU committed to
					 * maintain the page 0x31 for this
					 * purpose and will not have any other
					 * page implemented with page code 0x31
					 * until all disks transition to the
					 * standard page.
					 */
					mutex_enter(SD_MUTEX(un));
					un->un_start_stop_cycle_page =
					    START_STOP_CYCLE_VU_PAGE;
					cdb.cdb_opaque[2] =
					    (char)(page_control << 6) |
					    un->un_start_stop_cycle_page;
					mutex_exit(SD_MUTEX(un));
					/*
					 * Mark the failed first attempt as
					 * ignorable and reissue the command;
					 * ucmd_buf still references the
					 * updated cdb above.
					 */
					sd_ssc_assessment(ssc, SD_FMT_IGNORE);
					status = sd_ssc_send(
					    ssc, &ucmd_buf, FKIOCTL,
					    UIO_SYSSPACE, path_flag);

					break;
				case TEMPERATURE_PAGE:
					/*
					 * Device does not implement the
					 * temperature log page.
					 */
					status = ENOTTY;
					break;
				default:
					break;
				}
			}
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	/*
	 * On success (including a successful page-0x31 retry), record a
	 * standard assessment and dump the returned log page data.
	 */
	if (status == 0) {
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
	}

	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");

	return (status);
}
21906 
21907 
21908 /*
21909  *    Function: sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION
21910  *
21911  * Description: Issue the scsi GET EVENT STATUS NOTIFICATION command.
21912  *
21913  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21914  *                      structure for this target.
21915  *		bufaddr
21916  *		buflen
21917  *		class_req
21918  *
21919  * Return Code: 0   - Success
21920  *		errno return code from sd_ssc_send()
21921  *
21922  *     Context: Can sleep. Does not return until command is completed.
21923  */
21924 
21925 static int
21926 sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION(sd_ssc_t *ssc, uchar_t *bufaddr,
21927 	size_t buflen, uchar_t class_req)
21928 {
21929 	union scsi_cdb		cdb;
21930 	struct uscsi_cmd	ucmd_buf;
21931 	int			status;
21932 	struct sd_lun		*un;
21933 
21934 	ASSERT(ssc != NULL);
21935 	un = ssc->ssc_un;
21936 	ASSERT(un != NULL);
21937 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21938 	ASSERT(bufaddr != NULL);
21939 
21940 	SD_TRACE(SD_LOG_IO, un,
21941 	    "sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION: entry: un:0x%p\n", un);
21942 
21943 	bzero(&cdb, sizeof (cdb));
21944 	bzero(&ucmd_buf, sizeof (ucmd_buf));
21945 	bzero(bufaddr, buflen);
21946 
21947 	cdb.scc_cmd = SCMD_GET_EVENT_STATUS_NOTIFICATION;
21948 	cdb.cdb_opaque[1] = 1; /* polled */
21949 	cdb.cdb_opaque[4] = class_req;
21950 	FORMG1COUNT(&cdb, buflen);
21951 
21952 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21953 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
21954 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
21955 	ucmd_buf.uscsi_buflen	= buflen;
21956 	ucmd_buf.uscsi_rqbuf	= NULL;
21957 	ucmd_buf.uscsi_rqlen	= 0;
21958 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
21959 	ucmd_buf.uscsi_timeout	= 60;
21960 
21961 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21962 	    UIO_SYSSPACE, SD_PATH_DIRECT);
21963 
21964 	/*
21965 	 * Only handle status == 0, the upper-level caller
21966 	 * will put different assessment based on the context.
21967 	 */
21968 	if (status == 0) {
21969 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21970 
21971 		if (ucmd_buf.uscsi_resid != 0) {
21972 			status = EIO;
21973 		}
21974 	}
21975 
21976 	SD_TRACE(SD_LOG_IO, un,
21977 	    "sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION: exit\n");
21978 
21979 	return (status);
21980 }
21981 
21982 
21983 static boolean_t
21984 sd_gesn_media_data_valid(uchar_t *data)
21985 {
21986 	uint16_t			len;
21987 
21988 	len = (data[1] << 8) | data[0];
21989 	return ((len >= 6) &&
21990 	    ((data[2] & SD_GESN_HEADER_NEA) == 0) &&
21991 	    ((data[2] & SD_GESN_HEADER_CLASS) == SD_GESN_MEDIA_CLASS) &&
21992 	    ((data[3] & (1 << SD_GESN_MEDIA_CLASS)) != 0));
21993 }
21994 
21995 
21996 /*
21997  *    Function: sdioctl
21998  *
21999  * Description: Driver's ioctl(9e) entry point function.
22000  *
22001  *   Arguments: dev     - device number
22002  *		cmd     - ioctl operation to be performed
22003  *		arg     - user argument, contains data to be set or reference
22004  *			  parameter for get
22005  *		flag    - bit flag, indicating open settings, 32/64 bit type
22006  *		cred_p  - user credential pointer
22007  *		rval_p  - calling process return value (OPT)
22008  *
22009  * Return Code: EINVAL
22010  *		ENOTTY
22011  *		ENXIO
22012  *		EIO
22013  *		EFAULT
22014  *		ENOTSUP
22015  *		EPERM
22016  *
22017  *     Context: Called from the device switch at normal priority.
22018  */
22019 
22020 static int
22021 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
22022 {
22023 	struct sd_lun	*un = NULL;
22024 	int		err = 0;
22025 	int		i = 0;
22026 	cred_t		*cr;
22027 	int		tmprval = EINVAL;
22028 	boolean_t	is_valid;
22029 	sd_ssc_t	*ssc;
22030 
22031 	/*
22032 	 * All device accesses go thru sdstrategy where we check on suspend
22033 	 * status
22034 	 */
22035 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22036 		return (ENXIO);
22037 	}
22038 
22039 	ASSERT(!mutex_owned(SD_MUTEX(un)));
22040 
22041 	/* Initialize sd_ssc_t for internal uscsi commands */
22042 	ssc = sd_ssc_init(un);
22043 
22044 	is_valid = SD_IS_VALID_LABEL(un);
22045 
22046 	/*
22047 	 * Moved this wait from sd_uscsi_strategy to here for
22048 	 * reasons of deadlock prevention. Internal driver commands,
22049 	 * specifically those to change a devices power level, result
22050 	 * in a call to sd_uscsi_strategy.
22051 	 */
22052 	mutex_enter(SD_MUTEX(un));
22053 	while ((un->un_state == SD_STATE_SUSPENDED) ||
22054 	    (un->un_state == SD_STATE_PM_CHANGING)) {
22055 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
22056 	}
22057 	/*
22058 	 * Twiddling the counter here protects commands from now
22059 	 * through to the top of sd_uscsi_strategy. Without the
22060 	 * counter inc. a power down, for example, could get in
22061 	 * after the above check for state is made and before
22062 	 * execution gets to the top of sd_uscsi_strategy.
22063 	 * That would cause problems.
22064 	 */
22065 	un->un_ncmds_in_driver++;
22066 
22067 	if (!is_valid &&
22068 	    (flag & (FNDELAY | FNONBLOCK))) {
22069 		switch (cmd) {
22070 		case DKIOCGGEOM:	/* SD_PATH_DIRECT */
22071 		case DKIOCGVTOC:
22072 		case DKIOCGEXTVTOC:
22073 		case DKIOCGAPART:
22074 		case DKIOCPARTINFO:
22075 		case DKIOCEXTPARTINFO:
22076 		case DKIOCSGEOM:
22077 		case DKIOCSAPART:
22078 		case DKIOCGETEFI:
22079 		case DKIOCPARTITION:
22080 		case DKIOCSVTOC:
22081 		case DKIOCSEXTVTOC:
22082 		case DKIOCSETEFI:
22083 		case DKIOCGMBOOT:
22084 		case DKIOCSMBOOT:
22085 		case DKIOCG_PHYGEOM:
22086 		case DKIOCG_VIRTGEOM:
22087 #if defined(__i386) || defined(__amd64)
22088 		case DKIOCSETEXTPART:
22089 #endif
22090 			/* let cmlb handle it */
22091 			goto skip_ready_valid;
22092 
22093 		case CDROMPAUSE:
22094 		case CDROMRESUME:
22095 		case CDROMPLAYMSF:
22096 		case CDROMPLAYTRKIND:
22097 		case CDROMREADTOCHDR:
22098 		case CDROMREADTOCENTRY:
22099 		case CDROMSTOP:
22100 		case CDROMSTART:
22101 		case CDROMVOLCTRL:
22102 		case CDROMSUBCHNL:
22103 		case CDROMREADMODE2:
22104 		case CDROMREADMODE1:
22105 		case CDROMREADOFFSET:
22106 		case CDROMSBLKMODE:
22107 		case CDROMGBLKMODE:
22108 		case CDROMGDRVSPEED:
22109 		case CDROMSDRVSPEED:
22110 		case CDROMCDDA:
22111 		case CDROMCDXA:
22112 		case CDROMSUBCODE:
22113 			if (!ISCD(un)) {
22114 				un->un_ncmds_in_driver--;
22115 				ASSERT(un->un_ncmds_in_driver >= 0);
22116 				mutex_exit(SD_MUTEX(un));
22117 				err = ENOTTY;
22118 				goto done_without_assess;
22119 			}
22120 			break;
22121 		case FDEJECT:
22122 		case DKIOCEJECT:
22123 		case CDROMEJECT:
22124 			if (!un->un_f_eject_media_supported) {
22125 				un->un_ncmds_in_driver--;
22126 				ASSERT(un->un_ncmds_in_driver >= 0);
22127 				mutex_exit(SD_MUTEX(un));
22128 				err = ENOTTY;
22129 				goto done_without_assess;
22130 			}
22131 			break;
22132 		case DKIOCFLUSHWRITECACHE:
22133 			mutex_exit(SD_MUTEX(un));
22134 			err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
22135 			if (err != 0) {
22136 				mutex_enter(SD_MUTEX(un));
22137 				un->un_ncmds_in_driver--;
22138 				ASSERT(un->un_ncmds_in_driver >= 0);
22139 				mutex_exit(SD_MUTEX(un));
22140 				err = EIO;
22141 				goto done_quick_assess;
22142 			}
22143 			mutex_enter(SD_MUTEX(un));
22144 			/* FALLTHROUGH */
22145 		case DKIOCREMOVABLE:
22146 		case DKIOCHOTPLUGGABLE:
22147 		case DKIOCINFO:
22148 		case DKIOCGMEDIAINFO:
22149 		case DKIOCGMEDIAINFOEXT:
22150 		case MHIOCENFAILFAST:
22151 		case MHIOCSTATUS:
22152 		case MHIOCTKOWN:
22153 		case MHIOCRELEASE:
22154 		case MHIOCGRP_INKEYS:
22155 		case MHIOCGRP_INRESV:
22156 		case MHIOCGRP_REGISTER:
22157 		case MHIOCGRP_RESERVE:
22158 		case MHIOCGRP_PREEMPTANDABORT:
22159 		case MHIOCGRP_REGISTERANDIGNOREKEY:
22160 		case CDROMCLOSETRAY:
22161 		case USCSICMD:
22162 			goto skip_ready_valid;
22163 		default:
22164 			break;
22165 		}
22166 
22167 		mutex_exit(SD_MUTEX(un));
22168 		err = sd_ready_and_valid(ssc, SDPART(dev));
22169 		mutex_enter(SD_MUTEX(un));
22170 
22171 		if (err != SD_READY_VALID) {
22172 			switch (cmd) {
22173 			case DKIOCSTATE:
22174 			case CDROMGDRVSPEED:
22175 			case CDROMSDRVSPEED:
22176 			case FDEJECT:	/* for eject command */
22177 			case DKIOCEJECT:
22178 			case CDROMEJECT:
22179 			case DKIOCREMOVABLE:
22180 			case DKIOCHOTPLUGGABLE:
22181 				break;
22182 			default:
22183 				if (un->un_f_has_removable_media) {
22184 					err = ENXIO;
22185 				} else {
22186 				/* Do not map SD_RESERVED_BY_OTHERS to EIO */
22187 					if (err == SD_RESERVED_BY_OTHERS) {
22188 						err = EACCES;
22189 					} else {
22190 						err = EIO;
22191 					}
22192 				}
22193 				un->un_ncmds_in_driver--;
22194 				ASSERT(un->un_ncmds_in_driver >= 0);
22195 				mutex_exit(SD_MUTEX(un));
22196 
22197 				goto done_without_assess;
22198 			}
22199 		}
22200 	}
22201 
22202 skip_ready_valid:
22203 	mutex_exit(SD_MUTEX(un));
22204 
22205 	switch (cmd) {
22206 	case DKIOCINFO:
22207 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
22208 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
22209 		break;
22210 
22211 	case DKIOCGMEDIAINFO:
22212 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
22213 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
22214 		break;
22215 
22216 	case DKIOCGMEDIAINFOEXT:
22217 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFOEXT\n");
22218 		err = sd_get_media_info_ext(dev, (caddr_t)arg, flag);
22219 		break;
22220 
22221 	case DKIOCGGEOM:
22222 	case DKIOCGVTOC:
22223 	case DKIOCGEXTVTOC:
22224 	case DKIOCGAPART:
22225 	case DKIOCPARTINFO:
22226 	case DKIOCEXTPARTINFO:
22227 	case DKIOCSGEOM:
22228 	case DKIOCSAPART:
22229 	case DKIOCGETEFI:
22230 	case DKIOCPARTITION:
22231 	case DKIOCSVTOC:
22232 	case DKIOCSEXTVTOC:
22233 	case DKIOCSETEFI:
22234 	case DKIOCGMBOOT:
22235 	case DKIOCSMBOOT:
22236 	case DKIOCG_PHYGEOM:
22237 	case DKIOCG_VIRTGEOM:
22238 #if defined(__i386) || defined(__amd64)
22239 	case DKIOCSETEXTPART:
22240 #endif
22241 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOC %d\n", cmd);
22242 
22243 		/* TUR should spin up */
22244 
22245 		if (un->un_f_has_removable_media)
22246 			err = sd_send_scsi_TEST_UNIT_READY(ssc,
22247 			    SD_CHECK_FOR_MEDIA);
22248 
22249 		else
22250 			err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
22251 
22252 		if (err != 0)
22253 			goto done_with_assess;
22254 
22255 		err = cmlb_ioctl(un->un_cmlbhandle, dev,
22256 		    cmd, arg, flag, cred_p, rval_p, (void *)SD_PATH_DIRECT);
22257 
22258 		if ((err == 0) &&
22259 		    ((cmd == DKIOCSETEFI) ||
22260 		    (un->un_f_pkstats_enabled) &&
22261 		    (cmd == DKIOCSAPART || cmd == DKIOCSVTOC ||
22262 		    cmd == DKIOCSEXTVTOC))) {
22263 
22264 			tmprval = cmlb_validate(un->un_cmlbhandle, CMLB_SILENT,
22265 			    (void *)SD_PATH_DIRECT);
22266 			if ((tmprval == 0) && un->un_f_pkstats_enabled) {
22267 				sd_set_pstats(un);
22268 				SD_TRACE(SD_LOG_IO_PARTITION, un,
22269 				    "sd_ioctl: un:0x%p pstats created and "
22270 				    "set\n", un);
22271 			}
22272 		}
22273 
22274 		if ((cmd == DKIOCSVTOC || cmd == DKIOCSEXTVTOC) ||
22275 		    ((cmd == DKIOCSETEFI) && (tmprval == 0))) {
22276 
22277 			mutex_enter(SD_MUTEX(un));
22278 			if (un->un_f_devid_supported &&
22279 			    (un->un_f_opt_fab_devid == TRUE)) {
22280 				if (un->un_devid == NULL) {
22281 					sd_register_devid(ssc, SD_DEVINFO(un),
22282 					    SD_TARGET_IS_UNRESERVED);
22283 				} else {
22284 					/*
22285 					 * The device id for this disk
22286 					 * has been fabricated. The
22287 					 * device id must be preserved
22288 					 * by writing it back out to
22289 					 * disk.
22290 					 */
22291 					if (sd_write_deviceid(ssc) != 0) {
22292 						ddi_devid_free(un->un_devid);
22293 						un->un_devid = NULL;
22294 					}
22295 				}
22296 			}
22297 			mutex_exit(SD_MUTEX(un));
22298 		}
22299 
22300 		break;
22301 
22302 	case DKIOCLOCK:
22303 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
22304 		err = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
22305 		    SD_PATH_STANDARD);
22306 		goto done_with_assess;
22307 
22308 	case DKIOCUNLOCK:
22309 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
22310 		err = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_ALLOW,
22311 		    SD_PATH_STANDARD);
22312 		goto done_with_assess;
22313 
22314 	case DKIOCSTATE: {
22315 		enum dkio_state		state;
22316 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
22317 
22318 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
22319 			err = EFAULT;
22320 		} else {
22321 			err = sd_check_media(dev, state);
22322 			if (err == 0) {
22323 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
22324 				    sizeof (int), flag) != 0)
22325 					err = EFAULT;
22326 			}
22327 		}
22328 		break;
22329 	}
22330 
22331 	case DKIOCREMOVABLE:
22332 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
22333 		i = un->un_f_has_removable_media ? 1 : 0;
22334 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
22335 			err = EFAULT;
22336 		} else {
22337 			err = 0;
22338 		}
22339 		break;
22340 
22341 	case DKIOCHOTPLUGGABLE:
22342 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
22343 		i = un->un_f_is_hotpluggable ? 1 : 0;
22344 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
22345 			err = EFAULT;
22346 		} else {
22347 			err = 0;
22348 		}
22349 		break;
22350 
22351 	case DKIOCREADONLY:
22352 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREADONLY\n");
22353 		i = 0;
22354 		if ((ISCD(un) && !un->un_f_mmc_writable_media) ||
22355 		    (sr_check_wp(dev) != 0)) {
22356 			i = 1;
22357 		}
22358 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
22359 			err = EFAULT;
22360 		} else {
22361 			err = 0;
22362 		}
22363 		break;
22364 
22365 	case DKIOCGTEMPERATURE:
22366 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
22367 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
22368 		break;
22369 
22370 	case MHIOCENFAILFAST:
22371 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
22372 		if ((err = drv_priv(cred_p)) == 0) {
22373 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
22374 		}
22375 		break;
22376 
22377 	case MHIOCTKOWN:
22378 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
22379 		if ((err = drv_priv(cred_p)) == 0) {
22380 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
22381 		}
22382 		break;
22383 
22384 	case MHIOCRELEASE:
22385 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
22386 		if ((err = drv_priv(cred_p)) == 0) {
22387 			err = sd_mhdioc_release(dev);
22388 		}
22389 		break;
22390 
22391 	case MHIOCSTATUS:
22392 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
22393 		if ((err = drv_priv(cred_p)) == 0) {
22394 			switch (sd_send_scsi_TEST_UNIT_READY(ssc, 0)) {
22395 			case 0:
22396 				err = 0;
22397 				break;
22398 			case EACCES:
22399 				*rval_p = 1;
22400 				err = 0;
22401 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
22402 				break;
22403 			default:
22404 				err = EIO;
22405 				goto done_with_assess;
22406 			}
22407 		}
22408 		break;
22409 
22410 	case MHIOCQRESERVE:
22411 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
22412 		if ((err = drv_priv(cred_p)) == 0) {
22413 			err = sd_reserve_release(dev, SD_RESERVE);
22414 		}
22415 		break;
22416 
22417 	case MHIOCREREGISTERDEVID:
22418 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
22419 		if (drv_priv(cred_p) == EPERM) {
22420 			err = EPERM;
22421 		} else if (!un->un_f_devid_supported) {
22422 			err = ENOTTY;
22423 		} else {
22424 			err = sd_mhdioc_register_devid(dev);
22425 		}
22426 		break;
22427 
22428 	case MHIOCGRP_INKEYS:
22429 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
22430 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
22431 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22432 				err = ENOTSUP;
22433 			} else {
22434 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
22435 				    flag);
22436 			}
22437 		}
22438 		break;
22439 
22440 	case MHIOCGRP_INRESV:
22441 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
22442 		if (((err = drv_priv(cred_p)) != EPERM) && arg != NULL) {
22443 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22444 				err = ENOTSUP;
22445 			} else {
22446 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
22447 			}
22448 		}
22449 		break;
22450 
22451 	case MHIOCGRP_REGISTER:
22452 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
22453 		if ((err = drv_priv(cred_p)) != EPERM) {
22454 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22455 				err = ENOTSUP;
22456 			} else if (arg != NULL) {
22457 				mhioc_register_t reg;
22458 				if (ddi_copyin((void *)arg, &reg,
22459 				    sizeof (mhioc_register_t), flag) != 0) {
22460 					err = EFAULT;
22461 				} else {
22462 					err =
22463 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22464 					    ssc, SD_SCSI3_REGISTER,
22465 					    (uchar_t *)&reg);
22466 					if (err != 0)
22467 						goto done_with_assess;
22468 				}
22469 			}
22470 		}
22471 		break;
22472 
22473 	case MHIOCGRP_RESERVE:
22474 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
22475 		if ((err = drv_priv(cred_p)) != EPERM) {
22476 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22477 				err = ENOTSUP;
22478 			} else if (arg != NULL) {
22479 				mhioc_resv_desc_t resv_desc;
22480 				if (ddi_copyin((void *)arg, &resv_desc,
22481 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
22482 					err = EFAULT;
22483 				} else {
22484 					err =
22485 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22486 					    ssc, SD_SCSI3_RESERVE,
22487 					    (uchar_t *)&resv_desc);
22488 					if (err != 0)
22489 						goto done_with_assess;
22490 				}
22491 			}
22492 		}
22493 		break;
22494 
22495 	case MHIOCGRP_PREEMPTANDABORT:
22496 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
22497 		if ((err = drv_priv(cred_p)) != EPERM) {
22498 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22499 				err = ENOTSUP;
22500 			} else if (arg != NULL) {
22501 				mhioc_preemptandabort_t preempt_abort;
22502 				if (ddi_copyin((void *)arg, &preempt_abort,
22503 				    sizeof (mhioc_preemptandabort_t),
22504 				    flag) != 0) {
22505 					err = EFAULT;
22506 				} else {
22507 					err =
22508 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22509 					    ssc, SD_SCSI3_PREEMPTANDABORT,
22510 					    (uchar_t *)&preempt_abort);
22511 					if (err != 0)
22512 						goto done_with_assess;
22513 				}
22514 			}
22515 		}
22516 		break;
22517 
22518 	case MHIOCGRP_REGISTERANDIGNOREKEY:
22519 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
22520 		if ((err = drv_priv(cred_p)) != EPERM) {
22521 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22522 				err = ENOTSUP;
22523 			} else if (arg != NULL) {
22524 				mhioc_registerandignorekey_t r_and_i;
22525 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
22526 				    sizeof (mhioc_registerandignorekey_t),
22527 				    flag) != 0) {
22528 					err = EFAULT;
22529 				} else {
22530 					err =
22531 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22532 					    ssc, SD_SCSI3_REGISTERANDIGNOREKEY,
22533 					    (uchar_t *)&r_and_i);
22534 					if (err != 0)
22535 						goto done_with_assess;
22536 				}
22537 			}
22538 		}
22539 		break;
22540 
22541 	case USCSICMD:
22542 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
22543 		cr = ddi_get_cred();
22544 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
22545 			err = EPERM;
22546 		} else {
22547 			enum uio_seg	uioseg;
22548 
22549 			uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE :
22550 			    UIO_USERSPACE;
22551 			if (un->un_f_format_in_progress == TRUE) {
22552 				err = EAGAIN;
22553 				break;
22554 			}
22555 
22556 			err = sd_ssc_send(ssc,
22557 			    (struct uscsi_cmd *)arg,
22558 			    flag, uioseg, SD_PATH_STANDARD);
22559 			if (err != 0)
22560 				goto done_with_assess;
22561 			else
22562 				sd_ssc_assessment(ssc, SD_FMT_STANDARD);
22563 		}
22564 		break;
22565 
22566 	case CDROMPAUSE:
22567 	case CDROMRESUME:
22568 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
22569 		if (!ISCD(un)) {
22570 			err = ENOTTY;
22571 		} else {
22572 			err = sr_pause_resume(dev, cmd);
22573 		}
22574 		break;
22575 
22576 	case CDROMPLAYMSF:
22577 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
22578 		if (!ISCD(un)) {
22579 			err = ENOTTY;
22580 		} else {
22581 			err = sr_play_msf(dev, (caddr_t)arg, flag);
22582 		}
22583 		break;
22584 
22585 	case CDROMPLAYTRKIND:
22586 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
22587 #if defined(__i386) || defined(__amd64)
22588 		/*
22589 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
22590 		 */
22591 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
22592 #else
22593 		if (!ISCD(un)) {
22594 #endif
22595 			err = ENOTTY;
22596 		} else {
22597 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
22598 		}
22599 		break;
22600 
22601 	case CDROMREADTOCHDR:
22602 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
22603 		if (!ISCD(un)) {
22604 			err = ENOTTY;
22605 		} else {
22606 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
22607 		}
22608 		break;
22609 
22610 	case CDROMREADTOCENTRY:
22611 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
22612 		if (!ISCD(un)) {
22613 			err = ENOTTY;
22614 		} else {
22615 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
22616 		}
22617 		break;
22618 
22619 	case CDROMSTOP:
22620 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
22621 		if (!ISCD(un)) {
22622 			err = ENOTTY;
22623 		} else {
22624 			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
22625 			    SD_TARGET_STOP, SD_PATH_STANDARD);
22626 			goto done_with_assess;
22627 		}
22628 		break;
22629 
22630 	case CDROMSTART:
22631 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
22632 		if (!ISCD(un)) {
22633 			err = ENOTTY;
22634 		} else {
22635 			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
22636 			    SD_TARGET_START, SD_PATH_STANDARD);
22637 			goto done_with_assess;
22638 		}
22639 		break;
22640 
22641 	case CDROMCLOSETRAY:
22642 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
22643 		if (!ISCD(un)) {
22644 			err = ENOTTY;
22645 		} else {
22646 			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
22647 			    SD_TARGET_CLOSE, SD_PATH_STANDARD);
22648 			goto done_with_assess;
22649 		}
22650 		break;
22651 
22652 	case FDEJECT:	/* for eject command */
22653 	case DKIOCEJECT:
22654 	case CDROMEJECT:
22655 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
22656 		if (!un->un_f_eject_media_supported) {
22657 			err = ENOTTY;
22658 		} else {
22659 			err = sr_eject(dev);
22660 		}
22661 		break;
22662 
22663 	case CDROMVOLCTRL:
22664 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
22665 		if (!ISCD(un)) {
22666 			err = ENOTTY;
22667 		} else {
22668 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
22669 		}
22670 		break;
22671 
22672 	case CDROMSUBCHNL:
22673 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
22674 		if (!ISCD(un)) {
22675 			err = ENOTTY;
22676 		} else {
22677 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
22678 		}
22679 		break;
22680 
22681 	case CDROMREADMODE2:
22682 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
22683 		if (!ISCD(un)) {
22684 			err = ENOTTY;
22685 		} else if (un->un_f_cfg_is_atapi == TRUE) {
22686 			/*
22687 			 * If the drive supports READ CD, use that instead of
22688 			 * switching the LBA size via a MODE SELECT
22689 			 * Block Descriptor
22690 			 */
22691 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
22692 		} else {
22693 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
22694 		}
22695 		break;
22696 
22697 	case CDROMREADMODE1:
22698 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
22699 		if (!ISCD(un)) {
22700 			err = ENOTTY;
22701 		} else {
22702 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
22703 		}
22704 		break;
22705 
22706 	case CDROMREADOFFSET:
22707 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
22708 		if (!ISCD(un)) {
22709 			err = ENOTTY;
22710 		} else {
22711 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
22712 			    flag);
22713 		}
22714 		break;
22715 
22716 	case CDROMSBLKMODE:
22717 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
22718 		/*
22719 		 * There is no means of changing block size in case of atapi
22720 		 * drives, thus return ENOTTY if drive type is atapi
22721 		 */
22722 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
22723 			err = ENOTTY;
22724 		} else if (un->un_f_mmc_cap == TRUE) {
22725 
22726 			/*
22727 			 * MMC Devices do not support changing the
22728 			 * logical block size
22729 			 *
22730 			 * Note: EINVAL is being returned instead of ENOTTY to
22731 			 * maintain consistancy with the original mmc
22732 			 * driver update.
22733 			 */
22734 			err = EINVAL;
22735 		} else {
22736 			mutex_enter(SD_MUTEX(un));
22737 			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
22738 			    (un->un_ncmds_in_transport > 0)) {
22739 				mutex_exit(SD_MUTEX(un));
22740 				err = EINVAL;
22741 			} else {
22742 				mutex_exit(SD_MUTEX(un));
22743 				err = sr_change_blkmode(dev, cmd, arg, flag);
22744 			}
22745 		}
22746 		break;
22747 
22748 	case CDROMGBLKMODE:
22749 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
22750 		if (!ISCD(un)) {
22751 			err = ENOTTY;
22752 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
22753 		    (un->un_f_blockcount_is_valid != FALSE)) {
22754 			/*
22755 			 * Drive is an ATAPI drive so return target block
22756 			 * size for ATAPI drives since we cannot change the
22757 			 * blocksize on ATAPI drives. Used primarily to detect
22758 			 * if an ATAPI cdrom is present.
22759 			 */
22760 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
22761 			    sizeof (int), flag) != 0) {
22762 				err = EFAULT;
22763 			} else {
22764 				err = 0;
22765 			}
22766 
22767 		} else {
22768 			/*
22769 			 * Drive supports changing block sizes via a Mode
22770 			 * Select.
22771 			 */
22772 			err = sr_change_blkmode(dev, cmd, arg, flag);
22773 		}
22774 		break;
22775 
22776 	case CDROMGDRVSPEED:
22777 	case CDROMSDRVSPEED:
22778 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
22779 		if (!ISCD(un)) {
22780 			err = ENOTTY;
22781 		} else if (un->un_f_mmc_cap == TRUE) {
22782 			/*
22783 			 * Note: In the future the driver implementation
22784 			 * for getting and
22785 			 * setting cd speed should entail:
22786 			 * 1) If non-mmc try the Toshiba mode page
22787 			 *    (sr_change_speed)
22788 			 * 2) If mmc but no support for Real Time Streaming try
22789 			 *    the SET CD SPEED (0xBB) command
22790 			 *   (sr_atapi_change_speed)
22791 			 * 3) If mmc and support for Real Time Streaming
22792 			 *    try the GET PERFORMANCE and SET STREAMING
22793 			 *    commands (not yet implemented, 4380808)
22794 			 */
22795 			/*
22796 			 * As per recent MMC spec, CD-ROM speed is variable
22797 			 * and changes with LBA. Since there is no such
22798 			 * things as drive speed now, fail this ioctl.
22799 			 *
22800 			 * Note: EINVAL is returned for consistancy of original
22801 			 * implementation which included support for getting
22802 			 * the drive speed of mmc devices but not setting
22803 			 * the drive speed. Thus EINVAL would be returned
22804 			 * if a set request was made for an mmc device.
22805 			 * We no longer support get or set speed for
22806 			 * mmc but need to remain consistent with regard
22807 			 * to the error code returned.
22808 			 */
22809 			err = EINVAL;
22810 		} else if (un->un_f_cfg_is_atapi == TRUE) {
22811 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
22812 		} else {
22813 			err = sr_change_speed(dev, cmd, arg, flag);
22814 		}
22815 		break;
22816 
22817 	case CDROMCDDA:
22818 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
22819 		if (!ISCD(un)) {
22820 			err = ENOTTY;
22821 		} else {
22822 			err = sr_read_cdda(dev, (void *)arg, flag);
22823 		}
22824 		break;
22825 
22826 	case CDROMCDXA:
22827 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
22828 		if (!ISCD(un)) {
22829 			err = ENOTTY;
22830 		} else {
22831 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
22832 		}
22833 		break;
22834 
22835 	case CDROMSUBCODE:
22836 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
22837 		if (!ISCD(un)) {
22838 			err = ENOTTY;
22839 		} else {
22840 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
22841 		}
22842 		break;
22843 
22844 
22845 #ifdef SDDEBUG
22846 /* RESET/ABORTS testing ioctls */
22847 	case DKIOCRESET: {
22848 		int	reset_level;
22849 
22850 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
22851 			err = EFAULT;
22852 		} else {
22853 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
22854 			    "reset_level = 0x%lx\n", reset_level);
22855 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
22856 				err = 0;
22857 			} else {
22858 				err = EIO;
22859 			}
22860 		}
22861 		break;
22862 	}
22863 
22864 	case DKIOCABORT:
22865 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
22866 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
22867 			err = 0;
22868 		} else {
22869 			err = EIO;
22870 		}
22871 		break;
22872 #endif
22873 
22874 #ifdef SD_FAULT_INJECTION
22875 /* SDIOC FaultInjection testing ioctls */
22876 	case SDIOCSTART:
22877 	case SDIOCSTOP:
22878 	case SDIOCINSERTPKT:
22879 	case SDIOCINSERTXB:
22880 	case SDIOCINSERTUN:
22881 	case SDIOCINSERTARQ:
22882 	case SDIOCPUSH:
22883 	case SDIOCRETRIEVE:
22884 	case SDIOCRUN:
22885 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl:"
22886 		    "SDIOC detected cmd:0x%X:\n", cmd);
22887 		/* call error generator */
22888 		sd_faultinjection_ioctl(cmd, arg, un);
22889 		err = 0;
22890 		break;
22891 
22892 #endif /* SD_FAULT_INJECTION */
22893 
22894 	case DKIOCFLUSHWRITECACHE:
22895 		{
22896 			struct dk_callback *dkc = (struct dk_callback *)arg;
22897 
22898 			mutex_enter(SD_MUTEX(un));
22899 			if (!un->un_f_sync_cache_supported ||
22900 			    !un->un_f_write_cache_enabled) {
22901 				err = un->un_f_sync_cache_supported ?
22902 				    0 : ENOTSUP;
22903 				mutex_exit(SD_MUTEX(un));
22904 				if ((flag & FKIOCTL) && dkc != NULL &&
22905 				    dkc->dkc_callback != NULL) {
22906 					(*dkc->dkc_callback)(dkc->dkc_cookie,
22907 					    err);
22908 					/*
22909 					 * Did callback and reported error.
22910 					 * Since we did a callback, ioctl
22911 					 * should return 0.
22912 					 */
22913 					err = 0;
22914 				}
22915 				break;
22916 			}
22917 			mutex_exit(SD_MUTEX(un));
22918 
22919 			if ((flag & FKIOCTL) && dkc != NULL &&
22920 			    dkc->dkc_callback != NULL) {
22921 				/* async SYNC CACHE request */
22922 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
22923 			} else {
22924 				/* synchronous SYNC CACHE request */
22925 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
22926 			}
22927 		}
22928 		break;
22929 
22930 	case DKIOCGETWCE: {
22931 
22932 		int wce;
22933 
22934 		if ((err = sd_get_write_cache_enabled(ssc, &wce)) != 0) {
22935 			break;
22936 		}
22937 
22938 		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
22939 			err = EFAULT;
22940 		}
22941 		break;
22942 	}
22943 
22944 	case DKIOCSETWCE: {
22945 
22946 		int wce, sync_supported;
22947 		int cur_wce = 0;
22948 
22949 		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
22950 			err = EFAULT;
22951 			break;
22952 		}
22953 
22954 		/*
22955 		 * Synchronize multiple threads trying to enable
22956 		 * or disable the cache via the un_f_wcc_cv
22957 		 * condition variable.
22958 		 */
22959 		mutex_enter(SD_MUTEX(un));
22960 
22961 		/*
22962 		 * Don't allow the cache to be enabled if the
22963 		 * config file has it disabled.
22964 		 */
22965 		if (un->un_f_opt_disable_cache && wce) {
22966 			mutex_exit(SD_MUTEX(un));
22967 			err = EINVAL;
22968 			break;
22969 		}
22970 
22971 		/*
22972 		 * Wait for write cache change in progress
22973 		 * bit to be clear before proceeding.
22974 		 */
22975 		while (un->un_f_wcc_inprog)
22976 			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
22977 
22978 		un->un_f_wcc_inprog = 1;
22979 
22980 		mutex_exit(SD_MUTEX(un));
22981 
22982 		/*
22983 		 * Get the current write cache state
22984 		 */
22985 		if ((err = sd_get_write_cache_enabled(ssc, &cur_wce)) != 0) {
22986 			mutex_enter(SD_MUTEX(un));
22987 			un->un_f_wcc_inprog = 0;
22988 			cv_broadcast(&un->un_wcc_cv);
22989 			mutex_exit(SD_MUTEX(un));
22990 			break;
22991 		}
22992 
22993 		mutex_enter(SD_MUTEX(un));
22994 		un->un_f_write_cache_enabled = (cur_wce != 0);
22995 
22996 		if (un->un_f_write_cache_enabled && wce == 0) {
22997 			/*
22998 			 * Disable the write cache.  Don't clear
22999 			 * un_f_write_cache_enabled until after
23000 			 * the mode select and flush are complete.
23001 			 */
23002 			sync_supported = un->un_f_sync_cache_supported;
23003 
23004 			/*
23005 			 * If cache flush is suppressed, we assume that the
23006 			 * controller firmware will take care of managing the
23007 			 * write cache for us: no need to explicitly
23008 			 * disable it.
23009 			 */
23010 			if (!un->un_f_suppress_cache_flush) {
23011 				mutex_exit(SD_MUTEX(un));
23012 				if ((err = sd_cache_control(ssc,
23013 				    SD_CACHE_NOCHANGE,
23014 				    SD_CACHE_DISABLE)) == 0 &&
23015 				    sync_supported) {
23016 					err = sd_send_scsi_SYNCHRONIZE_CACHE(un,
23017 					    NULL);
23018 				}
23019 			} else {
23020 				mutex_exit(SD_MUTEX(un));
23021 			}
23022 
23023 			mutex_enter(SD_MUTEX(un));
23024 			if (err == 0) {
23025 				un->un_f_write_cache_enabled = 0;
23026 			}
23027 
23028 		} else if (!un->un_f_write_cache_enabled && wce != 0) {
23029 			/*
23030 			 * Set un_f_write_cache_enabled first, so there is
23031 			 * no window where the cache is enabled, but the
23032 			 * bit says it isn't.
23033 			 */
23034 			un->un_f_write_cache_enabled = 1;
23035 
23036 			/*
23037 			 * If cache flush is suppressed, we assume that the
23038 			 * controller firmware will take care of managing the
23039 			 * write cache for us: no need to explicitly
23040 			 * enable it.
23041 			 */
23042 			if (!un->un_f_suppress_cache_flush) {
23043 				mutex_exit(SD_MUTEX(un));
23044 				err = sd_cache_control(ssc, SD_CACHE_NOCHANGE,
23045 				    SD_CACHE_ENABLE);
23046 			} else {
23047 				mutex_exit(SD_MUTEX(un));
23048 			}
23049 
23050 			mutex_enter(SD_MUTEX(un));
23051 
23052 			if (err) {
23053 				un->un_f_write_cache_enabled = 0;
23054 			}
23055 		}
23056 
23057 		un->un_f_wcc_inprog = 0;
23058 		cv_broadcast(&un->un_wcc_cv);
23059 		mutex_exit(SD_MUTEX(un));
23060 		break;
23061 	}
23062 
23063 	default:
23064 		err = ENOTTY;
23065 		break;
23066 	}
23067 	mutex_enter(SD_MUTEX(un));
23068 	un->un_ncmds_in_driver--;
23069 	ASSERT(un->un_ncmds_in_driver >= 0);
23070 	mutex_exit(SD_MUTEX(un));
23071 
23072 
23073 done_without_assess:
23074 	sd_ssc_fini(ssc);
23075 
23076 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
23077 	return (err);
23078 
23079 done_with_assess:
23080 	mutex_enter(SD_MUTEX(un));
23081 	un->un_ncmds_in_driver--;
23082 	ASSERT(un->un_ncmds_in_driver >= 0);
23083 	mutex_exit(SD_MUTEX(un));
23084 
23085 done_quick_assess:
23086 	if (err != 0)
23087 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23088 	/* Uninitialize sd_ssc_t pointer */
23089 	sd_ssc_fini(ssc);
23090 
23091 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
23092 	return (err);
23093 }
23094 
23095 
23096 /*
23097  *    Function: sd_dkio_ctrl_info
23098  *
23099  * Description: This routine is the driver entry point for handling controller
23100  *		information ioctl requests (DKIOCINFO).
23101  *
23102  *   Arguments: dev  - the device number
23103  *		arg  - pointer to user provided dk_cinfo structure
23104  *		       specifying the controller type and attributes.
23105  *		flag - this argument is a pass through to ddi_copyxxx()
23106  *		       directly from the mode argument of ioctl().
23107  *
23108  * Return Code: 0
23109  *		EFAULT
23110  *		ENXIO
23111  */
23112 
23113 static int
23114 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
23115 {
23116 	struct sd_lun	*un = NULL;
23117 	struct dk_cinfo	*info;
23118 	dev_info_t	*pdip;
23119 	int		lun, tgt;
23120 
23121 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23122 		return (ENXIO);
23123 	}
23124 
23125 	info = (struct dk_cinfo *)
23126 	    kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
23127 
23128 	switch (un->un_ctype) {
23129 	case CTYPE_CDROM:
23130 		info->dki_ctype = DKC_CDROM;
23131 		break;
23132 	default:
23133 		info->dki_ctype = DKC_SCSI_CCS;
23134 		break;
23135 	}
23136 	pdip = ddi_get_parent(SD_DEVINFO(un));
23137 	info->dki_cnum = ddi_get_instance(pdip);
23138 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
23139 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
23140 	} else {
23141 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
23142 		    DK_DEVLEN - 1);
23143 	}
23144 
23145 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
23146 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
23147 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
23148 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
23149 
23150 	/* Unit Information */
23151 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
23152 	info->dki_slave = ((tgt << 3) | lun);
23153 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
23154 	    DK_DEVLEN - 1);
23155 	info->dki_flags = DKI_FMTVOL;
23156 	info->dki_partition = SDPART(dev);
23157 
23158 	/* Max Transfer size of this device in blocks */
23159 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
23160 	info->dki_addr = 0;
23161 	info->dki_space = 0;
23162 	info->dki_prio = 0;
23163 	info->dki_vec = 0;
23164 
23165 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
23166 		kmem_free(info, sizeof (struct dk_cinfo));
23167 		return (EFAULT);
23168 	} else {
23169 		kmem_free(info, sizeof (struct dk_cinfo));
23170 		return (0);
23171 	}
23172 }
23173 
23174 
23175 /*
23176  *    Function: sd_get_media_info
23177  *
23178  * Description: This routine is the driver entry point for handling ioctl
23179  *		requests for the media type or command set profile used by the
23180  *		drive to operate on the media (DKIOCGMEDIAINFO).
23181  *
23182  *   Arguments: dev	- the device number
23183  *		arg	- pointer to user provided dk_minfo structure
23184  *			  specifying the media type, logical block size and
23185  *			  drive capacity.
23186  *		flag	- this argument is a pass through to ddi_copyxxx()
23187  *			  directly from the mode argument of ioctl().
23188  *
23189  * Return Code: 0
 *		EACCES
23191  *		EFAULT
23192  *		ENXIO
23193  *		EIO
23194  */
23195 
static int
sd_get_media_info(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un = NULL;
	struct uscsi_cmd	com;
	struct scsi_inquiry	*sinq;
	struct dk_minfo		media_info;
	u_longlong_t		media_capacity;
	uint64_t		capacity;
	uint_t			lbasize;
	uchar_t			*out_data;	/* GET CONFIGURATION reply */
	uchar_t			*rqbuf;		/* request-sense buffer */
	int			rval = 0;
	int			rtn;
	sd_ssc_t		*ssc;
	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info: entry\n");

	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);

	/* Issue a TUR to determine if the drive is ready with media present */
	ssc = sd_ssc_init(un);
	rval = sd_send_scsi_TEST_UNIT_READY(ssc, SD_CHECK_FOR_MEDIA);
	if (rval == ENXIO) {
		/* No media/device: bail out through the assessment path */
		goto done;
	} else if (rval != 0) {
		/* Any other TUR failure is ignored for FMA purposes */
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}

	/* Now get configuration data */
	if (ISCD(un)) {
		/* Default for CD devices if GET CONFIGURATION is unusable */
		media_info.dki_media_type = DK_CDROM;

		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
		if (un->un_f_mmc_cap == TRUE) {
			rtn = sd_send_scsi_GET_CONFIGURATION(ssc, &com, rqbuf,
			    SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN,
			    SD_PATH_STANDARD);

			if (rtn) {
				/*
				 * We ignore all failures for CD and need to
				 * put the assessment before processing code
				 * to avoid missing assessment for FMA.
				 */
				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
				/*
				 * Failed for other than an illegal request
				 * or command not supported
				 */
				if ((com.uscsi_status == STATUS_CHECK) &&
				    (com.uscsi_rqstatus == STATUS_GOOD)) {
					/*
					 * rqbuf[2] is the sense key and
					 * rqbuf[12] the ASC; 0x20 means
					 * "invalid command operation code".
					 * Anything else is a hard failure.
					 */
					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
					    (rqbuf[12] != 0x20)) {
						rval = EIO;
						goto no_assessment;
					}
				}
			} else {
				/*
				 * The GET CONFIGURATION command succeeded
				 * so set the media type according to the
				 * returned data
				 */
				media_info.dki_media_type = out_data[6];
				media_info.dki_media_type <<= 8;
				media_info.dki_media_type |= out_data[7];
			}
		}
	} else {
		/*
		 * The profile list is not available, so we attempt to identify
		 * the media type based on the inquiry data
		 */
		sinq = un->un_sd->sd_inq;
		if ((sinq->inq_dtype == DTYPE_DIRECT) ||
		    (sinq->inq_dtype == DTYPE_OPTICAL)) {
			/* This is a direct access device  or optical disk */
			media_info.dki_media_type = DK_FIXED_DISK;

			/*
			 * Special-case Iomega ZIP/JAZ removable drives,
			 * recognized by INQUIRY vendor/product strings.
			 */
			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
					media_info.dki_media_type = DK_ZIP;
				} else if (
				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
					media_info.dki_media_type = DK_JAZ;
				}
			}
		} else {
			/*
			 * Not a CD, direct access or optical disk so return
			 * unknown media
			 */
			media_info.dki_media_type = DK_UNKNOWN;
		}
	}

	/* Now read the capacity so we can provide the lbasize and capacity */
	rval = sd_send_scsi_READ_CAPACITY(ssc, &capacity, &lbasize,
	    SD_PATH_DIRECT);
	switch (rval) {
	case 0:
		break;
	case EACCES:
		/* Reservation conflict: pass EACCES through to the caller */
		rval = EACCES;
		goto done;
	default:
		/* All other READ CAPACITY failures are reported as EIO */
		rval = EIO;
		goto done;
	}

	/*
	 * If lun is expanded dynamically, update the un structure.
	 */
	mutex_enter(SD_MUTEX(un));
	if ((un->un_f_blockcount_is_valid == TRUE) &&
	    (un->un_f_tgt_blocksize_is_valid == TRUE) &&
	    (capacity > un->un_blockcount)) {
		sd_update_block_info(un, lbasize, capacity);
	}
	mutex_exit(SD_MUTEX(un));

	media_info.dki_lbsize = lbasize;
	media_capacity = capacity;

	/*
	 * sd_send_scsi_READ_CAPACITY() reports capacity in
	 * un->un_sys_blocksize chunks. So we need to convert it into
	 * cap.lbasize chunks.
	 */
	media_capacity *= un->un_sys_blocksize;
	media_capacity /= lbasize;
	media_info.dki_capacity = media_capacity;

	if (ddi_copyout(&media_info, arg, sizeof (struct dk_minfo), flag)) {
		rval = EFAULT;
		/* Put goto. Anybody might add some code below in future */
		goto no_assessment;
	}
done:
	/* Record the FMA assessment for any outstanding failure */
	if (rval != 0) {
		if (rval == EIO)
			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
		else
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}
no_assessment:
	/* Common cleanup: assessment (if any) already recorded above */
	sd_ssc_fini(ssc);
	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
	kmem_free(rqbuf, SENSE_LENGTH);
	return (rval);
}
23354 
23355 /*
23356  *    Function: sd_get_media_info_ext
23357  *
23358  * Description: This routine is the driver entry point for handling ioctl
23359  *		requests for the media type or command set profile used by the
23360  *		drive to operate on the media (DKIOCGMEDIAINFOEXT). The
 *		difference between this ioctl and DKIOCGMEDIAINFO is that the
 *		return value of this ioctl contains both the logical block
 *		size and the physical block size.
23364  *
23365  *
23366  *   Arguments: dev	- the device number
23367  *		arg	- pointer to user provided dk_minfo_ext structure
23368  *			  specifying the media type, logical block size,
23369  *			  physical block size and disk capacity.
23370  *		flag	- this argument is a pass through to ddi_copyxxx()
23371  *			  directly from the mode argument of ioctl().
23372  *
23373  * Return Code: 0
 *		EACCES
23375  *		EFAULT
23376  *		ENXIO
23377  *		EIO
23378  */
23379 
static int
sd_get_media_info_ext(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un = NULL;
	struct uscsi_cmd	com;
	struct scsi_inquiry	*sinq;
	struct dk_minfo_ext	media_info_ext;
	u_longlong_t		media_capacity;
	uint64_t		capacity;
	uint_t			lbasize;
	uint_t			pbsize;
	uchar_t			*out_data;
	uchar_t			*rqbuf;
	int			rval = 0;
	int			rtn;
	sd_ssc_t		*ssc;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info_ext: entry\n");

	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
	ssc = sd_ssc_init(un);

	/* Issue a TUR to determine if the drive is ready with media present */
	rval = sd_send_scsi_TEST_UNIT_READY(ssc, SD_CHECK_FOR_MEDIA);
	if (rval == ENXIO) {
		goto done;
	} else if (rval != 0) {
		/*
		 * Any other TUR failure is non-fatal here: record an
		 * "ignore" assessment for FMA and keep gathering info.
		 */
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}

	/* Now get configuration data */
	if (ISCD(un)) {
		media_info_ext.dki_media_type = DK_CDROM;

		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
		if (un->un_f_mmc_cap == TRUE) {
			rtn = sd_send_scsi_GET_CONFIGURATION(ssc, &com, rqbuf,
			    SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN,
			    SD_PATH_STANDARD);

			if (rtn) {
				/*
				 * We ignore all failures for CD and need to
				 * put the assessment before processing code
				 * to avoid missing assessment for FMA.
				 */
				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
				/*
				 * Failed for other than an illegal request
				 * or command not supported
				 */
				if ((com.uscsi_status == STATUS_CHECK) &&
				    (com.uscsi_rqstatus == STATUS_GOOD)) {
					/*
					 * rqbuf[2] is the sense key and
					 * rqbuf[12] the ASC (0x20 = invalid
					 * command operation code).
					 */
					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
					    (rqbuf[12] != 0x20)) {
						rval = EIO;
						goto no_assessment;
					}
				}
			} else {
				/*
				 * The GET CONFIGURATION command succeeded
				 * so set the media type according to the
				 * returned data (bytes 6-7 of the feature
				 * header hold the current profile).
				 */
				media_info_ext.dki_media_type = out_data[6];
				media_info_ext.dki_media_type <<= 8;
				media_info_ext.dki_media_type |= out_data[7];
			}
		}
	} else {
		/*
		 * The profile list is not available, so we attempt to identify
		 * the media type based on the inquiry data
		 */
		sinq = un->un_sd->sd_inq;
		if ((sinq->inq_dtype == DTYPE_DIRECT) ||
		    (sinq->inq_dtype == DTYPE_OPTICAL)) {
			/* This is a direct access device  or optical disk */
			media_info_ext.dki_media_type = DK_FIXED_DISK;

			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
					media_info_ext.dki_media_type = DK_ZIP;
				} else if (
				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
					media_info_ext.dki_media_type = DK_JAZ;
				}
			}
		} else {
			/*
			 * Not a CD, direct access or optical disk so return
			 * unknown media
			 */
			media_info_ext.dki_media_type = DK_UNKNOWN;
		}
	}

	/*
	 * Now read the capacity so we can provide the lbasize,
	 * pbsize and capacity.  READ CAPACITY (16) is tried first since
	 * it is the variant that also reports the physical block size;
	 * on failure fall back to READ CAPACITY (10).
	 */
	rval = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity, &lbasize, &pbsize,
	    SD_PATH_DIRECT);

	if (rval != 0) {
		rval = sd_send_scsi_READ_CAPACITY(ssc, &capacity, &lbasize,
		    SD_PATH_DIRECT);

		switch (rval) {
		case 0:
			/* No separate physical block size; assume lbasize. */
			pbsize = lbasize;
			media_capacity = capacity;
			/*
			 * sd_send_scsi_READ_CAPACITY() reports capacity in
			 * un->un_sys_blocksize chunks. So we need to convert
			 * it into cap.lbsize chunks.
			 */
			if (un->un_f_has_removable_media) {
				media_capacity *= un->un_sys_blocksize;
				media_capacity /= lbasize;
			}
			break;
		case EACCES:
			rval = EACCES;
			goto done;
		default:
			rval = EIO;
			goto done;
		}
	} else {
		media_capacity = capacity;
	}

	/*
	 * If lun is expanded dynamically, update the un structure.
	 */
	mutex_enter(SD_MUTEX(un));
	if ((un->un_f_blockcount_is_valid == TRUE) &&
	    (un->un_f_tgt_blocksize_is_valid == TRUE) &&
	    (capacity > un->un_blockcount)) {
		sd_update_block_info(un, lbasize, capacity);
	}
	mutex_exit(SD_MUTEX(un));

	media_info_ext.dki_lbsize = lbasize;
	media_info_ext.dki_capacity = media_capacity;
	media_info_ext.dki_pbsize = pbsize;

	if (ddi_copyout(&media_info_ext, arg, sizeof (struct dk_minfo_ext),
	    flag)) {
		/* A copyout fault is not a device error; skip assessment. */
		rval = EFAULT;
		goto no_assessment;
	}
done:
	/*
	 * Record the FMA assessment for any device error before tearing
	 * down the ssc: EIO is assessed as a status check, anything else
	 * is ignored.
	 */
	if (rval != 0) {
		if (rval == EIO)
			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
		else
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}
no_assessment:
	sd_ssc_fini(ssc);
	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
	kmem_free(rqbuf, SENSE_LENGTH);
	return (rval);
}
23554 
23555 /*
23556  *    Function: sd_watch_request_submit
23557  *
23558  * Description: Call scsi_watch_request_submit or scsi_mmc_watch_request_submit
23559  *		depending on which is supported by device.
23560  */
23561 static opaque_t
23562 sd_watch_request_submit(struct sd_lun *un)
23563 {
23564 	dev_t			dev;
23565 
23566 	/* All submissions are unified to use same device number */
23567 	dev = sd_make_device(SD_DEVINFO(un));
23568 
23569 	if (un->un_f_mmc_cap && un->un_f_mmc_gesn_polling) {
23570 		return (scsi_mmc_watch_request_submit(SD_SCSI_DEVP(un),
23571 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
23572 		    (caddr_t)dev));
23573 	} else {
23574 		return (scsi_watch_request_submit(SD_SCSI_DEVP(un),
23575 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
23576 		    (caddr_t)dev));
23577 	}
23578 }
23579 
23580 
23581 /*
23582  *    Function: sd_check_media
23583  *
23584  * Description: This utility routine implements the functionality for the
23585  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
23586  *		driver state changes from that specified by the user
23587  *		(inserted or ejected). For example, if the user specifies
23588  *		DKIO_EJECTED and the current media state is inserted this
23589  *		routine will immediately return DKIO_INSERTED. However, if the
23590  *		current media state is not inserted the user thread will be
23591  *		blocked until the drive state changes. If DKIO_NONE is specified
23592  *		the user thread will block until a drive state change occurs.
23593  *
23594  *   Arguments: dev  - the device number
23595  *		state  - user pointer to a dkio_state, updated with the current
23596  *			drive state at return.
23597  *
23598  * Return Code: ENXIO
23599  *		EIO
23600  *		EAGAIN
23601  *		EINTR
23602  */
23603 
static int
sd_check_media(dev_t dev, enum dkio_state state)
{
	struct sd_lun		*un = NULL;
	enum dkio_state		prev_state;
	opaque_t		token = NULL;
	int			rval = 0;
	sd_ssc_t		*ssc;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");

	ssc = sd_ssc_init(un);

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
	    "state=%x, mediastate=%x\n", state, un->un_mediastate);

	prev_state = un->un_mediastate;

	/* is there anything to do? */
	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
		/*
		 * submit the request to the scsi_watch service;
		 * scsi_media_watch_cb() does the real work
		 */
		mutex_exit(SD_MUTEX(un));

		/*
		 * This change handles the case where a scsi watch request is
		 * added to a device that is powered down. To accomplish this
		 * we power up the device before adding the scsi watch request,
		 * since the scsi watch sends a TUR directly to the device
		 * which the device cannot handle if it is powered down.
		 */
		if (sd_pm_entry(un) != DDI_SUCCESS) {
			mutex_enter(SD_MUTEX(un));
			goto done;
		}

		token = sd_watch_request_submit(un);

		sd_pm_exit(un);

		mutex_enter(SD_MUTEX(un));
		if (token == NULL) {
			rval = EAGAIN;
			goto done;
		}

		/*
		 * This is a special case IOCTL that doesn't return
		 * until the media state changes. Routine sdpower
		 * knows about and handles this so don't count it
		 * as an active cmd in the driver, which would
		 * keep the device busy to the pm framework.
		 * If the count isn't decremented the device can't
		 * be powered down.
		 */
		un->un_ncmds_in_driver--;
		ASSERT(un->un_ncmds_in_driver >= 0);

		/*
		 * if a prior request had been made, this will be the same
		 * token, as scsi_watch was designed that way.
		 */
		un->un_swr_token = token;
		un->un_specified_mediastate = state;

		/*
		 * now wait for media change
		 * we will not be signalled unless mediastate == state but it is
		 * still better to test for this condition, since there is a
		 * 2 sec cv_broadcast delay when mediastate == DKIO_INSERTED
		 */
		SD_TRACE(SD_LOG_COMMON, un,
		    "sd_check_media: waiting for media state change\n");
		while (un->un_mediastate == state) {
			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
				/*
				 * Interrupted by a signal: restore the
				 * active-command count before bailing out.
				 */
				SD_TRACE(SD_LOG_COMMON, un,
				    "sd_check_media: waiting for media state "
				    "was interrupted\n");
				un->un_ncmds_in_driver++;
				rval = EINTR;
				goto done;
			}
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_check_media: received signal, state=%x\n",
			    un->un_mediastate);
		}
		/*
		 * Inc the counter to indicate the device once again
		 * has an active outstanding cmd.
		 */
		un->un_ncmds_in_driver++;
	}

	/* invalidate geometry */
	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
		sr_ejected(un);
	}

	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
		uint64_t	capacity;
		uint_t		lbasize;

		/*
		 * Newly inserted media: refresh the capacity/block info,
		 * revalidate the label and lock the door.
		 */
		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
		mutex_exit(SD_MUTEX(un));
		/*
		 * Since the following routines use SD_PATH_DIRECT, we must
		 * call PM directly before the upcoming disk accesses. This
		 * may cause the disk to be power/spin up.
		 */

		if (sd_pm_entry(un) == DDI_SUCCESS) {
			rval = sd_send_scsi_READ_CAPACITY(ssc,
			    &capacity, &lbasize, SD_PATH_DIRECT);
			if (rval != 0) {
				sd_pm_exit(un);
				if (rval == EIO)
					sd_ssc_assessment(ssc,
					    SD_FMT_STATUS_CHECK);
				else
					sd_ssc_assessment(ssc, SD_FMT_IGNORE);
				mutex_enter(SD_MUTEX(un));
				goto done;
			}
		} else {
			rval = EIO;
			mutex_enter(SD_MUTEX(un));
			goto done;
		}
		mutex_enter(SD_MUTEX(un));

		sd_update_block_info(un, lbasize, capacity);

		/*
		 *  Check if the media in the device is writable or not
		 */
		if (ISCD(un)) {
			sd_check_for_writable_cd(ssc, SD_PATH_DIRECT);
		}

		mutex_exit(SD_MUTEX(un));
		cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
		if ((cmlb_validate(un->un_cmlbhandle, 0,
		    (void *)SD_PATH_DIRECT) == 0) && un->un_f_pkstats_enabled) {
			sd_set_pstats(un);
			SD_TRACE(SD_LOG_IO_PARTITION, un,
			    "sd_check_media: un:0x%p pstats created and "
			    "set\n", un);
		}

		rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
		    SD_PATH_DIRECT);

		sd_pm_exit(un);

		if (rval != 0) {
			if (rval == EIO)
				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
			else
				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		}

		mutex_enter(SD_MUTEX(un));
	}
done:
	/*
	 * Common exit, entered with SD_MUTEX held: tear down the ssc and,
	 * if a watch token exists, terminate the watch request (the
	 * blocking terminate call is made with SD_MUTEX dropped).
	 */
	sd_ssc_fini(ssc);
	un->un_f_watcht_stopped = FALSE;
	if (token != NULL && un->un_swr_token != NULL) {
		/*
		 * Use of this local token and the mutex ensures that we avoid
		 * some race conditions associated with terminating the
		 * scsi watch.
		 */
		token = un->un_swr_token;
		mutex_exit(SD_MUTEX(un));
		(void) scsi_watch_request_terminate(token,
		    SCSI_WATCH_TERMINATE_WAIT);
		if (scsi_watch_get_ref_count(token) == 0) {
			mutex_enter(SD_MUTEX(un));
			un->un_swr_token = (opaque_t)NULL;
		} else {
			mutex_enter(SD_MUTEX(un));
		}
	}

	/*
	 * Update the capacity kstat value, if no media previously
	 * (capacity kstat is 0) and a media has been inserted
	 * (un_f_blockcount_is_valid == TRUE)
	 */
	if (un->un_errstats) {
		struct sd_errstats	*stp = NULL;

		stp = (struct sd_errstats *)un->un_errstats->ks_data;
		if ((stp->sd_capacity.value.ui64 == 0) &&
		    (un->un_f_blockcount_is_valid == TRUE)) {
			stp->sd_capacity.value.ui64 =
			    (uint64_t)((uint64_t)un->un_blockcount *
			    un->un_sys_blocksize);
		}
	}
	mutex_exit(SD_MUTEX(un));
	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
	return (rval);
}
23816 
23817 
23818 /*
23819  *    Function: sd_delayed_cv_broadcast
23820  *
23821  * Description: Delayed cv_broadcast to allow for target to recover from media
23822  *		insertion.
23823  *
23824  *   Arguments: arg - driver soft state (unit) structure
23825  */
23826 
23827 static void
23828 sd_delayed_cv_broadcast(void *arg)
23829 {
23830 	struct sd_lun *un = arg;
23831 
23832 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
23833 
23834 	mutex_enter(SD_MUTEX(un));
23835 	un->un_dcvb_timeid = NULL;
23836 	cv_broadcast(&un->un_state_cv);
23837 	mutex_exit(SD_MUTEX(un));
23838 }
23839 
23840 
23841 /*
23842  *    Function: sd_media_watch_cb
23843  *
23844  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
23845  *		routine processes the TUR sense data and updates the driver
23846  *		state if a transition has occurred. The user thread
23847  *		(sd_check_media) is then signalled.
23848  *
23849  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
23850  *			among multiple watches that share this callback function
23851  *		resultp - scsi watch facility result packet containing scsi
23852  *			  packet, status byte and sense data
23853  *
23854  * Return Code: 0 for success, -1 for failure
23855  */
23856 
static int
sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
{
	struct sd_lun			*un;
	struct scsi_status		*statusp = resultp->statusp;
	uint8_t				*sensep = (uint8_t *)resultp->sensep;
	enum dkio_state			state = DKIO_NONE;
	dev_t				dev = (dev_t)arg;
	uchar_t				actual_sense_length;
	uint8_t				skey, asc, ascq;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (-1);
	}
	actual_sense_length = resultp->actual_sense_length;

	mutex_enter(SD_MUTEX(un));
	SD_TRACE(SD_LOG_COMMON, un,
	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
	    *((char *)statusp), (void *)sensep, actual_sense_length);

	/* If the device has disappeared, wake all waiters immediately. */
	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
		un->un_mediastate = DKIO_DEV_GONE;
		cv_broadcast(&un->un_state_cv);
		mutex_exit(SD_MUTEX(un));

		return (0);
	}

	if (un->un_f_mmc_cap && un->un_f_mmc_gesn_polling) {
		/*
		 * GESN polling: derive the media state from the media
		 * event status byte rather than from TUR sense data.
		 */
		if (sd_gesn_media_data_valid(resultp->mmc_data)) {
			if ((resultp->mmc_data[5] &
			    SD_GESN_MEDIA_EVENT_STATUS_PRESENT) != 0) {
				state = DKIO_INSERTED;
			} else {
				state = DKIO_EJECTED;
			}
			if ((resultp->mmc_data[4] & SD_GESN_MEDIA_EVENT_CODE) ==
			    SD_GESN_MEDIA_EVENT_EJECTREQUEST) {
				sd_log_eject_request_event(un, KM_NOSLEEP);
			}
		}
	} else if (sensep != NULL) {
		/*
		 * If there was a check condition then sensep points to valid
		 * sense data. If status was not a check condition but a
		 * reservation or busy status then the new state is DKIO_NONE.
		 */
		skey = scsi_sense_key(sensep);
		asc = scsi_sense_asc(sensep);
		ascq = scsi_sense_ascq(sensep);

		SD_INFO(SD_LOG_COMMON, un,
		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
		    skey, asc, ascq);
		/* This routine only uses up to 13 bytes of sense data. */
		if (actual_sense_length >= 13) {
			if (skey == KEY_UNIT_ATTENTION) {
				/* 06/28: not-ready-to-ready transition */
				if (asc == 0x28) {
					state = DKIO_INSERTED;
				}
			} else if (skey == KEY_NOT_READY) {
				/*
				 * Sense data of 02/06/00 means that the
				 * drive could not read the media (No
				 * reference position found). In this case
				 * to prevent a hang on the DKIOCSTATE IOCTL
				 * we set the media state to DKIO_INSERTED.
				 */
				if (asc == 0x06 && ascq == 0x00)
					state = DKIO_INSERTED;

				/*
				 * if 02/04/02  means that the host
				 * should send start command. Explicitly
				 * leave the media state as is
				 * (inserted) as the media is inserted
				 * and host has stopped device for PM
				 * reasons. Upon next true read/write
				 * to this media will bring the
				 * device to the right state good for
				 * media access.
				 */
				/* 02/3a: medium not present */
				if (asc == 0x3a) {
					state = DKIO_EJECTED;
				} else {
					/*
					 * If the drive is busy with an
					 * operation or long write, keep the
					 * media in an inserted state.
					 */

					if ((asc == 0x04) &&
					    ((ascq == 0x02) ||
					    (ascq == 0x07) ||
					    (ascq == 0x08))) {
						state = DKIO_INSERTED;
					}
				}
			} else if (skey == KEY_NO_SENSE) {
				if ((asc == 0x00) && (ascq == 0x00)) {
					/*
					 * Sense Data 00/00/00 does not provide
					 * any information about the state of
					 * the media. Ignore it.
					 */
					mutex_exit(SD_MUTEX(un));
					return (0);
				}
			}
		}
	} else if ((*((char *)statusp) == STATUS_GOOD) &&
	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
		/* Clean TUR completion means media is present. */
		state = DKIO_INSERTED;
	}

	SD_TRACE(SD_LOG_COMMON, un,
	    "sd_media_watch_cb: state=%x, specified=%x\n",
	    state, un->un_specified_mediastate);

	/*
	 * now signal the waiting thread if this is *not* the specified state;
	 * delay the signal if the state is DKIO_INSERTED to allow the target
	 * to recover
	 */
	if (state != un->un_specified_mediastate) {
		un->un_mediastate = state;
		if (state == DKIO_INSERTED) {
			/*
			 * delay the signal to give the drive a chance
			 * to do what it apparently needs to do
			 */
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_media_watch_cb: delayed cv_broadcast\n");
			if (un->un_dcvb_timeid == NULL) {
				un->un_dcvb_timeid =
				    timeout(sd_delayed_cv_broadcast, un,
				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
			}
		} else {
			SD_TRACE(SD_LOG_COMMON, un,
			    "sd_media_watch_cb: immediate cv_broadcast\n");
			cv_broadcast(&un->un_state_cv);
		}
	}
	mutex_exit(SD_MUTEX(un));
	return (0);
}
24005 
24006 
24007 /*
24008  *    Function: sd_dkio_get_temp
24009  *
24010  * Description: This routine is the driver entry point for handling ioctl
24011  *		requests to get the disk temperature.
24012  *
24013  *   Arguments: dev  - the device number
24014  *		arg  - pointer to user provided dk_temperature structure.
24015  *		flag - this argument is a pass through to ddi_copyxxx()
24016  *		       directly from the mode argument of ioctl().
24017  *
24018  * Return Code: 0
24019  *		EFAULT
24020  *		ENXIO
24021  *		EAGAIN
24022  */
24023 
static int
sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un = NULL;
	struct dk_temperature	*dktemp = NULL;
	uchar_t			*temperature_page;
	int			rval = 0;
	int			path_flag = SD_PATH_STANDARD;
	sd_ssc_t		*ssc;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	ssc = sd_ssc_init(un);
	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);

	/* copyin the disk temp argument to get the user flags */
	if (ddi_copyin((void *)arg, dktemp,
	    sizeof (struct dk_temperature), flag) != 0) {
		rval = EFAULT;
		goto done;
	}

	/* Initialize the temperature to invalid. */
	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;

	/*
	 * Note: Investigate removing the "bypass pm" semantic.
	 * Can we just bypass PM always?
	 */
	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
		path_flag = SD_PATH_DIRECT;
		ASSERT(!mutex_owned(&un->un_pm_mutex));
		mutex_enter(&un->un_pm_mutex);
		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
			/*
			 * If DKT_BYPASS_PM is set, and the drive happens to be
			 * in low power mode, we can not wake it up, Need to
			 * return EAGAIN.
			 */
			mutex_exit(&un->un_pm_mutex);
			rval = EAGAIN;
			goto done;
		} else {
			/*
			 * Indicate to PM the device is busy. This is required
			 * to avoid a race - i.e. the ioctl is issuing a
			 * command and the pm framework brings down the device
			 * to low power mode (possible power cut-off on some
			 * platforms).
			 */
			mutex_exit(&un->un_pm_mutex);
			if (sd_pm_entry(un) != DDI_SUCCESS) {
				rval = EAGAIN;
				goto done;
			}
		}
	}

	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);

	/* Read the temperature log page from the device. */
	rval = sd_send_scsi_LOG_SENSE(ssc, temperature_page,
	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag);
	if (rval != 0)
		goto done2;

	/*
	 * For the current temperature verify that the parameter length is 0x02
	 * and the parameter code is 0x00
	 */
	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
	    (temperature_page[5] == 0x00)) {
		/* A value byte of 0xFF means no valid temperature. */
		if (temperature_page[9] == 0xFF) {
			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
		} else {
			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
		}
	}

	/*
	 * For the reference temperature verify that the parameter
	 * length is 0x02 and the parameter code is 0x01
	 */
	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
	    (temperature_page[11] == 0x01)) {
		if (temperature_page[15] == 0xFF) {
			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
		} else {
			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
		}
	}

	/* Do the copyout regardless of the temperature commands status. */
	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
	    flag) != 0) {
		rval = EFAULT;
		goto done1;
	}

done2:
	/* Record the FMA assessment for any device-command failure. */
	if (rval != 0) {
		if (rval == EIO)
			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
		else
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}
done1:
	/*
	 * Reaching here means sd_pm_entry() succeeded on the bypass-PM
	 * path (failure paths jump to "done"), so balance it now.
	 */
	if (path_flag == SD_PATH_DIRECT) {
		sd_pm_exit(un);
	}

	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
done:
	sd_ssc_fini(ssc);
	if (dktemp != NULL) {
		kmem_free(dktemp, sizeof (struct dk_temperature));
	}

	return (rval);
}
24146 
24147 
24148 /*
24149  *    Function: sd_log_page_supported
24150  *
24151  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
24152  *		supported log pages.
24153  *
24154  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
24155  *                      structure for this target.
 *		log_page - the log page code to look for in the device's
 *			   list of supported log pages.
24157  *
24158  * Return Code: -1 - on error (log sense is optional and may not be supported).
24159  *		0  - log page not found.
24160  *  		1  - log page found.
24161  */
24162 
24163 static int
24164 sd_log_page_supported(sd_ssc_t *ssc, int log_page)
24165 {
24166 	uchar_t *log_page_data;
24167 	int	i;
24168 	int	match = 0;
24169 	int	log_size;
24170 	int	status = 0;
24171 	struct sd_lun	*un;
24172 
24173 	ASSERT(ssc != NULL);
24174 	un = ssc->ssc_un;
24175 	ASSERT(un != NULL);
24176 
24177 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
24178 
24179 	status = sd_send_scsi_LOG_SENSE(ssc, log_page_data, 0xFF, 0, 0x01, 0,
24180 	    SD_PATH_DIRECT);
24181 
24182 	if (status != 0) {
24183 		if (status == EIO) {
24184 			/*
24185 			 * Some disks do not support log sense, we
24186 			 * should ignore this kind of error(sense key is
24187 			 * 0x5 - illegal request).
24188 			 */
24189 			uint8_t *sensep;
24190 			int senlen;
24191 
24192 			sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
24193 			senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
24194 			    ssc->ssc_uscsi_cmd->uscsi_rqresid);
24195 
24196 			if (senlen > 0 &&
24197 			    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
24198 				sd_ssc_assessment(ssc,
24199 				    SD_FMT_IGNORE_COMPROMISE);
24200 			} else {
24201 				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
24202 			}
24203 		} else {
24204 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
24205 		}
24206 
24207 		SD_ERROR(SD_LOG_COMMON, un,
24208 		    "sd_log_page_supported: failed log page retrieval\n");
24209 		kmem_free(log_page_data, 0xFF);
24210 		return (-1);
24211 	}
24212 
24213 	log_size = log_page_data[3];
24214 
24215 	/*
24216 	 * The list of supported log pages start from the fourth byte. Check
24217 	 * until we run out of log pages or a match is found.
24218 	 */
24219 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
24220 		if (log_page_data[i] == log_page) {
24221 			match++;
24222 		}
24223 	}
24224 	kmem_free(log_page_data, 0xFF);
24225 	return (match);
24226 }
24227 
24228 
24229 /*
24230  *    Function: sd_mhdioc_failfast
24231  *
24232  * Description: This routine is the driver entry point for handling ioctl
24233  *		requests to enable/disable the multihost failfast option.
24234  *		(MHIOCENFAILFAST)
24235  *
24236  *   Arguments: dev	- the device number
24237  *		arg	- user specified probing interval.
24238  *		flag	- this argument is a pass through to ddi_copyxxx()
24239  *			  directly from the mode argument of ioctl().
24240  *
24241  * Return Code: 0
24242  *		EFAULT
24243  *		ENXIO
24244  */
24245 
24246 static int
24247 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
24248 {
24249 	struct sd_lun	*un = NULL;
24250 	int		mh_time;
24251 	int		rval = 0;
24252 
24253 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24254 		return (ENXIO);
24255 	}
24256 
24257 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
24258 		return (EFAULT);
24259 
24260 	if (mh_time) {
24261 		mutex_enter(SD_MUTEX(un));
24262 		un->un_resvd_status |= SD_FAILFAST;
24263 		mutex_exit(SD_MUTEX(un));
24264 		/*
24265 		 * If mh_time is INT_MAX, then this ioctl is being used for
24266 		 * SCSI-3 PGR purposes, and we don't need to spawn watch thread.
24267 		 */
24268 		if (mh_time != INT_MAX) {
24269 			rval = sd_check_mhd(dev, mh_time);
24270 		}
24271 	} else {
24272 		(void) sd_check_mhd(dev, 0);
24273 		mutex_enter(SD_MUTEX(un));
24274 		un->un_resvd_status &= ~SD_FAILFAST;
24275 		mutex_exit(SD_MUTEX(un));
24276 	}
24277 	return (rval);
24278 }
24279 
24280 
24281 /*
24282  *    Function: sd_mhdioc_takeown
24283  *
24284  * Description: This routine is the driver entry point for handling ioctl
24285  *		requests to forcefully acquire exclusive access rights to the
24286  *		multihost disk (MHIOCTKOWN).
24287  *
24288  *   Arguments: dev	- the device number
24289  *		arg	- user provided structure specifying the delay
24290  *			  parameters in milliseconds
24291  *		flag	- this argument is a pass through to ddi_copyxxx()
24292  *			  directly from the mode argument of ioctl().
24293  *
24294  * Return Code: 0
24295  *		EFAULT
24296  *		ENXIO
24297  */
24298 
static int
sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
{
	struct sd_lun		*un = NULL;
	struct mhioctkown	*tkown = NULL;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/* A NULL arg is legal: default delay parameters are used below. */
	if (arg != NULL) {
		tkown = (struct mhioctkown *)
		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
		if (rval != 0) {
			rval = EFAULT;
			goto error;
		}
	}

	rval = sd_take_ownership(dev, tkown);
	mutex_enter(SD_MUTEX(un));
	if (rval == 0) {
		/* Ownership acquired: record reservation, set the delay. */
		un->un_resvd_status |= SD_RESERVE;
		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
			sd_reinstate_resv_delay =
			    tkown->reinstate_resv_delay * 1000;
		} else {
			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
		}
		/*
		 * Give the scsi_watch routine interval set by
		 * the MHIOCENFAILFAST ioctl precedence here.
		 */
		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
			mutex_exit(SD_MUTEX(un));
			(void) sd_check_mhd(dev, sd_reinstate_resv_delay/1000);
			SD_TRACE(SD_LOG_IOCTL_MHD, un,
			    "sd_mhdioc_takeown : %d\n",
			    sd_reinstate_resv_delay);
		} else {
			mutex_exit(SD_MUTEX(un));
		}
		/* Arrange to be notified of bus/device resets. */
		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
		    sd_mhd_reset_notify_cb, (caddr_t)un);
	} else {
		/* Takeover failed: make sure SD_RESERVE is not left set. */
		un->un_resvd_status &= ~SD_RESERVE;
		mutex_exit(SD_MUTEX(un));
	}

error:
	if (tkown != NULL) {
		kmem_free(tkown, sizeof (struct mhioctkown));
	}
	return (rval);
}
24356 
24357 
24358 /*
24359  *    Function: sd_mhdioc_release
24360  *
24361  * Description: This routine is the driver entry point for handling ioctl
24362  *		requests to release exclusive access rights to the multihost
24363  *		disk (MHIOCRELEASE).
24364  *
24365  *   Arguments: dev	- the device number
24366  *
24367  * Return Code: 0
24368  *		ENXIO
24369  */
24370 
static int
sd_mhdioc_release(dev_t dev)
{
	struct sd_lun		*un = NULL;
	timeout_id_t		resvd_timeid_save;
	int			resvd_status_save;
	int			rval = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/*
	 * Save the current reservation status so it can be restored if
	 * the release fails, clear the reservation flags, and cancel any
	 * pending reservation-reclaim timeout (SD_MUTEX is dropped around
	 * the untimeout call).
	 */
	mutex_enter(SD_MUTEX(un));
	resvd_status_save = un->un_resvd_status;
	un->un_resvd_status &=
	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
	if (un->un_resvd_timeid) {
		resvd_timeid_save = un->un_resvd_timeid;
		un->un_resvd_timeid = NULL;
		mutex_exit(SD_MUTEX(un));
		(void) untimeout(resvd_timeid_save);
	} else {
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * destroy any pending timeout thread that may be attempting to
	 * reinstate reservation on this device.
	 */
	sd_rmv_resv_reclaim_req(dev);

	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
		/*
		 * Release succeeded: stop the MHD watch unless failfast
		 * is still enabled, and cancel reset notification.
		 */
		mutex_enter(SD_MUTEX(un));
		if ((un->un_mhd_token) &&
		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
			mutex_exit(SD_MUTEX(un));
			(void) sd_check_mhd(dev, 0);
		} else {
			mutex_exit(SD_MUTEX(un));
		}
		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
		    sd_mhd_reset_notify_cb, (caddr_t)un);
	} else {
		/*
		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
		 */
		mutex_enter(SD_MUTEX(un));
		un->un_resvd_status = resvd_status_save;
		mutex_exit(SD_MUTEX(un));
	}
	return (rval);
}
24423 
24424 
24425 /*
24426  *    Function: sd_mhdioc_register_devid
24427  *
24428  * Description: This routine is the driver entry point for handling ioctl
24429  *		requests to register the device id (MHIOCREREGISTERDEVID).
24430  *
24431  *		Note: The implementation for this ioctl has been updated to
24432  *		be consistent with the original PSARC case (1999/357)
24433  *		(4375899, 4241671, 4220005)
24434  *
24435  *   Arguments: dev	- the device number
24436  *
24437  * Return Code: 0
24438  *		ENXIO
24439  */
24440 
static int
sd_mhdioc_register_devid(dev_t dev)
{
	struct sd_lun	*un = NULL;
	int		rval = 0;
	sd_ssc_t	*ssc;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	ASSERT(!mutex_owned(SD_MUTEX(un)));

	mutex_enter(SD_MUTEX(un));

	/* If a devid already exists, de-register it */
	if (un->un_devid != NULL) {
		ddi_devid_unregister(SD_DEVINFO(un));
		/*
		 * After unregister devid, needs to free devid memory
		 */
		ddi_devid_free(un->un_devid);
		un->un_devid = NULL;
	}

	/* Check for reservation conflict */
	mutex_exit(SD_MUTEX(un));
	ssc = sd_ssc_init(un);
	rval = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
	mutex_enter(SD_MUTEX(un));

	switch (rval) {
	case 0:
		/* Unit is ready and unreserved; register a fresh devid. */
		sd_register_devid(ssc, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
		break;
	case EACCES:
		/*
		 * Reservation conflict: pass EACCES through to the caller
		 * without registering a devid.
		 */
		break;
	default:
		/* Any other TUR failure is reported as a plain I/O error. */
		rval = EIO;
	}

	mutex_exit(SD_MUTEX(un));
	/*
	 * Record an assessment for the failed TEST UNIT READY on the ssc:
	 * EIO gets a status-check assessment, anything else is ignored.
	 */
	if (rval != 0) {
		if (rval == EIO)
			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
		else
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}
	sd_ssc_fini(ssc);
	return (rval);
}
24492 
24493 
24494 /*
24495  *    Function: sd_mhdioc_inkeys
24496  *
24497  * Description: This routine is the driver entry point for handling ioctl
24498  *		requests to issue the SCSI-3 Persistent In Read Keys command
24499  *		to the device (MHIOCGRP_INKEYS).
24500  *
24501  *   Arguments: dev	- the device number
24502  *		arg	- user provided in_keys structure
24503  *		flag	- this argument is a pass through to ddi_copyxxx()
24504  *			  directly from the mode argument of ioctl().
24505  *
24506  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
24507  *		ENXIO
24508  *		EFAULT
24509  */
24510 
24511 static int
24512 sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
24513 {
24514 	struct sd_lun		*un;
24515 	mhioc_inkeys_t		inkeys;
24516 	int			rval = 0;
24517 
24518 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24519 		return (ENXIO);
24520 	}
24521 
24522 #ifdef _MULTI_DATAMODEL
24523 	switch (ddi_model_convert_from(flag & FMODELS)) {
24524 	case DDI_MODEL_ILP32: {
24525 		struct mhioc_inkeys32	inkeys32;
24526 
24527 		if (ddi_copyin(arg, &inkeys32,
24528 		    sizeof (struct mhioc_inkeys32), flag) != 0) {
24529 			return (EFAULT);
24530 		}
24531 		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
24532 		if ((rval = sd_persistent_reservation_in_read_keys(un,
24533 		    &inkeys, flag)) != 0) {
24534 			return (rval);
24535 		}
24536 		inkeys32.generation = inkeys.generation;
24537 		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
24538 		    flag) != 0) {
24539 			return (EFAULT);
24540 		}
24541 		break;
24542 	}
24543 	case DDI_MODEL_NONE:
24544 		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
24545 		    flag) != 0) {
24546 			return (EFAULT);
24547 		}
24548 		if ((rval = sd_persistent_reservation_in_read_keys(un,
24549 		    &inkeys, flag)) != 0) {
24550 			return (rval);
24551 		}
24552 		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
24553 		    flag) != 0) {
24554 			return (EFAULT);
24555 		}
24556 		break;
24557 	}
24558 
24559 #else /* ! _MULTI_DATAMODEL */
24560 
24561 	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
24562 		return (EFAULT);
24563 	}
24564 	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
24565 	if (rval != 0) {
24566 		return (rval);
24567 	}
24568 	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
24569 		return (EFAULT);
24570 	}
24571 
24572 #endif /* _MULTI_DATAMODEL */
24573 
24574 	return (rval);
24575 }
24576 
24577 
24578 /*
24579  *    Function: sd_mhdioc_inresv
24580  *
24581  * Description: This routine is the driver entry point for handling ioctl
24582  *		requests to issue the SCSI-3 Persistent In Read Reservations
 *		command to the device (MHIOCGRP_INRESV).
24584  *
24585  *   Arguments: dev	- the device number
24586  *		arg	- user provided in_resv structure
24587  *		flag	- this argument is a pass through to ddi_copyxxx()
24588  *			  directly from the mode argument of ioctl().
24589  *
24590  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
24591  *		ENXIO
24592  *		EFAULT
24593  */
24594 
24595 static int
24596 sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
24597 {
24598 	struct sd_lun		*un;
24599 	mhioc_inresvs_t		inresvs;
24600 	int			rval = 0;
24601 
24602 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24603 		return (ENXIO);
24604 	}
24605 
24606 #ifdef _MULTI_DATAMODEL
24607 
24608 	switch (ddi_model_convert_from(flag & FMODELS)) {
24609 	case DDI_MODEL_ILP32: {
24610 		struct mhioc_inresvs32	inresvs32;
24611 
24612 		if (ddi_copyin(arg, &inresvs32,
24613 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
24614 			return (EFAULT);
24615 		}
24616 		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
24617 		if ((rval = sd_persistent_reservation_in_read_resv(un,
24618 		    &inresvs, flag)) != 0) {
24619 			return (rval);
24620 		}
24621 		inresvs32.generation = inresvs.generation;
24622 		if (ddi_copyout(&inresvs32, arg,
24623 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
24624 			return (EFAULT);
24625 		}
24626 		break;
24627 	}
24628 	case DDI_MODEL_NONE:
24629 		if (ddi_copyin(arg, &inresvs,
24630 		    sizeof (mhioc_inresvs_t), flag) != 0) {
24631 			return (EFAULT);
24632 		}
24633 		if ((rval = sd_persistent_reservation_in_read_resv(un,
24634 		    &inresvs, flag)) != 0) {
24635 			return (rval);
24636 		}
24637 		if (ddi_copyout(&inresvs, arg,
24638 		    sizeof (mhioc_inresvs_t), flag) != 0) {
24639 			return (EFAULT);
24640 		}
24641 		break;
24642 	}
24643 
24644 #else /* ! _MULTI_DATAMODEL */
24645 
24646 	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
24647 		return (EFAULT);
24648 	}
24649 	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
24650 	if (rval != 0) {
24651 		return (rval);
24652 	}
24653 	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag)) {
24654 		return (EFAULT);
24655 	}
24656 
24657 #endif /* ! _MULTI_DATAMODEL */
24658 
24659 	return (rval);
24660 }
24661 
24662 
24663 /*
24664  * The following routines support the clustering functionality described below
24665  * and implement lost reservation reclaim functionality.
24666  *
24667  * Clustering
24668  * ----------
24669  * The clustering code uses two different, independent forms of SCSI
24670  * reservation. Traditional SCSI-2 Reserve/Release and the newer SCSI-3
24671  * Persistent Group Reservations. For any particular disk, it will use either
24672  * SCSI-2 or SCSI-3 PGR but never both at the same time for the same disk.
24673  *
24674  * SCSI-2
24675  * The cluster software takes ownership of a multi-hosted disk by issuing the
24676  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
24677  * MHIOCRELEASE ioctl.  Closely related is the MHIOCENFAILFAST ioctl -- a
24678  * cluster, just after taking ownership of the disk with the MHIOCTKOWN ioctl
24679  * then issues the MHIOCENFAILFAST ioctl.  This ioctl "enables failfast" in the
24680  * driver. The meaning of failfast is that if the driver (on this host) ever
24681  * encounters the scsi error return code RESERVATION_CONFLICT from the device,
24682  * it should immediately panic the host. The motivation for this ioctl is that
24683  * if this host does encounter reservation conflict, the underlying cause is
24684  * that some other host of the cluster has decided that this host is no longer
24685  * in the cluster and has seized control of the disks for itself. Since this
24686  * host is no longer in the cluster, it ought to panic itself. The
24687  * MHIOCENFAILFAST ioctl does two things:
24688  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
24689  *      error to panic the host
24690  *      (b) it sets up a periodic timer to test whether this host still has
24691  *      "access" (in that no other host has reserved the device):  if the
24692  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
24693  *      purpose of that periodic timer is to handle scenarios where the host is
24694  *      otherwise temporarily quiescent, temporarily doing no real i/o.
24695  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
24696  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
24697  * the device itself.
24698  *
24699  * SCSI-3 PGR
24700  * A direct semantic implementation of the SCSI-3 Persistent Reservation
24701  * facility is supported through the shared multihost disk ioctls
24702  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
24703  * MHIOCGRP_PREEMPTANDABORT)
24704  *
24705  * Reservation Reclaim:
24706  * --------------------
24707  * To support the lost reservation reclaim operations this driver creates a
24708  * single thread to handle reinstating reservations on all devices that have
 * lost reservations. sd_resv_reclaim_requests are logged for all devices that
 * have LOST RESERVATIONS when the scsi watch facility calls back
 * sd_mhd_watch_cb
24711  * and the reservation reclaim thread loops through the requests to regain the
24712  * lost reservations.
24713  */
24714 
24715 /*
24716  *    Function: sd_check_mhd()
24717  *
24718  * Description: This function sets up and submits a scsi watch request or
24719  *		terminates an existing watch request. This routine is used in
24720  *		support of reservation reclaim.
24721  *
24722  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
24723  *			 among multiple watches that share the callback function
 *		interval - the number of milliseconds specifying the watch
 *			   interval for issuing TEST UNIT READY commands. If
24726  *			   set to 0 the watch should be terminated. If the
24727  *			   interval is set to 0 and if the device is required
24728  *			   to hold reservation while disabling failfast, the
24729  *			   watch is restarted with an interval of
24730  *			   reinstate_resv_delay.
24731  *
24732  * Return Code: 0	   - Successful submit/terminate of scsi watch request
24733  *		ENXIO      - Indicates an invalid device was specified
24734  *		EAGAIN     - Unable to submit the scsi watch request
24735  */
24736 
static int
sd_check_mhd(dev_t dev, int interval)
{
	struct sd_lun	*un;
	opaque_t	token;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	/* is this a watch termination request? */
	if (interval == 0) {
		mutex_enter(SD_MUTEX(un));
		/* if there is an existing watch task then terminate it */
		if (un->un_mhd_token) {
			/*
			 * Clear the token before dropping the mutex so no
			 * one else terminates the same request; the mutex
			 * must be dropped since the terminate call can wait
			 * for an in-flight callback.
			 */
			token = un->un_mhd_token;
			un->un_mhd_token = NULL;
			mutex_exit(SD_MUTEX(un));
			(void) scsi_watch_request_terminate(token,
			    SCSI_WATCH_TERMINATE_ALL_WAIT);
			mutex_enter(SD_MUTEX(un));
		} else {
			mutex_exit(SD_MUTEX(un));
			/*
			 * Note: If we return here we don't check for the
			 * failfast case. This is the original legacy
			 * implementation but perhaps we should be checking
			 * the failfast case.
			 */
			return (0);
		}
		/*
		 * If the device is required to hold reservation while
		 * disabling failfast, we need to restart the scsi_watch
		 * routine with an interval of reinstate_resv_delay.
		 */
		if (un->un_resvd_status & SD_RESERVE) {
			interval = sd_reinstate_resv_delay/1000;
		} else {
			/* no failfast so bail */
			mutex_exit(SD_MUTEX(un));
			return (0);
		}
		mutex_exit(SD_MUTEX(un));
	}

	/*
	 * adjust minimum time interval to 1 second,
	 * and convert from msecs to usecs
	 */
	if (interval > 0 && interval < 1000) {
		interval = 1000;
	}
	interval *= 1000;

	/*
	 * submit the request to the scsi_watch service
	 */
	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
	if (token == NULL) {
		/* The watch service could not accept the request. */
		return (EAGAIN);
	}

	/*
	 * save token for termination later on
	 */
	mutex_enter(SD_MUTEX(un));
	un->un_mhd_token = token;
	mutex_exit(SD_MUTEX(un));
	return (0);
}
24809 
24810 
24811 /*
24812  *    Function: sd_mhd_watch_cb()
24813  *
24814  * Description: This function is the call back function used by the scsi watch
24815  *		facility. The scsi watch facility sends the "Test Unit Ready"
24816  *		and processes the status. If applicable (i.e. a "Unit Attention"
24817  *		status and automatic "Request Sense" not used) the scsi watch
24818  *		facility will send a "Request Sense" and retrieve the sense data
24819  *		to be passed to this callback function. In either case the
24820  *		automatic "Request Sense" or the facility submitting one, this
24821  *		callback is passed the status and sense data.
24822  *
24823  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24824  *			among multiple watches that share this callback function
24825  *		resultp - scsi watch facility result packet containing scsi
24826  *			  packet, status byte and sense data
24827  *
24828  * Return Code: 0 - continue the watch task
24829  *		non-zero - terminate the watch task
24830  */
24831 
static int
sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
{
	struct sd_lun			*un;
	struct scsi_status		*statusp;
	uint8_t				*sensep;
	struct scsi_pkt			*pkt;
	uchar_t				actual_sense_length;
	dev_t  				dev = (dev_t)arg;

	ASSERT(resultp != NULL);
	statusp			= resultp->statusp;
	sensep			= (uint8_t *)resultp->sensep;
	pkt			= resultp->pkt;
	actual_sense_length	= resultp->actual_sense_length;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		/* Soft state is gone; non-zero return terminates the watch. */
		return (ENXIO);
	}

	SD_TRACE(SD_LOG_IOCTL_MHD, un,
	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));

	/* Begin processing of the status and/or sense data */
	if (pkt->pkt_reason != CMD_CMPLT) {
		/* Handle the incomplete packet */
		sd_mhd_watch_incomplete(un, pkt);
		return (0);
	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
		if (*((unsigned char *)statusp)
		    == STATUS_RESERVATION_CONFLICT) {
			/*
			 * Handle a reservation conflict by panicking if
			 * configured for failfast or by logging the conflict
			 * and updating the reservation status
			 */
			mutex_enter(SD_MUTEX(un));
			if ((un->un_resvd_status & SD_FAILFAST) &&
			    (sd_failfast_enable)) {
				sd_panic_for_res_conflict(un);
				/*NOTREACHED*/
			}
			SD_INFO(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_cb: Reservation Conflict\n");
			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
			mutex_exit(SD_MUTEX(un));
		}
	}

	/*
	 * Inspect any sense data; on exit from this if/else the mutex is
	 * held (except on the early return for short sense data).
	 */
	if (sensep != NULL) {
		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
			mutex_enter(SD_MUTEX(un));
			if ((scsi_sense_asc(sensep) ==
			    SD_SCSI_RESET_SENSE_CODE) &&
			    (un->un_resvd_status & SD_RESERVE)) {
				/*
				 * The additional sense code indicates a power
				 * on or bus device reset has occurred; update
				 * the reservation status.
				 */
				un->un_resvd_status |=
				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_mhd_watch_cb: Lost Reservation\n");
			}
		} else {
			/* Sense data too short to interpret; skip this poll. */
			return (0);
		}
	} else {
		mutex_enter(SD_MUTEX(un));
	}

	if ((un->un_resvd_status & SD_RESERVE) &&
	    (un->un_resvd_status & SD_LOST_RESERVE)) {
		if (un->un_resvd_status & SD_WANT_RESERVE) {
			/*
			 * A reset occurred in between the last probe and this
			 * one so if a timeout is pending cancel it.
			 */
			if (un->un_resvd_timeid) {
				timeout_id_t temp_id = un->un_resvd_timeid;
				un->un_resvd_timeid = NULL;
				mutex_exit(SD_MUTEX(un));
				(void) untimeout(temp_id);
				mutex_enter(SD_MUTEX(un));
			}
			un->un_resvd_status &= ~SD_WANT_RESERVE;
		}
		if (un->un_resvd_timeid == 0) {
			/* Schedule a timeout to handle the lost reservation */
			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
			    (void *)dev,
			    drv_usectohz(sd_reinstate_resv_delay));
		}
	}
	mutex_exit(SD_MUTEX(un));
	return (0);
}
24931 
24932 
24933 /*
24934  *    Function: sd_mhd_watch_incomplete()
24935  *
24936  * Description: This function is used to find out why a scsi pkt sent by the
24937  *		scsi watch facility was not completed. Under some scenarios this
24938  *		routine will return. Otherwise it will send a bus reset to see
24939  *		if the drive is still online.
24940  *
24941  *   Arguments: un  - driver soft state (unit) structure
24942  *		pkt - incomplete scsi pkt
24943  */
24944 
static void
sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
{
	int	be_chatty;
	int	perr;

	ASSERT(pkt != NULL);
	ASSERT(un != NULL);
	/* Suppress logging for FLAG_SILENT packets; note any parity error. */
	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
	perr		= (pkt->pkt_statistics & STAT_PERR);

	mutex_enter(SD_MUTEX(un));
	/* Never issue resets while a crash dump is in progress. */
	if (un->un_state == SD_STATE_DUMPING) {
		mutex_exit(SD_MUTEX(un));
		return;
	}

	switch (pkt->pkt_reason) {
	case CMD_UNX_BUS_FREE:
		/*
		 * If we had a parity error that caused the target to drop BSY*,
		 * don't be chatty about it.
		 */
		if (perr && be_chatty) {
			be_chatty = 0;
		}
		break;
	case CMD_TAG_REJECT:
		/*
		 * The SCSI-2 spec states that a tag reject will be sent by the
		 * target if tagged queuing is not supported. A tag reject may
		 * also be sent during certain initialization periods or to
		 * control internal resources. For the latter case the target
		 * may also return Queue Full.
		 *
		 * If this driver receives a tag reject from a target that is
		 * going through an init period or controlling internal
		 * resources tagged queuing will be disabled. This is a less
		 * than optimal behavior but the driver is unable to determine
		 * the target state and assumes tagged queueing is not supported
		 */
		pkt->pkt_flags = 0;
		un->un_tagflags = 0;

		if (un->un_f_opt_queueing == TRUE) {
			un->un_throttle = min(un->un_throttle, 3);
		} else {
			un->un_throttle = 1;
		}
		mutex_exit(SD_MUTEX(un));
		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
		mutex_enter(SD_MUTEX(un));
		break;
	case CMD_INCOMPLETE:
		/*
		 * The transport stopped with an abnormal state, fallthrough and
		 * reset the target and/or bus unless selection did not complete
		 * (indicated by STATE_GOT_BUS) in which case we don't want to
		 * go through a target/bus reset
		 */
		if (pkt->pkt_state == STATE_GOT_BUS) {
			break;
		}
		/*FALLTHROUGH*/

	case CMD_TIMEOUT:
	default:
		/*
		 * The lun may still be running the command, so a lun reset
		 * should be attempted. If the lun reset fails or cannot be
		 * issued, then try a target reset. Lastly try a bus reset.
		 */
		if ((pkt->pkt_statistics &
		    (STAT_BUS_RESET|STAT_DEV_RESET|STAT_ABORTED)) == 0) {
			int reset_retval = 0;
			/*
			 * NOTE(review): the un_f_allow_bus_device_reset and
			 * un_f_lun_reset_enabled flags are read after the
			 * mutex is dropped; presumably they are stable after
			 * attach — confirm.
			 */
			mutex_exit(SD_MUTEX(un));
			if (un->un_f_allow_bus_device_reset == TRUE) {
				if (un->un_f_lun_reset_enabled == TRUE) {
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_LUN);
				}
				if (reset_retval == 0) {
					reset_retval =
					    scsi_reset(SD_ADDRESS(un),
					    RESET_TARGET);
				}
			}
			if (reset_retval == 0) {
				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
			}
			mutex_enter(SD_MUTEX(un));
		}
		break;
	}

	/* A device/bus reset has occurred; update the reservation status. */
	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			un->un_resvd_status |=
			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
			SD_INFO(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_incomplete: Lost Reservation\n");
		}
	}

	/*
	 * The disk has been turned off; Update the device state.
	 *
	 * Note: Should we be offlining the disk here?
	 */
	if (pkt->pkt_state == STATE_GOT_BUS) {
		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
		    "Disk not responding to selection\n");
		if (un->un_state != SD_STATE_OFFLINE) {
			New_state(un, SD_STATE_OFFLINE);
		}
	} else if (be_chatty) {
		/*
		 * suppress messages if they are all the same pkt reason;
		 * with TQ, many (up to 256) are returned with the same
		 * pkt_reason
		 */
		if (pkt->pkt_reason != un->un_last_pkt_reason) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_mhd_watch_incomplete: "
			    "SCSI transport failed: reason '%s'\n",
			    scsi_rname(pkt->pkt_reason));
		}
	}
	un->un_last_pkt_reason = pkt->pkt_reason;
	mutex_exit(SD_MUTEX(un));
}
25079 
25080 
25081 /*
25082  *    Function: sd_sname()
25083  *
25084  * Description: This is a simple little routine to return a string containing
25085  *		a printable description of command status byte for use in
25086  *		logging.
25087  *
25088  *   Arguments: status - pointer to a status byte
25089  *
25090  * Return Code: char * - string containing status description.
25091  */
25092 
25093 static char *
25094 sd_sname(uchar_t status)
25095 {
25096 	switch (status & STATUS_MASK) {
25097 	case STATUS_GOOD:
25098 		return ("good status");
25099 	case STATUS_CHECK:
25100 		return ("check condition");
25101 	case STATUS_MET:
25102 		return ("condition met");
25103 	case STATUS_BUSY:
25104 		return ("busy");
25105 	case STATUS_INTERMEDIATE:
25106 		return ("intermediate");
25107 	case STATUS_INTERMEDIATE_MET:
25108 		return ("intermediate - condition met");
25109 	case STATUS_RESERVATION_CONFLICT:
25110 		return ("reservation_conflict");
25111 	case STATUS_TERMINATED:
25112 		return ("command terminated");
25113 	case STATUS_QFULL:
25114 		return ("queue full");
25115 	default:
25116 		return ("<unknown status>");
25117 	}
25118 }
25119 
25120 
25121 /*
25122  *    Function: sd_mhd_resvd_recover()
25123  *
25124  * Description: This function adds a reservation entry to the
25125  *		sd_resv_reclaim_request list and signals the reservation
25126  *		reclaim thread that there is work pending. If the reservation
25127  *		reclaim thread has not been previously created this function
25128  *		will kick it off.
25129  *
25130  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25131  *			among multiple watches that share this callback function
25132  *
25133  *     Context: This routine is called by timeout() and is run in interrupt
25134  *		context. It must not sleep or call other functions which may
25135  *		sleep.
25136  */
25137 
static void
sd_mhd_resvd_recover(void *arg)
{
	dev_t			dev = (dev_t)arg;
	struct sd_lun		*un;
	struct sd_thr_request	*sd_treq = NULL;
	struct sd_thr_request	*sd_cur = NULL;
	struct sd_thr_request	*sd_prev = NULL;
	int			already_there = 0;

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return;
	}

	/* This timeout has fired; clear its id before doing anything else. */
	mutex_enter(SD_MUTEX(un));
	un->un_resvd_timeid = NULL;
	if (un->un_resvd_status & SD_WANT_RESERVE) {
		/*
		 * There was a reset so don't issue the reserve, allow the
		 * sd_mhd_watch_cb callback function to notice this and
		 * reschedule the timeout for reservation.
		 */
		mutex_exit(SD_MUTEX(un));
		return;
	}
	mutex_exit(SD_MUTEX(un));

	/*
	 * Add this device to the sd_resv_reclaim_request list and the
	 * sd_resv_reclaim_thread should take care of the rest.
	 *
	 * Note: We can't sleep in this context so if the memory allocation
	 * fails allow the sd_mhd_watch_cb callback function to notice this and
	 * reschedule the timeout for reservation.  (4378460)
	 */
	sd_treq = (struct sd_thr_request *)
	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
	if (sd_treq == NULL) {
		return;
	}

	sd_treq->sd_thr_req_next = NULL;
	sd_treq->dev = dev;
	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
	if (sd_tr.srq_thr_req_head == NULL) {
		sd_tr.srq_thr_req_head = sd_treq;
	} else {
		/* Walk the queue; only append if this dev is not present. */
		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
			if (sd_cur->dev == dev) {
				/*
				 * already in Queue so don't log
				 * another request for the device
				 */
				already_there = 1;
				break;
			}
			sd_prev = sd_cur;
		}
		if (!already_there) {
			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
			    "logging request for %lx\n", dev);
			sd_prev->sd_thr_req_next = sd_treq;
		} else {
			kmem_free(sd_treq, sizeof (struct sd_thr_request));
		}
	}

	/*
	 * Create a kernel thread to do the reservation reclaim and free up this
	 * thread. We cannot block this thread while we go away to do the
	 * reservation reclaim
	 */
	if (sd_tr.srq_resv_reclaim_thread == NULL)
		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
		    sd_resv_reclaim_thread, NULL,
		    0, &p0, TS_RUN, v.v_maxsyspri - 2);

	/* Tell the reservation reclaim thread that it has work to do */
	cv_signal(&sd_tr.srq_resv_reclaim_cv);
	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
}
25220 
25221 /*
25222  *    Function: sd_resv_reclaim_thread()
25223  *
25224  * Description: This function implements the reservation reclaim operations
25225  *
25226  *   Arguments: arg - the device 'dev_t' is used for context to discriminate
25227  *		      among multiple watches that share this callback function
25228  */
25229 
static void
sd_resv_reclaim_thread()
{
	struct sd_lun		*un;
	struct sd_thr_request	*sd_mhreq;

	/*
	 * Wait for work.  A wakeup with an empty queue simply falls through
	 * the while loop below and the thread exits.
	 */
	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
	if (sd_tr.srq_thr_req_head == NULL) {
		cv_wait(&sd_tr.srq_resv_reclaim_cv,
		    &sd_tr.srq_resv_reclaim_mutex);
	}

	/* Loop while we have work */
	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
		un = ddi_get_soft_state(sd_state,
		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
		if (un == NULL) {
			/*
			 * softstate structure is NULL so just
			 * dequeue the request and continue
			 */
			sd_tr.srq_thr_req_head =
			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
			kmem_free(sd_tr.srq_thr_cur_req,
			    sizeof (struct sd_thr_request));
			continue;
		}

		/* dequeue the request */
		sd_mhreq = sd_tr.srq_thr_cur_req;
		sd_tr.srq_thr_req_head =
		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);

		/*
		 * Reclaim reservation only if SD_RESERVE is still set. There
		 * may have been a call to MHIOCRELEASE before we got here.
		 */
		mutex_enter(SD_MUTEX(un));
		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
			/*
			 * Note: The SD_LOST_RESERVE flag is cleared before
			 * reclaiming the reservation. If this is done after the
			 * call to sd_reserve_release a reservation loss in the
			 * window between pkt completion of reserve cmd and
			 * mutex_enter below may not be recognized
			 */
			un->un_resvd_status &= ~SD_LOST_RESERVE;
			mutex_exit(SD_MUTEX(un));

			if (sd_reserve_release(sd_mhreq->dev,
			    SD_RESERVE) == 0) {
				mutex_enter(SD_MUTEX(un));
				un->un_resvd_status |= SD_RESERVE;
				mutex_exit(SD_MUTEX(un));
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_resv_reclaim_thread: "
				    "Reservation Recovered\n");
			} else {
				/* Reclaim failed; mark the reserve lost. */
				mutex_enter(SD_MUTEX(un));
				un->un_resvd_status |= SD_LOST_RESERVE;
				mutex_exit(SD_MUTEX(un));
				SD_INFO(SD_LOG_IOCTL_MHD, un,
				    "sd_resv_reclaim_thread: Failed "
				    "Reservation Recovery\n");
			}
		} else {
			mutex_exit(SD_MUTEX(un));
		}
		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
		/*
		 * wakeup the destroy thread if anyone is waiting on
		 * us to complete.
		 */
		cv_signal(&sd_tr.srq_inprocess_cv);
		SD_TRACE(SD_LOG_IOCTL_MHD, un,
		    "sd_resv_reclaim_thread: cv_signalling current request \n");
	}

	/*
	 * cleanup the sd_tr structure now that this thread will not exist
	 */
	ASSERT(sd_tr.srq_thr_req_head == NULL);
	ASSERT(sd_tr.srq_thr_cur_req == NULL);
	sd_tr.srq_resv_reclaim_thread = NULL;
	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
	thread_exit();
}
25322 
25323 
25324 /*
25325  *    Function: sd_rmv_resv_reclaim_req()
25326  *
25327  * Description: This function removes any pending reservation reclaim requests
25328  *		for the specified device.
25329  *
25330  *   Arguments: dev - the device 'dev_t'
25331  */
25332 
25333 static void
25334 sd_rmv_resv_reclaim_req(dev_t dev)
25335 {
25336 	struct sd_thr_request *sd_mhreq;
25337 	struct sd_thr_request *sd_prev;
25338 
25339 	/* Remove a reservation reclaim request from the list */
25340 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25341 	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
25342 		/*
25343 		 * We are attempting to reinstate reservation for
25344 		 * this device. We wait for sd_reserve_release()
25345 		 * to return before we return.
25346 		 */
25347 		cv_wait(&sd_tr.srq_inprocess_cv,
25348 		    &sd_tr.srq_resv_reclaim_mutex);
25349 	} else {
25350 		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
25351 		if (sd_mhreq && sd_mhreq->dev == dev) {
25352 			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
25353 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25354 			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25355 			return;
25356 		}
25357 		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
25358 			if (sd_mhreq && sd_mhreq->dev == dev) {
25359 				break;
25360 			}
25361 			sd_prev = sd_mhreq;
25362 		}
25363 		if (sd_mhreq != NULL) {
25364 			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
25365 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25366 		}
25367 	}
25368 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25369 }
25370 
25371 
25372 /*
25373  *    Function: sd_mhd_reset_notify_cb()
25374  *
25375  * Description: This is a call back function for scsi_reset_notify. This
25376  *		function updates the softstate reserved status and logs the
25377  *		reset. The driver scsi watch facility callback function
25378  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
25379  *		will reclaim the reservation.
25380  *
25381  *   Arguments: arg  - driver soft state (unit) structure
25382  */
25383 
25384 static void
25385 sd_mhd_reset_notify_cb(caddr_t arg)
25386 {
25387 	struct sd_lun *un = (struct sd_lun *)arg;
25388 
25389 	mutex_enter(SD_MUTEX(un));
25390 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25391 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
25392 		SD_INFO(SD_LOG_IOCTL_MHD, un,
25393 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
25394 	}
25395 	mutex_exit(SD_MUTEX(un));
25396 }
25397 
25398 
25399 /*
25400  *    Function: sd_take_ownership()
25401  *
25402  * Description: This routine implements an algorithm to achieve a stable
25403  *		reservation on disks which don't implement priority reserve,
25404  *		and makes sure that other host lose re-reservation attempts.
25405  *		This algorithm contains of a loop that keeps issuing the RESERVE
25406  *		for some period of time (min_ownership_delay, default 6 seconds)
25407  *		During that loop, it looks to see if there has been a bus device
25408  *		reset or bus reset (both of which cause an existing reservation
25409  *		to be lost). If the reservation is lost issue RESERVE until a
25410  *		period of min_ownership_delay with no resets has gone by, or
25411  *		until max_ownership_delay has expired. This loop ensures that
25412  *		the host really did manage to reserve the device, in spite of
25413  *		resets. The looping for min_ownership_delay (default six
25414  *		seconds) is important to early generation clustering products,
25415  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
25416  *		MHIOCENFAILFAST periodic timer of two seconds. By having
25417  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
25418  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
25419  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
25420  *		have already noticed, via the MHIOCENFAILFAST polling, that it
25421  *		no longer "owns" the disk and will have panicked itself.  Thus,
25422  *		the host issuing the MHIOCTKOWN is assured (with timing
25423  *		dependencies) that by the time it actually starts to use the
25424  *		disk for real work, the old owner is no longer accessing it.
25425  *
25426  *		min_ownership_delay is the minimum amount of time for which the
25427  *		disk must be reserved continuously devoid of resets before the
25428  *		MHIOCTKOWN ioctl will return success.
25429  *
25430  *		max_ownership_delay indicates the amount of time by which the
25431  *		take ownership should succeed or timeout with an error.
25432  *
25433  *   Arguments: dev - the device 'dev_t'
25434  *		*p  - struct containing timing info.
25435  *
25436  * Return Code: 0 for success or error code
25437  */
25438 
25439 static int
25440 sd_take_ownership(dev_t dev, struct mhioctkown *p)
25441 {
25442 	struct sd_lun	*un;
25443 	int		rval;
25444 	int		err;
25445 	int		reservation_count   = 0;
25446 	int		min_ownership_delay =  6000000; /* in usec */
25447 	int		max_ownership_delay = 30000000; /* in usec */
25448 	clock_t		start_time;	/* starting time of this algorithm */
25449 	clock_t		end_time;	/* time limit for giving up */
25450 	clock_t		ownership_time;	/* time limit for stable ownership */
25451 	clock_t		current_time;
25452 	clock_t		previous_current_time;
25453 
25454 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25455 		return (ENXIO);
25456 	}
25457 
25458 	/*
25459 	 * Attempt a device reservation. A priority reservation is requested.
25460 	 */
25461 	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
25462 	    != SD_SUCCESS) {
25463 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
25464 		    "sd_take_ownership: return(1)=%d\n", rval);
25465 		return (rval);
25466 	}
25467 
25468 	/* Update the softstate reserved status to indicate the reservation */
25469 	mutex_enter(SD_MUTEX(un));
25470 	un->un_resvd_status |= SD_RESERVE;
25471 	un->un_resvd_status &=
25472 	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
25473 	mutex_exit(SD_MUTEX(un));
25474 
25475 	if (p != NULL) {
25476 		if (p->min_ownership_delay != 0) {
25477 			min_ownership_delay = p->min_ownership_delay * 1000;
25478 		}
25479 		if (p->max_ownership_delay != 0) {
25480 			max_ownership_delay = p->max_ownership_delay * 1000;
25481 		}
25482 	}
25483 	SD_INFO(SD_LOG_IOCTL_MHD, un,
25484 	    "sd_take_ownership: min, max delays: %d, %d\n",
25485 	    min_ownership_delay, max_ownership_delay);
25486 
25487 	start_time = ddi_get_lbolt();
25488 	current_time	= start_time;
25489 	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
25490 	end_time	= start_time + drv_usectohz(max_ownership_delay);
25491 
25492 	while (current_time - end_time < 0) {
25493 		delay(drv_usectohz(500000));
25494 
25495 		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
25496 			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
25497 				mutex_enter(SD_MUTEX(un));
25498 				rval = (un->un_resvd_status &
25499 				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
25500 				mutex_exit(SD_MUTEX(un));
25501 				break;
25502 			}
25503 		}
25504 		previous_current_time = current_time;
25505 		current_time = ddi_get_lbolt();
25506 		mutex_enter(SD_MUTEX(un));
25507 		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
25508 			ownership_time = ddi_get_lbolt() +
25509 			    drv_usectohz(min_ownership_delay);
25510 			reservation_count = 0;
25511 		} else {
25512 			reservation_count++;
25513 		}
25514 		un->un_resvd_status |= SD_RESERVE;
25515 		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
25516 		mutex_exit(SD_MUTEX(un));
25517 
25518 		SD_INFO(SD_LOG_IOCTL_MHD, un,
25519 		    "sd_take_ownership: ticks for loop iteration=%ld, "
25520 		    "reservation=%s\n", (current_time - previous_current_time),
25521 		    reservation_count ? "ok" : "reclaimed");
25522 
25523 		if (current_time - ownership_time >= 0 &&
25524 		    reservation_count >= 4) {
25525 			rval = 0; /* Achieved a stable ownership */
25526 			break;
25527 		}
25528 		if (current_time - end_time >= 0) {
25529 			rval = EACCES; /* No ownership in max possible time */
25530 			break;
25531 		}
25532 	}
25533 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
25534 	    "sd_take_ownership: return(2)=%d\n", rval);
25535 	return (rval);
25536 }
25537 
25538 
25539 /*
25540  *    Function: sd_reserve_release()
25541  *
25542  * Description: This function builds and sends scsi RESERVE, RELEASE, and
25543  *		PRIORITY RESERVE commands based on a user specified command type
25544  *
25545  *   Arguments: dev - the device 'dev_t'
25546  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
25547  *		      SD_RESERVE, SD_RELEASE
25548  *
25549  * Return Code: 0 or Error Code
25550  */
25551 
25552 static int
25553 sd_reserve_release(dev_t dev, int cmd)
25554 {
25555 	struct uscsi_cmd	*com = NULL;
25556 	struct sd_lun		*un = NULL;
25557 	char			cdb[CDB_GROUP0];
25558 	int			rval;
25559 
25560 	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
25561 	    (cmd == SD_PRIORITY_RESERVE));
25562 
25563 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25564 		return (ENXIO);
25565 	}
25566 
25567 	/* instantiate and initialize the command and cdb */
25568 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
25569 	bzero(cdb, CDB_GROUP0);
25570 	com->uscsi_flags   = USCSI_SILENT;
25571 	com->uscsi_timeout = un->un_reserve_release_time;
25572 	com->uscsi_cdblen  = CDB_GROUP0;
25573 	com->uscsi_cdb	   = cdb;
25574 	if (cmd == SD_RELEASE) {
25575 		cdb[0] = SCMD_RELEASE;
25576 	} else {
25577 		cdb[0] = SCMD_RESERVE;
25578 	}
25579 
25580 	/* Send the command. */
25581 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
25582 	    SD_PATH_STANDARD);
25583 
25584 	/*
25585 	 * "break" a reservation that is held by another host, by issuing a
25586 	 * reset if priority reserve is desired, and we could not get the
25587 	 * device.
25588 	 */
25589 	if ((cmd == SD_PRIORITY_RESERVE) &&
25590 	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
25591 		/*
25592 		 * First try to reset the LUN. If we cannot, then try a target
25593 		 * reset, followed by a bus reset if the target reset fails.
25594 		 */
25595 		int reset_retval = 0;
25596 		if (un->un_f_lun_reset_enabled == TRUE) {
25597 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
25598 		}
25599 		if (reset_retval == 0) {
25600 			/* The LUN reset either failed or was not issued */
25601 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
25602 		}
25603 		if ((reset_retval == 0) &&
25604 		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
25605 			rval = EIO;
25606 			kmem_free(com, sizeof (*com));
25607 			return (rval);
25608 		}
25609 
25610 		bzero(com, sizeof (struct uscsi_cmd));
25611 		com->uscsi_flags   = USCSI_SILENT;
25612 		com->uscsi_cdb	   = cdb;
25613 		com->uscsi_cdblen  = CDB_GROUP0;
25614 		com->uscsi_timeout = 5;
25615 
25616 		/*
25617 		 * Reissue the last reserve command, this time without request
25618 		 * sense.  Assume that it is just a regular reserve command.
25619 		 */
25620 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
25621 		    SD_PATH_STANDARD);
25622 	}
25623 
25624 	/* Return an error if still getting a reservation conflict. */
25625 	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
25626 		rval = EACCES;
25627 	}
25628 
25629 	kmem_free(com, sizeof (*com));
25630 	return (rval);
25631 }
25632 
25633 
#define	SD_NDUMP_RETRIES	12
/*
 *	System Crash Dump routine
 *
 *	Writes nblk DEV_BSIZE-sized blocks from addr to the partition-relative
 *	block blkno, using polled (FLAG_NOINTR) SCSI commands. The device is
 *	powered up if needed, reset on the first entry, and a read-modify-write
 *	is performed when the request is not aligned to the target block size.
 *	Returns 0 on success or an errno value.
 */

static int
sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	int		instance;
	int		partition;
	int		i;
	int		err;
	struct sd_lun	*un;
	struct scsi_pkt *wr_pktp;
	struct buf	*wr_bp;
	struct buf	wr_buf;
	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
	daddr_t		tgt_blkno;	/* rmw - blkno for target */
	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
	size_t		io_start_offset;
	int		doing_rmw = FALSE;
	int		rval;
	ssize_t		dma_resid;
	daddr_t		oblkno;
	diskaddr_t	nblks = 0;
	diskaddr_t	start_block;

	/* Refuse to dump to an unlabeled device or a CD-ROM. */
	instance = SDUNIT(dev);
	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
	    !SD_IS_VALID_LABEL(un) || ISCD(un)) {
		return (ENXIO);
	}

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))

	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");

	partition = SDPART(dev);
	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);

	/*
	 * Reject requests whose start block or byte length is not a
	 * multiple of the target block size.
	 */
	if (!(NOT_DEVBSIZE(un))) {
		int secmask = 0;
		int blknomask = 0;

		blknomask = (un->un_tgt_blocksize / DEV_BSIZE) - 1;
		secmask = un->un_tgt_blocksize - 1;

		if (blkno & blknomask) {
			SD_TRACE(SD_LOG_DUMP, un,
			    "sddump: dump start block not modulo %d\n",
			    un->un_tgt_blocksize);
			return (EINVAL);
		}

		if ((nblk * DEV_BSIZE) & secmask) {
			SD_TRACE(SD_LOG_DUMP, un,
			    "sddump: dump length not modulo %d\n",
			    un->un_tgt_blocksize);
			return (EINVAL);
		}

	}

	/* Validate blocks to dump at against partition size. */

	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
	    &nblks, &start_block, NULL, NULL, (void *)SD_PATH_DIRECT);

	if (NOT_DEVBSIZE(un)) {
		if ((blkno + nblk) > nblks) {
			SD_TRACE(SD_LOG_DUMP, un,
			    "sddump: dump range larger than partition: "
			    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
			    blkno, nblk, nblks);
			return (EINVAL);
		}
	} else {
		/* nblks is in target blocks here; scale blkno/nblk to match */
		if (((blkno / (un->un_tgt_blocksize / DEV_BSIZE)) +
		    (nblk / (un->un_tgt_blocksize / DEV_BSIZE))) > nblks) {
			SD_TRACE(SD_LOG_DUMP, un,
			    "sddump: dump range larger than partition: "
			    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
			    blkno, nblk, nblks);
			return (EINVAL);
		}
	}

	/* Power the device up (in polled mode) if it is in a low-power state */
	mutex_enter(&un->un_pm_mutex);
	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
		struct scsi_pkt *start_pktp;

		mutex_exit(&un->un_pm_mutex);

		/*
		 * use pm framework to power on HBA 1st
		 */
		(void) pm_raise_power(SD_DEVINFO(un), 0,
		    SD_PM_STATE_ACTIVE(un));

		/*
		 * Dump no long uses sdpower to power on a device, it's
		 * in-line here so it can be done in polled mode.
		 */

		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");

		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);

		if (start_pktp == NULL) {
			/* We were not given a SCSI packet, fail. */
			return (EIO);
		}
		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
		start_pktp->pkt_flags = FLAG_NOINTR;

		mutex_enter(SD_MUTEX(un));
		SD_FILL_SCSI1_LUN(un, start_pktp);
		mutex_exit(SD_MUTEX(un));
		/*
		 * Scsi_poll returns 0 (success) if the command completes and
		 * the status block is STATUS_GOOD.
		 */
		if (sd_scsi_poll(un, start_pktp) != 0) {
			scsi_destroy_pkt(start_pktp);
			return (EIO);
		}
		scsi_destroy_pkt(start_pktp);
		(void) sd_pm_state_change(un, SD_PM_STATE_ACTIVE(un),
		    SD_PM_STATE_CHANGE);
	} else {
		mutex_exit(&un->un_pm_mutex);
	}

	mutex_enter(SD_MUTEX(un));
	un->un_throttle = 0;

	/*
	 * The first time through, reset the specific target device.
	 * However, when cpr calls sddump we know that sd is in a
	 * a good state so no bus reset is required.
	 * Clear sense data via Request Sense cmd.
	 * In sddump we don't care about allow_bus_device_reset anymore
	 */

	if ((un->un_state != SD_STATE_SUSPENDED) &&
	    (un->un_state != SD_STATE_DUMPING)) {

		New_state(un, SD_STATE_DUMPING);

		if (un->un_f_is_fibre == FALSE) {
			mutex_exit(SD_MUTEX(un));
			/*
			 * Attempt a bus reset for parallel scsi.
			 *
			 * Note: A bus reset is required because on some host
			 * systems (i.e. E420R) a bus device reset is
			 * insufficient to reset the state of the target.
			 *
			 * Note: Don't issue the reset for fibre-channel,
			 * because this tends to hang the bus (loop) for
			 * too long while everyone is logging out and in
			 * and the deadman timer for dumping will fire
			 * before the dump is complete.
			 */
			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
				mutex_enter(SD_MUTEX(un));
				Restore_state(un);
				mutex_exit(SD_MUTEX(un));
				return (EIO);
			}

			/* Delay to give the device some recovery time. */
			drv_usecwait(10000);

			if (sd_send_polled_RQS(un) == SD_FAILURE) {
				SD_INFO(SD_LOG_DUMP, un,
				    "sddump: sd_send_polled_RQS failed\n");
			}
			mutex_enter(SD_MUTEX(un));
		}
	}

	/*
	 * Convert the partition-relative block number to a
	 * disk physical block number.
	 */
	if (NOT_DEVBSIZE(un)) {
		blkno += start_block;
	} else {
		blkno = blkno / (un->un_tgt_blocksize / DEV_BSIZE);
		blkno += start_block;
	}

	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);


	/*
	 * Check if the device has a non-512 block size.
	 */
	wr_bp = NULL;
	if (NOT_DEVBSIZE(un)) {
		tgt_byte_offset = blkno * un->un_sys_blocksize;
		tgt_byte_count = nblk * un->un_sys_blocksize;
		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
		    (tgt_byte_count % un->un_tgt_blocksize)) {
			/*
			 * Request is not target-block aligned: read the
			 * covering target blocks, merge the dump data in,
			 * and write the whole range back (read-modify-write).
			 */
			doing_rmw = TRUE;
			/*
			 * Calculate the block number and number of block
			 * in terms of the media block size.
			 */
			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
			tgt_nblk =
			    ((tgt_byte_offset + tgt_byte_count +
			    (un->un_tgt_blocksize - 1)) /
			    un->un_tgt_blocksize) - tgt_blkno;

			/*
			 * Invoke the routine which is going to do read part
			 * of read-modify-write.
			 * Note that this routine returns a pointer to
			 * a valid bp in wr_bp.
			 */
			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
			    &wr_bp);
			if (err) {
				mutex_exit(SD_MUTEX(un));
				return (err);
			}
			/*
			 * Offset is being calculated as -
			 * (original block # * system block size) -
			 * (new block # * target block size)
			 */
			io_start_offset =
			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));

			ASSERT((io_start_offset >= 0) &&
			    (io_start_offset < un->un_tgt_blocksize));
			/*
			 * Do the modify portion of read modify write.
			 */
			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
			    (size_t)nblk * un->un_sys_blocksize);
		} else {
			doing_rmw = FALSE;
			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
		}

		/* Convert blkno and nblk to target blocks */
		blkno = tgt_blkno;
		nblk = tgt_nblk;
	} else {
		/* DEV_BSIZE device: build a stack buf around the dump data */
		wr_bp = &wr_buf;
		bzero(wr_bp, sizeof (struct buf));
		wr_bp->b_flags		= B_BUSY;
		wr_bp->b_un.b_addr	= addr;
		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
		wr_bp->b_resid		= 0;
	}

	mutex_exit(SD_MUTEX(un));

	/*
	 * Obtain a SCSI packet for the write command.
	 * It should be safe to call the allocator here without
	 * worrying about being locked for DVMA mapping because
	 * the address we're passed is already a DVMA mapping
	 *
	 * We are also not going to worry about semaphore ownership
	 * in the dump buffer. Dumping is single threaded at present.
	 */

	wr_pktp = NULL;

	dma_resid = wr_bp->b_bcount;
	oblkno = blkno;

	if (!(NOT_DEVBSIZE(un))) {
		nblk = nblk / (un->un_tgt_blocksize / DEV_BSIZE);
	}

	/*
	 * Outer loop: one iteration per partial-DMA window (a single
	 * iteration when the HBA maps the entire transfer at once).
	 * The inner for-loop retries packet allocation up to
	 * SD_NDUMP_RETRIES times.  NOTE: the body below is historically
	 * not re-indented for the outer while loop.
	 */
	while (dma_resid != 0) {

	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
		wr_bp->b_flags &= ~B_ERROR;

		if (un->un_partial_dma_supported == 1) {
			/* Advance blkno/nblk past the portion already sent */
			blkno = oblkno +
			    ((wr_bp->b_bcount - dma_resid) /
			    un->un_tgt_blocksize);
			nblk = dma_resid / un->un_tgt_blocksize;

			if (wr_pktp) {
				/*
				 * Partial DMA transfers after initial transfer
				 */
				rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
				    blkno, nblk);
			} else {
				/* Initial transfer */
				rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
				    un->un_pkt_flags, NULL_FUNC, NULL,
				    blkno, nblk);
			}
		} else {
			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
			    0, NULL_FUNC, NULL, blkno, nblk);
		}

		if (rval == 0) {
			/* We were given a SCSI packet, continue. */
			break;
		}

		/* Allocation failed: log (verbosity depends on attempt #) */
		if (i == 0) {
			if (wr_bp->b_flags & B_ERROR) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "no resources for dumping; "
				    "error code: 0x%x, retrying",
				    geterror(wr_bp));
			} else {
				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
				    "no resources for dumping; retrying");
			}
		} else if (i != (SD_NDUMP_RETRIES - 1)) {
			if (wr_bp->b_flags & B_ERROR) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
				    "no resources for dumping; error code: "
				    "0x%x, retrying\n", geterror(wr_bp));
			}
		} else {
			if (wr_bp->b_flags & B_ERROR) {
				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
				    "no resources for dumping; "
				    "error code: 0x%x, retries failed, "
				    "giving up.\n", geterror(wr_bp));
			} else {
				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
				    "no resources for dumping; "
				    "retries failed, giving up.\n");
			}
			mutex_enter(SD_MUTEX(un));
			Restore_state(un);
			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
				mutex_exit(SD_MUTEX(un));
				scsi_free_consistent_buf(wr_bp);
			} else {
				mutex_exit(SD_MUTEX(un));
			}
			return (EIO);
		}
		drv_usecwait(10000);
	}

	if (un->un_partial_dma_supported == 1) {
		/*
		 * save the resid from PARTIAL_DMA
		 */
		dma_resid = wr_pktp->pkt_resid;
		if (dma_resid != 0)
			nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
		wr_pktp->pkt_resid = 0;
	} else {
		dma_resid = 0;
	}

	/* SunBug 1222170 */
	wr_pktp->pkt_flags = FLAG_NOINTR;

	/* Send the write, with escalating recovery on failure. */
	err = EIO;
	for (i = 0; i < SD_NDUMP_RETRIES; i++) {

		/*
		 * Scsi_poll returns 0 (success) if the command completes and
		 * the status block is STATUS_GOOD.  We should only check
		 * errors if this condition is not true.  Even then we should
		 * send our own request sense packet only if we have a check
		 * condition and auto request sense has not been performed by
		 * the hba.
		 */
		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");

		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
		    (wr_pktp->pkt_resid == 0)) {
			err = SD_SUCCESS;
			break;
		}

		/*
		 * Check CMD_DEV_GONE 1st, give up if device is gone.
		 */
		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Error while dumping state...Device is gone\n");
			break;
		}

		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: write failed with CHECK, try # %d\n", i);
			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
				(void) sd_send_polled_RQS(un);
			}

			continue;
		}

		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
			int reset_retval = 0;

			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: write failed with BUSY, try # %d\n", i);

			/* LUN reset first (if enabled), else target reset */
			if (un->un_f_lun_reset_enabled == TRUE) {
				reset_retval = scsi_reset(SD_ADDRESS(un),
				    RESET_LUN);
			}
			if (reset_retval == 0) {
				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
			}
			(void) sd_send_polled_RQS(un);

		} else {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: write failed with 0x%x, try # %d\n",
			    SD_GET_PKT_STATUS(wr_pktp), i);
			mutex_enter(SD_MUTEX(un));
			sd_reset_target(un, wr_pktp);
			mutex_exit(SD_MUTEX(un));
		}

		/*
		 * If we are not getting anywhere with lun/target resets,
		 * let's reset the bus.
		 */
		if (i == SD_NDUMP_RETRIES/2) {
			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
			(void) sd_send_polled_RQS(un);
		}
	}
	}

	scsi_destroy_pkt(wr_pktp);
	mutex_enter(SD_MUTEX(un));
	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
		/* Free the consistent buffer allocated for the RMW read */
		mutex_exit(SD_MUTEX(un));
		scsi_free_consistent_buf(wr_bp);
	} else {
		mutex_exit(SD_MUTEX(un));
	}
	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
	return (err);
}
26093 
26094 /*
26095  *    Function: sd_scsi_poll()
26096  *
26097  * Description: This is a wrapper for the scsi_poll call.
26098  *
26099  *   Arguments: sd_lun - The unit structure
26100  *              scsi_pkt - The scsi packet being sent to the device.
26101  *
26102  * Return Code: 0 - Command completed successfully with good status
26103  *             -1 - Command failed.  This could indicate a check condition
26104  *                  or other status value requiring recovery action.
26105  *
26106  * NOTE: This code is only called off sddump().
26107  */
26108 
26109 static int
26110 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
26111 {
26112 	int status;
26113 
26114 	ASSERT(un != NULL);
26115 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26116 	ASSERT(pktp != NULL);
26117 
26118 	status = SD_SUCCESS;
26119 
26120 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
26121 		pktp->pkt_flags |= un->un_tagflags;
26122 		pktp->pkt_flags &= ~FLAG_NODISCON;
26123 	}
26124 
26125 	status = sd_ddi_scsi_poll(pktp);
26126 	/*
26127 	 * Scsi_poll returns 0 (success) if the command completes and the
26128 	 * status block is STATUS_GOOD.  We should only check errors if this
26129 	 * condition is not true.  Even then we should send our own request
26130 	 * sense packet only if we have a check condition and auto
26131 	 * request sense has not been performed by the hba.
26132 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
26133 	 */
26134 	if ((status != SD_SUCCESS) &&
26135 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
26136 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
26137 	    (pktp->pkt_reason != CMD_DEV_GONE))
26138 		(void) sd_send_polled_RQS(un);
26139 
26140 	return (status);
26141 }
26142 
26143 /*
26144  *    Function: sd_send_polled_RQS()
26145  *
26146  * Description: This sends the request sense command to a device.
26147  *
26148  *   Arguments: sd_lun - The unit structure
26149  *
26150  * Return Code: 0 - Command completed successfully with good status
26151  *             -1 - Command failed.
26152  *
26153  */
26154 
26155 static int
26156 sd_send_polled_RQS(struct sd_lun *un)
26157 {
26158 	int	ret_val;
26159 	struct	scsi_pkt	*rqs_pktp;
26160 	struct	buf		*rqs_bp;
26161 
26162 	ASSERT(un != NULL);
26163 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26164 
26165 	ret_val = SD_SUCCESS;
26166 
26167 	rqs_pktp = un->un_rqs_pktp;
26168 	rqs_bp	 = un->un_rqs_bp;
26169 
26170 	mutex_enter(SD_MUTEX(un));
26171 
26172 	if (un->un_sense_isbusy) {
26173 		ret_val = SD_FAILURE;
26174 		mutex_exit(SD_MUTEX(un));
26175 		return (ret_val);
26176 	}
26177 
26178 	/*
26179 	 * If the request sense buffer (and packet) is not in use,
26180 	 * let's set the un_sense_isbusy and send our packet
26181 	 */
26182 	un->un_sense_isbusy 	= 1;
26183 	rqs_pktp->pkt_resid  	= 0;
26184 	rqs_pktp->pkt_reason 	= 0;
26185 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
26186 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
26187 
26188 	mutex_exit(SD_MUTEX(un));
26189 
26190 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
26191 	    " 0x%p\n", rqs_bp->b_un.b_addr);
26192 
26193 	/*
26194 	 * Can't send this to sd_scsi_poll, we wrap ourselves around the
26195 	 * axle - it has a call into us!
26196 	 */
26197 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
26198 		SD_INFO(SD_LOG_COMMON, un,
26199 		    "sd_send_polled_RQS: RQS failed\n");
26200 	}
26201 
26202 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
26203 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
26204 
26205 	mutex_enter(SD_MUTEX(un));
26206 	un->un_sense_isbusy = 0;
26207 	mutex_exit(SD_MUTEX(un));
26208 
26209 	return (ret_val);
26210 }
26211 
26212 /*
26213  * Defines needed for localized version of the scsi_poll routine.
26214  */
26215 #define	CSEC		10000			/* usecs */
26216 #define	SEC_TO_CSEC	(1000000/CSEC)
26217 
26218 /*
26219  *    Function: sd_ddi_scsi_poll()
26220  *
26221  * Description: Localized version of the scsi_poll routine.  The purpose is to
26222  *		send a scsi_pkt to a device as a polled command.  This version
26223  *		is to ensure more robust handling of transport errors.
26224  *		Specifically this routine cures not ready, coming ready
 *		transition for power up and reset of sonomas.  This can take
26226  *		up to 45 seconds for power-on and 20 seconds for reset of a
26227  * 		sonoma lun.
26228  *
26229  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
26230  *
26231  * Return Code: 0 - Command completed successfully with good status
26232  *             -1 - Command failed.
26233  *
26234  * NOTE: This code is almost identical to scsi_poll, however before 6668774 can
26235  * be fixed (removing this code), we need to determine how to handle the
26236  * KEY_UNIT_ATTENTION condition below in conditions not as limited as sddump().
26237  *
26238  * NOTE: This code is only called off sddump().
26239  */
static int
sd_ddi_scsi_poll(struct scsi_pkt *pkt)
{
	int			rval = -1;
	int			savef;
	long			savet;
	void			(*savec)();
	int			timeout;
	int			busy_count;
	int			poll_delay;
	int			rc;
	uint8_t			*sensep;
	struct scsi_arq_status	*arqstat;
	extern int		do_polled_io;

	ASSERT(pkt->pkt_scbp);

	/*
	 * save old flags in order to restore the caller's packet state
	 * on exit (flags, completion routine, and timeout are all
	 * overridden below for the polled transfer).
	 */
	savef = pkt->pkt_flags;
	savec = pkt->pkt_comp;
	savet = pkt->pkt_time;

	pkt->pkt_flags |= FLAG_NOINTR;

	/*
	 * XXX there is nothing in the SCSA spec that states that we should not
	 * do a callback for polled cmds; however, removing this will break sd
	 * and probably other target drivers
	 */
	pkt->pkt_comp = NULL;

	/*
	 * we don't like a polled command without timeout.
	 * 60 seconds seems long enough.
	 */
	if (pkt->pkt_time == 0)
		pkt->pkt_time = SCSI_POLL_TIMEOUT;

	/*
	 * Send polled cmd.
	 *
	 * We do some error recovery for various errors.  Tran_busy,
	 * queue full, and non-dispatched commands are retried every 10 msec.
	 * as they are typically transient failures.  Busy status and Not
	 * Ready are retried every second as this status takes a while to
	 * change.
	 *
	 * busy_count is kept in CSEC (10 msec) ticks; retry paths that wait
	 * a full second account for it by adding SEC_TO_CSEC - 1 extra ticks
	 * so that pkt_time (in seconds) is still honored overall.
	 */
	timeout = pkt->pkt_time * SEC_TO_CSEC;

	for (busy_count = 0; busy_count < timeout; busy_count++) {
		/*
		 * Initialize pkt status variables.
		 */
		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;

		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
			if (rc != TRAN_BUSY) {
				/* Transport failed - give up. */
				break;
			} else {
				/* Transport busy - try again. */
				poll_delay = 1 * CSEC;		/* 10 msec. */
			}
		} else {
			/*
			 * Transport accepted - check pkt status.
			 *
			 * Every path below either breaks out of the loop,
			 * sets poll_delay before falling through to the
			 * delay at the bottom, or (for Unit Attention)
			 * uses 'continue' to skip the delay entirely.
			 */
			rc = (*pkt->pkt_scbp) & STATUS_MASK;
			if ((pkt->pkt_reason == CMD_CMPLT) &&
			    (rc == STATUS_CHECK) &&
			    (pkt->pkt_state & STATE_ARQ_DONE)) {
				arqstat =
				    (struct scsi_arq_status *)(pkt->pkt_scbp);
				sensep = (uint8_t *)&arqstat->sts_sensedata;
			} else {
				sensep = NULL;
			}

			if ((pkt->pkt_reason == CMD_CMPLT) &&
			    (rc == STATUS_GOOD)) {
				/* No error - we're done */
				rval = 0;
				break;

			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
				/* Lost connection - give up */
				break;

			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
			    (pkt->pkt_state == 0)) {
				/* Pkt not dispatched - try again. */
				poll_delay = 1 * CSEC;		/* 10 msec. */

			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
			    (rc == STATUS_QFULL)) {
				/* Queue full - try again. */
				poll_delay = 1 * CSEC;		/* 10 msec. */

			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
			    (rc == STATUS_BUSY)) {
				/* Busy - try again. */
				poll_delay = 100 * CSEC;	/* 1 sec. */
				busy_count += (SEC_TO_CSEC - 1);

			} else if ((sensep != NULL) &&
			    (scsi_sense_key(sensep) == KEY_UNIT_ATTENTION)) {
				/*
				 * Unit Attention - try again.
				 * Pretend it took 1 sec.
				 * NOTE: 'continue' avoids poll_delay
				 */
				busy_count += (SEC_TO_CSEC - 1);
				continue;

			} else if ((sensep != NULL) &&
			    (scsi_sense_key(sensep) == KEY_NOT_READY) &&
			    (scsi_sense_asc(sensep) == 0x04) &&
			    (scsi_sense_ascq(sensep) == 0x01)) {
				/*
				 * Not ready -> ready - try again.
				 * 04h/01h: LUN IS IN PROCESS OF BECOMING READY
				 * ...same as STATUS_BUSY
				 */
				poll_delay = 100 * CSEC;	/* 1 sec. */
				busy_count += (SEC_TO_CSEC - 1);

			} else {
				/* BAD status - give up. */
				break;
			}
		}

		if (((curthread->t_flag & T_INTR_THREAD) == 0) &&
		    !do_polled_io) {
			delay(drv_usectohz(poll_delay));
		} else {
			/* we busy wait during cpr_dump or interrupt threads */
			drv_usecwait(poll_delay);
		}
	}

	/* Restore the caller's original packet state. */
	pkt->pkt_flags = savef;
	pkt->pkt_comp = savec;
	pkt->pkt_time = savet;

	/* return on error */
	if (rval)
		return (rval);

	/*
	 * This is not a performance critical code path.
	 *
	 * As an accommodation for scsi_poll callers, to avoid ddi_dma_sync()
	 * issues associated with looking at DMA memory prior to
	 * scsi_pkt_destroy(), we scsi_sync_pkt() prior to return.
	 */
	scsi_sync_pkt(pkt);
	return (0);
}
26401 
26402 
26403 
26404 /*
26405  *    Function: sd_persistent_reservation_in_read_keys
26406  *
26407  * Description: This routine is the driver entry point for handling CD-ROM
26408  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
26409  *		by sending the SCSI-3 PRIN commands to the device.
26410  *		Processes the read keys command response by copying the
26411  *		reservation key information into the user provided buffer.
26412  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
26413  *
26414  *   Arguments: un   -  Pointer to soft state struct for the target.
26415  *		usrp -	user provided pointer to multihost Persistent In Read
26416  *			Keys structure (mhioc_inkeys_t)
26417  *		flag -	this argument is a pass through to ddi_copyxxx()
26418  *			directly from the mode argument of ioctl().
26419  *
26420  * Return Code: 0   - Success
26421  *		EACCES
26422  *		ENOTSUP
26423  *		errno return code from sd_send_scsi_cmd()
26424  *
26425  *     Context: Can sleep. Does not return until command is completed.
26426  */
26427 
26428 static int
26429 sd_persistent_reservation_in_read_keys(struct sd_lun *un,
26430     mhioc_inkeys_t *usrp, int flag)
26431 {
26432 #ifdef _MULTI_DATAMODEL
26433 	struct mhioc_key_list32	li32;
26434 #endif
26435 	sd_prin_readkeys_t	*in;
26436 	mhioc_inkeys_t		*ptr;
26437 	mhioc_key_list_t	li;
26438 	uchar_t			*data_bufp;
26439 	int 			data_len;
26440 	int			rval = 0;
26441 	size_t			copysz;
26442 	sd_ssc_t		*ssc;
26443 
26444 	if ((ptr = (mhioc_inkeys_t *)usrp) == NULL) {
26445 		return (EINVAL);
26446 	}
26447 	bzero(&li, sizeof (mhioc_key_list_t));
26448 
26449 	ssc = sd_ssc_init(un);
26450 
26451 	/*
26452 	 * Get the listsize from user
26453 	 */
26454 #ifdef _MULTI_DATAMODEL
26455 
26456 	switch (ddi_model_convert_from(flag & FMODELS)) {
26457 	case DDI_MODEL_ILP32:
26458 		copysz = sizeof (struct mhioc_key_list32);
26459 		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
26460 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26461 			    "sd_persistent_reservation_in_read_keys: "
26462 			    "failed ddi_copyin: mhioc_key_list32_t\n");
26463 			rval = EFAULT;
26464 			goto done;
26465 		}
26466 		li.listsize = li32.listsize;
26467 		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
26468 		break;
26469 
26470 	case DDI_MODEL_NONE:
26471 		copysz = sizeof (mhioc_key_list_t);
26472 		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26473 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26474 			    "sd_persistent_reservation_in_read_keys: "
26475 			    "failed ddi_copyin: mhioc_key_list_t\n");
26476 			rval = EFAULT;
26477 			goto done;
26478 		}
26479 		break;
26480 	}
26481 
26482 #else /* ! _MULTI_DATAMODEL */
26483 	copysz = sizeof (mhioc_key_list_t);
26484 	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26485 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26486 		    "sd_persistent_reservation_in_read_keys: "
26487 		    "failed ddi_copyin: mhioc_key_list_t\n");
26488 		rval = EFAULT;
26489 		goto done;
26490 	}
26491 #endif
26492 
26493 	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
26494 	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
26495 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
26496 
26497 	rval = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_KEYS,
26498 	    data_len, data_bufp);
26499 	if (rval != 0) {
26500 		if (rval == EIO)
26501 			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
26502 		else
26503 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
26504 		goto done;
26505 	}
26506 	in = (sd_prin_readkeys_t *)data_bufp;
26507 	ptr->generation = BE_32(in->generation);
26508 	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
26509 
26510 	/*
26511 	 * Return the min(listsize, listlen) keys
26512 	 */
26513 #ifdef _MULTI_DATAMODEL
26514 
26515 	switch (ddi_model_convert_from(flag & FMODELS)) {
26516 	case DDI_MODEL_ILP32:
26517 		li32.listlen = li.listlen;
26518 		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
26519 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26520 			    "sd_persistent_reservation_in_read_keys: "
26521 			    "failed ddi_copyout: mhioc_key_list32_t\n");
26522 			rval = EFAULT;
26523 			goto done;
26524 		}
26525 		break;
26526 
26527 	case DDI_MODEL_NONE:
26528 		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
26529 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26530 			    "sd_persistent_reservation_in_read_keys: "
26531 			    "failed ddi_copyout: mhioc_key_list_t\n");
26532 			rval = EFAULT;
26533 			goto done;
26534 		}
26535 		break;
26536 	}
26537 
26538 #else /* ! _MULTI_DATAMODEL */
26539 
26540 	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
26541 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26542 		    "sd_persistent_reservation_in_read_keys: "
26543 		    "failed ddi_copyout: mhioc_key_list_t\n");
26544 		rval = EFAULT;
26545 		goto done;
26546 	}
26547 
26548 #endif /* _MULTI_DATAMODEL */
26549 
26550 	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
26551 	    li.listsize * MHIOC_RESV_KEY_SIZE);
26552 	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
26553 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26554 		    "sd_persistent_reservation_in_read_keys: "
26555 		    "failed ddi_copyout: keylist\n");
26556 		rval = EFAULT;
26557 	}
26558 done:
26559 	sd_ssc_fini(ssc);
26560 	kmem_free(data_bufp, data_len);
26561 	return (rval);
26562 }
26563 
26564 
26565 /*
26566  *    Function: sd_persistent_reservation_in_read_resv
26567  *
26568  * Description: This routine is the driver entry point for handling CD-ROM
26569  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
26570  *		by sending the SCSI-3 PRIN commands to the device.
26571  *		Process the read persistent reservations command response by
26572  *		copying the reservation information into the user provided
26573  *		buffer. Support for the 32/64 _MULTI_DATAMODEL is implemented.
26574  *
26575  *   Arguments: un   -  Pointer to soft state struct for the target.
26576  *		usrp -	user provided pointer to multihost Persistent In Read
26577  *			Keys structure (mhioc_inkeys_t)
26578  *		flag -	this argument is a pass through to ddi_copyxxx()
26579  *			directly from the mode argument of ioctl().
26580  *
26581  * Return Code: 0   - Success
26582  *		EACCES
26583  *		ENOTSUP
26584  *		errno return code from sd_send_scsi_cmd()
26585  *
26586  *     Context: Can sleep. Does not return until command is completed.
26587  */
26588 
static int
sd_persistent_reservation_in_read_resv(struct sd_lun *un,
    mhioc_inresvs_t *usrp, int flag)
{
#ifdef _MULTI_DATAMODEL
	struct mhioc_resv_desc_list32 resvlist32;
#endif
	sd_prin_readresv_t	*in;
	mhioc_inresvs_t		*ptr;
	sd_readresv_desc_t	*readresv_ptr;
	mhioc_resv_desc_list_t	resvlist;
	mhioc_resv_desc_t 	resvdesc;
	/* NULL so the guarded free at "done" is safe on early error paths */
	uchar_t			*data_bufp = NULL;
	int 			data_len;
	int			rval = 0;
	int			i;
	size_t			copysz;
	mhioc_resv_desc_t	*bufp;
	sd_ssc_t		*ssc;

	if ((ptr = usrp) == NULL) {
		return (EINVAL);
	}

	ssc = sd_ssc_init(un);

	/*
	 * Get the listsize from user
	 */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		copysz = sizeof (struct mhioc_resv_desc_list32);
		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_resv: "
			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
			rval = EFAULT;
			goto done;
		}
		resvlist.listsize = resvlist32.listsize;
		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
		break;

	case DDI_MODEL_NONE:
		copysz = sizeof (mhioc_resv_desc_list_t);
		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_resv: "
			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
			rval = EFAULT;
			goto done;
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	copysz = sizeof (mhioc_resv_desc_list_t);
	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
		SD_ERROR(SD_LOG_IOCTL_MHD, un,
		    "sd_persistent_reservation_in_read_resv: "
		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
		rval = EFAULT;
		goto done;
	}
#endif /* ! _MULTI_DATAMODEL */

	/*
	 * Size the PRIN buffer for listsize descriptors plus the fixed
	 * header portion of sd_prin_readresv_t.
	 * NOTE(review): resvlist.listsize is user-supplied and not
	 * range-checked before this multiply/allocation — TODO confirm
	 * whether an upper bound should be enforced.
	 */
	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
	data_bufp = kmem_zalloc(data_len, KM_SLEEP);

	rval = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_RESV,
	    data_len, data_bufp);
	if (rval != 0) {
		if (rval == EIO)
			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
		else
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
		goto done;
	}
	in = (sd_prin_readresv_t *)data_bufp;
	ptr->generation = BE_32(in->generation);
	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;

	/*
	 * Return the min(listsize, listlen) keys
	 */
#ifdef _MULTI_DATAMODEL

	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		resvlist32.listlen = resvlist.listlen;
		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_resv: "
			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
			rval = EFAULT;
			goto done;
		}
		break;

	case DDI_MODEL_NONE:
		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_resv: "
			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
			rval = EFAULT;
			goto done;
		}
		break;
	}

#else /* ! _MULTI_DATAMODEL */

	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
		SD_ERROR(SD_LOG_IOCTL_MHD, un,
		    "sd_persistent_reservation_in_read_resv: "
		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
		rval = EFAULT;
		goto done;
	}

#endif /* ! _MULTI_DATAMODEL */

	/*
	 * Convert each device-returned descriptor (big-endian fields) into
	 * a native mhioc_resv_desc_t and copy it out to the user's array,
	 * one descriptor at a time.
	 */
	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
	bufp = resvlist.list;
	copysz = sizeof (mhioc_resv_desc_t);
	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
	    i++, readresv_ptr++, bufp++) {

		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
		    MHIOC_RESV_KEY_SIZE);
		resvdesc.type  = readresv_ptr->type;
		resvdesc.scope = readresv_ptr->scope;
		resvdesc.scope_specific_addr =
		    BE_32(readresv_ptr->scope_specific_addr);

		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
			SD_ERROR(SD_LOG_IOCTL_MHD, un,
			    "sd_persistent_reservation_in_read_resv: "
			    "failed ddi_copyout: resvlist\n");
			rval = EFAULT;
			goto done;
		}
	}
done:
	sd_ssc_fini(ssc);
	/* only if data_bufp is allocated, we need to free it */
	if (data_bufp) {
		kmem_free(data_bufp, data_len);
	}
	return (rval);
}
26741 
26742 
26743 /*
26744  *    Function: sr_change_blkmode()
26745  *
26746  * Description: This routine is the driver entry point for handling CD-ROM
26747  *		block mode ioctl requests. Support for returning and changing
26748  *		the current block size in use by the device is implemented. The
26749  *		LBA size is changed via a MODE SELECT Block Descriptor.
26750  *
26751  *		This routine issues a mode sense with an allocation length of
26752  *		12 bytes for the mode page header and a single block descriptor.
26753  *
26754  *   Arguments: dev - the device 'dev_t'
26755  *		cmd - the request type; one of CDROMGBLKMODE (get) or
26756  *		      CDROMSBLKMODE (set)
26757  *		data - current block size or requested block size
26758  *		flag - this argument is a pass through to ddi_copyxxx() directly
26759  *		       from the mode argument of ioctl().
26760  *
26761  * Return Code: the code returned by sd_send_scsi_cmd()
26762  *		EINVAL if invalid arguments are provided
26763  *		EFAULT if ddi_copyxxx() fails
26764  *		ENXIO if fail ddi_get_soft_state
26765  *		EIO if invalid mode sense block descriptor length
26766  *
26767  */
26768 
26769 static int
26770 sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
26771 {
26772 	struct sd_lun			*un = NULL;
26773 	struct mode_header		*sense_mhp, *select_mhp;
26774 	struct block_descriptor		*sense_desc, *select_desc;
26775 	int				current_bsize;
26776 	int				rval = EINVAL;
26777 	uchar_t				*sense = NULL;
26778 	uchar_t				*select = NULL;
26779 	sd_ssc_t			*ssc;
26780 
26781 	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));
26782 
26783 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26784 		return (ENXIO);
26785 	}
26786 
26787 	/*
26788 	 * The block length is changed via the Mode Select block descriptor, the
26789 	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
26790 	 * required as part of this routine. Therefore the mode sense allocation
26791 	 * length is specified to be the length of a mode page header and a
26792 	 * block descriptor.
26793 	 */
26794 	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
26795 
26796 	ssc = sd_ssc_init(un);
26797 	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
26798 	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD);
26799 	sd_ssc_fini(ssc);
26800 	if (rval != 0) {
26801 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26802 		    "sr_change_blkmode: Mode Sense Failed\n");
26803 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
26804 		return (rval);
26805 	}
26806 
26807 	/* Check the block descriptor len to handle only 1 block descriptor */
26808 	sense_mhp = (struct mode_header *)sense;
26809 	if ((sense_mhp->bdesc_length == 0) ||
26810 	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
26811 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26812 		    "sr_change_blkmode: Mode Sense returned invalid block"
26813 		    " descriptor length\n");
26814 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
26815 		return (EIO);
26816 	}
26817 	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
26818 	current_bsize = ((sense_desc->blksize_hi << 16) |
26819 	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);
26820 
26821 	/* Process command */
26822 	switch (cmd) {
26823 	case CDROMGBLKMODE:
26824 		/* Return the block size obtained during the mode sense */
26825 		if (ddi_copyout(&current_bsize, (void *)data,
26826 		    sizeof (int), flag) != 0)
26827 			rval = EFAULT;
26828 		break;
26829 	case CDROMSBLKMODE:
26830 		/* Validate the requested block size */
26831 		switch (data) {
26832 		case CDROM_BLK_512:
26833 		case CDROM_BLK_1024:
26834 		case CDROM_BLK_2048:
26835 		case CDROM_BLK_2056:
26836 		case CDROM_BLK_2336:
26837 		case CDROM_BLK_2340:
26838 		case CDROM_BLK_2352:
26839 		case CDROM_BLK_2368:
26840 		case CDROM_BLK_2448:
26841 		case CDROM_BLK_2646:
26842 		case CDROM_BLK_2647:
26843 			break;
26844 		default:
26845 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26846 			    "sr_change_blkmode: "
26847 			    "Block Size '%ld' Not Supported\n", data);
26848 			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
26849 			return (EINVAL);
26850 		}
26851 
26852 		/*
26853 		 * The current block size matches the requested block size so
26854 		 * there is no need to send the mode select to change the size
26855 		 */
26856 		if (current_bsize == data) {
26857 			break;
26858 		}
26859 
26860 		/* Build the select data for the requested block size */
26861 		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
26862 		select_mhp = (struct mode_header *)select;
26863 		select_desc =
26864 		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
26865 		/*
26866 		 * The LBA size is changed via the block descriptor, so the
26867 		 * descriptor is built according to the user data
26868 		 */
26869 		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
26870 		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
26871 		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
26872 		select_desc->blksize_lo  = (char)((data) & 0x000000ff);
26873 
26874 		/* Send the mode select for the requested block size */
26875 		ssc = sd_ssc_init(un);
26876 		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0,
26877 		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
26878 		    SD_PATH_STANDARD);
26879 		sd_ssc_fini(ssc);
26880 		if (rval != 0) {
26881 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26882 			    "sr_change_blkmode: Mode Select Failed\n");
26883 			/*
26884 			 * The mode select failed for the requested block size,
26885 			 * so reset the data for the original block size and
26886 			 * send it to the target. The error is indicated by the
26887 			 * return value for the failed mode select.
26888 			 */
26889 			select_desc->blksize_hi  = sense_desc->blksize_hi;
26890 			select_desc->blksize_mid = sense_desc->blksize_mid;
26891 			select_desc->blksize_lo  = sense_desc->blksize_lo;
26892 			ssc = sd_ssc_init(un);
26893 			(void) sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0,
26894 			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
26895 			    SD_PATH_STANDARD);
26896 			sd_ssc_fini(ssc);
26897 		} else {
26898 			ASSERT(!mutex_owned(SD_MUTEX(un)));
26899 			mutex_enter(SD_MUTEX(un));
26900 			sd_update_block_info(un, (uint32_t)data, 0);
26901 			mutex_exit(SD_MUTEX(un));
26902 		}
26903 		break;
26904 	default:
26905 		/* should not reach here, but check anyway */
26906 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26907 		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
26908 		rval = EINVAL;
26909 		break;
26910 	}
26911 
26912 	if (select) {
26913 		kmem_free(select, BUFLEN_CHG_BLK_MODE);
26914 	}
26915 	if (sense) {
26916 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
26917 	}
26918 	return (rval);
26919 }
26920 
26921 
26922 /*
26923  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
26924  * implement driver support for getting and setting the CD speed. The command
26925  * set used will be based on the device type. If the device has not been
26926  * identified as MMC the Toshiba vendor specific mode page will be used. If
26927  * the device is MMC but does not support the Real Time Streaming feature
26928  * the SET CD SPEED command will be used to set speed and mode page 0x2A will
26929  * be used to read the speed.
26930  */
26931 
26932 /*
26933  *    Function: sr_change_speed()
26934  *
26935  * Description: This routine is the driver entry point for handling CD-ROM
26936  *		drive speed ioctl requests for devices supporting the Toshiba
26937  *		vendor specific drive speed mode page. Support for returning
26938  *		and changing the current drive speed in use by the device is
26939  *		implemented.
26940  *
26941  *   Arguments: dev - the device 'dev_t'
26942  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
26943  *		      CDROMSDRVSPEED (set)
26944  *		data - current drive speed or requested drive speed
26945  *		flag - this argument is a pass through to ddi_copyxxx() directly
26946  *		       from the mode argument of ioctl().
26947  *
26948  * Return Code: the code returned by sd_send_scsi_cmd()
26949  *		EINVAL if invalid arguments are provided
26950  *		EFAULT if ddi_copyxxx() fails
26951  *		ENXIO if fail ddi_get_soft_state
26952  *		EIO if invalid mode sense block descriptor length
26953  */
26954 
26955 static int
26956 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
26957 {
26958 	struct sd_lun			*un = NULL;
26959 	struct mode_header		*sense_mhp, *select_mhp;
26960 	struct mode_speed		*sense_page, *select_page;
26961 	int				current_speed;
26962 	int				rval = EINVAL;
26963 	int				bd_len;
26964 	uchar_t				*sense = NULL;
26965 	uchar_t				*select = NULL;
26966 	sd_ssc_t			*ssc;
26967 
26968 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
26969 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26970 		return (ENXIO);
26971 	}
26972 
26973 	/*
26974 	 * Note: The drive speed is being modified here according to a Toshiba
26975 	 * vendor specific mode page (0x31).
26976 	 */
26977 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
26978 
26979 	ssc = sd_ssc_init(un);
26980 	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
26981 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
26982 	    SD_PATH_STANDARD);
26983 	sd_ssc_fini(ssc);
26984 	if (rval != 0) {
26985 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26986 		    "sr_change_speed: Mode Sense Failed\n");
26987 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
26988 		return (rval);
26989 	}
26990 	sense_mhp  = (struct mode_header *)sense;
26991 
26992 	/* Check the block descriptor len to handle only 1 block descriptor */
26993 	bd_len = sense_mhp->bdesc_length;
26994 	if (bd_len > MODE_BLK_DESC_LENGTH) {
26995 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26996 		    "sr_change_speed: Mode Sense returned invalid block "
26997 		    "descriptor length\n");
26998 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
26999 		return (EIO);
27000 	}
27001 
27002 	sense_page = (struct mode_speed *)
27003 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
27004 	current_speed = sense_page->speed;
27005 
27006 	/* Process command */
27007 	switch (cmd) {
27008 	case CDROMGDRVSPEED:
27009 		/* Return the drive speed obtained during the mode sense */
27010 		if (current_speed == 0x2) {
27011 			current_speed = CDROM_TWELVE_SPEED;
27012 		}
27013 		if (ddi_copyout(&current_speed, (void *)data,
27014 		    sizeof (int), flag) != 0) {
27015 			rval = EFAULT;
27016 		}
27017 		break;
27018 	case CDROMSDRVSPEED:
27019 		/* Validate the requested drive speed */
27020 		switch ((uchar_t)data) {
27021 		case CDROM_TWELVE_SPEED:
27022 			data = 0x2;
27023 			/*FALLTHROUGH*/
27024 		case CDROM_NORMAL_SPEED:
27025 		case CDROM_DOUBLE_SPEED:
27026 		case CDROM_QUAD_SPEED:
27027 		case CDROM_MAXIMUM_SPEED:
27028 			break;
27029 		default:
27030 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27031 			    "sr_change_speed: "
27032 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
27033 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27034 			return (EINVAL);
27035 		}
27036 
27037 		/*
27038 		 * The current drive speed matches the requested drive speed so
27039 		 * there is no need to send the mode select to change the speed
27040 		 */
27041 		if (current_speed == data) {
27042 			break;
27043 		}
27044 
27045 		/* Build the select data for the requested drive speed */
27046 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27047 		select_mhp = (struct mode_header *)select;
27048 		select_mhp->bdesc_length = 0;
27049 		select_page =
27050 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
27051 		select_page =
27052 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
27053 		select_page->mode_page.code = CDROM_MODE_SPEED;
27054 		select_page->mode_page.length = 2;
27055 		select_page->speed = (uchar_t)data;
27056 
27057 		/* Send the mode select for the requested block size */
27058 		ssc = sd_ssc_init(un);
27059 		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
27060 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27061 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27062 		sd_ssc_fini(ssc);
27063 		if (rval != 0) {
27064 			/*
27065 			 * The mode select failed for the requested drive speed,
27066 			 * so reset the data for the original drive speed and
27067 			 * send it to the target. The error is indicated by the
27068 			 * return value for the failed mode select.
27069 			 */
27070 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27071 			    "sr_drive_speed: Mode Select Failed\n");
27072 			select_page->speed = sense_page->speed;
27073 			ssc = sd_ssc_init(un);
27074 			(void) sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
27075 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27076 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27077 			sd_ssc_fini(ssc);
27078 		}
27079 		break;
27080 	default:
27081 		/* should not reach here, but check anyway */
27082 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27083 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
27084 		rval = EINVAL;
27085 		break;
27086 	}
27087 
27088 	if (select) {
27089 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
27090 	}
27091 	if (sense) {
27092 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27093 	}
27094 
27095 	return (rval);
27096 }
27097 
27098 
27099 /*
27100  *    Function: sr_atapi_change_speed()
27101  *
27102  * Description: This routine is the driver entry point for handling CD-ROM
27103  *		drive speed ioctl requests for MMC devices that do not support
27104  *		the Real Time Streaming feature (0x107).
27105  *
27106  *		Note: This routine will use the SET SPEED command which may not
27107  *		be supported by all devices.
27108  *
27109  *   Arguments: dev- the device 'dev_t'
27110  *		cmd- the request type; one of CDROMGDRVSPEED (get) or
27111  *		     CDROMSDRVSPEED (set)
27112  *		data- current drive speed or requested drive speed
27113  *		flag- this argument is a pass through to ddi_copyxxx() directly
27114  *		      from the mode argument of ioctl().
27115  *
27116  * Return Code: the code returned by sd_send_scsi_cmd()
27117  *		EINVAL if invalid arguments are provided
27118  *		EFAULT if ddi_copyxxx() fails
27119  *		ENXIO if fail ddi_get_soft_state
27120  *		EIO if invalid mode sense block descriptor length
27121  */
27122 
static int
sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
{
	struct sd_lun			*un;
	struct uscsi_cmd		*com = NULL;
	struct mode_header_grp2		*sense_mhp;
	uchar_t				*sense_page;
	uchar_t				*sense = NULL;
	char				cdb[CDB_GROUP5];
	int				bd_len;
	int				current_speed = 0;
	int				max_speed = 0;
	int				rval;
	sd_ssc_t			*ssc;

	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);

	/*
	 * Issue a MODE SENSE for the CD capabilities page to learn the
	 * drive's current and maximum speeds.
	 */
	ssc = sd_ssc_init(un);
	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense,
	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
	    SD_PATH_STANDARD);
	sd_ssc_fini(ssc);
	if (rval != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_atapi_change_speed: Mode Sense Failed\n");
		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
		return (rval);
	}

	/* Check the block descriptor len to handle only 1 block descriptor */
	sense_mhp = (struct mode_header_grp2 *)sense;
	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
	if (bd_len > MODE_BLK_DESC_LENGTH) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_atapi_change_speed: Mode Sense returned invalid "
		    "block descriptor length\n");
		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
		return (EIO);
	}

	/*
	 * Calculate the current and maximum drive speeds by reading the mode
	 * page data that follows the grp2 mode header and block descriptor.
	 * Offsets 14/15 and 8/9 are big-endian 16-bit speed fields of the
	 * capabilities page (per MMC — confirm against MODEPAGE_CDROM_CAP).
	 */
	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
	current_speed = (sense_page[14] << 8) | sense_page[15];
	max_speed = (sense_page[8] << 8) | sense_page[9];

	/* Process the command */
	switch (cmd) {
	case CDROMGDRVSPEED:
		/* Report the current speed as a multiple of SD_SPEED_1X */
		current_speed /= SD_SPEED_1X;
		if (ddi_copyout(&current_speed, (void *)data,
		    sizeof (int), flag) != 0)
			rval = EFAULT;
		break;
	case CDROMSDRVSPEED:
		/* Convert the speed code to KB/sec */
		switch ((uchar_t)data) {
		case CDROM_NORMAL_SPEED:
			current_speed = SD_SPEED_1X;
			break;
		case CDROM_DOUBLE_SPEED:
			current_speed = 2 * SD_SPEED_1X;
			break;
		case CDROM_QUAD_SPEED:
			current_speed = 4 * SD_SPEED_1X;
			break;
		case CDROM_TWELVE_SPEED:
			current_speed = 12 * SD_SPEED_1X;
			break;
		case CDROM_MAXIMUM_SPEED:
			/* 0xffff asks the drive to use its maximum speed */
			current_speed = 0xffff;
			break;
		default:
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_atapi_change_speed: invalid drive speed %d\n",
			    (uchar_t)data);
			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
			return (EINVAL);
		}

		/* Check the request against the drive's max speed. */
		if (current_speed != 0xffff) {
			if (current_speed > max_speed) {
				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
				return (EINVAL);
			}
		}

		/*
		 * Build and send the SET SPEED command
		 *
		 * Note: The SET SPEED (0xBB) command used in this routine is
		 * obsolete per the SCSI MMC spec but still supported in the
		 * MT FUJI vendor spec. Most equipment is adhering to MT FUJI
		 * therefore the command is still implemented in this routine.
		 */
		bzero(cdb, sizeof (cdb));
		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
		cdb[2] = (uchar_t)(current_speed >> 8);
		cdb[3] = (uchar_t)current_speed;
		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
		com->uscsi_cdb	   = (caddr_t)cdb;
		com->uscsi_cdblen  = CDB_GROUP5;
		com->uscsi_bufaddr = NULL;
		com->uscsi_buflen  = 0;
		com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT;
		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, 0, SD_PATH_STANDARD);
		break;
	default:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
		rval = EINVAL;
	}

	/* 'com' is only allocated on the set-speed path; 'sense' always is. */
	if (sense) {
		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
	}
	if (com) {
		kmem_free(com, sizeof (*com));
	}
	return (rval);
}
27250 
27251 
27252 /*
27253  *    Function: sr_pause_resume()
27254  *
27255  * Description: This routine is the driver entry point for handling CD-ROM
27256  *		pause/resume ioctl requests. This only affects the audio play
27257  *		operation.
27258  *
27259  *   Arguments: dev - the device 'dev_t'
27260  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
27261  *		      for setting the resume bit of the cdb.
27262  *
27263  * Return Code: the code returned by sd_send_scsi_cmd()
27264  *		EINVAL if invalid mode specified
27265  *
27266  */
27267 
27268 static int
27269 sr_pause_resume(dev_t dev, int cmd)
27270 {
27271 	struct sd_lun		*un;
27272 	struct uscsi_cmd	*com;
27273 	char			cdb[CDB_GROUP1];
27274 	int			rval;
27275 
27276 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27277 		return (ENXIO);
27278 	}
27279 
27280 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27281 	bzero(cdb, CDB_GROUP1);
27282 	cdb[0] = SCMD_PAUSE_RESUME;
27283 	switch (cmd) {
27284 	case CDROMRESUME:
27285 		cdb[8] = 1;
27286 		break;
27287 	case CDROMPAUSE:
27288 		cdb[8] = 0;
27289 		break;
27290 	default:
27291 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
27292 		    " Command '%x' Not Supported\n", cmd);
27293 		rval = EINVAL;
27294 		goto done;
27295 	}
27296 
27297 	com->uscsi_cdb    = cdb;
27298 	com->uscsi_cdblen = CDB_GROUP1;
27299 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27300 
27301 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27302 	    SD_PATH_STANDARD);
27303 
27304 done:
27305 	kmem_free(com, sizeof (*com));
27306 	return (rval);
27307 }
27308 
27309 
27310 /*
27311  *    Function: sr_play_msf()
27312  *
27313  * Description: This routine is the driver entry point for handling CD-ROM
27314  *		ioctl requests to output the audio signals at the specified
27315  *		starting address and continue the audio play until the specified
27316  *		ending address (CDROMPLAYMSF) The address is in Minute Second
27317  *		Frame (MSF) format.
27318  *
27319  *   Arguments: dev	- the device 'dev_t'
27320  *		data	- pointer to user provided audio msf structure,
27321  *		          specifying start/end addresses.
27322  *		flag	- this argument is a pass through to ddi_copyxxx()
27323  *		          directly from the mode argument of ioctl().
27324  *
27325  * Return Code: the code returned by sd_send_scsi_cmd()
27326  *		EFAULT if ddi_copyxxx() fails
27327  *		ENXIO if fail ddi_get_soft_state
27328  *		EINVAL if data pointer is NULL
27329  */
27330 
27331 static int
27332 sr_play_msf(dev_t dev, caddr_t data, int flag)
27333 {
27334 	struct sd_lun		*un;
27335 	struct uscsi_cmd	*com;
27336 	struct cdrom_msf	msf_struct;
27337 	struct cdrom_msf	*msf = &msf_struct;
27338 	char			cdb[CDB_GROUP1];
27339 	int			rval;
27340 
27341 	if (data == NULL) {
27342 		return (EINVAL);
27343 	}
27344 
27345 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27346 		return (ENXIO);
27347 	}
27348 
27349 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
27350 		return (EFAULT);
27351 	}
27352 
27353 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27354 	bzero(cdb, CDB_GROUP1);
27355 	cdb[0] = SCMD_PLAYAUDIO_MSF;
27356 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
27357 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
27358 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
27359 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
27360 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
27361 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
27362 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
27363 	} else {
27364 		cdb[3] = msf->cdmsf_min0;
27365 		cdb[4] = msf->cdmsf_sec0;
27366 		cdb[5] = msf->cdmsf_frame0;
27367 		cdb[6] = msf->cdmsf_min1;
27368 		cdb[7] = msf->cdmsf_sec1;
27369 		cdb[8] = msf->cdmsf_frame1;
27370 	}
27371 	com->uscsi_cdb    = cdb;
27372 	com->uscsi_cdblen = CDB_GROUP1;
27373 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27374 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27375 	    SD_PATH_STANDARD);
27376 	kmem_free(com, sizeof (*com));
27377 	return (rval);
27378 }
27379 
27380 
27381 /*
27382  *    Function: sr_play_trkind()
27383  *
27384  * Description: This routine is the driver entry point for handling CD-ROM
27385  *		ioctl requests to output the audio signals at the specified
27386  *		starting address and continue the audio play until the specified
27387  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
27388  *		format.
27389  *
27390  *   Arguments: dev	- the device 'dev_t'
27391  *		data	- pointer to user provided audio track/index structure,
27392  *		          specifying start/end addresses.
27393  *		flag	- this argument is a pass through to ddi_copyxxx()
27394  *		          directly from the mode argument of ioctl().
27395  *
27396  * Return Code: the code returned by sd_send_scsi_cmd()
27397  *		EFAULT if ddi_copyxxx() fails
27398  *		ENXIO if fail ddi_get_soft_state
27399  *		EINVAL if data pointer is NULL
27400  */
27401 
27402 static int
27403 sr_play_trkind(dev_t dev, caddr_t data, int flag)
27404 {
27405 	struct cdrom_ti		ti_struct;
27406 	struct cdrom_ti		*ti = &ti_struct;
27407 	struct uscsi_cmd	*com = NULL;
27408 	char			cdb[CDB_GROUP1];
27409 	int			rval;
27410 
27411 	if (data == NULL) {
27412 		return (EINVAL);
27413 	}
27414 
27415 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
27416 		return (EFAULT);
27417 	}
27418 
27419 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27420 	bzero(cdb, CDB_GROUP1);
27421 	cdb[0] = SCMD_PLAYAUDIO_TI;
27422 	cdb[4] = ti->cdti_trk0;
27423 	cdb[5] = ti->cdti_ind0;
27424 	cdb[7] = ti->cdti_trk1;
27425 	cdb[8] = ti->cdti_ind1;
27426 	com->uscsi_cdb    = cdb;
27427 	com->uscsi_cdblen = CDB_GROUP1;
27428 	com->uscsi_flags  = USCSI_DIAGNOSE|USCSI_SILENT;
27429 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27430 	    SD_PATH_STANDARD);
27431 	kmem_free(com, sizeof (*com));
27432 	return (rval);
27433 }
27434 
27435 
27436 /*
27437  *    Function: sr_read_all_subcodes()
27438  *
27439  * Description: This routine is the driver entry point for handling CD-ROM
27440  *		ioctl requests to return raw subcode data while the target is
27441  *		playing audio (CDROMSUBCODE).
27442  *
27443  *   Arguments: dev	- the device 'dev_t'
27444  *		data	- pointer to user provided cdrom subcode structure,
27445  *		          specifying the transfer length and address.
27446  *		flag	- this argument is a pass through to ddi_copyxxx()
27447  *		          directly from the mode argument of ioctl().
27448  *
27449  * Return Code: the code returned by sd_send_scsi_cmd()
27450  *		EFAULT if ddi_copyxxx() fails
27451  *		ENXIO if fail ddi_get_soft_state
27452  *		EINVAL if data pointer is NULL
27453  */
27454 
static int
sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un = NULL;
	struct uscsi_cmd	*com = NULL;
	struct cdrom_subcode	*subcode = NULL;
	int			rval;
	size_t			buflen;
	char			cdb[CDB_GROUP5];

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_subcode32		cdrom_subcode32;
	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
#endif
	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);

	/*
	 * Copy in the user request, converting from the ILP32 layout when a
	 * 32-bit application is running on an LP64 kernel.
	 */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_all_subcodes: ddi_copyin Failed\n");
			kmem_free(subcode, sizeof (struct cdrom_subcode));
			return (EFAULT);
		}
		/* Convert the ILP32 uscsi data from the application to LP64 */
		cdrom_subcode32tocdrom_subcode(cdsc32, subcode);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, subcode,
		    sizeof (struct cdrom_subcode), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_all_subcodes: ddi_copyin Failed\n");
			kmem_free(subcode, sizeof (struct cdrom_subcode));
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_all_subcodes: ddi_copyin Failed\n");
		kmem_free(subcode, sizeof (struct cdrom_subcode));
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/*
	 * Since MMC-2 expects max 3 bytes for length, check if the
	 * length input is greater than 3 bytes
	 */
	if ((subcode->cdsc_length & 0xFF000000) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_all_subcodes: "
		    "cdrom transfer length too large: %d (limit %d)\n",
		    subcode->cdsc_length, 0xFFFFFF);
		kmem_free(subcode, sizeof (struct cdrom_subcode));
		return (EINVAL);
	}

	/* Total transfer size: CDROM_BLK_SUBCODE bytes per requested block */
	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP5);

	if (un->un_f_mmc_cap == TRUE) {
		/*
		 * MMC-capable drive: use READ CD (0xBE) with the starting LBA
		 * set to all 1's, a 3-byte transfer length in bytes 6-8, and
		 * byte 10 set to request sub-channel data (per MMC READ CD).
		 */
		cdb[0] = (char)SCMD_READ_CD;
		cdb[2] = (char)0xff;
		cdb[3] = (char)0xff;
		cdb[4] = (char)0xff;
		cdb[5] = (char)0xff;
		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
		cdb[10] = 1;
	} else {
		/*
		 * Note: A vendor specific command (0xDF) is being used here to
		 * request a read of all subcodes. It takes a full 4-byte
		 * transfer length in bytes 6-9.
		 */
		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
	}
	/* The data is transferred directly to the user-supplied buffer. */
	com->uscsi_cdb	   = cdb;
	com->uscsi_cdblen  = CDB_GROUP5;
	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
	com->uscsi_buflen  = buflen;
	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
	    SD_PATH_STANDARD);
	kmem_free(subcode, sizeof (struct cdrom_subcode));
	kmem_free(com, sizeof (*com));
	return (rval);
}
27560 
27561 
27562 /*
27563  *    Function: sr_read_subchannel()
27564  *
27565  * Description: This routine is the driver entry point for handling CD-ROM
27566  *		ioctl requests to return the Q sub-channel data of the CD
27567  *		current position block. (CDROMSUBCHNL) The data includes the
27568  *		track number, index number, absolute CD-ROM address (LBA or MSF
27569  *		format per the user) , track relative CD-ROM address (LBA or MSF
27570  *		format per the user), control data and audio status.
27571  *
27572  *   Arguments: dev	- the device 'dev_t'
27573  *		data	- pointer to user provided cdrom sub-channel structure
27574  *		flag	- this argument is a pass through to ddi_copyxxx()
27575  *		          directly from the mode argument of ioctl().
27576  *
27577  * Return Code: the code returned by sd_send_scsi_cmd()
27578  *		EFAULT if ddi_copyxxx() fails
27579  *		ENXIO if fail ddi_get_soft_state
27580  *		EINVAL if data pointer is NULL
27581  */
27582 
static int
sr_read_subchannel(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct uscsi_cmd	*com;
	struct cdrom_subchnl	subchanel;
	struct cdrom_subchnl	*subchnl = &subchanel;
	char			cdb[CDB_GROUP1];
	caddr_t			buffer;
	int			rval;

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	/* The user request carries the desired address format (LBA or MSF) */
	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
		return (EFAULT);
	}

	/* 16-byte response buffer for the Q sub-channel current position */
	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
	bzero(cdb, CDB_GROUP1);
	cdb[0] = SCMD_READ_SUBCHANNEL;
	/* Set the MSF bit based on the user requested address format */
	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
	/*
	 * Set the Q bit in byte 2 to indicate that Q sub-channel data be
	 * returned
	 */
	cdb[2] = 0x40;
	/*
	 * Set byte 3 to specify the return data format. A value of 0x01
	 * indicates that the CD-ROM current position should be returned.
	 */
	cdb[3] = 0x01;
	/* Allocation length: the 16 bytes allocated above */
	cdb[8] = 0x10;
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	com->uscsi_cdb	   = cdb;
	com->uscsi_cdblen  = CDB_GROUP1;
	com->uscsi_bufaddr = buffer;
	com->uscsi_buflen  = 16;
	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
	    SD_PATH_STANDARD);
	if (rval != 0) {
		kmem_free(buffer, 16);
		kmem_free(com, sizeof (*com));
		return (rval);
	}

	/* Process the returned Q sub-channel data */
	subchnl->cdsc_audiostatus = buffer[1];
	subchnl->cdsc_adr	= (buffer[5] & 0xF0);
	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
	subchnl->cdsc_trk	= buffer[6];
	subchnl->cdsc_ind	= buffer[7];
	if (subchnl->cdsc_format & CDROM_LBA) {
		/* Bytes 8-11: absolute LBA; bytes 12-15: track-relative LBA */
		subchnl->cdsc_absaddr.lba =
		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
		subchnl->cdsc_reladdr.lba =
		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
		/* Some drives return the MSF fields in BCD */
		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
	} else {
		subchnl->cdsc_absaddr.msf.minute = buffer[9];
		subchnl->cdsc_absaddr.msf.second = buffer[10];
		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
		subchnl->cdsc_reladdr.msf.minute = buffer[13];
		subchnl->cdsc_reladdr.msf.second = buffer[14];
		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
	}
	kmem_free(buffer, 16);
	kmem_free(com, sizeof (*com));
	/* Return the decoded sub-channel data to the caller */
	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
	    != 0) {
		return (EFAULT);
	}
	return (rval);
}
27673 
27674 
27675 /*
27676  *    Function: sr_read_tocentry()
27677  *
27678  * Description: This routine is the driver entry point for handling CD-ROM
27679  *		ioctl requests to read from the Table of Contents (TOC)
27680  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
27681  *		fields, the starting address (LBA or MSF format per the user)
27682  *		and the data mode if the user specified track is a data track.
27683  *
27684  *		Note: The READ HEADER (0x44) command used in this routine is
27685  *		obsolete per the SCSI MMC spec but still supported in the
27686  *		MT FUJI vendor spec. Most equipment is adhering to MT FUJI
27687  *		therefore the command is still implemented in this routine.
27688  *
27689  *   Arguments: dev	- the device 'dev_t'
27690  *		data	- pointer to user provided toc entry structure,
27691  *			  specifying the track # and the address format
27692  *			  (LBA or MSF).
27693  *		flag	- this argument is a pass through to ddi_copyxxx()
27694  *		          directly from the mode argument of ioctl().
27695  *
27696  * Return Code: the code returned by sd_send_scsi_cmd()
27697  *		EFAULT if ddi_copyxxx() fails
27698  *		ENXIO if fail ddi_get_soft_state
27699  *		EINVAL if data pointer is NULL
27700  */
27701 
27702 static int
27703 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
27704 {
27705 	struct sd_lun		*un = NULL;
27706 	struct uscsi_cmd	*com;
27707 	struct cdrom_tocentry	toc_entry;
27708 	struct cdrom_tocentry	*entry = &toc_entry;
27709 	caddr_t			buffer;
27710 	int			rval;
27711 	char			cdb[CDB_GROUP1];
27712 
27713 	if (data == NULL) {
27714 		return (EINVAL);
27715 	}
27716 
27717 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27718 	    (un->un_state == SD_STATE_OFFLINE)) {
27719 		return (ENXIO);
27720 	}
27721 
27722 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
27723 		return (EFAULT);
27724 	}
27725 
27726 	/* Validate the requested track and address format */
27727 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
27728 		return (EINVAL);
27729 	}
27730 
27731 	if (entry->cdte_track == 0) {
27732 		return (EINVAL);
27733 	}
27734 
27735 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
27736 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27737 	bzero(cdb, CDB_GROUP1);
27738 
27739 	cdb[0] = SCMD_READ_TOC;
27740 	/* Set the MSF bit based on the user requested address format  */
27741 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
27742 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
27743 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
27744 	} else {
27745 		cdb[6] = entry->cdte_track;
27746 	}
27747 
27748 	/*
27749 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
27750 	 * (4 byte TOC response header + 8 byte track descriptor)
27751 	 */
27752 	cdb[8] = 12;
27753 	com->uscsi_cdb	   = cdb;
27754 	com->uscsi_cdblen  = CDB_GROUP1;
27755 	com->uscsi_bufaddr = buffer;
27756 	com->uscsi_buflen  = 0x0C;
27757 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
27758 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27759 	    SD_PATH_STANDARD);
27760 	if (rval != 0) {
27761 		kmem_free(buffer, 12);
27762 		kmem_free(com, sizeof (*com));
27763 		return (rval);
27764 	}
27765 
27766 	/* Process the toc entry */
27767 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
27768 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
27769 	if (entry->cdte_format & CDROM_LBA) {
27770 		entry->cdte_addr.lba =
27771 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
27772 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
27773 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
27774 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
27775 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
27776 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
27777 		/*
27778 		 * Send a READ TOC command using the LBA address format to get
27779 		 * the LBA for the track requested so it can be used in the
27780 		 * READ HEADER request
27781 		 *
27782 		 * Note: The MSF bit of the READ HEADER command specifies the
27783 		 * output format. The block address specified in that command
27784 		 * must be in LBA format.
27785 		 */
27786 		cdb[1] = 0;
27787 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27788 		    SD_PATH_STANDARD);
27789 		if (rval != 0) {
27790 			kmem_free(buffer, 12);
27791 			kmem_free(com, sizeof (*com));
27792 			return (rval);
27793 		}
27794 	} else {
27795 		entry->cdte_addr.msf.minute	= buffer[9];
27796 		entry->cdte_addr.msf.second	= buffer[10];
27797 		entry->cdte_addr.msf.frame	= buffer[11];
27798 		/*
27799 		 * Send a READ TOC command using the LBA address format to get
27800 		 * the LBA for the track requested so it can be used in the
27801 		 * READ HEADER request
27802 		 *
27803 		 * Note: The MSF bit of the READ HEADER command specifies the
27804 		 * output format. The block address specified in that command
27805 		 * must be in LBA format.
27806 		 */
27807 		cdb[1] = 0;
27808 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27809 		    SD_PATH_STANDARD);
27810 		if (rval != 0) {
27811 			kmem_free(buffer, 12);
27812 			kmem_free(com, sizeof (*com));
27813 			return (rval);
27814 		}
27815 	}
27816 
27817 	/*
27818 	 * Build and send the READ HEADER command to determine the data mode of
27819 	 * the user specified track.
27820 	 */
27821 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
27822 	    (entry->cdte_track != CDROM_LEADOUT)) {
27823 		bzero(cdb, CDB_GROUP1);
27824 		cdb[0] = SCMD_READ_HEADER;
27825 		cdb[2] = buffer[8];
27826 		cdb[3] = buffer[9];
27827 		cdb[4] = buffer[10];
27828 		cdb[5] = buffer[11];
27829 		cdb[8] = 0x08;
27830 		com->uscsi_buflen = 0x08;
27831 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27832 		    SD_PATH_STANDARD);
27833 		if (rval == 0) {
27834 			entry->cdte_datamode = buffer[0];
27835 		} else {
27836 			/*
27837 			 * READ HEADER command failed, since this is
27838 			 * obsoleted in one spec, its better to return
27839 			 * -1 for an invlid track so that we can still
27840 			 * receive the rest of the TOC data.
27841 			 */
27842 			entry->cdte_datamode = (uchar_t)-1;
27843 		}
27844 	} else {
27845 		entry->cdte_datamode = (uchar_t)-1;
27846 	}
27847 
27848 	kmem_free(buffer, 12);
27849 	kmem_free(com, sizeof (*com));
27850 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
27851 		return (EFAULT);
27852 
27853 	return (rval);
27854 }
27855 
27856 
27857 /*
27858  *    Function: sr_read_tochdr()
27859  *
27860  * Description: This routine is the driver entry point for handling CD-ROM
27861  * 		ioctl requests to read the Table of Contents (TOC) header
27862  *		(CDROMREADTOHDR). The TOC header consists of the disk starting
27863  *		(CDROMREADTOCHDR). The TOC header consists of the disk starting
27864  *
27865  *   Arguments: dev	- the device 'dev_t'
27866  *		data	- pointer to user provided toc header structure,
27867  *			  specifying the starting and ending track numbers.
27868  *		flag	- this argument is a pass through to ddi_copyxxx()
27869  *			  directly from the mode argument of ioctl().
27870  *
27871  * Return Code: the code returned by sd_send_scsi_cmd()
27872  *		EFAULT if ddi_copyxxx() fails
27873  *		ENXIO if fail ddi_get_soft_state
27874  *		EINVAL if data pointer is NULL
27875  */
27876 
27877 static int
27878 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
27879 {
27880 	struct sd_lun		*un;
27881 	struct uscsi_cmd	*com;
27882 	struct cdrom_tochdr	toc_header;
27883 	struct cdrom_tochdr	*hdr = &toc_header;
27884 	char			cdb[CDB_GROUP1];
27885 	int			rval;
27886 	caddr_t			buffer;
27887 
27888 	if (data == NULL) {
27889 		return (EINVAL);
27890 	}
27891 
27892 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27893 	    (un->un_state == SD_STATE_OFFLINE)) {
27894 		return (ENXIO);
27895 	}
27896 
27897 	buffer = kmem_zalloc(4, KM_SLEEP);
27898 	bzero(cdb, CDB_GROUP1);
27899 	cdb[0] = SCMD_READ_TOC;
27900 	/*
27901 	 * Specifying a track number of 0x00 in the READ TOC command indicates
27902 	 * that the TOC header should be returned
27903 	 */
27904 	cdb[6] = 0x00;
27905 	/*
27906 	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
27907 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
27908 	 */
27909 	cdb[8] = 0x04;
27910 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27911 	com->uscsi_cdb	   = cdb;
27912 	com->uscsi_cdblen  = CDB_GROUP1;
27913 	com->uscsi_bufaddr = buffer;
27914 	com->uscsi_buflen  = 0x04;
27915 	com->uscsi_timeout = 300;
27916 	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
27917 
27918 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27919 	    SD_PATH_STANDARD);
27920 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
27921 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
27922 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
27923 	} else {
27924 		hdr->cdth_trk0 = buffer[2];
27925 		hdr->cdth_trk1 = buffer[3];
27926 	}
27927 	kmem_free(buffer, 4);
27928 	kmem_free(com, sizeof (*com));
27929 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
27930 		return (EFAULT);
27931 	}
27932 	return (rval);
27933 }
27934 
27935 
27936 /*
27937  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
27938  * sr_read_cdda(), sr_read_cdxa(), routines implement driver support for
27939  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
27940  * digital audio and extended architecture digital audio. These modes are
27941  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
27942  * MMC specs.
27943  *
27944  * In addition to support for the various data formats these routines also
27945  * include support for devices that implement only the direct access READ
27946  * commands (0x08, 0x28), devices that implement the READ_CD commands
27947  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
27948  * READ CDXA commands (0xD8, 0xDB)
27949  */
27950 
27951 /*
27952  *    Function: sr_read_mode1()
27953  *
27954  * Description: This routine is the driver entry point for handling CD-ROM
27955  *		ioctl read mode1 requests (CDROMREADMODE1).
27956  *
27957  *   Arguments: dev	- the device 'dev_t'
27958  *		data	- pointer to user provided cd read structure specifying
27959  *			  the lba buffer address and length.
27960  *		flag	- this argument is a pass through to ddi_copyxxx()
27961  *			  directly from the mode argument of ioctl().
27962  *
27963  * Return Code: the code returned by sd_send_scsi_cmd()
27964  *		EFAULT if ddi_copyxxx() fails
27965  *		ENXIO if fail ddi_get_soft_state
27966  *		EINVAL if data pointer is NULL
27967  */
27968 
27969 static int
27970 sr_read_mode1(dev_t dev, caddr_t data, int flag)
27971 {
27972 	struct sd_lun		*un;
27973 	struct cdrom_read	mode1_struct;
27974 	struct cdrom_read	*mode1 = &mode1_struct;
27975 	int			rval;
27976 	sd_ssc_t		*ssc;
27977 
27978 #ifdef _MULTI_DATAMODEL
27979 	/* To support ILP32 applications in an LP64 world */
27980 	struct cdrom_read32	cdrom_read32;
27981 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
27982 #endif /* _MULTI_DATAMODEL */
27983 
27984 	if (data == NULL) {
27985 		return (EINVAL);
27986 	}
27987 
27988 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
27989 	    (un->un_state == SD_STATE_OFFLINE)) {
27990 		return (ENXIO);
27991 	}
27992 
27993 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
27994 	    "sd_read_mode1: entry: un:0x%p\n", un);
27995 
27996 #ifdef _MULTI_DATAMODEL
27997 	switch (ddi_model_convert_from(flag & FMODELS)) {
27998 	case DDI_MODEL_ILP32:
27999 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28000 			return (EFAULT);
28001 		}
28002 		/* Convert the ILP32 uscsi data from the application to LP64 */
28003 		cdrom_read32tocdrom_read(cdrd32, mode1);
28004 		break;
28005 	case DDI_MODEL_NONE:
28006 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28007 			return (EFAULT);
28008 		}
28009 	}
28010 #else /* ! _MULTI_DATAMODEL */
28011 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28012 		return (EFAULT);
28013 	}
28014 #endif /* _MULTI_DATAMODEL */
28015 
28016 	ssc = sd_ssc_init(un);
28017 	rval = sd_send_scsi_READ(ssc, mode1->cdread_bufaddr,
28018 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
28019 	sd_ssc_fini(ssc);
28020 
28021 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28022 	    "sd_read_mode1: exit: un:0x%p\n", un);
28023 
28024 	return (rval);
28025 }
28026 
28027 
28028 /*
28029  *    Function: sr_read_cd_mode2()
28030  *
28031  * Description: This routine is the driver entry point for handling CD-ROM
28032  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28033  *		support the READ CD (0xBE) command or the 1st generation
28034  *		READ CD (0xD4) command.
28035  *
28036  *   Arguments: dev	- the device 'dev_t'
28037  *		data	- pointer to user provided cd read structure specifying
28038  *			  the lba buffer address and length.
28039  *		flag	- this argument is a pass through to ddi_copyxxx()
28040  *			  directly from the mode argument of ioctl().
28041  *
28042  * Return Code: the code returned by sd_send_scsi_cmd()
28043  *		EFAULT if ddi_copyxxx() fails
28044  *		ENXIO if fail ddi_get_soft_state
28045  *		EINVAL if data pointer is NULL
28046  */
28047 
28048 static int
28049 sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
28050 {
28051 	struct sd_lun		*un;
28052 	struct uscsi_cmd	*com;
28053 	struct cdrom_read	mode2_struct;
28054 	struct cdrom_read	*mode2 = &mode2_struct;
28055 	uchar_t			cdb[CDB_GROUP5];
28056 	int			nblocks;
28057 	int			rval;
28058 #ifdef _MULTI_DATAMODEL
28059 	/*  To support ILP32 applications in an LP64 world */
28060 	struct cdrom_read32	cdrom_read32;
28061 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28062 #endif /* _MULTI_DATAMODEL */
28063 
28064 	if (data == NULL) {
28065 		return (EINVAL);
28066 	}
28067 
28068 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28069 	    (un->un_state == SD_STATE_OFFLINE)) {
28070 		return (ENXIO);
28071 	}
28072 
28073 #ifdef _MULTI_DATAMODEL
28074 	switch (ddi_model_convert_from(flag & FMODELS)) {
28075 	case DDI_MODEL_ILP32:
28076 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28077 			return (EFAULT);
28078 		}
28079 		/* Convert the ILP32 uscsi data from the application to LP64 */
28080 		cdrom_read32tocdrom_read(cdrd32, mode2);
28081 		break;
28082 	case DDI_MODEL_NONE:
28083 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28084 			return (EFAULT);
28085 		}
28086 		break;
28087 	}
28088 
28089 #else /* ! _MULTI_DATAMODEL */
28090 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28091 		return (EFAULT);
28092 	}
28093 #endif /* _MULTI_DATAMODEL */
28094 
28095 	bzero(cdb, sizeof (cdb));
28096 	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
28097 		/* Read command supported by 1st generation atapi drives */
28098 		cdb[0] = SCMD_READ_CDD4;
28099 	} else {
28100 		/* Universal CD Access Command */
28101 		cdb[0] = SCMD_READ_CD;
28102 	}
28103 
28104 	/*
28105 	 * Set expected sector type to: 2336s byte, Mode 2 Yellow Book
28106 	 */
28107 	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
28108 
28109 	/* set the start address */
28110 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0XFF);
28111 	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0XFF);
28112 	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28113 	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
28114 
28115 	/* set the transfer length */
28116 	nblocks = mode2->cdread_buflen / 2336;
28117 	cdb[6] = (uchar_t)(nblocks >> 16);
28118 	cdb[7] = (uchar_t)(nblocks >> 8);
28119 	cdb[8] = (uchar_t)nblocks;
28120 
28121 	/* set the filter bits */
28122 	cdb[9] = CDROM_READ_CD_USERDATA;
28123 
28124 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28125 	com->uscsi_cdb = (caddr_t)cdb;
28126 	com->uscsi_cdblen = sizeof (cdb);
28127 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28128 	com->uscsi_buflen = mode2->cdread_buflen;
28129 	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
28130 
28131 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
28132 	    SD_PATH_STANDARD);
28133 	kmem_free(com, sizeof (*com));
28134 	return (rval);
28135 }
28136 
28137 
28138 /*
28139  *    Function: sr_read_mode2()
28140  *
28141  * Description: This routine is the driver entry point for handling CD-ROM
28142  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28143  *		do not support the READ CD (0xBE) command.
28144  *
28145  *   Arguments: dev	- the device 'dev_t'
28146  *		data	- pointer to user provided cd read structure specifying
28147  *			  the lba buffer address and length.
28148  *		flag	- this argument is a pass through to ddi_copyxxx()
28149  *			  directly from the mode argument of ioctl().
28150  *
28151  * Return Code: the code returned by sd_send_scsi_cmd()
28152  *		EFAULT if ddi_copyxxx() fails
28153  *		ENXIO if fail ddi_get_soft_state
28154  *		EINVAL if data pointer is NULL
28155  *		EIO if fail to reset block size
28156  *		EAGAIN if commands are in progress in the driver
28157  */
28158 
static int
sr_read_mode2(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct cdrom_read	mode2_struct;
	struct cdrom_read	*mode2 = &mode2_struct;
	int			rval;
	uint32_t		restore_blksize;
	struct uscsi_cmd	*com;
	uchar_t			cdb[CDB_GROUP0];
	int			nblocks;

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_read32	cdrom_read32;
	struct cdrom_read32	*cdrd32 = &cdrom_read32;
#endif /* _MULTI_DATAMODEL */

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	/*
	 * Because this routine will update the device and driver block size
	 * being used we want to make sure there are no commands in progress.
	 * If commands are in progress the user will have to try again.
	 *
	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
	 * in sdioctl to protect commands from sdioctl through to the top of
	 * sd_uscsi_strategy. See sdioctl for details.
	 */
	mutex_enter(SD_MUTEX(un));
	if (un->un_ncmds_in_driver != 1) {
		mutex_exit(SD_MUTEX(un));
		return (EAGAIN);
	}
	mutex_exit(SD_MUTEX(un));

	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
	    "sd_read_mode2: entry: un:0x%p\n", un);

	/*
	 * Copy in the user's cdrom_read request (model-aware on LP64).
	 * No cleanup is needed on failure: the block size has not been
	 * changed yet at this point.
	 */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
			return (EFAULT);
		}
		/* Convert the ILP32 uscsi data from the application to LP64 */
		cdrom_read32tocdrom_read(cdrd32, mode2);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/* Store the current target block size for restoration later */
	restore_blksize = un->un_tgt_blocksize;

	/* Change the device and soft state target block size to 2336 */
	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
		rval = EIO;
		goto done;
	}


	bzero(cdb, sizeof (cdb));

	/* set READ operation */
	cdb[0] = SCMD_READ;

	/* adjust lba for 2kbyte blocks from 512 byte blocks */
	mode2->cdread_lba >>= 2;

	/*
	 * set the start address
	 * (a GROUP0 READ carries a 21-bit LBA: high 5 bits in byte 1)
	 */
	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0X1F);
	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);

	/* set the transfer length (whole 2336-byte blocks) */
	nblocks = mode2->cdread_buflen / 2336;
	cdb[4] = (uchar_t)nblocks & 0xFF;

	/* build command */
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	com->uscsi_cdb = (caddr_t)cdb;
	com->uscsi_cdblen = sizeof (cdb);
	com->uscsi_bufaddr = mode2->cdread_bufaddr;
	com->uscsi_buflen = mode2->cdread_buflen;
	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;

	/*
	 * Issue SCSI command with user space address for read buffer.
	 *
	 * This sends the command through main channel in the driver.
	 *
	 * Since this is accessed via an IOCTL call, we go through the
	 * standard path, so that if the device was powered down, then
	 * it would be 'awakened' to handle the command.
	 */
	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
	    SD_PATH_STANDARD);

	kmem_free(com, sizeof (*com));

	/* Restore the device and soft state target block size */
	if (sr_sector_mode(dev, restore_blksize) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "can't do switch back to mode 1\n");
		/*
		 * If sd_send_scsi_READ succeeded we still need to report
		 * an error because we failed to reset the block size
		 */
		if (rval == 0) {
			rval = EIO;
		}
	}

done:
	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
	    "sd_read_mode2: exit: un:0x%p\n", un);

	return (rval);
}
28294 
28295 
28296 /*
28297  *    Function: sr_sector_mode()
28298  *
28299  * Description: This utility function is used by sr_read_mode2 to set the target
28300  *		block size based on the user specified size. This is a legacy
28301  *		implementation based upon a vendor specific mode page
28302  *
28303  *   Arguments: dev	- the device 'dev_t'
28304  *		data	- flag indicating if block size is being set to 2336 or
28305  *			  512.
28306  *
28307  * Return Code: the code returned by sd_send_scsi_cmd()
28308  *		EFAULT if ddi_copyxxx() fails
28309  *		ENXIO if fail ddi_get_soft_state
28310  *		EINVAL if data pointer is NULL
28311  */
28312 
28313 static int
28314 sr_sector_mode(dev_t dev, uint32_t blksize)
28315 {
28316 	struct sd_lun	*un;
28317 	uchar_t		*sense;
28318 	uchar_t		*select;
28319 	int		rval;
28320 	sd_ssc_t	*ssc;
28321 
28322 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28323 	    (un->un_state == SD_STATE_OFFLINE)) {
28324 		return (ENXIO);
28325 	}
28326 
28327 	sense = kmem_zalloc(20, KM_SLEEP);
28328 
28329 	/* Note: This is a vendor specific mode page (0x81) */
28330 	ssc = sd_ssc_init(un);
28331 	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense, 20, 0x81,
28332 	    SD_PATH_STANDARD);
28333 	sd_ssc_fini(ssc);
28334 	if (rval != 0) {
28335 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28336 		    "sr_sector_mode: Mode Sense failed\n");
28337 		kmem_free(sense, 20);
28338 		return (rval);
28339 	}
28340 	select = kmem_zalloc(20, KM_SLEEP);
28341 	select[3] = 0x08;
28342 	select[10] = ((blksize >> 8) & 0xff);
28343 	select[11] = (blksize & 0xff);
28344 	select[12] = 0x01;
28345 	select[13] = 0x06;
28346 	select[14] = sense[14];
28347 	select[15] = sense[15];
28348 	if (blksize == SD_MODE2_BLKSIZE) {
28349 		select[14] |= 0x01;
28350 	}
28351 
28352 	ssc = sd_ssc_init(un);
28353 	rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select, 20,
28354 	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
28355 	sd_ssc_fini(ssc);
28356 	if (rval != 0) {
28357 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28358 		    "sr_sector_mode: Mode Select failed\n");
28359 	} else {
28360 		/*
28361 		 * Only update the softstate block size if we successfully
28362 		 * changed the device block mode.
28363 		 */
28364 		mutex_enter(SD_MUTEX(un));
28365 		sd_update_block_info(un, blksize, 0);
28366 		mutex_exit(SD_MUTEX(un));
28367 	}
28368 	kmem_free(sense, 20);
28369 	kmem_free(select, 20);
28370 	return (rval);
28371 }
28372 
28373 
28374 /*
28375  *    Function: sr_read_cdda()
28376  *
28377  * Description: This routine is the driver entry point for handling CD-ROM
28378  *		ioctl requests to return CD-DA or subcode data. (CDROMCDDA) If
28379  *		the target supports CDDA these requests are handled via a vendor
28380  *		specific command (0xD8) If the target does not support CDDA
28381  *		these requests are handled via the READ CD command (0xBE).
28382  *
28383  *   Arguments: dev	- the device 'dev_t'
28384  *		data	- pointer to user provided CD-DA structure specifying
28385  *			  the track starting address, transfer length, and
28386  *			  subcode options.
28387  *		flag	- this argument is a pass through to ddi_copyxxx()
28388  *			  directly from the mode argument of ioctl().
28389  *
28390  * Return Code: the code returned by sd_send_scsi_cmd()
28391  *		EFAULT if ddi_copyxxx() fails
28392  *		ENXIO if fail ddi_get_soft_state
28393  *		EINVAL if invalid arguments are provided
28394  *		ENOTTY
28395  */
28396 
static int
sr_read_cdda(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun			*un;
	struct uscsi_cmd		*com;
	struct cdrom_cdda		*cdda;
	int				rval;
	size_t				buflen;
	char				cdb[CDB_GROUP5];

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_cdda32	cdrom_cdda32;
	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
#endif /* _MULTI_DATAMODEL */

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);

	/* Copy in the user's CD-DA request (model-aware on LP64 kernels) */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_cdda: ddi_copyin Failed\n");
			kmem_free(cdda, sizeof (struct cdrom_cdda));
			return (EFAULT);
		}
		/* Convert the ILP32 uscsi data from the application to LP64 */
		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_read_cdda: ddi_copyin Failed\n");
			kmem_free(cdda, sizeof (struct cdrom_cdda));
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_cdda: ddi_copyin Failed\n");
		kmem_free(cdda, sizeof (struct cdrom_cdda));
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/*
	 * Since MMC-2 expects max 3 bytes for length, check if the
	 * length input is greater than 3 bytes
	 */
	if ((cdda->cdda_length & 0xFF000000) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
		    "cdrom transfer length too large: %d (limit %d)\n",
		    cdda->cdda_length, 0xFFFFFF);
		kmem_free(cdda, sizeof (struct cdrom_cdda));
		return (EINVAL);
	}

	/*
	 * Size the transfer buffer as (per-block size implied by the
	 * requested subcode option) * (number of blocks requested).
	 */
	switch (cdda->cdda_subcode) {
	case CDROM_DA_NO_SUBCODE:
		buflen = CDROM_BLK_2352 * cdda->cdda_length;
		break;
	case CDROM_DA_SUBQ:
		buflen = CDROM_BLK_2368 * cdda->cdda_length;
		break;
	case CDROM_DA_ALL_SUBCODE:
		buflen = CDROM_BLK_2448 * cdda->cdda_length;
		break;
	case CDROM_DA_SUBCODE_ONLY:
		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
		break;
	default:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
		    cdda->cdda_subcode);
		kmem_free(cdda, sizeof (struct cdrom_cdda));
		return (EINVAL);
	}

	/* Build and send the command */
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP5);

	if (un->un_f_cfg_cdda == TRUE) {
		/*
		 * Drive handles CD-DA via the READ CD (0xBE) command: 4-byte
		 * start LBA in bytes 2-5, 3-byte length in bytes 6-8.
		 * NOTE(review): byte 1 = 0x04 and byte 9 = 0x10 look like the
		 * expected-sector-type (CD-DA) and user-data fields -- confirm
		 * against the MMC spec.
		 */
		cdb[0] = (char)SCMD_READ_CD;
		cdb[1] = 0x04;
		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
		cdb[9] = 0x10;
		/* Map the ioctl subcode option onto CDB byte 10 */
		switch (cdda->cdda_subcode) {
		case CDROM_DA_NO_SUBCODE :
			cdb[10] = 0x0;
			break;
		case CDROM_DA_SUBQ :
			cdb[10] = 0x2;
			break;
		case CDROM_DA_ALL_SUBCODE :
			cdb[10] = 0x1;
			break;
		case CDROM_DA_SUBCODE_ONLY :
			/* FALLTHROUGH */
		default :
			/* subcode-only transfers are not supported here */
			kmem_free(cdda, sizeof (struct cdrom_cdda));
			kmem_free(com, sizeof (*com));
			return (ENOTTY);
		}
	} else {
		/*
		 * Vendor specific READ CDDA (0xD8) command: takes a 4-byte
		 * length (bytes 6-9) and the subcode option directly (byte 10).
		 */
		cdb[0] = (char)SCMD_READ_CDDA;
		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
		cdb[10] = cdda->cdda_subcode;
	}

	com->uscsi_cdb = cdb;
	com->uscsi_cdblen = CDB_GROUP5;
	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
	com->uscsi_buflen = buflen;
	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;

	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
	    SD_PATH_STANDARD);

	kmem_free(cdda, sizeof (struct cdrom_cdda));
	kmem_free(com, sizeof (*com));
	return (rval);
}
28544 
28545 
28546 /*
28547  *    Function: sr_read_cdxa()
28548  *
28549  * Description: This routine is the driver entry point for handling CD-ROM
28550  *		ioctl requests to return CD-XA (Extended Architecture) data.
28551  *		(CDROMCDXA).
28552  *
28553  *   Arguments: dev	- the device 'dev_t'
28554  *		data	- pointer to user provided CD-XA structure specifying
28555  *			  the data starting address, transfer length, and format
28556  *		flag	- this argument is a pass through to ddi_copyxxx()
28557  *			  directly from the mode argument of ioctl().
28558  *
28559  * Return Code: the code returned by sd_send_scsi_cmd()
28560  *		EFAULT if ddi_copyxxx() fails
28561  *		ENXIO if fail ddi_get_soft_state
28562  *		EINVAL if data pointer is NULL
28563  */
28564 
static int
sr_read_cdxa(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct uscsi_cmd	*com;
	struct cdrom_cdxa	*cdxa;
	int			rval;
	size_t			buflen;
	char			cdb[CDB_GROUP5];
	uchar_t			read_flags;

#ifdef _MULTI_DATAMODEL
	/* To support ILP32 applications in an LP64 world */
	struct cdrom_cdxa32		cdrom_cdxa32;
	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
#endif /* _MULTI_DATAMODEL */

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (ENXIO);
	}

	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);

	/* Copy in the user's CD-XA request (model-aware on LP64 kernels) */
#ifdef _MULTI_DATAMODEL
	switch (ddi_model_convert_from(flag & FMODELS)) {
	case DDI_MODEL_ILP32:
		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
			return (EFAULT);
		}
		/*
		 * Convert the ILP32 uscsi data from the
		 * application to LP64 for internal use.
		 */
		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
		break;
	case DDI_MODEL_NONE:
		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
			return (EFAULT);
		}
		break;
	}
#else /* ! _MULTI_DATAMODEL */
	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
		return (EFAULT);
	}
#endif /* _MULTI_DATAMODEL */

	/*
	 * Since MMC-2 expects max 3 bytes for length, check if the
	 * length input is greater than 3 bytes
	 */
	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
		    "cdrom transfer length too large: %d (limit %d)\n",
		    cdxa->cdxa_length, 0xFFFFFF);
		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
		return (EINVAL);
	}

	/*
	 * Pick the per-block transfer size, and the flags byte used by the
	 * READ CD form of the command, for the requested format.
	 */
	switch (cdxa->cdxa_format) {
	case CDROM_XA_DATA:
		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
		read_flags = 0x10;
		break;
	case CDROM_XA_SECTOR_DATA:
		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
		read_flags = 0xf8;
		break;
	case CDROM_XA_DATA_W_ERROR:
		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
		read_flags = 0xfc;
		break;
	default:
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
		    cdxa->cdxa_format);
		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
		return (EINVAL);
	}

	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	bzero(cdb, CDB_GROUP5);
	if (un->un_f_mmc_cap == TRUE) {
		/* MMC capable drive: READ CD (0xBE) with a 3-byte length */
		cdb[0] = (char)SCMD_READ_CD;
		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
		cdb[9] = (char)read_flags;
	} else {
		/*
		 * Note: A vendor specific command (0xDB) is being used here to
		 * request a read of all subcodes.  It takes a 4-byte length
		 * (bytes 6-9) and the format directly (byte 10).
		 */
		cdb[0] = (char)SCMD_READ_CDXA;
		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
		cdb[10] = cdxa->cdxa_format;
	}
	com->uscsi_cdb	   = cdb;
	com->uscsi_cdblen  = CDB_GROUP5;
	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
	com->uscsi_buflen  = buflen;
	com->uscsi_flags   = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;
	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
	    SD_PATH_STANDARD);
	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
	kmem_free(com, sizeof (*com));
	return (rval);
}
28691 
28692 
28693 /*
28694  *    Function: sr_eject()
28695  *
28696  * Description: This routine is the driver entry point for handling CD-ROM
28697  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
28698  *
28699  *   Arguments: dev	- the device 'dev_t'
28700  *
28701  * Return Code: the code returned by sd_send_scsi_cmd()
28702  */
28703 
28704 static int
28705 sr_eject(dev_t dev)
28706 {
28707 	struct sd_lun	*un;
28708 	int		rval;
28709 	sd_ssc_t	*ssc;
28710 
28711 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28712 	    (un->un_state == SD_STATE_OFFLINE)) {
28713 		return (ENXIO);
28714 	}
28715 
28716 	/*
28717 	 * To prevent race conditions with the eject
28718 	 * command, keep track of an eject command as
28719 	 * it progresses. If we are already handling
28720 	 * an eject command in the driver for the given
28721 	 * unit and another request to eject is received
28722 	 * immediately return EAGAIN so we don't lose
28723 	 * the command if the current eject command fails.
28724 	 */
28725 	mutex_enter(SD_MUTEX(un));
28726 	if (un->un_f_ejecting == TRUE) {
28727 		mutex_exit(SD_MUTEX(un));
28728 		return (EAGAIN);
28729 	}
28730 	un->un_f_ejecting = TRUE;
28731 	mutex_exit(SD_MUTEX(un));
28732 
28733 	ssc = sd_ssc_init(un);
28734 	rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_ALLOW,
28735 	    SD_PATH_STANDARD);
28736 	sd_ssc_fini(ssc);
28737 
28738 	if (rval != 0) {
28739 		mutex_enter(SD_MUTEX(un));
28740 		un->un_f_ejecting = FALSE;
28741 		mutex_exit(SD_MUTEX(un));
28742 		return (rval);
28743 	}
28744 
28745 	ssc = sd_ssc_init(un);
28746 	rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
28747 	    SD_TARGET_EJECT, SD_PATH_STANDARD);
28748 	sd_ssc_fini(ssc);
28749 
28750 	if (rval == 0) {
28751 		mutex_enter(SD_MUTEX(un));
28752 		sr_ejected(un);
28753 		un->un_mediastate = DKIO_EJECTED;
28754 		un->un_f_ejecting = FALSE;
28755 		cv_broadcast(&un->un_state_cv);
28756 		mutex_exit(SD_MUTEX(un));
28757 	} else {
28758 		mutex_enter(SD_MUTEX(un));
28759 		un->un_f_ejecting = FALSE;
28760 		mutex_exit(SD_MUTEX(un));
28761 	}
28762 	return (rval);
28763 }
28764 
28765 
28766 /*
28767  *    Function: sr_ejected()
28768  *
28769  * Description: This routine updates the soft state structure to invalidate the
28770  *		geometry information after the media has been ejected or a
28771  *		media eject has been detected.
28772  *
28773  *   Arguments: un - driver soft state (unit) structure
28774  */
28775 
static void
sr_ejected(struct sd_lun *un)
{
	struct sd_errstats *stp;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/* The cached capacity/blocksize no longer describe any media */
	un->un_f_blockcount_is_valid	= FALSE;
	un->un_f_tgt_blocksize_is_valid	= FALSE;
	/*
	 * Drop SD_MUTEX around cmlb_invalidate() -- presumably because
	 * cmlb takes its own locks (TODO confirm).  NOTE(review): other
	 * threads may observe the unit state in this window.
	 */
	mutex_exit(SD_MUTEX(un));
	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
	mutex_enter(SD_MUTEX(un));

	/* Zero the exported capacity statistic, if error kstats exist */
	if (un->un_errstats != NULL) {
		stp = (struct sd_errstats *)un->un_errstats->ks_data;
		stp->sd_capacity.value.ui64 = 0;
	}
}
28795 
28796 
28797 /*
28798  *    Function: sr_check_wp()
28799  *
28800  * Description: This routine checks the write protection of a removable
28801  *      media disk and hotpluggable devices via the write protect bit of
28802  *      the Mode Page Header device specific field. Some devices choke
28803  *      on unsupported mode page. In order to workaround this issue,
28804  *      this routine has been implemented to use 0x3f mode page(request
28805  *      for all pages) for all device types.
28806  *
28807  *   Arguments: dev             - the device 'dev_t'
28808  *
28809  * Return Code: int indicating if the device is write protected (1) or not (0)
28810  *
28811  *     Context: Kernel thread.
28812  *
28813  */
28814 
static int
sr_check_wp(dev_t dev)
{
	struct sd_lun	*un;
	uchar_t		device_specific;
	uchar_t		*sense;
	int		hdrlen;
	int		rval = FALSE;
	int		status;
	sd_ssc_t	*ssc;

	/*
	 * Note: The return codes for this routine should be reworked to
	 * properly handle the case of a NULL softstate.
	 */
	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
		return (FALSE);
	}

	if (un->un_f_cfg_is_atapi == TRUE) {
		/*
		 * ATAPI devices use the group 1 (10-byte) mode commands.
		 * The mode page contents are not required; set the allocation
		 * length for the mode page header only
		 */
		hdrlen = MODE_HEADER_LENGTH_GRP2;
		sense = kmem_zalloc(hdrlen, KM_SLEEP);
		ssc = sd_ssc_init(un);
		status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense, hdrlen,
		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD);
		sd_ssc_fini(ssc);
		if (status != 0)
			goto err_exit;
		device_specific =
		    ((struct mode_header_grp2 *)sense)->device_specific;
	} else {
		/* Non-ATAPI: group 0 (6-byte) mode sense, header only */
		hdrlen = MODE_HEADER_LENGTH;
		sense = kmem_zalloc(hdrlen, KM_SLEEP);
		ssc = sd_ssc_init(un);
		status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense, hdrlen,
		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD);
		sd_ssc_fini(ssc);
		if (status != 0)
			goto err_exit;
		device_specific =
		    ((struct mode_header *)sense)->device_specific;
	}


	/*
	 * Check the write protect bit in the device specific field of the
	 * mode page header.
	 */
	if (device_specific & WRITE_PROTECT) {
		rval = TRUE;
	}

err_exit:
	/*
	 * If the mode sense failed we arrive here with rval still FALSE:
	 * not all devices understand this query, and such devices are
	 * assumed to be writable.
	 */
	kmem_free(sense, hdrlen);
	return (rval);
}
28876 
28877 /*
28878  *    Function: sr_volume_ctrl()
28879  *
28880  * Description: This routine is the driver entry point for handling CD-ROM
28881  *		audio output volume ioctl requests. (CDROMVOLCTRL)
28882  *
28883  *   Arguments: dev	- the device 'dev_t'
28884  *		data	- pointer to user audio volume control structure
28885  *		flag	- this argument is a pass through to ddi_copyxxx()
28886  *			  directly from the mode argument of ioctl().
28887  *
28888  * Return Code: the code returned by sd_send_scsi_cmd()
28889  *		EFAULT if ddi_copyxxx() fails
28890  *		ENXIO if fail ddi_get_soft_state
28891  *		EINVAL if data pointer is NULL
28892  *
28893  */
28894 
static int
sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct cdrom_volctrl    volume;
	struct cdrom_volctrl    *vol = &volume;
	uchar_t			*sense_page;
	uchar_t			*select_page;
	uchar_t			*sense;
	uchar_t			*select;
	int			sense_buflen;
	int			select_buflen;
	int			rval;
	sd_ssc_t		*ssc;

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	/* Copy in the user's requested volume settings */
	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
		return (EFAULT);
	}

	/*
	 * Fetch the current audio control mode page.  ATAPI/MMC devices
	 * take the group 1 (10-byte) mode commands with a grp2 header;
	 * other devices take group 0 with the 6-byte header.
	 */
	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
		struct mode_header_grp2		*sense_mhp;
		struct mode_header_grp2		*select_mhp;
		int				bd_len;

		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
		select_buflen = MODE_HEADER_LENGTH_GRP2 +
		    MODEPAGE_AUDIO_CTRL_LEN;
		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
		select = kmem_zalloc(select_buflen, KM_SLEEP);
		ssc = sd_ssc_init(un);
		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense,
		    sense_buflen, MODEPAGE_AUDIO_CTRL,
		    SD_PATH_STANDARD);
		sd_ssc_fini(ssc);

		if (rval != 0) {
			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
			    "sr_volume_ctrl: Mode Sense Failed\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (rval);
		}
		sense_mhp = (struct mode_header_grp2 *)sense;
		select_mhp = (struct mode_header_grp2 *)select;
		/* Block descriptor length is split across two header bytes */
		bd_len = (sense_mhp->bdesc_length_hi << 8) |
		    sense_mhp->bdesc_length_lo;
		if (bd_len > MODE_BLK_DESC_LENGTH) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense returned invalid "
			    "block descriptor length\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (EIO);
		}
		/* Locate the page past the header and any block descriptor */
		sense_page = (uchar_t *)
		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
		/* Data length and block descriptor length are zero on select */
		select_mhp->length_msb = 0;
		select_mhp->length_lsb = 0;
		select_mhp->bdesc_length_hi = 0;
		select_mhp->bdesc_length_lo = 0;
	} else {
		struct mode_header		*sense_mhp, *select_mhp;

		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
		select = kmem_zalloc(select_buflen, KM_SLEEP);
		ssc = sd_ssc_init(un);
		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
		    sense_buflen, MODEPAGE_AUDIO_CTRL,
		    SD_PATH_STANDARD);
		sd_ssc_fini(ssc);

		if (rval != 0) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense Failed\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (rval);
		}
		sense_mhp  = (struct mode_header *)sense;
		select_mhp = (struct mode_header *)select;
		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "sr_volume_ctrl: Mode Sense returned invalid "
			    "block descriptor length\n");
			kmem_free(sense, sense_buflen);
			kmem_free(select, select_buflen);
			return (EIO);
		}
		/* Locate the page past the header and any block descriptor */
		sense_page = (uchar_t *)
		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
		/* Data length and block descriptor length are zero on select */
		select_mhp->length = 0;
		select_mhp->bdesc_length = 0;
	}
	/*
	 * Note: An audio control data structure could be created and overlayed
	 * on the following in place of the array indexing method implemented.
	 */

	/* Build the select data for the user volume data */
	select_page[0] = MODEPAGE_AUDIO_CTRL;
	select_page[1] = 0xE;
	/* Set the immediate bit */
	select_page[2] = 0x04;
	/* Zero out reserved fields */
	select_page[3] = 0x00;
	select_page[4] = 0x00;
	/* Return sense data for fields not to be modified */
	select_page[5] = sense_page[5];
	select_page[6] = sense_page[6];
	select_page[7] = sense_page[7];
	/* Set the user specified volume levels for channel 0 and 1 */
	select_page[8] = 0x01;
	select_page[9] = vol->channel0;
	select_page[10] = 0x02;
	select_page[11] = vol->channel1;
	/* Channel 2 and 3 are currently unsupported so return the sense data */
	select_page[12] = sense_page[12];
	select_page[13] = sense_page[13];
	select_page[14] = sense_page[14];
	select_page[15] = sense_page[15];

	/* Send the select data back, using the same command group as above */
	ssc = sd_ssc_init(un);
	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP1, select,
		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
	} else {
		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
	}
	sd_ssc_fini(ssc);

	kmem_free(sense, sense_buflen);
	kmem_free(select, select_buflen);
	return (rval);
}
29043 
29044 
29045 /*
29046  *    Function: sr_read_sony_session_offset()
29047  *
29048  * Description: This routine is the driver entry point for handling CD-ROM
29049  *		ioctl requests for session offset information. (CDROMREADOFFSET)
29050  *		The address of the first track in the last session of a
29051  *		multi-session CD-ROM is returned
29052  *
29053  *		Note: This routine uses a vendor specific key value in the
29054  *		command control field without implementing any vendor check here
29055  *		or in the ioctl routine.
29056  *
29057  *   Arguments: dev	- the device 'dev_t'
29058  *		data	- pointer to an int to hold the requested address
29059  *		flag	- this argument is a pass through to ddi_copyxxx()
29060  *			  directly from the mode argument of ioctl().
29061  *
29062  * Return Code: the code returned by sd_send_scsi_cmd()
29063  *		EFAULT if ddi_copyxxx() fails
29064  *		ENXIO if fail ddi_get_soft_state
29065  *		EINVAL if data pointer is NULL
29066  */
29067 
static int
sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
{
	struct sd_lun		*un;
	struct uscsi_cmd	*com;
	caddr_t			buffer;
	char			cdb[CDB_GROUP1];
	int			session_offset = 0;
	int			rval;

	if (data == NULL) {
		return (EINVAL);
	}

	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
	    (un->un_state == SD_STATE_OFFLINE)) {
		return (ENXIO);
	}

	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
	bzero(cdb, CDB_GROUP1);
	cdb[0] = SCMD_READ_TOC;
	/*
	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
	 * (4 byte TOC response header + 8 byte response data)
	 */
	cdb[8] = SONY_SESSION_OFFSET_LEN;
	/* Byte 9 is the control byte. A vendor specific value is used */
	cdb[9] = SONY_SESSION_OFFSET_KEY;
	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
	com->uscsi_cdb = cdb;
	com->uscsi_cdblen = CDB_GROUP1;
	com->uscsi_bufaddr = buffer;
	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
	com->uscsi_flags = USCSI_DIAGNOSE|USCSI_SILENT|USCSI_READ;

	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
	    SD_PATH_STANDARD);
	if (rval != 0) {
		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
		kmem_free(com, sizeof (*com));
		return (rval);
	}
	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
		/* Assemble the big-endian 4-byte offset from bytes 8..11 */
		session_offset =
		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
		/*
		 * The device reports the offset in units of the current
		 * target block size; convert it to 2k blocks before
		 * returning it to the user.
		 */
		if (un->un_tgt_blocksize == CDROM_BLK_512) {
			session_offset >>= 2;
		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
			session_offset >>= 1;
		}
	}

	/* Note: a copyout failure overrides the (successful) command rval */
	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
		rval = EFAULT;
	}

	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
	kmem_free(com, sizeof (*com));
	return (rval);
}
29134 
29135 
29136 /*
29137  *    Function: sd_wm_cache_constructor()
29138  *
29139  * Description: Cache Constructor for the wmap cache for the read/modify/write
29140  * 		devices.
29141  *
29142  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
29143  *		un	- sd_lun structure for the device.
29144  *		flag	- the km flags passed to constructor
29145  *
29146  * Return Code: 0 on success.
29147  *		-1 on failure.
29148  */
29149 
29150 /*ARGSUSED*/
29151 static int
29152 sd_wm_cache_constructor(void *wm, void *un, int flags)
29153 {
29154 	bzero(wm, sizeof (struct sd_w_map));
29155 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
29156 	return (0);
29157 }
29158 
29159 
29160 /*
29161  *    Function: sd_wm_cache_destructor()
29162  *
29163  * Description: Cache destructor for the wmap cache for the read/modify/write
29164  * 		devices.
29165  *
29166  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
29167  *		un	- sd_lun structure for the device.
29168  */
29169 /*ARGSUSED*/
29170 static void
29171 sd_wm_cache_destructor(void *wm, void *un)
29172 {
29173 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
29174 }
29175 
29176 
29177 /*
29178  *    Function: sd_range_lock()
29179  *
29180  * Description: Lock the range of blocks specified as parameter to ensure
29181  *		that read, modify write is atomic and no other i/o writes
29182  *		to the same location. The range is specified in terms
29183  *		of start and end blocks. Block numbers are the actual
29184  *		media block numbers and not system.
29185  *
29186  *   Arguments: un	- sd_lun structure for the device.
29187  *		startb - The starting block number
29188  *		endb - The end block number
29189  *		typ - type of i/o - simple/read_modify_write
29190  *
29191  * Return Code: wm  - pointer to the wmap structure.
29192  *
29193  *     Context: This routine can sleep.
29194  */
29195 
static struct sd_w_map *
sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
{
	struct sd_w_map *wmp = NULL;
	struct sd_w_map *sl_wmp = NULL;
	struct sd_w_map *tmp_wmp;
	wm_state state = SD_WM_CHK_LIST;


	ASSERT(un != NULL);
	ASSERT(!mutex_owned(SD_MUTEX(un)));

	mutex_enter(SD_MUTEX(un));

	/*
	 * Small state machine: iterate until the range is locked
	 * (SD_WM_DONE).  SD_MUTEX is held across state transitions; it is
	 * dropped only to sleep (allocation or cv_wait), after which the
	 * list is re-validated via SD_WM_CHK_LIST.
	 */
	while (state != SD_WM_DONE) {

		switch (state) {
		case SD_WM_CHK_LIST:
			/*
			 * This is the starting state. Check the wmap list
			 * to see if the range is currently available.
			 */
			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
				/*
				 * If this is a simple write and no rmw
				 * i/o is pending then try to lock the
				 * range as the range should be available.
				 */
				state = SD_WM_LOCK_RANGE;
			} else {
				tmp_wmp = sd_get_range(un, startb, endb);
				if (tmp_wmp != NULL) {
					if ((wmp != NULL) && ONLIST(un, wmp)) {
						/*
						 * Should not keep onlist wmps
						 * while waiting this macro
						 * will also do wmp = NULL;
						 */
						FREE_ONLIST_WMAP(un, wmp);
					}
					/*
					 * sl_wmp is the wmap on which wait
					 * is done, since the tmp_wmp points
					 * to the inuse wmap, set sl_wmp to
					 * tmp_wmp and change the state to sleep
					 */
					sl_wmp = tmp_wmp;
					state = SD_WM_WAIT_MAP;
				} else {
					state = SD_WM_LOCK_RANGE;
				}

			}
			break;

		case SD_WM_LOCK_RANGE:
			ASSERT(un->un_wm_cache);
			/*
			 * The range need to be locked, try to get a wmap.
			 * First attempt it with NO_SLEEP, want to avoid a sleep
			 * if possible as we will have to release the sd mutex
			 * if we have to sleep.
			 */
			if (wmp == NULL)
				wmp = kmem_cache_alloc(un->un_wm_cache,
				    KM_NOSLEEP);
			if (wmp == NULL) {
				mutex_exit(SD_MUTEX(un));
				_NOTE(DATA_READABLE_WITHOUT_LOCK
				    (sd_lun::un_wm_cache))
				wmp = kmem_cache_alloc(un->un_wm_cache,
				    KM_SLEEP);
				mutex_enter(SD_MUTEX(un));
				/*
				 * we released the mutex so recheck and go to
				 * check list state.
				 */
				state = SD_WM_CHK_LIST;
			} else {
				/*
				 * We exit out of state machine since we
				 * have the wmap. Do the housekeeping first.
				 * place the wmap on the wmap list if it is not
				 * on it already and then set the state to done.
				 */
				wmp->wm_start = startb;
				wmp->wm_end = endb;
				wmp->wm_flags = typ | SD_WM_BUSY;
				if (typ & SD_WTYPE_RMW) {
					un->un_rmw_count++;
				}
				/*
				 * If not already on the list then link
				 */
				if (!ONLIST(un, wmp)) {
					wmp->wm_next = un->un_wm;
					wmp->wm_prev = NULL;
					if (wmp->wm_next)
						wmp->wm_next->wm_prev = wmp;
					un->un_wm = wmp;
				}
				state = SD_WM_DONE;
			}
			break;

		case SD_WM_WAIT_MAP:
			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
			/*
			 * Wait is done on sl_wmp, which is set in the
			 * check_list state.
			 */
			sl_wmp->wm_wanted_count++;
			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
			sl_wmp->wm_wanted_count--;
			/*
			 * We can reuse the memory from the completed sl_wmp
			 * lock range for our new lock, but only if noone is
			 * waiting for it.
			 */
			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
			if (sl_wmp->wm_wanted_count == 0) {
				if (wmp != NULL)
					CHK_N_FREEWMP(un, wmp);
				wmp = sl_wmp;
			}
			sl_wmp = NULL;
			/*
			 * After waking up, need to recheck for availability of
			 * range.
			 */
			state = SD_WM_CHK_LIST;
			break;

		default:
			panic("sd_range_lock: "
			    "Unknown state %d in sd_range_lock", state);
			/*NOTREACHED*/
		} /* switch(state) */

	} /* while(state != SD_WM_DONE) */

	mutex_exit(SD_MUTEX(un));

	ASSERT(wmp != NULL);

	return (wmp);
}
29343 
29344 
29345 /*
29346  *    Function: sd_get_range()
29347  *
29348  * Description: Find if there any overlapping I/O to this one
29349  *		Returns the write-map of 1st such I/O, NULL otherwise.
29350  *
29351  *   Arguments: un	- sd_lun structure for the device.
29352  *		startb - The starting block number
29353  *		endb - The end block number
29354  *
29355  * Return Code: wm  - pointer to the wmap structure.
29356  */
29357 
29358 static struct sd_w_map *
29359 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
29360 {
29361 	struct sd_w_map *wmp;
29362 
29363 	ASSERT(un != NULL);
29364 
29365 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
29366 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
29367 			continue;
29368 		}
29369 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
29370 			break;
29371 		}
29372 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
29373 			break;
29374 		}
29375 	}
29376 
29377 	return (wmp);
29378 }
29379 
29380 
29381 /*
29382  *    Function: sd_free_inlist_wmap()
29383  *
29384  * Description: Unlink and free a write map struct.
29385  *
29386  *   Arguments: un      - sd_lun structure for the device.
29387  *		wmp	- sd_w_map which needs to be unlinked.
29388  */
29389 
29390 static void
29391 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
29392 {
29393 	ASSERT(un != NULL);
29394 
29395 	if (un->un_wm == wmp) {
29396 		un->un_wm = wmp->wm_next;
29397 	} else {
29398 		wmp->wm_prev->wm_next = wmp->wm_next;
29399 	}
29400 
29401 	if (wmp->wm_next) {
29402 		wmp->wm_next->wm_prev = wmp->wm_prev;
29403 	}
29404 
29405 	wmp->wm_next = wmp->wm_prev = NULL;
29406 
29407 	kmem_cache_free(un->un_wm_cache, wmp);
29408 }
29409 
29410 
29411 /*
29412  *    Function: sd_range_unlock()
29413  *
29414  * Description: Unlock the range locked by wm.
29415  *		Free write map if nobody else is waiting on it.
29416  *
29417  *   Arguments: un      - sd_lun structure for the device.
29418  *              wmp     - sd_w_map which needs to be unlinked.
29419  */
29420 
29421 static void
29422 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
29423 {
29424 	ASSERT(un != NULL);
29425 	ASSERT(wm != NULL);
29426 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29427 
29428 	mutex_enter(SD_MUTEX(un));
29429 
29430 	if (wm->wm_flags & SD_WTYPE_RMW) {
29431 		un->un_rmw_count--;
29432 	}
29433 
29434 	if (wm->wm_wanted_count) {
29435 		wm->wm_flags = 0;
29436 		/*
29437 		 * Broadcast that the wmap is available now.
29438 		 */
29439 		cv_broadcast(&wm->wm_avail);
29440 	} else {
29441 		/*
29442 		 * If no one is waiting on the map, it should be free'ed.
29443 		 */
29444 		sd_free_inlist_wmap(un, wm);
29445 	}
29446 
29447 	mutex_exit(SD_MUTEX(un));
29448 }
29449 
29450 
29451 /*
29452  *    Function: sd_read_modify_write_task
29453  *
29454  * Description: Called from a taskq thread to initiate the write phase of
29455  *		a read-modify-write request.  This is used for targets where
29456  *		un->un_sys_blocksize != un->un_tgt_blocksize.
29457  *
29458  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
29459  *
29460  *     Context: Called under taskq thread context.
29461  */
29462 
29463 static void
29464 sd_read_modify_write_task(void *arg)
29465 {
29466 	struct sd_mapblocksize_info	*bsp;
29467 	struct buf	*bp;
29468 	struct sd_xbuf	*xp;
29469 	struct sd_lun	*un;
29470 
29471 	bp = arg;	/* The bp is given in arg */
29472 	ASSERT(bp != NULL);
29473 
29474 	/* Get the pointer to the layer-private data struct */
29475 	xp = SD_GET_XBUF(bp);
29476 	ASSERT(xp != NULL);
29477 	bsp = xp->xb_private;
29478 	ASSERT(bsp != NULL);
29479 
29480 	un = SD_GET_UN(bp);
29481 	ASSERT(un != NULL);
29482 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29483 
29484 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29485 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
29486 
29487 	/*
29488 	 * This is the write phase of a read-modify-write request, called
29489 	 * under the context of a taskq thread in response to the completion
29490 	 * of the read portion of the rmw request completing under interrupt
29491 	 * context. The write request must be sent from here down the iostart
29492 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
29493 	 * we use the layer index saved in the layer-private data area.
29494 	 */
29495 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
29496 
29497 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29498 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
29499 }
29500 
29501 
29502 /*
29503  *    Function: sddump_do_read_of_rmw()
29504  *
29505  * Description: This routine will be called from sddump, If sddump is called
29506  *		with an I/O which not aligned on device blocksize boundary
29507  *		then the write has to be converted to read-modify-write.
29508  *		Do the read part here in order to keep sddump simple.
29509  *		Note - That the sd_mutex is held across the call to this
29510  *		routine.
29511  *
29512  *   Arguments: un	- sd_lun
29513  *		blkno	- block number in terms of media block size.
29514  *		nblk	- number of blocks.
29515  *		bpp	- pointer to pointer to the buf structure. On return
29516  *			from this function, *bpp points to the valid buffer
29517  *			to which the write has to be done.
29518  *
29519  * Return Code: 0 for success or errno-type return code
29520  */
29521 
static int
sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
	struct buf **bpp)
{
	int err;
	int i;
	int rval;
	struct buf *bp;
	struct scsi_pkt *pkt = NULL;
	uint32_t target_blocksize;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	target_blocksize = un->un_tgt_blocksize;

	/* Drop the mutex across the allocation and polled I/O below */
	mutex_exit(SD_MUTEX(un));

	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
	if (bp == NULL) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "no resources for dumping; giving up");
		err = ENOMEM;
		goto done;
	}

	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
	    blkno, nblk);
	if (rval != 0) {
		scsi_free_consistent_buf(bp);
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "no resources for dumping; giving up");
		err = ENOMEM;
		goto done;
	}

	/* Dump context: the command must be polled, not interrupt driven */
	pkt->pkt_flags |= FLAG_NOINTR;

	err = EIO;
	for (i = 0; i < SD_NDUMP_RETRIES; i++) {

		/*
		 * Scsi_poll returns 0 (success) if the command completes and
		 * the status block is STATUS_GOOD.  We should only check
		 * errors if this condition is not true.  Even then we should
		 * send our own request sense packet only if we have a check
		 * condition and auto request sense has not been performed by
		 * the hba.
		 */
		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");

		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
			err = 0;
			break;
		}

		/*
		 * Check CMD_DEV_GONE 1st, give up if device is gone,
		 * no need to read RQS data.
		 */
		if (pkt->pkt_reason == CMD_DEV_GONE) {
			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
			    "Error while dumping state with rmw..."
			    "Device is gone\n");
			break;
		}

		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: read failed with CHECK, try # %d\n", i);
			/* Fetch sense ourselves only if the HBA did not ARQ */
			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
				(void) sd_send_polled_RQS(un);
			}

			continue;
		}

		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
			int reset_retval = 0;

			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: read failed with BUSY, try # %d\n", i);

			/* Prefer a LUN reset; fall back to a target reset */
			if (un->un_f_lun_reset_enabled == TRUE) {
				reset_retval = scsi_reset(SD_ADDRESS(un),
				    RESET_LUN);
			}
			if (reset_retval == 0) {
				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
			}
			(void) sd_send_polled_RQS(un);

		} else {
			SD_INFO(SD_LOG_DUMP, un,
			    "sddump: read failed with 0x%x, try # %d\n",
			    SD_GET_PKT_STATUS(pkt), i);
			mutex_enter(SD_MUTEX(un));
			sd_reset_target(un, pkt);
			mutex_exit(SD_MUTEX(un));
		}

		/*
		 * If we are not getting anywhere with lun/target resets,
		 * let's reset the bus.
		 */
		if (i > SD_NDUMP_RETRIES/2) {
			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
			(void) sd_send_polled_RQS(un);
		}

	}
	scsi_destroy_pkt(pkt);

	/* On failure, free the buffer; on success, hand it to the caller */
	if (err != 0) {
		scsi_free_consistent_buf(bp);
		*bpp = NULL;
	} else {
		*bpp = bp;
	}

done:
	/* Re-acquire the mutex the caller expects to still hold */
	mutex_enter(SD_MUTEX(un));
	return (err);
}
29647 
29648 
29649 /*
29650  *    Function: sd_failfast_flushq
29651  *
29652  * Description: Take all bp's on the wait queue that have B_FAILFAST set
29653  *		in b_flags and move them onto the failfast queue, then kick
29654  *		off a thread to return all bp's on the failfast queue to
29655  *		their owners with an error set.
29656  *
29657  *   Arguments: un - pointer to the soft state struct for the instance.
29658  *
29659  *     Context: may execute in interrupt context.
29660  */
29661 
static void
sd_failfast_flushq(struct sd_lun *un)
{
	struct buf *bp;
	struct buf *next_waitq_bp;
	struct buf *prev_waitq_bp = NULL;

	ASSERT(un != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));
	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
	ASSERT(un->un_failfast_bp == NULL);

	SD_TRACE(SD_LOG_IO_FAILFAST, un,
	    "sd_failfast_flushq: entry: un:0x%p\n", un);

	/*
	 * Check if we should flush all bufs when entering failfast state, or
	 * just those with B_FAILFAST set.
	 */
	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
		/*
		 * Move *all* bp's on the wait queue to the failfast flush
		 * queue, including those that do NOT have B_FAILFAST set.
		 */
		if (un->un_failfast_headp == NULL) {
			ASSERT(un->un_failfast_tailp == NULL);
			un->un_failfast_headp = un->un_waitq_headp;
		} else {
			ASSERT(un->un_failfast_tailp != NULL);
			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
		}

		un->un_failfast_tailp = un->un_waitq_tailp;

		/* update kstat for each bp moved out of the waitq */
		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
		}

		/* empty the waitq */
		un->un_waitq_headp = un->un_waitq_tailp = NULL;

	} else {
		/*
		 * Go thru the wait queue, pick off all entries with
		 * B_FAILFAST set, and move these onto the failfast queue.
		 */
		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
			/*
			 * Save the pointer to the next bp on the wait queue,
			 * so we get to it on the next iteration of this loop.
			 */
			next_waitq_bp = bp->av_forw;

			/*
			 * If this bp from the wait queue does NOT have
			 * B_FAILFAST set, just move on to the next element
			 * in the wait queue. Note, this is the only place
			 * where it is correct to set prev_waitq_bp.
			 */
			if ((bp->b_flags & B_FAILFAST) == 0) {
				prev_waitq_bp = bp;
				continue;
			}

			/*
			 * Remove the bp from the wait queue.
			 */
			if (bp == un->un_waitq_headp) {
				/* The bp is the first element of the waitq. */
				un->un_waitq_headp = next_waitq_bp;
				if (un->un_waitq_headp == NULL) {
					/* The wait queue is now empty */
					un->un_waitq_tailp = NULL;
				}
			} else {
				/*
				 * The bp is either somewhere in the middle
				 * or at the end of the wait queue.
				 */
				ASSERT(un->un_waitq_headp != NULL);
				ASSERT(prev_waitq_bp != NULL);
				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
				    == 0);
				if (bp == un->un_waitq_tailp) {
					/* bp is the last entry on the waitq. */
					ASSERT(next_waitq_bp == NULL);
					un->un_waitq_tailp = prev_waitq_bp;
				}
				prev_waitq_bp->av_forw = next_waitq_bp;
			}
			bp->av_forw = NULL;

			/*
			 * update kstat since the bp is moved out of
			 * the waitq
			 */
			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);

			/*
			 * Now put the bp onto the failfast queue.
			 */
			if (un->un_failfast_headp == NULL) {
				/* failfast queue is currently empty */
				ASSERT(un->un_failfast_tailp == NULL);
				un->un_failfast_headp =
				    un->un_failfast_tailp = bp;
			} else {
				/* Add the bp to the end of the failfast q */
				ASSERT(un->un_failfast_tailp != NULL);
				ASSERT(un->un_failfast_tailp->b_flags &
				    B_FAILFAST);
				un->un_failfast_tailp->av_forw = bp;
				un->un_failfast_tailp = bp;
			}
		}
	}

	/*
	 * Now return all bp's on the failfast queue to their owners.
	 * Each is completed with EIO.
	 */
	while ((bp = un->un_failfast_headp) != NULL) {

		un->un_failfast_headp = bp->av_forw;
		if (un->un_failfast_headp == NULL) {
			un->un_failfast_tailp = NULL;
		}

		/*
		 * We want to return the bp with a failure error code, but
		 * we do not want a call to sd_start_cmds() to occur here,
		 * so use sd_return_failed_command_no_restart() instead of
		 * sd_return_failed_command().
		 */
		sd_return_failed_command_no_restart(un, bp, EIO);
	}

	/* Flush the xbuf queues if required. */
	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
	}

	SD_TRACE(SD_LOG_IO_FAILFAST, un,
	    "sd_failfast_flushq: exit: un:0x%p\n", un);
}
29807 
29808 
29809 /*
29810  *    Function: sd_failfast_flushq_callback
29811  *
29812  * Description: Return TRUE if the given bp meets the criteria for failfast
29813  *		flushing. Used with ddi_xbuf_flushq(9F).
29814  *
29815  *   Arguments: bp - ptr to buf struct to be examined.
29816  *
29817  *     Context: Any
29818  */
29819 
29820 static int
29821 sd_failfast_flushq_callback(struct buf *bp)
29822 {
29823 	/*
29824 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
29825 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
29826 	 */
29827 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
29828 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
29829 }
29830 
29831 
29832 
29833 /*
29834  * Function: sd_setup_next_xfer
29835  *
29836  * Description: Prepare next I/O operation using DMA_PARTIAL
29837  *
29838  */
29839 
29840 static int
29841 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
29842     struct scsi_pkt *pkt, struct sd_xbuf *xp)
29843 {
29844 	ssize_t	num_blks_not_xfered;
29845 	daddr_t	strt_blk_num;
29846 	ssize_t	bytes_not_xfered;
29847 	int	rval;
29848 
29849 	ASSERT(pkt->pkt_resid == 0);
29850 
29851 	/*
29852 	 * Calculate next block number and amount to be transferred.
29853 	 *
29854 	 * How much data NOT transfered to the HBA yet.
29855 	 */
29856 	bytes_not_xfered = xp->xb_dma_resid;
29857 
29858 	/*
29859 	 * figure how many blocks NOT transfered to the HBA yet.
29860 	 */
29861 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
29862 
29863 	/*
29864 	 * set starting block number to the end of what WAS transfered.
29865 	 */
29866 	strt_blk_num = xp->xb_blkno +
29867 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
29868 
29869 	/*
29870 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
29871 	 * will call scsi_initpkt with NULL_FUNC so we do not have to release
29872 	 * the disk mutex here.
29873 	 */
29874 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
29875 	    strt_blk_num, num_blks_not_xfered);
29876 
29877 	if (rval == 0) {
29878 
29879 		/*
29880 		 * Success.
29881 		 *
29882 		 * Adjust things if there are still more blocks to be
29883 		 * transfered.
29884 		 */
29885 		xp->xb_dma_resid = pkt->pkt_resid;
29886 		pkt->pkt_resid = 0;
29887 
29888 		return (1);
29889 	}
29890 
29891 	/*
29892 	 * There's really only one possible return value from
29893 	 * sd_setup_next_rw_pkt which occurs when scsi_init_pkt
29894 	 * returns NULL.
29895 	 */
29896 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
29897 
29898 	bp->b_resid = bp->b_bcount;
29899 	bp->b_flags |= B_ERROR;
29900 
29901 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29902 	    "Error setting up next portion of DMA transfer\n");
29903 
29904 	return (0);
29905 }
29906 
29907 /*
29908  *    Function: sd_panic_for_res_conflict
29909  *
29910  * Description: Call panic with a string formatted with "Reservation Conflict"
29911  *		and a human readable identifier indicating the SD instance
29912  *		that experienced the reservation conflict.
29913  *
29914  *   Arguments: un - pointer to the soft state struct for the instance.
29915  *
29916  *     Context: may execute in interrupt context.
29917  */
29918 
29919 #define	SD_RESV_CONFLICT_FMT_LEN 40
29920 void
29921 sd_panic_for_res_conflict(struct sd_lun *un)
29922 {
29923 	char panic_str[SD_RESV_CONFLICT_FMT_LEN+MAXPATHLEN];
29924 	char path_str[MAXPATHLEN];
29925 
29926 	(void) snprintf(panic_str, sizeof (panic_str),
29927 	    "Reservation Conflict\nDisk: %s",
29928 	    ddi_pathname(SD_DEVINFO(un), path_str));
29929 
29930 	panic(panic_str);
29931 }
29932 
29933 /*
29934  * Note: The following sd_faultinjection_ioctl( ) routines implement
29935  * driver support for handling fault injection for error analysis
29936  * causing faults in multiple layers of the driver.
29937  *
29938  */
29939 
29940 #ifdef SD_FAULT_INJECTION
29941 static uint_t   sd_fault_injection_on = 0;
29942 
29943 /*
29944  *    Function: sd_faultinjection_ioctl()
29945  *
29946  * Description: This routine is the driver entry point for handling
29947  *              faultinjection ioctls to inject errors into the
29948  *              layer model
29949  *
29950  *   Arguments: cmd	- the ioctl cmd received
29951  *		arg	- the arguments from user and returns
29952  */
29953 
29954 static void
29955 sd_faultinjection_ioctl(int cmd, intptr_t arg,  struct sd_lun *un) {
29956 
29957 	uint_t i = 0;
29958 	uint_t rval;
29959 
29960 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
29961 
29962 	mutex_enter(SD_MUTEX(un));
29963 
29964 	switch (cmd) {
29965 	case SDIOCRUN:
29966 		/* Allow pushed faults to be injected */
29967 		SD_INFO(SD_LOG_SDTEST, un,
29968 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
29969 
29970 		sd_fault_injection_on = 1;
29971 
29972 		SD_INFO(SD_LOG_IOERR, un,
29973 		    "sd_faultinjection_ioctl: run finished\n");
29974 		break;
29975 
29976 	case SDIOCSTART:
29977 		/* Start Injection Session */
29978 		SD_INFO(SD_LOG_SDTEST, un,
29979 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
29980 
29981 		sd_fault_injection_on = 0;
29982 		un->sd_injection_mask = 0xFFFFFFFF;
29983 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
29984 			un->sd_fi_fifo_pkt[i] = NULL;
29985 			un->sd_fi_fifo_xb[i] = NULL;
29986 			un->sd_fi_fifo_un[i] = NULL;
29987 			un->sd_fi_fifo_arq[i] = NULL;
29988 		}
29989 		un->sd_fi_fifo_start = 0;
29990 		un->sd_fi_fifo_end = 0;
29991 
29992 		mutex_enter(&(un->un_fi_mutex));
29993 		un->sd_fi_log[0] = '\0';
29994 		un->sd_fi_buf_len = 0;
29995 		mutex_exit(&(un->un_fi_mutex));
29996 
29997 		SD_INFO(SD_LOG_IOERR, un,
29998 		    "sd_faultinjection_ioctl: start finished\n");
29999 		break;
30000 
30001 	case SDIOCSTOP:
30002 		/* Stop Injection Session */
30003 		SD_INFO(SD_LOG_SDTEST, un,
30004 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
30005 		sd_fault_injection_on = 0;
30006 		un->sd_injection_mask = 0x0;
30007 
30008 		/* Empty stray or unuseds structs from fifo */
30009 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30010 			if (un->sd_fi_fifo_pkt[i] != NULL) {
30011 				kmem_free(un->sd_fi_fifo_pkt[i],
30012 				    sizeof (struct sd_fi_pkt));
30013 			}
30014 			if (un->sd_fi_fifo_xb[i] != NULL) {
30015 				kmem_free(un->sd_fi_fifo_xb[i],
30016 				    sizeof (struct sd_fi_xb));
30017 			}
30018 			if (un->sd_fi_fifo_un[i] != NULL) {
30019 				kmem_free(un->sd_fi_fifo_un[i],
30020 				    sizeof (struct sd_fi_un));
30021 			}
30022 			if (un->sd_fi_fifo_arq[i] != NULL) {
30023 				kmem_free(un->sd_fi_fifo_arq[i],
30024 				    sizeof (struct sd_fi_arq));
30025 			}
30026 			un->sd_fi_fifo_pkt[i] = NULL;
30027 			un->sd_fi_fifo_un[i] = NULL;
30028 			un->sd_fi_fifo_xb[i] = NULL;
30029 			un->sd_fi_fifo_arq[i] = NULL;
30030 		}
30031 		un->sd_fi_fifo_start = 0;
30032 		un->sd_fi_fifo_end = 0;
30033 
30034 		SD_INFO(SD_LOG_IOERR, un,
30035 		    "sd_faultinjection_ioctl: stop finished\n");
30036 		break;
30037 
30038 	case SDIOCINSERTPKT:
30039 		/* Store a packet struct to be pushed onto fifo */
30040 		SD_INFO(SD_LOG_SDTEST, un,
30041 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
30042 
30043 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30044 
30045 		sd_fault_injection_on = 0;
30046 
30047 		/* No more that SD_FI_MAX_ERROR allowed in Queue */
30048 		if (un->sd_fi_fifo_pkt[i] != NULL) {
30049 			kmem_free(un->sd_fi_fifo_pkt[i],
30050 			    sizeof (struct sd_fi_pkt));
30051 		}
30052 		if (arg != NULL) {
30053 			un->sd_fi_fifo_pkt[i] =
30054 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
30055 			if (un->sd_fi_fifo_pkt[i] == NULL) {
30056 				/* Alloc failed don't store anything */
30057 				break;
30058 			}
30059 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
30060 			    sizeof (struct sd_fi_pkt), 0);
30061 			if (rval == -1) {
30062 				kmem_free(un->sd_fi_fifo_pkt[i],
30063 				    sizeof (struct sd_fi_pkt));
30064 				un->sd_fi_fifo_pkt[i] = NULL;
30065 			}
30066 		} else {
30067 			SD_INFO(SD_LOG_IOERR, un,
30068 			    "sd_faultinjection_ioctl: pkt null\n");
30069 		}
30070 		break;
30071 
30072 	case SDIOCINSERTXB:
30073 		/* Store a xb struct to be pushed onto fifo */
30074 		SD_INFO(SD_LOG_SDTEST, un,
30075 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
30076 
30077 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30078 
30079 		sd_fault_injection_on = 0;
30080 
30081 		if (un->sd_fi_fifo_xb[i] != NULL) {
30082 			kmem_free(un->sd_fi_fifo_xb[i],
30083 			    sizeof (struct sd_fi_xb));
30084 			un->sd_fi_fifo_xb[i] = NULL;
30085 		}
30086 		if (arg != NULL) {
30087 			un->sd_fi_fifo_xb[i] =
30088 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
30089 			if (un->sd_fi_fifo_xb[i] == NULL) {
30090 				/* Alloc failed don't store anything */
30091 				break;
30092 			}
30093 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
30094 			    sizeof (struct sd_fi_xb), 0);
30095 
30096 			if (rval == -1) {
30097 				kmem_free(un->sd_fi_fifo_xb[i],
30098 				    sizeof (struct sd_fi_xb));
30099 				un->sd_fi_fifo_xb[i] = NULL;
30100 			}
30101 		} else {
30102 			SD_INFO(SD_LOG_IOERR, un,
30103 			    "sd_faultinjection_ioctl: xb null\n");
30104 		}
30105 		break;
30106 
30107 	case SDIOCINSERTUN:
30108 		/* Store a un struct to be pushed onto fifo */
30109 		SD_INFO(SD_LOG_SDTEST, un,
30110 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
30111 
30112 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30113 
30114 		sd_fault_injection_on = 0;
30115 
30116 		if (un->sd_fi_fifo_un[i] != NULL) {
30117 			kmem_free(un->sd_fi_fifo_un[i],
30118 			    sizeof (struct sd_fi_un));
30119 			un->sd_fi_fifo_un[i] = NULL;
30120 		}
30121 		if (arg != NULL) {
30122 			un->sd_fi_fifo_un[i] =
30123 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
30124 			if (un->sd_fi_fifo_un[i] == NULL) {
30125 				/* Alloc failed don't store anything */
30126 				break;
30127 			}
30128 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
30129 			    sizeof (struct sd_fi_un), 0);
30130 			if (rval == -1) {
30131 				kmem_free(un->sd_fi_fifo_un[i],
30132 				    sizeof (struct sd_fi_un));
30133 				un->sd_fi_fifo_un[i] = NULL;
30134 			}
30135 
30136 		} else {
30137 			SD_INFO(SD_LOG_IOERR, un,
30138 			    "sd_faultinjection_ioctl: un null\n");
30139 		}
30140 
30141 		break;
30142 
30143 	case SDIOCINSERTARQ:
30144 		/* Store a arq struct to be pushed onto fifo */
30145 		SD_INFO(SD_LOG_SDTEST, un,
30146 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
30147 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30148 
30149 		sd_fault_injection_on = 0;
30150 
30151 		if (un->sd_fi_fifo_arq[i] != NULL) {
30152 			kmem_free(un->sd_fi_fifo_arq[i],
30153 			    sizeof (struct sd_fi_arq));
30154 			un->sd_fi_fifo_arq[i] = NULL;
30155 		}
30156 		if (arg != NULL) {
30157 			un->sd_fi_fifo_arq[i] =
30158 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
30159 			if (un->sd_fi_fifo_arq[i] == NULL) {
30160 				/* Alloc failed don't store anything */
30161 				break;
30162 			}
30163 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
30164 			    sizeof (struct sd_fi_arq), 0);
30165 			if (rval == -1) {
30166 				kmem_free(un->sd_fi_fifo_arq[i],
30167 				    sizeof (struct sd_fi_arq));
30168 				un->sd_fi_fifo_arq[i] = NULL;
30169 			}
30170 
30171 		} else {
30172 			SD_INFO(SD_LOG_IOERR, un,
30173 			    "sd_faultinjection_ioctl: arq null\n");
30174 		}
30175 
30176 		break;
30177 
30178 	case SDIOCPUSH:
30179 		/* Push stored xb, pkt, un, and arq onto fifo */
30180 		sd_fault_injection_on = 0;
30181 
30182 		if (arg != NULL) {
30183 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
30184 			if (rval != -1 &&
30185 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
30186 				un->sd_fi_fifo_end += i;
30187 			}
30188 		} else {
30189 			SD_INFO(SD_LOG_IOERR, un,
30190 			    "sd_faultinjection_ioctl: push arg null\n");
30191 			if (un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
30192 				un->sd_fi_fifo_end++;
30193 			}
30194 		}
30195 		SD_INFO(SD_LOG_IOERR, un,
30196 		    "sd_faultinjection_ioctl: push to end=%d\n",
30197 		    un->sd_fi_fifo_end);
30198 		break;
30199 
30200 	case SDIOCRETRIEVE:
30201 		/* Return buffer of log from Injection session */
30202 		SD_INFO(SD_LOG_SDTEST, un,
30203 		    "sd_faultinjection_ioctl: Injecting Fault Retreive");
30204 
30205 		sd_fault_injection_on = 0;
30206 
30207 		mutex_enter(&(un->un_fi_mutex));
30208 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
30209 		    un->sd_fi_buf_len+1, 0);
30210 		mutex_exit(&(un->un_fi_mutex));
30211 
30212 		if (rval == -1) {
30213 			/*
30214 			 * arg is possibly invalid setting
30215 			 * it to NULL for return
30216 			 */
30217 			arg = NULL;
30218 		}
30219 		break;
30220 	}
30221 
30222 	mutex_exit(SD_MUTEX(un));
30223 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl:"
30224 			    " exit\n");
30225 }
30226 
30227 
30228 /*
30229  *    Function: sd_injection_log()
30230  *
30231  * Description: This routine adds buff to the already existing injection log
30232  *              for retrieval via faultinjection_ioctl for use in fault
30233  *              detection and recovery
30234  *
30235  *   Arguments: buf - the string to add to the log
30236  */
30237 
30238 static void
30239 sd_injection_log(char *buf, struct sd_lun *un)
30240 {
30241 	uint_t len;
30242 
30243 	ASSERT(un != NULL);
30244 	ASSERT(buf != NULL);
30245 
30246 	mutex_enter(&(un->un_fi_mutex));
30247 
30248 	len = min(strlen(buf), 255);
30249 	/* Add logged value to Injection log to be returned later */
30250 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
30251 		uint_t	offset = strlen((char *)un->sd_fi_log);
30252 		char *destp = (char *)un->sd_fi_log + offset;
30253 		int i;
30254 		for (i = 0; i < len; i++) {
30255 			*destp++ = *buf++;
30256 		}
30257 		un->sd_fi_buf_len += len;
30258 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
30259 	}
30260 
30261 	mutex_exit(&(un->un_fi_mutex));
30262 }
30263 
30264 
30265 /*
30266  *    Function: sd_faultinjection()
30267  *
30268  * Description: This routine takes the pkt and changes its
30269  *		content based on error injection scenerio.
30270  *
30271  *   Arguments: pktp	- packet to be changed
30272  */
30273 
static void
sd_faultinjection(struct scsi_pkt *pktp)
{
	uint_t i;
	/*
	 * NOTE(review): SD_CONDSET() appears to reference the fi_pkt,
	 * fi_xb, fi_un and fi_arq locals by name; do not rename them
	 * without checking the macro definition in sddef.h.
	 */
	struct sd_fi_pkt *fi_pkt;
	struct sd_fi_xb *fi_xb;
	struct sd_fi_un *fi_un;
	struct sd_fi_arq *fi_arq;
	struct buf *bp;
	struct sd_xbuf *xb;
	struct sd_lun *un;

	ASSERT(pktp != NULL);

	/* pull bp xb and un from pktp */
	bp = (struct buf *)pktp->pkt_private;
	xb = SD_GET_XBUF(bp);
	un = SD_GET_UN(bp);

	ASSERT(un != NULL);

	mutex_enter(SD_MUTEX(un));

	SD_TRACE(SD_LOG_SDTEST, un,
	    "sd_faultinjection: entry Injection from sdintr\n");

	/* if injection is off, or there is nothing queued, return */
	if (sd_fault_injection_on == 0 ||
	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
		mutex_exit(SD_MUTEX(un));
		return;
	}

	SD_INFO(SD_LOG_SDTEST, un,
	    "sd_faultinjection: is working for copying\n");

	/* take next set off fifo (any of the four slots may be NULL) */
	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;

	fi_pkt = un->sd_fi_fifo_pkt[i];
	fi_xb = un->sd_fi_fifo_xb[i];
	fi_un = un->sd_fi_fifo_un[i];
	fi_arq = un->sd_fi_fifo_arq[i];


	/* set variables accordingly */
	/* set pkt if it was on fifo */
	if (fi_pkt != NULL) {
		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
		/* 0xff in the injection record means "leave cdb alone" */
		if (fi_pkt->pkt_cdbp != 0xff)
			SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");

	}
	/* set xb if it was on fifo */
	if (fi_xb != NULL) {
		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
		if (fi_xb->xb_retry_count != 0)
			SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
		SD_CONDSET(xb, xb, xb_victim_retry_count,
		    "xb_victim_retry_count");
		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");

		/*
		 * copy in block data from sense unconditionally (the
		 * conditional variant below was intentionally disabled)
		 */
		/*
		 * if (fi_xb->xb_sense_data[0] != -1) {
		 *	bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
		 *	SENSE_LENGTH);
		 * }
		 */
		bcopy(fi_xb->xb_sense_data, xb->xb_sense_data, SENSE_LENGTH);

		/* copy in extended sense codes */
		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
		    xb, es_code, "es_code");
		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
		    xb, es_key, "es_key");
		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
		    xb, es_add_code, "es_add_code");
		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
		    xb, es_qual_code, "es_qual_code");
		/* always stamp a valid extended-sense class byte */
		struct scsi_extended_sense *esp;
		esp = (struct scsi_extended_sense *)xb->xb_sense_data;
		esp->es_class = CLASS_EXTENDED_SENSE;
	}

	/* set un if it was on fifo */
	if (fi_un != NULL) {
		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
		SD_CONDSET(un, un, un_ctype, "un_ctype");
		SD_CONDSET(un, un, un_reset_retry_count,
		    "un_reset_retry_count");
		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
		    "un_f_allow_bus_device_reset");
		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");

	}

	/* copy in auto request sense if it was on fifo */
	if (fi_arq != NULL) {
		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
	}

	/* free structs */
	if (un->sd_fi_fifo_pkt[i] != NULL) {
		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
	}
	if (un->sd_fi_fifo_xb[i] != NULL) {
		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
	}
	if (un->sd_fi_fifo_un[i] != NULL) {
		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
	}
	if (un->sd_fi_fifo_arq[i] != NULL) {
		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
	}

	/*
	 * kmem_free() does not clear the caller's pointer.  Since these
	 * slots double as "was a record queued?" flags, explicitly NULL
	 * them after freeing.
	 */
	un->sd_fi_fifo_pkt[i] = NULL;
	un->sd_fi_fifo_un[i] = NULL;
	un->sd_fi_fifo_xb[i] = NULL;
	un->sd_fi_fifo_arq[i] = NULL;

	/* consume this FIFO entry */
	un->sd_fi_fifo_start++;

	mutex_exit(SD_MUTEX(un));

	SD_INFO(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
}
30417 
30418 #endif /* SD_FAULT_INJECTION */
30419 
30420 /*
30421  * This routine is invoked in sd_unit_attach(). Before calling it, the
30422  * properties in conf file should be processed already, and "hotpluggable"
30423  * property was processed also.
30424  *
30425  * The sd driver distinguishes 3 different type of devices: removable media,
30426  * non-removable media, and hotpluggable. Below the differences are defined:
30427  *
30428  * 1. Device ID
30429  *
30430  *     The device ID of a device is used to identify this device. Refer to
30431  *     ddi_devid_register(9F).
30432  *
30433  *     For a non-removable media disk device which can provide 0x80 or 0x83
30434  *     VPD page (refer to INQUIRY command of SCSI SPC specification), a unique
30435  *     device ID is created to identify this device. For other non-removable
30436  *     media devices, a default device ID is created only if this device has
30437  *     at least 2 alter cylinders. Otherwise, this device has no devid.
30438  *
30439  *     -------------------------------------------------------
30440  *     removable media   hotpluggable  | Can Have Device ID
30441  *     -------------------------------------------------------
30442  *         false             false     |     Yes
30443  *         false             true      |     Yes
30444  *         true                x       |     No
30445  *     ------------------------------------------------------
30446  *
30447  *
30448  * 2. SCSI group 4 commands
30449  *
30450  *     In SCSI specs, only some commands in group 4 command set can use
30451  *     8-byte addresses that can be used to access >2TB storage spaces.
30452  *     Other commands have no such capability. Without supporting group4,
30453  *     it is impossible to make full use of storage spaces of a disk with
30454  *     capacity larger than 2TB.
30455  *
30456  *     -----------------------------------------------
30457  *     removable media   hotpluggable   LP64  |  Group
30458  *     -----------------------------------------------
30459  *           false          false       false |   1
30460  *           false          false       true  |   4
30461  *           false          true        false |   1
30462  *           false          true        true  |   4
30463  *           true             x           x   |   5
30464  *     -----------------------------------------------
30465  *
30466  *
30467  * 3. Check for VTOC Label
30468  *
30469  *     If a direct-access disk has no EFI label, sd will check if it has a
30470  *     valid VTOC label. Now, sd also does that check for removable media
30471  *     and hotpluggable devices.
30472  *
30473  *     --------------------------------------------------------------
30474  *     Direct-Access   removable media    hotpluggable |  Check Label
30475  *     -------------------------------------------------------------
30476  *         false          false           false        |   No
30477  *         false          false           true         |   No
30478  *         false          true            false        |   Yes
30479  *         false          true            true         |   Yes
30480  *         true            x                x          |   Yes
30481  *     --------------------------------------------------------------
30482  *
30483  *
30484  * 4. Building default VTOC label
30485  *
30486  *     As section 3 says, sd checks if some kinds of devices have VTOC label.
30487  *     If those devices have no valid VTOC label, sd(7d) will attempt to
30488  *     create default VTOC for them. Currently sd creates default VTOC label
30489  *     for all devices on x86 platform (VTOC_16), but only for removable
30490  *     media devices on SPARC (VTOC_8).
30491  *
30492  *     -----------------------------------------------------------
30493  *       removable media hotpluggable platform   |   Default Label
30494  *     -----------------------------------------------------------
30495  *             false          false    sparc     |     No
30496  *             false          true      x86      |     Yes
30497  *             false          true     sparc     |     Yes
30498  *             true             x        x       |     Yes
30499  *     ----------------------------------------------------------
30500  *
30501  *
30502  * 5. Supported blocksizes of target devices
30503  *
30504  *     Sd supports non-512-byte blocksize for removable media devices only.
30505  *     For other devices, only 512-byte blocksize is supported. This may be
30506  *     changed in near future because some RAID devices require non-512-byte
30507  *     blocksize
30508  *
30509  *     -----------------------------------------------------------
30510  *     removable media    hotpluggable    | non-512-byte blocksize
30511  *     -----------------------------------------------------------
30512  *           false          false         |   No
30513  *           false          true          |   No
30514  *           true             x           |   Yes
30515  *     -----------------------------------------------------------
30516  *
30517  *
30518  * 6. Automatic mount & unmount
30519  *
30520  *     Sd(7d) driver provides DKIOCREMOVABLE ioctl. This ioctl is used to query
30521  *     if a device is removable media device. It return 1 for removable media
30522  *     devices, and 0 for others.
30523  *
30524  *     The automatic mounting subsystem should distinguish between the types
30525  *     of devices and apply automounting policies to each.
30526  *
30527  *
30528  * 7. fdisk partition management
30529  *
30530  *     Fdisk is traditional partition method on x86 platform. Sd(7d) driver
30531  *     just supports fdisk partitions on x86 platform. On sparc platform, sd
30532  *     doesn't support fdisk partitions at all. Note: pcfs(7fs) can recognize
30533  *     fdisk partitions on both x86 and SPARC platform.
30534  *
30535  *     -----------------------------------------------------------
30536  *       platform   removable media  USB/1394  |  fdisk supported
30537  *     -----------------------------------------------------------
30538  *        x86         X               X        |       true
30539  *     ------------------------------------------------------------
30540  *        sparc       X               X        |       false
30541  *     ------------------------------------------------------------
30542  *
30543  *
30544  * 8. MBOOT/MBR
30545  *
30546  *     Although sd(7d) doesn't support fdisk on SPARC platform, it does support
30547  *     read/write mboot for removable media devices on sparc platform.
30548  *
30549  *     -----------------------------------------------------------
30550  *       platform   removable media  USB/1394  |  mboot supported
30551  *     -----------------------------------------------------------
30552  *        x86         X               X        |       true
30553  *     ------------------------------------------------------------
30554  *        sparc      false           false     |       false
30555  *        sparc      false           true      |       true
30556  *        sparc      true            false     |       true
30557  *        sparc      true            true      |       true
30558  *     ------------------------------------------------------------
30559  *
30560  *
30561  * 9.  error handling during opening device
30562  *
30563  *     If failed to open a disk device, an errno is returned. For some kinds
30564  *     of errors, different errno is returned depending on if this device is
30565  *     a removable media device. This brings USB/1394 hard disks in line with
30566  *     expected hard disk behavior. It is not expected that this breaks any
30567  *     application.
30568  *
30569  *     ------------------------------------------------------
30570  *       removable media    hotpluggable   |  errno
30571  *     ------------------------------------------------------
30572  *             false          false        |   EIO
30573  *             false          true         |   EIO
30574  *             true             x          |   ENXIO
30575  *     ------------------------------------------------------
30576  *
30577  *
30578  * 11. ioctls: DKIOCEJECT, CDROMEJECT
30579  *
30580  *     These IOCTLs are applicable only to removable media devices.
30581  *
30582  *     -----------------------------------------------------------
30583  *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
30584  *     -----------------------------------------------------------
30585  *             false          false        |     No
30586  *             false          true         |     No
30587  *             true            x           |     Yes
30588  *     -----------------------------------------------------------
30589  *
30590  *
30591  * 12. Kstats for partitions
30592  *
30593  *     sd creates partition kstat for non-removable media devices. USB and
30594  *     Firewire hard disks now have partition kstats
30595  *
30596  *      ------------------------------------------------------
30597  *       removable media    hotpluggable   |   kstat
30598  *      ------------------------------------------------------
30599  *             false          false        |    Yes
30600  *             false          true         |    Yes
30601  *             true             x          |    No
30602  *       ------------------------------------------------------
30603  *
30604  *
30605  * 13. Removable media & hotpluggable properties
30606  *
30607  *     Sd driver creates a "removable-media" property for removable media
30608  *     devices. Parent nexus drivers create a "hotpluggable" property if
30609  *     it supports hotplugging.
30610  *
30611  *     ---------------------------------------------------------------------
30612  *     removable media   hotpluggable |  "removable-media"   " hotpluggable"
30613  *     ---------------------------------------------------------------------
30614  *       false            false       |    No                   No
30615  *       false            true        |    No                   Yes
30616  *       true             false       |    Yes                  No
30617  *       true             true        |    Yes                  Yes
30618  *     ---------------------------------------------------------------------
30619  *
30620  *
30621  * 14. Power Management
30622  *
30623  *     sd only power manages removable media devices or devices that support
30624  *     LOG_SENSE or have a "pm-capable" property  (PSARC/2002/250)
30625  *
30626  *     A parent nexus that supports hotplugging can also set "pm-capable"
30627  *     if the disk can be power managed.
30628  *
30629  *     ------------------------------------------------------------
30630  *       removable media hotpluggable pm-capable  |   power manage
30631  *     ------------------------------------------------------------
30632  *             false          false     false     |     No
30633  *             false          false     true      |     Yes
30634  *             false          true      false     |     No
30635  *             false          true      true      |     Yes
30636  *             true             x        x        |     Yes
30637  *     ------------------------------------------------------------
30638  *
30639  *      USB and firewire hard disks can now be power managed independently
30640  *      of the framebuffer
30641  *
30642  *
30643  * 15. Support for USB disks with capacity larger than 1TB
30644  *
30645  *     Currently, sd doesn't permit a fixed disk device with capacity
30646  *     larger than 1TB to be used in a 32-bit operating system environment.
30647  *     However, sd doesn't do that for removable media devices. Instead, it
30648  *     assumes that removable media devices cannot have a capacity larger
30649  *     than 1TB. Therefore, using those devices on 32-bit system is partially
30650  *     supported, which can cause some unexpected results.
30651  *
30652  *     ---------------------------------------------------------------------
30653  *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
30654  *     ---------------------------------------------------------------------
30655  *             false          false  |   true         |     no
30656  *             false          true   |   true         |     no
30657  *             true           false  |   true         |     Yes
30658  *             true           true   |   true         |     Yes
30659  *     ---------------------------------------------------------------------
30660  *
30661  *
30662  * 16. Check write-protection at open time
30663  *
30664  *     When a removable media device is being opened for writing without NDELAY
30665  *     flag, sd will check if this device is writable. If attempting to open
30666  *     without NDELAY flag a write-protected device, this operation will abort.
30667  *
30668  *     ------------------------------------------------------------
30669  *       removable media    USB/1394   |   WP Check
30670  *     ------------------------------------------------------------
30671  *             false          false    |     No
30672  *             false          true     |     No
30673  *             true           false    |     Yes
30674  *             true           true     |     Yes
30675  *     ------------------------------------------------------------
30676  *
30677  *
30678  * 17. syslog when corrupted VTOC is encountered
30679  *
30680  *      Currently, if an invalid VTOC is encountered, sd only print syslog
30681  *      for fixed SCSI disks.
30682  *     ------------------------------------------------------------
30683  *       removable media    USB/1394   |   print syslog
30684  *     ------------------------------------------------------------
30685  *             false          false    |     Yes
30686  *             false          true     |     No
30687  *             true           false    |     No
30688  *             true           true     |     No
30689  *     ------------------------------------------------------------
30690  */
30691 static void
30692 sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
30693 {
30694 	int	pm_cap;
30695 
30696 	ASSERT(un->un_sd);
30697 	ASSERT(un->un_sd->sd_inq);
30698 
30699 	/*
30700 	 * Enable SYNC CACHE support for all devices.
30701 	 */
30702 	un->un_f_sync_cache_supported = TRUE;
30703 
30704 	/*
30705 	 * Set the sync cache required flag to false.
30706 	 * This would ensure that there is no SYNC CACHE
30707 	 * sent when there are no writes
30708 	 */
30709 	un->un_f_sync_cache_required = FALSE;
30710 
30711 	if (un->un_sd->sd_inq->inq_rmb) {
30712 		/*
30713 		 * The media of this device is removable. And for this kind
30714 		 * of devices, it is possible to change medium after opening
30715 		 * devices. Thus we should support this operation.
30716 		 */
30717 		un->un_f_has_removable_media = TRUE;
30718 
30719 		/*
30720 		 * support non-512-byte blocksize of removable media devices
30721 		 */
30722 		un->un_f_non_devbsize_supported = TRUE;
30723 
30724 		/*
30725 		 * Assume that all removable media devices support DOOR_LOCK
30726 		 */
30727 		un->un_f_doorlock_supported = TRUE;
30728 
30729 		/*
30730 		 * For a removable media device, it is possible to be opened
30731 		 * with NDELAY flag when there is no media in drive, in this
30732 		 * case we don't care if device is writable. But if without
30733 		 * NDELAY flag, we need to check if media is write-protected.
30734 		 */
30735 		un->un_f_chk_wp_open = TRUE;
30736 
30737 		/*
30738 		 * need to start a SCSI watch thread to monitor media state,
30739 		 * when media is being inserted or ejected, notify syseventd.
30740 		 */
30741 		un->un_f_monitor_media_state = TRUE;
30742 
30743 		/*
30744 		 * Some devices don't support START_STOP_UNIT command.
30745 		 * Therefore, we'd better check if a device supports it
30746 		 * before sending it.
30747 		 */
30748 		un->un_f_check_start_stop = TRUE;
30749 
30750 		/*
30751 		 * support eject media ioctl:
30752 		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
30753 		 */
30754 		un->un_f_eject_media_supported = TRUE;
30755 
30756 		/*
30757 		 * Because many removable-media devices don't support
30758 		 * LOG_SENSE, we couldn't use this command to check if
30759 		 * a removable media device support power-management.
30760 		 * We assume that they support power-management via
30761 		 * START_STOP_UNIT command and can be spun up and down
30762 		 * without limitations.
30763 		 */
30764 		un->un_f_pm_supported = TRUE;
30765 
30766 		/*
30767 		 * Need to create a zero length (Boolean) property
30768 		 * removable-media for the removable media devices.
30769 		 * Note that the return value of the property is not being
30770 		 * checked, since if unable to create the property
30771 		 * then do not want the attach to fail altogether. Consistent
30772 		 * with other property creation in attach.
30773 		 */
30774 		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
30775 		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
30776 
30777 	} else {
30778 		/*
30779 		 * create device ID for device
30780 		 */
30781 		un->un_f_devid_supported = TRUE;
30782 
30783 		/*
30784 		 * Spin up non-removable-media devices once it is attached
30785 		 */
30786 		un->un_f_attach_spinup = TRUE;
30787 
30788 		/*
30789 		 * According to SCSI specification, Sense data has two kinds of
30790 		 * format: fixed format, and descriptor format. At present, we
30791 		 * don't support descriptor format sense data for removable
30792 		 * media.
30793 		 */
30794 		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
30795 			un->un_f_descr_format_supported = TRUE;
30796 		}
30797 
30798 		/*
30799 		 * kstats are created only for non-removable media devices.
30800 		 *
30801 		 * Set this in sd.conf to 0 in order to disable kstats.  The
30802 		 * default is 1, so they are enabled by default.
30803 		 */
30804 		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
30805 		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
30806 		    "enable-partition-kstats", 1));
30807 
30808 		/*
30809 		 * Check if HBA has set the "pm-capable" property.
30810 		 * If "pm-capable" exists and is non-zero then we can
30811 		 * power manage the device without checking the start/stop
30812 		 * cycle count log sense page.
30813 		 *
30814 		 * If "pm-capable" exists and is set to be false (0),
30815 		 * then we should not power manage the device.
30816 		 *
30817 		 * If "pm-capable" doesn't exist then pm_cap will
30818 		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
30819 		 * sd will check the start/stop cycle count log sense page
30820 		 * and power manage the device if the cycle count limit has
30821 		 * not been exceeded.
30822 		 */
30823 		pm_cap = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
30824 		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
30825 		if (SD_PM_CAPABLE_IS_UNDEFINED(pm_cap)) {
30826 			un->un_f_log_sense_supported = TRUE;
30827 			if (!un->un_f_power_condition_disabled &&
30828 			    SD_INQUIRY(un)->inq_ansi == 6) {
30829 				un->un_f_power_condition_supported = TRUE;
30830 			}
30831 		} else {
30832 			/*
30833 			 * pm-capable property exists.
30834 			 *
30835 			 * Convert "TRUE" values for pm_cap to
30836 			 * SD_PM_CAPABLE_IS_TRUE to make it easier to check
30837 			 * later. "TRUE" values are any values defined in
30838 			 * inquiry.h.
30839 			 */
30840 			if (SD_PM_CAPABLE_IS_FALSE(pm_cap)) {
30841 				un->un_f_log_sense_supported = FALSE;
30842 			} else {
30843 				/* SD_PM_CAPABLE_IS_TRUE case */
30844 				un->un_f_pm_supported = TRUE;
30845 				if (!un->un_f_power_condition_disabled &&
30846 				    SD_PM_CAPABLE_IS_SPC_4(pm_cap)) {
30847 					un->un_f_power_condition_supported =
30848 					    TRUE;
30849 				}
30850 				if (SD_PM_CAP_LOG_SUPPORTED(pm_cap)) {
30851 					un->un_f_log_sense_supported = TRUE;
30852 					un->un_f_pm_log_sense_smart =
30853 					    SD_PM_CAP_SMART_LOG(pm_cap);
30854 				}
30855 			}
30856 
30857 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
30858 			    "sd_unit_attach: un:0x%p pm-capable "
30859 			    "property set to %d.\n", un, un->un_f_pm_supported);
30860 		}
30861 	}
30862 
30863 	if (un->un_f_is_hotpluggable) {
30864 
30865 		/*
30866 		 * Have to watch hotpluggable devices as well, since
30867 		 * that's the only way for userland applications to
30868 		 * detect hot removal while device is busy/mounted.
30869 		 */
30870 		un->un_f_monitor_media_state = TRUE;
30871 
30872 		un->un_f_check_start_stop = TRUE;
30873 
30874 	}
30875 }
30876 
30877 /*
30878  * sd_tg_rdwr:
30879  * Provides rdwr access for cmlb via sd_tgops. The start_block is
30880  * in sys block size, req_length in bytes.
30881  *
30882  */
static int
sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
    diskaddr_t start_block, size_t reqlength, void *tg_cookie)
{
	struct sd_lun *un;
	int path_flag = (int)(uintptr_t)tg_cookie;
	char *dkl = NULL;	/* bounce buffer for unaligned requests */
	diskaddr_t real_addr = start_block;	/* addr in target blocks */
	diskaddr_t first_byte, end_block;

	size_t	buffer_size = reqlength;
	int rval = 0;
	diskaddr_t	cap;
	uint32_t	lbasize;
	sd_ssc_t	*ssc;

	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
	if (un == NULL)
		return (ENXIO);

	/* Only plain reads and writes come through this cmlb entry point. */
	if (cmd != TG_READ && cmd != TG_WRITE)
		return (EINVAL);

	ssc = sd_ssc_init(un);
	mutex_enter(SD_MUTEX(un));
	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
		/*
		 * Target block size not yet known: issue READ CAPACITY
		 * (with the mutex dropped) to establish it before any I/O.
		 */
		mutex_exit(SD_MUTEX(un));
		rval = sd_send_scsi_READ_CAPACITY(ssc, (uint64_t *)&cap,
		    &lbasize, path_flag);
		if (rval != 0)
			goto done1;
		mutex_enter(SD_MUTEX(un));
		sd_update_block_info(un, lbasize, cap);
		if ((un->un_f_tgt_blocksize_is_valid == FALSE)) {
			mutex_exit(SD_MUTEX(un));
			rval = EIO;
			goto done;
		}
	}

	if (NOT_DEVBSIZE(un)) {
		/*
		 * sys_blocksize != tgt_blocksize, need to re-adjust
		 * blkno and save the index to beginning of dk_label
		 */
		first_byte  = SD_SYSBLOCKS2BYTES(start_block);
		real_addr = first_byte / un->un_tgt_blocksize;

		end_block = (first_byte + reqlength +
		    un->un_tgt_blocksize - 1) / un->un_tgt_blocksize;

		/* round up buffer size to multiple of target block size */
		buffer_size = (end_block - real_addr) * un->un_tgt_blocksize;

		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_tg_rdwr",
		    "label_addr: 0x%x allocation size: 0x%x\n",
		    real_addr, buffer_size);

		if (((first_byte % un->un_tgt_blocksize) != 0) ||
		    (reqlength % un->un_tgt_blocksize) != 0)
			/* the request is not aligned */
			dkl = kmem_zalloc(buffer_size, KM_SLEEP);
	}

	/*
	 * The MMC standard allows READ CAPACITY to be
	 * inaccurate by a bounded amount (in the interest of
	 * response latency).  As a result, failed READs are
	 * commonplace (due to the reading of metadata and not
	 * data). Depending on the per-Vendor/drive Sense data,
	 * the failed READ can cause many (unnecessary) retries.
	 */

	/*
	 * Reads of the last one or two blocks of a CD get the
	 * priority path to limit retry storms near the capacity edge.
	 */
	if (ISCD(un) && (cmd == TG_READ) &&
	    (un->un_f_blockcount_is_valid == TRUE) &&
	    ((start_block == (un->un_blockcount - 1))||
	    (start_block == (un->un_blockcount - 2)))) {
			path_flag = SD_PATH_DIRECT_PRIORITY;
	}

	mutex_exit(SD_MUTEX(un));
	if (cmd == TG_READ) {
		rval = sd_send_scsi_READ(ssc, (dkl != NULL)? dkl: bufaddr,
		    buffer_size, real_addr, path_flag);
		/* Unaligned read: copy only the requested bytes out. */
		if (dkl != NULL)
			bcopy(dkl + SD_TGTBYTEOFFSET(un, start_block,
			    real_addr), bufaddr, reqlength);
	} else {
		/*
		 * Unaligned write: read-modify-write through the bounce
		 * buffer so the surrounding bytes are preserved.
		 */
		if (dkl) {
			rval = sd_send_scsi_READ(ssc, dkl, buffer_size,
			    real_addr, path_flag);
			if (rval) {
				goto done1;
			}
			bcopy(bufaddr, dkl + SD_TGTBYTEOFFSET(un, start_block,
			    real_addr), reqlength);
		}
		rval = sd_send_scsi_WRITE(ssc, (dkl != NULL)? dkl: bufaddr,
		    buffer_size, real_addr, path_flag);
	}

done1:
	/* Common cleanup: release the bounce buffer, record assessment. */
	if (dkl != NULL)
		kmem_free(dkl, buffer_size);

	if (rval != 0) {
		if (rval == EIO)
			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
		else
			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	}
done:
	sd_ssc_fini(ssc);
	return (rval);
}
30998 
30999 
31000 static int
31001 sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie)
31002 {
31003 
31004 	struct sd_lun *un;
31005 	diskaddr_t	cap;
31006 	uint32_t	lbasize;
31007 	int		path_flag = (int)(uintptr_t)tg_cookie;
31008 	int		ret = 0;
31009 
31010 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
31011 	if (un == NULL)
31012 		return (ENXIO);
31013 
31014 	switch (cmd) {
31015 	case TG_GETPHYGEOM:
31016 	case TG_GETVIRTGEOM:
31017 	case TG_GETCAPACITY:
31018 	case TG_GETBLOCKSIZE:
31019 		mutex_enter(SD_MUTEX(un));
31020 
31021 		if ((un->un_f_blockcount_is_valid == TRUE) &&
31022 		    (un->un_f_tgt_blocksize_is_valid == TRUE)) {
31023 			cap = un->un_blockcount;
31024 			lbasize = un->un_tgt_blocksize;
31025 			mutex_exit(SD_MUTEX(un));
31026 		} else {
31027 			sd_ssc_t	*ssc;
31028 			mutex_exit(SD_MUTEX(un));
31029 			ssc = sd_ssc_init(un);
31030 			ret = sd_send_scsi_READ_CAPACITY(ssc, (uint64_t *)&cap,
31031 			    &lbasize, path_flag);
31032 			if (ret != 0) {
31033 				if (ret == EIO)
31034 					sd_ssc_assessment(ssc,
31035 					    SD_FMT_STATUS_CHECK);
31036 				else
31037 					sd_ssc_assessment(ssc,
31038 					    SD_FMT_IGNORE);
31039 				sd_ssc_fini(ssc);
31040 				return (ret);
31041 			}
31042 			sd_ssc_fini(ssc);
31043 			mutex_enter(SD_MUTEX(un));
31044 			sd_update_block_info(un, lbasize, cap);
31045 			if ((un->un_f_blockcount_is_valid == FALSE) ||
31046 			    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
31047 				mutex_exit(SD_MUTEX(un));
31048 				return (EIO);
31049 			}
31050 			mutex_exit(SD_MUTEX(un));
31051 		}
31052 
31053 		if (cmd == TG_GETCAPACITY) {
31054 			*(diskaddr_t *)arg = cap;
31055 			return (0);
31056 		}
31057 
31058 		if (cmd == TG_GETBLOCKSIZE) {
31059 			*(uint32_t *)arg = lbasize;
31060 			return (0);
31061 		}
31062 
31063 		if (cmd == TG_GETPHYGEOM)
31064 			ret = sd_get_physical_geometry(un, (cmlb_geom_t *)arg,
31065 			    cap, lbasize, path_flag);
31066 		else
31067 			/* TG_GETVIRTGEOM */
31068 			ret = sd_get_virtual_geometry(un,
31069 			    (cmlb_geom_t *)arg, cap, lbasize);
31070 
31071 		return (ret);
31072 
31073 	case TG_GETATTR:
31074 		mutex_enter(SD_MUTEX(un));
31075 		((tg_attribute_t *)arg)->media_is_writable =
31076 		    un->un_f_mmc_writable_media;
31077 		((tg_attribute_t *)arg)->media_is_solid_state =
31078 		    un->un_f_is_solid_state;
31079 		mutex_exit(SD_MUTEX(un));
31080 		return (0);
31081 	default:
31082 		return (ENOTTY);
31083 
31084 	}
31085 }
31086 
31087 /*
31088  *    Function: sd_ssc_ereport_post
31089  *
31090  * Description: Will be called when SD driver need to post an ereport.
31091  *
31092  *    Context: Kernel thread or interrupt context.
31093  */
static void
sd_ssc_ereport_post(sd_ssc_t *ssc, enum sd_driver_assessment drv_assess)
{
	int uscsi_path_instance = 0;
	uchar_t	uscsi_pkt_reason;
	uint32_t uscsi_pkt_state;
	uint32_t uscsi_pkt_statistics;
	uint64_t uscsi_ena;
	uchar_t op_code;
	uint8_t *sensep;
	union scsi_cdb *cdbp;
	uint_t cdblen = 0;
	uint_t senlen = 0;
	struct sd_lun *un;
	dev_info_t *dip;
	char *devid;
	/* Flags that mark un-decodable data; handled by the uderr path. */
	int ssc_invalid_flags = SSC_FLAGS_INVALID_PKT_REASON |
	    SSC_FLAGS_INVALID_STATUS |
	    SSC_FLAGS_INVALID_SENSE |
	    SSC_FLAGS_INVALID_DATA;
	char assessment[16];

	ASSERT(ssc != NULL);
	ASSERT(ssc->ssc_uscsi_cmd != NULL);
	ASSERT(ssc->ssc_uscsi_info != NULL);

	un = ssc->ssc_un;
	ASSERT(un != NULL);

	dip = un->un_sd->sd_dev;

	/*
	 * Get the devid:
	 *	devid will only be passed to non-transport error reports.
	 */
	devid = DEVI(dip)->devi_devid_str;

	/*
	 * If we are syncing or dumping, the command will not be executed
	 * so we bypass this situation.
	 */
	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
	    (un->un_state == SD_STATE_DUMPING))
		return;

	/* Snapshot the packet/command state captured by extract_info. */
	uscsi_pkt_reason = ssc->ssc_uscsi_info->ui_pkt_reason;
	uscsi_path_instance = ssc->ssc_uscsi_cmd->uscsi_path_instance;
	uscsi_pkt_state = ssc->ssc_uscsi_info->ui_pkt_state;
	uscsi_pkt_statistics = ssc->ssc_uscsi_info->ui_pkt_statistics;
	uscsi_ena = ssc->ssc_uscsi_info->ui_ena;

	sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
	cdbp = (union scsi_cdb *)ssc->ssc_uscsi_cmd->uscsi_cdb;

	/* In rare cases, EG:DOORLOCK, the cdb could be NULL */
	if (cdbp == NULL) {
		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
		    "sd_ssc_ereport_post meet empty cdb\n");
		return;
	}

	op_code = cdbp->scc_cmd;

	cdblen = (int)ssc->ssc_uscsi_cmd->uscsi_cdblen;
	/* Actual sense bytes received = requested length minus residual. */
	senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
	    ssc->ssc_uscsi_cmd->uscsi_rqresid);

	if (senlen > 0)
		ASSERT(sensep != NULL);

	/*
	 * Initialize drv_assess to corresponding values.
	 * SD_FM_DRV_FATAL will be mapped to "fail" or "fatal" depending
	 * on the sense-key returned back.
	 */
	switch (drv_assess) {
		case SD_FM_DRV_RECOVERY:
			(void) sprintf(assessment, "%s", "recovered");
			break;
		case SD_FM_DRV_RETRY:
			(void) sprintf(assessment, "%s", "retry");
			break;
		case SD_FM_DRV_NOTICE:
			(void) sprintf(assessment, "%s", "info");
			break;
		case SD_FM_DRV_FATAL:
		default:
			(void) sprintf(assessment, "%s", "unknown");
	}
	/*
	 * If drv_assess == SD_FM_DRV_RECOVERY, this should be a recovered
	 * command, we will post ereport.io.scsi.cmd.disk.recovered.
	 * driver-assessment will always be "recovered" here.
	 */
	if (drv_assess == SD_FM_DRV_RECOVERY) {
		scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
		    "cmd.disk.recovered", uscsi_ena, devid, DDI_NOSLEEP,
		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
		    "driver-assessment", DATA_TYPE_STRING, assessment,
		    "op-code", DATA_TYPE_UINT8, op_code,
		    "cdb", DATA_TYPE_UINT8_ARRAY,
		    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
		    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
		    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
		    "pkt-stats", DATA_TYPE_UINT32, uscsi_pkt_statistics,
		    NULL);
		return;
	}

	/*
	 * If there is un-expected/un-decodable data, we should post
	 * ereport.io.scsi.cmd.disk.dev.uderr.
	 * driver-assessment will be set based on parameter drv_assess.
	 * SSC_FLAGS_INVALID_SENSE - invalid sense data sent back.
	 * SSC_FLAGS_INVALID_PKT_REASON - invalid pkt-reason encountered.
	 * SSC_FLAGS_INVALID_STATUS - invalid stat-code encountered.
	 * SSC_FLAGS_INVALID_DATA - invalid data sent back.
	 */
	if (ssc->ssc_flags & ssc_invalid_flags) {
		if (ssc->ssc_flags & SSC_FLAGS_INVALID_SENSE) {
			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
			    "cmd.disk.dev.uderr", uscsi_ena, devid, DDI_NOSLEEP,
			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
			    "driver-assessment", DATA_TYPE_STRING,
			    drv_assess == SD_FM_DRV_FATAL ?
			    "fail" : assessment,
			    "op-code", DATA_TYPE_UINT8, op_code,
			    "cdb", DATA_TYPE_UINT8_ARRAY,
			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
			    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
			    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
			    "pkt-stats", DATA_TYPE_UINT32,
			    uscsi_pkt_statistics,
			    "stat-code", DATA_TYPE_UINT8,
			    ssc->ssc_uscsi_cmd->uscsi_status,
			    "un-decode-info", DATA_TYPE_STRING,
			    ssc->ssc_info,
			    "un-decode-value", DATA_TYPE_UINT8_ARRAY,
			    senlen, sensep,
			    NULL);
		} else {
			/*
			 * For other type of invalid data, the
			 * un-decode-value field would be empty because the
			 * un-decodable content could be seen from upper
			 * level payload or inside un-decode-info.
			 */
			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
			    "cmd.disk.dev.uderr", uscsi_ena, devid, DDI_NOSLEEP,
			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
			    "driver-assessment", DATA_TYPE_STRING,
			    drv_assess == SD_FM_DRV_FATAL ?
			    "fail" : assessment,
			    "op-code", DATA_TYPE_UINT8, op_code,
			    "cdb", DATA_TYPE_UINT8_ARRAY,
			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
			    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
			    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
			    "pkt-stats", DATA_TYPE_UINT32,
			    uscsi_pkt_statistics,
			    "stat-code", DATA_TYPE_UINT8,
			    ssc->ssc_uscsi_cmd->uscsi_status,
			    "un-decode-info", DATA_TYPE_STRING,
			    ssc->ssc_info,
			    "un-decode-value", DATA_TYPE_UINT8_ARRAY,
			    0, NULL,
			    NULL);
		}
		/* Invalid-data flags are consumed once reported. */
		ssc->ssc_flags &= ~ssc_invalid_flags;
		return;
	}

	if (uscsi_pkt_reason != CMD_CMPLT ||
	    (ssc->ssc_flags & SSC_FLAGS_TRAN_ABORT)) {
		/*
		 * pkt-reason != CMD_CMPLT or SSC_FLAGS_TRAN_ABORT was
		 * set inside sd_start_cmds due to errors(bad packet or
		 * fatal transport error), we should take it as a
		 * transport error, so we post ereport.io.scsi.cmd.disk.tran.
		 * driver-assessment will be set based on drv_assess.
		 * We will set devid to NULL because it is a transport
		 * error.
		 */
		if (ssc->ssc_flags & SSC_FLAGS_TRAN_ABORT)
			ssc->ssc_flags &= ~SSC_FLAGS_TRAN_ABORT;

		/*
		 * NOTE(review): this path (and the sense-data paths below)
		 * posts "pkt-state" as DATA_TYPE_UINT8, while the recovered
		 * and uderr paths above post it as DATA_TYPE_UINT32 even
		 * though uscsi_pkt_state is a uint32_t in all cases.
		 * Confirm against FMA consumers before unifying.
		 */
		scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
		    "cmd.disk.tran", uscsi_ena, NULL, DDI_NOSLEEP, FM_VERSION,
		    DATA_TYPE_UINT8, FM_EREPORT_VERS0,
		    "driver-assessment", DATA_TYPE_STRING,
		    drv_assess == SD_FM_DRV_FATAL ? "fail" : assessment,
		    "op-code", DATA_TYPE_UINT8, op_code,
		    "cdb", DATA_TYPE_UINT8_ARRAY,
		    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
		    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
		    "pkt-state", DATA_TYPE_UINT8, uscsi_pkt_state,
		    "pkt-stats", DATA_TYPE_UINT32, uscsi_pkt_statistics,
		    NULL);
	} else {
		/*
		 * If we got here, we have a completed command, and we need
		 * to further investigate the sense data to see what kind
		 * of ereport we should post.
		 * Post ereport.io.scsi.cmd.disk.dev.rqs.merr
		 * if sense-key == 0x3.
		 * Post ereport.io.scsi.cmd.disk.dev.rqs.derr otherwise.
		 * driver-assessment will be set based on the parameter
		 * drv_assess.
		 */
		if (senlen > 0) {
			/*
			 * Here we have sense data available.
			 */
			uint8_t sense_key;
			sense_key = scsi_sense_key(sensep);
			if (sense_key == 0x3) {
				/*
				 * sense-key == 0x3(medium error),
				 * driver-assessment should be "fatal" if
				 * drv_assess is SD_FM_DRV_FATAL.
				 */
				scsi_fm_ereport_post(un->un_sd,
				    uscsi_path_instance,
				    "cmd.disk.dev.rqs.merr",
				    uscsi_ena, devid, DDI_NOSLEEP, FM_VERSION,
				    DATA_TYPE_UINT8, FM_EREPORT_VERS0,
				    "driver-assessment",
				    DATA_TYPE_STRING,
				    drv_assess == SD_FM_DRV_FATAL ?
				    "fatal" : assessment,
				    "op-code",
				    DATA_TYPE_UINT8, op_code,
				    "cdb",
				    DATA_TYPE_UINT8_ARRAY, cdblen,
				    ssc->ssc_uscsi_cmd->uscsi_cdb,
				    "pkt-reason",
				    DATA_TYPE_UINT8, uscsi_pkt_reason,
				    "pkt-state",
				    DATA_TYPE_UINT8, uscsi_pkt_state,
				    "pkt-stats",
				    DATA_TYPE_UINT32,
				    uscsi_pkt_statistics,
				    "stat-code",
				    DATA_TYPE_UINT8,
				    ssc->ssc_uscsi_cmd->uscsi_status,
				    "key",
				    DATA_TYPE_UINT8,
				    scsi_sense_key(sensep),
				    "asc",
				    DATA_TYPE_UINT8,
				    scsi_sense_asc(sensep),
				    "ascq",
				    DATA_TYPE_UINT8,
				    scsi_sense_ascq(sensep),
				    "sense-data",
				    DATA_TYPE_UINT8_ARRAY,
				    senlen, sensep,
				    "lba",
				    DATA_TYPE_UINT64,
				    ssc->ssc_uscsi_info->ui_lba,
				    NULL);
				} else {
					/*
					 * if sense-key == 0x4(hardware
					 * error), driver-assessment should
					 * be "fatal" if drv_assess is
					 * SD_FM_DRV_FATAL.
					 */
					scsi_fm_ereport_post(un->un_sd,
					    uscsi_path_instance,
					    "cmd.disk.dev.rqs.derr",
					    uscsi_ena, devid, DDI_NOSLEEP,
					    FM_VERSION,
					    DATA_TYPE_UINT8, FM_EREPORT_VERS0,
					    "driver-assessment",
					    DATA_TYPE_STRING,
					    drv_assess == SD_FM_DRV_FATAL ?
					    (sense_key == 0x4 ?
					    "fatal" : "fail") : assessment,
					    "op-code",
					    DATA_TYPE_UINT8, op_code,
					    "cdb",
					    DATA_TYPE_UINT8_ARRAY, cdblen,
					    ssc->ssc_uscsi_cmd->uscsi_cdb,
					    "pkt-reason",
					    DATA_TYPE_UINT8, uscsi_pkt_reason,
					    "pkt-state",
					    DATA_TYPE_UINT8, uscsi_pkt_state,
					    "pkt-stats",
					    DATA_TYPE_UINT32,
					    uscsi_pkt_statistics,
					    "stat-code",
					    DATA_TYPE_UINT8,
					    ssc->ssc_uscsi_cmd->uscsi_status,
					    "key",
					    DATA_TYPE_UINT8,
					    scsi_sense_key(sensep),
					    "asc",
					    DATA_TYPE_UINT8,
					    scsi_sense_asc(sensep),
					    "ascq",
					    DATA_TYPE_UINT8,
					    scsi_sense_ascq(sensep),
					    "sense-data",
					    DATA_TYPE_UINT8_ARRAY,
					    senlen, sensep,
					    NULL);
				}
		} else {
			/*
			 * For stat_code == STATUS_GOOD, this is not a
			 * hardware error.
			 */
			if (ssc->ssc_uscsi_cmd->uscsi_status == STATUS_GOOD)
				return;

			/*
			 * Post ereport.io.scsi.cmd.disk.dev.serr if we got the
			 * stat-code but with sense data unavailable.
			 * driver-assessment will be set based on parameter
			 * drv_assess.
			 */
			scsi_fm_ereport_post(un->un_sd,
			    uscsi_path_instance, "cmd.disk.dev.serr", uscsi_ena,
			    devid, DDI_NOSLEEP, FM_VERSION, DATA_TYPE_UINT8,
			    FM_EREPORT_VERS0,
			    "driver-assessment", DATA_TYPE_STRING,
			    drv_assess == SD_FM_DRV_FATAL ? "fail" : assessment,
			    "op-code", DATA_TYPE_UINT8, op_code,
			    "cdb",
			    DATA_TYPE_UINT8_ARRAY,
			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
			    "pkt-reason",
			    DATA_TYPE_UINT8, uscsi_pkt_reason,
			    "pkt-state",
			    DATA_TYPE_UINT8, uscsi_pkt_state,
			    "pkt-stats",
			    DATA_TYPE_UINT32, uscsi_pkt_statistics,
			    "stat-code",
			    DATA_TYPE_UINT8,
			    ssc->ssc_uscsi_cmd->uscsi_status,
			    NULL);
		}
	}
}
31439 
31440 /*
31441  *     Function: sd_ssc_extract_info
31442  *
31443  * Description: Extract information available to help generate ereport.
31444  *
31445  *     Context: Kernel thread or interrupt context.
31446  */
static void
sd_ssc_extract_info(sd_ssc_t *ssc, struct sd_lun *un, struct scsi_pkt *pktp,
    struct buf *bp, struct sd_xbuf *xp)
{
	size_t senlen = 0;
	union scsi_cdb *cdbp;
	int path_instance;
	/*
	 * Need scsi_cdb_size array to determine the cdb length.
	 */
	extern uchar_t	scsi_cdb_size[];

	ASSERT(un != NULL);
	ASSERT(pktp != NULL);
	ASSERT(bp != NULL);
	ASSERT(xp != NULL);
	ASSERT(ssc != NULL);
	ASSERT(mutex_owned(SD_MUTEX(un)));

	/*
	 * Transfer the cdb buffer pointer here.
	 */
	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;

	/* CDB length is derived from the command group code. */
	ssc->ssc_uscsi_cmd->uscsi_cdblen = scsi_cdb_size[GETGROUP(cdbp)];
	ssc->ssc_uscsi_cmd->uscsi_cdb = (caddr_t)cdbp;

	/*
	 * Transfer the sense data buffer pointer if sense data is available,
	 * calculate the sense data length first.
	 */
	if ((xp->xb_sense_state & STATE_XARQ_DONE) ||
	    (xp->xb_sense_state & STATE_ARQ_DONE)) {
		/*
		 * For arq case, we will enter here.
		 */
		if (xp->xb_sense_state & STATE_XARQ_DONE) {
			senlen = MAX_SENSE_LENGTH - xp->xb_sense_resid;
		} else {
			senlen = SENSE_LENGTH;
		}
	} else {
		/*
		 * For non-arq case, we will enter this branch.
		 */
		if (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK &&
		    (xp->xb_sense_state & STATE_XFERRED_DATA)) {
			senlen = SENSE_LENGTH - xp->xb_sense_resid;
		}

	}

	/* senlen is masked to one byte here for the rqlen field. */
	ssc->ssc_uscsi_cmd->uscsi_rqlen = (senlen & 0xff);
	ssc->ssc_uscsi_cmd->uscsi_rqresid = 0;
	ssc->ssc_uscsi_cmd->uscsi_rqbuf = (caddr_t)xp->xb_sense_data;

	ssc->ssc_uscsi_cmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);

	/*
	 * Only transfer path_instance when scsi_pkt was properly allocated.
	 */
	path_instance = pktp->pkt_path_instance;
	if (scsi_pkt_allocated_correctly(pktp) && path_instance)
		ssc->ssc_uscsi_cmd->uscsi_path_instance = path_instance;
	else
		ssc->ssc_uscsi_cmd->uscsi_path_instance = 0;

	/*
	 * Copy in the other fields we may need when posting ereport.
	 */
	ssc->ssc_uscsi_info->ui_pkt_reason = pktp->pkt_reason;
	ssc->ssc_uscsi_info->ui_pkt_state = pktp->pkt_state;
	ssc->ssc_uscsi_info->ui_pkt_statistics = pktp->pkt_statistics;
	ssc->ssc_uscsi_info->ui_lba = (uint64_t)SD_GET_BLKNO(bp);

	/*
	 * For partially read/write command, we will not create ena
	 * in case of a successful command be reconized as recovered.
	 */
	if ((pktp->pkt_reason == CMD_CMPLT) &&
	    (ssc->ssc_uscsi_cmd->uscsi_status == STATUS_GOOD) &&
	    (senlen == 0)) {
		return;
	}

	/*
	 * To associate ereports of a single command execution flow, we
	 * need a shared ena for a specific command.
	 */
	if (xp->xb_ena == 0)
		xp->xb_ena = fm_ena_generate(0, FM_ENA_FMT1);
	ssc->ssc_uscsi_info->ui_ena = xp->xb_ena;
}
31540 
31541 
31542 /*
31543  *     Function: sd_check_solid_state
31544  *
31545  * Description: Query the optional INQUIRY VPD page 0xb1. If the device
31546  *              supports VPD page 0xb1, sd examines the MEDIUM ROTATION
31547  *              RATE. If the MEDIUM ROTATION RATE is 1, sd assumes the
31548  *              device is a solid state drive.
31549  *
31550  *     Context: Kernel thread or interrupt context.
31551  */
31552 
31553 static void
31554 sd_check_solid_state(sd_ssc_t *ssc)
31555 {
31556 	int		rval		= 0;
31557 	uchar_t		*inqb1		= NULL;
31558 	size_t		inqb1_len	= MAX_INQUIRY_SIZE;
31559 	size_t		inqb1_resid	= 0;
31560 	struct sd_lun	*un;
31561 
31562 	ASSERT(ssc != NULL);
31563 	un = ssc->ssc_un;
31564 	ASSERT(un != NULL);
31565 	ASSERT(!mutex_owned(SD_MUTEX(un)));
31566 
31567 	mutex_enter(SD_MUTEX(un));
31568 	un->un_f_is_solid_state = FALSE;
31569 
31570 	if (ISCD(un)) {
31571 		mutex_exit(SD_MUTEX(un));
31572 		return;
31573 	}
31574 
31575 	if (sd_check_vpd_page_support(ssc) == 0 &&
31576 	    un->un_vpd_page_mask & SD_VPD_DEV_CHARACTER_PG) {
31577 		mutex_exit(SD_MUTEX(un));
31578 		/* collect page b1 data */
31579 		inqb1 = kmem_zalloc(inqb1_len, KM_SLEEP);
31580 
31581 		rval = sd_send_scsi_INQUIRY(ssc, inqb1, inqb1_len,
31582 		    0x01, 0xB1, &inqb1_resid);
31583 
31584 		if (rval == 0 && (inqb1_len - inqb1_resid > 5)) {
31585 			SD_TRACE(SD_LOG_COMMON, un,
31586 			    "sd_check_solid_state: \
31587 			    successfully get VPD page: %x \
31588 			    PAGE LENGTH: %x BYTE 4: %x \
31589 			    BYTE 5: %x", inqb1[1], inqb1[3], inqb1[4],
31590 			    inqb1[5]);
31591 
31592 			mutex_enter(SD_MUTEX(un));
31593 			/*
31594 			 * Check the MEDIUM ROTATION RATE. If it is set
31595 			 * to 1, the device is a solid state drive.
31596 			 */
31597 			if (inqb1[4] == 0 && inqb1[5] == 1) {
31598 				un->un_f_is_solid_state = TRUE;
31599 			}
31600 			mutex_exit(SD_MUTEX(un));
31601 		} else if (rval != 0) {
31602 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
31603 		}
31604 
31605 		kmem_free(inqb1, inqb1_len);
31606 	} else {
31607 		mutex_exit(SD_MUTEX(un));
31608 	}
31609 }
31610